From 595051fac5b64dc292c26e5698f4051990c10ceb Mon Sep 17 00:00:00 2001 From: Alexis Pentori Date: Mon, 15 Apr 2024 14:30:44 +0200 Subject: [PATCH] source-discourse-fetcher: adding new endpoints fixing mistake in simplecast Signed-off-by: Alexis Pentori --- source-discourse-fetcher/metadata.yaml | 2 +- .../sample_files/configured_catalog.json | 71 ++++++++ .../schemas/category.json | 121 ++++++++++++++ .../schemas/group.json | 102 ++++++++++++ .../schemas/group_member.json | 33 ++++ .../source_discourse_fetcher/schemas/tag.json | 21 +++ .../schemas/topic.json | 122 ++++++++++++++ .../schemas/user.json | 3 + .../source_discourse_fetcher/source.py | 155 +++++++++++++++++- .../schemas/analytic_time_of_week.json | 2 +- 10 files changed, 621 insertions(+), 11 deletions(-) create mode 100644 source-discourse-fetcher/source_discourse_fetcher/schemas/category.json create mode 100644 source-discourse-fetcher/source_discourse_fetcher/schemas/group.json create mode 100644 source-discourse-fetcher/source_discourse_fetcher/schemas/group_member.json create mode 100644 source-discourse-fetcher/source_discourse_fetcher/schemas/tag.json create mode 100644 source-discourse-fetcher/source_discourse_fetcher/schemas/topic.json diff --git a/source-discourse-fetcher/metadata.yaml b/source-discourse-fetcher/metadata.yaml index b229dbc..aceb065 100644 --- a/source-discourse-fetcher/metadata.yaml +++ b/source-discourse-fetcher/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: api connectorType: source definitionId: d12c5a88-5e78-452d-b957-eb4b2fd6e1dd - dockerImageTag: 0.1.0 + dockerImageTag: 0.1.2 dockerRepository: harbor.status.im/status-im/airbyte/source-discourse-fetcher githubIssueLabel: source-discourse-fetcher icon: discourse-fetcher.svg diff --git a/source-discourse-fetcher/sample_files/configured_catalog.json b/source-discourse-fetcher/sample_files/configured_catalog.json index 72e39df..776cef6 100644 --- a/source-discourse-fetcher/sample_files/configured_catalog.json +++ 
b/source-discourse-fetcher/sample_files/configured_catalog.json @@ -27,6 +27,77 @@ }, "sync_mode": "incremental", "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "topic", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "group", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "group_member", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "tag", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + } + , + { + "stream": { + "name": "category", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" } ] } diff --git a/source-discourse-fetcher/source_discourse_fetcher/schemas/category.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/category.json new file mode 100644 index 0000000..56731c4 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/category.json @@ -0,0 +1,121 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Generated schema for Root", + "type": "object", + 
"properties": { + "id": { + "type": ["null", "number"] + }, + "name": { + "type": ["null", "string"] + }, + "color": { + "type": ["null", "string"] + }, + "text_color": { + "type": ["null", "string"] + }, + "slug": { + "type": ["null", "string"] + }, + "topic_count": { + "type": ["null", "number"] + }, + "post_count": { + "type": ["null", "number"] + }, + "position": { + "type": ["null", "number"] + }, + "description": { + "type": ["null", "string"] + }, + "description_text": { + "type": ["null", "string"] + }, + "description_excerpt": { + "type": ["null", "string"] + }, + "topic_url": { + "type": ["null", "string"] + }, + "read_restricted": { + "type": ["null", "boolean"] + }, + "permission": { + "type": ["null", "number"] + }, + "notification_level": { + "type": ["null", "number"] + }, + "can_edit": { + "type": ["null", "boolean"] + }, + "topic_template": { + "type": ["null", "string"] + }, + "has_children": { + "type": ["null", "boolean"] + }, + "sort_order": { + "type": ["null", "string"] + }, + "sort_ascending": { + "type": ["null", "string"] + }, + "show_subcategory_list": { + "type": ["null", "boolean"] + }, + "num_featured_topics": { + "type": ["null", "number"] + }, + "default_view": { + "type": ["null", "string"] + }, + "subcategory_list_style": { + "type": ["null", "string"] + }, + "default_top_period": { + "type": ["null", "string"] + }, + "default_list_filter": { + "type": ["null", "string"] + }, + "minimum_required_tags": { + "type": ["null", "number"] + }, + "navigate_to_first_post_after_read": { + "type": ["null", "boolean"] + }, + "topics_day": { + "type": ["null", "number"] + }, + "topics_week": { + "type": ["null", "number"] + }, + "topics_month": { + "type": ["null", "number"] + }, + "topics_year": { + "type": ["null", "number"] + }, + "topics_all_time": { + "type": ["null", "number"] + }, + "is_uncategorized": { + "type": ["null", "boolean"] + }, + "uploaded_logo": { + "type": ["null", "string"] + }, + "uploaded_logo_dark": { + "type": ["null", 
"string"] + }, + "uploaded_background": { + "type": ["null", "string"] + }, + "uploaded_background_dark": { + "type": ["null", "string"] + } + } +} diff --git a/source-discourse-fetcher/source_discourse_fetcher/schemas/group.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/group.json new file mode 100644 index 0000000..74fe94b --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/group.json @@ -0,0 +1,102 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "number"] + }, + "automatic": { + "type": ["null", "boolean"] + }, + "name": { + "type": ["null", "string"] + }, + "display_name": { + "type": ["null", "string"] + }, + "user_count": { + "type": ["null", "number"] + }, + "mentionable_level": { + "type": ["null", "number"] + }, + "messageable_level": { + "type": ["null", "number"] + }, + "visibility_level": { + "type": ["null", "number"] + }, + "primary_group": { + "type": ["null", "boolean"] + }, + "title": { + "type": ["null", "string"] + }, + "grant_trust_level": { + "type": ["null", "string"] + }, + "incoming_email": { + "type": ["null", "string"] + }, + "has_messages": { + "type": ["null", "boolean"] + }, + "flair_url": { + "type": ["null", "string"] + }, + "flair_bg_color": { + "type": ["null", "string"] + }, + "flair_color": { + "type": ["null", "string"] + }, + "bio_raw": { + "type": ["null", "string"] + }, + "bio_cooked": { + "type": ["null", "string"] + }, + "bio_excerpt": { + "type": ["null", "string"] + }, + "public_admission": { + "type": ["null", "boolean"] + }, + "public_exit": { + "type": ["null", "boolean"] + }, + "allow_membership_requests": { + "type": ["null", "boolean"] + }, + "full_name": { + "type": ["null", "string"] + }, + "default_notification_level": { + "type": ["null", "number"] + }, + "membership_request_template": { + "type": ["null", "string"] + }, + "is_group_user": { + "type": ["null", "boolean"] + }, + 
"is_group_owner": { + "type": ["null", "boolean"] + }, + "members_visibility_level": { + "type": ["null", "number"] + }, + "can_see_members": { + "type": ["null", "boolean"] + }, + "can_admin_group": { + "type": ["null", "boolean"] + }, + "can_edit_group": { + "type": ["null", "boolean"] + }, + "publish_read_state": { + "type": ["null", "boolean"] + } + } +} diff --git a/source-discourse-fetcher/source_discourse_fetcher/schemas/group_member.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/group_member.json new file mode 100644 index 0000000..132c399 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/group_member.json @@ -0,0 +1,33 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Generated schema for Root", + "type": "object", + "properties": { + "id": { + "type": ["null", "number"] + }, + "username": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "avatar_template": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "last_posted_at": { + "type": ["null", "string"] + }, + "last_seen_at": { + "type": ["null", "string"] + }, + "added_at": { + "type": ["null", "string"] + }, + "timezone": { + "type": ["null", "string"] + } + }} diff --git a/source-discourse-fetcher/source_discourse_fetcher/schemas/tag.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/tag.json new file mode 100644 index 0000000..84e6468 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/tag.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "text": { + "type": ["null", "string"] + }, + "count": { + "type": ["null", "number"] + }, + "pm_count": { + "type": ["null", "number"] + }, + "target_tag": { + "type": ["null", "string"] + } + } +} diff --git 
a/source-discourse-fetcher/source_discourse_fetcher/schemas/topic.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/topic.json new file mode 100644 index 0000000..dac3a80 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/topic.json @@ -0,0 +1,122 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null","number"] + }, + "title": { + "type": ["null","string"] + }, + "fancy_title": { + "type": ["null","string"] + }, + "slug": { + "type": ["null","string"] + }, + "posts_count": { + "type": ["null","number"] + }, + "reply_count": { + "type": ["null","number"] + }, + "highest_post_number": { + "type": ["null","number"] + }, + "image_url": { + "type": ["null","string"] + }, + "created_at": { + "type": ["null","string"] + }, + "last_posted_at": { + "type": ["null","string"] + }, + "bumped": { + "type": ["null","boolean"] + }, + "bumped_at": { + "type": ["null","string"] + }, + "archetype": { + "type": ["null","string"] + }, + "unseen": { + "type": ["null","boolean"] + }, + "last_read_post_number": { + "type": ["null","number"] + }, + "unread_posts": { + "type": ["null","number"] + }, + "pinned": { + "type": ["null","boolean"] + }, + "unpinned": { + "type": ["null","string"] + }, + "visible": { + "type": ["null","boolean"] + }, + "closed": { + "type": ["null","boolean"] + }, + "archived": { + "type": ["null","boolean"] + }, + "notification_level": { + "type": ["null","number"] + }, + "bookmarked": { + "type": ["null","boolean"] + }, + "liked": { + "type": ["null","boolean"] + }, + "views": { + "type": ["null","number"] + }, + "like_count": { + "type": ["null","number"] + }, + "has_summary": { + "type": ["null", "boolean"] + }, + "last_poster_username": { + "type": ["null", "string"] + }, + "category_id": { + "type": ["null", "number"] + }, + "op_like_count": { + "type": ["null", "number"] + }, + "pinned_globally": { + "type": ["null", "boolean"] + }, + 
"featured_link": { + "type": ["null", "string"] + }, + "posters": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "extras": { + "type": ["null", "string"] + }, + "description": { + "type": ["null", "string"] + }, + "user_id": { + "type": ["null", "number"] + }, + "primary_group_id": { + "type": ["null", "string"] + } + } + } + } + } +} diff --git a/source-discourse-fetcher/source_discourse_fetcher/schemas/user.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/user.json index 1baf2b5..2d7e9c7 100644 --- a/source-discourse-fetcher/source_discourse_fetcher/schemas/user.json +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/user.json @@ -11,6 +11,9 @@ "username": { "type": ["null", "string"] }, + "email": { + "type": ["null", "string"] + }, "active": { "type": ["null", "string"] }, diff --git a/source-discourse-fetcher/source_discourse_fetcher/source.py b/source-discourse-fetcher/source_discourse_fetcher/source.py index cae4426..5c61b85 100644 --- a/source-discourse-fetcher/source_discourse_fetcher/source.py +++ b/source-discourse-fetcher/source_discourse_fetcher/source.py @@ -9,20 +9,17 @@ import logging import requests from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.http import HttpStream +from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator logger = logging.getLogger("airbyte") USER_KEYS = [ - "id","name","username","active","created_at","trust_level","title","time_read" - "staged","days_visited","posts_read_count","topics_entered","post_count" + "id","name","username","active","created_at","trust_level","title","time_read", "staged","days_visited","posts_read_count","topics_entered","post_count", "email" ] POST_KEYS = [ - "id","name","username","created_at","post_number","post_type","updated_at","reply_count" - 
"reply_to_post_number","quote_count","incoming_link_count","reads","score","topic_id" - "topic_slug","topic_title","topic_html_title","category_id" + "id","name","username","created_at","post_number","post_type","updated_at","reply_count", "reply_to_post_number","quote_count","incoming_link_count","reads","score","topic_id", "topic_slug","topic_title","topic_html_title","category_id" ] class DiscourseStream(HttpStream): @@ -45,7 +42,7 @@ class DiscourseStream(HttpStream): return { "Api-Key" : f"{self.api_key}", "Api-Username": f"{self.api_username}"} class User(DiscourseStream): - primary_key="user_id" + primary_key="id" def path( self, @@ -67,7 +64,7 @@ class User(DiscourseStream): yield user class Post(DiscourseStream): - primary_key="post_id" + primary_key="id" def path( self, @@ -88,6 +85,119 @@ class Post(DiscourseStream): post = { key : elt.get(key) for key in POST_KEYS } yield post +class Topic(DiscourseStream): + primary_key="id" + # https://docs.discourse.org/#tag/Topics/operation/listLatestTopics + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.url}/latest.json" + + def parse_response( + self, + response: requests.Response, + **kwargs + ) -> Iterable[Mapping]: + data = response.json() + logger.debug("Response latest topics %s", data) + for elt in data.get("topic_list").get("topics"): + yield elt + +class Group(DiscourseStream): + primary_key="id" + use_cache=True + + # https://docs.discourse.org/#tag/Groups/operation/listGroups + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.url}/groups.json" + + def parse_response( + self, + response: requests.Response, + **kwargs + ) -> Iterable[Mapping]: + data = response.json() + logger.debug("Response groups %s", data) + for elt in data.get("groups"): + yield elt + 
+class GroupMember(HttpSubStream, DiscourseStream): + primary_key="id" + + # https://docs.discourse.org/#tag/Groups/operation/listGroupMembers + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + group_id = stream_slice.get('parent').get('id') + return f"{self.url}/groups/{group_id}/members.json" + + def parse_response( + self, + response: requests.Response, + **kwargs + ) -> Iterable[Mapping]: + data = response.json() + logger.debug("Response groups %s", data) + for elt in data.get("groups"): + yield elt + + +class Tag(DiscourseStream): + primary_key="id" + # https://docs.discourse.org/#tag/Tags/operation/listTags + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.url}/tags.json" + + def parse_response( + self, + response: requests.Response, + **kwargs + ) -> Iterable[Mapping]: + data = response.json() + logger.debug("Response groups %s", data) + for elt in data.get("tags"): + yield elt + + +class Category(DiscourseStream): + primary_key="id" + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + return f"{self.url}/categories.json" + + def parse_response( + self, + response: requests.Response, + **kwargs + ) -> Iterable[Mapping]: + data = response.json() + logger.debug("Response groups %s", data) + for elt in data.get("category_list").get("categories"): + yield elt + + + + # Source class SourceDiscourseFetcher(AbstractSource): def check_connection(self, logger, config) -> Tuple[bool, any]: @@ -95,6 +205,11 @@ class SourceDiscourseFetcher(AbstractSource): def streams(self, config: Mapping[str, Any]) -> List[Stream]: logger.info("Configuring Stream fron %s", config["url"]) + group=Group( + api_key = config['api-key'], + 
api_username = config['api-username'], + url = config['url'] + ) s = [ User( api_key = config['api-key'], @@ -105,6 +220,28 @@ class SourceDiscourseFetcher(AbstractSource): api_key = config['api-key'], api_username = config['api-username'], url = config['url'] - ) + ), + Topic( + api_key = config['api-key'], + api_username = config['api-username'], + url = config['url'] + ), + group, + GroupMember( + api_key = config['api-key'], + api_username = config['api-username'], + url = config['url'], + parent=group + ), + Tag( + api_key = config['api-key'], + api_username = config['api-username'], + url = config['url'] + ), + Category( + api_key = config['api-key'], + api_username = config['api-username'], + url = config['url'] + ) ] return s diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_time_of_week.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_time_of_week.json index a84de45..72de2cc 100644 --- a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_time_of_week.json +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_time_of_week.json @@ -15,7 +15,7 @@ "type": ["null", "number"] }, "count": { - "type": ["null", "count"] + "type": ["null", "number"] } } }