source-discourse-fetcher: adding new endpoints

fixing mistake in simplecast

Signed-off-by: Alexis Pentori <alexis@status.im>
This commit is contained in:
Alexis Pentori 2024-04-15 14:30:44 +02:00
parent 11efa92811
commit 595051fac5
No known key found for this signature in database
GPG Key ID: 65250D2801E47A10
10 changed files with 621 additions and 11 deletions

View File

@ -9,7 +9,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: d12c5a88-5e78-452d-b957-eb4b2fd6e1dd
dockerImageTag: 0.1.0
dockerImageTag: 0.1.2
dockerRepository: harbor.status.im/status-im/airbyte/source-discourse-fetcher
githubIssueLabel: source-discourse-fetcher
icon: discourse-fetcher.svg

View File

@ -27,6 +27,77 @@
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "topic",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "group",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "group_member",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "tag",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
}
,
{
"stream": {
"name": "category",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
}
]
}

View File

@ -0,0 +1,121 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Generated schema for Root",
"type": "object",
"properties": {
"id": {
"type": ["null", "number"]
},
"name": {
"type": ["null", "string"]
},
"color": {
"type": ["null", "string"]
},
"text_color": {
"type": ["null", "string"]
},
"slug": {
"type": ["null", "string"]
},
"topic_count": {
"type": ["null", "number"]
},
"post_count": {
"type": ["null", "number"]
},
"position": {
"type": ["null", "number"]
},
"description": {
"type": ["null", "string"]
},
"description_text": {
"type": ["null", "string"]
},
"description_excerpt": {
"type": ["null", "string"]
},
"topic_url": {
"type": ["null", "string"]
},
"read_restricted": {
"type": ["null", "boolean"]
},
"permission": {
"type": ["null", "number"]
},
"notification_level": {
"type": ["null", "number"]
},
"can_edit": {
"type": ["null", "boolean"]
},
"topic_template": {
"type": ["null", "string"]
},
"has_children": {
"type": ["null", "boolean"]
},
"sort_order": {
"type": ["null", "string"]
},
"sort_ascending": {
"type": ["null", "string"]
},
"show_subcategory_list": {
"type": ["null", "boolean"]
},
"num_featured_topics": {
"type": ["null", "number"]
},
"default_view": {
"type": ["null", "string"]
},
"subcategory_list_style": {
"type": ["null", "string"]
},
"default_top_period": {
"type": ["null", "string"]
},
"default_list_filter": {
"type": ["null", "string"]
},
"minimum_required_tags": {
"type": ["null", "number"]
},
"navigate_to_first_post_after_read": {
"type": ["null", "boolean"]
},
"topics_day": {
"type": ["null", "number"]
},
"topics_week": {
"type": ["null", "number"]
},
"topics_month": {
"type": ["null", "number"]
},
"topics_year": {
"type": ["null", "number"]
},
"topics_all_time": {
"type": ["null", "number"]
},
"is_uncategorized": {
"type": ["null", "boolean"]
},
"uploaded_logo": {
"type": ["null", "string"]
},
"uploaded_logo_dark": {
"type": ["null", "string"]
},
"uploaded_background": {
"type": ["null", "string"]
},
"uploaded_background_dark": {
"type": ["null", "string"]
}
}
}

View File

@ -0,0 +1,102 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": ["null", "number"]
},
"automatic": {
"type": ["null", "boolean"]
},
"name": {
"type": ["null", "string"]
},
"display_name": {
"type": ["null", "string"]
},
"user_count": {
"type": ["null", "number"]
},
"mentionable_level": {
"type": ["null", "number"]
},
"messageable_level": {
"type": ["null", "number"]
},
"visibility_level": {
"type": ["null", "number"]
},
"primary_group": {
"type": ["null", "boolean"]
},
"title": {
"type": ["null", "string"]
},
"grant_trust_level": {
"type": ["null", "string"]
},
"incoming_email": {
"type": ["null", "string"]
},
"has_messages": {
"type": ["null", "boolean"]
},
"flair_url": {
"type": ["null", "string"]
},
"flair_bg_color": {
"type": ["null", "string"]
},
"flair_color": {
"type": ["null", "string"]
},
"bio_raw": {
"type": ["null", "string"]
},
"bio_cooked": {
"type": ["null", "string"]
},
"bio_excerpt": {
"type": ["null", "string"]
},
"public_admission": {
"type": ["null", "boolean"]
},
"public_exit": {
"type": ["null", "boolean"]
},
"allow_membership_requests": {
"type": ["null", "boolean"]
},
"full_name": {
"type": ["null", "string"]
},
"default_notification_level": {
"type": ["null", "number"]
},
"membership_request_template": {
"type": ["null", "string"]
},
"is_group_user": {
"type": ["null", "boolean"]
},
"is_group_owner": {
"type": ["null", "boolean"]
},
"members_visibility_level": {
"type": ["null", "number"]
},
"can_see_members": {
"type": ["null", "boolean"]
},
"can_admin_group": {
"type": ["null", "boolean"]
},
"can_edit_group": {
"type": ["null", "boolean"]
},
"publish_read_state": {
"type": ["null", "boolean"]
}
}
}

View File

@ -0,0 +1,33 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Generated schema for Root",
"type": "object",
"properties": {
"id": {
"type": ["null", "number"]
},
"username": {
"type": ["null", "string"]
},
"name": {
"type": ["null", "string"]
},
"avatar_template": {
"type": ["null", "string"]
},
"title": {
"type": ["null", "string"]
},
"last_posted_at": {
"type": ["null", "string"]
},
"last_seen_at": {
"type": ["null", "string"]
},
"added_at": {
"type": ["null", "string"]
},
"timezone": {
"type": ["null", "string"]
}
}}

View File

@ -0,0 +1,21 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": ["null", "string"]
},
"text": {
"type": ["null", "string"]
},
"count": {
"type": ["null", "number"]
},
"pm_count": {
"type": ["null", "number"]
},
"target_tag": {
"type": ["null", "string"]
}
}
}

View File

@ -0,0 +1,122 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": ["null","number"]
},
"title": {
"type": ["null","string"]
},
"fancy_title": {
"type": ["null","string"]
},
"slug": {
"type": ["null","string"]
},
"posts_count": {
"type": ["null","number"]
},
"reply_count": {
"type": ["null","number"]
},
"highest_post_number": {
"type": ["null","number"]
},
"image_url": {
"type": ["null","string"]
},
"created_at": {
"type": ["null","string"]
},
"last_posted_at": {
"type": ["null","string"]
},
"bumped": {
"type": ["null","boolean"]
},
"bumped_at": {
"type": ["null","string"]
},
"archetype": {
"type": ["null","string"]
},
"unseen": {
"type": ["null","boolean"]
},
"last_read_post_number": {
"type": ["null","number"]
},
"unread_posts": {
"type": ["null","number"]
},
"pinned": {
"type": ["null","boolean"]
},
"unpinned": {
"type": ["null","string"]
},
"visible": {
"type": ["null","boolean"]
},
"closed": {
"type": ["null","boolean"]
},
"archived": {
"type": ["null","boolean"]
},
"notification_level": {
"type": ["null","number"]
},
"bookmarked": {
"type": ["null","boolean"]
},
"liked": {
"type": ["null","boolean"]
},
"views": {
"type": ["null","number"]
},
"like_count": {
"type": ["null","number"]
},
"has_summary": {
"type": ["null", "boolean"]
},
"last_poster_username": {
"type": ["null", "string"]
},
"category_id": {
"type": ["null", "number"]
},
"op_like_count": {
"type": ["null", "number"]
},
"pinned_globally": {
"type": ["null", "boolean"]
},
"featured_link": {
"type": ["null", "string"]
},
"posters": {
"type": ["null", "array"],
"items": {
"type": "object",
"properties": {
"extras": {
"type": ["null", "string"]
},
"description": {
"type": ["null", "string"]
},
"user_id": {
"type": ["null", "number"]
},
"primary_group_id": {
"type": ["null", "string"]
}
}
}
}
}
}

View File

@ -11,6 +11,9 @@
"username": {
"type": ["null", "string"]
},
"email": {
"type": ["null", "string"]
},
"active": {
"type": ["null", "string"]
},

View File

@ -9,20 +9,17 @@ import logging
import requests
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http import HttpStream
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
logger = logging.getLogger("airbyte")
USER_KEYS = [
"id","name","username","active","created_at","trust_level","title","time_read"
"staged","days_visited","posts_read_count","topics_entered","post_count"
"id","name","username","active","created_at","trust_level","title","time_read", "staged","days_visited","posts_read_count","topics_entered","post_count", "email"
]
POST_KEYS = [
"id","name","username","created_at","post_number","post_type","updated_at","reply_count"
"reply_to_post_number","quote_count","incoming_link_count","reads","score","topic_id"
"topic_slug","topic_title","topic_html_title","category_id"
"id","name","username","created_at","post_number","post_type","updated_at","reply_count", "reply_to_post_number","quote_count","incoming_link_count","reads","score","topic_id", "topic_slug","topic_title","topic_html_title","category_id"
]
class DiscourseStream(HttpStream):
@ -45,7 +42,7 @@ class DiscourseStream(HttpStream):
return { "Api-Key" : f"{self.api_key}", "Api-Username": f"{self.api_username}"}
class User(DiscourseStream):
primary_key="user_id"
primary_key="id"
def path(
self,
@ -67,7 +64,7 @@ class User(DiscourseStream):
yield user
class Post(DiscourseStream):
primary_key="post_id"
primary_key="id"
def path(
self,
@ -88,6 +85,119 @@ class Post(DiscourseStream):
post = { key : elt.get(key) for key in POST_KEYS }
yield post
class Topic(DiscourseStream):
    """Stream of topics read from the Discourse ``/latest.json`` endpoint.

    API reference:
    https://docs.discourse.org/#tag/Topics/operation/listLatestTopics
    """

    primary_key = "id"

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        """Return the URL of the latest-topics listing under ``self.url``."""
        endpoint = f"{self.url}/latest.json"
        return endpoint

    def parse_response(
        self,
        response: requests.Response,
        **kwargs
    ) -> Iterable[Mapping]:
        """Yield every entry of ``topic_list.topics`` from the JSON body."""
        payload = response.json()
        logger.debug("Response latest topics %s", payload)
        topic_list = payload.get("topic_list")
        yield from topic_list.get("topics")
class Group(DiscourseStream):
    """Stream of groups read from the Discourse ``/groups.json`` endpoint.

    API reference:
    https://docs.discourse.org/#tag/Groups/operation/listGroups
    """

    primary_key = "id"
    # NOTE(review): presumably enables CDK response caching so a sub-stream
    # using this stream as its parent does not re-fetch the groups -- confirm.
    use_cache = True

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        """Return the URL of the group listing under ``self.url``."""
        endpoint = f"{self.url}/groups.json"
        return endpoint

    def parse_response(
        self,
        response: requests.Response,
        **kwargs
    ) -> Iterable[Mapping]:
        """Yield every entry of the ``groups`` array from the JSON body."""
        payload = response.json()
        logger.debug("Response groups %s", payload)
        yield from payload.get("groups")
class GroupMember(HttpSubStream, DiscourseStream):
    """Sub-stream listing the members of each group emitted by the parent stream.

    Each stream slice carries one parent record under the ``"parent"`` key;
    one request to the group's members endpoint is issued per slice.

    API reference:
    https://docs.discourse.org/#tag/Groups/operation/listGroupMembers
    """

    primary_key = "id"

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        """Return the members URL for the group held in ``stream_slice``."""
        # The Discourse API documents the ``{id}`` path segment of this route
        # as "use group name instead of id", so the parent's ``name`` field is
        # interpolated rather than its numeric ``id``.
        group_name = stream_slice.get("parent").get("name")
        return f"{self.url}/groups/{group_name}/members.json"

    def parse_response(
        self,
        response: requests.Response,
        **kwargs
    ) -> Iterable[Mapping]:
        """Yield every entry of the ``members`` array from the JSON body."""
        data = response.json()
        logger.debug("Response group members %s", data)
        # The members endpoint returns its rows under "members"; reading
        # "groups" (as the group listing does) yields nothing useful here.
        yield from data.get("members", [])
class Tag(DiscourseStream):
    """Stream of tags read from the Discourse ``/tags.json`` endpoint.

    API reference:
    https://docs.discourse.org/#tag/Tags/operation/listTags
    """

    primary_key = "id"

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        """Return the URL of the tag listing under ``self.url``."""
        return f"{self.url}/tags.json"

    def parse_response(
        self,
        response: requests.Response,
        **kwargs
    ) -> Iterable[Mapping]:
        """Yield every entry of the ``tags`` array from the JSON body."""
        data = response.json()
        # Message names this stream so debug logs are not confused with Group.
        logger.debug("Response tags %s", data)
        for elt in data.get("tags"):
            yield elt
class Category(DiscourseStream):
    """Stream of categories read from the Discourse ``/categories.json`` endpoint.

    API reference:
    https://docs.discourse.org/#tag/Categories/operation/listCategories
    """

    primary_key = "id"

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        """Return the URL of the category listing under ``self.url``."""
        return f"{self.url}/categories.json"

    def parse_response(
        self,
        response: requests.Response,
        **kwargs
    ) -> Iterable[Mapping]:
        """Yield every entry of ``category_list.categories`` from the JSON body."""
        data = response.json()
        # Message names this stream so debug logs are not confused with Group.
        logger.debug("Response categories %s", data)
        for elt in data.get("category_list").get("categories"):
            yield elt
# Source
class SourceDiscourseFetcher(AbstractSource):
def check_connection(self, logger, config) -> Tuple[bool, any]:
@ -95,6 +205,11 @@ class SourceDiscourseFetcher(AbstractSource):
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
logger.info("Configuring Stream fron %s", config["url"])
group=Group(
api_key = config['api-key'],
api_username = config['api-username'],
url = config['url']
)
s = [
User(
api_key = config['api-key'],
@ -105,6 +220,28 @@ class SourceDiscourseFetcher(AbstractSource):
api_key = config['api-key'],
api_username = config['api-username'],
url = config['url']
)
),
Topic(
api_key = config['api-key'],
api_username = config['api-username'],
url = config['url']
),
group,
GroupMember(
api_key = config['api-key'],
api_username = config['api-username'],
url = config['url'],
parent=group
),
Tag(
api_key = config['api-key'],
api_username = config['api-username'],
url = config['url']
),
Category(
api_key = config['api-key'],
api_username = config['api-username'],
url = config['url']
)
]
return s

View File

@ -15,7 +15,7 @@
"type": ["null", "number"]
},
"count": {
"type": ["null", "count"]
"type": ["null", "number"]
}
}
}