diff --git a/source-discourse-fetcher/Dockerfile b/source-discourse-fetcher/Dockerfile new file mode 100644 index 0000000..0d1e900 --- /dev/null +++ b/source-discourse-fetcher/Dockerfile @@ -0,0 +1,8 @@ +FROM airbyte/python-connector-base:1.1.0 + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] diff --git a/source-discourse-fetcher/README.md b/source-discourse-fetcher/README.md new file mode 100644 index 0000000..cf56e03 --- /dev/null +++ b/source-discourse-fetcher/README.md @@ -0,0 +1,62 @@ +# Discourse Fetcher Source + +This is the repository for fetching data from a Discourse forum, written in Python. + +## Usage + +This connector fetches user and post data from a Discourse forum instance. + +### Configuration + +The connector takes the following input: + +```yaml +- api-key +- api-username +- url +``` + +### Output + +The connector will return the following: +- `post`: List of posts on the Discourse instance. +- `user`: List of users on the Discourse instance. + +## Local development + +### Prerequisites + +#### Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` +``` +source .venv/bin/activate +pip install -r requirements.txt +``` + +### Locally running the connector +``` +python main.py spec +python main.py check --config sample_files/config-example.json +python main.py discover --config sample_files/config-example.json +python main.py read --config sample_files/config-example.json --catalog sample_files/configured_catalog.json +``` + +### Locally running the connector docker image + +```bash +docker build -t airbyte/source-discourse-fetcher:dev .
+# Running the spec command against your patched connector +docker run airbyte/source-discourse-fetcher:dev spec +```` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-discourse-fetcher:dev spec +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discourse-fetcher:dev check --config /sample_files/config-example.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discourse-fetcher:dev discover --config /sample_files/config-example.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discourse-fetcher:dev read --config /sample_files/config-example.json --catalog /sample_files/configured_catalog.json +``` diff --git a/source-discourse-fetcher/icon.svg b/source-discourse-fetcher/icon.svg new file mode 100644 index 0000000..fc730a4 --- /dev/null +++ b/source-discourse-fetcher/icon.svg @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/source-discourse-fetcher/main.py b/source-discourse-fetcher/main.py new file mode 100644 index 0000000..a22a1db --- /dev/null +++ b/source-discourse-fetcher/main.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+# + +from source_discourse_fetcher.run import run + +if __name__ == "__main__": + run() diff --git a/source-discourse-fetcher/metadata.yaml b/source-discourse-fetcher/metadata.yaml new file mode 100644 index 0000000..b229dbc --- /dev/null +++ b/source-discourse-fetcher/metadata.yaml @@ -0,0 +1,24 @@ +data: + registries: + oss: + enabled: true + cloud: + enabled: false + connectorBuildOptions: + baseImage: docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27 + connectorSubtype: api + connectorType: source + definitionId: d12c5a88-5e78-452d-b957-eb4b2fd6e1dd + dockerImageTag: 0.1.0 + dockerRepository: harbor.status.im/status-im/airbyte/source-discourse-fetcher + githubIssueLabel: source-discourse-fetcher + icon: discourse-fetcher.svg + license: MIT + name: Discourse Fetcher + releaseDate: TODO + supportLevel: community + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/sources/discourse-fetcher + tags: + - language:python +metadataSpecVersion: "1.0" diff --git a/source-discourse-fetcher/requirements.txt b/source-discourse-fetcher/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/source-discourse-fetcher/requirements.txt @@ -0,0 +1 @@ +-e . 
diff --git a/source-discourse-fetcher/sample_files/config-example.json b/source-discourse-fetcher/sample_files/config-example.json new file mode 100644 index 0000000..1a15458 --- /dev/null +++ b/source-discourse-fetcher/sample_files/config-example.json @@ -0,0 +1,5 @@ +{ + "url": "https://example.com", + "api-key": "SomeKey", + "api-username": "r0b0t" +} diff --git a/source-discourse-fetcher/sample_files/configured_catalog.json b/source-discourse-fetcher/sample_files/configured_catalog.json new file mode 100644 index 0000000..72e39df --- /dev/null +++ b/source-discourse-fetcher/sample_files/configured_catalog.json @@ -0,0 +1,32 @@ +{ + "streams": [ + { + "stream": { + "name": "user", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "post", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/source-discourse-fetcher/setup.py b/source-discourse-fetcher/setup.py new file mode 100644 index 0000000..a3ec948 --- /dev/null +++ b/source-discourse-fetcher/setup.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.2", +] + +TEST_REQUIREMENTS = [ + "requests-mock~=1.9.3", + "pytest~=6.2", + "pytest-mock~=3.6.1", + "connector-acceptance-test", +] + +setup( + name="source_discourse_fetcher", + description="Source implementation for Discourse Fetcher.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, + entry_points={ + "console_scripts": [ + "source-discourse-fetcher=source_discourse_fetcher.run:run", + ], + }, +) diff --git a/source-discourse-fetcher/source_discourse_fetcher/__init__.py b/source-discourse-fetcher/source_discourse_fetcher/__init__.py new file mode 100644 index 0000000..2155cf4 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceDiscourseFetcher + +__all__ = ["SourceDiscourseFetcher"] diff --git a/source-discourse-fetcher/source_discourse_fetcher/run.py b/source-discourse-fetcher/source_discourse_fetcher/run.py new file mode 100644 index 0000000..679dec7 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/run.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+#
+
+
+import sys
+
+from airbyte_cdk.entrypoint import launch
+from .source import SourceDiscourseFetcher
+
+def run():
+    """Create the Discourse source and pass it, with the CLI arguments, to the CDK launcher."""
+    launch(SourceDiscourseFetcher(), sys.argv[1:])
diff --git a/source-discourse-fetcher/source_discourse_fetcher/schemas/post.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/post.json new file mode 100644 index 0000000..8cc7515 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/post.json @@ -0,0 +1,63 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "number"] + }, + "name": { + "type": ["null", "string"] + }, + "username": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "post_number": { + "type": ["null", "number"] + }, + "post_type": { + "type": ["null", "number"] + }, + "updated_at": { + "type": ["null", "string"] + }, + "reply_count": { + "type": ["null", "number"] + }, + "reply_to_post_number": { + "type": ["null", "string"] + }, + "quote_count": { + "type": ["null", "number"] + }, + "incoming_link_count": { + "type": ["null", "number"] + }, + "reads": { + "type": ["null", "number"] + }, + "readers_count": { + "type": ["null", "number"] + }, + "score": { + "type": ["null", "number"] + }, + "topic_id": { + "type": ["null", "number"] + }, + "topic_slug": { + "type": ["null", "string"] + }, + "topic_title": { + "type": ["null", "string"] + }, + "topic_html_title": { + "type": ["null", "string"] + }, + "category_id": { + "type": ["null", "number"] + } + } +} diff --git a/source-discourse-fetcher/source_discourse_fetcher/schemas/user.json b/source-discourse-fetcher/source_discourse_fetcher/schemas/user.json new file mode 100644 index 0000000..1baf2b5 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/schemas/user.json @@ -0,0 +1,45 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "number"] + }, + "name": { + "type": ["null", "string"] + }, + "username": { + "type": ["null", "string"] + }, + "active": { + "type": ["null", "string"] + }, + "created_at": { + "type": ["null", "string"] + }, + "trust_level": { + "type": ["null", "number"] + }, + "title": { + "type": ["null", "string"] + }, + "time_read": { + "type": ["null", "number"] + }, + "staged": { + "type": ["null", "string"] + }, + "days_visited": { + "type": ["null", "number"] + }, + "posts_read_count": { + "type": ["null", "number"] + }, + "topics_entered": { + "type": ["null", "number"] + }, + "post_count": { + "type": ["null", "number"] + } + } +}
diff --git a/source-discourse-fetcher/source_discourse_fetcher/source.py b/source-discourse-fetcher/source_discourse_fetcher/source.py
new file mode 100644
index 0000000..cae4426
--- /dev/null
+++ b/source-discourse-fetcher/source_discourse_fetcher/source.py
@@ -0,0 +1,162 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from abc import ABC
+from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
+import logging
+import requests
+from airbyte_cdk.sources import AbstractSource
+from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.streams.http import HttpStream
+from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
+
+logger = logging.getLogger("airbyte")
+
+# Fields copied from each user object into an emitted user record.
+# Keep one key per line with a trailing comma: adjacent string literals are
+# concatenated by Python, which silently merges two keys into one.
+USER_KEYS = [
+    "id",
+    "name",
+    "username",
+    "active",
+    "created_at",
+    "trust_level",
+    "title",
+    "time_read",
+    "staged",
+    "days_visited",
+    "posts_read_count",
+    "topics_entered",
+    "post_count",
+]
+
+# Fields copied from each post object into an emitted post record.
+POST_KEYS = [
+    "id",
+    "name",
+    "username",
+    "created_at",
+    "post_number",
+    "post_type",
+    "updated_at",
+    "reply_count",
+    "reply_to_post_number",
+    "quote_count",
+    "incoming_link_count",
+    "reads",
+    "readers_count",
+    "score",
+    "topic_id",
+    "topic_slug",
+    "topic_title",
+    "topic_html_title",
+    "category_id",
+]
+
+
+class DiscourseStream(HttpStream):
+    """Base stream holding the Discourse credentials and instance URL.
+
+    Subclasses return a complete URL from ``path()``, so ``url_base`` is empty.
+    """
+
+    url_base = ""
+    primary_key = None
+
+    def __init__(self, api_key: str, api_username: str, url: str, **kwargs):
+        super().__init__(**kwargs)
+        self.api_key = api_key
+        self.api_username = api_username
+        # Strip trailing slashes so f"{self.url}/..." never contains "//".
+        self.url = url.rstrip("/")
+
+    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
+        # No pagination: only the first response of each endpoint is read.
+        return None
+
+    def request_headers(
+        self,
+        stream_state: Mapping[str, Any],
+        stream_slice: Mapping[str, Any] = None,
+        next_page_token: Mapping[str, Any] = None,
+    ) -> MutableMapping[str, Any]:
+        """Return the Api-Key and Api-Username headers sent with each request."""
+        return {"Api-Key": f"{self.api_key}", "Api-Username": f"{self.api_username}"}
+
+
+class User(DiscourseStream):
+    """Stream of users read from ``/admin/users/list/active.json``."""
+
+    # Emitted records carry the identifier under "id" (see USER_KEYS).
+    primary_key = "id"
+
+    def path(
+        self,
+        stream_state: Mapping[str, Any] = None,
+        stream_slice: Mapping[str, Any] = None,
+        next_page_token: Mapping[str, Any] = None,
+    ) -> str:
+        return f"{self.url}/admin/users/list/active.json"
+
+    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
+        """Yield one record per element of the JSON array in the response."""
+        for elt in response.json():
+            logger.debug("Response %s", elt)
+            yield {key: elt.get(key) for key in USER_KEYS}
+
+
+class Post(DiscourseStream):
+    """Stream of posts read from ``/posts.json``."""
+
+    # Emitted records carry the identifier under "id" (see POST_KEYS).
+    primary_key = "id"
+
+    def path(
+        self,
+        stream_state: Mapping[str, Any] = None,
+        stream_slice: Mapping[str, Any] = None,
+        next_page_token: Mapping[str, Any] = None,
+    ) -> str:
+        return f"{self.url}/posts.json"
+
+    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
+        """Yield one record per entry of the response's ``latest_posts`` list."""
+        data = response.json()
+        logger.debug("Response %s", data)
+        # A missing or null "latest_posts" yields no records instead of raising.
+        for elt in data.get("latest_posts") or []:
+            yield {key: elt.get(key) for key in POST_KEYS}
+
+
+# Source
+class SourceDiscourseFetcher(AbstractSource):
+    """Airbyte source that builds the User and Post streams from the config."""
+
+    def check_connection(self, logger, config) -> Tuple[bool, Any]:
+        """Check the URL and credentials with one authenticated request.
+
+        Returns ``(True, None)`` on success, otherwise ``(False, reason)``.
+        """
+        try:
+            response = requests.get(
+                f"{config['url'].rstrip('/')}/posts.json",
+                headers={"Api-Key": config["api-key"], "Api-Username": config["api-username"]},
+                timeout=30,
+            )
+            response.raise_for_status()
+        except (KeyError, requests.RequestException) as err:
+            return False, f"Unable to connect to Discourse: {err!r}"
+        return True, None
+
+    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
+        """Instantiate the User and Post streams with the configured credentials."""
+        logger.info("Configuring Stream from %s", config["url"])
+        stream_kwargs = {
+            "api_key": config["api-key"],
+            "api_username": config["api-username"],
+            "url": config["url"],
+        }
+        return [User(**stream_kwargs), Post(**stream_kwargs)]
diff --git a/source-discourse-fetcher/source_discourse_fetcher/spec.yaml b/source-discourse-fetcher/source_discourse_fetcher/spec.yaml new file mode 100644 index 0000000..644cdf5 --- /dev/null +++ b/source-discourse-fetcher/source_discourse_fetcher/spec.yaml @@ -0,0 +1,21 @@ +documentationUrl: https://docsurl.com +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: Discourse Fetcher Spec + type: object + required: + - url + - api-key + - api-username + properties: + url: + type: string + description: Discourse url + api-username: + type: string + description: Username to login + api-key: + type: string + description: API Key for Authentication + airbyte_secret: true +