From 4618e2b1480b037539f31ef779765efbd0d705f6 Mon Sep 17 00:00:00 2001 From: Alexis Pentori Date: Thu, 7 Mar 2024 13:58:44 +0100 Subject: [PATCH] discord: init connector Signed-off-by: Alexis Pentori --- source-discord-fetcher/Dockerfile | 8 ++ source-discord-fetcher/README.md | 90 ++++++++++++ source-discord-fetcher/main.py | 8 ++ source-discord-fetcher/metadata.yaml | 32 +++++ source-discord-fetcher/requirements.txt | 1 + .../sample_files/config-example.json | 4 + .../sample_files/configured_catalog.json | 61 ++++++++ source-discord-fetcher/setup.py | 35 +++++ .../source_discord_fetcher/__init__.py | 8 ++ .../source_discord_fetcher/run.py | 13 ++ .../schemas/channel.json | 48 +++++++ .../source_discord_fetcher/schemas/guild.json | 27 ++++ .../schemas/guild_channel.json | 48 +++++++ .../schemas/member.json | 72 ++++++++++ .../source_discord_fetcher/source.py | 134 ++++++++++++++++++ .../source_discord_fetcher/spec.yaml | 18 +++ 16 files changed, 607 insertions(+) create mode 100644 source-discord-fetcher/Dockerfile create mode 100644 source-discord-fetcher/README.md create mode 100644 source-discord-fetcher/main.py create mode 100644 source-discord-fetcher/metadata.yaml create mode 100644 source-discord-fetcher/requirements.txt create mode 100644 source-discord-fetcher/sample_files/config-example.json create mode 100644 source-discord-fetcher/sample_files/configured_catalog.json create mode 100644 source-discord-fetcher/setup.py create mode 100644 source-discord-fetcher/source_discord_fetcher/__init__.py create mode 100644 source-discord-fetcher/source_discord_fetcher/run.py create mode 100644 source-discord-fetcher/source_discord_fetcher/schemas/channel.json create mode 100644 source-discord-fetcher/source_discord_fetcher/schemas/guild.json create mode 100644 source-discord-fetcher/source_discord_fetcher/schemas/guild_channel.json create mode 100644 source-discord-fetcher/source_discord_fetcher/schemas/member.json create mode 100644 
source-discord-fetcher/source_discord_fetcher/source.py create mode 100644 source-discord-fetcher/source_discord_fetcher/spec.yaml diff --git a/source-discord-fetcher/Dockerfile b/source-discord-fetcher/Dockerfile new file mode 100644 index 0000000..0d1e900 --- /dev/null +++ b/source-discord-fetcher/Dockerfile @@ -0,0 +1,8 @@ +FROM airbyte/python-connector-base:1.1.0 + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] diff --git a/source-discord-fetcher/README.md b/source-discord-fetcher/README.md new file mode 100644 index 0000000..51523ce --- /dev/null +++ b/source-discord-fetcher/README.md @@ -0,0 +1,90 @@ +# Discord Fetcher Source + +This is the connector for fetching data from Discord servers, written in Python. + +> *Note*: In the rest of the document, the term guild designates a Discord server (API term). + +## Usage + +This connector fetches channel and member data from Discord servers. + +### Configuration + +The connector takes the following input: + +```yaml +api_key: 'Token of the bot used' +guilds_id: + - '123456789' +``` + +Each `guilds_id` entry is a server ID, which can be found in `Server Settings` > `Widget` > `Server ID`. +The `api_key` is the token of a bot account associated with the servers. + + +#### Bot Configuration + + +In order to access the API endpoints of the server, the connector must be authenticated as a Discord bot with the privileged intent `SERVER MEMBERS INTENT`. + +For that: +1. Create a Discord Application at https://discord.com/developers/applications +2.
Generate a BOT account: + * Go to the OAuth2 page of the application settings: + * https://discord.com/developers/applications/YOUR_APPLICATION_ID/oauth2 + * Store the Client ID and Client Secret in a password manager + * Select `bot` in the `OAuth2 URL Generator` scopes and copy the URL at the end of the page. + * Visit the URL and select the Discord server you want the bot to join. +3. Configure the BOT token + * Go to the `Bot` page of the application settings + - https://discord.com/developers/applications/YOUR_APPLICATION_ID/bot + * Store the bot token in a password manager (the `api_key`) + * Select `SERVER MEMBERS INTENT` in the `Privileged Gateway Intents` category. It will give the bot access to the `members` endpoint. + + +### Output + +The connector will return the following: +- `guild`: List of server information based on the `guilds_id` values. +- `guild_channel`: List of channels for each Discord server (contains partial data). +- `channel`: List of channels for each Discord server. +- `member`: List of users on the Discord server. + +## Local development + +### Prerequisites + +#### Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` +``` +source .venv/bin/activate +pip install -r requirements.txt +``` + +### Locally running the connector +``` +python main.py spec +python main.py check --config sample_files/config-example.json +python main.py discover --config sample_files/config-example.json +python main.py read --config sample_files/config-example.json --catalog sample_files/configured_catalog.json +``` + +### Locally running the connector docker image + +```bash +docker build -t airbyte/source-discord-fetcher:dev .
+# Running the spec command against your patched connector +docker run airbyte/source-discord-fetcher:dev spec +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-discord-fetcher:dev spec +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discord-fetcher:dev check --config /sample_files/config-example.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discord-fetcher:dev discover --config /sample_files/config-example.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discord-fetcher:dev read --config /sample_files/config-example.json --catalog /sample_files/configured_catalog.json +``` diff --git a/source-discord-fetcher/main.py b/source-discord-fetcher/main.py new file mode 100644 index 0000000..b7da14d --- /dev/null +++ b/source-discord-fetcher/main.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from source_discord_fetcher.run import run + +if __name__ == "__main__": + run() diff --git a/source-discord-fetcher/metadata.yaml b/source-discord-fetcher/metadata.yaml new file mode 100644 index 0000000..37c5d70 --- /dev/null +++ b/source-discord-fetcher/metadata.yaml @@ -0,0 +1,32 @@ +data: + allowedHosts: + registries: + oss: + enabled: true + cloud: + enabled: false + remoteRegistries: + pypi: + enabled: true + packageName: airbyte-source-discord-fetcher + connectorBuildOptions: + # Please update to the latest version of the connector base image. + # https://hub.docker.com/r/airbyte/python-connector-base + # Please use the full address with sha256 hash to guarantee build reproducibility.
+ baseImage: docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27 + connectorSubtype: api + connectorType: source + definitionId: b62a9143-1b59-41b2-9942-bae709e7da6b + dockerImageTag: 1.0.0 + dockerRepository: harbor.status.im/status-im/airbyte/source-discord-fetcher + githubIssueLabel: source-discord-fetcher + icon: discord-fetcher.svg + license: MIT + name: Discord Fetcher + releaseDate: TODO + supportLevel: community + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/sources/discord-fetcher + tags: + - language:python +metadataSpecVersion: "1.0" diff --git a/source-discord-fetcher/requirements.txt b/source-discord-fetcher/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/source-discord-fetcher/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/source-discord-fetcher/sample_files/config-example.json b/source-discord-fetcher/sample_files/config-example.json new file mode 100644 index 0000000..754bf83 --- /dev/null +++ b/source-discord-fetcher/sample_files/config-example.json @@ -0,0 +1,4 @@ +{ + "api_key": "some-bot-token", + "guilds_id": ["some-guild-id"] +} diff --git a/source-discord-fetcher/sample_files/configured_catalog.json b/source-discord-fetcher/sample_files/configured_catalog.json new file mode 100644 index 0000000..a5e269c --- /dev/null +++ b/source-discord-fetcher/sample_files/configured_catalog.json @@ -0,0 +1,61 @@ +{ + "streams": [ + { + "stream": { + "name": "guild", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "guild_channel", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + 
"sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "channel", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "member", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + } + + ] +} diff --git a/source-discord-fetcher/setup.py b/source-discord-fetcher/setup.py new file mode 100644 index 0000000..38aecf1 --- /dev/null +++ b/source-discord-fetcher/setup.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.2", +] + +TEST_REQUIREMENTS = [ + "requests-mock~=1.9.3", + "pytest~=6.2", + "pytest-mock~=3.6.1", + "connector-acceptance-test", +] + +setup( + name="source_discord_fetcher", + description="Source implementation for Discord Fetcher.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, + entry_points={ + "console_scripts": [ + "source-discord-fetcher=source_discord_fetcher.run:run", + ], + }, +) diff --git a/source-discord-fetcher/source_discord_fetcher/__init__.py b/source-discord-fetcher/source_discord_fetcher/__init__.py new file mode 100644 index 0000000..2cb5354 --- /dev/null +++ b/source-discord-fetcher/source_discord_fetcher/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+#
+
+
+from .source import SourceDiscordFetcher
+
+__all__ = ["SourceDiscordFetcher"]
diff --git a/source-discord-fetcher/source_discord_fetcher/run.py b/source-discord-fetcher/source_discord_fetcher/run.py
new file mode 100644
index 0000000..3b18abd
--- /dev/null
+++ b/source-discord-fetcher/source_discord_fetcher/run.py
@@ -0,0 +1,14 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import sys
+
+from airbyte_cdk.entrypoint import launch
+from .source import SourceDiscordFetcher
+
+def run():
+    """Launch the Discord fetcher source with the process command-line arguments."""
+    source = SourceDiscordFetcher()
+    launch(source, sys.argv[1:])
diff --git a/source-discord-fetcher/source_discord_fetcher/schemas/channel.json b/source-discord-fetcher/source_discord_fetcher/schemas/channel.json
new file mode 100644
index 0000000..f65c497
--- /dev/null
+++ b/source-discord-fetcher/source_discord_fetcher/schemas/channel.json
@@ -0,0 +1,48 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "id": {
+      "type": ["null", "string"]
+    },
+    "type": {
+      "type": ["null", "integer"]
+    },
+    "guild_id": {
+      "type": ["null", "string"]
+    },
+    "position": {
+      "type": ["null", "number"]
+    },
+    "name": {
+      "type": ["null", "string"]
+    },
+    "topic": {
+      "type": ["null", "string"]
+    },
+    "last_message_id": {
+      "type": ["null", "string"]
+    },
+    "managed": {
+      "type": ["null", "boolean"]
+    },
+    "parent_id": {
+      "type": ["null", "string"]
+    },
+    "last_pin_timestamp": {
+      "type": ["null", "string"]
+    },
+    "message_count": {
+      "type": ["null", "number"]
+    },
+    "member_count": {
+      "type": ["null", "number"]
+    },
+    "flags": {
+      "type": ["null", "number"]
+    },
+    "total_message_sent": {
+      "type": ["null", "number"]
+    }
+  }
+}
diff --git a/source-discord-fetcher/source_discord_fetcher/schemas/guild.json b/source-discord-fetcher/source_discord_fetcher/schemas/guild.json
new file mode 100644
index 0000000..bfe7a53
--- /dev/null
+++ b/source-discord-fetcher/source_discord_fetcher/schemas/guild.json
@@ -0,0 +1,27 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "id": {
+      "type": [ "null", "string" ]
+    },
+    "name": {
+      "type": [ "null", "string" ]
+    },
+    "owner_id": {
+      "type": [ "null", "string" ]
+    },
+    "roles": {
+      "type": [ "null", "array" ]
+    },
+    "chain": {
+      "type": [ "null", "string" ]
+    },
+    "description": {
+      "type": [ "null", "string" ]
+    },
+    "max_members": {
+      "type": [ "null", "number" ]
+    }
+  }
+}
diff --git a/source-discord-fetcher/source_discord_fetcher/schemas/guild_channel.json b/source-discord-fetcher/source_discord_fetcher/schemas/guild_channel.json
new file mode 100644
index 0000000..39b53a6
--- /dev/null
+++ b/source-discord-fetcher/source_discord_fetcher/schemas/guild_channel.json
@@ -0,0 +1,48 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "id": {
+      "type": ["null", "string"]
+    },
+    "type": {
+      "type": ["null", "integer"]
+    },
+    "guild_id": {
+      "type": ["null", "string"]
+    },
+    "position": {
+      "type": ["null", "number"]
+    },
+    "name": {
+      "type": ["null", "string"]
+    },
+    "topic": {
+      "type": ["null", "string"]
+    },
+    "last_message_id": {
+      "type": ["null", "string"]
+    },
+    "managed": {
+      "type": ["null", "boolean"]
+    },
+    "parent_id": {
+      "type": ["null", "string"]
+    },
+    "last_pin_timestamp": {
+      "type": ["null", "string"]
+    },
+    "message_count": {
+      "type": ["null", "number"]
+    },
+    "member_count": {
+      "type": ["null", "number"]
+    },
+    "flags": {
+      "type": ["null", "number"]
+    },
+    "total_message_sent": {
+      "type": ["null", "number"]
+    }
+  }
+}
diff --git a/source-discord-fetcher/source_discord_fetcher/schemas/member.json b/source-discord-fetcher/source_discord_fetcher/schemas/member.json
new file mode 100644
index 0000000..37d07f6
--- /dev/null
+++ b/source-discord-fetcher/source_discord_fetcher/schemas/member.json
@@ -0,0 +1,72 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "id": {
+      "type": [
+        "null",
+        "string"
+      ]
+    },
+    "username": {
+      "type": [
+        "null",
+        "string"
+      ]
+    },
+    "discriminator": {
+      "type": [
+        "null",
+        "string"
+      ]
+    },
+    "global_name": {
+      "type": [
+        "null",
+        "string"
+      ]
+    },
+    "bot": {
+      "type": [
+        "null",
+        "boolean"
+      ]
+    },
+    "mfa_enabled": {
+      "type": [
+        "null",
+        "boolean"
+      ]
+    },
+    "verified": {
+      "type": [
+        "null",
+        "boolean"
+      ]
+    },
+    "email": {
+      "type": [
+        "null",
+        "string"
+      ]
+    },
+    "premium_type": {
+      "type": [
+        "null",
+        "integer"
+      ]
+    },
+    "public_flags": {
+      "type": [
+        "null",
+        "integer"
+      ]
+    },
+    "guild_id": {
+      "type": [
+        "null",
+        "string"
+      ]
+    }
+  }
+}
diff --git a/source-discord-fetcher/source_discord_fetcher/source.py b/source-discord-fetcher/source_discord_fetcher/source.py
new file mode 100644
index 0000000..e2d55e4
--- /dev/null
+++ b/source-discord-fetcher/source_discord_fetcher/source.py
@@ -0,0 +1,225 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+"""Airbyte source exposing Discord guilds, channels and members as streams."""
+
+import logging
+from abc import ABC
+from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
+
+import requests
+from airbyte_cdk.sources import AbstractSource
+from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.streams.http import HttpSubStream, HttpStream
+from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
+
+logger = logging.getLogger("airbyte")
+
+# Fields copied from each API object into the emitted record; a key missing
+# from a response is emitted as None.
+GUILD_KEYS = ["id", "name", "owner_id", "roles", "description", "chain", "max_members"]
+CHANNEL_KEYS = [
+    "id", "type", "guild_id", "position", "name", "topic", "last_message_id",
+    "managed", "parent_id", "last_pin_timestamp", "message_count",
+    "member_count", "flags", "total_message_sent",
+]
+USER_KEYS = [
+    "id", "username", "discriminator", "global_name", "bot", "mfa_enabled",
+    "verified", "email", "premium_type", "public_flags",
+]
+
+# Sub-path appended to guilds/<guild_id> to list the channels of a guild.
+GUILD_CHANNELS_ENDPOINT = "/channels"
+# Number of members requested per page of the member listing.
+MEMBERS_PAGE_SIZE = 1000
+# Seconds to wait for Discord when checking the connection.
+CHECK_TIMEOUT = 30
+
+
+class DiscordFetcherStream(HttpStream, ABC):
+    """Base stream issuing one request per configured guild.
+
+    Each slice carries a single ``guild_id`` and the request path is
+    ``guilds/<guild_id><endpoint>``.
+    """
+
+    # NOTE(review): the base URL is unversioned, so Discord chooses the API
+    # version -- consider pinning one explicitly once confirmed.
+    url_base = "https://discord.com/api/"
+
+    def __init__(self, guilds_id: List[str], endpoint: str = "", **kwargs):
+        """Store the guild IDs to iterate over and the per-guild sub-path.
+
+        Remaining keyword arguments (e.g. ``authenticator``) are forwarded to
+        the parent constructor.
+        """
+        super().__init__(**kwargs)
+        self.guilds_id = guilds_id
+        self.endpoint = endpoint
+
+    def path(
+        self,
+        stream_state: Mapping[str, Any] = None,
+        stream_slice: Mapping[str, Any] = None,
+        next_page_token: Mapping[str, Any] = None,
+    ) -> str:
+        """Return the guild-scoped path for the guild of the current slice."""
+        return f"guilds/{stream_slice['guild_id']}{self.endpoint}"
+
+    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
+        """Yield one ``{"guild_id": ...}`` slice per configured guild."""
+        for guild_id in self.guilds_id:
+            yield {"guild_id": guild_id}
+
+    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
+        """Return None: by default a stream reads a single page per guild."""
+        return None
+
+
+class Guild(DiscordFetcherStream):
+    """One record per configured guild, restricted to ``GUILD_KEYS``."""
+
+    # Records carry the guild's own "id" field (see GUILD_KEYS), not "guild_id".
+    primary_key = "id"
+
+    def parse_response(
+        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
+    ) -> Iterable[Mapping]:
+        """Yield the guild object of the response reduced to ``GUILD_KEYS``."""
+        data = response.json()
+        logger.debug("Response: %s", data)
+        yield {key: data.get(key) for key in GUILD_KEYS}
+
+
+class GuildChannel(DiscordFetcherStream):
+    """One record per channel returned by the guild channel listing."""
+
+    # Records carry the channel's "id" field (see CHANNEL_KEYS).
+    primary_key = "id"
+
+    use_cache = True
+
+    def parse_response(
+        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
+    ) -> Iterable[Mapping]:
+        """Yield each channel of the listing reduced to ``CHANNEL_KEYS``."""
+        data = response.json()
+        logger.debug("Response: %s", data)
+        # A channel listing is a JSON array.  A JSON object means the stream was
+        # built without the "/channels" endpoint and hit guilds/<guild_id>
+        # itself, so there is no channel to emit.
+        if isinstance(data, dict):
+            logger.warning("Expected a list of channels but received an object; skipping it")
+            return
+        for elt in data:
+            yield {key: elt.get(key) for key in CHANNEL_KEYS}
+
+
+class Channel(HttpSubStream, GuildChannel):
+    """Full channel object for every channel listed by ``GuildChannel``."""
+
+    primary_key = "id"
+
+    def __init__(self, **kwargs):
+        """Build the parent ``GuildChannel`` stream and initialise this stream.
+
+        The parent has to list ``guilds/<guild_id>/channels``.  Built from the
+        caller's kwargs alone its endpoint stayed empty, so it requested the
+        guild object and produced no parent record for this stream.
+        """
+        parent_kwargs = {**kwargs, "endpoint": GUILD_CHANNELS_ENDPOINT}
+        super().__init__(GuildChannel(**parent_kwargs), **kwargs)
+
+    def path(
+        self,
+        stream_state: Mapping[str, Any] = None,
+        stream_slice: Mapping[str, Any] = None,
+        next_page_token: Mapping[str, Any] = None,
+    ) -> str:
+        """Return ``channels/<channel_id>`` for the parent record of the slice."""
+        parent = stream_slice.get("parent")
+        logger.debug("Parent: %s", parent)
+        return f"channels/{parent.get('id')}"
+
+    def parse_response(
+        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
+    ) -> Iterable[Mapping]:
+        """Yield the channel object of the response reduced to ``CHANNEL_KEYS``."""
+        data = response.json()
+        logger.debug("Response: %s", data)
+        yield {key: data.get(key) for key in CHANNEL_KEYS}
+
+
+class Member(DiscordFetcherStream):
+    """One record per guild member: its user fields plus the guild ID."""
+
+    # User IDs repeat across guilds, so the key combines guild and user.
+    primary_key = ["guild_id", "id"]
+
+    def request_params(
+        self,
+        stream_state: Optional[Mapping[str, Any]],
+        stream_slice: Optional[Mapping[str, Any]] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> MutableMapping[str, Any]:
+        """Return the page size plus, after the first page, the ``after`` cursor."""
+        params: MutableMapping[str, Any] = {"limit": MEMBERS_PAGE_SIZE}
+        if next_page_token:
+            params.update(next_page_token)
+        return params
+
+    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
+        """Return the ``after`` cursor of the next page, or None on the last page.
+
+        A page shorter than ``MEMBERS_PAGE_SIZE`` is the last one; otherwise the
+        next page starts after the highest user ID seen in this page.
+        """
+        members = response.json()
+        if not isinstance(members, list) or len(members) < MEMBERS_PAGE_SIZE:
+            return None
+        user_ids = [int(elt["user"]["id"]) for elt in members if elt.get("user")]
+        return {"after": str(max(user_ids))} if user_ids else None
+
+    def parse_response(
+        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
+    ) -> Iterable[Mapping]:
+        """Yield the ``USER_KEYS`` of each member, tagged with the slice guild ID."""
+        data = response.json()
+        logger.debug("Response: %s", data)
+        for elt in data:
+            # Tolerate a member entry without a "user" object.
+            member_user = elt.get("user") or {}
+            user = {key: member_user.get(key) for key in USER_KEYS}
+            user["guild_id"] = stream_slice["guild_id"]
+            yield user
+
+
+class SourceDiscordFetcher(AbstractSource):
+    """Airbyte source exposing the guild, guild_channel, channel and member streams."""
+
+    def check_connection(self, logger, config) -> Tuple[bool, Any]:
+        """Check that the bot token can read every configured guild.
+
+        Returns ``(True, None)`` on success, otherwise ``(False, reason)`` for
+        the first guild that cannot be fetched.
+        """
+        auth = TokenAuthenticator(token=config["api_key"], auth_method="Bot")
+        for guild_id in config["guilds_id"]:
+            url = f"{DiscordFetcherStream.url_base}guilds/{guild_id}"
+            try:
+                response = requests.get(url, headers=auth.get_auth_header(), timeout=CHECK_TIMEOUT)
+                response.raise_for_status()
+            except requests.RequestException as error:
+                return False, f"Unable to read guild {guild_id}: {error}"
+        return True, None
+
+    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
+        """Instantiate every stream with the configured guild IDs and bot token."""
+        auth = TokenAuthenticator(token=config["api_key"], auth_method="Bot")
+        guilds_id = config["guilds_id"]
+        return [
+            Guild(guilds_id=guilds_id, authenticator=auth),
+            GuildChannel(guilds_id=guilds_id, endpoint=GUILD_CHANNELS_ENDPOINT, authenticator=auth),
+            Channel(guilds_id=guilds_id, authenticator=auth),
+            Member(guilds_id=guilds_id, endpoint="/members", authenticator=auth),
+        ]
diff --git a/source-discord-fetcher/source_discord_fetcher/spec.yaml b/source-discord-fetcher/source_discord_fetcher/spec.yaml
new file mode 100644
index 0000000..152f47b
--- /dev/null
+++ b/source-discord-fetcher/source_discord_fetcher/spec.yaml
@@ -0,0 +1,18 @@
+documentationUrl: https://docsurl.com
+connectionSpecification:
+  $schema: http://json-schema.org/draft-07/schema#
+  title: Discord Fetcher Spec
+  type: object
+  required:
+    - api_key
+    - guilds_id
+  properties:
+    api_key:
+      type: string
+      description: Bot token used to authenticate against the Discord API
+      airbyte_secret: true
+    guilds_id:
+      type: array
+      description: IDs of the servers (guilds) to query
+      items:
+        type: string