discord: init connector

Signed-off-by: Alexis Pentori <alexis@status.im>
This commit is contained in:
Alexis Pentori 2024-03-07 13:58:44 +01:00
parent 493256269f
commit 4618e2b148
No known key found for this signature in database
GPG Key ID: 65250D2801E47A10
16 changed files with 607 additions and 0 deletions

View File

@ -0,0 +1,8 @@
# Image for the Discord Fetcher connector, built on the Airbyte Python base image.
FROM airbyte/python-connector-base:1.1.0
# Copy the connector sources into the image and install them as a package.
COPY . ./airbyte/integration_code
RUN pip install ./airbyte/integration_code
# The entrypoint and default env vars are already set in the base image
# Use the key=value form: the whitespace-separated "ENV key value" form is legacy syntax.
ENV AIRBYTE_ENTRYPOINT="python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

View File

@ -0,0 +1,90 @@
# Discord Fetcher Source
This is the repository for fetching data from Discord Server, written in Python.
> *Note*: In the rest of this document, the term *guild* designates a Discord server (it is the term used by the Discord API).
## Usage
This connector fetches guild, channel and member data from Discord servers.
### Configuration
The connector takes the following input:
```yaml
api_key: 'Token of the bot used'
guilds_id:
- '123456789'
```
The `guilds_id` values can be found in `Server Settings` > `Widget` > `Server ID`.
The `api_key` is the token of a bot account associated with the servers.
#### Bot Configuration
In order to access the API endpoints of the server, the connector must be authenticated as a Discord bot with the privileged intent `SERVER MEMBERS INTENT`.
For that:
1. Create a Discord Application at https://discord.com/developers/applications
2. Generate a BOT account:
* Go to the OAuth2 page on the application settings:
* https://discord.com/developers/applications/<app-id>/oauth2
* Store the Client ID and Client Secret in a password manager
* Select `bot` in the `OAuth2 URL Generator` scopes and copy the URL at the bottom of the page.
* Visit the URL and select the Discord server you want the bot to join.
3. Configure the BOT token
* Go to the `Bot` page of the application settings
- https://discord.com/developers/applications/<app-id>/bot
* Store the bot token in a password manager (this is the `api_key`)
* Select `SERVER MEMBERS INTENT` in the `Privileged Gateway Intents` category. It will give the bot access to the `members` endpoint.
### Output
The connector will return the following:
- `guild`: List of server information based on the `guilds_id` values.
- `guild_channel`: List of channels of each Discord server (contains partial data).
- `channel`: List of channels of each Discord server.
- `member`: List of users of each Discord server.
## Local development
### Prerequisites
#### Activate Virtual Environment and install dependencies
From this connector directory, create a virtual environment:
```
python -m venv .venv
```
```
source .venv/bin/activate
pip install -r requirements.txt
```
### Locally running the connector
```
python main.py spec
python main.py check --config sample_files/config-example.json
python main.py discover --config sample_files/config-example.json
python main.py read --config sample_files/config-example.json --catalog sample_files/configured_catalog.json
```
### Locally running the connector docker image
```bash
docker build -t airbyte/source-discord-fetcher:dev .
# Running the spec command against your patched connector
docker run airbyte/source-discord-fetcher:dev spec
```
#### Run
Then run any of the connector commands as follows:
```
docker run --rm airbyte/source-discord-fetcher:dev spec
docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discord-fetcher:dev check --config /sample_files/config-example.json
docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discord-fetcher:dev discover --config /sample_files/config-example.json
docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-discord-fetcher:dev read --config /sample_files/config-example.json --catalog /sample_files/configured_catalog.json
```

View File

@ -0,0 +1,8 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from source_discord_fetcher.run import run
# Start the connector only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    run()

View File

@ -0,0 +1,32 @@
data:
allowedHosts:
registries:
oss:
enabled: true
cloud:
enabled: false
remoteRegistries:
pypi:
enabled: true
packageName: airbyte-source-discord-fetcher
connectorBuildOptions:
# Please update to the latest version of the connector base image.
# https://hub.docker.com/r/airbyte/python-connector-base
# Please use the full address with sha256 hash to guarantee build reproducibility.
baseImage: docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27
connectorSubtype: api
connectorType: source
definitionId: b62a9143-1b59-41b2-9942-bae709e7da6b
dockerImageTag: 1.0.0
dockerRepository: harbor.status.im/status-im/airbyte/source-discord-fetcher
githubIssueLabel: source-discord-fetcher
icon: discord-fetcher.svg
license: MIT
name: Discord Fetcher
releaseDate: TODO
supportLevel: community
releaseStage: alpha
documentationUrl: https://docs.airbyte.com/integrations/sources/discord-fetcher
tags:
- language:python
metadataSpecVersion: "1.0"

View File

@ -0,0 +1 @@
-e .

View File

@ -0,0 +1,4 @@
{
"api_key": "some-bot-token",
"guilds_id": ["some-guild-id"]
}

View File

@ -0,0 +1,61 @@
{
"streams": [
{
"stream": {
"name": "guild",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "guild_channel",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "channel",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "member",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
}
]
}

View File

@ -0,0 +1,35 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from setuptools import find_packages, setup
# Dependencies installed together with the package (passed as install_requires).
MAIN_REQUIREMENTS = [
    "airbyte-cdk~=0.2",
]

# Dependencies only installed through the "tests" extra (see extras_require).
TEST_REQUIREMENTS = [
    "requests-mock~=1.9.3",
    "pytest~=6.2",
    "pytest-mock~=3.6.1",
    "connector-acceptance-test",
]

# Package definition for the Discord Fetcher source.
setup(
    name="source_discord_fetcher",
    description="Source implementation for Discord Fetcher.",
    author="Airbyte",
    author_email="contact@airbyte.io",
    packages=find_packages(),
    install_requires=MAIN_REQUIREMENTS,
    # Ship the JSON/YAML resources (spec and stream schemas) inside the package.
    package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]},
    extras_require={
        "tests": TEST_REQUIREMENTS,
    },
    # Expose a "source-discord-fetcher" command bound to source_discord_fetcher.run:run.
    entry_points={
        "console_scripts": [
            "source-discord-fetcher=source_discord_fetcher.run:run",
        ],
    },
)

View File

@ -0,0 +1,8 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from .source import SourceDiscordFetcher
__all__ = ["SourceDiscordFetcher"]

View File

@ -0,0 +1,13 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import sys
from airbyte_cdk.entrypoint import launch
from .source import SourceDiscordFetcher
def run():
    """Instantiate the Discord Fetcher source and hand it to the Airbyte launcher.

    The command-line arguments (without the program name) are forwarded as-is.
    """
    launch(SourceDiscordFetcher(), sys.argv[1:])

View File

@ -0,0 +1,48 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": ["null", "number"]
},
"type": {
"type": ["null", "string"]
},
"guild_id": {
"type": ["null", "number"]
},
"position": {
"type": ["null", "number"]
},
"name": {
"type": ["null", "string"]
},
"topic": {
"type": ["null", "string"]
},
"last_message_id": {
"type": ["null", "number"]
},
"managed":{
"type": ["null", "boolean"]
},
"parent_id":{
"type": ["null", "number"]
},
"last_pin_timestamp":{
"type": ["null", "string"]
},
"message_count":{
"type": ["null", "number"]
},
"member_count":{
"type": ["null", "number"]
},
"flags":{
"type": ["null", "number"]
},
"total_message_sent":{
"type": ["null", "number"]
}
}
}

View File

@ -0,0 +1,27 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": [ "null", "number" ]
},
"name": {
"type": [ "null", "string" ]
},
"owner_id": {
"type": [ "null", "number" ]
},
"roles": {
"type": [ "null", "string" ]
},
"chain": {
"type": [ "null", "string" ]
},
"description": {
"type": [ "null", "string" ]
},
"max_members": {
"type": [ "null", "number" ]
}
}
}

View File

@ -0,0 +1,48 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": ["null", "number"]
},
"type": {
"type": ["null", "string"]
},
"guild_id": {
"type": ["null", "number"]
},
"position": {
"type": ["null", "number"]
},
"name": {
"type": ["null", "string"]
},
"topic": {
"type": ["null", "string"]
},
"last_message_id": {
"type": ["null", "number"]
},
"managed":{
"type": ["null", "boolean"]
},
"parent_id":{
"type": ["null", "number"]
},
"last_pin_timestamp":{
"type": ["null", "string"]
},
"message_count":{
"type": ["null", "number"]
},
"member_count":{
"type": ["null", "number"]
},
"flags":{
"type": ["null", "number"]
},
"total_message_sent":{
"type": ["null", "number"]
}
}
}

View File

@ -0,0 +1,72 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": [
"null",
"number"
]
},
"username": {
"type": [
"null",
"string"
]
},
"discriminator": {
"type": [
"null",
"string"
]
},
"global_name": {
"type": [
"null",
"string"
]
},
"bot": {
"type": [
"null",
"boolean"
]
},
"mfa_enabled": {
"type": [
"null",
"boolean"
]
},
"verified":{
"type": [
"null",
"boolean"
]
},
"email":{
"type": [
"null",
"string"
]
},
"premium_type":{
"type": [
"null",
"integer"
]
},
"public_flags":{
"type": [
"null",
"integer"
]
},
"guild_id":{
"type": [
"null",
"integer"
]
}
}
}

View File

@ -0,0 +1,134 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
from abc import ABC
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
import logging
import requests
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http import HttpSubStream, HttpStream
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
logger = logging.getLogger("airbyte")
# Field whitelists: only these keys are copied from the API payloads into the
# emitted records. A key missing from a payload is emitted with the value None.
GUILD_KEYS = ["id", "name", "owner_id", "roles", "description", "chain", "max_members"]
CHANNEL_KEYS = [
    "id",
    "type",
    "guild_id",
    "position",
    "name",
    "topic",
    "last_message_id",
    "managed",
    "parent_id",
    "last_pin_timestamp",
    "message_count",
    "member_count",
    # Was misspelled "falgs", which never matched the payload nor the "flags" schema field.
    "flags",
    "total_message_sent",
]
USER_KEYS = [
    "id",
    "username",
    "discriminator",
    "global_name",
    "bot",
    "mfa_enabled",
    "verified",
    "email",
    "premium_type",
    "public_flags",
]
# Basic full refresh stream
class DiscordFetcherStream(HttpStream, ABC):
    """Base stream for the guild-scoped Discord endpoints.

    One slice is produced per configured guild and every request path is built
    as ``guilds/{guild_id}{endpoint}``. This base class does not paginate.
    """

    # NOTE(review): no API version is pinned in this base URL; consider pinning
    # one (e.g. ".../api/v10/") -- confirm against the Discord API documentation.
    url_base = "https://discord.com/api/"

    def __init__(self, guilds_id: List[str], endpoint: str = "", **kwargs):
        """Store the guilds to query and the endpoint suffix.

        :param guilds_id: IDs of the guilds to query; one slice is emitted per ID.
            A single ID given as a plain string is accepted and wrapped in a list.
        :param endpoint: Suffix appended after ``guilds/{guild_id}`` in the
            request path (for example ``"/channels"``).
        :param kwargs: Forwarded unchanged to the parent constructor.
        """
        super().__init__(**kwargs)
        # Iterating a bare string would yield one slice per character.
        self.guilds_id = [guilds_id] if isinstance(guilds_id, str) else guilds_id
        self.endpoint = endpoint

    def path(
        self,
        stream_state: Optional[Mapping[str, Any]] = None,
        stream_slice: Optional[Mapping[str, Any]] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> str:
        """Return the request path for the guild of the current slice."""
        return f"guilds/{stream_slice['guild_id']}{self.endpoint}"

    def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield one ``{"guild_id": ...}`` slice per configured guild."""
        for guild_id in self.guilds_id:
            yield {"guild_id": guild_id}

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return ``None``: a single request is made per slice."""
        return None
class Guild(DiscordFetcherStream):
    """Stream emitting one record per configured guild.

    With the default empty ``endpoint`` the request path is ``guilds/{guild_id}``.
    """

    # Records are built from GUILD_KEYS, where the identifier is stored under
    # "id"; the previous value "guild_id" is not a field of the emitted records.
    primary_key = "id"

    def parse_response(
        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
    ) -> Iterable[Mapping]:
        """Yield the guild record restricted to the keys listed in GUILD_KEYS.

        :param response: HTTP response whose JSON body is a single object.
        :param stream_slice: Slice being read (unused here).
        """
        # Decode the body once and reuse it for both logging and extraction.
        data = response.json()
        logger.debug("Response: %s", data)
        yield {key: data.get(key) for key in GUILD_KEYS}
class GuildChannel(DiscordFetcherStream):
    """Stream listing the channels of each configured guild.

    Intended to be built with ``endpoint="/channels"`` so that the request path
    is ``guilds/{guild_id}/channels``.
    """

    # Records are built from CHANNEL_KEYS, where the identifier is stored under
    # "id"; the previous value "channel_id" is not a field of the emitted records.
    primary_key = "id"
    use_cache = True

    def parse_response(
        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
    ) -> Iterable[Mapping]:
        """Yield one record per channel, restricted to the keys in CHANNEL_KEYS.

        :param response: HTTP response whose JSON body is expected to be a list.
        :param stream_slice: Slice being read (unused here).
        """
        # Decode the body once and reuse it for both logging and extraction.
        data = response.json()
        logger.debug("Response: %s", data)
        # A single object instead of a list shows up when this stream is built
        # without endpoint="/channels": the request then targets
        # guilds/{guild_id}, which presumably returns the guild itself. Such a
        # payload holds no channel list, so nothing is emitted for it.
        if isinstance(data, dict):
            logger.info("Weird case due to the Substream")
            return
        for elt in data:
            yield {key: elt.get(key) for key in CHANNEL_KEYS}
class Channel(HttpSubStream, GuildChannel):
    """Stream fetching the full detail of every channel of the configured guilds.

    A parent ``GuildChannel`` stream lists the channels of each guild; every
    parent record is then fetched individually through ``channels/{id}``.
    """

    # Records are built from CHANNEL_KEYS, where the identifier is stored under
    # "id"; the previous value "channel_id" is not a field of the emitted records.
    primary_key = "id"

    def __init__(self, **kwargs):
        """Build the stream together with its parent channel listing.

        :param kwargs: Stream arguments (``guilds_id``, ``authenticator``, ...),
            used for both this stream and its parent.
        """
        # The parent must list channels. Without endpoint="/channels" it would
        # request guilds/{guild_id}, whose single-object payload is ignored by
        # GuildChannel.parse_response, leaving this stream with no slices.
        parent_kwargs = {**kwargs, "endpoint": "/channels"}
        super().__init__(GuildChannel(**parent_kwargs), **kwargs)

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return ``channels/{id}`` for the parent record held by the slice."""
        parent_record = stream_slice.get('parent')
        logger.info("Parent: %s", parent_record)
        channel_id = parent_record.get('id')
        return f"channels/{channel_id}"

    def parse_response(
        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
    ) -> Iterable[Mapping]:
        """Yield the channel record restricted to the keys listed in CHANNEL_KEYS.

        :param response: HTTP response whose JSON body is a single object.
        :param stream_slice: Slice being read (unused here).
        """
        # Decode the body once and reuse it for both logging and extraction.
        data = response.json()
        logger.debug("Response: %s", data)
        yield {key: data.get(key) for key in CHANNEL_KEYS}
class Member(DiscordFetcherStream):
    """Stream of the users belonging to each configured guild.

    Intended to be built with ``endpoint="/members"``. Members are requested in
    pages of ``member_page_limit`` entries and the ``after`` cursor is followed
    until a page shorter than the limit is returned, so guilds with more
    members than one page are read completely.
    """

    # Records carry the user identifier under "id" and the guild under
    # "guild_id". The same user can be a member of several configured guilds,
    # hence the composite key; the previous value "member_id" is not a field
    # of the emitted records.
    primary_key = ["guild_id", "id"]
    # Page size sent as the "limit" query parameter.
    member_page_limit = 1000

    def request_params(
        self,
        stream_state: Optional[Mapping[str, Any]],
        stream_slice: Optional[Mapping[str, Any]] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> MutableMapping[str, Any]:
        """Return the query parameters: the page size plus the cursor, if any."""
        params = {"limit": self.member_page_limit}
        if next_page_token:
            params.update(next_page_token)
        return params

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return the ``after`` cursor for the next page, or ``None`` when done.

        A page shorter than ``member_page_limit`` is taken as the last one.
        """
        members = response.json()
        if not isinstance(members, list) or len(members) < self.member_page_limit:
            return None
        # The cursor is the highest user id of the page just read.
        # Assumes user ids are numeric strings -- TODO confirm against the API.
        highest_user_id = max(int(member["user"]["id"]) for member in members)
        return {"after": str(highest_user_id)}

    def parse_response(
        self, response: requests.Response, stream_slice: Mapping[str, Any] = None, **kwargs
    ) -> Iterable[Mapping]:
        """Yield one user record per member, tagged with the guild of the slice.

        :param response: HTTP response whose JSON body is a list of members,
            each holding a ``user`` object.
        :param stream_slice: Slice being read; provides ``guild_id``.
        """
        # Decode the body once and reuse it for both logging and extraction.
        data = response.json()
        logger.debug("Response: %s", data)
        for elt in data:
            user_payload = elt.get('user')
            user = {key: user_payload.get(key) for key in USER_KEYS}
            user['guild_id'] = stream_slice['guild_id']
            yield user
# Source
class SourceDiscordFetcher(AbstractSource):
    """Airbyte source exposing the Discord guild, channel and member streams."""

    def check_connection(self, logger, config) -> Tuple[bool, Any]:
        """Verify that the bot token can read every configured guild.

        One ``guilds/{guild_id}`` request is issued per configured guild.

        :param logger: Logger supplied by the caller (unused).
        :param config: Connector configuration; reads ``api_key`` and ``guilds_id``.
        :return: ``(True, None)`` when every guild is reachable, otherwise
            ``(False, reason)`` for the first guild that fails.
        """
        auth = TokenAuthenticator(token=config["api_key"], auth_method="Bot")
        for guild_id in config["guilds_id"]:
            url = f"{DiscordFetcherStream.url_base}guilds/{guild_id}"
            try:
                response = requests.get(url, headers=auth.get_auth_header(), timeout=30)
                response.raise_for_status()
            except requests.RequestException as error:
                # Report the failure instead of raising so the check ends cleanly.
                return False, f"Unable to access guild {guild_id}: {error}"
        return True, None

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        """Build the four streams, all sharing the same bot authenticator.

        :param config: Connector configuration; reads ``api_key`` and ``guilds_id``.
        """
        auth = TokenAuthenticator(token=config["api_key"], auth_method="Bot")
        guilds_id = config["guilds_id"]
        return [
            Guild(guilds_id=guilds_id, authenticator=auth),
            GuildChannel(guilds_id=guilds_id, endpoint="/channels", authenticator=auth),
            Channel(guilds_id=guilds_id, authenticator=auth),
            Member(guilds_id=guilds_id, endpoint="/members", authenticator=auth),
        ]

View File

@ -0,0 +1,18 @@
documentationUrl: https://docsurl.com
connectionSpecification:
$schema: http://json-schema.org/draft-07/schema#
title: Discord Fetcher Spec
type: object
required:
- api_key
- guilds_id
properties:
api_key:
type: string
description: Token used to authenticate as a bot
airbyte_secret: true
guilds_id:
type: array
description: IDs of the servers to query
items:
type: string