From fb9a6bf821190a9e258176b32c8bda0f73e66403 Mon Sep 17 00:00:00 2001 From: Alexis Pentori Date: Mon, 22 Jan 2024 18:53:52 +0100 Subject: [PATCH] social-network-fetcher: Init connector Signed-off-by: Alexis Pentori --- source-social-network-fetcher/Dockerfile | 8 ++ source-social-network-fetcher/README.md | 74 ++++++++++ source-social-network-fetcher/main.py | 8 ++ source-social-network-fetcher/metadata.yaml | 25 ++++ .../requirements.txt | 1 + .../sample_files/config-example.json | 15 ++ .../sample_files/configured_catalog.json | 33 +++++ source-social-network-fetcher/setup.py | 35 +++++ .../source_social_network_fetcher/__init__.py | 8 ++ .../source_social_network_fetcher/run.py | 13 ++ .../schemas/twitter_account_data.json | 54 +++++++ .../schemas/twitter_tweet.json | 48 +++++++ .../source_social_network_fetcher/source.py | 136 ++++++++++++++++++ .../source_social_network_fetcher/spec.yaml | 18 +++ 14 files changed, 476 insertions(+) create mode 100644 source-social-network-fetcher/Dockerfile create mode 100644 source-social-network-fetcher/README.md create mode 100644 source-social-network-fetcher/main.py create mode 100644 source-social-network-fetcher/metadata.yaml create mode 100644 source-social-network-fetcher/requirements.txt create mode 100644 source-social-network-fetcher/sample_files/config-example.json create mode 100644 source-social-network-fetcher/sample_files/configured_catalog.json create mode 100644 source-social-network-fetcher/setup.py create mode 100644 source-social-network-fetcher/source_social_network_fetcher/__init__.py create mode 100644 source-social-network-fetcher/source_social_network_fetcher/run.py create mode 100644 source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_account_data.json create mode 100644 source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_tweet.json create mode 100644 source-social-network-fetcher/source_social_network_fetcher/source.py create mode 100644 source-social-network-fetcher/source_social_network_fetcher/spec.yaml diff --git a/source-social-network-fetcher/Dockerfile b/source-social-network-fetcher/Dockerfile new file mode 100644 index 0000000..0d1e900 --- /dev/null +++ b/source-social-network-fetcher/Dockerfile @@ -0,0 +1,8 @@ +FROM airbyte/python-connector-base:1.1.0 + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] diff --git a/source-social-network-fetcher/README.md b/source-social-network-fetcher/README.md new file mode 100644 index 0000000..7ff564a --- /dev/null +++ b/source-social-network-fetcher/README.md @@ -0,0 +1,74 @@ +# Social Network Fetcher Source + +This is the repository for fetching Social Network information, written in Python. + +## Todos + +* [ ] Implements first version based on original script + * [ ] Fetch the Data users of each count. + * [ ] Fetch Tweets details +* [ ] Improve version: + * limit the data fetching based on input date + + +## Usage + +This connector fetch information from different social network based on their API: + +* Twitter: + * Tweets reaction + * [...] + +### Configuration + +The connector takes the following input: + +```yaml +twitter: + - API-KEY + - Account List +``` + +### Output + +The connector will return the following: + + +## Local development + +### Prerequisites + +#### Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` +``` +source .venv/bin/activate +pip install -r requirements.txt +``` + +### Locally running the connector +``` +python main.py spec +python main.py check --config sample_files/coin_list.json +python main.py discover --config sample_files/coin_list.json +python main.py read --config sample_files/coin_list.json --catalog sample_files/configured_catalog.json +``` + +### Locally running the connector docker image + +```bash +docker build -t airbyte/social-network-fetcher:dev . +# Running the spec command against your patched connector +docker run airbyte/social-network-fetcher:dev spec +```` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/social-network-fetcher:dev spec +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/social-network-fetcher:dev check --config /sample_files/coin_list.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/social-network-fetcher:dev discover --config /sample_files/coin_list.json +docker run --rm -v $(pwd)/sample_files:/sample_files -v $(pwd)/sample_files:/sample_files airbyte/social-network-fetcher:dev read --config /sample_files/coin_list.json --catalog /sample_files/configured_catalog.json +``` diff --git a/source-social-network-fetcher/main.py b/source-social-network-fetcher/main.py new file mode 100644 index 0000000..55e6f57 --- /dev/null +++ b/source-social-network-fetcher/main.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from source_social_network_fetcher.run import run + +if __name__ == "__main__": + run() diff --git a/source-social-network-fetcher/metadata.yaml b/source-social-network-fetcher/metadata.yaml new file mode 100644 index 0000000..73b95d2 --- /dev/null +++ b/source-social-network-fetcher/metadata.yaml @@ -0,0 +1,25 @@ +data: + allowedHosts: + registries: + oss: + enabled: false + cloud: + enabled: false + connectorBuildOptions: + baseImage: docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27 + connectorSubtype: api + connectorType: source + definitionId: 1c448bfb-8950-478c-9ae0-f03aaaf4e920 + dockerImageTag: 0.0.2 + dockerRepository: harbor.status.im/status-im/airbyte/source-social-network-fetcher + githubIssueLabel: source-social-network-fetcher + icon: social-network-fetcher.svg + license: MIT + name: Social Network Fetcher + releaseDate: TODO + supportLevel: community + releaseStage: alpha + documentationUrl: https://docs.airbyte.com/integrations/sources/social-network-fetcher + tags: + - language:python +metadataSpecVersion: "1.0" diff --git a/source-social-network-fetcher/requirements.txt b/source-social-network-fetcher/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/source-social-network-fetcher/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/source-social-network-fetcher/sample_files/config-example.json b/source-social-network-fetcher/sample_files/config-example.json new file mode 100644 index 0000000..1461bf9 --- /dev/null +++ b/source-social-network-fetcher/sample_files/config-example.json @@ -0,0 +1,15 @@ +{ + "api_key": "some_key", + "accounts": [ + "Logos_network", + "Codex_storage", + "Waku_org", + "ethnimbus", + "ac1d_info", + "HashingItOutPod", + "vacp2p", + "InstituteFT" + ], + "start_time": "2024-01-01", + "stop_time": "2024-01-26" +} diff --git a/source-social-network-fetcher/sample_files/configured_catalog.json b/source-social-network-fetcher/sample_files/configured_catalog.json new file mode 100644 index 0000000..47475f2 --- /dev/null +++ b/source-social-network-fetcher/sample_files/configured_catalog.json @@ -0,0 +1,33 @@ +{ + "streams": [ + { + "stream": { + "name": "twitter_account_data", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "twitter_tweet", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + } + + ] +} diff --git a/source-social-network-fetcher/setup.py b/source-social-network-fetcher/setup.py new file mode 100644 index 0000000..d2800b8 --- /dev/null +++ b/source-social-network-fetcher/setup.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.2", +] + +TEST_REQUIREMENTS = [ + "requests-mock~=1.9.3", + "pytest~=6.2", + "pytest-mock~=3.6.1", + "connector-acceptance-test", +] + +setup( + name="source_social_network_fetcher", + description="Source implementation for Social Network Fetcher.", + author="Status", + author_email="devops@status.im", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, + entry_points={ + "console_scripts": [ + "source-social-network-fetcher=source_social_network_fetcher.run:run", + ], + }, +) diff --git a/source-social-network-fetcher/source_social_network_fetcher/__init__.py b/source-social-network-fetcher/source_social_network_fetcher/__init__.py new file mode 100644 index 0000000..6ff2eb4 --- /dev/null +++ b/source-social-network-fetcher/source_social_network_fetcher/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceSocialNetworkFetcher + +__all__ = ["SourceSocialNetworkFetcher"] diff --git a/source-social-network-fetcher/source_social_network_fetcher/run.py b/source-social-network-fetcher/source_social_network_fetcher/run.py new file mode 100644 index 0000000..a752963 --- /dev/null +++ b/source-social-network-fetcher/source_social_network_fetcher/run.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from .source import SourceSocialNetworkFetcher + +def run(): + source = SourceSocialNetworkFetcher() + launch(source, sys.argv[1:]) diff --git a/source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_account_data.json b/source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_account_data.json new file mode 100644 index 0000000..f10226a --- /dev/null +++ b/source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_account_data.json @@ -0,0 +1,54 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "account_id": { + "type": [ + "null", + "string" + ] + }, + "account_name": { + "type": [ + "null", + "string" + ] + }, + "username": { + "type": [ + "null", + "string" + ] + }, + "tweet_count": { + "type": [ + "null", + "number" + ] + }, + "like_count": { + "type": [ + "null", + "number" + ] + }, + "following_count": { + "type": [ + "null", + "number" + ] + }, + "follower_count": { + "type": [ + "null", + "number" + ] + }, + "listed_count": { + "type": [ + "null", + "number" + ] + } + } +} diff --git a/source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_tweet.json b/source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_tweet.json new file mode 100644 index 0000000..586a5e2 --- /dev/null +++ b/source-social-network-fetcher/source_social_network_fetcher/schemas/twitter_tweet.json @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": [ + "null", + "string" + ] + }, + "id": { + "created_at": [ + "null", + "string" + ] + }, + "retweet_count": { + "type": [ + "null", + "number" + ] + }, + "reply_count": { + "type": [ + "null", + "number" + ] + }, + "like_count": { + "type": [ + "null", + "number" + ] + }, + "quote_count": { + "type": [ + "null", + "number" + ] + }, + "referenced_tweets": { + "type": [ + "null", + "string" + ] + } + } +} diff --git a/source-social-network-fetcher/source_social_network_fetcher/source.py b/source-social-network-fetcher/source_social_network_fetcher/source.py new file mode 100644 index 0000000..4f3aa57 --- /dev/null +++ b/source-social-network-fetcher/source_social_network_fetcher/source.py @@ -0,0 +1,136 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from abc import ABC +from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple +import logging +import requests +from datetime import datetime +from airbyte_cdk.sources import AbstractSource +from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream +from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator + +logger = logging.getLogger("airbyte") + +class TwitterStream(HttpStream): + + url_base = "https://api.twitter.com/2/" + + def __init__(self, api_key: str=None, accounts: List=None, start_time: str = None, stop_time: str = None, **kwargs): + super().__init__(**kwargs) + self.api_key = api_key + self.accounts = accounts + self.start_time = start_time + self.stop_time = stop_time; + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + return None + +class TwitterAccountData(TwitterStream): + + primary_key = "account_id" + + @property + def use_cache(self) -> bool: + return True + + + def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]: + for account in self.accounts: + yield { + "name": account + } + + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return f"users/by/username/{stream_slice['name']}?user.fields=public_metrics" + + def request_headers( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + return { + "Authorization" : f"Bearer {self.api_key}", + "User-Agent": "v2RecentSearchPython" + } + + def parse_response( + self, + response: requests.Response, + stream_slice: Mapping[str, Any] = None, + **kwargs + ) -> Iterable[Mapping]: + logger.info("Getting data of %s account", stream_slice['name']) + logger.info("Response: %s", response.json()) + data=response.json()['data'] + yield { + "account_id": data['id'], + "username": data['username'], + "account_name": data['name'], + "tweet_count": data['public_metrics']['tweet_count'], + "like_count": data['public_metrics']['like_count'], + "following_count": data['public_metrics']['following_count'], + "follower_count": data['public_metrics']['followers_count'], + "listed_count": data['public_metrics']['listed_count'], + } + +class TwitterTweet(HttpSubStream, TwitterAccountData): + #TODO: See how to get the account ID + primary_key = "" + def __init__(self, **kwargs): + super().__init__(TwitterAccountData(**kwargs),**kwargs) + + def path( + self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + account_id = stream_slice.get("parent").get("account_id") + return f"users/{account_id}/tweets?tweet.fields=text,public_metrics,author_id,referenced_tweets,created_at&start_time={self.start_time}" + + def request_headers( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + return { + "Authorization" : f"Bearer {self.api_key}", + "User-Agent": "v2RecentSearchPython" + } + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + data=response.json()['data'] + logger.info("Response: %s", response.json()) + referenced_tweets="" + for t in data: + if "referenced_tweets" in t: + for rt in t.get('referenced_tweets'): + referenced_tweets += f"{rt.get('type')}:{rt.get('id')};" + yield { + "id": t['id'], + "created_at": t.get('created_at'), + "retweet_count": t.get('public_metrics').get('retweet_count'), + "reply_count": t.get('public_metrics').get('reply_count'), + "like_count": t.get('public_metrics').get('like_count'), + "quote_count": t.get('public_metrics').get('quote_count'), + "referenced_tweets": referenced_tweets + } + + + +# Source +class SourceSocialNetworkFetcher(AbstractSource): + def check_connection(self, logger, config) -> Tuple[bool, any]: + return True, None + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + return [ + TwitterAccountData( + api_key=config['api_key'], + accounts=config['accounts']), + TwitterTweet( + api_key=config['api_key'], + accounts=config['accounts'], + start_time=config['start_time'], + stop_time=datetime.now().isoformat()) + ] diff --git a/source-social-network-fetcher/source_social_network_fetcher/spec.yaml b/source-social-network-fetcher/source_social_network_fetcher/spec.yaml new file mode 100644 index 0000000..db56885 --- /dev/null +++ b/source-social-network-fetcher/source_social_network_fetcher/spec.yaml @@ -0,0 +1,18 @@ +documentationUrl: https://docsurl.com +connectionSpecification: + $schema: http://json-schema.org/draft-07/schema# + title: Social Network Fetcher + type: object + required: + - api_key + - accounts + properties: + api_key: + type: string + description: API Key to authentify to twitter + airbyte_secret: true + accounts: + type: array + description: List of accounts needing to be extracted + items: + type: string