social-network-fetcher: Init connector
Signed-off-by: Alexis Pentori <alexis@status.im>
This commit is contained in:
parent
38866cf78b
commit
fb9a6bf821
|
@ -0,0 +1,8 @@
|
||||||
|
FROM airbyte/python-connector-base:1.1.0
|
||||||
|
|
||||||
|
COPY . ./airbyte/integration_code
|
||||||
|
RUN pip install ./airbyte/integration_code
|
||||||
|
|
||||||
|
# The entrypoint and default env vars are already set in the base image
|
||||||
|
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
|
||||||
|
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
|
|
@ -0,0 +1,74 @@
|
||||||
|
# Social Network Fetcher Source
|
||||||
|
|
||||||
|
This is the repository for fetching Social Network information, written in Python.
|
||||||
|
|
||||||
|
## Todos
|
||||||
|
|
||||||
|
* [ ] Implements first version based on original script
|
||||||
|
* [ ] Fetch the Data users of each count.
|
||||||
|
* [ ] Fetch Tweets details
|
||||||
|
* [ ] Improve version:
|
||||||
|
* limit the data fetching based on input date
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
This connector fetch information from different social network based on their API:
|
||||||
|
|
||||||
|
* Twitter:
|
||||||
|
* Tweets reaction
|
||||||
|
* [...]
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
|
||||||
|
The connector takes the following input:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
twitter:
|
||||||
|
- API-KEY
|
||||||
|
- Account List
|
||||||
|
```
|
||||||
|
|
||||||
|
### Output
|
||||||
|
|
||||||
|
The connector will return the following:
|
||||||
|
|
||||||
|
|
||||||
|
## Local development
|
||||||
|
|
||||||
|
### Prerequisites
|
||||||
|
|
||||||
|
#### Activate Virtual Environment and install dependencies
|
||||||
|
From this connector directory, create a virtual environment:
|
||||||
|
```
|
||||||
|
python -m venv .venv
|
||||||
|
```
|
||||||
|
```
|
||||||
|
source .venv/bin/activate
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Locally running the connector
|
||||||
|
```
|
||||||
|
python main.py spec
|
||||||
|
python main.py check --config sample_files/coin_list.json
|
||||||
|
python main.py discover --config sample_files/coin_list.json
|
||||||
|
python main.py read --config sample_files/coin_list.json --catalog sample_files/configured_catalog.json
|
||||||
|
```
|
||||||
|
|
||||||
|
### Locally running the connector docker image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t airbyte/social-network-fetcher:dev .
|
||||||
|
# Running the spec command against your patched connector
|
||||||
|
docker run airbyte/social-network-fetcher:dev spec
|
||||||
|
````
|
||||||
|
|
||||||
|
#### Run
|
||||||
|
Then run any of the connector commands as follows:
|
||||||
|
```
|
||||||
|
docker run --rm airbyte/social-network-fetcher:dev spec
|
||||||
|
docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/social-network-fetcher:dev check --config /sample_files/coin_list.json
|
||||||
|
docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/social-network-fetcher:dev discover --config /sample_files/coin_list.json
|
||||||
|
docker run --rm -v $(pwd)/sample_files:/sample_files -v $(pwd)/sample_files:/sample_files airbyte/social-network-fetcher:dev read --config /sample_files/coin_list.json --catalog /sample_files/configured_catalog.json
|
||||||
|
```
|
|
@ -0,0 +1,8 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
from source_social_network_fetcher.run import run
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
run()
|
|
@ -0,0 +1,25 @@
|
||||||
|
data:
|
||||||
|
allowedHosts:
|
||||||
|
registries:
|
||||||
|
oss:
|
||||||
|
enabled: false
|
||||||
|
cloud:
|
||||||
|
enabled: false
|
||||||
|
connectorBuildOptions:
|
||||||
|
baseImage: docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27
|
||||||
|
connectorSubtype: api
|
||||||
|
connectorType: source
|
||||||
|
definitionId: 1c448bfb-8950-478c-9ae0-f03aaaf4e920
|
||||||
|
dockerImageTag: 0.0.2
|
||||||
|
dockerRepository: harbor.status.im/status-im/airbyte/source-social-network-fetcher
|
||||||
|
githubIssueLabel: source-social-network-fetcher
|
||||||
|
icon: social-network-fetcher.svg
|
||||||
|
license: MIT
|
||||||
|
name: Social Network Fetcher
|
||||||
|
releaseDate: TODO
|
||||||
|
supportLevel: community
|
||||||
|
releaseStage: alpha
|
||||||
|
documentationUrl: https://docs.airbyte.com/integrations/sources/social-network-fetcher
|
||||||
|
tags:
|
||||||
|
- language:python
|
||||||
|
metadataSpecVersion: "1.0"
|
|
@ -0,0 +1 @@
|
||||||
|
-e .
|
|
@ -0,0 +1,15 @@
|
||||||
|
{
|
||||||
|
"api_key": "some_key",
|
||||||
|
"accounts": [
|
||||||
|
"Logos_network",
|
||||||
|
"Codex_storage",
|
||||||
|
"Waku_org",
|
||||||
|
"ethnimbus",
|
||||||
|
"ac1d_info",
|
||||||
|
"HashingItOutPod",
|
||||||
|
"vacp2p",
|
||||||
|
"InstituteFT"
|
||||||
|
],
|
||||||
|
"start_time": "2024-01-01",
|
||||||
|
"stop_time": "2024-01-26"
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
{
|
||||||
|
"streams": [
|
||||||
|
{
|
||||||
|
"stream": {
|
||||||
|
"name": "twitter_account_data",
|
||||||
|
"json_schema": {
|
||||||
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||||
|
"type": "object"
|
||||||
|
},
|
||||||
|
"supported_sync_modes": [
|
||||||
|
"full_refresh", "incremental"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"sync_mode": "incremental",
|
||||||
|
"destination_sync_mode": "overwrite"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"stream": {
|
||||||
|
"name": "twitter_tweet",
|
||||||
|
"json_schema": {
|
||||||
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||||
|
"type": "object"
|
||||||
|
},
|
||||||
|
"supported_sync_modes": [
|
||||||
|
"full_refresh", "incremental"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"sync_mode": "incremental",
|
||||||
|
"destination_sync_mode": "overwrite"
|
||||||
|
}
|
||||||
|
|
||||||
|
]
|
||||||
|
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
from setuptools import find_packages, setup
|
||||||
|
|
||||||
|
MAIN_REQUIREMENTS = [
|
||||||
|
"airbyte-cdk~=0.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
TEST_REQUIREMENTS = [
|
||||||
|
"requests-mock~=1.9.3",
|
||||||
|
"pytest~=6.2",
|
||||||
|
"pytest-mock~=3.6.1",
|
||||||
|
"connector-acceptance-test",
|
||||||
|
]
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="source_social_network_fetcher",
|
||||||
|
description="Source implementation for Social Network Fetcher.",
|
||||||
|
author="Status",
|
||||||
|
author_email="devops@status.im",
|
||||||
|
packages=find_packages(),
|
||||||
|
install_requires=MAIN_REQUIREMENTS,
|
||||||
|
package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]},
|
||||||
|
extras_require={
|
||||||
|
"tests": TEST_REQUIREMENTS,
|
||||||
|
},
|
||||||
|
entry_points={
|
||||||
|
"console_scripts": [
|
||||||
|
"source-social-network-fetcher=source_social_network_fetcher.run:run",
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
|
@ -0,0 +1,8 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
from .source import SourceSocialNetworkFetcher
|
||||||
|
|
||||||
|
__all__ = ["SourceSocialNetworkFetcher"]
|
|
@ -0,0 +1,13 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from airbyte_cdk.entrypoint import launch
|
||||||
|
from .source import SourceSocialNetworkFetcher
|
||||||
|
|
||||||
|
def run():
|
||||||
|
source = SourceSocialNetworkFetcher()
|
||||||
|
launch(source, sys.argv[1:])
|
|
@ -0,0 +1,54 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"account_id": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"string"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"account_name": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"string"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"username": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"string"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"tweet_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"like_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"following_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"follower_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"listed_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"string"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"created_at": [
|
||||||
|
"null",
|
||||||
|
"string"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"retweet_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"reply_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"like_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"quote_count": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"number"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"referenced_tweets": {
|
||||||
|
"type": [
|
||||||
|
"null",
|
||||||
|
"string"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,136 @@
|
||||||
|
#
|
||||||
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
from abc import ABC
|
||||||
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
|
||||||
|
import logging
|
||||||
|
import requests
|
||||||
|
from datetime import datetime
|
||||||
|
from airbyte_cdk.sources import AbstractSource
|
||||||
|
from airbyte_cdk.sources.streams import Stream
|
||||||
|
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
|
||||||
|
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
|
||||||
|
|
||||||
|
logger = logging.getLogger("airbyte")
|
||||||
|
|
||||||
|
class TwitterStream(HttpStream):
|
||||||
|
|
||||||
|
url_base = "https://api.twitter.com/2/"
|
||||||
|
|
||||||
|
def __init__(self, api_key: str=None, accounts: List=None, start_time: str = None, stop_time: str = None, **kwargs):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
self.api_key = api_key
|
||||||
|
self.accounts = accounts
|
||||||
|
self.start_time = start_time
|
||||||
|
self.stop_time = stop_time;
|
||||||
|
|
||||||
|
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
|
||||||
|
return None
|
||||||
|
|
||||||
|
class TwitterAccountData(TwitterStream):
|
||||||
|
|
||||||
|
primary_key = "account_id"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def use_cache(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
|
||||||
|
for account in self.accounts:
|
||||||
|
yield {
|
||||||
|
"name": account
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def path(
|
||||||
|
self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
|
||||||
|
) -> str:
|
||||||
|
return f"users/by/username/{stream_slice['name']}?user.fields=public_metrics"
|
||||||
|
|
||||||
|
def request_headers(
|
||||||
|
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None
|
||||||
|
) -> MutableMapping[str, Any]:
|
||||||
|
return {
|
||||||
|
"Authorization" : f"Bearer {self.api_key}",
|
||||||
|
"User-Agent": "v2RecentSearchPython"
|
||||||
|
}
|
||||||
|
|
||||||
|
def parse_response(
|
||||||
|
self,
|
||||||
|
response: requests.Response,
|
||||||
|
stream_slice: Mapping[str, Any] = None,
|
||||||
|
**kwargs
|
||||||
|
) -> Iterable[Mapping]:
|
||||||
|
logger.info("Getting data of %s account", stream_slice['name'])
|
||||||
|
logger.info("Response: %s", response.json())
|
||||||
|
data=response.json()['data']
|
||||||
|
yield {
|
||||||
|
"account_id": data['id'],
|
||||||
|
"username": data['username'],
|
||||||
|
"account_name": data['name'],
|
||||||
|
"tweet_count": data['public_metrics']['tweet_count'],
|
||||||
|
"like_count": data['public_metrics']['like_count'],
|
||||||
|
"following_count": data['public_metrics']['following_count'],
|
||||||
|
"follower_count": data['public_metrics']['followers_count'],
|
||||||
|
"listed_count": data['public_metrics']['listed_count'],
|
||||||
|
}
|
||||||
|
|
||||||
|
class TwitterTweet(HttpSubStream, TwitterAccountData):
|
||||||
|
#TODO: See how to get the account ID
|
||||||
|
primary_key = ""
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
super().__init__(TwitterAccountData(**kwargs),**kwargs)
|
||||||
|
|
||||||
|
def path(
|
||||||
|
self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
|
||||||
|
) -> str:
|
||||||
|
account_id = stream_slice.get("parent").get("account_id")
|
||||||
|
return f"users/{account_id}/tweets?tweet.fields=text,public_metrics,author_id,referenced_tweets,created_at&start_time={self.start_time}"
|
||||||
|
|
||||||
|
def request_headers(
|
||||||
|
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None
|
||||||
|
) -> MutableMapping[str, Any]:
|
||||||
|
return {
|
||||||
|
"Authorization" : f"Bearer {self.api_key}",
|
||||||
|
"User-Agent": "v2RecentSearchPython"
|
||||||
|
}
|
||||||
|
|
||||||
|
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
|
||||||
|
data=response.json()['data']
|
||||||
|
logger.info("Response: %s", response.json())
|
||||||
|
referenced_tweets=""
|
||||||
|
for t in data:
|
||||||
|
if "referenced_tweets" in t:
|
||||||
|
for rt in t.get('referenced_tweets'):
|
||||||
|
referenced_tweets += f"{rt.get('type')}:{rt.get('id')};"
|
||||||
|
yield {
|
||||||
|
"id": t['id'],
|
||||||
|
"created_at": t.get('created_at'),
|
||||||
|
"retweet_count": t.get('public_metrics').get('retweet_count'),
|
||||||
|
"reply_count": t.get('public_metrics').get('reply_count'),
|
||||||
|
"like_count": t.get('public_metrics').get('like_count'),
|
||||||
|
"quote_count": t.get('public_metrics').get('quote_count'),
|
||||||
|
"referenced_tweets": referenced_tweets
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Source
|
||||||
|
class SourceSocialNetworkFetcher(AbstractSource):
|
||||||
|
def check_connection(self, logger, config) -> Tuple[bool, any]:
|
||||||
|
return True, None
|
||||||
|
|
||||||
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
||||||
|
return [
|
||||||
|
TwitterAccountData(
|
||||||
|
api_key=config['api_key'],
|
||||||
|
accounts=config['accounts']),
|
||||||
|
TwitterTweet(
|
||||||
|
api_key=config['api_key'],
|
||||||
|
accounts=config['accounts'],
|
||||||
|
start_time=config['start_time'],
|
||||||
|
stop_time=datetime.now().isoformat())
|
||||||
|
]
|
|
@ -0,0 +1,18 @@
|
||||||
|
documentationUrl: https://docsurl.com
|
||||||
|
connectionSpecification:
|
||||||
|
$schema: http://json-schema.org/draft-07/schema#
|
||||||
|
title: Social Network Fetcher
|
||||||
|
type: object
|
||||||
|
required:
|
||||||
|
- api_key
|
||||||
|
- accounts
|
||||||
|
properties:
|
||||||
|
api_key:
|
||||||
|
type: string
|
||||||
|
description: API Key to authentify to twitter
|
||||||
|
airbyte_secret: true
|
||||||
|
accounts:
|
||||||
|
type: array
|
||||||
|
description: List of accounts needing to be extracted
|
||||||
|
items:
|
||||||
|
type: string
|
Loading…
Reference in New Issue