twitter-fetcher: update connector
Signed-off-by: Alexis Pentori <alexis@status.im>
This commit is contained in:
parent
58c1ea967e
commit
11efa92811
|
@ -10,7 +10,7 @@ data:
|
||||||
connectorSubtype: api
|
connectorSubtype: api
|
||||||
connectorType: source
|
connectorType: source
|
||||||
definitionId: 1c448bfb-8950-478c-9ae0-f03aaaf4e920
|
definitionId: 1c448bfb-8950-478c-9ae0-f03aaaf4e920
|
||||||
dockerImageTag: '0.0.1'
|
dockerImageTag: '0.2.0'
|
||||||
dockerRepository: status-im/airbyte/source-twitter-fetcher
|
dockerRepository: status-im/airbyte/source-twitter-fetcher
|
||||||
githubIssueLabel: source-twitter-fetcher
|
githubIssueLabel: source-twitter-fetcher
|
||||||
icon: twitter-fetcher.svg
|
icon: twitter-fetcher.svg
|
||||||
|
|
|
@ -1,15 +1,10 @@
|
||||||
{
|
{
|
||||||
"api_key": "some_key",
|
"credentials":{
|
||||||
"accounts": [
|
"client_id": "some-id",
|
||||||
"Logos_network",
|
"client_secret": "some-secret",
|
||||||
"Codex_storage",
|
"access_token": "some-access-token",
|
||||||
"Waku_org",
|
"refresh_token": "some-refresh-token",
|
||||||
"ethnimbus",
|
"token_expiry_date": ""
|
||||||
"ac1d_info",
|
},
|
||||||
"HashingItOutPod",
|
"start_time": "2024-01-01"
|
||||||
"vacp2p",
|
|
||||||
"InstituteFT"
|
|
||||||
],
|
|
||||||
"start_time": "2024-01-01",
|
|
||||||
"stop_time": "2024-01-26"
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
"streams": [
|
"streams": [
|
||||||
{
|
{
|
||||||
"stream": {
|
"stream": {
|
||||||
"name": "twitter_account_data",
|
"name": "account",
|
||||||
"json_schema": {
|
"json_schema": {
|
||||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||||
"type": "object"
|
"type": "object"
|
||||||
|
@ -16,7 +16,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"stream": {
|
"stream": {
|
||||||
"name": "twitter_tweet",
|
"name": "tweet",
|
||||||
"json_schema": {
|
"json_schema": {
|
||||||
"$schema": "http://json-schema.org/draft-04/schema#",
|
"$schema": "http://json-schema.org/draft-04/schema#",
|
||||||
"type": "object"
|
"type": "object"
|
||||||
|
@ -28,6 +28,5 @@
|
||||||
"sync_mode": "incremental",
|
"sync_mode": "incremental",
|
||||||
"destination_sync_mode": "overwrite"
|
"destination_sync_mode": "overwrite"
|
||||||
}
|
}
|
||||||
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"username": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"public_metrics": {
|
||||||
|
"type": ["null", "object" ],
|
||||||
|
"properties": {
|
||||||
|
"tweet_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"like_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"following_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"follower_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"listed_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"name": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"username": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"location": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"url": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"description": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"verified": {
|
||||||
|
"type": [ "null", "boolean" ]
|
||||||
|
},
|
||||||
|
"verified_type": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,98 @@
|
||||||
|
{
|
||||||
|
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": [ "null", "string"]
|
||||||
|
},
|
||||||
|
"text": {
|
||||||
|
"type": [ "null", "string"]
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"type": [ "null", "string"]
|
||||||
|
},
|
||||||
|
"author_id": {
|
||||||
|
"type": [ "null", "string"]
|
||||||
|
},
|
||||||
|
"conversation_id": {
|
||||||
|
"type": [ "null", "string"]
|
||||||
|
},
|
||||||
|
"reply_settings": {
|
||||||
|
"type": ["null", "string"]
|
||||||
|
},
|
||||||
|
"referenced_tweets": {
|
||||||
|
"type": [ "null", "array" ],
|
||||||
|
"items": {
|
||||||
|
"type": ["object"],
|
||||||
|
"properties":{
|
||||||
|
"type": {
|
||||||
|
"type": [ "null", "string" ]
|
||||||
|
},
|
||||||
|
"id": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"public_metrics": {
|
||||||
|
"type": ["null", "object" ],
|
||||||
|
"properties": {
|
||||||
|
"retweet_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"reply_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"like_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"quote_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"impression_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"bookmark_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"non_public_metrics": {
|
||||||
|
"type": ["null", "object" ],
|
||||||
|
"properties": {
|
||||||
|
"impression_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"url_link_clicks": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"user_profile_clicks": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"organic_metrics": {
|
||||||
|
"type": ["null", "object" ],
|
||||||
|
"properties": {
|
||||||
|
"impression_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"url_link_clicks": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"user_profile_clicks": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"retweet_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"reply_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
},
|
||||||
|
"like_count": {
|
||||||
|
"type": [ "null", "number" ]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,54 +0,0 @@
|
||||||
{
|
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"account_id": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"string"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"account_name": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"string"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"username": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"string"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"tweet_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"like_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"following_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"follower_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"listed_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,48 +0,0 @@
|
||||||
{
|
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"id": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"string"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"id": {
|
|
||||||
"created_at": [
|
|
||||||
"null",
|
|
||||||
"string"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"retweet_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"reply_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"like_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"quote_count": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"number"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"referenced_tweets": {
|
|
||||||
"type": [
|
|
||||||
"null",
|
|
||||||
"string"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -12,127 +12,85 @@ from datetime import datetime
|
||||||
from airbyte_cdk.sources import AbstractSource
|
from airbyte_cdk.sources import AbstractSource
|
||||||
from airbyte_cdk.sources.streams import Stream
|
from airbyte_cdk.sources.streams import Stream
|
||||||
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
|
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
|
||||||
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
|
from airbyte_cdk.sources.streams.http.auth.core import HttpAuthenticator
|
||||||
|
from airbyte_cdk.sources.streams.http.requests_native_auth import SingleUseRefreshTokenOauth2Authenticator
|
||||||
|
|
||||||
logger = logging.getLogger("airbyte")
|
logger = logging.getLogger("airbyte")
|
||||||
|
|
||||||
class TwitterStream(HttpStream):
|
class TwitterStream(HttpStream):
|
||||||
|
|
||||||
url_base = "https://api.twitter.com/2/"
|
url_base = "https://api.twitter.com/2/"
|
||||||
|
|
||||||
def __init__(self, api_key: str=None, accounts: List=None, start_time: str = None, stop_time: str = None, **kwargs):
|
def __init__(self, start_time: str = None, stop_time: str = None, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.api_key = api_key
|
self.start_time = start_time
|
||||||
self.accounts = accounts
|
self.stop_time = stop_time;
|
||||||
self.start_time = start_time
|
|
||||||
self.stop_time = stop_time;
|
|
||||||
|
|
||||||
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
|
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
class TwitterAccountData(TwitterStream):
|
|
||||||
|
|
||||||
primary_key = "account_id"
|
|
||||||
|
|
||||||
@property
|
|
||||||
def use_cache(self) -> bool:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def stream_slices(self, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
|
class Account(TwitterStream):
|
||||||
for account in self.accounts:
|
|
||||||
yield {
|
primary_key = "id"
|
||||||
"name": account
|
|
||||||
}
|
@property
|
||||||
|
def use_cache(self) -> bool:
|
||||||
|
return True
|
||||||
|
|
||||||
|
def path(
|
||||||
|
self, stream_state: Mapping[str, Any] = None,
|
||||||
|
stream_slice: Mapping[str, Any] = None,
|
||||||
|
next_page_token: Mapping[str, Any] = None
|
||||||
|
) -> str:
|
||||||
|
return f"users/me?user.fields=public_metrics"
|
||||||
|
|
||||||
|
def parse_response(
|
||||||
|
self,
|
||||||
|
response: requests.Response,
|
||||||
|
stream_slice: Mapping[str, Any] = None,
|
||||||
|
**kwargs
|
||||||
|
) -> Iterable[Mapping]:
|
||||||
|
logger.info("Response: %s", response.json())
|
||||||
|
data=response.json()['data']
|
||||||
|
yield data
|
||||||
|
# Wait to avoid reaching API limit
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
def path(
|
class Tweet(HttpSubStream, Account):
|
||||||
self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
|
primary_key = "id"
|
||||||
) -> str:
|
|
||||||
return f"users/by/username/{stream_slice['name']}?user.fields=public_metrics"
|
|
||||||
|
|
||||||
def request_headers(
|
def __init__(self, **kwargs):
|
||||||
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None
|
super().__init__(Account(**kwargs),**kwargs)
|
||||||
) -> MutableMapping[str, Any]:
|
|
||||||
return {
|
|
||||||
"Authorization" : f"Bearer {self.api_key}",
|
|
||||||
"User-Agent": "v2RecentSearchPython"
|
|
||||||
}
|
|
||||||
|
|
||||||
def parse_response(
|
def path(
|
||||||
self,
|
self, stream_state: Mapping[str, Any] = None,
|
||||||
response: requests.Response,
|
stream_slice: Mapping[str, Any] = None,
|
||||||
stream_slice: Mapping[str, Any] = None,
|
next_page_token: Mapping[str, Any] = None
|
||||||
**kwargs
|
) -> str:
|
||||||
) -> Iterable[Mapping]:
|
account_id = stream_slice.get("parent").get("id")
|
||||||
logger.info("Getting data of %s account", stream_slice['name'])
|
logger.info("Account id %s", account_id)
|
||||||
logger.info("Response: %s", response.json())
|
return f"users/{account_id}/tweets?tweet.fields=text,public_metrics,non_public_metrics,organic_metrics,author_id,referenced_tweets,created_at"
|
||||||
data=response.json()['data']
|
|
||||||
yield {
|
|
||||||
"account_id": data['id'],
|
|
||||||
"username": data['username'],
|
|
||||||
"account_name": data['name'],
|
|
||||||
"tweet_count": data['public_metrics']['tweet_count'],
|
|
||||||
"like_count": data['public_metrics']['like_count'],
|
|
||||||
"following_count": data['public_metrics']['following_count'],
|
|
||||||
"follower_count": data['public_metrics']['followers_count'],
|
|
||||||
"listed_count": data['public_metrics']['listed_count'],
|
|
||||||
}
|
|
||||||
time.sleep(2)
|
|
||||||
|
|
||||||
|
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
|
||||||
class TwitterTweet(HttpSubStream, TwitterAccountData):
|
logger.debug("Twtter Response: %s", response.json())
|
||||||
#TODO: See how to get the account ID
|
data=response.json()['data']
|
||||||
primary_key = ""
|
for t in data:
|
||||||
def __init__(self, **kwargs):
|
yield t
|
||||||
super().__init__(TwitterAccountData(**kwargs),**kwargs)
|
time.sleep(2)
|
||||||
|
|
||||||
def path(
|
|
||||||
self, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None
|
|
||||||
) -> str:
|
|
||||||
account_id = stream_slice.get("parent").get("account_id")
|
|
||||||
return f"users/{account_id}/tweets?tweet.fields=text,public_metrics,author_id,referenced_tweets,created_at&start_time={self.start_time}"
|
|
||||||
|
|
||||||
def request_headers(
|
|
||||||
self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, any] = None, next_page_token: Mapping[str, Any] = None
|
|
||||||
) -> MutableMapping[str, Any]:
|
|
||||||
return {
|
|
||||||
"Authorization" : f"Bearer {self.api_key}",
|
|
||||||
"User-Agent": "v2RecentSearchPython"
|
|
||||||
}
|
|
||||||
|
|
||||||
def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
|
|
||||||
data=response.json()['data']
|
|
||||||
logger.info("Response: %s", response.json())
|
|
||||||
referenced_tweets=""
|
|
||||||
for t in data:
|
|
||||||
if "referenced_tweets" in t:
|
|
||||||
for rt in t.get('referenced_tweets'):
|
|
||||||
referenced_tweets += f"{rt.get('type')}:{rt.get('id')};"
|
|
||||||
yield {
|
|
||||||
"id": t['id'],
|
|
||||||
"created_at": t.get('created_at'),
|
|
||||||
"retweet_count": t.get('public_metrics').get('retweet_count'),
|
|
||||||
"reply_count": t.get('public_metrics').get('reply_count'),
|
|
||||||
"like_count": t.get('public_metrics').get('like_count'),
|
|
||||||
"quote_count": t.get('public_metrics').get('quote_count'),
|
|
||||||
"referenced_tweets": referenced_tweets
|
|
||||||
}
|
|
||||||
time.sleep(2)
|
|
||||||
|
|
||||||
# Source
|
# Source
|
||||||
class SourceTwitterFetcher(AbstractSource):
|
class SourceTwitterFetcher(AbstractSource):
|
||||||
def check_connection(self, logger, config) -> Tuple[bool, any]:
|
def check_connection(self, logger, config) -> Tuple[bool, any]:
|
||||||
return True, None
|
return True, None
|
||||||
|
|
||||||
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
||||||
return [
|
auth=SingleUseRefreshTokenOauth2Authenticator(
|
||||||
TwitterAccountData(
|
config, token_refresh_endpoint="https://api.twitter.com/2/oauth2/token")
|
||||||
api_key=config['api_key'],
|
return [
|
||||||
accounts=config['accounts']),
|
Account(authenticator=auth),
|
||||||
TwitterTweet(
|
Tweet(authenticator=auth, start_time=config['start_time'],
|
||||||
api_key=config['api_key'],
|
stop_time=datetime.now().isoformat()
|
||||||
accounts=config['accounts'],
|
)
|
||||||
start_time=config['start_time'],
|
]
|
||||||
stop_time=datetime.now().isoformat())
|
|
||||||
]
|
|
||||||
|
|
|
@ -4,20 +4,38 @@ connectionSpecification:
|
||||||
title: Twitter Fetcher
|
title: Twitter Fetcher
|
||||||
type: object
|
type: object
|
||||||
required:
|
required:
|
||||||
- api_key
|
- credentials
|
||||||
- accounts
|
|
||||||
- start_time
|
- start_time
|
||||||
properties:
|
properties:
|
||||||
api_key:
|
credentials:
|
||||||
type: string
|
title: Twitter Dev account Credentials
|
||||||
description: "API Key to authenticate to Twitter"
|
type: object
|
||||||
airbyte_secret: true
|
properties:
|
||||||
accounts:
|
client_id:
|
||||||
type: array
|
title: client_id
|
||||||
description: "List of accounts needing to be extracted"
|
type: string
|
||||||
items:
|
description: "Client ID of Twitter Application"
|
||||||
type: string
|
airbyte_secret: true
|
||||||
|
client_secret:
|
||||||
|
title: client_secret
|
||||||
|
type: string
|
||||||
|
description: "Client secret of Twitter Application"
|
||||||
|
airbyte_secret: true
|
||||||
|
access_token:
|
||||||
|
title: access_token
|
||||||
|
type: string
|
||||||
|
description: "Access Token of Twitter Dev Account link"
|
||||||
|
airbyte_secret: true
|
||||||
|
refresh_token:
|
||||||
|
title: refresh_token
|
||||||
|
type: string
|
||||||
|
description: "Refresh Token of Twitter Dev Account link"
|
||||||
|
airbyte_secret: true
|
||||||
|
token_expiry_date:
|
||||||
|
title: token_expiry_date
|
||||||
|
type: string
|
||||||
|
description: "Expiry date of the Access Token of Twitter Dev Account link"
|
||||||
start_time:
|
start_time:
|
||||||
type: string
|
type: string
|
||||||
description: "Start date of fetching data"
|
description: "Start date of fetching data"
|
||||||
format: date-time
|
format: date-time
|
||||||
|
|
Loading…
Reference in New Issue