twitter: adding separate Stream for promoted metrics

Signed-off-by: Alexis Pentori <alexis@status.im>
This commit is contained in:
Alexis Pentori 2024-10-21 12:23:01 +02:00
parent 4985ffcd40
commit 101a18b1e6
No known key found for this signature in database
GPG Key ID: 65250D2801E47A10
6 changed files with 120 additions and 30 deletions

View File

@ -41,6 +41,20 @@
}, },
"sync_mode": "incremental", "sync_mode": "incremental",
"destination_sync_mode": "overwrite" "destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "tweet_promoted",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
} }
] ]
} }

View File

@ -26,6 +26,10 @@
"verified_type": { "verified_type": {
"type": [ "null", "string" ] "type": [ "null", "string" ]
}, },
"created_at": {
"type": [ "null", "string" ],
"format": "date-time"
},
"public_metrics": { "public_metrics": {
"type": ["null", "object" ], "type": ["null", "object" ],
"properties": { "properties": {

View File

@ -36,7 +36,7 @@
}, },
"public_metrics": { "public_metrics": {
"type": ["null", "object"], "type": ["null", "object"],
"properties": { "properties": {
"retweet_count": { "retweet_count": {
"type": ["null", "number"] "type": ["null", "number"]
}, },
@ -52,7 +52,7 @@
"impression_count": { "impression_count": {
"type": ["null", "number"] "type": ["null", "number"]
}, },
"bookmark_count": { "bookmark_count": {
"type": ["null", "number"] "type": ["null", "number"]
} }
} }

View File

@ -23,7 +23,7 @@
"public_metrics": { "public_metrics": {
"type": ["null", "object"], "type": ["null", "object"],
"properties": { "properties": {
"retweet_count": { "retweet_count": {
"type": ["null", "number"] "type": ["null", "number"]
}, },
@ -39,14 +39,14 @@
"impression_count": { "impression_count": {
"type": ["null", "number"] "type": ["null", "number"]
}, },
"bookmark_count": { "bookmark_count": {
"type": ["null", "number"] "type": ["null", "number"]
} }
} }
}, },
"non_public_metrics": { "non_public_metrics": {
"type": ["null", "object" ], "type": ["null", "object" ],
"properties": { "properties": {
"impression_count": { "impression_count": {
"type": ["null", "number"] "type": ["null", "number"]
}, },
@ -69,30 +69,7 @@
}, },
"organic_metrics": { "organic_metrics": {
"type": ["null", "object" ], "type": ["null", "object" ],
"properties": { "properties": {
"impression_count": {
"type": ["null", "number"]
},
"url_link_clicks": {
"type": ["null", "number"]
},
"user_profile_clicks": {
"type": ["null", "number"]
},
"retweet_count": {
"type": ["null", "number"]
},
"reply_count": {
"type": ["null", "number"]
},
"like_count": {
"type": ["null", "number"]
}
}
},
"promoted_metrics": {
"type": ["null", "object" ],
"properties": {
"impression_count": { "impression_count": {
"type": ["null", "number"] "type": ["null", "number"]
}, },

View File

@ -0,0 +1,47 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"id": {
"type": ["null", "string"]
},
"text": {
"type": ["null", "string"]
},
"created_at": {
"type": ["null", "string"]
},
"author_id": {
"type": ["null", "string"]
},
"conversation_id": {
"type": ["null", "string"]
},
"reply_settings": {
"type": ["null", "string"]
},
"promoted_metrics": {
"type": ["null", "object" ],
"properties": {
"impression_count": {
"type": ["null", "number"]
},
"url_link_clicks": {
"type": ["null", "number"]
},
"user_profile_clicks": {
"type": ["null", "number"]
},
"retweet_count": {
"type": ["null", "number"]
},
"reply_count": {
"type": ["null", "number"]
},
"like_count": {
"type": ["null", "number"]
}
}
}
}
}

View File

@ -144,6 +144,48 @@ class TweetMetrics(HttpSubStream, Tweet):
yield data yield data
time.sleep(2) time.sleep(2)
class TweetPromoted(HttpSubStream, Tweet):
    """Sub-stream fetching ``promoted_metrics`` for each recent parent tweet.

    One request is issued per parent tweet (``tweets/{id}``) asking only for
    the ``promoted_metrics`` tweet field. Parent tweets older than
    ``_MAX_TWEET_AGE_DAYS`` are skipped and never queried.
    """

    primary_key = "id"

    # Tweets older than this many days are not queried for promoted metrics.
    _MAX_TWEET_AGE_DAYS = 31
    # Timestamp layout of the parent tweet's ``created_at`` field; the
    # trailing "Z" marks the value as UTC.
    _CREATED_AT_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        """Return the endpoint path for the tweet carried by ``stream_slice``."""
        tweet_id = stream_slice.get("id")
        logger.debug("Fetching tweet %s from Account id %s", tweet_id, self.account_id)
        return f"tweets/{tweet_id}"

    def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """Yield one ``{"id": ...}`` slice per parent tweet that is recent enough.

        Parent records are always read in full-refresh mode. Tweets whose
        ``created_at`` is missing or cannot be parsed are skipped with a
        warning instead of aborting the whole sync.
        """
        # Local import: only `datetime` and `timedelta` are known to be
        # imported at file level.
        from datetime import timezone

        # Compare in UTC: `created_at` is a UTC timestamp, whereas
        # `datetime.today()` would be naive local time.
        limit_date = datetime.now(timezone.utc) - timedelta(days=self._MAX_TWEET_AGE_DAYS)
        for parent_slice in super().stream_slices(sync_mode=SyncMode.full_refresh):
            tweet = parent_slice["parent"]
            tweet_id = tweet.get("id")
            created_at = tweet.get("created_at")
            try:
                created = datetime.strptime(created_at, self._CREATED_AT_FORMAT).replace(tzinfo=timezone.utc)
            except (TypeError, ValueError):
                logger.warning("Skipping tweet %s: missing or unparsable created_at %r", tweet_id, created_at)
                continue
            if created > limit_date:
                yield {"id": tweet_id}
            else:
                logger.info("Not calling promoted_metrics endpoint for tweet %s, tweet too old", tweet_id)

    def request_params(
        self,
        stream_state: Optional[Mapping[str, Any]],
        stream_slice: Optional[Mapping[str, Any]] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> MutableMapping[str, Any]:
        """Return the query parameters requesting only ``promoted_metrics``."""
        params = {
            "tweet.fields": "promoted_metrics",
        }
        # Add condition later:
        logger.debug("DBG-FULL - query params: %s", params)
        return params

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield the ``data`` object of the response, if any, then throttle.

        When the payload has no ``data`` but carries an error key, an
        informational message is logged and nothing is yielded.
        """
        # Decode the body once instead of once per key lookup.
        payload = response.json()
        if "data" in payload:
            yield payload["data"]
        elif "error" in payload or "errors" in payload:
            # NOTE(review): the v2 API appears to report per-resource
            # failures under "errors"; the original "error" key is kept
            # for backward compatibility - confirm against the API docs.
            logger.info("No promoted Metrics for this tweet")
        # Pause between consecutive per-tweet requests (presumably to stay
        # within the API rate limit).
        time.sleep(2)
# Source # Source
class SourceTwitterFetcher(AbstractSource): class SourceTwitterFetcher(AbstractSource):
def check_connection(self, logger, config) -> Tuple[bool, any]: def check_connection(self, logger, config) -> Tuple[bool, any]:
@ -162,8 +204,14 @@ class SourceTwitterFetcher(AbstractSource):
account_id=config['account_id'], account_id=config['account_id'],
parent=tweet parent=tweet
) )
tweetPromoted = TweetPromoted(
authenticator=auth,
account_id=config['account_id'],
parent=tweet
)
return [ return [
Account(authenticator=auth, account_id=config["account_id"]), Account(authenticator=auth, account_id=config["account_id"]),
tweet, tweet,
tweetMetrics tweetMetrics,
tweetPromoted
] ]