simplecast: adding new endpoint and pagination

Signed-off-by: Alexis Pentori <alexis@status.im>
This commit is contained in:
Alexis Pentori 2024-04-17 12:09:01 +02:00
parent 36555d0c23
commit 204812e8f8
No known key found for this signature in database
GPG Key ID: 65250D2801E47A10
5 changed files with 234 additions and 19 deletions

View File

@ -14,7 +14,7 @@ data:
connectorSubtype: api connectorSubtype: api
connectorType: source connectorType: source
definitionId: 464a7cea-0317-485e-9a9c-bcd06155bfff definitionId: 464a7cea-0317-485e-9a9c-bcd06155bfff
dockerImageTag: 1.0.0 dockerImageTag: 1.1.0
dockerRepository: harbor.status.im/status-im/airbyte/source-simplecast-fetcher dockerRepository: harbor.status.im/status-im/airbyte/source-simplecast-fetcher
githubIssueLabel: source-simplecast-fecther githubIssueLabel: source-simplecast-fecther
icon: simplecast-fecther.svg icon: simplecast-fecther.svg

View File

@ -125,7 +125,34 @@
}, },
"sync_mode": "incremental", "sync_mode": "incremental",
"destination_sync_mode": "overwrite" "destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "analytic_podcast_v2",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
},
{
"stream": {
"name": "analytic_episode_v2",
"json_schema": {
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object"
},
"supported_sync_modes": [
"full_refresh", "incremental"
]
},
"sync_mode": "incremental",
"destination_sync_mode": "overwrite"
} }
] ]
} }

View File

@ -0,0 +1,60 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Generated schema for Root",
"type": "object",
"properties": {
"href": {
"type": ["null", "string"]
},
"time_of_week": {},
"technology": {
"type": "object",
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"recast": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"mapbox": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"location": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"episodes": {},
"embed": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"downloads": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
}
}
}

View File

@ -0,0 +1,73 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Generated schema for Root",
"type": "object",
"properties": {
"href": {
"type": ["null", "string"]
},
"time_of_week": {
"type": "object",
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"technology": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"recast": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"mapbox": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"location": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"episodes": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"embed": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
},
"downloads": {
"type": ["null", "object"],
"properties": {
"href": {
"type": ["null", "string"]
}
}
}
}
}

View File

@ -7,6 +7,7 @@ from abc import ABC
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
import logging import logging
import requests import requests
import time
from airbyte_cdk.sources import AbstractSource from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
@ -22,9 +23,27 @@ TECH_KEY=["rank", "name", "downloads_total", "downloads_percent"]
# Basic full refresh stream # Basic full refresh stream
class SimplecastFectherStream(HttpStream): class SimplecastFectherStream(HttpStream):
url_base = "https://api.simplecast.com/" url_base = "https://api.simplecast.com/"
primary_key = None
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
return None pages = response.json().get('pages')
if pages and pages.get('next'):
time.sleep(2)
return {
'limit': pages.get('limit'),
'offset': pages.get('limit')* pages.get('current')
}
def request_params(
    self,
    stream_state: Optional[Mapping[str, Any]],
    stream_slice: Optional[Mapping[str, Any]] = None,
    next_page_token: Optional[Mapping[str, Any]] = None,
) -> MutableMapping[str, Any]:
    """Return the query parameters to send with the next request.

    Args:
        stream_state: Unused here; accepted to match the stream interface.
        stream_slice: Unused here; accepted to match the stream interface.
        next_page_token: Pagination token for the page about to be fetched,
            or ``None``/empty for the first page.

    Returns:
        A new mutable dict holding the token's key/value pairs, or an empty
        dict when there is no token. A mapping is always returned so callers
        never have to special-case ``None``.
    """
    if not next_page_token:
        # First page: no pagination parameters, but still honour the
        # declared MutableMapping return type instead of returning None.
        return {}
    # Copy so that later mutation of the params cannot alter the token.
    return dict(next_page_token)
class Podcast(SimplecastFectherStream): class Podcast(SimplecastFectherStream):
@ -55,11 +74,12 @@ class Podcast(SimplecastFectherStream):
} }
yield podcast yield podcast
class Episode(HttpSubStream, Podcast): class Episode(HttpSubStream, SimplecastFectherStream):
primary_key="episode_id" primary_key="episode_id"
def __init__(self, **kwargs): @property
super().__init__(Podcast(**kwargs), **kwargs) def use_cache(self) -> bool:
return True
def path( def path(
self, self,
@ -89,11 +109,11 @@ class Episode(HttpSubStream, Podcast):
} }
yield episode yield episode
class AnalyticSubStream(HttpSubStream, Podcast, ABC): class AnalyticSubStream(HttpSubStream, SimplecastFectherStream, ABC):
primary_key=None primary_key=None
def __init__(self, endpoint:str, keys_dict:dict, collection_name:str, **kwargs): def __init__(self, endpoint:str, keys_dict:dict, collection_name:str, **kwargs):
super().__init__(Podcast(**kwargs), **kwargs) super().__init__(**kwargs)
self.endpoint=endpoint self.endpoint=endpoint
self.keys_dict=keys_dict self.keys_dict=keys_dict
self.collection_name = collection_name self.collection_name = collection_name
@ -151,7 +171,7 @@ class AnalyticEpisode(AnalyticSubStream):
} }
yield analytic_episode yield analytic_episode
class AnalyticDownload(AnalyticSubStream, Podcast): class AnalyticDownload(AnalyticSubStream):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super().__init__(endpoint="downloads", keys_dict=DOWNLOADS_KEY, collection_name="by_interval", **kwargs) super().__init__(endpoint="downloads", keys_dict=DOWNLOADS_KEY, collection_name="by_interval", **kwargs)
@ -171,6 +191,37 @@ class TechnologyListeningMethod(AnalyticSubStream):
def __init__(self, **kwargs): def __init__(self, **kwargs):
super().__init__(endpoint="technology/listening_methods", keys_dict=TECH_KEY, collection_name="collection", **kwargs) super().__init__(endpoint="technology/listening_methods", keys_dict=TECH_KEY, collection_name="collection", **kwargs)
class AnalyticEpisodeV2(HttpSubStream, SimplecastFectherStream):
    """Sub-stream that requests the ``analytics`` endpoint once per parent record."""

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the request path, filtered on the parent record's ``id``.

        The id is read from ``stream_slice["parent"]["id"]`` and passed as the
        ``episode`` query parameter.
        """
        parent_record = stream_slice.get("parent")
        episode_id = parent_record.get("id")
        return f"analytics?episode={episode_id}"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield the decoded JSON body of the response as a single record."""
        payload = response.json()
        logger.debug("Response: %s", payload)
        yield payload
class AnalyticPodcastV2(HttpSubStream, SimplecastFectherStream):
    """Sub-stream that requests the ``analytics`` endpoint once per parent record."""

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None
    ) -> str:
        """Return the request path, filtered on the parent record's ``id``.

        The id is read from ``stream_slice["parent"]["id"]`` and passed as the
        ``podcast`` query parameter.
        """
        parent_record = stream_slice.get("parent")
        podcast_id = parent_record.get("id")
        return f"analytics?podcast={podcast_id}"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield the decoded JSON body of the response as a single record."""
        payload = response.json()
        logger.debug("Response: %s", payload)
        yield payload
# Source # Source
class SourceSimplecastFecther(AbstractSource): class SourceSimplecastFecther(AbstractSource):
@ -179,14 +230,18 @@ class SourceSimplecastFecther(AbstractSource):
def streams(self, config: Mapping[str, Any]) -> List[Stream]: def streams(self, config: Mapping[str, Any]) -> List[Stream]:
auth = TokenAuthenticator(token=config["api_key"]) auth = TokenAuthenticator(token=config["api_key"])
podcasts=Podcast(authenticator=auth)
episodes=Episode(authenticator=auth, parent=podcasts)
return [ return [
Podcast(authenticator=auth), podcasts,
Episode(authenticator=auth), episodes,
AnalyticLocation(authenticator=auth), AnalyticLocation(authenticator=auth, parent=podcasts),
AnalyticTimeOfWeek(authenticator=auth), AnalyticTimeOfWeek(authenticator=auth, parent=podcasts),
AnalyticEpisode(authenticator=auth), AnalyticEpisode(authenticator=auth, parent=podcasts),
AnalyticDownload(authenticator=auth), AnalyticDownload(authenticator=auth,parent=podcasts),
TechnologyApplication(authenticator=auth), TechnologyApplication(authenticator=auth, parent=podcasts),
TechnologyDeviceClass(authenticator=auth), TechnologyDeviceClass(authenticator=auth, parent=podcasts),
TechnologyListeningMethod(authenticator=auth) TechnologyListeningMethod(authenticator=auth, parent=podcasts),
AnalyticEpisodeV2(authenticator=auth, parent=episodes),
AnalyticPodcastV2(authenticator=auth, parent=podcasts)
] ]