From 493256269f4493403e6465601000437218c55eb6 Mon Sep 17 00:00:00 2001 From: Alexis Pentori Date: Tue, 12 Mar 2024 08:51:47 +0100 Subject: [PATCH] simplecast-connector: adding analytics/technology sub-endpoints * adding endpoints technology_application, technology_device_class and technology_listening_method * refactoring code Signed-off-by: Alexis Pentori --- source-simplecast-fecther/README.md | 7 + source-simplecast-fecther/metadata.yaml | 4 +- .../sample_files/configured_catalog.json | 42 +++++ .../schemas/analytic_download.json | 2 +- .../schemas/analytic_episode.json | 2 +- .../schemas/episode.json | 11 +- .../schemas/technology_application.json | 18 +++ .../schemas/technology_device_class.json | 18 +++ .../schemas/technology_listening_method.json | 18 +++ .../source_simplecast_fecther/source.py | 151 +++++++----------- 10 files changed, 173 insertions(+), 100 deletions(-) create mode 100644 source-simplecast-fecther/source_simplecast_fecther/schemas/technology_application.json create mode 100644 source-simplecast-fecther/source_simplecast_fecther/schemas/technology_device_class.json create mode 100644 source-simplecast-fecther/source_simplecast_fecther/schemas/technology_listening_method.json diff --git a/source-simplecast-fecther/README.md b/source-simplecast-fecther/README.md index 180ce5e..8d1c5e9 100644 --- a/source-simplecast-fecther/README.md +++ b/source-simplecast-fecther/README.md @@ -19,6 +19,13 @@ The connector takes the following input: The connector will return the following objects: - [podcast](./source_simplecast_fecther/schemas/podcast.json) - [episode](./source_simplecast_fecther/schemas/episode.json) +- [analytics/download](./source_simplecast_fecther/schemas/analytic_download.json) +- [analytics/episode](./source_simplecast_fecther/schemas/analytic_episode.json) +- [analytics/location](./source_simplecast_fecther/schemas/analytic_location.json) +- [analytics/time of week](./source_simplecast_fecther/schemas/analytic_time_of_week.json) +- 
[analytics/technology/application](./source_simplecast_fecther/schemas/technology_application.json) +- [analytics/technology/device class](./source_simplecast_fecther/schemas/technology_device_class.json) + ## Local development diff --git a/source-simplecast-fecther/metadata.yaml b/source-simplecast-fecther/metadata.yaml index 763ef6a..be1edda 100644 --- a/source-simplecast-fecther/metadata.yaml +++ b/source-simplecast-fecther/metadata.yaml @@ -14,8 +14,8 @@ data: connectorSubtype: api connectorType: source definitionId: 464a7cea-0317-485e-9a9c-bcd06155bfff - dockerImageTag: 0.1.0 - dockerRepository: harbor.status.im/status-im/airbyte/source-simplecast-fecther + dockerImageTag: 1.0.0 + dockerRepository: harbor.status.im/status-im/airbyte/source-simplecast-fetcher githubIssueLabel: source-simplecast-fecther icon: simplecast-fecther.svg license: MIT diff --git a/source-simplecast-fecther/sample_files/configured_catalog.json b/source-simplecast-fecther/sample_files/configured_catalog.json index 6ffc192..2d49e58 100644 --- a/source-simplecast-fecther/sample_files/configured_catalog.json +++ b/source-simplecast-fecther/sample_files/configured_catalog.json @@ -83,6 +83,48 @@ }, "sync_mode": "incremental", "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "technology_application", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "technology_device_class", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "technology_listening_method", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": 
"object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" } ] diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_download.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_download.json index f510815..7c4ad8c 100644 --- a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_download.json +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_download.json @@ -3,7 +3,7 @@ "type": "object", "properties": { "interval": { - "name": ["null", "string"] + "type": ["null", "string"] }, "downloads_total": { "type": ["null", "number"] diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_episode.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_episode.json index 628d336..bce93fb 100644 --- a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_episode.json +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_episode.json @@ -3,7 +3,7 @@ "type": "object", "properties": { "id": { - "name": ["null", "string"] + "type": ["null", "string"] }, "type": { "type": ["null", "string"] diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/episode.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/episode.json index 632643d..b7dbf55 100644 --- a/source-simplecast-fecther/source_simplecast_fecther/schemas/episode.json +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/episode.json @@ -17,11 +17,12 @@ "status": { "type": ["null", "string"] }, - "season_href": { - "type": ["null", "string"] - }, - "season_number": { - "type": ["null", "number"] + "season": { + "type": ["null", "object"], + "properties": { + "href": { "type" : ["null", "string"]}, + "number": {"type": ["null", "number"]} + } }, "number": { "type": ["null", "number"] diff --git 
a/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_application.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_application.json new file mode 100644 index 0000000..a21bb65 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_application.json @@ -0,0 +1,18 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "rank": { + "type": ["null", "number"] + }, + "name": { + "type": ["null", "string"] + }, + "downloads_total": { + "type": ["null", "number"] + }, + "downloads_percent": { + "type": ["null", "number"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_device_class.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_device_class.json new file mode 100644 index 0000000..a21bb65 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_device_class.json @@ -0,0 +1,18 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "rank": { + "type": ["null", "number"] + }, + "name": { + "type": ["null", "string"] + }, + "downloads_total": { + "type": ["null", "number"] + }, + "downloads_percent": { + "type": ["null", "number"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_listening_method.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_listening_method.json new file mode 100644 index 0000000..a21bb65 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/technology_listening_method.json @@ -0,0 +1,18 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "rank": { + "type": ["null", "number"] + }, + "name": { + "type": ["null", "string"] + }, + "downloads_total": { + "type": ["null", "number"] + }, + "downloads_percent": { + "type": ["null", 
"number"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/source.py b/source-simplecast-fecther/source_simplecast_fecther/source.py index 8a4f084..3b2009c 100644 --- a/source-simplecast-fecther/source_simplecast_fecther/source.py +++ b/source-simplecast-fecther/source_simplecast_fecther/source.py @@ -18,6 +18,7 @@ logger = logging.getLogger("airbyte") LOCATION_KEYS=["id", "rank", "name", "downloads_total", "downloads_percent"] TIME_OF_WEEK_KEYS=["rank", "hour_of_week", "hour_of_day", "day_of_week", "count"] DOWNLOADS_KEY=["interval", "downloads_total", "downloads_percent"] +TECH_KEY=["rank", "name", "downloads_total", "downloads_percent"] # Basic full refresh stream class SimplecastFectherStream(HttpStream): url_base = "https://api.simplecast.com/" @@ -41,10 +42,7 @@ class Podcast(SimplecastFectherStream): def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: data=response.json() - logger.info("Response: %s", data) - if 'collection' not in data.keys(): - logger.debug("Error when trying to get the data %s", data) - raise Exception("error when calling the api") + logger.debug("Response: %s", data) for elt in data.get('collection'): podcast={ "id": elt.get("id"), @@ -76,9 +74,6 @@ class Episode(HttpSubStream, Podcast): def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: data=response.json() logger.debug("Response: %s", data) - if 'collection' not in data.keys(): - logger.error("Error when trying to get the data %s", data) - raise Exception("error when calling the api") for elt in data.get('collection'): episode={ "id": elt.get("id"), @@ -86,8 +81,7 @@ class Episode(HttpSubStream, Podcast): "status": elt.get("status"), "published_at": elt.get("published_at"), "updated_at": elt.get("updated_at"), - "season_href": elt.get('season').get("href"), - "season_number": elt.get('season').get("number"), + "season": elt.get('season'), "number": elt.get("number"), "description": 
elt.get("description"), "token": elt.get("token"), @@ -95,81 +89,58 @@ class Episode(HttpSubStream, Podcast): } yield episode -class AnalyticLocation(HttpSubStream, Podcast): +class AnalyticSubStream(HttpSubStream, Podcast, ABC): + primary_key=None + + def __init__(self, endpoint:str, keys_dict:dict, collection_name:str, **kwargs): + super().__init__(Podcast(**kwargs), **kwargs) + self.endpoint=endpoint + self.keys_dict=keys_dict + self.collection_name = collection_name + + def path( + self, + stream_state: Mapping[str, Any] = None, + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None + ) -> str: + podcast_id=stream_slice.get("parent").get("id") + + return f"analytics/{self.endpoint}?podcast={podcast_id}" + + """ + Default implementation of the parse_response to get the data from the JSON object's collection_name entry. + If the object mapping is not a simple key mapping then this function has to be overridden. + """ + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + data=response.json() + logger.debug("Response: %s", data) + for elt in data.get(self.collection_name): + logger.debug("Elt %s", elt) + analytic={ key: elt.get(key) for key in self.keys_dict } + yield analytic + +class AnalyticLocation(AnalyticSubStream): primary_key="analytic_location_id" - def __init__(self, **kwargs): - super().__init__(Podcast(**kwargs), **kwargs) + super().__init__(endpoint="location", keys_dict=LOCATION_KEYS, collection_name="countries", **kwargs) - def path( - self, - stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None - ) -> str: - podcast_id=stream_slice.get("parent").get("id") - - return f"analytics/location?podcast={podcast_id}" - - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - data=response.json() - logger.info("Response: %s", data) - if 'countries' not in data.keys(): - logger.error("Error when 
trying to get the data %s", data) - raise Exception("error when calling the api") - for elt in data.get('countries'): - location={ key: elt.get(key) for key in LOCATION_KEYS } - yield location - -class AnalyticTimeOfWeek(HttpSubStream, Podcast): +class AnalyticTimeOfWeek(AnalyticSubStream): primary_key=None def __init__(self, **kwargs): - super().__init__(Podcast(**kwargs), **kwargs) + super().__init__(endpoint="time_of_week", keys_dict=TIME_OF_WEEK_KEYS, collection_name="collection", **kwargs) - def path( - self, - stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None - ) -> str: - podcast_id=stream_slice.get("parent").get("id") - - return f"analytics/time_of_week?podcast={podcast_id}" - - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - data=response.json() - logger.info("Response: %s", data) - if 'collection' not in data.keys(): - logger.error("Error when trying to get the data %s", data) - raise Exception("error when calling the api") - for elt in data.get('collection'): - time_of_week={ key: elt.get(key) for key in TIME_OF_WEEK_KEYS } - yield time_of_week - -class AnalyticEpisode(HttpSubStream, Podcast): +class AnalyticEpisode(AnalyticSubStream): primary_key=None def __init__(self, **kwargs): - super().__init__(Podcast(**kwargs), **kwargs) - - def path( - self, - stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None - ) -> str: - podcast_id=stream_slice.get("parent").get("id") - - return f"analytics/episodes?podcast={podcast_id}" + super().__init__(endpoint="episodes", keys_dict=[], collection_name="", **kwargs) def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: data=response.json() - logger.info("Response: %s", data) - if 'collection' not in data.keys(): - logger.error("Error when trying to get the data %s", data) - raise Exception("error when 
calling the api") + logger.debug("Response: %s", data) for elt in data.get('collection'): analytic_episode={ "id": elt.get("id"), @@ -180,31 +151,26 @@ class AnalyticEpisode(HttpSubStream, Podcast): } yield analytic_episode -class AnalyticDownload(HttpSubStream, Podcast): - primary_key=None +class AnalyticDownload(AnalyticSubStream, Podcast): def __init__(self, **kwargs): - super().__init__(Podcast(**kwargs), **kwargs) + super().__init__(endpoint="downloads", keys_dict=DOWNLOADS_KEY, collection_name="by_interval", **kwargs) - def path( - self, - stream_state: Mapping[str, Any] = None, - stream_slice: Mapping[str, Any] = None, - next_page_token: Mapping[str, Any] = None - ) -> str: - podcast_id=stream_slice.get("parent").get("id") +class TechnologyApplication(AnalyticSubStream): - return f"analytics/downloads?podcast={podcast_id}" + def __init__(self, **kwargs): + super().__init__(endpoint="technology/applications", keys_dict=TECH_KEY, collection_name="collection", **kwargs) + +class TechnologyDeviceClass(AnalyticSubStream): + + def __init__(self, **kwargs): + super().__init__(endpoint="technology/devices", keys_dict=TECH_KEY, collection_name="collection", **kwargs) + +class TechnologyListeningMethod(AnalyticSubStream): + + def __init__(self, **kwargs): + super().__init__(endpoint="technology/listening_methods", keys_dict=TECH_KEY, collection_name="collection", **kwargs) - def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: - data=response.json() - logger.info("Response: %s", data) - if 'by_interval' not in data.keys(): - logger.error("Error when trying to get the data %s", data) - raise Exception("error when calling the api") - for elt in data.get('by_interval'): - download={ key: elt.get(key) for key in DOWNLOADS_KEY } - yield download # Source class SourceSimplecastFecther(AbstractSource): @@ -219,5 +185,8 @@ class SourceSimplecastFecther(AbstractSource): AnalyticLocation(authenticator=auth), 
AnalyticTimeOfWeek(authenticator=auth), AnalyticEpisode(authenticator=auth), - AnalyticDownload(authenticator=auth) + AnalyticDownload(authenticator=auth), + TechnologyApplication(authenticator=auth), + TechnologyDeviceClass(authenticator=auth), + TechnologyListeningMethod(authenticator=auth) ]