diff --git a/source-simplecast-fecther/Dockerfile b/source-simplecast-fecther/Dockerfile new file mode 100644 index 0000000..0d1e900 --- /dev/null +++ b/source-simplecast-fecther/Dockerfile @@ -0,0 +1,8 @@ +FROM airbyte/python-connector-base:1.1.0 + +COPY . ./airbyte/integration_code +RUN pip install ./airbyte/integration_code + +# The entrypoint and default env vars are already set in the base image +ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" +ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] diff --git a/source-simplecast-fecther/README.md b/source-simplecast-fecther/README.md new file mode 100644 index 0000000..180ce5e --- /dev/null +++ b/source-simplecast-fecther/README.md @@ -0,0 +1,60 @@ +# SimpleCast Fetcher Source + +This is the repository for fetching SimpleCast data, written in Python. + +## Usage + +The connector fetches the list of podcasts, episodes and analytics metrics from [SimpleCast](https://www.simplecast.com/). + +### Configuration + +The connector takes the following input: + +```yaml +- api_key +``` + +### Output + +The connector will return the following objects: +- [podcast](./source_simplecast_fecther/schemas/podcast.json) +- [episode](./source_simplecast_fecther/schemas/episode.json) + +## Local development + +### Prerequisites + +#### Activate Virtual Environment and install dependencies +From this connector directory, create a virtual environment: +``` +python -m venv .venv +``` +``` +source .venv/bin/activate +pip install -r requirements.txt +``` + +### Locally running the connector +``` +python main.py spec +python main.py check --config sample_files/config-example.json +python main.py discover --config sample_files/config-example.json +python main.py read --config sample_files/config-example.json --catalog sample_files/configured_catalog.json +``` + +### Locally running the connector docker image + +```bash +docker build -t airbyte/source-simplecast-fecther:dev . 
+# Running the spec command against your patched connector +docker run airbyte/source-simplecast-fecther:dev spec +``` + +#### Run +Then run any of the connector commands as follows: +``` +docker run --rm airbyte/source-simplecast-fecther:dev spec +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-simplecast-fecther:dev check --config /sample_files/config-example.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-simplecast-fecther:dev discover --config /sample_files/config-example.json +docker run --rm -v $(pwd)/sample_files:/sample_files airbyte/source-simplecast-fecther:dev read --config /sample_files/config-example.json --catalog /sample_files/configured_catalog.json +``` diff --git a/source-simplecast-fecther/main.py b/source-simplecast-fecther/main.py new file mode 100644 index 0000000..85a6fcd --- /dev/null +++ b/source-simplecast-fecther/main.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +from source_simplecast_fecther.run import run + +if __name__ == "__main__": + run() diff --git a/source-simplecast-fecther/metadata.yaml b/source-simplecast-fecther/metadata.yaml new file mode 100644 index 0000000..763ef6a --- /dev/null +++ b/source-simplecast-fecther/metadata.yaml @@ -0,0 +1,29 @@ +data: + allowedHosts: + registries: + oss: + enabled: true + cloud: + enabled: false + remoteRegistries: + pypi: + enabled: true + packageName: airbyte-source-simplecast-fecther + connectorBuildOptions: + baseImage: docker.io/airbyte/python-connector-base:1.0.0@sha256:dd17e347fbda94f7c3abff539be298a65af2d7fc27a307d89297df1081a45c27 + connectorSubtype: api + connectorType: source + definitionId: 464a7cea-0317-485e-9a9c-bcd06155bfff + dockerImageTag: 0.1.0 + dockerRepository: harbor.status.im/status-im/airbyte/source-simplecast-fecther + githubIssueLabel: source-simplecast-fecther + icon: simplecast-fetcher.svg + license: MIT + name: Simplecast Fecther + releaseDate: TODO + supportLevel: community + releaseStage: 
alpha + documentationUrl: https://docs.airbyte.com/integrations/sources/simplecast-fecther + tags: + - language:python +metadataSpecVersion: "1.0" diff --git a/source-simplecast-fecther/requirements.txt b/source-simplecast-fecther/requirements.txt new file mode 100644 index 0000000..d6e1198 --- /dev/null +++ b/source-simplecast-fecther/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/source-simplecast-fecther/sample_files/config-example.json b/source-simplecast-fecther/sample_files/config-example.json new file mode 100644 index 0000000..9bcc289 --- /dev/null +++ b/source-simplecast-fecther/sample_files/config-example.json @@ -0,0 +1,3 @@ +{ + "api_key": "not-a-real-token" +} diff --git a/source-simplecast-fecther/sample_files/configured_catalog.json b/source-simplecast-fecther/sample_files/configured_catalog.json new file mode 100644 index 0000000..6ffc192 --- /dev/null +++ b/source-simplecast-fecther/sample_files/configured_catalog.json @@ -0,0 +1,89 @@ +{ + "streams": [ + { + "stream": { + "name": "podcast", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "episode", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "analytic_location", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "analytic_time_of_week", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + 
"supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "analytic_episode", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "analytic_download", + "json_schema": { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object" + }, + "supported_sync_modes": [ + "full_refresh", "incremental" + ] + }, + "sync_mode": "incremental", + "destination_sync_mode": "overwrite" + } + + ] +} diff --git a/source-simplecast-fecther/setup.py b/source-simplecast-fecther/setup.py new file mode 100644 index 0000000..0c387b5 --- /dev/null +++ b/source-simplecast-fecther/setup.py @@ -0,0 +1,35 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from setuptools import find_packages, setup + +MAIN_REQUIREMENTS = [ + "airbyte-cdk~=0.2", +] + +TEST_REQUIREMENTS = [ + "requests-mock~=1.9.3", + "pytest~=6.2", + "pytest-mock~=3.6.1", + "connector-acceptance-test", +] + +setup( + name="source_simplecast_fecther", + description="Source implementation for Simplecast Fecther.", + author="Airbyte", + author_email="contact@airbyte.io", + packages=find_packages(), + install_requires=MAIN_REQUIREMENTS, + package_data={"": ["*.json", "*.yaml", "schemas/*.json", "schemas/shared/*.json"]}, + extras_require={ + "tests": TEST_REQUIREMENTS, + }, + entry_points={ + "console_scripts": [ + "source-simplecast-fecther=source_simplecast_fecther.run:run", + ], + }, +) diff --git a/source-simplecast-fecther/simplecast-fetcher.svg b/source-simplecast-fecther/simplecast-fetcher.svg new file mode 100644 index 0000000..cf69b9c --- /dev/null +++ b/source-simplecast-fecther/simplecast-fetcher.svg @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + 
diff --git a/source-simplecast-fecther/source_simplecast_fecther/__init__.py b/source-simplecast-fecther/source_simplecast_fecther/__init__.py new file mode 100644 index 0000000..a021c10 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/__init__.py @@ -0,0 +1,8 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +from .source import SourceSimplecastFecther + +__all__ = ["SourceSimplecastFecther"] diff --git a/source-simplecast-fecther/source_simplecast_fecther/run.py b/source-simplecast-fecther/source_simplecast_fecther/run.py new file mode 100644 index 0000000..7a451fa --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/run.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import sys + +from airbyte_cdk.entrypoint import launch +from .source import SourceSimplecastFecther + +def run(): + source = SourceSimplecastFecther() + launch(source, sys.argv[1:]) diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_download.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_download.json new file mode 100644 index 0000000..f510815 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_download.json @@ -0,0 +1,15 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "interval": { + "type": ["null", "string"] + }, + "downloads_total": { + "type": ["null", "number"] + }, + "downloads_percent": { + "type": ["null", "number"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_episode.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_episode.json new file mode 100644 index 0000000..628d336 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_episode.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": 
"object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "downloads": { + "type": ["null", "number"] + }, + "number": { + "type": ["null", "number"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_location.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_location.json new file mode 100644 index 0000000..30a9c2b --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_location.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "rank": { + "type": ["null", "number"] + }, + "name": { + "type": ["null", "string"] + }, + "downloads_total": { + "type": ["null", "number"] + }, + "downloads_percent": { + "type": ["null", "number"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_time_of_week.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_time_of_week.json new file mode 100644 index 0000000..a84de45 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/analytic_time_of_week.json @@ -0,0 +1,21 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "rank": { + "type": ["null", "number"] + }, + "hour_of_week": { + "type": ["null", "number"] + }, + "hour_of_day": { + "type": ["null", "number"] + }, + "day_of_week": { + "type": ["null", "number"] + }, + "count": { + "type": ["null", "number"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/episode.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/episode.json new file mode 100644 index 0000000..632643d --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/episode.json @@ -0,0 +1,42 @@ +{ 
+ "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "published_at": { + "type": ["null", "string"] + }, + "updated_at": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "season_href": { + "type": ["null", "string"] + }, + "season_number": { + "type": ["null", "number"] + }, + "number": { + "type": ["null", "number"] + }, + "description": { + "type": ["null", "string"] + }, + "duration": { + "type": ["null", "number"] + }, + "token": { + "type": ["null", "string"] + }, + "type": { + "type": ["null", "string"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/schemas/podcast.json b/source-simplecast-fecther/source_simplecast_fecther/schemas/podcast.json new file mode 100644 index 0000000..7c939f0 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/schemas/podcast.json @@ -0,0 +1,27 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "href": { + "type": ["null", "string"] + }, + "episode_count": { + "type": ["null", "number"] + }, + "account_id": { + "type": ["null", "string"] + }, + "account_owner_name": { + "type": ["null", "string"] + } + } +} diff --git a/source-simplecast-fecther/source_simplecast_fecther/source.py b/source-simplecast-fecther/source_simplecast_fecther/source.py new file mode 100644 index 0000000..8a4f084 --- /dev/null +++ b/source-simplecast-fecther/source_simplecast_fecther/source.py @@ -0,0 +1,223 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
#


from abc import ABC
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple
import logging

import requests
from airbyte_cdk.models import SyncMode
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http import HttpStream, HttpSubStream
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator

logger = logging.getLogger("airbyte")


# Fields copied verbatim from each analytics payload item into the emitted record.
LOCATION_KEYS = ["id", "rank", "name", "downloads_total", "downloads_percent"]
TIME_OF_WEEK_KEYS = ["rank", "hour_of_week", "hour_of_day", "day_of_week", "count"]
DOWNLOADS_KEY = ["interval", "downloads_total", "downloads_percent"]


def _dig(record: Any, *keys: str) -> Any:
    """Follow ``keys`` through nested dicts and return the value found.

    Returns ``None`` as soon as one level is missing or is not a dict, so a
    payload with e.g. ``"season": null`` yields a null field instead of
    raising ``AttributeError`` in the middle of a sync.
    """
    current = record
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


# Basic full refresh stream
class SimplecastFectherStream(HttpStream):
    """Common base for every stream: API root, paging policy, payload unwrapping."""

    url_base = "https://api.simplecast.com/"

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Always return ``None``: only the first page of each endpoint is read.

        NOTE(review): if the API paginates these endpoints, records beyond the
        first page are silently dropped -- confirm against the API docs.
        """
        return None

    def _extract_items(self, response: requests.Response, key: str) -> List[Mapping[str, Any]]:
        """Decode the JSON body and return the list stored under ``key``.

        Args:
            response: HTTP response received for the current request.
            key: Top-level field of the payload that holds the items.

        Returns:
            The items under ``key``; an empty list when the field is null.

        Raises:
            Exception: when ``key`` is absent from the payload, which is how
                the original code detected an API error body.
        """
        data = response.json()
        # Full payloads are only useful when troubleshooting; keep them out of INFO.
        logger.debug("Response: %s", data)
        if key not in data:
            logger.error("Error when trying to get the data %s", data)
            raise Exception("error when calling the api")
        return data[key] or []


class Podcast(SimplecastFectherStream):
    """Podcasts visible to the API key (``GET podcasts``)."""

    # Emitted records expose the identifier as "id"; no "podcast_id" field exists.
    primary_key = "id"

    @property
    def use_cache(self) -> bool:
        return True

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        return "podcasts"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield one flattened record per item of the ``collection`` field."""
        for elt in self._extract_items(response, "collection"):
            yield {
                "id": elt.get("id"),
                "title": elt.get("title"),
                "status": elt.get("status"),
                "href": elt.get("href"),
                "episode_count": _dig(elt, "episodes", "count"),
                "account_id": elt.get("account_id"),
                "account_owner_name": _dig(elt, "account", "owner", "name"),
            }


class _PodcastChildStream(HttpSubStream, Podcast):
    """Shared plumbing for the streams that are requested once per podcast.

    The base classes are the ones every child stream already had
    (``HttpSubStream, Podcast``), so the method resolution order and the
    inherited ``use_cache`` flag are unchanged.
    """

    primary_key = None

    def __init__(self, **kwargs):
        # Each child owns a Podcast parent built with the same arguments
        # (in practice the authenticator).
        super().__init__(Podcast(**kwargs), **kwargs)

    @staticmethod
    def _podcast_id(stream_slice: Mapping[str, Any]) -> Any:
        """Return the ``id`` of the parent record stored under the slice's ``parent`` key."""
        return stream_slice.get("parent").get("id")


class Episode(_PodcastChildStream):
    """Episodes of each podcast (``GET podcasts/{id}/episodes``)."""

    # Emitted records expose the identifier as "id"; no "episode_id" field exists.
    primary_key = "id"

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        return f"podcasts/{self._podcast_id(stream_slice)}/episodes"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield one flattened record per item of the ``collection`` field."""
        for elt in self._extract_items(response, "collection"):
            yield {
                "id": elt.get("id"),
                "title": elt.get("title"),
                "status": elt.get("status"),
                "published_at": elt.get("published_at"),
                "updated_at": elt.get("updated_at"),
                "season_href": _dig(elt, "season", "href"),
                "season_number": _dig(elt, "season", "number"),
                "number": elt.get("number"),
                "description": elt.get("description"),
                # Declared in schemas/episode.json; null when the payload omits it.
                "duration": elt.get("duration"),
                "token": elt.get("token"),
                "type": elt.get("type"),
            }


class AnalyticLocation(_PodcastChildStream):
    """Per-country download figures (``GET analytics/location?podcast={id}``)."""

    # No "analytic_location_id" field is emitted. The "id" comes from the
    # "countries" items and the record carries no podcast identifier, so it is
    # presumably not unique across podcasts -- confirm before using it as a key.
    primary_key = None

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        return f"analytics/location?podcast={self._podcast_id(stream_slice)}"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield the ``LOCATION_KEYS`` fields of every item under ``countries``."""
        for elt in self._extract_items(response, "countries"):
            yield {key: elt.get(key) for key in LOCATION_KEYS}


class AnalyticTimeOfWeek(_PodcastChildStream):
    """Time-of-week figures (``GET analytics/time_of_week?podcast={id}``)."""

    primary_key = None

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        return f"analytics/time_of_week?podcast={self._podcast_id(stream_slice)}"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield the ``TIME_OF_WEEK_KEYS`` fields of every item under ``collection``."""
        for elt in self._extract_items(response, "collection"):
            yield {key: elt.get(key) for key in TIME_OF_WEEK_KEYS}


class AnalyticEpisode(_PodcastChildStream):
    """Per-episode download totals (``GET analytics/episodes?podcast={id}``)."""

    primary_key = None

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        return f"analytics/episodes?podcast={self._podcast_id(stream_slice)}"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield one flattened record per item of the ``collection`` field."""
        for elt in self._extract_items(response, "collection"):
            yield {
                "id": elt.get("id"),
                "type": elt.get("type"),
                "title": elt.get("title"),
                "downloads": _dig(elt, "downloads", "total"),
                "number": elt.get("number"),
            }


class AnalyticDownload(_PodcastChildStream):
    """Downloads per interval (``GET analytics/downloads?podcast={id}``)."""

    primary_key = None

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        return f"analytics/downloads?podcast={self._podcast_id(stream_slice)}"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        """Yield the ``DOWNLOADS_KEY`` fields of every item under ``by_interval``."""
        for elt in self._extract_items(response, "by_interval"):
            yield {key: elt.get(key) for key in DOWNLOADS_KEY}


# Source
class SourceSimplecastFecther(AbstractSource):
    """Airbyte source exposing the podcast, episode and analytics streams."""

    def check_connection(self, logger, config) -> Tuple[bool, Any]:
        """Validate the configuration by requesting the podcast list once.

        Args:
            logger: Logger supplied by the caller (unused).
            config: Connector configuration; must contain ``api_key``.

        Returns:
            ``(True, None)`` when the request succeeds, otherwise ``(False,
            message)`` describing the failure (missing key, HTTP error,
            unexpected payload).
        """
        try:
            auth = TokenAuthenticator(token=config["api_key"])
            podcasts = Podcast(authenticator=auth)
            # Pulling the first record (if any) is enough to force the HTTP call.
            next(iter(podcasts.read_records(sync_mode=SyncMode.full_refresh)), None)
        except Exception as error:  # boundary: the failure is reported, not raised
            return False, f"Unable to connect to the Simplecast API: {error!r}"
        return True, None

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        """Build every stream with a token authenticator created from ``api_key``."""
        auth = TokenAuthenticator(token=config["api_key"])
        return [
            Podcast(authenticator=auth),
            Episode(authenticator=auth),
            AnalyticLocation(authenticator=auth),
            AnalyticTimeOfWeek(authenticator=auth),
            AnalyticEpisode(authenticator=auth),
            AnalyticDownload(authenticator=auth),
        ]