diff --git a/.github/workflows/example.yml b/.github/workflows/example.yml new file mode 100644 index 0000000..cc23cc3 --- /dev/null +++ b/.github/workflows/example.yml @@ -0,0 +1,72 @@ +name: Example syndicate flow +on: + push: + # Watch these branches... + branches: + - master + # ...for changes to these files. + paths: + - 'pages/**/*.mdx?' + +jobs: + syndicate: + runs-on: ubuntu-latest + name: Syndicate posts + env: + # This is provided to all actions by Github, and needed to access the posts + # in your repository. + # @see https://help.github.com/en/actions/automating-your-workflow-with-github-actions/authenticating-with-the-github_token + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # Tell me the path (relative to the project root) where your content lives + # so I can find it in the commit that triggered this flow. + # Defaults to 'posts' if not specified. + SYNDICATE_POST_DIR: 'pages/posts' + + steps: + # This step will syndicate your content to DEV.to, if supported, but will + # NOT commit the platform-specific content IDs back to your repo. This is + # useful for bundling the results of multiple actions into a single commit. + # + # However, note that future syndications to DEV.to will be unaware of the + # drafts already created by this step and will result in duplicate drafts. + # + # To avoid this, include a future step which does not specify any silos, but + # sets the `mark_as_syndicated` flag to true. This will commit the silo + # IDs that have not already been added during this job. + - name: Push to DEV.to + uses: dabrady/syndicate@develop + with: + silos: DEV + env: + DEV_API_KEY: ${{ secrets.DEV_API_KEY }} + + # This step will syndicate your content to Medium and Planet Mars, if + # supported, and will commit any new platform-specific content IDs to the + # appropriate files. + # + # Note that this will NOT commit IDs generated by previous actions, just + # the ones from this action. + - name: Push to Medium and Planet Mars and commit new IDs + uses: dabrady/syndicate@develop + with: + # Use a multiline YAML string to specify multiple silos. + silos: | + Medium + Planet_Mars + mark_as_syndicated: true + env: + MEDIUM_API_KEY: ${{ secrets.MEDIUM_API_KEY }} + PLANET_MARS_API_KEY: ${{ secrets.MARS_API_KEY }} + + # This step will not syndicate any content to any silos, but instead will + # commit any new platform-specific content IDs generated by previous steps + # in this job to the appropriate files, if they haven't already been added. + # + # It's a nice way of bundling the results of multiple steps into a single + # commit, or of cleaning up at the end of a job to make sure you didn't miss + # anything. + - name: Commit IDs of newly syndicated posts if necessary + uses: dabrady/syndicate@develop + with: + mark_as_syndicated: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b6e4761 --- /dev/null +++ b/.gitignore @@ -0,0 +1,129 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..16d4cc7 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3-alpine + +WORKDIR /action + +# Copy action code +COPY requirements.txt entrypoint.py ./ +COPY syndicate/ ./syndicate/ + +# Install action requirements +RUN pip install --no-cache-dir -r ./requirements.txt + +# Hardcoding WORKDIR into ENTRYPOINT. +# Can't use environment variables in "exec" form of ENTRYPOINT, but "exec" form +# is recommended. +ENTRYPOINT [ "/action/entrypoint.py" ] \ No newline at end of file diff --git a/README.md b/README.md index 1123750..9a0d9e0 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,183 @@ # syndicate -a simple implementation of the [P.O.S.S.E.](https://indieweb.org/POSSE) content publishing model + +A simple implementation of the [P.O.S.S.E.](https://indieweb.org/POSSE) content publishing model. + +Write your content, store it on Github, and use this action in a workflow to draft it to silo platforms like [DEV.to](https://dev.to). The action will keep the silos up to date with your latest changes here on Github. + +Wherever possible, when content is syndicated to a silo for the first time, it is created in an unpublished/"draft" form. Any exceptions to this will be called out in the documentation for [`silos`](#silos) below. + +## Example usage + +See [the example workflow](https://github.com/dabrady/syndicate/blob/develop/.github/workflows/example.yml) for a fully annotated example, but here's the quick version: + +```yaml +uses: dabrady/syndicate@v1.0 +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DEV_API_KEY: ${{ secrets.DEV_API_KEY }} + SYNDICATE_POST_DIR: pages/posts +with: + silos: DEV + mark_as_syndicated: true +``` + +## Be aware: Github is the source of truth +Syndication is a one-way street: changes made to your content on Github will be copied to your silos, but changes made to a copy of the content on a particular silo will not be synced to your Github repository.
+ +**Github is the source of truth**: any changes made on specific platforms will be overwritten with whatever is in Github the next time this action processes a change to that content. + +This can have undesirable effects. Not all platforms support the same markup or features, and you might often find yourself needing to tweak your content on a particular silo before you publish it; but if you then make an update to it on Github, those silo-specific tweaks will be wiped away and you'll have to do it again. + +For this reason, by default this action treats your content as immutable, and creates a new draft in the specified silos for every commit you make to a particular file. This prevents overwriting existing published content with content that is unsuitable for that platform. + +This comes with its own set of drawbacks and annoyances, however, so it is also possible to syndicate new content as new drafts while pushing updates to existing content directly to its syndicated counterparts; see the [`mark_as_syndicated`](#mark_as_syndicated) input below. + +## Inputs + +### `silos` + +_Default: `none`_ + +A YAML list of platforms to syndicate your content to. Silo names are case-insensitive but should be snake_cased if they contain spaces. +E.g. + +```yaml +with: + silos: | + DEV + Medium + CNN + BBC +``` + +If a given silo is unsupported, it will be ignored and called out in the action log. + +The currently supported silos are: +- `DEV` (https://dev.to) + +### `mark_as_syndicated` + +_Default: `false`_ + +A flag used to trigger a commit upstream which adds the silo-specific IDs to the YAML frontmatter of your syndicated content. This ensures that any subsequent changes you make to your posts will trigger an update to the syndicated copy, instead of triggering the creation of a new draft on your silos. + +For instance, if the commit that triggered this workflow added a new post called `pages/posts/i-got-a-new-cat.md`, a step in your workflow configured like this: + +```yaml +steps: +- name: Push to DEV.to and sync IDs + uses: dabrady/syndicate@v1.0 + with: + silos: DEV + mark_as_syndicated: true +``` + +will create a new draft on DEV.to with a copy of `pages/posts/i-got-a-new-cat.md` and result in a commit to the upstream head of the branch that triggered this workflow that looks like this: + +```diff +diff --git a/pages/posts/i-got-a-new-cat.md b/pages/posts/i-got-a-new-cat.md +index e94caa8..cc23cc3 100644 +--- a/pages/posts/i-got-a-new-cat.md ++++ b/pages/posts/i-got-a-new-cat.md +@@ -2,3 +2,4 @@ on: +--- ++dev_silo_id: 5316572 +title: I got a new cat! +--- +``` + +Providing no silos, but asking to mark new posts as syndicated, will ensure any posts added to a silo **by previous steps** are properly marked before the job completes. Think of it like a save point: this approach to using the flag allows you to bundle silo syndication into as many or as few commits as you wish: + +```yaml +steps: +... +- name: Save unsaved silo IDs to Github + uses: dabrady/syndicate@v1.0 + with: + mark_as_syndicated: true +``` + +### Environment variables + +#### Required + +##### `GITHUB_TOKEN` + +In order to syndicate your content, this action needs access to your repository. + +A unique `GITHUB_TOKEN` secret is created by Github for every workflow run for use by actions to access the repository, and needs to be added to the environment of this action in your workflow setup. E.g.
+```yaml +steps: +- name: Push to DEV.to and sync IDs + uses: dabrady/syndicate@v1.0 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + silos: DEV + mark_as_syndicated: true +``` + +See the [Github Workflow documentation](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/authenticating-with-the-github_token) for full details. + +##### `<SILO>_API_KEY` + +It is assumed that this action will need to interact with every supported silo via a public API, and that that API authenticates via a personal API key. + +Thus, this action will ignore any silos specified unless a corresponding API key is exposed in its environment. The keys are expected to be found in environment variables matching the following format: + +``` +<SILO>_API_KEY +``` + +where `<SILO>` is a SCREAMING\_SNAKE\_CASE version of a recognized argument for the `silos` action input. For example, the API key for the `DEV` silo should be exposed as the `DEV_API_KEY` environment variable. + +For details on how to expose these secrets to the action without exposing them to the world, see the [Github documentation on working with secrets](https://help.github.com/en/actions/automating-your-workflow-with-github-actions/creating-and-using-encrypted-secrets). + +#### Optional + +##### `SYNDICATE_POST_DIR` + +_Default: `posts`_ + +Naturally, not every commit to your repo will contain a change to the content you want to syndicate. + +The simplistic approach currently implemented for identifying the proper files is to look for file paths under a particular directory of your repo (i.e. with a given path prefix). Set this environment variable to the place in your repo (relative to the root) where you keep the stuff you want to share elsewhere. + +(The choice to use an environment variable for this instead of an input is so that you can set it once in your workflow and not have to specify it on every use of this action, should you choose to use it multiple times in a given workflow.) + +## Outputs + +### `time` + +A timestamp marking the end of the action. + +### `syndicated_posts` + +A JSON-formatted string of posts that were added/modified on each of the `silos` specified, including the unique identifiers given to them by the silo and the public URL of the added/modified post. +E.g. +```json +{ + "DEV": { + "added": { + "pages/posts/i-got-a-new-cat.md": [ 201054451, "https://dev.to/daniel13rady/i-got-a-new-cat-aej2-temp-slug-0246" ] + }, + "modified": {} + }, + "Medium": { ... }, + ... +} +``` + +#### Environment variables + +##### `SYNDICATE_SHA` +:warning: Internal, do not set this yourself. + +Using the `mark_as_syndicated` flag will cause a commit to be generated and pushed to the upstream of the branch that triggered the workflow. The generated commit SHA is stored in this variable for use as the parent of any commits generated by later steps and considered to be the 'head' of the branch when present. + +##### `SYNDICATE_POSTS` +:warning: Internal, do not set this yourself. + +**NOTE** The word is 'syndicate', not ~~'syndicate**d**'~~. It is a prefix used by convention on all environment variables set by this action. + +A JSON string formatted identically to the `syndicated_posts` action output, but containing the composite results of all invocations of this action so far in the running workflow.
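For reference, a later step in the same job can read the `syndicated_posts` output through the standard `steps` context once the producing step has an `id`. A minimal sketch in the style of the examples above (the step id `dev_push` and the plain `echo` are illustrative only, not part of this change):

```yaml
steps:
- name: Push to DEV.to
  id: dev_push
  uses: dabrady/syndicate@v1.0
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    DEV_API_KEY: ${{ secrets.DEV_API_KEY }}
  with:
    silos: DEV

- name: Inspect syndication results
  run: echo '${{ steps.dev_push.outputs.syndicated_posts }}'
```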
diff --git a/action.yml b/action.yml new file mode 100644 index 0000000..79a8f11 --- /dev/null +++ b/action.yml @@ -0,0 +1,25 @@ +name: 'syndicate' +description: 'Publish your content elsewhere (P.O.S.S.E)' +branding: + icon: rss + color: red + +runs: + using: 'docker' + image: 'Dockerfile' + +inputs: + silos: + description: 'A list of platforms to update with your new/changed content' + required: false + default: '' + mark_as_syndicated: + description: 'Commit silo IDs to the frontmatter of newly syndicated posts for synchronization purposes' + required: false + default: false + +outputs: + time: + description: 'The time this action finished' + syndicated_posts: + description: 'A JSON object mapping silos to the posts that were added or modified on those platforms' diff --git a/entrypoint.py b/entrypoint.py new file mode 100755 index 0000000..40be513 --- /dev/null +++ b/entrypoint.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +from datetime import datetime +import json +import os +import sys +import syndicate +from syndicate.utils import action_log, action_setoutput, job_getoutput, job_addoutput, get_posts, fronted, mark_syndicated_posts + +action_inputs = { + 'silos': os.getenv('INPUT_SILOS').splitlines(), + 'mark_as_syndicated': json.loads(os.getenv('INPUT_MARK_AS_SYNDICATED')) +} + +posts = get_posts() +if not posts: + action_log("No posts added or updated, nothing to do.") + action_setoutput("time", datetime.now()) + sys.exit() + +# Do the thing. +# Result set format: +# { +# '': { +# 'added': { +# 'path/to/new_post': ( , ), +# ... +# }, +# 'modified': { +# 'path/to/updated_post': ( , ), +# ... +# }, +# }, +# ... +# } +syndicated_posts = syndicate.elsewhere(posts, action_inputs['silos']) or {} +action_setoutput('syndicated_posts', syndicated_posts) +# Merge output with output of any previous runs +job_addoutput(syndicated_posts) + +if action_inputs['mark_as_syndicated']: + action_log("Marking newly syndicated posts...") + ## NOTE + # If silos were provided, commit only the results of this step. In the case + # where no silos were provided, commit all job results so far. + # + # This allows us to bundle syndications into as few or many commits as we + # want in our workflows. + ## + if not action_inputs['silos']: + syndicated_posts = job_getoutput() + + # Just focus on the added ones. + indexed_paths_by_silo = { + silo: results['added'] + for silo, results in syndicated_posts.items() + if results and 'added' in results + } + + if not indexed_paths_by_silo or not any(indexed_paths_by_silo.values()): + action_log("Nothing new to mark.") + sys.exit() + + # { + # 'path/to/post': { + # '': 42, + # '': 'abc123', + # ... + # }, + # ... 
+ # } + silo_ids_by_path = {} + for silo, indexed_paths in indexed_paths_by_silo.items(): + for path, ( sid, _ ) in indexed_paths.items(): + silo_ids_by_path.setdefault(path, {}) + silo_ids_by_path[path][silo] = sid + + mark_syndicated_posts( + silo_ids_by_path, + {post.path:fronted(post) for post in posts} + ) + +action_setoutput("time", datetime.now()) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..50baab3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +requests +PyGithub +python-frontmatter diff --git a/syndicate/__init__.py b/syndicate/__init__.py new file mode 100755 index 0000000..bb8df01 --- /dev/null +++ b/syndicate/__init__.py @@ -0,0 +1,94 @@ +from syndicate.utils import action_log, action_warn + +import functools +import importlib.util +import os +import sys + +def elsewhere(posts, silos): + """ + Syndicates the given posts to the given silos and returns a dictionary of + the results keyed by the silo that generated them. + + If a silo has no defined adapter, it is ignored. + If a silo has no defined API key, it is ignored. + + Result dictionary is formatted like so: + + { + : { + 'added': { + : , + ... + }, + 'modified': { + : , + ... + } + }, + ... + } + + Since not all silos may be in sync, the 'added' posts of one silo may be + merely 'modified' by another, and vice versa. + + Where possible, silo adapters should only create posts in a 'draft' or + unpublished status, to allow time for review and any platform-specific + changes to be made by the author. + """ + if not posts: + action_log("No posts to syndicate, nothing to syndicate.") + return None + if not silos: + action_log('No silos specified, nowhere to syndicate.') + return None + + silos = list(set(silos)) # de-dupe the given list of silos + action_log(f"You want to publish to these places: {silos}") + + specs = {silo:_locate(silo) for silo in silos if _locate(silo)} + if list(specs.keys()) != silos: + action_warn(f"I don't know how to publish to these places: { [silo for silo in silos if silo not in specs] }") + + api_keys = {silo:_get_api_key(silo) for silo in silos if _get_api_key(silo)} + if list(api_keys.keys()) != silos: + action_warn(f"I don't have API keys for these places: { [silo for silo in silos if silo not in api_keys] }") + + action_log("I'll do what I can.") + results = { + silo:_syndicate(spec, api_keys[silo], posts) + for silo, spec in specs.items() + if silo in api_keys + } + if results: + return results + else: + action_warn("Sorry, can't do anything with that!") + return None + +### privates ### + +@functools.lru_cache(maxsize=10) +def _locate(silo): + """Locates the given silo adapter and returns its Python module name if found.""" + if not silo: + raise ValueError('missing silo') + return getattr(importlib.util.find_spec(f'syndicate.silos.{silo.lower()}'), 'name', None) + +def _syndicate(silo_spec, api_key, posts): + """Loads and invokes the entrypoint of the given silo adaptor, returning the results.""" + if not silo_spec: + raise ValueError('missing silo spec') + if not api_key: + raise ValueError('missing API key') + return importlib.import_module(silo_spec).syndicate(posts, api_key) + +def _get_api_key(silo): + """Returns the API key for the given silo, as defined in the environment.""" + if not silo: + raise ValueError('missing silo') + return os.getenv(_api_key_for(silo)) + +def _api_key_for(silo): + """Returns the name of the environment variable expected to contain an API key for the given silo.""" + return f"{silo.upper()}_API_KEY" diff --git 
a/syndicate/silos/__init__.py b/syndicate/silos/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/syndicate/silos/dev.py b/syndicate/silos/dev.py new file mode 100644 index 0000000..6f37bd2 --- /dev/null +++ b/syndicate/silos/dev.py @@ -0,0 +1,93 @@ +from syndicate.utils import action_log_group, action_log, action_error, fronted, silo_id_for +import requests +import pprint + +SILO_NAME = 'DEV' +@action_log_group(SILO_NAME) +def syndicate(posts, api_key): + """ + Syndicates the given posts to https://dev.to, updating the ones that + already exist there and creating articles for the ones that don't. + + By default, articles are created in a "draft"/unpublished state, but this + can be overridden by individual posts by specifying `published: true` in + their frontmatter, if you prefer a "just do it" approach. + + This uses the DEV API, which is currently in beta: https://docs.dev.to/api + + The required API key can be generated for your account by following the steps + outlined here: https://docs.dev.to/api/#section/Authentication + """ + + action_log(f"Hello? Yes, this is {SILO_NAME}.") + results = { + 'added': {post.path:_create(post, api_key) for post in posts if not silo_id_for(post, SILO_NAME)}, + 'modified': {post.path:_update(post, api_key) for post in posts if silo_id_for(post, SILO_NAME)} + } + action_log("The results are in:") + action_log(pprint.pformat(results)) + return results + +### privates ### + +def _create(post, api_key=None): + """ + Creates a new article for the given post on DEV.to and returns the silo ID + and URL of the newly created article. + + This tries to create an **unpublished** draft. However, the 'published' + status can be overridden in the frontmatter of the post itself for a + "just do it" approach. + + @see https://docs.dev.to/api/#operation/createArticle + """ + if not api_key: + raise ValueError("missing API key") + if not post: + raise ValueError("missing post") + if not fronted(post).get('title'): + raise ValueError("article is missing a title") + + payload = { + 'article': { + # NOTE This can be overridden by explicitly setting 'published' in + # the frontmatter. + 'published': False, + 'body_markdown': post.decoded_content.decode('utf-8') + } + } + endpoint = "https://dev.to/api/articles" + headers = {'api-key': api_key} + response = requests.post(endpoint, headers=headers, json=payload) + + if response.status_code != requests.codes.created: + action_error(f"Failed to create draft for '{post.name}': {response.json()}") + return None + else: + results = response.json() + return (results['id'], results['url']) + +def _update(post, api_key=None): + """ + Updates an article corresponding to the given post on DEV.to and returns the + silo ID and URL of the updated article. + + If a corresponding article does not exist, this will fail.
+ + @see https://docs.dev.to/api/#operation/updateArticle + """ + if not api_key: + raise ValueError("missing API key") + if not post: + raise ValueError("missing post") + + endpoint = f'https://dev.to/api/articles/{silo_id_for(post, SILO_NAME)}' + headers = {'api-key': api_key} + payload = {'article': { 'body_markdown': post.decoded_content.decode('utf-8') } } + response = requests.put(endpoint, headers=headers, json=payload) + if response.status_code != requests.codes.ok: + action_error(f"Failed to update post '{post.name}': {response.json()}") + return None + else: + results = response.json() + return (results['id'], results['url']) diff --git a/syndicate/utils.py b/syndicate/utils.py new file mode 100644 index 0000000..06b4d5c --- /dev/null +++ b/syndicate/utils.py @@ -0,0 +1,260 @@ +import frontmatter +import functools +from github import Github, GithubException, InputGitTreeElement +import json +import os +import requests + +def action_log(msg): + """(SIDE-EFFECT) Prints `msg` to the Github workflow log.""" + print(msg) + +def action_debug(msg): + """(SIDE-EFFECT) Prints `msg` to the Github workflow debug log.""" + print(f"::debug::{msg}") + +def action_warn(msg): + """(SIDE-EFFECT) Prints `msg` to the Github workflow warning log.""" + print(f"::warning::{msg}") + +def action_error(msg): + """(SIDE-EFFECT) Prints `msg` to the Github workflow error log.""" + print(f"::error::{msg}") + +def action_log_group(title): + """ + Decorates a function such that all its generated log statements are grouped + in the Github workflow log under `title`. + """ + + def _decorator(func): + @functools.wraps(func) + def _wrapper(*args, **kwargs): + print(f"::group::{title}") + result = func(*args, **kwargs) + print("::endgroup::") + return result + return _wrapper + return _decorator + +def action_setenv(key, value): + """ + (SIDE-EFFECT) Sets an environment variable of the running Github workflow job. + """ + print(f"::set-env name={key}::{value}") + +def action_setoutput(key, value): + """(SIDE-EFFECT) Sets an output variable of the running Github workflow step.""" + print(f"::set-output name={key}::{value}") + +def job_addoutput(results): + """ + (SIDE-EFFECT) Persist `results` for future steps in the running Github + workflow job. + """ + syndicated_posts = job_getoutput() + syndicated_posts.update(results) + action_setenv('SYNDICATE_POSTS', json.dumps(syndicated_posts)) + +def job_getoutput(): + """Returns the persisted results of the running Github workflow job.""" + # Default to an empty dictionary if no results have yet been persisted. + return json.loads(os.getenv('SYNDICATE_POSTS', '{}')) + +# Memoize authentication and repo fetching. +@functools.lru_cache(maxsize=1) +def repo(): + """ + (MEMOIZED) Returns an authenticated reference to a repository object for the + repository this Github action is running in. + @see https://pygithub.readthedocs.io/en/latest/github_objects/Repository.html#github.Repository.Repository + """ + if not os.getenv("GITHUB_TOKEN"): + raise ValueError("missing GITHUB_TOKEN") + if not os.getenv("GITHUB_REPOSITORY"): + raise ValueError("missing GITHUB_REPOSITORY") + + gh = Github(os.getenv("GITHUB_TOKEN")) + return gh.get_repo(os.getenv("GITHUB_REPOSITORY")) + +def parent_sha(): + """ + Returns the git SHA to use as parent for any commits generated by this + Github workflow step.
+ """ + if not os.getenv("GITHUB_SHA"): + raise ValueError("missing GITHUB_SHA") + return os.getenv('SYNDICATE_SHA', os.getenv("GITHUB_SHA")) + +def get_trigger_payload(): + """ + Returns a list of lightweight File objects describing each of the modified + files in the commit that triggered this Github workflow. + @see https://pygithub.readthedocs.io/en/latest/github_objects/File.html#github.File.File + """ + if not os.getenv("GITHUB_SHA"): + raise ValueError("missing GITHUB_SHA") + # NOTE + # Explicitly using GITHUB_SHA to ensure we always have access to the changed + # files even if other steps generate commits. + return repo().get_commit(os.getenv("GITHUB_SHA")).files + +def file_contents(filepath): + """ + Returns a `ContentFile` object of the matching the given path in latest known + commit to this repo. + @see https://pygithub.readthedocs.io/en/latest/github_objects/ContentFile.html#github.ContentFile.ContentFile + @see :func:`~syndicate.utils.parent_sha` + """ + # NOTE + # Using the latest known commit to ensure we capture any modifications made + # to the post frontmatter by previous actions. + return repo().get_contents(filepath, ref=parent_sha()) + +def get_posts(post_dir=os.getenv('SYNDICATE_POST_DIR', 'posts')): + """ + Returns the latest known :func:`~syndicate.utils.file_contents` of the files + added and modified in the commit that triggered this Github workflow. + """ + files = get_trigger_payload() + if not files: + raise ValueError("target commit was empty") + + posts = [file for file in files if file.filename.startswith(post_dir)] + return [ + file_contents(post.filename) + for post in posts + if post.status != 'deleted' # ignore deleted files + ] + +def fronted(post): + """ + Returns the :py:class:`frontmatter.Post` representation of the given + :func:`~syndicate.utils.file_contents` object. + + If `post` is actually already a `frontmatter.Post`, this is a no-op. + """ + if not post: + raise ValueError("missing post") + if isinstance(post, frontmatter.Post): + return post + raw_contents = post.decoded_content.decode('utf-8') + return frontmatter.loads(raw_contents) + +def silo_key_for(silo): + """Returns a formatted string used to identify a silo ID in post frontmatter.""" + return f'{silo.lower()}_silo_id' + +def silo_id_for(post, silo): + """ + Retrieves the ID appropriate for `silo` from the frontmatter of the given + `post`; returns None if no relevant ID exists. + """ + if not post: + raise ValueError("missing post") + if not silo: + raise ValueError("missing silo") + return fronted(post).get(silo_key_for(silo)) + +def mark_syndicated_posts(silo_ids_by_path, fronted_posts_by_path): + """ + Injects the given silo IDs for the given posts into their frontmatter + and commits the updated posts back to this repo. + + If a silo ID already exists in a given post, that's fine: we assume IDs don't + change, and so we don't try to change them. + + Returns a dictionary which is the response of the commit request. + """ + if not silo_ids_by_path: + raise ValueError("missing silo IDs") + if not fronted_posts_by_path: + raise ValueError("missing fronted posts") + + updated_fronted_posts_by_path = {} + silos_included = set() + for path, silo_ids_by_silo in silo_ids_by_path.items(): + fronted_post = fronted_posts_by_path[path] + + # Format: + # { + # 'dev_silo_id': 42, + # 'medium_silo_id': 'abc123', + # ... 
+ # } + new_silo_ids = {} + for silo, sid in silo_ids_by_silo.items(): + # Ignore posts already marked with this silo + if not silo_id_for(fronted_post, silo): + new_silo_ids[silo_key_for(silo)] = sid + silos_included.add(silo) + + # Only add to the commit if there are any new IDs to add. + if not new_silo_ids: + continue + + # Create new fronted post with old frontmatter merged with silo IDs. + updated_post = frontmatter.Post(**dict(fronted_post.to_dict(), **new_silo_ids)) + updated_fronted_posts_by_path[path] = updated_post + return commit_updated_posts(updated_fronted_posts_by_path, silos_included) + +def commit_updated_posts(fronted_posts_by_path, silos): + """ + Returns the response of committing the (presumably changed) given posts to + the remote GITHUB_REF of this repo by following the recipe outlined here: + + https://developer.github.com/v3/git/ + + 1. Get the current commit object + 2. Retrieve the tree it points to + 3. Retrieve the content of the blob object that tree has for that + particular file path + 4. Change the content somehow and post a new blob object with that new + content, getting a blob SHA back + 5. Post a new tree object with that file path pointer replaced with your + new blob SHA getting a tree SHA back + 6. Create a new commit object with the current commit SHA as the parent + and the new tree SHA, getting a commit SHA back + 7. Update the reference of your branch to point to the new commit SHA + """ + if not fronted_posts_by_path: + action_log("All good: already marked.") + return None + if not os.getenv("GITHUB_TOKEN"): + raise ValueError("missing GITHUB_TOKEN") + if not os.getenv("GITHUB_REPOSITORY"): + raise ValueError("missing GITHUB_REPOSITORY") + if not os.getenv("GITHUB_REF"): + raise ValueError("missing GITHUB_REF") + + parent = parent_sha() + # Create a new tree with our updated blobs. + new_tree = repo().create_git_tree( + [ + InputGitTreeElement( + path, + mode='100644', # 'file', @see https://developer.github.com/v3/git/trees/#tree-object + type='blob', + content=frontmatter.dumps(fronted_post) + ) + for path, fronted_post in fronted_posts_by_path.items() + ], + base_tree=repo().get_git_tree(parent) + ) + + # Commit the new tree. + new_commit = repo().create_git_commit( + f'(syndicate): adding IDs for {silos}', + new_tree, + [repo().get_git_commit(parent)] + ) + # Poosh it. + ref_name = os.getenv('GITHUB_REF').replace('refs/', '', 1) # strip only the leading 'refs/' prefix + try: + repo().get_git_ref(ref_name).edit(new_commit.sha) + except GithubException as err: + action_error(f"Failed to mark syndicated posts: {err}") + return None + ## NOTE Need to update the reference SHA for future workflow steps. + action_setenv('SYNDICATE_SHA', new_commit.sha) + action_log("Syndicate posts marked.") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/mocks.py b/tests/mocks.py new file mode 100644 index 0000000..136e47b --- /dev/null +++ b/tests/mocks.py @@ -0,0 +1,20 @@ +import frontmatter +import textwrap + +class MockPost: + """ + A light-weight mock of a post object. + @see https://pygithub.readthedocs.io/en/latest/github_objects/ContentFile.html#github.ContentFile.ContentFile + """ + def __init__(self): + self.raw_contents = textwrap.dedent( + """ + --- + dev_silo_id: 42 + title: A beautiful mock + tags: beauty, fake + --- + What is a body?
+ """).strip() + self.decoded_content = self.raw_contents.encode('utf-8') + self.name = 'a-beautiful-mock.md' diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000..4dfbdaa --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,2 @@ +pytest +requests-mock diff --git a/tests/test_dev.py b/tests/test_dev.py new file mode 100644 index 0000000..e577761 --- /dev/null +++ b/tests/test_dev.py @@ -0,0 +1,57 @@ +from syndicate.utils import silo_id_for +from syndicate.silos import dev +from .mocks import MockPost +import pytest +import requests +import requests_mock + +def test_create_error_when_api_key_missing(): + with pytest.raises(ValueError): + dev._create(MockPost()) + +def test_create_error_when_post_missing(): + with pytest.raises(ValueError): + dev._create(None) + +def test_create_returns_nothing_when_request_fails(requests_mock, monkeypatch): + monkeypatch.setenv('GITHUB_REPOSITORY', 'herp/derp') + requests_mock.post( + "https://dev.to/api/articles", + status_code=requests.codes.unprocessable_entity, + json={"error": "you made a unintelligble request"}) + assert not dev._create(MockPost(), api_key='fake_api_key') + +def test_create_returns_something_on_success(requests_mock, monkeypatch): + monkeypatch.setenv('GITHUB_REPOSITORY', 'herp/derp') + requests_mock.post( + "https://dev.to/api/articles", + status_code=requests.codes.created, + json={ 'type_of': 'article', 'id': 42, 'url': 'https://fake.url/for-this-post' }) + assert dev._create(MockPost(), api_key='fake_api_key') + +def test_update_error_when_api_key_missing(): + with pytest.raises(ValueError): + dev._update(MockPost()) + +def test_update_error_when_post_missing(): + with pytest.raises(ValueError): + dev._update(None) + +def test_update_returns_nothing_when_request_fails(requests_mock, monkeypatch): + monkeypatch.setenv('GITHUB_REPOSITORY', 'herp/derp') + mock = MockPost() + requests_mock.put( + f"https://dev.to/api/articles/{silo_id_for(mock, dev.SILO_NAME)}", + status_code=requests.codes.unprocessable_entity, + json={"error": "you made an unintelligble request"}) + assert not dev._update(mock, api_key='fake_api_key') + +def test_update_returns_something_on_success(requests_mock, monkeypatch): + monkeypatch.setenv('GITHUB_REPOSITORY', 'herp/derp') + mock = MockPost() + mock_id= silo_id_for(mock, dev.SILO_NAME) + requests_mock.put( + f"https://dev.to/api/articles/{mock_id}", + status_code=requests.codes.ok, + json={'type_of': 'article', 'id': mock_id, 'url': 'https://fake.url/for-this-post'}) + assert dev._update(mock, api_key='fake_api_key') diff --git a/tests/test_syndicate.py b/tests/test_syndicate.py new file mode 100644 index 0000000..c62bf78 --- /dev/null +++ b/tests/test_syndicate.py @@ -0,0 +1,45 @@ +import importlib.util +import pytest +import syndicate + +@pytest.fixture(autouse=True) +def clear_silo_cache(): + """Needed to ensure our monkeypatching doesn't get cached between tests.""" + yield + syndicate._locate.cache_clear() + +def test_elsewhere_returns_none_when_given_no_posts(): + assert not syndicate.elsewhere([], ['Fake_Silo']) + +def test_elsewhere_returns_none_when_given_no_silos(): + assert not syndicate.elsewhere(['a post'], []) + +def test_elsewhere_returns_none_when_no_api_keys_exist_for_given_silos(monkeypatch): + fake_silo = 'Fake_Silo' + # Ensure we cannot use the fake silo adapter. 
+ monkeypatch.delenv(syndicate._api_key_for(fake_silo), raising=False) + assert not syndicate.elsewhere(['a post'], [fake_silo]) + +def test_elsewhere_returns_none_when_no_adapter_exists_for_given_silos(monkeypatch): + fake_silo = 'Fake_Silo' + # Ensure we cannot find the fake silo adapter. + monkeypatch.setattr(importlib.util, 'find_spec', lambda s: None) + # Ensure we can use the fake silo adapter. + monkeypatch.setenv(syndicate._api_key_for(fake_silo), 'fake API key') + assert not syndicate.elsewhere(['a post'], [fake_silo]) + +def test_elsewhere_returns_syndication_results_for_recognized_silos_when_given_api_keys(monkeypatch): + class MockSpec: + def __init__(self): + self.name = 'mock_spec' + class MockSilo: + def syndicate(posts, api_key): + return 'mock results' + fake_silo = 'Fake_Silo' + # Ensure we can find the fake silo adapter. + monkeypatch.setattr(importlib.util, 'find_spec', lambda s: MockSpec()) + # Ensure we can load the fake silo adapter. + monkeypatch.setattr(importlib, 'import_module', lambda s: MockSilo) + # Ensure we can use the fake silo adapter. + monkeypatch.setenv(syndicate._api_key_for(fake_silo), 'fake API key') + assert syndicate.elsewhere(['a post'], [fake_silo])
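As a closing note, the adapter contract implied by `syndicate.elsewhere` is deliberately small: a module at `syndicate.silos.<silo_name>` exposing a `syndicate(posts, api_key)` function that returns an `{'added': ..., 'modified': ...}` mapping of post paths to `(id, url)` tuples, with the matching `<SILO>_API_KEY` exposed in the environment. Here is a minimal skeleton for a hypothetical new silo, mirroring the shape of `syndicate/silos/dev.py` (the module name `example`, the `Example` silo, and the `_create`/`_update` placeholders are illustrative only, not part of this diff):

```python
# syndicate/silos/example.py -- hypothetical adapter skeleton, not part of this change.
from syndicate.utils import action_log_group, action_log, silo_id_for

SILO_NAME = 'Example'

@action_log_group(SILO_NAME)
def syndicate(posts, api_key):
    """Returns {'added': {path: (id, url)}, 'modified': {path: (id, url)}} for the given posts."""
    action_log(f"Syndicating {len(posts)} post(s) to {SILO_NAME}...")
    return {
        'added': {post.path: _create(post, api_key) for post in posts if not silo_id_for(post, SILO_NAME)},
        'modified': {post.path: _update(post, api_key) for post in posts if silo_id_for(post, SILO_NAME)},
    }

def _create(post, api_key):
    # Call the silo's "create" API with post.decoded_content here;
    # return a (silo_id, public_url) tuple, or None on failure.
    raise NotImplementedError

def _update(post, api_key):
    # Call the silo's "update" API using silo_id_for(post, SILO_NAME) here;
    # return a (silo_id, public_url) tuple, or None on failure.
    raise NotImplementedError
```

With such a module in place and an `EXAMPLE_API_KEY` secret exposed to the job, `elsewhere` would locate the adapter via `_locate` and include its results alongside the other silos.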