Skip to content

Commit

Permalink
Extension - Create okdp python module for custom extensions
Browse files Browse the repository at this point in the history
  • Loading branch information
idirze committed Feb 6, 2024
1 parent 6e213a4 commit d2c62c6
Show file tree
Hide file tree
Showing 29 changed files with 547 additions and 36 deletions.
4 changes: 2 additions & 2 deletions .github/actions/install-patchs-and-extension/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ runs:

- name: Copy patchs 📦
run: |
cp -fr ../python/src/patch/* ./
cp -fr ../python/src/extension ./
cp -fr ../python/okdp/patch/* ./
cp -fr ../python/okdp ./
working-directory: ./docker-stacks
shell: bash

2 changes: 2 additions & 0 deletions .github/workflows/docker-build-test-push-latest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ jobs:
if: inputs.parent-image != ''
run: docker pull ${{ inputs.registry }}/${{ github.repository_owner }}/${{ inputs.parent-image }}
shell: bash

- name: Prepare image build (build args) 📦
run: |
for build_arg in ${{ inputs.build-args }}
Expand Down Expand Up @@ -97,6 +98,7 @@ jobs:
BUILDKIT_PROGRESS: plain
shell: bash

# Run docker-stacks tests (docker-stacks/tests)
- name: Run tests ✅
# Skip tests when running with ACT
if: env.ACT_SKIP_TESTS == ''
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docker-tag-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
shell: bash

- name: Apply tags to the loaded image 🏷
run: python3 -m extension.tagging.apply_tags --short-image-name ${{ inputs.image }} --registry ${{ inputs.registry }} --owner ${{ github.repository_owner }}
run: python3 -m okdp.extension.tagging.apply_tags --short-image-name ${{ inputs.image }} --registry ${{ inputs.registry }} --owner ${{ github.repository_owner }}

- name: Prepare image push 📦
run: |
Expand Down
26 changes: 17 additions & 9 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@ on:

- ".build/.versions.yml"

- "python/src/patch/**"
- "python/src/extension/**"
- "python/okdp/**"
- "docker-stacks/images/**"
- "docker-stacks/tests/**"
- "docker-stacks/tagging/**"
- "images/**"

- "!python/src/patch/README.md"
- "!python/okdp/patch/README.md"
- "!images/README.md"

push:
Expand All @@ -39,14 +38,13 @@ on:

- ".build/.versions.yml"

- "python/src/patch/**"
- "python/src/extension/**"
- "python/okdp/**"
- "docker-stacks/images/**"
- "docker-stacks/tests/**"
- "docker-stacks/tagging/**"
- "images/**"

- "!python/src/patch/README.md"
- "!python/okdp/patch/README.md"
- "!images/README.md"

workflow_dispatch:
Expand All @@ -57,7 +55,16 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

defaults:
run:
working-directory: ./docker-stacks

jobs:
run-unit-tests:
uses: ./.github/workflows/unit-tests.yml
with:
runs-on: ubuntu-latest

build-version-compatibility-matrix:
runs-on: ubuntu-latest
outputs:
Expand All @@ -80,11 +87,12 @@ jobs:
- name: Build version compatibility matrix 🛠
id: set-matrix
run: |
python python/src/extension/matrix/version_compatibility_matrix.py \
--versions-matrix-path .build/.versions.yml \
--git-branch ${{ steps.branch.outputs.current_branch || steps.branch.outputs.tag}} >> $GITHUB_OUTPUT
python3 -m okdp.extension.matrix.version_compatibility_matrix \
--versions-matrix-path ../.build/.versions.yml \
--git-branch ${{ steps.branch.outputs.current_branch || steps.branch.outputs.tag}} >> $GITHUB_OUTPUT
cat $GITHUB_OUTPUT
shell: bash
needs: [run-unit-tests]

build-base:
name: build-base (python-${{ matrix.python.python_version }})
Expand Down
28 changes: 28 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Reusable workflow (triggered through workflow_call) that runs the Python unit tests.
on:
workflow_call:
inputs:
# Runner label supplied by the caller; used as the runs-on value of the unit-tests job.
runs-on:
description: GitHub Actions Runner image
required: true
type: string

jobs:

unit-tests:
runs-on: ${{ inputs.runs-on }}

steps:
- name: Checkout Repo ⚡️
uses: actions/checkout@v4

# Local action from this repository's .github/actions directory.
- name: Setup dev env patchs 📦
uses: ./.github/actions/install-patchs-and-extension

# Action referenced from the docker-stacks directory of the checkout.
- name: Create dev environment 📦
uses: ./docker-stacks/.github/actions/create-dev-env

# Run the pytest suite located under python/tests in verbose mode.
- name: Run unit tests
run: pytest python/tests -v
shell: bash


2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,6 @@ bin/
/nbdist/
/.nb-gradle/

### VS Code ###
.vscode/

### Mac OS ###
.DS_Store
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"python.analysis.extraPaths": [
"./python/okdp",
"./python/tests",
"./docker-stacks"
]
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from itertools import groupby
import itertools
from constants import *
from okdp.extension.matrix.constants import *

def merge_on(elem) -> str:
def group_on(elem) -> str:
return str(elem[PYTHON_VERSION]) + "_".join(str(elem[JAVA_VERSION])) + str(elem[HADOOP_VERSION])

def intersect_dicts(dict1: dict, dict2: dict) -> dict:
Expand Down Expand Up @@ -31,7 +31,7 @@ def merge_dicts(dict1: dict, *args: dict) -> dict:
dict_res[key] = list(set(sum([value , dict1[key]], [])))
return dict_res if len(args) == 1 else merge_dicts(dict_res, *args[1:])

def intersect_versions(groups: list[dict], on_dict: dict) -> list[dict]:
def join_versions(groups: list[dict], on_dict: dict) -> list[dict]:
""" Intersect groups of dicts values with the provided on_dict """
### Intersect the groups with on_dict
result = []
Expand All @@ -40,14 +40,13 @@ def intersect_versions(groups: list[dict], on_dict: dict) -> list[dict]:

return result

def merge_versions(dicts: list[dict]) -> list[dict]:
""" Merge list of dicts by keeping all the values for the keys,
Then group the elements by python_version key
def group_versions_by(dicts: list[dict], group_on) -> list[dict]:
""" Group the spark versions by PYTHON_VERSION/JAVA_VERSION/HADOOP_VERSION
"""
### Group the elements by python_version
python_groups = []
data = sorted(dicts, key=merge_on)
for k, g in groupby(data, merge_on):
data = sorted(dicts, key=group_on)
for k, g in groupby(data, group_on):
python_groups.append(list(g))

### Merge the groups
Expand All @@ -56,7 +55,7 @@ def merge_versions(dicts: list[dict]) -> list[dict]:
result.extend(group)
return result

def filter_versions (dicts: list[dict]) -> list[dict]:
def ignore_invalid_versions (dicts: list[dict]) -> list[dict]:
return list(filter(lambda elem:
elem.get(SPARK_VERSION) and
elem.get(JAVA_VERSION) and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import yaml
import argparse
import logging
from constants import *
from okdp.extension.matrix.constants import *

from utils.matrix_utils import filter_versions, intersect_versions, merge_versions, normalize_matrix, normalize_scala_version, normalize_value, remove_duplicates
from okdp.extension.matrix.utils.matrix_utils import ignore_invalid_versions, join_versions, group_versions_by, normalize_matrix, normalize_scala_version, normalize_value, remove_duplicates
from okdp.extension.matrix.utils.matrix_utils import group_on

LOGGER = logging.getLogger(__name__)

Expand All @@ -23,17 +24,24 @@ def __init__(self, path: str, git_branch: str):
self.git_branch = git_branch.replace("/", "-")

self.__validate__()
self._normalize_values_()


def _normalize_values_(self):
    """Normalize every value of the compatibility and build matrices.

    Each value of every dict in ``self.compatibility_matrix`` and each value
    of ``self.build_matrix`` is passed through ``normalize_value``; keys are
    left untouched. Both attributes are rebound to the new containers.

    Per the original author's note, a simple value is converted to an array,
    e.g. ``python_version: 3.11 => python_version: ['3.11']``.
    """
    # compatibility_matrix is iterated as a list of dicts: rebuild each entry.
    self.compatibility_matrix = [
        {key: normalize_value(value) for key, value in entry.items()}
        for entry in self.compatibility_matrix
    ]
    # build_matrix is a single dict and gets the same per-value treatment.
    self.build_matrix = {
        key: normalize_value(value)
        for key, value in self.build_matrix.items()
    }

def __validate__(self):
if not self.compatibility_matrix:
raise ValueError(f"The compatibility-matrix section is mandatory")
if not self.compatibility_matrix:
raise ValueError(f"The compatibility-matrix section is mandatory")

def generate_matrix(self) -> (str, dict):

compatibility_versions_matrix = [dict(map(lambda kv: (kv[0], normalize_value(kv[1])), e.items())) for e in self.compatibility_matrix]
spark_version_matrix = normalize_matrix(filter_versions(intersect_versions(merge_versions(compatibility_versions_matrix), self.build_matrix)))
spark_version_matrix = normalize_matrix(ignore_invalid_versions(join_versions(group_versions_by(compatibility_versions_matrix, group_on=group_on), self.build_matrix)))
spark_version_matrix = normalize_scala_version(self.add_latest_dev_tags(spark_version_matrix))
python_version_matrix = remove_duplicates([{PYTHON_VERSION: e.get(PYTHON_VERSION), PYTHON_DEV_TAG: e.get(PYTHON_DEV_TAG)} for e in spark_version_matrix ])
return (spark_version_matrix, python_version_matrix)
Expand Down Expand Up @@ -73,11 +81,11 @@ def spark_dev_tag(self, e: dict) -> str:
)

args = arg_parser.parse_args()
bm = VersionCompatibilityMatrix(args.versions_matrix_path, args.git_branch)
#bm = VersionCompatibilityMatrix(".build/.versions.yml", "main")
vcm = VersionCompatibilityMatrix(args.versions_matrix_path, args.git_branch)
#vcm = VersionCompatibilityMatrix(".build/.versions.yml", "main")
#with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
# print(f"spark_matrix={json.dumps(bm.generate_matrix())}", file=fh)
(spark_matrix, python_version) = bm.generate_matrix()
# print(f"spark_matrix={json.dumps(vcm.generate_matrix())}", file=fh)
(spark_matrix, python_version) = vcm.generate_matrix()
assert spark_matrix, ("The resulting build matrix was empty. Please, review your configuration '.build/.versions.yml'")
print(f"spark={json.dumps(spark_matrix)}")
print(f"python={json.dumps(python_version)}")
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import plumbum

from tagging.docker_runner import DockerRunner
from extension.tagging.get_taggers_and_manifests import get_taggers_and_manifests
from okdp.extension.tagging.get_taggers_and_manifests import get_taggers_and_manifests

docker = plumbum.local["docker"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from typing import Optional

from extension.tagging.images_hierarchy import ALL_IMAGES
from okdp.extension.tagging.images_hierarchy import ALL_IMAGES
from tagging.manifests import ManifestInterface
from tagging.taggers import TaggerInterface

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from dataclasses import dataclass, field
from typing import Optional
from extension.tagging.taggers import (
from okdp.extension.tagging.taggers import (
JavaMajorVersionTagger,
JavaVersionTagger,
LongTagger,
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file added python/tests/__init__.py
Empty file.
Empty file.
Empty file.
49 changes: 49 additions & 0 deletions python/tests/extension/matrix/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import json
import pytest
from okdp.extension.matrix.version_compatibility_matrix import VersionCompatibilityMatrix # type: ignore

class MockedVersionCompatibilityMatrix(VersionCompatibilityMatrix):
    """Test double for ``VersionCompatibilityMatrix`` fed with in-memory data.

    The constructor only stores the supplied values on the instance; it does
    not call the parent initializer, so none of the parent's ``__init__``
    logic runs.
    """

    def __init__(self, compatibility_matrix: str, build_matrix: str, git_branch: str):
        # The three attributes are independent of each other; they are simply
        # recorded as given, without any validation or transformation.
        self.git_branch = git_branch
        self.build_matrix = build_matrix
        self.compatibility_matrix = compatibility_matrix

def to_dict(str_as_json: str) -> list[dict]:
    """Parse the JSON document held in ``str_as_json`` and return the result.

    Raises whatever ``json.loads`` raises when the text is not valid JSON.
    """
    parsed = json.loads(str_as_json)
    return parsed

@pytest.fixture(scope="module")
def version_compatibility_matrix_data():
    """Module-scoped fixture returning a sample compatibility matrix.

    The result is a list of four dicts. Every dict maps the keys
    ``python_version``, ``spark_version``, ``java_version``,
    ``scala_version``, ``hadoop_version`` and ``spark_download_url`` to a
    list of strings.
    """

    def entry(python, spark, java, hadoop):
        # Scala versions and the download URL are the same for every row;
        # only the remaining four fields vary.
        return {
            'python_version': [python],
            'spark_version': spark,
            'java_version': java,
            'scala_version': ['2.12', '2.13'],
            'hadoop_version': [hadoop],
            'spark_download_url': ['https://archive.apache.org/dist/spark/'],
        }

    return [
        entry('3.9', ['3.2.1', '3.2.2', '3.2.3', '3.2.4'], ['11'], '3.2'),
        entry('3.10', ['3.3.1', '3.3.2', '3.3.3', '3.3.4'], ['17'], '3'),
        entry('3.11', ['3.4.1', '3.4.2'], ['17'], '3'),
        entry('3.11', ['3.5.0'], ['17', '21'], '3'),
    ]




Loading

0 comments on commit d2c62c6

Please sign in to comment.