
Commit 82ab360

Merge pull request #31 from kbase/dev_cdm_mcp

upgrade deps and add spark log

2 parents 8e62a5c + da0741c

6 files changed: +106 −51 lines

Dockerfile

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-FROM ghcr.io/kbase/cdm-spark-standalone:pr-34
+FROM ghcr.io/kbase/cdm-spark-standalone:pr-36

 # Switch to root to install packages
 USER root
config/cdm-spark-job-logs-policy.json

Lines changed: 21 additions & 0 deletions

@@ -0,0 +1,21 @@
+{
+    "Version": "2012-10-17",
+    "Statement": [
+        {
+            "Effect": "Allow",
+            "Action": [
+                "s3:GetBucketLocation",
+                "s3:ListBucket",
+                "s3:GetObject",
+                "s3:PutObject",
+                "s3:DeleteObject"
+            ],
+            "Resource": [
+                "arn:aws:s3:::cdm-spark-job-logs",
+                "arn:aws:s3:::cdm-spark-job-logs/*"
+            ]
+        }
+    ]
+}

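The new policy grants object read/write (but no admin actions) on the cdm-spark-job-logs bucket only: s3:ListBucket applies to the bucket ARN, while the object actions apply to the /* ARN. As an illustration, not part of this commit, the same document can be generated for any bucket name in Python:

# make_bucket_policy.py -- illustrative helper, not part of this repo.
# Builds the same read/write policy document for an arbitrary bucket.
import json


def bucket_rw_policy(bucket: str) -> dict:
    """Return an S3-style policy granting object read/write on one bucket."""
    return {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "s3:GetBucketLocation",
                    "s3:ListBucket",
                    "s3:GetObject",
                    "s3:PutObject",
                    "s3:DeleteObject",
                ],
                # Bucket-level ARN for ListBucket, object-level ARN for the rest.
                "Resource": [
                    f"arn:aws:s3:::{bucket}",
                    f"arn:aws:s3:::{bucket}/*",
                ],
            }
        ],
    }


if __name__ == "__main__":
    print(json.dumps(bucket_rw_policy("cdm-spark-job-logs"), indent=4))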
docker-compose.yaml

Lines changed: 15 additions & 4 deletions

@@ -3,7 +3,7 @@
 services:

   spark-master:
-    image: ghcr.io/kbase/cdm-spark-standalone:pr-34
+    image: ghcr.io/kbase/cdm-spark-standalone:pr-36
     platform: linux/amd64
     ports:
       - "8090:8090"
@@ -27,7 +27,7 @@ services:
       - cdm-jupyterhub-network

   spark-worker-1:
-    image: ghcr.io/kbase/cdm-spark-standalone:pr-34
+    image: ghcr.io/kbase/cdm-spark-standalone:pr-36
     platform: linux/amd64
     ports:
       - "8081:8081"
@@ -51,7 +51,7 @@ services:
       - cdm-jupyterhub-network

   spark-worker-2:
-    image: ghcr.io/kbase/cdm-spark-standalone:pr-34
+    image: ghcr.io/kbase/cdm-spark-standalone:pr-36
     platform: linux/amd64
     ports:
       - "8082:8082"
@@ -103,12 +103,13 @@ services:
     volumes:
       - ./config/cdm-read-only-policy.json:/config/cdm-read-only-policy.json
       - ./config/cdm-read-write-policy.json:/config/cdm-read-write-policy.json
+      - ./config/cdm-spark-job-logs-policy.json:/config/cdm-spark-job-logs-policy.json
       - ./scripts/minio_create_bucket_entrypoint.sh:/scripts/minio_create_bucket_entrypoint.sh
     networks:
       - cdm-jupyterhub-network

   cdm_jupyterhub:
-    image: ghcr.io/kbase/cdm-jupyterhub:pr-177
+    image: ghcr.io/kbase/cdm-jupyterhub:pr-193
     platform: linux/amd64
     ports:
       - "4043:4043"
@@ -140,6 +141,11 @@ services:
       - USE_KBASE_AUTHENTICATOR=true
       - USE_KUBE_SPAWNER=false
       - REMOVE_STOPPED_CONTAINER_AND_POD=false
+      - SPARK_JOB_LOG_DIR=s3a://cdm-spark-job-logs/spark-job-logs
+      - SPARK_JOB_LOG_DIR_CATEGORY=jupyter-master
+      - MINIO_URL=http://minio:9002
+      - MINIO_LOG_USER_ACCESS_KEY=minio-log-access
+      - MINIO_LOG_USER_SECRET_KEY=minio123
     volumes:
       - ./cdr/cdm/jupyter/cdm_shared_workspace:/cdm_shared_workspace
       - ./cdr/cdm/jupyter/jupyterhub_secrets:/jupyterhub_secrets
@@ -198,6 +204,11 @@ services:
       - REDIS_HOST=redis
       - REDIS_PORT=6379
       - DELTALAKE_WAREHOUSE_DIR=s3a://cdm-lake/warehouse
+      - SPARK_JOB_LOG_DIR=s3a://cdm-spark-job-logs/spark-job-logs
+      - SPARK_JOB_LOG_DIR_CATEGORY=mcp-server
+      - MINIO_URL=http://minio:9002
+      - MINIO_LOG_USER_ACCESS_KEY=minio-log-access
+      - MINIO_LOG_USER_SECRET_KEY=minio123
     networks:
       - cdm-jupyterhub-network

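The five new environment variables wire Spark event logging to the MinIO-backed s3a bucket, with SPARK_JOB_LOG_DIR_CATEGORY presumably distinguishing which component (jupyter-master vs mcp-server) wrote the logs. The consuming code is not part of this diff, but a minimal sketch of how such variables are typically mapped onto a SparkSession, assuming SPARK_JOB_LOG_DIR feeds spark.eventLog.dir and the MINIO_* values feed Hadoop's S3A connector, might look like this:

# sketch_spark_event_logging.py -- illustrative only; the consuming code is
# not shown in this diff. Assumes the env vars from docker-compose.yaml are set.
import os

from pyspark.sql import SparkSession

log_dir = os.environ["SPARK_JOB_LOG_DIR"]            # s3a://cdm-spark-job-logs/spark-job-logs
minio_url = os.environ["MINIO_URL"]                  # http://minio:9002
access_key = os.environ["MINIO_LOG_USER_ACCESS_KEY"]
secret_key = os.environ["MINIO_LOG_USER_SECRET_KEY"]

spark = (
    SparkSession.builder
    .appName("cdm-job")
    # Write Spark event logs to the dedicated MinIO bucket.
    .config("spark.eventLog.enabled", "true")
    .config("spark.eventLog.dir", log_dir)
    # Point the S3A filesystem at MinIO instead of AWS.
    .config("spark.hadoop.fs.s3a.endpoint", minio_url)
    .config("spark.hadoop.fs.s3a.access.key", access_key)
    .config("spark.hadoop.fs.s3a.secret.key", secret_key)
    .config("spark.hadoop.fs.s3a.path.style.access", "true")
    .getOrCreate()
)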
pyproject.toml

Lines changed: 6 additions & 6 deletions

@@ -6,12 +6,12 @@ readme = "README.md"
 requires-python = "==3.11.9"
 dependencies = [
     "fastapi==0.115.12",
-    "uvicorn==0.34.2",
+    "uvicorn==0.34.3",
     "fastapi-mcp==0.3.4",
-    "mcp==1.9.1",
+    "mcp==1.9.2",
     "pyspark==3.5.5",
-    "delta-spark==3.3.1",
-    "aiohttp==3.11.18",
+    "delta-spark==3.3.2",
+    "aiohttp==3.12.8",
     "cacheout==0.16.0",
     "minio==7.2.15",
     "psycopg==3.2.9",
@@ -20,7 +20,7 @@ dependencies = [

 [dependency-groups]
 dev = [
-    "ipython>=9.2.0",
-    "pytest==8.3.5",
+    "ipython==9.3.0",
+    "pytest==8.4.0",
     "pytest-cov==6.1.1",
]
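
Note that ipython moves from a >= range to an exact pin, so every dependency is now pinned. A small sanity check that an installed environment matches the new pins (the script name and pin list below are illustrative, not part of this commit):

# check_pins.py -- illustrative sanity check, not part of this repo.
# Verifies that installed package versions match the exact pins above.
from importlib.metadata import version

PINS = {
    "uvicorn": "0.34.3",
    "mcp": "1.9.2",
    "delta-spark": "3.3.2",
    "aiohttp": "3.12.8",
}

for package, pinned in PINS.items():
    installed = version(package)
    status = "ok" if installed == pinned else f"MISMATCH (pinned {pinned})"
    print(f"{package}=={installed}: {status}")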

scripts/minio_create_bucket_entrypoint.sh

Lines changed: 22 additions & 1 deletion

@@ -9,16 +9,37 @@ else
     echo 'bucket cdm-lake already exists'
 fi

+# make spark job logs bucket
+if ! mc ls minio/cdm-spark-job-logs 2>/dev/null; then
+    mc mb minio/cdm-spark-job-logs && echo 'Bucket cdm-spark-job-logs created'
+else
+    echo 'bucket cdm-spark-job-logs already exists'
+fi
+
+# create spark-job-logs directory inside the bucket
+# Create a dummy file so the directory prefix exists
+echo "dummy" | mc pipe minio/cdm-spark-job-logs/spark-job-logs/.dummy
+echo 'spark-job-logs directory created in cdm-spark-job-logs bucket'
+
 # create policies
 mc admin policy create minio cdm-read-only-policy /config/cdm-read-only-policy.json
 mc admin policy create minio cdm-read-write-policy /config/cdm-read-write-policy.json
+mc admin policy create minio cdm-spark-job-logs-policy /config/cdm-spark-job-logs-policy.json

 # make read only user for user notebook
 mc admin user add minio minio-readonly minio123
 mc admin policy attach minio cdm-read-only-policy --user=minio-readonly
+mc admin policy attach minio cdm-spark-job-logs-policy --user=minio-readonly
 echo 'CDM Read-only user and policy set'

 # make read/write user
 mc admin user add minio minio-readwrite minio123
 mc admin policy attach minio cdm-read-write-policy --user=minio-readwrite
-echo 'CDM read-write user and policy set'
+mc admin policy attach minio cdm-spark-job-logs-policy --user=minio-readwrite
+echo 'CDM read-write user and policy set'
+
+# make spark events log access user
+mc admin user add minio minio-log-access minio123
+mc admin policy attach minio cdm-spark-job-logs-policy --user=minio-log-access
+echo 'Spark events log access user and policy set'
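
To confirm the new minio-log-access user can actually reach the log prefix, a short check with the minio client already pinned in pyproject.toml. The endpoint below is an assumption about how MinIO is exposed; inside the compose network it would be minio:9002 per MINIO_URL:

# verify_log_user.py -- illustrative smoke test, not part of this commit.
# Confirms the minio-log-access user can write and list under the
# spark-job-logs prefix. The endpoint host/port are assumptions.
import io

from minio import Minio

client = Minio(
    "localhost:9002",              # assumed exposed port; minio:9002 in-network
    access_key="minio-log-access",
    secret_key="minio123",
    secure=False,
)

payload = b"event-log-smoke-test"
client.put_object(
    "cdm-spark-job-logs",
    "spark-job-logs/smoke-test.txt",
    io.BytesIO(payload),
    length=len(payload),
)

# List what is under the log prefix to confirm read access.
for obj in client.list_objects("cdm-spark-job-logs", prefix="spark-job-logs/", recursive=True):
    print(obj.object_name)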
