Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ env/
*.tmp
*.bak
*.swp
.tox

# Build artifacts
dist/
Expand Down
62 changes: 62 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
version = "3.8.3"

runner.dialect = scala212

# Maximum column width
maxColumn = 100

# Indentation
indent.main = 2
indent.significant = 2
indent.callSite = 2
indent.ctorSite = 2
indent.defnSite = 2

# Alignment
align.preset = most
align.openParenCallSite = false
align.openParenDefnSite = false
align.tokens = [
{code = "=>", owner = "Case"},
{code = "%", owner = "Term.ApplyInfix"},
{code = "%%", owner = "Term.ApplyInfix"}
]

# Newlines
newlines.beforeCurlyLambdaParams = multilineWithCaseOnly
newlines.afterCurlyLambdaParams = squash
newlines.implicitParamListModifierPrefer = before
newlines.avoidForSimpleOverflow = [punct, slc, tooLong]

# Rewrite rules
rewrite.rules = [
RedundantBraces,
RedundantParens,
SortModifiers,
PreferCurlyFors
]
rewrite.redundantBraces.stringInterpolation = true

# Docstrings
docstrings.style = Asterisk
docstrings.wrap = no

# Imports
rewrite.imports.sort = scalastyle
rewrite.imports.groups = [
["java\\..*"],
["scala\\..*"],
["org\\.apache\\.spark\\..*"],
["org\\.scalanlp\\..*"],
["robustinfer\\..*"]
]

# Formatting for comments and spaces
spaces.inImportCurlyBraces = false
includeNoParensInSelectChains = false
optIn.breakChainOnFirstMethodDot = true

# Vertical multiline
verticalMultiline.atDefnSite = true
verticalMultiline.arityThreshold = 4

39 changes: 30 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,39 @@ RUN echo "Testing Java setup..." && \
"$JAVA_HOME/bin/java" -version

# ───────────────────────────────────────────────────────────────────────
# Step 2: Install Jupyter (Python) + Python dependencies
# Step 2: Install UV and Jupyter
# ───────────────────────────────────────────────────────────────────────
# Install uv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv

# Install Jupyter
RUN pip install --no-cache-dir jupyterlab

COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r /app/requirements.txt
# ───────────────────────────────────────────────────────────────────────
# Step 2.5: Install Python library using UV
# ───────────────────────────────────────────────────────────────────────
# Install at system level
COPY python_lib /app/python_lib
WORKDIR /app/python_lib
# Note: this installs the project in editable mode, so any changes to the project files inside the image are reflected without reinstalling.
# Since this runs inside the Docker image, there is no need for `uv sync` into a virtual environment; a system-level install is sufficient.
RUN uv pip install --system -e . --group dev

# Caveats:
# 1. When launching the image with python_lib mounted into the /app folder, the mounted python_lib overrides the copy baked into the image, so code changes are live-reloaded.
# 2. When launching the image without mounting python_lib, the copy baked into the image is used, i.e. the version from when the image was built.

# Switch back to main app directory
WORKDIR /app

# ───────────────────────────────────────────────────────────────────────
# Step 3: Install R kernel and any R packages
# Step 3: Install R kernel and any R packages (optional)
# ───────────────────────────────────────────────────────────────────────
RUN R -e "install.packages('IRkernel', repos='http://cran.us.r-project.org')" && \
R -e "IRkernel::installspec(user = FALSE)"
# RUN R -e "install.packages('IRkernel', repos='http://cran.us.r-project.org')" && \
# R -e "IRkernel::installspec(user = FALSE)"

COPY install_r_packages.R /tmp/install_r_packages.R
RUN Rscript /tmp/install_r_packages.R
# COPY install_r_packages.R /tmp/install_r_packages.R
# RUN Rscript /tmp/install_r_packages.R

# ───────────────────────────────────────────────────────────────────────
# Step 4: Install sbt (Scala Build Tool) for Apache Toree
Expand All @@ -58,13 +76,16 @@ RUN echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" > /etc/apt/so

# ───────────────────────────────────────────────────────────────────────
# Step 5: Download & extract Apache Spark
# * Adjust SPARK_VERSION and HADOOP_VERSION if needed.
# * Version is synced with scala_lib/gradle.properties via Makefile
# ───────────────────────────────────────────────────────────────────────
ARG SPARK_VERSION=3.4.1
ARG HADOOP_VERSION=3
ENV SPARK_HOME=/opt/spark
ENV PATH="${SPARK_HOME}/bin:${PATH}"

# Print versions for verification
RUN echo "=== Building with Spark ${SPARK_VERSION} and Hadoop ${HADOOP_VERSION} ==="

RUN wget --quiet https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
&& mkdir -p /opt \
&& tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /opt \
Expand Down
127 changes: 106 additions & 21 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,43 +1,128 @@
.PHONY: venv build test clean build_and_test
.PHONY: install build test clean lint format docker-build docker-test

# Create a virtual environment
venv:
python3 -m venv venv
./venv/bin/pip install -r requirements.txt
# =============================================================================
# PYTHON TARGETS
# =============================================================================

# # Build the package
py-build:
python3 setup.py sdist bdist_wheel
# Install Python dependencies using UV
python-install:
cd python_lib && uv sync --group dev

scala-build:
gradle -p scala_lib build
# Build Python package using UV + Hatchling
python-build:
cd python_lib && uv build

# Run Python tests with UV + Tox
python-test:
cd python_lib && uv run tox -e py313

# Lint Python code with Ruff
python-lint:
cd python_lib && uv run ruff check .

build: py-build scala-build
# Format Python code with Ruff
python-format:
cd python_lib && uv run ruff format .

# Run unit tests
py-test: venv
./venv/bin/pip install -e .
./venv/bin/pytest python_lib/tests/
# Clean up Python build artifacts
python-clean:
cd python_lib && rm -rf dist build *.egg-info .tox .pytest_cache .ruff_cache
rm -rf dist build *.egg-info

# =============================================================================
# SCALA TARGETS
# =============================================================================

# Install Scala dependencies (if needed)
scala-install:
gradle -p scala_lib dependencies

# Build Scala package
scala-build:
gradle -p scala_lib build

# Run Scala tests
scala-test:
gradle -p scala_lib test

test: scala-test py-test
# Format Scala code with scalafmt
scala-format:
gradle -p scala_lib format

# Check Scala formatting
scala-format-check:
gradle -p scala_lib checkFormat

# Clean up Scala build artifacts using Gradle
# Lint Scala code (check formatting + compiler warnings)
scala-lint:
gradle -p scala_lib lint

# Clean up Scala build artifacts
scala-clean:
gradle -p scala_lib clean

# Clean up build artifacts and virtual environment
# =============================================================================
# COMBINED TARGETS
# =============================================================================

# Install all dependencies
install: python-install scala-install

# Build both Python and Scala
build: python-build scala-build

# Run all tests
test: scala-test python-test

# Lint both Python and Scala
lint: python-lint scala-lint

# Format both Python and Scala
format: python-format scala-format

# Check formatting without applying changes
format-check: scala-format-check
cd python_lib && uv run ruff format --check .

# Clean up all build artifacts
clean-eggs:
find . -type d -name '*.egg-info' -exec rm -rf {} +

clean: clean-eggs
rm -rf dist build *.egg-info venv
make scala-clean
clean: python-clean scala-clean clean-eggs

clean-all: clean
git clean -fdX

# Build and test in one step
build_and_test: build test

# =============================================================================
# DOCKER TARGETS
# =============================================================================

# Extract Spark version profile from gradle.properties
SPARK_PROFILE := $(shell grep 'versionProfile=' scala_lib/gradle.properties | cut -d'=' -f2)

# Map profiles to Spark and Hadoop versions
SPARK_VERSION_spark31 := 3.1.3
HADOOP_VERSION_spark31 := 3.2

SPARK_VERSION_spark34 := 3.4.1
HADOOP_VERSION_spark34 := 3

SPARK_VERSION_spark35 := 3.5.0
HADOOP_VERSION_spark35 := 3

# Get versions for current profile
SPARK_VERSION := $(SPARK_VERSION_$(SPARK_PROFILE))
HADOOP_VERSION := $(HADOOP_VERSION_$(SPARK_PROFILE))

docker-build:
@echo "Building Docker image with Spark $(SPARK_VERSION) (profile: $(SPARK_PROFILE))"
docker build \
--build-arg SPARK_VERSION=$(SPARK_VERSION) \
--build-arg HADOOP_VERSION=$(HADOOP_VERSION) \
-t robustinfer-notebook .

docker-test:
docker run robustinfer-notebook python -c "import robustinfer; print('Python import successful')"
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ See [License](LICENSE) in the project root for license information.
## Usage
### Run Notebooks from Docker
1. Build the Docker Image:
```docker build -t robustinfer-notebook .```
```make docker-build``` (builds with the Spark version synced from `scala_lib/gradle.properties`)
```docker build -t robustinfer-notebook .``` (builds with the default Spark version)
2. Run the Docker Container:
```docker run -p 8888:8888 -v $(pwd):/app robustinfer-notebook```
The `-v $(pwd):/app` mounts the project directory into the container.
Expand Down
Loading