harbor-framework · benediktstroebl · Mar 24, 2026 · Mar 24, 2026 · Mar 24, 2026
diff --git a/README.md b/README.md
@@ -1,5 +1,6 @@
 # Harbor Cookbook
 
+ [![Discord](https://dcbadge.limes.pink/api/server/https://discord.gg/6xWPKhGDbA)](https://discord.gg/6xWPKhGDbA)
 [![Docs](https://img.shields.io/badge/Docs-000000?style=for-the-badge&logo=mdbook&color=105864)](https://harborframework.com/docs)
 
 Realistic examples of building evals and optimizing agents using [Harbor](https://github.com/harbor-framework/harbor). 

diff --git a/harbor_cookbook/recipes/dns-blacklisting/tests/test.sh b/harbor_cookbook/recipes/dns-blacklisting/tests/test.sh
@@ -1,15 +1,18 @@
 #!/bin/bash
-set -uo pipefail
+
+apt-get update
+apt-get install -y curl
 
 curl -LsSf https://astral.sh/uv/0.9.7/install.sh | sh
+
 source $HOME/.local/bin/env
 
 uvx \
   --with pytest==8.4.1 \
   --with pytest-json-ctrf==0.3.5 \
-  pytest --ctrf /logs/verifier/ctrf.json /tests/test_dns.py -rA || true
+  pytest --ctrf /logs/verifier/ctrf.json /tests/test_dns.py -rA
 
-if [ "${PIPESTATUS[0]}" -eq 0 ]; then
+if [ $? -eq 0 ]; then
   echo 1 > /logs/verifier/reward.txt
 else
   echo 0 > /logs/verifier/reward.txt

diff --git a/harbor_cookbook/recipes/multi-reward/README.md b/harbor_cookbook/recipes/multi-reward/README.md
@@ -24,16 +24,14 @@ multi-reward/
 
 ## Run
 
+This recipe writes two reward dimensions (`correctness`, `performance`) to `reward.json`. Harbor's default `mean` metric only supports single-key rewards, so you must pass the included `config.yaml`, which uses a custom `uv-script` metric (`metrics/per_dimension.py`) that computes the mean reward per dimension:
+
 ```bash
-harbor trials start -p harbor_cookbook/recipes/multi-reward
+harbor run -p harbor_cookbook/recipes/multi-reward -c harbor_cookbook/recipes/multi-reward/config.yaml
 ```
 
-## Metrics note
-
-Harbor's default `mean` metric only supports single-key `reward.json`. Since this recipe writes two keys (`correctness`, `performance`), running `harbor run` requires a custom metric config:
+To run a single trial without metrics (useful for quick iteration):
 
 ```bash
-harbor run -p harbor_cookbook/recipes/multi-reward -c harbor_cookbook/recipes/multi-reward/config.yaml
+harbor trials start -p harbor_cookbook/recipes/multi-reward
 ```
-
-The included `config.yaml` uses a `uv-script` metric (`metrics/per_dimension.py`) that computes mean reward per dimension.
diff --git a/harbor_cookbook/recipes/multi-reward/task.toml b/harbor_cookbook/recipes/multi-reward/task.toml
@@ -9,5 +9,5 @@ timeout_sec = 120.0
 [environment]
 build_timeout_sec = 600.0
 cpus = 1
-memory = "2G"
-storage = "10G"
+memory_mb = 2048
+storage_mb = 10240
diff --git a/harbor_cookbook/recipes/simple-task/task.toml b/harbor_cookbook/recipes/simple-task/task.toml
@@ -9,5 +9,5 @@ timeout_sec = 120.0
 [environment]
 build_timeout_sec = 600.0
 cpus = 1
-memory = "2G"
-storage = "10G"
+memory_mb = 2048
+storage_mb = 10240