# fly.toml — Fly.io deployment configuration for the statewave-api app.
# App name and the primary region.
app = "statewave-api"
primary_region = "ord"
# Empty build section: no builder, image, or Dockerfile overrides are set.
[build]
[env]
# Presumably the port the app listens on; same value as internal_port
# under [http_service] — keep the two in sync.
PORT = "8100"
# Compiler type and, presumably, the LiteLLM chat model it calls.
STATEWAVE_COMPILER_TYPE = "llm"
STATEWAVE_LITELLM_MODEL = "gpt-4o-mini"
# Embedding provider, model, and vector size. The dimension count
# presumably has to match what the embedding model emits — verify
# before changing either one.
STATEWAVE_EMBEDDING_PROVIDER = "litellm"
STATEWAVE_LITELLM_EMBEDDING_MODEL = "text-embedding-3-small"
STATEWAVE_EMBEDDING_DIMENSIONS = "1536"
# In production the docs pack is seeded by the dedicated GitHub Actions
# refresh workflow (purge + rebuild on docs-repo push), not at start-up.
# The image doesn't bundle the corpus, so the start-time auto-bootstrap
# would silently skip anyway; it is disabled explicitly to avoid any
# confusion in the Fly logs.
STATEWAVE_BOOTSTRAP_DOCS_PACK = "false"
[http_service]
internal_port = 8100
processes = ["app"]
force_https = true
auto_start_machines = true
auto_stop_machines = "suspend"
# Keep both machines warm. With min_machines_running = 1, one machine
# could auto-suspend, forcing Fly's LB to send all traffic to the
# surviving machine. Long /v1/context calls (semantic retrieval +
# provider embed_query, can be 5–30s) would then queue and easily blow
# past the Vercel edge / Vite proxy timeout, surfacing as 504s in the
# support widget. Running two spreads traffic across both machines and
# removes the cold-start spike on the second one.
min_machines_running = 2
# Machine size preset and memory allotment.
[[vm]]
memory = "512mb"
size = "shared-cpu-1x"