# statewave/fly.toml (main branch) — repository smaramwbc/statewave on GitHub
# Fly.io application name and the region code used as the app's primary region.
app = "statewave-api"
primary_region = "ord"

# No build overrides are set; this table is intentionally left empty.
# NOTE(review): presumably the build then uses the platform defaults
# (e.g. a Dockerfile in the repo) — confirm against the deploy setup.
[build]

[env]
# Environment variables are always strings, so numeric and boolean values
# stay quoted here.

# Same port as http_service.internal_port below; keep the two in sync.
PORT = "8100"

# Compiler and chat-model selection.
STATEWAVE_COMPILER_TYPE = "llm"
STATEWAVE_EMBEDDING_PROVIDER = "litellm"
STATEWAVE_LITELLM_MODEL = "gpt-4o-mini"

# Embedding model and its vector size.
# NOTE(review): the dimension presumably has to match the model's output
# size — re-check it whenever the embedding model changes.
STATEWAVE_LITELLM_EMBEDDING_MODEL = "text-embedding-3-small"
STATEWAVE_EMBEDDING_DIMENSIONS = "1536"

# In production the docs pack is seeded by the dedicated GitHub Actions
# refresh workflow (purge + rebuild whenever the docs repo is pushed).
# The image does not bundle the corpus, so the start-time auto-bootstrap
# would silently skip anyway; it is turned off explicitly so the Fly logs
# leave no room for confusion.
STATEWAVE_BOOTSTRAP_DOCS_PACK = "false"

[http_service]
# Must agree with PORT in [env].
internal_port = 8100
force_https = true
auto_stop_machines = "suspend"
auto_start_machines = true
# Keep both machines warm. With min_machines_running = 1, one machine may
# auto-suspend, and Fly's load balancer then routes all traffic to the one
# that is still running. Long /v1/context calls (semantic retrieval plus
# provider embed_query, which can take 5–30s) queue up there and easily
# exceed the Vercel edge / Vite proxy timeout, surfacing as 504s in the
# support widget. Running both machines spreads the traffic and removes
# the cold-start spike on the second one.
min_machines_running = 2
processes = ["app"]

[[vm]]
# Machine sizing: the shared-cpu-1x size with 512 MB of memory.
size = "shared-cpu-1x"
memory = "512mb"