-
Notifications
You must be signed in to change notification settings - Fork 102
Expand file tree
/
Copy path.env.example
More file actions
226 lines (202 loc) · 8.06 KB
/
.env.example
File metadata and controls
226 lines (202 loc) · 8.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# Copy this file to `.env` (repo root).
# Priority: shell `export` > `.env` > `i2p_config.json` > code defaults.
#
# IMPORTANT:
# - Do NOT commit `.env` (it is gitignored).
# - Boolean values in env are treated as: only `"1"` is true, everything else is false.
# (So use 1/0, not true/false.)
#
# -----------------------------
# LLM API Key (secret)
# -----------------------------
# Used for LLM calls only. Do not put in i2p_config.json.
LLM_API_KEY=your_key_here
# -----------------------------
# LLM (provider selection)
# -----------------------------
# Provider options:
# - openai_compatible_chat
# - openai_responses
# - anthropic
# - gemini
LLM_PROVIDER=openai_compatible_chat
# Optional: base URL (recommended)
# For OpenAI-compatible Chat:
# `https://api.openai.com/v1` (default)
# For other compatible services:
# `https://dashscope.aliyuncs.com/compatible-mode/v1`
LLM_BASE_URL=https://api.openai.com/v1
# Optional: full API URL (highest priority override)
# Example (OpenAI-compatible Chat):
# `https://api.openai.com/v1/chat/completions`
# Example (OpenAI Responses):
# `https://api.openai.com/v1/responses`
# Example (Anthropic):
# `https://api.anthropic.com/v1/messages`
# Example (Gemini):
# `https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent`
LLM_API_URL=https://api.openai.com/v1/chat/completions
# Model (provider-specific)
LLM_MODEL=claude-opus-4-5-20251101
# Anthropic optional version override
LLM_ANTHROPIC_VERSION=2025-11-01
# Optional: extra headers/body as JSON (advanced use)
# LLM_EXTRA_HEADERS_JSON={"x-foo":"bar"}
# LLM_EXTRA_BODY_JSON={"top_p":0.9}
# Optional: per-stage LLM temperatures (defaults preserve current behavior)
# Critic is usually low temp for stability; story generation can be moderate.
I2P_LLM_TEMPERATURE_DEFAULT=0.7
I2P_LLM_TEMPERATURE_STORY_GENERATOR=0.7
I2P_LLM_TEMPERATURE_STORY_GENERATOR_REWRITE=0.3
I2P_LLM_TEMPERATURE_STORY_REFLECTOR=0.5
I2P_LLM_TEMPERATURE_PATTERN_SELECTOR=0.3
I2P_LLM_TEMPERATURE_IDEA_FUSION=0.7
I2P_LLM_TEMPERATURE_IDEA_FUSION_STAGE2=0.8
I2P_LLM_TEMPERATURE_IDEA_FUSION_STAGE3=0.9
I2P_LLM_TEMPERATURE_CRITIC_MAIN=0.0
I2P_LLM_TEMPERATURE_CRITIC_REPAIR=0.0
I2P_LLM_TEMPERATURE_IDEA_PACKAGING_PARSE=0.0
I2P_LLM_TEMPERATURE_IDEA_PACKAGING_PATTERN_GUIDED=0.3
I2P_LLM_TEMPERATURE_IDEA_PACKAGING_JUDGE=0.0
# -----------------------------
# Embedding (optional overrides)
# -----------------------------
# If not set, Embedding uses:
# - EMBEDDING_API_URL=https://api.openai.com/v1/embeddings
# - EMBEDDING_MODEL=text-embedding-3-large (example)
# - EMBEDDING_API_KEY falls back to LLM_API_KEY
# Tip: For frequent switching, set I2P_INDEX_DIR_MODE=auto_profile to auto-select
# per-embedding index dirs (no manual profile scripts needed). You can still override
# I2P_NOVELTY_INDEX_DIR / I2P_RECALL_INDEX_DIR if you prefer.
EMBEDDING_API_URL=https://api.openai.com/v1/embeddings
EMBEDDING_MODEL=text-embedding-3-large
EMBEDDING_API_KEY=your_embedding_key_here
# Optional: auto profile index directories
I2P_INDEX_DIR_MODE=auto_profile
# -----------------------------
# Idea Packaging (optional; default off)
# -----------------------------
# Enable idea packaging (pattern-guided, double recall)
# I2P_IDEA_PACKAGING_ENABLE=1
# I2P_IDEA_PACKAGING_TOPN_PATTERNS=5
# I2P_IDEA_PACKAGING_MAX_EXEMPLAR_PAPERS=8
# I2P_IDEA_PACKAGING_CANDIDATE_K=3
# I2P_IDEA_PACKAGING_SELECT_MODE=llm_then_recall # llm_then_recall|llm_only|recall_only
# I2P_IDEA_PACKAGING_FORCE_EN_QUERY=1
# -----------------------------
# Run logging (repo root `log/`)
# -----------------------------
# 1 = enable structured run logs under `log/run_.../`
# 0 = disable run logs (pipeline still runs)
I2P_ENABLE_LOGGING=1
# Optional: override log output directory (absolute path recommended)
# I2P_LOG_DIR=/abs/path/to/log
# Optional: max chars saved for prompt/response per call (avoid huge JSONL)
# I2P_LOG_MAX_TEXT_CHARS=20000
# -----------------------------
# Results bundling (repo root `results/`)
# -----------------------------
# 1 = enable bundling final artifacts under `results/run_.../`
# 0 = disable bundling (pipeline still runs)
I2P_RESULTS_ENABLE=1
# Results are always copied (no symlinks) to make artifacts portable across platforms.
# Optional: override results output directory (absolute path recommended)
# I2P_RESULTS_DIR=/abs/path/to/results
# -----------------------------
# Critic strictness (quality)
# -----------------------------
# 1 = strict JSON mode (quality-first): critic JSON invalid -> retry -> still invalid => fail the run
# 0 = allow non-strict behavior (useful for offline smoke tests when no API key)
I2P_CRITIC_STRICT_JSON=1
# How many retries after the first failure (default 2)
I2P_CRITIC_JSON_RETRIES=2
# -----------------------------
# Blind Judge (τ calibration)
# -----------------------------
# Path to τ file produced by fit_judge_tau.py (non-secret)
I2P_JUDGE_TAU_PATH=Paper-KG-Pipeline/output/judge_tau.json
#
# Fallback τ values if the τ file is missing (useful for cheap smoke tests)
I2P_TAU_METHODOLOGY=1.0
I2P_TAU_NOVELTY=1.4
I2P_TAU_STORYTELLER=1.0
#
# Coach layer (field-level edit guidance)
I2P_CRITIC_COACH_ENABLE=1
I2P_CRITIC_COACH_TEMPERATURE=0.3
I2P_CRITIC_COACH_MAX_TOKENS=4096
# -----------------------------
# Pass rule (pattern-aware)
# -----------------------------
# Default is the objective "Scheme B":
# - at least 2 of 3 role scores >= pattern q75
# - and avg_score >= pattern q50
#
# If pattern has too few papers (see I2P_PASS_MIN_PATTERN_PAPERS), fallback is controlled by I2P_PASS_FALLBACK.
# I2P_PASS_MODE=two_of_three_q75_and_avg_ge_q50
# I2P_PASS_MIN_PATTERN_PAPERS=20
# I2P_PASS_FALLBACK=global # global|fixed
# I2P_PASS_SCORE=7.0 # only used when fallback=fixed or distribution unavailable
# -----------------------------
# Advanced: anchors & scoring
# -----------------------------
# Quantiles for initial anchors (comma-separated floats)
I2P_ANCHOR_QUANTILES=0.05,0.15,0.25,0.35,0.5,0.65,0.75,0.85,0.95
I2P_ANCHOR_MAX_INITIAL=11
I2P_ANCHOR_MAX_EXEMPLARS=2
# Optional: densify candidates around score buckets (speed-first)
I2P_ANCHOR_BUCKET_SIZE=1.0
I2P_ANCHOR_BUCKET_COUNT=3
#
# Deterministic score inference grid (smaller = slower but more precise)
# I2P_GRID_STEP=0.01
# Densify triggers when loss/high violations/low avg_strength
I2P_DENSIFY_LOSS_THRESHOLD=0.05
I2P_DENSIFY_MIN_AVG_CONF=0.35
I2P_ANCHOR_DENSIFY_ENABLE=0
# -----------------------------
# Local novelty check (Plan A)
# -----------------------------
# Enable local novelty check against nodes_paper.json
I2P_NOVELTY_ENABLE=1
# 1 = auto-build the novelty index during the run if it is missing
# 0 = do NOT auto-build (quality-first + predictable; requires a pre-built index)
I2P_NOVELTY_AUTO_BUILD_INDEX=1
# Offline build batch size
I2P_NOVELTY_INDEX_BUILD_BATCH_SIZE=32
# Action on high similarity: report_only | pivot | fail
I2P_NOVELTY_ACTION=pivot
# Max pivot attempts when similarity is high
I2P_NOVELTY_MAX_PIVOTS=2
# -----------------------------
# Index auto-prepare (one-command run)
# -----------------------------
# 1 = auto-preflight and build missing indexes; 0 = skip preflight
I2P_INDEX_AUTO_PREPARE=1
# 1 = allow auto-build when missing; 0 = fail and ask for manual build
I2P_INDEX_ALLOW_BUILD=1
# -----------------------------
# Final collision threshold (Phase 4)
# -----------------------------
# 1 = enable final verification (Phase 4), 0 = skip
I2P_VERIFICATION_ENABLE=1
# Recommendation: set between novelty.medium_th and novelty.high_th (e.g. 0.82–0.88)
I2P_COLLISION_THRESHOLD=0.88
# -----------------------------
# Recall audit (persist recall candidates)
# -----------------------------
# 1 = enable recall audit, 0 = disable
I2P_RECALL_AUDIT_ENABLE=1
# Top-N pattern scores per path to persist
I2P_RECALL_AUDIT_TOPN=50
# Recall embedding batch params
I2P_RECALL_EMBED_BATCH_SIZE=32
I2P_RECALL_EMBED_MAX_RETRIES=3
I2P_RECALL_EMBED_SLEEP_SEC=0.5
# Recall offline index (optional)
I2P_RECALL_USE_OFFLINE_INDEX=1
# Optional: canonical subdomain taxonomy for Path2 (reduce duplicates & long-tail)
# When enabled, pipeline auto-detects/builds (if I2P_INDEX_ALLOW_BUILD=1) at:
# {recall_index_dir}/subdomain_taxonomy.json
I2P_SUBDOMAIN_TAXONOMY_ENABLE=1
I2P_SUBDOMAIN_TAXONOMY_PATH=
I2P_SUBDOMAIN_TAXONOMY_STOPLIST_MODE=drop # drop