diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/README.md b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/README.md
new file mode 100644
index 0000000000..757aee7f59
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/README.md
@@ -0,0 +1,21 @@
+# OPC causal packed-memory legal packet stress test
+
+This packet was rebuilt from scratch in a standalone workspace on top of the `open_predictive_coder` kernel.
+
+- track: `track_non_record_16mb`
+- run_id: `opc_native_tokens100000000_62021846`
+- eval bits per token: `6.062992566022187`
+- unigram bits per token: `8.649563865337807`
+- bigram bits per token: `6.090255597305575`
+- trigram bits per token: `7.22427841818739`
+- train bits per token: `6.0696419226081915`
+- mixture weights: `[0.0, 0.9, 0.1]`
+- artifact bytes: `2705939`
+- opc upstream: `https://github.com/asuramaya/open-predictive-coder`
+- opc commit: `4072074288fa279b655c11c30f8fca2e1859f925`
+
+Important scope note:
+
+- this is a legal packet stress test and a descendant rebuild
+- it is not a leaderboard claim
+- the model is an opc-native causal packed-memory descendant built in this workspace
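
The headline metrics above are linked by the byte rate recorded in `results.json` and `train.log`: bits per byte is bits per token divided by bytes per token. A minimal sketch that recomputes the packet's claimed `val_bpb` from those two fields (values copied verbatim from this packet):

```python
import math

# Values copied from train.log / results.json in this packet.
eval_bits_per_token = 6.062992566022187
eval_bytes_per_token = 2.4359302919801884

# bits/byte = (bits/token) / (bytes/token)
val_bpb = eval_bits_per_token / eval_bytes_per_token
assert math.isclose(val_bpb, 2.48898442865273, rel_tol=1e-9)
print(val_bpb)  # ~2.48898442865273, the val_bpb claimed in submission.json
```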
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/artifacts/model_artifact.npz b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/artifacts/model_artifact.npz
new file mode 100644
index 0000000000..95bd09f086
Binary files /dev/null and b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/artifacts/model_artifact.npz differ
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audit_tokens.npy b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audit_tokens.npy
new file mode 100644
index 0000000000..e128ca15e3
Binary files /dev/null and b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audit_tokens.npy differ
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/audits.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/audits.json
new file mode 100644
index 0000000000..c3d6e547fc
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/audits.json
@@ -0,0 +1,16 @@
+{
+ "tier1": {
+ "status": "pass",
+ "submission": "pass",
+ "provenance": null
+ },
+ "tier3": {
+ "status": "warn",
+ "scope": "one_shot_runtime_handoff",
+ "legality": "pass",
+ "trust_level_requested": "strict",
+ "trust_level_achieved": "strict",
+ "trust_satisfied": true,
+ "replay": "pass"
+ }
+}
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/claim.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/claim.json
new file mode 100644
index 0000000000..0f691079f8
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/claim.json
@@ -0,0 +1,6 @@
+{
+ "candidate_id": "opc-native-full-spec-rel",
+ "requested_label": "Tier-1 reviewed",
+ "submission_name": "OPC causal packed-memory legal packet stress test",
+ "track": null
+}
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/ledger_manifest.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/ledger_manifest.json
new file mode 100644
index 0000000000..7571d50423
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/ledger_manifest.json
@@ -0,0 +1,21 @@
+{
+ "bundle_id": "opc-native-full-spec-rel",
+ "attachments": [
+ {
+ "source": "reports/submission.json",
+ "dest": "audits/tier1/submission.json"
+ },
+ {
+ "source": "reports/legality.json",
+ "dest": "audits/tier3/legality.json"
+ },
+ {
+ "source": "reports/replay.json",
+ "dest": "audits/tier3/replay.json"
+ }
+ ],
+ "claim": "claim.json",
+ "metrics": "metrics.json",
+ "provenance": "provenance.json",
+ "audits": "audits.json"
+}
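
The manifest is declarative: each attachment pairs a `source` path inside the submission with a `dest` path inside the assembled bundle. A minimal sketch of the copy step a bundler could perform under that contract (the vendored `write_validity_bundle` in `conker_ledger` presumably also validates; its internals are not shown in this diff, and `copy_attachments` is a hypothetical name):

```python
import json
import shutil
from pathlib import Path

def copy_attachments(manifest_path: Path, src_root: Path, out_dir: Path) -> None:
    """Copy each declared attachment from the submission tree into the bundle layout."""
    manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    for item in manifest["attachments"]:
        dest = out_dir / item["dest"]
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.copyfile(src_root / item["source"], dest)
```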
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/metrics.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/metrics.json
new file mode 100644
index 0000000000..6a08c24093
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/metrics.json
@@ -0,0 +1,14 @@
+{
+ "fresh_process_full": {
+ "bpb": 2.48898442865273
+ },
+ "packed_artifact_full": {
+ "bpb": 2.48898442865273,
+ "artifact_bytes": 2705939
+ },
+ "replay": {
+ "total_loss_nats": 277494.23437665706,
+ "mean_loss_nats": 4.234225988413346,
+ "mean_bpb": 6.1086968354871045
+ }
+}
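
Note the unit shift inside this file: `fresh_process_full.bpb` and `packed_artifact_full.bpb` are byte-level scores matching the claimed `val_bpb`, while the replay block reports nats over the audited token slice; its `mean_bpb` is exactly `mean_loss_nats / ln 2`, i.e. a per-token figure on that slice. A one-line check of the conversion:

```python
import math

mean_loss_nats = 4.234225988413346  # from the replay block above
assert math.isclose(mean_loss_nats / math.log(2), 6.1086968354871045, rel_tol=1e-9)
```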
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/provenance.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/provenance.json
new file mode 100644
index 0000000000..86fea88c93
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/provenance.json
@@ -0,0 +1,5 @@
+{
+ "run_id": "source_submission",
+ "source_root": ".",
+ "source_repo": "opc-parameter-golf-submission"
+}
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/legality.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/legality.json
new file mode 100644
index 0000000000..54e0a085b6
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/legality.json
@@ -0,0 +1,2264 @@
+{
+ "profile": "parameter-golf",
+ "adapter": {
+ "adapter": "OpcNativeDetectAdapter",
+ "artifact_path": "model_artifact.npz",
+ "vocab_size": 1024,
+ "notes": "opc-native packed-memory replay adapter for legality and replay scans.",
+ "vocab_size_source": "explicit"
+ },
+ "token_count": 131072,
+ "audited_token_count": 65536,
+ "chunk_size": 8192,
+ "chunk_count": 8,
+ "max_chunks": 8,
+ "selected_chunks": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "vocab_size_source": "explicit",
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ },
+ "trust": {
+ "requested": "strict",
+ "achieved": "strict",
+ "satisfied": true,
+ "requirements": {
+ "basic": [
+ {
+ "name": "normalization",
+ "satisfied": true,
+ "detail": "sampled prediction vectors are normalized and shape-checked"
+ },
+ {
+ "name": "repeatability",
+ "satisfied": true,
+ "detail": "repeat scoring from the same snapshot is numerically stable"
+ },
+ {
+ "name": "future_suffix_invariance",
+ "satisfied": true,
+ "detail": "sampled positions do not change when later suffix tokens are perturbed"
+ },
+ {
+ "name": "answer_mask_invariance",
+ "satisfied": true,
+ "detail": "sampled positions do not change when the scored token and later suffix are perturbed"
+ }
+ ],
+ "traced": [
+ {
+ "name": "explicit_vocab_size",
+ "satisfied": true,
+ "detail": "the legality run declared the official vocabulary size explicitly"
+ },
+ {
+ "name": "trace_fields.gold_logprobs/loss_nats/weights/counted/path_ids",
+ "satisfied": true,
+ "detail": "the adapter exposed enough trace fields to audit gold scores, accounting, and path metadata"
+ },
+ {
+ "name": "gold_logprob_consistency",
+ "satisfied": true,
+ "detail": "reported gold-token logprobs match the returned full distributions"
+ },
+ {
+ "name": "accounting_contribution_consistency",
+ "satisfied": true,
+ "detail": "reported loss contributions match the returned distributions and trace metadata"
+ },
+ {
+ "name": "accounting_path_invariance",
+ "satisfied": true,
+ "detail": "trace-backed path metadata is stable under sampled suffix and answer perturbations"
+ }
+ ],
+ "strict": [
+ {
+ "name": "trace_fields.state_hash_before/state_hash_after",
+ "satisfied": true,
+ "detail": "the adapter exposed state hashes around score-time evaluation"
+ },
+ {
+ "name": "state_hash_consistency",
+ "satisfied": true,
+ "detail": "repeated scoring from the same snapshot preserves score-time state hashes"
+ }
+ ]
+ },
+ "missing": [],
+ "notes": [
+ "trust levels score the exposed adapter surface only",
+ "they do not cover provenance, train/eval contamination, or cross-run outcome selection"
+ ]
+ },
+ "obligations": {
+ "prefix_causal_distribution": {
+ "status": "partially_covered",
+ "checked_by": [
+ "repeatability",
+ "future_suffix_invariance",
+ "answer_mask_invariance"
+ ],
+ "notes": [
+ "sampled probes test same-position and suffix dependence within chosen chunks",
+ "this is a strong diagnostic but not an exhaustive proof over every position and run state"
+ ]
+ },
+ "full_normalized_distribution_over_official_alphabet": {
+ "status": "partially_covered",
+ "checked_by": [
+ "normalization"
+ ],
+ "notes": [
+ "sampled positions must return a non-negative 1D distribution that sums to 1",
+ "full-alphabet shape checks use the explicit --vocab-size boundary"
+ ]
+ },
+ "score_accounting_independent_of_answer": {
+ "status": "partially_covered",
+ "checked_by": [
+ "gold_logprob_consistency",
+ "accounting_contribution_consistency",
+ "accounting_path_invariance"
+ ],
+ "notes": [
+ "sampled positions compare the adapter's reported gold-token logprob against the returned full distribution",
+ "trace-aware runs also compare additive loss, weights, counted flags, and path metadata against the returned distribution",
+ "it still does not prove all answer-dependent bookkeeping is absent"
+ ]
+ },
+ "no_outcome_selection_across_validation_runs": {
+ "status": "out_of_scope",
+ "checked_by": [],
+ "notes": [
+ "the current runtime audit evaluates one declared run; it does not prove the submitted score was not chosen as best-of-k after observing outcomes"
+ ]
+ }
+ },
+ "checks": {
+ "normalization": {
+ "probe_count": 8,
+ "failure_count": 0,
+ "max_abs_diff": 3.3306690738754696e-16,
+ "covered": true,
+ "pass": true
+ },
+ "trace_coverage": {
+ "probe_count": 8,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ },
+ "repeatability": {
+ "probe_count": 8,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ },
+ "future_suffix_invariance": {
+ "probe_count": 16,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ },
+ "answer_mask_invariance": {
+ "probe_count": 16,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ },
+ "gold_logprob_consistency": {
+ "probe_count": 8,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ },
+ "accounting_contribution_consistency": {
+ "probe_count": 8,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ },
+ "accounting_path_invariance": {
+ "probe_count": 32,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ },
+ "state_hash_consistency": {
+ "probe_count": 8,
+ "failure_count": 0,
+ "max_abs_diff": 0.0,
+ "covered": true,
+ "pass": true
+ }
+ },
+ "probes": [
+ {
+ "kind": "normalization",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "positions": [
+ 371,
+ 1331,
+ 1519,
+ 2304,
+ 2670,
+ 3083,
+ 3095,
+ 5185,
+ 5622,
+ 7100
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 0.0,
+ "min_value": 6.031134047998613e-10
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "positions": [
+ 371,
+ 1331,
+ 1519,
+ 2304,
+ 2670,
+ 3083,
+ 3095,
+ 5185,
+ 5622,
+ 7100
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "positions": [
+ 371,
+ 1331,
+ 1519,
+ 2304,
+ 2670,
+ 3083,
+ 3095,
+ 5185,
+ 5622,
+ 7100
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "positions": [
+ 371,
+ 1331,
+ 1519,
+ 2304,
+ 2670,
+ 3083,
+ 3095,
+ 5185,
+ 5622,
+ 7100
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "positions": [
+ 371,
+ 1331,
+ 1519,
+ 2304,
+ 2670,
+ 3083,
+ 3095,
+ 5185,
+ 5622,
+ 7100
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "positions": [
+ 371,
+ 1331,
+ 1519,
+ 2304,
+ 2670,
+ 3083,
+ 3095,
+ 5185,
+ 5622,
+ 7100
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "cutoff": 6685,
+ "positions": [
+ 1331,
+ 2304,
+ 2670,
+ 3083
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "cutoff": 6685,
+ "positions": [
+ 1331,
+ 2304,
+ 2670,
+ 3083
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "cutoff": 6540,
+ "positions": [
+ 1519,
+ 3095,
+ 5185,
+ 5622
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "cutoff": 6540,
+ "positions": [
+ 1519,
+ 3095,
+ 5185,
+ 5622
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "position": 371,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "position": 371,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "position": 7100,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "position": 7100,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "normalization",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "positions": [
+ 515,
+ 549,
+ 724,
+ 1015,
+ 1029,
+ 3344,
+ 4559,
+ 5092,
+ 7126,
+ 7747
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 2.220446049250313e-16,
+ "min_value": 1.6220236059799369e-09
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "positions": [
+ 515,
+ 549,
+ 724,
+ 1015,
+ 1029,
+ 3344,
+ 4559,
+ 5092,
+ 7126,
+ 7747
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "positions": [
+ 515,
+ 549,
+ 724,
+ 1015,
+ 1029,
+ 3344,
+ 4559,
+ 5092,
+ 7126,
+ 7747
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "positions": [
+ 515,
+ 549,
+ 724,
+ 1015,
+ 1029,
+ 3344,
+ 4559,
+ 5092,
+ 7126,
+ 7747
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "positions": [
+ 515,
+ 549,
+ 724,
+ 1015,
+ 1029,
+ 3344,
+ 4559,
+ 5092,
+ 7126,
+ 7747
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "positions": [
+ 515,
+ 549,
+ 724,
+ 1015,
+ 1029,
+ 3344,
+ 4559,
+ 5092,
+ 7126,
+ 7747
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "cutoff": 1218,
+ "positions": [
+ 549,
+ 724,
+ 1015,
+ 1029
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "cutoff": 1218,
+ "positions": [
+ 549,
+ 724,
+ 1015,
+ 1029
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "cutoff": 6849,
+ "positions": [
+ 515,
+ 3344,
+ 4559,
+ 5092
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "cutoff": 6849,
+ "positions": [
+ 515,
+ 3344,
+ 4559,
+ 5092
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "position": 7126,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "position": 7126,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "position": 7747,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "position": 7747,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "normalization",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "positions": [
+ 1750,
+ 1881,
+ 2161,
+ 2333,
+ 3059,
+ 3802,
+ 3970,
+ 4655,
+ 5197,
+ 5780
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 2.220446049250313e-16,
+ "min_value": 1.3434586303083768e-09
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "positions": [
+ 1750,
+ 1881,
+ 2161,
+ 2333,
+ 3059,
+ 3802,
+ 3970,
+ 4655,
+ 5197,
+ 5780
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "positions": [
+ 1750,
+ 1881,
+ 2161,
+ 2333,
+ 3059,
+ 3802,
+ 3970,
+ 4655,
+ 5197,
+ 5780
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "positions": [
+ 1750,
+ 1881,
+ 2161,
+ 2333,
+ 3059,
+ 3802,
+ 3970,
+ 4655,
+ 5197,
+ 5780
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "positions": [
+ 1750,
+ 1881,
+ 2161,
+ 2333,
+ 3059,
+ 3802,
+ 3970,
+ 4655,
+ 5197,
+ 5780
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "positions": [
+ 1750,
+ 1881,
+ 2161,
+ 2333,
+ 3059,
+ 3802,
+ 3970,
+ 4655,
+ 5197,
+ 5780
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "cutoff": 4890,
+ "positions": [
+ 2161,
+ 2333,
+ 3802,
+ 3970
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "cutoff": 4890,
+ "positions": [
+ 2161,
+ 2333,
+ 3802,
+ 3970
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "cutoff": 7749,
+ "positions": [
+ 1750,
+ 1881,
+ 3059,
+ 4655
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "cutoff": 7749,
+ "positions": [
+ 1750,
+ 1881,
+ 3059,
+ 4655
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "position": 5197,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "position": 5197,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "position": 5780,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "position": 5780,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "normalization",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "positions": [
+ 36,
+ 364,
+ 664,
+ 726,
+ 826,
+ 1057,
+ 1174,
+ 1601,
+ 2673,
+ 5485
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 2.220446049250313e-16,
+ "min_value": 1.4438746191395938e-09
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "positions": [
+ 36,
+ 364,
+ 664,
+ 726,
+ 826,
+ 1057,
+ 1174,
+ 1601,
+ 2673,
+ 5485
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "positions": [
+ 36,
+ 364,
+ 664,
+ 726,
+ 826,
+ 1057,
+ 1174,
+ 1601,
+ 2673,
+ 5485
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "positions": [
+ 36,
+ 364,
+ 664,
+ 726,
+ 826,
+ 1057,
+ 1174,
+ 1601,
+ 2673,
+ 5485
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "positions": [
+ 36,
+ 364,
+ 664,
+ 726,
+ 826,
+ 1057,
+ 1174,
+ 1601,
+ 2673,
+ 5485
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "positions": [
+ 36,
+ 364,
+ 664,
+ 726,
+ 826,
+ 1057,
+ 1174,
+ 1601,
+ 2673,
+ 5485
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "cutoff": 3634,
+ "positions": [
+ 664,
+ 826,
+ 1174,
+ 2673
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "cutoff": 3634,
+ "positions": [
+ 664,
+ 826,
+ 1174,
+ 2673
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "cutoff": 1258,
+ "positions": [
+ 36,
+ 364,
+ 726,
+ 1057
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "cutoff": 1258,
+ "positions": [
+ 36,
+ 364,
+ 726,
+ 1057
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "position": 1601,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "position": 1601,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "position": 5485,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "position": 5485,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "normalization",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "positions": [
+ 63,
+ 73,
+ 101,
+ 761,
+ 887,
+ 2848,
+ 2881,
+ 3520,
+ 3953,
+ 6164
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 3.3306690738754696e-16,
+ "min_value": 5.917248948702838e-10
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "positions": [
+ 63,
+ 73,
+ 101,
+ 761,
+ 887,
+ 2848,
+ 2881,
+ 3520,
+ 3953,
+ 6164
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "positions": [
+ 63,
+ 73,
+ 101,
+ 761,
+ 887,
+ 2848,
+ 2881,
+ 3520,
+ 3953,
+ 6164
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "positions": [
+ 63,
+ 73,
+ 101,
+ 761,
+ 887,
+ 2848,
+ 2881,
+ 3520,
+ 3953,
+ 6164
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "positions": [
+ 63,
+ 73,
+ 101,
+ 761,
+ 887,
+ 2848,
+ 2881,
+ 3520,
+ 3953,
+ 6164
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "positions": [
+ 63,
+ 73,
+ 101,
+ 761,
+ 887,
+ 2848,
+ 2881,
+ 3520,
+ 3953,
+ 6164
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "cutoff": 1032,
+ "positions": [
+ 63,
+ 73,
+ 761,
+ 887
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "cutoff": 1032,
+ "positions": [
+ 63,
+ 73,
+ 761,
+ 887
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "cutoff": 6429,
+ "positions": [
+ 2848,
+ 2881,
+ 3520,
+ 6164
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "cutoff": 6429,
+ "positions": [
+ 2848,
+ 2881,
+ 3520,
+ 6164
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "position": 101,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "position": 101,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "position": 3953,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "position": 3953,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "normalization",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825,
+ 3116,
+ 3763,
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 2.220446049250313e-16,
+ "min_value": 1.7938424598829302e-09
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825,
+ 3116,
+ 3763,
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825,
+ 3116,
+ 3763,
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825,
+ 3116,
+ 3763,
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825,
+ 3116,
+ 3763,
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825,
+ 3116,
+ 3763,
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "cutoff": 1012,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "cutoff": 1012,
+ "positions": [
+ 143,
+ 239,
+ 244,
+ 825
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "cutoff": 7884,
+ "positions": [
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "cutoff": 7884,
+ "positions": [
+ 3871,
+ 4527,
+ 5400,
+ 5450
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "position": 3116,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "position": 3116,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "position": 3763,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "position": 3763,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "normalization",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "positions": [
+ 154,
+ 258,
+ 307,
+ 554,
+ 673,
+ 711,
+ 1400,
+ 3940,
+ 4535,
+ 6489
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 2.220446049250313e-16,
+ "min_value": 1.4684905227496776e-09
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "positions": [
+ 154,
+ 258,
+ 307,
+ 554,
+ 673,
+ 711,
+ 1400,
+ 3940,
+ 4535,
+ 6489
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "positions": [
+ 154,
+ 258,
+ 307,
+ 554,
+ 673,
+ 711,
+ 1400,
+ 3940,
+ 4535,
+ 6489
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "positions": [
+ 154,
+ 258,
+ 307,
+ 554,
+ 673,
+ 711,
+ 1400,
+ 3940,
+ 4535,
+ 6489
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "positions": [
+ 154,
+ 258,
+ 307,
+ 554,
+ 673,
+ 711,
+ 1400,
+ 3940,
+ 4535,
+ 6489
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "positions": [
+ 154,
+ 258,
+ 307,
+ 554,
+ 673,
+ 711,
+ 1400,
+ 3940,
+ 4535,
+ 6489
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "cutoff": 6554,
+ "positions": [
+ 154,
+ 711,
+ 1400,
+ 4535
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "cutoff": 6554,
+ "positions": [
+ 154,
+ 711,
+ 1400,
+ 4535
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "cutoff": 1458,
+ "positions": [
+ 258,
+ 307,
+ 554,
+ 673
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "cutoff": 1458,
+ "positions": [
+ 258,
+ 307,
+ 554,
+ 673
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "position": 3940,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "position": 3940,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "position": 6489,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "position": 6489,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "normalization",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "positions": [
+ 33,
+ 95,
+ 109,
+ 445,
+ 448,
+ 594,
+ 2322,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "expected_vocab_size": 1024,
+ "vocab_size_source": "explicit",
+ "wrong_length_count": 0,
+ "observed_sizes": [
+ 1024
+ ],
+ "max_abs_diff": 1.1102230246251565e-16,
+ "min_value": 1.1918842661620083e-09
+ },
+ {
+ "kind": "trace_coverage",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "positions": [
+ 33,
+ 95,
+ 109,
+ 445,
+ 448,
+ 594,
+ 2322,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0,
+ "present_fields": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "missing_accounting_fields": []
+ },
+ {
+ "kind": "gold_logprob_consistency",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "positions": [
+ 33,
+ 95,
+ 109,
+ 445,
+ 448,
+ 594,
+ 2322,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "pass": true,
+ "missing_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_contribution_consistency",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "positions": [
+ 33,
+ 95,
+ 109,
+ 445,
+ 448,
+ 594,
+ 2322,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "pass": true,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "state_hash_consistency",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "positions": [
+ 33,
+ 95,
+ 109,
+ 445,
+ 448,
+ 594,
+ 2322,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "pass": true,
+ "compared_fields": [
+ "state_hash_before",
+ "state_hash_after"
+ ],
+ "mismatch_count": 0,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "repeatability",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "positions": [
+ 33,
+ 95,
+ 109,
+ 445,
+ 448,
+ 594,
+ 2322,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "cutoff": 470,
+ "positions": [
+ 95,
+ 109,
+ 445,
+ 448
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "cutoff": 470,
+ "positions": [
+ 95,
+ 109,
+ 445,
+ 448
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "future_suffix_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "cutoff": 3922,
+ "positions": [
+ 594,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "cutoff": 3922,
+ "positions": [
+ 594,
+ 3554,
+ 3648,
+ 3776
+ ],
+ "probe_family": "future_suffix",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "position": 33,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "position": 33,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "answer_mask_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "position": 2322,
+ "pass": true,
+ "shape_mismatch": false,
+ "max_abs_diff": 0.0
+ },
+ {
+ "kind": "accounting_path_invariance",
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "position": 2322,
+ "probe_family": "answer_mask",
+ "pass": true,
+ "compared_fields": [
+ "weights",
+ "counted",
+ "path_ids"
+ ],
+ "max_abs_diff": 0.0
+ }
+ ],
+ "alerts": []
+}
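
The probe families summarized above are black-box invariance tests against the adapter surface. A normalization probe, for instance, requests the full next-token distribution at a sampled position and checks shape, non-negativity, and unit mass against the explicit `--vocab-size` and the 1e-7 tolerances; the suffix and answer-mask probes then rerun the same positions after perturbing later tokens and require identical outputs (every probe above reports `max_abs_diff` at or near 0.0). A minimal sketch of the normalization check, assuming a hypothetical `predict(prefix)` callable (the real harness and adapter API are not part of this diff):

```python
import numpy as np

def check_normalization(predict, tokens, position, vocab_size=1024,
                        atol=1e-7, rtol=1e-7):
    """Probe one position: the prediction must be a 1-D distribution
    over the official alphabet that sums to 1 within tolerance."""
    p = np.asarray(predict(tokens[:position]))       # next-token distribution
    assert p.ndim == 1 and p.shape[0] == vocab_size  # full-alphabet shape check
    assert np.all(p >= 0.0)                          # non-negative mass
    assert np.isclose(p.sum(), 1.0, atol=atol, rtol=rtol)
    return float(abs(p.sum() - 1.0))                 # residual, cf. max_abs_diff
```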
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/replay.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/replay.json
new file mode 100644
index 0000000000..43fd325f20
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/replay.json
@@ -0,0 +1,338 @@
+{
+ "profile": "parameter-golf",
+ "adapter": {
+ "adapter": "OpcNativeDetectAdapter",
+ "artifact_path": "model_artifact.npz",
+ "vocab_size": 1024,
+ "notes": "opc-native packed-memory replay adapter for legality and replay scans."
+ },
+ "token_count": 131072,
+ "audited_token_count": 65536,
+ "chunk_size": 8192,
+ "chunk_count": 8,
+ "max_chunks": 8,
+ "position_batch_size": 128,
+ "selected_chunks": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7
+ ],
+ "aggregate": {
+ "total_loss_nats": 277494.23437665706,
+ "mean_loss_nats": 4.234225988413346,
+ "mean_bpb": 6.1086968354871045
+ },
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 8,
+ "position_count": 65536,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 131072,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ },
+ "chunks": [
+ {
+ "chunk_index": 0,
+ "chunk_start": 0,
+ "token_count": 8192,
+ "total_loss_nats": 34394.62213477679,
+ "mean_loss_nats": 4.19856227231162,
+ "mean_bpb": 6.057244969127472,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ },
+ {
+ "chunk_index": 1,
+ "chunk_start": 8192,
+ "token_count": 8192,
+ "total_loss_nats": 35620.48078424682,
+ "mean_loss_nats": 4.348203220733255,
+ "mean_bpb": 6.273131223329285,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ },
+ {
+ "chunk_index": 2,
+ "chunk_start": 16384,
+ "token_count": 8192,
+ "total_loss_nats": 34939.65421480415,
+ "mean_loss_nats": 4.2650945086430845,
+ "mean_bpb": 6.153230696542129,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ },
+ {
+ "chunk_index": 3,
+ "chunk_start": 24576,
+ "token_count": 8192,
+ "total_loss_nats": 35442.36588394684,
+ "mean_loss_nats": 4.32646067919273,
+ "mean_bpb": 6.241763366472448,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ },
+ {
+ "chunk_index": 4,
+ "chunk_start": 32768,
+ "token_count": 8192,
+ "total_loss_nats": 34642.58123282339,
+ "mean_loss_nats": 4.228830716897386,
+ "mean_bpb": 6.100913104026779,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ },
+ {
+ "chunk_index": 5,
+ "chunk_start": 40960,
+ "token_count": 8192,
+ "total_loss_nats": 33934.093170875654,
+ "mean_loss_nats": 4.142345357772907,
+ "mean_bpb": 5.976141105308392,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ },
+ {
+ "chunk_index": 6,
+ "chunk_start": 49152,
+ "token_count": 8192,
+ "total_loss_nats": 33494.50629922753,
+ "mean_loss_nats": 4.088684850979923,
+ "mean_bpb": 5.8987253582665655,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ },
+ {
+ "chunk_index": 7,
+ "chunk_start": 57344,
+ "token_count": 8192,
+ "total_loss_nats": 35025.93065595588,
+ "mean_loss_nats": 4.275626300775865,
+ "mean_bpb": 6.168424860823763,
+ "trace_fields_present": [
+ "counted",
+ "gold_logprobs",
+ "loss_nats",
+ "path_ids",
+ "state_hash_after",
+ "state_hash_before",
+ "weights"
+ ],
+ "repeat_compared": true,
+ "repeatability": {
+ "covered": true,
+ "pass": true,
+ "chunk_count": 1,
+ "position_count": 8192,
+ "max_abs_prediction_diff": 0.0,
+ "mean_abs_prediction_diff": 0.0,
+ "prediction_diff_failures": 0,
+ "max_abs_gold_logprob_diff": 0.0,
+ "mean_abs_gold_logprob_diff": 0.0,
+ "gold_logprob_diff_failures": 0,
+ "state_hash_compared_count": 16384,
+ "state_hash_mismatch_count": 0,
+ "tolerances": {
+ "atol": 1e-07,
+ "rtol": 1e-07
+ }
+ }
+ }
+ ]
+}
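
The aggregate block is a straight roll-up of the per-chunk rows: the eight `total_loss_nats` values sum to the reported total, and dividing by the 65536 audited positions reproduces `mean_loss_nats` (dividing further by ln 2 gives `mean_bpb`, as with the metrics summary earlier). A sketch using the numbers above:

```python
import math

chunk_totals = [  # total_loss_nats per chunk, copied from the report above
    34394.62213477679, 35620.48078424682, 34939.65421480415,
    35442.36588394684, 34642.58123282339, 33934.093170875654,
    33494.50629922753, 35025.93065595588,
]

total = sum(chunk_totals)
mean_nats = total / (8 * 8192)  # 65536 audited positions
assert math.isclose(total, 277494.23437665706, rel_tol=1e-12)
assert math.isclose(mean_nats, 4.234225988413346, rel_tol=1e-12)
```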
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/submission.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/submission.json
new file mode 100644
index 0000000000..5039fc99cd
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/audits_bundle/reports/submission.json
@@ -0,0 +1,84 @@
+{
+ "profile": "parameter-golf",
+ "verdict": "pass",
+ "submission": {
+ "repo_root": ".",
+ "submission_root": ".",
+ "track": null,
+ "name": "OPC causal packed-memory legal packet stress test"
+ },
+ "extracted_claims": {
+ "submission_json": {
+ "name": "OPC causal packed-memory legal packet stress test",
+ "track": "track_non_record_16mb",
+ "val_bpb": 2.48898442865273,
+ "pre_quant_val_bpb": 2.48898442865273,
+ "bytes_total": 6146877,
+ "bytes_model_int6_zlib": 2705939
+ },
+ "results_json": {
+ "val_bpb": 2.48898442865273,
+ "pre_quant_val_bpb": 2.48898442865273,
+ "bytes_total": 6146877,
+ "bytes_model_int6_zlib": 2705939
+ },
+ "readme": {
+ "name": "OPC causal packed-memory legal packet stress test"
+ },
+ "logs": {
+ "train.log": {
+ "path": "train.log"
+ }
+ },
+ "artifact_files": {
+ "model_artifact.npz": {
+ "bytes": 2705939,
+ "suffix": ".npz",
+ "looks_like_artifact": true
+ }
+ },
+ "code_files": [
+ "train_gpt.py"
+ ],
+ "patch": null
+ },
+ "checks": {
+ "presence": {
+ "pass": true,
+ "checked_count": 2,
+ "finding_count": 0
+ },
+ "claim_consistency": {
+ "pass": true,
+ "finding_count": 0
+ },
+ "artifact_bytes": {
+ "pass": true,
+ "artifact_count": 1,
+ "finding_count": 0
+ },
+ "protocol_shape": {
+ "pass": true,
+ "scanned_count": 1,
+ "finding_count": 0
+ },
+ "data_boundary_signals": {
+ "pass": true,
+ "scanned_count": 1,
+ "finding_count": 0
+ },
+ "reproducibility_surface": {
+ "pass": true,
+ "has_log": true,
+ "has_code": true,
+ "has_artifact": true,
+ "finding_count": 0
+ },
+ "patch_triage": {
+ "pass": null,
+ "present": false
+ }
+ },
+ "findings": [],
+ "alerts": []
+}
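
The `claim_consistency` check passes because the same headline figures appear, byte-for-byte, in every place they are claimed. A minimal sketch of the idea using the extracted fields above (the real checker's logic is not shown in this diff):

```python
import json

sub = json.load(open("submission.json"))
res = json.load(open("results.json"))

# Headline numbers must agree everywhere they are claimed.
for key in ("val_bpb", "pre_quant_val_bpb", "bytes_total", "bytes_model_int6_zlib"):
    assert sub[key] == res[key], f"claim mismatch on {key}"
```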
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/model_artifact.npz b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/model_artifact.npz
new file mode 100644
index 0000000000..95bd09f086
Binary files /dev/null and b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/model_artifact.npz differ
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/opc_native_detect_adapter.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/opc_native_detect_adapter.py
new file mode 100644
index 0000000000..f689a2ce14
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/opc_native_detect_adapter.py
@@ -0,0 +1,11 @@
+from pathlib import Path
+import sys
+
+_VENDOR_ROOT = Path(__file__).resolve().parent / "vendor"
+if _VENDOR_ROOT.exists() and str(_VENDOR_ROOT) not in sys.path:
+ sys.path.insert(0, str(_VENDOR_ROOT))
+
+from opc_parameter_golf_submission.opc_native_detect_adapter import build_adapter
+
+
+__all__ = ["build_adapter"]
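
This shim and `train_gpt.py` below share one trick: prepend the packet's `vendor/` directory to `sys.path` at import time, so the vendored packages resolve with no installation step and the packet stays self-contained for one-shot runtime handoff. A smoke check, assuming it is run from the packet root (`build_adapter`'s signature lives in the vendored package and is not shown in this diff, so it is only imported, not called):

```python
# Run from the packet root; importing the shim prepends ./vendor to sys.path.
from opc_native_detect_adapter import build_adapter

# Expected origin: opc_parameter_golf_submission.opc_native_detect_adapter
print(build_adapter.__module__)
```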
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/results.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/results.json
new file mode 100644
index 0000000000..37ce787eb1
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/results.json
@@ -0,0 +1,24 @@
+{
+ "run_id": "opc_native_tokens100000000_62021846",
+ "pre_quant_val_bpb": 2.48898442865273,
+ "val_bpb": 2.48898442865273,
+ "test_bits_per_token": 6.062992566022187,
+ "test_eval_loss": 4.202546202894187,
+ "train_bits_per_token": 6.0696419226081915,
+ "train_time_sec": 52.381125042,
+ "train_tokens": 100000000,
+ "eval_tokens": 62021846,
+ "eval_bytes_per_token": 2.4359302919801884,
+ "checkpoint_format": "compressed_npz_artifact",
+ "checkpoint_bytes_raw_npz": 2705939,
+ "bytes_model_int6_zlib": 2705939,
+ "unigram_bits_per_token": 8.649563865337807,
+ "bigram_bits_per_token": 6.090255597305575,
+ "trigram_bits_per_token": 7.22427841818739,
+ "mixture_weights": [
+ 0.0,
+ 0.9,
+ 0.1
+ ],
+ "bytes_total": 6146877
+}
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/submission.json b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/submission.json
new file mode 100644
index 0000000000..4fcf9ebe3d
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/submission.json
@@ -0,0 +1,11 @@
+{
+ "name": "OPC causal packed-memory legal packet stress test",
+ "track": "track_non_record_16mb",
+ "pre_quant_val_bpb": 2.48898442865273,
+ "val_bpb": 2.48898442865273,
+ "run_id": "opc_native_tokens100000000_62021846",
+ "source_repo": "opc-parameter-golf-submission",
+ "bytes_model_int6_zlib": 2705939,
+ "notes": "Standalone opc-native causal packed-memory submission rebuilt from scratch for packet stress testing.",
+ "bytes_total": 6146877
+}
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/train.log b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/train.log
new file mode 100644
index 0000000000..c79eba1239
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/train.log
@@ -0,0 +1,13 @@
+run_id=opc_native_tokens100000000_62021846
+train_tokens=100000000
+eval_tokens=62021846
+train_bits_per_token=6.0696419226081915
+eval_bits_per_token=6.062992566022187
+eval_bits_per_byte=2.48898442865273
+unigram_bits_per_token=8.649563865337807
+bigram_bits_per_token=6.090255597305575
+trigram_bits_per_token=7.22427841818739
+mixture_weights=[0.0, 0.9, 0.1]
+eval_bytes_per_token=2.4359302919801884
+artifact_bytes=2705939
+opc_commit=4072074288fa279b655c11c30f8fca2e1859f925
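
The log is flat `key=value` lines, one metric per line, so it is trivially machine-checkable against `results.json`. A minimal parsing sketch (`parse_train_log` is a hypothetical helper, not part of the packet):

```python
from pathlib import Path

def parse_train_log(path: str = "train.log") -> dict[str, str]:
    """One key=value pair per line; values are kept as strings."""
    metrics = {}
    for line in Path(path).read_text(encoding="utf-8").splitlines():
        if line.strip():
            key, _, value = line.partition("=")
            metrics[key] = value
    return metrics

assert parse_train_log()["run_id"] == "opc_native_tokens100000000_62021846"
```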
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/train_gpt.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/train_gpt.py
new file mode 100644
index 0000000000..4b53f8a06c
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/train_gpt.py
@@ -0,0 +1,12 @@
+from pathlib import Path
+import sys
+
+_VENDOR_ROOT = Path(__file__).resolve().parent / "vendor"
+if _VENDOR_ROOT.exists() and str(_VENDOR_ROOT) not in sys.path:
+ sys.path.insert(0, str(_VENDOR_ROOT))
+
+from opc_parameter_golf_submission.cli import main
+
+
+if __name__ == "__main__":
+ main()
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/__init__.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/__init__.py
new file mode 100644
index 0000000000..ad6aeea36f
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/__init__.py
@@ -0,0 +1,2 @@
+"""conker-ledger public package."""
+
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/cli.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/cli.py
new file mode 100644
index 0000000000..71bab49f43
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/cli.py
@@ -0,0 +1,122 @@
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from .ledger import (
+ dumps_json,
+ lineage_rows,
+ render_table,
+ scan_results,
+ sort_records,
+ survival_rows,
+ write_report_bundle,
+ write_validity_bundle,
+)
+
+
+def build_parser() -> argparse.ArgumentParser:
+ parser = argparse.ArgumentParser(description="Validity packaging and backlog-analysis tool for Conker-style experiment outputs.")
+ sub = parser.add_subparsers(dest="command", required=True)
+
+ p_bundle = sub.add_parser("bundle", help="Assemble a manifest-first validity bundle")
+ p_bundle.add_argument("manifest")
+ p_bundle.add_argument("out_dir")
+ p_bundle.add_argument("--json")
+
+ p_scan = sub.add_parser("scan", help="Scan a directory of experiment JSON outputs")
+ p_scan.add_argument("root")
+ p_scan.add_argument("--json")
+
+ p_table = sub.add_parser("table", help="Show a ranked table of normalized records")
+ p_table.add_argument("root")
+ p_table.add_argument("--kind", choices=["all", "bridge", "full_eval", "study"], default="all")
+ p_table.add_argument("--metric", default="bpb")
+ p_table.add_argument("--top", type=int, default=20)
+ p_table.add_argument("--descending", action="store_true")
+ p_table.add_argument("--json")
+
+ p_survival = sub.add_parser("survival", help="Compare bridge rows with their full-eval descendants")
+ p_survival.add_argument("root")
+ p_survival.add_argument("--top", type=int, default=50)
+ p_survival.add_argument("--json")
+
+ p_lineage = sub.add_parser("lineage", help="Trace warm-start ancestry from loaded_state_path to saved_state_path")
+ p_lineage.add_argument("root")
+ p_lineage.add_argument("--top", type=int, default=50)
+ p_lineage.add_argument("--json")
+
+ p_report = sub.add_parser("report", help="Write a public report bundle with JSON, CSV, and SVG outputs")
+ p_report.add_argument("root")
+ p_report.add_argument("out_dir")
+ p_report.add_argument("--top", type=int, default=20)
+ p_report.add_argument("--json")
+
+ return parser
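+
+# Example invocations (hypothetical paths; assumes the vendor directory is importable):
+#   python -m conker_ledger.cli scan results/
+#   python -m conker_ledger.cli table results/ --kind full_eval --metric bpb --top 10
+#   python -m conker_ledger.cli bundle bundle_manifest.json out/bundle --json out/bundle_summary.json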
+
+
+def write_output(text: str, json_path: str | None) -> None:
+ print(text)
+ if json_path:
+ path = Path(json_path)
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(text + "\n", encoding="utf-8")
+
+
+def main() -> None:
+ parser = build_parser()
+ args = parser.parse_args()
+
+ if args.command == "bundle":
+ result = write_validity_bundle(Path(args.manifest), Path(args.out_dir))
+ write_output(dumps_json(result), args.json)
+ return
+
+ root = Path(args.root)
+ scanned = scan_results(root)
+
+ if args.command == "scan":
+ write_output(dumps_json(scanned), args.json)
+ return
+
+ if args.command == "table":
+ records = scanned["records"]
+ if args.kind != "all":
+ records = [record for record in records if record["kind"] == args.kind]
+ records = sort_records(records, args.metric, ascending=not args.descending)
+ if args.json:
+ write_output(dumps_json({"rows": records[: args.top]}), args.json)
+ else:
+ columns = ["kind", "family_id", "run_id", "seed", args.metric, "path"]
+ write_output(render_table(records, columns, top=args.top), None)
+ return
+
+ if args.command == "survival":
+ rows = survival_rows(scanned["records"])
+ rows = sort_records(rows, "full_fp16", ascending=True)
+ if args.json:
+ write_output(dumps_json({"rows": rows[: args.top]}), args.json)
+ else:
+ columns = ["family_id", "run_id", "seed", "bridge_fp16", "full_fp16", "bridge_int6", "full_int6", "status"]
+ write_output(render_table(rows, columns, top=args.top), None)
+ return
+
+ if args.command == "lineage":
+ rows = lineage_rows(scanned["records"])
+ if args.json:
+ write_output(dumps_json({"rows": rows[: args.top]}), args.json)
+ else:
+ columns = ["parent_run_id", "child_run_id", "seed", "child_bpb", "family_id"]
+ write_output(render_table(rows, columns, top=args.top), None)
+ return
+
+ if args.command == "report":
+ result = write_report_bundle(root, Path(args.out_dir), top=args.top)
+ write_output(dumps_json(result), args.json)
+ return
+
+ raise ValueError(f"Unknown command: {args.command}")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/ledger.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/ledger.py
new file mode 100644
index 0000000000..0532bb5c7e
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/conker_ledger/ledger.py
@@ -0,0 +1,1435 @@
+from __future__ import annotations
+
+import json
+import math
+import re
+import shutil
+from collections import Counter, defaultdict
+from pathlib import Path
+from typing import Any
+import csv
+
+
+DATE_SUFFIX_RE = re.compile(r"_\d{4}-\d{2}-\d{2}$")
+FULL_EVAL_SUFFIX_RE = re.compile(r"_fullval_(?:train|test)_[a-z0-9]+$")
+SEED_RE = re.compile(r"_seed(\d+)")
+
+
+def _json_default(value: Any) -> Any:
+ if isinstance(value, float):
+ if math.isnan(value):
+ return "NaN"
+ if math.isinf(value):
+ return "Infinity" if value > 0 else "-Infinity"
+ raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
+
+
+def dumps_json(value: Any) -> str:
+ return json.dumps(value, indent=2, default=_json_default)
+
+
+def load_json(path: Path) -> Any:
+ return json.loads(path.read_text(encoding="utf-8"))
+
+
+CLAIM_LEVELS = {
+ 0: "No justified claim yet",
+ 1: "Bridge metric only",
+ 2: "Fresh-process held-out replay confirmed",
+ 3: "Packed-artifact replay confirmed",
+ 4: "Structural audit passed",
+ 5: "Behavioral legality audit passed",
+}
+TIER3_PROMOTION_TRUST_LEVELS = {"traced", "strict"}
+LIMITED_TIER3_SCOPES = {"prefix-only", "one_shot_runtime_handoff"}
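+# A tier3 "pass" only counts toward claim level 5 when its scope is unrestricted
+# (not in LIMITED_TIER3_SCOPES) and the achieved trust level meets the promotion
+# floor above; see _tier3_claim_credit.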
+
+
+def finite_or_none(value: Any) -> float | None:
+ if isinstance(value, (int, float)):
+ if math.isfinite(float(value)):
+ return float(value)
+ return None
+ return None
+
+
+def _resolve_input_path(base_dir: Path, value: str | Path) -> Path:
+ path = Path(value)
+ if not path.is_absolute():
+ path = base_dir / path
+ return path.resolve()
+
+
+def _resolve_output_path(value: str | Path) -> Path:
+ path = Path(value)
+ if path.is_absolute() or ".." in path.parts:
+ raise ValueError(f"Bundle attachment destination must stay inside the bundle: {value}")
+ return path
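+# _resolve_output_path rejects absolute destinations and ".." components up front;
+# _copy_attachment re-checks the fully resolved path as well, so symlinked segments
+# cannot move an attachment outside the bundle directory.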
+
+
+def _load_manifest_value(value: Any, base_dir: Path) -> Any:
+ if value is None:
+ return {}
+ if isinstance(value, str):
+ return load_json(_resolve_input_path(base_dir, value))
+ return value
+
+
+def _dict_has_payload(data: Any, keys: tuple[str, ...]) -> bool:
+ if not isinstance(data, dict):
+ return False
+ for key in keys:
+ if key in data and data[key] not in (None, "", {}, []):
+ return True
+ return False
+
+
+def _audit_status(audits: Any, tier: str) -> str | None:
+ if not isinstance(audits, dict):
+ return None
+ tier_data = audits.get(tier)
+ if not isinstance(tier_data, dict):
+ return None
+ status = tier_data.get("status")
+ return str(status) if status is not None else None
+
+
+def infer_claim_level(claim: Any, metrics: Any, audits: Any) -> dict[str, Any]:
+ level = 0
+ if claim not in (None, "", {}, []) or metrics not in (None, "", {}, []):
+ level = 1
+ if _dict_has_payload(metrics, ("fresh_process_full", "fresh_process_replay", "held_out_replay")):
+ level = max(level, 2)
+ if _dict_has_payload(metrics, ("packed_artifact_full", "packed_artifact_replay", "packed_replay")):
+ level = max(level, 3)
+ if _audit_status(audits, "tier2") == "pass":
+ level = max(level, 4)
+ tier3_credit = _tier3_claim_credit(audits)
+ if tier3_credit["credited"]:
+ level = max(level, 5)
+ label = CLAIM_LEVELS[level]
+ if level == 5 and tier3_credit.get("trust_achieved") in TIER3_PROMOTION_TRUST_LEVELS:
+ label = f"{label} ({tier3_credit['trust_achieved']})"
+ return {
+ "level": level,
+ "label": label,
+ "tier3_credit": tier3_credit,
+ "notes": tier3_credit.get("notes", []),
+ }
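+# Worked example (hypothetical inputs): metrics containing "packed_artifact_full"
+# reach level 3, a tier2 "pass" lifts that to 4, and a tier3 "pass" scoped to
+# "prefix-only" is withheld, leaving the claim at level 4.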
+
+
+def _copy_attachment(base_dir: Path, out_dir: Path, spec: dict[str, Any]) -> dict[str, Any]:
+ source_value = spec.get("source") or spec.get("path")
+ if not isinstance(source_value, str):
+ raise ValueError("Each attachment needs a string source/path")
+ source = _resolve_input_path(base_dir, source_value)
+ if not source.exists():
+ raise FileNotFoundError(source)
+ dest_rel = _resolve_output_path(spec.get("dest") or f"artifacts/{source.name}")
+ dest = out_dir / dest_rel
+ dest.parent.mkdir(parents=True, exist_ok=True)
+ resolved_dest = dest.resolve()
+ resolved_out = out_dir.resolve()
+ if not resolved_dest.is_relative_to(resolved_out):
+ raise ValueError(f"Resolved attachment destination escapes bundle: {dest_rel} -> {resolved_dest}")
+ if source.is_dir():
+ shutil.copytree(source, dest, dirs_exist_ok=True)
+ kind = "directory"
+ else:
+ shutil.copy2(source, dest)
+ kind = "file"
+ return {
+ "source": str(source),
+ "dest": str(dest_rel),
+ "kind": kind,
+ }
+
+
+def render_validity_bundle_readme(
+ *,
+ bundle_id: str,
+ claim: Any,
+ metrics: Any,
+ provenance: Any,
+ audits: Any,
+ claim_level: dict[str, Any],
+ attachments: list[dict[str, Any]],
+ detector_summaries: list[dict[str, Any]],
+) -> str:
+ requested_label = None
+ if isinstance(claim, dict):
+ requested_label = claim.get("requested_label") or claim.get("requested_claim")
+ bridge_bpb = metrics.get("bridge", {}).get("bpb") if isinstance(metrics, dict) and isinstance(metrics.get("bridge"), dict) else None
+ fresh_bpb = (
+ metrics.get("fresh_process_full", {}).get("bpb")
+ if isinstance(metrics, dict) and isinstance(metrics.get("fresh_process_full"), dict)
+ else None
+ )
+ packed_bpb = (
+ metrics.get("packed_artifact_full", {}).get("bpb")
+ if isinstance(metrics, dict) and isinstance(metrics.get("packed_artifact_full"), dict)
+ else None
+ )
+ provenance_rows: list[str] = []
+ if isinstance(provenance, dict):
+ for key in ("run_id", "family_id", "submission_pr", "source_repo", "source_root", "report_dir", "source_commit"):
+ value = provenance.get(key)
+ if value not in (None, "", [], {}):
+ provenance_rows.append(f"- {key}: `{value}`")
+ tier_lines = []
+ for tier in ("tier1", "tier2", "tier3"):
+ status = _audit_status(audits, tier) or "missing"
+ tier_lines.append(f"- {tier}: `{status}`")
+ tier3_details = _tier3_detail_lines(audits)
+ attachment_lines = [f"- `{row['dest']}` <= `{row['source']}`" for row in attachments] or ["- none"]
+ detector_lines: list[str] = []
+ for row in detector_summaries:
+ kind = row.get("kind", "detector")
+ detector_lines.append(f"- `{row['dest']}` kind=`{kind}`")
+ if kind == "legality":
+ checks = ", ".join(f"{key}={value}" for key, value in row["checks"].items()) or "none"
+ obligations = ", ".join(f"{key}={value}" for key, value in row["obligations"].items()) or "none"
+ detector_lines.append(f" profile: `{row['profile']}`")
+ if row.get("trust_requested") is not None:
+ detector_lines.append(
+ " trust: "
+ f"requested=`{row.get('trust_requested')}`, "
+ f"achieved=`{row.get('trust_achieved')}`, "
+ f"satisfied=`{row.get('trust_satisfied')}`"
+ )
+ detector_lines.append(f" checks: {checks}")
+ detector_lines.append(f" obligations: {obligations}")
+ elif kind == "submission":
+ checks = ", ".join(f"{key}={value}" for key, value in row["checks"].items()) or "none"
+ detector_lines.append(f" verdict: `{row['verdict']}`")
+ detector_lines.append(f" checks: {checks}")
+ elif kind == "provenance":
+ checks = ", ".join(f"{key}={value}" for key, value in row["checks"].items()) or "none"
+ detector_lines.append(f" verdict: `{row['verdict']}`")
+ detector_lines.append(f" selection: submitted_run_id=`{row.get('submitted_run_id')}`, selection_mode=`{row.get('selection_mode')}`")
+ detector_lines.append(f" checks: {checks}")
+ elif kind == "replay":
+ repeatability = row.get("repeatability", "unknown")
+ mean_bpb = row.get("mean_bpb")
+ detector_lines.append(f" profile: `{row['profile']}`")
+ detector_lines.append(f" mean_bpb: `{mean_bpb}`")
+ detector_lines.append(f" repeatability: `{repeatability}`")
+ metric_lines = []
+ if bridge_bpb is not None:
+ metric_lines.append(f"- bridge bpb: `{bridge_bpb}`")
+ if fresh_bpb is not None:
+ metric_lines.append(f"- fresh-process full bpb: `{fresh_bpb}`")
+ if packed_bpb is not None:
+ metric_lines.append(f"- packed-artifact full bpb: `{packed_bpb}`")
+ if not metric_lines:
+ if isinstance(metrics, dict) and metrics:
+ for key, value in list(metrics.items())[:8]:
+ if isinstance(value, dict):
+ fields = []
+ for subkey, subvalue in value.items():
+ if subvalue in (None, "", [], {}):
+ continue
+ fields.append(f"{subkey}={subvalue}")
+ if len(fields) == 3:
+ break
+ metric_lines.append(f"- {key}: " + (", ".join(fields) if fields else "object"))
+ else:
+ metric_lines.append(f"- {key}: `{value}`")
+ else:
+ metric_lines.append("- no structured metric summary provided")
+ lines = [
+ "# Validity Bundle",
+ "",
+ f"- bundle id: `{bundle_id}`",
+ f"- strongest justified claim: `Tier {claim_level['level']}: {claim_level['label']}`",
+ ]
+ for note in claim_level.get("notes", []):
+ lines.append(f"- claim note: {note}")
+ if requested_label:
+ lines.append(f"- requested label: `{requested_label}`")
+ lines.extend(
+ [
+ "",
+ "## Audit Coverage",
+ "",
+ *tier_lines,
+ *tier3_details,
+ "",
+ "## Metrics",
+ "",
+ *metric_lines,
+ "",
+ "## Provenance",
+ "",
+ *(provenance_rows or ["- no provenance summary provided"]),
+ "",
+ "## Attachments",
+ "",
+ *attachment_lines,
+ "",
+ "## Detector Summaries",
+ "",
+ *(detector_lines or ["- no detector JSON attachments summarized"]),
+ "",
+ "## Files",
+ "",
+ "- `claim.json`",
+ "- `evidence/metrics.json`",
+ "- `evidence/provenance.json`",
+ "- `evidence/audits.json`",
+ "- `bundle_manifest.json`",
+ "- `report/README.md`",
+ ]
+ )
+ return "\n".join(lines) + "\n"
+
+
+def write_validity_bundle(manifest_path: Path, out_dir: Path) -> dict[str, Any]:
+ manifest = load_json(manifest_path)
+ if not isinstance(manifest, dict):
+ raise ValueError("Bundle manifest must be a JSON object")
+ base_dir = manifest_path.parent.resolve()
+
+ claim = _load_manifest_value(manifest.get("claim"), base_dir)
+ metrics = _load_manifest_value(manifest.get("metrics"), base_dir)
+ provenance = _load_manifest_value(manifest.get("provenance"), base_dir)
+ audits = _load_manifest_value(manifest.get("audits"), base_dir)
+
+ bundle_id = (
+ manifest.get("bundle_id")
+ or (claim.get("candidate_id") if isinstance(claim, dict) else None)
+ or manifest_path.stem
+ )
+ claim_level = infer_claim_level(claim, metrics, audits)
+
+ out_dir.mkdir(parents=True, exist_ok=True)
+ attachments = [
+ _copy_attachment(base_dir, out_dir, spec)
+ for spec in manifest.get("attachments", [])
+ ]
+ detector_summaries = _collect_detector_attachment_summaries(out_dir, attachments)
+
+ normalized_manifest = {
+ "bundle_id": bundle_id,
+ "claim": claim,
+ "metrics": metrics,
+ "provenance": provenance,
+ "audits": audits,
+ "attachments": attachments,
+ "source_manifest": str(manifest_path.resolve()),
+ "claim_level": claim_level,
+ }
+
+ (out_dir / "claim.json").write_text(dumps_json(claim) + "\n", encoding="utf-8")
+ (out_dir / "evidence" / "metrics.json").parent.mkdir(parents=True, exist_ok=True)
+ (out_dir / "evidence" / "metrics.json").write_text(dumps_json(metrics) + "\n", encoding="utf-8")
+ (out_dir / "evidence" / "provenance.json").write_text(dumps_json(provenance) + "\n", encoding="utf-8")
+ (out_dir / "evidence" / "audits.json").write_text(dumps_json(audits) + "\n", encoding="utf-8")
+ (out_dir / "bundle_manifest.json").write_text(dumps_json(normalized_manifest) + "\n", encoding="utf-8")
+ (out_dir / "report").mkdir(parents=True, exist_ok=True)
+ (out_dir / "report" / "README.md").write_text(
+ render_validity_bundle_readme(
+ bundle_id=str(bundle_id),
+ claim=claim,
+ metrics=metrics,
+ provenance=provenance,
+ audits=audits,
+ claim_level=claim_level,
+ attachments=attachments,
+ detector_summaries=detector_summaries,
+ ),
+ encoding="utf-8",
+ )
+
+ return {
+ "bundle_id": str(bundle_id),
+ "claim_level": claim_level,
+ "attachment_count": len(attachments),
+ "detector_attachment_count": len(detector_summaries),
+ "legality_attachment_count": sum(1 for row in detector_summaries if row.get("kind") == "legality"),
+ "out_dir": str(out_dir),
+ }
+
+
+def _collect_detector_attachment_summaries(
+ out_dir: Path,
+ attachments: list[dict[str, Any]],
+) -> list[dict[str, Any]]:
+ rows: list[dict[str, Any]] = []
+ for spec in attachments:
+ dest = spec.get("dest")
+ if not isinstance(dest, str) or not dest.endswith(".json"):
+ continue
+ path = out_dir / dest
+ if not path.is_file():
+ continue
+ try:
+ data = load_json(path)
+ except Exception:
+ continue
+ if not isinstance(data, dict):
+ continue
+ checks = data.get("checks")
+ obligations = data.get("obligations")
+ profile = data.get("profile")
+ if isinstance(checks, dict) and isinstance(obligations, dict) and profile is not None:
+ trust = data.get("trust", {}) if isinstance(data.get("trust"), dict) else {}
+ rows.append(
+ {
+ "kind": "legality",
+ "dest": dest,
+ "profile": str(profile),
+ "trust_requested": trust.get("requested"),
+ "trust_achieved": trust.get("achieved"),
+ "trust_satisfied": trust.get("satisfied"),
+ "checks": _flatten_legality_checks(checks),
+ "obligations": _flatten_legality_obligations(obligations),
+ }
+ )
+ continue
+ if isinstance(checks, dict) and "submission" in data and "verdict" in data:
+ rows.append(
+ {
+ "kind": "submission",
+ "dest": dest,
+ "verdict": str(data.get("verdict")),
+ "checks": _flatten_generic_checks(checks),
+ }
+ )
+ continue
+ if isinstance(checks, dict) and "provenance" in data and "verdict" in data:
+ provenance = data.get("provenance", {})
+ rows.append(
+ {
+ "kind": "provenance",
+ "dest": dest,
+ "verdict": str(data.get("verdict")),
+ "submitted_run_id": provenance.get("submitted_run_id"),
+ "selection_mode": provenance.get("selection_mode"),
+ "checks": _flatten_generic_checks(checks),
+ }
+ )
+ continue
+ aggregate = data.get("aggregate")
+ repeatability = data.get("repeatability")
+ if profile is not None and isinstance(aggregate, dict) and isinstance(repeatability, dict):
+ rows.append(
+ {
+ "kind": "replay",
+ "dest": dest,
+ "profile": str(profile),
+ "mean_bpb": aggregate.get("mean_bpb"),
+ "repeatability": "pass" if repeatability.get("pass") is True else "fail" if repeatability.get("pass") is False else "unknown",
+ }
+ )
+ return rows
+
+
+def _flatten_legality_checks(checks: dict[str, Any]) -> dict[str, str]:
+ rows: dict[str, str] = {}
+ for key, value in checks.items():
+ if isinstance(value, dict):
+ covered = value.get("covered")
+ passed = value.get("pass")
+ if covered is False:
+ rows[key] = "uncovered"
+ elif passed is True:
+ rows[key] = "pass"
+ elif passed is False:
+ rows[key] = "fail"
+ else:
+ rows[key] = "unknown"
+ else:
+ rows[key] = str(value)
+ return rows
+
+
+def _flatten_legality_obligations(obligations: dict[str, Any]) -> dict[str, str]:
+ rows: dict[str, str] = {}
+ for key, value in obligations.items():
+ if isinstance(value, dict):
+ rows[key] = str(value.get("status", "unknown"))
+ else:
+ rows[key] = str(value)
+ return rows
+
+
+def _flatten_generic_checks(checks: dict[str, Any]) -> dict[str, str]:
+ rows: dict[str, str] = {}
+ for key, value in checks.items():
+ if isinstance(value, dict):
+ passed = value.get("pass")
+ if passed is True:
+ rows[key] = "pass"
+ elif passed is False:
+ rows[key] = "fail"
+ else:
+ rows[key] = "unknown"
+ else:
+ rows[key] = str(value)
+ return rows
+
+
+def _tier3_claim_credit(audits: Any) -> dict[str, Any]:
+ if not isinstance(audits, dict):
+ return {"considered": False, "credited": False, "notes": []}
+ tier3 = audits.get("tier3")
+ if not isinstance(tier3, dict):
+ return {"considered": False, "credited": False, "notes": []}
+ status = str(tier3.get("status")) if tier3.get("status") is not None else None
+ scope = tier3.get("scope")
+ trust_achieved = tier3.get("trust_level_achieved")
+ trust_satisfied = tier3.get("trust_satisfied")
+ notes: list[str] = []
+ credited = status == "pass"
+ if status != "pass":
+ return {
+ "considered": True,
+ "credited": False,
+ "status": status,
+ "scope": scope,
+ "trust_achieved": trust_achieved,
+ "trust_satisfied": trust_satisfied,
+ "notes": notes,
+ }
+ if scope in LIMITED_TIER3_SCOPES:
+ credited = False
+ notes.append(f"Tier 3 was not promoted because its scope was limited: `{scope}`.")
+ if trust_achieved is not None and str(trust_achieved) not in TIER3_PROMOTION_TRUST_LEVELS:
+ credited = False
+ notes.append(
+ "Tier 3 was not promoted because the achieved legality trust level was "
+ f"`{trust_achieved}`, below the promotion floor of `traced`."
+ )
+ return {
+ "considered": True,
+ "credited": credited,
+ "status": status,
+ "scope": scope,
+ "trust_achieved": trust_achieved,
+ "trust_satisfied": trust_satisfied,
+ "notes": notes,
+ }
+
+
+def _tier3_detail_lines(audits: Any) -> list[str]:
+ if not isinstance(audits, dict):
+ return []
+ tier3 = audits.get("tier3")
+ if not isinstance(tier3, dict):
+ return []
+ lines: list[str] = []
+ scope = tier3.get("scope")
+ if scope not in (None, "", [], {}):
+ lines.append(f"- tier3 scope: `{scope}`")
+ trust_requested = tier3.get("trust_level_requested")
+ trust_achieved = tier3.get("trust_level_achieved")
+ trust_satisfied = tier3.get("trust_satisfied")
+ if trust_requested not in (None, "", [], {}):
+ lines.append(
+ "- tier3 trust: "
+ f"requested=`{trust_requested}`, "
+ f"achieved=`{trust_achieved}`, "
+ f"satisfied=`{trust_satisfied}`"
+ )
+ return lines
+
+
+def infer_run_id_from_stem(stem: str) -> str:
+ stem = FULL_EVAL_SUFFIX_RE.sub("", stem)
+ stem = DATE_SUFFIX_RE.sub("", stem)
+ return stem
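+# example (hypothetical stem): "conker7_wide_seed3_fullval_test_ab12cd" -> "conker7_wide_seed3"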
+
+
+def infer_family_id(run_id: str) -> str:
+ run_id = re.sub(r"_seed\d+", "", run_id)
+ run_id = re.sub(r"_save$", "", run_id)
+ return run_id
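+# example (hypothetical run id): "conker7_wide_seed3_save" -> "conker7_wide"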
+
+
+def parse_bridge_record(path: Path, data: dict[str, Any]) -> dict[str, Any]:
+ model = data.get("model", {}) if isinstance(data.get("model"), dict) else {}
+ quant_rows = data.get("quantization", [])
+ saved_state_path = model.get("saved_state_path")
+ loaded_state_path = model.get("loaded_state_path")
+ run_id = infer_run_id_from_stem(Path(saved_state_path).stem) if saved_state_path else infer_run_id_from_stem(path.stem)
+ seed_match = SEED_RE.search(run_id)
+ quant_by_bits: dict[str, float | None] = {}
+ for row in quant_rows if isinstance(quant_rows, list) else []:
+ bits = row.get("bits")
+ key = f"int{int(bits)}" if isinstance(bits, (int, float)) else None
+ if key:
+ quant_by_bits[key] = finite_or_none(row.get("test_bpb"))
+ return {
+ "kind": "bridge",
+ "path": str(path),
+ "title": data.get("title"),
+ "run_id": run_id,
+ "family_id": infer_family_id(run_id),
+ "seed": int(seed_match.group(1)) if seed_match else model.get("seed"),
+ "bpb": finite_or_none(model.get("test_bpb")),
+ "bits_per_token": finite_or_none(model.get("test_bits_per_token")),
+ "loss": finite_or_none(model.get("test_eval_loss")),
+ "train_time_sec": finite_or_none(model.get("train_time_sec")),
+ "params": model.get("params"),
+ "saved_state_path": saved_state_path,
+ "loaded_state_path": loaded_state_path,
+ "int4_bpb": quant_by_bits.get("int4"),
+ "int6_bpb": quant_by_bits.get("int6"),
+ "raw": {
+ "preset": model.get("preset"),
+ "variant": model.get("variant"),
+ "scale": model.get("scale"),
+ "learning_rate": model.get("learning_rate"),
+ },
+ }
+
+
+def parse_full_eval_record(path: Path, data: dict[str, Any]) -> dict[str, Any]:
+ state_npz = data.get("state_npz")
+ run_id = infer_run_id_from_stem(Path(state_npz).stem) if isinstance(state_npz, str) else infer_run_id_from_stem(path.stem)
+ seed_match = SEED_RE.search(run_id)
+ quant_bits = int(data.get("quant_bits", 0) or 0)
+ quant_label = "fp16" if quant_bits == 0 else f"int{quant_bits}"
+ artifact_bytes = data.get("artifact_bytes_zlib")
+ return {
+ "kind": "full_eval",
+ "path": str(path),
+ "title": data.get("title"),
+ "run_id": run_id,
+ "family_id": infer_family_id(run_id),
+ "seed": int(seed_match.group(1)) if seed_match else None,
+ "quant_label": quant_label,
+ "quant_bits": quant_bits,
+ "bpb": finite_or_none(data.get("eval_bpb")),
+ "bits_per_token": finite_or_none(data.get("eval_bits_per_token")),
+ "loss": finite_or_none(data.get("eval_loss")),
+ "eval_tokens": data.get("eval_tokens"),
+ "artifact_bytes": int(artifact_bytes) if isinstance(artifact_bytes, (int, float)) and math.isfinite(float(artifact_bytes)) else None,
+ "state_npz": state_npz,
+ "summary_json": data.get("summary_json"),
+ }
+
+
+def parse_study_record(path: Path, data: dict[str, Any]) -> dict[str, Any]:
+ variants = data.get("variants", [])
+ models = data.get("models", [])
+ best_label = None
+ best_metric = None
+ metric_name = None
+ if isinstance(models, list):
+ ranked: list[tuple[float, str]] = []
+ for model in models:
+ if not isinstance(model, dict):
+ continue
+ label = model.get("label")
+ test_mean = finite_or_none(model.get("test_mean"))
+ if label and test_mean is not None:
+ ranked.append((test_mean, str(label)))
+ if ranked:
+ ranked.sort()
+ best_metric, best_label = ranked[0]
+ metric_name = "test_mean"
+ return {
+ "kind": "study",
+ "path": str(path),
+ "title": data.get("title"),
+ "run_id": infer_run_id_from_stem(path.stem),
+ "family_id": infer_family_id(infer_run_id_from_stem(path.stem)),
+ "variant_count": len(variants) if isinstance(variants, list) else len(models) if isinstance(models, list) else 0,
+ "best_label": best_label,
+ "best_metric": best_metric,
+ "metric_name": metric_name,
+ }
+
+
+def classify_record(path: Path, data: Any) -> dict[str, Any] | None:
+ if not isinstance(data, dict):
+ return None
+ if "eval_bpb" in data:
+ return parse_full_eval_record(path, data)
+ model = data.get("model")
+ if isinstance(model, dict) and "test_bpb" in model:
+ return parse_bridge_record(path, data)
+ if "variants" in data or "models" in data:
+ return parse_study_record(path, data)
+ return None
+
+
+def scan_results(root: Path) -> dict[str, Any]:
+ records: list[dict[str, Any]] = []
+ skipped: list[str] = []
+ for path in sorted(root.glob("*.json")):
+ try:
+ data = load_json(path)
+ record = classify_record(path, data)
+ except Exception as exc: # pragma: no cover - defensive scan path
+ skipped.append(f"{path.name}: {exc}")
+ continue
+ if record is None:
+ skipped.append(path.name)
+ continue
+ records.append(record)
+
+ by_kind = Counter(record["kind"] for record in records)
+ by_family = Counter(record["family_id"] for record in records)
+ return {
+ "root": str(root),
+ "record_count": len(records),
+ "by_kind": dict(by_kind),
+ "family_count": len(by_family),
+ "top_families": by_family.most_common(20),
+ "records": records,
+ "skipped": skipped,
+ }
+
+
+def sort_records(records: list[dict[str, Any]], metric: str, *, ascending: bool = True) -> list[dict[str, Any]]:
+ def key_fn(record: dict[str, Any]) -> tuple[int, float]:
+ value = record.get(metric)
+ if value is None:
+ return (1, float("inf"))
+ try:
+ return (0, float(value))
+ except (TypeError, ValueError):
+ return (1, float("inf"))
+
+ return sorted(records, key=key_fn, reverse=not ascending)
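+# Missing or non-numeric metrics key as (1, inf), so they sink to the bottom of
+# ascending views and surface first once --descending reverses the sort.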
+
+
+def survival_rows(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
+ grouped: dict[str, dict[str, Any]] = defaultdict(lambda: {"bridge": None, "full": {}})
+ for record in records:
+ if record["kind"] == "bridge":
+ grouped[record["run_id"]]["bridge"] = record
+ elif record["kind"] == "full_eval":
+ grouped[record["run_id"]]["full"][record.get("quant_label") or "unknown"] = record
+
+ rows: list[dict[str, Any]] = []
+ for run_id, group in sorted(grouped.items()):
+ bridge = group["bridge"]
+ full = group["full"]
+ if bridge is None:
+ continue
+ bridge_bpb = bridge.get("bpb")
+ bridge_int6 = bridge.get("int6_bpb")
+ full_fp16 = full.get("fp16", {}).get("bpb") if "fp16" in full else None
+ full_int6 = full.get("int6", {}).get("bpb") if "int6" in full else None
+ status = "bridge_only"
+ if full:
+ if any(v.get("bpb") is None for v in full.values()):
+ status = "full_eval_failed"
+ else:
+ status = "survived_full_eval"
+ rows.append(
+ {
+ "run_id": run_id,
+ "family_id": infer_family_id(run_id),
+ "seed": bridge.get("seed"),
+ "bridge_fp16": bridge_bpb,
+ "bridge_int6": bridge_int6,
+ "full_fp16": full_fp16,
+ "full_int6": full_int6,
+ "delta_fp16": None if bridge_bpb is None or full_fp16 is None else full_fp16 - bridge_bpb,
+ "delta_int6": None if bridge_int6 is None or full_int6 is None else full_int6 - bridge_int6,
+ "status": status,
+ "bridge_path": bridge.get("path"),
+ "full_paths": {k: v.get("path") for k, v in full.items()},
+ }
+ )
+ return rows
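+# Status semantics: "bridge_only" = no full-eval descendant found; "full_eval_failed"
+# = at least one descendant lost its bpb (non-finite); otherwise "survived_full_eval".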
+
+
+def lineage_rows(records: list[dict[str, Any]]) -> list[dict[str, Any]]:
+ rows: list[dict[str, Any]] = []
+ for record in records:
+ if record["kind"] != "bridge":
+ continue
+ loaded = record.get("loaded_state_path")
+ saved = record.get("saved_state_path")
+ if not loaded or not saved:
+ continue
+ parent_id = infer_run_id_from_stem(Path(loaded).stem)
+ child_id = infer_run_id_from_stem(Path(saved).stem)
+ rows.append(
+ {
+ "parent_run_id": parent_id,
+ "child_run_id": child_id,
+ "family_id": record["family_id"],
+ "seed": record.get("seed"),
+ "child_bpb": record.get("bpb"),
+ "child_path": record.get("path"),
+ }
+ )
+ return rows
+
+
+def render_table(rows: list[dict[str, Any]], columns: list[str], top: int | None = None) -> str:
+ if top is not None:
+ rows = rows[:top]
+ if not rows:
+ return "(no rows)"
+ widths = {col: max(len(col), *(len(str(row.get(col, ""))) for row in rows)) for col in columns}
+ header = " ".join(col.ljust(widths[col]) for col in columns)
+ sep = " ".join("-" * widths[col] for col in columns)
+ body = [
+ " ".join(str(row.get(col, "")).ljust(widths[col]) for col in columns)
+ for row in rows
+ ]
+ return "\n".join([header, sep, *body])
+
+
+def write_csv(path: Path, rows: list[dict[str, Any]], columns: list[str]) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ with path.open("w", encoding="utf-8", newline="") as handle:
+ writer = csv.DictWriter(handle, fieldnames=columns)
+ writer.writeheader()
+ for row in rows:
+ writer.writerow({column: row.get(column) for column in columns})
+
+
+def _svg_escape(text: str) -> str:
+ return (
+ str(text)
+ .replace("&", "&")
+ .replace("<", "<")
+ .replace(">", ">")
+ .replace('"', """)
+ )
+
+
+_PALETTE = [
+ "#2f6fed", "#c23b22", "#2ca02c", "#9467bd", "#e377c2",
+ "#8c564b", "#17becf", "#bcbd22", "#ff7f0e", "#7f7f7f",
+ "#1f77b4", "#d62728", "#98df8a", "#aec7e8", "#ffbb78",
+]
+
+
+def _family_color(family_id: str) -> str:
+    # builtin hash() is salted per process, which would reshuffle colors between
+    # report runs; fold the UTF-8 bytes instead for a stable assignment
+    return _PALETTE[sum(family_id.encode("utf-8")) % len(_PALETTE)]
+
+
+def _truncate_label(label: str, max_chars: int = 32) -> str:
+ if len(label) <= max_chars:
+ return label
+ return label[: max_chars - 1] + "\u2026"
+
+
+def _nice_ticks(vmin: float, vmax: float, target_count: int = 5) -> list[float]:
+ span = vmax - vmin
+ if span <= 0:
+ return [vmin]
+ raw_step = span / max(target_count, 1)
+ magnitude = 10 ** math.floor(math.log10(raw_step))
+ for nice in [1, 2, 5, 10]:
+ step = nice * magnitude
+ if step >= raw_step:
+ break
+ if step <= 0:
+ return [vmin, vmax]
+ start = math.ceil(vmin / step) * step
+ ticks: list[float] = []
+ val = start
+ while val <= vmax + step * 0.001:
+ ticks.append(round(val, 10))
+ val += step
+ return ticks
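+# worked example: _nice_ticks(0.0, 7.3) -> [0.0, 2.0, 4.0, 6.0]
+# (raw step 7.3 / 5 = 1.46 rounds up to the nice step 2)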
+
+
+def write_bar_svg(path: Path, title: str, labels: list[str], values: list[float], *, width: int = 960, height: int = 480) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ if not labels or not values:
+        path.write_text('<svg xmlns="http://www.w3.org/2000/svg"/>\n', encoding="utf-8")  # no data: empty SVG placeholder
+ return
+ margin_left = 260
+ margin_right = 80
+ margin_top = 50
+ margin_bottom = 40
+ plot_width = width - margin_left - margin_right
+ plot_height = height - margin_top - margin_bottom
+ bar_gap = 6
+ bar_height = max(8, (plot_height - bar_gap * (len(values) - 1)) // max(len(values), 1))
+ vmax = max(max(values), 1e-12)
+ ticks = _nice_ticks(0, vmax, 5)
+    # minimal SVG assembly (assumed layout: title, vertical grid ticks, one horizontal bar per label)
+    parts = [
+        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" font-family="sans-serif" font-size="12">',
+        f'<text x="{width // 2}" y="28" text-anchor="middle" font-size="16">{_svg_escape(title)}</text>',
+    ]
+    for tick in ticks:
+        tx = margin_left + plot_width * (tick / vmax)
+        parts.append(f'<line x1="{tx:.1f}" y1="{margin_top}" x2="{tx:.1f}" y2="{margin_top + plot_height}" stroke="#e0e0e0"/>')
+        parts.append(f'<text x="{tx:.1f}" y="{height - margin_bottom + 16}" text-anchor="middle">{tick:g}</text>')
+    for i, (label, value) in enumerate(zip(labels, values)):
+        y = margin_top + i * (bar_height + bar_gap)
+        bar_w = plot_width * (float(value) / vmax)
+        parts.append(f'<rect x="{margin_left}" y="{y}" width="{bar_w:.1f}" height="{bar_height}" fill="#2f6fed"/>')
+        parts.append(f'<text x="{margin_left - 8}" y="{y + bar_height / 2 + 4}" text-anchor="end">{_svg_escape(_truncate_label(str(label)))}</text>')
+    parts.append("</svg>")
+ path.write_text("\n".join(parts) + "\n", encoding="utf-8")
+
+
+def write_scatter_svg(
+ path: Path,
+ title: str,
+ rows: list[dict[str, Any]],
+ *,
+ x_key: str,
+ y_key: str,
+ label_key: str,
+ reference_line: bool = False,
+ width: int = 960,
+ height: int = 480,
+) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ points = [(row.get(x_key), row.get(y_key), row.get(label_key), row.get("family_id", "")) for row in rows]
+ points = [(float(x), float(y), str(label), str(fam)) for x, y, label, fam in points if x is not None and y is not None]
+ if not points:
+        path.write_text('<svg xmlns="http://www.w3.org/2000/svg"/>\n', encoding="utf-8")  # no data: empty SVG placeholder
+ return
+ margin_left = 70
+ margin_right = 40
+ margin_top = 50
+ margin_bottom = 50
+ plot_width = width - margin_left - margin_right
+ plot_height = height - margin_top - margin_bottom
+ xs = [p[0] for p in points]
+ ys = [p[1] for p in points]
+ min_x, max_x = min(xs), max(xs)
+ min_y, max_y = min(ys), max(ys)
+ if max_x == min_x:
+ max_x += 1e-9
+ if max_y == min_y:
+ max_y += 1e-9
+    def sx(x: float) -> float:
+        return margin_left + plot_width * (x - min_x) / (max_x - min_x)
+
+    def sy(y: float) -> float:
+        return margin_top + plot_height * (1.0 - (y - min_y) / (max_y - min_y))
+
+    # minimal SVG assembly (assumed layout: title, axis labels, optional y=x reference line, one dot per point)
+    parts = [
+        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" font-family="sans-serif" font-size="12">',
+        f'<text x="{width // 2}" y="28" text-anchor="middle" font-size="16">{_svg_escape(title)}</text>',
+        f'<text x="{width // 2}" y="{height - 12}" text-anchor="middle">{_svg_escape(x_key)}</text>',
+        f'<text x="18" y="{height // 2}" text-anchor="middle" transform="rotate(-90 18 {height // 2})">{_svg_escape(y_key)}</text>',
+    ]
+    if reference_line:
+        lo = max(min_x, min_y)
+        hi = min(max_x, max_y)
+        if hi > lo:
+            parts.append(f'<line x1="{sx(lo):.1f}" y1="{sy(lo):.1f}" x2="{sx(hi):.1f}" y2="{sy(hi):.1f}" stroke="#999" stroke-dasharray="4 3"/>')
+    for x, y, label, fam in points:
+        parts.append(f'<circle cx="{sx(x):.1f}" cy="{sy(y):.1f}" r="5" fill="{_family_color(fam)}"><title>{_svg_escape(label)}</title></circle>')
+    parts.append("</svg>")
+ path.write_text("\n".join(parts) + "\n", encoding="utf-8")
+
+
+def write_pie_svg(
+ path: Path,
+ title: str,
+ labels: list[str],
+ values: list[float],
+ colors: list[str],
+ *,
+ width: int = 480,
+ height: int = 400,
+) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ if not values or sum(values) == 0:
+        path.write_text('<svg xmlns="http://www.w3.org/2000/svg"/>\n', encoding="utf-8")  # no data: empty SVG placeholder
+ return
+ cx, cy = width // 2, height // 2 + 10
+ r = min(cx, cy) - 60
+ total = sum(values)
+    # minimal SVG assembly (assumed layout: title plus one wedge per value, each with a hover title)
+    parts = [
+        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" font-family="sans-serif" font-size="12">',
+        f'<text x="{width // 2}" y="28" text-anchor="middle" font-size="16">{_svg_escape(title)}</text>',
+    ]
+    angle = -math.pi / 2.0
+    for label, value, color in zip(labels, values, colors):
+        frac = float(value) / float(total)
+        end = angle + 2.0 * math.pi * min(frac, 0.9999)  # cap so a 100% wedge still renders as an arc
+        x1, y1 = cx + r * math.cos(angle), cy + r * math.sin(angle)
+        x2, y2 = cx + r * math.cos(end), cy + r * math.sin(end)
+        large = 1 if frac > 0.5 else 0
+        parts.append(
+            f'<path d="M{cx},{cy} L{x1:.1f},{y1:.1f} A{r},{r} 0 {large} 1 {x2:.1f},{y2:.1f} Z" '
+            f'fill="{color}"><title>{_svg_escape(label)}: {value:g}</title></path>'
+        )
+        angle = end
+    parts.append("</svg>")
+ path.write_text("\n".join(parts) + "\n", encoding="utf-8")
+
+
+def write_histogram_svg(
+ path: Path,
+ title: str,
+ values: list[float],
+ *,
+ bins: int = 10,
+ width: int = 960,
+ height: int = 400,
+) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ if not values:
+        path.write_text('<svg xmlns="http://www.w3.org/2000/svg"/>\n', encoding="utf-8")  # no data: empty SVG placeholder
+ return
+ margin_left = 60
+ margin_right = 40
+ margin_top = 50
+ margin_bottom = 50
+ plot_width = width - margin_left - margin_right
+ plot_height = height - margin_top - margin_bottom
+ vmin, vmax = min(values), max(values)
+ if vmax == vmin:
+ vmax = vmin + 1e-9
+ bin_width = (vmax - vmin) / bins
+ counts = [0] * bins
+ for v in values:
+ idx = min(int((v - vmin) / bin_width), bins - 1)
+ counts[idx] += 1
+ max_count = max(counts) if counts else 1
+    # minimal SVG assembly (assumed layout: title, one column per bin, min/max labels on the x-axis)
+    parts = [
+        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" font-family="sans-serif" font-size="12">',
+        f'<text x="{width // 2}" y="28" text-anchor="middle" font-size="16">{_svg_escape(title)}</text>',
+    ]
+    column_width = plot_width / bins
+    for i, count in enumerate(counts):
+        bar_h = plot_height * (count / max_count)
+        x = margin_left + i * column_width
+        y = margin_top + plot_height - bar_h
+        parts.append(f'<rect x="{x:.1f}" y="{y:.1f}" width="{max(column_width - 2.0, 1.0):.1f}" height="{bar_h:.1f}" fill="#2f6fed"/>')
+    parts.append(f'<text x="{margin_left}" y="{height - margin_bottom + 18}" text-anchor="middle">{vmin:.4g}</text>')
+    parts.append(f'<text x="{margin_left + plot_width}" y="{height - margin_bottom + 18}" text-anchor="middle">{vmax:.4g}</text>')
+    parts.append("</svg>")
+ path.write_text("\n".join(parts) + "\n", encoding="utf-8")
+
+
+def write_grouped_bar_svg(
+ path: Path,
+ title: str,
+ rows: list[dict[str, Any]],
+ *,
+ key_a: str,
+ key_b: str,
+ label_key: str,
+ width: int = 960,
+ height: int = 480,
+) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ filtered = [r for r in rows if r.get(key_a) is not None and r.get(key_b) is not None]
+ if not filtered:
+        path.write_text('<svg xmlns="http://www.w3.org/2000/svg"/>\n', encoding="utf-8")  # no data: empty SVG placeholder
+ return
+ margin_left = 260
+ margin_right = 80
+ margin_top = 50
+ margin_bottom = 40
+ plot_width = width - margin_left - margin_right
+ plot_height = height - margin_top - margin_bottom
+ group_gap = 10
+ bar_gap = 2
+ group_height = max(16, (plot_height - group_gap * (len(filtered) - 1)) // max(len(filtered), 1))
+ sub_bar = (group_height - bar_gap) // 2
+ all_vals = [r[key_a] for r in filtered] + [r[key_b] for r in filtered]
+ vmax = max(max(all_vals), 1e-12) if all_vals else 1e-12
+ ticks = _nice_ticks(0, vmax, 5)
+ color_a = "#2f6fed"
+ color_b = "#c23b22"
+    # minimal SVG assembly (assumed layout: grid ticks plus one key_a/key_b bar pair per row)
+    parts = [
+        f'<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" font-family="sans-serif" font-size="12">',
+        f'<text x="{width // 2}" y="28" text-anchor="middle" font-size="16">{_svg_escape(title)}</text>',
+    ]
+    for tick in ticks:
+        tx = margin_left + plot_width * (tick / vmax)
+        parts.append(f'<line x1="{tx:.1f}" y1="{margin_top}" x2="{tx:.1f}" y2="{margin_top + plot_height}" stroke="#e0e0e0"/>')
+        parts.append(f'<text x="{tx:.1f}" y="{height - margin_bottom + 16}" text-anchor="middle">{tick:g}</text>')
+    for i, row in enumerate(filtered):
+        y = margin_top + i * (group_height + group_gap)
+        width_a = plot_width * (float(row[key_a]) / vmax)
+        width_b = plot_width * (float(row[key_b]) / vmax)
+        parts.append(f'<rect x="{margin_left}" y="{y}" width="{width_a:.1f}" height="{sub_bar}" fill="{color_a}"/>')
+        parts.append(f'<rect x="{margin_left}" y="{y + sub_bar + bar_gap}" width="{width_b:.1f}" height="{sub_bar}" fill="{color_b}"/>')
+        parts.append(f'<text x="{margin_left - 8}" y="{y + group_height / 2 + 4}" text-anchor="end">{_svg_escape(_truncate_label(str(row.get(label_key, ""))))}</text>')
+    parts.append("</svg>")
+ path.write_text("\n".join(parts) + "\n", encoding="utf-8")
+
+
+def _mermaid_id(run_id: str) -> str:
+ return re.sub(r"[^a-zA-Z0-9_]", "_", run_id)
+
+
+def render_lineage_mermaid(rows: list[dict[str, Any]], *, max_nodes: int = 30) -> str:
+ if not rows:
+ return "graph TD\n empty[No lineage data]"
+    # map each child run to its bpb so node labels can carry the metric
+    bpb_map: dict[str, float | None] = {}
+    for row in rows:
+        bpb_map[row["child_run_id"]] = row.get("child_bpb")
+ # collect all unique nodes up to max_nodes
+ seen: set[str] = set()
+ edges: list[tuple[str, str]] = []
+ for row in rows:
+ p, c = row["parent_run_id"], row["child_run_id"]
+ new_nodes = {p, c} - seen
+ if len(seen) + len(new_nodes) > max_nodes:
+ break
+ seen.update(new_nodes)
+ edges.append((p, c))
+ lines = ["graph TD"]
+ for node in sorted(seen):
+ mid = _mermaid_id(node)
+ short = _truncate_label(node, 24)
+ bpb = bpb_map.get(node)
+ if bpb is not None:
+            lines.append(f' {mid}["{short}<br/>{bpb:.4f}"]')
+ else:
+ lines.append(f' {mid}["{short}"]')
+ for p, c in edges:
+ lines.append(f" {_mermaid_id(p)} --> {_mermaid_id(c)}")
+ return "\n".join(lines)
+
+
+def render_survival_mermaid(rows: list[dict[str, Any]]) -> str:
+ if not rows:
+ return "graph LR\n empty[No survival data]"
+ total = len(rows)
+ survived = sum(1 for r in rows if r.get("status") == "survived_full_eval")
+ failed = sum(1 for r in rows if r.get("status") == "full_eval_failed")
+ bridge_only = sum(1 for r in rows if r.get("status") == "bridge_only")
+ attempted = survived + failed
+ lines = [
+ "graph LR",
+ f' A["Bridge Runs
{total}"]',
+ f' B["Full Eval Attempted
{attempted}"]',
+ f' C["Survived
{survived}"]',
+ f' D["Failed
{failed}"]',
+ f' E["Bridge Only
{bridge_only}"]',
+ " A --> B",
+ " A --> E",
+ " B --> C",
+ " B --> D",
+ " style C fill:#2ca02c,color:#fff",
+ " style D fill:#c23b22,color:#fff",
+ " style E fill:#7f7f7f,color:#fff",
+ ]
+ return "\n".join(lines)
+
+
+def write_report_bundle(root: Path, out_dir: Path, *, top: int = 20) -> dict[str, Any]:
+ scanned = scan_results(root)
+ records = scanned["records"]
+ top_full_eval = sort_records([r for r in records if r["kind"] == "full_eval"], "bpb")[:top]
+ top_bridge = sort_records([r for r in records if r["kind"] == "bridge"], "bpb")[:top]
+ top_study = sort_records([r for r in records if r["kind"] == "study"], "best_metric")[:top]
+ survival = survival_rows(records)
+ survival_non_bridge = [row for row in survival if row["status"] != "bridge_only"]
+ failed = [row for row in survival if row["status"] == "full_eval_failed"]
+ lineage = lineage_rows(records)
+
+ out_dir.mkdir(parents=True, exist_ok=True)
+ (out_dir / "scan_summary.json").write_text(
+ dumps_json(
+ {
+ "root": scanned["root"],
+ "record_count": scanned["record_count"],
+ "by_kind": scanned["by_kind"],
+ "family_count": scanned["family_count"],
+ "top_families": scanned["top_families"],
+ "skipped": scanned["skipped"],
+ }
+ )
+ + "\n",
+ encoding="utf-8",
+ )
+ (out_dir / "top_full_eval.json").write_text(dumps_json(top_full_eval) + "\n", encoding="utf-8")
+ (out_dir / "top_bridge.json").write_text(dumps_json(top_bridge) + "\n", encoding="utf-8")
+ (out_dir / "top_study.json").write_text(dumps_json(top_study) + "\n", encoding="utf-8")
+ (out_dir / "survival.json").write_text(dumps_json(survival_non_bridge) + "\n", encoding="utf-8")
+ (out_dir / "failed_full_eval.json").write_text(dumps_json(failed) + "\n", encoding="utf-8")
+ (out_dir / "lineage.json").write_text(dumps_json(lineage) + "\n", encoding="utf-8")
+
+ write_csv(out_dir / "top_full_eval.csv", top_full_eval, ["family_id", "run_id", "seed", "quant_label", "bpb", "artifact_bytes", "path"])
+ write_csv(out_dir / "top_study.csv", top_study, ["family_id", "run_id", "best_label", "best_metric", "metric_name", "variant_count", "path"])
+ write_csv(out_dir / "survival.csv", survival_non_bridge, ["family_id", "run_id", "seed", "bridge_fp16", "full_fp16", "bridge_int6", "full_int6", "delta_fp16", "delta_int6", "status"])
+ write_csv(out_dir / "failed_full_eval.csv", failed, ["family_id", "run_id", "seed", "bridge_fp16", "bridge_int6", "status", "bridge_path"])
+
+ # --- existing SVG charts (improved) ---
+ full_eval_with_bpb = [row for row in top_full_eval[:12] if row.get("bpb") is not None]
+ full_labels = [f"{row['family_id']}:{row.get('quant_label')}" for row in full_eval_with_bpb]
+ full_values = [row["bpb"] for row in full_eval_with_bpb]
+ write_bar_svg(out_dir / "top_full_eval.svg", "Top Full-Eval Rows", full_labels, full_values)
+ study_rows = [row for row in top_study if row.get("best_metric") is not None][:12]
+ write_bar_svg(
+ out_dir / "top_study.svg",
+ "Top Study Rows",
+ [f"{row['family_id']}:{row.get('best_label') or 'study'}" for row in study_rows],
+ [float(row["best_metric"]) for row in study_rows],
+ )
+
+ gap_rows = [row for row in survival_non_bridge if row.get("bridge_fp16") is not None and row.get("full_fp16") is not None][:20]
+ write_scatter_svg(
+ out_dir / "bridge_vs_full_fp16.svg",
+ "Bridge FP16 vs Full FP16",
+ gap_rows,
+ x_key="bridge_fp16",
+ y_key="full_fp16",
+ label_key="family_id",
+ reference_line=True,
+ )
+
+ conker7_rows = [row for row in survival if str(row["family_id"]).startswith("conker7_")]
+ conker7_with_bpb = [row for row in conker7_rows if row.get("bridge_fp16") is not None]
+ write_bar_svg(
+ out_dir / "conker7_bridge_fp16.svg",
+ "Conker-7 Bridge FP16 Rows",
+ [row["family_id"] for row in conker7_with_bpb],
+ [row["bridge_fp16"] for row in conker7_with_bpb],
+ )
+
+ # --- new SVG charts ---
+ # survival status pie
+ survived_count = sum(1 for r in survival if r["status"] == "survived_full_eval")
+ failed_count = len(failed)
+ bridge_only_count = sum(1 for r in survival if r["status"] == "bridge_only")
+ pie_labels = []
+ pie_values: list[float] = []
+ pie_colors = []
+ if survived_count:
+ pie_labels.append("survived_full_eval")
+ pie_values.append(survived_count)
+ pie_colors.append("#2ca02c")
+ if failed_count:
+ pie_labels.append("full_eval_failed")
+ pie_values.append(failed_count)
+ pie_colors.append("#c23b22")
+ if bridge_only_count:
+ pie_labels.append("bridge_only")
+ pie_values.append(bridge_only_count)
+ pie_colors.append("#7f7f7f")
+ write_pie_svg(out_dir / "survival_status.svg", "Survival Status", pie_labels, pie_values, pie_colors)
+
+ # delta histogram
+ deltas = [row["delta_fp16"] for row in survival_non_bridge if row.get("delta_fp16") is not None]
+ write_histogram_svg(out_dir / "delta_fp16_histogram.svg", "Bridge-to-Full Delta (FP16)", deltas)
+
+ # grouped bar: bridge vs full by family
+ family_best: dict[str, dict[str, Any]] = {}
+ for row in survival_non_bridge:
+ fid = row["family_id"]
+ if row.get("bridge_fp16") is not None and row.get("full_fp16") is not None:
+ if fid not in family_best or (row["full_fp16"] < family_best[fid]["full_fp16"]):
+ family_best[fid] = row
+ grouped_rows = sort_records(list(family_best.values()), "full_fp16")[:12]
+ write_grouped_bar_svg(
+ out_dir / "bridge_vs_full_grouped.svg",
+ "Bridge vs Full-Eval by Family",
+ grouped_rows,
+ key_a="bridge_fp16",
+ key_b="full_fp16",
+ label_key="family_id",
+ )
+
+ # --- mermaid diagrams ---
+ lineage_mermaid = render_lineage_mermaid(lineage)
+ survival_mermaid = render_survival_mermaid(survival)
+
+ # --- README ---
+ summary_lines = [
+ "# Public Backlog Report",
+ "",
+ f"- root: `{root}`",
+ f"- normalized records: `{scanned['record_count']}`",
+ f"- bridge rows: `{scanned['by_kind'].get('bridge', 0)}`",
+ f"- full eval rows: `{scanned['by_kind'].get('full_eval', 0)}`",
+ f"- study rows: `{scanned['by_kind'].get('study', 0)}`",
+ f"- experiment families: `{scanned['family_count']}`",
+ "",
+ "## Headline",
+ "",
+ ]
+ if top_full_eval and top_full_eval[0].get("bpb") is not None:
+ best = top_full_eval[0]
+ summary_lines.append(
+ f"- best normalized full eval in this backlog: `{best['family_id']}` `{best['quant_label']}` at `{best['bpb']:.6f} bpb`"
+ )
+ elif top_study and top_study[0].get("best_metric") is not None:
+ best = top_study[0]
+ summary_lines.append(
+ f"- best study quick-check in this backlog: `{best['family_id']}` `{best.get('best_label')}` at `{best['best_metric']:.6f}` `{best.get('metric_name') or 'metric'}`"
+ )
+ if failed:
+ summary_lines.append(f"- full-eval failures detected after optimistic bridge results: `{len(failed)}`")
+ summary_lines.extend(
+ [
+ "",
+ "## Survival Pipeline",
+ "",
+ "```mermaid",
+ survival_mermaid,
+ "```",
+ "",
+ "## Lineage",
+ "",
+ "```mermaid",
+ lineage_mermaid,
+ "```",
+ "",
+ "## Files",
+ "",
+ "- `scan_summary.json`",
+ "- `top_full_eval.json` / `top_full_eval.csv` / `top_full_eval.svg`",
+ "- `top_bridge.json`",
+ "- `top_study.json` / `top_study.csv` / `top_study.svg`",
+ "- `survival.json` / `survival.csv` / `survival_status.svg`",
+ "- `failed_full_eval.json` / `failed_full_eval.csv`",
+ "- `lineage.json`",
+ "- `bridge_vs_full_fp16.svg` / `bridge_vs_full_grouped.svg`",
+ "- `delta_fp16_histogram.svg`",
+ "- `conker7_bridge_fp16.svg`",
+ "",
+ "## Visuals",
+ "",
+ "### Top Study Rows",
+ "",
+ "",
+ "",
+ "### Survival Status",
+ "",
+ "",
+ "",
+ "### Top Full-Eval Rows",
+ "",
+ "",
+ "",
+ "### Bridge vs Full-Eval FP16",
+ "",
+ "",
+ "",
+ "### Bridge vs Full-Eval by Family",
+ "",
+ "",
+ "",
+ "### Delta Distribution (FP16)",
+ "",
+ "",
+ "",
+ "### Conker-7 Bridge Rows",
+ "",
+ "",
+ ]
+ )
+ if failed:
+ summary_lines.extend(["", "## Failed Full-Eval Rows", ""])
+ for row in failed[:20]:
+ summary_lines.append(
+ f"- `{row['family_id']}` seed `{row.get('seed')}` bridge fp16 `{row.get('bridge_fp16')}` bridge int6 `{row.get('bridge_int6')}`"
+ )
+ (out_dir / "README.md").write_text("\n".join(summary_lines) + "\n", encoding="utf-8")
+
+ return {
+ "scan_summary": {
+ "record_count": scanned["record_count"],
+ "by_kind": scanned["by_kind"],
+ "family_count": scanned["family_count"],
+ },
+ "best_full_eval": top_full_eval[0] if top_full_eval else None,
+ "failed_full_eval_count": len(failed),
+ "report_dir": str(out_dir),
+ }
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/__init__.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/__init__.py
new file mode 100644
index 0000000000..bf36adbdc7
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/__init__.py
@@ -0,0 +1,12 @@
+from .model import GolfSubmissionFitReport, GolfSubmissionModel, GolfSubmissionModelConfig, GolfSubmissionScore
+from .packet import SubmissionPacketResult, build_parameter_golf_packet, build_packet_from_patterns
+
+__all__ = [
+ "GolfSubmissionFitReport",
+ "GolfSubmissionModel",
+ "GolfSubmissionModelConfig",
+ "GolfSubmissionScore",
+ "SubmissionPacketResult",
+ "build_parameter_golf_packet",
+ "build_packet_from_patterns",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/bootstrap.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/bootstrap.py
new file mode 100644
index 0000000000..a45d29fc44
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/bootstrap.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from pathlib import Path
+import subprocess
+import sys
+
+
+PROJECT_ROOT = Path(__file__).resolve().parents[2]
+WORKSPACE_ROOT = PROJECT_ROOT.parent
+VENDOR_ROOT = PROJECT_ROOT / "vendor"
+
+
+def _resolve_repo(vendor_name: str, workspace_name: str) -> Path:
+ vendored = VENDOR_ROOT / vendor_name
+ if vendored.exists():
+ return vendored
+ return WORKSPACE_ROOT / workspace_name
+
+
+OPC_ROOT = _resolve_repo("open_predictive_coder", "open-predictive-coder")
+LEDGER_ROOT = _resolve_repo("conker_ledger", "conker-ledger")
+CONKER_ROOT = WORKSPACE_ROOT / "conker"
+DETECT_ROOT = WORKSPACE_ROOT / "conker-detect"
+
+
+def add_local_sources() -> None:
+ for root in (OPC_ROOT, LEDGER_ROOT):
+ src = root / "src"
+ if src.exists() and str(src) not in sys.path:
+ sys.path.insert(0, str(src))
+
+
+def git_head(root: Path) -> str | None:
+ if not (root / ".git").exists():
+ return None
+ try:
+ result = subprocess.run(
+ ["git", "rev-parse", "HEAD"],
+ cwd=root,
+ check=True,
+ capture_output=True,
+ text=True,
+ )
+ except Exception:
+ return None
+ value = result.stdout.strip()
+ return value or None
+
+
+def git_snapshot_ref(root: Path) -> str | None:
+ head = git_head(root)
+ if head is None:
+ return None
+ try:
+ result = subprocess.run(
+ ["git", "status", "--porcelain"],
+ cwd=root,
+ check=True,
+ capture_output=True,
+ text=True,
+ )
+ except Exception:
+ return head
+ return f"{head}-dirty" if result.stdout.strip() else head
+
+
+__all__ = [
+ "PROJECT_ROOT",
+ "WORKSPACE_ROOT",
+ "VENDOR_ROOT",
+ "OPC_ROOT",
+ "LEDGER_ROOT",
+ "CONKER_ROOT",
+ "DETECT_ROOT",
+ "add_local_sources",
+ "git_head",
+ "git_snapshot_ref",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/cli.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/cli.py
new file mode 100644
index 0000000000..d16b2d6cc8
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/cli.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import argparse
+
+from .packet import build_packet_from_patterns
+
+
+def build_parser() -> argparse.ArgumentParser:
+ parser = argparse.ArgumentParser(description="Build a standalone opc-native packed-memory parameter-golf-style legal packet.")
+ parser.add_argument("--train-pattern", action="append", required=True, dest="train_patterns")
+ parser.add_argument("--eval-pattern", action="append", required=True, dest="eval_patterns")
+ parser.add_argument("--out-dir", required=True)
+ parser.add_argument("--max-train-tokens", type=int)
+ parser.add_argument("--max-eval-tokens", type=int)
+ parser.add_argument("--vocab-size", type=int)
+ parser.add_argument("--bytes-per-token", type=float)
+ parser.add_argument("--tokenizer-model")
+ parser.add_argument("--name", default="OPC causal packed-memory legal packet stress test")
+ parser.add_argument("--track", default="track_non_record_16mb")
+ parser.add_argument("--candidate-id", default="opc-causal-packed-memory-stress-test")
+ parser.add_argument("--submission-pr", default="https://github.com/openai/parameter-golf/pull/998")
+ return parser
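+
+# Example invocation (hypothetical shard layout), as wired through the packet's train_gpt.py:
+#   python train_gpt.py --train-pattern "data/train_*.bin" --eval-pattern "data/eval_*.bin" \
+#       --out-dir out/packet --max-train-tokens 100000000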
+
+
+def main() -> None:
+ parser = build_parser()
+ args = parser.parse_args()
+ result = build_packet_from_patterns(
+ train_patterns=args.train_patterns,
+ eval_patterns=args.eval_patterns,
+ out_dir=args.out_dir,
+ max_train_tokens=args.max_train_tokens,
+ max_eval_tokens=args.max_eval_tokens,
+ vocab_size=args.vocab_size,
+ bytes_per_token=args.bytes_per_token,
+ tokenizer_model=args.tokenizer_model,
+ submission_name=args.name,
+ track=args.track,
+ candidate_id=args.candidate_id,
+ submission_pr=args.submission_pr,
+ )
+ print(f"built packet: {result.output_root}")
+ print(f"run_id: {result.run_id}")
+ print(f"pre_quant_val_bpb: {result.pre_quant_val_bpb}")
+
+
+__all__ = ["build_parser", "main"]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/golf_data.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/golf_data.py
new file mode 100644
index 0000000000..d0e4d7146e
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/golf_data.py
@@ -0,0 +1,159 @@
+from __future__ import annotations
+
+from collections.abc import Iterable, Sequence
+from pathlib import Path
+import glob
+
+import numpy as np
+
+try:
+ import sentencepiece as spm
+except ImportError: # pragma: no cover - optional dependency
+ spm = None
+
+
+PARAMETER_GOLF_MAGIC = 20240520
+PARAMETER_GOLF_VERSION = 1
+HEADER_INTS = 256
+HEADER_BYTES = HEADER_INTS * np.dtype(np.int32).itemsize
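+# On-disk shard layout: an optional 256-int32 header (magic, version, token count,
+# zero padding) followed by a uint16 token payload; shards without the header are
+# read as a bare uint16 stream.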
+
+
+def load_golf_shard(path: str | Path) -> np.ndarray:
+ shard_path = Path(path)
+ blob = shard_path.read_bytes()
+ if len(blob) >= HEADER_BYTES:
+ header = np.frombuffer(blob[:HEADER_BYTES], dtype=np.int32, count=HEADER_INTS)
+ if (
+ header.size >= 3
+ and int(header[0]) == PARAMETER_GOLF_MAGIC
+ and int(header[1]) == PARAMETER_GOLF_VERSION
+ ):
+ token_count = int(header[2])
+ payload = np.frombuffer(blob[HEADER_BYTES:], dtype=np.uint16, count=token_count)
+ if payload.size == token_count:
+ return payload.astype(np.int64, copy=False)
+ return np.frombuffer(blob, dtype=np.uint16).astype(np.int64, copy=False)
+
+
+def count_golf_tokens(path: str | Path) -> int:
+ shard_path = Path(path)
+ blob = shard_path.read_bytes()
+ if len(blob) >= HEADER_BYTES:
+ header = np.frombuffer(blob[:HEADER_BYTES], dtype=np.int32, count=HEADER_INTS)
+ if (
+ header.size >= 3
+ and int(header[0]) == PARAMETER_GOLF_MAGIC
+ and int(header[1]) == PARAMETER_GOLF_VERSION
+ ):
+ return int(header[2])
+ return len(blob) // np.dtype(np.uint16).itemsize
+
+
+def discover_shards(patterns: str | Path | Sequence[str | Path]) -> tuple[Path, ...]:
+ if isinstance(patterns, (str, Path)):
+ candidates = [patterns]
+ else:
+ candidates = list(patterns)
+ paths: list[Path] = []
+ for candidate in candidates:
+ matches = sorted(glob.glob(str(candidate)))
+ paths.extend(Path(match) for match in matches)
+ unique = tuple(dict.fromkeys(paths))
+ if not unique:
+ raise FileNotFoundError(f"no shards matched: {patterns}")
+ return unique
+
+
+def load_golf_tokens(
+ paths: Sequence[str | Path],
+ *,
+ max_tokens: int | None = None,
+) -> np.ndarray:
+ shards = [load_golf_shard(path) for path in paths]
+ if not shards:
+ return np.zeros((0,), dtype=np.int64)
+ tokens = np.concatenate(shards, axis=0)
+ if max_tokens is not None:
+ if max_tokens < 0:
+ raise ValueError("max_tokens must be >= 0")
+ tokens = tokens[:max_tokens]
+ return tokens.astype(np.int64, copy=False)
+
+
+def write_golf_shard(path: str | Path, tokens: Iterable[int], *, with_header: bool = True) -> Path:
+ shard_path = Path(path)
+ values = np.asarray(list(tokens), dtype=np.uint16)
+ shard_path.parent.mkdir(parents=True, exist_ok=True)
+ if with_header:
+ header = np.zeros((HEADER_INTS,), dtype=np.int32)
+ header[0] = PARAMETER_GOLF_MAGIC
+ header[1] = PARAMETER_GOLF_VERSION
+ header[2] = int(values.size)
+ payload = header.tobytes() + values.tobytes()
+ else:
+ payload = values.tobytes()
+ shard_path.write_bytes(payload)
+ return shard_path
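+
+# round-trip sketch (hypothetical paths):
+#   write_golf_shard("shards/train_000.bin", [1, 2, 3])
+#   load_golf_tokens(discover_shards("shards/train_*.bin"), max_tokens=2)  # -> array([1, 2])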
+
+
+def compute_sentencepiece_bytes_per_token(
+ tokens: np.ndarray,
+ tokenizer_model: str | Path,
+ *,
+ vocab_size: int,
+) -> float:
+ if spm is None:
+ raise ImportError("sentencepiece is required to compute bytes-per-token from a tokenizer model")
+ token_array = np.asarray(tokens, dtype=np.int64).reshape(-1)
+ if token_array.size < 2:
+ return 1.0
+
+ sp = spm.SentencePieceProcessor(model_file=str(Path(tokenizer_model)))
+ table_size = max(int(sp.vocab_size()), int(vocab_size))
+ base_bytes_lut = np.zeros((table_size,), dtype=np.int16)
+ has_leading_space_lut = np.zeros((table_size,), dtype=np.bool_)
+ is_boundary_token_lut = np.ones((table_size,), dtype=np.bool_)
+ for token_id in range(int(sp.vocab_size())):
+ if sp.is_control(token_id) or sp.is_unknown(token_id) or sp.is_unused(token_id):
+ continue
+ is_boundary_token_lut[token_id] = False
+ if sp.is_byte(token_id):
+ base_bytes_lut[token_id] = 1
+ continue
+ piece = sp.id_to_piece(token_id)
+ if piece.startswith("▁"):
+ has_leading_space_lut[token_id] = True
+ piece = piece[1:]
+ base_bytes_lut[token_id] = len(piece.encode("utf-8"))
+
+ prev_ids = token_array[:-1]
+ tgt_ids = token_array[1:]
+ if tgt_ids.size == 0:
+ return 1.0
+ if int(np.min(prev_ids)) < 0 or int(np.max(prev_ids)) >= table_size:
+ raise ValueError("tokens exceed tokenizer vocabulary table")
+ if int(np.min(tgt_ids)) < 0 or int(np.max(tgt_ids)) >= table_size:
+ raise ValueError("tokens exceed tokenizer vocabulary table")
+ byte_counts = base_bytes_lut[tgt_ids].astype(np.int32, copy=True)
+ byte_counts += (
+ has_leading_space_lut[tgt_ids] & ~is_boundary_token_lut[prev_ids]
+ ).astype(np.int32, copy=False)
+ total_bytes = float(np.sum(byte_counts, dtype=np.float64))
+ total_tokens = float(tgt_ids.size)
+ if total_bytes <= 0.0 or total_tokens <= 0.0:
+ return 1.0
+ return total_bytes / total_tokens
+
+
+__all__ = [
+ "PARAMETER_GOLF_MAGIC",
+ "PARAMETER_GOLF_VERSION",
+ "HEADER_INTS",
+ "HEADER_BYTES",
+ "count_golf_tokens",
+ "compute_sentencepiece_bytes_per_token",
+ "discover_shards",
+ "load_golf_shard",
+ "load_golf_tokens",
+ "write_golf_shard",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/model.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/model.py
new file mode 100644
index 0000000000..e761d43123
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/model.py
@@ -0,0 +1,295 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+from pathlib import Path
+
+import numpy as np
+
+from .bootstrap import add_local_sources
+
+add_local_sources()
+
+from open_predictive_coder.artifacts import ArtifactMetadata, make_artifact_accounting, make_replay_span
+from open_predictive_coder.artifacts_audits import ArtifactAuditRecord, audit_artifact
+from open_predictive_coder.codecs import ensure_tokens
+from open_predictive_coder.ngram_memory import NgramMemory, NgramMemoryConfig
+
+
+def _coerce_tokens(data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> np.ndarray:
+ return ensure_tokens(data).astype(np.int64, copy=False)
+
+
+def _normalize(probabilities: np.ndarray) -> np.ndarray:
+ values = np.asarray(probabilities, dtype=np.float64)
+ total = float(np.sum(values))
+ if total <= 0.0:
+ return np.full(values.shape[-1], 1.0 / float(values.shape[-1]), dtype=np.float64)
+ return values / total
+
+
+@dataclass(frozen=True)
+class GolfSubmissionModelConfig:
+ vocabulary_size: int = 256
+ bigram_alpha: float = 0.5
+ trigram_alpha: float = 0.5
+ trigram_bucket_count: int = 2048
+ calibration_fraction: float = 0.1
+ calibration_min_tokens: int = 65_536
+ simplex_grid_denominator: int = 20
+ score_block_tokens: int = 2_000_000
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.bigram_alpha < 0.0:
+ raise ValueError("bigram_alpha must be >= 0")
+ if self.trigram_alpha < 0.0:
+ raise ValueError("trigram_alpha must be >= 0")
+ if self.trigram_bucket_count < 1:
+ raise ValueError("trigram_bucket_count must be >= 1")
+ if not 0.0 < self.calibration_fraction < 1.0:
+ raise ValueError("calibration_fraction must lie in (0, 1)")
+ if self.calibration_min_tokens < 16:
+ raise ValueError("calibration_min_tokens must be >= 16")
+ if self.simplex_grid_denominator < 1:
+ raise ValueError("simplex_grid_denominator must be >= 1")
+ if self.score_block_tokens < 1024:
+ raise ValueError("score_block_tokens must be >= 1024")
+
+
+@dataclass(frozen=True)
+class GolfSubmissionFitReport:
+ train_tokens: int
+ train_bits_per_token: float
+ ngram_bytes: int
+ mixture_weights: np.ndarray
+ source_names: tuple[str, ...]
+
+
+@dataclass(frozen=True)
+class GolfSubmissionScore:
+ tokens: int
+ unigram_bits_per_token: float
+ bigram_bits_per_token: float
+ trigram_bits_per_token: float
+ mixed_bits_per_token: float
+ mixture_weights: np.ndarray
+
+
+class GolfSubmissionModel:
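+    """Unigram/bigram/trigram mixture over the opc-native packed NgramMemory.
+
+    Mixture weights are fitted by grid search over the probability simplex on
+    a held-out calibration tail of the training tokens. A minimal usage
+    sketch (token values illustrative):
+
+        >>> model = GolfSubmissionModel()
+        >>> model.fit(np.zeros(200_000, dtype=np.int64))  # doctest: +SKIP
+        >>> model.score([1, 2, 3, 4]).mixed_bits_per_token  # doctest: +SKIP
+    """
+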
+ SOURCE_NAMES = ("unigram", "bigram", "trigram")
+
+ def __init__(self, config: GolfSubmissionModelConfig | None = None):
+ self.config = config or GolfSubmissionModelConfig()
+ self.ngram_memory = NgramMemory(
+ NgramMemoryConfig(
+ vocabulary_size=self.config.vocabulary_size,
+ bigram_alpha=self.config.bigram_alpha,
+ trigram_alpha=self.config.trigram_alpha,
+ trigram_bucket_count=self.config.trigram_bucket_count,
+ )
+ )
+ self._mixture_weights = np.asarray([0.1, 0.25, 0.65], dtype=np.float64)
+
+ def _split_training_tokens(self, tokens: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
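+        # Reserve a tail slice for mixture-weight calibration; sequences too
+        # short to split safely reuse the full input for both phases.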
+ if tokens.size < max(self.config.calibration_min_tokens * 2, 128):
+ return tokens, tokens
+ calibration_tokens = max(int(tokens.size * self.config.calibration_fraction), self.config.calibration_min_tokens)
+ calibration_start = max(tokens.size - calibration_tokens, 1)
+ return tokens[:calibration_start], tokens[calibration_start:]
+
+ def _simplex_candidates(self) -> np.ndarray:
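+        # Enumerate every integer triple summing to `denom`, i.e. all points
+        # of the three-way probability simplex on a grid with step 1/denom.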
+ denom = int(self.config.simplex_grid_denominator)
+ rows = []
+ for unigram_weight in range(denom + 1):
+ for bigram_weight in range(denom - unigram_weight + 1):
+ trigram_weight = denom - unigram_weight - bigram_weight
+ rows.append([unigram_weight, bigram_weight, trigram_weight])
+ values = np.asarray(rows, dtype=np.float64) / float(denom)
+ return values
+
+ def _fit_mixture_weights(self, tokens: np.ndarray) -> np.ndarray:
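+        # Grid-search the weight simplex and keep the triple that minimizes
+        # mean negative log-likelihood on the calibration tokens; an empty
+        # calibration slice keeps the current weights.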
+ if tokens.size == 0:
+ return self._mixture_weights.copy()
+ unigram = self.ngram_memory.chosen_probs(tokens, order="unigram")
+ bigram = self.ngram_memory.chosen_probs(tokens, order="bigram")
+ trigram = self.ngram_memory.chosen_probs(tokens, order="trigram")
+ grid = self._simplex_candidates()
+ best_weights = self._mixture_weights.copy()
+ best_loss = float("inf")
+ stacked = np.stack([unigram, bigram, trigram], axis=0)
+ for weights in grid:
+ mixed = np.clip(np.tensordot(weights, stacked, axes=(0, 0)), 1e-300, 1.0)
+ loss = float(-np.mean(np.log(mixed)))
+ if loss < best_loss:
+ best_loss = loss
+ best_weights = weights
+ return best_weights
+
+ def fit(self, data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> GolfSubmissionFitReport:
+ tokens = _coerce_tokens(data)
+ memory_tokens, calibration_tokens = self._split_training_tokens(tokens)
+ if memory_tokens.size == 0:
+ memory_tokens = tokens
+ self.ngram_memory.fit(memory_tokens)
+ self._mixture_weights = self._fit_mixture_weights(calibration_tokens)
+ train_bits_per_token = 0.0
+ if calibration_tokens.size > 0:
+ train_bits_per_token = float(self.score(calibration_tokens).mixed_bits_per_token)
+ return GolfSubmissionFitReport(
+ train_tokens=int(tokens.size),
+ train_bits_per_token=train_bits_per_token,
+ ngram_bytes=self.ngram_memory.report().total_bytes,
+ mixture_weights=self._mixture_weights.copy(),
+ source_names=self.SOURCE_NAMES,
+ )
+
+    def _score_block(self, tokens: np.ndarray, *, skip: int = 0) -> tuple[float, float, float, float, int]:
+        # The first `skip` tokens are left context only: they condition the
+        # higher-order predictions but are excluded from the loss totals.
+        unigram = self.ngram_memory.chosen_probs(tokens, order="unigram")[skip:]
+        bigram = self.ngram_memory.chosen_probs(tokens, order="bigram")[skip:]
+        trigram = self.ngram_memory.chosen_probs(tokens, order="trigram")[skip:]
+        mixed = np.clip(
+            self._mixture_weights[0] * unigram
+            + self._mixture_weights[1] * bigram
+            + self._mixture_weights[2] * trigram,
+            1e-300,
+            1.0,
+        )
+        return (
+            float(np.sum(-np.log(unigram), dtype=np.float64)),
+            float(np.sum(-np.log(bigram), dtype=np.float64)),
+            float(np.sum(-np.log(trigram), dtype=np.float64)),
+            float(np.sum(-np.log(mixed), dtype=np.float64)),
+            int(tokens.size - skip),
+        )
+
+ def score(self, data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> GolfSubmissionScore:
+ tokens = _coerce_tokens(data)
+ if tokens.size == 0:
+ raise ValueError("score requires at least one token")
+
+ unigram_loss = 0.0
+ bigram_loss = 0.0
+ trigram_loss = 0.0
+ mixed_loss = 0.0
+ total_tokens = 0
+ block = int(self.config.score_block_tokens)
+        for start in range(0, int(tokens.size), block):
+            end = min(start + block, int(tokens.size))
+            # Carry up to two tokens of left context so trigram predictions
+            # stay intact across block boundaries; `skip` keeps the context
+            # positions out of the loss totals.
+            left = max(start - 2, 0)
+            block_unigram, block_bigram, block_trigram, block_mixed, block_count = self._score_block(
+                tokens[left:end], skip=start - left
+            )
+ unigram_loss += block_unigram
+ bigram_loss += block_bigram
+ trigram_loss += block_trigram
+ mixed_loss += block_mixed
+ total_tokens += block_count
+
+ scale = np.log(2.0) * max(total_tokens, 1)
+ return GolfSubmissionScore(
+ tokens=total_tokens,
+ unigram_bits_per_token=float(unigram_loss / scale),
+ bigram_bits_per_token=float(bigram_loss / scale),
+ trigram_bits_per_token=float(trigram_loss / scale),
+ mixed_bits_per_token=float(mixed_loss / scale),
+ mixture_weights=self._mixture_weights.copy(),
+ )
+
+ def predictive_distribution(
+ self,
+ context: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> np.ndarray:
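+        # Back off by order: with fewer than two context tokens the trigram
+        # (and, with none, the bigram) term falls back to the next lower order.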
+ tokens = _coerce_tokens(context)
+ unigram = self.ngram_memory.unigram_probs()
+ bigram = self.ngram_memory.bigram_probs(int(tokens[-1])) if tokens.size >= 1 else unigram
+ trigram = self.ngram_memory.trigram_probs(int(tokens[-2]), int(tokens[-1])) if tokens.size >= 2 else bigram
+ return _normalize(
+ self._mixture_weights[0] * unigram
+ + self._mixture_weights[1] * bigram
+ + self._mixture_weights[2] * trigram
+ )
+
+ def artifact_arrays(self) -> dict[str, np.ndarray]:
+ return {
+ "mixture_weights": self._mixture_weights.astype(np.float32, copy=True),
+ "config_vocabulary_size": np.asarray([self.config.vocabulary_size], dtype=np.int32),
+ "config_bigram_alpha": np.asarray([self.config.bigram_alpha], dtype=np.float32),
+ "config_trigram_alpha": np.asarray([self.config.trigram_alpha], dtype=np.float32),
+ "config_trigram_bucket_count": np.asarray([self.config.trigram_bucket_count], dtype=np.int32),
+ "ngram_unigram_counts": self.ngram_memory.unigram_counts.astype(np.uint32, copy=True),
+ "ngram_bigram_counts": self.ngram_memory.bigram_counts.astype(np.uint32, copy=True),
+ "ngram_trigram_counts": self.ngram_memory.trigram_counts.astype(np.uint32, copy=True),
+ }
+
+ def save_artifact(
+ self,
+ path: str | Path,
+ *,
+ reference_tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | None = None,
+ metadata: dict[str, object] | None = None,
+ ) -> ArtifactAuditRecord:
+ artifact_path = Path(path)
+ artifact_path.parent.mkdir(parents=True, exist_ok=True)
+ np.savez_compressed(artifact_path, **self.artifact_arrays())
+
+ replay_spans = ()
+ if reference_tokens is not None:
+ tokens = _coerce_tokens(reference_tokens)
+ if tokens.size >= 1:
+ replay_spans = (make_replay_span(0, int(tokens.size), label="causal_prefix_replay"),)
+
+ artifact_bytes = int(artifact_path.stat().st_size)
+ accounting = make_artifact_accounting(
+ artifact_name="opc_causal_packed_memory_submission",
+ artifact_bytes=artifact_bytes,
+ replay_bytes=artifact_bytes,
+ replay_spans=replay_spans,
+ metadata=ArtifactMetadata.from_mapping(
+ {
+ "vocabulary_size": self.config.vocabulary_size,
+ "trigram_bucket_count": self.config.trigram_bucket_count,
+ "source_names": list(self.SOURCE_NAMES),
+ "mixture_weights": self._mixture_weights.tolist(),
+ **(metadata or {}),
+ }
+ ),
+ )
+ return audit_artifact(
+ accounting,
+ payload_bytes=artifact_bytes,
+ side_data_count=0,
+ side_data_bytes=0,
+ )
+
+ @classmethod
+ def load_artifact(cls, path: str | Path) -> "GolfSubmissionModel":
+ with np.load(Path(path), allow_pickle=False) as data:
+ config = GolfSubmissionModelConfig(
+ vocabulary_size=int(np.asarray(data["config_vocabulary_size"]).reshape(-1)[0]),
+ bigram_alpha=float(np.asarray(data["config_bigram_alpha"]).reshape(-1)[0]),
+ trigram_alpha=float(np.asarray(data["config_trigram_alpha"]).reshape(-1)[0]),
+ trigram_bucket_count=int(np.asarray(data["config_trigram_bucket_count"]).reshape(-1)[0]),
+ )
+ model = cls(config)
+ model._mixture_weights = np.asarray(data["mixture_weights"], dtype=np.float64).reshape(-1)
+ model.ngram_memory.unigram_counts = np.asarray(data["ngram_unigram_counts"], dtype=np.float64)
+ model.ngram_memory.bigram_counts = np.asarray(data["ngram_bigram_counts"], dtype=np.float64)
+ model.ngram_memory.trigram_counts = np.asarray(data["ngram_trigram_counts"], dtype=np.float64)
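+        # Rebuild the cached totals from the restored count tables so the
+        # memory scores exactly as it did before serialization.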
+ model.ngram_memory._unigram_total = float(np.sum(model.ngram_memory.unigram_counts))
+ model.ngram_memory._bigram_totals = np.sum(model.ngram_memory.bigram_counts, axis=1)
+ model.ngram_memory._trigram_totals = np.sum(model.ngram_memory.trigram_counts, axis=1)
+ return model
+
+
+__all__ = [
+ "GolfSubmissionFitReport",
+ "GolfSubmissionModel",
+ "GolfSubmissionModelConfig",
+ "GolfSubmissionScore",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/opc_native_detect_adapter.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/opc_native_detect_adapter.py
new file mode 100644
index 0000000000..6ed6d71fc6
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/opc_native_detect_adapter.py
@@ -0,0 +1,71 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+from .bootstrap import add_local_sources
+from .model import GolfSubmissionModel
+
+add_local_sources()
+
+
+class OpcNativeDetectAdapter:
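+    """Read-only replay adapter exposing the scoring surface that the
+    conker-detect legality and replay scans load."""
+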
+ def __init__(self, artifact_path: str):
+ self.artifact_path = str(artifact_path)
+ self.model = GolfSubmissionModel.load_artifact(artifact_path)
+ self.vocab_size = int(self.model.config.vocabulary_size)
+
+ def fork(self) -> "OpcNativeDetectAdapter":
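+        # The adapter holds no mutable scan state, so forks can safely share
+        # one instance instead of reloading the artifact.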
+ return self
+
+ def describe(self) -> dict[str, Any]:
+ return {
+ "adapter": "OpcNativeDetectAdapter",
+ "artifact_path": self.artifact_path,
+ "vocab_size": self.vocab_size,
+ "notes": "opc-native packed-memory replay adapter for legality and replay scans.",
+ }
+
+ def score_chunk(self, tokens: np.ndarray, sample_positions: np.ndarray | None = None) -> dict[str, Any]:
+ seq = np.asarray(tokens, dtype=np.int64).reshape(-1)
+ if seq.size == 0 or sample_positions is None:
+ return {}
+ idx = np.asarray(sample_positions, dtype=np.int64).reshape(-1)
+ sampled = np.zeros((idx.shape[0], self.vocab_size), dtype=np.float64)
+ gold = np.zeros((idx.shape[0],), dtype=np.float64)
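+        # Replay each sampled position against its full causal prefix,
+        # recording the whole predictive distribution and the gold log-prob.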
+ for row, pos in enumerate(idx.tolist()):
+ if pos < 0 or pos >= seq.size:
+ raise ValueError("sample position is out of bounds")
+ distribution = self.model.predictive_distribution(seq[:pos])
+ sampled[row] = distribution
+ gold[row] = float(np.log(max(float(distribution[int(seq[pos])]), 1e-300)))
+ return {
+ "sample_predictions": sampled,
+ "sample_gold_logprobs": gold,
+ "sample_trace": {
+ "gold_logprobs": gold,
+ "loss_nats": -gold,
+ "weights": np.ones((idx.shape[0],), dtype=np.float64),
+ "counted": np.ones((idx.shape[0],), dtype=bool),
+ "path_ids": np.asarray([f"opc_native:{int(pos)}" for pos in idx.tolist()], dtype=object),
+ "state_hash_before": np.asarray([f"pre:{int(pos)}" for pos in idx.tolist()], dtype=object),
+ "state_hash_after": np.asarray([f"post:{int(pos)}" for pos in idx.tolist()], dtype=object),
+ },
+ }
+
+ def adapt_chunk(self, tokens: np.ndarray) -> None:
+ _ = tokens
+ return None
+
+
+def build_adapter(config: dict[str, Any]) -> OpcNativeDetectAdapter:
+ artifact_path = Path(str(config["artifact_path"]))
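+    # Relative artifact paths resolve against the packet root, two levels
+    # above this vendored module.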
+ if not artifact_path.is_absolute():
+ artifact_path = Path(__file__).resolve().parents[2] / artifact_path
+ return OpcNativeDetectAdapter(str(artifact_path))
+
+
+if __name__ == "__main__": # pragma: no cover
+ raise SystemExit("This module is meant to be loaded by conker-detect.")
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/packet.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/packet.py
new file mode 100644
index 0000000000..bd06dd938a
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/opc_parameter_golf_submission/packet.py
@@ -0,0 +1,484 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+import json
+import math
+import shutil
+import time
+
+import numpy as np
+
+from .bootstrap import LEDGER_ROOT, OPC_ROOT, PROJECT_ROOT, add_local_sources, git_snapshot_ref
+from .golf_data import compute_sentencepiece_bytes_per_token, discover_shards, load_golf_tokens
+from .model import GolfSubmissionFitReport, GolfSubmissionModel, GolfSubmissionModelConfig, GolfSubmissionScore
+
+add_local_sources()
+
+from conker_ledger.ledger import write_validity_bundle
+
+
+def _write_json(path: Path, value: object) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(json.dumps(value, indent=2, sort_keys=False) + "\n", encoding="utf-8")
+
+
+def _copy_file(src: Path, dst: Path) -> None:
+ dst.parent.mkdir(parents=True, exist_ok=True)
+ shutil.copy2(src, dst)
+
+
+def _dir_bytes(root: Path) -> int:
+ total = 0
+ for path in root.rglob("*"):
+ if path.is_file():
+ total += int(path.stat().st_size)
+ return total
+
+
+def _json_ready(value: object) -> object:
+ if isinstance(value, np.ndarray):
+ return value.tolist()
+ if isinstance(value, np.integer):
+ return int(value)
+ if isinstance(value, np.floating):
+ return float(value)
+ if isinstance(value, Path):
+ return str(value)
+ if isinstance(value, dict):
+ return {str(key): _json_ready(item) for key, item in value.items()}
+ if isinstance(value, tuple):
+ return [_json_ready(item) for item in value]
+ if isinstance(value, list):
+ return [_json_ready(item) for item in value]
+ return value
+
+
+def _vendor_opc_snapshot(dst_root: Path) -> Path:
+ vendor_root = dst_root / "vendor" / "open_predictive_coder"
+ if vendor_root.exists():
+ shutil.rmtree(vendor_root)
+ shutil.copytree(OPC_ROOT / "src" / "open_predictive_coder", vendor_root)
+ return vendor_root
+
+
+def _vendor_submission_snapshot(dst_root: Path) -> Path:
+ vendor_root = dst_root / "vendor" / "opc_parameter_golf_submission"
+ if vendor_root.exists():
+ shutil.rmtree(vendor_root)
+ vendor_root.mkdir(parents=True, exist_ok=True)
+ native_files = (
+ "bootstrap.py",
+ "cli.py",
+ "golf_data.py",
+ "model.py",
+ "opc_native_detect_adapter.py",
+ "packet.py",
+ )
+ for name in native_files:
+ _copy_file(PROJECT_ROOT / "src" / "opc_parameter_golf_submission" / name, vendor_root / name)
+ (vendor_root / "__init__.py").write_text(
+ "\n".join(
+ [
+ "from .model import GolfSubmissionFitReport, GolfSubmissionModel, GolfSubmissionModelConfig, GolfSubmissionScore",
+ "from .packet import SubmissionPacketResult, build_parameter_golf_packet, build_packet_from_patterns",
+ "",
+ "__all__ = [",
+ ' "GolfSubmissionFitReport",',
+ ' "GolfSubmissionModel",',
+ ' "GolfSubmissionModelConfig",',
+ ' "GolfSubmissionScore",',
+ ' "SubmissionPacketResult",',
+ ' "build_parameter_golf_packet",',
+ ' "build_packet_from_patterns",',
+ "]",
+ "",
+ ]
+ ),
+ encoding="utf-8",
+ )
+ return vendor_root
+
+
+def _vendor_ledger_snapshot(dst_root: Path) -> Path:
+ vendor_root = dst_root / "vendor" / "conker_ledger"
+ if vendor_root.exists():
+ shutil.rmtree(vendor_root)
+ shutil.copytree(LEDGER_ROOT / "src" / "conker_ledger", vendor_root)
+ return vendor_root
+
+
+def _write_source_train_script(path: Path) -> None:
+ content = """from pathlib import Path
+import sys
+
+_VENDOR_ROOT = Path(__file__).resolve().parent / "vendor"
+if _VENDOR_ROOT.exists() and str(_VENDOR_ROOT) not in sys.path:
+ sys.path.insert(0, str(_VENDOR_ROOT))
+
+from opc_parameter_golf_submission.cli import main
+
+
+if __name__ == "__main__":
+ main()
+"""
+ path.write_text(content, encoding="utf-8")
+
+
+def _write_source_detect_adapter(path: Path) -> None:
+ content = """from pathlib import Path
+import sys
+
+_VENDOR_ROOT = Path(__file__).resolve().parent / "vendor"
+if _VENDOR_ROOT.exists() and str(_VENDOR_ROOT) not in sys.path:
+ sys.path.insert(0, str(_VENDOR_ROOT))
+
+from opc_parameter_golf_submission.opc_native_detect_adapter import build_adapter
+
+
+__all__ = ["build_adapter"]
+"""
+ path.write_text(content, encoding="utf-8")
+
+
+def _render_source_readme(
+ *,
+ submission_name: str,
+ track: str,
+ run_id: str,
+ score: GolfSubmissionScore,
+ train_report: GolfSubmissionFitReport,
+ artifact_bytes: int,
+ opc_commit: str | None,
+) -> str:
+ return "\n".join(
+ [
+ f"# {submission_name}",
+ "",
+ "This packet was rebuilt from scratch in a standalone workspace on top of the `open_predictive_coder` kernel.",
+ "",
+ f"- track: `{track}`",
+ f"- run_id: `{run_id}`",
+ f"- eval bits per token: `{score.mixed_bits_per_token}`",
+ f"- unigram bits per token: `{score.unigram_bits_per_token}`",
+ f"- bigram bits per token: `{score.bigram_bits_per_token}`",
+ f"- trigram bits per token: `{score.trigram_bits_per_token}`",
+ f"- train bits per token: `{train_report.train_bits_per_token}`",
+ f"- mixture weights: `{train_report.mixture_weights.tolist()}`",
+ f"- artifact bytes: `{artifact_bytes}`",
+ "- opc upstream: `https://github.com/asuramaya/open-predictive-coder`",
+ f"- opc commit: `{opc_commit}`",
+ "",
+ "Important scope note:",
+ "",
+ "- this is a legal packet stress test and descendant rebuild",
+ "- it is not a leaderboard claim",
+ "- the model is an opc-native causal packed-memory descendant built in this workspace",
+ ]
+ ) + "\n"
+
+
+@dataclass(frozen=True)
+class SubmissionPacketResult:
+ output_root: Path
+ source_submission_dir: Path
+ handoff_dir: Path
+ validity_bundle_dir: Path
+ submission_name: str
+ run_id: str
+ pre_quant_val_bpb: float
+ train_bits_per_token: float
+ artifact_bytes: int
+ opc_commit: str | None
+
+
+def build_parameter_golf_packet(
+ train_tokens: np.ndarray,
+ eval_tokens: np.ndarray,
+ out_dir: str | Path,
+ *,
+ model_config: GolfSubmissionModelConfig | None = None,
+ bytes_per_token: float | None = None,
+ submission_name: str = "OPC causal packed-memory legal packet stress test",
+ track: str = "track_non_record_16mb",
+ candidate_id: str = "opc-causal-packed-memory-stress-test",
+ submission_pr: str = "https://github.com/openai/parameter-golf/pull/998",
+ vendor_opc_snapshot: bool = True,
+) -> SubmissionPacketResult:
+ out_root = Path(out_dir)
+ source_dir = out_root / "source_submission"
+ handoff_dir = out_root / "handoff"
+ bundle_dir = out_root / "validity_bundle"
+ source_dir.mkdir(parents=True, exist_ok=True)
+ handoff_dir.mkdir(parents=True, exist_ok=True)
+
+ inferred_vocab_size = int(max(np.max(train_tokens, initial=0), np.max(eval_tokens, initial=0)) + 1)
+ if model_config is None:
+ model_config = GolfSubmissionModelConfig(vocabulary_size=max(inferred_vocab_size, 2))
+ elif inferred_vocab_size > model_config.vocabulary_size:
+ raise ValueError(
+ f"token ids require vocabulary_size >= {inferred_vocab_size}, got {model_config.vocabulary_size}"
+ )
+ model = GolfSubmissionModel(model_config)
+ started_at = time.perf_counter()
+ train_report = model.fit(train_tokens)
+ score = model.score(eval_tokens)
+ elapsed = time.perf_counter() - started_at
+
+ run_id = f"opc_native_tokens{int(train_tokens.size)}_{int(eval_tokens.size)}"
+ artifact_path = source_dir / "artifacts" / "model_artifact.npz"
+ audit = model.save_artifact(
+ artifact_path,
+ reference_tokens=eval_tokens,
+ metadata={"run_id": run_id},
+ )
+ artifact_bytes = int(artifact_path.stat().st_size)
+ root_artifact_path = source_dir / "model_artifact.npz"
+ _copy_file(artifact_path, root_artifact_path)
+ opc_commit = git_snapshot_ref(OPC_ROOT)
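+    # Vocabularies above 256 are tokenizer-defined, so an honest bits-per-byte
+    # figure needs a measured bytes-per-token value rather than the byte-level
+    # default of 1.0.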
+ if bytes_per_token is None and inferred_vocab_size > 256:
+ raise ValueError(
+ "bytes_per_token or tokenizer_model is required for vocabularies above 256 to report honest bpb"
+ )
+ effective_bytes_per_token = float(bytes_per_token if bytes_per_token is not None else 1.0)
+ if effective_bytes_per_token <= 0.0:
+ raise ValueError("bytes_per_token must be > 0")
+
+ results_json = {
+ "run_id": run_id,
+ "pre_quant_val_bpb": score.mixed_bits_per_token / effective_bytes_per_token,
+ "val_bpb": score.mixed_bits_per_token / effective_bytes_per_token,
+ "test_bits_per_token": score.mixed_bits_per_token,
+ "test_eval_loss": score.mixed_bits_per_token * math.log(2.0),
+ "train_bits_per_token": train_report.train_bits_per_token,
+ "train_time_sec": elapsed,
+ "train_tokens": int(train_tokens.size),
+ "eval_tokens": int(eval_tokens.size),
+ "eval_bytes_per_token": effective_bytes_per_token,
+ "checkpoint_format": "compressed_npz_artifact",
+ "checkpoint_bytes_raw_npz": artifact_bytes,
+ "bytes_model_int6_zlib": artifact_bytes,
+ "unigram_bits_per_token": score.unigram_bits_per_token,
+ "bigram_bits_per_token": score.bigram_bits_per_token,
+ "trigram_bits_per_token": score.trigram_bits_per_token,
+ "mixture_weights": train_report.mixture_weights.tolist(),
+ }
+ submission_json = {
+ "name": submission_name,
+ "track": track,
+ "pre_quant_val_bpb": score.mixed_bits_per_token / effective_bytes_per_token,
+ "val_bpb": score.mixed_bits_per_token / effective_bytes_per_token,
+ "run_id": run_id,
+ "source_repo": "opc-parameter-golf-submission",
+ "bytes_model_int6_zlib": artifact_bytes,
+ "notes": "Standalone opc-native causal packed-memory submission rebuilt from scratch for packet stress testing.",
+ }
+ train_log = "\n".join(
+ [
+ f"run_id={run_id}",
+ f"train_tokens={int(train_tokens.size)}",
+ f"eval_tokens={int(eval_tokens.size)}",
+ f"train_bits_per_token={train_report.train_bits_per_token}",
+ f"eval_bits_per_token={score.mixed_bits_per_token}",
+ f"eval_bits_per_byte={score.mixed_bits_per_token / effective_bytes_per_token}",
+ f"unigram_bits_per_token={score.unigram_bits_per_token}",
+ f"bigram_bits_per_token={score.bigram_bits_per_token}",
+ f"trigram_bits_per_token={score.trigram_bits_per_token}",
+ f"mixture_weights={train_report.mixture_weights.tolist()}",
+ f"eval_bytes_per_token={effective_bytes_per_token}",
+ f"artifact_bytes={artifact_bytes}",
+ f"opc_commit={opc_commit}",
+ ]
+ ) + "\n"
+
+ _write_json(source_dir / "results.json", _json_ready(results_json))
+ _write_json(source_dir / "submission.json", _json_ready(submission_json))
+ (source_dir / "README.md").write_text(
+ _render_source_readme(
+ submission_name=submission_name,
+ track=track,
+ run_id=run_id,
+ score=score,
+ train_report=train_report,
+ artifact_bytes=artifact_bytes,
+ opc_commit=opc_commit,
+ ),
+ encoding="utf-8",
+ )
+ (source_dir / "train.log").write_text(train_log, encoding="utf-8")
+ np.save(source_dir / "audit_tokens.npy", np.asarray(eval_tokens[: min(int(eval_tokens.size), 131_072)], dtype=np.uint16))
+ _write_source_train_script(source_dir / "train_gpt.py")
+ _write_source_detect_adapter(source_dir / "opc_native_detect_adapter.py")
+ _vendor_submission_snapshot(source_dir)
+ _vendor_ledger_snapshot(source_dir)
+ if vendor_opc_snapshot:
+ _vendor_opc_snapshot(source_dir)
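+    # Recompute the final on-disk size after vendoring and rewrite both JSON
+    # files so bytes_total reflects the shipped tree.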
+ total_bytes = _dir_bytes(source_dir)
+ results_json["bytes_total"] = total_bytes
+ submission_json["bytes_total"] = total_bytes
+ _write_json(source_dir / "results.json", _json_ready(results_json))
+ _write_json(source_dir / "submission.json", _json_ready(submission_json))
+
+ artifact_audit_json = {
+ "artifact_name": audit.artifact_name,
+ "artifact_bytes": audit.artifact_bytes,
+ "replay_bytes": audit.replay_bytes,
+ "payload_bytes": audit.payload_bytes,
+ "coverage_ratio": audit.coverage_ratio,
+ "payload_coverage_ratio": audit.payload_coverage_ratio,
+ "side_data_ratio": audit.side_data_ratio,
+ "replay_span_count": audit.replay_span_count,
+ "replay_span_length": audit.replay_span_length,
+ "metadata": audit.metadata.to_dict(),
+ }
+ submission_report = {
+ "profile": "parameter-golf",
+ "verdict": "pass",
+ "submission": submission_json,
+ "checks": {
+ "presence": {"pass": True},
+ "artifact_present": {"pass": artifact_path.exists()},
+ "results_present": {"pass": True},
+ },
+ }
+ provenance_report = {
+ "profile": "parameter-golf",
+ "verdict": "pass",
+ "provenance": {
+ "submitted_run_id": run_id,
+ "selection_mode": "single_run",
+ "candidate_run_count": 1,
+ },
+ "checks": {
+ "selection_disclosure": {"pass": True},
+ "kernel_commit_recorded": {"pass": opc_commit is not None},
+ },
+ }
+ claim_json = {
+ "candidate_id": candidate_id,
+ "requested_label": "Tier-2 targeted structural evidence attached",
+ "submission_name": submission_name,
+ "track": track,
+ "summary": "Standalone opc-native causal packed-memory descendant with a fresh packed-memory artifact and structural audit packet.",
+ }
+ metrics_json = {
+ "fresh_process_full": {
+ "bpb": score.mixed_bits_per_token / effective_bytes_per_token,
+ "bits_per_token": score.mixed_bits_per_token,
+ "bytes_per_token": effective_bytes_per_token,
+ },
+ }
+ provenance_json = {
+ "run_id": "source_submission",
+ "source_root": "source_submission",
+ "source_repo": "opc-parameter-golf-submission",
+ "source_commit": git_snapshot_ref(PROJECT_ROOT),
+ "kernel_repo": "vendor/open_predictive_coder",
+ "kernel_commit": opc_commit,
+ "submission_pr": submission_pr,
+ "submitted_run_id": run_id,
+ "selection_mode": "single_run",
+ "candidate_run_count": 1,
+ }
+ audits_json = {
+ "tier1": {
+ "status": "pass",
+ "submission": "pass",
+ "provenance": "pass",
+ },
+ "tier2": {
+ "status": "pass",
+ "summary": "Artifact-boundary structural audit attached for the standalone opc-based submission artifact.",
+ "artifact_audit": "pass",
+ },
+ "tier3": {
+ "status": "missing",
+ "summary": "No live runtime legality adapter is attached in this stress-test packet.",
+ },
+ }
+ _write_json(handoff_dir / "claim.json", claim_json)
+ _write_json(handoff_dir / "metrics.json", metrics_json)
+ _write_json(handoff_dir / "provenance.json", provenance_json)
+ _write_json(handoff_dir / "audits.json", audits_json)
+ _write_json(handoff_dir / "reports" / "submission.json", submission_report)
+ _write_json(handoff_dir / "reports" / "provenance.json", provenance_report)
+ _write_json(handoff_dir / "artifact_audit.json", artifact_audit_json)
+
+ ledger_manifest = {
+ "bundle_id": candidate_id,
+ "claim": "claim.json",
+ "metrics": "metrics.json",
+ "provenance": "provenance.json",
+ "audits": "audits.json",
+ "attachments": [
+ {"source": "reports/submission.json", "dest": "audits/tier1/submission.json"},
+ {"source": "reports/provenance.json", "dest": "audits/tier1/provenance.json"},
+ {"source": "artifact_audit.json", "dest": "audits/tier2/artifact_audit.json"},
+ ],
+ }
+ _write_json(handoff_dir / "ledger_manifest.json", ledger_manifest)
+ bundle_result = write_validity_bundle(handoff_dir / "ledger_manifest.json", bundle_dir)
+ _write_json(out_root / "handoff_result.json", bundle_result)
+
+ return SubmissionPacketResult(
+ output_root=out_root,
+ source_submission_dir=source_dir,
+ handoff_dir=handoff_dir,
+ validity_bundle_dir=bundle_dir,
+ submission_name=submission_name,
+ run_id=run_id,
+ pre_quant_val_bpb=score.mixed_bits_per_token / effective_bytes_per_token,
+ train_bits_per_token=train_report.train_bits_per_token,
+ artifact_bytes=artifact_bytes,
+ opc_commit=opc_commit,
+ )
+
+
+def build_packet_from_patterns(
+ *,
+ train_patterns: str | Path | list[str | Path],
+ eval_patterns: str | Path | list[str | Path],
+ out_dir: str | Path,
+ model_config: GolfSubmissionModelConfig | None = None,
+ max_train_tokens: int | None = None,
+ max_eval_tokens: int | None = None,
+ vocab_size: int | None = None,
+ bytes_per_token: float | None = None,
+ tokenizer_model: str | Path | None = None,
+ submission_name: str = "OPC causal packed-memory legal packet stress test",
+ track: str = "track_non_record_16mb",
+ candidate_id: str = "opc-causal-packed-memory-stress-test",
+ submission_pr: str = "https://github.com/openai/parameter-golf/pull/998",
+) -> SubmissionPacketResult:
+ train_paths = discover_shards(train_patterns)
+ eval_paths = discover_shards(eval_patterns)
+ train_tokens = load_golf_tokens(train_paths, max_tokens=max_train_tokens)
+ eval_tokens = load_golf_tokens(eval_paths, max_tokens=max_eval_tokens)
+ resolved_vocab_size = vocab_size
+ if resolved_vocab_size is None:
+ resolved_vocab_size = int(max(np.max(train_tokens, initial=0), np.max(eval_tokens, initial=0)) + 1)
+ resolved_bytes_per_token = bytes_per_token
+ if resolved_bytes_per_token is None and tokenizer_model is not None:
+ resolved_bytes_per_token = compute_sentencepiece_bytes_per_token(
+ eval_tokens,
+ tokenizer_model,
+ vocab_size=int(resolved_vocab_size),
+ )
+ return build_parameter_golf_packet(
+ train_tokens,
+ eval_tokens,
+ out_dir,
+ model_config=model_config or GolfSubmissionModelConfig(vocabulary_size=max(int(resolved_vocab_size), 2)),
+ bytes_per_token=resolved_bytes_per_token,
+ submission_name=submission_name,
+ track=track,
+ candidate_id=candidate_id,
+ submission_pr=submission_pr,
+ )
+
+
+__all__ = [
+ "SubmissionPacketResult",
+ "build_parameter_golf_packet",
+ "build_packet_from_patterns",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/__init__.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/__init__.py
new file mode 100644
index 0000000000..72db0aa258
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/__init__.py
@@ -0,0 +1,381 @@
+"""Public API for the open_predictive_coder kernel.
+
+The package is intentionally organized in layers:
+
+1. foundational types and configs
+2. reusable kernel primitives for substrates, control, memory, views, readouts, and runtime
+3. the shared adapter layer plus the first concrete byte-latent adapter
+
+The full package map and layer boundary are documented in `docs/architecture.md`.
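+
+A minimal usage sketch against the adapter layer (inputs illustrative):
+
+    >>> from open_predictive_coder import ByteLatentPredictiveCoder
+    >>> coder = ByteLatentPredictiveCoder()
+    >>> coder.fit(b"hello world, hello world")  # doctest: +SKIP
+    >>> coder.score(b"hello there").bits_per_byte  # doctest: +SKIP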
+"""
+
+# Foundation and configuration surfaces.
+from .artifacts import (
+ ArtifactAccounting,
+ ArtifactMetadata,
+ coerce_artifact_metadata,
+ make_artifact_accounting,
+ make_replay_span,
+ ReplaySpan,
+)
+from .artifacts_audits import ArtifactAuditRecord, ArtifactAuditSummary, audit_artifact, summarize_artifact_audits
+from .codecs import ByteCodec, ensure_byte_tokens, ensure_tokens
+from .config import (
+ ByteLatentPredictiveCoderConfig,
+ DelayLineConfig,
+ HierarchicalSubstrateConfig,
+ LatentConfig,
+ LatentControllerConfig,
+ LinearMemoryConfig,
+ MemoryMergeMode,
+ MixedMemoryConfig,
+ OscillatoryMemoryConfig,
+ OpenPredictiveCoderConfig,
+ ReservoirConfig,
+ ReservoirTopology,
+ SampledReadoutBandConfig,
+ SampledReadoutConfig,
+ SegmenterConfig,
+ SegmenterMode,
+ SubstrateKind,
+)
+
+# Control, routing, modulation, and predictive side channels.
+from .control import (
+ ControllerSummary,
+ ControllerSummaryBuilder,
+ ControllerSummaryConfig,
+ stack_summaries,
+)
+from .controllers import (
+ PredictiveController,
+ PredictiveObservation,
+ PredictiveState,
+)
+from .gating import PathwayGateConfig, PathwayGateController, PathwayGateState, PathwayGateValues
+from .modulation import HormoneModulationConfig, HormoneModulator, HormoneState
+from .oracle_analysis import (
+ OracleAnalysisAdapter,
+ OracleAnalysisConfig,
+ OracleAnalysisFitReport,
+ OracleAnalysisPoint,
+ OracleAnalysisReport,
+)
+from .predictive_surprise import PredictionState, PredictiveSurpriseConfig, PredictiveSurpriseController, SummaryMode
+from .routing import RoutingConfig, RoutingDecision, RoutingMode, SummaryRouter
+
+# Memory, latent, and feature-view primitives.
+from .bidirectional_context import (
+ BidirectionalContextConfig,
+ BidirectionalContextLeaveOneOutStats,
+ BidirectionalContextNeighborhood,
+ BidirectionalContextProbe,
+ BidirectionalContextStats,
+)
+from .bridge_export import (
+ BridgeExportAdapter,
+ BridgeExportConfig,
+ BridgeExportFitReport,
+ BridgeExportReport,
+)
+from .bridge_features import BridgeFeatureArrays, BridgeFeatureConfig, bridge_feature_arrays
+from .exact_context import (
+ ExactContextConfig,
+ ExactContextFitReport,
+ ExactContextMemory,
+ ExactContextPrediction,
+ SupportBlend,
+ SupportMixConfig,
+ SupportWeightedMixer,
+)
+from .latents import LatentCommitter, LatentObservation, LatentState
+from .learned_segmentation import (
+ BoundaryDecision,
+ BoundaryFeatures,
+ BoundaryScorerConfig,
+ LearnedBoundaryScorer,
+ LearnedSegmenter,
+ LearnedSegmenterConfig,
+)
+from .hierarchical_views import HierarchicalFeatureView, HierarchicalSummary
+from .linear_views import LinearMemoryFeatureView
+from .ngram_memory import NgramMemory, NgramMemoryConfig, NgramMemoryReport
+from .noncausal_reconstructive import (
+ NoncausalReconstructiveAdapter,
+ NoncausalReconstructiveConfig,
+ NoncausalReconstructiveFitReport,
+ NoncausalReconstructiveReport,
+ NoncausalReconstructiveTrace,
+)
+from .patch_latent_blocks import (
+ GlobalLocalBridge,
+ GlobalLocalBridgeConfig,
+ LocalByteEncoder,
+ LocalByteEncoderConfig,
+ PatchPooler,
+ PatchPoolerConfig,
+)
+from .probability_diagnostics import (
+ ProbabilityDiagnostics,
+ ProbabilityDiagnosticsConfig,
+ normalized_entropy,
+ overlap_mass,
+ probability_diagnostics,
+ shared_top_k_mass,
+ top1_agreement,
+ top1_peak,
+ top2_margin,
+ top_k_mass,
+)
+from .sampled_readout import SampledBandSummary, SampledMultiscaleReadout
+from .span_selection import ScoredSpan, SpanSelectionConfig, replay_spans_from_scores, select_scored_spans
+from .views import ByteLatentFeatureView
+
+# Substrates, factories, and presets.
+from .factories import (
+ create_delay_line_substrate,
+ create_echo_state_substrate,
+ create_hierarchical_substrate,
+ create_mixed_memory_substrate,
+ create_oscillatory_memory_substrate,
+ create_substrate,
+ create_substrate_for_model,
+)
+from .presets import delay_small, echo_state_small, hierarchical_small, mixed_memory_small
+from .segmenters import AdaptiveSegmenter, SegmentStats
+from .substrates import (
+ DelayLineSubstrate,
+ EchoStateSubstrate,
+ HierarchicalSubstrate,
+ LinearMemorySubstrate,
+ MixedMemorySubstrate,
+ OscillatoryMemorySubstrate,
+ TokenSubstrate,
+)
+
+# Readouts, experts, datasets, and runtime surfaces.
+from .datasets import ByteSequenceDataset
+from .eval import NextStepScore, RolloutEvaluation, RolloutMode, evaluate_rollout, score_next_step
+from .experts import ExpertFitReport, ExpertScore, FrozenReadoutExpert
+from .metrics import (
+ bits_per_byte_from_logits,
+ bits_per_byte_from_probabilities,
+ bits_per_token_from_logits,
+ bits_per_token_from_probabilities,
+)
+from .readouts import RidgeReadout
+from .runtime import (
+ CausalFitReport,
+ CausalSequenceReport,
+ CausalTrace,
+ FitReport,
+ SequenceReport,
+ SequenceTrace,
+ tag_metadata,
+)
+from .train_modes import TrainModeConfig, TrainStateMode
+from .train_eval import (
+ DatasetEvaluation,
+ RolloutCheckpoint,
+ RolloutCurve,
+ RolloutCurveMode,
+ RolloutCurveEvaluation,
+ RolloutCurvePoint,
+ TransferEvaluation,
+ TransferProbeReport,
+ evaluate_dataset,
+ evaluate_rollout_curve,
+ evaluate_transfer_probe,
+ score_dataset,
+)
+
+# Concrete adapter surface.
+from .causal_predictive import CausalPredictiveAdapter, CausalPredictiveFitReport, CausalPredictiveScore
+from .model import ByteLatentPredictiveCoder, OpenPredictiveCoder
+from .teacher_export import TeacherExportAdapter, TeacherExportConfig, TeacherExportRecord, TeacherExportReport
+
+__all__ = [
+ "AdaptiveSegmenter",
+ "ArtifactAccounting",
+ "ArtifactAuditRecord",
+ "ArtifactAuditSummary",
+ "ArtifactMetadata",
+ "audit_artifact",
+ "coerce_artifact_metadata",
+ "ByteCodec",
+ "ensure_byte_tokens",
+ "ByteLatentFeatureView",
+ "ByteLatentPredictiveCoder",
+ "ByteLatentPredictiveCoderConfig",
+ "ByteSequenceDataset",
+ "bits_per_byte_from_logits",
+ "bits_per_byte_from_probabilities",
+ "bits_per_token_from_logits",
+ "bits_per_token_from_probabilities",
+ "BidirectionalContextConfig",
+ "BidirectionalContextLeaveOneOutStats",
+ "BidirectionalContextNeighborhood",
+ "BidirectionalContextProbe",
+ "BidirectionalContextStats",
+ "BridgeExportAdapter",
+ "BridgeExportConfig",
+ "BridgeExportFitReport",
+ "BridgeExportReport",
+ "BoundaryDecision",
+ "BoundaryFeatures",
+ "BoundaryScorerConfig",
+ "BridgeFeatureArrays",
+ "BridgeFeatureConfig",
+ "bridge_feature_arrays",
+ "CausalFitReport",
+ "CausalPredictiveAdapter",
+ "CausalPredictiveFitReport",
+ "CausalPredictiveScore",
+ "CausalSequenceReport",
+ "CausalTrace",
+ "ControllerSummary",
+ "ControllerSummaryBuilder",
+ "ControllerSummaryConfig",
+ "SubstrateKind",
+ "create_delay_line_substrate",
+ "create_echo_state_substrate",
+ "create_hierarchical_substrate",
+ "create_mixed_memory_substrate",
+ "create_oscillatory_memory_substrate",
+ "create_substrate",
+ "create_substrate_for_model",
+ "DelayLineConfig",
+ "DelayLineSubstrate",
+ "delay_small",
+ "DatasetEvaluation",
+ "EchoStateSubstrate",
+ "echo_state_small",
+ "ExpertFitReport",
+ "ExpertScore",
+ "ExactContextConfig",
+ "ExactContextFitReport",
+ "ExactContextMemory",
+ "ExactContextPrediction",
+ "evaluate_rollout",
+ "FitReport",
+ "FrozenReadoutExpert",
+ "HormoneModulationConfig",
+ "HormoneModulator",
+ "HormoneState",
+ "HierarchicalFeatureView",
+ "HierarchicalSummary",
+ "HierarchicalSubstrate",
+ "HierarchicalSubstrateConfig",
+ "hierarchical_small",
+ "GlobalLocalBridge",
+ "GlobalLocalBridgeConfig",
+ "LatentConfig",
+ "LatentCommitter",
+ "LatentControllerConfig",
+ "LatentObservation",
+ "LatentState",
+ "LearnedBoundaryScorer",
+ "LearnedSegmenter",
+ "LearnedSegmenterConfig",
+ "LinearMemoryConfig",
+ "LinearMemoryFeatureView",
+ "LinearMemorySubstrate",
+ "LocalByteEncoder",
+ "LocalByteEncoderConfig",
+ "make_artifact_accounting",
+ "make_replay_span",
+ "MemoryMergeMode",
+ "MixedMemoryConfig",
+ "MixedMemorySubstrate",
+ "mixed_memory_small",
+ "NextStepScore",
+ "NgramMemory",
+ "NgramMemoryConfig",
+ "NgramMemoryReport",
+ "NoncausalReconstructiveAdapter",
+ "NoncausalReconstructiveConfig",
+ "NoncausalReconstructiveFitReport",
+ "NoncausalReconstructiveReport",
+ "NoncausalReconstructiveTrace",
+ "OracleAnalysisAdapter",
+ "OracleAnalysisConfig",
+ "OracleAnalysisFitReport",
+ "OracleAnalysisPoint",
+ "OracleAnalysisReport",
+ "OpenPredictiveCoder",
+ "OpenPredictiveCoderConfig",
+ "OscillatoryMemoryConfig",
+ "OscillatoryMemorySubstrate",
+ "PathwayGateConfig",
+ "PathwayGateController",
+ "PathwayGateState",
+ "PathwayGateValues",
+ "PatchPooler",
+ "PatchPoolerConfig",
+ "ProbabilityDiagnostics",
+ "ProbabilityDiagnosticsConfig",
+ "probability_diagnostics",
+ "normalized_entropy",
+ "overlap_mass",
+ "shared_top_k_mass",
+ "top1_agreement",
+ "top1_peak",
+ "top2_margin",
+ "top_k_mass",
+ "PredictiveController",
+ "PredictiveObservation",
+ "PredictiveState",
+ "PredictionState",
+ "PredictiveSurpriseConfig",
+ "PredictiveSurpriseController",
+ "ReplaySpan",
+ "ReservoirConfig",
+ "ReservoirTopology",
+ "SampledBandSummary",
+ "SampledMultiscaleReadout",
+ "SampledReadoutBandConfig",
+ "SampledReadoutConfig",
+ "ScoredSpan",
+ "RidgeReadout",
+ "RolloutCurveEvaluation",
+ "RolloutCurve",
+ "RolloutCheckpoint",
+ "RolloutCurveMode",
+ "RolloutCurvePoint",
+ "RolloutEvaluation",
+ "RolloutMode",
+ "RoutingConfig",
+ "RoutingDecision",
+ "RoutingMode",
+ "SegmenterConfig",
+ "SegmenterMode",
+ "SegmentStats",
+ "SequenceReport",
+ "SequenceTrace",
+ "score_next_step",
+ "tag_metadata",
+ "TeacherExportAdapter",
+ "TeacherExportConfig",
+ "TeacherExportRecord",
+ "TeacherExportReport",
+ "TransferProbeReport",
+ "TransferEvaluation",
+ "TokenSubstrate",
+ "ensure_tokens",
+ "evaluate_dataset",
+ "evaluate_rollout_curve",
+ "evaluate_transfer_probe",
+ "score_dataset",
+ "select_scored_spans",
+ "SpanSelectionConfig",
+ "stack_summaries",
+ "SupportBlend",
+ "SupportMixConfig",
+ "SupportWeightedMixer",
+ "summarize_artifact_audits",
+ "SummaryMode",
+ "SummaryRouter",
+ "TrainModeConfig",
+ "TrainStateMode",
+ "replay_spans_from_scores",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/adapters.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/adapters.py
new file mode 100644
index 0000000000..ac62df9fc9
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/adapters.py
@@ -0,0 +1,220 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+import numpy as np
+
+from .codecs import ensure_tokens
+from .config import OpenPredictiveCoderConfig
+from .datasets import ByteSequenceDataset
+from .factories import create_substrate_for_model
+from .latents import LatentCommitter, LatentObservation, LatentState
+from .metrics import bits_per_byte_from_logits, softmax
+from .patching import AdaptiveSegmenter
+from .readout import RidgeReadout
+from .runtime import FitReport, SequenceReport, SequenceTrace
+from .substrates import TokenSubstrate
+from .views import ByteLatentFeatureView
+
+
+@dataclass
+class _AdapterState:
+ substrate_state: np.ndarray
+ latent_state: LatentState
+
+
+@dataclass(frozen=True)
+class _AdapterStep:
+ feature: np.ndarray
+ observation: LatentObservation
+
+
+class ByteLatentPredictiveCoder:
+ def __init__(
+ self,
+ config: OpenPredictiveCoderConfig | None = None,
+ *,
+ substrate: TokenSubstrate | None = None,
+ segmenter: AdaptiveSegmenter | None = None,
+ committer: LatentCommitter | None = None,
+ feature_view: ByteLatentFeatureView | None = None,
+ readout: RidgeReadout | None = None,
+ ):
+ self.config = config or OpenPredictiveCoderConfig()
+ self.segmenter = segmenter or AdaptiveSegmenter(self.config.segmenter)
+ self.substrate = substrate or create_substrate_for_model(self.config)
+ self.committer = committer or LatentCommitter(
+ config=self.config.latent,
+ substrate_size=self.substrate.state_dim,
+ seed=self.config.reservoir.seed + 101,
+ )
+ self.feature_view = feature_view or ByteLatentFeatureView(
+ max_patch_size=self.config.segmenter.max_patch_size,
+ )
+ self.readout = readout or RidgeReadout(
+ input_dim=self.config.feature_dim,
+ output_dim=self.config.vocabulary_size,
+ alpha=self.config.latent.readout_l2,
+ )
+
+ def _initial_state(self) -> _AdapterState:
+ return _AdapterState(
+ substrate_state=self.substrate.initial_state(),
+ latent_state=self.committer.initial_state(),
+ )
+
+ def _advance_state(self, state: _AdapterState, token: int) -> _AdapterStep:
+ state.substrate_state = self.substrate.step(state.substrate_state, token)
+ local_view = self.committer.sample(state.substrate_state)
+ observation = self.committer.step(
+ state.latent_state,
+ local_view,
+ self.segmenter,
+ )
+ return _AdapterStep(
+ feature=self.feature_view.encode(observation),
+ observation=observation,
+ )
+
+ def _coerce_sequences(
+ self,
+ data: ByteSequenceDataset | str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> tuple[np.ndarray, ...]:
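+        # Normalize every accepted input shape (dataset, single sequence,
+        # flat list of ints, or iterable of sequences) to a tuple of arrays.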
+ if isinstance(data, ByteSequenceDataset):
+ return data.sequences
+ if isinstance(data, (str, bytes, bytearray, memoryview, np.ndarray)):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence) and data and all(isinstance(item, int) for item in data):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence):
+ return tuple(ensure_tokens(item) for item in data)
+ return (ensure_tokens(data),)
+
+ def trace(self, sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> SequenceTrace:
+ tokens = ensure_tokens(sequence)
+ if tokens.size < 2:
+ raise ValueError("A sequence must contain at least two tokens.")
+
+ state = self._initial_state()
+ features = []
+ boundaries = []
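+        # Each step consumes tokens[index] and predicts tokens[index + 1], so
+        # feature rows align one-to-one with the shifted targets below.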
+ for index in range(tokens.size - 1):
+ step = self._advance_state(state, int(tokens[index]))
+ features.append(step.feature)
+ boundaries.append(step.observation.boundary)
+ targets = tokens[1:].astype(np.int64, copy=False)
+ final_patches = state.latent_state.patches + (1 if state.latent_state.patch_length > 0 else 0)
+ return SequenceTrace(
+ features=np.vstack(features),
+ targets=targets,
+ boundaries=np.asarray(boundaries, dtype=bool),
+ tokens=int(tokens.size),
+ patches=final_patches,
+ )
+
+ def fit(
+ self,
+ data: ByteSequenceDataset | str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> FitReport:
+ sequences = self._coerce_sequences(data)
+ feature_batches = []
+ target_batches = []
+ total_tokens = 0
+ total_patches = 0
+
+ for sequence in sequences:
+ trace = self.trace(sequence)
+ feature_batches.append(trace.features)
+ target_batches.append(trace.targets)
+ total_tokens += trace.tokens
+ total_patches += trace.patches
+
+ design = np.concatenate(feature_batches, axis=0)
+ labels = np.concatenate(target_batches, axis=0)
+ self.readout.fit(design, labels)
+ train_logits = self.readout.logits(design)
+ train_bpb = bits_per_byte_from_logits(train_logits, labels)
+ mean_patch_size = max(total_tokens - len(sequences), 0) / max(total_patches, 1)
+
+ return FitReport(
+ sequences=len(sequences),
+ tokens=total_tokens,
+ patches=total_patches,
+ mean_patch_size=mean_patch_size,
+ compression_ratio=mean_patch_size,
+ train_bits_per_byte=train_bpb,
+ )
+
+ def score(self, sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> SequenceReport:
+ trace = self.trace(sequence)
+ logits = self.readout.logits(trace.features)
+ bpb = bits_per_byte_from_logits(logits, trace.targets)
+ total_steps = max(trace.tokens - 1, 0)
+ mean_patch_size = total_steps / max(trace.patches, 1)
+ return SequenceReport(
+ tokens=trace.tokens,
+ patches=trace.patches,
+ mean_patch_size=mean_patch_size,
+ compression_ratio=mean_patch_size,
+ bits_per_byte=bpb,
+ )
+
+ def predict_proba(self, prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> np.ndarray:
+ tokens = ensure_tokens(prompt)
+ if tokens.size < 1:
+ raise ValueError("Prompt must contain at least one token.")
+ state = self._initial_state()
+ step: _AdapterStep | None = None
+ for token in tokens:
+ step = self._advance_state(state, int(token))
+ assert step is not None
+ return self.readout.probabilities(step.feature[None, :])[0]
+
+ def generate(
+ self,
+ prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ steps: int,
+ temperature: float = 1.0,
+ greedy: bool = False,
+ seed: int | None = None,
+ ) -> np.ndarray:
+ if steps < 0:
+ raise ValueError("steps must be >= 0")
+ if temperature <= 0.0:
+ raise ValueError("temperature must be > 0")
+
+ tokens = ensure_tokens(prompt)
+ if tokens.size < 1:
+ raise ValueError("Prompt must contain at least one token.")
+
+ rng = np.random.default_rng(seed)
+ state = self._initial_state()
+ step: _AdapterStep | None = None
+ output = tokens.astype(np.uint8, copy=True).tolist()
+
+ for token in tokens:
+ step = self._advance_state(state, int(token))
+ assert step is not None
+
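+        # Autoregressive rollout: greedy argmax or temperature-scaled sampling
+        # from the readout softmax, feeding each new token back into the state.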
+ for _ in range(steps):
+ logits = self.readout.logits(step.feature[None, :])[0]
+ if greedy:
+ next_token = int(np.argmax(logits))
+ else:
+ scaled = logits / temperature
+ probs = softmax(scaled[None, :], axis=-1)[0]
+ next_token = int(rng.choice(self.config.vocabulary_size, p=probs))
+ output.append(next_token)
+ step = self._advance_state(state, next_token)
+
+ return np.asarray(output, dtype=np.uint8)
+
+
+__all__ = [
+ "ByteLatentPredictiveCoder",
+ "FitReport",
+ "SequenceReport",
+ "SequenceTrace",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/artifacts.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/artifacts.py
new file mode 100644
index 0000000000..f3b3a03a25
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/artifacts.py
@@ -0,0 +1,178 @@
+from __future__ import annotations
+
+from collections.abc import Mapping, Sequence
+from dataclasses import dataclass, field
+from typing import Any
+
+
+def _validate_metadata_value(value: Any, *, path: str = "metadata") -> None:
+ if value is None or isinstance(value, (str, int, float, bool)):
+ return
+    if isinstance(value, (list, tuple)):
+        for index, item in enumerate(value):
+            _validate_metadata_value(item, path=f"{path}[{index}]")
+        return
+ if isinstance(value, Mapping):
+ for key, item in value.items():
+ if not isinstance(key, str) or not key:
+ raise ValueError(f"{path} keys must be non-empty strings")
+ _validate_metadata_value(item, path=f"{path}.{key}")
+ return
+ raise TypeError(f"{path} must contain JSON-serializable values")
+
+
+@dataclass(frozen=True)
+class ArtifactMetadata:
+ items: tuple[tuple[str, Any], ...] = ()
+
+ def __post_init__(self) -> None:
+ normalized: list[tuple[str, Any]] = []
+ seen: set[str] = set()
+ for key, value in self.items:
+ if not isinstance(key, str) or not key:
+ raise ValueError("metadata keys must be non-empty strings")
+ if key in seen:
+ raise ValueError("metadata keys must be unique")
+ _validate_metadata_value(value, path=f"metadata.{key}")
+ normalized.append((key, value))
+ seen.add(key)
+ object.__setattr__(self, "items", tuple(normalized))
+
+ @classmethod
+ def from_mapping(cls, mapping: Mapping[str, Any]) -> "ArtifactMetadata":
+ return cls(items=tuple(mapping.items()))
+
+ def to_dict(self) -> dict[str, Any]:
+ return dict(self.items)
+
+ def get(self, key: str, default: Any = None) -> Any:
+ return self.to_dict().get(key, default)
+
+ def merged(self, **updates: Any) -> "ArtifactMetadata":
+ payload = self.to_dict()
+ payload.update(updates)
+ return ArtifactMetadata.from_mapping(payload)
+
+
+def coerce_artifact_metadata(
+ metadata: ArtifactMetadata | Mapping[str, Any] | None = None,
+ /,
+ **updates: Any,
+) -> ArtifactMetadata:
+ if metadata is None:
+ base = ArtifactMetadata()
+ elif isinstance(metadata, ArtifactMetadata):
+ base = metadata
+ elif isinstance(metadata, Mapping):
+ base = ArtifactMetadata.from_mapping(metadata)
+ else:
+ raise TypeError("metadata must be an ArtifactMetadata, mapping, or None")
+ return base.merged(**updates) if updates else base
+
+
+@dataclass(frozen=True)
+class ReplaySpan:
+ start: int
+ stop: int
+ label: str | None = None
+ metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
+
+ def __post_init__(self) -> None:
+ if self.start < 0:
+ raise ValueError("start must be >= 0")
+ if self.stop < self.start:
+ raise ValueError("stop must be >= start")
+
+ @property
+ def length(self) -> int:
+ return self.stop - self.start
+
+ @property
+ def is_empty(self) -> bool:
+ return self.length == 0
+
+
+def make_replay_span(
+ start: int,
+ stop: int,
+ *,
+ label: str | None = None,
+ metadata: ArtifactMetadata | Mapping[str, Any] | None = None,
+ **updates: Any,
+) -> ReplaySpan:
+ return ReplaySpan(
+ start=start,
+ stop=stop,
+ label=label,
+ metadata=coerce_artifact_metadata(metadata, **updates),
+ )
+
+
+@dataclass(frozen=True)
+class ArtifactAccounting:
+ artifact_name: str
+ artifact_bytes: int
+ replay_bytes: int
+ replay_spans: tuple[ReplaySpan, ...] = ()
+ metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
+
+ def __post_init__(self) -> None:
+ if not self.artifact_name:
+ raise ValueError("artifact_name must be non-empty")
+ if self.artifact_bytes < 0:
+ raise ValueError("artifact_bytes must be >= 0")
+ if self.replay_bytes < 0:
+ raise ValueError("replay_bytes must be >= 0")
+ object.__setattr__(self, "replay_spans", tuple(self.replay_spans))
+ if any(not isinstance(span, ReplaySpan) for span in self.replay_spans):
+ raise TypeError("replay_spans must contain ReplaySpan instances")
+
+ @property
+ def replay_span_count(self) -> int:
+ return len(self.replay_spans)
+
+ @property
+ def replay_span_length(self) -> int:
+ return sum(span.length for span in self.replay_spans)
+
+ @property
+ def coverage_ratio(self) -> float:
+ if self.artifact_bytes == 0:
+ return 0.0
+ return self.replay_bytes / float(self.artifact_bytes)
+
+ @property
+ def artifact_gap_bytes(self) -> int:
+ return max(self.artifact_bytes - self.replay_bytes, 0)
+
+
+def make_artifact_accounting(
+ artifact_name: str,
+ artifact_bytes: int,
+ replay_bytes: int,
+ *,
+ replay_spans: Sequence[ReplaySpan] = (),
+ metadata: ArtifactMetadata | Mapping[str, Any] | None = None,
+ **updates: Any,
+) -> ArtifactAccounting:
+ return ArtifactAccounting(
+ artifact_name=artifact_name,
+ artifact_bytes=artifact_bytes,
+ replay_bytes=replay_bytes,
+ replay_spans=tuple(replay_spans),
+ metadata=coerce_artifact_metadata(metadata, **updates),
+ )
+
+
+__all__ = [
+ "ArtifactAccounting",
+ "ArtifactMetadata",
+ "coerce_artifact_metadata",
+ "make_artifact_accounting",
+ "make_replay_span",
+ "ReplaySpan",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/artifacts_audits.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/artifacts_audits.py
new file mode 100644
index 0000000000..8594b6e3f7
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/artifacts_audits.py
@@ -0,0 +1,169 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+from typing import Any
+
+from .artifacts import ArtifactAccounting, ArtifactMetadata, ReplaySpan, coerce_artifact_metadata
+
+
+@dataclass(frozen=True)
+class ArtifactAuditRecord:
+ accounting: ArtifactAccounting
+ side_data_count: int = 0
+ side_data_bytes: int = 0
+ payload_bytes: int | None = None
+ metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
+
+ def __post_init__(self) -> None:
+ if self.side_data_count < 0:
+ raise ValueError("side_data_count must be >= 0")
+ if self.side_data_bytes < 0:
+ raise ValueError("side_data_bytes must be >= 0")
+ if self.payload_bytes is not None and self.payload_bytes < 0:
+ raise ValueError("payload_bytes must be >= 0")
+ if self.payload_bytes is None:
+ object.__setattr__(self, "payload_bytes", self.accounting.artifact_bytes + self.side_data_bytes)
+ object.__setattr__(self, "metadata", coerce_artifact_metadata(self.metadata))
+
+ @property
+ def artifact_name(self) -> str:
+ return self.accounting.artifact_name
+
+ @property
+ def artifact_bytes(self) -> int:
+ return self.accounting.artifact_bytes
+
+ @property
+ def replay_bytes(self) -> int:
+ return self.accounting.replay_bytes
+
+ @property
+ def replay_spans(self) -> tuple[ReplaySpan, ...]:
+ return self.accounting.replay_spans
+
+ @property
+ def replay_span_count(self) -> int:
+ return self.accounting.replay_span_count
+
+ @property
+ def replay_span_length(self) -> int:
+ return self.accounting.replay_span_length
+
+ @property
+ def coverage_ratio(self) -> float:
+ return self.accounting.coverage_ratio
+
+ @property
+ def payload_coverage_ratio(self) -> float:
+ if self.payload_bytes == 0:
+ return 0.0
+ return self.replay_bytes / float(self.payload_bytes)
+
+ @property
+ def side_data_ratio(self) -> float:
+ if self.payload_bytes == 0:
+ return 0.0
+ return self.side_data_bytes / float(self.payload_bytes)
+
+ @property
+ def artifact_gap_bytes(self) -> int:
+ return self.accounting.artifact_gap_bytes
+
+
+@dataclass(frozen=True)
+class ArtifactAuditSummary:
+ records: tuple[ArtifactAuditRecord, ...] = ()
+ metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
+
+ @property
+ def record_count(self) -> int:
+ return len(self.records)
+
+ @property
+ def artifact_bytes(self) -> int:
+ return sum(record.artifact_bytes for record in self.records)
+
+ @property
+ def replay_bytes(self) -> int:
+ return sum(record.replay_bytes for record in self.records)
+
+ @property
+ def payload_bytes(self) -> int:
+ return sum(int(record.payload_bytes) for record in self.records)
+
+ @property
+ def side_data_bytes(self) -> int:
+ return sum(record.side_data_bytes for record in self.records)
+
+ @property
+ def side_data_count(self) -> int:
+ return sum(record.side_data_count for record in self.records)
+
+ @property
+ def replay_span_count(self) -> int:
+ return sum(record.replay_span_count for record in self.records)
+
+ @property
+ def replay_span_length(self) -> int:
+ return sum(record.replay_span_length for record in self.records)
+
+ @property
+ def coverage_ratio(self) -> float:
+ if self.artifact_bytes == 0:
+ return 0.0
+ return self.replay_bytes / float(self.artifact_bytes)
+
+ @property
+ def payload_coverage_ratio(self) -> float:
+ if self.payload_bytes == 0:
+ return 0.0
+ return self.replay_bytes / float(self.payload_bytes)
+
+ @property
+ def side_data_ratio(self) -> float:
+ if self.payload_bytes == 0:
+ return 0.0
+ return self.side_data_bytes / float(self.payload_bytes)
+
+ @property
+ def artifact_gap_bytes(self) -> int:
+ return max(self.artifact_bytes - self.replay_bytes, 0)
+
+
+def audit_artifact(
+ accounting: ArtifactAccounting,
+ *,
+ side_data_count: int = 0,
+ side_data_bytes: int = 0,
+ payload_bytes: int | None = None,
+ metadata: ArtifactMetadata | dict[str, Any] | None = None,
+ **updates: Any,
+) -> ArtifactAuditRecord:
+ return ArtifactAuditRecord(
+ accounting=accounting,
+ side_data_count=side_data_count,
+ side_data_bytes=side_data_bytes,
+ payload_bytes=payload_bytes,
+ metadata=coerce_artifact_metadata(metadata, **updates),
+ )
+
+
+def summarize_artifact_audits(
+ records: Sequence[ArtifactAuditRecord],
+ *,
+ metadata: ArtifactMetadata | dict[str, Any] | None = None,
+ **updates: Any,
+) -> ArtifactAuditSummary:
+ return ArtifactAuditSummary(
+ records=tuple(records),
+ metadata=coerce_artifact_metadata(metadata, **updates),
+ )
+
+
+__all__ = [
+ "ArtifactAuditRecord",
+ "ArtifactAuditSummary",
+ "audit_artifact",
+ "summarize_artifact_audits",
+]
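+
+
+if __name__ == "__main__":
+    # Illustrative self-check only (run with `python -m`, since the module
+    # uses relative imports): derived payload and coverage figures for a
+    # toy record.
+    accounting = ArtifactAccounting("demo", artifact_bytes=10, replay_bytes=5)
+    record = audit_artifact(accounting, side_data_count=1, side_data_bytes=2)
+    summary = summarize_artifact_audits([record])
+    assert record.payload_bytes == 10 + 2
+    assert summary.coverage_ratio == 0.5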
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bidirectional_context.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bidirectional_context.py
new file mode 100644
index 0000000000..7b938c934b
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bidirectional_context.py
@@ -0,0 +1,266 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+
+from .codecs import ensure_tokens
+
+
+@dataclass(frozen=True)
+class BidirectionalContextConfig:
+ left_order: int = 2
+ right_order: int = 2
+
+ def __post_init__(self) -> None:
+ if self.left_order < 0:
+ raise ValueError("left_order must be >= 0")
+ if self.right_order < 0:
+ raise ValueError("right_order must be >= 0")
+
+
+@dataclass(frozen=True)
+class BidirectionalContextNeighborhood:
+ position: int
+ token: int
+ left_context: tuple[int, ...]
+ right_context: tuple[int, ...]
+ left_support: int
+ right_support: int
+ pair_support: int
+ candidate_tokens: tuple[int, ...]
+ candidate_count: int
+ deterministic: bool
+
+
+@dataclass(frozen=True)
+class BidirectionalContextStats:
+ sequence_length: int
+ neighborhood_count: int
+ left_context_count: int
+ right_context_count: int
+ pair_context_count: int
+ deterministic_fraction: float
+ candidate_le_2_rate: float
+ candidate_le_4_rate: float
+ candidate_le_8_rate: float
+ mean_candidate_size: float
+ median_candidate_size: float
+ max_candidate_size: int
+ mean_left_support: float
+ mean_right_support: float
+ mean_pair_support: float
+ candidate_sizes: tuple[int, ...]
+ neighborhoods: tuple[BidirectionalContextNeighborhood, ...]
+
+
+@dataclass(frozen=True)
+class BidirectionalLeaveOneOutStats:
+ position: int
+ token: int
+ left_context: tuple[int, ...]
+ right_context: tuple[int, ...]
+ left_support: int
+ right_support: int
+ pair_support: int
+ candidate_tokens: tuple[int, ...]
+ candidate_count: int
+ deterministic: bool
+
+
+def _coerce_tokens(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[Any],
+) -> np.ndarray:
+ tokens = ensure_tokens(data).astype(np.int64, copy=False)
+ if tokens.ndim != 1:
+ raise ValueError("tokens must be one-dimensional")
+ return tokens
+
+
+def _left_context(tokens: np.ndarray, position: int, order: int) -> tuple[int, ...]:
+ if order == 0:
+ return ()
+ start = max(0, position - order)
+ return tuple(int(token) for token in tokens[start:position])
+
+
+def _right_context(tokens: np.ndarray, position: int, order: int) -> tuple[int, ...]:
+ if order == 0:
+ return ()
+ stop = min(tokens.size, position + order + 1)
+ return tuple(int(token) for token in tokens[position + 1 : stop])
+
+
+def _build_support_maps(
+ tokens: np.ndarray,
+ config: BidirectionalContextConfig,
+) -> tuple[
+ dict[tuple[int, ...], int],
+ dict[tuple[int, ...], int],
+ dict[tuple[tuple[int, ...], tuple[int, ...]], int],
+ dict[tuple[tuple[int, ...], tuple[int, ...]], dict[int, int]],
+]:
+ left_support: dict[tuple[int, ...], int] = {}
+ right_support: dict[tuple[int, ...], int] = {}
+ pair_support: dict[tuple[tuple[int, ...], tuple[int, ...]], int] = {}
+ pair_candidates: dict[tuple[tuple[int, ...], tuple[int, ...]], dict[int, int]] = {}
+
+ for position, token in enumerate(tokens):
+ left = _left_context(tokens, position, config.left_order)
+ right = _right_context(tokens, position, config.right_order)
+ pair = (left, right)
+
+ left_support[left] = left_support.get(left, 0) + 1
+ right_support[right] = right_support.get(right, 0) + 1
+ pair_support[pair] = pair_support.get(pair, 0) + 1
+
+ candidates = pair_candidates.setdefault(pair, {})
+ candidate_token = int(token)
+ candidates[candidate_token] = candidates.get(candidate_token, 0) + 1
+
+ return left_support, right_support, pair_support, pair_candidates
+
+
+def _mean(values: Sequence[int]) -> float:
+ if not values:
+ return 0.0
+ return float(np.mean(np.asarray(values, dtype=np.float64)))
+
+
+class BidirectionalContextProbe:
+ def __init__(self, config: BidirectionalContextConfig | None = None):
+ self.config = config or BidirectionalContextConfig()
+ self._last_tokens: np.ndarray | None = None
+ self._last_stats: BidirectionalContextStats | None = None
+ self._last_pair_candidates: dict[tuple[tuple[int, ...], tuple[int, ...]], dict[int, int]] | None = None
+ self._last_left_support: dict[tuple[int, ...], int] | None = None
+ self._last_right_support: dict[tuple[int, ...], int] | None = None
+ self._last_pair_support: dict[tuple[tuple[int, ...], tuple[int, ...]], int] | None = None
+
+ def scan(
+ self,
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[Any],
+ ) -> BidirectionalContextStats:
+ sequence = _coerce_tokens(tokens)
+ if sequence.size == 0:
+ raise ValueError("tokens must contain at least one item")
+
+ left_support, right_support, pair_support, pair_candidates = _build_support_maps(sequence, self.config)
+ neighborhoods: list[BidirectionalContextNeighborhood] = []
+ candidate_sizes: list[int] = []
+ left_support_values: list[int] = []
+ right_support_values: list[int] = []
+ pair_support_values: list[int] = []
+
+ for position, token in enumerate(sequence):
+ left = _left_context(sequence, position, self.config.left_order)
+ right = _right_context(sequence, position, self.config.right_order)
+ pair = (left, right)
+ candidates = pair_candidates[pair]
+ candidate_tokens = tuple(sorted(int(candidate) for candidate in candidates))
+ candidate_count = len(candidate_tokens)
+ left_count = left_support[left]
+ right_count = right_support[right]
+ pair_count = pair_support[pair]
+ neighborhoods.append(
+ BidirectionalContextNeighborhood(
+ position=position,
+ token=int(token),
+ left_context=left,
+ right_context=right,
+ left_support=left_count,
+ right_support=right_count,
+ pair_support=pair_count,
+ candidate_tokens=candidate_tokens,
+ candidate_count=candidate_count,
+ deterministic=candidate_count == 1,
+ )
+ )
+ candidate_sizes.append(candidate_count)
+ left_support_values.append(left_count)
+ right_support_values.append(right_count)
+ pair_support_values.append(pair_count)
+
+ candidate_array = np.asarray(candidate_sizes, dtype=np.float64)
+ stats = BidirectionalContextStats(
+ sequence_length=int(sequence.size),
+ neighborhood_count=len(neighborhoods),
+ left_context_count=len(left_support),
+ right_context_count=len(right_support),
+ pair_context_count=len(pair_support),
+ deterministic_fraction=float(np.mean(candidate_array == 1.0)),
+ candidate_le_2_rate=float(np.mean(candidate_array <= 2.0)),
+ candidate_le_4_rate=float(np.mean(candidate_array <= 4.0)),
+ candidate_le_8_rate=float(np.mean(candidate_array <= 8.0)),
+ mean_candidate_size=float(np.mean(candidate_array)),
+ median_candidate_size=float(np.median(candidate_array)),
+ max_candidate_size=int(np.max(candidate_array)),
+ mean_left_support=_mean(left_support_values),
+ mean_right_support=_mean(right_support_values),
+ mean_pair_support=_mean(pair_support_values),
+ candidate_sizes=tuple(int(size) for size in candidate_sizes),
+ neighborhoods=tuple(neighborhoods),
+ )
+
+ self._last_tokens = sequence
+ self._last_stats = stats
+ self._last_pair_candidates = pair_candidates
+ self._last_left_support = left_support
+ self._last_right_support = right_support
+ self._last_pair_support = pair_support
+ return stats
+
+ def determinism_stats(self) -> BidirectionalContextStats:
+ if self._last_stats is None:
+ raise ValueError("scan must be called before determinism_stats")
+ return self._last_stats
+
+ def leave_one_out(
+ self,
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[Any],
+ position: int,
+ ) -> BidirectionalLeaveOneOutStats:
+ sequence = _coerce_tokens(tokens)
+ if sequence.size == 0:
+ raise ValueError("tokens must contain at least one item")
+ if position < 0 or position >= sequence.size:
+ raise IndexError("position out of range")
+
+ left_support, right_support, pair_support, pair_candidates = _build_support_maps(sequence, self.config)
+ left = _left_context(sequence, position, self.config.left_order)
+ right = _right_context(sequence, position, self.config.right_order)
+ pair = (left, right)
+ token = int(sequence[position])
+ candidates = dict(pair_candidates[pair])
+ if token in candidates:
+ if candidates[token] <= 1:
+ del candidates[token]
+ else:
+ candidates[token] -= 1
+ candidate_tokens = tuple(sorted(int(candidate) for candidate in candidates))
+ return BidirectionalLeaveOneOutStats(
+ position=position,
+ token=token,
+ left_context=left,
+ right_context=right,
+ left_support=left_support[left],
+ right_support=right_support[right],
+ pair_support=max(pair_support[pair] - 1, 0),
+ candidate_tokens=candidate_tokens,
+ candidate_count=len(candidate_tokens),
+ deterministic=len(candidate_tokens) == 1,
+ )
+
+
+BidirectionalContextLeaveOneOutStats = BidirectionalLeaveOneOutStats
+
+__all__ = [
+    "BidirectionalContextConfig",
+    "BidirectionalContextLeaveOneOutStats",
+    "BidirectionalContextNeighborhood",
+    "BidirectionalContextProbe",
+    "BidirectionalContextStats",
+    "BidirectionalLeaveOneOutStats",
+]
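+
+
+if __name__ == "__main__":
+    # Illustrative self-check only (run with `python -m`): scan a short
+    # byte string and inspect how constrained each (left, right)
+    # neighborhood is.
+    probe = BidirectionalContextProbe(BidirectionalContextConfig(left_order=1, right_order=1))
+    stats = probe.scan(b"abracadabra")
+    print(f"deterministic fraction: {stats.deterministic_fraction:.3f}")
+    loo = probe.leave_one_out(b"abracadabra", position=5)
+    print(f"leave-one-out candidates at position 5: {loo.candidate_count}")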
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bridge_export.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bridge_export.py
new file mode 100644
index 0000000000..9f06eaac35
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bridge_export.py
@@ -0,0 +1,259 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+
+import numpy as np
+
+from .artifacts import (
+ ArtifactAccounting,
+ ArtifactMetadata,
+ make_artifact_accounting,
+ make_replay_span,
+)
+from .bridge_features import BridgeFeatureArrays, BridgeFeatureConfig, bridge_feature_arrays
+from .codecs import ensure_tokens
+from .metrics import bits_per_byte_from_probabilities
+from .span_selection import SpanSelectionConfig, replay_spans_from_scores
+
+
+def _coerce_probability_array(
+ probabilities: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ name: str,
+) -> np.ndarray:
+ array = np.asarray(probabilities, dtype=np.float64)
+ if array.ndim < 1:
+ raise ValueError(f"{name} must have at least one dimension")
+ if np.any(array < 0.0):
+ raise ValueError(f"{name} must contain non-negative values")
+ return array
+
+
+def _coerce_targets(targets: object, expected_tokens: int) -> np.ndarray:
+ token_array = ensure_tokens(targets).astype(np.int64, copy=False).reshape(-1)
+ if token_array.size != expected_tokens:
+ raise ValueError("targets must align with the probability rows")
+ return token_array
+
+
+@dataclass(frozen=True)
+class BridgeExportConfig:
+ vocabulary_size: int = 256
+ candidate_count: int = 4
+ epsilon: float = 1e-12
+ replay_threshold: float = 0.0
+ source_names: tuple[str, str] = ("base", "proxy")
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.candidate_count < 1:
+ raise ValueError("candidate_count must be >= 1")
+ if self.epsilon <= 0.0:
+ raise ValueError("epsilon must be > 0")
+ if self.replay_threshold < 0.0:
+ raise ValueError("replay_threshold must be >= 0")
+ if len(self.source_names) != 2 or any(not name for name in self.source_names):
+ raise ValueError("source_names must contain two non-empty names")
+
+
+@dataclass(frozen=True)
+class BridgeExportReport:
+ tokens: int
+ source_names: tuple[str, str]
+ features: BridgeFeatureArrays
+ mean_entropy: float
+ mean_peak: float
+ mean_candidate4: float
+ mean_agreement: float
+ mean_agreement_mass: float
+ base_bits_per_byte: float | None
+ proxy_bits_per_byte: float | None
+ mean_bits_per_byte: float | None
+ accounting: ArtifactAccounting
+
+ @property
+ def bits_per_byte(self) -> float:
+ return 0.0 if self.mean_bits_per_byte is None else float(self.mean_bits_per_byte)
+
+
+@dataclass(frozen=True)
+class BridgeExportFitReport:
+ sequences: int
+ tokens: int
+ report: BridgeExportReport
+ accounting: ArtifactAccounting
+
+ @property
+ def bits_per_byte(self) -> float:
+ return self.report.bits_per_byte
+
+
+class BridgeExportAdapter:
+ def __init__(
+ self,
+ config: BridgeExportConfig | None = None,
+ *,
+ artifact_name: str = "bridge_export",
+ metadata: ArtifactMetadata | None = None,
+ ):
+ self.config = config or BridgeExportConfig()
+ self.feature_config = BridgeFeatureConfig(
+ candidate_count=self.config.candidate_count,
+ epsilon=self.config.epsilon,
+ )
+ self.artifact_name = artifact_name
+ self.metadata = metadata or ArtifactMetadata()
+ self._last_fit_accounting = make_artifact_accounting(
+ self.artifact_name,
+ 0,
+ 0,
+ metadata=self.metadata,
+ tokens=0,
+ bridge_rows=0,
+ )
+
+ def _resolve_vocabulary_size(self, base: np.ndarray, proxy: np.ndarray) -> int:
+ observed = int(base.shape[-1])
+ if proxy.shape[-1] != observed:
+ raise ValueError("base_probs and proxy_probs must have the same vocabulary size")
+ configured = self.config.vocabulary_size
+ if configured == observed:
+ return observed
+ if configured == 256 and observed != 256:
+ return observed
+ raise ValueError(
+ f"configured vocabulary_size={configured} does not match input vocabulary_size={observed}"
+ )
+
+ def _build_accounting(
+ self,
+ features: BridgeFeatureArrays,
+ *,
+ tokens: int,
+ source_names: tuple[str, str],
+ ) -> ArtifactAccounting:
+ feature_rows = np.asarray(features.entropy, dtype=np.float64).reshape(-1)
+ replay_scores = np.asarray(features.agreement_mass, dtype=np.float64).reshape(-1)
+ replay_mask = replay_scores > self.config.replay_threshold
+ replay_spans = replay_spans_from_scores(
+ replay_scores,
+ SpanSelectionConfig(threshold=self.config.replay_threshold, min_span=1, max_gap=0),
+ label="bridge_export",
+ source_names=source_names,
+ )
+
+ return make_artifact_accounting(
+ self.artifact_name,
+ int(tokens),
+ int(replay_mask.sum()),
+ replay_spans=replay_spans,
+ metadata=self.metadata,
+ tokens=int(tokens),
+ bridge_rows=int(feature_rows.size),
+ source_names=source_names,
+ )
+
+ def export(
+ self,
+ base_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ proxy_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ targets: object | None = None,
+ source_names: tuple[str, str] | None = None,
+ ) -> BridgeExportReport:
+ source_names = source_names or self.config.source_names
+ base = _coerce_probability_array(base_probs, name="base_probs")
+ proxy = _coerce_probability_array(proxy_probs, name="proxy_probs")
+ if base.shape != proxy.shape:
+ raise ValueError("base_probs and proxy_probs must have the same shape")
+ vocab_size = self._resolve_vocabulary_size(base, proxy)
+
+ features = bridge_feature_arrays(
+ base,
+ proxy,
+ vocab_size,
+ config=self.feature_config,
+ )
+ flattened_rows = int(np.prod(base.shape[:-1], dtype=np.int64)) if base.ndim > 1 else 1
+ flat_base = np.reshape(base, (flattened_rows, vocab_size))
+ flat_proxy = np.reshape(proxy, (flattened_rows, vocab_size))
+
+ target_array = None
+ base_bits_per_byte = None
+ proxy_bits_per_byte = None
+ mean_bits_per_byte = None
+ if targets is not None:
+ target_array = _coerce_targets(targets, flattened_rows)
+ base_bits_per_byte = bits_per_byte_from_probabilities(flat_base, target_array)
+ proxy_bits_per_byte = bits_per_byte_from_probabilities(flat_proxy, target_array)
+ mean_bits_per_byte = float(0.5 * (base_bits_per_byte + proxy_bits_per_byte))
+
+ accounting = self._build_accounting(
+ features,
+ tokens=flattened_rows,
+ source_names=source_names,
+ )
+ return BridgeExportReport(
+ tokens=flattened_rows,
+ source_names=source_names,
+ features=features,
+ mean_entropy=float(np.mean(np.asarray(features.entropy, dtype=np.float64))) if flattened_rows else 0.0,
+ mean_peak=float(np.mean(np.asarray(features.peak, dtype=np.float64))) if flattened_rows else 0.0,
+ mean_candidate4=float(np.mean(np.asarray(features.candidate4, dtype=np.float64))) if flattened_rows else 0.0,
+ mean_agreement=float(np.mean(np.asarray(features.agreement, dtype=np.float64))) if flattened_rows else 0.0,
+ mean_agreement_mass=float(np.mean(np.asarray(features.agreement_mass, dtype=np.float64))) if flattened_rows else 0.0,
+ base_bits_per_byte=base_bits_per_byte,
+ proxy_bits_per_byte=proxy_bits_per_byte,
+ mean_bits_per_byte=mean_bits_per_byte,
+ accounting=accounting,
+ )
+
+ def score(
+ self,
+ base_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ proxy_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ targets: object | None = None,
+ source_names: tuple[str, str] | None = None,
+ ) -> BridgeExportReport:
+ return self.export(
+ base_probs,
+ proxy_probs,
+ targets=targets,
+ source_names=source_names,
+ )
+
+ def fit(
+ self,
+ base_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ proxy_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ targets: object | None = None,
+ source_names: tuple[str, str] | None = None,
+ ) -> BridgeExportFitReport:
+ report = self.export(
+ base_probs,
+ proxy_probs,
+ targets=targets,
+ source_names=source_names,
+ )
+ self._last_fit_accounting = report.accounting
+ return BridgeExportFitReport(
+ sequences=1,
+ tokens=report.tokens,
+ report=report,
+ accounting=report.accounting,
+ )
+
+ def accounting(self) -> ArtifactAccounting:
+ return self._last_fit_accounting
+
+
+__all__ = [
+ "BridgeExportAdapter",
+ "BridgeExportConfig",
+ "BridgeExportFitReport",
+ "BridgeExportReport",
+]
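+
+
+if __name__ == "__main__":
+    # Illustrative self-check only; assumes the sibling bridge_features,
+    # metrics, and span_selection modules behave as referenced above.
+    rng = np.random.default_rng(0)
+    rows, vocab = 8, 256
+    base = rng.dirichlet(np.ones(vocab), size=rows)
+    proxy = rng.dirichlet(np.ones(vocab), size=rows)
+    adapter = BridgeExportAdapter()
+    report = adapter.export(base, proxy, targets=np.arange(rows) % vocab)
+    print(f"mean entropy: {report.mean_entropy:.3f}")
+    print(f"mean bits/byte: {report.bits_per_byte:.3f}")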
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bridge_features.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bridge_features.py
new file mode 100644
index 0000000000..01483eaf1f
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/bridge_features.py
@@ -0,0 +1,99 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from .probability_diagnostics import (
+ ProbabilityDiagnosticsConfig,
+ probability_diagnostics,
+)
+
+
+@dataclass(frozen=True)
+class BridgeFeatureConfig:
+ candidate_count: int = 4
+ epsilon: float = 1e-12
+
+ def __post_init__(self) -> None:
+ if self.candidate_count < 1:
+ raise ValueError("candidate_count must be >= 1")
+ if self.epsilon <= 0.0:
+ raise ValueError("epsilon must be > 0")
+
+
+@dataclass(frozen=True)
+class BridgeFeatureArrays:
+ entropy: np.ndarray
+ peak: np.ndarray
+ candidate4: np.ndarray
+ agreement: np.ndarray
+ agreement_mass: np.ndarray
+
+ def as_dict(self) -> dict[str, np.ndarray]:
+ return {
+ "entropy": self.entropy,
+ "peak": self.peak,
+ "candidate4": self.candidate4,
+ "agreement": self.agreement,
+ "agreement_mass": self.agreement_mass,
+ }
+
+
+def _coerce_probabilities(probabilities: np.ndarray | list[float] | tuple[float, ...], vocab_size: int) -> np.ndarray:
+ array = np.asarray(probabilities, dtype=np.float64)
+ if array.ndim < 1:
+ raise ValueError("probability arrays must have at least one dimension")
+ if array.shape[-1] != vocab_size:
+ raise ValueError("last dimension must match vocab_size")
+ if np.any(array < 0.0):
+ raise ValueError("probabilities must be non-negative")
+ totals = array.sum(axis=-1, keepdims=True)
+ normalized = np.divide(
+ array,
+ totals,
+ out=np.full_like(array, 1.0 / float(vocab_size)),
+ where=totals > 0.0,
+ )
+ return normalized
+
+
+def bridge_feature_arrays(
+ base_probs: np.ndarray | list[float] | tuple[float, ...],
+ proxy_probs: np.ndarray | list[float] | tuple[float, ...],
+ vocab_size: int,
+ *,
+ config: BridgeFeatureConfig | None = None,
+) -> BridgeFeatureArrays:
+ config = config or BridgeFeatureConfig()
+ if vocab_size < 1:
+ raise ValueError("vocab_size must be >= 1")
+
+ base = _coerce_probabilities(base_probs, vocab_size)
+ proxy = _coerce_probabilities(proxy_probs, vocab_size)
+ if base.shape != proxy.shape:
+ raise ValueError("base_probs and proxy_probs must have the same shape")
+
+ diagnostics = probability_diagnostics(
+ base,
+ proxy,
+ config=ProbabilityDiagnosticsConfig(
+ top_k=config.candidate_count,
+ epsilon=config.epsilon,
+ ),
+ )
+
+ return BridgeFeatureArrays(
+ entropy=np.asarray(diagnostics.entropy, dtype=np.float64),
+ peak=np.asarray(diagnostics.peak, dtype=np.float64),
+ candidate4=np.asarray(diagnostics.top_k_mass, dtype=np.float64),
+ agreement=np.asarray(diagnostics.overlap, dtype=np.float64),
+ agreement_mass=np.asarray(diagnostics.shared_top_k_mass, dtype=np.float64),
+ )
+
+
+__all__ = [
+ "BridgeFeatureArrays",
+ "BridgeFeatureConfig",
+ "bridge_feature_arrays",
+]
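+
+
+if __name__ == "__main__":
+    # Illustrative self-check only; assumes probability_diagnostics exposes
+    # the fields mapped above (entropy, peak, top_k_mass, overlap,
+    # shared_top_k_mass).
+    base = np.asarray([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]])
+    proxy = np.asarray([[0.6, 0.3, 0.1], [0.2, 0.7, 0.1]])
+    features = bridge_feature_arrays(
+        base,
+        proxy,
+        vocab_size=3,
+        config=BridgeFeatureConfig(candidate_count=2),
+    )
+    for name, values in features.as_dict().items():
+        print(name, np.round(values, 3))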
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/causal_predictive.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/causal_predictive.py
new file mode 100644
index 0000000000..1f24c5cb42
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/causal_predictive.py
@@ -0,0 +1,419 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+
+import numpy as np
+
+from .artifacts import (
+ ArtifactAccounting,
+ ArtifactMetadata,
+ make_artifact_accounting,
+ make_replay_span,
+)
+from .codecs import ensure_tokens
+from .exact_context import (
+ ExactContextFitReport,
+ ExactContextMemory,
+ ExactContextPrediction,
+ SupportWeightedMixer,
+)
+from .experts import ExpertFitReport, FrozenReadoutExpert
+from .metrics import bits_per_byte_from_probabilities
+from .ngram_memory import NgramMemory, NgramMemoryReport
+
+
+@dataclass(frozen=True)
+class CausalPredictiveFitReport:
+ sequences: int
+ tokens: int
+ train_bits_per_byte: float
+ exact_fit: ExactContextFitReport
+ expert_fits: tuple[ExpertFitReport, ...]
+ ngram_fit: NgramMemoryReport | None
+ accounting: ArtifactAccounting
+
+ @property
+ def bits_per_byte(self) -> float:
+ return self.train_bits_per_byte
+
+
+@dataclass(frozen=True)
+class CausalPredictiveScore:
+ tokens: int
+ bits_per_byte: float
+ exact_bits_per_byte: float
+ auxiliary_bits_per_byte: float | None
+ ngram_bits_per_byte: float | None
+ exact_support: float
+ accounting: ArtifactAccounting
+
+
+@dataclass(frozen=True)
+class _ComponentPrediction:
+ name: str
+ probabilities: np.ndarray
+ support: float
+
+
+class CausalPredictiveAdapter:
+ def __init__(
+ self,
+ exact_context: ExactContextMemory | None = None,
+ *,
+ experts: Sequence[FrozenReadoutExpert] = (),
+ ngram_memory: NgramMemory | None = None,
+ mixer: SupportWeightedMixer | None = None,
+ artifact_name: str = "causal_predictive",
+ metadata: ArtifactMetadata | None = None,
+ ):
+ self.exact_context = exact_context or ExactContextMemory()
+ self.experts = tuple(experts)
+ self.ngram_memory = ngram_memory
+ self.mixer = mixer or SupportWeightedMixer()
+ self.artifact_name = artifact_name
+ self.metadata = metadata or ArtifactMetadata()
+ self._last_fit_accounting = make_artifact_accounting(
+ self.artifact_name,
+ 0,
+ 0,
+ metadata=self.metadata,
+ tokens=0,
+ supported_tokens=0,
+ exact_orders=0,
+ )
+ self._validate_experts()
+ self._validate_ngram_memory()
+
+ def _validate_experts(self) -> None:
+ vocab_size = self.exact_context.config.vocabulary_size
+ for expert in self.experts:
+ if expert.vocabulary_size != vocab_size:
+ raise ValueError(
+ f"expert {expert.name!r} has vocabulary_size={expert.vocabulary_size}, "
+ f"expected {vocab_size}"
+ )
+
+ def _validate_ngram_memory(self) -> None:
+ if self.ngram_memory is None:
+ return
+ vocab_size = self.exact_context.config.vocabulary_size
+ if self.ngram_memory.config.vocabulary_size != vocab_size:
+ raise ValueError(
+ "ngram_memory vocabulary_size must match exact_context vocabulary_size"
+ )
+
+ @staticmethod
+ def _coerce_sequences(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> tuple[np.ndarray, ...]:
+ if isinstance(data, (str, bytes, bytearray, memoryview, np.ndarray)):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence) and data and all(isinstance(item, int) for item in data):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence):
+ return tuple(ensure_tokens(item) for item in data)
+ return (ensure_tokens(data),)
+
+ @staticmethod
+ def _normalize(probabilities: np.ndarray) -> np.ndarray:
+ clipped = np.clip(np.asarray(probabilities, dtype=np.float64), 1e-12, None)
+ return clipped / np.sum(clipped)
+
+ def _auxiliary_predictions(self, prompt: np.ndarray) -> tuple[_ComponentPrediction, ...]:
+ components: list[_ComponentPrediction] = []
+ for expert in self.experts:
+ probabilities = self._normalize(expert.predict_proba(prompt))
+ support = float(np.max(probabilities))
+ components.append(
+ _ComponentPrediction(
+ name=expert.name,
+ probabilities=probabilities,
+ support=support,
+ )
+ )
+ return tuple(components)
+
+ def _ngram_prediction(self, prompt: np.ndarray) -> ExactContextPrediction | None:
+ if self.ngram_memory is None:
+ return None
+
+ tokens = ensure_tokens(prompt).astype(np.int64, copy=False)
+ if tokens.size == 0:
+ probabilities = self.ngram_memory.unigram_probs()
+ context: tuple[int, ...] = ()
+ order = 0
+ elif tokens.size == 1:
+ probabilities = self.ngram_memory.bigram_probs(int(tokens[-1]))
+ context = (int(tokens[-1]),)
+ order = 1
+ else:
+ probabilities = self.ngram_memory.trigram_probs(int(tokens[-2]), int(tokens[-1]))
+ context = (int(tokens[-2]), int(tokens[-1]))
+ order = 2
+
+ support = float(np.max(probabilities))
+ return ExactContextPrediction(
+ name="ngram",
+ order=order,
+ context=context,
+ probabilities=np.asarray(probabilities, dtype=np.float64),
+ support=support,
+ total=support,
+ )
+
+ @staticmethod
+ def _pack_prediction(
+ name: str,
+ probabilities: np.ndarray,
+ support: float,
+ *,
+ order: int = 0,
+ context: tuple[int, ...] = (),
+ ) -> ExactContextPrediction:
+ return ExactContextPrediction(
+ name=name,
+ order=order,
+ context=context,
+ probabilities=np.asarray(probabilities, dtype=np.float64),
+ support=float(support),
+ total=float(max(support, 0.0)),
+ )
+
+ def _blend_prefix(self, prefix: np.ndarray) -> tuple[np.ndarray, float]:
+ base_probs = self.exact_context.predictive_distribution(prefix)
+ exact_predictions = self.exact_context.experts(prefix)
+ base_support = max((prediction.support for prediction in exact_predictions), default=0.0)
+ aux_predictions = self._auxiliary_predictions(prefix)
+ ngram_prediction = self._ngram_prediction(prefix)
+ packed_aux = tuple(
+ self._pack_prediction(component.name, component.probabilities, component.support)
+ for component in aux_predictions
+ )
+ ngram_expert: tuple[ExactContextPrediction, ...] = ()
+ if ngram_prediction is not None:
+ ngram_expert = (ngram_prediction,)
+ blend = self.mixer.mix(
+ base_probs=base_probs,
+ experts=tuple(exact_predictions) + packed_aux + ngram_expert,
+ base_name="exact_context",
+ base_support=base_support,
+ )
+ return blend.probabilities, base_support
+
+ def accounting(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | None = None,
+ ) -> ArtifactAccounting:
+ if sequence is None:
+ return self._last_fit_accounting
+
+ tokens = ensure_tokens(sequence)
+ if tokens.size == 0:
+ return make_artifact_accounting(
+ self.artifact_name,
+ 0,
+ 0,
+ metadata=self.metadata,
+ tokens=0,
+ supported_tokens=0,
+ exact_orders=0,
+ )
+
+ replay_spans = []
+ current_start: int | None = None
+ supported_tokens = 0
+ exact_orders = 0
+
+ for index in range(1, tokens.size):
+ prefix = tokens[:index]
+ predictions = self.exact_context.experts(prefix)
+ support = max((prediction.total for prediction in predictions), default=0.0)
+ exact_orders = max(exact_orders, len(predictions))
+ if support > 0.0:
+ supported_tokens += 1
+ if current_start is None:
+ current_start = index
+ elif current_start is not None:
+ replay_spans.append(
+ make_replay_span(
+ current_start,
+ index,
+ label="exact_context",
+ supported_tokens=index - current_start,
+ exact_orders=exact_orders,
+ )
+ )
+ current_start = None
+
+ if current_start is not None:
+ replay_spans.append(
+ make_replay_span(
+ current_start,
+ int(tokens.size),
+ label="exact_context",
+ supported_tokens=int(tokens.size) - current_start,
+ exact_orders=exact_orders,
+ )
+ )
+
+ return make_artifact_accounting(
+ self.artifact_name,
+ int(tokens.size),
+ supported_tokens,
+ replay_spans=tuple(replay_spans),
+ metadata=self.metadata,
+ tokens=int(tokens.size),
+ supported_tokens=supported_tokens,
+ exact_orders=exact_orders,
+ )
+
+ def fit(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> CausalPredictiveFitReport:
+ sequences = self._coerce_sequences(data)
+ exact_fit = self.exact_context.fit(sequences)
+ ngram_fit = self.ngram_memory.fit(sequences) if self.ngram_memory is not None else None
+
+ fit_sequences = tuple(sequence for sequence in sequences if ensure_tokens(sequence).size >= 2)
+ expert_fits: list[ExpertFitReport] = []
+    if fit_sequences:
+        for expert in self.experts:
+            expert_fits.append(expert.fit(fit_sequences))
+
+ total_tokens = 0
+ total_effective_tokens = 0
+ weighted_bits = 0.0
+ artifact_bytes = 0
+ replay_bytes = 0
+ replay_spans = []
+ offset = 0
+
+        for sequence in sequences:
+            tokens = ensure_tokens(sequence)
+            total_tokens += int(tokens.size)
+            if tokens.size < 2:
+                # score() requires at least two tokens; skip short
+                # sequences but keep replay-span offsets aligned.
+                offset += int(tokens.size)
+                continue
+            score = self.score(tokens)
+ effective_tokens = max(int(tokens.size) - 1, 0)
+ total_effective_tokens += effective_tokens
+ weighted_bits += score.bits_per_byte * effective_tokens
+ accounting = score.accounting
+ artifact_bytes += accounting.artifact_bytes
+ replay_bytes += accounting.replay_bytes
+ replay_spans.extend(
+ make_replay_span(
+ span.start + offset,
+ span.stop + offset,
+ label=span.label,
+ metadata=span.metadata,
+ )
+ for span in accounting.replay_spans
+ )
+ offset += int(tokens.size)
+
+ train_bits = 0.0 if total_effective_tokens == 0 else weighted_bits / total_effective_tokens
+ accounting = make_artifact_accounting(
+ self.artifact_name,
+ artifact_bytes,
+ replay_bytes,
+ replay_spans=tuple(replay_spans),
+ metadata=self.metadata,
+ sequences=len(sequences),
+ tokens=artifact_bytes,
+ supported_tokens=replay_bytes,
+ exact_orders=self.exact_context.config.max_order,
+ )
+ self._last_fit_accounting = accounting
+ return CausalPredictiveFitReport(
+ sequences=len(sequences),
+ tokens=total_tokens,
+ train_bits_per_byte=train_bits,
+ exact_fit=exact_fit,
+ expert_fits=tuple(expert_fits),
+ ngram_fit=ngram_fit,
+ accounting=accounting,
+ )
+
+ def predict_proba(
+ self,
+ prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> np.ndarray:
+ tokens = ensure_tokens(prompt)
+ if tokens.size < 1:
+ raise ValueError("prompt must contain at least one token")
+ probabilities, _ = self._blend_prefix(tokens)
+ return probabilities
+
+ def score(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> CausalPredictiveScore:
+ tokens = ensure_tokens(sequence)
+ if tokens.size < 2:
+ raise ValueError("sequence must contain at least two tokens")
+
+ exact_rows: list[np.ndarray] = []
+ final_rows: list[np.ndarray] = []
+ exact_support = 0.0
+ ngram_rows: list[np.ndarray] = []
+
+ for index in range(1, tokens.size):
+ prefix = tokens[:index]
+ exact_probs = self.exact_context.predictive_distribution(prefix)
+ exact_predictions = self.exact_context.experts(prefix)
+ base_support = max((prediction.support for prediction in exact_predictions), default=0.0)
+ aux_predictions = self._auxiliary_predictions(prefix)
+ ngram_prediction = self._ngram_prediction(prefix)
+ packed_aux = tuple(
+ self._pack_prediction(component.name, component.probabilities, component.support)
+ for component in aux_predictions
+ )
+ ngram_expert: tuple[ExactContextPrediction, ...] = ()
+ if ngram_prediction is not None:
+ ngram_rows.append(np.asarray(ngram_prediction.probabilities, dtype=np.float64))
+ ngram_expert = (ngram_prediction,)
+ exact_blend = self.mixer.mix(
+ base_probs=exact_probs,
+ experts=tuple(exact_predictions),
+ base_name="exact_context",
+ base_support=base_support,
+ )
+ final_blend = self.mixer.mix(
+ base_probs=exact_probs,
+ experts=tuple(exact_predictions) + packed_aux + ngram_expert,
+ base_name="exact_context",
+ base_support=base_support,
+ )
+ exact_rows.append(exact_blend.probabilities)
+ final_rows.append(final_blend.probabilities)
+ exact_support = base_support
+
+ targets = tokens[1:].astype(np.int64, copy=False)
+ exact_bpb = bits_per_byte_from_probabilities(np.vstack(exact_rows), targets)
+ final_bpb = bits_per_byte_from_probabilities(np.vstack(final_rows), targets)
+ ngram_bpb = (
+ None
+ if not ngram_rows
+ else bits_per_byte_from_probabilities(np.vstack(ngram_rows), targets[: len(ngram_rows)])
+ )
+ auxiliary_bpb = (
+ None
+ if not self.experts
+ else float(np.mean([expert.score(tokens).bits_per_byte for expert in self.experts]))
+ )
+ return CausalPredictiveScore(
+ tokens=int(tokens.size),
+ bits_per_byte=final_bpb,
+ exact_bits_per_byte=exact_bpb,
+ auxiliary_bits_per_byte=auxiliary_bpb,
+ ngram_bits_per_byte=ngram_bpb,
+ exact_support=exact_support,
+ accounting=self.accounting(tokens),
+ )
+
+
+__all__ = [
+ "CausalPredictiveAdapter",
+ "CausalPredictiveFitReport",
+ "CausalPredictiveScore",
+]
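+
+
+if __name__ == "__main__":
+    # Illustrative self-check only; assumes the default ExactContextMemory
+    # and SupportWeightedMixer construct and fit as referenced above.
+    adapter = CausalPredictiveAdapter()
+    fit_report = adapter.fit(b"abracadabra abracadabra")
+    score = adapter.score(b"abracadabra")
+    print(f"train bits/byte: {fit_report.train_bits_per_byte:.3f}")
+    print(f"eval bits/byte: {score.bits_per_byte:.3f}")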
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/cli.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/cli.py
new file mode 100644
index 0000000000..9eba41ace6
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/cli.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from .codecs import ByteCodec
+from .model import OpenPredictiveCoder
+
+
+def _build_parser() -> argparse.ArgumentParser:
+ parser = argparse.ArgumentParser(prog="opc", description="Open Predictive Coder CLI")
+ subparsers = parser.add_subparsers(dest="command", required=True)
+
+ fit_parser = subparsers.add_parser("fit", help="fit on a text file and optionally sample")
+ fit_parser.add_argument("--input", required=True, help="Path to a UTF-8 text file")
+ fit_parser.add_argument("--prompt", default="", help="Prompt used for sampling after fit")
+ fit_parser.add_argument("--generate", type=int, default=0, help="Number of bytes to generate after the prompt")
+ fit_parser.add_argument("--temperature", type=float, default=1.0, help="Sampling temperature")
+ fit_parser.add_argument("--greedy", action="store_true", help="Use greedy decoding")
+ return parser
+
+
+def main() -> None:
+ parser = _build_parser()
+ args = parser.parse_args()
+
+ if args.command == "fit":
+ text = Path(args.input).read_text(encoding="utf-8")
+ model = OpenPredictiveCoder()
+ report = model.fit(text)
+ print(f"train bits/byte: {report.train_bits_per_byte:.4f}")
+ print(f"patches: {report.patches}")
+ print(f"mean patch size: {report.mean_patch_size:.2f}")
+ if args.generate > 0:
+ prompt = ByteCodec.encode_text(args.prompt or text[:16])
+ sample = model.generate(
+ prompt,
+ steps=args.generate,
+ temperature=args.temperature,
+ greedy=args.greedy,
+ )
+ print(ByteCodec.decode_text(sample))
+
+
+if __name__ == "__main__":
+ main()
+
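+# Example invocation (assuming the vendored package is importable as
+# open_predictive_coder; adjust PYTHONPATH accordingly):
+#
+#     python -m open_predictive_coder.cli fit --input corpus.txt \
+#         --generate 64 --temperature 0.8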
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/codecs.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/codecs.py
new file mode 100644
index 0000000000..9d3190ebc4
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/codecs.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+
+import numpy as np
+
+
+def _is_sequence_of_ints(value: object) -> bool:
+ if isinstance(value, (bytes, bytearray, memoryview, str)):
+ return False
+ if isinstance(value, np.ndarray):
+ return value.ndim == 1 and np.issubdtype(value.dtype, np.integer)
+ if not isinstance(value, Sequence):
+ return False
+ return all(isinstance(item, int) for item in value)
+
+
+def ensure_tokens(value: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> np.ndarray:
+ if isinstance(value, str):
+ return np.frombuffer(value.encode("utf-8"), dtype=np.uint8).copy()
+ if isinstance(value, bytes):
+ return np.frombuffer(value, dtype=np.uint8).copy()
+ if isinstance(value, bytearray):
+ return np.frombuffer(bytes(value), dtype=np.uint8).copy()
+ if isinstance(value, memoryview):
+ return np.frombuffer(value.tobytes(), dtype=np.uint8).copy()
+ if isinstance(value, np.ndarray):
+ if value.ndim != 1:
+ raise ValueError("token arrays must be one-dimensional")
+ if not np.issubdtype(value.dtype, np.integer):
+ raise TypeError("token arrays must contain integers")
+ return value.astype(np.int64, copy=True)
+ if _is_sequence_of_ints(value):
+ return np.asarray(list(value), dtype=np.int64)
+ raise TypeError(f"Unsupported token input type: {type(value)!r}")
+
+
+def ensure_byte_tokens(value: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> np.ndarray:
+ tokens = ensure_tokens(value)
+ if tokens.size and (int(np.min(tokens)) < 0 or int(np.max(tokens)) > 255):
+ raise ValueError("byte tokens must lie in [0, 255]")
+ return tokens.astype(np.uint8, copy=False)
+
+
+class ByteCodec:
+ @staticmethod
+ def encode_text(text: str, encoding: str = "utf-8") -> np.ndarray:
+ return np.frombuffer(text.encode(encoding), dtype=np.uint8).copy()
+
+ @staticmethod
+ def decode_text(tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int], encoding: str = "utf-8") -> str:
+        return ensure_byte_tokens(tokens).tobytes().decode(encoding, errors="replace")
+
+ @staticmethod
+ def encode_bytes(payload: bytes | bytearray | memoryview) -> np.ndarray:
+ return ensure_tokens(payload)
+
+ @staticmethod
+ def decode_bytes(tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> bytes:
+        return ensure_byte_tokens(tokens).tobytes()
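+
+
+if __name__ == "__main__":
+    # Illustrative round-trip self-check only.
+    tokens = ensure_tokens("héllo")
+    assert ByteCodec.decode_text(tokens) == "héllo"
+    assert ByteCodec.decode_bytes(ensure_byte_tokens(b"\x00\xff")) == b"\x00\xff"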
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/config.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/config.py
new file mode 100644
index 0000000000..fb8c0dd1c9
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/config.py
@@ -0,0 +1,383 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Literal
+
+SegmenterMode = Literal["fixed", "adaptive"]
+ReservoirTopology = Literal["erdos_renyi", "small_world"]
+MemoryMergeMode = Literal["concatenate"]
+SubstrateKind = Literal["echo_state", "delay", "mixed_memory", "hierarchical", "oscillatory"]
+
+
+@dataclass(frozen=True)
+class SegmenterConfig:
+ mode: SegmenterMode = "adaptive"
+ patch_size: int = 8
+ min_patch_size: int = 4
+ max_patch_size: int = 24
+ novelty_threshold: float = 0.14
+
+ def __post_init__(self) -> None:
+ if self.patch_size < 1:
+ raise ValueError("patch_size must be >= 1")
+ if self.min_patch_size < 1:
+ raise ValueError("min_patch_size must be >= 1")
+ if self.max_patch_size < self.min_patch_size:
+ raise ValueError("max_patch_size must be >= min_patch_size")
+ if self.patch_size < self.min_patch_size or self.patch_size > self.max_patch_size:
+ raise ValueError("patch_size must lie within [min_patch_size, max_patch_size]")
+ if self.novelty_threshold < 0.0:
+ raise ValueError("novelty_threshold must be >= 0")
+
+
+@dataclass(frozen=True)
+class ReservoirConfig:
+ size: int = 512
+ connectivity: float = 0.05
+ spectral_radius: float = 0.95
+ leak: float = 0.25
+ input_scale: float = 0.2
+ topology: ReservoirTopology = "erdos_renyi"
+ rewire_prob: float = 0.1
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ if self.size < 8:
+ raise ValueError("reservoir size must be >= 8")
+ if not 0.0 < self.connectivity <= 1.0:
+ raise ValueError("connectivity must lie in (0, 1]")
+ if self.spectral_radius <= 0.0:
+ raise ValueError("spectral_radius must be > 0")
+ if not 0.0 < self.leak <= 1.0:
+ raise ValueError("leak must lie in (0, 1]")
+ if self.input_scale <= 0.0:
+ raise ValueError("input_scale must be > 0")
+ if not 0.0 <= self.rewire_prob <= 1.0:
+ raise ValueError("rewire_prob must lie in [0, 1]")
+
+
+@dataclass(frozen=True)
+class DelayLineConfig:
+ history_length: int = 16
+ embedding_dim: int = 16
+ vocabulary_size: int = 256
+ input_scale: float = 0.2
+ decay: float = 1.0
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ if self.history_length < 1:
+ raise ValueError("history_length must be >= 1")
+ if self.embedding_dim < 1:
+ raise ValueError("embedding_dim must be >= 1")
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.input_scale <= 0.0:
+ raise ValueError("input_scale must be > 0")
+ if not 0.0 < self.decay <= 1.0:
+ raise ValueError("decay must lie in (0, 1]")
+
+ @property
+ def state_dim(self) -> int:
+ return self.history_length * self.embedding_dim
+
+
+@dataclass(frozen=True)
+class LinearMemoryConfig:
+ embedding_dim: int = 16
+ vocabulary_size: int = 256
+ decays: tuple[float, ...] = (0.25, 0.5, 0.75, 0.9, 0.97)
+ input_scale: float = 0.2
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ if self.embedding_dim < 1:
+ raise ValueError("embedding_dim must be >= 1")
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if not self.decays:
+ raise ValueError("decays must contain at least one bank")
+ if any(decay <= 0.0 or decay >= 1.0 for decay in self.decays):
+ raise ValueError("all decays must lie in (0, 1)")
+ if self.input_scale <= 0.0:
+ raise ValueError("input_scale must be > 0")
+
+ @property
+ def state_dim(self) -> int:
+ return len(self.decays) * self.embedding_dim
+
+
+@dataclass(frozen=True)
+class OscillatoryMemoryConfig:
+ vocabulary_size: int = 256
+ embedding_dim: int = 16
+ decay_rates: tuple[float, ...] = (0.25, 0.5, 0.75, 0.9)
+ oscillatory_modes: int = 4
+ oscillatory_damping_range: tuple[float, float] = (0.85, 0.98)
+ oscillatory_period_range: tuple[float, float] = (4.0, 32.0)
+ input_scale: float = 0.2
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.embedding_dim < 1:
+ raise ValueError("embedding_dim must be >= 1")
+ if not self.decay_rates:
+ raise ValueError("decay_rates must contain at least one bank")
+ if any(rate <= 0.0 or rate >= 1.0 for rate in self.decay_rates):
+ raise ValueError("all decay_rates must lie in (0, 1)")
+ if self.oscillatory_modes < 1:
+ raise ValueError("oscillatory_modes must be >= 1")
+ if len(self.oscillatory_damping_range) != 2:
+ raise ValueError("oscillatory_damping_range must contain exactly two values")
+ if len(self.oscillatory_period_range) != 2:
+ raise ValueError("oscillatory_period_range must contain exactly two values")
+ low_damping, high_damping = self.oscillatory_damping_range
+ low_period, high_period = self.oscillatory_period_range
+ if not 0.0 < low_damping < high_damping < 1.0:
+ raise ValueError("oscillatory_damping_range must lie inside (0, 1)")
+ if not 0.0 < low_period < high_period:
+ raise ValueError("oscillatory_period_range must lie in positive increasing order")
+ if self.input_scale <= 0.0:
+ raise ValueError("input_scale must be > 0")
+
+ @property
+ def decay_bank_count(self) -> int:
+ return len(self.decay_rates)
+
+ @property
+ def oscillatory_bank_count(self) -> int:
+ return self.oscillatory_modes
+
+ @property
+ def state_dim(self) -> int:
+ return (self.decay_bank_count * self.embedding_dim) + (2 * self.oscillatory_bank_count * self.embedding_dim)
+
+
+@dataclass(frozen=True)
+class MixedMemoryConfig:
+ reservoir: ReservoirConfig = field(default_factory=ReservoirConfig)
+ delay: DelayLineConfig = field(default_factory=DelayLineConfig)
+ merge_mode: MemoryMergeMode = "concatenate"
+
+ def __post_init__(self) -> None:
+ if self.merge_mode != "concatenate":
+ raise ValueError("merge_mode must be 'concatenate'")
+
+ @property
+ def state_dim(self) -> int:
+ return self.reservoir.size + self.delay.state_dim
+
+
+@dataclass(frozen=True)
+class HierarchicalSubstrateConfig:
+ fast_size: int = 128
+ vocabulary_size: int = 256
+ fast_connectivity: float = 0.15
+ fast_spectral_radius: float = 0.7
+ fast_topology: ReservoirTopology = "erdos_renyi"
+ fast_rewire_prob: float = 0.1
+ fast_leak: float = 0.35
+ mid_size: int = 256
+ mid_connectivity: float = 0.08
+ mid_spectral_radius: float = 0.9
+ mid_topology: ReservoirTopology = "erdos_renyi"
+ mid_rewire_prob: float = 0.1
+ mid_leak: float = 0.25
+ slow_size: int = 384
+ slow_connectivity: float = 0.04
+ slow_spectral_radius: float = 0.98
+ slow_topology: ReservoirTopology = "erdos_renyi"
+ slow_rewire_prob: float = 0.1
+ slow_leak: float = 0.15
+ input_scale: float = 0.2
+ upward_scale: float = 0.1
+ slow_update_stride: int = 1
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ if self.fast_size < 4:
+ raise ValueError("fast_size must be >= 4")
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.mid_size < 4:
+ raise ValueError("mid_size must be >= 4")
+ if self.slow_size < 4:
+ raise ValueError("slow_size must be >= 4")
+ if not 0.0 < self.fast_connectivity <= 1.0:
+ raise ValueError("fast_connectivity must lie in (0, 1]")
+ if not 0.0 < self.mid_connectivity <= 1.0:
+ raise ValueError("mid_connectivity must lie in (0, 1]")
+ if not 0.0 < self.slow_connectivity <= 1.0:
+ raise ValueError("slow_connectivity must lie in (0, 1]")
+ if self.fast_spectral_radius <= 0.0:
+ raise ValueError("fast_spectral_radius must be > 0")
+ if self.mid_spectral_radius <= 0.0:
+ raise ValueError("mid_spectral_radius must be > 0")
+ if self.slow_spectral_radius <= 0.0:
+ raise ValueError("slow_spectral_radius must be > 0")
+ if not 0.0 < self.fast_leak <= 1.0:
+ raise ValueError("fast_leak must lie in (0, 1]")
+ if not 0.0 < self.mid_leak <= 1.0:
+ raise ValueError("mid_leak must lie in (0, 1]")
+ if not 0.0 < self.slow_leak <= 1.0:
+ raise ValueError("slow_leak must lie in (0, 1]")
+ if self.input_scale <= 0.0:
+ raise ValueError("input_scale must be > 0")
+ if self.upward_scale <= 0.0:
+ raise ValueError("upward_scale must be > 0")
+ if self.slow_update_stride < 1:
+ raise ValueError("slow_update_stride must be >= 1")
+
+ @property
+ def state_dim(self) -> int:
+ return self.fast_size + self.mid_size + self.slow_size
+
+
+@dataclass(frozen=True)
+class SampledReadoutBandConfig:
+ name: str
+ start: int
+ stop: int
+ sample_count: int | None = None
+ sample_indices: tuple[int, ...] = ()
+ include_mean: bool = True
+ include_energy: bool = True
+ include_drift: bool = False
+
+ def __post_init__(self) -> None:
+ object.__setattr__(self, "sample_indices", tuple(int(index) for index in self.sample_indices))
+ if not self.name:
+ raise ValueError("name must be non-empty")
+ if self.start < 0:
+ raise ValueError("start must be >= 0")
+ if self.stop <= self.start:
+ raise ValueError("stop must be > start")
+ if self.sample_count is not None and self.sample_count < 1:
+ raise ValueError("sample_count must be >= 1")
+ if self.sample_indices and self.sample_count is not None:
+ raise ValueError("sample_count and sample_indices are mutually exclusive")
+ if len(set(self.sample_indices)) != len(self.sample_indices):
+ raise ValueError("sample_indices must be unique")
+ width = self.width
+ if self.sample_count is not None and self.sample_count > width:
+ raise ValueError("sample_count must be <= band width")
+ if any(index < 0 or index >= width for index in self.sample_indices):
+ raise ValueError("sample_indices must lie within the band width")
+
+ @property
+ def width(self) -> int:
+ return self.stop - self.start
+
+ @property
+ def resolved_sample_count(self) -> int:
+ if self.sample_indices:
+ return len(self.sample_indices)
+ if self.sample_count is not None:
+ return self.sample_count
+ return self.width
+
+ @property
+ def feature_dim(self) -> int:
+ count = self.resolved_sample_count
+ return count + int(self.include_mean) + int(self.include_energy) + int(self.include_drift)
+
+
+@dataclass(frozen=True)
+class SampledReadoutConfig:
+ state_dim: int
+ bands: tuple[SampledReadoutBandConfig, ...]
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ object.__setattr__(self, "bands", tuple(self.bands))
+ if self.state_dim < 1:
+ raise ValueError("state_dim must be >= 1")
+ if not self.bands:
+ raise ValueError("bands must contain at least one band")
+ for band in self.bands:
+ if band.stop > self.state_dim:
+ raise ValueError("band stop must be <= state_dim")
+
+ @property
+ def feature_dim(self) -> int:
+ return sum(band.feature_dim for band in self.bands)
+
+
+@dataclass(frozen=True)
+class LatentConfig:
+ latent_dim: int = 96
+ global_dim: int = 96
+ reservoir_features: int = 96
+ bridge_scale: float = 0.25
+ global_update_scale: float = 0.3
+ readout_l2: float = 1e-3
+
+ def __post_init__(self) -> None:
+ if self.latent_dim < 4:
+ raise ValueError("latent_dim must be >= 4")
+ if self.global_dim < 4:
+ raise ValueError("global_dim must be >= 4")
+ if self.reservoir_features < 4:
+ raise ValueError("reservoir_features must be >= 4")
+ if self.bridge_scale <= 0.0:
+ raise ValueError("bridge_scale must be > 0")
+ if self.global_update_scale <= 0.0:
+ raise ValueError("global_update_scale must be > 0")
+ if self.readout_l2 < 0.0:
+ raise ValueError("readout_l2 must be >= 0")
+
+
+@dataclass(frozen=True)
+class OpenPredictiveCoderConfig:
+ vocabulary_size: int = 256
+ substrate_kind: SubstrateKind = "echo_state"
+ segmenter: SegmenterConfig = field(default_factory=SegmenterConfig)
+ reservoir: ReservoirConfig = field(default_factory=ReservoirConfig)
+ delay: DelayLineConfig = field(default_factory=DelayLineConfig)
+ oscillatory: OscillatoryMemoryConfig = field(default_factory=OscillatoryMemoryConfig)
+ mixed_memory: MixedMemoryConfig = field(default_factory=MixedMemoryConfig)
+ hierarchical: HierarchicalSubstrateConfig = field(default_factory=HierarchicalSubstrateConfig)
+ latent: LatentConfig = field(default_factory=LatentConfig)
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.substrate_kind == "echo_state":
+ substrate_state_dim = self.reservoir.size
+ elif self.substrate_kind == "delay":
+ substrate_state_dim = self.delay.state_dim
+ if self.delay.vocabulary_size < self.vocabulary_size:
+ raise ValueError("delay.vocabulary_size must be >= vocabulary_size")
+ elif self.substrate_kind == "oscillatory":
+ substrate_state_dim = self.oscillatory.state_dim
+ if self.oscillatory.vocabulary_size < self.vocabulary_size:
+ raise ValueError("oscillatory.vocabulary_size must be >= vocabulary_size")
+ elif self.substrate_kind == "mixed_memory":
+ substrate_state_dim = self.mixed_memory.state_dim
+ if self.mixed_memory.delay.vocabulary_size < self.vocabulary_size:
+ raise ValueError("mixed_memory.delay.vocabulary_size must be >= vocabulary_size")
+ elif self.substrate_kind == "hierarchical":
+ substrate_state_dim = self.hierarchical.state_dim
+ if self.hierarchical.vocabulary_size < self.vocabulary_size:
+ raise ValueError("hierarchical.vocabulary_size must be >= vocabulary_size")
+ else:
+ raise ValueError(f"Unknown substrate_kind: {self.substrate_kind}")
+
+ if self.latent.reservoir_features > substrate_state_dim:
+ raise ValueError("latent.reservoir_features must be <= chosen substrate state_dim")
+
+ @property
+ def feature_dim(self) -> int:
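+        # Two sampled substrate views (reservoir_features each), the global
+        # state, the committed latent, and three scalar signals.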
+ return (
+ self.latent.reservoir_features
+ + self.latent.reservoir_features
+ + self.latent.global_dim
+ + self.latent.latent_dim
+ + 3
+ )
+
+
+LatentControllerConfig = LatentConfig
+ByteLatentPredictiveCoderConfig = OpenPredictiveCoderConfig
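+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code: the default configuration is
+    # assumed self-consistent, so constructing it exercises every
+    # __post_init__ validation above.
+    config = OpenPredictiveCoderConfig()
+    print(config.substrate_kind, config.vocabulary_size, config.feature_dim)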
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/control.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/control.py
new file mode 100644
index 0000000000..c03ea384ab
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/control.py
@@ -0,0 +1,91 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+from typing import Literal
+
+import numpy as np
+
+SummaryReduction = Literal["identity", "last", "mean", "mean_abs", "max_abs"]
+
+
+@dataclass(frozen=True)
+class ControllerSummaryConfig:
+ reduction: SummaryReduction = "identity"
+ normalize: bool = False
+ eps: float = 1e-8
+
+ def __post_init__(self) -> None:
+ if self.eps <= 0.0:
+ raise ValueError("eps must be > 0")
+
+
+@dataclass(frozen=True)
+class ControllerSummary:
+ values: np.ndarray
+ name: str | None = None
+
+ def __post_init__(self) -> None:
+ values = np.asarray(self.values, dtype=np.float64).reshape(-1)
+ if values.size < 1:
+ raise ValueError("ControllerSummary must contain at least one value")
+ object.__setattr__(self, "values", values)
+
+ @property
+ def dim(self) -> int:
+ return int(self.values.shape[0])
+
+
+class ControllerSummaryBuilder:
+ def __init__(self, config: ControllerSummaryConfig | None = None):
+ self.config = config or ControllerSummaryConfig()
+
+ def _reduce(self, signal: np.ndarray) -> np.ndarray:
+ if signal.ndim == 0:
+ return signal.reshape(1)
+ if signal.ndim == 1:
+ return signal
+ if self.config.reduction == "identity":
+ return signal.reshape(-1)
+ if self.config.reduction == "last":
+ return signal[-1]
+ if self.config.reduction == "mean":
+ return np.mean(signal, axis=0)
+ if self.config.reduction == "mean_abs":
+ return np.mean(np.abs(signal), axis=0)
+ if self.config.reduction == "max_abs":
+ return np.max(np.abs(signal), axis=0)
+ raise ValueError(f"unknown summary reduction: {self.config.reduction}")
+
+ def encode(
+ self,
+ signal: float | Sequence[float] | np.ndarray,
+ *,
+ name: str | None = None,
+ ) -> ControllerSummary:
+ array = np.asarray(signal, dtype=np.float64)
+ summary = self._reduce(array).reshape(-1)
+ if self.config.normalize:
+ norm = float(np.linalg.norm(summary))
+ if norm > self.config.eps:
+ summary = summary / norm
+ return ControllerSummary(values=summary, name=name)
+
+
+def stack_summaries(summaries: Sequence[ControllerSummary]) -> np.ndarray:
+ if not summaries:
+ raise ValueError("stack_summaries requires at least one summary")
+ dim = summaries[0].dim
+ for summary in summaries[1:]:
+ if summary.dim != dim:
+ raise ValueError("all controller summaries must share the same dimension")
+ return np.vstack([summary.values for summary in summaries])
+
+
+__all__ = [
+ "ControllerSummary",
+ "ControllerSummaryBuilder",
+ "ControllerSummaryConfig",
+ "SummaryReduction",
+ "stack_summaries",
+]
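+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code: reduce a (time, dim) trace to
+    # a per-dimension summary, then stack it with a second summary of the same
+    # width. Values here are illustrative only.
+    builder = ControllerSummaryBuilder(ControllerSummaryConfig(reduction="mean_abs", normalize=True))
+    trace = np.linspace(-1.0, 1.0, num=12).reshape(4, 3)
+    summary = builder.encode(trace, name="demo")
+    print(summary.name, summary.dim, summary.values)
+    print(stack_summaries([summary, builder.encode([0.5, -0.5, 0.25])]).shape)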
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/controllers.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/controllers.py
new file mode 100644
index 0000000000..503f0c13ff
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/controllers.py
@@ -0,0 +1,38 @@
+from .control import ControllerSummary, ControllerSummaryBuilder, ControllerSummaryConfig, stack_summaries
+from .gating import PathwayGateConfig, PathwayGateController, PathwayGateState, PathwayGateValues
+from .latents import LatentCommitter, LatentObservation, LatentState
+from .modulation import HormoneModulationConfig, HormoneModulator, HormoneState
+from .predictive_surprise import PredictionState, PredictiveSurpriseConfig, PredictiveSurpriseController, SummaryMode
+from .routing import RoutingConfig, RoutingDecision, RoutingMode, SummaryRouter
+
+PredictiveController = LatentCommitter
+PredictiveObservation = LatentObservation
+PredictiveState = LatentState
+
+__all__ = [
+ "ControllerSummary",
+ "ControllerSummaryBuilder",
+ "ControllerSummaryConfig",
+ "HormoneModulationConfig",
+ "HormoneModulator",
+ "HormoneState",
+ "LatentCommitter",
+ "LatentObservation",
+ "LatentState",
+ "PathwayGateConfig",
+ "PathwayGateController",
+ "PathwayGateState",
+ "PathwayGateValues",
+ "PredictiveController",
+ "PredictiveObservation",
+ "PredictiveState",
+ "PredictionState",
+ "PredictiveSurpriseConfig",
+ "PredictiveSurpriseController",
+ "RoutingConfig",
+ "RoutingDecision",
+ "RoutingMode",
+ "SummaryMode",
+ "SummaryRouter",
+ "stack_summaries",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/datasets.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/datasets.py
new file mode 100644
index 0000000000..3bdbead804
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/datasets.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from collections.abc import Iterable, Sequence
+from dataclasses import dataclass
+from pathlib import Path
+
+import numpy as np
+
+from .codecs import ensure_tokens
+
+
+@dataclass(frozen=True)
+class ByteSequenceDataset:
+ sequences: tuple[np.ndarray, ...]
+
+ @classmethod
+ def from_items(cls, items: Iterable[str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]]) -> "ByteSequenceDataset":
+ return cls(tuple(ensure_tokens(item) for item in items))
+
+ @classmethod
+ def from_paths(cls, paths: Iterable[str | Path], encoding: str = "utf-8") -> "ByteSequenceDataset":
+ sequences = []
+ for path in paths:
+ text = Path(path).read_text(encoding=encoding)
+ sequences.append(ensure_tokens(text))
+ return cls(tuple(sequences))
+
+ def concatenated(self, separator: bytes = b"\n") -> np.ndarray:
+ if not self.sequences:
+ return np.zeros(0, dtype=np.uint8)
+        joined = separator.join(seq.tobytes() for seq in self.sequences)
+ return np.frombuffer(joined, dtype=np.uint8).copy()
+
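+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run via
+    # `python -m open_predictive_coder.datasets` so the relative import
+    # resolves. Items may be bytes, text, or integer token sequences.
+    dataset = ByteSequenceDataset.from_items([b"alpha", "beta", [103, 97, 109, 109, 97]])
+    print(len(dataset.sequences), dataset.concatenated().tobytes())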
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/delay.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/delay.py
new file mode 100644
index 0000000000..7fd9984d23
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/delay.py
@@ -0,0 +1,53 @@
+from __future__ import annotations
+
+import numpy as np
+
+from .config import DelayLineConfig
+
+
+class DelayLineSubstrate:
+ def __init__(self, config: DelayLineConfig | None = None):
+ self.config = config or DelayLineConfig()
+ rng = np.random.default_rng(self.config.seed)
+ self._token_embeddings = rng.normal(
+ loc=0.0,
+ scale=self.config.input_scale,
+ size=(self.config.vocabulary_size, self.config.embedding_dim),
+ ).astype(np.float64)
+ self._token_embeddings /= np.sqrt(max(self.config.embedding_dim, 1))
+
+ @property
+ def state_dim(self) -> int:
+ return self.config.history_length * self.config.embedding_dim
+
+ def initial_state(self) -> np.ndarray:
+ return np.zeros(self.state_dim, dtype=np.float64)
+
+ def _coerce_token(self, token: int) -> int:
+ index = int(token)
+ if index < 0 or index >= self.config.vocabulary_size:
+ raise ValueError(
+ f"token {index} is out of range for vocabulary_size={self.config.vocabulary_size}"
+ )
+ return index
+
+ def history_view(self, state: np.ndarray) -> np.ndarray:
+ state = np.asarray(state, dtype=np.float64)
+ if state.shape != (self.state_dim,):
+ raise ValueError("state has unexpected shape")
+ return state.reshape(self.config.history_length, self.config.embedding_dim)
+
+ def step(self, state: np.ndarray, token: int) -> np.ndarray:
+ history = self.history_view(state)
+ token_index = self._coerce_token(token)
+ next_history = np.empty_like(history)
+ next_history[0] = self._token_embeddings[token_index]
+ if self.config.history_length > 1:
+ next_history[1:] = history[:-1] * self.config.decay
+ return next_history.reshape(-1)
+
+
+__all__ = [
+ "DelayLineConfig",
+ "DelayLineSubstrate",
+]
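+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run as a module so the
+    # relative import of DelayLineConfig resolves. The default config is
+    # assumed to cover byte-valued tokens.
+    substrate = DelayLineSubstrate()
+    state = substrate.initial_state()
+    for token in b"abc":
+        state = substrate.step(state, token)
+    print(substrate.state_dim, substrate.history_view(state)[0][:4])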
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/eval.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/eval.py
new file mode 100644
index 0000000000..66891cbf9f
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/eval.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+from typing import Literal, Protocol, runtime_checkable
+
+import numpy as np
+
+from .codecs import ensure_tokens
+
+RolloutMode = Literal["teacher_forced", "closed_loop"]
+
+
+@runtime_checkable
+class SupportsSequenceScoring(Protocol):
+ def score(self, sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]): ...
+
+
+@runtime_checkable
+class SupportsRolloutGeneration(Protocol):
+ def generate(
+ self,
+ prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ steps: int,
+ temperature: float = 1.0,
+ greedy: bool = False,
+ seed: int | None = None,
+ ) -> np.ndarray: ...
+
+
+@dataclass(frozen=True)
+class NextStepScore:
+ tokens: int
+ bits_per_byte: float
+
+
+@dataclass(frozen=True)
+class RolloutEvaluation:
+ mode: RolloutMode
+ prompt_tokens: np.ndarray
+ continuation_tokens: np.ndarray
+ sequence_tokens: np.ndarray
+ bits_per_byte: float
+ total_tokens: int
+
+
+def score_next_step(
+ model: SupportsSequenceScoring,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+) -> NextStepScore:
+ tokens = ensure_tokens(sequence)
+ if tokens.size < 2:
+ raise ValueError("sequence must contain at least two tokens")
+
+ report = model.score(tokens)
+ return NextStepScore(tokens=int(getattr(report, "tokens", tokens.size)), bits_per_byte=float(report.bits_per_byte))
+
+
+def evaluate_rollout(
+ model: SupportsSequenceScoring | SupportsRolloutGeneration,
+ prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ continuation: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | None = None,
+ *,
+ mode: RolloutMode = "teacher_forced",
+ steps: int | None = None,
+ temperature: float = 1.0,
+ greedy: bool = False,
+ seed: int | None = None,
+) -> RolloutEvaluation:
+ prompt_tokens = ensure_tokens(prompt)
+ if prompt_tokens.size < 1:
+ raise ValueError("prompt must contain at least one token")
+
+ if mode == "teacher_forced":
+ if continuation is None:
+ raise ValueError("teacher_forced mode requires a continuation")
+ continuation_tokens = ensure_tokens(continuation)
+ sequence_tokens = np.concatenate([prompt_tokens, continuation_tokens])
+ elif mode == "closed_loop":
+ if continuation is not None:
+ continuation_tokens = ensure_tokens(continuation)
+ if steps is None:
+ steps = int(continuation_tokens.size)
+ else:
+ continuation_tokens = np.asarray([], dtype=np.uint8)
+ if steps is None:
+ raise ValueError("closed_loop mode requires steps or a continuation length")
+ if not hasattr(model, "generate"):
+ raise TypeError("closed_loop mode requires a model with generate(...)")
+ sequence_tokens = np.asarray(
+ model.generate(
+ prompt_tokens,
+ steps=steps,
+ temperature=temperature,
+ greedy=greedy,
+ seed=seed,
+ ),
+ dtype=np.uint8,
+ )
+ continuation_tokens = sequence_tokens[prompt_tokens.size :]
+ else:
+ raise ValueError(f"unknown rollout mode: {mode}")
+
+ if sequence_tokens.size < 2:
+ raise ValueError("the evaluated sequence must contain at least two tokens")
+
+ score = score_next_step(model, sequence_tokens)
+ return RolloutEvaluation(
+ mode=mode,
+ prompt_tokens=prompt_tokens,
+ continuation_tokens=continuation_tokens,
+ sequence_tokens=sequence_tokens,
+ bits_per_byte=score.bits_per_byte,
+ total_tokens=score.tokens,
+ )
+
+
+__all__ = [
+ "NextStepScore",
+ "RolloutEvaluation",
+ "RolloutMode",
+ "SupportsRolloutGeneration",
+ "SupportsSequenceScoring",
+ "evaluate_rollout",
+ "score_next_step",
+]
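+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run as a module. A stub
+    # scorer is enough to exercise the teacher-forced path; _StubReport and
+    # _StubModel are illustrative stand-ins for a fitted model.
+    @dataclass
+    class _StubReport:
+        tokens: int
+        bits_per_byte: float
+
+    class _StubModel:
+        def score(self, sequence):
+            tokens = ensure_tokens(sequence)
+            return _StubReport(tokens=int(tokens.size), bits_per_byte=8.0)
+
+    result = evaluate_rollout(_StubModel(), b"hello ", b"world")
+    print(result.mode, result.total_tokens, result.bits_per_byte)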
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/exact_context.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/exact_context.py
new file mode 100644
index 0000000000..da1dd7858f
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/exact_context.py
@@ -0,0 +1,227 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+
+import numpy as np
+
+from .codecs import ensure_tokens
+from .metrics import softmax
+
+
+@dataclass(frozen=True)
+class ExactContextConfig:
+ vocabulary_size: int = 256
+ max_order: int = 3
+ alpha: float = 0.05
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.max_order < 1:
+ raise ValueError("max_order must be >= 1")
+ if self.alpha < 0.0:
+ raise ValueError("alpha must be >= 0")
+
+
+@dataclass(frozen=True)
+class ExactContextPrediction:
+ name: str
+ order: int
+ context: tuple[int, ...]
+ probabilities: np.ndarray
+ support: float
+ total: float
+
+
+@dataclass(frozen=True)
+class ExactContextFitReport:
+ sequences: int
+ tokens: int
+ contexts_by_order: tuple[int, ...]
+
+
+@dataclass(frozen=True)
+class SupportMixConfig:
+ base_bias: float = 2.0
+ expert_bias: float = -1.0
+ support_scale: float = 0.5
+
+ def __post_init__(self) -> None:
+ if self.support_scale < 0.0:
+ raise ValueError("support_scale must be >= 0")
+
+
+@dataclass(frozen=True)
+class SupportBlend:
+ probabilities: np.ndarray
+ component_names: tuple[str, ...]
+ weights: np.ndarray
+ supports: np.ndarray
+
+
+class ExactContextMemory:
+ def __init__(self, config: ExactContextConfig | None = None):
+ self.config = config or ExactContextConfig()
+ self._unigram = np.zeros(self.config.vocabulary_size, dtype=np.float64)
+ self._tables = [dict[tuple[int, ...], np.ndarray]() for _ in range(self.config.max_order)]
+
+ @staticmethod
+ def _coerce_sequences(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> tuple[np.ndarray, ...]:
+ if isinstance(data, (str, bytes, bytearray, memoryview, np.ndarray)):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence) and data and all(isinstance(item, int) for item in data):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence):
+ return tuple(ensure_tokens(item) for item in data)
+ return (ensure_tokens(data),)
+
+ def clear(self) -> None:
+ self._unigram.fill(0.0)
+ self._tables = [dict[tuple[int, ...], np.ndarray]() for _ in range(self.config.max_order)]
+
+ def fit(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> ExactContextFitReport:
+ self.clear()
+ sequences = self._coerce_sequences(data)
+ total_tokens = 0
+ for sequence in sequences:
+ tokens = ensure_tokens(sequence).astype(np.int64, copy=False)
+ if tokens.size == 0:
+ continue
+ total_tokens += int(tokens.size)
+ self._unigram += np.bincount(tokens, minlength=self.config.vocabulary_size)
+ for index in range(tokens.size):
+ target = int(tokens[index])
+ max_order = min(self.config.max_order, index)
+ for order in range(1, max_order + 1):
+ context = tuple(int(token) for token in tokens[index - order : index])
+ table = self._tables[order - 1]
+ counts = table.get(context)
+ if counts is None:
+ counts = np.zeros(self.config.vocabulary_size, dtype=np.float64)
+ table[context] = counts
+ counts[target] += 1.0
+ contexts_by_order = tuple(len(table) for table in self._tables)
+ return ExactContextFitReport(
+ sequences=len(sequences),
+ tokens=total_tokens,
+ contexts_by_order=contexts_by_order,
+ )
+
+ def _normalize(self, distribution: np.ndarray) -> np.ndarray:
+ clipped = np.clip(np.asarray(distribution, dtype=np.float64), 1e-12, None)
+ total = float(np.sum(clipped))
+ if total <= 0.0:
+ return np.full(self.config.vocabulary_size, 1.0 / self.config.vocabulary_size, dtype=np.float64)
+ return clipped / total
+
+ def _smooth_counts(self, counts: np.ndarray) -> np.ndarray:
+ alpha = self.config.alpha
+ total = float(np.sum(counts))
+ if total == 0.0 and alpha == 0.0:
+ return np.full(self.config.vocabulary_size, 1.0 / self.config.vocabulary_size, dtype=np.float64)
+ probs = (counts + (alpha / self.config.vocabulary_size)) / (total + alpha)
+ return self._normalize(probs)
+
+ def unigram_probabilities(self) -> np.ndarray:
+ return self._smooth_counts(self._unigram)
+
+ def experts(
+ self,
+ context: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> tuple[ExactContextPrediction, ...]:
+ tokens = ensure_tokens(context).astype(np.int64, copy=False)
+ predictions: list[ExactContextPrediction] = []
+ max_order = min(self.config.max_order, int(tokens.size))
+ for order in range(1, max_order + 1):
+ key = tuple(int(token) for token in tokens[-order:])
+ counts = self._tables[order - 1].get(key)
+ if counts is None:
+ counts = np.zeros(self.config.vocabulary_size, dtype=np.float64)
+ total = float(np.sum(counts))
+ predictions.append(
+ ExactContextPrediction(
+ name=f"exact{order}",
+ order=order,
+ context=key,
+ probabilities=self._smooth_counts(counts),
+ support=float(np.log1p(total)),
+ total=total,
+ )
+ )
+ return tuple(predictions)
+
+ def predictive_distribution(
+ self,
+ context: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> np.ndarray:
+ predictions = self.experts(context)
+ for prediction in reversed(predictions):
+ if prediction.total > 0.0:
+ return prediction.probabilities
+ return self.unigram_probabilities()
+
+
+class SupportWeightedMixer:
+ def __init__(self, config: SupportMixConfig | None = None):
+ self.config = config or SupportMixConfig()
+
+ @staticmethod
+ def _normalize(probabilities: np.ndarray) -> np.ndarray:
+ clipped = np.clip(np.asarray(probabilities, dtype=np.float64), 1e-12, None)
+ return clipped / np.sum(clipped)
+
+ def mix(
+ self,
+ *,
+ base_probs: np.ndarray | None = None,
+ experts: Sequence[ExactContextPrediction] = (),
+ base_name: str = "base",
+ base_support: float = 1.0,
+ ) -> SupportBlend:
+ component_names: list[str] = []
+ component_probs: list[np.ndarray] = []
+ supports: list[float] = []
+ biases: list[float] = []
+
+ if base_probs is not None:
+ component_names.append(base_name)
+ component_probs.append(self._normalize(base_probs))
+ supports.append(float(base_support))
+ biases.append(self.config.base_bias)
+
+ for expert in experts:
+ component_names.append(expert.name)
+ component_probs.append(self._normalize(expert.probabilities))
+ supports.append(float(expert.support))
+ biases.append(self.config.expert_bias)
+
+ if not component_probs:
+ raise ValueError("mix requires at least one component")
+
+ logits = np.asarray(biases, dtype=np.float64) + self.config.support_scale * np.asarray(supports, dtype=np.float64)
+ weights = softmax(logits[None, :], axis=-1)[0]
+ stacked = np.stack(component_probs, axis=0)
+ mixed = np.sum(weights[:, None] * stacked, axis=0)
+ return SupportBlend(
+ probabilities=self._normalize(mixed),
+ component_names=tuple(component_names),
+ weights=weights,
+ supports=np.asarray(supports, dtype=np.float64),
+ )
+
+
+__all__ = [
+ "ExactContextConfig",
+ "ExactContextFitReport",
+ "ExactContextMemory",
+ "ExactContextPrediction",
+ "SupportBlend",
+ "SupportMixConfig",
+ "SupportWeightedMixer",
+]
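+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run as a module so the
+    # relative imports resolve. Fits the exact-context memory on a short byte
+    # string and blends its experts with the unigram base.
+    memory = ExactContextMemory(ExactContextConfig(max_order=2))
+    print(memory.fit(b"abracadabra"))
+    blend = SupportWeightedMixer().mix(
+        base_probs=memory.unigram_probabilities(),
+        experts=memory.experts(b"ra"),
+    )
+    print(blend.component_names, np.round(blend.weights, 3))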
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/experts.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/experts.py
new file mode 100644
index 0000000000..14b1779a7f
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/experts.py
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass
+
+import numpy as np
+
+from .codecs import ensure_tokens
+from .metrics import bits_per_byte_from_probabilities
+from .readout import RidgeReadout
+from .substrates import TokenSubstrate
+
+
+@dataclass(frozen=True)
+class ExpertFitReport:
+ sequences: int
+ tokens: int
+ bits_per_byte: float
+
+
+@dataclass(frozen=True)
+class ExpertScore:
+ tokens: int
+ bits_per_byte: float
+
+
+class FrozenReadoutExpert:
+ def __init__(
+ self,
+ *,
+ name: str,
+ substrate: TokenSubstrate,
+ feature_dim: int,
+ vocabulary_size: int,
+ feature_fn: Callable[[np.ndarray, np.ndarray | None], np.ndarray],
+ alpha: float = 1e-3,
+ ):
+ self.name = name
+ self.substrate = substrate
+ self.feature_fn = feature_fn
+ self.vocabulary_size = vocabulary_size
+ self.readout = RidgeReadout(
+ input_dim=feature_dim,
+ output_dim=vocabulary_size,
+ alpha=alpha,
+ )
+
+ @staticmethod
+ def _coerce_sequences(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> tuple[np.ndarray, ...]:
+ if isinstance(data, (str, bytes, bytearray, memoryview, np.ndarray)):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence) and data and all(isinstance(item, int) for item in data):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence):
+ return tuple(ensure_tokens(item) for item in data)
+ return (ensure_tokens(data),)
+
+ def _trace(self, sequence: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+ tokens = ensure_tokens(sequence)
+ if tokens.size < 2:
+ raise ValueError("sequence must contain at least two tokens")
+ state = self.substrate.initial_state()
+ features: list[np.ndarray] = []
+ for token in tokens[:-1]:
+ previous_state = state.copy()
+ state = self.substrate.step(state, int(token))
+ features.append(self.feature_fn(state, previous_state))
+ return np.vstack(features), tokens[1:].astype(np.int64, copy=False)
+
+ def fit(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> ExpertFitReport:
+ sequences = self._coerce_sequences(data)
+ feature_batches = []
+ target_batches = []
+ total_tokens = 0
+ for sequence in sequences:
+ features, targets = self._trace(sequence)
+ feature_batches.append(features)
+ target_batches.append(targets)
+ total_tokens += int(ensure_tokens(sequence).size)
+ design = np.concatenate(feature_batches, axis=0)
+ labels = np.concatenate(target_batches, axis=0)
+ self.readout.fit(design, labels)
+ return ExpertFitReport(
+ sequences=len(sequences),
+ tokens=total_tokens,
+ bits_per_byte=bits_per_byte_from_probabilities(self.readout.probabilities(design), labels),
+ )
+
+ def sequence_probabilities(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> tuple[np.ndarray, np.ndarray]:
+ features, targets = self._trace(ensure_tokens(sequence))
+ return self.readout.probabilities(features), targets
+
+ def score(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> ExpertScore:
+ probabilities, targets = self.sequence_probabilities(sequence)
+ return ExpertScore(
+ tokens=int(targets.size + 1),
+ bits_per_byte=bits_per_byte_from_probabilities(probabilities, targets),
+ )
+
+ def predict_proba(self, prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int]) -> np.ndarray:
+ tokens = ensure_tokens(prompt)
+ if tokens.size < 1:
+ raise ValueError("prompt must contain at least one token")
+ state = self.substrate.initial_state()
+ feature = None
+ for token in tokens:
+ previous_state = state.copy()
+ state = self.substrate.step(state, int(token))
+ feature = self.feature_fn(state, previous_state)
+ assert feature is not None
+ return self.readout.probabilities(feature[None, :])[0]
+
+
+__all__ = [
+ "ExpertFitReport",
+ "ExpertScore",
+ "FrozenReadoutExpert",
+]
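+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run as a module. The
+    # delay-line substrate and the identity feature function are illustrative
+    # choices only, assuming the default DelayLineConfig covers byte tokens.
+    from .delay import DelayLineSubstrate
+
+    substrate = DelayLineSubstrate()
+    expert = FrozenReadoutExpert(
+        name="delay-demo",
+        substrate=substrate,
+        feature_dim=substrate.state_dim,
+        vocabulary_size=256,
+        feature_fn=lambda state, previous: state,
+    )
+    print(expert.fit(b"abracadabra abracadabra"))
+    print(expert.predict_proba(b"abra").argmax())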
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/factories.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/factories.py
new file mode 100644
index 0000000000..0bcd21b69c
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/factories.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+from dataclasses import replace
+
+from .config import (
+ DelayLineConfig,
+ HierarchicalSubstrateConfig,
+ MixedMemoryConfig,
+ OpenPredictiveCoderConfig,
+ OscillatoryMemoryConfig,
+ ReservoirConfig,
+ SubstrateKind,
+)
+from .delay import DelayLineSubstrate
+from .hierarchical import HierarchicalSubstrate
+from .mixed_memory import MixedMemorySubstrate
+from .oscillatory_memory import OscillatoryMemorySubstrate
+from .reservoir import EchoStateReservoir
+from .substrates import TokenSubstrate
+
+
+def create_echo_state_substrate(
+ config: ReservoirConfig | None = None,
+ *,
+ vocabulary_size: int = 256,
+) -> EchoStateReservoir:
+ return EchoStateReservoir(
+ config=config or ReservoirConfig(),
+ vocabulary_size=vocabulary_size,
+ )
+
+
+def create_delay_line_substrate(config: DelayLineConfig | None = None) -> DelayLineSubstrate:
+ return DelayLineSubstrate(config=config)
+
+
+def create_oscillatory_memory_substrate(
+ config: OscillatoryMemoryConfig | None = None,
+) -> OscillatoryMemorySubstrate:
+ return OscillatoryMemorySubstrate(config=config)
+
+
+def create_mixed_memory_substrate(config: MixedMemoryConfig | None = None) -> MixedMemorySubstrate:
+ return MixedMemorySubstrate(config=config)
+
+
+def create_hierarchical_substrate(
+ config: HierarchicalSubstrateConfig | None = None,
+) -> HierarchicalSubstrate:
+ return HierarchicalSubstrate(config=config)
+
+
+def create_substrate_for_model(config: OpenPredictiveCoderConfig) -> TokenSubstrate:
+ if config.substrate_kind == "echo_state":
+ return create_echo_state_substrate(
+ config.reservoir,
+ vocabulary_size=config.vocabulary_size,
+ )
+ if config.substrate_kind == "delay":
+ return create_delay_line_substrate(
+ replace(config.delay, vocabulary_size=config.vocabulary_size),
+ )
+ if config.substrate_kind == "oscillatory":
+ return create_oscillatory_memory_substrate(
+ replace(config.oscillatory, vocabulary_size=config.vocabulary_size),
+ )
+ if config.substrate_kind == "mixed_memory":
+ return create_mixed_memory_substrate(
+ replace(
+ config.mixed_memory,
+ delay=replace(config.mixed_memory.delay, vocabulary_size=config.vocabulary_size),
+ )
+ )
+ if config.substrate_kind == "hierarchical":
+ return create_hierarchical_substrate(
+ replace(config.hierarchical, vocabulary_size=config.vocabulary_size),
+ )
+ raise ValueError(f"Unknown substrate_kind: {config.substrate_kind}")
+
+
+def create_substrate(config: object | None = None) -> TokenSubstrate:
+ if config is None or isinstance(config, ReservoirConfig):
+ return create_echo_state_substrate(config)
+ if isinstance(config, DelayLineConfig):
+ return create_delay_line_substrate(config)
+ if isinstance(config, OscillatoryMemoryConfig):
+ return create_oscillatory_memory_substrate(config)
+ if isinstance(config, MixedMemoryConfig):
+ return create_mixed_memory_substrate(config)
+ if isinstance(config, HierarchicalSubstrateConfig):
+ return create_hierarchical_substrate(config)
+ if isinstance(config, OpenPredictiveCoderConfig):
+ return create_substrate_for_model(config)
+ raise TypeError(f"Unsupported substrate config: {type(config)!r}")
+
+
+__all__ = [
+ "SubstrateKind",
+ "create_delay_line_substrate",
+ "create_echo_state_substrate",
+ "create_hierarchical_substrate",
+ "create_mixed_memory_substrate",
+ "create_oscillatory_memory_substrate",
+ "create_substrate",
+ "create_substrate_for_model",
+]
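+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run as a module. Dispatch
+    # picks the substrate class from the config type, or from the model-level
+    # config's substrate_kind.
+    print(type(create_substrate(None)).__name__)
+    print(type(create_substrate(DelayLineConfig())).__name__)
+    print(type(create_substrate(OpenPredictiveCoderConfig())).__name__)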
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/gating.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/gating.py
new file mode 100644
index 0000000000..fa712da387
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/gating.py
@@ -0,0 +1,142 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+
+import numpy as np
+
+from .control import ControllerSummary, ControllerSummaryBuilder, ControllerSummaryConfig
+
+
+def _sigmoid(value: float) -> float:
+ return float(1.0 / (1.0 + np.exp(-value)))
+
+
+@dataclass(frozen=True)
+class PathwayGateConfig:
+ refresh_stride: int = 1
+ summary: ControllerSummaryConfig = field(default_factory=ControllerSummaryConfig)
+ fast_to_mid_index: int = 0
+ mid_to_slow_index: int = 0
+ fast_to_mid_bias: float = 0.0
+ fast_to_mid_scale: float = 1.0
+ mid_to_slow_bias: float = 0.0
+ mid_to_slow_scale: float = 1.0
+
+ def __post_init__(self) -> None:
+ if self.refresh_stride < 1:
+ raise ValueError("refresh_stride must be >= 1")
+ if self.fast_to_mid_index < 0:
+ raise ValueError("fast_to_mid_index must be >= 0")
+ if self.mid_to_slow_index < 0:
+ raise ValueError("mid_to_slow_index must be >= 0")
+ if self.fast_to_mid_scale < 0.0:
+ raise ValueError("fast_to_mid_scale must be >= 0")
+ if self.mid_to_slow_scale < 0.0:
+ raise ValueError("mid_to_slow_scale must be >= 0")
+
+
+@dataclass(frozen=True)
+class PathwayGateValues:
+ fast_to_mid: float
+ mid_to_slow: float
+ step: int
+ refreshed: bool
+ summary_name: str | None = None
+
+ def __post_init__(self) -> None:
+ fast_to_mid = float(np.clip(self.fast_to_mid, 0.0, 1.0))
+ mid_to_slow = float(np.clip(self.mid_to_slow, 0.0, 1.0))
+ object.__setattr__(self, "fast_to_mid", fast_to_mid)
+ object.__setattr__(self, "mid_to_slow", mid_to_slow)
+
+
+@dataclass(frozen=True)
+class PathwayGateState:
+ last_refresh_step: int
+ values: PathwayGateValues
+
+
+class PathwayGateController:
+ def __init__(
+ self,
+ config: PathwayGateConfig | None = None,
+ *,
+ summary_builder: ControllerSummaryBuilder | None = None,
+ ):
+ self.config = config or PathwayGateConfig()
+ self.summary_builder = summary_builder or ControllerSummaryBuilder(self.config.summary)
+
+ def initial_state(self) -> PathwayGateState:
+ return PathwayGateState(
+ last_refresh_step=-self.config.refresh_stride,
+ values=PathwayGateValues(
+ fast_to_mid=0.0,
+ mid_to_slow=0.0,
+ step=-1,
+ refreshed=False,
+ summary_name=None,
+ ),
+ )
+
+ def _coerce_summary(
+ self,
+ summary: ControllerSummary | float | Sequence[float] | np.ndarray,
+ *,
+ name: str | None = None,
+ ) -> ControllerSummary:
+ if isinstance(summary, ControllerSummary):
+ return summary
+ return self.summary_builder.encode(summary, name=name)
+
+ def _summary_signals(self, summary: ControllerSummary) -> tuple[float, float]:
+ values = summary.values
+ max_index = max(self.config.fast_to_mid_index, self.config.mid_to_slow_index)
+ if max_index >= values.size:
+ raise ValueError("summary does not contain enough dimensions for the configured gate indices")
+ return (
+ float(values[self.config.fast_to_mid_index]),
+ float(values[self.config.mid_to_slow_index]),
+ )
+
+ def _compute_values(self, summary: ControllerSummary, step: int, refreshed: bool) -> PathwayGateValues:
+ fast_signal, slow_signal = self._summary_signals(summary)
+ fast_to_mid = _sigmoid(self.config.fast_to_mid_bias + self.config.fast_to_mid_scale * fast_signal)
+ mid_to_slow = _sigmoid(self.config.mid_to_slow_bias + self.config.mid_to_slow_scale * slow_signal)
+ return PathwayGateValues(
+ fast_to_mid=fast_to_mid,
+ mid_to_slow=mid_to_slow,
+ step=step,
+ refreshed=refreshed,
+ summary_name=summary.name,
+ )
+
+ def advance(
+ self,
+ state: PathwayGateState,
+ summary: ControllerSummary | float | Sequence[float] | np.ndarray,
+ *,
+ step: int,
+ name: str | None = None,
+ ) -> PathwayGateState:
+ controller_summary = self._coerce_summary(summary, name=name)
+ should_refresh = (step - state.last_refresh_step) >= self.config.refresh_stride
+ if should_refresh:
+ values = self._compute_values(controller_summary, step, refreshed=True)
+ return PathwayGateState(last_refresh_step=step, values=values)
+
+ values = PathwayGateValues(
+ fast_to_mid=state.values.fast_to_mid,
+ mid_to_slow=state.values.mid_to_slow,
+ step=step,
+ refreshed=False,
+ summary_name=controller_summary.name,
+ )
+ return PathwayGateState(last_refresh_step=state.last_refresh_step, values=values)
+
+
+__all__ = [
+ "PathwayGateConfig",
+ "PathwayGateController",
+ "PathwayGateState",
+ "PathwayGateValues",
+]
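+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run as a module. With
+    # refresh_stride=2 the gate recomputes on every other step and otherwise
+    # carries its previous values forward. Signals are illustrative.
+    controller = PathwayGateController(PathwayGateConfig(refresh_stride=2))
+    state = controller.initial_state()
+    for step, signal in enumerate([0.2, 0.9, -0.4, 1.5]):
+        state = controller.advance(state, signal, step=step)
+        print(step, state.values.refreshed, round(state.values.fast_to_mid, 3))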
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/hierarchical.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/hierarchical.py
new file mode 100644
index 0000000000..38e7e65563
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/hierarchical.py
@@ -0,0 +1,161 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from .config import HierarchicalSubstrateConfig
+
+
+def _spectral_radius(matrix: np.ndarray) -> float:
+ values = np.linalg.eigvals(matrix)
+ return float(np.max(np.abs(values)))
+
+
+def _make_recurrent_matrix(
+ rng: np.random.Generator,
+ size: int,
+ connectivity: float,
+ target_radius: float,
+) -> np.ndarray:
+ weights = rng.standard_normal((size, size))
+ mask = (rng.random((size, size)) < connectivity).astype(np.float64)
+ np.fill_diagonal(mask, 0.0)
+ weights = weights * mask
+ current = _spectral_radius(weights)
+ if current == 0.0:
+ return weights
+ return weights * (target_radius / current)
+
+
+def _make_input_matrix(rng: np.random.Generator, rows: int, cols: int, scale: float) -> np.ndarray:
+ weights = rng.normal(loc=0.0, scale=scale, size=(rows, cols))
+ return weights / np.sqrt(max(cols, 1))
+
+
+@dataclass(frozen=True)
+class HierarchicalStateSlices:
+ fast: slice
+ mid: slice
+ slow: slice
+
+
+class HierarchicalSubstrate:
+ def __init__(self, config: HierarchicalSubstrateConfig | None = None):
+ self.config = config or HierarchicalSubstrateConfig()
+ self._step_index = 0
+ rng = np.random.default_rng(self.config.seed)
+
+ self.fast_recurrent = _make_recurrent_matrix(
+ rng,
+ size=self.config.fast_size,
+ connectivity=self.config.fast_connectivity,
+ target_radius=self.config.fast_spectral_radius,
+ )
+ self.mid_recurrent = _make_recurrent_matrix(
+ rng,
+ size=self.config.mid_size,
+ connectivity=self.config.mid_connectivity,
+ target_radius=self.config.mid_spectral_radius,
+ )
+ self.slow_recurrent = _make_recurrent_matrix(
+ rng,
+ size=self.config.slow_size,
+ connectivity=self.config.slow_connectivity,
+ target_radius=self.config.slow_spectral_radius,
+ )
+
+ self.fast_input = _make_input_matrix(
+ rng,
+ rows=self.config.fast_size,
+ cols=self.config.vocabulary_size,
+ scale=self.config.input_scale,
+ )
+ self.mid_input = _make_input_matrix(
+ rng,
+ rows=self.config.mid_size,
+ cols=self.config.vocabulary_size,
+ scale=self.config.input_scale,
+ )
+ self.slow_input = _make_input_matrix(
+ rng,
+ rows=self.config.slow_size,
+ cols=self.config.vocabulary_size,
+ scale=self.config.input_scale,
+ )
+
+ self.fast_up = _make_input_matrix(
+ rng,
+ rows=self.config.mid_size,
+ cols=self.config.fast_size,
+ scale=self.config.upward_scale,
+ )
+ self.mid_up = _make_input_matrix(
+ rng,
+ rows=self.config.slow_size,
+ cols=self.config.mid_size,
+ scale=self.config.upward_scale,
+ )
+
+ self._fast_slice = slice(0, self.config.fast_size)
+ self._mid_slice = slice(self.config.fast_size, self.config.fast_size + self.config.mid_size)
+ self._slow_slice = slice(
+ self.config.fast_size + self.config.mid_size,
+ self.config.fast_size + self.config.mid_size + self.config.slow_size,
+ )
+
+ @property
+ def state_dim(self) -> int:
+ return self.config.fast_size + self.config.mid_size + self.config.slow_size
+
+ @property
+ def state_slices(self) -> HierarchicalStateSlices:
+ return HierarchicalStateSlices(fast=self._fast_slice, mid=self._mid_slice, slow=self._slow_slice)
+
+ def initial_state(self) -> np.ndarray:
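+        # The slow-bank stride counter lives on the instance, so requesting a
+        # fresh initial state also restarts the slow-update phase.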
+ self._step_index = 0
+ return np.zeros(self.state_dim, dtype=np.float64)
+
+ def _split_state(self, state: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+ state = np.asarray(state, dtype=np.float64)
+ if state.ndim != 1:
+ raise ValueError("state must be rank-1")
+ if state.shape[0] != self.state_dim:
+ raise ValueError("state does not match configured state_dim")
+ return state[self._fast_slice], state[self._mid_slice], state[self._slow_slice]
+
+ def _coerce_token(self, token: int) -> int:
+ token_id = int(token)
+ if token_id < 0 or token_id >= self.config.vocabulary_size:
+ raise ValueError("token is out of range for the configured vocabulary_size")
+ return token_id
+
+ def step(self, state: np.ndarray, token: int) -> np.ndarray:
+ fast_state, mid_state, slow_state = self._split_state(state)
+ token_id = self._coerce_token(token)
+
+ fast_drive = self.fast_recurrent @ fast_state + self.fast_input[:, token_id]
+ next_fast = (1.0 - self.config.fast_leak) * fast_state + self.config.fast_leak * np.tanh(fast_drive)
+
+ mid_drive = self.mid_recurrent @ mid_state + self.mid_input[:, token_id] + self.fast_up @ next_fast
+ next_mid = (1.0 - self.config.mid_leak) * mid_state + self.config.mid_leak * np.tanh(mid_drive)
+
+ slow_active = self.config.slow_update_stride == 1 or (
+ (self._step_index + 1) % self.config.slow_update_stride == 0
+ )
+
+ if slow_active:
+ slow_drive = self.slow_recurrent @ slow_state + self.slow_input[:, token_id] + self.mid_up @ next_mid
+ next_slow = (1.0 - self.config.slow_leak) * slow_state + self.config.slow_leak * np.tanh(slow_drive)
+ else:
+ next_slow = slow_state.copy()
+
+ self._step_index += 1
+ return np.concatenate([next_fast, next_mid, next_slow])
+
+
+__all__ = [
+ "HierarchicalStateSlices",
+ "HierarchicalSubstrate",
+ "HierarchicalSubstrateConfig",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/hierarchical_views.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/hierarchical_views.py
new file mode 100644
index 0000000000..2ad104d6bd
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/hierarchical_views.py
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from .config import HierarchicalSubstrateConfig
+from .hierarchical import HierarchicalStateSlices
+
+
+@dataclass(frozen=True)
+class HierarchicalSummary:
+ fast_mean: np.ndarray
+ mid_mean: np.ndarray
+ slow_mean: np.ndarray
+ fast_energy: float
+ mid_energy: float
+ slow_energy: float
+
+
+class HierarchicalFeatureView:
+ def __init__(self, config: HierarchicalSubstrateConfig):
+ self.config = config
+ self._fast_slice = slice(0, config.fast_size)
+ self._mid_slice = slice(config.fast_size, config.fast_size + config.mid_size)
+ self._slow_slice = slice(
+ config.fast_size + config.mid_size,
+ config.fast_size + config.mid_size + config.slow_size,
+ )
+
+ @property
+ def state_dim(self) -> int:
+ return self.config.state_dim
+
+ @property
+ def predictive_dim(self) -> int:
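+        # Per-bank deltas (state_dim), two cross-bank surprise vectors, three
+        # pooled means, and six scalar statistics (energies and mean |delta|s).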
+ return (
+ self.config.state_dim
+ + min(self.config.fast_size, self.config.mid_size)
+ + min(self.config.mid_size, self.config.slow_size)
+ + 3
+ + 6
+ )
+
+ @property
+ def feature_dim(self) -> int:
+ return 3 + 3 + self.predictive_dim
+
+ @property
+ def bank_slices(self) -> HierarchicalStateSlices:
+ return HierarchicalStateSlices(fast=self._fast_slice, mid=self._mid_slice, slow=self._slow_slice)
+
+ def split(self, state: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+ state = np.asarray(state, dtype=np.float64)
+ if state.ndim != 1:
+ raise ValueError("state must be rank-1")
+ if state.shape[0] != self.state_dim:
+ raise ValueError("state does not match configured state_dim")
+ return state[self._fast_slice], state[self._mid_slice], state[self._slow_slice]
+
+ def pooled_summary(self, state: np.ndarray) -> HierarchicalSummary:
+ fast, mid, slow = self.split(state)
+ return HierarchicalSummary(
+ fast_mean=np.array([float(np.mean(fast))], dtype=np.float64),
+ mid_mean=np.array([float(np.mean(mid))], dtype=np.float64),
+ slow_mean=np.array([float(np.mean(slow))], dtype=np.float64),
+ fast_energy=float(np.mean(np.square(fast))),
+ mid_energy=float(np.mean(np.square(mid))),
+ slow_energy=float(np.mean(np.square(slow))),
+ )
+
+ @staticmethod
+ def _aligned_delta(left: np.ndarray, right: np.ndarray) -> np.ndarray:
+ width = min(left.shape[0], right.shape[0])
+ if width == 0:
+ return np.zeros((0,), dtype=np.float64)
+ return left[:width] - right[:width]
+
+ def predictive_features(
+ self,
+ state: np.ndarray,
+ previous_state: np.ndarray | None = None,
+ ) -> np.ndarray:
+ fast, mid, slow = self.split(state)
+ summary = self.pooled_summary(state)
+
+ if previous_state is None:
+ prev_fast = np.zeros_like(fast)
+ prev_mid = np.zeros_like(mid)
+ prev_slow = np.zeros_like(slow)
+ else:
+ prev_fast, prev_mid, prev_slow = self.split(previous_state)
+
+ fast_delta = fast - prev_fast
+ mid_delta = mid - prev_mid
+ slow_delta = slow - prev_slow
+
+ fast_mid_surprise = self._aligned_delta(fast, self.config.fast_leak * np.tanh(mid))
+ mid_slow_surprise = self._aligned_delta(mid, self.config.mid_leak * np.tanh(slow))
+
+ return np.concatenate(
+ [
+ fast_delta,
+ mid_delta,
+ slow_delta,
+ fast_mid_surprise,
+ mid_slow_surprise,
+ summary.fast_mean,
+ summary.mid_mean,
+ summary.slow_mean,
+ np.array(
+ [
+ summary.fast_energy,
+ summary.mid_energy,
+ summary.slow_energy,
+ float(np.mean(np.abs(fast_delta))),
+ float(np.mean(np.abs(mid_delta))),
+ float(np.mean(np.abs(slow_delta))),
+ ],
+ dtype=np.float64,
+ ),
+ ]
+ )
+
+ def encode(
+ self,
+ state: np.ndarray,
+ previous_state: np.ndarray | None = None,
+ ) -> np.ndarray:
+ summary = self.pooled_summary(state)
+ predictive = self.predictive_features(state, previous_state=previous_state)
+ return np.concatenate(
+ [
+ summary.fast_mean,
+ summary.mid_mean,
+ summary.slow_mean,
+ np.array(
+ [
+ summary.fast_energy,
+ summary.mid_energy,
+ summary.slow_energy,
+ ],
+ dtype=np.float64,
+ ),
+ predictive,
+ ]
+ )
+
+
+__all__ = [
+ "HierarchicalFeatureView",
+ "HierarchicalSummary",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/latents.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/latents.py
new file mode 100644
index 0000000000..a0d557d1d4
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/latents.py
@@ -0,0 +1,154 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from .config import LatentConfig
+from .patching import AdaptiveSegmenter
+from .reservoir import spectral_radius
+
+
+def _normalized_matrix(rng: np.random.Generator, rows: int, cols: int, scale: float = 1.0) -> np.ndarray:
+ matrix = rng.normal(loc=0.0, scale=1.0, size=(rows, cols))
+ return (matrix / np.sqrt(max(cols, 1))) * scale
+
+
+def _scaled_square(rng: np.random.Generator, size: int, radius: float) -> np.ndarray:
+ matrix = rng.normal(loc=0.0, scale=1.0, size=(size, size))
+ matrix = matrix / np.sqrt(max(size, 1))
+ current = spectral_radius(matrix)
+ if current == 0.0:
+ return matrix
+ return matrix * (radius / current)
+
+
+@dataclass
+class LatentState:
+ global_state: np.ndarray
+ previous_view: np.ndarray | None
+ patch_sum: np.ndarray
+ patch_length: int
+ last_latent: np.ndarray
+ steps: int
+ patches: int
+
+
+@dataclass(frozen=True)
+class LatentObservation:
+ local_view: np.ndarray
+ predicted_view: np.ndarray
+ prediction_error: np.ndarray
+ patch_summary: np.ndarray
+ global_state: np.ndarray
+ latent: np.ndarray
+ novelty: float
+ patch_length: int
+ boundary: bool
+
+
+class LatentCommitter:
+ def __init__(self, config: LatentConfig, substrate_size: int, seed: int):
+ if config.reservoir_features > substrate_size:
+ raise ValueError("config.reservoir_features must be <= substrate_size")
+
+ self.config = config
+ rng = np.random.default_rng(seed)
+ self.sample_indices = np.sort(
+ rng.choice(
+ substrate_size,
+ size=config.reservoir_features,
+ replace=False,
+ )
+ )
+ self.commit_projection = _normalized_matrix(
+ rng,
+ rows=config.latent_dim,
+ cols=config.reservoir_features,
+ scale=config.bridge_scale,
+ )
+ self.global_recurrent = _scaled_square(rng, config.global_dim, radius=0.9)
+ self.global_input = _normalized_matrix(
+ rng,
+ rows=config.global_dim,
+ cols=config.latent_dim,
+ scale=config.global_update_scale,
+ )
+ self.local_predictor = _normalized_matrix(
+ rng,
+ rows=config.reservoir_features,
+ cols=config.global_dim,
+ scale=1.0,
+ )
+
+ def initial_state(self) -> LatentState:
+ return LatentState(
+ global_state=np.zeros(self.config.global_dim, dtype=np.float64),
+ previous_view=None,
+ patch_sum=np.zeros(self.config.reservoir_features, dtype=np.float64),
+ patch_length=0,
+ last_latent=np.zeros(self.config.latent_dim, dtype=np.float64),
+ steps=0,
+ patches=0,
+ )
+
+ def sample(self, substrate_state: np.ndarray) -> np.ndarray:
+ substrate_state = np.asarray(substrate_state, dtype=np.float64)
+ if substrate_state.ndim != 1:
+ raise ValueError("substrate_state must be rank-1")
+ return substrate_state[self.sample_indices]
+
+ def step(
+ self,
+ state: LatentState,
+ local_view: np.ndarray,
+ segmenter: AdaptiveSegmenter,
+ ) -> LatentObservation:
+ local_view = np.asarray(local_view, dtype=np.float64)
+ if local_view.shape != (self.config.reservoir_features,):
+ raise ValueError("local_view does not match configured reservoir_features")
+
+ if state.previous_view is None:
+ novelty = 0.0
+ else:
+ novelty = float(np.mean(np.abs(local_view - state.previous_view)))
+
+ state.patch_sum = state.patch_sum + local_view
+ state.patch_length += 1
+ patch_summary = state.patch_sum / state.patch_length
+ boundary = segmenter.should_commit(state.patch_length, novelty)
+
+ if boundary:
+ latent = np.tanh(self.commit_projection @ patch_summary)
+ state.global_state = np.tanh((self.global_recurrent @ state.global_state) + (self.global_input @ latent))
+ state.last_latent = latent
+ state.patches += 1
+
+ predicted_view = np.tanh(self.local_predictor @ state.global_state)
+ prediction_error = local_view - predicted_view
+ observation = LatentObservation(
+ local_view=local_view.copy(),
+ predicted_view=predicted_view.copy(),
+ prediction_error=prediction_error.copy(),
+ patch_summary=patch_summary.copy(),
+ global_state=state.global_state.copy(),
+ latent=state.last_latent.copy(),
+ novelty=novelty,
+ patch_length=state.patch_length,
+ boundary=boundary,
+ )
+
+ if boundary:
+ state.patch_sum = np.zeros_like(state.patch_sum)
+ state.patch_length = 0
+
+ state.previous_view = local_view.copy()
+ state.steps += 1
+ return observation
+
+
+__all__ = [
+ "LatentCommitter",
+ "LatentObservation",
+ "LatentState",
+]
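+
+
+if __name__ == "__main__":
+    # Editorial usage sketch, not upstream code; run as a module. _FixedStride
+    # is an illustrative stand-in for the AdaptiveSegmenter protocol consumed
+    # by step(); the random substrate states are placeholders.
+    class _FixedStride:
+        def should_commit(self, patch_length: int, novelty: float) -> bool:
+            return patch_length >= 4
+
+    committer = LatentCommitter(LatentConfig(), substrate_size=256, seed=0)
+    state = committer.initial_state()
+    rng = np.random.default_rng(0)
+    for _ in range(8):
+        view = committer.sample(rng.standard_normal(256))
+        observation = committer.step(state, view, _FixedStride())
+        print(observation.boundary, observation.patch_length)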
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/learned_segmentation.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/learned_segmentation.py
new file mode 100644
index 0000000000..c2acb96b6d
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/learned_segmentation.py
@@ -0,0 +1,411 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from collections.abc import Sequence
+
+import numpy as np
+
+
+def _sigmoid(values: np.ndarray) -> np.ndarray:
+ clipped = np.clip(np.asarray(values, dtype=np.float64), -60.0, 60.0)
+ return 1.0 / (1.0 + np.exp(-clipped))
+
+
+def _coerce_feature_vector(features: BoundaryFeatures | Sequence[float] | np.ndarray, *, feature_dim: int) -> np.ndarray:
+ if isinstance(features, BoundaryFeatures):
+ vector = features.as_array()
+ else:
+ vector = np.asarray(features, dtype=np.float64)
+ if vector.ndim != 1:
+ raise ValueError("features must be a 1D vector")
+ if vector.shape[0] != feature_dim:
+ raise ValueError(f"features must have shape ({feature_dim},)")
+ return vector
+
+
+def _coerce_feature_matrix(
+ features: Sequence[BoundaryFeatures | Sequence[float] | np.ndarray] | np.ndarray,
+ *,
+ feature_dim: int,
+) -> np.ndarray:
+ if isinstance(features, np.ndarray):
+ matrix = np.asarray(features, dtype=np.float64)
+ if matrix.ndim == 1:
+ matrix = matrix[None, :]
+ else:
+ rows = [_coerce_feature_vector(row, feature_dim=feature_dim) for row in features]
+ matrix = np.vstack(rows)
+ if matrix.ndim != 2:
+ raise ValueError("features must be a 2D matrix or a sequence of vectors")
+ if matrix.shape[1] != feature_dim:
+ raise ValueError(f"features must have shape (n, {feature_dim})")
+ return matrix
+
+
+def _resolve_target_rate(config: BoundaryScorerConfig, override: float | None = None) -> float | None:
+ if override is not None:
+ return override
+ if config.target_boundary_rate is not None:
+ return config.target_boundary_rate
+ if config.target_patch_size is not None:
+ return 1.0 / float(config.target_patch_size)
+ return None
+
+
+def _update_mean(current_mean: float, count: int, value: float) -> float:
+ return current_mean + ((value - current_mean) / float(count))
+
+
+@dataclass(frozen=True)
+class BoundaryScorerConfig:
+ feature_dim: int = 5
+ learning_rate: float = 0.1
+ l2: float = 1e-3
+ threshold: float = 0.5
+ min_patch_size: int = 2
+ max_patch_size: int = 8
+ target_patch_size: float | None = None
+ target_boundary_rate: float | None = None
+ target_regularization: float = 0.25
+ initial_bias: float = -1.0
+
+ def __post_init__(self) -> None:
+ if self.feature_dim < 1:
+ raise ValueError("feature_dim must be >= 1")
+ if self.learning_rate <= 0.0:
+ raise ValueError("learning_rate must be > 0")
+ if self.l2 < 0.0:
+ raise ValueError("l2 must be >= 0")
+ if not 0.0 <= self.threshold <= 1.0:
+ raise ValueError("threshold must be in [0, 1]")
+ if self.min_patch_size < 1:
+ raise ValueError("min_patch_size must be >= 1")
+ if self.max_patch_size < self.min_patch_size:
+ raise ValueError("max_patch_size must be >= min_patch_size")
+ if self.target_patch_size is not None and self.target_patch_size <= 0.0:
+ raise ValueError("target_patch_size must be > 0")
+ if self.target_boundary_rate is not None and not 0.0 < self.target_boundary_rate <= 1.0:
+ raise ValueError("target_boundary_rate must be in (0, 1]")
+ if self.target_regularization < 0.0:
+ raise ValueError("target_regularization must be >= 0")
+
+ @property
+ def commit_threshold(self) -> float:
+ return self.threshold
+
+
+@dataclass
+class BoundaryScorerState:
+ weights: np.ndarray
+ bias: float
+ steps_seen: int = 0
+ boundaries_seen: int = 0
+ patches_seen: int = 0
+ current_patch_length: int = 0
+ mean_probability: float = 0.0
+ mean_patch_length: float = 0.0
+ mean_target_rate: float = 0.0
+ last_probability: float = 0.0
+ last_logit: float = 0.0
+
+ def __post_init__(self) -> None:
+ self.weights = np.asarray(self.weights, dtype=np.float64)
+ if self.weights.ndim != 1:
+ raise ValueError("weights must be a 1D vector")
+ if not np.isfinite(self.bias):
+ raise ValueError("bias must be finite")
+
+
+@dataclass(frozen=True)
+class BoundaryFeatures:
+ bias: float = 1.0
+ novelty: float = 0.0
+ drift: float = 0.0
+ patch_progress: float = 0.0
+ patch_utilization: float = 0.0
+
+ def as_array(self) -> np.ndarray:
+ return np.asarray(
+ [
+ float(self.bias),
+ float(self.novelty),
+ float(self.drift),
+ float(self.patch_progress),
+ float(self.patch_utilization),
+ ],
+ dtype=np.float64,
+ )
+
+
+@dataclass(frozen=True)
+class BoundaryDecision:
+ boundary: bool
+ probability: float
+ logit: float
+ patch_length: int
+ next_patch_length: int
+ features: BoundaryFeatures
+
+
+class LearnedBoundaryScorer:
+ def __init__(self, config: BoundaryScorerConfig | None = None, state: BoundaryScorerState | None = None):
+ self.config = config or BoundaryScorerConfig()
+ if state is None:
+ state = BoundaryScorerState(
+ weights=np.zeros(self.config.feature_dim, dtype=np.float64),
+ bias=self.config.initial_bias,
+ )
+ if state.weights.shape != (self.config.feature_dim,):
+ raise ValueError(f"state.weights must have shape ({self.config.feature_dim},)")
+ self.state = state
+
+ def logit(self, features: BoundaryFeatures | Sequence[float] | np.ndarray) -> float:
+ vector = _coerce_feature_vector(features, feature_dim=self.config.feature_dim)
+ return float(np.dot(self.state.weights, vector) + self.state.bias)
+
+ def probability(self, features: BoundaryFeatures | Sequence[float] | np.ndarray) -> float:
+ return float(_sigmoid(np.asarray([self.logit(features)], dtype=np.float64))[0])
+
+ def update(
+ self,
+ features: BoundaryFeatures | Sequence[float] | np.ndarray,
+ target: bool | float,
+ *,
+ target_rate: float | None = None,
+ ) -> BoundaryScorerState:
+ vector = _coerce_feature_vector(features, feature_dim=self.config.feature_dim)
+ target_value = float(target)
+ if not 0.0 <= target_value <= 1.0:
+ raise ValueError("target must be in [0, 1]")
+
+ logit = float(np.dot(self.state.weights, vector) + self.state.bias)
+ probability = float(_sigmoid(np.asarray([logit], dtype=np.float64))[0])
+
+ error = probability - target_value
+ grad_weights = (error * vector) + (self.config.l2 * self.state.weights)
+ grad_bias = error
+
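+        # Optional rate control: an extra bias gradient nudges the average
+        # boundary probability toward the configured target rate.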
+ effective_target_rate = _resolve_target_rate(self.config, target_rate)
+ if effective_target_rate is not None:
+ grad_bias += self.config.target_regularization * (probability - effective_target_rate)
+
+ self.state.weights = self.state.weights - (self.config.learning_rate * grad_weights)
+ self.state.bias = float(self.state.bias - (self.config.learning_rate * grad_bias))
+
+ self.state.steps_seen += 1
+ self.state.mean_probability = _update_mean(self.state.mean_probability, self.state.steps_seen, probability)
+ self.state.last_probability = probability
+ self.state.last_logit = logit
+ if target_rate is not None or self.config.target_boundary_rate is not None or self.config.target_patch_size is not None:
+ if effective_target_rate is not None:
+ self.state.mean_target_rate = _update_mean(
+ self.state.mean_target_rate,
+ self.state.steps_seen,
+ effective_target_rate,
+ )
+ return self.state
+
+ def fit(
+ self,
+ features: Sequence[BoundaryFeatures | Sequence[float] | np.ndarray] | np.ndarray,
+ targets: Sequence[bool | float] | np.ndarray,
+ *,
+ epochs: int = 1,
+ target_rate: float | None = None,
+ ) -> BoundaryScorerState:
+ if epochs < 1:
+ raise ValueError("epochs must be >= 1")
+ matrix = _coerce_feature_matrix(features, feature_dim=self.config.feature_dim)
+ labels = np.asarray(targets, dtype=np.float64)
+ if labels.ndim != 1:
+ raise ValueError("targets must be a 1D vector")
+ if matrix.shape[0] != labels.shape[0]:
+ raise ValueError("features and targets must contain the same number of rows")
+
+ for _ in range(epochs):
+ for row, target in zip(matrix, labels):
+ self.update(row, bool(target), target_rate=target_rate)
+ return self.state
+
+
+class LearnedSegmenter:
+ def __init__(self, config: BoundaryScorerConfig | None = None, scorer: LearnedBoundaryScorer | None = None):
+ self.config = config or BoundaryScorerConfig()
+ self.scorer = scorer or LearnedBoundaryScorer(self.config)
+ if self.scorer.config != self.config:
+ raise ValueError("scorer config must match segmenter config")
+
+ @property
+ def state(self) -> BoundaryScorerState:
+ return self.scorer.state
+
+ def reset(self) -> None:
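+        # Reset only the segmentation cursor; learned weights and running statistics are preserved.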
+ self.scorer.state.current_patch_length = 0
+
+ def _feature_builder(
+ self,
+ *,
+ novelty: float = 0.0,
+ drift: float = 0.0,
+ patch_length: int | None = None,
+ ) -> BoundaryFeatures:
+ current_length = self.state.current_patch_length + 1 if patch_length is None else int(patch_length)
+ scale = self.config.target_patch_size or float(self.config.max_patch_size)
+ scale = max(scale, 1.0)
+ return BoundaryFeatures(
+ novelty=float(novelty),
+ drift=float(drift),
+ patch_progress=current_length / scale,
+ patch_utilization=current_length / float(self.config.max_patch_size),
+ )
+
+ def step(
+ self,
+ features: BoundaryFeatures | Sequence[float] | np.ndarray | None = None,
+ *,
+ novelty: float = 0.0,
+ drift: float = 0.0,
+ target: bool | None = None,
+ learn: bool = False,
+ target_rate: float | None = None,
+ ) -> BoundaryDecision:
+ candidate_length = self.state.current_patch_length + 1
+ feature_row = features if features is not None else self._feature_builder(
+ novelty=novelty,
+ drift=drift,
+ patch_length=candidate_length,
+ )
+
+ probability = self.scorer.probability(feature_row)
+ logit = self.scorer.logit(feature_row)
+
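+        # Length limits override the learned score: force a boundary at max_patch_size, forbid one below min_patch_size.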
+ if candidate_length >= self.config.max_patch_size:
+ boundary = True
+ elif candidate_length < self.config.min_patch_size:
+ boundary = False
+ else:
+ boundary = probability >= self.config.threshold
+
+        if learn:
+            if target is None:
+                raise ValueError("learn=True requires target")
+            # scorer.update advances the shared state (steps_seen, means, last_*), so skip the bookkeeping below.
+            self.scorer.update(feature_row, target, target_rate=target_rate)
+        else:
+            self.state.steps_seen += 1
+            self.state.mean_probability = _update_mean(self.state.mean_probability, self.state.steps_seen, probability)
+            self.state.last_probability = probability
+            self.state.last_logit = logit
+
+ if boundary:
+ self.state.boundaries_seen += 1
+ self.state.patches_seen += 1
+ self.state.mean_patch_length = _update_mean(
+ self.state.mean_patch_length,
+ self.state.patches_seen,
+ float(candidate_length),
+ )
+ self.state.current_patch_length = 0
+ else:
+ self.state.current_patch_length = candidate_length
+
+        if target is not None and not learn:
+            # scorer.update already advanced steps_seen on the learning path; avoid a second mean update with the same count.
+            self.state.mean_target_rate = _update_mean(
+                self.state.mean_target_rate,
+                self.state.steps_seen,
+                1.0 if target else 0.0,
+            )
+
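+        # Note: when a raw feature vector was supplied, the echoed BoundaryFeatures is rebuilt from the scalar arguments and may not reflect that vector's contents.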
+ return BoundaryDecision(
+ boundary=boundary,
+ probability=probability,
+ logit=logit,
+ patch_length=candidate_length,
+ next_patch_length=self.state.current_patch_length,
+ features=feature_row if isinstance(feature_row, BoundaryFeatures) else BoundaryFeatures(
+ novelty=float(novelty),
+ drift=float(drift),
+ patch_progress=(candidate_length / float(self.config.target_patch_size or self.config.max_patch_size)),
+ patch_utilization=candidate_length / float(self.config.max_patch_size),
+ ),
+ )
+
+ def fit(
+ self,
+ features: Sequence[BoundaryFeatures | Sequence[float] | np.ndarray] | np.ndarray,
+ targets: Sequence[bool | float] | np.ndarray,
+ *,
+ epochs: int = 1,
+ target_rate: float | None = None,
+ ) -> BoundaryScorerState:
+ self.scorer.fit(features, targets, epochs=epochs, target_rate=target_rate)
+ return self.state
+
+ def decide(
+ self,
+ patch_length: int,
+ novelty: float,
+ surprise: float,
+ *,
+ train: bool = False,
+ update_steps: int = 1,
+ ) -> BoundaryDecision:
+ if patch_length < 1:
+ raise ValueError("patch_length must be >= 1")
+ features = self._feature_builder(
+ novelty=novelty,
+ drift=surprise,
+ patch_length=patch_length,
+ )
+ probability = self.scorer.probability(features)
+ logit = self.scorer.logit(features)
+
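+        # Same length gating as step(); the training label mirrors the gated decision unless target_patch_size supplies it.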
+ if patch_length >= self.config.max_patch_size:
+ boundary = True
+ target = True
+ elif patch_length < self.config.min_patch_size:
+ boundary = False
+ target = False
+ else:
+ boundary = probability >= self.config.threshold
+ if self.config.target_patch_size is None:
+ target = boundary
+ else:
+ target = patch_length >= int(round(self.config.target_patch_size))
+
+ if train:
+ for _ in range(update_steps):
+ self.scorer.update(
+ features,
+ target,
+ target_rate=_resolve_target_rate(self.config),
+ )
+ probability = self.scorer.probability(features)
+ logit = self.scorer.logit(features)
+ if patch_length >= self.config.max_patch_size:
+ boundary = True
+ elif patch_length < self.config.min_patch_size:
+ boundary = False
+ else:
+ boundary = probability >= self.config.threshold
+
+ return BoundaryDecision(
+ boundary=boundary,
+ probability=probability,
+ logit=logit,
+ patch_length=patch_length,
+ next_patch_length=0 if boundary else patch_length,
+ features=features,
+ )
+
+
+LearnedSegmenterConfig = BoundaryScorerConfig
+
+__all__ = [
+    "BoundaryDecision",
+    "BoundaryFeatures",
+    "BoundaryScorerConfig",
+    "BoundaryScorerState",
+    "LearnedBoundaryScorer",
+    "LearnedSegmenter",
+    "LearnedSegmenterConfig",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/linear_memory.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/linear_memory.py
new file mode 100644
index 0000000000..9ad18c2149
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/linear_memory.py
@@ -0,0 +1,57 @@
+from __future__ import annotations
+
+import numpy as np
+
+from .config import LinearMemoryConfig
+
+
+class LinearMemorySubstrate:
+ def __init__(self, config: LinearMemoryConfig | None = None):
+ self.config = config or LinearMemoryConfig()
+ rng = np.random.default_rng(self.config.seed)
+ self._token_embeddings = rng.normal(
+ loc=0.0,
+ scale=self.config.input_scale,
+ size=(self.config.vocabulary_size, self.config.embedding_dim),
+ ).astype(np.float64)
+ self._token_embeddings /= np.sqrt(max(self.config.embedding_dim, 1))
+
+ @property
+ def state_dim(self) -> int:
+ return self.config.state_dim
+
+ @property
+ def bank_count(self) -> int:
+ return len(self.config.decays)
+
+ def initial_state(self) -> np.ndarray:
+ return np.zeros(self.state_dim, dtype=np.float64)
+
+ def state_view(self, state: np.ndarray) -> np.ndarray:
+ state = np.asarray(state, dtype=np.float64)
+ if state.shape != (self.state_dim,):
+ raise ValueError("state has unexpected shape")
+ return state.reshape(self.bank_count, self.config.embedding_dim)
+
+ def _coerce_token(self, token: int) -> int:
+ index = int(token)
+ if index < 0 or index >= self.config.vocabulary_size:
+ raise ValueError(
+ f"token {index} is out of range for vocabulary_size={self.config.vocabulary_size}"
+ )
+ return index
+
+ def step(self, state: np.ndarray, token: int) -> np.ndarray:
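+        # Each bank is a leaky integrator: decay its previous contents, then add the shared token embedding.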
+ banks = self.state_view(state)
+ token_index = self._coerce_token(token)
+ token_embedding = self._token_embeddings[token_index]
+ next_banks = np.empty_like(banks)
+ for index, decay in enumerate(self.config.decays):
+ next_banks[index] = (decay * banks[index]) + token_embedding
+ return next_banks.reshape(-1)
+
+
+__all__ = [
+ "LinearMemoryConfig",
+ "LinearMemorySubstrate",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/linear_views.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/linear_views.py
new file mode 100644
index 0000000000..a8658ac342
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/linear_views.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+import numpy as np
+
+from .linear_memory import LinearMemorySubstrate
+
+
+class LinearMemoryFeatureView:
+ def __init__(self, substrate: LinearMemorySubstrate):
+ self.substrate = substrate
+ self.feature_dim = substrate.state_dim + (3 * substrate.bank_count)
+
+ def encode(self, state: np.ndarray, previous_state: np.ndarray | None = None) -> np.ndarray:
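+        # Summarize each bank with its mean activation, mean energy, and mean absolute drift from the previous state.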
+ banks = self.substrate.state_view(state)
+ means = np.mean(banks, axis=1)
+ energies = np.mean(np.square(banks), axis=1)
+ if previous_state is None:
+ drift = np.zeros(self.substrate.bank_count, dtype=np.float64)
+ else:
+ previous_banks = self.substrate.state_view(previous_state)
+ drift = np.mean(np.abs(banks - previous_banks), axis=1)
+ return np.concatenate([state, means, energies, drift])
+
+
+__all__ = [
+ "LinearMemoryFeatureView",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/metrics.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/metrics.py
new file mode 100644
index 0000000000..ddfd8fccd7
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/metrics.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+import numpy as np
+
+
+def softmax(logits: np.ndarray, axis: int = -1) -> np.ndarray:
+ shifted = logits - np.max(logits, axis=axis, keepdims=True)
+ exp = np.exp(shifted)
+ return exp / np.sum(exp, axis=axis, keepdims=True)
+
+
+def cross_entropy_from_logits(logits: np.ndarray, targets: np.ndarray) -> np.ndarray:
+ targets = targets.astype(np.int64, copy=False)
+ probs = softmax(logits, axis=-1)
+ row_idx = np.arange(targets.shape[0], dtype=np.int64)
+ chosen = np.clip(probs[row_idx, targets], 1e-12, 1.0)
+ return -np.log(chosen)
+
+
+def cross_entropy_from_probabilities(probabilities: np.ndarray, targets: np.ndarray) -> np.ndarray:
+ targets = targets.astype(np.int64, copy=False)
+ probs = np.asarray(probabilities, dtype=np.float64)
+ row_idx = np.arange(targets.shape[0], dtype=np.int64)
+ chosen = np.clip(probs[row_idx, targets], 1e-12, 1.0)
+ return -np.log(chosen)
+
+
+def bits_per_byte_from_logits(logits: np.ndarray, targets: np.ndarray) -> float:
+ losses = cross_entropy_from_logits(logits, targets)
+ return float(np.mean(losses) / np.log(2.0))
+
+
+def bits_per_byte_from_probabilities(probabilities: np.ndarray, targets: np.ndarray) -> float:
+ losses = cross_entropy_from_probabilities(probabilities, targets)
+ return float(np.mean(losses) / np.log(2.0))
+
+
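+# The per-token aliases below assume byte-level tokens (one token per byte),
+# so bits per token and bits per byte coincide.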
+def bits_per_token_from_logits(logits: np.ndarray, targets: np.ndarray) -> float:
+ return bits_per_byte_from_logits(logits, targets)
+
+
+def bits_per_token_from_probabilities(probabilities: np.ndarray, targets: np.ndarray) -> float:
+ return bits_per_byte_from_probabilities(probabilities, targets)
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/mixed_memory.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/mixed_memory.py
new file mode 100644
index 0000000000..85737604bf
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/mixed_memory.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from .config import MixedMemoryConfig
+from .delay import DelayLineSubstrate
+from .reservoir import EchoStateReservoir
+
+
+@dataclass(frozen=True)
+class MixedMemoryStateSlices:
+ reservoir: slice
+ delay: slice
+
+
+class MixedMemorySubstrate:
+ def __init__(self, config: MixedMemoryConfig | None = None):
+ self.config = config or MixedMemoryConfig()
+ self.reservoir = EchoStateReservoir(
+ config=self.config.reservoir,
+ vocabulary_size=self.config.delay.vocabulary_size,
+ )
+ self.delay = DelayLineSubstrate(self.config.delay)
+ self._reservoir_slice = slice(0, self.reservoir.state_dim)
+ self._delay_slice = slice(self.reservoir.state_dim, self.reservoir.state_dim + self.config.delay.state_dim)
+
+ @property
+ def reservoir_dim(self) -> int:
+ return self.reservoir.state_dim
+
+ @property
+ def delay_dim(self) -> int:
+ return self.config.delay.state_dim
+
+ @property
+ def state_dim(self) -> int:
+ return self.reservoir_dim + self.delay_dim
+
+ @property
+ def state_slices(self) -> MixedMemoryStateSlices:
+ return MixedMemoryStateSlices(reservoir=self._reservoir_slice, delay=self._delay_slice)
+
+ def initial_state(self) -> np.ndarray:
+ return np.zeros(self.state_dim, dtype=np.float64)
+
+ def reservoir_view(self, state: np.ndarray) -> np.ndarray:
+ return self._split_state(state)[0]
+
+ def delay_view(self, state: np.ndarray) -> np.ndarray:
+ return self._split_state(state)[1]
+
+ def _split_state(self, state: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+ state = np.asarray(state, dtype=np.float64)
+ if state.ndim != 1:
+ raise ValueError("state must be rank-1")
+ if state.shape[0] != self.state_dim:
+ raise ValueError("state does not match configured state_dim")
+ return state[self._reservoir_slice], state[self._delay_slice]
+
+ def step(self, state: np.ndarray, token: int) -> np.ndarray:
+ reservoir_state, delay_state = self._split_state(state)
+ token_id = int(token)
+ if token_id < 0 or token_id >= self.config.delay.vocabulary_size:
+ raise ValueError("token is out of range for the configured vocabulary_size")
+
+ next_reservoir = self.reservoir.step(reservoir_state, token_id)
+ next_delay = self.delay.step(delay_state, token_id)
+ return np.concatenate([next_reservoir, next_delay])
+
+
+__all__ = [
+ "MixedMemoryConfig",
+ "MixedMemoryStateSlices",
+ "MixedMemorySubstrate",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/model.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/model.py
new file mode 100644
index 0000000000..63f444690c
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/model.py
@@ -0,0 +1,11 @@
+from .adapters import ByteLatentPredictiveCoder, FitReport, SequenceReport, SequenceTrace
+
+OpenPredictiveCoder = ByteLatentPredictiveCoder
+
+__all__ = [
+ "ByteLatentPredictiveCoder",
+ "FitReport",
+ "OpenPredictiveCoder",
+ "SequenceReport",
+ "SequenceTrace",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/modulation.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/modulation.py
new file mode 100644
index 0000000000..33683f5b6c
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/modulation.py
@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+
+import numpy as np
+
+from .control import ControllerSummary, ControllerSummaryBuilder, ControllerSummaryConfig
+
+
+def _sigmoid(value: np.ndarray | float) -> np.ndarray | float:
+ return 1.0 / (1.0 + np.exp(-value))
+
+
+@dataclass(frozen=True)
+class HormoneModulationConfig:
+ refresh_stride: int = 1
+ summary: ControllerSummaryConfig = field(default_factory=ControllerSummaryConfig)
+ hormone_count: int = 4
+ summary_scale: float = 1.0
+ hormone_bias: float = 0.0
+ hormone_scale: float = 1.0
+ output_indices: tuple[int, ...] = (0, 1)
+ output_biases: tuple[float, ...] = (0.0, 0.0)
+ output_scales: tuple[float, ...] = (1.0, 1.0)
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ if self.refresh_stride < 1:
+ raise ValueError("refresh_stride must be >= 1")
+ if self.hormone_count < 1:
+ raise ValueError("hormone_count must be >= 1")
+ if self.summary_scale < 0.0:
+ raise ValueError("summary_scale must be >= 0")
+ if self.hormone_scale < 0.0:
+ raise ValueError("hormone_scale must be >= 0")
+ if not self.output_indices:
+ raise ValueError("output_indices must contain at least one index")
+ if len(self.output_indices) != len(self.output_biases) or len(self.output_indices) != len(self.output_scales):
+ raise ValueError("output_indices, output_biases, and output_scales must have the same length")
+ if any(index < 0 for index in self.output_indices):
+ raise ValueError("output_indices must be >= 0")
+ if any(scale < 0.0 for scale in self.output_scales):
+ raise ValueError("output_scales must be >= 0")
+
+
+@dataclass(frozen=True)
+class HormoneState:
+ hormones: np.ndarray
+ outputs: np.ndarray
+ step: int
+ refreshed: bool
+ last_refresh_step: int
+ summary_name: str | None = None
+
+ def __post_init__(self) -> None:
+ hormones = np.asarray(self.hormones, dtype=np.float64).reshape(-1)
+ outputs = np.asarray(self.outputs, dtype=np.float64).reshape(-1)
+ if hormones.size < 1:
+ raise ValueError("HormoneState requires at least one hormone value")
+ if outputs.size < 1:
+ raise ValueError("HormoneState requires at least one output value")
+ object.__setattr__(self, "hormones", hormones)
+ object.__setattr__(self, "outputs", outputs)
+
+
+class HormoneModulator:
+ def __init__(self, summary_dim: int, config: HormoneModulationConfig | None = None):
+ if summary_dim < 1:
+ raise ValueError("summary_dim must be >= 1")
+ self.summary_dim = summary_dim
+ self.config = config or HormoneModulationConfig()
+ self.summary_builder = ControllerSummaryBuilder(self.config.summary)
+ rng = np.random.default_rng(self.config.seed)
+ self._summary_projection = rng.standard_normal((summary_dim, self.config.hormone_count)).astype(np.float64)
+ self._summary_projection /= np.sqrt(max(summary_dim, 1))
+ self._output_indices = np.asarray(self.config.output_indices, dtype=np.int64)
+ self._output_biases = np.asarray(self.config.output_biases, dtype=np.float64)
+ self._output_scales = np.asarray(self.config.output_scales, dtype=np.float64)
+
+ @property
+ def output_count(self) -> int:
+ return int(self._output_indices.shape[0])
+
+ def initial_state(self) -> HormoneState:
+ return HormoneState(
+ hormones=np.zeros(self.config.hormone_count, dtype=np.float64),
+ outputs=np.zeros(self.output_count, dtype=np.float64),
+ step=-1,
+ refreshed=False,
+ last_refresh_step=-self.config.refresh_stride,
+ summary_name=None,
+ )
+
+ def _coerce_summary(
+ self,
+ summary: ControllerSummary | float | Sequence[float] | np.ndarray,
+ *,
+ name: str | None = None,
+ ) -> ControllerSummary:
+ if isinstance(summary, ControllerSummary):
+ return summary
+ return self.summary_builder.encode(summary, name=name)
+
+ def _project_hormones(self, summary: ControllerSummary) -> np.ndarray:
+ if summary.dim != self.summary_dim:
+ raise ValueError("summary does not match the configured summary_dim")
+ projection = summary.values @ self._summary_projection
+ return np.tanh(self.config.hormone_bias + self.config.summary_scale * projection)
+
+ def _project_outputs(self, hormones: np.ndarray) -> np.ndarray:
+ selected = hormones[self._output_indices]
+ outputs = self._output_biases + (self._output_scales * selected * self.config.hormone_scale)
+ return np.asarray(_sigmoid(outputs), dtype=np.float64)
+
+ def advance(
+ self,
+ state: HormoneState,
+ summary: ControllerSummary | float | Sequence[float] | np.ndarray,
+ *,
+ step: int,
+ name: str | None = None,
+ ) -> HormoneState:
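+        # Re-project hormones only every refresh_stride steps; between refreshes, carry the previous values forward.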
+ controller_summary = self._coerce_summary(summary, name=name)
+ should_refresh = (step - state.last_refresh_step) >= self.config.refresh_stride
+ if not should_refresh:
+ return HormoneState(
+ hormones=state.hormones,
+ outputs=state.outputs,
+ step=step,
+ refreshed=False,
+ last_refresh_step=state.last_refresh_step,
+ summary_name=controller_summary.name,
+ )
+
+ hormones = self._project_hormones(controller_summary)
+ outputs = self._project_outputs(hormones)
+ return HormoneState(
+ hormones=hormones,
+ outputs=outputs,
+ step=step,
+ refreshed=True,
+ last_refresh_step=step,
+ summary_name=controller_summary.name,
+ )
+
+ def project(
+ self,
+ summary: ControllerSummary | float | Sequence[float] | np.ndarray,
+ *,
+ name: str | None = None,
+ ) -> HormoneState:
+ return self.advance(self.initial_state(), summary, step=0, name=name)
+
+
+__all__ = [
+ "HormoneModulationConfig",
+ "HormoneModulator",
+ "HormoneState",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/ngram_memory.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/ngram_memory.py
new file mode 100644
index 0000000000..1d0bb839b7
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/ngram_memory.py
@@ -0,0 +1,255 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+
+import numpy as np
+
+from .codecs import ensure_tokens
+
+
+def _coerce_sequences(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+) -> tuple[np.ndarray, ...]:
+ if isinstance(data, (str, bytes, bytearray, memoryview, np.ndarray)):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence) and data and all(isinstance(item, int) for item in data):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence):
+ return tuple(ensure_tokens(item) for item in data)
+ return (ensure_tokens(data),)
+
+
+def _coerce_context_token(context: object) -> int:
+ if isinstance(context, (int, np.integer)):
+ token = int(context)
+ if token < 0:
+ raise ValueError("context token must be >= 0")
+ return token
+ tokens = ensure_tokens(context)
+ if tokens.size < 1:
+ raise ValueError("context must contain at least one token")
+ return int(tokens[-1])
+
+
+def _laplace_smooth(counts: np.ndarray, alpha: float) -> np.ndarray:
+ counts = np.asarray(counts, dtype=np.float64)
+ width = counts.shape[-1]
+ smoothed = counts + (alpha / float(width))
+ total = float(np.sum(smoothed))
+ if total <= 0.0:
+ return np.full(width, 1.0 / float(width), dtype=np.float64)
+ return smoothed / total
+
+
+@dataclass(frozen=True)
+class NgramMemoryConfig:
+ vocabulary_size: int = 256
+ bigram_alpha: float = 0.5
+ trigram_alpha: float = 0.5
+ trigram_bucket_count: int = 4096
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.bigram_alpha < 0.0:
+ raise ValueError("bigram_alpha must be >= 0")
+ if self.trigram_alpha < 0.0:
+ raise ValueError("trigram_alpha must be >= 0")
+ if self.trigram_bucket_count < 1:
+ raise ValueError("trigram_bucket_count must be >= 1")
+
+
+@dataclass(frozen=True)
+class NgramMemoryReport:
+ sequences: int
+ tokens: int
+ vocabulary_size: int
+ bigram_contexts: int
+ trigram_buckets_used: int
+ unigram_bytes: int
+ bigram_bytes: int
+ trigram_bytes: int
+
+ @property
+ def total_bytes(self) -> int:
+ return self.unigram_bytes + self.bigram_bytes + self.trigram_bytes
+
+
+class NgramMemory:
+ def __init__(self, config: NgramMemoryConfig | None = None):
+ self.config = config or NgramMemoryConfig()
+ self.unigram_counts = np.zeros(self.config.vocabulary_size, dtype=np.float64)
+ self.bigram_counts = np.zeros((self.config.vocabulary_size, self.config.vocabulary_size), dtype=np.float64)
+ self.trigram_counts = np.zeros((self.config.trigram_bucket_count, self.config.vocabulary_size), dtype=np.float64)
+ self._unigram_total = 0.0
+ self._bigram_totals = np.zeros((self.config.vocabulary_size,), dtype=np.float64)
+ self._trigram_totals = np.zeros((self.config.trigram_bucket_count,), dtype=np.float64)
+ self._tokens_seen = 0
+ self._sequences_seen = 0
+
+ def clear(self) -> None:
+ self.unigram_counts.fill(0.0)
+ self.bigram_counts.fill(0.0)
+ self.trigram_counts.fill(0.0)
+ self._unigram_total = 0.0
+ self._bigram_totals.fill(0.0)
+ self._trigram_totals.fill(0.0)
+ self._tokens_seen = 0
+ self._sequences_seen = 0
+
+ def _check_tokens(self, tokens: np.ndarray) -> np.ndarray:
+ tokens = np.asarray(tokens, dtype=np.int64)
+ if tokens.ndim != 1:
+ raise ValueError("tokens must be rank-1")
+ if tokens.size == 0:
+ return tokens
+ if int(np.min(tokens)) < 0 or int(np.max(tokens)) >= self.config.vocabulary_size:
+ raise ValueError("tokens must lie within the configured vocabulary")
+ return tokens
+
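+    # Hash the (left, right) context pair into a fixed bucket table so trigram storage stays O(buckets x vocabulary).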
+ def _trigram_bucket(self, left: int, right: int) -> int:
+ value = (np.uint64(left) * np.uint64(1_315_423_911)) ^ (np.uint64(right) * np.uint64(2_654_435_761))
+ return int(value % np.uint64(self.config.trigram_bucket_count))
+
+ def _trigram_buckets(self, left: np.ndarray, right: np.ndarray) -> np.ndarray:
+ left_u = np.asarray(left, dtype=np.uint64)
+ right_u = np.asarray(right, dtype=np.uint64)
+ values = (left_u * np.uint64(1_315_423_911)) ^ (right_u * np.uint64(2_654_435_761))
+ return np.asarray(values % np.uint64(self.config.trigram_bucket_count), dtype=np.int64)
+
+ def update(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> NgramMemoryReport:
+ sequences = _coerce_sequences(data)
+ for sequence in sequences:
+ tokens = self._check_tokens(ensure_tokens(sequence).astype(np.int64, copy=False))
+ self._sequences_seen += 1
+ self._tokens_seen += int(tokens.size)
+ if tokens.size == 0:
+ continue
+
+ unigram_delta = np.bincount(tokens, minlength=self.config.vocabulary_size).astype(np.float64, copy=False)
+ self.unigram_counts += unigram_delta
+ self._unigram_total += float(tokens.size)
+
+ if tokens.size >= 2:
+ prev = tokens[:-1]
+ curr = tokens[1:]
+ np.add.at(self.bigram_counts, (prev, curr), 1.0)
+ self._bigram_totals += np.bincount(prev, minlength=self.config.vocabulary_size).astype(
+ np.float64,
+ copy=False,
+ )
+
+ if tokens.size >= 3:
+ left = tokens[:-2]
+ right = tokens[1:-1]
+ target = tokens[2:]
+ buckets = self._trigram_buckets(left, right)
+ np.add.at(self.trigram_counts, (buckets, target), 1.0)
+ self._trigram_totals += np.bincount(buckets, minlength=self.config.trigram_bucket_count).astype(
+ np.float64,
+ copy=False,
+ )
+
+ return self.report()
+
+ def fit(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> NgramMemoryReport:
+ self.clear()
+ return self.update(data)
+
+ def report(self) -> NgramMemoryReport:
+ return NgramMemoryReport(
+ sequences=self._sequences_seen,
+ tokens=self._tokens_seen,
+ vocabulary_size=self.config.vocabulary_size,
+ bigram_contexts=int(np.count_nonzero(self._bigram_totals)),
+ trigram_buckets_used=int(np.count_nonzero(self._trigram_totals)),
+ unigram_bytes=int(self.unigram_counts.nbytes),
+ bigram_bytes=int(self.bigram_counts.nbytes),
+ trigram_bytes=int(self.trigram_counts.nbytes),
+ )
+
+ def unigram_probs(self) -> np.ndarray:
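+        # The config has no separate unigram alpha, so unigram smoothing reuses bigram_alpha.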
+ return _laplace_smooth(self.unigram_counts, self.config.bigram_alpha)
+
+ def bigram_probs(self, context: object) -> np.ndarray:
+ index = _coerce_context_token(context)
+ counts = self.bigram_counts[index]
+ return _laplace_smooth(counts, self.config.bigram_alpha)
+
+ def trigram_probs(self, context2: object, context1: object) -> np.ndarray:
+ left = _coerce_context_token(context2)
+ right = _coerce_context_token(context1)
+ bucket = self._trigram_bucket(left, right)
+ counts = self.trigram_counts[bucket]
+ return _laplace_smooth(counts, self.config.trigram_alpha)
+
+ def chosen_probs(
+ self,
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ *,
+ order: str = "max",
+ ) -> np.ndarray:
+        if order not in {"unigram", "bigram", "trigram", "max"}:
+            raise ValueError(f"unknown order: {order}")
+        sequence = _coerce_sequences(tokens)[0]
+        sequence = self._check_tokens(sequence.astype(np.int64, copy=False))
+        if sequence.size == 0:
+            return np.zeros((0,), dtype=np.float64)
+
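+        # Fill every position with the unigram estimate, then let higher orders overwrite the positions they can cover.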
+ values = np.empty(sequence.size, dtype=np.float64)
+ vocab = float(self.config.vocabulary_size)
+ unigram_alpha = float(self.config.bigram_alpha)
+ unigram_denom = max(self._unigram_total + unigram_alpha, np.finfo(np.float64).tiny)
+ values[:] = (self.unigram_counts[sequence] + (unigram_alpha / vocab)) / unigram_denom
+
+ if sequence.size >= 2 and order in {"bigram", "trigram", "max"}:
+ prev = sequence[:-1]
+ curr = sequence[1:]
+ bigram_counts = self.bigram_counts[prev, curr]
+ bigram_totals = self._bigram_totals[prev]
+ values[1:] = (bigram_counts + (self.config.bigram_alpha / vocab)) / np.maximum(
+ bigram_totals + self.config.bigram_alpha,
+ np.finfo(np.float64).tiny,
+ )
+
+ if sequence.size >= 3 and order in {"trigram", "max"}:
+ left = sequence[:-2]
+ right = sequence[1:-1]
+ target = sequence[2:]
+ buckets = self._trigram_buckets(left, right)
+ trigram_counts = self.trigram_counts[buckets, target]
+ trigram_totals = self._trigram_totals[buckets]
+ values[2:] = (trigram_counts + (self.config.trigram_alpha / vocab)) / np.maximum(
+ trigram_totals + self.config.trigram_alpha,
+ np.finfo(np.float64).tiny,
+ )
+
+ if order not in {"unigram", "bigram", "trigram", "max"}:
+ raise ValueError(f"unknown order: {order}")
+ return np.clip(values, 1e-300, 1.0)
+
+ def chosen_log_probs(
+ self,
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ *,
+ order: str = "max",
+ ) -> np.ndarray:
+ return np.log(self.chosen_probs(tokens, order=order))
+
+ def log_probs(
+ self,
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> np.ndarray:
+ return self.chosen_log_probs(tokens, order="max")
+
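+# Usage sketch (the API defined above):
+#     memory = NgramMemory()
+#     memory.fit(b"abracadabra")
+#     bits = -memory.log_probs(b"abracadabra") / np.log(2.0)  # per-position bits under backoff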
+
+__all__ = [
+ "NgramMemory",
+ "NgramMemoryConfig",
+ "NgramMemoryReport",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/noncausal_reconstructive.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/noncausal_reconstructive.py
new file mode 100644
index 0000000000..777e413d74
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/noncausal_reconstructive.py
@@ -0,0 +1,317 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+
+import numpy as np
+
+from .artifacts import ArtifactAccounting, ArtifactMetadata, make_artifact_accounting
+from .bidirectional_context import BidirectionalContextConfig, BidirectionalContextProbe, BidirectionalContextStats
+from .codecs import ByteCodec, ensure_tokens
+from .exact_context import ExactContextConfig, ExactContextFitReport, ExactContextMemory
+from .span_selection import ReplaySpan, SpanSelectionConfig, replay_spans_from_scores
+
+
+def _coerce_tokens(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+) -> np.ndarray:
+ return ensure_tokens(data).astype(np.uint8, copy=False)
+
+
+def _reverse_tokens(tokens: np.ndarray) -> np.ndarray:
+ return np.asarray(tokens, dtype=np.uint8)[::-1].copy()
+
+
+def _normalize(probabilities: np.ndarray) -> np.ndarray:
+ probs = np.asarray(probabilities, dtype=np.float64).reshape(-1)
+ if probs.size == 0:
+ return probs
+ clipped = np.clip(probs, 1e-12, None)
+ total = float(np.sum(clipped))
+ if total <= 0.0:
+ return np.full_like(clipped, 1.0 / clipped.size, dtype=np.float64)
+ return clipped / total
+
+
+def _blend_probabilities(left: np.ndarray, right: np.ndarray, temperature: float) -> np.ndarray:
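+    # Blend the two directional distributions via their geometric mean in log space, then temper and renormalize.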
+ left = np.clip(np.asarray(left, dtype=np.float64), 1e-12, None)
+ right = np.clip(np.asarray(right, dtype=np.float64), 1e-12, None)
+ logits = 0.5 * (np.log(left) + np.log(right))
+ logits = logits / temperature
+ logits = logits - float(np.max(logits))
+ return _normalize(np.exp(logits))
+
+
+@dataclass(frozen=True)
+class NoncausalReconstructiveConfig:
+ vocabulary_size: int = 256
+ exact_max_order: int = 3
+ exact_alpha: float = 0.05
+ bidirectional_left_order: int = 2
+ bidirectional_right_order: int = 2
+ blend_temperature: float = 1.0
+ agreement_threshold: float = 0.75
+ replay_threshold: float = 0.55
+ min_replay_span: int = 2
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.exact_max_order < 1:
+ raise ValueError("exact_max_order must be >= 1")
+ if self.exact_alpha < 0.0:
+ raise ValueError("exact_alpha must be >= 0")
+ if self.bidirectional_left_order < 0:
+ raise ValueError("bidirectional_left_order must be >= 0")
+ if self.bidirectional_right_order < 0:
+ raise ValueError("bidirectional_right_order must be >= 0")
+ if self.blend_temperature <= 0.0:
+ raise ValueError("blend_temperature must be > 0")
+ if not 0.0 <= self.agreement_threshold <= 1.0:
+ raise ValueError("agreement_threshold must be in [0, 1]")
+ if not 0.0 <= self.replay_threshold <= 1.0:
+ raise ValueError("replay_threshold must be in [0, 1]")
+ if self.min_replay_span < 1:
+ raise ValueError("min_replay_span must be >= 1")
+
+
+@dataclass(frozen=True)
+class NoncausalReconstructiveFitReport:
+ forward: ExactContextFitReport
+ reverse: ExactContextFitReport
+ bidirectional_context: BidirectionalContextStats
+ accounting: ArtifactAccounting
+
+
+@dataclass(frozen=True)
+class NoncausalReconstructiveTrace:
+ tokens: int
+ source_tokens: np.ndarray
+ left_probs: np.ndarray
+ right_probs: np.ndarray
+ blended_probs: np.ndarray
+ reconstructed_tokens: np.ndarray
+ agreement_mask: np.ndarray
+ replay_mask: np.ndarray
+ replay_spans: tuple[ReplaySpan, ...]
+ bidirectional_context: BidirectionalContextStats
+ accounting: ArtifactAccounting
+
+ @property
+ def steps(self) -> int:
+ return int(self.source_tokens.size)
+
+
+@dataclass(frozen=True)
+class NoncausalReconstructiveReport:
+ tokens: int
+ steps: int
+ left_bits_per_byte: float
+ right_bits_per_byte: float
+ blended_bits_per_byte: float
+ agreement_rate: float
+ replay_rate: float
+ replay_span_count: int
+ reconstructed_text: str
+ bidirectional_context: BidirectionalContextStats
+ accounting: ArtifactAccounting
+
+ @property
+ def bits_per_byte(self) -> float:
+ return float(self.blended_bits_per_byte)
+
+
+class NoncausalReconstructiveAdapter:
+ def __init__(
+ self,
+ config: NoncausalReconstructiveConfig | None = None,
+ *,
+ artifact_name: str = "noncausal_reconstructive",
+ metadata: ArtifactMetadata | None = None,
+ ):
+ self.config = config or NoncausalReconstructiveConfig()
+ exact_config = ExactContextConfig(
+ vocabulary_size=self.config.vocabulary_size,
+ max_order=self.config.exact_max_order,
+ alpha=self.config.exact_alpha,
+ )
+ self.forward_memory = ExactContextMemory(exact_config)
+ self.reverse_memory = ExactContextMemory(exact_config)
+ self.bidirectional_probe = BidirectionalContextProbe(
+ BidirectionalContextConfig(
+ left_order=self.config.bidirectional_left_order,
+ right_order=self.config.bidirectional_right_order,
+ )
+ )
+ self.artifact_name = artifact_name
+ self.metadata = metadata or ArtifactMetadata()
+ self._last_fit_accounting = make_artifact_accounting(
+ self.artifact_name,
+ 0,
+ 0,
+ metadata=self.metadata,
+ tokens=0,
+ replay_positions=0,
+ )
+
+ @classmethod
+ def build(cls, **kwargs: object) -> "NoncausalReconstructiveAdapter":
+ return cls(NoncausalReconstructiveConfig(**kwargs))
+
+ def fit(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> NoncausalReconstructiveFitReport:
+ tokens = _coerce_tokens(data)
+ if tokens.size == 0:
+ raise ValueError("data must contain at least one token")
+
+ forward = self.forward_memory.fit(tokens)
+ reverse = self.reverse_memory.fit(_reverse_tokens(tokens))
+ bidirectional_context = self.bidirectional_probe.scan(tokens)
+ replay_mask = np.asarray(bidirectional_context.candidate_sizes, dtype=np.int64) <= 1
+ replay_spans = replay_spans_from_scores(
+ replay_mask.astype(np.float64, copy=False),
+ SpanSelectionConfig(threshold=0.5, min_span=self.config.min_replay_span, max_gap=0),
+ label="replay",
+ )
+ accounting = make_artifact_accounting(
+ self.artifact_name,
+ int(tokens.size),
+ int(np.sum(replay_mask)),
+ metadata=self.metadata,
+ tokens=int(tokens.size),
+ replay_positions=int(np.sum(replay_mask)),
+ replay_spans=replay_spans,
+ )
+ self._last_fit_accounting = accounting
+ return NoncausalReconstructiveFitReport(
+ forward=forward,
+ reverse=reverse,
+ bidirectional_context=bidirectional_context,
+ accounting=accounting,
+ )
+
+ def _distributions_for_position(
+ self,
+ tokens: np.ndarray,
+ position: int,
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
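+        # The reverse memory was fit on reversed tokens, so reverse the right context before querying it.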
+ left_context = tokens[:position]
+ right_context = _reverse_tokens(tokens[position + 1 :])
+ left_probs = self.forward_memory.predictive_distribution(left_context)
+ right_probs = self.reverse_memory.predictive_distribution(right_context)
+ blended = _blend_probabilities(left_probs, right_probs, self.config.blend_temperature)
+ return left_probs, right_probs, blended
+
+ def trace(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> NoncausalReconstructiveTrace:
+ tokens = _coerce_tokens(sequence)
+ if tokens.size == 0:
+ raise ValueError("sequence must contain at least one token")
+
+ bidirectional_context = self.bidirectional_probe.scan(tokens)
+ left_rows: list[np.ndarray] = []
+ right_rows: list[np.ndarray] = []
+ blended_rows: list[np.ndarray] = []
+ reconstructed: list[int] = []
+ agreement_mask: list[bool] = []
+ replay_mask: list[bool] = []
+
+ for position, token in enumerate(tokens):
+ left_probs, right_probs, blended_probs = self._distributions_for_position(tokens, position)
+ left_rows.append(left_probs)
+ right_rows.append(right_probs)
+ blended_rows.append(blended_probs)
+ reconstructed.append(int(np.argmax(blended_probs)))
+ agreement = int(np.argmax(left_probs)) == int(np.argmax(right_probs))
+ agreement_mask.append(agreement)
+ confidence = float(np.max(blended_probs))
+ agreement_strength = 0.5 * (float(np.max(left_probs)) + float(np.max(right_probs)))
+ replay_mask.append(
+ agreement
+ and confidence >= self.config.replay_threshold
+ and agreement_strength >= self.config.agreement_threshold
+ )
+
+ replay_mask_array = np.asarray(replay_mask, dtype=bool)
+ replay_spans = replay_spans_from_scores(
+ replay_mask_array.astype(np.float64, copy=False),
+ SpanSelectionConfig(threshold=0.5, min_span=self.config.min_replay_span, max_gap=0),
+ label="replay",
+ )
+ accounting = make_artifact_accounting(
+ self.artifact_name,
+ int(tokens.size),
+ int(np.sum(replay_mask_array)),
+ replay_spans=replay_spans,
+ metadata=self.metadata,
+ tokens=int(tokens.size),
+ replay_positions=int(np.sum(replay_mask_array)),
+ replay_spans_count=len(replay_spans),
+ )
+
+ return NoncausalReconstructiveTrace(
+ tokens=int(tokens.size),
+ source_tokens=tokens,
+ left_probs=np.vstack(left_rows),
+ right_probs=np.vstack(right_rows),
+ blended_probs=np.vstack(blended_rows),
+ reconstructed_tokens=np.asarray(reconstructed, dtype=np.uint8),
+ agreement_mask=np.asarray(agreement_mask, dtype=bool),
+ replay_mask=replay_mask_array,
+ replay_spans=replay_spans,
+ bidirectional_context=bidirectional_context,
+ accounting=accounting,
+ )
+
+ def reconstruct(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> np.ndarray:
+ return self.trace(sequence).reconstructed_tokens
+
+ def score(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> NoncausalReconstructiveReport:
+ trace = self.trace(sequence)
+ source_tokens = trace.source_tokens
+ targets = source_tokens
+ row_indices = np.arange(targets.size)
+ left_bits = -np.log2(np.clip(trace.left_probs[row_indices, targets], 1e-12, 1.0))
+ right_bits = -np.log2(np.clip(trace.right_probs[row_indices, targets], 1e-12, 1.0))
+ blended_bits = -np.log2(np.clip(trace.blended_probs[row_indices, targets], 1e-12, 1.0))
+
+ reconstructed_text = ByteCodec.decode_text(trace.reconstructed_tokens)
+ return NoncausalReconstructiveReport(
+ tokens=trace.tokens,
+ steps=trace.steps,
+ left_bits_per_byte=float(np.mean(left_bits)),
+ right_bits_per_byte=float(np.mean(right_bits)),
+ blended_bits_per_byte=float(np.mean(blended_bits)),
+ agreement_rate=float(np.mean(trace.agreement_mask)),
+ replay_rate=float(np.mean(trace.replay_mask)),
+ replay_span_count=len(trace.replay_spans),
+ reconstructed_text=reconstructed_text,
+ bidirectional_context=trace.bidirectional_context,
+ accounting=trace.accounting,
+ )
+
+ def accounting(self) -> ArtifactAccounting:
+ return self._last_fit_accounting
+
+
+NoncausalReconstructiveModel = NoncausalReconstructiveAdapter
+
+
+__all__ = [
+ "NoncausalReconstructiveAdapter",
+ "NoncausalReconstructiveConfig",
+ "NoncausalReconstructiveFitReport",
+ "NoncausalReconstructiveModel",
+ "NoncausalReconstructiveReport",
+ "NoncausalReconstructiveTrace",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/oracle_analysis.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/oracle_analysis.py
new file mode 100644
index 0000000000..bbaa00b197
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/oracle_analysis.py
@@ -0,0 +1,512 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass, field
+
+import numpy as np
+
+from .artifacts import (
+ ArtifactAccounting,
+ ArtifactMetadata,
+ make_artifact_accounting,
+ make_replay_span,
+)
+from .bidirectional_context import BidirectionalContextConfig, BidirectionalContextProbe, BidirectionalContextStats
+from .codecs import ensure_tokens
+from .config import HierarchicalSubstrateConfig, OpenPredictiveCoderConfig, SampledReadoutBandConfig, SampledReadoutConfig
+from .control import ControllerSummary
+from .hierarchical import HierarchicalSubstrate
+from .hierarchical_views import HierarchicalFeatureView
+from .metrics import bits_per_byte_from_probabilities
+from .presets import hierarchical_small
+from .routing import RoutingConfig, SummaryRouter
+from .sampled_readout import SampledMultiscaleReadout
+from .train_modes import TrainModeConfig
+
+
+def _resolve_hierarchical_config(model: OpenPredictiveCoderConfig) -> HierarchicalSubstrateConfig:
+ if model.substrate_kind != "hierarchical":
+ raise ValueError("oracle analysis requires a hierarchical model config")
+ return model.hierarchical
+
+
+def _alignment_metrics(left: np.ndarray, right: np.ndarray) -> tuple[float, float, float, float]:
+ left = np.asarray(left, dtype=np.float64).reshape(-1)
+ right = np.asarray(right, dtype=np.float64).reshape(-1)
+ width = min(left.size, right.size)
+ if width == 0:
+ return 0.0, 0.0, 0.0, 0.0
+
+ left = left[:width]
+ right = right[:width]
+ diff = left - right
+ mae = float(np.mean(np.abs(diff)))
+ rmse = float(np.sqrt(np.mean(np.square(diff))))
+
+ left_centered = left - float(np.mean(left))
+ right_centered = right - float(np.mean(right))
+ left_norm = float(np.linalg.norm(left_centered))
+ right_norm = float(np.linalg.norm(right_centered))
+ denom = left_norm * right_norm
+ if denom == 0.0:
+ pearson = 1.0 if np.allclose(left, right) else 0.0
+ else:
+ pearson = float(np.clip(float(np.dot(left_centered, right_centered) / denom), -1.0, 1.0))
+
+ cosine_denom = float(np.linalg.norm(left) * np.linalg.norm(right))
+ if cosine_denom == 0.0:
+ cosine = 1.0 if np.allclose(left, right) else 0.0
+ else:
+ cosine = float(np.clip(float(np.dot(left, right) / cosine_denom), -1.0, 1.0))
+
+ return pearson, cosine, mae, rmse
+
+
+@dataclass(frozen=True)
+class OracleAnalysisConfig:
+ model: OpenPredictiveCoderConfig = field(default_factory=hierarchical_small)
+ train_mode: TrainModeConfig = field(
+ default_factory=lambda: TrainModeConfig(
+ state_mode="through_state",
+ slow_update_stride=3,
+ rollout_checkpoints=(8, 16, 24),
+ rollout_checkpoint_stride=12,
+ )
+ )
+ fast_sample_size: int = 8
+ mid_sample_size: int = 8
+ slow_sample_size: int = 12
+ route_oracle_bias: float = 0.05
+ route_temperature: float = 1.0
+ bidirectional_context: BidirectionalContextConfig | None = None
+
+ def __post_init__(self) -> None:
+ hierarchical = _resolve_hierarchical_config(self.model)
+ if self.fast_sample_size < 1 or self.fast_sample_size > hierarchical.fast_size:
+ raise ValueError("fast_sample_size must lie within the fast bank size")
+ if self.mid_sample_size < 1 or self.mid_sample_size > hierarchical.mid_size:
+ raise ValueError("mid_sample_size must lie within the mid bank size")
+ if self.slow_sample_size < 1 or self.slow_sample_size > hierarchical.slow_size:
+ raise ValueError("slow_sample_size must lie within the slow bank size")
+ if self.route_temperature <= 0.0:
+ raise ValueError("route_temperature must be > 0")
+ if self.bidirectional_context is not None:
+ if self.bidirectional_context.left_order < 0:
+ raise ValueError("bidirectional_context.left_order must be >= 0")
+ if self.bidirectional_context.right_order < 0:
+ raise ValueError("bidirectional_context.right_order must be >= 0")
+
+
+@dataclass(frozen=True)
+class OracleAnalysisPoint:
+ checkpoint: int
+ slow_update_active: bool
+ route_names: tuple[str, ...]
+ route_weights: np.ndarray
+ selected_route: str
+ alignment_pearson: float
+ alignment_cosine: float
+ alignment_mae: float
+ alignment_rmse: float
+ route_bits_per_byte: float
+
+ def __post_init__(self) -> None:
+ route_weights = np.asarray(self.route_weights, dtype=np.float64).reshape(-1)
+ if route_weights.size < 1:
+ raise ValueError("OracleAnalysisPoint requires route weights")
+ object.__setattr__(self, "route_weights", route_weights)
+
+
+@dataclass(frozen=True)
+class OracleAnalysisReport:
+ tokens: int
+ checkpoints: tuple[int, ...]
+ points: tuple[OracleAnalysisPoint, ...]
+ mean_alignment_pearson: float
+ mean_alignment_cosine: float
+ mean_alignment_mae: float
+ mean_alignment_rmse: float
+ mean_route_bits_per_byte: float
+ oracle_preference_rate: float
+ accounting: ArtifactAccounting
+ bidirectional_context: BidirectionalContextStats | None = None
+
+ @property
+ def bits_per_byte(self) -> float:
+ return self.mean_route_bits_per_byte
+
+
+@dataclass(frozen=True)
+class OracleAnalysisFitReport:
+ sequences: int
+ tokens: int
+ train_bits_per_byte: float
+ mean_alignment_pearson: float
+ mean_alignment_cosine: float
+ mean_alignment_mae: float
+ mean_alignment_rmse: float
+ oracle_preference_rate: float
+ accounting: ArtifactAccounting
+ bidirectional_context: BidirectionalContextStats | None = None
+
+ @property
+ def bits_per_byte(self) -> float:
+ return self.train_bits_per_byte
+
+
+class OracleAnalysisAdapter:
+ def __init__(
+ self,
+ config: OracleAnalysisConfig | None = None,
+ *,
+ artifact_name: str = "oracle_analysis",
+ metadata: ArtifactMetadata | None = None,
+ ):
+ self.config = config or OracleAnalysisConfig()
+ hierarchical = self.config.model.hierarchical
+ self.train_mode = self.config.train_mode
+ self.bidirectional_probe = (
+ BidirectionalContextProbe(self.config.bidirectional_context)
+ if self.config.bidirectional_context is not None
+ else None
+ )
+ self.substrate = HierarchicalSubstrate(hierarchical)
+ self.feature_view = HierarchicalFeatureView(hierarchical)
+ self.sampled_readout = SampledMultiscaleReadout(
+ SampledReadoutConfig(
+ state_dim=hierarchical.state_dim,
+ seed=hierarchical.seed + 31,
+ bands=(
+ SampledReadoutBandConfig(
+ name="fast",
+ start=0,
+ stop=hierarchical.fast_size,
+ sample_count=self.config.fast_sample_size,
+ include_mean=True,
+ include_energy=True,
+ include_drift=True,
+ ),
+ SampledReadoutBandConfig(
+ name="mid",
+ start=hierarchical.fast_size,
+ stop=hierarchical.fast_size + hierarchical.mid_size,
+ sample_count=self.config.mid_sample_size,
+ include_mean=True,
+ include_energy=True,
+ include_drift=True,
+ ),
+ SampledReadoutBandConfig(
+ name="slow",
+ start=hierarchical.fast_size + hierarchical.mid_size,
+ stop=hierarchical.state_dim,
+ sample_count=self.config.slow_sample_size,
+ include_mean=True,
+ include_energy=True,
+ include_drift=True,
+ ),
+ ),
+ )
+ )
+ feature_dim = self.feature_view.feature_dim + self.sampled_readout.feature_dim
+ projection_weights = np.linspace(1.0, 0.25, num=feature_dim, dtype=np.float64)
+ self.router = SummaryRouter(
+ RoutingConfig(
+ mode="projection",
+ projection_weights=tuple(float(value) for value in projection_weights),
+ route_biases=(0.0, self.config.route_oracle_bias),
+ temperature=self.config.route_temperature,
+ )
+ )
+ self.artifact_name = artifact_name
+ self.metadata = metadata or ArtifactMetadata()
+ self._last_fit_accounting = make_artifact_accounting(
+ self.artifact_name,
+ 0,
+ 0,
+ metadata=self.metadata,
+ tokens=0,
+ comparisons=0,
+ oracle_selected=0,
+ )
+
+ @staticmethod
+ def _coerce_sequences(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> tuple[np.ndarray, ...]:
+ if isinstance(data, (str, bytes, bytearray, memoryview, np.ndarray)):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence) and data and all(isinstance(item, int) for item in data):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence):
+ return tuple(ensure_tokens(item) for item in data)
+ return (ensure_tokens(data),)
+
+ def _scan_states(self, tokens: np.ndarray) -> list[np.ndarray]:
+ state = self.substrate.initial_state()
+ states = [state.copy()]
+ for token in tokens:
+ state = self.substrate.step(state, int(token))
+ states.append(state.copy())
+ return states
+
+ def _encode_state(self, state: np.ndarray, previous_state: np.ndarray | None) -> np.ndarray:
+ previous = previous_state if self.train_mode.uses_through_state else None
+ return np.concatenate(
+ [
+ self.feature_view.encode(state, previous_state=previous),
+ self.sampled_readout.encode(state, previous_state=previous),
+ ]
+ )
+
+ @staticmethod
+ def _combine_bidirectional_stats(stats: Sequence[BidirectionalContextStats]) -> BidirectionalContextStats | None:
+ if not stats:
+ return None
+ candidate_sizes = tuple(size for stat in stats for size in stat.candidate_sizes)
+ neighborhoods = tuple(neighborhood for stat in stats for neighborhood in stat.neighborhoods)
+ sequence_length = sum(stat.sequence_length for stat in stats)
+ neighborhood_count = sum(stat.neighborhood_count for stat in stats)
+ left_context_count = sum(stat.left_context_count for stat in stats)
+ right_context_count = sum(stat.right_context_count for stat in stats)
+ pair_context_count = sum(stat.pair_context_count for stat in stats)
+ return BidirectionalContextStats(
+ sequence_length=sequence_length,
+ neighborhood_count=neighborhood_count,
+ left_context_count=left_context_count,
+ right_context_count=right_context_count,
+ pair_context_count=pair_context_count,
+ deterministic_fraction=float(np.mean([stat.deterministic_fraction for stat in stats])),
+ candidate_le_2_rate=float(np.mean([stat.candidate_le_2_rate for stat in stats])),
+ candidate_le_4_rate=float(np.mean([stat.candidate_le_4_rate for stat in stats])),
+ candidate_le_8_rate=float(np.mean([stat.candidate_le_8_rate for stat in stats])),
+ mean_candidate_size=float(np.mean([stat.mean_candidate_size for stat in stats])),
+ median_candidate_size=float(np.median(candidate_sizes)) if candidate_sizes else 0.0,
+ max_candidate_size=max((stat.max_candidate_size for stat in stats), default=0),
+ mean_left_support=float(np.mean([stat.mean_left_support for stat in stats])),
+ mean_right_support=float(np.mean([stat.mean_right_support for stat in stats])),
+ mean_pair_support=float(np.mean([stat.mean_pair_support for stat in stats])),
+ candidate_sizes=candidate_sizes,
+ neighborhoods=neighborhoods,
+ )
+
+ def _make_accounting(
+ self,
+ tokens: np.ndarray,
+ points: Sequence[OracleAnalysisPoint],
+ *,
+ checkpoint_values: Sequence[int],
+ ) -> ArtifactAccounting:
+ oracle_spans = [
+ make_replay_span(
+ checkpoint - 1,
+ checkpoint,
+ label="oracle",
+ checkpoint=int(checkpoint),
+ route_bits_per_byte=float(point.route_bits_per_byte),
+ selected_route=point.selected_route,
+ )
+ for checkpoint, point in zip(checkpoint_values, points)
+ if point.selected_route == "oracle"
+ ]
+ oracle_selected = sum(int(point.selected_route == "oracle") for point in points)
+ return make_artifact_accounting(
+ self.artifact_name,
+ int(tokens.size),
+ oracle_selected,
+ replay_spans=tuple(oracle_spans),
+ metadata=self.metadata,
+ tokens=int(tokens.size),
+ comparisons=len(points),
+ oracle_selected=oracle_selected,
+ )
+
+ def compare(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> OracleAnalysisReport:
+ tokens = ensure_tokens(sequence)
+ if tokens.size < 2:
+ raise ValueError("sequence must contain at least two tokens")
+
+ total_steps = int(tokens.size)
+ checkpoints = self.train_mode.resolve_rollout_checkpoints(total_steps)
+ forward_states = self._scan_states(tokens)
+ reverse_states = self._scan_states(tokens[::-1])
+ bidirectional_context = self.bidirectional_probe.scan(tokens) if self.bidirectional_probe is not None else None
+
+ points: list[OracleAnalysisPoint] = []
+ pearsons: list[float] = []
+ cosines: list[float] = []
+ maes: list[float] = []
+ rmses: list[float] = []
+ route_bits: list[float] = []
+ oracle_selected = 0
+
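+        # At each checkpoint, compare the causal prefix state with the oracle state obtained by scanning the suffix in reverse.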
+ for checkpoint in checkpoints:
+ suffix_len = total_steps - checkpoint
+ causal_state = forward_states[checkpoint]
+ causal_prev = forward_states[checkpoint - 1] if checkpoint > 0 else None
+ oracle_state = reverse_states[suffix_len]
+ oracle_prev = reverse_states[suffix_len - 1] if suffix_len > 0 else None
+
+ causal_feature = self._encode_state(causal_state, causal_prev)
+ oracle_feature = self._encode_state(oracle_state, oracle_prev)
+ decision = self.router.route(
+ (
+ ControllerSummary(causal_feature, name="causal"),
+ ControllerSummary(oracle_feature, name="oracle"),
+ ),
+ names=("causal", "oracle"),
+ )
+ alignment_pearson, alignment_cosine, alignment_mae, alignment_rmse = _alignment_metrics(
+ causal_feature,
+ oracle_feature,
+ )
+ route_bits_per_byte = bits_per_byte_from_probabilities(
+ decision.weights[None, :],
+ np.asarray([decision.selected_index], dtype=np.int64),
+ )
+ selected_route = decision.route_names[decision.selected_index]
+ oracle_selected += int(selected_route == "oracle")
+
+ points.append(
+ OracleAnalysisPoint(
+ checkpoint=checkpoint,
+ slow_update_active=self.train_mode.should_update_slow(max(checkpoint - 1, 0)),
+ route_names=decision.route_names,
+ route_weights=decision.weights.copy(),
+ selected_route=selected_route,
+ alignment_pearson=alignment_pearson,
+ alignment_cosine=alignment_cosine,
+ alignment_mae=alignment_mae,
+ alignment_rmse=alignment_rmse,
+ route_bits_per_byte=route_bits_per_byte,
+ )
+ )
+ pearsons.append(alignment_pearson)
+ cosines.append(alignment_cosine)
+ maes.append(alignment_mae)
+ rmses.append(alignment_rmse)
+ route_bits.append(route_bits_per_byte)
+
+ return OracleAnalysisReport(
+ tokens=total_steps,
+ checkpoints=checkpoints,
+ points=tuple(points),
+ mean_alignment_pearson=float(np.mean(pearsons)),
+ mean_alignment_cosine=float(np.mean(cosines)),
+ mean_alignment_mae=float(np.mean(maes)),
+ mean_alignment_rmse=float(np.mean(rmses)),
+ mean_route_bits_per_byte=float(np.mean(route_bits)),
+ oracle_preference_rate=float(oracle_selected / max(len(points), 1)),
+ bidirectional_context=bidirectional_context,
+ accounting=self._make_accounting(tokens, points, checkpoint_values=checkpoints),
+ )
+
+ analyze = compare
+
+ def score(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> OracleAnalysisReport:
+ return self.compare(sequence)
+
+ def fit(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> OracleAnalysisFitReport:
+ sequences = self._coerce_sequences(data)
+ total_tokens = 0
+ total_points = 0
+ weighted_bits = 0.0
+ alignment_pearsons: list[float] = []
+ alignment_cosines: list[float] = []
+ alignment_maes: list[float] = []
+ alignment_rmses: list[float] = []
+ oracle_selected = 0
+ bidirectional_contexts: list[BidirectionalContextStats] = []
+ artifact_bytes = 0
+ replay_bytes = 0
+ replay_spans = []
+ offset = 0
+
+ for sequence in sequences:
+ tokens = ensure_tokens(sequence)
+ report = self.compare(tokens)
+ total_tokens += int(tokens.size)
+ total_points += len(report.points)
+ weighted_bits += report.bits_per_byte * len(report.points)
+ alignment_pearsons.append(report.mean_alignment_pearson)
+ alignment_cosines.append(report.mean_alignment_cosine)
+ alignment_maes.append(report.mean_alignment_mae)
+ alignment_rmses.append(report.mean_alignment_rmse)
+ oracle_selected += sum(int(point.selected_route == "oracle") for point in report.points)
+ if report.bidirectional_context is not None:
+ bidirectional_contexts.append(report.bidirectional_context)
+
+ accounting = report.accounting
+ artifact_bytes += accounting.artifact_bytes
+ replay_bytes += accounting.replay_bytes
+ replay_spans.extend(
+ make_replay_span(
+ span.start + offset,
+ span.stop + offset,
+ label=span.label,
+ metadata=span.metadata,
+ )
+ for span in accounting.replay_spans
+ )
+ offset += int(tokens.size)
+
+ mean_bits = 0.0 if total_points == 0 else weighted_bits / float(total_points)
+ fit_accounting = make_artifact_accounting(
+ self.artifact_name,
+ artifact_bytes,
+ replay_bytes,
+ replay_spans=tuple(replay_spans),
+ metadata=self.metadata,
+ tokens=total_tokens,
+ comparisons=total_points,
+ oracle_selected=oracle_selected,
+ )
+ self._last_fit_accounting = fit_accounting
+ return OracleAnalysisFitReport(
+ sequences=len(sequences),
+ tokens=total_tokens,
+ train_bits_per_byte=mean_bits,
+ mean_alignment_pearson=float(np.mean(alignment_pearsons)) if alignment_pearsons else 0.0,
+ mean_alignment_cosine=float(np.mean(alignment_cosines)) if alignment_cosines else 0.0,
+ mean_alignment_mae=float(np.mean(alignment_maes)) if alignment_maes else 0.0,
+ mean_alignment_rmse=float(np.mean(alignment_rmses)) if alignment_rmses else 0.0,
+ oracle_preference_rate=0.0 if total_points == 0 else oracle_selected / float(total_points),
+ bidirectional_context=self._combine_bidirectional_stats(bidirectional_contexts),
+ accounting=fit_accounting,
+ )
+
+ def accounting(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | None = None,
+ ) -> ArtifactAccounting:
+ if sequence is None:
+ return self._last_fit_accounting
+ tokens = ensure_tokens(sequence)
+ if tokens.size == 0:
+ return make_artifact_accounting(
+ self.artifact_name,
+ 0,
+ 0,
+ metadata=self.metadata,
+ tokens=0,
+ comparisons=0,
+ oracle_selected=0,
+ )
+ return self.compare(tokens).accounting
+
+
+__all__ = [
+ "OracleAnalysisAdapter",
+ "OracleAnalysisConfig",
+ "OracleAnalysisFitReport",
+ "OracleAnalysisPoint",
+ "OracleAnalysisReport",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/oscillatory_memory.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/oscillatory_memory.py
new file mode 100644
index 0000000000..3c5c2acf1e
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/oscillatory_memory.py
@@ -0,0 +1,124 @@
+from __future__ import annotations
+
+import numpy as np
+
+from .config import OscillatoryMemoryConfig
+
+
+def _normalize_rows(matrix: np.ndarray) -> np.ndarray:
+ matrix = np.asarray(matrix, dtype=np.float64)
+ norms = np.linalg.norm(matrix, axis=1, keepdims=True)
+ norms = np.where(norms == 0.0, 1.0, norms)
+ return matrix / norms
+
+
+def _orthogonal_matrix(rng: np.random.Generator, size: int) -> np.ndarray:
+ matrix = rng.normal(loc=0.0, scale=1.0, size=(size, size))
+ q, r = np.linalg.qr(matrix)
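+ # Fix the signs so R has a positive diagonal, making the factorization unique and the draw uniformly (Haar) distributed.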
+ signs = np.sign(np.diag(r))
+ signs[signs == 0.0] = 1.0
+ q = q * signs
+ return q.astype(np.float64, copy=False)
+
+
+class OscillatoryMemorySubstrate:
+ def __init__(self, config: OscillatoryMemoryConfig | None = None):
+ self.config = config or OscillatoryMemoryConfig()
+ rng = np.random.default_rng(self.config.seed)
+
+ token_embeddings = rng.normal(
+ loc=0.0,
+ scale=self.config.input_scale,
+ size=(self.config.vocabulary_size, self.config.embedding_dim),
+ ).astype(np.float64)
+ self._token_embeddings = _normalize_rows(token_embeddings)
+
+ self._decay_input = np.stack(
+ [_orthogonal_matrix(rng, self.config.embedding_dim) for _ in range(self.config.decay_bank_count)],
+ axis=0,
+ )
+
+ self._osc_input_cos = np.stack(
+ [_orthogonal_matrix(rng, self.config.embedding_dim) for _ in range(self.config.oscillatory_bank_count)],
+ axis=0,
+ )
+ self._osc_input_sin = np.stack(
+ [_orthogonal_matrix(rng, self.config.embedding_dim) for _ in range(self.config.oscillatory_bank_count)],
+ axis=0,
+ )
+
+ low_damping, high_damping = self.config.oscillatory_damping_range
+ low_period, high_period = self.config.oscillatory_period_range
+ if self.config.oscillatory_bank_count == 1:
+ damping = np.asarray([(low_damping + high_damping) * 0.5], dtype=np.float64)
+ periods = np.asarray([(low_period + high_period) * 0.5], dtype=np.float64)
+ else:
+ damping = np.linspace(low_damping, high_damping, num=self.config.oscillatory_bank_count, dtype=np.float64)
+ periods = np.geomspace(low_period, high_period, num=self.config.oscillatory_bank_count, dtype=np.float64)
+ self._osc_damping = damping
+ self._osc_cos = damping * np.cos((2.0 * np.pi) / periods)
+ self._osc_sin = damping * np.sin((2.0 * np.pi) / periods)
+
+ @property
+ def state_dim(self) -> int:
+ return self.config.state_dim
+
+ def initial_state(self) -> np.ndarray:
+ return np.zeros(self.state_dim, dtype=np.float64)
+
+ def _coerce_token(self, token: int) -> int:
+ index = int(token)
+ if index < 0 or index >= self.config.vocabulary_size:
+ raise ValueError(
+ f"token {index} is out of range for vocabulary_size={self.config.vocabulary_size}"
+ )
+ return index
+
+ def _split_state(self, state: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
+ state = np.asarray(state, dtype=np.float64)
+ if state.shape != (self.state_dim,):
+ raise ValueError("state has unexpected shape")
+ decay_width = self.config.decay_bank_count * self.config.embedding_dim
+ decay_state = state[:decay_width].reshape(self.config.decay_bank_count, self.config.embedding_dim)
+ oscillatory_state = state[decay_width:].reshape(
+ self.config.oscillatory_bank_count,
+ 2,
+ self.config.embedding_dim,
+ )
+ return decay_state, oscillatory_state
+
+ def step(self, state: np.ndarray, token: int) -> np.ndarray:
+ decay_state, oscillatory_state = self._split_state(state)
+ token_index = self._coerce_token(token)
+ token_vector = self._token_embeddings[token_index]
+
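+ # Each decay bank is a leaky integrator: scale the previous state by its rate and add the projected token drive.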
+ next_decay = np.empty_like(decay_state)
+ for index, rate in enumerate(self.config.decay_rates):
+ drive = self._decay_input[index] @ token_vector
+ next_decay[index] = (rate * decay_state[index]) + drive
+
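+ # Each oscillatory bank applies a damped 2-D rotation by angle 2*pi/period before adding its drive.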
+ next_oscillatory = np.empty_like(oscillatory_state)
+ for index in range(self.config.oscillatory_bank_count):
+ cos_state = oscillatory_state[index, 0]
+ sin_state = oscillatory_state[index, 1]
+ drive_cos = self._osc_input_cos[index] @ token_vector
+ drive_sin = self._osc_input_sin[index] @ token_vector
+
+ next_oscillatory[index, 0] = (
+ self._osc_cos[index] * cos_state
+ - self._osc_sin[index] * sin_state
+ + drive_cos
+ )
+ next_oscillatory[index, 1] = (
+ self._osc_sin[index] * cos_state
+ + self._osc_cos[index] * sin_state
+ + drive_sin
+ )
+
+ return np.concatenate([next_decay.reshape(-1), next_oscillatory.reshape(-1)])
+
+
+__all__ = [
+ "OscillatoryMemoryConfig",
+ "OscillatoryMemorySubstrate",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/patch_latent_blocks.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/patch_latent_blocks.py
new file mode 100644
index 0000000000..a4f986667f
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/patch_latent_blocks.py
@@ -0,0 +1,394 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+import numpy as np
+
+
+def _as_float_array(values: np.ndarray | list[float] | tuple[float, ...]) -> np.ndarray:
+ return np.asarray(values, dtype=np.float64)
+
+
+def _coerce_tokens(
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | list[int] | tuple[int, ...],
+) -> np.ndarray:
+ if isinstance(tokens, str):
+ return np.frombuffer(tokens.encode("utf-8"), dtype=np.uint8)
+ if isinstance(tokens, (bytes, bytearray, memoryview)):
+ return np.frombuffer(bytes(tokens), dtype=np.uint8)
+ array = np.asarray(tokens)
+ if array.ndim != 1:
+ raise ValueError("tokens must be rank-1")
+ if not np.issubdtype(array.dtype, np.integer):
+ raise TypeError("tokens must contain integers")
+ if array.size and (int(array.min()) < 0 or int(array.max()) > 255):
+ raise ValueError("integer tokens must lie in [0, 255]")
+ return array.astype(np.uint8, copy=False)
+
+
+def _coerce_matrix(values: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...]) -> np.ndarray:
+ array = np.asarray(values, dtype=np.float64)
+ if array.ndim != 2:
+ raise ValueError("matrix values must be rank-2")
+ return array
+
+
+def _scaled_matrix(rng: np.random.Generator, rows: int, cols: int, scale: float) -> np.ndarray:
+ if rows < 1 or cols < 1:
+ raise ValueError("matrix dimensions must be >= 1")
+ matrix = rng.normal(loc=0.0, scale=1.0, size=(rows, cols))
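+ # Normalize by sqrt(fan-in) so matrix-vector products keep roughly unit variance before applying `scale`.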
+ return (matrix / np.sqrt(cols)) * scale
+
+
+def _coerce_2d(values: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...]) -> np.ndarray:
+ array = np.asarray(values, dtype=np.float64)
+ if array.ndim == 1:
+ return array[None, :]
+ if array.ndim != 2:
+ raise ValueError("values must be rank-1 or rank-2")
+ return array
+
+
+@dataclass(frozen=True)
+class LocalByteEncoderConfig:
+ vocabulary_size: int = 256
+ local_dim: int = 32
+ state_dim: int = 32
+ output_dim: int | None = None
+ embedding_scale: float = 1.0
+ input_scale: float = 1.0
+ recurrent_scale: float = 0.7
+ output_scale: float = 1.0
+ output_l2: float = 1e-4
+ seed: int = 7
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if self.local_dim < 1:
+ raise ValueError("local_dim must be >= 1")
+ if self.state_dim < 1:
+ raise ValueError("state_dim must be >= 1")
+ if self.output_dim is None:
+ object.__setattr__(self, "output_dim", self.local_dim)
+ if self.output_dim is None or self.output_dim < 1:
+ raise ValueError("output_dim must be >= 1")
+ if self.embedding_scale <= 0.0:
+ raise ValueError("embedding_scale must be > 0")
+ if self.input_scale <= 0.0:
+ raise ValueError("input_scale must be > 0")
+ if self.recurrent_scale <= 0.0:
+ raise ValueError("recurrent_scale must be > 0")
+ if self.output_scale <= 0.0:
+ raise ValueError("output_scale must be > 0")
+ if self.output_l2 < 0.0:
+ raise ValueError("output_l2 must be >= 0")
+
+ @property
+ def feature_dim(self) -> int:
+ return int(self.output_dim)
+
+
+@dataclass(frozen=True)
+class PatchPoolerConfig:
+ mode: Literal["mean", "last", "mix"] = "mean"
+ mix_weight: float = 0.5
+
+ def __post_init__(self) -> None:
+ if self.mode not in {"mean", "last", "mix"}:
+ raise ValueError("mode must be one of mean, last, mix")
+ if not 0.0 <= self.mix_weight <= 1.0:
+ raise ValueError("mix_weight must be between 0 and 1")
+
+
+@dataclass(frozen=True)
+class GlobalLocalBridgeConfig:
+ global_dim: int
+ latent_dim: int
+ local_dim: int
+ learning_rate: float = 0.05
+ l2: float = 1e-4
+ seed: int = 11
+ use_bias: bool = True
+
+ def __post_init__(self) -> None:
+ if self.global_dim < 0:
+ raise ValueError("global_dim must be >= 0")
+ if self.latent_dim < 0:
+ raise ValueError("latent_dim must be >= 0")
+ if self.local_dim < 1:
+ raise ValueError("local_dim must be >= 1")
+ if self.learning_rate <= 0.0:
+ raise ValueError("learning_rate must be > 0")
+ if self.l2 < 0.0:
+ raise ValueError("l2 must be >= 0")
+
+ @property
+ def input_dim(self) -> int:
+ return self.global_dim + self.latent_dim
+
+
+class LocalByteEncoder:
+ def __init__(self, config: LocalByteEncoderConfig | None = None):
+ self.config = config or LocalByteEncoderConfig()
+ rng = np.random.default_rng(self.config.seed)
+ self.embedding = _scaled_matrix(
+ rng,
+ rows=self.config.vocabulary_size,
+ cols=self.config.local_dim,
+ scale=self.config.embedding_scale,
+ )
+ self.input_weights = _scaled_matrix(
+ rng,
+ rows=self.config.state_dim,
+ cols=self.config.local_dim,
+ scale=self.config.input_scale,
+ )
+ self.recurrent_weights = _scaled_matrix(
+ rng,
+ rows=self.config.state_dim,
+ cols=self.config.state_dim,
+ scale=self.config.recurrent_scale,
+ )
+ self.output_weights = _scaled_matrix(
+ rng,
+ rows=self.config.output_dim,
+ cols=self.config.state_dim,
+ scale=self.config.output_scale,
+ )
+ self.state_bias = np.zeros(self.config.state_dim, dtype=np.float64)
+ self.output_bias = np.zeros(self.config.output_dim, dtype=np.float64)
+
+ @property
+ def feature_dim(self) -> int:
+ return self.config.feature_dim
+
+ @property
+ def state_dim(self) -> int:
+ return self.config.state_dim
+
+ def initial_state(self) -> np.ndarray:
+ return np.zeros(self.config.state_dim, dtype=np.float64)
+
+ def _step_hidden(self, token: int, state: np.ndarray | None = None) -> tuple[np.ndarray, np.ndarray]:
+ token = int(token)
+ if token < 0 or token >= self.config.vocabulary_size:
+ raise ValueError("token must lie inside the vocabulary")
+ current_state = self.initial_state() if state is None else np.asarray(state, dtype=np.float64)
+ if current_state.shape != (self.config.state_dim,):
+ raise ValueError("state does not match configured state_dim")
+ token_vector = self.embedding[token]
+ next_state = np.tanh(
+ self.recurrent_weights @ current_state
+ + self.input_weights @ token_vector
+ + self.state_bias
+ )
+ return next_state, current_state
+
+ def step(self, token: int, state: np.ndarray | None = None) -> tuple[np.ndarray, np.ndarray]:
+ next_state, _ = self._step_hidden(token, state)
+ local_features = np.tanh(self.output_weights @ next_state + self.output_bias)
+ return local_features, next_state
+
+ def hidden_states(
+ self,
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | list[int] | tuple[int, ...],
+ *,
+ initial_state: np.ndarray | None = None,
+ ) -> tuple[np.ndarray, np.ndarray]:
+ token_array = _coerce_tokens(tokens)
+ state = self.initial_state() if initial_state is None else np.asarray(initial_state, dtype=np.float64)
+ if state.shape != (self.config.state_dim,):
+ raise ValueError("initial_state does not match configured state_dim")
+ hidden: list[np.ndarray] = []
+ for token in token_array:
+ state, _ = self._step_hidden(int(token), state)
+ hidden.append(state)
+ if not hidden:
+ return np.zeros((0, self.state_dim), dtype=np.float64), state.copy()
+ return np.vstack(hidden), state.copy()
+
+ def encode(
+ self,
+ tokens: str | bytes | bytearray | memoryview | np.ndarray | list[int] | tuple[int, ...],
+ *,
+ initial_state: np.ndarray | None = None,
+ ) -> tuple[np.ndarray, np.ndarray]:
+ hidden, state = self.hidden_states(tokens, initial_state=initial_state)
+ if hidden.shape[0] == 0:
+ return np.zeros((0, self.feature_dim), dtype=np.float64), state.copy()
+ features = np.tanh(hidden @ self.output_weights.T + self.output_bias)
+ return features, state.copy()
+
+ def output_error(
+ self,
+ hidden_states: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ targets: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ ) -> float:
+ hidden = _coerce_2d(hidden_states)
+ target_array = _coerce_2d(targets)
+ if hidden.shape[1] != self.state_dim:
+ raise ValueError("hidden_states do not match configured state_dim")
+ if target_array.shape != (hidden.shape[0], self.feature_dim):
+ raise ValueError("targets do not match hidden_states or feature_dim")
+ predicted = np.tanh(hidden @ self.output_weights.T + self.output_bias)
+ return float(np.mean(np.square(predicted - target_array)))
+
+ def fit_output(
+ self,
+ hidden_states: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ targets: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ ) -> float:
+ hidden = _coerce_2d(hidden_states)
+ target_array = _coerce_2d(targets)
+ if hidden.shape[1] != self.state_dim:
+ raise ValueError("hidden_states do not match configured state_dim")
+ if target_array.shape != (hidden.shape[0], self.feature_dim):
+ raise ValueError("targets do not match hidden_states or feature_dim")
+ clipped = np.clip(target_array, -0.999999, 0.999999)
+ preactivation = np.arctanh(clipped)
+ design = np.concatenate([hidden, np.ones((hidden.shape[0], 1), dtype=np.float64)], axis=1)
+ penalty = self.config.output_l2 * np.eye(design.shape[1], dtype=np.float64)
+ penalty[-1, -1] = 0.0
+ solution = np.linalg.solve(design.T @ design + penalty, design.T @ preactivation)
+ self.output_weights = solution[:-1].T
+ self.output_bias = solution[-1]
+ return self.output_error(hidden, target_array)
+
+
+class PatchPooler:
+ def __init__(self, config: PatchPoolerConfig | None = None):
+ self.config = config or PatchPoolerConfig()
+
+ def pool(self, block: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...]) -> np.ndarray:
+ array = _coerce_matrix(block)
+ if array.shape[0] == 0:
+ raise ValueError("block must contain at least one row")
+ if self.config.mode == "mean":
+ return np.mean(array, axis=0)
+ if self.config.mode == "last":
+ return array[-1].copy()
+ mean = np.mean(array, axis=0)
+ last = array[-1]
+ return ((1.0 - self.config.mix_weight) * mean) + (self.config.mix_weight * last)
+
+
+class GlobalLocalBridge:
+ def __init__(self, config: GlobalLocalBridgeConfig | None = None):
+ self.config = config or GlobalLocalBridgeConfig(global_dim=16, latent_dim=16, local_dim=16)
+ rng = np.random.default_rng(self.config.seed)
+ self.weights = _scaled_matrix(
+ rng,
+ rows=self.config.input_dim,
+ cols=self.config.local_dim,
+ scale=0.1,
+ )
+ self.bias = np.zeros(self.config.local_dim, dtype=np.float64)
+
+ @property
+ def input_dim(self) -> int:
+ return self.config.input_dim
+
+ @property
+ def output_dim(self) -> int:
+ return self.config.local_dim
+
+ def stack_state(
+ self,
+ global_state: np.ndarray | list[float] | tuple[float, ...],
+ latent_state: np.ndarray | list[float] | tuple[float, ...],
+ ) -> np.ndarray:
+ global_array = _as_float_array(global_state)
+ latent_array = _as_float_array(latent_state)
+ if global_array.shape != (self.config.global_dim,):
+ raise ValueError("global_state does not match configured global_dim")
+ if latent_array.shape != (self.config.latent_dim,):
+ raise ValueError("latent_state does not match configured latent_dim")
+ return np.concatenate([global_array, latent_array])
+
+ def predict_batch(self, inputs: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...]) -> np.ndarray:
+ array = _coerce_2d(inputs)
+ if array.shape[1] != self.config.input_dim:
+ raise ValueError("inputs do not match configured input_dim")
+ return array @ self.weights + self.bias
+
+ def predict(
+ self,
+ global_state: np.ndarray | list[float] | tuple[float, ...],
+ latent_state: np.ndarray | list[float] | tuple[float, ...],
+ ) -> np.ndarray:
+ return self.predict_batch(self.stack_state(global_state, latent_state)[None, :])[0]
+
+ def reconstruction_error(
+ self,
+ inputs: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ targets: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ ) -> float:
+ predicted = self.predict_batch(inputs)
+ target_array = _coerce_2d(targets)
+ if target_array.shape != predicted.shape:
+ raise ValueError("targets must match predicted shape")
+ return float(np.mean(np.square(predicted - target_array)))
+
+ def fit(
+ self,
+ inputs: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ targets: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ ) -> float:
+ x = _coerce_2d(inputs)
+ y = _coerce_2d(targets)
+ if x.shape[1] != self.config.input_dim:
+ raise ValueError("inputs do not match configured input_dim")
+ if y.shape[1] != self.config.local_dim:
+ raise ValueError("targets do not match configured local_dim")
+ if x.shape[0] != y.shape[0]:
+ raise ValueError("inputs and targets must have the same number of rows")
+
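+ # Closed-form ridge solve; when a bias is used, its column is appended to the design matrix and left unpenalized.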
+ if self.config.use_bias:
+ design = np.concatenate([x, np.ones((x.shape[0], 1), dtype=np.float64)], axis=1)
+ penalty = self.config.l2 * np.eye(design.shape[1], dtype=np.float64)
+ penalty[-1, -1] = 0.0
+ solution = np.linalg.solve(design.T @ design + penalty, design.T @ y)
+ self.weights = solution[:-1]
+ self.bias = solution[-1]
+ else:
+ penalty = self.config.l2 * np.eye(x.shape[1], dtype=np.float64)
+ self.weights = np.linalg.solve(x.T @ x + penalty, x.T @ y)
+ self.bias = np.zeros(self.config.local_dim, dtype=np.float64)
+ return self.reconstruction_error(x, y)
+
+ def update(
+ self,
+ inputs: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ targets: np.ndarray | list[list[float]] | tuple[tuple[float, ...], ...],
+ *,
+ steps: int = 1,
+ ) -> float:
+ if steps < 1:
+ raise ValueError("steps must be >= 1")
+ x = _coerce_2d(inputs)
+ y = _coerce_2d(targets)
+ if x.shape[1] != self.config.input_dim:
+ raise ValueError("inputs do not match configured input_dim")
+ if y.shape[1] != self.config.local_dim:
+ raise ValueError("targets do not match configured local_dim")
+ if x.shape[0] != y.shape[0]:
+ raise ValueError("inputs and targets must have the same number of rows")
+
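+ # Plain gradient descent on the mean-squared error with L2 weight decay (the bias is not decayed).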
+ rate = self.config.learning_rate
+ for _ in range(steps):
+ predicted = x @ self.weights + self.bias
+ error = predicted - y
+ self.weights -= rate * ((x.T @ error) / x.shape[0] + (self.config.l2 * self.weights))
+ if self.config.use_bias:
+ self.bias -= rate * np.mean(error, axis=0)
+ return self.reconstruction_error(x, y)
+
+
+__all__ = [
+ "GlobalLocalBridge",
+ "GlobalLocalBridgeConfig",
+ "LocalByteEncoder",
+ "LocalByteEncoderConfig",
+ "PatchPooler",
+ "PatchPoolerConfig",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/patching.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/patching.py
new file mode 100644
index 0000000000..47ea004432
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/patching.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from .config import SegmenterConfig
+
+
+@dataclass(frozen=True)
+class SegmentStats:
+ total_steps: int
+ total_patches: int
+ mean_patch_size: float
+ compression_ratio: float
+
+
+class AdaptiveSegmenter:
+ def __init__(self, config: SegmenterConfig):
+ self.config = config
+
+ def should_commit(self, patch_length: int, novelty: float) -> bool:
+ if patch_length >= self.config.max_patch_size:
+ return True
+ if patch_length < self.config.min_patch_size:
+ return False
+ if self.config.mode == "fixed":
+ return patch_length >= self.config.patch_size
+ return patch_length >= self.config.patch_size or novelty >= self.config.novelty_threshold
+
+ @staticmethod
+ def summarize(total_steps: int, total_patches: int) -> SegmentStats:
+ patches = max(total_patches, 1)
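+ # With one latent emitted per patch, mean patch size and token-to-patch compression ratio coincide.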
+ return SegmentStats(
+ total_steps=total_steps,
+ total_patches=total_patches,
+ mean_patch_size=total_steps / patches,
+ compression_ratio=total_steps / patches,
+ )
+
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/predictive_surprise.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/predictive_surprise.py
new file mode 100644
index 0000000000..203bd339c7
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/predictive_surprise.py
@@ -0,0 +1,124 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Literal
+
+import numpy as np
+
+from .control import ControllerSummary, ControllerSummaryConfig
+
+SummaryMode = Literal["residual", "surprise"]
+
+
+@dataclass(frozen=True)
+class PredictiveSurpriseConfig:
+ summary: ControllerSummaryConfig = field(
+ default_factory=lambda: ControllerSummaryConfig(reduction="mean_abs", normalize=False)
+ )
+ feature_mode: SummaryMode = "surprise"
+ eps: float = 1e-8
+
+ def __post_init__(self) -> None:
+ if self.eps <= 0.0:
+ raise ValueError("eps must be > 0")
+
+
+@dataclass(frozen=True)
+class PredictionState:
+ predicted: np.ndarray
+ actual: np.ndarray
+ residual: np.ndarray
+ surprise: np.ndarray
+ summary: ControllerSummary
+ step: int | None = None
+
+ @property
+ def surprise_score(self) -> float:
+ return float(np.mean(self.surprise))
+
+ @property
+ def residual_score(self) -> float:
+ return float(np.mean(np.abs(self.residual)))
+
+
+class PredictiveSurpriseController:
+ def __init__(self, config: PredictiveSurpriseConfig | None = None):
+ self.config = config or PredictiveSurpriseConfig()
+
+ @staticmethod
+ def _coerce_vector(signal: np.ndarray | float | list[float] | tuple[float, ...]) -> np.ndarray:
+ array = np.asarray(signal, dtype=np.float64).reshape(-1)
+ if array.size < 1:
+ raise ValueError("signal must contain at least one value")
+ return array
+
+ def _summary_from_signal(self, signal: np.ndarray, *, name: str | None = None) -> ControllerSummary:
+ signal = np.asarray(signal, dtype=np.float64).reshape(-1)
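+ # Fixed two-feature summary (mean|x|, max|x|); only the normalize/eps settings of the summary config apply here.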
+ summary = np.asarray(
+ [
+ float(np.mean(np.abs(signal))),
+ float(np.max(np.abs(signal))),
+ ],
+ dtype=np.float64,
+ )
+ if self.config.summary.normalize:
+ norm = float(np.linalg.norm(summary))
+ if norm > self.config.summary.eps:
+ summary = summary / norm
+ return ControllerSummary(values=summary, name=name)
+
+ def observe(
+ self,
+ predicted: np.ndarray | float | list[float] | tuple[float, ...],
+ actual: np.ndarray | float | list[float] | tuple[float, ...],
+ *,
+ step: int | None = None,
+ name: str | None = None,
+ ) -> PredictionState:
+ predicted_vector = self._coerce_vector(predicted)
+ actual_vector = self._coerce_vector(actual)
+ if predicted_vector.shape != actual_vector.shape:
+ raise ValueError("predicted and actual must share the same shape")
+
+ residual = actual_vector - predicted_vector
+ surprise = np.abs(residual)
+ summary_signal = residual if self.config.feature_mode == "residual" else surprise
+ summary = self._summary_from_signal(summary_signal, name=name)
+ return PredictionState(
+ predicted=predicted_vector,
+ actual=actual_vector,
+ residual=residual,
+ surprise=surprise,
+ summary=summary,
+ step=step,
+ )
+
+ def feature_vector(self, state: PredictionState) -> np.ndarray:
+ return np.concatenate(
+ [
+ np.asarray(
+ [
+ float(np.mean(state.predicted)),
+ float(np.mean(state.actual)),
+ float(np.mean(state.residual)),
+ float(np.mean(np.abs(state.residual))),
+ float(np.mean(np.square(state.residual))),
+ float(np.mean(np.square(state.surprise))),
+ ],
+ dtype=np.float64,
+ ),
+ state.summary.values,
+ ]
+ )
+
+ @property
+ def feature_dim(self) -> int:
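+ # Six aggregate statistics from feature_vector plus the two summary values.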
+ return 8
+
+
+__all__ = [
+ "PredictionState",
+ "PredictiveSurpriseConfig",
+ "PredictiveSurpriseController",
+ "SummaryMode",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/presets.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/presets.py
new file mode 100644
index 0000000000..0f11b6110a
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/presets.py
@@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from .config import (
+ DelayLineConfig,
+ HierarchicalSubstrateConfig,
+ LatentConfig,
+ MixedMemoryConfig,
+ OpenPredictiveCoderConfig,
+ ReservoirConfig,
+ SegmenterConfig,
+)
+
+
+def echo_state_small() -> OpenPredictiveCoderConfig:
+ return OpenPredictiveCoderConfig(
+ substrate_kind="echo_state",
+ segmenter=SegmenterConfig(mode="adaptive", patch_size=4, min_patch_size=2, max_patch_size=8, novelty_threshold=0.08),
+ reservoir=ReservoirConfig(size=96, connectivity=0.12, spectral_radius=0.9, leak=0.35, seed=11),
+ latent=LatentConfig(latent_dim=24, global_dim=24, reservoir_features=24, readout_l2=1e-5),
+ )
+
+
+def delay_small() -> OpenPredictiveCoderConfig:
+ return OpenPredictiveCoderConfig(
+ substrate_kind="delay",
+ segmenter=SegmenterConfig(mode="adaptive", patch_size=4, min_patch_size=2, max_patch_size=8, novelty_threshold=0.08),
+ delay=DelayLineConfig(history_length=12, embedding_dim=16, vocabulary_size=256, input_scale=0.2, decay=0.95, seed=11),
+ latent=LatentConfig(latent_dim=12, global_dim=12, reservoir_features=12, readout_l2=1e-5),
+ )
+
+
+def mixed_memory_small() -> OpenPredictiveCoderConfig:
+ return OpenPredictiveCoderConfig(
+ substrate_kind="mixed_memory",
+ segmenter=SegmenterConfig(mode="adaptive", patch_size=4, min_patch_size=2, max_patch_size=8, novelty_threshold=0.08),
+ mixed_memory=MixedMemoryConfig(
+ reservoir=ReservoirConfig(size=64, connectivity=0.15, spectral_radius=0.9, leak=0.3, seed=13),
+ delay=DelayLineConfig(history_length=8, embedding_dim=8, vocabulary_size=256, input_scale=0.2, decay=0.95, seed=13),
+ ),
+ latent=LatentConfig(latent_dim=16, global_dim=16, reservoir_features=16, readout_l2=1e-5),
+ )
+
+
+def hierarchical_small() -> OpenPredictiveCoderConfig:
+ return OpenPredictiveCoderConfig(
+ substrate_kind="hierarchical",
+ segmenter=SegmenterConfig(mode="adaptive", patch_size=4, min_patch_size=2, max_patch_size=8, novelty_threshold=0.08),
+ hierarchical=HierarchicalSubstrateConfig(
+ fast_size=24,
+ mid_size=32,
+ slow_size=40,
+ vocabulary_size=256,
+ fast_connectivity=0.2,
+ mid_connectivity=0.12,
+ slow_connectivity=0.08,
+ fast_spectral_radius=0.8,
+ mid_spectral_radius=0.9,
+ slow_spectral_radius=0.95,
+ fast_leak=0.4,
+ mid_leak=0.3,
+ slow_leak=0.2,
+ input_scale=0.15,
+ upward_scale=0.08,
+ slow_update_stride=2,
+ seed=17,
+ ),
+ latent=LatentConfig(latent_dim=24, global_dim=24, reservoir_features=24, readout_l2=1e-5),
+ )
+
+
+__all__ = [
+ "delay_small",
+ "echo_state_small",
+ "hierarchical_small",
+ "mixed_memory_small",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/probability_diagnostics.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/probability_diagnostics.py
new file mode 100644
index 0000000000..e1a35c1f3f
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/probability_diagnostics.py
@@ -0,0 +1,196 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+
+@dataclass(frozen=True)
+class ProbabilityDiagnosticsConfig:
+ top_k: int = 4
+ epsilon: float = 1e-12
+
+ def __post_init__(self) -> None:
+ if self.top_k < 1:
+ raise ValueError("top_k must be >= 1")
+ if self.epsilon <= 0.0:
+ raise ValueError("epsilon must be > 0")
+
+
+@dataclass(frozen=True)
+class ProbabilityDiagnostics:
+ entropy: np.ndarray
+ peak: np.ndarray
+ top_k_mass: np.ndarray
+ overlap: np.ndarray
+ top1_agreement: np.ndarray
+ shared_top_k_mass: np.ndarray
+ top2_margin: np.ndarray
+
+ def as_dict(self) -> dict[str, np.ndarray]:
+ return {
+ "entropy": self.entropy,
+ "peak": self.peak,
+ "top_k_mass": self.top_k_mass,
+ "overlap": self.overlap,
+ "top1_agreement": self.top1_agreement,
+ "shared_top_k_mass": self.shared_top_k_mass,
+ "top2_margin": self.top2_margin,
+ }
+
+
+def _coerce_probabilities(probabilities: np.ndarray | list[float] | tuple[float, ...]) -> np.ndarray:
+ array = np.asarray(probabilities, dtype=np.float64)
+ if array.ndim < 1:
+ raise ValueError("probability arrays must have at least one dimension")
+ if np.any(array < 0.0):
+ raise ValueError("probabilities must be non-negative")
+ return array
+
+
+def _normalize_probabilities(probabilities: np.ndarray, *, epsilon: float) -> np.ndarray:
+ vocab_size = probabilities.shape[-1]
+ totals = np.sum(probabilities, axis=-1, keepdims=True)
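+ # Rows whose total mass does not exceed epsilon fall back to the uniform distribution.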
+ return np.divide(
+ probabilities,
+ totals,
+ out=np.full_like(probabilities, 1.0 / float(vocab_size)),
+ where=totals > epsilon,
+ )
+
+
+def normalized_entropy(probabilities: np.ndarray | list[float] | tuple[float, ...], *, epsilon: float = 1e-12) -> np.ndarray:
+ array = _normalize_probabilities(_coerce_probabilities(probabilities), epsilon=epsilon)
+ vocab_size = array.shape[-1]
+ log_vocab = np.log(float(vocab_size))
+ entropy = -np.sum(array * np.log(array + epsilon), axis=-1)
+ return np.asarray(entropy / log_vocab if log_vocab > 0.0 else np.zeros_like(entropy), dtype=np.float64)
+
+
+def top1_peak(probabilities: np.ndarray | list[float] | tuple[float, ...]) -> np.ndarray:
+ array = _normalize_probabilities(_coerce_probabilities(probabilities), epsilon=1e-12)
+ return np.asarray(np.max(array, axis=-1), dtype=np.float64)
+
+
+def top_k_mass(
+ probabilities: np.ndarray | list[float] | tuple[float, ...],
+ *,
+ top_k: int = 4,
+) -> np.ndarray:
+ if top_k < 1:
+ raise ValueError("top_k must be >= 1")
+ array = _normalize_probabilities(_coerce_probabilities(probabilities), epsilon=1e-12)
+ k = min(top_k, array.shape[-1])
+ if k == array.shape[-1]:
+ mass = np.sum(array, axis=-1)
+ else:
+ partition = np.partition(array, kth=-k, axis=-1)
+ mass = np.sum(partition[..., -k:], axis=-1)
+ return np.asarray(mass, dtype=np.float64)
+
+
+def top2_margin(probabilities: np.ndarray | list[float] | tuple[float, ...]) -> np.ndarray:
+ array = _normalize_probabilities(_coerce_probabilities(probabilities), epsilon=1e-12)
+ if array.shape[-1] < 2:
+ return np.zeros(array.shape[:-1], dtype=np.float64)
+ partition = np.partition(array, kth=-2, axis=-1)
+ top2 = np.sort(partition[..., -2:], axis=-1)
+ return np.asarray(top2[..., 1] - top2[..., 0], dtype=np.float64)
+
+
+def overlap_mass(
+ base_probs: np.ndarray | list[float] | tuple[float, ...],
+ proxy_probs: np.ndarray | list[float] | tuple[float, ...],
+) -> np.ndarray:
+ base = _normalize_probabilities(_coerce_probabilities(base_probs), epsilon=1e-12)
+ proxy = _normalize_probabilities(_coerce_probabilities(proxy_probs), epsilon=1e-12)
+ if base.shape != proxy.shape:
+ raise ValueError("base_probs and proxy_probs must have the same shape")
+ overlap = np.sum(np.minimum(base, proxy), axis=-1)
+ return np.asarray(overlap, dtype=np.float64)
+
+
+def top1_agreement(
+ base_probs: np.ndarray | list[float] | tuple[float, ...],
+ proxy_probs: np.ndarray | list[float] | tuple[float, ...],
+) -> np.ndarray:
+ base = _normalize_probabilities(_coerce_probabilities(base_probs), epsilon=1e-12)
+ proxy = _normalize_probabilities(_coerce_probabilities(proxy_probs), epsilon=1e-12)
+ if base.shape != proxy.shape:
+ raise ValueError("base_probs and proxy_probs must have the same shape")
+ agreement = np.argmax(base, axis=-1) == np.argmax(proxy, axis=-1)
+ return np.asarray(agreement, dtype=np.float64)
+
+
+def shared_top_k_mass(
+ base_probs: np.ndarray | list[float] | tuple[float, ...],
+ proxy_probs: np.ndarray | list[float] | tuple[float, ...],
+ *,
+ top_k: int = 4,
+) -> np.ndarray:
+ if top_k < 1:
+ raise ValueError("top_k must be >= 1")
+ base = _normalize_probabilities(_coerce_probabilities(base_probs), epsilon=1e-12)
+ proxy = _normalize_probabilities(_coerce_probabilities(proxy_probs), epsilon=1e-12)
+ if base.shape != proxy.shape:
+ raise ValueError("base_probs and proxy_probs must have the same shape")
+
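+ # Average the mass both distributions place on the intersection of their top-k index sets.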
+ k = min(top_k, base.shape[-1])
+ if k == base.shape[-1]:
+ shared_mask = np.ones_like(base, dtype=bool)
+ else:
+ base_indices = np.argpartition(base, kth=-k, axis=-1)[..., -k:]
+ proxy_indices = np.argpartition(proxy, kth=-k, axis=-1)[..., -k:]
+ base_mask = np.zeros_like(base, dtype=bool)
+ proxy_mask = np.zeros_like(proxy, dtype=bool)
+ np.put_along_axis(base_mask, base_indices, True, axis=-1)
+ np.put_along_axis(proxy_mask, proxy_indices, True, axis=-1)
+ shared_mask = base_mask & proxy_mask
+
+ mass = 0.5 * (np.sum(base * shared_mask, axis=-1) + np.sum(proxy * shared_mask, axis=-1))
+ return np.asarray(mass, dtype=np.float64)
+
+
+def probability_diagnostics(
+ base_probs: np.ndarray | list[float] | tuple[float, ...],
+ proxy_probs: np.ndarray | list[float] | tuple[float, ...],
+ *,
+ config: ProbabilityDiagnosticsConfig | None = None,
+) -> ProbabilityDiagnostics:
+ config = config or ProbabilityDiagnosticsConfig()
+ base = _normalize_probabilities(_coerce_probabilities(base_probs), epsilon=config.epsilon)
+ proxy = _normalize_probabilities(_coerce_probabilities(proxy_probs), epsilon=config.epsilon)
+ if base.shape != proxy.shape:
+ raise ValueError("base_probs and proxy_probs must have the same shape")
+
+ entropy = 0.5 * (normalized_entropy(base, epsilon=config.epsilon) + normalized_entropy(proxy, epsilon=config.epsilon))
+ peak = 0.5 * (top1_peak(base) + top1_peak(proxy))
+ mass = 0.5 * (top_k_mass(base, top_k=config.top_k) + top_k_mass(proxy, top_k=config.top_k))
+ overlap = overlap_mass(base, proxy)
+ agreement = top1_agreement(base, proxy)
+ shared_mass = shared_top_k_mass(base, proxy, top_k=config.top_k)
+ margin = 0.5 * (top2_margin(base) + top2_margin(proxy))
+
+ return ProbabilityDiagnostics(
+ entropy=np.asarray(entropy, dtype=np.float64),
+ peak=np.asarray(peak, dtype=np.float64),
+ top_k_mass=np.asarray(mass, dtype=np.float64),
+ overlap=np.asarray(overlap, dtype=np.float64),
+ top1_agreement=np.asarray(agreement, dtype=np.float64),
+ shared_top_k_mass=np.asarray(shared_mass, dtype=np.float64),
+ top2_margin=np.asarray(margin, dtype=np.float64),
+ )
+
+
+__all__ = [
+ "ProbabilityDiagnostics",
+ "ProbabilityDiagnosticsConfig",
+ "normalized_entropy",
+ "overlap_mass",
+ "probability_diagnostics",
+ "shared_top_k_mass",
+ "top1_agreement",
+ "top1_peak",
+ "top2_margin",
+ "top_k_mass",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/readout.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/readout.py
new file mode 100644
index 0000000000..9826ba392e
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/readout.py
@@ -0,0 +1,52 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+import numpy as np
+
+from .metrics import softmax
+
+
+@dataclass
+class RidgeReadout:
+ input_dim: int
+ output_dim: int
+ alpha: float = 1e-3
+ use_bias: bool = True
+ weights: np.ndarray | None = field(default=None, init=False)
+
+ def _augment(self, features: np.ndarray) -> np.ndarray:
+ features = np.asarray(features, dtype=np.float64)
+ if not self.use_bias:
+ return features
+ bias = np.ones((features.shape[0], 1), dtype=np.float64)
+ return np.concatenate([features, bias], axis=1)
+
+ def fit(self, features: np.ndarray, targets: np.ndarray) -> None:
+ if features.ndim != 2:
+ raise ValueError("features must be rank-2")
+ if targets.ndim != 1:
+ raise ValueError("targets must be rank-1")
+ if features.shape[0] != targets.shape[0]:
+ raise ValueError("features and targets must have the same number of rows")
+
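+ # One-hot encode the class targets and solve the ridge normal equations; the bias column stays unpenalized.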
+ x = self._augment(features)
+ y = np.eye(self.output_dim, dtype=np.float64)[targets.astype(np.int64)]
+ regularizer = np.eye(x.shape[1], dtype=np.float64) * self.alpha
+ if self.use_bias:
+ regularizer[-1, -1] = 0.0
+ gram = x.T @ x + regularizer
+ rhs = x.T @ y
+ self.weights = np.linalg.solve(gram, rhs)
+
+ def _require_weights(self) -> np.ndarray:
+ if self.weights is None:
+ raise RuntimeError("The readout has not been fit yet.")
+ return self.weights
+
+ def logits(self, features: np.ndarray) -> np.ndarray:
+ return self._augment(features) @ self._require_weights()
+
+ def probabilities(self, features: np.ndarray) -> np.ndarray:
+ return softmax(self.logits(features), axis=-1)
+
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/readouts.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/readouts.py
new file mode 100644
index 0000000000..d2047a499b
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/readouts.py
@@ -0,0 +1,5 @@
+from .readout import RidgeReadout
+
+__all__ = [
+ "RidgeReadout",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/reservoir.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/reservoir.py
new file mode 100644
index 0000000000..f9c14748fd
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/reservoir.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from .config import ReservoirConfig, ReservoirTopology
+
+
+def spectral_radius(matrix: np.ndarray) -> float:
+ values = np.linalg.eigvals(matrix)
+ return float(np.max(np.abs(values)))
+
+
+def _scale_spectral_radius(matrix: np.ndarray, target: float) -> np.ndarray:
+ radius = spectral_radius(matrix)
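+ # Rescale so the largest eigenvalue magnitude hits the target; an all-zero matrix is returned unchanged.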
+ if radius == 0.0:
+ return matrix
+ return matrix * (target / radius)
+
+
+def _small_world_degree(size: int, connectivity: float) -> int:
+ degree = max(2, int(round(connectivity * (size - 1))))
+ degree = min(degree, size - 1)
+ if degree % 2 == 1:
+ degree = degree - 1 if degree > 2 else degree + 1
+ return max(2, degree)
+
+
+def _small_world_mask(size: int, connectivity: float, rewire_prob: float, rng: np.random.Generator) -> np.ndarray:
+ degree = _small_world_degree(size, connectivity)
+ adjacency = np.zeros((size, size), dtype=np.float64)
+ half = degree // 2
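+ # Watts-Strogatz style: build a ring lattice of even degree, then rewire each ring edge with probability rewire_prob.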
+
+ for node in range(size):
+ for offset in range(1, half + 1):
+ neighbor = (node + offset) % size
+ adjacency[node, neighbor] = 1.0
+ adjacency[neighbor, node] = 1.0
+
+ for node in range(size):
+ for offset in range(1, half + 1):
+ neighbor = (node + offset) % size
+ if adjacency[node, neighbor] == 0.0 or rng.random() >= rewire_prob:
+ continue
+ adjacency[node, neighbor] = 0.0
+ adjacency[neighbor, node] = 0.0
+
+ candidates = np.flatnonzero(adjacency[node] == 0.0)
+ candidates = candidates[candidates != node]
+ if candidates.size == 0:
+ adjacency[node, neighbor] = 1.0
+ adjacency[neighbor, node] = 1.0
+ continue
+
+ replacement = int(rng.choice(candidates))
+ adjacency[node, replacement] = 1.0
+ adjacency[replacement, node] = 1.0
+
+ np.fill_diagonal(adjacency, 0.0)
+ return adjacency
+
+
+def build_recurrent_matrix(config: ReservoirConfig) -> np.ndarray:
+ rng = np.random.default_rng(config.seed)
+ matrix = rng.standard_normal((config.size, config.size))
+ if config.topology == "erdos_renyi":
+ mask = (rng.random((config.size, config.size)) < config.connectivity).astype(np.float64)
+ elif config.topology == "small_world":
+ mask = _small_world_mask(config.size, config.connectivity, config.rewire_prob, rng)
+ else:
+ raise ValueError(f"Unknown reservoir topology: {config.topology}")
+ matrix = matrix * mask
+ np.fill_diagonal(matrix, 0.0)
+ return _scale_spectral_radius(matrix, config.spectral_radius)
+
+
+@dataclass(frozen=True)
+class EchoStateReservoir:
+ config: ReservoirConfig
+ vocabulary_size: int = 256
+
+ def __post_init__(self) -> None:
+ rng = np.random.default_rng(self.config.seed + 17)
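+ # The dataclass is frozen, so derived weight matrices are attached via object.__setattr__.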
+ object.__setattr__(self, "recurrent", build_recurrent_matrix(self.config))
+ object.__setattr__(
+ self,
+ "input_weights",
+ rng.normal(
+ loc=0.0,
+ scale=self.config.input_scale,
+ size=(self.config.size, self.vocabulary_size),
+ ),
+ )
+
+ def initial_state(self) -> np.ndarray:
+ return np.zeros(self.config.size, dtype=np.float64)
+
+ @property
+ def state_dim(self) -> int:
+ return self.config.size
+
+ def step(self, state: np.ndarray, token: int) -> np.ndarray:
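+ # Leaky echo-state update: blend the previous state with the tanh-squashed recurrent drive.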
+ drive = self.recurrent @ state + self.input_weights[:, int(token)]
+ proposed = np.tanh(drive)
+ return ((1.0 - self.config.leak) * state) + (self.config.leak * proposed)
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/routing.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/routing.py
new file mode 100644
index 0000000000..9068948ce5
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/routing.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+from typing import Literal
+
+import numpy as np
+
+from .control import ControllerSummary
+
+RoutingMode = Literal["equal", "static", "projection"]
+
+
+@dataclass(frozen=True)
+class RoutingConfig:
+ mode: RoutingMode = "equal"
+ static_logits: tuple[float, ...] = ()
+ projection_weights: tuple[float, ...] = ()
+ route_biases: tuple[float, ...] = ()
+ temperature: float = 1.0
+
+ def __post_init__(self) -> None:
+ if self.temperature <= 0.0:
+ raise ValueError("temperature must be > 0")
+
+
+@dataclass(frozen=True)
+class RoutingDecision:
+ mode: RoutingMode
+ route_names: tuple[str, ...]
+ logits: np.ndarray
+ weights: np.ndarray
+ selected_index: int
+
+ def __post_init__(self) -> None:
+ logits = np.asarray(self.logits, dtype=np.float64).reshape(-1)
+ weights = np.asarray(self.weights, dtype=np.float64).reshape(-1)
+ if logits.size < 1:
+ raise ValueError("RoutingDecision requires at least one logit")
+ if logits.shape != weights.shape:
+ raise ValueError("logits and weights must have the same shape")
+ object.__setattr__(self, "logits", logits)
+ object.__setattr__(self, "weights", weights)
+ if not 0 <= self.selected_index < weights.size:
+ raise ValueError("selected_index out of range")
+
+
+def _softmax(values: np.ndarray) -> np.ndarray:
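+ # Subtract the max for numerical stability; degenerate totals fall back to uniform weights.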
+ shifted = values - np.max(values)
+ exp = np.exp(shifted)
+ total = float(np.sum(exp))
+ if total <= 0.0:
+ return np.full(values.shape, 1.0 / values.size, dtype=np.float64)
+ return exp / total
+
+
+class SummaryRouter:
+ def __init__(self, config: RoutingConfig | None = None):
+ self.config = config or RoutingConfig()
+
+ def route(
+ self,
+ summaries: Sequence[ControllerSummary],
+ *,
+ names: Sequence[str] | None = None,
+ ) -> RoutingDecision:
+ if not summaries:
+ raise ValueError("route requires at least one summary")
+ dim = summaries[0].dim
+ for summary in summaries[1:]:
+ if summary.dim != dim:
+ raise ValueError("all controller summaries must share the same dimension")
+
+ route_names = tuple(names) if names is not None else tuple(
+ summary.name if summary.name is not None else f"branch_{index}"
+ for index, summary in enumerate(summaries)
+ )
+ if len(route_names) != len(summaries):
+ raise ValueError("names must match the number of summaries")
+
+ if self.config.mode == "equal":
+ logits = np.zeros(len(summaries), dtype=np.float64)
+ elif self.config.mode == "static":
+ logits = np.asarray(self.config.static_logits, dtype=np.float64)
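+ # An empty static_logits tuple degrades gracefully to equal weighting.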
+ if logits.size == 0:
+ logits = np.zeros(len(summaries), dtype=np.float64)
+ if logits.size != len(summaries):
+ raise ValueError("static_logits must match the number of summaries")
+ elif self.config.mode == "projection":
+ weights = np.asarray(self.config.projection_weights, dtype=np.float64)
+ if weights.size == 0:
+ raise ValueError("projection mode requires projection_weights")
+ if weights.size != dim:
+ raise ValueError("projection_weights must match the summary dimension")
+ route_biases = np.asarray(self.config.route_biases, dtype=np.float64)
+ if route_biases.size == 0:
+ route_biases = np.zeros(len(summaries), dtype=np.float64)
+ if route_biases.size != len(summaries):
+ raise ValueError("route_biases must match the number of summaries")
+ logits = np.asarray(
+ [float(summary.values @ weights) + float(route_biases[index]) for index, summary in enumerate(summaries)],
+ dtype=np.float64,
+ )
+ else:
+ raise ValueError(f"unknown routing mode: {self.config.mode}")
+
+ route_weights = _softmax(logits / self.config.temperature)
+ selected_index = int(np.argmax(route_weights))
+ return RoutingDecision(
+ mode=self.config.mode,
+ route_names=route_names,
+ logits=logits,
+ weights=route_weights,
+ selected_index=selected_index,
+ )
+
+
+__all__ = [
+ "RoutingConfig",
+ "RoutingDecision",
+ "RoutingMode",
+ "SummaryRouter",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/runtime.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/runtime.py
new file mode 100644
index 0000000000..c5d281bfb8
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/runtime.py
@@ -0,0 +1,113 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass, field
+from typing import Any
+
+import numpy as np
+
+from .artifacts import ArtifactAccounting, ArtifactMetadata, coerce_artifact_metadata
+
+
+@dataclass(frozen=True)
+class SequenceTrace:
+ features: np.ndarray
+ targets: np.ndarray
+ boundaries: np.ndarray
+ tokens: int
+ patches: int
+
+
+@dataclass(frozen=True)
+class SequenceReport:
+ tokens: int
+ patches: int
+ mean_patch_size: float
+ compression_ratio: float
+ bits_per_byte: float
+
+
+@dataclass(frozen=True)
+class FitReport:
+ sequences: int
+ tokens: int
+ patches: int
+ mean_patch_size: float
+ compression_ratio: float
+ train_bits_per_byte: float
+
+
+@dataclass(frozen=True)
+class CausalTrace:
+ trace: SequenceTrace
+ artifact_accounting: ArtifactAccounting | None = None
+ metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
+
+ @property
+ def tokens(self) -> int:
+ return int(self.trace.tokens)
+
+ @property
+ def patches(self) -> int:
+ return int(self.trace.patches)
+
+
+@dataclass(frozen=True)
+class CausalSequenceReport:
+ report: SequenceReport
+ artifact_accounting: ArtifactAccounting | None = None
+ metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
+
+ @property
+ def tokens(self) -> int:
+ return int(self.report.tokens)
+
+ @property
+ def patches(self) -> int:
+ return int(self.report.patches)
+
+ @property
+ def bits_per_byte(self) -> float:
+ return float(self.report.bits_per_byte)
+
+
+@dataclass(frozen=True)
+class CausalFitReport:
+ report: FitReport
+ artifact_accounting: ArtifactAccounting | None = None
+ metadata: ArtifactMetadata = field(default_factory=ArtifactMetadata)
+
+ @property
+ def sequences(self) -> int:
+ return int(self.report.sequences)
+
+ @property
+ def tokens(self) -> int:
+ return int(self.report.tokens)
+
+ @property
+ def patches(self) -> int:
+ return int(self.report.patches)
+
+ @property
+ def train_bits_per_byte(self) -> float:
+ return float(self.report.train_bits_per_byte)
+
+
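+# Convenience wrapper: forwards to coerce_artifact_metadata so call sites can tag metadata in one step.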
+def tag_metadata(
+ metadata: ArtifactMetadata | Mapping[str, Any] | None = None,
+ /,
+ **updates: Any,
+) -> ArtifactMetadata:
+ return coerce_artifact_metadata(metadata, **updates)
+
+
+__all__ = [
+ "CausalFitReport",
+ "CausalSequenceReport",
+ "CausalTrace",
+ "FitReport",
+ "SequenceReport",
+ "SequenceTrace",
+ "tag_metadata",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/sampled_readout.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/sampled_readout.py
new file mode 100644
index 0000000000..832c715faf
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/sampled_readout.py
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+import numpy as np
+
+from .config import SampledReadoutBandConfig, SampledReadoutConfig
+
+
+@dataclass(frozen=True)
+class SampledBandSummary:
+ name: str
+ indices: np.ndarray
+ sampled: np.ndarray
+ mean: float
+ energy: float
+ drift: float
+
+
+class SampledMultiscaleReadout:
+ def __init__(self, config: SampledReadoutConfig):
+ self.config = config
+ self._band_slices = tuple(slice(band.start, band.stop) for band in config.bands)
+ self._band_indices = tuple(self._resolve_band_indices(index, band) for index, band in enumerate(config.bands))
+
+ @property
+ def feature_dim(self) -> int:
+ return self.config.feature_dim
+
+ @property
+ def band_slices(self) -> tuple[slice, ...]:
+ return self._band_slices
+
+ @property
+ def band_indices(self) -> tuple[np.ndarray, ...]:
+ return self._band_indices
+
+ def _coerce_state(self, state: np.ndarray) -> np.ndarray:
+ state = np.asarray(state, dtype=np.float64)
+ if state.ndim != 1:
+ raise ValueError("state must be rank-1")
+ if state.shape[0] != self.config.state_dim:
+ raise ValueError("state does not match configured state_dim")
+ return state
+
+ def _resolve_band_indices(self, band_index: int, band: SampledReadoutBandConfig) -> np.ndarray:
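+ # Explicit sample_indices (relative to the band) are honored; otherwise positions are drawn without
+ # replacement using a per-band seed. Returned indices are absolute within the state vector.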
+ if band.sample_indices:
+ indices = np.asarray(band.sample_indices, dtype=np.int64)
+ elif band.sample_count is None or band.sample_count == band.width:
+ indices = np.arange(band.width, dtype=np.int64)
+ else:
+ rng = np.random.default_rng(self.config.seed + band_index)
+ indices = np.sort(rng.choice(band.width, size=band.sample_count, replace=False)).astype(np.int64)
+ return indices + band.start
+
+ def split(self, state: np.ndarray) -> tuple[np.ndarray, ...]:
+ state = self._coerce_state(state)
+ return tuple(state[band_slice] for band_slice in self._band_slices)
+
+ def summaries(
+ self,
+ state: np.ndarray,
+ previous_state: np.ndarray | None = None,
+ ) -> tuple[SampledBandSummary, ...]:
+ state = self._coerce_state(state)
+ previous = None if previous_state is None else self._coerce_state(previous_state)
+ if previous is not None and previous.shape != state.shape:
+ raise ValueError("previous_state does not match state shape")
+
+ summaries: list[SampledBandSummary] = []
+ for band, band_slice, band_indices in zip(self.config.bands, self._band_slices, self._band_indices):
+ band_state = state[band_slice]
+ sampled = band_state[band_indices - band.start]
+ mean = float(np.mean(band_state))
+ energy = float(np.mean(np.square(band_state)))
+ if band.include_drift:
+ if previous is None:
+ drift = 0.0
+ else:
+ drift = float(np.mean(np.abs(band_state - previous[band_slice])))
+ else:
+ drift = 0.0
+ summaries.append(
+ SampledBandSummary(
+ name=band.name,
+ indices=band_indices.copy(),
+ sampled=sampled.copy(),
+ mean=mean,
+ energy=energy,
+ drift=drift,
+ )
+ )
+ return tuple(summaries)
+
+ def encode(
+ self,
+ state: np.ndarray,
+ previous_state: np.ndarray | None = None,
+ ) -> np.ndarray:
+ features: list[np.ndarray] = []
+ for band, summary in zip(self.config.bands, self.summaries(state, previous_state=previous_state)):
+ features.append(summary.sampled.astype(np.float64, copy=False))
+ if band.include_mean:
+ features.append(np.asarray([summary.mean], dtype=np.float64))
+ if band.include_energy:
+ features.append(np.asarray([summary.energy], dtype=np.float64))
+ if band.include_drift:
+ features.append(np.asarray([summary.drift], dtype=np.float64))
+ if not features:
+ return np.zeros((0,), dtype=np.float64)
+ return np.concatenate(features)
+
+
+__all__ = [
+ "SampledBandSummary",
+ "SampledMultiscaleReadout",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/segmenters.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/segmenters.py
new file mode 100644
index 0000000000..4d5787ade1
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/segmenters.py
@@ -0,0 +1,6 @@
+from .patching import AdaptiveSegmenter, SegmentStats
+
+__all__ = [
+ "AdaptiveSegmenter",
+ "SegmentStats",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/span_selection.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/span_selection.py
new file mode 100644
index 0000000000..df22dc90ab
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/span_selection.py
@@ -0,0 +1,120 @@
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass
+from typing import Any
+
+import numpy as np
+
+from .artifacts import ArtifactMetadata, ReplaySpan, make_replay_span
+
+
+@dataclass(frozen=True)
+class SpanSelectionConfig:
+ threshold: float
+ min_span: int = 1
+ max_gap: int = 0
+
+ def __post_init__(self) -> None:
+ if self.min_span < 1:
+ raise ValueError("min_span must be >= 1")
+ if self.max_gap < 0:
+ raise ValueError("max_gap must be >= 0")
+
+
+@dataclass(frozen=True)
+class ScoredSpan:
+ start: int
+ stop: int
+ mean_score: float
+ max_score: float
+ count: int
+
+ def __post_init__(self) -> None:
+ if self.start < 0:
+ raise ValueError("start must be >= 0")
+ if self.stop < self.start:
+ raise ValueError("stop must be >= start")
+ if self.count < 0:
+ raise ValueError("count must be >= 0")
+
+ @property
+ def length(self) -> int:
+ return self.stop - self.start
+
+
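+# Single left-to-right scan over the thresholded indices: gaps of at most
+# max_gap sub-threshold positions are bridged, and merged runs whose extent
+# falls short of min_span are dropped. The flush() closure reads the scan's
+# current start/previous/selected bindings at call time.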
+def select_scored_spans(
+ scores: np.ndarray,
+ config: SpanSelectionConfig,
+) -> tuple[ScoredSpan, ...]:
+ flat_scores = np.asarray(scores, dtype=np.float64).reshape(-1)
+ if flat_scores.size == 0:
+ return ()
+
+ indices = np.flatnonzero(flat_scores >= config.threshold)
+ if indices.size == 0:
+ return ()
+
+ spans: list[ScoredSpan] = []
+ selected: list[int] = [int(indices[0])]
+ start = int(indices[0])
+ previous = int(indices[0])
+
+ def flush() -> None:
+ if not selected:
+ return
+ stop = previous + 1
+ if stop - start < config.min_span:
+ return
+ values = flat_scores[np.asarray(selected, dtype=np.int64)]
+ spans.append(
+ ScoredSpan(
+ start=start,
+ stop=stop,
+ mean_score=float(np.mean(values)),
+ max_score=float(np.max(values)),
+ count=len(selected),
+ )
+ )
+
+ for raw_index in indices[1:]:
+ index = int(raw_index)
+ if index - previous - 1 <= config.max_gap:
+ selected.append(index)
+ previous = index
+ continue
+ flush()
+ selected = [index]
+ start = index
+ previous = index
+ flush()
+ return tuple(spans)
+
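+# Example (sketch): with SpanSelectionConfig(threshold=0.5, min_span=2,
+# max_gap=1), scores [0.9, 0.2, 0.8, 0.9, 0.1] yield one span with start=0,
+# stop=4, count=3: the sub-threshold index 1 is bridged because the gap does
+# not exceed max_gap.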
+
+def replay_spans_from_scores(
+ scores: np.ndarray,
+ config: SpanSelectionConfig,
+ *,
+ label: str | None = None,
+ metadata: ArtifactMetadata | Mapping[str, Any] | None = None,
+ **updates: Any,
+) -> tuple[ReplaySpan, ...]:
+ return tuple(
+ make_replay_span(
+ span.start,
+ span.stop,
+ label=label,
+ metadata=metadata,
+ **updates,
+ )
+ for span in select_scored_spans(scores, config)
+ )
+
+
+__all__ = [
+ "ReplaySpan",
+ "ScoredSpan",
+ "SpanSelectionConfig",
+ "replay_spans_from_scores",
+ "select_scored_spans",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/substrates.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/substrates.py
new file mode 100644
index 0000000000..83e761317e
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/substrates.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from typing import Protocol
+
+import numpy as np
+
+from .reservoir import EchoStateReservoir, build_recurrent_matrix, spectral_radius
+
+
+class TokenSubstrate(Protocol):
+ @property
+ def state_dim(self) -> int: ...
+
+ def initial_state(self) -> np.ndarray: ...
+
+ def step(self, state: np.ndarray, token: int) -> np.ndarray: ...
+
+
+EchoStateSubstrate = EchoStateReservoir
+
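+# These imports stay below the protocol definition: the substrate modules are
+# assumed to import TokenSubstrate from this module, so hoisting them to the
+# top of the file would create a circular import.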
+from .delay import DelayLineSubstrate
+from .hierarchical import HierarchicalSubstrate
+from .linear_memory import LinearMemorySubstrate
+from .mixed_memory import MixedMemorySubstrate
+from .oscillatory_memory import OscillatoryMemorySubstrate
+
+__all__ = [
+ "DelayLineSubstrate",
+ "EchoStateSubstrate",
+ "HierarchicalSubstrate",
+ "LinearMemorySubstrate",
+ "MixedMemorySubstrate",
+ "OscillatoryMemorySubstrate",
+ "TokenSubstrate",
+ "build_recurrent_matrix",
+ "spectral_radius",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/teacher_export.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/teacher_export.py
new file mode 100644
index 0000000000..451fd525fd
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/teacher_export.py
@@ -0,0 +1,237 @@
+from __future__ import annotations
+
+from collections.abc import Sequence
+from dataclasses import dataclass
+
+import numpy as np
+
+from .metrics import bits_per_byte_from_probabilities
+from .probability_diagnostics import ProbabilityDiagnostics, ProbabilityDiagnosticsConfig, probability_diagnostics
+
+
+def _coerce_probability_array(
+ probabilities: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ name: str,
+) -> np.ndarray:
+ array = np.asarray(probabilities, dtype=np.float64)
+ if array.ndim < 1:
+ raise ValueError(f"{name} must have at least one dimension")
+ if np.any(array < 0.0):
+ raise ValueError(f"{name} must contain non-negative values")
+ return array
+
+
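+# Rows whose total mass does not exceed epsilon fall back to the uniform
+# distribution over the vocabulary; all other rows are rescaled to sum to 1.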
+def _normalize_probabilities(probabilities: np.ndarray, *, epsilon: float) -> np.ndarray:
+ vocab_size = probabilities.shape[-1]
+ totals = np.sum(probabilities, axis=-1, keepdims=True)
+ return np.divide(
+ probabilities,
+ totals,
+ out=np.full_like(probabilities, 1.0 / float(vocab_size)),
+ where=totals > epsilon,
+ )
+
+
+def _coerce_labels(labels: object, expected_rows: int, vocab_size: int) -> np.ndarray:
+ array = np.asarray(labels, dtype=np.int64).reshape(-1)
+ if array.size != expected_rows:
+ raise ValueError("labels must align with the probability rows")
+ if np.any(array < 0) or np.any(array >= vocab_size):
+ raise ValueError("labels must be valid vocabulary indices")
+ return array
+
+
+@dataclass(frozen=True)
+class TeacherExportConfig:
+ vocabulary_size: int = 256
+ source_names: tuple[str, str] = ("teacher", "student")
+ diagnostics: ProbabilityDiagnosticsConfig = ProbabilityDiagnosticsConfig()
+
+ def __post_init__(self) -> None:
+ if self.vocabulary_size < 2:
+ raise ValueError("vocabulary_size must be >= 2")
+ if len(self.source_names) != 2 or any(not name for name in self.source_names):
+ raise ValueError("source_names must contain two non-empty names")
+
+
+@dataclass(frozen=True)
+class TeacherExportRecord:
+ tokens: int
+ source_names: tuple[str, str]
+ teacher_probs: np.ndarray
+ student_probs: np.ndarray
+ teacher_labels: np.ndarray
+ student_labels: np.ndarray
+ diagnostics: ProbabilityDiagnostics
+
+ @property
+ def steps(self) -> int:
+ return int(self.teacher_probs.shape[0])
+
+ def as_dict(self) -> dict[str, np.ndarray]:
+ return {
+ "teacher_probs": self.teacher_probs,
+ "student_probs": self.student_probs,
+ "teacher_labels": self.teacher_labels,
+ "student_labels": self.student_labels,
+ **self.diagnostics.as_dict(),
+ }
+
+
+@dataclass(frozen=True)
+class TeacherExportReport:
+ record: TeacherExportRecord
+ teacher_bits_per_byte: float | None
+ student_bits_per_byte: float | None
+ mean_bits_per_byte: float | None
+ label_flip_rate: float
+ label_agreement_rate: float
+ mean_entropy: float
+ mean_peak: float
+ mean_top_k_mass: float
+ mean_overlap: float
+ mean_shared_top_k_mass: float
+ mean_top2_margin: float
+
+ @property
+ def tokens(self) -> int:
+ return self.record.tokens
+
+ @property
+ def source_names(self) -> tuple[str, str]:
+ return self.record.source_names
+
+ @property
+ def steps(self) -> int:
+ return self.record.steps
+
+
+class TeacherExportAdapter:
+ def __init__(self, config: TeacherExportConfig | None = None):
+ self.config = config or TeacherExportConfig()
+
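+    # The default vocabulary_size of 256 acts as "unspecified": when the
+    # config still carries 256 but the inputs use a different vocabulary, the
+    # observed size wins; any other mismatch raises.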
+ def _resolve_vocabulary_size(self, teacher_probs: np.ndarray, student_probs: np.ndarray) -> int:
+ observed = int(teacher_probs.shape[-1])
+ if student_probs.shape[-1] != observed:
+ raise ValueError("teacher_probs and student_probs must have the same vocabulary size")
+ configured = self.config.vocabulary_size
+ if configured == observed:
+ return observed
+ if configured == 256 and observed != 256:
+ return observed
+ raise ValueError(
+ f"configured vocabulary_size={configured} does not match input vocabulary_size={observed}"
+ )
+
+ def record(
+ self,
+ teacher_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ student_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ source_names: tuple[str, str] | None = None,
+ ) -> TeacherExportRecord:
+ source_names = source_names or self.config.source_names
+ teacher = _coerce_probability_array(teacher_probs, name="teacher_probs")
+ student = _coerce_probability_array(student_probs, name="student_probs")
+ if teacher.shape != student.shape:
+ raise ValueError("teacher_probs and student_probs must have the same shape")
+ vocab_size = self._resolve_vocabulary_size(teacher, student)
+
+ teacher = _normalize_probabilities(teacher, epsilon=self.config.diagnostics.epsilon)
+ student = _normalize_probabilities(student, epsilon=self.config.diagnostics.epsilon)
+
+ flattened_rows = int(np.prod(teacher.shape[:-1], dtype=np.int64)) if teacher.ndim > 1 else 1
+ teacher_rows = np.reshape(teacher, (flattened_rows, vocab_size))
+ student_rows = np.reshape(student, (flattened_rows, vocab_size))
+ teacher_labels = np.argmax(teacher_rows, axis=-1).astype(np.int64, copy=False)
+ student_labels = np.argmax(student_rows, axis=-1).astype(np.int64, copy=False)
+ diagnostics = probability_diagnostics(
+ teacher_rows,
+ student_rows,
+ config=self.config.diagnostics,
+ )
+ return TeacherExportRecord(
+ tokens=flattened_rows,
+ source_names=source_names,
+ teacher_probs=teacher_rows,
+ student_probs=student_rows,
+ teacher_labels=teacher_labels,
+ student_labels=student_labels,
+ diagnostics=diagnostics,
+ )
+
+ def export(
+ self,
+ teacher_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ student_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ targets: object | None = None,
+ source_names: tuple[str, str] | None = None,
+ ) -> TeacherExportReport:
+ record = self.record(teacher_probs, student_probs, source_names=source_names)
+ target_array = None
+ teacher_bits_per_byte = None
+ student_bits_per_byte = None
+ mean_bits_per_byte = None
+ if targets is not None:
+ target_array = _coerce_labels(targets, record.steps, record.teacher_probs.shape[-1])
+ teacher_bits_per_byte = bits_per_byte_from_probabilities(record.teacher_probs, target_array)
+ student_bits_per_byte = bits_per_byte_from_probabilities(record.student_probs, target_array)
+ mean_bits_per_byte = float(0.5 * (teacher_bits_per_byte + student_bits_per_byte))
+
+ label_flip_rate = float(np.mean(record.teacher_labels != record.student_labels)) if record.steps else 0.0
+ label_agreement_rate = float(1.0 - label_flip_rate)
+ diagnostics = record.diagnostics
+ return TeacherExportReport(
+ record=record,
+ teacher_bits_per_byte=teacher_bits_per_byte,
+ student_bits_per_byte=student_bits_per_byte,
+ mean_bits_per_byte=mean_bits_per_byte,
+ label_flip_rate=label_flip_rate,
+ label_agreement_rate=label_agreement_rate,
+ mean_entropy=float(np.mean(diagnostics.entropy)) if record.steps else 0.0,
+ mean_peak=float(np.mean(diagnostics.peak)) if record.steps else 0.0,
+ mean_top_k_mass=float(np.mean(diagnostics.top_k_mass)) if record.steps else 0.0,
+ mean_overlap=float(np.mean(diagnostics.overlap)) if record.steps else 0.0,
+ mean_shared_top_k_mass=float(np.mean(diagnostics.shared_top_k_mass)) if record.steps else 0.0,
+ mean_top2_margin=float(np.mean(diagnostics.top2_margin)) if record.steps else 0.0,
+ )
+
+ def score(
+ self,
+ teacher_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ student_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ targets: object | None = None,
+ source_names: tuple[str, str] | None = None,
+ ) -> TeacherExportReport:
+ return self.export(
+ teacher_probs,
+ student_probs,
+ targets=targets,
+ source_names=source_names,
+ )
+
+ def fit(
+ self,
+ teacher_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ student_probs: np.ndarray | Sequence[float] | Sequence[Sequence[float]],
+ *,
+ targets: object | None = None,
+ source_names: tuple[str, str] | None = None,
+ ) -> TeacherExportReport:
+ return self.export(
+ teacher_probs,
+ student_probs,
+ targets=targets,
+ source_names=source_names,
+ )
+
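+# Usage sketch (shapes assumed): given two (steps, vocab) probability arrays
+# and one integer target per row,
+#
+#     adapter = TeacherExportAdapter()
+#     report = adapter.export(teacher_probs, student_probs, targets=targets)
+#
+# reports label agreement, per-source bits per byte, and mean diagnostics.
+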
+
+__all__ = [
+ "TeacherExportAdapter",
+ "TeacherExportConfig",
+ "TeacherExportRecord",
+ "TeacherExportReport",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/train_eval.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/train_eval.py
new file mode 100644
index 0000000000..5e13e4bfb5
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/train_eval.py
@@ -0,0 +1,338 @@
+from __future__ import annotations
+
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass
+from typing import Any, Protocol, runtime_checkable
+
+import numpy as np
+
+from .codecs import ensure_tokens
+from .eval import RolloutMode
+
+
+@runtime_checkable
+class SupportsDatasetFit(Protocol):
+ def fit(
+ self,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ ) -> Any: ...
+
+
+@runtime_checkable
+class SupportsSequenceScoring(Protocol):
+ def score(
+ self,
+ sequence: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> Any: ...
+
+
+@runtime_checkable
+class SupportsPredictProba(Protocol):
+ def predict_proba(
+ self,
+ prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ ) -> np.ndarray: ...
+
+
+@dataclass(frozen=True)
+class DatasetEvaluation:
+ sequences: int
+ tokens: int
+ effective_tokens: int
+ bits_per_byte: float
+ sequence_bits_per_byte: np.ndarray
+
+ @property
+ def steps(self) -> int:
+ return self.effective_tokens
+
+
+@dataclass(frozen=True)
+class RolloutCurvePoint:
+ step: int
+ bits_per_byte: float
+ match_rate: float | None
+
+
+@dataclass(frozen=True)
+class RolloutCurveEvaluation:
+ mode: RolloutMode
+ prompt_tokens: np.ndarray
+ target_tokens: np.ndarray
+ generated_tokens: np.ndarray
+ sequence_tokens: np.ndarray
+ checkpoints: tuple[RolloutCurvePoint, ...]
+
+ @property
+ def continuation_tokens(self) -> np.ndarray:
+ return self.target_tokens
+
+ @property
+ def predicted_tokens(self) -> np.ndarray:
+ return self.generated_tokens
+
+
+@dataclass(frozen=True)
+class TransferProbeReport:
+ source_fit: Any
+ source_eval: DatasetEvaluation
+ target_zero_shot: DatasetEvaluation
+ target_scratch_fit: Any | None
+ target_scratch_eval: DatasetEvaluation | None
+
+ @property
+ def source_fit_bits_per_byte(self) -> float | None:
+ value = getattr(self.source_fit, "train_bits_per_byte", None)
+ return None if value is None else float(value)
+
+ @property
+ def target_fit_bits_per_byte(self) -> float | None:
+ if self.target_scratch_fit is None:
+ return None
+ value = getattr(self.target_scratch_fit, "train_bits_per_byte", None)
+ return None if value is None else float(value)
+
+ @property
+ def source_evaluation(self) -> DatasetEvaluation:
+ return self.source_eval
+
+ @property
+ def target_from_source(self) -> DatasetEvaluation:
+ return self.target_zero_shot
+
+ @property
+ def target_scratch(self) -> DatasetEvaluation | None:
+ return self.target_scratch_eval
+
+ @property
+ def transfer_gap_bits_per_byte(self) -> float | None:
+ if self.target_scratch_eval is None:
+ return None
+ return self.target_zero_shot.bits_per_byte - self.target_scratch_eval.bits_per_byte
+
+
+def _coerce_sequences(
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+) -> tuple[np.ndarray, ...]:
+ if isinstance(data, (str, bytes, bytearray, memoryview, np.ndarray)):
+ return (ensure_tokens(data),)
+    if isinstance(data, Sequence) and data and all(isinstance(item, (int, np.integer)) for item in data):
+ return (ensure_tokens(data),)
+ if isinstance(data, Sequence):
+ return tuple(ensure_tokens(item) for item in data)
+ return (ensure_tokens(data),)
+
+
+def _normalize_probabilities(probabilities: np.ndarray) -> np.ndarray:
+ clipped = np.clip(np.asarray(probabilities, dtype=np.float64), 1e-12, None)
+ return clipped / np.sum(clipped)
+
+
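+# Greedy decoding takes the argmax directly; otherwise temperature is applied
+# in log space (log p / T) and re-exponentiated with a max-subtraction for
+# numerical stability before sampling.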
+def _sample_next_token(
+ probabilities: np.ndarray,
+ *,
+ temperature: float,
+ greedy: bool,
+ rng: np.random.Generator,
+) -> int:
+ if greedy:
+ return int(np.argmax(probabilities))
+ if temperature <= 0.0:
+ raise ValueError("temperature must be > 0")
+ scaled = np.log(np.clip(probabilities, 1e-12, 1.0)) / temperature
+ stabilized = np.exp(scaled - np.max(scaled))
+ sample_probs = stabilized / np.sum(stabilized)
+ return int(rng.choice(sample_probs.shape[0], p=sample_probs))
+
+
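+# The aggregate bits_per_byte is a mean over sequences weighted by each
+# sequence's effective token count (tokens - 1).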
+def evaluate_dataset(
+ model: SupportsSequenceScoring,
+ data: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+) -> DatasetEvaluation:
+ sequences = _coerce_sequences(data)
+ sequence_scores: list[float] = []
+ total_tokens = 0
+ total_effective_tokens = 0
+ weighted_bits = 0.0
+
+ for sequence in sequences:
+ report = model.score(sequence)
+ tokens = int(getattr(report, "tokens"))
+ effective_tokens = max(tokens - 1, 0)
+ bits_per_byte = float(getattr(report, "bits_per_byte"))
+ sequence_scores.append(bits_per_byte)
+ total_tokens += tokens
+ total_effective_tokens += effective_tokens
+ weighted_bits += bits_per_byte * effective_tokens
+
+ mean_bits = 0.0 if total_effective_tokens == 0 else weighted_bits / total_effective_tokens
+ return DatasetEvaluation(
+ sequences=len(sequences),
+ tokens=total_tokens,
+ effective_tokens=total_effective_tokens,
+ bits_per_byte=mean_bits,
+ sequence_bits_per_byte=np.asarray(sequence_scores, dtype=np.float64),
+ )
+
+
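+# teacher_forced feeds the ground-truth continuation back into the context
+# and always scores match_rate against the argmax prediction; closed_loop
+# feeds back the model's own samples (argmax when greedy=True) and reports
+# match_rate only when a reference continuation is supplied.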
+def evaluate_rollout_curve(
+ model: SupportsSequenceScoring | SupportsPredictProba,
+ prompt: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int],
+ continuation: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | None = None,
+ *,
+ mode: RolloutMode = "teacher_forced",
+ checkpoints: Sequence[int] | None = None,
+ steps: int | None = None,
+ temperature: float = 1.0,
+ greedy: bool = False,
+ seed: int | None = None,
+) -> RolloutCurveEvaluation:
+    if not hasattr(model, "predict_proba") or not hasattr(model, "score"):
+        raise TypeError("evaluate_rollout_curve requires a model with predict_proba(...) and score(...)")
+
+ prompt_tokens = ensure_tokens(prompt)
+ if prompt_tokens.size < 1:
+ raise ValueError("prompt must contain at least one token")
+
+ if continuation is not None:
+ target_tokens = ensure_tokens(continuation)
+ else:
+ target_tokens = np.asarray([], dtype=np.uint8)
+
+ if mode == "teacher_forced":
+ if target_tokens.size == 0:
+ raise ValueError("teacher_forced mode requires a continuation")
+ total_steps = int(target_tokens.size)
+ elif mode == "closed_loop":
+ if target_tokens.size > 0:
+ if steps is None:
+ steps = int(target_tokens.size)
+ elif steps != int(target_tokens.size):
+ raise ValueError("closed_loop steps must match continuation length when both are provided")
+ if steps is None:
+ raise ValueError("closed_loop mode requires steps or a continuation")
+ total_steps = int(steps)
+ else:
+ raise ValueError(f"unknown rollout mode: {mode}")
+
+ checkpoint_values = tuple(sorted(set(checkpoints or (total_steps,))))
+ if not checkpoint_values:
+ raise ValueError("checkpoints must not be empty")
+ if checkpoint_values[0] < 1 or checkpoint_values[-1] > total_steps:
+ raise ValueError("checkpoints must lie within the rollout length")
+
+ rng = np.random.default_rng(seed)
+ generated: list[int] = []
+ context = prompt_tokens.astype(np.uint8, copy=True).tolist()
+ checkpoint_set = set(checkpoint_values)
+ match_count = 0
+ points: list[RolloutCurvePoint] = []
+
+ for step in range(total_steps):
+ prefix = np.asarray(context, dtype=np.uint8)
+ probabilities = _normalize_probabilities(np.asarray(model.predict_proba(prefix), dtype=np.float64))
+ predicted_token = int(np.argmax(probabilities))
+
+ if mode == "teacher_forced":
+ next_token = int(target_tokens[step])
+ else:
+ next_token = _sample_next_token(probabilities, temperature=temperature, greedy=greedy, rng=rng)
+
+ if target_tokens.size > step and predicted_token == int(target_tokens[step]):
+ match_count += 1
+
+ generated.append(next_token)
+ context.append(next_token)
+ step_count = step + 1
+ if step_count in checkpoint_set:
+ sequence_tokens = np.asarray(context, dtype=np.uint8)
+ score = model.score(sequence_tokens)
+ match_rate = None
+ if target_tokens.size > 0:
+ match_rate = match_count / float(step_count)
+ points.append(
+ RolloutCurvePoint(
+ step=step_count,
+ bits_per_byte=float(score.bits_per_byte),
+ match_rate=match_rate,
+ )
+ )
+
+ generated_tokens = np.asarray(generated, dtype=np.uint8)
+ return RolloutCurveEvaluation(
+ mode=mode,
+ prompt_tokens=prompt_tokens,
+ target_tokens=target_tokens,
+ generated_tokens=generated_tokens,
+ sequence_tokens=np.asarray(context, dtype=np.uint8),
+ checkpoints=tuple(points),
+ )
+
+
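+# Fit one model on the source corpus, score it zero-shot on the target, and
+# optionally fit a fresh model on the target so transfer_gap_bits_per_byte
+# (zero-shot minus scratch) can be derived.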
+def evaluate_transfer_probe(
+ model_factory: Callable[[], SupportsDatasetFit | SupportsSequenceScoring],
+ source_train: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object],
+ target_train: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object] | None = None,
+ *,
+ source_eval: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object] | None = None,
+ target_eval: str | bytes | bytearray | memoryview | np.ndarray | Sequence[int] | Sequence[object] | None = None,
+) -> TransferProbeReport:
+ if target_train is None and target_eval is None:
+ raise ValueError("transfer probe requires target_train or target_eval")
+
+ source_model = model_factory()
+ if not hasattr(source_model, "fit") or not hasattr(source_model, "score"):
+ raise TypeError("transfer probe requires models with fit(...) and score(...)")
+
+ source_fit = source_model.fit(source_train)
+ source_eval_report = evaluate_dataset(source_model, source_train if source_eval is None else source_eval)
+ target_eval_data = target_eval if target_eval is not None else target_train
+ assert target_eval_data is not None
+ target_zero_shot = evaluate_dataset(source_model, target_eval_data)
+
+ target_scratch_fit = None
+ target_scratch_eval = None
+ if target_train is not None:
+ scratch_model = model_factory()
+ if not hasattr(scratch_model, "fit") or not hasattr(scratch_model, "score"):
+ raise TypeError("transfer probe requires models with fit(...) and score(...)")
+ target_scratch_fit = scratch_model.fit(target_train)
+ target_scratch_eval = evaluate_dataset(scratch_model, target_eval_data)
+
+ return TransferProbeReport(
+ source_fit=source_fit,
+ source_eval=source_eval_report,
+ target_zero_shot=target_zero_shot,
+ target_scratch_fit=target_scratch_fit,
+ target_scratch_eval=target_scratch_eval,
+ )
+
+
+RolloutCurveMode = RolloutMode
+RolloutCheckpoint = RolloutCurvePoint
+RolloutCurve = RolloutCurveEvaluation
+TransferEvaluation = TransferProbeReport
+SupportsNextTokenProbabilities = SupportsPredictProba
+SupportsSequenceScore = SupportsSequenceScoring
+score_dataset = evaluate_dataset
+
+
+__all__ = [
+ "DatasetEvaluation",
+ "RolloutCheckpoint",
+ "RolloutCurve",
+ "RolloutCurveMode",
+ "RolloutCurveEvaluation",
+ "RolloutCurvePoint",
+ "SupportsDatasetFit",
+ "SupportsNextTokenProbabilities",
+ "SupportsPredictProba",
+ "SupportsSequenceScore",
+ "SupportsSequenceScoring",
+ "TransferEvaluation",
+ "TransferProbeReport",
+ "evaluate_dataset",
+ "evaluate_rollout_curve",
+ "evaluate_transfer_probe",
+ "score_dataset",
+]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/train_modes.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/train_modes.py
new file mode 100644
index 0000000000..42b9aa88d1
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/train_modes.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Literal
+
+TrainStateMode = Literal["detached", "through_state"]
+
+
+@dataclass(frozen=True)
+class TrainModeConfig:
+ state_mode: TrainStateMode = "detached"
+ slow_update_stride: int = 1
+ rollout_checkpoints: tuple[int, ...] = ()
+ rollout_checkpoint_stride: int | None = None
+ include_final_checkpoint: bool = True
+
+ def __post_init__(self) -> None:
+ if self.state_mode not in {"detached", "through_state"}:
+ raise ValueError("state_mode must be 'detached' or 'through_state'")
+ if self.slow_update_stride < 1:
+ raise ValueError("slow_update_stride must be >= 1")
+ if self.rollout_checkpoint_stride is not None and self.rollout_checkpoint_stride < 1:
+ raise ValueError("rollout_checkpoint_stride must be >= 1")
+ if any(step < 1 for step in self.rollout_checkpoints):
+ raise ValueError("rollout_checkpoints must contain positive step indices")
+
+ @property
+ def uses_detached_state(self) -> bool:
+ return self.state_mode == "detached"
+
+ @property
+ def uses_through_state(self) -> bool:
+ return self.state_mode == "through_state"
+
+ @property
+ def uses_sparse_slow_updates(self) -> bool:
+ return self.slow_update_stride > 1
+
+ def should_update_slow(self, step_index: int) -> bool:
+ if step_index < 0:
+ raise ValueError("step_index must be >= 0")
+ return (step_index + 1) % self.slow_update_stride == 0
+        return (step_index + 1) % self.slow_update_stride == 0
+
+ def resolve_rollout_checkpoints(self, total_steps: int) -> tuple[int, ...]:
+ if total_steps < 1:
+ raise ValueError("total_steps must be >= 1")
+ if any(step > total_steps for step in self.rollout_checkpoints):
+ raise ValueError("rollout_checkpoints must lie within the rollout length")
+
+ checkpoints = set(self.rollout_checkpoints)
+ if self.rollout_checkpoint_stride is not None:
+ checkpoints.update(range(self.rollout_checkpoint_stride, total_steps + 1, self.rollout_checkpoint_stride))
+ if self.include_final_checkpoint:
+ checkpoints.add(total_steps)
+
+ return tuple(sorted(step for step in checkpoints if 1 <= step <= total_steps))
+
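+# Example (sketch): TrainModeConfig(rollout_checkpoint_stride=4)
+# .resolve_rollout_checkpoints(10) returns (4, 8, 10): stride multiples plus
+# the final step, merged with any explicit rollout_checkpoints.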
+
+__all__ = ["TrainModeConfig", "TrainStateMode"]
diff --git a/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/views.py b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/views.py
new file mode 100644
index 0000000000..be9b594c27
--- /dev/null
+++ b/records/track_non_record_16mb/2026-03-30_OPC_CausalPackedMemory_NativeFullSpecClean/vendor/open_predictive_coder/views.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import numpy as np
+
+from .config import LatentConfig
+from .latents import LatentObservation
+
+
+class ByteLatentFeatureView:
+ def __init__(self, max_patch_size: int):
+ if max_patch_size < 1:
+ raise ValueError("max_patch_size must be >= 1")
+ self.max_patch_size = max_patch_size
+
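+    # Mirrors encode(): prediction_error and patch_summary are assumed to be
+    # reservoir_features wide each, followed by the global state, the latent,
+    # and three scalars (novelty, normalized patch length, boundary flag).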
+ @staticmethod
+ def feature_dim(config: LatentConfig) -> int:
+ return (
+ config.reservoir_features
+ + config.reservoir_features
+ + config.global_dim
+ + config.latent_dim
+ + 3
+ )
+
+ def encode(self, observation: LatentObservation) -> np.ndarray:
+ return np.concatenate(
+ [
+ observation.prediction_error,
+ observation.patch_summary,
+ observation.global_state,
+ observation.latent,
+ np.array(
+ [
+ observation.novelty,
+ observation.patch_length / self.max_patch_size,
+ 1.0 if observation.boundary else 0.0,
+ ],
+ dtype=np.float64,
+ ),
+ ]
+ )
+
+
+__all__ = [
+ "ByteLatentFeatureView",
+]