Skip to main content

jar_genesis/
replay.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use crate::git;
5use crate::hash;
6use crate::lean;
7
/// A parsed merge commit with genesis trailers.
///
/// One entry is produced by `collect_entries_ref` for every merge commit
/// whose message carries a `Genesis-Index` trailer.
struct GenesisCommitEntry {
    /// Parsed `Genesis-Commit` trailer with short review hashes expanded, or
    /// `serde_json::Value::Null` when the merge commit has no such trailer
    /// and therefore cannot be replayed.
    signed_commit: serde_json::Value,
    /// Parsed `Genesis-Index` trailer exactly as stored in the commit message.
    stored_index: serde_json::Value,
}
13
14/// Walk merge commits up to `end_ref` and collect genesis entries.
15fn collect_entries_ref(
16    genesis_commit: &str,
17    end_ref: &str,
18) -> Result<Vec<GenesisCommitEntry>, Box<dyn std::error::Error>> {
19    let merge_commits = git::log_merge_commits_ref(genesis_commit, end_ref)?;
20    let mut entries = Vec::new();
21
22    for (hash, message) in &merge_commits {
23        let index_json = match git::parse_trailer(message, "Genesis-Index") {
24            Some(json) => json,
25            None => continue, // Not a genesis merge commit
26        };
27
28        let commit_json = match git::parse_trailer(message, "Genesis-Commit") {
29            Some(json) => json,
30            None => {
31                eprintln!("WARNING: No Genesis-Commit trailer for merge {hash}. Cannot replay.");
32                // Still track the stored index
33                let stored_index: serde_json::Value = serde_json::from_str(&index_json)?;
34                entries.push(GenesisCommitEntry {
35                    signed_commit: serde_json::Value::Null,
36                    stored_index,
37                });
38                continue;
39            }
40        };
41
42        // Parse and expand short hashes in review rankings
43        let mut commit: serde_json::Value = serde_json::from_str(&commit_json)?;
44        expand_review_hashes(&mut commit);
45
46        let stored_index: serde_json::Value = serde_json::from_str(&index_json)?;
47        entries.push(GenesisCommitEntry {
48            signed_commit: commit,
49            stored_index,
50        });
51    }
52
53    Ok(entries)
54}
55
/// Expand short hashes in review rankings to full hashes.
///
/// Public entry point that forwards to the private `expand_review_hashes`;
/// `commit` is modified in place.
pub fn expand_review_hashes_public(commit: &mut serde_json::Value) {
    expand_review_hashes(commit);
}
60
61fn expand_review_hashes(commit: &mut serde_json::Value) {
62    let head = commit["id"].as_str().unwrap_or("").to_string();
63    let targets: Vec<String> = commit["comparisonTargets"]
64        .as_array()
65        .map(|arr| {
66            arr.iter()
67                .filter_map(|v| v.as_str().map(|s| s.to_string()))
68                .collect()
69        })
70        .unwrap_or_default();
71
72    let mut candidates = targets.clone();
73    candidates.push(head.clone());
74
75    if let Some(reviews) = commit["reviews"].as_array_mut() {
76        for review in reviews {
77            for field in &[
78                "difficultyRanking",
79                "noveltyRanking",
80                "designQualityRanking",
81            ] {
82                if let Some(ranking) = review[*field].as_array_mut() {
83                    for entry in ranking.iter_mut() {
84                        if let Some(h) = entry.as_str()
85                            && h.len() < 40
86                            && let Ok(full) = hash::expand_short_hash(h, &candidates)
87                        {
88                            *entry = serde_json::Value::String(full);
89                        }
90                    }
91                }
92            }
93        }
94    }
95}
96
97/// Find the commit hash of the last index with epoch < target epoch.
98fn find_prior_commit_hash(indices: &[serde_json::Value], epoch: u64) -> Option<String> {
99    let last = indices
100        .iter()
101        .rfind(|idx| idx["epoch"].as_u64().map(|e| e < epoch).unwrap_or(false))?;
102    last["commitHash"].as_str().map(|s| s.to_string())
103}
104
105/// Get the ranking snapshot for a commit based on its epoch.
106fn get_ranking_snapshot(
107    indices: &[serde_json::Value],
108    rankings: &HashMap<String, serde_json::Value>,
109    epoch: u64,
110) -> Option<serde_json::Value> {
111    let commit_hash = find_prior_commit_hash(indices, epoch)?;
112    rankings.get(&commit_hash).cloned()
113}
114
115/// Get variances from scores map for a commit based on its epoch.
116/// Returns [["hash", sigma2], ...] format for Lean's List (CommitId × Nat).
117fn get_variances_snapshot(
118    indices: &[serde_json::Value],
119    scores: &HashMap<String, serde_json::Value>,
120    epoch: u64,
121) -> Option<serde_json::Value> {
122    let commit_hash = find_prior_commit_hash(indices, epoch)?;
123    let scores_arr = scores.get(&commit_hash)?.as_array()?;
124    let variances: Vec<serde_json::Value> = scores_arr
125        .iter()
126        .filter_map(|s| {
127            let commit = s.get("commit")?;
128            let sigma2 = s.get("sigma2")?;
129            Some(serde_json::json!([commit, sigma2]))
130        })
131        .collect();
132    Some(serde_json::json!(variances))
133}
134
/// Result of incremental replay.
///
/// Filled in by `replay_incremental`; both maps are keyed by the `commitHash`
/// string of the evaluated index (empty string when the index has none).
struct ReplayResult {
    /// Evaluated index of every replayed commit, in replay order, with the
    /// `warnings` member removed.
    indices: Vec<serde_json::Value>,
    /// `ranking` value returned by `genesis_ranking` after each commit.
    rankings: HashMap<String, serde_json::Value>,
    /// `scores` value returned by `genesis_ranking` after each commit, for
    /// those calls whose output contained one.
    scores: HashMap<String, serde_json::Value>,
}
141
142/// Core incremental replay loop. Evaluates each signed commit incrementally.
143fn replay_incremental(
144    spec_dir: &Path,
145    signed_commits: &[serde_json::Value],
146) -> Result<ReplayResult, Box<dyn std::error::Error>> {
147    let mut indices: Vec<serde_json::Value> = Vec::new();
148    let mut rankings: HashMap<String, serde_json::Value> = HashMap::new();
149    let mut scores: HashMap<String, serde_json::Value> = HashMap::new();
150    let mut commits: Vec<serde_json::Value> = Vec::new();
151
152    for commit in signed_commits {
153        if commit.is_null() {
154            continue;
155        }
156
157        let pr_created_at = commit["prCreatedAt"]
158            .as_u64()
159            .or_else(|| commit["mergeEpoch"].as_u64())
160            .unwrap_or(0);
161
162        let ranking_snapshot = get_ranking_snapshot(&indices, &rankings, pr_created_at);
163        let variances_snapshot = get_variances_snapshot(&indices, &scores, pr_created_at);
164
165        // Build input for genesis_evaluate
166        let mut input = serde_json::json!({
167            "commit": commit,
168            "pastIndices": indices,
169        });
170        if let Some(ranking) = &ranking_snapshot {
171            input["ranking"] = ranking.clone();
172        }
173        // Pass variances (empty [] if no scores yet — Lean defaults to BT_SCALE)
174        input["variances"] = variances_snapshot.unwrap_or_else(|| serde_json::json!([]));
175
176        // Evaluate
177        let mut index: serde_json::Value = lean::invoke("genesis_evaluate", &input, spec_dir)?;
178
179        // Strip warnings for cache compatibility
180        if let Some(obj) = index.as_object_mut() {
181            obj.remove("warnings");
182        }
183
184        indices.push(index.clone());
185        commits.push(commit.clone());
186
187        // Compute ranking snapshot
188        let ranking_input = serde_json::json!({
189            "signedCommits": commits,
190            "indices": indices,
191        });
192        let ranking_output: serde_json::Value =
193            lean::invoke("genesis_ranking", &ranking_input, spec_dir)?;
194        let snapshot = ranking_output["ranking"].clone();
195
196        let commit_hash = index["commitHash"].as_str().unwrap_or("").to_string();
197        rankings.insert(commit_hash.clone(), snapshot);
198
199        // Capture scores (v3 BT output) if present
200        if let Some(scores_val) = ranking_output.get("scores") {
201            scores.insert(commit_hash, scores_val.clone());
202        }
203    }
204
205    Ok(ReplayResult {
206        indices,
207        rankings,
208        scores,
209    })
210}
211
212fn spec_dir() -> Result<std::path::PathBuf, Box<dyn std::error::Error>> {
213    let root = git::repo_root()?;
214    Ok(Path::new(&root).join("spec"))
215}
216
217/// Replay and verify genesis state from git history.
218pub fn verify() -> Result<(), Box<dyn std::error::Error>> {
219    let spec = spec_dir()?;
220    let genesis_commit = git::read_genesis_commit_hash(&spec)?;
221
222    if genesis_commit == "0000000000000000000000000000000000000000" {
223        eprintln!("Genesis not launched (genesisCommit is zero).");
224        return Ok(());
225    }
226
227    // Use origin/master to ensure we see all merge commits.
228    git::git_cmd(&["fetch", "origin", "master"])?;
229    let entries = collect_entries_ref(&genesis_commit, "origin/master")?;
230    let signed_commits: Vec<serde_json::Value> =
231        entries.iter().map(|e| e.signed_commit.clone()).collect();
232    let stored_indices: Vec<serde_json::Value> =
233        entries.iter().map(|e| e.stored_index.clone()).collect();
234
235    let replayable: Vec<&serde_json::Value> =
236        signed_commits.iter().filter(|c| !c.is_null()).collect();
237    eprintln!(
238        "Replaying {} of {} entries...",
239        replayable.len(),
240        stored_indices.len()
241    );
242
243    // Build ranking map incrementally
244    let result = replay_incremental(&spec, &signed_commits)?;
245
246    // Validate using genesis_validate
247    let input = serde_json::json!({
248        "indices": stored_indices,
249        "signedCommits": signed_commits.iter().filter(|c| !c.is_null()).collect::<Vec<_>>(),
250        "rankings": result.rankings,
251        "scores": result.scores,
252    });
253
254    let result: serde_json::Value = lean::invoke("genesis_validate", &input, &spec)?;
255    println!("{}", serde_json::to_string_pretty(&result)?);
256
257    let valid = result["valid"].as_bool().unwrap_or(false);
258    let errors = result["errors"].as_array().map(|a| a.len()).unwrap_or(0);
259
260    if valid {
261        eprintln!(
262            "Verified {} of {} indices. All match.",
263            replayable.len(),
264            stored_indices.len()
265        );
266        Ok(())
267    } else {
268        eprintln!(
269            "Verification failed: {errors} errors in {} replayable indices.",
270            replayable.len()
271        );
272        std::process::exit(1);
273    }
274}
275
276/// Replay, rebuild, and compare against genesis-state cache.
277pub fn verify_cache() -> Result<(), Box<dyn std::error::Error>> {
278    let spec = spec_dir()?;
279    let genesis_commit = git::read_genesis_commit_hash(&spec)?;
280
281    if genesis_commit == "0000000000000000000000000000000000000000" {
282        eprintln!("Genesis not launched (genesisCommit is zero).");
283        return Ok(());
284    }
285
286    // Use origin/master to read trailers — the working tree may be behind
287    // (e.g., during merge workflow where cargo build dirtied Cargo.lock).
288    git::git_cmd(&["fetch", "origin", "master"])?;
289    let entries = collect_entries_ref(&genesis_commit, "origin/master")?;
290    let signed_commits: Vec<serde_json::Value> =
291        entries.iter().map(|e| e.signed_commit.clone()).collect();
292
293    let _replayable: Vec<&serde_json::Value> =
294        signed_commits.iter().filter(|c| !c.is_null()).collect();
295
296    let result = replay_incremental(&spec, &signed_commits)?;
297
298    // Fetch cache
299    git::fetch("origin", "genesis-state")?;
300    let cache_json = git::show_file("origin/genesis-state:genesis.json")?;
301    let cache: Vec<serde_json::Value> = serde_json::from_str(&cache_json)?;
302
303    if result.indices.len() != cache.len() {
304        eprintln!(
305            "MISMATCH: rebuilt {} indices but cache has {}.",
306            result.indices.len(),
307            cache.len()
308        );
309        std::process::exit(1);
310    }
311
312    let mut errors = 0;
313
314    // Compare indices
315    for (i, (rebuilt, cached)) in result.indices.iter().zip(cache.iter()).enumerate() {
316        let r = serde_json::to_string(rebuilt)?;
317        let c = serde_json::to_string(cached)?;
318        if r != c {
319            let hash = rebuilt["commitHash"].as_str().unwrap_or("unknown");
320            eprintln!("MISMATCH at index {i} (commit {hash}):");
321            eprintln!("  rebuilt: {r}");
322            eprintln!("  cache:   {c}");
323            errors += 1;
324        }
325    }
326
327    // Compare rankings
328    let cached_ranking_json =
329        git::show_file("origin/genesis-state:ranking.json").unwrap_or_else(|_| "{}".to_string());
330    let cached_ranking: HashMap<String, serde_json::Value> =
331        serde_json::from_str(&cached_ranking_json)?;
332
333    if !cached_ranking.is_empty() {
334        if result.rankings.len() != cached_ranking.len() {
335            eprintln!(
336                "RANKING MISMATCH: rebuilt {} entries but cache has {}.",
337                result.rankings.len(),
338                cached_ranking.len()
339            );
340            errors += 1;
341        } else {
342            for (key, rebuilt_val) in &result.rankings {
343                if let Some(cached_val) = cached_ranking.get(key) {
344                    let r = serde_json::to_string(rebuilt_val)?;
345                    let c = serde_json::to_string(cached_val)?;
346                    if r != c {
347                        eprintln!("RANKING MISMATCH for commit {}:", &key[..8.min(key.len())]);
348                        eprintln!("  rebuilt: {r}");
349                        eprintln!("  cache:   {c}");
350                        errors += 1;
351                    }
352                } else {
353                    eprintln!(
354                        "RANKING MISMATCH: key {} not in cache.",
355                        &key[..8.min(key.len())]
356                    );
357                    errors += 1;
358                }
359            }
360        }
361    } else {
362        eprintln!("ranking.json not found or empty — skipping ranking verification.");
363    }
364
365    // Compare scores (if scores.json exists in cache)
366    let cached_scores_json =
367        git::show_file("origin/genesis-state:scores.json").unwrap_or_else(|_| "{}".to_string());
368    let cached_scores: HashMap<String, serde_json::Value> =
369        serde_json::from_str(&cached_scores_json)?;
370
371    if !cached_scores.is_empty() {
372        for (key, cached_val) in &cached_scores {
373            if let Some(rebuilt_val) = result.scores.get(key) {
374                let r = serde_json::to_string(rebuilt_val)?;
375                let c = serde_json::to_string(cached_val)?;
376                if r != c {
377                    eprintln!("SCORES MISMATCH for commit {}:", &key[..8.min(key.len())]);
378                    eprintln!("  rebuilt: {r}");
379                    eprintln!("  cache:   {c}");
380                    errors += 1;
381                }
382            } else {
383                eprintln!(
384                    "SCORES MISMATCH: key {} not in rebuilt scores.",
385                    &key[..8.min(key.len())]
386                );
387                errors += 1;
388            }
389        }
390    }
391
392    if errors == 0 {
393        eprintln!(
394            "Cache verified: {} indices match rebuilt state.",
395            result.indices.len()
396        );
397        Ok(())
398    } else {
399        eprintln!("Cache verification failed: {errors} mismatches.");
400        std::process::exit(1);
401    }
402}
403
404/// Replay and rebuild, outputting to stdout.
405pub fn rebuild() -> Result<(), Box<dyn std::error::Error>> {
406    let spec = spec_dir()?;
407    let genesis_commit = git::read_genesis_commit_hash(&spec)?;
408
409    if genesis_commit == "0000000000000000000000000000000000000000" {
410        eprintln!("Genesis not launched (genesisCommit is zero).");
411        return Ok(());
412    }
413
414    // Use origin/master to ensure we see all merge commits, even if
415    // the working tree HEAD is behind (e.g., Cargo.lock dirty from cargo build).
416    git::git_cmd(&["fetch", "origin", "master"])?;
417    let entries = collect_entries_ref(&genesis_commit, "origin/master")?;
418    let signed_commits: Vec<serde_json::Value> =
419        entries.iter().map(|e| e.signed_commit.clone()).collect();
420
421    let result = replay_incremental(&spec, &signed_commits)?;
422
423    eprintln!("=== genesis.json ===");
424    println!("{}", serde_json::to_string_pretty(&result.indices)?);
425    eprintln!("=== ranking.json ===");
426    println!(
427        "{}",
428        serde_json::to_string_pretty(&serde_json::json!(result.rankings))?
429    );
430    if !result.scores.is_empty() {
431        eprintln!("=== scores.json ===");
432        println!(
433            "{}",
434            serde_json::to_string_pretty(&serde_json::json!(result.scores))?
435        );
436    }
437    eprintln!(
438        "Rebuilt {} of {} indices.",
439        result.indices.len(),
440        entries.len()
441    );
442
443    Ok(())
444}