From ccbe2baf8ccc652698c19e73f748711e069477c0 Mon Sep 17 00:00:00 2001 From: rpschoenburg Date: Thu, 2 Jan 2025 03:42:16 -0800 Subject: [PATCH 1/6] Fix for diff --- src/lib/src/repositories/diffs.rs | 98 ++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 22 deletions(-) diff --git a/src/lib/src/repositories/diffs.rs b/src/lib/src/repositories/diffs.rs index fe9bada16..d17b34c0a 100644 --- a/src/lib/src/repositories/diffs.rs +++ b/src/lib/src/repositories/diffs.rs @@ -75,6 +75,7 @@ pub fn diff( targets, ); + // If the user specifies two files without revisions, we will compare the files on disk if revision_1.is_none() && revision_2.is_none() && path_2.is_some() { // If we do not have revisions set, just compare the files on disk @@ -125,18 +126,26 @@ pub fn diff( (cpath_1, cpath_2) } else { // If no file2, compare with file1 at head. - let commit = Some(repositories::commits::head_commit(&repository)?); + let Some(commit) = repositories::commits::head_commit_maybe(&repository)? else { + let error = "Error: head commit not found".to_string(); + return Err(OxenError::basic_str(error)); + }; - ( - CommitPath { - commit, - path: path_1.as_ref().to_path_buf(), - }, - CommitPath { - commit: None, - path: path_1.as_ref().to_path_buf(), - }, - ) + let file_node = Some( + repositories::entries::get_file(&repository, &commit, &path_1)?.ok_or_else(|| { + OxenError::ResourceNotFound( + format!("Error: file {} not committed", path_1.as_ref().display()).into(), + ) + })?, + ); + + let hash = file_node.unwrap().hash.to_string(); + + let committed_file = util::fs::version_path_from_node(&repository, &hash, &path_1); + // log::debug!("committed file: {:?}", committed_file); + let result = repositories::diffs::diff_files(path_1, committed_file, keys, targets, vec![])?; + + return Ok(result); }; let result = diff_commits(&repository, cpath_1, cpath_2, keys, targets, vec![])?; @@ -174,12 +183,16 @@ pub fn diff_commits( if let Some(mut commit_2) = cpath_2.commit { // if there are merge conflicts, compare against the conflict commit instead + + + let merger = EntryMergeConflictReader::new(repo)?; if merger.has_conflicts()? { commit_2 = merger.get_conflict_commit()?.unwrap(); } + node_2 = Some( repositories::entries::get_file(repo, &commit_2, &cpath_2.path)?.ok_or_else(|| { OxenError::ResourceNotFound( @@ -966,7 +979,6 @@ fn read_dupes(repo: &LocalRepository, compare_id: &str) -> Result Result, OxenError> { - match repo.min_version() { - MinOxenVersion::V0_19_0 => core::v0_19_0::diff::get_dir_diff_entry_with_summary( + // Dir hashes db is cheaper to open than objects reader + let base_dir_hashes_db_path = ObjectDBReader::dir_hashes_db_dir(&repo.path, &base_commit.id); + let head_dir_hashes_db_path = ObjectDBReader::dir_hashes_db_dir(&repo.path, &head_commit.id); + + let base_dir_hashes_db: DBWithThreadMode = DBWithThreadMode::open_for_read_only( + &db::key_val::opts::default(), + dunce::simplified(&base_dir_hashes_db_path), + false, + )?; + + let head_dir_hashes_db: DBWithThreadMode = DBWithThreadMode::open_for_read_only( + &db::key_val::opts::default(), + dunce::simplified(&head_dir_hashes_db_path), + false, + )?; + + let maybe_base_dir_hash: Option = path_db::get_entry(&base_dir_hashes_db, &dir)?; + let maybe_head_dir_hash: Option = path_db::get_entry(&head_dir_hashes_db, &dir)?; + + match (maybe_base_dir_hash, maybe_head_dir_hash) { + (Some(base_dir_hash), Some(head_dir_hash)) => { + let base_dir_hash = base_dir_hash.to_string(); + let head_dir_hash = head_dir_hash.to_string(); + + if base_dir_hash == head_dir_hash { + Ok(None) + } else { + Ok(Some(DiffEntry::from_dir_with_summary( + repo, + Some(&dir), + base_commit, + Some(&dir), + head_commit, + summary, + DiffEntryStatus::Modified, + )?)) + } + } + (None, Some(_)) => Ok(Some(DiffEntry::from_dir_with_summary( repo, - dir, + None, base_commit, + Some(&dir), head_commit, summary, - ), - MinOxenVersion::V0_10_0 => core::v0_10_0::diff::get_dir_diff_entry_with_summary( + DiffEntryStatus::Added, + )?)), + (Some(_), None) => Ok(Some(DiffEntry::from_dir_with_summary( repo, - dir, + Some(&dir), base_commit, + None, head_commit, summary, - ), + DiffEntryStatus::Removed, + )?)), + (None, None) => Err(OxenError::basic_str( + "Could not calculate dir diff tree: dir does not exist in either commit.", + )), } } @@ -1287,9 +1343,7 @@ train/cat_2.jpg,cat,30.5,44.0,333,396 println!("ROUNT #"); println!("entries: {entries:?}"); - let bounding_box_path = Path::new("annotations") - .join(Path::new("train")) - .join(Path::new("bounding_box.csv")); + let bounding_box_path = Path::new("annotations").join(Path::new("train")).join(Path::new("bounding_box.csv")); let bounding_box_filename = bounding_box_path.to_str().unwrap(); let bounding_box_entry = entries From 93fd63f883c00a6ad5cb21cbc5a4522c978ba0ee Mon Sep 17 00:00:00 2001 From: rpschoenburg Date: Thu, 2 Jan 2025 04:00:39 -0800 Subject: [PATCH 2/6] Applied Linters --- src/lib/src/repositories/diffs.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/lib/src/repositories/diffs.rs b/src/lib/src/repositories/diffs.rs index d17b34c0a..6f948c87c 100644 --- a/src/lib/src/repositories/diffs.rs +++ b/src/lib/src/repositories/diffs.rs @@ -75,7 +75,6 @@ pub fn diff( targets, ); - // If the user specifies two files without revisions, we will compare the files on disk if revision_1.is_none() && revision_2.is_none() && path_2.is_some() { // If we do not have revisions set, just compare the files on disk @@ -139,11 +138,12 @@ pub fn diff( })?, ); - let hash = file_node.unwrap().hash.to_string(); + let hash = file_node.hash.to_string(); let committed_file = util::fs::version_path_from_node(&repository, &hash, &path_1); - // log::debug!("committed file: {:?}", committed_file); - let result = repositories::diffs::diff_files(path_1, committed_file, keys, targets, vec![])?; + // log::debug!("committed file: {:?}", committed_file); + let result = + repositories::diffs::diff_files(path_1, committed_file, keys, targets, vec![])?; return Ok(result); }; @@ -184,15 +184,12 @@ pub fn diff_commits( if let Some(mut commit_2) = cpath_2.commit { // if there are merge conflicts, compare against the conflict commit instead - - let merger = EntryMergeConflictReader::new(repo)?; if merger.has_conflicts()? { commit_2 = merger.get_conflict_commit()?.unwrap(); } - node_2 = Some( repositories::entries::get_file(repo, &commit_2, &cpath_2.path)?.ok_or_else(|| { OxenError::ResourceNotFound( @@ -1343,7 +1340,9 @@ train/cat_2.jpg,cat,30.5,44.0,333,396 println!("ROUNT #"); println!("entries: {entries:?}"); - let bounding_box_path = Path::new("annotations").join(Path::new("train")).join(Path::new("bounding_box.csv")); + let bounding_box_path = Path::new("annotations") + .join(Path::new("train")) + .join(Path::new("bounding_box.csv")); let bounding_box_filename = bounding_box_path.to_str().unwrap(); let bounding_box_entry = entries From 884f9488bbc5f4f0320e34db9c431f037caa5da6 Mon Sep 17 00:00:00 2001 From: rpschoenburg Date: Mon, 6 Jan 2025 12:16:17 -0800 Subject: [PATCH 3/6] typo fix --- src/lib/src/repositories/diffs.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/src/repositories/diffs.rs b/src/lib/src/repositories/diffs.rs index 6f948c87c..366ee0184 100644 --- a/src/lib/src/repositories/diffs.rs +++ b/src/lib/src/repositories/diffs.rs @@ -138,7 +138,7 @@ pub fn diff( })?, ); - let hash = file_node.hash.to_string(); + let hash = file_node.unwrap().hash.to_string(); let committed_file = util::fs::version_path_from_node(&repository, &hash, &path_1); // log::debug!("committed file: {:?}", committed_file); From 27df25776397886af6f392e684feeb1243dd5523 Mon Sep 17 00:00:00 2001 From: rpschoenburg Date: Mon, 6 Jan 2025 12:40:11 -0800 Subject: [PATCH 4/6] Fix for clippy warning --- src/lib/src/repositories/diffs.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/src/repositories/diffs.rs b/src/lib/src/repositories/diffs.rs index 366ee0184..363144e52 100644 --- a/src/lib/src/repositories/diffs.rs +++ b/src/lib/src/repositories/diffs.rs @@ -130,7 +130,7 @@ pub fn diff( return Err(OxenError::basic_str(error)); }; - let file_node = Some( + let (file_node,) = ( repositories::entries::get_file(&repository, &commit, &path_1)?.ok_or_else(|| { OxenError::ResourceNotFound( format!("Error: file {} not committed", path_1.as_ref().display()).into(), @@ -138,7 +138,7 @@ pub fn diff( })?, ); - let hash = file_node.unwrap().hash.to_string(); + let hash = file_node.hash.to_string(); let committed_file = util::fs::version_path_from_node(&repository, &hash, &path_1); // log::debug!("committed file: {:?}", committed_file); From 18f060945ce845c0730bdda4f85e63ea5f4c372e Mon Sep 17 00:00:00 2001 From: Greg Schoeninger Date: Mon, 6 Jan 2025 15:29:33 -0800 Subject: [PATCH 5/6] revert download changes --- src/lib/src/core/v0_19_0/download.rs | 46 +++++++++++++++------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/lib/src/core/v0_19_0/download.rs b/src/lib/src/core/v0_19_0/download.rs index 942846673..74dbf83b7 100644 --- a/src/lib/src/core/v0_19_0/download.rs +++ b/src/lib/src/core/v0_19_0/download.rs @@ -61,11 +61,11 @@ async fn r_download_entries( ) -> Result<(), OxenError> { log::debug!("downloading entries for {:?}", directory); - let mut entries: Vec = vec![]; + if let EMerkleTreeNode::VNode(_) = &node.node { + let mut entries: Vec = vec![]; - for child in &node.children { - match &child.node { - EMerkleTreeNode::File(file_node) => { + for child in &node.children { + if let EMerkleTreeNode::File(file_node) = &child.node { entries.push(Entry::CommitEntry(CommitEntry { commit_id: file_node.last_commit_id.to_string(), path: directory.join(&file_node.name), @@ -75,26 +75,8 @@ async fn r_download_entries( last_modified_nanoseconds: file_node.last_modified_nanoseconds, })); } - EMerkleTreeNode::Directory(dir_node) => { - let mut new_directory = directory.to_path_buf(); - new_directory.push(&dir_node.name); - - if child.has_children() { - Box::pin(r_download_entries( - remote_repo, - local_repo_path, - child, - &new_directory, - pull_progress, - )) - .await?; - } - } - _ => {} } - } - if !entries.is_empty() { log::debug!("downloading {} entries to working dir", entries.len()); core::v0_10_0::index::puller::pull_entries_to_working_dir( remote_repo, @@ -105,5 +87,25 @@ async fn r_download_entries( .await?; } + for child in &node.children { + log::debug!("downloading entry {:?}", child.hash); + + let mut new_directory = directory.to_path_buf(); + if let EMerkleTreeNode::Directory(dir_node) = &child.node { + new_directory.push(&dir_node.name); + } + + if child.has_children() { + Box::pin(r_download_entries( + remote_repo, + local_repo_path, + child, + &new_directory, + pull_progress, + )) + .await?; + } + } + Ok(()) } From ed20d4d32c6f81a67d093db47c6c8afc36641856 Mon Sep 17 00:00:00 2001 From: Greg Schoeninger Date: Mon, 6 Jan 2025 15:35:02 -0800 Subject: [PATCH 6/6] fix println when output is not supplied on text diff --- src/cli/src/cmd/diff.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cli/src/cmd/diff.rs b/src/cli/src/cmd/diff.rs index 2619319df..30277e43e 100644 --- a/src/cli/src/cmd/diff.rs +++ b/src/cli/src/cmd/diff.rs @@ -234,16 +234,16 @@ impl DiffCmd { result: &mut DiffResult, output: Option, ) -> Result<(), OxenError> { - match result { - DiffResult::Tabular(result) => { - let mut df = result.contents.clone(); - // Save to disk if we have an output - if let Some(file_path) = output { + // Save to disk if we have an output + if let Some(file_path) = output { + match result { + DiffResult::Tabular(result) => { + let mut df = result.contents.clone(); tabular::write_df(&mut df, file_path.clone())?; } - } - DiffResult::Text(_) => { - println!("Saving to disk not supported for text output"); + DiffResult::Text(_) => { + println!("Saving to disk not supported for text output"); + } } }