Skip to main content
← dvs documentation Rust API reference

dvs/files/
status.rs

1use std::path::{Component, Path, PathBuf};
2use std::sync::Mutex;
3
4use anyhow::Result;
5use fs_err as fs;
6use rayon::prelude::*;
7use serde::{Deserialize, Serialize};
8use walkdir::WalkDir;
9
10use crate::cache::{HashCache, try_open_cache};
11use crate::files::metadata::FileMetadata;
12use crate::paths::DvsPaths;
13use crate::utils::get_threadpool;
14use crate::{Status, cache};
15
16#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
17pub struct FileStatus {
18    pub path: PathBuf,
19    #[serde(flatten)]
20    pub detail: StatusDetail,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
24#[serde(untagged)]
25pub enum StatusDetail {
26    Success {
27        status: Status,
28        #[serde(skip_serializing_if = "Option::is_none")]
29        metadata: Option<FileMetadata>,
30    },
31    Error {
32        error: String,
33    },
34}
35
36/// Which paths to get status for
37/// eg you can pass dir1/ dir2/ and it will expand to dir1/* dir2/*
38/// If `recursive` is `true`, then it will expand to dir1/**/* dir2/**/*
39#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
40pub struct StatusFilter {
41    paths: Vec<PathBuf>,
42    recursive: bool,
43}
44
/// Lexically resolves the `.` and `..` components of `p` without touching the filesystem.
///
/// Canonicalizing is not an option here: the path might not exist, and we want the path as
/// written relative to the directory, so symlinks must not be resolved.
///
/// Returns `None` when a `..` has nothing left to remove, i.e. the path would climb above
/// where it started.
fn normalize_path(p: PathBuf) -> Option<PathBuf> {
    p.components()
        .try_fold(PathBuf::new(), |mut resolved, part| {
            match part {
                // `.` never changes the location.
                Component::CurDir => {}
                // `..` removes the last resolved component; failing to do so means escaping.
                Component::ParentDir => {
                    if !resolved.pop() {
                        return None;
                    }
                }
                Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                    resolved.push(part)
                }
            }
            Some(resolved)
        })
}
62
63impl StatusFilter {
64    /// Create a filter from user-provided paths (relative to cwd) and a recursive flag.
65    /// Translates cwd-relative paths to repo-root-relative using `dvs_paths.cwd_relative_to_root()`.
66    pub fn from_user_paths(
67        user_paths: Vec<PathBuf>,
68        recursive: bool,
69        dvs_paths: &DvsPaths,
70    ) -> Self {
71        let cwd_prefix = dvs_paths.cwd_relative_to_root();
72        let repo_root = dvs_paths.repo_root();
73        let paths = user_paths
74            .into_iter()
75            .filter_map(|p| {
76                if p.is_absolute() {
77                    p.strip_prefix(repo_root)
78                        .ok()
79                        .map(|r| r.to_path_buf())
80                        .and_then(normalize_path)
81                } else {
82                    let joined = if let Some(prefix) = cwd_prefix {
83                        prefix.join(&p)
84                    } else {
85                        p
86                    };
87                    normalize_path(joined)
88                }
89            })
90            .collect();
91        StatusFilter { paths, recursive }
92    }
93
94    fn matches(&self, tracked_path: &Path) -> bool {
95        self.paths.iter().any(|filter_path| {
96            // Exact match (user passed a file path)
97            tracked_path == filter_path
98                // Recursive: any descendant
99                || (self.recursive && tracked_path.starts_with(filter_path))
100                // Non-recursive: direct child
101                || (!self.recursive && tracked_path.parent() == Some(filter_path.as_path()))
102        })
103    }
104}
105
106fn get_file_status(
107    paths: &DvsPaths,
108    relative_path: impl AsRef<Path>,
109    cache: Option<&Mutex<HashCache>>,
110) -> Result<(Status, Option<FileMetadata>)> {
111    let dvs_file_path = paths.metadata_path(relative_path.as_ref());
112    if !dvs_file_path.is_file() {
113        return Ok((Status::Untracked, None));
114    }
115    let existing_metadata: FileMetadata = serde_json::from_reader(fs::File::open(dvs_file_path)?)?;
116    // If we have read the metadata, but we can't find the original file
117    let file_path = paths.file_path(relative_path.as_ref());
118    if !file_path.is_file() {
119        return Ok((Status::Absent, Some(existing_metadata)));
120    }
121    let rel_str = relative_path.as_ref().to_string_lossy();
122    let (hashes, size) = cache::hashes_for_file(&file_path, &rel_str, cache)?;
123
124    if existing_metadata.hashes == hashes && existing_metadata.size == size {
125        Ok((Status::Current, Some(existing_metadata)))
126    } else {
127        Ok((Status::Unsynced, Some(existing_metadata)))
128    }
129}
130
131pub fn get_status(paths: &DvsPaths, filter: Option<&StatusFilter>) -> Result<Vec<FileStatus>> {
132    let dvs_directory = paths.metadata_folder();
133    log::debug!("Scanning metadata folder: {}", dvs_directory.display());
134    let cache = try_open_cache(paths);
135
136    // Collect entries first so we can process in parallel
137    let entries: Vec<PathBuf> = WalkDir::new(&dvs_directory)
138        .into_iter()
139        .filter_map(|e| e.ok())
140        .filter(|e| e.file_type().is_file())
141        .filter(|e| {
142            e.path()
143                .extension()
144                .map(|ext| ext == "dvs")
145                .unwrap_or(false)
146        })
147        .map(|e| e.into_path())
148        .collect();
149
150    if entries.is_empty() {
151        return Ok(Vec::new());
152    }
153
154    let pool = get_threadpool(entries.len())?;
155
156    let mut results: Vec<FileStatus> = pool.install(|| {
157        entries
158            .into_par_iter()
159            .filter_map(|dvs_path| {
160                let relative = match dvs_path.strip_prefix(&dvs_directory) {
161                    Ok(r) => r.with_extension(""),
162                    Err(e) => {
163                        return Some(FileStatus {
164                            path: dvs_path,
165                            detail: StatusDetail::Error {
166                                error: format!("failed to determine relative path: {e}"),
167                            },
168                        });
169                    }
170                };
171                if let Some(f) = filter {
172                    if !f.matches(&relative) {
173                        return None;
174                    }
175                }
176                let detail = match get_file_status(paths, &relative, cache.as_ref()) {
177                    Ok((status, file_metadata)) => StatusDetail::Success {
178                        status,
179                        metadata: file_metadata,
180                    },
181                    Err(e) => StatusDetail::Error {
182                        error: e.to_string(),
183                    },
184                };
185                Some(FileStatus {
186                    path: relative.to_path_buf(),
187                    detail,
188                })
189            })
190            .collect()
191    });
192    results.sort_by(|a, b| a.path.cmp(&b.path));
193
194    log::debug!("Found {} tracked files", results.len());
195    Ok(results)
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Compression;
    use crate::testutil::{create_file, create_temp_git_repo, init_dvs_repo};
    use uuid::Uuid;

    /// Builds `DvsPaths` with the repository root acting as the current working directory.
    fn make_paths(root: &Path, config: &crate::config::Config) -> DvsPaths {
        DvsPaths::new(
            root.to_path_buf(),
            root.to_path_buf(),
            config.metadata_folder_name(),
        )
        .unwrap()
    }

    /// Opens the hash cache database inside the repository's cache folder.
    fn make_cache(paths: &DvsPaths) -> Mutex<HashCache> {
        Mutex::new(HashCache::open(&paths.cache_folder().join("dvs.db")).unwrap())
    }

    #[test]
    fn get_file_status_returns_untracked_for_new_file() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let paths = make_paths(&root, &config);
        create_file(&root, "new.txt", b"content");

        let cache = make_cache(&paths);
        let (status, metadata) = get_file_status(&paths, "new.txt", Some(&cache)).unwrap();
        assert_eq!(status, Status::Untracked);
        assert!(metadata.is_none());
    }

    #[test]
    fn get_file_status_returns_current_for_synced_file() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);
        let file_path = create_file(&root, "synced.txt", b"content");

        let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, None).unwrap();
        metadata
            .save(
                Uuid::new_v4(),
                &file_path,
                backend,
                &paths,
                "synced.txt",
                None,
            )
            .unwrap();

        let cache = make_cache(&paths);
        let (status, metadata) = get_file_status(&paths, "synced.txt", Some(&cache)).unwrap();
        assert_eq!(status, Status::Current);
        assert!(metadata.is_some());
    }

    #[test]
    fn get_file_status_returns_absent_when_file_deleted() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);
        let file_path = create_file(&root, "deleted.txt", b"content");

        let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, None).unwrap();
        metadata
            .save(
                Uuid::new_v4(),
                &file_path,
                backend,
                &paths,
                "deleted.txt",
                None,
            )
            .unwrap();

        // Delete the original file
        fs::remove_file(&file_path).unwrap();

        let cache = make_cache(&paths);
        let (status, metadata) = get_file_status(&paths, "deleted.txt", Some(&cache)).unwrap();
        assert_eq!(status, Status::Absent);
        assert!(metadata.is_some());
    }

    #[test]
    fn get_file_status_returns_unsynced_when_file_modified() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);
        let file_path = create_file(&root, "modified.txt", b"original");

        let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, None).unwrap();
        metadata
            .save(
                Uuid::new_v4(),
                &file_path,
                backend,
                &paths,
                "modified.txt",
                None,
            )
            .unwrap();

        // Modify the file
        fs::write(&file_path, b"changed content").unwrap();

        let cache = make_cache(&paths);
        let (status, metadata) = get_file_status(&paths, "modified.txt", Some(&cache)).unwrap();
        assert_eq!(status, Status::Unsynced);
        assert!(metadata.is_some());
    }

    #[test]
    fn get_status_returns_all_tracked_files() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);

        // Add multiple files
        for name in ["a.txt", "b.txt", "subdir/c.txt"] {
            let file_path = create_file(&root, name, name.as_bytes());
            let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, None).unwrap();
            metadata
                .save(Uuid::new_v4(), &file_path, backend, &paths, name, None)
                .unwrap();
        }

        let statuses = get_status(&paths, None).unwrap();
        assert_eq!(statuses.len(), 3);

        // Every tracked file is reported once, sorted by path
        let reported: Vec<&Path> = statuses.iter().map(|entry| entry.path.as_path()).collect();
        assert_eq!(
            reported,
            [
                Path::new("a.txt"),
                Path::new("b.txt"),
                Path::new("subdir/c.txt")
            ]
        );

        // All should be Current
        for entry in &statuses {
            match &entry.detail {
                StatusDetail::Success { status, metadata } => {
                    assert_eq!(*status, Status::Current);
                    assert!(metadata.is_some());
                }
                StatusDetail::Error { error } => panic!("unexpected error: {error}"),
            }
        }
    }

    #[test]
    fn get_status_returns_empty_vec_for_repo_with_no_tracked_files() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let paths = make_paths(&root, &config);

        let statuses = get_status(&paths, None).unwrap();
        assert!(statuses.is_empty());
    }

    #[test]
    fn save_local_updates_metadata_when_content_matches_different_file() {
        // - Add file A with content "foo" (hash H1)
        // - Add file B with content "bar" (hash H2)
        // - Change file B's content to "foo" (now hash H1)
        // - Run `add` on B
        // => B's metadata is updated to hash H1
        let (_tmp, root) = create_temp_git_repo();
        let (config, dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);

        // Add file A with content "foo" (hash H1)
        let file_a = create_file(&root, "a.txt", b"foo");
        let metadata_a = FileMetadata::from_file(&file_a, Compression::Zstd, None).unwrap();
        metadata_a
            .save(Uuid::new_v4(), &file_a, backend, &paths, "a.txt", None)
            .unwrap();
        let hash_h1 = metadata_a.hashes.blake3.clone();

        // Add file B with content "bar" (hash H2)
        let file_b = create_file(&root, "b.txt", b"bar");
        let metadata_b = FileMetadata::from_file(&file_b, Compression::Zstd, None).unwrap();
        metadata_b
            .save(Uuid::new_v4(), &file_b, backend, &paths, "b.txt", None)
            .unwrap();
        let hash_h2 = metadata_b.hashes.blake3.clone();
        assert_ne!(hash_h1, hash_h2);

        // Change file B's content to "foo" (now hash H1)
        fs::write(&file_b, b"foo").unwrap();

        // Run add on B with new content
        let metadata_b_new = FileMetadata::from_file(&file_b, Compression::Zstd, None).unwrap();
        assert_eq!(metadata_b_new.hashes.blake3, hash_h1);

        metadata_b_new
            .save(Uuid::new_v4(), &file_b, backend, &paths, "b.txt", None)
            .unwrap();

        // Verify metadata was updated
        let dvs_file = dvs_dir.join("b.txt.dvs");
        let stored: FileMetadata =
            serde_json::from_reader(fs::File::open(&dvs_file).unwrap()).unwrap();

        assert_eq!(
            stored.hashes.blake3, hash_h1,
            "Metadata should be updated to new hash"
        );

        let cache = make_cache(&paths);
        let (status, _metadata) = get_file_status(&paths, "b.txt", Some(&cache)).unwrap();
        assert_eq!(status, Status::Current);
    }

    /// Helper to set up a repo with files at various directory depths.
    /// Returns (TempDir, DvsPaths) with tracked files:
    ///   "a.txt", "dir1/b.txt", "dir1/sub/c.txt", "dir2/d.txt"
    fn setup_filtered_repo() -> (tempfile::TempDir, DvsPaths) {
        let (tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);

        for name in ["a.txt", "dir1/b.txt", "dir1/sub/c.txt", "dir2/d.txt"] {
            let file_path = create_file(&root, name, name.as_bytes());
            let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, None).unwrap();
            metadata
                .save(Uuid::new_v4(), &file_path, backend, &paths, name, None)
                .unwrap();
        }
        (tmp, paths)
    }

    /// Checks `StatusFilter::matches` for each `(filter_paths, test_path, expected)` case.
    /// Filter paths are normalized first; those that escape the root are dropped.
    fn run_filter_cases(cases: Vec<(&[&str], &str, bool)>, recursive: bool) {
        for (filter_paths, test_path, expected) in cases {
            let filter = StatusFilter {
                paths: filter_paths
                    .iter()
                    .filter_map(|p| normalize_path(PathBuf::from(p)))
                    .collect(),
                recursive,
            };
            assert_eq!(
                filter.matches(Path::new(test_path)),
                expected,
                "filter={filter_paths:?} recursive={recursive} path={test_path:?}"
            );
        }
    }

    #[test]
    fn status_filter_matches_non_recursive() {
        // (filter_paths, test_path, expected)
        let cases: Vec<(&[&str], &str, bool)> = vec![
            // direct child matches
            (&["dir1"], "dir1/b.txt", true),
            // nested child does NOT match
            (&["dir1"], "dir1/sub/c.txt", false),
            // exact file match
            (&["dir2/d.txt"], "dir2/d.txt", true),
            // exact file: different file does NOT match
            (&["dir2/d.txt"], "dir2/e.txt", false),
            // "." matches root-level files
            (&["."], "a.txt", true),
            // "." does NOT match nested files
            (&["."], "dir1/b.txt", false),
            // "../foo" escapes root → dropped, matches nothing
            (&["../foo"], "foo", false),
            // "dir1/../dir2" normalizes to "dir2", matches direct child
            (&["dir1/../dir2"], "dir2/d.txt", true),
            // "dir1/.." normalizes to root, matches root-level files
            (&["dir1/.."], "a.txt", true),
        ];
        run_filter_cases(cases, false);
    }

    #[test]
    fn status_filter_matches_recursive() {
        // (filter_paths, test_path, expected)
        let cases: Vec<(&[&str], &str, bool)> = vec![
            // direct child matches
            (&["dir1"], "dir1/b.txt", true),
            // nested child matches
            (&["dir1"], "dir1/sub/c.txt", true),
            // unrelated dir does NOT match
            (&["dir1"], "dir2/d.txt", false),
            // "." matches everything recursively
            (&["."], "a.txt", true),
            (&["."], "dir1/b.txt", true),
            (&["."], "dir1/sub/c.txt", true),
            // "../foo" escapes root → dropped
            (&["../foo"], "foo", false),
            // "a/../../x" escapes root → dropped
            (&["a/../../x"], "x", false),
            // "dir1/../dir2" normalizes to "dir2", matches descendants
            (&["dir1/../dir2"], "dir2/d.txt", true),
        ];
        run_filter_cases(cases, true);
    }

    #[test]
    fn get_status_with_filter() {
        let (_tmp, paths) = setup_filtered_repo();

        // Relative path filter
        let filter = StatusFilter {
            paths: vec![PathBuf::from("dir1")],
            recursive: false,
        };
        let statuses = get_status(&paths, Some(&filter)).unwrap();
        assert_eq!(statuses.len(), 1);
        assert_eq!(statuses[0].path, PathBuf::from("dir1/b.txt"));

        // Absolute path filter via from_user_paths
        let abs_path = paths.repo_root().join("dir1/b.txt");
        let filter = StatusFilter::from_user_paths(vec![abs_path], false, &paths);
        let statuses = get_status(&paths, Some(&filter)).unwrap();
        assert_eq!(statuses.len(), 1);
        assert_eq!(statuses[0].path, PathBuf::from("dir1/b.txt"));
    }

    #[test]
    fn from_user_paths_with_subdirectory_cwd() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);

        // Create subdirectories so canonicalize works in DvsPaths::new
        fs::create_dir_all(root.join("subdir/deep")).unwrap();
        fs::create_dir_all(root.join("dir2")).unwrap();

        // From subdir/: ../foo → resolves to "foo" (valid)
        // The metadata folder name comes from the config, as in `make_paths`.
        let paths = DvsPaths::new(
            fs::canonicalize(root.join("subdir")).unwrap(),
            root.to_path_buf(),
            config.metadata_folder_name(),
        )
        .unwrap();
        let filter = StatusFilter::from_user_paths(vec![PathBuf::from("../foo")], false, &paths);
        assert_eq!(filter.paths, vec![PathBuf::from("foo")]);

        // From subdir/: ../../foo → escapes root → dropped (empty)
        let filter = StatusFilter::from_user_paths(vec![PathBuf::from("../../foo")], false, &paths);
        assert!(
            filter.paths.is_empty(),
            "../../foo should escape root and be dropped"
        );

        // From subdir/deep/: ../../foo → resolves to "foo" (valid, 2 levels up = root)
        let paths_deep = DvsPaths::new(
            fs::canonicalize(root.join("subdir/deep")).unwrap(),
            root.to_path_buf(),
            config.metadata_folder_name(),
        )
        .unwrap();
        let filter =
            StatusFilter::from_user_paths(vec![PathBuf::from("../../foo")], false, &paths_deep);
        assert_eq!(filter.paths, vec![PathBuf::from("foo")]);

        // From subdir/: ../dir2/file.txt → resolves to "dir2/file.txt"
        let filter =
            StatusFilter::from_user_paths(vec![PathBuf::from("../dir2/file.txt")], false, &paths);
        assert_eq!(filter.paths, vec![PathBuf::from("dir2/file.txt")]);

        // From subdir/: absolute path with .. like <root>/subdir/../a.txt → normalizes to "a.txt"
        let abs_with_dotdot = root.join("subdir/../a.txt");
        let filter = StatusFilter::from_user_paths(vec![abs_with_dotdot], false, &paths);
        assert_eq!(filter.paths, vec![PathBuf::from("a.txt")]);
    }
}