Skip to main content
← dvs documentation Rust API reference

dvs/
globbing.rs

1use std::collections::HashSet;
2use std::ffi::OsStr;
3use std::path::PathBuf;
4
5use crate::paths::DvsPaths;
6use anyhow::{Result, anyhow, bail};
7use globset::{GlobBuilder, GlobMatcher};
8use walkdir::WalkDir;
9
10/// Builds the glob matching the rg behaviour
11/// eg "*.csv" will not match `some/dir/test.csv`
12fn build_glob_matcher(pattern: Option<&str>) -> Result<Option<GlobMatcher>> {
13    pattern
14        .map(|p| {
15            GlobBuilder::new(p)
16                .literal_separator(true)
17                .build()
18                .map(|g| g.compile_matcher())
19                .map_err(Into::into)
20        })
21        .transpose()
22}
23
24/// Resolve paths for `add` command following ripgrep-style behavior:
25/// - Explicit files: added directly (glob ignored)
26/// - Explicit directories: walked and filtered by glob
27/// - No paths + glob: walks cwd filtered by glob
28pub fn resolve_paths_for_add(
29    paths: Vec<PathBuf>,
30    glob_pattern: Option<&str>,
31    dvs_paths: &DvsPaths,
32) -> Result<HashSet<PathBuf>> {
33    let mut out = HashSet::new();
34    let glob_matcher = build_glob_matcher(glob_pattern)?;
35    let repo_root = dvs_paths.repo_root().canonicalize()?;
36    let metadata_root = dvs_paths.metadata_folder().canonicalize()?;
37
38    // If no paths given, default to cwd
39    let paths = if paths.is_empty() {
40        vec![PathBuf::from(".")]
41    } else {
42        paths
43    };
44
45    for path in paths {
46        let full_path = dvs_paths
47            .cwd()
48            .join(&path)
49            .canonicalize()
50            .map_err(|_| anyhow!("Path not found: {}", path.display()))?;
51
52        // Explicit file: we ignore the glob and add it to the file
53        if full_path.is_file() {
54            let relative_to_root = match full_path.strip_prefix(&repo_root) {
55                Ok(p) => p.to_path_buf(),
56                // Outside repo: insert original user path; validate_for_add will catch it
57                Err(_) => path.clone(),
58            };
59            out.insert(relative_to_root);
60        } else if full_path.is_dir() {
61            if let Some(matcher) = &glob_matcher {
62                for entry in WalkDir::new(&full_path).into_iter().filter_map(|e| e.ok()) {
63                    let entry_path = entry.path().canonicalize()?;
64                    // Skip directories and metadata root folder
65                    if !entry_path.is_file() || entry_path.starts_with(&metadata_root) {
66                        continue;
67                    }
68
69                    // Get path relative to the walked directory for matching
70                    let relative_to_dir = match entry_path.strip_prefix(&full_path) {
71                        Ok(p) => p,
72                        Err(_) => continue,
73                    };
74                    if matcher.is_match(relative_to_dir) {
75                        // Return path relative to repo root
76                        let relative_to_root = match entry_path.strip_prefix(&repo_root) {
77                            Ok(p) => p.to_path_buf(),
78                            Err(_) => continue,
79                        };
80                        out.insert(relative_to_root);
81                    }
82                }
83            }
84        } else {
85            bail!("Path is not a file or directory: {}", path.display());
86        }
87    }
88
89    Ok(out)
90}
91
92/// Resolve paths for `get` command by scanning tracked metadata:
93/// - Explicit files or directories: filtered to tracked files under them
94/// - Glob: applied to cwd-relative paths within matched files
95/// - No paths + no glob: returns all tracked files under cwd
96pub fn resolve_paths_for_get(
97    paths: Vec<PathBuf>,
98    glob_pattern: Option<&str>,
99    dvs_paths: &DvsPaths,
100) -> Result<HashSet<PathBuf>> {
101    let mut out = HashSet::new();
102    let glob_matcher = build_glob_matcher(glob_pattern)?;
103    let metadata_root = dvs_paths.metadata_folder().canonicalize()?;
104    // Get cwd-relative prefix for converting user paths to repo-root-relative
105    let cwd_prefix = dvs_paths.cwd_relative_to_root();
106
107    // Convert user paths to repo-relative directory filters
108    // If no paths given, default to cwd (or repo root if at root)
109    let dir_filters: Vec<PathBuf> = if paths.is_empty() {
110        vec![cwd_prefix.map(|p| p.to_path_buf()).unwrap_or_default()]
111    } else {
112        paths
113            .into_iter()
114            .map(|p| {
115                if p.is_absolute() {
116                    match p.strip_prefix(dvs_paths.repo_root()) {
117                        Ok(r) => r.to_path_buf(),
118                        Err(_) => p,
119                    }
120                } else if let Some(prefix) = cwd_prefix {
121                    prefix.join(&p)
122                } else {
123                    p
124                }
125            })
126            .collect()
127    };
128
129    // Walk all metadata files
130    for entry in WalkDir::new(&metadata_root)
131        .into_iter()
132        .filter_map(|e| e.ok())
133    {
134        let entry_path = entry.path();
135
136        // Skip directories and non .dvs files
137        if !entry_path.is_file() || entry_path.extension() != Some(OsStr::new("dvs")) {
138            continue;
139        }
140        // Get repo-relative tracked path (strip metadata folder and .dvs extension)
141        let relative_to_metadata = match entry_path.strip_prefix(&metadata_root) {
142            Ok(p) => p,
143            Err(_) => continue,
144        };
145        let tracked_path = relative_to_metadata.with_extension("");
146
147        // Filter: must be under one of user's directories (or exact match)
148        let under_filter = dir_filters
149            .iter()
150            .any(|dir| tracked_path.starts_with(dir) || &tracked_path == dir);
151        if !under_filter {
152            continue;
153        }
154
155        // Get cwd-relative path for glob matching
156        let cwd_relative = if let Some(prefix) = cwd_prefix {
157            match tracked_path.strip_prefix(prefix) {
158                Ok(p) => p.to_path_buf(),
159                Err(_) => continue, // File not under cwd
160            }
161        } else {
162            tracked_path.clone()
163        };
164
165        // Apply glob if present, otherwise match all
166        if glob_matcher
167            .as_ref()
168            .is_none_or(|g| g.is_match(&cwd_relative))
169        {
170            out.insert(tracked_path);
171        }
172    }
173
174    Ok(out)
175}
176
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs::{self, File};
    use tempfile::TempDir;

    /// Regular files placed in the working tree of the fixture repo.
    const WORKTREE_FILES: [&str; 5] = [
        "foo.txt",
        "bar.csv",
        "data/a.csv",
        "data/b.txt",
        "data/subdir/c.csv",
    ];

    /// Metadata files marking `foo.txt`, `data/a.csv` and `data/subdir/c.csv` as tracked.
    const METADATA_FILES: [&str; 3] = [
        ".dvs/foo.txt.dvs",
        ".dvs/data/a.csv.dvs",
        ".dvs/data/subdir/c.csv.dvs",
    ];

    /// Shorthand for building a `PathBuf` from a literal.
    fn rel(path: &str) -> PathBuf {
        PathBuf::from(path)
    }

    /// Builds a temporary repo (marked by `.git`) with working-tree files and a `.dvs`
    /// metadata folder, with cwd set to the repo root.
    fn setup_test_repo() -> (TempDir, DvsPaths) {
        let temp = TempDir::new().unwrap();
        let root = temp.path();

        // `.git` marks the repo root
        fs::create_dir(root.join(".git")).unwrap();

        // Working-tree files
        fs::create_dir_all(root.join("data/subdir")).unwrap();
        for name in WORKTREE_FILES {
            File::create(root.join(name)).unwrap();
        }

        // Metadata folder describing the tracked files
        fs::create_dir_all(root.join(".dvs/data/subdir")).unwrap();
        for name in METADATA_FILES {
            File::create(root.join(name)).unwrap();
        }

        let dvs_paths = DvsPaths::new(root.to_path_buf(), root.to_path_buf(), ".dvs").unwrap();
        (temp, dvs_paths)
    }

    #[test]
    fn add_explicit_file_ignores_glob() {
        let (_repo, dvs) = setup_test_repo();

        let found = resolve_paths_for_add(vec![rel("foo.txt")], Some("*.csv"), &dvs).unwrap();

        assert_eq!(found.len(), 1);
        assert!(found.contains(&rel("foo.txt")));
    }

    #[test]
    fn add_directory_with_glob_filters() {
        let (_repo, dvs) = setup_test_repo();

        let found = resolve_paths_for_add(vec![rel("data")], Some("*.csv"), &dvs).unwrap();

        assert!(found.contains(&rel("data/a.csv")));
        assert!(!found.contains(&rel("data/b.txt")));
        // *.csv should not match subdir/c.csv due to literal_separator
        assert!(!found.contains(&rel("data/subdir/c.csv")));
    }

    #[test]
    fn add_directory_with_recursive_glob() {
        let (_repo, dvs) = setup_test_repo();

        let found = resolve_paths_for_add(vec![rel("data")], Some("**/*.csv"), &dvs).unwrap();

        assert!(found.contains(&rel("data/a.csv")));
        assert!(found.contains(&rel("data/subdir/c.csv")));
        assert!(!found.contains(&rel("data/b.txt")));
    }

    #[test]
    fn add_path_not_found_errors() {
        let (_repo, dvs) = setup_test_repo();

        let outcome = resolve_paths_for_add(vec![rel("nonexistent")], None, &dvs);

        assert!(outcome.is_err());
        assert!(outcome.unwrap_err().to_string().contains("Path not found"));
    }

    #[test]
    fn get_exact_file_match() {
        let (_repo, dvs) = setup_test_repo();

        let found = resolve_paths_for_get(vec![rel("foo.txt")], None, &dvs).unwrap();

        assert_eq!(found.len(), 1);
        assert!(found.contains(&rel("foo.txt")));
    }

    #[test]
    fn get_directory_returns_all_tracked() {
        let (_repo, dvs) = setup_test_repo();

        let found = resolve_paths_for_get(vec![rel("data")], None, &dvs).unwrap();

        assert!(found.contains(&rel("data/a.csv")));
        assert!(found.contains(&rel("data/subdir/c.csv")));
        // b.txt is not tracked
        assert!(!found.contains(&rel("data/b.txt")));
    }

    #[test]
    fn get_with_glob_filters() {
        let (_repo, dvs) = setup_test_repo();

        // Empty paths defaults to cwd, then glob filters
        let found = resolve_paths_for_get(Vec::new(), Some("*.txt"), &dvs).unwrap();

        assert!(found.contains(&rel("foo.txt")));
        assert!(!found.contains(&rel("data/a.csv")));
    }

    // Do we want that behaviour?
    #[test]
    fn get_no_paths_defaults_to_cwd() {
        let (_repo, dvs) = setup_test_repo();

        let found = resolve_paths_for_get(Vec::new(), None, &dvs).unwrap();

        // Should return all tracked files
        assert!(found.contains(&rel("foo.txt")));
        assert!(found.contains(&rel("data/a.csv")));
        assert!(found.contains(&rel("data/subdir/c.csv")));
    }

    #[test]
    fn get_absolute_file_path() {
        let (repo, dvs) = setup_test_repo();
        let absolute = repo.path().canonicalize().unwrap().join("foo.txt");

        let found = resolve_paths_for_get(vec![absolute], None, &dvs).unwrap();

        assert_eq!(found.len(), 1);
        assert!(found.contains(&rel("foo.txt")));
    }

    #[test]
    fn get_absolute_directory_path() {
        let (repo, dvs) = setup_test_repo();
        let absolute = repo.path().canonicalize().unwrap().join("data");

        let found = resolve_paths_for_get(vec![absolute], None, &dvs).unwrap();

        assert!(found.contains(&rel("data/a.csv")));
        assert!(found.contains(&rel("data/subdir/c.csv")));
        assert!(!found.contains(&rel("foo.txt")));
    }

    #[test]
    fn add_absolute_file_path() {
        let (repo, dvs) = setup_test_repo();
        let absolute = repo.path().canonicalize().unwrap().join("foo.txt");

        let found = resolve_paths_for_add(vec![absolute], None, &dvs).unwrap();

        assert_eq!(found.len(), 1);
        assert!(found.contains(&rel("foo.txt")));
    }
}