Skip to main content
← dvs documentation Rust API reference

dvs/files/
metadata.rs

1use std::path::Path;
2
3use anyhow::{Result, bail};
4use fs_err as fs;
5use serde::{Deserialize, Serialize};
6use uuid::Uuid;
7
8use crate::audit::{AuditEntry, AuditFile};
9use crate::{Backend, Compression, DvsPaths, Hashes, Outcome};
10
11/// The dvs metadata for a given file
12#[derive(Debug, Serialize, Deserialize, Clone)]
13pub struct FileMetadata {
14    pub hashes: Hashes,
15    pub size: u64,
16    pub created_by: String,
17    pub add_time: jiff::Timestamp,
18    pub compression: Compression,
19    #[serde(default, skip_serializing_if = "Option::is_none")]
20    pub message: Option<String>,
21}
22
23impl PartialEq for FileMetadata {
24    fn eq(&self, other: &Self) -> bool {
25        self.hashes == other.hashes && self.size == other.size
26    }
27}
28
29impl FileMetadata {
30    pub fn from_hashes(
31        hashes: Hashes,
32        size: u64,
33        compression: Compression,
34        message: Option<String>,
35    ) -> Self {
36        Self {
37            hashes,
38            size,
39            created_by: whoami::username().unwrap_or_else(|_| "unknown".to_string()),
40            add_time: jiff::Timestamp::now(),
41            compression,
42            message,
43        }
44    }
45
46    pub fn from_file(
47        path: impl AsRef<Path>,
48        compression: Compression,
49        message: Option<String>,
50    ) -> Result<Self> {
51        if !path.as_ref().is_file() {
52            bail!("Path {} is not a file", path.as_ref().display());
53        }
54
55        let (hashes, size) = Hashes::compute_from_path(path.as_ref(), &[])?;
56        let created_by = whoami::username()?;
57        let add_time = jiff::Timestamp::now();
58
59        Ok(Self {
60            hashes,
61            size,
62            created_by,
63            add_time,
64            message,
65            compression,
66        })
67    }
68
69    /// Returns whether the file already existed in the dvs folder and therefore is an update
70    /// and the compressed size if applicable.
71    /// Copies the source file to storage and saves metadata atomically (both succeed or neither).
72    pub fn save(
73        &self,
74        operation_id: Uuid,
75        source_file: impl AsRef<Path>,
76        backend: &dyn Backend,
77        paths: &DvsPaths,
78        relative_path: impl AsRef<Path>,
79        on_bytes: Option<&(dyn Fn(u64) + Send + Sync)>,
80    ) -> Result<(Outcome, Option<u64>)> {
81        let dvs_file_path = paths.metadata_path(relative_path.as_ref());
82        let dvs_file_exists = dvs_file_path.is_file();
83        let storage_exists = backend.exists(&self.hashes)?;
84
85        log::debug!(
86            "Saving {}: metadata_exists={}, storage_exists={}",
87            relative_path.as_ref().display(),
88            dvs_file_exists,
89            storage_exists
90        );
91
92        if dvs_file_exists && storage_exists {
93            // we read the file anyway to make sure it's not 2 files having the same hash
94            let existing: FileMetadata = serde_json::from_reader(fs::File::open(&dvs_file_path)?)?;
95            if existing == *self {
96                log::debug!(
97                    "File {} is already in sync",
98                    relative_path.as_ref().display()
99                );
100                return Ok((Outcome::Present, None));
101            }
102        }
103
104        // We do an atomic update, either everything works or we error
105        // 1. Create metadata dirs first
106        if let Some(parent) = dvs_file_path.parent() {
107            fs::create_dir_all(parent)?;
108        }
109
110        // 2. Store file to backend if it doesn't already exist
111        let (storage_res, stored_size) = if storage_exists {
112            (Ok(()), None)
113        } else {
114            match backend.store(
115                &self.hashes,
116                source_file.as_ref(),
117                self.compression,
118                on_bytes,
119            ) {
120                Ok(size) => (Ok(()), Some(size)),
121                Err(e) => (Err(e), None),
122            }
123        };
124
125        // 3. Then metadata
126        let old_metadata_content = fs::read(&dvs_file_path).ok();
127        log::debug!("Writing metadata to {}", dvs_file_path.display());
128        let metadata_res = fs::write(
129            &dvs_file_path,
130            serde_json::to_string_pretty(self).expect("valid json"),
131        );
132
133        match (storage_res, metadata_res) {
134            (Ok(_), Ok(_)) => {
135                let audit_entry = AuditEntry::new_add(
136                    operation_id,
137                    AuditFile {
138                        path: relative_path.as_ref().to_path_buf(),
139                        hashes: self.hashes.clone(),
140                    },
141                    self.compression,
142                );
143                if let Err(e) = backend.log_audit(&audit_entry) {
144                    log::error!("Failed to write audit log {audit_entry:?}: {e}");
145                }
146                Ok((Outcome::Copied, stored_size))
147            }
148            (Err(e), Ok(_)) => {
149                log::warn!(
150                    "Storage failed, rolling back metadata for {}",
151                    relative_path.as_ref().display()
152                );
153                if let Some(old) = old_metadata_content {
154                    fs::write(&dvs_file_path, &old)?;
155                } else {
156                    fs::remove_file(&dvs_file_path)?;
157                }
158                Err(e)
159            }
160            (Ok(_), Err(_)) => {
161                log::warn!(
162                    "Metadata write failed, rolling back storage for {}",
163                    relative_path.as_ref().display()
164                );
165                if let Some(old) = old_metadata_content {
166                    let _ = fs::write(&dvs_file_path, &old);
167                } else {
168                    let _ = fs::remove_file(&dvs_file_path);
169                }
170                if !storage_exists {
171                    backend.remove(&self.hashes)?;
172                }
173                bail!("Failed to write metadata file: {dvs_file_path:?}")
174            }
175            (Err(e), Err(_)) => {
176                log::warn!(
177                    "Both storage and metadata failed, rolling back for {}",
178                    relative_path.as_ref().display()
179                );
180                if let Some(old) = old_metadata_content {
181                    fs::write(&dvs_file_path, &old)?;
182                } else {
183                    fs::remove_file(&dvs_file_path)?;
184                }
185                if !storage_exists {
186                    backend.remove(&self.hashes)?;
187                }
188                bail!("Failed to write metadata file: {dvs_file_path:?}: {e}")
189            }
190        }
191    }
192}
193
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testutil::{create_file, create_temp_git_repo, init_dvs_repo};

    /// Builds `DvsPaths` using `root` for both path arguments and the metadata
    /// folder name taken from `config`.
    fn make_paths(root: &Path, config: &crate::config::Config) -> DvsPaths {
        let first = root.to_path_buf();
        let second = root.to_path_buf();
        DvsPaths::new(first, second, config.metadata_folder_name()).unwrap()
    }

    #[test]
    fn file_metadata_from_file_creates_hashes_and_message() {
        let (_tmp, root) = create_temp_git_repo();
        let file_path = create_file(&root, "test.txt", b"hello world");

        let message = Some("test message".to_string());
        let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, message).unwrap();

        assert_eq!(metadata.hashes.blake3.len(), 64);
        assert_eq!(metadata.size, 11);
        assert_eq!(metadata.message.as_deref(), Some("test message"));
    }

    /// Pins the JSON wire format of `add_time`: an RFC 3339 string with a `Z`
    /// (UTC) suffix. The field is a `jiff::Timestamp`, and its serialized form
    /// has to stay a plain RFC 3339 string so that already written `.dvs` files
    /// keep round-tripping unchanged.
    #[test]
    fn file_metadata_add_time_serde_roundtrip_rfc3339() {
        let blake3 = "a".repeat(64);
        let json = format!(
            r#"{{
                "hashes": {{"blake3": "{blake3}"}},
                "size": 11,
                "created_by": "tester",
                "add_time": "2024-01-02T03:04:05Z",
                "compression": "none"
            }}"#
        );

        let parsed: FileMetadata =
            serde_json::from_str(&json).expect("parse FileMetadata with RFC 3339 timestamp");
        assert_eq!(parsed.add_time.to_string(), "2024-01-02T03:04:05Z");

        let reserialized = serde_json::to_string(&parsed).expect("serialize FileMetadata");
        assert!(
            reserialized.contains("\"add_time\":\"2024-01-02T03:04:05Z\""),
            "add_time must serialize as RFC 3339 string; got: {reserialized}"
        );
    }

    #[test]
    fn file_metadata_from_nonexistent_file_fails() {
        let tmp = tempfile::tempdir().unwrap();
        let missing = tmp.path().join("nonexistent.txt");

        let result = FileMetadata::from_file(missing, Compression::Zstd, None);

        assert!(result.is_err());
    }

    #[test]
    fn save_local_creates_storage_and_metadata() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);
        let file_path = create_file(&root, "data.bin", b"binary data");
        let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, None).unwrap();

        let saved = metadata.save(
            Uuid::new_v4(),
            &file_path,
            backend,
            &paths,
            "data.bin",
            None,
        );
        let (outcome, stored_size) = saved.unwrap();

        assert_eq!(outcome, Outcome::Copied);
        assert!(stored_size.is_some());
        // Both halves of the save must be visible: the metadata file and the stored object
        assert!(dvs_dir.join("data.bin.dvs").is_file());
        assert!(backend.exists(&metadata.hashes).unwrap());
    }

    #[test]
    fn save_local_returns_present_when_already_stored() {
        let (_tmp, root) = create_temp_git_repo();
        let (config, _dvs_dir) = init_dvs_repo(&root);
        let backend = config.backend();
        let paths = make_paths(&root, &config);
        let file_path = create_file(&root, "data.bin", b"binary data");
        let metadata = FileMetadata::from_file(&file_path, Compression::Zstd, None).unwrap();

        // Each call is a separate operation with its own fresh id
        let save_once = || {
            metadata.save(
                Uuid::new_v4(),
                &file_path,
                backend,
                &paths,
                "data.bin",
                None,
            )
        };

        // First save stores the file and writes the metadata
        save_once().unwrap();

        // Second save should return Present
        let (outcome, stored_size) = save_once().unwrap();
        assert_eq!(outcome, Outcome::Present);
        assert!(stored_size.is_none());
    }
}