Skip to main content

miniextendr_api/
list.rs

1#![allow(rustdoc::private_intra_doc_links)]
2//! Thin wrapper around R list (`VECSXP`).
3//!
4//! Provides safe construction from Rust values and typed extraction.
5//!
6//! # Submodules
7//!
8//! | Module | Contents |
9//! |--------|----------|
10//! | [`accumulator`] | `ListAccumulator` — dynamic list construction with bounded protect stack |
11//! | [`named`] | `NamedList` — O(1) name-indexed access via `HashMap` index |
12//!
13//! # Core Types
14//!
15//! - [`List`] — owned handle to an R list (VECSXP)
16//! - [`ListMut`] — mutable view for in-place element replacement
17//! - [`ListBuilder`] — fixed-size batch construction
18//! - [`IntoList`] / [`TryFromList`] — conversion traits
19
20use crate::ffi::SEXPTYPE::{LISTSXP, STRSXP, VECSXP};
21use crate::ffi::{self, SEXP, SexpExt};
22use crate::from_r::{SexpError, SexpLengthError, SexpTypeError, TryFromSexp};
23use crate::gc_protect::OwnedProtect;
24use crate::into_r::IntoR;
25use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
26use std::hash::Hash;
27
/// Owned handle to an R list (`VECSXP`).
///
/// This is a thin newtype around a raw [`SEXP`]. The handle is `Copy`, so
/// passing it around copies only the pointer-like handle, never the R object.
/// The handle does not root the object on its own: methods that allocate
/// (for example [`List::set_class_str`]) protect it explicitly for the
/// duration of the call.
///
/// # Examples
///
/// ```no_run
/// use miniextendr_api::list::List;
///
/// let list = List::from_values(vec![1i32, 2, 3]);
/// assert_eq!(list.len(), 3);
/// let first: Option<i32> = list.get_index(0);
/// ```
#[derive(Clone, Copy, Debug)]
pub struct List(SEXP);
41
/// Mutable view of an R list (`VECSXP`).
///
/// This is a wrapper type instead of `&mut [SEXP]` to avoid exposing a raw slice
/// that could become invalid if list elements are replaced with `NULL`.
///
/// Unlike [`List`], this type is not `Copy`/`Clone`.
// NOTE(review): no methods for `ListMut` are visible in this part of the file;
// its API is presumably defined further down — confirm.
#[derive(Debug)]
pub struct ListMut(SEXP);
48
49impl List {
50    /// Return true if the underlying SEXP is a list (VECSXP) according to R.
51    #[inline]
52    pub fn is_list(self) -> bool {
53        self.0.is_pair_list()
54    }
55
56    /// Wrap an existing `VECSXP` without additional checks.
57    ///
58    /// # Safety
59    ///
60    /// Caller must ensure `sexp` is a valid list object (typically a `VECSXP` or
61    /// a pairlist coerced to `VECSXP`) whose lifetime remains managed by R.
62    #[inline]
63    pub const unsafe fn from_raw(sexp: SEXP) -> Self {
64        List(sexp)
65    }
66
67    /// Get the underlying `SEXP`.
68    #[inline]
69    pub const fn as_sexp(self) -> SEXP {
70        self.0
71    }
72
73    /// Length of the list (number of elements).
74    #[inline]
75    pub fn len(self) -> isize {
76        self.0.xlength()
77    }
78
79    /// Returns true if the list is empty.
80    #[inline]
81    pub fn is_empty(self) -> bool {
82        self.len() == 0
83    }
84
85    /// Get raw SEXP element at 0-based index. Returns `None` if out of bounds.
86    #[inline]
87    pub fn get(self, idx: isize) -> Option<SEXP> {
88        if idx < 0 || idx >= self.len() {
89            return None;
90        }
91        Some(self.0.vector_elt(idx))
92    }
93
94    /// Get element at 0-based index and convert to type `T`.
95    ///
96    /// Returns `None` if index is out of bounds or conversion fails.
97    #[inline]
98    pub fn get_index<T>(self, idx: isize) -> Option<T>
99    where
100        T: TryFromSexp<Error = SexpError>,
101    {
102        let sexp = self.get(idx)?;
103        T::try_from_sexp(sexp).ok()
104    }
105
106    /// Get element by name and convert to type `T`.
107    ///
108    /// Returns `None` if name not found or conversion fails.
109    pub fn get_named<T>(self, name: &str) -> Option<T>
110    where
111        T: TryFromSexp<Error = SexpError>,
112    {
113        let names_sexp = self.names()?;
114        let n = self.len();
115
116        // Search for matching name
117        for i in 0..n {
118            let name_sexp = names_sexp.string_elt(i);
119            if name_sexp == SEXP::na_string() {
120                continue;
121            }
122            let name_ptr = name_sexp.r_char();
123            let name_cstr = unsafe { std::ffi::CStr::from_ptr(name_ptr) };
124            if let Ok(s) = name_cstr.to_str() {
125                if s == name {
126                    let elem = self.0.vector_elt(i);
127                    return T::try_from_sexp(elem).ok();
128                }
129            }
130        }
131        None
132    }
133
134    // region: Attribute getters (equivalent to R's GET_* macros)
135
136    /// Get an arbitrary attribute by symbol, returning `None` for `R_NilValue`.
137    #[inline]
138    fn get_attr_opt(self, name: SEXP) -> Option<SEXP> {
139        let attr = self.0.get_attr(name);
140        if attr.is_nil() { None } else { Some(attr) }
141    }
142
143    /// Get the `names` attribute if present.
144    #[inline]
145    pub fn names(self) -> Option<SEXP> {
146        self.get_attr_opt(SEXP::names_symbol())
147    }
148
149    /// Get the `class` attribute if present.
150    #[inline]
151    pub fn get_class(self) -> Option<SEXP> {
152        self.get_attr_opt(SEXP::class_symbol())
153    }
154
155    /// Get the `dim` attribute if present.
156    #[inline]
157    pub fn get_dim(self) -> Option<SEXP> {
158        self.get_attr_opt(SEXP::dim_symbol())
159    }
160
161    /// Get the `dimnames` attribute if present.
162    #[inline]
163    pub fn get_dimnames(self) -> Option<SEXP> {
164        self.get_attr_opt(SEXP::dimnames_symbol())
165    }
166
167    /// Get row names from the `dimnames` attribute.
168    #[inline]
169    pub fn get_rownames(self) -> Option<SEXP> {
170        let rownames = unsafe { ffi::Rf_GetRowNames(self.0) };
171        if rownames.is_nil() {
172            None
173        } else {
174            Some(rownames)
175        }
176    }
177
178    /// Get column names from the `dimnames` attribute.
179    #[inline]
180    pub fn get_colnames(self) -> Option<SEXP> {
181        let dimnames = self.0.get_dimnames();
182        if dimnames.is_nil() {
183            return None;
184        }
185        let colnames = unsafe { ffi::Rf_GetColNames(dimnames) };
186        if colnames.is_nil() {
187            None
188        } else {
189            Some(colnames)
190        }
191    }
192
193    /// Get the `levels` attribute if present (for factors).
194    #[inline]
195    pub fn get_levels(self) -> Option<SEXP> {
196        self.get_attr_opt(SEXP::levels_symbol())
197    }
198
199    /// Get the `tsp` attribute if present (for time series).
200    #[inline]
201    pub fn get_tsp(self) -> Option<SEXP> {
202        self.get_attr_opt(SEXP::tsp_symbol())
203    }
204    // endregion
205
206    // region: Attribute setters (equivalent to R's SET_* macros)
207
208    /// Set the `names` attribute; returns the same list for chaining.
209    ///
210    /// Equivalent to R's `SET_NAMES(x, n)`.
211    #[inline]
212    pub fn set_names(self, names: SEXP) -> Self {
213        self.0.set_names(names);
214        self
215    }
216
217    /// Set the `class` attribute; returns the same list for chaining.
218    ///
219    /// Equivalent to R's `SET_CLASS(x, n)`.
220    #[inline]
221    pub fn set_class(self, class: SEXP) -> Self {
222        self.0.set_class(class);
223        self
224    }
225
226    /// Set the `dim` attribute; returns the same list for chaining.
227    ///
228    /// Equivalent to R's `SET_DIM(x, n)`.
229    #[inline]
230    pub fn set_dim(self, dim: SEXP) -> Self {
231        self.0.set_dim(dim);
232        self
233    }
234
235    /// Set the `dimnames` attribute; returns the same list for chaining.
236    ///
237    /// Equivalent to R's `SET_DIMNAMES(x, n)`.
238    #[inline]
239    pub fn set_dimnames(self, dimnames: SEXP) -> Self {
240        self.0.set_dimnames(dimnames);
241        self
242    }
243
244    /// Set the `levels` attribute; returns the same list for chaining.
245    ///
246    /// Equivalent to R's `SET_LEVELS(x, l)`.
247    #[inline]
248    pub fn set_levels(self, levels: SEXP) -> Self {
249        self.0.set_levels(levels);
250        self
251    }
252    // endregion
253
254    // region: Convenience setters (string-based)
255
256    /// Set the `class` attribute from a slice of class names.
257    ///
258    /// This is a convenience wrapper that creates a character vector from the
259    /// provided strings and sets it as the class attribute.
260    ///
261    /// # Example
262    ///
263    /// ```ignore
264    /// let list = List::from_pairs(vec![("x", vec![1, 2, 3])]);
265    /// let df = list.set_class_str(&["data.frame"]);
266    /// ```
267    #[inline]
268    pub fn set_class_str(self, classes: &[&str]) -> Self {
269        use crate::ffi::SEXPTYPE::STRSXP;
270
271        let n: isize = classes
272            .len()
273            .try_into()
274            .expect("classes length exceeds isize::MAX");
275        unsafe {
276            // Protect self across the class-vector allocation; otherwise the
277            // parent list can be freed during `Rf_allocVector` while it sits
278            // unrooted in our Rust handle (UAF under gctorture).
279            let _self_guard = OwnedProtect::new(self.0);
280            let class_vec = OwnedProtect::new(ffi::Rf_allocVector(STRSXP, n));
281            for (i, class) in classes.iter().enumerate() {
282                let idx: isize = i.try_into().expect("index exceeds isize::MAX");
283                class_vec.get().set_string_elt(idx, SEXP::charsxp(class));
284            }
285            self.0.set_class(class_vec.get());
286        }
287        self
288    }
289
290    /// Set class = `"data.frame"` using a cached class STRSXP.
291    ///
292    /// Equivalent to `set_class_str(&["data.frame"])` but avoids allocation.
293    #[inline]
294    pub fn set_data_frame_class(self) -> Self {
295        self.0
296            .set_class(crate::cached_class::data_frame_class_sexp());
297        self
298    }
299
300    /// Set the `names` attribute from a slice of strings.
301    ///
302    /// This is a convenience wrapper that creates a character vector from the
303    /// provided strings and sets it as the names attribute.
304    ///
305    /// # Example
306    ///
307    /// ```ignore
308    /// let list = List::from_values(vec![1, 2, 3]);
309    /// let named = list.set_names_str(&["a", "b", "c"]);
310    /// ```
311    #[inline]
312    pub fn set_names_str(self, names: &[&str]) -> Self {
313        use crate::ffi::SEXPTYPE::STRSXP;
314
315        let n: isize = names
316            .len()
317            .try_into()
318            .expect("names length exceeds isize::MAX");
319        unsafe {
320            // Protect self across the names-vector allocation; see set_class_str.
321            let _self_guard = OwnedProtect::new(self.0);
322            let names_vec = OwnedProtect::new(ffi::Rf_allocVector(STRSXP, n));
323            for (i, name) in names.iter().enumerate() {
324                let idx: isize = i.try_into().expect("index exceeds isize::MAX");
325                names_vec.get().set_string_elt(idx, SEXP::charsxp(name));
326            }
327            self.0.set_names(names_vec.get());
328        }
329        self
330    }
331
332    /// Set `row.names` for a data.frame using compact integer form.
333    ///
334    /// R internally represents row.names as a compact integer vector
335    /// `c(NA_integer_, -n)` when the row names are just `1:n`. This is more
336    /// memory-efficient than storing n strings.
337    ///
338    /// # Example
339    ///
340    /// ```ignore
341    /// let list = List::from_pairs(vec![
342    ///     ("x", vec![1, 2, 3]),
343    ///     ("y", vec![4, 5, 6]),
344    /// ])
345    /// .set_class_str(&["data.frame"])
346    /// .set_row_names_int(3);  // Row names: "1", "2", "3"
347    /// ```
348    #[inline]
349    pub fn set_row_names_int(self, n: usize) -> Self {
350        unsafe {
351            // Protect self across the row.names allocation; see set_class_str.
352            let _self_guard = OwnedProtect::new(self.0);
353            // R's compact row.names: c(NA_integer_, -n)
354            let (row_names, rn) = crate::into_r::alloc_r_vector::<i32>(2);
355            let _guard = OwnedProtect::new(row_names);
356            rn[0] = i32::MIN; // NA_INTEGER
357            let n_i32 = i32::try_from(n).unwrap_or_else(|_| {
358                panic!("row count {n} exceeds i32::MAX");
359            });
360            rn[1] = -n_i32;
361            self.0.set_row_names(row_names);
362        }
363        self
364    }
365
366    /// Set `row.names` from a vector of strings.
367    ///
368    /// Use this when you need custom row names. For simple sequential row names
369    /// (1, 2, 3, ...), use [`set_row_names_int`](Self::set_row_names_int) instead.
370    ///
371    /// # Example
372    ///
373    /// ```ignore
374    /// let list = List::from_pairs(vec![
375    ///     ("x", vec![1, 2, 3]),
376    /// ])
377    /// .set_class_str(&["data.frame"])
378    /// .set_row_names_str(&["row_a", "row_b", "row_c"]);
379    /// ```
380    #[inline]
381    pub fn set_row_names_str(self, row_names: &[&str]) -> Self {
382        use crate::ffi::SEXPTYPE::STRSXP;
383
384        let n: isize = row_names
385            .len()
386            .try_into()
387            .expect("row_names length exceeds isize::MAX");
388        unsafe {
389            // Protect self across the row.names allocation; see set_class_str.
390            let _self_guard = OwnedProtect::new(self.0);
391            let names_vec = OwnedProtect::new(ffi::Rf_allocVector(STRSXP, n));
392            for (i, name) in row_names.iter().enumerate() {
393                let idx: isize = i.try_into().expect("index exceeds isize::MAX");
394                names_vec.get().set_string_elt(idx, SEXP::charsxp(name));
395            }
396            self.0.set_row_names(names_vec.get());
397        }
398        self
399    }
400    // endregion
401
402    // region: Safe element insertion
403
404    /// Set an element at the given index, protecting the child during insertion.
405    ///
406    /// This is the safe way to insert a freshly allocated SEXP into a list.
407    /// The child is protected for the duration of the `SET_VECTOR_ELT` call,
408    /// ensuring it cannot be garbage collected.
409    ///
410    /// # Safety
411    ///
412    /// - Must be called from the R main thread
413    /// - `child` must be a valid SEXP
414    /// - `self` must be a valid, protected VECSXP
415    ///
416    /// # Panics
417    ///
418    /// Panics if `idx` is out of bounds.
419    ///
420    /// # Example
421    ///
422    /// ```ignore
423    /// let scope = ProtectScope::new();
424    /// let list = List::from_raw(scope.alloc_vecsxp(n).into_raw());
425    ///
426    /// for i in 0..n {
427    ///     let child = Rf_allocVector(REALSXP, 10);  // unprotected!
428    ///     list.set_elt(i, child);  // safe: protects child during insertion
429    /// }
430    /// ```
431    #[inline]
432    pub unsafe fn set_elt(self, idx: isize, child: SEXP) {
433        assert!(idx >= 0 && idx < self.len(), "index out of bounds");
434        // Protect child for the duration of SET_VECTOR_ELT.
435        // Once inserted, the child is protected by the parent container.
436        // SAFETY: caller guarantees R main thread and valid SEXPs
437        unsafe {
438            let _guard = OwnedProtect::new(child);
439            self.0.set_vector_elt(idx, child);
440        }
441    }
442
443    /// Set an element without protecting the child.
444    ///
445    /// # Safety
446    ///
447    /// In addition to the safety requirements of [`set_elt`](Self::set_elt):
448    /// - The caller must ensure `child` is already protected or that no GC
449    ///   can occur between child allocation and this call.
450    ///
451    /// Use this for performance when you know the child is already protected
452    /// (e.g., it's a child of another protected container, or you have an
453    /// `OwnedProtect` guard for it).
454    #[inline]
455    pub unsafe fn set_elt_unchecked(self, idx: isize, child: SEXP) {
456        debug_assert!(idx >= 0 && idx < self.len(), "index out of bounds");
457        // SAFETY: caller guarantees child is protected and valid
458        self.0.set_vector_elt(idx, child);
459    }
460
461    /// Set an element using a callback that produces the child.
462    ///
463    /// The callback is executed within a protection scope, so any allocations
464    /// it performs are protected until insertion completes.
465    ///
466    /// # Safety
467    ///
468    /// - Must be called from the R main thread
469    /// - `self` must be a valid, protected VECSXP
470    ///
471    /// # Example
472    ///
473    /// ```ignore
474    /// let list = List::from_raw(scope.alloc_vecsxp(n).into_raw());
475    ///
476    /// for i in 0..n {
477    ///     list.set_elt_with(i, || {
478    ///         let vec = Rf_allocVector(REALSXP, 10);
479    ///         fill_vector(vec);  // can allocate internally
480    ///         vec
481    ///     });
482    /// }
483    /// ```
484    #[inline]
485    pub unsafe fn set_elt_with<F>(self, idx: isize, f: F)
486    where
487        F: FnOnce() -> SEXP,
488    {
489        assert!(idx >= 0 && idx < self.len(), "index out of bounds");
490        // SAFETY: caller guarantees R main thread
491        unsafe {
492            let child = OwnedProtect::new(f());
493            self.0.set_vector_elt(idx, child.get());
494        }
495    }
496    // endregion
497}
498
499// region: ListBuilder - efficient batch list construction
500
501use crate::gc_protect::ProtectScope;
502
/// Builder for constructing lists with efficient protection management.
///
/// `ListBuilder` holds a reference to a [`ProtectScope`], allowing multiple
/// elements to be inserted without repeatedly protecting/unprotecting each one.
/// This is more efficient than using [`List::set_elt`] in a loop.
///
/// The borrowed scope ties the builder's lifetime `'a` to the scope, so the
/// builder cannot outlive the scope it was created with.
///
/// # Example
///
/// ```ignore
/// unsafe fn build_list(n: usize) -> SEXP {
///     let scope = ProtectScope::new();
///     // `new` takes the length as `usize`.
///     let builder = ListBuilder::new(&scope, n);
///
///     // `len()` and `set()` both work with `isize` indices.
///     for i in 0..builder.len() {
///         // Allocations inside the loop are protected by the scope
///         let child = scope.alloc_real(10).into_raw();
///         builder.set(i, child);
///     }
///
///     builder.into_sexp()
/// }
/// ```
pub struct ListBuilder<'a> {
    // Raw handle to the list being filled.
    list: SEXP,
    // Scope associated with the list; never read, kept only for the borrow.
    _scope: &'a ProtectScope,
}
529
530impl<'a> ListBuilder<'a> {
531    /// Create a new list builder with the given length.
532    ///
533    /// The list is allocated and protected using the provided scope.
534    ///
535    /// # Safety
536    ///
537    /// Must be called from the R main thread.
538    #[inline]
539    pub unsafe fn new(scope: &'a ProtectScope, len: usize) -> Self {
540        // SAFETY: caller guarantees R main thread
541        let list = unsafe { scope.alloc_vecsxp(len).into_raw() };
542        Self {
543            list,
544            _scope: scope,
545        }
546    }
547
548    /// Create a builder wrapping an existing protected list.
549    ///
550    /// # Safety
551    ///
552    /// - Must be called from the R main thread
553    /// - `list` must be a valid, protected VECSXP
554    #[inline]
555    pub unsafe fn from_protected(scope: &'a ProtectScope, list: SEXP) -> Self {
556        Self {
557            list,
558            _scope: scope,
559        }
560    }
561
562    /// Set an element at the given index.
563    ///
564    /// The `child` should be protected by the same scope (or a parent scope).
565    /// Use `scope.protect_raw(...)` before calling this method.
566    ///
567    /// # Safety
568    ///
569    /// - `child` must be a valid SEXP
570    /// - `child` should be protected (typically via the same scope)
571    #[inline]
572    pub unsafe fn set(&self, idx: isize, child: SEXP) {
573        // SAFETY: caller guarantees valid and protected child
574        debug_assert!(idx >= 0 && idx < self.list.xlength());
575        self.list.set_vector_elt(idx, child);
576    }
577
578    /// Set an element, protecting the child within the builder's scope.
579    ///
580    /// This is a convenience method that protects the child and then inserts it.
581    ///
582    /// # Safety
583    ///
584    /// - `child` must be a valid SEXP
585    #[inline]
586    pub unsafe fn set_protected(&self, idx: isize, child: SEXP) {
587        // SAFETY: caller guarantees valid child
588        unsafe {
589            debug_assert!(idx >= 0 && idx < self.list.xlength());
590            let _guard = OwnedProtect::new(child);
591            self.list.set_vector_elt(idx, child);
592        }
593    }
594
595    /// Get the underlying list SEXP.
596    #[inline]
597    pub fn as_sexp(&self) -> SEXP {
598        self.list
599    }
600
601    /// Convert to a `List` wrapper.
602    #[inline]
603    pub fn into_list(self) -> List {
604        List(self.list)
605    }
606
607    /// Convert to the underlying SEXP.
608    #[inline]
609    pub fn into_sexp(self) -> SEXP {
610        self.list
611    }
612
613    /// Get the length of the list.
614    #[inline]
615    pub fn len(&self) -> isize {
616        self.list.xlength()
617    }
618
619    /// Check if the list is empty.
620    #[inline]
621    pub fn is_empty(&self) -> bool {
622        self.len() == 0
623    }
624}
625// endregion
626
627mod accumulator;
628mod named;
629
630pub use accumulator::*;
631pub use named::*;
632
633// region: IntoList and TryFromList traits
634
/// Convert things into an R list.
///
/// Implemented in this module for `Vec<T>`, `HashMap<K, V>`, `BTreeMap<K, V>`,
/// `HashSet<T>` and `BTreeSet<T>`. The map implementations produce named
/// lists (via [`List::from_pairs`]); the others produce unnamed lists.
pub trait IntoList {
    /// Convert `self` into an R list wrapper, consuming `self`.
    fn into_list(self) -> List;
}
640
/// Fallible conversion from an R list into a Rust value.
///
/// This is the counterpart of [`IntoList`]. The implementations in this
/// module convert each element with `TryFromSexp` and stop at the first
/// element that fails.
pub trait TryFromList: Sized {
    /// Error returned when conversion fails.
    type Error;

    /// Attempt to convert an R list wrapper into `Self`.
    fn try_from_list(list: List) -> Result<Self, Self::Error>;
}
649
650impl<T: IntoR> IntoList for Vec<T> {
651    fn into_list(self) -> List {
652        // Allocate + protect the parent first, then call `into_sexp()` per
653        // element and write straight into the parent. Pre-collecting elements
654        // into `Vec<SEXP>` would leave them unrooted across allocations — same
655        // UAF shape as the columnar `Generic` buffer (PR #424 / issue #307).
656        let n: isize = self
657            .len()
658            .try_into()
659            .expect("list length exceeds isize::MAX");
660        unsafe {
661            let list = OwnedProtect::new(ffi::Rf_allocVector(VECSXP, n));
662            for (i, val) in self.into_iter().enumerate() {
663                let idx: isize = i.try_into().expect("index exceeds isize::MAX");
664                list.get().set_vector_elt(idx, val.into_sexp());
665            }
666            List(list.get())
667        }
668    }
669}
670
671impl<T> TryFromList for Vec<T>
672where
673    T: TryFromSexp<Error = SexpError>,
674{
675    type Error = SexpError;
676
677    fn try_from_list(list: List) -> Result<Self, Self::Error> {
678        let expected: usize = list
679            .len()
680            .try_into()
681            .expect("list length must be non-negative");
682        let mut out = Vec::with_capacity(expected);
683        for i in 0..expected {
684            let idx: isize = i.try_into().expect("index exceeds isize::MAX");
685            let sexp = list.get(idx).ok_or_else(|| {
686                SexpError::from(SexpLengthError {
687                    expected,
688                    actual: i,
689                })
690            })?;
691            out.push(TryFromSexp::try_from_sexp(sexp)?);
692        }
693        Ok(out)
694    }
695}
696
697// endregion
698
699// region: HashMap conversions
700
701impl<K, V> IntoList for HashMap<K, V>
702where
703    K: AsRef<str>,
704    V: IntoR,
705{
706    fn into_list(self) -> List {
707        let pairs: Vec<(K, V)> = self.into_iter().collect();
708        List::from_pairs(pairs)
709    }
710}
711
712impl<V> TryFromList for HashMap<String, V>
713where
714    V: TryFromSexp<Error = SexpError>,
715{
716    type Error = SexpError;
717
718    fn try_from_list(list: List) -> Result<Self, Self::Error> {
719        let n: usize = list
720            .len()
721            .try_into()
722            .expect("list length must be non-negative");
723        let names_sexp = list.names();
724        let mut map = HashMap::with_capacity(n);
725
726        for i in 0..n {
727            let idx: isize = i.try_into().expect("index exceeds isize::MAX");
728            let sexp = list.get(idx).ok_or_else(|| {
729                SexpError::from(SexpLengthError {
730                    expected: n,
731                    actual: i,
732                })
733            })?;
734            let value: V = TryFromSexp::try_from_sexp(sexp)?;
735
736            let key = if let Some(names) = names_sexp {
737                let name_sexp = names.string_elt(idx);
738                if name_sexp == SEXP::na_string() {
739                    format!("{i}")
740                } else {
741                    let name_ptr = name_sexp.r_char();
742                    let name_cstr = unsafe { std::ffi::CStr::from_ptr(name_ptr) };
743                    name_cstr.to_str().unwrap_or(&format!("{i}")).to_string()
744                }
745            } else {
746                format!("{i}")
747            };
748
749            map.insert(key, value);
750        }
751        Ok(map)
752    }
753}
754// endregion
755
756// region: BTreeMap conversions
757
758impl<K, V> IntoList for BTreeMap<K, V>
759where
760    K: AsRef<str>,
761    V: IntoR,
762{
763    fn into_list(self) -> List {
764        let pairs: Vec<(K, V)> = self.into_iter().collect();
765        List::from_pairs(pairs)
766    }
767}
768
769impl<V> TryFromList for BTreeMap<String, V>
770where
771    V: TryFromSexp<Error = SexpError>,
772{
773    type Error = SexpError;
774
775    fn try_from_list(list: List) -> Result<Self, Self::Error> {
776        let n: usize = list
777            .len()
778            .try_into()
779            .expect("list length must be non-negative");
780        let names_sexp = list.names();
781        let mut map = BTreeMap::new();
782
783        for i in 0..n {
784            let idx: isize = i.try_into().expect("index exceeds isize::MAX");
785            let sexp = list.get(idx).ok_or_else(|| {
786                SexpError::from(SexpLengthError {
787                    expected: n,
788                    actual: i,
789                })
790            })?;
791            let value: V = TryFromSexp::try_from_sexp(sexp)?;
792
793            let key = if let Some(names) = names_sexp {
794                let name_sexp = names.string_elt(idx);
795                if name_sexp == SEXP::na_string() {
796                    format!("{i}")
797                } else {
798                    let name_ptr = name_sexp.r_char();
799                    let name_cstr = unsafe { std::ffi::CStr::from_ptr(name_ptr) };
800                    name_cstr.to_str().unwrap_or(&format!("{i}")).to_string()
801                }
802            } else {
803                format!("{i}")
804            };
805
806            map.insert(key, value);
807        }
808        Ok(map)
809    }
810}
811// endregion
812
813// region: HashSet conversions (unnamed list <-> set)
814
815impl<T> IntoList for HashSet<T>
816where
817    T: IntoR,
818{
819    fn into_list(self) -> List {
820        let values: Vec<T> = self.into_iter().collect();
821        values.into_list()
822    }
823}
824
825impl<T> TryFromList for HashSet<T>
826where
827    T: TryFromSexp<Error = SexpError> + Eq + Hash,
828{
829    type Error = SexpError;
830
831    fn try_from_list(list: List) -> Result<Self, Self::Error> {
832        let vec: Vec<T> = TryFromList::try_from_list(list)?;
833        Ok(vec.into_iter().collect())
834    }
835}
836// endregion
837
838// region: BTreeSet conversions (unnamed list <-> set)
839
840impl<T> IntoList for BTreeSet<T>
841where
842    T: IntoR,
843{
844    fn into_list(self) -> List {
845        let values: Vec<T> = self.into_iter().collect();
846        values.into_list()
847    }
848}
849
850impl<T> TryFromList for BTreeSet<T>
851where
852    T: TryFromSexp<Error = SexpError> + Ord,
853{
854    type Error = SexpError;
855
856    fn try_from_list(list: List) -> Result<Self, Self::Error> {
857        let vec: Vec<T> = TryFromList::try_from_list(list)?;
858        Ok(vec.into_iter().collect())
859    }
860}
861
862impl List {
863    /// Build a list from `(name, value)` pairs, setting `names` in one pass.
864    pub fn from_pairs<N, T>(pairs: Vec<(N, T)>) -> Self
865    where
866        N: AsRef<str>,
867        T: IntoR,
868    {
869        // Allocate + protect the parent list and names before calling
870        // `into_sexp()` on each value. Pre-collecting `Vec<(N, SEXP)>` would
871        // leave the value SEXPs unrooted across subsequent `into_sexp()` and
872        // the names allocation — same UAF shape as #307.
873        let n: isize = pairs
874            .len()
875            .try_into()
876            .expect("pairs length exceeds isize::MAX");
877        unsafe {
878            let list = OwnedProtect::new(ffi::Rf_allocVector(VECSXP, n));
879            let names = OwnedProtect::new(ffi::Rf_allocVector(STRSXP, n));
880            for (i, (name, val)) in pairs.into_iter().enumerate() {
881                let idx: isize = i.try_into().expect("index exceeds isize::MAX");
882                list.get().set_vector_elt(idx, val.into_sexp());
883                names
884                    .get()
885                    .set_string_elt(idx, SEXP::charsxp(name.as_ref()));
886            }
887            list.get().set_names(names.get());
888            List(list.get())
889        }
890    }
891
892    /// Build an unnamed list from values.
893    ///
894    /// Use this for tuple-like structures where positional access is more natural.
895    ///
896    /// # Example
897    ///
898    /// ```ignore
899    /// let list = List::from_values(vec![1i32, 2i32, 3i32]);
900    /// // R: list(1L, 2L, 3L) - accessed as [[1]], [[2]], [[3]]
901    /// ```
902    pub fn from_values<T: IntoR>(values: Vec<T>) -> Self {
903        values.into_list()
904    }
905
906    /// Build an unnamed list from pre-converted SEXPs.
907    ///
908    /// # Safety Note
909    ///
910    /// The input SEXPs should already be protected or be children of protected
911    /// containers. This function protects the list during construction.
912    pub fn from_raw_values(values: Vec<SEXP>) -> Self {
913        let n: isize = values
914            .len()
915            .try_into()
916            .expect("values length exceeds isize::MAX");
917        unsafe {
918            // Protect list during construction. SET_VECTOR_ELT doesn't allocate,
919            // but we protect defensively in case this code is modified later.
920            let list = OwnedProtect::new(ffi::Rf_allocVector(VECSXP, n));
921            for (i, val) in values.into_iter().enumerate() {
922                let idx: isize = i.try_into().expect("index exceeds isize::MAX");
923                list.get().set_vector_elt(idx, val);
924            }
925            List(list.get())
926        }
927    }
928
929    /// Build an atomic vector from homogeneous length-1 scalar SEXPs.
930    ///
931    /// If all elements are length-1 scalars of the same coalesceable type
932    /// (INTSXP, REALSXP, LGLSXP, STRSXP), returns that atomic vector.
933    /// Otherwise returns a VECSXP (generic list).
934    ///
935    /// This is the canonical entry point for both `DataFrame::into_data_frame`
936    /// (column building) and `SeqSerializer::end` (sequence coalescing).
937    ///
938    /// # Safety Note
939    ///
940    /// The input SEXPs should already be protected or be children of protected
941    /// containers.
942    pub fn from_scalars_or_list(elements: &[SEXP]) -> Self {
943        use crate::ffi::SEXPTYPE;
944        use crate::into_r::alloc_r_vector;
945
946        if elements.is_empty() {
947            return Self::from_raw_values(Vec::new());
948        }
949
950        let first_type = elements[0].type_of();
951        let all_scalar_same_type = elements
952            .iter()
953            .all(|&e| e.xlength() == 1 && e.type_of() == first_type);
954
955        if !all_scalar_same_type {
956            return Self::from_raw_values(elements.to_vec());
957        }
958
959        let n = elements.len();
960        let sexp = match first_type {
961            // For native types: allocate R vector, get mutable slice, read source
962            // scalars via as_slice()[0] — no per-element FFI calls.
963            SEXPTYPE::INTSXP => unsafe {
964                let (v, dst) = alloc_r_vector::<i32>(n);
965                for (slot, &elem) in dst.iter_mut().zip(elements.iter()) {
966                    *slot = *elem.as_slice::<i32>().first().expect("scalar has length 1");
967                }
968                v
969            },
970            SEXPTYPE::REALSXP => unsafe {
971                let (v, dst) = alloc_r_vector::<f64>(n);
972                for (slot, &elem) in dst.iter_mut().zip(elements.iter()) {
973                    *slot = *elem.as_slice::<f64>().first().expect("scalar has length 1");
974                }
975                v
976            },
977            SEXPTYPE::LGLSXP => unsafe {
978                let (v, dst) = alloc_r_vector::<crate::ffi::RLogical>(n);
979                for (slot, &elem) in dst.iter_mut().zip(elements.iter()) {
980                    *slot = *elem
981                        .as_slice::<crate::ffi::RLogical>()
982                        .first()
983                        .expect("scalar has length 1");
984                }
985                v
986            },
987            // STRSXP elements are CHARSXPs — must use SET_STRING_ELT (no slice access).
988            SEXPTYPE::STRSXP => unsafe {
989                let v = OwnedProtect::new(ffi::Rf_allocVector(SEXPTYPE::STRSXP, n as isize));
990                for (i, &elem) in elements.iter().enumerate() {
991                    let idx: isize = i.try_into().expect("index exceeds isize::MAX");
992                    v.get().set_string_elt(idx, elem.string_elt(0));
993                }
994                v.get()
995            },
996            _ => return Self::from_raw_values(elements.to_vec()),
997        };
998        List(sexp)
999    }
1000
1001    /// Build a list from `(name, SEXP)` pairs (heterogeneous-friendly).
1002    ///
1003    /// # Safety Note
1004    ///
1005    /// The input SEXPs should already be protected or be children of protected
1006    /// containers. This function protects the list and names vector during
1007    /// construction.
1008    pub fn from_raw_pairs<N>(pairs: Vec<(N, SEXP)>) -> Self
1009    where
1010        N: AsRef<str>,
1011    {
1012        let n: isize = pairs
1013            .len()
1014            .try_into()
1015            .expect("pairs length exceeds isize::MAX");
1016        unsafe {
1017            // CRITICAL: Both list and names must be protected because
1018            // Rf_mkCharLenCE can allocate and trigger GC in the loop below.
1019            let list = OwnedProtect::new(ffi::Rf_allocVector(VECSXP, n));
1020            let names = OwnedProtect::new(ffi::Rf_allocVector(STRSXP, n));
1021            for (i, (name, val)) in pairs.into_iter().enumerate() {
1022                let idx: isize = i.try_into().expect("index exceeds isize::MAX");
1023                list.get().set_vector_elt(idx, val);
1024
1025                let s = name.as_ref();
1026                // SEXP::charsxp allocates - list and names must be protected!
1027                names.get().set_string_elt(idx, SEXP::charsxp(s));
1028            }
1029            list.get().set_names(names.get());
1030            List(list.get())
1031        }
1032    }
1033
1034    /// Build an empty named-list SEXP (zero elements, `names` attribute set).
1035    ///
1036    /// Equivalent to [`Self::from_raw_pairs`]`(vec![])`, but avoids the
1037    /// `Vec<(&str, SEXP)>` type annotation that Rust requires at empty-vector
1038    /// callsites where type inference cannot resolve the element type.
1039    ///
1040    /// Codegen paths that emit an empty `from_raw_pairs` call (e.g. unit-variant
1041    /// partitions in `#[derive(DataFrameRow)]`) use this helper so that a future
1042    /// signature change to `from_raw_pairs` only needs to be updated in one
1043    /// place.
1044    #[must_use]
1045    pub fn from_raw_pairs_empty() -> Self {
1046        Self::from_raw_pairs(Vec::<(&str, SEXP)>::new())
1047    }
1048}
1049
1050impl IntoR for List {
1051    type Error = std::convert::Infallible;
1052    fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
1053        Ok(self.into_sexp())
1054    }
1055    unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
1056        self.try_into_sexp()
1057    }
1058    #[inline]
1059    fn into_sexp(self) -> SEXP {
1060        self.0
1061    }
1062}
1063
1064impl IntoR for ListMut {
1065    type Error = std::convert::Infallible;
1066    fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
1067        Ok(self.into_sexp())
1068    }
1069    unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
1070        self.try_into_sexp()
1071    }
1072    #[inline]
1073    fn into_sexp(self) -> SEXP {
1074        self.0
1075    }
1076}
1077
1078/// Convert a `Vec<List>` to an R list-column (VECSXP).
1079///
1080/// Used by the `to_dataframe_split` path generated by `DataFrameRow` derives when
1081/// a struct-typed variant field carries `#[dataframe(as_list)]`. Each element
1082/// becomes an R list in the output VECSXP.
1083impl IntoR for Vec<List> {
1084    type Error = std::convert::Infallible;
1085    fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
1086        Ok(self.into_sexp())
1087    }
1088    unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
1089        self.try_into_sexp()
1090    }
1091    fn into_sexp(self) -> SEXP {
1092        unsafe {
1093            use crate::ffi::{Rf_allocVector, Rf_protect, Rf_unprotect, SEXPTYPE, SexpExt as _};
1094            let n = self.len() as crate::ffi::R_xlen_t;
1095            let out = Rf_allocVector(SEXPTYPE::VECSXP, n);
1096            Rf_protect(out);
1097            for (i, list) in self.into_iter().enumerate() {
1098                out.set_vector_elt(i as crate::ffi::R_xlen_t, list.0);
1099            }
1100            Rf_unprotect(1);
1101            out
1102        }
1103    }
1104}
1105
1106/// Convert a `Vec<Option<List>>` to an R list-column (VECSXP).
1107///
1108/// `Some(list)` elements are placed directly as list elements; `None` elements
1109/// become `R_NilValue`. Used by `DataFrameRow`-derived enum code when a
1110/// struct-typed variant field carries `#[dataframe(as_list)]`.
1111impl IntoR for Vec<Option<List>> {
1112    type Error = std::convert::Infallible;
1113    fn try_into_sexp(self) -> Result<SEXP, Self::Error> {
1114        Ok(self.into_sexp())
1115    }
1116    unsafe fn try_into_sexp_unchecked(self) -> Result<SEXP, Self::Error> {
1117        self.try_into_sexp()
1118    }
1119    fn into_sexp(self) -> SEXP {
1120        unsafe {
1121            use crate::ffi::{Rf_allocVector, Rf_protect, Rf_unprotect, SEXPTYPE, SexpExt as _};
1122            let n = self.len() as crate::ffi::R_xlen_t;
1123            // VECSXP slots are zero-initialised to R_NilValue by Rf_allocVector,
1124            // so None elements require no explicit fill.
1125            let out = Rf_allocVector(SEXPTYPE::VECSXP, n);
1126            Rf_protect(out);
1127            for (i, opt) in self.into_iter().enumerate() {
1128                if let Some(list) = opt {
1129                    out.set_vector_elt(i as crate::ffi::R_xlen_t, list.0);
1130                }
1131            }
1132            Rf_unprotect(1);
1133            out
1134        }
1135    }
1136}
1137
/// Error reported when a list carries the same non-NA name more than once.
#[derive(Debug, Clone)]
pub struct DuplicateNameError {
    /// The name that occurred more than once.
    pub name: String,
}

impl std::fmt::Display for DuplicateNameError {
    /// Render the message with the offending name in its `Debug` (quoted,
    /// escaped) form.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { name } = self;
        f.write_fmt(format_args!("list has duplicate name: {:?}", name))
    }
}

impl std::error::Error for DuplicateNameError {}
1152
1153/// Error when converting SEXP to List fails.
1154#[derive(Debug, Clone)]
1155pub enum ListFromSexpError {
1156    /// Wrong SEXP type.
1157    Type(crate::from_r::SexpTypeError),
1158    /// Duplicate non-NA name found.
1159    DuplicateName(DuplicateNameError),
1160}
1161
1162impl std::fmt::Display for ListFromSexpError {
1163    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1164        match self {
1165            ListFromSexpError::Type(e) => write!(f, "{}", e),
1166            ListFromSexpError::DuplicateName(e) => write!(f, "{}", e),
1167        }
1168    }
1169}
1170
1171impl std::error::Error for ListFromSexpError {}
1172
1173impl From<crate::from_r::SexpTypeError> for ListFromSexpError {
1174    fn from(e: crate::from_r::SexpTypeError) -> Self {
1175        ListFromSexpError::Type(e)
1176    }
1177}
1178
1179impl TryFromSexp for List {
1180    type Error = ListFromSexpError;
1181
1182    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
1183        let actual = sexp.type_of();
1184
1185        // Accept VECSXP (generic list) directly
1186        // Also accept LISTSXP (pairlist) by coercing to VECSXP
1187        // Note: Rf_isList() only returns true for LISTSXP/NILSXP, not VECSXP
1188        let list_sexp = if actual == VECSXP {
1189            sexp
1190        } else if actual == LISTSXP {
1191            // Accept pairlists by coercing to a VECSXP list.
1192            sexp.coerce(VECSXP)
1193        } else {
1194            return Err(crate::from_r::SexpTypeError {
1195                expected: VECSXP,
1196                actual,
1197            }
1198            .into());
1199        };
1200
1201        // Check for duplicate non-NA names
1202        let names_sexp = list_sexp.get_names();
1203        if names_sexp != SEXP::nil() {
1204            let n = list_sexp.xlength();
1205            let n_usize: usize = n.try_into().expect("list length must be non-negative");
1206            let mut seen = HashSet::with_capacity(n_usize);
1207
1208            for i in 0..n {
1209                let name_sexp = names_sexp.string_elt(i);
1210                // Skip NA names
1211                if name_sexp == SEXP::na_string() {
1212                    continue;
1213                }
1214                // Skip empty names
1215                let name_ptr = name_sexp.r_char();
1216                let name_cstr = unsafe { std::ffi::CStr::from_ptr(name_ptr) };
1217                if let Ok(s) = name_cstr.to_str() {
1218                    if s.is_empty() {
1219                        continue;
1220                    }
1221                    if !seen.insert(s) {
1222                        return Err(ListFromSexpError::DuplicateName(DuplicateNameError {
1223                            name: s.to_string(),
1224                        }));
1225                    }
1226                }
1227            }
1228        }
1229
1230        Ok(List(list_sexp))
1231    }
1232}
1233
1234impl TryFromSexp for Option<List> {
1235    type Error = SexpError;
1236
1237    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
1238        if sexp == SEXP::nil() {
1239            return Ok(None);
1240        }
1241        let list = List::try_from_sexp(sexp).map_err(|e| SexpError::InvalidValue(e.to_string()))?;
1242        Ok(Some(list))
1243    }
1244}
1245
1246impl TryFromSexp for Option<ListMut> {
1247    type Error = SexpError;
1248
1249    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
1250        if sexp == SEXP::nil() {
1251            return Ok(None);
1252        }
1253        let list = ListMut::try_from_sexp(sexp)?;
1254        Ok(Some(list))
1255    }
1256}
1257
1258impl TryFromSexp for ListMut {
1259    type Error = SexpError;
1260
1261    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
1262        let actual = sexp.type_of();
1263        if actual != VECSXP {
1264            return Err(SexpTypeError {
1265                expected: VECSXP,
1266                actual,
1267            }
1268            .into());
1269        }
1270        Ok(ListMut(sexp))
1271    }
1272}
1273// endregion