Skip to main content

miniextendr_macros/
dataframe_derive.rs

//! Derive macros for bidirectional row ↔ dataframe conversions.
//!
//! Supports both structs (direct field mapping) and enums (field-name union
//! across variants with `Option<T>` fill for missing fields).
5
6use proc_macro2::{Span, TokenStream};
7use quote::{format_ident, quote};
8use syn::{Data, DeriveInput, Fields};
9
10// region: Attribute parsing
11
12/// Parsed container-level `#[dataframe(...)]` attributes.
13pub(super) struct DataFrameAttrs {
14    /// Custom companion type name (default: `{TypeName}DataFrame`).
15    pub(super) name: Option<syn::Ident>,
16    /// Enum alignment mode — implicit for enums, accepted but not required.
17    pub(super) align: bool,
18    /// Tag column name for variant discriminator (also supported on structs).
19    pub(super) tag: Option<String>,
20    /// Conflict resolution mode for type collisions across enum variants.
21    /// Currently only "string" is supported: convert conflicting fields via `ToString`.
22    pub(super) conflicts: Option<String>,
23}
24
25/// Parse container-level `#[dataframe(...)]` attributes from the derive input.
26///
27/// Supported keys:
28/// - `name = "CustomName"` -- custom companion type name (default: `{TypeName}DataFrame`)
29/// - `align` -- enum alignment mode (field-name union across variants)
30/// - `tag = "col_name"` -- add a variant discriminator column (works on both structs and enums)
31/// - `conflicts = "string"` -- coerce type-conflicting columns to `String` via `ToString`
32///
33/// Returns `Err` for unknown keys or non-string-literal values.
34fn parse_dataframe_attrs(input: &DeriveInput) -> syn::Result<DataFrameAttrs> {
35    let mut attrs = DataFrameAttrs {
36        name: None,
37        align: false,
38        tag: None,
39        conflicts: None,
40    };
41
42    for attr in &input.attrs {
43        if !attr.path().is_ident("dataframe") {
44            continue;
45        }
46
47        let nested = attr.parse_args_with(
48            syn::punctuated::Punctuated::<syn::Meta, syn::Token![,]>::parse_terminated,
49        )?;
50
51        for meta in &nested {
52            match meta {
53                syn::Meta::NameValue(nv) if nv.path.is_ident("name") => {
54                    if let syn::Expr::Lit(syn::ExprLit {
55                        lit: syn::Lit::Str(lit_str),
56                        ..
57                    }) = &nv.value
58                    {
59                        attrs.name =
60                            Some(format_ident!("{}", lit_str.value(), span = lit_str.span()));
61                    } else {
62                        return Err(syn::Error::new_spanned(
63                            &nv.value,
64                            "expected string literal for `name`",
65                        ));
66                    }
67                }
68                syn::Meta::NameValue(nv) if nv.path.is_ident("tag") => {
69                    if let syn::Expr::Lit(syn::ExprLit {
70                        lit: syn::Lit::Str(lit_str),
71                        ..
72                    }) = &nv.value
73                    {
74                        attrs.tag = Some(lit_str.value());
75                    } else {
76                        return Err(syn::Error::new_spanned(
77                            &nv.value,
78                            "expected string literal for `tag`",
79                        ));
80                    }
81                }
82                syn::Meta::NameValue(nv) if nv.path.is_ident("conflicts") => {
83                    if let syn::Expr::Lit(syn::ExprLit {
84                        lit: syn::Lit::Str(lit_str),
85                        ..
86                    }) = &nv.value
87                    {
88                        let value = lit_str.value();
89                        if value != "string" {
90                            return Err(syn::Error::new_spanned(
91                                lit_str,
92                                "unknown conflict resolution mode; only `\"string\"` is supported",
93                            ));
94                        }
95                        attrs.conflicts = Some(value);
96                    } else {
97                        return Err(syn::Error::new_spanned(
98                            &nv.value,
99                            "expected string literal for `conflicts`",
100                        ));
101                    }
102                }
103                syn::Meta::Path(path) if path.is_ident("align") => {
104                    attrs.align = true;
105                }
106                other => {
107                    return Err(syn::Error::new_spanned(
108                        other,
109                        "unknown dataframe attribute; expected `name`, `align`, `tag`, or `conflicts`",
110                    ));
111                }
112            }
113        }
114    }
115
116    Ok(attrs)
117}
118// endregion
119
120// region: Field-level attribute parsing
121
122/// Parsed field-level `#[dataframe(...)]` attributes.
123///
124/// These attributes control how individual struct/enum fields map to DataFrame columns.
125/// Mutually exclusive combinations (`as_list` + `expand`, `as_list` + `width`,
126/// `as_factor` + `as_list`, `as_factor` + `expand`, `as_factor` + `width`) are
127/// rejected during parsing.
128#[derive(Default)]
129pub(super) struct FieldAttrs {
130    /// `#[dataframe(skip)]` -- omit this field from the DataFrame entirely.
131    pub(super) skip: bool,
132    /// `#[dataframe(rename = "col")]` -- use a custom column name instead of the field name.
133    pub(super) rename: Option<String>,
134    /// `#[dataframe(as_list)]` -- keep a collection field as a single R list column
135    /// (suppresses automatic expansion into suffixed columns).
136    pub(super) as_list: bool,
137    /// `#[dataframe(as_factor)]` -- treat a unit-only inner enum field as an R factor column.
138    /// Only valid on bare-ident enum types (no generic parameters). The inner enum must be
139    /// unit-only (`#[derive(DataFrameRow)]` emits `IntoR` and `IntoR for Vec<Option<Self>>`).
140    pub(super) as_factor: bool,
141    /// `#[dataframe(expand)]` or `#[dataframe(unnest)]` -- explicitly expand a
142    /// collection field into multiple suffixed columns.
143    expand: bool,
144    /// `#[dataframe(width = N)]` -- pin the expansion width for `Vec<T>`, `Box<[T]>`,
145    /// or `&[T]` fields. Rows shorter than `N` get `None` for missing positions.
146    pub(super) width: Option<usize>,
147}
148
149/// Parse field-level `#[dataframe(...)]` attributes from a `syn::Field`.
150///
151/// Recognizes: `skip`, `rename`, `as_list`, `as_factor`, `expand` (alias `unnest`), and `width`.
152/// Validates mutual exclusivity of conflicting options (`as_list` vs `expand`/`width`,
153/// `as_factor` vs `as_list`/`expand`/`width`).
154/// Returns `Err` for unknown keys, invalid width values, or conflicting options.
155pub(super) fn parse_field_attrs(field: &syn::Field) -> syn::Result<FieldAttrs> {
156    let mut attrs = FieldAttrs::default();
157
158    for attr in &field.attrs {
159        if !attr.path().is_ident("dataframe") {
160            continue;
161        }
162
163        attr.parse_nested_meta(|meta| {
164            if meta.path.is_ident("skip") {
165                attrs.skip = true;
166                Ok(())
167            } else if meta.path.is_ident("rename") {
168                let value = meta.value()?;
169                let lit: syn::LitStr = value.parse()?;
170                attrs.rename = Some(lit.value());
171                Ok(())
172            } else if meta.path.is_ident("as_list") {
173                attrs.as_list = true;
174                Ok(())
175            } else if meta.path.is_ident("as_factor") {
176                attrs.as_factor = true;
177                Ok(())
178            } else if meta.path.is_ident("expand") || meta.path.is_ident("unnest") {
179                attrs.expand = true;
180                Ok(())
181            } else if meta.path.is_ident("width") {
182                let value = meta.value()?;
183                let lit: syn::LitInt = value.parse()?;
184                let n: usize = lit.base10_parse()?;
185                if n == 0 {
186                    return Err(syn::Error::new(lit.span(), "`width` must be >= 1"));
187                }
188                attrs.width = Some(n);
189                Ok(())
190            } else {
191                Err(meta.error(
192                    "unknown field attribute; expected `skip`, `rename`, `as_list`, `as_factor`, `expand`, `unnest`, or `width`",
193                ))
194            }
195        })?;
196    }
197
198    let span = field.ident.as_ref().map_or(Span::call_site(), |i| i.span());
199
200    // Validation: conflicting options
201    if attrs.as_list && attrs.expand {
202        return Err(syn::Error::new(
203            span,
204            "`as_list` and `expand`/`unnest` are mutually exclusive",
205        ));
206    }
207    if attrs.as_list && attrs.width.is_some() {
208        return Err(syn::Error::new(
209            span,
210            "`as_list` and `width` are mutually exclusive",
211        ));
212    }
213    if attrs.as_factor && attrs.as_list {
214        return Err(syn::Error::new(
215            span,
216            "`as_factor` and `as_list` are mutually exclusive",
217        ));
218    }
219    if attrs.as_factor && attrs.expand {
220        return Err(syn::Error::new(
221            span,
222            "`as_factor` and `expand`/`unnest` are mutually exclusive",
223        ));
224    }
225    if attrs.as_factor && attrs.width.is_some() {
226        return Err(syn::Error::new(
227            span,
228            "`as_factor` and `width` are mutually exclusive",
229        ));
230    }
231
232    Ok(attrs)
233}
234// endregion
235
236// region: Type classification
237
238/// Classification of a field type for DataFrame column expansion.
239///
240/// Used to decide whether a field maps to a single column or should be
241/// expanded into multiple suffixed columns (e.g., `coords_1`, `coords_2`).
242pub(super) enum FieldTypeKind<'a> {
243    /// Single column (most types). No expansion.
244    Scalar,
245    /// `[T; N]` -- fixed-size array, expands to `N` columns at compile time.
246    /// Contains the element type and array length.
247    FixedArray(&'a syn::Type, usize),
248    /// `Vec<T>` -- variable length, needs `width` attribute or `expand` for expansion.
249    /// Contains the element type.
250    VariableVec(&'a syn::Type),
251    /// `Box<[T]>` -- owned slice, treated like `Vec<T>` for expansion purposes.
252    /// Contains the element type.
253    BoxedSlice(&'a syn::Type),
254    /// `&[T]` -- borrowed slice, treated like `Vec<T>` for expansion purposes.
255    /// Contains the element type.
256    BorrowedSlice(&'a syn::Type),
257    /// `HashMap<K, V>` or `BTreeMap<K, V>` -- expands to two parallel list-columns:
258    /// `<field>_keys` and `<field>_values`. Key order follows the map's own iteration
259    /// order: `BTreeMap` yields sorted keys, `HashMap` yields non-deterministic order.
260    Map {
261        key_ty: &'a syn::Type,
262        val_ty: &'a syn::Type,
263    },
264    /// A struct-typed field whose inner type implements `DataFrameRow`.
265    ///
266    /// Flattened into `<field>_<inner_col>` prefixed columns by default.
267    /// A compile-time assertion against `::miniextendr_api::markers::DataFrameRow`
268    /// is emitted so rustc gives a clear error when the inner type is missing the
269    /// derive.
270    ///
271    /// Suppressed by `#[dataframe(as_list)]` — with as_list the field becomes
272    /// a `Scalar` and uses the ordinary single-column codegen path.
273    Struct {
274        /// The full field type (used for the compile-time DataFrameRow assertion).
275        inner_ty: &'a syn::Type,
276    },
277}
278
279/// Classify a field type for DataFrame column expansion.
280///
281/// Inspects the type AST to detect:
282/// - `[T; N]` or `&[T; N]` -> `FixedArray`
283/// - `&[T]` -> `BorrowedSlice`
284/// - `Vec<T>` -> `VariableVec`
285/// - `Box<[T]>` -> `BoxedSlice`
286/// - `HashMap<K, V>` / `BTreeMap<K, V>` -> `Map`
287/// - Any non-scalar bare path type (single- or multi-segment, e.g. `Point` or
288///   `crate::geom::Point`) -> `Struct`
289/// - Everything else (known scalars, generic types with args, `::abs::Paths`) -> `Scalar`
290///
291/// Returns `Err` for shapes the macro cannot classify and that would silently
292/// become opaque list-columns: `Option<T>`, `Cow<T>`, `Rc<T>`, `Arc<T>`,
293/// `RefCell<T>`, `Cell<T>`, `Mutex<T>`, `RwLock<T>`.  Use
294/// `#[dataframe(as_list)]` to opt into list-column treatment explicitly.
295pub(super) fn classify_field_type(ty: &syn::Type) -> syn::Result<FieldTypeKind<'_>> {
296    // Check for [T; N]
297    if let syn::Type::Array(arr) = ty
298        && let syn::Expr::Lit(syn::ExprLit {
299            lit: syn::Lit::Int(lit_int),
300            ..
301        }) = &arr.len
302        && let Ok(n) = lit_int.base10_parse::<usize>()
303    {
304        return Ok(FieldTypeKind::FixedArray(&arr.elem, n));
305    }
306
307    // Check for &[T] and &[T; N]
308    if let syn::Type::Reference(ref_ty) = ty {
309        // &[T] → BorrowedSlice
310        if let syn::Type::Slice(slice) = &*ref_ty.elem {
311            return Ok(FieldTypeKind::BorrowedSlice(&slice.elem));
312        }
313        // &[T; N] → FixedArray (same as owned)
314        if let syn::Type::Array(arr) = &*ref_ty.elem
315            && let syn::Expr::Lit(syn::ExprLit {
316                lit: syn::Lit::Int(lit_int),
317                ..
318            }) = &arr.len
319            && let Ok(n) = lit_int.base10_parse::<usize>()
320        {
321            return Ok(FieldTypeKind::FixedArray(&arr.elem, n));
322        }
323    }
324
325    if let syn::Type::Path(type_path) = ty
326        && let Some(seg) = type_path.path.segments.last()
327        && let syn::PathArguments::AngleBracketed(args) = &seg.arguments
328    {
329        // Reject wrapper types that would silently fall through to Scalar /
330        // Struct and produce a confusing opaque list-column or a downstream
331        // DataFrameRow assertion error.  These are the common smart-pointer
332        // and interior-mutability types that wrap a meaningful inner type but
333        // that DataFrameRow does not know how to expand.
334        //
335        // The macro has no way to resolve through the wrapper without type-
336        // checking (which is unavailable in proc macros). The user must either
337        // unwrap to the inner type, or annotate with `#[dataframe(as_list)]`
338        // to opt into an explicit opaque list-column.
339        //
340        // IMPORTANT: The rejection fires on *path identity alone*, before we
341        // inspect generic args.  `Cow<'a, T>` has a lifetime as its first
342        // generic argument, not a type; inspecting `args.args.first()` as a
343        // `GenericArgument::Type` would silently skip `Cow`.  Checking ident
344        // before args makes the rejection robust to any generic shape.
345        const REJECTED_WRAPPERS: &[&str] = &[
346            "Option", "Cow", "Rc", "Arc", "RefCell", "Cell", "Mutex", "RwLock",
347        ];
348        let name = seg.ident.to_string();
349        if REJECTED_WRAPPERS.contains(&name.as_str()) {
350            return Err(syn::Error::new_spanned(
351                ty,
352                format!(
353                    "DataFrameRow does not support `{name}<…>` directly as a field type. \
354                     Use `#[dataframe(as_list)]` to opt into an explicit opaque list-column, \
355                     or unwrap to the inner type (e.g. store the inner value directly, using \
356                     a sentinel / empty collection for the absent case)."
357                ),
358            ));
359        }
360
361        // For the collection types below we need the first *type* argument.
362        // Skip any leading lifetime or const arguments (e.g. `Cow<'a, B>`
363        // has a lifetime first, but `Cow` is already rejected above so we
364        // only reach here for other angle-bracketed types).
365        let first_type_arg = args.args.iter().find_map(|arg| {
366            if let syn::GenericArgument::Type(t) = arg {
367                Some(t)
368            } else {
369                None
370            }
371        });
372
373        if let Some(inner) = first_type_arg {
374            // Check for Vec<T>
375            if seg.ident == "Vec" {
376                return Ok(FieldTypeKind::VariableVec(inner));
377            }
378
379            // Check for Box<[T]>
380            if seg.ident == "Box"
381                && let syn::Type::Slice(slice) = inner
382            {
383                return Ok(FieldTypeKind::BoxedSlice(&slice.elem));
384            }
385
386            // Check for HashMap<K, V> and BTreeMap<K, V>
387            if (seg.ident == "HashMap" || seg.ident == "BTreeMap")
388                && let Some(syn::GenericArgument::Type(val_ty)) = args.args.iter().nth(1)
389            {
390                return Ok(FieldTypeKind::Map {
391                    key_ty: inner,
392                    val_ty,
393                });
394            }
395        }
396    }
397
398    // Any remaining path type whose LAST segment is a bare ident (no generic args)
399    // that is NOT a known scalar is treated as a user-defined struct whose
400    // `DataFrameRow` derive should be called.  The compile-time assertion
401    // `_assert_inner_is_dataframe_row::<Inner>()` in the generated code surfaces a
402    // clear error if the inner type doesn't have the derive.
403    //
404    // Known scalars (i32, f64, String, bool, …) are kept as `Scalar` so that existing
405    // enum variants with primitive fields (e.g. `Click { id: i64, x: f64 }`) are not
406    // misclassified as struct fields.
407    //
408    // Multi-segment paths (e.g. `crate::geom::Point`, `geom::Point`) are now correctly
409    // classified here — the previous `segs.len() == 1` guard was overly restrictive.
410    // Paths with a leading `::` (absolute paths like `::std::ffi::CString`) still fall
411    // through to `Scalar`; use `#[dataframe(as_list)]` or an unqualified import if
412    // you need a custom treatment.
413    //
414    // RISK: a user type whose last path segment is named after a known-scalar
415    // (e.g. `mymod::String`) still correctly falls through to `Scalar` because of the
416    // KNOWN_SCALARS check. A type named `mymod::Option` / `mymod::Vec` would shadow
417    // the detection above — accepted per Rust naming convention (canonical names are
418    // rarely shadowed). `#[dataframe(as_list)]` is the documented escape hatch.
419    if let syn::Type::Path(type_path) = ty {
420        let segs = &type_path.path.segments;
421        // No leading colon (rules out `::std::…` absolute paths) and no self-type.
422        if type_path.qself.is_none() && type_path.path.leading_colon.is_none() {
423            let seg = segs.last().unwrap();
424            if matches!(seg.arguments, syn::PathArguments::None) {
425                let name = seg.ident.to_string();
426                // Known scalar type names — keep as Scalar so they do not trigger the
427                // struct-flatten path and the DataFrameRow compile-time assertion.
428                const KNOWN_SCALARS: &[&str] = &[
429                    "bool", "char", "str", "f32", "f64", "i8", "i16", "i32", "i64", "i128",
430                    "isize", "u8", "u16", "u32", "u64", "u128", "usize", "String",
431                ];
432                if !KNOWN_SCALARS.contains(&name.as_str()) {
433                    return Ok(FieldTypeKind::Struct { inner_ty: ty });
434                }
435            }
436        }
437    }
438
439    Ok(FieldTypeKind::Scalar)
440}
441// endregion
442
443// region: Resolved field model (struct path)
444
445/// A resolved struct field ready for codegen -- determines how this field maps
446/// to DataFrame companion struct columns.
447///
448/// Each variant represents a different expansion strategy:
449/// - `Single`: one field -> one `Vec<T>` column
450/// - `ExpandedFixed`: `[T; N]` -> N columns (`name_1..name_N`) at compile time
451/// - `ExpandedVec`: `Vec<T>` + `width = N` -> N `Vec<Option<T>>` columns
452/// - `AutoExpandVec`: `Vec<T>` + `expand` -> dynamic column count at runtime
453enum ResolvedField {
454    /// Single column: `name → Vec<ty>`.
455    Single(Box<SingleFieldData>),
456    /// Expanded fixed array: `name: [T; N]` → `name_1..name_N`.
457    ExpandedFixed(Box<ExpandedFixedData>),
458    /// Expanded variable vec with pinned width: `name: Vec<T>` + `width = N`.
459    ExpandedVec(Box<ExpandedVecData>),
460    /// Auto-expanded Vec<T>/Box<[T]>: column count determined at runtime from max row length.
461    AutoExpandVec(Box<AutoExpandVecData>),
462    /// Struct field whose inner type implements `DataFrameRow` (issue #485).
463    /// Companion holds `Vec<Inner>`; `into_data_frame` calls `Inner::to_dataframe`
464    /// and flattens columns under the `<base>_` prefix.
465    Struct(Box<StructFieldData>),
466}
467
468/// Data for [`ResolvedField::Single`].
469struct SingleFieldData {
470    /// Rust field name (for access).
471    rust_name: syn::Ident,
472    /// Column name in the DataFrame.
473    col_name: syn::Ident,
474    /// Column name string.
475    col_name_str: String,
476    /// Field type stored in the companion `Vec<#ty>`. For `#[dataframe(as_list)]`
477    /// on a struct-typed field this is overridden to `::miniextendr_api::list::List`
478    /// — see `needs_into_list`.
479    ty: syn::Type,
480    /// Index in tuple struct (None for named).
481    tuple_index: Option<syn::Index>,
482    /// `#[dataframe(as_list)]` on a struct-typed field (#485 workaround).
483    /// When `true`, the companion field type is overridden to `List` and
484    /// `From<Vec<Row>>` calls `IntoList::into_list()` on each row value.
485    needs_into_list: bool,
486}
487
488/// Data for [`ResolvedField::ExpandedFixed`].
489struct ExpandedFixedData {
490    /// Rust field name.
491    rust_name: syn::Ident,
492    /// Base column name (before suffix).
493    base_name: String,
494    /// Element type T.
495    elem_ty: syn::Type,
496    /// Array length N.
497    len: usize,
498    /// Index in tuple struct.
499    tuple_index: Option<syn::Index>,
500}
501
502/// Data for [`ResolvedField::ExpandedVec`].
503struct ExpandedVecData {
504    /// Rust field name.
505    rust_name: syn::Ident,
506    /// Base column name.
507    base_name: String,
508    /// Element type T.
509    elem_ty: syn::Type,
510    /// Pinned width.
511    width: usize,
512    /// Index in tuple struct.
513    tuple_index: Option<syn::Index>,
514}
515
516/// Data for [`ResolvedField::Struct`].
517///
518/// A struct field whose inner type implements `DataFrameRow`. The companion
519/// struct holds `Vec<Inner>` (the same type users already pass into
520/// `to_dataframe(vec![...])`). At `into_data_frame()` time the inner rows are
521/// converted via `Inner::to_dataframe` → `into_named_columns()`, prefixed with
522/// `<base_name>_`, and pushed into the parent data.frame.
523struct StructFieldData {
524    /// Rust field name (for access on the row type).
525    rust_name: syn::Ident,
526    /// Companion struct field name (ident).
527    col_name: syn::Ident,
528    /// Column name base used as the R-side prefix (`<base>_<inner_col>`).
529    col_name_str: String,
530    /// Inner struct type (used for `to_dataframe` dispatch + DataFrameRow assertion).
531    inner_ty: syn::Type,
532    /// Index in tuple struct (None for named).
533    tuple_index: Option<syn::Index>,
534}
535
536/// Data for [`ResolvedField::AutoExpandVec`].
537struct AutoExpandVecData {
538    /// Rust field name (for row access).
539    rust_name: syn::Ident,
540    /// Companion struct field name (ident).
541    col_name: syn::Ident,
542    /// Column name base string (for suffixed column names).
543    col_name_str: String,
544    /// Element type T.
545    elem_ty: syn::Type,
546    /// Container type for companion struct (Vec<T> or Box<[T]>).
547    container_ty: syn::Type,
548    /// Index in tuple struct.
549    tuple_index: Option<syn::Index>,
550}
551
552/// Resolve a struct field into a [`ResolvedField`], applying field attributes.
553///
554/// Combines the field's `#[dataframe(...)]` attributes with its type classification
555/// to determine the codegen strategy:
556/// - `skip` -> returns `None`
557/// - `as_list` -> `Single` (suppresses expansion)
558/// - `FixedArray` -> `ExpandedFixed` (compile-time expansion to N columns)
559/// - `VariableVec`/`BoxedSlice`/`BorrowedSlice` + `width` -> `ExpandedVec`
560/// - `VariableVec`/`BoxedSlice`/`BorrowedSlice` + `expand` -> `AutoExpandVec`
561/// - Everything else -> `Single`
562///
563/// Returns `Err` if `width` or `expand` is used on an incompatible type.
564fn resolve_struct_field(
565    field: &syn::Field,
566    index: usize,
567    is_tuple: bool,
568) -> syn::Result<Option<ResolvedField>> {
569    let field_attrs = parse_field_attrs(field)?;
570
571    if field_attrs.skip {
572        return Ok(None);
573    }
574
575    let rust_name = if is_tuple {
576        format_ident!("_{}", index)
577    } else {
578        field.ident.as_ref().unwrap().clone()
579    };
580
581    let col_name_str = field_attrs
582        .rename
583        .clone()
584        .unwrap_or_else(|| rust_name.to_string());
585    let col_name = format_ident!("{}", col_name_str);
586
587    let tuple_index = if is_tuple {
588        Some(syn::Index::from(index))
589    } else {
590        None
591    };
592
593    let ty = &field.ty;
594    // Propagate classification errors (e.g. Option<T>, Arc<T>) when as_list is
595    // not set.  The as_list branch below uses `.ok()` to suppress errors.
596    let kind = classify_field_type(ty);
597
598    // as_list suppresses expansion. For struct-typed fields (#485 opt-out), the
599    // companion stores `Vec<List>` and From<Vec<Row>> converts each row value
600    // via `IntoList::into_list()`. For non-struct as_list fields, the existing
601    // behavior is preserved: companion stores `Vec<#ty>` and the field type is
602    // serialized natively (this requires `Vec<#ty>: IntoR`).
603    if field_attrs.as_list {
604        // Use `.ok()` here: `as_list` is an explicit opt-in, so wrapper types
605        // like `Option<T>` / `Arc<T>` are allowed — they become opaque list-
606        // columns. Any classification error is suppressed and treated as non-Struct.
607        let (final_ty, needs_into_list) = match classify_field_type(ty).ok() {
608            Some(FieldTypeKind::Struct { .. }) => {
609                (syn::parse_quote!(::miniextendr_api::list::List), true)
610            }
611            _ => (ty.clone(), false),
612        };
613        return Ok(Some(ResolvedField::Single(Box::new(SingleFieldData {
614            rust_name,
615            col_name,
616            col_name_str,
617            ty: final_ty,
618            tuple_index,
619            needs_into_list,
620        }))));
621    }
622
623    match kind? {
624        FieldTypeKind::FixedArray(elem_ty, len) => Ok(Some(ResolvedField::ExpandedFixed(
625            Box::new(ExpandedFixedData {
626                rust_name,
627                base_name: col_name_str,
628                elem_ty: elem_ty.clone(),
629                len,
630                tuple_index,
631            }),
632        ))),
633        FieldTypeKind::VariableVec(elem_ty)
634        | FieldTypeKind::BoxedSlice(elem_ty)
635        | FieldTypeKind::BorrowedSlice(elem_ty) => {
636            if let Some(width) = field_attrs.width {
637                Ok(Some(ResolvedField::ExpandedVec(Box::new(
638                    ExpandedVecData {
639                        rust_name,
640                        base_name: col_name_str,
641                        elem_ty: elem_ty.clone(),
642                        width,
643                        tuple_index,
644                    },
645                ))))
646            } else if field_attrs.expand {
647                Ok(Some(ResolvedField::AutoExpandVec(Box::new(
648                    AutoExpandVecData {
649                        rust_name,
650                        col_name,
651                        col_name_str,
652                        elem_ty: elem_ty.clone(),
653                        container_ty: ty.clone(),
654                        tuple_index,
655                    },
656                ))))
657            } else {
658                // No expansion — keep as opaque single column
659                Ok(Some(ResolvedField::Single(Box::new(SingleFieldData {
660                    rust_name,
661                    col_name,
662                    col_name_str,
663                    ty: ty.clone(),
664                    tuple_index,
665                    needs_into_list: false,
666                }))))
667            }
668        }
669        // Struct-in-struct flattening (issue #485): inner type must implement
670        // `DataFrameRow`. Flattening happens at `into_data_frame()` time; the
671        // companion stores `Vec<Inner>`. `as_list` opts out (handled above).
672        FieldTypeKind::Struct { inner_ty } => {
673            if field_attrs.width.is_some() {
674                return Err(syn::Error::new_spanned(
675                    ty,
676                    "`width` is only valid on `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
677                ));
678            }
679            if field_attrs.expand {
680                return Err(syn::Error::new_spanned(
681                    ty,
682                    "`expand`/`unnest` is only valid on `[T; N]`, `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
683                ));
684            }
685            Ok(Some(ResolvedField::Struct(Box::new(StructFieldData {
686                rust_name,
687                col_name,
688                col_name_str,
689                inner_ty: inner_ty.clone(),
690                tuple_index,
691            }))))
692        }
693        FieldTypeKind::Scalar | FieldTypeKind::Map { .. } => {
694            if field_attrs.width.is_some() {
695                return Err(syn::Error::new_spanned(
696                    ty,
697                    "`width` is only valid on `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
698                ));
699            }
700            if field_attrs.expand {
701                return Err(syn::Error::new_spanned(
702                    ty,
703                    "`expand`/`unnest` is only valid on `[T; N]`, `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
704                ));
705            }
706            Ok(Some(ResolvedField::Single(Box::new(SingleFieldData {
707                rust_name,
708                col_name,
709                col_name_str,
710                ty: ty.clone(),
711                tuple_index,
712                needs_into_list: false,
713            }))))
714        }
715    }
716}
717// endregion
718
719// region: Top-level dispatch
720
721/// Derive `DataFrameRow`: generates a companion DataFrame type with collection fields.
722///
723/// # Requirements
724///
725/// For structs: the type must implement `IntoList`.
726/// For enums: all variants must have named fields.
727///
728/// # Generated Items
729///
730/// For a struct `Measurement { time: f64, value: f64 }`:
731/// - Struct `MeasurementDataFrame { time: Vec<f64>, value: Vec<f64> }`
732/// - `impl IntoDataFrame for MeasurementDataFrame`
733/// - `impl From<Vec<Measurement>> for MeasurementDataFrame`
734/// - `impl IntoIterator for MeasurementDataFrame`
735/// - Associated methods on `Measurement`:
736///   - `to_dataframe(Vec<Self>) -> MeasurementDataFrame`
737///   - `from_dataframe(MeasurementDataFrame) -> Vec<Self>`
738///
739/// For an enum:
740/// - Companion struct with `Vec<Option<T>>` columns (field-name union)
741/// - Optional tag column for variant discrimination
742/// - `impl From<Vec<Enum>> for EnumDataFrame`
743/// - `impl IntoDataFrame for EnumDataFrame`
744/// - Associated `to_dataframe` method
745///
746/// # Attributes
747///
748/// - `#[dataframe(name = "CustomName")]` — Custom companion type name
749/// - `#[dataframe(align)]` — Enum alignment mode (accepted but implicit)
750/// - `#[dataframe(tag = "col")]` — Add variant discriminator column
751///
752/// Both struct and enum companion types get `from_rows()` (sequential) and
753/// `from_rows_par()` (parallel, `#[cfg(feature = "rayon")]`) methods automatically.
754pub fn derive_dataframe_row(input: DeriveInput) -> syn::Result<TokenStream> {
755    let row_name = &input.ident;
756
757    // Allow lifetime parameters (needed for &[T] borrowed slice fields).
758    // Allow type parameters on unit-only enums (all variants are unit) — the
759    // companion struct has no field columns to type-parameterise, and the three
760    // unit-enum impls (UnitEnumFactor, IntoR, IntoList) handle generics via the
761    // split path in enum_expansion.rs.
762    // Reject type and const parameters for everything else.
763    let has_type_params = input.generics.type_params().next().is_some();
764    let has_const_params = input.generics.const_params().next().is_some();
765    if has_type_params || has_const_params {
766        let is_unit_only_enum = matches!(&input.data, Data::Enum(e)
767            if e.variants.iter().all(|v| matches!(v.fields, Fields::Unit)));
768        if !is_unit_only_enum {
769            return Err(syn::Error::new_spanned(
770                &input.generics,
771                "DataFrameRow does not support type or const generic parameters",
772            ));
773        }
774    }
775
776    // Parse attributes
777    let attrs = parse_dataframe_attrs(&input)?;
778
779    let df_name = attrs
780        .name
781        .clone()
782        .unwrap_or_else(|| format_ident!("{}DataFrame", row_name));
783
784    let base = match &input.data {
785        Data::Struct(data) => {
786            // `align` is a no-op on structs (only semantically meaningful for enums)
787            derive_struct_dataframe(row_name, &input, data, &df_name, &attrs)
788        }
789        Data::Enum(data) => {
790            // align is implicit for enums — accept but don't require
791            derive_enum_dataframe(row_name, &input, data, &df_name, &attrs)
792        }
793        Data::Union(_) => Err(syn::Error::new_spanned(
794            row_name,
795            "DataFrameRow does not support unions",
796        )),
797    }?;
798
799    // Generate IntoR for the companion DataFrame type so it can be returned
800    // directly from #[miniextendr] functions. This ensures both the standalone
801    // #[derive(DataFrameRow)] path and the #[miniextendr(dataframe)] path
802    // produce identical output.
803    let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
804    Ok(quote::quote! {
805        #base
806
807        impl #impl_generics ::miniextendr_api::into_r::IntoR for #df_name #ty_generics #where_clause {
808            type Error = std::convert::Infallible;
809
810            #[inline]
811            fn try_into_sexp(self) -> Result<::miniextendr_api::ffi::SEXP, Self::Error> {
812                Ok(self.into_sexp())
813            }
814
815            #[inline]
816            unsafe fn try_into_sexp_unchecked(self) -> Result<::miniextendr_api::ffi::SEXP, Self::Error> {
817                self.try_into_sexp()
818            }
819
820            #[inline]
821            fn into_sexp(self) -> ::miniextendr_api::ffi::SEXP {
822                ::miniextendr_api::convert::IntoDataFrame::into_data_frame(self).into_sexp()
823            }
824
825            #[inline]
826            unsafe fn into_sexp_unchecked(self) -> ::miniextendr_api::ffi::SEXP {
827                ::miniextendr_api::convert::IntoDataFrame::into_data_frame(self).into_sexp()
828            }
829        }
830    })
831}
832// endregion
833
834// region: Struct path (existing logic, extracted)
835
836/// Generate `DataFrameRow` expansion for struct types.
837///
838/// Produces:
839/// - A companion struct `{Name}DataFrame` with `Vec<T>` columns
840/// - `impl IntoDataFrame for {Name}DataFrame`
841/// - `impl From<Vec<{Name}>> for {Name}DataFrame`
842/// - `impl IntoIterator` (for named structs without expansion)
843/// - Associated methods: `to_dataframe`, `from_dataframe`, `from_rows`, `from_rows_par`
844/// - A compile-time `IntoList` assertion (for non-expanded named structs)
845///
846/// Handles fixed-array expansion (`[T; N]`), pinned-width Vec expansion
847/// (`Vec<T>` + `width`), and auto-expand Vec (`Vec<T>` + `expand`).
848fn derive_struct_dataframe(
849    row_name: &syn::Ident,
850    input: &DeriveInput,
851    data: &syn::DataStruct,
852    df_name: &syn::Ident,
853    attrs: &DataFrameAttrs,
854) -> syn::Result<TokenStream> {
855    let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
856
857    let is_tuple_struct = matches!(&data.fields, Fields::Unnamed(_));
858    let is_unit_struct = matches!(&data.fields, Fields::Unit);
859
860    // Resolve fields through the new FieldAttrs + type classification system.
861    let resolved: Vec<ResolvedField> = match &data.fields {
862        Fields::Named(fields) => {
863            let mut out = Vec::new();
864            for (i, f) in fields.named.iter().enumerate() {
865                if let Some(rf) = resolve_struct_field(f, i, false)? {
866                    out.push(rf);
867                }
868            }
869            out
870        }
871        Fields::Unnamed(fields) => {
872            let mut out = Vec::new();
873            for (i, f) in fields.unnamed.iter().enumerate() {
874                if let Some(rf) = resolve_struct_field(f, i, true)? {
875                    out.push(rf);
876                }
877            }
878            out
879        }
880        Fields::Unit => vec![],
881    };
882
883    // Check whether any field uses expansion — affects whether we can generate
884    // IntoIterator (expanded fields change the companion struct shape).
885    let has_expansion = resolved
886        .iter()
887        .any(|rf| !matches!(rf, ResolvedField::Single(..)));
888    // Track which Rust fields were skipped (for destructure patterns).
889    let skipped_fields: Vec<syn::Ident> = match &data.fields {
890        Fields::Named(fields) => fields
891            .named
892            .iter()
893            .filter_map(|f| {
894                let fa = parse_field_attrs(f).ok()?;
895                if fa.skip {
896                    Some(f.ident.as_ref().unwrap().clone())
897                } else {
898                    None
899                }
900            })
901            .collect(),
902        _ => vec![],
903    };
904
905    let has_tag = attrs.tag.is_some();
906    let row_name_str = row_name.to_string();
907
908    // region: Build flat column lists from resolved fields
909    // Each resolved field may produce 1..N columns.
910    struct FlatCol {
911        /// Companion struct field name.
912        df_field: syn::Ident,
913        /// Column name string in the R data frame.
914        col_name_str: String,
915        /// Type of the companion Vec<T>.
916        vec_elem_ty: syn::Type,
917        /// `#[dataframe(as_list)]` on a struct-typed field — companion stores
918        /// `Vec<List>`. The `from_rows_par` pre-pass handles these sequentially
919        /// instead of scatter-writing (List doesn't implement Default).
920        needs_into_list: bool,
921    }
922
923    let mut flat_cols: Vec<FlatCol> = Vec::new();
924
925    for rf in &resolved {
926        match rf {
927            ResolvedField::Single(data) => {
928                flat_cols.push(FlatCol {
929                    df_field: data.col_name.clone(),
930                    col_name_str: data.col_name_str.clone(),
931                    vec_elem_ty: data.ty.clone(),
932                    needs_into_list: data.needs_into_list,
933                });
934            }
935            ResolvedField::ExpandedFixed(data) => {
936                for i in 1..=data.len {
937                    let name = format!("{}_{}", data.base_name, i);
938                    flat_cols.push(FlatCol {
939                        df_field: format_ident!("{}_{}", data.base_name, i),
940                        col_name_str: name,
941                        vec_elem_ty: data.elem_ty.clone(),
942                        needs_into_list: false,
943                    });
944                }
945            }
946            ResolvedField::ExpandedVec(data) => {
947                for i in 1..=data.width {
948                    let name = format!("{}_{}", data.base_name, i);
949                    let elem_ty = &data.elem_ty;
950                    let opt_ty: syn::Type = syn::parse_quote!(Option<#elem_ty>);
951                    flat_cols.push(FlatCol {
952                        df_field: format_ident!("{}_{}", data.base_name, i),
953                        col_name_str: name,
954                        vec_elem_ty: opt_ty,
955                        needs_into_list: false,
956                    });
957                }
958            }
959            // AutoExpandVec / Struct do not produce FlatCols — handled separately.
960            ResolvedField::AutoExpandVec(..) | ResolvedField::Struct(..) => {}
961        }
962    }
963    // endregion
964
965    // region: Collect auto-expand fields
966    struct AutoExpandCol {
967        /// Companion struct field name.
968        df_field: syn::Ident,
969        /// Container type (Vec<T> or Box<[T]>).
970        container_ty: syn::Type,
971    }
972
973    let auto_expand_cols: Vec<AutoExpandCol> = resolved
974        .iter()
975        .filter_map(|rf| {
976            if let ResolvedField::AutoExpandVec(data) = rf {
977                Some(AutoExpandCol {
978                    df_field: format_ident!("{}", data.col_name_str),
979                    container_ty: data.container_ty.clone(),
980                })
981            } else {
982                None
983            }
984        })
985        .collect();
986    let has_auto_expand = !auto_expand_cols.is_empty();
987    // endregion
988
989    // region: Collect struct (DataFrameRow-flattened) fields (#485)
990    //
991    // Only the codegen-time bits are mirrored here — `rust_name` / `tuple_index`
992    // are read directly off `ResolvedField::Struct` at the per-row pushes site.
993    struct StructCol {
994        df_field: syn::Ident,
995        col_name_str: String,
996        inner_ty: syn::Type,
997    }
998
999    let struct_cols: Vec<StructCol> = resolved
1000        .iter()
1001        .filter_map(|rf| {
1002            if let ResolvedField::Struct(data) = rf {
1003                Some(StructCol {
1004                    df_field: data.col_name.clone(),
1005                    col_name_str: data.col_name_str.clone(),
1006                    inner_ty: data.inner_ty.clone(),
1007                })
1008            } else {
1009                None
1010            }
1011        })
1012        .collect();
1013    let has_struct = !struct_cols.is_empty();
1014
1015    // Any `#[dataframe(as_list)]` on a struct-typed field stores `List` in the
1016    // companion (#485 opt-out). We can't round-trip List back to the inner
1017    // struct without a `FromList`-like trait, and `List` doesn't impl
1018    // `Default`, so several codegen branches need to suppress themselves:
1019    // IntoIterator generation, the `IntoList` compile-time assertion, and
1020    // `from_rows_par`.
1021    let has_into_list_struct = resolved
1022        .iter()
1023        .any(|rf| matches!(rf, ResolvedField::Single(d) if d.needs_into_list));
1024    // endregion
1025
1026    // region: Companion struct
1027    let tag_field_decl = if has_tag {
1028        quote! { pub _tag: Vec<String>, }
1029    } else {
1030        TokenStream::new()
1031    };
1032
1033    let mut df_fields_tokens: Vec<TokenStream> = flat_cols
1034        .iter()
1035        .map(|fc| {
1036            let name = &fc.df_field;
1037            let ty = &fc.vec_elem_ty;
1038            quote! { pub #name: Vec<#ty> }
1039        })
1040        .collect();
1041    for ac in &auto_expand_cols {
1042        let name = &ac.df_field;
1043        let cty = &ac.container_ty;
1044        df_fields_tokens.push(quote! { pub #name: Vec<#cty> });
1045    }
1046    for sc in &struct_cols {
1047        let name = &sc.df_field;
1048        let ity = &sc.inner_ty;
1049        df_fields_tokens.push(quote! { pub #name: Vec<#ity> });
1050    }
1051
1052    let len_field_decl = if flat_cols.is_empty()
1053        && auto_expand_cols.is_empty()
1054        && struct_cols.is_empty()
1055        && !has_tag
1056    {
1057        quote! { pub _len: usize, }
1058    } else {
1059        TokenStream::new()
1060    };
1061
1062    let dataframe_struct = quote! {
1063        #[derive(Debug, Clone)]
1064        pub struct #df_name #impl_generics #where_clause {
1065            #tag_field_decl
1066            #len_field_decl
1067            #(#df_fields_tokens),*
1068        }
1069    };
1070    // endregion
1071
1072    // region: IntoDataFrame
1073    let length_ref = if has_tag {
1074        quote! { self._tag.len() }
1075    } else if !flat_cols.is_empty() {
1076        let first = &flat_cols[0].df_field;
1077        quote! { self.#first.len() }
1078    } else if !auto_expand_cols.is_empty() {
1079        let first = &auto_expand_cols[0].df_field;
1080        quote! { self.#first.len() }
1081    } else if !struct_cols.is_empty() {
1082        let first = &struct_cols[0].df_field;
1083        quote! { self.#first.len() }
1084    } else {
1085        quote! { self._len }
1086    };
1087
1088    // Each pair protects its SEXP via `__scope.protect_raw` so previously-built
1089    // column SEXPs survive subsequent column allocations. Pre-fix the raw
1090    // `vec![(name, into_sexp(...)), ...]` left every SEXP unrooted across the
1091    // next column's allocations — UAF under gctorture
1092    // (reviews/2026-05-07-gctorture-audit.md).
1093    let tag_pair = if let Some(ref tag_name) = attrs.tag {
1094        quote! { (#tag_name, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self._tag))), }
1095    } else {
1096        TokenStream::new()
1097    };
1098
1099    let df_pairs: Vec<TokenStream> = flat_cols
1100        .iter()
1101        .map(|fc| {
1102            let name = &fc.df_field;
1103            let name_str = &fc.col_name_str;
1104            quote! { (#name_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#name))) }
1105        })
1106        .collect();
1107
1108    let mut length_checks: Vec<TokenStream> = flat_cols
1109        .iter()
1110        .map(|fc| {
1111            let name = &fc.df_field;
1112            let name_str = &fc.col_name_str;
1113            quote! {
1114                assert!(
1115                    self.#name.len() == _n_rows,
1116                    "column length mismatch in {}: column `{}` has length {} but expected {}",
1117                    stringify!(#df_name),
1118                    #name_str,
1119                    self.#name.len(),
1120                    _n_rows,
1121                );
1122            }
1123        })
1124        .collect();
1125    for sc in &struct_cols {
1126        let name = &sc.df_field;
1127        let name_str = &sc.col_name_str;
1128        length_checks.push(quote! {
1129            assert!(
1130                self.#name.len() == _n_rows,
1131                "column length mismatch in {}: struct column `{}` has length {} but expected {}",
1132                stringify!(#df_name),
1133                #name_str,
1134                self.#name.len(),
1135                _n_rows,
1136            );
1137        });
1138    }
1139
1140    let into_dataframe_impl = if has_auto_expand || has_struct {
1141        // Dynamic pair building: iterate resolved fields in order,
1142        // emitting static pairs for flat columns and runtime-expanded
1143        // pairs for auto-expand fields.
1144        let tag_push_pair = if let Some(ref tag_name) = attrs.tag {
1145            quote! {
1146                __df_pairs.push((#tag_name.to_string(), __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self._tag))));
1147            }
1148        } else {
1149            TokenStream::new()
1150        };
1151
1152        let pair_pushes: Vec<TokenStream> = resolved
1153            .iter()
1154            .map(|rf| match rf {
1155                ResolvedField::Single(data) => {
1156                    let col_name = &data.col_name;
1157                    let col_name_str = &data.col_name_str;
1158                    quote! {
1159                        __df_pairs.push((
1160                            #col_name_str.to_string(),
1161                            __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#col_name)),
1162                        ));
1163                    }
1164                }
1165                ResolvedField::ExpandedFixed(data) => {
1166                    let pushes: Vec<TokenStream> = (1..=data.len)
1167                        .map(|i| {
1168                            let name = format!("{}_{}", data.base_name, i);
1169                            let ident = format_ident!("{}_{}", data.base_name, i);
1170                            quote! {
1171                                __df_pairs.push((
1172                                    #name.to_string(),
1173                                    __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#ident)),
1174                                ));
1175                            }
1176                        })
1177                        .collect();
1178                    quote! { #(#pushes)* }
1179                }
1180                ResolvedField::ExpandedVec(data) => {
1181                    let pushes: Vec<TokenStream> = (1..=data.width)
1182                        .map(|i| {
1183                            let name = format!("{}_{}", data.base_name, i);
1184                            let ident = format_ident!("{}_{}", data.base_name, i);
1185                            quote! {
1186                                __df_pairs.push((
1187                                    #name.to_string(),
1188                                    __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#ident)),
1189                                ));
1190                            }
1191                        })
1192                        .collect();
1193                    quote! { #(#pushes)* }
1194                }
1195                ResolvedField::AutoExpandVec(data) => {
1196                    let col_name = &data.col_name;
1197                    let col_name_str = &data.col_name_str;
1198                    let elem_ty = &data.elem_ty;
1199                    quote! {
1200                        {
1201                            let __auto = self.#col_name;
1202                            let __max = __auto.iter().map(|v| v.len()).max().unwrap_or(0);
1203                            let mut __cols: Vec<Vec<Option<#elem_ty>>> = (0..__max)
1204                                .map(|_| Vec::with_capacity(_n_rows))
1205                                .collect();
1206                            for __row_vec in &__auto {
1207                                for (__i, __col) in __cols.iter_mut().enumerate() {
1208                                    __col.push(__row_vec.get(__i).cloned());
1209                                }
1210                            }
1211                            for (__i, __col) in __cols.into_iter().enumerate() {
1212                                __df_pairs.push((
1213                                    format!("{}_{}", #col_name_str, __i + 1),
1214                                    __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(__col)),
1215                                ));
1216                            }
1217                        }
1218                    }
1219                }
1220                ResolvedField::Struct(data) => {
1221                    // Issue #485: convert `Vec<Inner>` via Inner::to_dataframe,
1222                    // extract its named columns, and push under `<base>_` prefix.
1223                    let col_name = &data.col_name;
1224                    let base_name_str = &data.col_name_str;
1225                    let inner_ty = &data.inner_ty;
1226                    quote! {
1227                        {
1228                            let __inner_df = <#inner_ty>::to_dataframe(self.#col_name);
1229                            let __inner_cols = ::miniextendr_api::convert::IntoDataFrame::into_named_columns(__inner_df);
1230                            for (__inner_col_name, __inner_col_sexp) in __inner_cols {
1231                                // Protect the source column SEXP across subsequent allocations.
1232                                let __src = __scope.protect_raw(__inner_col_sexp);
1233                                __df_pairs.push((
1234                                    format!("{}_{}", #base_name_str, __inner_col_name),
1235                                    __src,
1236                                ));
1237                            }
1238                        }
1239                    }
1240                }
1241            })
1242            .collect();
1243
1244        quote! {
1245            impl #impl_generics ::miniextendr_api::convert::IntoDataFrame for #df_name #ty_generics #where_clause {
1246                fn into_data_frame(self) -> ::miniextendr_api::List {
1247                    let _n_rows = #length_ref;
1248                    #(#length_checks)*
1249                    // SAFETY: into_data_frame only runs on the R main thread.
1250                    // ProtectScope keeps each column SEXP rooted across the
1251                    // next column's allocations; from_raw_pairs writes them
1252                    // into the parent VECSXP before we drop the scope.
1253                    unsafe {
1254                        let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
1255                        let mut __df_pairs: Vec<(
1256                            String,
1257                            ::miniextendr_api::ffi::SEXP,
1258                        )> = Vec::new();
1259                        #tag_push_pair
1260                        #(#pair_pushes)*
1261                        ::miniextendr_api::list::List::from_raw_pairs(__df_pairs)
1262                            .set_class_str(&["data.frame"])
1263                            .set_row_names_int(_n_rows)
1264                    }
1265                }
1266            }
1267        }
1268    } else {
1269        quote! {
1270            impl #impl_generics ::miniextendr_api::convert::IntoDataFrame for #df_name #ty_generics #where_clause {
1271                fn into_data_frame(self) -> ::miniextendr_api::List {
1272                    let _n_rows = #length_ref;
1273                    #(#length_checks)*
1274                    // SAFETY: see auto-expand branch.
1275                    unsafe {
1276                        let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
1277                        ::miniextendr_api::list::List::from_raw_pairs(vec![
1278                            #tag_pair
1279                            #(#df_pairs),*
1280                        ])
1281                        .set_class_str(&["data.frame"])
1282                        .set_row_names_int(_n_rows)
1283                    }
1284                }
1285            }
1286        }
1287    };
1288    // endregion
1289
1290    // region: From<Vec<RowType>>
1291    let mut col_vec_inits: Vec<TokenStream> = flat_cols
1292        .iter()
1293        .map(|fc| {
1294            let name = &fc.df_field;
1295            let ty = &fc.vec_elem_ty;
1296            quote! { let mut #name: Vec<#ty> = Vec::with_capacity(len); }
1297        })
1298        .collect();
1299    for ac in &auto_expand_cols {
1300        let name = &ac.df_field;
1301        let cty = &ac.container_ty;
1302        col_vec_inits.push(quote! { let mut #name: Vec<#cty> = Vec::with_capacity(len); });
1303    }
1304    for sc in &struct_cols {
1305        let name = &sc.df_field;
1306        let ity = &sc.inner_ty;
1307        col_vec_inits.push(quote! { let mut #name: Vec<#ity> = Vec::with_capacity(len); });
1308    }
1309
1310    let tag_init = if has_tag {
1311        quote! { let mut _tag: Vec<String> = Vec::with_capacity(len); }
1312    } else {
1313        TokenStream::new()
1314    };
1315
1316    let tag_push = if has_tag {
1317        quote! { _tag.push(#row_name_str.to_string()); }
1318    } else {
1319        TokenStream::new()
1320    };
1321
1322    // Generate push statements for each resolved field
1323    let col_pushes: Vec<TokenStream> = resolved
1324        .iter()
1325        .map(|rf| match rf {
1326            ResolvedField::Single(data) => {
1327                let access = if let Some(idx) = &data.tuple_index {
1328                    quote! { row.#idx }
1329                } else {
1330                    let rust_name = &data.rust_name;
1331                    quote! { row.#rust_name }
1332                };
1333                let col_name = &data.col_name;
1334                if data.needs_into_list {
1335                    quote! { #col_name.push(::miniextendr_api::list::IntoList::into_list(#access)); }
1336                } else {
1337                    quote! { #col_name.push(#access); }
1338                }
1339            }
1340            ResolvedField::ExpandedFixed(data) => {
1341                let access = if let Some(idx) = &data.tuple_index {
1342                    quote! { row.#idx }
1343                } else {
1344                    let rust_name = &data.rust_name;
1345                    quote! { row.#rust_name }
1346                };
1347                let bind = format_ident!("__arr_{}", data.rust_name);
1348                let pushes: Vec<TokenStream> = (0..data.len)
1349                    .map(|i| {
1350                        let col_ident = format_ident!("{}_{}", data.base_name, i + 1);
1351                        let idx = syn::Index::from(i);
1352                        quote! { #col_ident.push(#bind[#idx]); }
1353                    })
1354                    .collect();
1355                quote! {
1356                    let #bind = #access;
1357                    #(#pushes)*
1358                }
1359            }
1360            ResolvedField::ExpandedVec(data) => {
1361                let access = if let Some(idx) = &data.tuple_index {
1362                    quote! { row.#idx }
1363                } else {
1364                    let rust_name = &data.rust_name;
1365                    quote! { row.#rust_name }
1366                };
1367                let bind = format_ident!("__vec_{}", data.rust_name);
1368                let pushes: Vec<TokenStream> = (0..data.width)
1369                    .map(|i| {
1370                        let col_ident = format_ident!("{}_{}", data.base_name, i + 1);
1371                        quote! { #col_ident.push(#bind.get(#i).cloned()); }
1372                    })
1373                    .collect();
1374                quote! {
1375                    let #bind = #access;
1376                    #(#pushes)*
1377                }
1378            }
1379            ResolvedField::AutoExpandVec(data) => {
1380                let access = if let Some(idx) = &data.tuple_index {
1381                    quote! { row.#idx }
1382                } else {
1383                    let rust_name = &data.rust_name;
1384                    quote! { row.#rust_name }
1385                };
1386                let col_name = &data.col_name;
1387                quote! { #col_name.push(#access); }
1388            }
1389            ResolvedField::Struct(data) => {
1390                let access = if let Some(idx) = &data.tuple_index {
1391                    quote! { row.#idx }
1392                } else {
1393                    let rust_name = &data.rust_name;
1394                    quote! { row.#rust_name }
1395                };
1396                let col_name = &data.col_name;
1397                quote! { #col_name.push(#access); }
1398            }
1399        })
1400        .collect();
1401
1402    let tag_struct_field = if has_tag {
1403        quote! { _tag, }
1404    } else {
1405        TokenStream::new()
1406    };
1407
1408    let len_struct_field = if flat_cols.is_empty()
1409        && auto_expand_cols.is_empty()
1410        && struct_cols.is_empty()
1411        && !has_tag
1412    {
1413        quote! { _len: len, }
1414    } else {
1415        TokenStream::new()
1416    };
1417
1418    let mut col_struct_fields: Vec<TokenStream> = flat_cols
1419        .iter()
1420        .map(|fc| {
1421            let name = &fc.df_field;
1422            quote! { #name }
1423        })
1424        .collect();
1425    for ac in &auto_expand_cols {
1426        let name = &ac.df_field;
1427        col_struct_fields.push(quote! { #name });
1428    }
1429    for sc in &struct_cols {
1430        let name = &sc.df_field;
1431        col_struct_fields.push(quote! { #name });
1432    }
1433
1434    // For skipped fields in destructure: bind to `_`
1435    let skip_bindings: Vec<TokenStream> = skipped_fields
1436        .iter()
1437        .map(|name| quote! { let _ = row.#name; })
1438        .collect();
1439
1440    let from_vec_impl = quote! {
1441        impl #impl_generics From<Vec<#row_name #ty_generics>> for #df_name #ty_generics #where_clause {
1442            fn from(rows: Vec<#row_name #ty_generics>) -> Self {
1443                let len = rows.len();
1444                #tag_init
1445                #(#col_vec_inits)*
1446                for row in rows {
1447                    #tag_push
1448                    #(#skip_bindings)*
1449                    #(#col_pushes)*
1450                }
1451                #df_name {
1452                    #tag_struct_field
1453                    #len_struct_field
1454                    #(#col_struct_fields),*
1455                }
1456            }
1457        }
1458    };
1459    // endregion
1460
1461    // region: Generate from_rows_par (parallel scatter-write via ColumnWriter)
1462    //
1463    // Two field kinds require special handling instead of parallel scatter-write:
1464    //   - struct (DataFrameRow-flattened) fields (#485): companion stores
1465    //     `Vec<Inner>` where `Inner` doesn't implement `Default`. These are
1466    //     collected sequentially in a pre-pass (`for __prerow in &rows { ... }`)
1467    //     before `into_par_iter()` consumes the vector. Requires `Inner: Clone`.
1468    //   - `as_list`-on-struct fields (#485 opt-out) store `Vec<List>` in the
1469    //     companion, and `List` doesn't implement `Default`. Same pre-pass approach.
1470    // Both are handled via sequential pre-pass + skip in the parallel loop.
1471    // The pre-pass is O(n) extra per struct/list-struct field but does not change
1472    // asymptotic complexity — just adds a constant factor for these column types.
1473    let from_rows_par_method = if !flat_cols.is_empty()
1474        || !auto_expand_cols.is_empty()
1475        || has_tag
1476        || has_struct
1477        || has_into_list_struct
1478    {
1479        // Column declarations:
1480        //   - scalar / expand cols: vec![default; len]  (scatter-write in parallel)
1481        //   - struct / as_list-struct cols: Vec::with_capacity(len) filled in pre-pass
1482        let mut par_col_decls = Vec::new();
1483        if has_tag {
1484            par_col_decls.push(quote! {
1485                let mut _tag: Vec<String> = vec![String::new(); len];
1486            });
1487        }
1488        // Sequential pre-pass: struct fields (Inner: Clone required).
1489        // Iterate resolved to pick up tuple_index for tuple-struct outers.
1490        for rf in &resolved {
1491            if let ResolvedField::Struct(data) = rf {
1492                let col_name = &data.col_name;
1493                let ity = &data.inner_ty;
1494                let access = if let Some(idx) = &data.tuple_index {
1495                    quote! { __prerow.#idx }
1496                } else {
1497                    let rust_name = &data.rust_name;
1498                    quote! { __prerow.#rust_name }
1499                };
1500                par_col_decls.push(quote! {
1501                    let mut #col_name: Vec<#ity> = Vec::with_capacity(len);
1502                    for __prerow in &rows {
1503                        #col_name.push(::core::clone::Clone::clone(&#access));
1504                    }
1505                });
1506            }
1507        }
1508        // Sequential pre-pass: as_list-on-struct fields (List: !Default).
1509        for rf in &resolved {
1510            if let ResolvedField::Single(data) = rf
1511                && data.needs_into_list
1512            {
1513                let col_name = &data.col_name;
1514                let rust_name = &data.rust_name;
1515                let access = if let Some(idx) = &data.tuple_index {
1516                    quote! { __prerow.#idx }
1517                } else {
1518                    quote! { __prerow.#rust_name }
1519                };
1520                par_col_decls.push(quote! {
1521                    let mut #col_name: Vec<::miniextendr_api::list::List> = Vec::with_capacity(len);
1522                    for __prerow in &rows {
1523                        #col_name.push(::miniextendr_api::list::IntoList::into_list(
1524                            ::core::clone::Clone::clone(&#access)
1525                        ));
1526                    }
1527                });
1528            }
1529        }
1530        // Parallel scalar/expand columns.
1531        for fc in &flat_cols {
1532            if fc.needs_into_list {
1533                // Handled in the sequential pre-pass above.
1534                continue;
1535            }
1536            let name = &fc.df_field;
1537            let ty = &fc.vec_elem_ty;
1538            par_col_decls.push(quote! {
1539                let mut #name: Vec<#ty> = vec![<#ty as ::core::default::Default>::default(); len];
1540            });
1541        }
1542        for ac in &auto_expand_cols {
1543            let name = &ac.df_field;
1544            let cty = &ac.container_ty;
1545            par_col_decls.push(quote! {
1546                let mut #name: Vec<#cty> = vec![<#cty as ::core::default::Default>::default(); len];
1547            });
1548        }
1549
1550        // Writer declarations (only for scatter-write cols — struct/as_list pre-pass
1551        // cols are already populated and need no ColumnWriter).
1552        let mut writer_decls = Vec::new();
1553        if has_tag {
1554            writer_decls.push(quote! {
1555                let __w_tag = unsafe {
1556                    ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut _tag)
1557                };
1558            });
1559        }
1560        for fc in &flat_cols {
1561            if fc.needs_into_list {
1562                continue;
1563            }
1564            let name = &fc.df_field;
1565            let w_name = format_ident!("__w_{}", name);
1566            writer_decls.push(quote! {
1567                let #w_name = unsafe {
1568                    ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut #name)
1569                };
1570            });
1571        }
1572        for ac in &auto_expand_cols {
1573            let name = &ac.df_field;
1574            let w_name = format_ident!("__w_{}", name);
1575            writer_decls.push(quote! {
1576                let #w_name = unsafe {
1577                    ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut #name)
1578                };
1579            });
1580        }
1581
1582        // Write calls per resolved field (parallel scatter-write only).
1583        let tag_write = if has_tag {
1584            quote! { __w_tag.write(__i, #row_name_str.to_string()); }
1585        } else {
1586            TokenStream::new()
1587        };
1588
1589        let par_write_calls: Vec<TokenStream> = resolved
1590            .iter()
1591            .map(|rf| match rf {
1592                ResolvedField::Single(data) => {
1593                    if data.needs_into_list {
1594                        // Handled in the sequential pre-pass; skip in par loop.
1595                        return TokenStream::new();
1596                    }
1597                    let access = if let Some(idx) = &data.tuple_index {
1598                        quote! { __row.#idx }
1599                    } else {
1600                        let rust_name = &data.rust_name;
1601                        quote! { __row.#rust_name }
1602                    };
1603                    let w_name = format_ident!("__w_{}", data.col_name);
1604                    quote! { #w_name.write(__i, #access); }
1605                }
1606                ResolvedField::ExpandedFixed(data) => {
1607                    let access = if let Some(idx) = &data.tuple_index {
1608                        quote! { __row.#idx }
1609                    } else {
1610                        let rust_name = &data.rust_name;
1611                        quote! { __row.#rust_name }
1612                    };
1613                    let bind = format_ident!("__arr_{}", data.rust_name);
1614                    let writes: Vec<TokenStream> = (0..data.len)
1615                        .map(|i| {
1616                            let w_name = format_ident!("__w_{}_{}", data.base_name, i + 1);
1617                            let idx = syn::Index::from(i);
1618                            quote! { #w_name.write(__i, #bind[#idx]); }
1619                        })
1620                        .collect();
1621                    quote! {
1622                        let #bind = #access;
1623                        #(#writes)*
1624                    }
1625                }
1626                ResolvedField::ExpandedVec(data) => {
1627                    let access = if let Some(idx) = &data.tuple_index {
1628                        quote! { __row.#idx }
1629                    } else {
1630                        let rust_name = &data.rust_name;
1631                        quote! { __row.#rust_name }
1632                    };
1633                    let bind = format_ident!("__vec_{}", data.rust_name);
1634                    let writes: Vec<TokenStream> = (0..data.width)
1635                        .map(|i| {
1636                            let w_name = format_ident!("__w_{}_{}", data.base_name, i + 1);
1637                            quote! { #w_name.write(__i, #bind.get(#i).cloned()); }
1638                        })
1639                        .collect();
1640                    quote! {
1641                        let #bind = #access;
1642                        #(#writes)*
1643                    }
1644                }
1645                ResolvedField::AutoExpandVec(data) => {
1646                    let access = if let Some(idx) = &data.tuple_index {
1647                        quote! { __row.#idx }
1648                    } else {
1649                        let rust_name = &data.rust_name;
1650                        quote! { __row.#rust_name }
1651                    };
1652                    let w_name = format_ident!("__w_{}", data.col_name);
1653                    quote! { #w_name.write(__i, #access); }
1654                }
1655                // Struct fields (#485) are collected in the sequential pre-pass
1656                // above; nothing to write in the parallel loop.
1657                ResolvedField::Struct(_) => TokenStream::new(),
1658            })
1659            .collect();
1660
1661        let par_skip_bindings: Vec<TokenStream> = skipped_fields
1662            .iter()
1663            .map(|name| quote! { let _ = __row.#name; })
1664            .collect();
1665
1666        // Return struct fields
1667        let par_tag_field = if has_tag {
1668            quote! { _tag, }
1669        } else {
1670            TokenStream::new()
1671        };
1672        // Emit `_len: len` only when the companion struct has a `_len` field —
1673        // that is, when there are truly no column vecs at all (no scalars, no
1674        // as_list-on-struct fields, no struct-flattened fields, no tag).
1675        // `as_list`-on-struct fields live in `flat_cols` with `needs_into_list=true`;
1676        // they provide their own length reference and do NOT require `_len`.
1677        // The `flat_cols.iter().all(…)` guard is redundant with `flat_cols.is_empty()`
1678        // but makes the intent explicit: _len is emitted only when every dimension
1679        // that tracks length is absent.
1680        let par_len_field = if flat_cols.is_empty()
1681            && flat_cols.iter().all(|fc| !fc.needs_into_list)
1682            && auto_expand_cols.is_empty()
1683            && !has_tag
1684            && struct_cols.is_empty()
1685        {
1686            quote! { _len: len, }
1687        } else {
1688            TokenStream::new()
1689        };
1690        let mut par_struct_fields: Vec<TokenStream> = flat_cols
1691            .iter()
1692            .map(|fc| {
1693                let name = &fc.df_field;
1694                quote! { #name }
1695            })
1696            .collect();
1697        for ac in &auto_expand_cols {
1698            let name = &ac.df_field;
1699            par_struct_fields.push(quote! { #name });
1700        }
1701        for sc in &struct_cols {
1702            let name = &sc.df_field;
1703            par_struct_fields.push(quote! { #name });
1704        }
1705
1706        // Only emit an into_par_iter call when there are scalar/expand/tag cols
1707        // to scatter-write; struct/as_list-only structs skip the parallel loop.
1708        let has_par_cols = !flat_cols.iter().all(|fc| fc.needs_into_list)
1709            || !auto_expand_cols.is_empty()
1710            || has_tag;
1711        let par_loop = if has_par_cols {
1712            quote! {
1713                {
1714                    #(#writer_decls)*
1715                    rows.into_par_iter().enumerate().for_each(|(__i, __row)| unsafe {
1716                        #tag_write
1717                        #(#par_write_calls)*
1718                        #(#par_skip_bindings)*
1719                    });
1720                }
1721            }
1722        } else {
            // All columns were collected in the pre-pass, which only borrows `rows`;
            // no parallel loop is needed, so just move `rows` into an unused binding.
1724            quote! { let _rows = rows; }
1725        };
1726
1727        // Build `where Inner: Clone` bounds for all struct-flattened fields.
1728        // Emitting these on the method (rather than in a `const _` assertion block)
1729        // points the compiler error at the `from_rows_par` call site, not at the
1730        // expanded macro internals — cleaner diagnostic for downstream users.
1731        let par_inner_clone_bounds: Vec<TokenStream> = struct_cols
1732            .iter()
1733            .map(|sc| {
1734                let inner_ty = &sc.inner_ty;
1735                quote! { #inner_ty: ::core::clone::Clone, }
1736            })
1737            .collect();
1738        let par_where_clause = if par_inner_clone_bounds.is_empty() {
1739            TokenStream::new()
1740        } else {
1741            quote! { where #(#par_inner_clone_bounds)* }
1742        };
1743
1744        quote! {
1745            /// Parallel row→column transposition using rayon scatter-write.
1746            ///
1747            /// Scalar/expand columns are scatter-written in parallel via rayon.
1748            /// Struct-flattened and `as_list`-on-struct fields are collected
1749            /// sequentially in a pre-pass before the parallel loop (these field
1750            /// types don't implement `Default`, so scatter-write is not possible).
1751            /// Inner struct types must implement `Clone` (enforced by the where
1752            /// clause; the error will point at the `from_rows_par` call site).
1753            ///
1754            /// Always uses rayon — no threshold check. Use `from_rows` for the
1755            /// sequential path.
1756            #[cfg(feature = "rayon")]
1757            #[allow(clippy::uninit_vec)]
1758            pub fn from_rows_par(rows: Vec<#row_name #ty_generics>) -> Self
1759            #par_where_clause
1760            {
1761                use ::miniextendr_api::rayon_bridge::rayon::prelude::*;
1762                let len = rows.len();
1763                #(#par_col_decls)*
1764                #par_loop
1765                #df_name { #par_tag_field #par_len_field #(#par_struct_fields),* }
1766            }
1767        }
1768    } else {
1769        TokenStream::new()
1770    };
1771
1772    // ── IntoIterator (only for named non-empty structs without expansion) ─
1773    let can_iterate = !flat_cols.is_empty()
1774        && !is_tuple_struct
1775        && !is_unit_struct
1776        && !has_expansion
1777        && !has_into_list_struct;
1778    let into_iterator_impl = if can_iterate {
1779        let iterator_name = format_ident!("{}Iterator", df_name);
1780
1781        let iter_field_decls: Vec<_> = flat_cols
1782            .iter()
1783            .map(|fc| {
1784                let name = &fc.df_field;
1785                let ty = &fc.vec_elem_ty;
1786                quote! { #name: std::vec::IntoIter<#ty> }
1787            })
1788            .collect();
1789
1790        let destruct_pattern: Vec<_> = flat_cols
1791            .iter()
1792            .map(|fc| {
1793                let name = &fc.df_field;
1794                quote! { #name }
1795            })
1796            .collect();
1797
1798        let mut iter_init_tokens = TokenStream::new();
1799        for (i, fc) in flat_cols.iter().enumerate() {
1800            let name = &fc.df_field;
1801            let ty = &fc.vec_elem_ty;
1802            if i > 0 {
1803                iter_init_tokens.extend(quote! { , });
1804            }
1805            iter_init_tokens.extend(quote! { #name: <Vec<#ty>>::into_iter(#name) });
1806        }
1807
1808        // For next(): reconstruct original field names (col_name == rust_name for Single)
1809        let mut next_struct_tokens = TokenStream::new();
1810        for (i, rf) in resolved.iter().enumerate() {
1811            if let ResolvedField::Single(data) = rf {
1812                if i > 0 {
1813                    next_struct_tokens.extend(quote! { , });
1814                }
1815                let rust_name = &data.rust_name;
1816                let col_name = &data.col_name;
1817                next_struct_tokens.extend(quote! { #rust_name: self.#col_name.next()? });
1818            }
1819        }
1820
1821        let ignore_tag = if has_tag {
1822            quote! { _tag: _, }
1823        } else {
1824            TokenStream::new()
1825        };
1826
1827        // Skipped fields are reconstructed via `Default::default()` each time
1828        // `next()` yields a row. This is why any field type annotated with
1829        // `#[dataframe(skip)]` must implement `Default`.
1830        let skip_defaults: Vec<TokenStream> = skipped_fields
1831            .iter()
1832            .map(|name| quote! { , #name: Default::default() })
1833            .collect();
1834
1835        quote! {
1836            pub struct #iterator_name #impl_generics #where_clause {
1837                #(#iter_field_decls),*
1838            }
1839
1840            impl #impl_generics IntoIterator for #df_name #ty_generics #where_clause {
1841                type Item = #row_name #ty_generics;
1842                type IntoIter = #iterator_name #ty_generics;
1843
1844                fn into_iter(self) -> Self::IntoIter {
1845                    let #df_name { #ignore_tag #(#destruct_pattern),* } = self;
1846                    #iterator_name {
1847                        #iter_init_tokens
1848                    }
1849                }
1850            }
1851
1852            impl #impl_generics Iterator for #iterator_name #ty_generics #where_clause {
1853                type Item = #row_name #ty_generics;
1854
1855                fn next(&mut self) -> Option<Self::Item> {
1856                    Some(#row_name {
1857                        #next_struct_tokens
1858                        #(#skip_defaults)*
1859                    })
1860                }
1861            }
1862        }
1863    } else {
1864        TokenStream::new()
1865    };
1866    // endregion
1867
1868    // region: Associated methods
1869    let from_dataframe_method = if can_iterate {
1870        quote! {
1871            /// Convert a DataFrame back into a vector of rows.
1872            ///
1873            /// This transposes column-oriented data back into row-oriented format.
1874            pub fn from_dataframe(df: #df_name #ty_generics) -> Vec<Self> {
1875                df.into_iter().collect()
1876            }
1877        }
1878    } else {
1879        TokenStream::new()
1880    };
1881    // endregion
1882
1883    // region: DataFrame type methods (from_rows, from_rows_par)
1884    let df_methods = quote! {
1885        impl #impl_generics #df_name #ty_generics #where_clause {
1886            /// Sequential row→column transposition.
1887            pub fn from_rows(rows: Vec<#row_name #ty_generics>) -> Self {
1888                rows.into()
1889            }
1890
1891            #from_rows_par_method
1892        }
1893    };
1894
1895    let row_methods = quote! {
1896        impl #impl_generics #row_name #ty_generics #where_clause {
1897            /// Name of the generated DataFrame companion type.
1898            pub const DATAFRAME_TYPE_NAME: &'static str = stringify!(#df_name);
1899
1900            /// Convert a vector of rows into the companion DataFrame type.
1901            ///
1902            /// This transposes row-oriented data into column-oriented format.
1903            pub fn to_dataframe(rows: Vec<Self>) -> #df_name #ty_generics {
1904                rows.into()
1905            }
1906
1907            #from_dataframe_method
1908        }
1909    };
1910
1911    // Compile-time assertion: row type must implement IntoList
1912    // Skip for unit/empty structs, tuple structs, structs with expansion,
1913    // and structs that store `List`-converted struct fields (#485 as_list).
1914    let trait_check = if !flat_cols.is_empty()
1915        && !is_tuple_struct
1916        && !is_unit_struct
1917        && !has_expansion
1918        && !has_into_list_struct
1919    {
1920        quote! {
1921            const _: () = {
1922                fn _assert_into_list #impl_generics () #where_clause {
1923                    fn _check<T: ::miniextendr_api::list::IntoList>() {}
1924                    _check::<#row_name #ty_generics>();
1925                }
1926            };
1927        }
1928    } else {
1929        TokenStream::new()
1930    };
1931
1932    // Marker trait impl: struct type implements DataFrameRow via IntoDataFrame chain.
1933    let marker_impl = quote! {
1934        impl #impl_generics ::miniextendr_api::markers::DataFrameRow
1935            for #row_name #ty_generics #where_clause {}
1936    };
1937
1938    // DataFramePayloadFields impl: exposes FIELDS (all resolved column names) and TAG
1939    // (the #[dataframe(tag = "...")] value, or "") for compile-time collision detection
1940    // by outer DataFrameRow enums that nest this type as a struct-flattened field.
1941    let payload_fields_impl = {
1942        // Collect all column names: flat_cols + struct_col base names.
1943        let mut field_names: Vec<String> =
1944            flat_cols.iter().map(|fc| fc.col_name_str.clone()).collect();
1945        for sc in &struct_cols {
1946            field_names.push(sc.col_name_str.clone());
1947        }
1948        let tag_str = attrs.tag.as_deref().unwrap_or("");
1949        quote! {
1950            impl #impl_generics ::miniextendr_api::markers::DataFramePayloadFields
1951                for #row_name #ty_generics #where_clause
1952            {
1953                const FIELDS: &'static [&'static str] = &[#(#field_names),*];
1954                const TAG: &'static str = #tag_str;
1955            }
1956        }
1957    };
1958
1959    // Compile-time assertions for struct-flattened fields (#485): each inner
1960    // type must implement `DataFrameRow`, otherwise users get a confusing
1961    // error pointing at the `to_dataframe` call site instead of the field.
1962    // Note: `Clone` is no longer asserted here — it is enforced via a where
1963    // clause on `from_rows_par` itself, giving a clearer error at the call site.
1964    let struct_assertions: Vec<TokenStream> = struct_cols
1965        .iter()
1966        .map(|sc| {
1967            let inner_ty = &sc.inner_ty;
1968            quote! {
1969                const _: () = {
1970                    fn _assert_inner_is_dataframe_row<T: ::miniextendr_api::markers::DataFrameRow>() {}
1971                    fn _do_assert #impl_generics () #where_clause {
1972                        _assert_inner_is_dataframe_row::<#inner_ty>();
1973                    }
1974                };
1975            }
1976        })
1977        .collect();
1978
1979    Ok(quote! {
1980        #dataframe_struct
1981        #into_dataframe_impl
1982        #from_vec_impl
1983        #df_methods
1984        #into_iterator_impl
1985        #row_methods
1986        #trait_check
1987        #marker_impl
1988        #payload_fields_impl
1989        #(#struct_assertions)*
1990    })
1991    // endregion
1992}
1993// endregion
1994
1995// region: Enum align path
1996
1997/// A resolved column in the unified schema across all enum variants.
1998///
1999/// Tracks the column name, element type, which variants contribute to this column,
2000/// and whether the type was coerced to `String` due to cross-variant type conflicts
2001/// (when `#[dataframe(conflicts = "string")]` is active).
2002pub(super) struct ResolvedColumn {
2003    /// Column name in the companion struct / data frame.
2004    pub(super) col_name: syn::Ident,
2005    /// Element type (used as `Vec<Option<#ty>>`).
2006    /// When `string_coerced` is true, this is always `String`.
2007    pub(super) ty: syn::Type,
2008    /// Indices of variants that contain this field.
2009    pub(super) present_in: Vec<usize>,
2010    /// Whether this column was coerced to `String` due to type conflicts.
2011    /// When true, values are converted via `ToString::to_string()` at push time.
2012    pub(super) string_coerced: bool,
2013    /// Whether this column should be emitted as an R factor (via `as_factor` attribute).
2014    /// When `true`, `into_data_frame` wraps the `Vec<Option<T>>` in `FactorOptionVec<T>`
2015    /// before calling `IntoR::into_sexp`, using the `UnitEnumFactor` blanket impl.
2016    pub(super) is_factor: bool,
2017}
2018
2019/// Accumulates unique columns for an enum-to-dataframe unified schema.
2020///
2021/// As columns are registered from each variant's fields, the registry detects
2022/// duplicates and validates type consistency. When `coerce_to_string` is enabled,
2023/// type conflicts are resolved by coercing to `String`; otherwise they produce errors.
2024pub(super) struct ColumnRegistry<'a> {
2025    /// The ordered list of resolved columns in the schema.
2026    pub(super) columns: Vec<ResolvedColumn>,
2027    /// Maps column name strings to their index in `columns` for O(1) dedup lookup.
2028    pub(super) col_index: std::collections::HashMap<String, usize>,
2029    /// Whether to coerce type-conflicting columns to `String` instead of erroring.
2030    pub(super) coerce_to_string: bool,
2031    /// Cached `String` type AST node, used as the coercion target type.
2032    pub(super) string_ty: &'a syn::Type,
2033}
2034
2035impl<'a> ColumnRegistry<'a> {
2036    /// Create a new empty column registry.
2037    fn new(coerce_to_string: bool, string_ty: &'a syn::Type) -> Self {
2038        Self {
2039            columns: Vec::new(),
2040            col_index: std::collections::HashMap::new(),
2041            coerce_to_string,
2042            string_ty,
2043        }
2044    }
2045
2046    /// Register a single column in the schema, or merge with an existing column.
2047    ///
2048    /// If a column with the same name already exists, validates that the types match.
2049    /// On type conflict: coerces to `String` (if `coerce_to_string` is true) or
2050    /// returns `Err`. The `variant_idx` is appended to the column's `present_in` list.
2051    fn register(
2052        &mut self,
2053        col_name: &str,
2054        col_ty: &syn::Type,
2055        variant_idx: usize,
2056        variant_name: &syn::Ident,
2057        error_span: Span,
2058    ) -> syn::Result<()> {
2059        if let Some(&idx) = self.col_index.get(col_name) {
2060            let existing = &self.columns[idx];
2061            if !existing.string_coerced && existing.ty != *col_ty {
2062                if self.coerce_to_string {
2063                    self.columns[idx].ty = self.string_ty.clone();
2064                    self.columns[idx].string_coerced = true;
2065                } else {
2066                    return Err(syn::Error::new(
2067                        error_span,
2068                        format!(
2069                            "type conflict for field `{}`: variant `{}` has a different type \
2070                             than a previous variant; \
2071                             use `#[dataframe(conflicts = \"string\")]` to coerce all conflicting fields to String",
2072                            col_name, variant_name
2073                        ),
2074                    ));
2075                }
2076            }
2077            self.columns[idx].present_in.push(variant_idx);
2078        } else {
2079            let idx = self.columns.len();
2080            self.columns.push(ResolvedColumn {
2081                col_name: format_ident!("{}", col_name),
2082                ty: col_ty.clone(),
2083                present_in: vec![variant_idx],
2084                string_coerced: false,
2085                is_factor: false,
2086            });
2087            self.col_index.insert(col_name.to_string(), idx);
2088        }
2089        Ok(())
2090    }
2091
2092    /// Like `register`, but marks the column as a factor column (`is_factor = true`).
2093    ///
2094    /// Used for fields annotated with `#[dataframe(as_factor)]`. The companion struct
2095    /// field type stays `Vec<Option<T>>`, but `into_data_frame` wraps it in
2096    /// `FactorOptionVec<T>` (using the `UnitEnumFactor` blanket `IntoR` impl).
2097    pub(super) fn register_factor(
2098        &mut self,
2099        col_name: &str,
2100        col_ty: &syn::Type,
2101        variant_idx: usize,
2102        variant_name: &syn::Ident,
2103        error_span: Span,
2104    ) -> syn::Result<()> {
2105        self.register(col_name, col_ty, variant_idx, variant_name, error_span)?;
2106        if let Some(&idx) = self.col_index.get(col_name) {
2107            self.columns[idx].is_factor = true;
2108        }
2109        Ok(())
2110    }
2111}
2112
2113/// Describes the shape of an enum variant's fields.
2114#[derive(Clone, Copy, PartialEq, Eq)]
2115pub(super) enum VariantShape {
2116    /// `Variant { field: Type, ... }`
2117    Named,
2118    /// `Variant(Type, ...)`
2119    Tuple,
2120    /// `Variant` (no fields)
2121    Unit,
2122}
2123
2124/// A resolved enum field ready for codegen -- either a single column or expanded
2125/// from an array/Vec into multiple suffixed columns.
2126///
2127/// This is the enum-path counterpart of [`ResolvedField`] (used for structs).
2128/// Each variant carries both the binding name (for destructure patterns) and the
2129/// original Rust field name (for error reporting and named-variant patterns).
2130pub(super) enum EnumResolvedField {
2131    /// Single column contribution.
2132    Single(Box<EnumSingleFieldData>),
2133    /// Expanded from [T; N].
2134    ExpandedFixed(Box<EnumExpandedFixedData>),
2135    /// Expanded from Vec<T> with pinned width.
2136    ExpandedVec(Box<EnumExpandedVecData>),
2137    /// Auto-expanded Vec<T>/Box<[T]>: column count determined at runtime.
2138    AutoExpandVec(Box<EnumAutoExpandVecData>),
2139    /// `HashMap<K,V>` or `BTreeMap<K,V>` → two parallel list-columns: `<field>_keys`, `<field>_values`.
2140    Map(Box<EnumMapFieldData>),
2141    /// Struct field whose inner type implements `DataFrameRow` → flattened `<base>_<inner_col>` columns.
2142    Struct(Box<EnumStructFieldData>),
2143}
2144
2145impl EnumResolvedField {
2146    /// Binding name used in destructure patterns.
2147    pub(super) fn binding(&self) -> &syn::Ident {
2148        match self {
2149            Self::Single(data) => &data.binding,
2150            Self::ExpandedFixed(data) => &data.binding,
2151            Self::ExpandedVec(data) => &data.binding,
2152            Self::AutoExpandVec(data) => &data.binding,
2153            Self::Map(data) => &data.binding,
2154            Self::Struct(data) => &data.binding,
2155        }
2156    }
2157
2158    /// Original Rust field name.
2159    pub(super) fn rust_name(&self) -> &syn::Ident {
2160        match self {
2161            Self::Single(data) => &data.rust_name,
2162            Self::ExpandedFixed(data) => &data.rust_name,
2163            Self::ExpandedVec(data) => &data.rust_name,
2164            Self::AutoExpandVec(data) => &data.rust_name,
2165            Self::Map(data) => &data.rust_name,
2166            Self::Struct(data) => &data.rust_name,
2167        }
2168    }
2169}
2170
2171/// Data for [`EnumResolvedField::Single`].
2172pub(super) struct EnumSingleFieldData {
2173    /// Column name in the schema.
2174    pub(super) col_name: syn::Ident,
2175    /// Binding name used in destructure pattern.
2176    pub(super) binding: syn::Ident,
2177    /// Original Rust field name (for named variants).
2178    pub(super) rust_name: syn::Ident,
2179    /// Column type stored in the companion Vec.
2180    ///
2181    /// For most fields this is the raw Rust type. When `needs_into_list` is
2182    /// `true` (struct-typed fields with `#[dataframe(as_list)]`), this is
2183    /// `::miniextendr_api::list::List` — the actual inner type is erased at
2184    /// the storage level and each row value is converted via `.into_list()`.
2185    pub(super) ty: syn::Type,
2186    /// Whether the field's value must be converted via `.into_list()` before
2187    /// being pushed into the companion `Vec<Option<List>>`.
2188    ///
2189    /// Set to `true` only for struct-typed fields (`FieldTypeKind::Struct`)
2190    /// that carry `#[dataframe(as_list)]`. The companion struct field type is
2191    /// `Vec<Option<::miniextendr_api::list::List>>` in this case.
2192    pub(super) needs_into_list: bool,
2193    /// Whether the field should be emitted as an R factor column.
2194    ///
2195    /// Set to `true` for fields annotated with `#[dataframe(as_factor)]`.
2196    /// The companion struct field type is `Vec<Option<T>>` (unchanged), but
2197    /// `into_data_frame` wraps it in `FactorOptionVec<T>` to use the
2198    /// `UnitEnumFactor`-based blanket `IntoR` impl.
2199    pub(super) is_factor: bool,
2200}
2201
2202/// Data for [`EnumResolvedField::ExpandedFixed`]: an array-typed field (element type + length).
2203pub(super) struct EnumExpandedFixedData {
2204    /// Base column name (NOTE(review): presumably the prefix of the expanded columns; confirm).
2205    pub(super) base_name: String,
2206    /// Binding name used for this field in the destructure pattern.
2207    pub(super) binding: syn::Ident,
2208    /// Original Rust field name.
2209    pub(super) rust_name: syn::Ident,
2210    /// Element type of the array.
2211    pub(super) elem_ty: syn::Type,
2212    /// Array length.
2213    pub(super) len: usize,
2214}
2215
2216/// Data for [`EnumResolvedField::ExpandedVec`]: a field with an element type and a pinned width.
2217pub(super) struct EnumExpandedVecData {
2218    /// Base column name (NOTE(review): presumably the prefix of the expanded columns; confirm).
2219    pub(super) base_name: String,
2220    /// Binding name used for this field in the destructure pattern.
2221    pub(super) binding: syn::Ident,
2222    /// Original Rust field name.
2223    pub(super) rust_name: syn::Ident,
2224    /// Element type.
2225    pub(super) elem_ty: syn::Type,
2226    /// Pinned width (NOTE(review): presumably the fixed number of expanded columns; confirm).
2227    pub(super) width: usize,
2228}
2229
2230/// Data for [`EnumResolvedField::AutoExpandVec`]: like the pinned-width form, but with no stored width.
2231pub(super) struct EnumAutoExpandVecData {
2232    /// Base column name.
2233    pub(super) base_name: String,
2234    /// Binding name used for this field in the destructure pattern.
2235    pub(super) binding: syn::Ident,
2236    /// Original Rust field name.
2237    pub(super) rust_name: syn::Ident,
2238    /// Element type.
2239    pub(super) elem_ty: syn::Type,
2240    /// Container type used in the companion struct (`Vec<T>` or `Box<[T]>`).
2241    pub(super) container_ty: syn::Type,
2242}
2243
2244/// Data for [`EnumResolvedField::Map`].
2245///
2246/// A `HashMap<K,V>` or `BTreeMap<K,V>` field expands to two parallel list-columns:
2247/// `<base_name>_keys: Vec<Option<Vec<K>>>` and `<base_name>_values: Vec<Option<Vec<V>>>`.
2248/// Rows of variants that lack the field get `None` in both columns. Key order follows the map's
2249/// own iteration order: `BTreeMap` yields sorted keys, `HashMap` yields an unspecified order.
2250/// The two columns come from one `into_iter().unzip()`, which guarantees pairwise alignment.
2251pub(super) struct EnumMapFieldData {
2252    /// Base column name (field name or `rename` override).
2253    pub(super) base_name: String,
2254    /// Binding name used for this field in the destructure pattern.
2255    pub(super) binding: syn::Ident,
2256    /// Original Rust field name.
2257    pub(super) rust_name: syn::Ident,
2258    /// Key type `K` of the map.
2259    pub(super) key_ty: syn::Type,
2260    /// Value type `V` of the map.
2261    pub(super) val_ty: syn::Type,
2262}
2263
2264/// Data for [`EnumResolvedField::Struct`].
2265///
2266/// A field whose inner type implements `DataFrameRow` expands to `<base_name>_<inner_col>`
2267/// prefixed columns, one output column per column emitted by the inner type's companion
2268/// DataFrame. Rows of variants that lack the field produce `None` in every prefixed column.
2269///
2270/// The companion struct holds `Vec<Option<Inner>>` (not `Vec<Inner>`). The `into_data_frame`
2271/// method gathers the present rows into a dense `Vec<Inner>` (tracking presence indices),
2272/// calls `Inner::to_dataframe(present_rows)`, extracts the named column SEXPs, and scatters
2273/// them back over the full row count, filling absent rows with `None`.
2274pub(super) struct EnumStructFieldData {
2275    /// Base name for column prefixing (field name or `rename` override).
2276    pub(super) base_name: String,
2277    /// Binding name used for this field in the destructure pattern.
2278    pub(super) binding: syn::Ident,
2279    /// Original Rust field name.
2280    pub(super) rust_name: syn::Ident,
2281    /// Inner struct type (used for the compile-time `DataFrameRow` assertion and codegen).
2282    pub(super) inner_ty: syn::Type,
2283}
2284
2285/// Parsed and resolved information about a single enum variant for DataFrame codegen.
2286///
2287/// Holds the variant's name, its shape (named/tuple/unit), its resolved fields (after
2288/// applying `#[dataframe(...)]` attributes and type classification), and the names of any
2289/// skipped fields (needed for complete destructure patterns in named variants).
2290pub(super) struct VariantInfo {
2291    /// Variant name.
2292    pub(super) name: syn::Ident,
2293    /// Shape of this variant (named, tuple, or unit).
2294    pub(super) shape: VariantShape,
2295    /// Resolved fields (after applying field attrs + type classification).
2296    pub(super) fields: Vec<EnumResolvedField>,
2297    /// Rust names of skipped fields (named variants); kept so the destructure pattern is complete.
2298    pub(super) skipped_fields: Vec<syn::Ident>,
2299}
2300// endregion
2301
2302// region: Enum-specific expansion (in sub-module)
2303
2304mod enum_expansion;
2305use enum_expansion::derive_enum_dataframe;
2306// endregion