miniextendr_macros/dataframe_derive.rs
1//! Derive macros for bidirectional row ↔ dataframe conversions.
2//!
3//! Supports both structs (direct field mapping) and enums (field-name union
4//! across variants with `Option<T>` fill for missing fields).
5
6use proc_macro2::{Span, TokenStream};
7use quote::{format_ident, quote};
8use syn::{Data, DeriveInput, Fields};
9
10// region: Attribute parsing
11
12/// Parsed container-level `#[dataframe(...)]` attributes.
13pub(super) struct DataFrameAttrs {
14 /// Custom companion type name (default: `{TypeName}DataFrame`).
15 pub(super) name: Option<syn::Ident>,
16 /// Enum alignment mode — implicit for enums, accepted but not required.
17 pub(super) align: bool,
18 /// Tag column name for variant discriminator (also supported on structs).
19 pub(super) tag: Option<String>,
20 /// Conflict resolution mode for type collisions across enum variants.
21 /// Currently only "string" is supported: convert conflicting fields via `ToString`.
22 pub(super) conflicts: Option<String>,
23}
24
25/// Parse container-level `#[dataframe(...)]` attributes from the derive input.
26///
27/// Supported keys:
28/// - `name = "CustomName"` -- custom companion type name (default: `{TypeName}DataFrame`)
29/// - `align` -- enum alignment mode (field-name union across variants)
30/// - `tag = "col_name"` -- add a variant discriminator column (works on both structs and enums)
31/// - `conflicts = "string"` -- coerce type-conflicting columns to `String` via `ToString`
32///
33/// Returns `Err` for unknown keys or non-string-literal values.
34fn parse_dataframe_attrs(input: &DeriveInput) -> syn::Result<DataFrameAttrs> {
35 let mut attrs = DataFrameAttrs {
36 name: None,
37 align: false,
38 tag: None,
39 conflicts: None,
40 };
41
42 for attr in &input.attrs {
43 if !attr.path().is_ident("dataframe") {
44 continue;
45 }
46
47 let nested = attr.parse_args_with(
48 syn::punctuated::Punctuated::<syn::Meta, syn::Token![,]>::parse_terminated,
49 )?;
50
51 for meta in &nested {
52 match meta {
53 syn::Meta::NameValue(nv) if nv.path.is_ident("name") => {
54 if let syn::Expr::Lit(syn::ExprLit {
55 lit: syn::Lit::Str(lit_str),
56 ..
57 }) = &nv.value
58 {
59 attrs.name =
60 Some(format_ident!("{}", lit_str.value(), span = lit_str.span()));
61 } else {
62 return Err(syn::Error::new_spanned(
63 &nv.value,
64 "expected string literal for `name`",
65 ));
66 }
67 }
68 syn::Meta::NameValue(nv) if nv.path.is_ident("tag") => {
69 if let syn::Expr::Lit(syn::ExprLit {
70 lit: syn::Lit::Str(lit_str),
71 ..
72 }) = &nv.value
73 {
74 attrs.tag = Some(lit_str.value());
75 } else {
76 return Err(syn::Error::new_spanned(
77 &nv.value,
78 "expected string literal for `tag`",
79 ));
80 }
81 }
82 syn::Meta::NameValue(nv) if nv.path.is_ident("conflicts") => {
83 if let syn::Expr::Lit(syn::ExprLit {
84 lit: syn::Lit::Str(lit_str),
85 ..
86 }) = &nv.value
87 {
88 let value = lit_str.value();
89 if value != "string" {
90 return Err(syn::Error::new_spanned(
91 lit_str,
92 "unknown conflict resolution mode; only `\"string\"` is supported",
93 ));
94 }
95 attrs.conflicts = Some(value);
96 } else {
97 return Err(syn::Error::new_spanned(
98 &nv.value,
99 "expected string literal for `conflicts`",
100 ));
101 }
102 }
103 syn::Meta::Path(path) if path.is_ident("align") => {
104 attrs.align = true;
105 }
106 other => {
107 return Err(syn::Error::new_spanned(
108 other,
109 "unknown dataframe attribute; expected `name`, `align`, `tag`, or `conflicts`",
110 ));
111 }
112 }
113 }
114 }
115
116 Ok(attrs)
117}
118// endregion
119
120// region: Field-level attribute parsing
121
122/// Parsed field-level `#[dataframe(...)]` attributes.
123///
124/// These attributes control how individual struct/enum fields map to DataFrame columns.
125/// Mutually exclusive combinations (`as_list` + `expand`, `as_list` + `width`,
126/// `as_factor` + `as_list`, `as_factor` + `expand`, `as_factor` + `width`) are
127/// rejected during parsing.
128#[derive(Default)]
129pub(super) struct FieldAttrs {
130 /// `#[dataframe(skip)]` -- omit this field from the DataFrame entirely.
131 pub(super) skip: bool,
132 /// `#[dataframe(rename = "col")]` -- use a custom column name instead of the field name.
133 pub(super) rename: Option<String>,
134 /// `#[dataframe(as_list)]` -- keep a collection field as a single R list column
135 /// (suppresses automatic expansion into suffixed columns).
136 pub(super) as_list: bool,
137 /// `#[dataframe(as_factor)]` -- treat a unit-only inner enum field as an R factor column.
138 /// Only valid on bare-ident enum types (no generic parameters). The inner enum must be
139 /// unit-only (`#[derive(DataFrameRow)]` emits `IntoR` and `IntoR for Vec<Option<Self>>`).
140 pub(super) as_factor: bool,
141 /// `#[dataframe(expand)]` or `#[dataframe(unnest)]` -- explicitly expand a
142 /// collection field into multiple suffixed columns.
143 expand: bool,
144 /// `#[dataframe(width = N)]` -- pin the expansion width for `Vec<T>`, `Box<[T]>`,
145 /// or `&[T]` fields. Rows shorter than `N` get `None` for missing positions.
146 pub(super) width: Option<usize>,
147}
148
149/// Parse field-level `#[dataframe(...)]` attributes from a `syn::Field`.
150///
151/// Recognizes: `skip`, `rename`, `as_list`, `as_factor`, `expand` (alias `unnest`), and `width`.
152/// Validates mutual exclusivity of conflicting options (`as_list` vs `expand`/`width`,
153/// `as_factor` vs `as_list`/`expand`/`width`).
154/// Returns `Err` for unknown keys, invalid width values, or conflicting options.
155pub(super) fn parse_field_attrs(field: &syn::Field) -> syn::Result<FieldAttrs> {
156 let mut attrs = FieldAttrs::default();
157
158 for attr in &field.attrs {
159 if !attr.path().is_ident("dataframe") {
160 continue;
161 }
162
163 attr.parse_nested_meta(|meta| {
164 if meta.path.is_ident("skip") {
165 attrs.skip = true;
166 Ok(())
167 } else if meta.path.is_ident("rename") {
168 let value = meta.value()?;
169 let lit: syn::LitStr = value.parse()?;
170 attrs.rename = Some(lit.value());
171 Ok(())
172 } else if meta.path.is_ident("as_list") {
173 attrs.as_list = true;
174 Ok(())
175 } else if meta.path.is_ident("as_factor") {
176 attrs.as_factor = true;
177 Ok(())
178 } else if meta.path.is_ident("expand") || meta.path.is_ident("unnest") {
179 attrs.expand = true;
180 Ok(())
181 } else if meta.path.is_ident("width") {
182 let value = meta.value()?;
183 let lit: syn::LitInt = value.parse()?;
184 let n: usize = lit.base10_parse()?;
185 if n == 0 {
186 return Err(syn::Error::new(lit.span(), "`width` must be >= 1"));
187 }
188 attrs.width = Some(n);
189 Ok(())
190 } else {
191 Err(meta.error(
192 "unknown field attribute; expected `skip`, `rename`, `as_list`, `as_factor`, `expand`, `unnest`, or `width`",
193 ))
194 }
195 })?;
196 }
197
198 let span = field.ident.as_ref().map_or(Span::call_site(), |i| i.span());
199
200 // Validation: conflicting options
201 if attrs.as_list && attrs.expand {
202 return Err(syn::Error::new(
203 span,
204 "`as_list` and `expand`/`unnest` are mutually exclusive",
205 ));
206 }
207 if attrs.as_list && attrs.width.is_some() {
208 return Err(syn::Error::new(
209 span,
210 "`as_list` and `width` are mutually exclusive",
211 ));
212 }
213 if attrs.as_factor && attrs.as_list {
214 return Err(syn::Error::new(
215 span,
216 "`as_factor` and `as_list` are mutually exclusive",
217 ));
218 }
219 if attrs.as_factor && attrs.expand {
220 return Err(syn::Error::new(
221 span,
222 "`as_factor` and `expand`/`unnest` are mutually exclusive",
223 ));
224 }
225 if attrs.as_factor && attrs.width.is_some() {
226 return Err(syn::Error::new(
227 span,
228 "`as_factor` and `width` are mutually exclusive",
229 ));
230 }
231
232 Ok(attrs)
233}
234// endregion
235
236// region: Type classification
237
238/// Classification of a field type for DataFrame column expansion.
239///
240/// Used to decide whether a field maps to a single column or should be
241/// expanded into multiple suffixed columns (e.g., `coords_1`, `coords_2`).
242pub(super) enum FieldTypeKind<'a> {
243 /// Single column (most types). No expansion.
244 Scalar,
245 /// `[T; N]` -- fixed-size array, expands to `N` columns at compile time.
246 /// Contains the element type and array length.
247 FixedArray(&'a syn::Type, usize),
248 /// `Vec<T>` -- variable length, needs `width` attribute or `expand` for expansion.
249 /// Contains the element type.
250 VariableVec(&'a syn::Type),
251 /// `Box<[T]>` -- owned slice, treated like `Vec<T>` for expansion purposes.
252 /// Contains the element type.
253 BoxedSlice(&'a syn::Type),
254 /// `&[T]` -- borrowed slice, treated like `Vec<T>` for expansion purposes.
255 /// Contains the element type.
256 BorrowedSlice(&'a syn::Type),
257 /// `HashMap<K, V>` or `BTreeMap<K, V>` -- expands to two parallel list-columns:
258 /// `<field>_keys` and `<field>_values`. Key order follows the map's own iteration
259 /// order: `BTreeMap` yields sorted keys, `HashMap` yields non-deterministic order.
260 Map {
261 key_ty: &'a syn::Type,
262 val_ty: &'a syn::Type,
263 },
264 /// A struct-typed field whose inner type implements `DataFrameRow`.
265 ///
266 /// Flattened into `<field>_<inner_col>` prefixed columns by default.
267 /// A compile-time assertion against `::miniextendr_api::markers::DataFrameRow`
268 /// is emitted so rustc gives a clear error when the inner type is missing the
269 /// derive.
270 ///
271 /// Suppressed by `#[dataframe(as_list)]` — with as_list the field becomes
272 /// a `Scalar` and uses the ordinary single-column codegen path.
273 Struct {
274 /// The full field type (used for the compile-time DataFrameRow assertion).
275 inner_ty: &'a syn::Type,
276 },
277}
278
279/// Classify a field type for DataFrame column expansion.
280///
281/// Inspects the type AST to detect:
282/// - `[T; N]` or `&[T; N]` -> `FixedArray`
283/// - `&[T]` -> `BorrowedSlice`
284/// - `Vec<T>` -> `VariableVec`
285/// - `Box<[T]>` -> `BoxedSlice`
286/// - `HashMap<K, V>` / `BTreeMap<K, V>` -> `Map`
287/// - Any non-scalar bare path type (single- or multi-segment, e.g. `Point` or
288/// `crate::geom::Point`) -> `Struct`
289/// - Everything else (known scalars, generic types with args, `::abs::Paths`) -> `Scalar`
290///
291/// Returns `Err` for shapes the macro cannot classify and that would silently
292/// become opaque list-columns: `Option<T>`, `Cow<T>`, `Rc<T>`, `Arc<T>`,
293/// `RefCell<T>`, `Cell<T>`, `Mutex<T>`, `RwLock<T>`. Use
294/// `#[dataframe(as_list)]` to opt into list-column treatment explicitly.
295pub(super) fn classify_field_type(ty: &syn::Type) -> syn::Result<FieldTypeKind<'_>> {
296 // Check for [T; N]
297 if let syn::Type::Array(arr) = ty
298 && let syn::Expr::Lit(syn::ExprLit {
299 lit: syn::Lit::Int(lit_int),
300 ..
301 }) = &arr.len
302 && let Ok(n) = lit_int.base10_parse::<usize>()
303 {
304 return Ok(FieldTypeKind::FixedArray(&arr.elem, n));
305 }
306
307 // Check for &[T] and &[T; N]
308 if let syn::Type::Reference(ref_ty) = ty {
309 // &[T] → BorrowedSlice
310 if let syn::Type::Slice(slice) = &*ref_ty.elem {
311 return Ok(FieldTypeKind::BorrowedSlice(&slice.elem));
312 }
313 // &[T; N] → FixedArray (same as owned)
314 if let syn::Type::Array(arr) = &*ref_ty.elem
315 && let syn::Expr::Lit(syn::ExprLit {
316 lit: syn::Lit::Int(lit_int),
317 ..
318 }) = &arr.len
319 && let Ok(n) = lit_int.base10_parse::<usize>()
320 {
321 return Ok(FieldTypeKind::FixedArray(&arr.elem, n));
322 }
323 }
324
325 if let syn::Type::Path(type_path) = ty
326 && let Some(seg) = type_path.path.segments.last()
327 && let syn::PathArguments::AngleBracketed(args) = &seg.arguments
328 {
329 // Reject wrapper types that would silently fall through to Scalar /
330 // Struct and produce a confusing opaque list-column or a downstream
331 // DataFrameRow assertion error. These are the common smart-pointer
332 // and interior-mutability types that wrap a meaningful inner type but
333 // that DataFrameRow does not know how to expand.
334 //
335 // The macro has no way to resolve through the wrapper without type-
336 // checking (which is unavailable in proc macros). The user must either
337 // unwrap to the inner type, or annotate with `#[dataframe(as_list)]`
338 // to opt into an explicit opaque list-column.
339 //
340 // IMPORTANT: The rejection fires on *path identity alone*, before we
341 // inspect generic args. `Cow<'a, T>` has a lifetime as its first
342 // generic argument, not a type; inspecting `args.args.first()` as a
343 // `GenericArgument::Type` would silently skip `Cow`. Checking ident
344 // before args makes the rejection robust to any generic shape.
345 const REJECTED_WRAPPERS: &[&str] = &[
346 "Option", "Cow", "Rc", "Arc", "RefCell", "Cell", "Mutex", "RwLock",
347 ];
348 let name = seg.ident.to_string();
349 if REJECTED_WRAPPERS.contains(&name.as_str()) {
350 return Err(syn::Error::new_spanned(
351 ty,
352 format!(
353 "DataFrameRow does not support `{name}<…>` directly as a field type. \
354 Use `#[dataframe(as_list)]` to opt into an explicit opaque list-column, \
355 or unwrap to the inner type (e.g. store the inner value directly, using \
356 a sentinel / empty collection for the absent case)."
357 ),
358 ));
359 }
360
361 // For the collection types below we need the first *type* argument.
362 // Skip any leading lifetime or const arguments (e.g. `Cow<'a, B>`
363 // has a lifetime first, but `Cow` is already rejected above so we
364 // only reach here for other angle-bracketed types).
365 let first_type_arg = args.args.iter().find_map(|arg| {
366 if let syn::GenericArgument::Type(t) = arg {
367 Some(t)
368 } else {
369 None
370 }
371 });
372
373 if let Some(inner) = first_type_arg {
374 // Check for Vec<T>
375 if seg.ident == "Vec" {
376 return Ok(FieldTypeKind::VariableVec(inner));
377 }
378
379 // Check for Box<[T]>
380 if seg.ident == "Box"
381 && let syn::Type::Slice(slice) = inner
382 {
383 return Ok(FieldTypeKind::BoxedSlice(&slice.elem));
384 }
385
386 // Check for HashMap<K, V> and BTreeMap<K, V>
387 if (seg.ident == "HashMap" || seg.ident == "BTreeMap")
388 && let Some(syn::GenericArgument::Type(val_ty)) = args.args.iter().nth(1)
389 {
390 return Ok(FieldTypeKind::Map {
391 key_ty: inner,
392 val_ty,
393 });
394 }
395 }
396 }
397
398 // Any remaining path type whose LAST segment is a bare ident (no generic args)
399 // that is NOT a known scalar is treated as a user-defined struct whose
400 // `DataFrameRow` derive should be called. The compile-time assertion
401 // `_assert_inner_is_dataframe_row::<Inner>()` in the generated code surfaces a
402 // clear error if the inner type doesn't have the derive.
403 //
404 // Known scalars (i32, f64, String, bool, …) are kept as `Scalar` so that existing
405 // enum variants with primitive fields (e.g. `Click { id: i64, x: f64 }`) are not
406 // misclassified as struct fields.
407 //
408 // Multi-segment paths (e.g. `crate::geom::Point`, `geom::Point`) are now correctly
409 // classified here — the previous `segs.len() == 1` guard was overly restrictive.
410 // Paths with a leading `::` (absolute paths like `::std::ffi::CString`) still fall
411 // through to `Scalar`; use `#[dataframe(as_list)]` or an unqualified import if
412 // you need a custom treatment.
413 //
414 // RISK: a user type whose last path segment is named after a known-scalar
415 // (e.g. `mymod::String`) still correctly falls through to `Scalar` because of the
416 // KNOWN_SCALARS check. A type named `mymod::Option` / `mymod::Vec` would shadow
417 // the detection above — accepted per Rust naming convention (canonical names are
418 // rarely shadowed). `#[dataframe(as_list)]` is the documented escape hatch.
419 if let syn::Type::Path(type_path) = ty {
420 let segs = &type_path.path.segments;
421 // No leading colon (rules out `::std::…` absolute paths) and no self-type.
422 if type_path.qself.is_none() && type_path.path.leading_colon.is_none() {
423 let seg = segs.last().unwrap();
424 if matches!(seg.arguments, syn::PathArguments::None) {
425 let name = seg.ident.to_string();
426 // Known scalar type names — keep as Scalar so they do not trigger the
427 // struct-flatten path and the DataFrameRow compile-time assertion.
428 const KNOWN_SCALARS: &[&str] = &[
429 "bool", "char", "str", "f32", "f64", "i8", "i16", "i32", "i64", "i128",
430 "isize", "u8", "u16", "u32", "u64", "u128", "usize", "String",
431 ];
432 if !KNOWN_SCALARS.contains(&name.as_str()) {
433 return Ok(FieldTypeKind::Struct { inner_ty: ty });
434 }
435 }
436 }
437 }
438
439 Ok(FieldTypeKind::Scalar)
440}
441// endregion
442
443// region: Resolved field model (struct path)
444
445/// A resolved struct field ready for codegen -- determines how this field maps
446/// to DataFrame companion struct columns.
447///
448/// Each variant represents a different expansion strategy:
449/// - `Single`: one field -> one `Vec<T>` column
450/// - `ExpandedFixed`: `[T; N]` -> N columns (`name_1..name_N`) at compile time
451/// - `ExpandedVec`: `Vec<T>` + `width = N` -> N `Vec<Option<T>>` columns
452/// - `AutoExpandVec`: `Vec<T>` + `expand` -> dynamic column count at runtime
453enum ResolvedField {
454 /// Single column: `name → Vec<ty>`.
455 Single(Box<SingleFieldData>),
456 /// Expanded fixed array: `name: [T; N]` → `name_1..name_N`.
457 ExpandedFixed(Box<ExpandedFixedData>),
458 /// Expanded variable vec with pinned width: `name: Vec<T>` + `width = N`.
459 ExpandedVec(Box<ExpandedVecData>),
460 /// Auto-expanded Vec<T>/Box<[T]>: column count determined at runtime from max row length.
461 AutoExpandVec(Box<AutoExpandVecData>),
462 /// Struct field whose inner type implements `DataFrameRow` (issue #485).
463 /// Companion holds `Vec<Inner>`; `into_data_frame` calls `Inner::to_dataframe`
464 /// and flattens columns under the `<base>_` prefix.
465 Struct(Box<StructFieldData>),
466}
467
468/// Data for [`ResolvedField::Single`].
469struct SingleFieldData {
470 /// Rust field name (for access).
471 rust_name: syn::Ident,
472 /// Column name in the DataFrame.
473 col_name: syn::Ident,
474 /// Column name string.
475 col_name_str: String,
476 /// Field type stored in the companion `Vec<#ty>`. For `#[dataframe(as_list)]`
477 /// on a struct-typed field this is overridden to `::miniextendr_api::list::List`
478 /// — see `needs_into_list`.
479 ty: syn::Type,
480 /// Index in tuple struct (None for named).
481 tuple_index: Option<syn::Index>,
482 /// `#[dataframe(as_list)]` on a struct-typed field (#485 workaround).
483 /// When `true`, the companion field type is overridden to `List` and
484 /// `From<Vec<Row>>` calls `IntoList::into_list()` on each row value.
485 needs_into_list: bool,
486}
487
488/// Data for [`ResolvedField::ExpandedFixed`].
489struct ExpandedFixedData {
490 /// Rust field name.
491 rust_name: syn::Ident,
492 /// Base column name (before suffix).
493 base_name: String,
494 /// Element type T.
495 elem_ty: syn::Type,
496 /// Array length N.
497 len: usize,
498 /// Index in tuple struct.
499 tuple_index: Option<syn::Index>,
500}
501
502/// Data for [`ResolvedField::ExpandedVec`].
503struct ExpandedVecData {
504 /// Rust field name.
505 rust_name: syn::Ident,
506 /// Base column name.
507 base_name: String,
508 /// Element type T.
509 elem_ty: syn::Type,
510 /// Pinned width.
511 width: usize,
512 /// Index in tuple struct.
513 tuple_index: Option<syn::Index>,
514}
515
516/// Data for [`ResolvedField::Struct`].
517///
518/// A struct field whose inner type implements `DataFrameRow`. The companion
519/// struct holds `Vec<Inner>` (the same type users already pass into
520/// `to_dataframe(vec![...])`). At `into_data_frame()` time the inner rows are
521/// converted via `Inner::to_dataframe` → `into_named_columns()`, prefixed with
522/// `<base_name>_`, and pushed into the parent data.frame.
523struct StructFieldData {
524 /// Rust field name (for access on the row type).
525 rust_name: syn::Ident,
526 /// Companion struct field name (ident).
527 col_name: syn::Ident,
528 /// Column name base used as the R-side prefix (`<base>_<inner_col>`).
529 col_name_str: String,
530 /// Inner struct type (used for `to_dataframe` dispatch + DataFrameRow assertion).
531 inner_ty: syn::Type,
532 /// Index in tuple struct (None for named).
533 tuple_index: Option<syn::Index>,
534}
535
536/// Data for [`ResolvedField::AutoExpandVec`].
537struct AutoExpandVecData {
538 /// Rust field name (for row access).
539 rust_name: syn::Ident,
540 /// Companion struct field name (ident).
541 col_name: syn::Ident,
542 /// Column name base string (for suffixed column names).
543 col_name_str: String,
544 /// Element type T.
545 elem_ty: syn::Type,
546 /// Container type for companion struct (Vec<T> or Box<[T]>).
547 container_ty: syn::Type,
548 /// Index in tuple struct.
549 tuple_index: Option<syn::Index>,
550}
551
552/// Resolve a struct field into a [`ResolvedField`], applying field attributes.
553///
554/// Combines the field's `#[dataframe(...)]` attributes with its type classification
555/// to determine the codegen strategy:
556/// - `skip` -> returns `None`
557/// - `as_list` -> `Single` (suppresses expansion)
558/// - `FixedArray` -> `ExpandedFixed` (compile-time expansion to N columns)
559/// - `VariableVec`/`BoxedSlice`/`BorrowedSlice` + `width` -> `ExpandedVec`
560/// - `VariableVec`/`BoxedSlice`/`BorrowedSlice` + `expand` -> `AutoExpandVec`
561/// - Everything else -> `Single`
562///
563/// Returns `Err` if `width` or `expand` is used on an incompatible type.
564fn resolve_struct_field(
565 field: &syn::Field,
566 index: usize,
567 is_tuple: bool,
568) -> syn::Result<Option<ResolvedField>> {
569 let field_attrs = parse_field_attrs(field)?;
570
571 if field_attrs.skip {
572 return Ok(None);
573 }
574
575 let rust_name = if is_tuple {
576 format_ident!("_{}", index)
577 } else {
578 field.ident.as_ref().unwrap().clone()
579 };
580
581 let col_name_str = field_attrs
582 .rename
583 .clone()
584 .unwrap_or_else(|| rust_name.to_string());
585 let col_name = format_ident!("{}", col_name_str);
586
587 let tuple_index = if is_tuple {
588 Some(syn::Index::from(index))
589 } else {
590 None
591 };
592
593 let ty = &field.ty;
594 // Propagate classification errors (e.g. Option<T>, Arc<T>) when as_list is
595 // not set. The as_list branch below uses `.ok()` to suppress errors.
596 let kind = classify_field_type(ty);
597
598 // as_list suppresses expansion. For struct-typed fields (#485 opt-out), the
599 // companion stores `Vec<List>` and From<Vec<Row>> converts each row value
600 // via `IntoList::into_list()`. For non-struct as_list fields, the existing
601 // behavior is preserved: companion stores `Vec<#ty>` and the field type is
602 // serialized natively (this requires `Vec<#ty>: IntoR`).
603 if field_attrs.as_list {
604 // Use `.ok()` here: `as_list` is an explicit opt-in, so wrapper types
605 // like `Option<T>` / `Arc<T>` are allowed — they become opaque list-
606 // columns. Any classification error is suppressed and treated as non-Struct.
607 let (final_ty, needs_into_list) = match classify_field_type(ty).ok() {
608 Some(FieldTypeKind::Struct { .. }) => {
609 (syn::parse_quote!(::miniextendr_api::list::List), true)
610 }
611 _ => (ty.clone(), false),
612 };
613 return Ok(Some(ResolvedField::Single(Box::new(SingleFieldData {
614 rust_name,
615 col_name,
616 col_name_str,
617 ty: final_ty,
618 tuple_index,
619 needs_into_list,
620 }))));
621 }
622
623 match kind? {
624 FieldTypeKind::FixedArray(elem_ty, len) => Ok(Some(ResolvedField::ExpandedFixed(
625 Box::new(ExpandedFixedData {
626 rust_name,
627 base_name: col_name_str,
628 elem_ty: elem_ty.clone(),
629 len,
630 tuple_index,
631 }),
632 ))),
633 FieldTypeKind::VariableVec(elem_ty)
634 | FieldTypeKind::BoxedSlice(elem_ty)
635 | FieldTypeKind::BorrowedSlice(elem_ty) => {
636 if let Some(width) = field_attrs.width {
637 Ok(Some(ResolvedField::ExpandedVec(Box::new(
638 ExpandedVecData {
639 rust_name,
640 base_name: col_name_str,
641 elem_ty: elem_ty.clone(),
642 width,
643 tuple_index,
644 },
645 ))))
646 } else if field_attrs.expand {
647 Ok(Some(ResolvedField::AutoExpandVec(Box::new(
648 AutoExpandVecData {
649 rust_name,
650 col_name,
651 col_name_str,
652 elem_ty: elem_ty.clone(),
653 container_ty: ty.clone(),
654 tuple_index,
655 },
656 ))))
657 } else {
658 // No expansion — keep as opaque single column
659 Ok(Some(ResolvedField::Single(Box::new(SingleFieldData {
660 rust_name,
661 col_name,
662 col_name_str,
663 ty: ty.clone(),
664 tuple_index,
665 needs_into_list: false,
666 }))))
667 }
668 }
669 // Struct-in-struct flattening (issue #485): inner type must implement
670 // `DataFrameRow`. Flattening happens at `into_data_frame()` time; the
671 // companion stores `Vec<Inner>`. `as_list` opts out (handled above).
672 FieldTypeKind::Struct { inner_ty } => {
673 if field_attrs.width.is_some() {
674 return Err(syn::Error::new_spanned(
675 ty,
676 "`width` is only valid on `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
677 ));
678 }
679 if field_attrs.expand {
680 return Err(syn::Error::new_spanned(
681 ty,
682 "`expand`/`unnest` is only valid on `[T; N]`, `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
683 ));
684 }
685 Ok(Some(ResolvedField::Struct(Box::new(StructFieldData {
686 rust_name,
687 col_name,
688 col_name_str,
689 inner_ty: inner_ty.clone(),
690 tuple_index,
691 }))))
692 }
693 FieldTypeKind::Scalar | FieldTypeKind::Map { .. } => {
694 if field_attrs.width.is_some() {
695 return Err(syn::Error::new_spanned(
696 ty,
697 "`width` is only valid on `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
698 ));
699 }
700 if field_attrs.expand {
701 return Err(syn::Error::new_spanned(
702 ty,
703 "`expand`/`unnest` is only valid on `[T; N]`, `Vec<T>`, `Box<[T]>`, or `&[T]` fields",
704 ));
705 }
706 Ok(Some(ResolvedField::Single(Box::new(SingleFieldData {
707 rust_name,
708 col_name,
709 col_name_str,
710 ty: ty.clone(),
711 tuple_index,
712 needs_into_list: false,
713 }))))
714 }
715 }
716}
717// endregion
718
719// region: Top-level dispatch
720
721/// Derive `DataFrameRow`: generates a companion DataFrame type with collection fields.
722///
723/// # Requirements
724///
725/// For structs: the type must implement `IntoList`.
726/// For enums: all variants must have named fields.
727///
728/// # Generated Items
729///
730/// For a struct `Measurement { time: f64, value: f64 }`:
731/// - Struct `MeasurementDataFrame { time: Vec<f64>, value: Vec<f64> }`
732/// - `impl IntoDataFrame for MeasurementDataFrame`
733/// - `impl From<Vec<Measurement>> for MeasurementDataFrame`
734/// - `impl IntoIterator for MeasurementDataFrame`
735/// - Associated methods on `Measurement`:
736/// - `to_dataframe(Vec<Self>) -> MeasurementDataFrame`
737/// - `from_dataframe(MeasurementDataFrame) -> Vec<Self>`
738///
739/// For an enum:
740/// - Companion struct with `Vec<Option<T>>` columns (field-name union)
741/// - Optional tag column for variant discrimination
742/// - `impl From<Vec<Enum>> for EnumDataFrame`
743/// - `impl IntoDataFrame for EnumDataFrame`
744/// - Associated `to_dataframe` method
745///
746/// # Attributes
747///
748/// - `#[dataframe(name = "CustomName")]` — Custom companion type name
749/// - `#[dataframe(align)]` — Enum alignment mode (accepted but implicit)
750/// - `#[dataframe(tag = "col")]` — Add variant discriminator column
751///
752/// Both struct and enum companion types get `from_rows()` (sequential) and
753/// `from_rows_par()` (parallel, `#[cfg(feature = "rayon")]`) methods automatically.
754pub fn derive_dataframe_row(input: DeriveInput) -> syn::Result<TokenStream> {
755 let row_name = &input.ident;
756
757 // Allow lifetime parameters (needed for &[T] borrowed slice fields).
758 // Allow type parameters on unit-only enums (all variants are unit) — the
759 // companion struct has no field columns to type-parameterise, and the three
760 // unit-enum impls (UnitEnumFactor, IntoR, IntoList) handle generics via the
761 // split path in enum_expansion.rs.
762 // Reject type and const parameters for everything else.
763 let has_type_params = input.generics.type_params().next().is_some();
764 let has_const_params = input.generics.const_params().next().is_some();
765 if has_type_params || has_const_params {
766 let is_unit_only_enum = matches!(&input.data, Data::Enum(e)
767 if e.variants.iter().all(|v| matches!(v.fields, Fields::Unit)));
768 if !is_unit_only_enum {
769 return Err(syn::Error::new_spanned(
770 &input.generics,
771 "DataFrameRow does not support type or const generic parameters",
772 ));
773 }
774 }
775
776 // Parse attributes
777 let attrs = parse_dataframe_attrs(&input)?;
778
779 let df_name = attrs
780 .name
781 .clone()
782 .unwrap_or_else(|| format_ident!("{}DataFrame", row_name));
783
784 let base = match &input.data {
785 Data::Struct(data) => {
786 // `align` is a no-op on structs (only semantically meaningful for enums)
787 derive_struct_dataframe(row_name, &input, data, &df_name, &attrs)
788 }
789 Data::Enum(data) => {
790 // align is implicit for enums — accept but don't require
791 derive_enum_dataframe(row_name, &input, data, &df_name, &attrs)
792 }
793 Data::Union(_) => Err(syn::Error::new_spanned(
794 row_name,
795 "DataFrameRow does not support unions",
796 )),
797 }?;
798
799 // Generate IntoR for the companion DataFrame type so it can be returned
800 // directly from #[miniextendr] functions. This ensures both the standalone
801 // #[derive(DataFrameRow)] path and the #[miniextendr(dataframe)] path
802 // produce identical output.
803 let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
804 Ok(quote::quote! {
805 #base
806
807 impl #impl_generics ::miniextendr_api::into_r::IntoR for #df_name #ty_generics #where_clause {
808 type Error = std::convert::Infallible;
809
810 #[inline]
811 fn try_into_sexp(self) -> Result<::miniextendr_api::ffi::SEXP, Self::Error> {
812 Ok(self.into_sexp())
813 }
814
815 #[inline]
816 unsafe fn try_into_sexp_unchecked(self) -> Result<::miniextendr_api::ffi::SEXP, Self::Error> {
817 self.try_into_sexp()
818 }
819
820 #[inline]
821 fn into_sexp(self) -> ::miniextendr_api::ffi::SEXP {
822 ::miniextendr_api::convert::IntoDataFrame::into_data_frame(self).into_sexp()
823 }
824
825 #[inline]
826 unsafe fn into_sexp_unchecked(self) -> ::miniextendr_api::ffi::SEXP {
827 ::miniextendr_api::convert::IntoDataFrame::into_data_frame(self).into_sexp()
828 }
829 }
830 })
831}
832// endregion
833
834// region: Struct path (existing logic, extracted)
835
836/// Generate `DataFrameRow` expansion for struct types.
837///
838/// Produces:
839/// - A companion struct `{Name}DataFrame` with `Vec<T>` columns
840/// - `impl IntoDataFrame for {Name}DataFrame`
841/// - `impl From<Vec<{Name}>> for {Name}DataFrame`
842/// - `impl IntoIterator` (for named structs without expansion)
843/// - Associated methods: `to_dataframe`, `from_dataframe`, `from_rows`, `from_rows_par`
844/// - A compile-time `IntoList` assertion (for non-expanded named structs)
845///
846/// Handles fixed-array expansion (`[T; N]`), pinned-width Vec expansion
847/// (`Vec<T>` + `width`), and auto-expand Vec (`Vec<T>` + `expand`).
848fn derive_struct_dataframe(
849 row_name: &syn::Ident,
850 input: &DeriveInput,
851 data: &syn::DataStruct,
852 df_name: &syn::Ident,
853 attrs: &DataFrameAttrs,
854) -> syn::Result<TokenStream> {
855 let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
856
857 let is_tuple_struct = matches!(&data.fields, Fields::Unnamed(_));
858 let is_unit_struct = matches!(&data.fields, Fields::Unit);
859
860 // Resolve fields through the new FieldAttrs + type classification system.
861 let resolved: Vec<ResolvedField> = match &data.fields {
862 Fields::Named(fields) => {
863 let mut out = Vec::new();
864 for (i, f) in fields.named.iter().enumerate() {
865 if let Some(rf) = resolve_struct_field(f, i, false)? {
866 out.push(rf);
867 }
868 }
869 out
870 }
871 Fields::Unnamed(fields) => {
872 let mut out = Vec::new();
873 for (i, f) in fields.unnamed.iter().enumerate() {
874 if let Some(rf) = resolve_struct_field(f, i, true)? {
875 out.push(rf);
876 }
877 }
878 out
879 }
880 Fields::Unit => vec![],
881 };
882
883 // Check whether any field uses expansion — affects whether we can generate
884 // IntoIterator (expanded fields change the companion struct shape).
885 let has_expansion = resolved
886 .iter()
887 .any(|rf| !matches!(rf, ResolvedField::Single(..)));
888 // Track which Rust fields were skipped (for destructure patterns).
889 let skipped_fields: Vec<syn::Ident> = match &data.fields {
890 Fields::Named(fields) => fields
891 .named
892 .iter()
893 .filter_map(|f| {
894 let fa = parse_field_attrs(f).ok()?;
895 if fa.skip {
896 Some(f.ident.as_ref().unwrap().clone())
897 } else {
898 None
899 }
900 })
901 .collect(),
902 _ => vec![],
903 };
904
905 let has_tag = attrs.tag.is_some();
906 let row_name_str = row_name.to_string();
907
908 // region: Build flat column lists from resolved fields
909 // Each resolved field may produce 1..N columns.
/// One statically-known column of the companion struct.
///
/// Built below from `Single`, `ExpandedFixed` and `ExpandedVec` resolved
/// fields; `AutoExpandVec` and `Struct` fields are collected separately and
/// never produce a `FlatCol`.
910 struct FlatCol {
911 /// Identifier of the companion struct field holding this column.
912 df_field: syn::Ident,
913 /// Column name string in the R data frame.
914 col_name_str: String,
915 /// Element type `T` of the companion's `Vec<T>` field for this column.
916 vec_elem_ty: syn::Type,
917 /// `#[dataframe(as_list)]` on a struct-typed field — companion stores
918 /// `Vec<List>`. The `from_rows_par` pre-pass handles these sequentially
919 /// instead of scatter-writing (`List` doesn't implement `Default`).
920 needs_into_list: bool,
921 }
922
923 let mut flat_cols: Vec<FlatCol> = Vec::new();
924
925 for rf in &resolved {
926 match rf {
927 ResolvedField::Single(data) => {
928 flat_cols.push(FlatCol {
929 df_field: data.col_name.clone(),
930 col_name_str: data.col_name_str.clone(),
931 vec_elem_ty: data.ty.clone(),
932 needs_into_list: data.needs_into_list,
933 });
934 }
935 ResolvedField::ExpandedFixed(data) => {
936 for i in 1..=data.len {
937 let name = format!("{}_{}", data.base_name, i);
938 flat_cols.push(FlatCol {
939 df_field: format_ident!("{}_{}", data.base_name, i),
940 col_name_str: name,
941 vec_elem_ty: data.elem_ty.clone(),
942 needs_into_list: false,
943 });
944 }
945 }
946 ResolvedField::ExpandedVec(data) => {
947 for i in 1..=data.width {
948 let name = format!("{}_{}", data.base_name, i);
949 let elem_ty = &data.elem_ty;
950 let opt_ty: syn::Type = syn::parse_quote!(Option<#elem_ty>);
951 flat_cols.push(FlatCol {
952 df_field: format_ident!("{}_{}", data.base_name, i),
953 col_name_str: name,
954 vec_elem_ty: opt_ty,
955 needs_into_list: false,
956 });
957 }
958 }
959 // AutoExpandVec / Struct do not produce FlatCols — handled separately.
960 ResolvedField::AutoExpandVec(..) | ResolvedField::Struct(..) => {}
961 }
962 }
963 // endregion
964
965 // region: Collect auto-expand fields
/// A `ResolvedField::AutoExpandVec` field, whose column count is only known
/// at runtime.
///
/// The companion struct stores it as a single `Vec<container_ty>` field;
/// `into_data_frame` later splits it into `<name>_1 ..= <name>_max` columns,
/// where `max` is the longest per-row container.
966 struct AutoExpandCol {
967 /// Companion struct field name.
968 df_field: syn::Ident,
969 /// Per-row container type (`Vec<T>` or `Box<[T]>`) stored in the companion.
970 container_ty: syn::Type,
971 }
972
973 let auto_expand_cols: Vec<AutoExpandCol> = resolved
974 .iter()
975 .filter_map(|rf| {
976 if let ResolvedField::AutoExpandVec(data) = rf {
977 Some(AutoExpandCol {
978 df_field: format_ident!("{}", data.col_name_str),
979 container_ty: data.container_ty.clone(),
980 })
981 } else {
982 None
983 }
984 })
985 .collect();
986 let has_auto_expand = !auto_expand_cols.is_empty();
987 // endregion
988
989 // region: Collect struct (DataFrameRow-flattened) fields (#485)
990 //
991 // Only the codegen-time bits are mirrored here — `rust_name` / `tuple_index`
992 // are read directly off `ResolvedField::Struct` at the per-row pushes site.
/// A `ResolvedField::Struct` field (#485): a nested row type that is
/// flattened into prefixed columns of the outer data frame.
993 struct StructCol {
/// Companion struct field name; the companion stores `Vec<inner_ty>` under it.
994 df_field: syn::Ident,
/// Base column name. Flattened columns are emitted as
/// `<base>_<inner column name>` and length checks report this name.
995 col_name_str: String,
/// Type of the nested row struct; codegen calls `<inner_ty>::to_dataframe`
/// on the collected `Vec<inner_ty>`.
996 inner_ty: syn::Type,
997 }
998
999 let struct_cols: Vec<StructCol> = resolved
1000 .iter()
1001 .filter_map(|rf| {
1002 if let ResolvedField::Struct(data) = rf {
1003 Some(StructCol {
1004 df_field: data.col_name.clone(),
1005 col_name_str: data.col_name_str.clone(),
1006 inner_ty: data.inner_ty.clone(),
1007 })
1008 } else {
1009 None
1010 }
1011 })
1012 .collect();
1013 let has_struct = !struct_cols.is_empty();
1014
1015 // Any `#[dataframe(as_list)]` on a struct-typed field stores `List` in the
1016 // companion (#485 opt-out). We can't round-trip List back to the inner
1017 // struct without a `FromList`-like trait, and `List` doesn't impl
1018 // `Default`, so several codegen branches need to suppress themselves:
1019 // IntoIterator generation, the `IntoList` compile-time assertion, and
1020 // `from_rows_par`.
1021 let has_into_list_struct = resolved
1022 .iter()
1023 .any(|rf| matches!(rf, ResolvedField::Single(d) if d.needs_into_list));
1024 // endregion
1025
1026 // region: Companion struct
1027 let tag_field_decl = if has_tag {
1028 quote! { pub _tag: Vec<String>, }
1029 } else {
1030 TokenStream::new()
1031 };
1032
1033 let mut df_fields_tokens: Vec<TokenStream> = flat_cols
1034 .iter()
1035 .map(|fc| {
1036 let name = &fc.df_field;
1037 let ty = &fc.vec_elem_ty;
1038 quote! { pub #name: Vec<#ty> }
1039 })
1040 .collect();
1041 for ac in &auto_expand_cols {
1042 let name = &ac.df_field;
1043 let cty = &ac.container_ty;
1044 df_fields_tokens.push(quote! { pub #name: Vec<#cty> });
1045 }
1046 for sc in &struct_cols {
1047 let name = &sc.df_field;
1048 let ity = &sc.inner_ty;
1049 df_fields_tokens.push(quote! { pub #name: Vec<#ity> });
1050 }
1051
1052 let len_field_decl = if flat_cols.is_empty()
1053 && auto_expand_cols.is_empty()
1054 && struct_cols.is_empty()
1055 && !has_tag
1056 {
1057 quote! { pub _len: usize, }
1058 } else {
1059 TokenStream::new()
1060 };
1061
1062 let dataframe_struct = quote! {
1063 #[derive(Debug, Clone)]
1064 pub struct #df_name #impl_generics #where_clause {
1065 #tag_field_decl
1066 #len_field_decl
1067 #(#df_fields_tokens),*
1068 }
1069 };
1070 // endregion
1071
1072 // region: IntoDataFrame
1073 let length_ref = if has_tag {
1074 quote! { self._tag.len() }
1075 } else if !flat_cols.is_empty() {
1076 let first = &flat_cols[0].df_field;
1077 quote! { self.#first.len() }
1078 } else if !auto_expand_cols.is_empty() {
1079 let first = &auto_expand_cols[0].df_field;
1080 quote! { self.#first.len() }
1081 } else if !struct_cols.is_empty() {
1082 let first = &struct_cols[0].df_field;
1083 quote! { self.#first.len() }
1084 } else {
1085 quote! { self._len }
1086 };
1087
1088 // Each pair protects its SEXP via `__scope.protect_raw` so previously-built
1089 // column SEXPs survive subsequent column allocations. Pre-fix the raw
1090 // `vec![(name, into_sexp(...)), ...]` left every SEXP unrooted across the
1091 // next column's allocations — UAF under gctorture
1092 // (reviews/2026-05-07-gctorture-audit.md).
1093 let tag_pair = if let Some(ref tag_name) = attrs.tag {
1094 quote! { (#tag_name, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self._tag))), }
1095 } else {
1096 TokenStream::new()
1097 };
1098
1099 let df_pairs: Vec<TokenStream> = flat_cols
1100 .iter()
1101 .map(|fc| {
1102 let name = &fc.df_field;
1103 let name_str = &fc.col_name_str;
1104 quote! { (#name_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#name))) }
1105 })
1106 .collect();
1107
1108 let mut length_checks: Vec<TokenStream> = flat_cols
1109 .iter()
1110 .map(|fc| {
1111 let name = &fc.df_field;
1112 let name_str = &fc.col_name_str;
1113 quote! {
1114 assert!(
1115 self.#name.len() == _n_rows,
1116 "column length mismatch in {}: column `{}` has length {} but expected {}",
1117 stringify!(#df_name),
1118 #name_str,
1119 self.#name.len(),
1120 _n_rows,
1121 );
1122 }
1123 })
1124 .collect();
1125 for sc in &struct_cols {
1126 let name = &sc.df_field;
1127 let name_str = &sc.col_name_str;
1128 length_checks.push(quote! {
1129 assert!(
1130 self.#name.len() == _n_rows,
1131 "column length mismatch in {}: struct column `{}` has length {} but expected {}",
1132 stringify!(#df_name),
1133 #name_str,
1134 self.#name.len(),
1135 _n_rows,
1136 );
1137 });
1138 }
1139
1140 let into_dataframe_impl = if has_auto_expand || has_struct {
1141 // Dynamic pair building: iterate resolved fields in order,
1142 // emitting static pairs for flat columns and runtime-built pairs
1143 // for auto-expand and struct-flattened (#485) fields.
1144 let tag_push_pair = if let Some(ref tag_name) = attrs.tag {
1145 quote! {
1146 __df_pairs.push((#tag_name.to_string(), __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self._tag))));
1147 }
1148 } else {
1149 TokenStream::new()
1150 };
1151
1152 let pair_pushes: Vec<TokenStream> = resolved
1153 .iter()
1154 .map(|rf| match rf {
1155 ResolvedField::Single(data) => {
1156 let col_name = &data.col_name;
1157 let col_name_str = &data.col_name_str;
1158 quote! {
1159 __df_pairs.push((
1160 #col_name_str.to_string(),
1161 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#col_name)),
1162 ));
1163 }
1164 }
1165 ResolvedField::ExpandedFixed(data) => {
1166 let pushes: Vec<TokenStream> = (1..=data.len)
1167 .map(|i| {
1168 let name = format!("{}_{}", data.base_name, i);
1169 let ident = format_ident!("{}_{}", data.base_name, i);
1170 quote! {
1171 __df_pairs.push((
1172 #name.to_string(),
1173 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#ident)),
1174 ));
1175 }
1176 })
1177 .collect();
1178 quote! { #(#pushes)* }
1179 }
1180 ResolvedField::ExpandedVec(data) => {
1181 let pushes: Vec<TokenStream> = (1..=data.width)
1182 .map(|i| {
1183 let name = format!("{}_{}", data.base_name, i);
1184 let ident = format_ident!("{}_{}", data.base_name, i);
1185 quote! {
1186 __df_pairs.push((
1187 #name.to_string(),
1188 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#ident)),
1189 ));
1190 }
1191 })
1192 .collect();
1193 quote! { #(#pushes)* }
1194 }
1195 ResolvedField::AutoExpandVec(data) => {
1196 let col_name = &data.col_name;
1197 let col_name_str = &data.col_name_str;
1198 let elem_ty = &data.elem_ty;
1199 quote! {
1200 {
1201 let __auto = self.#col_name;
1202 let __max = __auto.iter().map(|v| v.len()).max().unwrap_or(0);
1203 let mut __cols: Vec<Vec<Option<#elem_ty>>> = (0..__max)
1204 .map(|_| Vec::with_capacity(_n_rows))
1205 .collect();
1206 for __row_vec in &__auto {
1207 for (__i, __col) in __cols.iter_mut().enumerate() {
1208 __col.push(__row_vec.get(__i).cloned());
1209 }
1210 }
1211 for (__i, __col) in __cols.into_iter().enumerate() {
1212 __df_pairs.push((
1213 format!("{}_{}", #col_name_str, __i + 1),
1214 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(__col)),
1215 ));
1216 }
1217 }
1218 }
1219 }
1220 ResolvedField::Struct(data) => {
1221 // Issue #485: convert `Vec<Inner>` via Inner::to_dataframe,
1222 // extract its named columns, and push under `<base>_` prefix.
1223 let col_name = &data.col_name;
1224 let base_name_str = &data.col_name_str;
1225 let inner_ty = &data.inner_ty;
1226 quote! {
1227 {
1228 let __inner_df = <#inner_ty>::to_dataframe(self.#col_name);
1229 let __inner_cols = ::miniextendr_api::convert::IntoDataFrame::into_named_columns(__inner_df);
1230 for (__inner_col_name, __inner_col_sexp) in __inner_cols {
1231 // Protect the source column SEXP across subsequent allocations.
1232 let __src = __scope.protect_raw(__inner_col_sexp);
1233 __df_pairs.push((
1234 format!("{}_{}", #base_name_str, __inner_col_name),
1235 __src,
1236 ));
1237 }
1238 }
1239 }
1240 }
1241 })
1242 .collect();
1243
1244 quote! {
1245 impl #impl_generics ::miniextendr_api::convert::IntoDataFrame for #df_name #ty_generics #where_clause {
1246 fn into_data_frame(self) -> ::miniextendr_api::List {
1247 let _n_rows = #length_ref;
1248 #(#length_checks)*
1249 // SAFETY: into_data_frame only runs on the R main thread.
1250 // ProtectScope keeps each column SEXP rooted across the
1251 // next column's allocations; from_raw_pairs writes them
1252 // into the parent VECSXP before we drop the scope.
1253 unsafe {
1254 let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
1255 let mut __df_pairs: Vec<(
1256 String,
1257 ::miniextendr_api::ffi::SEXP,
1258 )> = Vec::new();
1259 #tag_push_pair
1260 #(#pair_pushes)*
1261 ::miniextendr_api::list::List::from_raw_pairs(__df_pairs)
1262 .set_class_str(&["data.frame"])
1263 .set_row_names_int(_n_rows)
1264 }
1265 }
1266 }
1267 }
1268 } else {
1269 quote! {
1270 impl #impl_generics ::miniextendr_api::convert::IntoDataFrame for #df_name #ty_generics #where_clause {
1271 fn into_data_frame(self) -> ::miniextendr_api::List {
1272 let _n_rows = #length_ref;
1273 #(#length_checks)*
1274 // SAFETY: see auto-expand branch.
1275 unsafe {
1276 let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
1277 ::miniextendr_api::list::List::from_raw_pairs(vec![
1278 #tag_pair
1279 #(#df_pairs),*
1280 ])
1281 .set_class_str(&["data.frame"])
1282 .set_row_names_int(_n_rows)
1283 }
1284 }
1285 }
1286 }
1287 };
1288 // endregion
1289
1290 // region: From<Vec<RowType>>
1291 let mut col_vec_inits: Vec<TokenStream> = flat_cols
1292 .iter()
1293 .map(|fc| {
1294 let name = &fc.df_field;
1295 let ty = &fc.vec_elem_ty;
1296 quote! { let mut #name: Vec<#ty> = Vec::with_capacity(len); }
1297 })
1298 .collect();
1299 for ac in &auto_expand_cols {
1300 let name = &ac.df_field;
1301 let cty = &ac.container_ty;
1302 col_vec_inits.push(quote! { let mut #name: Vec<#cty> = Vec::with_capacity(len); });
1303 }
1304 for sc in &struct_cols {
1305 let name = &sc.df_field;
1306 let ity = &sc.inner_ty;
1307 col_vec_inits.push(quote! { let mut #name: Vec<#ity> = Vec::with_capacity(len); });
1308 }
1309
1310 let tag_init = if has_tag {
1311 quote! { let mut _tag: Vec<String> = Vec::with_capacity(len); }
1312 } else {
1313 TokenStream::new()
1314 };
1315
1316 let tag_push = if has_tag {
1317 quote! { _tag.push(#row_name_str.to_string()); }
1318 } else {
1319 TokenStream::new()
1320 };
1321
1322 // Generate push statements for each resolved field
1323 let col_pushes: Vec<TokenStream> = resolved
1324 .iter()
1325 .map(|rf| match rf {
1326 ResolvedField::Single(data) => {
1327 let access = if let Some(idx) = &data.tuple_index {
1328 quote! { row.#idx }
1329 } else {
1330 let rust_name = &data.rust_name;
1331 quote! { row.#rust_name }
1332 };
1333 let col_name = &data.col_name;
1334 if data.needs_into_list {
1335 quote! { #col_name.push(::miniextendr_api::list::IntoList::into_list(#access)); }
1336 } else {
1337 quote! { #col_name.push(#access); }
1338 }
1339 }
1340 ResolvedField::ExpandedFixed(data) => {
1341 let access = if let Some(idx) = &data.tuple_index {
1342 quote! { row.#idx }
1343 } else {
1344 let rust_name = &data.rust_name;
1345 quote! { row.#rust_name }
1346 };
1347 let bind = format_ident!("__arr_{}", data.rust_name);
1348 let pushes: Vec<TokenStream> = (0..data.len)
1349 .map(|i| {
1350 let col_ident = format_ident!("{}_{}", data.base_name, i + 1);
1351 let idx = syn::Index::from(i);
1352 quote! { #col_ident.push(#bind[#idx]); }
1353 })
1354 .collect();
1355 quote! {
1356 let #bind = #access;
1357 #(#pushes)*
1358 }
1359 }
1360 ResolvedField::ExpandedVec(data) => {
1361 let access = if let Some(idx) = &data.tuple_index {
1362 quote! { row.#idx }
1363 } else {
1364 let rust_name = &data.rust_name;
1365 quote! { row.#rust_name }
1366 };
1367 let bind = format_ident!("__vec_{}", data.rust_name);
1368 let pushes: Vec<TokenStream> = (0..data.width)
1369 .map(|i| {
1370 let col_ident = format_ident!("{}_{}", data.base_name, i + 1);
1371 quote! { #col_ident.push(#bind.get(#i).cloned()); }
1372 })
1373 .collect();
1374 quote! {
1375 let #bind = #access;
1376 #(#pushes)*
1377 }
1378 }
1379 ResolvedField::AutoExpandVec(data) => {
1380 let access = if let Some(idx) = &data.tuple_index {
1381 quote! { row.#idx }
1382 } else {
1383 let rust_name = &data.rust_name;
1384 quote! { row.#rust_name }
1385 };
1386 let col_name = &data.col_name;
1387 quote! { #col_name.push(#access); }
1388 }
1389 ResolvedField::Struct(data) => {
1390 let access = if let Some(idx) = &data.tuple_index {
1391 quote! { row.#idx }
1392 } else {
1393 let rust_name = &data.rust_name;
1394 quote! { row.#rust_name }
1395 };
1396 let col_name = &data.col_name;
1397 quote! { #col_name.push(#access); }
1398 }
1399 })
1400 .collect();
1401
1402 let tag_struct_field = if has_tag {
1403 quote! { _tag, }
1404 } else {
1405 TokenStream::new()
1406 };
1407
1408 let len_struct_field = if flat_cols.is_empty()
1409 && auto_expand_cols.is_empty()
1410 && struct_cols.is_empty()
1411 && !has_tag
1412 {
1413 quote! { _len: len, }
1414 } else {
1415 TokenStream::new()
1416 };
1417
1418 let mut col_struct_fields: Vec<TokenStream> = flat_cols
1419 .iter()
1420 .map(|fc| {
1421 let name = &fc.df_field;
1422 quote! { #name }
1423 })
1424 .collect();
1425 for ac in &auto_expand_cols {
1426 let name = &ac.df_field;
1427 col_struct_fields.push(quote! { #name });
1428 }
1429 for sc in &struct_cols {
1430 let name = &sc.df_field;
1431 col_struct_fields.push(quote! { #name });
1432 }
1433
1434 // For skipped fields in destructure: bind to `_`
1435 let skip_bindings: Vec<TokenStream> = skipped_fields
1436 .iter()
1437 .map(|name| quote! { let _ = row.#name; })
1438 .collect();
1439
1440 let from_vec_impl = quote! {
1441 impl #impl_generics From<Vec<#row_name #ty_generics>> for #df_name #ty_generics #where_clause {
1442 fn from(rows: Vec<#row_name #ty_generics>) -> Self {
1443 let len = rows.len();
1444 #tag_init
1445 #(#col_vec_inits)*
1446 for row in rows {
1447 #tag_push
1448 #(#skip_bindings)*
1449 #(#col_pushes)*
1450 }
1451 #df_name {
1452 #tag_struct_field
1453 #len_struct_field
1454 #(#col_struct_fields),*
1455 }
1456 }
1457 }
1458 };
1459 // endregion
1460
1461 // region: Generate from_rows_par (parallel scatter-write via ColumnWriter)
1462 //
1463 // Two field kinds require special handling instead of parallel scatter-write:
1464 // - struct (DataFrameRow-flattened) fields (#485): companion stores
1465 // `Vec<Inner>` where `Inner` doesn't implement `Default`. These are
1466 // collected sequentially in a pre-pass (`for __prerow in &rows { ... }`)
1467 // before `into_par_iter()` consumes the vector. Requires `Inner: Clone`.
1468 // - `as_list`-on-struct fields (#485 opt-out) store `Vec<List>` in the
1469 // companion, and `List` doesn't implement `Default`. Same pre-pass approach.
1470 // Both are handled via sequential pre-pass + skip in the parallel loop.
1471 // The pre-pass is O(n) extra per struct/list-struct field but does not change
1472 // asymptotic complexity — just adds a constant factor for these column types.
1473 let from_rows_par_method = if !flat_cols.is_empty()
1474 || !auto_expand_cols.is_empty()
1475 || has_tag
1476 || has_struct
1477 || has_into_list_struct
1478 {
1479 // Column declarations:
1480 // - scalar / expand cols: vec![default; len] (scatter-write in parallel)
1481 // - struct / as_list-struct cols: Vec::with_capacity(len) filled in pre-pass
1482 let mut par_col_decls = Vec::new();
1483 if has_tag {
1484 par_col_decls.push(quote! {
1485 let mut _tag: Vec<String> = vec![String::new(); len];
1486 });
1487 }
1488 // Sequential pre-pass: struct fields (Inner: Clone required).
1489 // Iterate resolved to pick up tuple_index for tuple-struct outers.
1490 for rf in &resolved {
1491 if let ResolvedField::Struct(data) = rf {
1492 let col_name = &data.col_name;
1493 let ity = &data.inner_ty;
1494 let access = if let Some(idx) = &data.tuple_index {
1495 quote! { __prerow.#idx }
1496 } else {
1497 let rust_name = &data.rust_name;
1498 quote! { __prerow.#rust_name }
1499 };
1500 par_col_decls.push(quote! {
1501 let mut #col_name: Vec<#ity> = Vec::with_capacity(len);
1502 for __prerow in &rows {
1503 #col_name.push(::core::clone::Clone::clone(&#access));
1504 }
1505 });
1506 }
1507 }
1508 // Sequential pre-pass: as_list-on-struct fields (List: !Default).
1509 for rf in &resolved {
1510 if let ResolvedField::Single(data) = rf
1511 && data.needs_into_list
1512 {
1513 let col_name = &data.col_name;
1514 let rust_name = &data.rust_name;
1515 let access = if let Some(idx) = &data.tuple_index {
1516 quote! { __prerow.#idx }
1517 } else {
1518 quote! { __prerow.#rust_name }
1519 };
1520 par_col_decls.push(quote! {
1521 let mut #col_name: Vec<::miniextendr_api::list::List> = Vec::with_capacity(len);
1522 for __prerow in &rows {
1523 #col_name.push(::miniextendr_api::list::IntoList::into_list(
1524 ::core::clone::Clone::clone(&#access)
1525 ));
1526 }
1527 });
1528 }
1529 }
1530 // Parallel scalar/expand columns.
1531 for fc in &flat_cols {
1532 if fc.needs_into_list {
1533 // Handled in the sequential pre-pass above.
1534 continue;
1535 }
1536 let name = &fc.df_field;
1537 let ty = &fc.vec_elem_ty;
1538 par_col_decls.push(quote! {
1539 let mut #name: Vec<#ty> = vec![<#ty as ::core::default::Default>::default(); len];
1540 });
1541 }
1542 for ac in &auto_expand_cols {
1543 let name = &ac.df_field;
1544 let cty = &ac.container_ty;
1545 par_col_decls.push(quote! {
1546 let mut #name: Vec<#cty> = vec![<#cty as ::core::default::Default>::default(); len];
1547 });
1548 }
1549
1550 // Writer declarations (only for scatter-write cols — struct/as_list pre-pass
1551 // cols are already populated and need no ColumnWriter).
1552 let mut writer_decls = Vec::new();
1553 if has_tag {
1554 writer_decls.push(quote! {
1555 let __w_tag = unsafe {
1556 ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut _tag)
1557 };
1558 });
1559 }
1560 for fc in &flat_cols {
1561 if fc.needs_into_list {
1562 continue;
1563 }
1564 let name = &fc.df_field;
1565 let w_name = format_ident!("__w_{}", name);
1566 writer_decls.push(quote! {
1567 let #w_name = unsafe {
1568 ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut #name)
1569 };
1570 });
1571 }
1572 for ac in &auto_expand_cols {
1573 let name = &ac.df_field;
1574 let w_name = format_ident!("__w_{}", name);
1575 writer_decls.push(quote! {
1576 let #w_name = unsafe {
1577 ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut #name)
1578 };
1579 });
1580 }
1581
1582 // Write calls per resolved field (parallel scatter-write only).
1583 let tag_write = if has_tag {
1584 quote! { __w_tag.write(__i, #row_name_str.to_string()); }
1585 } else {
1586 TokenStream::new()
1587 };
1588
1589 let par_write_calls: Vec<TokenStream> = resolved
1590 .iter()
1591 .map(|rf| match rf {
1592 ResolvedField::Single(data) => {
1593 if data.needs_into_list {
1594 // Handled in the sequential pre-pass; skip in par loop.
1595 return TokenStream::new();
1596 }
1597 let access = if let Some(idx) = &data.tuple_index {
1598 quote! { __row.#idx }
1599 } else {
1600 let rust_name = &data.rust_name;
1601 quote! { __row.#rust_name }
1602 };
1603 let w_name = format_ident!("__w_{}", data.col_name);
1604 quote! { #w_name.write(__i, #access); }
1605 }
1606 ResolvedField::ExpandedFixed(data) => {
1607 let access = if let Some(idx) = &data.tuple_index {
1608 quote! { __row.#idx }
1609 } else {
1610 let rust_name = &data.rust_name;
1611 quote! { __row.#rust_name }
1612 };
1613 let bind = format_ident!("__arr_{}", data.rust_name);
1614 let writes: Vec<TokenStream> = (0..data.len)
1615 .map(|i| {
1616 let w_name = format_ident!("__w_{}_{}", data.base_name, i + 1);
1617 let idx = syn::Index::from(i);
1618 quote! { #w_name.write(__i, #bind[#idx]); }
1619 })
1620 .collect();
1621 quote! {
1622 let #bind = #access;
1623 #(#writes)*
1624 }
1625 }
1626 ResolvedField::ExpandedVec(data) => {
1627 let access = if let Some(idx) = &data.tuple_index {
1628 quote! { __row.#idx }
1629 } else {
1630 let rust_name = &data.rust_name;
1631 quote! { __row.#rust_name }
1632 };
1633 let bind = format_ident!("__vec_{}", data.rust_name);
1634 let writes: Vec<TokenStream> = (0..data.width)
1635 .map(|i| {
1636 let w_name = format_ident!("__w_{}_{}", data.base_name, i + 1);
1637 quote! { #w_name.write(__i, #bind.get(#i).cloned()); }
1638 })
1639 .collect();
1640 quote! {
1641 let #bind = #access;
1642 #(#writes)*
1643 }
1644 }
1645 ResolvedField::AutoExpandVec(data) => {
1646 let access = if let Some(idx) = &data.tuple_index {
1647 quote! { __row.#idx }
1648 } else {
1649 let rust_name = &data.rust_name;
1650 quote! { __row.#rust_name }
1651 };
1652 let w_name = format_ident!("__w_{}", data.col_name);
1653 quote! { #w_name.write(__i, #access); }
1654 }
1655 // Struct fields (#485) are collected in the sequential pre-pass
1656 // above; nothing to write in the parallel loop.
1657 ResolvedField::Struct(_) => TokenStream::new(),
1658 })
1659 .collect();
1660
1661 let par_skip_bindings: Vec<TokenStream> = skipped_fields
1662 .iter()
1663 .map(|name| quote! { let _ = __row.#name; })
1664 .collect();
1665
1666 // Return struct fields
1667 let par_tag_field = if has_tag {
1668 quote! { _tag, }
1669 } else {
1670 TokenStream::new()
1671 };
1672 // Emit `_len: len` only when the companion struct has a `_len` field —
1673 // that is, when there are truly no column vecs at all (no scalars, no
1674 // as_list-on-struct fields, no struct-flattened fields, no tag).
1675 // `as_list`-on-struct fields live in `flat_cols` with `needs_into_list=true`;
1676 // they provide their own length reference and do NOT require `_len`.
1677 // The `flat_cols.iter().all(…)` guard is redundant with `flat_cols.is_empty()`
1678 // but makes the intent explicit: _len is emitted only when every dimension
1679 // that tracks length is absent.
1680 let par_len_field = if flat_cols.is_empty()
1681 && flat_cols.iter().all(|fc| !fc.needs_into_list)
1682 && auto_expand_cols.is_empty()
1683 && !has_tag
1684 && struct_cols.is_empty()
1685 {
1686 quote! { _len: len, }
1687 } else {
1688 TokenStream::new()
1689 };
1690 let mut par_struct_fields: Vec<TokenStream> = flat_cols
1691 .iter()
1692 .map(|fc| {
1693 let name = &fc.df_field;
1694 quote! { #name }
1695 })
1696 .collect();
1697 for ac in &auto_expand_cols {
1698 let name = &ac.df_field;
1699 par_struct_fields.push(quote! { #name });
1700 }
1701 for sc in &struct_cols {
1702 let name = &sc.df_field;
1703 par_struct_fields.push(quote! { #name });
1704 }
1705
1706 // Only emit an into_par_iter call when there are scalar/expand/tag cols
1707 // to scatter-write; struct/as_list-only structs skip the parallel loop.
1708 let has_par_cols = !flat_cols.iter().all(|fc| fc.needs_into_list)
1709 || !auto_expand_cols.is_empty()
1710 || has_tag;
1711 let par_loop = if has_par_cols {
1712 quote! {
1713 {
1714 #(#writer_decls)*
1715 rows.into_par_iter().enumerate().for_each(|(__i, __row)| unsafe {
1716 #tag_write
1717 #(#par_write_calls)*
1718 #(#par_skip_bindings)*
1719 });
1720 }
1721 }
1722 } else {
1723 // All columns were filled by the pre-pass, which only borrows `rows`; no parallel loop is needed, so just move `rows` into an unused binding.
1724 quote! { let _rows = rows; }
1725 };
1726
1727 // Build `where Inner: Clone` bounds for all struct-flattened fields.
1728 // Emitting these on the method (rather than in a `const _` assertion block)
1729 // points the compiler error at the `from_rows_par` call site, not at the
1730 // expanded macro internals — cleaner diagnostic for downstream users.
1731 let par_inner_clone_bounds: Vec<TokenStream> = struct_cols
1732 .iter()
1733 .map(|sc| {
1734 let inner_ty = &sc.inner_ty;
1735 quote! { #inner_ty: ::core::clone::Clone, }
1736 })
1737 .collect();
1738 let par_where_clause = if par_inner_clone_bounds.is_empty() {
1739 TokenStream::new()
1740 } else {
1741 quote! { where #(#par_inner_clone_bounds)* }
1742 };
1743
1744 quote! {
1745 /// Parallel row→column transposition using rayon scatter-write.
1746 ///
1747 /// Scalar/expand columns are scatter-written in parallel via rayon.
1748 /// Struct-flattened and `as_list`-on-struct fields are collected
1749 /// sequentially in a pre-pass before the parallel loop (these field
1750 /// types don't implement `Default`, so scatter-write is not possible).
1751 /// Inner struct types must implement `Clone` (enforced by the where
1752 /// clause; the error will point at the `from_rows_par` call site).
1753 ///
1754 /// Always uses rayon — no threshold check. Use `from_rows` for the
1755 /// sequential path.
1756 #[cfg(feature = "rayon")]
1757 #[allow(clippy::uninit_vec)]
1758 pub fn from_rows_par(rows: Vec<#row_name #ty_generics>) -> Self
1759 #par_where_clause
1760 {
1761 use ::miniextendr_api::rayon_bridge::rayon::prelude::*;
1762 let len = rows.len();
1763 #(#par_col_decls)*
1764 #par_loop
1765 #df_name { #par_tag_field #par_len_field #(#par_struct_fields),* }
1766 }
1767 }
1768 } else {
1769 TokenStream::new()
1770 };
1771
1772 // ── IntoIterator (only for named non-empty structs without expansion) ─
1773 let can_iterate = !flat_cols.is_empty()
1774 && !is_tuple_struct
1775 && !is_unit_struct
1776 && !has_expansion
1777 && !has_into_list_struct;
1778 let into_iterator_impl = if can_iterate {
1779 let iterator_name = format_ident!("{}Iterator", df_name);
1780
1781 let iter_field_decls: Vec<_> = flat_cols
1782 .iter()
1783 .map(|fc| {
1784 let name = &fc.df_field;
1785 let ty = &fc.vec_elem_ty;
1786 quote! { #name: std::vec::IntoIter<#ty> }
1787 })
1788 .collect();
1789
1790 let destruct_pattern: Vec<_> = flat_cols
1791 .iter()
1792 .map(|fc| {
1793 let name = &fc.df_field;
1794 quote! { #name }
1795 })
1796 .collect();
1797
1798 let mut iter_init_tokens = TokenStream::new();
1799 for (i, fc) in flat_cols.iter().enumerate() {
1800 let name = &fc.df_field;
1801 let ty = &fc.vec_elem_ty;
1802 if i > 0 {
1803 iter_init_tokens.extend(quote! { , });
1804 }
1805 iter_init_tokens.extend(quote! { #name: <Vec<#ty>>::into_iter(#name) });
1806 }
1807
1808 // For next(): reconstruct original field names (col_name == rust_name for Single)
1809 let mut next_struct_tokens = TokenStream::new();
1810 for (i, rf) in resolved.iter().enumerate() {
1811 if let ResolvedField::Single(data) = rf {
1812 if i > 0 {
1813 next_struct_tokens.extend(quote! { , });
1814 }
1815 let rust_name = &data.rust_name;
1816 let col_name = &data.col_name;
1817 next_struct_tokens.extend(quote! { #rust_name: self.#col_name.next()? });
1818 }
1819 }
1820
1821 let ignore_tag = if has_tag {
1822 quote! { _tag: _, }
1823 } else {
1824 TokenStream::new()
1825 };
1826
1827 // Skipped fields are reconstructed via `Default::default()` each time
1828 // `next()` yields a row. This is why any field type annotated with
1829 // `#[dataframe(skip)]` must implement `Default`.
1830 let skip_defaults: Vec<TokenStream> = skipped_fields
1831 .iter()
1832 .map(|name| quote! { , #name: Default::default() })
1833 .collect();
1834
1835 quote! {
1836 pub struct #iterator_name #impl_generics #where_clause {
1837 #(#iter_field_decls),*
1838 }
1839
1840 impl #impl_generics IntoIterator for #df_name #ty_generics #where_clause {
1841 type Item = #row_name #ty_generics;
1842 type IntoIter = #iterator_name #ty_generics;
1843
1844 fn into_iter(self) -> Self::IntoIter {
1845 let #df_name { #ignore_tag #(#destruct_pattern),* } = self;
1846 #iterator_name {
1847 #iter_init_tokens
1848 }
1849 }
1850 }
1851
1852 impl #impl_generics Iterator for #iterator_name #ty_generics #where_clause {
1853 type Item = #row_name #ty_generics;
1854
1855 fn next(&mut self) -> Option<Self::Item> {
1856 Some(#row_name {
1857 #next_struct_tokens
1858 #(#skip_defaults)*
1859 })
1860 }
1861 }
1862 }
1863 } else {
1864 TokenStream::new()
1865 };
1866 // endregion
1867
1868 // region: Associated methods
1869 let from_dataframe_method = if can_iterate {
1870 quote! {
1871 /// Convert a DataFrame back into a vector of rows.
1872 ///
1873 /// This transposes column-oriented data back into row-oriented format.
1874 pub fn from_dataframe(df: #df_name #ty_generics) -> Vec<Self> {
1875 df.into_iter().collect()
1876 }
1877 }
1878 } else {
1879 TokenStream::new()
1880 };
1881 // endregion
1882
1883 // region: DataFrame type methods (from_rows, from_rows_par)
1884 let df_methods = quote! {
1885 impl #impl_generics #df_name #ty_generics #where_clause {
1886 /// Sequential row→column transposition.
1887 pub fn from_rows(rows: Vec<#row_name #ty_generics>) -> Self {
1888 rows.into()
1889 }
1890
1891 #from_rows_par_method
1892 }
1893 };
1894
1895 let row_methods = quote! {
1896 impl #impl_generics #row_name #ty_generics #where_clause {
1897 /// Name of the generated DataFrame companion type.
1898 pub const DATAFRAME_TYPE_NAME: &'static str = stringify!(#df_name);
1899
1900 /// Convert a vector of rows into the companion DataFrame type.
1901 ///
1902 /// This transposes row-oriented data into column-oriented format.
1903 pub fn to_dataframe(rows: Vec<Self>) -> #df_name #ty_generics {
1904 rows.into()
1905 }
1906
1907 #from_dataframe_method
1908 }
1909 };
1910
1911 // Compile-time assertion: row type must implement IntoList
1912 // Skip for unit/empty structs, tuple structs, structs with expansion,
1913 // and structs that store `List`-converted struct fields (#485 as_list).
1914 let trait_check = if !flat_cols.is_empty()
1915 && !is_tuple_struct
1916 && !is_unit_struct
1917 && !has_expansion
1918 && !has_into_list_struct
1919 {
1920 quote! {
1921 const _: () = {
1922 fn _assert_into_list #impl_generics () #where_clause {
1923 fn _check<T: ::miniextendr_api::list::IntoList>() {}
1924 _check::<#row_name #ty_generics>();
1925 }
1926 };
1927 }
1928 } else {
1929 TokenStream::new()
1930 };
1931
1932 // Marker trait impl: struct type implements DataFrameRow via IntoDataFrame chain.
1933 let marker_impl = quote! {
1934 impl #impl_generics ::miniextendr_api::markers::DataFrameRow
1935 for #row_name #ty_generics #where_clause {}
1936 };
1937
1938 // DataFramePayloadFields impl: exposes FIELDS (all resolved column names) and TAG
1939 // (the #[dataframe(tag = "...")] value, or "") for compile-time collision detection
1940 // by outer DataFrameRow enums that nest this type as a struct-flattened field.
1941 let payload_fields_impl = {
1942 // Collect all column names: flat_cols + struct_col base names.
1943 let mut field_names: Vec<String> =
1944 flat_cols.iter().map(|fc| fc.col_name_str.clone()).collect();
1945 for sc in &struct_cols {
1946 field_names.push(sc.col_name_str.clone());
1947 }
1948 let tag_str = attrs.tag.as_deref().unwrap_or("");
1949 quote! {
1950 impl #impl_generics ::miniextendr_api::markers::DataFramePayloadFields
1951 for #row_name #ty_generics #where_clause
1952 {
1953 const FIELDS: &'static [&'static str] = &[#(#field_names),*];
1954 const TAG: &'static str = #tag_str;
1955 }
1956 }
1957 };
1958
1959 // Compile-time assertions for struct-flattened fields (#485): each inner
1960 // type must implement `DataFrameRow`, otherwise users get a confusing
1961 // error pointing at the `to_dataframe` call site instead of the field.
1962 // Note: `Clone` is no longer asserted here — it is enforced via a where
1963 // clause on `from_rows_par` itself, giving a clearer error at the call site.
1964 let struct_assertions: Vec<TokenStream> = struct_cols
1965 .iter()
1966 .map(|sc| {
1967 let inner_ty = &sc.inner_ty;
1968 quote! {
1969 const _: () = {
1970 fn _assert_inner_is_dataframe_row<T: ::miniextendr_api::markers::DataFrameRow>() {}
1971 fn _do_assert #impl_generics () #where_clause {
1972 _assert_inner_is_dataframe_row::<#inner_ty>();
1973 }
1974 };
1975 }
1976 })
1977 .collect();
1978
1979 Ok(quote! {
1980 #dataframe_struct
1981 #into_dataframe_impl
1982 #from_vec_impl
1983 #df_methods
1984 #into_iterator_impl
1985 #row_methods
1986 #trait_check
1987 #marker_impl
1988 #payload_fields_impl
1989 #(#struct_assertions)*
1990 })
1991 // endregion
1992}
1993// endregion
1994
1995// region: Enum align path
1996
/// A resolved column in the unified schema across all enum variants.
///
/// Tracks the column name, element type, which variants contribute to this column,
/// whether the type was coerced to `String` due to cross-variant type conflicts
/// (when `#[dataframe(conflicts = "string")]` is active), and whether the column
/// is emitted as an R factor.
///
/// Columns are created by `ColumnRegistry::register`, which starts both flags at
/// `false`; `register` may later set `string_coerced`, and
/// `ColumnRegistry::register_factor` sets `is_factor`.
pub(super) struct ResolvedColumn {
    /// Column name in the companion struct / data frame.
    pub(super) col_name: syn::Ident,
    /// Element type (used as `Vec<Option<#ty>>`).
    /// When `string_coerced` is true, this is always `String`.
    pub(super) ty: syn::Type,
    /// Indices of variants that contain this field, in the order they were
    /// registered (one entry is appended per successful `register` call).
    pub(super) present_in: Vec<usize>,
    /// Whether this column was coerced to `String` due to type conflicts.
    /// When true, values are converted via `ToString::to_string()` at push time.
    pub(super) string_coerced: bool,
    /// Whether this column should be emitted as an R factor (via `as_factor` attribute).
    /// When `true`, `into_data_frame` wraps the `Vec<Option<T>>` in `FactorOptionVec<T>`
    /// before calling `IntoR::into_sexp`, using the `UnitEnumFactor` blanket impl.
    pub(super) is_factor: bool,
}
2018
/// Accumulates unique columns for an enum-to-dataframe unified schema.
///
/// As columns are registered from each variant's fields, the registry detects
/// duplicates and validates type consistency. When `coerce_to_string` is enabled,
/// type conflicts are resolved by coercing to `String`; otherwise they produce errors.
///
/// The lifetime `'a` is that of the borrowed `string_ty` node, which is cloned
/// into a column whenever that column is coerced.
pub(super) struct ColumnRegistry<'a> {
    /// The ordered list of resolved columns in the schema (first-registration order).
    pub(super) columns: Vec<ResolvedColumn>,
    /// Maps column name strings to their index in `columns` for O(1) dedup lookup.
    ///
    /// `register` inserts an entry here every time it appends to `columns`, so each
    /// stored index is expected to be a valid position in `columns`.
    pub(super) col_index: std::collections::HashMap<String, usize>,
    /// Whether to coerce type-conflicting columns to `String` instead of erroring.
    pub(super) coerce_to_string: bool,
    /// Cached `String` type AST node, used as the coercion target type.
    pub(super) string_ty: &'a syn::Type,
}
2034
2035impl<'a> ColumnRegistry<'a> {
2036 /// Create a new empty column registry.
2037 fn new(coerce_to_string: bool, string_ty: &'a syn::Type) -> Self {
2038 Self {
2039 columns: Vec::new(),
2040 col_index: std::collections::HashMap::new(),
2041 coerce_to_string,
2042 string_ty,
2043 }
2044 }
2045
2046 /// Register a single column in the schema, or merge with an existing column.
2047 ///
2048 /// If a column with the same name already exists, validates that the types match.
2049 /// On type conflict: coerces to `String` (if `coerce_to_string` is true) or
2050 /// returns `Err`. The `variant_idx` is appended to the column's `present_in` list.
2051 fn register(
2052 &mut self,
2053 col_name: &str,
2054 col_ty: &syn::Type,
2055 variant_idx: usize,
2056 variant_name: &syn::Ident,
2057 error_span: Span,
2058 ) -> syn::Result<()> {
2059 if let Some(&idx) = self.col_index.get(col_name) {
2060 let existing = &self.columns[idx];
2061 if !existing.string_coerced && existing.ty != *col_ty {
2062 if self.coerce_to_string {
2063 self.columns[idx].ty = self.string_ty.clone();
2064 self.columns[idx].string_coerced = true;
2065 } else {
2066 return Err(syn::Error::new(
2067 error_span,
2068 format!(
2069 "type conflict for field `{}`: variant `{}` has a different type \
2070 than a previous variant; \
2071 use `#[dataframe(conflicts = \"string\")]` to coerce all conflicting fields to String",
2072 col_name, variant_name
2073 ),
2074 ));
2075 }
2076 }
2077 self.columns[idx].present_in.push(variant_idx);
2078 } else {
2079 let idx = self.columns.len();
2080 self.columns.push(ResolvedColumn {
2081 col_name: format_ident!("{}", col_name),
2082 ty: col_ty.clone(),
2083 present_in: vec![variant_idx],
2084 string_coerced: false,
2085 is_factor: false,
2086 });
2087 self.col_index.insert(col_name.to_string(), idx);
2088 }
2089 Ok(())
2090 }
2091
2092 /// Like `register`, but marks the column as a factor column (`is_factor = true`).
2093 ///
2094 /// Used for fields annotated with `#[dataframe(as_factor)]`. The companion struct
2095 /// field type stays `Vec<Option<T>>`, but `into_data_frame` wraps it in
2096 /// `FactorOptionVec<T>` (using the `UnitEnumFactor` blanket `IntoR` impl).
2097 pub(super) fn register_factor(
2098 &mut self,
2099 col_name: &str,
2100 col_ty: &syn::Type,
2101 variant_idx: usize,
2102 variant_name: &syn::Ident,
2103 error_span: Span,
2104 ) -> syn::Result<()> {
2105 self.register(col_name, col_ty, variant_idx, variant_name, error_span)?;
2106 if let Some(&idx) = self.col_index.get(col_name) {
2107 self.columns[idx].is_factor = true;
2108 }
2109 Ok(())
2110 }
2111}
2112
/// Describes the shape of an enum variant's fields.
///
/// A small `Copy` tag stored on each `VariantInfo` so later stages can tell
/// which of the three Rust variant syntaxes a variant was written in.
#[derive(Clone, Copy, PartialEq, Eq)]
pub(super) enum VariantShape {
    /// `Variant { field: Type, ... }`
    Named,
    /// `Variant(Type, ...)`
    Tuple,
    /// `Variant` (no fields)
    Unit,
}
2123
/// A resolved enum field ready for codegen -- either a single column or expanded
/// from an array/`Vec` into multiple suffixed columns.
///
/// This is the enum-path counterpart of [`ResolvedField`] (used for structs).
/// Each variant carries both the binding name (for destructure patterns) and the
/// original Rust field name (for error reporting and named-variant patterns).
///
/// Every payload is boxed, so the enum itself stays pointer-sized per variant
/// regardless of how large an individual payload struct is.
pub(super) enum EnumResolvedField {
    /// Single column contribution.
    Single(Box<EnumSingleFieldData>),
    /// Expanded from `[T; N]`.
    ExpandedFixed(Box<EnumExpandedFixedData>),
    /// Expanded from `Vec<T>` with pinned width.
    ExpandedVec(Box<EnumExpandedVecData>),
    /// Auto-expanded `Vec<T>`/`Box<[T]>`: column count determined at runtime.
    AutoExpandVec(Box<EnumAutoExpandVecData>),
    /// `HashMap<K,V>` or `BTreeMap<K,V>` → two parallel list-columns: `<field>_keys`, `<field>_values`.
    Map(Box<EnumMapFieldData>),
    /// Struct field whose inner type implements `DataFrameRow` → flattened `<base>_<inner_col>` columns.
    Struct(Box<EnumStructFieldData>),
}
2144
2145impl EnumResolvedField {
2146 /// Binding name used in destructure patterns.
2147 pub(super) fn binding(&self) -> &syn::Ident {
2148 match self {
2149 Self::Single(data) => &data.binding,
2150 Self::ExpandedFixed(data) => &data.binding,
2151 Self::ExpandedVec(data) => &data.binding,
2152 Self::AutoExpandVec(data) => &data.binding,
2153 Self::Map(data) => &data.binding,
2154 Self::Struct(data) => &data.binding,
2155 }
2156 }
2157
2158 /// Original Rust field name.
2159 pub(super) fn rust_name(&self) -> &syn::Ident {
2160 match self {
2161 Self::Single(data) => &data.rust_name,
2162 Self::ExpandedFixed(data) => &data.rust_name,
2163 Self::ExpandedVec(data) => &data.rust_name,
2164 Self::AutoExpandVec(data) => &data.rust_name,
2165 Self::Map(data) => &data.rust_name,
2166 Self::Struct(data) => &data.rust_name,
2167 }
2168 }
2169}
2170
/// Data for [`EnumResolvedField::Single`].
///
/// Describes a field that contributes exactly one column to the schema,
/// together with the two flags that alter how its values are stored or emitted.
pub(super) struct EnumSingleFieldData {
    /// Column name in the schema.
    pub(super) col_name: syn::Ident,
    /// Binding name used in destructure pattern.
    pub(super) binding: syn::Ident,
    /// Original Rust field name (for named variants).
    pub(super) rust_name: syn::Ident,
    /// Column type stored in the companion Vec.
    ///
    /// For most fields this is the raw Rust type. When `needs_into_list` is
    /// `true` (struct-typed fields with `#[dataframe(as_list)]`), this is
    /// `::miniextendr_api::list::List` — the actual inner type is erased at
    /// the storage level and each row value is converted via `.into_list()`.
    pub(super) ty: syn::Type,
    /// Whether the field's value must be converted via `.into_list()` before
    /// being pushed into the companion `Vec<Option<List>>`.
    ///
    /// Set to `true` only for struct-typed fields (`FieldTypeKind::Struct`)
    /// that carry `#[dataframe(as_list)]`. The companion struct field type is
    /// `Vec<Option<::miniextendr_api::list::List>>` in this case.
    pub(super) needs_into_list: bool,
    /// Whether the field should be emitted as an R factor column.
    ///
    /// Set to `true` for fields annotated with `#[dataframe(as_factor)]`.
    /// The companion struct field type is `Vec<Option<T>>` (unchanged), but
    /// `into_data_frame` wraps it in `FactorOptionVec<T>` to use the
    /// `UnitEnumFactor`-based blanket `IntoR` impl.
    pub(super) is_factor: bool,
}
2201
/// Data for [`EnumResolvedField::ExpandedFixed`].
///
/// Describes a fixed-size array field (`[T; N]`) that is expanded into
/// multiple suffixed columns.
pub(super) struct EnumExpandedFixedData {
    /// Base column name from which the expanded column names are derived.
    pub(super) base_name: String,
    /// Binding name used in the destructure pattern.
    pub(super) binding: syn::Ident,
    /// Original Rust field name.
    pub(super) rust_name: syn::Ident,
    /// Element type (`T` in `[T; N]`).
    pub(super) elem_ty: syn::Type,
    /// Array length (`N` in `[T; N]`).
    pub(super) len: usize,
}
2215
/// Data for [`EnumResolvedField::ExpandedVec`].
///
/// Describes a `Vec<T>` field that is expanded into suffixed columns using a
/// width fixed at macro-expansion time.
pub(super) struct EnumExpandedVecData {
    /// Base column name from which the expanded column names are derived.
    pub(super) base_name: String,
    /// Binding name used in the destructure pattern.
    pub(super) binding: syn::Ident,
    /// Original Rust field name.
    pub(super) rust_name: syn::Ident,
    /// Element type (`T` in `Vec<T>`).
    pub(super) elem_ty: syn::Type,
    /// Pinned width.
    // NOTE(review): presumably the number of expanded columns — confirm against
    // the expansion codegen, which is not visible here.
    pub(super) width: usize,
}
2229
/// Data for [`EnumResolvedField::AutoExpandVec`].
///
/// Describes a `Vec<T>`/`Box<[T]>` field whose expanded column count is
/// determined at runtime rather than pinned in the derive input.
pub(super) struct EnumAutoExpandVecData {
    /// Base column name from which the expanded column names are derived.
    pub(super) base_name: String,
    /// Binding name used in the destructure pattern.
    pub(super) binding: syn::Ident,
    /// Original Rust field name.
    pub(super) rust_name: syn::Ident,
    /// Element type (`T`).
    pub(super) elem_ty: syn::Type,
    /// Container type for companion struct (`Vec<T>` or `Box<[T]>`).
    pub(super) container_ty: syn::Type,
}
2243
/// Data for [`EnumResolvedField::Map`].
///
/// A `HashMap<K,V>` or `BTreeMap<K,V>` field expands to two parallel list-columns:
/// `<base_name>_keys: Vec<Option<Vec<K>>>` and `<base_name>_values: Vec<Option<Vec<V>>>`.
/// Absent-variant rows get `None` in both columns. Key order follows the map's own
/// iteration order: `BTreeMap` yields sorted keys, `HashMap` yields non-deterministic order.
/// Both are produced via `into_iter().unzip()` which guarantees pairwise alignment.
pub(super) struct EnumMapFieldData {
    /// Base column name (field name or `rename` override); the `_keys` and
    /// `_values` suffixes are appended to it.
    pub(super) base_name: String,
    /// Binding name used in destructure pattern.
    pub(super) binding: syn::Ident,
    /// Original Rust field name.
    pub(super) rust_name: syn::Ident,
    /// Key type `K`.
    pub(super) key_ty: syn::Type,
    /// Value type `V`.
    pub(super) val_ty: syn::Type,
}
2263
/// Data for [`EnumResolvedField::Struct`].
///
/// A field whose inner type implements `DataFrameRow` expands to `<base_name>_<inner_col>`
/// prefixed columns — one output column per column emitted by the inner type's companion
/// DataFrame. Absent-variant rows produce `None` in every prefixed column.
///
/// The companion struct holds `Vec<Option<Inner>>` (not `Vec<Inner>`). The `into_data_frame`
/// method collects present rows into a dense `Vec<Inner>` (tracking presence indices),
/// calls `Inner::to_dataframe(present_rows)`, extracts named column SEXPs, and scatters
/// them back to the full row count with `None`-fill for absent rows.
pub(super) struct EnumStructFieldData {
    /// Base name for column prefixing (field name or `rename` override).
    pub(super) base_name: String,
    /// Binding name used in destructure pattern.
    pub(super) binding: syn::Ident,
    /// Original Rust field name.
    pub(super) rust_name: syn::Ident,
    /// Inner struct type (`Inner` above), used for the compile-time
    /// `DataFrameRow` assertion and for codegen.
    pub(super) inner_ty: syn::Type,
}
2284
/// Parsed and resolved information about a single enum variant for DataFrame codegen.
///
/// Contains the variant's name, shape (named/tuple/unit), resolved fields (after
/// applying `#[dataframe(...)]` attributes and type classification), and any
/// skipped field names (needed for complete destructure patterns in named variants).
pub(super) struct VariantInfo {
    /// Variant name.
    pub(super) name: syn::Ident,
    /// Shape of this variant (named, tuple, or unit).
    pub(super) shape: VariantShape,
    /// Resolved fields (after applying field attrs + type classification).
    pub(super) fields: Vec<EnumResolvedField>,
    /// Rust names of the skipped fields (for named variants) — kept so the
    /// destructure pattern can still account for every field of the variant.
    pub(super) skipped_fields: Vec<syn::Ident>,
}
2300// endregion
2301
2302// region: Enum-specific expansion (in sub-module)
2303
2304mod enum_expansion;
2305use enum_expansion::derive_enum_dataframe;
2306// endregion