miniextendr_macros/dataframe_derive/enum_expansion.rs
1//! Enum-specific DataFrame derive expansion.
2//!
3//! Generates a companion struct where every column is `Vec<Option<T>>`, with
4//! `None` fill for fields absent in a given variant.
5
6use proc_macro2::TokenStream;
7use quote::{ToTokens, format_ident, quote};
8use syn::{DeriveInput, Fields};
9
10use super::{
11 ColumnRegistry, DataFrameAttrs, EnumAutoExpandVecData, EnumExpandedFixedData,
12 EnumExpandedVecData, EnumMapFieldData, EnumResolvedField, EnumSingleFieldData,
13 EnumStructFieldData, FieldTypeKind, VariantInfo, VariantShape, classify_field_type,
14 parse_field_attrs,
15};
16use crate::naming;
17use std::collections::HashMap;
18
19/// Derive `DataFrameRow` for an enum with `#[dataframe(align)]`.
20///
21/// Generates a companion struct where every column is `Vec<Option<T>>`, with
22/// `None` fill for fields absent in a given variant. This is the enum counterpart
23/// of [`super::derive_struct_dataframe`].
24///
25/// # Generated items
26///
27/// - Companion struct `{Name}DataFrame` with `Vec<Option<T>>` columns (field-name union)
28/// - Optional `_tag: Vec<String>` column for variant discrimination
29/// - `impl IntoDataFrame` (converts companion struct to R data.frame)
30/// - `impl From<Vec<Enum>>` (sequential row->column transposition)
31/// - `from_rows()` / `from_rows_par()` methods on the companion struct
32/// - `to_dataframe()` / `DATAFRAME_TYPE_NAME` associated items on the enum
33///
34/// # Variant support
35///
36/// - Named variants (`{ field: T }`): fields contribute by name to the unified schema
37/// - Tuple variants (`(T, U)`): fields are named `_0`, `_1`, etc.
38/// - Unit variants: contribute no columns (only tag if present)
39///
40/// # Auto-expand fields
41///
42/// Fields with `#[dataframe(expand)]` on `Vec<T>`, `Box<[T]>`, or `&[T]` types get dynamic
43/// column counts determined at runtime from the longest value across all rows. These are
44/// tracked separately from the static [`ColumnRegistry`](super::ColumnRegistry).
45///
46/// Returns `Err` if the enum has no variants, if a field attribute is invalid for the field's
47/// type, if column names collide, or if type conflicts arise without `#[dataframe(conflicts = "string")]`.
48pub(super) fn derive_enum_dataframe(
49 row_name: &syn::Ident,
50 input: &DeriveInput,
51 data: &syn::DataEnum,
52 df_name: &syn::Ident,
53 attrs: &DataFrameAttrs,
54) -> syn::Result<TokenStream> {
55 let (impl_generics, ty_generics, where_clause) = input.generics.split_for_impl();
56
57 // region: Validate variants
58 if data.variants.is_empty() {
59 return Err(syn::Error::new_spanned(
60 row_name,
61 "DataFrameRow requires at least one variant",
62 ));
63 }
64
65 let mut variant_infos: Vec<VariantInfo> = Vec::new();
66
67 for variant in &data.variants {
68 match &variant.fields {
69 Fields::Named(fields) => {
70 let mut resolved = Vec::new();
71 let mut skipped = Vec::new();
72 for f in &fields.named {
73 let fa = parse_field_attrs(f)?;
74 let rust_name = f.ident.as_ref().unwrap().clone();
75 if fa.skip {
76 skipped.push(rust_name);
77 continue;
78 }
79 let col_name_str = fa.rename.unwrap_or_else(|| rust_name.to_string());
80 let binding = format_ident!("__v_{}", rust_name);
81
82 if fa.as_list {
83 // Struct-typed fields with `as_list` must be converted via `into_list()`
84 // at `into_data_frame` time. We keep the original Rust type in the
85 // companion struct (so no R API is called during row accumulation) and
86 // flag `needs_into_list = true` to trigger per-element conversion in the
87 // dynamic `into_data_frame` path.
88 //
89 // Use `.as_ref().ok()` to suppress classification errors: `as_list` is
90 // an explicit opt-in, so wrapper types (Option<T>, Arc<T>, …) are
91 // allowed — they become opaque list-columns.
92 let needs_into_list = matches!(
93 classify_field_type(&f.ty).as_ref().ok(),
94 Some(FieldTypeKind::Struct { .. })
95 );
96 resolved.push(EnumResolvedField::Single(Box::new(EnumSingleFieldData {
97 col_name: format_ident!("{}", col_name_str),
98 binding: binding.clone(),
99 rust_name: rust_name.clone(),
100 ty: f.ty.clone(),
101 needs_into_list,
102 is_factor: false,
103 })));
104 } else if fa.as_factor {
105 // `as_factor` is only valid on bare-ident enum types (Struct kind).
106 // The inner enum must be unit-only and derive DataFrameRow, which
107 // auto-emits UnitEnumFactor so FactorOptionVec<T> implements IntoR.
108 match classify_field_type(&f.ty)? {
109 FieldTypeKind::Struct { .. } => {
110 resolved.push(EnumResolvedField::Single(Box::new(
111 EnumSingleFieldData {
112 col_name: format_ident!("{}", col_name_str),
113 binding: binding.clone(),
114 rust_name: rust_name.clone(),
115 ty: f.ty.clone(),
116 needs_into_list: false,
117 is_factor: true,
118 },
119 )));
120 }
121 _ => {
122 return Err(syn::Error::new_spanned(
123 &f.ty,
124 "`as_factor` is only valid on bare-ident enum/struct types; \
125 use `as_list` for generic or complex types, or remove \
126 `as_factor` for scalar fields",
127 ));
128 }
129 }
130 } else {
131 match classify_field_type(&f.ty)? {
132 FieldTypeKind::FixedArray(elem_ty, len) => {
133 resolved.push(EnumResolvedField::ExpandedFixed(Box::new(
134 EnumExpandedFixedData {
135 base_name: col_name_str,
136 binding: binding.clone(),
137 rust_name: rust_name.clone(),
138 elem_ty: elem_ty.clone(),
139 len,
140 },
141 )));
142 }
143 FieldTypeKind::VariableVec(elem_ty)
144 | FieldTypeKind::BoxedSlice(elem_ty)
145 | FieldTypeKind::BorrowedSlice(elem_ty) => {
146 if let Some(width) = fa.width {
147 resolved.push(EnumResolvedField::ExpandedVec(Box::new(
148 EnumExpandedVecData {
149 base_name: col_name_str,
150 binding: binding.clone(),
151 rust_name: rust_name.clone(),
152 elem_ty: elem_ty.clone(),
153 width,
154 },
155 )));
156 } else if fa.expand {
157 resolved.push(EnumResolvedField::AutoExpandVec(Box::new(
158 EnumAutoExpandVecData {
159 base_name: col_name_str,
160 binding: binding.clone(),
161 rust_name: rust_name.clone(),
162 elem_ty: elem_ty.clone(),
163 container_ty: f.ty.clone(),
164 },
165 )));
166 } else {
167 resolved.push(EnumResolvedField::Single(Box::new(
168 EnumSingleFieldData {
169 col_name: format_ident!("{}", col_name_str),
170 binding: binding.clone(),
171 rust_name: rust_name.clone(),
172 ty: f.ty.clone(),
173 needs_into_list: false,
174 is_factor: false,
175 },
176 )));
177 }
178 }
179 FieldTypeKind::Map { key_ty, val_ty } => {
180 if fa.width.is_some() {
181 return Err(syn::Error::new_spanned(
182 &f.ty,
183 "`width` is not valid on HashMap/BTreeMap fields",
184 ));
185 }
186 if fa.expand {
187 return Err(syn::Error::new_spanned(
188 &f.ty,
189 "`expand`/`unnest` is not valid on HashMap/BTreeMap fields",
190 ));
191 }
192 resolved.push(EnumResolvedField::Map(Box::new(EnumMapFieldData {
193 base_name: col_name_str,
194 binding: binding.clone(),
195 rust_name: rust_name.clone(),
196 key_ty: key_ty.clone(),
197 val_ty: val_ty.clone(),
198 })));
199 }
200 FieldTypeKind::Struct { inner_ty } => {
201 if fa.width.is_some() {
202 return Err(syn::Error::new_spanned(
203 &f.ty,
204 "`width` is not valid on struct fields; use \
205 `#[dataframe(as_list)]` to keep as an opaque list-column",
206 ));
207 }
208 if fa.expand {
209 return Err(syn::Error::new_spanned(
210 &f.ty,
211 "`expand`/`unnest` is not valid on struct fields; struct \
212 fields flatten by default via their `DataFrameRow` impl",
213 ));
214 }
215 resolved.push(EnumResolvedField::Struct(Box::new(
216 EnumStructFieldData {
217 base_name: col_name_str,
218 binding: binding.clone(),
219 rust_name: rust_name.clone(),
220 inner_ty: inner_ty.clone(),
221 },
222 )));
223 }
224 FieldTypeKind::Scalar => {
225 resolved.push(EnumResolvedField::Single(Box::new(
226 EnumSingleFieldData {
227 col_name: format_ident!("{}", col_name_str),
228 binding: binding.clone(),
229 rust_name: rust_name.clone(),
230 ty: f.ty.clone(),
231 needs_into_list: false,
232 is_factor: false,
233 },
234 )));
235 }
236 }
237 }
238 }
239 // B1: Check for `<base>_<inner_tag>` discriminant column collision.
240 //
241 // When a Struct field `kind: Inner` is flattened, the inner enum's
242 // discriminant column (tag) is emitted under `<base>_<inner_tag>`.
243 // The inner tag is retrieved at runtime from
244 // `<Inner as DataFramePayloadFields>::TAG`; the B1 check here uses the
245 // hardcoded default `"variant"` for compile-time sibling detection because
246 // we cannot inspect inner enum attributes from the outer macro parse phase.
247 // The per-inner-field payload collision is caught separately via the
248 // `const _:` assertions emitted below (using `DataFramePayloadFields`).
249 //
250 // We detect the following cases at compile time (both using "variant"):
251 // 1. Struct field `kind: Inner` + Single/Scalar sibling named `kind_variant`
252 // 2. Struct field `kind: Inner` + another Struct sibling field renamed to
253 // produce `kind_variant`
254 //
255 // Inner-enum-internal collision (Inner has both `tag = "X"` AND payload
256 // field `X`) is caught by the `assert_no_payload_field_collision` const
257 // assertion emitted below — no carve-out needed.
258 {
259 // Collect every flat column name produced by non-Struct resolved fields.
260 let flat_col_names: Vec<String> = resolved
261 .iter()
262 .filter_map(|r| match r {
263 EnumResolvedField::Single(d) => Some(d.col_name.to_string()),
264 EnumResolvedField::Map(d) => {
265 // Map fields produce <base>_keys and <base>_values.
266 // Neither collides with <struct>_variant unless someone
267 // explicitly renamed to match — covered by the Struct check
268 // via base_name.
269 let _ = d;
270 None
271 }
272 _ => None,
273 })
274 .collect();
275
276 for r in &resolved {
277 if let EnumResolvedField::Struct(struct_data) = r {
278 // Use hardcoded "variant" for the sibling check — this is the
279 // default inner tag. The inner-payload collision for non-default
280 // tags is caught by assert_no_payload_field_collision below.
281 let discriminant_col = format!("{}_variant", struct_data.base_name);
282 if flat_col_names.contains(&discriminant_col) {
283 // Find the colliding field for a better span.
284 let colliding_span = resolved
285 .iter()
286 .find_map(|r2| match r2 {
287 EnumResolvedField::Single(d)
288 if d.col_name == discriminant_col.as_str() =>
289 {
290 Some(d.col_name.span())
291 }
292 _ => None,
293 })
294 .unwrap_or_else(proc_macro2::Span::call_site);
295 return Err(syn::Error::new(
296 colliding_span,
297 format!(
298 "column name collision: the flatten field `{base}` \
299 (a nested `DataFrameRow` enum) will emit a \
300 discriminant column named `{disc}`, but a sibling \
301 field already produces a column with the same name. \
302 Rename the sibling field or use \
303 `#[dataframe(tag = \"...\")]` on the inner enum to \
304 choose a different discriminant column name \
305 (e.g. `#[dataframe(tag = \"type\")]` → `{base}_type`)",
306 base = struct_data.base_name,
307 disc = discriminant_col,
308 ),
309 ));
310 }
311 }
312 }
313 }
314 variant_infos.push(VariantInfo {
315 name: variant.ident.clone(),
316 shape: VariantShape::Named,
317 fields: resolved,
318 skipped_fields: skipped,
319 });
320 }
321 Fields::Unnamed(fields) => {
322 let mut resolved = Vec::new();
323 for (i, f) in fields.unnamed.iter().enumerate() {
324 let fa = parse_field_attrs(f)?;
325 let rust_name = format_ident!("_{}", i);
326 if fa.skip {
327 continue;
328 }
329 let col_name_str = fa.rename.unwrap_or_else(|| rust_name.to_string());
330 let binding = format_ident!("__v_{}", rust_name);
331
332 // Tuple enum fields: same expansion logic
333 if fa.as_list {
334 // Use `.as_ref().ok()` to suppress classification errors: `as_list` is
335 // an explicit opt-in, so wrapper types (Option<T>, Arc<T>, …) are
336 // allowed — they become opaque list-columns.
337 let needs_into_list = matches!(
338 classify_field_type(&f.ty).as_ref().ok(),
339 Some(FieldTypeKind::Struct { .. })
340 );
341 resolved.push(EnumResolvedField::Single(Box::new(EnumSingleFieldData {
342 col_name: format_ident!("{}", col_name_str),
343 binding,
344 rust_name,
345 ty: f.ty.clone(),
346 needs_into_list,
347 is_factor: false,
348 })));
349 } else if fa.as_factor {
350 match classify_field_type(&f.ty)? {
351 FieldTypeKind::Struct { .. } => {
352 resolved.push(EnumResolvedField::Single(Box::new(
353 EnumSingleFieldData {
354 col_name: format_ident!("{}", col_name_str),
355 binding,
356 rust_name,
357 ty: f.ty.clone(),
358 needs_into_list: false,
359 is_factor: true,
360 },
361 )));
362 }
363 _ => {
364 return Err(syn::Error::new_spanned(
365 &f.ty,
366 "`as_factor` is only valid on bare-ident enum/struct types; \
367 use `as_list` for generic or complex types, or remove \
368 `as_factor` for scalar fields",
369 ));
370 }
371 }
372 } else {
373 match classify_field_type(&f.ty)? {
374 FieldTypeKind::FixedArray(elem_ty, len) => {
375 resolved.push(EnumResolvedField::ExpandedFixed(Box::new(
376 EnumExpandedFixedData {
377 base_name: col_name_str,
378 binding,
379 rust_name,
380 elem_ty: elem_ty.clone(),
381 len,
382 },
383 )));
384 }
385 FieldTypeKind::VariableVec(elem_ty)
386 | FieldTypeKind::BoxedSlice(elem_ty)
387 | FieldTypeKind::BorrowedSlice(elem_ty) => {
388 if let Some(width) = fa.width {
389 resolved.push(EnumResolvedField::ExpandedVec(Box::new(
390 EnumExpandedVecData {
391 base_name: col_name_str,
392 binding,
393 rust_name,
394 elem_ty: elem_ty.clone(),
395 width,
396 },
397 )));
398 } else if fa.expand {
399 resolved.push(EnumResolvedField::AutoExpandVec(Box::new(
400 EnumAutoExpandVecData {
401 base_name: col_name_str,
402 binding,
403 rust_name,
404 elem_ty: elem_ty.clone(),
405 container_ty: f.ty.clone(),
406 },
407 )));
408 } else {
409 resolved.push(EnumResolvedField::Single(Box::new(
410 EnumSingleFieldData {
411 col_name: format_ident!("{}", col_name_str),
412 binding,
413 rust_name,
414 ty: f.ty.clone(),
415 needs_into_list: false,
416 is_factor: false,
417 },
418 )));
419 }
420 }
421 FieldTypeKind::Map { key_ty, val_ty } => {
422 if fa.width.is_some() {
423 return Err(syn::Error::new_spanned(
424 &f.ty,
425 "`width` is not valid on HashMap/BTreeMap fields",
426 ));
427 }
428 if fa.expand {
429 return Err(syn::Error::new_spanned(
430 &f.ty,
431 "`expand`/`unnest` is not valid on HashMap/BTreeMap fields",
432 ));
433 }
434 resolved.push(EnumResolvedField::Map(Box::new(EnumMapFieldData {
435 base_name: col_name_str,
436 binding,
437 rust_name,
438 key_ty: key_ty.clone(),
439 val_ty: val_ty.clone(),
440 })));
441 }
442 FieldTypeKind::Struct { inner_ty } => {
443 if fa.width.is_some() {
444 return Err(syn::Error::new_spanned(
445 &f.ty,
446 "`width` is not valid on struct fields; use `#[dataframe(as_list)]` \
447 to keep as an opaque list-column",
448 ));
449 }
450 if fa.expand {
451 return Err(syn::Error::new_spanned(
452 &f.ty,
453 "`expand`/`unnest` is not valid on struct fields; struct fields \
454 flatten by default via their DataFrameRow impl",
455 ));
456 }
457 resolved.push(EnumResolvedField::Struct(Box::new(
458 EnumStructFieldData {
459 base_name: col_name_str,
460 binding,
461 rust_name,
462 inner_ty: inner_ty.clone(),
463 },
464 )));
465 }
466 FieldTypeKind::Scalar => {
467 resolved.push(EnumResolvedField::Single(Box::new(
468 EnumSingleFieldData {
469 col_name: format_ident!("{}", col_name_str),
470 binding,
471 rust_name,
472 ty: f.ty.clone(),
473 needs_into_list: false,
474 is_factor: false,
475 },
476 )));
477 }
478 }
479 }
480 }
481 variant_infos.push(VariantInfo {
482 name: variant.ident.clone(),
483 shape: VariantShape::Tuple,
484 fields: resolved,
485 skipped_fields: vec![],
486 });
487 }
488 Fields::Unit => {
489 variant_infos.push(VariantInfo {
490 name: variant.ident.clone(),
491 shape: VariantShape::Unit,
492 fields: vec![],
493 skipped_fields: vec![],
494 });
495 }
496 }
497 }
498 // endregion
499
500 // region: Resolve unified schema
501 // Collect all unique column names, check type consistency.
502 // Expanded fields contribute multiple columns to the schema.
503 let coerce_to_string = attrs.conflicts.as_deref() == Some("string");
504 let string_ty: syn::Type = syn::parse_quote!(String);
505 let mut registry = ColumnRegistry::new(coerce_to_string, &string_ty);
506
507 for (variant_idx, vi) in variant_infos.iter().enumerate() {
508 for erf in &vi.fields {
509 // Use the rust_name span for error reporting
510 let err_span = erf.rust_name().span();
511 match erf {
512 EnumResolvedField::Single(data) => {
513 if data.is_factor {
514 registry.register_factor(
515 &data.col_name.to_string(),
516 &data.ty,
517 variant_idx,
518 &vi.name,
519 err_span,
520 )?;
521 } else {
522 registry.register(
523 &data.col_name.to_string(),
524 &data.ty,
525 variant_idx,
526 &vi.name,
527 err_span,
528 )?;
529 }
530 }
531 EnumResolvedField::ExpandedFixed(data) => {
532 for i in 1..=data.len {
533 let name = format!("{}_{}", data.base_name, i);
534 registry.register(&name, &data.elem_ty, variant_idx, &vi.name, err_span)?;
535 }
536 }
537 EnumResolvedField::ExpandedVec(data) => {
538 for i in 1..=data.width {
539 let name = format!("{}_{}", data.base_name, i);
540 registry.register(&name, &data.elem_ty, variant_idx, &vi.name, err_span)?;
541 }
542 }
543 // AutoExpandVec: not registered in ColumnRegistry (width is dynamic).
544 // Collected separately below.
545 EnumResolvedField::AutoExpandVec(..) => {}
546 EnumResolvedField::Map(data) => {
547 let key_ty = &data.key_ty;
548 let val_ty = &data.val_ty;
549 let keys_name = format!("{}_keys", data.base_name);
550 let vals_name = format!("{}_values", data.base_name);
551 // Column types are Vec<K> and Vec<V> respectively (used as Vec<Option<Vec<K>>>
552 // / Vec<Option<Vec<V>>> in companion struct via ColumnRegistry wrapping).
553 let key_vec_ty: syn::Type = syn::parse_quote!(Vec<#key_ty>);
554 let val_vec_ty: syn::Type = syn::parse_quote!(Vec<#val_ty>);
555 registry.register(&keys_name, &key_vec_ty, variant_idx, &vi.name, err_span)?;
556 registry.register(&vals_name, &val_vec_ty, variant_idx, &vi.name, err_span)?;
557 }
558 // Struct: registers one Vec<Option<Inner>> column under base_name.
559 // Flattening into prefixed columns happens at into_data_frame() time, not here.
560 EnumResolvedField::Struct(data) => {
561 let inner_ty = &data.inner_ty;
562 // Register the bare `Inner` type; the companion struct field wraps it as Vec<Option<Inner>>.
563 registry.register(
564 &data.base_name,
565 inner_ty,
566 variant_idx,
567 &vi.name,
568 err_span,
569 )?;
570 }
571 }
572 }
573 }
574 let columns = registry.columns;
575 // endregion
576
577 // region: Collect auto-expand fields (per-variant, for split method)
/// One auto-expand column: a sequence field marked `expand` with no fixed `width`,
/// merged by base name across every variant that declares it.
578 struct EnumAutoExpandCol {
 /// Companion-struct field identifier, built from `base_name`.
579 df_field: syn::Ident,
 /// Column base name; the expanded columns are named `<base_name>_1`, `<base_name>_2`, ...
580 base_name: String,
 /// Element type of the expanded columns. Replaced by `String` when a later variant
 /// disagrees and `conflicts = "string"` is set.
 /// NOTE(review): `container_ty` is the whole field type and so embeds the element type;
 /// a differing element type looks like it always also trips the container-mismatch
 /// error — confirm the string-coercion path is actually reachable.
581 elem_ty: syn::Type,
 /// Full field type as written on the variant; the companion struct stores
 /// `Vec<Option<container_ty>>`, and all variants must agree on it.
582 container_ty: syn::Type,
 /// Indices into `variant_infos` of the variants that declare this field.
583 present_in: Vec<usize>,
584 }
585
586 let mut auto_expand_cols: Vec<EnumAutoExpandCol> = Vec::new();
587 let mut auto_expand_index: HashMap<String, usize> = HashMap::new();
588
589 for (variant_idx, vi) in variant_infos.iter().enumerate() {
590 for erf in &vi.fields {
591 if let EnumResolvedField::AutoExpandVec(auto_data) = erf {
592 if let Some(&idx) = auto_expand_index.get(&auto_data.base_name) {
593 let elem_match = auto_expand_cols[idx].elem_ty == auto_data.elem_ty;
594 let container_match =
595 auto_expand_cols[idx].container_ty == auto_data.container_ty;
596 if !elem_match {
597 if coerce_to_string {
598 auto_expand_cols[idx].elem_ty = string_ty.clone();
599 } else {
600 return Err(syn::Error::new(
601 auto_data.rust_name.span(),
602 format!(
603 "type conflict for auto-expand field `{}`: different element type \
604 than a previous variant; \
605 use `#[dataframe(conflicts = \"string\")]` to coerce",
606 auto_data.base_name,
607 ),
608 ));
609 }
610 }
611 if !container_match {
612 return Err(syn::Error::new(
613 auto_data.rust_name.span(),
614 format!(
615 "container type mismatch for auto-expand field `{}`: \
616 all variants must use the same container type \
617 (`Vec<T>`, `Box<[T]>`, or `&[T]`)",
618 auto_data.base_name,
619 ),
620 ));
621 }
622 auto_expand_cols[idx].present_in.push(variant_idx);
623 } else {
624 let idx = auto_expand_cols.len();
625 auto_expand_cols.push(EnumAutoExpandCol {
626 df_field: format_ident!("{}", auto_data.base_name),
627 base_name: auto_data.base_name.clone(),
628 elem_ty: auto_data.elem_ty.clone(),
629 container_ty: auto_data.container_ty.clone(),
630 present_in: vec![variant_idx],
631 });
632 auto_expand_index.insert(auto_data.base_name.clone(), idx);
633 }
634 }
635 }
636 }
637 let has_enum_auto_expand = !auto_expand_cols.is_empty();
638 // endregion
639
640 // region: Collect struct fields (for bespoke into_data_frame flatten)
/// One struct-typed column that is flattened at `into_data_frame` time, recorded once
/// per distinct base name (the first variant declaring the field wins).
641 struct EnumStructCol {
642 /// Companion struct field identifier, built from `base_name` (the name registered in the registry).
643 df_field: syn::Ident,
644 /// Column prefix: each inner column is emitted as `<base_name>_<inner column name>`.
645 base_name: String,
646 /// Type of the nested row; its `to_dataframe` is called on the present rows when flattening.
647 inner_ty: syn::Type,
648 }
649
650 let mut struct_cols: Vec<EnumStructCol> = Vec::new();
651 let mut struct_col_index: HashMap<String, bool> = HashMap::new();
652
653 for vi in &variant_infos {
654 for erf in &vi.fields {
655 if let EnumResolvedField::Struct(data) = erf
656 && !struct_col_index.contains_key(&data.base_name)
657 {
658 struct_col_index.insert(data.base_name.clone(), true);
659 struct_cols.push(EnumStructCol {
660 df_field: format_ident!("{}", data.base_name),
661 base_name: data.base_name.clone(),
662 inner_ty: data.inner_ty.clone(),
663 });
664 }
665 }
666 }
667 let has_struct_cols = !struct_cols.is_empty();
668 // endregion
669
670 // region: Collect as_list struct fields (Single fields that need per-element into_list())
671 //
672 // These are Single fields with `needs_into_list = true`: struct-typed fields that carry
673 // `#[dataframe(as_list)]`. The companion struct stores `Vec<Option<T>>` (raw Rust struct),
674 // but `into_data_frame` must convert each element via `.into_list()` before building the SEXP.
675 // We collect them so we can:
676 // a) Force the dynamic `into_data_frame` path (they need per-element conversion, not IntoR).
677 // b) Emit the per-element conversion in the dynamic path.
678 let mut as_list_struct_col_names: std::collections::HashSet<String> =
679 std::collections::HashSet::new();
680 for vi in &variant_infos {
681 for erf in &vi.fields {
682 if let EnumResolvedField::Single(data) = erf
683 && data.needs_into_list
684 {
685 as_list_struct_col_names.insert(data.col_name.to_string());
686 }
687 }
688 }
689 let has_as_list_struct_cols = !as_list_struct_col_names.is_empty();
690
691 // Collect factor column names (Single fields with `is_factor = true`).
692 // These are emitted via `FactorOptionVec<T>` wrapping in `into_data_frame`.
693 let mut factor_col_names: std::collections::HashSet<String> = std::collections::HashSet::new();
694 for vi in &variant_infos {
695 for erf in &vi.fields {
696 if let EnumResolvedField::Single(data) = erf
697 && data.is_factor
698 {
699 factor_col_names.insert(data.col_name.to_string());
700 }
701 }
702 }
703 let has_factor_cols = !factor_col_names.is_empty();
704 // endregion
705
706 // region: Generate companion struct
707 let has_tag = attrs.tag.is_some();
708
709 let tag_field = if has_tag {
710 quote! { pub _tag: Vec<String>, }
711 } else {
712 TokenStream::new()
713 };
714
715 let mut df_fields: Vec<TokenStream> = columns
716 .iter()
717 .map(|col| {
718 let name = &col.col_name;
719 let ty = &col.ty;
720 quote! { pub #name: Vec<Option<#ty>> }
721 })
722 .collect();
723 // Auto-expand fields: Vec<Option<ContainerType>>
724 for ac in &auto_expand_cols {
725 let name = &ac.df_field;
726 let cty = &ac.container_ty;
727 df_fields.push(quote! { pub #name: Vec<Option<#cty>> });
728 }
729
730 // When the companion struct would otherwise have no fields (unit-only enum,
731 // no tag) but has generic type parameters, emit a PhantomData field to keep
732 // the type parameter in scope — without it the struct is E0392 (unused param).
733 let has_any_field = has_tag || !df_fields.is_empty();
734 let phantom_field = if !has_any_field && !impl_generics.to_token_stream().is_empty() {
735 let type_params: Vec<_> = input.generics.type_params().map(|tp| &tp.ident).collect();
736 if !type_params.is_empty() {
737 quote! {
738 #[allow(dead_code)]
739 _phantom: ::std::marker::PhantomData<(#(#type_params,)*)>,
740 }
741 } else {
742 TokenStream::new()
743 }
744 } else {
745 TokenStream::new()
746 };
747
748 let dataframe_struct = quote! {
749 #[derive(Debug, Clone)]
750 pub struct #df_name #impl_generics #where_clause {
751 #tag_field
752 #(#df_fields),*
753 #phantom_field
754 }
755 };
756 // endregion
757
758 // region: Generate IntoDataFrame
759 // The first "real" column for length reference. If tag exists, use _tag.
760 let length_ref = if has_tag {
761 quote! { self._tag.len() }
762 } else if let Some(first_col) = columns.first() {
763 let first = &first_col.col_name;
764 quote! { self.#first.len() }
765 } else if let Some(first_ac) = auto_expand_cols.first() {
766 let first = &first_ac.df_field;
767 quote! { self.#first.len() }
768 } else {
769 // No columns and no tag — degenerate case, length is 0
770 quote! { 0usize }
771 };
772
773 // Each pair protects its SEXP via `__scope.protect_raw` so previously-built
774 // column SEXPs survive subsequent column allocations. Pre-fix the raw
775 // `vec![(name, into_sexp(...)), ...]` left every SEXP unrooted across the
776 // next column's allocations — UAF under gctorture
777 // (reviews/2026-05-07-gctorture-audit.md).
778 let tag_pair = if let Some(ref tag_name) = attrs.tag {
779 quote! { (#tag_name, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self._tag))), }
780 } else {
781 TokenStream::new()
782 };
783
784 let col_pairs: Vec<TokenStream> = columns
785 .iter()
786 .map(|col| {
787 let name = &col.col_name;
788 let name_str = name.to_string();
789 quote! { (#name_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#name))) }
790 })
791 .collect();
792
793 // Length checks for all columns
794 let length_checks: Vec<TokenStream> = columns
795 .iter()
796 .map(|col| {
797 let name = &col.col_name;
798 let name_str = name.to_string();
799 quote! {
800 assert!(
801 self.#name.len() == _n_rows,
802 "column length mismatch in {}: column `{}` has length {} but expected {}",
803 stringify!(#df_name),
804 #name_str,
805 self.#name.len(),
806 _n_rows,
807 );
808 }
809 })
810 .collect();
811
812 // Build the set of column names that are struct-col placeholders (to skip in normal push).
813 let struct_col_names: std::collections::HashSet<String> =
814 struct_cols.iter().map(|sc| sc.base_name.clone()).collect();
815
816 let into_dataframe_impl = if has_enum_auto_expand
817 || has_struct_cols
818 || has_as_list_struct_cols
819 || has_factor_cols
820 {
821 // Dynamic pair building for auto-expand, struct fields, as_list struct fields,
822 // and/or as_factor fields.
823 let tag_push_pair = if let Some(ref tag_name) = attrs.tag {
824 quote! {
825 __df_pairs.push((
826 #tag_name.to_string(),
827 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self._tag)),
828 ));
829 }
830 } else {
831 TokenStream::new()
832 };
833
834 // Static columns — skip struct-col placeholders (handled in flatten block below),
835 // as-list struct fields (handled in the per-element conversion block below),
836 // and factor columns (handled in the FactorOptionVec wrapping block below).
837 let static_pair_pushes: Vec<TokenStream> = columns
838 .iter()
839 .filter(|col| {
840 let name_str = col.col_name.to_string();
841 !struct_col_names.contains(&name_str)
842 && !as_list_struct_col_names.contains(&name_str)
843 && !factor_col_names.contains(&name_str)
844 })
845 .map(|col| {
846 let name = &col.col_name;
847 let name_str = name.to_string();
848 quote! {
849 __df_pairs.push((
850 #name_str.to_string(),
851 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(self.#name)),
852 ));
853 }
854 })
855 .collect();
856
857 // as_list struct fields: convert each element via into_list() at conversion time
858 // (not during row accumulation), producing a VECSXP list-column with NULL for absent rows.
859 let as_list_struct_pushes: Vec<TokenStream> = columns
860 .iter()
861 .filter(|col| as_list_struct_col_names.contains(&col.col_name.to_string()))
862 .map(|col| {
863 let name = &col.col_name;
864 let name_str = name.to_string();
865 let ty = &col.ty;
866 quote! {
867 {
868 // Map Vec<Option<T>> → Vec<Option<List>> then convert to SEXP.
869 // This is the only R-touching operation for as_list struct fields.
870 let __as_list_col: Vec<Option<::miniextendr_api::list::List>> =
871 self.#name
872 .into_iter()
873 .map(|__opt: Option<#ty>| {
874 __opt.map(|v| ::miniextendr_api::list::IntoList::into_list(v))
875 })
876 .collect();
877 __df_pairs.push((
878 #name_str.to_string(),
879 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(__as_list_col)),
880 ));
881 }
882 }
883 })
884 .collect();
885
886 // as_factor columns: wrap Vec<Option<T>> in FactorOptionVec<T> before calling into_sexp.
887 // Uses the UnitEnumFactor blanket impl: impl<T: UnitEnumFactor> IntoR for FactorOptionVec<T>.
888 let factor_pair_pushes: Vec<TokenStream> = columns
889 .iter()
890 .filter(|col| factor_col_names.contains(&col.col_name.to_string()))
891 .map(|col| {
892 let name = &col.col_name;
893 let name_str = name.to_string();
894 let ty = &col.ty;
895 quote! {
896 __df_pairs.push((
897 #name_str.to_string(),
898 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(
899 ::miniextendr_api::factor::FactorOptionVec::<#ty>::from(self.#name)
900 )),
901 ));
902 }
903 })
904 .collect();
905
906 let auto_expand_pair_pushes: Vec<TokenStream> = auto_expand_cols
907 .iter()
908 .map(|ac| {
909 let df_field = &ac.df_field;
910 let base_name_str = &ac.base_name;
911 let elem_ty = &ac.elem_ty;
912 quote! {
913 {
914 let __auto = self.#df_field;
915 let __max = __auto.iter()
916 .filter_map(|v| v.as_ref())
917 .map(|v| v.len())
918 .max()
919 .unwrap_or(0);
920 let mut __cols: Vec<Vec<Option<#elem_ty>>> = (0..__max)
921 .map(|_| Vec::with_capacity(_n_rows))
922 .collect();
923 for __opt_vec in &__auto {
924 for (__i, __col) in __cols.iter_mut().enumerate() {
925 __col.push(
926 __opt_vec.as_ref().and_then(|v| v.get(__i).cloned()),
927 );
928 }
929 }
930 for (__i, __col) in __cols.into_iter().enumerate() {
931 __df_pairs.push((
932 format!("{}_{}", #base_name_str, __i + 1),
933 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(__col)),
934 ));
935 }
936 }
937 }
938 })
939 .collect();
940
941 // Struct field flatten blocks: for each Vec<Option<Inner>> column, collect present
942 // rows into a dense Vec<Inner>, track presence indices, call Inner::to_dataframe,
943 // extract named columns via into_named_columns(), scatter them to full row count
944 // with None-fill, and push with prefixed names.
945 let struct_flatten_pushes: Vec<TokenStream> = struct_cols
946 .iter()
947 .map(|sc| {
948 let df_field = &sc.df_field;
949 let base_name_str = &sc.base_name;
950 let inner_ty = &sc.inner_ty;
951 quote! {
952 {
953 // Separate the Some/None rows — collect present rows densely
954 // (no Clone needed: we consume the Vec<Option<Inner>>).
955 let mut __present_idx: Vec<usize> = Vec::new();
956 let mut __inner_rows: Vec<#inner_ty> = Vec::new();
957 for (__row_i, __opt) in self.#df_field.into_iter().enumerate() {
958 if let Some(__inner) = __opt {
959 __present_idx.push(__row_i);
960 __inner_rows.push(__inner);
961 }
962 }
963 // Call Inner::to_dataframe and extract named column SEXPs.
964 let __inner_df = <#inner_ty>::to_dataframe(__inner_rows);
965 // into_named_columns consumes __inner_df and returns (name, SEXP) pairs.
966 let __inner_cols = ::miniextendr_api::convert::IntoDataFrame::into_named_columns(__inner_df);
967 // Scatter each column back to full _n_rows with NA/NULL-fill,
968 // preserving the source column's SEXPTYPE.
969 for (__inner_col_name, __inner_col_sexp) in __inner_cols {
970 // Protect the source column across the scatter allocation.
971 let __src = __scope.protect_raw(__inner_col_sexp);
972 let __prefixed = format!("{}_{}", #base_name_str, __inner_col_name);
973 let __scattered = unsafe {
974 let __out = ::miniextendr_api::convert::scatter_column(
975 __src,
976 &__present_idx,
977 _n_rows,
978 );
979 __scope.protect_raw(__out)
980 };
981 __df_pairs.push((__prefixed, __scattered));
982 }
983 }
984 }
985 })
986 .collect();
987
988 quote! {
989 impl #impl_generics ::miniextendr_api::convert::IntoDataFrame for #df_name #ty_generics #where_clause {
990 fn into_data_frame(self) -> ::miniextendr_api::List {
991 let _n_rows = #length_ref;
992 #(#length_checks)*
993 // SAFETY: into_data_frame only runs on the R main thread.
994 // ProtectScope keeps each column SEXP rooted across the
995 // next column's allocations; from_raw_pairs writes them
996 // into the parent VECSXP before we drop the scope.
997 unsafe {
998 let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
999 let mut __df_pairs: Vec<(
1000 String,
1001 ::miniextendr_api::ffi::SEXP,
1002 )> = Vec::new();
1003 #tag_push_pair
1004 #(#static_pair_pushes)*
1005 #(#factor_pair_pushes)*
1006 #(#auto_expand_pair_pushes)*
1007 #(#struct_flatten_pushes)*
1008 #(#as_list_struct_pushes)*
1009 ::miniextendr_api::list::List::from_raw_pairs(__df_pairs)
1010 .set_class_str(&["data.frame"])
1011 .set_row_names_int(_n_rows)
1012 }
1013 }
1014 }
1015 }
1016 } else {
1017 quote! {
1018 impl #impl_generics ::miniextendr_api::convert::IntoDataFrame for #df_name #ty_generics #where_clause {
1019 fn into_data_frame(self) -> ::miniextendr_api::List {
1020 let _n_rows = #length_ref;
1021 #(#length_checks)*
1022 // SAFETY: see auto-expand branch.
1023 unsafe {
1024 let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
1025 // Explicit type annotation so the vec![] case (unit-only enum
1026 // with no columns and no tag) doesn't hit E0282 inference failure.
1027 let __pairs: Vec<(&str, ::miniextendr_api::ffi::SEXP)> = vec![
1028 #tag_pair
1029 #(#col_pairs),*
1030 ];
1031 ::miniextendr_api::list::List::from_raw_pairs(__pairs)
1032 .set_class_str(&["data.frame"])
1033 .set_row_names_int(_n_rows)
1034 }
1035 }
1036 }
1037 }
1038 };
1039
1040 // Compile-time assertions: one per struct field, asserting the inner type
1041 // implements DataFrameRow.
1042 let struct_assertions: Vec<TokenStream> = struct_cols
1043 .iter()
1044 .map(|sc| {
1045 let inner_ty = &sc.inner_ty;
1046 quote! {
1047 const _: () = {
1048 fn _assert_inner_is_dataframe_row<T: ::miniextendr_api::markers::DataFrameRow>() {}
1049 fn _do_assert #impl_generics () #where_clause {
1050 _assert_inner_is_dataframe_row::<#inner_ty>();
1051 }
1052 };
1053 }
1054 })
1055 .collect();
1056
1057 // Payload collision assertions (#486): one per nested-enum struct field.
1058 // For each `kind: Inner` field, emit:
1059 // const _: () = ::miniextendr_api::markers::assert_no_payload_field_collision(
1060 // <Inner as DataFramePayloadFields>::FIELDS,
1061 // <Inner as DataFramePayloadFields>::TAG,
1062 // );
1063 // This fires a compile-time panic if any inner payload field name equals the
1064 // inner enum's own tag suffix, which would (after outer prefix expansion) produce
1065 // a column name identical to the outer discriminant column.
1066 let payload_collision_assertions: Vec<TokenStream> = struct_cols
1067 .iter()
1068 .map(|sc| {
1069 let inner_ty = &sc.inner_ty;
1070 quote! {
1071 const _: () = ::miniextendr_api::markers::assert_no_payload_field_collision(
1072 <#inner_ty as ::miniextendr_api::markers::DataFramePayloadFields>::FIELDS,
1073 <#inner_ty as ::miniextendr_api::markers::DataFramePayloadFields>::TAG,
1074 );
1075 }
1076 })
1077 .collect();
1078
1079 // Sibling collision assertions (#544): one per nested-enum struct field.
1080 //
1081 // The B1 parse-time check (earlier in this function) hardcodes `"variant"` as the
1082 // inner tag when building the discriminant column name to compare against sibling
1083 // Single fields. That check covers the common case with better spans/messages.
1084 //
1085 // This const assertion covers the non-default-tag case: when `Inner` uses
1086 // `#[dataframe(tag = "foo")]`, the discriminant column is `<base>_foo`, not
1087 // `<base>_variant`. The const assertion uses `<Inner as DataFramePayloadFields>::TAG`
1088 // so it resolves to the actual tag at compile time regardless of the value.
1089 //
1090 // We collect all Single col names across ALL variants (not just the variant that
1091 // introduced the struct field) — a collision in any one variant is a bug.
1092 let all_single_col_names: Vec<String> = {
1093 let mut seen = std::collections::HashSet::new();
1094 let mut names = Vec::new();
1095 for vi in &variant_infos {
1096 for erf in &vi.fields {
1097 if let EnumResolvedField::Single(d) = erf {
1098 let col = d.col_name.to_string();
1099 if seen.insert(col.clone()) {
1100 names.push(col);
1101 }
1102 }
1103 }
1104 }
1105 names
1106 };
1107 let sibling_collision_assertions: Vec<TokenStream> = struct_cols
1108 .iter()
1109 .map(|sc| {
1110 let inner_ty = &sc.inner_ty;
1111 let base_str = &sc.base_name;
1112 let sibling_lits = all_single_col_names
1113 .iter()
1114 .map(|s| quote! { #s })
1115 .collect::<Vec<_>>();
1116 quote! {
1117 const _: () = ::miniextendr_api::markers::assert_no_sibling_field_collision(
1118 &[#(#sibling_lits),*],
1119 #base_str,
1120 <#inner_ty as ::miniextendr_api::markers::DataFramePayloadFields>::TAG,
1121 );
1122 }
1123 })
1124 .collect();
1125 // endregion
1126
1127 // region: Generate From<Vec<Enum>>
1128 let mut col_vec_inits: Vec<TokenStream> = columns
1129 .iter()
1130 .map(|col| {
1131 let name = &col.col_name;
1132 let ty = &col.ty;
1133 quote! { let mut #name: Vec<Option<#ty>> = Vec::with_capacity(len); }
1134 })
1135 .collect();
1136 for ac in &auto_expand_cols {
1137 let name = &ac.df_field;
1138 let cty = &ac.container_ty;
1139 col_vec_inits.push(quote! { let mut #name: Vec<Option<#cty>> = Vec::with_capacity(len); });
1140 }
1141
1142 let tag_init = if has_tag {
1143 quote! { let mut _tag: Vec<String> = Vec::with_capacity(len); }
1144 } else {
1145 TokenStream::new()
1146 };
1147
1148 // Build match arms for each variant
1149 let match_arms: Vec<TokenStream> = variant_infos
1150 .iter()
1151 .enumerate()
1152 .map(|(variant_idx, vi)| {
1153 let variant_name = &vi.name;
1154 let variant_name_str = variant_name.to_string();
1155
1156 let tag_push = if has_tag {
1157 quote! { _tag.push(#variant_name_str.to_string()); }
1158 } else {
1159 TokenStream::new()
1160 };
1161
1162 // Build push statements for each schema column.
1163 // For present columns: push Some(value), for absent: push None.
1164 // Expanded fields contribute multiple columns from one binding.
1165
1166 // First, build a map of which schema columns this variant contributes to.
1167 let col_pushes: Vec<TokenStream> = columns
1168 .iter()
1169 .map(|col| {
1170 let col_name = &col.col_name;
1171 if col.present_in.contains(&variant_idx) {
1172 let col_name_str = col_name.to_string();
1173
1174 for erf in &vi.fields {
1175 match erf {
1176 EnumResolvedField::Single(data)
1177 if data.col_name == *col_name =>
1178 {
1179 let binding = &data.binding;
1180 if col.string_coerced {
1181 return quote! { #col_name.push(Some(ToString::to_string(&#binding))); };
1182 } else {
1183 return quote! { #col_name.push(Some(#binding)); };
1184 }
1185 }
1186 EnumResolvedField::ExpandedFixed(data) => {
1187 for i in 1..=data.len {
1188 let expanded_name = format!("{}_{}", data.base_name, i);
1189 if expanded_name == col_name_str {
1190 let binding = &data.binding;
1191 let idx = syn::Index::from(i - 1);
1192 return quote! { #col_name.push(Some(#binding[#idx])); };
1193 }
1194 }
1195 }
1196 EnumResolvedField::ExpandedVec(data) => {
1197 for i in 1..=data.width {
1198 let expanded_name = format!("{}_{}", data.base_name, i);
1199 if expanded_name == col_name_str {
1200 let binding = &data.binding;
1201 let get_idx = i - 1;
1202 return quote! { #col_name.push(#binding.get(#get_idx).cloned()); };
1203 }
1204 }
1205 }
1206 EnumResolvedField::Map(data) => {
1207 let keys_name = format!("{}_keys", data.base_name);
1208 let vals_name = format!("{}_values", data.base_name);
1209 let binding = &data.binding;
1210 // Use unzip() to guarantee pairwise alignment of keys and values.
1211 // Both columns are emitted together when the _keys column is
1212 // processed; the _values column is skipped (already handled).
1213 if col_name_str == keys_name {
1214 let vals_col = format_ident!("{}", vals_name);
1215 return quote! {
1216 let (__mx_keys, __mx_vals) = #binding.into_iter().unzip::<_, _, Vec<_>, Vec<_>>();
1217 #col_name.push(Some(__mx_keys));
1218 #vals_col.push(Some(__mx_vals));
1219 };
1220 }
1221 if col_name_str == vals_name {
1222 // Already handled when keys col was processed; emit no-op.
1223 return quote! {};
1224 }
1225 }
1226 // Struct field: push Some(binding) to the Vec<Option<Inner>> column.
1227 EnumResolvedField::Struct(data)
1228 if data.base_name == col_name_str =>
1229 {
1230 let binding = &data.binding;
1231 return quote! { #col_name.push(Some(#binding)); };
1232 }
1233 // AutoExpandVec doesn't contribute to static columns
1234 _ => {}
1235 }
1236 }
1237 quote! { #col_name.push(None); }
1238 } else {
1239 quote! { #col_name.push(None); }
1240 }
1241 })
1242 .collect();
1243
1244 // Auto-expand push statements
1245 let auto_expand_pushes: Vec<TokenStream> = auto_expand_cols
1246 .iter()
1247 .map(|ac| {
1248 let ac_field = &ac.df_field;
1249 if ac.present_in.contains(&variant_idx) {
1250 // Find the binding for this auto-expand field
1251 for erf in &vi.fields {
1252 if let EnumResolvedField::AutoExpandVec(data) = erf
1253 && data.base_name == ac.base_name
1254 {
1255 let binding = &data.binding;
1256 return quote! { #ac_field.push(Some(#binding)); };
1257 }
1258 }
1259 // shouldn't reach here
1260 quote! { #ac_field.push(None); }
1261 } else {
1262 quote! { #ac_field.push(None); }
1263 }
1264 })
1265 .collect();
1266
1267 // Generate destructure pattern based on variant shape
1268 match vi.shape {
1269 VariantShape::Named => {
1270 let mut field_bindings: Vec<TokenStream> = vi.fields.iter().map(|erf| {
1271 let rust_name = erf.rust_name();
1272 let binding = erf.binding();
1273 quote! { #rust_name: #binding }
1274 }).collect();
1275 // Add skipped fields as wildcard bindings
1276 for skipped in &vi.skipped_fields {
1277 field_bindings.push(quote! { #skipped: _ });
1278 }
1279 quote! {
1280 #row_name::#variant_name { #(#field_bindings),* } => {
1281 #tag_push
1282 #(#col_pushes)*
1283 #(#auto_expand_pushes)*
1284 }
1285 }
1286 }
1287 VariantShape::Tuple => {
1288 let field_bindings: Vec<TokenStream> = vi.fields.iter().map(|erf| {
1289 let binding = erf.binding();
1290 quote! { #binding }
1291 }).collect();
1292 quote! {
1293 #row_name::#variant_name(#(#field_bindings),*) => {
1294 #tag_push
1295 #(#col_pushes)*
1296 #(#auto_expand_pushes)*
1297 }
1298 }
1299 }
1300 VariantShape::Unit => {
1301 quote! {
1302 #row_name::#variant_name => {
1303 #tag_push
1304 #(#col_pushes)*
1305 #(#auto_expand_pushes)*
1306 }
1307 }
1308 }
1309 }
1310 })
1311 .collect();
1312
1313 let tag_struct_field = if has_tag {
1314 quote! { _tag, }
1315 } else {
1316 TokenStream::new()
1317 };
1318
1319 let mut col_struct_fields: Vec<TokenStream> = columns
1320 .iter()
1321 .map(|col| {
1322 let name = &col.col_name;
1323 quote! { #name }
1324 })
1325 .collect();
1326 for ac in &auto_expand_cols {
1327 let name = &ac.df_field;
1328 col_struct_fields.push(quote! { #name });
1329 }
1330
1331 // Struct literal initializer for the PhantomData field, when emitted.
1332 //
1333 // `phantom_field` is:
1334 // - Empty when the companion struct has at least one real field (tag or
1335 // column), or when there are no generic type parameters (const-param
1336 // enums don't need PhantomData — Rust allows unused const params).
1337 // - Non-empty only when the struct would otherwise have *zero* fields AND
1338 // the enum carries at least one type parameter `T`, where the generated
1339 // `PhantomData<T>` field prevents E0392 ("unused type parameter") on the
1340 // companion struct. In practice this path is only reachable if the user
1341 // somehow has a type-generic unit-only enum; Rust's own E0392 rule blocks
1342 // such enums at the user-definition level, so this branch is a defensive
1343 // guard for hypothetical macro-generated enum inputs.
1344 let phantom_struct_field_init = if phantom_field.is_empty() {
1345 TokenStream::new()
1346 } else {
1347 quote! { _phantom: ::std::marker::PhantomData, }
1348 };
1349
1350 let from_vec_impl = quote! {
1351 impl #impl_generics From<Vec<#row_name #ty_generics>> for #df_name #ty_generics #where_clause {
1352 fn from(rows: Vec<#row_name #ty_generics>) -> Self {
1353 let len = rows.len();
1354 #tag_init
1355 #(#col_vec_inits)*
1356 for row in rows {
1357 match row {
1358 #(#match_arms)*
1359 }
1360 }
1361 #df_name {
1362 #tag_struct_field
1363 #(#col_struct_fields),*
1364 #phantom_struct_field_init
1365 }
1366 }
1367 }
1368 };
1369 // endregion
1370
1371 // region: Generate from_rows_par (parallel scatter-write via ColumnWriter)
1372 let from_rows_par_method = if !columns.is_empty() || !auto_expand_cols.is_empty() || has_tag {
1373 // Column declarations
1374 let mut par_col_decls = Vec::new();
1375 if has_tag {
1376 par_col_decls.push(quote! {
1377 let mut _tag: Vec<String> = vec![String::new(); len];
1378 });
1379 }
1380 for col in &columns {
1381 let name = &col.col_name;
1382 let ty = &col.ty;
1383 par_col_decls.push(quote! {
1384 let mut #name: Vec<Option<#ty>> = vec![None; len];
1385 });
1386 }
1387 for ac in &auto_expand_cols {
1388 let name = &ac.df_field;
1389 let cty = &ac.container_ty;
1390 par_col_decls.push(quote! {
1391 let mut #name: Vec<Option<#cty>> = vec![None; len];
1392 });
1393 }
1394
1395 // Writer declarations
1396 let mut writer_decls = Vec::new();
1397 if has_tag {
1398 writer_decls.push(quote! {
1399 let __w_tag = unsafe {
1400 ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut _tag)
1401 };
1402 });
1403 }
1404 for col in &columns {
1405 let name = &col.col_name;
1406 let w_name = format_ident!("__w_{}", name);
1407 writer_decls.push(quote! {
1408 let #w_name = unsafe {
1409 ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut #name)
1410 };
1411 });
1412 }
1413 for ac in &auto_expand_cols {
1414 let name = &ac.df_field;
1415 let w_name = format_ident!("__w_{}", name);
1416 writer_decls.push(quote! {
1417 let #w_name = unsafe {
1418 ::miniextendr_api::rayon_bridge::ColumnWriter::new(&mut #name)
1419 };
1420 });
1421 }
1422
1423 // Match arms for parallel path
1424 let par_match_arms: Vec<TokenStream> = variant_infos
1425 .iter()
1426 .enumerate()
1427 .map(|(variant_idx, vi)| {
1428 let variant_name = &vi.name;
1429 let variant_name_str = variant_name.to_string();
1430
1431 let tag_write = if has_tag {
1432 quote! { __w_tag.write(__i, #variant_name_str.to_string()); }
1433 } else {
1434 TokenStream::new()
1435 };
1436
1437 // Write calls for schema columns
1438 let col_writes: Vec<TokenStream> = columns
1439 .iter()
1440 .map(|col| {
1441 let col_name = &col.col_name;
1442 let w_name = format_ident!("__w_{}", col_name);
1443 if col.present_in.contains(&variant_idx) {
1444 let col_name_str = col_name.to_string();
1445 for erf in &vi.fields {
1446 match erf {
1447 EnumResolvedField::Single(data)
1448 if data.col_name == *col_name =>
1449 {
1450 let binding = &data.binding;
1451 if col.string_coerced {
1452 return quote! { #w_name.write(__i, Some(ToString::to_string(&#binding))); };
1453 } else {
1454 return quote! { #w_name.write(__i, Some(#binding)); };
1455 }
1456 }
1457 EnumResolvedField::ExpandedFixed(data) => {
1458 for i in 1..=data.len {
1459 let expanded_name = format!("{}_{}", data.base_name, i);
1460 if expanded_name == col_name_str {
1461 let binding = &data.binding;
1462 let idx = syn::Index::from(i - 1);
1463 return quote! { #w_name.write(__i, Some(#binding[#idx])); };
1464 }
1465 }
1466 }
1467 EnumResolvedField::ExpandedVec(data) => {
1468 for i in 1..=data.width {
1469 let expanded_name = format!("{}_{}", data.base_name, i);
1470 if expanded_name == col_name_str {
1471 let binding = &data.binding;
1472 let get_idx = i - 1;
1473 return quote! { #w_name.write(__i, #binding.get(#get_idx).cloned()); };
1474 }
1475 }
1476 }
1477 EnumResolvedField::Map(data) => {
1478 let keys_name = format!("{}_keys", data.base_name);
1479 let vals_name = format!("{}_values", data.base_name);
1480 let binding = &data.binding;
1481 // Combined unzip: emit both key and value writes when the
1482 // keys column is processed; skip the values column (handled here).
1483 if col_name_str == keys_name {
1484 let vals_col = format_ident!("{}", vals_name);
1485 let w_vals = format_ident!("__w_{}", vals_col);
1486 return quote! {
1487 let (__mx_keys, __mx_vals) = #binding.into_iter().unzip::<_, _, Vec<_>, Vec<_>>();
1488 #w_name.write(__i, Some(__mx_keys));
1489 #w_vals.write(__i, Some(__mx_vals));
1490 };
1491 }
1492 if col_name_str == vals_name {
1493 // Already handled when keys col was processed.
1494 return quote! {};
1495 }
1496 }
1497 // Struct field: write Some(binding) to Vec<Option<Inner>>.
1498 EnumResolvedField::Struct(data)
1499 if data.base_name == col_name_str =>
1500 {
1501 let binding = &data.binding;
1502 return quote! { #w_name.write(__i, Some(#binding)); };
1503 }
1504 _ => {}
1505 }
1506 }
1507 quote! { #w_name.write(__i, None); }
1508 } else {
1509 quote! { #w_name.write(__i, None); }
1510 }
1511 })
1512 .collect();
1513
1514 // Auto-expand write calls
1515 let auto_expand_writes: Vec<TokenStream> = auto_expand_cols
1516 .iter()
1517 .map(|ac| {
1518 let w_name = format_ident!("__w_{}", ac.df_field);
1519 if ac.present_in.contains(&variant_idx) {
1520 for erf in &vi.fields {
1521 if let EnumResolvedField::AutoExpandVec(data) = erf
1522 && data.base_name == ac.base_name
1523 {
1524 let binding = &data.binding;
1525 return quote! { #w_name.write(__i, Some(#binding)); };
1526 }
1527 }
1528 quote! { #w_name.write(__i, None); }
1529 } else {
1530 quote! { #w_name.write(__i, None); }
1531 }
1532 })
1533 .collect();
1534
1535 // Generate destructure pattern based on variant shape
1536 match vi.shape {
1537 VariantShape::Named => {
1538 let mut field_bindings: Vec<TokenStream> = vi.fields.iter().map(|erf| {
1539 let rust_name = erf.rust_name();
1540 let binding = erf.binding();
1541 quote! { #rust_name: #binding }
1542 }).collect();
1543 for skipped in &vi.skipped_fields {
1544 field_bindings.push(quote! { #skipped: _ });
1545 }
1546 quote! {
1547 #row_name::#variant_name { #(#field_bindings),* } => {
1548 #tag_write
1549 #(#col_writes)*
1550 #(#auto_expand_writes)*
1551 }
1552 }
1553 }
1554 VariantShape::Tuple => {
1555 let field_bindings: Vec<TokenStream> = vi.fields.iter().map(|erf| {
1556 let binding = erf.binding();
1557 quote! { #binding }
1558 }).collect();
1559 quote! {
1560 #row_name::#variant_name(#(#field_bindings),*) => {
1561 #tag_write
1562 #(#col_writes)*
1563 #(#auto_expand_writes)*
1564 }
1565 }
1566 }
1567 VariantShape::Unit => {
1568 quote! {
1569 #row_name::#variant_name => {
1570 #tag_write
1571 #(#col_writes)*
1572 #(#auto_expand_writes)*
1573 }
1574 }
1575 }
1576 }
1577 })
1578 .collect();
1579
1580 // Return struct fields
1581 let par_tag_field = if has_tag {
1582 quote! { _tag, }
1583 } else {
1584 TokenStream::new()
1585 };
1586 let mut par_struct_fields: Vec<TokenStream> = columns
1587 .iter()
1588 .map(|col| {
1589 let name = &col.col_name;
1590 quote! { #name }
1591 })
1592 .collect();
1593 for ac in &auto_expand_cols {
1594 let name = &ac.df_field;
1595 par_struct_fields.push(quote! { #name });
1596 }
1597
1598 quote! {
1599 /// Parallel row→column transposition using rayon scatter-write.
1600 ///
1601 /// Always uses rayon — no threshold check. Use `from_rows` for the
1602 /// sequential path.
1603 #[cfg(feature = "rayon")]
1604 #[allow(clippy::uninit_vec)]
1605 pub fn from_rows_par(rows: Vec<#row_name #ty_generics>) -> Self {
1606 use ::miniextendr_api::rayon_bridge::rayon::prelude::*;
1607 let len = rows.len();
1608 #(#par_col_decls)*
1609 {
1610 #(#writer_decls)*
1611 rows.into_par_iter().enumerate().for_each(|(__i, __row)| unsafe {
1612 match __row {
1613 #(#par_match_arms)*
1614 }
1615 });
1616 }
1617 #df_name { #par_tag_field #(#par_struct_fields),* }
1618 }
1619 }
1620 } else {
1621 TokenStream::new()
1622 };
1623 // endregion
1624
1625 // region: Generate DataFrame type methods (from_rows, from_rows_par)
1626 let df_methods = quote! {
1627 impl #impl_generics #df_name #ty_generics #where_clause {
1628 /// Sequential row→column transposition.
1629 pub fn from_rows(rows: Vec<#row_name #ty_generics>) -> Self {
1630 rows.into()
1631 }
1632
1633 #from_rows_par_method
1634 }
1635 };
1636 // endregion
1637
1638 // region: Generate associated methods
1639 let row_methods = quote! {
1640 impl #impl_generics #row_name #ty_generics #where_clause {
1641 /// Name of the generated DataFrame companion type.
1642 pub const DATAFRAME_TYPE_NAME: &'static str = stringify!(#df_name);
1643
1644 /// Convert a vector of enum rows into the companion DataFrame type.
1645 ///
1646 /// Fields present in a variant get `Some(value)`, absent fields get `None` (→ NA in R).
1647 pub fn to_dataframe(rows: Vec<Self>) -> #df_name #ty_generics {
1648 rows.into()
1649 }
1650 }
1651 };
1652
1653 // No IntoList assertion for align enums — they go through the companion struct path,
1654 // not the `DataFrame<T>` path, so IntoList is not required.
1655
1656 // region: Generate to_dataframe_split
1657 let split_method = generate_split_method(
1658 row_name,
1659 &variant_infos,
1660 &impl_generics,
1661 &ty_generics,
1662 where_clause,
1663 );
1664 // endregion
1665
1666 // Marker trait impl: row type implements DataFrameRow via IntoDataFrame chain.
1667 // This is the impl the compile-time assertion checks for struct-typed variant fields.
1668 let marker_impl = quote! {
1669 impl #impl_generics ::miniextendr_api::markers::DataFrameRow
1670 for #row_name #ty_generics #where_clause {}
1671 };
1672
1673 // DataFramePayloadFields impl (#486): exposes FIELDS (all resolved column names,
1674 // deduplicated) and TAG for compile-time collision detection by outer enums.
1675 // FIELDS lists every single-column payload field name across all variants.
1676 // TAG is the inner enum's #[dataframe(tag = "...")] value (or "" if absent).
1677 let payload_fields_impl = {
1678 // Collect unique field names from all variant payload fields (single columns only).
1679 // We skip expanded (fixed/vec) and struct fields — only direct column contributions.
1680 let mut field_names: Vec<String> = Vec::new();
1681 let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
1682 for vi in &variant_infos {
1683 for erf in &vi.fields {
1684 if let EnumResolvedField::Single(data) = erf {
1685 let name = data.col_name.to_string();
1686 if seen.insert(name.clone()) {
1687 field_names.push(name);
1688 }
1689 }
1690 }
1691 }
1692 let tag_str = attrs.tag.as_deref().unwrap_or("");
1693 quote! {
1694 impl #impl_generics ::miniextendr_api::markers::DataFramePayloadFields
1695 for #row_name #ty_generics #where_clause
1696 {
1697 const FIELDS: &'static [&'static str] = &[#(#field_names),*];
1698 const TAG: &'static str = #tag_str;
1699 }
1700 }
1701 };
1702
1703 // region: unit-only enum factor impls
1704 // For a unit-only enum (all variants are unit), auto-emit:
1705 // 1. `impl UnitEnumFactor for Self` — provides FACTOR_LEVELS and to_factor_index()
1706 // 2. `impl IntoR for Self` — produces a single-element factor SEXP (cached levels)
1707 // 3. `impl IntoList for Self` — delegates to vec![self].into_list()
1708 //
1709 // The `UnitEnumFactor` impl is consumed by the blanket
1710 // `impl<T: UnitEnumFactor> IntoR for FactorOptionVec<T>` in miniextendr-api,
1711 // which is what `into_data_frame` calls for `as_factor` companion struct columns.
1712 //
1713 // NOTE: `impl IntoR for Vec<Option<Self>>` violates orphan rules (Vec is foreign),
1714 // so we use the `FactorOptionVec<T>` wrapper type (local to miniextendr-api) instead.
1715 //
1716 // These impls allow `as_factor` and `as_list` to work on the inner type when it
1717 // appears as a field of an outer enum or struct DataFrameRow.
1718 let unit_only_factor_impls = {
1719 let all_unit = variant_infos
1720 .iter()
1721 .all(|vi| vi.shape == VariantShape::Unit);
1722 // For unit-only enums, auto-emit three impls:
1723 // 1. `impl UnitEnumFactor for Self` — provides FACTOR_LEVELS and to_factor_index()
1724 // 2. `impl IntoR for Self` — produces a single-element factor SEXP
1725 // 3. `impl IntoList for Self` — delegates to vec![self].into_list()
1726 //
1727 // Non-generic enums: `IntoR` caches the levels SEXP via `OnceLock<SEXP>` (one-time
1728 // `R_PreserveObject`).
1729 //
1730 // Generic enums: Rust does not allow generic statics, so `IntoR` builds the levels
1731 // SEXP on each call using `build_levels_sexp` + manual `Rf_protect`/`Rf_unprotect`.
1732 // This is the same pattern used by `impl<T: UnitEnumFactor> IntoR for FactorOptionVec<T>`
1733 // in `miniextendr-api/src/factor.rs`.
1734 if all_unit {
1735 // Collect variant names and assign 1-based R factor indices (used by both branches).
1736 let variant_idents: Vec<&syn::Ident> =
1737 variant_infos.iter().map(|vi| &vi.name).collect();
1738 let variant_strs: Vec<String> =
1739 variant_infos.iter().map(|vi| vi.name.to_string()).collect();
1740 let variant_strs_lit: Vec<&str> = variant_strs.iter().map(|s| s.as_str()).collect();
1741 let indices: Vec<i32> = (1i32..=(variant_idents.len() as i32)).collect();
1742
1743 if impl_generics.to_token_stream().is_empty() {
1744 // Non-generic: cache levels SEXP permanently via OnceLock (one R_PreserveObject).
1745 quote! {
1746 // impl UnitEnumFactor for Self: provides FACTOR_LEVELS + to_factor_index().
1747 // Used by `impl<T: UnitEnumFactor> IntoR for FactorOptionVec<T>` in miniextendr-api
1748 // to build factor SEXPs from `Vec<Option<Self>>` companion columns.
1749 impl ::miniextendr_api::factor::UnitEnumFactor for #row_name {
1750 const FACTOR_LEVELS: &'static [&'static str] = &[#(#variant_strs_lit),*];
1751 fn to_factor_index(self) -> i32 {
1752 match self {
1753 #(#row_name::#variant_idents => #indices,)*
1754 }
1755 }
1756 }
1757
1758 // impl IntoR for Self: single-element factor SEXP (cached levels via OnceLock).
1759 // Used when a unit-only enum value is returned directly from a #[miniextendr] fn.
1760 impl ::miniextendr_api::IntoR for #row_name {
1761 type Error = ::std::convert::Infallible;
1762 fn try_into_sexp(self) -> ::std::result::Result<::miniextendr_api::ffi::SEXP, Self::Error> {
1763 use ::std::sync::OnceLock;
1764 const LEVELS: &[&str] = &[#(#variant_strs_lit),*];
1765 static LEVELS_CACHE: OnceLock<::miniextendr_api::ffi::SEXP> =
1766 OnceLock::new();
1767 let levels = *LEVELS_CACHE.get_or_init(|| {
1768 ::miniextendr_api::factor::build_levels_sexp_cached(LEVELS)
1769 });
1770 let idx: i32 = match self {
1771 #(#row_name::#variant_idents => #indices,)*
1772 };
1773 ::std::result::Result::Ok(
1774 ::miniextendr_api::factor::build_factor(&[idx], levels)
1775 )
1776 }
1777 }
1778
1779 // impl IntoList for Self: for as_list path in outer DataFrameRow.
1780 // Delegates to Vec<Self>: IntoList (blanket impl via IntoR for Self).
1781 impl ::miniextendr_api::list::IntoList for #row_name {
1782 fn into_list(self) -> ::miniextendr_api::list::List {
1783 ::miniextendr_api::list::IntoList::into_list(::std::vec![self])
1784 }
1785 }
1786 }
1787 } else {
1788 // Generic: cannot use generic statics (Rust restriction).
1789 // Build the levels SEXP on each call and protect it across the build_factor
1790 // allocation — same pattern as `FactorOptionVec<T>: IntoR` in
1791 // `miniextendr-api/src/factor.rs`.
1792 quote! {
1793 // impl UnitEnumFactor: associated const is allowed in generic impls.
1794 impl #impl_generics ::miniextendr_api::factor::UnitEnumFactor
1795 for #row_name #ty_generics #where_clause
1796 {
1797 const FACTOR_LEVELS: &'static [&'static str] = &[#(#variant_strs_lit),*];
1798 fn to_factor_index(self) -> i32 {
1799 match self {
1800 #(#row_name::#variant_idents => #indices,)*
1801 }
1802 }
1803 }
1804
1805 // impl IntoR: build levels SEXP on each call (no generic static allowed).
1806 // Protect the levels STRSXP before build_factor allocates so GC cannot
1807 // collect it mid-build (see CLAUDE.md "PROTECT discipline against R-devel GC").
1808 impl #impl_generics ::miniextendr_api::IntoR
1809 for #row_name #ty_generics #where_clause
1810 {
1811 type Error = ::std::convert::Infallible;
1812 fn try_into_sexp(self) -> ::std::result::Result<::miniextendr_api::ffi::SEXP, Self::Error> {
1813 const LEVELS: &[&str] = &[#(#variant_strs_lit),*];
1814 let idx: i32 = match self {
1815 #(#row_name::#variant_idents => #indices,)*
1816 };
1817 unsafe {
1818 let levels = ::miniextendr_api::ffi::Rf_protect(
1819 ::miniextendr_api::factor::build_levels_sexp(LEVELS)
1820 );
1821 let result = ::miniextendr_api::factor::build_factor(&[idx], levels);
1822 ::miniextendr_api::ffi::Rf_unprotect(1);
1823 ::std::result::Result::Ok(result)
1824 }
1825 }
1826 }
1827
1828 // impl IntoList: for as_list path in outer DataFrameRow.
1829 impl #impl_generics ::miniextendr_api::list::IntoList
1830 for #row_name #ty_generics #where_clause
1831 {
1832 fn into_list(self) -> ::miniextendr_api::list::List {
1833 ::miniextendr_api::list::IntoList::into_list(::std::vec![self])
1834 }
1835 }
1836 }
1837 }
1838 } else {
1839 TokenStream::new()
1840 }
1841 };
1842 // endregion
1843
1844 Ok(quote! {
1845 #dataframe_struct
1846 #into_dataframe_impl
1847 #from_vec_impl
1848 #df_methods
1849 #row_methods
1850 #split_method
1851 #marker_impl
1852 #payload_fields_impl
1853 #(#struct_assertions)*
1854 #(#payload_collision_assertions)*
1855 #(#sibling_collision_assertions)*
1856 #unit_only_factor_impls
1857 })
1858 // endregion
1859}
1860
1861// region: generate_split_method
1862
1863/// Generate the `to_dataframe_split` associated method for an enum `DataFrameRow`.
1864///
1865/// For a single-variant enum, returns the data.frame directly.
1866/// For multi-variant enums, returns a named R list of data.frames (one per variant,
1867/// named with snake_case variant names). Each partition data.frame has only that
1868/// variant's columns (non-optional types — no NA fill from other variants).
1869fn generate_split_method(
1870 row_name: &syn::Ident,
1871 variant_infos: &[VariantInfo],
1872 impl_generics: &syn::ImplGenerics<'_>,
1873 ty_generics: &syn::TypeGenerics<'_>,
1874 where_clause: Option<&syn::WhereClause>,
1875) -> TokenStream {
1876 // Per-variant buffer declarations
1877 let mut buf_decls: Vec<TokenStream> = Vec::new();
1878 // Per-variant match arms (push to buffers)
1879 let mut match_arms: Vec<TokenStream> = Vec::new();
1880 // Per-variant data.frame construction
1881 let mut df_constructions: Vec<TokenStream> = Vec::new();
1882 // Names of the constructed data.frame variables (for the outer list)
1883 let mut df_var_names: Vec<syn::Ident> = Vec::new();
1884 // Snake-case string names (for the outer list pairs)
1885 let mut snake_names: Vec<String> = Vec::new();
1886
1887 for vi in variant_infos {
1888 let variant_name = &vi.name;
1889 let snake = naming::to_snake_case(&variant_name.to_string());
1890 snake_names.push(snake.clone());
1891
1892 let df_var = format_ident!("__{}_df", snake);
1893 df_var_names.push(df_var.clone());
1894
1895 // Determine if any field is AutoExpandVec or Struct (both require the dynamic pairs path
1896 // because column names are only known at runtime).
1897 let has_auto = vi.fields.iter().any(|f| {
1898 matches!(
1899 f,
1900 EnumResolvedField::AutoExpandVec(_) | EnumResolvedField::Struct(_)
1901 )
1902 });
1903
1904 match vi.shape {
1905 // region: Unit variant
1906 VariantShape::Unit => {
1907 let count_var = format_ident!("__s_{}_count", snake);
1908 buf_decls.push(quote! {
1909 let mut #count_var: usize = 0usize;
1910 });
1911
1912 match_arms.push(quote! {
1913 #row_name::#variant_name => {
1914 #count_var += 1;
1915 }
1916 });
1917
1918 df_constructions.push(quote! {
1919 let #df_var = ::miniextendr_api::list::List::from_raw_pairs_empty()
1920 .set_class_str(&["data.frame"])
1921 .set_row_names_int(#count_var);
1922 });
1923 }
1924 // endregion
1925
1926 // region: Named or Tuple variants
1927 VariantShape::Named | VariantShape::Tuple => {
1928 // Declare per-field buffers
1929 for erf in &vi.fields {
1930 match erf {
1931 EnumResolvedField::Single(data) => {
1932 let buf = format_ident!("__s_{}_{}", snake, data.col_name);
1933 let ty = &data.ty;
1934 // For needs_into_list fields, ty is already List (the stored type).
1935 buf_decls.push(quote! {
1936 let mut #buf: Vec<#ty> = Vec::new();
1937 });
1938 }
1939 EnumResolvedField::ExpandedFixed(data) => {
1940 for i in 1..=data.len {
1941 let buf = format_ident!("__s_{}_{}_{}", snake, data.base_name, i);
1942 let elem_ty = &data.elem_ty;
1943 buf_decls.push(quote! {
1944 let mut #buf: Vec<#elem_ty> = Vec::new();
1945 });
1946 }
1947 }
1948 EnumResolvedField::ExpandedVec(data) => {
1949 for i in 1..=data.width {
1950 let buf = format_ident!("__s_{}_{}_{}", snake, data.base_name, i);
1951 let elem_ty = &data.elem_ty;
1952 buf_decls.push(quote! {
1953 let mut #buf: Vec<Option<#elem_ty>> = Vec::new();
1954 });
1955 }
1956 }
1957 EnumResolvedField::AutoExpandVec(data) => {
1958 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
1959 let container_ty = &data.container_ty;
1960 buf_decls.push(quote! {
1961 let mut #buf: Vec<#container_ty> = Vec::new();
1962 });
1963 }
1964 EnumResolvedField::Map(data) => {
1965 let keys_buf = format_ident!("__s_{}_{}_keys", snake, data.base_name);
1966 let vals_buf = format_ident!("__s_{}_{}_values", snake, data.base_name);
1967 let key_ty = &data.key_ty;
1968 let val_ty = &data.val_ty;
1969 buf_decls.push(quote! {
1970 let mut #keys_buf: Vec<Vec<#key_ty>> = Vec::new();
1971 let mut #vals_buf: Vec<Vec<#val_ty>> = Vec::new();
1972 });
1973 }
1974 // Struct field: buffer holds Vec<Inner> (no Option — split only sees
1975 // rows of this variant, so every row has the field present).
1976 EnumResolvedField::Struct(data) => {
1977 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
1978 let inner_ty = &data.inner_ty;
1979 buf_decls.push(quote! {
1980 let mut #buf: Vec<#inner_ty> = Vec::new();
1981 });
1982 }
1983 }
1984 }
1985
1986 // Build destructure pattern and push statements
1987 let push_stmts: Vec<TokenStream> = vi
1988 .fields
1989 .iter()
1990 .flat_map(|erf| {
1991 let binding = erf.binding();
1992 match erf {
1993 EnumResolvedField::Single(data) => {
1994 let buf = format_ident!("__s_{}_{}", snake, data.col_name);
1995 vec![quote! { #buf.push(#binding); }]
1996 }
1997 EnumResolvedField::ExpandedFixed(data) => (0..data.len)
1998 .map(|i| {
1999 let buf =
2000 format_ident!("__s_{}_{}_{}", snake, data.base_name, i + 1);
2001 let idx = syn::Index::from(i);
2002 quote! { #buf.push(#binding[#idx]); }
2003 })
2004 .collect(),
2005 EnumResolvedField::ExpandedVec(data) => (0..data.width)
2006 .map(|i| {
2007 let buf =
2008 format_ident!("__s_{}_{}_{}", snake, data.base_name, i + 1);
2009 quote! { #buf.push(#binding.get(#i).cloned()); }
2010 })
2011 .collect(),
2012 EnumResolvedField::AutoExpandVec(data) => {
2013 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
2014 vec![quote! { #buf.push(#binding); }]
2015 }
2016 EnumResolvedField::Map(data) => {
2017 let keys_buf =
2018 format_ident!("__s_{}_{}_keys", snake, data.base_name);
2019 let vals_buf =
2020 format_ident!("__s_{}_{}_values", snake, data.base_name);
2021 // unzip() guarantees pairwise alignment of keys and values.
2022 vec![quote! {
2023 let (__mx_keys, __mx_vals) = #binding.into_iter().unzip::<_, _, Vec<_>, Vec<_>>();
2024 #keys_buf.push(__mx_keys);
2025 #vals_buf.push(__mx_vals);
2026 }]
2027 }
2028 // Struct field: push binding directly (split only sees this variant's rows,
2029 // so every row has the field — no Option needed).
2030 EnumResolvedField::Struct(data) => {
2031 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
2032 vec![quote! { #buf.push(#binding); }]
2033 }
2034 }
2035 })
2036 .collect();
2037
2038 let arm = match vi.shape {
2039 VariantShape::Named => {
2040 let mut field_bindings: Vec<TokenStream> = vi
2041 .fields
2042 .iter()
2043 .map(|erf| {
2044 let rust_name = erf.rust_name();
2045 let binding = erf.binding();
2046 quote! { #rust_name: #binding }
2047 })
2048 .collect();
2049 for skipped in &vi.skipped_fields {
2050 field_bindings.push(quote! { #skipped: _ });
2051 }
2052 quote! {
2053 #row_name::#variant_name { #(#field_bindings),* } => {
2054 #(#push_stmts)*
2055 }
2056 }
2057 }
2058 VariantShape::Tuple => {
2059 let bindings: Vec<TokenStream> = vi
2060 .fields
2061 .iter()
2062 .map(|erf| {
2063 let binding = erf.binding();
2064 quote! { #binding }
2065 })
2066 .collect();
2067 quote! {
2068 #row_name::#variant_name(#(#bindings),*) => {
2069 #(#push_stmts)*
2070 }
2071 }
2072 }
2073 VariantShape::Unit => unreachable!("handled above"),
2074 };
2075 match_arms.push(arm);
2076
2077 // Construct the data.frame for this variant
2078 if has_auto {
2079 // Dynamic path: build Vec<(String, SEXP)>
2080 let pairs_var = format_ident!("__pairs_{}", snake);
2081 let n_var = format_ident!("__n_{}", snake);
2082
2083 // Find the first non-dynamic field for the length expression, or first dynamic.
2084 // "Dynamic" = AutoExpandVec or Struct (both use dynamic pairs path).
2085 let len_expr: TokenStream = {
2086 let first_non_dynamic = vi.fields.iter().find(|f| {
2087 !matches!(
2088 f,
2089 EnumResolvedField::AutoExpandVec(_) | EnumResolvedField::Struct(_)
2090 )
2091 });
2092 if let Some(f) = first_non_dynamic {
2093 match f {
2094 EnumResolvedField::Single(data) => {
2095 let buf = format_ident!("__s_{}_{}", snake, data.col_name);
2096 quote! { #buf.len() }
2097 }
2098 EnumResolvedField::ExpandedFixed(data) => {
2099 let buf = format_ident!(
2100 "__s_{}_{}_{}",
2101 snake,
2102 data.base_name,
2103 1usize
2104 );
2105 quote! { #buf.len() }
2106 }
2107 EnumResolvedField::ExpandedVec(data) => {
2108 let buf = format_ident!(
2109 "__s_{}_{}_{}",
2110 snake,
2111 data.base_name,
2112 1usize
2113 );
2114 quote! { #buf.len() }
2115 }
2116 EnumResolvedField::AutoExpandVec(_)
2117 | EnumResolvedField::Struct(_) => unreachable!(),
2118 EnumResolvedField::Map(data) => {
2119 let keys_buf =
2120 format_ident!("__s_{}_{}_keys", snake, data.base_name);
2121 quote! { #keys_buf.len() }
2122 }
2123 }
2124 } else {
2125 // All fields are dynamic — use the first dynamic buf length
2126 if let Some(first) = vi.fields.first() {
2127 match first {
2128 EnumResolvedField::AutoExpandVec(data) => {
2129 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
2130 quote! { #buf.len() }
2131 }
2132 EnumResolvedField::Struct(data) => {
2133 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
2134 quote! { #buf.len() }
2135 }
2136 _ => quote! { 0usize },
2137 }
2138 } else {
2139 quote! { 0usize }
2140 }
2141 }
2142 };
2143
2144 // Static pair pushes — wrap each `into_sexp()` in
2145 // `__scope.protect_raw` to keep prior column SEXPs rooted
2146 // across subsequent allocations
2147 // (reviews/2026-05-07-gctorture-audit.md).
2148 let static_pushes: Vec<TokenStream> = vi
2149 .fields
2150 .iter()
2151 .flat_map(|erf| match erf {
2152 EnumResolvedField::Single(data) => {
2153 let buf = format_ident!("__s_{}_{}", snake, data.col_name);
2154 let col_str = data.col_name.to_string();
2155 let ty = &data.ty;
2156 if data.needs_into_list {
2157 vec![quote! {
2158 {
2159 let __as_list_col: Vec<::miniextendr_api::list::List> =
2160 #buf.into_iter()
2161 .map(|v: #ty| ::miniextendr_api::list::IntoList::into_list(v))
2162 .collect();
2163 #pairs_var.push((
2164 #col_str.to_string(),
2165 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(__as_list_col)),
2166 ));
2167 }
2168 }]
2169 } else if data.is_factor {
2170 // Factor column: convert Vec<T> → FactorOptionVec<T> (all present).
2171 vec![quote! {
2172 #pairs_var.push((
2173 #col_str.to_string(),
2174 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(
2175 ::miniextendr_api::factor::FactorOptionVec::<#ty>::from(
2176 #buf.into_iter().map(|v| ::std::option::Option::Some(v)).collect::<::std::vec::Vec<_>>()
2177 )
2178 )),
2179 ));
2180 }]
2181 } else {
2182 vec![quote! {
2183 #pairs_var.push((
2184 #col_str.to_string(),
2185 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#buf)),
2186 ));
2187 }]
2188 }
2189 }
2190 EnumResolvedField::ExpandedFixed(data) => (1..=data.len)
2191 .map(|i| {
2192 let buf = format_ident!(
2193 "__s_{}_{}_{}", snake, data.base_name, i
2194 );
2195 let col_str = format!("{}_{}", data.base_name, i);
2196 quote! {
2197 #pairs_var.push((
2198 #col_str.to_string(),
2199 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#buf)),
2200 ));
2201 }
2202 })
2203 .collect(),
2204 EnumResolvedField::ExpandedVec(data) => (1..=data.width)
2205 .map(|i| {
2206 let buf = format_ident!(
2207 "__s_{}_{}_{}", snake, data.base_name, i
2208 );
2209 let col_str = format!("{}_{}", data.base_name, i);
2210 quote! {
2211 #pairs_var.push((
2212 #col_str.to_string(),
2213 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#buf)),
2214 ));
2215 }
2216 })
2217 .collect(),
2218 EnumResolvedField::AutoExpandVec(data) => {
2219 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
2220 let base_str = &data.base_name;
2221 let elem_ty = &data.elem_ty;
2222 vec![quote! {
2223 {
2224 let __auto = #buf;
2225 let __max = __auto.iter().map(|v| v.len()).max().unwrap_or(0);
2226 let mut __auto_cols: Vec<Vec<Option<#elem_ty>>> = (0..__max)
2227 .map(|_| Vec::with_capacity(#n_var))
2228 .collect();
2229 for __row_vec in &__auto {
2230 for (__ai, __acol) in __auto_cols.iter_mut().enumerate() {
2231 __acol.push(__row_vec.get(__ai).cloned());
2232 }
2233 }
2234 for (__ai, __acol) in __auto_cols.into_iter().enumerate() {
2235 #pairs_var.push((
2236 format!("{}_{}", #base_str, __ai + 1),
2237 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(__acol)),
2238 ));
2239 }
2240 }
2241 }]
2242 }
2243 EnumResolvedField::Map(data) => {
2244 let keys_buf =
2245 format_ident!("__s_{}_{}_keys", snake, data.base_name);
2246 let vals_buf =
2247 format_ident!("__s_{}_{}_values", snake, data.base_name);
2248 let keys_str = format!("{}_keys", data.base_name);
2249 let vals_str = format!("{}_values", data.base_name);
2250 vec![
2251 quote! {
2252 #pairs_var.push((
2253 #keys_str.to_string(),
2254 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#keys_buf)),
2255 ));
2256 },
2257 quote! {
2258 #pairs_var.push((
2259 #vals_str.to_string(),
2260 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#vals_buf)),
2261 ));
2262 },
2263 ]
2264 }
2265 // Struct field: call Inner::to_dataframe(buf), extract columns,
2266 // push with prefixed names. In the split path, all rows belong to
2267 // this variant so no scatter is needed.
2268 EnumResolvedField::Struct(data) => {
2269 let buf = format_ident!("__s_{}_{}", snake, data.base_name);
2270 let base_str = &data.base_name;
2271 let inner_ty = &data.inner_ty;
2272 vec![quote! {
2273 {
2274 let __inner_df = <#inner_ty>::to_dataframe(#buf);
2275 let __inner_cols = ::miniextendr_api::convert::IntoDataFrame::into_named_columns(__inner_df);
2276 for (__inner_col_name, __inner_col_sexp) in __inner_cols {
2277 let __prefixed = format!("{}_{}", #base_str, __inner_col_name);
2278 #pairs_var.push((
2279 __prefixed,
2280 __scope.protect_raw(__inner_col_sexp),
2281 ));
2282 }
2283 }
2284 }]
2285 }
2286 })
2287 .collect();
2288
2289 df_constructions.push(quote! {
2290 let #n_var = #len_expr;
2291 // SAFETY: split-method runs on the R main thread; scope
2292 // unprotects after each variant data.frame is built.
2293 let #df_var = unsafe {
2294 let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
2295 let mut #pairs_var: Vec<(String, ::miniextendr_api::ffi::SEXP)> = Vec::new();
2296 #(#static_pushes)*
2297 ::miniextendr_api::list::List::from_raw_pairs(#pairs_var)
2298 .set_class_str(&["data.frame"])
2299 .set_row_names_int(#n_var)
2300 };
2301 });
2302 } else {
2303 // Static path: vec![...] of (&str, SEXP) pairs
2304 let n_var = format_ident!("__n_{}", snake);
2305
2306 // Length expression: first field's buffer length
2307 let len_expr: TokenStream = if let Some(erf) = vi.fields.first() {
2308 match erf {
2309 EnumResolvedField::Single(data) => {
2310 let buf = format_ident!("__s_{}_{}", snake, data.col_name);
2311 quote! { #buf.len() }
2312 }
2313 EnumResolvedField::ExpandedFixed(data) => {
2314 let buf =
2315 format_ident!("__s_{}_{}_{}", snake, data.base_name, 1usize);
2316 quote! { #buf.len() }
2317 }
2318 EnumResolvedField::ExpandedVec(data) => {
2319 let buf =
2320 format_ident!("__s_{}_{}_{}", snake, data.base_name, 1usize);
2321 quote! { #buf.len() }
2322 }
2323 // AutoExpandVec and Struct both trigger has_auto = true, so these
2324 // branches are unreachable in the non-auto static path.
2325 EnumResolvedField::AutoExpandVec(_) | EnumResolvedField::Struct(_) => {
2326 unreachable!()
2327 }
2328 EnumResolvedField::Map(data) => {
2329 let keys_buf =
2330 format_ident!("__s_{}_{}_keys", snake, data.base_name);
2331 quote! { #keys_buf.len() }
2332 }
2333 }
2334 } else {
2335 // No fields (unexpected for Named/Tuple, but handle it)
2336 quote! { 0usize }
2337 };
2338
2339 // Collect pairs — each `into_sexp()` is rooted via
2340 // `__scope.protect_raw` so prior columns survive the
2341 // next column's allocation
2342 // (reviews/2026-05-07-gctorture-audit.md).
2343 let pairs: Vec<TokenStream> = vi
2344 .fields
2345 .iter()
2346 .flat_map(|erf| match erf {
2347 EnumResolvedField::Single(data) => {
2348 let buf = format_ident!("__s_{}_{}", snake, data.col_name);
2349 let col_str = data.col_name.to_string();
2350 let ty = &data.ty;
2351 if data.needs_into_list {
2352 // Convert Vec<T> → Vec<List> → SEXP at split time.
2353 vec![quote! {
2354 (#col_str, {
2355 let __as_list_col: Vec<::miniextendr_api::list::List> =
2356 #buf.into_iter()
2357 .map(|v: #ty| ::miniextendr_api::list::IntoList::into_list(v))
2358 .collect();
2359 __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(__as_list_col))
2360 })
2361 }]
2362 } else if data.is_factor {
2363 // Factor: convert Vec<T> → FactorOptionVec<T> (all present).
2364 vec![quote! {
2365 (#col_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(
2366 ::miniextendr_api::factor::FactorOptionVec::<#ty>::from(
2367 #buf.into_iter().map(|v| ::std::option::Option::Some(v)).collect::<::std::vec::Vec<_>>()
2368 )
2369 )))
2370 }]
2371 } else {
2372 vec![quote! {
2373 (#col_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#buf)))
2374 }]
2375 }
2376 }
2377 EnumResolvedField::ExpandedFixed(data) => (1..=data.len)
2378 .map(|i| {
2379 let buf =
2380 format_ident!("__s_{}_{}_{}", snake, data.base_name, i);
2381 let col_str = format!("{}_{}", data.base_name, i);
2382 quote! {
2383 (#col_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#buf)))
2384 }
2385 })
2386 .collect(),
2387 EnumResolvedField::ExpandedVec(data) => (1..=data.width)
2388 .map(|i| {
2389 let buf =
2390 format_ident!("__s_{}_{}_{}", snake, data.base_name, i);
2391 let col_str = format!("{}_{}", data.base_name, i);
2392 quote! {
2393 (#col_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#buf)))
2394 }
2395 })
2396 .collect(),
2397 // AutoExpandVec and Struct both trigger has_auto = true.
2398 EnumResolvedField::AutoExpandVec(_) | EnumResolvedField::Struct(_) => unreachable!(),
2399 EnumResolvedField::Map(data) => {
2400 let keys_buf =
2401 format_ident!("__s_{}_{}_keys", snake, data.base_name);
2402 let vals_buf =
2403 format_ident!("__s_{}_{}_values", snake, data.base_name);
2404 let keys_str = format!("{}_keys", data.base_name);
2405 let vals_str = format!("{}_values", data.base_name);
2406 vec![
2407 quote! {
2408 (#keys_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#keys_buf)))
2409 },
2410 quote! {
2411 (#vals_str, __scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#vals_buf)))
2412 },
2413 ]
2414 }
2415 })
2416 .collect();
2417
2418 df_constructions.push(quote! {
2419 let #n_var = #len_expr;
2420 // SAFETY: see has_auto branch.
2421 let #df_var = unsafe {
2422 let __scope = ::miniextendr_api::gc_protect::ProtectScope::new();
2423 ::miniextendr_api::list::List::from_raw_pairs(vec![
2424 #(#pairs),*
2425 ])
2426 .set_class_str(&["data.frame"])
2427 .set_row_names_int(#n_var)
2428 };
2429 });
2430 }
2431 } // endregion
2432 }
2433 }
2434
2435 // Build the method body
2436 let body = if variant_infos.len() == 1 {
2437 // Single variant: return the data.frame directly
2438 let df_var = &df_var_names[0];
2439 quote! {
2440 #(#buf_decls)*
2441 for __row in rows {
2442 match __row {
2443 #(#match_arms)*
2444 }
2445 }
2446 #(#df_constructions)*
2447 #df_var
2448 }
2449 } else {
2450 // Multiple variants: return named list of data.frames.
2451 // Each per-variant data.frame's `into_sexp()` is rooted via
2452 // `__outer_scope.protect_raw` so prior variant data.frames survive
2453 // the next variant's allocation
2454 // (reviews/2026-05-07-gctorture-audit.md).
2455 let outer_pairs: Vec<TokenStream> = snake_names
2456 .iter()
2457 .zip(df_var_names.iter())
2458 .map(|(name, var)| {
2459 quote! { (#name, __outer_scope.protect_raw(::miniextendr_api::IntoR::into_sexp(#var))) }
2460 })
2461 .collect();
2462
2463 quote! {
2464 #(#buf_decls)*
2465 for __row in rows {
2466 match __row {
2467 #(#match_arms)*
2468 }
2469 }
2470 #(#df_constructions)*
2471 // SAFETY: split-method runs on the R main thread.
2472 unsafe {
2473 let __outer_scope = ::miniextendr_api::gc_protect::ProtectScope::new();
2474 ::miniextendr_api::list::List::from_raw_pairs(vec![
2475 #(#outer_pairs),*
2476 ])
2477 }
2478 }
2479 };
2480
2481 quote! {
2482 impl #impl_generics #row_name #ty_generics #where_clause {
2483 /// Partition rows by variant and return one data.frame per variant.
2484 ///
2485 /// For single-variant enums, returns the data.frame directly.
2486 /// For multi-variant enums, returns a named R list of data.frames where
2487 /// each name is the variant name in snake_case. Each data.frame has only
2488 /// that variant's columns (non-optional types — no NA fill).
2489 pub fn to_dataframe_split(rows: Vec<Self>) -> ::miniextendr_api::list::List {
2490 #body
2491 }
2492 }
2493 }
2494}
2495// endregion