Skip to main content

miniextendr_lint/
crate_index.rs

1//! Shared crate index built from a single parse pass over all source files.
2//!
3//! All lint rules operate on this index rather than re-parsing files.
4
5use std::collections::{HashMap, HashSet};
6use std::fs;
7use std::path::{Path, PathBuf};
8
9use syn::Item;
10use syn::spanned::Spanned;
11
12use crate::helpers::{
13    extract_cfg_attrs, extract_path_attr, extract_roxygen_tags, has_altrep_derive,
14    has_external_ptr_derive, has_miniextendr_attr, has_vctrs_derive, impl_type_name,
15    is_altrep_struct, parse_miniextendr_impl_attrs,
16};
17
18// region: Impl method entry
19
/// How an impl method takes `self`; the lint-side twin of `ReceiverKind` in `miniextendr-macros`.
///
/// Mirror: `miniextendr-macros/src/miniextendr_impl.rs` — `ReceiverKind`.
/// The two enums must stay in sync: when the macro relaxes a receiver kind, update this one too.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum MethodReceiverKind {
    /// No `self` at all — a static / associated function.
    None,
    /// `&self`
    Ref,
    /// `&mut self`
    RefMut,
    /// `self` (consuming)
    Value,
    /// `self: &ExternalPtr<Self>`
    ExternalPtrRef,
    /// `self: &mut ExternalPtr<Self>`
    ExternalPtrRefMut,
    /// `self: ExternalPtr<Self>`
    ExternalPtrValue,
}

impl MethodReceiverKind {
    /// Whether this receiver counts as an instance receiver.
    ///
    /// Mirrors `ReceiverKind::is_instance` in `miniextendr-macros/src/miniextendr_impl.rs`.
    /// `None` and `Value` are the only kinds that answer `false`.  `Value` (consuming `self`)
    /// is deliberately left out: the macro handles consuming-`self` methods separately, as
    /// constructors (`returns Self` or `#[miniextendr(constructor)]`) or finalizers rather
    /// than ordinary instance calls.  Counting it here would raise a false positive for a
    /// vctrs method marked `#[miniextendr(constructor)]` that consumes `self`.
    pub fn is_instance(self) -> bool {
        match self {
            Self::None | Self::Value => false,
            Self::Ref
            | Self::RefMut
            | Self::ExternalPtrRef
            | Self::ExternalPtrRefMut
            | Self::ExternalPtrValue => true,
        }
    }

    /// Source-level spelling of the receiver, for use in diagnostic messages.
    pub fn spelling(self) -> &'static str {
        match self {
            // No receiver.
            Self::None => "(none)",
            // Shorthand receivers.
            Self::Value => "self",
            Self::Ref => "&self",
            Self::RefMut => "&mut self",
            // Typed `ExternalPtr` receivers.
            Self::ExternalPtrValue => "self: ExternalPtr<Self>",
            Self::ExternalPtrRef => "self: &ExternalPtr<Self>",
            Self::ExternalPtrRefMut => "self: &mut ExternalPtr<Self>",
        }
    }
}
74
75/// Per-method data collected during the crate-index pass for impl-method lint rules.
///
/// One entry is recorded for each `fn` item inside a `#[miniextendr]` inherent impl block.
76#[derive(Clone, Debug)]
77pub struct ImplMethodEntry {
    /// Method identifier as written in the source.
78    pub method_name: String,
    /// Line of the method identifier, taken from its span.
79    pub line: usize,
    /// Class system given on the enclosing impl block's attribute; empty when none was given.
80    pub class_system: String,
81    /// Stringified return type tokens (empty string = `()` / no explicit return).
82    pub return_type_str: String,
83    /// Receiver kind detected from the method signature.
84    pub receiver_kind: MethodReceiverKind,
85    /// True when the method carries `#[miniextendr(constructor)]`.
86    pub has_constructor_attr: bool,
87}
88
89// endregion
90
91// region: Lint item types
92
/// Category of an item recorded in the index.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LintKind {
    Function,
    Impl,
    Struct,
    TraitImpl,
    Vctrs,
}

/// A single lint-relevant item found in source.
///
/// Identity (equality and hashing) is defined by `kind`, `name` and `label` only;
/// `line` is carried along for diagnostics and never takes part in comparisons.
#[derive(Clone, Debug)]
pub struct LintItem {
    pub kind: LintKind,
    pub name: String,
    pub label: Option<String>,
    pub line: usize,
}

impl PartialEq for LintItem {
    /// Compares everything except `line`.
    fn eq(&self, other: &Self) -> bool {
        (self.kind, &self.name, &self.label) == (other.kind, &other.name, &other.label)
    }
}

impl Eq for LintItem {}

impl std::hash::Hash for LintItem {
    /// Feeds `kind`, `name`, `label` (in that order) to the hasher, matching `eq`.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        std::hash::Hash::hash(&self.kind, state);
        std::hash::Hash::hash(&self.name, state);
        std::hash::Hash::hash(&self.label, state);
    }
}

impl LintItem {
    /// Create an item without a label.
    pub fn new(kind: LintKind, name: String, line: usize) -> Self {
        Self::with_label(kind, name, None, line)
    }

    /// Create an item with an optional label.
    pub fn with_label(kind: LintKind, name: String, label: Option<String>, line: usize) -> Self {
        Self {
            kind,
            name,
            label,
            line,
        }
    }
}
145// endregion
146
147// region: Attributed trait impls from source
148
/// A `#[miniextendr]`-attributed `impl Trait for Type` block found in source.
149#[derive(Clone, Debug)]
150pub struct AttributedTraitImpl {
    /// Name of the implementing type, as resolved by `impl_type_name`.
151    pub type_name: String,
    /// Last path segment of the implemented trait.
152    pub trait_name: String,
    /// Class system from the impl block's attribute, if one was given.
153    pub class_system: Option<String>,
    /// Line on which the impl's self type starts.
154    pub line: usize,
155}
156// endregion
157
158// region: Per-file parsed data
159
/// Everything the index records about one source file.
///
/// Filled by a single walk over the file's parsed items plus raw-text scans of its
/// lines; starts out empty via `Default`.
160#[derive(Debug, Default)]
161pub struct FileData {
162    // Source items (functions, impls, structs with #[miniextendr])
163    pub miniextendr_items: Vec<LintItem>,
164
165    // Type/derive information
    /// Names of structs that carry the external-pointer derive.
166    pub types_with_external_ptr: HashSet<String>,
    /// Names of types that have an `impl TypedExternal for ...` block.
167    pub types_with_typed_external: HashSet<String>,
168
169    // Impl block details
    /// type_name → (class_system, line) of an attributed inherent impl. A later impl
    /// block for the same type replaces the earlier entry.
170    pub inherent_impl_class_systems: HashMap<String, (String, usize)>,
    /// Attributed trait impls, in the order they were encountered.
171    pub attributed_trait_impls: Vec<AttributedTraitImpl>,
    /// type_name → one (label, line) pair per attributed inherent impl block.
172    pub impl_blocks_per_type: HashMap<String, Vec<(Option<String>, usize)>>,
173
174    // Function details
    /// Function name → whether its visibility is plain `pub`.
175    pub fn_visibility: HashMap<String, bool>,
176
177    // Module tree (for file discovery)
178    /// Simple `mod child;` declarations (by ident name).
179    pub declared_child_mods: Vec<String>,
180    /// `#[path = "file.rs"] mod name;` declarations: (mod_name, file_path_str).
181    pub path_redirected_mods: Vec<(String, String)>,
182    /// cfg attrs on `mod child;` declarations: mod_name -> cfg strings.
183    pub mod_decl_cfgs: HashMap<String, Vec<String>>,
184
185    // Export control
186    /// (has_internal, has_noexport, line)
187    pub export_control: HashMap<String, (bool, bool, usize)>,
188
189    // Impl method details for per-method lint rules
190    /// Methods per inherent impl type: type_name → Vec<ImplMethodEntry>.
191    pub impl_methods: HashMap<String, Vec<ImplMethodEntry>>,
192
193    // Doc-comment roxygen tags per function/impl name
194    /// Known roxygen tags: "@noRd", "@export", "@keywords internal"
195    pub fn_doc_tags: HashMap<String, Vec<String>>,
196
197    // Safety lint data
198    /// Lines containing direct Rf_error/Rf_errorcall calls: (function_name, line_number).
199    pub rf_error_calls: Vec<(String, usize)>,
200    /// Lines containing `ffi::*_unchecked()` calls: (function_name, line_number).
201    pub ffi_unchecked_calls: Vec<(String, usize)>,
202
203    // Parameter names, input for the R reserved-word check
204    /// Maps fn/method name → list of (param_name, line) for every named parameter; filtering for R reserved words is left to the consumer.
205    /// Key for free functions is the function name; for impl methods it is `"TypeName::method_name"`.
206    pub fn_param_names: HashMap<String, Vec<(String, usize)>>,
207
208    // Lifetime parameter lint (MXL112)
209    /// `#[miniextendr]` functions or impl blocks that carry explicit lifetime params.
210    /// Each entry is `(name, line)` where `name` is the function or type name.
211    pub lifetime_param_items: Vec<(String, usize)>,
212
213    // Interleaved doc/non-doc attributes lint (MXL302)
214    /// `#[miniextendr]` items where a non-doc attribute interrupts a doc-comment stream.
215    /// Each entry is `(item_name, line_of_interrupting_attr)`.
216    pub interleaved_doc_attrs: Vec<(String, usize)>,
217}
218// endregion
219
220// region: Crate index
221
222/// Shared parsed state for all lint rules.
223pub struct CrateIndex {
224    /// All scanned Rust source files.
225    pub files: Vec<PathBuf>,
226    /// Per-file parsed data.
227    pub file_data: HashMap<PathBuf, FileData>,
228}
229
230impl CrateIndex {
231    /// Build the index from a crate root directory.
232    pub fn build(root: &Path) -> Result<Self, String> {
233        let src_dir = if root.join("src").is_dir() {
234            root.join("src")
235        } else {
236            root.to_path_buf()
237        };
238
239        if !src_dir.is_dir() {
240            return Err(format!(
241                "miniextendr-lint: root is not a directory: {}",
242                src_dir.display()
243            ));
244        }
245
246        let mut rs_files = Vec::new();
247        collect_rs_files_from_module_tree(&src_dir, &mut rs_files)?;
248        rs_files.sort();
249
250        let mut file_data = HashMap::new();
251        let mut parse_errors = Vec::new();
252
253        for path in &rs_files {
254            match parse_file(path) {
255                Ok(data) => {
256                    file_data.insert(path.clone(), data);
257                }
258                Err(err) => parse_errors.push(err),
259            }
260        }
261
262        if !parse_errors.is_empty() {
263            return Err(parse_errors.join("; "));
264        }
265
266        Ok(Self {
267            files: rs_files,
268            file_data,
269        })
270    }
271}
272// endregion
273
274// region: File collection (module-tree walker)
275
276/// Collect Rust source files by walking the module tree from `lib.rs`,
277/// following `mod child;` declarations and respecting `#[cfg(feature = "...")]`
278/// gates via `CARGO_FEATURE_*` environment variables.
279fn collect_rs_files_from_module_tree(src_dir: &Path, out: &mut Vec<PathBuf>) -> Result<(), String> {
280    let lib_rs = src_dir.join("lib.rs");
281    if !lib_rs.is_file() {
282        return Err(format!(
283            "miniextendr-lint: cannot find lib.rs in {}",
284            src_dir.display()
285        ));
286    }
287
288    let active_features = collect_active_cargo_features();
289    let mut seen = HashSet::new();
290    walk_module_file(&lib_rs, &active_features, out, &mut seen);
291    Ok(())
292}
293
/// Read the active Cargo features from the process environment.
///
/// Every variable named `CARGO_FEATURE_<NAME>` contributes one entry; its value is
/// ignored. Names are lowercased and `_` becomes `-`, so `CARGO_FEATURE_FOO_BAR`
/// is reported as `"foo-bar"`.
fn collect_active_cargo_features() -> HashSet<String> {
    const ENV_PREFIX: &str = "CARGO_FEATURE_";

    let mut features = HashSet::new();
    for (key, _value) in std::env::vars() {
        if let Some(suffix) = key.strip_prefix(ENV_PREFIX) {
            features.insert(suffix.to_lowercase().replace('_', "-"));
        }
    }
    features
}
304
305/// Recursively walk a module file, following `mod` declarations.
306fn walk_module_file(
307    file: &Path,
308    active_features: &HashSet<String>,
309    out: &mut Vec<PathBuf>,
310    seen: &mut HashSet<PathBuf>,
311) {
312    if !file.is_file() {
313        return;
314    }
315
316    let file_buf = file.to_path_buf();
317    if !seen.insert(file_buf.clone()) {
318        return;
319    }
320
321    out.push(file_buf);
322
323    // Parse the file to discover mod declarations
324    let Ok(src) = fs::read_to_string(file) else {
325        return;
326    };
327    let Ok(parsed) = syn::parse_file(&src) else {
328        return;
329    };
330
331    let parent_dir = match file.parent() {
332        Some(dir) => dir,
333        None => return,
334    };
335
336    // Determine the stem-based subdirectory for non-lib/mod files.
337    // For `foo.rs`, child modules live in `foo/`.
338    // For `lib.rs` or `mod.rs`, child modules live in the same directory.
339    let child_dir = {
340        let stem = file.file_stem().and_then(|s| s.to_str());
341        match stem {
342            Some("lib" | "mod") => parent_dir.to_path_buf(),
343            Some(name) => parent_dir.join(name),
344            None => parent_dir.to_path_buf(),
345        }
346    };
347
348    discover_mod_declarations(&parsed.items, &child_dir, active_features, out, seen);
349}
350
351/// Walk parsed items looking for `mod child;` declarations and recurse.
352fn discover_mod_declarations(
353    items: &[Item],
354    child_dir: &Path,
355    active_features: &HashSet<String>,
356    out: &mut Vec<PathBuf>,
357    seen: &mut HashSet<PathBuf>,
358) {
359    for item in items {
360        let Item::Mod(item_mod) = item else {
361            continue;
362        };
363
364        if let Some((_, child_items)) = &item_mod.content {
365            // Inline module — recurse into its items (same file)
366            discover_mod_declarations(child_items, child_dir, active_features, out, seen);
367        } else {
368            // Out-of-line module declaration: `mod child;`
369            // Check if cfg-gated and whether the gate is active
370            let cfgs = extract_cfg_attrs(&item_mod.attrs);
371            if !cfgs.is_empty() && !is_cfg_active(&cfgs, active_features) {
372                continue; // Feature not enabled, skip this module
373            }
374
375            let mod_name = item_mod.ident.to_string();
376
377            // Check for #[path = "file.rs"] attribute
378            let path_attr = extract_path_attr(&item_mod.attrs);
379
380            if let Some(file_path) = path_attr {
381                let target = child_dir.join(&file_path);
382                walk_module_file(&target, active_features, out, seen);
383            } else {
384                // Try child.rs first, then child/mod.rs
385                let sibling = child_dir.join(format!("{mod_name}.rs"));
386                if sibling.is_file() {
387                    walk_module_file(&sibling, active_features, out, seen);
388                } else {
389                    let subdir_mod = child_dir.join(&mod_name).join("mod.rs");
390                    walk_module_file(&subdir_mod, active_features, out, seen);
391                }
392            }
393        }
394    }
395}
396
397/// Evaluate whether a set of `#[cfg(...)]` attributes is active given the current features.
398fn is_cfg_active(cfgs: &[String], active_features: &HashSet<String>) -> bool {
399    for cfg_str in cfgs {
400        if let Some(result) = eval_cfg_str(cfg_str, active_features)
401            && !result
402        {
403            return false;
404        }
405    }
406    true
407}
408
/// Try to evaluate a single cfg string like `cfg(feature = "foo")`.
///
/// Supported shapes (whitespace is ignored):
/// * `cfg(feature = "name")`       → `Some(name is active)`
/// * `cfg(not(feature = "name"))`  → `Some(name is not active)`
///
/// Anything else (other predicates, `any`/`all`, malformed input) yields `None`,
/// meaning "cannot tell".
///
/// Cargo exposes features through `CARGO_FEATURE_<NAME>` with the name uppercased
/// and `-` turned into `_`, so `foo-bar` and `foo_bar` are indistinguishable in the
/// environment. The lookup therefore accepts the name either exactly as written or
/// in its lowercased, dash-separated form.
fn eval_cfg_str(cfg_str: &str, active_features: &HashSet<String>) -> Option<bool> {
    let normalized: String = cfg_str.chars().filter(|c| !c.is_whitespace()).collect();

    let inner = normalized.strip_prefix("cfg(")?.strip_suffix(')')?;

    // Peel off one optional `not(...)` wrapper.
    let (predicate, negated) = match inner
        .strip_prefix("not(")
        .and_then(|s| s.strip_suffix(')'))
    {
        Some(negated_inner) => (negated_inner, true),
        None => (inner, false),
    };

    let feature = extract_feature_name(predicate)?;
    let env_form = feature.to_lowercase().replace('_', "-");
    let is_active = active_features.contains(&feature) || active_features.contains(&env_form);

    Some(is_active != negated)
}

/// Extract the feature name from a string like `feature="foo"`.
///
/// Surrounding double quotes are removed, including backslash-escaped ones
/// (`feature=\"foo\"`). Returns `None` when the input is not a `feature=` predicate
/// or the name is empty.
fn extract_feature_name(s: &str) -> Option<String> {
    let value = s.strip_prefix("feature")?.strip_prefix('=')?;
    let name = value.trim_matches(|c| c == '"' || c == '\\');
    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
442// endregion
443
444// region: Single-file parsing
445
446fn parse_file(path: &Path) -> Result<FileData, String> {
447    let src = fs::read_to_string(path)
448        .map_err(|err| format!("{}: failed to read: {err}", path.display()))?;
449
450    let parsed = syn::parse_file(&src)
451        .map_err(|err| format!("{}: failed to parse: {err}", path.display()))?;
452
453    let mut data = FileData::default();
454    collect_items_recursive(&parsed.items, &mut data);
455
456    // Both raw-source scanners need the line-split for is_suppressed look-behind.
457    let lines: Vec<&str> = src.lines().collect();
458    scan_rf_error_calls(&lines, &mut data);
459    scan_ffi_unchecked_calls(&lines, &mut data);
460
461    Ok(data)
462}
463
464/// Extract named parameter names (and their 1-based line numbers) from a function signature.
465///
466/// Skips `self` / `&self` / `&mut self` receiver parameters. Skips unnamed (`_`) parameters.
467fn extract_param_names(sig: &syn::Signature) -> Vec<(String, usize)> {
468    let mut params = Vec::new();
469    for input in &sig.inputs {
470        if let syn::FnArg::Typed(pat_type) = input
471            && let syn::Pat::Ident(pat_ident) = &*pat_type.pat
472        {
473            let name = pat_ident.ident.to_string();
474            // Skip `_` (bare anonymous). Named `_foo` patterns are kept because
475            // the proc-macro forwards the name verbatim (stripping only the leading
476            // underscore in some codegen paths), so they can still collide with R
477            // reserved words.
478            if name == "_" {
479                continue;
480            }
481            let line = pat_ident.ident.span().start().line;
482            params.push((name, line));
483        }
484    }
485    params
486}
487
488/// Recursively collect all lint-relevant information from parsed items.
///
/// What is recorded, per item kind:
/// * `fn` with a miniextendr attribute — lint item, visibility, export control,
///   roxygen tags, parameter names, lifetime params, interleaved doc attributes.
/// * `struct` — lint items for ALTREP and vctrs structs, plus the external-pointer
///   derive set. Structs are inspected whether or not they carry the attribute.
/// * `impl` — `TypedExternal` impls are noted for every impl block; everything else
///   only for blocks with a miniextendr attribute.
/// * `mod` — inline modules are descended into; out-of-line declarations are
///   recorded (name, cfg attrs, `#[path]` redirect) but not followed here.
///
/// All other item kinds, and functions without the attribute, are ignored.
489fn collect_items_recursive(items: &[Item], data: &mut FileData) {
490    for item in items {
491        match item {
492            Item::Fn(item_fn) if has_miniextendr_attr(&item_fn.attrs) => {
493                let line = item_fn.sig.ident.span().start().line;
494                let name = item_fn.sig.ident.to_string();
495
496                data.miniextendr_items
497                    .push(LintItem::new(LintKind::Function, name.clone(), line));
498
499                // Track visibility
                // Only plain `pub` counts as public here; restricted forms such as
                // `pub(crate)` are recorded as `false`.
500                let is_pub = matches!(item_fn.vis, syn::Visibility::Public(_));
501                data.fn_visibility.insert(name.clone(), is_pub);
502
503                // Track export control
504                let attrs = parse_miniextendr_impl_attrs(&item_fn.attrs);
505                if attrs.internal || attrs.noexport {
506                    data.export_control
507                        .insert(name.clone(), (attrs.internal, attrs.noexport, line));
508                }
509
510                // Track doc-comment roxygen tags
511                let doc_tags = extract_roxygen_tags(&item_fn.attrs);
512                if !doc_tags.is_empty() {
513                    data.fn_doc_tags.insert(name.clone(), doc_tags);
514                }
515
516                // Track parameter names for R reserved-word check (MXL110)
517                let params = extract_param_names(&item_fn.sig);
518                if !params.is_empty() {
519                    data.fn_param_names.insert(name.clone(), params);
520                }
521
522                // Track explicit lifetime params for MXL112
523                let has_lifetime = item_fn
524                    .sig
525                    .generics
526                    .params
527                    .iter()
528                    .any(|p| matches!(p, syn::GenericParam::Lifetime(_)));
529                if has_lifetime {
530                    data.lifetime_param_items.push((name.clone(), line));
531                }
532
533                // Detect interleaved doc/non-doc attributes for MXL302
534                if let Some(interrupt_line) = find_interleaved_doc_attr(&item_fn.attrs) {
535                    data.interleaved_doc_attrs.push((name, interrupt_line));
536                }
537            }
538            Item::Struct(item_struct) => {
                // ALTREP structs are recognised either through the miniextendr
                // attribute or through the ALTREP derive.
539                let is_miniextendr_altrep =
540                    has_miniextendr_attr(&item_struct.attrs) && is_altrep_struct(item_struct);
541                let is_derive_altrep = has_altrep_derive(&item_struct.attrs);
542                if is_miniextendr_altrep || is_derive_altrep {
543                    let line = item_struct.ident.span().start().line;
544                    data.miniextendr_items.push(LintItem::new(
545                        LintKind::Struct,
546                        item_struct.ident.to_string(),
547                        line,
548                    ));
549                }
550                if has_external_ptr_derive(&item_struct.attrs) {
551                    data.types_with_external_ptr
552                        .insert(item_struct.ident.to_string());
553                }
                // The checks are independent, so a struct that is both ALTREP and
                // vctrs produces two lint items.
554                if has_vctrs_derive(&item_struct.attrs) {
555                    let line = item_struct.ident.span().start().line;
556                    data.miniextendr_items.push(LintItem::new(
557                        LintKind::Vctrs,
558                        item_struct.ident.to_string(),
559                        line,
560                    ));
561                }
562            }
563            Item::Impl(item_impl) => {
564                // Check for impl TypedExternal for Type
                // This applies to every impl block, attributed or not.
565                if let Some((_, trait_path, _)) = &item_impl.trait_
566                    && let Some(last_seg) = trait_path.segments.last()
567                    && last_seg.ident == "TypedExternal"
568                    && let Some(type_name) = impl_type_name(&item_impl.self_ty)
569                {
570                    data.types_with_typed_external.insert(type_name);
571                }
572
573                if has_miniextendr_attr(&item_impl.attrs) {
                    // `line` is where the self type starts; it is used for every
                    // impl-level record below.
574                    let line = item_impl.self_ty.span().start().line;
575                    let impl_attrs = parse_miniextendr_impl_attrs(&item_impl.attrs);
576
577                    // Track explicit lifetime params on impl blocks for MXL112
578                    let impl_has_lifetime = item_impl
579                        .generics
580                        .params
581                        .iter()
582                        .any(|p| matches!(p, syn::GenericParam::Lifetime(_)));
583                    if impl_has_lifetime && let Some(type_name) = impl_type_name(&item_impl.self_ty)
584                    {
585                        data.lifetime_param_items.push((type_name, line));
586                    }
587
588                    // Detect interleaved doc/non-doc attributes for MXL302 (impl-level)
589                    if let Some(type_name) = impl_type_name(&item_impl.self_ty) {
590                        if let Some(interrupt_line) = find_interleaved_doc_attr(&item_impl.attrs) {
591                            data.interleaved_doc_attrs
592                                .push((type_name.clone(), interrupt_line));
593                        }
594                        // Also check individual methods within the impl
595                        for impl_item in &item_impl.items {
596                            if let syn::ImplItem::Fn(method) = impl_item
597                                && let Some(interrupt_line) =
598                                    find_interleaved_doc_attr(&method.attrs)
599                            {
600                                let method_name = method.sig.ident.to_string();
601                                data.interleaved_doc_attrs.push((
602                                    format!("{}::{}", type_name, method_name),
603                                    interrupt_line,
604                                ));
605                            }
606                        }
607                    }
608
609                    match impl_type_name(&item_impl.self_ty) {
610                        Some(type_name) => {
611                            if let Some((_, trait_path, _)) = &item_impl.trait_ {
612                                // Trait impl
613                                if let Some(trait_seg) = trait_path.segments.last() {
614                                    let trait_name = trait_seg.ident.to_string();
615                                    let full_name = format!("{} for {}", trait_name, type_name);
616                                    data.miniextendr_items.push(LintItem::new(
617                                        LintKind::TraitImpl,
618                                        full_name,
619                                        line,
620                                    ));
621                                    data.attributed_trait_impls.push(AttributedTraitImpl {
622                                        type_name: type_name.clone(),
623                                        trait_name,
624                                        class_system: impl_attrs.class_system.clone(),
625                                        line,
626                                    });
627                                }
628                            } else {
629                                // Inherent impl
                                // A missing class system is stored as the empty string.
630                                let class_system =
631                                    impl_attrs.class_system.clone().unwrap_or_default();
                                // `insert` replaces any entry from an earlier impl
                                // block of the same type; `impl_blocks_per_type`
                                // below keeps all of them.
632                                data.inherent_impl_class_systems
633                                    .insert(type_name.clone(), (class_system.clone(), line));
634                                data.impl_blocks_per_type
635                                    .entry(type_name.clone())
636                                    .or_default()
637                                    .push((impl_attrs.label.clone(), line));
638                                data.miniextendr_items.push(LintItem::with_label(
639                                    LintKind::Impl,
640                                    type_name.clone(),
641                                    impl_attrs.label.clone(),
642                                    line,
643                                ));
644
645                                // Collect method names for per-method rules (e.g. MXL111, MXL120)
                                // Methods from several impl blocks of one type
                                // accumulate in the same list.
646                                let methods =
647                                    data.impl_methods.entry(type_name.clone()).or_default();
648                                for impl_item in &item_impl.items {
649                                    if let syn::ImplItem::Fn(method) = impl_item {
650                                        let method_name = method.sig.ident.to_string();
651                                        let method_line = method.sig.ident.span().start().line;
652                                        let return_type_str =
653                                            extract_return_type_str(&method.sig.output);
654                                        let receiver_kind = detect_receiver_kind(&method.sig);
655                                        let has_constructor_attr =
656                                            has_constructor_attr(&method.attrs);
657                                        methods.push(ImplMethodEntry {
658                                            method_name,
659                                            line: method_line,
660                                            class_system: class_system.clone(),
661                                            return_type_str,
662                                            receiver_kind,
663                                            has_constructor_attr,
664                                        });
665                                    }
666                                }
667
668                                // Track export control
669                                if impl_attrs.internal || impl_attrs.noexport {
670                                    data.export_control.insert(
671                                        type_name.clone(),
672                                        (impl_attrs.internal, impl_attrs.noexport, line),
673                                    );
674                                }
675                            }
676
677                            // Track parameter names for all methods in the impl block (MXL110)
                            // Runs for trait impls and inherent impls alike. The key is
                            // `Type::method` in both cases, so a later method with the
                            // same key replaces the earlier entry.
678                            for impl_item in &item_impl.items {
679                                if let syn::ImplItem::Fn(method) = impl_item {
680                                    let method_name = method.sig.ident.to_string();
681                                    let key = format!("{}::{}", type_name, method_name);
682                                    let params = extract_param_names(&method.sig);
683                                    if !params.is_empty() {
684                                        data.fn_param_names.insert(key, params);
685                                    }
686                                }
687                            }
688                        }
689                        None => { /* unsupported impl type, skip */ }
690                    }
691                }
692            }
693            Item::Mod(item_mod) => {
694                if let Some((_, child_items)) = &item_mod.content {
695                    // Inline module
                    // Its items are folded into the same `FileData`; cfg attributes
                    // on the inline module are not consulted.
696                    collect_items_recursive(child_items, data);
697                } else {
698                    // Out-of-line module declaration
699                    let mod_name = item_mod.ident.to_string();
700
701                    // Track cfg attrs on the mod declaration
702                    let cfgs = extract_cfg_attrs(&item_mod.attrs);
703                    if !cfgs.is_empty() {
704                        data.mod_decl_cfgs.insert(mod_name.clone(), cfgs);
705                    }
706
707                    // Check for #[path = "file.rs"] attribute
708                    let path_attr = extract_path_attr(&item_mod.attrs);
709                    if let Some(file_path) = path_attr {
710                        data.path_redirected_mods.push((mod_name, file_path));
711                    } else {
712                        data.declared_child_mods.push(mod_name);
713                    }
714                }
715            }
716            _ => {}
717        }
718    }
719}
720
721/// Patterns that indicate direct Rf_error/Rf_errorcall calls in user code.
///
/// Each pattern includes the opening parenthesis; the `_unchecked` variants are
/// listed alongside the plain names.
722const RF_ERROR_PATTERNS: &[&str] = &[
723    "Rf_error(",
724    "Rf_error_unchecked(",
725    "Rf_errorcall(",
726    "Rf_errorcall_unchecked(",
727];
728
729/// Check if a lint code is suppressed via `// mxl::allow(MXL...)` comment.
730fn is_suppressed(lines: &[&str], line_idx: usize, code: &str) -> bool {
731    if line_has_allow(lines[line_idx], code) {
732        return true;
733    }
734    if line_idx > 0 && line_has_allow(lines[line_idx - 1], code) {
735        return true;
736    }
737    false
738}
739
/// Check if a single line contains `// mxl::allow(...)` matching the given code.
///
/// The marker must be spelled exactly `// mxl::allow(` and be closed by `)`; the
/// text in between is a comma-separated list of codes, each compared after
/// trimming whitespace. Every marker on the line is examined, so stacked comments
/// such as `// mxl::allow(A) // mxl::allow(B)` suppress both codes. A marker with
/// no closing parenthesis is ignored.
fn line_has_allow(line: &str, code: &str) -> bool {
    const PREFIX: &str = "// mxl::allow(";

    line.match_indices(PREFIX).any(|(pos, _)| {
        let after = &line[pos + PREFIX.len()..];
        match after.split_once(')') {
            Some((codes, _)) => codes.split(',').any(|c| c.trim() == code),
            None => false,
        }
    })
}
752
753/// Scan raw source text for `ffi::*_unchecked()` calls.
754fn scan_ffi_unchecked_calls(lines: &[&str], data: &mut FileData) {
755    for (line_idx, line) in lines.iter().enumerate() {
756        let trimmed = line.trim();
757        if trimmed.starts_with("//") {
758            continue;
759        }
760        if trimmed.starts_with("#[") {
761            continue;
762        }
763        // Strip inline comments to avoid false positives
764        let code_part = match trimmed.find("//") {
765            Some(pos) => &trimmed[..pos],
766            None => trimmed,
767        };
768        let mut search_from = 0;
769        while let Some(ffi_pos) = code_part[search_from..].find("ffi::") {
770            let abs_pos = search_from + ffi_pos;
771            let after_ffi = &code_part[abs_pos + 5..];
772            let ident_end = after_ffi
773                .find(|c: char| !c.is_alphanumeric() && c != '_')
774                .unwrap_or(after_ffi.len());
775            let ident = &after_ffi[..ident_end];
776            if ident.ends_with("_unchecked")
777                && after_ffi[ident_end..].starts_with('(')
778                && !is_suppressed(lines, line_idx, "MXL301")
779            {
780                data.ffi_unchecked_calls
781                    .push((ident.to_string(), line_idx + 1));
782            }
783            search_from = abs_pos + 5 + ident_end;
784        }
785    }
786}
787
788// region: Impl method helpers (MXL120 and future per-method rules)
789
790/// Stringify a `syn::ReturnType` to a compact token string.
791///
792/// Returns an empty string for `-> ()` / no explicit return (both mean unit).
793fn extract_return_type_str(output: &syn::ReturnType) -> String {
794    use quote::ToTokens;
795    match output {
796        syn::ReturnType::Default => String::new(),
797        syn::ReturnType::Type(_, ty) => ty.to_token_stream().to_string(),
798    }
799}
800
801/// Detect the receiver kind from a method signature.
802///
803/// Mirror: `miniextendr-macros/src/miniextendr_impl.rs` — `detect_receiver_kind`.
804/// Keep both in sync: if the macro adds a new receiver variant, update this function too.
805fn detect_receiver_kind(sig: &syn::Signature) -> MethodReceiverKind {
806    let first = match sig.inputs.first() {
807        Some(arg) => arg,
808        None => return MethodReceiverKind::None,
809    };
810    match first {
811        syn::FnArg::Receiver(recv) => {
812            // syn 2.x parses *all* `self` receiver forms as `FnArg::Receiver`, including
813            // the typed forms `self: &ExternalPtr<Self>`, `self: &mut ExternalPtr<Self>`,
814            // and `self: ExternalPtr<Self>`.  When a colon token is present the receiver
815            // has an explicit type in `recv.ty`; otherwise `recv.reference` / `recv.mutability`
816            // describe the shorthand `(&)(&mut) self`.
817            if recv.colon_token.is_some() {
818                // Typed form: `self: <ty>`.  Classify by inspecting `recv.ty`.
819                match recv.ty.as_ref() {
820                    syn::Type::Reference(r) => {
821                        if is_external_ptr_self_ty(r.elem.as_ref()) {
822                            if r.mutability.is_some() {
823                                MethodReceiverKind::ExternalPtrRefMut
824                            } else {
825                                MethodReceiverKind::ExternalPtrRef
826                            }
827                        } else if r.mutability.is_some() {
828                            MethodReceiverKind::RefMut
829                        } else {
830                            MethodReceiverKind::Ref
831                        }
832                    }
833                    ty if is_external_ptr_self_ty(ty) => MethodReceiverKind::ExternalPtrValue,
834                    _ => MethodReceiverKind::None,
835                }
836            } else {
837                // Shorthand form: `self`, `&self`, `&mut self`.
838                if recv.mutability.is_some() {
839                    MethodReceiverKind::RefMut
840                } else if recv.reference.is_some() {
841                    MethodReceiverKind::Ref
842                } else {
843                    MethodReceiverKind::Value
844                }
845            }
846        }
847        syn::FnArg::Typed(_) => {
848            // In syn 2.x, typed `self:` forms are represented as `FnArg::Receiver`, so
849            // this arm is only reached for genuinely non-`self` parameters.
850            MethodReceiverKind::None
851        }
852    }
853}
854
855/// Returns true if `ty` is `ExternalPtr<Self>` (last path segment = `ExternalPtr`,
856/// single type argument = `Self`).
857fn is_external_ptr_self_ty(ty: &syn::Type) -> bool {
858    let syn::Type::Path(p) = ty else {
859        return false;
860    };
861    let Some(last) = p.path.segments.last() else {
862        return false;
863    };
864    if last.ident != "ExternalPtr" {
865        return false;
866    }
867    let syn::PathArguments::AngleBracketed(ref args) = last.arguments else {
868        return false;
869    };
870    matches!(
871        args.args.first(),
872        Some(syn::GenericArgument::Type(syn::Type::Path(tp)))
873            if tp.path.is_ident("Self")
874    )
875}
876
877/// Returns true when the attribute list contains `#[miniextendr(constructor)]` or
878/// `#[miniextendr(r6(constructor))]` / `#[miniextendr(s3(constructor))]` etc.
879fn has_constructor_attr(attrs: &[syn::Attribute]) -> bool {
880    for attr in attrs {
881        if attr
882            .path()
883            .segments
884            .last()
885            .is_none_or(|seg| seg.ident != "miniextendr")
886        {
887            continue;
888        }
889        if let syn::Meta::List(meta_list) = &attr.meta {
890            let tokens = meta_list.tokens.to_string();
891            // Accept both `constructor` at top level and inside `r6(...)`, `s3(...)`, etc.
892            if tokens
893                .split(|c: char| !c.is_alphanumeric() && c != '_')
894                .any(|t| t == "constructor")
895            {
896                return true;
897            }
898        }
899    }
900    false
901}
902
903// endregion
904
905/// Scan raw source text for direct Rf_error/Rf_errorcall calls.
906fn scan_rf_error_calls(lines: &[&str], data: &mut FileData) {
907    for (line_idx, line) in lines.iter().enumerate() {
908        let trimmed = line.trim();
909        if trimmed.starts_with("//") {
910            continue;
911        }
912        // Strip inline comments to avoid false positives
913        let code_part = match trimmed.find("//") {
914            Some(pos) => &trimmed[..pos],
915            None => trimmed,
916        };
917        for pattern in RF_ERROR_PATTERNS {
918            if code_part.contains(pattern) && !is_suppressed(lines, line_idx, "MXL300") {
919                let fn_name = &pattern[..pattern.len() - 1];
920                data.rf_error_calls
921                    .push((fn_name.to_string(), line_idx + 1));
922            }
923        }
924    }
925}
926// endregion
927
928// region: MXL302 — interleaved doc/non-doc attribute detection
929
930/// Returns the 1-based line number of the first non-doc attribute that interrupts
931/// a doc-comment stream, or `None` if no such interruption exists.
932///
933/// An interruption is: at least one `#[doc = ...]` attr has been seen, then a
934/// non-doc attribute appears, then at least one more `#[doc = ...]` attr follows.
935fn find_interleaved_doc_attr(attrs: &[syn::Attribute]) -> Option<usize> {
936    use syn::spanned::Spanned;
937
938    let mut saw_doc = false;
939    let mut interrupting_attr: Option<usize> = None;
940
941    for attr in attrs {
942        if attr.path().is_ident("doc") {
943            if interrupting_attr.is_some() {
944                // We saw doc, then non-doc, now doc again — confirmed interruption
945                return interrupting_attr;
946            }
947            saw_doc = true;
948        } else if saw_doc && interrupting_attr.is_none() {
949            // First non-doc attr after doc content — record but don't fire yet
950            // (only fire if another doc attr follows)
951            let line = attr.span().start().line;
952            interrupting_attr = Some(line);
953        }
954    }
955
956    None
957}
958
959// endregion