Skip to main content

miniextendr_engine/
lib.rs

1//! miniextendr-engine: standalone R embedding for Rust binaries and tests.
2//!
3//! This crate centralizes `libR` linking (via `build.rs`), R initialization, and
4//! a minimal runtime handle for processing events and interrupts. It is intended
5//! for Rust-only executables and integration tests that embed R.
6//!
7//! **Not for R packages:** this crate uses non-API R internals
8//! (`Rembedded.h`, `Rinterface.h`). For R packages, depend on `miniextendr-api`
9//! and keep `nonapi` disabled.
10//!
11//! ## When to use
12//! - Rust binaries that embed R.
13//! - Integration tests or benchmarks that need full control over R startup.
14//!
15//! ## Quick start
16//!
17//! ```ignore
18//! // SAFETY: Must be called once, from the main thread.
19//! let engine = unsafe {
20//!     miniextendr_engine::REngine::build()
21//!         .with_args(&["R", "--quiet", "--vanilla"])
22//!         .init()
23//!         .expect("Failed to initialize R")
24//! };
25//!
26//! // ... use R APIs from the main thread ...
27//!
28//! std::mem::forget(engine); // optional: intentionally leak the handle
29//! ```
30//!
//! ## Initialization details
//! - Sets `R_HOME` (from the output of `R RHOME`) if it is not already set.
//! - Calls `Rf_initialize_R` directly, rather than `Rf_initEmbeddedR`, to avoid
//!   a double `setup_Rmainloop()`.
//! - Calls `setup_Rmainloop()` exactly once after initialization.
35//!
36//! ## Runtime sentinel
37//!
38//! ```ignore
39//! if miniextendr_engine::r_initialized_sentinel() {
40//!     // R has been initialized in this process.
41//! }
42//! ```
43//!
44//! ## Safety
45//!
46//! - Must only be initialized once per process.
47//! - Must be called from the main thread.
48//! - No shutdown: `Rf_endEmbeddedR` is intentionally not called because the
49//!   cleanup path is not reentrant-safe. The OS reclaims resources on exit.
50
51use std::ffi::CString;
52use std::os::raw::{c_char, c_int};
53use std::path::PathBuf;
54use std::process::Command;
55
// This crate links against non-API R entry points (Rembedded.h, Rinterface.h)
// in order to embed R. It is not intended for use in R packages.
unsafe extern "C" {
    // --- Global state from Rinterface.h (non-API) ---
    // Declared through `UnsafeCell` so the flags can be written via interior
    // mutability instead of `static mut`.
    static R_SignalHandlers: std::cell::UnsafeCell<c_int>;
    static R_Interactive: std::cell::UnsafeCell<c_int>;
    // Read-only stack sentinels, used to detect whether R has been initialized.
    static R_CStackDir: c_int;
    static R_CStackStart: usize;

    // --- R initialization (from Rembedded.h, non-API) ---
    fn Rf_initialize_R(argc: c_int, argv: *mut *mut c_char) -> c_int;
    // Declared for completeness only: this crate never calls it.
    #[allow(dead_code)]
    fn Rf_endEmbeddedR(fatal: c_int);

    // --- Setup functions ---
    fn setup_Rmainloop();

    // --- R event loop ---
    fn R_CheckUserInterrupt();
    fn R_ProcessEvents();
}
78
79/// Write to R's global `R_Interactive` flag.
80///
81/// # Safety
82/// Must be called from the main thread during R initialization.
83#[inline]
84unsafe fn set_r_interactive(value: c_int) {
85    unsafe {
86        *R_Interactive.get() = value;
87    }
88}
89
90/// Write to R's global `R_SignalHandlers` flag.
91///
92/// # Safety
93/// Must be called from the main thread during R initialization.
94#[inline]
95unsafe fn set_r_signal_handlers(value: c_int) {
96    unsafe {
97        *R_SignalHandlers.get() = value;
98    }
99}
100
101/// Check whether `Rf_initialize_R` has run by inspecting stack sentinels.
102///
103/// `R_CStackStart`/`R_CStackDir` are set during R initialization on the main
104/// thread. A zero or `usize::MAX` value indicates "not initialized".
105#[inline]
106pub fn r_initialized_sentinel() -> bool {
107    unsafe {
108        let start = R_CStackStart;
109        let dir = R_CStackDir;
110        dir != 0 && start != 0 && start != usize::MAX
111    }
112}
113
/// Builder for configuring and initializing the R runtime.
///
/// Obtain one with [`REngine::build`] or [`REngineBuilder::new`], adjust it
/// with the chained setters, then call [`REngineBuilder::init`].
///
/// # Example
///
/// ```ignore
/// // SAFETY: called once per process, from the main thread.
/// let engine = unsafe {
///     REngine::build()
///         .with_args(&["R", "--quiet", "--no-save"])
///         .interactive(false)
///         .signal_handlers(false)
///         .init()?
/// };
/// ```
#[derive(Debug, Clone)]
pub struct REngineBuilder {
    /// Command-line arguments handed to `Rf_initialize_R` (including `argv[0]`).
    args: Vec<String>,
    /// Value written to R's `R_Interactive` flag during initialization.
    interactive: bool,
    /// Value written to R's `R_SignalHandlers` flag during initialization.
    signal_handlers: bool,
    /// Explicit `R_HOME` override; `None` leaves detection to `init`.
    r_home: Option<PathBuf>,
}
131
132impl Default for REngineBuilder {
133    fn default() -> Self {
134        Self::new()
135    }
136}
137
138impl REngineBuilder {
139    /// Create a new R engine builder with default settings.
140    pub fn new() -> Self {
141        Self {
142            // Default to a non-interactive-safe setup: R requires an explicit
143            // save/no-save choice when not running interactively.
144            args: vec![
145                "R".to_string(),
146                "--quiet".to_string(),
147                "--vanilla".to_string(),
148            ],
149            interactive: false,
150            signal_handlers: false,
151            r_home: None,
152        }
153    }
154
155    /// Set the command-line arguments for R initialization.
156    ///
157    /// Default is `["R", "--quiet", "--vanilla"]`.
158    pub fn with_args(mut self, args: &[&str]) -> Self {
159        self.args = args.iter().map(|s| s.to_string()).collect();
160        self
161    }
162
163    /// Set whether R should run in interactive mode.
164    ///
165    /// Default is `false`.
166    pub fn interactive(mut self, interactive: bool) -> Self {
167        self.interactive = interactive;
168        self
169    }
170
171    /// Set whether R should install signal handlers.
172    ///
173    /// Default is `false`. Set to `true` if you want R to handle Ctrl+C etc.
174    pub fn signal_handlers(mut self, enable: bool) -> Self {
175        self.signal_handlers = enable;
176        self
177    }
178
179    /// Set the R_HOME directory explicitly.
180    ///
181    /// By default, R_HOME is auto-detected by running `R RHOME` or reading
182    /// the `R_HOME` environment variable. Use this method to override that
183    /// behavior with an explicit path.
184    ///
185    /// # Example
186    ///
187    /// ```ignore
188    /// let engine = REngine::build()
189    ///     .r_home("/opt/R/4.4.0/lib/R")
190    ///     .init()
191    ///     .expect("Failed to initialize R");
192    /// ```
193    pub fn r_home(mut self, path: impl Into<PathBuf>) -> Self {
194        self.r_home = Some(path.into());
195        self
196    }
197
198    /// Initialize the R runtime with the configured settings.
199    ///
200    /// # Safety
201    ///
202    /// - Must only be called once per process
203    /// - Must be called from the main thread
204    /// - R cannot be safely shutdown and reinitialized
205    ///
206    /// # Errors
207    ///
208    /// Returns an error if R initialization fails.
209    pub unsafe fn init(self) -> Result<REngine, REngineError> {
210        // Guard against re-initialization
211        if r_initialized_sentinel() {
212            return Err(REngineError::AlreadyInitialized);
213        }
214
215        ensure_r_home_env(self.r_home.as_ref())?;
216
217        // Convert args to C strings
218        let c_args: Vec<CString> = self
219            .args
220            .iter()
221            .map(|s| CString::new(s.as_str()).unwrap())
222            .collect();
223
224        let mut c_ptrs: Vec<*mut c_char> = c_args.iter().map(|s| s.as_ptr().cast_mut()).collect();
225
226        let argc = c_ptrs.len() as c_int;
227        let argv = c_ptrs.as_mut_ptr();
228
229        // Initialize R.
230        //
231        // Note: `Rf_initEmbeddedR()` already calls `setup_Rmainloop()`.
232        // We want tighter control (and to avoid double-calling the setup),
233        // so we call `Rf_initialize_R()` directly and then `setup_Rmainloop()`.
234        let result = unsafe { Rf_initialize_R(argc, argv) };
235        if result != 0 {
236            return Err(REngineError::InitializationFailed);
237        }
238
239        unsafe {
240            // Set global flags *after* initialization, mirroring R's own
241            // `Rf_initEmbeddedR()` order (but respecting our builder flags).
242            set_r_interactive(if self.interactive { 1 } else { 0 });
243            set_r_signal_handlers(if self.signal_handlers { 1 } else { 0 });
244            setup_Rmainloop();
245
246            // Note: We do NOT register an atexit handler for Rf_endEmbeddedR.
247            // The R runtime cleanup operations (KillAllDevices, RunExitFinalizers, etc.)
248            // are complex and can crash if other cleanup is happening concurrently.
249            // For short-lived programs (tests, benchmarks), letting the OS reclaim
250            // resources on process exit is safer and sufficient.
251        }
252
253        Ok(REngine)
254    }
255}
256
/// Handle to an initialized R runtime.
///
/// This is a marker type indicating R has been initialized for this process.
/// R cleanup (via `Rf_endEmbeddedR`) is intentionally NOT called because it
/// performs non-reentrant operations that can crash if called during Drop
/// or concurrent with other cleanup. The OS reclaims all resources on process exit.
///
/// `Debug` is derived so that `Result<REngine, REngineError>` supports
/// `unwrap_err()` / `expect_err()`, which require the `Ok` type to be `Debug`.
#[derive(Debug)]
pub struct REngine;
264
265impl Drop for REngine {
266    /// Implements drop such that `std::mem::forget` leaks `REngine` rather than
267    /// dropping it, when `Drop` is absent.
268    fn drop(&mut self) {}
269}
270
271impl REngine {
272    /// Create a new builder for configuring R initialization.
273    pub fn build() -> REngineBuilder {
274        REngineBuilder::new()
275    }
276
277    /// Process pending R events.
278    ///
279    /// Call this periodically to allow R to handle events, especially
280    /// when running a long computation.
281    ///
282    /// # Safety
283    ///
284    /// Must be called from the thread that initialized R.
285    pub unsafe fn process_events(&self) {
286        unsafe {
287            R_ProcessEvents();
288        }
289    }
290
291    /// Check for user interrupts (Ctrl+C).
292    ///
293    /// # Safety
294    ///
295    /// Must be called from the thread that initialized R.
296    pub unsafe fn check_interrupt(&self) {
297        unsafe {
298            R_CheckUserInterrupt();
299        }
300    }
301}
302
// Note: `REngine` intentionally has no shutdown method, and its `Drop` impl is
// a deliberate no-op that never calls into R.
//
// Rf_endEmbeddedR performs non-reentrant cleanup operations.
// Here's what it does (from R 4.5.2 source):
307//
308// Unix/Linux version (src/unix/Rembedded.c):
309// ```c
310// void Rf_endEmbeddedR(int fatal)
311// {
312//     R_RunExitFinalizers();    // Runs .Last and exit handlers (NOT reentrant!)
313//     CleanEd();                // Editor cleanup
314//     if(!fatal) KillAllDevices();  // Graphics devices (NOT reentrant!)
315//     R_CleanTempDir();         // File system cleanup
316//     if(!fatal && R_CollectWarnings)
317//         PrintWarnings();      // Console I/O
318//     fpu_setup(FALSE);         // FPU state
319// }
320// ```
321//
322// Windows version (src/gnuwin32/embeddedR.c):
323// ```c
324// void Rf_endEmbeddedR(int fatal)
325// {
326//     R_RunExitFinalizers();
327//     CleanEd();
328//     R_CleanTempDir();
329//     if(!fatal){
330//         Rf_KillAllDevices();
331//         AllDevicesKilled = TRUE;
332//     }
333//     if(!fatal && R_CollectWarnings)
334//         PrintWarnings();
335//     app_cleanup();           // Application-specific cleanup
336// }
337// ```
338//
339// These operations are NOT reentrant and must run exactly once at process exit.
340// Calling during Drop (e.g., test cleanup) causes crashes.
341//
342// **Solution:** We intentionally do NOT call Rf_endEmbeddedR. For short-lived
343// programs (tests, benchmarks), the OS reclaims all resources on process exit.
344// This avoids crashes from double-cleanup or reentrant calls.
345
/// Errors that can occur during R engine initialization.
///
/// `Clone`, `PartialEq` and `Eq` are derived so callers (and tests) can compare
/// an error against an expected variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum REngineError {
    /// Could not determine / set `R_HOME` for embedding.
    RHomeNotFound {
        /// Optional stderr from `R RHOME` command for diagnostics.
        stderr: Option<String>,
    },
    /// R initialization failed.
    InitializationFailed,
    /// R is already initialized. Re-initialization is not supported.
    AlreadyInitialized,
}

impl std::fmt::Display for REngineError {
    /// Render a human-readable message.
    ///
    /// For [`REngineError::RHomeNotFound`] the captured diagnostics are
    /// appended on a second line, but only when they contain something other
    /// than whitespace; surrounding whitespace (such as the trailing newline
    /// of command output) is stripped.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            REngineError::RHomeNotFound { stderr } => {
                f.write_str("R_HOME is not set and `R RHOME` could not be resolved")?;
                // Trim first so blank output does not leave a dangling
                // "stderr:" line in the message.
                match stderr.as_deref().map(str::trim) {
                    Some(detail) if !detail.is_empty() => write!(f, "\nstderr: {}", detail),
                    _ => Ok(()),
                }
            }
            REngineError::InitializationFailed => f.write_str("R initialization failed"),
            REngineError::AlreadyInitialized => f.write_str(
                "R is already initialized. Multiple calls to REngineBuilder::init() are not supported.",
            ),
        }
    }
}

impl std::error::Error for REngineError {}
384
385fn ensure_r_home_env(explicit_path: Option<&PathBuf>) -> Result<(), REngineError> {
386    // If an explicit path was provided, use it
387    if let Some(path) = explicit_path {
388        // SAFETY: We call this during single-threaded startup (before initializing
389        // R and before spawning any worker threads).
390        unsafe {
391            std::env::set_var("R_HOME", path);
392        }
393        return Ok(());
394    }
395
396    // If R_HOME is already set, use it
397    if std::env::var_os("R_HOME").is_some() {
398        return Ok(());
399    }
400
401    // Auto-detect via `R RHOME`
402    let output = Command::new("R")
403        .args(["RHOME"])
404        .output()
405        .map_err(|_| REngineError::RHomeNotFound { stderr: None })?;
406
407    if !output.status.success() {
408        let stderr = String::from_utf8_lossy(&output.stderr).to_string();
409        return Err(REngineError::RHomeNotFound {
410            stderr: Some(stderr),
411        });
412    }
413
414    let r_home = String::from_utf8(output.stdout)
415        .map_err(|_| REngineError::RHomeNotFound { stderr: None })?;
416    let r_home = r_home.trim();
417    if r_home.is_empty() {
418        return Err(REngineError::RHomeNotFound { stderr: None });
419    }
420
421    // SAFETY: We call this during single-threaded startup (before initializing
422    // R and before spawning any worker threads).
423    unsafe {
424        std::env::set_var("R_HOME", r_home);
425    }
426    Ok(())
427}
428
429#[cfg(test)]
430mod tests;