miniextendr_engine/lib.rs
1//! miniextendr-engine: standalone R embedding for Rust binaries and tests.
2//!
3//! This crate centralizes `libR` linking (via `build.rs`), R initialization, and
4//! a minimal runtime handle for processing events and interrupts. It is intended
5//! for Rust-only executables and integration tests that embed R.
6//!
7//! **Not for R packages:** this crate uses non-API R internals
8//! (`Rembedded.h`, `Rinterface.h`). For R packages, depend on `miniextendr-api`
9//! and keep `nonapi` disabled.
10//!
11//! ## When to use
12//! - Rust binaries that embed R.
13//! - Integration tests or benchmarks that need full control over R startup.
14//!
15//! ## Quick start
16//!
17//! ```ignore
18//! // SAFETY: Must be called once, from the main thread.
19//! let engine = unsafe {
20//! miniextendr_engine::REngine::build()
21//! .with_args(&["R", "--quiet", "--vanilla"])
22//! .init()
23//! .expect("Failed to initialize R")
24//! };
25//!
26//! // ... use R APIs from the main thread ...
27//!
28//! std::mem::forget(engine); // optional: intentionally leak the handle
29//! ```
30//!
31//! ## Initialization details
//! - Ensures `R_HOME` is set, resolving it via `R RHOME` when it is missing.
33//! - Calls `Rf_initialize_R` directly to avoid double `setup_Rmainloop()`.
34//! - Calls `setup_Rmainloop()` exactly once after initialization.
35//!
36//! ## Runtime sentinel
37//!
38//! ```ignore
39//! if miniextendr_engine::r_initialized_sentinel() {
40//! // R has been initialized in this process.
41//! }
42//! ```
43//!
44//! ## Safety
45//!
46//! - Must only be initialized once per process.
47//! - Must be called from the main thread.
48//! - No shutdown: `Rf_endEmbeddedR` is intentionally not called because the
49//! cleanup path is not reentrant-safe. The OS reclaims resources on exit.
50
51use std::ffi::CString;
52use std::os::raw::{c_char, c_int};
53use std::path::PathBuf;
54use std::process::Command;
55
// FFI surface of libR used by this crate. Everything declared here comes from
// R's embedding headers (Rembedded.h, Rinterface.h), which are non-API; this
// crate is therefore not intended for use in R packages.
unsafe extern "C" {
    // ---- Global state (Rinterface.h, non-API) ----
    // The two flags this crate writes are declared as `UnsafeCell` so they can
    // be mutated through a raw pointer without resorting to `static mut`.
    static R_SignalHandlers: std::cell::UnsafeCell<c_int>;
    static R_Interactive: std::cell::UnsafeCell<c_int>;
    // Read-only from this crate; inspected by `r_initialized_sentinel`.
    static R_CStackDir: c_int;
    static R_CStackStart: usize;

    // ---- Initialization / teardown (Rembedded.h, non-API) ----
    fn Rf_initialize_R(argc: c_int, argv: *mut *mut c_char) -> c_int;
    // Declared for reference only: nothing in this file calls it, hence the
    // `dead_code` allowance.
    #[allow(dead_code)]
    fn Rf_endEmbeddedR(fatal: c_int);

    // ---- Main loop setup ----
    fn setup_Rmainloop();

    // ---- Event loop ----
    fn R_CheckUserInterrupt();
    fn R_ProcessEvents();
}
78
79/// Write to R's global `R_Interactive` flag.
80///
81/// # Safety
82/// Must be called from the main thread during R initialization.
83#[inline]
84unsafe fn set_r_interactive(value: c_int) {
85 unsafe {
86 *R_Interactive.get() = value;
87 }
88}
89
90/// Write to R's global `R_SignalHandlers` flag.
91///
92/// # Safety
93/// Must be called from the main thread during R initialization.
94#[inline]
95unsafe fn set_r_signal_handlers(value: c_int) {
96 unsafe {
97 *R_SignalHandlers.get() = value;
98 }
99}
100
101/// Check whether `Rf_initialize_R` has run by inspecting stack sentinels.
102///
103/// `R_CStackStart`/`R_CStackDir` are set during R initialization on the main
104/// thread. A zero or `usize::MAX` value indicates "not initialized".
105#[inline]
106pub fn r_initialized_sentinel() -> bool {
107 unsafe {
108 let start = R_CStackStart;
109 let dir = R_CStackDir;
110 dir != 0 && start != 0 && start != usize::MAX
111 }
112}
113
/// Builder for configuring and initializing the R runtime.
///
/// Obtain one with `REngine::build()` or `REngineBuilder::new()`, adjust the
/// settings with the chained setters, then call the `unsafe` `init` method.
///
/// # Example
///
/// ```ignore
/// // SAFETY: called once per process, from the main thread.
/// let engine = unsafe {
///     REngine::build()
///         .with_args(&["R", "--quiet", "--no-save"])
///         .interactive(false)
///         .signal_handlers(false)
///         .init()?
/// };
/// ```
#[derive(Debug, Clone)]
pub struct REngineBuilder {
    /// Command-line arguments handed to `Rf_initialize_R`, in order.
    args: Vec<String>,
    /// Value written to R's `R_Interactive` flag during `init`.
    interactive: bool,
    /// Value written to R's `R_SignalHandlers` flag during `init`.
    signal_handlers: bool,
    /// Explicit `R_HOME` override; `None` means use the environment or
    /// auto-detect.
    r_home: Option<PathBuf>,
}
131
132impl Default for REngineBuilder {
133 fn default() -> Self {
134 Self::new()
135 }
136}
137
138impl REngineBuilder {
139 /// Create a new R engine builder with default settings.
140 pub fn new() -> Self {
141 Self {
142 // Default to a non-interactive-safe setup: R requires an explicit
143 // save/no-save choice when not running interactively.
144 args: vec![
145 "R".to_string(),
146 "--quiet".to_string(),
147 "--vanilla".to_string(),
148 ],
149 interactive: false,
150 signal_handlers: false,
151 r_home: None,
152 }
153 }
154
155 /// Set the command-line arguments for R initialization.
156 ///
157 /// Default is `["R", "--quiet", "--vanilla"]`.
158 pub fn with_args(mut self, args: &[&str]) -> Self {
159 self.args = args.iter().map(|s| s.to_string()).collect();
160 self
161 }
162
163 /// Set whether R should run in interactive mode.
164 ///
165 /// Default is `false`.
166 pub fn interactive(mut self, interactive: bool) -> Self {
167 self.interactive = interactive;
168 self
169 }
170
171 /// Set whether R should install signal handlers.
172 ///
173 /// Default is `false`. Set to `true` if you want R to handle Ctrl+C etc.
174 pub fn signal_handlers(mut self, enable: bool) -> Self {
175 self.signal_handlers = enable;
176 self
177 }
178
179 /// Set the R_HOME directory explicitly.
180 ///
181 /// By default, R_HOME is auto-detected by running `R RHOME` or reading
182 /// the `R_HOME` environment variable. Use this method to override that
183 /// behavior with an explicit path.
184 ///
185 /// # Example
186 ///
187 /// ```ignore
188 /// let engine = REngine::build()
189 /// .r_home("/opt/R/4.4.0/lib/R")
190 /// .init()
191 /// .expect("Failed to initialize R");
192 /// ```
193 pub fn r_home(mut self, path: impl Into<PathBuf>) -> Self {
194 self.r_home = Some(path.into());
195 self
196 }
197
198 /// Initialize the R runtime with the configured settings.
199 ///
200 /// # Safety
201 ///
202 /// - Must only be called once per process
203 /// - Must be called from the main thread
204 /// - R cannot be safely shutdown and reinitialized
205 ///
206 /// # Errors
207 ///
208 /// Returns an error if R initialization fails.
209 pub unsafe fn init(self) -> Result<REngine, REngineError> {
210 // Guard against re-initialization
211 if r_initialized_sentinel() {
212 return Err(REngineError::AlreadyInitialized);
213 }
214
215 ensure_r_home_env(self.r_home.as_ref())?;
216
217 // Convert args to C strings
218 let c_args: Vec<CString> = self
219 .args
220 .iter()
221 .map(|s| CString::new(s.as_str()).unwrap())
222 .collect();
223
224 let mut c_ptrs: Vec<*mut c_char> = c_args.iter().map(|s| s.as_ptr().cast_mut()).collect();
225
226 let argc = c_ptrs.len() as c_int;
227 let argv = c_ptrs.as_mut_ptr();
228
229 // Initialize R.
230 //
231 // Note: `Rf_initEmbeddedR()` already calls `setup_Rmainloop()`.
232 // We want tighter control (and to avoid double-calling the setup),
233 // so we call `Rf_initialize_R()` directly and then `setup_Rmainloop()`.
234 let result = unsafe { Rf_initialize_R(argc, argv) };
235 if result != 0 {
236 return Err(REngineError::InitializationFailed);
237 }
238
239 unsafe {
240 // Set global flags *after* initialization, mirroring R's own
241 // `Rf_initEmbeddedR()` order (but respecting our builder flags).
242 set_r_interactive(if self.interactive { 1 } else { 0 });
243 set_r_signal_handlers(if self.signal_handlers { 1 } else { 0 });
244 setup_Rmainloop();
245
246 // Note: We do NOT register an atexit handler for Rf_endEmbeddedR.
247 // The R runtime cleanup operations (KillAllDevices, RunExitFinalizers, etc.)
248 // are complex and can crash if other cleanup is happening concurrently.
249 // For short-lived programs (tests, benchmarks), letting the OS reclaim
250 // resources on process exit is safer and sufficient.
251 }
252
253 Ok(REngine)
254 }
255}
256
/// Handle to an initialized R runtime.
///
/// This is a marker type indicating R has been initialized for this process.
/// R cleanup (via `Rf_endEmbeddedR`) is intentionally NOT called because it
/// performs non-reentrant operations that can crash if called during Drop
/// or concurrent with other cleanup. The OS reclaims all resources on process exit.
///
/// `Debug` is derived so that `Result<REngine, _>` supports `unwrap_err()` and
/// `expect_err()`, which require the `Ok` type to be `Debug`.
#[derive(Debug)]
pub struct REngine;
264
265impl Drop for REngine {
266 /// Implements drop such that `std::mem::forget` leaks `REngine` rather than
267 /// dropping it, when `Drop` is absent.
268 fn drop(&mut self) {}
269}
270
271impl REngine {
272 /// Create a new builder for configuring R initialization.
273 pub fn build() -> REngineBuilder {
274 REngineBuilder::new()
275 }
276
277 /// Process pending R events.
278 ///
279 /// Call this periodically to allow R to handle events, especially
280 /// when running a long computation.
281 ///
282 /// # Safety
283 ///
284 /// Must be called from the thread that initialized R.
285 pub unsafe fn process_events(&self) {
286 unsafe {
287 R_ProcessEvents();
288 }
289 }
290
291 /// Check for user interrupts (Ctrl+C).
292 ///
293 /// # Safety
294 ///
295 /// Must be called from the thread that initialized R.
296 pub unsafe fn check_interrupt(&self) {
297 unsafe {
298 R_CheckUserInterrupt();
299 }
300 }
301}
302
// Note: We intentionally DO NOT provide a shutdown method, and `Drop` for `REngine` is a deliberate no-op.
304//
305// Rf_endEmbeddedR performs non-reentrant cleanup operations.
306// Here's what it does (from R 4.5.2 source):
307//
308// Unix/Linux version (src/unix/Rembedded.c):
309// ```c
310// void Rf_endEmbeddedR(int fatal)
311// {
312// R_RunExitFinalizers(); // Runs .Last and exit handlers (NOT reentrant!)
313// CleanEd(); // Editor cleanup
314// if(!fatal) KillAllDevices(); // Graphics devices (NOT reentrant!)
315// R_CleanTempDir(); // File system cleanup
316// if(!fatal && R_CollectWarnings)
317// PrintWarnings(); // Console I/O
318// fpu_setup(FALSE); // FPU state
319// }
320// ```
321//
322// Windows version (src/gnuwin32/embeddedR.c):
323// ```c
324// void Rf_endEmbeddedR(int fatal)
325// {
326// R_RunExitFinalizers();
327// CleanEd();
328// R_CleanTempDir();
329// if(!fatal){
330// Rf_KillAllDevices();
331// AllDevicesKilled = TRUE;
332// }
333// if(!fatal && R_CollectWarnings)
334// PrintWarnings();
335// app_cleanup(); // Application-specific cleanup
336// }
337// ```
338//
339// These operations are NOT reentrant and must run exactly once at process exit.
340// Calling during Drop (e.g., test cleanup) causes crashes.
341//
342// **Solution:** We intentionally do NOT call Rf_endEmbeddedR. For short-lived
343// programs (tests, benchmarks), the OS reclaims all resources on process exit.
344// This avoids crashes from double-cleanup or reentrant calls.
345
/// Errors that can occur during R engine initialization.
///
/// The type is `Clone` and comparable (`PartialEq`/`Eq`) so callers and tests
/// can assert on the exact failure, e.g.
/// `assert_eq!(err, REngineError::AlreadyInitialized)`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum REngineError {
    /// Could not determine / set `R_HOME` for embedding.
    RHomeNotFound {
        /// Optional stderr from `R RHOME` command for diagnostics.
        stderr: Option<String>,
    },
    /// R initialization failed.
    InitializationFailed,
    /// R is already initialized. Re-initialization is not supported.
    AlreadyInitialized,
}
359
360impl std::fmt::Display for REngineError {
361 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
362 match self {
363 REngineError::RHomeNotFound { stderr } => {
364 write!(f, "R_HOME is not set and `R RHOME` could not be resolved")?;
365 if let Some(stderr) = stderr
366 && !stderr.is_empty()
367 {
368 write!(f, "\nstderr: {}", stderr)?;
369 }
370 Ok(())
371 }
372 REngineError::InitializationFailed => write!(f, "R initialization failed"),
373 REngineError::AlreadyInitialized => {
374 write!(
375 f,
376 "R is already initialized. Multiple calls to REngineBuilder::init() are not supported."
377 )
378 }
379 }
380 }
381}
382
383impl std::error::Error for REngineError {}
384
385fn ensure_r_home_env(explicit_path: Option<&PathBuf>) -> Result<(), REngineError> {
386 // If an explicit path was provided, use it
387 if let Some(path) = explicit_path {
388 // SAFETY: We call this during single-threaded startup (before initializing
389 // R and before spawning any worker threads).
390 unsafe {
391 std::env::set_var("R_HOME", path);
392 }
393 return Ok(());
394 }
395
396 // If R_HOME is already set, use it
397 if std::env::var_os("R_HOME").is_some() {
398 return Ok(());
399 }
400
401 // Auto-detect via `R RHOME`
402 let output = Command::new("R")
403 .args(["RHOME"])
404 .output()
405 .map_err(|_| REngineError::RHomeNotFound { stderr: None })?;
406
407 if !output.status.success() {
408 let stderr = String::from_utf8_lossy(&output.stderr).to_string();
409 return Err(REngineError::RHomeNotFound {
410 stderr: Some(stderr),
411 });
412 }
413
414 let r_home = String::from_utf8(output.stdout)
415 .map_err(|_| REngineError::RHomeNotFound { stderr: None })?;
416 let r_home = r_home.trim();
417 if r_home.is_empty() {
418 return Err(REngineError::RHomeNotFound { stderr: None });
419 }
420
421 // SAFETY: We call this during single-threaded startup (before initializing
422 // R and before spawning any worker threads).
423 unsafe {
424 std::env::set_var("R_HOME", r_home);
425 }
426 Ok(())
427}
428
429#[cfg(test)]
430mod tests;