Skip to main content

miniextendr_api/from_r/
strings.rs

1//! String conversions — STRSXP requires special handling via `STRING_ELT`.
2//!
3//! R stores strings as STRSXP (vector of CHARSXP). Each element requires
4//! `STRING_ELT` + `R_CHAR` to extract, unlike numeric vectors which expose
5//! a contiguous data pointer.
6//!
7//! Covers: `&str`, `String`, `char`, `Option<&str>`, `Option<String>`,
8//! `Vec<String>`, `Vec<&str>`, `Box<[String]>`.
9
10use crate::ffi::{SEXP, SEXPTYPE, SexpExt};
11use crate::from_r::{
12    SexpError, SexpLengthError, SexpTypeError, TryFromSexp, charsxp_to_str,
13    charsxp_to_str_unchecked,
14};
15
16/// Convert R character vector (STRSXP) to Rust &str.
17///
18/// Extracts the first element of the character vector and returns it as a UTF-8 string.
19/// The returned string has static lifetime because it points to R's internal string pool.
20///
21/// # NA Handling
22///
23/// **Warning:** `NA_character_` is converted to empty string `""`. This is lossy!
24/// If you need to distinguish between NA and empty strings, use `Option<String>` instead:
25///
26/// ```ignore
27/// let maybe_str: Option<String> = sexp.try_into()?;
28/// ```
29///
30/// # Safety
31/// The returned &str is only valid as long as R doesn't garbage collect the CHARSXP.
32/// In practice, this is safe within a single .Call invocation.
33impl TryFromSexp for &'static str {
34    type Error = SexpError;
35
36    #[inline]
37    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
38        let actual = sexp.type_of();
39        if actual != SEXPTYPE::STRSXP {
40            return Err(SexpTypeError {
41                expected: SEXPTYPE::STRSXP,
42                actual,
43            }
44            .into());
45        }
46
47        let len = sexp.len();
48        if len != 1 {
49            return Err(SexpLengthError {
50                expected: 1,
51                actual: len,
52            }
53            .into());
54        }
55
56        // Get the CHARSXP at index 0
57        let charsxp = sexp.string_elt(0);
58
59        // Check for NA_STRING or R_BlankString
60        if charsxp == SEXP::na_string() {
61            return Ok("");
62        }
63        if charsxp == SEXP::blank_string() {
64            return Ok("");
65        }
66
67        // Use LENGTH-based conversion (O(1)) instead of CStr::from_ptr (O(n) strlen)
68        Ok(unsafe { charsxp_to_str(charsxp) })
69    }
70
71    #[inline]
72    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
73        let actual = sexp.type_of();
74        if actual != SEXPTYPE::STRSXP {
75            return Err(SexpTypeError {
76                expected: SEXPTYPE::STRSXP,
77                actual,
78            }
79            .into());
80        }
81
82        let len = unsafe { sexp.len_unchecked() };
83        if len != 1 {
84            return Err(SexpLengthError {
85                expected: 1,
86                actual: len,
87            }
88            .into());
89        }
90
91        // Get the CHARSXP at index 0
92        let charsxp = unsafe { sexp.string_elt_unchecked(0) };
93
94        // Check for NA_STRING or R_BlankString
95        if charsxp == SEXP::na_string() {
96            return Ok("");
97        }
98        if charsxp == SEXP::blank_string() {
99            return Ok("");
100        }
101
102        // Use LENGTH-based conversion (O(1)) instead of CStr::from_ptr (O(n) strlen)
103        Ok(unsafe { charsxp_to_str_unchecked(charsxp) })
104    }
105}
106
107impl TryFromSexp for Option<&'static str> {
108    type Error = SexpError;
109
110    #[inline]
111    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
112        if sexp.type_of() == SEXPTYPE::NILSXP {
113            return Ok(None);
114        }
115
116        let actual = sexp.type_of();
117        if actual != SEXPTYPE::STRSXP {
118            return Err(SexpTypeError {
119                expected: SEXPTYPE::STRSXP,
120                actual,
121            }
122            .into());
123        }
124
125        let len = sexp.len();
126        if len != 1 {
127            return Err(SexpLengthError {
128                expected: 1,
129                actual: len,
130            }
131            .into());
132        }
133
134        let charsxp = sexp.string_elt(0);
135        if charsxp == SEXP::na_string() {
136            return Ok(None);
137        }
138        if charsxp == SEXP::blank_string() {
139            return Ok(Some(""));
140        }
141
142        Ok(Some(unsafe { charsxp_to_str(charsxp) }))
143    }
144
145    #[inline]
146    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
147        if sexp.type_of() == SEXPTYPE::NILSXP {
148            return Ok(None);
149        }
150
151        let actual = sexp.type_of();
152        if actual != SEXPTYPE::STRSXP {
153            return Err(SexpTypeError {
154                expected: SEXPTYPE::STRSXP,
155                actual,
156            }
157            .into());
158        }
159
160        let len = unsafe { sexp.len_unchecked() };
161        if len != 1 {
162            return Err(SexpLengthError {
163                expected: 1,
164                actual: len,
165            }
166            .into());
167        }
168
169        let charsxp = unsafe { sexp.string_elt_unchecked(0) };
170        if charsxp == SEXP::na_string() {
171            return Ok(None);
172        }
173        if charsxp == SEXP::blank_string() {
174            return Ok(Some(""));
175        }
176
177        Ok(Some(unsafe { charsxp_to_str_unchecked(charsxp) }))
178    }
179}
180
181/// Convert R character vector (STRSXP) to Rust char.
182///
183/// Extracts the first character of the first element of the character vector.
184/// Returns an error if the string is empty, NA, or has more than one character.
185impl TryFromSexp for char {
186    type Error = SexpError;
187
188    #[inline]
189    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
190        let s: &str = TryFromSexp::try_from_sexp(sexp)?;
191        let mut chars = s.chars();
192        match (chars.next(), chars.next()) {
193            (Some(c), None) => Ok(c),
194            (None, _) => Err(SexpError::InvalidValue(
195                "empty string cannot be converted to char".to_string(),
196            )),
197            (Some(_), Some(_)) => Err(SexpError::InvalidValue(
198                "string has more than one character".to_string(),
199            )),
200        }
201    }
202
203    #[inline]
204    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
205        let s: &str = unsafe { TryFromSexp::try_from_sexp_unchecked(sexp)? };
206        let mut chars = s.chars();
207        match (chars.next(), chars.next()) {
208            (Some(c), None) => Ok(c),
209            (None, _) => Err(SexpError::InvalidValue(
210                "empty string cannot be converted to char".to_string(),
211            )),
212            (Some(_), Some(_)) => Err(SexpError::InvalidValue(
213                "string has more than one character".to_string(),
214            )),
215        }
216    }
217}
218
219/// Convert R character vector (STRSXP) to owned Rust String.
220///
221/// Extracts the first element and creates an owned copy.
222///
223/// # NA Handling
224///
225/// **Warning:** `NA_character_` is converted to empty string `""`. This is lossy!
226/// If you need to distinguish between NA and empty strings, use `Option<String>` instead:
227///
228/// ```ignore
229/// let maybe_str: Option<String> = sexp.try_into()?;
230/// ```
231impl TryFromSexp for String {
232    type Error = SexpError;
233
234    #[inline]
235    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
236        let actual = sexp.type_of();
237        if actual != SEXPTYPE::STRSXP {
238            return Err(SexpTypeError {
239                expected: SEXPTYPE::STRSXP,
240                actual,
241            }
242            .into());
243        }
244
245        let len = sexp.len();
246        if len != 1 {
247            return Err(SexpLengthError {
248                expected: 1,
249                actual: len,
250            }
251            .into());
252        }
253
254        let charsxp = sexp.string_elt(0);
255
256        if charsxp == SEXP::na_string() {
257            return Ok(String::new());
258        }
259
260        Ok(unsafe { charsxp_to_str(charsxp) }.to_owned())
261    }
262
263    #[inline]
264    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
265        let actual = sexp.type_of();
266        if actual != SEXPTYPE::STRSXP {
267            return Err(SexpTypeError {
268                expected: SEXPTYPE::STRSXP,
269                actual,
270            }
271            .into());
272        }
273
274        let len = unsafe { sexp.len_unchecked() };
275        if len != 1 {
276            return Err(SexpLengthError {
277                expected: 1,
278                actual: len,
279            }
280            .into());
281        }
282
283        let charsxp = unsafe { sexp.string_elt_unchecked(0) };
284
285        if charsxp == SEXP::na_string() {
286            return Ok(String::new());
287        }
288
289        Ok(unsafe { charsxp_to_str_unchecked(charsxp) }.to_owned())
290    }
291}
292
293/// NA-aware string conversion: returns `None` for `NA_character_`.
294///
295/// Use this when you need to distinguish between NA and empty strings:
296/// ```ignore
297/// let maybe_str: Option<String> = sexp.try_into()?;
298/// match maybe_str {
299///     Some(s) => println!("Got string: {}", s),
300///     None => println!("Got NA"),
301/// }
302/// ```
303impl TryFromSexp for Option<String> {
304    type Error = SexpError;
305
306    #[inline]
307    fn try_from_sexp(sexp: SEXP) -> Result<Self, Self::Error> {
308        let actual = sexp.type_of();
309        // NULL -> None
310        if actual == SEXPTYPE::NILSXP {
311            return Ok(None);
312        }
313        if actual != SEXPTYPE::STRSXP {
314            return Err(SexpTypeError {
315                expected: SEXPTYPE::STRSXP,
316                actual,
317            }
318            .into());
319        }
320
321        let len = sexp.len();
322        if len != 1 {
323            return Err(SexpLengthError {
324                expected: 1,
325                actual: len,
326            }
327            .into());
328        }
329
330        let charsxp = sexp.string_elt(0);
331
332        // Return None for NA_STRING
333        if charsxp == SEXP::na_string() {
334            return Ok(None);
335        }
336
337        Ok(Some(unsafe { charsxp_to_str(charsxp) }.to_owned()))
338    }
339
340    #[inline]
341    unsafe fn try_from_sexp_unchecked(sexp: SEXP) -> Result<Self, Self::Error> {
342        // For Option<String>, unchecked is same as checked (NA check is semantic, not safety)
343        Self::try_from_sexp(sexp)
344    }
345}
346// endregion