From 98f1d2624592b431f0df06dc90381f3cb56cc38b Mon Sep 17 00:00:00 2001 From: hanshenrik Date: Tue, 27 Feb 2024 22:51:54 +0100 Subject: [PATCH 1/2] rustify DOMXPath::quote --- build/php.m4 | 57 ++++++++++++++++++++++++++++--------------- ext/dom/config.m4 | 2 +- ext/dom/xpath.c | 42 +++---------------------------- ext/dom/xpath_rust.h | 1 + ext/dom/xpath_rust.rs | 56 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 59 deletions(-) create mode 100644 ext/dom/xpath_rust.h create mode 100644 ext/dom/xpath_rust.rs diff --git a/build/php.m4 b/build/php.m4 index e975985fe7498..420253049c146 100644 --- a/build/php.m4 +++ b/build/php.m4 @@ -215,37 +215,45 @@ AC_DEFUN([PHP_ADD_SOURCES_X],[ dnl Relative to source- or build-directory? dnl ac_srcdir/ac_bdir include trailing slash case $1 in - ""[)] ac_srcdir="$abs_srcdir/"; unset ac_bdir; ac_inc="-I. -I$abs_srcdir" ;; - /*[)] ac_srcdir=`echo "$1"|cut -c 2-`"/"; ac_bdir=$ac_srcdir; ac_inc="-I$ac_bdir -I$abs_srcdir/$ac_bdir" ;; - *[)] ac_srcdir="$abs_srcdir/$1/"; ac_bdir="$1/"; ac_inc="-I$ac_bdir -I$ac_srcdir" ;; + "") ac_srcdir="$abs_srcdir/"; unset ac_bdir; ac_inc="-I. -I$abs_srcdir" ;; + /*) ac_srcdir=`echo "$1"|cut -c 2-`"/"; ac_bdir=$ac_srcdir; ac_inc="-I$ac_bdir -I$abs_srcdir/$ac_bdir" ;; + *) ac_srcdir="$abs_srcdir/$1/"; ac_bdir="$1/"; ac_inc="-I$ac_bdir -I$ac_srcdir" ;; esac dnl how to build .. shared or static? ifelse($5,yes,_PHP_ASSIGN_BUILD_VARS(shared),_PHP_ASSIGN_BUILD_VARS(php)) dnl Iterate over the sources. - old_IFS=[$]IFS - for ac_src in $2; do dnl Remove the suffix. - IFS=. - set $ac_src - ac_obj=[$]1 - IFS=$old_IFS + old_IFS=[$]IFS + for ac_src in $2; do + IFS=. + set $ac_src + ac_obj=[$]1 + IFS=$old_IFS dnl Append to the array which has been dynamically chosen at m4 time. - $4="[$]$4 [$]ac_bdir[$]ac_obj.lo" - -dnl Choose the right compiler/flags/etc. for the source-file. 
- case $ac_src in - *.c[)] ac_comp="$b_c_pre $ac_inc $b_c_meta $3 -c $ac_srcdir$ac_src -o $ac_bdir$ac_obj.$b_lo $b_c_post" ;; - *.s[)] ac_comp="$b_c_pre $ac_inc $b_c_meta $3 -c $ac_srcdir$ac_src -o $ac_bdir$ac_obj.$b_lo $b_c_post" ;; - *.S[)] ac_comp="$b_c_pre $ac_inc $b_c_meta $3 -c $ac_srcdir$ac_src -o $ac_bdir$ac_obj.$b_lo $b_c_post" ;; - *.cpp|*.cc|*.cxx[)] ac_comp="$b_cxx_pre $ac_inc $b_cxx_meta $3 -c $ac_srcdir$ac_src -o $ac_bdir$ac_obj.$b_lo $b_cxx_post" ;; - esac + $4="[$]$4 [$]ac_bdir[$]ac_obj.lo" + + # Choose compiler/flags based on file extension + case $ac_src in + *.rs) + # Rust source file + ac_comp="rustc $ac_srcdir$ac_src --crate-type=staticlib --out-dir $ac_bdir --crate-name=$(basename $ac_src .rs) && mv ${ac_bdir}lib$(basename $ac_src .rs).a ${ac_bdir}$(basename $ac_src .rs).o" + ;; + + *.c|*.s|*.S) + # C and assembly source files + ac_comp="$b_c_pre $ac_inc $b_c_meta $3 -c $ac_srcdir$ac_src -o $ac_bdir$ac_obj.$b_lo $b_c_post -MMD -MF $ac_bdir$ac_obj.dep -MT $ac_bdir$ac_obj.lo" + ;; + *.cpp|*.cc|*.cxx) + # C++ source files + ac_comp="$b_cxx_pre $ac_inc $b_cxx_meta $3 -c $ac_srcdir$ac_src -o $ac_bdir$ac_obj.$b_lo $b_cxx_post -MMD -MF $ac_bdir$ac_obj.dep -MT $ac_bdir$ac_obj.lo" + ;; + esac + -dnl Generate Makefiles with dependencies - ac_comp="$ac_comp -MMD -MF $ac_bdir$ac_obj.dep -MT $ac_bdir[$]ac_obj.lo" dnl Create a rule for the object/source combo. 
cat >>Makefile.objects<val[0] = '\''; - memcpy(output->val + 1, input, input_len); - output->val[input_len + 1] = '\''; - output->val[input_len + 2] = '\0'; - RETURN_STR(output); - } else if (memchr(input, '"', input_len) == NULL) { - zend_string *const output = zend_string_safe_alloc(1, input_len, 2, false); - output->val[0] = '"'; - memcpy(output->val + 1, input, input_len); - output->val[input_len + 1] = '"'; - output->val[input_len + 2] = '\0'; - RETURN_STR(output); - } else { - smart_str output = {0}; - // need to use the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003 - smart_str_appendl(&output, "concat(", 7); - const char *ptr = input; - const char *const end = input + input_len; - while (ptr < end) { - const char *const single_quote_ptr = memchr(ptr, '\'', end - ptr); - const char *const double_quote_ptr = memchr(ptr, '"', end - ptr); - const size_t distance_to_single_quote = single_quote_ptr ? single_quote_ptr - ptr : end - ptr; - const size_t distance_to_double_quote = double_quote_ptr ? double_quote_ptr - ptr : end - ptr; - const size_t bytes_until_quote = MAX(distance_to_single_quote, distance_to_double_quote); - const char quote_method = (distance_to_single_quote > distance_to_double_quote) ? 
use std::ffi::c_char;

/// Quotes a byte string as an XPath 1.0 string literal and hands it to C.
///
/// XPath 1.0 has no escape sequence inside string literals, so the result is
/// one of:
///
/// * `'…'`  when the input contains no single quote,
/// * `"…"`  when it contains a single quote but no double quote,
/// * `concat("…",'…',…)` when it contains both; each argument is a run of the
///   input wrapped in whichever quote character does not occur in that run.
///
/// C side, as declared in `ext/dom/xpath_rust.h`:
/// `extern char* domxpath_quote_literal(const char *const input, uintptr_t *const len);`
/// and used from `ext/dom/xpath.c` as
/// `domxpath_quote_literal(input, &input_len)` followed by
/// `RETURN_STRINGL(ouput, input_len)`.
///
/// # Parameters
///
/// * `input` - pointer to the bytes to quote. It does not need to be
///   NUL-terminated and may contain NUL bytes. It may be null only when
///   `*len` is 0.
/// * `len` - in/out. On entry, the number of readable bytes at `input`; on
///   successful return, the number of bytes in the result, not counting the
///   trailing NUL terminator.
///
/// # Returns
///
/// A pointer to a heap buffer of `*len + 1` bytes (the literal plus one NUL
/// terminator), owned by the caller. Release it with
/// [`domxpath_quote_literal_free`]; it must not be passed to C `free()`.
/// Returns null, leaving `*len` untouched, when `len` is null or when `input`
/// is null while `*len` is non-zero.
///
/// # Safety
///
/// The function keeps its original safe signature, but the caller must
/// guarantee that `len` (if non-null) points to a valid, writable `usize` and
/// that `input` (if non-null) points to at least `*len` readable bytes.
#[no_mangle]
pub extern "C" fn domxpath_quote_literal(input: *const c_char, len: *mut usize) -> *mut c_char {
    if len.is_null() {
        return std::ptr::null_mut();
    }
    // SAFETY: `len` is non-null (checked above); the caller guarantees it
    // points to a valid `usize`.
    let input_len = unsafe { *len };

    let bytes: &[u8] = if input_len == 0 {
        // `slice::from_raw_parts` requires a non-null pointer even for an
        // empty slice, so never build one from the caller's pointer here.
        &[]
    } else if input.is_null() {
        return std::ptr::null_mut();
    } else {
        // SAFETY: `input` is non-null and the caller guarantees `input_len`
        // readable bytes behind it.
        unsafe { std::slice::from_raw_parts(input.cast::<u8>(), input_len) }
    };

    let mut quoted = quote_literal_bytes(bytes);
    let quoted_len = quoted.len();
    // Terminate manually instead of going through `CString::new`, which
    // rejects interior NUL bytes; the input is length-delimited and may
    // legitimately contain them.
    quoted.push(0);

    // SAFETY: `len` was validated above.
    unsafe {
        *len = quoted_len;
    }

    // A boxed slice has capacity == length, so the free function can rebuild
    // it from the pointer and the reported length alone.
    Box::into_raw(quoted.into_boxed_slice()).cast::<c_char>()
}

/// Releases a buffer previously returned by [`domxpath_quote_literal`].
///
/// A matching C declaration would be
/// `extern void domxpath_quote_literal_free(char *ptr, uintptr_t len);`.
/// A null `ptr` is ignored.
///
/// # Safety
///
/// `ptr` must be null or a pointer returned by `domxpath_quote_literal` that
/// has not been freed yet, and `len` must be the length that call wrote back
/// through its `len` argument.
#[no_mangle]
pub unsafe extern "C" fn domxpath_quote_literal_free(ptr: *mut c_char, len: usize) {
    if ptr.is_null() {
        return;
    }
    // The allocation is one byte longer than the reported length because of
    // the NUL terminator.
    let total = match len.checked_add(1) {
        Some(total) => total,
        None => return,
    };
    let raw = std::ptr::slice_from_raw_parts_mut(ptr.cast::<u8>(), total);
    // SAFETY: per the contract above, `raw` describes exactly the boxed slice
    // leaked by `domxpath_quote_literal`.
    unsafe {
        drop(Box::from_raw(raw));
    }
}

/// Safe core of [`domxpath_quote_literal`]: returns the quoted literal for
/// `bytes` without a trailing NUL.
fn quote_literal_bytes(bytes: &[u8]) -> Vec<u8> {
    const SINGLE: u8 = b'\'';
    const DOUBLE: u8 = b'"';

    /// Wraps `body` in `quote` on both sides.
    fn wrap(quote: u8, body: &[u8]) -> Vec<u8> {
        // +2 for the quotes, +1 so the caller's NUL terminator fits without
        // a reallocation.
        let mut out = Vec::with_capacity(body.len() + 3);
        out.push(quote);
        out.extend_from_slice(body);
        out.push(quote);
        out
    }

    if !bytes.contains(&SINGLE) {
        return wrap(SINGLE, bytes);
    }
    if !bytes.contains(&DOUBLE) {
        return wrap(DOUBLE, bytes);
    }

    // Both quote characters occur: split the input into runs and let XPath's
    // concat() glue them back together.
    let mut out = Vec::with_capacity(bytes.len() + 16);
    out.extend_from_slice(b"concat(");

    let mut rest = bytes;
    while !rest.is_empty() {
        let to_single = rest.iter().position(|&b| b == SINGLE).unwrap_or(rest.len());
        let to_double = rest.iter().position(|&b| b == DOUBLE).unwrap_or(rest.len());

        // Pick the quote character whose next occurrence is farther away, so
        // each run is as long as possible. The two distances cannot both be
        // zero on a non-empty slice, so every iteration consumes input.
        let (quote, run) = if to_single > to_double {
            (SINGLE, to_single)
        } else {
            (DOUBLE, to_double)
        };

        let (chunk, tail) = rest.split_at(run);
        out.push(quote);
        out.extend_from_slice(chunk);
        out.push(quote);
        out.push(b',');
        rest = tail;
    }

    // The loop ran at least once, so the last byte is the separator written
    // after the final run; turn it into the closing parenthesis.
    if let Some(last) = out.last_mut() {
        *last = b')';
    }
    out
}