@@ -690,6 +690,136 @@ mb_regex_groups_iter(const OnigUChar* name, const OnigUChar* name_end, int ngrou
690
690
}
691
691
/* }}} */
692
692
693
+ /*
694
+ * Helper for _php_mb_regex_ereg_replace_exec
695
+ */
696
+ /* {{{ mb_regex_substitute */
697
+ static inline void mb_regex_substitute (
698
+ smart_str * pbuf ,
699
+ const char * subject ,
700
+ size_t subject_len ,
701
+ char * replace ,
702
+ size_t replace_len ,
703
+ php_mb_regex_t * regexp ,
704
+ OnigRegion * regs ,
705
+ const mbfl_encoding * enc
706
+ ) {
707
+ char * p , * sp , * eos ;
708
+ int no ; /* bakreference group number */
709
+ int clen ; /* byte-length of the current character */
710
+
711
+ p = replace ;
712
+ eos = replace + replace_len ;
713
+
714
+ while (p < eos ) {
715
+ clen = (int ) php_mb_mbchar_bytes_ex (p , enc );
716
+ if (clen != 1 || p == eos || p [0 ] != '\\' ) {
717
+ /* skip anything that's not an ascii backslash */
718
+ smart_str_appendl (pbuf , p , clen );
719
+ p += clen ;
720
+ continue ;
721
+ }
722
+ sp = p ; /* save position */
723
+ clen = (int ) php_mb_mbchar_bytes_ex (++ p , enc );
724
+ if (clen != 1 || p == eos ) {
725
+ /* skip escaped multibyte char */
726
+ p += clen ;
727
+ smart_str_appendl (pbuf , sp , p - sp );
728
+ continue ;
729
+ }
730
+ no = -1 ;
731
+ switch (p [0 ]) {
732
+ case '0' :
733
+ no = 0 ;
734
+ p ++ ;
735
+ break ;
736
+ case '1' : case '2' : case '3' : case '4' :
737
+ case '5' : case '6' : case '7' : case '8' : case '9' :
738
+ if (!onig_noname_group_capture_is_active (regexp )) {
739
+ /*
740
+ * FIXME:
741
+ * Oniguruma throws a compile error if numbered backrefs are used with named groups in the pattern.
742
+ * For now we just ignore them, but in the future we might want to raise a warning
743
+ * and abort the whole replace operation.
744
+ */
745
+ p ++ ;
746
+ smart_str_appendl (pbuf , sp , p - sp );
747
+ continue ;
748
+ }
749
+ no = p [0 ] - '0' ;
750
+ p ++ ;
751
+ break ;
752
+ case 'k' :
753
+ clen = (int ) php_mb_mbchar_bytes_ex (++ p , enc );
754
+ if (clen != 1 || p == eos || (p [0 ] != '<' && p [0 ] != '\'' )) {
755
+ /* not a backref delimiter */
756
+ p += clen ;
757
+ smart_str_appendl (pbuf , sp , p - sp );
758
+ continue ;
759
+ }
760
+ /* try to consume everything until next delimiter */
761
+ char delim = p [0 ] == '<' ? '>' : '\'' ;
762
+ char * name , * name_end ;
763
+ char maybe_num = 1 ;
764
+ name_end = name = p + 1 ;
765
+ while (name_end < eos ) {
766
+ clen = (int ) php_mb_mbchar_bytes_ex (name_end , enc );
767
+ if (clen != 1 ) {
768
+ name_end += clen ;
769
+ maybe_num = 0 ;
770
+ continue ;
771
+ }
772
+ if (name_end [0 ] == delim ) break ;
773
+ if (maybe_num && !isdigit (name_end [0 ])) maybe_num = 0 ;
774
+ name_end ++ ;
775
+ }
776
+ p = name_end + 1 ;
777
+ if (name_end - name < 1 || name_end >= eos ) {
778
+ /* the backref was empty or we failed to find the end delimiter */
779
+ smart_str_appendl (pbuf , sp , p - sp );
780
+ continue ;
781
+ }
782
+ /* we have either a name or a number */
783
+ if (maybe_num ) {
784
+ if (!onig_noname_group_capture_is_active (regexp )) {
785
+ /* see above note on mixing numbered & named backrefs */
786
+ smart_str_appendl (pbuf , sp , p - sp );
787
+ continue ;
788
+ }
789
+ if (name_end - name == 1 ) {
790
+ no = name [0 ] - '0' ;
791
+ break ;
792
+ }
793
+ if (name [0 ] == '0' ) {
794
+ /* 01 is not a valid number */
795
+ break ;
796
+ }
797
+ no = (int ) strtoul (name , NULL , 10 );
798
+ break ;
799
+ }
800
+ no = onig_name_to_backref_number (regexp , (OnigUChar * )name , (OnigUChar * )name_end , regs );
801
+ break ;
802
+ default :
803
+ p += clen ;
804
+ smart_str_appendl (pbuf , sp , p - sp );
805
+ continue ;
806
+ }
807
+ if (no < 0 || no >= regs -> num_regs ) {
808
+ /* invalid group number reference, keep the escape sequence in the output */
809
+ smart_str_appendl (pbuf , sp , p - sp );
810
+ continue ;
811
+ }
812
+ if (regs -> beg [no ] >= 0 && regs -> beg [no ] < regs -> end [no ] && (size_t )regs -> end [no ] <= subject_len ) {
813
+ smart_str_appendl (pbuf , subject + regs -> beg [no ], regs -> end [no ] - regs -> beg [no ]);
814
+ }
815
+ }
816
+
817
+ if (p < eos ) {
818
+ smart_str_appendl (pbuf , p , eos - p );
819
+ }
820
+ }
821
+ /* }}} */
822
+
693
823
/*
694
824
* php functions
695
825
*/
@@ -857,14 +987,12 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
857
987
char * string ;
858
988
size_t string_len ;
859
989
860
- char * p ;
861
990
php_mb_regex_t * re ;
862
991
OnigSyntaxType * syntax ;
863
992
OnigRegion * regs = NULL ;
864
993
smart_str out_buf = {0 };
865
994
smart_str eval_buf = {0 };
866
995
smart_str * pbuf ;
867
- size_t i ;
868
996
int err , eval , n ;
869
997
OnigUChar * pos ;
870
998
OnigUChar * string_lim ;
@@ -974,38 +1102,11 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
974
1102
break ;
975
1103
}
976
1104
if (err >= 0 ) {
977
- #if moriyoshi_0
978
- if (regs -> beg [0 ] == regs -> end [0 ]) {
979
- php_error_docref (NULL , E_WARNING , "Empty regular expression" );
980
- break ;
981
- }
982
- #endif
983
1105
/* copy the part of the string before the match */
984
1106
smart_str_appendl (& out_buf , (char * )pos , (size_t )((OnigUChar * )(string + regs -> beg [0 ]) - pos ));
985
1107
986
1108
if (!is_callable ) {
987
- /* copy replacement and backrefs */
988
- i = 0 ;
989
- p = replace ;
990
- while (i < replace_len ) {
991
- int fwd = (int ) php_mb_mbchar_bytes_ex (p , enc );
992
- n = -1 ;
993
- if ((replace_len - i ) >= 2 && fwd == 1 &&
994
- p [0 ] == '\\' && p [1 ] >= '0' && p [1 ] <= '9' ) {
995
- n = p [1 ] - '0' ;
996
- }
997
- if (n >= 0 && n < regs -> num_regs ) {
998
- if (regs -> beg [n ] >= 0 && regs -> beg [n ] < regs -> end [n ] && (size_t )regs -> end [n ] <= string_len ) {
999
- smart_str_appendl (pbuf , string + regs -> beg [n ], regs -> end [n ] - regs -> beg [n ]);
1000
- }
1001
- p += 2 ;
1002
- i += 2 ;
1003
- } else {
1004
- smart_str_appendl (pbuf , p , fwd );
1005
- p += fwd ;
1006
- i += fwd ;
1007
- }
1008
- }
1109
+ mb_regex_substitute (pbuf , string , string_len , replace , replace_len , re , regs , enc );
1009
1110
}
1010
1111
1011
1112
if (eval ) {
@@ -1045,6 +1146,10 @@ static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOp
1045
1146
for (i = 0 ; i < regs -> num_regs ; i ++ ) {
1046
1147
add_next_index_stringl (& subpats , string + regs -> beg [i ], regs -> end [i ] - regs -> beg [i ]);
1047
1148
}
1149
+ if (onig_number_of_names (re ) > 0 ) {
1150
+ mb_regex_groups_iter_args args = {& subpats , string , string_len , regs };
1151
+ onig_foreach_name (re , mb_regex_groups_iter , & args );
1152
+ }
1048
1153
1049
1154
ZVAL_COPY_VALUE (& args [0 ], & subpats );
1050
1155
/* null terminate buffer */
0 commit comments