@@ -778,7 +778,7 @@ collector_strpos(int c, void* data)
778
778
for (;;) {
779
779
pc -> found_pos ++ ;
780
780
p = h ;
781
- m = pc -> needle .buffer ;
781
+ m = ( int * ) pc -> needle .buffer ;
782
782
n = pc -> needle_pos - 1 ;
783
783
while (n > 0 && * p == * m ) {
784
784
n -- ;
@@ -857,87 +857,203 @@ mbfl_strpos(
857
857
int offset ,
858
858
int reverse )
859
859
{
860
- int n , result , negative_offset = 0 ;
861
- unsigned char * p ;
862
- mbfl_convert_filter * filter ;
863
- struct collector_strpos_data pc ;
860
+ int result ;
861
+ mbfl_string _haystack_u8 , _needle_u8 ;
862
+ const mbfl_string * haystack_u8 , * needle_u8 ;
863
+ const unsigned char * u8_tbl ;
864
864
865
- if (haystack == NULL || needle == NULL ) {
865
+ if (haystack == NULL || haystack -> val == NULL || needle == NULL || needle -> val == NULL ) {
866
866
return -8 ;
867
867
}
868
- /* needle is converted into wchar */
869
- mbfl_wchar_device_init (& pc .needle );
870
- filter = mbfl_convert_filter_new (
871
- needle -> no_encoding ,
872
- mbfl_no_encoding_wchar ,
873
- mbfl_wchar_device_output , 0 , & pc .needle );
874
- if (filter == NULL ) {
875
- return -4 ;
876
- }
877
- p = needle -> val ;
878
- n = needle -> len ;
879
- if (p != NULL ) {
880
- while (n > 0 ) {
881
- if ((* filter -> filter_function )(* p ++ , filter ) < 0 ) {
882
- break ;
883
- }
884
- n -- ;
868
+
869
+ {
870
+ const mbfl_encoding * u8_enc ;
871
+ u8_enc = mbfl_no2encoding (mbfl_no_encoding_utf8 );
872
+ if (u8_enc == NULL || u8_enc -> mblen_table == NULL ) {
873
+ return -8 ;
885
874
}
875
+ u8_tbl = u8_enc -> mblen_table ;
886
876
}
887
- mbfl_convert_filter_flush (filter );
888
- mbfl_convert_filter_delete (filter );
889
- pc .needle_len = pc .needle .pos ;
890
- if (pc .needle .buffer == NULL ) {
891
- return -4 ;
892
- }
893
- if (pc .needle_len <= 0 ) {
894
- mbfl_wchar_device_clear (& pc .needle );
895
- return -2 ;
896
- }
897
- /* initialize filter and collector data */
898
- filter = mbfl_convert_filter_new (
899
- haystack -> no_encoding ,
900
- mbfl_no_encoding_wchar ,
901
- collector_strpos , 0 , & pc );
902
- if (filter == NULL ) {
903
- mbfl_wchar_device_clear (& pc .needle );
904
- return -4 ;
877
+
878
+ if (haystack -> no_encoding != mbfl_no_encoding_utf8 ) {
879
+ mbfl_string_init (& _haystack_u8 );
880
+ haystack_u8 = mbfl_convert_encoding (haystack , & _haystack_u8 , mbfl_no_encoding_utf8 );
881
+ if (haystack_u8 == NULL ) {
882
+ result = -4 ;
883
+ goto out ;
884
+ }
885
+ } else {
886
+ haystack_u8 = haystack ;
905
887
}
906
888
907
- if (offset < 0 ) {
908
- negative_offset = - offset - pc .needle_len ;
909
- if (negative_offset < 0 ) {
910
- negative_offset = 0 ;
889
+ if (needle -> no_encoding != mbfl_no_encoding_utf8 ) {
890
+ mbfl_string_init (& _needle_u8 );
891
+ needle_u8 = mbfl_convert_encoding (needle , & _needle_u8 , mbfl_no_encoding_utf8 );
892
+ if (needle_u8 == NULL ) {
893
+ result = -4 ;
894
+ goto out ;
911
895
}
912
- offset = 0 ;
896
+ } else {
897
+ needle_u8 = needle ;
913
898
}
914
899
915
- pc .start = offset ;
916
- pc .output = 0 ;
917
- pc .needle_pos = 0 ;
918
- pc .found_pos = 0 ;
919
- pc .matched_pos = -1 ;
900
+ if (needle_u8 -> len < 1 ) {
901
+ result = -8 ;
902
+ goto out ;
903
+ }
920
904
921
- /* feed data */
922
- p = haystack -> val ;
923
- n = haystack -> len - negative_offset ;
924
- if (p != NULL ) {
925
- while (n > 0 ) {
926
- if ((* filter -> filter_function )(* p ++ , filter ) < 0 ) {
927
- pc .matched_pos = -4 ;
928
- break ;
905
+ result = -1 ;
906
+ if (haystack_u8 -> len < needle_u8 -> len ) {
907
+ goto out ;
908
+ }
909
+
910
+ if (!reverse ) {
911
+ unsigned int jtbl [1 << (sizeof (unsigned char ) * 8 )];
912
+ unsigned int needle_u8_len = needle_u8 -> len ;
913
+ unsigned int i ;
914
+ const unsigned char * p , * q , * e ;
915
+ const unsigned char * haystack_u8_val = haystack_u8 -> val ,
916
+ * needle_u8_val = needle_u8 -> val ;
917
+ for (i = 0 ; i < sizeof (jtbl ) / sizeof (* jtbl ); ++ i ) {
918
+ jtbl [i ] = needle_u8_len + 1 ;
919
+ }
920
+ for (i = 0 ; i < needle_u8_len - 1 ; ++ i ) {
921
+ jtbl [needle_u8_val [i ]] = needle_u8_len - i ;
922
+ }
923
+ e = haystack_u8_val + haystack_u8 -> len ;
924
+ p = haystack_u8_val ;
925
+ while (-- offset >= 0 ) {
926
+ if (p >= e ) {
927
+ result = -16 ;
928
+ goto out ;
929
929
}
930
- if (pc .matched_pos >= 0 && !reverse ) {
931
- break ;
930
+ p += u8_tbl [* p ];
931
+ }
932
+ p += needle_u8_len ;
933
+ if (p > e ) {
934
+ goto out ;
935
+ }
936
+ while (p <= e ) {
937
+ const unsigned char * pv = p ;
938
+ q = needle_u8_val + needle_u8_len ;
939
+ for (;;) {
940
+ if (q == needle_u8_val ) {
941
+ result = 0 ;
942
+ while (p > haystack_u8_val ) {
943
+ unsigned char c = * -- p ;
944
+ if (c < 0x80 ) {
945
+ ++ result ;
946
+ } else if ((c & 0xc0 ) != 0x80 ) {
947
+ ++ result ;
948
+ }
949
+ }
950
+ goto out ;
951
+ }
952
+ if (* -- q != * -- p ) {
953
+ break ;
954
+ }
955
+ }
956
+ p += jtbl [* p ];
957
+ if (p <= pv ) {
958
+ p = pv + 1 ;
959
+ }
960
+ }
961
+ } else {
962
+ unsigned int jtbl [1 << (sizeof (unsigned char ) * 8 )];
963
+ unsigned int needle_u8_len = needle_u8 -> len , needle_len = 0 ;
964
+ unsigned int i ;
965
+ const unsigned char * p , * e , * q , * qe ;
966
+ const unsigned char * haystack_u8_val = haystack_u8 -> val ,
967
+ * needle_u8_val = needle_u8 -> val ;
968
+ for (i = 0 ; i < sizeof (jtbl ) / sizeof (* jtbl ); ++ i ) {
969
+ jtbl [i ] = needle_u8_len ;
970
+ }
971
+ for (i = needle_u8_len - 1 ; i > 0 ; -- i ) {
972
+ unsigned char c = needle_u8_val [i ];
973
+ jtbl [c ] = i ;
974
+ if (c < 0x80 ) {
975
+ ++ needle_len ;
976
+ } else if ((c & 0xc0 ) != 0x80 ) {
977
+ ++ needle_len ;
978
+ }
979
+ }
980
+ {
981
+ unsigned char c = needle_u8_val [0 ];
982
+ if (c < 0x80 ) {
983
+ ++ needle_len ;
984
+ } else if ((c & 0xc0 ) != 0x80 ) {
985
+ ++ needle_len ;
986
+ }
987
+ }
988
+ e = haystack_u8_val ;
989
+ p = e + haystack_u8 -> len ;
990
+ qe = needle_u8_val + needle_u8_len ;
991
+ if (offset < 0 ) {
992
+ if (- offset > needle_len ) {
993
+ offset += needle_len ;
994
+ while (offset < 0 ) {
995
+ unsigned char c ;
996
+ if (p <= e ) {
997
+ result = -16 ;
998
+ goto out ;
999
+ }
1000
+ c = * (-- p );
1001
+ if (c < 0x80 ) {
1002
+ ++ offset ;
1003
+ } else if ((c & 0xc0 ) != 0x80 ) {
1004
+ ++ offset ;
1005
+ }
1006
+ }
1007
+ }
1008
+ } else {
1009
+ const unsigned char * ee = haystack_u8_val + haystack_u8 -> len ;
1010
+ while (-- offset >= 0 ) {
1011
+ if (e >= ee ) {
1012
+ result = -16 ;
1013
+ goto out ;
1014
+ }
1015
+ e += u8_tbl [* e ];
1016
+ }
1017
+ }
1018
+ if (p < e + needle_u8_len ) {
1019
+ goto out ;
1020
+ }
1021
+ p -= needle_u8_len ;
1022
+ while (p >= e ) {
1023
+ const unsigned char * pv = p ;
1024
+ q = needle_u8_val ;
1025
+ for (;;) {
1026
+ if (q == qe ) {
1027
+ result = 0 ;
1028
+ p -= needle_u8_len ;
1029
+ while (p > haystack_u8_val ) {
1030
+ unsigned char c = * -- p ;
1031
+ if (c < 0x80 ) {
1032
+ ++ result ;
1033
+ } else if ((c & 0xc0 ) != 0x80 ) {
1034
+ ++ result ;
1035
+ }
1036
+ }
1037
+ goto out ;
1038
+ }
1039
+ if (* q != * p ) {
1040
+ break ;
1041
+ }
1042
+ ++ p , ++ q ;
1043
+ }
1044
+ p -= jtbl [* p ];
1045
+ if (p >= pv ) {
1046
+ p = pv - 1 ;
932
1047
}
933
- n -- ;
934
1048
}
935
1049
}
936
- mbfl_convert_filter_flush (filter );
937
- result = pc .matched_pos ;
938
- mbfl_convert_filter_delete (filter );
939
- mbfl_wchar_device_clear (& pc .needle );
940
-
1050
+ out :
1051
+ if (haystack_u8 == & _haystack_u8 ) {
1052
+ mbfl_string_clear (& _haystack_u8 );
1053
+ }
1054
+ if (needle_u8 == & _needle_u8 ) {
1055
+ mbfl_string_clear (& _needle_u8 );
1056
+ }
941
1057
return result ;
942
1058
}
943
1059
0 commit comments