Skip to content

Commit e45f407

Browse files
committed
changes from feedback, using cache
1 parent ec1cdb0 commit e45f407

File tree

2 files changed

+81
-92
lines changed

2 files changed

+81
-92
lines changed

ext/pgsql/pgsql.c

Lines changed: 79 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,17 @@ ZEND_TSRMLS_CACHE_DEFINE()
147147
ZEND_GET_MODULE(pgsql)
148148
#endif
149149

150+
/* Local definition of the PCRE cache entry so this file can reach into the
 * entries returned by pcre_get_compiled_regex_cache(). Within this file only
 * `re` and `refcount` are accessed (see php_pgsql_convert_match).
 * NOTE(review): presumably the field order and types must mirror the
 * definition used by ext/pcre itself -- confirm before changing anything. */
struct _pcre_cache_entry {
151+
/* Compiled pattern; handed to php_pcre_create_match_data() and pcre2_match(). */
pcre2_code *re;
152+
/* Pointer is not NULL when there are named captures.
153+
* Length is equal to capture_count + 1 to account for capture group 0. */
154+
zend_string **subpats_table;
155+
uint32_t preg_options;
156+
uint32_t capture_count;
157+
uint32_t compile_options;
158+
/* Incremented before and decremented after the pcre2_match() call in
 * php_pgsql_convert_match(). */
uint32_t refcount;
159+
};
160+
150161
static int le_plink;
151162

152163
static zend_class_entry *pgsql_link_ce, *pgsql_result_ce, *pgsql_lob_ce;
@@ -490,8 +501,51 @@ static PHP_GINIT_FUNCTION(pgsql)
490501
#if defined(COMPILE_DL_PGSQL) && defined(ZTS)
491502
ZEND_TSRMLS_CACHE_UPDATE();
492503
#endif
504+
505+
size_t i = 0;
493506
memset(pgsql_globals, 0, sizeof(zend_pgsql_globals));
494507
zend_hash_init(&pgsql_globals->connections, 0, NULL, NULL, 1);
508+
509+
#define ADD_REGEX(reg) pgsql_globals->regexes[i ++] = zend_string_init(reg, strlen(reg), true)
510+
ADD_REGEX("#^([+-]{0,1}[0-9]+)$#n");
511+
ADD_REGEX("#^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$#n");
512+
ADD_REGEX("#^[+-]{0,1}(inf)(inity){0,1}$#ni");
513+
ADD_REGEX("#^[0-9]+$#n");
514+
ADD_REGEX("#^((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])(\\/[0-9]{1,3})?$#n");
515+
ADD_REGEX("#^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(\\/[0-9]{1,3})?$#n");
516+
ADD_REGEX("#^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})(([ \\t]+|T)(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}(\\.[0-9]+){0,1}([ \\t]*([+-][0-9]{1,4}(:[0-9]{1,2}){0,1}|[-a-zA-Z_/+]{1,50})){0,1})){0,1}$#ni");
517+
ADD_REGEX("#^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})$#ni");
518+
ADD_REGEX("#^(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}){0,1}$#ni");
519+
ADD_REGEX("#^(@?[ \\t]+)?("
520+
/* Textual time units and their abbreviations: */
521+
"(([-+]?[ \\t]+)?"
522+
"[0-9]+(\\.[0-9]*)?[ \\t]*"
523+
"(millenniums|millennia|millennium|mil|mils|"
524+
"centuries|century|cent|c|"
525+
"decades|decade|dec|decs|"
526+
"years|year|y|"
527+
"months|month|mon|"
528+
"weeks|week|w|"
529+
"days|day|d|"
530+
"hours|hour|hr|hrs|h|"
531+
"minutes|minute|mins|min|m|"
532+
"seconds|second|secs|sec|s))+|"
533+
/* Textual time units plus (dd)* hh[:mm[:ss]] */
534+
"((([-+]?[ \\t]+)?"
535+
"[0-9]+(\\.[0-9]*)?[ \\t]*"
536+
"(millenniums|millennia|millennium|mil|mils|"
537+
"centuries|century|cent|c|"
538+
"decades|decade|dec|decs|"
539+
"years|year|y|"
540+
"months|month|mon|"
541+
"weeks|week|w|"
542+
"days|day|d))+"
543+
"([-+]?[ \\t]+"
544+
"([0-9]+[ \\t]+)+" /* dd */
545+
"(([0-9]{1,2}:){0,2}[0-9]{0,2})" /* hh:[mm:[ss]] */
546+
")?))"
547+
"([ \\t]+ago)?$#ni");
548+
ADD_REGEX("#^([0-9a-f]{2,2}:){5,5}[0-9a-f]{2,2}$#ni");
495549
}
496550

497551
static void php_libpq_version(char *buf, size_t len)
@@ -558,8 +612,12 @@ PHP_MINIT_FUNCTION(pgsql)
558612
PHP_MSHUTDOWN_FUNCTION(pgsql)
559613
{
560614
UNREGISTER_INI_ENTRIES();
615+
size_t i;
561616
zend_hash_destroy(&PGG(connections));
562617

618+
for (i = 0; i < 11; i ++)
619+
zend_string_release_ex(PGG(regexes[i]), true);
620+
563621
return SUCCESS;
564622
}
565623

@@ -4671,15 +4729,12 @@ static php_pgsql_data_type php_pgsql_get_data_type(const zend_string *type_name)
46714729
/* {{{ php_pgsql_convert_match
46724730
* test field value with regular expression specified.
46734731
*/
4674-
static int php_pgsql_convert_match(const zend_string *str, const char *regex , size_t regex_len, int icase)
4732+
static int php_pgsql_convert_match(const zend_string *str, zend_string *regex)
46754733
{
4676-
pcre2_code *re;
4677-
PCRE2_SIZE err_offset;
4678-
int res, errnumber;
4679-
uint32_t options = PCRE2_NO_AUTO_CAPTURE;
4734+
pcre_cache_entry *centry;
4735+
int res;
46804736
size_t i;
46814737
pcre2_match_data *match_data;
4682-
PCRE2_UCHAR err_msg[128];
46834738

46844739
/* Check invalid chars for POSIX regex */
46854740
for (i = 0; i < ZSTR_LEN(str); i++) {
@@ -4690,38 +4745,20 @@ static int php_pgsql_convert_match(const zend_string *str, const char *regex , s
46904745
}
46914746
}
46924747

4693-
if (icase) {
4694-
options |= PCRE2_CASELESS;
4695-
}
4696-
4697-
re = pcre2_compile((PCRE2_SPTR)regex, regex_len, options, &errnumber, &err_offset, php_pcre_cctx());
4698-
if (NULL == re) {
4699-
pcre2_get_error_message(errnumber, err_msg, sizeof(err_msg));
4700-
php_error_docref(NULL, E_WARNING, "Cannot compile regex: '%s'", err_msg);
4748+
centry = pcre_get_compiled_regex_cache(regex);
4749+
if (NULL == centry) {
47014750
return FAILURE;
47024751
}
4703-
#if defined(HAVE_PCRE_JIT_SUPPORT)
4704-
if (PCRE_G(jit)) {
4705-
/*
4706-
* Check if the JIT pass did not work, but the regex had been compiled successfully earlier
4707-
* so let's not end it here.
4708-
*/
4709-
if (UNEXPECTED(pcre2_jit_compile(re, PCRE2_JIT_COMPLETE) != 0)) {
4710-
pcre2_get_error_message(errnumber, err_msg, sizeof(err_msg));
4711-
php_error_docref(NULL, E_WARNING, "Cannot use JIT on regex: '%s'", err_msg);
4712-
}
4713-
}
4714-
#endif
47154752

4716-
match_data = php_pcre_create_match_data(0, re);
4753+
match_data = php_pcre_create_match_data(0, centry->re);
47174754
if (NULL == match_data) {
4718-
pcre2_code_free(re);
47194755
php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
47204756
return FAILURE;
47214757
}
4722-
res = pcre2_match(re, (PCRE2_SPTR)ZSTR_VAL(str), ZSTR_LEN(str), 0, 0, match_data, php_pcre_mctx());
4758+
centry->refcount ++;
4759+
res = pcre2_match(centry->re, (PCRE2_SPTR)ZSTR_VAL(str), ZSTR_LEN(str), 0, 0, match_data, php_pcre_mctx());
47234760
php_pcre_free_match_data(match_data);
4724-
pcre2_code_free(re);
4761+
centry->refcount --;
47254762

47264763
if (res == PCRE2_ERROR_NOMATCH) {
47274764
return FAILURE;
@@ -4902,14 +4939,12 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
49024939
}
49034940
else {
49044941
/* FIXME: better regex must be used */
4905-
#define REGEX0 "^([+-]{0,1}[0-9]+)$"
4906-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE) {
4942+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[0])) == FAILURE) {
49074943
err = 1;
49084944
}
49094945
else {
49104946
ZVAL_STRINGL(&new_val, Z_STRVAL_P(val), Z_STRLEN_P(val));
49114947
}
4912-
#undef REGEX0
49134948
}
49144949
break;
49154950

@@ -4945,11 +4980,9 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
49454980
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
49464981
}
49474982
else {
4948-
#define REGEX0 "^[-+]?[0-9]*\\.?[0-9]+([eE][-+]?[0-9]+)?$"
4949-
#define REGEX1 "^[+-]{0,1}(inf)(inity){0,1}$"
49504983
/* better regex? */
4951-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE) {
4952-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX1, sizeof(REGEX1)-1, 1) == FAILURE) {
4984+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[1])) == FAILURE) {
4985+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[2])) == FAILURE) {
49534986
err = 1;
49544987
} else {
49554988
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
@@ -4958,8 +4991,6 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
49584991
else {
49594992
ZVAL_STRING(&new_val, Z_STRVAL_P(val));
49604993
}
4961-
#undef REGEX0
4962-
#undef REGEX1
49634994
}
49644995
break;
49654996

@@ -5055,7 +5086,7 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
50555086
}
50565087
else {
50575088
/* better regex? */
5058-
if (php_pgsql_convert_match(Z_STR_P(val), "^[0-9]+$", sizeof("^[0-9]+$")-1, 0) == FAILURE) {
5089+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[3])) == FAILURE) {
50595090
err = 1;
50605091
}
50615092
else {
@@ -5095,20 +5126,16 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
50955126
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
50965127
}
50975128
else {
5098-
#define REGEX0 "^((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])(\\/[0-9]{1,3})?$"
5099-
#define REGEX1 "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))(\\/[0-9]{1,3})?$"
51005129
/* The inet type holds an IPv4 or IPv6 host address, and optionally its subnet, all in one field. See more in the doc.
51015130
The regex might still not be perfect, but it catches most IP variants. We might decide to remove the regex
51025131
altogether, though, and let the server side handle it.*/
5103-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 0) == FAILURE
5104-
&& php_pgsql_convert_match(Z_STR_P(val), REGEX1, sizeof(REGEX1)-1, 0) == FAILURE) {
5132+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[4])) == FAILURE
5133+
&& php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[5])) == FAILURE) {
51055134
err = 2;
51065135
}
51075136
else {
51085137
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
51095138
}
5110-
#undef REGEX0
5111-
#undef REGEX1
51125139
}
51135140
break;
51145141

@@ -5139,14 +5166,12 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
51395166
} else if (zend_string_equals_literal_ci(Z_STR_P(val), "now()")) {
51405167
ZVAL_STRINGL(&new_val, "NOW()", sizeof("NOW()")-1);
51415168
} else {
5142-
#define REGEX0 "^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})(([ \\t]+|T)(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}(\\.[0-9]+){0,1}([ \\t]*([+-][0-9]{1,4}(:[0-9]{1,2}){0,1}|[-a-zA-Z_/+]{1,50})){0,1})){0,1}$"
51435169
/* better regex? */
5144-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
5170+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[6])) == FAILURE) {
51455171
err = 1;
51465172
} else {
51475173
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
51485174
}
5149-
#undef REGEX0
51505175
}
51515176
break;
51525177

@@ -5170,15 +5195,13 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
51705195
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
51715196
}
51725197
else {
5173-
#define REGEX0 "^([0-9]{4}[/-][0-9]{1,2}[/-][0-9]{1,2})$"
51745198
/* FIXME: better regex must be used */
5175-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
5199+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[7])) == FAILURE) {
51765200
err = 1;
51775201
}
51785202
else {
51795203
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
51805204
}
5181-
#undef REGEX0
51825205
}
51835206
break;
51845207

@@ -5202,15 +5225,13 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
52025225
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
52035226
}
52045227
else {
5205-
#define REGEX0 "^(([0-9]{1,2}:[0-9]{1,2}){1}(:[0-9]{1,2}){0,1}){0,1}$"
52065228
/* FIXME: better regex must be used */
5207-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
5229+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[8])) == FAILURE) {
52085230
err = 1;
52095231
}
52105232
else {
52115233
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
52125234
}
5213-
#undef REGEX0
52145235
}
52155236
break;
52165237

@@ -5251,44 +5272,13 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
52515272
unit markings. For example, '1 12:59:10' is read the same as '1 day 12 hours 59 min 10
52525273
sec'.
52535274
*/
5254-
#define REGEX0 \
5255-
"^(@?[ \\t]+)?(" \
5256-
/* Textual time units and their abbreviations: */ \
5257-
"(([-+]?[ \\t]+)?" \
5258-
"[0-9]+(\\.[0-9]*)?[ \\t]*" \
5259-
"(millenniums|millennia|millennium|mil|mils|" \
5260-
"centuries|century|cent|c|" \
5261-
"decades|decade|dec|decs|" \
5262-
"years|year|y|" \
5263-
"months|month|mon|" \
5264-
"weeks|week|w|" \
5265-
"days|day|d|" \
5266-
"hours|hour|hr|hrs|h|" \
5267-
"minutes|minute|mins|min|m|" \
5268-
"seconds|second|secs|sec|s))+|" \
5269-
/* Textual time units plus (dd)* hh[:mm[:ss]] */ \
5270-
"((([-+]?[ \\t]+)?" \
5271-
"[0-9]+(\\.[0-9]*)?[ \\t]*" \
5272-
"(millenniums|millennia|millennium|mil|mils|" \
5273-
"centuries|century|cent|c|" \
5274-
"decades|decade|dec|decs|" \
5275-
"years|year|y|" \
5276-
"months|month|mon|" \
5277-
"weeks|week|w|" \
5278-
"days|day|d))+" \
5279-
"([-+]?[ \\t]+" \
5280-
"([0-9]+[ \\t]+)+" /* dd */ \
5281-
"(([0-9]{1,2}:){0,2}[0-9]{0,2})" /* hh:[mm:[ss]] */ \
5282-
")?))" \
5283-
"([ \\t]+ago)?$"
5284-
5285-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
5275+
5276+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[9])) == FAILURE) {
52865277
err = 1;
52875278
}
52885279
else {
52895280
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
52905281
}
5291-
#undef REGEX0
52925282
}
52935283
break;
52945284

@@ -5353,14 +5343,12 @@ PHP_PGSQL_API zend_result php_pgsql_convert(PGconn *pg_link, const zend_string *
53535343
ZVAL_STR(&new_val, ZSTR_KNOWN(ZEND_STR_NULL));
53545344
}
53555345
else {
5356-
#define REGEX0 "^([0-9a-f]{2,2}:){5,5}[0-9a-f]{2,2}$"
5357-
if (php_pgsql_convert_match(Z_STR_P(val), REGEX0, sizeof(REGEX0)-1, 1) == FAILURE) {
5346+
if (php_pgsql_convert_match(Z_STR_P(val), PGG(regexes[10])) == FAILURE) {
53585347
err = 1;
53595348
}
53605349
else {
53615350
ZVAL_STR(&new_val, php_pgsql_add_quotes(Z_STR_P(val)));
53625351
}
5363-
#undef REGEX0
53645352
}
53655353
break;
53665354

ext/pgsql/php_pgsql.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,10 +179,11 @@ ZEND_BEGIN_MODULE_GLOBALS(pgsql)
179179
zend_long num_links,num_persistent;
180180
zend_long max_links,max_persistent;
181181
bool allow_persistent;
182-
int ignore_notices;
182+
int ignore_notices;
183183
zend_long auto_reset_persistent;
184184
int log_notices;
185185
zend_object *default_link; /* default link when connection is omitted */
186+
zend_string *regexes[11];
186187
HashTable field_oids;
187188
HashTable table_oids;
188189
HashTable connections;

0 commit comments

Comments
 (0)