1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include "php.h"
20 #include "php_ini.h"
21 #include "php_globals.h"
22 #include "php_pcre.h"
23 #include "ext/standard/info.h"
24 #include "ext/standard/basic_functions.h"
25 #include "zend_smart_str.h"
26 #include "SAPI.h"
27
28 #include "ext/standard/php_string.h"
29
/* Flags exported to userland as PREG_* constants (registered in MINIT). */
#define PREG_PATTERN_ORDER 1
#define PREG_SET_ORDER 2
#define PREG_OFFSET_CAPTURE (1<<8)
#define PREG_UNMATCHED_AS_NULL (1<<9)

/* preg_split flags, also registered as userland constants in MINIT. */
#define PREG_SPLIT_NO_EMPTY (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE (1<<2)

/* Internal preg option recorded when the 'e' modifier is seen; the pattern
 * compiler rejects any pattern carrying it. */
#define PREG_REPLACE_EVAL (1<<0)

/* Registered as the userland constant PREG_GREP_INVERT in MINIT. */
#define PREG_GREP_INVERT (1<<0)

/* Internal preg option set on a cache entry when JIT compilation produced code. */
#define PREG_JIT (1<<3)

/* Number of cached patterns at which the pattern compiler evicts
 * PCRE_CACHE_SIZE / 8 unreferenced entries before inserting a new one. */
#define PCRE_CACHE_SIZE 4096
46
/* One compiled pattern as kept in PCRE_G(pcre_cache). */
struct _pcre_cache_entry {
	pcre2_code *re;                 /* compiled pattern; freed with pcre2_code_free() by the cache destructors */
	uint32_t preg_options;          /* PHP-level flags (PREG_JIT, ...) */
	uint32_t capture_count;         /* result of PCRE2_INFO_CAPTURECOUNT */
	uint32_t name_count;            /* result of PCRE2_INFO_NAMECOUNT */
	uint32_t compile_options;       /* PCRE2_* options the pattern was compiled with */
	uint32_t extra_compile_options; /* PCRE2_EXTRA_* options in effect at compile time */
	uint32_t refcount;              /* starts at 0; entries with a non-zero value are skipped by pcre_clean_cache() */
};
56
/* Values stored in PCRE_G(error_code); each one is registered as the
 * corresponding userland PREG_*_ERROR constant in MINIT. */
enum {
	PHP_PCRE_NO_ERROR = 0,
	PHP_PCRE_INTERNAL_ERROR,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR,
	PHP_PCRE_RECURSION_LIMIT_ERROR,
	PHP_PCRE_BAD_UTF8_ERROR,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR,
	PHP_PCRE_JIT_STACKLIMIT_ERROR
};
66
67
PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)

#ifdef HAVE_PCRE_JIT_SUPPORT
/* Start and maximum size handed to pcre2_jit_stack_create(). */
#define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
#define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
#endif
/* General context carrying the php_pcre_malloc/php_pcre_free allocator pair. */
ZEND_TLS pcre2_general_context *gctx = NULL;
/* These two are global per thread for now, though it would be possible to use
   them per pattern: either copy them into each pce, or drop the global
   contexts entirely and create one pair for every pce. */
ZEND_TLS pcre2_compile_context *cctx = NULL;
ZEND_TLS pcre2_match_context *mctx = NULL;
/* Preallocated match data block and the flag telling whether it is handed out
   (see php_pcre_create_match_data() / php_pcre_free_match_data()). */
ZEND_TLS pcre2_match_data *mdata = NULL;
ZEND_TLS zend_bool mdata_used = 0;
/* Set to 1 by php_pcre_init_pcre2() once every context was created. */
ZEND_TLS uint8_t pcre2_init_ok = 0;
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
/* Mutex held around the init retry in RINIT; allocated and freed on the main
   thread only. In other build configurations the macros expand to nothing. */
static MUTEX_T pcre_mt = NULL;
#define php_pcre_mutex_alloc() \
	if (tsrm_is_main_thread() && !pcre_mt) pcre_mt = tsrm_mutex_alloc();
#define php_pcre_mutex_free() \
	if (tsrm_is_main_thread() && pcre_mt) { tsrm_mutex_free(pcre_mt); pcre_mt = NULL; }
#define php_pcre_mutex_lock() tsrm_mutex_lock(pcre_mt);
#define php_pcre_mutex_unlock() tsrm_mutex_unlock(pcre_mt);
#else
#define php_pcre_mutex_alloc()
#define php_pcre_mutex_free()
#define php_pcre_mutex_lock()
#define php_pcre_mutex_unlock()
#endif

/* Character tables built by pcre2_maketables(), keyed by locale string. */
ZEND_TLS HashTable char_tables;
100
/* Destructor for char_tables: frees the persistently allocated table
 * stored as the element's pointer. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
106
pcre_handle_exec_error(int pcre_code)107 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
108 {
109 int preg_code = 0;
110
111 switch (pcre_code) {
112 case PCRE2_ERROR_MATCHLIMIT:
113 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
114 break;
115
116 case PCRE2_ERROR_RECURSIONLIMIT:
117 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
118 break;
119
120 case PCRE2_ERROR_BADUTFOFFSET:
121 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
122 break;
123
124 #ifdef HAVE_PCRE_JIT_SUPPORT
125 case PCRE2_ERROR_JIT_STACKLIMIT:
126 preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
127 break;
128 #endif
129
130 default:
131 if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
132 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
133 } else {
134 preg_code = PHP_PCRE_INTERNAL_ERROR;
135 }
136 break;
137 }
138
139 PCRE_G(error_code) = preg_code;
140 }
141 /* }}} */
142
/* Cache destructor for entries allocated with the system allocator. */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
150 /* }}} */
151
/* Cache destructor for entries allocated with the request allocator. */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
159 /* }}} */
160
/* Allocation callback installed in the PCRE2 general context; the `data`
 * argument is not used. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{/*{{{*/
	return pemalloc(size, 1);
}/*}}}*/
166
/* Release callback installed in the PCRE2 general context, the counterpart
 * of php_pcre_malloc(); the `data` argument is not used. */
static void php_pcre_free(void *block, void *data)
{/*{{{*/
	pefree(block, 1);
}/*}}}*/
171
/* Extra compile options applied to the shared compile context by default;
 * the 'X' pattern modifier removes this bit for a single compilation. */
#define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
/* Size passed to pcre2_match_data_create() for the preallocated block; patterns
 * with capture_count + 1 above this get their own match data. */
#define PHP_PCRE_PREALLOC_MDATA_SIZE 32
174
php_pcre_init_pcre2(uint8_t jit)175 static void php_pcre_init_pcre2(uint8_t jit)
176 {/*{{{*/
177 if (!gctx) {
178 gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
179 if (!gctx) {
180 pcre2_init_ok = 0;
181 return;
182 }
183 }
184
185 if (!cctx) {
186 cctx = pcre2_compile_context_create(gctx);
187 if (!cctx) {
188 pcre2_init_ok = 0;
189 return;
190 }
191 }
192
193 /* XXX The 'X' modifier is the default behavior in PCRE2. This option is
194 called dangerous in the manual, as typos in patterns can cause
195 unexpected results. We might want to to switch to the default PCRE2
196 behavior, too, thus causing a certain BC break. */
197 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
198
199 if (!mctx) {
200 mctx = pcre2_match_context_create(gctx);
201 if (!mctx) {
202 pcre2_init_ok = 0;
203 return;
204 }
205 }
206
207 #ifdef HAVE_PCRE_JIT_SUPPORT
208 if (jit && !jit_stack) {
209 jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
210 if (!jit_stack) {
211 pcre2_init_ok = 0;
212 return;
213 }
214 }
215 #endif
216
217 if (!mdata) {
218 mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
219 if (!mdata) {
220 pcre2_init_ok = 0;
221 return;
222 }
223 }
224
225 pcre2_init_ok = 1;
226 }/*}}}*/
227
php_pcre_shutdown_pcre2(void)228 static void php_pcre_shutdown_pcre2(void)
229 {/*{{{*/
230 if (gctx) {
231 pcre2_general_context_free(gctx);
232 gctx = NULL;
233 }
234
235 if (cctx) {
236 pcre2_compile_context_free(cctx);
237 cctx = NULL;
238 }
239
240 if (mctx) {
241 pcre2_match_context_free(mctx);
242 mctx = NULL;
243 }
244
245 #ifdef HAVE_PCRE_JIT_SUPPORT
246 /* Stack may only be destroyed when no cached patterns
247 possibly associated with it do exist. */
248 if (jit_stack) {
249 pcre2_jit_stack_free(jit_stack);
250 jit_stack = NULL;
251 }
252 #endif
253
254 if (mdata) {
255 pcre2_match_data_free(mdata);
256 mdata = NULL;
257 }
258
259 pcre2_init_ok = 0;
260 }/*}}}*/
261
PHP_GINIT_FUNCTIONnull262 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
263 {
264 php_pcre_mutex_alloc();
265
266 /* If we're on the CLI SAPI, there will only be one request, so we don't need the
267 * cache to survive after RSHUTDOWN. */
268 pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
269 if (!pcre_globals->per_request_cache) {
270 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
271 }
272
273 pcre_globals->backtrack_limit = 0;
274 pcre_globals->recursion_limit = 0;
275 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
276 ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
277 ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
278 #ifdef HAVE_PCRE_JIT_SUPPORT
279 pcre_globals->jit = 1;
280 #endif
281
282 php_pcre_init_pcre2(1);
283 zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
284 }
285 /* }}} */
286
PHP_GSHUTDOWN_FUNCTIONnull287 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
288 {
289 if (!pcre_globals->per_request_cache) {
290 zend_hash_destroy(&pcre_globals->pcre_cache);
291 }
292
293 php_pcre_shutdown_pcre2();
294 zend_hash_destroy(&char_tables);
295 php_pcre_mutex_free();
296 }
297 /* }}} */
298
PHP_INI_MHnull299 static PHP_INI_MH(OnUpdateBacktrackLimit)
300 {/*{{{*/
301 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
302 if (mctx) {
303 pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
304 }
305
306 return SUCCESS;
307 }/*}}}*/
308
PHP_INI_MHnull309 static PHP_INI_MH(OnUpdateRecursionLimit)
310 {/*{{{*/
311 OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
312 if (mctx) {
313 pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
314 }
315
316 return SUCCESS;
317 }/*}}}*/
318
319 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MHnull320 static PHP_INI_MH(OnUpdateJit)
321 {/*{{{*/
322 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
323 if (PCRE_G(jit) && jit_stack) {
324 pcre2_jit_stack_assign(mctx, NULL, jit_stack);
325 } else {
326 pcre2_jit_stack_assign(mctx, NULL, NULL);
327 }
328
329 return SUCCESS;
330 }/*}}}*/
331 #endif
332
/* INI settings of the extension. Each entry is bound to a field of
 * zend_pcre_globals and to the handler that forwards a new value to PCRE2. */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* Only available when the extension was built with JIT support. */
	STD_PHP_INI_ENTRY("pcre.jit", "1", PHP_INI_ALL, OnUpdateJit, jit, zend_pcre_globals, pcre_globals)
#endif
PHP_INI_END()
340
/* Fetch a string-valued pcre2_config() item.
 *
 * `what` is one of the PCRE2_CONFIG_* selectors that yield a string.
 * Returns a buffer obtained from malloc() that the caller must free(), or
 * NULL when the item is unavailable or memory could not be allocated. */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* With a NULL buffer pcre2_config() reports the required size, or a
	 * negative error code for an unsupported selector. */
	int len = pcre2_config(what, NULL);

	if (len <= 0) {
		/* Never size an allocation from an error code. */
		return NULL;
	}

	ret = (char *) malloc((size_t) len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
354
355 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTIONnull356 static PHP_MINFO_FUNCTION(pcre)
357 {
358 #ifdef HAVE_PCRE_JIT_SUPPORT
359 uint32_t flag = 0;
360 char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
361 #endif
362 char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
363 char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
364
365 php_info_print_table_start();
366 php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
367 php_info_print_table_row(2, "PCRE Library Version", version);
368 free(version);
369 php_info_print_table_row(2, "PCRE Unicode Version", unicode);
370 free(unicode);
371
372 #ifdef HAVE_PCRE_JIT_SUPPORT
373 if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
374 php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
375 } else {
376 php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
377 }
378 if (jit_target) {
379 php_info_print_table_row(2, "PCRE JIT Target", jit_target);
380 }
381 free(jit_target);
382 #else
383 php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
384 #endif
385
386 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
387 php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
388 #endif
389
390 php_info_print_table_end();
391
392 DISPLAY_INI_ENTRIES();
393 }
394 /* }}} */
395
396 /* {{{ PHP_MINIT_FUNCTION(pcre) */
/* Module startup: makes sure the PCRE2 contexts exist (JIT builds only),
 * registers the INI entries and all userland constants.
 * Returns FAILURE when the PCRE2 initialization retry fails, SUCCESS otherwise. */
static PHP_MINIT_FUNCTION(pcre)
{
	char *version;

#ifdef HAVE_PCRE_JIT_SUPPORT
	if (UNEXPECTED(!pcre2_init_ok)) {
		/* Retry. */
		php_pcre_init_pcre2(PCRE_G(jit));
		if (!pcre2_init_ok) {
			return FAILURE;
		}
	}
#endif

	REGISTER_INI_ENTRIES();

	/* Flags accepted by the preg_* functions. */
	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);

	/* Error codes as stored in PCRE_G(error_code). */
	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
	/* The version string is a temporary malloc() buffer that is freed right
	 * after registration.
	 * NOTE(review): _pcre2_config_str() can return NULL, which is passed on
	 * unchecked here - confirm REGISTER_STRING_CONSTANT tolerates that. */
	version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
	REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
	free(version);
	REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
	REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);

	/* Reflects the build-time JIT configuration, not the pcre.jit setting. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
#else
	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
#endif

	return SUCCESS;
}
443 /* }}} */
444
445 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
/* Module shutdown: removes the INI entries registered in MINIT. Always
 * returns SUCCESS. */
static PHP_MSHUTDOWN_FUNCTION(pcre)
{
	UNREGISTER_INI_ENTRIES();

	return SUCCESS;
}
452 /* }}} */
453
454 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTIONnull455 static PHP_RINIT_FUNCTION(pcre)
456 {
457 #ifdef HAVE_PCRE_JIT_SUPPORT
458 if (UNEXPECTED(!pcre2_init_ok)) {
459 /* Retry. */
460 php_pcre_mutex_lock();
461 php_pcre_init_pcre2(PCRE_G(jit));
462 if (!pcre2_init_ok) {
463 php_pcre_mutex_unlock();
464 return FAILURE;
465 }
466 php_pcre_mutex_unlock();
467 }
468
469 mdata_used = 0;
470 #endif
471
472 if (PCRE_G(per_request_cache)) {
473 zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
474 }
475
476 return SUCCESS;
477 }
478 /* }}} */
479
PHP_RSHUTDOWN_FUNCTIONnull480 static PHP_RSHUTDOWN_FUNCTION(pcre)
481 {
482 if (PCRE_G(per_request_cache)) {
483 zend_hash_destroy(&PCRE_G(pcre_cache));
484 }
485
486 zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
487 zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
488 ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
489 ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
490 return SUCCESS;
491 }
492
493 /* {{{ static pcre_clean_cache */
/* Hash apply callback used to shrink the cache. `arg` points to an int holding
 * the number of entries still to evict; an entry is removed while that budget
 * is positive and the entry's refcount is zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);
	int *remaining = (int *)arg;

	if (*remaining <= 0 || entry->refcount) {
		return ZEND_HASH_APPLY_KEEP;
	}

	--*remaining;
	return ZEND_HASH_APPLY_REMOVE;
}
506 /* }}} */
507
/* Release every name stored in a table built by make_subpats_table() and then
 * the table itself. `num_subpats` is the number of slots in the table. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	zend_string **slot = subpat_names;
	zend_string **end = subpat_names + num_subpats;

	for (; slot < end; slot++) {
		if (*slot != NULL) {
			zend_string_release(*slot);
		}
	}
	efree(subpat_names);
}
517
518 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)519 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
520 {
521 uint32_t name_cnt = pce->name_count, name_size, ni = 0;
522 char *name_table;
523 zend_string **subpat_names;
524 int rc1, rc2;
525
526 rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
527 rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
528 if (rc1 < 0 || rc2 < 0) {
529 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
530 return NULL;
531 }
532
533 subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
534 while (ni++ < name_cnt) {
535 unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
536 const char *name = name_table + 2;
537 subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
538 if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
539 php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
540 free_subpats_table(subpat_names, num_subpats);
541 return NULL;
542 }
543 name_table += name_size;
544 }
545 return subpat_names;
546 }
547 /* }}} */
548
549 /* {{{ static calculate_unit_length */
550 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry *pce, char *start)551 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
552 {
553 size_t unit_len;
554
555 if (pce->compile_options & PCRE2_UTF) {
556 char *end = start;
557
558 /* skip continuation bytes */
559 while ((*++end & 0xC0) == 0x80);
560 unit_len = end - start;
561 } else {
562 unit_len = 1;
563 }
564 return unit_len;
565 }
566 /* }}} */
567
/* {{{ pcre_get_compiled_regex_cache_ex
 */
pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)570 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
571 {
572 pcre2_code *re = NULL;
573 uint32_t coptions = 0;
574 uint32_t extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
575 PCRE2_UCHAR error[128];
576 PCRE2_SIZE erroffset;
577 int errnumber;
578 char delimiter;
579 char start_delimiter;
580 char end_delimiter;
581 char *p, *pp;
582 char *pattern;
583 size_t pattern_len;
584 uint32_t poptions = 0;
585 const uint8_t *tables = NULL;
586 zval *zv;
587 pcre_cache_entry new_entry;
588 int rc;
589 zend_string *key;
590 pcre_cache_entry *ret;
591
592 if (locale_aware && BG(locale_string) &&
593 (ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
594 key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
595 memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
596 memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
597 } else {
598 key = regex;
599 }
600
601 /* Try to lookup the cached regex entry, and if successful, just pass
602 back the compiled pattern, otherwise go on and compile it. */
603 zv = zend_hash_find(&PCRE_G(pcre_cache), key);
604 if (zv) {
605 if (key != regex) {
606 zend_string_release_ex(key, 0);
607 }
608 return (pcre_cache_entry*)Z_PTR_P(zv);
609 }
610
611 p = ZSTR_VAL(regex);
612
613 /* Parse through the leading whitespace, and display a warning if we
614 get to the end without encountering a delimiter. */
615 while (isspace((int)*(unsigned char *)p)) p++;
616 if (*p == 0) {
617 if (key != regex) {
618 zend_string_release_ex(key, 0);
619 }
620 php_error_docref(NULL, E_WARNING,
621 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
622 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
623 return NULL;
624 }
625
626 /* Get the delimiter and display a warning if it is alphanumeric
627 or a backslash. */
628 delimiter = *p++;
629 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
630 if (key != regex) {
631 zend_string_release_ex(key, 0);
632 }
633 php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
634 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
635 return NULL;
636 }
637
638 start_delimiter = delimiter;
639 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
640 delimiter = pp[5];
641 end_delimiter = delimiter;
642
643 pp = p;
644
645 if (start_delimiter == end_delimiter) {
646 /* We need to iterate through the pattern, searching for the ending delimiter,
647 but skipping the backslashed delimiters. If the ending delimiter is not
648 found, display a warning. */
649 while (*pp != 0) {
650 if (*pp == '\\' && pp[1] != 0) pp++;
651 else if (*pp == delimiter)
652 break;
653 pp++;
654 }
655 } else {
656 /* We iterate through the pattern, searching for the matching ending
657 * delimiter. For each matching starting delimiter, we increment nesting
658 * level, and decrement it for each matching ending delimiter. If we
659 * reach the end of the pattern without matching, display a warning.
660 */
661 int brackets = 1; /* brackets nesting level */
662 while (*pp != 0) {
663 if (*pp == '\\' && pp[1] != 0) pp++;
664 else if (*pp == end_delimiter && --brackets <= 0)
665 break;
666 else if (*pp == start_delimiter)
667 brackets++;
668 pp++;
669 }
670 }
671
672 if (*pp == 0) {
673 if (key != regex) {
674 zend_string_release_ex(key, 0);
675 }
676 if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
677 php_error_docref(NULL,E_WARNING, "Null byte in regex");
678 } else if (start_delimiter == end_delimiter) {
679 php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
680 } else {
681 php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
682 }
683 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
684 return NULL;
685 }
686
687 /* Make a copy of the actual pattern. */
688 pattern_len = pp - p;
689 pattern = estrndup(p, pattern_len);
690
691 /* Move on to the options */
692 pp++;
693
694 /* Parse through the options, setting appropriate flags. Display
695 a warning if we encounter an unknown modifier. */
696 while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
697 switch (*pp++) {
698 /* Perl compatible options */
699 case 'i': coptions |= PCRE2_CASELESS; break;
700 case 'm': coptions |= PCRE2_MULTILINE; break;
701 case 's': coptions |= PCRE2_DOTALL; break;
702 case 'x': coptions |= PCRE2_EXTENDED; break;
703
704 /* PCRE specific options */
705 case 'A': coptions |= PCRE2_ANCHORED; break;
706 case 'D': coptions |= PCRE2_DOLLAR_ENDONLY;break;
707 case 'S': /* Pass. */ break;
708 case 'U': coptions |= PCRE2_UNGREEDY; break;
709 case 'X': extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL; break;
710 case 'u': coptions |= PCRE2_UTF;
711 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
712 characters, even in UTF-8 mode. However, this can be changed by setting
713 the PCRE2_UCP option. */
714 #ifdef PCRE2_UCP
715 coptions |= PCRE2_UCP;
716 #endif
717 break;
718 case 'J': coptions |= PCRE2_DUPNAMES; break;
719
720 /* Custom preg options */
721 case 'e': poptions |= PREG_REPLACE_EVAL; break;
722
723 case ' ':
724 case '\n':
725 case '\r':
726 break;
727
728 default:
729 if (pp[-1]) {
730 php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
731 } else {
732 php_error_docref(NULL,E_WARNING, "Null byte in regex");
733 }
734 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
735 efree(pattern);
736 if (key != regex) {
737 zend_string_release_ex(key, 0);
738 }
739 return NULL;
740 }
741 }
742
743 if (poptions & PREG_REPLACE_EVAL) {
744 php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
745 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
746 efree(pattern);
747 if (key != regex) {
748 zend_string_release_ex(key, 0);
749 }
750 return NULL;
751 }
752
753 if (key != regex) {
754 tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
755 if (!tables) {
756 zend_string *_k;
757 tables = pcre2_maketables(gctx);
758 if (UNEXPECTED(!tables)) {
759 php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
760 pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
761 zend_string_release_ex(key, 0);
762 efree(pattern);
763 return NULL;
764 }
765 _k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
766 GC_MAKE_PERSISTENT_LOCAL(_k);
767 zend_hash_add_ptr(&char_tables, _k, (void *)tables);
768 zend_string_release(_k);
769 }
770 }
771 pcre2_set_character_tables(cctx, tables);
772
773 /* Set extra options for the compile context. */
774 if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
775 pcre2_set_compile_extra_options(cctx, extra_coptions);
776 }
777
778 /* Compile pattern and display a warning if compilation failed. */
779 re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
780
781 /* Reset the compile context extra options to default. */
782 if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
783 pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
784 }
785
786 if (re == NULL) {
787 if (key != regex) {
788 zend_string_release_ex(key, 0);
789 }
790 pcre2_get_error_message(errnumber, error, sizeof(error));
791 php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
792 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
793 efree(pattern);
794 return NULL;
795 }
796
797 #ifdef HAVE_PCRE_JIT_SUPPORT
798 if (PCRE_G(jit)) {
799 /* Enable PCRE JIT compiler */
800 rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
801 if (EXPECTED(rc >= 0)) {
802 size_t jit_size = 0;
803 if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
804 poptions |= PREG_JIT;
805 }
806 } else if (rc == PCRE2_ERROR_NOMEMORY) {
807 php_error_docref(NULL, E_WARNING,
808 "Allocation of JIT memory failed, PCRE JIT will be disabled. "
809 "This is likely caused by security restrictions. "
810 "Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
811 PCRE_G(jit) = 0;
812 } else {
813 pcre2_get_error_message(rc, error, sizeof(error));
814 php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
815 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
816 }
817 }
818 #endif
819 efree(pattern);
820
821 /*
822 * If we reached cache limit, clean out the items from the head of the list;
823 * these are supposedly the oldest ones (but not necessarily the least used
824 * ones).
825 */
826 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
827 int num_clean = PCRE_CACHE_SIZE / 8;
828 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
829 }
830
831 /* Store the compiled pattern and extra info in the cache. */
832 new_entry.re = re;
833 new_entry.preg_options = poptions;
834 new_entry.compile_options = coptions;
835 new_entry.extra_compile_options = extra_coptions;
836 new_entry.refcount = 0;
837
838 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
839 if (rc < 0) {
840 if (key != regex) {
841 zend_string_release_ex(key, 0);
842 }
843 php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
844 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
845 return NULL;
846 }
847
848 rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
849 if (rc < 0) {
850 if (key != regex) {
851 zend_string_release_ex(key, 0);
852 }
853 php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
854 pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
855 return NULL;
856 }
857
858 /*
859 * Interned strings are not duplicated when stored in HashTable,
860 * but all the interned strings created during HTTP request are removed
861 * at end of request. However PCRE_G(pcre_cache) must be consistent
862 * on the next request as well. So we disable usage of interned strings
863 * as hash keys especually for this table.
864 * See bug #63180
865 */
866 if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
867 zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
868 GC_MAKE_PERSISTENT_LOCAL(str);
869
870 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
871 zend_string_release(str);
872 } else {
873 ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
874 }
875
876 if (key != regex) {
877 zend_string_release_ex(key, 0);
878 }
879
880 return ret;
881 }
882 /* }}} */
883
884 /* {{{ pcre_get_compiled_regex_cache
885 */
pcre_get_compiled_regex_cache(zend_string *regex)886 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
887 {
888 return pcre_get_compiled_regex_cache_ex(regex, 1);
889 }
890 /* }}} */
891
892 /* {{{ pcre_get_compiled_regex
893 */
pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)894 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
895 {
896 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
897
898 if (capture_count) {
899 *capture_count = pce ? pce->capture_count : 0;
900 }
901
902 return pce ? pce->re : NULL;
903 }
904 /* }}} */
905
906 /* {{{ pcre_get_compiled_regex_ex
907 */
pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)908 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
909 {
910 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
911
912 if (preg_options) {
913 *preg_options = pce ? pce->preg_options : 0;
914 }
915 if (compile_options) {
916 *compile_options = pce ? pce->compile_options : 0;
917 }
918 if (capture_count) {
919 *capture_count = pce ? pce->capture_count : 0;
920 }
921
922 return pce ? pce->re : NULL;
923 }
924 /* }}} */
925
926 /* XXX For the cases where it's only about match yes/no and no capture
927 required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)928 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
929 {/*{{{*/
930
931 assert(NULL != re);
932
933 if (EXPECTED(!mdata_used)) {
934 int rc = 0;
935
936 if (!capture_count) {
937 /* As we deal with a non cached pattern, no other way to gather this info. */
938 rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
939 }
940
941 if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
942 mdata_used = 1;
943 return mdata;
944 }
945 }
946
947 return pcre2_match_data_create_from_pattern(re, gctx);
948 }/*}}}*/
949
php_pcre_free_match_data(pcre2_match_data *match_data)950 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
951 {/*{{{*/
952 if (UNEXPECTED(match_data != mdata)) {
953 pcre2_match_data_free(match_data);
954 } else {
955 mdata_used = 0;
956 }
957 }/*}}}*/
958
init_unmatched_null_pairnull959 static void init_unmatched_null_pair() {
960 zval val1, val2;
961 ZVAL_NULL(&val1);
962 ZVAL_LONG(&val2, -1);
963 ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
964 }
965
init_unmatched_empty_pairnull966 static void init_unmatched_empty_pair() {
967 zval val1, val2;
968 ZVAL_EMPTY_STRING(&val1);
969 ZVAL_LONG(&val2, -1);
970 ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
971 }
972
populate_match_value_str( zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset)973 static zend_always_inline void populate_match_value_str(
974 zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
975 if (start_offset == end_offset) {
976 ZVAL_EMPTY_STRING(val);
977 } else if (start_offset + 1 == end_offset) {
978 ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
979 } else {
980 ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
981 }
982 }
983
/*
 * Fills `val` for one capture group. A group whose start offset is
 * PCRE2_UNSET becomes NULL when unmatched_as_null is set and an empty string
 * otherwise; a set group becomes the matched substring.
 */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (PCRE2_UNSET != start_offset) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (unmatched_as_null) {
		ZVAL_NULL(val);
	} else {
		ZVAL_EMPTY_STRING(val);
	}
}
997
/*
 * Stores `val` under `name` in the `subpats` array and takes a reference on
 * it when it was actually stored.
 */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, zend_bool unmatched) {
	/* If the DUPNAMES option is used, multiple subpatterns might have the same name.
	 * In this case we want to preserve the one that actually has a value. */
	if (unmatched) {
		/* Never let an unmatched group overwrite an existing entry. */
		if (zend_hash_add(Z_ARRVAL_P(subpats), name, val) == NULL) {
			return;
		}
	} else {
		zend_hash_update(Z_ARRVAL_P(subpats), name, val);
	}

	Z_TRY_ADDREF_P(val);
}
1011
1012 /* {{{ add_offset_pair */
add_offset_pair( zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, zend_string *name, uint32_t unmatched_as_null)1013 static inline void add_offset_pair(
1014 zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1015 zend_string *name, uint32_t unmatched_as_null)
1016 {
1017 zval match_pair;
1018
1019 /* Add (match, offset) to the return value */
1020 if (PCRE2_UNSET == start_offset) {
1021 if (unmatched_as_null) {
1022 if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
1023 init_unmatched_null_pair();
1024 }
1025 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1026 } else {
1027 if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1028 init_unmatched_empty_pair();
1029 }
1030 ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1031 }
1032 } else {
1033 zval val1, val2;
1034 populate_match_value_str(&val1, subject, start_offset, end_offset);
1035 ZVAL_LONG(&val2, start_offset);
1036 ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1037 }
1038
1039 if (name) {
1040 add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1041 }
1042 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
1043 }
1044 /* }}} */
1045
/*
 * Fills the `subpats` array with the capture groups of one match.
 *
 * The first `count` groups are taken from the `offsets` vector. When
 * PREG_UNMATCHED_AS_NULL is set in `flags`, the remaining groups up to
 * `num_subpats` are added as unmatched entries. With PREG_OFFSET_CAPTURE each
 * entry is a (match, offset) pair. Groups that have a name in `subpat_names`
 * (which may be NULL) are stored under that name as well as by index. A
 * non-NULL `mark` is added under the "MARK" key.
 */
static void populate_subpat_array(
		zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	const zend_bool with_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	const zend_bool null_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	int idx;

	/* Groups reported by the match itself. */
	for (idx = 0; idx < count; idx++) {
		zend_string *group_name = subpat_names ? subpat_names[idx] : NULL;

		if (with_offsets) {
			add_offset_pair(
				subpats, subject, offsets[2*idx], offsets[2*idx+1],
				group_name, null_unmatched);
		} else {
			zval entry;

			populate_match_value(
				&entry, subject, offsets[2*idx], offsets[2*idx+1], null_unmatched);
			if (group_name) {
				add_named(subpats, group_name, &entry, offsets[2*idx] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
	}

	/* Trailing groups the match did not report at all. */
	if (null_unmatched) {
		for (idx = count; idx < num_subpats; idx++) {
			zend_string *group_name = subpat_names ? subpat_names[idx] : NULL;

			if (with_offsets) {
				add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, group_name, 1);
			} else {
				zval entry;

				ZVAL_NULL(&entry);
				if (group_name) {
					/* add, not update: keep a value already stored under this name */
					zend_hash_add(Z_ARRVAL_P(subpats), group_name, &entry);
				}
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1113
/*
 * Shared entry point of preg_match() and preg_match_all(): parses the
 * userland arguments, resolves the compiled pattern and delegates to
 * php_pcre_match_impl(). `global` selects the preg_match_all() behaviour.
 * Returns FALSE to userland when argument parsing or pattern lookup fails.
 */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string      *regex;            /* Regular expression */
	zend_string      *subject;          /* String to match against */
	zval             *subpats = NULL;   /* Array for subpatterns */
	zend_long         flags = 0;        /* Match control flags */
	zend_long         start_offset = 0; /* Where the new search starts */
	pcre_cache_entry *pce;              /* Compiled regular expression */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* Compile regex or get it from cache. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* Hold a reference on the cache entry for the duration of the match
	 * (presumably so the entry is not discarded while in use). */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1144
is_known_valid_utf8( zend_string *subject_str, PCRE2_SIZE start_offset)1145 static zend_always_inline zend_bool is_known_valid_utf8(
1146 zend_string *subject_str, PCRE2_SIZE start_offset) {
1147 if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1148 /* We don't know whether the string is valid UTF-8 or not. */
1149 return 0;
1150 }
1151
1152 if (start_offset == ZSTR_LEN(subject_str)) {
1153 /* Degenerate case: Offset points to end of string. */
1154 return 1;
1155 }
1156
1157 /* Check that the offset does not point to an UTF-8 continuation byte. */
1158 return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1159 }
1160
1161 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)1162 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1163 zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1164 {
1165 zval result_set, /* Holds a set of subpatterns after
1166 a global match */
1167 *match_sets = NULL; /* An array of sets of matches for each
1168 subpattern after a global match */
1169 uint32_t options; /* Execution options */
1170 int count; /* Count of matched subpatterns */
1171 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1172 uint32_t num_subpats; /* Number of captured subpatterns */
1173 int matched; /* Has anything matched */
1174 zend_string **subpat_names; /* Array for named subpatterns */
1175 size_t i;
1176 uint32_t subpats_order; /* Order of subpattern matches */
1177 uint32_t offset_capture; /* Capture match offsets: yes/no */
1178 uint32_t unmatched_as_null; /* Null non-matches: yes/no */
1179 PCRE2_SPTR mark = NULL; /* Target for MARK name */
1180 zval marks; /* Array of marks for PREG_PATTERN_ORDER */
1181 pcre2_match_data *match_data;
1182 PCRE2_SIZE start_offset2, orig_start_offset;
1183
1184 char *subject = ZSTR_VAL(subject_str);
1185 size_t subject_len = ZSTR_LEN(subject_str);
1186
1187 ZVAL_UNDEF(&marks);
1188
1189 /* Overwrite the passed-in value for subpatterns with an empty array. */
1190 if (subpats != NULL) {
1191 subpats = zend_try_array_init(subpats);
1192 if (!subpats) {
1193 return;
1194 }
1195 }
1196
1197 subpats_order = global ? PREG_PATTERN_ORDER : 0;
1198
1199 if (use_flags) {
1200 offset_capture = flags & PREG_OFFSET_CAPTURE;
1201 unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1202
1203 /*
1204 * subpats_order is pre-set to pattern mode so we change it only if
1205 * necessary.
1206 */
1207 if (flags & 0xff) {
1208 subpats_order = flags & 0xff;
1209 }
1210 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1211 (!global && subpats_order != 0)) {
1212 php_error_docref(NULL, E_WARNING, "Invalid flags specified");
1213 return;
1214 }
1215 } else {
1216 offset_capture = 0;
1217 unmatched_as_null = 0;
1218 }
1219
1220 /* Negative offset counts from the end of the string. */
1221 if (start_offset < 0) {
1222 if ((PCRE2_SIZE)-start_offset <= subject_len) {
1223 start_offset2 = subject_len + start_offset;
1224 } else {
1225 start_offset2 = 0;
1226 }
1227 } else {
1228 start_offset2 = (PCRE2_SIZE)start_offset;
1229 }
1230
1231 if (start_offset2 > subject_len) {
1232 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1233 RETURN_FALSE;
1234 }
1235
1236 /* Calculate the size of the offsets array, and allocate memory for it. */
1237 num_subpats = pce->capture_count + 1;
1238
1239 /*
1240 * Build a mapping from subpattern numbers to their names. We will
1241 * allocate the table only if there are any named subpatterns.
1242 */
1243 subpat_names = NULL;
1244 if (subpats && pce->name_count > 0) {
1245 subpat_names = make_subpats_table(num_subpats, pce);
1246 if (!subpat_names) {
1247 RETURN_FALSE;
1248 }
1249 }
1250
1251 /* Allocate match sets array and initialize the values. */
1252 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1253 match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1254 for (i=0; i<num_subpats; i++) {
1255 array_init(&match_sets[i]);
1256 }
1257 }
1258
1259 matched = 0;
1260 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1261
1262 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1263 match_data = mdata;
1264 } else {
1265 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1266 if (!match_data) {
1267 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1268 if (subpat_names) {
1269 free_subpats_table(subpat_names, num_subpats);
1270 }
1271 if (match_sets) {
1272 efree(match_sets);
1273 }
1274 RETURN_FALSE;
1275 }
1276 }
1277
1278 orig_start_offset = start_offset2;
1279 options =
1280 (pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1281 ? 0 : PCRE2_NO_UTF_CHECK;
1282
1283 /* Execute the regular expression. */
1284 #ifdef HAVE_PCRE_JIT_SUPPORT
1285 if ((pce->preg_options & PREG_JIT) && options) {
1286 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1287 PCRE2_NO_UTF_CHECK, match_data, mctx);
1288 } else
1289 #endif
1290 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1291 options, match_data, mctx);
1292
1293 while (1) {
1294 /* If something has matched */
1295 if (count >= 0) {
1296 /* Check for too many substrings condition. */
1297 if (UNEXPECTED(count == 0)) {
1298 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1299 count = num_subpats;
1300 }
1301
1302 matched:
1303 matched++;
1304
1305 offsets = pcre2_get_ovector_pointer(match_data);
1306
1307 /* If subpatterns array has been passed, fill it in with values. */
1308 if (subpats != NULL) {
1309 /* Try to get the list of substrings and display a warning if failed. */
1310 if (offsets[1] < offsets[0]) {
1311 if (subpat_names) {
1312 free_subpats_table(subpat_names, num_subpats);
1313 }
1314 if (match_sets) efree(match_sets);
1315 php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1316 RETURN_FALSE;
1317 }
1318
1319 if (global) { /* global pattern matching */
1320 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1321 /* For each subpattern, insert it into the appropriate array. */
1322 if (offset_capture) {
1323 for (i = 0; i < count; i++) {
1324 add_offset_pair(
1325 &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1326 NULL, unmatched_as_null);
1327 }
1328 } else {
1329 for (i = 0; i < count; i++) {
1330 zval val;
1331 populate_match_value(
1332 &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1333 zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1334 }
1335 }
1336 mark = pcre2_get_mark(match_data);
1337 /* Add MARK, if available */
1338 if (mark) {
1339 if (Z_TYPE(marks) == IS_UNDEF) {
1340 array_init(&marks);
1341 }
1342 add_index_string(&marks, matched - 1, (char *) mark);
1343 }
1344 /*
1345 * If the number of captured subpatterns on this run is
1346 * less than the total possible number, pad the result
1347 * arrays with NULLs or empty strings.
1348 */
1349 if (count < num_subpats) {
1350 for (; i < num_subpats; i++) {
1351 if (offset_capture) {
1352 add_offset_pair(
1353 &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1354 NULL, unmatched_as_null);
1355 } else if (unmatched_as_null) {
1356 add_next_index_null(&match_sets[i]);
1357 } else {
1358 add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1359 }
1360 }
1361 }
1362 } else {
1363 /* Allocate and populate the result set array */
1364 array_init_size(&result_set, count + (mark ? 1 : 0));
1365 mark = pcre2_get_mark(match_data);
1366 populate_subpat_array(
1367 &result_set, subject, offsets, subpat_names,
1368 num_subpats, count, mark, flags);
1369 /* And add it to the output array */
1370 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1371 }
1372 } else { /* single pattern matching */
1373 /* For each subpattern, insert it into the subpatterns array. */
1374 mark = pcre2_get_mark(match_data);
1375 populate_subpat_array(
1376 subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1377 break;
1378 }
1379 }
1380
1381 /* Advance to the next piece. */
1382 start_offset2 = offsets[1];
1383
1384 /* If we have matched an empty string, mimic what Perl's /g options does.
1385 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1386 the match again at the same point. If this fails (picked up above) we
1387 advance to the next character. */
1388 if (start_offset2 == offsets[0]) {
1389 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1390 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1391 if (count >= 0) {
1392 if (global) {
1393 goto matched;
1394 } else {
1395 break;
1396 }
1397 } else if (count == PCRE2_ERROR_NOMATCH) {
1398 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1399 this is not necessarily the end. We need to advance
1400 the start offset, and continue. Fudge the offset values
1401 to achieve this, unless we're already at the end of the string. */
1402 if (start_offset2 < subject_len) {
1403 size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1404
1405 start_offset2 += unit_len;
1406 } else {
1407 break;
1408 }
1409 } else {
1410 goto error;
1411 }
1412 }
1413 } else if (count == PCRE2_ERROR_NOMATCH) {
1414 break;
1415 } else {
1416 error:
1417 pcre_handle_exec_error(count);
1418 break;
1419 }
1420
1421 if (!global) {
1422 break;
1423 }
1424
1425 /* Execute the regular expression. */
1426 #ifdef HAVE_PCRE_JIT_SUPPORT
1427 if ((pce->preg_options & PREG_JIT)) {
1428 if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1429 pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1430 break;
1431 }
1432 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1433 PCRE2_NO_UTF_CHECK, match_data, mctx);
1434 } else
1435 #endif
1436 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1437 PCRE2_NO_UTF_CHECK, match_data, mctx);
1438 }
1439 if (match_data != mdata) {
1440 pcre2_match_data_free(match_data);
1441 }
1442
1443 /* Add the match sets to the output array and clean up */
1444 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1445 if (subpat_names) {
1446 for (i = 0; i < num_subpats; i++) {
1447 if (subpat_names[i]) {
1448 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1449 Z_ADDREF(match_sets[i]);
1450 }
1451 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1452 }
1453 } else {
1454 for (i = 0; i < num_subpats; i++) {
1455 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1456 }
1457 }
1458 efree(match_sets);
1459
1460 if (Z_TYPE(marks) != IS_UNDEF) {
1461 add_assoc_zval(subpats, "MARK", &marks);
1462 }
1463 }
1464
1465 if (subpat_names) {
1466 free_subpats_table(subpat_names, num_subpats);
1467 }
1468
1469 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1470 /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1471 if ((pce->compile_options & PCRE2_UTF)
1472 && !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1473 GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1474 }
1475
1476 RETVAL_LONG(matched);
1477 } else {
1478 RETVAL_FALSE;
1479 }
1480 }
1481 /* }}} */
1482
1483 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1484 Perform a Perl-style regular expression match */
PHP_FUNCTIONnull1485 static PHP_FUNCTION(preg_match)
1486 {
1487 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1488 }
1489 /* }}} */
1490
1491 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
1492 Perform a Perl-style global regular expression match */
PHP_FUNCTIONnull1493 static PHP_FUNCTION(preg_match_all)
1494 {
1495 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1496 }
1497 /* }}} */
1498
/* {{{ preg_get_backref
 * Parses a backreference of the form \N, $N or ${N} (N being one or two
 * decimal digits) located at *str.
 *
 * On success the group number is written to *backref, *str is advanced past
 * the reference and 1 is returned. On failure 0 is returned and *str is left
 * unchanged; *backref may already have been written if digits were seen
 * before a missing closing brace.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;
	int number;

	/* The introducer is the last character: nothing can follow it. */
	if (cursor[1] == 0) {
		return 0;
	}

	/* Skip the introducer, plus the opening brace of the ${N} form. */
	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	number = *cursor++ - '0';

	/* An optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		number = number * 10 + (*cursor++ - '0');
	}
	*backref = number;

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1537
1538 /* {{{ preg_do_repl_func
1539 */
preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)1540 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1541 {
1542 zend_string *result_str;
1543 zval retval; /* Function return value */
1544 zval arg; /* Argument to pass to function */
1545
1546 array_init_size(&arg, count + (mark ? 1 : 0));
1547 populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1548
1549 fci->retval = &retval;
1550 fci->param_count = 1;
1551 fci->params = &arg;
1552 fci->no_separation = 0;
1553
1554 if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1555 if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1556 result_str = Z_STR(retval);
1557 } else {
1558 result_str = zval_get_string_func(&retval);
1559 zval_ptr_dtor(&retval);
1560 }
1561 } else {
1562 if (!EG(exception)) {
1563 php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1564 }
1565
1566 result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1567 }
1568
1569 zval_ptr_dtor(&arg);
1570
1571 return result_str;
1572 }
1573 /* }}} */
1574
1575 /* {{{ php_pcre_replace
1576 */
php_pcre_replace(zend_string *regex, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)1577 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1578 zend_string *subject_str,
1579 char *subject, size_t subject_len,
1580 zend_string *replace_str,
1581 size_t limit, size_t *replace_count)
1582 {
1583 pcre_cache_entry *pce; /* Compiled regular expression */
1584 zend_string *result; /* Function result */
1585
1586 /* Abort on pending exception, e.g. thrown from __toString(). */
1587 if (UNEXPECTED(EG(exception))) {
1588 return NULL;
1589 }
1590
1591 /* Compile regex or get it from cache. */
1592 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1593 return NULL;
1594 }
1595 pce->refcount++;
1596 result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1597 limit, replace_count);
1598 pce->refcount--;
1599
1600 return result;
1601 }
1602 /* }}} */
1603
/* {{{ php_pcre_replace_impl()
 *
 * Replaces up to `limit` matches of the compiled pattern `pce` in `subject`
 * (of length `subject_len`) with `replace_str`. References of the form
 * recognised by preg_get_backref() (\N, $N, ${N}) inside the replacement are
 * substituted with the corresponding capture of the current match.
 *
 *  subject_str   - optional zend_string form of the subject; when non-NULL
 *                  and nothing was replaced, a copy obtained with
 *                  zend_string_copy() is returned instead of a new string
 *  replace_count - optional counter, incremented once per replacement
 *
 * Returns the resulting string, or NULL on error (internal error detected
 * here, or a match error passed to pcre_handle_exec_error()).
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end,		/* End of replacement string */
					 walk_last;			/* Last walked character */
	size_t			result_len;			/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Number of offset pairs a match can report: capture groups plus the whole match. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the shared match data block when it is free and large enough,
	 * otherwise create one for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* In UTF mode the first match attempt runs with the UTF check enabled. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is only taken when no UTF check is requested. */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Start of the not yet copied part of the subject. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			zend_bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match ending before its start is treated as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement: compute the expanded length and
			 * detect whether it contains any '\\' or '$' at all. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					/* Escaped character: it will overwrite the preceding backslash
					 * in the copy pass, so it does not add to the length. */
					if (walk_last == '\\') {
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						/* References to groups beyond `count` expand to nothing. */
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer when the needed length reaches the allocated one. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				/* Second pass: same walk as above, this time writing the output. */
				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Replace the already copied backslash with the escaped character. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
								walkbuf += match_len;
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced: hand back the subject string itself when available. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			new_len = result_len + subject_len - last_end_offset;
			if (new_len >= alloc_len) {
				alloc_len = new_len; /* now we know exactly how long it is */
				if (NULL != result) {
					result = zend_string_realloc(result, alloc_len, 0);
				} else {
					result = zend_string_alloc(alloc_len, 0);
				}
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Any other negative count is a match error: discard the partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match; these attempts always skip the UTF check. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a block created for this call is freed; the shared one is left alone. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
/* }}} */
1845
1846 /* {{{ php_pcre_replace_func_impl() */
php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)1847 static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
1848 {
1849 uint32_t options; /* Execution options */
1850 int count; /* Count of matched subpatterns */
1851 PCRE2_SIZE *offsets; /* Array of subpattern offsets */
1852 zend_string **subpat_names; /* Array for named subpatterns */
1853 uint32_t num_subpats; /* Number of captured subpatterns */
1854 size_t new_len; /* Length of needed storage */
1855 size_t alloc_len; /* Actual allocated length */
1856 PCRE2_SIZE start_offset; /* Where the new search starts */
1857 size_t last_end_offset; /* Where the last search ended */
1858 char *match, /* The current match */
1859 *piece; /* The current piece of subject */
1860 size_t result_len; /* Length of result */
1861 zend_string *result; /* Result of replacement */
1862 zend_string *eval_result; /* Result of custom function */
1863 pcre2_match_data *match_data;
1864 zend_bool old_mdata_used;
1865
1866 /* Calculate the size of the offsets array, and allocate memory for it. */
1867 num_subpats = pce->capture_count + 1;
1868
1869 /*
1870 * Build a mapping from subpattern numbers to their names. We will
1871 * allocate the table only if there are any named subpatterns.
1872 */
1873 subpat_names = NULL;
1874 if (UNEXPECTED(pce->name_count > 0)) {
1875 subpat_names = make_subpats_table(num_subpats, pce);
1876 if (!subpat_names) {
1877 return NULL;
1878 }
1879 }
1880
1881 alloc_len = 0;
1882 result = NULL;
1883
1884 /* Initialize */
1885 match = NULL;
1886 start_offset = 0;
1887 last_end_offset = 0;
1888 result_len = 0;
1889 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1890
1891 old_mdata_used = mdata_used;
1892 if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1893 mdata_used = 1;
1894 match_data = mdata;
1895 } else {
1896 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1897 if (!match_data) {
1898 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1899 if (subpat_names) {
1900 free_subpats_table(subpat_names, num_subpats);
1901 }
1902 mdata_used = old_mdata_used;
1903 return NULL;
1904 }
1905 }
1906
1907 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
1908
1909 /* Execute the regular expression. */
1910 #ifdef HAVE_PCRE_JIT_SUPPORT
1911 if ((pce->preg_options & PREG_JIT) && options) {
1912 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1913 PCRE2_NO_UTF_CHECK, match_data, mctx);
1914 } else
1915 #endif
1916 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1917 options, match_data, mctx);
1918
1919 while (1) {
1920 piece = subject + last_end_offset;
1921
1922 if (count >= 0 && limit) {
1923 /* Check for too many substrings condition. */
1924 if (UNEXPECTED(count == 0)) {
1925 php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
1926 count = num_subpats;
1927 }
1928
1929 matched:
1930 offsets = pcre2_get_ovector_pointer(match_data);
1931
1932 if (UNEXPECTED(offsets[1] < offsets[0])) {
1933 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1934 if (result) {
1935 zend_string_release_ex(result, 0);
1936 result = NULL;
1937 }
1938 break;
1939 }
1940
1941 if (replace_count) {
1942 ++*replace_count;
1943 }
1944
1945 /* Set the match location in subject */
1946 match = subject + offsets[0];
1947
1948 new_len = result_len + offsets[0] - last_end_offset; /* part before the match */
1949
1950 /* Use custom function to get replacement string and its length. */
1951 eval_result = preg_do_repl_func(
1952 fci, fcc, subject, offsets, subpat_names, num_subpats, count,
1953 pcre2_get_mark(match_data), flags);
1954
1955 ZEND_ASSERT(eval_result);
1956 new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
1957 if (new_len >= alloc_len) {
1958 alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
1959 if (result == NULL) {
1960 result = zend_string_alloc(alloc_len, 0);
1961 } else {
1962 result = zend_string_extend(result, alloc_len, 0);
1963 }
1964 }
1965
1966 if (match-piece > 0) {
1967 /* copy the part of the string before the match */
1968 memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
1969 result_len += (match-piece);
1970 }
1971
1972 /* If using custom function, copy result to the buffer and clean up. */
1973 memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
1974 result_len += ZSTR_LEN(eval_result);
1975 zend_string_release_ex(eval_result, 0);
1976
1977 limit--;
1978
1979 /* Advance to the next piece. */
1980 start_offset = last_end_offset = offsets[1];
1981
1982 /* If we have matched an empty string, mimic what Perl's /g options does.
1983 This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1984 the match again at the same point. If this fails (picked up above) we
1985 advance to the next character. */
1986 if (start_offset == offsets[0]) {
1987 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
1988 PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1989
1990 piece = subject + start_offset;
1991 if (count >= 0 && limit) {
1992 goto matched;
1993 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
1994 /* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1995 this is not necessarily the end. We need to advance
1996 the start offset, and continue. Fudge the offset values
1997 to achieve this, unless we're already at the end of the string. */
1998 if (start_offset < subject_len) {
1999 size_t unit_len = calculate_unit_length(pce, piece);
2000 start_offset += unit_len;
2001 } else {
2002 goto not_matched;
2003 }
2004 } else {
2005 goto error;
2006 }
2007 }
2008
2009 } else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
2010 not_matched:
2011 if (!result && subject_str) {
2012 result = zend_string_copy(subject_str);
2013 break;
2014 }
2015 new_len = result_len + subject_len - last_end_offset;
2016 if (new_len >= alloc_len) {
2017 alloc_len = new_len; /* now we know exactly how long it is */
2018 if (NULL != result) {
2019 result = zend_string_realloc(result, alloc_len, 0);
2020 } else {
2021 result = zend_string_alloc(alloc_len, 0);
2022 }
2023 }
2024 /* stick that last bit of string on our output */
2025 memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
2026 result_len += subject_len - last_end_offset;
2027 ZSTR_VAL(result)[result_len] = '\0';
2028 ZSTR_LEN(result) = result_len;
2029 break;
2030 } else {
2031 error:
2032 pcre_handle_exec_error(count);
2033 if (result) {
2034 zend_string_release_ex(result, 0);
2035 result = NULL;
2036 }
2037 break;
2038 }
2039 #ifdef HAVE_PCRE_JIT_SUPPORT
2040 if ((pce->preg_options & PREG_JIT)) {
2041 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2042 PCRE2_NO_UTF_CHECK, match_data, mctx);
2043 } else
2044 #endif
2045 count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
2046 PCRE2_NO_UTF_CHECK, match_data, mctx);
2047 }
2048 if (match_data != mdata) {
2049 pcre2_match_data_free(match_data);
2050 }
2051 mdata_used = old_mdata_used;
2052
2053 if (UNEXPECTED(subpat_names)) {
2054 free_subpats_table(subpat_names, num_subpats);
2055 }
2056
2057 return result;
2058 }
2059 /* }}} */
2060
2061 /* {{{ php_pcre_replace_func
2062 */
php_pcre_replace_func(zend_string *regex, zend_string *subject_str, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)2063 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2064 zend_string *subject_str,
2065 zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2066 size_t limit, size_t *replace_count, zend_long flags)
2067 {
2068 pcre_cache_entry *pce; /* Compiled regular expression */
2069 zend_string *result; /* Function result */
2070
2071 /* Compile regex or get it from cache. */
2072 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2073 return NULL;
2074 }
2075 pce->refcount++;
2076 result = php_pcre_replace_func_impl(
2077 pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2078 limit, replace_count, flags);
2079 pce->refcount--;
2080
2081 return result;
2082 }
2083 /* }}} */
2084
2085 /* {{{ php_pcre_replace_array
2086 */
php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)2087 static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
2088 {
2089 zval *regex_entry;
2090 zend_string *result;
2091 zend_string *replace_str, *tmp_replace_str;
2092
2093 if (Z_TYPE_P(replace) == IS_ARRAY) {
2094 uint32_t replace_idx = 0;
2095 HashTable *replace_ht = Z_ARRVAL_P(replace);
2096
2097 /* For each entry in the regex array, get the entry */
2098 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2099 /* Make sure we're dealing with strings. */
2100 zend_string *tmp_regex_str;
2101 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2102 zval *zv;
2103
2104 /* Get current entry */
2105 while (1) {
2106 if (replace_idx == replace_ht->nNumUsed) {
2107 replace_str = ZSTR_EMPTY_ALLOC();
2108 tmp_replace_str = NULL;
2109 break;
2110 }
2111 zv = &replace_ht->arData[replace_idx].val;
2112 replace_idx++;
2113 if (Z_TYPE_P(zv) != IS_UNDEF) {
2114 replace_str = zval_get_tmp_string(zv, &tmp_replace_str);
2115 break;
2116 }
2117 }
2118
2119 /* Do the actual replacement and put the result back into subject_str
2120 for further replacements. */
2121 result = php_pcre_replace(regex_str,
2122 subject_str,
2123 ZSTR_VAL(subject_str),
2124 ZSTR_LEN(subject_str),
2125 replace_str,
2126 limit,
2127 replace_count);
2128 zend_tmp_string_release(tmp_replace_str);
2129 zend_tmp_string_release(tmp_regex_str);
2130 zend_string_release_ex(subject_str, 0);
2131 subject_str = result;
2132 if (UNEXPECTED(result == NULL)) {
2133 break;
2134 }
2135 } ZEND_HASH_FOREACH_END();
2136
2137 } else {
2138 replace_str = Z_STR_P(replace);
2139
2140 /* For each entry in the regex array, get the entry */
2141 ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2142 /* Make sure we're dealing with strings. */
2143 zend_string *tmp_regex_str;
2144 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2145
2146 /* Do the actual replacement and put the result back into subject_str
2147 for further replacements. */
2148 result = php_pcre_replace(regex_str,
2149 subject_str,
2150 ZSTR_VAL(subject_str),
2151 ZSTR_LEN(subject_str),
2152 replace_str,
2153 limit,
2154 replace_count);
2155 zend_tmp_string_release(tmp_regex_str);
2156 zend_string_release_ex(subject_str, 0);
2157 subject_str = result;
2158
2159 if (UNEXPECTED(result == NULL)) {
2160 break;
2161 }
2162 } ZEND_HASH_FOREACH_END();
2163 }
2164
2165 return subject_str;
2166 }
2167 /* }}} */
2168
2169 /* {{{ php_replace_in_subject
2170 */
php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)2171 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
2172 {
2173 zend_string *result;
2174 zend_string *subject_str = zval_get_string(subject);
2175
2176 if (Z_TYPE_P(regex) != IS_ARRAY) {
2177 result = php_pcre_replace(Z_STR_P(regex),
2178 subject_str,
2179 ZSTR_VAL(subject_str),
2180 ZSTR_LEN(subject_str),
2181 Z_STR_P(replace),
2182 limit,
2183 replace_count);
2184 zend_string_release_ex(subject_str, 0);
2185 } else {
2186 result = php_pcre_replace_array(Z_ARRVAL_P(regex),
2187 replace,
2188 subject_str,
2189 limit,
2190 replace_count);
2191 }
2192 return result;
2193 }
2194 /* }}} */
2195
2196 /* {{{ php_replace_in_subject_func
2197 */
php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)2198 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)
2199 {
2200 zend_string *result;
2201 zend_string *subject_str = zval_get_string(subject);
2202
2203 if (Z_TYPE_P(regex) != IS_ARRAY) {
2204 result = php_pcre_replace_func(
2205 Z_STR_P(regex), subject_str, fci, fcc, limit, replace_count, flags);
2206 zend_string_release_ex(subject_str, 0);
2207 return result;
2208 } else {
2209 zval *regex_entry;
2210
2211 /* If regex is an array */
2212
2213 /* For each entry in the regex array, get the entry */
2214 ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
2215 /* Make sure we're dealing with strings. */
2216 zend_string *tmp_regex_str;
2217 zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2218
2219 /* Do the actual replacement and put the result back into subject_str
2220 for further replacements. */
2221 result = php_pcre_replace_func(
2222 regex_str, subject_str, fci, fcc, limit, replace_count, flags);
2223 zend_tmp_string_release(tmp_regex_str);
2224 zend_string_release_ex(subject_str, 0);
2225 subject_str = result;
2226 if (UNEXPECTED(result == NULL)) {
2227 break;
2228 }
2229 } ZEND_HASH_FOREACH_END();
2230
2231 return subject_str;
2232 }
2233 }
2234 /* }}} */
2235
2236 /* {{{ preg_replace_func_impl
2237 */
preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)2238 static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)
2239 {
2240 zend_string *result;
2241 size_t replace_count = 0;
2242
2243 if (Z_TYPE_P(regex) != IS_ARRAY) {
2244 convert_to_string_ex(regex);
2245 }
2246
2247 if (Z_TYPE_P(subject) != IS_ARRAY) {
2248 result = php_replace_in_subject_func(
2249 regex, fci, fcc, subject, limit_val, &replace_count, flags);
2250 if (result != NULL) {
2251 RETVAL_STR(result);
2252 } else {
2253 RETVAL_NULL();
2254 }
2255 } else {
2256 /* if subject is an array */
2257 zval *subject_entry, zv;
2258 zend_string *string_key;
2259 zend_ulong num_key;
2260
2261 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2262
2263 /* For each subject entry, convert it to string, then perform replacement
2264 and add the result to the return_value array. */
2265 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2266 result = php_replace_in_subject_func(
2267 regex, fci, fcc, subject_entry, limit_val, &replace_count, flags);
2268 if (result != NULL) {
2269 /* Add to return array */
2270 ZVAL_STR(&zv, result);
2271 if (string_key) {
2272 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2273 } else {
2274 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2275 }
2276 }
2277 } ZEND_HASH_FOREACH_END();
2278 }
2279
2280 return replace_count;
2281 }
2282 /* }}} */
2283
2284 /* {{{ preg_replace_common
2285 */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)2286 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
2287 {
2288 zval *regex, *replace, *subject, *zcount = NULL;
2289 zend_long limit = -1;
2290 size_t replace_count = 0;
2291 zend_string *result;
2292 size_t old_replace_count;
2293
2294 /* Get function parameters and do error-checking. */
2295 ZEND_PARSE_PARAMETERS_START(3, 5)
2296 Z_PARAM_ZVAL(regex)
2297 Z_PARAM_ZVAL(replace)
2298 Z_PARAM_ZVAL(subject)
2299 Z_PARAM_OPTIONAL
2300 Z_PARAM_LONG(limit)
2301 Z_PARAM_ZVAL(zcount)
2302 ZEND_PARSE_PARAMETERS_END();
2303
2304 if (Z_TYPE_P(replace) != IS_ARRAY) {
2305 convert_to_string_ex(replace);
2306 if (Z_TYPE_P(regex) != IS_ARRAY) {
2307 convert_to_string_ex(regex);
2308 }
2309 } else {
2310 if (Z_TYPE_P(regex) != IS_ARRAY) {
2311 php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
2312 RETURN_FALSE;
2313 }
2314 }
2315
2316 if (Z_TYPE_P(subject) != IS_ARRAY) {
2317 old_replace_count = replace_count;
2318 result = php_replace_in_subject(regex,
2319 replace,
2320 subject,
2321 limit,
2322 &replace_count);
2323 if (result != NULL) {
2324 if (!is_filter || replace_count > old_replace_count) {
2325 RETVAL_STR(result);
2326 } else {
2327 zend_string_release_ex(result, 0);
2328 RETVAL_NULL();
2329 }
2330 } else {
2331 RETVAL_NULL();
2332 }
2333 } else {
2334 /* if subject is an array */
2335 zval *subject_entry, zv;
2336 zend_string *string_key;
2337 zend_ulong num_key;
2338
2339 array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2340
2341 /* For each subject entry, convert it to string, then perform replacement
2342 and add the result to the return_value array. */
2343 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2344 old_replace_count = replace_count;
2345 result = php_replace_in_subject(regex,
2346 replace,
2347 subject_entry,
2348 limit,
2349 &replace_count);
2350 if (result != NULL) {
2351 if (!is_filter || replace_count > old_replace_count) {
2352 /* Add to return array */
2353 ZVAL_STR(&zv, result);
2354 if (string_key) {
2355 zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2356 } else {
2357 zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2358 }
2359 } else {
2360 zend_string_release_ex(result, 0);
2361 }
2362 }
2363 } ZEND_HASH_FOREACH_END();
2364 }
2365
2366 if (zcount) {
2367 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2368 }
2369 }
2370 /* }}} */
2371
/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement.
   Thin wrapper: all argument handling and the replacement itself live in
   preg_replace_common(). */
static PHP_FUNCTION(preg_replace)
{
	/* is_filter = 0: subjects are returned whether or not anything was replaced. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
2378 /* }}} */
2379
2380 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2381 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTIONnull2382 static PHP_FUNCTION(preg_replace_callback)
2383 {
2384 zval *regex, *replace, *subject, *zcount = NULL;
2385 zend_long limit = -1, flags = 0;
2386 size_t replace_count;
2387 zend_fcall_info fci;
2388 zend_fcall_info_cache fcc;
2389
2390 /* Get function parameters and do error-checking. */
2391 ZEND_PARSE_PARAMETERS_START(3, 6)
2392 Z_PARAM_ZVAL(regex)
2393 Z_PARAM_ZVAL(replace)
2394 Z_PARAM_ZVAL(subject)
2395 Z_PARAM_OPTIONAL
2396 Z_PARAM_LONG(limit)
2397 Z_PARAM_ZVAL(zcount)
2398 Z_PARAM_LONG(flags)
2399 ZEND_PARSE_PARAMETERS_END();
2400
2401 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2402 zend_string *callback_name = zend_get_callable_name(replace);
2403 php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2404 zend_string_release_ex(callback_name, 0);
2405 ZVAL_STR(return_value, zval_get_string(subject));
2406 return;
2407 }
2408
2409 fci.size = sizeof(fci);
2410 fci.object = NULL;
2411 ZVAL_COPY_VALUE(&fci.function_name, replace);
2412
2413 replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit, flags);
2414 if (zcount) {
2415 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2416 }
2417 }
2418 /* }}} */
2419
2420 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2421 Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTIONnull2422 static PHP_FUNCTION(preg_replace_callback_array)
2423 {
2424 zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2425 zend_long limit = -1, flags = 0;
2426 zend_string *str_idx;
2427 size_t replace_count = 0;
2428 zend_fcall_info fci;
2429 zend_fcall_info_cache fcc;
2430
2431 /* Get function parameters and do error-checking. */
2432 ZEND_PARSE_PARAMETERS_START(2, 5)
2433 Z_PARAM_ARRAY(pattern)
2434 Z_PARAM_ZVAL(subject)
2435 Z_PARAM_OPTIONAL
2436 Z_PARAM_LONG(limit)
2437 Z_PARAM_ZVAL(zcount)
2438 Z_PARAM_LONG(flags)
2439 ZEND_PARSE_PARAMETERS_END();
2440
2441 fci.size = sizeof(fci);
2442 fci.object = NULL;
2443
2444 ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2445 if (str_idx) {
2446 ZVAL_STR_COPY(®ex, str_idx);
2447 } else {
2448 php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2449 zval_ptr_dtor(return_value);
2450 RETURN_NULL();
2451 }
2452
2453 if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2454 zend_string *callback_name = zend_get_callable_name(replace);
2455 php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2456 zend_string_release_ex(callback_name, 0);
2457 zval_ptr_dtor(®ex);
2458 zval_ptr_dtor(return_value);
2459 ZVAL_COPY(return_value, subject);
2460 return;
2461 }
2462
2463 ZVAL_COPY_VALUE(&fci.function_name, replace);
2464
2465 replace_count += preg_replace_func_impl(&zv, ®ex, &fci, &fcc, subject, limit, flags);
2466 if (subject != return_value) {
2467 subject = return_value;
2468 } else {
2469 zval_ptr_dtor(return_value);
2470 }
2471
2472 zval_ptr_dtor(®ex);
2473
2474 ZVAL_COPY_VALUE(return_value, &zv);
2475
2476 if (UNEXPECTED(EG(exception))) {
2477 zval_ptr_dtor(return_value);
2478 RETURN_NULL();
2479 }
2480 } ZEND_HASH_FOREACH_END();
2481
2482 if (zcount) {
2483 ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2484 }
2485 }
2486 /* }}} */
2487
/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement and only return matches.
   Thin wrapper: all argument handling and the replacement itself live in
   preg_replace_common(). */
static PHP_FUNCTION(preg_filter)
{
	/* is_filter = 1: only subjects in which something was replaced are returned. */
	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
2494 /* }}} */
2495
2496 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2497 Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTIONnull2498 static PHP_FUNCTION(preg_split)
2499 {
2500 zend_string *regex; /* Regular expression */
2501 zend_string *subject; /* String to match against */
2502 zend_long limit_val = -1;/* Integer value of limit */
2503 zend_long flags = 0; /* Match control flags */
2504 pcre_cache_entry *pce; /* Compiled regular expression */
2505
2506 /* Get function parameters and do error checking */
2507 ZEND_PARSE_PARAMETERS_START(2, 4)
2508 Z_PARAM_STR(regex)
2509 Z_PARAM_STR(subject)
2510 Z_PARAM_OPTIONAL
2511 Z_PARAM_LONG(limit_val)
2512 Z_PARAM_LONG(flags)
2513 ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2514
2515 /* Compile regex or get it from cache. */
2516 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2517 RETURN_FALSE;
2518 }
2519
2520 pce->refcount++;
2521 php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2522 pce->refcount--;
2523 }
2524 /* }}} */
2525
/* {{{ php_pcre_split_impl
 * Split subject_str at every match of the compiled pattern `pce` and store
 * the pieces in return_value as a list.
 *
 * limit_val: -1 and 0 both mean "no limit". Any other value <= 1 skips
 *            matching altogether, so the whole subject becomes the only
 *            piece. Larger values stop matching once a single piece is left
 *            to produce.
 * flags:     bit mask of PREG_SPLIT_NO_EMPTY, PREG_SPLIT_DELIM_CAPTURE and
 *            PREG_SPLIT_OFFSET_CAPTURE.
 *
 * Resets PCRE_G(error_code) on entry. If it is set by the time matching
 * ends, or the match data cannot be allocated, the partially built array is
 * destroyed and return_value becomes false.
 */
PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
	zend_long limit_val, zend_long flags)
{
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
	uint32_t		 delim_capture; 	/* If delimiters should be captured */
	uint32_t		 offset_capture;	/* If offsets should be captured */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	zval			 tmp;
	pcre2_match_data *match_data;
	char *subject = ZSTR_VAL(subject_str);

	/* Decode the individual flag bits once; each is non-zero when set. */
	no_empty = flags & PREG_SPLIT_NO_EMPTY;
	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;

	/* Initialize return value */
	array_init(return_value);

	/* Number of ovector pairs the pattern needs: all capture groups plus the
	   whole match. */
	num_subpats = pce->capture_count + 1;

	/* Start at the beginning of the string */
	start_offset = 0;
	last_match_offset = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	if (limit_val == -1) {
		/* pass */
	} else if (limit_val == 0) {
		/* 0 is treated exactly like "no limit" */
		limit_val = -1;
	} else if (limit_val <= 1) {
		/* Nothing to split off: emit the subject as a single piece. This jump
		   skips the match_data setup, which the code at `last` never touches. */
		goto last;
	}

	/* Use the shared match data block when it is free and large enough for
	   this pattern, otherwise allocate one sized for the pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			zval_ptr_dtor(return_value);
			RETURN_FALSE;
		}
	}

	/* Only the first match lets PCRE validate the subject as UTF, and only
	   for patterns compiled with PCRE2_UTF; all later calls pass
	   PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is only taken for the first match when no UTF check
	   is requested (options != 0). */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
			options, match_data, mctx);

	while (1) {
		/* If something matched */
		if (count >= 0) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is reported as an internal
			   error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				break;
			}

			/* Emit the piece between the previous match and this one, unless
			   it is empty and empty pieces are suppressed. */
			if (!no_empty || offsets[0] != last_match_offset) {
				if (offset_capture) {
					/* Add (match, offset) pair to the return value */
					add_offset_pair(
						return_value, subject, last_match_offset, offsets[0],
						NULL, 0);
				} else {
					/* Add the piece to the return value */
					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
				}

				/* One less left to do */
				if (limit_val != -1)
					limit_val--;
			}

			/* Optionally emit every capture group of the delimiter (index 0,
			   the whole match, is skipped). These do not count towards the
			   limit. */
			if (delim_capture) {
				size_t i;
				for (i = 1; i < count; i++) {
					/* If we have matched a delimiter */
					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
						if (offset_capture) {
							add_offset_pair(
								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
						} else {
							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
						}
					}
				}
			}

			/* Advance to the position right after the last full match */
			start_offset = last_match_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				/* Get next piece if no limit or limit not yet reached and something matched*/
				if (limit_val != -1 && limit_val <= 1) {
					break;
				}
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
				if (count >= 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < ZSTR_LEN(subject_str)) {
						start_offset += calculate_unit_length(pce, subject + start_offset);
					} else {
						break;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH) {
			/* No further delimiter: the remainder is added after the loop. */
			break;
		} else {
error:
			pcre_handle_exec_error(count);
			break;
		}

		/* Get next piece if no limit or limit not yet reached and something matched*/
		if (limit_val != -1 && limit_val <= 1) {
			break;
		}

		/* Look for the next delimiter, starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a privately allocated block is freed; the shared one is kept. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	/* Any error recorded during matching discards the partial result. */
	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
		zval_ptr_dtor(return_value);
		RETURN_FALSE;
	}

last:
	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */

	/* Emit whatever follows the last delimiter, unless it is empty and empty
	   pieces are suppressed. */
	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
		if (offset_capture) {
			/* Add the last (match, offset) pair to the return value */
			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
		} else {
			/* Add the last piece to the return value */
			if (start_offset == 0) {
				/* The piece is the whole subject: share the string instead
				   of building a new one. */
				ZVAL_STR_COPY(&tmp, subject_str);
			} else {
				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
			}
			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
		}
	}
}
2719 /* }}} */
2720
2721 /* {{{ proto string preg_quote(string str [, string delim_char])
2722 Quote regular expression characters plus an optional character */
PHP_FUNCTIONnull2723 static PHP_FUNCTION(preg_quote)
2724 {
2725 zend_string *str; /* Input string argument */
2726 zend_string *delim = NULL; /* Additional delimiter argument */
2727 char *in_str; /* Input string */
2728 char *in_str_end; /* End of the input string */
2729 zend_string *out_str; /* Output string with quoted characters */
2730 size_t extra_len; /* Number of additional characters */
2731 char *p, /* Iterator for input string */
2732 *q, /* Iterator for output string */
2733 delim_char = '\0', /* Delimiter character to be quoted */
2734 c; /* Current character */
2735
2736 /* Get the arguments and check for errors */
2737 ZEND_PARSE_PARAMETERS_START(1, 2)
2738 Z_PARAM_STR(str)
2739 Z_PARAM_OPTIONAL
2740 Z_PARAM_STR_EX(delim, 1, 0)
2741 ZEND_PARSE_PARAMETERS_END();
2742
2743 /* Nothing to do if we got an empty string */
2744 if (ZSTR_LEN(str) == 0) {
2745 RETURN_EMPTY_STRING();
2746 }
2747
2748 in_str = ZSTR_VAL(str);
2749 in_str_end = in_str + ZSTR_LEN(str);
2750
2751 if (delim) {
2752 delim_char = ZSTR_VAL(delim)[0];
2753 }
2754
2755 /* Go through the string and quote necessary characters */
2756 extra_len = 0;
2757 p = in_str;
2758 do {
2759 c = *p;
2760 switch(c) {
2761 case '.':
2762 case '\\':
2763 case '+':
2764 case '*':
2765 case '?':
2766 case '[':
2767 case '^':
2768 case ']':
2769 case '$':
2770 case '(':
2771 case ')':
2772 case '{':
2773 case '}':
2774 case '=':
2775 case '!':
2776 case '>':
2777 case '<':
2778 case '|':
2779 case ':':
2780 case '-':
2781 case '#':
2782 extra_len++;
2783 break;
2784
2785 case '\0':
2786 extra_len+=3;
2787 break;
2788
2789 default:
2790 if (c == delim_char) {
2791 extra_len++;
2792 }
2793 break;
2794 }
2795 p++;
2796 } while (p != in_str_end);
2797
2798 if (extra_len == 0) {
2799 RETURN_STR_COPY(str);
2800 }
2801
2802 /* Allocate enough memory so that even if each character
2803 is quoted, we won't run out of room */
2804 out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2805 q = ZSTR_VAL(out_str);
2806 p = in_str;
2807
2808 do {
2809 c = *p;
2810 switch(c) {
2811 case '.':
2812 case '\\':
2813 case '+':
2814 case '*':
2815 case '?':
2816 case '[':
2817 case '^':
2818 case ']':
2819 case '$':
2820 case '(':
2821 case ')':
2822 case '{':
2823 case '}':
2824 case '=':
2825 case '!':
2826 case '>':
2827 case '<':
2828 case '|':
2829 case ':':
2830 case '-':
2831 case '#':
2832 *q++ = '\\';
2833 *q++ = c;
2834 break;
2835
2836 case '\0':
2837 *q++ = '\\';
2838 *q++ = '0';
2839 *q++ = '0';
2840 *q++ = '0';
2841 break;
2842
2843 default:
2844 if (c == delim_char) {
2845 *q++ = '\\';
2846 }
2847 *q++ = c;
2848 break;
2849 }
2850 p++;
2851 } while (p != in_str_end);
2852 *q = '\0';
2853
2854 RETURN_NEW_STR(out_str);
2855 }
2856 /* }}} */
2857
2858 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2859 Searches array and returns entries which match regex */
PHP_FUNCTIONnull2860 static PHP_FUNCTION(preg_grep)
2861 {
2862 zend_string *regex; /* Regular expression */
2863 zval *input; /* Input array */
2864 zend_long flags = 0; /* Match control flags */
2865 pcre_cache_entry *pce; /* Compiled regular expression */
2866
2867 /* Get arguments and do error checking */
2868 ZEND_PARSE_PARAMETERS_START(2, 3)
2869 Z_PARAM_STR(regex)
2870 Z_PARAM_ARRAY(input)
2871 Z_PARAM_OPTIONAL
2872 Z_PARAM_LONG(flags)
2873 ZEND_PARSE_PARAMETERS_END();
2874
2875 /* Compile regex or get it from cache. */
2876 if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2877 RETURN_FALSE;
2878 }
2879
2880 pce->refcount++;
2881 php_pcre_grep_impl(pce, input, return_value, flags);
2882 pce->refcount--;
2883 }
2884 /* }}} */
2885
php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags)2886 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2887 {
2888 zval *entry; /* An entry in the input array */
2889 uint32_t num_subpats; /* Number of captured subpatterns */
2890 int count; /* Count of matched subpatterns */
2891 uint32_t options; /* Execution options */
2892 zend_string *string_key;
2893 zend_ulong num_key;
2894 zend_bool invert; /* Whether to return non-matching
2895 entries */
2896 pcre2_match_data *match_data;
2897 invert = flags & PREG_GREP_INVERT ? 1 : 0;
2898
2899 /* Calculate the size of the offsets array, and allocate memory for it. */
2900 num_subpats = pce->capture_count + 1;
2901
2902 /* Initialize return array */
2903 array_init(return_value);
2904
2905 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2906
2907 if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2908 match_data = mdata;
2909 } else {
2910 match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2911 if (!match_data) {
2912 PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2913 return;
2914 }
2915 }
2916
2917 options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2918
2919 /* Go through the input array */
2920 ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2921 zend_string *tmp_subject_str;
2922 zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2923
2924 /* Perform the match */
2925 #ifdef HAVE_PCRE_JIT_SUPPORT
2926 if ((pce->preg_options & PREG_JIT) && options) {
2927 count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2928 PCRE2_NO_UTF_CHECK, match_data, mctx);
2929 } else
2930 #endif
2931 count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2932 options, match_data, mctx);
2933
2934 /* If the entry fits our requirements */
2935 if (count >= 0) {
2936 /* Check for too many substrings condition. */
2937 if (UNEXPECTED(count == 0)) {
2938 php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2939 }
2940 if (!invert) {
2941 Z_TRY_ADDREF_P(entry);
2942
2943 /* Add to return array */
2944 if (string_key) {
2945 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2946 } else {
2947 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2948 }
2949 }
2950 } else if (count == PCRE2_ERROR_NOMATCH) {
2951 if (invert) {
2952 Z_TRY_ADDREF_P(entry);
2953
2954 /* Add to return array */
2955 if (string_key) {
2956 zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2957 } else {
2958 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2959 }
2960 }
2961 } else {
2962 pcre_handle_exec_error(count);
2963 zend_tmp_string_release(tmp_subject_str);
2964 break;
2965 }
2966
2967 zend_tmp_string_release(tmp_subject_str);
2968 } ZEND_HASH_FOREACH_END();
2969 if (match_data != mdata) {
2970 pcre2_match_data_free(match_data);
2971 }
2972 }
2973 /* }}} */
2974
2975 /* {{{ proto int preg_last_error()
2976 Returns the error code of the last regexp execution. */
PHP_FUNCTIONnull2977 static PHP_FUNCTION(preg_last_error)
2978 {
2979 ZEND_PARSE_PARAMETERS_NONE();
2980
2981 RETURN_LONG(PCRE_G(error_code));
2982 }
2983 /* }}} */
2984
2985 /* {{{ module definition structures */
2986
/* {{{ arginfo */
/* Argument descriptions for the functions registered in pcre_functions.
 * NOTE(review): per the Zend API, the last number of ZEND_BEGIN_ARG_INFO_EX
 * is the count of required arguments and a leading 1 in ZEND_ARG_INFO marks
 * a by-reference parameter - confirm against zend_API.h. */

/* preg_match(pattern, subject [, &subpatterns [, flags [, offset]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all(pattern, subject [, &subpatterns [, flags [, offset]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace(regex, replace, subject [, limit [, &count]]);
 * the function table also uses this description for preg_filter. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback(regex, callback, subject [, limit [, &count [, flags]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_replace_callback_array(pattern, subject [, limit [, &count [, flags]]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_split(pattern, subject [, limit [, flags]]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote(str [, delim_char]) */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep(regex, input [, flags]) - matches the 2..3 argument range
 * parsed by PHP_FUNCTION(preg_grep). */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error() - no parameters are described. */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
3050
/* Function table of the extension: pairs each userland preg_* function with
 * its argument description. It is referenced from pcre_module_entry. */
static const zend_function_entry pcre_functions[] = {
	PHP_FE(preg_match,					arginfo_preg_match)
	PHP_FE(preg_match_all,				arginfo_preg_match_all)
	PHP_FE(preg_replace,				arginfo_preg_replace)
	PHP_FE(preg_replace_callback,		arginfo_preg_replace_callback)
	PHP_FE(preg_replace_callback_array,	arginfo_preg_replace_callback_array)
	/* preg_filter shares the argument description of preg_replace */
	PHP_FE(preg_filter,					arginfo_preg_replace)
	PHP_FE(preg_split,					arginfo_preg_split)
	PHP_FE(preg_quote,					arginfo_preg_quote)
	PHP_FE(preg_grep,					arginfo_preg_grep)
	PHP_FE(preg_last_error,				arginfo_preg_last_error)
	PHP_FE_END	/* terminates the table */
};
3064
/* Module descriptor for the "pcre" extension. It names the extension, points
 * at the function table above and lists the lifecycle and globals hooks.
 * The initializer is positional. */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
	"pcre",						/* extension name */
	pcre_functions,				/* exported functions */
	PHP_MINIT(pcre),
	PHP_MSHUTDOWN(pcre),
	PHP_RINIT(pcre),
	PHP_RSHUTDOWN(pcre),
	PHP_MINFO(pcre),
	PHP_PCRE_VERSION,
	PHP_MODULE_GLOBALS(pcre),
	PHP_GINIT(pcre),
	PHP_GSHUTDOWN(pcre),
	NULL,						/* NOTE(review): presumably the post-deactivate hook slot, unused here - confirm against zend_modules.h */
	STANDARD_MODULE_PROPERTIES_EX
};

/* Only emitted when COMPILE_DL_PCRE is defined. */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
3085
3086 /* }}} */
3087
/* Exported accessor: returns `mctx`, the match context this file passes to
 * pcre2_match() and pcre2_jit_match(). */
PHPAPI pcre2_match_context *php_pcre_mctx(void)
{/*{{{*/
	return mctx;
}/*}}}*/
3092
/* Exported accessor: returns `gctx`, the general context this file passes to
 * pcre2_match_data_create_from_pattern(). */
PHPAPI pcre2_general_context *php_pcre_gctx(void)
{/*{{{*/
	return gctx;
}/*}}}*/
3097
/* Exported accessor: returns `cctx`, the compile context defined elsewhere
 * in this file. */
PHPAPI pcre2_compile_context *php_pcre_cctx(void)
{/*{{{*/
	return cctx;
}/*}}}*/
3102
php_pcre_pce_incref(pcre_cache_entry *pce)3103 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3104 {/*{{{*/
3105 assert(NULL != pce);
3106 pce->refcount++;
3107 }/*}}}*/
3108
php_pcre_pce_decref(pcre_cache_entry *pce)3109 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3110 {/*{{{*/
3111 assert(NULL != pce);
3112 assert(0 != pce->refcount);
3113 pce->refcount--;
3114 }/*}}}*/
3115
php_pcre_pce_re(pcre_cache_entry *pce)3116 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3117 {/*{{{*/
3118 assert(NULL != pce);
3119 return pce->re;
3120 }/*}}}*/
3121