1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) The PHP Group                                          |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Andrei Zmievski <andrei@php.net>                             |
16    +----------------------------------------------------------------------+
17  */
18 
19 #include "php.h"
20 #include "php_ini.h"
21 #include "php_globals.h"
22 #include "php_pcre.h"
23 #include "ext/standard/info.h"
24 #include "ext/standard/basic_functions.h"
25 #include "zend_smart_str.h"
26 #include "SAPI.h"
27 
28 #include "ext/standard/php_string.h"
29 
/* Flags registered as userland constants during module startup. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1<<8)
#define PREG_UNMATCHED_AS_NULL      (1<<9)

#define PREG_SPLIT_NO_EMPTY         (1<<0)
#define PREG_SPLIT_DELIM_CAPTURE    (1<<1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1<<2)

#define PREG_GREP_INVERT            (1<<0)

/* Internal preg option: set when the 'e' modifier is seen (and then rejected). */
#define PREG_REPLACE_EVAL           (1<<0)

/* Internal preg option: set on a cache entry once JIT code was produced for it. */
#define PREG_JIT                    (1<<3)

/* Number of cached patterns at which a clean-out of the cache is triggered. */
#define PCRE_CACHE_SIZE 4096
46 
47 struct _pcre_cache_entry {
48 	pcre2_code *re;
49 	uint32_t preg_options;
50 	uint32_t capture_count;
51 	uint32_t name_count;
52 	uint32_t compile_options;
53 	uint32_t extra_compile_options;
54 	uint32_t refcount;
55 };
56 
/* Error codes stored in PCRE_G(error_code); module startup registers them
 * as the userland PREG_*_ERROR constants, so the numeric values are fixed. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5,
	PHP_PCRE_JIT_STACKLIMIT_ERROR  = 6
};
66 
67 
68 PHPAPI ZEND_DECLARE_MODULE_GLOBALS(pcre)
69 
70 #ifdef HAVE_PCRE_JIT_SUPPORT
71 #define PCRE_JIT_STACK_MIN_SIZE (32 * 1024)
72 #define PCRE_JIT_STACK_MAX_SIZE (192 * 1024)
73 ZEND_TLS pcre2_jit_stack *jit_stack = NULL;
74 #endif
75 ZEND_TLS pcre2_general_context *gctx = NULL;
76 /* These two are global per thread for now. Though it is possible to use these
77  	per pattern. Either one can copy it and use in pce, or one does no global
78 	contexts at all, but creates for every pce. */
79 ZEND_TLS pcre2_compile_context *cctx = NULL;
80 ZEND_TLS pcre2_match_context   *mctx = NULL;
81 ZEND_TLS pcre2_match_data      *mdata = NULL;
82 ZEND_TLS zend_bool              mdata_used = 0;
83 ZEND_TLS uint8_t pcre2_init_ok = 0;
/*
 * Mutex helpers. A real mutex only exists in thread-safe builds with JIT
 * support; otherwise every helper is a no-op.
 *
 * Each macro expands to exactly one statement (do { } while (0)) and carries
 * no trailing semicolon, so it is safe in unbraced if/else bodies. Callers
 * write the semicolon themselves: php_pcre_mutex_lock();
 */
#if defined(ZTS) && defined(HAVE_PCRE_JIT_SUPPORT)
static MUTEX_T pcre_mt = NULL;
/* Allocate the mutex once, and only from the main thread. */
#define php_pcre_mutex_alloc() \
	do { \
		if (tsrm_is_main_thread() && !pcre_mt) { \
			pcre_mt = tsrm_mutex_alloc(); \
		} \
	} while (0)
/* Free the mutex, again only from the main thread, and forget the handle. */
#define php_pcre_mutex_free() \
	do { \
		if (tsrm_is_main_thread() && pcre_mt) { \
			tsrm_mutex_free(pcre_mt); \
			pcre_mt = NULL; \
		} \
	} while (0)
#define php_pcre_mutex_lock() \
	do { tsrm_mutex_lock(pcre_mt); } while (0)
#define php_pcre_mutex_unlock() \
	do { tsrm_mutex_unlock(pcre_mt); } while (0)
#else
#define php_pcre_mutex_alloc()  do { } while (0)
#define php_pcre_mutex_free()   do { } while (0)
#define php_pcre_mutex_lock()   do { } while (0)
#define php_pcre_mutex_unlock() do { } while (0)
#endif
98 
99 ZEND_TLS HashTable char_tables;
100 
/* Destructor for char_tables entries: releases the persistently allocated
 * block the hash slot points to. */
static void php_pcre_free_char_table(zval *data)
{/*{{{*/
	pefree(Z_PTR_P(data), 1);
}/*}}}*/
106 
pcre_handle_exec_error(int pcre_code)107 static void pcre_handle_exec_error(int pcre_code) /* {{{ */
108 {
109 	int preg_code = 0;
110 
111 	switch (pcre_code) {
112 		case PCRE2_ERROR_MATCHLIMIT:
113 			preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
114 			break;
115 
116 		case PCRE2_ERROR_RECURSIONLIMIT:
117 			preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
118 			break;
119 
120 		case PCRE2_ERROR_BADUTFOFFSET:
121 			preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
122 			break;
123 
124 #ifdef HAVE_PCRE_JIT_SUPPORT
125 		case PCRE2_ERROR_JIT_STACKLIMIT:
126 			preg_code = PHP_PCRE_JIT_STACKLIMIT_ERROR;
127 			break;
128 #endif
129 
130 		default:
131 			if (pcre_code <= PCRE2_ERROR_UTF8_ERR1 && pcre_code >= PCRE2_ERROR_UTF8_ERR21) {
132 				preg_code = PHP_PCRE_BAD_UTF8_ERROR;
133 			} else  {
134 				preg_code = PHP_PCRE_INTERNAL_ERROR;
135 			}
136 			break;
137 	}
138 
139 	PCRE_G(error_code) = preg_code;
140 }
141 /* }}} */
142 
/* Destructor for entries of the persistent regex cache: frees the compiled
 * pattern and then the entry itself with free(). */
static void php_free_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		free(entry);
	}
}
150 /* }}} */
151 
/* Destructor for entries of the per-request regex cache: frees the compiled
 * pattern and then the entry itself with efree(). */
static void php_efree_pcre_cache(zval *data) /* {{{ */
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);

	if (entry != NULL) {
		pcre2_code_free(entry->re);
		efree(entry);
	}
}
159 /* }}} */
160 
/* Allocation callback given to the PCRE2 general context; `data` is unused.
 * Memory is taken from the persistent allocator. */
static void *php_pcre_malloc(PCRE2_SIZE size, void *data)
{/*{{{*/
	return pemalloc(size, 1);
}/*}}}*/
166 
/* Release callback given to the PCRE2 general context; `data` is unused.
 * Counterpart of php_pcre_malloc(). */
static void php_pcre_free(void *block, void *data)
{/*{{{*/
	pefree(block, 1);
}/*}}}*/
171 
172 #define PHP_PCRE_DEFAULT_EXTRA_COPTIONS PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
173 #define PHP_PCRE_PREALLOC_MDATA_SIZE 32
174 
php_pcre_init_pcre2(uint8_t jit)175 static void php_pcre_init_pcre2(uint8_t jit)
176 {/*{{{*/
177 	if (!gctx) {
178 		gctx = pcre2_general_context_create(php_pcre_malloc, php_pcre_free, NULL);
179 		if (!gctx) {
180 			pcre2_init_ok = 0;
181 			return;
182 		}
183 	}
184 
185 	if (!cctx) {
186 		cctx = pcre2_compile_context_create(gctx);
187 		if (!cctx) {
188 			pcre2_init_ok = 0;
189 			return;
190 		}
191 	}
192 
193 	/* XXX The 'X' modifier is the default behavior in PCRE2. This option is
194 		called dangerous in the manual, as typos in patterns can cause
195 		unexpected results. We might want to to switch to the default PCRE2
196 		behavior, too, thus causing a certain BC break. */
197 	pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
198 
199 	if (!mctx) {
200 		mctx = pcre2_match_context_create(gctx);
201 		if (!mctx) {
202 			pcre2_init_ok = 0;
203 			return;
204 		}
205 	}
206 
207 #ifdef HAVE_PCRE_JIT_SUPPORT
208 	if (jit && !jit_stack) {
209 		jit_stack = pcre2_jit_stack_create(PCRE_JIT_STACK_MIN_SIZE, PCRE_JIT_STACK_MAX_SIZE, gctx);
210 		if (!jit_stack) {
211 			pcre2_init_ok = 0;
212 			return;
213 		}
214 	}
215 #endif
216 
217 	if (!mdata) {
218 		mdata = pcre2_match_data_create(PHP_PCRE_PREALLOC_MDATA_SIZE, gctx);
219 		if (!mdata) {
220 			pcre2_init_ok = 0;
221 			return;
222 		}
223 	}
224 
225 	pcre2_init_ok = 1;
226 }/*}}}*/
227 
php_pcre_shutdown_pcre2(void)228 static void php_pcre_shutdown_pcre2(void)
229 {/*{{{*/
230 	if (gctx) {
231 		pcre2_general_context_free(gctx);
232 		gctx = NULL;
233 	}
234 
235 	if (cctx) {
236 		pcre2_compile_context_free(cctx);
237 		cctx = NULL;
238 	}
239 
240 	if (mctx) {
241 		pcre2_match_context_free(mctx);
242 		mctx = NULL;
243 	}
244 
245 #ifdef HAVE_PCRE_JIT_SUPPORT
246 	/* Stack may only be destroyed when no cached patterns
247 	 	possibly associated with it do exist. */
248 	if (jit_stack) {
249 		pcre2_jit_stack_free(jit_stack);
250 		jit_stack = NULL;
251 	}
252 #endif
253 
254 	if (mdata) {
255 		pcre2_match_data_free(mdata);
256 		mdata = NULL;
257 	}
258 
259 	pcre2_init_ok = 0;
260 }/*}}}*/
261 
PHP_GINIT_FUNCTIONnull262 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
263 {
264 	php_pcre_mutex_alloc();
265 
266 	/* If we're on the CLI SAPI, there will only be one request, so we don't need the
267 	 * cache to survive after RSHUTDOWN. */
268 	pcre_globals->per_request_cache = strcmp(sapi_module.name, "cli") == 0;
269 	if (!pcre_globals->per_request_cache) {
270 		zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
271 	}
272 
273 	pcre_globals->backtrack_limit = 0;
274 	pcre_globals->recursion_limit = 0;
275 	pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
276 	ZVAL_UNDEF(&pcre_globals->unmatched_null_pair);
277 	ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair);
278 #ifdef HAVE_PCRE_JIT_SUPPORT
279 	pcre_globals->jit = 1;
280 #endif
281 
282 	php_pcre_init_pcre2(1);
283 	zend_hash_init(&char_tables, 1, NULL, php_pcre_free_char_table, 1);
284 }
285 /* }}} */
286 
PHP_GSHUTDOWN_FUNCTIONnull287 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
288 {
289 	if (!pcre_globals->per_request_cache) {
290 		zend_hash_destroy(&pcre_globals->pcre_cache);
291 	}
292 
293 	php_pcre_shutdown_pcre2();
294 	zend_hash_destroy(&char_tables);
295 	php_pcre_mutex_free();
296 }
297 /* }}} */
298 
PHP_INI_MHnull299 static PHP_INI_MH(OnUpdateBacktrackLimit)
300 {/*{{{*/
301 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
302 	if (mctx) {
303 		pcre2_set_match_limit(mctx, (uint32_t)PCRE_G(backtrack_limit));
304 	}
305 
306 	return SUCCESS;
307 }/*}}}*/
308 
PHP_INI_MHnull309 static PHP_INI_MH(OnUpdateRecursionLimit)
310 {/*{{{*/
311 	OnUpdateLong(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
312 	if (mctx) {
313 		pcre2_set_depth_limit(mctx, (uint32_t)PCRE_G(recursion_limit));
314 	}
315 
316 	return SUCCESS;
317 }/*}}}*/
318 
319 #ifdef HAVE_PCRE_JIT_SUPPORT
PHP_INI_MHnull320 static PHP_INI_MH(OnUpdateJit)
321 {/*{{{*/
322 	OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
323 	if (PCRE_G(jit) && jit_stack) {
324 		pcre2_jit_stack_assign(mctx, NULL, jit_stack);
325 	} else {
326 		pcre2_jit_stack_assign(mctx, NULL, NULL);
327 	}
328 
329 	return SUCCESS;
330 }/*}}}*/
331 #endif
332 
333 PHP_INI_BEGIN()
334 	STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateBacktrackLimit, backtrack_limit, zend_pcre_globals, pcre_globals)
335 	STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000",  PHP_INI_ALL, OnUpdateRecursionLimit, recursion_limit, zend_pcre_globals, pcre_globals)
336 #ifdef HAVE_PCRE_JIT_SUPPORT
337 	STD_PHP_INI_ENTRY("pcre.jit",             "1",       PHP_INI_ALL, OnUpdateJit, jit,             zend_pcre_globals, pcre_globals)
338 #endif
339 PHP_INI_END()
340 
/*
 * Fetches a string-valued pcre2_config() item.
 *
 * what - PCRE2_CONFIG_* selector of a string item.
 *
 * Returns a malloc()ed, NUL-terminated string that the caller must free(),
 * or NULL when the item is unavailable (pcre2_config() reports an error or
 * an empty result) or the buffer cannot be allocated.
 */
static char *_pcre2_config_str(uint32_t what)
{/*{{{*/
	char *ret;
	/* A NULL destination asks only for the required length. */
	int len = pcre2_config(what, NULL);

	/* A negative value is an error code and must never reach malloc(). */
	if (len <= 0) {
		return NULL;
	}

	ret = malloc((size_t)len + 1);
	if (ret == NULL) {
		return NULL;
	}

	len = pcre2_config(what, ret);
	if (len <= 0) {
		free(ret);
		return NULL;
	}

	return ret;
}/*}}}*/
354 
355 /* {{{ PHP_MINFO_FUNCTION(pcre) */
PHP_MINFO_FUNCTIONnull356 static PHP_MINFO_FUNCTION(pcre)
357 {
358 #ifdef HAVE_PCRE_JIT_SUPPORT
359 	uint32_t flag = 0;
360 	char *jit_target = _pcre2_config_str(PCRE2_CONFIG_JITTARGET);
361 #endif
362 	char *version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
363 	char *unicode = _pcre2_config_str(PCRE2_CONFIG_UNICODE_VERSION);
364 
365 	php_info_print_table_start();
366 	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
367 	php_info_print_table_row(2, "PCRE Library Version", version);
368 	free(version);
369 	php_info_print_table_row(2, "PCRE Unicode Version", unicode);
370 	free(unicode);
371 
372 #ifdef HAVE_PCRE_JIT_SUPPORT
373 	if (!pcre2_config(PCRE2_CONFIG_JIT, &flag)) {
374 		php_info_print_table_row(2, "PCRE JIT Support", flag ? "enabled" : "disabled");
375 	} else {
376 		php_info_print_table_row(2, "PCRE JIT Support", "unknown" );
377 	}
378 	if (jit_target) {
379 		php_info_print_table_row(2, "PCRE JIT Target", jit_target);
380 	}
381 	free(jit_target);
382 #else
383 	php_info_print_table_row(2, "PCRE JIT Support", "not compiled in" );
384 #endif
385 
386 #ifdef HAVE_PCRE_VALGRIND_SUPPORT
387 	php_info_print_table_row(2, "PCRE Valgrind Support", "enabled" );
388 #endif
389 
390 	php_info_print_table_end();
391 
392 	DISPLAY_INI_ENTRIES();
393 }
394 /* }}} */
395 
396 /* {{{ PHP_MINIT_FUNCTION(pcre) */
PHP_MINIT_FUNCTIONnull397 static PHP_MINIT_FUNCTION(pcre)
398 {
399 	char *version;
400 
401 #ifdef HAVE_PCRE_JIT_SUPPORT
402 	if (UNEXPECTED(!pcre2_init_ok)) {
403 		/* Retry. */
404 		php_pcre_init_pcre2(PCRE_G(jit));
405 		if (!pcre2_init_ok) {
406 			return FAILURE;
407 		}
408 	}
409 #endif
410 
411 	REGISTER_INI_ENTRIES();
412 
413 	REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
414 	REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
415 	REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
416 	REGISTER_LONG_CONSTANT("PREG_UNMATCHED_AS_NULL", PREG_UNMATCHED_AS_NULL, CONST_CS | CONST_PERSISTENT);
417 	REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
418 	REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
419 	REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
420 	REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
421 
422 	REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
423 	REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
424 	REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
425 	REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
426 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
427 	REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
428 	REGISTER_LONG_CONSTANT("PREG_JIT_STACKLIMIT_ERROR", PHP_PCRE_JIT_STACKLIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
429 	version = _pcre2_config_str(PCRE2_CONFIG_VERSION);
430 	REGISTER_STRING_CONSTANT("PCRE_VERSION", version, CONST_CS | CONST_PERSISTENT);
431 	free(version);
432 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MAJOR", PCRE2_MAJOR, CONST_CS | CONST_PERSISTENT);
433 	REGISTER_LONG_CONSTANT("PCRE_VERSION_MINOR", PCRE2_MINOR, CONST_CS | CONST_PERSISTENT);
434 
435 #ifdef HAVE_PCRE_JIT_SUPPORT
436 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 1, CONST_CS | CONST_PERSISTENT);
437 #else
438 	REGISTER_BOOL_CONSTANT("PCRE_JIT_SUPPORT", 0, CONST_CS | CONST_PERSISTENT);
439 #endif
440 
441 	return SUCCESS;
442 }
443 /* }}} */
444 
445 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
PHP_MSHUTDOWN_FUNCTIONnull446 static PHP_MSHUTDOWN_FUNCTION(pcre)
447 {
448 	UNREGISTER_INI_ENTRIES();
449 
450 	return SUCCESS;
451 }
452 /* }}} */
453 
454 /* {{{ PHP_RINIT_FUNCTION(pcre) */
PHP_RINIT_FUNCTIONnull455 static PHP_RINIT_FUNCTION(pcre)
456 {
457 #ifdef HAVE_PCRE_JIT_SUPPORT
458 	if (UNEXPECTED(!pcre2_init_ok)) {
459 		/* Retry. */
460 		php_pcre_mutex_lock();
461 		php_pcre_init_pcre2(PCRE_G(jit));
462 		if (!pcre2_init_ok) {
463 			php_pcre_mutex_unlock();
464 			return FAILURE;
465 		}
466 		php_pcre_mutex_unlock();
467 	}
468 
469 	mdata_used = 0;
470 #endif
471 
472 	if (PCRE_G(per_request_cache)) {
473 		zend_hash_init(&PCRE_G(pcre_cache), 0, NULL, php_efree_pcre_cache, 0);
474 	}
475 
476 	return SUCCESS;
477 }
478 /* }}} */
479 
PHP_RSHUTDOWN_FUNCTIONnull480 static PHP_RSHUTDOWN_FUNCTION(pcre)
481 {
482 	if (PCRE_G(per_request_cache)) {
483 		zend_hash_destroy(&PCRE_G(pcre_cache));
484 	}
485 
486 	zval_ptr_dtor(&PCRE_G(unmatched_null_pair));
487 	zval_ptr_dtor(&PCRE_G(unmatched_empty_pair));
488 	ZVAL_UNDEF(&PCRE_G(unmatched_null_pair));
489 	ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair));
490 	return SUCCESS;
491 }
492 
493 /* {{{ static pcre_clean_cache */
/* Hash apply callback used to shrink the regex cache. `arg` points to an int
 * holding how many entries may still be removed; an entry is removed only
 * while that budget is positive and its refcount is zero. */
static int pcre_clean_cache(zval *data, void *arg)
{
	pcre_cache_entry *entry = (pcre_cache_entry *) Z_PTR_P(data);
	int *budget = (int *) arg;

	if (*budget <= 0 || entry->refcount) {
		return ZEND_HASH_APPLY_KEEP;
	}

	--*budget;
	return ZEND_HASH_APPLY_REMOVE;
}
506 /* }}} */
507 
/* Releases every non-NULL name in a subpattern name table of `num_subpats`
 * slots, then frees the table itself. */
static void free_subpats_table(zend_string **subpat_names, uint32_t num_subpats) {
	uint32_t slot = 0;

	while (slot < num_subpats) {
		zend_string *name = subpat_names[slot++];

		if (name != NULL) {
			zend_string_release(name);
		}
	}

	efree(subpat_names);
}
517 
518 /* {{{ static make_subpats_table */
make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)519 static zend_string **make_subpats_table(uint32_t num_subpats, pcre_cache_entry *pce)
520 {
521 	uint32_t name_cnt = pce->name_count, name_size, ni = 0;
522 	char *name_table;
523 	zend_string **subpat_names;
524 	int rc1, rc2;
525 
526 	rc1 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMETABLE, &name_table);
527 	rc2 = pcre2_pattern_info(pce->re, PCRE2_INFO_NAMEENTRYSIZE, &name_size);
528 	if (rc1 < 0 || rc2 < 0) {
529 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc1 < 0 ? rc1 : rc2);
530 		return NULL;
531 	}
532 
533 	subpat_names = ecalloc(num_subpats, sizeof(zend_string *));
534 	while (ni++ < name_cnt) {
535 		unsigned short name_idx = 0x100 * (unsigned char)name_table[0] + (unsigned char)name_table[1];
536 		const char *name = name_table + 2;
537 		subpat_names[name_idx] = zend_string_init(name, strlen(name), 0);
538 		if (is_numeric_string(ZSTR_VAL(subpat_names[name_idx]), ZSTR_LEN(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
539 			php_error_docref(NULL, E_WARNING, "Numeric named subpatterns are not allowed");
540 			free_subpats_table(subpat_names, num_subpats);
541 			return NULL;
542 		}
543 		name_table += name_size;
544 	}
545 	return subpat_names;
546 }
547 /* }}} */
548 
549 /* {{{ static calculate_unit_length */
550 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE2_UTF. */
calculate_unit_length(pcre_cache_entry *pce, char *start)551 static zend_always_inline size_t calculate_unit_length(pcre_cache_entry *pce, char *start)
552 {
553 	size_t unit_len;
554 
555 	if (pce->compile_options & PCRE2_UTF) {
556 		char *end = start;
557 
558 		/* skip continuation bytes */
559 		while ((*++end & 0xC0) == 0x80);
560 		unit_len = end - start;
561 	} else {
562 		unit_len = 1;
563 	}
564 	return unit_len;
565 }
566 /* }}} */
567 
/* {{{ pcre_get_compiled_regex_cache_ex
 */
pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)570 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache_ex(zend_string *regex, int locale_aware)
571 {
572 	pcre2_code			*re = NULL;
573 	uint32_t			 coptions = 0;
574 	uint32_t			 extra_coptions = PHP_PCRE_DEFAULT_EXTRA_COPTIONS;
575 	PCRE2_UCHAR	         error[128];
576 	PCRE2_SIZE           erroffset;
577 	int                  errnumber;
578 	char				 delimiter;
579 	char				 start_delimiter;
580 	char				 end_delimiter;
581 	char				*p, *pp;
582 	char				*pattern;
583 	size_t				 pattern_len;
584 	uint32_t			 poptions = 0;
585 	const uint8_t       *tables = NULL;
586 	zval                *zv;
587 	pcre_cache_entry	 new_entry;
588 	int					 rc;
589 	zend_string 		*key;
590 	pcre_cache_entry *ret;
591 
592 	if (locale_aware && BG(locale_string) &&
593 		(ZSTR_LEN(BG(locale_string)) != 1 && ZSTR_VAL(BG(locale_string))[0] != 'C')) {
594 		key = zend_string_alloc(ZSTR_LEN(regex) + ZSTR_LEN(BG(locale_string)) + 1, 0);
595 		memcpy(ZSTR_VAL(key), ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)) + 1);
596 		memcpy(ZSTR_VAL(key) + ZSTR_LEN(BG(locale_string)), ZSTR_VAL(regex), ZSTR_LEN(regex) + 1);
597 	} else {
598 		key = regex;
599 	}
600 
601 	/* Try to lookup the cached regex entry, and if successful, just pass
602 	   back the compiled pattern, otherwise go on and compile it. */
603 	zv = zend_hash_find(&PCRE_G(pcre_cache), key);
604 	if (zv) {
605 		if (key != regex) {
606 			zend_string_release_ex(key, 0);
607 		}
608 		return (pcre_cache_entry*)Z_PTR_P(zv);
609 	}
610 
611 	p = ZSTR_VAL(regex);
612 
613 	/* Parse through the leading whitespace, and display a warning if we
614 	   get to the end without encountering a delimiter. */
615 	while (isspace((int)*(unsigned char *)p)) p++;
616 	if (*p == 0) {
617 		if (key != regex) {
618 			zend_string_release_ex(key, 0);
619 		}
620 		php_error_docref(NULL, E_WARNING,
621 						 p < ZSTR_VAL(regex) + ZSTR_LEN(regex) ? "Null byte in regex" : "Empty regular expression");
622 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
623 		return NULL;
624 	}
625 
626 	/* Get the delimiter and display a warning if it is alphanumeric
627 	   or a backslash. */
628 	delimiter = *p++;
629 	if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
630 		if (key != regex) {
631 			zend_string_release_ex(key, 0);
632 		}
633 		php_error_docref(NULL,E_WARNING, "Delimiter must not be alphanumeric or backslash");
634 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
635 		return NULL;
636 	}
637 
638 	start_delimiter = delimiter;
639 	if ((pp = strchr("([{< )]}> )]}>", delimiter)))
640 		delimiter = pp[5];
641 	end_delimiter = delimiter;
642 
643 	pp = p;
644 
645 	if (start_delimiter == end_delimiter) {
646 		/* We need to iterate through the pattern, searching for the ending delimiter,
647 		   but skipping the backslashed delimiters.  If the ending delimiter is not
648 		   found, display a warning. */
649 		while (*pp != 0) {
650 			if (*pp == '\\' && pp[1] != 0) pp++;
651 			else if (*pp == delimiter)
652 				break;
653 			pp++;
654 		}
655 	} else {
656 		/* We iterate through the pattern, searching for the matching ending
657 		 * delimiter. For each matching starting delimiter, we increment nesting
658 		 * level, and decrement it for each matching ending delimiter. If we
659 		 * reach the end of the pattern without matching, display a warning.
660 		 */
661 		int brackets = 1; 	/* brackets nesting level */
662 		while (*pp != 0) {
663 			if (*pp == '\\' && pp[1] != 0) pp++;
664 			else if (*pp == end_delimiter && --brackets <= 0)
665 				break;
666 			else if (*pp == start_delimiter)
667 				brackets++;
668 			pp++;
669 		}
670 	}
671 
672 	if (*pp == 0) {
673 		if (key != regex) {
674 			zend_string_release_ex(key, 0);
675 		}
676 		if (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
677 			php_error_docref(NULL,E_WARNING, "Null byte in regex");
678 		} else if (start_delimiter == end_delimiter) {
679 			php_error_docref(NULL,E_WARNING, "No ending delimiter '%c' found", delimiter);
680 		} else {
681 			php_error_docref(NULL,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
682 		}
683 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
684 		return NULL;
685 	}
686 
687 	/* Make a copy of the actual pattern. */
688 	pattern_len = pp - p;
689 	pattern = estrndup(p, pattern_len);
690 
691 	/* Move on to the options */
692 	pp++;
693 
694 	/* Parse through the options, setting appropriate flags.  Display
695 	   a warning if we encounter an unknown modifier. */
696 	while (pp < ZSTR_VAL(regex) + ZSTR_LEN(regex)) {
697 		switch (*pp++) {
698 			/* Perl compatible options */
699 			case 'i':	coptions |= PCRE2_CASELESS;		break;
700 			case 'm':	coptions |= PCRE2_MULTILINE;		break;
701 			case 's':	coptions |= PCRE2_DOTALL;		break;
702 			case 'x':	coptions |= PCRE2_EXTENDED;		break;
703 
704 			/* PCRE specific options */
705 			case 'A':	coptions |= PCRE2_ANCHORED;		break;
706 			case 'D':	coptions |= PCRE2_DOLLAR_ENDONLY;break;
707 			case 'S':	/* Pass. */					break;
708 			case 'U':	coptions |= PCRE2_UNGREEDY;		break;
709 			case 'X':	extra_coptions &= ~PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL;			break;
710 			case 'u':	coptions |= PCRE2_UTF;
711 	/* In  PCRE,  by  default, \d, \D, \s, \S, \w, and \W recognize only ASCII
712        characters, even in UTF-8 mode. However, this can be changed by setting
713        the PCRE2_UCP option. */
714 #ifdef PCRE2_UCP
715 						coptions |= PCRE2_UCP;
716 #endif
717 				break;
718 			case 'J':	coptions |= PCRE2_DUPNAMES;		break;
719 
720 			/* Custom preg options */
721 			case 'e':	poptions |= PREG_REPLACE_EVAL;	break;
722 
723 			case ' ':
724 			case '\n':
725 			case '\r':
726 				break;
727 
728 			default:
729 				if (pp[-1]) {
730 					php_error_docref(NULL,E_WARNING, "Unknown modifier '%c'", pp[-1]);
731 				} else {
732 					php_error_docref(NULL,E_WARNING, "Null byte in regex");
733 				}
734 				pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
735 				efree(pattern);
736 				if (key != regex) {
737 					zend_string_release_ex(key, 0);
738 				}
739 				return NULL;
740 		}
741 	}
742 
743 	if (poptions & PREG_REPLACE_EVAL) {
744 		php_error_docref(NULL, E_WARNING, "The /e modifier is no longer supported, use preg_replace_callback instead");
745 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
746 		efree(pattern);
747 		if (key != regex) {
748 			zend_string_release_ex(key, 0);
749 		}
750 		return NULL;
751 	}
752 
753 	if (key != regex) {
754 		tables = (uint8_t *)zend_hash_find_ptr(&char_tables, BG(locale_string));
755 		if (!tables) {
756 			zend_string *_k;
757 			tables = pcre2_maketables(gctx);
758 			if (UNEXPECTED(!tables)) {
759 				php_error_docref(NULL,E_WARNING, "Failed to generate locale character tables");
760 				pcre_handle_exec_error(PCRE2_ERROR_NOMEMORY);
761 				zend_string_release_ex(key, 0);
762 				efree(pattern);
763 				return NULL;
764 			}
765 			_k = zend_string_init(ZSTR_VAL(BG(locale_string)), ZSTR_LEN(BG(locale_string)), 1);
766 			GC_MAKE_PERSISTENT_LOCAL(_k);
767 			zend_hash_add_ptr(&char_tables, _k, (void *)tables);
768 			zend_string_release(_k);
769 		}
770 		pcre2_set_character_tables(cctx, tables);
771 	}
772 
773 	/* Set extra options for the compile context. */
774 	if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
775 		pcre2_set_compile_extra_options(cctx, extra_coptions);
776 	}
777 
778 	/* Compile pattern and display a warning if compilation failed. */
779 	re = pcre2_compile((PCRE2_SPTR)pattern, pattern_len, coptions, &errnumber, &erroffset, cctx);
780 
781 	/* Reset the compile context extra options to default. */
782 	if (PHP_PCRE_DEFAULT_EXTRA_COPTIONS != extra_coptions) {
783 		pcre2_set_compile_extra_options(cctx, PHP_PCRE_DEFAULT_EXTRA_COPTIONS);
784 	}
785 
786 	if (re == NULL) {
787 		if (key != regex) {
788 			zend_string_release_ex(key, 0);
789 		}
790 		pcre2_get_error_message(errnumber, error, sizeof(error));
791 		php_error_docref(NULL,E_WARNING, "Compilation failed: %s at offset %zu", error, erroffset);
792 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
793 		efree(pattern);
794 		return NULL;
795 	}
796 
797 #ifdef HAVE_PCRE_JIT_SUPPORT
798 	if (PCRE_G(jit)) {
799 		/* Enable PCRE JIT compiler */
800 		rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
801 		if (EXPECTED(rc >= 0)) {
802 			size_t jit_size = 0;
803 			if (!pcre2_pattern_info(re, PCRE2_INFO_JITSIZE, &jit_size) && jit_size > 0) {
804 				poptions |= PREG_JIT;
805 			}
806 		} else if (rc == PCRE2_ERROR_NOMEMORY) {
807 			php_error_docref(NULL, E_WARNING,
808 				"Allocation of JIT memory failed, PCRE JIT will be disabled. "
809 				"This is likely caused by security restrictions. "
810 				"Either grant PHP permission to allocate executable memory, or set pcre.jit=0");
811 			PCRE_G(jit) = 0;
812 		} else {
813 			pcre2_get_error_message(rc, error, sizeof(error));
814 			php_error_docref(NULL, E_WARNING, "JIT compilation failed: %s", error);
815 			pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
816 		}
817 	}
818 #endif
819 	efree(pattern);
820 
821 	/*
822 	 * If we reached cache limit, clean out the items from the head of the list;
823 	 * these are supposedly the oldest ones (but not necessarily the least used
824 	 * ones).
825 	 */
826 	if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
827 		int num_clean = PCRE_CACHE_SIZE / 8;
828 		zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean);
829 	}
830 
831 	/* Store the compiled pattern and extra info in the cache. */
832 	new_entry.re = re;
833 	new_entry.preg_options = poptions;
834 	new_entry.compile_options = coptions;
835 	new_entry.extra_compile_options = extra_coptions;
836 	new_entry.refcount = 0;
837 
838 	rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &new_entry.capture_count);
839 	if (rc < 0) {
840 		if (key != regex) {
841 			zend_string_release_ex(key, 0);
842 		}
843 		php_error_docref(NULL, E_WARNING, "Internal pcre2_pattern_info() error %d", rc);
844 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
845 		return NULL;
846 	}
847 
848 	rc = pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &new_entry.name_count);
849 	if (rc < 0) {
850 		if (key != regex) {
851 			zend_string_release_ex(key, 0);
852 		}
853 		php_error_docref(NULL, E_WARNING, "Internal pcre_pattern_info() error %d", rc);
854 		pcre_handle_exec_error(PCRE2_ERROR_INTERNAL);
855 		return NULL;
856 	}
857 
858 	/*
859 	 * Interned strings are not duplicated when stored in HashTable,
860 	 * but all the interned strings created during HTTP request are removed
861 	 * at end of request. However PCRE_G(pcre_cache) must be consistent
862 	 * on the next request as well. So we disable usage of interned strings
863 	 * as hash keys especually for this table.
864 	 * See bug #63180
865 	 */
866 	if (!(GC_FLAGS(key) & IS_STR_PERMANENT) && !PCRE_G(per_request_cache)) {
867 		zend_string *str = zend_string_init(ZSTR_VAL(key), ZSTR_LEN(key), 1);
868 		GC_MAKE_PERSISTENT_LOCAL(str);
869 
870 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), str, &new_entry, sizeof(pcre_cache_entry));
871 		zend_string_release(str);
872 	} else {
873 		ret = zend_hash_add_new_mem(&PCRE_G(pcre_cache), key, &new_entry, sizeof(pcre_cache_entry));
874 	}
875 
876 	if (key != regex) {
877 		zend_string_release_ex(key, 0);
878 	}
879 
880 	return ret;
881 }
882 /* }}} */
883 
884 /* {{{ pcre_get_compiled_regex_cache
885  */
pcre_get_compiled_regex_cache(zend_string *regex)886 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
887 {
888 	return pcre_get_compiled_regex_cache_ex(regex, 1);
889 }
890 /* }}} */
891 
892 /* {{{ pcre_get_compiled_regex
893  */
pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)894 PHPAPI pcre2_code *pcre_get_compiled_regex(zend_string *regex, uint32_t *capture_count)
895 {
896 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
897 
898 	if (capture_count) {
899 		*capture_count = pce ? pce->capture_count : 0;
900 	}
901 
902 	return pce ? pce->re : NULL;
903 }
904 /* }}} */
905 
906 /* {{{ pcre_get_compiled_regex_ex
907  */
pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)908 PHPAPI pcre2_code* pcre_get_compiled_regex_ex(zend_string *regex, uint32_t *capture_count, uint32_t *preg_options, uint32_t *compile_options)
909 {
910 	pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex);
911 
912 	if (preg_options) {
913 		*preg_options = pce ? pce->preg_options : 0;
914 	}
915 	if (compile_options) {
916 		*compile_options = pce ? pce->compile_options : 0;
917 	}
918 	if (capture_count) {
919 		*capture_count = pce ? pce->capture_count : 0;
920 	}
921 
922 	return pce ? pce->re : NULL;
923 }
924 /* }}} */
925 
926 /* XXX For the cases where it's only about match yes/no and no capture
927 		required, perhaps just a minimum sized data would suffice. */
php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)928 PHPAPI pcre2_match_data *php_pcre_create_match_data(uint32_t capture_count, pcre2_code *re)
929 {/*{{{*/
930 
931 	assert(NULL != re);
932 
933 	if (EXPECTED(!mdata_used)) {
934 		int rc = 0;
935 
936 		if (!capture_count) {
937 			/* As we deal with a non cached pattern, no other way to gather this info. */
938 			rc = pcre2_pattern_info(re, PCRE2_INFO_CAPTURECOUNT, &capture_count);
939 		}
940 
941 		if (rc >= 0 && capture_count + 1 <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
942 			mdata_used = 1;
943 			return mdata;
944 		}
945 	}
946 
947 	return pcre2_match_data_create_from_pattern(re, gctx);
948 }/*}}}*/
949 
php_pcre_free_match_data(pcre2_match_data *match_data)950 PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data)
951 {/*{{{*/
952 	if (UNEXPECTED(match_data != mdata)) {
953 		pcre2_match_data_free(match_data);
954 	} else {
955 		mdata_used = 0;
956 	}
957 }/*}}}*/
958 
init_unmatched_null_pairnull959 static void init_unmatched_null_pair() {
960 	zval val1, val2;
961 	ZVAL_NULL(&val1);
962 	ZVAL_LONG(&val2, -1);
963 	ZVAL_ARR(&PCRE_G(unmatched_null_pair), zend_new_pair(&val1, &val2));
964 }
965 
init_unmatched_empty_pairnull966 static void init_unmatched_empty_pair() {
967 	zval val1, val2;
968 	ZVAL_EMPTY_STRING(&val1);
969 	ZVAL_LONG(&val2, -1);
970 	ZVAL_ARR(&PCRE_G(unmatched_empty_pair), zend_new_pair(&val1, &val2));
971 }
972 
populate_match_value_str( zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset)973 static zend_always_inline void populate_match_value_str(
974 		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
975 	if (start_offset == end_offset) {
976 		ZVAL_EMPTY_STRING(val);
977 	} else if (start_offset + 1 == end_offset) {
978 		ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
979 	} else {
980 		ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
981 	}
982 }
983 
/* Store one capture into `val`. A capture whose start offset is PCRE2_UNSET
 * did not participate in the match and becomes NULL (unmatched_as_null set)
 * or the empty string; any other capture is copied from the subject. */
static inline void populate_match_value(
		zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
		uint32_t unmatched_as_null) {
	if (start_offset != PCRE2_UNSET) {
		populate_match_value_str(val, subject, start_offset, end_offset);
		return;
	}

	if (!unmatched_as_null) {
		ZVAL_EMPTY_STRING(val);
		return;
	}

	ZVAL_NULL(val);
}
997 
/* Insert `val` into the `subpats` array under the key `name`, taking an
 * extra reference on `val` whenever it was actually stored. */
static inline void add_named(
		zval *subpats, zend_string *name, zval *val, zend_bool unmatched) {
	/* With the DUPNAMES option several subpatterns may share one name. The
	 * entry that carries a real value must win: a matched value overwrites
	 * whatever is there, an unmatched one is only added if the key is new. */
	if (unmatched) {
		if (!zend_hash_add(Z_ARRVAL_P(subpats), name, val)) {
			/* Key already present: nothing stored, no reference taken. */
			return;
		}
	} else {
		zend_hash_update(Z_ARRVAL_P(subpats), name, val);
	}
	Z_TRY_ADDREF_P(val);
}
1011 
1012 /* {{{ add_offset_pair */
add_offset_pair( zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, zend_string *name, uint32_t unmatched_as_null)1013 static inline void add_offset_pair(
1014 		zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
1015 		zend_string *name, uint32_t unmatched_as_null)
1016 {
1017 	zval match_pair;
1018 
1019 	/* Add (match, offset) to the return value */
1020 	if (PCRE2_UNSET == start_offset) {
1021 		if (unmatched_as_null) {
1022 			if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
1023 				init_unmatched_null_pair();
1024 			}
1025 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair));
1026 		} else {
1027 			if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) {
1028 				init_unmatched_empty_pair();
1029 			}
1030 			ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair));
1031 		}
1032 	} else {
1033 		zval val1, val2;
1034 		populate_match_value_str(&val1, subject, start_offset, end_offset);
1035 		ZVAL_LONG(&val2, start_offset);
1036 		ZVAL_ARR(&match_pair, zend_new_pair(&val1, &val2));
1037 	}
1038 
1039 	if (name) {
1040 		add_named(result, name, &match_pair, start_offset == PCRE2_UNSET);
1041 	}
1042 	zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair);
1043 }
1044 /* }}} */
1045 
/* Fill the `subpats` array with the captures of a single match.
 *
 * offsets      - start/end offset pairs; capture i uses offsets[2*i] and
 *                offsets[2*i+1]
 * subpat_names - optional table of capture names (entries may be NULL); a
 *                named capture is stored under its name and its index
 * num_subpats  - total number of capture slots of the pattern
 * count        - number of slots actually reported for this match
 * mark         - optional MARK name, stored under the "MARK" key
 * flags        - PREG_OFFSET_CAPTURE selects (match, offset) pairs,
 *                PREG_UNMATCHED_AS_NULL makes unset captures NULL and pads
 *                the slots from count up to num_subpats with NULL entries */
static void populate_subpat_array(
		zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
		uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
	zend_bool with_offsets = (flags & PREG_OFFSET_CAPTURE) != 0;
	zend_bool null_unmatched = (flags & PREG_UNMATCHED_AS_NULL) != 0;
	zval entry;
	int idx;

	if (with_offsets) {
		/* add_offset_pair() copes with a NULL name by itself. */
		for (idx = 0; idx < count; idx++) {
			add_offset_pair(
				subpats, subject, offsets[2*idx], offsets[2*idx+1],
				subpat_names ? subpat_names[idx] : NULL, null_unmatched);
		}
		if (null_unmatched) {
			for (idx = count; idx < num_subpats; idx++) {
				add_offset_pair(
					subpats, NULL, PCRE2_UNSET, PCRE2_UNSET,
					subpat_names ? subpat_names[idx] : NULL, 1);
			}
		}
	} else {
		for (idx = 0; idx < count; idx++) {
			populate_match_value(
				&entry, subject, offsets[2*idx], offsets[2*idx+1], null_unmatched);
			if (subpat_names && subpat_names[idx]) {
				add_named(subpats, subpat_names[idx], &entry, offsets[2*idx] == PCRE2_UNSET);
			}
			zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
		}
		if (null_unmatched) {
			/* Trailing slots not reported by this match become NULL. */
			for (idx = count; idx < num_subpats; idx++) {
				ZVAL_NULL(&entry);
				if (subpat_names && subpat_names[idx]) {
					zend_hash_add(Z_ARRVAL_P(subpats), subpat_names[idx], &entry);
				}
				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &entry);
			}
		}
	}

	/* Add MARK, if available */
	if (mark) {
		add_assoc_string_ex(subpats, "MARK", sizeof("MARK") - 1, (char *)mark);
	}
}
1113 
/* Shared body of preg_match() and preg_match_all(): parses the userland
 * arguments, obtains the compiled pattern and runs php_pcre_match_impl().
 * `global` is passed through to select match-all behaviour. Returns false
 * to userland when argument parsing or the pattern lookup fails. */
static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
{
	zend_string		 *regex;			/* Pattern as given by the caller */
	zend_string		 *subject;			/* Text the pattern is applied to */
	zval			 *subpats = NULL;	/* Optional output array */
	zend_long		  flags = 0;		/* PREG_* flags */
	zend_long		  start_offset = 0;	/* Offset to start matching at */
	pcre_cache_entry *pce;				/* Cache entry of the compiled pattern */

	ZEND_PARSE_PARAMETERS_START(2, 5)
		Z_PARAM_STR(regex)
		Z_PARAM_STR(subject)
		Z_PARAM_OPTIONAL
		Z_PARAM_ZVAL(subpats)
		Z_PARAM_LONG(flags)
		Z_PARAM_LONG(start_offset)
	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);

	/* Take the pattern from the cache, compiling it if needed. */
	pce = pcre_get_compiled_regex_cache(regex);
	if (pce == NULL) {
		RETURN_FALSE;
	}

	/* The entry's refcount is raised for the duration of the match call.
	 * Flags only count when the 4th argument was actually supplied. */
	pce->refcount++;
	php_pcre_match_impl(pce, subject, return_value, subpats,
		global, ZEND_NUM_ARGS() >= 4, flags, start_offset);
	pce->refcount--;
}
/* }}} */
1144 
is_known_valid_utf8( zend_string *subject_str, PCRE2_SIZE start_offset)1145 static zend_always_inline zend_bool is_known_valid_utf8(
1146 		zend_string *subject_str, PCRE2_SIZE start_offset) {
1147 	if (!(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)) {
1148 		/* We don't know whether the string is valid UTF-8 or not. */
1149 		return 0;
1150 	}
1151 
1152 	if (start_offset == ZSTR_LEN(subject_str)) {
1153 		/* Degenerate case: Offset points to end of string. */
1154 		return 1;
1155 	}
1156 
1157 	/* Check that the offset does not point to an UTF-8 continuation byte. */
1158 	return (ZSTR_VAL(subject_str)[start_offset] & 0xc0) != 0x80;
1159 }
1160 
1161 /* {{{ php_pcre_match_impl() */
php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)1162 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
1163 	zval *subpats, int global, int use_flags, zend_long flags, zend_off_t start_offset)
1164 {
1165 	zval			 result_set,		/* Holds a set of subpatterns after
1166 										   a global match */
1167 					*match_sets = NULL;	/* An array of sets of matches for each
1168 										   subpattern after a global match */
1169 	uint32_t		 options;			/* Execution options */
1170 	int				 count;				/* Count of matched subpatterns */
1171 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
1172 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
1173 	int				 matched;			/* Has anything matched */
1174 	zend_string	   **subpat_names;		/* Array for named subpatterns */
1175 	size_t			 i;
1176 	uint32_t		 subpats_order;		/* Order of subpattern matches */
1177 	uint32_t		 offset_capture;	/* Capture match offsets: yes/no */
1178 	uint32_t		 unmatched_as_null;	/* Null non-matches: yes/no */
1179 	PCRE2_SPTR       mark = NULL;		/* Target for MARK name */
1180 	zval			 marks;				/* Array of marks for PREG_PATTERN_ORDER */
1181 	pcre2_match_data *match_data;
1182 	PCRE2_SIZE		 start_offset2, orig_start_offset;
1183 
1184 	char *subject = ZSTR_VAL(subject_str);
1185 	size_t subject_len = ZSTR_LEN(subject_str);
1186 
1187 	ZVAL_UNDEF(&marks);
1188 
1189 	/* Overwrite the passed-in value for subpatterns with an empty array. */
1190 	if (subpats != NULL) {
1191 		subpats = zend_try_array_init(subpats);
1192 		if (!subpats) {
1193 			return;
1194 		}
1195 	}
1196 
1197 	subpats_order = global ? PREG_PATTERN_ORDER : 0;
1198 
1199 	if (use_flags) {
1200 		offset_capture = flags & PREG_OFFSET_CAPTURE;
1201 		unmatched_as_null = flags & PREG_UNMATCHED_AS_NULL;
1202 
1203 		/*
1204 		 * subpats_order is pre-set to pattern mode so we change it only if
1205 		 * necessary.
1206 		 */
1207 		if (flags & 0xff) {
1208 			subpats_order = flags & 0xff;
1209 		}
1210 		if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
1211 			(!global && subpats_order != 0)) {
1212 			php_error_docref(NULL, E_WARNING, "Invalid flags specified");
1213 			return;
1214 		}
1215 	} else {
1216 		offset_capture = 0;
1217 		unmatched_as_null = 0;
1218 	}
1219 
1220 	/* Negative offset counts from the end of the string. */
1221 	if (start_offset < 0) {
1222 		if ((PCRE2_SIZE)-start_offset <= subject_len) {
1223 			start_offset2 = subject_len + start_offset;
1224 		} else {
1225 			start_offset2 = 0;
1226 		}
1227 	} else {
1228 		start_offset2 = (PCRE2_SIZE)start_offset;
1229 	}
1230 
1231 	if (start_offset2 > subject_len) {
1232 		pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1233 		RETURN_FALSE;
1234 	}
1235 
1236 	/* Calculate the size of the offsets array, and allocate memory for it. */
1237 	num_subpats = pce->capture_count + 1;
1238 
1239 	/*
1240 	 * Build a mapping from subpattern numbers to their names. We will
1241 	 * allocate the table only if there are any named subpatterns.
1242 	 */
1243 	subpat_names = NULL;
1244 	if (subpats && pce->name_count > 0) {
1245 		subpat_names = make_subpats_table(num_subpats, pce);
1246 		if (!subpat_names) {
1247 			RETURN_FALSE;
1248 		}
1249 	}
1250 
1251 	/* Allocate match sets array and initialize the values. */
1252 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1253 		match_sets = (zval *)safe_emalloc(num_subpats, sizeof(zval), 0);
1254 		for (i=0; i<num_subpats; i++) {
1255 			array_init(&match_sets[i]);
1256 		}
1257 	}
1258 
1259 	matched = 0;
1260 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1261 
1262 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
1263 		match_data = mdata;
1264 	} else {
1265 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
1266 		if (!match_data) {
1267 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
1268 			if (subpat_names) {
1269 				free_subpats_table(subpat_names, num_subpats);
1270 			}
1271 			if (match_sets) {
1272 				efree(match_sets);
1273 			}
1274 			RETURN_FALSE;
1275 		}
1276 	}
1277 
1278 	orig_start_offset = start_offset2;
1279 	options =
1280 		(pce->compile_options & PCRE2_UTF) && !is_known_valid_utf8(subject_str, orig_start_offset)
1281 			? 0 : PCRE2_NO_UTF_CHECK;
1282 
1283 	/* Execute the regular expression. */
1284 #ifdef HAVE_PCRE_JIT_SUPPORT
1285 	if ((pce->preg_options & PREG_JIT) && options) {
1286 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1287 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1288 	} else
1289 #endif
1290 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1291 			options, match_data, mctx);
1292 
1293 	while (1) {
1294 		/* If something has matched */
1295 		if (count >= 0) {
1296 			/* Check for too many substrings condition. */
1297 			if (UNEXPECTED(count == 0)) {
1298 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
1299 				count = num_subpats;
1300 			}
1301 
1302 matched:
1303 			matched++;
1304 
1305 			offsets = pcre2_get_ovector_pointer(match_data);
1306 
1307 			/* If subpatterns array has been passed, fill it in with values. */
1308 			if (subpats != NULL) {
1309 				/* Try to get the list of substrings and display a warning if failed. */
1310 				if (offsets[1] < offsets[0]) {
1311 					if (subpat_names) {
1312 						free_subpats_table(subpat_names, num_subpats);
1313 					}
1314 					if (match_sets) efree(match_sets);
1315 					php_error_docref(NULL, E_WARNING, "Get subpatterns list failed");
1316 					RETURN_FALSE;
1317 				}
1318 
1319 				if (global) {	/* global pattern matching */
1320 					if (subpats && subpats_order == PREG_PATTERN_ORDER) {
1321 						/* For each subpattern, insert it into the appropriate array. */
1322 						if (offset_capture) {
1323 							for (i = 0; i < count; i++) {
1324 								add_offset_pair(
1325 									&match_sets[i], subject, offsets[2*i], offsets[2*i+1],
1326 									NULL, unmatched_as_null);
1327 							}
1328 						} else {
1329 							for (i = 0; i < count; i++) {
1330 								zval val;
1331 								populate_match_value(
1332 									&val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null);
1333 								zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val);
1334 							}
1335 						}
1336 						mark = pcre2_get_mark(match_data);
1337 						/* Add MARK, if available */
1338 						if (mark) {
1339 							if (Z_TYPE(marks) == IS_UNDEF) {
1340 								array_init(&marks);
1341 							}
1342 							add_index_string(&marks, matched - 1, (char *) mark);
1343 						}
1344 						/*
1345 						 * If the number of captured subpatterns on this run is
1346 						 * less than the total possible number, pad the result
1347 						 * arrays with NULLs or empty strings.
1348 						 */
1349 						if (count < num_subpats) {
1350 							for (; i < num_subpats; i++) {
1351 								if (offset_capture) {
1352 									add_offset_pair(
1353 										&match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
1354 										NULL, unmatched_as_null);
1355 								} else if (unmatched_as_null) {
1356 									add_next_index_null(&match_sets[i]);
1357 								} else {
1358 									add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC());
1359 								}
1360 							}
1361 						}
1362 					} else {
1363 						/* Allocate and populate the result set array */
1364 						array_init_size(&result_set, count + (mark ? 1 : 0));
1365 						mark = pcre2_get_mark(match_data);
1366 						populate_subpat_array(
1367 							&result_set, subject, offsets, subpat_names,
1368 							num_subpats, count, mark, flags);
1369 						/* And add it to the output array */
1370 						zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set);
1371 					}
1372 				} else {			/* single pattern matching */
1373 					/* For each subpattern, insert it into the subpatterns array. */
1374 					mark = pcre2_get_mark(match_data);
1375 					populate_subpat_array(
1376 						subpats, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1377 					break;
1378 				}
1379 			}
1380 
1381 			/* Advance to the next piece. */
1382 			start_offset2 = offsets[1];
1383 
1384 			/* If we have matched an empty string, mimic what Perl's /g options does.
1385 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
1386 			   the match again at the same point. If this fails (picked up above) we
1387 			   advance to the next character. */
1388 			if (start_offset2 == offsets[0]) {
1389 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1390 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
1391 				if (count >= 0) {
1392 					if (global) {
1393 						goto matched;
1394 					} else {
1395 						break;
1396 					}
1397 				} else if (count == PCRE2_ERROR_NOMATCH) {
1398 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
1399 					   this is not necessarily the end. We need to advance
1400 					   the start offset, and continue. Fudge the offset values
1401 					   to achieve this, unless we're already at the end of the string. */
1402 					if (start_offset2 < subject_len) {
1403 						size_t unit_len = calculate_unit_length(pce, subject + start_offset2);
1404 
1405 						start_offset2 += unit_len;
1406 					} else {
1407 						break;
1408 					}
1409 				} else {
1410 					goto error;
1411 				}
1412 			}
1413 		} else if (count == PCRE2_ERROR_NOMATCH) {
1414 			break;
1415 		} else {
1416 error:
1417 			pcre_handle_exec_error(count);
1418 			break;
1419 		}
1420 
1421 		if (!global) {
1422 			break;
1423 		}
1424 
1425 		/* Execute the regular expression. */
1426 #ifdef HAVE_PCRE_JIT_SUPPORT
1427 		if ((pce->preg_options & PREG_JIT)) {
1428 			if (PCRE2_UNSET == start_offset2 || start_offset2 > subject_len) {
1429 				pcre_handle_exec_error(PCRE2_ERROR_BADOFFSET);
1430 				break;
1431 			}
1432 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1433 					PCRE2_NO_UTF_CHECK, match_data, mctx);
1434 		} else
1435 #endif
1436 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset2,
1437 				PCRE2_NO_UTF_CHECK, match_data, mctx);
1438 	}
1439 	if (match_data != mdata) {
1440 		pcre2_match_data_free(match_data);
1441 	}
1442 
1443 	/* Add the match sets to the output array and clean up */
1444 	if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
1445 		if (subpat_names) {
1446 			for (i = 0; i < num_subpats; i++) {
1447 				if (subpat_names[i]) {
1448 					zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i], &match_sets[i]);
1449 					Z_ADDREF(match_sets[i]);
1450 				}
1451 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1452 			}
1453 		} else {
1454 			for (i = 0; i < num_subpats; i++) {
1455 				zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i]);
1456 			}
1457 		}
1458 		efree(match_sets);
1459 
1460 		if (Z_TYPE(marks) != IS_UNDEF) {
1461 			add_assoc_zval(subpats, "MARK", &marks);
1462 		}
1463 	}
1464 
1465 	if (subpat_names) {
1466 		free_subpats_table(subpat_names, num_subpats);
1467 	}
1468 
1469 	if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
1470 		/* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
1471 		if ((pce->compile_options & PCRE2_UTF)
1472 				&& !ZSTR_IS_INTERNED(subject_str) && orig_start_offset == 0) {
1473 			GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
1474 		}
1475 
1476 		RETVAL_LONG(matched);
1477 	} else {
1478 		RETVAL_FALSE;
1479 	}
1480 }
1481 /* }}} */
1482 
/* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
   Perform a Perl-style regular expression match */
static PHP_FUNCTION(preg_match)
{
	/* Delegate to the shared matcher with global = 0 (stop after the first match). */
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
/* }}} */
1490 
/* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
   Perform a Perl-style global regular expression match */
static PHP_FUNCTION(preg_match_all)
{
	/* Delegate to the shared matcher with global = 1 (collect every match). */
	php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
/* }}} */
1498 
/* {{{ preg_get_backref
 * Parse a back-reference at *str. The first character is the introducer
 * (the caller has already seen '\\' or '$'); it is followed by one or two
 * decimal digits. After '$' the digits may be wrapped in braces: ${NN}.
 *
 * On success returns 1, stores the number in *backref and moves *str past
 * the reference. On failure returns 0 and leaves *str alone; *backref may
 * already have been written when only the closing brace is missing.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced = 0;

	/* Nothing follows the introducer: cannot be a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	if (cursor[0] == '$' && cursor[1] == '{') {
		braced = 1;
	}
	/* Skip the introducer, and the opening brace if there is one. */
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* An optional second digit. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
1537 
1538 /* {{{ preg_do_repl_func
1539  */
preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)1540 static zend_string *preg_do_repl_func(zend_fcall_info *fci, zend_fcall_info_cache *fcc, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags)
1541 {
1542 	zend_string *result_str;
1543 	zval		 retval;			/* Function return value */
1544 	zval	     arg;				/* Argument to pass to function */
1545 
1546 	array_init_size(&arg, count + (mark ? 1 : 0));
1547 	populate_subpat_array(&arg, subject, offsets, subpat_names, num_subpats, count, mark, flags);
1548 
1549 	fci->retval = &retval;
1550 	fci->param_count = 1;
1551 	fci->params = &arg;
1552 	fci->no_separation = 0;
1553 
1554 	if (zend_call_function(fci, fcc) == SUCCESS && Z_TYPE(retval) != IS_UNDEF) {
1555 		if (EXPECTED(Z_TYPE(retval) == IS_STRING)) {
1556 			result_str = Z_STR(retval);
1557 		} else {
1558 			result_str = zval_get_string_func(&retval);
1559 			zval_ptr_dtor(&retval);
1560 		}
1561 	} else {
1562 		if (!EG(exception)) {
1563 			php_error_docref(NULL, E_WARNING, "Unable to call custom replacement function");
1564 		}
1565 
1566 		result_str = zend_string_init(&subject[offsets[0]], offsets[1] - offsets[0], 0);
1567 	}
1568 
1569 	zval_ptr_dtor(&arg);
1570 
1571 	return result_str;
1572 }
1573 /* }}} */
1574 
1575 /* {{{ php_pcre_replace
1576  */
php_pcre_replace(zend_string *regex, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)1577 PHPAPI zend_string *php_pcre_replace(zend_string *regex,
1578 							  zend_string *subject_str,
1579 							  char *subject, size_t subject_len,
1580 							  zend_string *replace_str,
1581 							  size_t limit, size_t *replace_count)
1582 {
1583 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
1584 	zend_string	 		*result;			/* Function result */
1585 
1586 	/* Abort on pending exception, e.g. thrown from __toString(). */
1587 	if (UNEXPECTED(EG(exception))) {
1588 		return NULL;
1589 	}
1590 
1591 	/* Compile regex or get it from cache. */
1592 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
1593 		return NULL;
1594 	}
1595 	pce->refcount++;
1596 	result = php_pcre_replace_impl(pce, subject_str, subject, subject_len, replace_str,
1597 		limit, replace_count);
1598 	pce->refcount--;
1599 
1600 	return result;
1601 }
1602 /* }}} */
1603 
/* {{{ php_pcre_replace_impl()
 * Replace matches of a compiled pattern in a subject with a replacement
 * template.
 *
 * pce           - cache entry of the compiled pattern
 * subject_str   - optional zend_string form of the subject; when given and
 *                 nothing was replaced, a copy of it is returned instead of
 *                 building a new string
 * subject       - subject bytes
 * subject_len   - length of `subject`
 * replace_str   - replacement template; '\' or '$' followed by a reference
 *                 accepted by preg_get_backref() inserts that capture, and a
 *                 '\' directly before '\' or '$' makes it literal
 * limit         - maximum number of replacements to perform
 * replace_count - optional counter, incremented once per replacement
 *
 * Returns the resulting string, or NULL when the match data cannot be
 * created, a match reports its end before its start, or PCRE reports an
 * execution error.
 */
PHPAPI zend_string *php_pcre_replace_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_string *replace_str, size_t limit, size_t *replace_count)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	size_t			 match_len;			/* Length of the current match */
	int				 backref;			/* Backreference number */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*walkbuf,			/* Location of current replacement in the result */
					*walk,				/* Used to walk the replacement string */
					*match,				/* The current match */
					*piece,				/* The current piece of subject */
					*replace_end,		/* End of replacement string */
					 walk_last;			/* Last walked character */
	size_t			result_len; 		/* Length of result */
	zend_string		*result;			/* Result of replacement */
	pcre2_match_data *match_data;

	/* Number of capture slots: every group plus the whole match. */
	num_subpats = pce->capture_count + 1;
	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the preallocated match data when it is free and large enough,
	   otherwise create one sized for this pattern. */
	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			return NULL;
		}
	}

	/* In /u mode the first match call validates the subject as UTF-8. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		piece = subject + last_end_offset;

		if (count >= 0 && limit > 0) {
			zend_bool simple_string;

			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts cannot be replaced: give up. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* First pass over the replacement: only measure how many bytes
			   it expands to, and note whether it contains '\' or '$' at all. */
			walk = ZSTR_VAL(replace_str);
			replace_end = walk + ZSTR_LEN(replace_str);
			walk_last = 0;
			simple_string = 1;
			while (walk < replace_end) {
				if ('\\' == *walk || '$' == *walk) {
					simple_string = 0;
					if (walk_last == '\\') {
						walk++;
						walk_last = 0;
						continue;
					}
					if (preg_get_backref(&walk, &backref)) {
						if (backref < count)
							new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
						continue;
					}
				}
				new_len++;
				walk++;
				walk_last = walk[-1];
			}

			/* Grow the result buffer when the measured length does not fit. */
			if (new_len >= alloc_len) {
				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(&ZSTR_VAL(result)[result_len], piece, match-piece);
				result_len += (match-piece);
			}

			if (simple_string) {
				/* copy replacement */
				memcpy(&ZSTR_VAL(result)[result_len], ZSTR_VAL(replace_str), ZSTR_LEN(replace_str)+1);
				result_len += ZSTR_LEN(replace_str);
			} else {
				/* copy replacement and backrefs */
				walkbuf = ZSTR_VAL(result) + result_len;

				walk = ZSTR_VAL(replace_str);
				walk_last = 0;
				while (walk < replace_end) {
					if ('\\' == *walk || '$' == *walk) {
						if (walk_last == '\\') {
							/* Escaped '\' or '$': overwrite the backslash
							   already written with the literal character. */
							*(walkbuf-1) = *walk++;
							walk_last = 0;
							continue;
						}
						if (preg_get_backref(&walk, &backref)) {
							/* References beyond the reported captures expand to nothing. */
							if (backref < count) {
								match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
								memcpy(walkbuf, subject + offsets[backref<<1], match_len);
								walkbuf += match_len;
							}
							continue;
						}
					}
					*walkbuf++ = *walk++;
					walk_last = walk[-1];
				}
				*walkbuf = '\0';
				/* increment the result length by how much we've added to the string */
				result_len += (walkbuf - (ZSTR_VAL(result) + result_len));
			}

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit > 0) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was replaced and the caller supplied the subject as a
			   zend_string: return a copy of it rather than a new buffer. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			new_len = result_len + subject_len - last_end_offset;
			if (new_len >= alloc_len) {
				alloc_len = new_len; /* now we know exactly how long it is */
				if (NULL != result) {
					result = zend_string_realloc(result, alloc_len, 0);
				} else {
					result = zend_string_alloc(alloc_len, 0);
				}
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Execution error: record it and discard any partial result. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}

		/* Look for the next match; the UTF check is skipped from here on. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if (pce->preg_options & PREG_JIT) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a dynamically created match data block has to be freed. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}

	return result;
}
/* }}} */
1845 
/* {{{ php_pcre_replace_func_impl()
 *
 * Replace matches of the compiled pattern `pce` in `subject` with strings
 * produced by a user callback, which is invoked once per match through
 * preg_do_repl_func().
 *
 * pce            compiled pattern cache entry
 * subject_str    zend_string holding the subject, or NULL; when non-NULL and
 *                no output buffer has been created by the time matching ends,
 *                a new reference to it is returned instead of a copy
 * subject        raw subject bytes
 * subject_len    length of `subject`
 * fci, fcc       callback info, forwarded to preg_do_repl_func()
 * limit          maximum number of replacements; decremented after every
 *                replacement, and matching stops once it reaches 0
 * replace_count  if non-NULL, incremented once per match
 * flags          forwarded unchanged to preg_do_repl_func()
 *
 * Returns the resulting string, or NULL when the subpattern name table or the
 * match data cannot be created, when PCRE reports an error, or when the
 * returned offsets are inconsistent. PCRE_G(error_code) is reset to
 * PHP_PCRE_NO_ERROR on entry.
 */
static zend_string *php_pcre_replace_func_impl(pcre_cache_entry *pce, zend_string *subject_str, char *subject, size_t subject_len, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)
{
	uint32_t		 options;			/* Execution options */
	int				 count;				/* Count of matched subpatterns */
	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
	zend_string		**subpat_names;		/* Array for named subpatterns */
	uint32_t		 num_subpats;		/* Number of captured subpatterns */
	size_t			 new_len;			/* Length of needed storage */
	size_t			 alloc_len;			/* Actual allocated length */
	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
	size_t			 last_end_offset;	/* Where the last search ended */
	char			*match,				/* The current match */
					*piece;				/* The current piece of subject */
	size_t			result_len; 		/* Length of result */
	zend_string		*result;			/* Result of replacement */
	zend_string     *eval_result;		/* Result of custom function */
	pcre2_match_data *match_data;
	zend_bool old_mdata_used;

	/* Calculate the size of the offsets array, and allocate memory for it. */
	num_subpats = pce->capture_count + 1;

	/*
	 * Build a mapping from subpattern numbers to their names. We will
	 * allocate the table only if there are any named subpatterns.
	 */
	subpat_names = NULL;
	if (UNEXPECTED(pce->name_count > 0)) {
		subpat_names = make_subpats_table(num_subpats, pce);
		if (!subpat_names) {
			return NULL;
		}
	}

	alloc_len = 0;
	result = NULL;

	/* Initialize */
	match = NULL;
	start_offset = 0;
	last_end_offset = 0;
	result_len = 0;
	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;

	/* Use the shared match data block only if nobody else has claimed it and
	   the pattern fits; the flag is marked while we hold it and restored on
	   every exit path. NOTE(review): presumably this guards against the
	   callback re-entering PCRE functions that use the same block - confirm. */
	old_mdata_used = mdata_used;
	if (!old_mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
		mdata_used = 1;
		match_data = mdata;
	} else {
		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
		if (!match_data) {
			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
			if (subpat_names) {
				free_subpats_table(subpat_names, num_subpats);
			}
			mdata_used = old_mdata_used;
			return NULL;
		}
	}

	/* Patterns compiled with PCRE2_UTF get no PCRE2_NO_UTF_CHECK on the first
	   match; every later match in this function passes PCRE2_NO_UTF_CHECK. */
	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;

	/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
	/* The JIT entry point is taken for the first match only when options is
	   non-zero, i.e. for non-UTF patterns. */
	if ((pce->preg_options & PREG_JIT) && options) {
		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	} else
#endif
	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
			options, match_data, mctx);

	while (1) {
		/* Start of the not-yet-copied part of the subject. */
		piece = subject + last_end_offset;

		if (count >= 0 && limit) {
			/* Check for too many substrings condition. */
			if (UNEXPECTED(count == 0)) {
				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
				count = num_subpats;
			}

matched:
			offsets = pcre2_get_ovector_pointer(match_data);

			/* A match that ends before it starts is treated as an internal error. */
			if (UNEXPECTED(offsets[1] < offsets[0])) {
				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
				if (result) {
					zend_string_release_ex(result, 0);
					result = NULL;
				}
				break;
			}

			if (replace_count) {
				++*replace_count;
			}

			/* Set the match location in subject */
			match = subject + offsets[0];

			new_len = result_len + offsets[0] - last_end_offset; /* part before the match */

			/* Use custom function to get replacement string and its length. */
			eval_result = preg_do_repl_func(
				fci, fcc, subject, offsets, subpat_names, num_subpats, count,
				pcre2_get_mark(match_data), flags);

			ZEND_ASSERT(eval_result);
			/* Required length now also includes the callback's output. */
			new_len = zend_safe_address_guarded(1, ZSTR_LEN(eval_result), new_len);
			if (new_len >= alloc_len) {
				/* Grow beyond what is needed right now so later matches do
				   not force a reallocation every time. */
				alloc_len = zend_safe_address_guarded(2, new_len, alloc_len);
				if (result == NULL) {
					result = zend_string_alloc(alloc_len, 0);
				} else {
					result = zend_string_extend(result, alloc_len, 0);
				}
			}

			if (match-piece > 0) {
				/* copy the part of the string before the match */
				memcpy(ZSTR_VAL(result) + result_len, piece, match-piece);
				result_len += (match-piece);
			}

			/* If using custom function, copy result to the buffer and clean up. */
			memcpy(ZSTR_VAL(result) + result_len, ZSTR_VAL(eval_result), ZSTR_LEN(eval_result));
			result_len += ZSTR_LEN(eval_result);
			zend_string_release_ex(eval_result, 0);

			limit--;

			/* Advance to the next piece. */
			start_offset = last_end_offset = offsets[1];

			/* If we have matched an empty string, mimic what Perl's /g options does.
			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
			   the match again at the same point. If this fails (picked up above) we
			   advance to the next character. */
			if (start_offset == offsets[0]) {
				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);

				piece = subject + start_offset;
				if (count >= 0 && limit) {
					goto matched;
				} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
					   this is not necessarily the end. We need to advance
					   the start offset, and continue. Fudge the offset values
					   to achieve this, unless we're already at the end of the string. */
					if (start_offset < subject_len) {
						size_t unit_len = calculate_unit_length(pce, piece);
						start_offset += unit_len;
					} else {
						goto not_matched;
					}
				} else {
					goto error;
				}
			}

		} else if (count == PCRE2_ERROR_NOMATCH || limit == 0) {
not_matched:
			/* Nothing was ever written to an output buffer: hand back
			   another reference to the caller's string when one is available. */
			if (!result && subject_str) {
				result = zend_string_copy(subject_str);
				break;
			}
			new_len = result_len + subject_len - last_end_offset;
			if (new_len >= alloc_len) {
				alloc_len = new_len; /* now we know exactly how long it is */
				if (NULL != result) {
					result = zend_string_realloc(result, alloc_len, 0);
				} else {
					result = zend_string_alloc(alloc_len, 0);
				}
			}
			/* stick that last bit of string on our output */
			memcpy(ZSTR_VAL(result) + result_len, piece, subject_len - last_end_offset);
			result_len += subject_len - last_end_offset;
			ZSTR_VAL(result)[result_len] = '\0';
			ZSTR_LEN(result) = result_len;
			break;
		} else {
error:
			/* Any other negative count is a PCRE error: report it and drop
			   whatever partial output exists. */
			pcre_handle_exec_error(count);
			if (result) {
				zend_string_release_ex(result, 0);
				result = NULL;
			}
			break;
		}
		/* Look for the next match starting at start_offset. */
#ifdef HAVE_PCRE_JIT_SUPPORT
		if ((pce->preg_options & PREG_JIT)) {
			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
					PCRE2_NO_UTF_CHECK, match_data, mctx);
		} else
#endif
		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, subject_len, start_offset,
				PCRE2_NO_UTF_CHECK, match_data, mctx);
	}
	/* Only a privately created match data block is freed; the shared one is kept. */
	if (match_data != mdata) {
		pcre2_match_data_free(match_data);
	}
	mdata_used = old_mdata_used;

	if (UNEXPECTED(subpat_names)) {
		free_subpats_table(subpat_names, num_subpats);
	}

	return result;
}
/* }}} */
2060 
2061 /* {{{ php_pcre_replace_func
2062  */
php_pcre_replace_func(zend_string *regex, zend_string *subject_str, zend_fcall_info *fci, zend_fcall_info_cache *fcc, size_t limit, size_t *replace_count, zend_long flags)2063 static zend_always_inline zend_string *php_pcre_replace_func(zend_string *regex,
2064 							  zend_string *subject_str,
2065 							  zend_fcall_info *fci, zend_fcall_info_cache *fcc,
2066 							  size_t limit, size_t *replace_count, zend_long flags)
2067 {
2068 	pcre_cache_entry	*pce;			    /* Compiled regular expression */
2069 	zend_string	 		*result;			/* Function result */
2070 
2071 	/* Compile regex or get it from cache. */
2072 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2073 		return NULL;
2074 	}
2075 	pce->refcount++;
2076 	result = php_pcre_replace_func_impl(
2077 		pce, subject_str, ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), fci, fcc,
2078 		limit, replace_count, flags);
2079 	pce->refcount--;
2080 
2081 	return result;
2082 }
2083 /* }}} */
2084 
2085 /* {{{ php_pcre_replace_array
2086  */
php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)2087 static zend_string *php_pcre_replace_array(HashTable *regex, zval *replace, zend_string *subject_str, size_t limit, size_t *replace_count)
2088 {
2089 	zval		*regex_entry;
2090 	zend_string *result;
2091 	zend_string *replace_str, *tmp_replace_str;
2092 
2093 	if (Z_TYPE_P(replace) == IS_ARRAY) {
2094 		uint32_t replace_idx = 0;
2095 		HashTable *replace_ht = Z_ARRVAL_P(replace);
2096 
2097 		/* For each entry in the regex array, get the entry */
2098 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2099 			/* Make sure we're dealing with strings. */
2100 			zend_string *tmp_regex_str;
2101 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2102 			zval *zv;
2103 
2104 			/* Get current entry */
2105 			while (1) {
2106 				if (replace_idx == replace_ht->nNumUsed) {
2107 					replace_str = ZSTR_EMPTY_ALLOC();
2108 					tmp_replace_str = NULL;
2109 					break;
2110 				}
2111 				zv = &replace_ht->arData[replace_idx].val;
2112 				replace_idx++;
2113 				if (Z_TYPE_P(zv) != IS_UNDEF) {
2114 					replace_str = zval_get_tmp_string(zv, &tmp_replace_str);
2115 					break;
2116 				}
2117 			}
2118 
2119 			/* Do the actual replacement and put the result back into subject_str
2120 			   for further replacements. */
2121 			result = php_pcre_replace(regex_str,
2122 									  subject_str,
2123 									  ZSTR_VAL(subject_str),
2124 									  ZSTR_LEN(subject_str),
2125 									  replace_str,
2126 									  limit,
2127 									  replace_count);
2128 			zend_tmp_string_release(tmp_replace_str);
2129 			zend_tmp_string_release(tmp_regex_str);
2130 			zend_string_release_ex(subject_str, 0);
2131 			subject_str = result;
2132 			if (UNEXPECTED(result == NULL)) {
2133 				break;
2134 			}
2135 		} ZEND_HASH_FOREACH_END();
2136 
2137 	} else {
2138 		replace_str = Z_STR_P(replace);
2139 
2140 		/* For each entry in the regex array, get the entry */
2141 		ZEND_HASH_FOREACH_VAL(regex, regex_entry) {
2142 			/* Make sure we're dealing with strings. */
2143 			zend_string *tmp_regex_str;
2144 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2145 
2146 			/* Do the actual replacement and put the result back into subject_str
2147 			   for further replacements. */
2148 			result = php_pcre_replace(regex_str,
2149 									  subject_str,
2150 									  ZSTR_VAL(subject_str),
2151 									  ZSTR_LEN(subject_str),
2152 									  replace_str,
2153 									  limit,
2154 									  replace_count);
2155 			zend_tmp_string_release(tmp_regex_str);
2156 			zend_string_release_ex(subject_str, 0);
2157 			subject_str = result;
2158 
2159 			if (UNEXPECTED(result == NULL)) {
2160 				break;
2161 			}
2162 		} ZEND_HASH_FOREACH_END();
2163 	}
2164 
2165 	return subject_str;
2166 }
2167 /* }}} */
2168 
2169 /* {{{ php_replace_in_subject
2170  */
php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)2171 static zend_always_inline zend_string *php_replace_in_subject(zval *regex, zval *replace, zval *subject, size_t limit, size_t *replace_count)
2172 {
2173 	zend_string *result;
2174 	zend_string *subject_str = zval_get_string(subject);
2175 
2176 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2177 		result = php_pcre_replace(Z_STR_P(regex),
2178 								  subject_str,
2179 								  ZSTR_VAL(subject_str),
2180 								  ZSTR_LEN(subject_str),
2181 								  Z_STR_P(replace),
2182 								  limit,
2183 								  replace_count);
2184 		zend_string_release_ex(subject_str, 0);
2185 	} else {
2186 		result = php_pcre_replace_array(Z_ARRVAL_P(regex),
2187 										replace,
2188 										subject_str,
2189 										limit,
2190 										replace_count);
2191 	}
2192 	return result;
2193 }
2194 /* }}} */
2195 
2196 /* {{{ php_replace_in_subject_func
2197  */
php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)2198 static zend_string *php_replace_in_subject_func(zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, size_t limit, size_t *replace_count, zend_long flags)
2199 {
2200 	zend_string *result;
2201 	zend_string	*subject_str = zval_get_string(subject);
2202 
2203 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2204 		result = php_pcre_replace_func(
2205 			Z_STR_P(regex), subject_str, fci, fcc, limit, replace_count, flags);
2206 		zend_string_release_ex(subject_str, 0);
2207 		return result;
2208 	} else {
2209 		zval		*regex_entry;
2210 
2211 		/* If regex is an array */
2212 
2213 		/* For each entry in the regex array, get the entry */
2214 		ZEND_HASH_FOREACH_VAL(Z_ARRVAL_P(regex), regex_entry) {
2215 			/* Make sure we're dealing with strings. */
2216 			zend_string *tmp_regex_str;
2217 			zend_string *regex_str = zval_get_tmp_string(regex_entry, &tmp_regex_str);
2218 
2219 			/* Do the actual replacement and put the result back into subject_str
2220 			   for further replacements. */
2221 			result = php_pcre_replace_func(
2222 				regex_str, subject_str, fci, fcc, limit, replace_count, flags);
2223 			zend_tmp_string_release(tmp_regex_str);
2224 			zend_string_release_ex(subject_str, 0);
2225 			subject_str = result;
2226 			if (UNEXPECTED(result == NULL)) {
2227 				break;
2228 			}
2229 		} ZEND_HASH_FOREACH_END();
2230 
2231 		return subject_str;
2232 	}
2233 }
2234 /* }}} */
2235 
2236 /* {{{ preg_replace_func_impl
2237  */
preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)2238 static size_t preg_replace_func_impl(zval *return_value, zval *regex, zend_fcall_info *fci, zend_fcall_info_cache *fcc, zval *subject, zend_long limit_val, zend_long flags)
2239 {
2240 	zend_string	*result;
2241 	size_t replace_count = 0;
2242 
2243 	if (Z_TYPE_P(regex) != IS_ARRAY) {
2244 		convert_to_string_ex(regex);
2245 	}
2246 
2247 	if (Z_TYPE_P(subject) != IS_ARRAY) {
2248 		result = php_replace_in_subject_func(
2249 			regex, fci, fcc, subject, limit_val, &replace_count, flags);
2250 		if (result != NULL) {
2251 			RETVAL_STR(result);
2252 		} else {
2253 			RETVAL_NULL();
2254 		}
2255 	} else {
2256 		/* if subject is an array */
2257 		zval		*subject_entry, zv;
2258 		zend_string	*string_key;
2259 		zend_ulong	 num_key;
2260 
2261 		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2262 
2263 		/* For each subject entry, convert it to string, then perform replacement
2264 		   and add the result to the return_value array. */
2265 		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2266 			result = php_replace_in_subject_func(
2267 				regex, fci, fcc, subject_entry, limit_val, &replace_count, flags);
2268 			if (result != NULL) {
2269 				/* Add to return array */
2270 				ZVAL_STR(&zv, result);
2271 				if (string_key) {
2272 					zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2273 				} else {
2274 					zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2275 				}
2276 			}
2277 		} ZEND_HASH_FOREACH_END();
2278 	}
2279 
2280 	return replace_count;
2281 }
2282 /* }}} */
2283 
2284 /* {{{ preg_replace_common
2285  */
preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)2286 static void preg_replace_common(INTERNAL_FUNCTION_PARAMETERS, int is_filter)
2287 {
2288 	zval *regex, *replace, *subject, *zcount = NULL;
2289 	zend_long limit = -1;
2290 	size_t replace_count = 0;
2291 	zend_string	*result;
2292 	size_t old_replace_count;
2293 
2294 	/* Get function parameters and do error-checking. */
2295 	ZEND_PARSE_PARAMETERS_START(3, 5)
2296 		Z_PARAM_ZVAL(regex)
2297 		Z_PARAM_ZVAL(replace)
2298 		Z_PARAM_ZVAL(subject)
2299 		Z_PARAM_OPTIONAL
2300 		Z_PARAM_LONG(limit)
2301 		Z_PARAM_ZVAL(zcount)
2302 	ZEND_PARSE_PARAMETERS_END();
2303 
2304 	if (Z_TYPE_P(replace) != IS_ARRAY) {
2305 		convert_to_string_ex(replace);
2306 		if (Z_TYPE_P(regex) != IS_ARRAY) {
2307 			convert_to_string_ex(regex);
2308 		}
2309 	} else {
2310 		if (Z_TYPE_P(regex) != IS_ARRAY) {
2311 			php_error_docref(NULL, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
2312 			RETURN_FALSE;
2313 		}
2314 	}
2315 
2316 	if (Z_TYPE_P(subject) != IS_ARRAY) {
2317 		old_replace_count = replace_count;
2318 		result = php_replace_in_subject(regex,
2319 										replace,
2320 										subject,
2321 										limit,
2322 										&replace_count);
2323 		if (result != NULL) {
2324 			if (!is_filter || replace_count > old_replace_count) {
2325 				RETVAL_STR(result);
2326 			} else {
2327 				zend_string_release_ex(result, 0);
2328 				RETVAL_NULL();
2329 			}
2330 		} else {
2331 			RETVAL_NULL();
2332 		}
2333 	} else {
2334 		/* if subject is an array */
2335 		zval		*subject_entry, zv;
2336 		zend_string	*string_key;
2337 		zend_ulong	 num_key;
2338 
2339 		array_init_size(return_value, zend_hash_num_elements(Z_ARRVAL_P(subject)));
2340 
2341 		/* For each subject entry, convert it to string, then perform replacement
2342 		   and add the result to the return_value array. */
2343 		ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(subject), num_key, string_key, subject_entry) {
2344 			old_replace_count = replace_count;
2345 			result = php_replace_in_subject(regex,
2346 											replace,
2347 											subject_entry,
2348 											limit,
2349 											&replace_count);
2350 			if (result != NULL) {
2351 				if (!is_filter || replace_count > old_replace_count) {
2352 					/* Add to return array */
2353 					ZVAL_STR(&zv, result);
2354 					if (string_key) {
2355 						zend_hash_add_new(Z_ARRVAL_P(return_value), string_key, &zv);
2356 					} else {
2357 						zend_hash_index_add_new(Z_ARRVAL_P(return_value), num_key, &zv);
2358 					}
2359 				} else {
2360 					zend_string_release_ex(result, 0);
2361 				}
2362 			}
2363 		} ZEND_HASH_FOREACH_END();
2364 	}
2365 
2366 	if (zcount) {
2367 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2368 	}
2369 }
2370 /* }}} */
2371 
2372 /* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2373    Perform Perl-style regular expression replacement. */
PHP_FUNCTIONnull2374 static PHP_FUNCTION(preg_replace)
2375 {
2376 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
2377 }
2378 /* }}} */
2379 
2380 /* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
2381    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTIONnull2382 static PHP_FUNCTION(preg_replace_callback)
2383 {
2384 	zval *regex, *replace, *subject, *zcount = NULL;
2385 	zend_long limit = -1, flags = 0;
2386 	size_t replace_count;
2387 	zend_fcall_info fci;
2388 	zend_fcall_info_cache fcc;
2389 
2390 	/* Get function parameters and do error-checking. */
2391 	ZEND_PARSE_PARAMETERS_START(3, 6)
2392 		Z_PARAM_ZVAL(regex)
2393 		Z_PARAM_ZVAL(replace)
2394 		Z_PARAM_ZVAL(subject)
2395 		Z_PARAM_OPTIONAL
2396 		Z_PARAM_LONG(limit)
2397 		Z_PARAM_ZVAL(zcount)
2398 		Z_PARAM_LONG(flags)
2399 	ZEND_PARSE_PARAMETERS_END();
2400 
2401 	if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2402 		zend_string	*callback_name = zend_get_callable_name(replace);
2403 		php_error_docref(NULL, E_WARNING, "Requires argument 2, '%s', to be a valid callback", ZSTR_VAL(callback_name));
2404 		zend_string_release_ex(callback_name, 0);
2405 		ZVAL_STR(return_value, zval_get_string(subject));
2406 		return;
2407 	}
2408 
2409 	fci.size = sizeof(fci);
2410 	fci.object = NULL;
2411 	ZVAL_COPY_VALUE(&fci.function_name, replace);
2412 
2413 	replace_count = preg_replace_func_impl(return_value, regex, &fci, &fcc, subject, limit, flags);
2414 	if (zcount) {
2415 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2416 	}
2417 }
2418 /* }}} */
2419 
2420 /* {{{ proto mixed preg_replace_callback_array(array pattern, mixed subject [, int limit [, int &count]])
2421    Perform Perl-style regular expression replacement using replacement callback. */
PHP_FUNCTIONnull2422 static PHP_FUNCTION(preg_replace_callback_array)
2423 {
2424 	zval regex, zv, *replace, *subject, *pattern, *zcount = NULL;
2425 	zend_long limit = -1, flags = 0;
2426 	zend_string *str_idx;
2427 	size_t replace_count = 0;
2428 	zend_fcall_info fci;
2429 	zend_fcall_info_cache fcc;
2430 
2431 	/* Get function parameters and do error-checking. */
2432 	ZEND_PARSE_PARAMETERS_START(2, 5)
2433 		Z_PARAM_ARRAY(pattern)
2434 		Z_PARAM_ZVAL(subject)
2435 		Z_PARAM_OPTIONAL
2436 		Z_PARAM_LONG(limit)
2437 		Z_PARAM_ZVAL(zcount)
2438 		Z_PARAM_LONG(flags)
2439 	ZEND_PARSE_PARAMETERS_END();
2440 
2441 	fci.size = sizeof(fci);
2442 	fci.object = NULL;
2443 
2444 	ZEND_HASH_FOREACH_STR_KEY_VAL(Z_ARRVAL_P(pattern), str_idx, replace) {
2445 		if (str_idx) {
2446 			ZVAL_STR_COPY(&regex, str_idx);
2447 		} else {
2448 			php_error_docref(NULL, E_WARNING, "Delimiter must not be alphanumeric or backslash");
2449 			zval_ptr_dtor(return_value);
2450 			RETURN_NULL();
2451 		}
2452 
2453 		if (!zend_is_callable_ex(replace, NULL, 0, NULL, &fcc, NULL)) {
2454 			zend_string *callback_name = zend_get_callable_name(replace);
2455 			php_error_docref(NULL, E_WARNING, "'%s' is not a valid callback", ZSTR_VAL(callback_name));
2456 			zend_string_release_ex(callback_name, 0);
2457 			zval_ptr_dtor(&regex);
2458 			zval_ptr_dtor(return_value);
2459 			ZVAL_COPY(return_value, subject);
2460 			return;
2461 		}
2462 
2463 		ZVAL_COPY_VALUE(&fci.function_name, replace);
2464 
2465 		replace_count += preg_replace_func_impl(&zv, &regex, &fci, &fcc, subject, limit, flags);
2466 		if (subject != return_value) {
2467 			subject = return_value;
2468 		} else {
2469 			zval_ptr_dtor(return_value);
2470 		}
2471 
2472 		zval_ptr_dtor(&regex);
2473 
2474 		ZVAL_COPY_VALUE(return_value, &zv);
2475 
2476 		if (UNEXPECTED(EG(exception))) {
2477 			zval_ptr_dtor(return_value);
2478 			RETURN_NULL();
2479 		}
2480 	} ZEND_HASH_FOREACH_END();
2481 
2482 	if (zcount) {
2483 		ZEND_TRY_ASSIGN_REF_LONG(zcount, replace_count);
2484 	}
2485 }
2486 /* }}} */
2487 
2488 /* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
2489    Perform Perl-style regular expression replacement and only return matches. */
PHP_FUNCTIONnull2490 static PHP_FUNCTION(preg_filter)
2491 {
2492 	preg_replace_common(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
2493 }
2494 /* }}} */
2495 
2496 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
2497    Split string into an array using a perl-style regular expression as a delimiter */
PHP_FUNCTIONnull2498 static PHP_FUNCTION(preg_split)
2499 {
2500 	zend_string			*regex;			/* Regular expression */
2501 	zend_string			*subject;		/* String to match against */
2502 	zend_long			 limit_val = -1;/* Integer value of limit */
2503 	zend_long			 flags = 0;		/* Match control flags */
2504 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2505 
2506 	/* Get function parameters and do error checking */
2507 	ZEND_PARSE_PARAMETERS_START(2, 4)
2508 		Z_PARAM_STR(regex)
2509 		Z_PARAM_STR(subject)
2510 		Z_PARAM_OPTIONAL
2511 		Z_PARAM_LONG(limit_val)
2512 		Z_PARAM_LONG(flags)
2513 	ZEND_PARSE_PARAMETERS_END_EX(RETURN_FALSE);
2514 
2515 	/* Compile regex or get it from cache. */
2516 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2517 		RETURN_FALSE;
2518 	}
2519 
2520 	pce->refcount++;
2521 	php_pcre_split_impl(pce, subject, return_value, limit_val, flags);
2522 	pce->refcount--;
2523 }
2524 /* }}} */
2525 
2526 /* {{{ php_pcre_split
2527  */
php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value, zend_long limit_val, zend_long flags)2528 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, zval *return_value,
2529 	zend_long limit_val, zend_long flags)
2530 {
2531 	PCRE2_SIZE		*offsets;			/* Array of subpattern offsets */
2532 	uint32_t		 options;			/* Execution options */
2533 	int				 count;				/* Count of matched subpatterns */
2534 	PCRE2_SIZE		 start_offset;		/* Where the new search starts */
2535 	PCRE2_SIZE		 last_match_offset;	/* Location of last match */
2536 	uint32_t		 no_empty;			/* If NO_EMPTY flag is set */
2537 	uint32_t		 delim_capture; 	/* If delimiters should be captured */
2538 	uint32_t		 offset_capture;	/* If offsets should be captured */
2539 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2540 	zval			 tmp;
2541 	pcre2_match_data *match_data;
2542 	char *subject = ZSTR_VAL(subject_str);
2543 
2544 	no_empty = flags & PREG_SPLIT_NO_EMPTY;
2545 	delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
2546 	offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
2547 
2548 	/* Initialize return value */
2549 	array_init(return_value);
2550 
2551 	/* Calculate the size of the offsets array, and allocate memory for it. */
2552 	num_subpats = pce->capture_count + 1;
2553 
2554 	/* Start at the beginning of the string */
2555 	start_offset = 0;
2556 	last_match_offset = 0;
2557 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2558 
2559 	if (limit_val == -1) {
2560 		/* pass */
2561 	} else if (limit_val == 0) {
2562 		limit_val = -1;
2563 	} else if (limit_val <= 1) {
2564 		goto last;
2565 	}
2566 
2567 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2568 		match_data = mdata;
2569 	} else {
2570 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2571 		if (!match_data) {
2572 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2573 			zval_ptr_dtor(return_value);
2574 			RETURN_FALSE;
2575 		}
2576 	}
2577 
2578 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2579 
2580 #ifdef HAVE_PCRE_JIT_SUPPORT
2581 	if ((pce->preg_options & PREG_JIT) && options) {
2582 		count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2583 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2584 	} else
2585 #endif
2586 	count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2587 			options, match_data, mctx);
2588 
2589 	while (1) {
2590 		/* If something matched */
2591 		if (count >= 0) {
2592 			/* Check for too many substrings condition. */
2593 			if (UNEXPECTED(count == 0)) {
2594 				php_error_docref(NULL,E_NOTICE, "Matched, but too many substrings");
2595 				count = num_subpats;
2596 			}
2597 
2598 matched:
2599 			offsets = pcre2_get_ovector_pointer(match_data);
2600 
2601 			if (UNEXPECTED(offsets[1] < offsets[0])) {
2602 				PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2603 				break;
2604 			}
2605 
2606 			if (!no_empty || offsets[0] != last_match_offset) {
2607 				if (offset_capture) {
2608 					/* Add (match, offset) pair to the return value */
2609 					add_offset_pair(
2610 						return_value, subject, last_match_offset, offsets[0],
2611 						NULL, 0);
2612 				} else {
2613 					/* Add the piece to the return value */
2614 					populate_match_value_str(&tmp, subject, last_match_offset, offsets[0]);
2615 					zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2616 				}
2617 
2618 				/* One less left to do */
2619 				if (limit_val != -1)
2620 					limit_val--;
2621 			}
2622 
2623 			if (delim_capture) {
2624 				size_t i;
2625 				for (i = 1; i < count; i++) {
2626 					/* If we have matched a delimiter */
2627 					if (!no_empty || offsets[2*i] != offsets[2*i+1]) {
2628 						if (offset_capture) {
2629 							add_offset_pair(
2630 								return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
2631 						} else {
2632 							populate_match_value_str(&tmp, subject, offsets[2*i], offsets[2*i+1]);
2633 							zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2634 						}
2635 					}
2636 				}
2637 			}
2638 
2639 			/* Advance to the position right after the last full match */
2640 			start_offset = last_match_offset = offsets[1];
2641 
2642 			/* If we have matched an empty string, mimic what Perl's /g options does.
2643 			   This turns out to be rather cunning. First we set PCRE2_NOTEMPTY_ATSTART and try
2644 			   the match again at the same point. If this fails (picked up above) we
2645 			   advance to the next character. */
2646 			if (start_offset == offsets[0]) {
2647 				count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2648 					PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
2649 				if (count >= 0) {
2650 					goto matched;
2651 				} else if (count == PCRE2_ERROR_NOMATCH) {
2652 					/* If we previously set PCRE2_NOTEMPTY_ATSTART after a null match,
2653 					   this is not necessarily the end. We need to advance
2654 					   the start offset, and continue. Fudge the offset values
2655 					   to achieve this, unless we're already at the end of the string. */
2656 					if (start_offset < ZSTR_LEN(subject_str)) {
2657 						start_offset += calculate_unit_length(pce, subject + start_offset);
2658 					} else {
2659 						break;
2660 					}
2661 				} else {
2662 					goto error;
2663 				}
2664 			}
2665 
2666 		} else if (count == PCRE2_ERROR_NOMATCH) {
2667 			break;
2668 		} else {
2669 error:
2670 			pcre_handle_exec_error(count);
2671 			break;
2672 		}
2673 
2674 		/* Get next piece if no limit or limit not yet reached and something matched*/
2675 		if (limit_val != -1 && limit_val <= 1) {
2676 			break;
2677 		}
2678 
2679 #ifdef HAVE_PCRE_JIT_SUPPORT
2680 		if (pce->preg_options & PREG_JIT) {
2681 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2682 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2683 		} else
2684 #endif
2685 		count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
2686 				PCRE2_NO_UTF_CHECK, match_data, mctx);
2687 	}
2688 	if (match_data != mdata) {
2689 		pcre2_match_data_free(match_data);
2690 	}
2691 
2692 	if (PCRE_G(error_code) != PHP_PCRE_NO_ERROR) {
2693 		zval_ptr_dtor(return_value);
2694 		RETURN_FALSE;
2695 	}
2696 
2697 last:
2698 	start_offset = last_match_offset; /* the offset might have been incremented, but without further successful matches */
2699 
2700 	if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
2701 		if (offset_capture) {
2702 			/* Add the last (match, offset) pair to the return value */
2703 			add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
2704 		} else {
2705 			/* Add the last piece to the return value */
2706 			if (start_offset == 0) {
2707 				ZVAL_STR_COPY(&tmp, subject_str);
2708 			} else {
2709 				populate_match_value_str(&tmp, subject, start_offset, ZSTR_LEN(subject_str));
2710 			}
2711 			zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
2712 		}
2713 	}
2714 }
2715 /* }}} */
2716 
2717 /* {{{ proto string preg_quote(string str [, string delim_char])
2718    Quote regular expression characters plus an optional character */
PHP_FUNCTIONnull2719 static PHP_FUNCTION(preg_quote)
2720 {
2721 	zend_string *str;       		/* Input string argument */
2722 	zend_string	*delim = NULL;		/* Additional delimiter argument */
2723 	char		*in_str;			/* Input string */
2724 	char		*in_str_end;    	/* End of the input string */
2725 	zend_string	*out_str;			/* Output string with quoted characters */
2726 	size_t       extra_len;         /* Number of additional characters */
2727 	char 		*p,					/* Iterator for input string */
2728 				*q,					/* Iterator for output string */
2729 				 delim_char = '\0',	/* Delimiter character to be quoted */
2730 				 c;					/* Current character */
2731 
2732 	/* Get the arguments and check for errors */
2733 	ZEND_PARSE_PARAMETERS_START(1, 2)
2734 		Z_PARAM_STR(str)
2735 		Z_PARAM_OPTIONAL
2736 		Z_PARAM_STR_EX(delim, 1, 0)
2737 	ZEND_PARSE_PARAMETERS_END();
2738 
2739 	/* Nothing to do if we got an empty string */
2740 	if (ZSTR_LEN(str) == 0) {
2741 		RETURN_EMPTY_STRING();
2742 	}
2743 
2744 	in_str = ZSTR_VAL(str);
2745 	in_str_end = in_str + ZSTR_LEN(str);
2746 
2747 	if (delim) {
2748 		delim_char = ZSTR_VAL(delim)[0];
2749 	}
2750 
2751 	/* Go through the string and quote necessary characters */
2752 	extra_len = 0;
2753 	p = in_str;
2754 	do {
2755 		c = *p;
2756 		switch(c) {
2757 			case '.':
2758 			case '\\':
2759 			case '+':
2760 			case '*':
2761 			case '?':
2762 			case '[':
2763 			case '^':
2764 			case ']':
2765 			case '$':
2766 			case '(':
2767 			case ')':
2768 			case '{':
2769 			case '}':
2770 			case '=':
2771 			case '!':
2772 			case '>':
2773 			case '<':
2774 			case '|':
2775 			case ':':
2776 			case '-':
2777 			case '#':
2778 				extra_len++;
2779 				break;
2780 
2781 			case '\0':
2782 				extra_len+=3;
2783 				break;
2784 
2785 			default:
2786 				if (c == delim_char) {
2787 					extra_len++;
2788 				}
2789 				break;
2790 		}
2791 		p++;
2792 	} while (p != in_str_end);
2793 
2794 	if (extra_len == 0) {
2795 		RETURN_STR_COPY(str);
2796 	}
2797 
2798 	/* Allocate enough memory so that even if each character
2799 	   is quoted, we won't run out of room */
2800 	out_str = zend_string_safe_alloc(1, ZSTR_LEN(str), extra_len, 0);
2801 	q = ZSTR_VAL(out_str);
2802 	p = in_str;
2803 
2804 	do {
2805 		c = *p;
2806 		switch(c) {
2807 			case '.':
2808 			case '\\':
2809 			case '+':
2810 			case '*':
2811 			case '?':
2812 			case '[':
2813 			case '^':
2814 			case ']':
2815 			case '$':
2816 			case '(':
2817 			case ')':
2818 			case '{':
2819 			case '}':
2820 			case '=':
2821 			case '!':
2822 			case '>':
2823 			case '<':
2824 			case '|':
2825 			case ':':
2826 			case '-':
2827 			case '#':
2828 				*q++ = '\\';
2829 				*q++ = c;
2830 				break;
2831 
2832 			case '\0':
2833 				*q++ = '\\';
2834 				*q++ = '0';
2835 				*q++ = '0';
2836 				*q++ = '0';
2837 				break;
2838 
2839 			default:
2840 				if (c == delim_char) {
2841 					*q++ = '\\';
2842 				}
2843 				*q++ = c;
2844 				break;
2845 		}
2846 		p++;
2847 	} while (p != in_str_end);
2848 	*q = '\0';
2849 
2850 	RETURN_NEW_STR(out_str);
2851 }
2852 /* }}} */
2853 
2854 /* {{{ proto array preg_grep(string regex, array input [, int flags])
2855    Searches array and returns entries which match regex */
PHP_FUNCTIONnull2856 static PHP_FUNCTION(preg_grep)
2857 {
2858 	zend_string			*regex;			/* Regular expression */
2859 	zval				*input;			/* Input array */
2860 	zend_long			 flags = 0;		/* Match control flags */
2861 	pcre_cache_entry	*pce;			/* Compiled regular expression */
2862 
2863 	/* Get arguments and do error checking */
2864 	ZEND_PARSE_PARAMETERS_START(2, 3)
2865 		Z_PARAM_STR(regex)
2866 		Z_PARAM_ARRAY(input)
2867 		Z_PARAM_OPTIONAL
2868 		Z_PARAM_LONG(flags)
2869 	ZEND_PARSE_PARAMETERS_END();
2870 
2871 	/* Compile regex or get it from cache. */
2872 	if ((pce = pcre_get_compiled_regex_cache(regex)) == NULL) {
2873 		RETURN_FALSE;
2874 	}
2875 
2876 	pce->refcount++;
2877 	php_pcre_grep_impl(pce, input, return_value, flags);
2878 	pce->refcount--;
2879 }
2880 /* }}} */
2881 
php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags)2882 PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, zend_long flags) /* {{{ */
2883 {
2884 	zval            *entry;             /* An entry in the input array */
2885 	uint32_t		 num_subpats;		/* Number of captured subpatterns */
2886 	int				 count;				/* Count of matched subpatterns */
2887 	uint32_t		 options;			/* Execution options */
2888 	zend_string		*string_key;
2889 	zend_ulong		 num_key;
2890 	zend_bool		 invert;			/* Whether to return non-matching
2891 										   entries */
2892 	pcre2_match_data *match_data;
2893 	invert = flags & PREG_GREP_INVERT ? 1 : 0;
2894 
2895 	/* Calculate the size of the offsets array, and allocate memory for it. */
2896 	num_subpats = pce->capture_count + 1;
2897 
2898 	/* Initialize return array */
2899 	array_init(return_value);
2900 
2901 	PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
2902 
2903 	if (!mdata_used && num_subpats <= PHP_PCRE_PREALLOC_MDATA_SIZE) {
2904 		match_data = mdata;
2905 	} else {
2906 		match_data = pcre2_match_data_create_from_pattern(pce->re, gctx);
2907 		if (!match_data) {
2908 			PCRE_G(error_code) = PHP_PCRE_INTERNAL_ERROR;
2909 			return;
2910 		}
2911 	}
2912 
2913 	options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
2914 
2915 	/* Go through the input array */
2916 	ZEND_HASH_FOREACH_KEY_VAL(Z_ARRVAL_P(input), num_key, string_key, entry) {
2917 		zend_string *tmp_subject_str;
2918 		zend_string *subject_str = zval_get_tmp_string(entry, &tmp_subject_str);
2919 
2920 		/* Perform the match */
2921 #ifdef HAVE_PCRE_JIT_SUPPORT
2922 		if ((pce->preg_options & PREG_JIT) && options) {
2923 			count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2924 					PCRE2_NO_UTF_CHECK, match_data, mctx);
2925 		} else
2926 #endif
2927 		count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), 0,
2928 				options, match_data, mctx);
2929 
2930 		/* If the entry fits our requirements */
2931 		if (count >= 0) {
2932 			/* Check for too many substrings condition. */
2933 			if (UNEXPECTED(count == 0)) {
2934 				php_error_docref(NULL, E_NOTICE, "Matched, but too many substrings");
2935 			}
2936 			if (!invert) {
2937 				Z_TRY_ADDREF_P(entry);
2938 
2939 				/* Add to return array */
2940 				if (string_key) {
2941 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2942 				} else {
2943 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2944 				}
2945 			}
2946 		} else if (count == PCRE2_ERROR_NOMATCH) {
2947 			if (invert) {
2948 				Z_TRY_ADDREF_P(entry);
2949 
2950 				/* Add to return array */
2951 				if (string_key) {
2952 					zend_hash_update(Z_ARRVAL_P(return_value), string_key, entry);
2953 				} else {
2954 					zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry);
2955 				}
2956 			}
2957 		} else {
2958 			pcre_handle_exec_error(count);
2959 			zend_tmp_string_release(tmp_subject_str);
2960 			break;
2961 		}
2962 
2963 		zend_tmp_string_release(tmp_subject_str);
2964 	} ZEND_HASH_FOREACH_END();
2965 	if (match_data != mdata) {
2966 		pcre2_match_data_free(match_data);
2967 	}
2968 }
2969 /* }}} */
2970 
2971 /* {{{ proto int preg_last_error()
2972    Returns the error code of the last regexp execution. */
PHP_FUNCTIONnull2973 static PHP_FUNCTION(preg_last_error)
2974 {
2975 	ZEND_PARSE_PARAMETERS_NONE();
2976 
2977 	RETURN_LONG(PCRE_G(error_code));
2978 }
2979 /* }}} */
2980 
2981 /* {{{ module definition structures */
2982 
/* {{{ arginfo */
/* Argument descriptors referenced by the pcre_functions table.
 * In ZEND_ARG_INFO(by_ref, name) a first argument of 1 marks a parameter passed
 * by reference; the last argument of ZEND_BEGIN_ARG_INFO_EX is the number of
 * required parameters (per the Zend arginfo macros; confirm against zend_API.h). */

/* preg_match: pattern and subject are required; subpatterns is by-reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_match_all: same parameter list as preg_match */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(1, subpatterns) /* array */
    ZEND_ARG_INFO(0, flags)
    ZEND_ARG_INFO(0, offset)
ZEND_END_ARG_INFO()

/* preg_replace (also used for preg_filter): three required parameters; count is by-reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, replace)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
ZEND_END_ARG_INFO()

/* preg_replace_callback: three required parameters; count is by-reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, callback)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_replace_callback_array: pattern and subject are required; count is by-reference */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback_array, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(1, count)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_split: pattern and subject are required */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
    ZEND_ARG_INFO(0, pattern)
    ZEND_ARG_INFO(0, subject)
    ZEND_ARG_INFO(0, limit)
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_quote: only str is required */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
    ZEND_ARG_INFO(0, str)
    ZEND_ARG_INFO(0, delim_char)
ZEND_END_ARG_INFO()

/* preg_grep: regex and input are required */
ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
    ZEND_ARG_INFO(0, regex)
    ZEND_ARG_INFO(0, input) /* array */
    ZEND_ARG_INFO(0, flags)
ZEND_END_ARG_INFO()

/* preg_last_error: declares no parameters */
ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
ZEND_END_ARG_INFO()
/* }}} */
3046 
/* Function table for the extension: pairs each preg_* function with its
 * argument descriptor. preg_filter reuses arginfo_preg_replace. The table is
 * closed by PHP_FE_END. */
static const zend_function_entry pcre_functions[] = {
	PHP_FE(preg_match,					arginfo_preg_match)
	PHP_FE(preg_match_all,				arginfo_preg_match_all)
	PHP_FE(preg_replace,				arginfo_preg_replace)
	PHP_FE(preg_replace_callback,		arginfo_preg_replace_callback)
	PHP_FE(preg_replace_callback_array,	arginfo_preg_replace_callback_array)
	PHP_FE(preg_filter,					arginfo_preg_replace)
	PHP_FE(preg_split,					arginfo_preg_split)
	PHP_FE(preg_quote,					arginfo_preg_quote)
	PHP_FE(preg_grep,					arginfo_preg_grep)
	PHP_FE(preg_last_error,				arginfo_preg_last_error)
	PHP_FE_END
};
3060 
/* Module descriptor for the "pcre" extension: names the module, points at its
 * function table and wires up the module/request/globals lifecycle hooks. */
zend_module_entry pcre_module_entry = {
	STANDARD_MODULE_HEADER,
   "pcre",                      /* module name */
	pcre_functions,             /* function table defined above */
	PHP_MINIT(pcre),            /* module startup hook */
	PHP_MSHUTDOWN(pcre),        /* module shutdown hook */
	PHP_RINIT(pcre),            /* request startup hook */
	PHP_RSHUTDOWN(pcre),        /* request shutdown hook */
	PHP_MINFO(pcre),            /* module info hook */
	PHP_PCRE_VERSION,           /* module version */
	PHP_MODULE_GLOBALS(pcre),   /* module globals */
	PHP_GINIT(pcre),            /* globals constructor */
	PHP_GSHUTDOWN(pcre),        /* globals destructor */
	NULL,                       /* unused slot; presumably the post-deactivate callback, confirm against zend_modules.h */
	STANDARD_MODULE_PROPERTIES_EX
};

/* Emit the get_module() entry point only when COMPILE_DL_PCRE is defined */
#ifdef COMPILE_DL_PCRE
ZEND_GET_MODULE(pcre)
#endif
3081 
3082 /* }}} */
3083 
3084 PHPAPI pcre2_match_context *php_pcre_mctx(void)
3085 {/*{{{*/
3086 	return mctx;
3087 }/*}}}*/
3088 
php_pcre_gctx(void)3089 PHPAPI pcre2_general_context *php_pcre_gctx(void)
3090 {/*{{{*/
3091 	return gctx;
3092 }/*}}}*/
3093 
php_pcre_cctx(void)3094 PHPAPI pcre2_compile_context *php_pcre_cctx(void)
3095 {/*{{{*/
3096 	return cctx;
3097 }/*}}}*/
3098 
php_pcre_pce_incref(pcre_cache_entry *pce)3099 PHPAPI void php_pcre_pce_incref(pcre_cache_entry *pce)
3100 {/*{{{*/
3101 	assert(NULL != pce);
3102 	pce->refcount++;
3103 }/*}}}*/
3104 
php_pcre_pce_decref(pcre_cache_entry *pce)3105 PHPAPI void php_pcre_pce_decref(pcre_cache_entry *pce)
3106 {/*{{{*/
3107 	assert(NULL != pce);
3108 	assert(0 != pce->refcount);
3109 	pce->refcount--;
3110 }/*}}}*/
3111 
php_pcre_pce_re(pcre_cache_entry *pce)3112 PHPAPI pcre2_code *php_pcre_pce_re(pcre_cache_entry *pce)
3113 {/*{{{*/
3114 	assert(NULL != pce);
3115 	return pce->re;
3116 }/*}}}*/
3117