diff options
Diffstat (limited to 'gl/regex_internal.c')
-rw-r--r-- | gl/regex_internal.c | 99 |
1 files changed, 49 insertions, 50 deletions
diff --git a/gl/regex_internal.c b/gl/regex_internal.c index 98b8d5d2..899b0ae6 100644 --- a/gl/regex_internal.c +++ b/gl/regex_internal.c | |||
@@ -1,22 +1,21 @@ | |||
1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free | 2 | Copyright (C) 2002-2013 Free Software Foundation, Inc. |
3 | Software Foundation, Inc. | ||
4 | This file is part of the GNU C Library. | 3 | This file is part of the GNU C Library. |
5 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 4 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
6 | 5 | ||
7 | This program is free software; you can redistribute it and/or modify | 6 | The GNU C Library is free software; you can redistribute it and/or |
8 | it under the terms of the GNU General Public License as published by | 7 | modify it under the terms of the GNU General Public |
9 | the Free Software Foundation; either version 3, or (at your option) | 8 | License as published by the Free Software Foundation; either |
10 | any later version. | 9 | version 3 of the License, or (at your option) any later version. |
11 | 10 | ||
12 | This program is distributed in the hope that it will be useful, | 11 | The GNU C Library is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | GNU General Public License for more details. | 14 | General Public License for more details. |
16 | 15 | ||
17 | You should have received a copy of the GNU General Public License along | 16 | You should have received a copy of the GNU General Public |
18 | with this program; if not, write to the Free Software Foundation, | 17 | License along with the GNU C Library; if not, see |
19 | Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ | 18 | <http://www.gnu.org/licenses/>. */ |
20 | 19 | ||
21 | static void re_string_construct_common (const char *str, Idx len, | 20 | static void re_string_construct_common (const char *str, Idx len, |
22 | re_string_t *pstr, | 21 | re_string_t *pstr, |
@@ -135,9 +134,9 @@ re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len) | |||
135 | { | 134 | { |
136 | wint_t *new_wcs; | 135 | wint_t *new_wcs; |
137 | 136 | ||
138 | /* Avoid overflow. */ | 137 | /* Avoid overflow in realloc. */ |
139 | size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); | 138 | const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx)); |
140 | if (BE (SIZE_MAX / max_object_size < new_buf_len, 0)) | 139 | if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0)) |
141 | return REG_ESPACE; | 140 | return REG_ESPACE; |
142 | 141 | ||
143 | new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); | 142 | new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); |
@@ -237,13 +236,8 @@ build_wcs_buffer (re_string_t *pstr) | |||
237 | else | 236 | else |
238 | p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; | 237 | p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; |
239 | mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); | 238 | mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); |
240 | if (BE (mbclen == (size_t) -2, 0)) | 239 | if (BE (mbclen == (size_t) -1 || mbclen == 0 |
241 | { | 240 | || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0)) |
242 | /* The buffer doesn't have enough space, finish to build. */ | ||
243 | pstr->cur_state = prev_st; | ||
244 | break; | ||
245 | } | ||
246 | else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) | ||
247 | { | 241 | { |
248 | /* We treat these cases as a singlebyte character. */ | 242 | /* We treat these cases as a singlebyte character. */ |
249 | mbclen = 1; | 243 | mbclen = 1; |
@@ -252,6 +246,12 @@ build_wcs_buffer (re_string_t *pstr) | |||
252 | wc = pstr->trans[wc]; | 246 | wc = pstr->trans[wc]; |
253 | pstr->cur_state = prev_st; | 247 | pstr->cur_state = prev_st; |
254 | } | 248 | } |
249 | else if (BE (mbclen == (size_t) -2, 0)) | ||
250 | { | ||
251 | /* The buffer doesn't have enough space, finish to build. */ | ||
252 | pstr->cur_state = prev_st; | ||
253 | break; | ||
254 | } | ||
255 | 255 | ||
256 | /* Write wide character and padding. */ | 256 | /* Write wide character and padding. */ |
257 | pstr->wcs[byte_idx++] = wc; | 257 | pstr->wcs[byte_idx++] = wc; |
@@ -334,9 +334,11 @@ build_wcs_upper_buffer (re_string_t *pstr) | |||
334 | for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) | 334 | for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) |
335 | pstr->wcs[byte_idx++] = WEOF; | 335 | pstr->wcs[byte_idx++] = WEOF; |
336 | } | 336 | } |
337 | else if (mbclen == (size_t) -1 || mbclen == 0) | 337 | else if (mbclen == (size_t) -1 || mbclen == 0 |
338 | || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) | ||
338 | { | 339 | { |
339 | /* It is an invalid character or '\0'. Just use the byte. */ | 340 | /* It is an invalid character, an incomplete character |
341 | at the end of the string, or '\0'. Just use the byte. */ | ||
340 | int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; | 342 | int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; |
341 | pstr->mbs[byte_idx] = ch; | 343 | pstr->mbs[byte_idx] = ch; |
342 | /* And also cast it to wide char. */ | 344 | /* And also cast it to wide char. */ |
@@ -449,7 +451,8 @@ build_wcs_upper_buffer (re_string_t *pstr) | |||
449 | for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) | 451 | for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) |
450 | pstr->wcs[byte_idx++] = WEOF; | 452 | pstr->wcs[byte_idx++] = WEOF; |
451 | } | 453 | } |
452 | else if (mbclen == (size_t) -1 || mbclen == 0) | 454 | else if (mbclen == (size_t) -1 || mbclen == 0 |
455 | || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) | ||
453 | { | 456 | { |
454 | /* It is an invalid character or '\0'. Just use the byte. */ | 457 | /* It is an invalid character or '\0'. Just use the byte. */ |
455 | int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; | 458 | int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; |
@@ -496,8 +499,7 @@ re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc) | |||
496 | rawbuf_idx < new_raw_idx;) | 499 | rawbuf_idx < new_raw_idx;) |
497 | { | 500 | { |
498 | wchar_t wc2; | 501 | wchar_t wc2; |
499 | Idx remain_len; | 502 | Idx remain_len = pstr->raw_len - rawbuf_idx; |
500 | remain_len = pstr->len - rawbuf_idx; | ||
501 | prev_st = pstr->cur_state; | 503 | prev_st = pstr->cur_state; |
502 | mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, | 504 | mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx, |
503 | remain_len, &pstr->cur_state); | 505 | remain_len, &pstr->cur_state); |
@@ -733,21 +735,21 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
733 | mbstate_t cur_state; | 735 | mbstate_t cur_state; |
734 | wchar_t wc2; | 736 | wchar_t wc2; |
735 | Idx mlen = raw + pstr->len - p; | 737 | Idx mlen = raw + pstr->len - p; |
738 | unsigned char buf[6]; | ||
736 | size_t mbclen; | 739 | size_t mbclen; |
737 | 740 | ||
738 | #if 0 /* dead code: buf is set but never used */ | 741 | const unsigned char *pp = p; |
739 | unsigned char buf[6]; | ||
740 | if (BE (pstr->trans != NULL, 0)) | 742 | if (BE (pstr->trans != NULL, 0)) |
741 | { | 743 | { |
742 | int i = mlen < 6 ? mlen : 6; | 744 | int i = mlen < 6 ? mlen : 6; |
743 | while (--i >= 0) | 745 | while (--i >= 0) |
744 | buf[i] = pstr->trans[p[i]]; | 746 | buf[i] = pstr->trans[p[i]]; |
747 | pp = buf; | ||
745 | } | 748 | } |
746 | #endif | ||
747 | /* XXX Don't use mbrtowc, we know which conversion | 749 | /* XXX Don't use mbrtowc, we know which conversion |
748 | to use (UTF-8 -> UCS4). */ | 750 | to use (UTF-8 -> UCS4). */ |
749 | memset (&cur_state, 0, sizeof (cur_state)); | 751 | memset (&cur_state, 0, sizeof (cur_state)); |
750 | mbclen = __mbrtowc (&wc2, (const char *) p, mlen, | 752 | mbclen = __mbrtowc (&wc2, (const char *) pp, mlen, |
751 | &cur_state); | 753 | &cur_state); |
752 | if (raw + offset - p <= mbclen | 754 | if (raw + offset - p <= mbclen |
753 | && mbclen < (size_t) -2) | 755 | && mbclen < (size_t) -2) |
@@ -832,7 +834,7 @@ re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags) | |||
832 | } | 834 | } |
833 | 835 | ||
834 | static unsigned char | 836 | static unsigned char |
835 | internal_function __attribute ((pure)) | 837 | internal_function __attribute__ ((pure)) |
836 | re_string_peek_byte_case (const re_string_t *pstr, Idx idx) | 838 | re_string_peek_byte_case (const re_string_t *pstr, Idx idx) |
837 | { | 839 | { |
838 | int ch; | 840 | int ch; |
@@ -869,7 +871,7 @@ re_string_peek_byte_case (const re_string_t *pstr, Idx idx) | |||
869 | } | 871 | } |
870 | 872 | ||
871 | static unsigned char | 873 | static unsigned char |
872 | internal_function __attribute ((pure)) | 874 | internal_function |
873 | re_string_fetch_byte_case (re_string_t *pstr) | 875 | re_string_fetch_byte_case (re_string_t *pstr) |
874 | { | 876 | { |
875 | if (BE (!pstr->mbs_allocated, 1)) | 877 | if (BE (!pstr->mbs_allocated, 1)) |
@@ -972,7 +974,7 @@ re_node_set_alloc (re_node_set *set, Idx size) | |||
972 | set->alloc = size; | 974 | set->alloc = size; |
973 | set->nelem = 0; | 975 | set->nelem = 0; |
974 | set->elems = re_malloc (Idx, size); | 976 | set->elems = re_malloc (Idx, size); |
975 | if (BE (set->elems == NULL, 0)) | 977 | if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL || size != 0)) |
976 | return REG_ESPACE; | 978 | return REG_ESPACE; |
977 | return REG_NOERROR; | 979 | return REG_NOERROR; |
978 | } | 980 | } |
@@ -1352,7 +1354,7 @@ re_node_set_insert_last (re_node_set *set, Idx elem) | |||
1352 | Return true if SET1 and SET2 are equivalent. */ | 1354 | Return true if SET1 and SET2 are equivalent. */ |
1353 | 1355 | ||
1354 | static bool | 1356 | static bool |
1355 | internal_function __attribute ((pure)) | 1357 | internal_function __attribute__ ((pure)) |
1356 | re_node_set_compare (const re_node_set *set1, const re_node_set *set2) | 1358 | re_node_set_compare (const re_node_set *set1, const re_node_set *set2) |
1357 | { | 1359 | { |
1358 | Idx i; | 1360 | Idx i; |
@@ -1367,7 +1369,7 @@ re_node_set_compare (const re_node_set *set1, const re_node_set *set2) | |||
1367 | /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ | 1369 | /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ |
1368 | 1370 | ||
1369 | static Idx | 1371 | static Idx |
1370 | internal_function __attribute ((pure)) | 1372 | internal_function __attribute__ ((pure)) |
1371 | re_node_set_contains (const re_node_set *set, Idx elem) | 1373 | re_node_set_contains (const re_node_set *set, Idx elem) |
1372 | { | 1374 | { |
1373 | __re_size_t idx, right, mid; | 1375 | __re_size_t idx, right, mid; |
@@ -1413,13 +1415,12 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) | |||
1413 | Idx *new_nexts, *new_indices; | 1415 | Idx *new_nexts, *new_indices; |
1414 | re_node_set *new_edests, *new_eclosures; | 1416 | re_node_set *new_edests, *new_eclosures; |
1415 | re_token_t *new_nodes; | 1417 | re_token_t *new_nodes; |
1416 | size_t max_object_size = | ||
1417 | MAX (sizeof (re_token_t), | ||
1418 | MAX (sizeof (re_node_set), | ||
1419 | sizeof (Idx))); | ||
1420 | 1418 | ||
1421 | /* Avoid overflows. */ | 1419 | /* Avoid overflows in realloc. */ |
1422 | if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0)) | 1420 | const size_t max_object_size = MAX (sizeof (re_token_t), |
1421 | MAX (sizeof (re_node_set), | ||
1422 | sizeof (Idx))); | ||
1423 | if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0)) | ||
1423 | return REG_MISSING; | 1424 | return REG_MISSING; |
1424 | 1425 | ||
1425 | new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); | 1426 | new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); |
@@ -1442,11 +1443,9 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) | |||
1442 | dfa->nodes[dfa->nodes_len] = token; | 1443 | dfa->nodes[dfa->nodes_len] = token; |
1443 | dfa->nodes[dfa->nodes_len].constraint = 0; | 1444 | dfa->nodes[dfa->nodes_len].constraint = 0; |
1444 | #ifdef RE_ENABLE_I18N | 1445 | #ifdef RE_ENABLE_I18N |
1445 | { | ||
1446 | int type = token.type; | ||
1447 | dfa->nodes[dfa->nodes_len].accept_mb = | 1446 | dfa->nodes[dfa->nodes_len].accept_mb = |
1448 | (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; | 1447 | ((token.type == OP_PERIOD && dfa->mb_cur_max > 1) |
1449 | } | 1448 | || token.type == COMPLEX_BRACKET); |
1450 | #endif | 1449 | #endif |
1451 | dfa->nexts[dfa->nodes_len] = REG_MISSING; | 1450 | dfa->nexts[dfa->nodes_len] = REG_MISSING; |
1452 | re_node_set_init_empty (dfa->edests + dfa->nodes_len); | 1451 | re_node_set_init_empty (dfa->edests + dfa->nodes_len); |
@@ -1454,7 +1453,7 @@ re_dfa_add_node (re_dfa_t *dfa, re_token_t token) | |||
1454 | return dfa->nodes_len++; | 1453 | return dfa->nodes_len++; |
1455 | } | 1454 | } |
1456 | 1455 | ||
1457 | static inline re_hashval_t | 1456 | static re_hashval_t |
1458 | internal_function | 1457 | internal_function |
1459 | calc_state_hash (const re_node_set *nodes, unsigned int context) | 1458 | calc_state_hash (const re_node_set *nodes, unsigned int context) |
1460 | { | 1459 | { |
@@ -1551,7 +1550,7 @@ re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa, | |||
1551 | && re_node_set_compare (state->entrance_nodes, nodes)) | 1550 | && re_node_set_compare (state->entrance_nodes, nodes)) |
1552 | return state; | 1551 | return state; |
1553 | } | 1552 | } |
1554 | /* There are no appropriate state in `dfa', create the new one. */ | 1553 | /* There are no appropriate state in 'dfa', create the new one. */ |
1555 | new_state = create_cd_newstate (dfa, nodes, context, hash); | 1554 | new_state = create_cd_newstate (dfa, nodes, context, hash); |
1556 | if (BE (new_state == NULL, 0)) | 1555 | if (BE (new_state == NULL, 0)) |
1557 | *err = REG_ESPACE; | 1556 | *err = REG_ESPACE; |
@@ -1580,7 +1579,7 @@ register_state (const re_dfa_t *dfa, re_dfastate_t *newstate, | |||
1580 | { | 1579 | { |
1581 | Idx elem = newstate->nodes.elems[i]; | 1580 | Idx elem = newstate->nodes.elems[i]; |
1582 | if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) | 1581 | if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) |
1583 | if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0)) | 1582 | if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem)) |
1584 | return REG_ESPACE; | 1583 | return REG_ESPACE; |
1585 | } | 1584 | } |
1586 | 1585 | ||
@@ -1615,7 +1614,7 @@ free_state (re_dfastate_t *state) | |||
1615 | re_free (state); | 1614 | re_free (state); |
1616 | } | 1615 | } |
1617 | 1616 | ||
1618 | /* Create the new state which is independ of contexts. | 1617 | /* Create the new state which is independent of contexts. |
1619 | Return the new state if succeeded, otherwise return NULL. */ | 1618 | Return the new state if succeeded, otherwise return NULL. */ |
1620 | 1619 | ||
1621 | static re_dfastate_t * | 1620 | static re_dfastate_t * |