diff options
Diffstat (limited to 'gl/regcomp.c')
-rw-r--r-- | gl/regcomp.c | 172 |
1 files changed, 99 insertions, 73 deletions
diff --git a/gl/regcomp.c b/gl/regcomp.c index b114b4d1..86ca02b0 100644 --- a/gl/regcomp.c +++ b/gl/regcomp.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* Extended regular expression matching and search library. | 1 | /* Extended regular expression matching and search library. |
2 | Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 | 2 | Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free |
3 | Free Software Foundation, Inc. | 3 | Software Foundation, Inc. |
4 | This file is part of the GNU C Library. | 4 | This file is part of the GNU C Library. |
5 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. | 5 | Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. |
6 | 6 | ||
@@ -383,7 +383,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state, | |||
383 | applies to multibyte character sets; for single byte character | 383 | applies to multibyte character sets; for single byte character |
384 | sets, the SIMPLE_BRACKET again suffices. */ | 384 | sets, the SIMPLE_BRACKET again suffices. */ |
385 | if (dfa->mb_cur_max > 1 | 385 | if (dfa->mb_cur_max > 1 |
386 | && (cset->nchar_classes || cset->non_match | 386 | && (cset->nchar_classes || cset->non_match || cset->nranges |
387 | # ifdef _LIBC | 387 | # ifdef _LIBC |
388 | || cset->nequiv_classes | 388 | || cset->nequiv_classes |
389 | # endif /* _LIBC */ | 389 | # endif /* _LIBC */ |
@@ -636,7 +636,7 @@ free_dfa_content (re_dfa_t *dfa) | |||
636 | re_dfastate_t *state = entry->array[j]; | 636 | re_dfastate_t *state = entry->array[j]; |
637 | free_state (state); | 637 | free_state (state); |
638 | } | 638 | } |
639 | re_free (entry->array); | 639 | re_free (entry->array); |
640 | } | 640 | } |
641 | re_free (dfa->state_table); | 641 | re_free (dfa->state_table); |
642 | #ifdef RE_ENABLE_I18N | 642 | #ifdef RE_ENABLE_I18N |
@@ -850,6 +850,9 @@ static reg_errcode_t | |||
850 | init_dfa (re_dfa_t *dfa, size_t pat_len) | 850 | init_dfa (re_dfa_t *dfa, size_t pat_len) |
851 | { | 851 | { |
852 | __re_size_t table_size; | 852 | __re_size_t table_size; |
853 | #ifndef _LIBC | ||
854 | char *codeset_name; | ||
855 | #endif | ||
853 | #ifdef RE_ENABLE_I18N | 856 | #ifdef RE_ENABLE_I18N |
854 | size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); | 857 | size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); |
855 | #else | 858 | #else |
@@ -893,7 +896,9 @@ init_dfa (re_dfa_t *dfa, size_t pat_len) | |||
893 | dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) | 896 | dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) |
894 | != 0); | 897 | != 0); |
895 | #else | 898 | #else |
896 | if (strcmp (locale_charset (), "UTF-8") == 0) | 899 | codeset_name = nl_langinfo (CODESET); |
900 | if (strcasecmp (codeset_name, "UTF-8") == 0 | ||
901 | || strcasecmp (codeset_name, "UTF8") == 0) | ||
897 | dfa->is_utf8 = 1; | 902 | dfa->is_utf8 = 1; |
898 | 903 | ||
899 | /* We check exhaustively in the loop below if this charset is a | 904 | /* We check exhaustively in the loop below if this charset is a |
@@ -1016,7 +1021,10 @@ create_initial_state (re_dfa_t *dfa) | |||
1016 | Idx dest_idx = dfa->edests[node_idx].elems[0]; | 1021 | Idx dest_idx = dfa->edests[node_idx].elems[0]; |
1017 | if (!re_node_set_contains (&init_nodes, dest_idx)) | 1022 | if (!re_node_set_contains (&init_nodes, dest_idx)) |
1018 | { | 1023 | { |
1019 | re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); | 1024 | reg_errcode_t merge_err |
1025 | = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); | ||
1026 | if (merge_err != REG_NOERROR) | ||
1027 | return merge_err; | ||
1020 | i = 0; | 1028 | i = 0; |
1021 | } | 1029 | } |
1022 | } | 1030 | } |
@@ -1085,8 +1093,8 @@ optimize_utf8 (re_dfa_t *dfa) | |||
1085 | } | 1093 | } |
1086 | break; | 1094 | break; |
1087 | case OP_PERIOD: | 1095 | case OP_PERIOD: |
1088 | has_period = true; | 1096 | has_period = true; |
1089 | break; | 1097 | break; |
1090 | case OP_BACK_REF: | 1098 | case OP_BACK_REF: |
1091 | case OP_ALT: | 1099 | case OP_ALT: |
1092 | case END_OF_RE: | 1100 | case END_OF_RE: |
@@ -1187,7 +1195,7 @@ analyze (regex_t *preg) | |||
1187 | { | 1195 | { |
1188 | dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); | 1196 | dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); |
1189 | if (BE (dfa->inveclosures == NULL, 0)) | 1197 | if (BE (dfa->inveclosures == NULL, 0)) |
1190 | return REG_ESPACE; | 1198 | return REG_ESPACE; |
1191 | ret = calc_inveclosure (dfa); | 1199 | ret = calc_inveclosure (dfa); |
1192 | } | 1200 | } |
1193 | 1201 | ||
@@ -1209,16 +1217,16 @@ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), | |||
1209 | if that's the only child). */ | 1217 | if that's the only child). */ |
1210 | while (node->left || node->right) | 1218 | while (node->left || node->right) |
1211 | if (node->left) | 1219 | if (node->left) |
1212 | node = node->left; | 1220 | node = node->left; |
1213 | else | 1221 | else |
1214 | node = node->right; | 1222 | node = node->right; |
1215 | 1223 | ||
1216 | do | 1224 | do |
1217 | { | 1225 | { |
1218 | reg_errcode_t err = fn (extra, node); | 1226 | reg_errcode_t err = fn (extra, node); |
1219 | if (BE (err != REG_NOERROR, 0)) | 1227 | if (BE (err != REG_NOERROR, 0)) |
1220 | return err; | 1228 | return err; |
1221 | if (node->parent == NULL) | 1229 | if (node->parent == NULL) |
1222 | return REG_NOERROR; | 1230 | return REG_NOERROR; |
1223 | prev = node; | 1231 | prev = node; |
1224 | node = node->parent; | 1232 | node = node->parent; |
@@ -1252,7 +1260,7 @@ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)), | |||
1252 | prev = node; | 1260 | prev = node; |
1253 | node = node->parent; | 1261 | node = node->parent; |
1254 | if (!node) | 1262 | if (!node) |
1255 | return REG_NOERROR; | 1263 | return REG_NOERROR; |
1256 | } | 1264 | } |
1257 | node = node->right; | 1265 | node = node->right; |
1258 | } | 1266 | } |
@@ -1275,13 +1283,13 @@ optimize_subexps (void *extra, bin_tree_t *node) | |||
1275 | } | 1283 | } |
1276 | 1284 | ||
1277 | else if (node->token.type == SUBEXP | 1285 | else if (node->token.type == SUBEXP |
1278 | && node->left && node->left->token.type == SUBEXP) | 1286 | && node->left && node->left->token.type == SUBEXP) |
1279 | { | 1287 | { |
1280 | Idx other_idx = node->left->token.opr.idx; | 1288 | Idx other_idx = node->left->token.opr.idx; |
1281 | 1289 | ||
1282 | node->left = node->left->left; | 1290 | node->left = node->left->left; |
1283 | if (node->left) | 1291 | if (node->left) |
1284 | node->left->parent = node; | 1292 | node->left->parent = node; |
1285 | 1293 | ||
1286 | dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; | 1294 | dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; |
1287 | if (other_idx < BITSET_WORD_BITS) | 1295 | if (other_idx < BITSET_WORD_BITS) |
@@ -1366,9 +1374,9 @@ calc_first (void *extra, bin_tree_t *node) | |||
1366 | node->first = node; | 1374 | node->first = node; |
1367 | node->node_idx = re_dfa_add_node (dfa, node->token); | 1375 | node->node_idx = re_dfa_add_node (dfa, node->token); |
1368 | if (BE (node->node_idx == REG_MISSING, 0)) | 1376 | if (BE (node->node_idx == REG_MISSING, 0)) |
1369 | return REG_ESPACE; | 1377 | return REG_ESPACE; |
1370 | if (node->token.type == ANCHOR) | 1378 | if (node->token.type == ANCHOR) |
1371 | dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; | 1379 | dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; |
1372 | } | 1380 | } |
1373 | return REG_NOERROR; | 1381 | return REG_NOERROR; |
1374 | } | 1382 | } |
@@ -1390,7 +1398,7 @@ calc_next (void *extra, bin_tree_t *node) | |||
1390 | if (node->left) | 1398 | if (node->left) |
1391 | node->left->next = node->next; | 1399 | node->left->next = node->next; |
1392 | if (node->right) | 1400 | if (node->right) |
1393 | node->right->next = node->next; | 1401 | node->right->next = node->next; |
1394 | break; | 1402 | break; |
1395 | } | 1403 | } |
1396 | return REG_NOERROR; | 1404 | return REG_NOERROR; |
@@ -1441,7 +1449,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node) | |||
1441 | case OP_BACK_REF: | 1449 | case OP_BACK_REF: |
1442 | dfa->nexts[idx] = node->next->node_idx; | 1450 | dfa->nexts[idx] = node->next->node_idx; |
1443 | if (node->token.type == OP_BACK_REF) | 1451 | if (node->token.type == OP_BACK_REF) |
1444 | re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); | 1452 | err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); |
1445 | break; | 1453 | break; |
1446 | 1454 | ||
1447 | default: | 1455 | default: |
@@ -1498,7 +1506,6 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, | |||
1498 | destination. */ | 1506 | destination. */ |
1499 | org_dest = dfa->edests[org_node].elems[0]; | 1507 | org_dest = dfa->edests[org_node].elems[0]; |
1500 | re_node_set_empty (dfa->edests + clone_node); | 1508 | re_node_set_empty (dfa->edests + clone_node); |
1501 | clone_dest = search_duplicated_node (dfa, org_dest, constraint); | ||
1502 | /* If the node is root_node itself, it means the epsilon closure | 1509 | /* If the node is root_node itself, it means the epsilon closure |
1503 | has a loop. Then tie it to the destination of the root_node. */ | 1510 | has a loop. Then tie it to the destination of the root_node. */ |
1504 | if (org_node == root_node && clone_node != org_node) | 1511 | if (org_node == root_node && clone_node != org_node) |
@@ -1542,7 +1549,7 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node, | |||
1542 | } | 1549 | } |
1543 | else | 1550 | else |
1544 | { | 1551 | { |
1545 | /* There is a duplicated node which satisfy the constraint, | 1552 | /* There is a duplicated node which satisfies the constraint, |
1546 | use it to avoid infinite loop. */ | 1553 | use it to avoid infinite loop. */ |
1547 | ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); | 1554 | ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); |
1548 | if (BE (! ok, 0)) | 1555 | if (BE (! ok, 0)) |
@@ -1674,10 +1681,9 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) | |||
1674 | { | 1681 | { |
1675 | reg_errcode_t err; | 1682 | reg_errcode_t err; |
1676 | Idx i; | 1683 | Idx i; |
1677 | bool incomplete; | ||
1678 | bool ok; | ||
1679 | re_node_set eclosure; | 1684 | re_node_set eclosure; |
1680 | incomplete = false; | 1685 | bool ok; |
1686 | bool incomplete = false; | ||
1681 | err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); | 1687 | err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); |
1682 | if (BE (err != REG_NOERROR, 0)) | 1688 | if (BE (err != REG_NOERROR, 0)) |
1683 | return err; | 1689 | return err; |
@@ -1722,7 +1728,9 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) | |||
1722 | else | 1728 | else |
1723 | eclosure_elem = dfa->eclosures[edest]; | 1729 | eclosure_elem = dfa->eclosures[edest]; |
1724 | /* Merge the epsilon closure of `edest'. */ | 1730 | /* Merge the epsilon closure of `edest'. */ |
1725 | re_node_set_merge (&eclosure, &eclosure_elem); | 1731 | err = re_node_set_merge (&eclosure, &eclosure_elem); |
1732 | if (BE (err != REG_NOERROR, 0)) | ||
1733 | return err; | ||
1726 | /* If the epsilon closure of `edest' is incomplete, | 1734 | /* If the epsilon closure of `edest' is incomplete, |
1727 | the epsilon closure of this node is also incomplete. */ | 1735 | the epsilon closure of this node is also incomplete. */ |
1728 | if (dfa->eclosures[edest].nelem == 0) | 1736 | if (dfa->eclosures[edest].nelem == 0) |
@@ -1732,7 +1740,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root) | |||
1732 | } | 1740 | } |
1733 | } | 1741 | } |
1734 | 1742 | ||
1735 | /* Epsilon closures include itself. */ | 1743 | /* An epsilon closure includes itself. */ |
1736 | ok = re_node_set_insert (&eclosure, node); | 1744 | ok = re_node_set_insert (&eclosure, node); |
1737 | if (BE (! ok, 0)) | 1745 | if (BE (! ok, 0)) |
1738 | return REG_ESPACE; | 1746 | return REG_ESPACE; |
@@ -2319,7 +2327,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, | |||
2319 | && dfa->word_ops_used == 0) | 2327 | && dfa->word_ops_used == 0) |
2320 | init_word_char (dfa); | 2328 | init_word_char (dfa); |
2321 | if (token->opr.ctx_type == WORD_DELIM | 2329 | if (token->opr.ctx_type == WORD_DELIM |
2322 | || token->opr.ctx_type == NOT_WORD_DELIM) | 2330 | || token->opr.ctx_type == NOT_WORD_DELIM) |
2323 | { | 2331 | { |
2324 | bin_tree_t *tree_first, *tree_last; | 2332 | bin_tree_t *tree_first, *tree_last; |
2325 | if (token->opr.ctx_type == WORD_DELIM) | 2333 | if (token->opr.ctx_type == WORD_DELIM) |
@@ -2327,13 +2335,13 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token, | |||
2327 | token->opr.ctx_type = WORD_FIRST; | 2335 | token->opr.ctx_type = WORD_FIRST; |
2328 | tree_first = create_token_tree (dfa, NULL, NULL, token); | 2336 | tree_first = create_token_tree (dfa, NULL, NULL, token); |
2329 | token->opr.ctx_type = WORD_LAST; | 2337 | token->opr.ctx_type = WORD_LAST; |
2330 | } | 2338 | } |
2331 | else | 2339 | else |
2332 | { | 2340 | { |
2333 | token->opr.ctx_type = INSIDE_WORD; | 2341 | token->opr.ctx_type = INSIDE_WORD; |
2334 | tree_first = create_token_tree (dfa, NULL, NULL, token); | 2342 | tree_first = create_token_tree (dfa, NULL, NULL, token); |
2335 | token->opr.ctx_type = INSIDE_NOTWORD; | 2343 | token->opr.ctx_type = INSIDE_NOTWORD; |
2336 | } | 2344 | } |
2337 | tree_last = create_token_tree (dfa, NULL, NULL, token); | 2345 | tree_last = create_token_tree (dfa, NULL, NULL, token); |
2338 | tree = create_tree (dfa, tree_first, tree_last, OP_ALT); | 2346 | tree = create_tree (dfa, tree_first, tree_last, OP_ALT); |
2339 | if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) | 2347 | if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) |
@@ -2444,7 +2452,7 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token, | |||
2444 | { | 2452 | { |
2445 | tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); | 2453 | tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); |
2446 | if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) | 2454 | if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) |
2447 | *err = REG_EPAREN; | 2455 | *err = REG_EPAREN; |
2448 | if (BE (*err != REG_NOERROR, 0)) | 2456 | if (BE (*err != REG_NOERROR, 0)) |
2449 | return NULL; | 2457 | return NULL; |
2450 | } | 2458 | } |
@@ -2515,7 +2523,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, | |||
2515 | return elem; | 2523 | return elem; |
2516 | } | 2524 | } |
2517 | 2525 | ||
2518 | if (BE (end != REG_MISSING && start > end, 0)) | 2526 | if (BE ((end != REG_MISSING && start > end) |
2527 | || token->type != OP_CLOSE_DUP_NUM, 0)) | ||
2519 | { | 2528 | { |
2520 | /* First number greater than second. */ | 2529 | /* First number greater than second. */ |
2521 | *err = REG_BADBR; | 2530 | *err = REG_BADBR; |
@@ -2568,10 +2577,14 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, | |||
2568 | if (BE (tree == NULL, 0)) | 2577 | if (BE (tree == NULL, 0)) |
2569 | goto parse_dup_op_espace; | 2578 | goto parse_dup_op_espace; |
2570 | 2579 | ||
2580 | /* From gnulib's "intprops.h": | ||
2581 | True if the arithmetic type T is signed. */ | ||
2582 | #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) | ||
2583 | |||
2571 | /* This loop is actually executed only when end != REG_MISSING, | 2584 | /* This loop is actually executed only when end != REG_MISSING, |
2572 | to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have | 2585 | to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have |
2573 | already created the start+1-th copy. */ | 2586 | already created the start+1-th copy. */ |
2574 | if ((Idx) -1 < 0 || end != REG_MISSING) | 2587 | if (TYPE_SIGNED (Idx) || end != REG_MISSING) |
2575 | for (i = start + 2; i <= end; ++i) | 2588 | for (i = start + 2; i <= end; ++i) |
2576 | { | 2589 | { |
2577 | elem = duplicate_tree (elem, dfa); | 2590 | elem = duplicate_tree (elem, dfa); |
@@ -2609,11 +2622,17 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa, | |||
2609 | static reg_errcode_t | 2622 | static reg_errcode_t |
2610 | internal_function | 2623 | internal_function |
2611 | # ifdef RE_ENABLE_I18N | 2624 | # ifdef RE_ENABLE_I18N |
2612 | build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, | 2625 | build_range_exp (const reg_syntax_t syntax, |
2613 | bracket_elem_t *start_elem, bracket_elem_t *end_elem) | 2626 | bitset_t sbcset, |
2627 | re_charset_t *mbcset, | ||
2628 | Idx *range_alloc, | ||
2629 | const bracket_elem_t *start_elem, | ||
2630 | const bracket_elem_t *end_elem) | ||
2614 | # else /* not RE_ENABLE_I18N */ | 2631 | # else /* not RE_ENABLE_I18N */ |
2615 | build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, | 2632 | build_range_exp (const reg_syntax_t syntax, |
2616 | bracket_elem_t *end_elem) | 2633 | bitset_t sbcset, |
2634 | const bracket_elem_t *start_elem, | ||
2635 | const bracket_elem_t *end_elem) | ||
2617 | # endif /* not RE_ENABLE_I18N */ | 2636 | # endif /* not RE_ENABLE_I18N */ |
2618 | { | 2637 | { |
2619 | unsigned int start_ch, end_ch; | 2638 | unsigned int start_ch, end_ch; |
@@ -2652,7 +2671,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, | |||
2652 | return REG_ECOLLATE; | 2671 | return REG_ECOLLATE; |
2653 | cmp_buf[0] = start_wc; | 2672 | cmp_buf[0] = start_wc; |
2654 | cmp_buf[4] = end_wc; | 2673 | cmp_buf[4] = end_wc; |
2655 | if (wcscoll (cmp_buf, cmp_buf + 4) > 0) | 2674 | |
2675 | if (BE ((syntax & RE_NO_EMPTY_RANGES) | ||
2676 | && wcscoll (cmp_buf, cmp_buf + 4) > 0, 0)) | ||
2656 | return REG_ERANGE; | 2677 | return REG_ERANGE; |
2657 | 2678 | ||
2658 | /* Got valid collation sequence values, add them as a new entry. | 2679 | /* Got valid collation sequence values, add them as a new entry. |
@@ -2662,9 +2683,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, | |||
2662 | no MBCSET if dfa->mb_cur_max == 1. */ | 2683 | no MBCSET if dfa->mb_cur_max == 1. */ |
2663 | if (mbcset) | 2684 | if (mbcset) |
2664 | { | 2685 | { |
2665 | /* Check the space of the arrays. */ | 2686 | /* Check the space of the arrays. */ |
2666 | if (BE (*range_alloc == mbcset->nranges, 0)) | 2687 | if (BE (*range_alloc == mbcset->nranges, 0)) |
2667 | { | 2688 | { |
2668 | /* There is not enough space, need realloc. */ | 2689 | /* There is not enough space, need realloc. */ |
2669 | wchar_t *new_array_start, *new_array_end; | 2690 | wchar_t *new_array_start, *new_array_end; |
2670 | Idx new_nranges; | 2691 | Idx new_nranges; |
@@ -2674,9 +2695,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, | |||
2674 | /* Use realloc since mbcset->range_starts and mbcset->range_ends | 2695 | /* Use realloc since mbcset->range_starts and mbcset->range_ends |
2675 | are NULL if *range_alloc == 0. */ | 2696 | are NULL if *range_alloc == 0. */ |
2676 | new_array_start = re_realloc (mbcset->range_starts, wchar_t, | 2697 | new_array_start = re_realloc (mbcset->range_starts, wchar_t, |
2677 | new_nranges); | 2698 | new_nranges); |
2678 | new_array_end = re_realloc (mbcset->range_ends, wchar_t, | 2699 | new_array_end = re_realloc (mbcset->range_ends, wchar_t, |
2679 | new_nranges); | 2700 | new_nranges); |
2680 | 2701 | ||
2681 | if (BE (new_array_start == NULL || new_array_end == NULL, 0)) | 2702 | if (BE (new_array_start == NULL || new_array_end == NULL, 0)) |
2682 | return REG_ESPACE; | 2703 | return REG_ESPACE; |
@@ -2684,10 +2705,10 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, | |||
2684 | mbcset->range_starts = new_array_start; | 2705 | mbcset->range_starts = new_array_start; |
2685 | mbcset->range_ends = new_array_end; | 2706 | mbcset->range_ends = new_array_end; |
2686 | *range_alloc = new_nranges; | 2707 | *range_alloc = new_nranges; |
2687 | } | 2708 | } |
2688 | 2709 | ||
2689 | mbcset->range_starts[mbcset->nranges] = start_wc; | 2710 | mbcset->range_starts[mbcset->nranges] = start_wc; |
2690 | mbcset->range_ends[mbcset->nranges++] = end_wc; | 2711 | mbcset->range_ends[mbcset->nranges++] = end_wc; |
2691 | } | 2712 | } |
2692 | 2713 | ||
2693 | /* Build the table for single byte characters. */ | 2714 | /* Build the table for single byte characters. */ |
@@ -2799,7 +2820,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
2799 | return elem; | 2820 | return elem; |
2800 | } | 2821 | } |
2801 | 2822 | ||
2802 | /* Local function for parse_bracket_exp used in _LIBC environement. | 2823 | /* Local function for parse_bracket_exp used in _LIBC environment. |
2803 | Look up the collation sequence value of BR_ELEM. | 2824 | Look up the collation sequence value of BR_ELEM. |
2804 | Return the value if succeeded, UINT_MAX otherwise. */ | 2825 | Return the value if succeeded, UINT_MAX otherwise. */ |
2805 | 2826 | ||
@@ -2823,7 +2844,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
2823 | } | 2844 | } |
2824 | else if (br_elem->type == MB_CHAR) | 2845 | else if (br_elem->type == MB_CHAR) |
2825 | { | 2846 | { |
2826 | return __collseq_table_lookup (collseqwc, br_elem->opr.wch); | 2847 | if (nrules != 0) |
2848 | return __collseq_table_lookup (collseqwc, br_elem->opr.wch); | ||
2827 | } | 2849 | } |
2828 | else if (br_elem->type == COLL_SYM) | 2850 | else if (br_elem->type == COLL_SYM) |
2829 | { | 2851 | { |
@@ -2904,8 +2926,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
2904 | build below suffices. */ | 2926 | build below suffices. */ |
2905 | if (nrules > 0 || dfa->mb_cur_max > 1) | 2927 | if (nrules > 0 || dfa->mb_cur_max > 1) |
2906 | { | 2928 | { |
2907 | /* Check the space of the arrays. */ | 2929 | /* Check the space of the arrays. */ |
2908 | if (BE (*range_alloc == mbcset->nranges, 0)) | 2930 | if (BE (*range_alloc == mbcset->nranges, 0)) |
2909 | { | 2931 | { |
2910 | /* There is not enough space, need realloc. */ | 2932 | /* There is not enough space, need realloc. */ |
2911 | uint32_t *new_array_start; | 2933 | uint32_t *new_array_start; |
@@ -2917,18 +2939,18 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
2917 | new_array_start = re_realloc (mbcset->range_starts, uint32_t, | 2939 | new_array_start = re_realloc (mbcset->range_starts, uint32_t, |
2918 | new_nranges); | 2940 | new_nranges); |
2919 | new_array_end = re_realloc (mbcset->range_ends, uint32_t, | 2941 | new_array_end = re_realloc (mbcset->range_ends, uint32_t, |
2920 | new_nranges); | 2942 | new_nranges); |
2921 | 2943 | ||
2922 | if (BE (new_array_start == NULL || new_array_end == NULL, 0)) | 2944 | if (BE (new_array_start == NULL || new_array_end == NULL, 0)) |
2923 | return REG_ESPACE; | 2945 | return REG_ESPACE; |
2924 | 2946 | ||
2925 | mbcset->range_starts = new_array_start; | 2947 | mbcset->range_starts = new_array_start; |
2926 | mbcset->range_ends = new_array_end; | 2948 | mbcset->range_ends = new_array_end; |
2927 | *range_alloc = new_nranges; | 2949 | *range_alloc = new_nranges; |
2928 | } | 2950 | } |
2929 | 2951 | ||
2930 | mbcset->range_starts[mbcset->nranges] = start_collseq; | 2952 | mbcset->range_starts[mbcset->nranges] = start_collseq; |
2931 | mbcset->range_ends[mbcset->nranges++] = end_collseq; | 2953 | mbcset->range_ends[mbcset->nranges++] = end_collseq; |
2932 | } | 2954 | } |
2933 | 2955 | ||
2934 | /* Build the table for single byte characters. */ | 2956 | /* Build the table for single byte characters. */ |
@@ -3154,11 +3176,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
3154 | &start_elem, &end_elem); | 3176 | &start_elem, &end_elem); |
3155 | #else | 3177 | #else |
3156 | # ifdef RE_ENABLE_I18N | 3178 | # ifdef RE_ENABLE_I18N |
3157 | *err = build_range_exp (sbcset, | 3179 | *err = build_range_exp (syntax, sbcset, |
3158 | dfa->mb_cur_max > 1 ? mbcset : NULL, | 3180 | dfa->mb_cur_max > 1 ? mbcset : NULL, |
3159 | &range_alloc, &start_elem, &end_elem); | 3181 | &range_alloc, &start_elem, &end_elem); |
3160 | # else | 3182 | # else |
3161 | *err = build_range_exp (sbcset, &start_elem, &end_elem); | 3183 | *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem); |
3162 | # endif | 3184 | # endif |
3163 | #endif /* RE_ENABLE_I18N */ | 3185 | #endif /* RE_ENABLE_I18N */ |
3164 | if (BE (*err != REG_NOERROR, 0)) | 3186 | if (BE (*err != REG_NOERROR, 0)) |
@@ -3262,17 +3284,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
3262 | of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ | 3284 | of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ |
3263 | if (sbc_idx < BITSET_WORDS) | 3285 | if (sbc_idx < BITSET_WORDS) |
3264 | { | 3286 | { |
3265 | /* Build a tree for simple bracket. */ | 3287 | /* Build a tree for simple bracket. */ |
3266 | br_token.type = SIMPLE_BRACKET; | 3288 | br_token.type = SIMPLE_BRACKET; |
3267 | br_token.opr.sbcset = sbcset; | 3289 | br_token.opr.sbcset = sbcset; |
3268 | work_tree = create_token_tree (dfa, NULL, NULL, &br_token); | 3290 | work_tree = create_token_tree (dfa, NULL, NULL, &br_token); |
3269 | if (BE (work_tree == NULL, 0)) | 3291 | if (BE (work_tree == NULL, 0)) |
3270 | goto parse_bracket_exp_espace; | 3292 | goto parse_bracket_exp_espace; |
3271 | 3293 | ||
3272 | /* Then join them by ALT node. */ | 3294 | /* Then join them by ALT node. */ |
3273 | work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); | 3295 | work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); |
3274 | if (BE (work_tree == NULL, 0)) | 3296 | if (BE (work_tree == NULL, 0)) |
3275 | goto parse_bracket_exp_espace; | 3297 | goto parse_bracket_exp_espace; |
3276 | } | 3298 | } |
3277 | else | 3299 | else |
3278 | { | 3300 | { |
@@ -3291,7 +3313,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token, | |||
3291 | br_token.opr.sbcset = sbcset; | 3313 | br_token.opr.sbcset = sbcset; |
3292 | work_tree = create_token_tree (dfa, NULL, NULL, &br_token); | 3314 | work_tree = create_token_tree (dfa, NULL, NULL, &br_token); |
3293 | if (BE (work_tree == NULL, 0)) | 3315 | if (BE (work_tree == NULL, 0)) |
3294 | goto parse_bracket_exp_espace; | 3316 | goto parse_bracket_exp_espace; |
3295 | } | 3317 | } |
3296 | return work_tree; | 3318 | return work_tree; |
3297 | 3319 | ||
@@ -3430,7 +3452,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) | |||
3430 | 3452 | ||
3431 | /* Build single byte matcing table for this equivalence class. */ | 3453 | /* Build single byte matcing table for this equivalence class. */ |
3432 | char_buf[1] = (unsigned char) '\0'; | 3454 | char_buf[1] = (unsigned char) '\0'; |
3433 | len = weights[idx1]; | 3455 | len = weights[idx1 & 0xffffff]; |
3434 | for (ch = 0; ch < SBC_MAX; ++ch) | 3456 | for (ch = 0; ch < SBC_MAX; ++ch) |
3435 | { | 3457 | { |
3436 | char_buf[0] = ch; | 3458 | char_buf[0] = ch; |
@@ -3442,11 +3464,15 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name) | |||
3442 | if (idx2 == 0) | 3464 | if (idx2 == 0) |
3443 | /* This isn't a valid character. */ | 3465 | /* This isn't a valid character. */ |
3444 | continue; | 3466 | continue; |
3445 | if (len == weights[idx2]) | 3467 | /* Compare only if the length matches and the collation rule |
3468 | index is the same. */ | ||
3469 | if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24)) | ||
3446 | { | 3470 | { |
3447 | int cnt = 0; | 3471 | int cnt = 0; |
3472 | |||
3448 | while (cnt <= len && | 3473 | while (cnt <= len && |
3449 | weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) | 3474 | weights[(idx1 & 0xffffff) + 1 + cnt] |
3475 | == weights[(idx2 & 0xffffff) + 1 + cnt]) | ||
3450 | ++cnt; | 3476 | ++cnt; |
3451 | 3477 | ||
3452 | if (cnt > len) | 3478 | if (cnt > len) |
@@ -3842,7 +3868,7 @@ duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa) | |||
3842 | node = node->parent; | 3868 | node = node->parent; |
3843 | dup_node = dup_node->parent; | 3869 | dup_node = dup_node->parent; |
3844 | if (!node) | 3870 | if (!node) |
3845 | return dup_root; | 3871 | return dup_root; |
3846 | } | 3872 | } |
3847 | node = node->right; | 3873 | node = node->right; |
3848 | p_new = &dup_node->right; | 3874 | p_new = &dup_node->right; |