summaryrefslogtreecommitdiffstats
path: root/gl/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'gl/regcomp.c')
-rw-r--r--gl/regcomp.c172
1 files changed, 99 insertions, 73 deletions
diff --git a/gl/regcomp.c b/gl/regcomp.c
index b114b4d1..86ca02b0 100644
--- a/gl/regcomp.c
+++ b/gl/regcomp.c
@@ -1,6 +1,6 @@
1/* Extended regular expression matching and search library. 1/* Extended regular expression matching and search library.
2 Copyright (C) 2002,2003,2004,2005,2006,2007,2008,2009 2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
3 Free Software Foundation, Inc. 3 Software Foundation, Inc.
4 This file is part of the GNU C Library. 4 This file is part of the GNU C Library.
5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. 5 Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
6 6
@@ -383,7 +383,7 @@ re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
383 applies to multibyte character sets; for single byte character 383 applies to multibyte character sets; for single byte character
384 sets, the SIMPLE_BRACKET again suffices. */ 384 sets, the SIMPLE_BRACKET again suffices. */
385 if (dfa->mb_cur_max > 1 385 if (dfa->mb_cur_max > 1
386 && (cset->nchar_classes || cset->non_match 386 && (cset->nchar_classes || cset->non_match || cset->nranges
387# ifdef _LIBC 387# ifdef _LIBC
388 || cset->nequiv_classes 388 || cset->nequiv_classes
389# endif /* _LIBC */ 389# endif /* _LIBC */
@@ -636,7 +636,7 @@ free_dfa_content (re_dfa_t *dfa)
636 re_dfastate_t *state = entry->array[j]; 636 re_dfastate_t *state = entry->array[j];
637 free_state (state); 637 free_state (state);
638 } 638 }
639 re_free (entry->array); 639 re_free (entry->array);
640 } 640 }
641 re_free (dfa->state_table); 641 re_free (dfa->state_table);
642#ifdef RE_ENABLE_I18N 642#ifdef RE_ENABLE_I18N
@@ -850,6 +850,9 @@ static reg_errcode_t
850init_dfa (re_dfa_t *dfa, size_t pat_len) 850init_dfa (re_dfa_t *dfa, size_t pat_len)
851{ 851{
852 __re_size_t table_size; 852 __re_size_t table_size;
853#ifndef _LIBC
854 char *codeset_name;
855#endif
853#ifdef RE_ENABLE_I18N 856#ifdef RE_ENABLE_I18N
854 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t)); 857 size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
855#else 858#else
@@ -893,7 +896,9 @@ init_dfa (re_dfa_t *dfa, size_t pat_len)
893 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) 896 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
894 != 0); 897 != 0);
895#else 898#else
896 if (strcmp (locale_charset (), "UTF-8") == 0) 899 codeset_name = nl_langinfo (CODESET);
900 if (strcasecmp (codeset_name, "UTF-8") == 0
901 || strcasecmp (codeset_name, "UTF8") == 0)
897 dfa->is_utf8 = 1; 902 dfa->is_utf8 = 1;
898 903
899 /* We check exhaustively in the loop below if this charset is a 904 /* We check exhaustively in the loop below if this charset is a
@@ -1016,7 +1021,10 @@ create_initial_state (re_dfa_t *dfa)
1016 Idx dest_idx = dfa->edests[node_idx].elems[0]; 1021 Idx dest_idx = dfa->edests[node_idx].elems[0];
1017 if (!re_node_set_contains (&init_nodes, dest_idx)) 1022 if (!re_node_set_contains (&init_nodes, dest_idx))
1018 { 1023 {
1019 re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); 1024 reg_errcode_t merge_err
1025 = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
1026 if (merge_err != REG_NOERROR)
1027 return merge_err;
1020 i = 0; 1028 i = 0;
1021 } 1029 }
1022 } 1030 }
@@ -1085,8 +1093,8 @@ optimize_utf8 (re_dfa_t *dfa)
1085 } 1093 }
1086 break; 1094 break;
1087 case OP_PERIOD: 1095 case OP_PERIOD:
1088 has_period = true; 1096 has_period = true;
1089 break; 1097 break;
1090 case OP_BACK_REF: 1098 case OP_BACK_REF:
1091 case OP_ALT: 1099 case OP_ALT:
1092 case END_OF_RE: 1100 case END_OF_RE:
@@ -1187,7 +1195,7 @@ analyze (regex_t *preg)
1187 { 1195 {
1188 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); 1196 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1189 if (BE (dfa->inveclosures == NULL, 0)) 1197 if (BE (dfa->inveclosures == NULL, 0))
1190 return REG_ESPACE; 1198 return REG_ESPACE;
1191 ret = calc_inveclosure (dfa); 1199 ret = calc_inveclosure (dfa);
1192 } 1200 }
1193 1201
@@ -1209,16 +1217,16 @@ postorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1209 if that's the only child). */ 1217 if that's the only child). */
1210 while (node->left || node->right) 1218 while (node->left || node->right)
1211 if (node->left) 1219 if (node->left)
1212 node = node->left; 1220 node = node->left;
1213 else 1221 else
1214 node = node->right; 1222 node = node->right;
1215 1223
1216 do 1224 do
1217 { 1225 {
1218 reg_errcode_t err = fn (extra, node); 1226 reg_errcode_t err = fn (extra, node);
1219 if (BE (err != REG_NOERROR, 0)) 1227 if (BE (err != REG_NOERROR, 0))
1220 return err; 1228 return err;
1221 if (node->parent == NULL) 1229 if (node->parent == NULL)
1222 return REG_NOERROR; 1230 return REG_NOERROR;
1223 prev = node; 1231 prev = node;
1224 node = node->parent; 1232 node = node->parent;
@@ -1252,7 +1260,7 @@ preorder (bin_tree_t *root, reg_errcode_t (fn (void *, bin_tree_t *)),
1252 prev = node; 1260 prev = node;
1253 node = node->parent; 1261 node = node->parent;
1254 if (!node) 1262 if (!node)
1255 return REG_NOERROR; 1263 return REG_NOERROR;
1256 } 1264 }
1257 node = node->right; 1265 node = node->right;
1258 } 1266 }
@@ -1275,13 +1283,13 @@ optimize_subexps (void *extra, bin_tree_t *node)
1275 } 1283 }
1276 1284
1277 else if (node->token.type == SUBEXP 1285 else if (node->token.type == SUBEXP
1278 && node->left && node->left->token.type == SUBEXP) 1286 && node->left && node->left->token.type == SUBEXP)
1279 { 1287 {
1280 Idx other_idx = node->left->token.opr.idx; 1288 Idx other_idx = node->left->token.opr.idx;
1281 1289
1282 node->left = node->left->left; 1290 node->left = node->left->left;
1283 if (node->left) 1291 if (node->left)
1284 node->left->parent = node; 1292 node->left->parent = node;
1285 1293
1286 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; 1294 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1287 if (other_idx < BITSET_WORD_BITS) 1295 if (other_idx < BITSET_WORD_BITS)
@@ -1366,9 +1374,9 @@ calc_first (void *extra, bin_tree_t *node)
1366 node->first = node; 1374 node->first = node;
1367 node->node_idx = re_dfa_add_node (dfa, node->token); 1375 node->node_idx = re_dfa_add_node (dfa, node->token);
1368 if (BE (node->node_idx == REG_MISSING, 0)) 1376 if (BE (node->node_idx == REG_MISSING, 0))
1369 return REG_ESPACE; 1377 return REG_ESPACE;
1370 if (node->token.type == ANCHOR) 1378 if (node->token.type == ANCHOR)
1371 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type; 1379 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1372 } 1380 }
1373 return REG_NOERROR; 1381 return REG_NOERROR;
1374} 1382}
@@ -1390,7 +1398,7 @@ calc_next (void *extra, bin_tree_t *node)
1390 if (node->left) 1398 if (node->left)
1391 node->left->next = node->next; 1399 node->left->next = node->next;
1392 if (node->right) 1400 if (node->right)
1393 node->right->next = node->next; 1401 node->right->next = node->next;
1394 break; 1402 break;
1395 } 1403 }
1396 return REG_NOERROR; 1404 return REG_NOERROR;
@@ -1441,7 +1449,7 @@ link_nfa_nodes (void *extra, bin_tree_t *node)
1441 case OP_BACK_REF: 1449 case OP_BACK_REF:
1442 dfa->nexts[idx] = node->next->node_idx; 1450 dfa->nexts[idx] = node->next->node_idx;
1443 if (node->token.type == OP_BACK_REF) 1451 if (node->token.type == OP_BACK_REF)
1444 re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); 1452 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1445 break; 1453 break;
1446 1454
1447 default: 1455 default:
@@ -1498,7 +1506,6 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1498 destination. */ 1506 destination. */
1499 org_dest = dfa->edests[org_node].elems[0]; 1507 org_dest = dfa->edests[org_node].elems[0];
1500 re_node_set_empty (dfa->edests + clone_node); 1508 re_node_set_empty (dfa->edests + clone_node);
1501 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1502 /* If the node is root_node itself, it means the epsilon closure 1509 /* If the node is root_node itself, it means the epsilon closure
1503 has a loop. Then tie it to the destination of the root_node. */ 1510 has a loop. Then tie it to the destination of the root_node. */
1504 if (org_node == root_node && clone_node != org_node) 1511 if (org_node == root_node && clone_node != org_node)
@@ -1542,7 +1549,7 @@ duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1542 } 1549 }
1543 else 1550 else
1544 { 1551 {
1545 /* There is a duplicated node which satisfy the constraint, 1552 /* There is a duplicated node which satisfies the constraint,
1546 use it to avoid infinite loop. */ 1553 use it to avoid infinite loop. */
1547 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest); 1554 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1548 if (BE (! ok, 0)) 1555 if (BE (! ok, 0))
@@ -1674,10 +1681,9 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1674{ 1681{
1675 reg_errcode_t err; 1682 reg_errcode_t err;
1676 Idx i; 1683 Idx i;
1677 bool incomplete;
1678 bool ok;
1679 re_node_set eclosure; 1684 re_node_set eclosure;
1680 incomplete = false; 1685 bool ok;
1686 bool incomplete = false;
1681 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); 1687 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1682 if (BE (err != REG_NOERROR, 0)) 1688 if (BE (err != REG_NOERROR, 0))
1683 return err; 1689 return err;
@@ -1722,7 +1728,9 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1722 else 1728 else
1723 eclosure_elem = dfa->eclosures[edest]; 1729 eclosure_elem = dfa->eclosures[edest];
1724 /* Merge the epsilon closure of `edest'. */ 1730 /* Merge the epsilon closure of `edest'. */
1725 re_node_set_merge (&eclosure, &eclosure_elem); 1731 err = re_node_set_merge (&eclosure, &eclosure_elem);
1732 if (BE (err != REG_NOERROR, 0))
1733 return err;
1726 /* If the epsilon closure of `edest' is incomplete, 1734 /* If the epsilon closure of `edest' is incomplete,
1727 the epsilon closure of this node is also incomplete. */ 1735 the epsilon closure of this node is also incomplete. */
1728 if (dfa->eclosures[edest].nelem == 0) 1736 if (dfa->eclosures[edest].nelem == 0)
@@ -1732,7 +1740,7 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1732 } 1740 }
1733 } 1741 }
1734 1742
1735 /* Epsilon closures include itself. */ 1743 /* An epsilon closure includes itself. */
1736 ok = re_node_set_insert (&eclosure, node); 1744 ok = re_node_set_insert (&eclosure, node);
1737 if (BE (! ok, 0)) 1745 if (BE (! ok, 0))
1738 return REG_ESPACE; 1746 return REG_ESPACE;
@@ -2319,7 +2327,7 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2319 && dfa->word_ops_used == 0) 2327 && dfa->word_ops_used == 0)
2320 init_word_char (dfa); 2328 init_word_char (dfa);
2321 if (token->opr.ctx_type == WORD_DELIM 2329 if (token->opr.ctx_type == WORD_DELIM
2322 || token->opr.ctx_type == NOT_WORD_DELIM) 2330 || token->opr.ctx_type == NOT_WORD_DELIM)
2323 { 2331 {
2324 bin_tree_t *tree_first, *tree_last; 2332 bin_tree_t *tree_first, *tree_last;
2325 if (token->opr.ctx_type == WORD_DELIM) 2333 if (token->opr.ctx_type == WORD_DELIM)
@@ -2327,13 +2335,13 @@ parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2327 token->opr.ctx_type = WORD_FIRST; 2335 token->opr.ctx_type = WORD_FIRST;
2328 tree_first = create_token_tree (dfa, NULL, NULL, token); 2336 tree_first = create_token_tree (dfa, NULL, NULL, token);
2329 token->opr.ctx_type = WORD_LAST; 2337 token->opr.ctx_type = WORD_LAST;
2330 } 2338 }
2331 else 2339 else
2332 { 2340 {
2333 token->opr.ctx_type = INSIDE_WORD; 2341 token->opr.ctx_type = INSIDE_WORD;
2334 tree_first = create_token_tree (dfa, NULL, NULL, token); 2342 tree_first = create_token_tree (dfa, NULL, NULL, token);
2335 token->opr.ctx_type = INSIDE_NOTWORD; 2343 token->opr.ctx_type = INSIDE_NOTWORD;
2336 } 2344 }
2337 tree_last = create_token_tree (dfa, NULL, NULL, token); 2345 tree_last = create_token_tree (dfa, NULL, NULL, token);
2338 tree = create_tree (dfa, tree_first, tree_last, OP_ALT); 2346 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2339 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) 2347 if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
@@ -2444,7 +2452,7 @@ parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2444 { 2452 {
2445 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); 2453 tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2446 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) 2454 if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2447 *err = REG_EPAREN; 2455 *err = REG_EPAREN;
2448 if (BE (*err != REG_NOERROR, 0)) 2456 if (BE (*err != REG_NOERROR, 0))
2449 return NULL; 2457 return NULL;
2450 } 2458 }
@@ -2515,7 +2523,8 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2515 return elem; 2523 return elem;
2516 } 2524 }
2517 2525
2518 if (BE (end != REG_MISSING && start > end, 0)) 2526 if (BE ((end != REG_MISSING && start > end)
2527 || token->type != OP_CLOSE_DUP_NUM, 0))
2519 { 2528 {
2520 /* First number greater than second. */ 2529 /* First number greater than second. */
2521 *err = REG_BADBR; 2530 *err = REG_BADBR;
@@ -2568,10 +2577,14 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2568 if (BE (tree == NULL, 0)) 2577 if (BE (tree == NULL, 0))
2569 goto parse_dup_op_espace; 2578 goto parse_dup_op_espace;
2570 2579
2580/* From gnulib's "intprops.h":
2581 True if the arithmetic type T is signed. */
2582#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
2583
2571 /* This loop is actually executed only when end != REG_MISSING, 2584 /* This loop is actually executed only when end != REG_MISSING,
2572 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have 2585 to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2573 already created the start+1-th copy. */ 2586 already created the start+1-th copy. */
2574 if ((Idx) -1 < 0 || end != REG_MISSING) 2587 if (TYPE_SIGNED (Idx) || end != REG_MISSING)
2575 for (i = start + 2; i <= end; ++i) 2588 for (i = start + 2; i <= end; ++i)
2576 { 2589 {
2577 elem = duplicate_tree (elem, dfa); 2590 elem = duplicate_tree (elem, dfa);
@@ -2609,11 +2622,17 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2609static reg_errcode_t 2622static reg_errcode_t
2610internal_function 2623internal_function
2611# ifdef RE_ENABLE_I18N 2624# ifdef RE_ENABLE_I18N
2612build_range_exp (bitset_t sbcset, re_charset_t *mbcset, Idx *range_alloc, 2625build_range_exp (const reg_syntax_t syntax,
2613 bracket_elem_t *start_elem, bracket_elem_t *end_elem) 2626 bitset_t sbcset,
2627 re_charset_t *mbcset,
2628 Idx *range_alloc,
2629 const bracket_elem_t *start_elem,
2630 const bracket_elem_t *end_elem)
2614# else /* not RE_ENABLE_I18N */ 2631# else /* not RE_ENABLE_I18N */
2615build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem, 2632build_range_exp (const reg_syntax_t syntax,
2616 bracket_elem_t *end_elem) 2633 bitset_t sbcset,
2634 const bracket_elem_t *start_elem,
2635 const bracket_elem_t *end_elem)
2617# endif /* not RE_ENABLE_I18N */ 2636# endif /* not RE_ENABLE_I18N */
2618{ 2637{
2619 unsigned int start_ch, end_ch; 2638 unsigned int start_ch, end_ch;
@@ -2652,7 +2671,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2652 return REG_ECOLLATE; 2671 return REG_ECOLLATE;
2653 cmp_buf[0] = start_wc; 2672 cmp_buf[0] = start_wc;
2654 cmp_buf[4] = end_wc; 2673 cmp_buf[4] = end_wc;
2655 if (wcscoll (cmp_buf, cmp_buf + 4) > 0) 2674
2675 if (BE ((syntax & RE_NO_EMPTY_RANGES)
2676 && wcscoll (cmp_buf, cmp_buf + 4) > 0, 0))
2656 return REG_ERANGE; 2677 return REG_ERANGE;
2657 2678
2658 /* Got valid collation sequence values, add them as a new entry. 2679 /* Got valid collation sequence values, add them as a new entry.
@@ -2662,9 +2683,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2662 no MBCSET if dfa->mb_cur_max == 1. */ 2683 no MBCSET if dfa->mb_cur_max == 1. */
2663 if (mbcset) 2684 if (mbcset)
2664 { 2685 {
2665 /* Check the space of the arrays. */ 2686 /* Check the space of the arrays. */
2666 if (BE (*range_alloc == mbcset->nranges, 0)) 2687 if (BE (*range_alloc == mbcset->nranges, 0))
2667 { 2688 {
2668 /* There is not enough space, need realloc. */ 2689 /* There is not enough space, need realloc. */
2669 wchar_t *new_array_start, *new_array_end; 2690 wchar_t *new_array_start, *new_array_end;
2670 Idx new_nranges; 2691 Idx new_nranges;
@@ -2674,9 +2695,9 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2674 /* Use realloc since mbcset->range_starts and mbcset->range_ends 2695 /* Use realloc since mbcset->range_starts and mbcset->range_ends
2675 are NULL if *range_alloc == 0. */ 2696 are NULL if *range_alloc == 0. */
2676 new_array_start = re_realloc (mbcset->range_starts, wchar_t, 2697 new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2677 new_nranges); 2698 new_nranges);
2678 new_array_end = re_realloc (mbcset->range_ends, wchar_t, 2699 new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2679 new_nranges); 2700 new_nranges);
2680 2701
2681 if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2702 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2682 return REG_ESPACE; 2703 return REG_ESPACE;
@@ -2684,10 +2705,10 @@ build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2684 mbcset->range_starts = new_array_start; 2705 mbcset->range_starts = new_array_start;
2685 mbcset->range_ends = new_array_end; 2706 mbcset->range_ends = new_array_end;
2686 *range_alloc = new_nranges; 2707 *range_alloc = new_nranges;
2687 } 2708 }
2688 2709
2689 mbcset->range_starts[mbcset->nranges] = start_wc; 2710 mbcset->range_starts[mbcset->nranges] = start_wc;
2690 mbcset->range_ends[mbcset->nranges++] = end_wc; 2711 mbcset->range_ends[mbcset->nranges++] = end_wc;
2691 } 2712 }
2692 2713
2693 /* Build the table for single byte characters. */ 2714 /* Build the table for single byte characters. */
@@ -2799,7 +2820,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2799 return elem; 2820 return elem;
2800 } 2821 }
2801 2822
2802 /* Local function for parse_bracket_exp used in _LIBC environement. 2823 /* Local function for parse_bracket_exp used in _LIBC environment.
2803 Look up the collation sequence value of BR_ELEM. 2824 Look up the collation sequence value of BR_ELEM.
2804 Return the value if succeeded, UINT_MAX otherwise. */ 2825 Return the value if succeeded, UINT_MAX otherwise. */
2805 2826
@@ -2823,7 +2844,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2823 } 2844 }
2824 else if (br_elem->type == MB_CHAR) 2845 else if (br_elem->type == MB_CHAR)
2825 { 2846 {
2826 return __collseq_table_lookup (collseqwc, br_elem->opr.wch); 2847 if (nrules != 0)
2848 return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2827 } 2849 }
2828 else if (br_elem->type == COLL_SYM) 2850 else if (br_elem->type == COLL_SYM)
2829 { 2851 {
@@ -2904,8 +2926,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2904 build below suffices. */ 2926 build below suffices. */
2905 if (nrules > 0 || dfa->mb_cur_max > 1) 2927 if (nrules > 0 || dfa->mb_cur_max > 1)
2906 { 2928 {
2907 /* Check the space of the arrays. */ 2929 /* Check the space of the arrays. */
2908 if (BE (*range_alloc == mbcset->nranges, 0)) 2930 if (BE (*range_alloc == mbcset->nranges, 0))
2909 { 2931 {
2910 /* There is not enough space, need realloc. */ 2932 /* There is not enough space, need realloc. */
2911 uint32_t *new_array_start; 2933 uint32_t *new_array_start;
@@ -2917,18 +2939,18 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2917 new_array_start = re_realloc (mbcset->range_starts, uint32_t, 2939 new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2918 new_nranges); 2940 new_nranges);
2919 new_array_end = re_realloc (mbcset->range_ends, uint32_t, 2941 new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2920 new_nranges); 2942 new_nranges);
2921 2943
2922 if (BE (new_array_start == NULL || new_array_end == NULL, 0)) 2944 if (BE (new_array_start == NULL || new_array_end == NULL, 0))
2923 return REG_ESPACE; 2945 return REG_ESPACE;
2924 2946
2925 mbcset->range_starts = new_array_start; 2947 mbcset->range_starts = new_array_start;
2926 mbcset->range_ends = new_array_end; 2948 mbcset->range_ends = new_array_end;
2927 *range_alloc = new_nranges; 2949 *range_alloc = new_nranges;
2928 } 2950 }
2929 2951
2930 mbcset->range_starts[mbcset->nranges] = start_collseq; 2952 mbcset->range_starts[mbcset->nranges] = start_collseq;
2931 mbcset->range_ends[mbcset->nranges++] = end_collseq; 2953 mbcset->range_ends[mbcset->nranges++] = end_collseq;
2932 } 2954 }
2933 2955
2934 /* Build the table for single byte characters. */ 2956 /* Build the table for single byte characters. */
@@ -3154,11 +3176,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3154 &start_elem, &end_elem); 3176 &start_elem, &end_elem);
3155#else 3177#else
3156# ifdef RE_ENABLE_I18N 3178# ifdef RE_ENABLE_I18N
3157 *err = build_range_exp (sbcset, 3179 *err = build_range_exp (syntax, sbcset,
3158 dfa->mb_cur_max > 1 ? mbcset : NULL, 3180 dfa->mb_cur_max > 1 ? mbcset : NULL,
3159 &range_alloc, &start_elem, &end_elem); 3181 &range_alloc, &start_elem, &end_elem);
3160# else 3182# else
3161 *err = build_range_exp (sbcset, &start_elem, &end_elem); 3183 *err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
3162# endif 3184# endif
3163#endif /* RE_ENABLE_I18N */ 3185#endif /* RE_ENABLE_I18N */
3164 if (BE (*err != REG_NOERROR, 0)) 3186 if (BE (*err != REG_NOERROR, 0))
@@ -3262,17 +3284,17 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3262 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ 3284 of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
3263 if (sbc_idx < BITSET_WORDS) 3285 if (sbc_idx < BITSET_WORDS)
3264 { 3286 {
3265 /* Build a tree for simple bracket. */ 3287 /* Build a tree for simple bracket. */
3266 br_token.type = SIMPLE_BRACKET; 3288 br_token.type = SIMPLE_BRACKET;
3267 br_token.opr.sbcset = sbcset; 3289 br_token.opr.sbcset = sbcset;
3268 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3290 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3269 if (BE (work_tree == NULL, 0)) 3291 if (BE (work_tree == NULL, 0))
3270 goto parse_bracket_exp_espace; 3292 goto parse_bracket_exp_espace;
3271 3293
3272 /* Then join them by ALT node. */ 3294 /* Then join them by ALT node. */
3273 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); 3295 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3274 if (BE (work_tree == NULL, 0)) 3296 if (BE (work_tree == NULL, 0))
3275 goto parse_bracket_exp_espace; 3297 goto parse_bracket_exp_espace;
3276 } 3298 }
3277 else 3299 else
3278 { 3300 {
@@ -3291,7 +3313,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
3291 br_token.opr.sbcset = sbcset; 3313 br_token.opr.sbcset = sbcset;
3292 work_tree = create_token_tree (dfa, NULL, NULL, &br_token); 3314 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3293 if (BE (work_tree == NULL, 0)) 3315 if (BE (work_tree == NULL, 0))
3294 goto parse_bracket_exp_espace; 3316 goto parse_bracket_exp_espace;
3295 } 3317 }
3296 return work_tree; 3318 return work_tree;
3297 3319
@@ -3430,7 +3452,7 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3430 3452
3431 /* Build single byte matcing table for this equivalence class. */ 3453 /* Build single byte matcing table for this equivalence class. */
3432 char_buf[1] = (unsigned char) '\0'; 3454 char_buf[1] = (unsigned char) '\0';
3433 len = weights[idx1]; 3455 len = weights[idx1 & 0xffffff];
3434 for (ch = 0; ch < SBC_MAX; ++ch) 3456 for (ch = 0; ch < SBC_MAX; ++ch)
3435 { 3457 {
3436 char_buf[0] = ch; 3458 char_buf[0] = ch;
@@ -3442,11 +3464,15 @@ build_equiv_class (bitset_t sbcset, const unsigned char *name)
3442 if (idx2 == 0) 3464 if (idx2 == 0)
3443 /* This isn't a valid character. */ 3465 /* This isn't a valid character. */
3444 continue; 3466 continue;
3445 if (len == weights[idx2]) 3467 /* Compare only if the length matches and the collation rule
3468 index is the same. */
3469 if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
3446 { 3470 {
3447 int cnt = 0; 3471 int cnt = 0;
3472
3448 while (cnt <= len && 3473 while (cnt <= len &&
3449 weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) 3474 weights[(idx1 & 0xffffff) + 1 + cnt]
3475 == weights[(idx2 & 0xffffff) + 1 + cnt])
3450 ++cnt; 3476 ++cnt;
3451 3477
3452 if (cnt > len) 3478 if (cnt > len)
@@ -3842,7 +3868,7 @@ duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3842 node = node->parent; 3868 node = node->parent;
3843 dup_node = dup_node->parent; 3869 dup_node = dup_node->parent;
3844 if (!node) 3870 if (!node)
3845 return dup_root; 3871 return dup_root;
3846 } 3872 }
3847 node = node->right; 3873 node = node->right;
3848 p_new = &dup_node->right; 3874 p_new = &dup_node->right;