Lines Matching refs:dfa

27 static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
32 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
34 static void optimize_utf8 (re_dfa_t *dfa);
50 static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
51 static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
53 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
54 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
56 static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
76 re_dfa_t *dfa, re_token_t *token,
78 static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa,
84 re_dfa_t *dfa,
109 static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
114 static bin_tree_t *create_tree (re_dfa_t *dfa,
117 static bin_tree_t *create_token_tree (re_dfa_t *dfa,
120 static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
280 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
284 re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
285 if (dfa->init_state != dfa->init_state_word)
286 re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
287 if (dfa->init_state != dfa->init_state_nl)
288 re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
289 if (dfa->init_state != dfa->init_state_begbuf)
290 re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
314 re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
316 bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
320 re_token_type_t type = dfa->nodes[node].type;
324 re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
326 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
334 *p++ = dfa->nodes[node].opr.c;
335 while (++node < dfa->nodes_len
336 && dfa->nodes[node].type == CHARACTER
337 && dfa->nodes[node].mb_partial)
338 *p++ = dfa->nodes[node].opr.c;
354 bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
363 re_charset_t *cset = dfa->nodes[node].opr.mbcset;
388 if (dfa->mb_cur_max > 1
416 if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
612 free_dfa_content (re_dfa_t *dfa)
616 if (dfa->nodes)
617 for (i = 0; i < dfa->nodes_len; ++i)
618 free_token (dfa->nodes + i);
619 re_free (dfa->nexts);
620 for (i = 0; i < dfa->nodes_len; ++i)
622 if (dfa->eclosures != NULL)
623 re_node_set_free (dfa->eclosures + i);
624 if (dfa->inveclosures != NULL)
625 re_node_set_free (dfa->inveclosures + i);
626 if (dfa->edests != NULL)
627 re_node_set_free (dfa->edests + i);
629 re_free (dfa->edests);
630 re_free (dfa->eclosures);
631 re_free (dfa->inveclosures);
632 re_free (dfa->nodes);
634 if (dfa->state_table)
635 for (i = 0; i <= dfa->state_hash_mask; ++i)
637 struct re_state_table_entry *entry = dfa->state_table + i;
645 re_free (dfa->state_table);
647 if (dfa->sb_char != utf8_sb_map)
648 re_free (dfa->sb_char);
650 re_free (dfa->subexp_map);
652 re_free (dfa->re_str);
655 re_free (dfa);
665 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
666 if (BE (dfa != NULL, 1))
667 free_dfa_content (dfa);
761 re_dfa_t *dfa;
773 /* Initialize the dfa. */
774 dfa = (re_dfa_t *) preg->buffer;
781 dfa = re_realloc (preg->buffer, re_dfa_t, 1);
782 if (dfa == NULL)
785 preg->buffer = (unsigned char *) dfa;
789 err = init_dfa (dfa, length);
792 free_dfa_content (dfa);
799 dfa->re_str = re_malloc (char, length + 1);
800 strncpy (dfa->re_str, pattern, length + 1);
803 __libc_lock_init (dfa->lock);
806 (syntax & RE_ICASE) != 0, dfa);
812 free_dfa_content (dfa);
820 dfa->str_tree = parse (&regexp, preg, syntax, &err);
821 if (BE (dfa->str_tree == NULL, 0))
831 if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
832 optimize_utf8 (dfa);
835 /* Then create the initial state of the dfa. */
836 err = create_initial_state (dfa);
844 free_dfa_content (dfa);
856 init_dfa (re_dfa_t *dfa, size_t pat_len)
874 memset (dfa, '\0', sizeof (re_dfa_t));
877 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
886 dfa->nodes_alloc = pat_len + 1;
887 dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
894 dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
895 dfa->state_hash_mask = table_size - 1;
897 dfa->mb_cur_max = MB_CUR_MAX;
899 if (dfa->mb_cur_max == 6
901 dfa->is_utf8 = 1;
902 dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
908 dfa->is_utf8 = 1;
912 dfa->map_notascii = 0;
916 if (dfa->mb_cur_max > 1)
918 if (dfa->is_utf8)
919 dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
924 dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
925 if (BE (dfa->sb_char == NULL, 0))
934 dfa->sb_char[i] |= (bitset_word_t) 1 << j;
937 dfa->map_notascii = 1;
944 if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))
955 init_word_char (re_dfa_t *dfa)
958 dfa->word_ops_used = 1;
962 dfa->word_char[i] |= (bitset_word_t) 1 << j;
970 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
972 for (storage = dfa->str_tree_storage; storage; storage = next)
977 dfa->str_tree_storage = NULL;
978 dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
979 dfa->str_tree = NULL;
980 re_free (dfa->org_indices);
981 dfa->org_indices = NULL;
987 create_initial_state (re_dfa_t *dfa)
995 first = dfa->str_tree->first->node_idx;
996 dfa->init_node = first;
997 err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1005 if (dfa->nbackref > 0)
1009 re_token_type_t type = dfa->nodes[node_idx].type;
1017 clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
1019 && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
1027 Idx dest_idx = dfa->edests[node_idx].elems[0];
1031 = re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
1040 dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
1042 if (BE (dfa->init_state == NULL, 0))
1044 if (dfa->init_state->has_constraint)
1046 dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1048 dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1050 dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1054 if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
1055 || dfa->init_state_begbuf == NULL, 0))
1059 dfa->init_state_word = dfa->init_state_nl
1060 = dfa->init_state_begbuf = dfa->init_state;
1069 to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1073 optimize_utf8 (re_dfa_t *dfa)
1080 for (node = 0; node < dfa->nodes_len; ++node)
1081 switch (dfa->nodes[node].type)
1084 if (dfa->nodes[node].opr.c >= ASCII_CHARS)
1088 switch (dfa->nodes[node].opr.ctx_type)
1122 if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
1133 for (node = 0; node < dfa->nodes_len; ++node)
1135 if (dfa->nodes[node].type == CHARACTER
1136 && dfa->nodes[node].opr.c >= ASCII_CHARS)
1137 dfa->nodes[node].mb_partial = 0;
1138 else if (dfa->nodes[node].type == OP_PERIOD)
1139 dfa->nodes[node].type = OP_UTF8_PERIOD;
1143 dfa->mb_cur_max = 1;
1144 dfa->is_utf8 = 0;
1145 dfa->has_mb_node = dfa->nbackref > 0 || has_period;
1156 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1160 dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
1161 dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
1162 dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1163 dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1164 if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
1165 || dfa->eclosures == NULL, 0))
1168 dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
1169 if (dfa->subexp_map != NULL)
1173 dfa->subexp_map[i] = i;
1174 preorder (dfa->str_tree, optimize_subexps, dfa);
1176 if (dfa->subexp_map[i] != i)
1180 free (dfa->subexp_map);
1181 dfa->subexp_map = NULL;
1185 ret = postorder (dfa->str_tree, lower_subexps, preg);
1188 ret = postorder (dfa->str_tree, calc_first, dfa);
1191 preorder (dfa->str_tree, calc_next, dfa);
1192 ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1195 ret = calc_eclosure (dfa);
1201 if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
1202 || dfa->nbackref)
1204 dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1205 if (BE (dfa->inveclosures == NULL, 0))
1207 ret = calc_inveclosure (dfa);
1284 re_dfa_t *dfa = (re_dfa_t *) extra;
1286 if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1289 node->token.opr.idx = dfa->subexp_map[idx];
1290 dfa->used_bkref_map |= 1 << node->token.opr.idx;
1302 dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1304 dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
1337 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1348 || !(dfa->used_bkref_map
1354 op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1355 cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1356 tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1357 tree = create_tree (dfa, op, tree1, CONCAT);
1374 re_dfa_t *dfa = (re_dfa_t *) extra;
1383 node->node_idx = re_dfa_add_node (dfa, node->token);
1387 dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1419 re_dfa_t *dfa = (re_dfa_t *) extra;
1436 dfa->has_plural_match = 1;
1447 err = re_node_set_init_2 (dfa->edests + idx, left, right);
1454 err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1458 dfa->nexts[idx] = node->next->node_idx;
1460 err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1465 dfa->nexts[idx] = node->next->node_idx;
1478 duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1487 if (dfa->nodes[org_node].type == OP_BACK_REF)
1493 org_dest = dfa->nexts[org_node];
1494 re_node_set_empty (dfa->edests + clone_node);
1495 clone_dest = duplicate_node (dfa, org_dest, constraint);
1498 dfa->nexts[clone_node] = dfa->nexts[org_node];
1499 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1503 else if (dfa->edests[org_node].nelem == 0)
1508 dfa->nexts[clone_node] = dfa->nexts[org_node];
1511 else if (dfa->edests[org_node].nelem == 1)
1515 org_dest = dfa->edests[org_node].elems[0];
1516 re_node_set_empty (dfa->edests + clone_node);
1521 ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
1527 constraint |= dfa->nodes[org_node].constraint;
1528 clone_dest = duplicate_node (dfa, org_dest, constraint);
1531 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1535 else /* dfa->edests[org_node].nelem == 2 */
1539 org_dest = dfa->edests[org_node].elems[0];
1540 re_node_set_empty (dfa->edests + clone_node);
1542 clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1547 clone_dest = duplicate_node (dfa, org_dest, constraint);
1550 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1553 err = duplicate_node_closure (dfa, org_dest, clone_dest,
1562 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1567 org_dest = dfa->edests[org_node].elems[1];
1568 clone_dest = duplicate_node (dfa, org_dest, constraint);
1571 ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1585 search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
1589 for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
1591 if (org_node == dfa->org_indices[idx]
1592 && constraint == dfa->nodes[idx].constraint)
1603 duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
1605 Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1608 dfa->nodes[dup_idx].constraint = constraint;
1609 dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;
1610 dfa->nodes[dup_idx].duplicated = 1;
1613 dfa->org_indices[dup_idx] = org_idx;
1619 calc_inveclosure (re_dfa_t *dfa)
1623 for (idx = 0; idx < dfa->nodes_len; ++idx)
1624 re_node_set_init_empty (dfa->inveclosures + idx);
1626 for (src = 0; src < dfa->nodes_len; ++src)
1628 Idx *elems = dfa->eclosures[src].elems;
1629 for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1631 ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1643 calc_eclosure (re_dfa_t *dfa)
1648 assert (dfa->nodes_len > 0);
1656 if (node_idx == dfa->nodes_len)
1665 assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
1669 if (dfa->eclosures[node_idx].nelem != 0)
1672 err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
1676 if (dfa->eclosures[node_idx].nelem == 0)
1688 calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
1695 err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
1701 dfa->eclosures[node].nelem = REG_MISSING;
1705 if (dfa->nodes[node].constraint
1706 && dfa->edests[node].nelem
1707 && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
1709 err = duplicate_node_closure (dfa, node, node, node,
1710 dfa->nodes[node].constraint);
1716 if (IS_EPSILON_NODE(dfa->nodes[node].type))
1717 for (i = 0; i < dfa->edests[node].nelem; ++i)
1720 Idx edest = dfa->edests[node].elems[i];
1723 if (dfa->eclosures[edest].nelem == REG_MISSING)
1730 if (dfa->eclosures[edest].nelem == 0)
1732 err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
1737 eclosure_elem = dfa->eclosures[edest];
1744 if (dfa->eclosures[edest].nelem == 0)
1756 dfa->eclosures[node].nelem = 0;
1758 dfa->eclosures[node] = eclosure;
2120 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2123 dfa->syntax = syntax;
2128 eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2130 root = create_tree (dfa, tree, eor, CONCAT);
2154 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2172 tree = create_tree (dfa, tree, branch, OP_ALT);
2196 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2211 tree = create_tree (dfa, tree, expr, CONCAT);
2235 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2240 tree = create_token_tree (dfa, NULL, NULL, token);
2247 if (dfa->mb_cur_max > 1)
2254 mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2255 tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2271 tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2276 if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
2281 dfa->used_bkref_map |= 1 << token->opr.idx;
2282 tree = create_token_tree (dfa, NULL, NULL, token);
2288 ++dfa->nbackref;
2289 dfa->has_mb_node = 1;
2327 tree = create_token_tree (dfa, NULL, NULL, token);
2337 && dfa->word_ops_used == 0)
2338 init_word_char (dfa);
2346 tree_first = create_token_tree (dfa, NULL, NULL, token);
2352 tree_first = create_token_tree (dfa, NULL, NULL, token);
2355 tree_last = create_token_tree (dfa, NULL, NULL, token);
2356 tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2365 tree = create_token_tree (dfa, NULL, NULL, token);
2379 tree = create_token_tree (dfa, NULL, NULL, token);
2385 if (dfa->mb_cur_max > 1)
2386 dfa->has_mb_node = 1;
2390 tree = build_charclass_op (dfa, regexp->trans,
2399 tree = build_charclass_op (dfa, regexp->trans,
2424 tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2451 re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2471 dfa->completed_bkref_map |= 1 << cur_nsub;
2473 tree = create_tree (dfa, tree, NULL, SUBEXP);
2486 parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
2566 elem = duplicate_tree (elem, dfa);
2567 tree = create_tree (dfa, tree, elem, CONCAT);
2576 elem = duplicate_tree (elem, dfa);
2585 tree = create_tree (dfa, elem, NULL,
2600 elem = duplicate_tree (elem, dfa);
2601 tree = create_tree (dfa, tree, elem, CONCAT);
2605 tree = create_tree (dfa, tree, NULL, OP_ALT);
2611 tree = create_tree (dfa, old_tree, tree, CONCAT);
2693 no MBCSET if dfa->mb_cur_max == 1. */
2784 parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
2937 if (nrules > 0 || dfa->mb_cur_max > 1)
3137 ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3181 dfa, syntax, true);
3196 dfa->mb_cur_max > 1 ? mbcset : NULL,
3280 if (dfa->mb_cur_max > 1)
3281 bitset_mask (sbcset, dfa->sb_char);
3284 || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
3290 dfa->has_mb_node = 1;
3293 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3306 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3311 work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3331 work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3352 re_token_t *token, int token_len, re_dfa_t *dfa,
3617 build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3685 if (dfa->mb_cur_max > 1)
3686 bitset_mask (sbcset, dfa->sb_char);
3692 tree = create_token_tree (dfa, NULL, NULL, &br_token);
3697 if (dfa->mb_cur_max > 1)
3703 dfa->has_mb_node = 1;
3704 mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3708 tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3780 create_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3785 return create_token_tree (dfa, left, right, &t);
3789 create_token_tree (re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,
3793 if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
3799 storage->next = dfa->str_tree_storage;
3800 dfa->str_tree_storage = storage;
3801 dfa->str_tree_storage_idx = 0;
3803 tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3866 duplicate_tree (const bin_tree_t *root, re_dfa_t *dfa)
3875 *p_new = create_token_tree (dfa, NULL, NULL, &node->token);