summaryrefslogtreecommitdiff
path: root/src/backend/regex/regc_nfa.c
diff options
context:
space:
mode:
authorTom Lane2024-11-15 23:23:38 +0000
committerTom Lane2024-11-15 23:23:38 +0000
commitb69bdcee9c9cfd8550c4e847d035f441fcee7d01 (patch)
tree06b224d38fe0a6faed11dbe2f39b1e39a8aab1a2 /src/backend/regex/regc_nfa.c
parent9a70f67667da32f8009364e1096e80ce6996c13c (diff)
Avoid assertion due to disconnected NFA sub-graphs in regex parsing.
In commit 08c0d6ad6 which introduced "rainbow" arcs in regex NFAs, I didn't think terribly hard about what to do when creating the color complement of a rainbow arc. Clearly, the complement cannot match any characters, and I took the easy way out by just not building any arcs at all in the complement arc set. That mostly works, but Nikolay Shaplov found a case where it doesn't: if we decide to delete that sub-NFA later because it's inside a "{0}" quantifier, delsub() suffered an assertion failure. That's because delsub() relies on the target sub-NFA being fully connected. That was always true before, and the best fix seems to be to restore that property. Hence, invent a new arc type CANTMATCH that can be generated in place of an empty color complement, and drop it again later when we start NFA optimization. (At that point we don't need to do delsub() any more, and besides there are other cases where NFA optimization can lead to disconnected subgraphs.) It appears that this bug has no consequences in a non-assert-enabled build: there will be some transiently leaked NFA states/arcs, but they'll get cleaned up eventually. Still, we don't like assertion failures, so back-patch to v14 where rainbow arcs were introduced. Per bug #18708 from Nikolay Shaplov. Discussion: https://postgr.es/m/[email protected]
Diffstat (limited to 'src/backend/regex/regc_nfa.c')
-rw-r--r--src/backend/regex/regc_nfa.c40
1 files changed, 40 insertions, 0 deletions
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index f1819a24f6d..acd2286defd 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -1462,6 +1462,7 @@ removetraverse(struct nfa *nfa,
{
case PLAIN:
case EMPTY:
+ case CANTMATCH:
/* nothing to do */
break;
case AHEAD:
@@ -1599,6 +1600,12 @@ optimize(struct nfa *nfa,
if (verbose)
fprintf(f, "\ninitial cleanup:\n");
#endif
+ /* If we have any CANTMATCH arcs, drop them; but this is uncommon */
+ if (nfa->flags & HASCANTMATCH)
+ {
+ removecantmatch(nfa);
+ nfa->flags &= ~HASCANTMATCH;
+ }
cleanup(nfa); /* may simplify situation */
#ifdef REG_DEBUG
if (verbose)
@@ -2923,6 +2930,34 @@ clonesuccessorstates(struct nfa *nfa,
}
/*
+ * removecantmatch - remove CANTMATCH arcs, which are no longer useful
+ * once we are done with the parsing phase. (We need them only to
+ * preserve connectedness of NFA subgraphs during parsing.)
+ */
+static void
+removecantmatch(struct nfa *nfa)
+{
+ struct state *s;
+
+ for (s = nfa->states; s != NULL; s = s->next)
+ {
+ struct arc *a;
+ struct arc *nexta;
+
+ for (a = s->outs; a != NULL; a = nexta)
+ {
+ nexta = a->outchain;
+ if (a->type == CANTMATCH)
+ {
+ freearc(nfa, a);
+ if (NISERR())
+ return;
+ }
+ }
+ }
+}
+
+/*
* cleanup - clean up NFA after optimizations
*/
static void
@@ -3627,6 +3662,8 @@ dumpnfa(struct nfa *nfa,
fprintf(f, ", eol [%ld]", (long) nfa->eos[1]);
if (nfa->flags & HASLACONS)
fprintf(f, ", haslacons");
+ if (nfa->flags & HASCANTMATCH)
+ fprintf(f, ", hascantmatch");
if (nfa->flags & MATCHALL)
{
fprintf(f, ", minmatchall %d", nfa->minmatchall);
@@ -3749,6 +3786,9 @@ dumparc(struct arc *a,
break;
case EMPTY:
break;
+ case CANTMATCH:
+ fprintf(f, "X");
+ break;
default:
fprintf(f, "0x%x/0%lo", a->type, (long) a->co);
break;