Replace usages of xmlXPathCompile() with xmlXPathCtxtCompile().
author: Tom Lane <[email protected]>
Sun, 15 Sep 2024 17:33:09 +0000 (13:33 -0400)
committer: Tom Lane <[email protected]>
Sun, 15 Sep 2024 17:33:09 +0000 (13:33 -0400)
In existing releases of libxml2, xmlXPathCompile can be driven
to stack overflow because it fails to protect itself against
too-deeply-nested input.  While there is an upstream fix as of
yesterday, it will take years for that to propagate into all
shipping versions.  In the meantime, we can protect our own
usages basically for free by calling xmlXPathCtxtCompile instead.

(The actual bug is that libxml2 keeps its nesting counter in the
xmlXPathContext, and its parsing code was willing to just skip
counting nesting levels if it didn't have a context.  So if we supply
a context, all is well.  It seems odd actually that it works at all
to not supply a context, because this means that XPath parsing does
not have access to XML namespace info.  Apparently libxml2 never
checks namespaces until runtime?  Anyway, this seems like good
future-proofing even if its only immediate effect is to dodge a bug.)

Sadly, this hack only offers protection with libxml2 2.9.11 and newer.
Before that there are multiple similar problems, so if you are
processing untrusted XML it behooves you to get a newer version.
But we have some pretty old libxml2 in the buildfarm, so it seems
impractical to add a regression test to verify this fix.

Per bug #18617 from Jingzhou Fu.  Back-patch to all supported
versions.

Discussion: https://postgr.es/m/18617-1cee4d2ed1f4e7ae@postgresql.org
Discussion: https://gitlab.gnome.org/GNOME/libxml2/-/issues/799

contrib/xml2/xpath.c
src/backend/utils/adt/xml.c

index b999b1f706656bea0cef73e6e069986756065c5b..212cb74aa220b3c6825d0670a1370d5206a0e4c9 100644 (file)
@@ -386,7 +386,7 @@ pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace)
            workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
 
            /* compile the path */
-           comppath = xmlXPathCompile(xpath);
+           comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
            if (comppath == NULL)
                xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
                            "XPath Syntax Error");
@@ -650,7 +650,7 @@ xpath_table(PG_FUNCTION_ARGS)
                        ctxt->node = xmlDocGetRootElement(doctree);
 
                        /* compile the path */
-                       comppath = xmlXPathCompile(xpaths[j]);
+                       comppath = xmlXPathCtxtCompile(ctxt, xpaths[j]);
                        if (comppath == NULL)
                            xml_ereport(xmlerrcxt, ERROR,
                                        ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
index 68bbf86cc46a238474de7457b272c38140aae504..cbab8308d1ce03310a2e48d1c1519c1ad153442b 100644 (file)
@@ -4429,7 +4429,13 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
            }
        }
 
-       xpathcomp = xmlXPathCompile(xpath_expr);
+       /*
+        * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
+        * xmlXPathCompile.  In libxml2 2.13.3 and older, the latter function
+        * fails to defend itself against recursion-to-stack-overflow.  See
+        * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
+        */
+       xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
        if (xpathcomp == NULL || xmlerrcxt->err_occurred)
            xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
                        "invalid XPath expression");
@@ -4800,7 +4806,10 @@ XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
 
    xstr = pg_xmlCharStrndup(path, strlen(path));
 
-   xtCxt->xpathcomp = xmlXPathCompile(xstr);
+   /* We require XmlTableSetDocument to have been done already */
+   Assert(xtCxt->xpathcxt != NULL);
+
+   xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
    if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
        xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
                    "invalid XPath expression");
@@ -4831,7 +4840,10 @@ XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
 
    xstr = pg_xmlCharStrndup(path, strlen(path));
 
-   xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
+   /* We require XmlTableSetDocument to have been done already */
+   Assert(xtCxt->xpathcxt != NULL);
+
+   xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
    if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
        xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
                    "invalid XPath expression");