diff options
author | Shawn Rutledge <[email protected]> | 2024-01-10 12:45:33 -0700 |
---|---|---|
committer | Shawn Rutledge <[email protected]> | 2024-02-02 20:14:28 -0700 |
commit | bffddc6a993c4b6b64922e8d327bdf32e0d4975a (patch) | |
tree | 37187f42d569be9a9eaf7b1235782972ca4e84b6 | |
parent | 216af5d7f9675a408e22167b097f221beeeb88db (diff) |
Extract and re-write "front matter" in markdown documents
It's increasingly common for YAML to be used as metadata in front of
markdown documents. md4c does not handle this, so we need to remove
it ahead of time, lest md4c misinterpret it as heading text or similar.
The --- fences are expected to be consistent regardless of the format of
what's between them, and the YAML (or other format) parser does not need to
see them. So we remove them while reading, and QTextMarkdownWriter
writes them around the front matter if there is any.
If your application needs to parse this "front matter", just call
qtd->metaInformation(QTextDocument::FrontMatter).toUtf8() and feed that
to some parser that you've linked in, such as yaml-cpp.
Since YAML is used with GitHub Docs, we consider this feature to be part
of the GitHub dialect:
https://docs.github.com/en/contributing/writing-for-github-docs/using-yaml-frontmatter
[ChangeLog][QtGui][Text] Markdown "front matter" (usually YAML) is now
extracted during parsing (GitHub dialect) and can be retrieved from
QTextDocument::metaInformation(FrontMatter). QTextMarkdownWriter also
writes front matter (if any) to the output.
Fixes: QTBUG-120722
Change-Id: I220ddcd2b94c99453853643516ca7a36bb2bcd6f
Reviewed-by: Axel Spoerl <[email protected]>
-rw-r--r-- | src/gui/text/qtextdocument.cpp | 9 | ||||
-rw-r--r-- | src/gui/text/qtextdocument.h | 5 | ||||
-rw-r--r-- | src/gui/text/qtextdocument_p.h | 1 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownimporter.cpp | 18 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownimporter_p.h | 1 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownwriter.cpp | 13 | ||||
-rw-r--r-- | src/gui/text/qtextmarkdownwriter_p.h | 1 | ||||
-rw-r--r-- | tests/auto/gui/text/qtextmarkdownimporter/data/yaml.md | 11 | ||||
-rw-r--r-- | tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp | 23 | ||||
-rw-r--r-- | tests/auto/gui/text/qtextmarkdownwriter/data/yaml.md | 11 | ||||
-rw-r--r-- | tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp | 12 |
11 files changed, 102 insertions, 3 deletions
diff --git a/src/gui/text/qtextdocument.cpp b/src/gui/text/qtextdocument.cpp index df00c809d1e..38279b30645 100644 --- a/src/gui/text/qtextdocument.cpp +++ b/src/gui/text/qtextdocument.cpp @@ -1159,6 +1159,8 @@ QString QTextDocument::metaInformation(MetaInformation info) const return d->url; case CssMedia: return d->cssMedia; + case FrontMatter: + return d->frontMatter; } return QString(); } @@ -1182,6 +1184,9 @@ void QTextDocument::setMetaInformation(MetaInformation info, const QString &stri case CssMedia: d->cssMedia = string; break; + case FrontMatter: + d->frontMatter = string; + break; } } @@ -1327,6 +1332,10 @@ void QTextDocument::setHtml(const QString &html) \value CssMedia This value is used to select the corresponding '@media' rule, if any, from a specified CSS stylesheet when setHtml() is called. This enum value has been introduced in Qt 6.3. + \value FrontMatter This value is used to select header material, if any was + extracted during parsing of the source file (currently + only from Markdown format). This enum value has been + introduced in Qt 6.8. 
\sa metaInformation(), setMetaInformation(), setHtml() */ diff --git a/src/gui/text/qtextdocument.h b/src/gui/text/qtextdocument.h index 9c7b57a1567..b6253bfa460 100644 --- a/src/gui/text/qtextdocument.h +++ b/src/gui/text/qtextdocument.h @@ -105,7 +105,8 @@ public: enum MetaInformation { DocumentTitle, DocumentUrl, - CssMedia + CssMedia, + FrontMatter, }; void setMetaInformation(MetaInformation info, const QString &); QString metaInformation(MetaInformation info) const; @@ -119,7 +120,7 @@ public: enum MarkdownFeature { MarkdownNoHTML = 0x0020 | 0x0040, MarkdownDialectCommonMark = 0, - MarkdownDialectGitHub = 0x0004 | 0x0008 | 0x0400 | 0x0100 | 0x0200 | 0x0800 | 0x4000 + MarkdownDialectGitHub = 0x0004 | 0x0008 | 0x0400 | 0x0100 | 0x0200 | 0x0800 | 0x4000 | 0x100000 }; Q_DECLARE_FLAGS(MarkdownFeatures, MarkdownFeature) Q_FLAG(MarkdownFeatures) diff --git a/src/gui/text/qtextdocument_p.h b/src/gui/text/qtextdocument_p.h index 7f4675fdd28..1c4edc4329c 100644 --- a/src/gui/text/qtextdocument_p.h +++ b/src/gui/text/qtextdocument_p.h @@ -356,6 +356,7 @@ public: QString title; QString url; QString cssMedia; + QString frontMatter; qreal indentWidth; qreal documentMargin; QUrl baseUrl; diff --git a/src/gui/text/qtextmarkdownimporter.cpp b/src/gui/text/qtextmarkdownimporter.cpp index b73290f9dfc..c9aac01fa92 100644 --- a/src/gui/text/qtextmarkdownimporter.cpp +++ b/src/gui/text/qtextmarkdownimporter.cpp @@ -46,7 +46,8 @@ static_assert(int(QTextMarkdownImporter::FeaturePermissiveAutoLinks) == MD_FLAG_ static_assert(int(QTextMarkdownImporter::FeatureTasklists) == MD_FLAG_TASKLISTS); static_assert(int(QTextMarkdownImporter::FeatureNoHTML) == MD_FLAG_NOHTML); static_assert(int(QTextMarkdownImporter::DialectCommonMark) == MD_DIALECT_COMMONMARK); -static_assert(int(QTextMarkdownImporter::DialectGitHub) == (MD_DIALECT_GITHUB | MD_FLAG_UNDERLINE)); +static_assert(int(QTextMarkdownImporter::DialectGitHub) == + (MD_DIALECT_GITHUB | MD_FLAG_UNDERLINE | 
QTextMarkdownImporter::FeatureFrontMatter)); // -------------------------------------------------------- // MD4C callback function wrappers @@ -139,6 +140,21 @@ void QTextMarkdownImporter::import(const QString &markdown) m_monoFont.setPixelSize(defaultFont.pixelSize()); qCDebug(lcMD) << "default font" << defaultFont << "mono font" << m_monoFont; QByteArray md = markdown.toUtf8(); + if (md.startsWith("---") && m_features.testFlag(QTextMarkdownImporter::FeatureFrontMatter)) { + qsizetype endMarkerPos = md.indexOf("---", 4); + if (endMarkerPos > 4) { + qsizetype firstLinePos = 4; // first line of yaml + while (md.at(firstLinePos) == '\n' || md.at(firstLinePos) == '\r') + ++firstLinePos; + QByteArray frontMatter = md.sliced(firstLinePos, endMarkerPos - firstLinePos); + firstLinePos = endMarkerPos + 4; // first line of markdown after yaml + while (md.at(firstLinePos) == '\n' || md.at(firstLinePos) == '\r') + ++firstLinePos; + md.remove(0, firstLinePos); + doc->setMetaInformation(QTextDocument::FrontMatter, QString::fromUtf8(frontMatter)); + qCDebug(lcMD) << "extracted FrontMatter: size" << frontMatter.size(); + } + } m_cursor.beginEditBlock(); md_parse(md.constData(), MD_SIZE(md.size()), &callbacks, this); m_cursor.endEditBlock(); diff --git a/src/gui/text/qtextmarkdownimporter_p.h b/src/gui/text/qtextmarkdownimporter_p.h index 89e0ab4ad19..8b8f4ec9bb8 100644 --- a/src/gui/text/qtextmarkdownimporter_p.h +++ b/src/gui/text/qtextmarkdownimporter_p.h @@ -46,6 +46,7 @@ public: FeaturePermissiveWWWAutoLinks = 0x0400, FeatureTasklists = 0x0800, FeatureUnderline = 0x4000, + FeatureFrontMatter = 0x100000, // Qt feature, not yet in MD4C // composite flags FeaturePermissiveAutoLinks = FeaturePermissiveMailAutoLinks | FeaturePermissiveURLAutoLinks | FeaturePermissiveWWWAutoLinks, diff --git a/src/gui/text/qtextmarkdownwriter.cpp b/src/gui/text/qtextmarkdownwriter.cpp index 5fb67ccc67a..c8a5d40b528 100644 --- a/src/gui/text/qtextmarkdownwriter.cpp +++ 
b/src/gui/text/qtextmarkdownwriter.cpp @@ -10,6 +10,7 @@ #include "qtexttable.h" #include "qtextcursor.h" #include "qtextimagehandler_p.h" +#include "qtextmarkdownimporter_p.h" #include "qloggingcategory.h" #if QT_CONFIG(itemmodel) #include "qabstractitemmodel.h" @@ -38,6 +39,7 @@ QTextMarkdownWriter::QTextMarkdownWriter(QTextStream &stream, QTextDocument::Mar bool QTextMarkdownWriter::writeAll(const QTextDocument *document) { + writeFrontMatter(document->metaInformation(QTextDocument::FrontMatter)); writeFrame(document->rootFrame()); return true; } @@ -76,6 +78,17 @@ void QTextMarkdownWriter::writeTable(const QAbstractItemModel *table) } #endif +void QTextMarkdownWriter::writeFrontMatter(const QString &fm) +{ + if (fm.isEmpty() || !m_features.testFlag(static_cast<QTextDocument::MarkdownFeature>( + QTextMarkdownImporter::FeatureFrontMatter))) + return; + m_stream << "---\n"_L1 << fm; + if (!fm.endsWith(qtmw_Newline)) + m_stream << qtmw_Newline; + m_stream << "---\n"_L1; +} + void QTextMarkdownWriter::writeFrame(const QTextFrame *frame) { Q_ASSERT(frame); diff --git a/src/gui/text/qtextmarkdownwriter_p.h b/src/gui/text/qtextmarkdownwriter_p.h index b940e37ddc5..21cfeaba391 100644 --- a/src/gui/text/qtextmarkdownwriter_p.h +++ b/src/gui/text/qtextmarkdownwriter_p.h @@ -36,6 +36,7 @@ public: int writeBlock(const QTextBlock &block, bool table, bool ignoreFormat, bool ignoreEmpty); void writeFrame(const QTextFrame *frame); + void writeFrontMatter(const QString &fm); private: struct ListInfo { diff --git a/tests/auto/gui/text/qtextmarkdownimporter/data/yaml.md b/tests/auto/gui/text/qtextmarkdownimporter/data/yaml.md new file mode 100644 index 00000000000..41303a01871 --- /dev/null +++ b/tests/auto/gui/text/qtextmarkdownimporter/data/yaml.md @@ -0,0 +1,11 @@ +--- +name: "Venus" +discoverer: "Galileo Galilei" +title: "A description of the planet Venus" +keywords: + - planets + - solar system + - astronomy +--- +*Venus* is the second planet from the Sun, orbiting it every 
224.7 Earth days. + diff --git a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp index 0fd3a7c22d7..34dbc0e0754 100644 --- a/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp +++ b/tests/auto/gui/text/qtextmarkdownimporter/tst_qtextmarkdownimporter.cpp @@ -43,6 +43,7 @@ private slots: void pathological(); void fencedCodeBlocks_data(); void fencedCodeBlocks(); + void frontMatter(); private: bool isMainFontFixed(); @@ -595,5 +596,27 @@ void tst_QTextMarkdownImporter::fencedCodeBlocks() QCOMPARE(doc.toMarkdown(), rewrite); } +void tst_QTextMarkdownImporter::frontMatter() +{ + QFile f(QFINDTESTDATA("data/yaml.md")); + QVERIFY(f.open(QFile::ReadOnly | QIODevice::Text)); + QString md = QString::fromUtf8(f.readAll()); + f.close(); + const int yamlBegin = md.indexOf("name:"); + const int yamlEnd = md.indexOf("---", yamlBegin); + const QString yaml = md.sliced(yamlBegin, yamlEnd - yamlBegin); + + QTextDocument doc; + QTextMarkdownImporter(&doc, QTextMarkdownImporter::DialectGitHub).import(md); + int blockCount = 0; + for (QTextFrame::iterator iterator = doc.rootFrame()->begin(); !iterator.atEnd(); ++iterator) { + // Check whether the block is text or a horizontal rule + if (!iterator.currentBlock().text().isEmpty()) + ++blockCount; + } + QCOMPARE(blockCount, 1); // yaml is not part of the markdown text + QCOMPARE(doc.metaInformation(QTextDocument::FrontMatter), yaml); // without fences +} + QTEST_MAIN(tst_QTextMarkdownImporter) #include "tst_qtextmarkdownimporter.moc" diff --git a/tests/auto/gui/text/qtextmarkdownwriter/data/yaml.md b/tests/auto/gui/text/qtextmarkdownwriter/data/yaml.md new file mode 100644 index 00000000000..41303a01871 --- /dev/null +++ b/tests/auto/gui/text/qtextmarkdownwriter/data/yaml.md @@ -0,0 +1,11 @@ +--- +name: "Venus" +discoverer: "Galileo Galilei" +title: "A description of the planet Venus" +keywords: + - planets + - solar 
system + - astronomy +--- +*Venus* is the second planet from the Sun, orbiting it every 224.7 Earth days. + diff --git a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp index 36b494e5503..71c4588a6a9 100644 --- a/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp +++ b/tests/auto/gui/text/qtextmarkdownwriter/tst_qtextmarkdownwriter.cpp @@ -36,6 +36,7 @@ private slots: void testWriteNestedNumericLists(); void testWriteNumericListWithStart(); void testWriteTable(); + void frontMatter(); void rewriteDocument_data(); void rewriteDocument(); void fromHtml_data(); @@ -525,6 +526,16 @@ void tst_QTextMarkdownWriter::testWriteTable() QCOMPARE(md, expected); } +void tst_QTextMarkdownWriter::frontMatter() +{ + QTextCursor cursor(document); + cursor.insertText("bar"); + document->setMetaInformation(QTextDocument::FrontMatter, "foo"); + + const QString output = documentToUnixMarkdown(); + QCOMPARE(output, "---\nfoo\n---\nbar\n\n"); +} + void tst_QTextMarkdownWriter::rewriteDocument_data() { QTest::addColumn<QString>("inputFile"); @@ -535,6 +546,7 @@ void tst_QTextMarkdownWriter::rewriteDocument_data() QTest::newRow("word wrap") << "wordWrap.md"; QTest::newRow("links") << "links.md"; QTest::newRow("lists and code blocks") << "listsAndCodeBlocks.md"; + QTest::newRow("front matter") << "yaml.md"; } void tst_QTextMarkdownWriter::rewriteDocument() |