From afb8e12d805c213aff5e45511615d478dddfae8c Mon Sep 17 00:00:00 2001 From: Michael Mc Donnell Date: Tue, 20 Apr 2021 13:28:57 -0700 Subject: [PATCH 01/25] Use std `matches` macro The custom `matches` macro was added before the standard library version was added. The standard library `matches` macro was added in Rust 1.42 [1]. The implementations are slightly different but the documentation is very similar. Additionally, some of the `_tt_as_expr_hack` were no longer used with this change. [1] https://doc.rust-lang.org/std/macro.matches.html --- html5ever/src/tokenizer/mod.rs | 2 +- html5ever/src/tree_builder/mod.rs | 2 +- html5ever/src/tree_builder/tag_sets.rs | 2 +- xml5ever/src/tree_builder/mod.rs | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 267fdf3e..57b720e1 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -24,7 +24,7 @@ use self::char_ref::{CharRef, CharRefTokenizer}; use crate::util::str::lower_ascii_letter; use log::debug; -use mac::{_tt_as_expr_hack, format_if, matches}; +use mac::format_if; use markup5ever::{namespace_url, ns, small_char_set}; use std::borrow::Cow::{self, Borrowed}; use std::collections::BTreeMap; diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 5d392dbb..39bd55c1 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -36,7 +36,7 @@ use crate::tree_builder::tag_sets::*; use crate::tree_builder::types::*; use crate::util::str::to_escaped_string; use log::{debug, log_enabled, warn, Level}; -use mac::{_tt_as_expr_hack, format_if, matches}; +use mac::{_tt_as_expr_hack, format_if}; pub use self::PushFlag::*; diff --git a/html5ever/src/tree_builder/tag_sets.rs b/html5ever/src/tree_builder/tag_sets.rs index 377b34ce..4b46eeb1 100644 --- a/html5ever/src/tree_builder/tag_sets.rs +++ b/html5ever/src/tree_builder/tag_sets.rs @@ -10,7 +10,7 @@ //! Various sets of HTML tag names, and macros for declaring them. use crate::ExpandedName; -use mac::{_tt_as_expr_hack, matches}; +use mac::_tt_as_expr_hack; use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns}; macro_rules! declare_tag_set_impl ( ($param:ident, $b:ident, $supr:ident, $($tag:tt)+) => ( diff --git a/xml5ever/src/tree_builder/mod.rs b/xml5ever/src/tree_builder/mod.rs index 708776d0..1888b2d8 100644 --- a/xml5ever/src/tree_builder/mod.rs +++ b/xml5ever/src/tree_builder/mod.rs @@ -10,7 +10,7 @@ mod types; use log::{debug, warn}; -use mac::{_tt_as_expr_hack, matches, unwrap_or_return}; +use mac::unwrap_or_return; use markup5ever::{local_name, namespace_prefix, namespace_url, ns}; use std::borrow::Cow; use std::borrow::Cow::Borrowed; From ab7e1abaf7459c71f63c3e40347ef932e0bf5212 Mon Sep 17 00:00:00 2001 From: Josh Matthews Date: Mon, 26 Sep 2022 02:05:29 -0400 Subject: [PATCH 02/25] update MSRV to 1.56. 
--- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9e76258c..a4f4521a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - version: [1.49.0, stable, beta, nightly] + version: [1.56.0, stable, beta, nightly] steps: - uses: actions/checkout@v2 From 31f799a2ea6059ce2795178542dbd238c37ce11d Mon Sep 17 00:00:00 2001 From: Dirk Stolle Date: Sat, 24 Sep 2022 22:55:14 +0200 Subject: [PATCH 03/25] Fix two typos --- html5ever/macros/match_token.rs | 2 +- markup5ever/build.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/html5ever/macros/match_token.rs b/html5ever/macros/match_token.rs index 7d73519c..9fbed082 100644 --- a/html5ever/macros/match_token.rs +++ b/html5ever/macros/match_token.rs @@ -334,7 +334,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { } if wildcard.is_some() { - // Push the delimeter `|` if it's not the first tag. + // Push the delimiter `|` if it's not the first tag. arms_code.push(quote!( | )) } arms_code.push(make_tag_pattern(&binding, tag)); diff --git a/markup5ever/build.rs b/markup5ever/build.rs index 38b4fddd..c4bbd564 100644 --- a/markup5ever/build.rs +++ b/markup5ever/build.rs @@ -110,7 +110,7 @@ fn named_entities_to_phf(to: &Path) { &mut file, r#" /// A map of entity names to their codepoints. The second codepoint will -/// be 0 if the entity contains a single codepoint. Entities have their preceeding '&' removed. +/// be 0 if the entity contains a single codepoint. Entities have their preceding '&' removed. /// /// # Examples /// From 45ea64d2aed22bb010d5ce8d35cbd89eb4153e1f Mon Sep 17 00:00:00 2001 From: Dirk Stolle Date: Sat, 24 Sep 2022 23:02:06 +0200 Subject: [PATCH 04/25] Update actions/checkout in GitHub Actions to v3 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a4f4521a..1048029e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: matrix: version: [1.56.0, stable, beta, nightly] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set toolchain run: | From f413b98631f6f2998da48b14ebf34991b45ebcec Mon Sep 17 00:00:00 2001 From: Michael Mc Donnell Date: Fri, 2 Dec 2022 20:54:57 -0800 Subject: [PATCH 05/25] Remove needless borrows Clippy generated several warnings about how "this expression creates a reference which is immediately dereferenced by the compiler" [1]. I've removed the references since they don't do anything. There are also some rustfmt changes that my editor automatically made. [1] https://rust-lang.github.io/rust-clippy/master/index.html#needless_borrow --- markup5ever/util/buffer_queue.rs | 7 ++-- rcdom/lib.rs | 47 +++++++++++++++----------- xml5ever/src/serialize/mod.rs | 6 ++-- xml5ever/src/tokenizer/mod.rs | 58 ++++++++++++++++---------------- xml5ever/src/tree_builder/mod.rs | 18 +++++----- 5 files changed, 70 insertions(+), 66 deletions(-) diff --git a/markup5ever/util/buffer_queue.rs b/markup5ever/util/buffer_queue.rs index d5724890..ab3f0389 100644 --- a/markup5ever/util/buffer_queue.rs +++ b/markup5ever/util/buffer_queue.rs @@ -93,10 +93,7 @@ impl BufferQueue { /// Look at the next available character without removing it, if the queue is not empty. 
pub fn peek(&self) -> Option { debug_assert!( - self.buffers - .iter() - .find(|el| el.len32() == 0) - .is_none(), + self.buffers.iter().find(|el| el.len32() == 0).is_none(), "invariant \"all buffers in the queue are non-empty\" failed" ); self.buffers.front().map(|b| b.chars().next().unwrap()) @@ -152,7 +149,7 @@ impl BufferQueue { let (result, now_empty) = match self.buffers.front_mut() { None => (None, false), Some(buf) => { - let n = set.nonmember_prefix_len(&buf); + let n = set.nonmember_prefix_len(buf); if n > 0 { let out; unsafe { diff --git a/rcdom/lib.rs b/rcdom/lib.rs index 8cfc7b5f..3789d8c2 100644 --- a/rcdom/lib.rs +++ b/rcdom/lib.rs @@ -131,7 +131,11 @@ impl Drop for Node { while let Some(node) = nodes.pop() { let children = mem::replace(&mut *node.children.borrow_mut(), vec![]); nodes.extend(children.into_iter()); - if let NodeData::Element { ref template_contents, .. } = node.data { + if let NodeData::Element { + ref template_contents, + .. + } = node.data + { if let Some(template_contents) = template_contents.borrow_mut().take() { nodes.push(template_contents); } @@ -173,7 +177,7 @@ fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> { .borrow() .iter() .enumerate() - .find(|&(_, child)| Rc::ptr_eq(&child, &target)) + .find(|&(_, child)| Rc::ptr_eq(child, target)) { Some((i, _)) => i, None => panic!("have parent but couldn't find in parent's children!"), @@ -235,7 +239,11 @@ impl TreeSink for RcDom { .. } = target.data { - template_contents.borrow().as_ref().expect("not a template element!").clone() + template_contents + .borrow() + .as_ref() + .expect("not a template element!") + .clone() } else { panic!("not a template element!") } @@ -290,7 +298,7 @@ impl TreeSink for RcDom { match child { NodeOrText::AppendText(ref text) => match parent.children.borrow().last() { Some(h) => { - if append_to_existing_text(h, &text) { + if append_to_existing_text(h, text) { return; } }, @@ -300,7 +308,7 @@ impl TreeSink for RcDom { } append( - &parent, + parent, match child { NodeOrText::AppendText(text) => Node::new(NodeData::Text { contents: RefCell::new(text), @@ -311,7 +319,7 @@ impl TreeSink for RcDom { } fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText) { - let (parent, i) = get_parent_and_index(&sibling) + let (parent, i) = get_parent_and_index(sibling) .expect("append_before_sibling called on node without parent"); let child = match (child, i) { @@ -397,16 +405,16 @@ impl TreeSink for RcDom { } fn remove_from_parent(&mut self, target: &Handle) { - remove_from_parent(&target); + remove_from_parent(target); } fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) { let mut children = node.children.borrow_mut(); let mut new_children = new_parent.children.borrow_mut(); for child in children.iter() { - let previous_parent = child.parent.replace(Some(Rc::downgrade(&new_parent))); + let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); assert!(Rc::ptr_eq( - &node, + node, &previous_parent.unwrap().upgrade().expect("dangling weak") )) } @@ -457,12 +465,13 @@ impl Serialize for SerializableHandle { let mut ops = VecDeque::new(); match traversal_scope { IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())), - ChildrenOnly(_) => ops.extend(self - .0 - .children - .borrow() - .iter() - .map(|h| SerializeOp::Open(h.clone()))) + ChildrenOnly(_) => ops.extend( + self.0 + .children + .borrow() + .iter() + .map(|h| SerializeOp::Open(h.clone())), + ), } while let Some(op) = ops.pop_front() { @@ -486,13 +495,11 @@ 
impl Serialize for SerializableHandle { } }, - NodeData::Doctype { ref name, .. } => serializer.write_doctype(&name)?, + NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?, - NodeData::Text { ref contents } => { - serializer.write_text(&contents.borrow())? - }, + NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?, - NodeData::Comment { ref contents } => serializer.write_comment(&contents)?, + NodeData::Comment { ref contents } => serializer.write_comment(contents)?, NodeData::ProcessingInstruction { ref target, diff --git a/xml5ever/src/serialize/mod.rs b/xml5ever/src/serialize/mod.rs index 182ed9c8..95e16a0d 100644 --- a/xml5ever/src/serialize/mod.rs +++ b/xml5ever/src/serialize/mod.rs @@ -89,7 +89,7 @@ fn write_to_buf_escaped(writer: &mut W, text: &str, attr_mode: bool) - #[inline] fn write_qual_name(writer: &mut W, name: &QualName) -> io::Result<()> { if let Some(ref prefix) = name.prefix { - writer.write_all(&prefix.as_bytes())?; + writer.write_all(prefix.as_bytes())?; writer.write_all(b":")?; writer.write_all(&*name.local.as_bytes())?; } else { @@ -158,7 +158,7 @@ impl Serializer for XmlSerializer { self.writer.write_all(b" xmlns")?; if let Some(ref p) = *prefix { self.writer.write_all(b":")?; - self.writer.write_all(&*p.as_bytes())?; + self.writer.write_all(p.as_bytes())?; } self.writer.write_all(b"=\"")?; @@ -173,7 +173,7 @@ impl Serializer for XmlSerializer { } for (name, value) in attrs { self.writer.write_all(b" ")?; - self.qual_attr_name(&name)?; + self.qual_attr_name(name)?; self.writer.write_all(b"=\"")?; write_to_buf_escaped(&mut self.writer, value, true)?; self.writer.write_all(b"\"")?; diff --git a/xml5ever/src/tokenizer/mod.rs b/xml5ever/src/tokenizer/mod.rs index 51222bc7..ee876101 100644 --- a/xml5ever/src/tokenizer/mod.rs +++ b/xml5ever/src/tokenizer/mod.rs @@ -61,16 +61,16 @@ fn process_qname(tag_name: StrTendril) -> QualName { // a:b // Since StrTendril are UTF-8, we know that minimal size in bytes must be // three bytes minimum. 
- let split = if (&*tag_name).as_bytes().len() < 3 { + let split = if (*tag_name).as_bytes().len() < 3 { None } else { - QualNameTokenizer::new((&*tag_name).as_bytes()).run() + QualNameTokenizer::new((*tag_name).as_bytes()).run() }; match split { None => QualName::new(None, ns!(), LocalName::from(&*tag_name)), Some(col) => { - let len = (&*tag_name).as_bytes().len() as u32; + let len = (*tag_name).as_bytes().len() as u32; let prefix = tag_name.subtendril(0, col); let local = tag_name.subtendril(col + 1, len - col - 1); let ns = ns!(); // Actual namespace URL set in XmlTreeBuilder::bind_qname @@ -248,8 +248,8 @@ impl XmlTokenizer { } // Exclude forbidden Unicode characters - if self.opts.exact_errors && - match c as u32 { + if self.opts.exact_errors + && match c as u32 { 0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true, n if (n & 0xFFFE) == 0xFFFE => true, _ => false, @@ -1141,11 +1141,11 @@ impl XmlTokenizer { }, XmlState::CommentLessThanBangDash => go!(self: reconsume CommentEndDash), XmlState::CommentLessThanBangDashDash => go!(self: reconsume CommentEnd), - XmlState::CommentStartDash | - XmlState::Comment | - XmlState::CommentEndDash | - XmlState::CommentEnd | - XmlState::CommentEndBang => go!(self: error_eof; emit_comment; eof), + XmlState::CommentStartDash + | XmlState::Comment + | XmlState::CommentEndDash + | XmlState::CommentEnd + | XmlState::CommentEndBang => go!(self: error_eof; emit_comment; eof), XmlState::TagState => go!(self: error_eof; emit '<'; to Data), XmlState::EndTagState => go!(self: error_eof; emit '<'; emit '/'; to Data), XmlState::TagEmpty => go!(self: error_eof; to TagAttrNameBefore), @@ -1155,25 +1155,25 @@ impl XmlTokenizer { XmlState::Pi => go!(self: error_eof; to BogusComment), XmlState::PiTargetAfter | XmlState::PiAfter => go!(self: reconsume PiData), XmlState::MarkupDecl => go!(self: error_eof; to BogusComment), - XmlState::TagName | - XmlState::TagAttrNameBefore | - XmlState::EndTagName | - XmlState::TagAttrNameAfter | - XmlState::EndTagNameAfter | - XmlState::TagAttrValueBefore | - XmlState::TagAttrValue(_) => go!(self: error_eof; emit_tag Data), + XmlState::TagName + | XmlState::TagAttrNameBefore + | XmlState::EndTagName + | XmlState::TagAttrNameAfter + | XmlState::EndTagNameAfter + | XmlState::TagAttrValueBefore + | XmlState::TagAttrValue(_) => go!(self: error_eof; emit_tag Data), XmlState::PiData | XmlState::PiTarget => go!(self: error_eof; emit_pi Data), XmlState::TagAttrName => go!(self: error_eof; emit_start_tag Data), - XmlState::BeforeDoctypeName | - XmlState::Doctype | - XmlState::DoctypeName | - XmlState::AfterDoctypeName | - XmlState::AfterDoctypeKeyword(_) | - XmlState::BeforeDoctypeIdentifier(_) | - XmlState::AfterDoctypeIdentifier(_) | - XmlState::DoctypeIdentifierSingleQuoted(_) | - XmlState::DoctypeIdentifierDoubleQuoted(_) | - XmlState::BetweenDoctypePublicAndSystemIdentifiers => { + XmlState::BeforeDoctypeName + | XmlState::Doctype + | XmlState::DoctypeName + | XmlState::AfterDoctypeName + | XmlState::AfterDoctypeKeyword(_) + | XmlState::BeforeDoctypeIdentifier(_) + | XmlState::AfterDoctypeIdentifier(_) + | XmlState::DoctypeIdentifierSingleQuoted(_) + | XmlState::DoctypeIdentifierDoubleQuoted(_) + | XmlState::BetweenDoctypePublicAndSystemIdentifiers => { go!(self: error_eof; emit_doctype; to Data) }, XmlState::BogusDoctype => go!(self: emit_doctype; to Data), @@ -1251,8 +1251,8 @@ impl XmlTokenizer { value: replace(&mut self.current_attr_value, StrTendril::new()), }; - if qname.local == local_name!("xmlns") || - 
qname.prefix == Some(namespace_prefix!("xmlns")) + if qname.local == local_name!("xmlns") + || qname.prefix == Some(namespace_prefix!("xmlns")) { self.current_tag_attrs.insert(0, attr); } else { diff --git a/xml5ever/src/tree_builder/mod.rs b/xml5ever/src/tree_builder/mod.rs index 1888b2d8..a83e7dc9 100644 --- a/xml5ever/src/tree_builder/mod.rs +++ b/xml5ever/src/tree_builder/mod.rs @@ -246,10 +246,10 @@ where pub fn trace_handles(&self, tracer: &dyn Tracer) { tracer.trace_handle(&self.doc_handle); for e in self.open_elems.iter() { - tracer.trace_handle(&e); + tracer.trace_handle(e); } if let Some(h) = self.curr_elem.as_ref() { - tracer.trace_handle(&h); + tracer.trace_handle(h); } } @@ -278,7 +278,7 @@ where } fn declare_ns(&mut self, attr: &mut Attribute) { - if let Err(msg) = self.current_namespace.insert_ns(&attr) { + if let Err(msg) = self.current_namespace.insert_ns(attr) { self.sink.parse_error(msg); } else { attr.name.ns = ns!(xmlns); @@ -346,17 +346,17 @@ where fn process_namespaces(&mut self, tag: &mut Tag) { let mut new_attr = vec![]; // First we extract all namespace declarations - for mut attr in tag.attrs.iter_mut().filter(|attr| { - attr.name.prefix == Some(namespace_prefix!("xmlns")) || - attr.name.local == local_name!("xmlns") + for attr in tag.attrs.iter_mut().filter(|attr| { + attr.name.prefix == Some(namespace_prefix!("xmlns")) + || attr.name.local == local_name!("xmlns") }) { - self.declare_ns(&mut attr); + self.declare_ns(attr); } // Then we bind those namespace declarations to attributes for attr in tag.attrs.iter_mut().filter(|attr| { - attr.name.prefix != Some(namespace_prefix!("xmlns")) && - attr.name.local != local_name!("xmlns") + attr.name.prefix != Some(namespace_prefix!("xmlns")) + && attr.name.local != local_name!("xmlns") }) { if self.bind_attr_qname(&mut attr.name) { new_attr.push(attr.clone()); From 943c03d547bd1a1043e62d44c120435b7bc789d9 Mon Sep 17 00:00:00 2001 From: Moritz Hedtke Date: Mon, 26 Dec 2022 01:15:12 +0100 Subject: [PATCH 06/25] Create dependabot.yml --- .github/dependabot.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..40f23173 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "weekly" From 9c2641d632bdd11b3a991c9203b05fa9effed15f Mon Sep 17 00:00:00 2001 From: Moritz Hedtke Date: Tue, 27 Dec 2022 18:49:38 +0100 Subject: [PATCH 07/25] Update minimum supported rust version to 1.60 This is needed for the new version of phf. 
--- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1048029e..9fc6b07c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - version: [1.56.0, stable, beta, nightly] + version: [1.60.0, stable, beta, nightly] steps: - uses: actions/checkout@v3 From b9f858c3874134afeb0dcfecc9998bae67a5f2d3 Mon Sep 17 00:00:00 2001 From: Moritz Hedtke Date: Tue, 27 Dec 2022 18:50:37 +0100 Subject: [PATCH 08/25] Update phf dependency from 0.10 to 0.11 This reduces duplicate dependencies when using with other crates. --- markup5ever/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markup5ever/Cargo.toml b/markup5ever/Cargo.toml index 0f629901..68c2f3de 100644 --- a/markup5ever/Cargo.toml +++ b/markup5ever/Cargo.toml @@ -15,10 +15,10 @@ path = "lib.rs" [dependencies] string_cache = "0.8" -phf = "0.10" +phf = "0.11" tendril = "0.4" log = "0.4" [build-dependencies] string_cache_codegen = "0.5.1" -phf_codegen = "0.10" +phf_codegen = "0.11" From f1b526740b1b759853ba874565e03465b0960023 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 9 Jan 2023 07:13:29 +0000 Subject: [PATCH 09/25] Update typed-arena requirement from 1.3.0 to 2.0.2 Updates the requirements on [typed-arena](https://github.com/SimonSapin/rust-typed-arena) to permit the latest version. - [Release notes](https://github.com/SimonSapin/rust-typed-arena/releases) - [Changelog](https://github.com/SimonSapin/rust-typed-arena/blob/master/CHANGELOG.md) - [Commits](https://github.com/SimonSapin/rust-typed-arena/compare/1.3.0...2.0.2) --- updated-dependencies: - dependency-name: typed-arena dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] --- html5ever/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/html5ever/Cargo.toml b/html5ever/Cargo.toml index 4f3307ee..39e79992 100644 --- a/html5ever/Cargo.toml +++ b/html5ever/Cargo.toml @@ -17,7 +17,7 @@ mac = "0.1" markup5ever = { version = "0.11", path = "../markup5ever" } [dev-dependencies] -typed-arena = "1.3.0" +typed-arena = "2.0.2" [target.'cfg(bench)'.dev-dependencies] criterion = "0.3" From 9ebf6b20e68a1f3d8720706ab46fde6570c1ab6f Mon Sep 17 00:00:00 2001 From: Mukilan Thiyagarajan Date: Tue, 11 Jul 2023 16:45:20 +0530 Subject: [PATCH 10/25] Enable merge queue (#500) --- .github/workflows/main.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9fc6b07c..791f5f0a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,8 +2,10 @@ name: CI on: push: - branches: [auto] + branches: [master] pull_request: + merge_group: + types: [checks_requested] jobs: ci: @@ -43,7 +45,7 @@ jobs: run: cargo doc build_result: - name: homu build finished + name: Result runs-on: ubuntu-latest needs: - "ci" From d31791faac4dacb4645e5e69aeb54ed4fcbfff33 Mon Sep 17 00:00:00 2001 From: Dominic Farolino Date: Thu, 3 Aug 2023 03:59:51 +0900 Subject: [PATCH 11/25] Stop allocating string for rcdom example (#503) * Stop allocating string for rcdom example * Space --- rcdom/examples/print-rcdom.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/rcdom/examples/print-rcdom.rs b/rcdom/examples/print-rcdom.rs index 96da22c5..c96f5c06 100644 --- a/rcdom/examples/print-rcdom.rs +++ b/rcdom/examples/print-rcdom.rs @@ -13,8 +13,6 @@ extern crate markup5ever_rcdom as rcdom; use std::default::Default; use std::io; -use std::iter::repeat; -use std::string::String; use html5ever::parse_document; use html5ever::tendril::TendrilSink; @@ -24,8 +22,7 @@ use rcdom::{Handle, NodeData, RcDom}; fn walk(indent: usize, handle: &Handle) { let node = handle; - // FIXME: don't allocate - print!("{}", repeat(" ").take(indent).collect::()); + for _ in 0..indent { print!(" "); } match node.data { NodeData::Document => println!("#Document"), From c736fec7dadc4f2c0f7580d0ba13373d2616a85e Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Fri, 11 Aug 2023 10:59:23 +0200 Subject: [PATCH 12/25] Update html5lib-tests (#460) * wip on updating html5lib-tests * fix up parse error parsing * add better debug output * wip * wip * wip * wip * adjust all switches to BogusComment (according to html5gum) * wip * wip * wip * wip * wip * wip * wip (test3 done) * fix test1 * wip on entities.test * get rid of addnl_allowed in charref tokenizer * remove bogusname??? * fix escapeFlag.test: End tag surrounded by bogus comment in RCDATA or RAWTEXT (in state RawData(Rawtext)) * update html5lib tests * Revert "remove bogusname???" This reverts commit 575b07719ca860cf710839cf082ed875a29b3236. * wip restore bogusname * more bugfixes * Revert "wip restore bogusname" This reverts commit eb281656da577d40ab506d75de4b722b49ed3d86. * fix a bug when peeking characters in BeforeAttributeValue * make eat() pre-process input characters input where it matters (JSON-escaped): ", result: Option, + is_consumed_in_attribute: bool, num: u32, num_too_big: bool, @@ -61,12 +61,10 @@ pub struct CharRefTokenizer { } impl CharRefTokenizer { - // NB: We assume that we have an additional allowed character iff we're - // tokenizing in an attribute value. 
- pub fn new(addnl_allowed: Option) -> CharRefTokenizer { + pub fn new(is_consumed_in_attribute: bool) -> CharRefTokenizer { CharRefTokenizer { + is_consumed_in_attribute, state: Begin, - addnl_allowed, result: None, num: 0, num_too_big: false, @@ -140,20 +138,18 @@ impl CharRefTokenizer { input: &mut BufferQueue, ) -> Status { match unwrap_or_return!(tokenizer.peek(input), Stuck) { - '\t' | '\n' | '\x0C' | ' ' | '<' | '&' => self.finish_none(), - c if Some(c) == self.addnl_allowed => self.finish_none(), + 'a'..='z' | 'A'..='Z' | '0'..='9' => { + self.state = Named; + self.name_buf_opt = Some(StrTendril::new()); + Progress + }, '#' => { tokenizer.discard_char(input); self.state = Octothorpe; Progress }, - - _ => { - self.state = Named; - self.name_buf_opt = Some(StrTendril::new()); - Progress - }, + _ => self.finish_none(), } } @@ -277,7 +273,10 @@ impl CharRefTokenizer { tokenizer: &mut Tokenizer, input: &mut BufferQueue, ) -> Status { - let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); + // peek + discard skips over newline normalization, therefore making it easier to + // un-consume + let c = unwrap_or_return!(tokenizer.peek(input), Stuck); + tokenizer.discard_char(input); self.name_buf_mut().push_char(c); match data::NAMED_ENTITIES.get(&self.name_buf()[..]) { // We have either a full match or a prefix of one. @@ -356,26 +355,20 @@ impl CharRefTokenizer { Some(self.name_buf()[name_len..].chars().next().unwrap()) }; - // "If the character reference is being consumed as part of an - // attribute, and the last character matched is not a U+003B - // SEMICOLON character (;), and the next character is either a - // U+003D EQUALS SIGN character (=) or an alphanumeric ASCII - // character, then, for historical reasons, all the characters - // that were matched after the U+0026 AMPERSAND character (&) - // must be unconsumed, and nothing is returned. However, if - // this next character is in fact a U+003D EQUALS SIGN - // character (=), then this is a parse error" - - let unconsume_all = match (self.addnl_allowed, last_matched, next_after) { + // If the character reference was consumed as part of an attribute, and the last + // character matched is not a U+003B SEMICOLON character (;), and the next input + // character is either a U+003D EQUALS SIGN character (=) or an ASCII alphanumeric, + // then, for historical reasons, flush code points consumed as a character + // reference and switch to the return state. + + let unconsume_all = match (self.is_consumed_in_attribute, last_matched, next_after) { (_, ';', _) => false, - (Some(_), _, Some('=')) => { - tokenizer.emit_error(Borrowed( - "Equals sign after character reference in attribute", - )); - true - }, - (Some(_), _, Some(c)) if c.is_ascii_alphanumeric() => true, + (true, _, Some('=')) => true, + (true, _, Some(c)) if c.is_ascii_alphanumeric() => true, _ => { + // 1. If the last character matched is not a U+003B SEMICOLON character + // (;), then this is a missing-semicolon-after-character-reference parse + // error. 
tokenizer.emit_error(Borrowed( "Character reference does not end with semicolon", )); @@ -388,6 +381,7 @@ impl CharRefTokenizer { self.finish_none() } else { input.push_front(StrTendril::from_slice(&self.name_buf()[name_len..])); + tokenizer.ignore_lf = false; self.result = Some(CharRef { chars: [from_u32(c1).unwrap(), from_u32(c2).unwrap()], num_chars: if c2 == 0 { 1 } else { 2 }, @@ -403,7 +397,10 @@ impl CharRefTokenizer { tokenizer: &mut Tokenizer, input: &mut BufferQueue, ) -> Status { - let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); + // peek + discard skips over newline normalization, therefore making it easier to + // un-consume + let c = unwrap_or_return!(tokenizer.peek(input), Stuck); + tokenizer.discard_char(input); self.name_buf_mut().push_char(c); match c { _ if c.is_ascii_alphanumeric() => return Progress, diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 20a96204..0fb0d014 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -46,6 +46,7 @@ pub enum ProcessResult { } #[must_use] +#[derive(Debug)] pub enum TokenizerResult { Done, Script(Handle), @@ -318,14 +319,20 @@ impl Tokenizer { // Check if the next characters are an ASCII case-insensitive match. See // BufferQueue::eat. // - // NB: this doesn't do input stream preprocessing or set the current input - // character. + // NB: this doesn't set the current input character. fn eat( &mut self, input: &mut BufferQueue, pat: &str, eq: fn(&u8, &u8) -> bool, ) -> Option { + if self.ignore_lf { + self.ignore_lf = false; + if self.peek(input) == Some('\n') { + self.discard_char(input); + } + } + input.push_front(replace(&mut self.temp_buf, StrTendril::new())); match input.eat(pat, eq) { None if self.at_eof => Some(false), @@ -545,10 +552,10 @@ impl Tokenizer { } } - fn consume_char_ref(&mut self, addnl_allowed: Option) { - // NB: The char ref tokenizer assumes we have an additional allowed - // character iff we're tokenizing in an attribute value. - self.char_ref_tokenizer = Some(Box::new(CharRefTokenizer::new(addnl_allowed))); + fn consume_char_ref(&mut self) { + self.char_ref_tokenizer = Some( + Box::new(CharRefTokenizer::new(matches!(self.state, states::AttributeValue(_)))) + ); } fn emit_eof(&mut self) { @@ -564,7 +571,16 @@ impl Tokenizer { } fn discard_char(&mut self, input: &mut BufferQueue) { - self.get_char(input); + // peek() deals in un-processed characters (no newline normalization), while get_char() + // does. + // + // since discard_char is supposed to be used in combination with peek(), discard_char must + // discard a single raw input character, not a normalized newline. + if self.reconsume { + self.reconsume = false; + } else { + input.next(); + } } fn emit_error(&mut self, error: Cow<'static, str>) { @@ -632,8 +648,7 @@ macro_rules! go ( ( $me:ident : reconsume $s:ident $k1:expr ) => ({ $me.reconsume = true; go!($me: to $s $k1); }); ( $me:ident : reconsume $s:ident $k1:ident $k2:expr ) => ({ $me.reconsume = true; go!($me: to $s $k1 $k2); }); - ( $me:ident : consume_char_ref ) => ({ $me.consume_char_ref(None); return ProcessResult::Continue; }); - ( $me:ident : consume_char_ref $addnl:expr ) => ({ $me.consume_char_ref(Some($addnl)); return ProcessResult::Continue; }); + ( $me:ident : consume_char_ref ) => ({ $me.consume_char_ref(); return ProcessResult::Continue; }); // We have a default next state after emitting a tag, but the sink can override. 
( $me:ident : emit_tag $s:ident ) => ({ @@ -769,9 +784,9 @@ impl Tokenizer { //§ tag-open-state states::TagOpen => loop { match get_char!(self, input) { - '!' => go!(self: clear_temp; to MarkupDeclarationOpen), + '!' => go!(self: to MarkupDeclarationOpen), '/' => go!(self: to EndTagOpen), - '?' => go!(self: error; clear_comment; push_comment '?'; to BogusComment), + '?' => go!(self: error; clear_comment; reconsume BogusComment), c => match lower_ascii_letter(c) { Some(cl) => go!(self: create_tag StartTag cl; to TagName), None => go!(self: error; emit '<'; reconsume Data), @@ -783,12 +798,9 @@ impl Tokenizer { states::EndTagOpen => loop { match get_char!(self, input) { '>' => go!(self: error; to Data), - '\0' => { - go!(self: error; clear_comment; push_comment '\u{fffd}'; to BogusComment) - }, c => match lower_ascii_letter(c) { Some(cl) => go!(self: create_tag EndTag cl; to TagName), - None => go!(self: error; clear_comment; push_comment c; to BogusComment), + None => go!(self: error; clear_comment; reconsume BogusComment), }, } }, @@ -852,7 +864,7 @@ impl Tokenizer { match c { '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName), '/' => go!(self: to SelfClosingStartTag), - '>' => go!(self: emit_tag Data), + '>' => go!(self: clear_temp; emit_tag Data), _ => (), } } @@ -1014,9 +1026,6 @@ impl Tokenizer { '\t' | '\n' | '\r' | '\x0C' | ' ' => go!(self: discard_char input), '"' => go!(self: discard_char input; to AttributeValue DoubleQuoted), '\'' => go!(self: discard_char input; to AttributeValue SingleQuoted), - '\0' => { - go!(self: discard_char input; error; push_value '\u{fffd}'; to AttributeValue Unquoted) - }, '>' => go!(self: discard_char input; error; emit_tag Data), _ => go!(self: to AttributeValue Unquoted), } @@ -1026,7 +1035,7 @@ impl Tokenizer { states::AttributeValue(DoubleQuoted) => loop { match pop_except_from!(self, input, small_char_set!('\r' '"' '&' '\0' '\n')) { FromSet('"') => go!(self: to AfterAttributeValueQuoted), - FromSet('&') => go!(self: consume_char_ref '"'), + FromSet('&') => go!(self: consume_char_ref), FromSet('\0') => go!(self: error; push_value '\u{fffd}'), FromSet(c) => go!(self: push_value c), NotFromSet(ref b) => go!(self: append_value b), @@ -1037,7 +1046,7 @@ impl Tokenizer { states::AttributeValue(SingleQuoted) => loop { match pop_except_from!(self, input, small_char_set!('\r' '\'' '&' '\0' '\n')) { FromSet('\'') => go!(self: to AfterAttributeValueQuoted), - FromSet('&') => go!(self: consume_char_ref '\''), + FromSet('&') => go!(self: consume_char_ref), FromSet('\0') => go!(self: error; push_value '\u{fffd}'), FromSet(c) => go!(self: push_value c), NotFromSet(ref b) => go!(self: append_value b), @@ -1054,7 +1063,7 @@ impl Tokenizer { FromSet('\t') | FromSet('\n') | FromSet('\x0C') | FromSet(' ') => { go!(self: to BeforeAttributeName) }, - FromSet('&') => go!(self: consume_char_ref '>'), + FromSet('&') => go!(self: consume_char_ref), FromSet('>') => go!(self: emit_tag Data), FromSet('\0') => go!(self: error; push_value '\u{fffd}'), FromSet(c) => { @@ -1110,12 +1119,46 @@ impl Tokenizer { //§ comment-state states::Comment => loop { match get_char!(self, input) { + c @ '<' => go!(self: push_comment c; to CommentLessThanSign), '-' => go!(self: to CommentEndDash), '\0' => go!(self: error; push_comment '\u{fffd}'), c => go!(self: push_comment c), } }, + //§ comment-less-than-sign-state + states::CommentLessThanSign => loop { + match get_char!(self, input) { + c @ '!' 
=> go!(self: push_comment c; to CommentLessThanSignBang), + c @ '<' => go!(self: push_comment c), + _ => go!(self: reconsume Comment), + } + }, + + //§ comment-less-than-sign-bang + states::CommentLessThanSignBang => loop { + match get_char!(self, input) { + '-' => go!(self: to CommentLessThanSignBangDash), + _ => go!(self: reconsume Comment), + } + }, + + //§ comment-less-than-sign-bang-dash + states::CommentLessThanSignBangDash => loop { + match get_char!(self, input) { + '-' => go!(self: to CommentLessThanSignBangDashDash), + _ => go!(self: reconsume CommentEndDash), + } + }, + + //§ comment-less-than-sign-bang-dash-dash + states::CommentLessThanSignBangDashDash => loop { + match get_char!(self, input) { + '>' => go!(self: reconsume CommentEnd), + _ => go!(self: error; reconsume CommentEnd), + } + }, + //§ comment-end-dash-state states::CommentEndDash => loop { match get_char!(self, input) { @@ -1129,10 +1172,9 @@ impl Tokenizer { states::CommentEnd => loop { match get_char!(self, input) { '>' => go!(self: emit_comment; to Data), - '\0' => go!(self: error; append_comment "--\u{fffd}"; to Comment), - '!' => go!(self: error; to CommentEndBang), - '-' => go!(self: error; push_comment '-'), - c => go!(self: error; append_comment "--"; push_comment c; to Comment), + '!' => go!(self: to CommentEndBang), + '-' => go!(self: push_comment '-'), + _ => go!(self: append_comment "--"; reconsume Comment), } }, @@ -1140,7 +1182,7 @@ impl Tokenizer { states::CommentEndBang => loop { match get_char!(self, input) { '-' => go!(self: append_comment "--!"; to CommentEndDash), - '>' => go!(self: emit_comment; to Data), + '>' => go!(self: error; emit_comment; to Data), '\0' => go!(self: error; append_comment "--!\u{fffd}"; to Comment), c => go!(self: append_comment "--!"; push_comment c; to Comment), } @@ -1150,6 +1192,7 @@ impl Tokenizer { states::Doctype => loop { match get_char!(self, input) { '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeDoctypeName), + '>' => go!(self: reconsume BeforeDoctypeName), _ => go!(self: error; reconsume BeforeDoctypeName), } }, @@ -1187,7 +1230,7 @@ impl Tokenizer { match get_char!(self, input) { '\t' | '\n' | '\x0C' | ' ' => (), '>' => go!(self: emit_doctype; to Data), - _ => go!(self: error; force_quirks; to BogusDoctype), + _ => go!(self: error; force_quirks; reconsume BogusDoctype), } } }, @@ -1203,7 +1246,7 @@ impl Tokenizer { go!(self: error; clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind) }, '>' => go!(self: error; force_quirks; emit_doctype; to Data), - _ => go!(self: error; force_quirks; to BogusDoctype), + _ => go!(self: error; force_quirks; reconsume BogusDoctype), } }, @@ -1214,7 +1257,7 @@ impl Tokenizer { '"' => go!(self: clear_doctype_id kind; to DoctypeIdentifierDoubleQuoted kind), '\'' => go!(self: clear_doctype_id kind; to DoctypeIdentifierSingleQuoted kind), '>' => go!(self: error; force_quirks; emit_doctype; to Data), - _ => go!(self: error; force_quirks; to BogusDoctype), + _ => go!(self: error; force_quirks; reconsume BogusDoctype), } }, @@ -1251,7 +1294,7 @@ impl Tokenizer { '\'' => { go!(self: error; clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) }, - _ => go!(self: error; force_quirks; to BogusDoctype), + _ => go!(self: error; force_quirks; reconsume BogusDoctype), } }, @@ -1260,7 +1303,7 @@ impl Tokenizer { match get_char!(self, input) { '\t' | '\n' | '\x0C' | ' ' => (), '>' => go!(self: emit_doctype; to Data), - _ => go!(self: error; to BogusDoctype), + _ => go!(self: error; reconsume BogusDoctype), } }, @@ -1275,7 
+1318,7 @@ impl Tokenizer { '\'' => { go!(self: clear_doctype_id System; to DoctypeIdentifierSingleQuoted System) }, - _ => go!(self: error; force_quirks; to BogusDoctype), + _ => go!(self: error; force_quirks; reconsume BogusDoctype), } }, @@ -1283,6 +1326,7 @@ impl Tokenizer { states::BogusDoctype => loop { match get_char!(self, input) { '>' => go!(self: emit_doctype; to Data), + '\0' => go!(self: error), _ => (), } }, @@ -1291,7 +1335,7 @@ impl Tokenizer { states::BogusComment => loop { match get_char!(self, input) { '>' => go!(self: emit_comment; to Data), - '\0' => go!(self: push_comment '\u{fffd}'), + '\0' => go!(self: error; push_comment '\u{fffd}'), c => go!(self: push_comment c), } }, @@ -1311,7 +1355,7 @@ impl Tokenizer { go!(self: clear_temp; to CdataSection); } } - go!(self: error; to BogusComment); + go!(self: error; clear_comment; to BogusComment); } }, @@ -1455,13 +1499,14 @@ impl Tokenizer { states::BeforeAttributeName | states::AttributeName | states::AfterAttributeName | - states::BeforeAttributeValue | states::AttributeValue(_) | states::AfterAttributeValueQuoted | states::SelfClosingStartTag | states::ScriptDataEscapedDash(_) | states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data), + states::BeforeAttributeValue => go!(self: reconsume AttributeValue Unquoted), + states::TagOpen => go!(self: error_eof; emit '<'; to Data), states::EndTagOpen => go!(self: error_eof; emit '<'; emit '/'; to Data), @@ -1493,6 +1538,12 @@ impl Tokenizer { states::CommentEnd | states::CommentEndBang => go!(self: error_eof; emit_comment; to Data), + states::CommentLessThanSign | states::CommentLessThanSignBang => go!(self: reconsume Comment), + + states::CommentLessThanSignBangDash => go!(self: reconsume CommentEndDash), + + states::CommentLessThanSignBangDashDash => go!(self: reconsume CommentEnd), + states::Doctype | states::BeforeDoctypeName => { go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data) }, diff --git a/html5ever/src/tokenizer/states.rs b/html5ever/src/tokenizer/states.rs index d455e9a8..3c320188 100644 --- a/html5ever/src/tokenizer/states.rs +++ b/html5ever/src/tokenizer/states.rs @@ -73,6 +73,10 @@ pub enum State { CommentStart, CommentStartDash, Comment, + CommentLessThanSign, + CommentLessThanSignBang, + CommentLessThanSignBangDash, + CommentLessThanSignBangDashDash, CommentEndDash, CommentEnd, CommentEndBang, diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 39bd55c1..98b209fb 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -1630,7 +1630,6 @@ where local_name!("xlink:show") => Some(qualname!("xlink" xlink "show")), local_name!("xlink:title") => Some(qualname!("xlink" xlink "title")), local_name!("xlink:type") => Some(qualname!("xlink" xlink "type")), - local_name!("xml:base") => Some(qualname!("xml" xml "base")), local_name!("xml:lang") => Some(qualname!("xml" xml "lang")), local_name!("xml:space") => Some(qualname!("xml" xml "space")), local_name!("xmlns") => Some(qualname!("" xmlns "xmlns")), @@ -1662,18 +1661,13 @@ where fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult { self.unexpected(&tag); - if self.is_fragment() { - self.foreign_start_tag(tag) - } else { + while !self.current_node_in(|n| { + *n.ns == ns!(html) || + mathml_text_integration_point(n) || + svg_html_integration_point(n) + }) { self.pop(); - while !self.current_node_in(|n| { - *n.ns == ns!(html) || - mathml_text_integration_point(n) || - 
svg_html_integration_point(n)
-            }) {
-                self.pop();
-            }
-            ReprocessForeign(TagToken(tag))
         }
+        self.step(self.mode, TagToken(tag))
     }
 }
 
diff --git a/html5ever/src/tree_builder/rules.rs b/html5ever/src/tree_builder/rules.rs
index d9a4ba1f..521ce1cc 100644
--- a/html5ever/src/tree_builder/rules.rs
+++ b/html5ever/src/tree_builder/rules.rs
@@ -337,7 +337,7 @@ where
                 tag @ </address> </article> </aside> </blockquote> </button> </center>
                     </details> </dialog> </dir> </div> </dl> </fieldset> </figcaption>
                     </figure> </footer> </header> </hgroup> </listing> </main> </menu>
-                    </nav> </ol> </pre> </section> </summary> </ul> => {
+                    </nav> </ol> </pre> </search> </section> </summary> </ul> => {
                     if !self.in_scope_named(default_scope, tag.name.clone()) {
                         self.unexpected(&tag);
                     } else {
@@ -1115,6 +1115,18 @@ where
                 Done
             }
 
+            tag @ <hr> => {
+                if self.current_node_named(local_name!("option")) {
+                    self.pop();
+                }
+                if self.current_node_named(local_name!("optgroup")) {
+                    self.pop();
+                }
+                self.insert_element_for(tag);
+                self.pop();
+                DoneAckSelfClosing
+            }
+
             tag @ </optgroup> => {
                 if self.open_elems.len() >= 2
                     && self.current_node_named(local_name!("option"))
@@ -1388,7 +1400,7 @@ where
                     <dt> <em> <embed> <h1> <h2> <h3> <h4> <h5> <h6> <head> <hr> <i>
                     <img> <li> <listing> <menu> <meta> <nobr> <ol> <p> <pre> <ruby>
                     <s> <small> <span> <strong> <strike> <sub> <sup> <table> <tt>
-                    <u> <ul> <var> => self.unexpected_start_tag_in_foreign_content(tag),
+                    <u> <ul> <var> </br> </p>
          => self.unexpected_start_tag_in_foreign_content(tag), tag @ => { let unexpected = tag.attrs.iter().any(|attr| { diff --git a/markup5ever/local_names.txt b/markup5ever/local_names.txt index fdd57f82..47c635c8 100644 --- a/markup5ever/local_names.txt +++ b/markup5ever/local_names.txt @@ -810,6 +810,7 @@ scrolldelay scrolling sdev seamless +search sec sech section diff --git a/rcdom/custom-html5lib-tokenizer-tests/regression.test b/rcdom/custom-html5lib-tokenizer-tests/regression.test new file mode 100644 index 00000000..1a7c5e6e --- /dev/null +++ b/rcdom/custom-html5lib-tokenizer-tests/regression.test @@ -0,0 +1,44 @@ +{"tests": [ + +{"description": "Nested HTML comment", +"input": "", +"output": [ + ["StartTag", "j", {"0": ""}] +], +"errors": [ + {"code": "missing-attribute-value"} +]}, + +{"description": "Windows newline in docstring", +"input": "", +"output": [], +"errors": [ + {"code": "eof-in-tag"} +]}, + +{"description": "Windows newline between unquoted attributes", +"input": "", +"output": [], +"errors": [ + {"code": "missing-semicolon-after-character-reference"}, + {"code": "eof-in-tag"} +]}, + +{"description": "Windows newline after bogusname", +"input": "&0\r\n", +"output": [["Character", "&0\n"]], +"errors": []} + +]} diff --git a/rcdom/html5lib-tests b/rcdom/html5lib-tests index c75a9f56..c67f90ea 160000 --- a/rcdom/html5lib-tests +++ b/rcdom/html5lib-tests @@ -1 +1 @@ -Subproject commit c75a9f566fb18aa9746ca45769763cbaf1430ef1 +Subproject commit c67f90eacac14e022b1f2c2e5ac559879581e9ff diff --git a/rcdom/tests/foreach_html5lib_test/mod.rs b/rcdom/tests/foreach_html5lib_test/mod.rs index 6138c98c..f996c28b 100644 --- a/rcdom/tests/foreach_html5lib_test/mod.rs +++ b/rcdom/tests/foreach_html5lib_test/mod.rs @@ -21,7 +21,6 @@ pub fn foreach_html5lib_test( Mk: FnMut(&Path, fs::File), { let mut test_dir_path = src_dir.to_path_buf(); - test_dir_path.push("html5lib-tests"); test_dir_path.push(subdir); let maybe_test_files = fs::read_dir(&test_dir_path); diff --git a/rcdom/tests/html-tokenizer.rs b/rcdom/tests/html-tokenizer.rs index 78b7ca09..520a8301 100644 --- a/rcdom/tests/html-tokenizer.rs +++ b/rcdom/tests/html-tokenizer.rs @@ -11,7 +11,7 @@ mod foreach_html5lib_test; use foreach_html5lib_test::foreach_html5lib_test; use html5ever::tendril::*; -use html5ever::tokenizer::states::{Plaintext, RawData, Rawtext, Rcdata}; +use html5ever::tokenizer::states::{Plaintext, RawData, Rawtext, Rcdata, ScriptData, CdataSection, Data}; use html5ever::tokenizer::BufferQueue; use html5ever::tokenizer::{CharacterTokens, EOFToken, NullCharacterToken, ParseError}; use html5ever::tokenizer::{CommentToken, DoctypeToken, TagToken, Token}; @@ -20,14 +20,29 @@ use html5ever::tokenizer::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts} use html5ever::{namespace_url, ns, Attribute, LocalName, QualName}; use rustc_test::{DynTestFn, DynTestName, TestDesc, TestDescAndFn}; use serde_json::{Map, Value}; -use std::borrow::Cow::Borrowed; +use std::borrow::Cow; use std::default::Default; use std::ffi::OsStr; use std::io::Read; +use std::fs::File; use std::mem::replace; use std::path::Path; use std::{char, env}; + +#[derive(Debug)] +struct TestError(Cow<'static, str>); + +impl PartialEq for TestError { + fn eq(&self, _: &TestError) -> bool { + // TODO: actually match exact error messages + true + } +} + +// some large testcases hang forever without an upper-bound of splits to generate +const MAX_SPLITS: usize = 1000; + // Return all ways of splitting the string into at most n // possibly-empty pieces. 
fn splits(s: &str, n: usize) -> Vec> { @@ -35,12 +50,8 @@ fn splits(s: &str, n: usize) -> Vec> { return vec![vec![s.to_tendril()]]; } - let mut points: Vec = s.char_indices().map(|(n, _)| n).collect(); - points.push(s.len()); - - // do this with iterators? let mut out = vec![]; - for p in points.into_iter() { + for p in s.char_indices().map(|(n, _)| n).chain(Some(s.len())) { let y = &s[p..]; for mut x in splits(&s[..p], n - 1).into_iter() { x.push(y.to_tendril()); @@ -49,11 +60,13 @@ fn splits(s: &str, n: usize) -> Vec> { } out.extend(splits(s, n - 1).into_iter()); + out.truncate(MAX_SPLITS); out } struct TokenLogger { tokens: Vec, + errors: Vec, current_str: StrTendril, exact_errors: bool, } @@ -62,6 +75,7 @@ impl TokenLogger { fn new(exact_errors: bool) -> TokenLogger { TokenLogger { tokens: vec![], + errors: vec![], current_str: StrTendril::new(), exact_errors: exact_errors, } @@ -80,9 +94,9 @@ impl TokenLogger { } } - fn get_tokens(mut self) -> Vec { + fn get_tokens(mut self) -> (Vec, Vec){ self.finish_str(); - self.tokens + (self.tokens, self.errors) } } @@ -99,9 +113,9 @@ impl TokenSink for TokenLogger { self.current_str.push_char('\0'); }, - ParseError(_) => { + ParseError(e) => { if self.exact_errors { - self.push(ParseError(Borrowed(""))); + self.errors.push(TestError(e)); } }, @@ -127,7 +141,7 @@ impl TokenSink for TokenLogger { } } -fn tokenize(input: Vec, opts: TokenizerOpts) -> Vec { +fn tokenize(input: Vec, opts: TokenizerOpts) -> (Vec, Vec) { let sink = TokenLogger::new(opts.exact_errors); let mut tok = Tokenizer::new(sink, opts); let mut buffer = BufferQueue::new(); @@ -247,21 +261,24 @@ fn json_to_token(js: &Value) -> Token { } // Parse the "output" field of the test case into a vector of tokens. -fn json_to_tokens(js: &Value, exact_errors: bool) -> Vec { +fn json_to_tokens(js_tokens: &Value, js_errors: &[Value], exact_errors: bool) -> (Vec, Vec) { // Use a TokenLogger so that we combine character tokens separated // by an ignored error. let mut sink = TokenLogger::new(exact_errors); - for tok in js.get_list().iter() { + for tok in js_tokens.get_list().iter() { assert_eq!( - match *tok { - Value::String(ref s) if &s[..] == "ParseError" => { - sink.process_token(ParseError(Borrowed("")), 0) - }, - _ => sink.process_token(json_to_token(tok), 0), - }, + sink.process_token(json_to_token(tok), 0), + TokenSinkResult::Continue + ); + } + + for err in js_errors { + assert_eq!( + sink.process_token(ParseError(err.find("code").get_str().into()), 0), TokenSinkResult::Continue ); } + sink.get_tokens() } @@ -276,7 +293,7 @@ fn unescape(s: &str) -> Option { if it.peek() != Some(&'u') { panic!("can't understand escape"); } - drop(it.next()); + let _ = it.next(); let hex: String = it.by_ref().take(4).collect(); match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) { // Some of the tests use lone surrogates, but we have no @@ -309,7 +326,7 @@ fn unescape_json(js: &Value) -> Value { } } -fn mk_test(desc: String, input: String, expect: Value, opts: TokenizerOpts) -> TestDescAndFn { +fn mk_test(desc: String, input: String, expect: Value, expect_errors: Vec, opts: TokenizerOpts) -> TestDescAndFn { TestDescAndFn { desc: TestDesc::new(DynTestName(desc)), testfn: DynTestFn(Box::new(move || { @@ -321,11 +338,11 @@ fn mk_test(desc: String, input: String, expect: Value, opts: TokenizerOpts) -> T // result but the compiler doesn't catch it! // Possibly mozilla/rust#12223. 
let output = tokenize(input.clone(), opts.clone()); - let expect_toks = json_to_tokens(&expect, opts.exact_errors); + let expect_toks = json_to_tokens(&expect, &expect_errors, opts.exact_errors); if output != expect_toks { panic!( "\ninput: {:?}\ngot: {:?}\nexpected: {:?}", - input, output, expect + input, output, expect_toks ); } } @@ -337,6 +354,7 @@ fn mk_tests(tests: &mut Vec, filename: &str, js: &Value) { let obj = js.get_obj(); let mut input = js.find("input").get_str(); let mut expect = js.find("output").clone(); + let expect_errors = js.get("errors").map(JsonExt::get_list).map(Vec::as_slice).unwrap_or_default(); let desc = format!("tok: {}: {}", filename, js.find("description").get_str()); // "Double-escaped" tests require additional processing of @@ -364,6 +382,9 @@ fn mk_tests(tests: &mut Vec, filename: &str, js: &Value) { "PLAINTEXT state" => Plaintext, "RAWTEXT state" => RawData(Rawtext), "RCDATA state" => RawData(Rcdata), + "Script data state" => RawData(ScriptData), + "CDATA section state" => CdataSection, + "Data state" => Data, s => panic!("don't know state {}", s), }) }) @@ -388,6 +409,7 @@ fn mk_tests(tests: &mut Vec, filename: &str, js: &Value) { newdesc, input.clone(), expect.clone(), + expect_errors.to_owned(), TokenizerOpts { exact_errors: exact_errors, initial_state: state, @@ -407,32 +429,41 @@ fn mk_tests(tests: &mut Vec, filename: &str, js: &Value) { fn tests(src_dir: &Path) -> Vec { let mut tests = vec![]; + let mut add_test = |path: &Path, mut file: File| { + let mut s = String::new(); + file.read_to_string(&mut s) + .ok() + .expect("file reading error"); + let js: Value = serde_json::from_str(&s).ok().expect("json parse error"); + + match js.get_obj().get(&"tests".to_string()) { + Some(&Value::Array(ref lst)) => { + for test in lst.iter() { + mk_tests( + &mut tests, + path.file_name().unwrap().to_str().unwrap(), + test, + ) + } + }, + + // xmlViolation.test doesn't follow this format. + _ => (), + } + }; + foreach_html5lib_test( src_dir, - "tokenizer", + "html5lib-tests/tokenizer", OsStr::new("test"), - |path, mut file| { - let mut s = String::new(); - file.read_to_string(&mut s) - .ok() - .expect("file reading error"); - let js: Value = serde_json::from_str(&s).ok().expect("json parse error"); - - match js.get_obj().get(&"tests".to_string()) { - Some(&Value::Array(ref lst)) => { - for test in lst.iter() { - mk_tests( - &mut tests, - path.file_name().unwrap().to_str().unwrap(), - test, - ); - } - }, + &mut add_test + ); - // xmlViolation.test doesn't follow this format. - _ => (), - } - }, + foreach_html5lib_test( + src_dir, + "custom-html5lib-tokenizer-tests", + OsStr::new("test"), + &mut add_test ); tests diff --git a/rcdom/tests/html-tree-builder.rs b/rcdom/tests/html-tree-builder.rs index 9d882484..e82116f4 100644 --- a/rcdom/tests/html-tree-builder.rs +++ b/rcdom/tests/html-tree-builder.rs @@ -266,7 +266,7 @@ fn tests(src_dir: &Path, ignores: &HashSet) -> Vec { foreach_html5lib_test( src_dir, - "tree-construction", + "html5lib-tests/tree-construction", OsStr::new("dat"), |path, file| { let buf = io::BufReader::new(file); From ba40f5569c85591efa88006e281c85b93fcdc068 Mon Sep 17 00:00:00 2001 From: Raimundo Saona <37874270+saona-raimundo@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:24:46 +0200 Subject: [PATCH 13/25] Update mod.rs (#502) Complete Default documentation of `SerializeOpts`. 
--- html5ever/src/serialize/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/html5ever/src/serialize/mod.rs b/html5ever/src/serialize/mod.rs index 3a57b477..8c0a65c3 100644 --- a/html5ever/src/serialize/mod.rs +++ b/html5ever/src/serialize/mod.rs @@ -26,7 +26,7 @@ where #[derive(Clone)] pub struct SerializeOpts { - /// Is scripting enabled? + /// Is scripting enabled? Default: true pub scripting_enabled: bool, /// Serialize the root node? Default: ChildrenOnly From 16b5127f6cce2182409a8384a38af8dc64532d23 Mon Sep 17 00:00:00 2001 From: Al Hoang <13622+hoanga@users.noreply.github.com> Date: Fri, 11 Aug 2023 04:33:22 -0500 Subject: [PATCH 14/25] compile fix for haiku (#474) Co-authored-by: Al Hoang <3811822-hoanga@users.noreply.gitlab.com> --- html5ever/build.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/html5ever/build.rs b/html5ever/build.rs index bfac7714..327c707e 100644 --- a/html5ever/build.rs +++ b/html5ever/build.rs @@ -21,9 +21,15 @@ fn main() { let output = Path::new(&env::var("OUT_DIR").unwrap()).join("rules.rs"); println!("cargo:rerun-if-changed={}", input.display()); + #[cfg(target_os = "haiku")] + let stack_size = 16; + + #[cfg(not(target_os = "haiku"))] + let stack_size = 128; + // We have stack overflows on Servo's CI. let handle = Builder::new() - .stack_size(128 * 1024 * 1024) + .stack_size(stack_size * 1024 * 1024) .spawn(move || { match_token::expand(&input, &output); }) From ef7c99549045d7c7a451d6e619d26e742167e1aa Mon Sep 17 00:00:00 2001 From: Alex Touchet Date: Fri, 11 Aug 2023 02:34:10 -0700 Subject: [PATCH 15/25] Switch badge to GitHub Actions and update Readme (#480) --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 813f1b11..c78b18ed 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # html5ever -[![Build Status](https://travis-ci.com/servo/html5ever.svg?branch=master)](https://travis-ci.com/servo/html5ever) +[![Build Status](https://github.com/servo/html5ever/actions/workflows/main.yml/badge.svg)](https://github.com/servo/html5ever/actions) [![crates.io](https://img.shields.io/crates/v/html5ever.svg)](https://crates.io/crates/html5ever) [API Documentation][API documentation] @@ -11,7 +11,7 @@ It can parse and serialize HTML according to the [WHATWG](https://whatwg.org/) s Note that the HTML syntax is very similar to XML. For correct parsing of XHTML, use an XML parser (That said, many XHTML documents in the wild are serialized in an HTML-compatible form). -html5ever is written in [Rust][], therefore it avoids the notorious security problems that come along with using C. Being built with Rust also makes the library come with the high-grade performance you would expect from an html parser written in C. html5ever is basically a C html parser, but without needing a garbage collector or other heavy runtime processes. +html5ever is written in [Rust][], therefore it avoids the notorious security problems that come along with using C. Being built with Rust also makes the library come with the high-grade performance you would expect from an HTML parser written in C. html5ever is basically a C HTML parser, but without needing a garbage collector or other heavy runtime processes. 
## Getting started in Rust @@ -20,7 +20,7 @@ Add html5ever as a dependency in your [`Cargo.toml`](https://crates.io/) file: ```toml [dependencies] -html5ever = "*" +html5ever = "0.26" ``` You should also take a look at [`examples/html2html.rs`], [`examples/print-rcdom.rs`], and the [API documentation][]. From aa11b3ba955010a9f783d81f4fa983dfcafe7f57 Mon Sep 17 00:00:00 2001 From: AcqRel <114918119+AcqRel@users.noreply.github.com> Date: Sat, 26 Aug 2023 16:10:34 +0200 Subject: [PATCH 16/25] Fix parsing of bogus comments after end tags (#507) --- html5ever/src/tokenizer/mod.rs | 4 +-- .../regression.test | 27 ++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 0fb0d014..208c7055 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -862,8 +862,8 @@ impl Tokenizer { let c = get_char!(self, input); if self.have_appropriate_end_tag() { match c { - '\t' | '\n' | '\x0C' | ' ' => go!(self: to BeforeAttributeName), - '/' => go!(self: to SelfClosingStartTag), + '\t' | '\n' | '\x0C' | ' ' => go!(self: clear_temp; to BeforeAttributeName), + '/' => go!(self: clear_temp; to SelfClosingStartTag), '>' => go!(self: clear_temp; emit_tag Data), _ => (), } diff --git a/rcdom/custom-html5lib-tokenizer-tests/regression.test b/rcdom/custom-html5lib-tokenizer-tests/regression.test index 1a7c5e6e..deafda51 100644 --- a/rcdom/custom-html5lib-tokenizer-tests/regression.test +++ b/rcdom/custom-html5lib-tokenizer-tests/regression.test @@ -39,6 +39,31 @@ {"description": "Windows newline after bogusname", "input": "&0\r\n", "output": [["Character", "&0\n"]], -"errors": []} +"errors": []}, + +{"description": "Bogus comment after end tag with space", +"initialStates": ["Data state", "RCDATA state", "RAWTEXT state", "Script data state"], +"lastStartTag": "style", +"input": "", +"output": [ + ["EndTag", "style"], + ["Comment", "a"] +], +"errors": [ + {"code": "incorrectly-opened-comment"} +]}, + +{"description": "Bogus comment after end tag with solidus", +"initialStates": ["Data state", "RCDATA state", "RAWTEXT state", "Script data state"], +"lastStartTag": "style", +"input": "", +"output": [ + ["EndTag", "style"], + ["Comment", "a"] +], +"errors": [ + {"code": "unexpected-solidus-in-tag"}, + {"code": "incorrectly-opened-comment"} +]} ]} From ecbd26a8e8f133e8432053ac94af2415a25941dd Mon Sep 17 00:00:00 2001 From: Or Gany Date: Mon, 28 Aug 2023 00:38:18 +0200 Subject: [PATCH 17/25] Remove redundant FIXME comment (#508) * Adding hashset usage * Revert mod.rs * Remove redundant comment about linear search time --------- Co-authored-by: Or Gany --- html5ever/src/tokenizer/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 208c7055..eb5c62d9 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -508,7 +508,6 @@ impl Tokenizer { // Check for a duplicate attribute. // FIXME: the spec says we should error as soon as the name is finished. - // FIXME: linear time search, do we care? 
let dup = { let name = &*self.current_attr_name; self.current_tag_attrs From 9ef751bcd94dc3b348cbacc506091aeee2917614 Mon Sep 17 00:00:00 2001 From: Jonas Platte Date: Thu, 31 Aug 2023 11:25:56 +0200 Subject: [PATCH 18/25] Upgrade syn to 2.0 (#499) --- html5ever/Cargo.toml | 2 +- html5ever/macros/match_token.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/html5ever/Cargo.toml b/html5ever/Cargo.toml index 39e79992..8dfd194a 100644 --- a/html5ever/Cargo.toml +++ b/html5ever/Cargo.toml @@ -24,7 +24,7 @@ criterion = "0.3" [build-dependencies] quote = "1" -syn = { version = "1", features = ["extra-traits", "full", "fold"] } +syn = { version = "2", features = ["extra-traits", "full", "fold"] } proc-macro2 = "1" [[bench]] diff --git a/html5ever/macros/match_token.rs b/html5ever/macros/match_token.rs index 9fbed082..b4bff19f 100644 --- a/html5ever/macros/match_token.rs +++ b/html5ever/macros/match_token.rs @@ -197,7 +197,7 @@ impl Parse for LHS { } Ok(LHS::Tags(tags)) } else { - let p: syn::Pat = input.parse()?; + let p = input.call(syn::Pat::parse_single)?; Ok(LHS::Pattern(p)) } } @@ -423,7 +423,7 @@ impl Fold for MatchTokenParser { if mac.path == parse_quote!(match_token) { return syn::fold::fold_stmt( self, - syn::Stmt::Expr(expand_match_token(&mac.tokens)), + syn::Stmt::Expr(expand_match_token(&mac.tokens), None), ); } }, From c04493014b2059f710a81e7c4080d6cea82da813 Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 6 Sep 2023 10:24:36 +0900 Subject: [PATCH 19/25] Upgrade to rust edition 2021 (#509) * Update Cargo.toml * Update Cargo.toml * Update Cargo.toml * Update Cargo.toml * Update Cargo.toml * Update mod.rs --- Cargo.toml | 1 + html5ever/Cargo.toml | 2 +- html5ever/src/tree_builder/mod.rs | 2 +- markup5ever/Cargo.toml | 2 +- rcdom/Cargo.toml | 2 +- xml5ever/Cargo.toml | 2 +- 6 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index fa54a86f..c857b21f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,3 +5,4 @@ members = [ "rcdom", "xml5ever" ] +resolver = "2" diff --git a/html5ever/Cargo.toml b/html5ever/Cargo.toml index 8dfd194a..706d84b2 100644 --- a/html5ever/Cargo.toml +++ b/html5ever/Cargo.toml @@ -9,7 +9,7 @@ description = "High-performance browser-grade HTML5 parser" documentation = "https://docs.rs/html5ever" build = "build.rs" categories = [ "parser-implementations", "web-programming" ] -edition = "2018" +edition = "2021" [dependencies] log = "0.4" diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 98b209fb..458e274b 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -261,7 +261,7 @@ where /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's /// internal state. This is intended to support garbage-collected DOMs. 
- pub fn trace_handles(&self, tracer: &Tracer) { + pub fn trace_handles(&self, tracer: &dyn Tracer) { tracer.trace_handle(&self.doc_handle); for e in &self.open_elems { tracer.trace_handle(e); diff --git a/markup5ever/Cargo.toml b/markup5ever/Cargo.toml index 68c2f3de..f3ac3cc6 100644 --- a/markup5ever/Cargo.toml +++ b/markup5ever/Cargo.toml @@ -8,7 +8,7 @@ description = "Common code for xml5ever and html5ever" documentation = "https://docs.rs/markup5ever" build = "build.rs" categories = [ "parser-implementations", "web-programming" ] -edition = "2018" +edition = "2021" [lib] path = "lib.rs" diff --git a/rcdom/Cargo.toml b/rcdom/Cargo.toml index 309cbd81..ad15abe6 100644 --- a/rcdom/Cargo.toml +++ b/rcdom/Cargo.toml @@ -8,7 +8,7 @@ description = "Basic, unsupported DOM structure for use by tests in html5ever/xm readme = "README.md" documentation = "https://docs.rs/markup5ever_rcdom" categories = [ "parser-implementations", "web-programming" ] -edition = "2018" +edition = "2021" publish = false [lib] diff --git a/xml5ever/Cargo.toml b/xml5ever/Cargo.toml index a7a6f199..5517956c 100644 --- a/xml5ever/Cargo.toml +++ b/xml5ever/Cargo.toml @@ -13,7 +13,7 @@ readme = "README.md" keywords = ["xml", "xml5", "parser", "parsing"] exclude = ["xml5lib-tests/*"] categories = [ "parser-implementations", "web-programming" ] -edition = "2018" +edition = "2021" [dependencies] log = "0.4" From 16443ce636ee5ff22b3979bdb58f0c14488c4907 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Wed, 20 Mar 2024 23:04:31 +0100 Subject: [PATCH 20/25] html5ever: prepare 0.27.0 release (#516) * Warn on unreachable_pub to make it more obvious if API is private * Only check library dependencies against MSRV * Bump crate versions Need semver-incompatible bump after a phf bump in markup5ever.
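To illustrate the first bullet: the `unreachable_pub` lint fires on items that are declared `pub` but sit behind a private module, so they cannot actually be reached from outside the crate. A minimal sketch of what the lint catches (the module and function names are illustrative, not taken from this patch):

```rust
#![warn(unreachable_pub)]

mod util {
    // Warns: `util` is private, so this `pub` item is unreachable from
    // outside the crate; `pub(crate)` would state the real visibility.
    pub fn helper() {}
}

pub fn public_api() {
    util::helper();
}
```

The hunks below narrow exactly such items from `pub` to `pub(crate)` or `pub(super)`.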
--- .github/workflows/main.yml | 15 ++++++++++++++- html5ever/Cargo.toml | 4 ++-- html5ever/src/lib.rs | 3 ++- html5ever/src/tokenizer/char_ref/mod.rs | 20 ++++++++++---------- html5ever/src/util/str.rs | 4 ++-- markup5ever/Cargo.toml | 2 +- rcdom/Cargo.toml | 8 ++++---- xml5ever/Cargo.toml | 4 ++-- 8 files changed, 37 insertions(+), 23 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 791f5f0a..ba4f43ce 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - version: [1.60.0, stable, beta, nightly] + version: [stable, beta, nightly] steps: - uses: actions/checkout@v3 @@ -44,6 +44,19 @@ jobs: if: matrix.version == 'nightly' run: cargo doc + msrv: + name: MSRV + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install stable toolchain + run: | + rustup set profile minimal + rustup override set 1.60.0 + + - run: cargo check --lib --all-features + build_result: name: Result runs-on: ubuntu-latest diff --git a/html5ever/Cargo.toml b/html5ever/Cargo.toml index 706d84b2..6e608a67 100644 --- a/html5ever/Cargo.toml +++ b/html5ever/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "html5ever" -version = "0.26.0" +version = "0.27.0" authors = [ "The html5ever Project Developers" ] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/html5ever" @@ -14,7 +14,7 @@ edition = "2021" [dependencies] log = "0.4" mac = "0.1" -markup5ever = { version = "0.11", path = "../markup5ever" } +markup5ever = { version = "0.12", path = "../markup5ever" } [dev-dependencies] typed-arena = "2.0.2" diff --git a/html5ever/src/lib.rs b/html5ever/src/lib.rs index 65fadaa9..e1415f60 100644 --- a/html5ever/src/lib.rs +++ b/html5ever/src/lib.rs @@ -11,6 +11,7 @@ #![crate_type = "dylib"] #![cfg_attr(test, deny(warnings))] #![allow(unused_parens)] +#![warn(unreachable_pub)] pub use driver::{parse_document, parse_fragment, ParseOpts, Parser}; pub use markup5ever::*; @@ -21,7 +22,7 @@ pub use serialize::serialize; mod macros; mod util { - pub mod str; + pub(crate) mod str; } pub mod driver; diff --git a/html5ever/src/tokenizer/char_ref/mod.rs b/html5ever/src/tokenizer/char_ref/mod.rs index 63ac4b52..6c5b63b9 100644 --- a/html5ever/src/tokenizer/char_ref/mod.rs +++ b/html5ever/src/tokenizer/char_ref/mod.rs @@ -18,18 +18,18 @@ use std::borrow::Cow::Borrowed; use std::char::from_u32; use self::State::*; -pub use self::Status::*; +pub(super) use self::Status::*; //§ tokenizing-character-references -pub struct CharRef { +pub(super) struct CharRef { /// The resulting character(s) - pub chars: [char; 2], + pub(super) chars: [char; 2], /// How many slots in `chars` are valid? - pub num_chars: u8, + pub(super) num_chars: u8, } -pub enum Status { +pub(super) enum Status { Stuck, Progress, Done, @@ -45,7 +45,7 @@ enum State { BogusName, } -pub struct CharRefTokenizer { +pub(super) struct CharRefTokenizer { state: State, result: Option, is_consumed_in_attribute: bool, @@ -61,7 +61,7 @@ pub struct CharRefTokenizer { } impl CharRefTokenizer { - pub fn new(is_consumed_in_attribute: bool) -> CharRefTokenizer { + pub(super) fn new(is_consumed_in_attribute: bool) -> CharRefTokenizer { CharRefTokenizer { is_consumed_in_attribute, state: Begin, @@ -78,7 +78,7 @@ impl CharRefTokenizer { // A CharRefTokenizer can only tokenize one character reference, // so this method consumes the tokenizer. 
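// Taking `self` by value makes any attempt to reuse the tokenizer a
// compile-time error.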
- pub fn get_result(self) -> CharRef { + pub(super) fn get_result(self) -> CharRef { self.result.expect("get_result called before done") } @@ -112,7 +112,7 @@ impl CharRefTokenizer { } impl CharRefTokenizer { - pub fn step( + pub(super) fn step( &mut self, tokenizer: &mut Tokenizer, input: &mut BufferQueue, @@ -411,7 +411,7 @@ impl CharRefTokenizer { self.finish_none() } - pub fn end_of_file( + pub(super) fn end_of_file( &mut self, tokenizer: &mut Tokenizer, input: &mut BufferQueue, diff --git a/html5ever/src/util/str.rs b/html5ever/src/util/str.rs index 756a88d2..4520ecc4 100644 --- a/html5ever/src/util/str.rs +++ b/html5ever/src/util/str.rs @@ -9,7 +9,7 @@ use std::fmt; -pub fn to_escaped_string(x: &T) -> String { +pub(crate) fn to_escaped_string(x: &T) -> String { // FIXME: don't allocate twice let string = format!("{:?}", x); string.chars().flat_map(|c| c.escape_default()).collect() @@ -17,7 +17,7 @@ pub fn to_escaped_string(x: &T) -> String { /// If `c` is an ASCII letter, return the corresponding lowercase /// letter, otherwise None. -pub fn lower_ascii_letter(c: char) -> Option { +pub(crate) fn lower_ascii_letter(c: char) -> Option { match c { 'a'..='z' => Some(c), 'A'..='Z' => Some((c as u8 - b'A' + b'a') as char), diff --git a/markup5ever/Cargo.toml b/markup5ever/Cargo.toml index f3ac3cc6..3bd9da53 100644 --- a/markup5ever/Cargo.toml +++ b/markup5ever/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markup5ever" -version = "0.11.0" +version = "0.12.0" authors = [ "The html5ever Project Developers" ] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/html5ever" diff --git a/rcdom/Cargo.toml b/rcdom/Cargo.toml index ad15abe6..2c42d7b7 100644 --- a/rcdom/Cargo.toml +++ b/rcdom/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "markup5ever_rcdom" -version = "0.2.0" +version = "0.3.0" authors = [ "The html5ever Project Developers" ] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/html5ever" @@ -16,9 +16,9 @@ path = "lib.rs" [dependencies] tendril = "0.4" -html5ever = { version = "0.26", path = "../html5ever" } -markup5ever = { version = "0.11", path = "../markup5ever" } -xml5ever = { version = "0.17", path = "../xml5ever" } +html5ever = { version = "0.27", path = "../html5ever" } +markup5ever = { version = "0.12", path = "../markup5ever" } +xml5ever = { version = "0.18", path = "../xml5ever" } [dev-dependencies] serde_json = "1.0" diff --git a/xml5ever/Cargo.toml b/xml5ever/Cargo.toml index 5517956c..dcbbacaa 100644 --- a/xml5ever/Cargo.toml +++ b/xml5ever/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "xml5ever" -version = "0.17.0" +version = "0.18.0" authors = ["The xml5ever project developers"] license = "MIT OR Apache-2.0" repository = "https://github.com/servo/html5ever" @@ -18,7 +18,7 @@ edition = "2021" [dependencies] log = "0.4" mac = "0.1" -markup5ever = {version = "0.11", path = "../markup5ever" } +markup5ever = {version = "0.12", path = "../markup5ever" } [dev-dependencies] rustc-test = "0.3" From 65635117e6e2ac41a46ebc3508af392823d51889 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Wed, 20 Mar 2024 23:58:09 +0100 Subject: [PATCH 21/25] Fix incorrect Actions workflow expression (#518) --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ba4f43ce..4f2f1a6f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml 
@@ -69,4 +69,4 @@ jobs: if: success() - name: Mark the job as unsuccessful run: exit 1 - if: "!success()" + if: ${{ !success() }} From 69eed50ff576b8b8d380ee5c3742dd12fb121b07 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Thu, 21 Mar 2024 00:00:46 +0100 Subject: [PATCH 22/25] Avoid warnings for duplicate imports (#517) --- html5ever/src/tokenizer/mod.rs | 1 - xml5ever/examples/simple_xml_tokenizer.rs | 1 - xml5ever/examples/xml_tokenizer.rs | 1 - xml5ever/src/tree_builder/mod.rs | 1 - 4 files changed, 4 deletions(-) diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index eb5c62d9..54d11855 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -28,7 +28,6 @@ use mac::format_if; use markup5ever::{namespace_url, ns, small_char_set}; use std::borrow::Cow::{self, Borrowed}; use std::collections::BTreeMap; -use std::default::Default; use std::mem::replace; pub use crate::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; diff --git a/xml5ever/examples/simple_xml_tokenizer.rs b/xml5ever/examples/simple_xml_tokenizer.rs index de74432e..35dc8a38 100644 --- a/xml5ever/examples/simple_xml_tokenizer.rs +++ b/xml5ever/examples/simple_xml_tokenizer.rs @@ -12,7 +12,6 @@ extern crate markup5ever; extern crate xml5ever; -use std::default::Default; use std::io; use markup5ever::buffer_queue::BufferQueue; diff --git a/xml5ever/examples/xml_tokenizer.rs b/xml5ever/examples/xml_tokenizer.rs index fc3cbeff..46d683e3 100644 --- a/xml5ever/examples/xml_tokenizer.rs +++ b/xml5ever/examples/xml_tokenizer.rs @@ -12,7 +12,6 @@ extern crate markup5ever; extern crate xml5ever; -use std::default::Default; use std::io; use markup5ever::buffer_queue::BufferQueue; diff --git a/xml5ever/src/tree_builder/mod.rs b/xml5ever/src/tree_builder/mod.rs index a83e7dc9..4c10f8e2 100644 --- a/xml5ever/src/tree_builder/mod.rs +++ b/xml5ever/src/tree_builder/mod.rs @@ -18,7 +18,6 @@ use std::collections::btree_map::Iter; use std::collections::{BTreeMap, HashSet, VecDeque}; use std::fmt::{Debug, Error, Formatter}; use std::mem; -use std::result::Result; pub use self::interface::{NextParserState, NodeOrText, Tracer, TreeSink}; use self::types::*; From 124821ea0355075e465af6355712fc3a105c4f30 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Thu, 21 Mar 2024 12:02:25 +0100 Subject: [PATCH 23/25] Remove more unnecessary imports (#521) --- html5ever/examples/noop-tokenize.rs | 1 - html5ever/examples/noop-tree-builder.rs | 1 - html5ever/examples/print-tree-actions.rs | 1 - html5ever/examples/tokenize.rs | 1 - html5ever/src/serialize/mod.rs | 1 - html5ever/src/tree_builder/mod.rs | 1 - rcdom/examples/hello_xml.rs | 2 -- rcdom/examples/html2html.rs | 1 - rcdom/examples/print-rcdom.rs | 1 - rcdom/examples/xml_tree_printer.rs | 2 -- rcdom/lib.rs | 1 - rcdom/tests/html-tokenizer.rs | 1 - rcdom/tests/html-tree-builder.rs | 1 - 13 files changed, 15 deletions(-) diff --git a/html5ever/examples/noop-tokenize.rs b/html5ever/examples/noop-tokenize.rs index d6c62f1d..4aa94497 100644 --- a/html5ever/examples/noop-tokenize.rs +++ b/html5ever/examples/noop-tokenize.rs @@ -11,7 +11,6 @@ extern crate html5ever; -use std::default::Default; use std::io; use html5ever::tendril::*; diff --git a/html5ever/examples/noop-tree-builder.rs b/html5ever/examples/noop-tree-builder.rs index 07754498..27f4fa3c 100644 --- a/html5ever/examples/noop-tree-builder.rs +++ b/html5ever/examples/noop-tree-builder.rs @@ -12,7 +12,6 @@ extern crate html5ever; use std::borrow::Cow; use 
std::collections::HashMap; -use std::default::Default; use std::io; use html5ever::parse_document; diff --git a/html5ever/examples/print-tree-actions.rs b/html5ever/examples/print-tree-actions.rs index 7ac2de17..b95368df 100644 --- a/html5ever/examples/print-tree-actions.rs +++ b/html5ever/examples/print-tree-actions.rs @@ -12,7 +12,6 @@ extern crate html5ever; use std::borrow::Cow; use std::collections::HashMap; -use std::default::Default; use std::io; use html5ever::parse_document; diff --git a/html5ever/examples/tokenize.rs b/html5ever/examples/tokenize.rs index 039ffb79..ae1f6203 100644 --- a/html5ever/examples/tokenize.rs +++ b/html5ever/examples/tokenize.rs @@ -9,7 +9,6 @@ extern crate html5ever; -use std::default::Default; use std::io; use html5ever::tendril::*; diff --git a/html5ever/src/serialize/mod.rs b/html5ever/src/serialize/mod.rs index 8c0a65c3..d95cb452 100644 --- a/html5ever/src/serialize/mod.rs +++ b/html5ever/src/serialize/mod.rs @@ -10,7 +10,6 @@ use log::warn; pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; use markup5ever::{local_name, namespace_url, ns}; -use std::default::Default; use std::io::{self, Write}; use crate::{LocalName, QualName}; diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 458e274b..6929bd68 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -26,7 +26,6 @@ use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResul use std::borrow::Cow::Borrowed; use std::collections::VecDeque; -use std::default::Default; use std::iter::{Enumerate, Rev}; use std::mem::replace; use std::{fmt, slice}; diff --git a/rcdom/examples/hello_xml.rs b/rcdom/examples/hello_xml.rs index 6387a0af..396c199c 100644 --- a/rcdom/examples/hello_xml.rs +++ b/rcdom/examples/hello_xml.rs @@ -11,8 +11,6 @@ extern crate markup5ever_rcdom as rcdom; extern crate xml5ever; -use std::default::Default; - use rcdom::{NodeData, RcDom}; use xml5ever::driver::parse_document; use xml5ever::tendril::TendrilSink; diff --git a/rcdom/examples/html2html.rs b/rcdom/examples/html2html.rs index 353c5f59..634b2dfa 100644 --- a/rcdom/examples/html2html.rs +++ b/rcdom/examples/html2html.rs @@ -18,7 +18,6 @@ extern crate html5ever; extern crate markup5ever_rcdom as rcdom; -use std::default::Default; use std::io::{self, Write}; use html5ever::driver::ParseOpts; diff --git a/rcdom/examples/print-rcdom.rs b/rcdom/examples/print-rcdom.rs index c96f5c06..c55d2348 100644 --- a/rcdom/examples/print-rcdom.rs +++ b/rcdom/examples/print-rcdom.rs @@ -11,7 +11,6 @@ extern crate html5ever; extern crate markup5ever_rcdom as rcdom; -use std::default::Default; use std::io; use html5ever::parse_document; diff --git a/rcdom/examples/xml_tree_printer.rs b/rcdom/examples/xml_tree_printer.rs index 7d3f747b..e61e6ef2 100644 --- a/rcdom/examples/xml_tree_printer.rs +++ b/rcdom/examples/xml_tree_printer.rs @@ -11,9 +11,7 @@ extern crate markup5ever_rcdom as rcdom; extern crate xml5ever; -use std::default::Default; use std::io; -use std::string::String; use rcdom::{Handle, NodeData, RcDom}; use xml5ever::driver::parse_document; diff --git a/rcdom/lib.rs b/rcdom/lib.rs index 3789d8c2..c53869a7 100644 --- a/rcdom/lib.rs +++ b/rcdom/lib.rs @@ -42,7 +42,6 @@ extern crate tendril; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::{HashSet, VecDeque}; -use std::default::Default; use std::fmt; use std::io; use std::mem; diff --git a/rcdom/tests/html-tokenizer.rs 
b/rcdom/tests/html-tokenizer.rs index 520a8301..9fa68e37 100644 --- a/rcdom/tests/html-tokenizer.rs +++ b/rcdom/tests/html-tokenizer.rs @@ -21,7 +21,6 @@ use html5ever::{namespace_url, ns, Attribute, LocalName, QualName}; use rustc_test::{DynTestFn, DynTestName, TestDesc, TestDescAndFn}; use serde_json::{Map, Value}; use std::borrow::Cow; -use std::default::Default; use std::ffi::OsStr; use std::io::Read; use std::fs::File; diff --git a/rcdom/tests/html-tree-builder.rs b/rcdom/tests/html-tree-builder.rs index e82116f4..e17ea695 100644 --- a/rcdom/tests/html-tree-builder.rs +++ b/rcdom/tests/html-tree-builder.rs @@ -16,7 +16,6 @@ mod foreach_html5lib_test; use foreach_html5lib_test::foreach_html5lib_test; use std::collections::{HashMap, HashSet}; -use std::default::Default; use std::ffi::OsStr; use std::io::BufRead; use std::iter::repeat; From 2294ed061156344c70ed00c25a889ef246e5f7e8 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Thu, 21 Mar 2024 14:04:54 +0100 Subject: [PATCH 24/25] Make criterion a regular dev-dependency (#519) This avoids compilation failing for the benchmark targets. --- html5ever/Cargo.toml | 4 +--- xml5ever/Cargo.toml | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/html5ever/Cargo.toml b/html5ever/Cargo.toml index 6e608a67..0a1c6077 100644 --- a/html5ever/Cargo.toml +++ b/html5ever/Cargo.toml @@ -17,10 +17,8 @@ mac = "0.1" markup5ever = { version = "0.12", path = "../markup5ever" } [dev-dependencies] -typed-arena = "2.0.2" - -[target.'cfg(bench)'.dev-dependencies] criterion = "0.3" +typed-arena = "2.0.2" [build-dependencies] quote = "1" diff --git a/xml5ever/Cargo.toml b/xml5ever/Cargo.toml index dcbbacaa..41a27fc3 100644 --- a/xml5ever/Cargo.toml +++ b/xml5ever/Cargo.toml @@ -21,10 +21,8 @@ mac = "0.1" markup5ever = {version = "0.12", path = "../markup5ever" } [dev-dependencies] -rustc-test = "0.3" - -[target.'cfg(bench)'.dev-dependencies] criterion = "0.3" +rustc-test = "0.3" [[bench]] name = "xml5ever" From 28f2d2f3ba4c2dd96da0bafe4c935cc4dd65f7a1 Mon Sep 17 00:00:00 2001 From: Dirkjan Ochtman Date: Fri, 22 Mar 2024 08:41:02 +0100 Subject: [PATCH 25/25] Check formatting and Clippy lints in CI (#520) * Remove unstable rustfmt options * Fix formatting with cargo fmt * Apply clippy suggestions * Check formatting and clippy in CI --- .github/workflows/main.yml | 22 ++++ html5ever/benches/html5ever.rs | 5 +- html5ever/examples/arena.rs | 8 +- html5ever/examples/noop-tokenize.rs | 2 +- html5ever/examples/noop-tree-builder.rs | 4 +- html5ever/examples/tokenize.rs | 2 +- html5ever/macros/match_token.rs | 64 ++++++------ html5ever/src/driver.rs | 4 +- html5ever/src/serialize/mod.rs | 60 +++++------ html5ever/src/tokenizer/char_ref/mod.rs | 8 +- html5ever/src/tokenizer/interface.rs | 13 +-- html5ever/src/tokenizer/mod.rs | 116 ++++++++++----------- html5ever/src/tree_builder/data.rs | 22 ++-- html5ever/src/tree_builder/mod.rs | 36 ++++--- html5ever/src/tree_builder/rules.rs | 2 +- html5ever/src/tree_builder/tag_sets.rs | 22 ++-- markup5ever/build.rs | 4 +- markup5ever/interface/mod.rs | 6 +- markup5ever/interface/tree_builder.rs | 10 +- markup5ever/util/buffer_queue.rs | 66 ++++++------ markup5ever/util/smallcharset.rs | 6 +- rcdom/examples/hello_xml.rs | 18 +--- rcdom/examples/html2html.rs | 5 +- rcdom/examples/print-rcdom.rs | 4 +- rcdom/examples/xml_tree_printer.rs | 5 +- rcdom/lib.rs | 22 ++-- rcdom/tests/html-driver.rs | 2 +- rcdom/tests/html-serializer.rs | 25 ++--- rcdom/tests/html-tokenizer.rs | 119 +++++++++++----------- 
rcdom/tests/html-tree-builder.rs | 48 +++++---- rcdom/tests/xml-tokenizer.rs | 72 ++++++------- rcdom/tests/xml-tree-builder.rs | 91 ++++++++--------- rustfmt.toml | 1 - xml5ever/benches/xml5ever.rs | 9 +- xml5ever/examples/simple_xml_tokenizer.rs | 8 +- xml5ever/examples/xml_tokenizer.rs | 2 +- xml5ever/src/driver.rs | 2 +- xml5ever/src/serialize/mod.rs | 14 +-- xml5ever/src/tokenizer/char_ref/mod.rs | 5 +- xml5ever/src/tokenizer/interface.rs | 13 +-- xml5ever/src/tokenizer/mod.rs | 23 ++--- xml5ever/src/tree_builder/mod.rs | 94 ++++++++--------- xml5ever/src/tree_builder/types.rs | 20 ++-- 43 files changed, 510 insertions(+), 574 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4f2f1a6f..f2fa4305 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -70,3 +70,25 @@ jobs: - name: Mark the job as unsuccessful run: exit 1 if: ${{ !success() }} + + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install stable toolchain + run: | + rustup set profile minimal + rustup override set stable + + - name: Install clippy + run: | + rustup component add clippy + rustup component add rustfmt + + - name: Format + run: cargo fmt --all -- --check + + - name: Run clippy + run: cargo clippy --all-features --all-targets -- -D warnings diff --git a/html5ever/benches/html5ever.rs b/html5ever/benches/html5ever.rs index ff20c4f7..f52cb574 100644 --- a/html5ever/benches/html5ever.rs +++ b/html5ever/benches/html5ever.rs @@ -27,12 +27,11 @@ fn run_bench(c: &mut Criterion, name: &str) { let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); path.push("data/bench/"); path.push(name); - let mut file = fs::File::open(&path).ok().expect("can't open file"); + let mut file = fs::File::open(&path).expect("can't open file"); // Read the file and treat it as an infinitely repeating sequence of characters. let mut file_input = ByteTendril::new(); file.read_to_tendril(&mut file_input) - .ok() .expect("can't read file"); let file_input: StrTendril = file_input.try_reinterpret().unwrap(); let size = file_input.len(); @@ -55,7 +54,7 @@ fn run_bench(c: &mut Criterion, name: &str) { c.bench_function(&test_name, move |b| { b.iter(|| { let mut tok = Tokenizer::new(Sink, Default::default()); - let mut buffer = BufferQueue::new(); + let mut buffer = BufferQueue::default(); // We are doing clone inside the bench function, this is not ideal, but possibly // necessary since our iterator consumes the underlying buffer. for buf in input.clone().into_iter() { diff --git a/html5ever/examples/arena.rs b/html5ever/examples/arena.rs index 1b59ae1b..d084e011 100644 --- a/html5ever/examples/arena.rs +++ b/html5ever/examples/arena.rs @@ -28,7 +28,7 @@ fn main() { fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { let sink = Sink { - arena: arena, + arena, document: arena.alloc(Node::new(NodeData::Document)), quirks_mode: QuirksMode::NoQuirks, }; @@ -91,7 +91,7 @@ impl<'arena> Node<'arena> { next_sibling: Cell::new(None), first_child: Cell::new(None), last_child: Cell::new(None), - data: data, + data, } } @@ -209,7 +209,7 @@ impl<'arena> TreeSink for Sink<'arena> { fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { if let NodeData::Element { - template_contents: Some(ref contents), + template_contents: Some(contents), .. 
} = target.data { @@ -255,7 +255,7 @@ impl<'arena> TreeSink for Sink<'arena> { fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { self.new_node(NodeData::ProcessingInstruction { - target: target, + target, contents: data, }) } diff --git a/html5ever/examples/noop-tokenize.rs b/html5ever/examples/noop-tokenize.rs index 4aa94497..68b1c8c9 100644 --- a/html5ever/examples/noop-tokenize.rs +++ b/html5ever/examples/noop-tokenize.rs @@ -32,7 +32,7 @@ impl TokenSink for Sink { fn main() { let mut chunk = ByteTendril::new(); io::stdin().read_to_tendril(&mut chunk).unwrap(); - let mut input = BufferQueue::new(); + let mut input = BufferQueue::default(); input.push_back(chunk.try_reinterpret().unwrap()); let mut tok = Tokenizer::new(Sink(Vec::new()), Default::default()); diff --git a/html5ever/examples/noop-tree-builder.rs b/html5ever/examples/noop-tree-builder.rs index 27f4fa3c..5e516df6 100644 --- a/html5ever/examples/noop-tree-builder.rs +++ b/html5ever/examples/noop-tree-builder.rs @@ -44,7 +44,7 @@ impl TreeSink for Sink { } fn get_template_contents(&mut self, target: &usize) -> usize { - if let Some(expanded_name!(html "template")) = self.names.get(&target).map(|n| n.expanded()) + if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) { target + 1 } else { @@ -91,7 +91,7 @@ impl TreeSink for Sink { fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {} fn add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec) { - assert!(self.names.contains_key(&target), "not an element"); + assert!(self.names.contains_key(target), "not an element"); } fn remove_from_parent(&mut self, _target: &usize) {} fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {} diff --git a/html5ever/examples/tokenize.rs b/html5ever/examples/tokenize.rs index ae1f6203..8d4d9e7d 100644 --- a/html5ever/examples/tokenize.rs +++ b/html5ever/examples/tokenize.rs @@ -85,7 +85,7 @@ fn main() { let mut sink = TokenPrinter { in_char_run: false }; let mut chunk = ByteTendril::new(); io::stdin().read_to_tendril(&mut chunk).unwrap(); - let mut input = BufferQueue::new(); + let mut input = BufferQueue::default(); input.push_back(chunk.try_reinterpret().unwrap()); let mut tok = Tokenizer::new( diff --git a/html5ever/macros/match_token.rs b/html5ever/macros/match_token.rs index b4bff19f..4157ddf4 100644 --- a/html5ever/macros/match_token.rs +++ b/html5ever/macros/match_token.rs @@ -141,16 +141,16 @@ struct MatchToken { struct MatchTokenArm { binding: Option, - lhs: LHS, - rhs: RHS, + lhs: Lhs, + rhs: Rhs, } -enum LHS { +enum Lhs { Tags(Vec), Pattern(syn::Pat), } -enum RHS { +enum Rhs { Expression(syn::Expr), Else, } @@ -188,17 +188,17 @@ impl Parse for Tag { } } -impl Parse for LHS { +impl Parse for Lhs { fn parse(input: ParseStream) -> Result { if input.peek(Token![<]) { let mut tags = Vec::new(); while !input.peek(Token![=>]) { tags.push(input.parse()?); } - Ok(LHS::Tags(tags)) + Ok(Lhs::Tags(tags)) } else { let p = input.call(syn::Pat::parse_single)?; - Ok(LHS::Pattern(p)) + Ok(Lhs::Pattern(p)) } } } @@ -212,7 +212,7 @@ impl Parse for MatchTokenArm { } else { None }; - let lhs = input.parse::()?; + let lhs = input.parse::()?; input.parse::]>()?; let rhs = if input.peek(syn::token::Brace) { let block = input.parse::().unwrap(); @@ -222,15 +222,15 @@ impl Parse for MatchTokenArm { block, }; input.parse::>()?; - RHS::Expression(syn::Expr::Block(block)) + Rhs::Expression(syn::Expr::Block(block)) } else if 
input.peek(Token![else]) { input.parse::()?; input.parse::()?; - RHS::Else + Rhs::Else } else { let expr = input.parse::().unwrap(); input.parse::>()?; - RHS::Expression(expr) + Rhs::Expression(expr) }; Ok(MatchTokenArm { binding, lhs, rhs }) @@ -283,12 +283,12 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { }; match (lhs, rhs) { - (LHS::Pattern(_), RHS::Else) => { + (Lhs::Pattern(_), Rhs::Else) => { panic!("'else' may not appear with an ordinary pattern") }, // ordinary pattern => expression - (LHS::Pattern(pat), RHS::Expression(expr)) => { + (Lhs::Pattern(pat), Rhs::Expression(expr)) => { if !wildcards_patterns.is_empty() { panic!( "ordinary patterns may not appear after wildcard tags {:?} {:?}", @@ -299,7 +299,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { }, // ... => else - (LHS::Tags(tags), RHS::Else) => { + (Lhs::Tags(tags), Rhs::Else) => { for tag in tags { if !seen_tags.insert(tag.clone()) { panic!("duplicate tag"); @@ -313,7 +313,7 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { // <_> => expression // ... => expression - (LHS::Tags(tags), RHS::Expression(expr)) => { + (Lhs::Tags(tags), Rhs::Expression(expr)) => { // Is this arm a tag wildcard? // `None` if we haven't processed the first tag yet. let mut wildcard = None; @@ -388,9 +388,9 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { let (last_pat, last_expr) = match (binding, lhs, rhs) { (Some(_), _, _) => panic!("the last arm cannot have an @-binding"), - (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"), - (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"), - (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e), + (None, Lhs::Tags(_), _) => panic!("the last arm cannot have tag patterns"), + (None, _, Rhs::Else) => panic!("the last arm cannot use 'else'"), + (None, Lhs::Pattern(p), Rhs::Expression(e)) => (p, e), }; quote! { @@ -418,29 +418,23 @@ fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { impl Fold for MatchTokenParser { fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt { - match stmt { - syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => { - if mac.path == parse_quote!(match_token) { - return syn::fold::fold_stmt( - self, - syn::Stmt::Expr(expand_match_token(&mac.tokens), None), - ); - } - }, - _ => {}, + if let syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) = stmt { + if mac.path == parse_quote!(match_token) { + return syn::fold::fold_stmt( + self, + syn::Stmt::Expr(expand_match_token(&mac.tokens), None), + ); + } } syn::fold::fold_stmt(self, stmt) } fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr { - match expr { - syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => { - if mac.path == parse_quote!(match_token) { - return syn::fold::fold_expr(self, expand_match_token(&mac.tokens)); - } - }, - _ => {}, + if let syn::Expr::Macro(syn::ExprMacro { ref mac, .. 
}) = expr { + if mac.path == parse_quote!(match_token) { + return syn::fold::fold_expr(self, expand_match_token(&mac.tokens)); + } } syn::fold::fold_expr(self, expr) diff --git a/html5ever/src/driver.rs b/html5ever/src/driver.rs index 26db9b8d..42426e7b 100644 --- a/html5ever/src/driver.rs +++ b/html5ever/src/driver.rs @@ -45,7 +45,7 @@ where let tok = Tokenizer::new(tb, opts.tokenizer); Parser { tokenizer: tok, - input_buffer: BufferQueue::new(), + input_buffer: BufferQueue::default(), } } @@ -88,7 +88,7 @@ where let tok = Tokenizer::new(tb, tok_opts); Parser { tokenizer: tok, - input_buffer: BufferQueue::new(), + input_buffer: BufferQueue::default(), } } diff --git a/html5ever/src/serialize/mod.rs b/html5ever/src/serialize/mod.rs index d95cb452..2620c195 100644 --- a/html5ever/src/serialize/mod.rs +++ b/html5ever/src/serialize/mod.rs @@ -52,7 +52,7 @@ impl Default for SerializeOpts { #[derive(Default)] struct ElemInfo { html_name: Option, - ignore_children: bool + ignore_children: bool, } pub struct HtmlSerializer { @@ -162,28 +162,28 @@ impl Serializer for HtmlSerializer { } self.writer.write_all(b">")?; - let ignore_children = name.ns == ns!(html) && - match name.local { - local_name!("area") | - local_name!("base") | - local_name!("basefont") | - local_name!("bgsound") | - local_name!("br") | - local_name!("col") | - local_name!("embed") | - local_name!("frame") | - local_name!("hr") | - local_name!("img") | - local_name!("input") | - local_name!("keygen") | - local_name!("link") | - local_name!("meta") | - local_name!("param") | - local_name!("source") | - local_name!("track") | - local_name!("wbr") => true, - _ => false, - }; + let ignore_children = name.ns == ns!(html) + && matches!( + name.local, + local_name!("area") + | local_name!("base") + | local_name!("basefont") + | local_name!("bgsound") + | local_name!("br") + | local_name!("col") + | local_name!("embed") + | local_name!("frame") + | local_name!("hr") + | local_name!("img") + | local_name!("input") + | local_name!("keygen") + | local_name!("link") + | local_name!("meta") + | local_name!("param") + | local_name!("source") + | local_name!("track") + | local_name!("wbr") + ); self.stack.push(ElemInfo { html_name, @@ -213,13 +213,13 @@ impl Serializer for HtmlSerializer { fn write_text(&mut self, text: &str) -> io::Result<()> { let escape = match self.parent().html_name { - Some(local_name!("style")) | - Some(local_name!("script")) | - Some(local_name!("xmp")) | - Some(local_name!("iframe")) | - Some(local_name!("noembed")) | - Some(local_name!("noframes")) | - Some(local_name!("plaintext")) => false, + Some(local_name!("style")) + | Some(local_name!("script")) + | Some(local_name!("xmp")) + | Some(local_name!("iframe")) + | Some(local_name!("noembed")) + | Some(local_name!("noframes")) + | Some(local_name!("plaintext")) => false, Some(local_name!("noscript")) => !self.opts.scripting_enabled, diff --git a/html5ever/src/tokenizer/char_ref/mod.rs b/html5ever/src/tokenizer/char_ref/mod.rs index 6c5b63b9..9dee0278 100644 --- a/html5ever/src/tokenizer/char_ref/mod.rs +++ b/html5ever/src/tokenizer/char_ref/mod.rs @@ -224,9 +224,8 @@ impl CharRefTokenizer { input: &mut BufferQueue, ) -> Status { let mut unconsume = StrTendril::from_char('#'); - match self.hex_marker { - Some(c) => unconsume.push_char(c), - None => (), + if let Some(c) = self.hex_marker { + unconsume.push_char(c) } input.push_front(unconsume); @@ -361,7 +360,8 @@ impl CharRefTokenizer { // then, for historical reasons, flush code points consumed as a character // 
reference and switch to the return state. - let unconsume_all = match (self.is_consumed_in_attribute, last_matched, next_after) { + let unconsume_all = match (self.is_consumed_in_attribute, last_matched, next_after) + { (_, ';', _) => false, (true, _, Some('=')) => true, (true, _, Some(c)) if c.is_ascii_alphanumeric() => true, diff --git a/html5ever/src/tokenizer/interface.rs b/html5ever/src/tokenizer/interface.rs index 22d11be5..b3b8a1cf 100644 --- a/html5ever/src/tokenizer/interface.rs +++ b/html5ever/src/tokenizer/interface.rs @@ -19,7 +19,7 @@ pub use self::Token::{EOFToken, NullCharacterToken, ParseError}; /// A `DOCTYPE` token. // FIXME: already exists in Servo DOM -#[derive(PartialEq, Eq, Clone, Debug)] +#[derive(PartialEq, Eq, Clone, Debug, Default)] pub struct Doctype { pub name: Option, pub public_id: Option, @@ -27,17 +27,6 @@ pub struct Doctype { pub force_quirks: bool, } -impl Doctype { - pub fn new() -> Doctype { - Doctype { - name: None, - public_id: None, - system_id: None, - force_quirks: false, - } - } -} - #[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)] pub enum TagKind { StartTag, diff --git a/html5ever/src/tokenizer/mod.rs b/html5ever/src/tokenizer/mod.rs index 54d11855..1bf62af2 100644 --- a/html5ever/src/tokenizer/mod.rs +++ b/html5ever/src/tokenizer/mod.rs @@ -28,7 +28,7 @@ use mac::format_if; use markup5ever::{namespace_url, ns, small_char_set}; use std::borrow::Cow::{self, Borrowed}; use std::collections::BTreeMap; -use std::mem::replace; +use std::mem; pub use crate::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult}; use crate::tendril::StrTendril; @@ -196,7 +196,7 @@ impl Tokenizer { current_attr_name: StrTendril::new(), current_attr_value: StrTendril::new(), current_comment: StrTendril::new(), - current_doctype: Doctype::new(), + current_doctype: Doctype::default(), last_start_tag_name: start_tag_name, temp_buf: StrTendril::new(), state_profile: BTreeMap::new(), @@ -265,8 +265,8 @@ impl Tokenizer { self.current_line += 1; } - if self.opts.exact_errors && - match c as u32 { + if self.opts.exact_errors + && match c as u32 { 0x01..=0x08 | 0x0B | 0x0E..=0x1F | 0x7F..=0x9F | 0xFDD0..=0xFDEF => true, n if (n & 0xFFFE) == 0xFFFE => true, _ => false, @@ -332,13 +332,11 @@ impl Tokenizer { } } - input.push_front(replace(&mut self.temp_buf, StrTendril::new())); + input.push_front(mem::take(&mut self.temp_buf)); match input.eat(pat, eq) { None if self.at_eof => Some(false), None => { - while let Some(c) = input.next() { - self.temp_buf.push_char(c); - } + self.temp_buf.extend(input); None }, Some(matched) => Some(matched), @@ -439,7 +437,7 @@ impl Tokenizer { kind: self.current_tag_kind, name, self_closing: self.current_tag_self_closing, - attrs: replace(&mut self.current_tag_attrs, vec![]), + attrs: std::mem::take(&mut self.current_tag_attrs), }); match self.process_token(token) { @@ -461,7 +459,7 @@ impl Tokenizer { fn emit_temp_buf(&mut self) { // FIXME: Make sure that clearing on emit is spec-compatible. - let buf = replace(&mut self.temp_buf, StrTendril::new()); + let buf = mem::take(&mut self.temp_buf); self.emit_chars(buf); } @@ -471,7 +469,7 @@ impl Tokenizer { } fn emit_current_comment(&mut self) { - let comment = replace(&mut self.current_comment, StrTendril::new()); + let comment = mem::take(&mut self.current_comment); self.process_token_and_continue(CommentToken(comment)); } @@ -525,13 +523,13 @@ impl Tokenizer { // The tree builder will adjust the namespace if necessary. // This only happens in foreign elements. 
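// (`ns!()` with no argument denotes the empty namespace.)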
name: QualName::new(None, ns!(), name), - value: replace(&mut self.current_attr_value, StrTendril::new()), + value: mem::take(&mut self.current_attr_value), }); } } fn emit_current_doctype(&mut self) { - let doctype = replace(&mut self.current_doctype, Doctype::new()); + let doctype = mem::take(&mut self.current_doctype); self.process_token_and_continue(DoctypeToken(doctype)); } @@ -551,9 +549,10 @@ impl Tokenizer { } fn consume_char_ref(&mut self) { - self.char_ref_tokenizer = Some( - Box::new(CharRefTokenizer::new(matches!(self.state, states::AttributeValue(_)))) - ); + self.char_ref_tokenizer = Some(Box::new(CharRefTokenizer::new(matches!( + self.state, + states::AttributeValue(_) + )))); } fn emit_eof(&mut self) { @@ -605,7 +604,7 @@ macro_rules! shorthand ( ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_slice($c) ); ( $me:ident : emit_comment ) => ( $me.emit_current_comment() ); ( $me:ident : clear_comment ) => ( $me.current_comment.clear() ); - ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::new() ); + ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::default() ); ( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.name, $c) ); ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push($me.doctype_id($k), $c) ); ( $me:ident : clear_doctype_id $k:ident ) => ( $me.clear_doctype_id($k) ); @@ -1348,10 +1347,9 @@ impl Tokenizer { if self .sink .adjusted_current_node_present_but_not_in_html_namespace() + && eat_exact!(self, input, "[CDATA[") { - if eat_exact!(self, input, "[CDATA[") { - go!(self: clear_temp; to CdataSection); - } + go!(self: clear_temp; to CdataSection); } go!(self: error; clear_comment; to BogusComment); } @@ -1434,7 +1432,7 @@ impl Tokenizer { pub fn end(&mut self) { // Handle EOF in the char ref sub-tokenizer, if there is one. // Do this first because it might un-consume stuff. 
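// Anything it un-consumes lands in this scratch queue and is re-tokenized
// before the EOF steps below run.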
- let mut input = BufferQueue::new(); + let mut input = BufferQueue::default(); match self.char_ref_tokenizer.take() { None => (), Some(mut tok) => { @@ -1486,22 +1484,22 @@ impl Tokenizer { fn eof_step(&mut self) -> ProcessResult { debug!("processing EOF in state {:?}", self.state); match self.state { - states::Data | - states::RawData(Rcdata) | - states::RawData(Rawtext) | - states::RawData(ScriptData) | - states::Plaintext => go!(self: eof), - - states::TagName | - states::RawData(ScriptDataEscaped(_)) | - states::BeforeAttributeName | - states::AttributeName | - states::AfterAttributeName | - states::AttributeValue(_) | - states::AfterAttributeValueQuoted | - states::SelfClosingStartTag | - states::ScriptDataEscapedDash(_) | - states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data), + states::Data + | states::RawData(Rcdata) + | states::RawData(Rawtext) + | states::RawData(ScriptData) + | states::Plaintext => go!(self: eof), + + states::TagName + | states::RawData(ScriptDataEscaped(_)) + | states::BeforeAttributeName + | states::AttributeName + | states::AfterAttributeName + | states::AttributeValue(_) + | states::AfterAttributeValueQuoted + | states::SelfClosingStartTag + | states::ScriptDataEscapedDash(_) + | states::ScriptDataEscapedDashDash(_) => go!(self: error_eof; to Data), states::BeforeAttributeValue => go!(self: reconsume AttributeValue Unquoted), @@ -1529,14 +1527,16 @@ impl Tokenizer { go!(self: to RawData ScriptDataEscaped DoubleEscaped) }, - states::CommentStart | - states::CommentStartDash | - states::Comment | - states::CommentEndDash | - states::CommentEnd | - states::CommentEndBang => go!(self: error_eof; emit_comment; to Data), + states::CommentStart + | states::CommentStartDash + | states::Comment + | states::CommentEndDash + | states::CommentEnd + | states::CommentEndBang => go!(self: error_eof; emit_comment; to Data), - states::CommentLessThanSign | states::CommentLessThanSignBang => go!(self: reconsume Comment), + states::CommentLessThanSign | states::CommentLessThanSignBang => { + go!(self: reconsume Comment) + }, states::CommentLessThanSignBangDash => go!(self: reconsume CommentEndDash), @@ -1546,14 +1546,14 @@ impl Tokenizer { go!(self: error_eof; create_doctype; force_quirks; emit_doctype; to Data) }, - states::DoctypeName | - states::AfterDoctypeName | - states::AfterDoctypeKeyword(_) | - states::BeforeDoctypeIdentifier(_) | - states::DoctypeIdentifierDoubleQuoted(_) | - states::DoctypeIdentifierSingleQuoted(_) | - states::AfterDoctypeIdentifier(_) | - states::BetweenDoctypePublicAndSystemIdentifiers => { + states::DoctypeName + | states::AfterDoctypeName + | states::AfterDoctypeKeyword(_) + | states::BeforeDoctypeIdentifier(_) + | states::DoctypeIdentifierDoubleQuoted(_) + | states::DoctypeIdentifierSingleQuoted(_) + | states::AfterDoctypeIdentifier(_) + | states::BetweenDoctypePublicAndSystemIdentifiers => { go!(self: error_eof; force_quirks; emit_doctype; to Data) }, @@ -1585,7 +1585,7 @@ mod test { use super::interface::{TagToken, Token}; use markup5ever::buffer_queue::BufferQueue; - use std::mem::replace; + use std::mem; use crate::LocalName; @@ -1614,7 +1614,7 @@ mod test { fn finish_str(&mut self) { if self.current_str.len() > 0 { - let s = replace(&mut self.current_str, StrTendril::new()); + let s = mem::take(&mut self.current_str); self.tokens.push(CharacterTokens(s)); } } @@ -1668,7 +1668,7 @@ mod test { fn tokenize(input: Vec, opts: TokenizerOpts) -> Vec<(Token, u64)> { let sink = LinesMatch::new(); let mut tok = Tokenizer::new(sink, 
opts); - let mut buffer = BufferQueue::new(); + let mut buffer = BufferQueue::default(); for chunk in input.into_iter() { buffer.push_back(chunk); let _ = tok.feed(&mut buffer); @@ -1680,13 +1680,13 @@ mod test { // Create a tag token fn create_tag(token: StrTendril, tagkind: TagKind) -> Token { let name = LocalName::from(&*token); - let token = TagToken(Tag { + + TagToken(Tag { kind: tagkind, name, self_closing: false, attrs: vec![], - }); - token + }) } #[test] diff --git a/html5ever/src/tree_builder/data.rs b/html5ever/src/tree_builder/data.rs index 9d51a710..2a81b5b7 100644 --- a/html5ever/src/tree_builder/data.rs +++ b/html5ever/src/tree_builder/data.rs @@ -109,26 +109,26 @@ pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool let system = opt_tendril_as_slice(&doctype.system_id); let err = match (name, public, system) { - (Some("html"), None, None) | - (Some("html"), None, Some("about:legacy-compat")) | - (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) | - ( + (Some("html"), None, None) + | (Some("html"), None, Some("about:legacy-compat")) + | (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) + | ( Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), Some("http://www.w3.org/TR/REC-html40/strict.dtd"), - ) | - (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) | - ( + ) + | (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) + | ( Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), Some("http://www.w3.org/TR/html4/strict.dtd"), - ) | - ( + ) + | ( Some("html"), Some("-//W3C//DTD XHTML 1.0 Strict//EN"), Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"), - ) | - ( + ) + | ( Some("html"), Some("-//W3C//DTD XHTML 1.1//EN"), Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"), diff --git a/html5ever/src/tree_builder/mod.rs b/html5ever/src/tree_builder/mod.rs index 6929bd68..20f6fb71 100644 --- a/html5ever/src/tree_builder/mod.rs +++ b/html5ever/src/tree_builder/mod.rs @@ -27,7 +27,7 @@ use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResul use std::borrow::Cow::Borrowed; use std::collections::VecDeque; use std::iter::{Enumerate, Rev}; -use std::mem::replace; +use std::mem; use std::{fmt, slice}; use crate::tokenizer::states::{RawData, RawKind}; @@ -236,11 +236,11 @@ where match *name { local_name!("title") | local_name!("textarea") => tok_state::RawData(tok_state::Rcdata), - local_name!("style") | - local_name!("xmp") | - local_name!("iframe") | - local_name!("noembed") | - local_name!("noframes") => tok_state::RawData(tok_state::Rawtext), + local_name!("style") + | local_name!("xmp") + | local_name!("iframe") + | local_name!("noembed") + | local_name!("noframes") => tok_state::RawData(tok_state::Rawtext), local_name!("script") => tok_state::RawData(tok_state::ScriptData), @@ -455,7 +455,7 @@ where if line_number != self.current_line { self.sink.set_current_line(line_number); } - let ignore_lf = replace(&mut self.ignore_lf, false); + let ignore_lf = mem::take(&mut self.ignore_lf); // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type. 
let token = match token { @@ -529,8 +529,8 @@ where } fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool { - !self.open_elems.is_empty() && - self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html) + !self.open_elems.is_empty() + && self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html) } } @@ -850,8 +850,8 @@ where Bookmark::InsertAfter(previous) => { let index = self .position_in_active_formatting(&previous) - .expect("bookmark not found in active formatting elements") + - 1; + .expect("bookmark not found in active formatting elements") + + 1; self.active_formatting.insert(index, new_entry); let old_index = self .position_in_active_formatting(&fmt_elem) @@ -1299,11 +1299,11 @@ where }; // Step 12. - if form_associatable(qname.expanded()) && - self.form_elem.is_some() && - !self.in_html_elem_named(local_name!("template")) && - !(listed(qname.expanded()) && - attrs + if form_associatable(qname.expanded()) + && self.form_elem.is_some() + && !self.in_html_elem_named(local_name!("template")) + && !(listed(qname.expanded()) + && attrs .iter() .any(|a| a.name.expanded() == expanded_name!("", "form"))) { @@ -1661,9 +1661,7 @@ where fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult { self.unexpected(&tag); while !self.current_node_in(|n| { - *n.ns == ns!(html) || - mathml_text_integration_point(n) || - svg_html_integration_point(n) + *n.ns == ns!(html) || mathml_text_integration_point(n) || svg_html_integration_point(n) }) { self.pop(); } diff --git a/html5ever/src/tree_builder/rules.rs b/html5ever/src/tree_builder/rules.rs index 521ce1cc..5e94bd57 100644 --- a/html5ever/src/tree_builder/rules.rs +++ b/html5ever/src/tree_builder/rules.rs @@ -859,7 +859,7 @@ where } token => { - let pending = replace(&mut self.pending_table_text, vec!()); + let pending = ::std::mem::take(&mut self.pending_table_text); let contains_nonspace = pending.iter().any(|&(split, ref text)| { match split { Whitespace => false, diff --git a/html5ever/src/tree_builder/tag_sets.rs b/html5ever/src/tree_builder/tag_sets.rs index 4b46eeb1..510d5ddd 100644 --- a/html5ever/src/tree_builder/tag_sets.rs +++ b/html5ever/src/tree_builder/tag_sets.rs @@ -59,9 +59,9 @@ declare_tag_set!(pub html_default_scope = #[inline(always)] pub fn default_scope(name: ExpandedName) -> bool { - html_default_scope(name) || - mathml_text_integration_point(name) || - svg_html_integration_point(name) + html_default_scope(name) + || mathml_text_integration_point(name) + || svg_html_integration_point(name) } declare_tag_set!(pub list_item_scope = [default_scope] + "ol" "ul"); @@ -95,11 +95,11 @@ declare_tag_set!(pub special_tag = pub fn mathml_text_integration_point(p: ExpandedName) -> bool { matches!( p, - expanded_name!(mathml "mi") | - expanded_name!(mathml "mo") | - expanded_name!(mathml "mn") | - expanded_name!(mathml "ms") | - expanded_name!(mathml "mtext") + expanded_name!(mathml "mi") + | expanded_name!(mathml "mo") + | expanded_name!(mathml "mn") + | expanded_name!(mathml "ms") + | expanded_name!(mathml "mtext") ) } @@ -108,8 +108,8 @@ pub fn svg_html_integration_point(p: ExpandedName) -> bool { // annotation-xml are handle in another place matches!( p, - expanded_name!(svg "foreignObject") | - expanded_name!(svg "desc") | - expanded_name!(svg "title") + expanded_name!(svg "foreignObject") + | expanded_name!(svg "desc") + | expanded_name!(svg "title") ) } diff --git a/markup5ever/build.rs b/markup5ever/build.rs index c4bbd564..354c7ec5 100644 --- a/markup5ever/build.rs +++ 
b/markup5ever/build.rs @@ -31,14 +31,14 @@ static NAMESPACES: &[(&str, &str)] = &[ fn main() { let generated = Path::new(&env::var("OUT_DIR").unwrap()).join("generated.rs"); - let mut generated = BufWriter::new(File::create(&generated).unwrap()); + let mut generated = BufWriter::new(File::create(generated).unwrap()); named_entities_to_phf(&Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs")); // Create a string cache for local names let local_names = Path::new(&env::var("CARGO_MANIFEST_DIR").unwrap()).join("local_names.txt"); let mut local_names_atom = string_cache_codegen::AtomType::new("LocalName", "local_name!"); - for line in BufReader::new(File::open(&local_names).unwrap()).lines() { + for line in BufReader::new(File::open(local_names).unwrap()).lines() { let local_name = line.unwrap(); local_names_atom.atom(&local_name); local_names_atom.atom(&local_name.to_ascii_lowercase()); diff --git a/markup5ever/interface/mod.rs b/markup5ever/interface/mod.rs index f498fdfb..7c4bea2e 100644 --- a/markup5ever/interface/mod.rs +++ b/markup5ever/interface/mod.rs @@ -303,11 +303,7 @@ impl QualName { /// #[inline] pub fn new(prefix: Option, ns: Namespace, local: LocalName) -> QualName { - QualName { - prefix, - ns, - local, - } + QualName { prefix, ns, local } } /// Take a reference of `self` as an `ExpandedName`, dropping the unresolved prefix. diff --git a/markup5ever/interface/tree_builder.rs b/markup5ever/interface/tree_builder.rs index 43361f36..4010a160 100644 --- a/markup5ever/interface/tree_builder.rs +++ b/markup5ever/interface/tree_builder.rs @@ -54,6 +54,7 @@ pub enum NextParserState { /// Special properties of an element, useful for tagging elements with this information. #[derive(Default)] +#[non_exhaustive] pub struct ElementFlags { /// A document fragment should be created, associated with the element, /// and returned in TreeSink::get_template_contents. @@ -70,9 +71,6 @@ pub struct ElementFlags { /// /// [whatwg integration-point]: https://html.spec.whatwg.org/multipage/#html-integration-point pub mathml_annotation_xml_integration_point: bool, - - // Prevent construction from outside module - _private: (), } /// A constructor for an element. @@ -89,9 +87,9 @@ where expanded_name!(html "template") => flags.template = true, expanded_name!(mathml "annotation-xml") => { flags.mathml_annotation_xml_integration_point = attrs.iter().any(|attr| { - attr.name.expanded() == expanded_name!("", "encoding") && - (attr.value.eq_ignore_ascii_case("text/html") || - attr.value.eq_ignore_ascii_case("application/xhtml+xml")) + attr.name.expanded() == expanded_name!("", "encoding") + && (attr.value.eq_ignore_ascii_case("text/html") + || attr.value.eq_ignore_ascii_case("application/xhtml+xml")) }) }, _ => {}, diff --git a/markup5ever/util/buffer_queue.rs b/markup5ever/util/buffer_queue.rs index ab3f0389..d41b6135 100644 --- a/markup5ever/util/buffer_queue.rs +++ b/markup5ever/util/buffer_queue.rs @@ -49,15 +49,17 @@ pub struct BufferQueue { buffers: VecDeque, } -impl BufferQueue { +impl Default for BufferQueue { /// Create an empty BufferQueue. #[inline] - pub fn new() -> BufferQueue { - BufferQueue { + fn default() -> Self { + Self { buffers: VecDeque::with_capacity(16), } } +} +impl BufferQueue { /// Returns whether the queue is empty. #[inline] pub fn is_empty(&self) -> bool { @@ -93,31 +95,12 @@ impl BufferQueue { /// Look at the next available character without removing it, if the queue is not empty. 
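/// Repeated calls return the same character until something consumes it.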
pub fn peek(&self) -> Option { debug_assert!( - self.buffers.iter().find(|el| el.len32() == 0).is_none(), + !self.buffers.iter().any(|el| el.len32() == 0), "invariant \"all buffers in the queue are non-empty\" failed" ); self.buffers.front().map(|b| b.chars().next().unwrap()) } - /// Get the next character if one is available, removing it from the queue. - /// - /// This function manages the buffers, removing them as they become empty. - pub fn next(&mut self) -> Option { - let (result, now_empty) = match self.buffers.front_mut() { - None => (None, false), - Some(buf) => { - let c = buf.pop_front_char().expect("empty buffer in queue"); - (Some(c), buf.is_empty()) - }, - }; - - if now_empty { - self.buffers.pop_front(); - } - - result - } - /// Pops and returns either a single character from the given set, or /// a buffer of characters none of which are in the set. /// @@ -129,7 +112,7 @@ impl BufferQueue { /// # fn main() { /// use markup5ever::buffer_queue::{BufferQueue, SetResult}; /// - /// let mut queue = BufferQueue::new(); + /// let mut queue = BufferQueue::default(); /// queue.push_back(format_tendril!(r#"SomeText"#)); /// let set = small_char_set!(b'<' b'>' b' ' b'=' b'"' b'/'); /// let tag = format_tendril!("some_tag"); @@ -185,9 +168,9 @@ impl BufferQueue { /// # extern crate markup5ever; /// # #[macro_use] extern crate tendril; /// # fn main() { - /// use markup5ever::buffer_queue::{BufferQueue}; + /// use markup5ever::buffer_queue::BufferQueue; /// - /// let mut queue = BufferQueue::new(); + /// let mut queue = BufferQueue::default(); /// queue.push_back(format_tendril!("testtext")); /// let test_str = "test"; /// assert_eq!(queue.eat("test", |&a, &b| a == b), Some(true)); @@ -232,6 +215,29 @@ impl BufferQueue { } } +impl Iterator for BufferQueue { + type Item = char; + + /// Get the next character if one is available, removing it from the queue. + /// + /// This function manages the buffers, removing them as they become empty. + fn next(&mut self) -> Option { + let (result, now_empty) = match self.buffers.front_mut() { + None => (None, false), + Some(buf) => { + let c = buf.pop_front_char().expect("empty buffer in queue"); + (Some(c), buf.is_empty()) + }, + }; + + if now_empty { + self.buffers.pop_front(); + } + + result + } +} + #[cfg(test)] #[allow(non_snake_case)] mod test { @@ -242,7 +248,7 @@ mod test { #[test] fn smoke_test() { - let mut bq = BufferQueue::new(); + let mut bq = BufferQueue::default(); assert_eq!(bq.peek(), None); assert_eq!(bq.next(), None); @@ -260,7 +266,7 @@ mod test { #[test] fn can_unconsume() { - let mut bq = BufferQueue::new(); + let mut bq = BufferQueue::default(); bq.push_back("abc".to_tendril()); assert_eq!(bq.next(), Some('a')); @@ -274,7 +280,7 @@ mod test { #[test] fn can_pop_except_set() { - let mut bq = BufferQueue::new(); + let mut bq = BufferQueue::default(); bq.push_back("abc&def".to_tendril()); let mut pop = || bq.pop_except_from(small_char_set!('&')); assert_eq!(pop(), Some(NotFromSet("abc".to_tendril()))); @@ -288,7 +294,7 @@ mod test { // This is not very comprehensive. We rely on the tokenizer // integration tests for more thorough testing with many // different input buffer splits. 
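// The input below is split across two buffers ("a" then "bc") on purpose,
// so `eat` has to match across a buffer boundary.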
-        let mut bq = BufferQueue::new();
+        let mut bq = BufferQueue::default();
         bq.push_back("a".to_tendril());
         bq.push_back("bc".to_tendril());
         assert_eq!(bq.eat("abcd", u8::eq_ignore_ascii_case), None);
diff --git a/markup5ever/util/smallcharset.rs b/markup5ever/util/smallcharset.rs
index 957dad73..2c0c4504 100644
--- a/markup5ever/util/smallcharset.rs
+++ b/markup5ever/util/smallcharset.rs
@@ -70,16 +70,14 @@ impl SmallCharSet {
 
 #[cfg(test)]
 mod test {
-    use std::iter::repeat;
-
     #[test]
     fn nonmember_prefix() {
         for &c in ['&', '\0'].iter() {
             for x in 0..48u32 {
                 for y in 0..48u32 {
-                    let mut s = repeat("x").take(x as usize).collect::<String>();
+                    let mut s = "x".repeat(x as usize);
                     s.push(c);
-                    s.push_str(&repeat("x").take(y as usize).collect::<String>());
+                    s.push_str(&"x".repeat(y as usize));
 
                     let set = small_char_set!('&' '\0');
                     assert_eq!(x, set.nonmember_prefix_len(&s));
diff --git a/rcdom/examples/hello_xml.rs b/rcdom/examples/hello_xml.rs
index 396c199c..792e4d96 100644
--- a/rcdom/examples/hello_xml.rs
+++ b/rcdom/examples/hello_xml.rs
@@ -25,21 +25,13 @@ fn main() {
     let doc = &dom.document;
 
     let hello_node = &doc.children.borrow()[0];
-    let hello_tag = &*dom.elem_name(hello_node).local;
+    let hello_tag = dom.elem_name(hello_node).local;
     let text_node = &hello_node.children.borrow()[0];
 
-    let xml = {
-        let mut xml = String::new();
-
-        match &text_node.data {
-            &NodeData::Text { ref contents } => {
-                xml.push_str(&contents.borrow());
-            },
-            _ => {},
-        };
-
-        xml
-    };
+    let mut xml = String::new();
+    if let NodeData::Text { contents } = &text_node.data {
+        xml.push_str(&contents.borrow());
+    }
 
     println!("{:?} {:?}!", hello_tag, xml);
 }
diff --git a/rcdom/examples/html2html.rs b/rcdom/examples/html2html.rs
index 634b2dfa..316dd0b0 100644
--- a/rcdom/examples/html2html.rs
+++ b/rcdom/examples/html2html.rs
@@ -43,10 +43,7 @@ fn main() {
     // The validator.nu HTML2HTML always prints a doctype at the very beginning.
     io::stdout()
         .write_all(b"<!DOCTYPE html>\n")
-        .ok()
         .expect("writing DOCTYPE failed");
     let document: SerializableHandle = dom.document.clone().into();
-    serialize(&mut io::stdout(), &document, Default::default())
-        .ok()
-        .expect("serialization failed");
+    serialize(&mut io::stdout(), &document, Default::default()).expect("serialization failed");
 }
diff --git a/rcdom/examples/print-rcdom.rs b/rcdom/examples/print-rcdom.rs
index c55d2348..47ea9b1a 100644
--- a/rcdom/examples/print-rcdom.rs
+++ b/rcdom/examples/print-rcdom.rs
@@ -21,7 +21,9 @@ use rcdom::{Handle, NodeData, RcDom};
 fn walk(indent: usize, handle: &Handle) {
     let node = handle;
 
-    for _ in 0..indent { print!(" "); }
+    for _ in 0..indent {
+        print!(" ");
+    }
 
     match node.data {
         NodeData::Document => println!("#Document"),
diff --git a/rcdom/examples/xml_tree_printer.rs b/rcdom/examples/xml_tree_printer.rs
index e61e6ef2..b55289cf 100644
--- a/rcdom/examples/xml_tree_printer.rs
+++ b/rcdom/examples/xml_tree_printer.rs
@@ -44,10 +44,7 @@ fn walk(prefix: &str, handle: &Handle) {
         .children
         .borrow()
         .iter()
-        .filter(|child| match child.data {
-            NodeData::Text { .. } | NodeData::Element { .. } => true,
-            _ => false,
-        })
+        .filter(|child| matches!(child.data, NodeData::Text { .. } | NodeData::Element { .. }))
     {
         walk(&new_indent, child);
     }
diff --git a/rcdom/lib.rs b/rcdom/lib.rs
index c53869a7..0018c879 100644
--- a/rcdom/lib.rs
+++ b/rcdom/lib.rs
@@ -126,9 +126,9 @@ impl Node {
 impl Drop for Node {
     fn drop(&mut self) {
-        let mut nodes = mem::replace(&mut *self.children.borrow_mut(), vec![]);
+        let mut nodes = mem::take(&mut *self.children.borrow_mut());
         while let Some(node) = nodes.pop() {
-            let children = mem::replace(&mut *node.children.borrow_mut(), vec![]);
+            let children = mem::take(&mut *node.children.borrow_mut());
             nodes.extend(children.into_iter());
             if let NodeData::Element {
                 ref template_contents,
@@ -294,16 +294,12 @@ impl TreeSink for RcDom {
 
     fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) {
         // Append to an existing Text node if we have one.
-        match child {
-            NodeOrText::AppendText(ref text) => match parent.children.borrow().last() {
-                Some(h) => {
-                    if append_to_existing_text(h, text) {
-                        return;
-                    }
-                },
-                _ => (),
-            },
-            _ => (),
+        if let NodeOrText::AppendText(text) = &child {
+            if let Some(h) = parent.children.borrow().last() {
+                if append_to_existing_text(h, text) {
+                    return;
+                }
+            }
         }
 
         append(
@@ -417,7 +413,7 @@ impl TreeSink for RcDom {
                 &previous_parent.unwrap().upgrade().expect("dangling weak")
             ))
         }
-        new_children.extend(mem::replace(&mut *children, Vec::new()));
+        new_children.extend(mem::take(&mut *children));
     }
 
     fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
diff --git a/rcdom/tests/html-driver.rs b/rcdom/tests/html-driver.rs
index 04648723..9510c312 100644
--- a/rcdom/tests/html-driver.rs
+++ b/rcdom/tests/html-driver.rs
@@ -12,7 +12,7 @@ fn from_utf8() {
     let document: SerializableHandle = dom.document.clone().into();
     serialize::serialize(&mut serialized, &document, Default::default()).unwrap();
     assert_eq!(
-        String::from_utf8(serialized).unwrap().replace(" ", ""),
+        String::from_utf8(serialized).unwrap().replace(' ', ""),
         "<html><head><title>Test</title></head><body></body></html>"
     );
 }
diff --git a/rcdom/tests/html-serializer.rs b/rcdom/tests/html-serializer.rs
index d599cbb8..e4e6562b 100644
--- a/rcdom/tests/html-serializer.rs
+++ b/rcdom/tests/html-serializer.rs
@@ -34,9 +34,9 @@ impl Serialize for Tokens {
         S: Serializer,
     {
         for t in self.0.iter() {
-            match t {
+            match &t {
                 // TODO: check whether this is an IE conditional comment or a spec comment
-                &Token::TagToken(ref tag) => {
+                Token::TagToken(tag) => {
                     let name = QualName::new(
                         None,
                         "http://www.w3.org/1999/xhtml".into(),
@@ -50,14 +50,15 @@ impl Serialize for Tokens {
                         TagKind::EndTag => serializer.end_elem(name)?,
                     }
                 },
-                &Token::DoctypeToken(ref dt) => match dt.name {
-                    Some(ref name) => serializer.write_doctype(&name)?,
-                    None => {},
+                Token::DoctypeToken(dt) => {
+                    if let Some(name) = &dt.name {
+                        serializer.write_doctype(name)?
+                    }
                 },
-                &Token::CommentToken(ref chars) => serializer.write_comment(&chars)?,
-                &Token::CharacterTokens(ref chars) => serializer.write_text(&chars)?,
-                &Token::NullCharacterToken | &Token::EOFToken => {},
-                &Token::ParseError(ref e) => println!("parse error: {:#?}", e),
+                Token::CommentToken(chars) => serializer.write_comment(chars)?,
+                Token::CharacterTokens(chars) => serializer.write_text(chars)?,
+                Token::NullCharacterToken | Token::EOFToken => {},
+                Token::ParseError(e) => println!("parse error: {e:#?}"),
             }
         }
         Ok(())
@@ -66,8 +67,8 @@ impl Serialize for Tokens {
 
 fn tokenize_and_serialize(input: StrTendril) -> StrTendril {
     let mut input = {
-        let mut q = ::html5ever::tokenizer::BufferQueue::new();
-        q.push_front(input.into());
+        let mut q = ::html5ever::tokenizer::BufferQueue::default();
+        q.push_front(input);
         q
     };
     let mut tokenizer = Tokenizer::new(Tokens(vec![]), Default::default());
@@ -251,7 +252,7 @@ fn deep_tree() {
         QualName::new(None, ns!(html), local_name!("div")),
         vec![],
     );
-    let src = String::from("<div>".repeat(60_000));
+    let src = "<div>".repeat(60_000);
     let dom = parser.one(src);
     let opts = SerializeOpts::default();
     let mut ret_val = Vec::new();
diff --git a/rcdom/tests/html-tokenizer.rs b/rcdom/tests/html-tokenizer.rs
index 9fa68e37..f67caf8f 100644
--- a/rcdom/tests/html-tokenizer.rs
+++ b/rcdom/tests/html-tokenizer.rs
@@ -11,7 +11,9 @@ mod foreach_html5lib_test;
 use foreach_html5lib_test::foreach_html5lib_test;
 
 use html5ever::tendril::*;
-use html5ever::tokenizer::states::{Plaintext, RawData, Rawtext, Rcdata, ScriptData, CdataSection, Data};
+use html5ever::tokenizer::states::{
+    CdataSection, Data, Plaintext, RawData, Rawtext, Rcdata, ScriptData,
+};
 use html5ever::tokenizer::BufferQueue;
 use html5ever::tokenizer::{CharacterTokens, EOFToken, NullCharacterToken, ParseError};
 use html5ever::tokenizer::{CommentToken, DoctypeToken, TagToken, Token};
@@ -20,17 +22,14 @@ use html5ever::tokenizer::{TokenSink, TokenSinkResult, Tokenizer, TokenizerOpts}
 use html5ever::{namespace_url, ns, Attribute, LocalName, QualName};
 use rustc_test::{DynTestFn, DynTestName, TestDesc, TestDescAndFn};
 use serde_json::{Map, Value};
-use std::borrow::Cow;
 use std::ffi::OsStr;
-use std::io::Read;
 use std::fs::File;
-use std::mem::replace;
+use std::io::Read;
 use std::path::Path;
-use std::{char, env};
-
+use std::{char, env, mem};
 
 #[derive(Debug)]
-struct TestError(Cow<'static, str>);
+struct TestError;
 
 impl PartialEq for TestError {
     fn eq(&self, _: &TestError) -> bool {
@@ -58,7 +57,7 @@ fn splits(s: &str, n: usize) -> Vec<Vec<StrTendril>> {
         }
     }
 
-    out.extend(splits(s, n - 1).into_iter());
+    out.extend(splits(s, n - 1));
     out.truncate(MAX_SPLITS);
     out
 }
@@ -76,7 +75,7 @@ impl TokenLogger {
             tokens: vec![],
             errors: vec![],
             current_str: StrTendril::new(),
-            exact_errors: exact_errors,
+            exact_errors,
         }
     }
 
@@ -88,12 +87,12 @@ impl TokenLogger {
 
     fn finish_str(&mut self) {
         if self.current_str.len() > 0 {
-            let s = replace(&mut self.current_str, StrTendril::new());
+            let s = mem::take(&mut self.current_str);
             self.tokens.push(CharacterTokens(s));
         }
     }
 
-    fn get_tokens(mut self) -> (Vec<Token>, Vec<TestError>){
+    fn get_tokens(mut self) -> (Vec<Token>, Vec<TestError>) {
         self.finish_str();
         (self.tokens, self.errors)
     }
@@ -112,9 +111,9 @@ impl TokenSink for TokenLogger {
                 self.current_str.push_char('\0');
             },
 
-            ParseError(e) => {
+            ParseError(_) => {
                 if self.exact_errors {
-                    self.errors.push(TestError(e));
+                    self.errors.push(TestError);
                 }
             },
 
@@ -143,7 +142,7 @@ impl TokenSink for TokenLogger {
 
 fn tokenize(input: Vec<StrTendril>, opts: TokenizerOpts) -> (Vec<Token>, Vec<TestError>) {
     let sink = TokenLogger::new(opts.exact_errors);
     let mut tok = Tokenizer::new(sink, opts);
-    let mut buffer = BufferQueue::new();
+    let mut buffer = BufferQueue::default();
     for chunk in input.into_iter() {
         buffer.push_back(chunk);
         let _ = tok.feed(&mut buffer);
     }
@@ -158,9 +157,9 @@ trait JsonExt: Sized {
     fn get_tendril(&self) -> StrTendril;
     fn get_nullable_tendril(&self) -> Option<StrTendril>;
     fn get_bool(&self) -> bool;
-    fn get_obj<'t>(&'t self) -> &'t Map<String, Value>;
-    fn get_list<'t>(&'t self) -> &'t Vec<Value>;
-    fn find<'t>(&'t self, key: &str) -> &'t Self;
+    fn get_obj(&self) -> &Map<String, Value>;
+    fn get_list(&self) -> &Vec<Value>;
+    fn find(&self, key: &str) -> &Self;
 }
 
 impl JsonExt for Value {
@@ -193,22 +192,22 @@ impl JsonExt for Value {
         }
     }
 
-    fn get_obj<'t>(&'t self) -> &'t Map<String, Value> {
-        match *self {
-            Value::Object(ref m) => &*m,
+    fn get_obj(&self) -> &Map<String, Value> {
+        match self {
+            Value::Object(m) => m,
             _ => panic!("Value::get_obj: not an Object"),
         }
     }
 
-    fn get_list<'t>(&'t self) -> &'t Vec<Value> {
-        match *self {
-            Value::Array(ref m) => m,
+    fn get_list(&self) -> &Vec<Value> {
+        match self {
+            Value::Array(m) => m,
             _ => panic!("Value::get_list: not an Array"),
         }
     }
 
-    fn find<'t>(&'t self, key: &str) -> &'t Value {
-        self.get_obj().get(&key.to_string()).unwrap()
+    fn find(&self, key: &str) -> &Value {
+        self.get_obj().get(key).unwrap()
     }
 }
 
@@ -260,7 +259,11 @@ fn json_to_token(js: &Value) -> Token {
 }
 
 // Parse the "output" field of the test case into a vector of tokens.
-fn json_to_tokens(js_tokens: &Value, js_errors: &[Value], exact_errors: bool) -> (Vec<Token>, Vec<TestError>) {
+fn json_to_tokens(
+    js_tokens: &Value,
+    js_errors: &[Value],
+    exact_errors: bool,
+) -> (Vec<Token>, Vec<TestError>) {
     // Use a TokenLogger so that we combine character tokens separated
     // by an ignored error.
     let mut sink = TokenLogger::new(exact_errors);
@@ -309,12 +312,12 @@ fn unescape(s: &str) -> Option<String> {
 }
 
 fn unescape_json(js: &Value) -> Value {
-    match *js {
+    match js {
         // unwrap is OK here because the spec'd *output* of the tokenizer never
         // contains a lone surrogate.
-        Value::String(ref s) => Value::String(unescape(&s).unwrap()),
-        Value::Array(ref xs) => Value::Array(xs.iter().map(unescape_json).collect()),
-        Value::Object(ref obj) => {
+        Value::String(s) => Value::String(unescape(s).unwrap()),
+        Value::Array(xs) => Value::Array(xs.iter().map(unescape_json).collect()),
+        Value::Object(obj) => {
             let mut new_obj = Map::new();
             for (k, v) in obj.iter() {
                 new_obj.insert(k.clone(), unescape_json(v));
@@ -325,7 +328,13 @@ fn unescape_json(js: &Value) -> Value {
     }
 }
 
-fn mk_test(desc: String, input: String, expect: Value, expect_errors: Vec<Value>, opts: TokenizerOpts) -> TestDescAndFn {
+fn mk_test(
+    desc: String,
+    input: String,
+    expect: Value,
+    expect_errors: Vec<Value>,
+    opts: TokenizerOpts,
+) -> TestDescAndFn {
     TestDescAndFn {
         desc: TestDesc::new(DynTestName(desc)),
         testfn: DynTestFn(Box::new(move || {
@@ -353,7 +362,7 @@ fn mk_tests(tests: &mut Vec<TestDescAndFn>, filename: &str, js: &Value) {
     let obj = js.get_obj();
     let mut input = js.find("input").get_str();
     let mut expect = js.find("output").clone();
-    let expect_errors = js.get("errors").map(JsonExt::get_list).map(Vec::as_slice).unwrap_or_default();
+    let expect_errors = js
+        .get("errors")
+        .map(JsonExt::get_list)
+        .map(Vec::as_slice)
+        .unwrap_or_default();
     let desc = format!("tok: {}: {}", filename, js.find("description").get_str());
 
     // "Double-escaped" tests require additional processing of
@@ -374,7 +387,7 @@ fn mk_tests(tests: &mut Vec<TestDescAndFn>, filename: &str, js: &Value) {
     // Some tests want to start in a state other than Data.
     let state_overrides = match obj.get(&"initialStates".to_string()) {
-        Some(&Value::Array(ref xs)) => xs
+        Some(Value::Array(xs)) => xs
             .iter()
             .map(|s| {
                 Some(match &s.get_str()[..] {
@@ -396,9 +409,8 @@ fn mk_tests(tests: &mut Vec<TestDescAndFn>, filename: &str, js: &Value) {
     for state in state_overrides.into_iter() {
         for &exact_errors in [false, true].iter() {
             let mut newdesc = desc.clone();
-            match state {
-                Some(s) => newdesc = format!("{} (in state {:?})", newdesc, s),
-                None => (),
+            if let Some(s) = state {
+                newdesc = format!("{} (in state {:?})", newdesc, s)
             };
             if exact_errors {
                 newdesc = format!("{} (exact errors)", newdesc);
@@ -410,7 +422,7 @@ fn mk_tests(tests: &mut Vec<TestDescAndFn>, filename: &str, js: &Value) {
                 expect.clone(),
                 expect_errors.to_owned(),
                 TokenizerOpts {
-                    exact_errors: exact_errors,
+                    exact_errors,
                     initial_state: state,
                     last_start_tag_name: start_tag.clone(),
@@ -430,24 +442,17 @@ fn tests(src_dir: &Path) -> Vec<TestDescAndFn> {
     let mut add_test = |path: &Path, mut file: File| {
         let mut s = String::new();
-        file.read_to_string(&mut s)
-            .ok()
-            .expect("file reading error");
-        let js: Value = serde_json::from_str(&s).ok().expect("json parse error");
-
-        match js.get_obj().get(&"tests".to_string()) {
-            Some(&Value::Array(ref lst)) => {
-                for test in lst.iter() {
-                    mk_tests(
-                        &mut tests,
-                        path.file_name().unwrap().to_str().unwrap(),
-                        test,
-                    )
-                }
-            },
-
-            // xmlViolation.test doesn't follow this format.
-            _ => (),
+        file.read_to_string(&mut s).expect("file reading error");
+        let js: Value = serde_json::from_str(&s).expect("json parse error");
+
+        if let Some(Value::Array(lst)) = js.get_obj().get("tests") {
+            for test in lst.iter() {
+                mk_tests(
+                    &mut tests,
+                    path.file_name().unwrap().to_str().unwrap(),
+                    test,
+                )
+            }
         }
     };
 
@@ -455,14 +460,14 @@ fn tests(src_dir: &Path) -> Vec<TestDescAndFn> {
         src_dir,
         "html5lib-tests/tokenizer",
         OsStr::new("test"),
-        &mut add_test
+        &mut add_test,
     );
 
     foreach_html5lib_test(
         src_dir,
         "custom-html5lib-tokenizer-tests",
         OsStr::new("test"),
-        &mut add_test
+        &mut add_test,
     );
 
     tests
diff --git a/rcdom/tests/html-tree-builder.rs b/rcdom/tests/html-tree-builder.rs
index e17ea695..d22207d3 100644
--- a/rcdom/tests/html-tree-builder.rs
+++ b/rcdom/tests/html-tree-builder.rs
@@ -18,10 +18,8 @@ use foreach_html5lib_test::foreach_html5lib_test;
 use std::collections::{HashMap, HashSet};
 use std::ffi::OsStr;
 use std::io::BufRead;
-use std::iter::repeat;
-use std::mem::replace;
 use std::path::Path;
-use std::{env, fs, io};
+use std::{env, fs, io, iter, mem};
 use test::{DynTestName, TestDesc, TestDescAndFn, TestFn};
 
 use html5ever::tendril::{StrTendril, TendrilSink};
@@ -39,14 +37,14 @@ fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
             None => (),
             Some(key) => {
-                assert!(test.insert(key, replace(&mut val, String::new())).is_none());
+                assert!(test.insert(key, mem::take(&mut val)).is_none());
             }
         }
     ));
     macro_rules! finish_test ( () => (
         if !test.is_empty() {
-            tests.push(replace(&mut test, HashMap::new()));
+            tests.push(mem::take(&mut test));
         }
     ));
 
@@ -54,12 +52,12 @@ fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
             None => break,
             Some(line) => {
-                if line.starts_with("#") {
+                if let Some(rest) = line.strip_prefix('#') {
                     finish_val!();
                     if line == "#data" {
                         finish_test!();
                     }
-                    key = Some(line[1..].to_string());
+                    key = Some(rest.to_owned());
                 } else {
                     val.push_str(&line);
                     val.push('\n');
@@ -74,8 +72,8 @@ fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
 }
 
 fn serialize(buf: &mut String, indent: usize, handle: Handle) {
-    buf.push_str("|");
-    buf.push_str(&repeat(" ").take(indent).collect::<String>());
+    buf.push('|');
+    buf.extend(iter::repeat(" ").take(indent));
 
     let node = handle;
     match node.data {
@@ -87,7 +85,7 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {
             ref system_id,
         } => {
             buf.push_str("<!DOCTYPE ");
             buf.push_str(&name);
             if !public_id.is_empty() || !system_id.is_empty() {
                 buf.push_str(&format!(" \"{}\" \"{}\"", public_id, system_id));
             }
             buf.push_str(">\n");
         },
 
         NodeData::Text { ref contents } => {
-            buf.push_str("\"");
+            buf.push('"');
             buf.push_str(&contents.borrow());
             buf.push_str("\"\n");
         },
 
         NodeData::Comment { ref contents } => {
             buf.push_str("<!-- ");
             buf.push_str(&contents.borrow());
             buf.push_str(" -->\n");
         },
@@ -111,13 +109,13 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {
             ref attrs,
             ..
         } => {
-            buf.push_str("<");
+            buf.push('<');
             match name.ns {
                 ns!(svg) => buf.push_str("svg "),
                 ns!(mathml) => buf.push_str("math "),
                 _ => (),
             }
-            buf.push_str(&*name.local);
+            buf.push_str(&name.local);
             buf.push_str(">\n");
 
             let mut attrs = attrs.borrow().clone();
@@ -125,8 +123,8 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {
 
         // FIXME: sort by UTF-16 code unit
         for attr in attrs.into_iter() {
-            buf.push_str("|");
-            buf.push_str(&repeat(" ").take(indent + 2).collect::<String>());
+            buf.push('|');
+            buf.extend(iter::repeat(" ").take(indent + 2));
             match attr.name.ns {
                 ns!(xlink) => buf.push_str("xlink "),
                 ns!(xml) => buf.push_str("xml "),
@@ -150,8 +148,8 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {
     } = node.data
     {
         if let Some(ref content) = &*template_contents.borrow() {
-            buf.push_str("|");
-            buf.push_str(&repeat(" ").take(indent + 2).collect::<String>());
+            buf.push('|');
+            buf.extend(iter::repeat(" ").take(indent + 2));
             buf.push_str("content\n");
             for child in content.children.borrow().iter() {
                 serialize(buf, indent + 4, child.clone());
@@ -211,7 +209,7 @@ fn make_test_desc_with_scripting_flag(
 
     TestDescAndFn {
         desc: TestDesc {
-            ignore: ignore,
+            ignore,
             ..TestDesc::new(DynTestName(name))
         },
         testfn: TestFn::dyn_test_fn(move || {
@@ -251,10 +249,10 @@ fn make_test_desc_with_scripting_flag(
 }
 
 fn context_name(context: &str) -> QualName {
-    if context.starts_with("svg ") {
-        QualName::new(None, ns!(svg), LocalName::from(&context[4..]))
-    } else if context.starts_with("math ") {
-        QualName::new(None, ns!(mathml), LocalName::from(&context[5..]))
+    if let Some(cx) = context.strip_prefix("svg ") {
+        QualName::new(None, ns!(svg), LocalName::from(cx))
+    } else if let Some(cx) = context.strip_prefix("math ") {
+        QualName::new(None, ns!(mathml), LocalName::from(cx))
     } else {
         QualName::new(None, ns!(html), LocalName::from(context))
     }
@@ -269,7 +267,7 @@ fn tests(src_dir: &Path, ignores: &HashSet<String>) -> Vec<TestDescAndFn> {
         OsStr::new("dat"),
         |path, file| {
             let buf = io::BufReader::new(file);
-            let lines = buf.lines().map(|res| res.ok().expect("couldn't read"));
+            let lines = buf.lines().map(|res| res.expect("couldn't read"));
             let data = parse_tests(lines);
 
             for (i, test) in data.into_iter().enumerate() {
@@ -292,7 +290,7 @@ fn main() {
     let src_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
     let mut ignores = HashSet::new();
     {
-        let f = fs::File::open(&src_dir.join("data/test/ignore")).unwrap();
+        let f = fs::File::open(src_dir.join("data/test/ignore")).unwrap();
         let r = io::BufReader::new(f);
         for ln in r.lines() {
             ignores.insert(ln.unwrap().trim_end().to_string());
diff --git a/rcdom/tests/xml-tokenizer.rs b/rcdom/tests/xml-tokenizer.rs
index 79f8fc77..cbdf10c3 100644
--- a/rcdom/tests/xml-tokenizer.rs
+++ b/rcdom/tests/xml-tokenizer.rs
@@ -9,11 +9,10 @@
 
 use serde_json::{Map, Value};
 use std::borrow::Cow::Borrowed;
-use std::env;
 use std::ffi::OsStr;
 use std::io::Read;
-use std::mem::replace;
 use std::path::Path;
+use std::{env, mem};
 
 use rustc_test::{DynTestFn, DynTestName, TestDesc, TestDescAndFn};
 use util::find_tests::foreach_xml5lib_test;
@@ -51,7 +50,7 @@ fn splits(s: &str, n: usize) -> Vec<Vec<StrTendril>> {
         }
     }
 
-    out.extend(splits(s, n - 1).into_iter());
+    out.extend(splits(s, n - 1));
     out
 }
 
@@ -66,7 +65,7 @@ impl TokenLogger {
         TokenLogger {
             tokens: vec![],
             current_str: StrTendril::new(),
-            exact_errors: exact_errors,
+            exact_errors,
         }
     }
 
@@ -78,7 +77,7 @@ impl TokenLogger {
 
     fn finish_str(&mut self) {
         if self.current_str.len() > 0 {
-            let s = replace(&mut self.current_str, StrTendril::new());
+            let s = mem::take(&mut self.current_str);
             self.tokens.push(CharacterTokens(s));
         }
     }
@@ -129,13 +128,13 @@ impl TokenSink for TokenLogger {
 fn tokenize_xml(input: Vec<StrTendril>, opts: XmlTokenizerOpts) -> Vec<Token> {
     let sink = TokenLogger::new(opts.exact_errors);
     let mut tok = XmlTokenizer::new(sink, opts);
-    let mut buf = BufferQueue::new();
+    let mut buf = BufferQueue::default();
 
     for chunk in input.into_iter() {
         buf.push_back(chunk);
-        let _ = tok.feed(&mut buf);
+        tok.feed(&mut buf);
     }
-    let _ = tok.feed(&mut buf);
+    tok.feed(&mut buf);
     tok.end();
     tok.sink.get_tokens()
 }
 
@@ -145,9 +144,9 @@ trait JsonExt: Sized {
     fn get_tendril(&self) -> StrTendril;
     fn get_nullable_tendril(&self) -> Option<StrTendril>;
     fn get_bool(&self) -> bool;
-    fn get_obj<'t>(&'t self) -> &'t Map<String, Value>;
-    fn get_list<'t>(&'t self) -> &'t Vec<Value>;
-    fn find<'t>(&'t self, key: &str) -> &'t Self;
+    fn get_obj(&self) -> &Map<String, Value>;
+    fn get_list(&self) -> &Vec<Value>;
+    fn find(&self, key: &str) -> &Self;
 }
 
 impl JsonExt for Value {
@@ -180,21 +179,21 @@ impl JsonExt for Value {
         }
     }
 
-    fn get_obj<'t>(&'t self) -> &'t Map<String, Value> {
-        match *self {
-            Value::Object(ref m) => &*m,
+    fn get_obj(&self) -> &Map<String, Value> {
+        match self {
+            Value::Object(m) => m,
             _ => panic!("Value::get_obj: not an Object"),
         }
     }
 
-    fn get_list<'t>(&'t self) -> &'t Vec<Value> {
-        match *self {
-            Value::Array(ref m) => m,
+    fn get_list(&self) -> &Vec<Value> {
+        match self {
+            Value::Array(m) => m,
             _ => panic!("Value::get_list: not an Array"),
         }
    }
 
-    fn find<'t>(&'t self, key: &str) -> &'t Value {
+    fn find(&self, key: &str) -> &Value {
         self.get_obj().get(&key.to_string()).unwrap()
     }
 }
@@ -296,7 +295,7 @@ fn mk_xml_test(
             // Also clone opts.  If we don't, we get the wrong
             // result but the compiler doesn't catch it!
             // Possibly mozilla/rust#12223.
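             // (Editorial note, inferred from this change compiling:
             // XmlTokenizerOpts is a Copy type, so `opts` passed by value
             // below stays usable for `opts.exact_errors` afterwards without
             // the explicit clone().)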
-            let output = tokenize_xml(input.clone(), opts.clone());
+            let output = tokenize_xml(input.clone(), opts);
             let expect = json_to_tokens(&expect, opts.exact_errors);
             if output != expect {
                 panic!(
@@ -321,9 +320,8 @@ fn mk_xml_tests(tests: &mut Vec<TestDescAndFn>, filename: &str, js: &Value) {
     for state in state_overrides.into_iter() {
         for &exact_errors in [false, true].iter() {
             let mut newdesc = desc.clone();
-            match state {
-                Some(s) => newdesc = format!("{} (in state {:?})", newdesc, s),
-                None => (),
+            if let Some(s) = state {
+                newdesc = format!("{} (in state {:?})", newdesc, s)
             };
             if exact_errors {
                 newdesc = format!("{} (exact errors)", newdesc);
@@ -334,7 +332,7 @@ fn mk_xml_tests(tests: &mut Vec<TestDescAndFn>, filename: &str, js: &Value) {
                 String::from(input),
                 expect.clone(),
                 XmlTokenizerOpts {
-                    exact_errors: exact_errors,
+                    exact_errors,
                     initial_state: state,
 
                     // Not discarding a BOM is what the test suite expects; see
@@ -356,23 +354,17 @@ fn tests(src_dir: &Path) -> Vec<TestDescAndFn> {
         OsStr::new("test"),
         |path, mut file| {
             let mut s = String::new();
-            file.read_to_string(&mut s)
-                .ok()
-                .expect("file reading error");
-            let js: Value = serde_json::from_str(&s).ok().expect("json parse error");
-
-            match js["tests"] {
-                Value::Array(ref lst) => {
-                    for test in lst.iter() {
-                        mk_xml_tests(
-                            &mut tests,
-                            path.file_name().unwrap().to_str().unwrap(),
-                            test,
-                        );
-                    }
-                },
-
-                _ => (),
+            file.read_to_string(&mut s).expect("file reading error");
+            let js: Value = serde_json::from_str(&s).expect("json parse error");
+
+            if let Value::Array(ref lst) = js["tests"] {
+                for test in lst.iter() {
+                    mk_xml_tests(
+                        &mut tests,
+                        path.file_name().unwrap().to_str().unwrap(),
+                        test,
+                    );
+                }
             }
         },
     );
diff --git a/rcdom/tests/xml-tree-builder.rs b/rcdom/tests/xml-tree-builder.rs
index 03c558a4..98365c75 100644
--- a/rcdom/tests/xml-tree-builder.rs
+++ b/rcdom/tests/xml-tree-builder.rs
@@ -13,10 +13,8 @@
 use rustc_test::{DynTestFn, DynTestName, TestDesc, TestDescAndFn};
 use std::collections::{HashMap, HashSet};
 use std::ffi::OsStr;
 use std::io::BufRead;
-use std::iter::repeat;
-use std::mem::replace;
 use std::path::Path;
-use std::{env, fs, io};
+use std::{env, fs, io, iter, mem};
 use util::find_tests::foreach_xml5lib_test;
 use xml5ever::driver::parse_document;
 use xml5ever::tendril::TendrilSink;
@@ -35,14 +33,14 @@ fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
             None => (),
             Some(key) => {
-                assert!(test.insert(key, replace(&mut val, String::new())).is_none());
+                assert!(test.insert(key, mem::take(&mut val)).is_none());
             }
         }
     ));
     macro_rules! finish_test ( () => (
         if !test.is_empty() {
-            tests.push(replace(&mut test, HashMap::new()));
+            tests.push(mem::take(&mut test));
         }
     ));
 
@@ -50,12 +48,12 @@ fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
             None => break,
             Some(line) => {
-                if line.starts_with("#") {
+                if let Some(rest) = line.strip_prefix('#') {
                     finish_val!();
                     if line == "#data" {
                         finish_test!();
                     }
-                    key = Some(line[1..].to_string());
+                    key = Some(rest.to_string());
                 } else {
                     val.push_str(&line);
                     val.push('\n');
@@ -70,68 +68,61 @@ fn parse_tests<It: Iterator<Item = String>>(mut lines: It) -> Vec<HashMap<String, String>> {
 }
 
 fn serialize(buf: &mut String, indent: usize, handle: Handle) {
-    buf.push_str("|");
-    buf.push_str(&repeat(" ").take(indent).collect::<String>());
+    buf.push('|');
+    buf.extend(iter::repeat(" ").take(indent));
 
     let node = handle;
-    match node.data {
+    match &node.data {
         NodeData::Document => panic!("should not reach Document"),
 
         NodeData::Doctype {
-            ref name,
-            ref public_id,
-            ref system_id,
+            name,
+            public_id,
+            system_id,
         } => {
             buf.push_str("<!DOCTYPE ");
             buf.push_str(name);
             buf.push_str(&format!(" \"{}\" \"{}\"", public_id, system_id));
             buf.push_str(">\n");
         },
 
-        NodeData::Text { ref contents } => {
-            buf.push_str("\"");
+        NodeData::Text { contents } => {
+            buf.push('"');
             buf.push_str(&contents.borrow());
             buf.push_str("\"\n");
         },
 
-        NodeData::ProcessingInstruction {
-            ref target,
-            ref contents,
-        } => {
+        NodeData::ProcessingInstruction { target, contents } => {
             buf.push_str("<?");
             buf.push_str(target);
             buf.push(' ');
             buf.push_str(&contents.borrow());
             buf.push_str("?>\n");
         },
 
-        NodeData::Comment { ref contents } => {
+        NodeData::Comment { contents } => {
             buf.push_str("<!-- ");
             buf.push_str(&contents.borrow());
             buf.push_str(" -->\n");
         },
 
-        NodeData::Element {
-            ref name,
-            ref attrs,
-            ..
-        } => {
-            buf.push_str("<");
+        NodeData::Element { name, attrs, .. } => {
+            buf.push('<');
             if name.ns != ns!() {
-                buf.push_str("{");
-                buf.push_str(&*name.ns);
-                buf.push_str("}");
+                buf.push('{');
+                buf.push_str(&name.ns);
+                buf.push('}');
             };
-            if let Some(ref prefix) = name.prefix {
-                buf.push_str(&*prefix);
-                buf.push_str(":");
+            if let Some(prefix) = &name.prefix {
+                buf.push_str(prefix);
+                buf.push(':');
             }
-            buf.push_str(&*name.local);
+            buf.push_str(&name.local);
             buf.push_str(">\n");
 
             let mut attrs = attrs.borrow().clone();
@@ -139,18 +130,18 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {
 
         // FIXME: sort by UTF-16 code unit
         for attr in attrs.into_iter() {
-            buf.push_str("|");
-            buf.push_str(&repeat(" ").take(indent + 2).collect::<String>());
+            buf.push('|');
+            buf.extend(iter::repeat(" ").take(indent + 2));
 
-            if &*attr.name.ns != "" {
-                buf.push_str("{");
-                buf.push_str(&*attr.name.ns);
-                buf.push_str("}");
+            if !attr.name.ns.is_empty() {
+                buf.push('{');
+                buf.push_str(&attr.name.ns);
+                buf.push('}');
             }
 
             if let Some(attr_prefix) = attr.name.prefix {
-                buf.push_str(&*attr_prefix);
-                buf.push_str(":");
+                buf.push_str(&attr_prefix);
+                buf.push(':');
             }
 
             buf.push_str(&format!("{}=\"{}\"\n", attr.name.local, attr.value));
@@ -164,7 +155,7 @@ fn serialize(buf: &mut String, indent: usize, handle: Handle) {
 }
 
 // Ignore tests containing these strings; we don't support these features yet.
-static IGNORE_SUBSTRS: &'static [&'static str] = &["<!DOCTYPE", "<?xml"];
+static IGNORE_SUBSTRS: &[&str] = &["<!DOCTYPE", "<?xml"];
@@ -185,7 +176,7 @@ fn make_xml_test(
 
     tests.push(TestDescAndFn {
         desc: TestDesc {
-            ignore: ignore,
+            ignore,
             ..TestDesc::new(DynTestName(name))
         },
         testfn: DynTestFn(Box::new(move || {
@@ -218,7 +209,7 @@ fn tests(src_dir: &Path, ignores: &HashSet<String>) -> Vec<TestDescAndFn> {
         OsStr::new("dat"),
         |path, file| {
             let buf = io::BufReader::new(file);
-            let lines = buf.lines().map(|res| res.ok().expect("couldn't read"));
+            let lines = buf.lines().map(|res| res.expect("couldn't read"));
             let data = parse_tests(lines);
 
             for (i, test) in data.into_iter().enumerate() {
@@ -240,7 +231,7 @@ fn main() {
     let args: Vec<_> = env::args().collect();
     let src_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
     let mut ignores = HashSet::new();
-    if let Ok(f) = fs::File::open(&src_dir.join("data/test/ignore")) {
+    if let Ok(f) = fs::File::open(src_dir.join("data/test/ignore")) {
         let r = io::BufReader::new(f);
         for ln in r.lines() {
             ignores.insert(ln.unwrap().trim_end().to_string());
diff --git a/rustfmt.toml b/rustfmt.toml
index de839bae..ecd1b146 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -1,3 +1,2 @@
 match_block_trailing_comma = true
-binop_separator = "Back"
 reorder_imports = true
diff --git a/xml5ever/benches/xml5ever.rs b/xml5ever/benches/xml5ever.rs
index a2dc33b0..288613c3 100644
--- a/xml5ever/benches/xml5ever.rs
+++ b/xml5ever/benches/xml5ever.rs
@@ -26,12 +26,11 @@ fn run_bench(c: &mut Criterion, name: &str) {
     let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
     path.push("data/bench/");
     path.push(name);
-    let mut file = fs::File::open(&path).ok().expect("can't open file");
+    let mut file = fs::File::open(&path).expect("can't open file");
 
     // Read the file and treat it as an infinitely repeating sequence of characters.
     let mut file_input = ByteTendril::new();
     file.read_to_tendril(&mut file_input)
-        .ok()
         .expect("can't read file");
     let file_input: StrTendril = file_input.try_reinterpret().unwrap();
     let size = file_input.len();
@@ -54,14 +53,14 @@ fn run_bench(c: &mut Criterion, name: &str) {
     c.bench_function(&test_name, move |b| {
         b.iter(|| {
             let mut tok = XmlTokenizer::new(Sink, Default::default());
-            let mut buffer = BufferQueue::new();
+            let mut buffer = BufferQueue::default();
             // We are doing clone inside the bench function, this is not ideal, but possibly
             // necessary since our iterator consumes the underlying buffer.
             for buf in input.clone().into_iter() {
                 buffer.push_back(buf);
-                let _ = tok.feed(&mut buffer);
+                tok.feed(&mut buffer);
             }
-            let _ = tok.feed(&mut buffer);
+            tok.feed(&mut buffer);
             tok.end();
         })
     });
diff --git a/xml5ever/examples/simple_xml_tokenizer.rs b/xml5ever/examples/simple_xml_tokenizer.rs
index 35dc8a38..66651b68 100644
--- a/xml5ever/examples/simple_xml_tokenizer.rs
+++ b/xml5ever/examples/simple_xml_tokenizer.rs
@@ -40,10 +40,10 @@ impl TokenSink for SimpleTokenPrinter {
                 ref target,
                 ref data,
             }) => {
-                println!("PI : <?{} {}?>", &*target, &*data);
+                println!("PI : <?{} {}?>", target, data);
             },
             CommentToken(ref comment) => {
-                println!("<!--{}-->", &*comment);
+                println!("<!--{}-->", comment);
             },
             EOFToken => {
                 println!("EOF");
             },
@@ -53,7 +53,7 @@ impl TokenSink for SimpleTokenPrinter {
                 ref public_id,
                 ..
             }) => {
-                println!("<!DOCTYPE {:?} {:?}>", &*name, &*public_id);
+                println!("<!DOCTYPE {:?} {:?}>", name, public_id);
             },
         }
     }
@@ -72,7 +72,7 @@ fn main() {
     // into StrTendril.
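     // (Editorial note: try_reinterpret() checks that the bytes are valid
     // UTF-8, returning the original tendril as an Err otherwise, so the
     // unwrap() below is the example's only error handling.)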
     // Load input into BufferQueue
-    let mut input_buffer = BufferQueue::new();
+    let mut input_buffer = BufferQueue::default();
     input_buffer.push_back(input.try_reinterpret().unwrap());
     // Here we create and run tokenizer
     let mut tok = XmlTokenizer::new(sink, Default::default());
diff --git a/xml5ever/examples/xml_tokenizer.rs b/xml5ever/examples/xml_tokenizer.rs
index 46d683e3..ff648597 100644
--- a/xml5ever/examples/xml_tokenizer.rs
+++ b/xml5ever/examples/xml_tokenizer.rs
@@ -91,7 +91,7 @@ fn main() {
     let mut sink = TokenPrinter { in_char_run: false };
     let mut input = ByteTendril::new();
     io::stdin().read_to_tendril(&mut input).unwrap();
-    let mut input_buffer = BufferQueue::new();
+    let mut input_buffer = BufferQueue::default();
     input_buffer.push_back(input.try_reinterpret().unwrap());
 
     let mut tok = XmlTokenizer::new(
diff --git a/xml5ever/src/driver.rs b/xml5ever/src/driver.rs
index df5b9ee4..fd853433 100644
--- a/xml5ever/src/driver.rs
+++ b/xml5ever/src/driver.rs
@@ -42,7 +42,7 @@ where
     let tok = XmlTokenizer::new(tb, opts.tokenizer);
     XmlParser {
         tokenizer: tok,
-        input_buffer: BufferQueue::new(),
+        input_buffer: BufferQueue::default(),
     }
 }
 
diff --git a/xml5ever/src/serialize/mod.rs b/xml5ever/src/serialize/mod.rs
index 95e16a0d..cbc4cbd2 100644
--- a/xml5ever/src/serialize/mod.rs
+++ b/xml5ever/src/serialize/mod.rs
@@ -91,11 +91,9 @@ fn write_qual_name<W: Write>(writer: &mut W, name: &QualName) -> io::Result<()>
     if let Some(ref prefix) = name.prefix {
         writer.write_all(prefix.as_bytes())?;
         writer.write_all(b":")?;
-        writer.write_all(&*name.local.as_bytes())?;
-    } else {
-        writer.write_all(&*name.local.as_bytes())?;
     }
+    writer.write_all(name.local.as_bytes())?;
     Ok(())
 }
 
@@ -123,7 +121,7 @@ impl<Wr: Write> XmlSerializer<Wr> {
     fn find_uri(&self, name: &QualName) -> bool {
         let mut found = false;
         for stack in self.namespace_stack.0.iter().rev() {
-            if let Some(&Some(ref el)) = stack.get(&name.prefix) {
+            if let Some(Some(el)) = stack.get(&name.prefix) {
                 found = *el == name.ns;
                 break;
             }
@@ -132,11 +130,9 @@ impl<Wr: Write> XmlSerializer<Wr> {
 
     fn find_or_insert_ns(&mut self, name: &QualName) {
-        if name.prefix.is_some() || &*name.ns != "" {
-            if !self.find_uri(name) {
-                if let Some(last_ns) = self.namespace_stack.0.last_mut() {
-                    last_ns.insert(name);
-                }
+        if (name.prefix.is_some() || !name.ns.is_empty()) && !self.find_uri(name) {
+            if let Some(last_ns) = self.namespace_stack.0.last_mut() {
+                last_ns.insert(name);
             }
         }
     }
diff --git a/xml5ever/src/tokenizer/char_ref/mod.rs b/xml5ever/src/tokenizer/char_ref/mod.rs
index 6351087e..c9171908 100644
--- a/xml5ever/src/tokenizer/char_ref/mod.rs
+++ b/xml5ever/src/tokenizer/char_ref/mod.rs
@@ -227,9 +227,8 @@ impl CharRefTokenizer {
         input: &mut BufferQueue,
     ) -> Status {
         let mut unconsume = StrTendril::from_char('#');
-        match self.hex_marker {
-            Some(c) => unconsume.push_char(c),
-            None => (),
+        if let Some(c) = self.hex_marker {
+            unconsume.push_char(c);
         }
 
         tokenizer.unconsume(input, unconsume);
diff --git a/xml5ever/src/tokenizer/interface.rs b/xml5ever/src/tokenizer/interface.rs
index 3dbf07ea..c2dad9be 100644
--- a/xml5ever/src/tokenizer/interface.rs
+++ b/xml5ever/src/tokenizer/interface.rs
@@ -64,7 +64,7 @@ impl Tag {
 
 /// Doctype token in XML5 is rather limited, for reasons such as
 /// security and simplicity. XML5 only supports declaring a DTD with
 /// a name, a public identifier, and a system identifier.
-#[derive(PartialEq, Eq, Clone, Debug)]
+#[derive(PartialEq, Eq, Clone, Debug, Default)]
 pub struct Doctype {
     /// Name of DOCTYPE declared
     pub name: Option<StrTendril>,
     /// Public identifier of DOCTYPE
     pub public_id: Option<StrTendril>,
     /// System identifier of DOCTYPE
     pub system_id: Option<StrTendril>,
 }
 
-impl Doctype {
-    /// Constructs an empty DOCTYPE, with all fields set to None.
-    pub fn new() -> Doctype {
-        Doctype {
-            name: None,
-            public_id: None,
-            system_id: None,
-        }
-    }
-}
-
 /// A ProcessingInstruction token.
 #[derive(PartialEq, Eq, Clone, Debug)]
 pub struct Pi {
diff --git a/xml5ever/src/tokenizer/mod.rs b/xml5ever/src/tokenizer/mod.rs
index ee876101..4aad2207 100644
--- a/xml5ever/src/tokenizer/mod.rs
+++ b/xml5ever/src/tokenizer/mod.rs
@@ -26,7 +26,7 @@ use mac::{format_if, unwrap_or_return};
 use markup5ever::{local_name, namespace_prefix, namespace_url, ns, small_char_set};
 use std::borrow::Cow::{self, Borrowed};
 use std::collections::BTreeMap;
-use std::mem::replace;
+use std::mem::{self, replace};
 
 use self::buffer_queue::{BufferQueue, FromSet, NotFromSet, SetResult};
 use self::char_ref::{CharRef, CharRefTokenizer};
@@ -193,7 +193,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
             current_comment: StrTendril::new(),
             current_pi_data: StrTendril::new(),
             current_pi_target: StrTendril::new(),
-            current_doctype: Doctype::new(),
+            current_doctype: Doctype::default(),
             state_profile: BTreeMap::new(),
             time_in_sink: 0,
         }
@@ -305,9 +305,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
         match input.eat(pat, u8::eq_ignore_ascii_case) {
             None if self.at_eof => Some(false),
             None => {
-                while let Some(c) = input.next() {
-                    self.temp_buf.push_char(c);
-                }
+                self.temp_buf.extend(input);
                 None
             },
             Some(matched) => Some(matched),
@@ -434,7 +432,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
         let token = TagToken(Tag {
             kind: self.current_tag_kind,
             name: qname,
-            attrs: replace(&mut self.current_tag_attrs, vec![]),
+            attrs: mem::take(&mut self.current_tag_attrs),
         });
         self.process_token(token);
 
@@ -473,12 +471,12 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
     }
 
     fn emit_current_comment(&mut self) {
-        let comment = replace(&mut self.current_comment, StrTendril::new());
+        let comment = mem::take(&mut self.current_comment);
         self.process_token(CommentToken(comment));
     }
 
     fn emit_current_doctype(&mut self) {
-        let doctype = replace(&mut self.current_doctype, Doctype::new());
+        let doctype = mem::take(&mut self.current_doctype);
         self.process_token(DoctypeToken(doctype));
     }
 
@@ -533,7 +531,7 @@ macro_rules! shorthand (
     ( $me:ident : append_comment $c:expr ) => ( $me.current_comment.push_slice($c) );
     ( $me:ident : emit_comment ) => ( $me.emit_current_comment() );
     ( $me:ident : clear_comment ) => ( $me.current_comment.clear() );
-    ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::new() );
+    ( $me:ident : create_doctype ) => ( $me.current_doctype = Doctype::default() );
     ( $me:ident : push_doctype_name $c:expr ) => ( option_push(&mut $me.current_doctype.name, $c) );
     ( $me:ident : push_doctype_id $k:ident $c:expr ) => ( option_push($me.doctype_id($k), $c) );
     ( $me:ident : clear_doctype_id $k:ident ) => ( $me.clear_doctype_id($k) );
@@ -1070,9 +1068,8 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
             },
             //§ bogus_doctype_state
             XmlState::BogusDoctype => loop {
-                match get_char!(self, input) {
-                    '>' => go!(self: emit_doctype; to Data),
-                    _ => (),
+                if get_char!(self, input) == '>' {
+                    go!(self: emit_doctype; to Data);
                 }
             },
         }
@@ -1082,7 +1079,7 @@ impl<Sink: TokenSink> XmlTokenizer<Sink> {
     pub fn end(&mut self) {
         // Handle EOF in the char ref sub-tokenizer, if there is one.
         // Do this first because it might un-consume stuff.
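         // (Editorial note: the fresh, empty queue below only exists to give
         // the char-ref sub-tokenizer somewhere to un-consume characters into
         // when it handles end-of-file.)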
-        let mut input = BufferQueue::new();
+        let mut input = BufferQueue::default();
         match self.char_ref_tokenizer.take() {
             None => (),
             Some(mut tok) => {
diff --git a/xml5ever/src/tree_builder/mod.rs b/xml5ever/src/tree_builder/mod.rs
index 4c10f8e2..c452c75a 100644
--- a/xml5ever/src/tree_builder/mod.rs
+++ b/xml5ever/src/tree_builder/mod.rs
@@ -40,11 +40,7 @@ struct NamespaceMapStack(Vec<NamespaceMap>);
 
 impl NamespaceMapStack {
     fn new() -> NamespaceMapStack {
-        NamespaceMapStack({
-            let mut vec = Vec::new();
-            vec.push(NamespaceMap::default());
-            vec
-        })
+        NamespaceMapStack(vec![NamespaceMap::default()])
     }
 
     fn push(&mut self, map: NamespaceMap) {
@@ -112,11 +108,7 @@ impl NamespaceMap {
     #[doc(hidden)]
     pub fn insert(&mut self, name: &QualName) {
-        let prefix = if let Some(ref p) = name.prefix {
-            Some(p.clone())
-        } else {
-            None
-        };
+        let prefix = name.prefix.as_ref().cloned();
         let namespace = Some(Namespace::from(&*name.ns));
         self.scope.insert(prefix, namespace);
     }
@@ -175,15 +167,9 @@ impl NamespaceMap {
 }
 
 /// Tree builder options, with an impl for Default.
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, Default)]
 pub struct XmlTreeBuilderOpts {}
 
-impl Default for XmlTreeBuilderOpts {
-    fn default() -> XmlTreeBuilderOpts {
-        XmlTreeBuilderOpts {}
-    }
-}
-
 /// The XML tree builder.
 pub struct XmlTreeBuilder<Handle, Sink> {
     /// Configuration options for XmlTreeBuilder
@@ -236,7 +222,7 @@ where
             namespace_stack: NamespaceMapStack::new(),
             current_namespace: NamespaceMap::empty(),
             present_attrs: HashSet::new(),
-            phase: StartPhase,
+            phase: Start,
         }
     }
 
@@ -413,13 +399,13 @@ where
                 return;
             },
 
-            tokenizer::DoctypeToken(d) => DoctypeToken(d),
-            tokenizer::PIToken(x) => PIToken(x),
-            tokenizer::TagToken(x) => TagToken(x),
-            tokenizer::CommentToken(x) => CommentToken(x),
-            tokenizer::NullCharacterToken => NullCharacterToken,
-            tokenizer::EOFToken => EOFToken,
-            tokenizer::CharacterTokens(x) => CharacterTokens(x),
+            tokenizer::DoctypeToken(d) => Doctype(d),
+            tokenizer::PIToken(x) => Pi(x),
+            tokenizer::TagToken(x) => Tag(x),
+            tokenizer::CommentToken(x) => Comment(x),
+            tokenizer::NullCharacterToken => NullCharacter,
+            tokenizer::EOFToken => Eof,
+            tokenizer::CharacterTokens(x) => Characters(x),
         };
 
         self.process_to_completion(token);
@@ -541,7 +527,7 @@ where
         P: Fn(ExpandedName) -> bool,
     {
         loop {
-            if self.current_node_in(|x| pred(x)) {
+            if self.current_node_in(&pred) {
                 break;
             }
             self.pop();
@@ -617,8 +603,8 @@ where
         self.debug_step(mode, &token);
 
         match mode {
-            StartPhase => match token {
-                TagToken(Tag {
+            Start => match token {
+                Tag(Tag {
                     kind: StartTag,
                     name,
                     attrs,
@@ -632,11 +618,11 @@ where
                         self.process_namespaces(&mut tag);
                         tag
                     };
-                    self.phase = MainPhase;
+                    self.phase = Main;
                     let handle = self.append_tag_to_doc(tag);
                     self.add_to_open_elems(handle)
                 },
-                TagToken(Tag {
+                Tag(Tag {
                     kind: EmptyTag,
                     name,
                     attrs,
@@ -650,20 +636,20 @@ where
                         self.process_namespaces(&mut tag);
                         tag
                     };
-                    self.phase = EndPhase;
+                    self.phase = End;
                     let handle = self.append_tag_to_doc(tag);
                     self.sink.pop(&handle);
                     Done
                 },
-                CommentToken(comment) => self.append_comment_to_doc(comment),
-                PIToken(pi) => self.append_pi_to_doc(pi),
-                CharacterTokens(ref chars) if !any_not_whitespace(chars) => Done,
-                EOFToken => {
+                Comment(comment) => self.append_comment_to_doc(comment),
+                Pi(pi) => self.append_pi_to_doc(pi),
+                Characters(ref chars) if !any_not_whitespace(chars) => Done,
+                Eof => {
                     self.sink
                         .parse_error(Borrowed("Unexpected EOF in start phase"));
-                    Reprocess(EndPhase, EOFToken)
+                    Reprocess(End, Eof)
                 },
-                DoctypeToken(d) => {
+                Doctype(d) => {
                     self.append_doctype_to_doc(d);
                     Done
                 },
                 _ => {
                     self.sink
                         .parse_error(Borrowed("Unexpected element in start phase"));
                     Done
                 },
             },
@@ -673,9 +659,9 @@ where
                     Done
                 },
             },
-            MainPhase => match token {
-                CharacterTokens(chs) => self.append_text(chs),
-                TagToken(Tag {
+            Main => match token {
+                Characters(chs) => self.append_text(chs),
+                Tag(Tag {
                     kind: StartTag,
                     name,
                     attrs,
@@ -691,7 +677,7 @@ where
                     };
                     self.insert_tag(tag)
                 },
-                TagToken(Tag {
+                Tag(Tag {
                     kind: EmptyTag,
                     name,
                     attrs,
@@ -713,7 +699,7 @@ where
                         self.append_tag(tag)
                     }
                 },
-                TagToken(Tag {
+                Tag(Tag {
                     kind: EndTag,
                     name,
                     attrs,
@@ -732,31 +718,31 @@ where
                     }
                     let retval = self.close_tag(tag);
                     if self.no_open_elems() {
-                        self.phase = EndPhase;
+                        self.phase = End;
                     }
                     retval
                 },
-                TagToken(Tag { kind: ShortTag, .. }) => {
+                Tag(Tag { kind: ShortTag, .. }) => {
                     self.pop();
                     if self.no_open_elems() {
-                        self.phase = EndPhase;
+                        self.phase = End;
                     }
                     Done
                 },
-                CommentToken(comment) => self.append_comment_to_tag(comment),
-                PIToken(pi) => self.append_pi_to_tag(pi),
-                EOFToken | NullCharacterToken => Reprocess(EndPhase, EOFToken),
-                DoctypeToken(_) => {
+                Comment(comment) => self.append_comment_to_tag(comment),
+                Pi(pi) => self.append_pi_to_tag(pi),
+                Eof | NullCharacter => Reprocess(End, Eof),
+                Doctype(_) => {
                     self.sink
                         .parse_error(Borrowed("Unexpected element in main phase"));
                     Done
                 },
             },
-            EndPhase => match token {
-                CommentToken(comment) => self.append_comment_to_doc(comment),
-                PIToken(pi) => self.append_pi_to_doc(pi),
-                CharacterTokens(ref chars) if !any_not_whitespace(chars) => Done,
-                EOFToken => self.stop_parsing(),
+            End => match token {
+                Comment(comment) => self.append_comment_to_doc(comment),
+                Pi(pi) => self.append_pi_to_doc(pi),
+                Characters(ref chars) if !any_not_whitespace(chars) => Done,
+                Eof => self.stop_parsing(),
                 _ => {
                     self.sink
                         .parse_error(Borrowed("Unexpected element in end phase"));
diff --git a/xml5ever/src/tree_builder/types.rs b/xml5ever/src/tree_builder/types.rs
index 327258b9..4c031abe 100644
--- a/xml5ever/src/tree_builder/types.rs
+++ b/xml5ever/src/tree_builder/types.rs
@@ -16,22 +16,22 @@ use crate::tokenizer::{Doctype, Pi, Tag};
 
 #[derive(PartialEq, Eq, Copy, Clone, Debug)]
 pub enum XmlPhase {
-    StartPhase,
-    MainPhase,
-    EndPhase,
+    Start,
+    Main,
+    End,
 }
 
 /// A subset/refinement of `tokenizer::XToken`. Everything else is handled
 /// specially at the beginning of `process_token`.
 #[derive(PartialEq, Eq, Clone, Debug)]
 pub enum Token {
-    TagToken(Tag),
-    DoctypeToken(Doctype),
-    CommentToken(StrTendril),
-    CharacterTokens(StrTendril),
-    PIToken(Pi),
-    NullCharacterToken,
-    EOFToken,
+    Tag(Tag),
+    Doctype(Doctype),
+    Comment(StrTendril),
+    Characters(StrTendril),
+    Pi(Pi),
+    NullCharacter,
+    Eof,
 }
 
 pub enum XmlProcessResult {
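     // (Editorial completion: this declaration is cut off in the source; the
     // variants below are reconstructed from their uses earlier in this patch,
     // `Done` and `Reprocess(End, Eof)`.)
     Done,
     Reprocess(XmlPhase, Token),
 }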