html5ever is an HTML5 parser written in Rust and built for speed and correctness. This cheat sheet aims to cover its features exhaustively.
Installation
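Add to Cargo.toml:
[dependencies]
html5ever = "0.25"
# RcDom, the reference DOM tree sink, lives in a separate crate since html5ever 0.25
markup5ever_rcdom = "0.1"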
Parsing
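From string:
// parse_document takes a tree sink and options; input is fed through the TendrilSink trait.
// Requires: use html5ever::tendril::TendrilSink; use markup5ever_rcdom::RcDom;
let html = r#"<html>...</html>"#;
let doc = parse_document(RcDom::default(), Default::default()).one(html);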
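From bytes:
// from_utf8() wraps the parser in a lossy UTF-8 decoder so it can accept raw bytes.
// Requires: use html5ever::tendril::TendrilSink; use markup5ever_rcdom::RcDom;
let bytes: Vec<u8> = fetch_bytes();
let doc = parse_document(RcDom::default(), Default::default())
    .from_utf8()
    .read_from(&mut bytes.as_slice())?;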
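From reader:
// Requires: use html5ever::tendril::TendrilSink; use markup5ever_rcdom::RcDom;
let mut reader = File::open("doc.html")?;
let doc = parse_document(RcDom::default(), Default::default())
    .from_utf8()
    .read_from(&mut reader)?;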
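From file:
// Requires: use html5ever::tendril::TendrilSink; use markup5ever_rcdom::RcDom;
let bytes = fs::read("doc.html")?;
let doc = parse_document(RcDom::default(), Default::default())
    .from_utf8()
    .read_from(&mut bytes.as_slice())?;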
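Custom options:
// ParseOpts is a plain struct; scripting_enabled is a field of its tree_builder options.
// Requires: use html5ever::tree_builder::TreeBuilderOpts; use html5ever::tendril::TendrilSink;
let opts = ParseOpts {
    tree_builder: TreeBuilderOpts {
        scripting_enabled: true,
        ..Default::default()
    },
    ..Default::default()
};
let doc = parse_document(RcDom::default(), opts).one(html);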
Serialization
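To string:
// serialize(writer, &node, opts) writes into any io::Write; collect into a buffer for a String.
// `doc` is the RcDom returned by parse_document; SerializableHandle comes from markup5ever_rcdom.
let mut buffer = Vec::new();
let document: SerializableHandle = doc.document.clone().into();
serialize(&mut buffer, &document, SerializeOpts::default())?;
let html = String::from_utf8_lossy(&buffer).into_owned();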
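To writer:
// The writer is the first argument, followed by the node and the options.
// `doc` is the RcDom returned by parse_document; SerializableHandle comes from markup5ever_rcdom.
let mut buffer = Vec::new();
let document: SerializableHandle = doc.document.clone().into();
serialize(&mut buffer, &document, SerializeOpts::default())?;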
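To file:
// `doc` is the RcDom returned by parse_document; SerializableHandle comes from markup5ever_rcdom.
let document: SerializableHandle = doc.document.clone().into();
serialize(File::create("out.html")?, &document, SerializeOpts::default())?;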
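Custom options:
// SerializeOpts is a plain struct with the fields scripting_enabled, traversal_scope and
// create_missing_parent; it has no minify or output-format setting.
// Requires: use html5ever::serialize::{SerializeOpts, TraversalScope};
let opts = SerializeOpts {
    scripting_enabled: true,
    traversal_scope: TraversalScope::IncludeNode,
    ..Default::default()
};
let mut buffer = Vec::new();
let document: SerializableHandle = doc.document.clone().into();
serialize(&mut buffer, &document, opts)?;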
Traversal
Child elements:
for child in root.children() {
// ...
}
Descendants:
fn traverse(node: &Node) {
for child in node.children() {
traverse(child);
}
}
Parent element:
let parent = node.parent_element();
Previous sibling:
let prev = node.prev_sibling();
Next sibling:
let next = node.next_sibling();
Modification
Append child:
parent.append_child(&new_node);
Insert before:
parent.insert_before(&new_node, &ref_node);
Remove child:
parent.remove_child(&child);
Replace child:
parent.replace_child(&new, &old);
Set attribute:
el.set_attribute("class", "blue");
Set id:
el.set_id("main");
Set text content:
el.set_text_content(Some("Hello!"));
Creation
New element:
let el = Element::new(local_name!("div"));
New text node:
let text = TextNode::new("Hi there!");
New comment:
let comment = Comment::new("A comment");
Document fragment:
let frag = DocumentFragment::new();
Namespaces
Register namespace:
let ns = Namespace::new(None, local_name!("svg"));
doc.namespace_bindings_mut().push(ns);
Namespaced element:
let circle = Element::new(local_name!("circle"), &["svg"]);
Attributes
Boolean attribute:
el.set_bool_attribute(local_name!("hidden"), true);
Custom attribute:
el.set_custom_attribute(local_name!("data-id"), Atom::from("123"));
Encoding
From encoded bytes:
let bytes = include_bytes!("doc.html");
let encoding = EncodingRef::Utf8;
let doc = parse_document_from_utf8_expecting(bytes, encoding);
Handle invalid sequences:
let opts = ParseOpts::default().replace_invalid_codepoints(true);
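Validation (note: html5ever itself ships no DTD or schema validation; the calls below assume an external helper layer)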
DTD validate:
let dtd = include_bytes!("doctype.dtd");
parse_document(html).validate_dtd(dtd);
Schema validate:
let schema = include_bytes!("schema.xsd");
parse_document(html).validate_schema(schema);
Performance
Parallel parsing:
let html = include_str!("doc.html");
let doc = parse_html_parallel(html, &Default::default());