blob: fa72391f38ab39feff85a46df9d0721a095d3c52 [file] [log] [blame] [edit]
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::borrow::Cow;
use std::cell::Cell;
use std::rc::Rc;
use base::cross_process_instant::CrossProcessInstant;
use base::id::PipelineId;
use base64::Engine as _;
use base64::engine::general_purpose;
use content_security_policy::sandboxing_directive::SandboxingFlagSet;
use devtools_traits::ScriptToDevtoolsControlMsg;
use dom_struct::dom_struct;
use embedder_traits::resources::{self, Resource};
use encoding_rs::Encoding;
use html5ever::buffer_queue::BufferQueue;
use html5ever::tendril::fmt::UTF8;
use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink};
use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns};
use hyper_serde::Serde;
use markup5ever::TokenizerResult;
use mime::{self, Mime};
use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag};
use net_traits::policy_container::PolicyContainer;
use net_traits::request::RequestId;
use net_traits::{
FetchMetadata, FetchResponseListener, LoadContext, Metadata, NetworkError, ReferrerPolicy,
ResourceFetchTiming, ResourceTimingType,
};
use profile_traits::time::{
ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType,
};
use profile_traits::time_profile;
use script_traits::DocumentActivity;
use servo_config::pref;
use servo_url::ServoUrl;
use style::context::QuirksMode as ServoQuirksMode;
use tendril::stream::LossyDecoder;
use crate::document_loader::{DocumentLoader, LoadType};
use crate::dom::bindings::cell::DomRefCell;
use crate::dom::bindings::codegen::Bindings::DocumentBinding::{
DocumentMethods, DocumentReadyState,
};
use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods;
use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods;
use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods;
use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods;
use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{
ShadowRootMode, SlotAssignmentMode,
};
use crate::dom::bindings::inheritance::Castable;
use crate::dom::bindings::refcounted::Trusted;
use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object};
use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom};
use crate::dom::bindings::settings_stack::is_execution_stack_empty;
use crate::dom::bindings::str::{DOMString, USVString};
use crate::dom::characterdata::CharacterData;
use crate::dom::comment::Comment;
use crate::dom::csp::{GlobalCspReporting, Violation, parse_csp_list_from_metadata};
use crate::dom::customelementregistry::CustomElementReactionStack;
use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument};
use crate::dom::documentfragment::DocumentFragment;
use crate::dom::documenttype::DocumentType;
use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator};
use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement};
use crate::dom::html::htmlimageelement::HTMLImageElement;
use crate::dom::html::htmlinputelement::HTMLInputElement;
use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult};
use crate::dom::html::htmltemplateelement::HTMLTemplateElement;
use crate::dom::node::{Node, ShadowIncluding};
use crate::dom::performanceentry::PerformanceEntry;
use crate::dom::performancenavigationtiming::PerformanceNavigationTiming;
use crate::dom::processinginstruction::ProcessingInstruction;
use crate::dom::reportingendpoint::ReportingEndpoint;
use crate::dom::shadowroot::IsUserAgentWidget;
use crate::dom::text::Text;
use crate::dom::types::{HTMLAudioElement, HTMLMediaElement, HTMLVideoElement};
use crate::dom::virtualmethods::vtable_for;
use crate::network_listener::PreInvoke;
use crate::realms::enter_realm;
use crate::script_runtime::{CanGc, IntroductionType};
use crate::script_thread::ScriptThread;
mod async_html;
mod html;
mod prefetch;
mod xml;
pub(crate) use html::serialize_html_fragment;
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
/// ... script input ... | ... network input ...
///                      ^
///               insertion point
/// ```
pub(crate) struct ServoParser {
/// The DOM reflector of this parser object.
reflector: Reflector,
/// The document associated with this parser.
document: Dom<Document>,
/// The BOM sniffing state.
///
/// `None` means we've found the BOM, we've found there isn't one, or
/// we're not parsing from a byte stream. `Some` contains the BOM bytes
/// found so far.
bom_sniff: DomRefCell<Option<Vec<u8>>>,
/// The decoder used for the network input.
///
/// Taken (left as `None`) by `parse_sync` once the last chunk has been received.
network_decoder: DomRefCell<Option<NetworkDecoder>>,
/// Input received from network.
#[ignore_malloc_size_of = "Defined in html5ever"]
#[no_trace]
network_input: BufferQueue,
/// Input received from script. Used only to support document.write().
#[ignore_malloc_size_of = "Defined in html5ever"]
#[no_trace]
script_input: BufferQueue,
/// The tokenizer of this parser.
tokenizer: Tokenizer,
/// Whether to expect any further input from the associated network request.
///
/// Set to `true` once no more input will arrive; `parse_sync` then flushes the
/// decoder and calls `finish`.
last_chunk_received: Cell<bool>,
/// Whether this parser should avoid passing any further data to the tokenizer.
///
/// Set by `tokenize` when the document has a pending parsing-blocking script and
/// cleared by `resume_with_pending_parsing_blocking_script`.
suspended: Cell<bool>,
/// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
script_nesting_level: Cell<usize>,
/// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
aborted: Cell<bool>,
/// <https://html.spec.whatwg.org/multipage/#script-created-parser>
///
/// `true` when the parser was constructed with `ParserKind::ScriptCreated`.
script_created_parser: bool,
/// We do a quick-and-dirty parse of the input looking for resources to prefetch.
// TODO: if we had speculative parsing, we could do this when speculatively
// building the DOM. https://github.com/servo/servo/pull/19203
prefetch_tokenizer: prefetch::Tokenizer,
/// The input queue fed to `prefetch_tokenizer`.
#[ignore_malloc_size_of = "Defined in html5ever"]
#[no_trace]
prefetch_input: BufferQueue,
// The whole input as a string, if needed for the devtools Sources panel.
// `Some` only when a devtools channel exists and the document has a browsing
// context (see `new_inherited`); the accumulated text is sent in `finish`.
// TODO: use a faster type for concatenating strings?
content_for_devtools: Option<DomRefCell<String>>,
}
/// An attribute: a qualified name paired with its value.
pub(crate) struct ElementAttribute {
/// The qualified name of the attribute.
name: QualName,
/// The attribute's value.
value: DOMString,
}
/// Selects the parsing algorithm an HTML tokenizer is created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
/// Passed by the document-parsing entry points in this file.
Normal,
/// Passed by `ServoParser::parse_html_fragment`.
Fragment,
}
impl ElementAttribute {
pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute {
ElementAttribute { name, value }
}
}
impl ServoParser {
/// Returns the result of [`ServoParser::can_write`].
///
/// NOTE(review): despite its name, this is a plain alias of `can_write()` (true for
/// script-created parsers or when the script nesting level is non-zero); it is not
/// the negation of `is_active()`. Confirm the intended meaning against callers.
pub(crate) fn parser_is_not_active(&self) -> bool {
self.can_write()
}
/// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string>
///
/// Creates a parser for `document`, backed by the async HTML tokenizer when the
/// `dom_servoparser_async_html_tokenizer_enabled` pref is set and by the regular
/// HTML tokenizer otherwise. When `input` is given it is parsed as one complete
/// chunk; when it is `None` the parser is only registered as the document's
/// current parser.
pub(crate) fn parse_html_document(
    document: &Document,
    input: Option<DOMString>,
    url: ServoUrl,
    can_gc: CanGc,
) {
    // Step 1. Set document's type to "html".
    //
    // Callers are responsible for this; it is only asserted here.
    assert!(document.is_html_document());

    // Step 2. Create an HTML parser, associated with document.
    let tokenizer = if pref!(dom_servoparser_async_html_tokenizer_enabled) {
        Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None))
    } else {
        Tokenizer::Html(self::html::Tokenizer::new(
            document,
            url,
            None,
            ParsingAlgorithm::Normal,
        ))
    };
    let parser = ServoParser::new(document, tokenizer, ParserKind::Normal, can_gc);

    // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant.
    // Step 4. Start parser and let it run until it has consumed all the
    // characters just inserted into the input stream.
    //
    // Either way the parser becomes the document's current parser; with input
    // present, `parse_complete_string_chunk` takes care of that itself.
    match input {
        Some(html) => {
            parser.parse_complete_string_chunk(String::from(html), can_gc);
        },
        None => {
            parser.document.set_current_parser(Some(&parser));
        },
    }
}
/// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments>
///
/// Parses `input` as an HTML fragment in the context of `context`, using a fresh
/// document without a browsing context that shares the URL, origin and quirks mode
/// of `context`'s owner document.
///
/// The returned iterator yields the children of the temporary document's root
/// element; each node is detached from that root as it is yielded (see
/// `FragmentParsingResult`).
///
/// Panics if the temporary document has no document element after parsing.
pub(crate) fn parse_html_fragment(
context: &Element,
input: DOMString,
allow_declarative_shadow_roots: bool,
can_gc: CanGc,
) -> impl Iterator<Item = DomRoot<Node>> + use<'_> {
let context_node = context.upcast::<Node>();
let context_document = context_node.owner_doc();
let window = context_document.window();
let url = context_document.url();
// Step 1. Let document be a Document node whose type is "html".
let loader = DocumentLoader::new_with_threads(
context_document.loader().resource_threads().clone(),
Some(url.clone()),
);
let document = Document::new(
window,
HasBrowsingContext::No,
Some(url.clone()),
context_document.origin().clone(),
IsHTMLDocument::HTMLDocument,
None,
None,
DocumentActivity::Inactive,
DocumentSource::FromParser,
loader,
None,
None,
Default::default(),
false,
allow_declarative_shadow_roots,
Some(context_document.insecure_requests_policy()),
context_document.has_trustworthy_ancestor_or_current_origin(),
context_document.custom_element_reaction_stack(),
can_gc,
);
// Step 2. If context's node document is in quirks mode, then set document's mode to "quirks".
// Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's
// mode to "limited-quirks".
//
// Both steps are covered by copying the context document's quirks mode verbatim.
document.set_quirks_mode(context_document.quirks_mode());
// NOTE: The following steps happened as part of Step 1.
// Step 4. If allowDeclarativeShadowRoots is true, then set document's
// allow declarative shadow roots to true.
// Step 5. Create a new HTML parser, and associate it with document.
// Step 11. Find the nearest inclusive ancestor of the context node that is a
// form element; the walk does not cross shadow boundaries (`ShadowIncluding::No`).
let form = context_node
.inclusive_ancestors(ShadowIncluding::No)
.find(|element| element.is::<HTMLFormElement>());
let fragment_context = FragmentContext {
context_elem: context_node,
form_elem: form.as_deref(),
context_element_allows_scripting: context_document.scripting_enabled(),
};
let parser = ServoParser::new(
&document,
Tokenizer::Html(self::html::Tokenizer::new(
&document,
url,
Some(fragment_context),
ParsingAlgorithm::Fragment,
)),
ParserKind::Normal,
can_gc,
);
// The whole fragment is available up front, so it is parsed as a single,
// final chunk.
parser.parse_complete_string_chunk(String::from(input), can_gc);
// Step 14. Hand back the children of the root element.
let root_element = document.GetDocumentElement().expect("no document element");
FragmentParsingResult {
inner: root_element.upcast::<Node>().children(),
}
}
/// Creates a script-created HTML parser for `document` and installs it as the
/// document's current parser.
///
/// BOM sniffing is switched off right away by clearing the sniffing state.
pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) {
    let tokenizer = Tokenizer::Html(self::html::Tokenizer::new(
        document,
        url,
        None,
        ParsingAlgorithm::Normal,
    ));
    let parser = ServoParser::new(
        document,
        tokenizer,
        ParserKind::ScriptCreated,
        CanGc::note(),
    );
    // `None` marks BOM sniffing as finished (see the `bom_sniff` field).
    *parser.bom_sniff.borrow_mut() = None;
    document.set_current_parser(Some(&parser));
}
/// Creates an XML parser for `document`.
///
/// When `input` is given it is parsed as one complete chunk; when it is `None`
/// the parser is only registered as the document's current parser.
pub(crate) fn parse_xml_document(
    document: &Document,
    input: Option<DOMString>,
    url: ServoUrl,
    can_gc: CanGc,
) {
    let tokenizer = Tokenizer::Xml(self::xml::Tokenizer::new(document, url));
    let parser = ServoParser::new(document, tokenizer, ParserKind::Normal, can_gc);

    // Either way the parser becomes the document's current parser; with input
    // present, `parse_complete_string_chunk` takes care of that itself.
    match input {
        Some(xml) => {
            parser.parse_complete_string_chunk(String::from(xml), can_gc);
        },
        None => {
            parser.document.set_current_parser(Some(&parser));
        },
    }
}
/// Returns the current value of the parser's
/// [script nesting level](https://html.spec.whatwg.org/multipage/#script-nesting-level).
pub(crate) fn script_nesting_level(&self) -> usize {
self.script_nesting_level.get()
}
/// Returns whether this is a
/// [script-created parser](https://html.spec.whatwg.org/multipage/#script-created-parser).
pub(crate) fn is_script_created(&self) -> bool {
self.script_created_parser
}
/// Corresponds to the latter part of the "Otherwise" branch of the 'An end
/// tag whose tag name is "script"' of
/// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
///
/// This first moves everything from the script input to the beginning of
/// the network input, effectively resetting the insertion point to just
/// before the next character to be consumed.
///
///
/// ```text
/// | ... script input ... network input ...
/// ^
/// insertion point
/// ```
///
/// It then executes `script` with the script nesting level raised to 1 and,
/// unless that execution suspended or aborted the parser, continues parsing.
///
/// Panics if the parser is not suspended or if the script nesting level is not 0.
pub(crate) fn resume_with_pending_parsing_blocking_script(
&self,
script: &HTMLScriptElement,
result: ScriptResult,
can_gc: CanGc,
) {
assert!(self.suspended.get());
self.suspended.set(false);
// After the swap, `network_input` holds the former script input and
// `script_input` holds the former network input, which is then appended
// chunk by chunk behind it.
self.script_input.swap_with(&self.network_input);
while let Some(chunk) = self.script_input.pop_front() {
self.network_input.push_back(chunk);
}
let script_nesting_level = self.script_nesting_level.get();
assert_eq!(script_nesting_level, 0);
// Raise the nesting level only for the duration of the script's execution.
self.script_nesting_level.set(script_nesting_level + 1);
script.execute(result, can_gc);
self.script_nesting_level.set(script_nesting_level);
// The script may have suspended the parser again or aborted it.
if !self.suspended.get() && !self.aborted.get() {
self.parse_sync(can_gc);
}
}
/// Returns whether `write` may be called on this parser: it must either be a
/// script-created parser or have a non-zero script nesting level.
pub(crate) fn can_write(&self) -> bool {
    if self.script_created_parser {
        return true;
    }
    self.script_nesting_level.get() > 0
}
/// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()>
///
/// If the document has a pending parsing-blocking script, `text` is only queued
/// on the script input. Otherwise it is tokenized immediately; whatever is left
/// over when the parser gets suspended during that tokenization is appended to
/// the script input.
///
/// Panics if `can_write()` is false.
pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) {
assert!(self.can_write());
if self.document.has_pending_parsing_blocking_script() {
// There is already a pending parsing blocking script so the
// parser is suspended, we just append everything to the
// script input and abort these steps.
self.script_input.push_back(String::from(text).into());
return;
}
// There is no pending parsing blocking script, so all previous calls
// to document.write() should have seen their entire input tokenized
// and processed, with nothing pushed to the parser script input.
assert!(self.script_input.is_empty());
// The written text goes into a private queue rather than `script_input`, so
// the tokenizer consumes exactly this call's input.
let input = BufferQueue::default();
input.push_back(String::from(text).into());
let profiler_chan = self
.document
.window()
.as_global_scope()
.time_profiler_chan()
.clone();
let profiler_metadata = TimerMetadata {
url: self.document.url().as_str().into(),
iframe: TimerMetadataFrameType::RootWindow,
incremental: TimerMetadataReflowType::FirstReflow,
};
// `tokenize` may invoke the closure several times (once per loop iteration),
// hence the clones of the profiler channel and metadata.
self.tokenize(
|tokenizer| {
tokenizer.feed(
&input,
can_gc,
profiler_chan.clone(),
profiler_metadata.clone(),
)
},
can_gc,
);
if self.suspended.get() {
// Parser got suspended, insert remaining input at end of
// script input, following anything written by scripts executed
// reentrantly during this call.
while let Some(chunk) = input.pop_front() {
self.script_input.push_back(chunk);
}
return;
}
assert!(input.is_empty());
}
/// Steps 4-6 of <https://html.spec.whatwg.org/multipage/#dom-document-close>
///
/// Marks the input as complete and, unless the parser is suspended, runs it.
///
/// Panics if this is not a script-created parser.
pub(crate) fn close(&self, can_gc: CanGc) {
    assert!(self.script_created_parser);

    // Step 4.
    self.last_chunk_received.set(true);

    // Step 5: a suspended parser is left alone.
    if !self.suspended.get() {
        // Step 6.
        self.parse_sync(can_gc);
    }
}
/// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
///
/// Discards all pending input, ends the tokenizer, detaches the parser from its
/// document, and moves the document's ready state to "interactive" and then to
/// "complete".
///
/// Panics if the parser has already been aborted.
pub(crate) fn abort(&self, can_gc: CanGc) {
assert!(!self.aborted.get());
self.aborted.set(true);
// Step 1. Throw away any pending content in both input streams.
self.script_input.replace_with(BufferQueue::default());
self.network_input.replace_with(BufferQueue::default());
// Step 2.
self.document
.set_ready_state(DocumentReadyState::Interactive, can_gc);
// Step 3.
self.tokenizer.end(can_gc);
self.document.set_current_parser(None);
// Step 4.
self.document
.set_ready_state(DocumentReadyState::Complete, can_gc);
}
/// <https://html.spec.whatwg.org/multipage/#active-parser>
///
/// Reports `true` only while the parser has not been aborted and its script
/// nesting level is non-zero.
pub(crate) fn is_active(&self) -> bool {
    let aborted = self.aborted.get();
    !aborted && self.script_nesting_level() > 0
}
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self {
// Store the whole input for the devtools Sources panel, if the devtools server is running
// and we are parsing for a document load (not just things like innerHTML).
// TODO: check if a devtools client is actually connected and/or wants the sources?
let content_for_devtools = (document.global().devtools_chan().is_some() &&
document.has_browsing_context())
.then_some(DomRefCell::new(String::new()));
ServoParser {
reflector: Reflector::new(),
document: Dom::from_ref(document),
bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))),
network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))),
network_input: BufferQueue::default(),
script_input: BufferQueue::default(),
tokenizer,
last_chunk_received: Cell::new(false),
suspended: Default::default(),
script_nesting_level: Default::default(),
aborted: Default::default(),
script_created_parser: kind == ParserKind::ScriptCreated,
prefetch_tokenizer: prefetch::Tokenizer::new(document),
prefetch_input: BufferQueue::default(),
content_for_devtools,
}
}
/// Creates a parser for `document` and reflects it as a DOM object in the
/// document's window.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn new(
    document: &Document,
    tokenizer: Tokenizer,
    kind: ParserKind,
    can_gc: CanGc,
) -> DomRoot<Self> {
    let parser = Box::new(ServoParser::new_inherited(document, tokenizer, kind));
    reflect_dom_object(parser, document.window(), can_gc)
}
/// Queues an already-decoded `chunk` on the network input.
///
/// The chunk is first recorded for devtools when source capture is enabled.
/// Empty chunks are dropped after that. For documents with a browsing context
/// the chunk is also run through the prefetch tokenizer straight away.
fn push_tendril_input_chunk(&self, chunk: StrTendril) {
    if let Some(devtools_content) = &self.content_for_devtools {
        // TODO: append these chunks more efficiently
        devtools_content.borrow_mut().push_str(chunk.as_ref());
    }

    if chunk.is_empty() {
        return;
    }

    // Per https://github.com/whatwg/html/issues/1495
    // stylesheets should not be loaded for documents
    // without browsing contexts.
    // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
    // suggests that no content should be preloaded in such a case.
    // We're conservative, and only prefetch for documents
    // with browsing contexts.
    let has_browsing_context = self.document.browsing_context().is_some();
    if has_browsing_context {
        // Push the chunk into the prefetch input stream,
        // which is tokenized eagerly, to scan for resources
        // to prefetch. If the user script uses `document.write()`
        // to overwrite the network input, this prefetching may
        // have been wasted, but in most cases it won't.
        self.prefetch_input.push_back(chunk.clone());
        self.prefetch_tokenizer.feed(&self.prefetch_input);
    }

    // Push the chunk into the network input stream,
    // which is tokenized lazily.
    self.network_input.push_back(chunk);
}
/// Decodes a raw byte `chunk` with the network decoder and queues the result.
///
/// While BOM sniffing is still pending, the leading bytes of the stream are
/// collected; once three are available the document's encoding is updated if
/// they form a BOM, and sniffing stops either way.
///
/// Panics if the network decoder has already been taken.
fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
    // BOM sniff. This is needed because NetworkDecoder will switch the
    // encoding based on the BOM, but it won't change
    // `self.document.encoding` in the process.
    {
        let mut sniff_state = self.bom_sniff.borrow_mut();
        if let Some(prefix) = sniff_state.as_mut() {
            let enough_bytes = prefix.len() + chunk.len() >= 3;
            if enough_bytes {
                // Top the prefix up to exactly three bytes.
                let missing = 3 - prefix.len();
                prefix.extend(chunk.iter().take(missing).copied());
                if let Some((encoding, _)) = Encoding::for_bom(prefix) {
                    self.document.set_encoding(encoding);
                }
                // Sniffing is over whether or not a BOM was found.
                *sniff_state = None;
            } else {
                // Still fewer than three bytes in total: keep collecting.
                prefix.extend_from_slice(&chunk);
            }
        }
    }

    // For byte input, we convert it to text using the network decoder.
    let mut decoder_slot = self.network_decoder.borrow_mut();
    let decoded = decoder_slot.as_mut().unwrap().decode(chunk);
    drop(decoder_slot);
    self.push_tendril_input_chunk(decoded);
}
/// Queues a `chunk` that is already a string; the network decoder is bypassed.
fn push_string_input_chunk(&self, chunk: String) {
    // String input cannot carry a BOM, so any pending sniffing is cancelled.
    let sniffing_pending = self.bom_sniff.borrow().is_some();
    if sniffing_pending {
        *self.bom_sniff.borrow_mut() = None;
    }

    // The input has already been decoded as a string, so doesn't need
    // to be decoded by the network decoder again.
    self.push_tendril_input_chunk(StrTendril::from(chunk));
}
/// Tokenizes everything currently queued on the network input.
///
/// If the last chunk has been received, the network decoder is first flushed and
/// dropped. Nothing is tokenized when the parser has been aborted. If tokenizing
/// does not suspend the parser and the last chunk has been received, the parser
/// is finished via `finish`.
///
/// Panics if the script input is not empty on entry, or if the network input is
/// not fully consumed after tokenizing without suspension.
fn parse_sync(&self, can_gc: CanGc) {
assert!(self.script_input.is_empty());
// This parser will continue to parse while there is either pending input or
// the parser remains unsuspended.
if self.last_chunk_received.get() {
// `take()` leaves `None` behind, so the decoder is flushed at most once.
if let Some(decoder) = self.network_decoder.borrow_mut().take() {
let chunk = decoder.finish();
if !chunk.is_empty() {
self.network_input.push_back(chunk);
}
}
}
if self.aborted.get() {
return;
}
let profiler_chan = self
.document
.window()
.as_global_scope()
.time_profiler_chan()
.clone();
let profiler_metadata = TimerMetadata {
url: self.document.url().as_str().into(),
iframe: TimerMetadataFrameType::RootWindow,
incremental: TimerMetadataReflowType::FirstReflow,
};
// `tokenize` may invoke the closure several times (once per loop iteration),
// hence the clones of the profiler channel and metadata.
self.tokenize(
|tokenizer| {
tokenizer.feed(
&self.network_input,
can_gc,
profiler_chan.clone(),
profiler_metadata.clone(),
)
},
can_gc,
);
// A suspended parser keeps its remaining input for later.
if self.suspended.get() {
return;
}
assert!(self.network_input.is_empty());
if self.last_chunk_received.get() {
self.finish(can_gc);
}
}
/// Parses `input` as the one and only chunk of the document.
///
/// Registers this parser as the document's current parser, queues the string,
/// marks the input as complete, and runs the parser unless it is suspended.
fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) {
    self.document.set_current_parser(Some(self));
    self.push_string_input_chunk(input);
    self.last_chunk_received.set(true);
    if self.suspended.get() {
        return;
    }
    self.parse_sync(can_gc);
}
/// Feeds one chunk of raw bytes to the parser.
///
/// Enters the document's realm, registers this parser as the document's current
/// parser, decodes and queues the bytes, and runs the parser unless it is
/// suspended.
fn parse_bytes_chunk(&self, input: Vec<u8>, can_gc: CanGc) {
    // The guard is held until the end of the function.
    let _realm = enter_realm(&*self.document);
    self.document.set_current_parser(Some(self));
    self.push_bytes_input_chunk(input);
    if self.suspended.get() {
        return;
    }
    self.parse_sync(can_gc);
}
/// Drives the tokenizer by calling `feed` repeatedly until it reports
/// `TokenizerResult::Done`, preparing each script element it hands back.
///
/// The loop also stops when preparing a script leaves the document with a pending
/// parsing-blocking script (the parser is then marked suspended) or when the
/// parser has been aborted in the meantime.
///
/// Panics if the parser is suspended or aborted at the start of an iteration.
fn tokenize<F>(&self, feed: F, can_gc: CanGc)
where
F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
{
loop {
assert!(!self.suspended.get());
assert!(!self.aborted.get());
self.document.window().reflow_if_reflow_timer_expired();
let script = match feed(&self.tokenizer) {
TokenizerResult::Done => return,
TokenizerResult::Script(script) => script,
};
// https://html.spec.whatwg.org/multipage/#parsing-main-incdata
// branch "An end tag whose tag name is "script"
// The spec says to perform the microtask checkpoint before
// setting the insertion mode back from Text, but this is not
// possible with the way servo and html5ever currently
// relate to each other, and hopefully it is not observable.
if is_execution_stack_empty() {
self.document
.window()
.as_global_scope()
.perform_a_microtask_checkpoint(can_gc);
}
// Raise the nesting level only while the script is being prepared.
let script_nesting_level = self.script_nesting_level.get();
self.script_nesting_level.set(script_nesting_level + 1);
script.set_initial_script_text();
// A script found while another script is already running (nesting level
// above zero before the increment) is tagged as an injected script.
let introduction_type_override =
(script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
script.prepare(introduction_type_override, can_gc);
self.script_nesting_level.set(script_nesting_level);
if self.document.has_pending_parsing_blocking_script() {
self.suspended.set(true);
return;
}
if self.aborted.get() {
return;
}
}
}
/// <https://html.spec.whatwg.org/multipage/#the-end>
///
/// Ends the tokenizer, detaches the parser from its document, finishes the
/// document's page-source load and, when source capture is enabled, sends the
/// accumulated source text to devtools.
///
/// Panics if the parser is suspended, if the last chunk has not been received,
/// if either input queue still holds data, or if the network decoder has not
/// been taken yet.
fn finish(&self, can_gc: CanGc) {
    assert!(!self.suspended.get());
    assert!(self.last_chunk_received.get());
    assert!(self.script_input.is_empty());
    assert!(self.network_input.is_empty());
    assert!(self.network_decoder.borrow().is_none());

    // Step 1.
    self.document
        .set_ready_state(DocumentReadyState::Interactive, can_gc);

    // Step 2.
    self.tokenizer.end(can_gc);
    self.document.set_current_parser(None);

    // Steps 3-12 are in another castle, namely finish_load.
    let url = self.tokenizer.url().clone();
    self.document.finish_load(LoadType::PageSource(url), can_gc);

    // Send the source contents to devtools, if needed.
    if let Some(content) = &self.content_for_devtools {
        // Taking the string leaves an empty one behind in the cell.
        let source = content.take();
        // Root the global once and use it for both the channel and the pipeline id.
        let global = self.document.global();
        let chan = global.devtools_chan().expect("Guaranteed by new");
        // A send failure is deliberately ignored.
        let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
            global.pipeline_id(),
            source,
        ));
    }
}
}
/// Iterator adapter returned by `ServoParser::parse_html_fragment`.
///
/// Wraps an iterator over nodes; its `Iterator` implementation detaches each
/// node from its parent before yielding it.
struct FragmentParsingResult<I>
where
I: Iterator<Item = DomRoot<Node>>,
{
/// The wrapped node iterator.
inner: I,
}
impl<I> Iterator for FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    type Item = DomRoot<Node>;

    /// Yields the next node of the wrapped iterator after removing it from its
    /// parent.
    fn next(&mut self) -> Option<DomRoot<Node>> {
        self.inner.next().map(|node| {
            node.remove_self(CanGc::note());
            node
        })
    }

    /// Forwards the size hint of the wrapped iterator unchanged.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}
/// Distinguishes script-created parsers from all others.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
/// Any parser that is not script-created.
Normal,
/// A parser whose `script_created_parser` flag is set
/// (see <https://html.spec.whatwg.org/multipage/#script-created-parser>).
ScriptCreated,
}
/// The concrete tokenizer backing a `ServoParser`.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
/// The HTML tokenizer from the `html` submodule.
Html(self::html::Tokenizer),
/// The HTML tokenizer from the `async_html` submodule.
AsyncHtml(self::async_html::Tokenizer),
/// The XML tokenizer from the `xml` submodule.
Xml(self::xml::Tokenizer),
}
impl Tokenizer {
fn feed(
&self,
input: &BufferQueue,
can_gc: CanGc,
profiler_chan: ProfilerChan,
profiler_metadata: TimerMetadata,
) -> TokenizerResult<DomRoot<HTMLScriptElement>> {
match *self {
Tokenizer::Html(ref tokenizer) => time_profile!(
ProfilerCategory::ScriptParseHTML,
Some(profiler_metadata),
profiler_chan,
|| tokenizer.feed(input),
),
Tokenizer::AsyncHtml(ref tokenizer) => time_profile!(
ProfilerCategory::ScriptParseHTML,
Some(profiler_metadata),
profiler_chan,
|| tokenizer.feed(input, can_gc),
),
Tokenizer::Xml(ref tokenizer) => time_profile!(
ProfilerCategory::ScriptParseXML,
Some(profiler_metadata),
profiler_chan,
|| tokenizer.feed(input),
),
}
}
fn end(&self, can_gc: CanGc) {
match *self {
Tokenizer::Html(ref tokenizer) => tokenizer.end(),
Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc),
Tokenizer::Xml(ref tokenizer) => tokenizer.end(),
}
}
fn url(&self) -> &ServoUrl {
match *self {
Tokenizer::Html(ref tokenizer) => tokenizer.url(),
Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(),
Tokenizer::Xml(ref tokenizer) => tokenizer.url(),
}
}
fn set_plaintext_state(&self) {
match *self {
Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(),
Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(),
Tokenizer::Xml(_) => unimplemented!(),
}
}
}
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
///
/// This does not have all of the fields of the spec struct, but mimics its
/// intent when used in the document-loading spec algorithms.
struct NavigationParams {
/// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
policy_container: PolicyContainer,
/// The content type of this document, if known; otherwise it has to be sniffed.
content_type: Option<Mime>,
/// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
final_sandboxing_flag_set: SandboxingFlagSet,
/// <https://mimesniff.spec.whatwg.org/#resource-header>
///
/// Passed to the MIME classifier in `ParserContext::load_document` and
/// afterwards handed to the parser as its first chunk of bytes.
resource_header: Vec<u8>,
}
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
/// The parser that initiated the request.
parser: Option<Trusted<ServoParser>>,
/// Whether this is a synthesized document (set when loading media documents
/// and inline pages for unknown content).
is_synthesized_document: bool,
/// Whether a document has already been loaded (relevant for checking the
/// resource header); `load_document` asserts this is still `false` and then sets it.
has_loaded_document: bool,
/// The pipeline associated with this document.
id: PipelineId,
/// The URL for this document.
url: ServoUrl,
/// Timing data for this resource.
resource_timing: ResourceFetchTiming,
/// Pushed entry index.
pushed_entry_index: Option<usize>,
/// Parameters required by the document-loading algorithms.
navigation_params: NavigationParams,
}
impl ParserContext {
pub(crate) fn new(id: PipelineId, url: ServoUrl) -> ParserContext {
ParserContext {
parser: None,
is_synthesized_document: false,
has_loaded_document: false,
id,
url,
resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation),
pushed_entry_index: None,
navigation_params: NavigationParams {
policy_container: Default::default(),
content_type: None,
final_sandboxing_flag_set: SandboxingFlagSet::empty(),
resource_header: vec![],
},
}
}
/// Replaces the navigation params' policy container with a clone of
/// `policy_container`; `None` leaves the current one untouched.
pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) {
    if let Some(container) = policy_container {
        self.navigation_params.policy_container = container.clone();
    }
}
/// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response>
///
/// Builds a policy container from the response headers in `metadata`.
fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer {
    // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's
    // blob URL entry's environment's policy container.
    // TODO
    let headers = &metadata.headers;

    // Step 3. Set result's CSP list to the result of parsing a response's Content Security
    // Policies given response.
    let csp_list = parse_csp_list_from_metadata(headers);

    // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy`
    // header given response. [REFERRERPOLICY]
    let referrer_policy = ReferrerPolicy::parse_header_for_response(headers);

    // Step 2. Let result be a new policy container.
    // Step 7. Return result.
    PolicyContainer {
        csp_list,
        referrer_policy,
    }
}
/// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object>
///
/// Copies the policy container and the final sandboxing flag set from the
/// navigation params onto `document`.
fn initialize_document_object(&self, document: &Document) {
    // Step 9. Let document be a new Document, with its policy container and
    // active sandboxing flag set taken from navigationParams. The Document
    // itself already exists here; only those two values are applied.
    let params = &self.navigation_params;
    document.set_policy_container(params.policy_container.clone());
    document.set_active_sandboxing_flag_set(params.final_sandboxing_flag_set);
}
/// <https://html.spec.whatwg.org/multipage/#loading-a-document>
///
/// Classifies the response from its declared content type and its resource
/// header, and dispatches to the matching `load_*_document` helper. Unknown types
/// and fonts get a small inline page instead. For every other type the buffered
/// resource header is then handed to the parser as its first chunk of bytes.
///
/// Does nothing beyond marking the document as loaded when there is no parser.
///
/// Panics if called more than once on the same context.
fn load_document(&mut self, can_gc: CanGc) {
assert!(!self.has_loaded_document);
self.has_loaded_document = true;
let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
return;
};
// Step 1. Let type be the computed type of navigationParams's response.
let content_type = &self.navigation_params.content_type;
let mime_type = MimeClassifier::default().classify(
LoadContext::Browsing,
NoSniffFlag::Off,
ApacheBugFlag::from_content_type(content_type.as_ref()),
content_type,
&self.navigation_params.resource_header,
);
// Step 2. If the user agent has been configured to process resources of the given type using
// some mechanism other than rendering the content in a navigable, then skip this step.
// Otherwise, if the type is one of the following types:
let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
// No known media type: show an inline error page and do not feed the
// resource header to the parser.
let page = format!(
"<html><body><p>Unknown content type ({}).</p></body></html>",
&mime_type,
);
self.load_inline_unknown_content(parser, page);
return;
};
match media_type {
// Return the result of loading an HTML document, given navigationParams.
MediaType::Html => self.load_html_document(parser),
// Return the result of loading an XML document given navigationParams and type.
MediaType::Xml => self.load_xml_document(parser),
// Return the result of loading a text document given navigationParams and type.
MediaType::JavaScript | MediaType::Json | MediaType::Text | MediaType::Css => {
self.load_text_document(parser)
},
// Return the result of loading a media document given navigationParams and type.
MediaType::Image | MediaType::AudioVideo => {
self.load_media_document(parser, media_type, &mime_type)
},
// Fonts are not rendered: show an inline error page and do not feed the
// resource header to the parser.
MediaType::Font => {
let page = format!(
"<html><body><p>Unable to load font with content type ({}).</p></body></html>",
&mime_type,
);
self.load_inline_unknown_content(parser, page);
return;
},
};
// `mem::take` moves the buffered bytes out and leaves an empty vector behind.
parser.parse_bytes_chunk(
std::mem::take(&mut self.navigation_params.resource_header),
can_gc,
);
}
/// <https://html.spec.whatwg.org/multipage/#navigate-html>
fn load_html_document(&self, parser: &ServoParser) {
// Step 1. Let document be the result of creating and initializing a
// Document object given "html", "text/html", and navigationParams.
self.initialize_document_object(&parser.document);
}
/// <https://html.spec.whatwg.org/multipage/#read-xml>
fn load_xml_document(&self, parser: &ServoParser) {
// When faced with displaying an XML file inline, provided navigation params navigationParams
// and a string type, user agents must follow the requirements defined in XML and Namespaces in XML,
// XML Media Types, DOM, and other relevant specifications to create and initialize a
// Document object document, given "xml", type, and navigationParams, and return that Document.
// They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM]
self.initialize_document_object(&parser.document);
}
/// <https://html.spec.whatwg.org/multipage/#navigate-text>
///
/// Parses a `<pre>` start tag followed by a line feed, then switches the
/// tokenizer to the PLAINTEXT state.
fn load_text_document(&self, parser: &ServoParser) {
    // Step 4. Create an HTML parser and associate it with the document.
    // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed
    // by a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to
    // the PLAINTEXT state. Each task that the networking task source places on the task queue
    // while fetching runs must then fill the parser's input byte stream with the fetched bytes
    // and cause the HTML parser to perform the appropriate processing of the input stream.
    parser.push_string_input_chunk(String::from("<pre>\n"));
    parser.parse_sync(CanGc::note());
    parser.tokenizer.set_plaintext_state();
}
/// <https://html.spec.whatwg.org/multipage/#navigate-media>
///
/// Marks the document as synthesized, parses an empty html/body skeleton, and
/// appends to the body an `<img>`, `<audio>` or `<video>` element whose `src` is
/// this context's URL.
///
/// An `<img>` is used when `media_type` is `MediaType::Image`; otherwise
/// `<audio>` is used when the MIME top-level type is `audio`, and `<video>` in
/// every remaining case.
///
/// Panics if the parsed skeleton has no body or if appending the element fails.
fn load_media_document(
&mut self,
parser: &ServoParser,
media_type: MediaType,
mime_type: &Mime,
) {
// Step 8. Act as if the user agent had stopped parsing document.
self.is_synthesized_document = true;
// Step 3. Populate with html/head/body given document.
let page = "<html><body></body></html>".into();
parser.push_string_input_chunk(page);
parser.parse_sync(CanGc::note());
let doc = &parser.document;
// Step 5. Set the appropriate attribute of the element host element, as described below,
// to the address of the image, video, or audio resource.
let node = if media_type == MediaType::Image {
let img = HTMLImageElement::new(
local_name!("img"),
None,
doc,
None,
ElementCreator::ParserCreated(1),
CanGc::note(),
);
img.SetSrc(USVString(self.url.to_string()));
DomRoot::upcast::<Node>(img)
} else if mime_type.type_() == mime::AUDIO {
let audio = HTMLAudioElement::new(local_name!("audio"), None, doc, None, CanGc::note());
audio
.upcast::<HTMLMediaElement>()
.SetSrc(USVString(self.url.to_string()));
DomRoot::upcast::<Node>(audio)
} else {
let video = HTMLVideoElement::new(local_name!("video"), None, doc, None, CanGc::note());
video
.upcast::<HTMLMediaElement>()
.SetSrc(USVString(self.url.to_string()));
DomRoot::upcast::<Node>(video)
};
// Step 4. Append an element host element for the media, as described below, to the body element.
let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap());
doc_body
.AppendChild(&node, CanGc::note())
.expect("Appending failed");
}
/// Displays `page` (already-built markup) as the content of a synthesized document.
///
/// <https://html.spec.whatwg.org/multipage/#read-ua-inline>
fn load_inline_unknown_content(&mut self, parser: &ServoParser, page: String) {
    // Flag the document as synthesized; `process_response_chunk` drops network payloads
    // once this flag is set.
    self.is_synthesized_document = true;
    parser.push_string_input_chunk(page);
    let can_gc = CanGc::note();
    parser.parse_sync(can_gc);
}
}
impl FetchResponseListener for ParserContext {
// No-op: this listener takes no action on request-body progress.
fn process_request_body(&mut self, _: RequestId) {}
// No-op: this listener takes no action when the request body has been fully sent.
fn process_request_eof(&mut self, _: RequestId) {}
fn process_response(&mut self, _: RequestId, meta_result: Result<FetchMetadata, NetworkError>) {
let (metadata, error) = match meta_result {
Ok(meta) => (
Some(match meta {
FetchMetadata::Unfiltered(m) => m,
FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
}),
None,
),
Err(error) => (
// Check variant without moving
match &error {
NetworkError::SslValidation(..) |
NetworkError::Internal(..) |
NetworkError::Crash(..) => {
let mut meta = Metadata::default(self.url.clone());
let mime: Option<Mime> = "text/html".parse().ok();
meta.set_content_type(mime.as_ref());
Some(meta)
},
_ => None,
},
Some(error),
),
};
let content_type: Option<Mime> = metadata
.clone()
.and_then(|meta| meta.content_type)
.map(Serde::into_inner)
.map(Into::into);
let (policy_container, endpoints_list) = match metadata.as_ref() {
None => (PolicyContainer::default(), None),
Some(metadata) => (
Self::create_policy_container_from_fetch_response(metadata),
ReportingEndpoint::parse_reporting_endpoints_header(
&self.url.clone(),
&metadata.headers,
),
),
};
let parser = match ScriptThread::page_headers_available(&self.id, metadata, CanGc::note()) {
Some(parser) => parser,
None => return,
};
if parser.aborted.get() {
return;
}
let _realm = enter_realm(&*parser.document);
// From Step 23.8.3 of https://html.spec.whatwg.org/multipage/#navigate
// Let finalSandboxFlags be the union of targetSnapshotParams's sandboxing flags and
// policyContainer's CSP list's CSP-derived sandboxing flags.
// TODO: implement targetSnapshotParam's sandboxing flags
let final_sandboxing_flag_set = policy_container
.csp_list
.as_ref()
.and_then(|csp| csp.get_sandboxing_flag_set_for_document())
.unwrap_or(SandboxingFlagSet::empty());
if let Some(endpoints) = endpoints_list {
parser.document.window().set_endpoints_list(endpoints);
}
self.parser = Some(Trusted::new(&*parser));
self.navigation_params = NavigationParams {
policy_container,
content_type,
final_sandboxing_flag_set,
resource_header: vec![],
};
self.submit_resource_timing();
// Part of https://html.spec.whatwg.org/multipage/#loading-a-document
//
// Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
// e.g., a native rendering of the content or an error message because the specified type is not supported,
// then return the result of creating a document for inline content that doesn't have a DOM given
// navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
// and navigationParams's user involvement.
if let Some(error) = error {
let page = match error {
NetworkError::SslValidation(reason, bytes) => {
let page = resources::read_string(Resource::BadCertHTML);
let page = page.replace("${reason}", &reason);
let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
let page = page.replace("${bytes}", encoded_bytes.as_str());
page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
},
NetworkError::Internal(reason) => {
let page = resources::read_string(Resource::NetErrorHTML);
page.replace("${reason}", &reason)
},
NetworkError::Crash(details) => {
let page = resources::read_string(Resource::CrashHTML);
page.replace("${details}", &details)
},
NetworkError::LoadCancelled => {
// The next load will show a page
return;
},
};
self.load_inline_unknown_content(&parser, page);
}
}
/// Handles one chunk of the response body.
///
/// Until the document has been loaded, chunks are accumulated into the resource header
/// buffer used for MIME sniffing; afterwards they are forwarded to the parser.
fn process_response_chunk(&mut self, _: RequestId, payload: Vec<u8>) {
    // Response bytes are never fed to the parser of a synthesized document.
    if self.is_synthesized_document {
        return;
    }
    let parser = match self.parser.as_ref() {
        Some(trusted) => trusted.root(),
        None => return,
    };
    if parser.aborted.get() {
        return;
    }

    if self.has_loaded_document {
        parser.parse_bytes_chunk(payload, CanGc::note());
        return;
    }

    // https://mimesniff.spec.whatwg.org/#read-the-resource-header
    let header = &mut self.navigation_params.resource_header;
    header.extend_from_slice(&payload);
    // the number of bytes in buffer is greater than or equal to 1445.
    if header.len() >= 1445 {
        self.load_document(CanGc::note());
    }
}
// This method is called via script_thread::handle_fetch_eof, so we must call
// submit_resource_timing in this function
// Resource listeners are called via net_traits::Action::process, which handles submission for them
fn process_response_eof(
&mut self,
_: RequestId,
status: Result<ResourceFetchTiming, NetworkError>,
) {
let parser = match self.parser.as_ref() {
Some(parser) => parser.root(),
None => return,
};
if parser.aborted.get() {
return;
}
match status {
// are we throwing this away or can we use it?
Ok(_) => (),
// TODO(Savago): we should send a notification to callers #5463.
Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err),
}
// https://mimesniff.spec.whatwg.org/#read-the-resource-header
//
// the end of the resource is reached.
if !self.has_loaded_document {
self.load_document(CanGc::note());
}
let _realm = enter_realm(&*parser);
parser
.document
.set_redirect_count(self.resource_timing.redirect_count);
parser.last_chunk_received.set(true);
if !parser.suspended.get() {
parser.parse_sync(CanGc::note());
}
// TODO: Only update if this is the current document resource.
// TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
if let Some(pushed_index) = self.pushed_entry_index {
let document = &parser.document;
let performance_entry = PerformanceNavigationTiming::new(
&document.global(),
CrossProcessInstant::now(),
document,
CanGc::note(),
);
document
.global()
.performance()
.update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>());
}
}
// Mutable access to the timing data stored on this context.
fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming {
&mut self.resource_timing
}
// Shared access to the timing data stored on this context.
fn resource_timing(&self) -> &ResourceFetchTiming {
&self.resource_timing
}
// Store a PerformanceNavigationTiming entry in the globalscope's Performance buffer and
// remember the value returned by `queue_entry` in `pushed_entry_index`, which
// `process_response_eof` later uses to update the entry.
fn submit_resource_timing(&mut self) {
    let Some(parser) = self.parser.as_ref().map(|trusted| trusted.root()) else {
        return;
    };
    if parser.aborted.get() {
        return;
    }

    let document = &parser.document;
    let global = document.global();

    // TODO: Pass a proper fetch start time here.
    let performance_entry = PerformanceNavigationTiming::new(
        &global,
        CrossProcessInstant::now(),
        document,
        CanGc::note(),
    );
    let entry = performance_entry.upcast::<PerformanceEntry>();
    self.pushed_entry_index = global.performance().queue_entry(entry, CanGc::note());
}
// Forwards CSP violations raised for this fetch to the global of the parser's document.
// Does nothing when no parser has been stored yet.
fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec<Violation>) {
    let Some(parser) = self.parser.as_ref().map(|trusted| trusted.root()) else {
        return;
    };
    let global = parser.document.global();
    // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved
    global.report_csp_violations(violations, None, None);
}
}
// Empty impl: `ParserContext` relies entirely on whatever defaults `PreInvoke` provides.
impl PreInvoke for ParserContext {}
/// Borrowed inputs describing the context of a fragment parse.
///
/// NOTE(review): the field descriptions below are inferred from the field names only;
/// confirm them against the code that constructs and consumes this struct.
pub(crate) struct FragmentContext<'a> {
/// Node acting as the context element of the fragment parse (presumably an element).
pub(crate) context_elem: &'a Node,
/// Form node associated with the context, if there is one.
pub(crate) form_elem: Option<&'a Node>,
/// Whether scripting is allowed for the context element — TODO confirm exact semantics.
pub(crate) context_element_allows_scripting: bool,
}
/// Inserts `child` into `parent`, before `reference_child` when one is given and at the
/// end of `parent` otherwise.
///
/// * Nodes are inserted with `InsertBefore`. When the node is an element and the parsing
///   algorithm is not the fragment one, the insertion is wrapped in a fresh custom element
///   reaction queue that is popped right after.
/// * Text is merged into the Text node located immediately before the insertion point if
///   there is one; otherwise a new Text node is created and inserted.
///
/// Panics if `InsertBefore` fails.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn insert(
    parent: &Node,
    reference_child: Option<&Node>,
    child: NodeOrText<Dom<Node>>,
    parsing_algorithm: ParsingAlgorithm,
    custom_element_reaction_stack: &CustomElementReactionStack,
    can_gc: CanGc,
) {
    match child {
        NodeOrText::AppendNode(n) => {
            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
            // applies if this is an element; if not, it may be
            // https://html.spec.whatwg.org/multipage/#insert-a-comment
            let element_in_non_fragment =
                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>();
            if element_in_non_fragment {
                custom_element_reaction_stack.push_new_element_queue();
            }
            parent.InsertBefore(&n, reference_child, can_gc).unwrap();
            if element_in_non_fragment {
                custom_element_reaction_stack.pop_current_element_queue(can_gc);
            }
        },
        NodeOrText::AppendText(t) => {
            // https://html.spec.whatwg.org/multipage/#insert-a-character
            //
            // Only the node immediately before the insertion point may absorb the data.
            // With a reference child that is its previous sibling (and nothing else: a
            // reference child without a previous sibling must NOT fall back to the
            // parent's last child, which would put the text in the wrong place). Without
            // a reference child the insertion point is the end of `parent`.
            let node_before_insertion_point = match reference_child {
                Some(reference) => reference.GetPreviousSibling(),
                None => parent.GetLastChild(),
            };
            let text = node_before_insertion_point.and_then(DomRoot::downcast::<Text>);

            if let Some(text) = text {
                text.upcast::<CharacterData>().append_data(&t);
            } else {
                let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc);
                parent
                    .InsertBefore(text.upcast(), reference_child, can_gc)
                    .unwrap();
            }
        },
    }
}
/// Tree sink that builds the DOM for the HTML parser; it implements `TreeSink` with
/// `Dom<Node>` handles.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
// NOTE(review): not read in this part of the file; presumably the base URL of the
// document being parsed — confirm against the constructor.
#[no_trace]
base_url: ServoUrl,
// Document in which nodes are created; `get_document` returns it as a node handle.
document: Dom<Document>,
// Line number last reported through `set_current_line`; passed to
// `ElementCreator::ParserCreated` when elements are created.
current_line: Cell<u64>,
// NOTE(review): not used in this part of the file; presumably a pending script element.
script: MutNullableDom<HTMLScriptElement>,
// Parsing algorithm in use; elements created for template-flagged tokens use
// `ParsingAlgorithm::Fragment` instead (see `create_element`).
parsing_algorithm: ParsingAlgorithm,
// Reaction stack whose element queues are pushed/popped around element creation and insertion.
#[conditional_malloc_size_of]
custom_element_reaction_stack: Rc<CustomElementReactionStack>,
}
impl Sink {
    /// Whether the two handles are in the same home subtree.
    ///
    /// Panics if either handle is not an element.
    fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
        let first = x.downcast::<Element>().expect("Element node expected");
        let second = y.downcast::<Element>().expect("Element node expected");
        first.is_in_same_home_subtree(second)
    }

    /// Whether `node` currently has a parent node.
    fn has_parent_node(&self, node: &Dom<Node>) -> bool {
        let parent = node.GetParentNode();
        parent.is_some()
    }
}
impl TreeSink for Sink {
/// The sink itself is the output of tree building.
type Output = Self;
/// Returns the sink unchanged.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn finish(self) -> Self {
self
}
/// Nodes are referred to through `Dom<Node>` handles.
type Handle = Dom<Node>;
/// Element names are exposed as expanded names borrowing from the handle.
type ElemName<'a>
= ExpandedName<'a>
where
Self: 'a;
/// Returns a handle to the sink's document, viewed as a node.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn get_document(&self) -> Dom<Node> {
    let document_node = self.document.upcast::<Node>();
    Dom::from_ref(document_node)
}
/// Returns a handle to the contents fragment of the template element `target`.
///
/// Panics if `target` is not an `HTMLTemplateElement`.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> {
    let template_element = target
        .downcast::<HTMLTemplateElement>()
        .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing");
    let contents = template_element.Content(CanGc::note());
    Dom::from_ref(contents.upcast::<Node>())
}
/// Whether the two handles compare equal under `Dom<Node>`'s `==`.
fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool {
x == y
}
/// Returns the namespace and local name of the element behind `target`.
///
/// Panics if `target` is not an element.
fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> {
    let element = target
        .downcast::<Element>()
        .expect("tried to get name of non-Element in HTML parsing");
    let (ns, local) = (element.namespace(), element.local_name());
    ExpandedName { ns, local }
}
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn create_element(
&self,
name: QualName,
attrs: Vec<Attribute>,
flags: ElementFlags,
) -> Dom<Node> {
let attrs = attrs
.into_iter()
.map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value))))
.collect();
let parsing_algorithm = if flags.template {
ParsingAlgorithm::Fragment
} else {
self.parsing_algorithm
};
let element = create_element_for_token(
name,
attrs,
&self.document,
ElementCreator::ParserCreated(self.current_line.get()),
parsing_algorithm,
&self.custom_element_reaction_stack,
CanGc::note(),
);
Dom::from_ref(element.upcast())
}
/// Creates a comment node holding `text` in the sink's document.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn create_comment(&self, text: StrTendril) -> Dom<Node> {
    let data = DOMString::from(String::from(text));
    let comment = Comment::new(data, &self.document, None, CanGc::note());
    Dom::from_ref(comment.upcast())
}
/// Creates a processing instruction node with the given target and data in the sink's
/// document.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> {
    let target = DOMString::from(String::from(target));
    let data = DOMString::from(String::from(data));
    let document = &*self.document;
    let instruction = ProcessingInstruction::new(target, data, document, CanGc::note());
    Dom::from_ref(instruction.upcast())
}
/// Sets the form owner of `target` to `form` when `target` is a form control and the
/// relevant tree node shares a home subtree with `form`.
///
/// The tree node is `nodes.0`, unless a second node is supplied and `nodes.0` has no
/// parent, in which case the second node is used.
///
/// Panics if `form` is not an `HTMLFormElement` (once the same-tree check has passed).
fn associate_with_form(
    &self,
    target: &Dom<Node>,
    form: &Dom<Node>,
    nodes: (&Dom<Node>, Option<&Dom<Node>>),
) {
    let (element, prev_element) = nodes;
    let tree_node = match prev_element {
        Some(prev) if !self.has_parent_node(element) => prev,
        _ => element,
    };
    if !self.same_tree(tree_node, form) {
        return;
    }

    let form_element = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form))
        .expect("Owner must be a form element");

    // Nodes that are not form controls are left untouched.
    let maybe_control = target
        .downcast::<Element>()
        .and_then(|element| element.as_maybe_form_control());
    if let Some(control) = maybe_control {
        control.set_form_owner_from_parser(&form_element, CanGc::note());
    }
}
/// Inserts `new_node` into the parent of `sibling`, right before `sibling`.
///
/// Panics if `sibling` has no parent.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) {
    let Some(parent) = sibling.GetParentNode() else {
        panic!("append_before_sibling called on node without parent");
    };
    let reference_child = Some(&**sibling);
    insert(
        &parent,
        reference_child,
        new_node,
        self.parsing_algorithm,
        &self.custom_element_reaction_stack,
        CanGc::note(),
    );
}
/// Parse errors are only reported through `debug!` logging.
fn parse_error(&self, msg: Cow<'static, str>) {
debug!("Parse error: {}", msg);
}
/// Translates the parser's quirks mode into the style system's and stores it on the
/// document.
fn set_quirks_mode(&self, mode: QuirksMode) {
    self.document.set_quirks_mode(match mode {
        QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
        QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
        QuirksMode::Quirks => ServoQuirksMode::Quirks,
    });
}
/// Appends `child` at the end of `parent` (an insertion without a reference child).
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) {
    let reference_child: Option<&Node> = None;
    insert(
        parent,
        reference_child,
        child,
        self.parsing_algorithm,
        &self.custom_element_reaction_stack,
        CanGc::note(),
    );
}
/// Inserts `child` before `elem` when `elem` has a parent; otherwise appends it to
/// `prev_elem`.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn append_based_on_parent_node(
    &self,
    elem: &Dom<Node>,
    prev_elem: &Dom<Node>,
    child: NodeOrText<Dom<Node>>,
) {
    if !self.has_parent_node(elem) {
        self.append(prev_elem, child);
        return;
    }
    self.append_before_sibling(elem, child);
}
/// Creates a doctype node from the given name and identifiers and appends it to the
/// document.
///
/// Panics if the append fails.
fn append_doctype_to_document(
    &self,
    name: StrTendril,
    public_id: StrTendril,
    system_id: StrTendril,
) {
    let document = &*self.document;
    let name = DOMString::from(String::from(name));
    let public_id = DOMString::from(String::from(public_id));
    let system_id = DOMString::from(String::from(system_id));
    let doctype = DocumentType::new(
        name,
        Some(public_id),
        Some(system_id),
        document,
        CanGc::note(),
    );
    let document_node = document.upcast::<Node>();
    document_node
        .AppendChild(doctype.upcast(), CanGc::note())
        .expect("Appending failed");
}
/// Hands every attribute in `attrs` to `set_attribute_from_parser` on the element behind
/// `target`.
///
/// NOTE(review): skipping attributes that already exist is presumably done inside
/// `set_attribute_from_parser`; nothing here filters them — confirm.
///
/// Panics if `target` is not an element.
fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) {
    let element = target
        .downcast::<Element>()
        .expect("tried to set attrs on non-Element in HTML parsing");
    attrs.into_iter().for_each(|attr| {
        let value = DOMString::from(String::from(attr.value));
        element.set_attribute_from_parser(attr.name, value, None, CanGc::note());
    });
}
fn remove_from_parent(&self, target: &Dom<Node>) {
if let Some(ref parent) = target.GetParentNode() {
parent.RemoveChild(target, CanGc::note()).unwrap();
}
}
/// Sets the "already started" flag when `node` is a script element; other nodes are
/// ignored.
fn mark_script_already_started(&self, node: &Dom<Node>) {
    if let Some(script_element) = node.downcast::<HTMLScriptElement>() {
        script_element.set_already_started(true);
    }
}
/// Moves every child of `node` to the end of `new_parent`, preserving their order.
///
/// Panics if an append fails.
fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) {
    // Appending a child to `new_parent` is expected to detach it from `node`, so the
    // first child changes on every iteration until none are left.
    loop {
        let Some(first_child) = node.GetFirstChild() else {
            break;
        };
        new_parent.AppendChild(&first_child, CanGc::note()).unwrap();
    }
}
/// <https://html.spec.whatwg.org/multipage/#html-integration-point>
/// Specifically, the `<annotation-xml>` cases.
///
/// True when the element has a no-namespace `encoding` attribute whose value is, ASCII
/// case-insensitively, `text/html` or `application/xhtml+xml`.
///
/// Panics if `handle` is not an element.
fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool {
    let element = handle.downcast::<Element>().unwrap();
    let encoding = element.get_attribute(&ns!(), &local_name!("encoding"));
    encoding.is_some_and(|attr| {
        let value = attr.value();
        value.eq_ignore_ascii_case("text/html") ||
            value.eq_ignore_ascii_case("application/xhtml+xml")
    })
}
/// Records the line number; `create_element` reads it back for `ElementCreator::ParserCreated`.
fn set_current_line(&self, line_number: u64) {
self.current_line.set(line_number);
}
/// Invokes the `pop` hook of the node's virtual methods.
fn pop(&self, node: &Dom<Node>) {
    let rooted = DomRoot::from_ref(&**node);
    let virtual_methods = vtable_for(&rooted);
    virtual_methods.pop();
}
/// Delegates to the owner document of `intended_parent`.
fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool {
intended_parent.owner_doc().allow_declarative_shadow_roots()
}
/// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
/// A start tag whose tag name is "template"
/// Attach shadow path
///
/// Thin wrapper around `attach_declarative_shadow_inner`; returns its result unchanged.
fn attach_declarative_shadow(
&self,
host: &Dom<Node>,
template: &Dom<Node>,
attributes: &[Attribute],
) -> bool {
attach_declarative_shadow_inner(host, template, attributes)
}
}
/// Creates an element for a start tag token, sets its attributes and returns it.
///
/// When a custom element definition is found and the parsing algorithm is not the fragment
/// one, the element is created synchronously inside its own element queue, with the
/// document's throw-on-dynamic-markup-insertion counter raised for the duration.
///
/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
fn create_element_for_token(
    name: QualName,
    attrs: Vec<ElementAttribute>,
    document: &Document,
    creator: ElementCreator,
    parsing_algorithm: ParsingAlgorithm,
    custom_element_reaction_stack: &CustomElementReactionStack,
    can_gc: CanGc,
) -> DomRoot<Element> {
    // Step 3.
    let is_value = attrs
        .iter()
        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
        .map(|attr| LocalName::from(&*attr.value));

    // Step 4.
    let definition =
        document.lookup_custom_element_definition(&name.ns, &name.local, is_value.as_ref());

    // Step 5.
    let will_execute_script =
        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;

    // Steps 6 and 7.
    let creation_mode = if will_execute_script {
        // Step 6.1.
        document.increment_throw_on_dynamic_markup_insertion_counter();
        // Step 6.2
        if is_execution_stack_empty() {
            let global = document.window().as_global_scope();
            global.perform_a_microtask_checkpoint(can_gc);
        }
        // Step 6.3
        custom_element_reaction_stack.push_new_element_queue();
        CustomElementCreationMode::Synchronous
    } else {
        CustomElementCreationMode::Asynchronous
    };
    let element = Element::create(name, is_value, document, creator, creation_mode, None, can_gc);

    // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3
    // says to invoke sanitization "when an input element is first created";
    // however, since sanitization requires content attributes to function,
    // it can't mean that literally.
    // Indeed, to make sanitization work correctly, we need to _not_ sanitize
    // until after all content attributes have been added
    let input_element = element.downcast::<HTMLInputElement>();
    if let Some(input) = input_element {
        input.disable_sanitization();
    }

    // Step 8
    attrs.into_iter().for_each(|attr| {
        element.set_attribute_from_parser(attr.name, attr.value, None, can_gc);
    });

    // _now_ we can sanitize (and we sanitize now even if the "value"
    // attribute isn't present!)
    if let Some(input) = input_element {
        input.enable_sanitization();
    }

    // Step 9.
    if will_execute_script {
        // Steps 9.1 - 9.2.
        custom_element_reaction_stack.pop_current_element_queue(can_gc);
        // Step 9.3.
        document.decrement_throw_on_dynamic_markup_insertion_counter();
    }

    // TODO: Step 10.
    // TODO: Step 11.
    // Step 12 is handled in `associate_with_form`.
    // Step 13.
    element
}
/// Decodes response bytes into string tendrils using a lossy decoder that writes into a
/// `NetworkSink`.
#[derive(JSTraceable, MallocSizeOf)]
struct NetworkDecoder {
// Decoder for the encoding chosen in `new`; decoded text accumulates in its inner sink.
#[ignore_malloc_size_of = "Defined in tendril"]
#[custom_trace]
decoder: LossyDecoder<NetworkSink>,
}
impl NetworkDecoder {
fn new(encoding: &'static Encoding) -> Self {
Self {
decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()),
}
}
fn decode(&mut self, chunk: Vec<u8>) -> StrTendril {
self.decoder.process(ByteTendril::from(&*chunk));
std::mem::take(&mut self.decoder.inner_sink_mut().output)
}
fn finish(self) -> StrTendril {
self.decoder.finish()
}
}
/// Sink that accumulates decoded text into a single tendril.
#[derive(Default, JSTraceable)]
struct NetworkSink {
// Text received so far; `NetworkDecoder::decode` takes it and leaves it empty.
#[no_trace]
output: StrTendril,
}
impl TendrilSink<UTF8> for NetworkSink {
    type Output = StrTendril;

    /// Adds `t` to the accumulated output. An empty output is replaced by `t` directly
    /// rather than being appended to.
    fn process(&mut self, t: StrTendril) {
        if !self.output.is_empty() {
            self.output.push_tendril(&t);
            return;
        }
        self.output = t;
    }

    /// Errors are ignored.
    fn error(&mut self, _desc: Cow<'static, str>) {}

    /// Returns everything accumulated so far.
    fn finish(self) -> Self::Output {
        let Self { output } = self;
        output
    }
}
/// Attaches a declarative shadow root to `host`, configured from the attributes of a
/// `<template>` start tag, and makes it the template's contents.
///
/// Returns `false` when `host` already has a shadow root or when attaching fails, and
/// `true` once the shadow root has been attached and wired to `template`.
///
/// Panics if `host` is not an element, if `template` is not an `HTMLTemplateElement`, or
/// if a `shadowrootmode` attribute holds a value other than `open`/`closed` (ASCII
/// case-insensitive).
fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool {
    let host_element = host.downcast::<Element>().unwrap();
    if host_element.shadow_root().is_some() {
        return false;
    }
    let template_element = template.downcast::<HTMLTemplateElement>().unwrap();

    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
    // Step 5. Let delegatesfocus be true if template start tag
    // has a shadowrootdelegatesfocus attribute; otherwise false.
    // Step 6. Let serializable be true if template start tag
    // has a shadowrootserializable attribute; otherwise false.
    let mut shadow_root_mode = ShadowRootMode::Open;
    let mut clonable = false;
    let mut delegatesfocus = false;
    let mut serializable = false;

    // The attributes are only inspected, so they are read in place rather than being
    // copied into an intermediate collection first.
    for attr in attributes {
        match attr.name.local {
            local_name!("shadowrootmode") => {
                let value: &str = &attr.value;
                if value.eq_ignore_ascii_case("open") {
                    shadow_root_mode = ShadowRootMode::Open;
                } else if value.eq_ignore_ascii_case("closed") {
                    shadow_root_mode = ShadowRootMode::Closed;
                } else {
                    // NOTE(review): presumably the HTML tree builder only calls this for
                    // valid modes — confirm against the caller.
                    unreachable!("shadowrootmode value is not open nor closed");
                }
            },
            local_name!("shadowrootclonable") => {
                clonable = true;
            },
            local_name!("shadowrootdelegatesfocus") => {
                delegatesfocus = true;
            },
            local_name!("shadowrootserializable") => {
                serializable = true;
            },
            _ => {},
        }
    }

    // Step 8.1. Attach a shadow root with declarative shadow host element,
    // mode, clonable, serializable, delegatesFocus, and "named".
    match host_element.attach_shadow(
        IsUserAgentWidget::No,
        shadow_root_mode,
        clonable,
        serializable,
        delegatesfocus,
        SlotAssignmentMode::Named,
        CanGc::note(),
    ) {
        Ok(shadow_root) => {
            // Step 8.3. Set shadow's declarative to true.
            shadow_root.set_declarative(true);

            // Step 8.4. Set template's template contents property to shadow.
            let shadow = shadow_root.upcast::<DocumentFragment>();
            template_element.set_contents(Some(shadow));

            // Step 8.5. Set shadow’s available to element internals to true.
            shadow_root.set_available_to_element_internals(true);

            true
        },
        Err(_) => false,
    }
}