| /* This Source Code Form is subject to the terms of the Mozilla Public |
| * License, v. 2.0. If a copy of the MPL was not distributed with this |
| * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ |
| |
| use std::borrow::Cow; |
| use std::cell::Cell; |
| use std::rc::Rc; |
| |
| use base::cross_process_instant::CrossProcessInstant; |
| use base::id::PipelineId; |
| use base64::Engine as _; |
| use base64::engine::general_purpose; |
| use content_security_policy::sandboxing_directive::SandboxingFlagSet; |
| use devtools_traits::ScriptToDevtoolsControlMsg; |
| use dom_struct::dom_struct; |
| use embedder_traits::resources::{self, Resource}; |
| use encoding_rs::Encoding; |
| use html5ever::buffer_queue::BufferQueue; |
| use html5ever::tendril::fmt::UTF8; |
| use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink}; |
| use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; |
| use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns}; |
| use hyper_serde::Serde; |
| use markup5ever::TokenizerResult; |
| use mime::{self, Mime}; |
| use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag}; |
| use net_traits::policy_container::PolicyContainer; |
| use net_traits::request::RequestId; |
| use net_traits::{ |
| FetchMetadata, FetchResponseListener, LoadContext, Metadata, NetworkError, ReferrerPolicy, |
| ResourceFetchTiming, ResourceTimingType, |
| }; |
| use profile_traits::time::{ |
| ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType, |
| }; |
| use profile_traits::time_profile; |
| use script_traits::DocumentActivity; |
| use servo_config::pref; |
| use servo_url::ServoUrl; |
| use style::context::QuirksMode as ServoQuirksMode; |
| use tendril::stream::LossyDecoder; |
| |
| use crate::document_loader::{DocumentLoader, LoadType}; |
| use crate::dom::bindings::cell::DomRefCell; |
| use crate::dom::bindings::codegen::Bindings::DocumentBinding::{ |
| DocumentMethods, DocumentReadyState, |
| }; |
| use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods; |
| use crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods; |
| use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods; |
| use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; |
| use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{ |
| ShadowRootMode, SlotAssignmentMode, |
| }; |
| use crate::dom::bindings::inheritance::Castable; |
| use crate::dom::bindings::refcounted::Trusted; |
| use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object}; |
| use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom}; |
| use crate::dom::bindings::settings_stack::is_execution_stack_empty; |
| use crate::dom::bindings::str::{DOMString, USVString}; |
| use crate::dom::characterdata::CharacterData; |
| use crate::dom::comment::Comment; |
| use crate::dom::csp::{GlobalCspReporting, Violation, parse_csp_list_from_metadata}; |
| use crate::dom::customelementregistry::CustomElementReactionStack; |
| use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument}; |
| use crate::dom::documentfragment::DocumentFragment; |
| use crate::dom::documenttype::DocumentType; |
| use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator}; |
| use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement}; |
| use crate::dom::html::htmlimageelement::HTMLImageElement; |
| use crate::dom::html::htmlinputelement::HTMLInputElement; |
| use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult}; |
| use crate::dom::html::htmltemplateelement::HTMLTemplateElement; |
| use crate::dom::node::{Node, ShadowIncluding}; |
| use crate::dom::performanceentry::PerformanceEntry; |
| use crate::dom::performancenavigationtiming::PerformanceNavigationTiming; |
| use crate::dom::processinginstruction::ProcessingInstruction; |
| use crate::dom::reportingendpoint::ReportingEndpoint; |
| use crate::dom::shadowroot::IsUserAgentWidget; |
| use crate::dom::text::Text; |
| use crate::dom::types::{HTMLAudioElement, HTMLMediaElement, HTMLVideoElement}; |
| use crate::dom::virtualmethods::vtable_for; |
| use crate::network_listener::PreInvoke; |
| use crate::realms::enter_realm; |
| use crate::script_runtime::{CanGc, IntroductionType}; |
| use crate::script_thread::ScriptThread; |
| |
| mod async_html; |
| mod html; |
| mod prefetch; |
| mod xml; |
| |
| pub(crate) use html::serialize_html_fragment; |
| |
#[dom_struct]
/// The parser maintains two input streams: one for input from script through
/// document.write(), and one for input from network.
///
/// There is no concrete representation of the insertion point, instead it
/// always points to just before the next character from the network input,
/// with all of the script input before itself.
///
/// ```text
///     ... script input ... | ... network input ...
///                          ^
///                 insertion point
/// ```
pub(crate) struct ServoParser {
    /// The reflector of this DOM object.
    reflector: Reflector,
    /// The document associated with this parser.
    document: Dom<Document>,
    /// The BOM sniffing state.
    ///
    /// `None` means we've found the BOM, we've found there isn't one, or
    /// we're not parsing from a byte stream. `Some` contains the BOM bytes
    /// found so far.
    bom_sniff: DomRefCell<Option<Vec<u8>>>,
    /// The decoder used for the network input.
    ///
    /// It is taken (leaving `None`) by `parse_sync` once the last chunk has
    /// been received.
    network_decoder: DomRefCell<Option<NetworkDecoder>>,
    /// Input received from network.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    network_input: BufferQueue,
    /// Input received from script. Used only to support document.write().
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    script_input: BufferQueue,
    /// The tokenizer of this parser.
    tokenizer: Tokenizer,
    /// Whether to expect any further input from the associated network request.
    last_chunk_received: Cell<bool>,
    /// Whether this parser should avoid passing any further data to the tokenizer.
    suspended: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
    script_nesting_level: Cell<usize>,
    /// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
    aborted: Cell<bool>,
    /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
    script_created_parser: bool,
    /// We do a quick-and-dirty parse of the input looking for resources to prefetch.
    // TODO: if we had speculative parsing, we could do this when speculatively
    // building the DOM. https://github.com/servo/servo/pull/19203
    prefetch_tokenizer: prefetch::Tokenizer,
    /// Input queued for `prefetch_tokenizer`. Chunks that go through
    /// `push_tendril_input_chunk` are copied here when the document has a
    /// browsing context.
    #[ignore_malloc_size_of = "Defined in html5ever"]
    #[no_trace]
    prefetch_input: BufferQueue,
    /// The whole input as a string, if needed for the devtools Sources panel.
    ///
    /// `None` unless a devtools channel exists and the document has a
    /// browsing context (decided in `new_inherited`).
    // TODO: use a faster type for concatenating strings?
    content_for_devtools: Option<DomRefCell<String>>,
}
| |
/// A qualified attribute name paired with its value.
pub(crate) struct ElementAttribute {
    /// The qualified name of the attribute.
    name: QualName,
    /// The value of the attribute.
    value: DOMString,
}
| |
/// Tells an HTML tokenizer which parsing algorithm it is being created for.
#[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)]
pub(crate) enum ParsingAlgorithm {
    /// Used when parsing a whole document (see `parse_html_document` and
    /// `parse_html_script_input`).
    Normal,
    /// Used when parsing a fragment (see `parse_html_fragment`).
    Fragment,
}
| |
| impl ElementAttribute { |
| pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute { |
| ElementAttribute { name, value } |
| } |
| } |
| |
| impl ServoParser { |
/// Returns whatever [`ServoParser::can_write`] reports.
///
/// NOTE(review): the name reads like the negation of `is_active`, but the value
/// is `true` for a script-created parser or while a script is nested in the
/// parser — confirm that callers expect exactly that.
pub(crate) fn parser_is_not_active(&self) -> bool {
    self.can_write()
}
| |
| /// <https://html.spec.whatwg.org/multipage/#parse-html-from-a-string> |
| pub(crate) fn parse_html_document( |
| document: &Document, |
| input: Option<DOMString>, |
| url: ServoUrl, |
| can_gc: CanGc, |
| ) { |
| // Step 1. Set document's type to "html". |
| // |
| // Set by callers of this function and asserted here |
| assert!(document.is_html_document()); |
| // Step 2. Create an HTML parser parser, associated with document. |
| let parser = if pref!(dom_servoparser_async_html_tokenizer_enabled) { |
| ServoParser::new( |
| document, |
| Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)), |
| ParserKind::Normal, |
| can_gc, |
| ) |
| } else { |
| ServoParser::new( |
| document, |
| Tokenizer::Html(self::html::Tokenizer::new( |
| document, |
| url, |
| None, |
| ParsingAlgorithm::Normal, |
| )), |
| ParserKind::Normal, |
| can_gc, |
| ) |
| }; |
| // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant. |
| // Step 4. Start parser and let it run until it has consumed all the |
| // characters just inserted into the input stream. |
| // |
| // Set as the document's current parser and initialize with `input`, if given. |
| if let Some(input) = input { |
| parser.parse_complete_string_chunk(String::from(input), can_gc); |
| } else { |
| parser.document.set_current_parser(Some(&parser)); |
| } |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#parsing-html-fragments> |
| pub(crate) fn parse_html_fragment( |
| context: &Element, |
| input: DOMString, |
| allow_declarative_shadow_roots: bool, |
| can_gc: CanGc, |
| ) -> impl Iterator<Item = DomRoot<Node>> + use<'_> { |
| let context_node = context.upcast::<Node>(); |
| let context_document = context_node.owner_doc(); |
| let window = context_document.window(); |
| let url = context_document.url(); |
| |
| // Step 1. Let document be a Document node whose type is "html". |
| let loader = DocumentLoader::new_with_threads( |
| context_document.loader().resource_threads().clone(), |
| Some(url.clone()), |
| ); |
| let document = Document::new( |
| window, |
| HasBrowsingContext::No, |
| Some(url.clone()), |
| context_document.origin().clone(), |
| IsHTMLDocument::HTMLDocument, |
| None, |
| None, |
| DocumentActivity::Inactive, |
| DocumentSource::FromParser, |
| loader, |
| None, |
| None, |
| Default::default(), |
| false, |
| allow_declarative_shadow_roots, |
| Some(context_document.insecure_requests_policy()), |
| context_document.has_trustworthy_ancestor_or_current_origin(), |
| context_document.custom_element_reaction_stack(), |
| can_gc, |
| ); |
| |
| // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks". |
| // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's |
| // mode to "limited-quirks". |
| document.set_quirks_mode(context_document.quirks_mode()); |
| |
| // NOTE: The following steps happened as part of Step 1. |
| // Step 4. If allowDeclarativeShadowRoots is true, then set document's |
| // allow declarative shadow roots to true. |
| // Step 5. Create a new HTML parser, and associate it with document. |
| |
| // Step 11. |
| let form = context_node |
| .inclusive_ancestors(ShadowIncluding::No) |
| .find(|element| element.is::<HTMLFormElement>()); |
| |
| let fragment_context = FragmentContext { |
| context_elem: context_node, |
| form_elem: form.as_deref(), |
| context_element_allows_scripting: context_document.scripting_enabled(), |
| }; |
| |
| let parser = ServoParser::new( |
| &document, |
| Tokenizer::Html(self::html::Tokenizer::new( |
| &document, |
| url, |
| Some(fragment_context), |
| ParsingAlgorithm::Fragment, |
| )), |
| ParserKind::Normal, |
| can_gc, |
| ); |
| parser.parse_complete_string_chunk(String::from(input), can_gc); |
| |
| // Step 14. |
| let root_element = document.GetDocumentElement().expect("no document element"); |
| FragmentParsingResult { |
| inner: root_element.upcast::<Node>().children(), |
| } |
| } |
| |
| pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) { |
| let parser = ServoParser::new( |
| document, |
| Tokenizer::Html(self::html::Tokenizer::new( |
| document, |
| url, |
| None, |
| ParsingAlgorithm::Normal, |
| )), |
| ParserKind::ScriptCreated, |
| CanGc::note(), |
| ); |
| *parser.bom_sniff.borrow_mut() = None; |
| document.set_current_parser(Some(&parser)); |
| } |
| |
| pub(crate) fn parse_xml_document( |
| document: &Document, |
| input: Option<DOMString>, |
| url: ServoUrl, |
| can_gc: CanGc, |
| ) { |
| let parser = ServoParser::new( |
| document, |
| Tokenizer::Xml(self::xml::Tokenizer::new(document, url)), |
| ParserKind::Normal, |
| can_gc, |
| ); |
| |
| // Set as the document's current parser and initialize with `input`, if given. |
| if let Some(input) = input { |
| parser.parse_complete_string_chunk(String::from(input), can_gc); |
| } else { |
| parser.document.set_current_parser(Some(&parser)); |
| } |
| } |
| |
/// Returns the current value of the script nesting level counter.
///
/// <https://html.spec.whatwg.org/multipage/#script-nesting-level>
pub(crate) fn script_nesting_level(&self) -> usize {
    self.script_nesting_level.get()
}
| |
/// Whether this parser was created with `ParserKind::ScriptCreated`.
///
/// <https://html.spec.whatwg.org/multipage/#script-created-parser>
pub(crate) fn is_script_created(&self) -> bool {
    self.script_created_parser
}
| |
/// Corresponds to the latter part of the "Otherwise" branch of the 'An end
/// tag whose tag name is "script"' of
/// <https://html.spec.whatwg.org/multipage/#parsing-main-incdata>
///
/// This first moves everything from the script input to the beginning of
/// the network input, effectively resetting the insertion point to just
/// before the next character to be consumed.
///
///
/// ```text
///     | ... script input ... network input ...
///     ^
///     insertion point
/// ```
///
/// The parser must currently be suspended and the script nesting level must
/// be zero; both are asserted.
pub(crate) fn resume_with_pending_parsing_blocking_script(
    &self,
    script: &HTMLScriptElement,
    result: ScriptResult,
    can_gc: CanGc,
) {
    assert!(self.suspended.get());
    self.suspended.set(false);

    // After the swap `network_input` holds the script-written chunks and
    // `script_input` holds the former network chunks. Appending the latter
    // leaves `network_input` as "script input followed by network input" and
    // `script_input` empty.
    self.script_input.swap_with(&self.network_input);
    while let Some(chunk) = self.script_input.pop_front() {
        self.network_input.push_back(chunk);
    }

    let script_nesting_level = self.script_nesting_level.get();
    assert_eq!(script_nesting_level, 0);

    // Execute the script with the nesting level raised, then restore it.
    self.script_nesting_level.set(script_nesting_level + 1);
    script.execute(result, can_gc);
    self.script_nesting_level.set(script_nesting_level);

    // The flags are re-read because they may have changed while the script ran.
    if !self.suspended.get() && !self.aborted.get() {
        self.parse_sync(can_gc);
    }
}
| |
| pub(crate) fn can_write(&self) -> bool { |
| self.script_created_parser || self.script_nesting_level.get() > 0 |
| } |
| |
| /// Steps 6-8 of <https://html.spec.whatwg.org/multipage/#document.write()> |
| pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) { |
| assert!(self.can_write()); |
| |
| if self.document.has_pending_parsing_blocking_script() { |
| // There is already a pending parsing blocking script so the |
| // parser is suspended, we just append everything to the |
| // script input and abort these steps. |
| self.script_input.push_back(String::from(text).into()); |
| return; |
| } |
| |
| // There is no pending parsing blocking script, so all previous calls |
| // to document.write() should have seen their entire input tokenized |
| // and process, with nothing pushed to the parser script input. |
| assert!(self.script_input.is_empty()); |
| |
| let input = BufferQueue::default(); |
| input.push_back(String::from(text).into()); |
| |
| let profiler_chan = self |
| .document |
| .window() |
| .as_global_scope() |
| .time_profiler_chan() |
| .clone(); |
| let profiler_metadata = TimerMetadata { |
| url: self.document.url().as_str().into(), |
| iframe: TimerMetadataFrameType::RootWindow, |
| incremental: TimerMetadataReflowType::FirstReflow, |
| }; |
| self.tokenize( |
| |tokenizer| { |
| tokenizer.feed( |
| &input, |
| can_gc, |
| profiler_chan.clone(), |
| profiler_metadata.clone(), |
| ) |
| }, |
| can_gc, |
| ); |
| |
| if self.suspended.get() { |
| // Parser got suspended, insert remaining input at end of |
| // script input, following anything written by scripts executed |
| // reentrantly during this call. |
| while let Some(chunk) = input.pop_front() { |
| self.script_input.push_back(chunk); |
| } |
| return; |
| } |
| |
| assert!(input.is_empty()); |
| } |
| |
| // Steps 4-6 of https://html.spec.whatwg.org/multipage/#dom-document-close |
| pub(crate) fn close(&self, can_gc: CanGc) { |
| assert!(self.script_created_parser); |
| |
| // Step 4. |
| self.last_chunk_received.set(true); |
| |
| if self.suspended.get() { |
| // Step 5. |
| return; |
| } |
| |
| // Step 6. |
| self.parse_sync(can_gc); |
| } |
| |
/// <https://html.spec.whatwg.org/multipage/#abort-a-parser>
///
/// Discards all buffered input, ends the tokenizer, detaches the parser from
/// its document and moves the document's ready state to `Complete`.
/// Panics if the parser was already aborted.
pub(crate) fn abort(&self, can_gc: CanGc) {
    assert!(!self.aborted.get());
    self.aborted.set(true);

    // Step 1.
    // Both input queues are replaced with empty ones.
    self.script_input.replace_with(BufferQueue::default());
    self.network_input.replace_with(BufferQueue::default());

    // Step 2.
    self.document
        .set_ready_state(DocumentReadyState::Interactive, can_gc);

    // Step 3.
    self.tokenizer.end(can_gc);
    self.document.set_current_parser(None);

    // Step 4.
    self.document
        .set_ready_state(DocumentReadyState::Complete, can_gc);
}
| |
| // https://html.spec.whatwg.org/multipage/#active-parser |
| pub(crate) fn is_active(&self) -> bool { |
| self.script_nesting_level() > 0 && !self.aborted.get() |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self { |
| // Store the whole input for the devtools Sources panel, if the devtools server is running |
| // and we are parsing for a document load (not just things like innerHTML). |
| // TODO: check if a devtools client is actually connected and/or wants the sources? |
| let content_for_devtools = (document.global().devtools_chan().is_some() && |
| document.has_browsing_context()) |
| .then_some(DomRefCell::new(String::new())); |
| |
| ServoParser { |
| reflector: Reflector::new(), |
| document: Dom::from_ref(document), |
| bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))), |
| network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))), |
| network_input: BufferQueue::default(), |
| script_input: BufferQueue::default(), |
| tokenizer, |
| last_chunk_received: Cell::new(false), |
| suspended: Default::default(), |
| script_nesting_level: Default::default(), |
| aborted: Default::default(), |
| script_created_parser: kind == ParserKind::ScriptCreated, |
| prefetch_tokenizer: prefetch::Tokenizer::new(document), |
| prefetch_input: BufferQueue::default(), |
| content_for_devtools, |
| } |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn new( |
| document: &Document, |
| tokenizer: Tokenizer, |
| kind: ParserKind, |
| can_gc: CanGc, |
| ) -> DomRoot<Self> { |
| reflect_dom_object( |
| Box::new(ServoParser::new_inherited(document, tokenizer, kind)), |
| document.window(), |
| can_gc, |
| ) |
| } |
| |
| fn push_tendril_input_chunk(&self, chunk: StrTendril) { |
| if let Some(mut content_for_devtools) = self |
| .content_for_devtools |
| .as_ref() |
| .map(|content| content.borrow_mut()) |
| { |
| // TODO: append these chunks more efficiently |
| content_for_devtools.push_str(chunk.as_ref()); |
| } |
| |
| if chunk.is_empty() { |
| return; |
| } |
| // Per https://github.com/whatwg/html/issues/1495 |
| // stylesheets should not be loaded for documents |
| // without browsing contexts. |
| // https://github.com/whatwg/html/issues/1495#issuecomment-230334047 |
| // suggests that no content should be preloaded in such a case. |
| // We're conservative, and only prefetch for documents |
| // with browsing contexts. |
| if self.document.browsing_context().is_some() { |
| // Push the chunk into the prefetch input stream, |
| // which is tokenized eagerly, to scan for resources |
| // to prefetch. If the user script uses `document.write()` |
| // to overwrite the network input, this prefetching may |
| // have been wasted, but in most cases it won't. |
| self.prefetch_input.push_back(chunk.clone()); |
| self.prefetch_tokenizer.feed(&self.prefetch_input); |
| } |
| // Push the chunk into the network input stream, |
| // which is tokenized lazily. |
| self.network_input.push_back(chunk); |
| } |
| |
| fn push_bytes_input_chunk(&self, chunk: Vec<u8>) { |
| // BOM sniff. This is needed because NetworkDecoder will switch the |
| // encoding based on the BOM, but it won't change |
| // `self.document.encoding` in the process. |
| { |
| let mut bom_sniff = self.bom_sniff.borrow_mut(); |
| if let Some(partial_bom) = bom_sniff.as_mut() { |
| if partial_bom.len() + chunk.len() >= 3 { |
| partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied()); |
| if let Some((encoding, _)) = Encoding::for_bom(partial_bom) { |
| self.document.set_encoding(encoding); |
| } |
| drop(bom_sniff); |
| *self.bom_sniff.borrow_mut() = None; |
| } else { |
| partial_bom.extend(chunk.iter().copied()); |
| } |
| } |
| } |
| |
| // For byte input, we convert it to text using the network decoder. |
| let chunk = self |
| .network_decoder |
| .borrow_mut() |
| .as_mut() |
| .unwrap() |
| .decode(chunk); |
| self.push_tendril_input_chunk(chunk); |
| } |
| |
| fn push_string_input_chunk(&self, chunk: String) { |
| // If the input is a string, we don't have a BOM. |
| if self.bom_sniff.borrow().is_some() { |
| *self.bom_sniff.borrow_mut() = None; |
| } |
| |
| // The input has already been decoded as a string, so doesn't need |
| // to be decoded by the network decoder again. |
| let chunk = StrTendril::from(chunk); |
| self.push_tendril_input_chunk(chunk); |
| } |
| |
/// Feeds the network input to the tokenizer and, if the last chunk has been
/// received and the parser was not suspended on the way, finishes parsing.
///
/// The script input must be empty when this is called (asserted).
fn parse_sync(&self, can_gc: CanGc) {
    assert!(self.script_input.is_empty());

    // This parser will continue to parse while there is either pending input or
    // the parser remains unsuspended.

    // Once no more input is expected, take the decoder (leaving `None`) and
    // queue whatever `finish` returns, if it is not empty.
    if self.last_chunk_received.get() {
        if let Some(decoder) = self.network_decoder.borrow_mut().take() {
            let chunk = decoder.finish();
            if !chunk.is_empty() {
                self.network_input.push_back(chunk);
            }
        }
    }

    // `tokenize` asserts that the parser is not aborted, so bail out first.
    if self.aborted.get() {
        return;
    }

    let profiler_chan = self
        .document
        .window()
        .as_global_scope()
        .time_profiler_chan()
        .clone();
    let profiler_metadata = TimerMetadata {
        url: self.document.url().as_str().into(),
        iframe: TimerMetadataFrameType::RootWindow,
        incremental: TimerMetadataReflowType::FirstReflow,
    };
    self.tokenize(
        |tokenizer| {
            tokenizer.feed(
                &self.network_input,
                can_gc,
                profiler_chan.clone(),
                profiler_metadata.clone(),
            )
        },
        can_gc,
    );

    // A suspended parser keeps its remaining input for a later resume.
    if self.suspended.get() {
        return;
    }

    assert!(self.network_input.is_empty());

    if self.last_chunk_received.get() {
        self.finish(can_gc);
    }
}
| |
| fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) { |
| self.document.set_current_parser(Some(self)); |
| self.push_string_input_chunk(input); |
| self.last_chunk_received.set(true); |
| if !self.suspended.get() { |
| self.parse_sync(can_gc); |
| } |
| } |
| |
| fn parse_bytes_chunk(&self, input: Vec<u8>, can_gc: CanGc) { |
| let _realm = enter_realm(&*self.document); |
| self.document.set_current_parser(Some(self)); |
| self.push_bytes_input_chunk(input); |
| if !self.suspended.get() { |
| self.parse_sync(can_gc); |
| } |
| } |
| |
/// Drives the tokenizer by calling `feed` repeatedly.
///
/// Each time `feed` hands back a script element, the script is prepared with
/// the script nesting level raised by one. The loop ends when `feed` reports
/// `Done`, when a parsing-blocking script becomes pending (the parser is then
/// marked suspended), or when the parser has been aborted.
///
/// The parser must be neither suspended nor aborted at the start of every
/// iteration (asserted).
fn tokenize<F>(&self, feed: F, can_gc: CanGc)
where
    F: Fn(&Tokenizer) -> TokenizerResult<DomRoot<HTMLScriptElement>>,
{
    loop {
        assert!(!self.suspended.get());
        assert!(!self.aborted.get());

        self.document.window().reflow_if_reflow_timer_expired();
        let script = match feed(&self.tokenizer) {
            TokenizerResult::Done => return,
            TokenizerResult::Script(script) => script,
        };

        // https://html.spec.whatwg.org/multipage/#parsing-main-incdata
        // branch "An end tag whose tag name is "script"
        // The spec says to perform the microtask checkpoint before
        // setting the insertion mode back from Text, but this is not
        // possible with the way servo and html5ever currently
        // relate to each other, and hopefully it is not observable.
        if is_execution_stack_empty() {
            self.document
                .window()
                .as_global_scope()
                .perform_a_microtask_checkpoint(can_gc);
        }

        let script_nesting_level = self.script_nesting_level.get();

        // Prepare the script with the nesting level raised, then restore it.
        self.script_nesting_level.set(script_nesting_level + 1);
        script.set_initial_script_text();
        // A script found while another script is already nested in the parser
        // is introduced as an injected script.
        let introduction_type_override =
            (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT);
        script.prepare(introduction_type_override, can_gc);
        self.script_nesting_level.set(script_nesting_level);

        if self.document.has_pending_parsing_blocking_script() {
            self.suspended.set(true);
            return;
        }
        // Checked here so that the assertion at the top of the loop is not hit
        // if the parser was aborted while the script was being prepared.
        if self.aborted.get() {
            return;
        }
    }
}
| |
/// <https://html.spec.whatwg.org/multipage/#the-end>
///
/// Ends parsing: the tokenizer is ended, the parser is detached from the
/// document, the document load is finished and, when source content was
/// collected, it is sent to devtools.
///
/// All input must have been consumed and the network decoder must already have
/// been taken (asserted).
fn finish(&self, can_gc: CanGc) {
    assert!(!self.suspended.get());
    assert!(self.last_chunk_received.get());
    assert!(self.script_input.is_empty());
    assert!(self.network_input.is_empty());
    assert!(self.network_decoder.borrow().is_none());

    // Step 1.
    self.document
        .set_ready_state(DocumentReadyState::Interactive, can_gc);

    // Step 2.
    self.tokenizer.end(can_gc);
    self.document.set_current_parser(None);

    // Steps 3-12 are in another castle, namely finish_load.
    let url = self.tokenizer.url().clone();
    self.document.finish_load(LoadType::PageSource(url), can_gc);

    // Send the source contents to devtools, if needed.
    // `take` moves the collected text out of the cell.
    if let Some(content_for_devtools) = self
        .content_for_devtools
        .as_ref()
        .map(|content| content.take())
    {
        let global = self.document.global();
        // `content_for_devtools` is only `Some` when `new_inherited` saw a devtools channel.
        let chan = global.devtools_chan().expect("Guaranteed by new");
        let pipeline_id = self.document.global().pipeline_id();
        // A failed send is deliberately ignored.
        let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent(
            pipeline_id,
            content_for_devtools,
        ));
    }
}
| } |
| |
/// The iterator returned by `ServoParser::parse_html_fragment`.
///
/// It wraps another node iterator; see its `Iterator` implementation for what
/// happens to each node as it is yielded.
struct FragmentParsingResult<I>
where
    I: Iterator<Item = DomRoot<Node>>,
{
    /// The wrapped iterator that supplies the nodes.
    inner: I,
}
| |
| impl<I> Iterator for FragmentParsingResult<I> |
| where |
| I: Iterator<Item = DomRoot<Node>>, |
| { |
| type Item = DomRoot<Node>; |
| |
| fn next(&mut self) -> Option<DomRoot<Node>> { |
| let next = self.inner.next()?; |
| next.remove_self(CanGc::note()); |
| Some(next) |
| } |
| |
| fn size_hint(&self) -> (usize, Option<usize>) { |
| self.inner.size_hint() |
| } |
| } |
| |
/// How a parser came into existence; decides `ServoParser::script_created_parser`.
#[derive(JSTraceable, MallocSizeOf, PartialEq)]
enum ParserKind {
    /// Any parser that is not script-created.
    Normal,
    /// A parser created by `ServoParser::parse_html_script_input`.
    /// See <https://html.spec.whatwg.org/multipage/#script-created-parser>.
    ScriptCreated,
}
| |
/// The tokenizer backing a `ServoParser`: one of the three implementations in
/// the `html`, `async_html` and `xml` submodules.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
enum Tokenizer {
    /// The regular HTML tokenizer.
    Html(self::html::Tokenizer),
    /// The HTML tokenizer selected by the
    /// `dom_servoparser_async_html_tokenizer_enabled` preference.
    AsyncHtml(self::async_html::Tokenizer),
    /// The XML tokenizer.
    Xml(self::xml::Tokenizer),
}
| |
| impl Tokenizer { |
| fn feed( |
| &self, |
| input: &BufferQueue, |
| can_gc: CanGc, |
| profiler_chan: ProfilerChan, |
| profiler_metadata: TimerMetadata, |
| ) -> TokenizerResult<DomRoot<HTMLScriptElement>> { |
| match *self { |
| Tokenizer::Html(ref tokenizer) => time_profile!( |
| ProfilerCategory::ScriptParseHTML, |
| Some(profiler_metadata), |
| profiler_chan, |
| || tokenizer.feed(input), |
| ), |
| Tokenizer::AsyncHtml(ref tokenizer) => time_profile!( |
| ProfilerCategory::ScriptParseHTML, |
| Some(profiler_metadata), |
| profiler_chan, |
| || tokenizer.feed(input, can_gc), |
| ), |
| Tokenizer::Xml(ref tokenizer) => time_profile!( |
| ProfilerCategory::ScriptParseXML, |
| Some(profiler_metadata), |
| profiler_chan, |
| || tokenizer.feed(input), |
| ), |
| } |
| } |
| |
| fn end(&self, can_gc: CanGc) { |
| match *self { |
| Tokenizer::Html(ref tokenizer) => tokenizer.end(), |
| Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc), |
| Tokenizer::Xml(ref tokenizer) => tokenizer.end(), |
| } |
| } |
| |
| fn url(&self) -> &ServoUrl { |
| match *self { |
| Tokenizer::Html(ref tokenizer) => tokenizer.url(), |
| Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.url(), |
| Tokenizer::Xml(ref tokenizer) => tokenizer.url(), |
| } |
| } |
| |
| fn set_plaintext_state(&self) { |
| match *self { |
| Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(), |
| Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(), |
| Tokenizer::Xml(_) => unimplemented!(), |
| } |
| } |
| } |
| |
/// <https://html.spec.whatwg.org/multipage/#navigation-params>
/// This does not have the relevant fields, but mimics the intent
/// of the struct when used in loading document spec algorithms.
struct NavigationParams {
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-policy-container>
    policy_container: PolicyContainer,
    /// The content type of this document, if known. Otherwise it has to be sniffed.
    content_type: Option<Mime>,
    /// <https://html.spec.whatwg.org/multipage/#navigation-params-sandboxing>
    final_sandboxing_flag_set: SandboxingFlagSet,
    /// <https://mimesniff.spec.whatwg.org/#resource-header>
    ///
    /// Used by `ParserContext::load_document` to classify the content, and then
    /// handed to the parser as its first chunk of bytes.
    resource_header: Vec<u8>,
}
| |
/// The context required for asynchronously fetching a document
/// and parsing it progressively.
pub(crate) struct ParserContext {
    /// The parser that initiated the request.
    parser: Option<Trusted<ServoParser>>,
    /// Whether this is a synthesized document.
    is_synthesized_document: bool,
    /// Whether a document has already been loaded (relevant for checking the resource header).
    /// Set by `load_document`, which asserts that it runs only once.
    has_loaded_document: bool,
    /// The pipeline associated with this document.
    id: PipelineId,
    /// The URL for this document.
    url: ServoUrl,
    /// Timing data for this resource.
    resource_timing: ResourceFetchTiming,
    /// The pushed entry index.
    pushed_entry_index: Option<usize>,
    /// The parameters required by the document load algorithms.
    navigation_params: NavigationParams,
}
| |
| impl ParserContext { |
| pub(crate) fn new(id: PipelineId, url: ServoUrl) -> ParserContext { |
| ParserContext { |
| parser: None, |
| is_synthesized_document: false, |
| has_loaded_document: false, |
| id, |
| url, |
| resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation), |
| pushed_entry_index: None, |
| navigation_params: NavigationParams { |
| policy_container: Default::default(), |
| content_type: None, |
| final_sandboxing_flag_set: SandboxingFlagSet::empty(), |
| resource_header: vec![], |
| }, |
| } |
| } |
| |
| pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) { |
| let Some(policy_container) = policy_container else { |
| return; |
| }; |
| self.navigation_params.policy_container = policy_container.clone(); |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#creating-a-policy-container-from-a-fetch-response> |
| fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer { |
| // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's blob URL entry's environment's policy container. |
| // TODO |
| // Step 2. Let result be a new policy container. |
| // Step 7. Return result. |
| PolicyContainer { |
| // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response. |
| csp_list: parse_csp_list_from_metadata(&metadata.headers), |
| // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY] |
| referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers), |
| } |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#initialise-the-document-object> |
| fn initialize_document_object(&self, document: &Document) { |
| // Step 9. Let document be a new Document, with |
| document.set_policy_container(self.navigation_params.policy_container.clone()); |
| document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set); |
| } |
| |
/// <https://html.spec.whatwg.org/multipage/#loading-a-document>
///
/// Classifies the response from its content type and resource header, picks the
/// matching document loader and then hands the resource header to the parser as
/// its first chunk of bytes.
///
/// Unknown types and fonts are replaced by a short inline HTML page; in those
/// cases the resource header is not fed to the parser. Must be called at most
/// once per context (asserted). Does nothing further if there is no parser.
fn load_document(&mut self, can_gc: CanGc) {
    assert!(!self.has_loaded_document);
    self.has_loaded_document = true;
    let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else {
        return;
    };
    // Step 1. Let type be the computed type of navigationParams's response.
    let content_type = &self.navigation_params.content_type;
    let mime_type = MimeClassifier::default().classify(
        LoadContext::Browsing,
        NoSniffFlag::Off,
        ApacheBugFlag::from_content_type(content_type.as_ref()),
        content_type,
        &self.navigation_params.resource_header,
    );
    // Step 2. If the user agent has been configured to process resources of the given type using
    // some mechanism other than rendering the content in a navigable, then skip this step.
    // Otherwise, if the type is one of the following types:
    let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else {
        // No media type could be derived from the MIME type: show a placeholder page.
        let page = format!(
            "<html><body><p>Unknown content type ({}).</p></body></html>",
            &mime_type,
        );
        self.load_inline_unknown_content(parser, page);
        return;
    };
    match media_type {
        // Return the result of loading an HTML document, given navigationParams.
        MediaType::Html => self.load_html_document(parser),
        // Return the result of loading an XML document given navigationParams and type.
        MediaType::Xml => self.load_xml_document(parser),
        // Return the result of loading a text document given navigationParams and type.
        MediaType::JavaScript | MediaType::Json | MediaType::Text | MediaType::Css => {
            self.load_text_document(parser)
        },
        // Return the result of loading a media document given navigationParams and type.
        MediaType::Image | MediaType::AudioVideo => {
            self.load_media_document(parser, media_type, &mime_type)
        },
        // Fonts are not displayed: show a placeholder page instead.
        MediaType::Font => {
            let page = format!(
                "<html><body><p>Unable to load font with content type ({}).</p></body></html>",
                &mime_type,
            );
            self.load_inline_unknown_content(parser, page);
            return;
        },
    };

    // The bytes that were buffered as the resource header are the start of the
    // document; `take` moves them out and leaves an empty vector behind.
    parser.parse_bytes_chunk(
        std::mem::take(&mut self.navigation_params.resource_header),
        can_gc,
    );
}
| |
| /// <https://html.spec.whatwg.org/multipage/#navigate-html> |
| fn load_html_document(&self, parser: &ServoParser) { |
| // Step 1. Let document be the result of creating and initializing a |
| // Document object given "html", "text/html", and navigationParams. |
| self.initialize_document_object(&parser.document); |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#read-xml> |
| fn load_xml_document(&self, parser: &ServoParser) { |
| // When faced with displaying an XML file inline, provided navigation params navigationParams |
| // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML, |
| // XML Media Types, DOM, and other relevant specifications to create and initialize a |
| // Document object document, given "xml", type, and navigationParams, and return that Document. |
| // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM] |
| self.initialize_document_object(&parser.document); |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#navigate-text> |
| fn load_text_document(&self, parser: &ServoParser) { |
| // Step 4. Create an HTML parser and associate it with the document. |
| // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by |
| // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state. |
| // Each task that the networking task source places on the task queue while fetching runs must then |
| // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform |
| // the appropriate processing of the input stream. |
| let page = "<pre>\n".into(); |
| parser.push_string_input_chunk(page); |
| parser.parse_sync(CanGc::note()); |
| parser.tokenizer.set_plaintext_state(); |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#navigate-media> |
| fn load_media_document( |
| &mut self, |
| parser: &ServoParser, |
| media_type: MediaType, |
| mime_type: &Mime, |
| ) { |
| // Step 8. Act as if the user agent had stopped parsing document. |
| self.is_synthesized_document = true; |
| // Step 3. Populate with html/head/body given document. |
| let page = "<html><body></body></html>".into(); |
| parser.push_string_input_chunk(page); |
| parser.parse_sync(CanGc::note()); |
| |
| let doc = &parser.document; |
| // Step 5. Set the appropriate attribute of the element host element, as described below, |
| // to the address of the image, video, or audio resource. |
| let node = if media_type == MediaType::Image { |
| let img = HTMLImageElement::new( |
| local_name!("img"), |
| None, |
| doc, |
| None, |
| ElementCreator::ParserCreated(1), |
| CanGc::note(), |
| ); |
| img.SetSrc(USVString(self.url.to_string())); |
| DomRoot::upcast::<Node>(img) |
| } else if mime_type.type_() == mime::AUDIO { |
| let audio = HTMLAudioElement::new(local_name!("audio"), None, doc, None, CanGc::note()); |
| audio |
| .upcast::<HTMLMediaElement>() |
| .SetSrc(USVString(self.url.to_string())); |
| DomRoot::upcast::<Node>(audio) |
| } else { |
| let video = HTMLVideoElement::new(local_name!("video"), None, doc, None, CanGc::note()); |
| video |
| .upcast::<HTMLMediaElement>() |
| .SetSrc(USVString(self.url.to_string())); |
| DomRoot::upcast::<Node>(video) |
| }; |
| // Step 4. Append an element host element for the media, as described below, to the body element. |
| let doc_body = DomRoot::upcast::<Node>(doc.GetBody().unwrap()); |
| doc_body |
| .AppendChild(&node, CanGc::note()) |
| .expect("Appending failed"); |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#read-ua-inline> |
| fn load_inline_unknown_content(&mut self, parser: &ServoParser, page: String) { |
| self.is_synthesized_document = true; |
| parser.push_string_input_chunk(page); |
| parser.parse_sync(CanGc::note()); |
| } |
| } |
| |
| impl FetchResponseListener for ParserContext { |
| fn process_request_body(&mut self, _: RequestId) {} |
| |
| fn process_request_eof(&mut self, _: RequestId) {} |
| |
| fn process_response(&mut self, _: RequestId, meta_result: Result<FetchMetadata, NetworkError>) { |
| let (metadata, error) = match meta_result { |
| Ok(meta) => ( |
| Some(match meta { |
| FetchMetadata::Unfiltered(m) => m, |
| FetchMetadata::Filtered { unsafe_, .. } => unsafe_, |
| }), |
| None, |
| ), |
| Err(error) => ( |
| // Check variant without moving |
| match &error { |
| NetworkError::SslValidation(..) | |
| NetworkError::Internal(..) | |
| NetworkError::Crash(..) => { |
| let mut meta = Metadata::default(self.url.clone()); |
| let mime: Option<Mime> = "text/html".parse().ok(); |
| meta.set_content_type(mime.as_ref()); |
| Some(meta) |
| }, |
| _ => None, |
| }, |
| Some(error), |
| ), |
| }; |
| let content_type: Option<Mime> = metadata |
| .clone() |
| .and_then(|meta| meta.content_type) |
| .map(Serde::into_inner) |
| .map(Into::into); |
| |
| let (policy_container, endpoints_list) = match metadata.as_ref() { |
| None => (PolicyContainer::default(), None), |
| Some(metadata) => ( |
| Self::create_policy_container_from_fetch_response(metadata), |
| ReportingEndpoint::parse_reporting_endpoints_header( |
| &self.url.clone(), |
| &metadata.headers, |
| ), |
| ), |
| }; |
| |
| let parser = match ScriptThread::page_headers_available(&self.id, metadata, CanGc::note()) { |
| Some(parser) => parser, |
| None => return, |
| }; |
| if parser.aborted.get() { |
| return; |
| } |
| |
| let _realm = enter_realm(&*parser.document); |
| |
| // From Step 23.8.3 of https://html.spec.whatwg.org/multipage/#navigate |
| // Let finalSandboxFlags be the union of targetSnapshotParams's sandboxing flags and |
| // policyContainer's CSP list's CSP-derived sandboxing flags. |
| // TODO: implement targetSnapshotParam's sandboxing flags |
| let final_sandboxing_flag_set = policy_container |
| .csp_list |
| .as_ref() |
| .and_then(|csp| csp.get_sandboxing_flag_set_for_document()) |
| .unwrap_or(SandboxingFlagSet::empty()); |
| |
| if let Some(endpoints) = endpoints_list { |
| parser.document.window().set_endpoints_list(endpoints); |
| } |
| self.parser = Some(Trusted::new(&*parser)); |
| self.navigation_params = NavigationParams { |
| policy_container, |
| content_type, |
| final_sandboxing_flag_set, |
| resource_header: vec![], |
| }; |
| self.submit_resource_timing(); |
| |
| // Part of https://html.spec.whatwg.org/multipage/#loading-a-document |
| // |
| // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content, |
| // e.g., a native rendering of the content or an error message because the specified type is not supported, |
| // then return the result of creating a document for inline content that doesn't have a DOM given |
| // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type, |
| // and navigationParams's user involvement. |
| if let Some(error) = error { |
| let page = match error { |
| NetworkError::SslValidation(reason, bytes) => { |
| let page = resources::read_string(Resource::BadCertHTML); |
| let page = page.replace("${reason}", &reason); |
| let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes); |
| let page = page.replace("${bytes}", encoded_bytes.as_str()); |
| page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string()) |
| }, |
| NetworkError::Internal(reason) => { |
| let page = resources::read_string(Resource::NetErrorHTML); |
| page.replace("${reason}", &reason) |
| }, |
| NetworkError::Crash(details) => { |
| let page = resources::read_string(Resource::CrashHTML); |
| page.replace("${details}", &details) |
| }, |
| NetworkError::LoadCancelled => { |
| // The next load will show a page |
| return; |
| }, |
| }; |
| self.load_inline_unknown_content(&parser, page); |
| } |
| } |
| |
| fn process_response_chunk(&mut self, _: RequestId, payload: Vec<u8>) { |
| if self.is_synthesized_document { |
| return; |
| } |
| let Some(parser) = self.parser.as_ref().map(|p| p.root()) else { |
| return; |
| }; |
| if parser.aborted.get() { |
| return; |
| } |
| if !self.has_loaded_document { |
| // https://mimesniff.spec.whatwg.org/#read-the-resource-header |
| self.navigation_params |
| .resource_header |
| .extend_from_slice(&payload); |
| // the number of bytes in buffer is greater than or equal to 1445. |
| if self.navigation_params.resource_header.len() >= 1445 { |
| self.load_document(CanGc::note()); |
| } |
| } else { |
| parser.parse_bytes_chunk(payload, CanGc::note()); |
| } |
| } |
| |
| // This method is called via script_thread::handle_fetch_eof, so we must call |
| // submit_resource_timing in this function |
| // Resource listeners are called via net_traits::Action::process, which handles submission for them |
| fn process_response_eof( |
| &mut self, |
| _: RequestId, |
| status: Result<ResourceFetchTiming, NetworkError>, |
| ) { |
| let parser = match self.parser.as_ref() { |
| Some(parser) => parser.root(), |
| None => return, |
| }; |
| if parser.aborted.get() { |
| return; |
| } |
| |
| match status { |
| // are we throwing this away or can we use it? |
| Ok(_) => (), |
| // TODO(Savago): we should send a notification to callers #5463. |
| Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err), |
| } |
| |
| // https://mimesniff.spec.whatwg.org/#read-the-resource-header |
| // |
| // the end of the resource is reached. |
| if !self.has_loaded_document { |
| self.load_document(CanGc::note()); |
| } |
| |
| let _realm = enter_realm(&*parser); |
| |
| parser |
| .document |
| .set_redirect_count(self.resource_timing.redirect_count); |
| |
| parser.last_chunk_received.set(true); |
| if !parser.suspended.get() { |
| parser.parse_sync(CanGc::note()); |
| } |
| |
| // TODO: Only update if this is the current document resource. |
| // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`. |
| if let Some(pushed_index) = self.pushed_entry_index { |
| let document = &parser.document; |
| let performance_entry = PerformanceNavigationTiming::new( |
| &document.global(), |
| CrossProcessInstant::now(), |
| document, |
| CanGc::note(), |
| ); |
| document |
| .global() |
| .performance() |
| .update_entry(pushed_index, performance_entry.upcast::<PerformanceEntry>()); |
| } |
| } |
| |
| fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming { |
| &mut self.resource_timing |
| } |
| |
| fn resource_timing(&self) -> &ResourceFetchTiming { |
| &self.resource_timing |
| } |
| |
| // store a PerformanceNavigationTiming entry in the globalscope's Performance buffer |
| fn submit_resource_timing(&mut self) { |
| let parser = match self.parser.as_ref() { |
| Some(parser) => parser.root(), |
| None => return, |
| }; |
| if parser.aborted.get() { |
| return; |
| } |
| |
| let document = &parser.document; |
| |
| // TODO: Pass a proper fetch start time here. |
| let performance_entry = PerformanceNavigationTiming::new( |
| &document.global(), |
| CrossProcessInstant::now(), |
| document, |
| CanGc::note(), |
| ); |
| self.pushed_entry_index = document.global().performance().queue_entry( |
| performance_entry.upcast::<PerformanceEntry>(), |
| CanGc::note(), |
| ); |
| } |
| |
| fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec<Violation>) { |
| let parser = match self.parser.as_ref() { |
| Some(parser) => parser.root(), |
| None => return, |
| }; |
| let document = &parser.document; |
| let global = &document.global(); |
| // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved |
| global.report_csp_violations(violations, None, None); |
| } |
| } |
| |
// Empty impl: `ParserContext` overrides nothing from `PreInvoke`.
impl PreInvoke for ParserContext {}
| |
/// Borrowed inputs bundled together for fragment parsing.
// NOTE(review): field meanings below are inferred from their names; confirm against the
// fragment-parsing call sites.
pub(crate) struct FragmentContext<'a> {
    /// Presumably the context element the fragment is parsed relative to.
    pub(crate) context_elem: &'a Node,
    /// Presumably the form element associated with the fragment, if any.
    pub(crate) form_elem: Option<&'a Node>,
    /// Presumably whether scripting is enabled for the context element.
    pub(crate) context_element_allows_scripting: bool,
}
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn insert( |
| parent: &Node, |
| reference_child: Option<&Node>, |
| child: NodeOrText<Dom<Node>>, |
| parsing_algorithm: ParsingAlgorithm, |
| custom_element_reaction_stack: &CustomElementReactionStack, |
| can_gc: CanGc, |
| ) { |
| match child { |
| NodeOrText::AppendNode(n) => { |
| // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element |
| // applies if this is an element; if not, it may be |
| // https://html.spec.whatwg.org/multipage/#insert-a-comment |
| let element_in_non_fragment = |
| parsing_algorithm != ParsingAlgorithm::Fragment && n.is::<Element>(); |
| if element_in_non_fragment { |
| custom_element_reaction_stack.push_new_element_queue(); |
| } |
| parent.InsertBefore(&n, reference_child, can_gc).unwrap(); |
| if element_in_non_fragment { |
| custom_element_reaction_stack.pop_current_element_queue(can_gc); |
| } |
| }, |
| NodeOrText::AppendText(t) => { |
| // https://html.spec.whatwg.org/multipage/#insert-a-character |
| let text = reference_child |
| .and_then(Node::GetPreviousSibling) |
| .or_else(|| parent.GetLastChild()) |
| .and_then(DomRoot::downcast::<Text>); |
| |
| if let Some(text) = text { |
| text.upcast::<CharacterData>().append_data(&t); |
| } else { |
| let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc); |
| parent |
| .InsertBefore(text.upcast(), reference_child, can_gc) |
| .unwrap(); |
| } |
| }, |
| } |
| } |
| |
| #[derive(JSTraceable, MallocSizeOf)] |
| #[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] |
| pub(crate) struct Sink { |
| #[no_trace] |
| base_url: ServoUrl, |
| document: Dom<Document>, |
| current_line: Cell<u64>, |
| script: MutNullableDom<HTMLScriptElement>, |
| parsing_algorithm: ParsingAlgorithm, |
| #[conditional_malloc_size_of] |
| custom_element_reaction_stack: Rc<CustomElementReactionStack>, |
| } |
| |
| impl Sink { |
| fn same_tree(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool { |
| let x = x.downcast::<Element>().expect("Element node expected"); |
| let y = y.downcast::<Element>().expect("Element node expected"); |
| |
| x.is_in_same_home_subtree(y) |
| } |
| |
| fn has_parent_node(&self, node: &Dom<Node>) -> bool { |
| node.GetParentNode().is_some() |
| } |
| } |
| |
| impl TreeSink for Sink { |
| type Output = Self; |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn finish(self) -> Self { |
| self |
| } |
| |
| type Handle = Dom<Node>; |
| type ElemName<'a> |
| = ExpandedName<'a> |
| where |
| Self: 'a; |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn get_document(&self) -> Dom<Node> { |
| Dom::from_ref(self.document.upcast()) |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn get_template_contents(&self, target: &Dom<Node>) -> Dom<Node> { |
| let template = target |
| .downcast::<HTMLTemplateElement>() |
| .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing"); |
| Dom::from_ref(template.Content(CanGc::note()).upcast()) |
| } |
| |
| fn same_node(&self, x: &Dom<Node>, y: &Dom<Node>) -> bool { |
| x == y |
| } |
| |
| fn elem_name<'a>(&self, target: &'a Dom<Node>) -> ExpandedName<'a> { |
| let elem = target |
| .downcast::<Element>() |
| .expect("tried to get name of non-Element in HTML parsing"); |
| ExpandedName { |
| ns: elem.namespace(), |
| local: elem.local_name(), |
| } |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn create_element( |
| &self, |
| name: QualName, |
| attrs: Vec<Attribute>, |
| flags: ElementFlags, |
| ) -> Dom<Node> { |
| let attrs = attrs |
| .into_iter() |
| .map(|attr| ElementAttribute::new(attr.name, DOMString::from(String::from(attr.value)))) |
| .collect(); |
| let parsing_algorithm = if flags.template { |
| ParsingAlgorithm::Fragment |
| } else { |
| self.parsing_algorithm |
| }; |
| let element = create_element_for_token( |
| name, |
| attrs, |
| &self.document, |
| ElementCreator::ParserCreated(self.current_line.get()), |
| parsing_algorithm, |
| &self.custom_element_reaction_stack, |
| CanGc::note(), |
| ); |
| Dom::from_ref(element.upcast()) |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn create_comment(&self, text: StrTendril) -> Dom<Node> { |
| let comment = Comment::new( |
| DOMString::from(String::from(text)), |
| &self.document, |
| None, |
| CanGc::note(), |
| ); |
| Dom::from_ref(comment.upcast()) |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom<Node> { |
| let doc = &*self.document; |
| let pi = ProcessingInstruction::new( |
| DOMString::from(String::from(target)), |
| DOMString::from(String::from(data)), |
| doc, |
| CanGc::note(), |
| ); |
| Dom::from_ref(pi.upcast()) |
| } |
| |
| fn associate_with_form( |
| &self, |
| target: &Dom<Node>, |
| form: &Dom<Node>, |
| nodes: (&Dom<Node>, Option<&Dom<Node>>), |
| ) { |
| let (element, prev_element) = nodes; |
| let tree_node = prev_element.map_or(element, |prev| { |
| if self.has_parent_node(element) { |
| element |
| } else { |
| prev |
| } |
| }); |
| if !self.same_tree(tree_node, form) { |
| return; |
| } |
| |
| let node = target; |
| let form = DomRoot::downcast::<HTMLFormElement>(DomRoot::from_ref(&**form)) |
| .expect("Owner must be a form element"); |
| |
| let elem = node.downcast::<Element>(); |
| let control = elem.and_then(|e| e.as_maybe_form_control()); |
| |
| if let Some(control) = control { |
| control.set_form_owner_from_parser(&form, CanGc::note()); |
| } |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn append_before_sibling(&self, sibling: &Dom<Node>, new_node: NodeOrText<Dom<Node>>) { |
| let parent = sibling |
| .GetParentNode() |
| .expect("append_before_sibling called on node without parent"); |
| |
| insert( |
| &parent, |
| Some(sibling), |
| new_node, |
| self.parsing_algorithm, |
| &self.custom_element_reaction_stack, |
| CanGc::note(), |
| ); |
| } |
| |
| fn parse_error(&self, msg: Cow<'static, str>) { |
| debug!("Parse error: {}", msg); |
| } |
| |
| fn set_quirks_mode(&self, mode: QuirksMode) { |
| let mode = match mode { |
| QuirksMode::Quirks => ServoQuirksMode::Quirks, |
| QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks, |
| QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks, |
| }; |
| self.document.set_quirks_mode(mode); |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn append(&self, parent: &Dom<Node>, child: NodeOrText<Dom<Node>>) { |
| insert( |
| parent, |
| None, |
| child, |
| self.parsing_algorithm, |
| &self.custom_element_reaction_stack, |
| CanGc::note(), |
| ); |
| } |
| |
| #[cfg_attr(crown, allow(crown::unrooted_must_root))] |
| fn append_based_on_parent_node( |
| &self, |
| elem: &Dom<Node>, |
| prev_elem: &Dom<Node>, |
| child: NodeOrText<Dom<Node>>, |
| ) { |
| if self.has_parent_node(elem) { |
| self.append_before_sibling(elem, child); |
| } else { |
| self.append(prev_elem, child); |
| } |
| } |
| |
| fn append_doctype_to_document( |
| &self, |
| name: StrTendril, |
| public_id: StrTendril, |
| system_id: StrTendril, |
| ) { |
| let doc = &*self.document; |
| let doctype = DocumentType::new( |
| DOMString::from(String::from(name)), |
| Some(DOMString::from(String::from(public_id))), |
| Some(DOMString::from(String::from(system_id))), |
| doc, |
| CanGc::note(), |
| ); |
| doc.upcast::<Node>() |
| .AppendChild(doctype.upcast(), CanGc::note()) |
| .expect("Appending failed"); |
| } |
| |
| fn add_attrs_if_missing(&self, target: &Dom<Node>, attrs: Vec<Attribute>) { |
| let elem = target |
| .downcast::<Element>() |
| .expect("tried to set attrs on non-Element in HTML parsing"); |
| for attr in attrs { |
| elem.set_attribute_from_parser( |
| attr.name, |
| DOMString::from(String::from(attr.value)), |
| None, |
| CanGc::note(), |
| ); |
| } |
| } |
| |
| fn remove_from_parent(&self, target: &Dom<Node>) { |
| if let Some(ref parent) = target.GetParentNode() { |
| parent.RemoveChild(target, CanGc::note()).unwrap(); |
| } |
| } |
| |
| fn mark_script_already_started(&self, node: &Dom<Node>) { |
| let script = node.downcast::<HTMLScriptElement>(); |
| if let Some(script) = script { |
| script.set_already_started(true) |
| } |
| } |
| |
| fn reparent_children(&self, node: &Dom<Node>, new_parent: &Dom<Node>) { |
| while let Some(ref child) = node.GetFirstChild() { |
| new_parent.AppendChild(child, CanGc::note()).unwrap(); |
| } |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#html-integration-point> |
| /// Specifically, the `<annotation-xml>` cases. |
| fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom<Node>) -> bool { |
| let elem = handle.downcast::<Element>().unwrap(); |
| elem.get_attribute(&ns!(), &local_name!("encoding")) |
| .is_some_and(|attr| { |
| attr.value().eq_ignore_ascii_case("text/html") || |
| attr.value().eq_ignore_ascii_case("application/xhtml+xml") |
| }) |
| } |
| |
| fn set_current_line(&self, line_number: u64) { |
| self.current_line.set(line_number); |
| } |
| |
| fn pop(&self, node: &Dom<Node>) { |
| let node = DomRoot::from_ref(&**node); |
| vtable_for(&node).pop(); |
| } |
| |
| fn allow_declarative_shadow_roots(&self, intended_parent: &Dom<Node>) -> bool { |
| intended_parent.owner_doc().allow_declarative_shadow_roots() |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead> |
| /// A start tag whose tag name is "template" |
| /// Attach shadow path |
| fn attach_declarative_shadow( |
| &self, |
| host: &Dom<Node>, |
| template: &Dom<Node>, |
| attributes: &[Attribute], |
| ) -> bool { |
| attach_declarative_shadow_inner(host, template, attributes) |
| } |
| } |
| |
| /// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token> |
| fn create_element_for_token( |
| name: QualName, |
| attrs: Vec<ElementAttribute>, |
| document: &Document, |
| creator: ElementCreator, |
| parsing_algorithm: ParsingAlgorithm, |
| custom_element_reaction_stack: &CustomElementReactionStack, |
| can_gc: CanGc, |
| ) -> DomRoot<Element> { |
| // Step 3. |
| let is = attrs |
| .iter() |
| .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is")) |
| .map(|attr| LocalName::from(&*attr.value)); |
| |
| // Step 4. |
| let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref()); |
| |
| // Step 5. |
| let will_execute_script = |
| definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment; |
| |
| // Step 6. |
| if will_execute_script { |
| // Step 6.1. |
| document.increment_throw_on_dynamic_markup_insertion_counter(); |
| // Step 6.2 |
| if is_execution_stack_empty() { |
| document |
| .window() |
| .as_global_scope() |
| .perform_a_microtask_checkpoint(can_gc); |
| } |
| // Step 6.3 |
| custom_element_reaction_stack.push_new_element_queue() |
| } |
| |
| // Step 7. |
| let creation_mode = if will_execute_script { |
| CustomElementCreationMode::Synchronous |
| } else { |
| CustomElementCreationMode::Asynchronous |
| }; |
| |
| let element = Element::create(name, is, document, creator, creation_mode, None, can_gc); |
| |
| // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3 |
| // says to invoke sanitization "when an input element is first created"; |
| // however, since sanitization requires content attributes to function, |
| // it can't mean that literally. |
| // Indeed, to make sanitization work correctly, we need to _not_ sanitize |
| // until after all content attributes have been added |
| |
| let maybe_input = element.downcast::<HTMLInputElement>(); |
| if let Some(input) = maybe_input { |
| input.disable_sanitization(); |
| } |
| |
| // Step 8 |
| for attr in attrs { |
| element.set_attribute_from_parser(attr.name, attr.value, None, can_gc); |
| } |
| |
| // _now_ we can sanitize (and we sanitize now even if the "value" |
| // attribute isn't present!) |
| if let Some(input) = maybe_input { |
| input.enable_sanitization(); |
| } |
| |
| // Step 9. |
| if will_execute_script { |
| // Steps 9.1 - 9.2. |
| custom_element_reaction_stack.pop_current_element_queue(can_gc); |
| // Step 9.3. |
| document.decrement_throw_on_dynamic_markup_insertion_counter(); |
| } |
| |
| // TODO: Step 10. |
| // TODO: Step 11. |
| |
| // Step 12 is handled in `associate_with_form`. |
| |
| // Step 13. |
| element |
| } |
| |
| #[derive(JSTraceable, MallocSizeOf)] |
| struct NetworkDecoder { |
| #[ignore_malloc_size_of = "Defined in tendril"] |
| #[custom_trace] |
| decoder: LossyDecoder<NetworkSink>, |
| } |
| |
| impl NetworkDecoder { |
| fn new(encoding: &'static Encoding) -> Self { |
| Self { |
| decoder: LossyDecoder::new_encoding_rs(encoding, Default::default()), |
| } |
| } |
| |
| fn decode(&mut self, chunk: Vec<u8>) -> StrTendril { |
| self.decoder.process(ByteTendril::from(&*chunk)); |
| std::mem::take(&mut self.decoder.inner_sink_mut().output) |
| } |
| |
| fn finish(self) -> StrTendril { |
| self.decoder.finish() |
| } |
| } |
| |
| #[derive(Default, JSTraceable)] |
| struct NetworkSink { |
| #[no_trace] |
| output: StrTendril, |
| } |
| |
| impl TendrilSink<UTF8> for NetworkSink { |
| type Output = StrTendril; |
| |
| fn process(&mut self, t: StrTendril) { |
| if self.output.is_empty() { |
| self.output = t; |
| } else { |
| self.output.push_tendril(&t); |
| } |
| } |
| |
| fn error(&mut self, _desc: Cow<'static, str>) {} |
| |
| fn finish(self) -> Self::Output { |
| self.output |
| } |
| } |
| |
| fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool { |
| let host_element = host.downcast::<Element>().unwrap(); |
| |
| if host_element.shadow_root().is_some() { |
| return false; |
| } |
| |
| let template_element = template.downcast::<HTMLTemplateElement>().unwrap(); |
| |
| // Step 3. Let mode be template start tag's shadowrootmode attribute's value. |
| // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false. |
| // Step 5. Let delegatesfocus be true if template start tag |
| // has a shadowrootdelegatesfocus attribute; otherwise false. |
| // Step 6. Let serializable be true if template start tag |
| // has a shadowrootserializable attribute; otherwise false. |
| let mut shadow_root_mode = ShadowRootMode::Open; |
| let mut clonable = false; |
| let mut delegatesfocus = false; |
| let mut serializable = false; |
| |
| let attributes: Vec<ElementAttribute> = attributes |
| .iter() |
| .map(|attr| { |
| ElementAttribute::new( |
| attr.name.clone(), |
| DOMString::from(String::from(attr.value.clone())), |
| ) |
| }) |
| .collect(); |
| |
| attributes |
| .iter() |
| .for_each(|attr: &ElementAttribute| match attr.name.local { |
| local_name!("shadowrootmode") => { |
| if attr.value.str().eq_ignore_ascii_case("open") { |
| shadow_root_mode = ShadowRootMode::Open; |
| } else if attr.value.str().eq_ignore_ascii_case("closed") { |
| shadow_root_mode = ShadowRootMode::Closed; |
| } else { |
| unreachable!("shadowrootmode value is not open nor closed"); |
| } |
| }, |
| local_name!("shadowrootclonable") => { |
| clonable = true; |
| }, |
| local_name!("shadowrootdelegatesfocus") => { |
| delegatesfocus = true; |
| }, |
| local_name!("shadowrootserializable") => { |
| serializable = true; |
| }, |
| _ => {}, |
| }); |
| |
| // Step 8.1. Attach a shadow root with declarative shadow host element, |
| // mode, clonable, serializable, delegatesFocus, and "named". |
| match host_element.attach_shadow( |
| IsUserAgentWidget::No, |
| shadow_root_mode, |
| clonable, |
| serializable, |
| delegatesfocus, |
| SlotAssignmentMode::Named, |
| CanGc::note(), |
| ) { |
| Ok(shadow_root) => { |
| // Step 8.3. Set shadow's declarative to true. |
| shadow_root.set_declarative(true); |
| |
| // Set 8.4. Set template's template contents property to shadow. |
| let shadow = shadow_root.upcast::<DocumentFragment>(); |
| template_element.set_contents(Some(shadow)); |
| |
| // Step 8.5. Set shadow’s available to element internals to true. |
| shadow_root.set_available_to_element_internals(true); |
| |
| true |
| }, |
| Err(_) => false, |
| } |
| } |