Optimizations by aehlke · Pull Request #306 · scinfu/SwiftSoup · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Optimizations #306

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Sources/Attribute.swift
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ open class Attribute {
}

@inlinable
public func html(accum: StringBuilder, out: OutputSettings ) {
public func html(accum: StringBuilder, out: OutputSettings) {
accum.append(key)
if (!shouldCollapseAttribute(out: out)) {
accum.append(UTF8Arrays.attributeEqualsQuoteMark)
Expand Down
6 changes: 4 additions & 2 deletions Sources/Attributes.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ open class Attributes: NSCopying {
// Stored by lowercased key, but key case is checked against the copy inside
// the Attribute on retrieval.
@usableFromInline
lazy var attributes: [Attribute] = []
var attributes: [Attribute] = []
@usableFromInline
internal var lowercasedKeysCache: [[UInt8]]? = nil

public init() {
Expand Down Expand Up @@ -138,7 +139,7 @@ open class Attributes: NSCopying {
invalidateLowercasedKeysCache()
}
}

/**
Remove an attribute by key. <b>Case insensitive.</b>
@param key attribute key to remove
Expand Down Expand Up @@ -212,6 +213,7 @@ open class Attributes: NSCopying {
to keys will not be recognised in the containing set.
@return a view of the attributes as a List.
*/
@inlinable
open func asList() -> [Attribute] {
// `attributes` is a Swift array (a value type) holding `Attribute` class instances:
// the caller receives its own array value, but the elements are the same shared objects.
return attributes
}
Expand Down
2 changes: 1 addition & 1 deletion Sources/Element.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1337,7 +1337,7 @@ open class Element: Node {
return self
}

override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings)throws {
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) throws {
if (out.prettyPrint() && (_tag.formatAsBlock() || (parent() != nil && parent()!.tag().formatAsBlock()) || out.outline())) {
if !accum.isEmpty {
indent(accum, depth, out)
Expand Down
108 changes: 68 additions & 40 deletions Sources/Entities.swift
Original file line number Diff line number Diff line change
Expand Up @@ -230,49 +230,77 @@ public class Entities {
) {
let escapeMode = out.escapeMode()
let encoder = out.encoder()
var i = 0, n = string.count
var lastWasWhite = false, reachedNonWhite = false
while i < n {
let b = string[i]
if normaliseWhite && b.isWhitespace {
var j = i
while j < n && string[j].isWhitespace { j += 1 }
if (!reachedNonWhite && stripLeadingWhite) || lastWasWhite {
i = j; continue
}
accum.append(0x20)
lastWasWhite = true; i = j; continue
}
lastWasWhite = false
reachedNonWhite = true
if b < 0x80 {
switch b {
case 0x26: accum.append(contentsOf: ampEntityUTF8)
case 0xA0: accum.append(contentsOf: escapeMode == .xhtml ? xa0EntityUTF8 : nbspEntityUTF8)
case 0x3C:
if !inAttribute || escapeMode == .xhtml { accum.append(contentsOf: ltEntityUTF8) } else { accum.append(b) }
case 0x3E:
if !inAttribute { accum.append(contentsOf: gtEntityUTF8) } else { accum.append(b) }
case 0x22:
if inAttribute { accum.append(contentsOf: quotEntityUTF8) } else { accum.append(b) }
default:
if encoder == .ascii || encoder == .utf8 || encoder == .utf16 || canEncode(byte: b, encoder: encoder) {
accum.append(b)
} else {
appendEncoded(accum: &accum, escapeMode: escapeMode, bytes: [b])
let encoderKnownToBeAbleToEncode = encoder == .ascii || encoder == .utf8 || encoder == .utf16
let count = string.count
accum.reserveCapacity(count)
string.withUnsafeBufferPointer { buf in
guard let base = buf.baseAddress else { return }
var i = 0
var lastWasWhite = false, reachedNonWhite = false
while i < count {
let b = base[i]
if normaliseWhite && b.isWhitespace {
var j = i
while j < count && base[j].isWhitespace {
j += 1
}
if (!reachedNonWhite && stripLeadingWhite) || lastWasWhite {
i = j
continue
}
accum.append(0x20)
lastWasWhite = true
i = j
continue
}
i += 1
} else {
let len = utf8CharLength(for: b)
let end = i + len <= n ? i + len : n
let charBytes = Array(string[i..<end])
if canEncode(bytes: charBytes, encoder: encoder) {
accum.append(contentsOf: charBytes)
lastWasWhite = false
reachedNonWhite = true
if b < 0x80 {
switch b {
case 0x26:
accum.append(contentsOf: ampEntityUTF8)
case 0xA0:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch is unreachable, because 0xA0 is greater than 0x80. Because of it, non-breaking spaces are not escaped.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please contribute a PR and I will merge it! Thank you

accum.append(contentsOf: escapeMode == .xhtml ? xa0EntityUTF8 : nbspEntityUTF8)
case 0x3C:
if !inAttribute || escapeMode == .xhtml {
accum.append(contentsOf: ltEntityUTF8)
} else {
accum.append(b)
}
case 0x3E:
if !inAttribute {
accum.append(contentsOf: gtEntityUTF8)
} else {
accum.append(b)
}
case 0x22:
if inAttribute {
accum.append(contentsOf: quotEntityUTF8)
} else {
accum.append(b)
}
default:
if encoderKnownToBeAbleToEncode || canEncode(byte: b, encoder: encoder) {
accum.append(b)
} else {
appendEncoded(accum: &accum, escapeMode: escapeMode, bytes: [b])
}
}
i += 1
} else {
appendEncoded(accum: &accum, escapeMode: escapeMode, bytes: charBytes)
let len = utf8CharLength(for: b)
let end = i + len <= count ? i + len : count
var charBytes = [UInt8]()
for j in i..<end {
charBytes.append(base[j])
}
if canEncode(bytes: charBytes, encoder: encoder) {
accum.append(contentsOf: charBytes)
} else {
appendEncoded(accum: &accum, escapeMode: escapeMode, bytes: charBytes)
}
i += len
}
i += len
}
}
}
Expand Down Expand Up @@ -363,7 +391,7 @@ public class Entities {
}
}

@inlinable
@inline(__always)
internal static func canEncode(byte: UInt8, encoder: String.Encoding) -> Bool {
switch encoder {
case .ascii:
Expand Down
10 changes: 10 additions & 0 deletions Sources/FormElement.swift
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ public class FormElement: Element {
public override init(_ tag: Tag, _ baseUri: [UInt8], _ attributes: Attributes) {
super.init(tag, baseUri, attributes)
}

/**
* Create a new, standalone form element from a tag and a base URI only.
*
* Both arguments are forwarded unchanged to the superclass initializer. Use the
* three-argument initializer when an explicit attribute set must be supplied.
*
* @param tag tag of this element
* @param baseUri the base URI, as a byte array
*/
public override init(_ tag: Tag, _ baseUri: [UInt8]) {
super.init(tag, baseUri)
}

/**
* Get the list of form control elements associated with this form.
Expand Down
41 changes: 30 additions & 11 deletions Sources/HtmlTreeBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,12 @@ class HtmlTreeBuilder: TreeBuilder {
try tokeniser.emit(emptyEnd.reset().name(el.tagNameUTF8())) // ensure we get out of whatever state we are in. emitted for yielded processing
return el
}
try Validate.notNull(obj: startTag._attributes)
let el: Element = try Element(Tag.valueOf(startTag.name(), settings), baseUri, settings.normalizeAttributes(startTag._attributes))
let el: Element
if let attributes = startTag._attributes {
el = try Element(Tag.valueOf(startTag.name(), settings), baseUri, settings.normalizeAttributes(attributes))
} else {
el = try Element(Tag.valueOf(startTag.name(), settings), baseUri)
}
try insert(el)
return el
}
Expand All @@ -209,16 +213,21 @@ class HtmlTreeBuilder: TreeBuilder {
return el
}

func insert(_ el: Element)throws {
@inlinable
func insert(_ el: Element) throws {
try insertNode(el)
stack.append(el)
}

@discardableResult
func insertEmpty(_ startTag: Token.StartTag) throws -> Element {
let tag: Tag = try Tag.valueOf(startTag.name(), settings)
try Validate.notNull(obj: startTag._attributes)
let el: Element = Element(tag, baseUri, startTag._attributes)
let el: Element
if let attributes = startTag._attributes {
el = Element(tag, baseUri, attributes)
} else {
el = Element(tag, baseUri)
}
try insertNode(el)
if (startTag.isSelfClosing()) {
if (tag.isKnownTag()) {
Expand All @@ -235,8 +244,12 @@ class HtmlTreeBuilder: TreeBuilder {
@discardableResult
func insertForm(_ startTag: Token.StartTag, _ onStack: Bool) throws -> FormElement {
let tag: Tag = try Tag.valueOf(startTag.name(), settings)
try Validate.notNull(obj: startTag._attributes)
let el: FormElement = FormElement(tag, baseUri, startTag._attributes)
let el: FormElement
if let attributes = startTag._attributes {
el = FormElement(tag, baseUri, attributes)
} else {
el = FormElement(tag, baseUri)
}
setFormElement(el)
try insertNode(el)
if (onStack) {
Expand All @@ -250,6 +263,7 @@ class HtmlTreeBuilder: TreeBuilder {
try insertNode(comment)
}

@inlinable
func insert(_ characterToken: Token.Char) throws {
var node: Node
// characters in script and style go in as datanodes, not text nodes
Expand All @@ -264,9 +278,10 @@ class HtmlTreeBuilder: TreeBuilder {
try currentElement()?.appendChild(node) // doesn't use insertNode, because we don't foster these; and will always have a stack.
}

private func insertNode(_ node: Node)throws {
@inlinable
internal func insertNode(_ node: Node) throws {
// if the stack hasn't been set up yet, elements (doctype, comments) go into the doc
if (stack.count == 0) {
if stack.isEmpty {
try doc.appendChild(node)
} else if (isFosterInserts()) {
try insertInFosterParent(node)
Expand All @@ -276,8 +291,8 @@ class HtmlTreeBuilder: TreeBuilder {

// connect form controls to their form element
if let n = (node as? Element) {
if(n.tag().isFormListed()) {
if ( formElement != nil) {
if n.tag().isFormListed() {
if formElement != nil {
formElement!.addElement(n)
}
}
Expand All @@ -290,10 +305,12 @@ class HtmlTreeBuilder: TreeBuilder {
return stack.remove(at: size-1)
}

/// Appends `element` to the end of `stack`.
///
/// Only the stack is modified here; compare `insert(_:)`, which calls `insertNode`
/// before appending to the stack.
@inlinable
func push(_ element: Element) {
stack.append(element)
}

/// Returns the builder's current `stack` of elements.
@inlinable
func getStack() -> [Element] {
    return stack
}
Expand Down Expand Up @@ -323,6 +340,7 @@ class HtmlTreeBuilder: TreeBuilder {
return nil
}

/// Convenience overload of `getFromStack` that takes the element name as a `String`.
///
/// The name is converted via `utf8Array` and the call is forwarded to the overload
/// that accepts the converted value.
@inlinable
func getFromStack(_ elName: String) -> Element? {
    let nameBytes = elName.utf8Array
    return getFromStack(nameBytes)
}
Expand Down Expand Up @@ -622,6 +640,7 @@ class HtmlTreeBuilder: TreeBuilder {
return headElement
}

/// Returns the current value of `fosterInserts`.
///
/// `insertNode` consults this once the stack is non-empty: when it returns true the
/// node is routed through `insertInFosterParent`.
@inlinable
func isFosterInserts() -> Bool {
return fosterInserts
}
Expand Down
18 changes: 10 additions & 8 deletions Sources/HtmlTreeBuilderState.swift
Original file line number Diff line number Diff line change
Expand Up @@ -539,25 +539,27 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {

tb.tokeniser.acknowledgeSelfClosingFlag()
try tb.processStartTag(UTF8Arrays.form)
if (startTag._attributes.hasKey(key: UTF8Arrays.action)) {
if startTag._attributes?.hasKey(key: UTF8Arrays.action) ?? false {
if let form: Element = tb.getFormElement() {
try form.attr(UTF8Arrays.action, startTag._attributes.get(key: UTF8Arrays.action))
try form.attr(UTF8Arrays.action, startTag._attributes?.get(key: UTF8Arrays.action) ?? [])
}
}
try tb.processStartTag(UTF8Arrays.hr)
try tb.processStartTag(UTF8Arrays.label)
// hope you like english.
let prompt: [UInt8] = startTag._attributes.hasKey(key: UTF8Arrays.prompt) ?
startTag._attributes.get(key: UTF8Arrays.prompt) :
let prompt: [UInt8] = (startTag._attributes?.hasKey(key: UTF8Arrays.prompt) ?? false) ?
startTag._attributes?.get(key: UTF8Arrays.prompt) ?? [] :
"self is a searchable index. Enter search keywords: ".utf8Array

try tb.process(Token.Char().data(prompt))

// input
let inputAttribs: Attributes = Attributes()
for attr: Attribute in startTag._attributes {
if (!Constants.InBodyStartInputAttribs.contains(attr.getKeyUTF8())) {
inputAttribs.put(attribute: attr)
if let attributes = startTag._attributes {
for attr: Attribute in attributes {
if (!Constants.InBodyStartInputAttribs.contains(attr.getKeyUTF8())) {
inputAttribs.put(attribute: attr)
}
}
}
try inputAttribs.put(UTF8Arrays.name, UTF8Arrays.isindex)
Expand Down Expand Up @@ -927,7 +929,7 @@ enum HtmlTreeBuilderState: String, HtmlTreeBuilderStateProtocol {
} else if name == UTF8Arrays.style || name == UTF8Arrays.script {
return try tb.process(t, .InHead)
} else if name == UTF8Arrays.input {
if (!startTag._attributes.get(key: UTF8Arrays.type).equalsIgnoreCase(string: UTF8Arrays.hidden)) {
if !(startTag._attributes?.get(key: UTF8Arrays.type).equalsIgnoreCase(string: UTF8Arrays.hidden) ?? false) {
return try anythingElse(t, tb)
} else {
try tb.insertEmpty(startTag)
Expand Down
8 changes: 7 additions & 1 deletion Sources/Node.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ open class Node: Equatable, Hashable {
@param attributes attributes (not null, but may be empty)
*/
public init(_ baseUri: [UInt8], _ attributes: Attributes) {
self.childNodes = Node.EMPTY_NODES
childNodes = Node.EMPTY_NODES
childNodes.reserveCapacity(8)

self.baseUri = baseUri.trim()
self.attributes = attributes

Expand All @@ -70,6 +72,8 @@ open class Node: Equatable, Hashable {

public init(_ baseUri: [UInt8]) {
childNodes = Node.EMPTY_NODES
childNodes.reserveCapacity(8)

self.baseUri = baseUri.trim()
self.attributes = Attributes()

Expand All @@ -81,6 +85,8 @@ open class Node: Equatable, Hashable {
*/
public init() {
self.childNodes = Node.EMPTY_NODES
childNodes.reserveCapacity(8)

self.attributes = nil
self.baseUri = nil

Expand Down
Loading
0