Initial Commit
This commit is contained in:
210
Sources/HRW/HTMLParser.swift
Normal file
210
Sources/HRW/HTMLParser.swift
Normal file
@@ -0,0 +1,210 @@
|
||||
//
|
||||
// HTMLParser.swift
|
||||
//
|
||||
//
|
||||
// Created by Isaac Paul on 7/31/24.
|
||||
//
|
||||
|
||||
|
||||
/*
|
||||
public class HTMLParser {
|
||||
func idk() {
|
||||
|
||||
}
|
||||
|
||||
static public func read(_ html:String) throws -> HTML {
|
||||
guard var xmlReader = XMLParser(str: html) else { throw EmptyStringError() }
|
||||
while let node = try xmlReader.readToken() {
|
||||
print("\(node)")
|
||||
if case XMLToken.tag_start(let name, let attributes) = node {
|
||||
if (name == "html") {
|
||||
let document = try HTML(attributes, xmlReader)
|
||||
}
|
||||
}
|
||||
}
|
||||
throw EmptyStringError()
|
||||
}
|
||||
}
|
||||
*/
|
||||
// TODO: Not really an HTML node.
|
||||
/// A node that carries raw text rather than an element.
///
/// The text is stored in `content` and is emitted unchanged by `toString`.
public class HTMLText : HTMLNode, IFlowContent {

    /// The text carried by this node.
    public var content: String

    /// Creates a text node holding `content`.
    ///
    /// The superclass initializer is invoked with an empty `expectedAttributes`
    /// dictionary through `try!`, so any error it throws terminates the program.
    public init(content text: String) {
        content = text
        try! super.init(expectedAttributes: [:])
    }

    /// Returns `depth` untouched together with the stored text.
    ///
    /// `spacingStrat` is accepted to match the overridden signature but is not
    /// consulted here; no indentation is applied to the text.
    override public func toString(_ depth:Int = 0, spacingStrat:SpacingStrat = .tabs) -> (Int, String) {
        let rendered = (depth, content)
        return rendered
    }
}
|
||||
|
||||
/// An `XMLNode` subclass that stores a run of text in `content`.
public class XMLText : XMLNode {

    /// The text carried by this node.
    public var content: String

    /// Creates a text node holding `content`.
    public init(content text: String) {
        content = text
        super.init()
    }
}
|
||||
|
||||
extension XMLParser {
|
||||
/// Collects nodes by calling `readObject` repeatedly until it returns `nil`.
///
/// Every call forwards `endTag` and `validTags` unchanged and uses
/// `xmlToHtmlMapper` as the mapper.
/// NOTE(review): the only `xmlToHtmlMapper` definition visible in this file is
/// commented out — confirm it is provided elsewhere.
///
/// - Parameters:
///   - endTag: Passed through to `readObject`.
///   - validTags: Passed through to `readObject`.
/// - Returns: The nodes read, in the order they were produced.
/// - Throws: Whatever `readObject` throws.
public func readObjects(endTag:String? = nil, validTags:[String]? = nil) throws -> [HTMLNode] {
    var nodes = [HTMLNode]()
    while true {
        guard let node = try readObject(endTag: endTag, validTags: validTags, xmlToHtmlMapper) else {
            break
        }
        nodes.append(node)
    }
    return nodes
}
|
||||
|
||||
/// Reads one token via `readToken()` and turns it into an `HTMLNode`.
///
/// - Parameters:
///   - endTag: Tag name an end tag must match when non-nil. It is also used in
///     error messages, with `"root"` substituted when it is `nil`.
///   - validTags: When non-nil, start tags and empty tags whose name is not in
///     this list are rejected.
///   - mapper: Builds the node for a tag. It receives `nil` as its parser
///     argument for an empty tag and `self` for a start tag.
/// - Returns: An `HTMLText` for a data token, the mapper's result for a start or
///   empty tag, or `nil` when an end tag was read or when no token was available
///   and `endTag` is `nil`.
/// - Throws: `AppError` for an instruction token, a tag not in `validTags`, an
///   end tag that differs from a non-nil `endTag`, or running out of tokens
///   while `endTag` is non-nil. Errors from `readToken()` and `mapper` propagate.
public func readObject(endTag:String? = nil, validTags:[String]? = nil, _ mapper:(_ name:String, _ attributes:[String:String], _ parser:XMLParser?) throws -> HTMLNode) throws -> HTMLNode? {
    let token = try readToken()

    switch token {
    case .none:
        // No token left: acceptable only when no closing tag is pending.
        if endTag != nil {
            throw AppError("Reached end of document before closing tag: \(endTag ?? "root")")
        }
        return nil

    case .instruction:
        throw AppError("Unexpected Instruction")

    case .data(let scalars):
        // Rebuild a String from the token's unicode scalars.
        var text = String()
        text.unicodeScalars.append(contentsOf: scalars)
        return HTMLText(content: text)

    case .tag_end(let name):
        // With a nil endTag, any end tag is accepted and simply ends the read.
        if let tag = endTag, tag != name {
            throw AppError("Unexpected end tag: \(name) for element \(tag)")
        }
        return nil

    case .tag_empty(let name, let attributes):
        if let allowed = validTags, !allowed.contains(name) {
            throw AppError("Invalid tag: \(name) for element \(endTag ?? "root")")
        }
        // Empty tag: the mapper gets no parser.
        return try mapper(name, attributes, nil)

    case .tag_start(let name, let attributes):
        if let allowed = validTags, !allowed.contains(name) {
            throw AppError("Invalid tag: \(name) for element \(endTag ?? "root")")
        }
        // Start tag: the mapper is handed this parser.
        return try mapper(name, attributes, self)
    }
}
|
||||
|
||||
/// Reads one token via `readToken()` and turns it into an `XMLNode`.
///
/// - Parameters:
///   - endTag: Tag name an end tag must match when non-nil. It is also used in
///     error messages, with `"root"` substituted when it is `nil`.
///   - validTags: When non-nil, start tags and empty tags whose name is not in
///     this list are rejected.
/// - Returns: An `XMLText` for a data token, a `GenericXMLNode` for a start or
///   empty tag, or `nil` when an end tag was read or when no token was available
///   and `endTag` is `nil`.
/// - Throws: `AppError` for an instruction token, a tag not in `validTags`, an
///   end tag that differs from a non-nil `endTag`, or running out of tokens
///   while `endTag` is non-nil. Errors from `readToken()` and from the
///   `GenericXMLNode` initializer propagate.
public func readObjectXml(endTag:String? = nil, validTags:[String]? = nil) throws -> XMLNode? {
    let token = try readToken()

    switch token {
    case .none:
        // No token left: acceptable only when no closing tag is pending.
        if endTag != nil {
            throw AppError("Reached end of document before closing tag: \(endTag ?? "root")")
        }
        return nil

    case .instruction:
        throw AppError("Unexpected Instruction")

    case .data(let scalars):
        // Rebuild a String from the token's unicode scalars.
        var text = String()
        text.unicodeScalars.append(contentsOf: scalars)
        return XMLText(content: text)

    case .tag_end(let name):
        // With a nil endTag, any end tag is accepted and simply ends the read.
        if let tag = endTag, tag != name {
            throw AppError("Unexpected end tag: \(name) for element \(tag)")
        }
        return nil

    case .tag_empty(let name, let attributes):
        if let allowed = validTags, !allowed.contains(name) {
            throw AppError("Invalid tag: \(name) for element \(endTag ?? "root")")
        }
        // Empty tag: the node is built without a parser.
        return try GenericXMLNode(name, attributes, nil)

    case .tag_start(let name, let attributes):
        if let allowed = validTags, !allowed.contains(name) {
            throw AppError("Invalid tag: \(name) for element \(endTag ?? "root")")
        }
        // Start tag: the node initializer is handed this parser.
        return try GenericXMLNode(name, attributes, self)
    }
}
|
||||
|
||||
/// Reads one token via `readToken()` and returns it as text if it is a data token.
///
/// - Parameter endTag: The only end-tag name that is accepted.
/// - Returns: The data token's unicode scalars as a `String`, or `nil` when the
///   token is an end tag named `endTag`.
/// - Throws: `AppError` for an instruction token, an empty tag, a start tag, an
///   end tag with a different name, or when no token is available. Errors from
///   `readToken()` propagate.
public func readDataAsString(endTag:String) throws -> String? {
    let token = try readToken()

    switch token {
    case .none:
        throw AppError("Reached end of document before closing tag: \(endTag)")

    case .instruction:
        throw AppError("Unexpected Instruction")

    case .data(let scalars):
        // Rebuild a String from the token's unicode scalars.
        var text = ""
        text.unicodeScalars.append(contentsOf: scalars)
        return text

    case .tag_end(let name):
        if (name != endTag) {
            throw AppError("Unexpected end tag: \(name)")
        }
        return nil

    // The attributes are irrelevant to the error, so they are not bound.
    case .tag_empty(let name, _):
        throw AppError("Unexpected empty tag: \(name)")

    case .tag_start(let name, _):
        throw AppError("Unexpected tag start: \(name)")
    }
}
|
||||
}
|
||||
|
||||
/*
|
||||
public func xmlToHtmlMapper(_ name:String, _ attributes:[String:String], _ parser:XMLParser?) throws -> HTMLNode {
|
||||
switch name {
|
||||
case "head":
|
||||
return try Head(attributes, parser)
|
||||
case "body":
|
||||
return try Body(attributes, parser)
|
||||
case "meta":
|
||||
if (attributes["itemprop"] != nil) {
|
||||
return try MetaProp(attributes, parser)
|
||||
} else {
|
||||
return try Meta(attributes, parser)
|
||||
}
|
||||
case "noscript":
|
||||
return try NoScriptHead(attributes, parser)
|
||||
case "base":
|
||||
return try Base(attributes, parser)
|
||||
case "title":
|
||||
return try Title(attributes, parser)
|
||||
case "Style":
|
||||
return try Title(attributes, parser)
|
||||
case "script":
|
||||
let src = attributes["src"]
|
||||
let type = attributes["type"] ?? ""
|
||||
switch (type) {
|
||||
case "":
|
||||
if let _ = src {
|
||||
return try Script.ClassicRemote(attributes, parser)
|
||||
} else {
|
||||
return try Script.Classic(attributes, parser)
|
||||
}
|
||||
case "module":
|
||||
if let _ = src {
|
||||
return try Script.ModuleRemote(attributes, parser)
|
||||
} else {
|
||||
return try Script.Module(attributes, parser)
|
||||
}
|
||||
case "importmap":
|
||||
return try Script.ImportMap(attributes, parser)
|
||||
default:
|
||||
return try Script.DataBlock(attributes, parser)
|
||||
}
|
||||
default:
|
||||
throw AppError("Unknown or unhandled xml tag: \(name)")
|
||||
}
|
||||
}
|
||||
*/
|
||||
Reference in New Issue
Block a user