//
//  HTMLParser.swift
//
//
//  Created by Isaac Paul on 7/31/24.
//

/*
public class HTMLParser {
    func idk() {

    }
    static public func read(_ html:String) throws -> HTML {
        guard var xmlReader = XMLParser(str: html) else { throw EmptyStringError() }
        while let node = try xmlReader.readToken() {
            print("\(node)")
            if case XMLToken.tag_start(let name, let attributes) = node {
                if (name == "html") {
                    let document = try HTML(attributes, xmlReader)
                }
            }
        }
        throw EmptyStringError()
    }
}
*/

//TODO: Not really an html node..
/// A run of character data wrapped as an `HTMLNode` so it can sit in the same
/// child list as element nodes.
public class HTMLText : HTMLNode, IFlowContent, IPhrasing, IFlow {
    /// The text exactly as it was handed to the initializer.
    public var content: String

    /// Creates a text node holding `content`.
    public init(content: String) {
        self.content = content
        // NOTE(review): `try!` assumes the superclass initializer cannot fail
        // for an empty expected-attribute map — confirm against HTMLNode.
        try! super.init(expectedAttributes: [:])
    }

    /// Returns `depth` unchanged together with the stored text.
    /// `spacingStrat` is accepted for signature compatibility but is not used.
    override public func toString(_ depth:Int = 0, spacingStrat:SpacingStrat = .tabs) -> (Int, String) {
        return (depth, content)
    }
}

/// A run of character data wrapped as an `XMLNode`.
public class XMLText : XMLNode {
    /// The text exactly as it was handed to the initializer.
    public var content: String

    /// Creates a text node holding `content`.
    public init(content: String) {
        self.content = content
        super.init()
    }
}

// MARK: - Shared helpers

private extension XMLParser {

    /// Builds a `String` by appending every scalar of `scalars` to an empty string.
    static func string<S: Sequence>(fromScalars scalars: S) -> String where S.Element == Unicode.Scalar {
        var text = ""
        text.unicodeScalars.append(contentsOf: scalars)
        return text
    }

    /// Throws when `validTags` is non-nil and does not contain `name`.
    ///
    /// - Parameters:
    ///   - name: Tag name that was just read.
    ///   - validTags: Whitelist of acceptable tag names; `nil` disables the check.
    ///   - endTag: Name of the enclosing element, used only for the error text
    ///     (`"root"` is printed when it is `nil`).
    static func requireAllowed(_ name: String, validTags: [String]?, endTag: String?) throws {
        guard let validTags = validTags, !validTags.contains(name) else { return }
        throw AppError("Invalid tag: \(name) for element \(endTag ?? "root")")
    }
}

// MARK: - Node reading

extension XMLParser {

    /// Reads nodes with `readObject` until it returns `nil` and collects them.
    ///
    /// NOTE(review): `xmlToHtmlMapper` is referenced here but the copy at the
    /// bottom of this file is commented out — presumably it is declared
    /// elsewhere in the module; confirm.
    ///
    /// - Parameters:
    ///   - endTag: Name of the element whose children are being read, if any.
    ///   - validTags: Optional whitelist of child tag names.
    /// - Returns: The nodes read, in document order.
    /// - Throws: Whatever `readObject` throws.
    public func readObjects(endTag:String? = nil, validTags:[String]? = nil) throws -> [HTMLNode] {
        var nodes:[HTMLNode] = []
        while let node = try readObject(endTag: endTag, validTags: validTags, xmlToHtmlMapper) {
            nodes.append(node)
        }
        return nodes
    }

    /// Reads one token and converts it into an `HTMLNode`.
    ///
    /// - Parameters:
    ///   - endTag: Name of the enclosing element. When `nil`, any end tag and
    ///     the end of input both yield `nil` without an error.
    ///   - validTags: Optional whitelist applied to start and empty tags.
    ///   - mapper: Builds the node for a tag. It receives `nil` as the parser
    ///     for an empty tag and `self` for a start tag.
    /// - Returns: The node, or `nil` when an end tag was read or when input
    ///   ended while `endTag` is `nil`.
    /// - Throws: `AppError` for an instruction token, a tag not in `validTags`,
    ///   an end tag that differs from a non-nil `endTag`, or end of input while
    ///   `endTag` is non-nil; also rethrows errors from `readToken` and `mapper`.
    public func readObject(endTag:String? = nil, validTags:[String]? = nil, _ mapper:(_ name:String, _ attributes:[String:String], _ parser:XMLParser?) throws -> HTMLNode) throws -> HTMLNode? {
        let token = try readToken()
        switch (token) {
        case let .data(unicode):
            return HTMLText(content: XMLParser.string(fromScalars: unicode))
        case .instruction:
            throw AppError("Unexpected Instruction")
        case let .tag_empty(name, attributes):
            try XMLParser.requireAllowed(name, validTags: validTags, endTag: endTag)
            return try mapper(name, attributes, nil)
        case let .tag_end(name):
            if let expected = endTag, name != expected {
                throw AppError("Unexpected end tag: \(name) for element \(expected)")
            }
            return nil
        case let .tag_start(name, attributes):
            try XMLParser.requireAllowed(name, validTags: validTags, endTag: endTag)
            return try mapper(name, attributes, self)
        case .none:
            // Running out of tokens is only acceptable when no element is open.
            guard let openTag = endTag else { return nil }
            throw AppError("Reached end of document before closing tag: \(openTag)")
        }
    }

    /// Reads one token and converts it into an `XMLNode`.
    ///
    /// Same token handling as `readObject`, except that text becomes `XMLText`
    /// and tags become `GenericXMLNode` (given `nil` as the parser for an empty
    /// tag and `self` for a start tag).
    ///
    /// - Parameters:
    ///   - endTag: Name of the enclosing element. When `nil`, any end tag and
    ///     the end of input both yield `nil` without an error.
    ///   - validTags: Optional whitelist applied to start and empty tags.
    /// - Returns: The node, or `nil` when an end tag was read or when input
    ///   ended while `endTag` is `nil`.
    /// - Throws: `AppError` under the same conditions as `readObject`; also
    ///   rethrows errors from `readToken` and `GenericXMLNode`'s initializer.
    public func readObjectXml(endTag:String? = nil, validTags:[String]? = nil) throws -> XMLNode? {
        let token = try readToken()
        switch (token) {
        case let .data(unicode):
            return XMLText(content: XMLParser.string(fromScalars: unicode))
        case .instruction:
            throw AppError("Unexpected Instruction")
        case let .tag_empty(name, attributes):
            try XMLParser.requireAllowed(name, validTags: validTags, endTag: endTag)
            return try GenericXMLNode(name, attributes, nil)
        case let .tag_end(name):
            if let expected = endTag, name != expected {
                throw AppError("Unexpected end tag: \(name) for element \(expected)")
            }
            return nil
        case let .tag_start(name, attributes):
            try XMLParser.requireAllowed(name, validTags: validTags, endTag: endTag)
            return try GenericXMLNode(name, attributes, self)
        case .none:
            // Running out of tokens is only acceptable when no element is open.
            guard let openTag = endTag else { return nil }
            throw AppError("Reached end of document before closing tag: \(openTag)")
        }
    }

    /// Reads one token that must be either character data or the closing tag
    /// named `endTag`.
    ///
    /// - Parameter endTag: Name of the element whose text is being read.
    /// - Returns: The text for a data token, or `nil` when the token is the
    ///   end tag `endTag`.
    /// - Throws: `AppError` for an instruction, an empty tag, a start tag, an
    ///   end tag with a different name, or end of input; also rethrows errors
    ///   from `readToken`.
    public func readDataAsString(endTag:String) throws -> String? {
        let token = try readToken()
        switch (token) {
        case let .data(unicode):
            return XMLParser.string(fromScalars: unicode)
        case .instruction:
            throw AppError("Unexpected Instruction")
        case let .tag_empty(name, _):
            throw AppError("Unexpected empty tag: \(name)")
        case let .tag_end(name):
            if (name != endTag) {
                throw AppError("Unexpected end tag: \(name)")
            }
            return nil
        case let .tag_start(name, _):
            throw AppError("Unexpected tag start: \(name)")
        case .none:
            throw AppError("Reached end of document before closing tag: \(endTag)")
        }
    }
}

/*
// NOTE(review): the "Style" case below is capitalized and returns Title —
// looks like a placeholder; confirm before re-enabling this mapper.
public func xmlToHtmlMapper(_ name:String, _ attributes:[String:String], _ parser:XMLParser?) throws -> HTMLNode {
    switch name {
    case "head":
        return try Head(attributes, parser)
    case "body":
        return try Body(attributes, parser)
    case "meta":
        if (attributes["itemprop"] != nil) {
            return try MetaProp(attributes, parser)
        } else {
            return try Meta(attributes, parser)
        }
    case "noscript":
        return try NoScriptHead(attributes, parser)
    case "base":
        return try Base(attributes, parser)
    case "title":
        return try Title(attributes, parser)
    case "Style":
        return try Title(attributes, parser)
    case "script":
        let src = attributes["src"]
        let type = attributes["type"] ?? ""
        switch (type) {
        case "":
            if let _ = src {
                return try Script.ClassicRemote(attributes, parser)
            } else {
                return try Script.Classic(attributes, parser)
            }
        case "module":
            if let _ = src {
                return try Script.ModuleRemote(attributes, parser)
            } else {
                return try Script.Module(attributes, parser)
            }
        case "importmap":
            return try Script.ImportMap(attributes, parser)
        default:
            return try Script.DataBlock(attributes, parser)
        }
    default:
        throw AppError("Unknown or unhandled xml tag: \(name)")
    }
}
*/