import Foundation
import SwiftSoup

/// Current page number and highest known page number of a paginated listing.
struct PaginationInfo {
    let current: Int
    let total: Int
}

/// Stateless helpers that turn listing/detail HTML into model values.
enum HTMLParser {

    // MARK: - Shared helpers

    /// One decoded `switchSource(id, 'url', 'format')` call.
    private struct SwitchSourceCall {
        /// `nil` only when the captured digits do not fit in an `Int`.
        let id: Int?
        let url: String
        let format: String
    }

    /// Metadata read from the page's JSON-LD block; every field keeps its
    /// "missing" default when the block is absent or lacks the key.
    private struct StructuredMetadata {
        var year = 0
        var rating: Double?
        var directors: [String] = []
        var actors: [String] = []
        var genres: [String] = []
        var description = ""
        var posterURL: URL?
    }

    /// Pattern for `switchSource(<digits>, '<url>', '<format>')`.
    private static let switchSourcePattern = #"switchSource\((\d+),\s*'([^']*)',\s*'([^']*)'\)"#

    /// Compiled once and shared by the button parser and the script fallback.
    private static let switchSourceRegex = try? NSRegularExpression(pattern: switchSourcePattern)

    /// Colons accepted after a Chinese label: ASCII `:` and full-width `:` (U+FF1A).
    private static let labelColons = [":", "\u{FF1A}"]

    /// Number of items placed in the first home-page section.
    private static let firstHomeSectionSize = 10

    /// Maps the category label shown on a card to a `ContentCategory`, or `nil` for unknown labels.
    private static func contentCategory(forLabel label: String) -> ContentCategory? {
        switch label {
        case "电影": return .movie
        case "剧集", "电视剧": return .series
        case "综艺": return .variety
        case "动漫", "动画": return .anime
        default: return nil
        }
    }

    /// Reads the integer from text such as `"在线: 3"`; returns 0 when no integer remains
    /// after removing `label` followed by either supported colon.
    private static func labeledCount(in text: String, label: String) -> Int {
        var remainder = text
        for colon in labelColons {
            remainder = remainder.replacingOccurrences(of: label + colon, with: "")
        }
        return Int(remainder.trimmingCharacters(in: .whitespaces)) ?? 0
    }

    /// Returns the first token made only of digits and dots that parses as a `Double`.
    /// Using the first token (instead of concatenating all of them) keeps "8.5/10" at 8.5.
    private static func firstNumber(in text: String) -> Double? {
        let nonNumeric = CharacterSet.decimalDigits.union(CharacterSet(charactersIn: ".")).inverted
        return text.components(separatedBy: nonNumeric).compactMap { Double($0) }.first
    }

    /// Parses a year from either a bare integer ("2025") or a string that starts
    /// with four ASCII digits ("2025-03-01").
    private static func leadingYear(in text: String) -> Int? {
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        if let whole = Int(trimmed) { return whole }
        let digits = trimmed.prefix(4)
        guard digits.count == 4, digits.allSatisfy({ $0.isASCII && $0.isNumber }) else { return nil }
        return Int(digits)
    }

    /// Reads a JSON value that may be a numeric string or a JSON number.
    private static func jsonLDDouble(_ value: Any?) -> Double? {
        if let text = value as? String {
            return Double(text.trimmingCharacters(in: .whitespaces))
        }
        if let number = value as? NSNumber {
            return number.doubleValue
        }
        return nil
    }

    /// Collects names from a JSON-LD property that may be an array of objects with a
    /// `name` key, a single such object, an array of strings, or a single string.
    private static func jsonLDNames(_ value: Any?) -> [String] {
        guard let value = value else { return [] }
        switch value {
        case let objects as [[String: Any]]:
            return objects.compactMap { $0["name"] as? String }
        case let object as [String: Any]:
            return (object["name"] as? String).map { [$0] } ?? []
        case let names as [String]:
            return names
        case let name as String:
            return [name]
        default:
            return []
        }
    }

    /// Resolves a JSON-LD `image` value given as a string, an object with a `url` key,
    /// or an array of either (first resolvable entry wins).
    private static func jsonLDImageURL(_ value: Any?) -> URL? {
        guard let value = value else { return nil }
        switch value {
        case let text as String:
            return URL(string: text)
        case let object as [String: Any]:
            return jsonLDImageURL(object["url"])
        case let list as [Any]:
            for entry in list {
                if let url = jsonLDImageURL(entry) { return url }
            }
            return nil
        default:
            return nil
        }
    }

    /// Display label for a stream format: "HLS" for `m3u8`, otherwise the uppercased format.
    private static func qualityLabel(for format: String) -> String {
        return format == "m3u8" ? "HLS" : format.uppercased()
    }

    /// Decodes the three capture groups of a `switchSourceRegex` match without force-unwrapping.
    private static func switchSourceCall(from match: NSTextCheckingResult, in text: String) -> SwitchSourceCall? {
        guard match.numberOfRanges >= 4,
              let idRange = Range(match.range(at: 1), in: text),
              let urlRange = Range(match.range(at: 2), in: text),
              let formatRange = Range(match.range(at: 3), in: text) else {
            return nil
        }
        return SwitchSourceCall(
            id: Int(text[idRange]),
            url: String(text[urlRange]),
            format: String(text[formatRange])
        )
    }

    // MARK: - List page parsing

    /// Parses every `.movie-card` element in `html` into a `ContentItem`.
    ///
    /// Cards without a `/movie/` link, an empty slug, or an empty title are skipped.
    /// - Parameters:
    ///   - html: Markup of a listing page.
    ///   - defaultCategory: Category used when the card's meta line has no recognised label.
    /// - Returns: Items in document order.
    /// - Throws: Any error raised by SwiftSoup while parsing or selecting.
    static func parseContentList(html: String, defaultCategory: ContentCategory = .movie) throws -> [ContentItem] {
        let doc = try SwiftSoup.parse(html)
        let cards = try doc.select(".movie-card")
        var items: [ContentItem] = []

        for card in cards {
            guard let link = try card.select("a[href^=/movie/]").first() else { continue }
            let href = try link.attr("href")
            let slug = href.replacingOccurrences(of: "/movie/", with: "")
            guard !slug.isEmpty else { continue }

            let title = try card.select("h3 a").text().trimmingCharacters(in: .whitespacesAndNewlines)
            guard !title.isEmpty else { continue }

            let posterURL = URL(string: try card.select("img").attr("src"))

            let ratingText = try card.select(".badge-top-right").text()
                .trimmingCharacters(in: .whitespacesAndNewlines)
            let rating = Double(ratingText)

            // Top-left badge first, then bottom-right; empty badges are dropped.
            let badges = try [".badge-top-left", ".badge-bottom-right"]
                .map { try card.select($0).text().trimmingCharacters(in: .whitespacesAndNewlines) }
                .filter { !$0.isEmpty }

            // Meta line looks like "2025 · 电影".
            let metaText = try card
                .select(".p-4 .text-xs.font-light, .p-4 .text-xs.text-gray-500")
                .first()?
                .text()
                .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
            let metaParts = metaText.split(separator: "·")
                .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
            let year = metaParts.first.flatMap { Int($0) } ?? 0
            var category = defaultCategory
            if metaParts.count > 1, let recognised = contentCategory(forLabel: metaParts[1]) {
                category = recognised
            }

            var onlineCount = 0
            var netdiskCount = 0
            let countSpans = try card.select(".flex.items-center.gap-3 span")
            for span in countSpans {
                let spanText = try span.text()
                if spanText.contains("在线") {
                    onlineCount = labeledCount(in: spanText, label: "在线")
                } else if spanText.contains("网盘") {
                    netdiskCount = labeledCount(in: spanText, label: "网盘")
                }
            }

            items.append(ContentItem(
                id: slug,
                title: title,
                year: year,
                category: category,
                rating: rating,
                posterURL: posterURL,
                badges: badges,
                onlineCount: onlineCount,
                netdiskCount: netdiskCount,
                detailURL: href
            ))
        }

        return items
    }

    // MARK: - Detail page parsing

    /// Parses a detail page into a `ContentDetail`.
    ///
    /// Structured data from the first JSON-LD script is preferred; each field that is
    /// still missing afterwards is filled from the visible HTML.
    /// - Parameter html: Markup of a detail page.
    /// - Throws: Any error raised by SwiftSoup while parsing or selecting.
    static func parseContentDetail(html: String) throws -> ContentDetail {
        let doc = try SwiftSoup.parse(html)

        // `text()` also includes the text of child elements (e.g. nested spans).
        let fullTitle = try doc.select("h1").first()?
            .text()
            .trimmingCharacters(in: .whitespacesAndNewlines) ?? "未知标题"

        // Preferred source: JSON-LD structured data.
        var meta = try parseStructuredMetadata(doc: doc)
        var region = ""

        // Poster fallback: the main poster image inside a `.flex-shrink-0` container.
        if meta.posterURL == nil {
            let imgSrc = try doc.select(".flex-shrink-0 img.object-cover").first()?.attr("src") ?? ""
            meta.posterURL = URL(string: imgSrc)
        }

        // Rating fallback.
        if meta.rating == nil {
            let ratingText = try doc.select(".rating-display").text()
            meta.rating = firstNumber(in: ratingText)
        }

        // Meta line looks like "2025 · 中国 · 爱情 / 剧情 / 都市"; only the first
        // candidate containing "·" is considered.
        let metaDivs = try doc.select(".text-xs.text-gray-600.font-light, .text-sm.text-gray-600.font-light")
        for metaDiv in metaDivs {
            let text = try metaDiv.text().trimmingCharacters(in: .whitespacesAndNewlines)
            guard text.contains("·") else { continue }

            let parts = text.split(separator: "·").map { $0.trimmingCharacters(in: .whitespaces) }
            if meta.year == 0, let first = parts.first, let parsedYear = Int(first) {
                meta.year = parsedYear
            }
            if parts.count >= 2 {
                region = parts[1]
            }
            if parts.count >= 3 && meta.genres.isEmpty {
                meta.genres = parts[2].split(separator: "/").map { $0.trimmingCharacters(in: .whitespaces) }
            }
            break
        }

        // Director / cast fallbacks: a div whose text starts with the label and a colon.
        if meta.directors.isEmpty {
            meta.directors = try labeledNames(in: doc, label: "导演")
        }
        if meta.actors.isEmpty {
            meta.actors = try labeledNames(in: doc, label: "主演")
        }

        // Synopsis fallback: paragraphs inside `.prose`.
        if meta.description.isEmpty {
            let paragraphs = try doc.select(".prose p").map { try $0.text() }
            meta.description = paragraphs.joined(separator: "\n\n")
        }

        let sources = try parseSourceTabs(doc: doc)

        // Slug: last path component of the canonical link, else the title.
        let canonicalHref = try doc.select("link[rel=canonical]").attr("href")
        let slug: String
        if canonicalHref.isEmpty {
            slug = fullTitle
        } else {
            slug = String(canonicalHref.split(separator: "/").last ?? Substring(fullTitle))
        }

        let contentItem = ContentItem(
            id: slug,
            title: fullTitle,
            year: meta.year,
            category: .movie,
            rating: meta.rating,
            posterURL: meta.posterURL,
            badges: [],
            onlineCount: 0,
            netdiskCount: 0,
            detailURL: "/movie/\(slug)"
        )

        // The episode list is taken from the first source and only exposed when it
        // has more than one entry.
        let firstSourceEpisodes = sources.first?.episodes
        return ContentDetail(
            item: contentItem,
            description: meta.description,
            directors: meta.directors,
            actors: meta.actors,
            genres: meta.genres,
            region: region,
            sources: sources,
            episodes: (firstSourceEpisodes?.count ?? 0) > 1 ? firstSourceEpisodes : nil
        )
    }

    /// Reads the first `application/ld+json` script of `doc`, if it holds a JSON object.
    private static func parseStructuredMetadata(doc: Document) throws -> StructuredMetadata {
        var meta = StructuredMetadata()
        guard let script = try doc.select("script[type=application/ld+json]").first() else {
            return meta
        }
        let jsonText = try script.data()
        guard let data = jsonText.data(using: .utf8),
              let json = (try? JSONSerialization.jsonObject(with: data)) as? [String: Any] else {
            return meta
        }

        meta.posterURL = jsonLDImageURL(json["image"])
        if let published = json["datePublished"] as? String {
            // Accepts a bare year as well as a date string that starts with the year.
            meta.year = leadingYear(in: published) ?? 0
        }
        if let aggregate = json["aggregateRating"] as? [String: Any] {
            meta.rating = jsonLDDouble(aggregate["ratingValue"])
        }
        meta.directors = jsonLDNames(json["director"])
        meta.actors = jsonLDNames(json["actor"])
        meta.genres = jsonLDNames(json["genre"])
        if let summary = json["description"] as? String {
            meta.description = summary
        }
        return meta
    }

    /// Finds the first `div` whose text starts with `label` plus a colon and returns the
    /// "/"-separated names from its last `span`. Returns an empty list when no such div
    /// exists or the div has fewer than two spans.
    private static func labeledNames(in doc: Document, label: String) throws -> [String] {
        let prefixes = labelColons.map { label + $0 }
        let divs = try doc.select("div")
        for div in divs {
            let text = try div.text()
            guard prefixes.contains(where: { text.hasPrefix($0) }) else { continue }

            // Only the first matching div is inspected.
            let spans = try div.select("span")
            guard spans.size() >= 2, let valueText = try spans.last()?.text() else { return [] }
            return valueText.split(separator: "/").map { $0.trimmingCharacters(in: .whitespaces) }
        }
        return []
    }

    // MARK: - Source tab parsing

    /// Builds the list of stream sources from `switchSource(...)` calls.
    ///
    /// Buttons whose `onclick` starts with `switchSource` are used first; if none
    /// yields a source, the first `<script>` containing such calls is used instead.
    private static func parseSourceTabs(doc: Document) throws -> [StreamSource] {
        var sources: [StreamSource] = []

        let buttons = try doc.select("button[onclick^=switchSource]")
        for (index, button) in buttons.enumerated() {
            let onclick = try button.attr("onclick")
            let name = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)
            guard let parsed = parseSwitchSource(onclick) else { continue }

            sources.append(StreamSource(
                id: parsed.id,
                name: name.isEmpty ? "播放源 \(index + 1)" : name,
                quality: qualityLabel(for: parsed.format),
                episodes: parseEpisodes(urlString: parsed.url)
            ))
        }

        // Fallback: scan inline scripts.
        guard sources.isEmpty, let regex = switchSourceRegex else { return sources }

        let scripts = try doc.select("script")
        for script in scripts {
            let content = try script.data()
            let matches = regex.matches(in: content, range: NSRange(content.startIndex..., in: content))
            for (index, match) in matches.enumerated() {
                guard let call = switchSourceCall(from: match, in: content) else { continue }
                sources.append(StreamSource(
                    id: call.id ?? index,
                    name: "播放源 \(index + 1)",
                    quality: qualityLabel(for: call.format),
                    episodes: parseEpisodes(urlString: call.url)
                ))
            }
            // Stop at the first script that produced any source.
            if !sources.isEmpty { break }
        }

        return sources
    }

    /// Extracts id, URL and format from the first `switchSource(...)` call in `onclick`.
    /// - Returns: `nil` when the attribute does not contain a matching call.
    private static func parseSwitchSource(_ onclick: String) -> (id: Int, url: String, format: String)? {
        guard let regex = switchSourceRegex,
              let match = regex.firstMatch(in: onclick, range: NSRange(onclick.startIndex..., in: onclick)),
              let call = switchSourceCall(from: match, in: onclick) else {
            return nil
        }
        return (call.id ?? 0, call.url, call.format)
    }

    // MARK: - Episode parsing

    /// Splits a source URL string of the form `name$url#name$url…` into episodes.
    ///
    /// Segments that do not split into exactly a name and a URL are skipped; the
    /// episode `id` is the segment's position among the "#"-separated segments.
    /// When nothing parses and `urlString` is non-empty, a single episode named
    /// "播放" pointing at the whole string is returned.
    static func parseEpisodes(urlString: String) -> [Episode] {
        var episodes: [Episode] = []

        for (index, segment) in urlString.split(separator: "#").enumerated() {
            let fields = segment.split(separator: "$", maxSplits: 1)
            guard fields.count == 2 else { continue }
            episodes.append(Episode(id: index, name: String(fields[0]), url: String(fields[1])))
        }

        if episodes.isEmpty && !urlString.isEmpty {
            episodes.append(Episode(id: 0, name: "播放", url: urlString))
        }
        return episodes
    }

    // MARK: - Pagination parsing

    /// Reads the current page and the highest page number visible in the pagination controls.
    ///
    /// The current page defaults to 1 when `.pagination-active` holds no integer. The total
    /// is the maximum of the current page, every numbered button, and the page in the
    /// "next" link's `/page/<n>` path.
    /// - Throws: Any error raised by SwiftSoup while parsing or selecting.
    static func parsePagination(html: String) throws -> PaginationInfo {
        let doc = try SwiftSoup.parse(html)

        let activeText = try doc.select(".pagination-active").text()
            .trimmingCharacters(in: .whitespacesAndNewlines)
        let currentPage = Int(activeText) ?? 1
        var maxPage = currentPage

        let numberButtons = try doc.select(".pagination-btn.pagination-number")
        for button in numberButtons {
            let label = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)
            if let pageNumber = Int(label) {
                maxPage = max(maxPage, pageNumber)
            }
        }

        let nextLink = try doc.select(".pagination-next").attr("href")
        if let range = nextLink.range(of: #"/page/(\d+)"#, options: .regularExpression) {
            let pageText = nextLink[range].replacingOccurrences(of: "/page/", with: "")
            if let nextPage = Int(pageText) {
                maxPage = max(maxPage, nextPage)
            }
        }

        return PaginationInfo(current: currentPage, total: maxPage)
    }

    // MARK: - Home page section parsing

    /// Splits the home page's items into sections: the first ten items, then the rest.
    /// A single section is returned when there are ten items or fewer.
    /// - Throws: Any error raised by `parseContentList(html:defaultCategory:)`.
    static func parseHomeSections(html: String) throws -> [[ContentItem]] {
        let allItems = try parseContentList(html: html)
        guard allItems.count > firstHomeSectionSize else { return [allItems] }
        return [
            Array(allItems.prefix(firstHomeSectionSize)),
            Array(allItems.suffix(from: firstHomeSectionSize))
        ]
    }
}