import Foundation
import SwiftSoup
/// Page position of a paginated listing, as produced by `HTMLParser.parsePagination(html:)`.
struct PaginationInfo {
    /// Number of the page that was parsed.
    let current: Int

    /// Highest page number found on that page.
    let total: Int
}
enum HTMLParser {
// MARK: - 列表页解析
/// Parses a listing page into content items, one per `.movie-card` element.
///
/// Cards that lack a link starting with `/movie/`, or whose slug or title is empty, are skipped.
/// - Parameters:
///   - html: Raw HTML of the listing page.
///   - defaultCategory: Category assigned when the card's meta line names no recognised category.
/// - Returns: The parsed items, in document order.
/// - Throws: Any error raised by SwiftSoup while parsing or querying the document.
static func parseContentList(html: String, defaultCategory: ContentCategory = .movie) throws -> [ContentItem] {
    // Category labels as they appear after the "·" in a card's meta line.
    let categoryByLabel: [String: ContentCategory] = [
        "电影": .movie,
        "剧集": .series, "电视剧": .series,
        "综艺": .variety,
        "动漫": .anime, "动画": .anime,
    ]

    /// Strips `prefix` from a counter label and reads the rest as an integer; 0 when that fails.
    func counter(from label: String, removing prefix: String) -> Int {
        let remainder = label.replacingOccurrences(of: prefix, with: "").trimmingCharacters(in: .whitespaces)
        return Int(remainder) ?? 0
    }

    let document = try SwiftSoup.parse(html)
    var results: [ContentItem] = []

    for card in try document.select(".movie-card") {
        /// Trimmed combined text of the elements inside this card that match `selector`.
        func trimmedText(_ selector: String) throws -> String {
            try card.select(selector).text().trimmingCharacters(in: .whitespacesAndNewlines)
        }

        // The detail link supplies the href kept as `detailURL` and, minus "/movie/", the item id.
        guard let anchor = try card.select("a[href^=/movie/]").first() else { continue }
        let href = try anchor.attr("href")
        let slug = href.replacingOccurrences(of: "/movie/", with: "")
        if slug.isEmpty { continue }

        let title = try trimmedText("h3 a")
        if title.isEmpty { continue }

        let posterSource = try card.select("img").attr("src")
        let ratingLabel = try trimmedText(".badge-top-right")

        // Only non-empty corner badges are kept, top-left first.
        let badges = try [".badge-top-left", ".badge-bottom-right"]
            .map { try trimmedText($0) }
            .filter { !$0.isEmpty }

        // Meta line, e.g. "2025 · 电影": year first, category label second.
        let metaElement = try card.select(".p-4 .text-xs.font-light, .p-4 .text-xs.text-gray-500").first()
        let metaLine = try metaElement?.text().trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        let metaFields = metaLine.split(separator: "·").map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
        let year = metaFields.first.flatMap { Int($0) } ?? 0
        let category = metaFields.count > 1
            ? (categoryByLabel[metaFields[1]] ?? defaultCategory)
            : defaultCategory

        // Source counters; when several spans carry the same label the last one wins.
        var onlineCount = 0
        var netdiskCount = 0
        for span in try card.select(".flex.items-center.gap-3 span") {
            let label = try span.text()
            if label.contains("在线") {
                onlineCount = counter(from: label, removing: "在线:")
            } else if label.contains("网盘") {
                netdiskCount = counter(from: label, removing: "网盘:")
            }
        }

        results.append(ContentItem(
            id: slug,
            title: title,
            year: year,
            category: category,
            rating: Double(ratingLabel),
            posterURL: URL(string: posterSource),
            badges: badges,
            onlineCount: onlineCount,
            netdiskCount: netdiskCount,
            detailURL: href
        ))
    }

    return results
}
// MARK: - 详情页解析
/// Parses a detail page into a `ContentDetail`.
///
/// Structured fields are read from the first JSON-LD `<script>` when it holds a JSON object;
/// anything it does not provide is then filled in from the page markup.
/// - Parameter html: Raw HTML of the detail page.
/// - Returns: The parsed detail. `item.category` is always `.movie`, the badge and count fields
///   are left empty/zero, and `episodes` is set only when the first source has more than one episode.
/// - Throws: Any error raised by SwiftSoup while parsing or querying the document.
static func parseContentDetail(html: String) throws -> ContentDetail {
    let doc = try SwiftSoup.parse(html)

    // Title. Note that `text()` also includes the text of nested elements such as child spans.
    let h1 = try doc.select("h1").first()
    let fullTitle = try h1?.text().trimmingCharacters(in: .whitespacesAndNewlines) ?? "未知标题"

    // === Prefer structured data from JSON-LD ===
    var year = 0
    var rating: Double?
    var directors: [String] = []
    var actors: [String] = []
    var genres: [String] = []
    var description = ""
    var region = ""
    var posterURL: URL?

    if let jsonLDScript = try doc.select("script[type=application/ld+json]").first() {
        let jsonText = try jsonLDScript.data()
        if let data = jsonText.data(using: .utf8),
           let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {
            if let imageStr = json["image"] as? String {
                posterURL = URL(string: imageStr)
            }
            if let published = json["datePublished"] as? String {
                // Accept a bare year as well as an ISO 8601 date such as "2025-03-01".
                let trimmed = published.trimmingCharacters(in: .whitespacesAndNewlines)
                year = Int(trimmed.prefix(4)) ?? 0
            }
            if let agg = json["aggregateRating"] as? [String: Any] {
                // `ratingValue` may be serialized either as text or as a JSON number.
                if let text = agg["ratingValue"] as? String {
                    rating = Double(text)
                } else if let number = agg["ratingValue"] as? NSNumber {
                    rating = number.doubleValue
                }
            }
            directors = jsonLDNames(json["director"])
            actors = jsonLDNames(json["actor"])
            if let genreArray = json["genre"] as? [String] {
                genres = genreArray
            }
            if let desc = json["description"] as? String {
                description = desc
            }
        }
    }

    // === Fill the gaps from the HTML ===

    // Poster fallback: the main poster sits inside a `.flex-shrink-0` container.
    if posterURL == nil {
        let imgSrc = try doc.select(".flex-shrink-0 img.object-cover").first()?.attr("src") ?? ""
        posterURL = URL(string: imgSrc)
    }

    // Rating fallback: take the first run of digits/dots that parses as a number, so that
    // text like "9.2 / 10" yields 9.2 instead of the concatenation of every digit on the line.
    if rating == nil {
        let ratingText = try doc.select(".rating-display").text()
        let numericCharacters = CharacterSet.decimalDigits.union(CharacterSet(charactersIn: "."))
        rating = ratingText
            .components(separatedBy: numericCharacters.inverted)
            .compactMap { Double($0) }
            .first
    }

    // Meta line, e.g. "2025 · 中国 · 爱情 / 剧情 / 都市". Only the first element containing "·" is used.
    let metaDivs = try doc.select(".text-xs.text-gray-600.font-light, .text-sm.text-gray-600.font-light")
    for metaDiv in metaDivs {
        let text = try metaDiv.text().trimmingCharacters(in: .whitespacesAndNewlines)
        guard text.contains("·") else { continue }
        let parts = text.split(separator: "·").map { $0.trimmingCharacters(in: .whitespaces) }
        if year == 0, let first = parts.first, let parsedYear = Int(first) {
            year = parsedYear
        }
        if parts.count >= 2 && region.isEmpty {
            region = parts[1]
        }
        if parts.count >= 3 && genres.isEmpty {
            genres = parts[2].split(separator: "/").map { $0.trimmingCharacters(in: .whitespaces) }
        }
        break
    }

    // Director / cast fallback: a div reading "导演:xxx" / "主演:xxx" (ASCII or full-width colon).
    if directors.isEmpty {
        directors = try labeledNames(in: doc, labels: ["导演:", "导演："])
    }
    if actors.isEmpty {
        actors = try labeledNames(in: doc, labels: ["主演:", "主演："])
    }

    // Synopsis fallback: paragraphs under `.prose`, separated by blank lines.
    if description.isEmpty {
        description = try doc.select(".prose p").map { try $0.text() }.joined(separator: "\n\n")
    }

    // === Playback sources ===
    let sources = try parseSourceTabs(doc: doc)

    // Slug: last non-empty path segment of the canonical link, else the title.
    let canonicalHref = try doc.select("link[rel=canonical]").attr("href")
    let slug = canonicalHref.split(separator: "/").last.map(String.init) ?? fullTitle

    let contentItem = ContentItem(
        id: slug,
        title: fullTitle,
        year: year,
        category: .movie,
        rating: rating,
        posterURL: posterURL,
        badges: [],
        onlineCount: 0,
        netdiskCount: 0,
        detailURL: "/movie/\(slug)"
    )

    // The episode list is taken from the first source only.
    let firstSourceEpisodes = sources.first?.episodes
    return ContentDetail(
        item: contentItem,
        description: description,
        directors: directors,
        actors: actors,
        genres: genres,
        region: region,
        sources: sources,
        episodes: (firstSourceEpisodes?.count ?? 0) > 1 ? firstSourceEpisodes : nil
    )
}

/// Extracts `name` values from a JSON-LD property that holds one object or an array of objects.
private static func jsonLDNames(_ value: Any?) -> [String] {
    if let people = value as? [[String: Any]] {
        return people.compactMap { $0["name"] as? String }
    }
    if let person = value as? [String: Any], let name = person["name"] as? String {
        return [name]
    }
    return []
}

/// Reads a "/"-separated name list from the first `div` whose text starts with one of `labels`.
///
/// The value is taken from the last `span` inside that div, and only when the div contains at
/// least two spans; otherwise the result is empty. Later divs are not considered.
private static func labeledNames(in doc: Document, labels: [String]) throws -> [String] {
    for div in try doc.select("div") {
        let text = try div.text()
        guard labels.contains(where: { text.hasPrefix($0) }) else { continue }
        let spans = try div.select("span")
        guard spans.size() >= 2, let value = try spans.last()?.text() else { return [] }
        return value.split(separator: "/").map { $0.trimmingCharacters(in: .whitespaces) }
    }
    return []
}
// MARK: - 播放源标签页解析
/// Collects the playback sources advertised by a detail page.
///
/// Sources are read from `<button>` elements whose `onclick` starts with `switchSource`. When no
/// button yields a source, `<script>` contents are scanned for `switchSource(id, 'url', 'format')`
/// calls instead, stopping at the first script that yields at least one source.
/// - Parameter doc: Parsed detail page.
/// - Returns: Sources in the order they were found; empty when the page has none.
/// - Throws: Any error raised by SwiftSoup while querying the document.
private static func parseSourceTabs(doc: Document) throws -> [StreamSource] {
    var sources: [StreamSource] = []

    // Primary path: the onclick attribute of each source button.
    let buttons = try doc.select("button[onclick^=switchSource]")
    for (index, button) in buttons.enumerated() {
        let onclick = try button.attr("onclick")
        let name = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)
        guard let parsed = parseSwitchSource(onclick) else { continue }
        sources.append(StreamSource(
            id: parsed.id,
            // Unnamed buttons get a positional label.
            name: name.isEmpty ? "播放源 \(index + 1)" : name,
            quality: parsed.format == "m3u8" ? "HLS" : parsed.format.uppercased(),
            episodes: parseEpisodes(urlString: parsed.url)
        ))
    }
    guard sources.isEmpty else { return sources }

    // Fallback path: switchSource(...) calls embedded in scripts.
    // The pattern is constant, so it is compiled once rather than once per script.
    let pattern = #"switchSource\((\d+),\s*'([^']*)',\s*'([^']*)'\)"#
    guard let regex = try? NSRegularExpression(pattern: pattern) else { return sources }

    for script in try doc.select("script") {
        let content = try script.data()
        let matches = regex.matches(in: content, range: NSRange(content.startIndex..., in: content))
        for (index, match) in matches.enumerated() {
            // Skip a match whose capture groups cannot be mapped back onto the string.
            guard match.numberOfRanges >= 4,
                  let idRange = Range(match.range(at: 1), in: content),
                  let urlRange = Range(match.range(at: 2), in: content),
                  let formatRange = Range(match.range(at: 3), in: content) else { continue }
            let format = String(content[formatRange])
            sources.append(StreamSource(
                // Fall back to the match position when the id does not fit in an Int.
                id: Int(content[idRange]) ?? index,
                name: "播放源 \(index + 1)",
                quality: format == "m3u8" ? "HLS" : format.uppercased(),
                episodes: parseEpisodes(urlString: String(content[urlRange]))
            ))
        }
        if !sources.isEmpty { break }
    }
    return sources
}
/// Extracts the arguments of a `switchSource(id, 'url', 'format')` call from an `onclick` value.
/// - Parameter onclick: Attribute text to search.
/// - Returns: The id, url and format of the first matching call, or `nil` when nothing matches.
///   The id is 0 when the digits do not fit in an `Int`.
private static func parseSwitchSource(_ onclick: String) -> (id: Int, url: String, format: String)? {
    let pattern = #"switchSource\((\d+),\s*'([^']*)',\s*'([^']*)'\)"#
    // Bind every capture range safely instead of force-unwrapping it.
    guard let regex = try? NSRegularExpression(pattern: pattern),
          let match = regex.firstMatch(in: onclick, range: NSRange(onclick.startIndex..., in: onclick)),
          match.numberOfRanges >= 4,
          let idRange = Range(match.range(at: 1), in: onclick),
          let urlRange = Range(match.range(at: 2), in: onclick),
          let formatRange = Range(match.range(at: 3), in: onclick) else { return nil }
    return (Int(onclick[idRange]) ?? 0, String(onclick[urlRange]), String(onclick[formatRange]))
}
// MARK: - 剧集解析
/// Splits a packed play-list string into episodes.
///
/// The string is a list of `name$url` entries separated by `#`. Each entry is split at its first
/// `$`; entries without a `$` or with an empty url are ignored. An entry with an empty name
/// (`$url`) is kept and given a generated name. When no entry yields an episode and the input is
/// not empty, the whole input is returned as a single episode named "播放".
/// - Parameter urlString: Packed play-list string, or a single plain url.
/// - Returns: Episodes whose `id` is the position of their entry in the `#`-separated list.
static func parseEpisodes(urlString: String) -> [Episode] {
    let entries = urlString.split(separator: "#")
    var episodes: [Episode] = []

    for (index, entry) in entries.enumerated() {
        // Keep empty fields so "$url" is seen as (empty name, url) instead of one lone field,
        // which used to drop the entry or leak the "$" into the fallback url.
        let fields = entry.split(separator: "$", maxSplits: 1, omittingEmptySubsequences: false)
        guard fields.count == 2, !fields[1].isEmpty else { continue }

        let name: String
        if !fields[0].isEmpty {
            name = String(fields[0])
        } else if entries.count == 1 {
            // Same label the single-url fallback below uses.
            name = "播放"
        } else {
            name = "第\(index + 1)集"
        }
        episodes.append(Episode(id: index, name: name, url: String(fields[1])))
    }

    if episodes.isEmpty && !urlString.isEmpty {
        episodes.append(Episode(id: 0, name: "播放", url: urlString))
    }
    return episodes
}
// MARK: - 分页解析
/// Reads the current and last page numbers from a listing page's pagination controls.
///
/// The current page comes from the first `.pagination-active` element (1 when absent or not a
/// number). The total is the largest of: the current page, every numbered pagination button, and
/// the page number in the `.pagination-next` link's `/page/N` href.
/// - Parameter html: Raw HTML of the listing page.
/// - Returns: The current page and the highest page number found.
/// - Throws: Any error raised by SwiftSoup while parsing or querying the document.
static func parsePagination(html: String) throws -> PaginationInfo {
    let doc = try SwiftSoup.parse(html)

    // A page may render its pager more than once; the combined text of several active buttons
    // ("3 3") is not a number, so only the first active button is read.
    let activeText = try doc.select(".pagination-active").first()?.text() ?? ""
    let currentPage = Int(activeText.trimmingCharacters(in: .whitespacesAndNewlines)) ?? 1

    var maxPage = currentPage
    for button in try doc.select(".pagination-btn.pagination-number") {
        let text = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)
        if let pageNumber = Int(text) {
            maxPage = max(maxPage, pageNumber)
        }
    }

    // The "next" link proves that at least that page exists, even if it has no numbered button.
    let nextLink = try doc.select(".pagination-next").attr("href")
    if let range = nextLink.range(of: #"/page/(\d+)"#, options: .regularExpression),
       let nextPage = Int(nextLink[range].dropFirst("/page/".count)) {
        maxPage = max(maxPage, nextPage)
    }

    return PaginationInfo(current: currentPage, total: maxPage)
}
// MARK: - 首页区块解析
/// Parses the home page and groups its items into sections.
///
/// All items are parsed with `parseContentList(html:)`. With more than 10 items the result has
/// two sections, the first 10 items and the remainder; otherwise it has a single section.
/// - Parameter html: Raw HTML of the home page.
/// - Returns: One or two arrays of items, in document order.
/// - Throws: Whatever `parseContentList(html:)` throws.
static func parseHomeSections(html: String) throws -> [[ContentItem]] {
    let items = try parseContentList(html: html)
    guard items.count > 10 else { return [items] }
    let leading = Array(items[..<10])
    let trailing = Array(items[10...])
    return [leading, trailing]
}
}