377 lines
14 KiB
Swift
377 lines
14 KiB
Swift
import Foundation
|
||
import SwiftSoup
|
||
|
||
/// Position within a paginated listing.
struct PaginationInfo {
    /// The page currently being displayed.
    let current: Int
    /// The highest page number discovered on the page.
    let total: Int
}
|
||
|
||
enum HTMLParser {
|
||
|
||
// MARK: - 列表页解析
|
||
|
||
/// Parses a listing page into content items, one per `.movie-card` element.
///
/// Cards that have no `/movie/…` link, an empty slug, or an empty title are skipped.
///
/// - Parameters:
///   - html: Raw HTML of the listing page.
///   - defaultCategory: Category used when the card's meta line does not name a recognised one.
/// - Returns: The parsed items in document order.
/// - Throws: Any error raised by SwiftSoup while parsing or querying the document.
static func parseContentList(html: String, defaultCategory: ContentCategory = .movie) throws -> [ContentItem] {
    let moviePrefix = "/movie/"
    // Characters that may sit between a counter label and its number:
    // ASCII colon, full-width colon (U+FF1A) and whitespace.
    let counterNoise = CharacterSet(charactersIn: ":\u{FF1A}").union(.whitespacesAndNewlines)

    // Extracts the number from a counter such as "在线:3"; yields 0 when no number can be read.
    func counter(in text: String, label: String) -> Int {
        let remainder = text.replacingOccurrences(of: label, with: "")
        return Int(remainder.trimmingCharacters(in: counterNoise)) ?? 0
    }

    let doc = try SwiftSoup.parse(html)
    var items: [ContentItem] = []

    for card in try doc.select(".movie-card") {
        guard let link = try card.select("a[href^=/movie/]").first() else { continue }
        let href = try link.attr("href")
        // Strip only the leading prefix so a slug that itself contains "/movie/" stays intact.
        let slug = href.hasPrefix(moviePrefix) ? String(href.dropFirst(moviePrefix.count)) : href
        guard !slug.isEmpty else { continue }

        let title = try card.select("h3 a").text().trimmingCharacters(in: .whitespacesAndNewlines)
        guard !title.isEmpty else { continue }

        // An empty or malformed `src` simply yields a nil poster URL.
        let posterURL = URL(string: try card.select("img").attr("src"))

        let ratingText = try card.select(".badge-top-right").text()
            .trimmingCharacters(in: .whitespacesAndNewlines)
        let rating = Double(ratingText)

        // Collect the non-empty corner badges, top-left first.
        var badges: [String] = []
        for selector in [".badge-top-left", ".badge-bottom-right"] {
            let badge = try card.select(selector).text().trimmingCharacters(in: .whitespacesAndNewlines)
            if !badge.isEmpty { badges.append(badge) }
        }

        // Meta line, e.g. "2025 · 电影": year first, category second.
        let metaText = try card
            .select(".p-4 .text-xs.font-light, .p-4 .text-xs.text-gray-500")
            .first()?
            .text()
            .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
        let metaParts = metaText.split(separator: "·").map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }

        var year = 0
        if let firstPart = metaParts.first, let parsedYear = Int(firstPart) {
            year = parsedYear
        }

        var category = defaultCategory
        if metaParts.count > 1 {
            switch metaParts[1] {
            case "电影": category = .movie
            case "剧集", "电视剧": category = .series
            case "综艺": category = .variety
            case "动漫", "动画": category = .anime
            default: break
            }
        }

        // Resource counters, e.g. "在线:3" / "网盘:2".
        var onlineCount = 0
        var netdiskCount = 0
        for span in try card.select(".flex.items-center.gap-3 span") {
            let spanText = try span.text()
            if spanText.contains("在线") {
                onlineCount = counter(in: spanText, label: "在线")
            } else if spanText.contains("网盘") {
                netdiskCount = counter(in: spanText, label: "网盘")
            }
        }

        items.append(ContentItem(
            id: slug,
            title: title,
            year: year,
            category: category,
            rating: rating,
            posterURL: posterURL,
            badges: badges,
            onlineCount: onlineCount,
            netdiskCount: netdiskCount,
            detailURL: href
        ))
    }

    return items
}
|
||
|
||
// MARK: - 详情页解析
|
||
|
||
/// Parses a detail page into a `ContentDetail`.
///
/// Structured data from the first JSON-LD `<script>` is preferred; every field it does not
/// supply is filled in from the visible HTML.
///
/// - Parameter html: Raw HTML of the detail page.
/// - Returns: The parsed detail, including its playback sources.
/// - Throws: Any error raised by SwiftSoup while parsing or querying the document.
static func parseContentDetail(html: String) throws -> ContentDetail {
    let doc = try SwiftSoup.parse(html)

    // Splits a "/"-separated list such as "爱情 / 剧情", dropping blank entries.
    func splitList(_ text: String) -> [String] {
        return text.split(separator: "/")
            .map { $0.trimmingCharacters(in: .whitespaces) }
            .filter { !$0.isEmpty }
    }

    // Reads person names from a JSON-LD property that may hold one object or a list of objects.
    func personNames(_ value: Any?) -> [String] {
        if let people = value as? [[String: Any]] {
            return people.compactMap { $0["name"] as? String }
        }
        if let person = value as? [String: Any], let name = person["name"] as? String {
            return [name]
        }
        return []
    }

    // HTML fallback for credits laid out as <span>label:</span><span>A / B</span>.
    // Uses the first <div> whose text starts with the label followed by an ASCII or
    // full-width (U+FF1A) colon, and reads the names from its last <span>.
    // NOTE(review): an enclosing <div> whose text begins with the label would match first — confirm
    // against the real page structure.
    func creditFallback(label: String) throws -> [String] {
        let prefixes = ["\(label):", "\(label)\u{FF1A}"]
        for div in try doc.select("div") {
            let text = try div.text()
            guard prefixes.contains(where: { text.hasPrefix($0) }) else { continue }
            let spans = try div.select("span")
            guard spans.size() >= 2 else { return [] }
            let names = try spans.last()?.text() ?? ""
            return splitList(names)
        }
        return []
    }

    // Title: the complete text of the first <h1> (nested elements included).
    let fullTitle = try doc.select("h1").first()?.text()
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? "未知标题"

    // Poster
    let imgSrc = try doc.select("img.w-full.h-full.object-cover").first()?.attr("src") ?? ""
    let posterURL = URL(string: imgSrc)

    // === Prefer structured data from JSON-LD ===
    var year = 0
    var rating: Double?
    var directors: [String] = []
    var actors: [String] = []
    var genres: [String] = []
    var description = ""
    var region = ""

    if let jsonLDScript = try doc.select("script[type=application/ld+json]").first() {
        let jsonText = try jsonLDScript.data()
        if let data = jsonText.data(using: .utf8),
           let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {

            if let published = json["datePublished"] as? String {
                // Accept both a bare year ("2025") and an ISO date ("2025-01-15").
                year = Int(published.prefix(4)) ?? 0
            }

            if let aggregate = json["aggregateRating"] as? [String: Any] {
                // ratingValue may be serialised as a string or as a JSON number.
                if let text = aggregate["ratingValue"] as? String {
                    rating = Double(text)
                } else if let number = aggregate["ratingValue"] as? Double {
                    rating = number
                }
            }

            directors = personNames(json["director"])
            actors = personNames(json["actor"])

            if let genreList = json["genre"] as? [String] {
                genres = genreList
            } else if let singleGenre = json["genre"] as? String, !singleGenre.isEmpty {
                genres = [singleGenre]
            }

            if let text = json["description"] as? String {
                description = text
            }
        }
    }

    // === Fill the gaps from the HTML ===

    // Rating fallback: keep only digits and dots from the rating widget's text.
    if rating == nil {
        let ratingText = try doc.select(".rating-display").text()
        let numericCharacters = CharacterSet.decimalDigits.union(CharacterSet(charactersIn: "."))
        let cleaned = ratingText.components(separatedBy: numericCharacters.inverted).joined()
        rating = Double(cleaned)
    }

    // Meta line, e.g. "2025 · 中国 · 爱情 / 剧情 / 都市". Only the first candidate containing "·" is used.
    for metaDiv in try doc.select(".text-xs.text-gray-600.font-light, .text-sm.text-gray-600.font-light") {
        let text = try metaDiv.text().trimmingCharacters(in: .whitespacesAndNewlines)
        guard text.contains("·") else { continue }

        let parts = text.split(separator: "·").map { $0.trimmingCharacters(in: .whitespaces) }
        if year == 0, let firstPart = parts.first, let parsedYear = Int(firstPart) {
            year = parsedYear
        }
        if parts.count >= 2 && region.isEmpty {
            region = parts[1]
        }
        if parts.count >= 3 && genres.isEmpty {
            genres = splitList(parts[2])
        }
        break
    }

    // Director / cast fallbacks
    if directors.isEmpty {
        directors = try creditFallback(label: "导演")
    }
    if actors.isEmpty {
        actors = try creditFallback(label: "主演")
    }

    // Synopsis fallback: paragraphs inside `.prose`, separated by blank lines.
    if description.isEmpty {
        let paragraphs = try doc.select(".prose p").map { try $0.text() }
        description = paragraphs.joined(separator: "\n\n")
    }

    // === Playback sources ===
    let sources = try parseSourceTabs(doc: doc)

    // Slug: last path component of the canonical URL, falling back to the title.
    let canonicalHref = try doc.select("link[rel=canonical]").attr("href")
    let slug: String
    if !canonicalHref.isEmpty {
        slug = String(canonicalHref.split(separator: "/").last ?? Substring(fullTitle))
    } else {
        slug = fullTitle
    }

    // NOTE(review): category is always `.movie` here; nothing on this page is used to derive it.
    let contentItem = ContentItem(
        id: slug,
        title: fullTitle,
        year: year,
        category: .movie,
        rating: rating,
        posterURL: posterURL,
        badges: [],
        onlineCount: 0,
        netdiskCount: 0,
        detailURL: "/movie/\(slug)"
    )

    // Episode list comes from the first source and is exposed only when it has more than one entry.
    let firstSourceEpisodes = sources.first?.episodes

    return ContentDetail(
        item: contentItem,
        description: description,
        directors: directors,
        actors: actors,
        genres: genres,
        region: region,
        sources: sources,
        episodes: (firstSourceEpisodes?.count ?? 0) > 1 ? firstSourceEpisodes : nil
    )
}
|
||
|
||
// MARK: - 播放源标签页解析
|
||
|
||
/// Collects the playback sources advertised on a detail page.
///
/// Sources are read from `<button onclick="switchSource(...)">` elements first. When no button
/// yields a source, the first `<script>` containing `switchSource(id, 'url', 'format')` calls is
/// used instead.
///
/// - Parameter doc: The parsed detail page.
/// - Returns: The sources in document order; empty when none are found.
/// - Throws: Any error raised by SwiftSoup while querying the document.
private static func parseSourceTabs(doc: Document) throws -> [StreamSource] {
    // "m3u8" is shown as "HLS"; any other format is upper-cased.
    func qualityLabel(for format: String) -> String {
        return format == "m3u8" ? "HLS" : format.uppercased()
    }

    var sources: [StreamSource] = []

    // Primary path: the onclick attribute of the source buttons.
    for (index, button) in try doc.select("button[onclick^=switchSource]").enumerated() {
        let onclick = try button.attr("onclick")
        let name = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)

        guard let parsed = parseSwitchSource(onclick) else { continue }

        sources.append(StreamSource(
            id: parsed.id,
            name: name.isEmpty ? "播放源 \(index + 1)" : name,
            quality: qualityLabel(for: parsed.format),
            episodes: parseEpisodes(urlString: parsed.url)
        ))
    }

    guard sources.isEmpty else { return sources }

    // Fallback path: scan inline scripts. The regex is compiled once rather than per script.
    let pattern = #"switchSource\(\s*(\d+)\s*,\s*'([^']*)'\s*,\s*'([^']*)'\s*\)"#
    guard let regex = try? NSRegularExpression(pattern: pattern) else { return sources }

    for script in try doc.select("script") {
        let content = try script.data()
        let matches = regex.matches(in: content, range: NSRange(content.startIndex..., in: content))

        for (index, match) in matches.enumerated() {
            guard match.numberOfRanges >= 4,
                  let idRange = Range(match.range(at: 1), in: content),
                  let urlRange = Range(match.range(at: 2), in: content),
                  let formatRange = Range(match.range(at: 3), in: content) else { continue }

            let format = String(content[formatRange])
            sources.append(StreamSource(
                // Fall back to the match position when the id does not fit in an Int.
                id: Int(content[idRange]) ?? index,
                name: "播放源 \(index + 1)",
                quality: qualityLabel(for: format),
                episodes: parseEpisodes(urlString: String(content[urlRange]))
            ))
        }

        // Stop at the first script that produced any source.
        if !sources.isEmpty { break }
    }

    return sources
}
|
||
|
||
/// Extracts the arguments of the first `switchSource(id, 'url', 'format')` call in `onclick`.
///
/// Whitespace around the arguments is tolerated; both string arguments must be single-quoted.
///
/// - Parameter onclick: The text to search, typically a button's `onclick` attribute.
/// - Returns: The numeric id, URL string and format, or `nil` when no such call is present.
///   The id is 0 when the digits do not fit in an `Int`.
private static func parseSwitchSource(_ onclick: String) -> (id: Int, url: String, format: String)? {
    let pattern = #"switchSource\(\s*(\d+)\s*,\s*'([^']*)'\s*,\s*'([^']*)'\s*\)"#
    let searchRange = NSRange(onclick.startIndex..., in: onclick)

    guard let regex = try? NSRegularExpression(pattern: pattern),
          let match = regex.firstMatch(in: onclick, range: searchRange),
          match.numberOfRanges >= 4,
          // Bind the capture ranges instead of force-unwrapping them.
          let idRange = Range(match.range(at: 1), in: onclick),
          let urlRange = Range(match.range(at: 2), in: onclick),
          let formatRange = Range(match.range(at: 3), in: onclick) else { return nil }

    return (Int(onclick[idRange]) ?? 0, String(onclick[urlRange]), String(onclick[formatRange]))
}
|
||
|
||
// MARK: - 剧集解析
|
||
|
||
/// Turns a packed source string into its episode list.
///
/// The string is cut at `#`; every piece of the form `name$url` becomes one episode whose id is
/// the piece's position among the `#`-separated pieces. Pieces without both a name and a URL are
/// ignored. If nothing qualifies and the input is not empty, the whole input is returned as a
/// single episode named "播放".
///
/// - Parameter urlString: The packed episode string, or a plain URL.
/// - Returns: The parsed episodes; empty only for an empty input.
static func parseEpisodes(urlString: String) -> [Episode] {
    let parsed: [Episode] = urlString
        .split(separator: "#")
        .enumerated()
        .compactMap { offset, segment -> Episode? in
            // Split on the first "$" only, so a "$" inside the URL is preserved.
            let fields = segment.split(separator: "$", maxSplits: 1)
            guard fields.count == 2 else { return nil }
            return Episode(id: offset, name: String(fields[0]), url: String(fields[1]))
        }

    guard parsed.isEmpty, !urlString.isEmpty else { return parsed }
    return [Episode(id: 0, name: "播放", url: urlString)]
}
|
||
|
||
// MARK: - 分页解析
|
||
|
||
/// Reads the current page and the highest known page number from a listing page.
///
/// The total is the largest of: the current page, every numbered pagination button, and the
/// page number in the "next" link's `/page/N` path.
///
/// - Parameter html: Raw HTML of the listing page.
/// - Returns: The pagination state; the current page defaults to 1 when no active marker parses.
/// - Throws: Any error raised by SwiftSoup while parsing or querying the document.
static func parsePagination(html: String) throws -> PaginationInfo {
    let doc = try SwiftSoup.parse(html)

    // Read only the first active marker: taking the text of every match would combine
    // duplicated pagination bars into something like "3 3", which does not parse as a number.
    let activeText = try doc.select(".pagination-active").first()?.text()
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    let currentPage = Int(activeText) ?? 1

    var maxPage = currentPage
    for button in try doc.select(".pagination-btn.pagination-number") {
        let label = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)
        if let pageNumber = Int(label), pageNumber > maxPage {
            maxPage = pageNumber
        }
    }

    // The "next" link proves that at least that page exists.
    let nextLink = try doc.select(".pagination-next").attr("href")
    if let range = nextLink.range(of: #"/page/(\d+)"#, options: .regularExpression) {
        let pageText = nextLink[range].replacingOccurrences(of: "/page/", with: "")
        if let nextPage = Int(pageText), nextPage > maxPage {
            maxPage = nextPage
        }
    }

    return PaginationInfo(current: currentPage, total: maxPage)
}
|
||
|
||
// MARK: - 首页区块解析
|
||
|
||
/// Splits the home page's items into display sections.
///
/// All cards are parsed with `parseContentList`. With more than ten items the result has two
/// sections — the first ten and the remainder; otherwise it is a single section holding everything.
///
/// - Parameter html: Raw HTML of the home page.
/// - Returns: One or two sections of items, in document order.
/// - Throws: Whatever `parseContentList` throws.
static func parseHomeSections(html: String) throws -> [[ContentItem]] {
    let everything = try parseContentList(html: html)
    guard everything.count > 10 else { return [everything] }

    let leading = Array(everything[..<10])
    let trailing = Array(everything[10...])
    return [leading, trailing]
}
|
||
}
|