init: init proj
This commit is contained in:
376
DDYSClient/Services/HTMLParser.swift
Normal file
376
DDYSClient/Services/HTMLParser.swift
Normal file
@@ -0,0 +1,376 @@
|
||||
import Foundation
|
||||
import SwiftSoup
|
||||
|
||||
/// Pagination state extracted from a list page.
struct PaginationInfo {
    /// Number of the page currently shown.
    let current: Int

    /// Highest page number that could be discovered on the page.
    let total: Int
}
|
||||
|
||||
enum HTMLParser {
|
||||
|
||||
// MARK: - 列表页解析
|
||||
|
||||
/// Parses a list page into content items.
///
/// Each `.movie-card` element yields at most one item. Cards that lack a
/// `/movie/…` link, a slug, or a title are skipped.
///
/// - Parameters:
///   - html: Raw HTML of the list page.
///   - defaultCategory: Category assigned when a card's meta line does not name a recognised one.
/// - Returns: The parsed items, in document order.
/// - Throws: Errors raised by SwiftSoup while parsing or querying the document.
static func parseContentList(html: String, defaultCategory: ContentCategory = .movie) throws -> [ContentItem] {
    let doc = try SwiftSoup.parse(html)
    return try doc.select(".movie-card").array().compactMap { card in
        try parseListCard(card, defaultCategory: defaultCategory)
    }
}

/// Builds one `ContentItem` from a `.movie-card` element, or returns `nil` when the card is unusable.
private static func parseListCard(_ card: Element, defaultCategory: ContentCategory) throws -> ContentItem? {
    guard let link = try card.select("a[href^=/movie/]").first() else { return nil }
    let href = try link.attr("href")

    // Strip only the leading "/movie/" (the selector guarantees it is there), then cut
    // any query string / fragment and surrounding slashes so the slug is a clean identifier.
    let path = href.dropFirst("/movie/".count)
    let slug = String(path.prefix(while: { $0 != "?" && $0 != "#" }))
        .trimmingCharacters(in: CharacterSet(charactersIn: "/"))
    guard !slug.isEmpty else { return nil }

    let title = try card.select("h3 a").text().trimmingCharacters(in: .whitespacesAndNewlines)
    guard !title.isEmpty else { return nil }

    let posterURL = URL(string: try card.select("img").attr("src"))

    let ratingText = try card.select(".badge-top-right").text().trimmingCharacters(in: .whitespacesAndNewlines)
    let rating = Double(ratingText)

    // Badges keep their visual order: top-left first, then bottom-right.
    var badges: [String] = []
    for selector in [".badge-top-left", ".badge-bottom-right"] {
        let badge = try card.select(selector).text().trimmingCharacters(in: .whitespacesAndNewlines)
        if !badge.isEmpty { badges.append(badge) }
    }

    // Meta line looks like "2025 · 电影": year first, category label second.
    let metaText = try card.select(".p-4 .text-xs.font-light, .p-4 .text-xs.text-gray-500")
        .first()?.text().trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    let metaParts = metaText.split(separator: "·").map { $0.trimmingCharacters(in: .whitespacesAndNewlines) }
    let year = metaParts.first.flatMap { Int($0) } ?? 0
    let category = metaParts.count > 1
        ? (listCategory(named: metaParts[1]) ?? defaultCategory)
        : defaultCategory

    // Source counters: read the first number in the span instead of assuming an exact
    // "label + colon + number" layout, so colon variants or extra spacing do not yield 0.
    var onlineCount = 0
    var netdiskCount = 0
    for span in try card.select(".flex.items-center.gap-3 span") {
        let spanText = try span.text()
        if spanText.contains("在线") {
            onlineCount = firstInteger(in: spanText) ?? 0
        } else if spanText.contains("网盘") {
            netdiskCount = firstInteger(in: spanText) ?? 0
        }
    }

    return ContentItem(
        id: slug,
        title: title,
        year: year,
        category: category,
        rating: rating,
        posterURL: posterURL,
        badges: badges,
        onlineCount: onlineCount,
        netdiskCount: netdiskCount,
        detailURL: href
    )
}

/// Maps a category label from a card's meta line to a `ContentCategory`; `nil` for unknown labels.
private static func listCategory(named label: String) -> ContentCategory? {
    switch label {
    case "电影": return .movie
    case "剧集", "电视剧": return .series
    case "综艺": return .variety
    case "动漫", "动画": return .anime
    default: return nil
    }
}

/// Returns the first run of ASCII digits in `text` as an integer, if there is one.
private static func firstInteger(in text: String) -> Int? {
    guard let digits = text.range(of: "[0-9]+", options: .regularExpression) else { return nil }
    return Int(text[digits])
}
|
||||
|
||||
// MARK: - 详情页解析
|
||||
|
||||
/// Parses a detail page into a `ContentDetail`.
///
/// Structured data from the page's first JSON-LD script is preferred; anything it
/// does not provide is filled in from the visible HTML.
///
/// - Parameter html: Raw HTML of the detail page.
/// - Returns: The parsed detail. The embedded item always carries category `.movie`,
///   no badges and zero source counts, because none of those is read from this page.
///   `episodes` is only set when the first source has more than one episode.
/// - Throws: Errors raised by SwiftSoup while parsing or querying the document.
static func parseContentDetail(html: String) throws -> ContentDetail {
    let doc = try SwiftSoup.parse(html)

    // Title: full text of the first <h1> (nested spans included); placeholder when there is no <h1>.
    let fullTitle = try doc.select("h1").first()?.text()
        .trimmingCharacters(in: .whitespacesAndNewlines) ?? "未知标题"

    // Poster
    let imgSrc = try doc.select("img.w-full.h-full.object-cover").first()?.attr("src") ?? ""
    let posterURL = URL(string: imgSrc)

    // === Prefer structured data from JSON-LD ===
    var year = 0
    var rating: Double?
    var directors: [String] = []
    var actors: [String] = []
    var genres: [String] = []
    var description = ""
    var region = ""

    if let jsonLDScript = try doc.select("script[type=application/ld+json]").first() {
        let jsonText = try jsonLDScript.data()
        if let data = jsonText.data(using: .utf8),
           let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] {

            if let published = json["datePublished"] as? String {
                // Accept both a bare year ("2025") and an ISO date ("2025-03-14"):
                // the year is the first four characters either way.
                year = Int(published.trimmingCharacters(in: .whitespaces).prefix(4)) ?? 0
            }

            if let aggregate = json["aggregateRating"] as? [String: Any] {
                // ratingValue may be serialised as a string or as a JSON number.
                if let text = aggregate["ratingValue"] as? String {
                    rating = Double(text)
                } else if let number = aggregate["ratingValue"] as? NSNumber {
                    rating = number.doubleValue
                }
            }

            directors = jsonLDNames(json["director"])
            actors = jsonLDNames(json["actor"])
            genres = jsonLDNames(json["genre"])

            if let text = json["description"] as? String {
                description = text
            }
        }
    }

    // === Fill remaining gaps from the HTML ===

    // Rating fallback: take the first number in the rating widget. Joining every digit
    // in the text would turn something like "8.5 / 10" into 8.51.
    if rating == nil {
        let ratingText = try doc.select(".rating-display").text()
        if let number = ratingText.range(of: #"[0-9]+(\.[0-9]+)?"#, options: .regularExpression) {
            rating = Double(ratingText[number])
        }
    }

    // Meta line, e.g. "2025 · 中国 · 爱情 / 剧情 / 都市": only the first element containing "·" is used.
    for metaDiv in try doc.select(".text-xs.text-gray-600.font-light, .text-sm.text-gray-600.font-light") {
        let text = try metaDiv.text().trimmingCharacters(in: .whitespacesAndNewlines)
        guard text.contains("·") else { continue }

        let parts = text.split(separator: "·").map { $0.trimmingCharacters(in: .whitespaces) }
        if year == 0, let first = parts.first, let parsedYear = Int(first) {
            year = parsedYear
        }
        if parts.count >= 2 {
            region = parts[1]
        }
        if parts.count >= 3 && genres.isEmpty {
            genres = splitCreditNames(parts[2])
        }
        break
    }

    // Director / cast fallback: <span>label:</span><span>A / B</span>
    if directors.isEmpty {
        directors = try creditNames(labelled: "导演", in: doc)
    }
    if actors.isEmpty {
        actors = try creditNames(labelled: "主演", in: doc)
    }

    // Synopsis fallback: paragraphs inside .prose
    if description.isEmpty {
        let paragraphs = try doc.select(".prose p").map { try $0.text() }
        description = paragraphs.joined(separator: "\n\n")
    }

    // === Playback sources ===
    let sources = try parseSourceTabs(doc: doc)

    // Slug: last path component of the canonical URL, falling back to the title.
    let canonicalHref = try doc.select("link[rel=canonical]").attr("href")
    let slug: String
    if !canonicalHref.isEmpty {
        slug = String(canonicalHref.split(separator: "/").last ?? Substring(fullTitle))
    } else {
        slug = fullTitle
    }

    let contentItem = ContentItem(
        id: slug,
        title: fullTitle,
        year: year,
        category: .movie,
        rating: rating,
        posterURL: posterURL,
        badges: [],
        onlineCount: 0,
        netdiskCount: 0,
        detailURL: "/movie/\(slug)"
    )

    // Episode list shown for the whole title: taken from the first source.
    let firstSourceEpisodes = sources.first?.episodes

    return ContentDetail(
        item: contentItem,
        description: description,
        directors: directors,
        actors: actors,
        genres: genres,
        region: region,
        sources: sources,
        episodes: (firstSourceEpisodes?.count ?? 0) > 1 ? firstSourceEpisodes : nil
    )
}

/// Extracts names from a JSON-LD value that may be a string, an object with a
/// `name` key, or an array mixing both.
private static func jsonLDNames(_ value: Any?) -> [String] {
    let entries: [Any]
    if let list = value as? [Any] {
        entries = list
    } else if let single = value {
        entries = [single]
    } else {
        entries = []
    }
    return entries.compactMap { entry -> String? in
        if let name = entry as? String { return name }
        return (entry as? [String: Any])?["name"] as? String
    }
}

/// Finds the span whose text is exactly `label` followed by an ASCII or full-width
/// colon and returns the names listed in the element right after it.
private static func creditNames(labelled label: String, in doc: Document) throws -> [String] {
    let labels = ["\(label):", "\(label)\u{FF1A}"]
    for span in try doc.select("span") {
        let text = try span.text().trimmingCharacters(in: .whitespacesAndNewlines)
        guard labels.contains(text) else { continue }
        // The value lives in the sibling next to the label, so an enclosing wrapper
        // that also holds other rows cannot leak a different row's value.
        guard let valueElement = try span.nextElementSibling() else { return [] }
        return splitCreditNames(try valueElement.text())
    }
    return []
}

/// Splits a "/"-separated list into trimmed, non-empty entries.
private static func splitCreditNames(_ text: String) -> [String] {
    text.split(separator: "/")
        .map { $0.trimmingCharacters(in: .whitespaces) }
        .filter { !$0.isEmpty }
}
|
||||
|
||||
// MARK: - 播放源标签页解析
|
||||
|
||||
/// Collects the playback sources advertised on a detail page.
///
/// Sources are read from buttons whose `onclick` starts with `switchSource`. When no
/// button yields a source, inline scripts are scanned for `switchSource(id, 'url', 'format')`
/// calls instead, and the first script containing any match supplies the sources.
///
/// - Parameter doc: The parsed detail page.
/// - Returns: The sources found, possibly empty.
/// - Throws: Errors raised by SwiftSoup while querying the document.
private static func parseSourceTabs(doc: Document) throws -> [StreamSource] {
    var sources: [StreamSource] = []

    // Primary path: parse each button's onclick attribute.
    let buttons = try doc.select("button[onclick^=switchSource]")
    for (index, button) in buttons.enumerated() {
        let onclick = try button.attr("onclick")
        let name = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)

        guard let parsed = parseSwitchSource(onclick) else { continue }

        sources.append(StreamSource(
            id: parsed.id,
            name: name.isEmpty ? "播放源 \(index + 1)" : name,
            quality: parsed.format == "m3u8" ? "HLS" : parsed.format.uppercased(),
            episodes: parseEpisodes(urlString: parsed.url)
        ))
    }
    guard sources.isEmpty else { return sources }

    // Fallback: look for switchSource(...) calls inside script bodies.
    // The expression is compiled once here rather than once per script element.
    let pattern = #"switchSource\((\d+),\s*'([^']*)',\s*'([^']*)'\)"#
    guard let regex = try? NSRegularExpression(pattern: pattern) else { return sources }

    for script in try doc.select("script") {
        let content = try script.data()
        let matches = regex.matches(in: content, range: NSRange(content.startIndex..., in: content))

        for (index, match) in matches.enumerated() {
            // Skip a match whose capture ranges cannot be mapped back onto the string
            // instead of force-unwrapping them.
            guard match.numberOfRanges >= 4,
                  let idRange = Range(match.range(at: 1), in: content),
                  let urlRange = Range(match.range(at: 2), in: content),
                  let formatRange = Range(match.range(at: 3), in: content) else { continue }

            let format = String(content[formatRange])
            sources.append(StreamSource(
                id: Int(content[idRange]) ?? index,
                name: "播放源 \(index + 1)",
                quality: format == "m3u8" ? "HLS" : format.uppercased(),
                episodes: parseEpisodes(urlString: String(content[urlRange]))
            ))
        }

        // Only the first script that produced sources is used.
        if !sources.isEmpty { break }
    }

    return sources
}
|
||||
|
||||
/// Extracts the arguments of the first `switchSource(id, 'url', 'format')` call in `onclick`.
///
/// - Parameter onclick: Text of an `onclick` attribute.
/// - Returns: The numeric id (0 when it does not fit an `Int`), the URL argument and
///   the format argument, or `nil` when no such call is present.
private static func parseSwitchSource(_ onclick: String) -> (id: Int, url: String, format: String)? {
    let callPattern = #"switchSource\((\d+),\s*'([^']*)',\s*'([^']*)'\)"#
    guard let callRegex = try? NSRegularExpression(pattern: callPattern) else { return nil }

    let wholeInput = NSRange(onclick.startIndex..., in: onclick)
    guard let call = callRegex.firstMatch(in: onclick, range: wholeInput),
          call.numberOfRanges >= 4 else { return nil }

    // Capture groups 1...3 are id, url and format, in that order.
    let source = onclick as NSString
    let captures = (1...3).map { source.substring(with: call.range(at: $0)) }

    return (id: Int(captures[0]) ?? 0, url: captures[1], format: captures[2])
}
|
||||
|
||||
// MARK: - 剧集解析
|
||||
|
||||
/// Splits a source URL string into episodes.
///
/// The expected layout is `name$url#name$url#…`. Parts without a `$` are ignored.
/// When no part yields an episode and the input is not empty, the whole input is
/// returned as a single episode named "播放".
///
/// - Parameter urlString: The URL argument of a `switchSource` call.
/// - Returns: The episodes in input order. Each id is the index of its `#`-separated part.
static func parseEpisodes(urlString: String) -> [Episode] {
    var episodes: [Episode] = []

    for (index, part) in urlString.split(separator: "#").enumerated() {
        guard part.contains("$") else { continue }

        // Keep empty fields: with the default splitting, "$url" collapses to a single
        // field and the episode is lost (or later comes back with the "$" still in its URL).
        let fields = part.split(separator: "$", maxSplits: 1, omittingEmptySubsequences: false)
        guard fields.count == 2, !fields[1].isEmpty else { continue }

        episodes.append(Episode(
            id: index,
            // An unnamed episode gets the same generic label as the single-URL fallback.
            name: fields[0].isEmpty ? "播放" : String(fields[0]),
            url: String(fields[1])
        ))
    }

    // No name$url pairs at all: treat the input as one directly playable URL.
    if episodes.isEmpty && !urlString.isEmpty {
        episodes.append(Episode(id: 0, name: "播放", url: urlString))
    }

    return episodes
}
|
||||
|
||||
// MARK: - 分页解析
|
||||
|
||||
/// Reads the current page and the highest known page number from a list page.
///
/// - Parameter html: Raw HTML of the list page.
/// - Returns: `current` is the number on the first `.pagination-active` element (1 when
///   missing or not numeric); `total` is the largest of the current page, every numbered
///   pagination button, and the page referenced by the "next" link.
/// - Throws: Errors raised by SwiftSoup while parsing or querying the document.
static func parsePagination(html: String) throws -> PaginationInfo {
    let doc = try SwiftSoup.parse(html)

    // Read only the first active element: the combined text of several matches
    // (e.g. a pagination bar above and below the list) would be "3 3", which is not a number.
    let activeText = try doc.select(".pagination-active").first()?.text() ?? ""
    let currentPage = Int(activeText.trimmingCharacters(in: .whitespacesAndNewlines)) ?? 1

    var maxPage = currentPage
    for button in try doc.select(".pagination-btn.pagination-number") {
        let label = try button.text().trimmingCharacters(in: .whitespacesAndNewlines)
        if let page = Int(label) {
            maxPage = max(maxPage, page)
        }
    }

    // The "next" link can point past the last numbered button that is rendered.
    let nextLink = try doc.select(".pagination-next").attr("href")
    if let pageRange = nextLink.range(of: #"/page/(\d+)"#, options: .regularExpression),
       let nextPage = Int(nextLink[pageRange].dropFirst("/page/".count)) {
        maxPage = max(maxPage, nextPage)
    }

    return PaginationInfo(current: currentPage, total: maxPage)
}
|
||||
|
||||
// MARK: - 首页区块解析
|
||||
|
||||
/// Splits the items of the home page into sections.
///
/// - Parameter html: Raw HTML of the home page.
/// - Returns: A single section holding every item when there are ten or fewer;
///   otherwise two sections, the first ten items followed by the rest.
/// - Throws: Whatever `parseContentList(html:)` throws.
static func parseHomeSections(html: String) throws -> [[ContentItem]] {
    let firstSectionSize = 10
    let items = try parseContentList(html: html)

    guard items.count > firstSectionSize else { return [items] }

    let leading = Array(items[..<firstSectionSize])
    let trailing = Array(items[firstSectionSize...])
    return [leading, trailing]
}
|
||||
}
|
||||
Reference in New Issue
Block a user