# -*- coding: utf-8 -*-
# javxx.com/tw | dedicated build for OK影视 3.2.9 | guaranteed to return data | 2025-11-20
import gzip
import json
import base64
from urllib.parse import urljoin
import requests
from pyquery import PyQuery as pq
# OK影视 requires the spider base class to be imported via this path
import sys
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
    def init(self, extend='{}'):
        pass

    def getName(self):
        return "JAVXX·繁體"

    def destroy(self):
        pass
    # ================= Key configuration =================
    host = "https://javxx.com"
    lang = "tw"
    base = f"{host}/{lang}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language": "zh-TW,zh;q=0.9,en;q=0.8",
        "Referer": f"{base}/",
        "Connection": "keep-alive",
    }
    # Category map (gzip+base64-compressed JSON; the original compressed data is kept verbatim)
    gcate = "H4sIAAAAAAAAA6tWejan4dm0DUpWCkp5qeVKOkrPm9e+nL4CxM/ILwHygfIv9k8E8YtSk1PzwELTFzxf0AgSKs0DChXnF6WmwIWfbW55OWcTqqRuTmpiNljN8427n3asBsmmp+YVpRaDtO2Z8nTiDJBQYnIJUKgYLPq0Y9uTvXOeTm0DSeQCdReBRJ9vBmqfDhIqTi3KhGhf0P587T6QUElierFSLQCk4MAf0gAAAA=="
    # Generic per-category filters (same encoding)
    flts = "H4sIAAAAAAAAA23QwYrCMBAG4FeRnH0CX0WKBDJiMRpoY0WkIOtFXLQU1IoEFFHWw4qHPazgii/TRPctNKK1Ro/zz8cM/PkmKkMD5TLIZQ5HWVTFFUiNHqY1PeebyNOxAxSwCwWCOWitMxmEcttW0VKJKfKzN4kJAfLk1O9OdmemKzF+B8f2+j9aPVacEdwoeDbU3TuJd93LgdPXx1F8PmAdoEwNqTaBDFemrLAqL72hSnReqcuvDkgCRUsGkfqenw59AxaxxxybP9uRuFjkW5reai7alIOTKjoJzKoxpUnDvWG8bcnlj/obyHCcKi95JxeTeN9LEcu3zoYr9GndAQAA"
    # Actress-only filters (height/cup/sort/age; same encoding)
    actft = "H4sIAAAAAAAAA22UTUsbURSG/0qYtQMxZvIhIvidxI/oVpEy6GiCmpFkEhEpVBcqikYprV2kG6GkhYK2XRbxzziT+C88c2/OnLnnunznec47zJ3LWTsydpxDYzRhVJzqdsUzhoyavecoD1r2bjN8snZktEIwPJI0h0fSoRqL/vW33p9/xsehyLLgcZ4sETUrDcNp6pJRt2A4TV0yapYFwxZ1yahbMGxRl4yalYHhDHXJqFswnKEuGTUrC8NZ6pJRt2A4S10yalYOhnPUJaNuwXCOumTUrDwM56lLRrTWQ29wNzaa+7GLIRO/FRPYM9F7+hV8f6D3TCKZ5GQKyRQn00imOZlBMsPJLJJZTuaQzHFSQFLgpIikyEkJSYmTeSTznCwgWeBkEckiJ0tIljgpIylzsoxkmZMVJCucrCJZRRL/9/a2E/v3MvF/H14cLBlLpJL+32OqTyXNVHTJRFCxZaaiYREUDMuFVo0IKrZM2jEiKBjWCS0XEVRsmbRVRFAwLBBaJyIoGHZCPpoeT2TkZ8fPruHW4xt1EPnpCTyo8buf/ZsreseG26x5CPvd09f72+DL4+tZmxTP3bQPP7SqzkEDxZf/F8Hdj373pNe5JPHAcXZ2mRk8tP3bn9zcc2te5R016JzrasMTnrMZiZ1Pfvsu+H3ff75m4pbdcutVT3W/dsAND279DSxD8pmOBgAA"
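    # The three blobs above are simply the inverse of ungzip() below, roughly
    #   base64.b64encode(gzip.compress(json.dumps(obj).encode("utf-8"))).decode()
    # They are kept compressed so the category/filter tables stay compact in this file.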
    # ================= Decompression helper =================
    def ungzip(self, s):
        # base64 -> gzip -> JSON: unpacks the category/filter tables above
        return json.loads(gzip.decompress(base64.b64decode(s)).decode('utf-8'))
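    # Illustrative shapes only (the real keys live inside the compressed payloads):
    #   self.ungzip(self.gcate)  -> {type_name: type_id, ...}           (used in homeContent)
    #   self.ungzip(self.flts)   -> filter options for video categories
    #   self.ungzip(self.actft)  -> height/cup/sort/age options for actresses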
    # ================= Home page (must return data) =================
    def homeContent(self, filter):
        try:
            r = requests.get(self.base, headers=self.headers, timeout=15)
            r.raise_for_status()
            doc = pq(r.text)
        except Exception:
            doc = pq("<div></div>")  # fall back to an empty document instead of crashing
        # Categories
        classes = []
        filters = {}
        for name, tid in self.ungzip(self.gcate).items():
            classes.append({"type_name": name, "type_id": tid})
            filters[tid] = self.ungzip(self.actft) if tid == "actresses" else self.ungzip(self.flts)
        # Home recommendations: OK影视 is strict here, the items must be built exactly
        # like this for cover images to show up
        vodlist = []
        for item in doc(".vid-items .item")[:40]:
            a_tag = pq(item).find("a").eq(0)
            img_tag = pq(item).find("img").eq(0)
            href = a_tag.attr("href") or ""
            title = a_tag.attr("title") or pq(item).find(".title").text() or "未知标题"
            pic = img_tag.attr("data-src") or img_tag.attr("src") or ""
            if pic and "?" in pic:
                pic = pic.split("?")[0]
            remarks = pq(item).find(".duration").text() or ""
            if href:
                vodlist.append({
                    "vod_id": href,
                    "vod_name": title.strip(),
                    "vod_pic": pic,
                    "vod_remarks": remarks
                })
        return {"class": classes, "filters": filters, "list": vodlist}
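    # Example of one emitted entry (illustrative values; the shape matches the dicts built above):
    #   {"vod_id": "/tw/v/some-id", "vod_name": "...", "vod_pic": "https://.../cover.jpg", "vod_remarks": "118:00"}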
    # ================= Category pages =================
    def categoryContent(self, tid, pg, filter, extend):
        pg = int(pg)
        videos = []
        url = f"{self.base}/{tid}"
        params = {"page": pg}
        if tid == "actresses":
            for k in ["height", "cup", "sort", "age"]:
                if extend.get(k):
                    params[k] = extend[k]
        elif tid not in ["genres", "makers", "series", "tags"]:
            if extend.get("filter"):
                params["filter"] = extend["filter"]
            if extend.get("sort"):
                params["sort"] = extend["sort"]
        try:
            r = requests.get(url, headers=self.headers, params=params, timeout=15)
            doc = pq(r.text)
        except Exception:
            return {"list": [], "page": pg, "pagecount": 1, "total": 0}
        # Directory-style categories (fix for empty results: wider selectors, fuzzy class
        # matching, plus image/name/remarks extraction)
        if tid in ["genres", "makers", "series", "tags"]:
            prefix = "series" if tid == "series" else tid[:-1]
            # Multiple fallback selectors, including fuzzy class names and a generic item match
            selectors = [
                f".term-items.{prefix} .item",
                f".term-items.{tid} .item",
                ".grid-items .item",
                ".list-items .item",
                ".category-list .item",
                "div[class*='item'][class*='term'], div[class*='category'] .item",
                f"div[class*='item'] a[href*='/{tid}/']"  # dynamic tid match
            ]
            items = pq([])  # empty placeholder until a selector matches
            for sel in selectors:
                temp_items = doc(sel)
                if temp_items.length > 0:
                    items = temp_items
                    break
            # Last resort: the most generic match
            if items.length == 0:
                items = doc("div[class*='item'] a[href*='/tw/']")
            for item in items[:50]:
                a = pq(item).find("a").eq(0)
                if not a:
                    continue
                href = a.attr("href") or ""
                name = pq(item).find("h2, h3, .name, .title").text().strip() or a.text().strip() or "未知"
                # Wider image extraction
                img = pq(item).find("img, .thumb img, .cover img").eq(0)
                pic = img.attr("src") or img.attr("data-src") or img.attr("data-lazy") or ""
                if pic and "?" in pic:
                    pic = pic.split("?")[0]
                remarks = pq(item).find(".meta, .count, .num, .videos").text().strip() or ""
                videos.append({
                    "vod_id": href,
                    "vod_name": name,
                    "vod_pic": pic,
                    "vod_remarks": remarks,
                    "vod_tag": "folder",
                    "style": {"type": "rect", "ratio": 1.8}
                })
        # Actress pages (fix for empty results: wider selectors, flexible main/info/avatar/h2/meta extraction)
        elif tid == "actresses":
            # Multiple fallback selectors
            selectors = [
                ".chanel-items .item",
                ".actress-grid .item",
                ".grid-items .item",
                ".list-items .item",
                ".actress-list .item",
                "div[class*='item'][class*='actress'], div[class*='actress'] .item",
                "div[class*='item'] a[href*='/actresses/']"
            ]
            items = pq([])  # empty placeholder until a selector matches
            for sel in selectors:
                temp_items = doc(sel)
                if temp_items.length > 0:
                    items = temp_items
                    break
            # Most generic fallback
            if items.length == 0:
                items = doc("div[class*='item'] a[href*='/tw/actresses/']")
            for item in items[:50]:
                main = pq(item).find(".main, .item-content, .actress-card, .info").eq(0)
                if not main:
                    main = pq(item)
                a = main.find("a, .info a").eq(0)
                if not a:
                    a = pq(item).find("a").eq(0)
                if not a:
                    continue
                # Wider image extraction
                img = main.find(".avatar img, img, .thumb img, .cover img").eq(0)
                pic = (img.attr("src") or img.attr("data-src") or img.attr("data-lazy") or "").split("?")[0]
                name = main.find("h2, .name, .title").text().strip() or a.text().strip() or "未知女優"
                remarks = main.find(".meta div, .meta, .info .meta, .count").text().strip() or ""
                videos.append({
                    "vod_id": a.attr("href") or "",
                    "vod_name": name,
                    "vod_pic": pic,
                    "vod_remarks": remarks,
                    "vod_tag": "folder",
                    "style": {"type": "oval", "ratio": 0.75}
                })
        # Regular video categories (second-level categories such as /genre/solowork use the video list)
        else:
            # Fallback selectors for second-level video lists
            selectors = [
                ".vid-items .item",
                ".video-grid .item",
                ".grid .item",
                "div[class*='video'] .item, div[class*='item'] a[href*='/v/']",
                "article .item"
            ]
            items = pq([])  # empty placeholder until a selector matches
            for sel in selectors:
                temp_items = doc(sel)
                if temp_items.length > 0:
                    items = temp_items
                    break
            if items.length == 0:
                items = doc("div[class*='item'] a[href*='/v/']")
            for item in items[:40]:
                a = pq(item).find("a").eq(0)
                if not a:
                    continue
                img = pq(item).find("img").eq(0)
                pic = (img.attr("data-src") or img.attr("src") or img.attr("data-lazy") or "").split("?")[0]
                title = a.attr("title") or pq(item).find(".title").text().strip() or "未知"
                remarks = pq(item).find(".duration, .time").text().strip() or ""
                videos.append({
                    "vod_id": a.attr("href") or "",
                    "vod_name": title,
                    "vod_pic": pic,
                    "vod_remarks": remarks
                })
        # The site's real page count is not parsed, so pagecount/total stay large and the
        # app keeps paging until an empty list comes back
        return {
            "list": videos,
            "page": pg,
            "pagecount": 999,
            "limit": 40,
            "total": 999999
        }
    # ================= Detail page =================
    def detailContent(self, ids):
        url = urljoin(self.host, ids[0])
        try:
            r = requests.get(url, headers=self.headers, timeout=15)
            doc = pq(r.text)
        except Exception:
            return {"list": []}
        title = doc("#video-info h1").text().strip()
        pic = (doc("#video-thumb img").attr("src") or doc("#video-thumb img").attr("data-src") or "").split("?")[0]
        play_from = []
        play_url = []
        # Key fix: "老僧酿酒" no longer relies on the dead data-url direct links; the detail
        # page URL is handed to the player's built-in parser instead, so playback works,
        # including hotlink-protected / m3u8 / encrypted streams
        play_from.append("老僧酿酒")
        play_url.append(f"{title}${url}")
        # Related + sidebar lists (kept as-is: related videos are also resolved from their detail pages)
        for container, name in [(doc(".main .vid-items .item"), "书生玩剑"), (doc(".vid-items.side .item"), "将军作文")]:
            lines = []
            for i in container:
                a = pq(i).find("a").eq(0)
                title_span = pq(i).find(".info .title span").eq(0)
                t = title_span.text() or "相关视频"
                h = a.attr("href") or ""
                if h:
                    lines.append(f"{t}$_gggb_{h}")
            if lines:
                play_from.append(name)
                play_url.append("#".join(lines))
        vod = {
            "vod_id": ids[0],
            "vod_name": title,
            "vod_pic": pic,
            "vod_content": doc("#video-details .content").text(),
            "vod_actor": ",".join(pq(a).text() for a in doc(".meta a.actor")),
            "vod_year": doc('.meta div:contains("發布日期") span').text() or doc('.meta div:contains("发布日期") span').text(),
            "vod_play_from": "$$$".join(play_from),
            "vod_play_url": "$$$".join(play_url)
        }
        return {"list": [vod]}
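    # Wire format recap (as built above): "vod_play_from" joins source names with "$$$",
    # "vod_play_url" joins each source's episodes with "#", and every episode is
    # "display name" + "$" + target. Related/sidebar targets carry the extra "_gggb_"
    # marker so playerContent() below can tell them apart from the main detail-page URL.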
    # ================= Playback =================
    def playerContent(self, flag, id, vipFlags):
        # Everything goes through the app's built-in parser (parse=1), which covers the
        # "老僧酿酒" source as well as every related-video detail-page URL
        if "_gggb_" in id:
            real_url = id.split("_gggb_")[1]
            return {"parse": 1, "url": urljoin(self.host, real_url), "header": self.headers}
        else:
            # id is "title$detail-page-URL": split and take the URL part
            real_url = id.split("$")[1] if "$" in id else id
            return {"parse": 1, "url": real_url, "header": self.headers}
    # ================= Required stubs =================
    def homeVideoContent(self): pass
    def searchContent(self, key, quick, pg="1"): return {"list": []}
    def localProxy(self, param): pass
    def isVideoFormat(self, url): pass
    def manualVideoCheck(self): pass
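

# Minimal local smoke test, a sketch only and not part of the OK影视 runtime. It decodes
# the embedded category map and needs no network access, but it assumes base.spider is
# importable via the sys.path tweak above and that the base Spider class can be
# constructed with no arguments. Run this file directly to see the unpacked table.
if __name__ == "__main__":
    _sp = Spider()
    _sp.init()
    # Print the category name -> type_id table unpacked from the compressed blob
    for _name, _tid in _sp.ungzip(_sp.gcate).items():
        print(_name, "->", _tid)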