Upload files to lib
parent 7fd3cfead8
commit 46a4e26971
4 changed files with 1317 additions and 0 deletions
390 lib/每日大乱斗.py (new file)

@@ -0,0 +1,390 @@
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import hashlib
|
||||||
|
from base64 import b64decode, b64encode
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from Crypto.Cipher import AES
|
||||||
|
from Crypto.Util.Padding import unpad
|
||||||
|
from pyquery import PyQuery as pq
|
||||||
|
sys.path.append('..')
|
||||||
|
from base.spider import Spider as BaseSpider
|
||||||
|
|
||||||
|
img_cache = {}
|
||||||
|
|
||||||
|
class Spider(BaseSpider):
|
||||||
|
|
||||||
|
def init(self, extend=""):
|
||||||
|
try:
|
||||||
|
self.proxies = json.loads(extend)
|
||||||
|
except:
|
||||||
|
self.proxies = {}
|
||||||
|
self.headers = {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
||||||
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
||||||
|
'Accept-Language': 'zh-CN,zh;q=0.9',
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Cache-Control': 'no-cache',
|
||||||
|
}
|
||||||
|
self.host = self.get_working_host()
|
||||||
|
self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})
|
||||||
|
print(f"使用站点: {self.host}")
|
||||||
|
|
||||||
|
def getName(self):
|
||||||
|
return "🌈 每日大乱斗|终极完美版"
|
||||||
|
|
||||||
|
def isVideoFormat(self, url):
|
||||||
|
return any(ext in (url or '') for ext in ['.m3u8', '.mp4', '.ts'])
|
||||||
|
|
||||||
|
def manualVideoCheck(self):
|
||||||
|
return False
|
||||||
|
|
||||||
|
def destroy(self):
|
||||||
|
global img_cache
|
||||||
|
img_cache.clear()
|
||||||
|
|
||||||
|
def get_working_host(self):
|
||||||
|
dynamic_urls = [
|
||||||
|
'https://border.bshzjjgq.cc/',
|
||||||
|
'https://blood.bshzjjgq.cc/'
|
||||||
|
]
|
||||||
|
for url in dynamic_urls:
|
||||||
|
try:
|
||||||
|
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return url
|
||||||
|
except Exception:
|
||||||
|
continue
|
||||||
|
return dynamic_urls[0]
|
||||||
|
|
||||||
|
def homeContent(self, filter):
|
||||||
|
try:
|
||||||
|
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
|
||||||
|
if response.status_code != 200: return {'class': [], 'list': []}
|
||||||
|
data = self.getpq(response.text)
|
||||||
|
|
||||||
|
classes = []
|
||||||
|
category_selectors = ['.category-list ul li', '.nav-menu li', '.menu li', 'nav ul li']
|
||||||
|
for selector in category_selectors:
|
||||||
|
for k in data(selector).items():
|
||||||
|
link = k('a')
|
||||||
|
href = (link.attr('href') or '').strip()
|
||||||
|
name = (link.text() or '').strip()
|
||||||
|
if not href or href == '#' or not name: continue
|
||||||
|
classes.append({'type_name': name, 'type_id': href})
|
||||||
|
if classes: break
|
||||||
|
|
||||||
|
if not classes:
|
||||||
|
classes = [{'type_name': '最新', 'type_id': '/latest/'}, {'type_name': '热门', 'type_id': '/hot/'}]
|
||||||
|
|
||||||
|
return {'class': classes, 'list': self.getlist(data('#index article, article'))}
|
||||||
|
except Exception as e:
|
||||||
|
return {'class': [], 'list': []}
|
||||||
|
|
||||||
|
def homeVideoContent(self):
|
||||||
|
try:
|
||||||
|
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
|
||||||
|
if response.status_code != 200: return {'list': []}
|
||||||
|
data = self.getpq(response.text)
|
||||||
|
return {'list': self.getlist(data('#index article, article'))}
|
||||||
|
except Exception as e:
|
||||||
|
return {'list': []}
|
||||||
|
|
||||||
|
def categoryContent(self, tid, pg, filter, extend):
|
||||||
|
try:
|
||||||
|
if '@folder' in tid:
|
||||||
|
v = self.getfod(tid.replace('@folder', ''))
|
||||||
|
return {'list': v, 'page': 1, 'pagecount': 1, 'limit': 90, 'total': len(v)}
|
||||||
|
|
||||||
|
pg = int(pg) if pg else 1
|
||||||
|
|
||||||
|
if tid.startswith('http'):
|
||||||
|
base_url = tid.rstrip('/')
|
||||||
|
else:
|
||||||
|
path = tid if tid.startswith('/') else f"/{tid}"
|
||||||
|
base_url = f"{self.host}{path}".rstrip('/')
|
||||||
|
|
||||||
|
if pg == 1:
|
||||||
|
url = f"{base_url}/"
|
||||||
|
else:
|
||||||
|
url = f"{base_url}/{pg}/"
|
||||||
|
|
||||||
|
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
|
||||||
|
if response.status_code != 200: return {'list': [], 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 0}
|
||||||
|
|
||||||
|
data = self.getpq(response.text)
|
||||||
|
videos = self.getlist(data('#archive article, #index article, article'), tid)
|
||||||
|
|
||||||
|
return {'list': videos, 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
|
||||||
|
except Exception as e:
|
||||||
|
return {'list': [], 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 0}
|
||||||
|
|
||||||
|
def detailContent(self, ids):
|
||||||
|
try:
|
||||||
|
url = ids[0] if ids[0].startswith('http') else f"{self.host}{ids[0]}"
|
||||||
|
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
|
||||||
|
data = self.getpq(response.text)
|
||||||
|
|
||||||
|
plist = []
|
||||||
|
used_names = set()
|
||||||
|
if data('.dplayer'):
|
||||||
|
for c, k in enumerate(data('.dplayer').items(), start=1):
|
||||||
|
try:
|
||||||
|
config_attr = k.attr('data-config')
|
||||||
|
if config_attr:
|
||||||
|
config = json.loads(config_attr)
|
||||||
|
video_url = config.get('video', {}).get('url', '')
|
||||||
|
|
||||||
|
if video_url:
|
||||||
|
ep_name = ''
|
||||||
|
parent = k.parents().eq(0)
|
||||||
|
for _ in range(4):
|
||||||
|
if not parent: break
|
||||||
|
heading = parent.find('h2, h3, h4').eq(0).text().strip()
|
||||||
|
if heading:
|
||||||
|
ep_name = heading
|
||||||
|
break
|
||||||
|
parent = parent.parents().eq(0)
|
||||||
|
|
||||||
|
base_name = ep_name if ep_name else f"视频{c}"
|
||||||
|
name = base_name
|
||||||
|
count = 2
|
||||||
|
while name in used_names:
|
||||||
|
name = f"{base_name} {count}"
|
||||||
|
count += 1
|
||||||
|
used_names.add(name)
|
||||||
|
|
||||||
|
plist.append(f"{name}${video_url}")
|
||||||
|
except: continue
|
||||||
|
|
||||||
|
if not plist:
|
||||||
|
content_area = data('.post-content, article')
|
||||||
|
for i, link in enumerate(content_area('a').items(), start=1):
|
||||||
|
link_text = link.text().strip()
|
||||||
|
link_href = link.attr('href')
|
||||||
|
|
||||||
|
if link_href and any(kw in link_text for kw in ['点击观看', '观看', '播放', '视频', '第一弹', '第二弹', '第三弹', '第四弹', '第五弹', '第六弹', '第七弹', '第八弹', '第九弹', '第十弹']):
|
||||||
|
ep_name = link_text.replace('点击观看:', '').replace('点击观看', '').strip()
|
||||||
|
if not ep_name: ep_name = f"视频{i}"
|
||||||
|
|
||||||
|
if not link_href.startswith('http'):
|
||||||
|
link_href = f"{self.host}{link_href}" if link_href.startswith('/') else f"{self.host}/{link_href}"
|
||||||
|
|
||||||
|
plist.append(f"{ep_name}${link_href}")
|
||||||
|
|
||||||
|
play_url = '#'.join(plist) if plist else f"未找到视频源${url}"
|
||||||
|
|
||||||
|
vod_content = ''
|
||||||
|
try:
|
||||||
|
tags = []
|
||||||
|
seen_names = set()
|
||||||
|
seen_ids = set()
|
||||||
|
|
||||||
|
tag_links = data('.tags a, .keywords a, .post-tags a')
|
||||||
|
|
||||||
|
candidates = []
|
||||||
|
for k in tag_links.items():
|
||||||
|
title = k.text().strip()
|
||||||
|
href = k.attr('href')
|
||||||
|
if title and href:
|
||||||
|
candidates.append({'name': title, 'id': href})
|
||||||
|
|
||||||
|
candidates.sort(key=lambda x: len(x['name']), reverse=True)
|
||||||
|
|
||||||
|
for item in candidates:
|
||||||
|
name = item['name']
|
||||||
|
id_ = item['id']
|
||||||
|
|
||||||
|
if id_ in seen_ids: continue
|
||||||
|
|
||||||
|
is_duplicate = False
|
||||||
|
for seen in seen_names:
|
||||||
|
if name in seen:
|
||||||
|
is_duplicate = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not is_duplicate:
|
||||||
|
target = json.dumps({'id': id_, 'name': name})
|
||||||
|
tags.append(f'[a=cr:{target}/]{name}[/a]')
|
||||||
|
seen_names.add(name)
|
||||||
|
seen_ids.add(id_)
|
||||||
|
|
||||||
|
if tags:
|
||||||
|
vod_content = ' '.join(tags)
|
||||||
|
else:
|
||||||
|
vod_content = data('.post-title').text()
|
||||||
|
except Exception:
|
||||||
|
vod_content = '获取标签失败'
|
||||||
|
|
||||||
|
if not vod_content:
|
||||||
|
vod_content = data('h1').text() or '每日大乱斗'
|
||||||
|
|
||||||
|
return {'list': [{'vod_play_from': '每日大乱斗', 'vod_play_url': play_url, 'vod_content': vod_content}]}
|
||||||
|
except:
|
||||||
|
return {'list': [{'vod_play_from': '每日大乱斗', 'vod_play_url': '获取失败'}]}
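# A minimal stand-alone sketch of the duplicate-name handling used in detailContent() above:
# repeated section headings get a numeric suffix so every "集名$地址" entry in vod_play_url
# stays unique. The input list below is illustrative.
def _demo_unique_names(raw_names):
    used, out = set(), []
    for base in raw_names:
        name, count = base, 2
        while name in used:
            name = f"{base} {count}"
            count += 1
        used.add(name)
        out.append(name)
    return out

# _demo_unique_names(['视频1', '视频1', '视频1']) -> ['视频1', '视频1 2', '视频1 3']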
|
||||||
|
|
||||||
|
def searchContent(self, key, quick, pg="1"):
|
||||||
|
try:
|
||||||
|
pg = int(pg) if pg else 1
|
||||||
|
|
||||||
|
if pg == 1:
|
||||||
|
url = f"{self.host}/search/{key}/"
|
||||||
|
else:
|
||||||
|
url = f"{self.host}/search/{key}/{pg}/"
|
||||||
|
|
||||||
|
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
|
||||||
|
return {'list': self.getlist(self.getpq(response.text)('article')), 'page': pg, 'pagecount': 9999}
|
||||||
|
except:
|
||||||
|
return {'list': [], 'page': pg, 'pagecount': 9999}
|
||||||
|
|
||||||
|
def playerContent(self, flag, id, vipFlags):
|
||||||
|
parse = 0 if self.isVideoFormat(id) else 1
|
||||||
|
url = self.proxy(id) if '.m3u8' in id else id
|
||||||
|
return {'parse': parse, 'url': url, 'header': self.headers}
|
||||||
|
|
||||||
|
def localProxy(self, param):
|
||||||
|
try:
|
||||||
|
type_ = param.get('type')
|
||||||
|
url = param.get('url')
|
||||||
|
if type_ == 'cache':
|
||||||
|
key = param.get('key')
|
||||||
|
if content := img_cache.get(key):
|
||||||
|
return [200, 'image/jpeg', content]
|
||||||
|
return [404, 'text/plain', b'Expired']
|
||||||
|
elif type_ == 'img':
|
||||||
|
real_url = self.d64(url) if not url.startswith('http') else url
|
||||||
|
res = requests.get(real_url, headers=self.headers, proxies=self.proxies, timeout=10)
|
||||||
|
content = self.aesimg(res.content)
|
||||||
|
return [200, 'image/jpeg', content]
|
||||||
|
elif type_ == 'm3u8':
|
||||||
|
return self.m3Proxy(url)
|
||||||
|
else:
|
||||||
|
return self.tsProxy(url)
|
||||||
|
except:
|
||||||
|
return [404, 'text/plain', b'']
|
||||||
|
|
||||||
|
def proxy(self, data, type='m3u8'):
|
||||||
|
if data and self.proxies: return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
|
||||||
|
return data
|
||||||
|
|
||||||
|
def m3Proxy(self, url):
|
||||||
|
url = self.d64(url)
|
||||||
|
res = requests.get(url, headers=self.headers, proxies=self.proxies)
|
||||||
|
data = res.text
|
||||||
|
base = res.url.rsplit('/', 1)[0]
|
||||||
|
lines = []
|
||||||
|
for line in data.split('\n'):
|
||||||
|
if '#EXT' not in line and line.strip():
|
||||||
|
if not line.startswith('http'):
|
||||||
|
line = f"{base}/{line}"
|
||||||
|
lines.append(self.proxy(line, 'ts'))
|
||||||
|
else:
|
||||||
|
lines.append(line)
|
||||||
|
return [200, "application/vnd.apple.mpegurl", '\n'.join(lines)]
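# Stand-alone sketch of the playlist rewrite m3Proxy() performs above: lines that are not
# #EXT tags are resolved against the playlist's base URL before being wrapped for the local
# proxy. The sample playlist and base URL are fabricated; `wrap` stands in for self.proxy(line, 'ts').
def _demo_rewrite_m3u8(playlist_text, base, wrap=lambda u: u):
    out = []
    for line in playlist_text.split('\n'):
        if '#EXT' not in line and line.strip():
            if not line.startswith('http'):
                line = f"{base}/{line}"
            out.append(wrap(line))
        else:
            out.append(line)
    return '\n'.join(out)

# _demo_rewrite_m3u8("#EXTM3U\n#EXTINF:4.0,\nseg0.ts", "https://example.com/hls")
# -> "#EXTM3U\n#EXTINF:4.0,\nhttps://example.com/hls/seg0.ts"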
|
||||||
|
|
||||||
|
def tsProxy(self, url):
|
||||||
|
return [200, 'video/mp2t', requests.get(self.d64(url), headers=self.headers, proxies=self.proxies).content]
|
||||||
|
|
||||||
|
def e64(self, text):
|
||||||
|
return b64encode(str(text).encode()).decode()
|
||||||
|
|
||||||
|
def d64(self, text):
|
||||||
|
return b64decode(str(text).encode()).decode()
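# Round-trip sketch for the e64()/d64() helpers above, which carry URLs through the query
# string that proxy() appends after getProxyUrl(). The sample URL is made up.
def _demo_proxy_param(url="https://example.com/video/index.m3u8"):
    from base64 import b64decode, b64encode
    token = b64encode(str(url).encode()).decode()          # what e64() produces
    assert b64decode(token.encode()).decode() == url       # what d64() recovers
    return f"&url={token}&type=m3u8"                       # suffix appended after getProxyUrl()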
|
||||||
|
|
||||||
|
def aesimg(self, data):
|
||||||
|
if len(data) < 16: return data
|
||||||
|
keys = [(b'f5d965df75336270', b'97b60394abc2fbe1'), (b'75336270f5d965df', b'abc2fbe197b60394')]
|
||||||
|
for k, v in keys:
|
||||||
|
try:
|
||||||
|
dec = unpad(AES.new(k, AES.MODE_CBC, v).decrypt(data), 16)
|
||||||
|
if dec.startswith(b'\xff\xd8') or dec.startswith(b'\x89PNG'): return dec
|
||||||
|
except: pass
|
||||||
|
try:
|
||||||
|
dec = unpad(AES.new(k, AES.MODE_ECB).decrypt(data), 16)
|
||||||
|
if dec.startswith(b'\xff\xd8'): return dec
|
||||||
|
except: pass
|
||||||
|
return data
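# Self-contained round-trip for the CBC + PKCS#7 scheme that aesimg() reverses: a fake JPEG
# header is padded and encrypted with the first key/iv pair above, then recovered and checked
# against the same magic bytes. Illustrative only; real cover images arrive already encrypted.
def _demo_aesimg_roundtrip():
    from Crypto.Cipher import AES
    from Crypto.Util.Padding import pad, unpad
    key, iv = b'f5d965df75336270', b'97b60394abc2fbe1'
    fake_jpeg = b'\xff\xd8\xff\xe0' + b'\x00' * 32
    blob = AES.new(key, AES.MODE_CBC, iv).encrypt(pad(fake_jpeg, 16))
    plain = unpad(AES.new(key, AES.MODE_CBC, iv).decrypt(blob), 16)
    assert plain.startswith(b'\xff\xd8')
    return plain == fake_jpeg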
|
||||||
|
|
||||||
|
def getlist(self, data, tid=''):
|
||||||
|
videos = []
|
||||||
|
is_folder = '/mrdg' in (tid or '')
|
||||||
|
for k in data.items():
|
||||||
|
card_html = k.outer_html() if hasattr(k, 'outer_html') else str(k)
|
||||||
|
a = k if k.is_('a') else k('a').eq(0)
|
||||||
|
href = a.attr('href')
|
||||||
|
title = k('h2').text() or k('.entry-title').text() or k('.post-title').text()
|
||||||
|
if not title and k.is_('a'): title = k.text()
|
||||||
|
|
||||||
|
if href and title:
|
||||||
|
img = self.getimg(k('script').text(), k, card_html)
|
||||||
|
videos.append({
|
||||||
|
'vod_id': f"{href}{'@folder' if is_folder else ''}",
|
||||||
|
'vod_name': title.strip(),
|
||||||
|
'vod_pic': img,
|
||||||
|
'vod_remarks': k('time').text() or '',
|
||||||
|
'vod_tag': 'folder' if is_folder else '',
|
||||||
|
'style': {"type": "rect", "ratio": 1.33}
|
||||||
|
})
|
||||||
|
return videos
|
||||||
|
|
||||||
|
def getfod(self, id):
|
||||||
|
url = f"{self.host}{id}"
|
||||||
|
data = self.getpq(requests.get(url, headers=self.headers, proxies=self.proxies).text)
|
||||||
|
videos = []
|
||||||
|
for i, h2 in enumerate(data('.post-content h2').items()):
|
||||||
|
p_txt = data('.post-content p').eq(i * 2)
|
||||||
|
p_img = data('.post-content p').eq(i * 2 + 1)
|
||||||
|
p_html = p_img.outer_html() if hasattr(p_img, 'outer_html') else str(p_img)
|
||||||
|
videos.append({
|
||||||
|
'vod_id': p_txt('a').attr('href'),
|
||||||
|
'vod_name': p_txt.text().strip(),
|
||||||
|
'vod_pic': self.getimg('', p_img, p_html),
|
||||||
|
'vod_remarks': h2.text().strip()
|
||||||
|
})
|
||||||
|
return videos
|
||||||
|
|
||||||
|
def getimg(self, text, elem=None, html_content=None):
|
||||||
|
if m := re.search(r"loadBannerDirect\('([^']+)'", text or ''):
|
||||||
|
return self._proc_url(m.group(1))
|
||||||
|
|
||||||
|
if html_content is None and elem is not None:
|
||||||
|
html_content = elem.outer_html() if hasattr(elem, 'outer_html') else str(elem)
|
||||||
|
if not html_content: return ''
|
||||||
|
|
||||||
|
html_content = html_content.replace('&quot;', '"').replace('&#39;', "'").replace('&amp;', '&')  # restore escaped HTML entities before the regex matching below
|
||||||
|
|
||||||
|
if 'data:image' in html_content:
|
||||||
|
m = re.search(r'(data:image/[a-zA-Z0-9+/=;,]+)', html_content)
|
||||||
|
if m: return self._proc_url(m.group(1))
|
||||||
|
|
||||||
|
m = re.search(r'(https?://[^"\'\s)]+\.(?:jpg|png|jpeg|webp))', html_content, re.I)
|
||||||
|
if m: return self._proc_url(m.group(1))
|
||||||
|
|
||||||
|
if 'url(' in html_content:
|
||||||
|
m = re.search(r'url\s*\(\s*[\'"]?([^"\'\)]+)[\'"]?\s*\)', html_content, re.I)
|
||||||
|
if m: return self._proc_url(m.group(1))
|
||||||
|
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def _proc_url(self, url):
|
||||||
|
if not url: return ''
|
||||||
|
url = url.strip('\'" ')
|
||||||
|
if url.startswith('data:'):
|
||||||
|
try:
|
||||||
|
_, b64_str = url.split(',', 1)
|
||||||
|
raw = b64decode(b64_str)
|
||||||
|
if not (raw.startswith(b'\xff\xd8') or raw.startswith(b'\x89PNG') or raw.startswith(b'GIF8')):
|
||||||
|
raw = self.aesimg(raw)
|
||||||
|
key = hashlib.md5(raw).hexdigest()
|
||||||
|
img_cache[key] = raw
|
||||||
|
return f"{self.getProxyUrl()}&type=cache&key={key}"
|
||||||
|
except: return ""
|
||||||
|
if not url.startswith('http'):
|
||||||
|
url = f"{self.host}{url}" if url.startswith('/') else f"{self.host}/{url}"
|
||||||
|
return f"{self.getProxyUrl()}&url={self.e64(url)}&type=img"
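# Sketch of the data-URI branch in _proc_url(): decoded bytes are keyed by their MD5 and parked
# in a cache dict (img_cache above) so localProxy(type='cache') can serve them later. The 1x1
# GIF data URI below is sample data, not site output.
def _demo_cache_data_uri(cache):
    import hashlib
    from base64 import b64decode
    data_uri = "data:image/gif;base64,R0lGODlhAQABAAAAACw="
    _, b64_str = data_uri.split(',', 1)
    raw = b64decode(b64_str)
    key = hashlib.md5(raw).hexdigest()
    cache[key] = raw
    return key        # pages then reference ...&type=cache&key=<key>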
|
||||||
|
|
||||||
|
def getpq(self, data):
|
||||||
|
try: return pq(data)
|
||||||
|
except: return pq(data.encode('utf-8'))
114 lib/糖心次元.py (new file)

@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-
|
||||||
|
# @Author : Grok-4 Adapted & Optimized
|
||||||
|
# @Time : 2025/10/22
|
||||||
|
# @Note : 糖心次元极简爬虫(已修复转义 \/ 问题 & 韩国AV标题前缀)
|
||||||
|
|
||||||
|
import sys, urllib.parse, re, json
|
||||||
|
from lxml import etree
|
||||||
|
sys.path.append('..')
|
||||||
|
from base.spider import Spider
|
||||||
|
|
||||||
|
class Spider(Spider):
|
||||||
|
def getName(self):
|
||||||
|
return "糖心次元"
|
||||||
|
|
||||||
|
def init(self, extend):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def homeContent(self, filter):
|
||||||
|
cate = {"传媒系列":"1","AV系列":"2","麻豆传媒":"5","糖心传媒":"6","精东影业":"7","蜜桃传媒":"8","果冻传媒":"9","星空无限":"10","天美传媒":"11","抠抠传媒":"12","星杏吧传媒":"13","性视界传媒":"14","SA国际传媒":"15","其他传媒":"16","国产-自拍-偷拍":"17","探花-主播-网红":"18","日本-中文字幕":"19","日本-无码流出":"20","日本-高清有码":"21","日本-东京热":"22","动漫-番中字":"23","变态-暗网-同恋":"24","欧美高清无码":"25","韩国av":"27"}
|
||||||
|
return {'class': [{'type_name': k, 'type_id': v} for k, v in cate.items()]}
|
||||||
|
|
||||||
|
def homeVideoContent(self):
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# --------------- 通用解析 --------------- #
|
||||||
|
def _parse(self, rsp):
|
||||||
|
root = etree.HTML(rsp)
|
||||||
|
videos = root.xpath('//li[contains(@class,"mb15") and .//a[contains(@href,"/vod/play/")]]')
|
||||||
|
lst = []
|
||||||
|
for v in videos:
|
||||||
|
name = (v.xpath('.//h2/a/@title|.//h3/a/@title|.//p[contains(@class,"txt-ov")]/text()') or [''])[0].strip()
|
||||||
|
# >>> 去韩国AV前缀:kbj-23010421标题 -> 标题
|
||||||
|
name = re.sub(r'^[a-zA-Z]{2,}\-\d+\s*', '', name).strip()
|
||||||
|
img = (v.xpath('.//img/@src') or [''])[0]
|
||||||
|
if img and not img.startswith('http'):
|
||||||
|
img = ('https:' + img) if img.startswith('//') else 'https://img1.souavzy.org' + img
|
||||||
|
link = (v.xpath('.//a[contains(@href,"/vod/play/")]/@href') or [''])[0]
|
||||||
|
if link and not link.startswith('http'):
|
||||||
|
link = 'https://www.txsp.my' + link
|
||||||
|
lst.append({'vod_name': name or '未知标题', 'vod_pic': img, 'vod_remarks': (v.xpath('.//span[contains(@class,"ico-left")]/text()') or [''])[0].strip(), 'vod_id': link})
|
||||||
|
return lst
|
||||||
|
|
||||||
|
def categoryContent(self, tid, pg, filter, extend):
|
||||||
|
url = f'https://www.txsp.my/index.php/vod/type/id/{tid}.html' if pg == '1' else f'https://www.txsp.my/index.php/vod/type/id/{tid}/page/{pg}.html'
|
||||||
|
try:
|
||||||
|
rsp = self.fetch(url).text
|
||||||
|
lst = self._parse(rsp)
|
||||||
|
pages = max([int(n) for n in re.findall(r'/page/(\d+)', rsp)] or [1])
|
||||||
|
return {'list': lst, 'page': int(pg), 'pagecount': pages, 'limit': len(lst), 'total': 999999}
|
||||||
|
except Exception as e:
|
||||||
|
return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
|
||||||
|
|
||||||
|
def detailContent(self, array):
|
||||||
|
tid = array[0]
|
||||||
|
url = tid if tid.startswith('http') else 'https://www.txsp.my' + tid
|
||||||
|
try:
|
||||||
|
rsp = self.fetch(url).text
|
||||||
|
root = etree.HTML(rsp)
|
||||||
|
title = (root.xpath('//h1/text()') or ['未知标题'])[0].strip()
|
||||||
|
pic = (root.xpath('//meta[@property="og:image"]/@content|//img[contains(@src,"upload/vod")]/@src') or [''])[0]
|
||||||
|
if pic and not pic.startswith('http'):
|
||||||
|
pic = ('https:' + pic) if pic.startswith('//') else 'https://img1.souavzy.org' + pic
|
||||||
|
play_url = self._extract(rsp)
|
||||||
|
return {'list': [{'vod_id': tid, 'vod_name': title, 'vod_pic': pic, 'vod_content': title, 'vod_play_from': '糖心次元', 'vod_play_url': '播放$' + play_url if play_url else '播放$暂无播放地址'}]}
|
||||||
|
except Exception as e:
|
||||||
|
return {'list': []}
|
||||||
|
|
||||||
|
def _extract(self, html):
|
||||||
|
html = html.replace(r'\/', '/') # 关键修复
|
||||||
|
for pat in [r'var player_aaaa\s*=\s*({[^}]+})', r'player_aaaa\s*=\s*({[^}]+})', r'var player_data\s*=\s*({[^}]+})']:
|
||||||
|
m = re.search(pat, html)
|
||||||
|
if m:
|
||||||
|
try:
|
||||||
|
url = json.loads(m.group(1))['url']
|
||||||
|
if url: return url
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
src = re.search(r'<iframe[^>]+src="([^"]+souavzy[^"]+)"', html, re.I)
|
||||||
|
if src:
|
||||||
|
m3 = re.search(r'url=([^&]+)', src.group(1))
|
||||||
|
if m3: return urllib.parse.unquote(m3.group(1))
|
||||||
|
for url in re.findall(r'"(https?://[^"]+\.m3u8[^"]*)"', html):
|
||||||
|
if 'souavzy' in url or 'qrtuv' in url: return url
|
||||||
|
return ''
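# Minimal demo of the player_aaaa branch in _extract() above, including the \/ unescaping.
# The <script> snippet and the cdn.example.com host are fabricated for illustration.
def _demo_extract_player_url():
    import json, re
    sample = r'<script>var player_aaaa={"url":"https:\/\/cdn.example.com\/hls\/index.m3u8","from":"dplayer"}</script>'
    html = sample.replace('\\/', '/')
    m = re.search(r'var player_aaaa\s*=\s*({[^}]+})', html)
    return json.loads(m.group(1))['url'] if m else ''

# _demo_extract_player_url() -> 'https://cdn.example.com/hls/index.m3u8'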
|
||||||
|
|
||||||
|
def searchContent(self, key, quick, pg="1"):
|
||||||
|
url = f'https://www.txsp.my/index.php/vod/search/page/{pg}/wd/{urllib.parse.quote(key)}.html'
|
||||||
|
try:
|
||||||
|
return {'list': self._parse(self.fetch(url).text), 'page': int(pg), 'pagecount': 999, 'limit': 999, 'total': 999999}
|
||||||
|
except:
|
||||||
|
return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
|
||||||
|
|
||||||
|
def playerContent(self, flag, id, vipFlags):
|
||||||
|
if flag != "糖心次元":
|
||||||
|
return {}
|
||||||
|
if id.startswith('http') and ('.m3u8' in id or 'souavzy' in id):
|
||||||
|
return {"parse": 0, "playUrl": '', "url": id, "header": {"User-Agent": "Mozilla/5.0", "Referer": "https://www.txsp.my/", "Origin": "https://www.txsp.my"}}
|
||||||
|
try:
|
||||||
|
url = id if id.startswith('http') else 'https://www.txsp.my' + id
|
||||||
|
play_url = self._extract(self.fetch(url).text)
|
||||||
|
if play_url:
|
||||||
|
return {"parse": 0, "playUrl": '', "url": play_url, "header": {"User-Agent": "Mozilla/5.0", "Referer": "https://www.txsp.my/", "Origin": "https://www.txsp.my"}}
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return {"parse": 1, "playUrl": '', "url": id, "header": {"User-Agent": "Mozilla/5.0", "Referer": "https://www.txsp.my/", "Origin": "https://www.txsp.my"}}
|
||||||
|
|
||||||
|
def isVideoFormat(self, url):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def manualVideoCheck(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def localProxy(self, param):
|
||||||
|
pass
144 lib/随机小姐姐.py (new file)

@@ -0,0 +1,144 @@
# coding=utf-8
|
||||||
|
# !/usr/bin/python
|
||||||
|
import sys
|
||||||
|
import requests
|
||||||
|
import datetime
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import re
|
||||||
|
import base64
|
||||||
|
sys.path.append('..')
from base.spider import Spider
|
||||||
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
xurl = "http://xjj2.716888.xyz"
|
||||||
|
headerx = {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36',
|
||||||
|
'Cookie':'mk_encrypt_c21f969b5f03d33d43e04f8f136e7682=390e11f0d5ae13b2787e6a72db11527f'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class Spider(Spider):
|
||||||
|
global xurl
|
||||||
|
global headerx
|
||||||
|
|
||||||
|
def getName(self):
|
||||||
|
return "首页"
|
||||||
|
|
||||||
|
def init(self, extend):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def isVideoFormat(self, url):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def manualVideoCheck(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def homeContent(self, filter):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def homeVideoContent(self):
|
||||||
|
id = ['4k/4k.php', 'djxjj/dj1.php', 'zj/jipinyz/jipinyz.php', 'zj/xuejie/xuejie.php', 'zj/kawayi/kawayi.php',
|
||||||
|
'zj/nennen/nennen.php', 'zj/heji1/heji1.php', 'zj/sihuawd/sihuawd.php', 'zj/wanmeisc/wanmeisc.php',
|
||||||
|
'zj/manyao/manyao.php', 'zj/sihuadd/sihuadd.php', 'zj/qingchun/qingchun.php', 'zj/cos/cos.php',
|
||||||
|
'zj/jingpinbz/jingpinbz.php', 'zj/jipinll/jipinll.php', 'zj/nideym/nideym.php', 'zj/tianmei/tianmei.php',
|
||||||
|
'zj/yusi/yusi.php', 'zj/shuaige/shuaige.php', 'zj/rewu/rewu.php', 'zj/jingpinsc/jingpinsc.php']
|
||||||
|
name = ['随机', 'DJ姐姐', '极品钰足', '学姐系列', '卡哇伊', '嫩嫩系列', '美女舞蹈', '丝滑舞蹈', '完美身材',
|
||||||
|
'慢摇系列', '丝滑吊带', '清纯系列', 'COS系列', '精品变装', '极品罗丽', '你的裕梦', '甜妹系列',
|
||||||
|
'御丝系列', '帅哥哥', '热舞系列', '精品收藏']
|
||||||
|
pic = ['https://img0.baidu.com/it/u=2236794495,926227820&fm=253&fmt=auto&app=138&f=JPEG?w=1091&h=500',
|
||||||
|
'https://pic.rmb.bdstatic.com/mvideo/e17d86ce4489a02870ace9a25a804c3e',
|
||||||
|
'https://img1.baidu.com/it/u=4087009209,613234683&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=364',
|
||||||
|
'https://img1.baidu.com/it/u=2347706654,3055017263&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
|
||||||
|
'https://img2.baidu.com/it/u=3715511725,1094436549&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=1083',
|
||||||
|
'https://img2.baidu.com/it/u=2560410906,3760952489&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
|
||||||
|
'https://img0.baidu.com/it/u=4119328645,2294770712&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
|
||||||
|
'https://img1.baidu.com/it/u=3167365498,4156845177&fm=253&fmt=auto&app=120&f=JPEG?w=355&h=631',
|
||||||
|
'https://img2.baidu.com/it/u=2214691242,2295609938&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=973',
|
||||||
|
'https://img1.baidu.com/it/u=3930123826,1131807820&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500',
|
||||||
|
'https://img2.baidu.com/it/u=3998619741,1128428746&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=594',
|
||||||
|
'https://img2.baidu.com/it/u=1507871502,2316279678&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=768',
|
||||||
|
'https://img0.baidu.com/it/u=2245878765,4037513957&fm=253&fmt=auto&app=138&f=JPEG?w=617&h=411',
|
||||||
|
'https://img1.baidu.com/it/u=3623293272,829752126&fm=253&fmt=auto&app=138&f=JPEG?w=285&h=285',
|
||||||
|
'https://img2.baidu.com/it/u=1922261112,3647796435&fm=253&fmt=auto&app=120&f=JPEG?w=500&h=542',
|
||||||
|
'https://img1.baidu.com/it/u=3970043028,2042301564&fm=253&fmt=auto&app=120&f=JPEG?w=500&h=889',
|
||||||
|
'https://img2.baidu.com/it/u=3229384329,3046902124&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=800',
|
||||||
|
'https://img1.baidu.com/it/u=3113661564,2558849413&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=500',
|
||||||
|
'https://img1.baidu.com/it/u=2361496550,3302335162&fm=253&fmt=auto&app=138&f=JPEG?w=333&h=500',
|
||||||
|
'https://img1.baidu.com/it/u=270105183,1595166255&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=500',
|
||||||
|
'https://img1.baidu.com/it/u=4071105902,825241031&fm=253&fmt=auto&app=138&f=JPEG?w=235&h=340']
|
||||||
|
list_length = len(id)
|
||||||
|
videos = []
|
||||||
|
for i in range(list_length):
|
||||||
|
print(id[i])
|
||||||
|
video = {
|
||||||
|
"vod_id": id[i],
|
||||||
|
"vod_name": name[i],
|
||||||
|
"vod_pic": pic[i],
|
||||||
|
"vod_remarks": '播放20个',
|
||||||
|
}
|
||||||
|
videos.append(video)
|
||||||
|
|
||||||
|
result = {'list': videos}
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
def categoryContent(self, cid, pg, filter, ext):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def detailContent(self, ids):
|
||||||
|
videos = []
|
||||||
|
result = {}
|
||||||
|
did = ids[0]
|
||||||
|
for i in range(1, 21):
|
||||||
|
playurl = ""
|
||||||
|
for j in range(1, i + 1):
|
||||||
|
playurl += f"{j}$/fenlei/{did}#"
|
||||||
|
playurl = playurl[:-1]
|
||||||
|
|
||||||
|
videos.append({
|
||||||
|
"vod_id": '',
|
||||||
|
"vod_name": '',
|
||||||
|
"vod_pic": "",
|
||||||
|
"type_name": '',
|
||||||
|
"vod_year": "",
|
||||||
|
"vod_area": "",
|
||||||
|
"vod_remarks": "",
|
||||||
|
"vod_actor": "",
|
||||||
|
"vod_director": "",
|
||||||
|
"vod_content": "",
|
||||||
|
"vod_play_from": "GK推荐",
|
||||||
|
"vod_play_url": playurl
|
||||||
|
})
|
||||||
|
|
||||||
|
result['list'] = videos
|
||||||
|
return result
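# The loop above assembles the usual "集数$地址#集数$地址" play string; a compact equivalent for
# a single entry, shown with made-up arguments:
def _demo_playurl(did='4k/4k.php', n=3):
    return '#'.join(f"{j}$/fenlei/{did}" for j in range(1, n + 1))

# _demo_playurl() -> '1$/fenlei/4k/4k.php#2$/fenlei/4k/4k.php#3$/fenlei/4k/4k.php'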
|
||||||
|
|
||||||
|
def playerContent(self, flag, id, vipFlags):
|
||||||
|
result = {}
|
||||||
|
response = requests.get(url=xurl + id, headers=headerx, allow_redirects=False)
|
||||||
|
|
||||||
|
location_header = response.headers.get('Location') or ''
if location_header.startswith('http'):
    purl = location_header
else:
    purl = 'http:' + location_header
|
||||||
|
result["parse"] = 0
|
||||||
|
result["playUrl"] = ''
|
||||||
|
result["url"] = purl
|
||||||
|
result["header"] = headerx
|
||||||
|
return result
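# A more defensive way to resolve the captured Location header is urljoin, which handles
# absolute, protocol-relative and path-relative targets alike. Sketch only; the example
# redirect target is invented.
def _demo_resolve_location(request_url, location):
    from urllib.parse import urljoin
    return urljoin(request_url, location or '')

# _demo_resolve_location(xurl + '/fenlei/4k/4k.php', '//cdn.example.com/v.mp4')
# -> 'http://cdn.example.com/v.mp4'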
|
||||||
|
|
||||||
|
def searchContentPage(self, key, quick, page):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def searchContent(self, key, quick):
|
||||||
|
return self.searchContentPage(key, quick, '1')
|
||||||
|
|
||||||
|
def localProxy(self, params):
|
||||||
|
if params['type'] == "m3u8":
|
||||||
|
return self.proxyM3u8(params)
|
||||||
|
elif params['type'] == "media":
|
||||||
|
return self.proxyMedia(params)
|
||||||
|
elif params['type'] == "ts":
|
||||||
|
return self.proxyTs(params)
|
||||||
|
return None
669 lib/香蕉.py (new file)

@@ -0,0 +1,669 @@
# coding=utf-8
|
||||||
|
#!/usr/bin/python
|
||||||
|
import sys
|
||||||
|
sys.path.append('..')
|
||||||
|
from base.spider import Spider
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import urllib.parse
|
||||||
|
import re
|
||||||
|
import requests
|
||||||
|
from lxml import etree
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
class Spider(Spider):
|
||||||
|
|
||||||
|
def getName(self):
|
||||||
|
return "苹果视频"
|
||||||
|
|
||||||
|
def init(self, extend=""):
|
||||||
|
self.host = "https://618041.xyz"
|
||||||
|
self.api_host = "https://h5.xxoo168.org"
|
||||||
|
self.headers = {
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||||
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||||
|
'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||||
|
'Accept-Encoding': 'gzip, deflate',
|
||||||
|
'Connection': 'keep-alive',
|
||||||
|
'Referer': self.host
|
||||||
|
}
|
||||||
|
# 定义特殊分区ID列表,包含所有需要特殊处理的分类
|
||||||
|
self.special_categories = ['13', '14', '33', '53', '32', '52', '9']
|
||||||
|
self.log(f"苹果视频爬虫初始化完成,主站: {self.host}")
|
||||||
|
|
||||||
|
def html(self, content):
|
||||||
|
"""将HTML内容转换为可查询的对象"""
|
||||||
|
try:
|
||||||
|
return etree.HTML(content)
|
||||||
|
except:
|
||||||
|
self.log("HTML解析失败")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def regStr(self, pattern, string, index=1):
|
||||||
|
"""正则表达式提取字符串"""
|
||||||
|
try:
|
||||||
|
match = re.search(pattern, string, re.IGNORECASE)
|
||||||
|
if match and len(match.groups()) >= index:
|
||||||
|
return match.group(index)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def isVideoFormat(self, url):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def manualVideoCheck(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def homeContent(self, filter):
|
||||||
|
"""获取首页内容和分类"""
|
||||||
|
result = {}
|
||||||
|
# 只保留指定的分类
|
||||||
|
classes = [
|
||||||
|
{'type_id': '618041.xyz_1', 'type_name': '全部视频'},
|
||||||
|
{'type_id': '618041.xyz_13', 'type_name': '香蕉精品'},
|
||||||
|
{'type_id': '618041.xyz_22', 'type_name': '制服诱惑'},
|
||||||
|
{'type_id': '618041.xyz_6', 'type_name': '国产视频'},
|
||||||
|
{'type_id': '618041.xyz_8', 'type_name': '清纯少女'},
|
||||||
|
{'type_id': '618041.xyz_9', 'type_name': '辣妹大奶'},
|
||||||
|
{'type_id': '618041.xyz_10', 'type_name': '女同专属'},
|
||||||
|
{'type_id': '618041.xyz_11', 'type_name': '素人出演'},
|
||||||
|
{'type_id': '618041.xyz_12', 'type_name': '角色扮演'},
|
||||||
|
{'type_id': '618041.xyz_20', 'type_name': '人妻熟女'},
|
||||||
|
{'type_id': '618041.xyz_23', 'type_name': '日韩剧情'},
|
||||||
|
{'type_id': '618041.xyz_21', 'type_name': '经典伦理'},
|
||||||
|
{'type_id': '618041.xyz_7', 'type_name': '成人动漫'},
|
||||||
|
{'type_id': '618041.xyz_14', 'type_name': '精品二区'},
|
||||||
|
{'type_id': '618041.xyz_53', 'type_name': '动漫中字'},
|
||||||
|
{'type_id': '618041.xyz_52', 'type_name': '日本无码'},
|
||||||
|
{'type_id': '618041.xyz_33', 'type_name': '中文字幕'},
|
||||||
|
{'type_id': '618041.xyz_32', 'type_name': '国产自拍'}
|
||||||
|
]
|
||||||
|
result['class'] = classes
|
||||||
|
try:
|
||||||
|
rsp = self.fetch(self.host, headers=self.headers)
|
||||||
|
doc = self.html(rsp.text)
|
||||||
|
videos = self._get_videos(doc, limit=20)
|
||||||
|
result['list'] = videos
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"首页获取出错: {str(e)}")
|
||||||
|
result['list'] = []
|
||||||
|
return result
|
||||||
|
|
||||||
|
def homeVideoContent(self):
|
||||||
|
"""分类定义 - 兼容性方法"""
|
||||||
|
return {
|
||||||
|
'class': [
|
||||||
|
{'type_id': '618041.xyz_1', 'type_name': '全部视频'},
|
||||||
|
{'type_id': '618041.xyz_13', 'type_name': '香蕉精品'},
|
||||||
|
{'type_id': '618041.xyz_22', 'type_name': '制服诱惑'},
|
||||||
|
{'type_id': '618041.xyz_6', 'type_name': '国产视频'},
|
||||||
|
{'type_id': '618041.xyz_8', 'type_name': '清纯少女'},
|
||||||
|
{'type_id': '618041.xyz_9', 'type_name': '辣妹大奶'},
|
||||||
|
{'type_id': '618041.xyz_10', 'type_name': '女同专属'},
|
||||||
|
{'type_id': '618041.xyz_11', 'type_name': '素人出演'},
|
||||||
|
{'type_id': '618041.xyz_12', 'type_name': '角色扮演'},
|
||||||
|
{'type_id': '618041.xyz_20', 'type_name': '人妻熟女'},
|
||||||
|
{'type_id': '618041.xyz_23', 'type_name': '日韩剧情'},
|
||||||
|
{'type_id': '618041.xyz_21', 'type_name': '经典伦理'},
|
||||||
|
{'type_id': '618041.xyz_7', 'type_name': '成人动漫'},
|
||||||
|
{'type_id': '618041.xyz_14', 'type_name': '精品二区'},
|
||||||
|
{'type_id': '618041.xyz_53', 'type_name': '动漫中字'},
|
||||||
|
{'type_id': '618041.xyz_52', 'type_name': '日本无码'},
|
||||||
|
{'type_id': '618041.xyz_33', 'type_name': '中文字幕'},
|
||||||
|
{'type_id': '618041.xyz_32', 'type_name': '国产自拍'}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
def categoryContent(self, tid, pg, filter, extend):
|
||||||
|
"""分类内容 - 修改为使用固定页数设置"""
|
||||||
|
try:
|
||||||
|
domain, type_id = tid.split('_')
|
||||||
|
url = f"https://{domain}/index.php/vod/type/id/{type_id}.html"
|
||||||
|
if pg and pg != '1':
|
||||||
|
url = url.replace('.html', f'/page/{pg}.html')
|
||||||
|
self.log(f"访问分类URL: {url}")
|
||||||
|
rsp = self.fetch(url, headers=self.headers)
|
||||||
|
doc = self.html(rsp.text)
|
||||||
|
# 在这里将 type_id 传递给 _get_videos 方法
|
||||||
|
videos = self._get_videos(doc, category_id=type_id, limit=20)
|
||||||
|
|
||||||
|
# 使用固定页数设置,而不是尝试从页面解析
|
||||||
|
pagecount = 999
|
||||||
|
total = 19980
|
||||||
|
|
||||||
|
return {
|
||||||
|
'list': videos,
|
||||||
|
'page': int(pg),
|
||||||
|
'pagecount': pagecount,
|
||||||
|
'limit': 20,
|
||||||
|
'total': total
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"分类内容获取出错: {str(e)}")
|
||||||
|
return {'list': []}
|
||||||
|
|
||||||
|
def searchContent(self, key, quick, pg="1"):
|
||||||
|
"""搜索功能 - 完全修复版"""
|
||||||
|
try:
|
||||||
|
# 构造搜索URL
|
||||||
|
search_url = f"{self.host}/index.php/vod/type/id/1/wd/{urllib.parse.quote(key)}/page/{pg}.html"
|
||||||
|
self.log(f"搜索URL: {search_url}")
|
||||||
|
|
||||||
|
# 发送请求
|
||||||
|
rsp = self.fetch(search_url, headers=self.headers)
|
||||||
|
if not rsp or rsp.status_code != 200:
|
||||||
|
self.log("搜索请求失败")
|
||||||
|
return {'list': []}
|
||||||
|
|
||||||
|
# 解析HTML
|
||||||
|
doc = self.html(rsp.text)
|
||||||
|
if not doc:
|
||||||
|
self.log("搜索页面解析失败")
|
||||||
|
return {'list': []}
|
||||||
|
|
||||||
|
# 提取搜索结果
|
||||||
|
videos = self._get_videos(doc, limit=20)
|
||||||
|
|
||||||
|
# 尝试从页面提取分页信息
|
||||||
|
pagecount = 5 # 默认值
|
||||||
|
total = 100 # 默认值
|
||||||
|
|
||||||
|
# 尝试从分页元素中提取真实的分页信息
|
||||||
|
page_elements = doc.xpath('//div[@class="mypage"]/a')
|
||||||
|
if page_elements and len(page_elements) > 0:
|
||||||
|
try:
|
||||||
|
# 查找尾页链接
|
||||||
|
last_page = None
|
||||||
|
for elem in page_elements:
|
||||||
|
href = elem.xpath('./@href')[0]
|
||||||
|
if '尾页' in elem.text or 'page/' in href:
|
||||||
|
last_page = href
|
||||||
|
break
|
||||||
|
|
||||||
|
if last_page:
|
||||||
|
# 从尾页URL中提取页码
|
||||||
|
page_match = re.search(r'/page/(\d+)\.html', last_page)
|
||||||
|
if page_match:
|
||||||
|
pagecount = int(page_match.group(1))
|
||||||
|
total = pagecount * 20 # 估算总数
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {
|
||||||
|
'list': videos,
|
||||||
|
'page': int(pg),
|
||||||
|
'pagecount': pagecount,
|
||||||
|
'limit': 20,
|
||||||
|
'total': total
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"搜索出错: {str(e)}")
|
||||||
|
return {'list': []}
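# Stand-alone version of the "尾页" page-count parse used above: pull N out of a /page/N.html
# link. The href is fabricated for the demo.
def _demo_last_page(href="/index.php/vod/type/id/1/wd/abc/page/37.html"):
    import re
    m = re.search(r'/page/(\d+)\.html', href)
    return int(m.group(1)) if m else 1

# _demo_last_page() -> 37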
|
||||||
|
|
||||||
|
def detailContent(self, ids):
|
||||||
|
"""详情页面 - 特别处理特殊分区的链接"""
|
||||||
|
try:
|
||||||
|
vid = ids[0]
|
||||||
|
|
||||||
|
# 检查是否是特殊分区的链接
|
||||||
|
if vid.startswith('special_'):
|
||||||
|
# 解析特殊分区ID格式: special_{category_id}_{video_id}_{encoded_url}
|
||||||
|
parts = vid.split('_')
|
||||||
|
if len(parts) >= 4:
|
||||||
|
category_id = parts[1]
|
||||||
|
video_id = parts[2]
|
||||||
|
encoded_url = '_'.join(parts[3:])
|
||||||
|
play_url = urllib.parse.unquote(encoded_url)
|
||||||
|
|
||||||
|
self.log(f"特殊分区视频,直接使用链接: {play_url}")
|
||||||
|
|
||||||
|
# 从播放链接中提取视频URL
|
||||||
|
parsed_url = urllib.parse.urlparse(play_url)
|
||||||
|
query_params = urllib.parse.parse_qs(parsed_url.query)
|
||||||
|
video_url = query_params.get('v', [''])[0]
|
||||||
|
pic_url = query_params.get('b', [''])[0]
|
||||||
|
title_encrypted = query_params.get('m', [''])[0]
|
||||||
|
|
||||||
|
# 解码标题
|
||||||
|
title = self._decrypt_title(title_encrypted)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'list': [{
|
||||||
|
'vod_id': vid,
|
||||||
|
'vod_name': title,
|
||||||
|
'vod_pic': pic_url,
|
||||||
|
'vod_remarks': '',
|
||||||
|
'vod_year': '',
|
||||||
|
'vod_play_from': '直接播放',
|
||||||
|
'vod_play_url': f"第1集${play_url}"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
|
# 常规处理
|
||||||
|
if '_' in vid and len(vid.split('_')) > 2:
|
||||||
|
domain, category_id, video_id = vid.split('_')
|
||||||
|
else:
|
||||||
|
domain, video_id = vid.split('_')
|
||||||
|
|
||||||
|
detail_url = f"https://{domain}/index.php/vod/detail/id/{video_id}.html"
|
||||||
|
|
||||||
|
self.log(f"访问详情URL: {detail_url}")
|
||||||
|
rsp = self.fetch(detail_url, headers=self.headers)
|
||||||
|
doc = self.html(rsp.text)
|
||||||
|
video_info = self._get_detail(doc, rsp.text, vid)
|
||||||
|
return {'list': [video_info]} if video_info else {'list': []}
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"详情获取出错: {str(e)}")
|
||||||
|
return {'list': []}
|
||||||
|
|
||||||
|
def playerContent(self, flag, id, vipFlags):
|
||||||
|
"""播放链接 - 特别处理特殊分区的链接"""
|
||||||
|
try:
|
||||||
|
self.log(f"获取播放链接: flag={flag}, id={id}")
|
||||||
|
|
||||||
|
# 检查是否是特殊分区的链接
|
||||||
|
if id.startswith('special_'):
|
||||||
|
# 解析特殊分区ID格式: special_{category_id}_{video_id}_{encoded_url}
|
||||||
|
parts = id.split('_')
|
||||||
|
if len(parts) >= 4:
|
||||||
|
category_id = parts[1]
|
||||||
|
video_id = parts[2]
|
||||||
|
encoded_url = '_'.join(parts[3:])
|
||||||
|
play_url = urllib.parse.unquote(encoded_url)
|
||||||
|
|
||||||
|
self.log(f"特殊分区视频,直接使用链接: {play_url}")
|
||||||
|
|
||||||
|
# 从播放链接中提取视频URL
|
||||||
|
parsed_url = urllib.parse.urlparse(play_url)
|
||||||
|
query_params = urllib.parse.parse_qs(parsed_url.query)
|
||||||
|
video_url = query_params.get('v', [''])[0]
|
||||||
|
|
||||||
|
if video_url:
|
||||||
|
# 确保URL是完整的
|
||||||
|
if video_url.startswith('//'):
|
||||||
|
video_url = 'https:' + video_url
|
||||||
|
elif not video_url.startswith('http'):
|
||||||
|
video_url = urljoin(self.host, video_url)
|
||||||
|
|
||||||
|
self.log(f"从特殊链接中提取到视频地址: {video_url}")
|
||||||
|
return {'parse': 0, 'playUrl': '', 'url': video_url}
|
||||||
|
|
||||||
|
# 检查传入的ID是否为完整URL,如果是则直接解析
|
||||||
|
if id.startswith('http'):
|
||||||
|
self.log("ID 是一个完整URL,直接解析参数")
|
||||||
|
parsed_url = urllib.parse.urlparse(id)
|
||||||
|
query_params = urllib.parse.parse_qs(parsed_url.query)
|
||||||
|
|
||||||
|
# 尝试获取视频参数
|
||||||
|
video_url = query_params.get('v', [''])[0]
|
||||||
|
if not video_url:
|
||||||
|
# 尝试其他可能的参数名
|
||||||
|
for key in query_params:
|
||||||
|
if key in ['url', 'src', 'file']:
|
||||||
|
video_url = query_params[key][0]
|
||||||
|
break
|
||||||
|
|
||||||
|
if video_url:
|
||||||
|
# 解码可能的URL编码
|
||||||
|
video_url = urllib.parse.unquote(video_url)
|
||||||
|
# 确保URL是完整的
|
||||||
|
if video_url.startswith('//'):
|
||||||
|
video_url = 'https:' + video_url
|
||||||
|
elif not video_url.startswith('http'):
|
||||||
|
# 尝试添加基本域名
|
||||||
|
video_url = urljoin(self.host, video_url)
|
||||||
|
|
||||||
|
self.log(f"从 URL 参数中提取到视频地址: {video_url}")
|
||||||
|
return {'parse': 0, 'playUrl': '', 'url': video_url}
|
||||||
|
else:
|
||||||
|
self.log("URL 中没有找到视频参数,尝试从页面提取")
|
||||||
|
# 请求页面并提取视频链接
|
||||||
|
rsp = self.fetch(id, headers=self.headers)
|
||||||
|
if rsp and rsp.status_code == 200:
|
||||||
|
video_url = self._extract_direct_video_url(rsp.text)
|
||||||
|
if video_url:
|
||||||
|
self.log(f"从页面提取到视频地址: {video_url}")
|
||||||
|
return {'parse': 0, 'playUrl': '', 'url': video_url}
|
||||||
|
|
||||||
|
self.log("无法从页面提取视频链接,返回原始URL")
|
||||||
|
return {'parse': 1, 'playUrl': '', 'url': id}
|
||||||
|
|
||||||
|
# 从新的 id 格式中提取视频ID和分类ID
|
||||||
|
if id.count('_') >= 2:
|
||||||
|
parts = id.split('_')
|
||||||
|
video_id = parts[-1]
|
||||||
|
category_id = parts[1]
|
||||||
|
else:
|
||||||
|
video_id = id.split('_')[-1]
|
||||||
|
category_id = ''
|
||||||
|
|
||||||
|
self.log(f"视频ID: {video_id}, 分类ID: {category_id}")
|
||||||
|
|
||||||
|
# 对于特殊分类,使用直接解析播放页面的方式
|
||||||
|
if category_id in self.special_categories:
|
||||||
|
self.log("特殊分类,尝试从详情页提取直接播放链接")
|
||||||
|
# 构造播放页面URL
|
||||||
|
play_page_url = f"{self.host}/index.php/vod/play/id/{video_id}.html"
|
||||||
|
|
||||||
|
# 请求播放页面
|
||||||
|
rsp = self.fetch(play_page_url, headers=self.headers)
|
||||||
|
if rsp and rsp.status_code == 200:
|
||||||
|
# 从页面提取视频链接
|
||||||
|
video_url = self._extract_direct_video_url(rsp.text)
|
||||||
|
if video_url:
|
||||||
|
self.log(f"从播放页面提取到视频地址: {video_url}")
|
||||||
|
return {'parse': 0, 'playUrl': '', 'url': video_url}
|
||||||
|
|
||||||
|
# 如果提取失败,回退到API方式
|
||||||
|
self.log("从播放页面提取失败,尝试API方式")
|
||||||
|
return self._get_video_by_api(id, video_id)
|
||||||
|
else:
|
||||||
|
# 其他分类使用API方式
|
||||||
|
self.log("使用API方式获取视频地址")
|
||||||
|
return self._get_video_by_api(id, video_id)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"播放链接获取出错: {str(e)}")
|
||||||
|
if '_' in id:
|
||||||
|
domain, play_id = id.split('_')
|
||||||
|
play_url = f"https://{domain}/html/kkyd.html?m={play_id}"
|
||||||
|
else:
|
||||||
|
play_url = f"{self.host}/html/kkyd.html?m={id}"
|
||||||
|
return {'parse': 1, 'playUrl': '', 'url': play_url}
|
||||||
|
|
||||||
|
def _get_video_by_api(self, id, video_id):
|
||||||
|
"""通过API获取视频地址"""
|
||||||
|
try:
|
||||||
|
api_url = f"{self.api_host}/api/v2/vod/reqplay/{video_id}"
|
||||||
|
self.log(f"请求API获取视频地址: {api_url}")
|
||||||
|
|
||||||
|
api_headers = self.headers.copy()
|
||||||
|
api_headers.update({
|
||||||
|
'Referer': f"{self.host}/",
|
||||||
|
'Origin': self.host,
|
||||||
|
'X-Requested-With': 'XMLHttpRequest'
|
||||||
|
})
|
||||||
|
|
||||||
|
api_response = self.fetch(api_url, headers=api_headers)
|
||||||
|
if api_response and api_response.status_code == 200:
|
||||||
|
data = api_response.json()
|
||||||
|
self.log(f"API响应: {data}")
|
||||||
|
|
||||||
|
if data.get('retcode') == 3:
|
||||||
|
video_url = data.get('data', {}).get('httpurl_preview', '')
|
||||||
|
else:
|
||||||
|
video_url = data.get('data', {}).get('httpurl', '')
|
||||||
|
|
||||||
|
if video_url:
|
||||||
|
video_url = video_url.replace('?300', '')
|
||||||
|
self.log(f"从API获取到视频地址: {video_url}")
|
||||||
|
return {'parse': 0, 'playUrl': '', 'url': video_url}
|
||||||
|
else:
|
||||||
|
self.log("API响应中没有找到视频地址")
|
||||||
|
else:
|
||||||
|
self.log(f"API请求失败,状态码: {api_response.status_code if api_response else '无响应'}")
|
||||||
|
|
||||||
|
if '_' in id:
|
||||||
|
domain, play_id = id.split('_')
|
||||||
|
play_url = f"https://{domain}/html/kkyd.html?m={play_id}"
|
||||||
|
else:
|
||||||
|
play_url = f"{self.host}/html/kkyd.html?m={id}"
|
||||||
|
self.log(f"API请求失败,回退到播放页面: {play_url}")
|
||||||
|
return {'parse': 1, 'playUrl': '', 'url': play_url}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"API方式获取视频出错: {str(e)}")
|
||||||
|
if '_' in id:
|
||||||
|
domain, play_id = id.split('_')
|
||||||
|
play_url = f"https://{domain}/html/kkyd.html?m={play_id}"
|
||||||
|
else:
|
||||||
|
play_url = f"{self.host}/html/kkyd.html?m={id}"
|
||||||
|
return {'parse': 1, 'playUrl': '', 'url': play_url}
|
||||||
|
|
||||||
|
def _extract_direct_video_url(self, html_content):
|
||||||
|
"""从HTML内容中提取直接播放链接 (优化版)"""
|
||||||
|
try:
|
||||||
|
# 首先尝试提取明显的视频链接
|
||||||
|
patterns = [
|
||||||
|
r'v=([^&]+\.(?:m3u8|mp4))',
|
||||||
|
r'"url"\s*:\s*["\']([^"\']+\.(?:mp4|m3u8))["\']',
|
||||||
|
r'src\s*=\s*["\']([^"\']+\.(?:mp4|m3u8))["\']',
|
||||||
|
r'http[^\s<>"\'?]+\.(?:mp4|m3u8)'
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in patterns:
|
||||||
|
matches = re.findall(pattern, html_content, re.IGNORECASE)
|
||||||
|
for match in matches:
|
||||||
|
if isinstance(match, tuple):
|
||||||
|
match = match[0]
|
||||||
|
extracted_url = match.replace('\\', '')
|
||||||
|
extracted_url = urllib.parse.unquote(extracted_url)
|
||||||
|
|
||||||
|
# protocol-relative URLs get a scheme first, then anything absolute is returned
if extracted_url.startswith('//'):
    extracted_url = 'https:' + extracted_url
if extracted_url.startswith('http'):
    return extracted_url
|
||||||
|
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"提取直接播放URL出错: {str(e)}")
|
||||||
|
return None
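# Quick self-test of the URL patterns above against a fabricated player snippet; handy when
# the site changes its markup. The JSON string and media.example.com host are assumptions.
def _demo_extract_patterns():
    import re
    snippet = 'var cfg = {"url": "https://media.example.com/v/abc/video.m3u8"};'
    m = re.search(r'"url"\s*:\s*["\']([^"\']+\.(?:mp4|m3u8))["\']', snippet, re.IGNORECASE)
    return m.group(1) if m else None

# _demo_extract_patterns() -> 'https://media.example.com/v/abc/video.m3u8'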
|
||||||
|
|
||||||
|
def _get_videos(self, doc, category_id=None, limit=None):
|
||||||
|
"""获取影片列表 - 根据实际网站结构"""
|
||||||
|
try:
|
||||||
|
videos = []
|
||||||
|
elements = doc.xpath('//a[@class="vodbox"]')
|
||||||
|
self.log(f"找到 {len(elements)} 个vodbox元素")
|
||||||
|
for elem in elements:
|
||||||
|
video = self._extract_video(elem, category_id)
|
||||||
|
if video:
|
||||||
|
videos.append(video)
|
||||||
|
return videos[:limit] if limit and videos else videos
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"获取影片列表出错: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _extract_video(self, element, category_id=None):
|
||||||
|
"""提取影片信息 - 特别处理特殊分区的链接"""
|
||||||
|
try:
|
||||||
|
link = element.xpath('./@href')[0]
|
||||||
|
if link.startswith('/'):
|
||||||
|
link = self.host + link
|
||||||
|
|
||||||
|
# 检查是否是特殊分区的链接
|
||||||
|
is_special_link = 'ar-kk.html' in link or 'ar.html' in link
|
||||||
|
|
||||||
|
# 对于特殊分区,直接使用链接本身作为ID
|
||||||
|
if is_special_link and category_id in self.special_categories:
|
||||||
|
# 提取链接中的参数
|
||||||
|
parsed_url = urllib.parse.urlparse(link)
|
||||||
|
query_params = urllib.parse.parse_qs(parsed_url.query)
|
||||||
|
|
||||||
|
# 获取视频ID(从v参数中提取)
|
||||||
|
video_url = query_params.get('v', [''])[0]
|
||||||
|
if video_url:
|
||||||
|
# 从视频URL中提取ID
|
||||||
|
video_id_match = re.search(r'/([a-f0-9-]+)/video\.m3u8', video_url)
|
||||||
|
if video_id_match:
|
||||||
|
video_id = video_id_match.group(1)
|
||||||
|
else:
|
||||||
|
# 如果没有匹配到,使用哈希值
|
||||||
|
video_id = str(hash(link) % 1000000)
|
||||||
|
else:
|
||||||
|
video_id = str(hash(link) % 1000000)
|
||||||
|
|
||||||
|
# 对于特殊分区,保留完整的链接作为vod_id的一部分
|
||||||
|
final_vod_id = f"special_{category_id}_{video_id}_{urllib.parse.quote(link)}"
|
||||||
|
else:
|
||||||
|
# 常规处理
|
||||||
|
vod_id = self.regStr(r'm=(\d+)', link)
|
||||||
|
if not vod_id:
|
||||||
|
vod_id = str(hash(link) % 1000000)
|
||||||
|
|
||||||
|
final_vod_id = f"618041.xyz_{vod_id}"
|
||||||
|
if category_id:
|
||||||
|
final_vod_id = f"618041.xyz_{category_id}_{vod_id}"
|
||||||
|
|
||||||
|
# 提取标题
|
||||||
|
title_elem = element.xpath('.//p[@class="km-script"]/text()')
|
||||||
|
if not title_elem:
|
||||||
|
title_elem = element.xpath('.//p[contains(@class, "script")]/text()')
|
||||||
|
if not title_elem:
|
||||||
|
title_elem = element.xpath('.//p/text()')
|
||||||
|
if not title_elem:
|
||||||
|
title_elem = element.xpath('.//h3/text()')
|
||||||
|
if not title_elem:
|
||||||
|
title_elem = element.xpath('.//h4/text()')
|
||||||
|
if not title_elem:
|
||||||
|
self.log(f"未找到标题元素,跳过该视频")
|
||||||
|
return None
|
||||||
|
|
||||||
|
title_encrypted = title_elem[0].strip()
|
||||||
|
title = self._decrypt_title(title_encrypted)
|
||||||
|
|
||||||
|
# 提取图片
|
||||||
|
pic_elem = element.xpath('.//img/@data-original')
|
||||||
|
if not pic_elem:
|
||||||
|
pic_elem = element.xpath('.//img/@src')
|
||||||
|
pic = pic_elem[0] if pic_elem else ''
|
||||||
|
|
||||||
|
if pic:
|
||||||
|
if pic.startswith('//'):
|
||||||
|
pic = 'https:' + pic
|
||||||
|
elif pic.startswith('/'):
|
||||||
|
pic = self.host + pic
|
||||||
|
|
||||||
|
return {
|
||||||
|
'vod_id': final_vod_id,
|
||||||
|
'vod_name': title,
|
||||||
|
'vod_pic': pic,
|
||||||
|
'vod_remarks': '',
|
||||||
|
'vod_year': ''
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"提取影片信息出错: {str(e)}")
|
||||||
|
return None
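# Round-trip sketch for the special_{分类ID}_{视频ID}_{quote(链接)} ids built above and unpacked
# again in detailContent()/playerContent(). The link value is illustrative.
def _demo_special_id(category_id='13', video_id='abc123',
                     link='https://618041.xyz/html/ar-kk.html?v=//cdn.example.com/x/video.m3u8'):
    import urllib.parse
    vod_id = f"special_{category_id}_{video_id}_{urllib.parse.quote(link)}"
    parts = vod_id.split('_')
    assert parts[1] == category_id and parts[2] == video_id
    return urllib.parse.unquote('_'.join(parts[3:]))       # recovers the original link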
|
||||||
|
|
||||||
|
def _decrypt_title(self, encrypted_text):
|
||||||
|
"""解密标题 - 使用网站的解密算法"""
|
||||||
|
try:
|
||||||
|
decrypted_chars = []
|
||||||
|
for char in encrypted_text:
|
||||||
|
code_point = ord(char)
|
||||||
|
decrypted_code = code_point ^ 128
|
||||||
|
decrypted_char = chr(decrypted_code)
|
||||||
|
decrypted_chars.append(decrypted_char)
|
||||||
|
|
||||||
|
decrypted_text = ''.join(decrypted_chars)
|
||||||
|
return decrypted_text
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"标题解密失败: {str(e)}")
|
||||||
|
return encrypted_text
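# The obfuscation undone above is a per-character XOR with 0x80, so the same transform works
# in both directions; round-trip check with a made-up title.
def _demo_xor_title(text="香蕉精品 示例标题"):
    def flip(s):
        return ''.join(chr(ord(ch) ^ 128) for ch in s)
    return flip(flip(text)) == text        # -> True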
|
||||||
|
|
||||||
|
def _get_detail(self, doc, html_content, vid):
|
||||||
|
"""获取详情信息 (优化版) - 修复播放源提取问题"""
|
||||||
|
try:
|
||||||
|
title = self._get_text(doc, ['//h1/text()', '//title/text()'])
|
||||||
|
pic = self._get_text(doc, ['//div[contains(@class,"dyimg")]//img/@src', '//img[contains(@class,"poster")]/@src'])
|
||||||
|
if pic and pic.startswith('/'):
|
||||||
|
pic = self.host + pic
|
||||||
|
desc = self._get_text(doc, ['//div[contains(@class,"yp_context")]/text()', '//div[contains(@class,"introduction")]//text()'])
|
||||||
|
actor = self._get_text(doc, ['//span[contains(text(),"主演")]/following-sibling::*/text()'])
|
||||||
|
director = self._get_text(doc, ['//span[contains(text(),"导演")]/following-sibling::*/text()'])
|
||||||
|
|
||||||
|
play_from = []
|
||||||
|
play_urls = []
|
||||||
|
|
||||||
|
# 使用更灵活的正则匹配来查找播放链接
|
||||||
|
player_link_patterns = [
|
||||||
|
re.compile(r'href="(.*?ar\.html.*?)"'),
|
||||||
|
re.compile(r'href="(.*?kkyd\.html.*?)"'),
|
||||||
|
re.compile(r'href="(.*?ar-kk\.html.*?)"')
|
||||||
|
]
|
||||||
|
|
||||||
|
player_links = []
|
||||||
|
for pattern in player_link_patterns:
|
||||||
|
matches = pattern.findall(html_content)
|
||||||
|
player_links.extend(matches)
|
||||||
|
|
||||||
|
if player_links:
|
||||||
|
episodes = []
|
||||||
|
for link in player_links:
|
||||||
|
full_url = urljoin(self.host, link)
|
||||||
|
episodes.append(f"第1集${full_url}")
|
||||||
|
|
||||||
|
if episodes:
|
||||||
|
play_from.append("默认播放源")
|
||||||
|
play_urls.append('#'.join(episodes))
|
||||||
|
|
||||||
|
if not play_from:
|
||||||
|
self.log("未找到播放源元素,无法定位播放源列表")
|
||||||
|
return {
|
||||||
|
'vod_id': vid,
|
||||||
|
'vod_name': title,
|
||||||
|
'vod_pic': pic,
|
||||||
|
'type_name': '',
|
||||||
|
'vod_year': '',
|
||||||
|
'vod_area': '',
|
||||||
|
'vod_remarks': '',
|
||||||
|
'vod_actor': actor,
|
||||||
|
'vod_director': director,
|
||||||
|
'vod_content': desc,
|
||||||
|
'vod_play_from': '默认播放源',
|
||||||
|
'vod_play_url': f"第1集${vid}"
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
'vod_id': vid,
|
||||||
|
'vod_name': title,
|
||||||
|
'vod_pic': pic,
|
||||||
|
'type_name': '',
|
||||||
|
'vod_year': '',
|
||||||
|
'vod_area': '',
|
||||||
|
'vod_remarks': '',
|
||||||
|
'vod_actor': actor,
|
||||||
|
'vod_director': director,
|
||||||
|
'vod_content': desc,
|
||||||
|
'vod_play_from': '$$$'.join(play_from),
|
||||||
|
'vod_play_url': '$$$'.join(play_urls)
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"获取详情出错: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _get_text(self, doc, selectors):
|
||||||
|
"""通用文本提取"""
|
||||||
|
for selector in selectors:
|
||||||
|
try:
|
||||||
|
texts = doc.xpath(selector)
|
||||||
|
for text in texts:
|
||||||
|
if text and text.strip():
|
||||||
|
return text.strip()
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
return ''
|
||||||
|
|
||||||
|
def log(self, message):
|
||||||
|
"""日志输出"""
|
||||||
|
print(f"[苹果视频] {message}")
|
||||||
|
|
||||||
|
def fetch(self, url, headers=None, method='GET', data=None, timeout=10):
|
||||||
|
"""网络请求"""
|
||||||
|
try:
|
||||||
|
if headers is None:
|
||||||
|
headers = self.headers
|
||||||
|
if method == 'GET':
|
||||||
|
response = requests.get(url, headers=headers, timeout=timeout, verify=False)
|
||||||
|
else:
|
||||||
|
response = requests.post(url, headers=headers, data=data, timeout=timeout, verify=False)
|
||||||
|
return response
|
||||||
|
except Exception as e:
|
||||||
|
self.log(f"网络请求失败: {url}, 错误: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# 注册爬虫
|
||||||
|
if __name__ == '__main__':
|
||||||
|
from base.spider import Spider as BaseSpider
|
||||||
|
BaseSpider.register(Spider())