fun/py/奇优动漫.py
2026-01-07 04:36:44 +00:00

379 lines
No EOL
17 KiB
Python
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# coding=utf-8
#!/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import json
import urllib.parse
import re
from lxml import etree
from urllib.parse import urljoin
class Spider(Spider):
    """Scraper plugin for the qiyoudy5.com video site.

    The class deliberately reuses the name ``Spider`` and subclasses the
    ``Spider`` imported from ``base.spider``.  ``self.fetch`` and ``self.post``
    are not defined here; they are presumably provided by that base class
    (TODO confirm) and are expected to return response objects exposing
    ``.content`` (bytes) and ``.text`` (str).

    All public entry points are best-effort: any ``Exception`` raised while
    fetching or parsing is swallowed and an empty/fallback result is returned.
    """

    # Site root; relative hrefs scraped from the pages are appended to it.
    _HOST = "http://qiyoudy5.com"
    _USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    # Matches the first absolute URL that contains ".m3u8".
    _M3U8_RE = re.compile(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*')
    # Encodings tried in order when decoding a response body.
    _ENCODINGS = ('utf-8', 'gbk', 'gb2312', 'iso-8859-1')

    def getName(self):
        """Return the display name of this source."""
        return "奇优影院"

    def init(self, extend):
        """No initialisation is required; ``extend`` is ignored."""
        pass

    def homeContent(self, filter):
        """Return the static category list and per-category sort filters.

        ``filter`` is accepted for interface compatibility and is not used.

        Returns a dict with:
          * ``class``   - list of ``{'type_name', 'type_id'}`` entries.
          * ``filters`` - mapping of each ``type_id`` to a single "by" filter
            offering ``time`` and ``hit`` ordering.
        """
        cateManual = {
            "电影": "1",
            "电视剧": "2",
            "动漫": "3",
            "综艺": "4",
            "午夜": "6"
        }
        result = {}
        result['class'] = [
            {'type_name': name, 'type_id': type_id}
            for name, type_id in cateManual.items()
        ]
        # Every category exposes the same sort filter; the literal inside the
        # comprehension yields an independent list/dict per category.
        result['filters'] = {
            type_id: [{
                "key": "by",
                "name": "排序",
                "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}],
            }]
            for type_id in cateManual.values()
        }
        return result

    def homeVideoContent(self):
        """Scrape the home page and return ``{'list': [vod, ...]}``.

        Carousel entries come first (tagged with the remark ``"推荐"``),
        followed by every thumbnail found in the ``stui-vodlist`` lists.
        Returns ``{'list': []}`` if the page cannot be fetched or parsed.
        """
        try:
            rsp = self.fetch(self._HOST + "/")
            root = self.parse_html(rsp.content)
            if root is None:
                return {'list': []}
            videos = []
            # Carousel
            for a in root.xpath("//div[contains(@class,'carousel')]//a[contains(@class,'stui-vodlist__thumb')]"):
                try:
                    caption = a.xpath(".//span[@class='pic-text text-center']/text()")
                    if caption:
                        name = caption[0].strip()
                    else:
                        name = self.get_first(a.xpath("./@title"), "未知")
                    # The carousel image lives in the inline CSS background.
                    style = self.get_first(a.xpath("./@style"))
                    bg_match = re.search(r"background:\s*url\((.*?)\)", style)
                    pic = bg_match.group(1) if bg_match else ""
                    sid = self.get_first(a.xpath("./@href"))
                    videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": "推荐"})
                except Exception:
                    # Skip a malformed entry but keep the rest.
                    continue
            # Video lists
            for a in root.xpath("//ul[contains(@class,'stui-vodlist')]//a[contains(@class,'stui-vodlist__thumb')]"):
                try:
                    videos.append(self._thumb_to_vod(a))
                except Exception:
                    continue
            return {'list': videos}
        except Exception:
            return {'list': []}

    def categoryContent(self, tid, pg, filter, extend):
        """Scrape one page of a category listing.

        Args:
            tid: category id, interpolated into ``/list/{tid}_{pg}.html``.
            pg: requested page, interpolated into the URL as given.
            filter: unused.
            extend: optional mapping; its ``'by'`` entry selects the ``order``
                query parameter (defaults to ``'time'``).

        Returns a dict with ``list``, ``page``, ``pagecount``, ``limit`` and
        ``total``.  ``page`` is the integer read from the active pager link
        when available, otherwise ``pg`` unchanged.  On any failure an empty
        listing with ``pagecount`` 1 and ``total`` 0 is returned.
        """
        empty = {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
        try:
            order = extend.get('by', 'time') if extend else 'time'
            url = f'{self._HOST}/list/{tid}_{pg}.html?order={order}'
            rsp = self.fetch(url)
            root = self.parse_html(rsp.content)
            if root is None:
                return empty
            videos = []
            for a in root.xpath("//a[contains(@class,'stui-vodlist__thumb')]"):
                try:
                    videos.append(self._thumb_to_vod(a))
                except Exception:
                    continue
            active = root.xpath("//ul[contains(@class,'stui-page')]//a[@class='active']/text()")
            try:
                current_page = int(active[0]) if active else pg
            except ValueError:
                # A non-numeric pager label must not discard the scraped list.
                current_page = pg
            # The page count is the largest page number linked from the pager.
            page_numbers = []
            for link in root.xpath("//ul[contains(@class,'stui-page')]//a[contains(@href,'list')]/@href"):
                match = re.search(r'list/\d+_(\d+)\.html', link)
                if match:
                    page_numbers.append(int(match.group(1)))
            total_page = max(page_numbers) if page_numbers else 1
            return {
                'list': videos,
                'page': current_page,
                'pagecount': total_page if total_page > 0 else 9999,
                'limit': 90,
                'total': 999999
            }
        except Exception:
            return empty

    def detailContent(self, array):
        """Scrape the detail page of one video.

        Args:
            array: sequence whose first element is the site-relative path of
                the detail page (the ``vod_id`` produced by the list methods).

        Returns ``{'list': [vod]}``.  ``vod_play_from`` / ``vod_play_url`` are
        only present when at least one playlist tab yielded an episode; tabs
        are joined with ``$$$``, episodes with ``#``, and each episode is
        ``name$href``.  Returns ``{'list': []}`` on any failure.
        """
        try:
            tid = array[0]
            url = f'{self._HOST}{tid}'
            rsp = self.fetch(url)
            root = self.parse_html(rsp.content)
            if root is None:
                return {'list': []}
            # Basic information
            detail_node = (
                root.xpath("//div[contains(@class,'stui-content__detail')]")
                or root.xpath("//div[@class='stui-player__detail']")
            )
            pic = title = area = director = actor = year = desc = ""
            if detail_node:
                detail_node = detail_node[0]
                pic = self.get_first(
                    root.xpath("//meta[@property='og:image']/@content")
                    or detail_node.xpath(".//img/@data-original")
                )
                title = self.get_first(detail_node.xpath(".//h1//text()"))
                if not title:
                    # Fall back to the text between 《 》 in the page <title>.
                    page_title = self.get_first(root.xpath("//title/text()"))
                    title_match = re.search(r"《(.*?)》", page_title) if page_title else None
                    title = title_match.group(1) if title_match else ""
                area = self.get_first(root.xpath("//meta[@property='og:video:area']/@content"))
                director = self.get_first(root.xpath("//meta[@property='og:video:director']/@content"))
                actor = self.get_first(root.xpath("//meta[@property='og:video:actor']/@content"))
                year_info = self.get_first(root.xpath("//p[@class='data']//text()[contains(.,'年份:')]"))
                year_match = re.search(r"年份:(\d{4})", year_info) if year_info else None
                year = year_match.group(1) if year_match else ""
                desc = self.get_first(root.xpath("//meta[@property='og:description']/@content"))
            # Playlists: each nav tab links (via "#id") to a playlist container.
            playFrom, playUrl = [], []
            for tab in root.xpath("//ul[contains(@class,'nav-tabs')]/li"):
                tab_name = self.get_first(tab.xpath(".//a/text()"))
                tab_id = self.get_first(tab.xpath(".//a/@href")).replace("#", "")
                if not (tab_name and tab_id):
                    continue
                episodes = []
                for episode in root.xpath(f"//div[@id='{tab_id}']//ul[contains(@class,'stui-content__playlist')]//a"):
                    ep_name = self.get_first(episode.xpath("./text()")) or "播放"
                    ep_url = self.get_first(episode.xpath("./@href"))
                    if ep_url:
                        episodes.append(f"{ep_name}${ep_url}")
                # Record the source name and its episode list together so the
                # two "$$$"-joined strings always have matching segments.
                if episodes:
                    playFrom.append(tab_name)
                    playUrl.append("#".join(episodes))
            vod = {
                "vod_id": tid,
                "vod_name": title,
                "vod_pic": pic,
                "vod_year": year,
                "vod_area": area,
                "vod_actor": actor,
                "vod_director": director,
                "vod_content": desc
            }
            if playFrom and playUrl:
                vod['vod_play_from'] = "$$$".join(playFrom)
                vod['vod_play_url'] = "$$$".join(playUrl)
            return {'list': [vod]}
        except Exception:
            return {'list': []}

    def searchContent(self, key, quick, page='1'):
        """Search the site for ``key`` and return ``{'list': [vod, ...]}``.

        The keyword is POSTed once to ``/search.php`` as ``searchword``.
        ``quick`` and ``page`` are accepted for interface compatibility and
        are not used.  Results are de-duplicated on ``(vod_id, vod_name)``.
        Returns ``{'list': []}`` on any failure.
        """
        try:
            url = self._HOST + "/search.php"
            post_data = {
                'searchword': key,
            }
            headers = {
                "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
                "User-Agent": self._USER_AGENT,
                "Referer": self._HOST + "/",
                "Origin": self._HOST
            }
            rsp = self.post(url, data=post_data, headers=headers)
            root = self.parse_html(rsp.content)
            if root is None:
                return {'list': []}
            videos = []
            # Try several selectors; the first one that matches anything wins.
            selectors = [
                "//ul[contains(@class,'stui-vodlist__media')]//li",
                "//ul[contains(@class,'stui-vodlist')]//li",
                "//a[contains(@class,'stui-vodlist__thumb')]"
            ]
            result_items = []
            for selector in selectors:
                result_items = root.xpath(selector)
                if result_items:
                    break
            for item in result_items:
                try:
                    if item.tag == 'a':  # the match is the thumbnail <a> itself
                        href = self.get_first(item.xpath("./@href"))
                        title = self.get_first(item.xpath("./@title"))
                        pic = self.get_first(item.xpath("./@data-original"))
                        remark = self.get_first(item.xpath(".//span[contains(@class,'pic-text')]/text()"))
                    else:  # an <li> wrapping the thumbnail
                        link = item.xpath(".//a[contains(@class,'stui-vodlist__thumb')]") or item.xpath(".//a")
                        if not link:
                            continue
                        link = link[0]
                        href = self.get_first(link.xpath("./@href"))
                        title = self.get_first(link.xpath("./@title"))
                        pic = self.get_first(link.xpath("./@data-original"))
                        if not pic:
                            # Some thumbnails carry the image as a CSS background.
                            style = self.get_first(link.xpath("./@style"))
                            if style and "background-image" in style:
                                pic_match = re.search(r"background-image:\s*url\(['\"]?(.*?)['\"]?\)", style)
                                if pic_match:
                                    pic = pic_match.group(1)
                        remark = self.get_first(item.xpath(".//span[contains(@class,'pic-text')]/text()"))
                    if href and title:
                        videos.append({
                            "vod_id": href,
                            "vod_name": title.strip(),
                            "vod_pic": pic,
                            "vod_remarks": remark or ""
                        })
                except Exception:
                    continue
            # Fallback: any anchor whose href contains "/vod/".
            if not videos:
                for a in root.xpath("//a[contains(@href,'/vod/')]"):
                    try:
                        href = self.get_first(a.xpath("./@href"))
                        title = self.get_first(a.xpath("./@title")) or self.get_first(a.xpath(".//text()"))
                        pic = self.get_first(a.xpath("./@data-original"))
                        remark = self.get_first(a.xpath(".//span[contains(@class,'pic-text')]/text()"))
                        if href and title:
                            videos.append({
                                "vod_id": href,
                                "vod_name": title.strip(),
                                "vod_pic": pic,
                                "vod_remarks": remark or ""
                            })
                    except Exception:
                        continue
            # De-duplicate while preserving first-seen order.
            seen = set()
            unique_videos = []
            for video in videos:
                identifier = (video["vod_id"], video["vod_name"])
                if identifier not in seen:
                    seen.add(identifier)
                    unique_videos.append(video)
            return {'list': unique_videos}
        except Exception:
            return {'list': []}

    def playerContent(self, flag, id, vipFlags):
        """Resolve a play-page path to a playable URL.

        Args:
            flag: unused.
            id: site-relative path of the play page.
            vipFlags: unused.

        Strategies are tried in order and the first ``.m3u8`` URL found wins:
          1. a ``content.php`` API link embedded in the page,
          2. the documents behind ``<iframe>`` / ``/play/`` ``src`` attributes,
          3. an ``.m3u8`` URL present directly in the page.
        A hit is returned with ``parse`` 0.  Otherwise the play-page URL is
        returned with ``parse`` 1 so it can be resolved externally; the same
        fallback (with the site root as Referer) is used when fetching or
        parsing the page raises.
        """
        url = f"{self._HOST}{id}"
        try:
            rsp = self.fetch(url)
            _, html_content = self.parse_html(rsp.content, return_content=True)
            headers = {
                "User-Agent": self._USER_AGENT,
                "Referer": url,
            }
            # 1. API link
            for pattern in [r"http://api\.yongfan99\.com:81/content\.php\?[^'\"]+", r"content\.php\?vid=[^&]+&type=[^'\"]+"]:
                match = re.search(pattern, html_content)
                if match:
                    api_url = match.group(0)
                    if not api_url.startswith('http'):
                        api_url = "http://api.yongfan99.com:81/" + api_url
                    try:
                        api_rsp = self.fetch(api_url, headers=headers)
                        m3u8_match = self._M3U8_RE.search(api_rsp.text)
                        if m3u8_match:
                            return self._direct_play(m3u8_match.group(0), headers)
                    except Exception:
                        # Best effort: fall through to the next strategy.
                        pass
            # 2. Player embedded in an iframe
            for pattern in [r'<iframe[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>', r'src\s*=\s*[\'"]((?:http[^\'"]*)?/play/[^\'"]*)[\'"]']:
                for iframe_src in re.findall(pattern, html_content):
                    if not iframe_src.startswith('http'):
                        iframe_src = urljoin(url, iframe_src)
                    try:
                        iframe_rsp = self.fetch(iframe_src, headers=headers)
                        m3u8_match = self._M3U8_RE.search(iframe_rsp.text)
                        if m3u8_match:
                            return self._direct_play(m3u8_match.group(0), headers)
                    except Exception:
                        continue
            # 3. m3u8 link directly in the page
            m3u8_match = self._M3U8_RE.search(html_content)
            if m3u8_match:
                return self._direct_play(m3u8_match.group(0), headers)
            # 4. Hand the original URL back for external parsing
            return {"parse": 1, "playUrl": "", "url": url, "header": headers}
        except Exception:
            return {"parse": 1, "playUrl": "", "url": url, "header": {
                "User-Agent": self._USER_AGENT,
                "Referer": self._HOST + "/",
            }}

    # Helper functions
    def _thumb_to_vod(self, a):
        """Map a ``stui-vodlist__thumb`` anchor element to a vod dict."""
        return {
            "vod_id": self.get_first(a.xpath("./@href")),
            "vod_name": self.get_first(a.xpath("./@title"), "未知"),
            "vod_pic": self.get_first(a.xpath("./@data-original")),
            "vod_remarks": self.get_first(a.xpath(".//span[@class='pic-text text-right']/text()")),
        }

    def _direct_play(self, media_url, headers):
        """Build the ``parse`` 0 result for an already-resolved media URL."""
        return {"parse": 0, "playUrl": "", "url": media_url, "header": headers}

    def parse_html(self, content, return_content=False):
        """Decode a response body, clean it and parse it with ``etree.HTML``.

        Args:
            content: response body as ``bytes``; an already-decoded ``str`` is
                also accepted and used as-is.
            return_content: when true, return ``(root, cleaned_html)`` instead
                of just ``root``.

        ``root`` is whatever ``etree.HTML`` returns; callers check it against
        ``None`` (NOTE(review): lxml may return ``None`` or raise for input
        without parseable markup, depending on version - callers handle both).
        """
        if isinstance(content, str):
            html_content = content
        else:
            for encoding in self._ENCODINGS:
                try:
                    html_content = content.decode(encoding)
                    break
                except UnicodeDecodeError:
                    continue
            else:
                # Safety net only: iso-8859-1 accepts any byte sequence, so
                # this is reached only if the encoding list is changed.
                html_content = content.decode('utf-8', errors='replace')
        html_content = self.clean_html(html_content)
        root = etree.HTML(html_content)
        if return_content:
            return root, html_content
        return root

    def get_first(self, array, default=""):
        """Return ``array[0]``, or ``default`` when ``array`` is empty/falsy."""
        return array[0] if array else default

    def clean_html(self, html_content):
        """Strip control characters and expand a few common HTML entities.

        Tab, LF and CR are kept; the other C0 control characters and DEL are
        removed.  Entities are replaced in the text *before* it is parsed, so
        patterns applied to the cleaned text see literal ``&``, ``<`` etc.
        """
        html_content = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', html_content)
        replacements = {'&nbsp;': ' ', '&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'}
        for old, new in replacements.items():
            html_content = html_content.replace(old, new)
        return html_content

    def isVideoFormat(self, url):
        """Return True if ``url`` contains a known video file extension."""
        return any(fmt in url for fmt in ['.m3u8', '.mp4', '.avi', '.mkv', '.flv', '.webm'])

    def manualVideoCheck(self):
        """Always returns True."""
        return True

    def localProxy(self, param):
        """Local proxying is not implemented; always returns an empty dict."""
        return {}