diff --git a/lib/每日大乱斗.py b/lib/每日大乱斗.py
new file mode 100644
index 0000000..1da3114
--- /dev/null
+++ b/lib/每日大乱斗.py
@@ -0,0 +1,390 @@
+import json
+import re
+import sys
+import hashlib
+from base64 import b64decode, b64encode
+from urllib.parse import urlparse
+
+import requests
+from Crypto.Cipher import AES
+from Crypto.Util.Padding import unpad
+from pyquery import PyQuery as pq
+
+sys.path.append('..')
+from base.spider import Spider as BaseSpider
+
+img_cache = {}
+
+
+class Spider(BaseSpider):
+
+    def init(self, extend=""):
+        try:
+            self.proxies = json.loads(extend)
+        except:
+            self.proxies = {}
+        self.headers = {
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
+            'Accept-Language': 'zh-CN,zh;q=0.9',
+            'Connection': 'keep-alive',
+            'Cache-Control': 'no-cache',
+        }
+        self.host = self.get_working_host()
+        self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})
+        print(f"使用站点: {self.host}")
+
+    def getName(self):
+        return "🌈 每日大乱斗|终极完美版"
+
+    def isVideoFormat(self, url):
+        return any(ext in (url or '') for ext in ['.m3u8', '.mp4', '.ts'])
+
+    def manualVideoCheck(self):
+        return False
+
+    def destroy(self):
+        global img_cache
+        img_cache.clear()
+
+    def get_working_host(self):
+        dynamic_urls = [
+            'https://border.bshzjjgq.cc/',
+            'https://blood.bshzjjgq.cc/'
+        ]
+        for url in dynamic_urls:
+            try:
+                response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)
+                if response.status_code == 200:
+                    # strip the trailing slash so f"{self.host}{path}" joins cleanly later
+                    return url.rstrip('/')
+            except Exception:
+                continue
+        return dynamic_urls[0].rstrip('/')
+
+    def homeContent(self, filter):
+        try:
+            response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
+            if response.status_code != 200:
+                return {'class': [], 'list': []}
+            data = self.getpq(response.text)
+
+            classes = []
+            category_selectors = ['.category-list ul li', '.nav-menu li', '.menu li', 'nav ul li']
+            for selector in category_selectors:
+                for k in data(selector).items():
+                    link = k('a')
+                    href = (link.attr('href') or '').strip()
+                    name = (link.text() or '').strip()
+                    if not href or href == '#' or not name:
+                        continue
+                    classes.append({'type_name': name, 'type_id': href})
+                if classes:
+                    break
+
+            if not classes:
+                classes = [{'type_name': '最新', 'type_id': '/latest/'}, {'type_name': '热门', 'type_id': '/hot/'}]
+
+            return {'class': classes, 'list': self.getlist(data('#index article, article'))}
+        except Exception:
+            return {'class': [], 'list': []}
+
+    def homeVideoContent(self):
+        try:
+            response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
+            if response.status_code != 200:
+                return {'list': []}
+            data = self.getpq(response.text)
+            return {'list': self.getlist(data('#index article, article'))}
+        except Exception:
+            return {'list': []}
+
+    def categoryContent(self, tid, pg, filter, extend):
+        try:
+            if '@folder' in tid:
+                v = self.getfod(tid.replace('@folder', ''))
+                return {'list': v, 'page': 1, 'pagecount': 1, 'limit': 90, 'total': len(v)}
+
+            pg = int(pg) if pg else 1
+
+            if tid.startswith('http'):
+                base_url = tid.rstrip('/')
+            else:
+                path = tid if tid.startswith('/') else f"/{tid}"
+                base_url = f"{self.host}{path}".rstrip('/')
+
+            if pg == 1:
+                url = f"{base_url}/"
+            else:
+                url = f"{base_url}/{pg}/"
+
+            response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
+            if response.status_code != 200:
+                return {'list': [], 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 0}
+
+            data = self.getpq(response.text)
+            videos = self.getlist(data('#archive article, #index article, article'), tid)
+
+            return {'list': videos, 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
+        except Exception:
+            return {'list': [], 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 0}
+
+    def detailContent(self, ids):
+        try:
+            url = ids[0] if ids[0].startswith('http') else f"{self.host}{ids[0]}"
+            response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
+            data = self.getpq(response.text)
+
+            plist = []
+            used_names = set()
+            if data('.dplayer'):
+                for c, k in enumerate(data('.dplayer').items(), start=1):
+                    try:
+                        config_attr = k.attr('data-config')
+                        if config_attr:
+                            config = json.loads(config_attr)
+                            video_url = config.get('video', {}).get('url', '')
+
+                            if video_url:
+                                ep_name = ''
+                                # walk up a few levels looking for the nearest section heading
+                                parent = k.parent()
+                                for _ in range(4):
+                                    if not parent:
+                                        break
+                                    heading = parent.find('h2, h3, h4').eq(0).text().strip()
+                                    if heading:
+                                        ep_name = heading
+                                        break
+                                    parent = parent.parent()
+
+                                base_name = ep_name if ep_name else f"视频{c}"
+                                name = base_name
+                                count = 2
+                                while name in used_names:
+                                    name = f"{base_name} {count}"
+                                    count += 1
+                                used_names.add(name)
+
+                                plist.append(f"{name}${video_url}")
+                    except:
+                        continue
+
+            if not plist:
+                content_area = data('.post-content, article')
+                for i, link in enumerate(content_area('a').items(), start=1):
+                    link_text = link.text().strip()
+                    link_href = link.attr('href')
+
+                    if link_href and any(kw in link_text for kw in ['点击观看', '观看', '播放', '视频', '第一弹', '第二弹', '第三弹', '第四弹', '第五弹', '第六弹', '第七弹', '第八弹', '第九弹', '第十弹']):
+                        ep_name = link_text.replace('点击观看:', '').replace('点击观看', '').strip()
+                        if not ep_name:
+                            ep_name = f"视频{i}"
+
+                        if not link_href.startswith('http'):
+                            link_href = f"{self.host}{link_href}" if link_href.startswith('/') else f"{self.host}/{link_href}"
+
+                        plist.append(f"{ep_name}${link_href}")
+
+            play_url = '#'.join(plist) if plist else f"未找到视频源${url}"
+
+            vod_content = ''
+            try:
+                tags = []
+                seen_names = set()
+                seen_ids = set()
+
+                tag_links = data('.tags a, .keywords a, .post-tags a')
+
+                candidates = []
+                for k in tag_links.items():
+                    title = k.text().strip()
+                    href = k.attr('href')
+                    if title and href:
+                        candidates.append({'name': title, 'id': href})
+
+                # longest names first, so shorter tags contained in an already kept name are skipped
+                candidates.sort(key=lambda x: len(x['name']), reverse=True)
+
+                for item in candidates:
+                    name = item['name']
+                    id_ = item['id']
+
+                    if id_ in seen_ids:
+                        continue
+
+                    is_duplicate = False
+                    for seen in seen_names:
+                        if name in seen:
+                            is_duplicate = True
+                            break
+
+                    if not is_duplicate:
+                        target = json.dumps({'id': id_, 'name': name})
+                        tags.append(f'[a=cr:{target}/]{name}[/a]')
+                        seen_names.add(name)
+                        seen_ids.add(id_)
+
+                if tags:
+                    vod_content = ' '.join(tags)
+                else:
+                    vod_content = data('.post-title').text()
+            except Exception:
+                vod_content = '获取标签失败'
+
+            if not vod_content:
+                vod_content = data('h1').text() or '每日大乱斗'
+
+            return {'list': [{'vod_play_from': '每日大乱斗', 'vod_play_url': play_url, 'vod_content': vod_content}]}
+        except:
+            return {'list': [{'vod_play_from': '每日大乱斗', 'vod_play_url': '获取失败'}]}
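+
+    # Illustrative sketch (not executed): the DPlayer data-config parsing above,
+    # applied to a hypothetical container; the markup and URL are made up.
+    #
+    #   import json
+    #   from pyquery import PyQuery as pq
+    #   doc = pq('<div class="dplayer" data-config=\'{"video":{"url":"https://cdn.example/ep1.m3u8"}}\'></div>')
+    #   json.loads(doc.attr('data-config')).get('video', {}).get('url', '')
+    #   # -> 'https://cdn.example/ep1.m3u8'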
+
+    def searchContent(self, key, quick, pg="1"):
+        try:
+            pg = int(pg) if pg else 1
+
+            if pg == 1:
+                url = f"{self.host}/search/{key}/"
+            else:
+                url = f"{self.host}/search/{key}/{pg}/"
+
+            response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
+            return {'list': self.getlist(self.getpq(response.text)('article')), 'page': pg, 'pagecount': 9999}
+        except:
+            return {'list': [], 'page': pg, 'pagecount': 9999}
+
+    def playerContent(self, flag, id, vipFlags):
+        parse = 0 if self.isVideoFormat(id) else 1
+        url = self.proxy(id) if '.m3u8' in id else id
+        return {'parse': parse, 'url': url, 'header': self.headers}
+
+    def localProxy(self, param):
+        try:
+            type_ = param.get('type')
+            url = param.get('url')
+            if type_ == 'cache':
+                key = param.get('key')
+                if content := img_cache.get(key):
+                    return [200, 'image/jpeg', content]
+                return [404, 'text/plain', b'Expired']
+            elif type_ == 'img':
+                real_url = self.d64(url) if not url.startswith('http') else url
+                res = requests.get(real_url, headers=self.headers, proxies=self.proxies, timeout=10)
+                content = self.aesimg(res.content)
+                return [200, 'image/jpeg', content]
+            elif type_ == 'm3u8':
+                return self.m3Proxy(url)
+            else:
+                return self.tsProxy(url)
+        except:
+            return [404, 'text/plain', b'']
+
+    def proxy(self, data, type='m3u8'):
+        if data and self.proxies:
+            return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
+        return data
+
+    def m3Proxy(self, url):
+        url = self.d64(url)
+        res = requests.get(url, headers=self.headers, proxies=self.proxies)
+        data = res.text
+        base = res.url.rsplit('/', 1)[0]
+        lines = []
+        for line in data.split('\n'):
+            if '#EXT' not in line and line.strip():
+                # segment URI line: absolutize it, then route it back through this proxy
+                if not line.startswith('http'):
+                    line = f"{base}/{line}"
+                lines.append(self.proxy(line, 'ts'))
+            else:
+                lines.append(line)
+        return [200, "application/vnd.apple.mpegurl", '\n'.join(lines)]
+
+    def tsProxy(self, url):
+        return [200, 'video/mp2t', requests.get(self.d64(url), headers=self.headers, proxies=self.proxies).content]
+
+    def e64(self, text):
+        return b64encode(str(text).encode()).decode()
+
+    def d64(self, text):
+        return b64decode(str(text).encode()).decode()
+
+    def aesimg(self, data):
+        if len(data) < 16:
+            return data
+        keys = [(b'f5d965df75336270', b'97b60394abc2fbe1'), (b'75336270f5d965df', b'abc2fbe197b60394')]
+        for k, v in keys:
+            try:
+                dec = unpad(AES.new(k, AES.MODE_CBC, v).decrypt(data), 16)
+                if dec.startswith(b'\xff\xd8') or dec.startswith(b'\x89PNG'):
+                    return dec
+            except:
+                pass
+            try:
+                dec = unpad(AES.new(k, AES.MODE_ECB).decrypt(data), 16)
+                if dec.startswith(b'\xff\xd8'):
+                    return dec
+            except:
+                pass
+        return data
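+
+    # Illustrative sketch (not executed): why the magic-byte check in aesimg()
+    # identifies the right key/IV pair. The sample ciphertext is synthesized here
+    # by encrypting a fake JPEG header with the first hard-coded pair.
+    #
+    #   from Crypto.Cipher import AES
+    #   from Crypto.Util.Padding import pad, unpad
+    #   key, iv = b'f5d965df75336270', b'97b60394abc2fbe1'
+    #   sample = AES.new(key, AES.MODE_CBC, iv).encrypt(pad(b'\xff\xd8fake-jpeg', 16))
+    #   dec = unpad(AES.new(key, AES.MODE_CBC, iv).decrypt(sample), 16)
+    #   assert dec.startswith(b'\xff\xd8')  # JPEG SOI marker found -> correct pair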
+
+    def getlist(self, data, tid=''):
+        videos = []
+        is_folder = '/mrdg' in (tid or '')
+        for k in data.items():
+            card_html = k.outer_html() if hasattr(k, 'outer_html') else str(k)
+            a = k if k.is_('a') else k('a').eq(0)
+            href = a.attr('href')
+            title = k('h2').text() or k('.entry-title').text() or k('.post-title').text()
+            if not title and k.is_('a'):
+                title = k.text()
+
+            if href and title:
+                img = self.getimg(k('script').text(), k, card_html)
+                videos.append({
+                    'vod_id': f"{href}{'@folder' if is_folder else ''}",
+                    'vod_name': title.strip(),
+                    'vod_pic': img,
+                    'vod_remarks': k('time').text() or '',
+                    'vod_tag': 'folder' if is_folder else '',
+                    'style': {"type": "rect", "ratio": 1.33}
+                })
+        return videos
+
+    def getfod(self, id):
+        url = f"{self.host}{id}"
+        data = self.getpq(requests.get(url, headers=self.headers, proxies=self.proxies).text)
+        videos = []
+        for i, h2 in enumerate(data('.post-content h2').items()):
+            p_txt = data('.post-content p').eq(i * 2)
+            p_img = data('.post-content p').eq(i * 2 + 1)
+            p_html = p_img.outer_html() if hasattr(p_img, 'outer_html') else str(p_img)
+            videos.append({
+                'vod_id': p_txt('a').attr('href'),
+                'vod_name': p_txt.text().strip(),
+                'vod_pic': self.getimg('', p_img, p_html),
+                'vod_remarks': h2.text().strip()
+            })
+        return videos
+
+    def getimg(self, text, elem=None, html_content=None):
+        if m := re.search(r"loadBannerDirect\('([^']+)'", text or ''):
+            return self._proc_url(m.group(1))
+
+        if html_content is None and elem is not None:
+            html_content = elem.outer_html() if hasattr(elem, 'outer_html') else str(elem)
+        if not html_content:
+            return ''
+
+        # unescape common HTML entities before matching URLs
+        html_content = html_content.replace('&quot;', '"').replace('&#39;', "'").replace('&amp;', '&')
+
+        if 'data:image' in html_content:
+            m = re.search(r'(data:image/[a-zA-Z0-9+/=;,]+)', html_content)
+            if m:
+                return self._proc_url(m.group(1))
+
+        m = re.search(r'(https?://[^"\'\s)]+\.(?:jpg|png|jpeg|webp))', html_content, re.I)
+        if m:
+            return self._proc_url(m.group(1))
+
+        if 'url(' in html_content:
+            m = re.search(r'url\s*\(\s*[\'"]?([^"\'\)]+)[\'"]?\s*\)', html_content, re.I)
+            if m:
+                return self._proc_url(m.group(1))
+
+        return ''
+
+    def _proc_url(self, url):
+        if not url:
+            return ''
+        url = url.strip('\'" ')
+        if url.startswith('data:'):
+            try:
+                _, b64_str = url.split(',', 1)
+                raw = b64decode(b64_str)
+                # decrypt only if the payload is not already a recognizable image
+                if not (raw.startswith(b'\xff\xd8') or raw.startswith(b'\x89PNG') or raw.startswith(b'GIF8')):
+                    raw = self.aesimg(raw)
+                key = hashlib.md5(raw).hexdigest()
+                img_cache[key] = raw
+                return f"{self.getProxyUrl()}&type=cache&key={key}"
+            except:
+                return ""
+        if not url.startswith('http'):
+            url = f"{self.host}{url}" if url.startswith('/') else f"{self.host}/{url}"
+        return f"{self.getProxyUrl()}&url={self.e64(url)}&type=img"
+
+    def getpq(self, data):
+        try:
+            return pq(data)
+        except:
+            return pq(data.encode('utf-8'))
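+
+# Illustrative sketch (not executed): the playlist rewrite m3Proxy() performs,
+# shown on an in-memory playlist. proxy_url stands in for getProxyUrl() and the
+# CDN base is hypothetical.
+#
+#   from base64 import b64encode
+#   proxy_url = 'http://127.0.0.1:9978/proxy?do=py'
+#   base = 'https://cdn.example.com/hls'
+#   out = []
+#   for line in '#EXTM3U\n#EXTINF:4.0,\nseg0.ts'.split('\n'):
+#       if '#EXT' not in line and line.strip():
+#           absolute = line if line.startswith('http') else f'{base}/{line}'
+#           out.append(f'{proxy_url}&url={b64encode(absolute.encode()).decode()}&type=ts')
+#       else:
+#           out.append(line)
+#   # out[-1] now routes seg0.ts back through localProxy as type=ts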
diff --git a/lib/糖心次元.py b/lib/糖心次元.py
new file mode 100644
index 0000000..4957a97
--- /dev/null
+++ b/lib/糖心次元.py
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+# @Author : Grok-4 Adapted & Optimized
+# @Time   : 2025/10/22
+# @Note   : Minimal 糖心次元 spider (fixes the escaped "\/" issue & the Korean AV title prefix)
+
+import sys, urllib.parse, re, json
+from lxml import etree
+sys.path.append('..')
+from base.spider import Spider
+
+class Spider(Spider):
+    def getName(self):
+        return "糖心次元"
+
+    def init(self, extend):
+        pass
+
+    def homeContent(self, filter):
+        cate = {"传媒系列":"1","AV系列":"2","麻豆传媒":"5","糖心传媒":"6","精东影业":"7","蜜桃传媒":"8","果冻传媒":"9","星空无限":"10","天美传媒":"11","抠抠传媒":"12","星杏吧传媒":"13","性视界传媒":"14","SA国际传媒":"15","其他传媒":"16","国产-自拍-偷拍":"17","探花-主播-网红":"18","日本-中文字幕":"19","日本-无码流出":"20","日本-高清有码":"21","日本-东京热":"22","动漫-番中字":"23","变态-暗网-同恋":"24","欧美高清无码":"25","韩国av":"27"}
+        return {'class': [{'type_name': k, 'type_id': v} for k, v in cate.items()]}
+
+    def homeVideoContent(self):
+        return {}
+
+    # --------------- shared list parsing --------------- #
+    def _parse(self, rsp):
+        root = etree.HTML(rsp)
+        videos = root.xpath('//li[contains(@class,"mb15") and .//a[contains(@href,"/vod/play/")]]')
+        lst = []
+        for v in videos:
+            name = (v.xpath('.//h2/a/@title|.//h3/a/@title|.//p[contains(@class,"txt-ov")]/text()') or [''])[0].strip()
+            # >>> strip the Korean AV prefix: "kbj-23010421 Title" -> "Title"
+            name = re.sub(r'^[a-zA-Z]{2,}-\d+\s*', '', name).strip()
+            img = (v.xpath('.//img/@src') or [''])[0]
+            if img and not img.startswith('http'):
+                img = ('https:' + img) if img.startswith('//') else 'https://img1.souavzy.org' + img
+            link = (v.xpath('.//a[contains(@href,"/vod/play/")]/@href') or [''])[0]
+            if link and not link.startswith('http'):
+                link = 'https://www.txsp.my' + link
+            lst.append({'vod_name': name or '未知标题', 'vod_pic': img, 'vod_remarks': (v.xpath('.//span[contains(@class,"ico-left")]/text()') or [''])[0].strip(), 'vod_id': link})
+        return lst
+
+    def categoryContent(self, tid, pg, filter, extend):
+        url = f'https://www.txsp.my/index.php/vod/type/id/{tid}.html' if str(pg) == '1' else f'https://www.txsp.my/index.php/vod/type/id/{tid}/page/{pg}.html'
+        try:
+            rsp = self.fetch(url).text
+            lst = self._parse(rsp)
+            pages = max([int(n) for n in re.findall(r'/page/(\d+)', rsp)] or [1])
+            return {'list': lst, 'page': int(pg), 'pagecount': pages, 'limit': len(lst), 'total': 999999}
+        except Exception:
+            return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
+
+    def detailContent(self, array):
+        tid = array[0]
+        url = tid if tid.startswith('http') else 'https://www.txsp.my' + tid
+        try:
+            rsp = self.fetch(url).text
+            root = etree.HTML(rsp)
+            title = (root.xpath('//h1/text()') or ['未知标题'])[0].strip()
+            pic = (root.xpath('//meta[@property="og:image"]/@content|//img[contains(@src,"upload/vod")]/@src') or [''])[0]
+            if pic and not pic.startswith('http'):
+                pic = ('https:' + pic) if pic.startswith('//') else 'https://img1.souavzy.org' + pic
+            play_url = self._extract(rsp)
+            return {'list': [{'vod_id': tid, 'vod_name': title, 'vod_pic': pic, 'vod_content': title, 'vod_play_from': '糖心次元', 'vod_play_url': '播放$' + play_url if play_url else '播放$暂无播放地址'}]}
+        except Exception:
+            return {'list': []}
+
+    def _extract(self, html):
+        html = html.replace(r'\/', '/')  # key fix: undo JSON-escaped slashes before matching URLs
+        for pat in [r'var player_aaaa\s*=\s*({[^}]+})', r'player_aaaa\s*=\s*({[^}]+})', r'var player_data\s*=\s*({[^}]+})']:
+            m = re.search(pat, html)
+            if m:
+                try:
+                    url = json.loads(m.group(1))['url']
+                    if url:
+                        return url
+                except:
+                    continue
+        src = re.search(r'<iframe[^>]+src="([^"]+souavzy[^"]+)"', html, re.I)
+        if src:
+            m3 = re.search(r'url=([^&]+)', src.group(1))
+            if m3:
+                return urllib.parse.unquote(m3.group(1))
+        for url in re.findall(r'"(https?://[^"]+\.m3u8[^"]*)"', html):
+            if 'souavzy' in url or 'qrtuv' in url:
+                return url
+        return ''
+
+    def searchContent(self, key, quick, pg="1"):
+        url = f'https://www.txsp.my/index.php/vod/search/page/{pg}/wd/{urllib.parse.quote(key)}.html'
+        try:
+            return {'list': self._parse(self.fetch(url).text), 'page': int(pg), 'pagecount': 999, 'limit': 999, 'total': 999999}
+        except:
+            return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
+
+    def playerContent(self, flag, id, vipFlags):
+        if flag != "糖心次元":
+            return {}
+        if id.startswith('http') and ('.m3u8' in id or 'souavzy' in id):
+            return {"parse": 0, "playUrl": '', "url": id, "header": {"User-Agent": "Mozilla/5.0", "Referer": "https://www.txsp.my/", "Origin": "https://www.txsp.my"}}
+        try:
+            url = id if id.startswith('http') else 'https://www.txsp.my' + id
+            play_url = self._extract(self.fetch(url).text)
+            if play_url:
+                return {"parse": 0, "playUrl": '', "url": play_url, "header": {"User-Agent": "Mozilla/5.0", "Referer": "https://www.txsp.my/", "Origin": "https://www.txsp.my"}}
+        except:
+            pass
+        return {"parse": 1, "playUrl": '', "url": id, "header": {"User-Agent": "Mozilla/5.0", "Referer": "https://www.txsp.my/", "Origin": "https://www.txsp.my"}}
+
+    def isVideoFormat(self, url):
+        pass
+
+    def manualVideoCheck(self):
+        pass
+
+    def localProxy(self, param):
+        pass
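+
+# Illustrative sketch (not executed): the player_aaaa extraction in _extract(),
+# run against a hypothetical inline snippet of the kind MacCMS player pages embed.
+#
+#   import json, re
+#   html = 'var player_aaaa = {"url":"https:\\/\\/s1.example\\/v.m3u8","from":"dplayer"}'
+#   html = html.replace(r'\/', '/')  # the escaped-slash fix
+#   m = re.search(r'var player_aaaa\s*=\s*({[^}]+})', html)
+#   json.loads(m.group(1))['url']  # -> 'https://s1.example/v.m3u8'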
diff --git a/lib/随机小姐姐.py b/lib/随机小姐姐.py
new file mode 100644
index 0000000..e5c78be
--- /dev/null
+++ b/lib/随机小姐姐.py
@@ -0,0 +1,144 @@
+# coding=utf-8
+# !/usr/bin/python
+import sys
+import requests
+import datetime
+from bs4 import BeautifulSoup
+import re
+import base64
+import json
+
+sys.path.append('..')
+from base.spider import Spider
+
+xurl = "http://xjj2.716888.xyz"
+headerx = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36',
+    'Cookie': 'mk_encrypt_c21f969b5f03d33d43e04f8f136e7682=390e11f0d5ae13b2787e6a72db11527f'
+}
+
+
+class Spider(Spider):
+    global xurl
+    global headerx
+
+    def getName(self):
+        return "首页"
+
+    def init(self, extend):
+        pass
+
+    def isVideoFormat(self, url):
+        pass
+
+    def manualVideoCheck(self):
+        pass
+
+    def homeContent(self, filter):
+        pass
+
+    def homeVideoContent(self):
+        id = ['4k/4k.php', 'djxjj/dj1.php', 'zj/jipinyz/jipinyz.php', 'zj/xuejie/xuejie.php', 'zj/kawayi/kawayi.php',
+              'zj/nennen/nennen.php', 'zj/heji1/heji1.php', 'zj/sihuawd/sihuawd.php', 'zj/wanmeisc/wanmeisc.php',
+              'zj/manyao/manyao.php', 'zj/sihuadd/sihuadd.php', 'zj/qingchun/qingchun.php', 'zj/cos/cos.php',
+              'zj/jingpinbz/jingpinbz.php', 'zj/jipinll/jipinll.php', 'zj/nideym/nideym.php', 'zj/tianmei/tianmei.php',
+              'zj/yusi/yusi.php', 'zj/shuaige/shuaige.php', 'zj/rewu/rewu.php', 'zj/jingpinsc/jingpinsc.php']
+        name = ['随机', 'DJ姐姐', '极品钰足', '学姐系列', '卡哇伊', '嫩嫩系列', '美女舞蹈', '丝滑舞蹈', '完美身材',
+                '慢摇系列', '丝滑吊带', '清纯系列', 'COS系列', '精品变装', '极品罗丽', '你的裕梦', '甜妹系列',
+                '御丝系列', '帅哥哥', '热舞系列', '精品收藏']
+        pic = ['https://img0.baidu.com/it/u=2236794495,926227820&fm=253&fmt=auto&app=138&f=JPEG?w=1091&h=500',
+               'https://pic.rmb.bdstatic.com/mvideo/e17d86ce4489a02870ace9a25a804c3e',
+               'https://img1.baidu.com/it/u=4087009209,613234683&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=364',
+               'https://img1.baidu.com/it/u=2347706654,3055017263&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
+               'https://img2.baidu.com/it/u=3715511725,1094436549&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=1083',
+               'https://img2.baidu.com/it/u=2560410906,3760952489&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
+               'https://img0.baidu.com/it/u=4119328645,2294770712&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=750',
+               'https://img1.baidu.com/it/u=3167365498,4156845177&fm=253&fmt=auto&app=120&f=JPEG?w=355&h=631',
+               'https://img2.baidu.com/it/u=2214691242,2295609938&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=973',
+               'https://img1.baidu.com/it/u=3930123826,1131807820&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500',
+               'https://img2.baidu.com/it/u=3998619741,1128428746&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=594',
+               'https://img2.baidu.com/it/u=1507871502,2316279678&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=768',
+               'https://img0.baidu.com/it/u=2245878765,4037513957&fm=253&fmt=auto&app=138&f=JPEG?w=617&h=411',
+               'https://img1.baidu.com/it/u=3623293272,829752126&fm=253&fmt=auto&app=138&f=JPEG?w=285&h=285',
+               'https://img2.baidu.com/it/u=1922261112,3647796435&fm=253&fmt=auto&app=120&f=JPEG?w=500&h=542',
+               'https://img1.baidu.com/it/u=3970043028,2042301564&fm=253&fmt=auto&app=120&f=JPEG?w=500&h=889',
+               'https://img2.baidu.com/it/u=3229384329,3046902124&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=800',
+               'https://img1.baidu.com/it/u=3113661564,2558849413&fm=253&fmt=auto&app=138&f=JPEG?w=500&h=500',
+               'https://img1.baidu.com/it/u=2361496550,3302335162&fm=253&fmt=auto&app=138&f=JPEG?w=333&h=500',
+               'https://img1.baidu.com/it/u=270105183,1595166255&fm=253&fmt=auto&app=120&f=JPEG?w=800&h=500',
+               'https://img1.baidu.com/it/u=4071105902,825241031&fm=253&fmt=auto&app=138&f=JPEG?w=235&h=340']
+        list_length = len(id)
+        videos = []
+        for i in range(list_length):
+            video = {
+                "vod_id": id[i],
+                "vod_name": name[i],
+                "vod_pic": pic[i],
+                "vod_remarks": '播放20个',
+            }
+            videos.append(video)
+
+        result = {'list': videos}
+
+        return result
+
+    def categoryContent(self, cid, pg, filter, ext):
+        pass
+
+    def detailContent(self, ids):
+        videos = []
+        result = {}
+        did = ids[0]
+        for i in range(1, 21):
+            # entry i strings together i hits of the same random endpoint, so
+            # picking "episode" i plays that many rounds in sequence
+            playurl = ""
+            for j in range(1, i + 1):
+                playurl += f"{j}$/fenlei/{did}#"
+            playurl = playurl[:-1]
+
+            videos.append({
+                "vod_id": '',
+                "vod_name": '',
+                "vod_pic": "",
+                "type_name": '',
+                "vod_year": "",
+                "vod_area": "",
+                "vod_remarks": "",
+                "vod_actor": "",
+                "vod_director": "",
+                "vod_content": "",
+                "vod_play_from": "GK推荐",
+                "vod_play_url": playurl
+            })
+
+        result['list'] = videos
+        return result
+
+    def playerContent(self, flag, id, vipFlags):
+        result = {}
+        response = requests.get(url=xurl + id, headers=headerx, allow_redirects=False)
+
+        # the site answers with a redirect; guard against a missing Location header
+        location_header = response.headers.get('Location') or ''
+        if 'http' in location_header:
+            purl = location_header
+        else:
+            purl = 'http:' + location_header
+        result["parse"] = 0
+        result["playUrl"] = ''
+        result["url"] = purl
+        result["header"] = headerx
+        return result
+
+    def searchContentPage(self, key, quick, page):
+        pass
+
+    def searchContent(self, key, quick):
+        return self.searchContentPage(key, quick, '1')
+
+    def localProxy(self, params):
+        if params['type'] == "m3u8":
+            return self.proxyM3u8(params)
+        elif params['type'] == "media":
+            return self.proxyMedia(params)
+        elif params['type'] == "ts":
+            return self.proxyTs(params)
+        return None
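+
+# Illustrative sketch (not executed): the 302 trick playerContent() relies on --
+# the site answers /fenlei/... with a redirect whose Location header is the real
+# media URL. httpbin.org is used here purely as a stand-in endpoint.
+#
+#   import requests
+#   resp = requests.get('https://httpbin.org/redirect-to?url=http://example.com/a.mp4',
+#                       allow_redirects=False)
+#   location = resp.headers.get('Location') or ''  # guard: header may be absent
+#   purl = location if 'http' in location else 'http:' + location
+#   # resp.status_code == 302, purl == 'http://example.com/a.mp4'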
diff --git a/lib/香蕉.py b/lib/香蕉.py
new file mode 100644
index 0000000..33f8389
--- /dev/null
+++ b/lib/香蕉.py
@@ -0,0 +1,669 @@
+# coding=utf-8
+#!/usr/bin/python
+import sys
+sys.path.append('..')
+from base.spider import Spider
+import json
+import time
+import urllib.parse
+import re
+import requests
+from lxml import etree
+from urllib.parse import urljoin
+
+
+class Spider(Spider):
+
+    def getName(self):
+        return "苹果视频"
+
+    def init(self, extend=""):
+        self.host = "https://618041.xyz"
+        self.api_host = "https://h5.xxoo168.org"
+        self.headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
+            'Accept-Encoding': 'gzip, deflate',
+            'Connection': 'keep-alive',
+            'Referer': self.host
+        }
+        # category IDs that need the special ar/ar-kk link handling
+        self.special_categories = ['13', '14', '33', '53', '32', '52', '9']
+        self.log(f"苹果视频爬虫初始化完成,主站: {self.host}")
+
+    def html(self, content):
+        """Parse HTML content into a queryable lxml tree."""
+        try:
+            return etree.HTML(content)
+        except:
+            self.log("HTML解析失败")
+            return None
+
+    def regStr(self, pattern, string, index=1):
+        """Extract a regex group from a string."""
+        try:
+            match = re.search(pattern, string, re.IGNORECASE)
+            if match and len(match.groups()) >= index:
+                return match.group(index)
+        except:
+            pass
+        return ""
+
+    def isVideoFormat(self, url):
+        pass
+
+    def manualVideoCheck(self):
+        pass
+
+    def homeContent(self, filter):
+        """Home page content and category list."""
+        result = {}
+        # keep only the fixed set of categories
+        classes = [
+            {'type_id': '618041.xyz_1', 'type_name': '全部视频'},
+            {'type_id': '618041.xyz_13', 'type_name': '香蕉精品'},
+            {'type_id': '618041.xyz_22', 'type_name': '制服诱惑'},
+            {'type_id': '618041.xyz_6', 'type_name': '国产视频'},
+            {'type_id': '618041.xyz_8', 'type_name': '清纯少女'},
+            {'type_id': '618041.xyz_9', 'type_name': '辣妹大奶'},
+            {'type_id': '618041.xyz_10', 'type_name': '女同专属'},
+            {'type_id': '618041.xyz_11', 'type_name': '素人出演'},
+            {'type_id': '618041.xyz_12', 'type_name': '角色扮演'},
+            {'type_id': '618041.xyz_20', 'type_name': '人妻熟女'},
+            {'type_id': '618041.xyz_23', 'type_name': '日韩剧情'},
+            {'type_id': '618041.xyz_21', 'type_name': '经典伦理'},
+            {'type_id': '618041.xyz_7', 'type_name': '成人动漫'},
+            {'type_id': '618041.xyz_14', 'type_name': '精品二区'},
+            {'type_id': '618041.xyz_53', 'type_name': '动漫中字'},
+            {'type_id': '618041.xyz_52', 'type_name': '日本无码'},
+            {'type_id': '618041.xyz_33', 'type_name': '中文字幕'},
+            {'type_id': '618041.xyz_32', 'type_name': '国产自拍'}
+        ]
+        result['class'] = classes
+        try:
+            rsp = self.fetch(self.host, headers=self.headers)
+            doc = self.html(rsp.text)
+            videos = self._get_videos(doc, limit=20)
+            result['list'] = videos
+        except Exception as e:
+            self.log(f"首页获取出错: {str(e)}")
+            result['list'] = []
+        return result
+
+    def homeVideoContent(self):
+        """Category definitions - compatibility shim."""
+        return {
+            'class': [
+                {'type_id': '618041.xyz_1', 'type_name': '全部视频'},
+                {'type_id': '618041.xyz_13', 'type_name': '香蕉精品'},
+                {'type_id': '618041.xyz_22', 'type_name': '制服诱惑'},
+                {'type_id': '618041.xyz_6', 'type_name': '国产视频'},
+                {'type_id': '618041.xyz_8', 'type_name': '清纯少女'},
+                {'type_id': '618041.xyz_9', 'type_name': '辣妹大奶'},
+                {'type_id': '618041.xyz_10', 'type_name': '女同专属'},
+                {'type_id': '618041.xyz_11', 'type_name': '素人出演'},
+                {'type_id': '618041.xyz_12', 'type_name': '角色扮演'},
+                {'type_id': '618041.xyz_20', 'type_name': '人妻熟女'},
+                {'type_id': '618041.xyz_23', 'type_name': '日韩剧情'},
+                {'type_id': '618041.xyz_21', 'type_name': '经典伦理'},
+                {'type_id': '618041.xyz_7', 'type_name': '成人动漫'},
+                {'type_id': '618041.xyz_14', 'type_name': '精品二区'},
+                {'type_id': '618041.xyz_53', 'type_name': '动漫中字'},
+                {'type_id': '618041.xyz_52', 'type_name': '日本无码'},
+                {'type_id': '618041.xyz_33', 'type_name': '中文字幕'},
+                {'type_id': '618041.xyz_32', 'type_name': '国产自拍'}
+            ]
+        }
+
+    def categoryContent(self, tid, pg, filter, extend):
+        """Category listing - uses fixed paging values instead of parsing them."""
+        try:
+            domain, type_id = tid.split('_')
+            url = f"https://{domain}/index.php/vod/type/id/{type_id}.html"
+            if pg and pg != '1':
+                url = url.replace('.html', f'/page/{pg}.html')
+            self.log(f"访问分类URL: {url}")
+            rsp = self.fetch(url, headers=self.headers)
+            doc = self.html(rsp.text)
+            # pass type_id down so special categories are recognized
+            videos = self._get_videos(doc, category_id=type_id, limit=20)
+
+            # fixed paging values rather than values parsed from the page
+            pagecount = 999
+            total = 19980
+
+            return {
+                'list': videos,
+                'page': int(pg),
+                'pagecount': pagecount,
+                'limit': 20,
+                'total': total
+            }
+        except Exception as e:
+            self.log(f"分类内容获取出错: {str(e)}")
+            return {'list': []}
+
+    def searchContent(self, key, quick, pg="1"):
+        """Search - fully fixed version."""
+        try:
+            # build the search URL
+            search_url = f"{self.host}/index.php/vod/type/id/1/wd/{urllib.parse.quote(key)}/page/{pg}.html"
+            self.log(f"搜索URL: {search_url}")
+
+            rsp = self.fetch(search_url, headers=self.headers)
+            if not rsp or rsp.status_code != 200:
+                self.log("搜索请求失败")
+                return {'list': []}
+
+            doc = self.html(rsp.text)
+            if not doc:
+                self.log("搜索页面解析失败")
+                return {'list': []}
+
+            videos = self._get_videos(doc, limit=20)
+
+            # paging defaults, refined below if the pager can be parsed
+            pagecount = 5
+            total = 100
+
+            page_elements = doc.xpath('//div[@class="mypage"]/a')
+            if page_elements and len(page_elements) > 0:
+                try:
+                    # look for the tail-page ("尾页") link
+                    last_page = None
+                    for elem in page_elements:
+                        href = elem.xpath('./@href')[0]
+                        if '尾页' in (elem.text or '') or 'page/' in href:
+                            last_page = href
+                            break
+
+                    if last_page:
+                        # pull the page number out of the tail-page URL
+                        page_match = re.search(r'/page/(\d+)\.html', last_page)
+                        if page_match:
+                            pagecount = int(page_match.group(1))
+                            total = pagecount * 20  # rough estimate
+                except:
+                    pass
+
+            return {
+                'list': videos,
+                'page': int(pg),
+                'pagecount': pagecount,
+                'limit': 20,
+                'total': total
+            }
+        except Exception as e:
+            self.log(f"搜索出错: {str(e)}")
+            return {'list': []}
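+
+    # Illustrative sketch (not executed): the tail-page ("尾页") parsing above,
+    # run against a hypothetical pager fragment.
+    #
+    #   import re
+    #   from lxml import etree
+    #   frag = ('<div class="mypage"><a href="/index.php/vod/type/id/1/page/2.html">2</a>'
+    #           '<a href="/index.php/vod/type/id/1/page/57.html">尾页</a></div>')
+    #   doc = etree.HTML(frag)
+    #   for a in doc.xpath('//div[@class="mypage"]/a'):
+    #       href = (a.xpath('./@href') or [''])[0]
+    #       if '尾页' in (a.text or ''):
+    #           print(int(re.search(r'/page/(\d+)\.html', href).group(1)))  # 57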
+
+    def detailContent(self, ids):
+        """Detail page - special handling for special-category links."""
+        try:
+            vid = ids[0]
+
+            # special-category ids look like special_{category_id}_{video_id}_{encoded_url}
+            if vid.startswith('special_'):
+                parts = vid.split('_')
+                if len(parts) >= 4:
+                    category_id = parts[1]
+                    video_id = parts[2]
+                    encoded_url = '_'.join(parts[3:])
+                    play_url = urllib.parse.unquote(encoded_url)
+
+                    self.log(f"特殊分区视频,直接使用链接: {play_url}")
+
+                    # pull the video URL out of the play link
+                    parsed_url = urllib.parse.urlparse(play_url)
+                    query_params = urllib.parse.parse_qs(parsed_url.query)
+                    video_url = query_params.get('v', [''])[0]
+                    pic_url = query_params.get('b', [''])[0]
+                    title_encrypted = query_params.get('m', [''])[0]
+
+                    # decode the obfuscated title
+                    title = self._decrypt_title(title_encrypted)
+
+                    return {
+                        'list': [{
+                            'vod_id': vid,
+                            'vod_name': title,
+                            'vod_pic': pic_url,
+                            'vod_remarks': '',
+                            'vod_year': '',
+                            'vod_play_from': '直接播放',
+                            'vod_play_url': f"第1集${play_url}"
+                        }]
+                    }
+
+            # regular ids
+            if '_' in vid and len(vid.split('_')) > 2:
+                domain, category_id, video_id = vid.split('_')
+            else:
+                domain, video_id = vid.split('_')
+
+            detail_url = f"https://{domain}/index.php/vod/detail/id/{video_id}.html"
+
+            self.log(f"访问详情URL: {detail_url}")
+            rsp = self.fetch(detail_url, headers=self.headers)
+            doc = self.html(rsp.text)
+            video_info = self._get_detail(doc, rsp.text, vid)
+            return {'list': [video_info]} if video_info else {'list': []}
+        except Exception as e:
+            self.log(f"详情获取出错: {str(e)}")
+            return {'list': []}
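+
+    # Illustrative sketch (not executed): the special_{category}_{video}_{url}
+    # id round-trip shared by detailContent() and playerContent(). All values
+    # are hypothetical. Underscores inside the quoted link survive because the
+    # tail is re-joined with '_' before unquoting.
+    #
+    #   import urllib.parse
+    #   link = 'https://618041.xyz/html/ar-kk.html?v=//cdn.example/abc/video.m3u8&b=c.jpg'
+    #   vid = 'special_13_abc_' + urllib.parse.quote(link)
+    #   parts = vid.split('_')
+    #   play_url = urllib.parse.unquote('_'.join(parts[3:]))
+    #   urllib.parse.parse_qs(urllib.parse.urlparse(play_url).query)['v'][0]
+    #   # -> '//cdn.example/abc/video.m3u8'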
+
+    def playerContent(self, flag, id, vipFlags):
+        """Resolve the play URL - special handling for special-category links."""
+        try:
+            self.log(f"获取播放链接: flag={flag}, id={id}")
+
+            # special-category ids: special_{category_id}_{video_id}_{encoded_url}
+            if id.startswith('special_'):
+                parts = id.split('_')
+                if len(parts) >= 4:
+                    category_id = parts[1]
+                    video_id = parts[2]
+                    encoded_url = '_'.join(parts[3:])
+                    play_url = urllib.parse.unquote(encoded_url)
+
+                    self.log(f"特殊分区视频,直接使用链接: {play_url}")
+
+                    parsed_url = urllib.parse.urlparse(play_url)
+                    query_params = urllib.parse.parse_qs(parsed_url.query)
+                    video_url = query_params.get('v', [''])[0]
+
+                    if video_url:
+                        # make sure the URL is absolute
+                        if video_url.startswith('//'):
+                            video_url = 'https:' + video_url
+                        elif not video_url.startswith('http'):
+                            video_url = urljoin(self.host, video_url)
+
+                        self.log(f"从特殊链接中提取到视频地址: {video_url}")
+                        return {'parse': 0, 'playUrl': '', 'url': video_url}
+
+            # if the incoming id is already a full URL, parse its parameters directly
+            if id.startswith('http'):
+                self.log("ID 是一个完整URL,直接解析参数")
+                parsed_url = urllib.parse.urlparse(id)
+                query_params = urllib.parse.parse_qs(parsed_url.query)
+
+                video_url = query_params.get('v', [''])[0]
+                if not video_url:
+                    # try other common parameter names
+                    for key in query_params:
+                        if key in ['url', 'src', 'file']:
+                            video_url = query_params[key][0]
+                            break
+
+                if video_url:
+                    # undo possible URL encoding
+                    video_url = urllib.parse.unquote(video_url)
+                    if video_url.startswith('//'):
+                        video_url = 'https:' + video_url
+                    elif not video_url.startswith('http'):
+                        video_url = urljoin(self.host, video_url)
+
+                    self.log(f"从 URL 参数中提取到视频地址: {video_url}")
+                    return {'parse': 0, 'playUrl': '', 'url': video_url}
+                else:
+                    self.log("URL 中没有找到视频参数,尝试从页面提取")
+                    # fetch the page and extract the video link from it
+                    rsp = self.fetch(id, headers=self.headers)
+                    if rsp and rsp.status_code == 200:
+                        video_url = self._extract_direct_video_url(rsp.text)
+                        if video_url:
+                            self.log(f"从页面提取到视频地址: {video_url}")
+                            return {'parse': 0, 'playUrl': '', 'url': video_url}
+
+                self.log("无法从页面提取视频链接,返回原始URL")
+                return {'parse': 1, 'playUrl': '', 'url': id}
+
+            # pull the video id and category id out of the composite id
+            if id.count('_') >= 2:
+                parts = id.split('_')
+                video_id = parts[-1]
+                category_id = parts[1]
+            else:
+                video_id = id.split('_')[-1]
+                category_id = ''
+
+            self.log(f"视频ID: {video_id}, 分类ID: {category_id}")
+
+            # special categories: parse the play page directly
+            if category_id in self.special_categories:
+                self.log("特殊分类,尝试从详情页提取直接播放链接")
+                play_page_url = f"{self.host}/index.php/vod/play/id/{video_id}.html"
+
+                rsp = self.fetch(play_page_url, headers=self.headers)
+                if rsp and rsp.status_code == 200:
+                    video_url = self._extract_direct_video_url(rsp.text)
+                    if video_url:
+                        self.log(f"从播放页面提取到视频地址: {video_url}")
+                        return {'parse': 0, 'playUrl': '', 'url': video_url}
+
+                # extraction failed, fall back to the API
+                self.log("从播放页面提取失败,尝试API方式")
+                return self._get_video_by_api(id, video_id)
+            else:
+                # everything else goes through the API
+                self.log("使用API方式获取视频地址")
+                return self._get_video_by_api(id, video_id)
+
+        except Exception as e:
+            self.log(f"播放链接获取出错: {str(e)}")
+            return {'parse': 1, 'playUrl': '', 'url': self._fallback_play_url(id)}
+
+    def _fallback_play_url(self, id):
+        """Build the legacy play-page URL used as a last-resort fallback."""
+        parts = id.split('_')
+        if len(parts) >= 2:
+            return f"https://{parts[0]}/html/kkyd.html?m={parts[-1]}"
+        return f"{self.host}/html/kkyd.html?m={id}"
+
+    def _get_video_by_api(self, id, video_id):
+        """Fetch the video URL via the API."""
+        try:
+            api_url = f"{self.api_host}/api/v2/vod/reqplay/{video_id}"
+            self.log(f"请求API获取视频地址: {api_url}")
+
+            api_headers = self.headers.copy()
+            api_headers.update({
+                'Referer': f"{self.host}/",
+                'Origin': self.host,
+                'X-Requested-With': 'XMLHttpRequest'
+            })
+
+            api_response = self.fetch(api_url, headers=api_headers)
+            if api_response and api_response.status_code == 200:
+                data = api_response.json()
+                self.log(f"API响应: {data}")
+
+                # retcode 3 means preview-only; use the preview URL in that case
+                if data.get('retcode') == 3:
+                    video_url = data.get('data', {}).get('httpurl_preview', '')
+                else:
+                    video_url = data.get('data', {}).get('httpurl', '')
+
+                if video_url:
+                    video_url = video_url.replace('?300', '')
+                    self.log(f"从API获取到视频地址: {video_url}")
+                    return {'parse': 0, 'playUrl': '', 'url': video_url}
+                else:
+                    self.log("API响应中没有找到视频地址")
+            else:
+                self.log(f"API请求失败,状态码: {api_response.status_code if api_response else '无响应'}")
+
+            play_url = self._fallback_play_url(id)
+            self.log(f"API请求失败,回退到播放页面: {play_url}")
+            return {'parse': 1, 'playUrl': '', 'url': play_url}
+
+        except Exception as e:
+            self.log(f"API方式获取视频出错: {str(e)}")
+            return {'parse': 1, 'playUrl': '', 'url': self._fallback_play_url(id)}
+
+    def _extract_direct_video_url(self, html_content):
+        """Extract a directly playable URL from HTML (optimized)."""
+        try:
+            # try the obvious video-link patterns first
+            patterns = [
+                r'v=([^&]+\.(?:m3u8|mp4))',
+                r'"url"\s*:\s*["\']([^"\']+\.(?:mp4|m3u8))["\']',
+                r'src\s*=\s*["\']([^"\']+\.(?:mp4|m3u8))["\']',
+                r'http[^\s<>"\'?]+\.(?:mp4|m3u8)'
+            ]
+
+            for pattern in patterns:
+                matches = re.findall(pattern, html_content, re.IGNORECASE)
+                for match in matches:
+                    if isinstance(match, tuple):
+                        match = match[0]
+                    extracted_url = match.replace('\\', '')
+                    extracted_url = urllib.parse.unquote(extracted_url)
+
+                    # fix protocol-relative URLs, then return anything absolute
+                    if extracted_url.startswith('//'):
+                        extracted_url = 'https:' + extracted_url
+                    if extracted_url.startswith('http'):
+                        return extracted_url
+
+            return None
+        except Exception as e:
+            self.log(f"提取直接播放URL出错: {str(e)}")
+            return None
"""获取影片列表 - 根据实际网站结构""" + try: + videos = [] + elements = doc.xpath('//a[@class="vodbox"]') + self.log(f"找到 {len(elements)} 个vodbox元素") + for elem in elements: + video = self._extract_video(elem, category_id) + if video: + videos.append(video) + return videos[:limit] if limit and videos else videos + except Exception as e: + self.log(f"获取影片列表出错: {str(e)}") + return [] + + def _extract_video(self, element, category_id=None): + """提取影片信息 - 特别处理特殊分区的链接""" + try: + link = element.xpath('./@href')[0] + if link.startswith('/'): + link = self.host + link + + # 检查是否是特殊分区的链接 + is_special_link = 'ar-kk.html' in link or 'ar.html' in link + + # 对于特殊分区,直接使用链接本身作为ID + if is_special_link and category_id in self.special_categories: + # 提取链接中的参数 + parsed_url = urllib.parse.urlparse(link) + query_params = urllib.parse.parse_qs(parsed_url.query) + + # 获取视频ID(从v参数中提取) + video_url = query_params.get('v', [''])[0] + if video_url: + # 从视频URL中提取ID + video_id_match = re.search(r'/([a-f0-9-]+)/video\.m3u8', video_url) + if video_id_match: + video_id = video_id_match.group(1) + else: + # 如果没有匹配到,使用哈希值 + video_id = str(hash(link) % 1000000) + else: + video_id = str(hash(link) % 1000000) + + # 对于特殊分区,保留完整的链接作为vod_id的一部分 + final_vod_id = f"special_{category_id}_{video_id}_{urllib.parse.quote(link)}" + else: + # 常规处理 + vod_id = self.regStr(r'm=(\d+)', link) + if not vod_id: + vod_id = str(hash(link) % 1000000) + + final_vod_id = f"618041.xyz_{vod_id}" + if category_id: + final_vod_id = f"618041.xyz_{category_id}_{vod_id}" + + # 提取标题 + title_elem = element.xpath('.//p[@class="km-script"]/text()') + if not title_elem: + title_elem = element.xpath('.//p[contains(@class, "script")]/text()') + if not title_elem: + title_elem = element.xpath('.//p/text()') + if not title_elem: + title_elem = element.xpath('.//h3/text()') + if not title_elem: + title_elem = element.xpath('.//h4/text()') + if not title_elem: + self.log(f"未找到标题元素,跳过该视频") + return None + + title_encrypted = title_elem[0].strip() + title = self._decrypt_title(title_encrypted) + + # 提取图片 + pic_elem = element.xpath('.//img/@data-original') + if not pic_elem: + pic_elem = element.xpath('.//img/@src') + pic = pic_elem[0] if pic_elem else '' + + if pic: + if pic.startswith('//'): + pic = 'https:' + pic + elif pic.startswith('/'): + pic = self.host + pic + + return { + 'vod_id': final_vod_id, + 'vod_name': title, + 'vod_pic': pic, + 'vod_remarks': '', + 'vod_year': '' + } + except Exception as e: + self.log(f"提取影片信息出错: {str(e)}") + return None + + def _decrypt_title(self, encrypted_text): + """解密标题 - 使用网站的解密算法""" + try: + decrypted_chars = [] + for char in encrypted_text: + code_point = ord(char) + decrypted_code = code_point ^ 128 + decrypted_char = chr(decrypted_code) + decrypted_chars.append(decrypted_char) + + decrypted_text = ''.join(decrypted_chars) + return decrypted_text + except Exception as e: + self.log(f"标题解密失败: {str(e)}") + return encrypted_text + + def _get_detail(self, doc, html_content, vid): + """获取详情信息 (优化版) - 修复播放源提取问题""" + try: + title = self._get_text(doc, ['//h1/text()', '//title/text()']) + pic = self._get_text(doc, ['//div[contains(@class,"dyimg")]//img/@src', '//img[contains(@class,"poster")]/@src']) + if pic and pic.startswith('/'): + pic = self.host + pic + desc = self._get_text(doc, ['//div[contains(@class,"yp_context")]/text()', '//div[contains(@class,"introduction")]//text()']) + actor = self._get_text(doc, ['//span[contains(text(),"主演")]/following-sibling::*/text()']) + director = self._get_text(doc, 
+
+    def _get_detail(self, doc, html_content, vid):
+        """Detail extraction (optimized) - fixes the play-source detection."""
+        try:
+            title = self._get_text(doc, ['//h1/text()', '//title/text()'])
+            pic = self._get_text(doc, ['//div[contains(@class,"dyimg")]//img/@src', '//img[contains(@class,"poster")]/@src'])
+            if pic and pic.startswith('/'):
+                pic = self.host + pic
+            desc = self._get_text(doc, ['//div[contains(@class,"yp_context")]/text()', '//div[contains(@class,"introduction")]//text()'])
+            actor = self._get_text(doc, ['//span[contains(text(),"主演")]/following-sibling::*/text()'])
+            director = self._get_text(doc, ['//span[contains(text(),"导演")]/following-sibling::*/text()'])
+
+            play_from = []
+            play_urls = []
+
+            # flexible regexes to locate the player links
+            player_link_patterns = [
+                re.compile(r'href="(.*?ar\.html.*?)"'),
+                re.compile(r'href="(.*?kkyd\.html.*?)"'),
+                re.compile(r'href="(.*?ar-kk\.html.*?)"')
+            ]
+
+            player_links = []
+            for pattern in player_link_patterns:
+                matches = pattern.findall(html_content)
+                player_links.extend(matches)
+
+            if player_links:
+                episodes = []
+                for link in player_links:
+                    full_url = urljoin(self.host, link)
+                    episodes.append(f"第1集${full_url}")
+
+                if episodes:
+                    play_from.append("默认播放源")
+                    play_urls.append('#'.join(episodes))
+
+            if not play_from:
+                self.log("未找到播放源元素,无法定位播放源列表")
+                return {
+                    'vod_id': vid,
+                    'vod_name': title,
+                    'vod_pic': pic,
+                    'type_name': '',
+                    'vod_year': '',
+                    'vod_area': '',
+                    'vod_remarks': '',
+                    'vod_actor': actor,
+                    'vod_director': director,
+                    'vod_content': desc,
+                    'vod_play_from': '默认播放源',
+                    'vod_play_url': f"第1集${vid}"
+                }
+
+            return {
+                'vod_id': vid,
+                'vod_name': title,
+                'vod_pic': pic,
+                'type_name': '',
+                'vod_year': '',
+                'vod_area': '',
+                'vod_remarks': '',
+                'vod_actor': actor,
+                'vod_director': director,
+                'vod_content': desc,
+                'vod_play_from': '$$$'.join(play_from),
+                'vod_play_url': '$$$'.join(play_urls)
+            }
+        except Exception as e:
+            self.log(f"获取详情出错: {str(e)}")
+            return None
+
+    def _get_text(self, doc, selectors):
+        """Generic first-match text extraction."""
+        for selector in selectors:
+            try:
+                texts = doc.xpath(selector)
+                for text in texts:
+                    if text and text.strip():
+                        return text.strip()
+            except:
+                continue
+        return ''
+
+    def log(self, message):
+        """Log output."""
+        print(f"[苹果视频] {message}")
+
+    def fetch(self, url, headers=None, method='GET', data=None, timeout=10):
+        """HTTP request helper (TLS verification intentionally disabled)."""
+        try:
+            if headers is None:
+                headers = self.headers
+            if method == 'GET':
+                response = requests.get(url, headers=headers, timeout=timeout, verify=False)
+            else:
+                response = requests.post(url, headers=headers, data=data, timeout=timeout, verify=False)
+            return response
+        except Exception as e:
+            self.log(f"网络请求失败: {url}, 错误: {str(e)}")
+            return None
+
+
+# register the spider
+if __name__ == '__main__':
+    from base.spider import Spider as BaseSpider
+    BaseSpider.register(Spider())
\ No newline at end of file