N3RDN/JN/dr_py/js/采集之王[合].js

457 lines
23 KiB
JavaScript
Raw Permalink Normal View History

2024-06-20 22:15:35 +08:00
/**
 * 强烈推荐静态分类可以加快速度!!!
 * 不建议:
 * 传参 ?type=url&params=../json/采集.json
 * 建议:
 * 传参 ?type=url&params=../json/采集静态.json$1
 * 传参 ?type=url&params=../json/采集[zy]静态.json$1
 * 传参 ?type=url&params=../json/采集[]静态.json$1
 * hipy-server支持@改名比如:
 * 传参 ?type=url&params=../json/采集静态.json$1@采王道长[]
 * 传参 ?type=url&params=../json/采集[zy]静态.json$1@采王zy[]
 * 传参 ?type=url&params=../json/采集[]静态.json@采王成人[]
 * [{"name":"暴风资源","url":"https://bfzyapi.com","parse_url":""},{"name":"飞刀资源","url":"http://www.feidaozy.com","parse_url":""},{"name":"黑木耳资源","url":"https://www.heimuer.tv","parse_url":""}]
 */
2024-06-24 20:37:07 +08:00
// Pick one element of `items` at random (uniform over the indices).
// Yields undefined for an empty list because index 0 does not exist.
globalThis.getRandomItem = function (items) {
    const pickedIndex = (Math.random() * items.length) | 0; // `| 0` truncates to an integer index
    return items[pickedIndex];
};
2024-06-20 22:15:35 +08:00
var rule = {
title: '采集之王[合]',
author: '道长',
2024-07-06 19:57:39 +08:00
version: '20240706 beta17',
2024-06-24 20:37:07 +08:00
update_info: `
2024-07-06 19:57:39 +08:00
20240706:
1.静态json数据支持cate_excludes分类名称列表过滤无数据分类
2.更新采集分类生成器增加过滤筛选模式
2024-07-05 22:07:52 +08:00
20240705:
2024-07-06 01:37:11 +08:00
1.支持传参json后面增加$1 这样的额外标识用于搜索结果精准匹配
2.支持传参json后面增加$1$1 这样的额外标识用于强制获取搜索图片$1$不显示图片默认是搜索强制有图片的[已实现详情页请求使用批量]
3.修复二级数据无序匹配搜索列表图片的问题
4.修改搜索精准和图片显示额外参数间隔符从#变为$
2024-07-03 20:36:08 +08:00
20240703:
1.采集json支持"searchable": 0,用于搜索时排除这个源
2024-06-24 20:37:07 +08:00
20240604:
1.首页推荐取消硬控等待增加随机推荐功能
2.首页推荐新增更新日志查看功能
特别说明目前只支持标准json格式的采集站(资源站一般都提供xml和json两种接口目前没有支持xml的想法没什么必要)
有些资源站的json接口不是标准的/api.php/provide/vod/,需要自己在采集静态.json中编辑对应的api属性填写比如:/api.php/provide/vod/at/json/
有些资源站的采集数据是加密后的切片片段可能需要采集站特定的解析接口需要自己编辑json里的parse_url属性
资源站部分大分类下无数据很正常可以自行编辑json里cate_exclude属性排除掉自己测试过无数据的分类(小程序无法自动识别只能人工测好哪些分类无数据)
2024-07-05 22:07:52 +08:00
`.trim(),
2024-06-20 22:15:35 +08:00
// Scalar configuration of the rule. The URL templates below are relative paths;
// the handlers in this file join them onto each site's base URL with urljoin().
host: '',
homeTid: '', // Home recommendation: usually the id of the wanted category on the first resource site; may be left empty
homeUrl: '/api.php/provide/vod/?ac=detail&t={{rule.homeTid}}',
detailUrl: '/api.php/provide/vod/?ac=detail&ids=fyid',
2024-06-22 15:43:31 +08:00
// The "#TruePage#" marker is replaced with the computed per-site page number inside the 搜索 handler.
searchUrl: '/api.php/provide/vod/?wd=**&pg=#TruePage##page=fypage',
2024-06-20 22:15:35 +08:00
classUrl: '/api.php/provide/vod/',
url: '/api.php/provide/vod/?ac=detail&pg=fypage&t=fyfilter',
filter_url: '{{fl.类型}}',
headers: {'User-Agent': 'MOBILE_UA'},
timeout: 5000, // class_name: '电影&电视剧&综艺&动漫',
limit: 20,
2024-06-21 23:03:27 +08:00
search_limit: 5, // Number of sites queried per search page (original author's note: comment this out to lift the limit); 预处理 overwrites it with 16 when batchFetch exists
2024-06-20 22:15:35 +08:00
searchable: 1,// whether global search is enabled
quickSearch: 0,// whether quick search is enabled
filterable: 1,// whether category filtering is enabled
play_parse: true,
2024-06-21 23:03:27 +08:00
parse_url: '', // Not used for now: each aggregated site should have its own parse endpoint; only the parse_url configured per site in 采集.json takes effect
2024-07-05 22:07:52 +08:00
search_match: false, // exact-match search; overwritten in 预处理 from the first `$` flag of params
search_pic: true, // force search results to carry a picture; overwritten in 预处理 from the second `$` flag of params
2024-06-20 22:15:35 +08:00
// params: 'http://127.0.0.1:5707/files/json/%E9%87%87%E9%9B%86.json',
2024-07-06 01:37:11 +08:00
// params: 'http://127.0.0.1:5707/files/json/采集静态.json$1',
// params: 'http://127.0.0.1:5707/files/json/采集[zy]静态.json$1',
2024-06-21 23:03:27 +08:00
// hostJs:$js.toString(()=>{
//
// }),
2024-06-20 22:15:35 +08:00
预处理: $js.toString(() => {
2024-06-21 23:03:27 +08:00
// Build the category list of one site from its static `class_name` / `class_url`
// strings (both '&'-separated, paired by position).
// Returns [] when either string is missing or when a packed class_name cannot be decoded.
// Side effect: a packed class_name is replaced on `item` by its decoded text.
function getClasses(item) {
    if (!item.class_name || !item.class_url) {
        return [];
    }
    // A name list without '&' or one of the usual category words is treated as gzip-packed.
    const looksPlain = /&|电影|电视剧|综艺|动漫[\u4E00-\u9FA5]+/.test(item.class_name);
    if (!looksPlain) {
        try {
            item.class_name = ungzip(item.class_name);
        } catch (e) {
            log(`不识别的class_name导致gzip解码失败:${e}`);
            return [];
        }
    }
    const typeNames = item.class_name.split('&');
    const typeIds = item.class_url.split('&');
    // Pair only as many entries as both lists provide.
    const pairCount = Math.min(typeNames.length, typeIds.length);
    return typeIds.slice(0, pairCount).map((typeId, pos) => ({
        'type_id': typeId,
        'type_name': typeNames[pos]
    }));
}
2024-06-23 15:55:34 +08:00
// With batchFetch the search requests run as one batch, so the per-page site limit is raised to 16.
if (typeof (batchFetch) === 'function') {
    rule.search_limit = 16;
    log('当前程序支持批量请求[batchFetch],搜索限制已设置为16');
}
let _url = rule.params;
log(`传入参数:${_url}`);
if (_url && typeof (_url) === 'string' && /^(http|file)/.test(_url)) {
    // Optional flags after the json path: <json>$<exact-match>$<force-picture>.
    // Any non-empty flag text means "on"; an empty one means "off".
    if (_url.includes('$')) {
        let _url_params = _url.split('$');
        _url = _url_params[0];
        rule.search_match = !!(_url_params[1]);
        if (_url_params.length > 2) { // picture flag is only touched when explicitly given
            rule.search_pic = !!(_url_params[2]);
        }
    }
    let siteList = JSON.parse(request(_url));
    let _classes = [];
    rule.filter = {};
    rule.filter_def = {};
    siteList.forEach(it => {
        // Every resource site becomes one top-level class; its own categories become the "类型" filter.
        let _obj = {
            type_name: it.name,
            type_id: it.url,
            parse_url: it.parse_url || '',
            searchable: it.searchable !== 0,
            api: it.api || '',
            cate_exclude: it.cate_exclude || '',
            cate_excludes: it.cate_excludes || [],
        };
        _classes.push(_obj);
        try {
            // Static categories from the json avoid one network round trip per site.
            let cates = (it.class_name && it.class_url)
                ? getClasses(it)
                : JSON.parse(request(urljoin(_obj.type_id, _obj.api || rule.classUrl))).class;
            if (Array.isArray(_obj.cate_excludes) && _obj.cate_excludes.length > 0) {
                // Exact-name exclusion list takes precedence over the regex form.
                cates = cates.filter(cl => !_obj.cate_excludes.includes(cl.type_name));
            } else if (_obj.cate_exclude) {
                // Compile the exclusion pattern once instead of once per category.
                let excludePattern = new RegExp(_obj.cate_exclude, 'i');
                cates = cates.filter(cl => !excludePattern.test(cl.type_name));
            }
            rule.filter[_obj.type_id] = [{
                "key": "类型", "name": "类型", "value": cates.map(cl => {
                    return {"n": cl.type_name, 'v': cl.type_id}
                })
            }];
            if (cates.length > 0) {
                rule.filter_def[it.url] = {"类型": cates[0].type_id};
            }
        } catch (e) {
            // Best effort: keep the site usable with a single "all" filter, but say why.
            log(`获取站点【${it.name}】的分类失败,已使用默认分类:${e.message}`);
            rule.filter[it.url] = [{"key": "类型", "name": "类型", "value": [{"n": "全部", "v": ""}]}];
        }
    });
    rule.classes = _classes;
}
}),
// class_parse: $js.toString(() => {
// let _url = rule.params;
// if (_url && typeof (_url) === 'string' && _url.startsWith('http')) {
// let html = request(_url);
// let json = JSON.parse(html);
// let _classes = [];
// homeObj.filter = {};
// rule.filter_def = {};
// json.forEach(it => {
// let _obj = {
// type_name: it.name,
// type_id: it.url,
// parse_url: it.parse_url || '',
// cate_exclude: it.cate_exclude || '',
// };
// _classes.push(_obj);
// try {
// let json1 = JSON.parse(request(urljoin(_obj.type_id, rule.classUrl))).class;
// if (_obj.cate_exclude) {
// json1 = json1.filter(cl => !new RegExp(_obj.cate_exclude, 'i').test(cl.type_name));
// }
// homeObj.filter[_obj.type_id] = [{
// "key": "类型", "name": "类型", "value": json1.map(i => {
// return {"n": i.type_name, 'v': i.type_id}
// })
// }];
// if (json1.length > 0) {
// rule.filter_def[it.url] = {"类型": json1[0].type_id};
// }
// } catch (e) {
// homeObj.filter[it.url] = [{"key": "类型", "name": "类型", "value": [{"n": "全部", "v": ""}]}];
// }
// });
// rule.classes = _classes;
// input = _classes;
// }
// }),
// Category parser: assigns the site list stored on rule.classes (filled by 预处理 when
// rule.params is an http/file URL) to `input`.
// NOTE(review): presumably the host reads `input` as the class list — confirm against the drpy runtime.
class_parse: $js.toString(() => {
input = rule.classes;
}),
推荐: $js.toString(() => {
2024-06-24 20:37:07 +08:00
let update_info = [{
vod_name: '更新日志',
vod_id: 'update_info',
vod_remarks: `版本:${rule.version}`,
vod_pic: 'https://ghproxy.net/https://raw.githubusercontent.com/hjdhnx/hipy-server/master/app/static/img/logo.png'
}];
2024-06-21 23:03:27 +08:00
VODS = [];
2024-06-20 22:15:35 +08:00
if (rule.classes) {
2024-06-24 20:37:07 +08:00
let randomClass = getRandomItem(rule.classes);
let _url = urljoin(randomClass.type_id, input);
if (randomClass.api) {
_url = _url.replace('/api.php/provide/vod/', randomClass.api)
2024-06-21 23:03:27 +08:00
}
try {
2024-06-24 20:37:07 +08:00
let html = request(_url, {timeout: rule.timeout});
2024-06-21 23:03:27 +08:00
let json = JSON.parse(html);
VODS = json.list;
VODS.forEach(it => {
2024-06-24 20:37:07 +08:00
it.vod_id = randomClass.type_id + '$' + it.vod_id;
it.vod_remarks = it.vod_remarks + '|' + randomClass.type_name;
2024-06-21 23:03:27 +08:00
});
} catch (e) {
}
2024-06-20 22:15:35 +08:00
}
2024-06-24 20:37:07 +08:00
VODS = update_info.concat(VODS);
2024-06-20 22:15:35 +08:00
}),
一级: $js.toString(() => {
VODS = [];
if (rule.classes) {
// log(input);
let _url = urljoin(MY_CATE, input);
2024-06-21 23:03:27 +08:00
let current_vod = rule.classes.find(item => item.type_id === MY_CATE);
if (current_vod && current_vod.api) {
_url = _url.replace('/api.php/provide/vod/', current_vod.api)
}
2024-06-20 22:15:35 +08:00
let html = request(_url);
let json = JSON.parse(html);
VODS = json.list;
VODS.forEach(it => {
it.vod_id = MY_CATE + '$' + it.vod_id
});
}
}),
// 一级: 'json:list;vod_name;vod_pic;vod_remarks;vod_id;vod_play_from',
二级: $js.toString(() => {
2024-06-24 20:37:07 +08:00
VOD = {};
if (orId === 'update_info') {
VOD = {
vod_content: rule.update_info.trim(),
vod_name: '更新日志',
type_name: '更新日志',
vod_pic: 'https://resource-cdn.tuxiaobei.com/video/FtWhs2mewX_7nEuE51_k6zvg6awl.png',
vod_remarks: `版本:${rule.version}`,
vod_play_from: '道长在线',
// vod_play_url: '嗅探播放$https://resource-cdn.tuxiaobei.com/video/10/8f/108fc9d1ac3f69d29a738cdc097c9018.mp4',
vod_play_url: '随机小视频$http://api.yujn.cn/api/zzxjj.php',
};
} else {
if (rule.classes) {
let _url = urljoin(fyclass, input);
let current_vod = rule.classes.find(item => item.type_id === fyclass);
if (current_vod && current_vod.api) {
_url = _url.replace('/api.php/provide/vod/', current_vod.api)
}
let html = request(_url);
let json = JSON.parse(html);
let data = json.list;
VOD = data[0];
if (current_vod && current_vod.type_name) {
VOD.vod_play_from = VOD.vod_play_from.split('$$$').map(it => current_vod.type_name + '|' + it).join('$$$')
}
2024-06-21 23:03:27 +08:00
}
2024-06-20 22:15:35 +08:00
}
}),
搜索: $js.toString(() => {
VODS = [];
if (rule.classes) {
2024-07-03 20:36:08 +08:00
let canSearch = rule.classes.filter(it => it.searchable);
2024-06-22 15:43:31 +08:00
let page = Number(MY_PAGE);
2024-07-03 20:36:08 +08:00
page = (MY_PAGE - 1) % Math.ceil(canSearch.length / rule.search_limit) + 1;
let truePage = Math.ceil(MY_PAGE / Math.ceil(canSearch.length / rule.search_limit));
2024-06-22 15:43:31 +08:00
if (rule.search_limit) {
let start = (page - 1) * rule.search_limit;
let end = page * rule.search_limit;
2024-06-23 15:55:34 +08:00
let t1 = new Date().getTime();
let searchMode = typeof (batchFetch) === 'function' ? '批量' : '单个';
2024-06-22 15:43:31 +08:00
log('start:' + start);
log('end:' + end);
2024-06-23 15:55:34 +08:00
log('搜索模式:' + searchMode);
2024-07-06 01:37:11 +08:00
log('精准搜索:' + rule.search_match);
2024-07-27 10:03:18 +08:00
log('强制获取图片:' + rule.search_pic);
2024-06-23 15:55:34 +08:00
// log('t1:' + t1);
2024-07-03 20:36:08 +08:00
if (start < canSearch.length) {
let search_classes = canSearch.slice(start, end);
2024-06-23 15:55:34 +08:00
let urls = [];
2024-06-22 15:43:31 +08:00
search_classes.forEach(it => {
let _url = urljoin(it.type_id, input);
if (it.api) {
_url = _url.replace('/api.php/provide/vod/', it.api)
}
_url = _url.replace("#TruePage#", "" + truePage);
2024-06-23 15:55:34 +08:00
urls.push(_url);
2024-06-20 22:15:35 +08:00
});
2024-07-05 22:07:52 +08:00
let results_list = [];
2024-06-23 15:55:34 +08:00
let results = [];
if (typeof (batchFetch) === 'function') {
let reqUrls = urls.map(it => {
return {
url: it,
2024-06-24 20:37:07 +08:00
options: {timeout: rule.timeout}
2024-06-23 15:55:34 +08:00
}
});
let rets = batchFetch(reqUrls);
2024-07-05 22:07:52 +08:00
let detailUrls = [];
let detailUrlCount = 0;
2024-06-23 15:55:34 +08:00
rets.forEach((ret, idx) => {
let it = search_classes[idx];
if (ret) {
try {
let json = JSON.parse(ret);
let data = json.list;
data.forEach(i => {
2024-07-05 22:07:52 +08:00
i.site_name = it.type_name;
2024-06-23 15:55:34 +08:00
i.vod_id = it.type_id + '$' + i.vod_id;
i.vod_remarks = i.vod_remarks + '|' + it.type_name;
});
2024-07-05 22:07:52 +08:00
if (rule.search_match) {
data = data.filter(item => item.vod_name && (new RegExp(KEY, 'i')).test(item.vod_name))
}
if (data.length > 0) {
if (rule.search_pic && !data[0].vod_pic) {
log(`当前搜索站点【${it.type_name}】没图片,尝试访问二级去获取图片`);
let detailUrl = urls[idx].split('wd=')[0] + 'ac=detail&ids=' + data.map(k => k.vod_id.split('$')[1]).join(',');
detailUrls.push(detailUrl);
results_list.push({
data: data,
has_pic: false,
detailUrlCount: detailUrlCount
});
detailUrlCount++;
// try {
// let detailJson = JSON.parse(request(detailUrl));
// data.forEach((d, _seq) => {
// log('二级数据列表元素数:' + detailJson.list.length);
// let detailVodPic = detailJson.list[_seq].vod_pic;
// if (detailVodPic) {
// Object.assign(d, {vod_pic: detailVodPic});
// }
// });
// } catch (e) {
// log(`强制获取网站${it.type_id}的搜索图片失败:${e.message}`);
// }
} else {
results_list.push({data: data, has_pic: true});
}
// results = results.concat(data);
}
2024-06-23 15:55:34 +08:00
} catch (e) {
log(`请求:${it.type_id}发生错误:${e.message}`)
}
}
});
2024-07-05 22:07:52 +08:00
// 构造请求二级的batchFetch列表
let reqUrls2 = detailUrls.map(it => {
return {
url: it,
options: {timeout: rule.timeout}
}
});
2024-07-27 10:03:18 +08:00
let rets2 = reqUrls2.length > 0 ? batchFetch(reqUrls2) : [];
2024-07-05 22:07:52 +08:00
for (let k = 0; k < results_list.length; k++) {
let result_data = results_list[k].data;
if (!results_list[k].has_pic) {
try {
let detailJson = JSON.parse(rets2[results_list[k].detailUrlCount]);
2024-07-06 01:37:11 +08:00
log('二级数据列表元素数:' + detailJson.list.length);
2024-07-05 22:07:52 +08:00
result_data.forEach((d, _seq) => {
2024-07-06 01:37:11 +08:00
// let detailVodPic = detailJson.list[_seq].vod_pic;
// log(detailJson);
let detailVodPic = detailJson.list.find(vod => vod.vod_id.toString() === d.vod_id.split('$')[1]);
2024-07-05 22:07:52 +08:00
if (detailVodPic) {
2024-07-06 01:37:11 +08:00
Object.assign(d, {vod_pic: detailVodPic.vod_pic});
2024-07-05 22:07:52 +08:00
}
});
} catch (e) {
log(`强制获取网站${result_data[0].site_name}的搜索图片失败:${e.message}`);
}
}
results = results.concat(result_data);
}
2024-06-23 15:55:34 +08:00
} else {
urls.forEach((_url, idx) => {
let it = search_classes[idx];
try {
let html = request(_url);
let json = JSON.parse(html);
let data = json.list;
data.forEach(i => {
i.vod_id = it.type_id + '$' + i.vod_id;
i.vod_remarks = i.vod_remarks + '|' + it.type_name;
});
2024-07-05 22:07:52 +08:00
if (rule.search_match) {
data = data.filter(item => item.vod_name && (new RegExp(KEY, 'i')).test(item.vod_name))
}
if (data.length > 0) {
if (rule.search_pic && !data[0].vod_pic) {
log(`当前搜索站点【${it.type_name}】没图片,尝试访问二级去获取图片`);
let detailUrl = urls[idx].split('wd=')[0] + 'ac=detail&ids=' + data.map(k => k.vod_id.split('$')[1]).join(',');
try {
let detailJson = JSON.parse(request(detailUrl));
2024-07-06 01:37:11 +08:00
log('二级数据列表元素数:' + detailJson.list.length);
2024-07-05 22:07:52 +08:00
data.forEach((d, _seq) => {
2024-07-06 01:37:11 +08:00
// let detailVodPic = detailJson.list[_seq].vod_pic;
let detailVodPic = detailJson.list.find(vod => vod.vod_id.toString() === d.vod_id.split('$')[1]);
2024-07-05 22:07:52 +08:00
if (detailVodPic) {
2024-07-06 01:37:11 +08:00
Object.assign(d, {vod_pic: detailVodPic.vod_pic});
2024-07-05 22:07:52 +08:00
}
});
} catch (e) {
log(`强制获取网站${it.type_id}的搜索图片失败:${e.message}`);
}
}
results = results.concat(data);
}
2024-06-23 15:55:34 +08:00
results = results.concat(data);
} catch (e) {
log(`请求:${it.type_id}发生错误:${e.message}`)
}
});
}
VODS = results;
let t2 = new Date().getTime();
// log('t2:'+t2);
log(`${searchMode}搜索:${urls.length}个站耗时:${(Number(t2) - Number(t1))}ms`)
2024-06-20 22:15:35 +08:00
2024-06-22 15:43:31 +08:00
}
}
2024-06-20 22:15:35 +08:00
}
}),
lazy: $js.toString(() => {
// lazy想办法用对应的parse_url但是有难度暂未实现
2024-06-21 23:03:27 +08:00
let parse_url = '';
if (flag && flag.includes('|')) {
let type_name = flag.split('|')[0];
let current_vod = rule.classes.find(item => item.type_name === type_name);
if (current_vod && current_vod.parse_url) {
parse_url = current_vod.parse_url
}
}
2024-06-20 22:15:35 +08:00
if (/\.(m3u8|mp4)/.test(input)) {
input = {parse: 0, url: input}
} else {
2024-06-21 23:03:27 +08:00
if (parse_url.startsWith('json:')) {
let purl = parse_url.replace('json:', '') + input;
2024-06-20 22:15:35 +08:00
let html = request(purl);
input = {parse: 0, url: JSON.parse(html).url}
} else {
2024-06-21 23:03:27 +08:00
input = parse_url + input;
2024-06-20 22:15:35 +08:00
}
}
}),
}