473 lines
19 KiB
Java
473 lines
19 KiB
Java
|
package com.github.catvod.spider;
|
||
|
|
||
|
import android.content.Context;
|
||
|
import android.text.TextUtils;
|
||
|
|
||
|
import com.github.catvod.crawler.Spider;
|
||
|
import com.github.catvod.crawler.SpiderDebug;
|
||
|
import com.github.catvod.utils.okhttp.OkHttpUtil;
|
||
|
|
||
|
import org.json.JSONArray;
|
||
|
import org.json.JSONException;
|
||
|
import org.json.JSONObject;
|
||
|
import org.jsoup.Jsoup;
|
||
|
import org.jsoup.nodes.Document;
|
||
|
import org.jsoup.nodes.Element;
|
||
|
import org.jsoup.select.Elements;
|
||
|
|
||
|
import java.net.URLEncoder;
|
||
|
import java.util.ArrayList;
|
||
|
import java.util.Comparator;
|
||
|
import java.util.HashMap;
|
||
|
import java.util.Iterator;
|
||
|
import java.util.List;
|
||
|
import java.util.Map;
|
||
|
import java.util.TreeMap;
|
||
|
import java.util.regex.Matcher;
|
||
|
import java.util.regex.Pattern;
|
||
|
|
||
|
/**
|
||
|
* Demo for self study
|
||
|
* <p>
|
||
|
* Source from Author: CatVod
|
||
|
*/
|
||
|
|
||
|
|
||
|
public class Xiaoh extends Spider {
|
||
|
private static final String siteUrl = "https://www.xiaoheimi.net";
|
||
|
private static final String siteHost = "www.xiaoheimi.net";
|
||
|
|
||
|
/**
|
||
|
* 播放源配置
|
||
|
*/
|
||
|
private JSONObject playerConfig;
|
||
|
/**
|
||
|
* 筛选配置
|
||
|
*/
|
||
|
private JSONObject filterConfig;
|
||
|
private Pattern regexCategory = Pattern.compile("/index.php/vod/type/id/(\\d+).html");
|
||
|
private Pattern regexVid = Pattern.compile("/index.php/vod/detail/id/(\\d+).html");
|
||
|
private Pattern regexPlay = Pattern.compile("/index.php/vod/play/id/(\\d+)/sid/(\\d+)/nid/(\\d+).html");
|
||
|
private Pattern regexPage = Pattern.compile("\\S+/page/(\\d+)\\S+");
|
||
|
|
||
|
|
||
|
protected String ext = null;
|
||
|
|
||
|
@Override
|
||
|
public void init(Context context) {
|
||
|
super.init(context);
|
||
|
}
|
||
|
|
||
|
public void init(Context context, String extend) {
|
||
|
super.init(context, extend);
|
||
|
this.ext = extend;
|
||
|
if (ext != null) {
|
||
|
if (ext.startsWith("https://buzhidaonalaide.coding.net") || (ext.startsWith("https://tangsan99999.github.io/sub/setup/"))) {
|
||
|
String json = OkHttpUtil.string(ext, null);
|
||
|
try {
|
||
|
JSONObject jsonObj = new JSONObject(json);
|
||
|
filterConfig = jsonObj.optJSONObject("filter");
|
||
|
playerConfig = jsonObj.optJSONObject("player");
|
||
|
} catch (Exception e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* 爬虫headers
|
||
|
*
|
||
|
* @param url
|
||
|
* @return
|
||
|
*/
|
||
|
protected HashMap<String, String> getHeaders(String url) {
|
||
|
HashMap<String, String> headers = new HashMap<>();
|
||
|
headers.put("method", "GET");
|
||
|
headers.put("Host", siteHost);
|
||
|
headers.put("Upgrade-Insecure-Requests", "1");
|
||
|
headers.put("DNT", "1");
|
||
|
headers.put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36");
|
||
|
headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
|
||
|
headers.put("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
|
||
|
return headers;
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* 获取分类数据 + 首页最近更新视频列表数据
|
||
|
*
|
||
|
* @param filter 是否开启筛选 关联的是 软件设置中 首页数据源里的筛选开关
|
||
|
* @return
|
||
|
*/
|
||
|
@Override
|
||
|
public String homeContent(boolean filter) {
|
||
|
try {
|
||
|
Document doc = Jsoup.parse(OkHttpUtil.string(siteUrl, getHeaders(siteUrl)));
|
||
|
// 分类节点
|
||
|
Elements elements = doc.select("ul.nav-menu>li>a");
|
||
|
JSONArray classes = new JSONArray();
|
||
|
for (Element ele : elements) {
|
||
|
String name = ele.text();
|
||
|
boolean show = name.equals("电影") ||
|
||
|
name.equals("电视剧") ||
|
||
|
name.equals("动漫") ||
|
||
|
name.equals("综艺") ||
|
||
|
name.equals("纪录片") ||
|
||
|
name.equals("体育");
|
||
|
if (show) {
|
||
|
Matcher mather = regexCategory.matcher(ele.attr("href"));
|
||
|
if (!mather.find())
|
||
|
continue;
|
||
|
// 把分类的id和名称取出来加到列表里
|
||
|
String id = mather.group(1).trim();
|
||
|
JSONObject jsonObject = new JSONObject();
|
||
|
jsonObject.put("type_id", id);
|
||
|
jsonObject.put("type_name", name);
|
||
|
classes.put(jsonObject);
|
||
|
}
|
||
|
}
|
||
|
JSONObject result = new JSONObject();
|
||
|
if (filter) {
|
||
|
result.put("filters", filterConfig);
|
||
|
}
|
||
|
result.put("class", classes);
|
||
|
try {
|
||
|
// 取首页推荐视频列表
|
||
|
Element homeList = doc.select("ul.myui-vodlist").get(1);
|
||
|
Elements list = homeList.select("div.myui-vodlist__box>a");
|
||
|
JSONArray videos = new JSONArray();
|
||
|
for (int i = 0; i < list.size(); i++) {
|
||
|
Element vod = list.get(i);
|
||
|
String title = vod.attr("title");
|
||
|
String cover = vod.attr("data-original");
|
||
|
String remark = vod.selectFirst("span.pic-text").text();
|
||
|
Matcher matcher = regexVid.matcher(vod.attr("href"));
|
||
|
if (!matcher.find())
|
||
|
continue;
|
||
|
String id = matcher.group(1);
|
||
|
JSONObject v = new JSONObject();
|
||
|
v.put("vod_id", id);
|
||
|
v.put("vod_name", title);
|
||
|
v.put("vod_pic", cover);
|
||
|
v.put("vod_remarks", remark);
|
||
|
videos.put(v);
|
||
|
}
|
||
|
result.put("list", videos);
|
||
|
} catch (Exception e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
return result.toString();
|
||
|
} catch (Exception e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
return "";
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* 获取分类信息数据
|
||
|
*
|
||
|
* @param tid 分类id
|
||
|
* @param pg 页数
|
||
|
* @param filter 同homeContent方法中的filter
|
||
|
* @param extend 筛选参数{k:v, k1:v1}
|
||
|
* @return
|
||
|
*/
|
||
|
@Override
|
||
|
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) {
|
||
|
try {
|
||
|
String url = siteUrl + "/index.php/vod/show/id/";
|
||
|
if (extend != null && extend.size() > 0 && extend.containsKey("tid") && extend.get("tid").length() > 0) {
|
||
|
url += extend.get("tid");
|
||
|
} else {
|
||
|
url += tid;
|
||
|
}
|
||
|
if (extend != null && extend.size() > 0) {
|
||
|
for (Iterator<String> it = extend.keySet().iterator(); it.hasNext(); ) {
|
||
|
String key = it.next();
|
||
|
String value = extend.get(key);
|
||
|
if (value.length() > 0) {
|
||
|
url += "/" + key + "/" + URLEncoder.encode(value);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
url += "/page/" + pg + ".html";
|
||
|
String html = OkHttpUtil.string(url, getHeaders(url));
|
||
|
Document doc = Jsoup.parse(html);
|
||
|
JSONObject result = new JSONObject();
|
||
|
int pageCount = 0;
|
||
|
int page = -1;
|
||
|
|
||
|
// 取页码相关信息
|
||
|
Elements pageInfo = doc.select(".myui-page li a");
|
||
|
if (pageInfo.size() == 0) {
|
||
|
page = Integer.parseInt(pg);
|
||
|
pageCount = page;
|
||
|
} else {
|
||
|
for (int i = 0; i < pageInfo.size(); i++) {
|
||
|
Element a = pageInfo.get(i);
|
||
|
String name = a.text();
|
||
|
if (page == -1 && a.hasClass("btn-warm")) {
|
||
|
Matcher matcher = regexPage.matcher(a.attr("href"));
|
||
|
if (matcher.find()) {
|
||
|
page = Integer.parseInt(matcher.group(1).trim());
|
||
|
} else {
|
||
|
page = 0;
|
||
|
}
|
||
|
}
|
||
|
if (name.equals("尾页")) {
|
||
|
Matcher matcher = regexPage.matcher(a.attr("href"));
|
||
|
if (matcher.find()) {
|
||
|
pageCount = Integer.parseInt(matcher.group(1).trim());
|
||
|
} else {
|
||
|
pageCount = 0;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
JSONArray videos = new JSONArray();
|
||
|
if (!html.contains("没有找到您想要的结果哦")) {
|
||
|
// 取当前分类页的视频列表
|
||
|
Elements list = doc.select("div.myui-vodlist__box");
|
||
|
for (int i = 0; i < list.size(); i++) {
|
||
|
Element vod = list.get(i);
|
||
|
String title = vod.selectFirst(".title").text();
|
||
|
String cover = vod.selectFirst(".myui-vodlist__thumb").attr("data-original");
|
||
|
String remark = vod.selectFirst("span.pic-text").text();
|
||
|
|
||
|
Matcher matcher = regexVid.matcher(vod.selectFirst(".myui-vodlist__thumb").attr("href"));
|
||
|
if (!matcher.find())
|
||
|
continue;
|
||
|
String id = matcher.group(1);
|
||
|
JSONObject v = new JSONObject();
|
||
|
v.put("vod_id", id);
|
||
|
v.put("vod_name", title);
|
||
|
v.put("vod_pic", cover);
|
||
|
v.put("vod_remarks", remark);
|
||
|
videos.put(v);
|
||
|
}
|
||
|
}
|
||
|
result.put("page", page);
|
||
|
result.put("pagecount", pageCount);
|
||
|
result.put("limit", 48);
|
||
|
result.put("total", pageCount <= 1 ? videos.length() : pageCount * 48);
|
||
|
|
||
|
result.put("list", videos);
|
||
|
return result.toString();
|
||
|
} catch (Exception e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
return "";
|
||
|
}
|
||
|
|
||
|
|
||
|
/**
|
||
|
* 视频详情信息
|
||
|
*
|
||
|
* @param ids 视频id
|
||
|
* @return
|
||
|
*/
|
||
|
@Override
|
||
|
public String detailContent(List<String> ids) {
|
||
|
try {
|
||
|
// 视频详情url
|
||
|
String url = siteUrl + "/index.php/vod/detail/id/" + ids.get(0) + ".html";
|
||
|
Document doc = Jsoup.parse(OkHttpUtil.string(url, getHeaders(url)));
|
||
|
JSONObject result = new JSONObject();
|
||
|
JSONObject vodList = new JSONObject();
|
||
|
|
||
|
// 取基本数据
|
||
|
String vid = doc.selectFirst("span.mac_hits").attr("data-id");
|
||
|
|
||
|
String cover = doc.selectFirst("a.myui-vodlist__thumb img").attr("data-original");
|
||
|
String title = doc.selectFirst("div.myui-content__detail h1.title").text();
|
||
|
String desc = Jsoup.parse(doc.selectFirst("meta[name=description]").attr("content")).text();
|
||
|
String category = "", area = "", year = "", remark = "", director = "", actor = "";
|
||
|
Elements span_text_muted = doc.select("div.myui-content__detail span.text-muted");
|
||
|
for (int i = 0; i < span_text_muted.size(); i++) {
|
||
|
Element text = span_text_muted.get(i);
|
||
|
String info = text.text();
|
||
|
if (info.equals("分类:")) {
|
||
|
category = text.nextElementSibling().text();
|
||
|
} else if (info.equals("年份:")) {
|
||
|
year = text.nextElementSibling().text();
|
||
|
} else if (info.equals("地区:")) {
|
||
|
area = text.nextElementSibling().text();
|
||
|
} else if (info.equals("更新:")) {
|
||
|
remark = text.nextElementSibling().text();
|
||
|
} else if (info.equals("导演:")) {
|
||
|
List<String> directors = new ArrayList<>();
|
||
|
Elements aa = text.parent().select("a");
|
||
|
for (int j = 0; j < aa.size(); j++) {
|
||
|
directors.add(aa.get(j).text());
|
||
|
}
|
||
|
director = TextUtils.join(",", directors);
|
||
|
} else if (info.equals("主演:")) {
|
||
|
List<String> actors = new ArrayList<>();
|
||
|
Elements aa = text.parent().select("a");
|
||
|
for (int j = 0; j < aa.size(); j++) {
|
||
|
actors.add(aa.get(j).text());
|
||
|
}
|
||
|
actor = TextUtils.join(",", actors);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
vodList.put("vod_id", vid);
|
||
|
vodList.put("vod_name", title);
|
||
|
vodList.put("vod_pic", cover);
|
||
|
vodList.put("type_name", category);
|
||
|
vodList.put("vod_year", year);
|
||
|
vodList.put("vod_area", area);
|
||
|
vodList.put("vod_remarks", remark);
|
||
|
vodList.put("vod_actor", actor);
|
||
|
vodList.put("vod_director", director);
|
||
|
vodList.put("vod_content", desc);
|
||
|
|
||
|
Map<String, String> vod_play = new TreeMap<>(new Comparator<String>() {
|
||
|
@Override
|
||
|
public int compare(String o1, String o2) {
|
||
|
try {
|
||
|
int sort1 = playerConfig.getJSONObject(o1).getInt("or");
|
||
|
int sort2 = playerConfig.getJSONObject(o2).getInt("or");
|
||
|
|
||
|
if (sort1 == sort2) {
|
||
|
return 1;
|
||
|
}
|
||
|
return sort1 - sort2 > 0 ? 1 : -1;
|
||
|
} catch (JSONException e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
return 1;
|
||
|
}
|
||
|
});
|
||
|
|
||
|
// 取播放列表数据
|
||
|
Elements sources = doc.select("div.myui-panel__head>ul").get(0).select("li");
|
||
|
Elements sourceList = doc.select("div.tab-content>div.tab-pane");
|
||
|
|
||
|
for (int i = 0; i < sources.size(); i++) {
|
||
|
Element source = sources.get(i);
|
||
|
String sourceName = source.text();
|
||
|
boolean found = false;
|
||
|
for (Iterator<String> it = playerConfig.keys(); it.hasNext(); ) {
|
||
|
String flag = it.next();
|
||
|
if (playerConfig.getJSONObject(flag).getString("sh").equals(sourceName)) {
|
||
|
sourceName = flag;
|
||
|
found = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (!found)
|
||
|
continue;
|
||
|
String playList = "";
|
||
|
Elements playListA = sourceList.get(i).select("ul>li>a");
|
||
|
List<String> vodItems = new ArrayList<>();
|
||
|
|
||
|
for (int j = 0; j < playListA.size(); j++) {
|
||
|
Element vod = playListA.get(j);
|
||
|
Matcher matcher = regexPlay.matcher(vod.attr("href"));
|
||
|
if (!matcher.find())
|
||
|
continue;
|
||
|
String playURL = matcher.group(1) + "/sid/" + matcher.group(2) + "/nid/" + matcher.group(3);
|
||
|
vodItems.add(vod.text() + "$" + playURL);
|
||
|
}
|
||
|
if (vodItems.size() > 0)
|
||
|
playList = TextUtils.join("#", vodItems);
|
||
|
|
||
|
if (playList.length() == 0)
|
||
|
continue;
|
||
|
|
||
|
vod_play.put(sourceName, playList);
|
||
|
}
|
||
|
|
||
|
if (vod_play.size() > 0) {
|
||
|
String vod_play_from = TextUtils.join("$$$", vod_play.keySet());
|
||
|
String vod_play_url = TextUtils.join("$$$", vod_play.values());
|
||
|
vodList.put("vod_play_from", vod_play_from);
|
||
|
vodList.put("vod_play_url", vod_play_url);
|
||
|
}
|
||
|
JSONArray list = new JSONArray();
|
||
|
list.put(vodList);
|
||
|
result.put("list", list);
|
||
|
return result.toString();
|
||
|
} catch (Exception e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
return "";
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* 获取视频播放信息
|
||
|
*
|
||
|
* @param flag 播放源
|
||
|
* @param id 视频id
|
||
|
* @param vipFlags 所有可能需要vip解析的源
|
||
|
* @return
|
||
|
*/
|
||
|
@Override
|
||
|
public String playerContent(String flag, String id, List<String> vipFlags) {
|
||
|
try {
|
||
|
// 播放页 url
|
||
|
String url = siteUrl + "/index.php/vod/play/id/" + id + ".html";
|
||
|
Document doc = Jsoup.parse(OkHttpUtil.string(url, getHeaders(url)));
|
||
|
Elements allScript = doc.select("script");
|
||
|
JSONObject result = new JSONObject();
|
||
|
for (int i = 0; i < allScript.size(); i++) {
|
||
|
String scContent = allScript.get(i).html().trim();
|
||
|
if (scContent.startsWith("var player_")) { // 取直链
|
||
|
int start = scContent.indexOf('{');
|
||
|
int end = scContent.lastIndexOf('}') + 1;
|
||
|
String json = scContent.substring(start, end);
|
||
|
JSONObject player = new JSONObject(json);
|
||
|
if (playerConfig.has(player.getString("from"))) {
|
||
|
JSONObject pCfg = playerConfig.getJSONObject(player.getString("from"));
|
||
|
String videoUrl = player.getString("url");
|
||
|
String playUrl = pCfg.getString("pu");
|
||
|
result.put("parse", pCfg.getInt("sn"));
|
||
|
result.put("playUrl", playUrl);
|
||
|
result.put("url", videoUrl);
|
||
|
result.put("header", "");
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
return result.toString();
|
||
|
} catch (Exception e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
return "";
|
||
|
}
|
||
|
|
||
|
@Override
|
||
|
public String searchContent(String key, boolean quick) {
|
||
|
try {
|
||
|
long currentTime = System.currentTimeMillis();
|
||
|
String url = siteUrl + "/index.php/ajax/suggest?mid=1&wd=" + URLEncoder.encode(key) + "&limit=10×tamp=" + currentTime;
|
||
|
JSONObject searchResult = new JSONObject(OkHttpUtil.string(url, getHeaders(url)));
|
||
|
JSONObject result = new JSONObject();
|
||
|
JSONArray videos = new JSONArray();
|
||
|
if (searchResult.getInt("total") > 0) {
|
||
|
JSONArray lists = new JSONArray(searchResult.getString("list"));
|
||
|
for (int i = 0; i < lists.length(); i++) {
|
||
|
JSONObject vod = lists.getJSONObject(i);
|
||
|
String id = vod.getString("id");
|
||
|
String title = vod.getString("name");
|
||
|
String cover = vod.getString("pic");
|
||
|
JSONObject v = new JSONObject();
|
||
|
v.put("vod_id", id);
|
||
|
v.put("vod_name", title);
|
||
|
v.put("vod_pic", cover);
|
||
|
v.put("vod_remarks", "");
|
||
|
videos.put(v);
|
||
|
}
|
||
|
}
|
||
|
result.put("list", videos);
|
||
|
return result.toString();
|
||
|
} catch (Exception e) {
|
||
|
SpiderDebug.log(e);
|
||
|
}
|
||
|
return "";
|
||
|
}
|
||
|
}
|