box/app/Java源码/Xiaoh.java

473 lines
19 KiB
Java
Raw Normal View History

2022-05-27 23:11:48 +08:00
package com.github.catvod.spider;
import android.content.Context;
import android.text.TextUtils;
import com.github.catvod.crawler.Spider;
import com.github.catvod.crawler.SpiderDebug;
import com.github.catvod.utils.okhttp.OkHttpUtil;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Demo for self study
* <p>
* Source from Author: CatVod
*/
public class Xiaoh extends Spider {
/** Base URL of the site; every request URL in this class is built from it. */
private static final String siteUrl = "https://www.xiaoheimi.net";
/** Host name sent as the {@code Host} request header. */
private static final String siteHost = "www.xiaoheimi.net";
/**
 * Play source configuration.
 * Loaded from the "player" object of the JSON fetched in {@code init(Context, String)};
 * stays {@code null} when no configuration was loaded. Each entry is keyed by a play
 * source flag and is read for the keys "sh", "or", "pu" and "sn".
 */
private JSONObject playerConfig;
/**
 * Filter configuration.
 * Loaded from the "filter" object of the same JSON and returned to the caller
 * under the "filters" key of the home content.
 */
private JSONObject filterConfig;
// Pattern instances are immutable, so they are compiled once for the class instead of
// once per spider instance. The dots of "index.php" and ".html" are escaped so that
// they only match a literal '.' rather than any character.
/** Captures the category id from a category link. */
private static final Pattern regexCategory = Pattern.compile("/index\\.php/vod/type/id/(\\d+)\\.html");
/** Captures the video id from a detail page link. */
private static final Pattern regexVid = Pattern.compile("/index\\.php/vod/detail/id/(\\d+)\\.html");
/** Captures the video id, source id (sid) and episode id (nid) from a play page link. */
private static final Pattern regexPlay = Pattern.compile("/index\\.php/vod/play/id/(\\d+)/sid/(\\d+)/nid/(\\d+)\\.html");
/** Captures the page number from a pagination link. */
private static final Pattern regexPage = Pattern.compile("\\S+/page/(\\d+)\\S+");
/** The raw extend string handed to {@code init(Context, String)}; {@code null} until then. */
protected String ext = null;
/**
 * Initializes the spider without an extend string.
 * Only delegates to the superclass; no configuration is loaded by this overload.
 *
 * @param context context object forwarded unchanged to {@code super.init}
 */
@Override
public void init(Context context) {
super.init(context);
}
/**
 * Initializes the spider and, when the extend string points at one of the two
 * accepted configuration locations, downloads it and reads the "filter" and
 * "player" objects out of the returned JSON.
 *
 * @param context context object forwarded unchanged to {@code super.init}
 * @param extend  extend string; kept in {@code ext} and used as the configuration URL
 */
public void init(Context context, String extend) {
    super.init(context, extend);
    this.ext = extend;
    if (extend == null) {
        return;
    }
    // Only configuration URLs under these two prefixes are ever fetched.
    boolean acceptedLocation = extend.startsWith("https://buzhidaonalaide.coding.net")
            || extend.startsWith("https://tangsan99999.github.io/sub/setup/");
    if (!acceptedLocation) {
        return;
    }
    String body = OkHttpUtil.string(extend, null);
    try {
        JSONObject config = new JSONObject(body);
        filterConfig = config.optJSONObject("filter");
        playerConfig = config.optJSONObject("player");
    } catch (Exception e) {
        // An unusable response leaves both configurations as they were.
        SpiderDebug.log(e);
    }
}
/**
 * Builds the request headers used by the crawler.
 *
 * @param url the URL about to be requested (not used when building the headers)
 * @return a new map holding the fixed set of request headers
 */
protected HashMap<String, String> getHeaders(String url) {
    final String userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36";
    final String accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
    final String acceptLanguage = "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2";

    HashMap<String, String> requestHeaders = new HashMap<>();
    requestHeaders.put("method", "GET");
    requestHeaders.put("Host", siteHost);
    requestHeaders.put("Upgrade-Insecure-Requests", "1");
    requestHeaders.put("DNT", "1");
    requestHeaders.put("User-Agent", userAgent);
    requestHeaders.put("Accept", accept);
    requestHeaders.put("Accept-Language", acceptLanguage);
    return requestHeaders;
}
/**
 * Returns the category list plus the recommended videos shown on the home page.
 * <p>
 * The result is a JSON object with "class" (categories), optionally "filters",
 * and "list" (recommended videos) when the home page contains a second
 * {@code ul.myui-vodlist} section.
 *
 * @param filter whether the filter configuration is included under "filters"
 *               (per the original note, tied to the filter switch of the home
 *               data source in the app settings)
 * @return the JSON text, or an empty string when the page could not be processed
 */
@Override
public String homeContent(boolean filter) {
    try {
        Document doc = Jsoup.parse(OkHttpUtil.string(siteUrl, getHeaders(siteUrl)));
        // Category links in the navigation menu.
        Elements elements = doc.select("ul.nav-menu>li>a");
        JSONArray classes = new JSONArray();
        for (Element ele : elements) {
            String name = ele.text();
            boolean show = name.equals("电影") ||
                    name.equals("电视剧") ||
                    name.equals("动漫") ||
                    name.equals("综艺") ||
                    name.equals("纪录片") ||
                    name.equals("体育");
            if (!show) {
                continue;
            }
            Matcher categoryMatcher = regexCategory.matcher(ele.attr("href"));
            if (!categoryMatcher.find()) {
                continue;
            }
            // Collect the id and display name of the category.
            JSONObject category = new JSONObject();
            category.put("type_id", categoryMatcher.group(1).trim());
            category.put("type_name", name);
            classes.put(category);
        }
        JSONObject result = new JSONObject();
        if (filter) {
            result.put("filters", filterConfig);
        }
        result.put("class", classes);
        try {
            // The recommended videos are taken from the second vod list on the page.
            Elements sections = doc.select("ul.myui-vodlist");
            if (sections.size() > 1) {
                Elements list = sections.get(1).select("div.myui-vodlist__box>a");
                JSONArray videos = new JSONArray();
                for (Element vod : list) {
                    Matcher vidMatcher = regexVid.matcher(vod.attr("href"));
                    if (!vidMatcher.find()) {
                        continue;
                    }
                    // selectFirst returns null when the card has no remark badge; a
                    // missing badge must not discard the whole recommended list.
                    Element remarkNode = vod.selectFirst("span.pic-text");
                    JSONObject v = new JSONObject();
                    v.put("vod_id", vidMatcher.group(1));
                    v.put("vod_name", vod.attr("title"));
                    v.put("vod_pic", vod.attr("data-original"));
                    v.put("vod_remarks", remarkNode == null ? "" : remarkNode.text());
                    videos.put(v);
                }
                result.put("list", videos);
            }
        } catch (Exception e) {
            // Best effort: the categories are still returned without the video list.
            SpiderDebug.log(e);
        }
        return result.toString();
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return "";
}
/**
 * Returns one page of videos for a category.
 *
 * @param tid    category id
 * @param pg     page number
 * @param filter same switch as the {@code filter} argument of homeContent (not read here)
 * @param extend filter parameters {k:v, k1:v1}; a non-empty "tid" entry replaces {@code tid}
 * @return JSON text with "page", "pagecount", "limit", "total" and "list",
 *         or an empty string when the page could not be processed
 */
@Override
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) {
    try {
        StringBuilder urlBuilder = new StringBuilder(siteUrl).append("/index.php/vod/show/id/");
        String filterTid = extend == null ? null : extend.get("tid");
        if (filterTid != null && filterTid.length() > 0) {
            urlBuilder.append(filterTid);
        } else {
            urlBuilder.append(tid);
        }
        if (extend != null) {
            // NOTE(review): a "tid" entry is used as the id segment above AND is appended
            // again here as /tid/<value>; kept as in the original - confirm the site expects it.
            for (Map.Entry<String, String> entry : extend.entrySet()) {
                String value = entry.getValue();
                // Null or empty filter values are skipped instead of failing the request.
                if (value != null && value.length() > 0) {
                    // Explicit UTF-8: the one-argument encode is deprecated and
                    // depends on the platform default charset.
                    urlBuilder.append('/').append(entry.getKey())
                            .append('/').append(URLEncoder.encode(value, "UTF-8"));
                }
            }
        }
        urlBuilder.append("/page/").append(pg).append(".html");
        String url = urlBuilder.toString();
        String html = OkHttpUtil.string(url, getHeaders(url));
        Document doc = Jsoup.parse(html);
        JSONObject result = new JSONObject();
        int pageCount = 0;
        int page = -1;
        // Pagination links.
        Elements pageInfo = doc.select(".myui-page li a");
        if (pageInfo.size() == 0) {
            // No pager on the page: treat the requested page as the only one.
            page = Integer.parseInt(pg);
            pageCount = page;
        } else {
            for (Element a : pageInfo) {
                // The first link carrying the btn-warm class supplies the current page.
                if (page == -1 && a.hasClass("btn-warm")) {
                    page = categoryPageNumber(a.attr("href"));
                }
                // The "尾页" (last page) link carries the total page count.
                if (a.text().equals("尾页")) {
                    pageCount = categoryPageNumber(a.attr("href"));
                    break;
                }
            }
        }
        JSONArray videos = new JSONArray();
        if (!html.contains("没有找到您想要的结果哦")) {
            // Video cards of the current category page.
            Elements list = doc.select("div.myui-vodlist__box");
            for (Element vod : list) {
                // selectFirst returns null when nothing matches; a malformed card is
                // skipped or given empty fields instead of aborting the whole page.
                Element thumb = vod.selectFirst(".myui-vodlist__thumb");
                if (thumb == null) {
                    continue;
                }
                Matcher matcher = regexVid.matcher(thumb.attr("href"));
                if (!matcher.find()) {
                    continue;
                }
                Element titleNode = vod.selectFirst(".title");
                Element remarkNode = vod.selectFirst("span.pic-text");
                JSONObject v = new JSONObject();
                v.put("vod_id", matcher.group(1));
                // Fall back to the thumb's title attribute (empty when absent).
                v.put("vod_name", titleNode != null ? titleNode.text() : thumb.attr("title"));
                v.put("vod_pic", thumb.attr("data-original"));
                v.put("vod_remarks", remarkNode == null ? "" : remarkNode.text());
                videos.put(v);
            }
        }
        result.put("page", page);
        result.put("pagecount", pageCount);
        result.put("limit", 48);
        result.put("total", pageCount <= 1 ? videos.length() : pageCount * 48);
        result.put("list", videos);
        return result.toString();
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return "";
}

/** Returns the page number captured by regexPage in the given href, or 0 when it does not match. */
private int categoryPageNumber(String href) {
    Matcher matcher = regexPage.matcher(href);
    return matcher.find() ? Integer.parseInt(matcher.group(1).trim()) : 0;
}
/**
 * Returns the detail information of one video, including its play lists.
 * <p>
 * Only play sources whose tab label equals the "sh" value of an entry in the
 * player configuration are kept; they are ordered by that entry's "or" value,
 * sources with the same value keeping their order on the page. When no player
 * configuration is loaded the details are returned without play lists.
 *
 * @param ids video ids; only the first one is used
 * @return JSON text whose "list" array holds the single video object,
 *         or an empty string when the page could not be processed
 */
@Override
public String detailContent(List<String> ids) {
    try {
        // Detail page URL of the requested video.
        String url = siteUrl + "/index.php/vod/detail/id/" + ids.get(0) + ".html";
        Document doc = Jsoup.parse(OkHttpUtil.string(url, getHeaders(url)));
        JSONObject result = new JSONObject();
        JSONObject vodList = new JSONObject();

        // Basic fields. selectFirst returns null when nothing matches, so every
        // lookup is guarded and falls back to an empty value.
        Element hits = doc.selectFirst("span.mac_hits");
        String vid = hits == null ? "" : hits.attr("data-id");
        if (vid.length() == 0) {
            // Fall back to the id the page was requested with.
            vid = ids.get(0);
        }
        Element coverImg = doc.selectFirst("a.myui-vodlist__thumb img");
        String cover = coverImg == null ? "" : coverImg.attr("data-original");
        Element heading = doc.selectFirst("div.myui-content__detail h1.title");
        String title = heading == null ? "" : heading.text();
        Element descMeta = doc.selectFirst("meta[name=description]");
        String desc = descMeta == null ? "" : Jsoup.parse(descMeta.attr("content")).text();

        String category = "", area = "", year = "", remark = "", director = "", actor = "";
        for (Element label : doc.select("div.myui-content__detail span.text-muted")) {
            String info = label.text();
            if (info.equals("分类:")) {
                category = detailSiblingText(label);
            } else if (info.equals("年份:")) {
                year = detailSiblingText(label);
            } else if (info.equals("地区:")) {
                area = detailSiblingText(label);
            } else if (info.equals("更新:")) {
                remark = detailSiblingText(label);
            } else if (info.equals("导演:")) {
                director = detailLinkTexts(label);
            } else if (info.equals("主演:")) {
                actor = detailLinkTexts(label);
            }
        }
        vodList.put("vod_id", vid);
        vodList.put("vod_name", title);
        vodList.put("vod_pic", cover);
        vodList.put("type_name", category);
        vodList.put("vod_year", year);
        vodList.put("vod_area", area);
        vodList.put("vod_remarks", remark);
        vodList.put("vod_actor", actor);
        vodList.put("vod_director", director);
        vodList.put("vod_content", desc);

        // Play lists, kept in three parallel lists sorted by the configured order.
        // A stable ordered insert replaces the former TreeMap whose comparator never
        // returned 0 (a Comparator contract violation) and compared by subtraction.
        List<String> playFrom = new ArrayList<>();
        List<String> playUrls = new ArrayList<>();
        List<Integer> playOrders = new ArrayList<>();
        Elements tabHeads = doc.select("div.myui-panel__head>ul");
        if (playerConfig != null && !tabHeads.isEmpty()) {
            Elements sources = tabHeads.get(0).select("li");
            Elements sourceList = doc.select("div.tab-content>div.tab-pane");
            // Source tabs and episode panes are paired by index; ignore unpaired extras.
            int pairCount = Math.min(sources.size(), sourceList.size());
            for (int i = 0; i < pairCount; i++) {
                String sourceName = sources.get(i).text();
                // Map the tab label to the flag of the configuration entry showing that label.
                String flag = null;
                int order = Integer.MAX_VALUE;
                for (Iterator<String> it = playerConfig.keys(); it.hasNext(); ) {
                    String candidate = it.next();
                    JSONObject cfg = playerConfig.optJSONObject(candidate);
                    if (cfg != null && sourceName.equals(cfg.optString("sh", null))) {
                        flag = candidate;
                        // Entries without an "or" value sort last.
                        order = cfg.optInt("or", Integer.MAX_VALUE);
                        break;
                    }
                }
                if (flag == null) {
                    continue;
                }
                List<String> vodItems = new ArrayList<>();
                for (Element vod : sourceList.get(i).select("ul>li>a")) {
                    Matcher matcher = regexPlay.matcher(vod.attr("href"));
                    if (!matcher.find()) {
                        continue;
                    }
                    String playURL = matcher.group(1) + "/sid/" + matcher.group(2) + "/nid/" + matcher.group(3);
                    vodItems.add(vod.text() + "$" + playURL);
                }
                if (vodItems.isEmpty()) {
                    continue;
                }
                // Insert after every entry whose order is <= this one (stable).
                int pos = playOrders.size();
                while (pos > 0 && playOrders.get(pos - 1) > order) {
                    pos--;
                }
                playFrom.add(pos, flag);
                playUrls.add(pos, TextUtils.join("#", vodItems));
                playOrders.add(pos, order);
            }
        }
        if (!playFrom.isEmpty()) {
            vodList.put("vod_play_from", TextUtils.join("$$$", playFrom));
            vodList.put("vod_play_url", TextUtils.join("$$$", playUrls));
        }
        JSONArray list = new JSONArray();
        list.put(vodList);
        result.put("list", list);
        return result.toString();
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return "";
}

/** Returns the text of the element following the given label, or "" when there is none. */
private String detailSiblingText(Element label) {
    Element value = label.nextElementSibling();
    return value == null ? "" : value.text();
}

/** Joins with "," the texts of all links found under the parent of the given label. */
private String detailLinkTexts(Element label) {
    List<String> names = new ArrayList<>();
    Element row = label.parent();
    if (row != null) {
        for (Element link : row.select("a")) {
            names.add(link.text());
        }
    }
    return TextUtils.join(",", names);
}
/**
 * Returns the playback information of one episode.
 * <p>
 * Loads the play page, takes the first script whose content starts with
 * "var player_", parses the JSON object embedded in it and, when its "from"
 * value is a key of the player configuration, fills in the result.
 *
 * @param flag     play source (not read here)
 * @param id       episode id in the form produced by detailContent
 * @param vipFlags all sources that may need vip parsing (not read here)
 * @return JSON text (an empty object when nothing matched),
 *         or an empty string when the page could not be processed
 */
@Override
public String playerContent(String flag, String id, List<String> vipFlags) {
    try {
        // Play page URL.
        String pageUrl = siteUrl + "/index.php/vod/play/id/" + id + ".html";
        Document page = Jsoup.parse(OkHttpUtil.string(pageUrl, getHeaders(pageUrl)));
        Elements scripts = page.select("script");
        JSONObject result = new JSONObject();
        for (Element script : scripts) {
            String code = script.html().trim();
            if (!code.startsWith("var player_")) {
                continue;
            }
            // The player data is the JSON object between the first '{' and the last '}'.
            int open = code.indexOf('{');
            int close = code.lastIndexOf('}') + 1;
            JSONObject player = new JSONObject(code.substring(open, close));
            String from = player.getString("from");
            if (playerConfig.has(from)) {
                JSONObject sourceCfg = playerConfig.getJSONObject(from);
                String videoUrl = player.getString("url");
                String playUrl = sourceCfg.getString("pu");
                result.put("parse", sourceCfg.getInt("sn"));
                result.put("playUrl", playUrl);
                result.put("url", videoUrl);
                result.put("header", "");
            }
            // Only the first player script is considered.
            break;
        }
        return result.toString();
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return "";
}
/**
 * Searches videos through the site's suggest endpoint.
 *
 * @param key   search keyword
 * @param quick quick-search switch (not read here)
 * @return JSON text whose "list" array holds the matches (at most 10 are requested),
 *         or an empty string when the request could not be processed
 */
@Override
public String searchContent(String key, boolean quick) {
    try {
        long currentTime = System.currentTimeMillis();
        // Explicit UTF-8: the one-argument encode is deprecated and depends on
        // the platform default charset.
        String url = siteUrl + "/index.php/ajax/suggest?mid=1&wd=" + URLEncoder.encode(key, "UTF-8") + "&limit=10&timestamp=" + currentTime;
        JSONObject searchResult = new JSONObject(OkHttpUtil.string(url, getHeaders(url)));
        JSONObject result = new JSONObject();
        JSONArray videos = new JSONArray();
        if (searchResult.getInt("total") > 0) {
            // Read "list" as an array directly instead of relying on getString
            // turning an array back into text.
            JSONArray lists = searchResult.optJSONArray("list");
            if (lists == null) {
                // Fallback for a "list" delivered as JSON-encoded text, which the
                // previous getString-based parsing also accepted.
                lists = new JSONArray(searchResult.getString("list"));
            }
            for (int i = 0; i < lists.length(); i++) {
                JSONObject vod = lists.getJSONObject(i);
                JSONObject v = new JSONObject();
                v.put("vod_id", vod.getString("id"));
                v.put("vod_name", vod.getString("name"));
                // The cover is optional; a missing one must not abort the whole search.
                v.put("vod_pic", vod.optString("pic"));
                v.put("vod_remarks", "");
                videos.put(v);
            }
        }
        result.put("list", videos);
        return result.toString();
    } catch (Exception e) {
        SpiderDebug.log(e);
    }
    return "";
}
}