diff --git a/jar/Java源码/Xiaoh.java b/jar/Java源码/Xiaoh.java new file mode 100644 index 0000000..0315533 --- /dev/null +++ b/jar/Java源码/Xiaoh.java @@ -0,0 +1,472 @@ +package com.github.catvod.spider; + +import android.content.Context; +import android.text.TextUtils; + +import com.github.catvod.crawler.Spider; +import com.github.catvod.crawler.SpiderDebug; +import com.github.catvod.utils.okhttp.OkHttpUtil; + +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Demo for self study + *

+ * Source from Author: CatVod + */ + + +public class Xiaoh extends Spider { + private static final String siteUrl = "https://www.xiaoheimi.net"; + private static final String siteHost = "www.xiaoheimi.net"; + + /** + * 播放源配置 + */ + private JSONObject playerConfig; + /** + * 筛选配置 + */ + private JSONObject filterConfig; + private Pattern regexCategory = Pattern.compile("/index.php/vod/type/id/(\\d+).html"); + private Pattern regexVid = Pattern.compile("/index.php/vod/detail/id/(\\d+).html"); + private Pattern regexPlay = Pattern.compile("/index.php/vod/play/id/(\\d+)/sid/(\\d+)/nid/(\\d+).html"); + private Pattern regexPage = Pattern.compile("\\S+/page/(\\d+)\\S+"); + + + protected String ext = null; + + @Override + public void init(Context context) { + super.init(context); + } + + public void init(Context context, String extend) { + super.init(context, extend); + this.ext = extend; + if (ext != null) { + if (ext.startsWith("https://buzhidaonalaide.coding.net") || (ext.startsWith("https://tangsan99999.github.io/sub/setup/"))) { + String json = OkHttpUtil.string(ext, null); + try { + JSONObject jsonObj = new JSONObject(json); + filterConfig = jsonObj.optJSONObject("filter"); + playerConfig = jsonObj.optJSONObject("player"); + } catch (Exception e) { + SpiderDebug.log(e); + } + } + } + } + + /** + * 爬虫headers + * + * @param url + * @return + */ + protected HashMap getHeaders(String url) { + HashMap headers = new HashMap<>(); + headers.put("method", "GET"); + headers.put("Host", siteHost); + headers.put("Upgrade-Insecure-Requests", "1"); + headers.put("DNT", "1"); + headers.put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"); + headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"); + headers.put("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2"); + return headers; + } + + /** + * 获取分类数据 + 首页最近更新视频列表数据 + * + * @param filter 是否开启筛选 关联的是 软件设置中 首页数据源里的筛选开关 + * @return + */ + @Override + public String homeContent(boolean filter) { + try { + Document doc = Jsoup.parse(OkHttpUtil.string(siteUrl, getHeaders(siteUrl))); + // 分类节点 + Elements elements = doc.select("ul.nav-menu>li>a"); + JSONArray classes = new JSONArray(); + for (Element ele : elements) { + String name = ele.text(); + boolean show = name.equals("电影") || + name.equals("电视剧") || + name.equals("动漫") || + name.equals("综艺") || + name.equals("纪录片") || + name.equals("体育"); + if (show) { + Matcher mather = regexCategory.matcher(ele.attr("href")); + if (!mather.find()) + continue; + // 把分类的id和名称取出来加到列表里 + String id = mather.group(1).trim(); + JSONObject jsonObject = new JSONObject(); + jsonObject.put("type_id", id); + jsonObject.put("type_name", name); + classes.put(jsonObject); + } + } + JSONObject result = new JSONObject(); + if (filter) { + result.put("filters", filterConfig); + } + result.put("class", classes); + try { + // 取首页推荐视频列表 + Element homeList = doc.select("ul.myui-vodlist").get(1); + Elements list = homeList.select("div.myui-vodlist__box>a"); + JSONArray videos = new JSONArray(); + for (int i = 0; i < list.size(); i++) { + Element vod = list.get(i); + String title = vod.attr("title"); + String cover = vod.attr("data-original"); + String remark = vod.selectFirst("span.pic-text").text(); + Matcher matcher = regexVid.matcher(vod.attr("href")); + if (!matcher.find()) + continue; + String id = matcher.group(1); + JSONObject v = new JSONObject(); + v.put("vod_id", id); + v.put("vod_name", title); + v.put("vod_pic", cover); + v.put("vod_remarks", remark); + videos.put(v); + } + result.put("list", videos); + } catch (Exception e) { + SpiderDebug.log(e); + } + return result.toString(); + } catch (Exception e) { + SpiderDebug.log(e); + } + return ""; + } + + /** + * 获取分类信息数据 + * + * @param tid 分类id + * @param pg 页数 + * @param filter 同homeContent方法中的filter + * @param extend 筛选参数{k:v, k1:v1} + * @return + */ + @Override + public String categoryContent(String tid, String pg, boolean filter, HashMap extend) { + try { + String url = siteUrl + "/index.php/vod/show/id/"; + if (extend != null && extend.size() > 0 && extend.containsKey("tid") && extend.get("tid").length() > 0) { + url += extend.get("tid"); + } else { + url += tid; + } + if (extend != null && extend.size() > 0) { + for (Iterator it = extend.keySet().iterator(); it.hasNext(); ) { + String key = it.next(); + String value = extend.get(key); + if (value.length() > 0) { + url += "/" + key + "/" + URLEncoder.encode(value); + } + } + } + url += "/page/" + pg + ".html"; + String html = OkHttpUtil.string(url, getHeaders(url)); + Document doc = Jsoup.parse(html); + JSONObject result = new JSONObject(); + int pageCount = 0; + int page = -1; + + // 取页码相关信息 + Elements pageInfo = doc.select(".myui-page li a"); + if (pageInfo.size() == 0) { + page = Integer.parseInt(pg); + pageCount = page; + } else { + for (int i = 0; i < pageInfo.size(); i++) { + Element a = pageInfo.get(i); + String name = a.text(); + if (page == -1 && a.hasClass("btn-warm")) { + Matcher matcher = regexPage.matcher(a.attr("href")); + if (matcher.find()) { + page = Integer.parseInt(matcher.group(1).trim()); + } else { + page = 0; + } + } + if (name.equals("尾页")) { + Matcher matcher = regexPage.matcher(a.attr("href")); + if (matcher.find()) { + pageCount = Integer.parseInt(matcher.group(1).trim()); + } else { + pageCount = 0; + } + break; + } + } + } + + JSONArray videos = new JSONArray(); + if (!html.contains("没有找到您想要的结果哦")) { + // 取当前分类页的视频列表 + Elements list = doc.select("div.myui-vodlist__box"); + for (int i = 0; i < list.size(); i++) { + Element vod = list.get(i); + String title = vod.selectFirst(".title").text(); + String cover = vod.selectFirst(".myui-vodlist__thumb").attr("data-original"); + String remark = vod.selectFirst("span.pic-text").text(); + + Matcher matcher = regexVid.matcher(vod.selectFirst(".myui-vodlist__thumb").attr("href")); + if (!matcher.find()) + continue; + String id = matcher.group(1); + JSONObject v = new JSONObject(); + v.put("vod_id", id); + v.put("vod_name", title); + v.put("vod_pic", cover); + v.put("vod_remarks", remark); + videos.put(v); + } + } + result.put("page", page); + result.put("pagecount", pageCount); + result.put("limit", 48); + result.put("total", pageCount <= 1 ? videos.length() : pageCount * 48); + + result.put("list", videos); + return result.toString(); + } catch (Exception e) { + SpiderDebug.log(e); + } + return ""; + } + + + /** + * 视频详情信息 + * + * @param ids 视频id + * @return + */ + @Override + public String detailContent(List ids) { + try { + // 视频详情url + String url = siteUrl + "/index.php/vod/detail/id/" + ids.get(0) + ".html"; + Document doc = Jsoup.parse(OkHttpUtil.string(url, getHeaders(url))); + JSONObject result = new JSONObject(); + JSONObject vodList = new JSONObject(); + + // 取基本数据 + String vid = doc.selectFirst("span.mac_hits").attr("data-id"); + + String cover = doc.selectFirst("a.myui-vodlist__thumb img").attr("data-original"); + String title = doc.selectFirst("div.myui-content__detail h1.title").text(); + String desc = Jsoup.parse(doc.selectFirst("meta[name=description]").attr("content")).text(); + String category = "", area = "", year = "", remark = "", director = "", actor = ""; + Elements span_text_muted = doc.select("div.myui-content__detail span.text-muted"); + for (int i = 0; i < span_text_muted.size(); i++) { + Element text = span_text_muted.get(i); + String info = text.text(); + if (info.equals("分类:")) { + category = text.nextElementSibling().text(); + } else if (info.equals("年份:")) { + year = text.nextElementSibling().text(); + } else if (info.equals("地区:")) { + area = text.nextElementSibling().text(); + } else if (info.equals("更新:")) { + remark = text.nextElementSibling().text(); + } else if (info.equals("导演:")) { + List directors = new ArrayList<>(); + Elements aa = text.parent().select("a"); + for (int j = 0; j < aa.size(); j++) { + directors.add(aa.get(j).text()); + } + director = TextUtils.join(",", directors); + } else if (info.equals("主演:")) { + List actors = new ArrayList<>(); + Elements aa = text.parent().select("a"); + for (int j = 0; j < aa.size(); j++) { + actors.add(aa.get(j).text()); + } + actor = TextUtils.join(",", actors); + } + } + + vodList.put("vod_id", vid); + vodList.put("vod_name", title); + vodList.put("vod_pic", cover); + vodList.put("type_name", category); + vodList.put("vod_year", year); + vodList.put("vod_area", area); + vodList.put("vod_remarks", remark); + vodList.put("vod_actor", actor); + vodList.put("vod_director", director); + vodList.put("vod_content", desc); + + Map vod_play = new TreeMap<>(new Comparator() { + @Override + public int compare(String o1, String o2) { + try { + int sort1 = playerConfig.getJSONObject(o1).getInt("or"); + int sort2 = playerConfig.getJSONObject(o2).getInt("or"); + + if (sort1 == sort2) { + return 1; + } + return sort1 - sort2 > 0 ? 1 : -1; + } catch (JSONException e) { + SpiderDebug.log(e); + } + return 1; + } + }); + + // 取播放列表数据 + Elements sources = doc.select("div.myui-panel__head>ul").get(0).select("li"); + Elements sourceList = doc.select("div.tab-content>div.tab-pane"); + + for (int i = 0; i < sources.size(); i++) { + Element source = sources.get(i); + String sourceName = source.text(); + boolean found = false; + for (Iterator it = playerConfig.keys(); it.hasNext(); ) { + String flag = it.next(); + if (playerConfig.getJSONObject(flag).getString("sh").equals(sourceName)) { + sourceName = flag; + found = true; + break; + } + } + if (!found) + continue; + String playList = ""; + Elements playListA = sourceList.get(i).select("ul>li>a"); + List vodItems = new ArrayList<>(); + + for (int j = 0; j < playListA.size(); j++) { + Element vod = playListA.get(j); + Matcher matcher = regexPlay.matcher(vod.attr("href")); + if (!matcher.find()) + continue; + String playURL = matcher.group(1) + "/sid/" + matcher.group(2) + "/nid/" + matcher.group(3); + vodItems.add(vod.text() + "$" + playURL); + } + if (vodItems.size() > 0) + playList = TextUtils.join("#", vodItems); + + if (playList.length() == 0) + continue; + + vod_play.put(sourceName, playList); + } + + if (vod_play.size() > 0) { + String vod_play_from = TextUtils.join("$$$", vod_play.keySet()); + String vod_play_url = TextUtils.join("$$$", vod_play.values()); + vodList.put("vod_play_from", vod_play_from); + vodList.put("vod_play_url", vod_play_url); + } + JSONArray list = new JSONArray(); + list.put(vodList); + result.put("list", list); + return result.toString(); + } catch (Exception e) { + SpiderDebug.log(e); + } + return ""; + } + + /** + * 获取视频播放信息 + * + * @param flag 播放源 + * @param id 视频id + * @param vipFlags 所有可能需要vip解析的源 + * @return + */ + @Override + public String playerContent(String flag, String id, List vipFlags) { + try { + // 播放页 url + String url = siteUrl + "/index.php/vod/play/id/" + id + ".html"; + Document doc = Jsoup.parse(OkHttpUtil.string(url, getHeaders(url))); + Elements allScript = doc.select("script"); + JSONObject result = new JSONObject(); + for (int i = 0; i < allScript.size(); i++) { + String scContent = allScript.get(i).html().trim(); + if (scContent.startsWith("var player_")) { // 取直链 + int start = scContent.indexOf('{'); + int end = scContent.lastIndexOf('}') + 1; + String json = scContent.substring(start, end); + JSONObject player = new JSONObject(json); + if (playerConfig.has(player.getString("from"))) { + JSONObject pCfg = playerConfig.getJSONObject(player.getString("from")); + String videoUrl = player.getString("url"); + String playUrl = pCfg.getString("pu"); + result.put("parse", pCfg.getInt("sn")); + result.put("playUrl", playUrl); + result.put("url", videoUrl); + result.put("header", ""); + } + break; + } + } + return result.toString(); + } catch (Exception e) { + SpiderDebug.log(e); + } + return ""; + } + + @Override + public String searchContent(String key, boolean quick) { + try { + long currentTime = System.currentTimeMillis(); + String url = siteUrl + "/index.php/ajax/suggest?mid=1&wd=" + URLEncoder.encode(key) + "&limit=10×tamp=" + currentTime; + JSONObject searchResult = new JSONObject(OkHttpUtil.string(url, getHeaders(url))); + JSONObject result = new JSONObject(); + JSONArray videos = new JSONArray(); + if (searchResult.getInt("total") > 0) { + JSONArray lists = new JSONArray(searchResult.getString("list")); + for (int i = 0; i < lists.length(); i++) { + JSONObject vod = lists.getJSONObject(i); + String id = vod.getString("id"); + String title = vod.getString("name"); + String cover = vod.getString("pic"); + JSONObject v = new JSONObject(); + v.put("vod_id", id); + v.put("vod_name", title); + v.put("vod_pic", cover); + v.put("vod_remarks", ""); + videos.put(v); + } + } + result.put("list", videos); + return result.toString(); + } catch (Exception e) { + SpiderDebug.log(e); + } + return ""; + } +}