# coding=utf-8
# !/usr/bin/python
import sys
sys.path.append('..')
from base.spider import Spider
import json
import requests
from requests import session, utils
import os
import time
import base64
from urllib.parse import quote


class Spider(Spider):  # Metaclass: the default metaclass, type
    """Bilibili video source built on the project's base ``Spider``.

    Categories are fixed search keywords; listings and search both go
    through Bilibili's web search API, details through the ``view`` API and
    playback through the ``playurl`` API.

    ``self.fetch`` is inherited from the base class and is not visible in
    this file; it is assumed to return a requests-like response exposing
    ``.text`` and ``.cookies`` -- TODO confirm against base.spider.
    """

    def getName(self):
        """Return the display name of this source."""
        return "哔哩"

    def init(self, extend=""):
        """Print the ``extend`` argument; no other initialisation is done."""
        print("============{0}============".format(extend))

    def isVideoFormat(self, url):
        """Not implemented; returns None."""
        pass

    def manualVideoCheck(self):
        """Not implemented; returns None."""
        pass

    def homeContent(self, filter):
        """Return the category list, plus filter config when ``filter`` is truthy.

        Each category's ``type_id`` is the search keyword later passed to
        ``categoryContent`` as ``tid``.
        """
        cateManual = {
            "Zard": "Zard",
            "演唱会": "演唱会4K",
            "韩国女团": "韩国女团4K",
            "日本女团": "日本女团4K",
            "MV": "MV 4K",
            "舞曲": "DJ舞曲 4K",
            "纯音乐": "纯音乐 轻音乐",
            "假窗-白噪音": "窗+白噪音",
            "纪录片": "纪录片",
            "昆虫": "昆虫",
            "玩具汽车": "玩具汽车",
            "儿童": "儿童",
            "幼儿": "幼儿",
            "儿童玩具": "儿童玩具"
        }
        result = {
            'class': [
                {'type_name': name, 'type_id': keyword}
                for name, keyword in cateManual.items()
            ]
        }
        if filter:
            result['filters'] = self.config['filter']
        return result

    def homeVideoContent(self):
        """Return an empty home-page video list."""
        return {'list': []}

    # Cached cookies for API calls; empty until getCookie() has run.
    cookies = ''

    def getCookie(self):
        """Populate ``self.cookies`` and return them.

        The hard-coded cookie string is tried first against the ``nav``
        endpoint. If the API answers with ``code == 0`` those cookies are
        kept; otherwise the cookies handed out by the Bilibili home page
        are used instead.
        """
        # Put a member / premium-member cookie into cookies_str for a
        # better experience.
        # NOTE(review): this is a hard-coded account session (SESSDATA)
        # committed to source -- consider loading it from configuration.
        cookies_str = "innersign=0; buvid3=0BE6DC00-7EC8-F14C-5022-273B6A46C4C146692infoc; i-wanna-go-back=-1; _uuid=9876F5E7-A2610-17F3-A821-453368B2A13B46427infoc; buvid4=909DE9E8-B2F3-0737-BE05-CBCE52B8F1DF50069-022082212-KrOZh+8iydjdQTLhzp96QF38tYbYJ8K8cXQog40LGu3fOfuINFkXpw%3D%3D; LIVE_BUVID=AUTO5316611442538552; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_0BE6DC00%22%3A%22182C3E9AD8B%22%2C%22333.42.fp.risk_0BE6DC00%22%3A%22182C3E9D59E%22%7D%7D; buvid_fp_plain=undefined; SESSDATA=ddac727d%2C1676696334%2C25fac%2A81; bili_jct=cfeaf9060dc34924f82d015212a108c5; DedeUserID=389957880; DedeUserID__ckMd5=42e393d5b4adaf41; sid=8sdohh2u; fingerprint=7564ac47b00541a5b904d49f13e9989a; fingerprint3=4b65d5467158b4fcf6747d29562dd032; b_ut=5; buvid_fp=6393a802b9740ddf1b1443b39e2e9b50; b_lsid=2F7F5E96_18312CBD612"
        # Split on the first '=' only so values that themselves contain
        # '=' survive; fragments without '=' (e.g. a trailing ';') are
        # skipped instead of breaking dict().
        cookies_dic = dict(
            co.strip().split('=', 1)
            for co in cookies_str.split(';')
            if '=' in co
        )
        cookies_jar = utils.cookiejar_from_dict(cookies_dic)
        content = self.fetch("http://api.bilibili.com/x/web-interface/nav", cookies=cookies_jar)
        res = json.loads(content.text)
        if res["code"] == 0:
            self.cookies = cookies_jar
        else:
            rsp = self.fetch("https://www.bilibili.com/")
            self.cookies = rsp.cookies
        return self.cookies

    def _cleanTitle(self, title):
        """Strip the search-highlight markup from a title and unescape quotes."""
        return (
            title.replace('<em class="keyword">', "")
            .replace("</em>", "")
            .replace("&quot;", '"')
        )

    def _toVodList(self, jo):
        """Convert a decoded search response into a list of vod summaries.

        Returns an empty list when the response carries no ``data`` or no
        ``result`` entry instead of raising ``KeyError``.
        """
        data = jo.get('data') or {}
        videos = []
        for vod in data.get('result') or []:
            pic = vod['pic'].strip()
            # Only protocol-relative URLs need a scheme; prefixing an
            # already absolute URL would corrupt it.
            if pic.startswith('//'):
                pic = 'https:' + pic
            videos.append({
                "vod_id": str(vod['aid']).strip(),
                "vod_name": self._cleanTitle(vod['title']),
                "vod_pic": pic,
                "vod_remarks": str(vod['duration']).strip()
            })
        return videos

    def categoryContent(self, tid, pg, filter, extend):
        """Return page ``pg`` of the video search for keyword ``tid``.

        ``filter`` and ``extend`` are accepted but not used. The paging
        totals in the result are fixed placeholder values.
        """
        # tid is inserted verbatim, as before: the built-in keywords rely
        # on how the server interprets characters such as '+'.
        url = 'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={0}&page={1}'.format(tid, pg)
        if not self.cookies:
            self.getCookie()
        rsp = self.fetch(url, cookies=self.cookies)
        jo = json.loads(rsp.text)
        return {
            'list': self._toVodList(jo),
            'page': pg,
            'pagecount': 9999,
            'limit': 90,
            'total': 999999
        }

    def cleanSpace(self, str):
        """Return ``str`` with newlines, tabs, carriage returns and spaces removed."""
        return str.replace('\n', '').replace('\t', '').replace('\r', '').replace(' ', '')

    def detailContent(self, array):
        """Return the detail record for the video whose aid is ``array[0]``.

        Every page of the video becomes one ``name$aid_cid#`` entry in
        ``vod_play_url``; ``playerContent`` later splits that id on ``_``.
        """
        aid = array[0]
        url = "https://api.bilibili.com/x/web-interface/view?aid={0}".format(aid)
        rsp = self.fetch(url, headers=self.header)
        jo = json.loads(rsp.text)['data']
        vod = {
            "vod_id": aid,
            "vod_name": self._cleanTitle(jo['title']),
            "vod_pic": jo['pic'],
            "type_name": jo['tname'],
            "vod_year": time.strftime("%Y", time.localtime(jo['pubdate'])),
            "vod_area": "",
            "vod_remarks": str(jo['duration']).strip(),
            "vod_actor": "",
            "vod_director": jo['owner']['name'],
            "vod_content": jo['desc']
        }
        # '#' separates episodes, so it is replaced inside part names.
        vod['vod_play_from'] = 'B站视频'
        vod['vod_play_url'] = ''.join(
            '{0}${1}_{2}#'.format(page['part'].replace("#", "-"), aid, page['cid'])
            for page in jo['pages']
        )
        return {'list': [vod]}

    def searchContent(self, key, quick):
        """Search videos for ``key`` and return ``{'list': [...]}``.

        If the first response has a non-zero ``code`` the cookies are
        refreshed and the request is retried once. ``quick`` is not used.
        """
        header = {
            "Referer": "https://www.bilibili.com",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
        }
        # Percent-encode the user-supplied keyword so characters such as
        # '&' or '#' cannot truncate or alter the query string.
        url = 'https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={0}'.format(quote(str(key), safe=''))
        if not self.cookies:
            self.getCookie()
        rsp = self.fetch(url, cookies=self.cookies, headers=header)
        jo = json.loads(rsp.text)
        if jo['code'] != 0:
            # Retry with fresh cookies, keeping the same headers as the
            # first attempt.
            rspRetry = self.fetch(url, cookies=self.getCookie(), headers=header)
            jo = json.loads(rspRetry.text)
        return {'list': self._toVodList(jo)}

    def playerContent(self, flag, id, vipFlags):
        """Resolve a ``<aid>_<cid>`` play id into a direct stream URL.

        Among the returned ``durl`` segments the one with the largest
        ``size`` is chosen; ``url`` is empty when there are none.
        ``flag`` and ``vipFlags`` are not used.
        """
        ids = id.split("_")
        url = 'https://api.bilibili.com:443/x/player/playurl?avid={0}&cid={1}&qn=116'.format(ids[0], ids[1])
        if not self.cookies:
            self.getCookie()
        rsp = self.fetch(url, cookies=self.cookies)
        segments = json.loads(rsp.text)['data']['durl']
        url = ''
        if segments:
            # max() keeps the first of equally sized segments, matching a
            # strict "greater than" scan.
            url = max(segments, key=lambda seg: int(seg['size']))['url']
        return {
            "parse": 0,
            "playUrl": '',
            "url": url,
            "header": {
                "Referer": "https://www.bilibili.com",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36"
            },
            "contentType": 'video/x-flv'
        }

    config = {
        "player": {},
        "filter": {}
    }
    header = {}

    def localProxy(self, param):
        """Return a placeholder local-proxy response; ``param`` is not used."""
        # The name `action` was previously undefined here (NameError).
        # NOTE(review): the key layout follows the placeholder used by
        # sibling spiders -- confirm against the host's proxy contract.
        action = {
            'url': '',
            'header': '',
            'param': '',
            'type': 'string',
            'after': ''
        }
        return [200, "video/MP2T", action, ""]