学习通课程资料pdf下载
马上期末周了,同学们,该开始预习了!!!
使用 Python 自动下载课程资料中的所有 PDF(即使学习通设置了禁止下载,也能下载)
import requests
import time
from urllib.parse import urlparse, parse_qs
from DrissionPage import ChromiumPage, ChromiumOptions
from DrissionPage._elements.chromium_element import ChromiumElement
def options_default() -> ChromiumOptions:
    """Build the browser options shared by this script.

    Returns:
        A fresh ``ChromiumOptions`` on which ``headless(False)`` has been
        called, i.e. the browser window is requested to be visible.
    """
    options = ChromiumOptions()
    # A visible window is needed because the user logs in by hand.
    options.headless(False)
    return options
def el(el: "ChromiumPage", find_str: str, is_list: bool = False) -> "ChromiumElement | list[ChromiumElement] | bool":
    """Look up one element (or all matching elements) without raising.

    The annotations are quoted so they are not evaluated when the function
    is defined.

    Args:
        el: Object to search in; it must expose ``ele`` and ``eles``.
        find_str: Locator string passed unchanged to ``ele`` / ``eles``.
        is_list: When true, call ``eles`` (all matches); otherwise ``ele``.

    Returns:
        The lookup result when it is truthy. ``False`` when nothing matched
        or when the lookup raised, so callers get one consistent
        "not found" value instead of a mix of ``None`` and ``False``.
    """
    try:
        # Short 0.1 s timeout: callers poll this helper in a loop.
        if is_list:
            node = el.eles(find_str, timeout=0.1)
        else:
            node = el.ele(find_str, timeout=0.1)
    except Exception:
        # Deliberate best-effort probe: any lookup failure means "not found".
        return False
    return node or False
def Get_Token(Xue_Url):
    """Fetch the token from the ``getUploadConfig`` endpoint.

    Args:
        Xue_Url: Course page URL; passed to ``Get_Cookie`` to obtain the
            logged-in cookies sent with the request.

    Returns:
        The value at ``['msg']['token']`` of the JSON response, or ``None``
        when the request fails or the response does not have that shape
        (an error message is printed in that case).
    """
    url = "https://noteyd.chaoxing.com/pc/files/getUploadConfig"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    }
    cookies = Get_Cookie(Xue_Url)
    try:
        # requests has no default timeout; without one a stalled server
        # would hang the whole script.
        response = requests.get(url, headers=headers, cookies=cookies, timeout=30)
        return response.json()['msg']['token']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network error, non-JSON body, or an unexpected JSON shape.
        print(f"获取token错误{e}")
        return None
def Get_Cookie(Xue_url):
    """Load the course page in a browser and return its cookies after login.

    The function blocks until ``el`` finds the ``'.name fl'`` element on the
    page, checking once per second and printing a login prompt each time it
    is still missing.

    Args:
        Xue_url: URL to load in the browser.

    Returns:
        ``page.cookies(all_domains=False).as_dict()`` for the loaded page.
    """
    browser_options = options_default()
    browser_options.incognito(True)  # private-browsing mode
    browser_options.headless(False)
    page = ChromiumPage(browser_options)
    page.get(Xue_url)

    # NOTE(review): '.name fl' is presumably only rendered for a logged-in
    # user -- confirm against the actual page markup.
    while not el(page, '.name fl', is_list=False):
        print('未登录,请登录')
        time.sleep(1)

    return page.cookies(all_domains=False).as_dict()
def Get_extfilesInfo(objectId,Xue_Url):
    """Query the ``extfilesInfo`` API for a file's metadata (incl. PDF link).

    Args:
        objectId: Object id of the file, sent as the ``objectId`` parameter.
        Xue_Url: Course page URL; used to obtain the ``UID`` cookie and the
            token via ``Get_Cookie`` / ``Get_Token``.

    Returns:
        The decoded JSON response on HTTP 200. ``None`` when no token could
        be obtained, when the request fails, or on a non-200 status.
    """
    # Local import: the file's top-level imports do not include urlencode.
    from urllib.parse import urlencode

    base_url = "https://pan-yz.chaoxing.com/api/extfilesInfo?"
    puid = Get_Cookie(Xue_Url)['UID']
    token = Get_Token(Xue_Url)
    if token is None:
        # Get_Token already printed the reason; do not send "_token=None".
        return None

    # urlencode escapes every value, so ids/tokens containing reserved
    # characters cannot corrupt the query string.
    query = urlencode({
        "resids": "",
        "puid": puid,
        "objectId": objectId,
        "_token": token,
    })
    full_url = f"{base_url}{query}"
    print(full_url)

    Headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" }
    try:
        # Explicit timeout: requests would otherwise wait indefinitely.
        response = requests.get(full_url, headers=Headers, timeout=30)
    except requests.RequestException as e:
        print(f"请求失败:{e}")
        return None

    if response.status_code == 200:
        return response.json()

    print(response.text)
    print(f"请求失败,状态码:{response.status_code}")
    return None
def Download_Pdf(objectId,Xue_Url):
    """Download the PDF rendition of one file into the working directory.

    Args:
        objectId: Object id of the file, passed to ``Get_extfilesInfo``.
        Xue_Url: Course page URL, passed to ``Get_extfilesInfo``.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    import os

    json_data = Get_extfilesInfo(objectId,Xue_Url)
    # Get_extfilesInfo returns None on failure, and "data" may be missing or
    # empty; treat all of those as "no PDF" instead of crashing.
    entries = json_data.get("data") if isinstance(json_data, dict) else None
    if not isinstance(entries, list) or not entries or "pdf" not in entries[0]:
        print("无pdf文件")
        return

    info = entries[0]
    print(json_data)
    pdf_url = info["pdf"]
    # The file name comes from the server: keep only its last path component
    # so it cannot point outside the working directory, and fall back to the
    # object id when it is missing or empty.
    raw_name = str(info.get("filename") or "").replace("\\", "/")
    pdf_name = os.path.basename(raw_name) or f"{objectId}.pdf"

    try:
        # Explicit timeout: requests would otherwise wait indefinitely.
        response = requests.get(pdf_url, timeout=60)
    except requests.RequestException as e:
        print(f"pdf 下载失败:{e}")
        return

    if response.status_code == 200:
        with open(pdf_name, "wb") as file:
            file.write(response.content)
        print(f"{pdf_name}下载成功")
    else:
        # The first line shows the file info (the original mislabelled it as
        # the status code); the second line shows the real status code.
        print(f'pdf 下载失败,文件信息:{info}')
        print(f"pdf 下载失败,状态码:{response.status_code}")
def Get_Datalist(rootId,Xue_Url):
    """Fetch the resource listing of one course folder.

    Args:
        rootId: Folder id sent as ``rootId`` (the script passes ``"False"``
            for the top level).
        Xue_Url: Course page URL; its ``courseid``, ``clazzid`` and ``cpi``
            query parameters are forwarded to the API, and it is passed to
            ``Get_Cookie`` for the login cookies.

    Returns:
        The ``"data"`` member of the JSON response on HTTP 200 (``None`` if
        it is absent). ``None`` when the request fails or on a non-200
        status.
    """
    # Local import: the file's top-level imports do not include urlencode.
    from urllib.parse import urlencode

    # Pull the course identifiers out of the course page URL.
    query_params = parse_qs(urlparse(Xue_Url).query)
    courseid = query_params.get('courseid', [None])[0]
    clazzid = query_params.get('clazzid', [None])[0]
    cpi = query_params.get('cpi', [None])[0]

    cookies = Get_Cookie(Xue_Url)

    base_url = "https://mooc1-api.chaoxing.com/phone/data/student-datalist?"
    # NOTE(review): only pageNum=1 is ever requested; if the API paginates,
    # long folders would be truncated -- confirm.
    # parse_qs returns decoded values, so they are re-encoded here rather
    # than pasted into the URL verbatim.
    query = urlencode({
        "courseId": courseid,       # course id
        "rootId": rootId,           # folder id
        "require": "",
        "pageNum": "1",             # page number
        "classId": clazzid,         # class id
        "cpi": cpi,
        "isMicroCourse": "false",
    })
    full_url = f"{base_url}{query}"
    print(full_url)

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    }
    try:
        # Explicit timeout: requests would otherwise wait indefinitely.
        response = requests.get(full_url, headers=headers, cookies=cookies, timeout=30)
    except requests.RequestException as e:
        print(f"请求失败:{e}")
        return None

    if response.status_code == 200:
        json_data = response.json()
        print(json_data)
        # A response without "data" yields None instead of a KeyError.
        return json_data.get("data") if isinstance(json_data, dict) else None

    print(response.text)
    print(f"请求失败,状态码:{response.status_code}")
    return None
def _download_tree(rootId, Xue_Url):
    """Download every PDF in one folder, descending into sub-folders."""
    data_list = Get_Datalist(rootId, Xue_Url)
    print(data_list)
    if not data_list:
        # Get_Datalist returns None on failure; an empty folder has nothing
        # to do either.
        return

    for data in data_list:
        cataName = data.get('cataName')
        if cataName == "文件夹":
            # Folder entry: its 'norder' value is used as the next rootId.
            _download_tree(data['norder'], Xue_Url)
        elif cataName == "网页":
            resUrl = (data.get("content") or {}).get("resUrl")
            if not resUrl:
                continue
            # The object id travels in the resource URL's query string.
            objectid = parse_qs(urlparse(resUrl).query).get('objectid', [None])[0]
            if objectid is None:
                # Nothing to look up without an object id.
                continue
            Download_Pdf(objectid, Xue_Url)


def Download_Url(rootId,Xue_Url):
    """Download all PDFs reachable from a course folder.

    Args:
        rootId: Id of the folder to start from (the script passes
            ``"False"`` for the top level).
        Xue_Url: Course page URL, forwarded to ``Get_Datalist`` and
            ``Download_Pdf``.

    Returns:
        None. The completion message is printed once, after the whole tree
        has been processed, rather than once per visited folder.
    """
    _download_tree(rootId, Xue_Url)
    print("下载完成")
if __name__ == "__main__":
    # Replace the placeholder below with the URL of the course page.
    Xuexitong = "学习通课程链接"
    # "False" is the rootId used for the top-level listing.
    Download_Url("False", Xuexitong)
需要先安装 Python 和 Chrome,并安装 requests 和 DrissionPage 两个库;然后将需要下载的课程链接(如下图所示)填入代码最下方的 Xuexitong 变量即可
2 个帖子 – 2 位参与者
© 版权声明
文章版权归作者所有,未经允许请勿转载。
相关文章
暂无评论...