学习通课程资料pdf下载

分享1个月前发布 Ze-raphine
18 00

马上期末周了,同学们,该开始预习了!!!

使用 Python 自动下载课程资料中的所有 PDF(即使学习通设置了禁止下载也能下载)

import requests
import time
from urllib.parse import urlparse, parse_qs
from DrissionPage import ChromiumPage, ChromiumOptions
from DrissionPage._elements.chromium_element import ChromiumElement 

def options_default() -> ChromiumOptions:
    """Build the baseline browser options used by this script.

    Returns:
        A fresh ``ChromiumOptions`` with headless mode explicitly switched off.
    """
    browser_options = ChromiumOptions()
    # Headless is disabled so the browser window is shown.
    browser_options.headless(False)
    return browser_options
def el(el: "ChromiumPage", find_str: str, is_list: bool = False) -> "ChromiumElement | list[ChromiumElement] | None":
    """Look up one element (or every matching element) without raising.

    Args:
        el: Object to search within; it must expose ``ele`` and ``eles``.
            The parameter shadows this function's own name; it is kept as-is
            because callers may pass it by keyword.
        find_str: Locator string handed unchanged to ``ele`` / ``eles``.
        is_list: When true, ``eles`` is used and all matches are returned;
            otherwise ``ele`` is used and a single match is returned.

    Returns:
        The lookup result when it is truthy. ``None`` when the result is falsy
        or when the lookup raised (previously the error path returned ``False``
        while the not-found path returned ``None``; both are now ``None``).
    """
    try:
        finder = el.eles if is_list else el.ele
        # Short timeout: callers poll this helper in a loop.
        node = finder(find_str, timeout=0.1)
    except Exception:
        # Deliberately best-effort: any lookup failure counts as "not found".
        return None
    return node if node else None
def Get_Token(Xue_Url):
    """Fetch the upload-config token using the logged-in session cookies.

    Args:
        Xue_Url: Course page URL; passed to ``Get_Cookie`` to obtain cookies.

    Returns:
        The value at ``['msg']['token']`` of the JSON response, or ``None``
        when the request fails or the response does not have that shape.
    """
    url = "https://noteyd.chaoxing.com/pc/files/getUploadConfig"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    }
    cookies = Get_Cookie(Xue_Url)
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, cookies=cookies, timeout=30)
        return response.json()['msg']['token']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Network error, non-JSON body, or unexpected JSON structure.
        print(f"获取token错误{e}")
        return None
    
def Get_Cookie(Xue_url):
    """Open the given page in a browser and return its cookies after login.

    The function polls once per second until an element matching the locator
    ``'.name fl'`` is found on the page, printing a login reminder on every
    unsuccessful check. The browser page is not closed here.

    Args:
        Xue_url: URL to open in the browser.

    Returns:
        The page cookies as a dict (``all_domains=False``).
    """
    browser_options = options_default()
    browser_options.incognito(True)  # incognito mode
    browser_options.headless(False)
    page = ChromiumPage(browser_options)
    page.get(Xue_url)
    # Keep waiting until the login marker element shows up.
    while not el(page, '.name fl', is_list=False):
        print('未登录,请登录')
        time.sleep(1)
    return page.cookies(all_domains=False).as_dict()
def Get_extfilesInfo(objectId,Xue_Url):
    """Query the file-info endpoint for one object (used to get the PDF link).

    Args:
        objectId: Object id placed in the ``objectId`` query parameter.
        Xue_Url: Course page URL; used to obtain the ``UID`` cookie and token.

    Returns:
        The decoded JSON response, or ``None`` when the ``UID`` cookie or the
        token is unavailable, the request fails, the status is not 200, or the
        body is not JSON.
    """
    base_url = "https://pan-yz.chaoxing.com/api/extfilesInfo?"
    resids = ""
    puid = Get_Cookie(Xue_Url).get('UID')
    token = Get_Token(Xue_Url)
    if puid is None or token is None:
        # Without this guard the literal text "None" would be sent in the URL.
        print("请求失败,缺少UID或token")
        return None
    # Assemble the request URL.
    full_url = (
        f"{base_url}"
        f"resids={resids}&"
        f"puid={puid}&"
        f"objectId={objectId}&"
        f"_token={token}"
    )
    print(full_url)
    Headers = { "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" }
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(full_url, headers=Headers, timeout=30)
    except requests.RequestException as e:
        print(f"请求失败:{e}")
        return None
    if response.status_code != 200:
        print(response.text)
        print(f"请求失败,状态码:{response.status_code}")
        return None
    try:
        return response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        print(f"请求失败,响应不是JSON:{e}")
        return None
# Download a PDF
def Download_Pdf(objectId,Xue_Url): 
    """Download the PDF for one object into the current working directory.

    The file metadata comes from ``Get_extfilesInfo``; the first entry of its
    ``"data"`` list supplies the ``"pdf"`` URL and the ``"filename"``.

    Args:
        objectId: Object id of the resource to download.
        Xue_Url: Course page URL, forwarded to ``Get_extfilesInfo``.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    json_data = Get_extfilesInfo(objectId,Xue_Url)
    entries = json_data.get("data") if isinstance(json_data, dict) else None
    # Metadata may be missing entirely (failed lookup) or carry no usable entry.
    if not isinstance(entries, list) or not entries:
        print("无pdf文件")
        return
    info = entries[0]
    if not isinstance(info, dict) or "pdf" not in info:
        print("无pdf文件")
        return
    print(json_data)
    pdf_url = info["pdf"]
    # The filename is server-supplied: keep only its last path component so it
    # cannot escape the working directory, and fall back to the object id.
    # NOTE(review): the name is used as-is, extension included - confirm it is
    # the desired name for the saved PDF.
    raw_name = str(info.get("filename") or "")
    pdf_name = raw_name.replace("\\", "/").rsplit("/", 1)[-1]
    if pdf_name in ("", ".", ".."):
        pdf_name = f"{objectId}.pdf"
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(pdf_url, timeout=60)
    except requests.RequestException as e:
        print(f"pdf 下载失败:{e}")
        return
    if response.status_code == 200:
        with open(pdf_name, "wb") as file:
            file.write(response.content)
        print(f"{pdf_name}下载成功")
    else:
        # Show the file metadata alongside the HTTP status to aid debugging.
        print(f'pdf 下载失败,文件信息:{info}')
        print(f"pdf 下载失败,状态码:{response.status_code}")
# Fetch the resource list
def Get_Datalist(rootId,Xue_Url): 
    """Fetch the first page of the course resource list for one folder.

    ``courseid``, ``clazzid`` and ``cpi`` are read from the query string of
    ``Xue_Url`` and forwarded to the student-datalist endpoint.

    Args:
        rootId: Folder id placed in the ``rootId`` query parameter.
        Xue_Url: Course page URL; also used to obtain the session cookies.

    Returns:
        The ``"data"`` value of the JSON response, or ``None`` when the request
        fails, the status is not 200, the body is not JSON, or ``"data"`` is
        absent.
    """
    # Pull the identifying parameters out of the course URL.
    query_params = parse_qs(urlparse(Xue_Url).query)
    courseid = query_params.get('courseid', [None])[0]
    clazzid = query_params.get('clazzid', [None])[0]
    cpi = query_params.get('cpi', [None])[0]
    cookies = Get_Cookie(Xue_Url)
    require=""
    base_url = "https://mooc1-api.chaoxing.com/phone/data/student-datalist?"
    pageNum="1"
    isMicroCourse="false"
    full_url = (
        f"{base_url}"
        f"courseId={courseid}&"  # course id
        f"rootId={rootId}&"  # folder id; "false" when there is none
        f"require={require}&"
        f"pageNum={pageNum}&"  # page number (only page 1 is requested)
        f"classId={clazzid}&"  # clazzid
        f"cpi={cpi}&"
        f"isMicroCourse={isMicroCourse}"
    )
    print(full_url)
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    }
    try:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(full_url, headers=headers, cookies=cookies, timeout=30)
    except requests.RequestException as e:
        print(f"请求失败:{e}")
        return None
    if response.status_code != 200:
        print(response.text)
        print(f"请求失败,状态码:{response.status_code}")
        return None
    try:
        json_data = response.json()
    except ValueError as e:
        # A 200 response whose body is not valid JSON.
        print(f"请求失败,响应不是JSON:{e}")
        return None
    print(json_data)
    return json_data.get("data") if isinstance(json_data, dict) else None
def _download_folder(rootId, Xue_Url):
    """Download every PDF under one folder, descending into sub-folders."""
    data_list = Get_Datalist(rootId, Xue_Url)
    print(data_list)
    # Get_Datalist returns None on failure; treat that as an empty folder.
    for data in data_list or []:
        cataName = data.get('cataName')
        if cataName == "文件夹":
            # Folder entry: its 'norder' value is used as the next rootId.
            sub_root = data.get('norder')
            if sub_root is not None:
                _download_folder(sub_root, Xue_Url)
        elif cataName == "网页":
            # Web-page entry: the object id sits in the resource URL's query.
            resUrl = (data.get("content") or {}).get("resUrl")
            if not resUrl:
                continue
            objectid = parse_qs(urlparse(resUrl).query).get('objectid', [None])[0]
            if objectid is None:
                continue
            Download_Pdf(objectid, Xue_Url)


def Download_Url(rootId,Xue_Url):
    """Download all PDFs reachable from ``rootId`` and report completion.

    Entries whose ``cataName`` is ``"文件夹"`` are traversed recursively;
    entries whose ``cataName`` is ``"网页"`` are downloaded via
    ``Download_Pdf``. Other entries, and entries lacking the needed fields,
    are skipped.

    Args:
        rootId: Folder id to start from.
        Xue_Url: Course page URL, forwarded to the helper functions.

    Returns:
        None.
    """
    _download_folder(rootId, Xue_Url)
    # Printed once for the whole run rather than once per visited folder.
    print("下载完成")
if __name__ == "__main__":
    # Placeholder text: replace it with the real course page URL before running.
    Xuexitong = "学习通课程链接"
    # The starting rootId is the string "False", not the boolean.
    Download_Url("False", Xuexitong)

需要安装 Python 和 Chrome,并用 pip 安装脚本依赖的 requests 与 DrissionPage;然后将需要下载的课程链接(如下图所示)填入代码最下方的 Xuexitong 变量即可

2 个帖子 – 2 位参与者

阅读完整话题

© 版权声明

相关文章

暂无评论

您必须登录才能参与评论!
立即登录
none
暂无评论...