正方教务爬虫 基于selenium的正方教务成绩爬虫
关于
一个正方教务爬虫,可以自动跳转到成绩页面并保存成绩。
能力 自动登录✔ 自动验证码识别填写✔ 失败自动重试✔ 自动保存成绩✔
使用 step 1 下载 直接复制或下载本仓库里的spider.py文件到本地
step 2 安装最新版Chrome浏览器(已有请跳过) https://www.google.cn/intl/zh-CN/chrome/
step 3 下载对应的驱动 下载地址 http://chromedriver.storage.googleapis.com/index.html 驱动安装教程 https://blog.csdn.net/m0_67575344/article/details/126142295
step 4 安装所需模块
ddddocr
selenium
openpyxl(代码中用于保存 score.xlsx)
命令行安装 pip install ddddocr selenium openpyxl
自动安装 使用pycharm自动安装
step 5 运行 直接在 Python 解释器或 IDE(如 PyCharm)中运行 spider.py
# Full source code (spider.py)
# -*- coding: utf-8 -*-
"""Selenium-based grade scraper for the Zhengfang academic-affairs system.

The script logs in (reading the captcha with ddddocr), opens the grade page
and writes the grade table to ``score.xlsx`` in the working directory.
"""
import datetime
import time

import ddddocr
import openpyxl
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By

# Path to the chromedriver executable.
service = ChromeService(executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe')
chrome = webdriver.Chrome(service=service)

# Configuration
name = '123456'  # account
pwd = '132456l'  # password
safe_time = 3  # seconds to wait between retries
servernum = 0  # index into `servers`: 0, 1, 2 or 3
retry = True  # retry after a failure
mode = 1  # 1: query grades; 2: print "Exit" and do nothing else
servers = ['http://127.0.0.1/', 'http://127.0.0.1/', 'http://127.0.0.1/', 'http://127.0.0.1/']

# Page titles that indicate the site showed an error page or the login page.
ERROR_TITLE = 'ERROR - 出错啦!'
LOGIN_TITLE = '欢迎使用正方教务管理系统!请登录'

# Rows of the grade table inside the 'zhuti' frame.
GRADE_ROWS_XPATH = '/html/body/form/div[2]/div/span/div[1]/table[1]/tbody/tr'
# Only the first 19 cells of each row are exported.
MAX_COLUMNS = 19

# Lazily created OCR engine, shared by every captcha attempt.
_ocr = None


def _timestamp():
    """Return the current time formatted as ``[HH:MM:SS]``."""
    return '[' + datetime.datetime.now().strftime('%H:%M:%S') + ']'


def print_INFO(message):
    """Print *message* prefixed with the current time."""
    print(_timestamp() + message)


def print_ERROR(error):
    """Print *error* in red, prefixed with the current time."""
    print(_timestamp() + "\033[1;31m" + error + " \033[0m")


def print_Exception(e):
    """Print a red exception banner followed by the exception *e*."""
    print("\033[1;31m异常!\033[0m\n")
    print(e)


def _get_ocr():
    """Return the shared OCR engine, creating it on first use."""
    global _ocr
    if _ocr is None:
        _ocr = ddddocr.DdddOcr()
    return _ocr


def _session_lost():
    """Return True when the browser shows the error page or the login page."""
    return chrome.title in (ERROR_TITLE, LOGIN_TITLE)


def recognize():
    """Screenshot the captcha element and return the recognised text.

    Returns:
        The recognised captcha string, or ``None`` when the screenshot could
        not be written to ``img.png``.
    """
    if not chrome.find_element(By.ID, 'icode').screenshot('img.png'):
        return None
    with open('img.png', 'rb') as f:
        img = f.read()
    result = _get_ocr().classification(img)
    print_INFO("验证码" + result)
    return result


def auto_Login():
    """Open the configured server and submit the login form.

    Returns:
        True when the page left the login/error page after submitting,
        False on any failure (the failure is printed, never raised).
    """
    # The whole attempt is one best-effort step of a retry loop: any error
    # (page load failure, missing element, unexpected alert) counts as a
    # failed attempt instead of aborting the program.
    try:
        chrome.get(servers[servernum])
        print_INFO("尝试登录" + str(servernum) + '号服务器')
        if chrome.title == ERROR_TITLE:
            return False
        chrome.find_element(By.ID, 'txtUserName').send_keys(name)
        chrome.find_element(By.ID, 'TextBox2').send_keys(pwd)
        captcha = recognize()
        if captcha is None:
            print_ERROR('验证码截图失败')
            return False
        chrome.find_element(By.ID, 'txtSecretCode').send_keys(captcha)
        chrome.find_element(By.ID, 'Button1').click()
        if _session_lost():
            print_ERROR('跳转失败')
            return False
    except Exception as exc:
        print_Exception(exc)
        return False
    return True


def _save_score_table():
    """Copy the grade table of the current frame into ``score.xlsx``."""
    work_book = openpyxl.Workbook()
    sheet = work_book.worksheets[0]
    rows = chrome.find_elements(By.XPATH, GRADE_ROWS_XPATH)
    for row_index, row in enumerate(rows, start=1):
        # Read the cells the row actually has, so a short row does not abort
        # the whole export.
        cells = row.find_elements(By.XPATH, './td')[:MAX_COLUMNS]
        for col_index, cell in enumerate(cells, start=1):
            sheet.cell(row_index, col_index, cell.text)
    work_book.save('score.xlsx')


def _leave_frame():
    """Switch back to the top-level document, printing any failure."""
    try:
        chrome.switch_to.default_content()
    except Exception as exc:
        print_Exception(exc)


def get_score():
    """Open the grade page and save the grade table to ``score.xlsx``.

    Returns:
        True when the table was saved, False otherwise. When the session
        turns out to be lost, a new login is performed before returning
        False so that the caller can simply retry.
    """
    try:
        hover_target = chrome.find_element(By.XPATH, '/html/body/div/div[1]/ul/li[5]/a/span')
        # The sub-menu only becomes clickable while its parent is hovered.
        ActionChains(chrome).move_to_element(hover_target).perform()
        chrome.find_element(By.XPATH, '/html/body/div/div[1]/ul/li[5]/ul/li[4]/a').click()
    except Exception as exc:
        print_Exception(exc)
        print_ERROR('成绩查询按钮点击失败')
        return False
    if _session_lost():
        auto_Login()
        return False
    time.sleep(5)

    session_lost = False
    try:
        chrome.switch_to.frame('zhuti')
        chrome.find_element(By.ID, 'btn_zcj').click()
        session_lost = _session_lost()
        if not session_lost:
            _save_score_table()
    except Exception as exc:
        print_Exception(exc)
        print_ERROR('成绩获取错误')
        return False
    finally:
        # Always leave the frame; otherwise the next attempt would look for
        # the menu inside 'zhuti' and fail every time.
        _leave_frame()

    if session_lost:
        # Log in again before reporting failure; pause between attempts
        # instead of hammering the server.
        while not auto_Login():
            time.sleep(safe_time)
        return False
    return True


def _login_with_retry():
    """Log in, retrying while ``retry`` is set; return True once logged in."""
    attempts = 0
    while True:
        attempts += 1
        if auto_Login():
            print_INFO('登录成功')
            return True
        print_ERROR('尝试登录失败')
        if not retry:
            return False
        if attempts > 10:
            print('\033[0;32m已经为你尝试了' + str(attempts)
                  + '次登录, 全部登录失败。建议更换服务器或检查你的账号密码是否正确。\033[0m')
        time.sleep(safe_time)


def _query_with_retry():
    """Query the grades up to five times.

    Returns:
        True when all five attempts failed (the caller should log in again),
        False when the query succeeded or retrying is disabled.
    """
    for _ in range(5):
        if get_score():
            print_INFO('查询成功')
            return False
        print_ERROR('查询失败')
        if not retry:
            return False
        time.sleep(safe_time)
    return True


def main():
    """Run the scraper according to ``mode``.

    In mode 1 the script logs in, saves the grades and then waits for the
    user to press Enter before starting another cycle. In mode 2 it only
    prints ``Exit``.
    """
    if mode == 1:
        print_INFO('开始查询成绩')
        chrome.maximize_window()
        while True:
            # Skip the query when login failed (only possible with retry off).
            if _login_with_retry():
                time.sleep(1)
                if _query_with_retry():
                    continue  # every query attempt failed: log in again
            try:
                input()  # wait for Enter before the next cycle
            except EOFError:
                return  # no interactive stdin: stop instead of crashing
    elif mode == 2:
        print_INFO('Exit')


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print_INFO('Exit')
    finally:
        # Close the browser and stop chromedriver however the run ended.
        chrome.quit()