趁着前几天出去比赛的空余时间瞎写完了

一向写python都是现写现查库怎么用

写这个主要是为了班里统计方便,但现在应该用不上了2333

写得挺垃圾,也是第一次搞这个,因为库太好用了,感觉自己写得也没啥水平哈哈

其中主要点是判断指定日期的做题AC数

用了下用户实时的提交页面

http://acm.hdu.edu.cn/status.php?first=&pid=&user=aaa&lang=0&status=0

1.爬下直到指定日期最后所有做题情况

2.判断时间是否符合,顺便学习了下datetime

  •    因为提交记录是按时间从新到旧排列的,所以晚于终止日期的记录直接跳过,直到遇到早于起始日期的记录时跳出循环
  •    需要注意的是:如果这个用户在指定日期及之前都没有答过题,会造成死循环,所以要判断当前是否已经是最后一页;因为GET参数里没有页数,所以判断first是否重复就OK

3.判断是否ac

4.丢到dict

 

分享一下渣渣代码

# encoding=utf-8
import requests
import re
import xlrd
import xlwt
from bs4 import BeautifulSoup
import datetime


def save_mysql():
    """Placeholder (presumably for saving results to MySQL); does nothing yet."""
    return None


def save_excel(users, Allsolved, pid, sum):
    """Write the collected statistics to an .xls workbook, one row per user.

    Every argument is an iterable whose items are written top to bottom,
    starting at the row below the header; each column is filled
    independently of the others.

    users     -- user IDs, written to column 0.
    Allsolved -- total accepted counts, written to column 2.
    pid       -- per-user problem-ID entries, written to column 3; each
                 entry is handed to ``sheet.write`` as-is.
    sum       -- accepted counts inside the date range, written to column 1
                 (the name shadows the builtin but is part of the signature).

    Reads the module-level ``start`` and ``end`` date strings for the header
    text and for the output file name 'Acm<start>to<end>.xls'.
    """
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    titles = ['ID', u'指定日期累计AC题数' + '(' + start + 'to' + end + ')', u'总共已AC题数', u'指定日期AC题号']
    print(u'\n准备将数据存入表格...')

    # Header row.
    for col_index, title in enumerate(titles):
        sheet.write(0, col_index, title)

    id_col, range_count_col, total_col, problems_col = 0, 1, 2, 3
    # Widen the date-range column so its long header fits; presumably about
    # 20 characters (xlwt widths look like 1/256-character units -- confirm).
    sheet.col(range_count_col).width = 256 * 20

    # Fill the data columns one after another, in the same order as before.
    column_data = (
        (id_col, users),
        (total_col, Allsolved),
        (problems_col, pid),
        (range_count_col, sum),
    )
    for col_index, values in column_data:
        for row_index, value in enumerate(values, 1):
            sheet.write(row_index, col_index, value)

    workbook.save('Acm' + start + 'to' + end + '.xls')
    print(u'\n录入成功!')


# Seconds to wait for the HDU server before a request is abandoned; without
# a timeout a stalled connection would block the whole run indefinitely.
_REQUEST_TIMEOUT = 30


def _parse_status_rows(html):
    """Extract the submissions listed on one status page.

    Returns a list of dicts with the keys 'Runid', 'Subtime', 'Status' and
    'Id', filled from the first four cells of every row after the first row
    of the fourth table on the page.
    """
    soup = BeautifulSoup(html, 'lxml')
    table = soup.findAll('table')[3]
    rows = []
    for tr in table.findAll('tr')[1:]:
        cells = [td.getText() for td in tr.findAll('td')[:4]]
        if len(cells) < 4:
            # Not a complete submission row; there is nothing usable in it.
            continue
        rows.append({'Runid': cells[0], 'Subtime': cells[1],
                     'Status': cells[2], 'Id': cells[3]})
    return rows


def _accepted_in_range(user):
    """Page through *user*'s submissions and collect the accepted problems.

    Only submissions dated between the module-level ``d1`` and ``d2``
    (inclusive) are considered, and each problem ID is counted once.

    Returns ``(problem_ids, count)``. ``problem_ids`` starts with '|' and
    then holds every counted problem ID followed by a ' ' separator entry;
    ``count`` is the number of distinct accepted problems found.
    """
    problem_ids = ['|']
    seen = set()  # IDs already counted, for O(1) duplicate checks
    count = 0
    # Presumably larger than any real run ID, so paging starts at the newest
    # submission -- confirm against the site if run IDs ever get this large.
    first = '99999999'
    reached_start = False
    while not reached_start:
        page = requests.get(
            "http://acm.hdu.edu.cn/status.php?first=" + first + "&pid=&user=" + user + "&lang=0&status=0",
            timeout=_REQUEST_TIMEOUT)
        rows = _parse_status_rows(page.text)
        if not rows:
            # No (more) submissions for this user. Checking this explicitly
            # is what prevents an endless loop when the user never submitted
            # anything on or before the start date.
            break

        fresh = []
        for row in rows:
            # Subtime carries a trailing ' HH:MM:SS'; drop those 9 characters.
            day = datetime.datetime.strptime(row['Subtime'][:-9], '%Y-%m-%d')
            if day < d1:
                # Older than the range: finish this page, then stop paging.
                reached_start = True
            elif day <= d2 and row['Status'] == 'Accepted' and row['Id'] not in seen:
                seen.add(row['Id'])
                problem_ids.append(row['Id'])
                problem_ids.append(' ')
                fresh.append(row)

        for row in fresh:
            print(row)
        count += len(fresh)

        # The next page starts just below the last run ID shown on this one.
        first = str(int(rows[-1]['Runid']) - 1)
    return problem_ids, count


def run(users):
    """Collect statistics for every user and save them with ``save_excel``.

    For each user ID in *users* this fetches the profile page to read the
    total solved count, then walks the status pages to find the problems
    accepted inside the date range given by the module-level ``d1``/``d2``.
    Progress is printed along the way. If the total cannot be read from the
    profile page, a warning is printed and a blank total is recorded so the
    remaining users are still processed.
    """
    name_re = re.compile(r'>(.*)</h1>')
    solved_re = re.compile(r'Solved</td><td align=center>([0-9]+)<')

    All_Solved = []
    ID = []
    SUM = []
    for user in users:
        # Total number of problems solved so far.
        profile = requests.get("http://acm.hdu.edu.cn/userstatus.php?user=" + user,
                               timeout=_REQUEST_TIMEOUT).text
        name_match = name_re.search(profile)
        solved_match = solved_re.search(profile)
        if solved_match is None:
            # E.g. a mistyped ID: keep the row but leave the total blank.
            total_solved = ''
            print('[warn] could not read the solved count for ' + user)
        else:
            total_solved = solved_match.group(1)
        display_name = name_match.group(1) if name_match is not None else user
        print(display_name + " " + total_solved)
        All_Solved.append(total_solved)

        # Accepted problems inside the requested date range.
        print(user)
        problem_ids, accepted_count = _accepted_in_range(user)

        print('------')
        print(accepted_count)
        print('------')
        ID.append(problem_ids)
        SUM.append(accepted_count)
    save_excel(users, All_Solved, ID, SUM)


def main():
    """Load the user IDs from the roster workbook and pass them to ``run``."""
    # User IDs are the values of the first column of 'sheet1' in ./acmid.xlsx.
    users = xlrd.open_workbook('./acmid.xlsx').sheet_by_name('sheet1').col_values(0)
    print(users)
    run(users)


if __name__ == '__main__':
    # Ask for the date range. The raw strings (start/end) and the parsed
    # bounds (d1/d2) stay module-level globals because run() and save_excel()
    # read them directly.
    print('日期输入格式:年-月-日')
    start = raw_input("起始日期:")
    end = raw_input("终止日期:")
    d1, d2 = (datetime.datetime.strptime(text, '%Y-%m-%d') for text in (start, end))
    main()