Pandas基本操作笔记
pandas(Python Data Analysis Library)是基于 NumPy 的一种工具,该工具是为了解决数据分析任务而创建的。pandas 纳入了大量库和一些标准的数据模型,提供了高效地操作大型数据集所需的工具,也提供了大量能使我们快速便捷地处理数据的函数和方法。你很快就会发现,它是使 Python 成为强大而高效的数据分析环境的重要因素之一。
# encoding=utf-8
import numpy as np
import pandas as pd


def main():
    """Demonstrate basic pandas DataFrame operations, printing each result.

    The walkthrough has three parts: building DataFrames, inspecting and
    selecting data, and assigning new values. The data is random, so the
    printed numbers differ from run to run.

    Every ``print`` call takes exactly one argument, which keeps the script
    runnable on both Python 2 and Python 3.
    """
    # ---- Building tables ----
    dates = pd.date_range("20170801", periods=8)
    # 8x5 frame of random normal samples, indexed by date, columns A..E.
    df = pd.DataFrame(np.random.randn(8, 5), index=dates, columns=list("ABCDE"))
    print(df)
    # A frame can also be built from a dict that maps column name -> values.
    dff = pd.DataFrame({
        "A": np.random.randint(1, 10, 8),
        "B": pd.date_range("20170707", periods=8),
    })
    print(dff)

    # ---- Basic operations ----
    print(df.head(3))  # first three rows
    print(df.tail(3))  # last three rows
    print(df.index)
    print(df.values)
    print(df.T)  # transpose
    print(df.sort_values("C"))  # sort rows by column C, ascending
    print(df.sort_index(axis=1, ascending=False))  # sort columns by label, descending
    print(df.describe())  # summary statistics for each column

    # Selection: by column, by row position, and by index label.
    print(df["A"])
    print(df[:3])
    print(df["20170801":"20170806"])
    print(df.loc[dates[0]])
    print(df.loc["20170802":"20170806", ["B", "D"]])
    print(df.at[dates[0], "C"])
    print(df.iloc[1:3, 2:4])
    print(df.iloc[1, 4])  # second row, fifth column (positions are zero-based)

    # Boolean filtering.
    print(df[df.B > 0])  # rows where column B is positive
    print(df[df < 0])  # keep negative cells; everything else becomes NaN
    print(df[df["E"].isin([1, 2])])  # rows whose E value is 1 or 2

    # ---- Basic assignment ----
    sl = pd.Series(list(range(10, 18)), index=pd.date_range("20170801", periods=8))
    print(sl)
    df["F"] = sl  # add a new column from a Series with the same date index
    print(df)
    df.at[dates[0], "A"] = 0  # set a single cell by label
    print(df)
    df.iat[1, 4] = 666  # set a single cell by position
    df.loc[:, "D"] = np.array([4] * len(df))  # overwrite a whole column
    print(df)
    df2 = df.copy()  # independent copy, so df itself stays unchanged
    df2[df2 < 0] = -df2  # flip negative cells to positive
    print(df2)


if __name__ == '__main__':
    main()
output:
A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 A B 0 9 2017-07-07 1 3 2017-07-08 2 3 2017-07-09 3 6 2017-07-10 4 6 2017-07-11 5 4 2017-07-12 6 5 2017-07-13 7 6 2017-07-14 A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 A B C D E 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 DatetimeIndex(['2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04', '2017-08-05', '2017-08-06', '2017-08-07', '2017-08-08'], dtype='datetime64[ns]', freq='D') [[ 1.07524464 -0.60505755 0.71275536 2.41815902 -0.02894002] [-1.51331977 -0.51708246 1.36323759 0.6185221 -0.45040032] [-2.26771071 -2.23593917 2.28891947 2.60836214 -1.1816333 ] [-0.27608484 -1.13021474 -0.12282251 -0.54247504 -0.33188383] [-0.23136532 -0.48824379 1.75736371 -0.96758439 -1.44857541] [ 0.37751303 0.05175454 0.32217176 -0.46223914 0.82356261] [ 1.00914409 -0.67055311 0.26280966 -0.86527427 -0.72155023] [-0.25735124 0.98478455 -0.29728085 -0.23039814 0.69747694]] 2017-08-01 2017-08-02 2017-08-03 2017-08-04 2017-08-05 2017-08-06 \ A 1.075245 -1.513320 -2.267711 -0.276085 -0.231365 0.377513 B -0.605058 -0.517082 -2.235939 -1.130215 -0.488244 0.051755 C 0.712755 1.363238 2.288919 -0.122823 1.757364 0.322172 D 2.418159 0.618522 2.608362 -0.542475 -0.967584 -0.462239 E -0.028940 -0.450400 -1.181633 -0.331884 -1.448575 0.823563 2017-08-07 
2017-08-08 A 1.009144 -0.257351 B -0.670553 0.984785 C 0.262810 -0.297281 D -0.865274 -0.230398 E -0.721550 0.697477 A B C D E 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 E D C B A 2017-08-01 -0.028940 2.418159 0.712755 -0.605058 1.075245 2017-08-02 -0.450400 0.618522 1.363238 -0.517082 -1.513320 2017-08-03 -1.181633 2.608362 2.288919 -2.235939 -2.267711 2017-08-04 -0.331884 -0.542475 -0.122823 -1.130215 -0.276085 2017-08-05 -1.448575 -0.967584 1.757364 -0.488244 -0.231365 2017-08-06 0.823563 -0.462239 0.322172 0.051755 0.377513 2017-08-07 -0.721550 -0.865274 0.262810 -0.670553 1.009144 2017-08-08 0.697477 -0.230398 -0.297281 0.984785 -0.257351 A B C D E count 8.000000 8.000000 8.000000 8.000000 8.000000 mean -0.260491 -0.576319 0.785894 0.322134 -0.330243 std 1.158991 0.919133 0.928070 1.436732 0.812523 min -2.267711 -2.235939 -0.297281 -0.967584 -1.448575 25% -0.585394 -0.785469 0.166402 -0.623175 -0.836571 50% -0.244358 -0.561070 0.517464 -0.346319 -0.391142 75% 0.535421 -0.353244 1.461769 1.068431 0.152664 max 1.075245 0.984785 2.288919 2.608362 0.823563 2017-08-01 1.075245 2017-08-02 -1.513320 2017-08-03 -2.267711 2017-08-04 -0.276085 2017-08-05 -0.231365 2017-08-06 0.377513 2017-08-07 1.009144 2017-08-08 -0.257351 Freq: D, Name: A, dtype: float64 A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 A B C D E 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 2017-08-02 -1.513320 -0.517082 1.363238 
0.618522 -0.450400 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 A 1.075245 B -0.605058 C 0.712755 D 2.418159 E -0.028940 Name: 2017-08-01 00:00:00, dtype: float64 B D 2017-08-02 -0.517082 0.618522 2017-08-03 -2.235939 2.608362 2017-08-04 -1.130215 -0.542475 2017-08-05 -0.488244 -0.967584 2017-08-06 0.051755 -0.462239 0.71275536229 C D 2017-08-02 1.363238 0.618522 2017-08-03 2.288919 2.608362 -0.45040032497 A B C D E 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 A B C D E 2017-08-01 NaN -0.605058 NaN NaN -0.028940 2017-08-02 -1.513320 -0.517082 NaN NaN -0.450400 2017-08-03 -2.267711 -2.235939 NaN NaN -1.181633 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 2017-08-05 -0.231365 -0.488244 NaN -0.967584 -1.448575 2017-08-06 NaN NaN NaN -0.462239 NaN 2017-08-07 NaN -0.670553 NaN -0.865274 -0.721550 2017-08-08 -0.257351 NaN -0.297281 -0.230398 NaN Empty DataFrame Columns: [A, B, C, D, E] Index: [] 2017-08-01 10 2017-08-02 11 2017-08-03 12 2017-08-04 13 2017-08-05 14 2017-08-06 15 2017-08-07 16 2017-08-08 17 Freq: D, dtype: int64 A B C D E F 2017-08-01 1.075245 -0.605058 0.712755 2.418159 -0.028940 10 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 11 2017-08-03 -2.267711 -2.235939 2.288919 2.608362 -1.181633 12 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 13 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 14 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 15 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 16 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 17 A B C D E F 2017-08-01 0.000000 -0.605058 0.712755 2.418159 -0.028940 10 2017-08-02 -1.513320 -0.517082 1.363238 0.618522 -0.450400 11 2017-08-03 -2.267711 -2.235939 2.288919 
2.608362 -1.181633 12 2017-08-04 -0.276085 -1.130215 -0.122823 -0.542475 -0.331884 13 2017-08-05 -0.231365 -0.488244 1.757364 -0.967584 -1.448575 14 2017-08-06 0.377513 0.051755 0.322172 -0.462239 0.823563 15 2017-08-07 1.009144 -0.670553 0.262810 -0.865274 -0.721550 16 2017-08-08 -0.257351 0.984785 -0.297281 -0.230398 0.697477 17 A B C D E F 2017-08-01 0.000000 -0.605058 0.712755 4 -0.028940 10 2017-08-02 -1.513320 -0.517082 1.363238 4 666.000000 11 2017-08-03 -2.267711 -2.235939 2.288919 4 -1.181633 12 2017-08-04 -0.276085 -1.130215 -0.122823 4 -0.331884 13 2017-08-05 -0.231365 -0.488244 1.757364 4 -1.448575 14 2017-08-06 0.377513 0.051755 0.322172 4 0.823563 15 2017-08-07 1.009144 -0.670553 0.262810 4 -0.721550 16 2017-08-08 -0.257351 0.984785 -0.297281 4 0.697477 17 A B C D E F 2017-08-01 0.000000 0.605058 0.712755 4 0.028940 10 2017-08-02 1.513320 0.517082 1.363238 4 666.000000 11 2017-08-03 2.267711 2.235939 2.288919 4 1.181633 12 2017-08-04 0.276085 1.130215 0.122823 4 0.331884 13 2017-08-05 0.231365 0.488244 1.757364 4 1.448575 14 2017-08-06 0.377513 0.051755 0.322172 4 0.823563 15 2017-08-07 1.009144 0.670553 0.262810 4 0.721550 16 2017-08-08 0.257351 0.984785 0.297281 4 0.697477 17