Numpy & Pandas

Numpy & Pandas Statistics

Numpy

Attributes

import numpy as np
array = np.array([[1,2,3],
  				 [2,3,4]])
.ndim #维度		2
.shape #形状		(2,3)
.size #大小		6
'''输出
	[[1 2 3]
 	 [2 3 4]]
 	2
    (2,3)
    6'''

type

import numpy as np

a = np.array([2, 3, 4], dtype = np.int32)
print(a.dtype)

#结果 int32
#.dtype array数组存储的元素的类型

generator matrix

import numpy as np
a = np.zeros((3,4))#三行四列的0矩阵
print(a)
#a = np.ones((3,4),dtype=np.int16)
#三行四列的1矩阵
#a = np.arange(10, 20, 2) 等同于range()
#a = np.arange(12).reshape((3, 4))
#生产一个三行四列的矩阵

generate line segments

import numpy as np
a = np.linspace(1,10,5)
#.linspace(a,b,c)在a到b之间(包含两端)等间隔生成c个点,即把线段分成c-1段
#a = np.linspace(1,10,6).reshape((2,3))

Summary 1

.ndim #维度		
.shape #形状
.size #大小
.dtype #数据类型
.zeros #零矩阵
.ones #1矩阵
.reshape #重置矩阵行和列
.linspace#定义线段

basic operations

Similar to basic operations in python

import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4) # b= 0 1 2 3
c = a-b
print(c)
#[10 19 28 37]

Addition, subtraction, multiplication, division and exponentiation are applied element-wise: the operation is carried out separately at each position of the arrays.

Trigonometric functions

import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4) # b= 0 1 2 3
c = 10*np.sin(a)
print(c)
# .sin .cos .tan 

Judgment operation

import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4) # b= 0 1 2 3
print(b)
print(b<3)

Matrix Operations

import numpy as np
a = np.array([[1,1],
              [0,1]])
b = np.arange(4).reshape((2,2))
c = a*b	#逐个相乘
c_dot = np.dot(a,b)#矩阵乘法
#c_dot_2 = a.dot(b)
import numpy as np
a = np.random.random((2,4))
#生成一个2行4列的0~1的矩阵
np.sum() #求和
np.min() #最小值
np.max() #最大值
axis=1 #维度1
axis=0 #维度0
import numpy as np

A = np.arange(2, 14).reshape((3,4))
print(np.argmin(A))#求最小值的索引
print(np.argmax(A))#求最大值的索引
print(np.mean(A))#求平均值
#A.mean() np.average(A)
#0 11
np.median(A) #A的中位数
np.cumsum(A)#累加A,前缀和
print(np.cumsum(A))#下方图片是该输出
print(np.diff(A))#累差A 三行四列变成三行三列
print(np.nonzero(A))#输出非零元素的行索引和列索引
np.sort(A)#将A逐行排序
np.transpose(A)#矩阵的转置 交换行和列
#A.T 效果一样
np.clip(A,5,9) #通用形式: np.clip(a,a_min,a_max,out=None)
#所有大于9的数字全变成9,所有小于5的数变成5,中间的不变

Summary 2

单纯的加减乘除乘方跟正常的运算一样
矩阵乘法需要用.dot()函数
三角函数 .sin .cos .tan
判断跟正常判断一样,输出的话会视情况输出
.min() .max() .sum() .axis#0行 1列
.argmin() .argmax() .mean() .average
#最小值索引 最大值索引 平均值
.median() .cumsum() .diff() .nonzero()
#中位数	前缀和	差值	输出A里的值的行和列
.sort() #对A进行排序 按行或者列
.transpose() #改变矩阵的行和列  A.T效果一致
.clip(a,b,c) #矩阵A中的数值小于b的数变成b大于c的值变成c中间的不变

index

import numpy as np

A = np.arange(3,15)
print(A)
print(A[3])
#[3 4 5 6 7 8 9 10 11 12 13 14]
#6
A = np.arange(3,15).reshape((3,4))
print(A[2])
#[11 12 13 14]
print(A[1][1])
#8
print(A[2,1])#与A[2][1]相等
print(A[:,1])#打印第二列的所有的数字索引为0的列为第一列
print(A[1,1:2])
#[8]
for row in A:
    print(row)#输出每一行
#numpy不自带输出列
for column in A.T:
    print(column)#输出每一列
A.flatten() #将矩阵改变成一行
for item in A.flat: #输出项
	print(item)

Summary 3

索引部分跟常规的区别不大,更新了专属于矩阵的一些知识
A.flatten() #将矩阵改变成一行
A.flat: #输出按项输出

merge

import numpy as np
A = np.array([1,1,1])
B = np.array([2,2,2])
print(np.vstack((A,B))) #上下合并
#np.hstack((A,B))#左右合并
#[[1 1 1]
# [2 2 2]]

Convert horizontal sequence to vertical

A[:,np.newaxis] #增加一个维度

Merge multiple arrays

C = np.concatenate((A,B,A,B),axis=0)
#在上下维度合并
#axis=1 表示在左右维度合并(要求数组至少是二维的)

array split

Divisible
import numpy as np
A = np.arange(12).reshape((3,4))
print(A)
print(np.split(A,2,axis=1))
#将A按照列分成两个array
#分割的块数必须能整除该维度的长度,否则np.split会报错
How to divide non-divisible cases
A = np.arange(12).reshape((3,4))
np.array_split(A,3,axis=1)
#分成的新array 分别是2列1列1列
np.vsplit(A,3)#纵向分割
np.hsplit(A,2)#横向分割

copy and deep copy

a = np.arange(4)
b = a
#直接等于,修改a的值,b的值也会发生改变
b = a.copy() #deep copy
#把a里的值赋值给b

Pandas

Compare

The difference with numpy
NumPy is like a list: values are addressed by position and carry no labels
pandas is like a dictionary: every value carries a label (index)

Create with list

import pandas as pd
import numpy as np
#使用list创建
s = pd.Series([1,3,6,np.nan,44,1])#index从0开始自动索引
print(s)
#0 1.0
#1 3.0
#2 6.0
#3 NaN
#4 44.0
#5 1.0
#dtype:float64

value attribute values

#值属性,可以方便查看Series的值
print(s.values)
#[ 1.  3.  6. nan 44.  1.]

index index property

#返回的是索引从开始到结束和间隔的值
print(s.index)
#RangeIndex(start=0, stop=6, step=1)

Create using numpy arrays

s1 = pd.Series(np.arange(5))
print(s1)
'''
0    0
1    1
2    2
3    3
4    4
dtype: int32
'''

Create with a dictionary

s2 = pd.Series({
    
       
    '1':1, '2':2, '3':3})
print(s2)
'''1 1
2 2
3 3
dtype: int64'''
print(s2.values)
'''[1 2 3]'''
print(s2.index)
'''Index(['1', '2', '3'], dtype='object')'''

Manually assign the index

s3 = pd.Series ([1,2,3,4],index=['A','B','C','D'])
print(s3)
'''
A    1
B    2
C    3
D    4
dtype: int64
'''
print(s3.values)
'''[1 2 3 4]'''
print(s3.index)
'''Index(['A','B','C','D'], dtype='object')'''

value by index

print(s3['A'])
'''1'''

Take a value according to a range of values

print(s3[s3>1])
'''
B    2
C    3
D    4
dtype: int64
'''

Convert Series to dictionary output

s3.to_dict()
'''{'A': 1, 'B': 2, 'C': 3, 'D': 4}'''

Write out the index separately, assign it to the Series, and add one more index at the same time

index_1 = ['A','B','C','D','E']
s4 = pd.Series(s3,index=index_1)
#新添加的索引的值为NaN
'''
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
dtype: float64
'''

Check if an element of a Series has a null value

According to pd.isnull()

print(pd.isnull(s4))
'''如果有返回True,反之False
A    False
B    False
C    False
D    False
E     True
dtype: bool
'''

According to pd.notnull()

print(pd.notnull(s4))
''' 如果没有返回True,反之False
A     True
B     True
C     True
D     True
E    False
dtype: bool
'''

Give the Series a name

s4.name = 'demo'
'''
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''

give the index a name

s4.index.name = 'demo_index'
'''
demo_index
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''

According to pd.notnull()

print(pd.notnull(s4))
''' 如果没有返回True,反之False
A     True
B     True
C     True
D     True
E    False
dtype: bool
'''

Give the Series a name

s4.name = 'demo'
'''
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''

give the index a name

s4.index.name = 'demo_index'
'''
demo_index
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''