Numpy & Pandas

Posted on 2022-04-10 by Admin

Numpy & Pandas Statistics

Numpy

Attributes

import numpy as np
array = np.array([[1,2,3],
  				 [2,3,4]])
.ndim #维度		2
.shape #形状		(2,3)
.size #大小		6
'''输出
	[[1 2 3]
 	 [2 3 4]]
 	2
    (2,3)
    6'''

type

import numpy as np

a = np.array([2, 3, 4], dtype = int)  # np.int 已在 NumPy 1.24 中移除，直接使用内置 int
print(a.dtype)

#结果 int32
#.dtype array数组存储的元素的类型

generator matrix

import numpy as np
a = np.zeros((3,4))#三行四列的0矩阵
print(a)
#a = np.ones((3,4),dtype=np.int16)
#三行四列的1矩阵
#a = np.arange(10, 20, 2) 等同于range()
#a = np.arange(12).reshape((3, 4))
#生成一个三行四列的矩阵

generate line segments

import numpy as np
a = np.linspace(1,10,5)
#.linspace(a,b,c) 在a到b之间(包含两端)均匀取c个点，即分成c-1段
#a = np.linspace(1,10,6).reshape((2,3))

Summary 1

.ndim #维度		
.shape #形状
.size #大小
.dtype #数据类型
.zeros #零矩阵
.ones #1矩阵
.reshape #重置矩阵行和列
.linspace#定义线段

basic operations

Similar to basic operations in python

import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4) # b= 0 1 2 3
c = a-b
print(c)
#[10 19 28 37]

Addition, subtraction, multiplication, division and exponentiation are all applied element-wise: the operation is computed separately for each position of the arrays.

Trigonometric functions

import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4) # b= 0 1 2 3
c = 10*np.sin(a)
print(c)
# .sin .cos .tan

Judgment operation

import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4) # b= 0 1 2 3
print(b)
print(b<3)

Matrix Operations

import numpy as np
a = np.array([[1,1],
              [0,1]])
b = np.arange(4).reshape((2,2))
c = a*b	#逐个相乘
c_dot = np.dot(a,b)#矩阵乘法
#c_dot_2 = a.dot(b)

import numpy as np
a = np.random.random((2,4))
#生成一个2行4列的0~1的矩阵
np.sum(a) #求和
np.min(a) #最小值
np.max(a) #最大值
axis=1 #维度1
axis=0 #维度0

import numpy as np

A = np.arange(2, 14).reshape((3,4))
print(np.argmin(A))#求最小值的索引
print(np.argmax(A))#求最大值的索引
print(np.mean(A))#求平均值
#A.mean() np.average(A)
#0 11
np.median(A) #A的中位数
np.cumsum(A)#累加A,前缀和
print(np.cumsum(A))#下方图片是该输出
print(np.diff(A))#累差A 三行四列变成三行三列
print(np.nonzero(A))#输出值的行和列
np.sort(A)#将A逐行排序
np.transpose(A)#矩阵的转置 改变行和列
#A.T 效果一样
np.clip(A,5,9) #np.clip(a,a_min,a_max,out=None)
#所有大于9的数字全变成9所有小于5的数变成5，然后中间的不变

Summary 2

单纯的加减乘除乘方跟正常的运算一样
矩阵乘法需要用.dot()函数
三角函数 .sin .cos .tan
判断跟正常判断一样，输出的话会视情况输出
.min() .max() .sum() .axis#0行 1列
.argmin() .argmax() .mean() .average
#最小值索引 最大值索引 平均值
.median() .cumsum() .diff() .nonzero()
#中位数	前缀和	差值	输出A里的值的行和列
.sort() #对A进行排序 按行或者列
.transpose() #改变矩阵的行和列  A.T效果一致
.clip(a,b,c) #矩阵A中的数值小于b的数变成b大于c的值变成c中间的不变

index

import numpy as np

A = np.arange(3,15)
print(A)
print(A[3])
#[3 4 5 6 7 8 9 10 11 12 13 14]
#6
A = np.arange(3,15).reshape((3,4))
print(A[2])
#[11 12 13 14]
print(A[1][1])
#8
print(A[2,1])#与A[2][1]相等
print(A[:,1])#打印第二列的所有的数字索引为0的列为第一列
print(A[1,1:2])
#[8]
for row in A:
    print(row)#输出每一行
#numpy不自带输出列
for column in A.T:
    print(column)#输出每一列
A.flatten() #将矩阵改变成一行
for item in A.flat: #输出项
	print(item)

Summary 3

索引部分跟常规的区别不大，更新了专属于矩阵的一些知识
A.flatten() #将矩阵改变成一行
A.flat: #输出按项输出

merge

import numpy as np
A = np.array([1,1,1])
B = np.array([2,2,2])
print(np.vstack((A,B))) #上下合并
#np.hstack((A,B))#左右合并
#[[1 1 1]
# [2 2 2]]

Convert horizontal sequence to vertical

A[:,np.newaxis] #增加一个维度

Merge multiple arrays

C = np.concatenate((A,B,A,B),axis=0)
#在上下维度合并
#axis = 1 左右维度

array split

Divisible

import numpy as np
A = np.arange(12).reshape((3,4))
print(A)
print(np.split(A,2,axis=1))
#将A按照列分成两个array
#分割的块数必须能整除该维度的长度

How to divide non-divisible cases

A = np.arange(12).reshape((3,4))
np.array_split(A,3,axis=1)
#分成的新array 分别是2列1列1列
np.vsplit(A,3)#纵向分割
np.hsplit(A,2)#横向分割

copy 和deep copy

a = np.arange(4)
b = a
#直接等于，修改a的值，b的值也会发生改变
b = a.copy() #deep copy
#把a里的值赋值给b

Pandas

Compare

The difference with numpy
numpy is like a list: values without labels
pandas is like a dictionary: values with labels (a dictionary-style numpy)

Create with list

import pandas as pd
import numpy as np
#使用list创建
s = pd.Series([1,3,6,np.nan,44,1])#index从0开始自动索引
print(s)
#0 1.0
#1 3.0
#2 6.0
#3 NaN
#4 44.0
#5 1.0
#dtype:float64

value attribute values

#值属性，可以方便查看Series的值
print(s.values)
#[ 1.  3.  6. nan 44.  1.]

index index property

#返回的是索引从开始到结束和间隔的值
print(s.index)
#RangeIndex(start=0, stop=6, step=1)

Create using numpy arrays

s1 = pd.Series(np.arange(5))
print(s1)
'''
0    0
1    1
2    2
3    3
4    4
dtype: int32
'''

Create with a dictionary

s2 = pd.Series({
    
       
    '1':1, '2':2, '3':3})
print(s2)
'''1 1
2 2
3 3
dtype: int64'''
print(s2.values)
'''[1 2 3]'''
print(s2.index)
'''Index(['1', '2', '3'], dtype='object')'''

Manually assign the index

s3 = pd.Series ([1,2,3,4],index=['A','B','C','D'])
print(s3)
'''
A    1
B    2
C    3
D    4
dtype: int64
'''
print(s3.values)
'''[1 2 3 4]'''
print(s3.index)
'''Index(['A','B','C','D'], dtype='object')'''

value by index

print(s3['A'])
'''1'''

Take a value according to a range of values

print(s3[s3>1])
'''
B    2
C    3
D    4
dtype: int64
'''

Convert Series to dictionary output

s3.to_dict()
'''{'A': 1, 'B': 2, 'C': 3, 'D': 4}'''

Write out the index separately, assign it to the Series, and add one more index at the same time

index_1 = ['A','B','C','D','E']
s4 = pd.Series(s3,index=index_1)
#新添加的索引的值为NaN
'''
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
dtype: float64
'''

Check if an element of a Series has a null value

According to pd.isnull()

print(pd.isnull(s4))
'''如果有返回True，反之False
A    False
B    False
C    False
D    False
E     True
dtype: bool
'''

According to pd.notnull()

print(pd.notnull(s4))
''' 如果没有返回True，反之False
A     True
B     True
C     True
D     True
E    False
dtype: bool
'''

Give the Series a name

s4.name = 'demo'
'''
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''

give the index a name

s4.index.name = 'demo_index'
'''
demo_index
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''

According to pd.notnull()

print(pd.notnull(s4))
''' 如果没有返回True，反之False
A     True
B     True
C     True
D     True
E    False
dtype: bool
'''

Give the Series a name

s4.name = 'demo'
'''
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''

give the index a name

s4.index.name = 'demo_index'
'''
demo_index
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
Name: demo, dtype: float64
'''

Numpy & Pandas Statistics

Numpy

Attributes

type

generator matrix

generate line segments

Summary 1

basic operations

Trigonometric functions

Judgment operation

Matrix Operations

Summary 2

index

Summary 3

merge

Convert horizontal sequence to vertical

Merge multiple arrays

array split

Divisible

How to divide non-divisible cases

copy 和deep copy

Pandas

Compare

Create with list

value attribute values

index index property

Create using numpy arrays

Create with a dictionary

Manually assign the index

value by index

Take a value according to a range of values

Convert Series to dictionary output

Write out the index separately, assign it to the Series, and add one more index at the same time

Check if an element of a Series has a null value

According to pd.isnull()

According to pd.notnull()

Give the Series a name

give the index a name

Give the Series a name

give the index a name

Related Posts