NumPy - 语法基础 Pt.2

这篇随笔主要介绍 NumPy 的数据类型和函数使用,包括 NumPy 数组的创建、索引和运算,常用的一元函数和二元函数,统计计算,线性代数计算,数组比较,随机数组的生成以及 NumPy 数组的读取与写出

1
import numpy as np

数组扩展

np.meshgrid(arr1, arr2) : 基于两个一维数组arr1和arr2,返回两个二维数组

1
2
3
points = np.arange(-5, 5, 0.1)
x, y = np.meshgrid(points, points)
x, y, x.T == y
(array([[-5. , -4.9, -4.8, ...,  4.7,  4.8,  4.9],
        [-5. , -4.9, -4.8, ...,  4.7,  4.8,  4.9],
        [-5. , -4.9, -4.8, ...,  4.7,  4.8,  4.9],
        ...,
        [-5. , -4.9, -4.8, ...,  4.7,  4.8,  4.9],
        [-5. , -4.9, -4.8, ...,  4.7,  4.8,  4.9],
        [-5. , -4.9, -4.8, ...,  4.7,  4.8,  4.9]]),
 array([[-5. , -5. , -5. , ..., -5. , -5. , -5. ],
        [-4.9, -4.9, -4.9, ..., -4.9, -4.9, -4.9],
        [-4.8, -4.8, -4.8, ..., -4.8, -4.8, -4.8],
        ...,
        [ 4.7,  4.7,  4.7, ...,  4.7,  4.7,  4.7],
        [ 4.8,  4.8,  4.8, ...,  4.8,  4.8,  4.8],
        [ 4.9,  4.9,  4.9, ...,  4.9,  4.9,  4.9]]),
 array([[ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]]))
1
2
3
4
5
6
z = np.sqrt(x**2 + y**2)
import matplotlib.pyplot as plt
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
plt.show()

png

数据替换

np.where(condition, x, y) : condition为True的位置取x的值,否则取y的值;np.where(...).shape = condition.shape

1
2
3
4
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
np.where(cond, xarr, yarr)
array([1.1, 2.2, 1.3, 1.4, 2.5])
1
2
arr = np.random.randn(4, 4)
arr, arr > 0, np.where(arr > 0, 2, -2) # 将arr>0的元素替换为2, 否则替换为-2
(array([[-1.38574074,  0.36308053,  1.70333481, -1.28078003],
        [ 1.11809371,  0.6485143 ,  0.86350574, -0.0687285 ],
        [-0.12629966,  0.89419753, -0.56168284,  0.73884436],
        [-0.26990796,  0.27744503,  0.6568969 ,  0.02221542]]),
 array([[False,  True,  True, False],
        [ True,  True,  True, False],
        [False,  True, False,  True],
        [False,  True,  True,  True]]),
 array([[-2,  2,  2, -2],
        [ 2,  2,  2, -2],
        [-2,  2, -2,  2],
        [-2,  2,  2,  2]]))
1
arr, np.where(arr > 0, 2, arr) # 将arr>0的元素替换为2,arr<0的不变
(array([[-1.38574074,  0.36308053,  1.70333481, -1.28078003],
        [ 1.11809371,  0.6485143 ,  0.86350574, -0.0687285 ],
        [-0.12629966,  0.89419753, -0.56168284,  0.73884436],
        [-0.26990796,  0.27744503,  0.6568969 ,  0.02221542]]),
 array([[-1.38574074,  2.        ,  2.        , -1.28078003],
        [ 2.        ,  2.        ,  2.        , -0.0687285 ],
        [-0.12629966,  2.        , -0.56168284,  2.        ],
        [-0.26990796,  2.        ,  2.        ,  2.        ]]))

统计计算

可以通过数组上的一组数学函数对整个数组或某个轴向的数据进行统计计算

求平均、求和

np.mean(arr, axis), arr.mean(axis)
np.sum(arr, axis), arr.sum(axis)

1
2
arr = np.arange(24).reshape((2, 3, 4))
arr, arr.mean(), np.mean(arr), arr.sum(), np.sum(arr)
(array([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],
 
        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]]),
 11.5,
 11.5,
 276,
 276)
1
2
3
4
arr, np.mean(arr, axis=0), np.sum(arr, axis=0) # 对第1维进行取平均/求和处理
# 0+12 1+13 2+14 3+15
# 4+16 5+17 6+18 7+19
# 8+20 9+21 10+22 11+23
(array([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],
 
        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]]),
 array([[ 6.,  7.,  8.,  9.],
        [10., 11., 12., 13.],
        [14., 15., 16., 17.]]),
 array([[12, 14, 16, 18],
        [20, 22, 24, 26],
        [28, 30, 32, 34]]))
1
2
3
arr, np.mean(arr, axis=1), np.sum(arr, axis=1) # 对第2维进行取平均/求和处理
# 0+4+8 1+5+9 2+6+10 3+7+11
# 12+16+20 13+17+21 14+18+22 15+19+23
(array([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],
 
        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]]),
 array([[ 4.,  5.,  6.,  7.],
        [16., 17., 18., 19.]]),
 array([[12, 15, 18, 21],
        [48, 51, 54, 57]]))
1
2
arr = np.random.randn(100)
(arr > 0).sum() # Number of positive values
39

累加、累乘

np.cumsum(arr, axis), arr.cumsum(axis) : 累加
np.cumprod(arr, axis), arr.cumprod(axis) : 累乘

1
2
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
arr, arr.cumsum(), arr.cumprod()
(array([0, 1, 2, 3, 4, 5, 6, 7]),
 array([ 0,  1,  3,  6, 10, 15, 21, 28], dtype=int32),
 array([0, 0, 0, 0, 0, 0, 0, 0], dtype=int32))
1
2
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
arr, np.cumsum(arr, axis=0), np.cumprod(arr, axis=1)
(array([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]]),
 array([[ 0,  1,  2],
        [ 3,  5,  7],
        [ 9, 12, 15]], dtype=int32),
 array([[  0,   0,   0],
        [  3,  12,  60],
        [  6,  42, 336]], dtype=int32))

标准差、方差

np.std(arr, axis), arr.std(axis) : 标准差
np.var(arr, axis), arr.var(axis) : 方差

1
2
arr = np.random.randn(10000)
arr.std(), arr.var()
(1.0108654484477226, 1.0218489548654153)
1
2
arr = np.random.randint(10, 100, (3, 4))
arr, np.std(arr, axis=0), np.var(arr, axis=1)
(array([[66, 80, 28, 18],
        [45, 55, 57, 40],
        [66, 30, 29, 76]]),
 array([ 9.89949494, 20.41241452, 13.4412301 , 23.9072281 ]),
 array([662.    ,  49.1875, 443.1875]))

最值及其索引

np.max(arr, axis), arr.max(axis) : 最大值
np.argmax(arr, axis), arr.argmax(axis) : 最大值索引(从0开始)
np.min(arr, axis), arr.min(axis) : 最小值
np.argmin(arr, axis), arr.argmin(axis) : 最小值索引(从0开始)

1
2
arr = np.random.randint(10, 100, (3, 4))
arr, np.max(arr, axis=0), np.argmax(arr, axis=0)
(array([[96, 69, 22, 95],
        [43, 38, 95, 73],
        [80, 13, 23, 90]]),
 array([96, 69, 95, 95]),
 array([0, 0, 1, 0], dtype=int64))

用于布尔类型的方法

np.any(arr, axis), arr.any(axis) : 检查数组中是否存在一个或多个True
np.all(arr, axis), arr.all(axis) : 检查数组中所有值是否都是True

1
2
bools = np.array([False, False, True, False])
bools.any(), bools.all()
(True, False)
1
2
arr = (np.random.randint(10, 100, (3, 4))>50)
arr, np.any(arr, axis=0), np.all(arr, axis=0)
(array([[ True,  True,  True,  True],
        [ True,  True,  True,  True],
        [False,  True,  True,  True]]),
 array([ True,  True,  True,  True]),
 array([False,  True,  True,  True]))

排序

np.sort(arr, axis), arr.sort(axis) : 排序

np.sort返回排序副本,arr.sort()修改数组本身

1
2
3
4
arr = np.random.randn(6)
print(arr)
arr.sort()
print(arr)
[-0.6477684   0.73474585 -0.88816762  1.67601792 -1.00884885 -1.31953076]
[-1.31953076 -1.00884885 -0.88816762 -0.6477684   0.73474585  1.67601792]
1
2
3
4
arr = np.random.randint(0, 10, (3, 4))
print(arr)
print(arr.sort(axis=1))
print(arr)
[[0 3 2 7]
 [1 5 0 3]
 [6 3 0 2]]
None
[[0 2 3 7]
 [0 1 3 5]
 [0 2 3 6]]
1
2
3
4
arr = np.random.randint(0, 10, (3, 4))
print(arr)
print(np.sort(arr, axis=1))
print(arr)
[[8 4 4 3]
 [8 4 9 2]
 [3 9 4 4]]
[[3 4 4 8]
 [2 4 8 9]
 [3 4 4 9]]
[[8 4 4 3]
 [8 4 9 2]
 [3 9 4 4]]

集合逻辑

np.unique(arr) : 找出arr中的唯一值,返回有序结果

1
2
3
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(names), sorted(set(names)), np.unique(ints), sorted(set(ints))
(array(['Bob', 'Joe', 'Will'], dtype='<U4'),
 ['Bob', 'Joe', 'Will'],
 array([1, 2, 3, 4]),
 [1, 2, 3, 4])

np.in1d(arr1, arr2) : 测试arr1的值是否在arr2中,返回布尔型数组

arr1.shape = np.in1d(arr1, arr2).shape

1
2
3
arr1 = np.array([6, 0, 0, 3, 2, 5, 6])
arr2 = np.array([2, 3, 6, 7])
np.in1d(arr1, arr2)
array([ True, False, False,  True,  True, False,  True])

np.intersect1d(arr1, arr2) : 计算arr1和arr2中的交集,返回有序结果

1
2
3
arr1 = np.array([6, 0, 0, 3, 2, 5, 6])
arr2 = np.array([2, 3, 6, 7])
np.intersect1d(arr1, arr2)
array([2, 3, 6])

np.union1d(arr1, arr2) : 计算arr1和arr2中的并集,返回有序结果

1
2
3
arr1 = np.array([6, 0, 0, 3, 2, 5, 6])
arr2 = np.array([2, 3, 6, 7])
np.union1d(arr1, arr2)
array([0, 2, 3, 5, 6, 7])

np.setdiff1d(arr1, arr2) : 计算arr1和arr2的差集,即在arr1中但不在arr2中的元素,返回有序结果

1
2
3
arr1 = np.array([6, 0, 0, 3, 2, 5, 6])
arr2 = np.array([2, 3, 6, 7])
np.setdiff1d(arr1, arr2)
array([0, 5])

np.setxor1d(arr1, arr2) : 计算arr1和arr2的对称差,即只存在于其中一个数组、而不同时存在于两个数组中的元素,返回有序结果

1
2
3
arr1 = np.array([6, 0, 0, 3, 2, 5, 6])
arr2 = np.array([2, 3, 6, 7])
np.setxor1d(arr1, arr2)
array([0, 5, 7])

文件

np.save(file, arr), np.load(file) : 写文件,读文件

file : *.npy

1
2
arr = np.arange(10)
np.save('some_array', arr)
1
np.load('some_array.npy')
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

np.savez( file, var1 = arr1, var2 = arr2, ... ) : 将多个数组保存到一个未压缩的文件中

file : *.npz

np.savez_compressed( file, var1 = arr1, var2 = arr2, ... ) : 数据压缩

file : *.npz

1
np.savez('array_archive.npz', a=arr, b=arr)
1
2
arch = np.load('array_archive.npz')
arch['b']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
1
np.savez_compressed('arrays_compressed.npz', a=arr, b=arr)
1
2
arch = np.load('arrays_compressed.npz')
arch['b']
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

线性代数

np.diag( arr, k ) : 返回方阵的对角线或者将一维数组转换为对角方阵

1
2
3
arr1 = np.random.randint(0,10,(3,3))
arr2 = np.arange(3)
arr1, np.diag(arr1), np.diag(arr1, k=1), arr2, np.diag(arr2)
(array([[3, 7, 0],
        [7, 9, 4],
        [1, 8, 2]]),
 array([3, 9, 2]),
 array([7, 4]),
 array([0, 1, 2]),
 array([[0, 0, 0],
        [0, 1, 0],
        [0, 0, 2]]))

np.dot( arr1, arr2 ) : 矩阵乘法

1
2
3
arr1 = np.random.randint(0, 10, (3, 3))
arr2 = np.linalg.inv(arr1)
np.dot(arr1, arr2)
array([[ 1.00000000e+00,  8.88178420e-16, -8.88178420e-16],
       [ 1.11022302e-16,  1.00000000e+00, -4.44089210e-16],
       [ 0.00000000e+00,  4.44089210e-16,  1.00000000e+00]])

np.trace( arr ) : 矩阵的迹

1
2
arr = np.random.randint(0,10,(3,3))
arr, np.trace(arr)
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 7)

np.linalg.det( arr ) : 矩阵的行列式

1
arr, np.linalg.det(arr)
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 179.0)

np.linalg.eig( arr ) : 矩阵的特征值和特征向量

1
arr, np.linalg.eig(arr)
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 (array([14.394287  , -2.58656667, -4.80772032]),
  array([[-0.46692605, -0.5381644 , -0.38398256],
         [-0.7072986 ,  0.74694386, -0.53437623],
         [-0.53076243, -0.39045353,  0.75299365]])))
1
2
lbd, e = np.linalg.eig(arr)
np.dot(arr, e[:, 0]) , lbd[0]*e[:, 0]
(array([ -6.72106756, -10.18105905,  -7.6399467 ]),
 array([ -6.72106756, -10.18105905,  -7.6399467 ]))

np.linalg.inv( arr ) : 方阵的逆

np.linalg.pinv( arr ) : 矩阵的Moore-Penrose伪逆

1
arr, np.linalg.inv(arr), np.linalg.pinv(arr)
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 array([[-0.24581006,  0.1452514 ,  0.08379888],
        [ 0.20670391, -0.16759777,  0.13407821],
        [-0.00558659,  0.1396648 , -0.11173184]]),
 array([[-0.24581006,  0.1452514 ,  0.08379888],
        [ 0.20670391, -0.16759777,  0.13407821],
        [-0.00558659,  0.1396648 , -0.11173184]]))
1
2
arr2 = np.arange(1,13).reshape((3,4))
arr2, np.linalg.pinv(arr2)
(array([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]]),
 array([[-0.375     , -0.1       ,  0.175     ],
        [-0.14583333, -0.03333333,  0.07916667],
        [ 0.08333333,  0.03333333, -0.01666667],
        [ 0.3125    ,  0.1       , -0.1125    ]]))

np.linalg.qr( arr ) : 计算QR分解(矩阵等价变换)

arr = QR
Q:正交矩阵,R:上三角矩阵

1
2
Q, R = np.linalg.qr(arr)
arr, Q, R
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 array([[ 0.        ,  0.99951255,  0.03121953],
        [-0.62469505,  0.02437835, -0.78048818],
        [-0.78086881, -0.01950268,  0.62439054]]),
 array([[-6.40312424, -7.80868809, -7.18399305],
        [ 0.        ,  5.00243843,  6.17747514],
        [ 0.        ,  0.        , -5.58829534]]))
1
arr, np.dot(Q, R)
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 array([[0., 5., 6.],
        [4., 5., 9.],
        [5., 6., 2.]]))
1
np.linalg.det(arr), np.linalg.det(R)
(179.0, 179.0)

np.linalg.svd( arr ) : 奇异值(SVD)分解

1
2
U, S, V = np.linalg.svd(arr)
arr, U, S, V
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 array([[-0.49438846,  0.41745486,  0.76243786],
        [-0.73566235,  0.26627093, -0.62281675],
        [-0.46301292, -0.86881024,  0.17546455]]),
 array([14.76104704,  4.89682835,  2.47640109]),
 array([[-0.35618842, -0.6048576 , -0.71223387],
        [-0.66961046, -0.36640706,  0.64604002],
        [-0.65172973,  0.70703122, -0.27450903]]))

arr = U diag(S) V
U : 正交矩阵; V : 正交矩阵; S : 降序排序的非负数
S : 奇异值; U 的列 : 左奇异向量; V 的行 : 右奇异向量(此处的 V 是 np.linalg.svd 返回的第三个数组,即通常记法中的 $V^T$)

1
U.dot(np.diag(S)).dot(V)
array([[-1.54427352e-15,  5.00000000e+00,  6.00000000e+00],
       [ 4.00000000e+00,  5.00000000e+00,  9.00000000e+00],
       [ 5.00000000e+00,  6.00000000e+00,  2.00000000e+00]])
1
np.dot(U,U.T), np.dot(V,V.T)
(array([[1.00000000e+00, 3.54512330e-16, 1.29342340e-16],
        [3.54512330e-16, 1.00000000e+00, 2.65502607e-16],
        [1.29342340e-16, 2.65502607e-16, 1.00000000e+00]]),
 array([[ 1.00000000e+00,  1.05593218e-16, -2.36545414e-16],
        [ 1.05593218e-16,  1.00000000e+00,  1.07467290e-16],
        [-2.36545414e-16,  1.07467290e-16,  1.00000000e+00]]))

np.linalg.solve( A, b ) : 解线性方程

1
2
3
A = arr
b = np.random.randint(0,10,(3))
A, b, np.linalg.solve(A, b), A.dot(np.linalg.solve(A, b))
(array([[0, 5, 6],
        [4, 5, 9],
        [5, 6, 2]]),
 array([5, 4, 3]),
 array([-0.39664804,  0.76536313,  0.19553073]),
 array([5., 4., 3.]))

np.linalg.lstsq( A, b, rcond=None ) : 计算 $Ax = b$ 的最小二乘解

1
2
3
A = np.random.randint(0,10,(4, 3))
b = np.random.randint(0, 10, (4, 2))
A, b, np.linalg.lstsq(A, b, rcond=None)
(array([[1, 5, 3],
        [1, 2, 8],
        [9, 4, 9],
        [7, 4, 2]]),
 array([[9, 3],
        [8, 6],
        [0, 3],
        [2, 4]]),
 (array([[-1.18525308, -0.15774406],
         [ 1.90640595,  0.57450974],
         [ 0.49149608,  0.41539645]]),
  array([ 7.50870761, 10.28111032]),
  3,
  array([17.23239582,  6.21628481,  3.92458118])))

返回值: x : 最小二乘解; cost : 残差平方和(损失); n : A的秩; S : A的奇异值

1
2
x, cost, n, S = np.linalg.lstsq(A, b, rcond=None)
b, A.dot(x)
(array([[9, 3],
        [8, 6],
        [0, 3],
        [2, 4]]),
 array([[9.82126489, 3.96099399],
        [6.55952745, 4.31444705],
        [1.38181078, 4.61691052],
        [0.31184438, 2.02462346]]))
1
cost, np.sum((A.dot(x) - b)**2, 0)
(array([ 7.50870761, 10.28111032]), array([ 7.50870761, 10.28111032]))
1
S, np.linalg.svd(A)[1]
(array([17.23239582,  6.21628481,  3.92458118]),
 array([17.23239582,  6.21628481,  3.92458118]))

随机数生成

np.random.permutation( x ) : 产生给定序列的随机排列或者一个随机排列的序列

1
np.random.permutation(range(-5,0))
array([-2, -4, -5, -1, -3])
1
np.random.permutation(5)
array([4, 2, 3, 1, 0])

np.random.rand( d0, d1, ... ) : 产生 [0, 1) 区间上均匀分布的随机数

1
np.random.rand(3,3)
array([[0.93188729, 0.19373543, 0.03242586],
       [0.0891599 , 0.11993145, 0.41048466],
       [0.31863807, 0.77394962, 0.4442849 ]])

np.random.uniform( low, high, size ) : 产生给定范围均匀分布的随机数

1
np.random.uniform(0, 10, (3, 3))
array([[2.60630253, 8.86000804, 3.7889038 ],
       [1.25386602, 4.55746877, 6.21932379],
       [0.48936224, 9.65439911, 9.45159411]])

np.random.randint( low, high, size ) : 产生给定范围的整数随机数

1
np.random.randint(0, 10, (3, 3))
array([[7, 5, 9],
       [1, 4, 6],
       [5, 6, 2]])

np.random.randn( d0, d1, ... ) : 产生服从标准正态分布的随机数

1
np.random.randn(3,3)
array([[ 0.5632786 , -2.02610778, -1.7987696 ],
       [-0.27670206, -2.16489614, -0.70812467],
       [ 1.8628367 ,  0.27024486, -0.80207598]])

np.random.normal( loc, scale, size ) : 产生服从 $N(\mathrm{loc}, \mathrm{scale}^2)$ 正态分布的随机数

1
np.random.normal(0, 1, (3,3))
array([[ 1.49669768,  0.1519672 ,  0.61085676],
       [ 0.21297848, -0.32231956,  0.7621172 ],
       [-0.11691781, -1.36556747,  0.42578613]])

np.random.binomial( n, p, size ) : 产生服从 $B(n, p)$ 二项分布的随机数

1
np.random.binomial(100, 0.5, (3,3))
array([[48, 52, 54],
       [55, 50, 51],
       [47, 44, 40]])

np.random.beta( a, b, size ) : 产生服从 Beta 分布的随机数

np.random.chisquare( k, size ) : 产生服从自由度为 k 的 $\chi^2(k)$ 卡方分布的随机数

np.random.gamma( shape, scale, size ) : 产生服从 Gamma 分布的随机数