这篇随笔主要介绍 NumPy 的数据类型和函数使用,包括 NumPy 数组的创建、索引和运算,常用的一元函数和二元函数,统计计算,线性代数计算,数组比较,随机数组的生成以及 NumPy 数组的读取与写出
数组扩展 np.meshgrid(arr1, arr2) : 基于两个一维数组arr1和arr2,返回两个二维数组
points = np.arange(-5, 5, 0.1)
x, y = np.meshgrid(points, points)
x, y, x.T == y
(array([[-5. , -4.9, -4.8, ..., 4.7, 4.8, 4.9],
[-5. , -4.9, -4.8, ..., 4.7, 4.8, 4.9],
[-5. , -4.9, -4.8, ..., 4.7, 4.8, 4.9],
...,
[-5. , -4.9, -4.8, ..., 4.7, 4.8, 4.9],
[-5. , -4.9, -4.8, ..., 4.7, 4.8, 4.9],
[-5. , -4.9, -4.8, ..., 4.7, 4.8, 4.9]]),
array([[-5. , -5. , -5. , ..., -5. , -5. , -5. ],
[-4.9, -4.9, -4.9, ..., -4.9, -4.9, -4.9],
[-4.8, -4.8, -4.8, ..., -4.8, -4.8, -4.8],
...,
[ 4.7, 4.7, 4.7, ..., 4.7, 4.7, 4.7],
[ 4.8, 4.8, 4.8, ..., 4.8, 4.8, 4.8],
[ 4.9, 4.9, 4.9, ..., 4.9, 4.9, 4.9]]),
array([[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True],
...,
[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True],
[ True, True, True, ..., True, True, True]]))
z = np.sqrt(x**2 + y**2)
import matplotlib.pyplot as plt
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
plt.show()
数据替换 np.where(condition, x, y) : condition 为 True 的位置取 x,否则取 y;当 x、y 为标量或与 condition 形状相同时,np.where(...).shape = condition.shape
1 2 3 4 xarr = np.array([1.1 , 1.2 , 1.3 , 1.4 , 1.5 ]) yarr = np.array([2.1 , 2.2 , 2.3 , 2.4 , 2.5 ]) cond = np.array([True , False , True , True , False ]) np.where(cond, xarr, yarr)
array([1.1, 2.2, 1.3, 1.4, 2.5])
1 2 arr = np.random.randn(4 , 4 ) arr, arr > 0 , np.where(arr > 0 , 2 , -2 )
(array([[-1.38574074, 0.36308053, 1.70333481, -1.28078003],
[ 1.11809371, 0.6485143 , 0.86350574, -0.0687285 ],
[-0.12629966, 0.89419753, -0.56168284, 0.73884436],
[-0.26990796, 0.27744503, 0.6568969 , 0.02221542]]),
array([[False, True, True, False],
[ True, True, True, False],
[False, True, False, True],
[False, True, True, True]]),
array([[-2, 2, 2, -2],
[ 2, 2, 2, -2],
[-2, 2, -2, 2],
[-2, 2, 2, 2]]))
1 arr, np.where(arr > 0 , 2 , arr)
(array([[-1.38574074, 0.36308053, 1.70333481, -1.28078003],
[ 1.11809371, 0.6485143 , 0.86350574, -0.0687285 ],
[-0.12629966, 0.89419753, -0.56168284, 0.73884436],
[-0.26990796, 0.27744503, 0.6568969 , 0.02221542]]),
array([[-1.38574074, 2. , 2. , -1.28078003],
[ 2. , 2. , 2. , -0.0687285 ],
[-0.12629966, 2. , -0.56168284, 2. ],
[-0.26990796, 2. , 2. , 2. ]]))
统计计算 可以通过数组上的一组数学函数对整个数组或某个轴向的数据进行统计计算
求平均、求和 np.mean(arr, axis), arr.mean(axis) np.sum(arr, axis), arr.sum(axis)
1 2 arr = np.arange(24 ).reshape((2 , 3 , 4 )) arr, arr.mean(), np.mean(arr), arr.sum (), np.sum (arr)
(array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]],
[[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]]]),
11.5,
11.5,
276,
276)
1 2 3 4 arr, np.mean(arr, axis=0 ), np.sum (arr, axis=0 )
(array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]],
[[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]]]),
array([[ 6., 7., 8., 9.],
[10., 11., 12., 13.],
[14., 15., 16., 17.]]),
array([[12, 14, 16, 18],
[20, 22, 24, 26],
[28, 30, 32, 34]]))
1 2 3 arr, np.mean(arr, axis=1 ), np.sum (arr, axis=1 )
(array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]],
[[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]]]),
array([[ 4., 5., 6., 7.],
[16., 17., 18., 19.]]),
array([[12, 15, 18, 21],
[48, 51, 54, 57]]))
1 2 arr = np.random.randn(100 ) (arr > 0 ).sum ()
39
累加、累乘 np.cumsum(arr, axis), arr.cumsum(axis) : 累加;np.cumprod(arr, axis), arr.cumprod(axis) : 累乘
1 2 arr = np.array([0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ]) arr, arr.cumsum(), arr.cumprod()
(array([0, 1, 2, 3, 4, 5, 6, 7]),
array([ 0, 1, 3, 6, 10, 15, 21, 28], dtype=int32),
array([0, 0, 0, 0, 0, 0, 0, 0], dtype=int32))
1 2 arr = np.array([[0 , 1 , 2 ], [3 , 4 , 5 ], [6 , 7 , 8 ]]) arr, np.cumsum(arr, axis=0 ), np.cumprod(arr, axis=1 )
(array([[0, 1, 2],
[3, 4, 5],
[6, 7, 8]]),
array([[ 0, 1, 2],
[ 3, 5, 7],
[ 9, 12, 15]], dtype=int32),
array([[ 0, 0, 0],
[ 3, 12, 60],
[ 6, 42, 336]], dtype=int32))
标准差、方差 np.std(arr, axis), arr.std(axis) : 标准差;np.var(arr, axis), arr.var(axis) : 方差
1 2 arr = np.random.randn(10000 ) arr.std(), arr.var()
(1.0108654484477226, 1.0218489548654153)
1 2 arr = np.random.randint(10 , 100 , (3 , 4 )) arr, np.std(arr, axis=0 ), np.var(arr, axis=1 )
(array([[66, 80, 28, 18],
[45, 55, 57, 40],
[66, 30, 29, 76]]),
array([ 9.89949494, 20.41241452, 13.4412301 , 23.9072281 ]),
array([662. , 49.1875, 443.1875]))
最值及其索引 np.max(arr, axis), arr.max(axis) : 最大值;np.argmax(arr, axis), arr.argmax(axis) : 最大值索引(从0开始);np.min(arr, axis), arr.min(axis) : 最小值;np.argmin(arr, axis), arr.argmin(axis) : 最小值索引(从0开始)
1 2 arr = np.random.randint(10 , 100 , (3 , 4 )) arr, np.max (arr, axis=0 ), np.argmax(arr, axis=0 )
(array([[96, 69, 22, 95],
[43, 38, 95, 73],
[80, 13, 23, 90]]),
array([96, 69, 95, 95]),
array([0, 0, 1, 0], dtype=int64))
用于布尔类型的方法 np.any(arr, axis), arr.any(axis) : 检查数组中是否存在一个或多个True;np.all(arr, axis), arr.all(axis) : 检查数组中所有值是否都是True
1 2 bools = np.array([False , False , True , False ]) bools.any (), bools.all ()
(True, False)
1 2 arr = (np.random.randint(10 , 100 , (3 , 4 ))>50 ) arr, np.any (arr, axis=0 ), np.all (arr, axis=0 )
(array([[ True, True, True, True],
[ True, True, True, True],
[False, True, True, True]]),
array([ True, True, True, True]),
array([False, True, True, True]))
排序 np.sort(arr, axis), arr.sort(axis) : 排序
np.sort返回排序副本,arr.sort()修改数组本身
arr = np.random.randn(6)
print(arr)
arr.sort()
print(arr)
[-0.6477684 0.73474585 -0.88816762 1.67601792 -1.00884885 -1.31953076]
[-1.31953076 -1.00884885 -0.88816762 -0.6477684 0.73474585 1.67601792]
arr = np.random.randint(0, 10, (3, 4))
print(arr)
print(arr.sort(axis=1))
print(arr)
[[0 3 2 7]
[1 5 0 3]
[6 3 0 2]]
None
[[0 2 3 7]
[0 1 3 5]
[0 2 3 6]]
arr = np.random.randint(0, 10, (3, 4))
print(arr)
print(np.sort(arr, axis=1))
print(arr)
[[8 4 4 3]
[8 4 9 2]
[3 9 4 4]]
[[3 4 4 8]
[2 4 8 9]
[3 4 4 9]]
[[8 4 4 3]
[8 4 9 2]
[3 9 4 4]]
集合逻辑 np.unique(arr) : 找出arr中的唯一值,返回有序结果1 2 3 names = np.array(['Bob' , 'Joe' , 'Will' , 'Bob' , 'Will' , 'Joe' , 'Joe' ]) ints = np.array([3 , 3 , 3 , 2 , 2 , 1 , 1 , 4 , 4 ]) np.unique(names), sorted (set (names)), np.unique(ints), sorted (set (ints))
(array(['Bob', 'Joe', 'Will'], dtype='<U4'),
['Bob', 'Joe', 'Will'],
array([1, 2, 3, 4]),
[1, 2, 3, 4])
np.in1d(arr1, arr2) : 测试arr1的值是否在arr2中,返回布尔型数组;arr1为一维数组时,arr1.shape = np.in1d(arr1, arr2).shape
1 2 3 arr1 = np.array([6 , 0 , 0 , 3 , 2 , 5 , 6 ]) arr2 = np.array([2 , 3 , 6 , 7 ]) np.in1d(arr1, arr2)
array([ True, False, False, True, True, False, True])
np.intersect1d(arr1, arr2) : 计算arr1和arr2中的交集,返回有序结果1 2 3 arr1 = np.array([6 , 0 , 0 , 3 , 2 , 5 , 6 ]) arr2 = np.array([2 , 3 , 6 , 7 ]) np.intersect1d(arr1, arr2)
array([2, 3, 6])
np.union1d(arr1, arr2) : 计算arr1和arr2中的并集,返回有序结果1 2 3 arr1 = np.array([6 , 0 , 0 , 3 , 2 , 5 , 6 ]) arr2 = np.array([2 , 3 , 6 , 7 ]) np.union1d(arr1, arr2)
array([0, 2, 3, 5, 6, 7])
np.setdiff1d(arr1, arr2) : 计算arr1和arr2的差,即元素在arr1中但不在arr2中1 2 3 arr1 = np.array([6 , 0 , 0 , 3 , 2 , 5 , 6 ]) arr2 = np.array([2 , 3 , 6 , 7 ]) np.setdiff1d(arr1, arr2)
array([0, 5])
np.setxor1d(arr1, arr2) : 计算arr1和arr2的对称差,即只存在于其中一个数组中的元素
arr1 = np.array([6, 0, 0, 3, 2, 5, 6])
arr2 = np.array([2, 3, 6, 7])
np.setxor1d(arr1, arr2)
array([0, 5, 7])
文件 np.save(file, arr) : 写文件;np.load(file) : 读文件;file : *.npy
1 2 arr = np.arange(10 ) np.save('some_array' , arr)
1 np.load('some_array.npy' )
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.savez( file, var1 = arr1, var2 = arr2, ... ) : 将多个数组保存到一个未压缩的文件中;file : *.npz
np.savez_compressed( file, var1 = arr1, var2 = arr2, ... ) : 将多个数组保存到一个压缩的文件中;file : *.npz
1 np.savez('array_archive.npz' , a=arr, b=arr)
1 2 arch = np.load('array_archive.npz' ) arch['b' ]
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
1 np.savez_compressed('arrays_compressed.npz' , a=arr, b=arr)
1 2 arch = np.load('arrays_compressed.npz' ) arch['b' ]
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
线性代数 np.diag( arr, k ) : 返回方阵的对角线或者将一维数组转换为对角方阵1 2 3 arr1 = np.random.randint(0 ,10 ,(3 ,3 )) arr2 = np.arange(3 ) arr1, np.diag(arr1), np.diag(arr1, k=1 ), arr2, np.diag(arr2)
(array([[3, 7, 0],
[7, 9, 4],
[1, 8, 2]]),
array([3, 9, 2]),
array([7, 4]),
array([0, 1, 2]),
array([[0, 0, 0],
[0, 1, 0],
[0, 0, 2]]))
np.dot( arr1, arr2 ) : 矩阵乘法1 2 3 arr1 = np.random.randint(0 , 10 , (3 , 3 )) arr2 = np.linalg.inv(arr1) np.dot(arr1, arr2)
array([[ 1.00000000e+00, 8.88178420e-16, -8.88178420e-16],
[ 1.11022302e-16, 1.00000000e+00, -4.44089210e-16],
[ 0.00000000e+00, 4.44089210e-16, 1.00000000e+00]])
np.trace( arr ) : 矩阵的迹1 2 arr = np.random.randint(0 ,10 ,(3 ,3 )) arr, np.trace(arr)
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
7)
np.linalg.det( arr ) : 矩阵的行列式
arr, np.linalg.det(arr)
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
179.0)
np.linalg.eig( arr ) : 矩阵的特征值和特征向量
arr, np.linalg.eig(arr)
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
(array([14.394287 , -2.58656667, -4.80772032]),
array([[-0.46692605, -0.5381644 , -0.38398256],
[-0.7072986 , 0.74694386, -0.53437623],
[-0.53076243, -0.39045353, 0.75299365]])))
1 2 lbd, e = np.linalg.eig(arr) np.dot(arr, e[:, 0 ]) , lbd[0 ]*e[:, 0 ]
(array([ -6.72106756, -10.18105905, -7.6399467 ]),
array([ -6.72106756, -10.18105905, -7.6399467 ]))
np.linalg.inv( arr ) : 方阵的逆;np.linalg.pinv( arr ) : 矩阵的Moore-Penrose伪逆
arr, np.linalg.inv(arr), np.linalg.pinv(arr)
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
array([[-0.24581006, 0.1452514 , 0.08379888],
[ 0.20670391, -0.16759777, 0.13407821],
[-0.00558659, 0.1396648 , -0.11173184]]),
array([[-0.24581006, 0.1452514 , 0.08379888],
[ 0.20670391, -0.16759777, 0.13407821],
[-0.00558659, 0.1396648 , -0.11173184]]))
1 2 arr2 = np.arange(1 ,13 ).reshape((3 ,4 )) arr2, np.linalg.pinv(arr2)
(array([[ 1, 2, 3, 4],
[ 5, 6, 7, 8],
[ 9, 10, 11, 12]]),
array([[-0.375 , -0.1 , 0.175 ],
[-0.14583333, -0.03333333, 0.07916667],
[ 0.08333333, 0.03333333, -0.01666667],
[ 0.3125 , 0.1 , -0.1125 ]]))
np.linalg.qr( arr ) : 计算QR分解(矩阵等价变换)arr = QR,其中 Q:正交矩阵,R:上三角矩阵
1 2 Q, R = np.linalg.qr(arr) arr, Q, R
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
array([[ 0. , 0.99951255, 0.03121953],
[-0.62469505, 0.02437835, -0.78048818],
[-0.78086881, -0.01950268, 0.62439054]]),
array([[-6.40312424, -7.80868809, -7.18399305],
[ 0. , 5.00243843, 6.17747514],
[ 0. , 0. , -5.58829534]]))
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
array([[0., 5., 6.],
[4., 5., 9.],
[5., 6., 2.]]))
1 np.linalg.det(arr), np.linalg.det(R)
(179.0, 179.0)
np.linalg.svd( arr ) : 奇异值(SVD)分解1 2 U, S, V = np.linalg.svd(arr) arr, U, S, V
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
array([[-0.49438846, 0.41745486, 0.76243786],
[-0.73566235, 0.26627093, -0.62281675],
[-0.46301292, -0.86881024, 0.17546455]]),
array([14.76104704, 4.89682835, 2.47640109]),
array([[-0.35618842, -0.6048576 , -0.71223387],
[-0.66961046, -0.36640706, 0.64604002],
[-0.65172973, 0.70703122, -0.27450903]]))
arr = U diag(S) V U : 正交矩阵; V : 正交矩阵; S : 降序排序的非负数 S : 奇异值; U : 左奇异向量; V : 右奇异向量
1 U.dot(np.diag(S)).dot(V)
array([[-1.54427352e-15, 5.00000000e+00, 6.00000000e+00],
[ 4.00000000e+00, 5.00000000e+00, 9.00000000e+00],
[ 5.00000000e+00, 6.00000000e+00, 2.00000000e+00]])
1 np.dot(U,U.T), np.dot(V,V.T)
(array([[1.00000000e+00, 3.54512330e-16, 1.29342340e-16],
[3.54512330e-16, 1.00000000e+00, 2.65502607e-16],
[1.29342340e-16, 2.65502607e-16, 1.00000000e+00]]),
array([[ 1.00000000e+00, 1.05593218e-16, -2.36545414e-16],
[ 1.05593218e-16, 1.00000000e+00, 1.07467290e-16],
[-2.36545414e-16, 1.07467290e-16, 1.00000000e+00]]))
np.linalg.solve( A, b ) : 解线性方程 1 2 3 A = arr b = np.random.randint(0 ,10 ,(3 )) A, b, np.linalg.solve(A, b), A.dot(np.linalg.solve(A, b))
(array([[0, 5, 6],
[4, 5, 9],
[5, 6, 2]]),
array([5, 4, 3]),
array([-0.39664804, 0.76536313, 0.19553073]),
array([5., 4., 3.]))
np.linalg.lstsq( A, b, rcond=None ) : 计算 $Ax = b$ 的最小二乘解
A = np.random.randint(0, 10, (4, 3))
b = np.random.randint(0, 10, (4, 2))
A, b, np.linalg.lstsq(A, b, rcond=None)
(array([[1, 5, 3],
[1, 2, 8],
[9, 4, 9],
[7, 4, 2]]),
array([[9, 3],
[8, 6],
[0, 3],
[2, 4]]),
(array([[-1.18525308, -0.15774406],
[ 1.90640595, 0.57450974],
[ 0.49149608, 0.41539645]]),
array([ 7.50870761, 10.28111032]),
3,
array([17.23239582, 6.21628481, 3.92458118])))
返回值: x : 最小二乘解(近似解); cost : 残差平方和(损失); n : A的秩 ; S : A的奇异值
1 2 x, cost, n, S = np.linalg.lstsq(A, b, rcond=None ) b, A.dot(x)
(array([[9, 3],
[8, 6],
[0, 3],
[2, 4]]),
array([[9.82126489, 3.96099399],
[6.55952745, 4.31444705],
[1.38181078, 4.61691052],
[0.31184438, 2.02462346]]))
1 cost, np.sum ((A.dot(x) - b)**2 , 0 )
(array([ 7.50870761, 10.28111032]), array([ 7.50870761, 10.28111032]))
(array([17.23239582, 6.21628481, 3.92458118]),
array([17.23239582, 6.21628481, 3.92458118]))
随机数生成 np.random.permutation( x ) : 产生给定序列的随机排列或者一个随机排列的序列1 np.random.permutation(range (-5 ,0 ))
array([-2, -4, -5, -1, -3])
1 np.random.permutation(5 )
array([4, 2, 3, 1, 0])
np.random.rand( d0, d1, ... ) : 产生 [0, 1) 上均匀分布的随机数
np.random.rand(3, 3)
array([[0.93188729, 0.19373543, 0.03242586],
[0.0891599 , 0.11993145, 0.41048466],
[0.31863807, 0.77394962, 0.4442849 ]])
1 np.random.uniform(0 , 10 , (3 , 3 ))
array([[2.60630253, 8.86000804, 3.7889038 ],
[1.25386602, 4.55746877, 6.21932379],
[0.48936224, 9.65439911, 9.45159411]])
np.random.randint( low, high, size ) : 产生给定范围的整数随机数1 np.random.randint(0 , 10 , (3 , 3 ))
array([[7, 5, 9],
[1, 4, 6],
[5, 6, 2]])
np.random.randn( d0, d1, ... ) : 产生服从标准正态分布的随机数
np.random.randn(3, 3)
array([[ 0.5632786 , -2.02610778, -1.7987696 ],
[-0.27670206, -2.16489614, -0.70812467],
[ 1.8628367 , 0.27024486, -0.80207598]])
np.random.normal( loc, scale, size ) : 产生服从 $N(loc, scale^2)$ 正态分布的随机数(loc 为均值,scale 为标准差)
np.random.normal(0, 1, (3, 3))
array([[ 1.49669768, 0.1519672 , 0.61085676],
[ 0.21297848, -0.32231956, 0.7621172 ],
[-0.11691781, -1.36556747, 0.42578613]])
np.random.binomial( n, p, size ) : 产生服从 $B(n, p)$ 二项分布的随机数
np.random.binomial(100, 0.5, (3, 3))
array([[48, 52, 54],
[55, 50, 51],
[47, 44, 40]])
np.random.beta( a, b, size ) : 产生服从 $Beta(a, b)$ 分布的随机数
np.random.chisquare( k, size ) : 产生服从自由度为 k 的 $\chi^2(k)$ 卡方分布的随机数
np.random.gamma( shape, scale, size ) : 产生服从 $Gamma(shape, scale)$ 分布的随机数