利用Python进行数据分析(3)—— Numpy Basic(3)

2017-09-13 20:36:33来源:CSDN作者:HeatDeath人点击

分享

Data processing using arrays

import numpy as np
from matplotlib.pyplot import imshow, title
import matplotlib.pyplot as plt

# Print floats with 4 decimal places and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

# np.arange(start, stop, step): 1000 equally spaced points in [-5, 5).
points = np.arange(-5, 5, 0.01)
print(points)

# np.meshgrid takes two 1-D arrays and produces two 2-D coordinate matrices.
xs, ys = np.meshgrid(points, points)
print('------------------')
# xs: every row runs from -5.0 to 4.99.
print(xs)
print('------------------')
# ys: every column runs from -5.0 to 4.99.
print(ys)

# Square both grids element-wise, add them, then take the square root:
# z holds sqrt(x**2 + y**2) for every grid point.
z = np.sqrt(xs ** 2 + ys ** 2)
print(z)
[-5.   -4.99 -4.98 -4.97 -4.96 -4.95 -4.94 -4.93 -4.92 -4.91 -4.9  -4.89 -4.88 -4.87 -4.86 -4.85 -4.84 -4.83 -4.82 -4.81 -4.8  -4.79 -4.78 -4.77 -4.76 -4.75 -4.74 -4.73 -4.72 -4.71 -4.7  -4.69 -4.68 -4.67 -4.66 -4.65 -4.64 -4.63 -4.62 -4.61 -4.6  -4.59 -4.58 -4.57 -4.56 -4.55 -4.54 -4.53 -4.52 -4.51 -4.5  -4.49 -4.48 -4.47 -4.46 -4.45 -4.44 -4.43 -4.42 -4.41 -4.4  -4.39 -4.38 -4.37 -4.36 -4.35 -4.34 -4.33 -4.32 -4.31 -4.3  -4.29 -4.28 -4.27 -4.26 -4.25 -4.24 -4.23 -4.22 -4.21 -4.2  -4.19 -4.18 -4.17 -4.16 -4.15 -4.14 -4.13 -4.12 -4.11 -4.1  -4.09 -4.08 -4.07 -4.06 -4.05 -4.04 -4.03 -4.02 -4.01 -4.   -3.99 -3.98 -3.97 -3.96 -3.95 -3.94 -3.93 -3.92 -3.91 -3.9  -3.89 -3.88 -3.87 -3.86 -3.85 -3.84 -3.83 -3.82 -3.81 -3.8  -3.79 -3.78 -3.77 -3.76 -3.75 -3.74 -3.73 -3.72 -3.71 -3.7  -3.69 -3.68 -3.67 -3.66 -3.65 -3.64 -3.63 -3.62 -3.61 -3.6  -3.59 -3.58 -3.57 -3.56 -3.55 -3.54 -3.53 -3.52 -3.51 -3.5  -3.49 -3.48 -3.47 -3.46 -3.45 -3.44 -3.43 -3.42 -3.41 -3.4  -3.39 -3.38 -3.37 -3.36 -3.35 -3.34 -3.33 -3.32 -3.31 -3.3  -3.29 -3.28 -3.27 -3.26 -3.25 -3.24 -3.23 -3.22 -3.21 -3.2  -3.19 -3.18 -3.17 -3.16 -3.15 -3.14 -3.13 -3.12 -3.11 -3.1  -3.09 -3.08 -3.07 -3.06 -3.05 -3.04 -3.03 -3.02 -3.01 -3.   -2.99 -2.98 -2.97 -2.96 -2.95 -2.94 -2.93 -2.92 -2.91 -2.9  -2.89 -2.88 -2.87 -2.86 -2.85 -2.84 -2.83 -2.82 -2.81 -2.8  -2.79 -2.78 -2.77 -2.76 -2.75 -2.74 -2.73 -2.72 -2.71 -2.7  -2.69 -2.68 -2.67 -2.66 -2.65 -2.64 -2.63 -2.62 -2.61 -2.6  -2.59 -2.58 -2.57 -2.56 -2.55 -2.54 -2.53 -2.52 -2.51 -2.5  -2.49 -2.48 -2.47 -2.46 -2.45 -2.44 -2.43 -2.42 -2.41 -2.4  -2.39 -2.38 -2.37 -2.36 -2.35 -2.34 -2.33 -2.32 -2.31 -2.3  -2.29 -2.28 -2.27 -2.26 -2.25 -2.24 -2.23 -2.22 -2.21 -2.2  -2.19 -2.18 -2.17 -2.16 -2.15 -2.14 -2.13 -2.12 -2.11 -2.1  -2.09 -2.08 -2.07 -2.06 -2.05 -2.04 -2.03 -2.02 -2.01 -2.   
-1.99 -1.98 -1.97 -1.96 -1.95 -1.94 -1.93 -1.92 -1.91 -1.9  -1.89 -1.88 -1.87 -1.86 -1.85 -1.84 -1.83 -1.82 -1.81 -1.8  -1.79 -1.78 -1.77 -1.76 -1.75 -1.74 -1.73 -1.72 -1.71 -1.7  -1.69 -1.68 -1.67 -1.66 -1.65 -1.64 -1.63 -1.62 -1.61 -1.6  -1.59 -1.58 -1.57 -1.56 -1.55 -1.54 -1.53 -1.52 -1.51 -1.5  -1.49 -1.48 -1.47 -1.46 -1.45 -1.44 -1.43 -1.42 -1.41 -1.4  -1.39 -1.38 -1.37 -1.36 -1.35 -1.34 -1.33 -1.32 -1.31 -1.3  -1.29 -1.28 -1.27 -1.26 -1.25 -1.24 -1.23 -1.22 -1.21 -1.2  -1.19 -1.18 -1.17 -1.16 -1.15 -1.14 -1.13 -1.12 -1.11 -1.1  -1.09 -1.08 -1.07 -1.06 -1.05 -1.04 -1.03 -1.02 -1.01 -1.   -0.99 -0.98 -0.97 -0.96 -0.95 -0.94 -0.93 -0.92 -0.91 -0.9  -0.89 -0.88 -0.87 -0.86 -0.85 -0.84 -0.83 -0.82 -0.81 -0.8  -0.79 -0.78 -0.77 -0.76 -0.75 -0.74 -0.73 -0.72 -0.71 -0.7  -0.69 -0.68 -0.67 -0.66 -0.65 -0.64 -0.63 -0.62 -0.61 -0.6  -0.59 -0.58 -0.57 -0.56 -0.55 -0.54 -0.53 -0.52 -0.51 -0.5  -0.49 -0.48 -0.47 -0.46 -0.45 -0.44 -0.43 -0.42 -0.41 -0.4  -0.39 -0.38 -0.37 -0.36 -0.35 -0.34 -0.33 -0.32 -0.31 -0.3  -0.29 -0.28 -0.27 -0.26 -0.25 -0.24 -0.23 -0.22 -0.21 -0.2  -0.19 -0.18 -0.17 -0.16 -0.15 -0.14 -0.13 -0.12 -0.11 -0.1  -0.09 -0.08 -0.07 -0.06 -0.05 -0.04 -0.03 -0.02 -0.01 -0.    0.01  0.02  0.03  0.04  0.05  0.06  0.07  0.08  0.09  0.1   0.11  0.12  0.13  0.14  0.15  0.16  0.17  0.18  0.19  0.2   0.21  0.22  0.23  0.24  0.25  0.26  0.27  0.28  0.29  0.3   0.31  0.32  0.33  0.34  0.35  0.36  0.37  0.38  0.39  0.4   0.41  0.42  0.43  0.44  0.45  0.46  0.47  0.48  0.49  0.5   0.51  0.52  0.53  0.54  0.55  0.56  0.57  0.58  0.59  0.6   0.61  0.62  0.63  0.64  0.65  0.66  0.67  0.68  0.69  0.7   0.71  0.72  0.73  0.74  0.75  0.76  0.77  0.78  0.79  0.8   0.81  0.82  0.83  0.84  0.85  0.86  0.87  0.88  0.89  0.9   0.91  0.92  0.93  0.94  0.95  0.96  0.97  0.98  0.99  1.    
1.01  1.02  1.03  1.04  1.05  1.06  1.07  1.08  1.09  1.1   1.11  1.12  1.13  1.14  1.15  1.16  1.17  1.18  1.19  1.2   1.21  1.22  1.23  1.24  1.25  1.26  1.27  1.28  1.29  1.3   1.31  1.32  1.33  1.34  1.35  1.36  1.37  1.38  1.39  1.4   1.41  1.42  1.43  1.44  1.45  1.46  1.47  1.48  1.49  1.5   1.51  1.52  1.53  1.54  1.55  1.56  1.57  1.58  1.59  1.6   1.61  1.62  1.63  1.64  1.65  1.66  1.67  1.68  1.69  1.7   1.71  1.72  1.73  1.74  1.75  1.76  1.77  1.78  1.79  1.8   1.81  1.82  1.83  1.84  1.85  1.86  1.87  1.88  1.89  1.9   1.91  1.92  1.93  1.94  1.95  1.96  1.97  1.98  1.99  2.    2.01  2.02  2.03  2.04  2.05  2.06  2.07  2.08  2.09  2.1   2.11  2.12  2.13  2.14  2.15  2.16  2.17  2.18  2.19  2.2   2.21  2.22  2.23  2.24  2.25  2.26  2.27  2.28  2.29  2.3   2.31  2.32  2.33  2.34  2.35  2.36  2.37  2.38  2.39  2.4   2.41  2.42  2.43  2.44  2.45  2.46  2.47  2.48  2.49  2.5   2.51  2.52  2.53  2.54  2.55  2.56  2.57  2.58  2.59  2.6   2.61  2.62  2.63  2.64  2.65  2.66  2.67  2.68  2.69  2.7   2.71  2.72  2.73  2.74  2.75  2.76  2.77  2.78  2.79  2.8   2.81  2.82  2.83  2.84  2.85  2.86  2.87  2.88  2.89  2.9   2.91  2.92  2.93  2.94  2.95  2.96  2.97  2.98  2.99  3.    3.01  3.02  3.03  3.04  3.05  3.06  3.07  3.08  3.09  3.1   3.11  3.12  3.13  3.14  3.15  3.16  3.17  3.18  3.19  3.2   3.21  3.22  3.23  3.24  3.25  3.26  3.27  3.28  3.29  3.3   3.31  3.32  3.33  3.34  3.35  3.36  3.37  3.38  3.39  3.4   3.41  3.42  3.43  3.44  3.45  3.46  3.47  3.48  3.49  3.5   3.51  3.52  3.53  3.54  3.55  3.56  3.57  3.58  3.59  3.6   3.61  3.62  3.63  3.64  3.65  3.66  3.67  3.68  3.69  3.7   3.71  3.72  3.73  3.74  3.75  3.76  3.77  3.78  3.79  3.8   3.81  3.82  3.83  3.84  3.85  3.86  3.87  3.88  3.89  3.9   3.91  3.92  3.93  3.94  3.95  3.96  3.97  3.98  3.99  4.    
4.01  4.02  4.03  4.04  4.05  4.06  4.07  4.08  4.09  4.1   4.11  4.12  4.13  4.14  4.15  4.16  4.17  4.18  4.19  4.2   4.21  4.22  4.23  4.24  4.25  4.26  4.27  4.28  4.29  4.3   4.31  4.32  4.33  4.34  4.35  4.36  4.37  4.38  4.39  4.4   4.41  4.42  4.43  4.44  4.45  4.46  4.47  4.48  4.49  4.5   4.51  4.52  4.53  4.54  4.55  4.56  4.57  4.58  4.59  4.6   4.61  4.62  4.63  4.64  4.65  4.66  4.67  4.68  4.69  4.7   4.71  4.72  4.73  4.74  4.75  4.76  4.77  4.78  4.79  4.8   4.81  4.82  4.83  4.84  4.85  4.86  4.87  4.88  4.89  4.9   4.91  4.92  4.93  4.94  4.95  4.96  4.97  4.98  4.99]------------------[[-5.   -4.99 -4.98 ...,  4.97  4.98  4.99] [-5.   -4.99 -4.98 ...,  4.97  4.98  4.99] [-5.   -4.99 -4.98 ...,  4.97  4.98  4.99] ...,  [-5.   -4.99 -4.98 ...,  4.97  4.98  4.99] [-5.   -4.99 -4.98 ...,  4.97  4.98  4.99] [-5.   -4.99 -4.98 ...,  4.97  4.98  4.99]]------------------[[-5.   -5.   -5.   ..., -5.   -5.   -5.  ] [-4.99 -4.99 -4.99 ..., -4.99 -4.99 -4.99] [-4.98 -4.98 -4.98 ..., -4.98 -4.98 -4.98] ...,  [ 4.97  4.97  4.97 ...,  4.97  4.97  4.97] [ 4.98  4.98  4.98 ...,  4.98  4.98  4.98] [ 4.99  4.99  4.99 ...,  4.99  4.99  4.99]][[ 7.0711  7.064   7.0569 ...,  7.0499  7.0569  7.064 ] [ 7.064   7.0569  7.0499 ...,  7.0428  7.0499  7.0569] [ 7.0569  7.0499  7.0428 ...,  7.0357  7.0428  7.0499] ...,  [ 7.0499  7.0428  7.0357 ...,  7.0286  7.0357  7.0428] [ 7.0569  7.0499  7.0428 ...,  7.0357  7.0428  7.0499] [ 7.064   7.0569  7.0499 ...,  7.0428  7.0499  7.0569]]Process finished with exit code 0

Expressing conditional logic as array operations

xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
# ~cond inverts the mask, so this selects xarr where cond is False.
print(xarr[~cond])

# np.where is the vectorized version of the ternary expression
# `x if condition else y`; it is equivalent to the pure-Python form
#     [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
result = np.where(cond, xarr, yarr)
print(result)

arr = np.random.randn(4, 4)
# Values greater than 0 become 2, everything else becomes -2.
arr_1 = np.where(arr > 0, 2, -2)
print(arr_1)
# Values greater than 0 become 2, everything else keeps its original value.
arr_2 = np.where(arr > 0, 2, arr)
print(arr_2)
[ 1.2  1.5]
[ 1.1  2.2  1.3  1.4  2.5]
[[ 2  2  2 -2]
 [-2  2 -2  2]
 [ 2 -2 -2 -2]
 [ 2 -2 -2 -2]]
[[ 2.      2.      2.     -0.572 ]
 [-0.0768  2.     -1.0821  2.    ]
 [ 2.     -0.9678 -0.1732 -1.0441]
 [ 2.     -0.4337 -0.7777 -0.8902]]
Process finished with exit code 0

Mathematical and statistical methods

arr = np.random.randn(5, 4)  # normally distributed data
# The method and the top-level function compute the same mean.
print(arr.mean())
print(np.mean(arr))
print(arr.sum())
# mean and sum accept an axis argument (the axis to reduce over);
# the result is an array with one fewer dimension.
print(arr.mean(axis=1))
print(arr.sum(0))

arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
print(arr)
# Cumulative sum down the rows (axis 0) and across the columns (axis 1).
print(arr.cumsum(0))
print(arr.cumsum(1))
# Cumulative product across the columns.
print(arr.cumprod(1))
-0.10294284368
-0.10294284368
-2.0588568736
[-0.7362  0.3252 -0.5837  0.5134 -0.0333]
[-2.5793  1.8575  1.2828 -2.6199]
[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[ 0  1  2]
 [ 3  5  7]
 [ 9 12 15]]
[[ 0  1  3]
 [ 3  7 12]
 [ 6 13 21]]
[[  0   0   0]
 [  3  12  60]
 [  6  42 336]]
Process finished with exit code 0

Methods for boolean arrays

arr = np.random.randn(100)
# True counts as 1 when summed, so this is the number of positive values.
print((arr > 0).sum())
print(arr[arr > 0])

bools = np.array([False, False, True, False])
# any() tests whether at least one value in the array is True;
# all() tests whether every value is True.
# The results are bound to names instead of being silently discarded.
any_true = bools.any()  # True here: one element is True
all_true = bools.all()  # False here: not every element is True
51
[ 1.0179  0.4661  0.035   0.3081  0.5339  1.9648  0.4265  1.942   1.2556  0.1283  2.0778  0.2201  0.732   0.8652  1.0453  0.1707  0.3444  0.1044  0.8468  0.071   0.2321  0.3639  0.1994  1.3033  0.8468  0.7713  2.0785  0.9529  1.6031  0.157   0.5425  0.0212  0.659   0.6765  0.3032  1.8217  4.0663  0.107   0.4904  0.9526  0.8232  0.9066  1.0254  0.2625  1.4222  0.6134  0.7314  1.1774  0.0021  0.3598  2.4056]
Process finished with exit code 0

Sorting

arr = np.random.randn(8)
print(arr)
arr.sort()  # ndarray.sort() sorts in place, in ascending order
print(arr)

arr = np.random.randn(2, 5)
print(arr)
# With no argument the sort runs along the last axis: each row is sorted.
arr.sort()
print(arr)
# Axis 1 is the last axis of a 2-D array, so this leaves the rows as they are.
arr.sort(1)
print(arr)
# Axis 0: sort each column.
arr.sort(0)
print(arr)

large_arr = np.random.randn(1000)
large_arr.sort()
# After sorting, the element 5% of the way in approximates the 5% quantile.
print(large_arr[int(0.05 * len(large_arr))])
[ 1.2906  0.7066 -3.3     0.126  -1.3173 -0.2734 -0.7938 -0.9861]
[-3.3    -1.3173 -0.9861 -0.7938 -0.2734  0.126   0.7066  1.2906]
[[ 0.6627 -0.3837  0.8278 -0.4906  0.8111]
 [ 0.4463 -0.3823 -1.0958  0.4684 -0.4863]]
[[-0.4906 -0.3837  0.6627  0.8111  0.8278]
 [-1.0958 -0.4863 -0.3823  0.4463  0.4684]]
[[-0.4906 -0.3837  0.6627  0.8111  0.8278]
 [-1.0958 -0.4863 -0.3823  0.4463  0.4684]]
[[-1.0958 -0.4863 -0.3823  0.4463  0.4684]
 [-0.4906 -0.3837  0.6627  0.8111  0.8278]]
-1.63847027424
Process finished with exit code 0

Unique and other set logic

# np.unique() finds the unique values in an array and returns them sorted.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
unique_names = np.unique(names)
print(unique_names)
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
unique_ints = np.unique(ints)
print(unique_ints)

# np.isin() tests the membership of each value of one array in another and
# returns a boolean array. It is used here in place of np.in1d(), which is
# deprecated in recent NumPy releases; for 1-D input the result is the same.
values = np.array([6, 0, 0, 3, 2, 5, 6])
values_in_set = np.isin(values, [2, 3, 6])
print(values_in_set)
set_in_values = np.isin([2, 3, 6], values)
print(set_in_values)
['Bob' 'Joe' 'Will']
[1 2 3 4]
[ True False False  True  True False  True]
[ True  True  True]
Process finished with exit code 0

Linear algebra

from numpy.linalg import inv, qr

# Fix the seed so the random matrix is the same on every run.
np.random.seed(12345)
X = np.random.randn(5, 5)
print(X)
# Transpose.
print(X.T)
# X^T X is a symmetric 5x5 matrix.
mat = X.T.dot(X)
print(mat)
# Matrix inverse.
print(inv(mat))
# A matrix times its inverse is the identity up to floating-point error;
# the product is bound to a name instead of being silently discarded.
identity_check = mat.dot(inv(mat))
# QR decomposition: mat == q.dot(r), with r upper triangular.
q, r = qr(mat)
print(r)
[[-0.2047  0.4789 -0.5194 -0.5557  1.9658]
 [ 1.3934  0.0929  0.2817  0.769   1.2464]
 [ 1.0072 -1.2962  0.275   0.2289  1.3529]
 [ 0.8864 -2.0016 -0.3718  1.669  -0.4386]
 [-0.5397  0.477   3.2489 -1.0212 -0.5771]]
[[-0.2047  1.3934  1.0072  0.8864 -0.5397]
 [ 0.4789  0.0929 -1.2962 -2.0016  0.477 ]
 [-0.5194  0.2817  0.275  -0.3718  3.2489]
 [-0.5557  0.769   0.2289  1.669  -1.0212]
 [ 1.9658  1.2464  1.3529 -0.4386 -0.5771]]
[[  4.075   -3.3059  -1.3073   3.4466   2.6197]
 [ -3.3059   6.1523   1.7149  -4.3193  -0.0938]
 [ -1.3073   1.7149  11.1187  -3.3702  -2.0097]
 [  3.4466  -4.3193  -3.3702   4.7812   0.0331]
 [  2.6197  -0.0938  -2.0097   0.0331   7.7736]]
[[ 3.0361 -0.1808 -0.6878 -2.8285 -1.1911]
 [-0.1808  0.5035  0.1215  0.6702  0.0956]
 [-0.6878  0.1215  0.2904  0.8081  0.3049]
 [-2.8285  0.6702  0.8081  3.4152  1.1557]
 [-1.1911  0.0956  0.3049  1.1557  0.6051]]
[[ -6.9271   7.389    6.1227  -7.1163  -4.9215]
 [  0.      -3.9735  -0.8671   2.9747  -5.7402]
 [  0.       0.     -10.2681   1.8909   1.6079]
 [  0.       0.       0.      -1.2996   3.3577]
 [  0.       0.       0.       0.       0.5571]]
Process finished with exit code 0

Random Walks

nsteps = 1000
# Draw 1000 random integers, each either 0 or 1.
draws = np.random.randint(0, 2, size=nsteps)
# Turn the draws into steps: 0 becomes -1, 1 stays +1.
steps = np.where(draws > 0, 1, -1)
print(steps)
# The walk's position after each step is the running sum of the steps.
walk = steps.cumsum()
print(walk)
print(walk.min())
print(walk.max())
# argmax() on a boolean array returns the index of the first maximum, i.e.
# the first True: the first step at which the walk is 10 or more away from 0.
# NOTE: if the walk never gets that far, every value is False and argmax()
# returns 0, so a result of 0 does not mean the threshold was reached.
first_crossing = (np.abs(walk) >= 10).argmax()
print(first_crossing)
Process finished with exit code 0

Simulating many random walks at once

nwalks = 5000
nsteps = 1000
# One row per walk, one column per step; every draw is 0 or 1.
draws = np.random.randint(0, 2, size=(nwalks, nsteps))
steps = np.where(draws > 0, 1, -1)
# Cumulative sum along each row gives the position of every walk over time.
walks = steps.cumsum(1)
print(walks)
# Extremes over all walks, bound to names instead of being discarded.
max_position = walks.max()
min_position = walks.min()
# Reduce along axis 1: True for each walk that ever reaches +30 or -30.
hits30 = (np.abs(walks) >= 30).any(1)
print(hits30)
print(hits30.sum())  # number of walks that hit 30 or -30
# Keep only the walks that reached the threshold, then take the index of the
# first True in each row: the step at which that walk first crossed it.
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
print(crossing_times)
print(crossing_times.mean())
[[ -1  -2  -3 ...,  42  41  42]
 [ -1   0   1 ...,   6   7   8]
 [  1   0   1 ...,  38  37  36]
 ...,
 [  1   0  -1 ...,  -6  -5  -6]
 [ -1  -2  -3 ..., -18 -19 -18]
 [  1   2   3 ...,   0   1   2]]
[ True  True  True ..., False  True  True]
3374
[913 397 697 ..., 241 715 313]
499.988737404
Process finished with exit code 0

最新文章

123

最新摄影

微信扫一扫

第七城市微信公众平台