統計学入門

3章練習問題

練習問題3.1

import numpy as np
import matplotlib.pyplot as plt

# Paired observations for exercise 3.1: two rows of 47 values each.
# Row 0 is used as x and row 1 as y by the code that follows.
data = np.array([
    # x values
    [
        41.4, 76.3, 59.2, 51.8, 52.5, 53.2, 62.4, 55.0, 57.7, 63.2,
        37.5, 48.5, 32.4, 20.5, 47.9, 68.9, 68.5, 52.5, 63.3, 58.8,
        59.7, 48.4, 40.7, 51.0, 50.9, 34.3, 25.8, 32.1, 34.4, 55.1,
        60.3, 57.0, 45.6, 54.2, 55.1, 55.7, 70.3, 61.8, 47.6, 42.5,
        71.3, 55.2, 65.2, 42.9, 54.7, 62.0, 48.2,
    ],
    # y values
    [
        52.8, 71.2, 72.6, 63.7, 81.3, 81.8, 70.9, 74.0, 73.2, 72.9,
        66.7, 65.7, 43.7, 55.5, 79.6, 85.7, 75.3, 80.5, 73.0, 77.0,
        77.5, 69.2, 60.0, 78.2, 79.5, 61.8, 49.6, 59.6, 72.1, 71.0,
        76.3, 72.8, 71.8, 60.7, 67.0, 71.8, 71.2, 68.3, 68.5, 54.8,
        76.0, 65.8, 69.4, 66.9, 69.7, 71.2, 59.6,
    ],
])
# Scatter plot of the paired observations (row 0 on the x-axis, row 1 on the y-axis).
plt.scatter(data[0], data[1])

# Pearson correlation coefficient computed by hand:
#     r = S_xy / sqrt(S_xx * S_yy)
# where S_xx, S_yy are the sums of squared deviations from the mean and
# S_xy is the sum of cross-products of the deviations.
mean_x = np.mean(data[0])
mean_y = np.mean(data[1])
n = len(data[0])

# Work with deviations from the mean using vectorized NumPy operations.
# This is algebraically identical to sum(x**2) - n*mean**2 but avoids the
# subtraction of two large, nearly equal numbers and the Python-level loops.
dev_x = data[0] - mean_x
dev_y = data[1] - mean_y

# NOTE: despite their names, mean_x2 / mean_y2 are sums of squared deviations
# (S_xx, S_yy) and xy is the sum of cross-products (S_xy), not means.
# The names are kept unchanged so that any later code relying on them still works.
mean_x2 = np.sum(dev_x ** 2)
mean_y2 = np.sum(dev_y ** 2)
xy = np.sum(dev_x * dev_y)
print(xy / (np.sqrt(mean_x2) * np.sqrt(mean_y2)))

# Alternatively, let NumPy compute the full 2x2 correlation matrix;
# the off-diagonal entries are the same coefficient.
print(np.corrcoef(data[0], data[1]))

<結果>
0.638781565579
相関係数は約0.64で、中程度の正の相関はあるものの、そんなに強い相関が見てとれる数値ではないと思う
array([[ 1.        ,  0.63878157],
       [ 0.63878157,  1.        ]])

f:id:bitop:20160604174421p:plain

練習問題3.3 (1)と(4)を比較

from scipy import stats as st


# Rank data for exercise 3.3: two rankings of the same 30 items
# (presumably rankings (1) and (4) from the exercise -- confirm against the textbook).
# Row 0 is simply the ranks 1..30 in order; row 1 is the other ranking of the same items.
r = np.array([
    np.arange(1, 31),
    [
        20, 1, 4, 2, 6, 3, 12, 17, 8, 5,
        18, 13, 23, 26, 29, 15, 16, 9, 10, 11,
        30, 7, 27, 19, 14, 21, 28, 24, 22, 25,
    ],
])

# Spearman rank correlation between the two rankings.
# The result is bound to a name and printed explicitly: a bare expression
# statement is only echoed in an interactive session and is silently
# discarded when the code runs as a script.
spearman_result = st.spearmanr(r[0], r[1])
print(spearman_result)

<結果>
SpearmanrResult(correlation=0.59332591768631815, pvalue=0.0005486542025559916)