7-2 異常値を検出する
7-2-1 データの分布を計算する
import pandas as pd
import numpy as np
import psycopg2
# Open a connection to the local "BigData" PostgreSQL database.
# NOTE(review): the connection is left open here; confirm whether later
# sections of this script reuse `conn` before adding an explicit close().
conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")

# Load the whole action_log_with_noise table into a DataFrame and preview
# the first three rows.
action_log_with_noise = pd.read_sql(
    sql="SELECT * FROM action_log_with_noise",
    con=conn,
)
print(action_log_with_noise.head(3))

# s1: number of log rows per session.
# s2: rank of each session by that count; method='first' breaks ties by
# order of appearance, so every session gets a distinct rank.
s1 = action_log_with_noise.groupby('session').size()
s2 = s1.rank(method='first')

# The two Series are stacked as rows (labelled 0 and 1), then transposed for
# display so that each session becomes one row with its count and rank.
df = pd.DataFrame([s1, s2])
print(df.transpose())