「ビックデータ分析・活用のためのSQLレシピ」を読む

5-2 ユーザー全体の時系列による状態変化を見つける

import pandas as pd
import psycopg2
import numpy as np

conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df_user = pd.read_sql("SELECT * FROM mst_users", conn)
print(df_user.head(3))

f:id:bitop:20180812073419p:plain

conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df_act = pd.read_sql("SELECT * FROM action_log", conn)
print(df_act.head(3))

f:id:bitop:20180812073503p:plain

5-2-1 登録者数の推移と傾向をみる

df_user.groupby('register_date')['user_id'].count()

f:id:bitop:20180812073630p:plain

df_user['register_date'] = pd.to_datetime(df_user['register_date'])
df_user['year_month'] = df_user['register_date'].dt.year.map(lambda x:str(x)) + "-" +df_user['register_date'].dt.month.map(lambda x:str(x))
df_month = pd.DataFrame(df_user.groupby('year_month')['register_date'].count())
df_month['last_month_count'] = df_month['register_date'].shift(1)
df_month['month_over_month_ratio'] = df_month['register_date'] / df_month['last_month_count'] 
print(df_month)

f:id:bitop:20180812073722p:plain