「ビックデータ分析・活用のためのSQLレシピ」を読む

3-4-3 条件のフラグを0と1で表現する

import pandas as pd
import psycopg2
import numpy as np

conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df1 = pd.read_sql("SELECT * FROM mst_users_with_card_number", conn)
df2 = pd.read_sql("SELECT * FROM purchase_log", conn)
print(df1.head(3))
print('-'*50)
print(df2)

f:id:bitop:20180716101507p:plain

df4 = df2.groupby('user_id')
df4 = pd.DataFrame(df4['user_id'].count())
df4 = df4.rename(columns={'user_id':'purchased_count'})
df5 = df1.merge(df4,on='user_id',how='outer')

df4 = df2.groupby('user_id')
df4 = np.sign(pd.DataFrame(df4['user_id'].count()))
df4 = df4.rename(columns={'user_id':'has_purchased'})
df3 = df5.merge(df4,on='user_id',how='outer')

df3['has_card'] = df1['card_number'].map(lambda x : 0 if pd.isnull(x) else 1) 

print(df3[['user_id','card_number','purchased_count','has_card','has_purchased']])

f:id:bitop:20180716101618p:plain