「ビックデータ分析・活用のためのSQLレシピ」を読む

4-2-2 ABC分析で売れ筋を判別する

conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df = pd.read_sql("SELECT * FROM purchase_detail_log", conn)
print(df.head(10))

f:id:bitop:20180729091655p:plain

df1 = df.groupby('category')['price'].sum()

df2 = pd.DataFrame({'category':df1.index,
                'amount':df1.values})

sum_amount = df2['amount'].sum()
df2['cumsum'] = df2['amount'].cumsum()
df2['composition'] = df2['amount'].map(lambda x:100.0 * x/sum_amount)
df2['cumulative_ratio'] = df2['cumsum'].map(lambda x:100.0 * x/sum_amount)
df2['abc_rank'] = df2['cumulative_ratio'].map(lambda x: 'A' if 0 < x < 70 else 'B' if 70 < x < 90 else 'C')
print(df2)

f:id:bitop:20180729091758p:plain