「ビックデータ分析・活用のためのSQLレシピ」を読む

3-4 複数のテーブルに対する操作

3-4-1 複数のテーブルを縦に並べる

import pandas as pd
import psycopg2
import numpy as np

conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df1 = pd.read_sql("SELECT * FROM app1_mst_users", conn)
df1['app_name'] = 'app1' 
print(df1)
df2 = pd.read_sql("SELECT * FROM app2_mst_users", conn)
df2['app_name'] = 'app2' 
print(df2)

f:id:bitop:20180715104712p:plain

df = pd.concat([df1,df2],sort=False)
print(df[['app_name','user_id','name','email']])

f:id:bitop:20180715104759p:plain

3-4-2 複数のテーブルを横に並べる

conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df1 = pd.read_sql("SELECT * FROM mst_categories", conn)
print(df1)
df2 = pd.read_sql("SELECT * FROM category_sales", conn)
print(df2)
df3 = pd.read_sql("SELECT * FROM product_sale_ranking", conn)
print(df3)

f:id:bitop:20180715104858p:plain

df4 = pd.merge(df3,pd.merge(df1,df2,on='category_id'),on='category_id')
print(df4[['category_id','name','sales_y','product_id']])

f:id:bitop:20180715105008p:plain

#AND r.rank=1が表現できない
df4 = pd.merge(df3,pd.merge(df1,df2,on='category_id',how='outer'),on='category_id',how='outer')
print(df4)

f:id:bitop:20180715105140p:plain