7章 データ活用の精度を高めるための分析術
7-1 データを組み合わせて、新たな切り口を作る
7-1-1 IPアドレスから国・地域を補完する
import pandas as pd
import numpy as np
mst_city_ip = pd.read_csv('GeoLite2-City-Blocks-IPv4.csv')
print(mst_city_ip.head(3))
mst_locations = pd.read_csv('GeoLite2-City-Locations-ja.csv')
print(mst_locations.head(3))
import psycopg2
conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
action_log = pd.read_sql("SELECT * FROM action_log_with_ip", conn)
print(action_log.head(3))
import ipaddress
action_log['ip'] = action_log['ip'].map(lambda x:ipaddress.ip_address(x))
mst_city_ip['network'] = mst_city_ip['network'].map(lambda x:ipaddress.ip_network(x))
print(action_log.head(3))
print(mst_city_ip.head(3))
pd.merge(action_log,mst_city_ip,left_on='ip',right_on='network')
#無理でした