3-1-3 文字列を配列に分解する
import re
def process(s):
m=re.search('//[^/]+([^?#]+)',s)
return m.group(0).split('/')[3]
conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df = pd.read_sql("SELECT * FROM access_log", conn)
df['path1']=df['url'].map(process)
3-1-4 日付やタイムスタンプを扱う
import datetime
import time
df = pd.DataFrame({'dt':[datetime.date.today()],'timestamp':[time.ctime()]})
指定した値の日付や時刻データを取得する
import datetime
import time
from dateutil.parser import parse
df = pd.DataFrame({'dt':[parse('2015-10-12')],'timestamp':[parse('2015-10-12 12:00:00')]})
日付け/時刻から特定のフィールドを取り出す
import datetime
import time
from dateutil.parser import parse
df = pd.DataFrame({'stamp':[parse('2015-10-12 12:00')]})
df['year'] = df['stamp'].map(lambda x:x.year)
df['month'] = df['stamp'].map(lambda x:x.month)
df['day'] = df['stamp'].map(lambda x:x.day)
df['hour'] = df['stamp'].map(lambda x:x.hour)
#別解
df = pd.DataFrame({'stamp':[parse('2015-11-30 23:00')]})
df['year'] = df['stamp'].dt.year
df['month'] = df['stamp'].dt.month
df['day'] = df['stamp'].dt.day
df['hour'] = df['stamp'].dt.hour
3-1-5 欠損値をデフォルト
conn = psycopg2.connect("dbname=BigData host=localhost user=testuser")
df = pd.read_sql("SELECT * FROM purchase_log_with_coupon", conn)
df = df.fillna(0)