「PythonとJavaScriptではじめるデータビジュアライゼーション」を読む

3.7MongoDB

pymongoはinstallされていなかったので、pip install pymongoを実行したところver-3.5.1がinstallされた。MongoDBをinstallした場所(C:\Program Files\MongoDB\Server\3.4\bin)に移動して
>md c:\data\db
を実行すると、Cドライブ直下にdata\dbフォルダが作成される。つづいて
>mongod
を実行するとMongoDBサービスが開始される。
参考にしたURL

stackoverflow.com

結果
f:id:bitop:20170909144106p:plain

# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]
from pymongo import MongoClient

# Names of the MongoDB database and of the collection used below.
DB_NOBEL_PRIZE = "nobel_prize"
COLL_WINNERS = "winners"


def get_mongo_database(db_name, host='localhost', port=27017, username=None, password=None):
    """Connect to MongoDB, with or without authentication, and return a database.

    Args:
        db_name: Name of the database to return.
        host: Hostname or IP address of the server (without a port).
        port: Port the server listens on.
        username: User name; authentication is used only when both
            ``username`` and ``password`` are truthy.
        password: Password for ``username``.

    Returns:
        The ``conn[db_name]`` database handle of the created ``MongoClient``.
    """
    if username and password:
        # Imported locally so this snippet needs no extra top-level import.
        from urllib.parse import quote_plus

        # Credentials are percent-escaped so characters such as '@' or ':'
        # cannot break the URI, and the port is part of the URI so that a
        # non-default port is honoured in the authenticated case as well.
        mongouri = 'mongodb://%s:%s@%s:%s/%s' % (
            quote_plus(username), quote_plus(password), host, port, db_name)
        conn = MongoClient(mongouri)
    else:
        conn = MongoClient(host, port)
    return conn[db_name]

db = get_mongo_database(DB_NOBEL_PRIZE)
coll = db[COLL_WINNERS]
# Collection.insert() is deprecated in PyMongo 3; insert_many() is the
# replacement for inserting a list of documents. Its result exposes the
# generated ids as `inserted_ids`.
print(coll.insert_many(nobel_winners).inserted_ids)

結果 f:id:bitop:20170909144305p:plain

from pymongo import MongoClient

# Names of the MongoDB database and of the collection queried below.
DB_NOBEL_PRIZE = "nobel_prize"
COLL_WINNERS = "winners"

def get_mongo_database(db_name, host='localhost', port=27017, username=None, password=None):
    """Connect to MongoDB, with or without authentication, and return a database.

    Args:
        db_name: Name of the database to return.
        host: Hostname or IP address of the server (without a port).
        port: Port the server listens on.
        username: User name; authentication is used only when both
            ``username`` and ``password`` are truthy.
        password: Password for ``username``.

    Returns:
        The ``conn[db_name]`` database handle of the created ``MongoClient``.
    """
    if username and password:
        # Imported locally so this snippet needs no extra top-level import.
        from urllib.parse import quote_plus

        # Credentials are percent-escaped so characters such as '@' or ':'
        # cannot break the URI, and the port is part of the URI so that a
        # non-default port is honoured in the authenticated case as well.
        mongouri = 'mongodb://%s:%s@%s:%s/%s' % (
            quote_plus(username), quote_plus(password), host, port, db_name)
        conn = MongoClient(mongouri)
    else:
        conn = MongoClient(host, port)
    return conn[db_name]

db = get_mongo_database(DB_NOBEL_PRIZE)
coll = db[COLL_WINNERS]

# Filters are run in this order: chemistry winners, winners after 1930, and
# winners who are either after 1930 or female.
queries = [
    {'category': 'chemistry'},
    {'year': {'$gt': 1930}},
    {'$or': [{'year': {'$gt': 1930}}, {'sex': 'female'}]},
]
for query in queries:
    print("-------------------------------------------\n")
    res = coll.find(query)
    print(list(res))

結果

f:id:bitop:20170909145623p:plain

「PythonとJavaScriptではじめるデータビジュアライゼーション」を読む

3.6SQL

sqlalchemyはcondaに入っていたのでinstallする必要はなかった。

3-6.py
# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column,Integer,String,Enum
from sqlalchemy.orm import sessionmaker

# Declarative base class that the ORM model below derives from.
Base = declarative_base()


class Winner(Base):
    """ORM model mapped to the ``winners`` table, one row per prize winner."""

    __tablename__ = 'winners'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    name = Column(String)
    category = Column(String)
    year = Column(Integer)
    nationality = Column(String)
    # Limited to the two values listed in the Enum.
    sex = Column(Enum('male', 'female'))

    def __repr__(self):
        """Return a short debug string showing name, category and year."""
        fields = (self.name, self.category, self.year)
        return "<Winner(name='%s',category='%s',year='%s')>" % fields

# echo=True makes the engine log the SQL statements it emits.
engine = create_engine("sqlite:///data/nobel_prize.db", echo=True)
# Create the tables declared on Base.
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
session = Session()

# Stage the first winner in the session and show the pending objects;
# nothing is committed in this snippet.
albert = Winner(**nobel_winners[0])
session.add(albert)
print(session.new)

結果

f:id:bitop:20170909123749p:plain

3-6.pyに追加
# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column,Integer,String,Enum
from sqlalchemy.orm import sessionmaker

# Declarative base class that the ORM model below derives from.
Base = declarative_base()


class Winner(Base):
    """ORM model mapped to the ``winners`` table, one row per prize winner."""

    __tablename__ = 'winners'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    name = Column(String)
    category = Column(String)
    year = Column(Integer)
    nationality = Column(String)
    # Limited to the two values listed in the Enum.
    sex = Column(Enum('male', 'female'))

    def __repr__(self):
        """Return a short debug string showing name, category and year."""
        fields = (self.name, self.category, self.year)
        return "<Winner(name='%s',category='%s',year='%s')>" % fields

# echo=True makes the engine log the SQL statements it emits.
engine = create_engine("sqlite:///data/nobel_prize.db", echo=True)
# Create the tables declared on Base.
Base.metadata.create_all(engine)

Session = sessionmaker(bind=engine)
session = Session()

# Build one Winner per dict, stage them all, then write them in one commit.
winner_rows = [Winner(**fields) for fields in nobel_winners]
session.add_all(winner_rows)
session.commit()

結果

f:id:bitop:20170909124124p:plain

3-6-4.py
# -*- coding: utf-8 -*-
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column,Integer,String,Enum

# Declarative base class that the ORM model below derives from.
Base = declarative_base()


class Winner(Base):
    """ORM model mapped to the ``winners`` table, one row per prize winner."""

    __tablename__ = 'winners'

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    name = Column(String)
    category = Column(String)
    year = Column(Integer)
    nationality = Column(String)
    # Limited to the two values listed in the Enum.
    sex = Column(Enum('male', 'female'))

    def __repr__(self):
        """Return a short debug string showing name, category and year."""
        fields = (self.name, self.category, self.year)
        return "<Winner(name='%s',category='%s',year='%s')>" % fields

# Connect to the existing SQLite database; echo=True logs the emitted SQL.
engine = create_engine('sqlite:///data/nobel_prize.db', echo=True)
Session = sessionmaker(bind=engine)
session = Session()

# Total number of winners.
result = session.query(Winner).count()
print("Winner count : ", result)

# Winners whose nationality is Swiss.
result = session.query(Winner).filter_by(nationality='Swiss')
print("result:", list(result))

# Physics winners who are not Swiss.
result = session.query(Winner).filter(Winner.category == 'physics', Winner.nationality != 'Swiss')
print("result:", list(result))

# Look up the rows with primary keys 1 to 3.
for i in range(1, 4):
    result = session.query(Winner).get(i)
    print("result:", result)

# All winners ordered by year. The mapped column is passed instead of a bare
# string so the ORDER BY target is resolved by the ORM, not by text lookup.
res = session.query(Winner).order_by(Winner.year)
print("year order", list(res))
# Example 3-4
def inst_to_dict(inst, delete_id=True):
    """Convert a mapped instance into a plain dict keyed by column name.

    Args:
        inst: Object exposing ``__table__.columns``; each column's ``name``
            is read from ``inst`` with ``getattr``.
        delete_id: When true, the ``'id'`` entry is left out of the result.

    Returns:
        dict mapping each column name to the instance's value for it.
    """
    dat = {column.name: getattr(inst, column.name)
           for column in inst.__table__.columns}
    if delete_id:
        # Tolerate tables without an 'id' column instead of raising KeyError.
        dat.pop('id', None)
    return dat

# Read every row back and convert each one to a plain dict.
winner_rows = session.query(Winner)
nobel_winners = [inst_to_dict(w) for w in winner_rows]
print(nobel_winners)
# Update
print("UPDATE\n")
# Fetch the row with primary key 3 and change its nationality.
marie = session.query(Winner).get(3)
marie.nationality = 'French'
# Printed before and after the commit to compare the session's dirty set.
print(session.dirty)
session.commit()
print(session.dirty)
print(session.query(Winner).get(3).nationality)
# Delete the rows matched by a query
print("DELETE\n")
session.query(Winner).filter_by(name='Albert Einstein').delete()
# NOTE(review): no commit follows this delete in the visible code — confirm
# whether the deletion is meant to be persisted.
print(list(session.query(Winner)))

結果(一部)f:id:bitop:20170909125148p:plain

3.6.5 datasetを使った簡単なSQL
3-6-5.py
# -*- coding: utf-8 -*-
import dataset

# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]
# Open the SQLite file through dataset and print every row of 'winners'.
db = dataset.connect("sqlite:///data/nobel_prize.db")
wtable = db["winners"]
winners = wtable.find()
print(list(winners))

結果 f:id:bitop:20170909130923p:plain

datasetで新しくテーブルを作る
import dataset

# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]
# Show the rows currently stored in 'winners'.
db = dataset.connect("sqlite:///data/nobel_prize.db")
wtable = db["winners"]
winners = wtable.find()
print(list(winners))

# Drop the existing table, look it up again and print its rows.
wtable.drop()
wtable = db["winners"]
print(list(wtable.find()))

# Insert every winner inside a single `with db` block, then print the table.
with db as tx:
    for w in nobel_winners:
        tx["winners"].insert(w)
print(list(db["winners"].find()))

結果 f:id:bitop:20170909131702p:plain

「PythonとJavaScriptではじめるデータビジュアライゼーション」を読む

3.5JSON

3-5.py
json形式のファイルを作る
import json

# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]

# Serialise the winners list to JSON text and write it to the data folder.
with open("data/nobel_winners.json", "w") as f:
    f.write(json.dumps(nobel_winners))

結果
dataフォルダにnobel_winners.jsonファイルが作成された
3-5a.py
そのファイルを読み出す

import json

# Read the whole file and parse its JSON text back into Python objects.
with open("data/nobel_winners.json") as f:
    nobel_winners = json.loads(f.read())

print(nobel_winners)

f:id:bitop:20170909115917p:plain

3.5.1 日付と時刻
import datetime
import json

class JSONDateTimeEncoder(json.JSONEncoder):
    """JSON encoder that writes date and datetime objects via isoformat()."""

    def default(self, obj):
        """Return ``obj.isoformat()`` for dates and datetimes.

        Every other object is handed to ``json.JSONEncoder.default``.
        """
        if not isinstance(obj, (datetime.date, datetime.datetime)):
            return super().default(obj)
        return obj.isoformat()


def dumps(obj):
    """Serialise ``obj`` to a JSON string using ``JSONDateTimeEncoder``."""
    encoder_cls = JSONDateTimeEncoder
    return json.dumps(obj, cls=encoder_cls)


# Encode the current time to demonstrate the date-aware dumps() helper.
new_str = dumps({"time": datetime.datetime.now()})
print(new_str)
結果

f:id:bitop:20170909121956p:plain

from datetime import datetime

time_str = '2012/01/01 12:32:11'
# %m is the month and %d the day of the month. '%M' means minutes (and was
# already used for the time part) and '%D' is not a strptime directive, so
# the previous format string could not parse the date part.
dt = datetime.strptime(time_str, '%Y/%m/%d %H:%M:%S')
print(dt)
結果

f:id:bitop:20170909122927p:plain

「PythonとJavaScriptではじめるデータビジュアライゼーション」を読む

3.4 CSV,TSV,行/列データ形式

import csv

# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]


# The book opened the file in 'wb' mode; text mode 'w' is what works here.
# newline='' lets the csv module control the line endings itself; without it
# each row is followed by an empty line on Windows.
with open('data/nobel_winners.csv', 'w', newline='') as f:
    # Sorted keys of the first winner give a stable column order.
    fieldnames = sorted(nobel_winners[0].keys())
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(nobel_winners)

「PythonとJavaScriptではじめるデータビジュアライゼーション」を読む

3.3システムファイル

# Sample data: three Nobel Prize winners, one dict per winner, all with the
# same five keys.
nobel_winners = [
    {'category': 'physics',
     'name': 'Albert Einstein',
     'nationality': 'Swiss',
     'sex': 'male',
     'year': 1921},
    {'category': 'physics',
     'name': 'Paul Dirac',
     'nationality': 'British',
     'sex': 'male',
     'year': 1933},
    {'category': 'chemistry',
     'name': 'Marie Curie',
     'nationality': 'Polish',
     'sex': 'female',
     'year': 1911},
]

# cols.sort() raised an error and could not be run, so the keys are put in
# order with sorted() instead.
cols = sorted(nobel_winners[0])

# Header line first, then one comma-joined line per winner.
with open("data/nobel_winners.csv", "w") as f:
    print(",".join(cols), file=f)
    for o in nobel_winners:
        row = [str(o[col]) for col in cols]
        print(",".join(row), file=f)

できたcsvファイル
f:id:bitop:20170903134659p:plain

「PythonとJavaScriptではじめるデータビジュアライゼーション」を読む

2.4.8 JavaScriptクロージャとモジュールパターン

2-4-8.js
// Returns a function that adds `inc` to a running total on every call and
// logs the new total. `count` lives in the closure, so each Counter() call
// gets its own total.
function Counter(inc){
    var count = 0;
    return function(){
        count += inc;
        console.log('Current count:' + count);
    };
}

// Four calls on the same counter: the logged totals are 2, 4, 6 and 8.
var inc2 = Counter(2);
[1, 2, 3, 4].forEach(function(){
    inc2();
});

index.html
underscoreライブラリ関係ないがいれておく
<!-- index.html -->
<!DOCTYPE html>
<meta charset="utf-8">
<div id="viz"></div>
<!-- Underscore is loaded from the CDN although 2-4-8.js does not use it. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js"></script>
<script src="2-4-8.js" async></script>

$python -m http.server

WEBブラウザのアドレス欄にhttp://localhost:8000を入力して
Ctrl+Shift+IでDevToolsのコンソールタブを表示させる

f:id:bitop:20170903133756p:plain

「PythonとJavaScriptではじめるデータビジュアライゼーション」を読む

2.4.5 Underscore

2-4-5.js
// Sample journeys: each entry has a period label and a list of times.
var journeys = [
    {period: "morning", times: [44, 34, 56, 31]},
    {period: "evening", times: [35, 33]},
    {period: "morning", times: [33, 29, 35, 41]},
    {period: "evening", times: [24, 45, 27]},
    {period: "morning", times: [18, 23, 28]}
];

// Group the journeys by period, pull out the `times` arrays of the morning
// group, and flatten them (flatten turns the nested arrays into one
// one-dimensional array).
var groups = _.groupBy(journeys, 'period');
var mTimes = _.flatten(_.pluck(groups['morning'], 'times'));
console.log(mTimes);
// Returns the arithmetic mean of the numbers in `l`; the sum is also logged.
var average = function(l){
    var sum = _.reduce(l, function(acc, value){ return acc + value; }, 0);
    console.log(sum);
    return sum / l.length;
};
// Trailing space in the label keeps the number from running into "is".
console.log("Average morning time is " + average(mTimes));

index.html
<!-- index.html -->
<!DOCTYPE html>
<meta charset="utf-8">
<div id="viz"></div>
<!-- Underscore provides the `_` helpers that 2-4-5.js calls. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.8.3/underscore-min.js"></script>
<script src="2-4-5.js" async></script>

$python -m http.server

WEBブラウザのアドレス欄にhttp://localhost:8000を入力して
Ctrl+Shift+IでDevToolsのコンソールタブを表示させる

f:id:bitop:20170903132435p:plain