SQL database with python

Author

Tony Duan

1 Connection with database

Code
import seaborn as sns
import pandas as pd
import sqlite3

tips = sns.load_dataset("tips")
planets=sns.load_dataset('planets')

1.1 create database file pythonsqlite.db and copy tips data and planets data into database

Code
import sqlite3
# Create your connection.
db = sqlite3.connect('pythonsqlite.db')

tips.to_sql(name='tips', con=db,if_exists='replace')
planets.to_sql(name='planets', con=db,if_exists='replace')


db.close()

1.2 check all table in database

Code
db = sqlite3.connect('pythonsqlite.db')
table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", db)
table
name
0 tips
1 planets

2 SQL

2.1 select

Code
db = sqlite3.connect('pythonsqlite.db')
sql="select * from tips LIMIT 3;"
table = pd.read_sql_query(sql,db)
table
index total_bill tip sex smoker day time size
0 0 16.99 1.01 Female No Sun Dinner 2
1 1 10.34 1.66 Male No Sun Dinner 3
2 2 21.01 3.50 Male No Sun Dinner 3

2.2 Renaming column

Code
sql="select total_bill as new_total_bill from tips"
table = pd.read_sql_query(sql,db)
table.head()
new_total_bill
0 16.99
1 10.34
2 21.01
3 23.68
4 24.59

2.3 create column

Code
sql="select total_bill as new_total_bill,total_bill from tips"
table = pd.read_sql_query(sql,db)
table.head()
new_total_bill total_bill
0 16.99 16.99
1 10.34 10.34
2 21.01 21.01
3 23.68 23.68
4 24.59 24.59

2.4 Filter rows

Code
sql="select * from tips where sex='Male'"
table = pd.read_sql_query(sql,db)
table.head()
index total_bill tip sex smoker day time size
0 1 10.34 1.66 Male No Sun Dinner 3
1 2 21.01 3.50 Male No Sun Dinner 3
2 3 23.68 3.31 Male No Sun Dinner 2
3 5 25.29 4.71 Male No Sun Dinner 4
4 6 8.77 2.00 Male No Sun Dinner 2

2.4.1 Filters with AND conditions

Code
sql="select * from tips where sex='Male' and size>3"
table = pd.read_sql_query(sql,db)
table.head()
index total_bill tip sex smoker day time size
0 5 25.29 4.71 Male No Sun Dinner 4
1 7 26.88 3.12 Male No Sun Dinner 4
2 13 18.43 3.00 Male No Sun Dinner 4
3 23 39.42 7.58 Male No Sat Dinner 4
4 25 17.81 2.34 Male No Sat Dinner 4

2.4.2 Filters with or conditions

Code
sql="select * from tips where sex='Male' or size>3"
table = pd.read_sql_query(sql,db)
table.shape
(169, 8)

2.5 Append

2.5.1 append by row

Code
sql="select * from tips UNION all select * from tips"
table = pd.read_sql_query(sql,db)
table.shape
(488, 8)

2.5.2 append by column

2.5.3 Dropping NA values

2.5.4 keep NA values

2.6 group by

2.6.1 average,min,max,sum

Code
sql="select AVG(total_bill),min(total_bill),max(total_bill),sum(total_bill) from tips group by sex"
table = pd.read_sql_query(sql,db)
table
AVG(total_bill) min(total_bill) max(total_bill) sum(total_bill)
0 18.056897 3.07 44.30 1570.95
1 20.744076 7.25 50.81 3256.82

2.6.2 count

Code
sql="select sex,count(*) from tips group by 1"
table = pd.read_sql_query(sql,db)
table
sex count(*)
0 Female 87
1 Male 157

2.7 order rows

Code
sql="select * from tips order by total_bill"
table = pd.read_sql_query(sql,db)
table.head()
index total_bill tip sex smoker day time size
0 67 3.07 1.00 Female Yes Sat Dinner 1
1 92 5.75 1.00 Female Yes Fri Dinner 2
2 111 7.25 1.00 Female No Sat Dinner 1
3 172 7.25 5.15 Male Yes Sun Dinner 2
4 149 7.51 2.00 Male No Thur Lunch 2

2.7.1 Sort in descending order

Code
sql="select * from tips order by total_bill desc"
table = pd.read_sql_query(sql,db)
table.head()
index total_bill tip sex smoker day time size
0 170 50.81 10.00 Male Yes Sat Dinner 3
1 212 48.33 9.00 Male No Sat Dinner 4
2 59 48.27 6.73 Male No Sat Dinner 4
3 156 48.17 5.00 Male No Sun Dinner 6
4 182 45.35 3.50 Male Yes Sun Dinner 3

2.7.2 Arrange by multiple variables

Code
sql="select * from tips order by total_bill,tip"
table = pd.read_sql_query(sql,db)
table.head()
index total_bill tip sex smoker day time size
0 67 3.07 1.00 Female Yes Sat Dinner 1
1 92 5.75 1.00 Female Yes Fri Dinner 2
2 111 7.25 1.00 Female No Sat Dinner 1
3 172 7.25 5.15 Male Yes Sun Dinner 2
4 149 7.51 2.00 Male No Thur Lunch 2

2.8 join

2.8.1 inner_join

2.8.2 full join

2.8.3 left join

2.8.4 anti join

2.9 Reshape tables

2.9.1 Gather data long(wide to long)

2.9.2 Spread data wide (long to wide)

2.10 string

2.10.1 upper case

2.10.2 lower case

2.10.3 match

2.10.4 concatenation

2.10.5 replace

2.10.6 extract

2.11 date

2.12 create table into database

Code
sql="create table if not exists new_tips as select * from tips"
db.execute(sql)

table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", db)
table
name
0 tips
1 planets
2 new_tips

2.13 delete table in database

Code
sql="drop table if  exists new_tips"
db.execute(sql)

table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", db)
table
name
0 tips
1 planets

2.14 edit table in database

Code
# close database connection
db.close()

3 Using SQL with Pandas

Code
from pandasql import sqldf
Code
sql="SELECT * FROM tips LIMIT 5;"
data=sqldf(sql)
data
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4

4 reference:

Back to top