SQL database with python

Author

Tony Duan

1 Connection with database

Code

import seaborn as sns
import pandas as pd
import sqlite3

tips = sns.load_dataset("tips")
planets=sns.load_dataset('planets')

1.1 create database file pythonsqlite.db and copy tips data and planets data into database

Code

import sqlite3
# Create your connection.
db = sqlite3.connect('pythonsqlite.db')

tips.to_sql(name='tips', con=db,if_exists='replace')
planets.to_sql(name='planets', con=db,if_exists='replace')


db.close()

1.2 check all table in database

Code

db = sqlite3.connect('pythonsqlite.db')
table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", db)
table

	name
0	tips
1	planets

2 SQL

2.1 select

Code

db = sqlite3.connect('pythonsqlite.db')
sql="select * from tips LIMIT 3;"
table = pd.read_sql_query(sql,db)
table

	index	total_bill	tip	sex	smoker	day	time	size
0	0	16.99	1.01	Female	No	Sun	Dinner	2
1	1	10.34	1.66	Male	No	Sun	Dinner	3
2	2	21.01	3.50	Male	No	Sun	Dinner	3

2.2 Renaming column

Code

sql="select total_bill as new_total_bill from tips"
table = pd.read_sql_query(sql,db)
table.head()

	new_total_bill
0	16.99
1	10.34
2	21.01
3	23.68
4	24.59

2.3 create column

Code

sql="select total_bill as new_total_bill,total_bill from tips"
table = pd.read_sql_query(sql,db)
table.head()

	new_total_bill	total_bill
0	16.99	16.99
1	10.34	10.34
2	21.01	21.01
3	23.68	23.68
4	24.59	24.59

2.4 Filter rows

Code

sql="select * from tips where sex='Male'"
table = pd.read_sql_query(sql,db)
table.head()

	index	total_bill	tip	sex	smoker	day	time	size
0	1	10.34	1.66	Male	No	Sun	Dinner	3
1	2	21.01	3.50	Male	No	Sun	Dinner	3
2	3	23.68	3.31	Male	No	Sun	Dinner	2
3	5	25.29	4.71	Male	No	Sun	Dinner	4
4	6	8.77	2.00	Male	No	Sun	Dinner	2

2.4.1 Filters with AND conditions

Code

sql="select * from tips where sex='Male' and size>3"
table = pd.read_sql_query(sql,db)
table.head()

	index	total_bill	tip	sex	smoker	day	time	size
0	5	25.29	4.71	Male	No	Sun	Dinner	4
1	7	26.88	3.12	Male	No	Sun	Dinner	4
2	13	18.43	3.00	Male	No	Sun	Dinner	4
3	23	39.42	7.58	Male	No	Sat	Dinner	4
4	25	17.81	2.34	Male	No	Sat	Dinner	4

2.4.2 Filters with or conditions

Code

sql="select * from tips where sex='Male' or size>3"
table = pd.read_sql_query(sql,db)
table.shape

(169, 8)

2.5 Append

2.5.1 append by row

Code

sql="select * from tips UNION all select * from tips"
table = pd.read_sql_query(sql,db)
table.shape

(488, 8)

2.5.2 append by column

2.5.3 Dropping NA values

2.5.4 keep NA values

2.6 group by

2.6.1 average,min,max,sum

Code

sql="select AVG(total_bill),min(total_bill),max(total_bill),sum(total_bill) from tips group by sex"
table = pd.read_sql_query(sql,db)
table

	AVG(total_bill)	min(total_bill)	max(total_bill)	sum(total_bill)
0	18.056897	3.07	44.30	1570.95
1	20.744076	7.25	50.81	3256.82

2.6.2 count

Code

sql="select sex,count(*) from tips group by 1"
table = pd.read_sql_query(sql,db)
table

	sex	count(*)
0	Female	87
1	Male	157

2.7 order rows

Code

sql="select * from tips order by total_bill"
table = pd.read_sql_query(sql,db)
table.head()

	index	total_bill	tip	sex	smoker	day	time	size
0	67	3.07	1.00	Female	Yes	Sat	Dinner	1
1	92	5.75	1.00	Female	Yes	Fri	Dinner	2
2	111	7.25	1.00	Female	No	Sat	Dinner	1
3	172	7.25	5.15	Male	Yes	Sun	Dinner	2
4	149	7.51	2.00	Male	No	Thur	Lunch	2

2.7.1 Sort in descending order

Code

sql="select * from tips order by total_bill desc"
table = pd.read_sql_query(sql,db)
table.head()

	index	total_bill	tip	sex	smoker	day	time	size
0	170	50.81	10.00	Male	Yes	Sat	Dinner	3
1	212	48.33	9.00	Male	No	Sat	Dinner	4
2	59	48.27	6.73	Male	No	Sat	Dinner	4
3	156	48.17	5.00	Male	No	Sun	Dinner	6
4	182	45.35	3.50	Male	Yes	Sun	Dinner	3

2.7.2 Arrange by multiple variables

Code

sql="select * from tips order by total_bill,tip"
table = pd.read_sql_query(sql,db)
table.head()

	index	total_bill	tip	sex	smoker	day	time	size
0	67	3.07	1.00	Female	Yes	Sat	Dinner	1
1	92	5.75	1.00	Female	Yes	Fri	Dinner	2
2	111	7.25	1.00	Female	No	Sat	Dinner	1
3	172	7.25	5.15	Male	Yes	Sun	Dinner	2
4	149	7.51	2.00	Male	No	Thur	Lunch	2

2.8 join

2.8.1 inner_join

2.8.2 full join

2.8.3 left join

2.8.4 anti join

2.9 Reshape tables

2.9.1 Gather data long(wide to long)

2.9.2 Spread data wide (long to wide)

2.10 string

2.10.1 upper case

2.10.2 lower case

2.10.3 match

2.10.4 concatenation

2.10.5 replace

2.10.6 extract

2.11 date

2.12 create table into database

Code

sql="create table if not exists new_tips as select * from tips"
db.execute(sql)

table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", db)
table

	name
0	tips
1	planets
2	new_tips

2.13 delete table in database

Code

sql="drop table if  exists new_tips"
db.execute(sql)

table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table';", db)
table

	name
0	tips
1	planets

2.14 edit table in database

Code

# close database connection
db.close()

3 Using SQL with Pandas

Code

from pandasql import sqldf

Code

sql="SELECT * FROM tips LIMIT 5;"
data=sqldf(sql)
data

	total_bill	tip	sex	smoker	day	time	size
0	16.99	1.01	Female	No	Sun	Dinner	2
1	10.34	1.66	Male	No	Sun	Dinner	3
2	21.01	3.50	Male	No	Sun	Dinner	3
3	23.68	3.31	Male	No	Sun	Dinner	2
4	24.59	3.61	Female	No	Sun	Dinner	4

1 Connection with database

1.1 create database file pythonsqlite.db and copy tips data and planets data into database

1.2 check all table in database

2 SQL

2.1 select

2.2 Renaming column

2.3 create column

2.4 Filter rows

2.4.1 Filters with AND conditions

2.4.2 Filters with or conditions

2.5 Append

2.5.1 append by row

2.5.2 append by column

2.5.3 Dropping NA values

2.5.4 keep NA values

2.6 group by

2.6.1 average,min,max,sum

2.6.2 count

2.7 order rows

2.7.1 Sort in descending order

2.7.2 Arrange by multiple variables

2.8 join

2.8.1 inner_join

2.8.2 full join

2.8.3 left join

2.8.4 anti join

2.9 Reshape tables

2.9.1 Gather data long(wide to long)

2.9.2 Spread data wide (long to wide)

2.10 string

2.10.1 upper case

2.10.2 lower case

2.10.3 match

2.10.4 concatenation

2.10.5 replace

2.10.6 extract

2.11 date

2.12 create table into database

2.13 delete table in database

2.14 edit table in database

3 Using SQL with Pandas

4 reference: