Python Web Scraping: MySQL Data Storage
1. Preparation: install the PyMySQL driver
pip3 install pymysql
2. Connecting to the Database
Connect to MySQL using PyMySQL, then create a new database named spiders:
import pymysql
# Connect to the local MySQL server without selecting a database; the
# `spiders` database is created by this script.
connection = pymysql.connect(host='localhost', user='root', password='123456', port=3306)
try:
    # The cursor is used as a context manager so it is closed when the
    # block exits, whether or not a statement fails.
    with connection.cursor() as cursor:
        cursor.execute('SELECT VERSION()')
        data = cursor.fetchone()
        print('Database version:', data)
        # IF NOT EXISTS keeps the script re-runnable: without it a second
        # run fails because the database already exists.
        cursor.execute("CREATE DATABASE IF NOT EXISTS spiders DEFAULT CHARACTER SET utf8mb4")
finally:
    # Release the connection even if one of the statements above raised.
    connection.close()
Output:
Database version: ('8.0.19',)
3. Case Study 01 (Create Database, Create Table, Insert, Update, Delete, Query)
import pymysql
# Shared connection: every helper below reads this module-level object.
# NOTE(review): host and credentials are hard-coded for the tutorial; load
# them from configuration or the environment in real code.
# NOTE(review): `database='spiders'` is selected at connect time, so the
# database presumably has to exist before this line runs - confirm.
connection = pymysql.connect(
    host='14.116.152.57',
    port=3308,
    user='root',
    password='root',
    database='spiders',
)
def create_database():
    """Print the MySQL server version and ensure the ``spiders`` database exists.

    Uses the module-level ``connection`` and leaves it open, because the
    other helpers in this file reuse it.
    """
    # The ``with`` block closes the cursor even if a statement fails.
    with connection.cursor() as cursor:
        cursor.execute('SELECT VERSION()')
        data = cursor.fetchone()
        print('Database version:', data)
        # IF NOT EXISTS: the shared connection is opened with
        # database='spiders', so by the time this runs the database is
        # already there and a plain CREATE DATABASE would be rejected.
        cursor.execute("CREATE DATABASE IF NOT EXISTS spiders DEFAULT CHARACTER SET utf8mb4")
    # Do not close connection here; it's reused
def create_table():
    """Create the ``students`` table (id, name, age) unless it already exists.

    Runs the DDL on the module-level ``connection`` and commits afterwards.
    """
    ddl = 'CREATE TABLE IF NOT EXISTS students (id VARCHAR(255) NOT NULL, name VARCHAR(255) NOT NULL, age INT NOT NULL, PRIMARY KEY (id))'
    cur = connection.cursor()
    cur.execute(ddl)
    connection.commit()
def insert_table():
    """Insert one hard-coded student row into ``students``.

    Commits on success. On a database error the transaction is rolled back
    and the error is printed; nothing is raised to the caller for database
    errors.
    """
    student_id = '20120004'
    username = 'Bob'
    age = 22
    # Values travel as parameters, never formatted into the SQL text.
    sql = 'INSERT INTO students(id, name, age) values(%s, %s, %s)'
    with connection.cursor() as cursor:
        try:
            cursor.execute(sql, (student_id, username, age))
            connection.commit()
        except pymysql.MySQLError as exc:
            # Stay best-effort, but report the failure instead of hiding it,
            # and catch only database errors so Ctrl-C still works.
            print('Insert failed:', exc)
            connection.rollback()
def insert_table_dynamic():
    """Insert a row whose column list is derived from a dict.

    The column names and the matching number of ``%s`` placeholders are
    generated from ``data``, so adding a field only requires changing the
    dict. Prints ``success`` when a row was inserted and ``failed`` (with
    the error) when the database rejects the statement.
    """
    data = {
        'id': '20120003',
        'name': 'Bob',
        'age': 22,
    }
    table = 'students'
    # Identifiers cannot be sent as parameters, so they are formatted into
    # the statement; they come from the literal dict above, not from input.
    keys = ', '.join(data)
    values = ', '.join(['%s'] * len(data))
    sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(table=table, keys=keys, values=values)
    with connection.cursor() as cursor:
        try:
            # execute() returns the number of affected rows.
            if cursor.execute(sql, tuple(data.values())):
                print('success')
            # Commit unconditionally so the transaction is always ended.
            connection.commit()
        except pymysql.MySQLError as exc:
            print('failed', exc)
            connection.rollback()
def update_table():
    """Set ``age`` to 25 for every student named ``Bob``.

    Commits on success. On a database error the transaction is rolled back
    and the error is printed.
    """
    sql = 'UPDATE students SET age = %s WHERE name = %s'
    with connection.cursor() as cursor:
        try:
            cursor.execute(sql, (25, 'Bob'))
            connection.commit()
        except pymysql.MySQLError as exc:
            # Report instead of silently swallowing; only database errors
            # are treated as recoverable.
            print('Update failed:', exc)
            connection.rollback()
def update_or_insert():
    """Insert a student row, or update it when the primary key already exists.

    Builds ``INSERT ... ON DUPLICATE KEY UPDATE`` from ``data``. Prints
    ``Successful`` when the statement affected at least one row and
    ``Failed`` (with the error) when the database rejects it.
    """
    data = {
        'id': '20120001',
        'name': 'Bob',
        'age': 26,
    }
    table = 'students'
    keys = ', '.join(data)
    values = ', '.join(['%s'] * len(data))
    # One "col = %s" assignment per column for the UPDATE half.
    update = ', '.join('{key} = %s'.format(key=key) for key in data)
    sql = 'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE '.format(table=table, keys=keys, values=values)
    sql += update
    with connection.cursor() as cursor:
        try:
            # The values are passed twice: once for the VALUES list and once
            # for the UPDATE assignments, in the same column order.
            if cursor.execute(sql, tuple(data.values()) * 2):
                print('Successful')
            # The affected-row count can be 0 when the existing row already
            # holds these values; commit anyway so the transaction ends.
            connection.commit()
        except pymysql.MySQLError as exc:
            print('Failed', exc)
            connection.rollback()
def delete_table():
    """Delete every row of ``students`` whose ``age`` is greater than 25.

    Commits on success. On a database error the transaction is rolled back
    and the error is printed.
    """
    table = 'students'
    # The condition is a literal defined here; do not build it from
    # untrusted input, because it is formatted straight into the SQL text.
    condition = 'age > 25'
    sql = 'DELETE FROM {table} WHERE {condition}'.format(table=table, condition=condition)
    with connection.cursor() as cursor:
        try:
            cursor.execute(sql)
            connection.commit()
        except pymysql.MySQLError as exc:
            # Report instead of silently swallowing the failure.
            print('Delete failed:', exc)
            connection.rollback()
def select_table():
    """Print the row count and every row of students aged 20 or older.

    On a database error ``Error`` is printed together with the exception;
    nothing is raised to the caller for database errors.
    """
    sql = 'SELECT * FROM students WHERE age >= 20'
    with connection.cursor() as cursor:
        try:
            cursor.execute(sql)
            print('Count:', cursor.rowcount)
            # Iterating the cursor yields one row at a time until the
            # result set is exhausted.
            for row in cursor:
                print('Row:', row)
        except pymysql.MySQLError as exc:
            print('Error', exc)
if __name__ == '__main__':
    try:
        select_table()
    finally:
        # Always release the shared connection, even if the call above raises.
        connection.close()