Creating Tables and Writing Data with SQLAlchemy ORM
Imports
import pandas as pd
from sqlalchemy import (
create_engine, Column, String, Integer, Float, TIMESTAMP,
ForeignKey, text, func
)
from sqlalchemy.orm import declarative_base, sessionmaker, relationship
from sqlalchemy.dialects.mysql import insert as mysql_insert # optional, for MySQL-specific upserts
Database angine
# Example MySQL DSN (pymysql):
# engine = create_engine(
# "mysql+pymysql://user:pass@host:3306/dbname?charset=utf8mb4",
# echo=False,
# )
# Example Oracle DSN (cx_oracle):
# engine = create_engine(
# "oracle+cx_oracle://user:pass@host:1521/?service_name=ORCLPDB1",
# echo=False,
# )
engine = create_engine("<your-DSN-here>", echo=False)
connection = engine.connect()
Executing raw SQL
from sqlalchemy import text
# Oracle: SELECT 1 FROM dual; MySQL/Postgres: SELECT 1
result = connection.execute(text("SELECT 1"))
# Fetch a single row
one_row = result.fetchone()
# Fetch all rows
all_rows = result.fetchall()
# Convert to DataFrame with column labels
result = connection.execute(text("SELECT * FROM some_table"))
df = pd.DataFrame(result.fetchall(), columns=result.keys())
# Or use pandas directly
frame = pd.read_sql("SELECT * FROM some_table", con=engine)
ORM mapping
Base = declarative_base()
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
session = SessionLocal()
class Student(Base):
__tablename__ = "students"
id = Column(Integer, primary_key=True, autoincrement=True)
full_name = Column(String(50), nullable=False)
gender = Column(String(1))
classroom = Column(String(20))
score = Column(Float)
def __repr__(self):
return (
f"<Student id={self.id} name={self.full_name!r} "
f"gender={self.gender!r} classroom={self.classroom!r} score={self.score}>"
)
A declarative class must define tablename and atleast one primary key column.
Schema creation
# Create all tables discovered from Base subclasses
Base.metadata.create_all(bind=engine)
# To drop: Base.metadata.drop_all(bind=engine)
Inserts
Single row with transaction control
# Add one record
stu = Student(full_name="Lily", gender="F", classroom="2", score=97.0)
session.add(stu)
session.commit()
# Demonstrate rollback
try:
another = Student(full_name="Lily", gender="F", classroom="2", score=97.0)
session.add(another)
# force an error if needed, then rollback
session.rollback()
finally:
session.close()
Bulk load with pandas
bulk_df = pd.DataFrame({
"full_name": ["Mike", "John", "Mary"],
"gender": ["M", "M", "F"],
"classroom": ["4", "7", "1"],
"score": [78, 96, 85],
})
# append: add rows to existing table
# replace: drop table, re-create, then insert (use with care)
bulk_df.to_sql("students", con=engine, if_exists="append", index=False)
pandas.to_sql is convenient and often fast. On large tables with primary keys and certain drivers (e.g., Oracle), inserts can be slower—benchmark if throughput is critical.
Bulk insert with SQLAlchemy Core
# Prepare a list of dictionaries for Core insert
rows = [
{
"full_name": r.full_name,
"gender": r.gender,
"classroom": r.classroom,
"score": r.score,
}
for r in bulk_df.itertuples(index=False)
]
from sqlalchemy import insert
# Use a transaction for better performance
with engine.begin() as conn:
conn.execute(insert(Student.__table__), rows)
Relationships and timestamps
class Book(Base):
__tablename__ = "books"
id = Column(Integer, primary_key=True, autoincrement=True)
title = Column(String(100), nullable=False)
price = Column(Float)
# Foreign key to students.id
student_id = Column(Integer, ForeignKey("students.id"), nullable=False)
# ORM relationship; access via book.student and student.books
student = relationship("Student", backref="books")
# Insert timestamp: stored without timezone; default set on the server
insert_time = Column(
TIMESTAMP(timezone=False),
nullable=False,
server_default=text("CURRENT_TIMESTAMP"),
)
def __repr__(self):
return (
f"<Book id={self.id} title={self.title!r} price={self.price} "
f"student_id={self.student_id}>"
)
Base.metadata.create_all(bind=engine)
books_df = pd.DataFrame({
"title": [
"Gone with the Wind",
"Goodbye",
"Game of Thrones",
"The Lord of the Rings",
],
"price": [128, 22, 67, 190],
"student_id": [1, 1, 3, 2],
})
books_df.to_sql("books", con=engine, if_exists="append", index=False)
Notes on TIMESTAMP behaviors
- To MySQL, using default=func.now() sets a Python-side default, not a server-side default; the final DDL may still permit NULLs or not set the column default as expected. Prefer server_default=text("CURRENT_TIMESTAMP") or server_default=text("NOW()") to ensure the database sets the value.
- Defining TIMESTAMP(..., nullable=False) on some MySQL versions may implicitly add ON UPDATE CURRENT_TIMESTAMP to the first such column. If you need a pure insert-time column that does not change on update, explicitly use server_default and avoid server_onupdate.
- If you need both created_at and updated_at columns in MySQL 5.x, many installations require:
- Place updated_at first with server_onupdate, then created_at with server_default; some server configurations may reject multiple TIMESTAMP columns with automatic behaviors in the opposite order.
- To avoid timezone storage, set TIMESTAMP(timezone=False). For Oracle, omitting timezone=False can lead to TIMESTAMP WITH TIME ZONE depending on dialect defaults.
Example with both creation and update timestamps:
from sqlalchemy import DateTime
class AuditExample(Base):
__tablename__ = "audit_example"
id = Column(Integer, primary_key=True)
# updated_at first (MySQL-friendly when auto-updating)
updated_at = Column(
TIMESTAMP(timezone=False),
nullable=False,
server_default=text("CURRENT_TIMESTAMP"),
server_onupdate=text("CURRENT_TIMESTAMP"),
)
created_at = Column(
TIMESTAMP(timezone=False),
nullable=False,
server_default=text("CURRENT_TIMESTAMP"),
)
Server defaults vs. Python defaults
Setting Python-side defaults with default= in SQLAlchemy does not create a database-level default; it only supplies a value when the ORM emits INSERT without the column specified. For MySQL, to guarantee the server fills a default when the column is omitted, use server_default.
Incorrect for server default in MySQL:
class FlagBad(Base):
__tablename__ = "flag_bad"
id = Column(Integer, primary_key=True)
flag = Column(String(1), default=text("'0'")) # Python-side default only
Correct with a server default:
class FlagGood(Base):
__tablename__ = "flag_good"
id = Column(Integer, primary_key=True)
flag = Column(String(1), server_default=text("'0'"))
Base.metadata.create_all(bind=engine)
# Insert without specifying flag; server sets it to '0'
with engine.begin() as conn:
conn.execute(text("INSERT INTO flag_good (id) VALUES (:i)"), {"i": 1})