Django ORM Querying Techniques and Optimization
Environment Configuration for Debugging
To log raw SQL queries executed by the ORM, add the following configuration to your Django settings module:
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'handlers': {
'console_output': {
'level': 'DEBUG',
'class': 'logging.StreamHandler',
},
},
'loggers': {
'django.db.backends': {
'handlers': ['console_output'],
'propagate': True,
'level': 'DEBUG',
},
}
}
For standalone script execution, configure the Django environment as follows:
import os
import django
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "my_project.settings")
django.setup()
from inventory import models
Single Table CRUD Operations
# Create
item = models.Product.objects.create(title="Laptop", cost=1200.00)
# Or
item = models.Product(title="Laptop", cost=1200.00)
item.save()
# Update
models.Product.objects.filter(pk=1).update(cost=1150.00)
# Or
item = models.Product.objects.filter(pk=1).first()
item.cost = 1150.00
item.save()
# Delete
models.Product.objects.filter(pk=1).delete()
# Or
item = models.Product.objects.filter(pk=1).first()
item.delete()
# Retrieve
queryset = models.Product.objects.all()
queryset = models.Product.objects.filter(cost=1200.00)
Retrieving All Records: all()
Returns a QuerySet. Django optimizes this by only fetching the first 21 records initially.
queryset = models.Product.objects.all()
print(queryset) # <QuerySet [<Product: Product object>, ...]>
# Indexing is supported, but negative indexing raises AssertionError
print(queryset[0]) # Valid
# print(queryset[-1]) # Invalid
# Accessing index 22 triggers a second database query to fetch the remaining rows
item = queryset[22]
Inspecting the Generated SQL: .query
The query attribute exposes the underlying SQL statement. It is only available on QuerySet instances, not model instances.
queryset = models.Product.objects.all()
print(queryset.query)
# SELECT `inventory_product`.`id`, `inventory_product`.`title`, `inventory_product`.`cost` FROM `inventory_product`
Filtering and Exclusion: filter(**kwargs) and exclude(**kwargs)
filter() returns objects matching the provided conditions (AND relationship for multiple conditions). exclude() returns objects that do not match.
matched = models.Product.objects.filter(category_id=2, cost=50)
excluded = models.Product.objects.exclude(category_id=2, cost=50)
Lookup Expressions (Double Underscore)
| SQL Equivalent | Django Lookup |
|---|---|
id IN (1, 2, 3) |
pk__in=[1, 2, 3] |
id > 5 |
pk__gt=5 |
id >= 5 |
pk__gte=5 |
id < 5 |
pk__lt=5 |
id <= 5 |
pk__lte=5 |
id BETWEEN 10 AND 20 |
pk__range=[10, 20] |
title LIKE 'Lap%' |
title__startswith='Lap' |
title LIKE '%top%' |
title__contains='top' |
Date field lookups:
# By year
items = models.Product.objects.filter(manufactured_date__year=2023)
# Exact date
import datetime
items = models.Product.objects.filter(manufactured_date=datetime.date(2023, 5, 12))
# Date range
start_date = datetime.date(2023, 5, 1)
end_date = datetime.date(2023, 5, 31)
items = models.Product.objects.filter(manufactured_date__range=(start_date, end_date))
Ordering, Reversing, and Deduplication
order_by() sorts the results. A hyphen prrefix indicates descending order. reverse() inverts an already ordered queryset. distinct() eliminates duplicate rows.
# Order by cost ascending, then by id descending
queryset = models.Product.objects.order_by("cost", "-pk")
# Deduplicate (syntax varies slightly by database backend)
unique_ids = models.Product.objects.values("category_id").distinct()
Fetching Single Records: get(), first(), and last()
first() and last() return model instances or None. get() returns a single model instence but raises DoesNotExist if no match is found, or MultipleObjectsReturned if more than one match exists.
queryset = models.Product.objects.filter(cost__gt=100)
print(queryset.first())
print(queryset.last())
# Safe usage of get
print(models.Product.objects.get(pk=1))
# Raises MultipleObjectsReturned
# models.Product.objects.get(cost=100)
# Raises DoesNotExist
# models.Product.objects.get(pk=999)
Selecting Specific Fields: values() and values_list()
values() returns a QuerySet of dictionaries. values_list() returns a QuerySet of tuples.
dict_qs = models.Product.objects.values('pk', 'title')
# <QuerySet [{'pk': 1, 'title': 'Laptop'}, ...]>
tuple_qs = models.Product.objects.values_list('pk', 'title')
# <QuerySet [(1, 'Laptop'), ...]>
Counting Records: count()
Returns an integer representing the total number of matching records.
total = models.Product.objects.filter(cost__gt=100).count()
Checking Existence: exists()
Returns True if the QuerySet contains data, otherwise False. Evaluating the queryset as a boolean also works.
queryset = models.Product.objects.filter(pk=99)
if queryset.exists():
pass
# Equivalent boolean check
if queryset:
pass
Aggregation: aggregate()
Performs calculations over a set of rows, returning a dictionary.
from django.db.models import Avg, Max, Min, Sum, Count
stats = models.Product.objects.aggregate(
avg_cost=Avg('cost'),
max_cost=Max('cost'),
min_cost=Min('cost')
)
# {'avg_cost': 550.0, 'max_cost': Decimal('1200.00'), 'min_cost': Decimal('10.00')}
Grouping: annotate()
Generates a summary value for each item in the QuerySet. If values() precedes annotate(), the fields in values() define the group by clause.
# Count products per category
category_stats = models.Product.objects.values('category_id').annotate(item_count=Count('pk'))
print(category_stats.query)
# SELECT "inventory_product"."category_id", COUNT("inventory_product"."id") AS "item_count" FROM "inventory_product" GROUP BY "inventory_product"."category_id"
# Without values(), it groups by the model's primary key
item_stats = models.Product.objects.annotate(num_authors=Count('authors'))
If a ONLY_FULL_GROUP_BY SQL mode error occurs, either adjust the database configuration or insure all non-aggregated fields in the SELECT clause are included in the GROUP BY clause.
Advanced Queries: F() and Q()
F() allows comparing or updating model fields directly against other column values without pulling them into Python.
from django.db.models import F
# Find items where sold quantity exceeds stock
out_of_stock = models.Product.objects.filter(sold__gt=F('stock'))
# Increase all prices by 10%
models.Product.objects.update(cost=F('cost') * 1.1)
Q() encapsulates complex lookups with OR (|), AND (&), and NOT (~) logic.
from django.db.models import Q
# OR condition
items = models.Product.objects.filter(Q(title='Laptop') | Q(cost__lt=50))
# AND condition
items = models.Product.objects.filter(Q(title='Laptop'), Q(cost__lt=50))
# Equivalent to: items = models.Product.objects.filter(title='Laptop', cost__lt=50)
# NOT condition
items = models.Product.objects.filter(~Q(cost__lt=50))
# Dynamic Q object construction
search_conditions = {
"group1": [("title", "Laptop"), ("title", "Mouse")],
"group2": [("cost__gt", 100), ("cost__lt", 500)]
}
combined_query = Q()
for key, conditions in search_conditions.items():
sub_query = Q()
sub_query.connector = "OR"
for condition in conditions:
sub_query.children.append(condition)
combined_query.add(sub_query, "AND")
items = models.Product.objects.filter(combined_query)
Raw SQL Extensions: extra()
Allows attaching raw SQL to a queryset. Use with caution due to SQL injection risks.
annotated_qs = models.Product.objects.extra(
select={'total_count': 'SELECT COUNT(*) FROM inventory_category WHERE id > %s'},
select_params=[1],
where=['cost > %s'],
params=[100.00],
tables=['inventory_category'],
order_by=['-pk']
)
Aliasing fields can be achieved using extra(select={...}) or annotate(alias=F('original_field')).
Executing Pure Raw SQL
For queries beyond the ORM's capabilities, use the database connection cursor.
from django.db import connection
with connection.cursor() as cursor:
cursor.execute("SELECT * FROM inventory_product WHERE cost > %s", [100.00])
row = cursor.fetchone()
multiple_rows = cursor.fetchmany(5)
all_rows = cursor.fetchall()
Query Optimization: defer() and only()
only('field') loads only the specified field immediately, deferring the rest. defer('field') loads all fields except the specified one. Accessing a deferred field triggers an additional query.
# Fetches only id and title
queryset = models.Product.objects.only('title')
for item in queryset:
print(item.title) # No extra query
# print(item.cost) # Triggers a new query to fetch cost for this specific item
Date Truncation: dates() and datetimes()
dates(field_name, kind, order) returns distinct dates. kind can be "year", "month", or "day".
datetimes() works similarly for datetime fields, supporting "hour", "minute", "second", and timezone conversion.
import pytz
distinct_months = models.Product.objects.dates('manufactured_date', 'month', order='DESC')
distinct_hours = models.SaleRecord.objects.datetimes('created_at', 'hour', tzinfo=pytz.timezone('Asia/Tokyo'))
Empty QuerySets: none()
Creates a QuerySet that always returns no results.
empty_qs = models.Product.objects.none()
# <QuerySet []>
Bulk Insertion: bulk_create()
Efficiently inserts a list of model instances in batches.
new_items = [models.Product(title=f"Item {i}", cost=i*10) for i in range(100)]
models.Product.objects.bulk_create(new_items, batch_size=50)
Safe Get or Create/Update: get_or_create() and update_or_create()
get_or_create() returns a tuple of (object, created_boolean).
update_or_create() updates an object if it exists, or creates it otherwise.
item, was_created = models.Product.objects.get_or_create(
title="Keyboard",
defaults={'cost': 45.00, 'stock': 100}
)
item, was_created = models.Product.objects.update_or_create(
title="Keyboard",
defaults={'cost': 50.00}
)
Fetching by Primary Keys: in_bulk()
Retrieves a dictionary mapping primary keys to model instances.
item_dict = models.Product.objects.in_bulk([1, 2, 3])
# {1: <Product: Product object>, 2: <Product: Product object>, 3: <Product: Product object>}