Working with File and Directory Properties in Python
Getting File Size
The os.path.getsize() function returns the size of a file in bytes:
import os
# os.path.getsize() reports the size in bytes; "document.pdf" is a placeholder path.
file_size = os.path.getsize("document.pdf")
print(f"Size: {file_size} bytes")
Getting Directory Size
Python doesn't provide a direct method to calculate directory size. The standard approach involves using os.walk() to iterate through all files within a directory tree:
def calculate_directory_size(directory_path):
    """Return the combined size, in bytes, of every file under a directory tree.

    Args:
        directory_path: Root directory, walked recursively with os.walk().

    Returns:
        The sum of os.path.getsize() over all files whose size could be read.
        Files that raise OSError (for example a dangling symbolic link, or a
        file removed while the walk is in progress) are skipped.
    """
    total_bytes = 0
    for root, _subdirs, files in os.walk(directory_path):
        for filename in files:
            filepath = os.path.join(root, filename)
            try:
                total_bytes += os.path.getsize(filepath)
            except OSError:
                # Best-effort total: one unreadable entry must not abort the walk.
                continue
    return total_bytes
Handling Chinese Paths in Python 2
In Python 2, os.path.getsize() doesn't handle Unicode paths properly, so you need to decode Chinese filenames (here from UTF-8) before using them:
import sys
# sys.version_info is the structured version tuple; sys.version is only a
# display string, so its first character is not a reliable major version.
py_version = str(sys.version_info[0])
if py_version == '2' and isinstance(filepath, bytes):
    # Only a Python 2 byte string needs decoding; a path that is already
    # unicode is left alone so it is not decoded a second time.
    filepath = filepath.decode('utf-8')
Retrieving File Timestamps
Three key functions extract different timestamp types:
os.path.getctime(path) # Creation time on Windows; inode change time on Unix-like systems
os.path.getmtime(path) # Last modification time
os.path.getatime(path) # Last access time
All three functions return a Unix timestamp (seconds since the epoch). Convert it to a readable format using the time module:
import time
def format_timestamp(timestamp):
    """Render a Unix timestamp as a 'YYYY-MM-DD HH:MM:SS' string in local time.

    Args:
        timestamp: Seconds since the epoch, such as the values returned by
            os.path.getctime(), getmtime() and getatime().

    Returns:
        The formatted local date and time.
    """
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
# getctime() is the creation time on Windows but the inode change time on
# Unix-like systems, so the "Created" label is only accurate on Windows.
creation = os.path.getctime("report.txt")
print(f"Created: {format_timestamp(creation)}")
Complete Implementation
# -*- coding: utf-8 -*-
import os
import sys
import time
def convert_bytes_to_display(total_bytes):
    """Format a byte count as a KB or MB string rounded to two decimals.

    Args:
        total_bytes: Size in bytes.

    Returns:
        "<value> MB" when the size is at least 1024 KB, otherwise
        "<value> KB". Sizes below one kilobyte are still reported in KB.
    """
    kb = total_bytes / 1024
    in_megabytes = kb >= 1024
    # Guard clause: anything under 1024 KB stays in kilobytes.
    if not in_megabytes:
        return f"{round(kb, 2)} KB"
    mb = kb / 1024
    return f"{round(mb, 2)} MB"
def normalize_path(path):
    """Return *path* in a form suitable for the os.path functions.

    On Python 2 a UTF-8 encoded byte string is decoded to unicode. On any
    other interpreter version, or when the path is not a byte string, the
    path is returned unchanged.

    Args:
        path: Filesystem path.

    Returns:
        The decoded path on Python 2, otherwise *path* itself.
    """
    # sys.version_info is the structured version tuple; sys.version is only a
    # display string and should not be parsed for version checks.
    if sys.version_info[0] == 2 and isinstance(path, bytes):
        # A path that is already unicode is left alone so it is not decoded
        # a second time.
        return path.decode('utf-8')
    return path
def get_path_size(path):
    """Return the display size of a file, or of a whole directory tree.

    For a directory, the sizes of all files found by os.walk() are summed;
    files whose size cannot be read (OSError/IOError) are skipped. For
    anything else, the size of the single path is used. The byte count is
    formatted by convert_bytes_to_display().

    Args:
        path: File or directory path.

    Returns:
        The formatted size string.
    """
    normalized = normalize_path(path)

    # Single file: nothing to walk.
    if not os.path.isdir(path):
        return convert_bytes_to_display(os.path.getsize(normalized))

    byte_count = 0
    for current_dir, _, names in os.walk(path):
        for name in names:
            entry = os.path.join(current_dir, name)
            try:
                entry_size = os.path.getsize(normalize_path(entry))
            except (OSError, IOError):
                # Best effort: skip entries that cannot be read.
                continue
            byte_count += entry_size
    return convert_bytes_to_display(byte_count)
def timestamp_to_string(path):
    """Return the last-modification time of *path* as 'YYYY-MM-DD HH:MM:SS'.

    Despite its name, this takes a filesystem path rather than a timestamp:
    it reads os.path.getmtime() for the normalized path and formats the
    result in local time.
    """
    modified_at = os.path.getmtime(normalize_path(path))
    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(modified_at))
def fetch_creation_time(path):
    """Return os.path.getctime() of *path* as 'YYYY-MM-DD HH:MM:SS' local time.

    The value is the creation time on Windows but the inode change time on
    Unix-like systems.
    """
    target = normalize_path(path)
    ctime_seconds = os.path.getctime(target)
    local_struct = time.localtime(ctime_seconds)
    return time.strftime('%Y-%m-%d %H:%M:%S', local_struct)
def fetch_access_time(path):
    """Return the last-access time of *path* as 'YYYY-MM-DD HH:MM:SS' local time."""
    target = normalize_path(path)
    atime_seconds = os.path.getatime(target)
    local_struct = time.localtime(atime_seconds)
    return time.strftime('%Y-%m-%d %H:%M:%S', local_struct)
def fetch_modification_time(path):
    """Return the last-modification time of *path* as 'YYYY-MM-DD HH:MM:SS' local time."""
    target = normalize_path(path)
    mtime_seconds = os.path.getmtime(target)
    local_struct = time.localtime(mtime_seconds)
    return time.strftime('%Y-%m-%d %H:%M:%S', local_struct)
def display_file_info(path):
    """Print the creation, modification and access times and the size of *path*.

    Each value is fetched and printed in turn, followed by a blank line.

    Args:
        path: File or directory path to report on.
    """
    # Based on getctime(): creation time on Windows, inode change time on
    # Unix-like systems.
    print(f"Creation Time: {fetch_creation_time(path)}")
    print(f"Modification Time: {fetch_modification_time(path)}")
    print(f"Access Time: {fetch_access_time(path)}")
    print(f"Size: {get_path_size(path)}")
    # Blank line separates consecutive reports.
    print()
# Example usage: print a report for each path (the names suggest two files
# and one directory).
# NOTE(review): these are placeholder paths - presumably they must exist
# relative to the current working directory; confirm before running.
display_file_info("data.txt")
display_file_info("song.mp3")
display_file_info("project_folder")
Platform-Specific Behaviors
On Windows systems, the creation time and last access time often return identical values for files, though directories may show different values. This occurs because Windows updates the access timestamp when files are modified. The os.path.getctime() function returns the actual creation time on Windows but the inode change time on Unix-like systems.
Notes on Python 3
In Python 3, path handling is significantly improved. Unicode filenames work seamlessly with os.path functions, eliminating the need for explicit decoding in most scenarios. The pathlib module offers an object-oriented alternative with cleaner syntax:
from pathlib import Path
p = Path("example.txt")
# Call stat() once and reuse the result: a single filesystem query, and all
# three values come from the same snapshot of the file's metadata.
info = p.stat()
print(f"Size: {info.st_size} bytes")
print(f"Modified: {info.st_mtime}")
# st_ctime is the creation time on Windows but the inode change time on
# Unix-like systems.
print(f"Created: {info.st_ctime}")