Essential Pandas Tricks for Everyday Data Analysis
Counting Negative Values in Rows or Columns
import pandas as pd
# Create sample data
df = pd.DataFrame({
'x': [1, -3, 0, 1, 3],
'y': [-1, 0, 1, 5, 1],
'z': [0, -2, 0, -9, 0]
})
# Count negatives per row (use axis=0 for columns)
negatives_per_row = (df < 0).astype(int).sum(axis=1)
print(negatives_per_row)
Replacing All Positive Numbers with Zero
# Replace all positive values with 0
df[df > 0] = 0
Counting Element Frequency in a Column
# Basic frequency count
df['y'].value_counts()
# Group into bins
df['y'].value_counts(bins=3)
# Show as proportion
df['y'].value_counts(normalize=True)
Use sort and ascending parameters to control the sort order of results.
Renaming Columns and Indexes
# Rename columns
df.rename(columns={'x': 'X', 'y': 'Y'}, inplace=True)
# Set custom index
df.index = ['row_a', 'row_b', 'row_c', 'row_d', 'row_e']
# Remove index
df.reset_index(drop=True, inplace=True)
Moving Columns with insert and pop
# Move column X to position 2
df.insert(2, 'X', df.pop('X'))
Query Method for Filtering
# Add a name column
df.insert(0, 'name', ['Alice', 'Bob', 'Charlie', 'David', 'Eve'])
# Filter rows where column z is negative
df.query("z < 0")
# Fuzzy search with contains
df.query("name.str.contains('Alice|Charlie|Eve')", engine='python')
Exporting Data Without Index
df.to_csv('output.csv', encoding='utf-8-sig', index=None)
Sorting by Specific Columns
# Sort by name column, placing NaN values at the top
df.sort_values(by=['name'], na_position='first')
Using apply for Element-wise Operations
# Add 1 to every element in columns X and Y
df[['X', 'Y']].apply(lambda x: x + 1)
# Full signature
DataFrame.apply(func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds)
Merging Multiple DataFrames
# Combine dataframes from different row slices
part1 = df[0:1]
part2 = df[2:4]
part3 = df[3:5]
merged_df = pd.concat([part1, part2, part3])
DataFrame Copy Operations
# Deep copy - changes to new_df don't affect original
new_df = df.copy(deep=True)
# Modify first element of name column
df['name'][0] = 'NewName'
# new_df remains unchanged
# Shallow copy - changes affect both dataframes
shallow_df = df.copy(deep=False)
# or equivalently: shallow_df = df
# Modify first element
df['name'][0] = 'Changed'
# shallow_df also reflects this change
Row and Column Operations
Deleting Rows or Columns
# Delete column 'z' (axis=1 for columns, axis=0 for rows)
# Use inplace=True to modify in place
df.drop('z', axis=1, inplace=True)
Selecting Specific Columns or Rows
# Select last two columns when names are unknown
subset_cols = df.iloc[:, -2:]
# Select named columns
subset_named = df.loc[:, ['name', 'X']]
# Select first two rows by position
subset_rows = df.iloc[:2, :]
# Select rows by index label
subset_labeled = df.loc[['row_a', 'row_c'], :]
Swapping Values Between Columns
# Swap values between X and Y where Y < 0
condition = df['Y'].astype(int).map(lambda x: x < 0)
df.loc[condition, 'Y'], df.loc[condition, 'X'] = \
df.loc[condition, 'X'].values, df.loc[condition, 'Y'].values