Implementing Date-Based Table Partitioning with Scheduled Data Archiving in Java
Problem Statement
Large single-table data volumes cause slow query performance. Implementing date-based table partitioning helps distribute data across multiple tables, improving overall system efficiency.
Solution Overview
- Create a new partitioned table at midnight (table name format: original_tablename_YYYYMMDD)
- Copy yesterday's and today's records into the new table
- Remove historical data older than yesterday from the source table
Implementation
Utility Class for Table Operations
@Component
public class TablePartitionUtil {
@Autowired
private DataManagementService dataService;
private static DataManagementService staticDataService;
@PostConstruct
void initialize() {
staticDataService = this.dataService;
}
/**
* Creates a new partitioned table with date suffix
*/
public static void createPartitionedTable(String dbUrl, String dbUser,
String dbPass, String baseTableName) {
try {
Class.forName("com.mysql.cj.jdbc.Driver");
Connection connection = DriverManager.getConnection(dbUrl, dbUser, dbPass);
LinkedHashSet<String> fieldDefinitions = new LinkedHashSet<>();
Statement statement = connection.createStatement();
DatabaseMetaData metadata = connection.getMetaData();
ResultSet columnInfo = metadata.getColumns(null, null, baseTableName, null);
while (columnInfo.next()) {
String fieldName = columnInfo.getString("COLUMN_NAME");
String typeName = columnInfo.getString("TYPE_NAME");
int sizeValue = columnInfo.getInt("COLUMN_SIZE");
if (typeName.contains("TIME")) {
sizeValue = 6;
}
String definition;
if (typeName.toLowerCase().contains("json")) {
definition = fieldName + " " + typeName;
} else {
definition = fieldName + " " + typeName + "(" + sizeValue + ")";
}
fieldDefinitions.add(definition);
}
if (!fieldDefinitions.isEmpty()) {
LocalDate currentDate = LocalDate.now();
LocalDate previousDate = currentDate.minusDays(1);
String dateSuffix = previousDate.getYear() + ""
+ previousDate.getMonthValue() + ""
+ previousDate.getDayOfMonth();
String targetTable = baseTableName + "_" + dateSuffix;
String createSql = "CREATE TABLE " + targetTable + " (\n"
+ String.join(",\n", fieldDefinitions) + "\n);";
statement.executeUpdate(createSql);
}
statement.close();
connection.close();
} catch (ClassNotFoundException | SQLException e) {
e.printStackTrace();
}
}
/**
* Archives yesterday and today records to partitioned table
*/
public static void archiveRecords(String dbUrl, String dbUser,
String dbPass, String baseTableName, String dateColumn) {
try {
Class.forName("com.mysql.cj.jdbc.Driver");
Connection connection = DriverManager.getConnection(dbUrl, dbUser, dbPass);
Statement statement = connection.createStatement();
LocalDate today = LocalDate.now();
LocalDate yesterday = today.minusDays(1);
String dateSuffix = yesterday.getYear() + ""
+ yesterday.getMonthValue() + ""
+ yesterday.getDayOfMonth();
String sourceTable = baseTableName;
String destinationTable = baseTableName + "_" + dateSuffix;
Long startId = staticDataService.fetchStartIdForDate(sourceTable, dateColumn);
String insertSql = "INSERT INTO " + destinationTable
+ " SELECT * FROM " + sourceTable + " WHERE id >= " + startId;
statement.executeUpdate(insertSql);
statement.close();
connection.close();
} catch (ClassNotFoundException | SQLException e) {
e.printStackTrace();
}
}
/**
* Cleans up old records from source table
*/
public static void purgeHistoricalData(String tableName, String dateColumn) {
try {
Long endId = staticDataService.fetchEndIdForDate(tableName, dateColumn);
staticDataService.removeRecordsUpToId(tableName, endId);
} catch (Exception e) {
e.printStackTrace();
}
}
}
Service Layer
@Service
public class DataManagementServiceImpl implements DataManagementService {
@Autowired
private DataManagementMapper dataMapper;
@Override
public Long fetchStartIdForDate(String tableName, String dateField) {
return dataMapper.selectMinIdForDate(tableName, dateField);
}
@Override
public boolean removeRecordsUpToId(String tableName, Long id) {
return dataMapper.deleteRecordsById(tableName, id);
}
@Override
public Long fetchEndIdForDate(String tableName, String dateField) {
return dataMapper.selectMaxIdForDate(tableName, dateField);
}
}
Mapper Interface
public interface DataManagementMapper {
Long selectMinIdForDate(@Param("tableName") String tableName,
@Param("dateField") String dateField);
boolean deleteRecordsById(@Param("tableName") String tableName,
@Param("id") Long id);
Long selectMaxIdForDate(@Param("tableName") String tableName,
@Param("dateField") String dateField);
}
Mapper XML
<mapper namespace="com.example.mapper.DataManagementMapper">
<select id="selectMinIdForDate" resultType="java.lang.Long">
SELECT MIN(id)
FROM ${tableName}
WHERE DATE(#{dateField}) = CURDATE() - INTERVAL 1 DAY
LIMIT 1
</select>
<delete id="deleteRecordsById">
DELETE FROM ${tableName}
WHERE id <= #{id}
</delete>
<select id="selectMaxIdForDate" resultType="java.lang.Long">
SELECT MAX(id)
FROM ${tableName}
WHERE DATE(#{dateField}) = CURDATE() - INTERVAL 1 DAY
LIMIT 1
</select>
</mapper>
Scheduled Task Configuration
@Configuration
public class SchedulerConfiguration {
@Value("${spring.datasource.url}")
private String jdbcUrl;
@Value("${spring.datasource.username}")
private String dbUsername;
@Value("${spring.datasource.password}")
private String dbPassword;
/**
* Creates partitioned table and copies data at 00:01 daily
*/
@Scheduled(cron = "0 1 0 * * ?")
public void initializePartitionAndArchive() {
String targetTable = "flight_records";
TablePartitionUtil.createPartitionedTable(
jdbcUrl, dbUsername, dbPassword, targetTable);
TablePartitionUtil.archiveRecords(
jdbcUrl, dbUsername, dbPassword, targetTable, "flight_time");
}
/**
* Removes historical data at 05:00 daily
*/
@Scheduled(cron = "0 0 5 * * ?")
public void cleanupOldRecords() {
TablePartitionUtil.purgeHistoricalData("flight_records", "flight_time");
}
}
Verificasion
Modify cron expressions for testing purposes.
Test scenario with flight_records table:
- Day before yesterday: 2 records (2024-04-10)
- Yesterday: 2 records (2024-04-11)
- Today: 1 record (2024-04-12)
Expected outcomes:
| Step | Reesult |
|---|---|
| Partition creasion | flight_records_20240411 table generated |
| Data archive | New table contains 3 records (yesterday + today) |
| Cleanup | Source table retains 1 record (today only) |
The dynamic SQL uses ${} syntax for table names and column names that cannot be parameterized.