Dynamic Programming Strategies for Calculating String Edit Distance
The edit distance problem requires transforming one string into another using the minimum number of specific operations. The allowed modifications include deleting a character, inserting a new character, or substituting an existing character. The goal is to compute the lowest cost sequence of operations to match the target string.
Core Algorithm Implementation
To solve this, define a state dp[i][j] representing the minimum operations needed to convert the first i characters of the source string to the first j characters of the target string. The base cases involve converting an empty string to a non-empty string, wich requires only insertions or deletions.
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
int computeMinOperations() {
int lenA, lenB;
cin >> lenA;
string source;
source.resize(lenA);
cin >> source;
cin >> lenB;
string target;
target.resize(lenB);
cin >> target;
// DP table initialization
vector<vector<int>> dp(lenA + 1, vector<int>(lenB + 1));
// Base cases: transforming to/from empty string
for (int i = 0; i <= lenA; ++i) dp[i][0] = i;
for (int j = 0; j <= lenB; ++j) dp[0][j] = j;
// Fill DP table
for (int i = 1; i <= lenA; ++i) {
for (int j = 1; j <= lenB; ++j) {
int cost = (source[i - 1] == target[j - 1]) ? 0 : 1;
dp[i][j] = min({
dp[i - 1][j] + 1, // Deletion
dp[i][j - 1] + 1, // Insertion
dp[i - 1][j - 1] + cost // Substitution
});
}
}
return dp[lenA][lenB];
}
int main() {
cout << computeMinOperations() << endl;
return 0;
}
Extended Application: Batch Queries
A common variation involves multiple queries where a set of candidate strings must be checked against a target within a specific opreation limit. For each query, iterate through the candidate list and compute the edit distance. If the distance is within the allowed threshold, increment the match count.
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
// Helper function to calculate edit distance between two strings
int getEditDistance(const string& s1, const string& s2) {
int n = s1.length();
int m = s2.length();
vector<vector<int>> dp(n + 1, vector<int>(m + 1));
for (int i = 0; i <= n; i++) dp[i][0] = i;
for (int j = 0; j <= m; j++) dp[0][j] = j;
for (int i = 1; i <= n; i++) {
for (int j = 1; j <= m; j++) {
int substitutionCost = (s1[i - 1] == s2[j - 1]) ? 0 : 1;
dp[i][j] = min({
dp[i - 1][j] + 1,
dp[i][j - 1] + 1,
dp[i - 1][j - 1] + substitutionCost
});
}
}
return dp[n][m];
}
int main() {
int n, m;
cin >> n >> m;
vector<string> database(n);
for (int i = 0; i < n; i++) {
cin >> database[i];
}
while (m--) {
string query;
int limit;
cin >> query >> limit;
int validCount = 0;
for (const auto& candidate : database) {
if (getEditDistance(candidate, query) <= limit) {
validCount++;
}
}
cout << validCount << endl;
}
return 0;
}