Efficient Bitmask-Based String Matching with Subset Constraints
The problem involves determining the lexicographically largest binary string that can be constructed under constraints derived from a set of pattern strings containing '0', '1', and '-' (wildcard) characters. Each query provides a target string, and the solution must find the maximal answer consistent with all patterns.
A key insight is to process bits greedily from most to least significant. For each position, we tentatively set it to 1 and verify feasibility using precomputed data structures.
Define two arrays per prefix length x:
f[x][mask]: count of input strings where the firstxbits have wildcards exactly at positions indicated bymask.g[x][mask]: count of input strings where the firstxbits are not '0' at positions inmask(i.e., either '1' or '-').
Using high-dimensional prefix sums (via Fast Walsh–Hadamard Transform style updates), we compute for each x and mask whether setting certain bits violates constraints. Specifically, if for some subset s, the number of strings forcing a mismatch (f[x][s]) exceeds those allowing flexibility (g[x][s]), then s is invalid.
Further optimization observes that only subsets differing by one bit from the current candidate mask need checking. This leads to precomputing a violation mask h[x][msk], which encodes all minimal forbidden configurations relative to msk. During queries, we maintain a running goal mask representing positions where the query string has '1'. Feasibility reduces to verifying (goal & h[x][msk]) == h[x][msk].
#include <cstdio>
#include <vector>
using namespace std;
int read() {
char c = getchar(); int x = 0;
while (c < 48 || c > 57) c = getchar();
do x = (x << 1) + (x << 3) + (c ^ 48), c = getchar();
while (c >= 48 && c <= 57);
return x;
}
const int MAX_M = 23;
vector<int> f[MAX_M], g[MAX_M], h[MAX_M];
char str[24], qry[24];
int n, m, q, goal_mask;
bool feasible(int candidate, int len) {
return (goal_mask & h[len][candidate]) == h[len][candidate];
}
void build_prefix_sums(int len) {
int total = 1 << len;
// Compute prefix sums for f and g
for (int stride = 1; stride < total; stride <<= 1)
for (int block = 0; block < total; block += (stride << 1))
for (int idx = block; idx < block + stride; ++idx) {
f[len][idx] += f[len][idx | stride];
g[len][idx] += g[len][idx | stride];
}
// Build violation mask h
for (int mask = 0; mask < total; ++mask) {
if (g[len][mask] <= f[len][mask])
h[len][mask] = mask;
for (int bit = 0; bit < len; ++bit) {
int extended = mask | (1 << bit);
if (g[len][mask] <= f[len][extended])
h[len][mask] |= (1 << bit);
}
}
// Propagate violations upward via OR-transform
for (int stride = 1; stride < total; stride <<= 1)
for (int block = 0; block < total; block += (stride << 1))
for (int idx = block; idx < block + stride; ++idx)
h[len][idx | stride] |= h[len][idx];
}
int main() {
m = read(); n = read(); q = read();
for (int i = 1; i <= m; ++i) {
int size = 1 << i;
f[i].assign(size, 0);
g[i].assign(size, 0);
h[i].assign(size, 0);
}
for (int i = 0; i < n; ++i) {
scanf("%s", str);
int wildcard = 0, non_zero = 0;
for (int j = 0; j < m; ++j) {
if (str[j] == '-') wildcard |= (1 << j);
if (str[j] != '0') non_zero |= (1 << j);
}
for (int len = 1; len <= m; ++len) {
int mask = (1 << len) - 1;
f[len][wildcard & mask]++;
g[len][non_zero & mask]++;
}
}
for (int len = 1; len <= m; ++len)
build_prefix_sums(len);
while (q--) {
scanf("%s", qry);
int result = 0;
goal_mask = 0;
for (int pos = 0; pos < m; ++pos) {
if (qry[pos] == '1') goal_mask |= (1 << pos);
if (feasible(result | (1 << pos), pos + 1))
result |= (1 << pos);
else
goal_mask ^= (1 << pos);
}
for (int pos = 0; pos < m; ++pos)
putchar('0' + ((result >> pos) & 1));
putchar('\n');
}
return 0;
}
The overall complexity is $O(nm + qm + m \cdot 2^m)$, dominated by preprocessing over all prefix lengths.