Dynamic Programming Solution for ARC162F Matrix Problem
Matrix is not easy to handle, so consider deriving the next row from the current row.
Suppose the previous row has its 1s in columns \(p_1 < p_2 < \cdots < p_k\). Consider which columns \(x\) the current row may select.
(Drawing a diagram helps when following the case analysis below.)
Case Analysis
- Case 1: \(x \ge p_1\)
  - Choosing an \(x\) that is not in \(p\) violates the condition.
  - Moreover, if \(q\) denotes the part of the current row's selection that is \(\ge p_1\), then \(q\) must be a prefix of \(p\).
  - However, if a row selects nothing, its state is treated as inherited from the previous row. This needs special handling.
- Case 2: \(x < p_1\)
  - Once the constraint from Case 1 is respected, these columns can be chosen arbitrarily.
Dynamic Programming Formulation
Let \(dp_{i,j,k}\) be the number of ways to fill the first \(i\) rows such that the \(i\)-th row has \(j\) ones and there are \(k\) zeros to the left of its leftmost one.
Transition for the case \(x \ge p_1\): enumerate the length \(l\) of the prefix of \(p\) that is kept, and transfer directly.
Then, on top of that, transition for the case \(x < p_1\): enumerate the number \(a\) of ones selected there and the number \(l\) of zeros to the left of the new leftmost one, accumulating into a separate array. Finally, combine the two.
This leads to an \(O(n^5)\) implementation.
O(n⁵) Implementation
#include <bits/stdc++.h>
using namespace std;
using ll = long long;

// Upper bound on every array dimension used by the solvers below.
constexpr int N = 105;
// Prime modulus all counts are reduced by.
constexpr int mod = 998244353;

// Returns (a + b) reduced by `mod`.
int add(int a, int b) {
    const int total = a + b;
    return total % mod;
}

// Returns (a - b) reduced by `mod`; `mod` is added first so that the result
// stays non-negative for operands already in [0, mod).
int sub(int a, int b) {
    const int diff = a - b;
    return (diff + mod) % mod;
}

// Returns (a * b) reduced by `mod`; the product is formed in 64 bits.
int mul(int a, int b) {
    const ll product = static_cast<ll>(a) * b;
    return static_cast<int>(product % mod);
}
// Binary exponentiation: returns a^b reduced by `mod` using mul().
// The exponent is consumed bit by bit from the least significant end;
// b is expected to be non-negative.
int qpow(int a, int b) {
    int result = 1;
    for (; b; b >>= 1) {
        if (b & 1) {
            result = mul(result, a);
        }
        a = mul(a, a);
    }
    return result;
}
// Problem dimensions used as loop bounds by the solvers: n bounds the row
// index, m bounds the column-related indices. They are not assigned in this
// section; presumably the caller reads them from input (TODO confirm).
int n, m;
// dp[i][j][k]: number of ways for the first i rows where the i-th row has
// j ones and k zeros to the left of its leftmost one.
// f[j][k]: per-row scratch table used by solve_n5 (solve_n4 and solve_n3
// declare their own local tables that shadow these names).
int dp[N][N][N], f[N][N];
/**
 * Reference O(n^5) solver: fills the global dp table row by row and prints
 * the sum of dp[n][*][*] followed by a newline.
 *
 * Reads the globals n and m; overwrites the globals dp and f.
 * Requires m < N and n < N so that every index stays inside the tables.
 */
void solve_n5() {
    // Factorials and inverse factorials up to m, for binomial coefficients.
    int fac[N], ifac[N];
    fac[0] = 1;
    for (int i = 1; i <= m; i++) {
        fac[i] = mul(fac[i-1], i);
    }
    ifac[m] = qpow(fac[m], mod - 2);
    for (int i = m - 1; i >= 0; i--) {
        ifac[i] = mul(ifac[i+1], i + 1);
    }
    // C(total, pick) modulo mod; 0 when pick is outside [0, total]
    // (this also covers a negative total).
    auto binom = [&](int total, int pick) {
        if (pick < 0 || pick > total) return 0;
        return mul(mul(fac[total], ifac[pick]), ifac[total - pick]);
    };

    // First row: with `zeros` zeros before the leftmost one, the remaining
    // ones - 1 ones are chosen among the m - zeros - 1 columns to its right.
    memset(dp, 0, sizeof(dp));
    for (int ones = 1; ones <= m; ones++) {
        for (int zeros = 0; zeros <= m; zeros++) {
            dp[1][ones][zeros] = binom(m - zeros - 1, ones - 1);
        }
    }
    // The all-zero first row.
    dp[1][0][m] = 1;

    for (int i = 2; i <= n; i++) {
        memset(f, 0, sizeof(f));

        // Case x >= p1: the kept part is a prefix of the previous row's ones.
        for (int prev_ones = 0; prev_ones <= m; prev_ones++) {
            for (int prev_zeros = 0; prev_zeros <= m; prev_zeros++) {
                const int val = dp[i-1][prev_ones][prev_zeros];
                if (!val) continue;
                for (int len = 0; len <= prev_ones; len++) {
                    dp[i][len][prev_zeros] = add(dp[i][len][prev_zeros], val);
                }
                // A row that selects nothing inherits the previous state:
                // record a correction that moves `val` from state
                // (0, prev_zeros) to (prev_ones, prev_zeros). It is applied
                // in the combine step, after the x < p1 transfers have read
                // the uncorrected dp[i].
                if (prev_ones > 0) {
                    f[prev_ones][prev_zeros] = add(f[prev_ones][prev_zeros], val);
                    f[0][prev_zeros] = sub(f[0][prev_zeros], val);
                }
            }
        }

        // Case x < p1: the new leftmost one leaves l - 1 zeros to its left,
        // plus p further ones chosen among the k - l columns between it and
        // the old leftmost one.
        for (int j = 0; j <= m; j++) {
            for (int k = 1; k <= m; k++) {
                const int base = dp[i][j][k];
                // Skipping empty states is required for memory safety, not
                // only speed: j + p + 1 can reach 2m, which lies outside
                // f[N][N] for large m. Non-zero states have j + k <= m, so
                // for them j + p + 1 <= m.
                if (!base) continue;
                for (int l = 1; l <= k; l++) {
                    for (int p = 0; p <= k - l; p++) {
                        const int ways = mul(binom(k - l, p), base);
                        f[j+p+1][l-1] = add(f[j+p+1][l-1], ways);
                    }
                }
            }
        }

        // Combine the prefix transfers with the x < p1 transfers and the
        // empty-row corrections.
        for (int j = 0; j <= m; j++) {
            for (int k = 0; k <= m; k++) {
                dp[i][j][k] = add(dp[i][j][k], f[j][k]);
            }
        }
    }

    int ans = 0;
    for (int i = 0; i <= m; i++) {
        for (int j = 0; j <= m; j++) {
            ans = add(ans, dp[n][i][j]);
        }
    }
    printf("%d\n", ans);
}
Optimization to O(n⁴)
The bottleneck is the \(x < p_1\) case; it is optimized with an inner DP:
/**
 * O(n^4) solver: same recurrence as solve_n5, but the x < p1 case is handled
 * by an inner DP that adds one new leftmost one at a time instead of
 * enumerating a binomial-weighted count of ones.
 *
 * Reads the globals n and m, overwrites the global dp, and prints the sum of
 * dp[n][*][*] followed by a newline (as the other solvers do).
 * Requires m < N and n < N.
 */
void solve_n4() {
    memset(dp, 0, sizeof(dp));
    // Virtual row 0: no ones, all m columns lie to the left of the
    // (non-existent) leftmost one. Running the transition once from this
    // state produces every possible first row.
    dp[0][0][m] = 1;

    for (int i = 1; i <= n; i++) {
        // f: states reached by the x >= p1 prefix choice, then extended by
        //    the inner DP. g: correction for rows that select nothing.
        int f[N][N] = {}, g[N][N] = {};

        // Case x >= p1: keep a prefix of length l of the previous ones.
        for (int j = 0; j <= m; j++) {
            for (int k = 0; k <= m; k++) {
                const int val = dp[i-1][j][k];
                if (!val) continue;
                for (int l = 0; l <= j; l++) {
                    f[l][k] = add(f[l][k], val);
                }
                // A row that selects nothing inherits the previous state:
                // move `val` from state (0, k) to state (j, k).
                if (j > 0) {
                    g[j][k] = add(g[j][k], val);
                    g[0][k] = sub(g[0][k], val);
                }
            }
        }

        // Inner DP for x < p1: from a state with j ones and k zeros on the
        // left, place one more one so that l - 1 zeros remain to its left.
        // Row j of f is final before it is read because it only receives
        // contributions from row j - 1. Non-zero states have j + k <= m, so
        // stopping at j < m loses nothing and keeps j + 1 within m.
        for (int j = 0; j < m; j++) {
            for (int k = 1; k <= m; k++) {
                const int val = f[j][k];
                if (!val) continue;
                for (int l = 1; l <= k; l++) {
                    f[j+1][l-1] = add(f[j+1][l-1], val);
                }
            }
        }

        // Combine: extended states plus the empty-row correction.
        for (int j = 0; j <= m; j++) {
            for (int k = 0; k <= m; k++) {
                dp[i][j][k] = add(f[j][k], g[j][k]);
            }
        }
    }

    int ans = 0;
    for (int i = 0; i <= m; i++) {
        for (int j = 0; j <= m; j++) {
            ans = add(ans, dp[n][i][j]);
        }
    }
    printf("%d\n", ans);
}
Optimization to O(n³)
Further optimized with prefix sums:
/**
 * O(n^3) solver using two rolling layers and running suffix sums.
 *
 * Reads the globals n and m and prints the sum of the final layer followed
 * by a newline. The tables declared here are local and shadow the globals
 * of the same name.
 */
void solve_n3() {
    int dp[2][N][N] = {}, f[N][N] = {};
    int cur = 0, nxt = 1;
    // Starting state before any row: zero ones, m zeros on the left.
    dp[cur][0][m] = 1;

    for (int row = 1; row <= n; ++row) {
        memset(f, 0, sizeof(f));
        memset(dp[nxt], 0, sizeof(dp[nxt]));

        // f[ones][zeros] = sum of dp[cur][j][zeros] over all j >= ones,
        // built as a running suffix sum.
        for (int zeros = 0; zeros <= m; ++zeros) {
            int tail = 0;
            for (int ones = m; ones >= 0; --ones) {
                tail = add(tail, dp[cur][ones][zeros]);
                f[ones][zeros] = tail;
            }
        }

        // Correction stored in the next layer: every state with at least one
        // one is added at its own position and subtracted at (0, zeros).
        for (int ones = 1; ones <= m; ++ones) {
            for (int zeros = 0; zeros <= m; ++zeros) {
                const int carried = dp[cur][ones][zeros];
                dp[nxt][ones][zeros] = add(dp[nxt][ones][zeros], carried);
                dp[nxt][0][zeros] = sub(dp[nxt][0][zeros], carried);
            }
        }

        // f[ones + 1][left - 1] gains the sum of f[ones][k] for k >= left,
        // again accumulated as a running suffix sum over row `ones`.
        for (int ones = 0; ones <= m; ++ones) {
            int tail = 0;
            for (int left = m; left >= 1; --left) {
                tail = add(tail, f[ones][left]);
                f[ones + 1][left - 1] = add(f[ones + 1][left - 1], tail);
            }
        }

        // Merge f into the next layer and roll the layers.
        for (int ones = 0; ones <= m; ++ones) {
            for (int zeros = 0; zeros <= m; ++zeros) {
                dp[nxt][ones][zeros] = add(dp[nxt][ones][zeros], f[ones][zeros]);
            }
        }
        std::swap(cur, nxt);
    }

    int total = 0;
    for (int ones = 0; ones <= m; ++ones) {
        for (int zeros = 0; zeros <= m; ++zeros) {
            total = add(total, dp[cur][ones][zeros]);
        }
    }
    printf("%d\n", total);
}
This method does not use binomial coefficients in its transitions, and therefore needs no modular inverses, so it can be adapted to arbitrary moduli.