Implementing AES-128 in C++: Key Schedule, Round Functions, and File I/O
AES (Advanced Encryption Standard, Rijndael) is a symmetric block cipher standardized in FIPS-197. It operates on 128-bit blocks and supports key sizes of 128, 192, or 256 bits. This guide focuses on AES-128 and shows a clean C++ implementation of the key schedule (key expansion), block encryption/decryption, and a simple file-processing example.
Parameters and Notation
- Block size: 128 bits (16 bytes) arranged as a 4×4 state (column-major)
- Nb (columns in state): 4
- Nk (32-bit words in key): 4 for AES-128
- Nr (rounds): 10 for AES-128
Each round applies (except the final round, which skips MixColumns):
- SubBytes
- ShiftRows
- MixColumns (skipped in last round)
- AddRoundKey
Decryption applies the inverse transforms in reverse order.
Key Schedule (Key Expansion)
AES derives Nr+1 round keys from the user key. For AES-128, that’s 11 round keys, each 16 bytes, totaling 176 bytes.
Key schedule steps for AES-128:
- Copy the original 16-byte key as the first round key
- For each subsequent 16-byte chunk:
- Rotate last 4-byte word left by one byte (RotWord)
- Apply S-box (SubWord) to each byte of that word
- XOR the first byte with the Rcon value for that iteration
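- XOR with the word 16 bytes earlier to generate the next 4 bytes; for the remaining three words of the chunk, skip the RotWord/SubWord/Rcon steps and simply XOR the previous word with the word 16 bytes earlier, until 16 bytes are filled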
Round Transformations
- SubBytes: Byte-wise substitution using the S-box
- ShiftRows: Rotate each row left by 0,1,2,3 bytes respectively
- MixColumns: Multiply each column by a fixed matrix in GF(2^8)
- AddRoundKey: XOR state with round key
Inverse operations are defined analogously (InvSubBytes, InvShiftRows, InvMixColumns).
C++ Implementation (AES-128)
The code below is a compact, byte-oriented AES-128 implementation. It avoids bitset-heavy manipulation and uses simple arrays. State is stored column-major: state[col*4 + row].
#include <array>
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <utility>
#include <vector>
// Short aliases for the fixed-width unsigned integer types from <cstdint>.
using u8 = uint8_t;   // byte type used for the state, keys and lookup tables
using u32 = uint32_t; // 32-bit alias; not referenced by the code shown here
// Flattened forward S-box: SBOX[x] is the substitute for byte value x.
// Read by sub_bytes and by the SubWord step of expand_key_128.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 SBOX[256] = {
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16
};
// Flattened inverse S-box: INV_SBOX[y] is the byte x for which the forward
// S-box maps x to y. Read by inv_sub_bytes during decryption.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 INV_SBOX[256] = {
0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};
// Rcon for AES-128 key expansion (as bytes applied to the first byte of the word).
// Index 0 is a placeholder: expand_key_128 starts at index 1 and consumes
// entries 1..10, one per derived round key.
static const u8 RCON[11] = { 0x00,0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,0x1b,0x36 };
// Multiply x by 0x02 in GF(2^8): shift left one bit and, when the bit shifted
// out was set, fold it back in by XORing with 0x1b.
static inline u8 xtime(u8 x) {
    const unsigned doubled = static_cast<unsigned>(x) << 1;
    const unsigned reduction = ((x & 0x80) != 0) ? 0x1bu : 0x00u;
    return static_cast<u8>(doubled ^ reduction);
}
// Multiply a by b in GF(2^8) using shift-and-add: for every set bit of b
// (lowest first) XOR the current multiple of a into the result, doubling a
// with xtime after each bit.
static u8 gf_mul(u8 a, u8 b) {
    u8 product = 0;
    for (; b != 0; b = static_cast<u8>(b >> 1)) {
        if ((b & 1) != 0) {
            product = static_cast<u8>(product ^ a);
        }
        a = xtime(a);
    }
    return product;
}
// SubBytes: replace every byte of the 16-byte state with its SBOX entry.
static void sub_bytes(u8 state[16]) {
    for (u8* cell = state; cell != state + 16; ++cell) {
        *cell = SBOX[*cell];
    }
}
// InvSubBytes: replace every byte of the 16-byte state with its INV_SBOX entry.
static void inv_sub_bytes(u8 state[16]) {
    for (u8* cell = state; cell != state + 16; ++cell) {
        *cell = INV_SBOX[*cell];
    }
}
// ShiftRows on a column-major state (index = col*4 + row): row r is rotated
// left by r positions, so the byte landing in column c comes from column
// (c + r) mod 4. Row 0 is left untouched.
static void shift_rows(u8 st[16]) {
    u8 before[16];
    for (int i = 0; i < 16; ++i) {
        before[i] = st[i];
    }
    for (int col = 0; col < 4; ++col) {
        for (int row = 1; row < 4; ++row) {
            st[col * 4 + row] = before[((col + row) % 4) * 4 + row];
        }
    }
}
// InvShiftRows on a column-major state (index = col*4 + row): row r is rotated
// right by r positions, so the byte landing in column c comes from column
// (c - r) mod 4. Undoes shift_rows.
static void inv_shift_rows(u8 st[16]) {
    u8 before[16];
    for (int i = 0; i < 16; ++i) {
        before[i] = st[i];
    }
    for (int col = 0; col < 4; ++col) {
        for (int row = 1; row < 4; ++row) {
            st[col * 4 + row] = before[((col + 4 - row) % 4) * 4 + row];
        }
    }
}
// MixColumns: treat each group of four consecutive bytes as one state column
// and multiply it by the fixed matrix
//   [02 03 01 01; 01 02 03 01; 01 01 02 03; 03 01 01 02] over GF(2^8).
static void mix_columns(u8 st[16]) {
    for (u8* col = st; col != st + 16; col += 4) {
        const u8 s0 = col[0];
        const u8 s1 = col[1];
        const u8 s2 = col[2];
        const u8 s3 = col[3];
        col[0] = static_cast<u8>(gf_mul(0x02, s0) ^ gf_mul(0x03, s1) ^ s2 ^ s3);
        col[1] = static_cast<u8>(s0 ^ gf_mul(0x02, s1) ^ gf_mul(0x03, s2) ^ s3);
        col[2] = static_cast<u8>(s0 ^ s1 ^ gf_mul(0x02, s2) ^ gf_mul(0x03, s3));
        col[3] = static_cast<u8>(gf_mul(0x03, s0) ^ s1 ^ s2 ^ gf_mul(0x02, s3));
    }
}
// InvMixColumns: multiply each state column by the inverse matrix
//   [0e 0b 0d 09; 09 0e 0b 0d; 0d 09 0e 0b; 0b 0d 09 0e] over GF(2^8).
// The matrix is circulant, so row r is the first row rotated right by r;
// the coefficient for input byte k in output row r is first_row[(k - r) mod 4].
static void inv_mix_columns(u8 st[16]) {
    const u8 first_row[4] = { 0x0e, 0x0b, 0x0d, 0x09 };
    for (u8* col = st; col != st + 16; col += 4) {
        const u8 in[4] = { col[0], col[1], col[2], col[3] };
        for (int row = 0; row < 4; ++row) {
            u8 acc = 0;
            for (int k = 0; k < 4; ++k) {
                acc = static_cast<u8>(acc ^ gf_mul(first_row[(k + 4 - row) % 4], in[k]));
            }
            col[row] = acc;
        }
    }
}
// AddRoundKey: XOR the 16 bytes at rk into the 16-byte state, byte by byte.
static void add_round_key(u8 st[16], const u8* rk) {
    const u8* key_byte = rk;
    for (u8* cell = st; cell != st + 16; ++cell, ++key_byte) {
        *cell ^= *key_byte;
    }
}
// Byte-oriented AES-128 key expansion: out must be 176 bytes (11 * 16)
static void expand_key_128(const u8 key[16], u8 out[176]) {
// Copy original key
for (int i = 0; i < 16; ++i) out[i] = key[i];
int generated = 16;
int rcon_idx = 1;
u8 temp[4];
while (generated < 176) {
// Last 4 bytes of previous 16-byte block
for (int i = 0; i < 4; ++i) temp[i] = out[generated - 4 + i];
if ((generated % 16) == 0) {
// RotWord: [t0,t1,t2,t3] -> [t1,t2,t3,t0]
u8 t0 = temp[0];
temp[0] = temp[1]; temp[1] = temp[2]; temp[2] = temp[3]; temp[3] = t0;
// SubWord
temp[0] = SBOX[temp[0]];
temp[1] = SBOX[temp[1]];
temp[2] = SBOX[temp[2]];
temp[3] = SBOX[temp[3]];
// Rcon
temp[0] ^= RCON[rcon_idx++];
}
for (int i = 0; i < 4; ++i) {
out[generated] = static_cast<u8>(out[generated - 16] ^ temp[i]);
++generated;
}
}
}
// Encrypt one 16-byte block in place.
//   block     - 16 bytes of plaintext on entry, ciphertext on return
//   roundKeys - the 176-byte schedule produced by expand_key_128 (read-only)
// Round key r lives at roundKeys + 16 * r.
static void aes128_encrypt_block(u8 block[16], const u8 roundKeys[176]) {
    // Initial whitening with round key 0.
    add_round_key(block, roundKeys);
    // Rounds 1..9 apply all four transformations.
    for (int round = 1; round < 10; ++round) {
        sub_bytes(block);
        shift_rows(block);
        mix_columns(block);
        add_round_key(block, roundKeys + 16 * round);
    }
    // Final round (10) skips MixColumns.
    sub_bytes(block);
    shift_rows(block);
    add_round_key(block, roundKeys + 160); // last round key
}
// Decrypt one 16-byte block in place.
//   block     - 16 bytes of ciphertext on entry, plaintext on return
//   roundKeys - the 176-byte schedule produced by expand_key_128 (read-only)
// Applies the inverse transformations with the round keys in reverse order.
static void aes128_decrypt_block(u8 block[16], const u8 roundKeys[176]) {
    // Undo the final encryption round key first.
    add_round_key(block, roundKeys + 160);
    // Rounds 9..1: the round key is added before InvMixColumns.
    for (int round = 9; round >= 1; --round) {
        inv_shift_rows(block);
        inv_sub_bytes(block);
        add_round_key(block, roundKeys + 16 * round);
        inv_mix_columns(block);
    }
    // Last step has no InvMixColumns and uses round key 0.
    inv_shift_rows(block);
    inv_sub_bytes(block);
    add_round_key(block, roundKeys);
}
// Write a 16-byte block to std::cout as two-digit hex values, four bytes per
// line, separated by single spaces. The stream's format flags and fill
// character are restored before returning.
static void print_block_hex(const u8 b[16]) {
    // std::setfill is sticky and is not part of flags(), so the fill
    // character has to be saved and restored separately.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    const char saved_fill = std::cout.fill();
    for (int i = 0; i < 16; ++i) {
        std::cout << std::hex << std::setw(2) << std::setfill('0')
                  << static_cast<int>(b[i])            // int, so it prints as a number
                  << ((i % 4 == 3) ? '\n' : ' ');
    }
    std::cout.flags(saved_flags);
    std::cout.fill(saved_fill);
}
int main() {
// Known AES-128 test vector (from FIPS-197)
u8 key[16] = {
0x2b,0x7e,0x15,0x16,
0x28,0xae,0xd2,0xa6,
0xab,0xf7,0x15,0x88,
0x09,0xcf,0x4f,0x3c
};
u8 plain[16] = {
0x32,0x88,0x31,0xe0,
0x43,0x5a,0x31,0x37,
0xf6,0x30,0x98,0x07,
0xa8,0x8d,0xa2,0x34
};
u8 roundKeys[176];
expand_key_128(key, roundKeys);
std::cout << "Key (hex)\n";
print_block_hex(key);
std::cout << "Plaintext (hex)\n";
print_block_hex(plain);
u8 buf[16];
for (int i = 0; i < 16; ++i) buf[i] = plain[i];
aes128_encrypt_block(buf, roundKeys);
std::cout << "Ciphertext (hex)\n";
print_block_hex(buf);
aes128_decrypt_block(buf, roundKeys);
std::cout << "Decrypted (hex)\n";
print_block_hex(buf);
return 0;
}
Minimal File Encryption/Decryption (ECB for Demonstration Only)
The following snippet processes a file 16 bytes at a time using AES-128 in ECB mode. PKCS#7 padding is applied during encryption and removed after decryption of the last block. This is a didactic example; for real systems, use an authenticated mode (e.g., GCM) and a vetted crypto library.
#include <cstdint>
#include <array>
#include <fstream>
#include <vector>
#include <stdexcept>
// Assume expand_key_128, aes128_encrypt_block, aes128_decrypt_block are defined as above
// Return a copy of data extended with PKCS#7 padding for a 16-byte block:
// between 1 and 16 bytes are appended, each holding the number of bytes added.
// Input that is already a multiple of 16 bytes gains a full extra block.
static std::vector<u8> pkcs7_pad(const std::vector<u8>& data) {
    const size_t fill = 16 - data.size() % 16;
    std::vector<u8> padded(data);
    padded.resize(padded.size() + fill, static_cast<u8>(fill));
    return padded;
}
// Strip PKCS#7 padding from data in place.
// Throws std::runtime_error("invalid padding") when data is empty, its length
// is not a multiple of 16, the last byte is outside 1..16, or the trailing
// bytes do not all equal the last byte.
static void pkcs7_unpad(std::vector<u8>& data) {
    const size_t total = data.size();
    if (total == 0 || total % 16 != 0) {
        throw std::runtime_error("invalid padding");
    }
    const u8 count = data[total - 1];
    if (count < 1 || count > 16) {
        throw std::runtime_error("invalid padding");
    }
    // total >= 16 >= count here, so the subtraction cannot wrap.
    for (size_t pos = total - count; pos < total; ++pos) {
        if (data[pos] != count) {
            throw std::runtime_error("invalid padding");
        }
    }
    data.resize(total - count);
}
// Encrypt the file at inPath with AES-128 in ECB mode and write the result to
// outPath. The whole input is read into memory, PKCS#7-padded to a multiple of
// 16 bytes, and encrypted block by block.
//   inPath  - path of the plaintext file
//   outPath - path of the ciphertext file to create/overwrite
//   key     - 16-byte AES-128 key
// Throws std::runtime_error if the input cannot be opened or the output
// cannot be opened or fully written.
void encrypt_file_ecb(const char* inPath, const char* outPath, const u8 key[16]) {
    std::ifstream in(inPath, std::ios::binary);
    if (!in) throw std::runtime_error("open input failed");
    std::vector<u8> data((std::istreambuf_iterator<char>(in)), {});
    auto padded = pkcs7_pad(data);

    u8 roundKeys[176];
    expand_key_128(key, roundKeys);
    // padded.size() is always a non-zero multiple of 16, so every step
    // addresses one complete block.
    for (size_t i = 0; i < padded.size(); i += 16) {
        aes128_encrypt_block(&padded[i], roundKeys);
    }

    std::ofstream out(outPath, std::ios::binary);
    if (!out) throw std::runtime_error("open output failed");
    out.write(reinterpret_cast<const char*>(padded.data()), static_cast<std::streamsize>(padded.size()));
    // Flush explicitly so a failed write is reported here rather than being
    // swallowed by the destructor.
    out.flush();
    if (!out) throw std::runtime_error("write output failed");
}
// Decrypt the file at inPath (AES-128, ECB, PKCS#7 padding) and write the
// recovered plaintext to outPath. The whole input is read into memory.
//   inPath  - path of the ciphertext file
//   outPath - path of the plaintext file to create/overwrite
//   key     - 16-byte AES-128 key
// Throws std::runtime_error if the input cannot be opened, its length is not a
// multiple of 16, the padding is invalid after decryption, or the output
// cannot be opened or fully written. The output file is only created once the
// padding has been validated.
void decrypt_file_ecb(const char* inPath, const char* outPath, const u8 key[16]) {
    std::ifstream in(inPath, std::ios::binary);
    if (!in) throw std::runtime_error("open input failed");
    std::vector<u8> data((std::istreambuf_iterator<char>(in)), {});
    if ((data.size() % 16) != 0) throw std::runtime_error("cipher length not multiple of block size");

    u8 roundKeys[176];
    expand_key_128(key, roundKeys);
    for (size_t i = 0; i < data.size(); i += 16) {
        aes128_decrypt_block(&data[i], roundKeys);
    }
    // Rejects empty input and malformed padding with "invalid padding".
    pkcs7_unpad(data);

    std::ofstream out(outPath, std::ios::binary);
    if (!out) throw std::runtime_error("open output failed");
    out.write(reinterpret_cast<const char*>(data.data()), static_cast<std::streamsize>(data.size()));
    // Flush explicitly so a failed write is reported here rather than being
    // swallowed by the destructor.
    out.flush();
    if (!out) throw std::runtime_error("write output failed");
}
Optional: Table-Driven GF(2^8) Multiplication
You can speed up MixColumns and InvMixColumns by replacing gf_mul with lookup tables. The following tables cover ×02, ×03, ×09, ×0B, ×0D, ×0E in GF(2^8). Example usage for MixColumns: M02[x] ^ M03[y] ^ z ^ w, etc.
// Lookup table for multiplication by 0x02 in GF(2^8): M02[x] is meant to equal
// gf_mul(0x02, x). Used by the table-driven form of MixColumns.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 M02[256] = {
0x00,0x02,0x04,0x06,0x08,0x0a,0x0c,0x0e,0x10,0x12,0x14,0x16,0x18,0x1a,0x1c,0x1e,
0x20,0x22,0x24,0x26,0x28,0x2a,0x2c,0x2e,0x30,0x32,0x34,0x36,0x38,0x3a,0x3c,0x3e,
0x40,0x42,0x44,0x46,0x48,0x4a,0x4c,0x4e,0x50,0x52,0x54,0x56,0x58,0x5a,0x5c,0x5e,
0x60,0x62,0x64,0x66,0x68,0x6a,0x6c,0x6e,0x70,0x72,0x74,0x76,0x78,0x7a,0x7c,0x7e,
0x80,0x82,0x84,0x86,0x88,0x8a,0x8c,0x8e,0x90,0x92,0x94,0x96,0x98,0x9a,0x9c,0x9e,
0xa0,0xa2,0xa4,0xa6,0xa8,0xaa,0xac,0xae,0xb0,0xb2,0xb4,0xb6,0xb8,0xba,0xbc,0xbe,
0xc0,0xc2,0xc4,0xc6,0xc8,0xca,0xcc,0xce,0xd0,0xd2,0xd4,0xd6,0xd8,0xda,0xdc,0xde,
0xe0,0xe2,0xe4,0xe6,0xe8,0xea,0xec,0xee,0xf0,0xf2,0xf4,0xf6,0xf8,0xfa,0xfc,0xfe,
0x1b,0x19,0x1f,0x1d,0x13,0x11,0x17,0x15,0x0b,0x09,0x0f,0x0d,0x03,0x01,0x07,0x05,
0x3b,0x39,0x3f,0x3d,0x33,0x31,0x37,0x35,0x2b,0x29,0x2f,0x2d,0x23,0x21,0x27,0x25,
0x5b,0x59,0x5f,0x5d,0x53,0x51,0x57,0x55,0x4b,0x49,0x4f,0x4d,0x43,0x41,0x47,0x45,
0x7b,0x79,0x7f,0x7d,0x73,0x71,0x77,0x75,0x6b,0x69,0x6f,0x6d,0x63,0x61,0x67,0x65,
0x9b,0x99,0x9f,0x9d,0x93,0x91,0x97,0x95,0x8b,0x89,0x8f,0x8d,0x83,0x81,0x87,0x85,
0xbb,0xb9,0xbf,0xbd,0xb3,0xb1,0xb7,0xb5,0xab,0xa9,0xaf,0xad,0xa3,0xa1,0xa7,0xa5,
0xdb,0xd9,0xdf,0xdd,0xd3,0xd1,0xd7,0xd5,0xcb,0xc9,0xcf,0xcd,0xc3,0xc1,0xc7,0xc5,
0xfb,0xf9,0xff,0xfd,0xf3,0xf1,0xf7,0xf5,0xeb,0xe9,0xef,0xed,0xe3,0xe1,0xe7,0xe5
};
// Lookup table for multiplication by 0x03 in GF(2^8): M03[x] is meant to equal
// gf_mul(0x03, x). Used by the table-driven form of MixColumns.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 M03[256] = {
0x00,0x03,0x06,0x05,0x0c,0x0f,0x0a,0x09,0x18,0x1b,0x1e,0x1d,0x14,0x17,0x12,0x11,
0x30,0x33,0x36,0x35,0x3c,0x3f,0x3a,0x39,0x28,0x2b,0x2e,0x2d,0x24,0x27,0x22,0x21,
0x60,0x63,0x66,0x65,0x6c,0x6f,0x6a,0x69,0x78,0x7b,0x7e,0x7d,0x74,0x77,0x72,0x71,
0x50,0x53,0x56,0x55,0x5c,0x5f,0x5a,0x59,0x48,0x4b,0x4e,0x4d,0x44,0x47,0x42,0x41,
0xc0,0xc3,0xc6,0xc5,0xcc,0xcf,0xca,0xc9,0xd8,0xdb,0xde,0xdd,0xd4,0xd7,0xd2,0xd1,
0xf0,0xf3,0xf6,0xf5,0xfc,0xff,0xfa,0xf9,0xe8,0xeb,0xee,0xed,0xe4,0xe7,0xe2,0xe1,
0xa0,0xa3,0xa6,0xa5,0xac,0xaf,0xaa,0xa9,0xb8,0xbb,0xbe,0xbd,0xb4,0xb7,0xb2,0xb1,
0x90,0x93,0x96,0x95,0x9c,0x9f,0x9a,0x99,0x88,0x8b,0x8e,0x8d,0x84,0x87,0x82,0x81,
0x9b,0x98,0x9d,0x9e,0x97,0x94,0x91,0x92,0x83,0x80,0x85,0x86,0x8f,0x8c,0x89,0x8a,
0xab,0xa8,0xad,0xae,0xa7,0xa4,0xa1,0xa2,0xb3,0xb0,0xb5,0xb6,0xbf,0xbc,0xb9,0xba,
0xfb,0xf8,0xfd,0xfe,0xf7,0xf4,0xf1,0xf2,0xe3,0xe0,0xe5,0xe6,0xef,0xec,0xe9,0xea,
0xcb,0xc8,0xcd,0xce,0xc7,0xc4,0xc1,0xc2,0xd3,0xd0,0xd5,0xd6,0xdf,0xdc,0xd9,0xda,
0x5b,0x58,0x5d,0x5e,0x57,0x54,0x51,0x52,0x43,0x40,0x45,0x46,0x4f,0x4c,0x49,0x4a,
0x6b,0x68,0x6d,0x6e,0x67,0x64,0x61,0x62,0x73,0x70,0x75,0x76,0x7f,0x7c,0x79,0x7a,
0x3b,0x38,0x3d,0x3e,0x37,0x34,0x31,0x32,0x23,0x20,0x25,0x26,0x2f,0x2c,0x29,0x2a,
0x0b,0x08,0x0d,0x0e,0x07,0x04,0x01,0x02,0x13,0x10,0x15,0x16,0x1f,0x1c,0x19,0x1a
};
// Lookup table for multiplication by 0x09 in GF(2^8): M09[x] is meant to equal
// gf_mul(0x09, x). Used by the table-driven form of InvMixColumns.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 M09[256] = {
0x00,0x09,0x12,0x1b,0x24,0x2d,0x36,0x3f,0x48,0x41,0x5a,0x53,0x6c,0x65,0x7e,0x77,
0x90,0x99,0x82,0x8b,0xb4,0xbd,0xa6,0xaf,0xd8,0xd1,0xca,0xc3,0xfc,0xf5,0xee,0xe7,
0x3b,0x32,0x29,0x20,0x1f,0x16,0x0d,0x04,0x73,0x7a,0x61,0x68,0x57,0x5e,0x45,0x4c,
0xab,0xa2,0xb9,0xb0,0x8f,0x86,0x9d,0x94,0xe3,0xea,0xf1,0xf8,0xc7,0xce,0xd5,0xdc,
0x76,0x7f,0x64,0x6d,0x52,0x5b,0x40,0x49,0x3e,0x37,0x2c,0x25,0x1a,0x13,0x08,0x01,
0xe6,0xef,0xf4,0xfd,0xc2,0xcb,0xd0,0xd9,0xae,0xa7,0xbc,0xb5,0x8a,0x83,0x98,0x91,
0x4d,0x44,0x5f,0x56,0x69,0x60,0x7b,0x72,0x05,0x0c,0x17,0x1e,0x21,0x28,0x33,0x3a,
0xdd,0xd4,0xcf,0xc6,0xf9,0xf0,0xeb,0xe2,0x95,0x9c,0x87,0x8e,0xb1,0xb8,0xa3,0xaa,
0xec,0xe5,0xfe,0xf7,0xc8,0xc1,0xda,0xd3,0xa4,0xad,0xb6,0xbf,0x80,0x89,0x92,0x9b,
0x7c,0x75,0x6e,0x67,0x58,0x51,0x4a,0x43,0x34,0x3d,0x26,0x2f,0x10,0x19,0x02,0x0b,
0xd7,0xde,0xc5,0xcc,0xf3,0xfa,0xe1,0xe8,0x9f,0x96,0x8d,0x84,0xbb,0xb2,0xa9,0xa0,
0x47,0x4e,0x55,0x5c,0x63,0x6a,0x71,0x78,0x0f,0x06,0x1d,0x14,0x2b,0x22,0x39,0x30,
0x9a,0x93,0x88,0x81,0xbe,0xb7,0xac,0xa5,0xd2,0xdb,0xc0,0xc9,0xf6,0xff,0xe4,0xed,
0x0a,0x03,0x18,0x11,0x2e,0x27,0x3c,0x35,0x42,0x4b,0x50,0x59,0x66,0x6f,0x74,0x7d,
0xa1,0xa8,0xb3,0xba,0x85,0x8c,0x97,0x9e,0xe9,0xe0,0xfb,0xf2,0xcd,0xc4,0xdf,0xd6,
0x31,0x38,0x23,0x2a,0x15,0x1c,0x07,0x0e,0x79,0x70,0x6b,0x62,0x5d,0x54,0x4f,0x46
};
// Lookup table for multiplication by 0x0b in GF(2^8): M0B[x] is meant to equal
// gf_mul(0x0b, x). Used by the table-driven form of InvMixColumns.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 M0B[256] = {
0x00,0x0b,0x16,0x1d,0x2c,0x27,0x3a,0x31,0x58,0x53,0x4e,0x45,0x74,0x7f,0x62,0x69,
0xb0,0xbb,0xa6,0xad,0x9c,0x97,0x8a,0x81,0xe8,0xe3,0xfe,0xf5,0xc4,0xcf,0xd2,0xd9,
0x7b,0x70,0x6d,0x66,0x57,0x5c,0x41,0x4a,0x23,0x28,0x35,0x3e,0x0f,0x04,0x19,0x12,
0xcb,0xc0,0xdd,0xd6,0xe7,0xec,0xf1,0xfa,0x93,0x98,0x85,0x8e,0xbf,0xb4,0xa9,0xa2,
0xf6,0xfd,0xe0,0xeb,0xda,0xd1,0xcc,0xc7,0xae,0xa5,0xb8,0xb3,0x82,0x89,0x94,0x9f,
0x46,0x4d,0x50,0x5b,0x6a,0x61,0x7c,0x77,0x1e,0x15,0x08,0x03,0x32,0x39,0x24,0x2f,
0x8d,0x86,0x9b,0x90,0xa1,0xaa,0xb7,0xbc,0xd5,0xde,0xc3,0xc8,0xf9,0xf2,0xef,0xe4,
0x3d,0x36,0x2b,0x20,0x11,0x1a,0x07,0x0c,0x65,0x6e,0x73,0x78,0x49,0x42,0x5f,0x54,
0xf7,0xfc,0xe1,0xea,0xdb,0xd0,0xcd,0xc6,0xaf,0xa4,0xb9,0xb2,0x83,0x88,0x95,0x9e,
0x47,0x4c,0x51,0x5a,0x6b,0x60,0x7d,0x76,0x1f,0x14,0x09,0x02,0x33,0x38,0x25,0x2e,
0x8c,0x87,0x9a,0x91,0xa0,0xab,0xb6,0xbd,0xd4,0xdf,0xc2,0xc9,0xf8,0xf3,0xee,0xe5,
0x3c,0x37,0x2a,0x21,0x10,0x1b,0x06,0x0d,0x64,0x6f,0x72,0x79,0x48,0x43,0x5e,0x55,
0x01,0x0a,0x17,0x1c,0x2d,0x26,0x3b,0x30,0x59,0x52,0x4f,0x44,0x75,0x7e,0x63,0x68,
0xb1,0xba,0xa7,0xac,0x9d,0x96,0x8b,0x80,0xe9,0xe2,0xff,0xf4,0xc5,0xce,0xd3,0xd8,
0x7a,0x71,0x6c,0x67,0x56,0x5d,0x40,0x4b,0x22,0x29,0x34,0x3f,0x0e,0x05,0x18,0x13,
0xca,0xc1,0xdc,0xd7,0xe6,0xed,0xf0,0xfb,0x92,0x99,0x84,0x8f,0xbe,0xb5,0xa8,0xa3
};
// Lookup table for multiplication by 0x0d in GF(2^8): M0D[x] is meant to equal
// gf_mul(0x0d, x). Used by the table-driven form of InvMixColumns.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 M0D[256] = {
0x00,0x0d,0x1a,0x17,0x34,0x39,0x2e,0x23,0x68,0x65,0x72,0x7f,0x5c,0x51,0x46,0x4b,
0xd0,0xdd,0xca,0xc7,0xe4,0xe9,0xfe,0xf3,0xb8,0xb5,0xa2,0xaf,0x8c,0x81,0x96,0x9b,
0xbb,0xb6,0xa1,0xac,0x8f,0x82,0x95,0x98,0xd3,0xde,0xc9,0xc4,0xe7,0xea,0xfd,0xf0,
0x6b,0x66,0x71,0x7c,0x5f,0x52,0x45,0x48,0x03,0x0e,0x19,0x14,0x37,0x3a,0x2d,0x20,
0x6d,0x60,0x77,0x7a,0x59,0x54,0x43,0x4e,0x05,0x08,0x1f,0x12,0x31,0x3c,0x2b,0x26,
0xbd,0xb0,0xa7,0xaa,0x89,0x84,0x93,0x9e,0xd5,0xd8,0xcf,0xc2,0xe1,0xec,0xfb,0xf6,
0xd6,0xdb,0xcc,0xc1,0xe2,0xef,0xf8,0xf5,0xbe,0xb3,0xa4,0xa9,0x8a,0x87,0x90,0x9d,
0x06,0x0b,0x1c,0x11,0x32,0x3f,0x28,0x25,0x6e,0x63,0x74,0x79,0x5a,0x57,0x40,0x4d,
0xda,0xd7,0xc0,0xcd,0xee,0xe3,0xf4,0xf9,0xb2,0xbf,0xa8,0xa5,0x86,0x8b,0x9c,0x91,
0x0a,0x07,0x10,0x1d,0x3e,0x33,0x24,0x29,0x62,0x6f,0x78,0x75,0x56,0x5b,0x4c,0x41,
0x61,0x6c,0x7b,0x76,0x55,0x58,0x4f,0x42,0x09,0x04,0x13,0x1e,0x3d,0x30,0x27,0x2a,
0xb1,0xbc,0xab,0xa6,0x85,0x88,0x9f,0x92,0xd9,0xd4,0xc3,0xce,0xed,0xe0,0xf7,0xfa,
0xb7,0xba,0xad,0xa0,0x83,0x8e,0x99,0x94,0xdf,0xd2,0xc5,0xc8,0xeb,0xe6,0xf1,0xfc,
0x67,0x6a,0x7d,0x70,0x53,0x5e,0x49,0x44,0x0f,0x02,0x15,0x18,0x3b,0x36,0x21,0x2c,
0x0c,0x01,0x16,0x1b,0x38,0x35,0x22,0x2f,0x64,0x69,0x7e,0x73,0x50,0x5d,0x4a,0x47,
0xdc,0xd1,0xc6,0xcb,0xe8,0xe5,0xf2,0xff,0xb4,0xb9,0xae,0xa3,0x80,0x8d,0x9a,0x97
};
// Lookup table for multiplication by 0x0e in GF(2^8): M0E[x] is meant to equal
// gf_mul(0x0e, x). Used by the table-driven form of InvMixColumns.
// Laid out 16 entries per line, so line k holds the entries for 0xk0..0xkf.
static const u8 M0E[256] = {
0x00,0x0e,0x1c,0x12,0x38,0x36,0x24,0x2a,0x70,0x7e,0x6c,0x62,0x48,0x46,0x54,0x5a,
0xe0,0xee,0xfc,0xf2,0xd8,0xd6,0xc4,0xca,0x90,0x9e,0x8c,0x82,0xa8,0xa6,0xb4,0xba,
0xdb,0xd5,0xc7,0xc9,0xe3,0xed,0xff,0xf1,0xab,0xa5,0xb7,0xb9,0x93,0x9d,0x8f,0x81,
0x3b,0x35,0x27,0x29,0x03,0x0d,0x1f,0x11,0x4b,0x45,0x57,0x59,0x73,0x7d,0x6f,0x61,
0xad,0xa3,0xb1,0xbf,0x95,0x9b,0x89,0x87,0xdd,0xd3,0xc1,0xcf,0xe5,0xeb,0xf9,0xf7,
0x4d,0x43,0x51,0x5f,0x75,0x7b,0x69,0x67,0x3d,0x33,0x21,0x2f,0x05,0x0b,0x19,0x17,
0x76,0x78,0x6a,0x64,0x4e,0x40,0x52,0x5c,0x06,0x08,0x1a,0x14,0x3e,0x30,0x22,0x2c,
0x96,0x98,0x8a,0x84,0xae,0xa0,0xb2,0xbc,0xe6,0xe8,0xfa,0xf4,0xde,0xd0,0xc2,0xcc,
0x41,0x4f,0x5d,0x53,0x79,0x77,0x65,0x6b,0x31,0x3f,0x2d,0x23,0x09,0x07,0x15,0x1b,
0xa1,0xaf,0xbd,0xb3,0x99,0x97,0x85,0x8b,0xd1,0xdf,0xcd,0xc3,0xe9,0xe7,0xf5,0xfb,
0x9a,0x94,0x86,0x88,0xa2,0xac,0xbe,0xb0,0xea,0xe4,0xf6,0xf8,0xd2,0xdc,0xce,0xc0,
0x7a,0x74,0x66,0x68,0x42,0x4c,0x5e,0x50,0x0a,0x04,0x16,0x18,0x32,0x3c,0x2e,0x20,
0xec,0xe2,0xf0,0xfe,0xd4,0xda,0xc8,0xc6,0x9c,0x92,0x80,0x8e,0xa4,0xaa,0xb8,0xb6,
0x0c,0x02,0x10,0x1e,0x34,0x3a,0x28,0x26,0x7c,0x72,0x60,0x6e,0x44,0x4a,0x58,0x56,
0x37,0x39,0x2b,0x25,0x0f,0x01,0x13,0x1d,0x47,0x49,0x5b,0x55,0x7f,0x71,0x63,0x6d,
0xd7,0xd9,0xcb,0xc5,0xef,0xe1,0xf3,0xfd,0xa7,0xa9,0xbb,0xb5,0x9f,0x91,0x83,0x8d
};
To use the tables, replace gf_mul calls in MixColumns/InvMixColumns with table lookups (e.g., M02[a0] ^ M03[a1] ...), which typically yields a sizeable speedup for software implementations.