Creating Artistic Images with Neural Networks: Style Transfer and GAN Implementation
Introduction
Neural networks, particularly Convolutional Neural Networks (CNNs) and Generative Adversarial Networks (GANs), have demonstrated remarkable capabilities in generating and transforming visual art. This guide provides practical implementations for creating artistic images using these two fundamental approaches.
Core Concepts
Convolutional Neural Networks for Style Transfer
CNNs excel at extracting hierarchical features from images. Style transfer leverages these learned representations to separate and recombine the content of one image with the stylistic elements of another.
Generative Adversarial Networks for Image Synthesis
GANs consist of two competing networks: a Generator that creates new images from random noise, and a Discriminator that evaluates their authenticity. Through adversarial training, the Generator learns to produce increasingly realistic outputs.
Environment Setup
Install the required Python libraries:
pip install numpy matplotlib tensorflow opencv-python
Implementing Neural Style Transfer
Import Dependencies
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.applications import vgg19
import matplotlib.pyplot as plt
import cv2
Image Preprocessing Functions
def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image file and prepare it as a VGG19-ready batch.

    Reads the file at *image_path*, resizes it to *target_size*, prepends
    a batch axis, and applies VGG19's preprocessing (mean subtraction and
    RGB->BGR reordering).
    """
    loaded = keras_image.load_img(image_path, target_size=target_size)
    batch = keras_image.img_to_array(loaded)[np.newaxis, ...]
    return vgg19.preprocess_input(batch)
def convert_to_display(image_tensor):
    """Undo VGG19 preprocessing so the image can be displayed.

    Re-adds the ImageNet per-channel means that preprocess_input removed,
    flips BGR back to RGB, and clamps the result into the uint8 range.
    """
    img = np.array(image_tensor, copy=True)
    if img.ndim == 4:
        # Drop the singleton batch axis produced by preprocess_image.
        img = np.squeeze(img, axis=0)
    # Per-channel ImageNet means (BGR order, as produced by VGG19 preprocessing).
    img[:, :, 0] += 103.939
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68
    # BGR -> RGB for matplotlib.
    img = img[..., ::-1]
    return np.clip(img, 0, 255).astype('uint8')
Loss Function Definitions
def compute_content_loss(original_features, generated_features):
    """Mean squared error between two content feature maps."""
    diff = original_features - generated_features
    return tf.reduce_mean(tf.square(diff))
def compute_gram_matrix(feature_map):
    """Gram matrix of a feature map, normalized by spatial size.

    Flattens the map to (positions, channels) and returns the
    channel-by-channel inner product divided by the number of positions.
    """
    n_channels = int(feature_map.shape[-1])
    positions = tf.reshape(feature_map, [-1, n_channels])
    inner = tf.matmul(positions, positions, transpose_a=True)
    denom = tf.cast(tf.shape(positions)[0], tf.float32)
    return inner / denom
def compute_style_loss(generated_features, target_gram):
    """MSE between the generated features' Gram matrix and the target Gram."""
    generated_gram = compute_gram_matrix(generated_features)
    return tf.reduce_mean(tf.square(generated_gram - target_gram))
Feature Extraction Model
def create_feature_extractor():
    """Build a frozen VGG19 that exposes style and content activations.

    Output order is: the five style layers first, then the single content
    layer -- callers rely on this ordering when slicing the outputs.
    """
    style_layers = ('block1_conv1', 'block2_conv1', 'block3_conv1',
                    'block4_conv1', 'block5_conv1')
    content_layers = ('block5_conv2',)
    vgg = vgg19.VGG19(include_top=False, weights='imagenet')
    vgg.trainable = False  # feature extraction only; never fine-tuned
    outputs = [vgg.get_layer(layer).output
               for layer in style_layers + content_layers]
    return tf.keras.Model(inputs=vgg.input, outputs=outputs)
Optimization Procedure
def calculate_total_loss(model, weights, current_image,
                         target_style_grams, target_content_features):
    """Compute the weighted style-transfer objective for *current_image*.

    *weights* is (style_weight, content_weight). Returns the tuple
    (total_loss, style_loss, content_loss). The model's outputs are
    assumed ordered style layers first, then content layers.
    """
    outputs = model(current_image)
    n_style = len(target_style_grams)
    style_feats = outputs[:n_style]
    content_feats = outputs[n_style:]

    # Average the per-layer style losses; feat[0] drops the batch axis.
    style_loss = 0
    for feat, gram in zip(style_feats, target_style_grams):
        style_loss += compute_style_loss(feat[0], gram)
    style_loss = style_loss / n_style

    # Average the per-layer content losses.
    content_loss = 0
    for feat, target in zip(content_feats, target_content_features):
        content_loss += compute_content_loss(feat[0], target)
    content_loss = content_loss / len(target_content_features)

    total = weights[0] * style_loss + weights[1] * content_loss
    return total, style_loss, content_loss
@tf.function
def compute_gradients(config):
    """One backward pass of the style-transfer objective.

    *config* is the kwargs dict for calculate_total_loss; its
    'current_image' entry is the tf.Variable being optimized. Returns
    (gradient, (total_loss, style_loss, content_loss)).
    """
    with tf.GradientTape() as tape:
        all_losses = calculate_total_loss(**config)
    grad = tape.gradient(all_losses[0], config['current_image'])
    return grad, all_losses
Style Transfer Execution
def perform_style_transfer(content_img_path, style_img_path,
                           iterations=1000, style_strength=1e-2, content_strength=1e3):
    """Run gradient-based neural style transfer on a pair of images.

    Optimizes the pixels of an image (initialized from the content image)
    to match the content image's content features and the style image's
    Gram matrices.

    Args:
        content_img_path: path to the content image file.
        style_img_path: path to the style image file.
        iterations: number of Adam optimization steps.
        style_strength: weight applied to the style loss.
        content_strength: weight applied to the content loss.

    Returns:
        (best_image, best_loss) -- the displayable uint8 image with the
        lowest total loss seen during optimization, and that loss.
        NOTE(review): once updated, best_loss is a tf scalar tensor, not
        a Python float -- callers formatting it with :.2f still work.
    """
    feature_model = create_feature_extractor()
    content_img = preprocess_image(content_img_path)
    style_img = preprocess_image(style_img_path)
    # The image itself is the trainable variable.
    generated_img = tf.Variable(content_img, dtype=tf.float32)
    optimizer = tf.optimizers.Adam(learning_rate=5, beta_1=0.99, epsilon=1e-1)
    # create_feature_extractor orders outputs: 5 style layers, then content.
    style_outputs = feature_model(style_img)[:5]
    content_output = feature_model(content_img)[5:]
    target_grams = [compute_gram_matrix(feat) for feat in style_outputs]
    config = {
        'model': feature_model,
        'weights': (style_strength, content_strength),
        'current_image': generated_img,
        'target_style_grams': target_grams,
        'target_content_features': content_output
    }
    # Valid pixel bounds in VGG19-preprocessed (mean-subtracted BGR) space.
    mean_values = np.array([103.939, 116.779, 123.68])
    min_val = -mean_values
    max_val = 255 - mean_values
    optimal_loss, optimal_image = float('inf'), None
    for step in range(iterations):
        gradients, loss_values = compute_gradients(config)
        total_loss, style_val, content_val = loss_values
        optimizer.apply_gradients([(gradients, generated_img)])
        # Keep the image inside the displayable range after every step.
        clipped = tf.clip_by_value(generated_img, min_val, max_val)
        generated_img.assign(clipped)
        # Track the best iterate rather than returning the final one.
        if total_loss < optimal_loss:
            optimal_loss = total_loss
            optimal_image = convert_to_display(generated_img.numpy())
    return optimal_image, optimal_loss
# Demo run: requires 'content.jpg' and 'style.jpg' in the working directory.
# Runs 1000 optimization steps by default, then displays the best result.
result_image, final_loss = perform_style_transfer('content.jpg', 'style.jpg')
plt.imshow(result_image)
plt.title(f"Final Loss: {final_loss:.2f}")
plt.axis('off')
plt.show()
Building a Generative Adversarial Network
Import Required Modules
import tensorflow as tf
from tensorflow.keras.layers import (Dense, Reshape, Flatten,
Conv2D, Conv2DTranspose,
LeakyReLU, Dropout, BatchNormalization)
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np
Network Architecture
def create_generator(latent_dim=100):
    """DCGAN-style generator: latent vector -> 28x28x1 image.

    Projects the noise to a 7x7x256 seed, then upsamples through
    transposed convolutions (7x7 -> 7x7 -> 14x14 -> 28x28). The tanh
    output matches training data scaled to [-1, 1].
    """
    net = tf.keras.Sequential()
    net.add(Dense(7 * 7 * 256, use_bias=False, input_dim=latent_dim))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha=0.2))
    net.add(Reshape((7, 7, 256)))
    net.add(Conv2DTranspose(128, (5, 5), strides=(1, 1),
                            padding='same', use_bias=False))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha=0.2))
    net.add(Conv2DTranspose(64, (5, 5), strides=(2, 2),
                            padding='same', use_bias=False))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha=0.2))
    net.add(Conv2DTranspose(1, (5, 5), strides=(2, 2),
                            padding='same', use_bias=False,
                            activation='tanh'))
    return net
def create_discriminator():
    """DCGAN-style discriminator: 28x28x1 image -> single real/fake logit.

    Two strided convolutions downsample the input; the final Dense layer
    has no activation, so outputs are logits (pair with a
    from_logits=True loss).
    """
    net = tf.keras.Sequential()
    net.add(Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                   input_shape=[28, 28, 1]))
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dropout(0.3))
    net.add(Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dropout(0.3))
    net.add(Flatten())
    net.add(Dense(1))
    return net
Training Loop Implementation
def train_gan(generator, discriminator, dataset,
              epochs=100, batch_size=256, latent_dim=100):
    """Adversarially train *generator* against *discriminator*.

    Args:
        generator: model mapping (batch, latent_dim) noise to images.
        discriminator: model mapping images to real/fake logits.
        dataset: iterable of image batches scaled to [-1, 1].
        epochs: number of full passes over *dataset*.
        batch_size: nominal batch size; noise is actually sized per batch
            (see note in training_step).
        latent_dim: dimensionality of the generator's noise input.

    Uses binary cross-entropy on logits for both networks and prints
    progress (and renders samples via visualize_generated_images) every
    10 epochs.
    """
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    gen_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4)
    disc_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4)

    @tf.function
    def training_step(real_batch):
        # Fix: size the noise to the actual batch instead of the nominal
        # batch_size, so the final (possibly smaller) batch of each epoch
        # doesn't weigh the fake-loss terms more heavily than the real ones.
        current_batch = tf.shape(real_batch)[0]
        noise = tf.random.normal([current_batch, latent_dim])
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            synthetic_images = generator(noise, training=True)
            real_predictions = discriminator(real_batch, training=True)
            fake_predictions = discriminator(synthetic_images, training=True)
            # Generator wants its fakes classified as real (label 1).
            generator_loss = loss_fn(tf.ones_like(fake_predictions),
                                     fake_predictions)
            # Discriminator wants real -> 1 and fake -> 0.
            real_loss = loss_fn(tf.ones_like(real_predictions),
                                real_predictions)
            fake_loss = loss_fn(tf.zeros_like(fake_predictions),
                                fake_predictions)
            discriminator_loss = real_loss + fake_loss
        gen_gradients = gen_tape.gradient(generator_loss,
                                          generator.trainable_variables)
        disc_gradients = disc_tape.gradient(discriminator_loss,
                                            discriminator.trainable_variables)
        gen_optimizer.apply_gradients(zip(gen_gradients,
                                          generator.trainable_variables))
        disc_optimizer.apply_gradients(zip(disc_gradients,
                                           discriminator.trainable_variables))

    for epoch in range(epochs):
        for batch in dataset:
            training_step(batch)
        if epoch % 10 == 0:
            print(f"Epoch {epoch} completed")
            visualize_generated_images(generator, epoch, latent_dim)
# Prepare MNIST dataset
(train_data, _), (_, _) = mnist.load_data()
train_data = train_data.reshape(-1, 28, 28, 1).astype('float32')
# Scale pixels from [0, 255] to [-1, 1] to match the generator's tanh output.
train_data = (train_data - 127.5) / 127.5
training_dataset = tf.data.Dataset.from_tensor_slices(train_data)\
    .shuffle(60000).batch(256)
generator_net = create_generator()
discriminator_net = create_discriminator()
# NOTE(review): train_gan calls visualize_generated_images, which is defined
# further down this file -- running the file top-to-bottom raises NameError
# on the first epoch. Move that definition above this call before executing.
train_gan(generator_net, discriminator_net, training_dataset)
Image Generation and Visualization
def visualize_generated_images(model, epoch, latent_dim, samples=16):
    """Sample *samples* images from *model* and save/show a 4x4 grid.

    Pixels are rescaled from the generator's [-1, 1] tanh range back to
    [0, 255] for display; the figure is saved as
    generated_epoch_NNNN.png before being shown.
    """
    latent_batch = tf.random.normal([samples, latent_dim])
    outputs = model(latent_batch, training=False)
    plt.figure(figsize=(4, 4))
    for idx in range(samples):
        plt.subplot(4, 4, idx + 1)
        plt.imshow(outputs[idx, :, :, 0] * 127.5 + 127.5, cmap='gray')
        plt.axis('off')
    plt.tight_layout()
    plt.savefig(f'generated_epoch_{epoch:04d}.png')
    plt.show()