Fading Coder

One Final Commit for the Last Sprint

Home > Tech > Content

Creating Artistic Images with Neural Networks: Style Transfer and GAN Implementation

Tech 3

Creating Artistic Images with Neural Networks: Style Transfer and GAN Implementation

Introduction

Neural networks, particularly Convolutional Neural Networks (CNNs) and Generative Adversarial Networks (GANs), have demonstrated remarkable capabilities in generating and transforming visual art. This guide provides practical implementations for creating artistic images using these two fundamental approaches.

Core Concepts

Convolutional Neural Networks for Style Transfer

CNNs excel at extracting hierarchical features from images. Style transfer leverages these learned representations to separate and recombine the content of one image with the stylistic elements of another.

Generative Adversarial Networks for Image Synthesis

GANs consist of two competing networks: a Generator that creates new images from random noise, and a Discriminator that evaluates their authenticity. Through adversarial training, the Generator learns to produce increasingly realistic outputs.

Environment Setup

Install the required Python libraries:

pip install numpy matplotlib tensorflow opencv-python

Implementing Neural Style Transfer

Import Dependencies

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image as keras_image
from tensorflow.keras.applications import vgg19
import matplotlib.pyplot as plt
import cv2

Image Preprocessing Functions

def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image from disk and convert it into a VGG19-ready batch tensor.

    Returns a (1, H, W, 3) array that has been through vgg19.preprocess_input
    (RGB->BGR, ImageNet mean subtraction).
    """
    loaded = keras_image.load_img(image_path, target_size=target_size)
    batch = np.expand_dims(keras_image.img_to_array(loaded), axis=0)
    return vgg19.preprocess_input(batch)

def convert_to_display(image_tensor):
    """Undo VGG19 preprocessing so the array can be shown with matplotlib.

    Drops the batch axis if present, adds the ImageNet per-channel means back,
    flips BGR to RGB, and clips to displayable uint8 range.
    """
    if image_tensor.ndim == 4:
        restored = np.squeeze(image_tensor.copy(), 0)
    else:
        restored = image_tensor.copy()
    # ImageNet channel means that vgg19.preprocess_input subtracted (B, G, R).
    restored[:, :, 0] += 103.939
    restored[:, :, 1] += 116.779
    restored[:, :, 2] += 123.68
    # BGR -> RGB for display.
    restored = restored[:, :, ::-1]
    return np.clip(restored, 0, 255).astype('uint8')

Loss Function Definitions

def compute_content_loss(original_features, generated_features):
    """Mean squared error between two sets of content features."""
    diff = original_features - generated_features
    return tf.reduce_mean(tf.square(diff))

def compute_gram_matrix(feature_map):
    """Return the (C x C) Gram matrix of a feature map, normalized by the
    number of spatial positions it was computed over."""
    depth = int(feature_map.shape[-1])
    # Collapse all spatial dimensions into rows of channel vectors.
    columns = tf.reshape(feature_map, [-1, depth])
    correlations = tf.matmul(columns, columns, transpose_a=True)
    count = tf.cast(tf.shape(columns)[0], tf.float32)
    return correlations / count

def compute_style_loss(generated_features, target_gram):
    """MSE between the generated image's Gram matrix and the style target's."""
    return tf.reduce_mean(
        tf.square(compute_gram_matrix(generated_features) - target_gram))

Feature Extraction Model

def create_feature_extractor():
    """Build a frozen VGG19 sub-model whose outputs are five style layers
    followed by one content layer (order matters for downstream slicing)."""
    backbone = vgg19.VGG19(include_top=False, weights='imagenet')
    backbone.trainable = False

    # Five style taps first, the single content tap last.
    layer_names = ['block1_conv1', 'block2_conv1', 'block3_conv1',
                   'block4_conv1', 'block5_conv1', 'block5_conv2']
    outputs = [backbone.get_layer(name).output for name in layer_names]
    return tf.keras.Model(inputs=backbone.input, outputs=outputs)

Optimization Procedure

def calculate_total_loss(model, weights, current_image,
                         target_style_grams, target_content_features):
    """Compute the weighted style + content loss for the current image.

    weights is (style_weight, content_weight).
    Returns (total_loss, style_loss, content_loss).
    """
    outputs = model(current_image)
    n_style = len(target_style_grams)

    # The extractor emits style features first, then content features.
    style_loss = sum(
        compute_style_loss(feat[0], gram)
        for feat, gram in zip(outputs[:n_style], target_style_grams)
    ) / n_style

    content_loss = sum(
        compute_content_loss(feat[0], target)
        for feat, target in zip(outputs[n_style:], target_content_features)
    ) / len(target_content_features)

    style_weight, content_weight = weights
    total_loss = style_weight * style_loss + content_weight * content_loss
    return total_loss, style_loss, content_loss

@tf.function
def compute_gradients(config):
    """Run one forward pass and return (d total_loss / d current_image, losses).

    config is the dict built in perform_style_transfer; its 'current_image'
    entry is a tf.Variable, so the gradient tape tracks it automatically.
    losses is the (total, style, content) tuple from calculate_total_loss.

    NOTE(review): passing a dict of Python objects into a @tf.function can
    cause retracing if the dict contents change identity between calls; here
    the dict is built once per transfer run, so this should trace only once —
    verify if refactoring.
    """
    with tf.GradientTape() as tape:
        losses = calculate_total_loss(**config)
    total_loss = losses[0]
    return tape.gradient(total_loss, config['current_image']), losses

Style Transfer Execution

def perform_style_transfer(content_img_path, style_img_path, 
                          iterations=1000, style_strength=1e-2, content_strength=1e3):
    """Run gradient-based neural style transfer.

    Args:
        content_img_path: path to the content image on disk.
        style_img_path: path to the style image on disk.
        iterations: number of optimization steps.
        style_strength: weight applied to the style loss.
        content_strength: weight applied to the content loss.

    Returns:
        (best_image, best_loss): the displayable uint8 image with the lowest
        total loss observed during optimization, and that loss value.
        NOTE(review): best_loss ends up as a TF tensor once any step improves
        on float('inf'); callers formatting it with ':.2f' may need
        float(best_loss) — confirm.
    """
    feature_model = create_feature_extractor()
    
    content_img = preprocess_image(content_img_path)
    style_img = preprocess_image(style_img_path)
    # The generated image starts as a copy of the content image and is the
    # only trainable quantity in the whole procedure.
    generated_img = tf.Variable(content_img, dtype=tf.float32)
    
    optimizer = tf.optimizers.Adam(learning_rate=5, beta_1=0.99, epsilon=1e-1)
    
    # The feature extractor's outputs are 5 style layers then 1 content layer.
    style_outputs = feature_model(style_img)[:5]
    content_output = feature_model(content_img)[5:]
    
    # Style targets are fixed Gram matrices of the style image's features.
    target_grams = [compute_gram_matrix(feat) for feat in style_outputs]
    
    config = {
        'model': feature_model,
        'weights': (style_strength, content_strength),
        'current_image': generated_img,
        'target_style_grams': target_grams,
        'target_content_features': content_output
    }
    
    # Valid pixel bounds in VGG19-preprocessed (mean-subtracted) space.
    mean_values = np.array([103.939, 116.779, 123.68])
    min_val = -mean_values
    max_val = 255 - mean_values
    
    optimal_loss, optimal_image = float('inf'), None
    
    for step in range(iterations):
        gradients, loss_values = compute_gradients(config)
        total_loss, style_val, content_val = loss_values
        
        optimizer.apply_gradients([(gradients, generated_img)])
        # Re-clamp the image to the displayable range after each update.
        clipped = tf.clip_by_value(generated_img, min_val, max_val)
        generated_img.assign(clipped)
        
        # Track the best image seen so far rather than just the final one.
        if total_loss < optimal_loss:
            optimal_loss = total_loss
            optimal_image = convert_to_display(generated_img.numpy())
    
    return optimal_image, optimal_loss

# Run the transfer on the sample images and display the best frame found.
result_image, final_loss = perform_style_transfer('content.jpg', 'style.jpg')
plt.imshow(result_image)
# NOTE(review): final_loss is whatever perform_style_transfer returned (a TF
# tensor here, unless no step improved); confirm it supports the ':.2f' spec.
plt.title(f"Final Loss: {final_loss:.2f}")
plt.axis('off')
plt.show()

Building a Generative Adversarial Network

Import Required Modules

import tensorflow as tf
from tensorflow.keras.layers import (Dense, Reshape, Flatten, 
                                     Conv2D, Conv2DTranspose, 
                                     LeakyReLU, Dropout, BatchNormalization)
from tensorflow.keras.datasets import mnist
import matplotlib.pyplot as plt
import numpy as np

Network Architecture

def create_generator(latent_dim=100):
    """Build the DCGAN generator: latent vector -> 28x28x1 image in [-1, 1].

    Upsampling path: dense 7*7*256 seed -> 7x7 -> 14x14 -> 28x28, with batch
    norm + LeakyReLU between deconvolutions and tanh at the output.
    """
    net = tf.keras.Sequential()
    net.add(Dense(7 * 7 * 256, use_bias=False, input_dim=latent_dim))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha=0.2))
    net.add(Reshape((7, 7, 256)))
    # Stride 1: spatial size stays 7x7 while depth drops to 128.
    net.add(Conv2DTranspose(128, (5, 5), strides=(1, 1),
                            padding='same', use_bias=False))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha=0.2))
    # 7x7 -> 14x14.
    net.add(Conv2DTranspose(64, (5, 5), strides=(2, 2),
                            padding='same', use_bias=False))
    net.add(BatchNormalization())
    net.add(LeakyReLU(alpha=0.2))
    # 14x14 -> 28x28; tanh matches the [-1, 1] normalized training data.
    net.add(Conv2DTranspose(1, (5, 5), strides=(2, 2),
                            padding='same', use_bias=False,
                            activation='tanh'))
    return net

def create_discriminator():
    """Build the DCGAN discriminator: 28x28x1 image -> single real/fake logit."""
    net = tf.keras.Sequential()
    net.add(Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                   input_shape=[28, 28, 1]))
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dropout(0.3))
    net.add(Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    net.add(LeakyReLU(alpha=0.2))
    net.add(Dropout(0.3))
    net.add(Flatten())
    # No sigmoid here: the training loss is configured with from_logits=True.
    net.add(Dense(1))
    return net

Training Loop Implementation

def train_gan(generator, discriminator, dataset, 
             epochs=100, batch_size=256, latent_dim=100):
    """Adversarially train a generator/discriminator pair.

    Args:
        generator: Keras model mapping (batch, latent_dim) noise -> images.
        discriminator: Keras model mapping images -> real/fake logits.
        dataset: iterable (e.g. tf.data.Dataset) of real image batches.
        epochs: number of passes over the dataset.
        batch_size: size of the noise batch fed to the generator each step.
        latent_dim: dimensionality of the noise vectors.
    """
    
    # from_logits=True pairs with the discriminator's final Dense(1) (no sigmoid).
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    gen_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4)
    disc_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4)
    
    @tf.function
    def training_step(real_batch):
        # NOTE(review): the noise batch always uses `batch_size`, while the last
        # dataset batch may be smaller. The real and fake losses are computed
        # separately, so shapes never need to match — but the fake batch can be
        # larger than the real one on that final step; confirm this is intended.
        noise = tf.random.normal([batch_size, latent_dim])
        
        # Two tapes: each network's gradients are taken w.r.t. its own loss.
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            synthetic_images = generator(noise, training=True)
            
            real_predictions = discriminator(real_batch, training=True)
            fake_predictions = discriminator(synthetic_images, training=True)
            
            # Generator wants its fakes classified as real (label 1).
            generator_loss = loss_fn(tf.ones_like(fake_predictions), 
                                     fake_predictions)
            
            # Discriminator wants reals -> 1 and fakes -> 0.
            real_loss = loss_fn(tf.ones_like(real_predictions), real_predictions)
            fake_loss = loss_fn(tf.zeros_like(fake_predictions), fake_predictions)
            discriminator_loss = real_loss + fake_loss
        
        gen_gradients = gen_tape.gradient(generator_loss, 
                                          generator.trainable_variables)
        disc_gradients = disc_tape.gradient(discriminator_loss, 
                                            discriminator.trainable_variables)
        
        gen_optimizer.apply_gradients(zip(gen_gradients, 
                                          generator.trainable_variables))
        disc_optimizer.apply_gradients(zip(disc_gradients, 
                                           discriminator.trainable_variables))
    
    for epoch in range(epochs):
        for batch in dataset:
            training_step(batch)
        
        # Progress report + sample image grid every 10 epochs (including epoch 0).
        if epoch % 10 == 0:
            print(f"Epoch {epoch} completed")
            visualize_generated_images(generator, epoch, latent_dim)

# Prepare MNIST dataset
# Add a channel axis and rescale pixels from [0, 255] to [-1, 1] so real
# images match the generator's tanh output range.
(train_data, _), (_, _) = mnist.load_data()
train_data = train_data.reshape(-1, 28, 28, 1).astype('float32')
train_data = (train_data - 127.5) / 127.5

training_dataset = tf.data.Dataset.from_tensor_slices(train_data)\
    .shuffle(60000).batch(256)

generator_net = create_generator()
discriminator_net = create_discriminator()
# NOTE(review): as the file is ordered, this call raises NameError — train_gan
# invokes visualize_generated_images at epoch 0, but that function is defined
# *below* this line. Move its definition above this call before running.
train_gan(generator_net, discriminator_net, training_dataset)

Image Generation and Visualization

def visualize_generated_images(model, epoch, latent_dim, samples=16):
    """Sample images from the generator, show a 4x4 grid, and save it to
    'generated_epoch_<epoch>.png'."""
    latent_batch = tf.random.normal([samples, latent_dim])
    images = model(latent_batch, training=False)

    plt.figure(figsize=(4, 4))
    for idx in range(samples):
        plt.subplot(4, 4, idx + 1)
        # Map tanh output in [-1, 1] back to displayable [0, 255] grayscale.
        plt.imshow(images[idx, :, :, 0] * 127.5 + 127.5, cmap='gray')
        plt.axis('off')
    plt.tight_layout()
    plt.savefig(f'generated_epoch_{epoch:04d}.png')
    plt.show()

Related Articles

Understanding Strong and Weak References in Java

Strong References Strong references are the most prevalent type of object referencing in Java. When an object has a strong reference pointing to it, the garbage collector will not reclaim its memory. F...

Comprehensive Guide to SSTI Explained with Payload Bypass Techniques

Introduction Server-Side Template Injection (SSTI) is a vulnerability in web applications where user input is improperly handled within the template engine and executed on the server. This exploit can r...

Implement Image Upload Functionality for Django Integrated TinyMCE Editor

Django’s Admin panel is highly user-friendly, and pairing it with TinyMCE, an effective rich text editor, simplifies content management significantly. Combining the two is particularly useful for bloggi...

Leave a Comment

Anonymous

◎Feel free to join the discussion and share your thoughts.