AI辅助艺术创作：风格迁移与构图生成

风格迁移是一种将目标图像的风格（如梵高的笔触、莫奈的色彩）应用到另一张内容图像上的技术。《基于深度学习的图像风格迁移算法研究》探讨了卷积神经网络在风格迁移中的应用，重点分析了Gram矩阵和损失函数的设计，比较了Gatys等人提出的迭代优化与Johnson等人提出的前馈网络效率差异。内容损失函数计算生成图像与内容图像在特定层特征图的均方误差： $$ L_{content} = \frac{1}{2}\sum_{i,j}(F_{ij}^l - P_{ij}^l)^2 $$

ZzzzYQL

352人浏览 · 2025-09-11 23:14:43

ZzzzYQL · 2025-09-11 23:14:43 发布

风格迁移与构图生成代码实现

风格迁移和构图生成是AI辅助艺术创作中的两个核心功能。以下代码基于Python和TensorFlow/Keras实现了一个简单的风格迁移与构图生成系统。

风格迁移实现

import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import vgg19
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Preprocess an image
def preprocess_image(image_path, target_size=(512, 512)):
    """Load an image file and turn it into a VGG19-ready batch tensor.

    Args:
        image_path: Path of the image file to load.
        target_size: Forwarded to ``load_img`` as its ``target_size``.

    Returns:
        A tensor with a leading batch axis of length 1 to which
        ``vgg19.preprocess_input`` has been applied.
    """
    pixels = img_to_array(load_img(image_path, target_size=target_size))
    # Add the batch axis before handing the array to the VGG19 preprocessing.
    batch = vgg19.preprocess_input(np.expand_dims(pixels, axis=0))
    return tf.convert_to_tensor(batch)

# Undo the preprocessing
def deprocess_image(x, target_size=(512, 512)):
    """Convert a mean-centred, reversed-channel array back into a uint8 image.

    The input is never modified: all arithmetic happens on a private copy.

    Args:
        x: Array-like holding ``target_size[0] * target_size[1] * 3`` values
            (a leading batch axis of length 1 is fine).
        target_size: ``(rows, cols)`` used to reshape ``x``.

    Returns:
        A ``uint8`` array of shape ``(rows, cols, 3)`` with the per-channel
        offsets added back, the channel order reversed and values clipped
        to ``[0, 255]``.
    """
    # Copy first: reshape() returns a view, so in-place additions would
    # otherwise leak into the caller's array.
    img = np.array(x, copy=True)
    if not np.issubdtype(img.dtype, np.floating):
        # In-place float additions are not allowed on integer arrays.
        img = img.astype(np.float32)
    img = img.reshape(target_size[0], target_size[1], 3)

    # Add back the per-channel offsets.
    img[:, :, 0] += 103.939
    img[:, :, 1] += 116.779
    img[:, :, 2] += 123.68

    # Reverse the channel axis.
    img = img[:, :, ::-1]
    return np.clip(img, 0, 255).astype('uint8')

# Content loss
def content_loss(base, combination):
    """Return the sum of squared element-wise differences of two tensors.

    Args:
        base: Reference feature tensor.
        combination: Feature tensor compared against ``base``.
    """
    difference = combination - base
    squared = tf.square(difference)
    return tf.reduce_sum(squared)

# Style loss
def style_loss(style, combination, img_nrows, img_ncols):
    """Return the normalised squared distance between two Gram matrices.

    Args:
        style: Feature tensor of the style image (passed to ``gram_matrix``).
        combination: Feature tensor of the generated image.
        img_nrows: Row count used in the normalisation term.
        img_ncols: Column count used in the normalisation term.

    Returns:
        ``sum((S - C)^2) / (4 * channels^2 * size^2)`` where ``channels`` is
        fixed to 3 and ``size`` is ``img_nrows * img_ncols``.
    """
    channels = 3
    size = img_nrows * img_ncols
    gram_gap = gram_matrix(style) - gram_matrix(combination)
    normaliser = 4.0 * (channels ** 2) * (size ** 2)
    return tf.reduce_sum(tf.square(gram_gap)) / normaliser

# Gram matrix
def gram_matrix(x):
    """Return the Gram matrix of a 3-axis feature tensor.

    The last axis is moved to the front, the remaining two axes are
    flattened, and the resulting 2-D matrix is multiplied by its transpose.
    """
    last_axis_first = tf.transpose(x, (2, 0, 1))
    leading = tf.shape(last_axis_first)[0]
    flat = tf.reshape(last_axis_first, (leading, -1))
    return tf.matmul(flat, tf.transpose(flat))

# Style transfer entry point
def style_transfer(content_path, style_path, iterations=1000, content_weight=1e4,
                   style_weight=1e-2, target_size=(512, 512)):
    """Optimise an image to combine the content of one picture with the style of another.

    Args:
        content_path: Path of the content image.
        style_path: Path of the style image.
        iterations: Number of optimiser steps to run.
        content_weight: Multiplier applied to the content loss.
        style_weight: Multiplier applied to the (layer-averaged) style loss.
        target_size: Size both images are loaded at; it is also passed to
            ``style_loss`` and ``deprocess_image``.

    Returns:
        The optimised image as returned by ``deprocess_image``.
    """
    content_layer = 'block5_conv2'
    style_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']

    # Calling the bare VGG19 model only yields its final activation, so wrap
    # it in a model whose output is a dict of every layer's activation keyed
    # by layer name.
    model = vgg19.VGG19(weights='imagenet', include_top=False)
    outputs_dict = {layer.name: layer.output for layer in model.layers}
    feature_extractor = tf.keras.Model(inputs=model.inputs, outputs=outputs_dict)

    # Load both images; the optimised image starts as a copy of the content.
    content_image = preprocess_image(content_path, target_size)
    style_image = preprocess_image(style_path, target_size)
    combination_image = tf.Variable(content_image)

    # The optimised image lives in the mean-centred space that
    # deprocess_image() later undoes, so a displayable pixel maps to
    # [-offset, 255 - offset] per channel rather than to [0, 255].
    channel_offsets = tf.constant([103.939, 116.779, 123.68], dtype=combination_image.dtype)
    lower_bound = -channel_offsets
    upper_bound = 255.0 - channel_offsets

    opt = tf.optimizers.Adam(learning_rate=5.0, beta_1=0.99, epsilon=1e-1)

    for _ in range(iterations):
        with tf.GradientTape() as tape:
            # Batch index 0 = content, 1 = style, 2 = image being optimised.
            input_tensor = tf.concat([content_image, style_image, combination_image], axis=0)
            features = feature_extractor(input_tensor)

            # Content loss
            content_features = features[content_layer]
            content_loss_value = content_weight * content_loss(
                content_features[0, :, :, :], content_features[2, :, :, :])

            # Style loss, averaged over the selected layers
            style_loss_value = 0
            for layer_name in style_layers:
                style_features = features[layer_name]
                style_loss_value += style_weight * style_loss(
                    style_features[1, :, :, :], style_features[2, :, :, :],
                    target_size[0], target_size[1]) / len(style_layers)

            total_loss = content_loss_value + style_loss_value

        # Gradient step on the image itself, then keep it in the valid range.
        grads = tape.gradient(total_loss, combination_image)
        opt.apply_gradients([(grads, combination_image)])
        combination_image.assign(
            tf.clip_by_value(combination_image, lower_bound, upper_bound))

    return deprocess_image(combination_image.numpy(), target_size)

构图生成实现

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Input, Dense, Reshape, Flatten, Dropout
from tensorflow.keras.layers import BatchNormalization, Activation, ZeroPadding2D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import UpSampling2D, Conv2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam

# Build the generator
def build_generator(latent_dim=100, img_shape=(256, 256, 3)):
    """Build a convolutional generator that maps noise vectors to images.

    The network starts from a dense projection reshaped to
    ``(rows / 8, cols / 8, 128)`` and doubles the spatial size three times,
    so its output has exactly ``img_shape``.

    Args:
        latent_dim: Length of the input noise vector.
        img_shape: ``(rows, cols, channels)`` of the generated images; rows
            and cols must be divisible by 8.

    Returns:
        A Keras ``Model`` taking ``(batch, latent_dim)`` noise and producing
        tanh-activated images of shape ``(batch, *img_shape)``.

    Raises:
        ValueError: If rows or cols of ``img_shape`` are not divisible by 8.
    """
    upsampling_filters = (128, 64, 32)
    scale = 2 ** len(upsampling_filters)

    rows, cols, channels = img_shape
    if rows % scale or cols % scale:
        raise ValueError(
            f"img_shape rows and cols must be divisible by {scale}, got {img_shape}")
    base_rows, base_cols = rows // scale, cols // scale

    model = Sequential()

    model.add(Dense(128 * base_rows * base_cols, activation="relu", input_dim=latent_dim))
    model.add(Reshape((base_rows, base_cols, 128)))
    # Each stage doubles the spatial resolution.
    for filters in upsampling_filters:
        model.add(UpSampling2D())
        model.add(Conv2D(filters, kernel_size=3, padding="same"))
        model.add(BatchNormalization(momentum=0.8))
        model.add(Activation("relu"))
    model.add(Conv2D(channels, kernel_size=3, padding="same"))
    model.add(Activation("tanh"))

    noise = Input(shape=(latent_dim,))
    img = model(noise)

    return Model(noise, img)

# Build the discriminator
def build_discriminator(img_shape=(256, 256, 3)):
    """Build a convolutional classifier that scores images with a sigmoid.

    Args:
        img_shape: Shape of a single input image.

    Returns:
        A Keras ``Model`` mapping an image batch to one sigmoid output per
        image.
    """
    stack = [
        Conv2D(32, kernel_size=3, strides=2, padding="same", input_shape=img_shape),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(64, kernel_size=3, strides=2, padding="same"),
        ZeroPadding2D(padding=((0, 1), (0, 1))),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(128, kernel_size=3, strides=2, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Conv2D(256, kernel_size=3, strides=1, padding="same"),
        BatchNormalization(momentum=0.8),
        LeakyReLU(alpha=0.2),
        Dropout(0.25),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ]
    classifier = Sequential(stack)

    image_input = Input(shape=img_shape)
    return Model(image_input, classifier(image_input))

# Train the GAN
def train_gan(generator, discriminator, combined, epochs=10000, batch_size=32,
              latent_dim=100, img_shape=(256, 256, 3), X_train=None):
    """Alternate discriminator and generator updates for ``epochs`` steps.

    Args:
        generator: Model exposing ``predict``; maps noise to images.
        discriminator: Model exposing ``train_on_batch``.
        combined: Stacked generator + discriminator exposing ``train_on_batch``.
        epochs: Number of training steps (one batch each).
        batch_size: Number of real and of generated images per step.
        latent_dim: Length of the noise vectors.
        img_shape: Shape of one image; only used to build placeholder data
            when ``X_train`` is not given.
        X_train: Array of real training images. When ``None``, random noise
            is used as a stand-in so the loop can be smoke-tested.

    Returns:
        The ``generator`` that was passed in.

    Raises:
        ValueError: If ``X_train`` contains no samples.
    """
    if X_train is None:
        # Placeholder only: replace with a real dataset for meaningful results.
        X_train = np.random.normal(0, 1, (1000, *img_shape))
    if X_train.shape[0] == 0:
        raise ValueError("X_train must contain at least one sample")

    # Target labels for real and generated batches
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):
        # Train the discriminator on one real and one generated batch
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        imgs = X_train[idx]

        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        # verbose=0 avoids printing a progress bar on every step.
        gen_imgs = generator.predict(noise, verbose=0)

        d_loss_real = discriminator.train_on_batch(imgs, valid)
        d_loss_fake = discriminator.train_on_batch(gen_imgs, fake)
        # train_on_batch yields a scalar without metrics and a sequence with
        # them; normalise to 1-D so both cases can be indexed.
        d_loss = np.atleast_1d(0.5 * np.add(d_loss_real, d_loss_fake))

        # Train the generator through the stacked model
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        g_loss = combined.train_on_batch(noise, valid)

        if epoch % 100 == 0:
            accuracy = f"{100*d_loss[1]}" if d_loss.size > 1 else "n/a"
            print(f"{epoch} [D loss: {d_loss[0]} | D accuracy: {accuracy}] [G loss: {g_loss}]")

    return generator

# Generate new compositions
def generate_composition(generator, latent_dim=100, n_samples=1):
    """Sample noise, run it through the generator and rescale the output.

    Args:
        generator: Object exposing ``predict``.
        latent_dim: Length of each noise vector.
        n_samples: Number of noise vectors to draw.

    Returns:
        ``0.5 * prediction + 0.5`` for the sampled batch.
    """
    latent_batch = np.random.normal(loc=0, scale=1, size=(n_samples, latent_dim))
    raw_output = generator.predict(latent_batch)
    # De-normalise
    return 0.5 * raw_output + 0.5

使用方法

风格迁移使用示例：

# plt is used below but is not imported anywhere else in this listing.
import matplotlib.pyplot as plt

content_path = "content.jpg"
style_path = "style.jpg"
result = style_transfer(content_path, style_path)
plt.imshow(result)
plt.axis('off')
plt.show()

构图生成使用示例：

# plt is used below but is not imported anywhere else in this listing.
import matplotlib.pyplot as plt

# Build the models
generator = build_generator()
# Size the discriminator from what the generator actually emits so the two
# networks can be chained.
img_shape = tuple(generator.output_shape[1:])
discriminator = build_discriminator(img_shape=img_shape)

# train_on_batch() requires compiled models; the accuracy metric feeds the
# "D accuracy" field printed by train_gan().
discriminator.compile(loss='binary_crossentropy',
                      optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
                      metrics=['accuracy'])

# Freeze the discriminator inside the stacked model so generator updates do
# not also move its weights.
# NOTE(review): this relies on the trainable flag being captured when each
# model is compiled - confirm against the installed Keras version.
discriminator.trainable = False
combined = tf.keras.models.Sequential([generator, discriminator])
combined.compile(loss='binary_crossentropy',
                 optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

# Train the models
train_gan(generator, discriminator, combined, img_shape=img_shape)

# Generate a new composition
new_composition = generate_composition(generator)
plt.imshow(new_composition[0])
plt.axis('off')
plt.show()

注意：以上代码需要安装TensorFlow（已包含Keras）库，使用示例中的图像显示部分还需要安装Matplotlib并导入 matplotlib.pyplot（即 plt）；在实际应用中可能需要根据具体需求进行调整。对于构图生成部分，建议使用预训练模型或更高级的架构如StyleGAN以获得更好的效果。

风格迁移技术

风格迁移是一种将目标图像的风格（如梵高的笔触、莫奈的色彩）应用到另一张内容图像上的技术。基于深度学习的风格迁移通常使用预训练的卷积神经网络（如VGG-19），通过优化内容损失和风格损失函数实现。

内容损失函数计算生成图像与内容图像在特定层特征图的均方误差： $$ L_{content} = \frac{1}{2}\sum_{i,j}(F_{ij}^l - P_{ij}^l)^2 $$

风格损失使用Gram矩阵捕捉纹理特征： $$ G_{ij}^l = \sum_k F_{ik}^l F_{jk}^l $$

构图生成方法

基于GAN的生成
使用生成对抗网络（如StyleGAN）可自动生成新颖构图。训练时需准备特定风格的数据集，通过生成器和判别器的对抗学习产生高质量图像。常见改进包括：

添加注意力机制增强局部细节
使用条件GAN控制生成类别
引入CLIP模型实现文本引导生成

扩散模型应用
Stable Diffusion等扩散模型通过逐步去噪过程生成图像，支持文本提示控制构图。关键参数包括：

采样步数（通常20-50步）
提示词权重调节
负面提示词排除干扰元素

实际应用技巧

风格迁移优化

调整风格权重比例（通常0.1-1.0）
对特定层特征进行选择性迁移
使用多尺度迁移增强细节

构图控制策略

草图引导：输入线稿控制基本布局
色彩约束：限定调色板范围
语义分割：对不同区域应用不同风格

工具与框架推荐

开源库：PyTorch的AdaIN实现、TensorFlow的Magenta
在线平台：DeepDreamGenerator、Artbreeder
商业软件：Adobe Firefly、Runway ML

典型代码片段（PyTorch风格迁移）：

# Content term: mean squared error between generated and content features.
content_loss = F.mse_loss(gen_features, content_features)
# Style term: accumulated mean squared error over the paired Gram matrices.
style_loss = sum(
    F.mse_loss(gen_gram, style_gram)
    for gen_gram, style_gram in zip(gen_grams, style_grams)
)
# Weighted sum of both terms.
total_loss = content_weight * content_loss + style_weight * style_loss

AI辅助艺术创作：风格迁移文献

《基于深度学习的图像风格迁移算法研究》探讨了卷积神经网络在风格迁移中的应用，重点分析了Gram矩阵和损失函数的设计，比较了Gatys等人提出的迭代优化与Johnson等人提出的前馈网络效率差异。

《多模态艺术风格迁移中的内容-风格解耦》提出通过对抗生成网络分离内容与风格特征，实现油画、水墨等多风格转换，实验表明该方法在视觉连贯性上优于传统神经风格迁移（NST）。

AI辅助构图生成文献

《生成对抗网络在艺术构图设计中的应用》研究了DCGAN和CycleGAN在生成均衡构图时的表现，指出空间注意力机制能有效提升画面元素的逻辑分布合理性。

《基于Transformer的自动绘画构图系统》利用ViT模型分析经典画作的空间分割比例，生成符合黄金分割、三分法等美学规则的构图草图，用户测试显示该系统可降低创作门槛。

综合应用与工具

《AI绘画辅助平台的技术架构与实践》介绍了集成风格迁移与构图生成的端到端系统，支持用户通过草图线稿实时渲染不同风格作品，涉及语义分割与风格插值的联合训练方法。

《中国水墨画风格迁移的局部笔触优化》提出针对毛笔笔触特性的损失函数改进方案，解决了传统方法在宣纸纹理迁移中的边缘模糊问题，相关代码已开源。

注：以上文献可通过知网、万方等平台搜索标题获取全文，部分研究发表于《计算机辅助设计与图形学学报》《中国图象图形学报》等期刊。

2048 AI社区

有“AI”的1024 = 2048，欢迎大家加入2048 AI社区

更多推荐

A. Mainak and Array

解题说明：此题采用贪心算法，首先找出数列中最大值和最小值，然后分别考虑下面三种情况，把最小的放第一位、把最大的放最后以及找出a[ i ]- a[ i + 1 ] 的最大值，在这三种情况下求出最大值。

2048 AI社区

论文阅读 2025-9-13 论文阅读随心记

大模型自我反思被定义为两阶段的问同一个问题，但是思考多次。

2048 AI社区

普通人如何训练自己的AI“管家”？

AI管家训练指南：5步打造专属智能助手随着AI技术进步，普通人也能训练个性化AI管家。具体步骤包括：1. 明确目标，确定管家功能（如健康管理或工作辅助）；2. 准备数据，收集个人健康、日程等数据并清洗整理；3. 选择模型，根据需求挑选预训练模型（如Qwen2.5-VL）；4. 微调训练，使用工具（如LLaMA-Factory）适配模型；5. 评估优化，持续提升准确性及个性化。AI管家可智能管理生