基于奇异值分解（SVD）的图像处理大作业（变脸效果）

原创已于 2025-01-03 19:28:16 修改 · 1k 阅读

12 ·

CC 4.0 BY-SA版权

文章标签：

#图像处理 #人工智能

于 2025-01-03 18:46:29 首次发布

一、环境搭建

opencv-python==4.8.0.74
numpy == 1.24.3
dlib == 19.24.6
python==3.8.0

二、SVD 原理

奇异值分解（SVD）是线性代数中一种重要的矩阵分解方法。对于一个矩阵 $A_{m\times n}$，它可以分解为 $A=U\Sigma V^{T}$。其中，$U$ 是一个 $m\times m$ 的正交矩阵，其列向量称为左奇异向量；$\Sigma$ 是一个 $m\times n$ 的对角矩阵，对角线上的元素 $\sigma_{i}$ 是奇异值，且满足 $\sigma_{1}\geqslant\sigma_{2}\geqslant\sigma_{3}\geqslant\cdots\geqslant\sigma_{r}\geqslant 0$（其中 $r=\min(m,n)$）；$V$ 是一个 $n\times n$ 的正交矩阵，其列向量称为右奇异向量。

在本次作业中，使用 Python 的 opencv-python 库（cv2 模块）来读取图片。SVD 作用于二维矩阵，因此通常需要先将彩色图像转换为灰度图像；但本次作业不做灰度转换，而是对彩色图像的每个颜色通道分别进行 SVD 分解。

""" 
       @File     : svd分解.py
       @IDE      : PyCharm
       @Author   : xiaoshu
       @Date     : 2024/12/31 12:15   
"""

import cv2
import numpy as np

# Load the demo image; cv2.imread yields a NumPy array, or None on failure.
img = cv2.imread('1.jpg')
if img is None:
    # Without this check the failure would only surface later as an opaque
    # OpenCV error inside cv2.resize.
    raise OSError("could not read image: 1.jpg")
# Shrink the (large) source picture; dsize is given as (width, height).
img = cv2.resize(img, (300, 400))
cv2.imshow('img', img)  # show the resized image
# Block until a key is pressed; the windows are closed only if it was SPACE.
if cv2.waitKey(0) == ord(' '):
    cv2.destroyAllWindows()

# OpenCV stores colour images in B, G, R channel order, so channel 0 is blue
# and channel 2 is red.
b, g, r = img[:, :, 0], img[:, :, 1], img[:, :, 2]

# Reduced SVD of every channel. np.linalg.svd returns the singular values as
# a 1-D vector and the right singular vectors already transposed (V^T), so a
# diagonal matrix is built explicitly for each channel.
Ub, sb, Vb = np.linalg.svd(b, full_matrices=False)
Sb = np.diag(sb)
Ug, sg, Vg = np.linalg.svd(g, full_matrices=False)
Sg = np.diag(sg)
Ur, sr, Vr = np.linalg.svd(r, full_matrices=False)
Sr = np.diag(sr)
# Print the factors of the red channel (Vr holds V^T, see above).
print("矩阵U：{}".format(Ur))
print("矩阵S：{}".format(Sr))
print("矩阵V：{}".format(Vr))

三、图像压缩

图像压缩的基本原理是通过保留部分较大的奇异值、舍弃较小的奇异值来实现。设原始矩阵 $A=U\Sigma V^{T}$，当保留前 $k$ 个奇异值时，压缩后的矩阵为 $A_{k}=U_{k}\Sigma_{k}V_{k}^{T}$，其中 $U_{k}$ 是 $U$ 的前 $k$ 列，$\Sigma_{k}$ 是 $\Sigma$ 的 $k\times k$ 左上角子矩阵，$V_{k}$ 是 $V$ 的前 $k$ 列。

""" 
       @File     : 压缩重构.py
       @IDE      : PyCharm
       @Author   : xiaoshu
       @Date     : 2024/12/31 13:35   
"""
import numpy as np
import matplotlib.pyplot as plt
import cv2

def zip_img(img, rate=0.1):
    """Compress an image with a truncated SVD of each colour channel.

    For each of the first three channels the singular values are summed,
    largest first, until the running sum reaches ``rate`` times the sum of
    all singular values of that channel. Only those leading components are
    used to rebuild the channel. Every rebuilt channel is then min-max
    rescaled to the 0..255 range.

    :param img: array of shape (height, width, channels) with at least three
        channels; only channels 0, 1 and 2 are processed.
    :param rate: fraction of the singular-value sum to retain. At least one
        component is always kept, and values that cannot be reached
        (e.g. ``rate`` > 1) simply keep every component.
    :return: ``uint8`` array with the same shape as ``img``.
    """
    # Spans below this are treated as numerical noise of a flat channel.
    flat_span = 1e-6

    zip_image = np.zeros(img.shape, dtype=np.float64)
    for channel in range(3):
        # The reduced SVD is enough: only the leading columns/rows are used.
        U, s, Vt = np.linalg.svd(img[:, :, channel], full_matrices=False)
        total = np.sum(s)
        if total == 0:
            # All-zero channel: nothing to reconstruct, and the ratio below
            # would be 0/0.
            continue
        # energy[i] is the share of the singular-value sum covered by the
        # first i + 1 components; it is non-decreasing, so a binary search
        # finds the first component count that reaches ``rate``.
        energy = np.cumsum(s) / total
        keep = int(np.searchsorted(energy, rate, side="left")) + 1
        keep = min(keep, s.size)  # never index past the available components
        zip_image[:, :, channel] = (U[:, :keep] * s[:keep]) @ Vt[:keep, :]

    for channel in range(3):
        plane = zip_image[:, :, channel]
        low, high = plane.min(), plane.max()
        if high - low > flat_span:
            # Min-max normalisation of the channel to [0, 1].
            zip_image[:, :, channel] = (plane - low) / (high - low)
        else:
            # Flat channel: min-max scaling would divide by (almost) zero and
            # blow rounding noise up to full contrast, so keep its level.
            zip_image[:, :, channel] = np.clip(plane, 0, 255) / 255
    return np.round(zip_image * 255).astype("uint8")

if __name__ == '__main__':
    # Demo: show the original picture next to three compressed versions.
    img_file = '1.jpg'
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread signals failure by returning None; fail early with a
        # clear message instead of an opaque error inside cv2.cvtColor.
        raise OSError("could not read image: {}".format(img_file))
    # OpenCV loads pixels as BGR while matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(6,6))
    plt.rcParams['font.sans-serif'] = 'SimHei'  # font able to render the Chinese titles
    f1 = plt.subplot(1,4,1)
    f1.imshow(img)
    f1.set_title("原始图片")

    # One subplot per retention rate, placed in columns 2..4 of the 1x4 grid.
    rates = [0.3, 0.5, 0.7]
    for column, keep_rate in enumerate(rates, start=2):
        zip_image = zip_img(img, keep_rate)
        f = plt.subplot(1,4,column)
        f.set_title("保留率：{}%".format(keep_rate*100))
        f.imshow(zip_image)
    plt.suptitle('不同保留率效果', fontsize=17, y=0.05)  # y is the vertical position of the title
    plt.show()

效果：

四、变脸

采用的人脸融合方法主要依托于 dlib 库进行人脸检测与关键点提取，并结合 OpenCV 库进行后续的图像变换、掩模处理以及颜色校正等操作，最终实现人脸的融合呈现。整个流程涵盖人脸关键点定位、基于普氏分析的仿射变换、人脸掩模生成、颜色校正以及图像融合等关键步骤。

太懒了，直接上代码吧。

"""
       @File     : 变脸效果.py
       @IDE      : PyCharm
       @Author   : xiaoshu
       @Date     : 2024/12/31 13:48
"""

import numpy as np
import dlib
import cv2

# Colour correction blurs both images with a Gaussian kernel whose size is
# this fraction of the distance between the two eyes; tune to taste.
colour_correct_blur_frac = 0.6

# Locations of the dlib model files; adjust to where they were downloaded.
face_rec_model_path = r'./dlib_face_recognition_resnet_model_v1.dat'
predictor_path = r'./shape_predictor_68_face_landmarks.dat'

# Face detector and facial-landmark predictor shared by the functions below.
predictor = dlib.shape_predictor(predictor_path)
detector = dlib.get_frontal_face_detector()

# Landmark index ranges of the individual facial features.
right_brow_points = list(range(17, 22))
left_brow_points = list(range(22, 27))
nose_points = list(range(27, 35))
right_eye_points = list(range(36, 42))
left_eye_points = list(range(42, 48))
mouth_points = list(range(48, 61))

# Groups of landmark indices describing the face region that is processed.
# There is a single group holding all features listed above.
overlay_points = [
    [
        *left_eye_points, *right_eye_points,
        *left_brow_points, *right_brow_points,
        *nose_points, *mouth_points,
    ],
]

# NOTE(review): not referenced by the code visible in this file; judging by
# the name it is presumably a mask feathering size — confirm before relying on it.
feather_amount = 11


# Locate the facial landmarks of the single face contained in an image
def get_face_landmark(image):
    """
    Run the module-level detector and landmark predictor on one image.

    :param image: image data as a numpy array
    :return: np.matrix with one [x, y] row per landmark of the detected face;
             None (after printing a message) unless exactly one face is found
    """
    faces = detector(image, 1)
    if len(faces) == 1:
        shape = predictor(image, faces[0])
        return np.matrix([[pt.x, pt.y] for pt in shape.parts()])
    print('No one face in one picture')
    return None


# Procrustes alignment: similarity transform taking the points p1 onto p2
def get_transformation_matrix_from_points(p1, p2):
    """
    Fit the scale, rotation and translation that align two matched point sets.

    Both sets are centred and scaled to unit standard deviation; the rotation
    is then obtained from the SVD of their cross-covariance (orthogonal
    Procrustes), the scale from the ratio of the standard deviations and the
    translation from the centroids.

    The returned matrix maps p1 coordinates onto p2 coordinates, i.e.
    ``M * [x1, y1, 1]^T`` approximates ``[x2, y2, 1]^T``. Passing it to
    cv2.warpAffine together with cv2.WARP_INVERSE_MAP therefore resamples the
    image p2 belongs to into the frame of the image p1 belongs to.

    :param p1: N x 2 point coordinates (np.matrix, ndarray or nested list);
               leading singleton axes are flattened away
    :param p2: the N matching points, same layout as p1
    :return: 3 x 3 homogeneous transform as np.matrix
    """
    # Work on plain ndarrays so the result does not depend on np.matrix
    # operator semantics; no in-place arithmetic, so the inputs stay untouched.
    pts1 = np.asarray(p1, dtype=np.float64)
    pts2 = np.asarray(p2, dtype=np.float64)
    pts1 = pts1.reshape(-1, pts1.shape[-1])
    pts2 = pts2.reshape(-1, pts2.shape[-1])

    c1 = pts1.mean(axis=0)
    c2 = pts2.mean(axis=0)
    pts1 = pts1 - c1
    pts2 = pts2 - c2

    # Overall spread of each centred set (a single scalar per set).
    s1 = np.std(pts1)
    s2 = np.std(pts2)
    pts1 = pts1 / s1
    pts2 = pts2 / s2

    # NOTE: R is orthogonal but its determinant is not checked, so mirrored
    # input could produce a reflection rather than a pure rotation.
    U, S, Vt = np.linalg.svd(pts1.T @ pts2)
    R = (U @ Vt).T

    scale = s2 / s1
    dim = pts1.shape[1]
    trans_mat = np.eye(dim + 1)
    trans_mat[:dim, :dim] = scale * R
    trans_mat[:dim, dim] = c2 - scale * (R @ c1)
    # Callers slice the result like a matrix (m[:2]); keep the np.matrix type.
    return np.matrix(trans_mat)


# Resample an image through the affine matrix m into a canvas of shape dshape
def warp_image_by_matrix(image, m, dshape):
    """
    Warp ``image`` with the top two rows of ``m`` into a new array.

    The matrix is passed with cv2.WARP_INVERSE_MAP, so it is used as the
    mapping from output coordinates to input coordinates. With
    cv2.BORDER_TRANSPARENT, output pixels that fall outside the input are left
    untouched, i.e. they keep the zero the canvas was created with.

    :param image: input image as a numpy array
    :param m: affine transform (np.matrix); only its first two rows are used
    :param dshape: shape of the output array, starting with (height, width)
    :return: warped image, same dtype as ``image`` and shape ``dshape``
    """
    height, width = dshape[0], dshape[1]
    canvas = np.zeros(dshape, dtype=image.dtype)
    cv2.warpAffine(
        image,
        m[:2],
        (width, height),  # OpenCV expects the size as (width, height)
        dst=canvas,
        borderMode=cv2.BORDER_TRANSPARENT,
        flags=cv2.WARP_INVERSE_MAP,
    )
    return canvas


# Fill the convex hull of a point set on img (modifies img, returns nothing)
def draw_convex_hull_for_image(img, points, color):
    """
    Paint the convex hull of ``points`` onto ``img`` in place.

    :param img: image array that is drawn on
    :param points: point coordinates handed to cv2.convexHull
    :param color: fill value handed to cv2.fillConvexPoly (the mask builder in
                  this file passes the scalar 1 for a single-channel image)
    """
    cv2.fillConvexPoly(img, cv2.convexHull(points), color)


# Build a face mask that marks the area covered by the facial landmarks
def get_face_mask_for_image(img, landmarks):
    """
    Create a mask that is 1 inside the landmark hulls and 0 elsewhere.

    :param img: image the mask is built for; only its height and width are used
    :param landmarks: facial landmark coordinates (np.matrix)
    :return: float64 mask of shape (height, width, 3) with three identical channels
    """
    # Draw every landmark group as a filled hull on a single-channel canvas.
    single_channel = np.zeros(img.shape[:2], dtype=np.float64)
    for indices in overlay_points:
        draw_convex_hull_for_image(single_channel, landmarks[indices], color=1)
    # Replicate the canvas along a new last axis to obtain three channels.
    return np.stack((single_channel, single_channel, single_channel), axis=-1)


# Colour correction: pull the colours of im2 towards those of im1
def correct_image_color(im1, im2, landmarks1):
    """
    Scale ``im2`` by the ratio of the blurred ``im1`` to the blurred ``im2``.

    :param im1: reference image (numpy array)
    :param im2: image whose colours are corrected (numpy array)
    :param landmarks1: facial landmarks of the reference image (np.matrix)
    :return: corrected image as a uint8 numpy array
    """
    # The blur kernel size is a fixed fraction of the distance between the
    # centres of the two eyes.
    left_centre = np.mean(landmarks1[left_eye_points], axis=0)
    right_centre = np.mean(landmarks1[right_eye_points], axis=0)
    kernel = int(colour_correct_blur_frac * np.linalg.norm(left_centre - right_centre))
    kernel |= 1  # Gaussian kernel sizes must be odd
    ksize = (kernel, kernel)

    reference_blur = cv2.GaussianBlur(im1, ksize, 0)
    target_blur = cv2.GaussianBlur(im2, ksize, 0)
    # Raise values below 1 to 1 so the division below never divides by zero.
    target_blur[target_blur < 1] = 1

    corrected = (
        im2.astype(np.float64)
        * reference_blur.astype(np.float64)
        / target_blur.astype(np.float64)
    )
    # Limit to the valid 8-bit range before converting back.
    return np.clip(corrected, 0, 255).astype(np.uint8)


def change_faces(img1, img2):
    """
    Blend the face found in ``img2`` onto the face found in ``img1`` and show
    the first image, the second image and the blend side by side.

    :param img1: path of the image that receives the face
    :param img2: path of the image that provides the face
    :raises OSError: if one of the images cannot be read
    :raises ValueError: if landmarks are unavailable for either image
    """
    # Read both images; cv2.imread returns None instead of raising on failure.
    boy = cv2.imread(img1)
    girl = cv2.imread(img2)
    for path, image in ((img1, boy), (img2, girl)):
        if image is None:
            raise OSError("could not read image: {}".format(path))

    # Facial landmarks; get_face_landmark returns None unless exactly one
    # face is detected.
    boy_landmarks = get_face_landmark(boy)
    girl_landmarks = get_face_landmark(girl)
    if boy_landmarks is None or girl_landmarks is None:
        raise ValueError("each image must contain exactly one detectable face")

    # Index with a flat list of landmark ids: indexing with the nested
    # overlay_points list is interpreted differently across NumPy versions.
    align_index = [idx for group in overlay_points for idx in group]
    trans_mat = get_transformation_matrix_from_points(
        boy_landmarks[align_index],
        girl_landmarks[align_index]
    )

    # Masks of both faces; the second one is warped into the first image's
    # frame and the two are merged by taking the per-pixel maximum.
    boy_mask = get_face_mask_for_image(boy, boy_landmarks)
    girl_mask = get_face_mask_for_image(girl, girl_landmarks)
    warped_girl_mask = warp_image_by_matrix(girl_mask, trans_mat, boy.shape)
    combined_mask = np.max([boy_mask, warped_girl_mask], axis=0)

    # Repeated Gaussian blurs soften the edge of the combined mask.
    combined_mask = cv2.GaussianBlur(combined_mask, (15, 15), 0)
    combined_mask = cv2.GaussianBlur(combined_mask, (13, 13), 0)
    combined_mask = cv2.GaussianBlur(combined_mask, (7, 7), 0)

    # Warp the second image into the first image's frame and match its colours.
    warped_girl = warp_image_by_matrix(girl, trans_mat, boy.shape)
    warped_girl_color_correct = correct_image_color(boy, warped_girl, boy_landmarks)

    # Blend in floating point: mask weight 1 takes the warped face, weight 0
    # keeps the first image.
    boy = boy.astype(np.float64)
    blended = boy * (1 - combined_mask) + warped_girl_color_correct * combined_mask

    # Back to 8-bit so the images display correctly.
    boy = boy.astype(np.uint8)
    blended = blended.astype(np.uint8)

    # Bring all three pictures to the same size (width 300, height 400).
    boy = cv2.resize(boy, (300, 400))
    girl = cv2.resize(girl, (300, 400))
    blended = cv2.resize(blended, (300, 400))

    # Show them next to each other; the window is closed if SPACE is pressed.
    hori = np.concatenate((boy, girl, blended), axis=1)
    cv2.imshow('imgs', hori)
    if cv2.waitKey(0) == ord(' '):
        cv2.destroyAllWindows()


# Script entry point: run the face-blending demo on the two sample images.
if __name__ == "__main__":
    change_faces('1.jpg', '2.jpg')

效果：

本文用到了两个模型文件：一个是人脸识别模型，一个是人脸关键点预测器模型，下载方式如下。

通过网盘分享的文件：shape_predictor_68_face_landmarks.zip等2个文件
链接: https://pan.baidu.com/s/12f8A7sM9p4MMqckNkec38A 提取码: 8888