This is my first attempt at using a Jupyter notebook to write a post, so I hope it makes sense.
I’ve recently taught a class on generative models: http://hi.cs.stonybrook.edu/teaching/cdt450
In class we’ve manipulated face images with neural networks.
One important thing I found helpful is to align the images so that the facial features overlap.
It helps the networks learn the variation between faces better, rather than waste their “representation power” on the shifts between faces.
The following is some code to align face images using the excellent Dlib (Python bindings) http://dlib.net. First I run a standard face detector, and then I use the facial features extractor and its landmarks for a complete alignment of the face.
After the alignment – I’m just having fun with the aligned dataset 🙂
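Before diving into the full pipeline, here is the basic Dlib pattern the alignment code below is built on: a frontal face detector that returns face rectangles, and a shape predictor that returns 68 landmark points per rectangle. A minimal sketch (it assumes the shape_predictor_68_face_landmarks.dat model, linked further down, has already been downloaded and unpacked; the image path is just a placeholder):

import dlib
import cv2
import numpy as np

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

image = cv2.imread('some_photo.jpg')  # placeholder path
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# the second argument is the number of times to upsample the image before detecting
for rect in detector(gray, 1):
    shape = predictor(gray, rect)
    # collect the 68 (x, y) landmark points into a numpy array
    points = np.array([(shape.part(j).x, shape.part(j).y) for j in range(shape.num_parts)])
    print(rect, points.shape)  # -> (68, 2)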
First we include some necessary packages:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm, colors, rc
import random
import warnings
import cv2
from IPython import display
from IPython.display import HTML
import os
import shutil
import time
import urllib
import zipfile
import tarfile
import keras.utils as utils
import progressbar
import imageio

%matplotlib inline
rc('figure', figsize=(15,5))
warnings.filterwarnings('ignore')

A utility to download and unzip/untar a file
def download_dataset(temp_dl_filename, data_directory, url=None, squash_dir=False, remove_tmp_file=True):
    if not os.path.exists(data_directory):
        filename = temp_dl_filename
        if url is not None:
            filename = utils.get_file(temp_dl_filename, url)
        print("Unzipping...")
        if filename[-3:] == 'zip':
            zf = zipfile.ZipFile(filename)
            print("Calculate total size...")
            uncompress_size = sum((zfile.file_size for zfile in zf.infolist()))
            extracted_size = 0
            pb = progressbar.ProgressBar(max_value=uncompress_size)
            for i, zfile in enumerate(zf.infolist()):
                extracted_size += zfile.file_size
                pb.update(extracted_size)
                if zfile.filename[-1] == '/':
                    continue
                if squash_dir:
                    zfile.filename = os.path.basename(zfile.filename)
                zf.extract(zfile, data_directory)
        if filename[-6:] == 'tar.gz':
            zf = tarfile.open(filename, 'r:gz')
            print("Calculate total size...")
            uncompress_size = sum((tzfile.size if tzfile.isfile() else 0 for tzfile in zf.getmembers()))
            extracted_size = 0
            pb = progressbar.ProgressBar(max_value=uncompress_size)
            for i, zfile in enumerate(zf.getmembers()):
                if zfile.isfile() and zfile.isreg():
                    extracted_size += zfile.size
                    pb.update(extracted_size)
                if squash_dir:
                    zfile.name = os.path.basename(zfile.name)
                zf.extract(zfile, data_directory)
        if remove_tmp_file:
            os.remove(filename)
        print("Done")
    else:
        print("Data already exists.")

Aligning face images
This function will crop and scale the faces in each image, using a facial landmark detector, to match the alignment of the faces in the celebA dataset (http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html).
To use it, download http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 first.

import dlib
from sklearn.ensemble import IsolationForest

def face_data_normalizer(images_directory_input, images_directory_output, output_size=256,
                         align_faces_=True, remove_outliers_=False,
                         limit_num_faces_=None, limit_num_files_=None):
    def write_faces_to_disk(directory, faces):
        print("writing faces to disk...")
        if os.path.exists(directory):
            shutil.rmtree(directory)
        print('creating output directory: %s' % (directory))
        os.mkdir(directory)
        for i in range(faces.shape[0]):
            cv2.imwrite(''.join([directory, "%03d.jpg" % i]), faces[i, :, :, ::-1])
        print("wrote %d faces" % (faces.shape[0]))

    def remove_outliers(faces_):
        # minimal outlier-removal sketch: an IsolationForest fitted on the flattened
        # pixels flags unusual images (the contamination fraction is an assumption)
        clf = IsolationForest(contamination=0.05)
        flat = faces_.reshape(faces_.shape[0], -1)
        clf.fit(flat)
        pred = clf.predict(flat)
        return faces_[pred == 1], int(np.sum(pred == -1))

    if images_directory_input[-1] != '/':
        images_directory_input += '/'
    if images_directory_output[-1] != '/':
        images_directory_output += '/'

    faces = []
    if os.path.exists(images_directory_output):
        print('data already preprocessed? loading preprocessed files...')
        for img_idx, img_file in enumerate(os.listdir(images_directory_output)):
            # load the preprocessed image
            image = cv2.imread(''.join([images_directory_output, img_file]))
            if image is None:
                continue
            image = image[:, :, ::-1]  # BGR to RGB
            faces.append(image)
        faces = np.asarray(faces)
        print('loaded %d preprocessed images' % (faces.shape[0]))
        if remove_outliers_:
            faces, num_outliers = remove_outliers(faces)
            write_faces_to_disk(images_directory_output, faces)
        return faces

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')

    max_val = len(os.listdir(images_directory_input)) if limit_num_files_ is None else limit_num_files_
    pb = display.ProgressBar(max_val)
    pb.display()

    face_counter = 0
    for img_idx, img_file in enumerate(os.listdir(images_directory_input)):
        # load the input image and convert it to grayscale
        image = cv2.imread(''.join([images_directory_input, img_file]))
        if image is None:
            continue
        image = image[:, :, ::-1]  # BGR to RGB
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # detect faces in the grayscale image
        rects = detector(gray, 1)
        if len(rects) > 0:
            # loop over the face detections
            for (i, rect) in enumerate(rects):
                if align_faces_:
                    ######### Align with facial features detector #########
                    shape = predictor(gray, rect)  # get facial features
                    shape = np.array([(shape.part(j).x, shape.part(j).y) for j in range(shape.num_parts)])

                    # center and scale face around mid point between eyes
                    center_eyes = shape[27].astype(np.int)
                    eyes_d = np.linalg.norm(shape[36] - shape[45])
                    face_size_x = int(eyes_d * 2.)
                    if face_size_x < 50:
                        continue

                    # rotate to normalized angle
                    d = (shape[45] - shape[36]) / eyes_d  # normalized eyes-difference vector (direction)
                    a = np.rad2deg(np.arctan2(d[1], d[0]))  # angle
                    scale_factor = float(output_size) / float(face_size_x * 2.)  # scale to fit in output_size

                    # rotation (around center_eyes) + scale transform
                    M = np.append(cv2.getRotationMatrix2D((center_eyes[0], center_eyes[1]), a, scale_factor),
                                  [[0, 0, 1]], axis=0)
                    # apply shift from center_eyes to middle of output_size
                    M1 = np.array([[1., 0., -center_eyes[0] + output_size / 2.],
                                   [0., 1., -center_eyes[1] + output_size / 2.],
                                   [0, 0, 1.]])
                    # concatenate transforms (rotation-scale + translation)
                    M = M1.dot(M)[:2]
                    # warp
                    try:
                        face = cv2.warpAffine(image, M, (output_size, output_size),
                                              borderMode=cv2.BORDER_REPLICATE)
                    except:
                        continue
                    face_counter += 1

                    face = cv2.resize(face, (output_size, output_size))
                    faces.append(face)
                else:
                    ######### "No align" with just the detector #########
                    if rect.width() < 50:
                        continue
                    # find scale factor
                    scale_factor = float(output_size) / float(rect.width() * 2.)  # scale to fit in output_size
                    # scale around the center of the face (shift a bit for the approximate y-position of the eyes)
                    M = np.append(cv2.getRotationMatrix2D((rect.center().x, rect.center().y - rect.height() / 6.), 0, scale_factor),
                                  [[0, 0, 1]], axis=0)
                    # apply shift from the face center to the middle of output_size
                    M1 = np.array([[1., 0., -rect.center().x + output_size / 2.],
                                   [0., 1., -rect.center().y + output_size / 2. + rect.height() / 6.],
                                   [0, 0, 1.]])
                    # concatenate transforms (rotation-scale + translation)
                    M = M1.dot(M)[:2]
                    try:
                        face = cv2.warpAffine(image, M, (output_size, output_size),
                                              borderMode=cv2.BORDER_REPLICATE)
                    except:
                        continue
                    face_counter += 1
                    faces.append(face)

        pb.progress = img_idx + 1
        if limit_num_faces_ is not None and len(faces) > limit_num_faces_:
            break
        if limit_num_files_ is not None and img_idx >= limit_num_files_:
            break

    faces = np.asarray(faces)
    write_faces_to_disk(images_directory_output, faces)
    return faces

—
Download the faces dataset
We will use the FDDB dataset: http://vis-www.cs.umass.edu/fddb/
people_zip_filename = 'originalPics.tar.gz'
photos_url = 'http://tamaraberg.com/faceDataset/originalPics.tar.gz'

# download, unzip and squash to a single directory with all the files
download_dataset(people_zip_filename, 'data/faces/raw', url=photos_url, remove_tmp_file=True, squash_dir=True)

Downloading data from http://tamaraberg.com/faceDataset/originalPics.tar.gz
579067904/579061091 [==============================] - 44s 0us/step
Unzipping...
Calculate total size...
98% (587275530 of 594789979) |########## | Elapsed Time: 0:00:06 ETA: 0:00:00
Done

Align the faces
Use the face alignment tool
if os.path.isdir('data/faces/noalign'):
    shutil.rmtree('data/faces/noalign')
faces_noalign = face_data_normalizer('data/faces/raw', 'data/faces/noalign',
                                     align_faces_=False, remove_outliers_=False,
                                     limit_num_files_=100, output_size=256);

writing faces to disk...
creating output directory: data/faces/noalign/
wrote 141 faces

if os.path.isdir('data/faces/align'):
    shutil.rmtree('data/faces/align')
faces_align = face_data_normalizer('data/faces/raw', 'data/faces/align',
                                   align_faces_=True, remove_outliers_=False,
                                   limit_num_files_=None, output_size=256);

writing faces to disk...
creating output directory: data/faces/align/
wrote 4805 faces

Visualize the faces dataset
Aligned
plt.figure(figsize=(8,4))
for i in range(np.min([faces_align.shape[0], 28])):
    plt.subplot(4,7,i+1), plt.xticks(()), plt.yticks(()), plt.imshow(faces_align[i])
Not aligned

plt.figure(figsize=(8,4))
for i in range(np.min([faces_noalign.shape[0], 28])):
    plt.subplot(4,7,i+1), plt.xticks(()), plt.yticks(()), plt.imshow(faces_noalign[i])

—
A tool to produce a GIF from a list of images; it can also apply a sliding-window average across them
def show_gif(images_, images_average=10):
    faces_len = np.min([100, images_.shape[0]]) - images_average
    pb = display.ProgressBar(faces_len)
    pb.display()
    if os.path.exists('movie.gif'):
        os.remove('movie.gif')
    with imageio.get_writer('movie.gif', mode='I') as writer:
        for i in range(faces_len):
            pb.progress = i + 1
            # each GIF frame is the average of a sliding window of `images_average` images
            writer.append_data((np.mean(images_[i:i + images_average] / 255., axis=0) * 255.).astype(np.uint8))
    display.clear_output(wait=True)
    return HTML('<img src="movie.gif?%s">' % (random.randint(0, 1000)))

Face Averages
plt.figure(figsize=(4,8))
plt.subplot(121), plt.xticks(()), plt.yticks(()), plt.imshow(np.mean(faces_noalign / 255., axis=0)), plt.title('Not aligned')
plt.subplot(122), plt.xticks(()), plt.yticks(()), plt.imshow(np.mean(faces_align / 255., axis=0)), plt.title('Aligned');
Animations:
Aligned

show_gif(faces_align, images_average=15)
Not aligned

show_gif(faces_noalign, images_average=15)
Naturally, the aligned faces have their facial features in much closer correspondence.

—
Variational Autoencoder
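As a quick reminder of what the code below optimizes (this is the standard VAE formulation, not anything specific to this post): the loss combines a per-pixel reconstruction term with a KL divergence that pulls the approximate posterior N(z_mean, exp(z_log_var)) towards a standard normal prior. In the notation of the code, with μ = z_mean, log σ² = z_log_var and d = latent_dim:

\mathcal{L} \;=\; H \cdot W \cdot \mathrm{MSE}(x, \hat{x}) \;-\; \frac{1}{2}\sum_{j=1}^{d}\left(1 + \log\sigma_j^{2} - \mu_j^{2} - \sigma_j^{2}\right)

The second term is the closed-form KL divergence between N(μ, σ²) and N(0, I), and the H·W factor simply keeps the reconstruction term on a scale comparable to the KL term.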
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten, Reshape, Lambda, Conv2DTranspose, BatchNormalization
from keras.models import Model
from keras import metrics
import keras.backend as K

'''
Inspiration: https://github.com/keras-team/keras/blob/master/examples/variational_autoencoder_deconv.py
'''
def ConvolutionalVAE(img_rows, img_cols, img_chns, mse=True):
    filters = 16       # number of convolutional filters to use
    kernel_size = 3    # convolution kernel size
    original_img_size = (img_rows, img_cols, img_chns)
    latent_dim = 100
    intermediate_dim = 128
    epsilon_std = 1.0

    inputs = Input(shape=original_img_size, name='encoder_input')
    x = inputs
    for i in range(2):
        filters *= 2
        x = Conv2D(filters=filters, kernel_size=kernel_size, activation='relu', strides=2, padding='same')(x)

    # shape info needed to build decoder model
    shape = K.int_shape(x)

    # generate latent vector Q(z|X)
    x = Flatten()(x)
    # x = Dense(16, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    def sampling(args):
        z_mean_, z_log_var_ = args
        batch = K.shape(z_mean_)[0]
        dim = K.int_shape(z_mean_)[1]
        # by default, random_normal has mean=0 and std=1.0
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean_ + K.exp(0.5 * z_log_var_) * epsilon

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
    encoder.summary()

    # build decoder model
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs)
    x = Reshape((shape[1], shape[2], shape[3]))(x)
    for i in range(2):
        x = Conv2DTranspose(filters=filters, kernel_size=kernel_size, activation='relu', strides=2, padding='same')(x)
        filters //= 2
    outputs = Conv2DTranspose(filters=img_chns, kernel_size=kernel_size, activation='sigmoid', padding='same', name='decoder_output')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, outputs, name='decoder')
    decoder.summary()

    # instantiate VAE model
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name='vae')

    reconstruction_loss = img_cols * img_rows * metrics.mse(K.flatten(inputs), K.flatten(outputs))
    kl_loss = K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) * -0.5
    vae_loss = K.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)
    vae.compile(optimizer='rmsprop')

    return vae, encoder, decoder

A callback to render the losses as a graph
from keras.callbacks import Callback

class PlotLosses(Callback):
    def __init__(self, losses_names):
        self.losses = dict()
        self.losses_names = losses_names
        for ln in self.losses_names:
            self.losses[ln] = []

    def on_train_begin(self, logs={}):
        pass

    def on_epoch_end(self, epoch, logs={}):
        display.clear_output(wait=False)
        plt.figure(figsize=(10,5))
        for ln in self.losses_names:
            if ln not in logs:
                continue
            self.losses[ln].append(logs.get(ln))
            plt.plot(np.array(self.losses[ln]), label=ln)
        plt.ylabel('loss'), plt.xlabel('epochs')
        plt.legend()
        plt.grid(True)
        plt.tick_params(labelright=True)
        plt.show()

K.clear_session()
vae, encoder, generator = ConvolutionalVAE(256, 256, 3)

Train the VAE
vae.fit(faces_align / 256.,
        epochs=50,
        batch_size=32,
        validation_split=0.1,
        callbacks=[PlotLosses(['loss','val_loss'])]);
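Once training is done, the decoder (the generator returned above) can also be used on its own: decoding random latent vectors drawn from the standard normal prior produces new face-like images. A minimal sketch (it assumes the training cell above has run; n_samples is just an illustrative name):

# sample new faces from the prior: z ~ N(0, I), then decode
n_samples = 10
z = np.random.normal(size=(n_samples, 100))  # 100 = latent_dim used in ConvolutionalVAE
sampled = generator.predict(z)               # (n_samples, 256, 256, 3), values in [0, 1]

plt.figure(figsize=(10, 2))
for i in range(n_samples):
    plt.subplot(1, n_samples, i + 1), plt.xticks(()), plt.yticks(())
    plt.imshow(sampled[i])
plt.show()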
Apply VAE to find faces in inanimate objects

Objects were downloaded from a Google Images search – they may be copyrighted! (Oops)
Inspired by this Medium post: https://medium.com/starts-with-a-bang/averaging-inanimate-objects-can-produce-human-faces-1a80cd1448d8

plt.figure(figsize=(10,2))
count = 0
for img_idx, img_file in enumerate(os.listdir("downloads/objects that look like faces/")):
    # load the input image
    image = cv2.imread(''.join(["downloads/objects that look like faces/", img_file]))
    if image is None:
        continue
    if img_file[:2] not in ['60','83','3.','4.','50','51','43']:
        continue
    image = image[:,:,::-1]  # BGR to RGB
    plt.subplot(2,10,count+1), plt.xticks(()), plt.yticks(())
    object_face = cv2.resize(image, (256,256), 0, 0) / 256.
    plt.imshow(object_face)
    plt.subplot(2,10,count+11), plt.xticks(()), plt.yticks(())
    plt.imshow(np.squeeze(vae.predict(object_face[np.newaxis])));
    count += 1
    if count >= 9:
        break
plt.show()

Morph between faces with the VAE
idx = np.random.randint(faces_align.shape[0], size=(2,))

plt.figure(figsize=(8,40))
plt.subplot(1,7,1), plt.xticks(()), plt.yticks(()), plt.imshow(faces_align[idx[0]])
plt.subplot(1,7,7), plt.xticks(()), plt.yticks(()), plt.imshow(faces_align[idx[1]])

A = encoder.predict(faces_align[idx] / 256.)
for i, a in enumerate(np.linspace(0, 1, 5)):
    blend = A[0][np.newaxis, 0] * (1. - a) + A[0][np.newaxis, 1] * a
    plt.subplot(1,7,2+i), plt.xticks(()), plt.yticks(())
    plt.imshow(np.squeeze(generator.predict(blend)));

def vae_morph(images_):
    faces_len = np.min([20, images_.shape[0]])
    pb = display.ProgressBar(faces_len)
    pb.display()
    if os.path.exists('movie.gif'):
        os.remove('movie.gif')
    with imageio.get_writer('movie.gif', mode='I') as writer:
        for f in range(faces_len - 1):
            A = encoder.predict(images_[[f, f+1]] / 256.)
            for i, a in enumerate(np.linspace(0, 1, 7)):
                blend_latent = A[0][np.newaxis, 0] * (1. - a) + A[0][np.newaxis, 1] * a
                blended = np.squeeze(generator.predict(blend_latent))
                writer.append_data((blended * 255.).astype(np.uint8))
            pb.progress = f + 1
    display.clear_output(wait=True)
    return HTML('<img src="movie.gif?%s">' % (random.randint(0, 1000)))

And an animated morph GIF:
vae_morph(faces_align)
Hope you enjoyed!
(All sources are license-free, use them at will! But don’t blame me if something breaks. Also please don’t use my name to endorse your project)