面部表情分類面部表情是面部肌肉的一個(gè)或多個(gè)動(dòng)作或狀態(tài)的結(jié)果。這些運(yùn)動(dòng)表達(dá)了個(gè)體對(duì)觀察者的情緒狀態(tài)。面部表情是非語言交際的一種形式。它是表達(dá)人類之間的社會(huì)信息的主要手段,不過也發(fā)生在大多數(shù)其他哺乳動(dòng)物和其他一些動(dòng)物物種中。人類的面部表情至少有21種,除了常見的高興、吃驚、悲傷、憤怒、厭惡和恐懼6種,還有驚喜(高興+吃驚)、悲憤(悲傷+憤怒)等15種可被區(qū)分的復(fù)合表情。面部表情識(shí)別技術(shù)主要的應(yīng)用領(lǐng)域包括人機(jī)交互、智能控制、安全、醫(yī)療、通信等領(lǐng)域。網(wǎng)絡(luò)架構(gòu)LeNet-5出自論文Gradient-Based Learning Applied to Document Recognition,是一種用于手寫體字符識(shí)別的非常高效的卷積神經(jīng)網(wǎng)絡(luò)。LeNet5的網(wǎng)絡(luò)架構(gòu)如下:但是因?yàn)槲覀円龅氖敲娌勘砬榉诸?,而且CK+數(shù)據(jù)集樣本大小是48*48,因此需要對(duì)LeNet5網(wǎng)絡(luò)進(jìn)行微調(diào)。網(wǎng)絡(luò)架構(gòu)如下: 網(wǎng)絡(luò)結(jié)構(gòu)如下:計(jì)算圖如下:代碼實(shí)現(xiàn)預(yù)處理數(shù)據(jù)集加載,并進(jìn)行預(yù)處理,同時(shí)將測(cè)試集的前225張樣本拼接成15張*15張的大圖片,用于Tensorboard可視化。%matplotlib inlineimport matplotlib.pyplot as pltimport osimport cv2import numpy as npfrom tensorflow import name_scope as namespacefrom tensorflow.contrib.tensorboard.plugins import projectorNUM_PIC_SHOW=225base_filedir='D:/CV/datasets/facial_exp/CK+'dict_str2int={'anger':0,'contempt':1,'disgust':2,'fear':3,'happy':4,'sadness':5,'surprise':6}labels=[]data=[]#讀取圖片并將其保存至datafor expdir in os.listdir(base_filedir): base_expdir=os.path.join(base_filedir,expdir) for name in os.listdir(base_expdir): labels.append(dict_str2int[expdir]) path=os.path.join(base_expdir,name) path=path.replace('\\','/') #將\替換為/ img = cv2.imread(path,0) data.append(img)data=np.array(data)labels=np.array(labels)#將data打亂permutation = np.random.permutation(data.shape[0])data = data[permutation,:,:]labels = labels[permutation]#取前225個(gè)圖片拼成一張大圖片,用于tensorboard可視化img_set=data[:NUM_PIC_SHOW]#前225的數(shù)據(jù)用于顯示label_set=labels[:NUM_PIC_SHOW]big_pic=Noneindex=0for row in range(15): row_vector=img_set[index] index+=1 for col in range(1,15): img=img_set[index] row_vector=np.hstack([row_vector,img]) index+=1 if(row==0): big_pic=row_vector else: big_pic=np.vstack([big_pic,row_vector])plt.imshow(big_pic, cmap='gray')plt.show()#寫入大圖片cv2.imwrite(D:/Jupyter/TensorflowLearning/facial_expression_cnn_projector/data/faces.png,big_pic)#轉(zhuǎn)換數(shù)據(jù)格式和形狀data=data.reshape(-1,48*48).astype('float32')/255.0labels=labels.astype('float32')#0.3的比例測(cè)試scale=0.3test_data=data[:int(scale*data.shape[0])]test_labels=labels[:int(scale*data.shape[0])]train_data=data[int(scale*data.shape[0]):]train_labels=labels[int(scale*data.shape[0]):]print(train_data.shape)print(train_labels.shape)print(test_data.shape)print(test_labels.shape)#將標(biāo)簽one-hottrain_labels_onehot=np.zeros((train_labels.shape[0],7))test_labels_onehot=np.zeros((test_labels.shape[0],7))for i,label in enumerate(train_labels): train_labels_onehot[i,int(label)]=1for i,label in enumerate(test_labels): test_labels_onehot[i,int(label)]=1print(train_labels_onehot.shape)print(test_labels_onehot.shape)2.定義前向網(wǎng)絡(luò)import tensorflow as tfIMAGE_SIZE=48 #圖片大小NUM_CHANNELS=1 #圖片通道CONV1_SIZE=5CONV1_KERNEL_NUM=32CONV2_SIZE=5CONV2_KERNEL_NUM=64FC_SIZE=512 #隱層大小OUTPUT_NODE=7 #輸出大小#參數(shù)概要,用于tensorboard實(shí)時(shí)查看訓(xùn)練過程def variable_summaries(var): with namespace('summaries'): mean=tf.reduce_mean(var) tf.summary.scalar('mean',mean) #平均值 with namespace('stddev'): stddev=tf.sqrt(tf.reduce_mean(tf.square(var-mean))) tf.summary.scalar('stddev',stddev) #標(biāo)準(zhǔn)差 tf.summary.scalar('max',tf.reduce_max(var))#最大值 tf.summary.scalar('min',tf.reduce_min(var))#最小值 tf.summary.histogram('histogram',var)#直方圖#獲取權(quán)重def get_weight(shape,regularizer,name=None): w=tf.Variable(tf.truncated_normal(shape,stddev=0.1),name=name) #variable_summaries(w) if(regularizer!=None): tf.add_to_collection('losses',tf.contrib.layers.l2_regularizer(regularizer)(w)) return w#獲取偏置def get_bias(shape,name=None): b=tf.Variable(tf.zeros(shape),name=name) #variable_summaries(b) return b #定義前向網(wǎng)絡(luò)def forward(x,train,regularizer): with tf.name_scope('layer'): #把輸入reshape with namespace('reshape_input'): x_reshaped=tf.reshape(x,[-1,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS]) with tf.name_scope('conv1'): #定義兩個(gè)卷積層 conv1_w=get_weight([CONV1_SIZE,CONV1_SIZE,NUM_CHANNELS,CONV1_KERNEL_NUM],regularizer=regularizer,name='conv1_w') conv1_b=get_bias([CONV1_KERNEL_NUM],name='conv1_b') conv1=tf.nn.conv2d(x_reshaped,conv1_w,strides=[1,1,1,1],padding='SAME') relu1=tf.nn.relu(tf.nn.bias_add(conv1,conv1_b)) pool1=tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME') with tf.name_scope('conv2'): conv2_w=get_weight([CONV2_SIZE,CONV2_SIZE,CONV1_KERNEL_NUM,CONV2_KERNEL_NUM],regularizer=regularizer,name='conv2_w') conv2_b=get_bias([CONV2_KERNEL_NUM],name='conv2_b') conv2=tf.nn.conv2d(pool1,conv2_w,strides=[1,1,1,1],padding='SAME') relu2=tf.nn.relu(tf.nn.bias_add(conv2,conv2_b)) #對(duì)卷機(jī)后的輸出添加偏置,并通過relu完成非線性激活 pool2=tf.nn.max_pool(relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME') with tf.name_scope('flatten'): #定義reshape層 pool_shape=pool2.get_shape().as_list() #獲得張量的維度并轉(zhuǎn)換為列表 nodes=pool_shape[1]*pool_shape[2]*pool_shape[3] #[0]為batch值,[1][2][3]分別為長(zhǎng)寬和深度 #print(type(pool2)) reshaped=tf.reshape(pool2,[-1,nodes]) with tf.name_scope('fc1'): #定義兩層全連接層 fc1_w=get_weight([nodes,FC_SIZE],regularizer,name='fc1_w') fc1_b=get_bias([FC_SIZE],name='fc1_b') fc1=tf.nn.relu(tf.matmul(reshaped,fc1_w)+fc1_b) if(train): fc1=tf.nn.dropout(fc1,0.5) with tf.name_scope('fc2'): fc2_w=get_weight([FC_SIZE,OUTPUT_NODE],regularizer,name='fc2_w') fc2_b=get_bias([OUTPUT_NODE],name='fc2_b') y=tf.matmul(fc1,fc2_w)+fc2_b return y3.定義反向傳播 ,可視化設(shè)置,并進(jìn)行訓(xùn)練,BATCH_SIZE=100 #每次樣本數(shù)LEARNING_RATE_BASE=0.005 #基本學(xué)習(xí)率LEARNING_RATE_DECAY=0.99 #學(xué)習(xí)率衰減率REGULARIZER=0.0001 #正則化系數(shù)STEPS=2500 #訓(xùn)練次數(shù)MOVING_AVERAGE_DECAY=0.99 #滑動(dòng)平均衰減系數(shù)SAVE_PATH='.\\facial_expression_cnn_projector\\' #參數(shù)保存路徑data_len=train_data.shape[0]#將拼接為big_pic的測(cè)試樣本保存至標(biāo)量,用于訓(xùn)練過程可視化pic_stack=tf.stack(test_data[:NUM_PIC_SHOW]) #stack拼接圖片張量embedding=tf.Variable(pic_stack,trainable=False,name='embedding')if(tf.gfile.Exists(os.path.join(SAVE_PATH,'projector'))==False): tf.gfile.MkDir(os.path.join(SAVE_PATH,'projector'))#創(chuàng)建metadata文件,存放可視化圖片的labelif(tf.gfile.Exists(os.path.join(SAVE_PATH,'projector','metadata.tsv'))==True): tf.gfile.DeleteRecursively(os.path.join(SAVE_PATH,'projector')) tf.gfile.MkDir(os.path.join(SAVE_PATH,'projector'))#將可視化圖片的標(biāo)簽寫入with open(os.path.join(SAVE_PATH,'projector','metadata.tsv'),'w') as f: for i in range(NUM_PIC_SHOW): f.write(str(label_set[i])+'\n')with tf.Session() as sess: with tf.name_scope('input'): #x=tf.placeholder(tf.float32,[BATCH_SIZE,IMAGE_SIZE,IMAGE_SIZE,NUM_CHANNELS],name='x_input') x=tf.placeholder(tf.float32,[None,IMAGE_SIZE*IMAGE_SIZE*NUM_CHANNELS],name='x_input') y_=tf.placeholder(tf.float32,[None,OUTPUT_NODE],name='y_input') #reshape可視化圖片 with namespace('input_reshape'): image_shaped_input=tf.reshape(x,[-1,IMAGE_SIZE,IMAGE_SIZE,1]) #把輸入reshape tf.summary.image('input',image_shaped_input,7) #添加到tensorboard中顯示 y=forward(x,True,REGULARIZER) global_step=tf.Variable(0,trainable=False) with namespace('loss'): #softmax并計(jì)算交叉熵 ce=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,labels=tf.argmax(y_,1)) cem=tf.reduce_mean(ce) #求每個(gè)樣本的交叉熵 loss=cem+tf.add_n(tf.get_collection('losses')) tf.summary.scalar('loss',loss) #loss只有一個(gè)值,就直接輸出 learning_rate=tf.train.exponential_decay( LEARNING_RATE_BASE, global_step, data_len/BATCH_SIZE, LEARNING_RATE_DECAY, staircase=True ) with namespace('train'): train_step=tf.train.GradientDescentOptimizer(learning_rate).minimize(loss,global_step=global_step) ema=tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,global_step) ema_op=ema.apply(tf.trainable_variables()) with namespace('accuracy'): correct_prediction=tf.equal(tf.argmax(y,1),tf.argmax(y_,1)) accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32)) tf.summary.scalar('accuracy',accuracy) with tf.control_dependencies([train_step,ema_op]): train_op=tf.no_op(name='train') init_op=tf.global_variables_initializer() sess.run(init_op) #合并所有的summary merged=tf.summary.merge_all() #寫入圖結(jié)構(gòu) writer=tf.summary.FileWriter(os.path.join(SAVE_PATH,'projector'),sess.graph) saver=tf.train.Saver() #保存網(wǎng)絡(luò)的模型 #配置可視化 config=projector.ProjectorConfig() #tensorboard配置對(duì)象 embed=config.embeddings.add() #增加一項(xiàng) embed.tensor_name=embedding.name #指定可視化的變量 embed.metadata_path='D:/Jupyter/TensorflowLearning/facial_expression_cnn_projector/projector/metadata.tsv' #路徑 embed.sprite.image_path='D:/Jupyter/TensorflowLearning/facial_expression_cnn_projector/data/faces.png' embed.sprite.single_image_dim.extend([IMAGE_SIZE,IMAGE_SIZE])#可視化圖片大小 projector.visualize_embeddings(writer,config) #斷點(diǎn)續(xù)訓(xùn) #ckpt=tf.train.get_checkpoint_state(MODEL_SAVE_PATH) #if(ckpt and ckpt.model_checkpoint_path): # saver.restore(sess,ckpt.model_checkpoint_path) for i in range(STEPS): run_option=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata=tf.RunMetadata() start=(i*BATCH_SIZE)%(data_len-BATCH_SIZE) end=start+BATCH_SIZE summary,_,loss_value,step=sess.run([merged,train_op,loss,global_step], feed_dict={x:train_data[start:end],y_:train_labels_onehot[start:end]}, options=run_option, run_metadata=run_metadata) writer.add_run_metadata(run_metadata,'step%03d'%i) writer.add_summary(summary,i)#寫summary和i到文件 if(i%100==0): acc=sess.run(accuracy,feed_dict={x:test_data,y_:test_labels_onehot}) print('%d %g'%(step,loss_value)) print('acc:%f'%(acc)) saver.save(sess,os.path.join(SAVE_PATH,'projector','model'),global_step=global_step) writer.close()可視化訓(xùn)練過程執(zhí)行上面的代碼,打開tensorboard,可以看到訓(xùn)練精度和交叉熵?fù)p失如下:由于只有六百多的訓(xùn)練樣本,故得到曲線抖動(dòng)很大,訓(xùn)練精度大概在百分之八九十多浮動(dòng),測(cè)試精度在百分之七八十浮動(dòng),可見精度不高。下面使用Tensorboard將訓(xùn)練過程可視化(圖片是用Power Point錄頻然后用迅雷應(yīng)用截取gif得到的): ————————————————版權(quán)聲明:本文為CSDN博主「陳建驅(qū)」的原創(chuàng)文章,遵循 CC 4.0 BY-SA 版權(quán)協(xié)議,轉(zhuǎn)載請(qǐng)附上原文出處鏈接及本聲明。
2021-09-20