
Deep Autoencoder: A MATLAB Walkthrough

Author: 凯鲁嘎吉 - 博客园 (cnblogs): http://www.cnblogs.com/kailugaji/

    This article explains the main ideas of Hinton's 2006 Science paper "Reducing the dimensionality of data with neural networks" and walks through its accompanying MATLAB code.

    The deep autoencoder is first pretrained layer by layer with restricted Boltzmann machines (RBMs), which supplies the initial weights and biases (updated during pretraining with the CD-1 contrastive divergence algorithm). The stacked RBMs are then unrolled into an autoencoder that reconstructs the data, and all weights and biases are fine-tuned globally by backpropagation (updated with the Polak-Ribière conjugate gradient method).
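
    The rbm.m script itself is covered elsewhere (see section 7), but since pretraining hinges on it, here is a minimal sketch of one CD-1 update for a binary-binary RBM. It assumes data holds one batch (numcases x numdims) and that vishid, visbiases, hidbiases and the learning rates epsilonw, epsilonvb, epsilonhb are defined as in the listings below; momentum and weight decay, which the full script also applies, are omitted here:

  % Positive phase: hidden probabilities given the data, then binary samples
  poshidprobs  = 1./(1 + exp(-data*vishid - repmat(hidbiases,numcases,1)));
  poshidstates = poshidprobs > rand(numcases,numhid);
  % Negative phase: one reconstruction step (this is the "1" in CD-1)
  negdata     = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1)));
  neghidprobs = 1./(1 + exp(-negdata*vishid - repmat(hidbiases,numcases,1)));
  % Updates: data-driven statistics minus reconstruction-driven statistics
  vishid    = vishid    + epsilonw*(data'*poshidprobs - negdata'*neghidprobs)/numcases;
  visbiases = visbiases + epsilonvb*(sum(data) - sum(negdata))/numcases;
  hidbiases = hidbiases + epsilonhb*(sum(poshidprobs) - sum(neghidprobs))/numcases;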

1. mnistdeepauto.m

  %% Autoencoder architecture: 784->1000->500->250->30->250->500->1000->784
  clear all
  close all

  maxepoch=50; % In the Science paper we use maxepoch=50, but it works just fine. Maximum number of epochs
  numhid=1000; numpen=500; numpen2=250; numopen=30; % units per RBM layer: 1000-500-250-30
  %% Data preprocessing
  % Convert the raw data files
  fprintf(1,'Converting Raw files into Matlab format \n');
  converter;
  % maxepoch passes over the data
  fprintf(1,'Pretraining a deep autoencoder. \n');
  fprintf(1,'The Science paper used 50 epochs. This uses %3i \n', maxepoch);
  % Split the data into mini-batches
  makebatches;
  [numcases numdims numbatches]=size(batchdata); % cases per batch, dimensionality, number of batches
  %% Layer-wise pretraining (with RBMs)
  %% Visible layer -> 1000-unit hidden layer
  fprintf(1,'Pretraining Layer 1 with RBM: %d-%d \n',numdims,numhid);
  restart=1;
  rbm; % binary (0/1) units; produces the initial weights and biases
  hidrecbiases=hidbiases;
  save mnistvh vishid hidrecbiases visbiases; % save the weights, hidden biases and visible biases of RBM 1 to mnistvh.mat: 784*1000, 1*1000, 1*784
  %% 1000-unit hidden layer -> 500-unit hidden layer
  fprintf(1,'\nPretraining Layer 2 with RBM: %d-%d \n',numhid,numpen);
  batchdata=batchposhidprobs;
  numhid=numpen;
  restart=1;
  rbm; % binary (0/1) units; produces the initial weights and biases
  hidpen=vishid; penrecbiases=hidbiases; hidgenbiases=visbiases;
  save mnisthp hidpen penrecbiases hidgenbiases; % save the weights and biases of RBM 2 to mnisthp.mat: 1000*500, 1*500, 1*1000
  %% 500-unit hidden layer -> 250-unit hidden layer
  fprintf(1,'\nPretraining Layer 3 with RBM: %d-%d \n',numpen,numpen2);
  batchdata=batchposhidprobs;
  numhid=numpen2;
  restart=1;
  rbm; % binary (0/1) units; produces the initial weights and biases
  hidpen2=vishid; penrecbiases2=hidbiases; hidgenbiases2=visbiases;
  save mnisthp2 hidpen2 penrecbiases2 hidgenbiases2; % save the weights and biases of RBM 3 to mnisthp2.mat: 500*250, 1*250, 1*500
  % 250-unit hidden layer -> 30-unit code layer
  fprintf(1,'\nPretraining Layer 4 with RBM: %d-%d \n',numpen2,numopen);
  batchdata=batchposhidprobs;
  numhid=numopen;
  restart=1;
  rbmhidlinear; % linear activation f(x)=x, real-valued units; produces the initial weights and biases
  hidtop=vishid; toprecbiases=hidbiases; topgenbiases=visbiases;
  save mnistpo hidtop toprecbiases topgenbiases; % save the weights and biases of RBM 4 to mnistpo.mat: 250*30, 1*30, 1*250
  %% Global fine-tuning with backpropagation
  backprop; % fine-tune the weights and biases

2. converter.m

  %% Convert the gunzipped raw files into MATLAB's file format
  % Converts the data sets from .ubyte format to .ascii format, and then on to .mat format.
  % Purpose: convert the test set and the training set to .mat files
  % Resulting test set files: test(0~9).mat
  % Resulting training set files: digit(0~9).mat
  %% Work with test files first
  fprintf(1,'You first need to download files:\n train-images-idx3-ubyte.gz\n train-labels-idx1-ubyte.gz\n t10k-images-idx3-ubyte.gz\n t10k-labels-idx1-ubyte.gz\n from http://yann.lecun.com/exdb/mnist/\n and gunzip them \n');
  % The first four 32-bit integers of the image file are header fields: magic number, number of images, number of rows, number of columns
  f = fopen('t10k-images-idx3-ubyte','r');
  [a,count] = fread(f,4,'int32');
  % The first two 32-bit integers of the label file are header fields: magic number, number of images
  g = fopen('t10k-labels-idx1-ubyte','r');
  [l,count] = fread(g,2,'int32');

  fprintf(1,'Starting to convert Test MNIST images (prints 10 dots) \n');
  n = 1000;
  % Df holds the handles of the .ascii files
  Df = cell(1,10);
  for d=0:9,
    Df{d+1} = fopen(['test' num2str(d) '.ascii'],'w');
  end;
  % Read 1000 images and labels at a time from the test set (10k samples): rawlabels 1000*1, rawimages 784*1000
  for i=1:10,
    fprintf('.');
    rawimages = fread(f,28*28*n,'uchar');
    rawlabels = fread(g,n,'uchar');
    rawimages = reshape(rawimages,28*28,n);
    % Write each image's pixel values (as 3-digit integers) to the file of its class, one image per line
    for j=1:n,
      fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
      fprintf(Df{rawlabels(j)+1},'\n');
    end;
  end;

  fprintf(1,'\n');
  for d=0:9,
    fclose(Df{d+1});
    D = load(['test' num2str(d) '.ascii'],'-ascii'); % read the .ascii data back; D is (number of samples)*784
    fprintf('%5d Digits of class %d\n',size(D,1),d);
    save(['test' num2str(d) '.mat'],'D','-mat'); % convert to a .mat file
  end;


  % Then convert the training set. Work with training files second
  f = fopen('train-images-idx3-ubyte','r');
  [a,count] = fread(f,4,'int32');

  g = fopen('train-labels-idx1-ubyte','r');
  [l,count] = fread(g,2,'int32');

  fprintf(1,'Starting to convert Training MNIST images (prints 60 dots)\n');
  n = 1000;

  Df = cell(1,10);
  for d=0:9,
    Df{d+1} = fopen(['digit' num2str(d) '.ascii'],'w');
  end;

  for i=1:60,
    fprintf('.');
    rawimages = fread(f,28*28*n,'uchar');
    rawlabels = fread(g,n,'uchar');
    rawimages = reshape(rawimages,28*28,n);

    for j=1:n,
      fprintf(Df{rawlabels(j)+1},'%3d ',rawimages(:,j));
      fprintf(Df{rawlabels(j)+1},'\n');
    end;
  end;

  fprintf(1,'\n');
  for d=0:9,
    fclose(Df{d+1});
    D = load(['digit' num2str(d) '.ascii'],'-ascii');
    fprintf('%5d Digits of class %d\n',size(D,1),d);
    save(['digit' num2str(d) '.mat'],'D','-mat');
  end;

  dos('rm *.ascii'); % delete the intermediate .ascii files
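
The last line shells out to the Unix rm command and will fail on a plain Windows installation; a portable substitute (a suggested tweak, not part of the original script) is MATLAB's built-in delete, which accepts wildcards:

  delete('*.ascii'); % built-in alternative to dos('rm *.ascii'); works on any platform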

3. makebatches.m

  % Pack the data set and its labels into batches for batch-wise processing; the data set is large, and mini-batches speed up training
  % Turns the original 2-D arrays into 3-D arrays, where the extra dimension indexes the batch
  % Training data and labels are packed into: batchdata and batchtargets
  % Test data and labels are packed into: testbatchdata and testbatchtargets
  digitdata=[];
  targets=[];
  % Samples of digit 0 from the training set; load reads the file's data into D; digitdata is (number of samples)*784, targets is (number of samples)*10
  load digit0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)];
  load digit1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)];
  load digit2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
  load digit3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
  load digit4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
  load digit5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
  load digit6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
  load digit7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
  load digit8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
  load digit9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
  digitdata = digitdata/255; % concatenate everything and normalize pixel values to [0,1]

  totnum=size(digitdata,1); % number of samples: 60000
  fprintf(1, 'Size of the training dataset= %5d \n', totnum);

  rand('state',0); % so we know the permutation of the training data; randomorder holds 60000 distinct indices in shuffled order
  randomorder=randperm(totnum);

  numbatches=totnum/100; % number of batches: 600
  numdims = size(digitdata,2); % dimensionality: 784
  batchsize = 100; % samples per batch: 100
  batchdata = zeros(batchsize, numdims, numbatches); % 100*784*600
  batchtargets = zeros(batchsize, 10, numbatches); % 100*10*600

  for b=1:numbatches % store the shuffled samples in the two arrays batchdata and batchtargets
    batchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :);
    batchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :);
  end;
  clear digitdata targets;

  digitdata=[];
  targets=[];
  load test0; digitdata = [digitdata; D]; targets = [targets; repmat([1 0 0 0 0 0 0 0 0 0], size(D,1), 1)];
  load test1; digitdata = [digitdata; D]; targets = [targets; repmat([0 1 0 0 0 0 0 0 0 0], size(D,1), 1)];
  load test2; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 1 0 0 0 0 0 0 0], size(D,1), 1)];
  load test3; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 1 0 0 0 0 0 0], size(D,1), 1)];
  load test4; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 1 0 0 0 0 0], size(D,1), 1)];
  load test5; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 1 0 0 0 0], size(D,1), 1)];
  load test6; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 1 0 0 0], size(D,1), 1)];
  load test7; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 1 0 0], size(D,1), 1)];
  load test8; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 1 0], size(D,1), 1)];
  load test9; digitdata = [digitdata; D]; targets = [targets; repmat([0 0 0 0 0 0 0 0 0 1], size(D,1), 1)];
  digitdata = digitdata/255;

  totnum=size(digitdata,1);
  fprintf(1, 'Size of the test dataset= %5d \n', totnum);

  rand('state',0); % so we know the permutation of the test data
  randomorder=randperm(totnum);

  numbatches=totnum/100;
  numdims = size(digitdata,2);
  batchsize = 100;
  testbatchdata = zeros(batchsize, numdims, numbatches);
  testbatchtargets = zeros(batchsize, 10, numbatches);

  for b=1:numbatches
    testbatchdata(:,:,b) = digitdata(randomorder(1+(b-1)*batchsize:b*batchsize), :);
    testbatchtargets(:,:,b) = targets(randomorder(1+(b-1)*batchsize:b*batchsize), :);
  end;
  clear digitdata targets;


  %%% Reset random seeds
  rand('state',sum(100*clock));
  randn('state',sum(100*clock));
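
The ten load/repmat pairs follow one fixed pattern, so for reference here is a compact equivalent of the training-set half (a sketch assuming, as above, that each file digit0.mat ... digit9.mat stores its samples in a variable D):

  digitdata = []; targets = [];
  for d = 0:9
    load(['digit' num2str(d)]);              % loads D: (samples of digit d) * 784
    onehot = zeros(1,10); onehot(d+1) = 1;   % one-hot label for class d
    digitdata = [digitdata; D];
    targets   = [targets; repmat(onehot, size(D,1), 1)];
  end
  digitdata = digitdata/255;                 % normalize to [0,1]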

4. rbmhidlinear.m

  % maxepoch  -- maximum number of epochs
  % numhid    -- number of hidden units
  % batchdata -- the data that is divided into batches (numcases numdims numbatches)
  % restart   -- set to 1 if learning starts from beginning

  % Visible, binary, stochastic pixels are connected to hidden, real-valued feature detectors drawn from a unit-variance Gaussian whose mean is determined by the input from the logistic visible units.
  % Purpose: train the top-level RBM, 250->30
  % The hidden (code) units use the linear activation f(x)=x instead of the sigmoid, hence the file name rbmhidlinear.m
  epsilonw = 0.001; % Learning rate for weights
  epsilonvb = 0.001; % Learning rate for biases of visible units
  epsilonhb = 0.001; % Learning rate for biases of hidden units
  weightcost = 0.0002;
  initialmomentum = 0.5;
  finalmomentum = 0.9;

  [numcases numdims numbatches]=size(batchdata);

  if restart ==1
    restart=0;
    epoch=1;

    % Initializing symmetric weights and biases.
    vishid = 0.1*randn(numdims, numhid);
    hidbiases = zeros(1,numhid);
    visbiases = zeros(1,numdims);

    poshidprobs = zeros(numcases,numhid);
    neghidprobs = zeros(numcases,numhid);
    posprods = zeros(numdims,numhid);
    negprods = zeros(numdims,numhid);
    vishidinc = zeros(numdims,numhid);
    hidbiasinc = zeros(1,numhid);
    visbiasinc = zeros(1,numdims);
    sigmainc = zeros(1,numhid);
    batchposhidprobs=zeros(numcases,numhid,numbatches);
  end

  for epoch = epoch:maxepoch
    fprintf(1,'epoch %d\r',epoch);
    errsum=0;

    for batch = 1:numbatches
      fprintf(1,'epoch %d batch %d\r',epoch,batch);

      %%%%%%%%% START POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      data = batchdata(:,:,batch);
      poshidprobs = (data*vishid) + repmat(hidbiases,numcases,1); % mean hidden activations given the data: v0*W + b (linear code units, so no sigmoid)
      batchposhidprobs(:,:,batch)=poshidprobs; % store each batch's hidden activations in a 3-D array
      posprods = data' * poshidprobs; % positive statistics v0'*h0, used in the weight update
      poshidact = sum(poshidprobs); % column sums of the hidden activations, used in the hidden-bias update
      posvisact = sum(data); % column sums of the visible activations, used in the visible-bias update
      %%%%%%%%% END OF POSITIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      %% Gibbs sampling: real-valued hidden states
      poshidstates = poshidprobs+randn(numcases,numhid); % h0: not probabilities but the means plus unit-variance Gaussian noise

      %%%%%%%%% START NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      negdata = 1./(1 + exp(-poshidstates*vishid' - repmat(visbiases,numcases,1))); % reconstruct the visible layer (logistic units)
      neghidprobs = (negdata*vishid) + repmat(hidbiases,numcases,1); % hidden activations on the second forward pass: v1*W + b
      negprods = negdata'*neghidprobs;
      neghidact = sum(neghidprobs);
      negvisact = sum(negdata);
      %%%%%%%%% END OF NEGATIVE PHASE %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

      err= sum(sum( (data-negdata).^2 )); % squared reconstruction error for this batch
      errsum = err + errsum;
      if epoch>5
        momentum=finalmomentum;
      else
        momentum=initialmomentum;
      end

      %%%%%%%%% UPDATE WEIGHTS AND BIASES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      vishidinc = momentum*vishidinc + ...
          epsilonw*( (posprods-negprods)/numcases - weightcost*vishid);
      visbiasinc = momentum*visbiasinc + (epsilonvb/numcases)*(posvisact-negvisact);
      hidbiasinc = momentum*hidbiasinc + (epsilonhb/numcases)*(poshidact-neghidact);
      vishid = vishid + vishidinc;
      visbiases = visbiases + visbiasinc;
      hidbiases = hidbiases + hidbiasinc;
      %%%%%%%%%%%%%%%% END OF UPDATES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    end
    fprintf(1, 'epoch %4i error %f \n', epoch, errsum);

  end
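
In equation form, the conditionals this top-level RBM uses (the standard ones for logistic visible units with Gaussian linear hidden units, matching the positive and negative phases above) are

  p(h_j | v) = N( b_j + ∑_i v_i*w_ij , 1 ),    p(v_i = 1 | h) = σ( a_i + ∑_j w_ij*h_j ),

where σ(x) = 1/(1+e^(-x)), and the CD-1 weight update is Δw_ij = ε( ⟨v_i h_j⟩_data − ⟨v_i h_j⟩_recon ), applied with momentum and weight decay as in the code.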

5. backprop.m

  % The four RBMs are unrolled into an autoencoder whose parameters are fine-tuned with backpropagation on the training data
  maxepoch=200;
  fprintf(1,'\nFine-tuning deep autoencoder by minimizing cross entropy error. \n');
  fprintf(1,'60 batches of 1000 cases each. \n');
  % Load the pretrained weights and biases
  load mnistvh % RBM 1: weights, hidden biases, visible biases; v->h(1000)
  load mnisthp % RBM 2: 1000->500
  load mnisthp2 % RBM 3: 500->250
  load mnistpo % RBM 4: 250->30
  % Split the data into batches
  makebatches;
  [numcases numdims numbatches]=size(batchdata);
  N=numcases; % cases per batch

  %%%% PREINITIALIZE WEIGHTS OF THE AUTOENCODER %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  w1=[vishid; hidrecbiases]; % v->h(1000) weights plus biases: (784+1)*1000
  w2=[hidpen; penrecbiases]; % 1000->500 weights plus biases: 1001*500
  w3=[hidpen2; penrecbiases2]; % 500->250 weights plus biases: 501*250
  w4=[hidtop; toprecbiases]; % 250->30 weights plus biases: 251*30
  w5=[hidtop'; topgenbiases]; % 30->250 weights plus biases: 31*250
  w6=[hidpen2'; hidgenbiases2]; % 250->500 weights plus biases: 251*500
  w7=[hidpen'; hidgenbiases]; % 500->1000 weights plus biases: 501*1000
  w8=[vishid'; visbiases]; % 1000->visible weights plus biases: 1001*784

  %%%%%%%%%% END OF PREINITIALIZATION OF WEIGHTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  l1=size(w1,1)-1; % units per layer: 784
  l2=size(w2,1)-1; % 1000
  l3=size(w3,1)-1; % 500
  l4=size(w4,1)-1; % 250
  l5=size(w5,1)-1; % 30
  l6=size(w6,1)-1; % 250
  l7=size(w7,1)-1; % 500
  l8=size(w8,1)-1; % 1000
  l9=l1; % the output layer has as many units as the input layer: 784
  test_err=[];
  train_err=[];


  for epoch = 1:maxepoch % iterate maxepoch times

    %%%%%%%%%%%%%%%%%%%% COMPUTE TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    err=0;
    [numcases numdims numbatches]=size(batchdata); % cases per batch, dimensionality, number of batches
    N=numcases;
    for batch = 1:numbatches % accumulate the reconstruction error batch by batch, then average
      data = [batchdata(:,:,batch)]; % 100*784
      data = [data ones(N,1)]; % append a constant 1 to every sample because w1 holds both weights and biases: 100*785
      w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)]; % (100*785)*(785*1000)=100*1000, then 100*1001; forward pass: each layer's output probabilities p(h|v), with a constant-1 column appended at every layer
      w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)]; % (100*1001)*(1001*500)=100*500, then 100*501
      w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)]; % (100*501)*(501*250)=100*250, then 100*251
      w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % (100*251)*(251*30)=100*30, then 100*31; layer 5 (the code layer) is linear, not logistic
      w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)]; % (100*31)*(31*250)=100*250, then 100*251
      w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)]; % (100*251)*(251*500)=100*500, then 100*501
      w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)]; % (100*501)*(501*1000)=100*1000, then 100*1001
      dataout = 1./(1 + exp(-w7probs*w8)); % (100*1001)*(1001*784)=100*784; the output layer's probabilities, i.e. the reconstructed data
      err = err + 1/N*sum(sum( (data(:,1:end-1)-dataout).^2 )); % drop the appended constant column; mean squared error within the batch: err = ∑∑(||X-Xout||^2)/N
    end
    train_err(epoch)=err/numbatches; % average training reconstruction error for this epoch

    %%%%%%%%%%%%%% END OF COMPUTING TRAINING RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    %%%% DISPLAY FIGURE TOP ROW REAL DATA BOTTOM ROW RECONSTRUCTIONS %%%%%%%%%%%%%%%%%%%%%%%%%
    fprintf(1,'Displaying in figure 1: Top row - real data, Bottom row -- reconstructions \n');
    output=[];
    for ii=1:15 % display 15 digits per epoch
      output = [output data(ii,1:end-1)' dataout(ii,:)']; % two columns per digit: the real data and its reconstruction
    end
    if epoch==1
      close all
      figure('Position',[100,600,1000,200]);
    else
      figure(1)
    end
    mnistdisp(output); % draw the comparison figure
    drawnow;

    %%%%%%%%%%%%%%%%%%%% COMPUTE TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    [testnumcases testnumdims testnumbatches]=size(testbatchdata); % [100 784 100]: 100 test batches of 100 samples, each of dimension 784
    N=testnumcases;
    err=0;
    for batch = 1:testnumbatches
      data = [testbatchdata(:,:,batch)];
      data = [data ones(N,1)];
      w1probs = 1./(1 + exp(-data*w1)); w1probs = [w1probs ones(N,1)];
      w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
      w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
      w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % before unrolling, the hidden layer of RBM 4 used the linear activation f(x)=x rather than the logistic function, so layer 5 of the unrolled 9-layer network keeps f(x)=x
      w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)];
      w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)];
      w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)];
      dataout = 1./(1 + exp(-w7probs*w8)); % output probabilities = the reconstructed data
      err = err + 1/N*sum(sum( (data(:,1:end-1)-dataout).^2 ));
    end
    test_err(epoch)=err/testnumbatches;
    fprintf(1,'Before epoch %d Train squared error: %6.3f Test squared error: %6.3f \t \t \n',epoch,train_err(epoch),test_err(epoch));

    %%%%%%%%%%%%%% END OF COMPUTING TEST RECONSTRUCTION ERROR %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    %% Regroup the batches: from 600 mini-batches of 100 cases into 60 larger batches of 1000 cases
    tt=0;
    for batch = 1:numbatches/10 % training set: 600 batches of 100 samples become 60 batches of 1000 samples
      fprintf(1,'epoch %d batch %d\r',epoch,batch);

      %%%%%%%%%%% COMBINE 10 MINIBATCHES INTO 1 LARGER MINIBATCH %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      tt=tt+1;
      data=[];
      for kk=1:10
        data=[data
          batchdata(:,:,(tt-1)*10+kk)]; % stack 10 batches of 100 rows into one batch of 1000 rows
      end

      %%%%%%%%%%%%%%% PERFORM CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      max_iter=3; % 3 line searches
      % VV: all weight matrices (biases included) unrolled into one long column vector
      VV = [w1(:)' w2(:)' w3(:)' w4(:)' w5(:)' w6(:)' w7(:)' w8(:)']';
      Dim = [l1; l2; l3; l4; l5; l6; l7; l8; l9]; % units per layer (biases not counted)

      [X, fX] = minimize(VV,'CG_MNIST',max_iter,Dim,data); % conjugate gradient; X is the optimized parameter column vector after 3 line searches
      % CG_MNIST returns the cost function and its partial derivatives, given the parameters, layer sizes and data
      % Reshape the column vector X back into the weight matrices
      w1 = reshape(X(1:(l1+1)*l2),l1+1,l2); % (l1+1)*l2 = (784+1)*1000
      xxx = (l1+1)*l2;
      w2 = reshape(X(xxx+1:xxx+(l2+1)*l3),l2+1,l3);
      xxx = xxx+(l2+1)*l3;
      w3 = reshape(X(xxx+1:xxx+(l3+1)*l4),l3+1,l4);
      xxx = xxx+(l3+1)*l4;
      w4 = reshape(X(xxx+1:xxx+(l4+1)*l5),l4+1,l5);
      xxx = xxx+(l4+1)*l5;
      w5 = reshape(X(xxx+1:xxx+(l5+1)*l6),l5+1,l6);
      xxx = xxx+(l5+1)*l6;
      w6 = reshape(X(xxx+1:xxx+(l6+1)*l7),l6+1,l7);
      xxx = xxx+(l6+1)*l7;
      w7 = reshape(X(xxx+1:xxx+(l7+1)*l8),l7+1,l8);
      xxx = xxx+(l7+1)*l8;
      w8 = reshape(X(xxx+1:xxx+(l8+1)*l9),l8+1,l9); % all parameters reassigned to their optimized values

      %%%%%%%%%%%%%%% END OF CONJUGATE GRADIENT WITH 3 LINESEARCHES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    end

    save mnist_weights w1 w2 w3 w4 w5 w6 w7 w8
    save mnist_error test_err train_err;

  end
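
After fine-tuning, mnist_weights.mat holds the whole unrolled network. As a usage sketch (not part of the original scripts), the 30-dimensional codes for a matrix Xnew of samples (one row per sample, pixel values already scaled to [0,1]) can be obtained by running just the encoder half, mirroring the forward pass above:

  load mnist_weights                             % w1 ... w8 saved by backprop.m
  N = size(Xnew,1);
  probs = 1./(1 + exp(-[Xnew ones(N,1)]*w1));    % 784 -> 1000, logistic
  probs = 1./(1 + exp(-[probs ones(N,1)]*w2));   % 1000 -> 500
  probs = 1./(1 + exp(-[probs ones(N,1)]*w3));   % 500 -> 250
  code  = [probs ones(N,1)]*w4;                  % 250 -> 30, linear code layer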

6. CG_MNIST.m

  % Computes the network's cost function value f and the partial derivatives df of f with respect to every parameter; weights and biases are handled together.
  % VV is the column vector of all network parameters, Dim is the vector of layer sizes, and XX is the batch of training samples. f and df are the cost value and the gradient.
  function [f, df] = CG_MNIST(VV,Dim,XX) % parameters, layer sizes, input data
  % f  : the cost function, i.e. the cross-entropy error -1/N*∑∑( X.*log(Xout) + (1-X).*log(1-Xout) )
  % df : the partial derivatives of the cost with respect to every parameter
  % VV : all weights (biases included) as one long column vector, initialized from pretraining
  % Dim: the number of units in each layer
  % XX : the training samples
  l1 = Dim(1); % units per layer (biases not counted): 784
  l2 = Dim(2); % 1000
  l3 = Dim(3); % 500
  l4 = Dim(4); % 250
  l5 = Dim(5); % 30
  l6 = Dim(6); % 250
  l7 = Dim(7); % 500
  l8 = Dim(8); % 1000
  l9 = Dim(9); % 784
  N = size(XX,1); % number of samples


  % Do deconversion: unpack the parameter vector into weight matrices
  w1 = reshape(VV(1:(l1+1)*l2),l1+1,l2); % take out each layer's weights and biases in turn; VV is one long column vector and each extracted block already includes the biases: 785*1000
  xxx = (l1+1)*l2; % xxx is the running offset into VV
  w2 = reshape(VV(xxx+1:xxx+(l2+1)*l3),l2+1,l3); % 1001*500
  xxx = xxx+(l2+1)*l3;
  w3 = reshape(VV(xxx+1:xxx+(l3+1)*l4),l3+1,l4); % 501*250
  xxx = xxx+(l3+1)*l4;
  w4 = reshape(VV(xxx+1:xxx+(l4+1)*l5),l4+1,l5); % 251*30
  xxx = xxx+(l4+1)*l5;
  w5 = reshape(VV(xxx+1:xxx+(l5+1)*l6),l5+1,l6); % 31*250
  xxx = xxx+(l5+1)*l6;
  w6 = reshape(VV(xxx+1:xxx+(l6+1)*l7),l6+1,l7); % 251*500
  xxx = xxx+(l6+1)*l7;
  w7 = reshape(VV(xxx+1:xxx+(l7+1)*l8),l7+1,l8); % 501*1000
  xxx = xxx+(l7+1)*l8;
  w8 = reshape(VV(xxx+1:xxx+(l8+1)*l9),l8+1,l9); % 1001*784


  XX = [XX ones(N,1)]; % append a constant-1 column so the samples can be multiplied by w1
  w1probs = 1./(1 + exp(-XX*w1)); w1probs = [w1probs ones(N,1)];
  w2probs = 1./(1 + exp(-w1probs*w2)); w2probs = [w2probs ones(N,1)];
  w3probs = 1./(1 + exp(-w2probs*w3)); w3probs = [w3probs ones(N,1)];
  w4probs = w3probs*w4; w4probs = [w4probs ones(N,1)]; % layer 5 (the code layer) is linear, not logistic
  w5probs = 1./(1 + exp(-w4probs*w5)); w5probs = [w5probs ones(N,1)];
  w6probs = 1./(1 + exp(-w5probs*w6)); w6probs = [w6probs ones(N,1)];
  w7probs = 1./(1 + exp(-w6probs*w7)); w7probs = [w7probs ones(N,1)];
  XXout = 1./(1 + exp(-w7probs*w8)); % the output layer's probabilities, i.e. the reconstructed data

  % See 邱锡鹏, 神经网络与深度学习, p. 100
  % Compute each layer's gradient
  f = -1/N*sum(sum( XX(:,1:end-1).*log(XXout) + (1-XX(:,1:end-1)).*log(1-XXout))); % cross-entropy cost
  IO = 1/N*(XXout-XX(:,1:end-1)); % output error term
  Ix8=IO; % the output layer's "residual"
  dw8 = w7probs'*Ix8; % gradient for w8 = (previous layer's activations)' * residual

  Ix7 = (Ix8*w8').*w7probs.*(1-w7probs); % layer-7 residual (with the sigmoid-derivative factor)
  Ix7 = Ix7(:,1:end-1); % drop the bias column
  dw7 = w6probs'*Ix7; % gradient for w7

  Ix6 = (Ix7*w7').*w6probs.*(1-w6probs);
  Ix6 = Ix6(:,1:end-1);
  dw6 = w5probs'*Ix6;

  Ix5 = (Ix6*w6').*w5probs.*(1-w5probs);
  Ix5 = Ix5(:,1:end-1);
  dw5 = w4probs'*Ix5;

  Ix4 = (Ix5*w5'); % layer 5 is linear, so no sigmoid-derivative factor
  Ix4 = Ix4(:,1:end-1);
  dw4 = w3probs'*Ix4;

  Ix3 = (Ix4*w4').*w3probs.*(1-w3probs);
  Ix3 = Ix3(:,1:end-1);
  dw3 = w2probs'*Ix3;

  Ix2 = (Ix3*w3').*w2probs.*(1-w2probs);
  Ix2 = Ix2(:,1:end-1);
  dw2 = w1probs'*Ix2;

  Ix1 = (Ix2*w2').*w1probs.*(1-w1probs);
  Ix1 = Ix1(:,1:end-1);
  dw1 = XX'*Ix1;

  df = [dw1(:)' dw2(:)' dw3(:)' dw4(:)' dw5(:)' dw6(:)' dw7(:)' dw8(:)']'; % the gradient of the cost with respect to all parameters, as one column vector
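
Written out, the quantities this function computes are the cross-entropy cost and a residual recursion backpropagated layer by layer:

  f = -1/N*∑_n∑_i( x_ni*log(xout_ni) + (1-x_ni)*log(1-xout_ni) ),    δ8 = (Xout-X)/N,
  δ_l = (δ_(l+1)*W_(l+1)') .* h_l .* (1-h_l),    dW_l = h_(l-1)' * δ_l,

where h_l denotes the activations of layer l. The sigmoid-derivative factor h.*(1-h) is dropped at the linear code layer, which is exactly the Ix4 step above.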

7. rbm.m and minimize.m

    The rbm.m program is explained in detail in the author's post 受限玻尔兹曼机 (Restricted Boltzmann Machine), and minimize.m in the post minimize.m:共轭梯度法更新BP算法权值 (updating the BP weights with the conjugate gradient method).

8. Experimental results

During fine-tuning, backprop.m prints the average squared reconstruction error on the training and test sets before every epoch, and figure 1 shows fifteen real digits (top row) next to their reconstructions (bottom row).

9. References

[1] Hinton G. E., Salakhutdinov R. R. Reducing the dimensionality of data with neural networks. Science, 2006, 313(5786): 504-507.

[2] Hinton G. E. Training a deep autoencoder or a classifier on MNIST digits.

[3] Hinton G. E. Supporting Online Material.

Original link: http://www.cnblogs.com/kailugaji/p/11599870.html
