mirror of https://github.com/JHUAPL/CCFO.git
synced 2026-01-09 23:07:54 -05:00

Initial commit pre-publication

MNIST/experiment_mnist.m (new file, 243 lines)
@@ -0,0 +1,243 @@
function experiment_mnist(varargin)

% experiment_mnist.m
%
% This MATLAB script runs the feature selection experiment for the MNIST
% data set that was published in the following manuscript:
%
%   C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
%   Optimization in Kernel Machine Classifiers," IEEE Signal Processing
%   Letters, 2015.
%
% The script requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the data set and feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
dsRaw = prtDataGenMnist;                              % The MNIST data set comes with the PRT for demonstration purposes
[dsFeat,featTime] = extractFeaturesFromMNIST(dsRaw);  % Extract various types of features from the MNIST data
tNorm = mean(featTime);                               % Average relative feature extraction time
tNorm = tNorm./sum(tNorm);                            % Normalize the average relative times
featCategories = {'Stats','PCA','GLCM','Sobel'};      % Names of the feature categories
nFeatCategories = length(featCategories);             % Number of feature categories
tNormCategory = [sum(tNorm(1:4)),sum(tNorm(5:14)),sum(tNorm(15:26)),sum(tNorm(27:end))]; % Relative time per category
categoryInds = {1:4,5:14,15:26,27:42};                % Indices of features in each category
categoryIndBegin = [1,5,15,27];                       % Indices where each feature category begins
clear categoryTimes iCategory i sortVec sortInds

% Set up training and testing sets
dsTrain = dsFeat.retainObservations(1:1000);                   % Train on 10% of the data
dsTest = dsFeat.retainObservations(1001:dsRaw.nObservations);  % Test on the remaining 90%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500);   % 500x500 grid: expected relative cost at each (a0,b0) pair
F = nan(500);     % 500x500 grid: average feature selection probability at each (a0,b0) pair
for i = 1:500
    for j = 1:500
        a = a0(i)*ones(size(tNorm));
        b = a0(i) + b0(j)*tNorm;
        tau(i,j) = sum(tNorm .* a./(a+b));
        F(i,j) = (1/dsFeat.nFeatures)*sum((a+1)./(a+b+1));
    end
end
desiredT = 0.1;   % Desired expected relative feature extraction cost
desiredF = 0.50;  % Desired average probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
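
% (Informal note) The grid quantities above follow from Beta-distribution
% identities: for a selection probability rho_p ~ Beta(a_p,b_p), the prior
% mean is E[rho_p] = a_p/(a_p+b_p), so tau is the expected relative
% extraction cost sum_p tNorm_p*E[rho_p]; F averages (a_p+1)/(a_p+b_p+1),
% the Beta mean after a single observed selection. A quick sanity check on
% the chosen hyperparameters:
%   sum(tNorm.*a./(a+b))   % should be close to desiredT
%   mean((a+1)./(a+b+1))   % should be close to desiredF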

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale; % RBF kernel; sigma is scaled to the dimensionality of the features
kernelSet = prtKernelDc & kernel;     % Add a bias dimension to the kernel (DC kernel)

% CCFO - Cost Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO); % Normalize features; one-vs-all classification since this is a multiclass problem

% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM);   % Normalize features; one-vs-all classification since this is a multiclass problem

% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO); % Normalize features; one-vs-all classification since this is a multiclass problem
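
% (Informal note on PRT composition, as used above) '+' chains prtActions
% into a prtAlgorithm, so the preprocessing output feeds the classifier,
% and '&' joins prtKernel objects into a kernel set, e.g.:
%   algo = prtPreProcZmuv + prtClassRvm('kernels',prtKernelDc & prtKernelRbfNdimensionScale);
%   trained = algo.train(dsTrain);  dsOut = trained.run(dsTest);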

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
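% (Note) Each one-vs-all algorithm outputs one column of class-membership
% scores per digit; taking the max across columns yields a 1-based column
% index, and subtracting 1 maps it back to the 0-9 digit labels. The same
% conversion is applied after each classifier below.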
trainedCCFO = algoCCFO.train(dsTrain);    % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest);      % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutCCFO.X = dsOutCCFO.X-1;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain);    % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest);      % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutJCFO.X = dsOutJCFO.X-1;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories);  % Percent correct using each feature category
for iCategory = 1:nFeatCategories     % Loop over all feature categories
    dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory}); % Retain only this category's features for the training set
    dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory});   % Retain only this category's features for the testing set
    trainedRVM = algoRVM.train(dsCategoryTrain);     % Train one-vs-all RVM
    dsOutCategory = trainedRVM.run(dsCategoryTest);  % Run one-vs-all RVM
    [~,dsOutCategory.X] = max(dsOutCategory.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
    dsOutCategory.X = dsOutCategory.X-1;
    pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory); % Calculate overall percent correct
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain);    % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest);      % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutRVM.X = dsOutRVM.X - 1;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results for publication
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories.
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have a higher probability of not being selected.
figure(1),hold on
colors = prtPlotUtilClassColors(length(featCategories));
for iFeat = 1:length(featCategories)
    featInd = categoryInds{iFeat}(1);
    plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
    xlabel('\rho'),ylabel('p(\rho|a,b)')
end
title('Priors on MNIST Feature Selection')
legend(featCategories,'location','southeastoutside')
clear iFeat featInd colors

% Baseline - for each category, show feature computation cost and RVM performance.
% Take-home point: the most expensive features are not always the best for
% classification performance.
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';  h1.LineWidth = 2; h1.Marker = 'o'; h1.MarkerSize = 8;
h2.LineStyle = '--'; h2.LineWidth = 2; h2.Marker = '^'; h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2

% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'FontSize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
clear h

% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses % Loop over all one-vs-all classifiers
    thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta'; % CCFO feature selector parameters for this one-vs-all classifier
    thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta'; % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
for iClass = 1:dsTrain.nClasses
    costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
    costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);

figure(4),set(gcf,'position',[610,512,700,441])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,1]); h = colorbar; ylabel(h,'\theta')
title('CCFO: Learned \theta (MNIST)')
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,2*median(thetaJCFO(:))]); h = colorbar; ylabel(h,'\theta')
title('JCFO: Learned \theta (MNIST)')
clear iClass h m

% Calculate the average number of features selected
avgNumFeatsSelectedRVM = dsTrain.nFeatures;
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO > 2*median(thetaJCFO(:)),2));
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO > 0.5,2));
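
% (Informal note) The two selection thresholds above differ because CCFO's
% theta are probabilities constrained to [0,1] (0.5 is the natural cutoff),
% whereas JCFO's theta are nonnegative scaling factors with no fixed scale,
% so a data-driven cutoff of twice the median is used instead.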

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out a summary of the results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('MNIST Feature Set Summary\n')
fprintf('*************************************\n')
for iCategory = 1:nFeatCategories
    % Category name, number of features, relative cost, accuracy (%)
    fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('MNIST Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard % Drop into the debugger so results can be inspected before the function exits

end

MNIST/extractFeaturesFromMNIST.m (new file, 95 lines)
@@ -0,0 +1,95 @@
function [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)

% [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)
%
% This function extracts four types of features from the MNIST handwritten
% digit recognition data set: statistical features, principal components
% analysis, co-occurrence features, and Sobel edge features.
%
% The function requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS:
%   dsRaw:  the PRT data set provided by prtDataGenMnist()
% OUTPUTS:
%   dsFeat: a new PRT data set containing the features that were computed
%   time:   the amount of time (in seconds) to compute each feature for
%           each observation in the data set
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
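
% Example usage (a minimal sketch; assumes the PRT and the Image Processing
% Toolbox, which supplies graycomatrix/graycoprops/fspecial/imfilter, are
% installed):
%   dsRaw = prtDataGenMnist;
%   [dsFeat,time] = extractFeaturesFromMNIST(dsRaw);
%   size(dsFeat.X)   % nObservations x 42 (4 stats + 10 PCA + 12 GLCM + 16 Sobel)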

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize data structures and processors
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
PCA = prtPreProcPca('nComponents',10); % Initialize the PCA preprocessor to use 10 components
PCA = PCA.train(dsRaw);                % Train PCA on the entire data set
feats = [];  % Features
time = [];   % Extraction times
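% (Informal note) feats and time grow inside the loop below; since the final
% feature count is known (42), preallocating, e.g.
%   feats = nan(dsRaw.nObservations,42);
% would be faster, at the cost of hard-coding the dimensionality.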

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Extract features from each sample
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i = 1:dsRaw.nObservations
    fprintf('Extracting features from sample %d of %d...\n',i,dsRaw.nObservations)

    % Statistical features
    tic
    statFeats = [mean(dsRaw.X(i,:)),std(dsRaw.X(i,:)),skewness(dsRaw.X(i,:)),kurtosis(dsRaw.X(i,:))];
    tStat = toc*ones(1,4);

    % PCA features
    tic;
    dsPca = PCA.run(dsRaw.retainObservations(i));
    pcaFeats = dsPca.X;
    tPca = toc*ones(1,dsPca.nFeatures);

    % Co-occurrence features
    tic;
    glcm1 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',8);
    glcmProps1 = graycoprops(glcm1);
    glcm2 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',16);
    glcmProps2 = graycoprops(glcm2);
    glcm3 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',32);
    glcmProps3 = graycoprops(glcm3);
    glcmFeats = [glcmProps1.Contrast,glcmProps1.Correlation,glcmProps1.Energy,glcmProps1.Homogeneity,...
        glcmProps2.Contrast,glcmProps2.Correlation,glcmProps2.Energy,glcmProps2.Homogeneity,...
        glcmProps3.Contrast,glcmProps3.Correlation,glcmProps3.Energy,glcmProps3.Homogeneity];
    tGlcm = toc*ones(size(glcmFeats));

    % Sobel edge features
    tic;
    edgeDeg = [0, 45, 90, 135];
    edgeFeats = [];
    for j = 1:4
        H = fspecial('sobel');
        H = imrotate(H,edgeDeg(j));
        Y = imfilter(reshape(dsRaw.X(i,:),28,28),H);
        edgeFeats = [edgeFeats,trace(Y)];
        edgeFeats = [edgeFeats,trace(Y')];
        edgeFeats = [edgeFeats,sum(Y(14,:))];
        edgeFeats = [edgeFeats,sum(Y(:,14))];
    end
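    % (Informal note) trace(Y') is mathematically equal to trace(Y), so those
    % two entries duplicate each other; an anti-diagonal sum such as
    % sum(diag(fliplr(Y))) may have been the original intent.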
    tEdge = toc*ones(size(edgeFeats));

    % Concatenate the features into a single vector for the observation
    feats = [feats;statFeats,pcaFeats,glcmFeats,edgeFeats];
    time = [time;tStat,tPca,tGlcm,tEdge];

end

dsFeat = dsRaw;   % Copy the input PRT data set
dsFeat.X = feats; % Overwrite the features

end

PRT Plugins/prtClassCCFO.m (new file, 375 lines)
@@ -0,0 +1,375 @@
classdef prtClassCCFO < prtClass
    % prtClassCCFO Cost Constrained Feature Optimization
    %
    % This is a class written to be compatible with the Pattern Recognition
    % Toolbox (PRT) for MATLAB. The PRT may be downloaded here:
    % http://covartech.github.io/
    %
    % CLASSIFIER = prtClassCCFO returns a CCFO classifier
    %
    % CLASSIFIER = prtClassCCFO(PROPERTY1, VALUE1, ...) constructs a
    % prtClass object CLASSIFIER with properties as specified by
    % PROPERTY/VALUE pairs.
    %
    % A prtClassCCFO object inherits all properties from the abstract class
    % prtClass. In addition it has the following properties:
    %
    %   kernels               - A cell array of prtKernel objects specifying
    %                           the kernels to use (note CCFO currently works
    %                           only with RBF and polynomial kernels)
    %   verbosePlot           - Flag indicating whether or not to plot during
    %                           training
    %   verboseText           - Flag indicating whether or not to output
    %                           verbose updates during training
    %   learningMaxIterations - The maximum number of iterations
    %   ridge                 - Regularization parameter for the ridge
    %                           regression initialization of the weights
    %   gamma                 - Hyperparameter controlling the prior on
    %                           beta (regression weights)
    %   a                     - Hyperparameter controlling the prior on
    %                           theta (feature selectors)
    %   b                     - Hyperparameter controlling the prior on
    %                           theta (feature selectors)
    %   pruneFeatures         - Flag determining whether or not features
    %                           with a small enough theta should be removed
    %   pruneObservations     - Flag determining whether or not observations
    %                           with a small enough beta should be removed
    %
    % A prtClassCCFO also has the following read-only properties:
    %
    %   learningConverged - Flag indicating if the training converged
    %   beta              - The regression weights, estimated during training
    %   theta             - The feature scaling factors, estimated in training
    %   omega             - Term defined in (13) of the CCFO paper
    %   Q                 - The EM objective function being optimized
    %   relevantFeats     - Indices of features determined to be relevant
    %   relevantObs       - Indices of observations determined to be relevant
    %
    % A prtClassCCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
    % KFOLDS methods from prtAction. It also inherits the PLOT method
    % from prtClass.
    %
    % References:
    %   C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained
    %   Feature Optimization in Kernel Machine Classifiers," IEEE
    %   Signal Processing Letters, 2015.
    %
    %   B. Krishnapuram, A. Hartemink, L. Carin, & M.A. Figueiredo, "A
    %   Bayesian approach to joint feature selection and classifier
    %   design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
    %
    % Author: Christopher R. Ratto, JHU/APL
    % Date: 7 October 2015
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
    % All Rights Reserved
    %
    % This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
    % a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
    % review, modification, and/or use of the software, in source and binary forms are ONLY permitted
    % provided you agree to and comply with the terms and conditions set forth in the license.
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
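    %
    % Example usage (a minimal sketch; prtDataGenUnimodal is a standard PRT
    % binary demo data set, and uniform Beta(1,1) priors are assumed here.
    % Note that 'a' and 'b' are given per feature, as column vectors, since
    % training indexes them by feature):
    %   ds = prtDataGenUnimodal;
    %   ccfo = prtClassCCFO('a',ones(ds.nFeatures,1),'b',ones(ds.nFeatures,1));
    %   trained = ccfo.train(ds);
    %   dsOut = trained.run(ds);
    %   prtScoreRoc(dsOut);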

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Define properties
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    % Private properties for internal PRT use
    properties (SetAccess=private)
        name = 'Cost Constrained Feature Optimization' % Full name of the classifier
        nameAbbreviation = 'CCFO'; % Abbreviated name
        isNativeMary = false;      % Cannot handle multi-class data
    end

    % Public properties for general use
    properties
        verbosePlot = false; % Whether or not to plot during training
        verboseText = false; % Whether or not to write text during training
        ridge = 1;           % Ridge regression penalty (for initializing beta)
        gamma = 1;           % Hyperparameter for beta
        a = 1;               % Hyperparameter for theta
        b = 1;               % Hyperparameter for theta
        kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
        pruneFeatures = false;     % Flag for removing features as we go
        pruneObservations = false; % Flag for removing observations as we go
    end

    % Hidden properties that should generally be left alone
    properties (Hidden = true)
        learningMaxIterations = 100;          % Maximum number of iterations
        learningConvergedThreshold = .0001;   % Threshold for whether learning has converged
        learningNormWeightsThresh = 0.001;    % Threshold for whether the weights aren't changing
        learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
        pruningThreshBeta = 0.0001;           % Threshold for removing observations
        pruningThreshTheta = 0.5;             % Threshold for removing features
        featuresRetained = [];                % List of features that were retained
        nMaxFminconEvals = 100;               % Number of steps for fmincon optimization
    end

    % Properties that may be accessed for monitoring the learning algorithm
    properties (SetAccess = 'protected',GetAccess = 'public')
        learningConverged = []; % Whether or not the training converged
        beta = [];              % The regression weights
        theta = [];             % The feature scaling factors
        omega = [];             % Equation (13) in Krishnapuram et al.
        Q = [];                 % The EM objective function
        relevantFeats = [];     % List of relevant features
        relevantObs = [];       % List of relevant observations
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Error checking
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    methods
        % Allow for string, value pairs
        function Obj = prtClassCCFO(varargin)
            Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
        end

        % Make sure the kernel is compatible with CCFO
        function Obj = set.kernels(Obj,val)
            if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) || ~isa(val.kernelCell{1},'prtKernelDc')
                error('prt:prtClassCCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
            else
                Obj.kernels = val;
            end
        end
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Training, testing, and helper functions (called by PRT train and run API)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    methods (Access = protected, Hidden = true)

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Training function (called by Obj.train)
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        function Obj = trainAction(Obj,DataSet)

            %%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
            X = DataSet.X;
            Y = DataSet.Y;
            N = size(X,1);
            P = size(X,2);
            beta = ones(N+1,1);
            theta = .9*ones(P,1);
            omega = ones(N+1,1);
            kernels = Obj.kernels;

            converged = false;
            iteration = 0;
            relevantFeats = true(P,1);
            relevantObs = true(N,1);
            relevantKernels = [true;relevantObs];
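
            % (Informal summary of the EM loop below) The M-step updates the
            % feature selectors theta by constrained minimization of the
            % negative expected log-posterior Q (via fmincon) and updates the
            % kernel weights beta in closed form; the E-step computes the
            % probit latent-variable expectations v and the weight precision
            % expectations omega.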
            while ~converged
                %%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
                iteration = iteration + 1;
                if Obj.verboseText
                    fprintf('CCFO EM Iteration %d:\t',iteration)
                end
                Xrel = X(:,relevantFeats);
                Nrel = size(Xrel,1);
                Prel = size(Xrel,2);
                thetaRel = theta(relevantFeats);
                betaRel = beta(relevantKernels);
                aRel = Obj.a(relevantFeats);
                bRel = Obj.b(relevantFeats);

                %%%%%%%%%%%% M-step %%%%%%%%%%%%%%
                % Update the feature scaling factors
                if iteration > 1
                    if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
                        opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',betarnd(ones(size(thetaRel)),ones(size(thetaRel))),'Display','iter-detailed','TolX',1e-4,'TolFun',1e-4); %'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
                        thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,x,relevantKernels,aRel,bRel),thetaRel,[],[],[],[],zeros(size(thetaRel)),ones(size(thetaRel)),[],opts);
                        theta(relevantFeats) = thetaRel;
                        thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
                    else
                        fprintf('Feature selection converged. Skipping constrained optimization.\n')
                    end
                else
                    thetaNormDiff = nan;
                end
                thetaNorm = norm(theta);

                % Apply scaling factors to features and re-compute the Gram
                % matrix via the kernel function
                XT = bsxfun(@times,Xrel,thetaRel');
                dsTmp = prtDataSetClass(XT,Y);
                kernels = train(kernels,dsTmp);
                H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernel-transformed features that have been selected so far
                H = H(:,relevantKernels);

                % Update the regression weights
                if iteration == 1
                    betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
                    beta(relevantKernels) = betaRel;
                    betaNormDiff = nan;
                else
                    betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
                    beta(relevantKernels) = betaRel;
                    betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
                end
                betaNorm = norm(beta);
                beta = beta./betaNorm;

                %%%%%%%%%%%% E-step %%%%%%%%%%%%%%
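                % (Informal note) For the probit model, the E-step expectation
                % below is the mean of a Gaussian truncated to the side of
                % zero indicated by the label:
                %   v_i = h_i'*beta + (2*y_i-1)*normpdf(h_i'*beta)/normcdf((2*y_i-1)*h_i'*beta)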
                v = nan(N,1);
                for i = 1:N
                    normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
                    if isnan(normFactor)
                        normFactor = 0;
                    end
                    v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of the linear observation model
                end

                omegaRel = nan(length(betaRel),1);
                for i = 1:length(betaRel)
                    omegaRel(i,:) = Obj.gamma*abs(betaRel(i))^(-1); % Expected value of the weight variance
                end
                omega(relevantKernels) = omegaRel;
                S = diag(omegaRel.^(-1/2));

                % Recompute the expected log-posterior
                Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,thetaRel,relevantKernels,aRel,bRel);

                %%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
                if Obj.pruneFeatures
                    relevantFeats(theta < Obj.pruningThreshTheta) = false;
                    theta(~relevantFeats) = 0;
                    thetaRel = theta(relevantFeats);
                end

                if Obj.pruneObservations
                    relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
                    relevantKernels = [true;relevantObs];
                    beta(~relevantKernels) = 0;
                    betaRel = beta(relevantKernels);
                    omegaRel = omega(relevantKernels);
                    S = diag(omegaRel.^(-1/2));
                end

                % For debugging purposes, plot how all of the parameters are updating
                if Obj.verbosePlot
                    figure(666)
                    subplot(2,3,1),plot(v),title('v'),axis tight
                    subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
                    subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
                    subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
                    subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
                    drawnow
                end

                %%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
                if iteration == 1
                    Qdiff = nan;
                else
                    Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
                end
                if Obj.verboseText
                    fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
                end

                if abs(Qdiff) < Obj.learningConvergedThreshold
                    converged = true;
                    fprintf('Expected log-posterior converged within threshold, exiting.\n')
                elseif iteration == Obj.learningMaxIterations
                    converged = true;
                    fprintf('Maximum number of iterations reached, exiting.\n')
                elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
                    converged = true;
                    fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
                end

            end
            %%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
            Obj.beta = beta;
            Obj.theta = theta;
            Obj.omega = omega;
            Obj.Q = Q(end);

            XT = bsxfun(@times,X,theta');
            dsTmp = prtDataSetClass(XT,Y);
            Obj.kernels = train(kernels,dsTmp);
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Running function (called by Obj.run)
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        function DataSetOut = runAction(Obj,DataSet)
            %%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
            X = DataSet.X;
            kernels = Obj.kernels;
            theta = Obj.theta;
            beta = Obj.beta;

            %%%%%%%%%% Run CCFO on the dataset %%%%%%%%%%%%
            DataSet.X = bsxfun(@times,X,theta');
            H = kernels.run_OutputDoubleArray(DataSet);

            %%%%%%%%%% Build the output dataset %%%%%%%%%%%%
            DataSetOut = DataSet;
            DataSetOut.X = normcdf(H*beta);
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Function for calculating the EM objective Q and its derivative (called by Obj.trainAction)
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        function [Qout,dQdTout,beta,omega] = calcQ(Obj,X,kernel,v,omega,theta,relevantKernels,a,b)

            % Build the Gram matrix using the proposed vector of feature scaling factors
            N = size(X,1);
            Nk = sum(relevantKernels);
            P = size(X,2);
            XT = bsxfun(@times,X,theta');
            dsTmp = prtDataSetClass(XT);
            kernels = train(kernel,prtDataSetClass(XT));
            H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernel-transformed features that have been selected so far
            H = H(:,relevantKernels);
            S = diag(omega.^(-1/2));

            % Update the regression weights in closed form
            beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;

            % Calculate the expected log-posterior
            nu1 = psi(a) - psi(a + b);
            nu2 = psi(b) - psi(a + b);
            Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta + sum(theta.*nu1 + (1-theta).*nu2);
            Qout = -Q; % Using the negative since MATLAB optimization does minimization only

            % Calculate the derivative of Q w.r.t. each theta
            dQdT = nan(1,P);
            if isa(Obj.kernels.kernelCell{2},'prtKernelPolynomial')
                n = Obj.kernels.kernelCell{2}.d;
                xTx = X*diag(theta)*X';
                for k = 1:P
                    xxk = X(:,k)*X(:,k)';
                    dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk]; % Derivative of the polynomial kernel provided in Krishnapuram et al., RECOMB '03
                    dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT)); % Sign matches d/dtheta_k of the theta terms in Q above
                end
                dQdTout = -dQdT;
            elseif isa(Obj.kernels.kernelCell{2},'prtKernelRbf')
                for k = 1:P
                    Xk = X(:,k);
                    dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
                    if isa(Obj.kernels.kernelCell{2},'prtKernelRbfNdimensionScale')
                        dXk = dXk./(P*Obj.kernels.kernelCell{2}.sigma.^2);
                    else
                        dXk = dXk./Obj.kernels.kernelCell{2}.sigma.^2;
                    end
                    if isa(Obj.kernels.kernelCell{1},'prtKernelDc')
                        dXk = [zeros(N,1),dXk];
                    end
                    dHdT = -H.*dXk(:,relevantKernels);
                    dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
                    dQdTout(k) = -dQdT(k);
                end
            end
        end
    end
end

PRT Plugins/prtClassJCFO.m (new file, 369 lines)
@@ -0,0 +1,369 @@
classdef prtClassJCFO < prtClass
    % prtClassJCFO Joint Classifier and Feature Optimization
    %
    % This is a class written to be compatible with the Pattern Recognition
    % Toolbox (PRT) for MATLAB. The PRT may be downloaded here:
    % http://covartech.github.io/
    %
    % CLASSIFIER = prtClassJCFO returns a JCFO classifier
    %
    % CLASSIFIER = prtClassJCFO(PROPERTY1, VALUE1, ...) constructs a
    % prtClass object CLASSIFIER with properties as specified by
    % PROPERTY/VALUE pairs.
    %
    % A prtClassJCFO object inherits all properties from the abstract class
    % prtClass. In addition it has the following properties:
    %
    %   kernels               - A cell array of prtKernel objects specifying
    %                           the kernels to use (note JCFO currently works
    %                           only with RBF and polynomial kernels)
    %   verbosePlot           - Flag indicating whether or not to plot during
    %                           training
    %   verboseText           - Flag indicating whether or not to output
    %                           verbose updates during training
    %   learningMaxIterations - The maximum number of iterations
    %   ridge                 - Regularization parameter for the ridge
    %                           regression initialization of the weights
    %   gamma1                - Hyperparameter controlling the prior on
    %                           beta (regression weights)
    %   gamma2                - Hyperparameter controlling the prior on
    %                           theta (feature scaling factors)
    %   pruneFeatures         - Flag determining whether or not features
    %                           with a small enough theta should be removed
    %   pruneObservations     - Flag determining whether or not observations
    %                           with a small enough beta should be removed
    %
    % A prtClassJCFO also has the following read-only properties:
    %
    %   learningConverged - Flag indicating if the training converged
    %   beta              - The regression weights, estimated during training
    %   theta             - The feature scaling factors, estimated in training
    %   delta             - Term defined in (14) of the JCFO paper
    %   omega             - Term defined in (13) of the JCFO paper
    %   Q                 - The EM objective function being optimized
    %   relevantFeats     - Indices of features determined to be relevant
    %   relevantObs       - Indices of observations determined to be relevant
    %
    % A prtClassJCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
    % KFOLDS methods from prtAction. It also inherits the PLOT method
    % from prtClass.
    %
    % Reference:
    %   B. Krishnapuram, A. Hartemink, L. Carin, & M.A. Figueiredo, "A
    %   Bayesian approach to joint feature selection and classifier
    %   design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
    %
    % Author: Christopher R. Ratto, JHU/APL
    % Date: 7 October 2015
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
    % All Rights Reserved
    %
    % This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
    % a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
    % review, modification, and/or use of the software, in source and binary forms are ONLY permitted
    % provided you agree to and comply with the terms and conditions set forth in the license.
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
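    %
    % Example usage (a minimal sketch; prtDataGenUnimodal is a standard PRT
    % binary demo data set, and the default RBF+DC kernel set and unit
    % hyperparameters are assumed):
    %   ds = prtDataGenUnimodal;
    %   jcfo = prtClassJCFO('gamma1',1,'gamma2',1);
    %   trained = jcfo.train(ds);
    %   dsOut = trained.run(ds);
    %   prtScoreRoc(dsOut);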
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Define properties
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
% Private properties for internal PRT use
|
||||||
|
properties (SetAccess=private)
|
||||||
|
name = 'Joint Classifier and Feature Optimization' % Full name of the classifier
|
||||||
|
nameAbbreviation = 'JCFO'; % Abbreviated name
|
||||||
|
isNativeMary = false; % Cannot handle multi-class data
|
||||||
|
end
|
||||||
|
|
||||||
|
% Public properties for general use
|
||||||
|
properties
|
||||||
|
verbosePlot = false; % Whether or not to plot during training
|
||||||
|
verboseText = false; % Whether or not to write text during training
|
||||||
|
ridge = 1; % Ridge regression penalty (for initializing beta)
|
||||||
|
gamma1 = 1; % Hyperparameter for beta
|
||||||
|
gamma2 = 1; % Hyperparameter for theta
|
||||||
|
kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
|
||||||
|
pruneFeatures = false; % Flag for removing features as we go
|
||||||
|
pruneObservations = false; % Flag for removing observations as we go
|
||||||
|
end
|
||||||
|
|
||||||
|
% Hidden properties that should generally be left alone
|
||||||
|
properties (Hidden = true)
|
||||||
|
learningMaxIterations = 100; % Maximum number of iterations
|
||||||
|
learningConvergedThreshold = .001; % Threshold for whether learning has converged
|
||||||
|
learningNormWeightsThresh = 0.001; % Threshold for whether the weights aren't changing
|
||||||
|
learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
|
||||||
|
pruningThreshBeta = 0.0001; % Threshold for removing observations
|
||||||
|
pruningThreshTheta = 0.1; % Threshold for removing features
|
||||||
|
featuresRetained = []; % List of features being retained
|
||||||
|
nMaxFminconEvals = 100; % Number of steps for fmincon optimization
|
||||||
|
end
|
||||||
|
|
||||||
|
% Properties that may be accessed for monitoring of learning algorithm
|
||||||
|
properties (SetAccess = 'protected',GetAccess = 'public')
|
||||||
|
learningConverged = []; % Whether or not the training converged
|
||||||
|
beta = []; % The regression weights
|
||||||
|
theta = []; % The feature scaling factors
|
||||||
|
delta = []; % Equation (14) in Krishnapuraum et al.
|
||||||
|
omega = []; % Equation (13) in Krishnapuraum et al.
|
||||||
|
Q = []; % EM objective function
|
||||||
|
relevantFeats = []; % List of relevant features
|
||||||
|
relevantObs = []; % List of relevant observations
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Error checking
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
methods
|
||||||
|
% Allow for string, value pairs
|
||||||
|
function Obj = prtClassJCFO(varargin)
|
||||||
|
Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
|
||||||
|
end
|
||||||
|
|
||||||
|
% Make sure the kernel is compatible with JCFO
|
||||||
|
function Obj = set.kernels(Obj,val)
|
||||||
|
if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) && ~isa(val.kernelCell{1},'prtKernelDc')
|
||||||
|
error('prt:prtClassJCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
|
||||||
|
else
|
||||||
|
Obj.kernels = val;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Training, testing, and helper functions (called by PRT train and run API)
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
methods (Access = protected, Hidden = true)
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Training function (called by Obj.train)
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
function Obj = trainAction(Obj,DataSet)
|
||||||
|
|
||||||
|
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
|
||||||
|
X = DataSet.X;
|
||||||
|
Y = DataSet.Y;
|
||||||
|
N = size(X,1);
|
||||||
|
P = size(X,2);
|
||||||
|
beta = ones(N+1,1);
|
||||||
|
theta = ones(P,1);
|
||||||
|
delta = ones(P,1);
|
||||||
|
omega = ones(N+1,1);
|
||||||
|
kernels = Obj.kernels;
|
||||||
|
|
||||||
|
converged = false;
|
||||||
|
iteration = 0;
|
||||||
|
relevantFeats = true(P,1);
|
||||||
|
relevantObs = true(N,1);
|
||||||
|
relevantKernels = [true;relevantObs];
|
||||||
|
while ~converged
|
||||||
|
%%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
|
||||||
|
iteration = iteration + 1;
|
||||||
|
if Obj.verboseText
|
||||||
|
fprintf('JCFO EM Iteration %d:\t',iteration)
|
||||||
|
end
|
||||||
|
|
||||||
|
Xrel = X(:,relevantFeats);
|
||||||
|
Nrel = size(Xrel,1);
|
||||||
|
Prel = size(Xrel,2);
|
||||||
|
thetaRel = theta(relevantFeats);
|
||||||
|
betaRel = beta(relevantKernels);
|
||||||
|
|
||||||
|
%%%%%%%%%%%% M-step %%%%%%%%%%%%%%
|
||||||
|
% Update the feature scaling factors
|
||||||
|
if iteration > 1
|
||||||
|
if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
|
||||||
|
opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',ones(size(thetaRel)),'Display','off','TolX',1e-6,'TolFun',1e-6);%,'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
|
||||||
|
thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,x,relevantKernels),thetaRel,[],[],[],[],zeros(size(thetaRel)),inf(size(thetaRel)),[],opts);
|
||||||
|
theta(relevantFeats) = thetaRel;
|
||||||
|
thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
|
||||||
|
else
|
||||||
|
fprintf('Feature selection converged. Skipping constrained optimization.\n')
|
||||||
|
end
|
||||||
|
else
|
||||||
|
thetaNormDiff = nan;
|
||||||
|
end
|
||||||
|
thetaNorm = norm(theta);
|
||||||
|
|
||||||
|
% Apply scaling factors to features and re-compute the Gram
|
||||||
|
% matrix via the kernel function
|
||||||
|
XT = bsxfun(@times,Xrel,thetaRel');
|
||||||
|
dsTmp = prtDataSetClass(XT,Y);
|
||||||
|
kernels = train(kernels,dsTmp);
|
||||||
|
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
|
||||||
|
H = H(:,relevantKernels);
|
||||||
|
% Update the regression weights
|
||||||
|
if iteration == 1
|
||||||
|
betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
|
||||||
|
beta(relevantKernels) = betaRel;
|
||||||
|
betaNormDiff = nan;
|
||||||
|
else
|
||||||
|
betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
|
||||||
|
beta(relevantKernels) = betaRel;
|
||||||
|
betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
|
||||||
|
end
|
||||||
|
betaNorm = norm(beta);
|
||||||
|
beta = beta./betaNorm;
|
||||||
|
|
||||||
|
%%%%%%%%%%%% E-step %%%%%%%%%%%%%%
|
||||||
|
v = nan(N,1);
|
||||||
|
for i = 1:N
|
||||||
|
normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
|
||||||
|
if isnan(normFactor)
|
||||||
|
normFactor = 0;
|
||||||
|
end
|
||||||
|
v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of linear observation model
|
||||||
|
end
|
||||||
|
|
||||||
|
omegaRel = nan(length(betaRel),1);
|
||||||
|
for i = 1:length(betaRel)
|
||||||
|
omegaRel(i,:) = Obj.gamma1*abs(betaRel(i))^(-1); % Expected value of weight variance
|
||||||
|
end
|
||||||
|
omega(relevantKernels) = omegaRel;
|
||||||
|
S = diag(omegaRel.^(-1/2));
|
||||||
|
|
||||||
|
deltaRel = nan(Prel,1);
|
||||||
|
for k = 1:Prel
|
||||||
|
deltaRel(k,:) = Obj.gamma2*thetaRel(k)^(-1); % Expected value of feature selectors
|
||||||
|
end
|
||||||
|
delta(relevantFeats) = deltaRel;
|
||||||
|
|
||||||
|
% Recompute the expected log-posterior
|
||||||
|
Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,thetaRel,relevantKernels); % Expected log-posterior
|
||||||
|
|
||||||
|
%%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
|
||||||
|
if Obj.pruneFeatures
|
||||||
|
relevantFeats(theta < Obj.pruningThreshTheta) = false;
|
||||||
|
theta(~relevantFeats) = 0;
|
||||||
|
thetaRel = theta(relevantFeats);
|
||||||
|
deltaRel = delta(relevantFeats);
|
||||||
|
end
|
||||||
|
|
||||||
|
if Obj.pruneObservations
|
||||||
|
relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
|
||||||
|
relevantKernels = [true;relevantObs];
|
||||||
|
beta(~relevantKernels) = 0;
|
||||||
|
betaRel = beta(relevantKernels);
|
||||||
|
omegaRel = omega(relevantKernels);
|
||||||
|
S = diag(omegaRel.^(-1/2));
|
||||||
|
end
|
||||||
|
|
||||||
|
% For debugging purposes, plot how all of the parameters are updating
|
||||||
|
if Obj.verbosePlot
|
||||||
|
figure(666)
|
||||||
|
subplot(2,3,1),plot(v),title('v'),axis tight
|
||||||
|
subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
|
||||||
|
subplot(2,3,3),plot(log(delta),'marker','o'),title('log(\delta)'),axis tight
|
||||||
|
subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
|
||||||
|
subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
|
||||||
|
subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
|
||||||
|
drawnow
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
|
||||||
|
if iteration == 1
|
||||||
|
Qdiff = nan;
|
||||||
|
else
|
||||||
|
Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
|
||||||
|
end
|
||||||
|
if Obj.verboseText
|
||||||
|
fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
|
||||||
|
end
|
||||||
|
|
||||||
|
if abs(Qdiff) < Obj.learningConvergedThreshold;
|
||||||
|
converged = true;
|
||||||
|
fprintf('Expected log-posterior converged within threshold, exiting.\n')
|
||||||
|
elseif iteration == Obj.learningMaxIterations;
|
||||||
|
converged = true;
|
||||||
|
fprintf('Maximum number of iterations reached, exiting.\n')
|
||||||
|
elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
|
||||||
|
converged = true;
|
||||||
|
fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
%%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
|
||||||
|
Obj.beta = beta;
|
||||||
|
Obj.theta = theta;
|
||||||
|
Obj.delta = delta;
|
||||||
|
Obj.omega = omega;
|
||||||
|
Obj.Q = Q(end);
|
||||||
|
|
||||||
|
XT = bsxfun(@times,X,theta');
|
||||||
|
dsTmp = prtDataSetClass(XT,Y);
|
||||||
|
Obj.kernels = train(kernels,dsTmp);
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Running function (called by Obj.run)
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
function DataSetOut = runAction(Obj,DataSet)
|
||||||
|
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
|
||||||
|
X = DataSet.X;
|
||||||
|
kernels = Obj.kernels;
|
||||||
|
theta = Obj.theta;
|
||||||
|
beta = Obj.beta;
|
||||||
|
|
||||||
|
%%%%%%%%%% Run JCFO on dataset %%%%%%%%%%%%
|
||||||
|
DataSet.X = bsxfun(@times,X,theta');
|
||||||
|
H = kernels.run_OutputDoubleArray(DataSet);
|
||||||
|
|
||||||
|
%%%%%%%%%% Build output dataset %%%%%%%%%%%%
|
||||||
|
DataSetOut = DataSet;
|
||||||
|
DataSetOut.X = normcdf(H*beta);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function for calculating the EM objective Q and its derivative (called by Obj.trainAction)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Qout,dQdTout] = calcQ(Obj,X,kernel,v,omega,delta,theta,relevantKernels)

% Build Gram matrix using the proposed vector of feature scaling factors
N = size(X,1);
Nk = sum(relevantKernels);
P = size(X,2);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT);
kernels = train(kernel,prtDataSetClass(XT));
H = kernels.run_OutputDoubleArray(dsTmp);  % Gram matrix for the kernel-transformed features that have been selected so far
H = H(:,relevantKernels);
S = diag(omega.^(-1/2));

% Calculate the expected log-posterior
beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;
Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta - theta'*diag(delta)*theta;
Qout = -Q;  % Negated since MATLAB optimization routines minimize
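
% Reading aid (added): since -beta'*(H'*H)*beta + 2*beta'*H'*v = -norm(H*beta-v)^2 + v'*v,
% Q above is, up to an additive constant, the penalized least-squares objective
%   Q(theta) = -||H*beta - v||^2 - beta'*diag(omega)*beta - theta'*diag(delta)*theta,
% and the beta above, S*(I + S*H'*H*S)^(-1)*S*H'*v = (H'*H + diag(omega))^(-1)*H'*v,
% is its maximizer for fixed theta.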
dQdT = nan(1,P);

% Calculate derivative of Q w.r.t. each theta
if isa(kernel.kernelCell{2},'prtKernelPolynomial')
    n = kernel.kernelCell{2}.d;
    xTx = X*diag(theta)*X';
    for k = 1:P
        xxk = X(:,k)*X(:,k)';
        dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk];  % Derivative of the polynomial kernel provided in Krishnapuram et al., RECOMB '03
        dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
    end
    dQdTout = -dQdT;
elseif isa(kernel.kernelCell{2},'prtKernelRbf')
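    % Reading aid (added): writing the RBF Gram matrix as H_ij = exp(-d_ij(theta)),
    % where d_ij(theta) is the feature-scaled squared distance between observations
    % i and j, the chain rule gives dH_ij/dtheta_k = -H_ij * dXk_ij, with dXk the
    % matrix of per-feature squared distances (scaled by sigma^2) computed below.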
    for k = 1:P
        Xk = X(:,k);
        dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
        if isa(kernel.kernelCell{2},'prtKernelRbfNdimensionScale')
            dXk = dXk./(P*kernel.kernelCell{2}.sigma.^2);
        else
            dXk = dXk./kernel.kernelCell{2}.sigma.^2;
        end
        if isa(kernel.kernelCell{1},'prtKernelDc')
            dXk = [zeros(N,1),dXk];
        end
        dHdT = -H.*dXk(:,relevantKernels);
        dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
        dQdTout(k) = -dQdT(k);
    end
end
end
end
end
37
README.txt
Normal file
@@ -0,0 +1,37 @@
Cost-Constrained Feature Optimization (CCFO) for MATLAB

This MATLAB code may be used to replicate the results published in the following manuscript:
C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
Optimization in Kernel Machine Classifiers," IEEE Signal Processing
Letters, 2015.

The code is organized into three directories as follows:
Urban Land Cover/ - Code for replicating the urban land cover experiment from the paper
    experiment_urbanLandCover.m - Script for running the experiment
    featureTimes_urbanLandCover.m - Function for estimating feature computation times
MNIST/ - Code for replicating the MNIST experiment from the paper
    experiment_mnist.m - Script for running the experiment
    extractFeaturesFromMNIST.m - Function for extracting features from the handwritten digits
PRT Plugins/ - The actual machine learning code, written as plugins to the Pattern Recognition Toolbox (PRT)
    prtClassJCFO - Joint Classifier and Feature Optimization
    prtClassCCFO - Cost-Constrained Feature Optimization

To run either experiment, or to use our code in your own research, you must download and install the
Pattern Recognition Toolbox (PRT) for MATLAB at http://covartech.github.io/

To run the urban land cover experiment, you must download the data from the UCI Machine
Learning Repository: https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
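
For example (an illustrative sketch; the data-directory path below is a placeholder),
once the PRT and this repository are on the MATLAB path, the experiments can be run as:

    experiment_mnist                                      % MNIST experiment
    experiment_urbanLandCover('C:\data\UrbanLandCover')   % Urban Land Cover experiment,
                                                          % pointing at the downloaded CSVs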

The code benchmarks each of the feature computation times on a test image of a black and white circle.
The function "MidpointCircle.m" required to generate this test image is available via MATLAB Central:
http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m

*******************************************************************************************************************
This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
All Rights Reserved

This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
review, modification, and/or use of the software, in source and binary forms are ONLY permitted
provided you agree to and comply with the terms and conditions set forth in the license.
*******************************************************************************************************************
307
Urban Land Cover/experiment_urbanLandCover.m
Normal file
@@ -0,0 +1,307 @@
function experiment_urbanLandCover(varargin)

% experiment_urbanLandCover.m
%
% This MATLAB script runs the feature selection experiment for the Urban
% Land Cover data set that was published in the following manuscript:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
% Optimization in Kernel Machine Classifiers," IEEE Signal Processing
% Letters, 2015.
%
% The script requires the Urban Land Cover data set, which is available
% from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The script also requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS (optional):
%   dataDir: full path to the directory in which the CSV data is saved
%            (default is the current working directory)
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the training and test data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if nargin == 0
    dataDir = pwd;
else
    dataDir = varargin{1};
end
[~,~,raw] = xlsread(fullfile(dataDir,'training.csv'));  % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end));                         % Features from spreadsheet
labels = strrep(raw(2:end,1),' ','');                   % Class labels from spreadsheet
uLabels = unique(labels);                               % Unique class labels
Y = nan(size(labels));                                  % Numeric class labels
for i = 1:length(labels)
    Y(i,:) = find(strcmp(labels{i},uLabels));           % Assign a numeric label to each class name
end
featNames = raw(1,2:end);                               % Feature names
dsTrain = prtDataSetClass(X,Y);                         % Training PRT data set
dsTrain.classNames = uLabels;                           % Training class names
dsTrain.featureNames = featNames;                       % Training feature names
clear raw X labels uLabels Y featNames i

[~,~,raw] = xlsread(fullfile(dataDir,'testing.csv'));   % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end));                         % Features from spreadsheet
labels = strrep(raw(2:end,1),' ','');                   % Class labels from spreadsheet
uLabels = unique(labels);                               % Unique class labels
Y = nan(size(labels));                                  % Numeric class labels
for i = 1:length(labels)
    Y(i,:) = find(strcmp(labels{i},uLabels));           % Assign a numeric label to each class name
end
featNames = raw(1,2:end);                               % Feature names
dsTest = prtDataSetClass(X,Y);                          % Testing PRT data set
dsTest.classNames = uLabels;                            % Testing class names
dsTest.featureNames = featNames;                        % Testing feature names
clear raw X labels uLabels Y featNames i
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[featCategories,~,categoryTimes] = featureTimes_urbanLandCover;  % Benchmarked feature computation times (see featureTimes_urbanLandCover.m)
nFeatCategories = length(featCategories);                        % Number of feature categories
[categoryTimes,sortInds] = sort(cat(1,[],categoryTimes),'ascend'); % Sort the computation time of each feature category
featCategories = featCategories(sortInds);                       % Rearrange the order of feature category names
sortVec = [];  % Vector of feature indices corresponding to the sorted categories
for iCategory = 1:nFeatCategories
    sortVec = [sortVec,find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory})))]; %#ok<AGROW>
end
dsTrain.X = dsTrain.X(:,sortVec);                        % Rearrange the order of features in the training set
dsTrain.featureNames = dsTrain.featureNames(:,sortVec);  % Rearrange the order of feature names in the training set
dsTest.X = dsTest.X(:,sortVec);                          % Rearrange the order of features in the testing set
dsTest.featureNames = dsTest.featureNames(:,sortVec);    % Rearrange the order of feature names in the testing set
categoryInds = cell(1,nFeatCategories);     % Feature indices corresponding to each category
T = nan(1,dsTrain.nFeatures);               % Computation time (seconds) of each feature
TperCategory = nan(1,nFeatCategories);      % Total computation time (seconds) of each category
categoryIndBegin = nan(1,nFeatCategories);  % The index of the first feature from each category
for iCategory = 1:nFeatCategories
    categoryInds{iCategory} = find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory}))); % Find the features that belong to this category
    categoryIndBegin(iCategory) = min(categoryInds{iCategory});
    T(categoryInds{iCategory}) = categoryTimes(iCategory);      % Computation time for each feature (seconds)
    TperCategory(iCategory) = sum(T(categoryInds{iCategory}));  % Computation time for each feature category (seconds)
end
tNorm = T./sum(T);                                % Normalized computation time of each feature
tNormCategory = TperCategory./sum(TperCategory);  % Normalized computation time of each feature category
featCategories = strrep(featCategories,'_','');   % Remove underscores from feature category names since they garble the plot labels
clear categoryTimes iCategory i sortVec sortInds
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500,500);
F = nan(500,500);
for i = 1:500
    for j = 1:500
        a = a0(i)*ones(size(tNorm));
        b = a0(i) + b0(j)*tNorm;
        tau(i,j) = sum(tNorm .* a./(a+b));
        F(i,j) = (1/dsTrain.nFeatures)*sum((a+1)./(a+b+1));
    end
end
desiredT = 0.25;  % Desired expected (normalized) feature extraction cost
desiredF = 0.50;  % Desired average probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
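
% Sanity check (added sketch): with rho_k ~ Beta(a_k,b_k), E[rho_k] = a_k/(a_k+b_k),
% so tau above is the expected normalized extraction cost under the prior. The
% selected hyperparameters can be verified directly:
tauCheck = sum(tNorm .* a./(a+b));                    % should be close to desiredT
Fcheck = (1/dsTrain.nFeatures)*sum((a+1)./(a+b+1));   % should be close to desiredF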
clear featInd legendNames colors tau F iA iB iFeat

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale;  % RBF kernel, scale the sigma parameter to the dimensionality of the features
kernelSet = prtKernelDc & kernel;      % Add a bias dimension to the kernel (DC kernel)

% CCFO - Cost-Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO);  % Normalize features; one-vs-all classification since this is a multiclass problem

% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM);    % Normalize features; one-vs-all classification since this is a multiclass problem

% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO);  % Normalize features; one-vs-all classification since this is a multiclass problem
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedCCFO = algoCCFO.train(dsTrain);    % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest);      % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain);    % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest);      % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories);  % Percent correct using each feature category
for iCategory = 1:nFeatCategories     % Loop over all feature categories
    dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory});  % Retain only features from this category for the training set
    dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory});    % Retain only features from this category for the testing set
    trainedRVM = algoRVM.train(dsCategoryTrain);      % Train one-vs-all RVM
    dsOutCategory = trainedRVM.run(dsCategoryTest);   % Run one-vs-all RVM
    [~,dsOutCategory.X] = max(dsOutCategory.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
    pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory);  % Calculate percent correct (overall accuracy)
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features together)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain);    % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest);      % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories.
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have higher probability of not being selected.
figure(1),set(gcf,'position',[610,512,700,441])
colors = prtPlotUtilClassColors(length(featCategories));
colors(7,:) = [0,1,1];
subplot(2,1,1),hold on
for iFeat = 1:length(featCategories(1:8))
    featInd = categoryInds{iFeat}(1);
    plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
    xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Cheap Features (Urban Land Cover)')
legend(featCategories(1:8),'location','southeastoutside')
subplot(2,1,2),hold on
for iFeat = 1:length(featCategories(9:15))
    featInd = categoryInds{8+iFeat}(1);
    plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
    xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Expensive Features (Urban Land Cover)')
legend(featCategories(9:15),'location','southeastoutside')
clear iFeat featInd colors
% Baseline - for each category, show feature computation cost and RVM performance.
% Take-home point: the most expensive features are not always the best for
% classification performance.
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';  h1.LineWidth = 2; h1.Marker = 'o'; h1.MarkerSize = 8;
h2.LineStyle = '--'; h2.LineWidth = 2; h2.Marker = '^'; h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2
% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'FontSize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
clear h

% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses  % Loop over all one-vs-all classifiers
    thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta';  % CCFO feature selector parameters for this one-vs-all classifier
    thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta';  % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
for iClass = 1:dsTrain.nClasses
    costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
    costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
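
% Note (added): CCFO's theta lies in [0,1] (cf. the caxis([0,1]) setting below), so
% 0.5 is a natural selection threshold; JCFO's theta is an unconstrained scale
% parameter, so a data-driven threshold (twice its median) is used instead.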
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);

figure(4),set(gcf,'outerposition',[255,317,568,596])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,1]); h = colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'CCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionCCFO)})
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,2*median(thetaJCFO(:))]); h = colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'JCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionJCFO)})
clear iClass h m
% Average number of features selected
avgNumFeatsSelectedRVM = dsTest.nFeatures;
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO > 2*median(thetaJCFO(:)),2));
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO > 0.5,2));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out summary of results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('Urban Land Cover Feature Set Summary\n')
fprintf('*************************************\n')
for iCategory = 1:nFeatCategories
    fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('Urban Land Cover Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard  % Drop to the debugger so results can be inspected interactively
end
280
Urban Land Cover/featureTimes_urbanLandCover.m
Normal file
@@ -0,0 +1,280 @@
function [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover()

% [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover
%
% Function to estimate the extraction times for each of the features in the
% urban land cover data set, which is available from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The code benchmarks each of the feature computation times on a test image
% of a black and white circle. The function "MidpointCircle.m" required to
% generate the test image is available via MATLAB Central:
% http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m
%
% The computations for each feature were derived from the following reference
% (page references provided in the source code):
% Definiens AG, "Definiens 5 Reference Book," Munich, Germany 2006.
%
% INPUTS: none
%
% OUTPUTS:
%   featureCategories: cell array of feature category names
%   timeAbs: the absolute time (in sec) to compute each feature, via tic/toc
%   timeRel: the relative time (%) to compute each feature in the set
%
% Author: Carlos A. Caceres, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the test image
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
featureCategories = {'Area','Assym','BordLngth','BrdIndx','Bright','Compact','Dens','GLCM','LW','Mean','NDVI','Rect','Round','SD','ShpIndx'};
imgSize = 1024;                                           % Size of test image
img = zeros(imgSize);                                     % Initialize the test image
img = MidpointCircle(img, 250, imgSize/2, imgSize/2, 1);  % Fill in with a circle
img2 = img + randn(size(img));                            % Add white Gaussian noise

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Area (ref. page 58)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1);  % Assume each pixel has area = 1
timeAbs(1) = toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Asymmetry (ref. page 60)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
varXY = var(idListX.*idListY);
assym = 2*sqrt(.25*(varX+varY)^2 + (varXY)^2 - varX*varY)/(varX+varY);
timeAbs(2) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border length (ref. page 36)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
bordlength = sum(temp(:));
timeAbs(3) = toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border index (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
[idListX,idListY] = find(img==1);
P(1,:) = idListX; P(2,:) = idListY;
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);

borderIndex = sum(temp(:))/(2*(l+width));
timeAbs(4) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Brightness (ref. page 42)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
img3 = repmat(img,1,1,3);
tic;
% For an RGB image
wkb = [1,2,3];
brightness = (1/mean(wkb))*sum((wkb.*reshape(mean(mean(img3,1),2),1,3)));
% % For a binary image:
% mean(img(:));
timeAbs(5) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compactness (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
P(1,:) = idListX; P(2,:) = idListY;
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);

compactness = l*width/sum(img(:)==1);
timeAbs(6) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Density (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
dens = sqrt(sum(img(:)==1))/(1+sqrt(varX+varY));
timeAbs(7) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Gray-level co-occurrence matrix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
graycomatrix(img2);
timeAbs(8) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Length/width (ref. page 59)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
P(1,:) = idListX; P(2,:) = idListY;
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);

lw = l/width;
timeAbs(9) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Mean (ref. page 40)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
datMean = mean(img2(:));
timeAbs(10) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NDVI
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
ndvi = sum((img - img2)./(img + img2));  % NDVI = (NIR - R)./(NIR + R), treating img and img2 as the two bands
timeAbs(11) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Rectangular fit (ref. page 65)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Get outlines of each object
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
height = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
area = sum(img(:)==1);
SquareMetric = width/height;
if SquareMetric > 1
    SquareMetric = height/width;  % Make the aspect ratio less than unity
end
SquareMetric = SquareMetric/area;
timeAbs(12) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Roundness (ref. page 64)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1);
temp = double(bwperim(img)==1);
perimeter = sum(temp(:));
roundness = 4*pi*area/perimeter^2;
timeAbs(13) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Standard deviation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
stdDeviation = std(img(:));
timeAbs(14) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Shape index (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
shpindx = sum(temp(:))/(4*sqrt(sum(img(:)==1)));
timeAbs(15) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Save results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
timeRel = 100*timeAbs./repmat(sum(timeAbs),1,length(timeAbs));
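
% Example usage (added sketch; output order matches the function signature above):
%   [cats,tAbs,tRel] = featureTimes_urbanLandCover;
%   bar(tRel), set(gca,'XTick',1:numel(cats),'XTickLabel',cats)
%   ylabel('Relative extraction time (%)')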

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Legend of feature names
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Class: Land cover class (nominal)
% BrdIndx: Border Index (shape variable)
% Area: Area in m2 (size variable)
% Round: Roundness (shape variable)
% Bright: Brightness (spectral variable)
% Compact: Compactness (shape variable)
% ShpIndx: Shape Index (shape variable)
% Mean_G: Green (spectral variable)
% Mean_R: Red (spectral variable)
% Mean_NIR: Near Infrared (spectral variable)
% SD_G: Standard deviation of Green (texture variable)
% SD_R: Standard deviation of Red (texture variable)
% SD_NIR: Standard deviation of Near Infrared (texture variable)
% LW: Length/Width (shape variable)
% GLCM1: A Gray-Level Co-occurrence Matrix attribute (specific metric unspecified) (texture variable)
% Rect: Rectangularity (shape variable)
% GLCM2: Another Gray-Level Co-occurrence Matrix attribute (texture variable)
% Dens: Density (shape variable)
% Assym: Asymmetry (shape variable)
% NDVI: Normalized Difference Vegetation Index (spectral variable)
% BordLngth: Border Length (shape variable)
% GLCM3: Another Gray-Level Co-occurrence Matrix attribute (texture variable)

end