Initial commit pre-publication

Christopher Ratto
2015-10-20 08:59:00 -04:00
parent 9abc1da216
commit cb34cdc1a1
7 changed files with 1706 additions and 0 deletions

MNIST/experiment_mnist.m Normal file

@@ -0,0 +1,243 @@
function experiment_mnist(varargin)
% experiment_mnist.m
%
% This MATLAB script runs the feature selection experiment for the MNIST
% data set that was published in the following manuscript:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
% Optimization in Kernel Machine Classifiers," IEEE Signal Processing
% Letters, 2015.
%
% The script requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the data set and feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
dsRaw = prtDataGenMnist; % The MNIST data set comes with the PRT for demonstration purposes
[dsFeat,featTime] = extractFeaturesFromMNIST(dsRaw); % Extract various types of features from the MNIST data
tNorm = mean(featTime); % Mean extraction time of each feature, averaged over observations
tNorm = tNorm./sum(tNorm); % Normalize so the relative times sum to one
featCategories = {'Stats','PCA','GLCM','Sobel'}; % Get Names of Feature Categories
nFeatCategories = length(featCategories); % Number of feature categories
tNormCategory = [sum(tNorm(1:4)),sum(tNorm(5:14)),sum(tNorm(15:26)),sum(tNorm(27:end))]; % Relative time per category
categoryInds = {1:4,5:14,15:26,27:42}; % Indices of features in each category
categoryIndBegin = [1,5,15,27]; % Indices where each feature category begins
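% Quick sanity check (added illustration, not part of the published
% experiment): the 42 features break down as 4 statistical + 10 PCA +
% 12 GLCM + 16 Sobel, matching the concatenation order in
% extractFeaturesFromMNIST.m.
assert(isequal(cellfun(@length,categoryInds),[4,10,12,16]),'Unexpected feature category sizes')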
% Setup training and testing sets
dsTrain = dsFeat.retainObservations(1:1000); % Train on 10% of the data
dsTest = dsFeat.retainObservations(1001:dsRaw.nObservations); % Test on 90% of the data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
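% How this grid search works: each feature d is assigned a Beta(a_d,b_d)
% prior on its selection probability rho_d, with a_d = a0 and
% b_d = a0 + b0*tNorm(d), so that E[rho_d] = a_d/(a_d+b_d). tau(i,j) is the
% implied expected relative runtime, sum_d tNorm(d)*E[rho_d], and F(i,j) is
% the per-feature average of (a_d+1)/(a_d+b_d+1). The (a0,b0) pair whose
% (tau,F) falls closest to the targets (desiredT,desiredF) below is retained.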
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500,500);
F = nan(500,500);
for i = 1:500
for j = 1:500
a = a0(i)*ones(size(tNorm));
b = a0(i) + b0(j)*tNorm;
tau(i,j) = sum(tNorm .* a./(a+b));
F(i,j) = (1/dsFeat.nFeatures)*sum((a+1)./(a+b+1));
end
end
desiredT = 0.1; % Expected runtime
desiredF = 0.50; % Maximum posterior probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale; % RBF kernel, scale the sigma parameter to dimensionality of the features
kernelSet = prtKernelDc & kernel; % Add a bias dimension to the kernel (dc kernel)
% CCFO - Cost Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM); % Normalize features, One-vs-All classification since this is a multiclass problem
% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedCCFO = algoCCFO.train(dsTrain); % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest); % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutCCFO.X = dsOutCCFO.X-1;
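% Worked example of the conversion above (illustrative): the one-vs-all bank
% emits one posterior per class, so a test row such as
% [0.01 0.05 0.02 0.10 0.03 0.04 0.02 0.88 0.01 0.02] has its maximum in
% column 8; subtracting 1 yields the digit label 7, since column k
% corresponds to digit k-1.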
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain); % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest); % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutJCFO.X = dsOutJCFO.X-1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories); % Percent correct using each feature category
for iCategory = 1:nFeatCategories % Loop over all feature categories
dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the training set
dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the testing set
trainedRVM = algoRVM.train(dsCategoryTrain); % Train one-vs-all RVM
dsOutCategory = trainedRVM.run(dsCategoryTest); % Run one-vs-all RVM
[~,dsOutCategory.X] = max(dsOutCategory.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutCategory.X = dsOutCategory.X-1;
pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory); % Calculate percent correct (accuracy overall)
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain); % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest); % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutRVM.X = dsOutRVM.X - 1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results for publication
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have higher probability of not being selected.
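% For example (illustrative numbers, not the fitted ones): a cheap feature
% with a = 0.5, b = 1 has prior mean E[rho] = a/(a+b) = 1/3, while an
% expensive one with a = 0.5, b = 20 has E[rho] of about 0.024, so its Beta
% pdf piles up near rho = 0 and the feature is rarely selected a priori.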
figure(1),hold on
colors = prtPlotUtilClassColors(length(featCategories));
for iFeat = 1:length(featCategories)
featInd = categoryInds{iFeat}(1);
plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
xlabel('\rho'),ylabel('p(\rho|a,b)')
end
title('Priors on MNIST Feature Selection')
legend(featCategories,'location','southeastoutside')
clear iFeat featInd colors
% Baseline - for each category, show feature computation cost and RVM performance
% Take-home point: the most expensive features are not always the best for
% classification performance
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';h1.LineWidth = 2;h1.Marker = 'o';h1.MarkerSize = 8;
h2.LineStyle = '--';h2.LineWidth = 2;h2.Marker = '^';h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2
% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'Fontsize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
clear h
% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses % Loop over all one-vs-all classifiers
thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta'; % CCFO feature selector parameters for this one-vs-all classifier
thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta'; % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
for iClass = 1:dsTrain.nClasses
costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);
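% Note on the thresholds above: CCFO's theta is constrained to [0,1] during
% training, so 0.5 is a natural "selected" cutoff; JCFO's theta is only
% constrained to be nonnegative, so twice its median is used as a comparable
% data-driven cutoff.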
figure(4),set(gcf,'position',[610,512,700,441])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,1]); h = colorbar; ylabel(h,'\theta')
title('CCFO: Learned \theta (MNIST)')
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,2*median(thetaJCFO(:))]); h=colorbar; ylabel(h,'\theta')
title('JCFO: Learned \theta (MNIST)')
clear iClass h
% Calculate average # features selected
avgNumFeatsSelectedRVM = dsTrain.nFeatures;
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO > 2*median(thetaJCFO(:)),2));
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO > 0.5,2));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out summary of results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('MNIST Feature Set Summary\n')
fprintf('*************************************\n')
for iCategory = 1:nFeatCategories
fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('MNIST Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard
end

MNIST/extractFeaturesFromMNIST.m Normal file

@@ -0,0 +1,95 @@
function [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)
% [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)
%
% This function extracts four types of features from the MNIST handwritten
% digit recognition data set: statistical features, principal component
% analysis (PCA) projections, co-occurrence (GLCM) features, and Sobel edge
% features.
%
% The function requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS:
% dsRaw: the PRT data set provided by prtDataGenMnist()
% OUTPUTS:
% dsFeat: a new PRT data set containing the features that were computed
% time: the amount of time (in seconds) to compute each feature for
% each observation in the data set.
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
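% EXAMPLE (a minimal sketch, assuming the PRT is installed):
%   dsRaw = prtDataGenMnist;                         % load the MNIST digits
%   [dsFeat,time] = extractFeaturesFromMNIST(dsRaw);
%   size(dsFeat.X)   % nObservations x 42 (4 stats + 10 PCA + 12 GLCM + 16 Sobel)
%   size(time)       % nObservations x 42, per-feature extraction times (s)
%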
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize data structures and processors
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
PCA = prtPreProcPca('nComponents',10); % Initialize the PCA preprocessor to use 10 components
PCA = PCA.train(dsRaw); % Train PCA on the entire data set
feats = []; % Features
time = []; % Extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Extract features from each sample
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i = 1:dsRaw.nObservations
fprintf('Extracting features from sample %d of %d...\n',i,dsRaw.nObservations)
% Statistical features
tic
statFeats = [mean(dsRaw.X(i,:)),std(dsRaw.X(i,:)),skewness(dsRaw.X(i,:)),kurtosis(dsRaw.X(i,:))];
tStat = toc*ones(1,4);
% PCA Features
tic;
dsPca = PCA.run(dsRaw.retainObservations(i));
pcaFeats = dsPca.X;
tPca = toc*ones(1,dsPca.nFeatures);
% Co-occurrence (GLCM) features
tic;
glcm1 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',8);
glcmProps1 = graycoprops(glcm1);
glcm2 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',16);
glcmProps2 = graycoprops(glcm2);
glcm3 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',32);
glcmProps3 = graycoprops(glcm3);
glcmFeats = [glcmProps1.Contrast,glcmProps1.Correlation,glcmProps1.Energy,glcmProps1.Homogeneity,...
glcmProps2.Contrast,glcmProps2.Correlation,glcmProps2.Energy,glcmProps2.Homogeneity,...
glcmProps3.Contrast,glcmProps3.Correlation,glcmProps3.Energy,glcmProps3.Homogeneity];
tGlcm = toc*ones(size(glcmFeats));
% Sobel edge features
tic;
edgeDeg = [0, 45, 90, 135];
edgeFeats = [];
for j = 1:4
H = fspecial('sobel');
H = imrotate(H,edgeDeg(j));
Y = imfilter(reshape(dsRaw.X(i,:),28,28),H);
edgeFeats = [edgeFeats,trace(Y)];
edgeFeats = [edgeFeats,trace(Y')];
edgeFeats = [edgeFeats,sum(Y(14,:))];
edgeFeats = [edgeFeats,sum(Y(:,14))];
end
tEdge = toc*ones(size(edgeFeats));
% Concatenate the features into a single vector for the observation
feats = [feats;statFeats,pcaFeats,glcmFeats,edgeFeats];
time = [time;tStat,tPca,tGlcm,tEdge];
end
dsFeat = dsRaw; % Copy the input PRT data set
dsFeat.X = feats; % Overwrite the features
end

PRT Plugins/prtClassCCFO.m Normal file

@@ -0,0 +1,375 @@
classdef prtClassCCFO < prtClass
% prtClassCCFO Cost Constrained Feature Optimization
%
% This is a class written to be compatible with the Pattern Recognition Toolbox
% (PRT) for MATLAB. The PRT may be downloaded here:
% http://covartech.github.io/
%
% CLASSIFIER = prtClassCCFO returns a CCFO classifier
%
% CLASSIFIER = prtClassCCFO(PROPERTY1, VALUE1, ...) constructs a
% prtClass object CLASSIFIER with properties as specified by
% PROPERTY/VALUE pairs.
%
% A prtClassCCFO object inherits all properties from the abstract class
% prtClass. In addition, it has the following properties:
%
% kernels - A cell array of prtKernel objects specifying
% the kernels to use (note CCFO only works
% right now with RBF and polynomial kernels)
% verbosePlot - Flag indicating whether or not to plot during
% training
% verboseText - Flag indicating whether or not to output
% verbose updates during training
% learningMaxIterations - The maximum number of iterations
% ridge - Regularization parameter for ridge regression
% initialization of the weights
% gamma - Hyperparameter controlling the prior on
% beta (regression weights)
% a - Hyperparameter controlling the prior on
% theta (feature selectors)
% b - Hyperparameter controlling the prior on
% theta (feature selectors)
% pruneFeatures - Flag determining whether or not features
% with a small enough theta should be
% removed
% pruneObservations - Flag determining whether or not
% observations with a small enough beta should be removed
%
% A prtClassCCFO also has the following read-only properties:
%
% learningConverged - Flag indicating if the training converged
% beta - The regression weights, estimated during training
% theta - The feature scaling factors, estimated in training
% omega - Term defined in (13) of the CCFO paper
% Q - The EM objective function being optimized
% relevantFeats - Indices of features determined to be relevant
% relevantObs - Indices of observations determined to be relevant
%
% A prtClassCCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
% KFOLDS methods from prtAction. It also inherits the PLOT method
% from prtClass.
%
% Reference:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained
% Feature Optimization in Kernel Machine Classifiers," IEEE
% Signal Processing Letters, 2015.
%
% B. Krishnapuram, A.J. Hartemink, L. Carin, and M.A.T. Figueiredo, "A
% Bayesian approach to joint feature selection and classifier
% design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 7 October, 2015
%
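%   Example (a minimal sketch, assuming the PRT is installed; the data set
%   and hyperparameter values below are placeholders, not recommendations):
%
%       ds = prtDataGenUnimodal;                 % two-class sample data
%       ccfo = prtClassCCFO('a',0.5*ones(1,ds.nFeatures),...
%           'b',10*ones(1,ds.nFeatures),'gamma',1,'ridge',10);
%       ccfo = ccfo.train(ds);                   % EM training
%       dsOut = ccfo.run(ds);                    % posterior P(y = 1 | x)
%       prtScoreRoc(dsOut);                      % inspect performance
%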
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define properties
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Private properties for internal PRT use
properties (SetAccess=private)
name = 'Cost Constrained Feature Optimization' % Full name of the classifier
nameAbbreviation = 'CCFO'; % Abbreviated name
isNativeMary = false; % Cannot handle multi-class data
end
% Public properties for general use
properties
verbosePlot = false; % Whether or not to plot during training
verboseText = false; % Whether or not to write text during training
ridge = 1; % Ridge regression penalty (for initializing beta)
gamma = 1; % Hyperparameter for beta
a = 1; % Hyperparameter for theta
b = 1; % Hyperparameter for theta
kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
pruneFeatures = false; % Flag for removing features as we go
pruneObservations = false; % Flag for removing observations as we go
end
% Hidden properties that should generally be left alone
properties (Hidden = true)
learningMaxIterations = 100; % Maximum number of iterations
learningConvergedThreshold = .0001; % Threshold for whether learning has converged
learningNormWeightsThresh = 0.001; % Threshold for whether the weights aren't changing
learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
pruningThreshBeta = 0.0001; % Threshold for removing observations
pruningThreshTheta = 0.5; % Threshold for removing features
featuresRetained = []; % List of features that were retained
nMaxFminconEvals = 100; % Number of steps for fmincon optimization
end
% Properties that may be accessed for monitoring the learning algorithm
properties (SetAccess = 'protected',GetAccess = 'public')
learningConverged = []; % Whether or not the training converged
beta = []; % The regression weights
theta = []; % The feature scaling factors
omega = []; % Equation (13) in Krishnapuram et al.
Q = []; % The EM objective function
relevantFeats = []; % List of relevant features
relevantObs = []; % List of relevant observations
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Error checking
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods
% Allow for string, value pairs
function Obj = prtClassCCFO(varargin)
Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
end
% Make sure the kernel is compatible with CCFO
function Obj = set.kernels(Obj,val)
if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) || ~isa(val.kernelCell{1},'prtKernelDc')
error('prt:prtClassCCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
else
Obj.kernels = val;
end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training, testing, and helper functions (called by PRT train and run API)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods (Access = protected, Hidden = true)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training function (called by Obj.train)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Obj = trainAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
Y = DataSet.Y;
N = size(X,1);
P = size(X,2);
beta = ones(N+1,1);
theta = .9*ones(P,1);
omega = ones(N+1,1);
kernels = Obj.kernels;
converged = false;
iteration = 0;
relevantFeats = true(P,1);
relevantObs = true(N,1);
relevantKernels = [true;relevantObs];
while ~converged
%%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
iteration = iteration + 1;
if Obj.verboseText
fprintf('CCFO EM Iteration %d:\t',iteration)
end
Xrel = X(:,relevantFeats);
Nrel = size(Xrel,1);
Prel = size(Xrel,2);
thetaRel = theta(relevantFeats);
betaRel = beta(relevantKernels);
aRel = Obj.a(relevantFeats);
bRel = Obj.b(relevantFeats);
%%%%%%%%%%%% M-step %%%%%%%%%%%%%%
% Update the feature scaling factors
if iteration > 1
if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',betarnd(ones(size(thetaRel)),ones(size(thetaRel))),'Display','iter-detailed','TolX',1e-4,'TolFun',1e-4);%'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,x,relevantKernels,aRel,bRel),thetaRel,[],[],[],[],zeros(size(thetaRel)),ones(size(thetaRel)),[],opts);
theta(relevantFeats) = thetaRel;
thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
else
fprintf('Feature selection converged. Skipping constrained optimization.\n')
end
else
thetaNormDiff = nan;
end
thetaNorm = norm(theta);
% Apply scaling factors to features and re-compute the Gram
% matrix via the kernel function
XT = bsxfun(@times,Xrel,thetaRel');
dsTmp = prtDataSetClass(XT,Y);
kernels = train(kernels,dsTmp);
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
% Update the regression weights
if iteration == 1
betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
beta(relevantKernels) = betaRel;
betaNormDiff = nan;
else
betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
beta(relevantKernels) = betaRel;
betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
end
betaNorm = norm(beta);
beta = beta./betaNorm;
%%%%%%%%%%%% E-step %%%%%%%%%%%%%%
v = nan(N,1);
for i = 1:N
normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
if isnan(normFactor)
normFactor = 0;
end
v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of linear observation model
end
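% The loop above is the standard probit-regression E-step: with z_i the
% latent linear response, v_i = E[z_i|y_i,beta] = h_i'*beta +
% (2*y_i-1)*normpdf(h_i'*beta)/normcdf((2*y_i-1)*h_i'*beta), the mean of a
% Gaussian truncated to the side indicated by the label; the NaN guard
% handles the 0/0 case when the normcdf term underflows.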
omegaRel = nan(length(betaRel),1);
for i = 1:length(betaRel)
omegaRel(i,:) = Obj.gamma*abs(betaRel(i))^(-1); % Expected value of weight variance
end
omega(relevantKernels) = omegaRel;
S = diag(omegaRel.^(-1/2));
% Recompute the expected log-posterior
Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,thetaRel,relevantKernels,aRel,bRel);
%%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
if Obj.pruneFeatures
relevantFeats(theta < Obj.pruningThreshTheta) = false;
theta(~relevantFeats) = 0;
thetaRel = theta(relevantFeats);
end
if Obj.pruneObservations
relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
relevantKernels = [true;relevantObs];
beta(~relevantKernels) = 0;
betaRel = beta(relevantKernels);
omegaRel = omega(relevantKernels);
S = diag(omegaRel.^(-1/2));
end
% For debugging purposes, plot how all of the parameters are updating
if Obj.verbosePlot
figure(666)
subplot(2,3,1),plot(v),title('v'),axis tight
subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
drawnow
end
%%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
if iteration == 1
Qdiff = nan;
else
Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
end
if Obj.verboseText
fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
end
if abs(Qdiff) < Obj.learningConvergedThreshold
converged = true;
fprintf('Expected log-posterior converged within threshold, exiting.\n')
elseif iteration == Obj.learningMaxIterations
converged = true;
fprintf('Maximum number of iterations reached, exiting.\n')
elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
converged = true;
fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
end
end
%%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
Obj.beta = beta;
Obj.theta = theta;
Obj.omega = omega;
Obj.Q = Q(end);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT,Y);
Obj.kernels = train(kernels,dsTmp);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Running function (called by Obj.run)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function DataSetOut = runAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
kernels = Obj.kernels;
theta = Obj.theta;
beta = Obj.beta;
%%%%%%%%%% Run CCFO on dataset %%%%%%%%%%%%
DataSet.X = bsxfun(@times,X,theta');
H = kernels.run_OutputDoubleArray(DataSet);
%%%%%%%%%% Build output dataset %%%%%%%%%%%%
DataSetOut = DataSet;
DataSetOut.X = normcdf(H*beta);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function for calculating the EM objective, Q and its derivative (called by Obj.trainAction)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Qout,dQdTout,beta,omega] = calcQ(Obj,X,kernel,v,omega,theta,relevantKernels,a,b)
% Build gram matrix using the proposed vector of feature scaling factors
N = size(X,1);
Nk = sum(relevantKernels);
P = size(X,2);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT);
kernels = train(kernel,prtDataSetClass(XT));
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
S = diag(omega.^(-1/2));
% Update beta given the proposed theta
beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;
% Calculate the expected log-posterior
nu1 = psi(a) - psi(a + b);
nu2 = psi(b) - psi(a + b);
Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta + sum(theta.*nu1 + (1-theta).*nu2);
Qout = -Q;
% Calculate derivative of Q w.r.t. each theta
dQdT = nan(1,P);
if isa(Obj.kernels.kernelCell{2},'prtKernelPolynomial')
n = Obj.kernels.kernelCell{2}.d;
xTx = X*diag(theta)*X';
for k = 1:P
xxk = X(:,k)*X(:,k)';
dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk]; % Derivative of polynomial kernel provided in Krishnapuram et al., RECOMB '03
dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT)); % Sign of the prior term matches the RBF branch below
end
dQdTout = -dQdT;
elseif isa(Obj.kernels.kernelCell{2},'prtKernelRbf')
for k = 1:P
Xk = X(:,k);
dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
if isa(Obj.kernels.kernelCell{2},'prtKernelRbfNdimensionScale')
dXk = dXk./(P*Obj.kernels.kernelCell{2}.sigma.^2);
else
dXk = dXk./Obj.kernels.kernelCell{2}.sigma.^2;
end
if isa(Obj.kernels.kernelCell{1},'prtKernelDc')
dXk = [zeros(N,1),dXk];
end
dHdT = -H.*dXk(:,relevantKernels); % Keep only columns of retained kernels, as in prtClassJCFO
dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
dQdTout(k) = -dQdT(k);
end
end
end
end
end

PRT Plugins/prtClassJCFO.m Normal file

@@ -0,0 +1,369 @@
classdef prtClassJCFO < prtClass
% prtClassJCFO Joint Classifier and Feature Optimization
%
% This is a class written to be compatible with the Pattern Recognition Toolbox
% (PRT) for MATLAB. The PRT may be downloaded here:
% http://covartech.github.io/
%
% CLASSIFIER = prtClassJCFO returns a JCFO classifier
%
% CLASSIFIER = prtClassJCFO(PROPERTY1, VALUE1, ...) constructs a
% prtClass object CLASSIFIER with properties as specified by
% PROPERTY/VALUE pairs.
%
% A prtClassJCFO object inherits all properties from the abstract class
% prtClass. In addition, it has the following properties:
%
% kernels - A cell array of prtKernel objects specifying
% the kernels to use (note JCFO only works
% right now with RBF and polynomial kernels)
% verbosePlot - Flag indicating whether or not to plot during
% training
% verboseText - Flag indicating whether or not to output
% verbose updates during training
% learningMaxIterations - The maximum number of iterations
% ridge - Regularization parameter for ridge regression
% initialization of the weights
% gamma1 - Hyperparameter controlling the prior on
% beta (regression weights)
% gamma2 - Hyperparameter controlling the prior on
% theta (feature scaling factors)
% pruneFeatures - Flag determining whether or not features
% with a small enough theta should be
% removed
% pruneObservations - Flag determining whether or not
% observations with a small enough beta should be removed
%
% A prtClassJCFO also has the following read-only properties:
%
% learningConverged - Flag indicating if the training converged
% beta - The regression weights, estimated during training
% theta - The feature scaling factors, estimated in training
% delta - Term defined in (14) of JCFO paper
% omega - Term defined in (13) of JCFO paper
% Q - The EM objective function being optimized
% relevantFeats - Indices of features determined to be relevant
% relevantObs - Indices of observations determined to be relevant
%
% A prtClassJCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
% KFOLDS methods from prtAction. It also inherits the PLOT method
% from prtClass.
%
% Reference:
% B. Krishnapuram, A.J. Hartemink, L. Carin, and M.A.T. Figueiredo, "A
% Bayesian approach to joint feature selection and classifier
% design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 7 October, 2015
%
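%   Example (a minimal sketch, assuming the PRT is installed; the data set
%   and hyperparameter values below are placeholders, not recommendations):
%
%       ds = prtDataGenUnimodal;                 % two-class sample data
%       jcfo = prtClassJCFO('gamma1',1,'gamma2',1,'ridge',10);
%       jcfo = jcfo.train(ds);                   % EM training
%       dsOut = jcfo.run(ds);                    % posterior P(y = 1 | x)
%       prtScoreRoc(dsOut);                      % inspect performance
%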
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define properties
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Private properties for internal PRT use
properties (SetAccess=private)
name = 'Joint Classifier and Feature Optimization' % Full name of the classifier
nameAbbreviation = 'JCFO'; % Abbreviated name
isNativeMary = false; % Cannot handle multi-class data
end
% Public properties for general use
properties
verbosePlot = false; % Whether or not to plot during training
verboseText = false; % Whether or not to write text during training
ridge = 1; % Ridge regression penalty (for initializing beta)
gamma1 = 1; % Hyperparameter for beta
gamma2 = 1; % Hyperparameter for theta
kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
pruneFeatures = false; % Flag for removing features as we go
pruneObservations = false; % Flag for removing observations as we go
end
% Hidden properties that should generally be left alone
properties (Hidden = true)
learningMaxIterations = 100; % Maximum number of iterations
learningConvergedThreshold = .001; % Threshold for whether learning has converged
learningNormWeightsThresh = 0.001; % Threshold for whether the weights aren't changing
learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
pruningThreshBeta = 0.0001; % Threshold for removing observations
pruningThreshTheta = 0.1; % Threshold for removing features
featuresRetained = []; % List of features being retained
nMaxFminconEvals = 100; % Number of steps for fmincon optimization
end
% Properties that may be accessed for monitoring of learning algorithm
properties (SetAccess = 'protected',GetAccess = 'public')
learningConverged = []; % Whether or not the training converged
beta = []; % The regression weights
theta = []; % The feature scaling factors
delta = []; % Equation (14) in Krishnapuram et al.
omega = []; % Equation (13) in Krishnapuram et al.
Q = []; % EM objective function
relevantFeats = []; % List of relevant features
relevantObs = []; % List of relevant observations
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Error checking
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods
% Allow for string, value pairs
function Obj = prtClassJCFO(varargin)
Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
end
% Make sure the kernel is compatible with JCFO
function Obj = set.kernels(Obj,val)
if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) || ~isa(val.kernelCell{1},'prtKernelDc')
error('prt:prtClassJCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
else
Obj.kernels = val;
end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training, testing, and helper functions (called by PRT train and run API)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods (Access = protected, Hidden = true)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training function (called by Obj.train)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Obj = trainAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
Y = DataSet.Y;
N = size(X,1);
P = size(X,2);
beta = ones(N+1,1);
theta = ones(P,1);
delta = ones(P,1);
omega = ones(N+1,1);
kernels = Obj.kernels;
converged = false;
iteration = 0;
relevantFeats = true(P,1);
relevantObs = true(N,1);
relevantKernels = [true;relevantObs];
while ~converged
%%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
iteration = iteration + 1;
if Obj.verboseText
fprintf('JCFO EM Iteration %d:\t',iteration)
end
Xrel = X(:,relevantFeats);
Nrel = size(Xrel,1);
Prel = size(Xrel,2);
thetaRel = theta(relevantFeats);
betaRel = beta(relevantKernels);
%%%%%%%%%%%% M-step %%%%%%%%%%%%%%
% Update the feature scaling factors
if iteration > 1
if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',ones(size(thetaRel)),'Display','off','TolX',1e-6,'TolFun',1e-6);%,'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,x,relevantKernels),thetaRel,[],[],[],[],zeros(size(thetaRel)),inf(size(thetaRel)),[],opts);
theta(relevantFeats) = thetaRel;
thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
else
fprintf('Feature selection converged. Skipping constrained optimization.\n')
end
else
thetaNormDiff = nan;
end
thetaNorm = norm(theta);
% Apply scaling factors to features and re-compute the Gram
% matrix via the kernel function
XT = bsxfun(@times,Xrel,thetaRel');
dsTmp = prtDataSetClass(XT,Y);
kernels = train(kernels,dsTmp);
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
% Update the regression weights
if iteration == 1
betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
beta(relevantKernels) = betaRel;
betaNormDiff = nan;
else
betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
beta(relevantKernels) = betaRel;
betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
end
betaNorm = norm(beta);
beta = beta./betaNorm;
%%%%%%%%%%%% E-step %%%%%%%%%%%%%%
v = nan(N,1);
for i = 1:N
normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
if isnan(normFactor)
normFactor = 0;
end
v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of linear observation model
end
omegaRel = nan(length(betaRel),1);
for i = 1:length(betaRel)
omegaRel(i,:) = Obj.gamma1*abs(betaRel(i))^(-1); % Expected value of weight variance
end
omega(relevantKernels) = omegaRel;
S = diag(omegaRel.^(-1/2));
deltaRel = nan(Prel,1);
for k = 1:Prel
deltaRel(k,:) = Obj.gamma2*thetaRel(k)^(-1); % Expected value of feature selectors
end
delta(relevantFeats) = deltaRel;
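% The expectations above follow Krishnapuram et al.: omega_i = gamma1/|beta_i|
% is their (13) and delta_k = gamma2/theta_k is their (14), the expected
% inverse scales of the sparsity-inducing priors on the kernel weights and
% the feature scaling factors, respectively.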
% Recompute the expected log-posterior
Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,thetaRel,relevantKernels); % Expected log-posterior
%%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
if Obj.pruneFeatures
relevantFeats(theta < Obj.pruningThreshTheta) = false;
theta(~relevantFeats) = 0;
thetaRel = theta(relevantFeats);
deltaRel = delta(relevantFeats);
end
if Obj.pruneObservations
relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
relevantKernels = [true;relevantObs];
beta(~relevantKernels) = 0;
betaRel = beta(relevantKernels);
omegaRel = omega(relevantKernels);
S = diag(omegaRel.^(-1/2));
end
% For debugging purposes, plot how all of the parameters are updating
if Obj.verbosePlot
figure(666)
subplot(2,3,1),plot(v),title('v'),axis tight
subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
subplot(2,3,3),plot(log(delta),'marker','o'),title('log(\delta)'),axis tight
subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
drawnow
end
%%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
if iteration == 1
Qdiff = nan;
else
Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
end
if Obj.verboseText
fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
end
if abs(Qdiff) < Obj.learningConvergedThreshold
converged = true;
fprintf('Expected log-posterior converged within threshold, exiting.\n')
elseif iteration == Obj.learningMaxIterations
converged = true;
fprintf('Maximum number of iterations reached, exiting.\n')
elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
converged = true;
fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
end
end
%%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
Obj.beta = beta;
Obj.theta = theta;
Obj.delta = delta;
Obj.omega = omega;
Obj.Q = Q(end);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT,Y);
Obj.kernels = train(kernels,dsTmp);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Running function (called by Obj.run)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function DataSetOut = runAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
kernels = Obj.kernels;
theta = Obj.theta;
beta = Obj.beta;
%%%%%%%%%% Run JCFO on dataset %%%%%%%%%%%%
DataSet.X = bsxfun(@times,X,theta');
H = kernels.run_OutputDoubleArray(DataSet);
%%%%%%%%%% Build output dataset %%%%%%%%%%%%
DataSetOut = DataSet;
DataSetOut.X = normcdf(H*beta);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function for calculating the EM objective, Q and its derivative (called by Obj.trainAction)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Qout,dQdTout] = calcQ(Obj,X,kernel,v,omega,delta,theta,relevantKernels)
% Build gram matrix using the proposed vector of feature scaling factors
N = size(X,1);
Nk = sum(relevantKernels);
P = size(X,2);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT);
kernels = train(kernel,prtDataSetClass(XT));
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
S = diag(omega.^(-1/2));
% Update beta given the proposed theta
beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;
% Calculate the expected log-posterior
Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta - theta'*diag(delta)*theta;
Qout = -Q; % Using negative since matlab optimization does minimization only
dQdT = nan(1,P);
% Calculate derivative of Q w.r.t. each theta
if isa(kernel.kernelCell{2},'prtKernelPolynomial')
n = kernel.kernelCell{2}.d;
xTx = X*diag(theta)*X';
for k = 1:P
xxk = X(:,k)*X(:,k)';
dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk]; % Derivative of polynomial kernel provided in Krishnapuram et al., RECOMB '03
dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
end
dQdTout = -dQdT;
elseif isa(kernel.kernelCell{2},'prtKernelRbf')
for k = 1:P
Xk = X(:,k);
dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
if isa(kernel.kernelCell{2},'prtKernelRbfNdimensionScale')
dXk = dXk./(P*kernel.kernelCell{2}.sigma.^2);
else
dXk = dXk./kernel.kernelCell{2}.sigma.^2;
end
if isa(kernel.kernelCell{1},'prtKernelDc')
dXk = [zeros(N,1),dXk];
end
dHdT = -H.*dXk(:,relevantKernels);
dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
dQdTout(k) = -dQdT(k);
end
end
end
end
end

README.txt Normal file

@@ -0,0 +1,37 @@
Cost-Constrained Feature Optimization (CCFO) for MATLAB
This MATLAB code may be used to replicate the results published in the following manuscript:
C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
Optimization in Kernel Machine Classifiers," IEEE Signal Processing
Letters, 2015.
The code is organized into three directories as follows:
Urban Land Cover/ - Code for replicating the urban land cover experiment from the paper.
experiment_urbanLandCover.m - Script for running the experiment
featureTimes_urbanLandCover.m - Function for estimating feature computation times
MNIST/ - Code for replicating the MNIST experiment from the paper
experiment_mnist.m - Script for running the experiment
extractFeaturesFromMNIST.m - Function for extracting features from the handwritten digits
PRT Plugins/ - The actual machine learning code, written as a plugin to the Pattern Recognition Toolbox (PRT)
prtClassJCFO - Joint Classifier and Feature Optimization
prtClassCCFO - Cost Constrained Feature Optimization
To run either experiment, or to use our code in your own research, you must download and install the
Pattern Recognition Toolbox (PRT) for MATLAB at http://covartech.github.io/
To run the urban land cover experiment, you must download the data from the UCI machine
learning repository: https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
The code benchmarks the computation time of each feature on a test image of a black-and-white circle.
The function "MidpointCircle.m", which generates this test image, is available via MATLAB Central:
http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m
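For example (a sketch; the data path below is hypothetical and should be adjusted for your machine), from the repository root in MATLAB:

    addpath(genpath('PRT Plugins'))                       % make prtClassCCFO/prtClassJCFO visible
    cd MNIST
    experiment_mnist                                      % replicate the MNIST experiment
    cd('../Urban Land Cover')
    experiment_urbanLandCover('C:\data\UrbanLandCover')   % folder containing training.csv and testing.csv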
*******************************************************************************************************************
This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
All Rights Reserved
This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
review, modification, and/or use of the software, in source and binary forms are ONLY permitted
provided you agree to and comply with the terms and conditions set forth in the license.
*******************************************************************************************************************

Urban Land Cover/experiment_urbanLandCover.m Normal file

@@ -0,0 +1,307 @@
function experiment_urbanLandCover(varargin)
% experiment_urbanLandCover.m
%
% This MATLAB script runs the feature selection experiment for the Urban
% Land Cover data set that was published in the following manuscript:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
% Optimization in Kernel Machine Classifiers," IEEE Signal Processing
% Letters, 2015.
%
% The script requires the Urban Land Cover data set, which is available
% from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The script also requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS (optional):
% dataDir: full path to the directory in which the XLS data is saved
% (default is the current working directory)
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the training and test data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if nargin == 0
dataDir = pwd;
else
dataDir = varargin{1};
end
[~,~,raw] = xlsread(fullfile(dataDir,'training.csv')); % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end)); % Features from spreadsheet
labels = strrep(raw(2:end,1),' ',''); % Class labels from spreadsheet
uLabels = unique(labels); % Unique class labels
Y = nan(size(labels)); % Numeric class labels
for i = 1:length(labels)
Y(i,:) = find(strcmp(labels{i},uLabels)); % Assign a numeric label to each class name
end
featNames = raw(1,2:end); % Feature names
dsTrain = prtDataSetClass(X,Y); % Training PRT data set
dsTrain.classNames = uLabels; % Training class names
dsTrain.featureNames = featNames; % Training feature names
clear raw X labels uLabels Y featNames i
[~,~,raw] = xlsread(fullfile(dataDir,'testing.csv')); % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end)); % Features from spreadsheet
labels = strrep(raw(2:end,1),' ',''); % Class labels from spreadsheet
uLabels = unique(labels); % Unique class labels
Y = nan(size(labels)); % Numeric class labels
for i = 1:length(labels)
Y(i,:) = find(strcmp(labels{i},uLabels)); % Assign a numeric label to each class name
end
featNames = raw(1,2:end); % Feature names
dsTest = prtDataSetClass(X,Y); % Testing PRT data set
dsTest.classNames = uLabels; % Testing class names
dsTest.featureNames = featNames; % Testing feature names
clear raw X labels uLabels Y featNames i
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[featCategories,~,categoryTimes] = featureTimes_urbanLandCover; % Benchmarked feature computation times (see featureTimes_urbanLandCover.m)
nFeatCategories = length(featCategories); % Number of feature categories
[categoryTimes,sortInds] = sort(cat(1,[],categoryTimes),'ascend'); % Sort the computation time of each feature category
featCategories = featCategories(sortInds); % Rearrange the order of feature category names
sortVec = []; % Vector of feature indices corresponding to the sorted categories
for iCategory = 1:nFeatCategories
sortVec = [sortVec,find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory})))]; %#ok<AGROW>
end
dsTrain.X = dsTrain.X(:,sortVec); % Rearrange the order of features in the training set
dsTrain.featureNames = dsTrain.featureNames(:,sortVec); % Rearrange the order of feature names in the training set
dsTest.X = dsTest.X(:,sortVec); % Rearrange the order of features in the testing set
dsTest.featureNames = dsTest.featureNames(:,sortVec); % Rearrange the order of feature names in the testing set
categoryInds = cell(1,nFeatCategories); % Feature indices corresponding to each category
T = nan(1,dsTrain.nFeatures); % Computation time (seconds) of each feature
TperCategory = nan(1,nFeatCategories); % Total computation time (seconds) of each category
categoryIndBegin = nan(1,nFeatCategories); % The index of the first feature from each category
for iCategory = 1:nFeatCategories
categoryInds{iCategory} = find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory}))); % Find the features that belong to this category
categoryIndBegin(iCategory) = min(categoryInds{iCategory});
T(categoryInds{iCategory}) = categoryTimes(iCategory); % Computation time for each feature (seconds)
TperCategory(iCategory) = sum(T(categoryInds{iCategory})); % Computation time for each feature category (seconds)
end
tNorm = T./sum(T); % Normalized computation time of each feature
tNormCategory = TperCategory./sum(TperCategory); % Normalized computation time of each feature category
featCategories = strrep(featCategories,'_',''); % Remove underscores from category names (they render as TeX subscripts in plot labels)
clear categoryTimes iCategory i sortVec sortInds
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
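% This grid search is identical to the one in experiment_mnist.m (see the
% explanatory note there); only the target expected runtime desiredT differs.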
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500,500);
F = nan(500,500);
for i = 1:500
for j = 1:500
a = a0(i)*ones(size(tNorm));
b = a0(i) + b0(j)*tNorm;
tau(i,j) = sum(tNorm .* a./(a+b));
F(i,j) = (1/dsTrain.nFeatures)*sum((a+1)./(a+b+1));
end
end
desiredT = 0.25; % Expected runtime
desiredF = 0.50; % Maximum posterior probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
clear featInd legendNames colors tau F iA iB iFeat
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale; % RBF kernel, scale the sigma parameter to dimensionality of the features
kernelSet = prtKernelDc & kernel; % Add a bias dimension to the kernel (dc kernel)
% CCFO - Cost Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM); % Normalize features, One-vs-All classification since this is a multiclass problem
% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedCCFO = algoCCFO.train(dsTrain); % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest); % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain); % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest); % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories); % Percent correct using each feature category
for iCategory = 1:nFeatCategories % Loop over all feature categories
dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the training set
dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the testing set
trainedRVM = algoRVM.train(dsCategoryTrain); % Train one-vs-all RVM
dsOutCategory = trainedRVM.run(dsCategoryTest); % Run one-vs-all RVM
[~,dsOutCategory.X] = max(dsOutCategory.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory); % Calculate percent correct (accuracy overall)
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features together)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain); % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest); % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have higher probability of not being selected.
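% For reference, the density plotted here is
%   p(rho|a,b) = rho^(a-1)*(1-rho)^(b-1)/B(a,b),  rho in [0,1],
% with mean a/(a+b). Because b_k = a0 + b0*tNorm(k) grows with feature cost,
% costly features get priors concentrated near rho = 0 (rarely selected).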
figure(1),set(gcf,'position',[610,512,700,441])
colors = prtPlotUtilClassColors(length(featCategories));
colors(7,:) = [0,1,1];
subplot(2,1,1),hold on
for iFeat = 1:8 % First eight (cheap) feature categories
featInd = categoryInds{iFeat}(1);
plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Cheap Features (Urban Land Cover)')
legend(featCategories(1:8),'location','southeastoutside')
subplot(2,1,2),hold on
for iFeat = 1:7 % Remaining seven (expensive) feature categories
featInd = categoryInds{8+iFeat}(1);
plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Expensive Features (Urban Land Cover)')
legend(featCategories(9:15),'location','southeastoutside')
clear iFeat featInd colors
% Baseline - for each category, show feature computation and RVM performance
% Take-home point: most expensive features not always the best for
% classification performance
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';h1.LineWidth = 2;h1.Marker = 'o';h1.MarkerSize = 8;
h2.LineStyle = '--';h2.LineWidth = 2;h2.Marker = '^';h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2
% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'Fontsize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
clear h
% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses % Loop over all one-vs-all classifiers
thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta'; % CCFO feature selector parameters for this one-vs-all classifier
thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta'; % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
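% Threshold rationale (annotation added for clarity): CCFO's theta is displayed
% on a [0,1] color scale below and interpreted as a per-feature selection
% probability, so theta >= 0.5 counts as selected. JCFO's theta is an unbounded
% scaling parameter with no natural cutoff, so a data-driven threshold of twice
% its median is used instead.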
for iClass = 1:dsTrain.nClasses
costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);
figure(4),set(gcf,'outerposition',[255,317,568,596])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,1]); h = colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'CCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionCCFO)})
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,2*median(thetaJCFO(:))]); h=colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'JCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionJCFO)})
clear iClass h m
% Average number of features selected
avgNumFeatsSelectedRVM = dsTest.nFeatures; % The baseline RVM performs no feature selection, so it uses all features
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO >= 2*median(thetaJCFO(:)),2)); % Same JCFO threshold as the cost calculation above
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO >= 0.5,2)); % Same CCFO threshold as the cost calculation above
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out summary of results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('Urban Land Cover Feature Set Summary\n')
fprintf('*************************************\n')
fprintf('Category \t #Feats \t Rel. Cost \t Accuracy (%%)\n')
for iCategory = 1:nFeatCategories
    fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('Urban Land Cover Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100.00\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard % Drop into the debugger so the results can be inspected interactively before the function exits
end

280
featureTimes_urbanLandCover.m Normal file

@@ -0,0 +1,280 @@
function [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover()
% [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover()
%
% Function to estimate the extraction time for each feature in the urban
% land cover data set, which is available from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The code benchmarks each of the feature computation times on a test image
% of a black and white circle. The function "MidpointCircle.m" required to
% generate the test image is available via Matlab Central:
% http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m
%
% The computations for each feature were derived from the following reference
% (page references provided in the source code):
% Definiens AG, "Definiens 5 Reference Book," Munich, Germany 2006.
%
% INPUTS: none
%
% OUTPUTS:
% featureCategories: cell array of feature category names
% timeAbs: the absolute time (sec) to compute each feature, measured via tic/toc
% timeRel: the relative time (%) to compute each feature in the set
%
% Author: Carlos A. Caceres, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
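% Example usage (illustrative only; the plotting commands are an assumption,
% not part of the published experiment):
%   [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover();
%   bar(timeRel)
%   set(gca,'XTick',1:numel(featureCategories),'XTickLabel',featureCategories,'XTickLabelRotation',45)
%   ylabel('Relative extraction time (%)')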
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the test image
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
featureCategories = {'Area','Assym','BordLngth','BrdIndx','Bright','Compact','Dens','GLCM','LW','Mean','NDVI','Rect','Round','SD','ShpIndx'};
imgSize = 1024; % Size of test image
img = zeros(imgSize); % Initialize the test image
img = MidpointCircle(img, 250, imgSize/2, imgSize/2, 1); % Fill in with a circle
img2 = img + randn(size(img)); % Add white Gaussian noise
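% Benchmarking note (a suggestion, not part of the original procedure): a
% single tic/toc measurement is noisy. Where more stable estimates are needed,
% each snippet could be wrapped in an anonymous function and timed with
% timeit (R2013b and later), which runs it repeatedly and returns a robust
% estimate, e.g.:
%   timeAbs(1) = timeit(@() sum(img(:)==1));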
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Area (ref. page 58)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1); %assume each pixel has area = 1;
timeAbs(1) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Asymmetry (ref. page 60)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
varXY = var(idListX.*idListY);
assym = 2*sqrt(.25*(varX+varY)^2 + (varXY)^2 - varX*varY)/(varX+varY);
timeAbs(2) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border length (ref. page 36)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
bordlength = sum(temp(:));
timeAbs(3) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border index (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
borderIndex = sum(temp(:))/(2*(l+width));
timeAbs(4) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Brightness (ref. page 42)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
img3 = repmat(img,1,1,3);
tic;
% for an RGB image
wkb = [1,2,3];
brightness = (1/mean(wkb))*sum((wkb.*reshape(mean(mean(img3,1),2),1,3)));
% % for a binary image
% mean(img(:));
timeAbs(5) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compactness (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
compactness = l*width/sum(img(:)==1);
timeAbs(6) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Density (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
dens = sqrt(sum(img(:)==1))/(1+sqrt(varX+varY));
timeAbs(7) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Gray-level co-occurrence matrix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
graycomatrix(img2);
timeAbs(8) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Length/width (ref. page 59)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
lw = l/width;
timeAbs(9) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Mean (ref. page 40)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
datMean = mean(img2(:));
timeAbs(10) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NDVI
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
ndvi = sum((img - img2)./(img + img2)); % NDVI = (NIR - R)./(NIR + R); img and img2 stand in for the two bands
timeAbs(11) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Rectangular fit (ref. page 65)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%get outlines of each object
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
height = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
area = sum(img(:)==1);
SquareMetric = width/height;
if SquareMetric > 1
    SquareMetric = height/width; % Make aspect ratio less than unity
end
SquareMetric = SquareMetric/area;
timeAbs(12) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Roundness (ref. page 64)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1);
temp = double(bwperim(img)==1);
perimeter = sum(temp(:));
roundness = 4*pi*area/perimeter^2;
timeAbs(13) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Standard deviation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
stdDeviation = std(img(:));
timeAbs(14) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Shape index (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
shpindx = sum(temp(:))/(4*sqrt(sum(img(:)==1)));
timeAbs(15) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compute relative extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
timeRel = 100*timeAbs./sum(timeAbs); % Each feature's share (%) of the total extraction time
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Legend of feature names
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Class: Land cover class (nominal)
% BrdIndx: Border Index (shape variable)
% Area: Area in m2 (size variable)
% Round: Roundness (shape variable)
% Bright: Brightness (spectral variable)
% Compact: Compactness (shape variable)
% ShpIndx: Shape Index (shape variable)
% Mean_G: Green (spectral variable)
% Mean_R: Red (spectral variable)
% Mean_NIR: Near Infrared (spectral variable)
% SD_G: Standard deviation of Green (texture variable)
% SD_R: Standard deviation of Red (texture variable)
% SD_NIR: Standard deviation of Near Infrared (texture variable)
% LW: Length/Width (shape variable)
% GLCM1: Gray-Level Co-occurrence Matrix attribute (texture variable; the specific GLCM metric is not documented for this data set)
% Rect: Rectangularity (shape variable)
% GLCM2: Another Gray-Level Co-occurrence Matrix attribute (texture variable)
% Dens: Density (shape variable)
% Assym: Asymmetry (shape variable)
% NDVI: Normalized Difference Vegetation Index (spectral variable)
% BordLngth: Border Length (shape variable)
% GLCM3: Another Gray-Level Co-occurrence Matrix attribute (texture variable)
end