Initial commit pre-publication

Christopher Ratto
2015-10-20 08:59:00 -04:00
parent 9abc1da216
commit cb34cdc1a1
7 changed files with 1706 additions and 0 deletions

MNIST/experiment_mnist.m Normal file

@@ -0,0 +1,243 @@
function experiment_mnist(varargin)
% experiment_mnist.m
%
% This MATLAB script runs the feature selection experiment for the MNIST
% data set that was published in the following manuscript:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
% Optimization in Kernel Machine Classifiers," IEEE Signal Processing
% Letters, 2015.
%
% The script requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the data set and feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
dsRaw = prtDataGenMnist; % The MNIST data set comes with the PRT for demonstration purposes
[dsFeat,featTime] = extractFeaturesFromMNIST(dsRaw); % Extract various types of features from the MNIST data
tNorm = mean(featTime); % Mean extraction time of each feature, averaged over observations
tNorm = tNorm./sum(tNorm); % Normalize so the relative times sum to one
featCategories = {'Stats','PCA','GLCM','Sobel'}; % Get Names of Feature Categories
nFeatCategories = length(featCategories); % Number of feature categories
tNormCategory = [sum(tNorm(1:4)),sum(tNorm(5:14)),sum(tNorm(15:26)),sum(tNorm(27:end))]; % Relative time per category
categoryInds = {1:4,5:14,15:26,27:42}; % Indices of features in each category
categoryIndBegin = [1,5,15,27]; % Indices where each feature category begins
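% Quick sanity check (added illustration, not part of the published
% experiment): the 42 features break down as 4 statistical + 10 PCA +
% 12 GLCM + 16 Sobel, matching the concatenation order in
% extractFeaturesFromMNIST.m.
assert(isequal(cellfun(@length,categoryInds),[4,10,12,16]),'Unexpected feature category sizes')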
% Setup training and testing sets
dsTrain = dsFeat.retainObservations(1:1000); % Train on 10% of the data
dsTest = dsFeat.retainObservations(1001:dsRaw.nObservations); % Test on 90% of the data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
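% How this grid search works: each feature d is assigned a Beta(a_d,b_d)
% prior on its selection probability rho_d, with a_d = a0 and
% b_d = a0 + b0*tNorm(d), so that E[rho_d] = a_d/(a_d+b_d). tau(i,j) is the
% implied expected relative runtime, sum_d tNorm(d)*E[rho_d], and F(i,j) is
% the per-feature average of (a_d+1)/(a_d+b_d+1). The (a0,b0) pair whose
% (tau,F) falls closest to the targets (desiredT,desiredF) below is retained.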
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500,500);
F = nan(500,500);
for i = 1:500
for j = 1:500
a = a0(i)*ones(size(tNorm));
b = a0(i) + b0(j)*tNorm;
tau(i,j) = sum(tNorm .* a./(a+b));
F(i,j) = (1/dsFeat.nFeatures)*sum((a+1)./(a+b+1));
end
end
desiredT = 0.1; % Expected runtime
desiredF = 0.50; % Maximum posterior probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale; % RBF kernel, scale the sigma parameter to dimensionality of the features
kernelSet = prtKernelDc & kernel; % Add a bias dimension to the kernel (dc kernel)
% CCFO - Cost Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM); % Normalize features, One-vs-All classification since this is a multiclass problem
% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedCCFO = algoCCFO.train(dsTrain); % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest); % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutCCFO.X = dsOutCCFO.X-1;
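% Worked example of the conversion above (illustrative): the one-vs-all bank
% emits one posterior per class, so a test row such as
% [0.01 0.05 0.02 0.10 0.03 0.04 0.02 0.88 0.01 0.02] has its maximum in
% column 8; subtracting 1 yields the digit label 7, since column k
% corresponds to digit k-1.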
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain); % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest); % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutJCFO.X = dsOutJCFO.X-1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories); % Percent correct using each feature category
for iCategory = 1:nFeatCategories % Loop over all feature categories
dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the training set
dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the testing set
trainedRVM = algoRVM.train(dsCategoryTrain); % Train one-vs-all RVM
dsOutCategory = trainedRVM.run(dsCategoryTest); % Run one-vs-all RVM
[~,dsOutCategory.X] = max(dsOutCategory.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutCategory.X = dsOutCategory.X-1;
pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory); % Calculate percent correct (accuracy overall)
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain); % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest); % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutRVM.X = dsOutRVM.X - 1;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results for publication
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have higher probability of not being selected.
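% For example (illustrative numbers, not the fitted ones): a cheap feature
% with a = 0.5, b = 1 has prior mean E[rho] = a/(a+b) = 1/3, while an
% expensive one with a = 0.5, b = 20 has E[rho] of about 0.024, so its Beta
% pdf piles up near rho = 0 and the feature is rarely selected a priori.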
figure(1),hold on
colors = prtPlotUtilClassColors(length(featCategories));
for iFeat = 1:length(featCategories)
featInd = categoryInds{iFeat}(1);
plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
xlabel('\rho'),ylabel('p(\rho|a,b)')
end
title('Priors on MNIST Feature Selection')
legend(featCategories,'location','southeastoutside')
clear iFeat featInd colors
% Baseline - for each category, show feature computation cost and RVM performance
% Take-home point: the most expensive features are not always the best for
% classification performance
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';h1.LineWidth = 2;h1.Marker = 'o';h1.MarkerSize = 8;
h2.LineStyle = '--';h2.LineWidth = 2;h2.Marker = '^';h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2
% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'Fontsize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
clear h
% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses % Loop over all one-vs-all classifiers
thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta'; % CCFO feature selector parameters for this one-vs-all classifier
thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta'; % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
for iClass = 1:dsTrain.nClasses
costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);
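% Note on the thresholds above: CCFO's theta is constrained to [0,1] during
% training, so 0.5 is a natural "selected" cutoff; JCFO's theta is only
% constrained to be nonnegative, so twice its median is used as a comparable
% data-driven cutoff.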
figure(4),set(gcf,'position',[610,512,700,441])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,1]); h = colorbar; ylabel(h,'\theta')
title('CCFO: Learned \theta (MNIST)')
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,2*median(thetaJCFO(:))]); h=colorbar; ylabel(h,'\theta')
title('JCFO: Learned \theta (MNIST)')
clear iClass h
% Calculate average # features selected
avgNumFeatsSelectedRVM = dsTrain.nFeatures;
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO > 2*median(thetaJCFO(:)),2));
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO > 0.5,2));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out summary of results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('MNIST Feature Set Summary\n')
fprintf('*************************************\n')
for iCategory = 1:nFeatCategories
fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('MNIST Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard
end

MNIST/extractFeaturesFromMNIST.m Normal file

@@ -0,0 +1,95 @@
function [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)
% [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)
%
% This function extracts four types of features from the MNIST handwritten
% digit recognition data set: statistical features, principal component
% analysis (PCA) projections, co-occurrence (GLCM) features, and Sobel edge
% features.
%
% The function requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS:
% dsRaw: the PRT data set provided by prtDataGenMnist()
% OUTPUTS:
% dsFeat: a new PRT data set containing the features that were computed
% time: the amount of time (in seconds) to compute each feature for
% each observation in the data set.
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
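% EXAMPLE (a minimal sketch, assuming the PRT is installed):
%   dsRaw = prtDataGenMnist;                         % load the MNIST digits
%   [dsFeat,time] = extractFeaturesFromMNIST(dsRaw);
%   size(dsFeat.X)   % nObservations x 42 (4 stats + 10 PCA + 12 GLCM + 16 Sobel)
%   size(time)       % nObservations x 42, per-feature extraction times (s)
%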
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize data structures and processors
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
PCA = prtPreProcPca('nComponents',10); % Initialize the PCA preprocessor to use 10 components
PCA = PCA.train(dsRaw); % Train PCA on the entire data set
feats = []; % Features
time = []; % Extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Extract features from each sample
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i = 1:dsRaw.nObservations
fprintf('Extracting features from sample %d of %d...\n',i,dsRaw.nObservations)
% Statistical features
tic
statFeats = [mean(dsRaw.X(i,:)),std(dsRaw.X(i,:)),skewness(dsRaw.X(i,:)),kurtosis(dsRaw.X(i,:))];
tStat = toc*ones(1,4);
% PCA Features
tic;
dsPca = PCA.run(dsRaw.retainObservations(i));
pcaFeats = dsPca.X;
tPca = toc*ones(1,dsPca.nFeatures);
% Co-occurrence (GLCM) features
tic;
glcm1 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',8);
glcmProps1 = graycoprops(glcm1);
glcm2 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',16);
glcmProps2 = graycoprops(glcm2);
glcm3 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',32);
glcmProps3 = graycoprops(glcm3);
glcmFeats = [glcmProps1.Contrast,glcmProps1.Correlation,glcmProps1.Energy,glcmProps1.Homogeneity,...
glcmProps2.Contrast,glcmProps2.Correlation,glcmProps2.Energy,glcmProps2.Homogeneity,...
glcmProps3.Contrast,glcmProps3.Correlation,glcmProps3.Energy,glcmProps3.Homogeneity];
tGlcm = toc*ones(size(glcmFeats));
% Sobel edge features
tic;
edgeDeg = [0, 45, 90, 135];
edgeFeats = [];
for j = 1:4
H = fspecial('sobel');
H = imrotate(H,edgeDeg(j));
Y = imfilter(reshape(dsRaw.X(i,:),28,28),H);
edgeFeats = [edgeFeats,trace(Y)];
edgeFeats = [edgeFeats,trace(Y')];
edgeFeats = [edgeFeats,sum(Y(14,:))];
edgeFeats = [edgeFeats,sum(Y(:,14))];
end
tEdge = toc*ones(size(edgeFeats));
% Concatenate the features into a single vector for the observation
feats = [feats;statFeats,pcaFeats,glcmFeats,edgeFeats];
time = [time;tStat,tPca,tGlcm,tEdge];
end
dsFeat = dsRaw; % Copy the input PRT data set
dsFeat.X = feats; % Overwrite the features
end

PRT Plugins/prtClassCCFO.m Normal file

@@ -0,0 +1,375 @@
classdef prtClassCCFO < prtClass
% prtClassCCFO Cost Constrained Feature Optimization
%
% This is a class written to be compatible with the Pattern Recognition Toolbox
% (PRT) for MATLAB. The PRT may be downloaded here:
% http://covartech.github.io/
%
% CLASSIFIER = prtClassCCFO returns a CCFO classifier
%
% CLASSIFIER = prtClassCCFO(PROPERTY1, VALUE1, ...) constructs a
% prtClass object CLASSIFIER with properties as specified by
% PROPERTY/VALUE pairs.
%
% A prtClassCCFO object inherits all properties from the abstract class
% prtClass. In addition, it has the following properties:
%
% kernels - A cell array of prtKernel objects specifying
% the kernels to use (note CCFO only works
% right now with RBF and polynomial kernels)
% verbosePlot - Flag indicating whether or not to plot during
% training
% verboseText - Flag indicating whether or not to output
% verbose updates during training
% learningMaxIterations - The maximum number of iterations
% ridge - Regularization parameter for ridge regression
% initialization of the weights
% gamma - Hyperparameter controlling the prior on
% beta (regression weights)
% a - Hyperparameter controlling the prior on
% theta (feature selectors)
% b - Hyperparameter controlling the prior on
% theta (feature selectors)
% pruneFeatures - Flag determining whether or not features
% with a small enough theta should be
% removed
% pruneObservations - Flag determining whether or not
% observations with a small enough beta should be removed
%
% A prtClassCCFO also has the following read-only properties:
%
% learningConverged - Flag indicating if the training converged
% beta - The regression weights, estimated during training
% theta - The feature scaling factors, estimated in training
% omega - Term defined in (13) of the CCFO paper
% Q - The EM objective function being optimized
% relevantFeats - Indices of features determined to be relevant
% relevantObs - Indices of observations determined to be relevant
%
% A prtClassCCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
% KFOLDS methods from prtAction. It also inherits the PLOT method
% from prtClass.
%
% Reference:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained
% Feature Optimization in Kernel Machine Classifiers," IEEE
% Signal Processing Letters, 2015.
%
% B. Krishnapuram, A.J. Hartemink, L. Carin, and M.A.T. Figueiredo, "A
% Bayesian approach to joint feature selection and classifier
% design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 7 October, 2015
%
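%   Example (a minimal sketch, assuming the PRT is installed; the data set
%   and hyperparameter values below are placeholders, not recommendations):
%
%       ds = prtDataGenUnimodal;                 % two-class sample data
%       ccfo = prtClassCCFO('a',0.5*ones(1,ds.nFeatures),...
%           'b',10*ones(1,ds.nFeatures),'gamma',1,'ridge',10);
%       ccfo = ccfo.train(ds);                   % EM training
%       dsOut = ccfo.run(ds);                    % posterior P(y = 1 | x)
%       prtScoreRoc(dsOut);                      % inspect performance
%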
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define properties
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Private properties for internal PRT use
properties (SetAccess=private)
name = 'Cost Constrained Feature Optimization' % Full name of the classifier
nameAbbreviation = 'CCFO'; % Abbreviated name
isNativeMary = false; % Cannot handle multi-class data
end
% Public properties for general use
properties
verbosePlot = false; % Whether or not to plot during training
verboseText = false; % Whether or not to write text during training
ridge = 1; % Ridge regression penalty (for initializing beta)
gamma = 1; % Hyperparameter for beta
a = 1; % Hyperparameter for theta
b = 1; % Hyperparameter for theta
kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
pruneFeatures = false; % Flag for removing features as we go
pruneObservations = false; % Flag for removing observations as we go
end
% Hidden properties that should generally be left alone
properties (Hidden = true)
learningMaxIterations = 100; % Maximum number of iterations
learningConvergedThreshold = .0001; % Threshold for whether learning has converged
learningNormWeightsThresh = 0.001; % Threshold for whether the weights aren't changing
learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
pruningThreshBeta = 0.0001; % Threshold for removing observations
pruningThreshTheta = 0.5; % Threshold for removing features
featuresRetained = []; % List of features that were retained
nMaxFminconEvals = 100; % Number of steps for fmincon optimization
end
% Properties that may be accessed for monitoring the learning algorithm
properties (SetAccess = 'protected',GetAccess = 'public')
learningConverged = []; % Whether or not the training converged
beta = []; % The regression weights
theta = []; % The feature scaling factors
omega = []; % Equation (13) in Krishnapuram et al.
Q = []; % The EM objective function
relevantFeats = []; % List of relevant features
relevantObs = []; % List of relevant observations
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Error checking
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods
% Allow for string, value pairs
function Obj = prtClassCCFO(varargin)
Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
end
% Make sure the kernel is compatible with CCFO
function Obj = set.kernels(Obj,val)
if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) || ~isa(val.kernelCell{1},'prtKernelDc')
error('prt:prtClassCCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
else
Obj.kernels = val;
end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training, testing, and helper functions (called by PRT train and run API)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods (Access = protected, Hidden = true)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training function (called by Obj.train)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Obj = trainAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
Y = DataSet.Y;
N = size(X,1);
P = size(X,2);
beta = ones(N+1,1);
theta = .9*ones(P,1);
omega = ones(N+1,1);
kernels = Obj.kernels;
converged = false;
iteration = 0;
relevantFeats = true(P,1);
relevantObs = true(N,1);
relevantKernels = [true;relevantObs];
while ~converged
%%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
iteration = iteration + 1;
if Obj.verboseText
fprintf('CCFO EM Iteration %d:\t',iteration)
end
Xrel = X(:,relevantFeats);
Nrel = size(Xrel,1);
Prel = size(Xrel,2);
thetaRel = theta(relevantFeats);
betaRel = beta(relevantKernels);
aRel = Obj.a(relevantFeats);
bRel = Obj.b(relevantFeats);
%%%%%%%%%%%% M-step %%%%%%%%%%%%%%
% Update the feature scaling factors
if iteration > 1
if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',betarnd(ones(size(thetaRel)),ones(size(thetaRel))),'Display','iter-detailed','TolX',1e-4,'TolFun',1e-4);%'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,x,relevantKernels,aRel,bRel),thetaRel,[],[],[],[],zeros(size(thetaRel)),ones(size(thetaRel)),[],opts);
theta(relevantFeats) = thetaRel;
thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
else
fprintf('Feature selection converged. Skipping constrained optimization.\n')
end
else
thetaNormDiff = nan;
end
thetaNorm = norm(theta);
% Apply scaling factors to features and re-compute the Gram
% matrix via the kernel function
XT = bsxfun(@times,Xrel,thetaRel');
dsTmp = prtDataSetClass(XT,Y);
kernels = train(kernels,dsTmp);
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
% Update the regression weights
if iteration == 1
betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
beta(relevantKernels) = betaRel;
betaNormDiff = nan;
else
betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
beta(relevantKernels) = betaRel;
betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
end
betaNorm = norm(beta);
beta = beta./betaNorm;
%%%%%%%%%%%% E-step %%%%%%%%%%%%%%
v = nan(N,1);
for i = 1:N
normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
if isnan(normFactor)
normFactor = 0;
end
v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of linear observation model
end
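% The loop above is the standard probit-regression E-step: with z_i the
% latent linear response, v_i = E[z_i|y_i,beta] = h_i'*beta +
% (2*y_i-1)*normpdf(h_i'*beta)/normcdf((2*y_i-1)*h_i'*beta), the mean of a
% Gaussian truncated to the side indicated by the label; the NaN guard
% handles the 0/0 case when the normcdf term underflows.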
omegaRel = nan(length(betaRel),1);
for i = 1:length(betaRel)
omegaRel(i,:) = Obj.gamma*abs(betaRel(i))^(-1); % Expected value of weight variance
end
omega(relevantKernels) = omegaRel;
S = diag(omegaRel.^(-1/2));
% Recompute the expected log-posterior
Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,thetaRel,relevantKernels,aRel,bRel);
%%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
if Obj.pruneFeatures
relevantFeats(theta < Obj.pruningThreshTheta) = false;
theta(~relevantFeats) = 0;
thetaRel = theta(relevantFeats);
end
if Obj.pruneObservations
relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
relevantKernels = [true;relevantObs];
beta(~relevantKernels) = 0;
betaRel = beta(relevantKernels);
omegaRel = omega(relevantKernels);
S = diag(omegaRel.^(-1/2));
end
% For debugging purposes, plot how all of the parameters are updating
if Obj.verbosePlot
figure(666)
subplot(2,3,1),plot(v),title('v'),axis tight
subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
drawnow
end
%%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
if iteration == 1
Qdiff = nan;
else
Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
end
if Obj.verboseText
fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
end
if abs(Qdiff) < Obj.learningConvergedThreshold
converged = true;
fprintf('Expected log-posterior converged within threshold, exiting.\n')
elseif iteration == Obj.learningMaxIterations
converged = true;
fprintf('Maximum number of iterations reached, exiting.\n')
elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
converged = true;
fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
end
end
%%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
Obj.beta = beta;
Obj.theta = theta;
Obj.omega = omega;
Obj.Q = Q(end);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT,Y);
Obj.kernels = train(kernels,dsTmp);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Running function (called by Obj.run)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function DataSetOut = runAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
kernels = Obj.kernels;
theta = Obj.theta;
beta = Obj.beta;
%%%%%%%%%% Run CCFO on dataset %%%%%%%%%%%%
DataSet.X = bsxfun(@times,X,theta');
H = kernels.run_OutputDoubleArray(DataSet);
%%%%%%%%%% Build output dataset %%%%%%%%%%%%
DataSetOut = DataSet;
DataSetOut.X = normcdf(H*beta);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function for calculating the EM objective, Q and its derivative (called by Obj.trainAction)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Qout,dQdTout,beta,omega] = calcQ(Obj,X,kernel,v,omega,theta,relevantKernels,a,b)
% Build gram matrix using the proposed vector of feature scaling factors
N = size(X,1);
Nk = sum(relevantKernels);
P = size(X,2);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT);
kernels = train(kernel,prtDataSetClass(XT));
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
S = diag(omega.^(-1/2));
% Update beta given the proposed theta
beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;
% Calculate the expected log-posterior
nu1 = psi(a) - psi(a + b);
nu2 = psi(b) - psi(a + b);
Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta + sum(theta.*nu1 + (1-theta).*nu2);
Qout = -Q;
% Calculate derivative of Q w.r.t. each theta
dQdT = nan(1,P);
if isa(Obj.kernels.kernelCell{2},'prtKernelPolynomial')
n = Obj.kernels.kernelCell{2}.d;
xTx = X*diag(theta)*X';
for k = 1:P
xxk = X(:,k)*X(:,k)';
dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk]; % Derivative of polynomial kernel provided in Krishnapuram et al., RECOMB '03
dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT)); % Sign of the prior term matches the RBF branch below
end
dQdTout = -dQdT;
elseif isa(Obj.kernels.kernelCell{2},'prtKernelRbf')
for k = 1:P
Xk = X(:,k);
dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
if isa(Obj.kernels.kernelCell{2},'prtKernelRbfNdimensionScale')
dXk = dXk./(P*Obj.kernels.kernelCell{2}.sigma.^2);
else
dXk = dXk./Obj.kernels.kernelCell{2}.sigma.^2;
end
if isa(Obj.kernels.kernelCell{1},'prtKernelDc')
dXk = [zeros(N,1),dXk];
end
dHdT = -H.*dXk(:,relevantKernels); % Keep only columns of retained kernels, as in prtClassJCFO
dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
dQdTout(k) = -dQdT(k);
end
end
end
end
end

PRT Plugins/prtClassJCFO.m Normal file

@@ -0,0 +1,369 @@
classdef prtClassJCFO < prtClass
% prtClassJCFO Joint Classifier and Feature Optimization
%
% This is a class written to be compatible with the Pattern Recognition Toolbox
% (PRT) for MATLAB. The PRT may be downloaded here:
% http://covartech.github.io/
%
% CLASSIFIER = prtClassJCFO returns a JCFO classifier
%
% CLASSIFIER = prtClassJCFO(PROPERTY1, VALUE1, ...) constructs a
% prtClass object CLASSIFIER with properties as specified by
% PROPERTY/VALUE pairs.
%
% A prtClassJCFO object inherits all properties from the abstract class
% prtClass. In addition, it has the following properties:
%
% kernels - A cell array of prtKernel objects specifying
% the kernels to use (note JCFO only works
% right now with RBF and polynomial kernels)
% verbosePlot - Flag indicating whether or not to plot during
% training
% verboseText - Flag indicating whether or not to output
% verbose updates during training
% learningMaxIterations - The maximum number of iterations
% ridge - Regularization parameter for ridge regression
% initialization of the weights
% gamma1 - Hyperparameter controlling the prior on
% beta (regression weights)
% gamma2 - Hyperparameter controlling the prior on
% theta (feature scaling factors)
% pruneFeatures - Flag determining whether or not features
% with a small enough theta should be
% removed
% pruneObservations - Flag determining whether or not
% observations with a small enough beta should be removed
%
% A prtClassJCFO also has the following read-only properties:
%
% learningConverged - Flag indicating if the training converged
% beta - The regression weights, estimated during training
% theta - The feature scaling factors, estimated in training
% delta - Term defined in (14) of JCFO paper
% omega - Term defined in (13) of JCFO paper
% Q - The EM objective function being optimized
% relevantFeats - Indices of features determined to be relevant
% relevantObs - Indices of observations determined to be relevant
%
% A prtClassJCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
% KFOLDS methods from prtAction. It also inherits the PLOT method
% from prtClass.
%
% Reference:
% B. Krishnapuram, A.J. Hartemink, L. Carin, and M.A.T. Figueiredo, "A
% Bayesian approach to joint feature selection and classifier
% design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 7 October, 2015
%
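%   Example (a minimal sketch, assuming the PRT is installed; the data set
%   and hyperparameter values below are placeholders, not recommendations):
%
%       ds = prtDataGenUnimodal;                 % two-class sample data
%       jcfo = prtClassJCFO('gamma1',1,'gamma2',1,'ridge',10);
%       jcfo = jcfo.train(ds);                   % EM training
%       dsOut = jcfo.run(ds);                    % posterior P(y = 1 | x)
%       prtScoreRoc(dsOut);                      % inspect performance
%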
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Define properties
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Private properties for internal PRT use
properties (SetAccess=private)
name = 'Joint Classifier and Feature Optimization' % Full name of the classifier
nameAbbreviation = 'JCFO'; % Abbreviated name
isNativeMary = false; % Cannot handle multi-class data
end
% Public properties for general use
properties
verbosePlot = false; % Whether or not to plot during training
verboseText = false; % Whether or not to write text during training
ridge = 1; % Ridge regression penalty (for initializing beta)
gamma1 = 1; % Hyperparameter for beta
gamma2 = 1; % Hyperparameter for theta
kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
pruneFeatures = false; % Flag for removing features as we go
pruneObservations = false; % Flag for removing observations as we go
end
% Hidden properties that should generally be left alone
properties (Hidden = true)
learningMaxIterations = 100; % Maximum number of iterations
learningConvergedThreshold = .001; % Threshold for whether learning has converged
learningNormWeightsThresh = 0.001; % Threshold for whether the weights aren't changing
learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
pruningThreshBeta = 0.0001; % Threshold for removing observations
pruningThreshTheta = 0.1; % Threshold for removing features
featuresRetained = []; % List of features being retained
nMaxFminconEvals = 100; % Number of steps for fmincon optimization
end
% Properties that may be accessed for monitoring of learning algorithm
properties (SetAccess = 'protected',GetAccess = 'public')
learningConverged = []; % Whether or not the training converged
beta = []; % The regression weights
theta = []; % The feature scaling factors
delta = []; % Equation (14) in Krishnapuram et al.
omega = []; % Equation (13) in Krishnapuram et al.
Q = []; % EM objective function
relevantFeats = []; % List of relevant features
relevantObs = []; % List of relevant observations
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Error checking
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods
% Allow for string, value pairs
function Obj = prtClassJCFO(varargin)
Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
end
% Make sure the kernel is compatible with JCFO
function Obj = set.kernels(Obj,val)
if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) || ~isa(val.kernelCell{1},'prtKernelDc')
error('prt:prtClassJCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
else
Obj.kernels = val;
end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training, testing, and helper functions (called by PRT train and run API)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
methods (Access = protected, Hidden = true)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Training function (called by Obj.train)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function Obj = trainAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
Y = DataSet.Y;
N = size(X,1);
P = size(X,2);
beta = ones(N+1,1);
theta = ones(P,1);
delta = ones(P,1);
omega = ones(N+1,1);
kernels = Obj.kernels;
converged = false;
iteration = 0;
relevantFeats = true(P,1);
relevantObs = true(N,1);
relevantKernels = [true;relevantObs];
while ~converged
%%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
iteration = iteration + 1;
if Obj.verboseText
fprintf('JCFO EM Iteration %d:\t',iteration)
end
Xrel = X(:,relevantFeats);
Nrel = size(Xrel,1);
Prel = size(Xrel,2);
thetaRel = theta(relevantFeats);
betaRel = beta(relevantKernels);
%%%%%%%%%%%% M-step %%%%%%%%%%%%%%
% Update the feature scaling factors
if iteration > 1
if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',ones(size(thetaRel)),'Display','off','TolX',1e-6,'TolFun',1e-6);%,'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,x,relevantKernels),thetaRel,[],[],[],[],zeros(size(thetaRel)),inf(size(thetaRel)),[],opts);
theta(relevantFeats) = thetaRel;
thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
else
fprintf('Feature selection converged. Skipping constrained optimization.\n')
end
else
thetaNormDiff = nan;
end
thetaNorm = norm(theta);
% Apply scaling factors to features and re-compute the Gram
% matrix via the kernel function
XT = bsxfun(@times,Xrel,thetaRel');
dsTmp = prtDataSetClass(XT,Y);
kernels = train(kernels,dsTmp);
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
% Update the regression weights
if iteration == 1
betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
beta(relevantKernels) = betaRel;
betaNormDiff = nan;
else
betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
beta(relevantKernels) = betaRel;
betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
end
betaNorm = norm(beta);
beta = beta./betaNorm;
%%%%%%%%%%%% E-step %%%%%%%%%%%%%%
v = nan(N,1);
for i = 1:N
normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
if isnan(normFactor)
normFactor = 0;
end
v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of linear observation model
end
omegaRel = nan(length(betaRel),1);
for i = 1:length(betaRel)
omegaRel(i,:) = Obj.gamma1*abs(betaRel(i))^(-1); % Expected value of weight variance
end
omega(relevantKernels) = omegaRel;
S = diag(omegaRel.^(-1/2));
deltaRel = nan(Prel,1);
for k = 1:Prel
deltaRel(k,:) = Obj.gamma2*thetaRel(k)^(-1); % Expected value of feature selectors
end
delta(relevantFeats) = deltaRel;
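% The expectations above follow Krishnapuram et al.: omega_i = gamma1/|beta_i|
% is their (13) and delta_k = gamma2/theta_k is their (14), the expected
% inverse scales of the sparsity-inducing priors on the kernel weights and
% the feature scaling factors, respectively.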
% Recompute the expected log-posterior
Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,thetaRel,relevantKernels); % Expected log-posterior
%%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
if Obj.pruneFeatures
relevantFeats(theta < Obj.pruningThreshTheta) = false;
theta(~relevantFeats) = 0;
thetaRel = theta(relevantFeats);
deltaRel = delta(relevantFeats);
end
if Obj.pruneObservations
relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
relevantKernels = [true;relevantObs];
beta(~relevantKernels) = 0;
betaRel = beta(relevantKernels);
omegaRel = omega(relevantKernels);
S = diag(omegaRel.^(-1/2));
end
% For debugging purposes, plot how all of the parameters are updating
if Obj.verbosePlot
figure(666)
subplot(2,3,1),plot(v),title('v'),axis tight
subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
subplot(2,3,3),plot(log(delta),'marker','o'),title('log(\delta)'),axis tight
subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
drawnow
end
%%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
if iteration == 1
Qdiff = nan;
else
Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
end
if Obj.verboseText
fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
end
if abs(Qdiff) < Obj.learningConvergedThreshold
converged = true;
fprintf('Expected log-posterior converged within threshold, exiting.\n')
elseif iteration == Obj.learningMaxIterations
converged = true;
fprintf('Maximum number of iterations reached, exiting.\n')
elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
converged = true;
fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
end
end
%%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
Obj.beta = beta;
Obj.theta = theta;
Obj.delta = delta;
Obj.omega = omega;
Obj.Q = Q(end);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT,Y);
Obj.kernels = train(kernels,dsTmp);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Running function (called by Obj.run)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function DataSetOut = runAction(Obj,DataSet)
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
X = DataSet.X;
kernels = Obj.kernels;
theta = Obj.theta;
beta = Obj.beta;
%%%%%%%%%% Run JCFO on dataset %%%%%%%%%%%%
DataSet.X = bsxfun(@times,X,theta');
H = kernels.run_OutputDoubleArray(DataSet);
%%%%%%%%%% Build output dataset %%%%%%%%%%%%
DataSetOut = DataSet;
DataSetOut.X = normcdf(H*beta);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function for calculating the EM objective, Q and its derivative (called by Obj.trainAction)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Qout,dQdTout] = calcQ(Obj,X,kernel,v,omega,delta,theta,relevantKernels)
% Build gram matrix using the proposed vector of feature scaling factors
N = size(X,1);
Nk = sum(relevantKernels);
P = size(X,2);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT);
kernels = train(kernel,prtDataSetClass(XT));
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
H = H(:,relevantKernels);
S = diag(omega.^(-1/2));
% Update beta given the proposed theta
beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;
% Calculate the expected log-posterior
Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta - theta'*diag(delta)*theta;
Qout = -Q; % Using negative since matlab optimization does minimization only
dQdT = nan(1,P);
% Calculate derivative of Q w.r.t. each theta
if isa(kernel.kernelCell{2},'prtKernelPolynomial')
n = kernel.kernelCell{2}.d;
xTx = X*diag(theta)*X';
for k = 1:P
xxk = X(:,k)*X(:,k)';
dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk]; % Derivative of polynomial kernel provided in Krishnapuram et al., RECOMB '03
dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
end
dQdTout = -dQdT;
elseif isa(kernel.kernelCell{2},'prtKernelRbf')
for k = 1:P
Xk = X(:,k);
dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
if isa(kernel.kernelCell{2},'prtKernelRbfNdimensionScale')
dXk = dXk./(P*kernel.kernelCell{2}.sigma.^2);
else
dXk = dXk./kernel.kernelCell{2}.sigma.^2;
end
if isa(kernel.kernelCell{1},'prtKernelDc')
dXk = [zeros(N,1),dXk];
end
dHdT = -H.*dXk(:,relevantKernels);
dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
dQdTout(k) = -dQdT(k);
end
end
end
end
end

README.txt Normal file

@@ -0,0 +1,37 @@
Cost-Constrained Feature Optimization (CCFO) for MATLAB
This MATLAB code may be used to replicate the results published in the following manuscript:
C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
Optimization in Kernel Machine Classifiers," IEEE Signal Processing
Letters, 2015.
The code is organized into three directories as follows:
Urban Land Cover/ - Code for replicating the urban land cover experiment from the paper.
experiment_urbanLandCover.m - Script for running the experiment
featureTimes_urbanLandCover.m - Function for estimating feature computation times
MNIST/ - Code for replicating the MNIST experiment from the paper
experiment_mnist.m - Script for running the experiment
extractFeaturesFromMNIST.m - Function for extracting features from the handwritten digits
PRT Plugins/ - The actual machine learning code, written as a plugin to the Pattern Recognition Toolbox (PRT)
prtClassJCFO - Joint Classifier and Feature Optimization
prtClassCCFO - Cost Constrained Feature Optimization
To run either experiment, or to use our code in your own research, you must download and install the
Pattern Recognition Toolbox (PRT) for MATLAB at http://covartech.github.io/
To run the urban land cover experiment, you must download the data from the UCI machine
learning repository: https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
The code benchmarks the computation time of each feature on a test image of a black-and-white circle.
The function "MidpointCircle.m", which generates this test image, is available via MATLAB Central:
http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m
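For example (a sketch; the data path below is hypothetical and should be adjusted for your machine), from the repository root in MATLAB:

    addpath(genpath('PRT Plugins'))                       % make prtClassCCFO/prtClassJCFO visible
    cd MNIST
    experiment_mnist                                      % replicate the MNIST experiment
    cd('../Urban Land Cover')
    experiment_urbanLandCover('C:\data\UrbanLandCover')   % folder containing training.csv and testing.csv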
*******************************************************************************************************************
This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
All Rights Reserved
This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
review, modification, and/or use of the software, in source and binary forms are ONLY permitted
provided you agree to and comply with the terms and conditions set forth in the license.
*******************************************************************************************************************

Urban Land Cover/experiment_urbanLandCover.m Normal file

@@ -0,0 +1,307 @@
function experiment_urbanLandCover(varargin)
% experiment_urbanLandCover.m
%
% This MATLAB script runs the feature selection experiment for the Urban
% Land Cover data set that was published in the following manuscript:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
% Optimization in Kernel Machine Classifiers," IEEE Signal Processing
% Letters, 2015.
%
% The script requires the Urban Land Cover data set, which is available
% from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The script also requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS (optional):
% dataDir: full path to the directory in which the XLS data is saved
% (default is the current working directory)
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the training and test data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if nargin == 0
dataDir = pwd;
else
dataDir = varargin{1};
end
[~,~,raw] = xlsread(fullfile(dataDir,'training.csv')); % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end)); % Features from spreadsheet
labels = strrep(raw(2:end,1),' ',''); % Class labels from spreadsheet
uLabels = unique(labels); % Unique class labels
Y = nan(size(labels)); % Numeric class labels
for i = 1:length(labels)
Y(i,:) = find(strcmp(labels{i},uLabels)); % Assign a numeric label to each class name
end
featNames = raw(1,2:end); % Feature names
dsTrain = prtDataSetClass(X,Y); % Training PRT data set
dsTrain.classNames = uLabels; % Training class names
dsTrain.featureNames = featNames; % Training feature names
clear raw X labels uLabels Y featNames i
[~,~,raw] = xlsread(fullfile(dataDir,'testing.csv')); % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end)); % Features from spreadsheet
labels = strrep(raw(2:end,1),' ',''); % Class labels from spreadsheet
uLabels = unique(labels); % Unique class labels
Y = nan(size(labels)); % Numeric class labels
for i = 1:length(labels)
Y(i,:) = find(strcmp(labels{i},uLabels)); % Assign a numeric label to each class name
end
featNames = raw(1,2:end); % Feature names
dsTest = prtDataSetClass(X,Y); % Testing PRT data set
dsTest.classNames = uLabels; % Testing class names
dsTest.featureNames = featNames; % Testing feature names
clear raw X labels uLabels Y featNames i
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[featCategories,~,categoryTimes] = featureTimes_urbanLandCover; % Benchmarked feature computation times (see featureTimes_urbanLandCover.m)
nFeatCategories = length(featCategories); % Number of feature categories
[categoryTimes,sortInds] = sort(cat(1,[],categoryTimes),'ascend'); % Sort the computation time of each feature category
featCategories = featCategories(sortInds); % Rearrange the order of feature category names
sortVec = []; % Vector of feature indices corresponding to the sorted categories
for iCategory = 1:nFeatCategories
sortVec = [sortVec,find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory})))]; %#ok<AGROW>
end
dsTrain.X = dsTrain.X(:,sortVec); % Rearrange the order of features in the training set
dsTrain.featureNames = dsTrain.featureNames(:,sortVec); % Rearrange the order of feature names in the training set
dsTest.X = dsTest.X(:,sortVec); % Rearrange the order of features in the testing set
dsTest.featureNames = dsTest.featureNames(:,sortVec); % Rearrange the order of feature names in the testing set
categoryInds = cell(1,nFeatCategories); % Feature indices corresponding to each category
T = nan(1,dsTrain.nFeatures); % Computation time (seconds) of each feature
TperCategory = nan(1,nFeatCategories); % Total computation time (seconds) of each category
categoryIndBegin = nan(1,nFeatCategories); % The index of the first feature from each category
for iCategory = 1:nFeatCategories
categoryInds{iCategory} = find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory}))); % Find the features that belong to this category
categoryIndBegin(iCategory) = min(categoryInds{iCategory});
T(categoryInds{iCategory}) = categoryTimes(iCategory); % Computation time for each feature (seconds)
TperCategory(iCategory) = sum(T(categoryInds{iCategory})); % Computation time for each feature category (seconds)
end
tNorm = T./sum(T); % Normalized computation time of each feature
tNormCategory = TperCategory./sum(TperCategory); % Normalized computation time of each feature category
featCategories = strrep(featCategories,'_',''); % Remove underscores from category names (they render as TeX subscripts in plot labels)
clear categoryTimes iCategory i sortVec sortInds
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
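% This grid search is identical to the one in experiment_mnist.m (see the
% explanatory note there); only the target expected runtime desiredT differs.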
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500,500);
F = nan(500,500);
for i = 1:500
for j = 1:500
a = a0(i)*ones(size(tNorm));
b = a0(i) + b0(j)*tNorm;
tau(i,j) = sum(tNorm .* a./(a+b));
F(i,j) = (1/dsTrain.nFeatures)*sum((a+1)./(a+b+1));
end
end
desiredT = 0.25; % Expected runtime
desiredF = 0.50; % Maximum posterior probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
clear featInd legendNames colors tau F iA iB iFeat
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale; % RBF kernel, scale the sigma parameter to dimensionality of the features
kernelSet = prtKernelDc & kernel; % Add a bias dimension to the kernel (dc kernel)
% CCFO - Cost Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM); % Normalize features, One-vs-All classification since this is a multiclass problem
% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO); % Normalize features, One-vs-All classification since this is a multiclass problem
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedCCFO = algoCCFO.train(dsTrain); % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest); % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain); % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest); % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories); % Percent correct using each feature category
for iCategory = 1:nFeatCategories % Loop over all feature categories
dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the training set
dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory}); % Retain only features from this category for the testing set
trainedRVM = algoRVM.train(dsCategoryTrain); % Train one-vs-all RVM
dsOutCategory = trainedRVM.run(dsCategoryTest); % Run one-vs-all RVM
[~,dsOutCategory.X] = max(dsOutCategory.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory); % Calculate percent correct (accuracy overall)
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features together)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain); % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest); % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have higher probability of not being selected.
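% For reference, the density plotted here is
%   p(rho|a,b) = rho^(a-1)*(1-rho)^(b-1)/B(a,b),  rho in [0,1],
% with mean a/(a+b). Because b_k = a0 + b0*tNorm(k) grows with feature cost,
% costly features get priors concentrated near rho = 0 (rarely selected).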
figure(1),set(gcf,'position',[610,512,700,441])
colors = prtPlotUtilClassColors(length(featCategories));
colors(7,:) = [0,1,1];
subplot(2,1,1),hold on
for iFeat = 1:8 % First eight (cheap) feature categories
featInd = categoryInds{iFeat}(1);
plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Cheap Features (Urban Land Cover)')
legend(featCategories(1:8),'location','southeastoutside')
subplot(2,1,2),hold on
for iFeat = 1:7 % Remaining seven (expensive) feature categories
featInd = categoryInds{8+iFeat}(1);
plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Expensive Features (Urban Land Cover)')
legend(featCategories(9:15),'location','southeastoutside')
clear iFeat featInd colors
% Baseline - for each category, show feature computation and RVM performance
% Take-home point: most expensive features not always the best for
% classification performance
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';h1.LineWidth = 2;h1.Marker = 'o';h1.MarkerSize = 8;
h2.LineStyle = '--';h2.LineWidth = 2;h2.Marker = '^';h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2
% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'Fontsize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'fontsize',12)
h.XTickLabelRotation = 20;
axis square
clear h
% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses % Loop over all one-vs-all classifiers
thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta'; % CCFO feature selector parameters for this one-vs-all classifier
thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta'; % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
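% Threshold rationale (annotation added for clarity): CCFO's theta is displayed
% on a [0,1] color scale below and interpreted as a per-feature selection
% probability, so theta >= 0.5 counts as selected. JCFO's theta is an unbounded
% scaling parameter with no natural cutoff, so a data-driven threshold of twice
% its median is used instead.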
for iClass = 1:dsTrain.nClasses
costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);
figure(4),set(gcf,'outerposition',[255,317,568,596])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,1]); h = colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'CCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionCCFO)})
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,2*median(thetaJCFO(:))]); h=colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'JCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionJCFO)})
clear iClass h m
% Average number of features selected
avgNumFeatsSelectedRVM = dsTest.nFeatures; % The baseline RVM performs no feature selection, so it uses all features
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO >= 2*median(thetaJCFO(:)),2)); % Same JCFO threshold as the cost calculation above
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO >= 0.5,2)); % Same CCFO threshold as the cost calculation above
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out summary of results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('Urban Land Cover Feature Set Summary\n')
fprintf('*************************************\n')
fprintf('Category \t #Feats \t Rel. Cost \t Accuracy (%%)\n')
for iCategory = 1:nFeatCategories
    fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('Urban Land Cover Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100.00\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard % Drop into the debugger so the results can be inspected interactively before the function exits
end

280
featureTimes_urbanLandCover.m Normal file

@@ -0,0 +1,280 @@
function [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover()
% [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover()
%
% Function to estimate the extraction time for each feature in the urban
% land cover data set, which is available from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The code benchmarks each of the feature computation times on a test image
% of a black and white circle. The function "MidpointCircle.m" required to
% generate the test image is available via Matlab Central:
% http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m
%
% The computations for each feature were derived from the following reference
% (page references provided in the source code):
% Definiens AG, "Definiens 5 Reference Book," Munich, Germany 2006.
%
% INPUTS: none
%
% OUTPUTS:
% featureCategories: cell array of feature category names
% timeAbs: the absolute time (sec) to compute each feature, measured via tic/toc
% timeRel: the relative time (%) to compute each feature in the set
%
% Author: Carlos A. Caceres, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
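% Example usage (illustrative only; the plotting commands are an assumption,
% not part of the published experiment):
%   [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover();
%   bar(timeRel)
%   set(gca,'XTick',1:numel(featureCategories),'XTickLabel',featureCategories,'XTickLabelRotation',45)
%   ylabel('Relative extraction time (%)')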
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the test image
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
featureCategories = {'Area','Assym','BordLngth','BrdIndx','Bright','Compact','Dens','GLCM','LW','Mean','NDVI','Rect','Round','SD','ShpIndx'};
imgSize = 1024; % Size of test image
img = zeros(imgSize); % Initialize the test image
img = MidpointCircle(img, 250, imgSize/2, imgSize/2, 1); % Fill in with a circle
img2 = img + randn(size(img)); % Add white Gaussian noise
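% Benchmarking note (a suggestion, not part of the original procedure): a
% single tic/toc measurement is noisy. Where more stable estimates are needed,
% each snippet could be wrapped in an anonymous function and timed with
% timeit (R2013b and later), which runs it repeatedly and returns a robust
% estimate, e.g.:
%   timeAbs(1) = timeit(@() sum(img(:)==1));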
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Area (ref. page 58)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1); %assume each pixel has area = 1;
timeAbs(1) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Asymmetry (ref. page 60)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
varXY = var(idListX.*idListY);
assym = 2*sqrt(.25*(varX+varY)^2 + (varXY)^2 - varX*varY)/(varX+varY);
timeAbs(2) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border length (ref. page 36)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
bordlength = sum(temp(:));
timeAbs(3) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border index (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
borderIndex = sum(temp(:))/(2*(l+width));
timeAbs(4) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Brightness (ref. page 42)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
img3 = repmat(img,1,1,3);
tic;
% for an RGB image
wkb = [1,2,3];
brightness = (1/mean(wkb))*sum((wkb.*reshape(mean(mean(img3,1),2),1,3)));
% % for a binary image
% mean(img(:));
timeAbs(5) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compactness (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
compactness = l*width/sum(img(:)==1);
timeAbs(6) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Density (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
dens = sqrt(sum(img(:)==1))/(1+sqrt(varX+varY));
timeAbs(7) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Gray-level co-occurrence matrix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
graycomatrix(img2);
timeAbs(8) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Length/width (ref. page 59)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
lw = l/width;
timeAbs(9) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Mean (ref. page 40)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
datMean = mean(img2(:));
timeAbs(10) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NDVI
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
ndvi = sum((img - img2)./(img + img2)); % NDVI = (NIR - R)./(NIR + R); img and img2 stand in for the two bands
timeAbs(11) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Rectangular fit (ref. page 65)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%get outlines of each object
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar); % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);
% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;
% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
height = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
area = sum(img(:)==1);
SquareMetric = width/height;
if SquareMetric > 1
    SquareMetric = height/width; % Make aspect ratio less than unity
end
SquareMetric = SquareMetric/area;
timeAbs(12) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Roundness (ref. page 64)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1);
temp = double(bwperim(img)==1);
perimeter = sum(temp(:));
roundness = 4*pi*area/perimeter^2;
timeAbs(13) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Standard deviation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
stdDeviation = std(img(:));
timeAbs(14) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Shape index (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
shpindx = sum(temp(:))/(4*sqrt(sum(img(:)==1)));
timeAbs(15) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compute relative extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
timeRel = 100*timeAbs./sum(timeAbs); % Each feature's share (%) of the total extraction time
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Legend of feature names
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Class: Land cover class (nominal)
% BrdIndx: Border Index (shape variable)
% Area: Area in m2 (size variable)
% Round: Roundness (shape variable)
% Bright: Brightness (spectral variable)
% Compact: Compactness (shape variable)
% ShpIndx: Shape Index (shape variable)
% Mean_G: Green (spectral variable)
% Mean_R: Red (spectral variable)
% Mean_NIR: Near Infrared (spectral variable)
% SD_G: Standard deviation of Green (texture variable)
% SD_R: Standard deviation of Red (texture variable)
% SD_NIR: Standard deviation of Near Infrared (texture variable)
% LW: Length/Width (shape variable)
% GLCM1: Gray-Level Co-occurrence Matrix attribute (texture variable; the specific GLCM metric is not documented for this data set)
% Rect: Rectangularity (shape variable)
% GLCM2: Another Gray-Level Co-occurrence Matrix attribute (texture variable)
% Dens: Density (shape variable)
% Assym: Asymmetry (shape variable)
% NDVI: Normalized Difference Vegetation Index (spectral variable)
% BordLngth: Border Length (shape variable)
% GLCM3: Another Gray-Level Co-occurrence Matrix attribute (texture variable)
end