mirror of https://github.com/JHUAPL/CCFO.git
synced 2026-01-09 23:07:54 -05:00

Initial commit pre-publication

MNIST/experiment_mnist.m (new file, 243 lines)
@@ -0,0 +1,243 @@
function experiment_mnist(varargin)

% experiment_mnist.m
%
% This MATLAB script runs the feature selection experiment for the MNIST
% data set that was published in the following manuscript:
%
%   C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
%   Optimization in Kernel Machine Classifiers," IEEE Signal Processing
%   Letters, 2015.
%
% The script requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the data set and feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
dsRaw = prtDataGenMnist;                              % The MNIST data set comes with the PRT for demonstration purposes
[dsFeat,featTime] = extractFeaturesFromMNIST(dsRaw);  % Extract various types of features from the MNIST data
tNorm = mean(featTime);                               % Average relative feature extraction time
tNorm = tNorm./sum(tNorm);                            % Normalize the average relative times
featCategories = {'Stats','PCA','GLCM','Sobel'};      % Names of the feature categories
nFeatCategories = length(featCategories);             % Number of feature categories
tNormCategory = [sum(tNorm(1:4)),sum(tNorm(5:14)),sum(tNorm(15:26)),sum(tNorm(27:end))]; % Relative time per category
categoryInds = {1:4,5:14,15:26,27:42};                % Indices of features in each category
categoryIndBegin = [1,5,15,27];                       % Indices where each feature category begins
clear categoryTimes iCategory i sortVec sortInds

% Set up training and testing sets
dsTrain = dsFeat.retainObservations(1:1000);                   % Train on 10% of the data
dsTest = dsFeat.retainObservations(1001:dsRaw.nObservations);  % Test on the remaining 90%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500);   % 500x500 grid: expected relative cost at each (a0,b0) pair
F = nan(500);     % 500x500 grid: average feature selection probability at each (a0,b0) pair
for i = 1:500
    for j = 1:500
        a = a0(i)*ones(size(tNorm));
        b = a0(i) + b0(j)*tNorm;
        tau(i,j) = sum(tNorm .* a./(a+b));
        F(i,j) = (1/dsFeat.nFeatures)*sum((a+1)./(a+b+1));
    end
end
desiredT = 0.1;   % Desired expected relative feature extraction cost
desiredF = 0.50;  % Desired average probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
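
% (Informal note) The grid quantities above follow from Beta-distribution
% identities: for a selection probability rho_p ~ Beta(a_p,b_p), the prior
% mean is E[rho_p] = a_p/(a_p+b_p), so tau is the expected relative
% extraction cost sum_p tNorm_p*E[rho_p]; F averages (a_p+1)/(a_p+b_p+1),
% the Beta mean after a single observed selection. A quick sanity check on
% the chosen hyperparameters:
%   sum(tNorm.*a./(a+b))   % should be close to desiredT
%   mean((a+1)./(a+b+1))   % should be close to desiredF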

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale; % RBF kernel; sigma is scaled to the dimensionality of the features
kernelSet = prtKernelDc & kernel;     % Add a bias dimension to the kernel (DC kernel)

% CCFO - Cost Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO); % Normalize features; one-vs-all classification since this is a multiclass problem

% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM);   % Normalize features; one-vs-all classification since this is a multiclass problem

% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO); % Normalize features; one-vs-all classification since this is a multiclass problem
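
% (Informal note on PRT composition, as used above) '+' chains prtActions
% into a prtAlgorithm, so the preprocessing output feeds the classifier,
% and '&' joins prtKernel objects into a kernel set, e.g.:
%   algo = prtPreProcZmuv + prtClassRvm('kernels',prtKernelDc & prtKernelRbfNdimensionScale);
%   trained = algo.train(dsTrain);  dsOut = trained.run(dsTest);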

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
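% (Note) Each one-vs-all algorithm outputs one column of class-membership
% scores per digit; taking the max across columns yields a 1-based column
% index, and subtracting 1 maps it back to the 0-9 digit labels. The same
% conversion is applied after each classifier below.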
trainedCCFO = algoCCFO.train(dsTrain);    % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest);      % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutCCFO.X = dsOutCCFO.X-1;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain);    % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest);      % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutJCFO.X = dsOutJCFO.X-1;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories);  % Percent correct using each feature category
for iCategory = 1:nFeatCategories     % Loop over all feature categories
    dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory}); % Retain only this category's features for the training set
    dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory});   % Retain only this category's features for the testing set
    trainedRVM = algoRVM.train(dsCategoryTrain);     % Train one-vs-all RVM
    dsOutCategory = trainedRVM.run(dsCategoryTest);  % Run one-vs-all RVM
    [~,dsOutCategory.X] = max(dsOutCategory.X,[],2); % Change 'soft' decision values to 'hard' values (maximum a posteriori)
    dsOutCategory.X = dsOutCategory.X-1;
    pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory); % Calculate overall percent correct
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain);    % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest);      % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
dsOutRVM.X = dsOutRVM.X - 1;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results for publication
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories.
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have a higher probability of not being selected.
figure(1),hold on
colors = prtPlotUtilClassColors(length(featCategories));
for iFeat = 1:length(featCategories)
    featInd = categoryInds{iFeat}(1);
    plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
    xlabel('\rho'),ylabel('p(\rho|a,b)')
end
title('Priors on MNIST Feature Selection')
legend(featCategories,'location','southeastoutside')
clear iFeat featInd colors

% Baseline - for each category, show feature computation cost and RVM performance.
% Take-home point: the most expensive features are not always the best for
% classification performance.
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';  h1.LineWidth = 2; h1.Marker = 'o'; h1.MarkerSize = 8;
h2.LineStyle = '--'; h2.LineWidth = 2; h2.Marker = '^'; h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2

% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'FontSize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
clear h

% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses % Loop over all one-vs-all classifiers
    thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta'; % CCFO feature selector parameters for this one-vs-all classifier
    thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta'; % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
for iClass = 1:dsTrain.nClasses
    costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
    costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);

figure(4),set(gcf,'position',[610,512,700,441])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,1]); h = colorbar; ylabel(h,'\theta')
title('CCFO: Learned \theta (MNIST)')
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
ylabel('Class'),xlabel('Features')
h.YTick = 1:10; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories;
caxis([0,2*median(thetaJCFO(:))]); h = colorbar; ylabel(h,'\theta')
title('JCFO: Learned \theta (MNIST)')
clear iClass h m

% Calculate the average number of features selected
avgNumFeatsSelectedRVM = dsTrain.nFeatures;
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO > 2*median(thetaJCFO(:)),2));
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO > 0.5,2));
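
% (Informal note) The two selection thresholds above differ because CCFO's
% theta are probabilities constrained to [0,1] (0.5 is the natural cutoff),
% whereas JCFO's theta are nonnegative scaling factors with no fixed scale,
% so a data-driven cutoff of twice the median is used instead.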

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out a summary of the results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('MNIST Feature Set Summary\n')
fprintf('*************************************\n')
for iCategory = 1:nFeatCategories
    % Category name, number of features, relative cost, accuracy (%)
    fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('MNIST Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard % Drop into the debugger so results can be inspected before the function exits

end

MNIST/extractFeaturesFromMNIST.m (new file, 95 lines)
@@ -0,0 +1,95 @@
function [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)

% [dsFeat,time] = extractFeaturesFromMNIST(dsRaw)
%
% This function extracts four types of features from the MNIST handwritten
% digit recognition data set: statistical features, principal components
% analysis, co-occurrence features, and Sobel edge features.
%
% The function requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS:
%   dsRaw:  the PRT data set provided by prtDataGenMnist()
% OUTPUTS:
%   dsFeat: a new PRT data set containing the features that were computed
%   time:   the amount of time (in seconds) to compute each feature for
%           each observation in the data set
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
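
% Example usage (a minimal sketch; assumes the PRT and the Image Processing
% Toolbox, which supplies graycomatrix/graycoprops/fspecial/imfilter, are
% installed):
%   dsRaw = prtDataGenMnist;
%   [dsFeat,time] = extractFeaturesFromMNIST(dsRaw);
%   size(dsFeat.X)   % nObservations x 42 (4 stats + 10 PCA + 12 GLCM + 16 Sobel)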

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize data structures and processors
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
PCA = prtPreProcPca('nComponents',10); % Initialize the PCA preprocessor to use 10 components
PCA = PCA.train(dsRaw);                % Train PCA on the entire data set
feats = [];  % Features
time = [];   % Extraction times
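% (Informal note) feats and time grow inside the loop below; since the final
% feature count is known (42), preallocating, e.g.
%   feats = nan(dsRaw.nObservations,42);
% would be faster, at the cost of hard-coding the dimensionality.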

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Extract features from each sample
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i = 1:dsRaw.nObservations
    fprintf('Extracting features from sample %d of %d...\n',i,dsRaw.nObservations)

    % Statistical features
    tic
    statFeats = [mean(dsRaw.X(i,:)),std(dsRaw.X(i,:)),skewness(dsRaw.X(i,:)),kurtosis(dsRaw.X(i,:))];
    tStat = toc*ones(1,4);

    % PCA features
    tic;
    dsPca = PCA.run(dsRaw.retainObservations(i));
    pcaFeats = dsPca.X;
    tPca = toc*ones(1,dsPca.nFeatures);

    % Co-occurrence features
    tic;
    glcm1 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',8);
    glcmProps1 = graycoprops(glcm1);
    glcm2 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',16);
    glcmProps2 = graycoprops(glcm2);
    glcm3 = graycomatrix(reshape(dsRaw.X(i,:),28,28),'NumLevels',32);
    glcmProps3 = graycoprops(glcm3);
    glcmFeats = [glcmProps1.Contrast,glcmProps1.Correlation,glcmProps1.Energy,glcmProps1.Homogeneity,...
        glcmProps2.Contrast,glcmProps2.Correlation,glcmProps2.Energy,glcmProps2.Homogeneity,...
        glcmProps3.Contrast,glcmProps3.Correlation,glcmProps3.Energy,glcmProps3.Homogeneity];
    tGlcm = toc*ones(size(glcmFeats));

    % Sobel edge features
    tic;
    edgeDeg = [0, 45, 90, 135];
    edgeFeats = [];
    for j = 1:4
        H = fspecial('sobel');
        H = imrotate(H,edgeDeg(j));
        Y = imfilter(reshape(dsRaw.X(i,:),28,28),H);
        edgeFeats = [edgeFeats,trace(Y)];
        edgeFeats = [edgeFeats,trace(Y')];
        edgeFeats = [edgeFeats,sum(Y(14,:))];
        edgeFeats = [edgeFeats,sum(Y(:,14))];
    end
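    % (Informal note) trace(Y') is mathematically equal to trace(Y), so those
    % two entries duplicate each other; an anti-diagonal sum such as
    % sum(diag(fliplr(Y))) may have been the original intent.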
    tEdge = toc*ones(size(edgeFeats));

    % Concatenate the features into a single vector for the observation
    feats = [feats;statFeats,pcaFeats,glcmFeats,edgeFeats];
    time = [time;tStat,tPca,tGlcm,tEdge];

end

dsFeat = dsRaw;   % Copy the input PRT data set
dsFeat.X = feats; % Overwrite the features

end

PRT Plugins/prtClassCCFO.m (new file, 375 lines)
@@ -0,0 +1,375 @@
classdef prtClassCCFO < prtClass
    % prtClassCCFO Cost Constrained Feature Optimization
    %
    % This is a class written to be compatible with the Pattern Recognition
    % Toolbox (PRT) for MATLAB. The PRT may be downloaded here:
    % http://covartech.github.io/
    %
    % CLASSIFIER = prtClassCCFO returns a CCFO classifier
    %
    % CLASSIFIER = prtClassCCFO(PROPERTY1, VALUE1, ...) constructs a
    % prtClass object CLASSIFIER with properties as specified by
    % PROPERTY/VALUE pairs.
    %
    % A prtClassCCFO object inherits all properties from the abstract class
    % prtClass. In addition it has the following properties:
    %
    %   kernels               - A cell array of prtKernel objects specifying
    %                           the kernels to use (note CCFO currently works
    %                           only with RBF and polynomial kernels)
    %   verbosePlot           - Flag indicating whether or not to plot during
    %                           training
    %   verboseText           - Flag indicating whether or not to output
    %                           verbose updates during training
    %   learningMaxIterations - The maximum number of iterations
    %   ridge                 - Regularization parameter for the ridge
    %                           regression initialization of the weights
    %   gamma                 - Hyperparameter controlling the prior on
    %                           beta (regression weights)
    %   a                     - Hyperparameter controlling the prior on
    %                           theta (feature selectors)
    %   b                     - Hyperparameter controlling the prior on
    %                           theta (feature selectors)
    %   pruneFeatures         - Flag determining whether or not features
    %                           with a small enough theta should be removed
    %   pruneObservations     - Flag determining whether or not observations
    %                           with a small enough beta should be removed
    %
    % A prtClassCCFO also has the following read-only properties:
    %
    %   learningConverged - Flag indicating if the training converged
    %   beta              - The regression weights, estimated during training
    %   theta             - The feature scaling factors, estimated in training
    %   omega             - Term defined in (13) of the CCFO paper
    %   Q                 - The EM objective function being optimized
    %   relevantFeats     - Indices of features determined to be relevant
    %   relevantObs       - Indices of observations determined to be relevant
    %
    % A prtClassCCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
    % KFOLDS methods from prtAction. It also inherits the PLOT method
    % from prtClass.
    %
    % References:
    %   C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained
    %   Feature Optimization in Kernel Machine Classifiers," IEEE
    %   Signal Processing Letters, 2015.
    %
    %   B. Krishnapuram, A. Hartemink, L. Carin, & M.A. Figueiredo, "A
    %   Bayesian approach to joint feature selection and classifier
    %   design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
    %
    % Author: Christopher R. Ratto, JHU/APL
    % Date: 7 October 2015
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
    % All Rights Reserved
    %
    % This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
    % a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
    % review, modification, and/or use of the software, in source and binary forms are ONLY permitted
    % provided you agree to and comply with the terms and conditions set forth in the license.
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
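    %
    % Example usage (a minimal sketch; prtDataGenUnimodal is a standard PRT
    % binary demo data set, and uniform Beta(1,1) priors are assumed here.
    % Note that 'a' and 'b' are given per feature, as column vectors, since
    % training indexes them by feature):
    %   ds = prtDataGenUnimodal;
    %   ccfo = prtClassCCFO('a',ones(ds.nFeatures,1),'b',ones(ds.nFeatures,1));
    %   trained = ccfo.train(ds);
    %   dsOut = trained.run(ds);
    %   prtScoreRoc(dsOut);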

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Define properties
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    % Private properties for internal PRT use
    properties (SetAccess=private)
        name = 'Cost Constrained Feature Optimization' % Full name of the classifier
        nameAbbreviation = 'CCFO'; % Abbreviated name
        isNativeMary = false;      % Cannot handle multi-class data
    end

    % Public properties for general use
    properties
        verbosePlot = false; % Whether or not to plot during training
        verboseText = false; % Whether or not to write text during training
        ridge = 1;           % Ridge regression penalty (for initializing beta)
        gamma = 1;           % Hyperparameter for beta
        a = 1;               % Hyperparameter for theta
        b = 1;               % Hyperparameter for theta
        kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
        pruneFeatures = false;     % Flag for removing features as we go
        pruneObservations = false; % Flag for removing observations as we go
    end

    % Hidden properties that should generally be left alone
    properties (Hidden = true)
        learningMaxIterations = 100;          % Maximum number of iterations
        learningConvergedThreshold = .0001;   % Threshold for whether learning has converged
        learningNormWeightsThresh = 0.001;    % Threshold for whether the weights aren't changing
        learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
        pruningThreshBeta = 0.0001;           % Threshold for removing observations
        pruningThreshTheta = 0.5;             % Threshold for removing features
        featuresRetained = [];                % List of features that were retained
        nMaxFminconEvals = 100;               % Number of steps for fmincon optimization
    end

    % Properties that may be accessed for monitoring the learning algorithm
    properties (SetAccess = 'protected',GetAccess = 'public')
        learningConverged = []; % Whether or not the training converged
        beta = [];              % The regression weights
        theta = [];             % The feature scaling factors
        omega = [];             % Equation (13) in Krishnapuram et al.
        Q = [];                 % The EM objective function
        relevantFeats = [];     % List of relevant features
        relevantObs = [];       % List of relevant observations
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Error checking
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    methods
        % Allow for string, value pairs
        function Obj = prtClassCCFO(varargin)
            Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
        end

        % Make sure the kernel is compatible with CCFO
        function Obj = set.kernels(Obj,val)
            if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) || ~isa(val.kernelCell{1},'prtKernelDc')
                error('prt:prtClassCCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
            else
                Obj.kernels = val;
            end
        end
    end

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Training, testing, and helper functions (called by PRT train and run API)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    methods (Access = protected, Hidden = true)

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Training function (called by Obj.train)
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        function Obj = trainAction(Obj,DataSet)

            %%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
            X = DataSet.X;
            Y = DataSet.Y;
            N = size(X,1);
            P = size(X,2);
            beta = ones(N+1,1);
            theta = .9*ones(P,1);
            omega = ones(N+1,1);
            kernels = Obj.kernels;

            converged = false;
            iteration = 0;
            relevantFeats = true(P,1);
            relevantObs = true(N,1);
            relevantKernels = [true;relevantObs];
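
            % (Informal summary of the EM loop below) The M-step updates the
            % feature selectors theta by constrained minimization of the
            % negative expected log-posterior Q (via fmincon) and updates the
            % kernel weights beta in closed form; the E-step computes the
            % probit latent-variable expectations v and the weight precision
            % expectations omega.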
            while ~converged
                %%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
                iteration = iteration + 1;
                if Obj.verboseText
                    fprintf('CCFO EM Iteration %d:\t',iteration)
                end
                Xrel = X(:,relevantFeats);
                Nrel = size(Xrel,1);
                Prel = size(Xrel,2);
                thetaRel = theta(relevantFeats);
                betaRel = beta(relevantKernels);
                aRel = Obj.a(relevantFeats);
                bRel = Obj.b(relevantFeats);

                %%%%%%%%%%%% M-step %%%%%%%%%%%%%%
                % Update the feature scaling factors
                if iteration > 1
                    if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
                        opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',betarnd(ones(size(thetaRel)),ones(size(thetaRel))),'Display','iter-detailed','TolX',1e-4,'TolFun',1e-4); %'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
                        thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,x,relevantKernels,aRel,bRel),thetaRel,[],[],[],[],zeros(size(thetaRel)),ones(size(thetaRel)),[],opts);
                        theta(relevantFeats) = thetaRel;
                        thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
                    else
                        fprintf('Feature selection converged. Skipping constrained optimization.\n')
                    end
                else
                    thetaNormDiff = nan;
                end
                thetaNorm = norm(theta);

                % Apply scaling factors to features and re-compute the Gram
                % matrix via the kernel function
                XT = bsxfun(@times,Xrel,thetaRel');
                dsTmp = prtDataSetClass(XT,Y);
                kernels = train(kernels,dsTmp);
                H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernel-transformed features that have been selected so far
                H = H(:,relevantKernels);

                % Update the regression weights
                if iteration == 1
                    betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
                    beta(relevantKernels) = betaRel;
                    betaNormDiff = nan;
                else
                    betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
                    beta(relevantKernels) = betaRel;
                    betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
                end
                betaNorm = norm(beta);
                beta = beta./betaNorm;

                %%%%%%%%%%%% E-step %%%%%%%%%%%%%%
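                % (Informal note) For the probit model, the E-step expectation
                % below is the mean of a Gaussian truncated to the side of
                % zero indicated by the label:
                %   v_i = h_i'*beta + (2*y_i-1)*normpdf(h_i'*beta)/normcdf((2*y_i-1)*h_i'*beta)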
                v = nan(N,1);
                for i = 1:N
                    normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
                    if isnan(normFactor)
                        normFactor = 0;
                    end
                    v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of the linear observation model
                end

                omegaRel = nan(length(betaRel),1);
                for i = 1:length(betaRel)
                    omegaRel(i,:) = Obj.gamma*abs(betaRel(i))^(-1); % Expected value of the weight variance
                end
                omega(relevantKernels) = omegaRel;
                S = diag(omegaRel.^(-1/2));

                % Recompute the expected log-posterior
                Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,thetaRel,relevantKernels,aRel,bRel);

                %%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
                if Obj.pruneFeatures
                    relevantFeats(theta < Obj.pruningThreshTheta) = false;
                    theta(~relevantFeats) = 0;
                    thetaRel = theta(relevantFeats);
                end

                if Obj.pruneObservations
                    relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
                    relevantKernels = [true;relevantObs];
                    beta(~relevantKernels) = 0;
                    betaRel = beta(relevantKernels);
                    omegaRel = omega(relevantKernels);
                    S = diag(omegaRel.^(-1/2));
                end

                % For debugging purposes, plot how all of the parameters are updating
                if Obj.verbosePlot
                    figure(666)
                    subplot(2,3,1),plot(v),title('v'),axis tight
                    subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
                    subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
                    subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
                    subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
                    drawnow
                end

                %%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
                if iteration == 1
                    Qdiff = nan;
                else
                    Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
                end
                if Obj.verboseText
                    fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
                end

                if abs(Qdiff) < Obj.learningConvergedThreshold
                    converged = true;
                    fprintf('Expected log-posterior converged within threshold, exiting.\n')
                elseif iteration == Obj.learningMaxIterations
                    converged = true;
                    fprintf('Maximum number of iterations reached, exiting.\n')
                elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
                    converged = true;
                    fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
                end

            end
            %%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
            Obj.beta = beta;
            Obj.theta = theta;
            Obj.omega = omega;
            Obj.Q = Q(end);

            XT = bsxfun(@times,X,theta');
            dsTmp = prtDataSetClass(XT,Y);
            Obj.kernels = train(kernels,dsTmp);
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Running function (called by Obj.run)
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        function DataSetOut = runAction(Obj,DataSet)
            %%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
            X = DataSet.X;
            kernels = Obj.kernels;
            theta = Obj.theta;
            beta = Obj.beta;

            %%%%%%%%%% Run CCFO on the dataset %%%%%%%%%%%%
            DataSet.X = bsxfun(@times,X,theta');
            H = kernels.run_OutputDoubleArray(DataSet);

            %%%%%%%%%% Build the output dataset %%%%%%%%%%%%
            DataSetOut = DataSet;
            DataSetOut.X = normcdf(H*beta);
        end

        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        % Function for calculating the EM objective Q and its derivative (called by Obj.trainAction)
        %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
        function [Qout,dQdTout,beta,omega] = calcQ(Obj,X,kernel,v,omega,theta,relevantKernels,a,b)

            % Build the Gram matrix using the proposed vector of feature scaling factors
            N = size(X,1);
            Nk = sum(relevantKernels);
            P = size(X,2);
            XT = bsxfun(@times,X,theta');
            dsTmp = prtDataSetClass(XT);
            kernels = train(kernel,prtDataSetClass(XT));
            H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernel-transformed features that have been selected so far
            H = H(:,relevantKernels);
            S = diag(omega.^(-1/2));

            % Update the regression weights in closed form
            beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;

            % Calculate the expected log-posterior
            nu1 = psi(a) - psi(a + b);
            nu2 = psi(b) - psi(a + b);
            Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta + sum(theta.*nu1 + (1-theta).*nu2);
            Qout = -Q; % Using the negative since MATLAB optimization does minimization only

            % Calculate the derivative of Q w.r.t. each theta
            dQdT = nan(1,P);
            if isa(Obj.kernels.kernelCell{2},'prtKernelPolynomial')
                n = Obj.kernels.kernelCell{2}.d;
                xTx = X*diag(theta)*X';
                for k = 1:P
                    xxk = X(:,k)*X(:,k)';
                    dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk]; % Derivative of the polynomial kernel provided in Krishnapuram et al., RECOMB '03
                    dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT)); % Sign matches d/dtheta_k of the theta terms in Q above
                end
                dQdTout = -dQdT;
            elseif isa(Obj.kernels.kernelCell{2},'prtKernelRbf')
                for k = 1:P
                    Xk = X(:,k);
                    dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
                    if isa(Obj.kernels.kernelCell{2},'prtKernelRbfNdimensionScale')
                        dXk = dXk./(P*Obj.kernels.kernelCell{2}.sigma.^2);
                    else
                        dXk = dXk./Obj.kernels.kernelCell{2}.sigma.^2;
                    end
                    if isa(Obj.kernels.kernelCell{1},'prtKernelDc')
                        dXk = [zeros(N,1),dXk];
                    end
                    dHdT = -H.*dXk(:,relevantKernels);
                    dQdT(k) = nu1(k) - nu2(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
                    dQdTout(k) = -dQdT(k);
                end
            end
        end
    end
end

PRT Plugins/prtClassJCFO.m (new file, 369 lines)
@@ -0,0 +1,369 @@
classdef prtClassJCFO < prtClass
    % prtClassJCFO Joint Classifier and Feature Optimization
    %
    % This is a class written to be compatible with the Pattern Recognition
    % Toolbox (PRT) for MATLAB. The PRT may be downloaded here:
    % http://covartech.github.io/
    %
    % CLASSIFIER = prtClassJCFO returns a JCFO classifier
    %
    % CLASSIFIER = prtClassJCFO(PROPERTY1, VALUE1, ...) constructs a
    % prtClass object CLASSIFIER with properties as specified by
    % PROPERTY/VALUE pairs.
    %
    % A prtClassJCFO object inherits all properties from the abstract class
    % prtClass. In addition it has the following properties:
    %
    %   kernels               - A cell array of prtKernel objects specifying
    %                           the kernels to use (note JCFO currently works
    %                           only with RBF and polynomial kernels)
    %   verbosePlot           - Flag indicating whether or not to plot during
    %                           training
    %   verboseText           - Flag indicating whether or not to output
    %                           verbose updates during training
    %   learningMaxIterations - The maximum number of iterations
    %   ridge                 - Regularization parameter for the ridge
    %                           regression initialization of the weights
    %   gamma1                - Hyperparameter controlling the prior on
    %                           beta (regression weights)
    %   gamma2                - Hyperparameter controlling the prior on
    %                           theta (feature scaling factors)
    %   pruneFeatures         - Flag determining whether or not features
    %                           with a small enough theta should be removed
    %   pruneObservations     - Flag determining whether or not observations
    %                           with a small enough beta should be removed
    %
    % A prtClassJCFO also has the following read-only properties:
    %
    %   learningConverged - Flag indicating if the training converged
    %   beta              - The regression weights, estimated during training
    %   theta             - The feature scaling factors, estimated in training
    %   delta             - Term defined in (14) of the JCFO paper
    %   omega             - Term defined in (13) of the JCFO paper
    %   Q                 - The EM objective function being optimized
    %   relevantFeats     - Indices of features determined to be relevant
    %   relevantObs       - Indices of observations determined to be relevant
    %
    % A prtClassJCFO object inherits the TRAIN, RUN, CROSSVALIDATE and
    % KFOLDS methods from prtAction. It also inherits the PLOT method
    % from prtClass.
    %
    % Reference:
    %   B. Krishnapuram, A. Hartemink, L. Carin, & M.A. Figueiredo, "A
    %   Bayesian approach to joint feature selection and classifier
    %   design," IEEE Trans. PAMI, vol. 26, no. 9, pp. 1105-1111, 2004.
    %
    % Author: Christopher R. Ratto, JHU/APL
    % Date: 7 October 2015
    %
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
    % All Rights Reserved
    %
    % This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
    % a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
    % review, modification, and/or use of the software, in source and binary forms are ONLY permitted
    % provided you agree to and comply with the terms and conditions set forth in the license.
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
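    %
    % Example usage (a minimal sketch; prtDataGenUnimodal is a standard PRT
    % binary demo data set, and the default RBF+DC kernel set and unit
    % hyperparameters are assumed):
    %   ds = prtDataGenUnimodal;
    %   jcfo = prtClassJCFO('gamma1',1,'gamma2',1);
    %   trained = jcfo.train(ds);
    %   dsOut = trained.run(ds);
    %   prtScoreRoc(dsOut);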
|
||||||
|
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Define properties
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
% Private properties for internal PRT use
|
||||||
|
properties (SetAccess=private)
|
||||||
|
name = 'Joint Classifier and Feature Optimization' % Full name of the classifier
|
||||||
|
nameAbbreviation = 'JCFO'; % Abbreviated name
|
||||||
|
isNativeMary = false; % Cannot handle multi-class data
|
||||||
|
end
|
||||||
|
|
||||||
|
% Public properties for general use
|
||||||
|
properties
|
||||||
|
verbosePlot = false; % Whether or not to plot during training
|
||||||
|
verboseText = false; % Whether or not to write text during training
|
||||||
|
ridge = 1; % Ridge regression penalty (for initializing beta)
|
||||||
|
gamma1 = 1; % Hyperparameter for beta
|
||||||
|
gamma2 = 1; % Hyperparameter for theta
|
||||||
|
kernels = prtKernelDc & prtKernelRbfNdimensionScale; % Kernel function
|
||||||
|
pruneFeatures = false; % Flag for removing features as we go
|
||||||
|
pruneObservations = false; % Flag for removing observations as we go
|
||||||
|
end
|
||||||
|
|
||||||
|
% Hidden properties that should generally be left alone
|
||||||
|
properties (Hidden = true)
|
||||||
|
learningMaxIterations = 100; % Maximum number of iterations
|
||||||
|
learningConvergedThreshold = .001; % Threshold for whether learning has converged
|
||||||
|
learningNormWeightsThresh = 0.001; % Threshold for whether the weights aren't changing
|
||||||
|
learningNormFeatSelectThresh = 0.001; % Threshold for whether feature selection has converged
|
||||||
|
pruningThreshBeta = 0.0001; % Threshold for removing observations
|
||||||
|
pruningThreshTheta = 0.1; % Threshold for removing features
|
||||||
|
featuresRetained = []; % List of features being retained
|
||||||
|
nMaxFminconEvals = 100; % Number of steps for fmincon optimization
|
||||||
|
end
|
||||||
|
|
||||||
|
% Properties that may be accessed for monitoring of learning algorithm
|
||||||
|
properties (SetAccess = 'protected',GetAccess = 'public')
|
||||||
|
learningConverged = []; % Whether or not the training converged
|
||||||
|
beta = []; % The regression weights
|
||||||
|
theta = []; % The feature scaling factors
|
||||||
|
delta = []; % Equation (14) in Krishnapuraum et al.
|
||||||
|
omega = []; % Equation (13) in Krishnapuraum et al.
|
||||||
|
Q = []; % EM objective function
|
||||||
|
relevantFeats = []; % List of relevant features
|
||||||
|
relevantObs = []; % List of relevant observations
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Error checking
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
methods
|
||||||
|
% Allow for string, value pairs
|
||||||
|
function Obj = prtClassJCFO(varargin)
|
||||||
|
Obj = prtUtilAssignStringValuePairs(Obj,varargin{:});
|
||||||
|
end
|
||||||
|
|
||||||
|
% Make sure the kernel is compatible with JCFO
|
||||||
|
function Obj = set.kernels(Obj,val)
|
||||||
|
if ~(isa(val.kernelCell{2},'prtKernelRbf') || isa(val.kernelCell{2},'prtKernelRbfNdimensionScale') || isa(val.kernelCell{2},'prtKernelPolynomial')) && ~isa(val.kernelCell{1},'prtKernelDc')
|
||||||
|
error('prt:prtClassJCFO:kernels','Kernel must be DC followed by RBF or Polynomial.');
|
||||||
|
else
|
||||||
|
Obj.kernels = val;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Training, testing, and helper functions (called by PRT train and run API)
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
methods (Access = protected, Hidden = true)
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Training function (called by Obj.train)
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
function Obj = trainAction(Obj,DataSet)
|
||||||
|
|
||||||
|
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
|
||||||
|
X = DataSet.X;
|
||||||
|
Y = DataSet.Y;
|
||||||
|
N = size(X,1);
|
||||||
|
P = size(X,2);
|
||||||
|
beta = ones(N+1,1);
|
||||||
|
theta = ones(P,1);
|
||||||
|
delta = ones(P,1);
|
||||||
|
omega = ones(N+1,1);
|
||||||
|
kernels = Obj.kernels;
|
||||||
|
|
||||||
|
converged = false;
|
||||||
|
iteration = 0;
|
||||||
|
relevantFeats = true(P,1);
|
||||||
|
relevantObs = true(N,1);
|
||||||
|
relevantKernels = [true;relevantObs];
|
||||||
|
while ~converged
|
||||||
|
%%%%%%%%%%%% Iteration counter %%%%%%%%%%%%%%
|
||||||
|
iteration = iteration + 1;
|
||||||
|
if Obj.verboseText
|
||||||
|
fprintf('JCFO EM Iteration %d:\t',iteration)
|
||||||
|
end
|
||||||
|
|
||||||
|
Xrel = X(:,relevantFeats);
|
||||||
|
Nrel = size(Xrel,1);
|
||||||
|
Prel = size(Xrel,2);
|
||||||
|
thetaRel = theta(relevantFeats);
|
||||||
|
betaRel = beta(relevantKernels);
|
||||||
|
|
||||||
|
%%%%%%%%%%%% M-step %%%%%%%%%%%%%%
|
||||||
|
% Update the feature scaling factors
|
||||||
|
if iteration > 1
|
||||||
|
if abs(thetaNormDiff) > Obj.learningNormWeightsThresh || isnan(thetaNormDiff)
|
||||||
|
opts = optimoptions(@fmincon,'Algorithm','interior-point','MaxFunEvals',Obj.nMaxFminconEvals,'GradObj','on','TypicalX',ones(size(thetaRel)),'Display','off','TolX',1e-6,'TolFun',1e-6);%,'PlotFcns',{@optimplotx,@optimplotfval,@optimplotstepsize});
|
||||||
|
thetaRel = fmincon(@(x)Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,x,relevantKernels),thetaRel,[],[],[],[],zeros(size(thetaRel)),inf(size(thetaRel)),[],opts);
|
||||||
|
theta(relevantFeats) = thetaRel;
|
||||||
|
thetaNormDiff = (norm(theta)-thetaNorm)./thetaNorm;
|
||||||
|
else
|
||||||
|
fprintf('Feature selection converged. Skipping constrained optimization.\n')
|
||||||
|
end
|
||||||
|
else
|
||||||
|
thetaNormDiff = nan;
|
||||||
|
end
|
||||||
|
thetaNorm = norm(theta);
|
||||||
|
|
||||||
|
% Apply scaling factors to features and re-compute the Gram
|
||||||
|
% matrix via the kernel function
|
||||||
|
XT = bsxfun(@times,Xrel,thetaRel');
|
||||||
|
dsTmp = prtDataSetClass(XT,Y);
|
||||||
|
kernels = train(kernels,dsTmp);
|
||||||
|
H = kernels.run_OutputDoubleArray(dsTmp); % Gram matrix for the kernels-transformed features that have been selected so far
|
||||||
|
H = H(:,relevantKernels);
|
||||||
|
% Update the regression weights
|
||||||
|
if iteration == 1
|
||||||
|
betaRel = inv(Obj.ridge*eye(size(H,2)) + H'*H)*H'*Y; % Initialize weights using ridge regression
|
||||||
|
beta(relevantKernels) = betaRel;
|
||||||
|
betaNormDiff = nan;
|
||||||
|
else
|
||||||
|
betaRel = S*inv(eye(length(betaRel)) + S*H'*H*S)*S*H'*v;
|
||||||
|
beta(relevantKernels) = betaRel;
|
||||||
|
betaNormDiff = (norm(beta)-betaNorm)./betaNorm;
|
||||||
|
end
|
||||||
|
betaNorm = norm(beta);
|
||||||
|
beta = beta./betaNorm;
|
||||||
|
|
||||||
|
%%%%%%%%%%%% E-step %%%%%%%%%%%%%%
|
||||||
|
v = nan(N,1);
|
||||||
|
for i = 1:N
|
||||||
|
normFactor = (2*Y(i)-1)*normpdf(H(i,:)*betaRel,0,1)/normcdf((2*Y(i)-1)*H(i,:)*betaRel,0,1);
|
||||||
|
if isnan(normFactor)
|
||||||
|
normFactor = 0;
|
||||||
|
end
|
||||||
|
v(i,:) = H(i,:)*betaRel + normFactor; % Expected value of linear observation model
|
||||||
|
end
|
||||||
|
|
||||||
|
omegaRel = nan(length(betaRel),1);
|
||||||
|
for i = 1:length(betaRel)
|
||||||
|
omegaRel(i,:) = Obj.gamma1*abs(betaRel(i))^(-1); % Expected value of weight variance
|
||||||
|
end
|
||||||
|
omega(relevantKernels) = omegaRel;
|
||||||
|
S = diag(omegaRel.^(-1/2));
|
||||||
|
|
||||||
|
deltaRel = nan(Prel,1);
|
||||||
|
for k = 1:Prel
|
||||||
|
deltaRel(k,:) = Obj.gamma2*thetaRel(k)^(-1); % Expected value of feature selectors
|
||||||
|
end
|
||||||
|
delta(relevantFeats) = deltaRel;
|
||||||
|
|
||||||
|
% Recompute the expected log-posterior
|
||||||
|
Q(iteration) = Obj.calcQ(Xrel,kernels,v,omegaRel,deltaRel,thetaRel,relevantKernels); % Expected log-posterior
|
||||||
|
|
||||||
|
%%%%%%%%%%%% Prune deselected training points and/or features, if enabled %%%%%%%%%%%%%%
|
||||||
|
if Obj.pruneFeatures
|
||||||
|
relevantFeats(theta < Obj.pruningThreshTheta) = false;
|
||||||
|
theta(~relevantFeats) = 0;
|
||||||
|
thetaRel = theta(relevantFeats);
|
||||||
|
deltaRel = delta(relevantFeats);
|
||||||
|
end
|
||||||
|
|
||||||
|
if Obj.pruneObservations
|
||||||
|
relevantObs(abs(beta) < Obj.pruningThreshBeta) = false;
|
||||||
|
relevantKernels = [true;relevantObs];
|
||||||
|
beta(~relevantKernels) = 0;
|
||||||
|
betaRel = beta(relevantKernels);
|
||||||
|
omegaRel = omega(relevantKernels);
|
||||||
|
S = diag(omegaRel.^(-1/2));
|
||||||
|
end
|
||||||
|
|
||||||
|
% For debugging purposes, plot how all of the parameters are updating
|
||||||
|
if Obj.verbosePlot
|
||||||
|
figure(666)
|
||||||
|
subplot(2,3,1),plot(v),title('v'),axis tight
|
||||||
|
subplot(2,3,2),plot(log(omega),'marker','o'),title('log(\omega)'),axis tight
|
||||||
|
subplot(2,3,3),plot(log(delta),'marker','o'),title('log(\delta)'),axis tight
|
||||||
|
subplot(2,3,4),plot(beta,'marker','o'),title('\beta'),axis tight
|
||||||
|
subplot(2,3,5),plot(theta,'marker','o'),title('\theta'),axis tight
|
||||||
|
subplot(2,3,6),plot(Q),title('-E[log p(\beta,\theta|-)]'),axis tight
|
||||||
|
drawnow
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%% Check for convergence %%%%%%%%%%%%%%
|
||||||
|
if iteration == 1
|
||||||
|
Qdiff = nan;
|
||||||
|
else
|
||||||
|
Qdiff = (Q(iteration)-Q(iteration-1))./Q(iteration-1);
|
||||||
|
end
|
||||||
|
if Obj.verboseText
|
||||||
|
fprintf('Q = %0.4f (diff = %0.4f)\t ||beta|| = %0.4f (diff = %0.4f)\n',Q(iteration),Qdiff,betaNorm,betaNormDiff)
|
||||||
|
end
|
||||||
|
|
||||||
|
if abs(Qdiff) < Obj.learningConvergedThreshold;
|
||||||
|
converged = true;
|
||||||
|
fprintf('Expected log-posterior converged within threshold, exiting.\n')
|
||||||
|
elseif iteration == Obj.learningMaxIterations;
|
||||||
|
converged = true;
|
||||||
|
fprintf('Maximum number of iterations reached, exiting.\n')
|
||||||
|
elseif abs(betaNormDiff) < Obj.learningNormWeightsThresh
|
||||||
|
converged = true;
|
||||||
|
fprintf('Magnitude of weight vector converged within threshold, exiting.\n')
|
||||||
|
end
|
||||||
|
end
|
||||||
|
%%%%%%%%%% Save out learned parameters %%%%%%%%%%%%
|
||||||
|
Obj.beta = beta;
|
||||||
|
Obj.theta = theta;
|
||||||
|
Obj.delta = delta;
|
||||||
|
Obj.omega = omega;
|
||||||
|
Obj.Q = Q(end);
|
||||||
|
|
||||||
|
XT = bsxfun(@times,X,theta');
|
||||||
|
dsTmp = prtDataSetClass(XT,Y);
|
||||||
|
Obj.kernels = train(kernels,dsTmp);
|
||||||
|
end
|
||||||
|
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
% Running function (called by Obj.run)
|
||||||
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
function DataSetOut = runAction(Obj,DataSet)
|
||||||
|
%%%%%%%%%% Get necessary classifier parameters %%%%%%%%%%%%
|
||||||
|
X = DataSet.X;
|
||||||
|
kernels = Obj.kernels;
|
||||||
|
theta = Obj.theta;
|
||||||
|
beta = Obj.beta;
|
||||||
|
|
||||||
|
%%%%%%%%%% Run JCFO on dataset %%%%%%%%%%%%
|
||||||
|
DataSet.X = bsxfun(@times,X,theta');
|
||||||
|
H = kernels.run_OutputDoubleArray(DataSet);
|
||||||
|
|
||||||
|
%%%%%%%%%% Build output dataset %%%%%%%%%%%%
|
||||||
|
DataSetOut = DataSet;
|
||||||
|
DataSetOut.X = normcdf(H*beta);
end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function for calculating the EM objective Q and its derivative (called by Obj.trainAction)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [Qout,dQdTout] = calcQ(Obj,X,kernel,v,omega,delta,theta,relevantKernels)

% Build Gram matrix using the proposed vector of feature scaling factors
N = size(X,1);
Nk = sum(relevantKernels);
P = size(X,2);
XT = bsxfun(@times,X,theta');
dsTmp = prtDataSetClass(XT);
kernels = train(kernel,prtDataSetClass(XT));
H = kernels.run_OutputDoubleArray(dsTmp);  % Gram matrix for the kernel-transformed features that have been selected so far
H = H(:,relevantKernels);
S = diag(omega.^(-1/2));

% Calculate the expected log-posterior
beta = S*inv(eye(Nk) + S*(H'*H)*S)*S*H'*v;
Q = -beta'*(H'*H)*beta + 2*beta'*H'*v - beta'*diag(omega)*beta - theta'*diag(delta)*theta;
Qout = -Q;  % Negated since MATLAB optimization routines minimize
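
% Reading aid (added): since -beta'*(H'*H)*beta + 2*beta'*H'*v = -norm(H*beta-v)^2 + v'*v,
% Q above is, up to an additive constant, the penalized least-squares objective
%   Q(theta) = -||H*beta - v||^2 - beta'*diag(omega)*beta - theta'*diag(delta)*theta,
% and the beta above, S*(I + S*H'*H*S)^(-1)*S*H'*v = (H'*H + diag(omega))^(-1)*H'*v,
% is its maximizer for fixed theta.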
dQdT = nan(1,P);

% Calculate derivative of Q w.r.t. each theta
if isa(kernel.kernelCell{2},'prtKernelPolynomial')
    n = kernel.kernelCell{2}.d;
    xTx = X*diag(theta)*X';
    for k = 1:P
        xxk = X(:,k)*X(:,k)';
        dHdT = [zeros(N,1),(n*(1+xTx).^(n-1)).*xxk];  % Derivative of the polynomial kernel provided in Krishnapuram et al., RECOMB '03
        dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
    end
    dQdTout = -dQdT;
elseif isa(kernel.kernelCell{2},'prtKernelRbf')
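    % Reading aid (added): writing the RBF Gram matrix as H_ij = exp(-d_ij(theta)),
    % where d_ij(theta) is the feature-scaled squared distance between observations
    % i and j, the chain rule gives dH_ij/dtheta_k = -H_ij * dXk_ij, with dXk the
    % matrix of per-feature squared distances (scaled by sigma^2) computed below.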
    for k = 1:P
        Xk = X(:,k);
        dXk = repmat(sum((Xk.^2), 2), [1 N]) + repmat(sum((Xk.^2),2), [1 N]).' - 2*Xk*(Xk.');
        if isa(kernel.kernelCell{2},'prtKernelRbfNdimensionScale')
            dXk = dXk./(P*kernel.kernelCell{2}.sigma.^2);
        else
            dXk = dXk./kernel.kernelCell{2}.sigma.^2;
        end
        if isa(kernel.kernelCell{1},'prtKernelDc')
            dXk = [zeros(N,1),dXk];
        end
        dHdT = -H.*dXk(:,relevantKernels);
        dQdT(k) = -2*delta(k)*theta(k) - 2*sum(sum(((H*beta-v)*beta').*dHdT));
        dQdTout(k) = -dQdT(k);
    end
end
end
end
end
37
README.txt
Normal file
@@ -0,0 +1,37 @@
Cost-Constrained Feature Optimization (CCFO) for MATLAB

This MATLAB code may be used to replicate the results published in the following manuscript:
C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
Optimization in Kernel Machine Classifiers," IEEE Signal Processing
Letters, 2015.

The code is organized into three directories as follows:
Urban Land Cover/ - Code for replicating the urban land cover experiment from the paper
    experiment_urbanLandCover.m - Script for running the experiment
    featureTimes_urbanLandCover.m - Function for estimating feature computation times
MNIST/ - Code for replicating the MNIST experiment from the paper
    experiment_mnist.m - Script for running the experiment
    extractFeaturesFromMNIST.m - Function for extracting features from the handwritten digits
PRT Plugins/ - The actual machine learning code, written as plugins to the Pattern Recognition Toolbox (PRT)
    prtClassJCFO - Joint Classifier and Feature Optimization
    prtClassCCFO - Cost-Constrained Feature Optimization

To run either experiment, or to use our code in your own research, you must download and install the
Pattern Recognition Toolbox (PRT) for MATLAB at http://covartech.github.io/

To run the urban land cover experiment, you must download the data from the UCI Machine
Learning Repository: https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
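
For example (an illustrative sketch; the data-directory path below is a placeholder),
once the PRT and this repository are on the MATLAB path, the experiments can be run as:

    experiment_mnist                                      % MNIST experiment
    experiment_urbanLandCover('C:\data\UrbanLandCover')   % Urban Land Cover experiment,
                                                          % pointing at the downloaded CSVs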

The code benchmarks each of the feature computation times on a test image of a black and white circle.
The function "MidpointCircle.m" required to generate this test image is available via MATLAB Central:
http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m

*******************************************************************************************************************
This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
All Rights Reserved

This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
review, modification, and/or use of the software, in source and binary forms are ONLY permitted
provided you agree to and comply with the terms and conditions set forth in the license.
*******************************************************************************************************************
307
Urban Land Cover/experiment_urbanLandCover.m
Normal file
@@ -0,0 +1,307 @@
function experiment_urbanLandCover(varargin)

% experiment_urbanLandCover.m
%
% This MATLAB script runs the feature selection experiment for the Urban
% Land Cover data set that was published in the following manuscript:
% C.R. Ratto, C.A. Caceres, H.C. Schoeberlein, "Cost-Constrained Feature
% Optimization in Kernel Machine Classifiers," IEEE Signal Processing
% Letters, 2015.
%
% The script requires the Urban Land Cover data set, which is available
% from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The script also requires installation of the Pattern Recognition Toolbox
% (PRT) for MATLAB:
% http://covartech.github.io/
%
% INPUTS (optional):
%   dataDir: full path to the directory in which the CSV data is saved
%            (default is the current working directory)
%
% Author: Christopher R. Ratto, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the training and test data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if nargin == 0
    dataDir = pwd;
else
    dataDir = varargin{1};
end
[~,~,raw] = xlsread(fullfile(dataDir,'training.csv'));  % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end));                         % Features from spreadsheet
labels = strrep(raw(2:end,1),' ','');                   % Class labels from spreadsheet
uLabels = unique(labels);                               % Unique class labels
Y = nan(size(labels));                                  % Numeric class labels
for i = 1:length(labels)
    Y(i,:) = find(strcmp(labels{i},uLabels));           % Assign a numeric label to each class name
end
featNames = raw(1,2:end);                               % Feature names
dsTrain = prtDataSetClass(X,Y);                         % Training PRT data set
dsTrain.classNames = uLabels;                           % Training class names
dsTrain.featureNames = featNames;                       % Training feature names
clear raw X labels uLabels Y featNames i

[~,~,raw] = xlsread(fullfile(dataDir,'testing.csv'));   % Read raw data from XLS-converted CSV
X = cell2mat(raw(2:end,2:end));                         % Features from spreadsheet
labels = strrep(raw(2:end,1),' ','');                   % Class labels from spreadsheet
uLabels = unique(labels);                               % Unique class labels
Y = nan(size(labels));                                  % Numeric class labels
for i = 1:length(labels)
    Y(i,:) = find(strcmp(labels{i},uLabels));           % Assign a numeric label to each class name
end
featNames = raw(1,2:end);                               % Feature names
dsTest = prtDataSetClass(X,Y);                          % Testing PRT data set
dsTest.classNames = uLabels;                            % Testing class names
dsTest.featureNames = featNames;                        % Testing feature names
clear raw X labels uLabels Y featNames i
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load in the feature extraction times
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[featCategories,~,categoryTimes] = featureTimes_urbanLandCover;  % Benchmarked feature computation times (see featureTimes_urbanLandCover.m)
nFeatCategories = length(featCategories);                        % Number of feature categories
[categoryTimes,sortInds] = sort(cat(1,[],categoryTimes),'ascend'); % Sort the computation time of each feature category
featCategories = featCategories(sortInds);                       % Rearrange the order of feature category names
sortVec = [];  % Vector of feature indices corresponding to the sorted categories
for iCategory = 1:nFeatCategories
    sortVec = [sortVec,find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory})))]; %#ok<AGROW>
end
dsTrain.X = dsTrain.X(:,sortVec);                        % Rearrange the order of features in the training set
dsTrain.featureNames = dsTrain.featureNames(:,sortVec);  % Rearrange the order of feature names in the training set
dsTest.X = dsTest.X(:,sortVec);                          % Rearrange the order of features in the testing set
dsTest.featureNames = dsTest.featureNames(:,sortVec);    % Rearrange the order of feature names in the testing set
categoryInds = cell(1,nFeatCategories);     % Feature indices corresponding to each category
T = nan(1,dsTrain.nFeatures);               % Computation time (seconds) of each feature
TperCategory = nan(1,nFeatCategories);      % Total computation time (seconds) of each category
categoryIndBegin = nan(1,nFeatCategories);  % The index of the first feature from each category
for iCategory = 1:nFeatCategories
    categoryInds{iCategory} = find(~cellfun(@isempty,strfind(dsTest.featureNames,featCategories{iCategory}))); % Find the features that belong to this category
    categoryIndBegin(iCategory) = min(categoryInds{iCategory});
    T(categoryInds{iCategory}) = categoryTimes(iCategory);      % Computation time for each feature (seconds)
    TperCategory(iCategory) = sum(T(categoryInds{iCategory}));  % Computation time for each feature category (seconds)
end
tNorm = T./sum(T);                                % Normalized computation time of each feature
tNormCategory = TperCategory./sum(TperCategory);  % Normalized computation time of each feature category
featCategories = strrep(featCategories,'_','');   % Remove underscores from feature category names since they garble the plot labels
clear categoryTimes iCategory i sortVec sortInds
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Set CCFO hyperparameters
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
a0 = linspace(0,2,500);
b0 = linspace(0,300,500);
tau = nan(500,500);
F = nan(500,500);
for i = 1:500
    for j = 1:500
        a = a0(i)*ones(size(tNorm));
        b = a0(i) + b0(j)*tNorm;
        tau(i,j) = sum(tNorm .* a./(a+b));
        F(i,j) = (1/dsTrain.nFeatures)*sum((a+1)./(a+b+1));
    end
end
desiredT = 0.25;  % Desired expected (normalized) feature extraction cost
desiredF = 0.50;  % Desired average probability of a feature being selected
dist = (tau-desiredT).^2 + (F-desiredF).^2;
[iMin,jMin] = find(dist == min(dist(:)));
a0 = a0(iMin);
b0 = b0(jMin);
a = a0*ones(size(tNorm));
b = a0 + b0*tNorm;
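
% Sanity check (added sketch): with rho_k ~ Beta(a_k,b_k), E[rho_k] = a_k/(a_k+b_k),
% so tau above is the expected normalized extraction cost under the prior. The
% selected hyperparameters can be verified directly:
tauCheck = sum(tNorm .* a./(a+b));                    % should be close to desiredT
Fcheck = (1/dsTrain.nFeatures)*sum((a+1)./(a+b+1));   % should be close to desiredF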
clear featInd legendNames colors tau F iA iB iFeat

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the classifiers
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% All classifiers will use the same kernel
kernel = prtKernelRbfNdimensionScale;  % RBF kernel, scale the sigma parameter to the dimensionality of the features
kernelSet = prtKernelDc & kernel;      % Add a bias dimension to the kernel (DC kernel)

% CCFO - Cost-Constrained Feature Optimization
CCFO = prtClassCCFO('kernels',kernelSet,'pruneFeatures',false,'pruneObservations',false,'verbosePlot',false,'verboseText',true,'a',a','b',b','gamma',1,'ridge',10);
algoCCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',CCFO);  % Normalize features; one-vs-all classification since this is a multiclass problem

% RVM - Relevance Vector Machine
RVM = prtClassRvm('kernels',kernelSet);
algoRVM = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',RVM);    % Normalize features; one-vs-all classification since this is a multiclass problem

% JCFO - Joint Classifier and Feature Optimization
JCFO = prtClassJCFO('kernels',kernelSet,'ridge',10,'pruneFeatures',false,'pruneObservations',false,'verboseText',1,'verbosePlot',0,'gamma1',1,'gamma2',1);
algoJCFO = prtPreProcZmuv + prtClassBinaryToMaryOneVsAll('baseClassifier',JCFO);  % Normalize features; one-vs-all classification since this is a multiclass problem
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test CCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedCCFO = algoCCFO.train(dsTrain);    % Train CCFO on the training set
dsOutCCFO = trainedCCFO.run(dsTest);      % Run CCFO on the test set
[~,dsOutCCFO.X] = max(dsOutCCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test JCFO
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedJCFO = algoJCFO.train(dsTrain);    % Train the one-vs-all JCFO
dsOutJCFO = trainedJCFO.run(dsTest);      % Run the one-vs-all JCFO
[~,dsOutJCFO.X] = max(dsOutJCFO.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (individual feature categories)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
pcCategory = nan(1,nFeatCategories);  % Percent correct using each feature category
for iCategory = 1:nFeatCategories     % Loop over all feature categories
    dsCategoryTrain = dsTrain.retainFeatures(categoryInds{iCategory});  % Retain only features from this category for the training set
    dsCategoryTest = dsTest.retainFeatures(categoryInds{iCategory});    % Retain only features from this category for the testing set
    trainedRVM = algoRVM.train(dsCategoryTrain);      % Train one-vs-all RVM
    dsOutCategory = trainedRVM.run(dsCategoryTest);   % Run one-vs-all RVM
    [~,dsOutCategory.X] = max(dsOutCategory.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)
    pcCategory(iCategory) = prtScorePercentCorrect(dsOutCategory);  % Calculate percent correct (overall accuracy)
end
clear iCategory dsCategoryTrain dsCategoryTest dsOutCategory
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Train and test RVM (all features together)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainedRVM = algoRVM.train(dsTrain);    % Train the one-vs-all RVM
dsOutRVM = trainedRVM.run(dsTest);      % Run the one-vs-all RVM
[~,dsOutRVM.X] = max(dsOutRVM.X,[],2);  % Change 'soft' decision values to 'hard' values (maximum a posteriori)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Plot the prior on selecting features from each of the feature categories.
% This will be a beta distribution over [0,1]. Features that take longer to
% compute should have higher probability of not being selected.
figure(1),set(gcf,'position',[610,512,700,441])
colors = prtPlotUtilClassColors(length(featCategories));
colors(7,:) = [0,1,1];
subplot(2,1,1),hold on
for iFeat = 1:length(featCategories(1:8))
    featInd = categoryInds{iFeat}(1);
    plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
    xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Cheap Features (Urban Land Cover)')
legend(featCategories(1:8),'location','southeastoutside')
subplot(2,1,2),hold on
for iFeat = 1:length(featCategories(9:15))
    featInd = categoryInds{8+iFeat}(1);
    plot(linspace(0,1),betapdf(linspace(0,1),a(featInd),b(featInd)),'color',colors(iFeat,:),'linewidth',2);
    xlabel('\rho'),ylabel('p(\rho|a,b)')
end
axis tight
title('Expensive Features (Urban Land Cover)')
legend(featCategories(9:15),'location','southeastoutside')
clear iFeat featInd colors
% Baseline - for each category, show feature computation cost and RVM performance.
% Take-home point: the most expensive features are not always the best for
% classification performance.
figure(2)
[ha,h1,h2] = plotyy(1:nFeatCategories,100*pcCategory,1:nFeatCategories,tNormCategory);
h1.LineStyle = '-';  h1.LineWidth = 2; h1.Marker = 'o'; h1.MarkerSize = 8;
h2.LineStyle = '--'; h2.LineWidth = 2; h2.Marker = '^'; h2.MarkerSize = 8;
ha(1).XTick = 1:length(pcCategory); ha(1).XTickLabel = featCategories; ha(1).YLim = [0,100]; ha(1).YTick = 0:10:100; ha(1).XLim = [1,nFeatCategories]; ha(1).XTickLabelRotation = 30;
ha(2).XTick = 1:length(pcCategory); ha(2).XTickLabel = []; ha(2).YLim = [0,1]; ha(2).YTick = 0:0.1:1; ha(2).XLim = [1,nFeatCategories];
ylabel(ha(1),'Accuracy (% Correct)')
ylabel(ha(2),'Total Normalized Cost')
title('RVM Accuracy and Total Cost of Each Feature Category','FontSize',12)
clear ha h1 h2
% Plot the confusion matrices using all the features
% CCFO
figure(3),set(gcf,'outerposition',[65,301,1780,579])
h = subplot(1,3,1);
prtScoreConfusionMatrix(dsOutCCFO)
pcCCFO = prtScorePercentCorrect(dsOutCCFO);
h.XTickLabelRotation = 20;
title(['CCFO - ',num2str(100*pcCCFO,'%0.2f'),'% Correct'],'FontSize',12)
axis square
% RVM
h = subplot(1,3,2);
prtScoreConfusionMatrix(dsOutRVM)
pcRVM = prtScorePercentCorrect(dsOutRVM);
title(['RVM - ',num2str(100*pcRVM,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
% JCFO
h = subplot(1,3,3);
prtScoreConfusionMatrix(dsOutJCFO)
pcJCFO = prtScorePercentCorrect(dsOutJCFO);
title(['JCFO - ',num2str(100*pcJCFO,'%0.2f'),'% Correct'],'FontSize',12)
h.XTickLabelRotation = 20;
axis square
clear h

% Compare feature selection performance
thetaCCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
thetaJCFO = nan(dsTrain.nClasses,dsTrain.nFeatures);
for iClass = 1:dsTrain.nClasses  % Loop over all one-vs-all classifiers
    thetaCCFO(iClass,:) = trainedCCFO.actionCell{2}.baseClassifier(iClass).theta';  % CCFO feature selector parameters for this one-vs-all classifier
    thetaJCFO(iClass,:) = trainedJCFO.actionCell{2}.baseClassifier(iClass).theta';  % JCFO feature selector parameters for this one-vs-all classifier
end
costReductionCCFO = nan(dsTrain.nClasses,1);
costReductionJCFO = nan(dsTrain.nClasses,1);
for iClass = 1:dsTrain.nClasses
    costReductionCCFO(iClass,:) = sum(tNorm(thetaCCFO(iClass,:)>=0.5));
    costReductionJCFO(iClass,:) = sum(tNorm(thetaJCFO(iClass,:)>=2*median(thetaJCFO(:))));
end
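
% Note (added): CCFO's theta lies in [0,1] (cf. the caxis([0,1]) setting below), so
% 0.5 is a natural selection threshold; JCFO's theta is an unconstrained scale
% parameter, so a data-driven threshold (twice its median) is used instead.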
costReductionCCFO = mean(costReductionCCFO);
costReductionJCFO = mean(costReductionJCFO);

figure(4),set(gcf,'outerposition',[255,317,568,596])
h = subplot(2,1,1);
imagesc(thetaCCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,1]); h = colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'CCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionCCFO)})
h = subplot(2,1,2);
imagesc(thetaJCFO),colormap bone
h.YTick = 1:9; h.YTickLabel = dsTrain.classNames; h.XTick = categoryIndBegin; h.XTickLabel = featCategories; h.XTickLabelRotation = 70;
caxis([0,2*median(thetaJCFO(:))]); h = colorbar; ylabel(h,'Feature Scaling Parameter (\theta)')
title({'JCFO: \theta Learned for Each One-vs-All Classifier',sprintf('Average Cost of Feature Extraction: %0.2f',costReductionJCFO)})
clear iClass h m
% Average number of features selected
avgNumFeatsSelectedRVM = dsTest.nFeatures;
avgNumFeatsSelectedJCFO = mean(sum(thetaJCFO > 2*median(thetaJCFO(:)),2));
avgNumFeatsSelectedCCFO = mean(sum(thetaCCFO > 0.5,2));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Print out summary of results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
fprintf('*************************************\n')
fprintf('Urban Land Cover Feature Set Summary\n')
fprintf('*************************************\n')
for iCategory = 1:nFeatCategories
    fprintf('%s \t %d \t %0.4f \t %0.2f\n',featCategories{iCategory},length(categoryInds{iCategory}),tNormCategory(iCategory),100*pcCategory(iCategory));
end
fprintf('*************************************\n')
fprintf('Urban Land Cover Performance Comparison\n')
fprintf('*************************************\n')
fprintf('Accuracy (RVM): %0.2f\n',100*pcRVM)
fprintf('Accuracy (JCFO): %0.2f\n',100*pcJCFO)
fprintf('Accuracy (CCFO): %0.2f\n',100*pcCCFO)
fprintf('Avg. # Features Selected (RVM): %0.2f\n',avgNumFeatsSelectedRVM)
fprintf('Avg. # Features Selected (JCFO): %0.2f\n',avgNumFeatsSelectedJCFO)
fprintf('Avg. # Features Selected (CCFO): %0.2f\n',avgNumFeatsSelectedCCFO)
fprintf('Avg. Relative Extraction Cost (RVM): 100\n')
fprintf('Avg. Relative Extraction Cost (JCFO): %0.2f\n',100*costReductionJCFO)
fprintf('Avg. Relative Extraction Cost (CCFO): %0.2f\n',100*costReductionCCFO)
keyboard  % Drop to the debugger so results can be inspected interactively
end
280
Urban Land Cover/featureTimes_urbanLandCover.m
Normal file
@@ -0,0 +1,280 @@
function [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover()

% [featureCategories,timeAbs,timeRel] = featureTimes_urbanLandCover
%
% Function to estimate the extraction times for each of the features in the
% urban land cover data set, which is available from the UCI Machine Learning Repository:
% https://archive.ics.uci.edu/ml/datasets/Urban+Land+Cover
%
% The code benchmarks each of the feature computation times on a test image
% of a black and white circle. The function "MidpointCircle.m" required to
% generate the test image is available via MATLAB Central:
% http://www.mathworks.com/matlabcentral/fileexchange/14331-draw-a-circle-in-a-matrix-image/content/MidpointCircle.m
%
% The computations for each feature were derived from the following reference
% (page references provided in the source code):
% Definiens AG, "Definiens 5 Reference Book," Munich, Germany 2006.
%
% INPUTS: none
%
% OUTPUTS:
%   featureCategories: cell array of feature category names
%   timeAbs: the absolute time (in sec) to compute each feature, via tic/toc
%   timeRel: the relative time (%) to compute each feature in the set
%
% Author: Carlos A. Caceres, JHU/APL
% Date: 5 October 2015
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This software is Copyright 2015 The Johns Hopkins University Applied Physics Laboratory LLC
% All Rights Reserved
%
% This software is licensed to you under the terms of the Eclipse Public License, Version 1.0,
% a copy of which can be found at http://opensource.org/licenses/EPL-1.0. Redistribution,
% review, modification, and/or use of the software, in source and binary forms are ONLY permitted
% provided you agree to and comply with the terms and conditions set forth in the license.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Initialize the test image
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
featureCategories = {'Area','Assym','BordLngth','BrdIndx','Bright','Compact','Dens','GLCM','LW','Mean','NDVI','Rect','Round','SD','ShpIndx'};
imgSize = 1024;                                           % Size of test image
img = zeros(imgSize);                                     % Initialize the test image
img = MidpointCircle(img, 250, imgSize/2, imgSize/2, 1);  % Fill in with a circle
img2 = img + randn(size(img));                            % Add white Gaussian noise

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Area (ref. page 58)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1);  % Assume each pixel has area = 1
timeAbs(1) = toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Asymmetry (ref. page 60)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
varXY = var(idListX.*idListY);
assym = 2*sqrt(.25*(varX+varY)^2 + (varXY)^2 - varX*varY)/(varX+varY);
timeAbs(2) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border length (ref. page 36)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
bordlength = sum(temp(:));
timeAbs(3) = toc;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Border index (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
[idListX,idListY] = find(img==1);
P(1,:) = idListX; P(2,:) = idListY;
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);

borderIndex = sum(temp(:))/(2*(l+width));
timeAbs(4) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Brightness (ref. page 42)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
img3 = repmat(img,1,1,3);
tic;
% For an RGB image
wkb = [1,2,3];
brightness = (1/mean(wkb))*sum((wkb.*reshape(mean(mean(img3,1),2),1,3)));
% % For a binary image:
% mean(img(:));
timeAbs(5) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Compactness (ref. page 63)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
P(1,:) = idListX; P(2,:) = idListY;
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);

compactness = l*width/sum(img(:)==1);
timeAbs(6) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Density (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
varX = var(idListX);
varY = var(idListY);
dens = sqrt(sum(img(:)==1))/(1+sqrt(varX+varY));
timeAbs(7) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Gray-level co-occurrence matrix
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
graycomatrix(img2);
timeAbs(8) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Length/width (ref. page 59)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
[idListX,idListY] = find(img==1);
P(1,:) = idListX; P(2,:) = idListY;
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
l = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);

lw = l/width;
timeAbs(9) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Mean (ref. page 40)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
datMean = mean(img2(:));
timeAbs(10) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% NDVI
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
ndvi = sum((img - img2)./(img + img2));  % NDVI = (NIR - R)./(NIR + R), treating img and img2 as the two bands
timeAbs(11) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Rectangular fit (ref. page 65)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Get outlines of each object
tic;
[idListX,idListY] = find(img==1);
xbar = mean(idListX);
ybar = mean(idListY);
x = idListX - xbar;
y = -(idListY - ybar);  % This is negative for the orientation calculation (measured in the counter-clockwise direction).
N = length(x);

% Calculate normalized second central moments for the region. 1/12 is
% the normalized second central moment of a pixel with unit length.
uxx = sum(x.^2)/N + 1/12;
uyy = sum(y.^2)/N + 1/12;
uxy = sum(x.*y)/N;

% Calculate major axis length, minor axis length, and eccentricity.
common = sqrt((uxx - uyy)^2 + 4*uxy^2);
height = 2*sqrt(2)*sqrt(uxx + uyy + common);
width = 2*sqrt(2)*sqrt(uxx + uyy - common);
area = sum(img(:)==1);
SquareMetric = width/height;
if SquareMetric > 1
    SquareMetric = height/width;  % Make the aspect ratio less than unity
end
SquareMetric = SquareMetric/area;
timeAbs(12) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Roundness (ref. page 64)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
area = sum(img(:)==1);
temp = double(bwperim(img)==1);
perimeter = sum(temp(:));
roundness = 4*pi*area/perimeter^2;
timeAbs(13) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Standard deviation
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
stdDeviation = std(img(:));
timeAbs(14) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Shape index (ref. page 62)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic;
temp = double(bwperim(img)==1);
shpindx = sum(temp(:))/(4*sqrt(sum(img(:)==1)));
timeAbs(15) = toc;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Save results
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
timeRel = 100*timeAbs./repmat(sum(timeAbs),1,length(timeAbs));
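
% Example usage (added sketch; output order matches the function signature above):
%   [cats,tAbs,tRel] = featureTimes_urbanLandCover;
%   bar(tRel), set(gca,'XTick',1:numel(cats),'XTickLabel',cats)
%   ylabel('Relative extraction time (%)')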

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Legend of feature names
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Class: Land cover class (nominal)
% BrdIndx: Border Index (shape variable)
% Area: Area in m2 (size variable)
% Round: Roundness (shape variable)
% Bright: Brightness (spectral variable)
% Compact: Compactness (shape variable)
% ShpIndx: Shape Index (shape variable)
% Mean_G: Green (spectral variable)
% Mean_R: Red (spectral variable)
% Mean_NIR: Near Infrared (spectral variable)
% SD_G: Standard deviation of Green (texture variable)
% SD_R: Standard deviation of Red (texture variable)
% SD_NIR: Standard deviation of Near Infrared (texture variable)
% LW: Length/Width (shape variable)
% GLCM1: A Gray-Level Co-occurrence Matrix attribute (specific metric unspecified) (texture variable)
% Rect: Rectangularity (shape variable)
% GLCM2: Another Gray-Level Co-occurrence Matrix attribute (texture variable)
% Dens: Density (shape variable)
% Assym: Asymmetry (shape variable)
% NDVI: Normalized Difference Vegetation Index (spectral variable)
% BordLngth: Border Length (shape variable)
% GLCM3: Another Gray-Level Co-occurrence Matrix attribute (texture variable)

end