Stratified K-fold cross validation Matlab

My implementation of stratified K-fold cross-validation, which works much like c = cvpartition(group,'KFold',k) from the MATLAB Statistics Toolbox.
<pre>function [X, partition, y, order] = KfoldCVBalance(X, y, k)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Author: Pree Thiengburanathum
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Description:
% Stratified K-fold partitioning. The rows of X are shuffled and every row
% is assigned a fold number such that each fold holds a similar proportion
% of every class found in y. A per-fold class balance report is printed
% with disp.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Input:
% X - dataset, one sample per row
% y - class label of every row of X (numeric vector or cell array of
%     character vectors); must contain size(X, 1) elements
% k - number of folds (positive integer scalar)
%
% Output:
% X         - the dataset with its rows shuffled
% partition - column vector; partition(r) is the fold (1..k) of row r of
%             the RETURNED (shuffled) X
% y         - (optional) the labels as a column vector, shuffled with the
%             same permutation so that y(r) belongs to row r of X
% order     - (optional) the permutation applied, i.e. the returned X is
%             the input X(order, :)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
n = size(X, 1);
y = y(:);
if numel(y) ~= n
    error('KfoldCVBalance:sizeMismatch', ...
        'y must contain exactly one label per row of X.');
end
validateattributes(k, {'numeric'}, {'scalar', 'integer', 'positive'});

% shuffle the dataset and the labels with one common permutation
order = randperm(n)';
X = X(order, :);
y = y(order);

% classOf(r) is the index into group of the class of row r; using the
% index output of unique also supports non-numeric labels
[group, ~, classOf] = unique(y);
nGroup = numel(group);

% Deal the members of each class out to the folds round-robin. Within a
% class the per-fold counts differ by at most one. The running offset
% carries over between classes so the leftovers of different classes do
% not all pile up in the same fold.
partition = zeros(n, 1);
offset = 0;
for j = 1:nGroup
    members = find(classOf == j);
    nMembers = numel(members);
    partition(members) = mod(offset + (0:nMembers-1)', k) + 1;
    offset = offset + nMembers;
end

% check class balance for each fold
for i = 1:k
    foldClasses = classOf(partition == i);
    foldCount = numel(foldClasses);
    disp(['fold# ', int2str(i), ' has ', int2str(foldCount)]);
    for j = 1:nGroup
        % max(..., 1) avoids 0/0 when a fold is empty (more folds than rows)
        percentage = (nnz(foldClasses == j) / max(foldCount, 1)) * 100;
        disp(['class# ', int2str(j), ' = ', num2str(percentage), '%']);
    end
    disp(' ');
end
end % end function

 

Entropy and Probability state Matlab version


classdef ProbabilityState &lt; handle
% Author: Pree Thiengburanathum
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This is an update of the previous MI implementation, which was
% implemented using array indexing.
% This implementation uses the Map container from Matlab to calculate</pre>
% the probability state to enhance indexing.
%
% Every map is keyed by character vectors. A value v of X or Y is stored
% under the key sprintf('%d', v); a pair (x, y) is stored under the key
% [sprintf('%d', x) ',' sprintf('%d', y)].
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


    properties
        X % discrete random vector X
        Y % discrete random vector Y
        probXMap % probability mass function of X,  p(x)
        probYMap % probability mass function of Y,  p(y)
        jointMap % joint probability mass function, p(x,y)
    end

    methods
        function obj = ProbabilityState(X, Y)
            % Build the marginal and joint probability mass functions of
            % the paired observations X(i), Y(i).
            % Called without arguments it creates an object whose three
            % maps are empty.
            % Raises ProbabilityState:sizeMismatch when X and Y do not
            % have the same number of elements.
            obj.probXMap = ProbabilityState.emptyMap();
            obj.probYMap = ProbabilityState.emptyMap();
            obj.jointMap = ProbabilityState.emptyMap();
            if nargin == 0
                % nothing to count; leave the maps empty
                return;
            end
            if numel(X) ~= numel(Y)
                error('ProbabilityState:sizeMismatch', ...
                    'X and Y must have the same number of elements.');
            end
            obj.X = X;
            obj.Y = Y;
            obj.calculateProbabilityX();
            obj.calculateProbabilityY();
            obj.calculateJointProbabilityMassFunction();
        end % end constructor

        function calculateProbabilityX(obj)
            % Recompute p(x) from obj.X. The map is rebuilt from scratch,
            % so calling this method repeatedly is safe.
            obj.probXMap = ProbabilityState.buildPmf( ...
                ProbabilityState.formatKeys(obj.X));
        end % end function

        function calculateProbabilityY(obj)
            % Recompute p(y) from obj.Y. The map is rebuilt from scratch,
            % so calling this method repeatedly is safe.
            obj.probYMap = ProbabilityState.buildPmf( ...
                ProbabilityState.formatKeys(obj.Y));
        end % end function

        function result = marginalProbabilityX(obj, x)
            % Return p(x), or 0 when x was never observed.
            % x may be a map key (character vector) or a numeric value,
            % which is converted to its key with sprintf('%d', x).
            result = ProbabilityState.lookup(obj.probXMap, ...
                ProbabilityState.toKey(x));
        end % end function

        function result = marginalProbabilityY(obj, y)
            % Return p(y), or 0 when y was never observed.
            % y may be a map key (character vector) or a numeric value,
            % which is converted to its key with sprintf('%d', y).
            result = ProbabilityState.lookup(obj.probYMap, ...
                ProbabilityState.toKey(y));
        end % end function

        function result = jointProbability(obj, x, y)
            % Return p(x,y), or 0 when the pair was never observed.
            % Both arguments are always formatted with sprintf('%d', ...),
            % exactly as the joint keys are built.
            pairKey = [sprintf('%d', x), ',', sprintf('%d', y)];
            result = ProbabilityState.lookup(obj.jointMap, pairKey);
        end % end function

        function entropyX = calculateEntropyX(obj)
            % H(X) = -sumX p(x)log p(x)
            entropyX = ProbabilityState.entropyOf(obj.probXMap);
        end % end function

        function entropyY = calculateEntropyY(obj)
            % H(Y) = -sumY p(y)log p(y)
            entropyY = ProbabilityState.entropyOf(obj.probYMap);
        end % end function

        function entropyXY = calculateJointEntropy(obj)
            % H(X,Y) = - sumx sumy p(x,y) log p(x,y)
            entropyXY = ProbabilityState.entropyOf(obj.jointMap);
        end % end function

        function displayProbXMap(obj)
            % Print the keys and then the values of p(x).
            disp(keys(obj.probXMap));disp(values(obj.probXMap));
        end % end function

        function displayProbYMap(obj)
            % Print the keys and then the values of p(y).
            disp(keys(obj.probYMap));disp(values(obj.probYMap));
        end % end function

        function displayJointMap(obj)
            % Print the keys and then the values of p(x,y).
            disp(keys(obj.jointMap));disp(values(obj.jointMap));
        end % end function
    end % end method

    methods (Access = private)
        function calculateJointProbabilityMassFunction(obj)
            % Recompute p(x,y) from the pairs (obj.X(i), obj.Y(i)).
            xKeys = ProbabilityState.formatKeys(obj.X);
            yKeys = ProbabilityState.formatKeys(obj.Y);
            pairKeys = cellfun(@(a, b) [a, ',', b], xKeys, yKeys, ...
                'UniformOutput', false);
            obj.jointMap = ProbabilityState.buildPmf(pairKeys);
        end % end function
    end % end method

    methods (Static, Access = private)
        function map = emptyMap()
            % Empty map with the same key/value types as containers.Map().
            map = containers.Map('KeyType', 'char', 'ValueType', 'any');
        end % end function

        function keyList = formatKeys(data)
            % Column cell array holding sprintf('%d', v) for each element.
            keyList = arrayfun(@(v) sprintf('%d', v), data(:), ...
                'UniformOutput', false);
        end % end function

        function key = toKey(value)
            % Character vectors are taken as ready-made keys; any other
            % value is formatted the same way the keys were built.
            if ischar(value)
                key = value;
            else
                key = sprintf('%d', value);
            end
        end % end function

        function pmf = buildPmf(keyList)
            % Count the occurrences of every key, then divide the counts
            % by the number of observations.
            pmf = ProbabilityState.emptyMap();
            total = numel(keyList);
            for i = 1:total
                key = keyList{i};
                if isKey(pmf, key)
                    pmf(key) = pmf(key) + 1;
                else
                    pmf(key) = 1;
                end
            end
            observed = keys(pmf);
            for i = 1:numel(observed)
                pmf(observed{i}) = pmf(observed{i}) / total;
            end
        end % end function

        function result = lookup(pmf, key)
            % Probability stored under key, or 0 when the key is absent.
            result = 0;
            if isKey(pmf, key)
                result = pmf(key);
            end
        end % end function

        function h = entropyOf(pmf)
            % Shannon entropy in bits. Every stored probability is
            % positive (a key only exists once it has been counted), so
            % log2 never sees 0.
            h = 0.0;
            p = values(pmf);
            for i = 1:numel(p)
                h = h - p{i} * log2(p{i});
            end
        end % end function
    end % end method
end % end class