function data = safaCORN( analyte, priPercent, secPercent, distinct )
%% LOAD CORN DATA SET
%>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
% SAFACORN  Build the primary/secondary sample blocks for the corn data set.
%
% This function is an example of how to include your own data set.  It can
% create two possible corn data sets:
%
% A) distinct = true; 
%    The corn data set mimics the GOAT data set.  The first half of the 
%    eighty samples from instrument m5 (indices 1:40) are deemed the 
%    primary samples, and the remaining m5 samples (indices 41:80) are 
%    thrown away.  The secondary samples come from instrument mp5, but 
%    only from the second half (indices 41:80); the first half 
%    (indices 1:40) is thrown away.  Each sample therefore belongs to only 
%    one domain---either primary or secondary but not both.  Each sample 
%    is distinct.
%
% B) distinct = false; 
%    The corn data set mimics the SOY SEED (SOYS) data set in that each 
%    sample is measured on two different instruments (instruments m5 and 
%    mp5).  There are no distinct samples.
%
% The data set that you will use will be either distinct or not (most
% likely distinct).
%
% INPUTS
%   analyte    - One of 'MOISTURE', 'PROTEIN', 'OIL' or 'STARCH'
%                (case-insensitive).  Selects the column of corn.y.
%   priPercent - Percentage (0-100) of the samples assigned to the primary
%                block.  Only used when distinct is false.
%   secPercent - Bin percentages forwarded unchanged to PercentBins 
%                (case A) or GroupBins (case B).  Four index sets are read
%                back from those helpers, so presumably four percentages
%                are expected -- TODO confirm against the helpers.
%   distinct   - true for case A, false for case B (see above).
%
% OUTPUT
%   data - struct with fields Xp, Yp, Xs, Ys, Yc, calp, cals1, cals2,
%          unks, vals, size, wave, domain, analyte, name and distinct.
%
% ERRORS
%   safaCORN:unknownAnalyte - analyte is not one of the four known names.
%
%>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


%% VALIDATE THE ANALYTE
%--------------------------------------------------------------------------
% An unrecognised analyte would otherwise give an all-false column mask 
% and silently produce empty reference vectors, so fail early (before any 
% file I/O) with an explicit error instead.
AnalyteIdx = strcmpi({'MOISTURE','PROTEIN','OIL','STARCH'},analyte);
if ~any(AnalyteIdx)
    error('safaCORN:unknownAnalyte', ...
          'ANALYTE must be one of MOISTURE, PROTEIN, OIL or STARCH (case-insensitive).');
end

%% LOAD THE CORN DATA SET
%--------------------------------------------------------------------------
% Use the functional form of LOAD so that nothing is injected implicitly
% into this workspace and "corn" cannot be shadowed by a function of the
% same name on the path.
loaded = load('corn');
corn   = loaded.corn;

if distinct
    % CASE A
    %-------------------------------------------------------
    % A1) Define the primary and secondary samples
    %-------------------------------------------------------
    data.Xp =  corn.Xm5(  1:40,:);
    data.Yp =    corn.y(  1:40, AnalyteIdx );
    data.Xs = corn.Xmp5( 41:80,:);
    data.Ys =    corn.y( 41:80, AnalyteIdx );
    
    %-------------------------------------------------------
    % A2) Quantize the primary reference measurements into 
    %     low, medium or high values.  This will be used for 
    %     shuffling purposes.  I use 3 (low, medium, high) or 
    %     4 (low, medium low, medium high, high) depending 
    %     upon whether the number of samples is divisible by 
    %     3 or 4, but divisibility is not necessary.
    %-------------------------------------------------------
    data.Yc = QuantizeRef(data.Yp,4); 
    
    % This part is the setup for creating sample blocks.
    % Samples are rows, so count along the first dimension.
    num.pri = size(data.Yp,1);
    num.sec = size(data.Ys,1);
    calp = 1:num.pri;
    % priPercent is not used in this case: every primary sample is used.
    S = PercentBins(secPercent,num.sec);
    %-------------------------------------------------------
    % A3) Define the sample blocks
    %-------------------------------------------------------
    data.calp  = calp;
    data.cals1 = S.idx{1};
    data.cals2 = S.idx{2};
    data.unks  = S.idx{3};
    data.vals  = S.idx{4};
    
else
    % CASE B
    %-------------------------------------------------------
    % B1) Define the primary and secondary samples
    %-------------------------------------------------------
    data.Xp = corn.Xm5;
    data.Yp = corn.y(:,AnalyteIdx);
    data.Xs = corn.Xmp5;
    % Same physical samples on both instruments, so the reference 
    % values are shared.
    data.Ys = data.Yp;
    
    %-------------------------------------------------------
    % B2) Quantize 
    %-------------------------------------------------------
    data.Yc = QuantizeRef(data.Yp,4); 
    
    % This part is the setup for creating sample blocks
    num.total = size(data.Yp,1);
    % Assign how many of the samples will be primary
    num.pri = round(priPercent/100*num.total);
    % The remaining will be secondary
    num.sec = num.total - num.pri;
    % Assign calibration primary samples
    calp = 1:num.pri; calp = calp(:);
    % Assign secondary samples 
    cals = num.pri + (1:num.sec); cals = cals(:);
    % Split the secondary samples into percentage bins according to 
    % secPercent.  For example (original author's illustration):
    % > 10% will go into calibration secondary with known labels
    % > 60% will go into calibration secondary with unknown labels
    % > 30% will go into validation secondary for assessment purposes
    % NOTE(review): four index sets (cals1, cals2, unks, vals) are read 
    % back below, so the three-way example above is presumably 
    % incomplete -- confirm against GroupBins.
    % We want to preserve (as much as possible) the proportion of
    % low, medium and high reference measurements in each bin.
    idxS = GroupBins(data.Yc(cals), secPercent);
    %-------------------------------------------------------
    % B3) Define the sample blocks
    %-------------------------------------------------------
    data.calp  = calp;
    data.cals1 = cals(idxS{1});
    data.cals2 = cals(idxS{2});
    data.unks  = cals(idxS{3});
    data.vals  = cals(idxS{4});
    
end    

%--------------------------------------------------------------------------
% 4) Define the sample block sizes
%--------------------------------------------------------------------------
data.size  = [num.pri, length(data.cals1), length(data.cals2), ...
                       length(data.unks),  length(data.vals)];

%--------------------------------------------------------------------------
% 5) Define wavelengths, domain, analyte, and data set name
%--------------------------------------------------------------------------
% NOTE(review): 1102:2:2498 has 699 points.  The public corn data set is
% usually described as 1100:2:2498 (700 channels) -- confirm that this
% axis matches the number of columns in corn.Xm5 / corn.Xmp5.
data.wave    = 1102:2:2498;         % Wavelengths
data.domain  = {'m5','mp5'};        % Spectrometers/instruments
data.analyte = upper(analyte);      % Analyte
if distinct
    data.name = 'CORN_A';        % data set name
else
    data.name = 'CORN_B';
end    
data.distinct = distinct;

%>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
end

