From 84202111076df942345f434e5452808b9b1c0859 Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 01:48:52 +0530 Subject: [PATCH 1/8] 1 --- inst/grp2idx.m | 396 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 337 insertions(+), 59 deletions(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index 051123a4..90ed3b94 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -56,7 +56,7 @@ undef = isundefined (s); cats = categories (s); s = cellstr (string (s)); - s(undef) = {""}; + s(undef) = {''}; elseif (isduration (s) && isvector (s)) s_was_duration = true; elseif (! isvector (s)) @@ -141,119 +141,100 @@ %!test %! g = grp2idx ([3 2 1 2 3 1]); %! assert (isequal (g, [3; 2; 1; 2; 3; 1])); - # test for two output arguments %!test %! [g, gn] = grp2idx (['b'; 'a'; 'c'; 'a']); %! assert (isequal (g, [1; 2; 3; 2])); %! assert (isequal (gn, {'b'; 'a'; 'c'})); - ## test boolean input and note that row or column vector makes no difference %!test %! in = [true false false true]; -%! out = {[2; 1; 1; 2] {"0"; "1"} [false; true]}; +%! out = {[2; 1; 1; 2] {'0'; '1'} [false; true]}; %! assert (nthargout (1:3, @grp2idx, in), out) %! assert (nthargout (1:3, @grp2idx, in), nthargout (1:3, @grp2idx, in')) - ## test that boolean groups are ordered in order of appearance %!test -%! assert (nthargout (1:3, @grp2idx, [false, true]), -%! {[1; 2] {"0"; "1"} [false; true]}); -%! assert (nthargout (1:3, @grp2idx, [true, false]), -%! {[2; 1] {"0"; "1"} [false; true]}); - +%! assert (nthargout (1:3, @grp2idx, [false, true]), {[1; 2] {'0'; '1'} ... +%! [false; true]}); +%! assert (nthargout (1:3, @grp2idx, [true, false]), {[2; 1] {'0'; '1'} ... +%! [false; true]}); ## test char matrix and cell array of strings -%!assert (nthargout (1:3, @grp2idx, ["oct"; "sci"; "oct"; "oct"; "sci"]), -%! {[1; 2; 1; 1; 2] {"oct"; "sci"} ["oct"; "sci"]}); +%!assert (nthargout (1:3, @grp2idx, ['oct'; 'sci'; 'oct'; 'oct'; 'sci']), +%! 
{[1; 2; 1; 1; 2] {'oct'; 'sci'} ['oct'; 'sci']}); ## and cell array of strings -%!assert (nthargout (1:3, @grp2idx, {"oct"; "sci"; "oct"; "oct"; "sci"}), -%! {[1; 2; 1; 1; 2] {"oct"; "sci"} {"oct"; "sci"}}); - +%!assert (nthargout (1:3, @grp2idx, {'oct'; 'sci'; 'oct'; 'oct'; 'sci'}), +%! {[1; 2; 1; 1; 2] {'oct'; 'sci'} {'oct'; 'sci'}}); ## test numeric arrays %!assert (nthargout (1:3, @grp2idx, [ 1 -3 -2 -3 -3 2 1 -1 3 -3]), -%! {[4; 1; 2; 1; 1; 5; 4; 3; 6; 1], {"-3"; "-2"; "-1"; "1"; "2"; "3"}, ... +%! {[4; 1; 2; 1; 1; 5; 4; 3; 6; 1], {'-3'; '-2'; '-1'; '1'; '2'; '3'}, ... %! [-3; -2; -1; 1; 2; 3]}); - %!test %! s = [1e6 2e6 1e6 3e6]; %! [g, gn, gl] = grp2idx (s); %! assert (g, [1; 2; 1; 3]); -%! assert (gn, {"1000000"; "2000000"; "3000000"}); +%! assert (gn, {'1000000'; '2000000'; '3000000'}); %! assert (gl, [1000000; 2000000; 3000000]); - %!test %! s = [0.1 0.2 0.3 0.1 0.2]; %! [g, gn, gl] = grp2idx (s); %! assert (g, [1; 2; 3; 1; 2]); -%! assert (gn, {"0.1"; "0.2"; "0.3"}); +%! assert (gn, {'0.1'; '0.2'; '0.3'}); %! assert (gl, [0.1; 0.2; 0.3]); - %!test %! s = [-5 -10 0 5 10 -5]; %! [g, gn, gl] = grp2idx (s); %! assert (g, [2; 1; 3; 4; 5; 2]); -%! assert (gn, {"-10"; "-5"; "0"; "5"; "10"}); +%! assert (gn, {'-10'; '-5'; '0'; '5'; '10'}); %! assert (gl, [-10; -5; 0; 5; 10]); - - ## test for NaN and empty strings %!assert (nthargout (1:3, @grp2idx, [2 2 3 NaN 2 3]), %! {[1; 1; 2; NaN; 1; 2] {'2'; '3'} [2; 3]}) %!assert (nthargout (1:3, @grp2idx, {'et' 'sa' 'sa' '' 'et'}), %! {[1; 2; 2; NaN; 1] {'et'; 'sa'} {'et'; 'sa'}}) - %!assert (nthargout (1:3, @grp2idx, [2 2 3 NaN 2 4]), %! {[1; 1; 2; NaN; 1; 3] {'2'; '3'; '4'} [2; 3; 4]}) - %!test %! s = [NaN, NaN, NaN]; %! [g, gn, gl] = grp2idx (s); %! assert (isequaln (g, [NaN; NaN; NaN])); %! assert (isequaln (gn, cell (0,1))); %! assert (isequaln (gl, zeros (0,1))); - %!test %! s = single ([NaN, NaN, NaN]); %! [g, gn, gl] = grp2idx (s); %! assert (isequaln (g, [NaN; NaN; NaN])); %! 
assert (isequaln (gn, cell (0,1))); -%! assert (isequaln (gl, single (zeros(0,1)))); - +%! assert (isequaln (gl, single (zeros (0,1)))); %!test -%! s = {""; ""; ""; ""}; +%! s = {''; ''; ''; ''}; %! [g, gn, gl] = grp2idx (s); %! assert (isequaln (g, [NaN; NaN; NaN; NaN]), true); -%! assert (isequaln (gn, cell(0,1))); -%! assert (isequaln (gl, cell(0,1))); - +%! assert (isequaln (gn, cell (0,1))); +%! assert (isequaln (gl, cell (0,1))); %!test -%! s = {"a"; ""; "b"; ""; "c"}; +%! s = {'a'; ''; 'b'; ''; 'c'}; %! [g, gn, gl] = grp2idx (s); %! assert (isequaln (g, [1; NaN; 2; NaN; 3]), true); -%! assert (gn, {"a"; "b"; "c"}); -%! assert (gl, {"a"; "b"; "c"}); - +%! assert (gn, {'a'; 'b'; 'c'}); +%! assert (gl, {'a'; 'b'; 'c'}); %!test -%! s = categorical ({""; ""; ""; ""}); +%! s = categorical ({''; ''; ''; ''}); %! [g, gn, gl] = grp2idx (s); %! assert (isequaln (g, [NaN; NaN; NaN; NaN]), true); %! assert (isequaln (gn, cell (0,1))); %! assert (isequaln (gl, categorical (cell (0,1)))); - %!test %! s = string ({missing, missing, missing}); %! out = string (cell (0,1)); %! [g, gn, gl] = grp2idx (s); %! assert (isequaln (g, [NaN; NaN; NaN]), true); %! assert (isequal (gn, cell (0,1))); - %!test %! s = [duration(NaN,0,0), duration(NaN,0,0), duration(NaN,0,0)]; %! [g, gn, gl] = grp2idx (s); %! assert (isequaln (g, [NaN; NaN; NaN])); %! assert (isequal (gn, cell (0,1))); %! assert (isequal (gl, duration (NaN (0,3)))); - ## Test that order when handling strings is by order of appearance %!test assert (nthargout (1:3, @grp2idx, ['sci'; 'oct'; 'sci'; 'oct'; 'oct']), %! {[1; 2; 1; 2; 2] {'sci'; 'oct'} ['sci'; 'oct']}); @@ -261,27 +242,23 @@ %! {[1; 2; 1; 2; 2] {'sci'; 'oct'} {'sci'; 'oct'}}); %!test assert (nthargout (1:3, @grp2idx, {'sa' 'et' 'et' '' 'sa'}), %! {[1; 2; 2; NaN; 1] {'sa'; 'et'} {'sa'; 'et'}}) - ## test for categorical arrays %!test %! [g, gn, gl] = grp2idx (categorical ({'low', 'med', 'high', 'low'})); %! assert (isequal (g, [2; 3; 1; 2])); %! 
assert (isequal (gn, {'high'; 'low'; 'med'})); %! assert (isequal (gl, categorical ({'high'; 'low'; 'med'}))); - %!test %! [g, gn, gl] = grp2idx (categorical ([10, 20, 10, 30, 20])); %! assert (isequal (g, [1; 2; 1; 3; 2])); %! assert (isequal (gn, {'10'; '20'; '30'})); %! assert (isequal (gl, categorical ([10; 20; 30]))); - %!test %! cats = categorical ({'high', '', 'low', ''}); %! [g, gn, gl] = grp2idx (cats); %! assert (isequal (g, [2; 1; 3; 1])); %! assert (isequal (gn, {''; 'high'; 'low'})); %! assert (isequal (gl, categorical ({''; 'high'; 'low'}))); - ## test for duration arrays %!test %! g = gn = gl = []; @@ -289,47 +266,348 @@ %! assert (isequal (g, [1; 1; 2; 3])); %! assert (isequal (gn, {'1.234 sec'; '2.5 sec'; '3 sec'})); %! assert (isequal (gl, seconds ([1.234; 2.5; 3.000]))); - %!test %! [g, gn, gl] = grp2idx ([hours(1); hours(2); hours(1); hours(3)]); %! assert(isequal (g, [1; 2; 1; 3])); %! assert(isequal (gn, {'1 hr'; '2 hr'; '3 hr'})); %! assert(isequal (gl, [hours(1); hours(2); hours(3)])); - %!test %! in = [duration(1, 30, 0); duration(0, 45, 30); duration(1, 30, 0); duration(2, 15, 15)]; -%! [g, gn, gl] = grp2idx(in); -%! assert(isequal(g, [2; 1; 2; 3])); +%! [g, gn, gl] = grp2idx (in); +%! assert(isequal (g, [2; 1; 2; 3])); %! assert(isequal (gn, {'00:45:30'; '01:30:00'; '02:15:15'})); %! assert(isequal (gl, [duration(0,45,30); duration(1,30,0); duration(2,15,15)])); - ## Inconsistency Note: following test is inconsistent with MATLAB due to a ## probable bug in their implementation, where they include multiple NaNs ## in the output group labels for duration array inputs. %!test %! in = [hours(1); NaN; minutes(30); hours(1); NaN; seconds(90)]; -%! [g, gn, gl] = grp2idx(in); -%! assert(isequaln(g, [3; NaN; 2; 3; NaN; 1])); -%! assert(isequal(gn, {'0.025 hr'; '0.5 hr'; '1 hr'})); -%! assert(isequal(gl, [seconds(90); minutes(30); hours(1)])); - - +%! [g, gn, gl] = grp2idx (in); +%! assert(isequaln (g, [3; NaN; 2; 3; NaN; 1])); +%! 
assert(isequal (gn, {'0.025 hr'; '0.5 hr'; '1 hr'})); +%! assert(isequal (gl, [seconds(90); minutes(30); hours(1)])); ## test for string arrays - %!test %! [g, gn, gl] = grp2idx (string ({'123', 'erw', missing, '', '234'})); %! assert (isequaln (g, [1; 2; NaN; NaN; 3])); %! assert (isequal (gn, {'123'; 'erw'; '234'})); %! assert (isequal (gl, string ({'123'; 'erw'; '234'}))); - %!test %! [g, gn, gl] = grp2idx (string ({'medium', 'low', 'high', 'medium', 'medium'})); %! assert (isequaln (g, [1; 2; 3; 1; 1])); %! assert (isequal (gn, {'medium'; 'low'; 'high'})); %! assert (isequal (gl, string ({'medium'; 'low'; 'high'}))); - %!test %! [g, gn, gl] = grp2idx (string ({'', 'high', 'low', ''})); %! assert (isequaln (g, [NaN; 1; 2; NaN])); %! assert (isequal (gn, {'high'; 'low'})); %! assert (isequal (gl, string ({'high'; 'low'}))); +%!test +%! s = [true false false true]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [2; 1; 1; 2]); +%! assert (gn, {'0'; '1'}); +%! assert (gl, [false; true]); +%!test +%! s = [true; false; false; true]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [2; 1; 1; 2]); +%! assert (gn, {'0'; '1'}); +%! assert (gl, [false; true]); +%!test +%! s = [false true]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2]); +%! assert (gn, {'0'; '1'}); +%! assert (gl, [false; true]); +%!test +%! s = [true false]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [2; 1]); +%! assert (gn, {'0'; '1'}); +%! assert (gl, [false; true]); +%!test +%! s = [true true true]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 1; 1]); +%! assert (gn, {'1'}); +%! assert (gl, true); +%!test +%! s = [false false false]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 1; 1]); +%! assert (gn, {'0'}); +%! assert (gl, false); +%!test +%! s = ['oct'; 'sci'; 'oct'; 'oct'; 'sci']; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 1; 1; 2]); +%! assert (gn, {'oct'; 'sci'}); +%! assert (gl, ['oct'; 'sci']); +%!test +%! s = ['sci'; 'oct'; 'sci'; 'oct'; 'oct']; +%! 
[g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 1; 2; 2]); +%! assert (gn, {'sci'; 'oct'}); +%! assert (gl, ['sci'; 'oct']); +%!test +%! s = {'oct'; 'sci'; 'oct'; 'oct'; 'sci'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 1; 1; 2]); +%! assert (gn, {'oct'; 'sci'}); +%! assert (gl, {'oct'; 'sci'}); +%!test +%! s = {'sci'; 'oct'; 'sci'; 'oct'; 'oct'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 1; 2; 2]); +%! assert (gn, {'sci'; 'oct'}); +%! assert (gl, {'sci'; 'oct'}); +%!test +%! s = {'et' 'sa' 'sa' '' 'et'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; 2; 2; NaN; 1]), true); +%! assert (gn, {'et'; 'sa'}); +%! assert (gl, {'et'; 'sa'}); +%!test +%! s = {'sa' 'et' 'et' '' 'sa'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; 2; 2; NaN; 1]), true); +%! assert (gn, {'sa'; 'et'}); +%! assert (gl, {'sa'; 'et'}); +%!test +%! s = {'a'; 'b'; 'c'; 'a'; 'b'; 'c'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 3; 1; 2; 3]); +%! assert (gn, {'a'; 'b'; 'c'}); +%! assert (gl, {'a'; 'b'; 'c'}); +%!test +%! s = {'c'; 'b'; 'a'; 'c'; 'b'; 'a'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 3; 1; 2; 3]); +%! assert (gn, {'c'; 'b'; 'a'}); +%! assert (gl, {'c'; 'b'; 'a'}); +%!test +%! s = {''; ''; ''; ''}; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [NaN; NaN; NaN; NaN]), true); +%! assert (isequal (size (gn), [0 1])) +%! assert (isequal (size (gl), [0 1])) +%!test +%! s = {'a'; ''; 'b'; ''; 'c'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; NaN; 2; NaN; 3]), true); +%! assert (gn, {'a'; 'b'; 'c'}); +%! assert (gl, {'a'; 'b'; 'c'}); +%!test +%! s = [1 -3 -2 -3 -3 2 1 -1 3 -3]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [4; 1; 2; 1; 1; 5; 4; 3; 6; 1]); +%! assert (gn, {'-3'; '-2'; '-1'; '1'; '2'; '3'}); +%! assert (gl, [-3; -2; -1; 1; 2; 3]); +%!test +%! s = [1 2 3 4 5]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 3; 4; 5]); +%! assert (gn, {'1'; '2'; '3'; '4'; '5'}); +%! 
assert (gl, [1; 2; 3; 4; 5]); +%!test +%! s = [5 4 3 2 1]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [5; 4; 3; 2; 1]); +%! assert (gn, {'1'; '2'; '3'; '4'; '5'}); +%! assert (gl, [1; 2; 3; 4; 5]); +%!test +%! s = [1 1 1 1 1]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 1; 1; 1; 1]); +%! assert (gn, {'1'}); +%! assert (gl, 1); +%!test +%! s = [10; 20; 30; 10; 20]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 3; 1; 2]); +%! assert (gn, {'10'; '20'; '30'}); +%! assert (gl, [10; 20; 30]); +%!test +%! s = [2 2 3 NaN 2 3]; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; 1; 2; NaN; 1; 2]), true); +%! assert (gn, {'2'; '3'}); +%! assert (gl, [2; 3]); +%!test +%! s = [1 NaN 2 NaN 3]; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; NaN; 2; NaN; 3]), true); +%! assert (gn, {'1'; '2'; '3'}); +%! assert (gl, [1; 2; 3]); +%!test +%! s = [NaN NaN NaN]; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [NaN; NaN; NaN]), true); +%! assert (isequal (size (gn), [0 1])) +%! assert (gl, zeros (0, 1)); +%!test +%! s = [1.5 2.5 1.5 3.5 2.5]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 1; 3; 2]); +%! assert (gn, {'1.5'; '2.5'; '3.5'}); +%! assert (gl, [1.5; 2.5; 3.5]); +%!test +%! s = [-5 -10 0 5 10 -5]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [2; 1; 3; 4; 5; 2]); +%! assert (gn, {'-10'; '-5'; '0'; '5'; '10'}); +%! assert (gl, [-10; -5; 0; 5; 10]); +%!test +%! s = [0 0 0 0]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 1; 1; 1]); +%! assert (gn, {'0'}); +%! assert (gl, 0); +%!test +%! s = [1e6 2e6 1e6 3e6]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 1; 3]); +%! assert (gn, {'1000000'; '2000000'; '3000000'}); +%! assert (gl, [1000000; 2000000; 3000000]); +%!test +%! s = categorical ({'low', 'med', 'high', 'low'}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [2; 3; 1; 2])); +%! assert (isequal (gn, {'high'; 'low'; 'med'})); +%! assert (isequal (gl, categorical ({'high'; 'low'; 'med'}))); +%!test +%! 
s = categorical ([10, 20, 10, 30, 20]); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [1; 2; 1; 3; 2])); +%! assert (isequal (gn, {'10'; '20'; '30'})); +%! assert (isequal (gl, categorical ({'10'; '20'; '30'}))); +%!test +%! s = categorical ({'high', '', 'low', ''}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; NaN; 2; NaN]), true); +%! assert (isequal (gn, {'high'; 'low'})); +%! assert (isequal (gl, categorical ({'high'; 'low'}))); +%!test +%! s = categorical ({'a', 'b', 'c', 'a', 'b', 'c'}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [1; 2; 3; 1; 2; 3])); +%! assert (isequal (gn, {'a'; 'b'; 'c'})); +%! assert (isequal (gl, categorical ({'a'; 'b'; 'c'}))); +%!test +%! s = seconds ([1.234, 1.234, 2.5, 3.000]); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [1; 1; 2; 3])); +%! assert (isequal (gn, {'1.234 sec'; '2.5 sec'; '3 sec'})); +%! assert (isequal (gl, seconds ([1.234; 2.5; 3]))); +%!test +%! s = [hours(1); hours(2); hours(1); hours(3)]; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [1; 2; 1; 3])); +%! assert (isequal (gn, {'1 hr'; '2 hr'; '3 hr'})); +%! assert (isequal (gl, [hours(1); hours(2); hours(3)])); +%!test +%! s = [duration(1, 30, 0); duration(0, 45, 30); duration(1, 30, 0); duration(2, 15, 15)]; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [2; 1; 2; 3])); +%! assert (isequal (gn, {'00:45:30'; '01:30:00'; '02:15:15'})); +%! assert (isequal (gl, [duration(0,45,30); duration(1,30,0); duration(2,15,15)])); +%!test +%! s = [minutes(10); minutes(20); minutes(10); minutes(30)]; +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [1; 2; 1; 3])); +%! assert (isequal (gn, {'10 min'; '20 min'; '30 min'})); +%! assert (isequal (gl, [minutes(10); minutes(20); minutes(30)])); +%!test +%! s = string ({'123', 'erw', missing, '', '234'}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; 2; NaN; NaN; 3]), true); +%! assert (isequal (gn, {'123'; 'erw'; '234'})); +%! 
assert (isequal (gl, string ({'123'; 'erw'; '234'}))); +%!test +%! s = string ({'medium', 'low', 'high', 'medium', 'medium'}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; 2; 3; 1; 1]), true); +%! assert (isequal (gn, {'medium'; 'low'; 'high'})); +%! assert (isequal (gl, string ({'medium'; 'low'; 'high'}))); +%!test +%! s = string ({'', 'high', 'low', ''}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [NaN; 1; 2; NaN]), true); +%! assert (isequal (gn, {'high'; 'low'})); +%! assert (isequal (gl, string ({'high'; 'low'}))); +%!test +%! s = string ({'a', 'b', 'c', 'a', 'b'}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [1; 2; 3; 1; 2]), true); +%! assert (isequal (gn, {'a'; 'b'; 'c'})); +%! assert (isequal (gl, string ({'a'; 'b'; 'c'}))); +%!test +%! s = string ({'test', 'test', 'test'}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequal (g, [1; 1; 1])); +%! assert (isequal (gn, {'test'})); +%! assert (isequal (gl, string ({'test'}))); +%!test +%! s = string ({missing, missing, missing}); +%! [g, gn, gl] = grp2idx (s); +%! assert (isequaln (g, [NaN; NaN; NaN]), true); +%! assert (isequal (size (gn), [0 1])) +%! assert (isequal (size (gl), [0 1])); +%!test +%! s = [1 2 3]; +%! g = grp2idx (s); +%! assert (g, [1; 2; 3]); +%!test +%! s = {'a'; 'b'; 'c'}; +%! g = grp2idx (s); +%! assert (g, [1; 2; 3]); +%!test +%! s = [true false true]; +%! g = grp2idx (s); +%! assert (g, [2; 1; 2]); +%!test +%! s = [1 2 3 4 5]; +%! [g, gn] = grp2idx (s); +%! assert (g, [1; 2; 3; 4; 5]); +%! assert (gn, {'1'; '2'; '3'; '4'; '5'}); +%!test +%! s = {'x'; 'y'; 'z'}; +%! [g, gn] = grp2idx (s); +%! assert (g, [1; 2; 3]); +%! assert (gn, {'x'; 'y'; 'z'}); +%!test +%! s = categorical ({'cat1', 'cat2', 'cat3'}); +%! [g, gn] = grp2idx (s); +%! assert (g, [1; 2; 3]); +%! assert (gn, {'cat1'; 'cat2'; 'cat3'}); +%!test +%! s = [1; 1; 2; 2; 3; 3; 4; 4; 5; 5]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 1; 2; 2; 3; 3; 4; 4; 5; 5]); +%! 
assert (gn, {'1'; '2'; '3'; '4'; '5'}); +%! assert (gl, [1; 2; 3; 4; 5]); +%!test +%! s = {'group1'; 'group2'; 'group1'; 'group3'; 'group2'; 'group3'}; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 1; 3; 2; 3]); +%! assert (gn, {'group1'; 'group2'; 'group3'}); +%! assert (gl, {'group1'; 'group2'; 'group3'}); +%!test +%! s = [10 20 30 40 50]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 3; 4; 5]); +%! assert (gn, {'10'; '20'; '30'; '40'; '50'}); +%! assert (gl, [10; 20; 30; 40; 50]); +%!test +%! s = logical ([1 0 1 0 1 0]); +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [2; 1; 2; 1; 2; 1]); +%! assert (gn, {'0'; '1'}); +%! assert (gl, [false; true]); +%!test +%! s = [0.1 0.2 0.3 0.1 0.2]; +%! [g, gn, gl] = grp2idx (s); +%! assert (g, [1; 2; 3; 1; 2]); +%! assert (gn, {'0.1'; '0.2'; '0.3'}); +%! assert (gl, [0.1; 0.2; 0.3]); \ No newline at end of file From 5a26040c12eba02be3f100d1539803e531fdd23a Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 18:05:32 +0530 Subject: [PATCH 2/8] 2 --- inst/grp2idx.m | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index 90ed3b94..0ac03f6b 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -59,7 +59,7 @@ s(undef) = {''}; elseif (isduration (s) && isvector (s)) s_was_duration = true; - elseif (! isvector (s)) + elseif (! isvector (s) || isstruct (s) || istable (s)) error ("grp2idx: S must be a vector, cell array of strings, or char matrix"); endif @@ -268,24 +268,25 @@ %! assert (isequal (gl, seconds ([1.234; 2.5; 3.000]))); %!test %! [g, gn, gl] = grp2idx ([hours(1); hours(2); hours(1); hours(3)]); -%! assert(isequal (g, [1; 2; 1; 3])); -%! assert(isequal (gn, {'1 hr'; '2 hr'; '3 hr'})); -%! assert(isequal (gl, [hours(1); hours(2); hours(3)])); +%! assert (isequal (g, [1; 2; 1; 3])); +%! assert (isequal (gn, {'1 hr'; '2 hr'; '3 hr'})); +%! assert (isequal (gl, [hours(1); hours(2); hours(3)])); %!test -%! 
in = [duration(1, 30, 0); duration(0, 45, 30); duration(1, 30, 0); duration(2, 15, 15)]; +%! in = [duration(1, 30, 0); duration(0, 45, 30); duration(1, 30, 0); ... +%! duration(2, 15, 15)]; %! [g, gn, gl] = grp2idx (in); -%! assert(isequal (g, [2; 1; 2; 3])); -%! assert(isequal (gn, {'00:45:30'; '01:30:00'; '02:15:15'})); -%! assert(isequal (gl, [duration(0,45,30); duration(1,30,0); duration(2,15,15)])); +%! assert (isequal (g, [2; 1; 2; 3])); +%! assert (isequal (gn, {'00:45:30'; '01:30:00'; '02:15:15'})); +%! assert (isequal (gl, [duration(0,45,30); duration(1,30,0); duration(2,15,15)])); ## Inconsistency Note: following test is inconsistent with MATLAB due to a ## probable bug in their implementation, where they include multiple NaNs ## in the output group labels for duration array inputs. %!test %! in = [hours(1); NaN; minutes(30); hours(1); NaN; seconds(90)]; %! [g, gn, gl] = grp2idx (in); -%! assert(isequaln (g, [3; NaN; 2; 3; NaN; 1])); -%! assert(isequal (gn, {'0.025 hr'; '0.5 hr'; '1 hr'})); -%! assert(isequal (gl, [seconds(90); minutes(30); hours(1)])); +%! assert (isequaln (g, [3; NaN; 2; 3; NaN; 1])); +%! assert (isequal (gn, {'0.025 hr'; '0.5 hr'; '1 hr'})); +%! assert (isequal (gl, [seconds(90); minutes(30); hours(1)])); ## test for string arrays %!test %! [g, gn, gl] = grp2idx (string ({'123', 'erw', missing, '', '234'})); @@ -610,4 +611,11 @@ %! [g, gn, gl] = grp2idx (s); %! assert (g, [1; 2; 3; 1; 2]); %! assert (gn, {'0.1'; '0.2'; '0.3'}); -%! assert (gl, [0.1; 0.2; 0.3]); \ No newline at end of file +%! assert (gl, [0.1; 0.2; 0.3]); + +%!error ... +%! grp2idx (ones (2, 2)) +%!error ... +%! grp2idx (struct ("a", 1)) +%!error ... +%! 
grp2idx (table ([1 2])) \ No newline at end of file From 3d802042d89c01d1f5902392b8e1a995f528e93e Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 18:12:12 +0530 Subject: [PATCH 3/8] 3 --- inst/grp2idx.m | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index 0ac03f6b..f112cd88 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -41,6 +41,10 @@ print_usage (); endif + if (iscell (s) && ! iscellstr (s)) + error ("grp2idx: cell array S must be a cell array of strings"); + endif + s_was_char = false; s_was_categorical = false; s_was_duration = false; @@ -65,7 +69,7 @@ [gl, I, g] = unique (s(:)); ## Fix order in here, since unique does not support this yet - if (iscellstr (s) && ! s_was_categorical) + if (iscellstr (s) && ! s_was_categorical && ! isempty (gl)) I = sort (I); for i = 1:length (gl) gl_s(i) = gl(g(I(i))); @@ -613,6 +617,12 @@ %! assert (gn, {'0.1'; '0.2'; '0.3'}); %! assert (gl, [0.1; 0.2; 0.3]); +%!error ... +%! grp2idx ({'a', 1}) +%!error ... +%! grp2idx ({1, 2}) +%!error ... +%! grp2idx ({'a', {}}) %!error ... %! grp2idx (ones (2, 2)) %!error ... From 029cb77d6f917df41861300757be9ef0ed99746b Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 18:12:53 +0530 Subject: [PATCH 4/8] 4 --- inst/grp2idx.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index f112cd88..816242bd 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -628,4 +628,4 @@ %!error ... %! grp2idx (struct ("a", 1)) %!error ... -%! grp2idx (table ([1 2])) \ No newline at end of file +%! 
grp2idx (table ([1 2])) From 472a7e73e3daa90a42d70b5fa8e803af722ddb7a Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 18:22:38 +0530 Subject: [PATCH 5/8] 5 --- inst/grp2idx.m | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index 816242bd..959e8174 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -1,5 +1,6 @@ ## Copyright (C) 2015 Carnë Draug ## Copyright (C) 2022 Andreas Bertsatos +## Copyright (C) 2025 Swayam Shah ## ## This file is part of the statistics package for GNU Octave. ## @@ -22,10 +23,14 @@ ## ## Get index for group variables. ## -## For variable @var{s}, returns the indices @var{g}, into the variable -## groups @var{gn} and @var{gl}. The first has a string representation of -## the groups while the later has its actual values. In the case of numerical -## and logical data types, the group indices are ordered in sorted order of +## For the grouping variable @var{s}, return the indices @var{g}, into the +## variable groups @var{gn} and @var{gl}. The input @var{s} may be a +## numeric vector, logical vector, char array, cell array of strings, string +## vector, categorical vector, or duration vector. +## +## The first output @var{gn} has a string representation of the groups while +## the latter @var{gl} has its actual values. In the case of numerical and +## logical data types, the group indices are ordered in sorted order of ## @var{s}. In the case of categorical arrays, the group indices are allocated ## by the order of the categories in @var{s}. For the rest of the data types, ## the group indices are allocated by order of first appearance in @var{s}.
From d2e5c570686bdcc44f7b33d1486e4859f7e5d6cc Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 19:47:50 +0530 Subject: [PATCH 6/8] 6 --- inst/grp2idx.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index 959e8174..7ea8d823 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -68,7 +68,7 @@ s(undef) = {''}; elseif (isduration (s) && isvector (s)) s_was_duration = true; - elseif (! isvector (s) || isstruct (s) || istable (s)) + elseif (istable (s) || ! isvector (s) || isstruct (s)) error ("grp2idx: S must be a vector, cell array of strings, or char matrix"); endif From fc0869aef836602481c9d66eba84d674770fe867 Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 19:53:36 +0530 Subject: [PATCH 7/8] 7 --- inst/grp2idx.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index 7ea8d823..9c889851 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -633,4 +633,4 @@ %!error ... %! grp2idx (struct ("a", 1)) %!error ... -%! grp2idx (table ([1 2])) +%! grp2idx (table ([1; 2])) From bb31d6f6911dc00c171b51f2d9611afce59f7ae9 Mon Sep 17 00:00:00 2001 From: Swayam Shah Date: Thu, 18 Dec 2025 19:55:08 +0530 Subject: [PATCH 8/8] 8 --- inst/grp2idx.m | 55 -------------------------------------------------- 1 file changed, 55 deletions(-) diff --git a/inst/grp2idx.m b/inst/grp2idx.m index 9c889851..7944bece 100644 --- a/inst/grp2idx.m +++ b/inst/grp2idx.m @@ -146,15 +146,6 @@ endfunction -# test for one output argument -%!test -%! g = grp2idx ([3 2 1 2 3 1]); -%! assert (isequal (g, [3; 2; 1; 2; 3; 1])); -# test for two output arguments -%!test -%! [g, gn] = grp2idx (['b'; 'a'; 'c'; 'a']); -%! assert (isequal (g, [1; 2; 3; 2])); -%! assert (isequal (gn, {'b'; 'a'; 'c'})); ## test boolean input and note that row or column vector makes no difference %!test %! in = [true false false true]; @@ -421,12 +412,6 @@ %! assert (gn, {'1'; '2'; '3'; '4'; '5'}); %! 
assert (gl, [1; 2; 3; 4; 5]); %!test -%! s = [5 4 3 2 1]; -%! [g, gn, gl] = grp2idx (s); -%! assert (g, [5; 4; 3; 2; 1]); -%! assert (gn, {'1'; '2'; '3'; '4'; '5'}); -%! assert (gl, [1; 2; 3; 4; 5]); -%!test %! s = [1 1 1 1 1]; %! [g, gn, gl] = grp2idx (s); %! assert (g, [1; 1; 1; 1; 1]); @@ -565,62 +550,22 @@ %! assert (isequal (size (gn), [0 1])) %! assert (isequal (size (gl), [0 1])); %!test -%! s = [1 2 3]; -%! g = grp2idx (s); -%! assert (g, [1; 2; 3]); -%!test -%! s = {'a'; 'b'; 'c'}; -%! g = grp2idx (s); -%! assert (g, [1; 2; 3]); -%!test -%! s = [true false true]; -%! g = grp2idx (s); -%! assert (g, [2; 1; 2]); -%!test -%! s = [1 2 3 4 5]; -%! [g, gn] = grp2idx (s); -%! assert (g, [1; 2; 3; 4; 5]); -%! assert (gn, {'1'; '2'; '3'; '4'; '5'}); -%!test -%! s = {'x'; 'y'; 'z'}; -%! [g, gn] = grp2idx (s); -%! assert (g, [1; 2; 3]); -%! assert (gn, {'x'; 'y'; 'z'}); -%!test %! s = categorical ({'cat1', 'cat2', 'cat3'}); %! [g, gn] = grp2idx (s); %! assert (g, [1; 2; 3]); %! assert (gn, {'cat1'; 'cat2'; 'cat3'}); %!test -%! s = [1; 1; 2; 2; 3; 3; 4; 4; 5; 5]; -%! [g, gn, gl] = grp2idx (s); -%! assert (g, [1; 1; 2; 2; 3; 3; 4; 4; 5; 5]); -%! assert (gn, {'1'; '2'; '3'; '4'; '5'}); -%! assert (gl, [1; 2; 3; 4; 5]); -%!test %! s = {'group1'; 'group2'; 'group1'; 'group3'; 'group2'; 'group3'}; %! [g, gn, gl] = grp2idx (s); %! assert (g, [1; 2; 1; 3; 2; 3]); %! assert (gn, {'group1'; 'group2'; 'group3'}); %! assert (gl, {'group1'; 'group2'; 'group3'}); %!test -%! s = [10 20 30 40 50]; -%! [g, gn, gl] = grp2idx (s); -%! assert (g, [1; 2; 3; 4; 5]); -%! assert (gn, {'10'; '20'; '30'; '40'; '50'}); -%! assert (gl, [10; 20; 30; 40; 50]); -%!test %! s = logical ([1 0 1 0 1 0]); %! [g, gn, gl] = grp2idx (s); %! assert (g, [2; 1; 2; 1; 2; 1]); %! assert (gn, {'0'; '1'}); %! assert (gl, [false; true]); -%!test -%! s = [0.1 0.2 0.3 0.1 0.2]; -%! [g, gn, gl] = grp2idx (s); -%! assert (g, [1; 2; 3; 1; 2]); -%! assert (gn, {'0.1'; '0.2'; '0.3'}); -%! 
assert (gl, [0.1; 0.2; 0.3]); %!error ... %! grp2idx ({'a', 1})