Skip to content

Commit 774ec19

Browse files
committed
feat: implement filtering and selection methods
- Added methods for column selection: select, drop, selectByPattern
- Added methods for row filtering: filter, query, where
- Added methods for row selection: at, iloc, loc
- Added methods for sampling: sample, stratifiedSample
- Created comprehensive test suite for all new methods
- Fixed Excel reader syntax error
- Applied code formatting according to project standards
1 parent dbc38aa commit 774ec19

29 files changed

+2666
-2
lines changed

.gitignore

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,5 +49,6 @@ coverage/
4949

5050
TODO.md
5151
CONTEXT*
52-
todo*
53-
todo.md
52+
#todo*
53+
#todo.md
54+
#TODO.md

output.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
date,open,high,low,close,volume
2+
2023-01-01,100.5,105.75,99.25,103.5,1000000
3+
2023-01-02,103.75,108.25,102.5,107.25,1500000

output.tsv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
date open high low close volume
2+
2023-01-01 100.5 105.75 99.25 103.5 1000000
3+
2023-01-02 103.75 108.25 102.5 107.25 1500000

src/methods/filtering/at.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// src/methods/filtering/at.js
2+
3+
/**
 * Creates a function that selects a single row from a DataFrame by its
 * zero-based position.
 *
 * The returned function reads `frame.columns` (an object mapping column names
 * to array-like columns) and builds a plain object holding, for every column,
 * the value stored at `index`.
 *
 * @param {Object} deps - Dependencies (not used by this method)
 * @returns {Function} `(frame, index) => Object` mapping each column name to
 *   that column's value at `index`
 * @throws {Error} If `index` is not an integer, is negative, or is not smaller
 *   than the number of rows
 */
export const at = (deps) => (frame, index) => {
  // Number.isInteger is already false for every non-number value, so a
  // separate typeof check adds nothing.
  if (!Number.isInteger(index)) {
    throw new Error('Index must be an integer');
  }

  if (index < 0) {
    throw new Error('Index must be non-negative');
  }

  const columns = Object.keys(frame.columns);

  // The row count is taken from the first column; a frame without columns is
  // treated as having zero rows.
  const rowCount = frame.columns[columns[0]]?.length ?? 0;

  if (index >= rowCount) {
    // An empty frame has no valid range, so avoid printing "(0--1)".
    const range = rowCount === 0 ? 'frame has no rows' : `0-${rowCount - 1}`;
    throw new Error(`Index ${index} is out of bounds (${range})`);
  }

  // Collect the value of every column at the requested position
  const result = {};
  for (const column of columns) {
    result[column] = frame.columns[column][index];
  }

  return result;
};

src/methods/filtering/drop.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// src/methods/filtering/drop.js
2+
3+
/**
 * Creates a function that removes specific columns from a DataFrame.
 *
 * The returned function does not modify `frame`; it builds a new object of the
 * form `{ columns: { ... } }`. The kept columns are NOT copied: the result
 * refers to the very same column arrays as the input frame.
 *
 * @param {Object} deps - Dependencies
 * @param {Function} deps.validateColumn - Called as `validateColumn(frame, name)`
 *   for every name in the drop list before anything is removed
 * @returns {Function} `(frame, columns) => frame` that returns a new frame
 *   without the listed columns
 * @throws {Error} If `columns` is not an array; anything thrown by
 *   `validateColumn` is propagated unchanged
 */
export const drop =
  ({ validateColumn }) =>
  (frame, columns) => {
    if (!Array.isArray(columns)) {
      throw new Error('Columns must be an array');
    }

    // Validate every requested name up front so nothing is built on bad input
    columns.forEach((column) => validateColumn(frame, column));

    // A Set gives constant-time membership tests instead of scanning the drop
    // list once per frame column.
    const dropped = new Set(columns);

    const result = {
      columns: {},
    };

    // Keep the frame's own column order for the columns that survive
    for (const column of Object.keys(frame.columns)) {
      if (!dropped.has(column)) {
        result.columns[column] = frame.columns[column];
      }
    }

    return result;
  };

src/methods/filtering/filter.js

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// src/methods/filtering/filter.js
2+
3+
/**
 * Creates a function that filters rows in a DataFrame based on a condition.
 *
 * For every row position the returned function builds a plain object
 * `{ columnName: value, ... }` and passes it to `condition`; rows for which the
 * call returns a truthy value are kept, in their original order. The input
 * frame is not modified.
 *
 * Columns stored as typed arrays (any typed-array type, not only Float64Array
 * and Int32Array) come back as the same typed-array type; all other columns
 * come back as plain arrays.
 *
 * @param {Object} deps - Dependencies (not used by this method)
 * @returns {Function} `(frame, condition) => frame` that returns a new
 *   `{ columns }` object holding only the matching rows
 * @throws {Error} If `condition` is not a function
 */
export const filter = (deps) => (frame, condition) => {
  if (typeof condition !== 'function') {
    throw new Error('Condition must be a function');
  }

  const columns = Object.keys(frame.columns);

  // The row count is taken from the first column; a frame without columns is
  // treated as having zero rows.
  const rowCount = frame.columns[columns[0]]?.length ?? 0;

  // Pass 1: decide which row positions survive
  const keptRows = [];
  for (let i = 0; i < rowCount; i++) {
    const row = {};
    for (const column of columns) {
      row[column] = frame.columns[column][i];
    }

    if (condition(row)) {
      keptRows.push(i);
    }
  }

  // Pass 2: materialise each column once, preserving its storage type
  const result = {
    columns: {},
  };

  for (const column of columns) {
    const source = frame.columns[column];
    const values = keptRows.map((i) => source[i]);

    // ArrayBuffer.isView is also true for DataView, which is not array-like
    const isTyped = ArrayBuffer.isView(source) && !(source instanceof DataView);

    result.columns[column] = isTyped ? source.constructor.from(values) : values;
  }

  return result;
};

src/methods/filtering/iloc.js

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// src/methods/filtering/iloc.js
2+
3+
/**
 * Creates a function that selects rows and columns by their integer positions.
 *
 * Column positions refer to the order of `Object.keys(frame.columns)`. Each of
 * `rowIndices` and `columnIndices` may be a single value or an array; a single
 * value is treated as a one-element array. Row positions may repeat (the row
 * is then repeated in the result). A repeated column position selects that
 * column once. The input frame is not modified.
 *
 * Columns stored as typed arrays come back as the same typed-array type; all
 * other columns come back as plain arrays.
 *
 * @param {Object} deps - Dependencies (not used by this method)
 * @returns {Function} `(frame, rowIndices, columnIndices) => frame` that
 *   returns a new `{ columns }` object with the selected rows and columns
 * @throws {Error} If any index is not a non-negative integer (fractional and
 *   non-finite values are rejected with the same message as non-numbers), or if
 *   the largest row/column index is out of bounds
 */
export const iloc = (deps) => (frame, rowIndices, columnIndices) => {
  // Accept a single position as shorthand for a one-element list
  const rows = Array.isArray(rowIndices) ? rowIndices : [rowIndices];
  const cols = Array.isArray(columnIndices) ? columnIndices : [columnIndices];

  // Positions must be whole numbers: a fractional position would read
  // `undefined` from a column (or resolve to no column name at all).
  const isPosition = (idx) => Number.isInteger(idx) && idx >= 0;

  // Largest element without spreading into Math.max, which can exceed the
  // engine's argument limit for very long index lists. Empty list -> -Infinity.
  const largest = (values) =>
    values.reduce((max, value) => (value > max ? value : max), -Infinity);

  if (!rows.every(isPosition)) {
    throw new Error('Row indices must be non-negative numbers');
  }

  if (!cols.every(isPosition)) {
    throw new Error('Column indices must be non-negative numbers');
  }

  const allColumns = Object.keys(frame.columns);

  // The row count is taken from the first column; a frame without columns is
  // treated as having zero rows.
  const rowCount = frame.columns[allColumns[0]]?.length ?? 0;

  const maxRowIndex = largest(rows);
  if (maxRowIndex >= rowCount) {
    const range = rowCount === 0 ? 'frame has no rows' : `0-${rowCount - 1}`;
    throw new Error(`Row index ${maxRowIndex} is out of bounds (${range})`);
  }

  const maxColumnIndex = largest(cols);
  if (maxColumnIndex >= allColumns.length) {
    const range =
      allColumns.length === 0
        ? 'frame has no columns'
        : `0-${allColumns.length - 1}`;
    throw new Error(`Column index ${maxColumnIndex} is out of bounds (${range})`);
  }

  const result = {
    columns: {},
  };

  // Build every selected column in one step. Assigning (rather than pushing
  // into a shared array) means a repeated column position cannot duplicate
  // the row values.
  for (const columnIdx of cols) {
    const column = allColumns[columnIdx];
    const source = frame.columns[column];
    const values = rows.map((rowIdx) => source[rowIdx]);

    // ArrayBuffer.isView is also true for DataView, which is not array-like
    const isTyped = ArrayBuffer.isView(source) && !(source instanceof DataView);

    result.columns[column] = isTyped ? source.constructor.from(values) : values;
  }

  return result;
};

src/methods/filtering/index.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// src/methods/filtering/index.js
2+
3+
export { select } from './select.js';
4+
export { drop } from './drop.js';
5+
export { selectByPattern } from './selectByPattern.js';
6+
export { filter } from './filter.js';
7+
export { query } from './query.js';
8+
export { where } from './where.js';
9+
export { at } from './at.js';
10+
export { iloc } from './iloc.js';
11+
export { loc } from './loc.js';
12+
export { sample } from './sample.js';
13+
export { stratifiedSample } from './stratifiedSample.js';

src/methods/filtering/loc.js

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// src/methods/filtering/loc.js
2+
3+
/**
 * Creates a function that selects rows by integer position and columns by
 * name.
 *
 * Each of `rowIndices` and `columnNames` may be a single value or an array; a
 * single value is treated as a one-element array. Row positions may repeat
 * (the row is then repeated in the result). A repeated column name selects
 * that column once. An empty `columnNames` list yields a frame with no
 * columns. The input frame is not modified.
 *
 * Columns stored as typed arrays come back as the same typed-array type; all
 * other columns come back as plain arrays.
 *
 * @param {Object} deps - Dependencies
 * @param {Function} deps.validateColumn - Called as `validateColumn(frame, name)`
 *   for every requested column name
 * @returns {Function} `(frame, rowIndices, columnNames) => frame` that returns
 *   a new `{ columns }` object with the selected rows and columns
 * @throws {Error} If any row index is not a non-negative integer (fractional
 *   and non-finite values are rejected with the same message as non-numbers),
 *   or if the largest row index is out of bounds; anything thrown by
 *   `validateColumn` is propagated unchanged
 */
export const loc =
  ({ validateColumn }) =>
  (frame, rowIndices, columnNames) => {
    // Accept a single position / name as shorthand for a one-element list
    const rows = Array.isArray(rowIndices) ? rowIndices : [rowIndices];
    const names = Array.isArray(columnNames) ? columnNames : [columnNames];

    // Positions must be whole numbers: a fractional position would read
    // `undefined` from a column.
    if (!rows.every((idx) => Number.isInteger(idx) && idx >= 0)) {
      throw new Error('Row indices must be non-negative numbers');
    }

    names.forEach((column) => validateColumn(frame, column));

    // Row count comes from the first requested column. With an empty column
    // list there is none, so fall back to the frame's first column instead of
    // treating the frame as empty.
    const countColumn =
      names.length > 0 ? names[0] : Object.keys(frame.columns)[0];
    const rowCount = frame.columns[countColumn]?.length ?? 0;

    // Largest row index without spreading into Math.max, which can exceed the
    // engine's argument limit for very long index lists.
    const maxRowIndex = rows.reduce(
      (max, value) => (value > max ? value : max),
      -Infinity,
    );
    if (maxRowIndex >= rowCount) {
      const range = rowCount === 0 ? 'frame has no rows' : `0-${rowCount - 1}`;
      throw new Error(`Row index ${maxRowIndex} is out of bounds (${range})`);
    }

    const result = {
      columns: {},
    };

    // Build every selected column in one step. Assigning (rather than pushing
    // into a shared array) means a repeated column name cannot duplicate the
    // row values.
    for (const column of names) {
      const source = frame.columns[column];
      const values = rows.map((rowIdx) => source[rowIdx]);

      // ArrayBuffer.isView is also true for DataView, which is not array-like
      const isTyped =
        ArrayBuffer.isView(source) && !(source instanceof DataView);

      result.columns[column] = isTyped
        ? source.constructor.from(values)
        : values;
    }

    return result;
  };

0 commit comments

Comments
 (0)