Skip to content

Commit 1838874

Browse files
authored
Merge pull request #12 from AlphaQuantJS/dev
feat: implement dataframe transform methods with mutate functionality
2 parents 5322e2d + c50ee2c commit 1838874

File tree

18 files changed

+2248
-1306
lines changed

18 files changed

+2248
-1306
lines changed
Lines changed: 111 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,122 @@
1+
import { Series } from '../../../core/dataframe/Series.js';
2+
import { VectorFactory } from '../../../core/storage/VectorFactory.js';
3+
/**
 * Apply a function to every value of the selected columns in a DataFrame.
 *
 * The callback is invoked as `func(value, index, columnName)`. A `null` or
 * `undefined` return value is stored as `NaN`.
 *
 * @param {DataFrame} df - DataFrame to transform
 * @param {Function} func - Function to apply to each value
 * @param {Object} [options] - Options for apply
 * @param {boolean} [options.inplace=false] - Whether to modify the DataFrame in place
 * @param {string|string[]} [options.columns] - Columns to apply the function to (default: all columns)
 * @returns {DataFrame} - New DataFrame with transformed values or the original DataFrame if inplace=true
 * @throws {Error} If `func` is not a function or a requested column does not exist
 */
export function apply(df, func, options = {}) {
  const { inplace = false, columns = df.columns } = options;

  // Validate function
  if (typeof func !== 'function') {
    throw new Error('Function to apply must be provided');
  }

  // Accept a single column name as well as an array of names
  const targetColumns = Array.isArray(columns) ? columns : [columns];

  // Validate columns
  for (const col of targetColumns) {
    if (!df.columns.includes(col)) {
      throw new Error(`Column '${col}' not found`);
    }
  }

  // Compute every transformed column BEFORE touching `df`, so that an
  // exception thrown by `func` cannot leave an inplace update half-applied.
  // A column listed more than once is transformed once per occurrence,
  // each pass starting from the previous pass's output.
  const transformed = new Map();
  for (const col of targetColumns) {
    const source = transformed.has(col)
      ? transformed.get(col)
      : df.col(col).toArray();
    transformed.set(
      col,
      source.map((value, index) => {
        const mapped = func(value, index, col);
        // null and undefined results are normalised to NaN
        return mapped === null || mapped === undefined ? NaN : mapped;
      }),
    );
  }

  if (inplace) {
    // Build all replacement Series first, then swap them in together
    const replacements = [];
    for (const [col, values] of transformed) {
      const vector = VectorFactory.from(values);
      replacements.push([col, new Series(vector, { name: col })]);
    }
    for (const [col, series] of replacements) {
      df._columns[col] = series;
    }
    return df;
  }

  // Copy mode: keep the original column order, substituting transformed data
  const data = {};
  for (const col of df.columns) {
    data[col] = transformed.has(col)
      ? transformed.get(col)
      : df.col(col).toArray();
  }
  return new df.constructor(data);
}
74+
/**
 * Run `func` over the values of every column of a DataFrame.
 *
 * This is a convenience wrapper: it forwards to `apply` with the `columns`
 * option forced to the full column list of `df`, overriding any `columns`
 * entry present in `options`.
 *
 * @param {DataFrame} df - DataFrame to transform
 * @param {Function} func - Function to apply to each value
 * @param {Object} options - Options for applyAll
 * @param {boolean} [options.inplace=false] - Whether to modify the DataFrame in place
 * @returns {DataFrame} - New DataFrame with transformed values or the original DataFrame if inplace=true
 */
export function applyAll(df, func, options = {}) {
  // Caller options first, then the full column list so it always wins
  const forwarded = Object.assign({}, options, { columns: df.columns });
  return apply(df, func, forwarded);
}
88+
/**
 * Register apply methods on DataFrame prototype.
 *
 * Installs two methods:
 *  - `apply(columns, func, options)` transforms the given column(s).
 *    It also accepts `apply(func, options)`, which transforms all columns.
 *  - `applyAll(func, options)` transforms all columns.
 *
 * Both delegate to the module-level `apply` / `applyAll`, which return the
 * receiver itself when `options.inplace` is set and a new DataFrame otherwise.
 *
 * @param {Class} DataFrame - DataFrame class to extend
 * @throws {Error} If no DataFrame class is supplied
 */
export function register(DataFrame) {
  if (!DataFrame) {
    throw new Error('DataFrame instance is required');
  }

  DataFrame.prototype.apply = function (columns, func, options = {}) {
    // If first argument is a function, assume it's for all columns
    if (typeof columns === 'function') {
      // In the `df.apply(fn, options)` form the options object lands in the
      // second positional slot. Read it from there, and fall back to the
      // third slot for callers using `df.apply(fn, undefined, options)`.
      const effectiveOptions =
        func !== null && typeof func === 'object' ? func : options;
      return applyAll(this, columns, effectiveOptions);
    }

    return apply(this, func, { ...options, columns });
  };

  DataFrame.prototype.applyAll = function (func, options = {}) {
    return applyAll(this, func, options);
  };
}
Lines changed: 70 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,94 @@
1+
import { Series } from '../../../core/dataframe/Series.js';
2+
import { VectorFactory } from '../../../core/storage/VectorFactory.js';
3+
/**
 * Adds or updates columns in a DataFrame.
 *
 * Each value in `columns` may be:
 *  - an object exposing `toArray()` (e.g. a Series), whose array is used;
 *  - an array, used as-is;
 *  - anything else, treated as a constant repeated `df.rowCount` times.
 *
 * Existing columns keep their position when updated; new columns are
 * appended in the order they appear in `columns`.
 *
 * @param {DataFrame} df - DataFrame instance
 * @param {Object} columns - Object with column names as keys and arrays or Series as values
 * @param {Object} [options] - Options for assign
 * @param {boolean} [options.inplace=false] - Whether to modify the DataFrame in place
 * @returns {DataFrame} - New DataFrame with added/updated columns or the original DataFrame if inplace=true
 * @throws {Error} If `df` is not an object or `columns` is not a plain (non-array) object
 */
export function assign(df, columns, options = {}) {
  // Validate arguments
  if (!df || typeof df !== 'object') {
    throw new Error('DataFrame instance is required');
  }

  if (!columns || typeof columns !== 'object' || Array.isArray(columns)) {
    throw new Error('Columns must be an object');
  }

  const { inplace = false } = options;

  // Normalise every incoming value to a plain array. A Map is used so that
  // column names such as "toString" or "constructor" are never confused with
  // properties inherited from Object.prototype.
  const processed = new Map();
  for (const [key, value] of Object.entries(columns)) {
    if (value && typeof value.toArray === 'function') {
      // Series-like value: take its underlying values
      processed.set(key, value.toArray());
    } else if (Array.isArray(value)) {
      processed.set(key, value);
    } else {
      // For constant values, create an array of that value
      processed.set(key, Array(df.rowCount).fill(value));
    }
  }

  if (inplace) {
    // Build every replacement Series before mutating `df`, so a failure while
    // constructing one of them does not leave a partial update behind.
    const replacements = [];
    for (const [key, values] of processed) {
      const vector = VectorFactory.from(values);
      replacements.push([key, new Series(vector, { name: key })]);
    }

    const appended = [];
    for (const [key, series] of replacements) {
      df._columns[key] = series;
      if (!df._order.includes(key)) {
        appended.push(key);
      }
    }

    if (appended.length > 0) {
      // `_order` is replaced rather than pushed to: the original code notes
      // that the existing array is frozen.
      Object.defineProperty(df, '_order', {
        value: Object.freeze([...df._order, ...appended]),
      });
    }

    // Return the original DataFrame instance
    return df;
  }

  // Copy mode: walk the existing columns in order so updated columns stay
  // where they were, then append the new ones.
  const newData = {};
  for (const col of df.columns) {
    newData[col] = processed.has(col)
      ? processed.get(col)
      : df.col(col).toArray();
  }
  for (const [key, values] of processed) {
    // Re-assigning an existing key keeps its position; new keys are appended
    newData[key] = values;
  }

  return new df.constructor(newData);
}
4279

/**
 * Installs `assign(columns, options)` on the DataFrame prototype.
 *
 * The installed method forwards to the module-level `assign` with the
 * receiver as the DataFrame. When `options.inplace` is truthy it returns the
 * receiver itself; otherwise it returns whatever `assign` produced.
 *
 * @param {Class} DataFrame - DataFrame class to extend
 */
export function register(DataFrame) {
  DataFrame.prototype.assign = function (columns, options = {}) {
    const outcome = assign(this, columns, options);
    // Inplace calls hand back the very instance the method was invoked on
    return options.inplace ? this : outcome;
  };
}

0 commit comments

Comments
 (0)