-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Description
Bug Report: transformHeader Called Multiple Times in Papa Parse v5.5.1
-
Title
transformHeader Function Invoked for Data Rows in Papa Parse v5.5.1 -
Description
In Papa Parse version 5.5.1, the transformHeader function is being called for each data row's fields instead of exclusively for the header row. This results in unexpected transformations of data fields, leading to incorrect parsed output. -
Expected Behavior
transformHeader should be invoked only once for each header field in the first row of the CSV.
Data rows should be processed without invoking transformHeader. -
Actual Behavior
transformHeader is invoked for each field in data rows, causing data values to be transformed incorrectly.
This leads to incorrect and malformed rows in the transformation process. -
Steps to Reproduce
Set up the Environment:
Install Papa Parse version 5.5.1.
npm install papaparse@5.5.1
Create a file named index.js and paste the following code:
// Load the Papa Parse library under test.
const Papa = require('papaparse');
// Sample CSV input: a three-column header row, two complete data rows, and a
// final row with only two fields, the first of which is empty.
const csvContent = `Name,Age,City
John Doe,30,New York
Jane Smith,25,Los Angeles
,abc`;
/**
 * Normalizes a header name by trimming surrounding whitespace and
 * lower-casing it, logging the original and normalized values.
 *
 * @param {string} header - Raw header text.
 * @returns {string} The trimmed, lower-cased header.
 */
function mapHeader(header) {
  const trimmed = header.trim();
  const normalized = trimmed.toLowerCase();
  const message = `Transforming header: "${header}" -> "${normalized}"`;
  console.log(message);
  return normalized;
}
/**
 * Logs a field value and returns it unchanged.
 *
 * @param {*} data - The field value being processed.
 * @returns {*} The same value that was passed in.
 */
function transformData(data) {
  const message = `Transforming data: "${data}"`;
  console.log(message);
  return data;
}
// Buffer of parsed rows awaiting processing; reassigned to a fresh array after each flush.
let currentChunk = [];
// Number of buffered rows that triggers a flush to processChunk.
const chunkSize = 2;
/**
 * Handles a batch of parsed rows by printing it to the console.
 *
 * @param {Array} chunk - The rows collected for this batch.
 * @returns {void}
 */
function processChunk(chunk) {
  console.log(chunk);
}
// Parse the sample CSV with header mapping and per-field transforms enabled,
// collecting parsed rows into batches of `chunkSize`.
Papa.parse(csvContent, {
  header: true,
  dynamicTyping: true,
  skipEmptyLines: true,
  transformHeader: mapHeader,
  transform: transformData,
  // Buffer each reported row; flush the buffer once it reaches `chunkSize`.
  step(results, parser) {
    console.log('Parsed Row:', results.data);
    currentChunk.push(results.data);
    if (currentChunk.length !== chunkSize) {
      return;
    }
    processChunk(currentChunk);
    currentChunk = [];
  },
  // Flush whatever is left in the buffer when parsing finishes.
  complete() {
    console.log('Parsing Complete.');
    if (currentChunk.length === 0) {
      console.log('No remaining data to process.');
      return;
    }
    processChunk(currentChunk);
  },
  error(error) {
    console.error('Parsing Error:', error.message);
  },
});
Run the script
node index.js
Observe the Output:
With Papa Parse v5.5.1:
Transforming header: "Name" -> "name"
Transforming header: "Age" -> "age"
Transforming header: "City" -> "city"
Transforming header: "name" -> "name"
Transforming header: "age" -> "age"
Transforming header: "city" -> "city"
Transforming header: "John Doe" -> "john doe"
Transforming header: "30" -> "30"
Transforming header: "New York" -> "new york"
Transforming data: "john doe"
Transforming data: "30"
Transforming data: "new york"
Parsed Row: { name: 'john doe', age: 30, city: 'new york' }
Transforming header: "Jane Smith" -> "jane smith"
Transforming header: "25" -> "25"
Transforming header: "Los Angeles" -> "los angeles"
Transforming data: "jane smith"
Transforming data: "25"
Transforming data: "los angeles"
Parsed Row: { name: 'jane smith', age: 25, city: 'los angeles' }
[ { name: 'john doe', age: 30, city: 'new york' },
{ name: 'jane smith', age: 25, city: 'los angeles' } ]
Transforming header: "" -> ""
Transforming header: "abc" -> "abc"
Transforming data: ""
Transforming data: "abc"
Parsed Row: { name: null, age: 'abc' }
Parsing Complete.
[ { name: null, age: 'abc' } ]
With Papa Parse v5.4.1:
Transforming header: "Name" -> "name"
Transforming header: "Age" -> "age"
Transforming header: "City" -> "city"
Transforming header: "Name" -> "name"
Transforming header: "Age" -> "age"
Transforming header: "City" -> "city"
Transforming data: "John Doe"
Transforming data: "30"
Transforming data: "New York"
Parsed Row: { name: 'John Doe', age: 30, city: 'New York' }
Transforming data: "Jane Smith"
Transforming data: "25"
Transforming data: "Los Angeles"
Parsed Row: { name: 'Jane Smith', age: 25, city: 'Los Angeles' }
[ { name: 'John Doe', age: 30, city: 'New York' }, { name: 'Jane Smith', age: 25, city: 'Los Angeles' } ]
Transforming data: ""
Transforming data: "abc"
Parsed Row: { name: null, age: 'abc' }
Parsing Complete.
[ { name: null, age: 'abc' } ]
The transformHeader function in Papa Parse version 5.5.1 is incorrectly being invoked for data rows, leading to unexpected transformations. This regression affects data integrity during CSV parsing.