Here's a great tutorial on using Data-Forge for data science from the book JavaScript for Data Science.
Scroll to the end for more resources.
Node.js:
npm install --save data-forge
To read and write files in Node.js:
npm install --save data-forge-fs
To plot charts in Node.js:
npm install --save data-forge-plot @data-forge-plot/render
const dataForge = require('data-forge');
To read and write files:
require('data-forge-fs');
To plot charts:
require('data-forge-plot');
require('@data-forge-plot/render');
// CSV round trip: load a CSV file, convert column types, drop/filter/transform
// the data, then save the result as a new CSV file.
// `predicate` and `transform` are placeholders for your own row functions.
dataForge.readFileSync("input.csv")        // Read input file.
    .parseCSV()                            // Parse CSV data to a dataframe.
    .parseInts(["Column2", "Column3"])     // Parse from strings to integers (multiple columns go in one array).
    .parseDates("Column4")                 // Parse from strings to date objects.
    .dropSeries("Column5")                 // Don't want column 5.
    .where(row => predicate(row))          // Filter out rows that you don't want.
    .select(row => transform(row))         // Apply transformation to each row.
    .asCSV()                               // Write out data file in CSV (or other) format.
    .writeFileSync("output.csv");          // Write output file.
// JSON round trip, written as named stages: load, parse, reshape, save.
// `predicate` and `transform` are placeholders for your own row functions.

// Stage 1: read the input file.
const jsonInputFile = dataForge.readFileSync("input.json");

// Stage 2: parse the JSON data to a dataframe and convert the date column
// from strings to date objects.
const jsonFrame = jsonInputFile
    .parseJSON()
    .parseDates("Column4");

// Stage 3: drop the unwanted column, keep only the rows accepted by
// `predicate`, then apply `transform` to each remaining row.
const jsonReshaped = jsonFrame
    .dropSeries("Column5")
    .where(row => predicate(row))
    .select(row => transform(row));

// Stage 4: serialize in JSON format and write the output file.
jsonReshaped
    .asJSON()
    .writeFileSync("output.json");
// Data-Forge also works on data that is already in memory:
// plain JavaScript array in, plain JavaScript array out.
const inputData = [ /* a JavaScript array of data records */ ];

// Wrap the records in a dataframe to get access to the query operations.
const recordsFrame = new dataForge.DataFrame(inputData);

const transformedData = recordsFrame
    .dropSeries("Column5")           // Column 5 is not wanted.
    .where(row => predicate(row))    // Keep only the rows accepted by `predicate`.
    .select(row => transform(row))   // Reshape each remaining row with `transform`.
    .toArray();                      // Back to normal JavaScript data.
// Load the sales records and convert the columns used below to real types.
const salesData = dataForge.readFileSync("sales.csv") // Read input file.
    .parseCSV()              // Parse CSV data to a dataframe (asCSV is for writing, not reading).
    .parseDates("Date")      // Parse from strings to date objects.
    .parseFloats("Sales");   // Parse from strings to floating-point numbers.
// Summarize sales per client: one output row per client with the average
// and the total of that client's sales.
const summarized = salesData
    .orderBy(row => row.Date)           // Sort by date.
    .groupBy(row => row.ClientName)     // Group by client.
    .select(group => {                  // Aggregate sales per client.
        // Extract the Sales values as a series once, so that the numeric
        // aggregations (average, sum) can be applied to it.
        const sales = group.deflate(row => row.Sales);
        return {
            ClientName: group.first().ClientName,
            Average: sales.average(),   // Average sales per client.
            Total: sales.sum(),         // Sum sales per client.
        };
    })
    .inflate();                         // Series -> dataframe.

// Save the per-client summary (not the raw sales data) to the summary file.
summarized.asCSV().writeFileSync("sales-by-client.csv");

// Plot the sales data to an image file.
salesData.plot().renderImage("my-chart.png");