Skip to content

Commit f0e05c3

Browse files
add new outputLoader.ts for advanced csv, json and xml data parsing (#34)
1 parent 8e9c7c7 commit f0e05c3

File tree

4 files changed

+239
-26
lines changed

4 files changed

+239
-26
lines changed

src/renderer/index.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
import type {ActivationFunction, OutputItem} from 'vscode-notebook-renderer';
1+
import type {
2+
ActivationFunction,
3+
OutputItem
4+
} from 'vscode-notebook-renderer';
25
import errorOverlay from 'vscode-notebook-error-overlay';
36
import {render} from './renderer';
47

src/renderer/outputLoader.ts

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
import type {OutputItem} from 'vscode-notebook-renderer';
2+
import {csvParse} from 'd3-dsv';
3+
const xmlParser = require('fast-xml-parser');
4+
5+
/**
 * OutputLoader loads data from notebook cell output items.
 *
 * Loading is attempted in this order: the output item's JSON payload,
 * then its text (parsed as JSON, XML or CSV, or returned as plain text),
 * and finally the raw output item itself is returned as a fallback.
 */
export class OutputLoader {

  /**
   * Creates new OutputLoader instance.
   * @param outputData Notebook cell output item.
   * @param mimeType Notebook cell output mime type.
   */
  constructor (private outputData: OutputItem, private mimeType: string) {
  }

  /**
   * Gets data output.
   * @returns Parsed JSON/GeoJSON/XML object, CSV rows array, plain text string,
   * or the original output item when none of the text formats apply.
   */
  getData(): any {
    // try getting JSON data first
    const objectData = this.getJsonData(this.outputData);
    if (objectData !== undefined) {
      return objectData;
    }

    // try parsing text data
    let textData: string = this.outputData.text();
    if (textData.length > 0) {
      if (textData.startsWith("'") && textData.endsWith("'")) {
        // strip out start/end single quotes from notebook cell output
        textData = textData.slice(1, -1);
      }
      console.log('leaflet.map:data:text:', textData.substring(0, 300), '...');

      // see if text data is in json data format;
      // a literal `null` document is not usable data, so it is shown as text instead
      const jsonData = this.getJsonData(textData);
      if (jsonData !== undefined && jsonData !== null) {
        return jsonData;
      }

      if (this.isXml(textData)) {
        // parse XML data
        return this.xmlParse(textData);
      }

      if (this.isCsv(textData)) {
        // parse CSV data
        return csvParse(textData);
      }

      if (textData !== '{}' && !textData.startsWith('<Buffer ')) { // empty object or binary data
        return textData;
      }
    }

    // TODO: try loading binary Apache Arrow data
    const dataArray: Uint8Array = this.outputData.data();
    console.log(dataArray);
    if (dataArray.length > 0) {
      console.log(`leaflet.map:dataType: ${dataArray.constructor}`);
    }

    return this.outputData;
  }

  /**
   * Gets JSON object or data array,
   * CSV rows data array, or undefined
   * for plain text and binary data types.
   *
   * For a string argument the text is parsed as JSON. For any other argument
   * its `json()` method is called and the result is returned only when it is
   * GeoJSON (has `features`), an array, or a string holding CSV or XML.
   * Errors are logged and reported as `undefined`, never thrown.
   * @param data Notebook cell output data value.
   */
  getJsonData(data: any): any {
    try {
      if (typeof data === 'string') {
        // try parsing JSON string
        const objectData: any = this.parseJsonText(data);
        console.log(Array.isArray(objectData) ?
          'leaflet.map:data:format: JSON array' : 'leaflet.map:data:format: JSON');
        return objectData;
      }

      // try getting json data object
      let jsonData: any = data.json();
      if (jsonData === undefined || jsonData === null) {
        return undefined;
      }

      if (jsonData.data) {
        // use data object from REST response
        jsonData = jsonData.data;
      }

      if (jsonData.features) {
        console.log('leaflet.map:data:format: GeoJSON');
        return jsonData;
      }

      if (Array.isArray(jsonData)) {
        console.log('leaflet.map:data:format: JSON array');
        return jsonData;
      }

      if (typeof jsonData === 'string') {
        if (this.isCsv(jsonData)) {
          // parse CSV data for JSON response from REST Book
          // see: https://github.com/tanhakabir/rest-book/issues/114
          return csvParse(jsonData);
        }
        if (this.isXml(jsonData)) {
          // try to parse XML data as the last resort
          return this.xmlParse(jsonData);
        }
      }
    }
    catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.log('leaflet.map:data: JSON.parse error:\n', message);
    }
    return undefined;
  }

  /**
   * Patches garbled JSON string: unescapes `\\"` sequences, unwraps quoted
   * object literals (`"{` and `}"`), replaces literal `\xa0` with a space
   * and removes literal `\n` escape sequences.
   * @param data JSON data string.
   * @returns Patched up JSON string.
   */
  patchJson(data: string): string {
    return data
      .replace(/\\\\"/g, '"')
      .replace(/"{/g, '{')
      .replace(/}"/g, '}')
      .replace(/\\xa0/g, ' ')
      .replace(/\\n/g, '');
  }

  /**
   * Checks if text content is in CSV format.
   *
   * This is a naive check on at most the first 10 lines: the first line must
   * have more than one comma separated column, no column may start with `[`
   * or `{`, and every following line must have at least as many columns.
   * @param text Text content to check.
   */
  isCsv(text: string): boolean {
    if (!text) {
      return false;
    }

    // get text lines
    const maxLines = 10;
    const lines: string[] = text.trimEnd().split('\n', maxLines);
    console.log('leaflet.map:data:lines:', lines);

    const columns: string[] = lines[0].split(',');
    const columnCount = columns.length;
    if (columnCount <= 1) {
      return false;
    }
    console.log('leaflet.map:data:columns:', columns);

    // check columns for garbled json
    if (columns.some(column => column.startsWith('[') || column.startsWith('{'))) {
      return false;
    }

    // do naive check for some commas in the remaining sampled rows
    const rowsHaveColumns: boolean = lines.slice(1)
      .every(line => line.split(',').length >= columnCount);
    if (!rowsHaveColumns) {
      return false;
    }

    console.log('leaflet.map:data:format: CSV');
    return true;
  }

  /**
   * Parses xml data.
   * @param xml Xml data string.
   * @returns Parsed object, or an empty object when parsing fails (the error is logged).
   */
  xmlParse(xml: string): any {
    let jsonData = {};
    const xmlParserOptions = {
      attributeNamePrefix : '',
      textNodeName : 'value',
      ignoreAttributes : false,
      ignoreNameSpace : true,
      allowBooleanAttributes : true,
      parseNodeValue : true,
      parseAttributeValue : true,
      trimValues: true,
      // parseTrueNumberOnly: false,
      // arrayMode: false, //"strict"
    };
    try {
      jsonData = xmlParser.parse(xml, xmlParserOptions); // , true); // validate xml
      console.log('leaflet.map:data:format: XML');
    }
    catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      console.log('leaflet.map:data: XML parse error:\n', message);
    }
    return jsonData;
  }

  /**
   * Parses JSON text, trying the text as is first so that valid JSON is never
   * altered, and retrying with the `patchJson` repairs only when that fails.
   * @param text JSON text to parse.
   * @returns Parsed JSON value.
   * @throws SyntaxError when the patched text is not valid JSON either.
   */
  private parseJsonText(text: string): any {
    try {
      return JSON.parse(text);
    }
    catch {
      return JSON.parse(this.patchJson(text));
    }
  }

  /**
   * Checks if text starts with an XML declaration for version 1.x,
   * written with either double or single quotes.
   * @param text Text content to check.
   */
  private isXml(text: string): boolean {
    return /^<\?xml\s+version\s*=\s*(["'])1\.\d+\1/.test(text);
  }
}

src/renderer/renderer.ts

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,10 @@
1-
import type {RendererContext, OutputItem} from 'vscode-notebook-renderer';
1+
import type {
2+
RendererContext,
3+
OutputItem
4+
} from 'vscode-notebook-renderer';
5+
6+
import {OutputLoader} from './outputLoader';
7+
28
import './styles.css';
39
import './leaflet.css';
410
import './markerCluster.css';
@@ -22,43 +28,28 @@ const leafletMap = require('./leafletMap.js');
2228
*/
2329
export function render(output: IRenderInfo) {
2430
console.log(`leaflet.map:data:mimeType: ${output.mimeType}`);
25-
26-
// try to get JSON data
27-
let jsonData: any = {};
28-
try {
29-
jsonData = output.value.json();
30-
}
31-
catch (error: any) {
32-
console.log('leaflet.map:data: JSON.parse error:\n', error.message);
33-
}
34-
35-
if (jsonData.data) {
36-
// get JSON data from REST Book output
37-
jsonData = jsonData.data;
38-
}
39-
40-
if (jsonData.features) {
31+
const outputLoader: OutputLoader = new OutputLoader(output.value, output.mimeType);
32+
let data: any = outputLoader.getData();
33+
if (data.features) { // has geometry features collection
4134
// create leaflet map and add it to notebook cell output display
4235
const mapContainer: HTMLDivElement = document.createElement('div');
4336
mapContainer.className = 'map-container';
4437
output.container.appendChild(mapContainer);
45-
const map = leafletMap.createMap(jsonData, mapContainer); //output.container);
38+
const map = leafletMap.createMap(data, mapContainer);
4639
}
4740
else {
48-
// create Geo JSON text output display nodes
41+
// create text output display nodes
4942
const pre = document.createElement('pre');
50-
pre.className = 'geo-json';
43+
pre.className = 'text-output';
5144
const code = document.createElement('code');
52-
53-
if (typeof jsonData !== 'string') {
45+
if (typeof data !== 'string') {
5446
// stringify json data
55-
code.textContent = JSON.stringify(jsonData, null, 2);
47+
code.textContent = JSON.stringify(data, null, 2);
5648
}
5749
else {
5850
// show cell output text
5951
code.textContent = output.value.text();
6052
}
61-
6253
pre.appendChild(code);
6354
output.container.appendChild(pre);
6455
}

src/renderer/styles.css

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
overflow-y: hidden;
4040
}
4141

42-
.geo-json {
42+
.text-output {
4343
max-height: 360px;
4444
overflow: auto;
4545
tab-size: 2;

0 commit comments

Comments
 (0)