feat: properly parse data types in OBSVER1

This commit is contained in:
Paul Bienkowski 2020-11-21 13:26:52 +01:00
parent 598c6620d7
commit 16ca8db8c7

View file

@ -1,16 +1,57 @@
const csvParse = require('csv-parse/lib/sync'); const csvParse = require('csv-parse/lib/sync');
/**
 * Parses a CSV token as a floating point number.
 *
 * @param {*} token - Raw cell value; only strings are parsed.
 * @returns {number|null} `null` when the token is not a string, is blank
 *   after trimming, or is the literal "nan" (any letter case). Otherwise the
 *   parsed number, or `0.0` when the text has no numeric prefix at all.
 */
function _parseFloat(token) {
  if (typeof token !== 'string') {
    return null;
  }

  const text = token.trim();
  if (text === '' || /^nan$/i.test(text)) {
    return null;
  }

  // parseFloat already reads the longest numeric prefix, so retrying on a
  // shorter prefix of the same text can never succeed; go straight to the
  // zero fallback for text that does not start with a number.
  const parsed = Number.parseFloat(text);
  return Number.isNaN(parsed) ? 0.0 : parsed;
}
/**
 * Parses a CSV token as an integer by flooring its floating point value.
 *
 * @param {*} token - Raw cell value, passed straight to `_parseFloat`.
 * @returns {number|null} `null` whenever `_parseFloat` yields `null`,
 *   otherwise the parsed value rounded down with `Math.floor`.
 */
function _parseInt(token) {
  // This must go through _parseFloat rather than the global parseFloat: the
  // global never returns null, so the null check below would be dead and
  // blank cells would come out as NaN.
  const asFloat = _parseFloat(token);
  return asFloat === null ? null : Math.floor(asFloat);
}
/**
 * Normalizes a CSV token to a string value.
 *
 * @param {*} token - Raw cell value.
 * @returns {string|null} The token unchanged when it is a non-empty string,
 *   otherwise `null`.
 */
function _parseString(token) {
  // Deliberately no trim here: surrounding whitespace may have been
  // preserved on purpose through CSV quoting, so it is kept as part of the
  // value. Only a completely empty string counts as "no value".
  const hasText = typeof token === 'string' && token !== '';
  return hasText ? token : null;
}
function replaceDollarNewlinesHack(body) { function replaceDollarNewlinesHack(body) {
// see if we are using the hack with $ as newlines, replace them for the csv parser // see if we are using the hack with $ as newlines, replace them for the csv parser
if (body.endsWith('$')) { if (body.endsWith('$')) {
@ -87,13 +128,23 @@ function* parseObsver1(body) {
for (const record of csvParse(body, { for (const record of csvParse(body, {
delimiter: ';', delimiter: ';',
encoding: 'utf8', encoding: 'utf8',
// We specify different column names here, as the order of columns was
// always the same, but their naming was different. By enforcing these
// column names we don't have to translate between them. Then we just
// ignore the first line (or any line that starts with "Date;").
// Original header usually is:
// Date;Time;Latitude;Longitude;Course;Speed;Right;Left;Confirmed;insidePrivacyArea
columns: ['date', 'time', 'latitude', 'longitude', 'course', 'speed', 'd1', 'd2', 'flag', 'private'], columns: ['date', 'time', 'latitude', 'longitude', 'course', 'speed', 'd1', 'd2', 'flag', 'private'],
relax_column_count: true, relax_column_count: true,
cast(value, { column }) { cast(value, { column }) {
if (/latitude|longitude|course|speed/.test(column)) { if (['latitude', 'longitude', 'course', 'speed'].includes(column)) {
return _parseFloat(value); return _parseFloat(value);
} else if (['d1', 'd2', 'flag'].includes(column)) {
return _parseInt(value);
} else if (column === 'private') {
return Boolean(_parseInt(value));
} else { } else {
return value; return _parseString(value);
} }
}, },
})) { })) {