Compare commits
3 Commits
0b302734e9
...
53e87e2f7f
Author | SHA1 | Date | |
---|---|---|---|
|
53e87e2f7f | ||
|
f3ed173842 | ||
|
f392036982 |
@ -12,7 +12,9 @@ public class SaneTsv
|
||||
public class StringType : ColumnType { }
|
||||
public class BooleanType : ColumnType { }
|
||||
public class Float32Type : ColumnType { }
|
||||
public class Float32LEType : ColumnType { }
|
||||
public class Float64Type : ColumnType { }
|
||||
public class Float64LEType : ColumnType { }
|
||||
public class UInt32Type : ColumnType { }
|
||||
public class UInt64Type : ColumnType { }
|
||||
public class Int32Type : ColumnType { }
|
||||
@ -166,9 +168,15 @@ public class SaneTsv
|
||||
case "float32":
|
||||
type = typeof(Float32Type);
|
||||
break;
|
||||
case "float32-le":
|
||||
type = typeof(Float32LEType);
|
||||
break;
|
||||
case "float64":
|
||||
type = typeof(Float64Type);
|
||||
break;
|
||||
case "float64-le":
|
||||
type = typeof(Float64LEType);
|
||||
break;
|
||||
case "uint32":
|
||||
type = typeof(UInt32Type);
|
||||
break;
|
||||
@ -339,6 +347,13 @@ public class SaneTsv
|
||||
|
||||
parsedFields[j] = parsedFloat;
|
||||
}
|
||||
else if (parsed.ColumnTypes[j] == typeof(Float32LEType))
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
// TODO: Implement and do byte-swapping if necessary
|
||||
|
||||
//parsedFields[j] = parsedFloat;
|
||||
}
|
||||
else if (parsed.ColumnTypes[j] == typeof(Float64Type))
|
||||
{
|
||||
if (!double.TryParse(fieldString, out double parsedDouble))
|
||||
@ -348,6 +363,13 @@ public class SaneTsv
|
||||
|
||||
parsedFields[j] = parsedDouble;
|
||||
}
|
||||
else if (parsed.ColumnTypes[j] == typeof(Float64LEType))
|
||||
{
|
||||
throw new NotImplementedException();
|
||||
// TODO: Implement and do byte-swapping if necessary
|
||||
|
||||
//parsedFields[j] = parsedFloat;
|
||||
}
|
||||
else if (parsed.ColumnTypes[j] == typeof(UInt32Type))
|
||||
{
|
||||
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
|
||||
|
@ -8,7 +8,7 @@ Simple TSV is a strict format for tabular data.
|
||||
|
||||
'\n' (0x0A) characters delimit lines, and '\t' (0x09) characters delimit fields within a line.
|
||||
|
||||
'\n' and '\t' characters are allowed within fields by escaping them with a backslash character (0x5C) followed by 'n' (0x6E) and 't' (0x74) respectively. Additionaly, '\\' and '#' (0x23) must also be escaped. The '#' character is escaped for compatility with [Commented TSVs](#commented-tsv).
|
||||
'\n' and '\t' characters are allowed within fields by escaping them with a backslash character (0x5C) followed by 'n' (0x6E) and 't' (0x74) respectively. Additionally, '\\' and '#' (0x23) must also be escaped. The '#' character is escaped for compatibility with [Commented TSVs](#commented-tsv).
|
||||
|
||||
All fields must be UTF-8 encoded text. All escaping can be done before decoding (and after encoding).
|
||||
|
||||
@ -29,7 +29,9 @@ Typed TSV builds on Simple TSV to allow for typing of columns. All column names
|
||||
- 'string'
|
||||
- 'boolean'
|
||||
- 'float32'
|
||||
- 'float32-le'
|
||||
- 'float64'
|
||||
- 'float64-le'
|
||||
- 'uint32'
|
||||
- 'uint64'
|
||||
- 'int32'
|
||||
@ -40,7 +42,7 @@ Any other values are an error, however, the portion of the name prior to the las
|
||||
|
||||
All fields in the rest of the file must be of the type corresponding to their column.
|
||||
|
||||
Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each type has the following restrictions:
|
||||
Aside from the 'binary', 'float32-le', and 'float64-le' column types, all fields must be UTF-8 encoded text. Each type has the following restrictions:
|
||||
|
||||
- 'boolean' fields must contain only and exactly the text "TRUE" or "FALSE".
|
||||
- 'float32' and 'float64' correspond to single and double precision IEEE 754 floating-point numbers respectively. They should be formatted like this regex: `-?[0-9]\.([0-9]|[0-9]+[1-9])E-?[1-9][0-9]*`
|
||||
@ -50,6 +52,9 @@ Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each
|
||||
- 'qNaN'
|
||||
- '+inf'
|
||||
- '-inf'
|
||||
- 'float32-le' and 'float64-le' are also IEEE 754 floating-point, but are stored as binary. They must always be stored in little-endian order.
|
||||
|
||||
The reason for having a separate binary format for them is that round-tripping floating-point text values between different parsers is not likely to work for all cases. The text-based format should be fine for general use, but when exact value transfer is needed, the binary formats are available.
|
||||
- 'uint32' and 'uint64' are unsigned 32 and 64 bit integers respectively. They should be formatted like this regex: `[1-9][0-9]*`
|
||||
- 'int32' and 'int64' are signed 32 and 64 bit integers respectively. They should be formatted like this regex: `-?[1-9][0-9]*` (except that '-0' is not allowed)
|
||||
|
||||
@ -92,5 +97,11 @@ Extended formats may still use the .ctsv extension, though they could use a dedi
|
||||
|
||||
- Physical units
|
||||
- Multiformats
|
||||
- Instead of multihashes, maybe have a column type for each hash type. That way we can avoid wasting data on the type within each field.
|
||||
- ISO 8601
|
||||
- https://github.com/multiformats/unsigned-varint
|
||||
- Color codes (e.g. #E359FF)
|
||||
- Both binary and string-based
|
||||
- JSON
|
||||
- XML
|
||||
- URL
|
||||
|
Loading…
Reference in New Issue
Block a user