Add binary floating-point types

This commit is contained in:
Nathan McRae 2024-02-16 17:20:11 -08:00
parent f392036982
commit f3ed173842
2 changed files with 28 additions and 1 deletions

View File

@ -12,7 +12,9 @@ public class SaneTsv
public class StringType : ColumnType { } public class StringType : ColumnType { }
public class BooleanType : ColumnType { } public class BooleanType : ColumnType { }
public class Float32Type : ColumnType { } public class Float32Type : ColumnType { }
public class Float32LEType : ColumnType { }
public class Float64Type : ColumnType { } public class Float64Type : ColumnType { }
public class Float64LEType : ColumnType { }
public class UInt32Type : ColumnType { } public class UInt32Type : ColumnType { }
public class UInt64Type : ColumnType { } public class UInt64Type : ColumnType { }
public class Int32Type : ColumnType { } public class Int32Type : ColumnType { }
@ -166,9 +168,15 @@ public class SaneTsv
case "float32": case "float32":
type = typeof(Float32Type); type = typeof(Float32Type);
break; break;
case "float32-le":
type = typeof(Float32LEType);
break;
case "float64": case "float64":
type = typeof(Float64Type); type = typeof(Float64Type);
break; break;
case "float64-le":
type = typeof(Float64LEType);
break;
case "uint32": case "uint32":
type = typeof(UInt32Type); type = typeof(UInt32Type);
break; break;
@ -339,6 +347,13 @@ public class SaneTsv
parsedFields[j] = parsedFloat; parsedFields[j] = parsedFloat;
} }
else if (parsed.ColumnTypes[j] == typeof(Float32LEType))
{
throw new NotImplementedException();
// TODO: Implement and do byte-swapping if necessary
//parsedFields[j] = parsedFloat;
}
else if (parsed.ColumnTypes[j] == typeof(Float64Type)) else if (parsed.ColumnTypes[j] == typeof(Float64Type))
{ {
if (!double.TryParse(fieldString, out double parsedDouble)) if (!double.TryParse(fieldString, out double parsedDouble))
@ -348,6 +363,13 @@ public class SaneTsv
parsedFields[j] = parsedDouble; parsedFields[j] = parsedDouble;
} }
else if (parsed.ColumnTypes[j] == typeof(Float64LEType))
{
throw new NotImplementedException();
// TODO: Implement and do byte-swapping if necessary
//parsedFields[j] = parsedDouble;
}
else if (parsed.ColumnTypes[j] == typeof(UInt32Type)) else if (parsed.ColumnTypes[j] == typeof(UInt32Type))
{ {
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32)) if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))

View File

@ -29,7 +29,9 @@ Typed TSV builds on Simple TSV to allow for typing of columns. All column names
- 'string' - 'string'
- 'boolean' - 'boolean'
- 'float32' - 'float32'
- 'float32-le'
- 'float64' - 'float64'
- 'float64-le'
- 'uint32' - 'uint32'
- 'uint64' - 'uint64'
- 'int32' - 'int32'
@ -40,7 +42,7 @@ Any other values are an error, however, the portion of the name prior to the las
All fields in the rest of the file must be of the type corresponding to their column. All fields in the rest of the file must be of the type corresponding to their column.
Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each type has the following restrictions: Aside from the 'binary', 'float32-le', and 'float64-le' column types, all fields must be UTF-8 encoded text. Each type has the following restrictions:
- 'boolean' fields must contain only and exactly the text "TRUE" or "FALSE". - 'boolean' fields must contain only and exactly the text "TRUE" or "FALSE".
- 'float32' and 'float64' correspond to single and double precision IEEE 754 floating-point numbers respectively. They should be formatted like this regex: `-?[0-9]\.([0-9]|[0-9]+[1-9])E-?[1-9][0-9]*` - 'float32' and 'float64' correspond to single and double precision IEEE 754 floating-point numbers respectively. They should be formatted like this regex: `-?[0-9]\.([0-9]|[0-9]+[1-9])E-?[1-9][0-9]*`
@ -50,6 +52,9 @@ Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each
- 'qNaN' - 'qNaN'
- '+inf' - '+inf'
- '-inf' - '-inf'
- 'float32-le' and 'float64-le' are also single and double precision IEEE 754 floating-point numbers respectively, but are stored in binary rather than as text. They must always be stored in little-endian byte order.
The separate binary formats exist because round-tripping floating-point text values between different parsers is unlikely to reproduce the exact value in all cases. The text-based formats should be fine for general use; when exact value transfer is needed, use the binary formats.
- 'uint32' and 'uint64' are unsigned 32 and 64 bit integers respectively. They should be formatted like this regex: `[1-9][0-9]*` - 'uint32' and 'uint64' are unsigned 32 and 64 bit integers respectively. They should be formatted like this regex: `[1-9][0-9]*`
- 'int32' and 'int64' are signed 32 and 64 bit integers respectively. They should be formatted like this regex: `-?[1-9][0-9]*` (except that '-0' is not allowed) - 'int32' and 'int64' are signed 32 and 64 bit integers respectively. They should be formatted like this regex: `-?[1-9][0-9]*` (except that '-0' is not allowed)