diff --git a/SaneTsv/SaneTsv.cs b/SaneTsv/SaneTsv.cs index 811b981..3c64f73 100644 --- a/SaneTsv/SaneTsv.cs +++ b/SaneTsv/SaneTsv.cs @@ -394,18 +394,42 @@ public class SaneTsv } else if (parsed.ColumnTypes[j] == typeof(Float32Type)) { - if (!float.TryParse(fieldString, out float parsedFloat)) + float parsedFloat; + if (!float.TryParse(fieldString, out parsedFloat)) { - throw new Exception($"Field {j} on line {line} is not valid single-precision float"); + if (fieldString == "-inf") + { + parsedFloat = float.NegativeInfinity; + } + else if (fieldString == "+inf") + { + parsedFloat = float.PositiveInfinity; + } + else + { + throw new Exception($"Field {j} on line {line} is not valid single-precision float"); + } } parsedFields[j] = parsedFloat; } else if (parsed.ColumnTypes[j] == typeof(Float64Type)) { - if (!double.TryParse(fieldString, out double parsedDouble)) + double parsedDouble; + if (!double.TryParse(fieldString, out parsedDouble)) { - throw new Exception($"Field {j} on line {line} is not valid double-precision float"); + if (fieldString == "-inf") + { + parsedDouble = float.NegativeInfinity; + } + else if (fieldString == "+inf") + { + parsedDouble = float.PositiveInfinity; + } + else + { + throw new Exception($"Field {j} on line {line} is not valid double-precision float"); + } } parsedFields[j] = parsedDouble; @@ -725,7 +749,9 @@ public class SaneTsv { try { - byte[] fieldEncoded; + byte[] fieldEncoded = null; + // Some fields definitely don't need escaping, so we add them directly to bytes + bool skipEscaping = false; if (headerTypes[j] == typeof(StringType)) { @@ -734,15 +760,31 @@ public class SaneTsv else if (headerTypes[j] == typeof(BooleanType)) { bytes.AddRange((bool)data[i][j] ? TrueEncoded : FalseEncoded); - // In this case we know these values don't need escaping - continue; + skipEscaping = true; } else if (headerTypes[j] == typeof(Float32Type)) { - // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r - bytes.AddRange(Encoding.UTF8.GetBytes(((float)data[i][j]).ToString("G9"))); - // In this case we know these values don't need escaping - continue; + if (data[i][j] is float f) + { + if (float.IsNegativeInfinity(f)) + { + bytes.AddRange(Encoding.UTF8.GetBytes("-inf")); + } + else if (float.IsPositiveInfinity(f)) + { + bytes.AddRange(Encoding.UTF8.GetBytes("+inf")); + } + else + { + // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r + bytes.AddRange(Encoding.UTF8.GetBytes(((float)data[i][j]).ToString("G9"))); + } + } + else + { + throw new InvalidCastException(); + } + skipEscaping = true; } else if (headerTypes[j] == typeof(Float32LEType)) { @@ -762,10 +804,27 @@ public class SaneTsv } else if (headerTypes[j] == typeof(Float64Type)) { - // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r - bytes.AddRange(Encoding.UTF8.GetBytes(((double)data[i][j]).ToString("G17"))); - // In this case we know these values don't need escaping - continue; + if (data[i][j] is double d) + { + if (double.IsNegativeInfinity(d)) + { + bytes.AddRange(Encoding.UTF8.GetBytes("-inf")); + } + else if (double.IsPositiveInfinity(d)) + { + bytes.AddRange(Encoding.UTF8.GetBytes("+inf")); + } + else + { + // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r + bytes.AddRange(Encoding.UTF8.GetBytes((d).ToString("G17"))); + } + } + else + { + throw new InvalidCastException(); + } + skipEscaping = true; } else if (headerTypes[j] == typeof(Float64LEType)) { @@ -785,27 +844,23 @@ public class SaneTsv } else if (headerTypes[j] == typeof(UInt32Type)) { - bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)data[i][j]).ToString())); - // In this case we know these values don't need escaping - continue; + bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)(int)data[i][j]).ToString())); + skipEscaping = true; } else if (headerTypes[j] == typeof(UInt64Type)) { - bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)data[i][j]).ToString())); - // In this case we know these values don't need escaping - continue; + bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)(int)data[i][j]).ToString())); + skipEscaping = true; } else if (headerTypes[j] == typeof(Int32Type)) { - bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)data[i][j]).ToString())); - // In this case we know these values don't need escaping - continue; + bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)(int)data[i][j]).ToString())); + skipEscaping = true; } else if (headerTypes[j] == typeof(Int64Type)) { - bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)data[i][j]).ToString())); - // In this case we know these values don't need escaping - continue; + bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)(int)data[i][j]).ToString())); + skipEscaping = true; } else if (headerTypes[j] == typeof(BinaryType)) { @@ -816,42 +871,45 @@ public class SaneTsv throw new Exception($"Unexpected column type {headerTypes[j]} for column {j}"); } - for (int k = 0; k < fieldEncoded.Length; k++) + if (!skipEscaping) { - if (fieldEncoded[k] == '\n') + for (int k = 0; k < fieldEncoded.Length; k++) { - bytes.Add((byte)'\\'); - bytes.Add((byte)'n'); - } - else if (fieldEncoded[k] == '\t') - { - bytes.Add((byte)'\\'); - bytes.Add((byte)'t'); - } - else if (fieldEncoded[k] == '\\') - { - bytes.Add((byte)'\\'); - bytes.Add((byte)'\\'); - } - else if (fieldEncoded[k] == '#') - { - bytes.Add((byte)'\\'); - bytes.Add((byte)'#'); - } - else - { - bytes.Add(fieldEncoded[k]); + if (fieldEncoded[k] == '\n') + { + bytes.Add((byte)'\\'); + bytes.Add((byte)'n'); + } + else if (fieldEncoded[k] == '\t') + { + bytes.Add((byte)'\\'); + bytes.Add((byte)'t'); + } + else if (fieldEncoded[k] == '\\') + { + bytes.Add((byte)'\\'); + bytes.Add((byte)'\\'); + } + else if (fieldEncoded[k] == '#') + { + bytes.Add((byte)'\\'); + bytes.Add((byte)'#'); + } + else + { + bytes.Add(fieldEncoded[k]); + } } } - if (j == headerNames.Count - 1) - { - bytes.Add((byte)'\n'); - } - else + if (j < data[i].Count - 1) { bytes.Add((byte)'\t'); } + else if (i < data.Count - 1) + { + bytes.Add((byte)'\n'); + } } catch (InvalidCastException e) {