Fix typed TSV serialization
Also add -inf/+inf serialization and parsing for Float32 and Float64 columns
This commit is contained in:
		| @@ -394,19 +394,43 @@ public class SaneTsv | |||||||
|       } |       } | ||||||
|       else if (parsed.ColumnTypes[j] == typeof(Float32Type)) |       else if (parsed.ColumnTypes[j] == typeof(Float32Type)) | ||||||
|       { |       { | ||||||
|         if (!float.TryParse(fieldString, out float parsedFloat)) |         float parsedFloat; | ||||||
|  |         if (!float.TryParse(fieldString, out parsedFloat)) | ||||||
|  |         { | ||||||
|  |           if (fieldString == "-inf") | ||||||
|  |           { | ||||||
|  |             parsedFloat = float.NegativeInfinity; | ||||||
|  |           } | ||||||
|  |           else if (fieldString == "+inf") | ||||||
|  |           { | ||||||
|  |             parsedFloat = float.PositiveInfinity; | ||||||
|  |           } | ||||||
|  |           else | ||||||
|           { |           { | ||||||
|             throw new Exception($"Field {j} on line {line} is not valid single-precision float"); |             throw new Exception($"Field {j} on line {line} is not valid single-precision float"); | ||||||
|           } |           } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         parsedFields[j] = parsedFloat; |         parsedFields[j] = parsedFloat; | ||||||
|       } |       } | ||||||
|       else if (parsed.ColumnTypes[j] == typeof(Float64Type)) |       else if (parsed.ColumnTypes[j] == typeof(Float64Type)) | ||||||
|       { |       { | ||||||
|         if (!double.TryParse(fieldString, out double parsedDouble)) |         double parsedDouble; | ||||||
|  |         if (!double.TryParse(fieldString, out parsedDouble)) | ||||||
|  |         { | ||||||
|  |           if (fieldString == "-inf") | ||||||
|  |           { | ||||||
|  |             parsedDouble = float.NegativeInfinity; | ||||||
|  |           } | ||||||
|  |           else if (fieldString == "+inf") | ||||||
|  |           { | ||||||
|  |             parsedDouble = float.PositiveInfinity; | ||||||
|  |           } | ||||||
|  |           else | ||||||
|           { |           { | ||||||
|             throw new Exception($"Field {j} on line {line} is not valid double-precision float"); |             throw new Exception($"Field {j} on line {line} is not valid double-precision float"); | ||||||
|           } |           } | ||||||
|  |         } | ||||||
|  |  | ||||||
|         parsedFields[j] = parsedDouble; |         parsedFields[j] = parsedDouble; | ||||||
|       } |       } | ||||||
| @@ -725,7 +749,9 @@ public class SaneTsv | |||||||
|       { |       { | ||||||
|         try |         try | ||||||
|         { |         { | ||||||
|           byte[] fieldEncoded; |           byte[] fieldEncoded = null; | ||||||
|  |           // Some fields definitely don't need escaping, so we add them directly to bytes | ||||||
|  |           bool skipEscaping = false; | ||||||
|  |  | ||||||
|           if (headerTypes[j] == typeof(StringType)) |           if (headerTypes[j] == typeof(StringType)) | ||||||
|           { |           { | ||||||
| @@ -734,15 +760,31 @@ public class SaneTsv | |||||||
|           else if (headerTypes[j] == typeof(BooleanType)) |           else if (headerTypes[j] == typeof(BooleanType)) | ||||||
|           { |           { | ||||||
|             bytes.AddRange((bool)data[i][j] ? TrueEncoded : FalseEncoded); |             bytes.AddRange((bool)data[i][j] ? TrueEncoded : FalseEncoded); | ||||||
|             // In this case we know these values don't need escaping |             skipEscaping = true; | ||||||
|             continue; |  | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(Float32Type)) |           else if (headerTypes[j] == typeof(Float32Type)) | ||||||
|  |           { | ||||||
|  |             if (data[i][j] is float f) | ||||||
|  |             { | ||||||
|  |               if (float.IsNegativeInfinity(f)) | ||||||
|  |               { | ||||||
|  |                 bytes.AddRange(Encoding.UTF8.GetBytes("-inf")); | ||||||
|  |               } | ||||||
|  |               else if (float.IsPositiveInfinity(f)) | ||||||
|  |               { | ||||||
|  |                 bytes.AddRange(Encoding.UTF8.GetBytes("+inf")); | ||||||
|  |               } | ||||||
|  |               else | ||||||
|               { |               { | ||||||
|                 // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r |                 // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r | ||||||
|                 bytes.AddRange(Encoding.UTF8.GetBytes(((float)data[i][j]).ToString("G9"))); |                 bytes.AddRange(Encoding.UTF8.GetBytes(((float)data[i][j]).ToString("G9"))); | ||||||
|             // In this case we know these values don't need escaping |               } | ||||||
|             continue; |             } | ||||||
|  |             else | ||||||
|  |             { | ||||||
|  |               throw new InvalidCastException(); | ||||||
|  |             } | ||||||
|  |             skipEscaping = true; | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(Float32LEType)) |           else if (headerTypes[j] == typeof(Float32LEType)) | ||||||
|           { |           { | ||||||
| @@ -761,11 +803,28 @@ public class SaneTsv | |||||||
|             } |             } | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(Float64Type)) |           else if (headerTypes[j] == typeof(Float64Type)) | ||||||
|  |           { | ||||||
|  |             if (data[i][j] is double d) | ||||||
|  |             { | ||||||
|  |               if (double.IsNegativeInfinity(d)) | ||||||
|  |               { | ||||||
|  |                 bytes.AddRange(Encoding.UTF8.GetBytes("-inf")); | ||||||
|  |               } | ||||||
|  |               else if (double.IsPositiveInfinity(d)) | ||||||
|  |               { | ||||||
|  |                 bytes.AddRange(Encoding.UTF8.GetBytes("+inf")); | ||||||
|  |               } | ||||||
|  |               else | ||||||
|               { |               { | ||||||
|                 // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r |                 // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r | ||||||
|             bytes.AddRange(Encoding.UTF8.GetBytes(((double)data[i][j]).ToString("G17"))); |                 bytes.AddRange(Encoding.UTF8.GetBytes((d).ToString("G17"))); | ||||||
|             // In this case we know these values don't need escaping |               } | ||||||
|             continue; |             } | ||||||
|  |             else | ||||||
|  |             { | ||||||
|  |               throw new InvalidCastException(); | ||||||
|  |             } | ||||||
|  |             skipEscaping = true; | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(Float64LEType)) |           else if (headerTypes[j] == typeof(Float64LEType)) | ||||||
|           { |           { | ||||||
| @@ -785,27 +844,23 @@ public class SaneTsv | |||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(UInt32Type)) |           else if (headerTypes[j] == typeof(UInt32Type)) | ||||||
|           { |           { | ||||||
|             bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)data[i][j]).ToString())); |             bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)(int)data[i][j]).ToString())); | ||||||
|             // In this case we know these values don't need escaping |             skipEscaping = true; | ||||||
|             continue; |  | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(UInt64Type)) |           else if (headerTypes[j] == typeof(UInt64Type)) | ||||||
|           { |           { | ||||||
|             bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)data[i][j]).ToString())); |             bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)(int)data[i][j]).ToString())); | ||||||
|             // In this case we know these values don't need escaping |             skipEscaping = true; | ||||||
|             continue; |  | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(Int32Type)) |           else if (headerTypes[j] == typeof(Int32Type)) | ||||||
|           { |           { | ||||||
|             bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)data[i][j]).ToString())); |             bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)(int)data[i][j]).ToString())); | ||||||
|             // In this case we know these values don't need escaping |             skipEscaping = true; | ||||||
|             continue; |  | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(Int64Type)) |           else if (headerTypes[j] == typeof(Int64Type)) | ||||||
|           { |           { | ||||||
|             bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)data[i][j]).ToString())); |             bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)(int)data[i][j]).ToString())); | ||||||
|             // In this case we know these values don't need escaping |             skipEscaping = true; | ||||||
|             continue; |  | ||||||
|           } |           } | ||||||
|           else if (headerTypes[j] == typeof(BinaryType)) |           else if (headerTypes[j] == typeof(BinaryType)) | ||||||
|           { |           { | ||||||
| @@ -816,6 +871,8 @@ public class SaneTsv | |||||||
|             throw new Exception($"Unexpected column type {headerTypes[j]} for column {j}"); |             throw new Exception($"Unexpected column type {headerTypes[j]} for column {j}"); | ||||||
|           } |           } | ||||||
|  |  | ||||||
|  |           if (!skipEscaping) | ||||||
|  |           { | ||||||
|             for (int k = 0; k < fieldEncoded.Length; k++) |             for (int k = 0; k < fieldEncoded.Length; k++) | ||||||
|             { |             { | ||||||
|               if (fieldEncoded[k] == '\n') |               if (fieldEncoded[k] == '\n') | ||||||
| @@ -843,15 +900,16 @@ public class SaneTsv | |||||||
|                 bytes.Add(fieldEncoded[k]); |                 bytes.Add(fieldEncoded[k]); | ||||||
|               } |               } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|           if (j == headerNames.Count - 1) |  | ||||||
|           { |  | ||||||
|             bytes.Add((byte)'\n'); |  | ||||||
|           } |           } | ||||||
|           else |  | ||||||
|  |           if (j < data[i].Count - 1) | ||||||
|           { |           { | ||||||
|             bytes.Add((byte)'\t'); |             bytes.Add((byte)'\t'); | ||||||
|           } |           } | ||||||
|  |           else if (i < data.Count - 1) | ||||||
|  |           { | ||||||
|  |             bytes.Add((byte)'\n'); | ||||||
|  |           } | ||||||
|         } |         } | ||||||
|         catch (InvalidCastException e) |         catch (InvalidCastException e) | ||||||
|         { |         { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user