From ee46c93ce1ee8bfbdd49c8c415ca45700a56f7a2 Mon Sep 17 00:00:00 2001 From: Nathan McRae Date: Thu, 15 Feb 2024 20:22:43 -0800 Subject: [PATCH] Change column type implementation Use types instead of an enum so it's more easily extended. --- SaneTsv/SaneTsv.cs | 201 ++++++++++++++++++++++----------------------- 1 file changed, 98 insertions(+), 103 deletions(-) diff --git a/SaneTsv/SaneTsv.cs b/SaneTsv/SaneTsv.cs index 4fef70e..f8c8f00 100644 --- a/SaneTsv/SaneTsv.cs +++ b/SaneTsv/SaneTsv.cs @@ -7,18 +7,17 @@ namespace NathanMcRae; /// public class SaneTsv { - public enum ColumnType - { - STRING, - BOOLEAN, - FLOAT32, - FLOAT64, - UINT32, - UINT64, - INT32, - INT64, - BINARY, - } + // Like an enum, but more extensible + public class ColumnType { } + public class StringType : ColumnType { } + public class BooleanType : ColumnType { } + public class Float32Type : ColumnType { } + public class Float64Type : ColumnType { } + public class UInt32Type : ColumnType { } + public class UInt64Type : ColumnType { } + public class Int32Type : ColumnType { } + public class Int64Type : ColumnType { } + public class BinaryType : ColumnType { } protected enum FormatType { @@ -29,7 +28,7 @@ public class SaneTsv // TODO: We need to be able to update all these in tandem somehow public string[] ColumnNames { get; protected set; } - public ColumnType[] ColumnTypes { get; protected set; } + public Type[] ColumnTypes { get; protected set; } public List Records { get; protected set; } public string FileComment { get; protected set; } = null; @@ -53,7 +52,7 @@ public class SaneTsv { var parsed = new SaneTsv(); parsed.ColumnNames = new string[] { }; - parsed.ColumnTypes = new ColumnType[] { }; + parsed.ColumnTypes = new Type[] { }; parsed.Records = new List(); var fieldBytes = new List(); @@ -93,7 +92,7 @@ public class SaneTsv } else { - throw new Exception($"Expected 'n', 't', or '\\' after '\\' at {i}"); + throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {line} column {i - currentLineStart}"); } } else if (inputBuffer[i] == '\t') @@ -114,7 +113,7 @@ public class SaneTsv numFields = fields.Count; parsed.ColumnNames = new string[numFields]; - parsed.ColumnTypes = new ColumnType[numFields]; + parsed.ColumnTypes = new Type[numFields]; int numTypesBlank = 0; @@ -150,40 +149,40 @@ public class SaneTsv columnName = columnString; } - ColumnType type; + Type type; switch (columnTypeString) { case "": numTypesBlank++; - type = ColumnType.STRING; + type = typeof(StringType); break; case "string": - type = ColumnType.STRING; + type = typeof(StringType); break; case "boolean": - type = ColumnType.BOOLEAN; + type = typeof(BooleanType); break; case "float32": - type = ColumnType.FLOAT32; + type = typeof(Float32Type); break; case "float64": - type = ColumnType.FLOAT64; + type = typeof(Float64Type); break; case "uint32": - type = ColumnType.UINT32; + type = typeof(UInt32Type); break; case "uint64": - type = ColumnType.UINT64; + type = typeof(UInt64Type); break; case "int32": - type = ColumnType.INT32; + type = typeof(Int32Type); break; case "int64": - type = ColumnType.INT64; + type = typeof(Int64Type); break; case "binary": - type = ColumnType.BINARY; + type = typeof(BinaryType); break; default: throw new Exception($"Invalid type '{columnTypeString}' for column {j}"); @@ -285,10 +284,9 @@ public class SaneTsv for (int j = 0; j < fields.Count; j++) { // All other types require the content to be UTF-8. Binary fields can ignore that. - if (parsed.ColumnTypes[j] == ColumnType.BINARY) + if (parsed.ColumnTypes[j] == typeof(BinaryType)) { parsedFields[j] = fields[j]; - parsed.Columns[parsed.ColumnNames[j]].Add(fields[j]); continue; } @@ -304,88 +302,85 @@ public class SaneTsv // TODO: Add checking for numeric types format - switch (parsed.ColumnTypes[j]) + if (parsed.ColumnTypes[j] == typeof(StringType)) { - case ColumnType.STRING: - parsedFields[j] = fieldString; - parsed.Columns[parsed.ColumnNames[j]].Add(fieldString); - break; - case ColumnType.BOOLEAN: - bool parsedBool; - if (fieldString == "TRUE") - { - parsedBool = true; - } - else if (fieldString == "FALSE") - { - parsedBool = false; - } - else - { - throw new Exception($"Field {j} on line {line} is not valid boolean. Must be 'TRUE' or 'FALSE' exactly"); - } + parsedFields[j] = fieldString; + } + else if (parsed.ColumnTypes[j] == typeof(BooleanType)) + { + bool parsedBool; + if (fieldString == "TRUE") + { + parsedBool = true; + } + else if (fieldString == "FALSE") + { + parsedBool = false; + } + else + { + throw new Exception($"Field {j} on line {line} is not valid boolean. Must be 'TRUE' or 'FALSE' exactly"); + } - parsedFields[j] = parsedBool; - parsed.Columns[parsed.ColumnNames[j]].Add(parsedBool); - break; - case ColumnType.FLOAT32: - if (!float.TryParse(fieldString, out float parsedFloat)) - { - throw new Exception($"Field {j} on line {line} is not valid single-precision float"); - } + parsedFields[j] = parsedBool; + } + else if (parsed.ColumnTypes[j] == typeof(Float32Type)) + { + if (!float.TryParse(fieldString, out float parsedFloat)) + { + throw new Exception($"Field {j} on line {line} is not valid single-precision float"); + } - parsedFields[j] = parsedFloat; - parsed.Columns[parsed.ColumnNames[j]].Add(parsedFloat); - break; - case ColumnType.FLOAT64: - if (!double.TryParse(fieldString, out double parsedDouble)) - { - throw new Exception($"Field {j} on line {line} is not valid double-precision float"); - } + parsedFields[j] = parsedFloat; + } + else if (parsed.ColumnTypes[j] == typeof(Float64Type)) + { + if (!double.TryParse(fieldString, out double parsedDouble)) + { + throw new Exception($"Field {j} on line {line} is not valid double-precision float"); + } - parsedFields[j] = parsedDouble; - parsed.Columns[parsed.ColumnNames[j]].Add(parsedDouble); - break; - case ColumnType.UINT32: - if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32)) - { - throw new Exception($"Field {j} on line {line} is not valid UInt32"); - } + parsedFields[j] = parsedDouble; + } + else if (parsed.ColumnTypes[j] == typeof(UInt32Type)) + { + if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32)) + { + throw new Exception($"Field {j} on line {line} is not valid UInt32"); + } - parsedFields[j] = parsedUInt32; - parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt32); - break; - case ColumnType.UINT64: - if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64)) - { - throw new Exception($"Field {j} on line {line} is not valid UInt64"); - } + parsedFields[j] = parsedUInt32; + } + else if (parsed.ColumnTypes[j] == typeof(UInt64Type)) + { + if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64)) + { + throw new Exception($"Field {j} on line {line} is not valid UInt64"); + } - parsedFields[j] = parsedUInt64; - parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt64); - break; - case ColumnType.INT32: - if (!Int32.TryParse(fieldString, out Int32 parsedInt32)) - { - throw new Exception($"Field {j} on line {line} is not valid Int32"); - } + parsedFields[j] = parsedUInt64; + } + else if (parsed.ColumnTypes[j] == typeof(Int32Type)) + { + if (!Int32.TryParse(fieldString, out Int32 parsedInt32)) + { + throw new Exception($"Field {j} on line {line} is not valid Int32"); + } - parsedFields[j] = parsedInt32; - parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt32); - break; - case ColumnType.INT64: - if (!Int64.TryParse(fieldString, out Int64 parsedInt64)) - { - throw new Exception($"Field {j} on line {line} is not valid Int64"); - } + parsedFields[j] = parsedInt32; + } + else if (parsed.ColumnTypes[j] == typeof(Int64Type)) + { + if (!Int64.TryParse(fieldString, out Int64 parsedInt64)) + { + throw new Exception($"Field {j} on line {line} is not valid Int64"); + } - parsedFields[j] = parsedInt64; - parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt64); - break; - case ColumnType.BINARY: - throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}"); - default: - throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}"); + parsedFields[j] = parsedInt64; + } + else + { + throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}"); } }