Change column type implementation

Represent column types as classes (subclasses of ColumnType) instead of enum members, so that new column types can be added more easily.
This commit is contained in:
Nathan McRae 2024-02-15 20:22:43 -08:00
parent ddbda890cf
commit 2dd6b1e25b

View File

@ -7,18 +7,17 @@ namespace NathanMcRae;
/// </summary> /// </summary>
public class SaneTsv public class SaneTsv
{ {
public enum ColumnType // Like an enum, but more extensible
{ public class ColumnType { }
STRING, public class StringType : ColumnType { }
BOOLEAN, public class BooleanType : ColumnType { }
FLOAT32, public class Float32Type : ColumnType { }
FLOAT64, public class Float64Type : ColumnType { }
UINT32, public class UInt32Type : ColumnType { }
UINT64, public class UInt64Type : ColumnType { }
INT32, public class Int32Type : ColumnType { }
INT64, public class Int64Type : ColumnType { }
BINARY, public class BinaryType : ColumnType { }
}
protected enum FormatType protected enum FormatType
{ {
@ -29,7 +28,7 @@ public class SaneTsv
// TODO: We need to be able to update all these in tandem somehow // TODO: We need to be able to update all these in tandem somehow
public string[] ColumnNames { get; protected set; } public string[] ColumnNames { get; protected set; }
public ColumnType[] ColumnTypes { get; protected set; } public Type[] ColumnTypes { get; protected set; }
public List<SaneTsvRecord> Records { get; protected set; } public List<SaneTsvRecord> Records { get; protected set; }
public string FileComment { get; protected set; } = null; public string FileComment { get; protected set; } = null;
@ -53,7 +52,7 @@ public class SaneTsv
{ {
var parsed = new SaneTsv(); var parsed = new SaneTsv();
parsed.ColumnNames = new string[] { }; parsed.ColumnNames = new string[] { };
parsed.ColumnTypes = new ColumnType[] { }; parsed.ColumnTypes = new Type[] { };
parsed.Records = new List<SaneTsvRecord>(); parsed.Records = new List<SaneTsvRecord>();
var fieldBytes = new List<byte>(); var fieldBytes = new List<byte>();
@ -93,7 +92,7 @@ public class SaneTsv
} }
else else
{ {
throw new Exception($"Expected 'n', 't', or '\\' after '\\' at {i}"); throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {line} column {i - currentLineStart}");
} }
} }
else if (inputBuffer[i] == '\t') else if (inputBuffer[i] == '\t')
@ -114,7 +113,7 @@ public class SaneTsv
numFields = fields.Count; numFields = fields.Count;
parsed.ColumnNames = new string[numFields]; parsed.ColumnNames = new string[numFields];
parsed.ColumnTypes = new ColumnType[numFields]; parsed.ColumnTypes = new Type[numFields];
int numTypesBlank = 0; int numTypesBlank = 0;
@ -150,40 +149,40 @@ public class SaneTsv
columnName = columnString; columnName = columnString;
} }
ColumnType type; Type type;
switch (columnTypeString) switch (columnTypeString)
{ {
case "": case "":
numTypesBlank++; numTypesBlank++;
type = ColumnType.STRING; type = typeof(StringType);
break; break;
case "string": case "string":
type = ColumnType.STRING; type = typeof(StringType);
break; break;
case "boolean": case "boolean":
type = ColumnType.BOOLEAN; type = typeof(BooleanType);
break; break;
case "float32": case "float32":
type = ColumnType.FLOAT32; type = typeof(Float32Type);
break; break;
case "float64": case "float64":
type = ColumnType.FLOAT64; type = typeof(Float64Type);
break; break;
case "uint32": case "uint32":
type = ColumnType.UINT32; type = typeof(UInt32Type);
break; break;
case "uint64": case "uint64":
type = ColumnType.UINT64; type = typeof(UInt64Type);
break; break;
case "int32": case "int32":
type = ColumnType.INT32; type = typeof(Int32Type);
break; break;
case "int64": case "int64":
type = ColumnType.INT64; type = typeof(Int64Type);
break; break;
case "binary": case "binary":
type = ColumnType.BINARY; type = typeof(BinaryType);
break; break;
default: default:
throw new Exception($"Invalid type '{columnTypeString}' for column {j}"); throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
@ -285,10 +284,9 @@ public class SaneTsv
for (int j = 0; j < fields.Count; j++) for (int j = 0; j < fields.Count; j++)
{ {
// All other types require the content to be UTF-8. Binary fields can ignore that. // All other types require the content to be UTF-8. Binary fields can ignore that.
if (parsed.ColumnTypes[j] == ColumnType.BINARY) if (parsed.ColumnTypes[j] == typeof(BinaryType))
{ {
parsedFields[j] = fields[j]; parsedFields[j] = fields[j];
parsed.Columns[parsed.ColumnNames[j]].Add(fields[j]);
continue; continue;
} }
@ -304,88 +302,85 @@ public class SaneTsv
// TODO: Add checking for numeric types format // TODO: Add checking for numeric types format
switch (parsed.ColumnTypes[j]) if (parsed.ColumnTypes[j] == typeof(StringType))
{ {
case ColumnType.STRING: parsedFields[j] = fieldString;
parsedFields[j] = fieldString; }
parsed.Columns[parsed.ColumnNames[j]].Add(fieldString); else if (parsed.ColumnTypes[j] == typeof(BooleanType))
break; {
case ColumnType.BOOLEAN: bool parsedBool;
bool parsedBool; if (fieldString == "TRUE")
if (fieldString == "TRUE") {
{ parsedBool = true;
parsedBool = true; }
} else if (fieldString == "FALSE")
else if (fieldString == "FALSE") {
{ parsedBool = false;
parsedBool = false; }
} else
else {
{ throw new Exception($"Field {j} on line {line} is not valid boolean. Must be 'TRUE' or 'FALSE' exactly");
throw new Exception($"Field {j} on line {line} is not valid boolean. Must be 'TRUE' or 'FALSE' exactly"); }
}
parsedFields[j] = parsedBool; parsedFields[j] = parsedBool;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedBool); }
break; else if (parsed.ColumnTypes[j] == typeof(Float32Type))
case ColumnType.FLOAT32: {
if (!float.TryParse(fieldString, out float parsedFloat)) if (!float.TryParse(fieldString, out float parsedFloat))
{ {
throw new Exception($"Field {j} on line {line} is not valid single-precision float"); throw new Exception($"Field {j} on line {line} is not valid single-precision float");
} }
parsedFields[j] = parsedFloat; parsedFields[j] = parsedFloat;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedFloat); }
break; else if (parsed.ColumnTypes[j] == typeof(Float64Type))
case ColumnType.FLOAT64: {
if (!double.TryParse(fieldString, out double parsedDouble)) if (!double.TryParse(fieldString, out double parsedDouble))
{ {
throw new Exception($"Field {j} on line {line} is not valid double-precision float"); throw new Exception($"Field {j} on line {line} is not valid double-precision float");
} }
parsedFields[j] = parsedDouble; parsedFields[j] = parsedDouble;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedDouble); }
break; else if (parsed.ColumnTypes[j] == typeof(UInt32Type))
case ColumnType.UINT32: {
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32)) if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
{ {
throw new Exception($"Field {j} on line {line} is not valid UInt32"); throw new Exception($"Field {j} on line {line} is not valid UInt32");
} }
parsedFields[j] = parsedUInt32; parsedFields[j] = parsedUInt32;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt32); }
break; else if (parsed.ColumnTypes[j] == typeof(UInt64Type))
case ColumnType.UINT64: {
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64)) if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
{ {
throw new Exception($"Field {j} on line {line} is not valid UInt64"); throw new Exception($"Field {j} on line {line} is not valid UInt64");
} }
parsedFields[j] = parsedUInt64; parsedFields[j] = parsedUInt64;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt64); }
break; else if (parsed.ColumnTypes[j] == typeof(Int32Type))
case ColumnType.INT32: {
if (!Int32.TryParse(fieldString, out Int32 parsedInt32)) if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
{ {
throw new Exception($"Field {j} on line {line} is not valid Int32"); throw new Exception($"Field {j} on line {line} is not valid Int32");
} }
parsedFields[j] = parsedInt32; parsedFields[j] = parsedInt32;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt32); }
break; else if (parsed.ColumnTypes[j] == typeof(Int64Type))
case ColumnType.INT64: {
if (!Int64.TryParse(fieldString, out Int64 parsedInt64)) if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
{ {
throw new Exception($"Field {j} on line {line} is not valid Int64"); throw new Exception($"Field {j} on line {line} is not valid Int64");
} }
parsedFields[j] = parsedInt64; parsedFields[j] = parsedInt64;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt64); }
break; else
case ColumnType.BINARY: {
throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}"); throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}");
default:
throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}");
} }
} }