Change column type implementation

Use types instead of an enum so it's more easily extended.
This commit is contained in:
Nathan McRae 2024-02-15 20:22:43 -08:00
parent ddbda890cf
commit 2dd6b1e25b

View File

@ -7,18 +7,17 @@ namespace NathanMcRae;
/// </summary>
public class SaneTsv
{
public enum ColumnType
{
STRING,
BOOLEAN,
FLOAT32,
FLOAT64,
UINT32,
UINT64,
INT32,
INT64,
BINARY,
}
// Like an enum, but more extensible: each column type is an empty marker class
// deriving from ColumnType. The parser records a column's type as the
// System.Type of its marker (e.g. typeof(StringType)) and dispatches on it
// with == comparisons against typeof(...).

/// <summary>
/// Base class for all column type markers. Carries no state or behaviour.
/// </summary>
public class ColumnType { }

/// <summary>
/// Marker for "string" columns; also used when the header gives no type.
/// The field text is stored unchanged.
/// </summary>
public class StringType : ColumnType { }

/// <summary>
/// Marker for "boolean" columns; fields are stored as <see cref="bool"/>.
/// </summary>
public class BooleanType : ColumnType { }

/// <summary>
/// Marker for "float32" columns; fields are parsed with <c>float.TryParse</c>.
/// </summary>
public class Float32Type : ColumnType { }

/// <summary>
/// Marker for "float64" columns; fields are parsed with <c>double.TryParse</c>.
/// </summary>
public class Float64Type : ColumnType { }

/// <summary>
/// Marker for "uint32" columns; fields are parsed with <c>UInt32.TryParse</c>.
/// </summary>
public class UInt32Type : ColumnType { }

/// <summary>
/// Marker for "uint64" columns; fields are parsed with <c>UInt64.TryParse</c>.
/// </summary>
public class UInt64Type : ColumnType { }

/// <summary>
/// Marker for "int32" columns; fields are parsed with <c>Int32.TryParse</c>.
/// </summary>
public class Int32Type : ColumnType { }

/// <summary>
/// Marker for "int64" columns; fields are parsed with <c>Int64.TryParse</c>.
/// </summary>
public class Int64Type : ColumnType { }

/// <summary>
/// Marker for "binary" columns. The raw field value is stored as-is and is
/// exempt from the UTF-8 requirement that applies to every other column type.
/// </summary>
public class BinaryType : ColumnType { }
protected enum FormatType
{
@ -29,7 +28,7 @@ public class SaneTsv
// TODO: We need to be able to update all these in tandem somehow
public string[] ColumnNames { get; protected set; }
public ColumnType[] ColumnTypes { get; protected set; }
public Type[] ColumnTypes { get; protected set; }
public List<SaneTsvRecord> Records { get; protected set; }
public string FileComment { get; protected set; } = null;
@ -53,7 +52,7 @@ public class SaneTsv
{
var parsed = new SaneTsv();
parsed.ColumnNames = new string[] { };
parsed.ColumnTypes = new ColumnType[] { };
parsed.ColumnTypes = new Type[] { };
parsed.Records = new List<SaneTsvRecord>();
var fieldBytes = new List<byte>();
@ -93,7 +92,7 @@ public class SaneTsv
}
else
{
throw new Exception($"Expected 'n', 't', or '\\' after '\\' at {i}");
throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {line} column {i - currentLineStart}");
}
}
else if (inputBuffer[i] == '\t')
@ -114,7 +113,7 @@ public class SaneTsv
numFields = fields.Count;
parsed.ColumnNames = new string[numFields];
parsed.ColumnTypes = new ColumnType[numFields];
parsed.ColumnTypes = new Type[numFields];
int numTypesBlank = 0;
@ -150,40 +149,40 @@ public class SaneTsv
columnName = columnString;
}
ColumnType type;
Type type;
switch (columnTypeString)
{
case "":
numTypesBlank++;
type = ColumnType.STRING;
type = typeof(StringType);
break;
case "string":
type = ColumnType.STRING;
type = typeof(StringType);
break;
case "boolean":
type = ColumnType.BOOLEAN;
type = typeof(BooleanType);
break;
case "float32":
type = ColumnType.FLOAT32;
type = typeof(Float32Type);
break;
case "float64":
type = ColumnType.FLOAT64;
type = typeof(Float64Type);
break;
case "uint32":
type = ColumnType.UINT32;
type = typeof(UInt32Type);
break;
case "uint64":
type = ColumnType.UINT64;
type = typeof(UInt64Type);
break;
case "int32":
type = ColumnType.INT32;
type = typeof(Int32Type);
break;
case "int64":
type = ColumnType.INT64;
type = typeof(Int64Type);
break;
case "binary":
type = ColumnType.BINARY;
type = typeof(BinaryType);
break;
default:
throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
@ -285,10 +284,9 @@ public class SaneTsv
for (int j = 0; j < fields.Count; j++)
{
// All other types require the content to be UTF-8. Binary fields can ignore that.
if (parsed.ColumnTypes[j] == ColumnType.BINARY)
if (parsed.ColumnTypes[j] == typeof(BinaryType))
{
parsedFields[j] = fields[j];
parsed.Columns[parsed.ColumnNames[j]].Add(fields[j]);
continue;
}
@ -304,13 +302,12 @@ public class SaneTsv
// TODO: Add checking for numeric types format
switch (parsed.ColumnTypes[j])
if (parsed.ColumnTypes[j] == typeof(StringType))
{
case ColumnType.STRING:
parsedFields[j] = fieldString;
parsed.Columns[parsed.ColumnNames[j]].Add(fieldString);
break;
case ColumnType.BOOLEAN:
}
else if (parsed.ColumnTypes[j] == typeof(BooleanType))
{
bool parsedBool;
if (fieldString == "TRUE")
{
@ -326,65 +323,63 @@ public class SaneTsv
}
parsedFields[j] = parsedBool;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedBool);
break;
case ColumnType.FLOAT32:
}
else if (parsed.ColumnTypes[j] == typeof(Float32Type))
{
if (!float.TryParse(fieldString, out float parsedFloat))
{
throw new Exception($"Field {j} on line {line} is not valid single-precision float");
}
parsedFields[j] = parsedFloat;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedFloat);
break;
case ColumnType.FLOAT64:
}
else if (parsed.ColumnTypes[j] == typeof(Float64Type))
{
if (!double.TryParse(fieldString, out double parsedDouble))
{
throw new Exception($"Field {j} on line {line} is not valid double-precision float");
}
parsedFields[j] = parsedDouble;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedDouble);
break;
case ColumnType.UINT32:
}
else if (parsed.ColumnTypes[j] == typeof(UInt32Type))
{
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
{
throw new Exception($"Field {j} on line {line} is not valid UInt32");
}
parsedFields[j] = parsedUInt32;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt32);
break;
case ColumnType.UINT64:
}
else if (parsed.ColumnTypes[j] == typeof(UInt64Type))
{
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
{
throw new Exception($"Field {j} on line {line} is not valid UInt64");
}
parsedFields[j] = parsedUInt64;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt64);
break;
case ColumnType.INT32:
}
else if (parsed.ColumnTypes[j] == typeof(Int32Type))
{
if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
{
throw new Exception($"Field {j} on line {line} is not valid Int32");
}
parsedFields[j] = parsedInt32;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt32);
break;
case ColumnType.INT64:
}
else if (parsed.ColumnTypes[j] == typeof(Int64Type))
{
if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
{
throw new Exception($"Field {j} on line {line} is not valid Int64");
}
parsedFields[j] = parsedInt64;
parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt64);
break;
case ColumnType.BINARY:
throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}");
default:
}
else
{
throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}");
}
}