Compare commits

...

2 Commits

Author SHA1 Message Date
Nathan McRae
a80206767e Change column type management
The column types were tracked just as a Type. This changes them to be instances
so they can track additional information (such as the specific units of a physical
units type). Because of this, the column type given to the attribute needs to be passed
as a string (see compiler error CS0181: not a valid attribute parameter type).
2024-03-10 22:28:05 -07:00
Nathan McRae
b8ae3ce65d Fix file comment serialization 2024-03-10 22:16:04 -07:00
2 changed files with 172 additions and 94 deletions

View File

@ -84,7 +84,7 @@ public class SaneTsv
} }
parsed.Records = new List<T>(); parsed.Records = new List<T>();
var columnTypes = new List<Type>(); var columnTypes = new List<ColumnType>();
var columnNames = new List<string>(); var columnNames = new List<string>();
var columnPropertyInfos = new List<PropertyInfo>(); var columnPropertyInfos = new List<PropertyInfo>();
int columnCount = 0; int columnCount = 0;
@ -190,46 +190,46 @@ public class SaneTsv
columnName = columnString; columnName = columnString;
} }
Type type; ColumnType type;
switch (columnTypeString) switch (columnTypeString)
{ {
case "": case "":
numTypesBlank++; numTypesBlank++;
type = typeof(StringType); type = new StringType();
break; break;
case "string": case "string":
type = typeof(StringType); type = new StringType();
break; break;
case "boolean": case "boolean":
type = typeof(BooleanType); type = new BooleanType();
break; break;
case "float32": case "float32":
type = typeof(Float32Type); type = new Float32Type();
break; break;
case "float32-le": case "float32-le":
type = typeof(Float32LEType); type = new Float32LEType();
break; break;
case "float64": case "float64":
type = typeof(Float64Type); type = new Float64Type();
break; break;
case "float64-le": case "float64-le":
type = typeof(Float64LEType); type = new Float64LEType();
break; break;
case "uint32": case "uint32":
type = typeof(UInt32Type); type = new UInt32Type();
break; break;
case "uint64": case "uint64":
type = typeof(UInt64Type); type = new UInt64Type();
break; break;
case "int32": case "int32":
type = typeof(Int32Type); type = new Int32Type();
break; break;
case "int64": case "int64":
type = typeof(Int64Type); type = new Int64Type();
break; break;
case "binary": case "binary":
type = typeof(BinaryType); type = new BinaryType();
break; break;
default: default:
throw new Exception($"Invalid type '{columnTypeString}' for column {j}"); throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
@ -242,7 +242,7 @@ public class SaneTsv
throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}"); throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}");
} }
if (columnTypes[j] != type) if (columnTypes[j].GetType() != type.GetType())
{ {
throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}"); throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}");
} }
@ -348,7 +348,7 @@ public class SaneTsv
// startIndex is in we'd have to go back to the start of the record's comment, and to know // startIndex is in we'd have to go back to the start of the record's comment, and to know
// exactly where that comment started we'd have to go back to the start of the record before that // exactly where that comment started we'd have to go back to the start of the record before that
// (not including that other record's comment). // (not including that other record's comment).
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new() protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
{ {
var fieldBytes = new List<byte>(); var fieldBytes = new List<byte>();
var fields = new List<byte[]>(); var fields = new List<byte[]>();
@ -509,12 +509,12 @@ public class SaneTsv
return parsed.ToArray(); return parsed.ToArray();
} }
protected static T ParseCurrentCommentedRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new() protected static T ParseCurrentCommentedRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
{ {
return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line); return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line);
} }
protected static T ParseCurrentRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new() protected static T ParseCurrentRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
{ {
T record = new T(); T record = new T();
@ -532,7 +532,7 @@ public class SaneTsv
for (int j = 0; j < fields.Count; j++) for (int j = 0; j < fields.Count; j++)
{ {
// All other types require the content to be UTF-8. Binary fields can ignore that. // All other types require the content to be UTF-8. Binary fields can ignore that.
if (columnTypes[j] == typeof(BinaryType)) if (columnTypes[j].GetType() == typeof(BinaryType))
{ {
// TODO: Use faster method for property setting // TODO: Use faster method for property setting
// e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html // e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html
@ -541,7 +541,7 @@ public class SaneTsv
properties[j].SetValue(record, fields[j]); properties[j].SetValue(record, fields[j]);
continue; continue;
} }
else if (columnTypes[j] == typeof(Float32LEType)) else if (columnTypes[j].GetType() == typeof(Float32LEType))
{ {
byte[] floatBytes; byte[] floatBytes;
if (!LittleEndian) if (!LittleEndian)
@ -560,7 +560,7 @@ public class SaneTsv
continue; continue;
} }
else if (columnTypes[j] == typeof(Float64LEType)) else if (columnTypes[j].GetType() == typeof(Float64LEType))
{ {
byte[] floatBytes; byte[] floatBytes;
if (!LittleEndian) if (!LittleEndian)
@ -592,11 +592,11 @@ public class SaneTsv
// TODO: Add checking for numeric types format // TODO: Add checking for numeric types format
if (columnTypes[j] == typeof(StringType)) if (columnTypes[j].GetType() == typeof(StringType))
{ {
properties[j].SetValue(record, fieldString); properties[j].SetValue(record, fieldString);
} }
else if (columnTypes[j] == typeof(BooleanType)) else if (columnTypes[j].GetType() == typeof(BooleanType))
{ {
bool parsedBool; bool parsedBool;
if (fieldString == "TRUE") if (fieldString == "TRUE")
@ -614,7 +614,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedBool); properties[j].SetValue(record, parsedBool);
} }
else if (columnTypes[j] == typeof(Float32Type)) else if (columnTypes[j].GetType() == typeof(Float32Type))
{ {
float parsedFloat; float parsedFloat;
if (!float.TryParse(fieldString, out parsedFloat)) if (!float.TryParse(fieldString, out parsedFloat))
@ -635,7 +635,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedFloat); properties[j].SetValue(record, parsedFloat);
} }
else if (columnTypes[j] == typeof(Float64Type)) else if (columnTypes[j].GetType() == typeof(Float64Type))
{ {
double parsedDouble; double parsedDouble;
if (!double.TryParse(fieldString, out parsedDouble)) if (!double.TryParse(fieldString, out parsedDouble))
@ -656,7 +656,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedDouble); properties[j].SetValue(record, parsedDouble);
} }
else if (columnTypes[j] == typeof(UInt32Type)) else if (columnTypes[j].GetType() == typeof(UInt32Type))
{ {
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32)) if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
{ {
@ -665,7 +665,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedUInt32); properties[j].SetValue(record, parsedUInt32);
} }
else if (columnTypes[j] == typeof(UInt64Type)) else if (columnTypes[j].GetType() == typeof(UInt64Type))
{ {
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64)) if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
{ {
@ -674,7 +674,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedUInt64); properties[j].SetValue(record, parsedUInt64);
} }
else if (columnTypes[j] == typeof(Int32Type)) else if (columnTypes[j].GetType() == typeof(Int32Type))
{ {
if (!Int32.TryParse(fieldString, out Int32 parsedInt32)) if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
{ {
@ -683,7 +683,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedInt32); properties[j].SetValue(record, parsedInt32);
} }
else if (columnTypes[j] == typeof(Int64Type)) else if (columnTypes[j].GetType() == typeof(Int64Type))
{ {
if (!Int64.TryParse(fieldString, out Int64 parsedInt64)) if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
{ {
@ -1066,99 +1066,151 @@ public class SaneTsv
return records.ToArray(); return records.ToArray();
} }
public static Type GetColumnFromType(Type type) public static ColumnType GetColumnFromString(string type)
{ {
if (type == typeof(string)) if (type == "string")
{ {
return typeof(StringType); return new StringType();
} }
else if (type == typeof(bool)) else if (type == "boolean")
{ {
return typeof(BooleanType); return new BooleanType();
} }
else if (type == typeof(float)) else if (type == "float32")
{ {
return typeof(Float32Type); return new Float32Type();
} }
else if (type == typeof(double)) else if (type == "float32-le")
{ {
return typeof(Float64Type); return new Float32LEType();
} }
else if (type == typeof(UInt32)) else if (type == "float64")
{ {
return typeof(UInt32Type); return new Float64Type();
} }
else if (type == typeof(UInt64)) else if (type == "float64-le")
{ {
return typeof(UInt64Type); return new Float64LEType();
} }
else if (type == typeof(Int32)) else if (type == "uint32")
{ {
return typeof(Int32Type); return new UInt32Type();
} }
else if (type == typeof(Int64)) else if (type == "uint64")
{ {
return typeof(Int64Type); return new UInt64Type();
} }
else if (type == typeof(byte[])) else if (type == "int32")
{ {
return typeof(BinaryType); return new Int32Type();
}
else if (type == "int64")
{
return new Int64Type();
}
else if (type == "binary")
{
return new BinaryType();
} }
else else
{ {
throw new Exception($"Invalid type: {type}"); throw new Exception($"Invalid type: {type.GetType()}");
} }
} }
public static string GetNameFromColumn(Type type) public static ColumnType GetColumnFromType(Type type)
{ {
if (type == typeof(StringType)) if (type == typeof(string))
{
return new StringType();
}
else if (type == typeof(bool))
{
return new BooleanType();
}
else if (type == typeof(float))
{
return new Float32Type();
}
else if (type == typeof(double))
{
return new Float64Type();
}
else if (type == typeof(UInt32))
{
return new UInt32Type();
}
else if (type == typeof(UInt64))
{
return new UInt64Type();
}
else if (type == typeof(Int32))
{
return new Int32Type();
}
else if (type == typeof(Int64))
{
return new Int64Type();
}
else if (type == typeof(byte[]))
{
return new BinaryType();
}
else
{
throw new Exception($"Invalid type: {type.GetType()}");
}
}
public static string GetNameFromColumn(ColumnType type)
{
if (type.GetType() == typeof(StringType))
{ {
return "string"; return "string";
} }
else if (type == typeof(BooleanType)) else if (type.GetType() == typeof(BooleanType))
{ {
return "boolean"; return "boolean";
} }
else if (type == typeof(Float32Type)) else if (type.GetType() == typeof(Float32Type))
{ {
return "float32"; return "float32";
} }
else if (type == typeof(Float32LEType)) else if (type.GetType() == typeof(Float32LEType))
{ {
return "float32-le"; return "float32-le";
} }
else if (type == typeof(Float64Type)) else if (type.GetType() == typeof(Float64Type))
{ {
return "float64"; return "float64";
} }
else if (type == typeof(Float64LEType)) else if (type.GetType() == typeof(Float64LEType))
{ {
return "float64-le"; return "float64-le";
} }
else if (type == typeof(UInt32Type)) else if (type.GetType() == typeof(UInt32Type))
{ {
return "uint32"; return "uint32";
} }
else if (type == typeof(UInt64Type)) else if (type.GetType() == typeof(UInt64Type))
{ {
return "uint64"; return "uint64";
} }
else if (type == typeof(Int32Type)) else if (type.GetType() == typeof(Int32Type))
{ {
return "int32"; return "int32";
} }
else if (type == typeof(Int64Type)) else if (type.GetType() == typeof(Int64Type))
{ {
return "int64"; return "int64";
} }
else if (type == typeof(BinaryType)) else if (type.GetType() == typeof(BinaryType))
{ {
return "binary"; return "binary";
} }
else else
{ {
throw new Exception($"Invalid type: {type}"); throw new Exception($"Invalid type: {type.GetType()}");
} }
} }
@ -1174,14 +1226,24 @@ public class SaneTsv
public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord
{ {
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV); return SerializeTsv<T>(data, FormatType.COMMENTED_TSV, fileComment);
} }
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat) protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat, string fileComment = null)
{ {
var bytes = new List<byte>(); var bytes = new List<byte>();
var columnTypes = new List<Type>(); if (fileComment != null)
{
if (tsvFormat != FormatType.COMMENTED_TSV)
{
throw new Exception($"File comments are not valid for {tsvFormat}");
}
bytes.AddRange(Encoding.UTF8.GetBytes("#" + fileComment.Replace("\n", "\n#") + "\n"));
}
var columnTypes = new List<ColumnType>();
var columnNames = new List<string>(); var columnNames = new List<string>();
var columnPropertyInfos = new List<PropertyInfo>(); var columnPropertyInfos = new List<PropertyInfo>();
int columnCount = 0; int columnCount = 0;
@ -1197,8 +1259,8 @@ public class SaneTsv
string headerName = attribute.ColumnName ?? property.Name; string headerName = attribute.ColumnName ?? property.Name;
columnNames.Add(headerName); columnNames.Add(headerName);
Type headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType); ColumnType headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
if (tsvFormat == FormatType.SIMPLE_TSV && headerType != typeof(StringType)) if (tsvFormat == FormatType.SIMPLE_TSV && headerType.GetType() != typeof(StringType))
{ {
throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'"); throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'");
} }
@ -1278,7 +1340,7 @@ public class SaneTsv
return bytes.ToArray(); return bytes.ToArray();
} }
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex) protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
{ {
// Serialize data // Serialize data
for (int i = 0; i < data.Count; i++) for (int i = 0; i < data.Count; i++)
@ -1293,16 +1355,16 @@ public class SaneTsv
// Some fields definitely don't need escaping, so we add them directly to bytes // Some fields definitely don't need escaping, so we add them directly to bytes
bool skipEscaping = false; bool skipEscaping = false;
if (columnTypes[j] == typeof(StringType)) if (columnTypes[j].GetType() == typeof(StringType))
{ {
fieldEncoded = Encoding.UTF8.GetBytes((string)datum); fieldEncoded = Encoding.UTF8.GetBytes((string)datum);
} }
else if (columnTypes[j] == typeof(BooleanType)) else if (columnTypes[j].GetType() == typeof(BooleanType))
{ {
bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded); bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded);
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Float32Type)) else if (columnTypes[j].GetType() == typeof(Float32Type))
{ {
if (datum is float f) if (datum is float f)
{ {
@ -1326,7 +1388,7 @@ public class SaneTsv
} }
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Float32LEType)) else if (columnTypes[j].GetType() == typeof(Float32LEType))
{ {
if (LittleEndian) if (LittleEndian)
{ {
@ -1342,7 +1404,7 @@ public class SaneTsv
} }
} }
} }
else if (columnTypes[j] == typeof(Float64Type)) else if (columnTypes[j].GetType() == typeof(Float64Type))
{ {
if (datum is double d) if (datum is double d)
{ {
@ -1366,7 +1428,7 @@ public class SaneTsv
} }
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Float64LEType)) else if (columnTypes[j].GetType() == typeof(Float64LEType))
{ {
if (LittleEndian) if (LittleEndian)
{ {
@ -1382,27 +1444,27 @@ public class SaneTsv
} }
} }
} }
else if (columnTypes[j] == typeof(UInt32Type)) else if (columnTypes[j].GetType() == typeof(UInt32Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(UInt64Type)) else if (columnTypes[j].GetType() == typeof(UInt64Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Int32Type)) else if (columnTypes[j].GetType() == typeof(Int32Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Int64Type)) else if (columnTypes[j].GetType() == typeof(Int64Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(BinaryType)) else if (columnTypes[j].GetType() == typeof(BinaryType))
{ {
fieldEncoded = (byte[])datum; fieldEncoded = (byte[])datum;
} }
@ -1512,15 +1574,15 @@ public class SaneTsv
public class TsvColumnAttribute : Attribute public class TsvColumnAttribute : Attribute
{ {
public string ColumnName { get; } public string ColumnName { get; }
public virtual Type ColumnType { get; } public virtual ColumnType ColumnType { get; }
public TsvColumnAttribute() public TsvColumnAttribute()
{ {
ColumnType = typeof(StringType); ColumnType = new StringType();
} }
public TsvColumnAttribute(string columnName) public TsvColumnAttribute(string columnName)
{ {
ColumnType = typeof(StringType); ColumnType = new StringType();
ColumnName = columnName; ColumnName = columnName;
} }
} }
@ -1528,27 +1590,19 @@ public class SaneTsv
// TODO: Add column ordering // TODO: Add column ordering
public class TypedTsvColumnAttribute : TsvColumnAttribute public class TypedTsvColumnAttribute : TsvColumnAttribute
{ {
public override Type ColumnType { get; } public override ColumnType ColumnType { get; }
public TypedTsvColumnAttribute() { } public TypedTsvColumnAttribute() { }
public TypedTsvColumnAttribute(string columnName) : base(columnName) { } public TypedTsvColumnAttribute(string columnName) : base(columnName) { }
public TypedTsvColumnAttribute(string columnName, Type columnType) : base(columnName) public TypedTsvColumnAttribute(string columnName, string columnType) : base(columnName)
{ {
if (columnType.BaseType != typeof(ColumnType)) ColumnType = GetColumnFromString(columnType);
{
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
}
ColumnType = columnType;
} }
public TypedTsvColumnAttribute(Type columnType) public TypedTsvColumnAttribute(ColumnType columnType)
{ {
if (columnType.BaseType != typeof(ColumnType))
{
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
}
ColumnType = columnType; ColumnType = columnType;
} }
} }

View File

@ -759,6 +759,30 @@ internal class Program : SaneTsv
} }
} }
{
string testName = "File comment serde";
string testString1 = "#this is a file comment" +
"\n# and one more line since you're such a good customer" +
"\ncolumn1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
"\nFALSE\tnother\tno\\ther";
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
string reserialized = Encoding.UTF8.GetString(SaneTsv.SerializeCommentedTsv<BoolTestRecord2>(parsed.Records, parsed.FileComment));
if (reserialized == testString1)
{
Console.WriteLine($"Passed {testName}");
}
else
{
Console.WriteLine($"Failed {testName}");
}
}
Console.WriteLine("Done with tests"); Console.WriteLine("Done with tests");
} }
} }