Compare commits

..

2 Commits

Author SHA1 Message Date
Nathan McRae
a80206767e Change column type management
Column types were previously tracked only as a Type. This changes them to be ColumnType
instances so they can carry additional information (such as the specific units of a physical
units type). Because of this, the column type given to the attribute needs to be passed as a
string (see compiler error CS0181: a class instance is not a valid attribute parameter type).
2024-03-10 22:28:05 -07:00
Nathan McRae
b8ae3ce65d Fix file comment serialization 2024-03-10 22:16:04 -07:00
2 changed files with 172 additions and 94 deletions

View File

@ -84,7 +84,7 @@ public class SaneTsv
}
parsed.Records = new List<T>();
var columnTypes = new List<Type>();
var columnTypes = new List<ColumnType>();
var columnNames = new List<string>();
var columnPropertyInfos = new List<PropertyInfo>();
int columnCount = 0;
@ -190,46 +190,46 @@ public class SaneTsv
columnName = columnString;
}
Type type;
ColumnType type;
switch (columnTypeString)
{
case "":
numTypesBlank++;
type = typeof(StringType);
type = new StringType();
break;
case "string":
type = typeof(StringType);
type = new StringType();
break;
case "boolean":
type = typeof(BooleanType);
type = new BooleanType();
break;
case "float32":
type = typeof(Float32Type);
type = new Float32Type();
break;
case "float32-le":
type = typeof(Float32LEType);
type = new Float32LEType();
break;
case "float64":
type = typeof(Float64Type);
type = new Float64Type();
break;
case "float64-le":
type = typeof(Float64LEType);
type = new Float64LEType();
break;
case "uint32":
type = typeof(UInt32Type);
type = new UInt32Type();
break;
case "uint64":
type = typeof(UInt64Type);
type = new UInt64Type();
break;
case "int32":
type = typeof(Int32Type);
type = new Int32Type();
break;
case "int64":
type = typeof(Int64Type);
type = new Int64Type();
break;
case "binary":
type = typeof(BinaryType);
type = new BinaryType();
break;
default:
throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
@ -242,7 +242,7 @@ public class SaneTsv
throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}");
}
if (columnTypes[j] != type)
if (columnTypes[j].GetType() != type.GetType())
{
throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}");
}
@ -348,7 +348,7 @@ public class SaneTsv
// startIndex is in we'd have to go back to the start of the record's comment, and to know
// exactly where that comment started we'd have to go back to the start of the record before that
// (not including that other record's comment).
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
{
var fieldBytes = new List<byte>();
var fields = new List<byte[]>();
@ -509,12 +509,12 @@ public class SaneTsv
return parsed.ToArray();
}
protected static T ParseCurrentCommentedRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
protected static T ParseCurrentCommentedRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
{
return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line);
}
protected static T ParseCurrentRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
protected static T ParseCurrentRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
{
T record = new T();
@ -532,7 +532,7 @@ public class SaneTsv
for (int j = 0; j < fields.Count; j++)
{
// All other types require the content to be UTF-8. Binary fields can ignore that.
if (columnTypes[j] == typeof(BinaryType))
if (columnTypes[j].GetType() == typeof(BinaryType))
{
// TODO: Use faster method for property setting
// e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html
@ -541,7 +541,7 @@ public class SaneTsv
properties[j].SetValue(record, fields[j]);
continue;
}
else if (columnTypes[j] == typeof(Float32LEType))
else if (columnTypes[j].GetType() == typeof(Float32LEType))
{
byte[] floatBytes;
if (!LittleEndian)
@ -560,7 +560,7 @@ public class SaneTsv
continue;
}
else if (columnTypes[j] == typeof(Float64LEType))
else if (columnTypes[j].GetType() == typeof(Float64LEType))
{
byte[] floatBytes;
if (!LittleEndian)
@ -592,11 +592,11 @@ public class SaneTsv
// TODO: Add checking for numeric types format
if (columnTypes[j] == typeof(StringType))
if (columnTypes[j].GetType() == typeof(StringType))
{
properties[j].SetValue(record, fieldString);
}
else if (columnTypes[j] == typeof(BooleanType))
else if (columnTypes[j].GetType() == typeof(BooleanType))
{
bool parsedBool;
if (fieldString == "TRUE")
@ -614,7 +614,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedBool);
}
else if (columnTypes[j] == typeof(Float32Type))
else if (columnTypes[j].GetType() == typeof(Float32Type))
{
float parsedFloat;
if (!float.TryParse(fieldString, out parsedFloat))
@ -635,7 +635,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedFloat);
}
else if (columnTypes[j] == typeof(Float64Type))
else if (columnTypes[j].GetType() == typeof(Float64Type))
{
double parsedDouble;
if (!double.TryParse(fieldString, out parsedDouble))
@ -656,7 +656,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedDouble);
}
else if (columnTypes[j] == typeof(UInt32Type))
else if (columnTypes[j].GetType() == typeof(UInt32Type))
{
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
{
@ -665,7 +665,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedUInt32);
}
else if (columnTypes[j] == typeof(UInt64Type))
else if (columnTypes[j].GetType() == typeof(UInt64Type))
{
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
{
@ -674,7 +674,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedUInt64);
}
else if (columnTypes[j] == typeof(Int32Type))
else if (columnTypes[j].GetType() == typeof(Int32Type))
{
if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
{
@ -683,7 +683,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedInt32);
}
else if (columnTypes[j] == typeof(Int64Type))
else if (columnTypes[j].GetType() == typeof(Int64Type))
{
if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
{
@ -1066,99 +1066,151 @@ public class SaneTsv
return records.ToArray();
}
public static Type GetColumnFromType(Type type)
public static ColumnType GetColumnFromString(string type)
{
if (type == typeof(string))
if (type == "string")
{
return typeof(StringType);
return new StringType();
}
else if (type == typeof(bool))
else if (type == "boolean")
{
return typeof(BooleanType);
return new BooleanType();
}
else if (type == typeof(float))
else if (type == "float32")
{
return typeof(Float32Type);
return new Float32Type();
}
else if (type == typeof(double))
else if (type == "float32-le")
{
return typeof(Float64Type);
return new Float32LEType();
}
else if (type == typeof(UInt32))
else if (type == "float64")
{
return typeof(UInt32Type);
return new Float64Type();
}
else if (type == typeof(UInt64))
else if (type == "float64-le")
{
return typeof(UInt64Type);
return new Float64LEType();
}
else if (type == typeof(Int32))
else if (type == "uint32")
{
return typeof(Int32Type);
return new UInt32Type();
}
else if (type == typeof(Int64))
else if (type == "uint64")
{
return typeof(Int64Type);
return new UInt64Type();
}
else if (type == typeof(byte[]))
else if (type == "int32")
{
return typeof(BinaryType);
return new Int32Type();
}
else if (type == "int64")
{
return new Int64Type();
}
else if (type == "binary")
{
return new BinaryType();
}
else
{
throw new Exception($"Invalid type: {type}");
throw new Exception($"Invalid type: {type.GetType()}");
}
}
public static string GetNameFromColumn(Type type)
public static ColumnType GetColumnFromType(Type type)
{
if (type == typeof(StringType))
if (type == typeof(string))
{
return new StringType();
}
else if (type == typeof(bool))
{
return new BooleanType();
}
else if (type == typeof(float))
{
return new Float32Type();
}
else if (type == typeof(double))
{
return new Float64Type();
}
else if (type == typeof(UInt32))
{
return new UInt32Type();
}
else if (type == typeof(UInt64))
{
return new UInt64Type();
}
else if (type == typeof(Int32))
{
return new Int32Type();
}
else if (type == typeof(Int64))
{
return new Int64Type();
}
else if (type == typeof(byte[]))
{
return new BinaryType();
}
else
{
throw new Exception($"Invalid type: {type.GetType()}");
}
}
public static string GetNameFromColumn(ColumnType type)
{
if (type.GetType() == typeof(StringType))
{
return "string";
}
else if (type == typeof(BooleanType))
else if (type.GetType() == typeof(BooleanType))
{
return "boolean";
}
else if (type == typeof(Float32Type))
else if (type.GetType() == typeof(Float32Type))
{
return "float32";
}
else if (type == typeof(Float32LEType))
else if (type.GetType() == typeof(Float32LEType))
{
return "float32-le";
}
else if (type == typeof(Float64Type))
else if (type.GetType() == typeof(Float64Type))
{
return "float64";
}
else if (type == typeof(Float64LEType))
else if (type.GetType() == typeof(Float64LEType))
{
return "float64-le";
}
else if (type == typeof(UInt32Type))
else if (type.GetType() == typeof(UInt32Type))
{
return "uint32";
}
else if (type == typeof(UInt64Type))
else if (type.GetType() == typeof(UInt64Type))
{
return "uint64";
}
else if (type == typeof(Int32Type))
else if (type.GetType() == typeof(Int32Type))
{
return "int32";
}
else if (type == typeof(Int64Type))
else if (type.GetType() == typeof(Int64Type))
{
return "int64";
}
else if (type == typeof(BinaryType))
else if (type.GetType() == typeof(BinaryType))
{
return "binary";
}
else
{
throw new Exception($"Invalid type: {type}");
throw new Exception($"Invalid type: {type.GetType()}");
}
}
@ -1174,14 +1226,24 @@ public class SaneTsv
public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord
{
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV);
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV, fileComment);
}
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat)
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat, string fileComment = null)
{
var bytes = new List<byte>();
var columnTypes = new List<Type>();
if (fileComment != null)
{
if (tsvFormat != FormatType.COMMENTED_TSV)
{
throw new Exception($"File comments are not valid for {tsvFormat}");
}
bytes.AddRange(Encoding.UTF8.GetBytes("#" + fileComment.Replace("\n", "\n#") + "\n"));
}
var columnTypes = new List<ColumnType>();
var columnNames = new List<string>();
var columnPropertyInfos = new List<PropertyInfo>();
int columnCount = 0;
@ -1197,8 +1259,8 @@ public class SaneTsv
string headerName = attribute.ColumnName ?? property.Name;
columnNames.Add(headerName);
Type headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
if (tsvFormat == FormatType.SIMPLE_TSV && headerType != typeof(StringType))
ColumnType headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
if (tsvFormat == FormatType.SIMPLE_TSV && headerType.GetType() != typeof(StringType))
{
throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'");
}
@ -1278,7 +1340,7 @@ public class SaneTsv
return bytes.ToArray();
}
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
{
// Serialize data
for (int i = 0; i < data.Count; i++)
@ -1293,16 +1355,16 @@ public class SaneTsv
// Some fields definitely don't need escaping, so we add them directly to bytes
bool skipEscaping = false;
if (columnTypes[j] == typeof(StringType))
if (columnTypes[j].GetType() == typeof(StringType))
{
fieldEncoded = Encoding.UTF8.GetBytes((string)datum);
}
else if (columnTypes[j] == typeof(BooleanType))
else if (columnTypes[j].GetType() == typeof(BooleanType))
{
bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded);
skipEscaping = true;
}
else if (columnTypes[j] == typeof(Float32Type))
else if (columnTypes[j].GetType() == typeof(Float32Type))
{
if (datum is float f)
{
@ -1326,7 +1388,7 @@ public class SaneTsv
}
skipEscaping = true;
}
else if (columnTypes[j] == typeof(Float32LEType))
else if (columnTypes[j].GetType() == typeof(Float32LEType))
{
if (LittleEndian)
{
@ -1342,7 +1404,7 @@ public class SaneTsv
}
}
}
else if (columnTypes[j] == typeof(Float64Type))
else if (columnTypes[j].GetType() == typeof(Float64Type))
{
if (datum is double d)
{
@ -1366,7 +1428,7 @@ public class SaneTsv
}
skipEscaping = true;
}
else if (columnTypes[j] == typeof(Float64LEType))
else if (columnTypes[j].GetType() == typeof(Float64LEType))
{
if (LittleEndian)
{
@ -1382,27 +1444,27 @@ public class SaneTsv
}
}
}
else if (columnTypes[j] == typeof(UInt32Type))
else if (columnTypes[j].GetType() == typeof(UInt32Type))
{
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString()));
skipEscaping = true;
}
else if (columnTypes[j] == typeof(UInt64Type))
else if (columnTypes[j].GetType() == typeof(UInt64Type))
{
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString()));
skipEscaping = true;
}
else if (columnTypes[j] == typeof(Int32Type))
else if (columnTypes[j].GetType() == typeof(Int32Type))
{
bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString()));
skipEscaping = true;
}
else if (columnTypes[j] == typeof(Int64Type))
else if (columnTypes[j].GetType() == typeof(Int64Type))
{
bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString()));
skipEscaping = true;
}
else if (columnTypes[j] == typeof(BinaryType))
else if (columnTypes[j].GetType() == typeof(BinaryType))
{
fieldEncoded = (byte[])datum;
}
@ -1512,15 +1574,15 @@ public class SaneTsv
public class TsvColumnAttribute : Attribute
{
public string ColumnName { get; }
public virtual Type ColumnType { get; }
public virtual ColumnType ColumnType { get; }
public TsvColumnAttribute()
{
ColumnType = typeof(StringType);
ColumnType = new StringType();
}
public TsvColumnAttribute(string columnName)
{
ColumnType = typeof(StringType);
ColumnType = new StringType();
ColumnName = columnName;
}
}
@ -1528,27 +1590,19 @@ public class SaneTsv
// TODO: Add column ordering
public class TypedTsvColumnAttribute : TsvColumnAttribute
{
public override Type ColumnType { get; }
public override ColumnType ColumnType { get; }
public TypedTsvColumnAttribute() { }
public TypedTsvColumnAttribute(string columnName) : base(columnName) { }
public TypedTsvColumnAttribute(string columnName, Type columnType) : base(columnName)
public TypedTsvColumnAttribute(string columnName, string columnType) : base(columnName)
{
if (columnType.BaseType != typeof(ColumnType))
{
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
}
ColumnType = columnType;
ColumnType = GetColumnFromString(columnType);
}
public TypedTsvColumnAttribute(Type columnType)
public TypedTsvColumnAttribute(ColumnType columnType)
{
if (columnType.BaseType != typeof(ColumnType))
{
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
}
ColumnType = columnType;
}
}

View File

@ -759,6 +759,30 @@ internal class Program : SaneTsv
}
}
{
string testName = "File comment serde";
string testString1 = "#this is a file comment" +
"\n# and one more line since you're such a good customer" +
"\ncolumn1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
"\nFALSE\tnother\tno\\ther";
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
string reserialized = Encoding.UTF8.GetString(SaneTsv.SerializeCommentedTsv<BoolTestRecord2>(parsed.Records, parsed.FileComment));
if (reserialized == testString1)
{
Console.WriteLine($"Passed {testName}");
}
else
{
Console.WriteLine($"Failed {testName}");
}
}
Console.WriteLine("Done with tests");
}
}