diff --git a/SaneTsv.cs b/SaneTsv.cs index ff3b83f..71df6ea 100644 --- a/SaneTsv.cs +++ b/SaneTsv.cs @@ -84,7 +84,7 @@ public class SaneTsv } parsed.Records = new List(); - var columnTypes = new List(); + var columnTypes = new List(); var columnNames = new List(); var columnPropertyInfos = new List(); int columnCount = 0; @@ -190,46 +190,46 @@ public class SaneTsv columnName = columnString; } - Type type; + ColumnType type; switch (columnTypeString) { case "": numTypesBlank++; - type = typeof(StringType); + type = new StringType(); break; case "string": - type = typeof(StringType); + type = new StringType(); break; case "boolean": - type = typeof(BooleanType); + type = new BooleanType(); break; case "float32": - type = typeof(Float32Type); + type = new Float32Type(); break; case "float32-le": - type = typeof(Float32LEType); + type = new Float32LEType(); break; case "float64": - type = typeof(Float64Type); + type = new Float64Type(); break; case "float64-le": - type = typeof(Float64LEType); + type = new Float64LEType(); break; case "uint32": - type = typeof(UInt32Type); + type = new UInt32Type(); break; case "uint64": - type = typeof(UInt64Type); + type = new UInt64Type(); break; case "int32": - type = typeof(Int32Type); + type = new Int32Type(); break; case "int64": - type = typeof(Int64Type); + type = new Int64Type(); break; case "binary": - type = typeof(BinaryType); + type = new BinaryType(); break; default: throw new Exception($"Invalid type '{columnTypeString}' for column {j}"); @@ -242,7 +242,7 @@ public class SaneTsv throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}"); } - if (columnTypes[j] != type) + if (columnTypes[j].GetType() != type.GetType()) { throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}"); } @@ -348,7 +348,7 @@ public class SaneTsv // startIndex is in we'd have to go back to the start of the record's comment, and to know // exactly where that comment started we'd have to go back 
to the start of the record before that // (not including that other record's comment). - protected static T[] Parse(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new() + protected static T[] Parse(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new() { var fieldBytes = new List(); var fields = new List(); @@ -509,12 +509,12 @@ public class SaneTsv return parsed.ToArray(); } - protected static T ParseCurrentCommentedRecord(Type[] columnTypes, PropertyInfo[] properties, List fields, string comment, int line) where T : CommentedTsvRecord, new() + protected static T ParseCurrentCommentedRecord(ColumnType[] columnTypes, PropertyInfo[] properties, List fields, string comment, int line) where T : CommentedTsvRecord, new() { return (T)ParseCurrentRecord(columnTypes, properties, fields, comment, line); } - protected static T ParseCurrentRecord(Type[] columnTypes, PropertyInfo[] properties, List fields, string comment, int line) where T : TsvRecord, new() + protected static T ParseCurrentRecord(ColumnType[] columnTypes, PropertyInfo[] properties, List fields, string comment, int line) where T : TsvRecord, new() { T record = new T(); @@ -532,7 +532,7 @@ public class SaneTsv for (int j = 0; j < fields.Count; j++) { // All other types require the content to be UTF-8. Binary fields can ignore that. - if (columnTypes[j] == typeof(BinaryType)) + if (columnTypes[j].GetType() == typeof(BinaryType)) { // TODO: Use faster method for property setting // e.g. 
https://blog.marcgravell.com/2012/01/playing-with-your-member.html @@ -541,7 +541,7 @@ public class SaneTsv properties[j].SetValue(record, fields[j]); continue; } - else if (columnTypes[j] == typeof(Float32LEType)) + else if (columnTypes[j].GetType() == typeof(Float32LEType)) { byte[] floatBytes; if (!LittleEndian) @@ -560,7 +560,7 @@ public class SaneTsv continue; } - else if (columnTypes[j] == typeof(Float64LEType)) + else if (columnTypes[j].GetType() == typeof(Float64LEType)) { byte[] floatBytes; if (!LittleEndian) @@ -592,11 +592,11 @@ public class SaneTsv // TODO: Add checking for numeric types format - if (columnTypes[j] == typeof(StringType)) + if (columnTypes[j].GetType() == typeof(StringType)) { properties[j].SetValue(record, fieldString); } - else if (columnTypes[j] == typeof(BooleanType)) + else if (columnTypes[j].GetType() == typeof(BooleanType)) { bool parsedBool; if (fieldString == "TRUE") @@ -614,7 +614,7 @@ public class SaneTsv properties[j].SetValue(record, parsedBool); } - else if (columnTypes[j] == typeof(Float32Type)) + else if (columnTypes[j].GetType() == typeof(Float32Type)) { float parsedFloat; if (!float.TryParse(fieldString, out parsedFloat)) @@ -635,7 +635,7 @@ public class SaneTsv properties[j].SetValue(record, parsedFloat); } - else if (columnTypes[j] == typeof(Float64Type)) + else if (columnTypes[j].GetType() == typeof(Float64Type)) { double parsedDouble; if (!double.TryParse(fieldString, out parsedDouble)) @@ -656,7 +656,7 @@ public class SaneTsv properties[j].SetValue(record, parsedDouble); } - else if (columnTypes[j] == typeof(UInt32Type)) + else if (columnTypes[j].GetType() == typeof(UInt32Type)) { if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32)) { @@ -665,7 +665,7 @@ public class SaneTsv properties[j].SetValue(record, parsedUInt32); } - else if (columnTypes[j] == typeof(UInt64Type)) + else if (columnTypes[j].GetType() == typeof(UInt64Type)) { if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64)) { @@ -674,7 +674,7 
@@ public class SaneTsv properties[j].SetValue(record, parsedUInt64); } - else if (columnTypes[j] == typeof(Int32Type)) + else if (columnTypes[j].GetType() == typeof(Int32Type)) { if (!Int32.TryParse(fieldString, out Int32 parsedInt32)) { @@ -683,7 +683,7 @@ public class SaneTsv properties[j].SetValue(record, parsedInt32); } - else if (columnTypes[j] == typeof(Int64Type)) + else if (columnTypes[j].GetType() == typeof(Int64Type)) { if (!Int64.TryParse(fieldString, out Int64 parsedInt64)) { @@ -1066,99 +1066,151 @@ public class SaneTsv return records.ToArray(); } - public static Type GetColumnFromType(Type type) + public static ColumnType GetColumnFromString(string type) { - if (type == typeof(string)) + if (type == "string") { - return typeof(StringType); + return new StringType(); } - else if (type == typeof(bool)) + else if (type == "boolean") { - return typeof(BooleanType); + return new BooleanType(); } - else if (type == typeof(float)) + else if (type == "float32") { - return typeof(Float32Type); + return new Float32Type(); } - else if (type == typeof(double)) + else if (type == "float32-le") { - return typeof(Float64Type); + return new Float32LEType(); } - else if (type == typeof(UInt32)) + else if (type == "float64") { - return typeof(UInt32Type); + return new Float64Type(); } - else if (type == typeof(UInt64)) + else if (type == "float64-le") { - return typeof(UInt64Type); + return new Float64LEType(); } - else if (type == typeof(Int32)) + else if (type == "uint32") { - return typeof(Int32Type); + return new UInt32Type(); } - else if (type == typeof(Int64)) + else if (type == "uint64") { - return typeof(Int64Type); + return new UInt64Type(); } - else if (type == typeof(byte[])) + else if (type == "int32") { - return typeof(BinaryType); + return new Int32Type(); + } + else if (type == "int64") + { + return new Int64Type(); + } + else if (type == "binary") + { + return new BinaryType(); } else { - throw new Exception($"Invalid type: {type}"); + throw new 
Exception($"Invalid type: {type}"); } } - public static string GetNameFromColumn(Type type) + public static ColumnType GetColumnFromType(Type type) { - if (type == typeof(StringType)) + if (type == typeof(string)) + { + return new StringType(); + } + else if (type == typeof(bool)) + { + return new BooleanType(); + } + else if (type == typeof(float)) + { + return new Float32Type(); + } + else if (type == typeof(double)) + { + return new Float64Type(); + } + else if (type == typeof(UInt32)) + { + return new UInt32Type(); + } + else if (type == typeof(UInt64)) + { + return new UInt64Type(); + } + else if (type == typeof(Int32)) + { + return new Int32Type(); + } + else if (type == typeof(Int64)) + { + return new Int64Type(); + } + else if (type == typeof(byte[])) + { + return new BinaryType(); + } + else + { + throw new Exception($"Invalid type: {type}"); + } + } + + public static string GetNameFromColumn(ColumnType type) + { + if (type.GetType() == typeof(StringType)) { return "string"; } - else if (type == typeof(BooleanType)) + else if (type.GetType() == typeof(BooleanType)) { return "boolean"; } - else if (type == typeof(Float32Type)) + else if (type.GetType() == typeof(Float32Type)) { return "float32"; } - else if (type == typeof(Float32LEType)) + else if (type.GetType() == typeof(Float32LEType)) { return "float32-le"; } - else if (type == typeof(Float64Type)) + else if (type.GetType() == typeof(Float64Type)) { return "float64"; } - else if (type == typeof(Float64LEType)) + else if (type.GetType() == typeof(Float64LEType)) { return "float64-le"; } - else if (type == typeof(UInt32Type)) + else if (type.GetType() == typeof(UInt32Type)) { return "uint32"; } - else if (type == typeof(UInt64Type)) + else if (type.GetType() == typeof(UInt64Type)) { return "uint64"; } - else if (type == typeof(Int32Type)) + else if (type.GetType() == typeof(Int32Type)) { return "int32"; } - else if (type == typeof(Int64Type)) + else if (type.GetType() ==
typeof(Int64Type)) { return "int64"; } - else if (type == typeof(BinaryType)) + else if (type.GetType() == typeof(BinaryType)) { return "binary"; } else { - throw new Exception($"Invalid type: {type}"); + throw new Exception($"Invalid type: {type.GetType()}"); } } @@ -1174,14 +1226,24 @@ public class SaneTsv public static byte[] SerializeCommentedTsv(IList data, string fileComment) where T : CommentedTsvRecord { - return SerializeTsv(data, FormatType.COMMENTED_TSV); + return SerializeTsv(data, FormatType.COMMENTED_TSV, fileComment); } - protected static byte[] SerializeTsv(IList data, FormatType tsvFormat) + protected static byte[] SerializeTsv(IList data, FormatType tsvFormat, string fileComment = null) { var bytes = new List(); - var columnTypes = new List(); + if (fileComment != null) + { + if (tsvFormat != FormatType.COMMENTED_TSV) + { + throw new Exception($"File comments are not valid for {tsvFormat}"); + } + + bytes.AddRange(Encoding.UTF8.GetBytes("#" + fileComment.Replace("\n", "\n#") + "\n")); + } + + var columnTypes = new List(); var columnNames = new List(); var columnPropertyInfos = new List(); int columnCount = 0; @@ -1197,8 +1259,8 @@ public class SaneTsv string headerName = attribute.ColumnName ?? property.Name; columnNames.Add(headerName); - Type headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType); - if (tsvFormat == FormatType.SIMPLE_TSV && headerType != typeof(StringType)) + ColumnType headerType = attribute.ColumnType ?? 
GetColumnFromType(property.PropertyType); + if (tsvFormat == FormatType.SIMPLE_TSV && headerType.GetType() != typeof(StringType)) { throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'"); } @@ -1278,7 +1340,7 @@ public class SaneTsv return bytes.ToArray(); } - protected static void SerializeTsv(IList data, List bytes, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex) + protected static void SerializeTsv(IList data, List bytes, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex) { // Serialize data for (int i = 0; i < data.Count; i++) @@ -1293,16 +1355,16 @@ public class SaneTsv // Some fields definitely don't need escaping, so we add them directly to bytes bool skipEscaping = false; - if (columnTypes[j] == typeof(StringType)) + if (columnTypes[j].GetType() == typeof(StringType)) { fieldEncoded = Encoding.UTF8.GetBytes((string)datum); } - else if (columnTypes[j] == typeof(BooleanType)) + else if (columnTypes[j].GetType() == typeof(BooleanType)) { bytes.AddRange((bool)datum ? 
TrueEncoded : FalseEncoded); skipEscaping = true; } - else if (columnTypes[j] == typeof(Float32Type)) + else if (columnTypes[j].GetType() == typeof(Float32Type)) { if (datum is float f) { @@ -1326,7 +1388,7 @@ public class SaneTsv } skipEscaping = true; } - else if (columnTypes[j] == typeof(Float32LEType)) + else if (columnTypes[j].GetType() == typeof(Float32LEType)) { if (LittleEndian) { @@ -1342,7 +1404,7 @@ public class SaneTsv } } } - else if (columnTypes[j] == typeof(Float64Type)) + else if (columnTypes[j].GetType() == typeof(Float64Type)) { if (datum is double d) { @@ -1366,7 +1428,7 @@ public class SaneTsv } skipEscaping = true; } - else if (columnTypes[j] == typeof(Float64LEType)) + else if (columnTypes[j].GetType() == typeof(Float64LEType)) { if (LittleEndian) { @@ -1382,27 +1444,27 @@ public class SaneTsv } } } - else if (columnTypes[j] == typeof(UInt32Type)) + else if (columnTypes[j].GetType() == typeof(UInt32Type)) { bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString())); skipEscaping = true; } - else if (columnTypes[j] == typeof(UInt64Type)) + else if (columnTypes[j].GetType() == typeof(UInt64Type)) { bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString())); skipEscaping = true; } - else if (columnTypes[j] == typeof(Int32Type)) + else if (columnTypes[j].GetType() == typeof(Int32Type)) { bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString())); skipEscaping = true; } - else if (columnTypes[j] == typeof(Int64Type)) + else if (columnTypes[j].GetType() == typeof(Int64Type)) { bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString())); skipEscaping = true; } - else if (columnTypes[j] == typeof(BinaryType)) + else if (columnTypes[j].GetType() == typeof(BinaryType)) { fieldEncoded = (byte[])datum; } @@ -1512,15 +1574,15 @@ public class SaneTsv public class TsvColumnAttribute : Attribute { public string ColumnName { get; } - public virtual Type ColumnType { get; } + public virtual ColumnType ColumnType { get; } public 
TsvColumnAttribute() { - ColumnType = typeof(StringType); + ColumnType = new StringType(); } public TsvColumnAttribute(string columnName) { - ColumnType = typeof(StringType); + ColumnType = new StringType(); ColumnName = columnName; } } @@ -1528,27 +1590,19 @@ public class SaneTsv // TODO: Add column ordering public class TypedTsvColumnAttribute : TsvColumnAttribute { - public override Type ColumnType { get; } + public override ColumnType ColumnType { get; } public TypedTsvColumnAttribute() { } public TypedTsvColumnAttribute(string columnName) : base(columnName) { } - public TypedTsvColumnAttribute(string columnName, Type columnType) : base(columnName) + public TypedTsvColumnAttribute(string columnName, string columnType) : base(columnName) { - if (columnType.BaseType != typeof(ColumnType)) - { - throw new Exception("Column type must inherit from SaneTsv.ColumnType"); - } - ColumnType = columnType; + ColumnType = GetColumnFromString(columnType); } - public TypedTsvColumnAttribute(Type columnType) + public TypedTsvColumnAttribute(ColumnType columnType) { - if (columnType.BaseType != typeof(ColumnType)) - { - throw new Exception("Column type must inherit from SaneTsv.ColumnType"); - } ColumnType = columnType; } }