Compare commits
10 Commits
f98a40a173
...
master
Author | SHA1 | Date | |
---|---|---|---|
d9ef2a4bb6 | |||
a80206767e | |||
b8ae3ce65d | |||
0fd092685d | |||
55fa00a6e7 | |||
d428af51bb | |||
aef92e87d4 | |||
b56236cbb7 | |||
7230f982ac | |||
f4145bacd2 |
@ -43,9 +43,6 @@ public class SaneTsv
|
||||
public static readonly byte[] TrueEncoded = Encoding.UTF8.GetBytes("TRUE");
|
||||
public static readonly byte[] FalseEncoded = Encoding.UTF8.GetBytes("FALSE");
|
||||
|
||||
// TODO: We need to be able to update all these in tandem somehow
|
||||
public string[] ColumnNames { get; protected set; }
|
||||
public Type[] ColumnTypes { get; protected set; }
|
||||
protected static bool? _littleEndian = null;
|
||||
public static bool LittleEndian
|
||||
{
|
||||
@ -71,7 +68,6 @@ public class SaneTsv
|
||||
|
||||
public static CommentedTsv<T> ParseCommentedTsv<T>(byte[] inputBuffer) where T : CommentedTsvRecord, new()
|
||||
{
|
||||
// TODO: add the file comment?
|
||||
return (CommentedTsv<T>)Parse<T>(inputBuffer, FormatType.COMMENTED_TSV);
|
||||
}
|
||||
|
||||
@ -88,22 +84,22 @@ public class SaneTsv
|
||||
}
|
||||
parsed.Records = new List<T>();
|
||||
|
||||
var headerTypes = new List<Type>();
|
||||
var headerNames = new List<string>();
|
||||
var headerPropertyInfos = new List<PropertyInfo>();
|
||||
var columnTypes = new List<ColumnType>();
|
||||
var columnNames = new List<string>();
|
||||
var columnPropertyInfos = new List<PropertyInfo>();
|
||||
int columnCount = 0;
|
||||
|
||||
foreach (PropertyInfo property in typeof(T).GetProperties())
|
||||
{
|
||||
TypedTsvColumnAttribute attribute = (TypedTsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TypedTsvColumnAttribute));
|
||||
TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute));
|
||||
if (attribute == null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
headerNames.Add(attribute.ColumnName ?? property.Name);
|
||||
headerTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType));
|
||||
headerPropertyInfos.Add(property);
|
||||
columnNames.Add(attribute.ColumnName ?? property.Name);
|
||||
columnTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType));
|
||||
columnPropertyInfos.Add(property);
|
||||
// TODO: Check that the property type and given column type are compatible
|
||||
columnCount++;
|
||||
}
|
||||
@ -170,7 +166,7 @@ public class SaneTsv
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new Exception($"Header {fields.Count} is not valid UTF-8", e);
|
||||
throw new Exception($"Header field {fields.Count} is not valid UTF-8", e);
|
||||
}
|
||||
|
||||
string columnTypeString;
|
||||
@ -179,7 +175,7 @@ public class SaneTsv
|
||||
{
|
||||
if (format == FormatType.SIMPLE_TSV)
|
||||
{
|
||||
throw new Exception($"Header {fields.Count} contain ':', which is not allowed for column names");
|
||||
throw new Exception($"Header field {j} contains ':', which is not allowed for column names");
|
||||
}
|
||||
columnTypeString = columnString.Split(":").Last();
|
||||
columnName = columnString.Substring(0, columnString.Length - columnTypeString.Length - 1);
|
||||
@ -188,52 +184,52 @@ public class SaneTsv
|
||||
{
|
||||
if (format > FormatType.SIMPLE_TSV)
|
||||
{
|
||||
throw new Exception($"Header {fields.Count} has no type");
|
||||
throw new Exception($"Header field {fields.Count} has no type");
|
||||
}
|
||||
columnTypeString = "";
|
||||
columnName = columnString;
|
||||
}
|
||||
|
||||
Type type;
|
||||
ColumnType type;
|
||||
|
||||
switch (columnTypeString)
|
||||
{
|
||||
case "":
|
||||
numTypesBlank++;
|
||||
type = typeof(StringType);
|
||||
type = new StringType();
|
||||
break;
|
||||
case "string":
|
||||
type = typeof(StringType);
|
||||
type = new StringType();
|
||||
break;
|
||||
case "boolean":
|
||||
type = typeof(BooleanType);
|
||||
type = new BooleanType();
|
||||
break;
|
||||
case "float32":
|
||||
type = typeof(Float32Type);
|
||||
type = new Float32Type();
|
||||
break;
|
||||
case "float32-le":
|
||||
type = typeof(Float32LEType);
|
||||
type = new Float32LEType();
|
||||
break;
|
||||
case "float64":
|
||||
type = typeof(Float64Type);
|
||||
type = new Float64Type();
|
||||
break;
|
||||
case "float64-le":
|
||||
type = typeof(Float64LEType);
|
||||
type = new Float64LEType();
|
||||
break;
|
||||
case "uint32":
|
||||
type = typeof(UInt32Type);
|
||||
type = new UInt32Type();
|
||||
break;
|
||||
case "uint64":
|
||||
type = typeof(UInt64Type);
|
||||
type = new UInt64Type();
|
||||
break;
|
||||
case "int32":
|
||||
type = typeof(Int32Type);
|
||||
type = new Int32Type();
|
||||
break;
|
||||
case "int64":
|
||||
type = typeof(Int64Type);
|
||||
type = new Int64Type();
|
||||
break;
|
||||
case "binary":
|
||||
type = typeof(BinaryType);
|
||||
type = new BinaryType();
|
||||
break;
|
||||
default:
|
||||
throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
|
||||
@ -241,14 +237,14 @@ public class SaneTsv
|
||||
|
||||
// TODO: Allow lax parsing (only worry about parsing columns that are given in the specifying type)
|
||||
|
||||
if (headerNames[j] != columnName)
|
||||
if (columnNames[j] != columnName)
|
||||
{
|
||||
throw new Exception($"Column {j} has name {columnName}, but expected {headerNames[j]}");
|
||||
throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}");
|
||||
}
|
||||
|
||||
if (headerTypes[j] != type)
|
||||
if (columnTypes[j].GetType() != type.GetType())
|
||||
{
|
||||
throw new Exception($"Column {j} has type {type}, but expected {headerTypes[j]}");
|
||||
throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}");
|
||||
}
|
||||
}
|
||||
|
||||
@ -312,7 +308,7 @@ public class SaneTsv
|
||||
// Complication: it probably depends on processor count
|
||||
if (inputBuffer.Length < 10000)
|
||||
{
|
||||
parsed.Records.AddRange(Parse<T>(inputBuffer, format, headerPropertyInfos.ToArray(), headerTypes.ToArray(), currentLineStart - 1, inputBuffer.Length));
|
||||
parsed.Records.AddRange(Parse<T>(inputBuffer, format, columnPropertyInfos.ToArray(), columnTypes.ToArray(), currentLineStart - 1, inputBuffer.Length));
|
||||
return parsed;
|
||||
}
|
||||
else
|
||||
@ -334,9 +330,10 @@ public class SaneTsv
|
||||
endIndex = (i + 1) * splitCount + parseStart;
|
||||
}
|
||||
|
||||
parsedValues[i] = Parse<T>(inputBuffer, format, headerPropertyInfos.ToArray(), headerTypes.ToArray(), startIndex, endIndex);
|
||||
parsedValues[i] = Parse<T>(inputBuffer, format, columnPropertyInfos.ToArray(), columnTypes.ToArray(), startIndex, endIndex);
|
||||
});
|
||||
|
||||
// TODO: Handle relative line numbers
|
||||
for (int i = 0; i < tasks; i++)
|
||||
{
|
||||
parsed.Records.AddRange(parsedValues[i]);
|
||||
@ -351,7 +348,7 @@ public class SaneTsv
|
||||
// startIndex is in we'd have to go back to the start of the record's comment, and to know
|
||||
// exactly where that comment started we'd have to go back to the start of the record before that
|
||||
// (not including that other record's comment).
|
||||
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] headerPropertyInfos, Type[] headerTypes, int startIndex, int endIndex) where T : TsvRecord, new()
|
||||
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
|
||||
{
|
||||
var fieldBytes = new List<byte>();
|
||||
var fields = new List<byte[]>();
|
||||
@ -421,9 +418,9 @@ public class SaneTsv
|
||||
fields.Add(fieldBytes.ToArray());
|
||||
fieldBytes.Clear();
|
||||
|
||||
if (headerTypes.Length != fields.Count)
|
||||
if (columnTypes.Length != fields.Count)
|
||||
{
|
||||
throw new Exception($"Expected {headerTypes.Length} fields on line {relativeLine}, but found {fields.Count}");
|
||||
throw new Exception($"Expected {columnTypes.Length} fields on line {relativeLine}, but found {fields.Count}");
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -433,7 +430,7 @@ public class SaneTsv
|
||||
comment = currentComment.ToString();
|
||||
currentComment.Clear();
|
||||
}
|
||||
parsed.Add(ParseCurrentRecord<T>(headerTypes.ToArray(), headerPropertyInfos.ToArray(), fields, comment, relativeLine));
|
||||
parsed.Add(ParseCurrentRecord<T>(columnTypes.ToArray(), columnPropertyInfos.ToArray(), fields, comment, relativeLine));
|
||||
fields.Clear();
|
||||
}
|
||||
|
||||
@ -493,9 +490,9 @@ public class SaneTsv
|
||||
// TODO
|
||||
throw new Exception("Not sure when this will happen. THis might actuall be fine");
|
||||
}
|
||||
if (fields.Count != headerTypes.Length)
|
||||
if (fields.Count != columnTypes.Length)
|
||||
{
|
||||
throw new Exception($"Expected {headerTypes} fields on line {relativeLine}, but found {fields.Count}");
|
||||
throw new Exception($"Expected {columnTypes} fields on line {relativeLine}, but found {fields.Count}");
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -505,19 +502,19 @@ public class SaneTsv
|
||||
comment = currentComment.ToString();
|
||||
currentComment.Clear();
|
||||
}
|
||||
parsed.Add(ParseCurrentRecord<T>(headerTypes.ToArray(), headerPropertyInfos.ToArray(), fields, comment, relativeLine));
|
||||
parsed.Add(ParseCurrentRecord<T>(columnTypes.ToArray(), columnPropertyInfos.ToArray(), fields, comment, relativeLine));
|
||||
fields.Clear();
|
||||
}
|
||||
|
||||
return parsed.ToArray();
|
||||
}
|
||||
|
||||
protected static T ParseCurrentCommentedRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
|
||||
protected static T ParseCurrentCommentedRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
|
||||
{
|
||||
return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line);
|
||||
}
|
||||
|
||||
protected static T ParseCurrentRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
|
||||
protected static T ParseCurrentRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
|
||||
{
|
||||
T record = new T();
|
||||
|
||||
@ -535,7 +532,7 @@ public class SaneTsv
|
||||
for (int j = 0; j < fields.Count; j++)
|
||||
{
|
||||
// All other types require the content to be UTF-8. Binary fields can ignore that.
|
||||
if (columnTypes[j] == typeof(BinaryType))
|
||||
if (columnTypes[j].GetType() == typeof(BinaryType))
|
||||
{
|
||||
// TODO: Use faster method for property setting
|
||||
// e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html
|
||||
@ -544,7 +541,7 @@ public class SaneTsv
|
||||
properties[j].SetValue(record, fields[j]);
|
||||
continue;
|
||||
}
|
||||
else if (columnTypes[j] == typeof(Float32LEType))
|
||||
else if (columnTypes[j].GetType() == typeof(Float32LEType))
|
||||
{
|
||||
byte[] floatBytes;
|
||||
if (!LittleEndian)
|
||||
@ -563,7 +560,7 @@ public class SaneTsv
|
||||
|
||||
continue;
|
||||
}
|
||||
else if (columnTypes[j] == typeof(Float64LEType))
|
||||
else if (columnTypes[j].GetType() == typeof(Float64LEType))
|
||||
{
|
||||
byte[] floatBytes;
|
||||
if (!LittleEndian)
|
||||
@ -595,11 +592,11 @@ public class SaneTsv
|
||||
|
||||
// TODO: Add format checking for numeric types
|
||||
|
||||
if (columnTypes[j] == typeof(StringType))
|
||||
if (columnTypes[j].GetType() == typeof(StringType))
|
||||
{
|
||||
properties[j].SetValue(record, fieldString);
|
||||
}
|
||||
else if (columnTypes[j] == typeof(BooleanType))
|
||||
else if (columnTypes[j].GetType() == typeof(BooleanType))
|
||||
{
|
||||
bool parsedBool;
|
||||
if (fieldString == "TRUE")
|
||||
@ -617,7 +614,7 @@ public class SaneTsv
|
||||
|
||||
properties[j].SetValue(record, parsedBool);
|
||||
}
|
||||
else if (columnTypes[j] == typeof(Float32Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Float32Type))
|
||||
{
|
||||
float parsedFloat;
|
||||
if (!float.TryParse(fieldString, out parsedFloat))
|
||||
@ -638,7 +635,7 @@ public class SaneTsv
|
||||
|
||||
properties[j].SetValue(record, parsedFloat);
|
||||
}
|
||||
else if (columnTypes[j] == typeof(Float64Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Float64Type))
|
||||
{
|
||||
double parsedDouble;
|
||||
if (!double.TryParse(fieldString, out parsedDouble))
|
||||
@ -659,7 +656,7 @@ public class SaneTsv
|
||||
|
||||
properties[j].SetValue(record, parsedDouble);
|
||||
}
|
||||
else if (columnTypes[j] == typeof(UInt32Type))
|
||||
else if (columnTypes[j].GetType() == typeof(UInt32Type))
|
||||
{
|
||||
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
|
||||
{
|
||||
@ -668,7 +665,7 @@ public class SaneTsv
|
||||
|
||||
properties[j].SetValue(record, parsedUInt32);
|
||||
}
|
||||
else if (columnTypes[j] == typeof(UInt64Type))
|
||||
else if (columnTypes[j].GetType() == typeof(UInt64Type))
|
||||
{
|
||||
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
|
||||
{
|
||||
@ -677,7 +674,7 @@ public class SaneTsv
|
||||
|
||||
properties[j].SetValue(record, parsedUInt64);
|
||||
}
|
||||
else if (columnTypes[j] == typeof(Int32Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Int32Type))
|
||||
{
|
||||
if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
|
||||
{
|
||||
@ -686,7 +683,7 @@ public class SaneTsv
|
||||
|
||||
properties[j].SetValue(record, parsedInt32);
|
||||
}
|
||||
else if (columnTypes[j] == typeof(Int64Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Int64Type))
|
||||
{
|
||||
if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
|
||||
{
|
||||
@ -846,7 +843,7 @@ public class SaneTsv
|
||||
public static (string[] columns, string[][] data) ParseSimpleTsv(byte[] inputBuffer)
|
||||
{
|
||||
string[] columnNames = null;
|
||||
var headers = new List<byte[]>();
|
||||
var headerFields = new List<byte[]>();
|
||||
var fieldBytes = new List<byte>();
|
||||
int startOfData = -1;
|
||||
for (int i = 0; i < inputBuffer.Count(); i++)
|
||||
@ -885,33 +882,33 @@ public class SaneTsv
|
||||
else if (inputBuffer[i] == '\t')
|
||||
{
|
||||
// end of field
|
||||
headers.Add(fieldBytes.ToArray());
|
||||
headerFields.Add(fieldBytes.ToArray());
|
||||
fieldBytes.Clear();
|
||||
}
|
||||
else if (inputBuffer[i] == '\n')
|
||||
{
|
||||
// This is the end of the header
|
||||
headers.Add(fieldBytes.ToArray());
|
||||
headerFields.Add(fieldBytes.ToArray());
|
||||
startOfData = i + 1;
|
||||
|
||||
columnNames = new string[headers.Count];
|
||||
columnNames = new string[headerFields.Count];
|
||||
fieldBytes.Clear();
|
||||
|
||||
for (int j = 0; j < headers.Count; j++)
|
||||
for (int j = 0; j < headerFields.Count; j++)
|
||||
{
|
||||
string columnString;
|
||||
try
|
||||
{
|
||||
columnString = Encoding.UTF8.GetString(headers[j]);
|
||||
columnString = Encoding.UTF8.GetString(headerFields[j]);
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new Exception($"Column {headers.Count} name is not valid UTF-8", e);
|
||||
throw new Exception($"Column {headerFields.Count} name is not valid UTF-8", e);
|
||||
}
|
||||
|
||||
if (columnString.Contains(':'))
|
||||
{
|
||||
throw new Exception($"Header {headers.Count} contain ':', which is not allowed for column names");
|
||||
throw new Exception($"Header field {headerFields.Count} contain ':', which is not allowed for column names");
|
||||
}
|
||||
|
||||
columnNames[j] = columnString;
|
||||
@ -939,7 +936,7 @@ public class SaneTsv
|
||||
var fields = new List<byte[]>();
|
||||
var records = new List<string[]>();
|
||||
|
||||
int line = 1;
|
||||
int line = 2;
|
||||
int currentLineStart = 0;
|
||||
|
||||
// Go back to the start of the current line
|
||||
@ -1033,13 +1030,20 @@ public class SaneTsv
|
||||
|
||||
fields.Add(fieldBytes.ToArray());
|
||||
|
||||
if (fields.Count == 0)
|
||||
if (fields.Count == 0 && endIndex == inputBuffer.Length)
|
||||
{
|
||||
throw new Exception("Found 0 fields on last line. Possibly because of extra \\n after last record");
|
||||
}
|
||||
if (numFields != fields.Count)
|
||||
{
|
||||
throw new Exception($"Expected {numFields} fields on line {line}, but found {fields.Count}");
|
||||
if (endIndex == inputBuffer.Length)
|
||||
{
|
||||
throw new Exception($"Expected {numFields} fields on line {line}, but found {fields.Count}");
|
||||
}
|
||||
else
|
||||
{
|
||||
return records.ToArray();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1062,99 +1066,151 @@ public class SaneTsv
|
||||
return records.ToArray();
|
||||
}
|
||||
|
||||
public static Type GetColumnFromType(Type type)
|
||||
public static ColumnType GetColumnFromString(string type)
|
||||
{
|
||||
if (type == typeof(string))
|
||||
if (type == "string")
|
||||
{
|
||||
return typeof(StringType);
|
||||
return new StringType();
|
||||
}
|
||||
else if (type == typeof(bool))
|
||||
else if (type == "boolean")
|
||||
{
|
||||
return typeof(BooleanType);
|
||||
return new BooleanType();
|
||||
}
|
||||
else if (type == typeof(float))
|
||||
else if (type == "float32")
|
||||
{
|
||||
return typeof(Float32Type);
|
||||
return new Float32Type();
|
||||
}
|
||||
else if (type == typeof(double))
|
||||
else if (type == "float32-le")
|
||||
{
|
||||
return typeof(Float64Type);
|
||||
return new Float32LEType();
|
||||
}
|
||||
else if (type == typeof(UInt32))
|
||||
else if (type == "float64")
|
||||
{
|
||||
return typeof(UInt32Type);
|
||||
return new Float64Type();
|
||||
}
|
||||
else if (type == typeof(UInt64))
|
||||
else if (type == "float64-le")
|
||||
{
|
||||
return typeof(UInt64Type);
|
||||
return new Float64LEType();
|
||||
}
|
||||
else if (type == typeof(Int32))
|
||||
else if (type == "uint32")
|
||||
{
|
||||
return typeof(Int32Type);
|
||||
return new UInt32Type();
|
||||
}
|
||||
else if (type == typeof(Int64))
|
||||
else if (type == "uint64")
|
||||
{
|
||||
return typeof(Int64Type);
|
||||
return new UInt64Type();
|
||||
}
|
||||
else if (type == typeof(byte[]))
|
||||
else if (type == "int32")
|
||||
{
|
||||
return typeof(BinaryType);
|
||||
return new Int32Type();
|
||||
}
|
||||
else if (type == "int64")
|
||||
{
|
||||
return new Int64Type();
|
||||
}
|
||||
else if (type == "binary")
|
||||
{
|
||||
return new BinaryType();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Invalid type: {type}");
|
||||
throw new Exception($"Invalid type: {type.GetType()}");
|
||||
}
|
||||
}
|
||||
|
||||
public static string GetNameFromColumn(Type type)
|
||||
public static ColumnType GetColumnFromType(Type type)
|
||||
{
|
||||
if (type == typeof(StringType))
|
||||
if (type == typeof(string))
|
||||
{
|
||||
return new StringType();
|
||||
}
|
||||
else if (type == typeof(bool))
|
||||
{
|
||||
return new BooleanType();
|
||||
}
|
||||
else if (type == typeof(float))
|
||||
{
|
||||
return new Float32Type();
|
||||
}
|
||||
else if (type == typeof(double))
|
||||
{
|
||||
return new Float64Type();
|
||||
}
|
||||
else if (type == typeof(UInt32))
|
||||
{
|
||||
return new UInt32Type();
|
||||
}
|
||||
else if (type == typeof(UInt64))
|
||||
{
|
||||
return new UInt64Type();
|
||||
}
|
||||
else if (type == typeof(Int32))
|
||||
{
|
||||
return new Int32Type();
|
||||
}
|
||||
else if (type == typeof(Int64))
|
||||
{
|
||||
return new Int64Type();
|
||||
}
|
||||
else if (type == typeof(byte[]))
|
||||
{
|
||||
return new BinaryType();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Invalid type: {type.GetType()}");
|
||||
}
|
||||
}
|
||||
|
||||
public static string GetNameFromColumn(ColumnType type)
|
||||
{
|
||||
if (type.GetType() == typeof(StringType))
|
||||
{
|
||||
return "string";
|
||||
}
|
||||
else if (type == typeof(BooleanType))
|
||||
else if (type.GetType() == typeof(BooleanType))
|
||||
{
|
||||
return "boolean";
|
||||
}
|
||||
else if (type == typeof(Float32Type))
|
||||
else if (type.GetType() == typeof(Float32Type))
|
||||
{
|
||||
return "float32";
|
||||
}
|
||||
else if (type == typeof(Float32LEType))
|
||||
else if (type.GetType() == typeof(Float32LEType))
|
||||
{
|
||||
return "float32-le";
|
||||
}
|
||||
else if (type == typeof(Float64Type))
|
||||
else if (type.GetType() == typeof(Float64Type))
|
||||
{
|
||||
return "float64";
|
||||
}
|
||||
else if (type == typeof(Float64LEType))
|
||||
else if (type.GetType() == typeof(Float64LEType))
|
||||
{
|
||||
return "float64-le";
|
||||
}
|
||||
else if (type == typeof(UInt32Type))
|
||||
else if (type.GetType() == typeof(UInt32Type))
|
||||
{
|
||||
return "uint32";
|
||||
}
|
||||
else if (type == typeof(UInt64Type))
|
||||
else if (type.GetType() == typeof(UInt64Type))
|
||||
{
|
||||
return "uint64";
|
||||
}
|
||||
else if (type == typeof(Int32Type))
|
||||
else if (type.GetType() == typeof(Int32Type))
|
||||
{
|
||||
return "int32";
|
||||
}
|
||||
else if (type == typeof(Int64Type))
|
||||
else if (type.GetType() == typeof(Int64Type))
|
||||
{
|
||||
return "int64";
|
||||
}
|
||||
else if (type == typeof(BinaryType))
|
||||
else if (type.GetType() == typeof(BinaryType))
|
||||
{
|
||||
return "binary";
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Invalid type: {type}");
|
||||
throw new Exception($"Invalid type: {type.GetType()}");
|
||||
}
|
||||
}
|
||||
|
||||
@ -1170,16 +1226,26 @@ public class SaneTsv
|
||||
|
||||
public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord
|
||||
{
|
||||
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV);
|
||||
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV, fileComment);
|
||||
}
|
||||
|
||||
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat)
|
||||
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat, string fileComment = null)
|
||||
{
|
||||
var bytes = new List<byte>();
|
||||
|
||||
var headerTypes = new List<Type>();
|
||||
var headerNames = new List<string>();
|
||||
var headerPropertyInfos = new List<PropertyInfo>();
|
||||
if (fileComment != null)
|
||||
{
|
||||
if (tsvFormat != FormatType.COMMENTED_TSV)
|
||||
{
|
||||
throw new Exception($"File comments are not valid for {tsvFormat}");
|
||||
}
|
||||
|
||||
bytes.AddRange(Encoding.UTF8.GetBytes("#" + fileComment.Replace("\n", "\n#") + "\n"));
|
||||
}
|
||||
|
||||
var columnTypes = new List<ColumnType>();
|
||||
var columnNames = new List<string>();
|
||||
var columnPropertyInfos = new List<PropertyInfo>();
|
||||
int columnCount = 0;
|
||||
|
||||
// Serialize header
|
||||
@ -1192,30 +1258,30 @@ public class SaneTsv
|
||||
}
|
||||
|
||||
string headerName = attribute.ColumnName ?? property.Name;
|
||||
headerNames.Add(headerName);
|
||||
Type headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
|
||||
if (tsvFormat == FormatType.SIMPLE_TSV && headerType != typeof(StringType))
|
||||
columnNames.Add(headerName);
|
||||
ColumnType headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
|
||||
if (tsvFormat == FormatType.SIMPLE_TSV && headerType.GetType() != typeof(StringType))
|
||||
{
|
||||
throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'");
|
||||
}
|
||||
headerTypes.Add(headerType);
|
||||
headerPropertyInfos.Add(property);
|
||||
columnTypes.Add(headerType);
|
||||
columnPropertyInfos.Add(property);
|
||||
// TODO: Check that the property type and given column type are compatible
|
||||
columnCount++;
|
||||
}
|
||||
|
||||
// Serialize header
|
||||
for (int i = 0; i < headerNames.Count; i++)
|
||||
for (int i = 0; i < columnNames.Count; i++)
|
||||
{
|
||||
for (int j = i + 1; j < headerNames.Count; j++)
|
||||
for (int j = i + 1; j < columnNames.Count; j++)
|
||||
{
|
||||
if (headerNames[i] == headerNames[j])
|
||||
if (columnNames[i] == columnNames[j])
|
||||
{
|
||||
throw new Exception("Column names in header must be unique");
|
||||
}
|
||||
}
|
||||
|
||||
byte[] nameEncoded = Encoding.UTF8.GetBytes(headerNames[i]);
|
||||
byte[] nameEncoded = Encoding.UTF8.GetBytes(columnNames[i]);
|
||||
|
||||
for (int j = 0; j < nameEncoded.Length; j++)
|
||||
{
|
||||
@ -1250,15 +1316,15 @@ public class SaneTsv
|
||||
bytes.Add((byte)':');
|
||||
try
|
||||
{
|
||||
bytes.AddRange(Encoding.UTF8.GetBytes(GetNameFromColumn(headerTypes[i])));
|
||||
bytes.AddRange(Encoding.UTF8.GetBytes(GetNameFromColumn(columnTypes[i])));
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
throw new Exception($"Invalid header type for column {i}", e);
|
||||
throw new Exception($"Invalid column type for column {i}", e);
|
||||
}
|
||||
}
|
||||
|
||||
if (i == headerNames.Count - 1)
|
||||
if (i == columnNames.Count - 1)
|
||||
{
|
||||
bytes.Add((byte)'\n');
|
||||
}
|
||||
@ -1269,19 +1335,19 @@ public class SaneTsv
|
||||
}
|
||||
|
||||
// Serialize data
|
||||
SerializeTsv<T>(data, bytes, headerPropertyInfos.ToArray(), headerTypes.ToArray(), tsvFormat, 0, data.Count);
|
||||
SerializeTsv<T>(data, bytes, columnPropertyInfos.ToArray(), columnTypes.ToArray(), tsvFormat, 0, data.Count);
|
||||
|
||||
return bytes.ToArray();
|
||||
}
|
||||
|
||||
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] headerPropertyInfos, Type[] headerTypes, FormatType tsvFormat, int startIndex, int endIndex)
|
||||
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
|
||||
{
|
||||
// Serialize data
|
||||
for (int i = 0; i < data.Count; i++)
|
||||
{
|
||||
for (int j = 0; j < headerTypes.Length; j++)
|
||||
for (int j = 0; j < columnTypes.Length; j++)
|
||||
{
|
||||
object datum = headerPropertyInfos[j].GetValue(data[i]);
|
||||
object datum = columnPropertyInfos[j].GetValue(data[i]);
|
||||
|
||||
try
|
||||
{
|
||||
@ -1289,16 +1355,16 @@ public class SaneTsv
|
||||
// Some fields definitely don't need escaping, so we add them directly to bytes
|
||||
bool skipEscaping = false;
|
||||
|
||||
if (headerTypes[j] == typeof(StringType))
|
||||
if (columnTypes[j].GetType() == typeof(StringType))
|
||||
{
|
||||
fieldEncoded = Encoding.UTF8.GetBytes((string)datum);
|
||||
}
|
||||
else if (headerTypes[j] == typeof(BooleanType))
|
||||
else if (columnTypes[j].GetType() == typeof(BooleanType))
|
||||
{
|
||||
bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded);
|
||||
skipEscaping = true;
|
||||
}
|
||||
else if (headerTypes[j] == typeof(Float32Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Float32Type))
|
||||
{
|
||||
if (datum is float f)
|
||||
{
|
||||
@ -1322,7 +1388,7 @@ public class SaneTsv
|
||||
}
|
||||
skipEscaping = true;
|
||||
}
|
||||
else if (headerTypes[j] == typeof(Float32LEType))
|
||||
else if (columnTypes[j].GetType() == typeof(Float32LEType))
|
||||
{
|
||||
if (LittleEndian)
|
||||
{
|
||||
@ -1338,7 +1404,7 @@ public class SaneTsv
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (headerTypes[j] == typeof(Float64Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Float64Type))
|
||||
{
|
||||
if (datum is double d)
|
||||
{
|
||||
@ -1362,7 +1428,7 @@ public class SaneTsv
|
||||
}
|
||||
skipEscaping = true;
|
||||
}
|
||||
else if (headerTypes[j] == typeof(Float64LEType))
|
||||
else if (columnTypes[j].GetType() == typeof(Float64LEType))
|
||||
{
|
||||
if (LittleEndian)
|
||||
{
|
||||
@ -1378,33 +1444,33 @@ public class SaneTsv
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (headerTypes[j] == typeof(UInt32Type))
|
||||
else if (columnTypes[j].GetType() == typeof(UInt32Type))
|
||||
{
|
||||
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString()));
|
||||
skipEscaping = true;
|
||||
}
|
||||
else if (headerTypes[j] == typeof(UInt64Type))
|
||||
else if (columnTypes[j].GetType() == typeof(UInt64Type))
|
||||
{
|
||||
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString()));
|
||||
skipEscaping = true;
|
||||
}
|
||||
else if (headerTypes[j] == typeof(Int32Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Int32Type))
|
||||
{
|
||||
bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString()));
|
||||
skipEscaping = true;
|
||||
}
|
||||
else if (headerTypes[j] == typeof(Int64Type))
|
||||
else if (columnTypes[j].GetType() == typeof(Int64Type))
|
||||
{
|
||||
bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString()));
|
||||
skipEscaping = true;
|
||||
}
|
||||
else if (headerTypes[j] == typeof(BinaryType))
|
||||
else if (columnTypes[j].GetType() == typeof(BinaryType))
|
||||
{
|
||||
fieldEncoded = (byte[])datum;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception($"Unexpected column type {headerTypes[j]} for column {j}");
|
||||
throw new Exception($"Unexpected column type {columnTypes[j]} for column {j}");
|
||||
}
|
||||
|
||||
if (!skipEscaping)
|
||||
@ -1438,7 +1504,7 @@ public class SaneTsv
|
||||
}
|
||||
}
|
||||
|
||||
if (j < headerTypes.Length - 1)
|
||||
if (j < columnTypes.Length - 1)
|
||||
{
|
||||
bytes.Add((byte)'\t');
|
||||
}
|
||||
@ -1449,7 +1515,7 @@ public class SaneTsv
|
||||
}
|
||||
catch (InvalidCastException e)
|
||||
{
|
||||
throw new Exception($"Record {i}, field {j} expected type compatible with {GetNameFromColumn(headerTypes[j])}", e);
|
||||
throw new Exception($"Record {i}, field {j} expected type compatible with {GetNameFromColumn(columnTypes[j])}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1508,15 +1574,15 @@ public class SaneTsv
|
||||
public class TsvColumnAttribute : Attribute
|
||||
{
|
||||
public string ColumnName { get; }
|
||||
public virtual Type ColumnType { get; }
|
||||
public virtual ColumnType ColumnType { get; }
|
||||
|
||||
public TsvColumnAttribute()
|
||||
{
|
||||
ColumnType = typeof(StringType);
|
||||
ColumnType = new StringType();
|
||||
}
|
||||
public TsvColumnAttribute(string columnName)
|
||||
{
|
||||
ColumnType = typeof(StringType);
|
||||
ColumnType = new StringType();
|
||||
ColumnName = columnName;
|
||||
}
|
||||
}
|
||||
@ -1524,27 +1590,19 @@ public class SaneTsv
|
||||
// TODO: Add column ordering
|
||||
public class TypedTsvColumnAttribute : TsvColumnAttribute
|
||||
{
|
||||
public override Type ColumnType { get; }
|
||||
public override ColumnType ColumnType { get; }
|
||||
|
||||
public TypedTsvColumnAttribute() { }
|
||||
|
||||
public TypedTsvColumnAttribute(string columnName) : base(columnName) { }
|
||||
|
||||
public TypedTsvColumnAttribute(string columnName, Type columnType) : base(columnName)
|
||||
public TypedTsvColumnAttribute(string columnName, string columnType) : base(columnName)
|
||||
{
|
||||
if (columnType.BaseType != typeof(ColumnType))
|
||||
{
|
||||
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
|
||||
}
|
||||
ColumnType = columnType;
|
||||
ColumnType = GetColumnFromString(columnType);
|
||||
}
|
||||
|
||||
public TypedTsvColumnAttribute(Type columnType)
|
||||
public TypedTsvColumnAttribute(ColumnType columnType)
|
||||
{
|
||||
if (columnType.BaseType != typeof(ColumnType))
|
||||
{
|
||||
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
|
||||
}
|
||||
ColumnType = columnType;
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
using NathanMcRae;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
|
||||
internal class Program : SaneTsv
|
||||
@ -80,6 +81,18 @@ internal class Program : SaneTsv
|
||||
public string Column3 { get; set; }
|
||||
}
|
||||
|
||||
public class BoolTestRecord3 : SaneTsv.CommentedTsvRecord
|
||||
{
|
||||
[SaneTsv.TsvColumn("column1")]
|
||||
public string Column1 { get; set; }
|
||||
|
||||
[SaneTsv.TsvColumn]
|
||||
public string column2 { get; set; }
|
||||
|
||||
[SaneTsv.TsvColumn("columnthree\nyep")]
|
||||
public string Column3 { get; set; }
|
||||
}
|
||||
|
||||
public class SerdeTestRecord : SaneTsv.CommentedTsvRecord
|
||||
{
|
||||
[SaneTsv.TypedTsvColumn("column1")]
|
||||
@ -348,6 +361,428 @@ internal class Program : SaneTsv
|
||||
Console.WriteLine($"Unspecced parse time: {unspeccedParseTime}");
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "With and without file comment";
|
||||
|
||||
string testString1 = "#This is a file comment\n" +
|
||||
"#One more file comment line\n" +
|
||||
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
string testString2 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
CommentedTsv<BoolTestRecord2> parsed2 = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||
|
||||
if (parsed.FileComment == "This is a file comment\nOne more file comment line" && parsed2.FileComment == null)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName}");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "With and without types";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 2A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 2B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2B");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 2C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2C");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "With and without line comment";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file comment";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Partial parsing";
|
||||
|
||||
string line1 = "column1\tcolumn2\tcolumnthree\\nyep";
|
||||
string line2 = "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee";
|
||||
string line3 = "\nFALSE\tnother\tno\\ther";
|
||||
|
||||
byte[] inputBuffer = Encoding.UTF8.GetBytes(line1 + line2 + line3);
|
||||
|
||||
var headerTypes = new List<Type>();
|
||||
var headerNames = new List<string>();
|
||||
var headerPropertyInfos = new List<PropertyInfo>();
|
||||
int columnCount = 0;
|
||||
|
||||
foreach (PropertyInfo property in typeof(BoolTestRecord3).GetProperties())
|
||||
{
|
||||
TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute));
|
||||
if (attribute == null)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
headerNames.Add(attribute.ColumnName ?? property.Name);
|
||||
headerTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType));
|
||||
headerPropertyInfos.Add(property);
|
||||
// TODO: Check that the property type and given column type are compatible
|
||||
columnCount++;
|
||||
}
|
||||
|
||||
BoolTestRecord3[] records = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||
FormatType.SIMPLE_TSV,
|
||||
headerPropertyInfos.ToArray(),
|
||||
headerTypes.ToArray(),
|
||||
line1.Length + line2.Length + 1,
|
||||
inputBuffer.Length);
|
||||
|
||||
if (records.Length == 0 )
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1");
|
||||
}
|
||||
|
||||
BoolTestRecord3[] records2 = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||
FormatType.SIMPLE_TSV,
|
||||
headerPropertyInfos.ToArray(),
|
||||
headerTypes.ToArray(),
|
||||
line1.Length,
|
||||
line1.Length + 3);
|
||||
|
||||
if (records2[0].Column3 == "valuetrhee")
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 2");
|
||||
}
|
||||
|
||||
string[][] data = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length + line2.Length + 1, inputBuffer.Length);
|
||||
|
||||
if (data[0][1] == "nother")
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 3");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 3");
|
||||
}
|
||||
|
||||
string[][] data2 = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length, line1.Length + 3);
|
||||
|
||||
if (data2.Length == 0)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 4");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 4");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file \\n";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file partial record";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\nTRUE\t";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\nTRUE\t";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "File comment serde";
|
||||
|
||||
string testString1 = "#this is a file comment" +
|
||||
"\n# and one more line since you're such a good customer" +
|
||||
"\ncolumn1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
|
||||
string reserialized = Encoding.UTF8.GetString(SaneTsv.SerializeCommentedTsv<BoolTestRecord2>(parsed.Records, parsed.FileComment));
|
||||
|
||||
if (reserialized == testString1)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName}");
|
||||
}
|
||||
}
|
||||
|
||||
Console.WriteLine("Done with tests");
|
||||
}
|
||||
}
|
||||
|
41
readme.md
41
readme.md
@ -3,14 +3,43 @@
|
||||
## Roadmap
|
||||
|
||||
- Improve error reporting by including line/column information in exceptions
|
||||
- Come up with a static-typing interface
|
||||
- Use this to get line numbers for parallel parsing implementations
|
||||
- [x] Come up with a static-typing interface
|
||||
|
||||
Something that doesn't require an array of objects
|
||||
|
||||
Use a class with SaneTsv attributes
|
||||
|
||||
- Check numeric formatting matches spec
|
||||
- Do parallel parsing / serializing implementation
|
||||
- Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
||||
- More optimization and making parsing modular:
|
||||
- [x] Maybe add a binary representation for f32/f64. It should specify that it is Little-endian (since we have to pick one). That way we can guarantee bit-compatibility between implementations where an application might require that.
|
||||
- [x] Add Column name/type specification to API
|
||||
- So you can tell it what columns to expect
|
||||
- [ ] Lax/strict versions
|
||||
|
||||
See the attributes thing above
|
||||
- Generate test cases
|
||||
- [x] File comment / no file comment
|
||||
- [x] header types / no header types
|
||||
- [x] Line comments / no line comments
|
||||
- [x] end of file comment
|
||||
- [x] Test with the start index of parallel methods in last record
|
||||
- end index in first record
|
||||
- [x] Extra \n at end of file
|
||||
- [x] Wrong number of fields
|
||||
- Wrong number of fields at end of file
|
||||
|
||||
- [x] Do parallel parsing / serializing implementation
|
||||
- [x] Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
||||
- ~~More optimization and making parsing modular:~~
|
||||
- Have callbacks for header parsing and field parsing
|
||||
- That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again.
|
||||
- Finish ExtraTSV implementation
|
||||
- Do zig implementation
|
||||
- [x] Make untyped Simple TSV (De)serialization
|
||||
- [x] ~~Finish~~ Minimal ExtraTSV implementation
|
||||
- [ ] Do zig implementation
|
||||
- Make a C interface from that
|
||||
- Make a command-line interface
|
||||
- Make a viewer / editor
|
||||
- Streaming interface
|
||||
So you can start processing your data while it finishes parsing?
|
||||
- [ ] Decoding a binary stream with a \0 in it via UTF-8 doesn't seem to cause any issues. I thought that valid UTF-8 wouldn't have a \0?
|
||||
- [ ] Instead of exceptions when parsing, we should parse as much as possible and reflect parsing errors in the returned data structure
|
||||
|
Reference in New Issue
Block a user