Compare commits
3 Commits
0fd092685d
...
master
Author | SHA1 | Date | |
---|---|---|---|
d9ef2a4bb6 | |||
a80206767e | |||
b8ae3ce65d |
@ -84,7 +84,7 @@ public class SaneTsv
|
|||||||
}
|
}
|
||||||
parsed.Records = new List<T>();
|
parsed.Records = new List<T>();
|
||||||
|
|
||||||
var columnTypes = new List<Type>();
|
var columnTypes = new List<ColumnType>();
|
||||||
var columnNames = new List<string>();
|
var columnNames = new List<string>();
|
||||||
var columnPropertyInfos = new List<PropertyInfo>();
|
var columnPropertyInfos = new List<PropertyInfo>();
|
||||||
int columnCount = 0;
|
int columnCount = 0;
|
||||||
@ -190,46 +190,46 @@ public class SaneTsv
|
|||||||
columnName = columnString;
|
columnName = columnString;
|
||||||
}
|
}
|
||||||
|
|
||||||
Type type;
|
ColumnType type;
|
||||||
|
|
||||||
switch (columnTypeString)
|
switch (columnTypeString)
|
||||||
{
|
{
|
||||||
case "":
|
case "":
|
||||||
numTypesBlank++;
|
numTypesBlank++;
|
||||||
type = typeof(StringType);
|
type = new StringType();
|
||||||
break;
|
break;
|
||||||
case "string":
|
case "string":
|
||||||
type = typeof(StringType);
|
type = new StringType();
|
||||||
break;
|
break;
|
||||||
case "boolean":
|
case "boolean":
|
||||||
type = typeof(BooleanType);
|
type = new BooleanType();
|
||||||
break;
|
break;
|
||||||
case "float32":
|
case "float32":
|
||||||
type = typeof(Float32Type);
|
type = new Float32Type();
|
||||||
break;
|
break;
|
||||||
case "float32-le":
|
case "float32-le":
|
||||||
type = typeof(Float32LEType);
|
type = new Float32LEType();
|
||||||
break;
|
break;
|
||||||
case "float64":
|
case "float64":
|
||||||
type = typeof(Float64Type);
|
type = new Float64Type();
|
||||||
break;
|
break;
|
||||||
case "float64-le":
|
case "float64-le":
|
||||||
type = typeof(Float64LEType);
|
type = new Float64LEType();
|
||||||
break;
|
break;
|
||||||
case "uint32":
|
case "uint32":
|
||||||
type = typeof(UInt32Type);
|
type = new UInt32Type();
|
||||||
break;
|
break;
|
||||||
case "uint64":
|
case "uint64":
|
||||||
type = typeof(UInt64Type);
|
type = new UInt64Type();
|
||||||
break;
|
break;
|
||||||
case "int32":
|
case "int32":
|
||||||
type = typeof(Int32Type);
|
type = new Int32Type();
|
||||||
break;
|
break;
|
||||||
case "int64":
|
case "int64":
|
||||||
type = typeof(Int64Type);
|
type = new Int64Type();
|
||||||
break;
|
break;
|
||||||
case "binary":
|
case "binary":
|
||||||
type = typeof(BinaryType);
|
type = new BinaryType();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
|
throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
|
||||||
@ -242,7 +242,7 @@ public class SaneTsv
|
|||||||
throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}");
|
throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (columnTypes[j] != type)
|
if (columnTypes[j].GetType() != type.GetType())
|
||||||
{
|
{
|
||||||
throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}");
|
throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}");
|
||||||
}
|
}
|
||||||
@ -348,7 +348,7 @@ public class SaneTsv
|
|||||||
// startIndex is in we'd have to go back to the start of the record's comment, and to know
|
// startIndex is in we'd have to go back to the start of the record's comment, and to know
|
||||||
// exactly where that comment started we'd have to go back to the start of the record before that
|
// exactly where that comment started we'd have to go back to the start of the record before that
|
||||||
// (not including that other record's comment).
|
// (not including that other record's comment).
|
||||||
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
|
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
|
||||||
{
|
{
|
||||||
var fieldBytes = new List<byte>();
|
var fieldBytes = new List<byte>();
|
||||||
var fields = new List<byte[]>();
|
var fields = new List<byte[]>();
|
||||||
@ -509,12 +509,12 @@ public class SaneTsv
|
|||||||
return parsed.ToArray();
|
return parsed.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static T ParseCurrentCommentedRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
|
protected static T ParseCurrentCommentedRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
|
||||||
{
|
{
|
||||||
return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line);
|
return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static T ParseCurrentRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
|
protected static T ParseCurrentRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
|
||||||
{
|
{
|
||||||
T record = new T();
|
T record = new T();
|
||||||
|
|
||||||
@ -532,7 +532,7 @@ public class SaneTsv
|
|||||||
for (int j = 0; j < fields.Count; j++)
|
for (int j = 0; j < fields.Count; j++)
|
||||||
{
|
{
|
||||||
// All other types require the content to be UTF-8. Binary fields can ignore that.
|
// All other types require the content to be UTF-8. Binary fields can ignore that.
|
||||||
if (columnTypes[j] == typeof(BinaryType))
|
if (columnTypes[j].GetType() == typeof(BinaryType))
|
||||||
{
|
{
|
||||||
// TODO: Use faster method for property setting
|
// TODO: Use faster method for property setting
|
||||||
// e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html
|
// e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html
|
||||||
@ -541,7 +541,7 @@ public class SaneTsv
|
|||||||
properties[j].SetValue(record, fields[j]);
|
properties[j].SetValue(record, fields[j]);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float32LEType))
|
else if (columnTypes[j].GetType() == typeof(Float32LEType))
|
||||||
{
|
{
|
||||||
byte[] floatBytes;
|
byte[] floatBytes;
|
||||||
if (!LittleEndian)
|
if (!LittleEndian)
|
||||||
@ -560,7 +560,7 @@ public class SaneTsv
|
|||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float64LEType))
|
else if (columnTypes[j].GetType() == typeof(Float64LEType))
|
||||||
{
|
{
|
||||||
byte[] floatBytes;
|
byte[] floatBytes;
|
||||||
if (!LittleEndian)
|
if (!LittleEndian)
|
||||||
@ -592,11 +592,11 @@ public class SaneTsv
|
|||||||
|
|
||||||
// TODO: Add checking for numeric types format
|
// TODO: Add checking for numeric types format
|
||||||
|
|
||||||
if (columnTypes[j] == typeof(StringType))
|
if (columnTypes[j].GetType() == typeof(StringType))
|
||||||
{
|
{
|
||||||
properties[j].SetValue(record, fieldString);
|
properties[j].SetValue(record, fieldString);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(BooleanType))
|
else if (columnTypes[j].GetType() == typeof(BooleanType))
|
||||||
{
|
{
|
||||||
bool parsedBool;
|
bool parsedBool;
|
||||||
if (fieldString == "TRUE")
|
if (fieldString == "TRUE")
|
||||||
@ -614,7 +614,7 @@ public class SaneTsv
|
|||||||
|
|
||||||
properties[j].SetValue(record, parsedBool);
|
properties[j].SetValue(record, parsedBool);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float32Type))
|
else if (columnTypes[j].GetType() == typeof(Float32Type))
|
||||||
{
|
{
|
||||||
float parsedFloat;
|
float parsedFloat;
|
||||||
if (!float.TryParse(fieldString, out parsedFloat))
|
if (!float.TryParse(fieldString, out parsedFloat))
|
||||||
@ -635,7 +635,7 @@ public class SaneTsv
|
|||||||
|
|
||||||
properties[j].SetValue(record, parsedFloat);
|
properties[j].SetValue(record, parsedFloat);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float64Type))
|
else if (columnTypes[j].GetType() == typeof(Float64Type))
|
||||||
{
|
{
|
||||||
double parsedDouble;
|
double parsedDouble;
|
||||||
if (!double.TryParse(fieldString, out parsedDouble))
|
if (!double.TryParse(fieldString, out parsedDouble))
|
||||||
@ -656,7 +656,7 @@ public class SaneTsv
|
|||||||
|
|
||||||
properties[j].SetValue(record, parsedDouble);
|
properties[j].SetValue(record, parsedDouble);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(UInt32Type))
|
else if (columnTypes[j].GetType() == typeof(UInt32Type))
|
||||||
{
|
{
|
||||||
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
|
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
|
||||||
{
|
{
|
||||||
@ -665,7 +665,7 @@ public class SaneTsv
|
|||||||
|
|
||||||
properties[j].SetValue(record, parsedUInt32);
|
properties[j].SetValue(record, parsedUInt32);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(UInt64Type))
|
else if (columnTypes[j].GetType() == typeof(UInt64Type))
|
||||||
{
|
{
|
||||||
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
|
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
|
||||||
{
|
{
|
||||||
@ -674,7 +674,7 @@ public class SaneTsv
|
|||||||
|
|
||||||
properties[j].SetValue(record, parsedUInt64);
|
properties[j].SetValue(record, parsedUInt64);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Int32Type))
|
else if (columnTypes[j].GetType() == typeof(Int32Type))
|
||||||
{
|
{
|
||||||
if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
|
if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
|
||||||
{
|
{
|
||||||
@ -683,7 +683,7 @@ public class SaneTsv
|
|||||||
|
|
||||||
properties[j].SetValue(record, parsedInt32);
|
properties[j].SetValue(record, parsedInt32);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Int64Type))
|
else if (columnTypes[j].GetType() == typeof(Int64Type))
|
||||||
{
|
{
|
||||||
if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
|
if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
|
||||||
{
|
{
|
||||||
@ -1066,99 +1066,151 @@ public class SaneTsv
|
|||||||
return records.ToArray();
|
return records.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Type GetColumnFromType(Type type)
|
public static ColumnType GetColumnFromString(string type)
|
||||||
{
|
{
|
||||||
if (type == typeof(string))
|
if (type == "string")
|
||||||
{
|
{
|
||||||
return typeof(StringType);
|
return new StringType();
|
||||||
}
|
}
|
||||||
else if (type == typeof(bool))
|
else if (type == "boolean")
|
||||||
{
|
{
|
||||||
return typeof(BooleanType);
|
return new BooleanType();
|
||||||
}
|
}
|
||||||
else if (type == typeof(float))
|
else if (type == "float32")
|
||||||
{
|
{
|
||||||
return typeof(Float32Type);
|
return new Float32Type();
|
||||||
}
|
}
|
||||||
else if (type == typeof(double))
|
else if (type == "float32-le")
|
||||||
{
|
{
|
||||||
return typeof(Float64Type);
|
return new Float32LEType();
|
||||||
}
|
}
|
||||||
else if (type == typeof(UInt32))
|
else if (type == "float64")
|
||||||
{
|
{
|
||||||
return typeof(UInt32Type);
|
return new Float64Type();
|
||||||
}
|
}
|
||||||
else if (type == typeof(UInt64))
|
else if (type == "float64-le")
|
||||||
{
|
{
|
||||||
return typeof(UInt64Type);
|
return new Float64LEType();
|
||||||
}
|
}
|
||||||
else if (type == typeof(Int32))
|
else if (type == "uint32")
|
||||||
{
|
{
|
||||||
return typeof(Int32Type);
|
return new UInt32Type();
|
||||||
}
|
}
|
||||||
else if (type == typeof(Int64))
|
else if (type == "uint64")
|
||||||
{
|
{
|
||||||
return typeof(Int64Type);
|
return new UInt64Type();
|
||||||
}
|
}
|
||||||
else if (type == typeof(byte[]))
|
else if (type == "int32")
|
||||||
{
|
{
|
||||||
return typeof(BinaryType);
|
return new Int32Type();
|
||||||
|
}
|
||||||
|
else if (type == "int64")
|
||||||
|
{
|
||||||
|
return new Int64Type();
|
||||||
|
}
|
||||||
|
else if (type == "binary")
|
||||||
|
{
|
||||||
|
return new BinaryType();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw new Exception($"Invalid type: {type}");
|
throw new Exception($"Invalid type: {type.GetType()}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static string GetNameFromColumn(Type type)
|
public static ColumnType GetColumnFromType(Type type)
|
||||||
{
|
{
|
||||||
if (type == typeof(StringType))
|
if (type == typeof(string))
|
||||||
|
{
|
||||||
|
return new StringType();
|
||||||
|
}
|
||||||
|
else if (type == typeof(bool))
|
||||||
|
{
|
||||||
|
return new BooleanType();
|
||||||
|
}
|
||||||
|
else if (type == typeof(float))
|
||||||
|
{
|
||||||
|
return new Float32Type();
|
||||||
|
}
|
||||||
|
else if (type == typeof(double))
|
||||||
|
{
|
||||||
|
return new Float64Type();
|
||||||
|
}
|
||||||
|
else if (type == typeof(UInt32))
|
||||||
|
{
|
||||||
|
return new UInt32Type();
|
||||||
|
}
|
||||||
|
else if (type == typeof(UInt64))
|
||||||
|
{
|
||||||
|
return new UInt64Type();
|
||||||
|
}
|
||||||
|
else if (type == typeof(Int32))
|
||||||
|
{
|
||||||
|
return new Int32Type();
|
||||||
|
}
|
||||||
|
else if (type == typeof(Int64))
|
||||||
|
{
|
||||||
|
return new Int64Type();
|
||||||
|
}
|
||||||
|
else if (type == typeof(byte[]))
|
||||||
|
{
|
||||||
|
return new BinaryType();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw new Exception($"Invalid type: {type.GetType()}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static string GetNameFromColumn(ColumnType type)
|
||||||
|
{
|
||||||
|
if (type.GetType() == typeof(StringType))
|
||||||
{
|
{
|
||||||
return "string";
|
return "string";
|
||||||
}
|
}
|
||||||
else if (type == typeof(BooleanType))
|
else if (type.GetType() == typeof(BooleanType))
|
||||||
{
|
{
|
||||||
return "boolean";
|
return "boolean";
|
||||||
}
|
}
|
||||||
else if (type == typeof(Float32Type))
|
else if (type.GetType() == typeof(Float32Type))
|
||||||
{
|
{
|
||||||
return "float32";
|
return "float32";
|
||||||
}
|
}
|
||||||
else if (type == typeof(Float32LEType))
|
else if (type.GetType() == typeof(Float32LEType))
|
||||||
{
|
{
|
||||||
return "float32-le";
|
return "float32-le";
|
||||||
}
|
}
|
||||||
else if (type == typeof(Float64Type))
|
else if (type.GetType() == typeof(Float64Type))
|
||||||
{
|
{
|
||||||
return "float64";
|
return "float64";
|
||||||
}
|
}
|
||||||
else if (type == typeof(Float64LEType))
|
else if (type.GetType() == typeof(Float64LEType))
|
||||||
{
|
{
|
||||||
return "float64-le";
|
return "float64-le";
|
||||||
}
|
}
|
||||||
else if (type == typeof(UInt32Type))
|
else if (type.GetType() == typeof(UInt32Type))
|
||||||
{
|
{
|
||||||
return "uint32";
|
return "uint32";
|
||||||
}
|
}
|
||||||
else if (type == typeof(UInt64Type))
|
else if (type.GetType() == typeof(UInt64Type))
|
||||||
{
|
{
|
||||||
return "uint64";
|
return "uint64";
|
||||||
}
|
}
|
||||||
else if (type == typeof(Int32Type))
|
else if (type.GetType() == typeof(Int32Type))
|
||||||
{
|
{
|
||||||
return "int32";
|
return "int32";
|
||||||
}
|
}
|
||||||
else if (type == typeof(Int64Type))
|
else if (type.GetType() == typeof(Int64Type))
|
||||||
{
|
{
|
||||||
return "int64";
|
return "int64";
|
||||||
}
|
}
|
||||||
else if (type == typeof(BinaryType))
|
else if (type.GetType() == typeof(BinaryType))
|
||||||
{
|
{
|
||||||
return "binary";
|
return "binary";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw new Exception($"Invalid type: {type}");
|
throw new Exception($"Invalid type: {type.GetType()}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1174,14 +1226,24 @@ public class SaneTsv
|
|||||||
|
|
||||||
public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord
|
public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord
|
||||||
{
|
{
|
||||||
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV);
|
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV, fileComment);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat)
|
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat, string fileComment = null)
|
||||||
{
|
{
|
||||||
var bytes = new List<byte>();
|
var bytes = new List<byte>();
|
||||||
|
|
||||||
var columnTypes = new List<Type>();
|
if (fileComment != null)
|
||||||
|
{
|
||||||
|
if (tsvFormat != FormatType.COMMENTED_TSV)
|
||||||
|
{
|
||||||
|
throw new Exception($"File comments are not valid for {tsvFormat}");
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes.AddRange(Encoding.UTF8.GetBytes("#" + fileComment.Replace("\n", "\n#") + "\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
var columnTypes = new List<ColumnType>();
|
||||||
var columnNames = new List<string>();
|
var columnNames = new List<string>();
|
||||||
var columnPropertyInfos = new List<PropertyInfo>();
|
var columnPropertyInfos = new List<PropertyInfo>();
|
||||||
int columnCount = 0;
|
int columnCount = 0;
|
||||||
@ -1197,8 +1259,8 @@ public class SaneTsv
|
|||||||
|
|
||||||
string headerName = attribute.ColumnName ?? property.Name;
|
string headerName = attribute.ColumnName ?? property.Name;
|
||||||
columnNames.Add(headerName);
|
columnNames.Add(headerName);
|
||||||
Type headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
|
ColumnType headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
|
||||||
if (tsvFormat == FormatType.SIMPLE_TSV && headerType != typeof(StringType))
|
if (tsvFormat == FormatType.SIMPLE_TSV && headerType.GetType() != typeof(StringType))
|
||||||
{
|
{
|
||||||
throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'");
|
throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'");
|
||||||
}
|
}
|
||||||
@ -1278,7 +1340,7 @@ public class SaneTsv
|
|||||||
return bytes.ToArray();
|
return bytes.ToArray();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
|
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
|
||||||
{
|
{
|
||||||
// Serialize data
|
// Serialize data
|
||||||
for (int i = 0; i < data.Count; i++)
|
for (int i = 0; i < data.Count; i++)
|
||||||
@ -1293,16 +1355,16 @@ public class SaneTsv
|
|||||||
// Some fields definitely don't need escaping, so we add them directly to bytes
|
// Some fields definitely don't need escaping, so we add them directly to bytes
|
||||||
bool skipEscaping = false;
|
bool skipEscaping = false;
|
||||||
|
|
||||||
if (columnTypes[j] == typeof(StringType))
|
if (columnTypes[j].GetType() == typeof(StringType))
|
||||||
{
|
{
|
||||||
fieldEncoded = Encoding.UTF8.GetBytes((string)datum);
|
fieldEncoded = Encoding.UTF8.GetBytes((string)datum);
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(BooleanType))
|
else if (columnTypes[j].GetType() == typeof(BooleanType))
|
||||||
{
|
{
|
||||||
bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded);
|
bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded);
|
||||||
skipEscaping = true;
|
skipEscaping = true;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float32Type))
|
else if (columnTypes[j].GetType() == typeof(Float32Type))
|
||||||
{
|
{
|
||||||
if (datum is float f)
|
if (datum is float f)
|
||||||
{
|
{
|
||||||
@ -1326,7 +1388,7 @@ public class SaneTsv
|
|||||||
}
|
}
|
||||||
skipEscaping = true;
|
skipEscaping = true;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float32LEType))
|
else if (columnTypes[j].GetType() == typeof(Float32LEType))
|
||||||
{
|
{
|
||||||
if (LittleEndian)
|
if (LittleEndian)
|
||||||
{
|
{
|
||||||
@ -1342,7 +1404,7 @@ public class SaneTsv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float64Type))
|
else if (columnTypes[j].GetType() == typeof(Float64Type))
|
||||||
{
|
{
|
||||||
if (datum is double d)
|
if (datum is double d)
|
||||||
{
|
{
|
||||||
@ -1366,7 +1428,7 @@ public class SaneTsv
|
|||||||
}
|
}
|
||||||
skipEscaping = true;
|
skipEscaping = true;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Float64LEType))
|
else if (columnTypes[j].GetType() == typeof(Float64LEType))
|
||||||
{
|
{
|
||||||
if (LittleEndian)
|
if (LittleEndian)
|
||||||
{
|
{
|
||||||
@ -1382,27 +1444,27 @@ public class SaneTsv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(UInt32Type))
|
else if (columnTypes[j].GetType() == typeof(UInt32Type))
|
||||||
{
|
{
|
||||||
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString()));
|
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString()));
|
||||||
skipEscaping = true;
|
skipEscaping = true;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(UInt64Type))
|
else if (columnTypes[j].GetType() == typeof(UInt64Type))
|
||||||
{
|
{
|
||||||
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString()));
|
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString()));
|
||||||
skipEscaping = true;
|
skipEscaping = true;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Int32Type))
|
else if (columnTypes[j].GetType() == typeof(Int32Type))
|
||||||
{
|
{
|
||||||
bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString()));
|
bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString()));
|
||||||
skipEscaping = true;
|
skipEscaping = true;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(Int64Type))
|
else if (columnTypes[j].GetType() == typeof(Int64Type))
|
||||||
{
|
{
|
||||||
bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString()));
|
bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString()));
|
||||||
skipEscaping = true;
|
skipEscaping = true;
|
||||||
}
|
}
|
||||||
else if (columnTypes[j] == typeof(BinaryType))
|
else if (columnTypes[j].GetType() == typeof(BinaryType))
|
||||||
{
|
{
|
||||||
fieldEncoded = (byte[])datum;
|
fieldEncoded = (byte[])datum;
|
||||||
}
|
}
|
||||||
@ -1512,15 +1574,15 @@ public class SaneTsv
|
|||||||
public class TsvColumnAttribute : Attribute
|
public class TsvColumnAttribute : Attribute
|
||||||
{
|
{
|
||||||
public string ColumnName { get; }
|
public string ColumnName { get; }
|
||||||
public virtual Type ColumnType { get; }
|
public virtual ColumnType ColumnType { get; }
|
||||||
|
|
||||||
public TsvColumnAttribute()
|
public TsvColumnAttribute()
|
||||||
{
|
{
|
||||||
ColumnType = typeof(StringType);
|
ColumnType = new StringType();
|
||||||
}
|
}
|
||||||
public TsvColumnAttribute(string columnName)
|
public TsvColumnAttribute(string columnName)
|
||||||
{
|
{
|
||||||
ColumnType = typeof(StringType);
|
ColumnType = new StringType();
|
||||||
ColumnName = columnName;
|
ColumnName = columnName;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1528,27 +1590,19 @@ public class SaneTsv
|
|||||||
// TODO: Add column ordering
|
// TODO: Add column ordering
|
||||||
public class TypedTsvColumnAttribute : TsvColumnAttribute
|
public class TypedTsvColumnAttribute : TsvColumnAttribute
|
||||||
{
|
{
|
||||||
public override Type ColumnType { get; }
|
public override ColumnType ColumnType { get; }
|
||||||
|
|
||||||
public TypedTsvColumnAttribute() { }
|
public TypedTsvColumnAttribute() { }
|
||||||
|
|
||||||
public TypedTsvColumnAttribute(string columnName) : base(columnName) { }
|
public TypedTsvColumnAttribute(string columnName) : base(columnName) { }
|
||||||
|
|
||||||
public TypedTsvColumnAttribute(string columnName, Type columnType) : base(columnName)
|
public TypedTsvColumnAttribute(string columnName, string columnType) : base(columnName)
|
||||||
{
|
{
|
||||||
if (columnType.BaseType != typeof(ColumnType))
|
ColumnType = GetColumnFromString(columnType);
|
||||||
{
|
|
||||||
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
|
|
||||||
}
|
|
||||||
ColumnType = columnType;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public TypedTsvColumnAttribute(Type columnType)
|
public TypedTsvColumnAttribute(ColumnType columnType)
|
||||||
{
|
{
|
||||||
if (columnType.BaseType != typeof(ColumnType))
|
|
||||||
{
|
|
||||||
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
|
|
||||||
}
|
|
||||||
ColumnType = columnType;
|
ColumnType = columnType;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -759,6 +759,30 @@ internal class Program : SaneTsv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "File comment serde";
|
||||||
|
|
||||||
|
string testString1 = "#this is a file comment" +
|
||||||
|
"\n# and one more line since you're such a good customer" +
|
||||||
|
"\ncolumn1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
|
||||||
|
string reserialized = Encoding.UTF8.GetString(SaneTsv.SerializeCommentedTsv<BoolTestRecord2>(parsed.Records, parsed.FileComment));
|
||||||
|
|
||||||
|
if (reserialized == testString1)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Console.WriteLine("Done with tests");
|
Console.WriteLine("Done with tests");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
41
readme.md
41
readme.md
@ -3,14 +3,43 @@
|
|||||||
## Roadmap
|
## Roadmap
|
||||||
|
|
||||||
- Improve error reporting by including line/column information in exceptions
|
- Improve error reporting by including line/column information in exceptions
|
||||||
- Come up with a static-typing interface
|
- Use this to get line numbers for parallel parsing implementations
|
||||||
|
- [x] Come up with a static-typing interface
|
||||||
|
|
||||||
Something that doesn't require an array of objects
|
Something that doesn't require an array of objects
|
||||||
|
|
||||||
|
Use a class with SaneTsv attributes
|
||||||
|
|
||||||
- Check numeric formatting matches spec
|
- Check numeric formatting matches spec
|
||||||
- Do parallel parsing / serializing implementation
|
- [x] Maybe add a binary representation for f32/f64. It should specify that it is Little-endian (since we have to pick one). That way we can guarantee bit-compatibility between implementations where an application might require that.
|
||||||
- Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
- [x] Add Column name/type specification to API
|
||||||
- More optimization and making parsing modular:
|
- So you can tell it what columns to expect
|
||||||
|
- [ ] Lax/strict versions
|
||||||
|
|
||||||
|
See the attributes thing above
|
||||||
|
- Generate test cases
|
||||||
|
- [x] File comment / no file comment
|
||||||
|
- [x] header types / no header types
|
||||||
|
- [x] Line comments / no line comments
|
||||||
|
- [x] end of file comment
|
||||||
|
- [x] Test with the start index of parallel methods in last record
|
||||||
|
- end index in first record
|
||||||
|
- [x] Extra \n at end of file
|
||||||
|
- [x] Wrong number of fields
|
||||||
|
- Wrong number of fields at end of file
|
||||||
|
|
||||||
|
- [x] Do parallel parsing / serializing implementation
|
||||||
|
- [x] Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
||||||
|
- ~~More optimization and making parsing modular:~~
|
||||||
- Have callbacks for header parsing and field parsing
|
- Have callbacks for header parsing and field parsing
|
||||||
- That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again.
|
- That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again.
|
||||||
- Finish ExtraTSV implementation
|
- [x] Make untyped Simple TSV (De)serialization
|
||||||
- Do zig implementation
|
- [x] ~~Finish~~ Minimal ExtraTSV implementation
|
||||||
|
- [ ] Do zig implementation
|
||||||
- Make a c interface from that
|
- Make a c interface from that
|
||||||
|
- Make a commandline interface
|
||||||
|
- Make a viewer / editor
|
||||||
|
- Streaming interface
|
||||||
|
So you can start processing your data while it finishes parsing?
|
||||||
|
- [ ] Decoding a binary stream with a \0 in it via UTF-8 doesn't seem to cause any issues. I thought that valid UTF-8 wouldn't have a \0?
|
||||||
|
- [ ] Instead of exceptions when parsing, we should parse as much as possible and reflect parsing errors in the returned data structure
|
||||||
|
Reference in New Issue
Block a user