Compare commits

...

3 Commits

Author SHA1 Message Date
d9ef2a4bb6 Update roadmap 2024-03-16 09:51:26 -07:00
a80206767e Change column type management
The column types were tracked just as a Type. This changes them to be an instance
so they can track additional information (such as the specific units of a physical
units type). Because of this, the column type attributes need to be passed as strings
(see CS0181).
2024-03-10 22:28:05 -07:00
b8ae3ce65d Fix file comment serialization 2024-03-10 22:16:04 -07:00
3 changed files with 207 additions and 100 deletions

View File

@ -84,7 +84,7 @@ public class SaneTsv
} }
parsed.Records = new List<T>(); parsed.Records = new List<T>();
var columnTypes = new List<Type>(); var columnTypes = new List<ColumnType>();
var columnNames = new List<string>(); var columnNames = new List<string>();
var columnPropertyInfos = new List<PropertyInfo>(); var columnPropertyInfos = new List<PropertyInfo>();
int columnCount = 0; int columnCount = 0;
@ -190,46 +190,46 @@ public class SaneTsv
columnName = columnString; columnName = columnString;
} }
Type type; ColumnType type;
switch (columnTypeString) switch (columnTypeString)
{ {
case "": case "":
numTypesBlank++; numTypesBlank++;
type = typeof(StringType); type = new StringType();
break; break;
case "string": case "string":
type = typeof(StringType); type = new StringType();
break; break;
case "boolean": case "boolean":
type = typeof(BooleanType); type = new BooleanType();
break; break;
case "float32": case "float32":
type = typeof(Float32Type); type = new Float32Type();
break; break;
case "float32-le": case "float32-le":
type = typeof(Float32LEType); type = new Float32LEType();
break; break;
case "float64": case "float64":
type = typeof(Float64Type); type = new Float64Type();
break; break;
case "float64-le": case "float64-le":
type = typeof(Float64LEType); type = new Float64LEType();
break; break;
case "uint32": case "uint32":
type = typeof(UInt32Type); type = new UInt32Type();
break; break;
case "uint64": case "uint64":
type = typeof(UInt64Type); type = new UInt64Type();
break; break;
case "int32": case "int32":
type = typeof(Int32Type); type = new Int32Type();
break; break;
case "int64": case "int64":
type = typeof(Int64Type); type = new Int64Type();
break; break;
case "binary": case "binary":
type = typeof(BinaryType); type = new BinaryType();
break; break;
default: default:
throw new Exception($"Invalid type '{columnTypeString}' for column {j}"); throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
@ -242,7 +242,7 @@ public class SaneTsv
throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}"); throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}");
} }
if (columnTypes[j] != type) if (columnTypes[j].GetType() != type.GetType())
{ {
throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}"); throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}");
} }
@ -348,7 +348,7 @@ public class SaneTsv
// startIndex is in we'd have to go back to the start of the record's comment, and to know // startIndex is in we'd have to go back to the start of the record's comment, and to know
// exactly where that comment started we'd have to go back to the start of the record before that // exactly where that comment started we'd have to go back to the start of the record before that
// (not including that other record's comment). // (not including that other record's comment).
protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new() protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
{ {
var fieldBytes = new List<byte>(); var fieldBytes = new List<byte>();
var fields = new List<byte[]>(); var fields = new List<byte[]>();
@ -509,12 +509,12 @@ public class SaneTsv
return parsed.ToArray(); return parsed.ToArray();
} }
protected static T ParseCurrentCommentedRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new() protected static T ParseCurrentCommentedRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
{ {
return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line); return (T)ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line);
} }
protected static T ParseCurrentRecord<T>(Type[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new() protected static T ParseCurrentRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
{ {
T record = new T(); T record = new T();
@ -532,7 +532,7 @@ public class SaneTsv
for (int j = 0; j < fields.Count; j++) for (int j = 0; j < fields.Count; j++)
{ {
// All other types require the content to be UTF-8. Binary fields can ignore that. // All other types require the content to be UTF-8. Binary fields can ignore that.
if (columnTypes[j] == typeof(BinaryType)) if (columnTypes[j].GetType() == typeof(BinaryType))
{ {
// TODO: Use faster method for property setting // TODO: Use faster method for property setting
// e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html // e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html
@ -541,7 +541,7 @@ public class SaneTsv
properties[j].SetValue(record, fields[j]); properties[j].SetValue(record, fields[j]);
continue; continue;
} }
else if (columnTypes[j] == typeof(Float32LEType)) else if (columnTypes[j].GetType() == typeof(Float32LEType))
{ {
byte[] floatBytes; byte[] floatBytes;
if (!LittleEndian) if (!LittleEndian)
@ -560,7 +560,7 @@ public class SaneTsv
continue; continue;
} }
else if (columnTypes[j] == typeof(Float64LEType)) else if (columnTypes[j].GetType() == typeof(Float64LEType))
{ {
byte[] floatBytes; byte[] floatBytes;
if (!LittleEndian) if (!LittleEndian)
@ -592,11 +592,11 @@ public class SaneTsv
// TODO: Add checking for numeric types format // TODO: Add checking for numeric types format
if (columnTypes[j] == typeof(StringType)) if (columnTypes[j].GetType() == typeof(StringType))
{ {
properties[j].SetValue(record, fieldString); properties[j].SetValue(record, fieldString);
} }
else if (columnTypes[j] == typeof(BooleanType)) else if (columnTypes[j].GetType() == typeof(BooleanType))
{ {
bool parsedBool; bool parsedBool;
if (fieldString == "TRUE") if (fieldString == "TRUE")
@ -614,7 +614,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedBool); properties[j].SetValue(record, parsedBool);
} }
else if (columnTypes[j] == typeof(Float32Type)) else if (columnTypes[j].GetType() == typeof(Float32Type))
{ {
float parsedFloat; float parsedFloat;
if (!float.TryParse(fieldString, out parsedFloat)) if (!float.TryParse(fieldString, out parsedFloat))
@ -635,7 +635,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedFloat); properties[j].SetValue(record, parsedFloat);
} }
else if (columnTypes[j] == typeof(Float64Type)) else if (columnTypes[j].GetType() == typeof(Float64Type))
{ {
double parsedDouble; double parsedDouble;
if (!double.TryParse(fieldString, out parsedDouble)) if (!double.TryParse(fieldString, out parsedDouble))
@ -656,7 +656,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedDouble); properties[j].SetValue(record, parsedDouble);
} }
else if (columnTypes[j] == typeof(UInt32Type)) else if (columnTypes[j].GetType() == typeof(UInt32Type))
{ {
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32)) if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
{ {
@ -665,7 +665,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedUInt32); properties[j].SetValue(record, parsedUInt32);
} }
else if (columnTypes[j] == typeof(UInt64Type)) else if (columnTypes[j].GetType() == typeof(UInt64Type))
{ {
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64)) if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
{ {
@ -674,7 +674,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedUInt64); properties[j].SetValue(record, parsedUInt64);
} }
else if (columnTypes[j] == typeof(Int32Type)) else if (columnTypes[j].GetType() == typeof(Int32Type))
{ {
if (!Int32.TryParse(fieldString, out Int32 parsedInt32)) if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
{ {
@ -683,7 +683,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedInt32); properties[j].SetValue(record, parsedInt32);
} }
else if (columnTypes[j] == typeof(Int64Type)) else if (columnTypes[j].GetType() == typeof(Int64Type))
{ {
if (!Int64.TryParse(fieldString, out Int64 parsedInt64)) if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
{ {
@ -1066,99 +1066,151 @@ public class SaneTsv
return records.ToArray(); return records.ToArray();
} }
public static Type GetColumnFromType(Type type) public static ColumnType GetColumnFromString(string type)
{ {
if (type == typeof(string)) if (type == "string")
{ {
return typeof(StringType); return new StringType();
} }
else if (type == typeof(bool)) else if (type == "boolean")
{ {
return typeof(BooleanType); return new BooleanType();
} }
else if (type == typeof(float)) else if (type == "float32")
{ {
return typeof(Float32Type); return new Float32Type();
} }
else if (type == typeof(double)) else if (type == "float32-le")
{ {
return typeof(Float64Type); return new Float32LEType();
} }
else if (type == typeof(UInt32)) else if (type == "float64")
{ {
return typeof(UInt32Type); return new Float64Type();
} }
else if (type == typeof(UInt64)) else if (type == "float64-le")
{ {
return typeof(UInt64Type); return new Float64LEType();
} }
else if (type == typeof(Int32)) else if (type == "uint32")
{ {
return typeof(Int32Type); return new UInt32Type();
} }
else if (type == typeof(Int64)) else if (type == "uint64")
{ {
return typeof(Int64Type); return new UInt64Type();
} }
else if (type == typeof(byte[])) else if (type == "int32")
{ {
return typeof(BinaryType); return new Int32Type();
}
else if (type == "int64")
{
return new Int64Type();
}
else if (type == "binary")
{
return new BinaryType();
} }
else else
{ {
throw new Exception($"Invalid type: {type}"); throw new Exception($"Invalid type: {type.GetType()}");
} }
} }
public static string GetNameFromColumn(Type type) public static ColumnType GetColumnFromType(Type type)
{ {
if (type == typeof(StringType)) if (type == typeof(string))
{
return new StringType();
}
else if (type == typeof(bool))
{
return new BooleanType();
}
else if (type == typeof(float))
{
return new Float32Type();
}
else if (type == typeof(double))
{
return new Float64Type();
}
else if (type == typeof(UInt32))
{
return new UInt32Type();
}
else if (type == typeof(UInt64))
{
return new UInt64Type();
}
else if (type == typeof(Int32))
{
return new Int32Type();
}
else if (type == typeof(Int64))
{
return new Int64Type();
}
else if (type == typeof(byte[]))
{
return new BinaryType();
}
else
{
throw new Exception($"Invalid type: {type.GetType()}");
}
}
public static string GetNameFromColumn(ColumnType type)
{
if (type.GetType() == typeof(StringType))
{ {
return "string"; return "string";
} }
else if (type == typeof(BooleanType)) else if (type.GetType() == typeof(BooleanType))
{ {
return "boolean"; return "boolean";
} }
else if (type == typeof(Float32Type)) else if (type.GetType() == typeof(Float32Type))
{ {
return "float32"; return "float32";
} }
else if (type == typeof(Float32LEType)) else if (type.GetType() == typeof(Float32LEType))
{ {
return "float32-le"; return "float32-le";
} }
else if (type == typeof(Float64Type)) else if (type.GetType() == typeof(Float64Type))
{ {
return "float64"; return "float64";
} }
else if (type == typeof(Float64LEType)) else if (type.GetType() == typeof(Float64LEType))
{ {
return "float64-le"; return "float64-le";
} }
else if (type == typeof(UInt32Type)) else if (type.GetType() == typeof(UInt32Type))
{ {
return "uint32"; return "uint32";
} }
else if (type == typeof(UInt64Type)) else if (type.GetType() == typeof(UInt64Type))
{ {
return "uint64"; return "uint64";
} }
else if (type == typeof(Int32Type)) else if (type.GetType() == typeof(Int32Type))
{ {
return "int32"; return "int32";
} }
else if (type == typeof(Int64Type)) else if (type.GetType() == typeof(Int64Type))
{ {
return "int64"; return "int64";
} }
else if (type == typeof(BinaryType)) else if (type.GetType() == typeof(BinaryType))
{ {
return "binary"; return "binary";
} }
else else
{ {
throw new Exception($"Invalid type: {type}"); throw new Exception($"Invalid type: {type.GetType()}");
} }
} }
@ -1174,14 +1226,24 @@ public class SaneTsv
public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord
{ {
return SerializeTsv<T>(data, FormatType.COMMENTED_TSV); return SerializeTsv<T>(data, FormatType.COMMENTED_TSV, fileComment);
} }
protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat) protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat, string fileComment = null)
{ {
var bytes = new List<byte>(); var bytes = new List<byte>();
var columnTypes = new List<Type>(); if (fileComment != null)
{
if (tsvFormat != FormatType.COMMENTED_TSV)
{
throw new Exception($"File comments are not valid for {tsvFormat}");
}
bytes.AddRange(Encoding.UTF8.GetBytes("#" + fileComment.Replace("\n", "\n#") + "\n"));
}
var columnTypes = new List<ColumnType>();
var columnNames = new List<string>(); var columnNames = new List<string>();
var columnPropertyInfos = new List<PropertyInfo>(); var columnPropertyInfos = new List<PropertyInfo>();
int columnCount = 0; int columnCount = 0;
@ -1197,8 +1259,8 @@ public class SaneTsv
string headerName = attribute.ColumnName ?? property.Name; string headerName = attribute.ColumnName ?? property.Name;
columnNames.Add(headerName); columnNames.Add(headerName);
Type headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType); ColumnType headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
if (tsvFormat == FormatType.SIMPLE_TSV && headerType != typeof(StringType)) if (tsvFormat == FormatType.SIMPLE_TSV && headerType.GetType() != typeof(StringType))
{ {
throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'"); throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'");
} }
@ -1278,7 +1340,7 @@ public class SaneTsv
return bytes.ToArray(); return bytes.ToArray();
} }
protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, Type[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex) protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
{ {
// Serialize data // Serialize data
for (int i = 0; i < data.Count; i++) for (int i = 0; i < data.Count; i++)
@ -1293,16 +1355,16 @@ public class SaneTsv
// Some fields definitely don't need escaping, so we add them directly to bytes // Some fields definitely don't need escaping, so we add them directly to bytes
bool skipEscaping = false; bool skipEscaping = false;
if (columnTypes[j] == typeof(StringType)) if (columnTypes[j].GetType() == typeof(StringType))
{ {
fieldEncoded = Encoding.UTF8.GetBytes((string)datum); fieldEncoded = Encoding.UTF8.GetBytes((string)datum);
} }
else if (columnTypes[j] == typeof(BooleanType)) else if (columnTypes[j].GetType() == typeof(BooleanType))
{ {
bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded); bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded);
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Float32Type)) else if (columnTypes[j].GetType() == typeof(Float32Type))
{ {
if (datum is float f) if (datum is float f)
{ {
@ -1326,7 +1388,7 @@ public class SaneTsv
} }
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Float32LEType)) else if (columnTypes[j].GetType() == typeof(Float32LEType))
{ {
if (LittleEndian) if (LittleEndian)
{ {
@ -1342,7 +1404,7 @@ public class SaneTsv
} }
} }
} }
else if (columnTypes[j] == typeof(Float64Type)) else if (columnTypes[j].GetType() == typeof(Float64Type))
{ {
if (datum is double d) if (datum is double d)
{ {
@ -1366,7 +1428,7 @@ public class SaneTsv
} }
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Float64LEType)) else if (columnTypes[j].GetType() == typeof(Float64LEType))
{ {
if (LittleEndian) if (LittleEndian)
{ {
@ -1382,27 +1444,27 @@ public class SaneTsv
} }
} }
} }
else if (columnTypes[j] == typeof(UInt32Type)) else if (columnTypes[j].GetType() == typeof(UInt32Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(UInt64Type)) else if (columnTypes[j].GetType() == typeof(UInt64Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Int32Type)) else if (columnTypes[j].GetType() == typeof(Int32Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(Int64Type)) else if (columnTypes[j].GetType() == typeof(Int64Type))
{ {
bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString())); bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString()));
skipEscaping = true; skipEscaping = true;
} }
else if (columnTypes[j] == typeof(BinaryType)) else if (columnTypes[j].GetType() == typeof(BinaryType))
{ {
fieldEncoded = (byte[])datum; fieldEncoded = (byte[])datum;
} }
@ -1512,15 +1574,15 @@ public class SaneTsv
public class TsvColumnAttribute : Attribute public class TsvColumnAttribute : Attribute
{ {
public string ColumnName { get; } public string ColumnName { get; }
public virtual Type ColumnType { get; } public virtual ColumnType ColumnType { get; }
public TsvColumnAttribute() public TsvColumnAttribute()
{ {
ColumnType = typeof(StringType); ColumnType = new StringType();
} }
public TsvColumnAttribute(string columnName) public TsvColumnAttribute(string columnName)
{ {
ColumnType = typeof(StringType); ColumnType = new StringType();
ColumnName = columnName; ColumnName = columnName;
} }
} }
@ -1528,27 +1590,19 @@ public class SaneTsv
// TODO: Add column ordering // TODO: Add column ordering
public class TypedTsvColumnAttribute : TsvColumnAttribute public class TypedTsvColumnAttribute : TsvColumnAttribute
{ {
public override Type ColumnType { get; } public override ColumnType ColumnType { get; }
public TypedTsvColumnAttribute() { } public TypedTsvColumnAttribute() { }
public TypedTsvColumnAttribute(string columnName) : base(columnName) { } public TypedTsvColumnAttribute(string columnName) : base(columnName) { }
public TypedTsvColumnAttribute(string columnName, Type columnType) : base(columnName) public TypedTsvColumnAttribute(string columnName, string columnType) : base(columnName)
{ {
if (columnType.BaseType != typeof(ColumnType)) ColumnType = GetColumnFromString(columnType);
{
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
}
ColumnType = columnType;
} }
public TypedTsvColumnAttribute(Type columnType) public TypedTsvColumnAttribute(ColumnType columnType)
{ {
if (columnType.BaseType != typeof(ColumnType))
{
throw new Exception("Column type must inherit from SaneTsv.ColumnType");
}
ColumnType = columnType; ColumnType = columnType;
} }
} }

View File

@ -759,6 +759,30 @@ internal class Program : SaneTsv
} }
} }
{
// Round-trip test for file comments: parse a Commented TSV document that starts
// with a file comment, serialize the parsed records again while passing the parsed
// file comment back in, and check that the output text equals the input text.
string testName = "File comment serde";
// Input text, in order: two '#'-prefixed lines, one header line with three columns,
// and two records. The second field of the first record contains two backslash
// characters followed by 't' and a NUL character; the third header entry and the
// last field also contain backslash sequences.
// NOTE(review): presumably the text after the last ':' in each header entry is the
// column type (boolean / binary / string) — confirm against the header parser.
string testString1 = "#this is a file comment" +
"\n# and one more line since you're such a good customer" +
"\ncolumn1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
"\nFALSE\tnother\tno\\ther";
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
// Serialize with the file comment that came out of parsing, then decode the bytes
// as UTF-8 so the result can be compared to the original string.
string reserialized = Encoding.UTF8.GetString(SaneTsv.SerializeCommentedTsv<BoolTestRecord2>(parsed.Records, parsed.FileComment));
// The outcome is only reported on the console; a mismatch does not throw here.
if (reserialized == testString1)
{
Console.WriteLine($"Passed {testName}");
}
else
{
Console.WriteLine($"Failed {testName}");
}
}
Console.WriteLine("Done with tests"); Console.WriteLine("Done with tests");
} }
} }

View File

@ -3,14 +3,43 @@
## Roadmap ## Roadmap
- Improve error reporting by including line/column information in exceptions - Improve error reporting by including line/column information in exceptions
- Come up with a static-typing interface - Use this to get line numbers for parallel parsing implementations
- [x] Come up with a static-typing interface
Something that doesn't require an array of objects Something that doesn't require an array of objects
Use a class with SaneTsv attributes
- Check numeric formatting matches spec - Check numeric formatting matches spec
- Do parallel parsing / serializing implementation - [x] Maybe add a binary representation for f32/f64. It should specify that it is Little-endian (since we have to pick one). That way we can guarantee bit-compatibility between implementations where an application might require that.
- Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in. - [x] Add Column name/type specification to API
- More optimization and making parsing modular: - So you can tell it what columns to expect
- [ ] Lax/strict versions
See the attributes thing above
- Generate test cases
- [x] File comment / no file comment
- [x] header types / no header types
- [x] Line comments / no line comments
- [x] end of file comment
- [x] Test with the start index of parallel methods in last record
- end index in first record
- [x] Extra \n at end of file
- [x] Wrong number of fields
- Wrong number of fields at end of file
- [x] Do parallel parsing / serializing implementation
- [x] Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
- ~~More optimization and making parsing modular:~~
- Have callbacks for header parsing and field parsing - Have callbacks for header parsing and field parsing
- That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again. - That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again.
- Finish ExtraTSV implementation - [x] Make untyped Simple TSV (De)serialization
- Do zig implementation - [x] ~~Finish~~ Minimal ExtraTSV implementation
- [ ] Do zig implementation
- Make a c interface from that - Make a c interface from that
- Make a command-line interface
- Make a viewer / editor
- Streaming interface
So you can start processing your data while it finishes parsing?
- [ ] Decoding a binary stream with a \0 in it via UTF-8 doesn't seem to cause any issues. I thought that valid UTF-8 wouldn't have a \0?
- [ ] Instead of exceptions when parsing, we should parse as much as possible and reflect parsing errors in the returned data structure