Compare commits
10 Commits
cc8a122b57
...
0b302734e9
Author | SHA1 | Date | |
---|---|---|---|
|
0b302734e9 | ||
|
7bc553905d | ||
|
932fbd553a | ||
|
93f2e2ea5b | ||
|
99766f99a6 | ||
|
a5eedef36b | ||
|
ee46c93ce1 | ||
|
83602391ab | ||
|
725a5b2034 | ||
|
38d324738e |
125
SaneTsv/ExtraTsv/ExtraTsv.cs
Normal file
125
SaneTsv/ExtraTsv/ExtraTsv.cs
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
|
||||||
|
using System.Globalization;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
namespace NathanMcRae;
|
||||||
|
|
||||||
|
/// <summary>
/// Parser for ExtraTSV: a commented TSV whose file comment declares an
/// ExtraTSV version and whose column names may carry an extra type suffix
/// (<c>:iso8601</c> or <c>:&lt;unit&gt;:units</c>) in front of the base type.
/// </summary>
public class ExtraTsv : SaneTsv
{
    /// <summary>
    /// Column type marker for columns declared as <c>name:iso8601:string</c>.
    /// Fields of such columns are replaced with parsed <see cref="DateTime"/> values.
    /// </summary>
    public class Iso8601Type : ColumnType { }

    /// <summary>
    /// Column type marker for a numeric column annotated with a physical unit.
    /// </summary>
    public class PhysicalUnitsType : ColumnType
    {
        /// <summary>The unit symbol this column was declared with.</summary>
        public string Units { get; }

        /// <param name="Units">Unit symbol to store in <see cref="Units"/>.</param>
        public PhysicalUnitsType(string Units)
        {
            // The parameter shadows the property, so qualify the target explicitly.
            this.Units = Units;
        }
    }

    /// <summary>Unit symbols accepted in a <c>:&lt;unit&gt;:units</c> column suffix.</summary>
    public static readonly string[] ValidUnits =
    {
        "m",
        "s",
        "A",
        "K",
        "cd",
        "mol",
        "kg",
        "Hz",
        "rad",
        "sr",
        "N",
        "Pa",
        "J",
        "W",
        "C",
        "V",
        "F",
        "Ω",
        "S",
        "Wb",
        "T",
        "H",
        "°C",
        "lm",
        "lx",
        "Bq",
        "Gy",
        "Sv",
        "kat"
    };

    public static readonly int MajorVersion = 0;
    public static readonly int MinorVersion = 0;
    public static readonly int PatchVersion = 1;

    /// <summary>
    /// Matches the version declaration at the start of the file comment.
    /// The leading space is the one that follows '#' in "# ExtraTSV Vx.y.z".
    /// </summary>
    public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");

    /// <summary>
    /// Parses <paramref name="inputBuffer"/> as a commented TSV and then applies
    /// the ExtraTSV rules: the version header is checked, extra column type
    /// suffixes are resolved, and ISO 8601 fields are converted to <see cref="DateTime"/>.
    /// </summary>
    /// <param name="inputBuffer">Raw bytes of the ExtraTSV file.</param>
    /// <returns>A new <see cref="ExtraTsv"/> holding the parsed columns and records.</returns>
    /// <exception cref="Exception">
    /// Thrown when the version header is missing, malformed or of a different
    /// major version, when a units suffix names an unknown unit, when a record
    /// carries a comment, or when an ISO 8601 field does not match
    /// <c>yyyy-MM-ddTHH:mm:ss.ffff</c>.
    /// </exception>
    public static ExtraTsv ParseExtraTsv(byte[] inputBuffer)
    {
        SaneTsv tsv = ParseCommentedTsv(inputBuffer);

        string versionFormatError = $"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}";

        if (tsv.FileComment is null)
        {
            throw new Exception(versionFormatError);
        }

        Match match = VersionRegex.Match(tsv.FileComment);
        if (!match.Success)
        {
            throw new Exception(versionFormatError);
        }

        // \d also matches non-ASCII digits and places no limit on length, so the
        // capture is not guaranteed to fit an int; report that as a format error.
        if (!int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int fileMajorVersion))
        {
            throw new Exception(versionFormatError);
        }

        if (fileMajorVersion != MajorVersion)
        {
            string relation = fileMajorVersion > MajorVersion ? "newer" : "older";
            throw new Exception($"File has major version ({fileMajorVersion}) which is {relation} than this parser's version {MajorVersion}");
        }

        for (int i = 0; i < tsv.ColumnNames.Length; i++)
        {
            string[] typeParts = tsv.ColumnNames[i].Split(":");

            // An extra type is a suffix, so there must be something in front of it.
            // A column whose whole name is "iso8601" or "units" is a plain column.
            if (typeParts.Length < 2)
            {
                continue;
            }

            string extraType = typeParts[typeParts.Length - 1];

            if (extraType == "iso8601" && tsv.ColumnTypes[i] == typeof(StringType))
            {
                string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - ":iso8601".Length);
                tsv.ColumnNames[i] = columnName;
                tsv.ColumnTypes[i] = typeof(Iso8601Type);
            }
            // TODO: ISO8601 time spans
            // TODO: ISO8601 time durations
            else if (extraType == "units" && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type)))
            {
                string units = typeParts[typeParts.Length - 2];
                if (!ValidUnits.Contains(units))
                {
                    throw new Exception($"Invalid units type '{units}' for column {i}");
                }

                // TODO: How to store type information since the ColumnTypes is of type Type?
            }
        }

        CultureInfo provider = CultureInfo.InvariantCulture;

        for (int i = 0; i < tsv.Records.Count; i++)
        {
            if (tsv.Records[i].Comment != null)
            {
                throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed");
            }

            for (int j = 0; j < tsv.ColumnNames.Length; j++)
            {
                if (tsv.ColumnTypes[j] == typeof(Iso8601Type))
                {
                    if (!DateTime.TryParseExact((string)tsv.Records[i][j], "yyyy-MM-ddTHH:mm:ss.ffff", provider, DateTimeStyles.None, out DateTime parsed))
                    {
                        throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}");
                    }

                    tsv.Records[i].Fields[j] = parsed;
                }
            }
        }

        // The base parser hands back a plain SaneTsv instance, which cannot be
        // downcast. Build a real ExtraTsv and re-parent the records onto it.
        var result = new ExtraTsv();
        result.ColumnNames = tsv.ColumnNames;
        result.ColumnTypes = tsv.ColumnTypes;
        result.FileComment = tsv.FileComment;
        result.Records = new List<SaneTsvRecord>(tsv.Records.Count);
        foreach (SaneTsvRecord record in tsv.Records)
        {
            result.Records.Add(new SaneTsvRecord(result, record.Fields, record.Comment, record.Line));
        }

        return result;
    }
}
|
14
SaneTsv/ExtraTsv/ExtraTsv.csproj
Normal file
14
SaneTsv/ExtraTsv/ExtraTsv.csproj
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<RootNamespace>NathanMcRae</RootNamespace>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\SaneTsv.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
44
SaneTsv/ExtraTsv/readme.md
Normal file
44
SaneTsv/ExtraTsv/readme.md
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
Extra TSV adds many convenience types to Sane TSV:
|
||||||
|
|
||||||
|
- Timestamps
|
||||||
|
Just this format for now: yyyy-MM-ddTHH:mm:ss.ffff
|
||||||
|
- Timespans
|
||||||
|
- Time durations
|
||||||
|
- Multiformats
|
||||||
|
- Multihashes
|
||||||
|
- Multiprotocols
|
||||||
|
- ...
|
||||||
|
- Physical units
|
||||||
|
To start with, just use SI base and derived units
|
||||||
|
- Base units
|
||||||
|
- m
|
||||||
|
- s
|
||||||
|
- A
|
||||||
|
- K
|
||||||
|
- cd
|
||||||
|
- mol
|
||||||
|
- kg
|
||||||
|
- Derived units
|
||||||
|
- Hz
|
||||||
|
- rad
|
||||||
|
- sr
|
||||||
|
- N
|
||||||
|
- Pa
|
||||||
|
- J
|
||||||
|
- W
|
||||||
|
- C
|
||||||
|
- V
|
||||||
|
- F
|
||||||
|
- Ω
|
||||||
|
- S
|
||||||
|
- Wb
|
||||||
|
- T
|
||||||
|
- H
|
||||||
|
- °C
|
||||||
|
- lm
|
||||||
|
- lx
|
||||||
|
- Bq
|
||||||
|
- Gy
|
||||||
|
- Sv
|
||||||
|
- kat
|
||||||
|
How to handle derived units?
|
14
SaneTsv/ExtraTsvTest/ExtraTsvTest.csproj
Normal file
14
SaneTsv/ExtraTsvTest/ExtraTsvTest.csproj
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\ExtraTsv\ExtraTsv.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
20
SaneTsv/ExtraTsvTest/Program.cs
Normal file
20
SaneTsv/ExtraTsvTest/Program.cs
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
using NathanMcRae;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
{
    // Smoke test: parse a small ExtraTSV document and check that the first
    // record's boolean column comes back as a true bool.
    const string label = "Bool test";

    // One entry per line of the file; joined with '\n' so there is no trailing newline.
    string[] inputLines =
    {
        "# ExtraTSV V0.0.1",
        "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string",
        "TRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000",
        "FALSE\tnother\t2024-02-15T18:03:39.0001",
    };
    byte[] inputBytes = Encoding.UTF8.GetBytes(string.Join("\n", inputLines));

    ExtraTsv table = ExtraTsv.ParseExtraTsv(inputBytes);

    // The escaped "\#" in the header is expected to come back as a literal '#'.
    object firstFlag = table.Records[0]["column1:ty#pe"];
    string verdict = firstFlag is true ? "Passed" : "Failed";
    Console.WriteLine($"{verdict} {label}");
}
|
@ -7,36 +7,34 @@ namespace NathanMcRae;
|
|||||||
/// </summary>
|
/// </summary>
|
||||||
public class SaneTsv
|
public class SaneTsv
|
||||||
{
|
{
|
||||||
public enum ColumnType
|
// Like an enum, but more extensible
|
||||||
{
|
public class ColumnType { }
|
||||||
STRING,
|
public class StringType : ColumnType { }
|
||||||
BOOLEAN,
|
public class BooleanType : ColumnType { }
|
||||||
FLOAT32,
|
public class Float32Type : ColumnType { }
|
||||||
FLOAT64,
|
public class Float64Type : ColumnType { }
|
||||||
UINT32,
|
public class UInt32Type : ColumnType { }
|
||||||
UINT64,
|
public class UInt64Type : ColumnType { }
|
||||||
INT32,
|
public class Int32Type : ColumnType { }
|
||||||
INT64,
|
public class Int64Type : ColumnType { }
|
||||||
BINARY,
|
public class BinaryType : ColumnType { }
|
||||||
}
|
|
||||||
|
|
||||||
protected enum FormatType
|
protected enum FormatType
|
||||||
{
|
{
|
||||||
SANE_TSV = 0,
|
SIMPLE_TSV = 0,
|
||||||
TYPED_TSV = 1,
|
TYPED_TSV = 1,
|
||||||
COMMENTED_TSV = 2,
|
COMMENTED_TSV = 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: We need to be able to update all these in tandem somehow
|
// TODO: We need to be able to update all these in tandem somehow
|
||||||
public string[] ColumnNames { get; protected set; }
|
public string[] ColumnNames { get; protected set; }
|
||||||
public ColumnType[] ColumnTypes { get; protected set; }
|
public Type[] ColumnTypes { get; protected set; }
|
||||||
public Dictionary<string, List<object>> Columns { get; protected set; }
|
|
||||||
public List<SaneTsvRecord> Records { get; protected set; }
|
public List<SaneTsvRecord> Records { get; protected set; }
|
||||||
public string FileComment { get; protected set; } = null;
|
public string FileComment { get; protected set; } = null;
|
||||||
|
|
||||||
public static SaneTsv ParseSaneTsv(byte[] inputBuffer)
|
public static SaneTsv ParseSimpleTsv(byte[] inputBuffer)
|
||||||
{
|
{
|
||||||
return Parse(inputBuffer, FormatType.SANE_TSV);
|
return Parse(inputBuffer, FormatType.SIMPLE_TSV);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static SaneTsv ParseTypedTsv(byte[] inputBuffer)
|
public static SaneTsv ParseTypedTsv(byte[] inputBuffer)
|
||||||
@ -53,9 +51,8 @@ public class SaneTsv
|
|||||||
protected static SaneTsv Parse(byte[] inputBuffer, FormatType format)
|
protected static SaneTsv Parse(byte[] inputBuffer, FormatType format)
|
||||||
{
|
{
|
||||||
var parsed = new SaneTsv();
|
var parsed = new SaneTsv();
|
||||||
parsed.Columns = new Dictionary<string, List<object>>();
|
|
||||||
parsed.ColumnNames = new string[] { };
|
parsed.ColumnNames = new string[] { };
|
||||||
parsed.ColumnTypes = new ColumnType[] { };
|
parsed.ColumnTypes = new Type[] { };
|
||||||
parsed.Records = new List<SaneTsvRecord>();
|
parsed.Records = new List<SaneTsvRecord>();
|
||||||
|
|
||||||
var fieldBytes = new List<byte>();
|
var fieldBytes = new List<byte>();
|
||||||
@ -95,7 +92,7 @@ public class SaneTsv
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw new Exception($"Expected 'n', 't', or '\\' after '\\' at {i}");
|
throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {line} column {i - currentLineStart}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (inputBuffer[i] == '\t')
|
else if (inputBuffer[i] == '\t')
|
||||||
@ -116,7 +113,7 @@ public class SaneTsv
|
|||||||
numFields = fields.Count;
|
numFields = fields.Count;
|
||||||
|
|
||||||
parsed.ColumnNames = new string[numFields];
|
parsed.ColumnNames = new string[numFields];
|
||||||
parsed.ColumnTypes = new ColumnType[numFields];
|
parsed.ColumnTypes = new Type[numFields];
|
||||||
|
|
||||||
int numTypesBlank = 0;
|
int numTypesBlank = 0;
|
||||||
|
|
||||||
@ -135,7 +132,7 @@ public class SaneTsv
|
|||||||
string columnTypeString;
|
string columnTypeString;
|
||||||
string columnName;
|
string columnName;
|
||||||
if (columnString.Contains(':')) {
|
if (columnString.Contains(':')) {
|
||||||
if (format == FormatType.SANE_TSV)
|
if (format == FormatType.SIMPLE_TSV)
|
||||||
{
|
{
|
||||||
throw new Exception($"Header {fields.Count} contain ':', which is not allowed for column names");
|
throw new Exception($"Header {fields.Count} contain ':', which is not allowed for column names");
|
||||||
}
|
}
|
||||||
@ -144,7 +141,7 @@ public class SaneTsv
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (format > FormatType.SANE_TSV)
|
if (format > FormatType.SIMPLE_TSV)
|
||||||
{
|
{
|
||||||
throw new Exception($"Header {fields.Count} has no type");
|
throw new Exception($"Header {fields.Count} has no type");
|
||||||
}
|
}
|
||||||
@ -152,53 +149,46 @@ public class SaneTsv
|
|||||||
columnName = columnString;
|
columnName = columnString;
|
||||||
}
|
}
|
||||||
|
|
||||||
ColumnType type;
|
Type type;
|
||||||
|
|
||||||
switch (columnTypeString)
|
switch (columnTypeString)
|
||||||
{
|
{
|
||||||
case "":
|
case "":
|
||||||
numTypesBlank++;
|
numTypesBlank++;
|
||||||
type = ColumnType.STRING;
|
type = typeof(StringType);
|
||||||
break;
|
break;
|
||||||
case "string":
|
case "string":
|
||||||
type = ColumnType.STRING;
|
type = typeof(StringType);
|
||||||
break;
|
break;
|
||||||
case "boolean":
|
case "boolean":
|
||||||
type = ColumnType.BOOLEAN;
|
type = typeof(BooleanType);
|
||||||
break;
|
break;
|
||||||
case "float32":
|
case "float32":
|
||||||
type = ColumnType.FLOAT32;
|
type = typeof(Float32Type);
|
||||||
break;
|
break;
|
||||||
case "float64":
|
case "float64":
|
||||||
type = ColumnType.FLOAT64;
|
type = typeof(Float64Type);
|
||||||
break;
|
break;
|
||||||
case "uint32":
|
case "uint32":
|
||||||
type = ColumnType.UINT32;
|
type = typeof(UInt32Type);
|
||||||
break;
|
break;
|
||||||
case "uint64":
|
case "uint64":
|
||||||
type = ColumnType.UINT64;
|
type = typeof(UInt64Type);
|
||||||
break;
|
break;
|
||||||
case "int32":
|
case "int32":
|
||||||
type = ColumnType.INT32;
|
type = typeof(Int32Type);
|
||||||
break;
|
break;
|
||||||
case "int64":
|
case "int64":
|
||||||
type = ColumnType.INT64;
|
type = typeof(Int64Type);
|
||||||
break;
|
break;
|
||||||
case "binary":
|
case "binary":
|
||||||
type = ColumnType.BINARY;
|
type = typeof(BinaryType);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
|
throw new Exception($"Invalid type '{columnTypeString}' for column {j}");
|
||||||
}
|
}
|
||||||
|
|
||||||
try
|
// TODO: Check column name uniqueness
|
||||||
{
|
|
||||||
parsed.Columns.Add(columnName, new List<object>());
|
|
||||||
}
|
|
||||||
catch (Exception e)
|
|
||||||
{
|
|
||||||
throw new Exception($"Column name {columnName} is not unique", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
parsed.ColumnNames[j] = columnName;
|
parsed.ColumnNames[j] = columnName;
|
||||||
parsed.ColumnTypes[j] = type;
|
parsed.ColumnTypes[j] = type;
|
||||||
@ -224,7 +214,7 @@ public class SaneTsv
|
|||||||
comment = currentComment.ToString();
|
comment = currentComment.ToString();
|
||||||
currentComment.Clear();
|
currentComment.Clear();
|
||||||
}
|
}
|
||||||
parsed.Records.Add(new SaneTsvRecord(parsed, ParseCurrentRecord(parsed, fields, line), comment));
|
parsed.Records.Add(new SaneTsvRecord(parsed, ParseCurrentRecord(parsed, fields, line), comment, line));
|
||||||
fields.Clear();
|
fields.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -241,8 +231,11 @@ public class SaneTsv
|
|||||||
{
|
{
|
||||||
var commentBytes = new byte[j - i - 1];
|
var commentBytes = new byte[j - i - 1];
|
||||||
Array.Copy(inputBuffer, i + 1, commentBytes, 0, j - i - 1);
|
Array.Copy(inputBuffer, i + 1, commentBytes, 0, j - i - 1);
|
||||||
|
if (currentComment.Length > 0)
|
||||||
|
{
|
||||||
|
currentComment.Append('\n');
|
||||||
|
}
|
||||||
currentComment.Append(Encoding.UTF8.GetString(commentBytes));
|
currentComment.Append(Encoding.UTF8.GetString(commentBytes));
|
||||||
currentComment.Append("\n");
|
|
||||||
i = j;
|
i = j;
|
||||||
currentLineStart = i + 1;
|
currentLineStart = i + 1;
|
||||||
line++;
|
line++;
|
||||||
@ -281,23 +274,25 @@ public class SaneTsv
|
|||||||
comment = currentComment.ToString();
|
comment = currentComment.ToString();
|
||||||
currentComment.Clear();
|
currentComment.Clear();
|
||||||
}
|
}
|
||||||
parsed.Records.Add(new SaneTsvRecord(parsed, ParseCurrentRecord(parsed, fields, line), comment));
|
parsed.Records.Add(new SaneTsvRecord(parsed, ParseCurrentRecord(parsed, fields, line), comment, line));
|
||||||
fields.Clear();
|
fields.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
return parsed;
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Note: this modifies 'parsed'
|
||||||
|
/// </summary>
|
||||||
protected static object[] ParseCurrentRecord(SaneTsv parsed, List<byte[]> fields, int line)
|
protected static object[] ParseCurrentRecord(SaneTsv parsed, List<byte[]> fields, int line)
|
||||||
{
|
{
|
||||||
var parsedFields = new object[fields.Count];
|
var parsedFields = new object[fields.Count];
|
||||||
for (int j = 0; j < fields.Count; j++)
|
for (int j = 0; j < fields.Count; j++)
|
||||||
{
|
{
|
||||||
// All other types require the content to be UTF-8. Binary fields can ignore that.
|
// All other types require the content to be UTF-8. Binary fields can ignore that.
|
||||||
if (parsed.ColumnTypes[j] == ColumnType.BINARY)
|
if (parsed.ColumnTypes[j] == typeof(BinaryType))
|
||||||
{
|
{
|
||||||
parsedFields[j] = fields[j];
|
parsedFields[j] = fields[j];
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(fields[j]);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -311,94 +306,190 @@ public class SaneTsv
|
|||||||
throw new Exception($"Field {j} on line {line} is not valid UTF-8", e);
|
throw new Exception($"Field {j} on line {line} is not valid UTF-8", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (parsed.ColumnTypes[j])
|
// TODO: Add checking for numeric types format
|
||||||
|
|
||||||
|
if (parsed.ColumnTypes[j] == typeof(StringType))
|
||||||
{
|
{
|
||||||
case ColumnType.STRING:
|
parsedFields[j] = fieldString;
|
||||||
parsedFields[j] = fieldString;
|
}
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(fieldString);
|
else if (parsed.ColumnTypes[j] == typeof(BooleanType))
|
||||||
break;
|
{
|
||||||
case ColumnType.BOOLEAN:
|
bool parsedBool;
|
||||||
bool parsedBool;
|
if (fieldString == "TRUE")
|
||||||
if (fieldString == "TRUE")
|
{
|
||||||
{
|
parsedBool = true;
|
||||||
parsedBool = true;
|
}
|
||||||
}
|
else if (fieldString == "FALSE")
|
||||||
else if (fieldString == "FALSE")
|
{
|
||||||
{
|
parsedBool = false;
|
||||||
parsedBool = false;
|
}
|
||||||
}
|
else
|
||||||
else
|
{
|
||||||
{
|
throw new Exception($"Field {j} on line {line} is not valid boolean. Must be 'TRUE' or 'FALSE' exactly");
|
||||||
throw new Exception($"Field {j} on line {line} is not valid boolean. Must be 'TRUE' or 'FALSE' exactly");
|
}
|
||||||
}
|
|
||||||
|
|
||||||
parsedFields[j] = parsedBool;
|
parsedFields[j] = parsedBool;
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(parsedBool);
|
}
|
||||||
break;
|
else if (parsed.ColumnTypes[j] == typeof(Float32Type))
|
||||||
case ColumnType.FLOAT32:
|
{
|
||||||
if (!float.TryParse(fieldString, out float parsedFloat))
|
if (!float.TryParse(fieldString, out float parsedFloat))
|
||||||
{
|
{
|
||||||
throw new Exception($"Field {j} on line {line} is not valid single-precision float");
|
throw new Exception($"Field {j} on line {line} is not valid single-precision float");
|
||||||
}
|
}
|
||||||
|
|
||||||
parsedFields[j] = parsedFloat;
|
parsedFields[j] = parsedFloat;
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(parsedFloat);
|
}
|
||||||
break;
|
else if (parsed.ColumnTypes[j] == typeof(Float64Type))
|
||||||
case ColumnType.FLOAT64:
|
{
|
||||||
if (!double.TryParse(fieldString, out double parsedDouble))
|
if (!double.TryParse(fieldString, out double parsedDouble))
|
||||||
{
|
{
|
||||||
throw new Exception($"Field {j} on line {line} is not valid double-precision float");
|
throw new Exception($"Field {j} on line {line} is not valid double-precision float");
|
||||||
}
|
}
|
||||||
|
|
||||||
parsedFields[j] = parsedDouble;
|
parsedFields[j] = parsedDouble;
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(parsedDouble);
|
}
|
||||||
break;
|
else if (parsed.ColumnTypes[j] == typeof(UInt32Type))
|
||||||
case ColumnType.UINT32:
|
{
|
||||||
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
|
if (!UInt32.TryParse(fieldString, out UInt32 parsedUInt32))
|
||||||
{
|
{
|
||||||
throw new Exception($"Field {j} on line {line} is not valid UInt32");
|
throw new Exception($"Field {j} on line {line} is not valid UInt32");
|
||||||
}
|
}
|
||||||
|
|
||||||
parsedFields[j] = parsedUInt32;
|
parsedFields[j] = parsedUInt32;
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt32);
|
}
|
||||||
break;
|
else if (parsed.ColumnTypes[j] == typeof(UInt64Type))
|
||||||
case ColumnType.UINT64:
|
{
|
||||||
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
|
if (!UInt64.TryParse(fieldString, out UInt64 parsedUInt64))
|
||||||
{
|
{
|
||||||
throw new Exception($"Field {j} on line {line} is not valid UInt64");
|
throw new Exception($"Field {j} on line {line} is not valid UInt64");
|
||||||
}
|
}
|
||||||
|
|
||||||
parsedFields[j] = parsedUInt64;
|
parsedFields[j] = parsedUInt64;
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(parsedUInt64);
|
}
|
||||||
break;
|
else if (parsed.ColumnTypes[j] == typeof(Int32Type))
|
||||||
case ColumnType.INT32:
|
{
|
||||||
if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
|
if (!Int32.TryParse(fieldString, out Int32 parsedInt32))
|
||||||
{
|
{
|
||||||
throw new Exception($"Field {j} on line {line} is not valid Int32");
|
throw new Exception($"Field {j} on line {line} is not valid Int32");
|
||||||
}
|
}
|
||||||
|
|
||||||
parsedFields[j] = parsedInt32;
|
parsedFields[j] = parsedInt32;
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt32);
|
}
|
||||||
break;
|
else if (parsed.ColumnTypes[j] == typeof(Int64Type))
|
||||||
case ColumnType.INT64:
|
{
|
||||||
if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
|
if (!Int64.TryParse(fieldString, out Int64 parsedInt64))
|
||||||
{
|
{
|
||||||
throw new Exception($"Field {j} on line {line} is not valid Int64");
|
throw new Exception($"Field {j} on line {line} is not valid Int64");
|
||||||
}
|
}
|
||||||
|
|
||||||
parsedFields[j] = parsedInt64;
|
parsedFields[j] = parsedInt64;
|
||||||
parsed.Columns[parsed.ColumnNames[j]].Add(parsedInt64);
|
}
|
||||||
break;
|
else
|
||||||
case ColumnType.BINARY:
|
{
|
||||||
throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}");
|
throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}");
|
||||||
default:
|
|
||||||
throw new Exception($"Unexpected type {parsed.ColumnTypes[j]}");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return parsedFields;
|
return parsedFields;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Serializes a header and rows of string fields as Simple TSV, encoded as UTF-8.
/// Newline, tab, backslash and '#' characters inside fields are backslash-escaped.
/// The output never ends with a newline, including when <paramref name="data"/> is empty.
/// </summary>
/// <param name="header">Column names; each must be unique and must not contain ':'.</param>
/// <param name="data">Rows of fields; every row must have as many fields as <paramref name="header"/>.</param>
/// <returns>The UTF-8 bytes of the serialized file.</returns>
/// <exception cref="Exception">
/// Thrown when a column name contains ':', when column names are not unique,
/// or when a row's field count differs from the header's.
/// </exception>
public static byte[] SerializeSimpleTsv(IList<string> header, IList<IList<string>> data)
{
    var escapedString = new StringBuilder();

    // Serialize header
    for (int i = 0; i < header.Count; i++)
    {
        if (header[i].Contains(':'))
        {
            throw new Exception($"Column {i} contains the character ':'");
        }

        for (int j = i + 1; j < header.Count; j++)
        {
            if (header[i] == header[j])
            {
                throw new Exception("Column names in header must be unique");
            }
        }

        AppendEscapedField(escapedString, header[i]);

        if (i < header.Count - 1)
        {
            escapedString.Append('\t');
        }
    }

    // Serialize data
    for (int i = 0; i < data.Count; i++)
    {
        if (data[i].Count != header.Count)
        {
            throw new Exception($"Row {i} has {data[i].Count} fields but the header has {header.Count}");
        }

        // The line break goes in front of each row rather than after the header,
        // so a header-only file does not end with '\n'.
        escapedString.Append('\n');

        for (int j = 0; j < data[i].Count; j++)
        {
            AppendEscapedField(escapedString, data[i][j]);

            if (j < data[i].Count - 1)
            {
                escapedString.Append('\t');
            }
        }
    }

    return Encoding.UTF8.GetBytes(escapedString.ToString());
}

/// <summary>
/// Appends <paramref name="field"/> to <paramref name="builder"/>, escaping
/// newline, tab, backslash and '#' with a leading backslash.
/// </summary>
private static void AppendEscapedField(StringBuilder builder, string field)
{
    foreach (char c in field)
    {
        switch (c)
        {
            case '\n':
                builder.Append("\\n");
                break;
            case '\t':
                builder.Append("\\t");
                break;
            case '\\':
                builder.Append("\\\\");
                break;
            case '#':
                builder.Append("\\#");
                break;
            default:
                builder.Append(c);
                break;
        }
    }
}
|
||||||
|
|
||||||
public SaneTsvRecord this[int i] => Records[i];
|
public SaneTsvRecord this[int i] => Records[i];
|
||||||
|
|
||||||
public class SaneTsvRecord
|
public class SaneTsvRecord
|
||||||
@ -406,14 +497,18 @@ public class SaneTsv
|
|||||||
public SaneTsv Parent { get; }
|
public SaneTsv Parent { get; }
|
||||||
public string Comment { get; }
|
public string Comment { get; }
|
||||||
public object[] Fields { get; }
|
public object[] Fields { get; }
|
||||||
|
public int Line { get; }
|
||||||
|
|
||||||
public object this[string columnName] => Fields[Array.IndexOf(Parent.ColumnNames, columnName)];
|
public object this[string columnName] => Fields[Array.IndexOf(Parent.ColumnNames, columnName)];
|
||||||
|
|
||||||
public SaneTsvRecord(SaneTsv parent, object[] fields, string comment)
|
public object this[int columnIndex] => Fields[columnIndex];
|
||||||
|
|
||||||
|
public SaneTsvRecord(SaneTsv parent, object[] fields, string comment, int line)
|
||||||
{
|
{
|
||||||
Parent = parent;
|
Parent = parent;
|
||||||
Fields = fields;
|
Fields = fields;
|
||||||
Comment = comment;
|
Comment = comment;
|
||||||
|
Line = line;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,8 +10,14 @@
|
|||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<Compile Remove="ExtraTsvTest\**" />
|
||||||
|
<Compile Remove="ExtraTsv\**" />
|
||||||
<Compile Remove="SaneTsvTest\**" />
|
<Compile Remove="SaneTsvTest\**" />
|
||||||
|
<EmbeddedResource Remove="ExtraTsvTest\**" />
|
||||||
|
<EmbeddedResource Remove="ExtraTsv\**" />
|
||||||
<EmbeddedResource Remove="SaneTsvTest\**" />
|
<EmbeddedResource Remove="SaneTsvTest\**" />
|
||||||
|
<None Remove="ExtraTsvTest\**" />
|
||||||
|
<None Remove="ExtraTsv\**" />
|
||||||
<None Remove="SaneTsvTest\**" />
|
<None Remove="SaneTsvTest\**" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
@ -5,7 +5,11 @@ VisualStudioVersion = 17.7.34024.191
|
|||||||
MinimumVisualStudioVersion = 10.0.40219.1
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
|
||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
|
||||||
|
EndProject
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ExtraTsv", "ExtraTsv\ExtraTsv.csproj", "{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ExtraTsvTest", "ExtraTsvTest\ExtraTsvTest.csproj", "{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
@ -21,6 +25,14 @@ Global
|
|||||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
|
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
@ -39,7 +39,7 @@ using System.Text;
|
|||||||
{
|
{
|
||||||
string testName = "Comment test";
|
string testName = "Comment test";
|
||||||
string testString1 = "#This is a file comment\n" +
|
string testString1 = "#This is a file comment\n" +
|
||||||
" #One more file comment line\n" +
|
"#One more file comment line\n" +
|
||||||
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
"\n#This is a comment" +
|
"\n#This is a comment" +
|
||||||
"\n#Another comment line" +
|
"\n#Another comment line" +
|
||||||
@ -49,4 +49,26 @@ using System.Text;
|
|||||||
SaneTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1));
|
SaneTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
    // Round-trip test: parse a Simple TSV document, serialize it again and
    // expect the exact original text back.
    const string label = "Serde test";

    // One entry per line of the file; joined with '\n' so there is no trailing newline.
    string source = string.Join(
        "\n",
        "column1\tcolumn2\tcolumnthree\\nyep",
        "TRUE\tvalue\\\\twoo\tvaluetrhee",
        "FALSE\tnother\tno\\ther");

    SaneTsv table = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(source));

    // Turn every parsed field back into its string form, row by row.
    var rows = table.Records
        .Select(record => record.Fields.Select(field => field.ToString()).ToArray())
        .ToArray();

    byte[] serializedBytes = SaneTsv.SerializeSimpleTsv(table.ColumnNames, rows);
    string roundTripped = Encoding.UTF8.GetString(serializedBytes);

    string verdict = source == roundTripped ? "Passed" : "Failed";
    Console.WriteLine($"{verdict} {label}");
}
|
||||||
|
|
||||||
Console.WriteLine("Done with tests");
|
Console.WriteLine("Done with tests");
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: Check qNaN, sNaN, +inf, -inf values for float types
|
||||||
|
@ -1,6 +1,10 @@
|
|||||||
# Sane TSV
|
# Sane TSV
|
||||||
|
|
||||||
Sane TSV is a strict format for tabular data.
|
Sane Tab-Separated Values is a series of tabular formats as an alternative to the under-specified TSV / CSV quagmire.
|
||||||
|
|
||||||
|
# Simple TSV
|
||||||
|
|
||||||
|
Simple TSV is a strict format for tabular data.
|
||||||
|
|
||||||
'\n' (0x0A) characters delimit lines, and '\t' (0x09) characters delimit fields within a line.
|
'\n' (0x0A) characters delimit lines, and '\t' (0x09) characters delimit fields within a line.
|
||||||
|
|
||||||
@ -12,7 +16,7 @@ Empty fields (i.e. two subsequent '\t' characters) are allowed.
|
|||||||
|
|
||||||
The first line is always the header and the fields of the header are the column names for the file. Column names must be unique within the file and must not contain ':' characters (for compatibility with [Typed TSVs](#typed-tsv)).
|
The first line is always the header and the fields of the header are the column names for the file. Column names must be unique within the file and must not contain ':' characters (for compatibility with [Typed TSVs](#typed-tsv)).
|
||||||
|
|
||||||
All lines in the file must have the same number of fields.
|
All lines in the file must have the same number of fields as are in the header.
|
||||||
|
|
||||||
The file must not end with '\n'. That will be treated as if there is an empty row at the end of a file and cause an error.
|
The file must not end with '\n'. That will be treated as if there is an empty row at the end of a file and cause an error.
|
||||||
|
|
||||||
@ -20,7 +24,7 @@ Implementations of the format do not need to handle file reading and writing dir
|
|||||||
|
|
||||||
# Typed TSV
|
# Typed TSV
|
||||||
|
|
||||||
Typed TSV builds on Sane TSV to allow for typing of columns. All column names in a typed TSV must end with ':' (0x3A) and then one of the following types:
|
Typed TSV builds on Simple TSV to allow for typing of columns. All column names in a typed TSV must end with ':' (0x3A) and then one of the following types:
|
||||||
|
|
||||||
- 'string'
|
- 'string'
|
||||||
- 'boolean'
|
- 'boolean'
|
||||||
@ -34,7 +38,7 @@ Typed TSV builds on Sane TSV to allow for typing of columns. All column names in
|
|||||||
|
|
||||||
Any other values are an error, however, the portion of the name prior to the last ':' may be anything and may include ':' characters.
|
Any other values are an error, however, the portion of the name prior to the last ':' may be anything and may include ':' characters.
|
||||||
|
|
||||||
All fields in the rest of the file must be of the type corresponding the their column.
|
All fields in the rest of the file must be of the type corresponding to their column.
|
||||||
|
|
||||||
Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each type has the following restrictions:
|
Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each type has the following restrictions:
|
||||||
|
|
||||||
@ -49,13 +53,17 @@ Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each
|
|||||||
- 'uint32' and 'uint64' are unsigned 32 and 64 bit integers respectively. They should be formatted like this regex: `[1-9][0-9]*`
|
- 'uint32' and 'uint64' are unsigned 32 and 64 bit integers respectively. They should be formatted like this regex: `[1-9][0-9]*`
|
||||||
- 'int32' and 'int64' are signed 32 and 64 bit integers respectively. They should be formatted like this regex: `-?[1-9][0-9]*` (except that '-0' is not allowed)
|
- 'int32' and 'int64' are signed 32 and 64 bit integers respectively. They should be formatted like this regex: `-?[1-9][0-9]*` (except that '-0' is not allowed)
|
||||||
|
|
||||||
|
Binary fields are left as-is (after unescaping is performed).
|
||||||
|
|
||||||
Typed TSV files should have the .ytsv extension (.ttsv is already used).
|
Typed TSV files should have the .ytsv extension (.ttsv is already used).
|
||||||
|
|
||||||
# Commented TSV
|
# Commented TSV
|
||||||
|
|
||||||
Commented TSV builds on Typed TSV and allows for more flexibility in the format by including line comments. They are kept distinct so that some applications of it can take advantage of the extra flexibility, while others can stick with the more restricted Typed TSV format.
|
Commented TSV builds on Typed TSV and allows for more flexibility in the format by including line comments. The formats are kept distinct so that some applications can take advantage of the extra flexibility comments allow, while others can stick with the more restricted Typed TSV format.
|
||||||
|
|
||||||
Commented lines start with a '#' character at the beginning of the line. Unescaped '#' characters are not allowed on a line that does not start with a '#'. Any '#' characters in fields must be escaped. Any unescaped '#' after the start of a line are errors.
|
Commented lines start with a '#' character at the beginning of the line. Unescaped '#' characters are not allowed on a line that does not start with a '#'. Any '#' characters in fields must be escaped. Note that the '#' character is excluded from the comment data.
|
||||||
|
|
||||||
|
Multiple consecutive comment lines are considered a single comment, with each line separated by a '\n'.
|
||||||
|
|
||||||
Comments must be UTF-8 encoded text.
|
Comments must be UTF-8 encoded text.
|
||||||
|
|
||||||
@ -78,7 +86,7 @@ Note that extended formats must remain parseable by baseline parsers, hence we m
|
|||||||
|
|
||||||
Extending formats may also have restrictions. For example, they could disallow record comments and only allow the file comment above the header.
|
Extending formats may also have restrictions. For example, they could disallow record comments and only allow the file comment above the header.
|
||||||
|
|
||||||
Extended formats may still use the .ctsv extension, though they could use a dedicated one as well.
|
Extended formats may still use the .ctsv extension, though they could use a dedicated one instead.
|
||||||
|
|
||||||
## Ideas for Extension
|
## Ideas for Extension
|
||||||
|
|
||||||
|
16
readme.md
Normal file
16
readme.md
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
# Sane TSV
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
- Improve error reporting by including line/column information in exceptions
|
||||||
|
- Come up with a static-typing interface
|
||||||
|
Something that doesn't require an array of objects
|
||||||
|
- Check numeric formatting matches spec
|
||||||
|
- Do parallel parsing / serializing implementation
|
||||||
|
- Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
||||||
|
- More optimization and making parsing modular:
|
||||||
|
- Have callbacks for header parsing and field parsing
|
||||||
|
- That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again.
|
||||||
|
- Finish ExtraTSV implementation
|
||||||
|
- Do Zig implementation
|
||||||
|
- Make a C interface from that
|
Loading…
Reference in New Issue
Block a user