Compare commits
37 Commits
6cea9b7e59
...
master
Author | SHA1 | Date | |
---|---|---|---|
d9ef2a4bb6 | |||
a80206767e | |||
b8ae3ce65d | |||
0fd092685d | |||
55fa00a6e7 | |||
d428af51bb | |||
aef92e87d4 | |||
b56236cbb7 | |||
7230f982ac | |||
f4145bacd2 | |||
f98a40a173 | |||
0c61128e0e | |||
78eaa5dbab | |||
4ddb8dc44d | |||
3727f8051b | |||
7368ac816b | |||
0a45f541a4 | |||
b593fb9613 | |||
52ed949529 | |||
dc0c300fdc | |||
ea77db46a6 | |||
bb750fac58 | |||
53e87e2f7f | |||
f3ed173842 | |||
f392036982 | |||
0b302734e9 | |||
7bc553905d | |||
932fbd553a | |||
93f2e2ea5b | |||
99766f99a6 | |||
a5eedef36b | |||
ee46c93ce1 | |||
83602391ab | |||
725a5b2034 | |||
38d324738e | |||
cc8a122b57 | |||
e52dc01e7a |
125
SaneTsv/ExtraTsv/ExtraTsv.cs
Normal file
125
SaneTsv/ExtraTsv/ExtraTsv.cs
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
|
||||||
|
using System.Globalization;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
|
namespace NathanMcRae;
|
||||||
|
|
||||||
|
/// <summary>
/// Parser for "Extra TSV": a commented Sane TSV file whose file comment starts with
/// <c> ExtraTSV Vx.y.z</c> and whose column headers may carry extra type markers
/// (currently <c>iso8601</c> timestamps; <c>units</c> is validated but not yet stored).
/// </summary>
public class ExtraTsv : SaneTsv
{
    /// <summary>
    /// Column type assigned to columns declared as <c>name:iso8601</c> on top of a string column.
    /// Fields of such columns are replaced with <see cref="DateTime"/> values by
    /// <see cref="ParseExtraTsv(byte[])"/>.
    /// </summary>
    public class Iso8601Type : ColumnType { }

    /// <summary>
    /// Column type that carries a physical unit symbol (see <see cref="ValidUnits"/>).
    /// The parser does not create instances of it yet (see the TODO in
    /// <see cref="ParseExtraTsv(byte[])"/>).
    /// </summary>
    public class PhysicalUnitsType : ColumnType
    {
        /// <summary>The unit symbol this column type was created with.</summary>
        public string Units { get; }

        /// <summary>Creates a units column type for the given unit symbol.</summary>
        /// <param name="Units">Unit symbol, e.g. <c>"m"</c> or <c>"kg"</c>.</param>
        public PhysicalUnitsType(string Units)
        {
            // The original constructor body was empty, which left the property null.
            this.Units = Units;
        }
    }

    /// <summary>Unit symbols accepted in a <c>units</c> column header: SI base units followed by SI derived units.</summary>
    public static readonly string[] ValidUnits =
    {
        // SI base units
        "m", "s", "A", "K", "cd", "mol", "kg",
        // SI derived units
        "Hz", "rad", "sr", "N", "Pa", "J", "W", "C", "V", "F", "Ω", "S", "Wb", "T", "H",
        "°C", "lm", "lx", "Bq", "Gy", "Sv", "kat"
    };

    /// <summary>Major version of the Extra TSV format this parser implements.</summary>
    public static readonly int MajorVersion = 0;

    /// <summary>Minor version of the Extra TSV format this parser implements.</summary>
    public static readonly int MinorVersion = 0;

    /// <summary>Patch version of the Extra TSV format this parser implements.</summary>
    public static readonly int PatchVersion = 1;

    /// <summary>
    /// Matches the version header inside the file comment. The pattern starts with a space
    /// because it is applied to the file comment text, i.e. what follows the leading '#'
    /// of a <c># ExtraTSV Vx.y.z</c> line.
    /// </summary>
    public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");

    // Exact timestamp layout accepted for iso8601 columns.
    private const string TimestampFormat = "yyyy-MM-ddTHH:mm:ss.ffff";

    /// <summary>
    /// Parses <paramref name="inputBuffer"/> as a commented TSV, checks the Extra TSV version
    /// header, resolves the extra column types and converts iso8601 fields to <see cref="DateTime"/>.
    /// </summary>
    /// <param name="inputBuffer">Raw bytes of the Extra TSV file.</param>
    /// <returns>The parsed table, cast to <see cref="ExtraTsv"/>.</returns>
    /// <exception cref="Exception">
    /// The version header is missing or malformed, the file's major version differs from
    /// <see cref="MajorVersion"/>, a <c>units</c> column names an unknown unit, a record has a
    /// comment above it, or a timestamp field does not match <c>yyyy-MM-ddTHH:mm:ss.ffff</c>.
    /// </exception>
    public static ExtraTsv ParseExtraTsv(byte[] inputBuffer)
    {
        SaneTsv tsv = ParseCommentedTsv(inputBuffer);

        if (tsv.FileComment == null)
        {
            throw CreateVersionHeaderException();
        }

        Match match = VersionRegex.Match(tsv.FileComment);
        if (!match.Success)
        {
            throw CreateVersionHeaderException();
        }

        // \d also matches non-ASCII digits and arbitrarily long runs, so parse defensively
        // instead of letting int.Parse throw FormatException/OverflowException.
        if (!int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int fileMajorVersion))
        {
            throw CreateVersionHeaderException();
        }

        if (fileMajorVersion != MajorVersion)
        {
            string relation = fileMajorVersion > MajorVersion ? "newer" : "older";
            throw new Exception($"File has major version ({fileMajorVersion}) which is {relation} than this parser's version {MajorVersion}");
        }

        int columnCount = tsv.ColumnNames.Count();
        var timestampColumns = new List<int>();

        for (int i = 0; i < columnCount; i++)
        {
            string[] typeParts = tsv.ColumnNames[i].Split(":");
            string extraType = typeParts[typeParts.Length - 1];

            // Require a name part before the marker: a column called just "iso8601" is an
            // ordinary string column (and would otherwise give a negative Substring length).
            if (extraType == "iso8601" && typeParts.Length > 1 && tsv.ColumnTypes[i] == typeof(StringType))
            {
                string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - ":iso8601".Length);
                tsv.ColumnNames[i] = columnName;
                tsv.ColumnTypes[i] = typeof(Iso8601Type);
                timestampColumns.Add(i);
            }
            // TODO: ISO8601 time spans
            // TODO: ISO8601 time durations
            else if (extraType == "units" && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type)))
            {
                // With a single part there is no unit segment at all; report it as an empty
                // unit rather than indexing typeParts[-1].
                string unit = typeParts.Length > 1 ? typeParts[typeParts.Length - 2] : "";
                if (!ValidUnits.Contains(unit))
                {
                    throw new Exception($"Invalid units type '{unit}' for column {i}");
                }

                // TODO: How to store type information since the ColumnTypes is of type Type?
            }
        }

        for (int i = 0; i < tsv.Records.Count; i++)
        {
            if (tsv.Records[i].Comment != null)
            {
                throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed");
            }

            foreach (int j in timestampColumns)
            {
                if (!DateTime.TryParseExact((string)tsv.Records[i][j], TimestampFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime parsed))
                {
                    throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}");
                }

                tsv.Records[i].Fields[j] = parsed;
            }
        }

        // NOTE(review): this downcast only succeeds if ParseCommentedTsv actually returns an
        // ExtraTsv instance; SaneTsv is not visible here, so it is left unchanged — confirm.
        return (ExtraTsv)tsv;
    }

    // Builds the error raised whenever the "# ExtraTSV Vx.y.z" header is missing or unreadable.
    private static Exception CreateVersionHeaderException()
    {
        return new Exception($"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}");
    }
}
|
14
SaneTsv/ExtraTsv/ExtraTsv.csproj
Normal file
14
SaneTsv/ExtraTsv/ExtraTsv.csproj
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
<RootNamespace>NathanMcRae</RootNamespace>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\SaneTsv.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
44
SaneTsv/ExtraTsv/readme.md
Normal file
44
SaneTsv/ExtraTsv/readme.md
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
Extra TSV adds many convenience types to Sane TSV:
|
||||||
|
|
||||||
|
- Timestamps
|
||||||
|
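Only one format is supported for now: `yyyy-MM-ddTHH:mm:ss.ffff` (exactly four fractional-second digits, no time zone designator), e.g. `2024-02-15T18:03:30.0000`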
|
||||||
|
- Timespans
|
||||||
|
- Time durations
|
||||||
|
- Multiformats
|
||||||
|
- Multihashes
|
||||||
|
- Multiprotocols
|
||||||
|
- ...
|
||||||
|
- Physical units
|
||||||
|
To start with, just use SI base and derived units
|
||||||
|
- Base units
|
||||||
|
- m
|
||||||
|
- s
|
||||||
|
- A
|
||||||
|
- K
|
||||||
|
- cd
|
||||||
|
- mol
|
||||||
|
- kg
|
||||||
|
- Derived units
|
||||||
|
- Hz
|
||||||
|
- rad
|
||||||
|
- sr
|
||||||
|
- N
|
||||||
|
- Pa
|
||||||
|
- J
|
||||||
|
- W
|
||||||
|
- C
|
||||||
|
- V
|
||||||
|
- F
|
||||||
|
- Ω
|
||||||
|
- S
|
||||||
|
- Wb
|
||||||
|
- T
|
||||||
|
- H
|
||||||
|
- °C
|
||||||
|
- lm
|
||||||
|
- lx
|
||||||
|
- Bq
|
||||||
|
- Gy
|
||||||
|
- Sv
|
||||||
|
- kat
|
||||||
|
How to handle derived units?
|
14
SaneTsv/ExtraTsvTest/ExtraTsvTest.csproj
Normal file
14
SaneTsv/ExtraTsvTest/ExtraTsvTest.csproj
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>net6.0</TargetFramework>
|
||||||
|
<ImplicitUsings>enable</ImplicitUsings>
|
||||||
|
<Nullable>enable</Nullable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\ExtraTsv\ExtraTsv.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
20
SaneTsv/ExtraTsvTest/Program.cs
Normal file
20
SaneTsv/ExtraTsvTest/Program.cs
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
using NathanMcRae;
|
||||||
|
using System.Text;
|
||||||
|
|
||||||
|
{
    // Smoke test: parse a small Extra TSV document (version header, a boolean column, a
    // binary column and an iso8601 column) and check the first record's boolean field.
    string testName = "Bool test";

    string header = "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string";
    string firstRecord = "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000";
    string secondRecord = "\nFALSE\tnother\t2024-02-15T18:03:39.0001";
    byte[] input = Encoding.UTF8.GetBytes("# ExtraTSV V0.0.1\n" + header + firstRecord + secondRecord);

    ExtraTsv parsed = ExtraTsv.ParseExtraTsv(input);

    // Passes only when the field is a boxed bool holding true.
    bool passed = parsed.Records[0]["column1:ty#pe"] is true;
    Console.WriteLine(passed ? $"Passed {testName}" : $"Failed {testName}");
}
|
1648
SaneTsv/SaneTsv.cs
1648
SaneTsv/SaneTsv.cs
File diff suppressed because it is too large
Load Diff
@ -10,8 +10,14 @@
|
|||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<Compile Remove="ExtraTsvTest\**" />
|
||||||
|
<Compile Remove="ExtraTsv\**" />
|
||||||
<Compile Remove="SaneTsvTest\**" />
|
<Compile Remove="SaneTsvTest\**" />
|
||||||
|
<EmbeddedResource Remove="ExtraTsvTest\**" />
|
||||||
|
<EmbeddedResource Remove="ExtraTsv\**" />
|
||||||
<EmbeddedResource Remove="SaneTsvTest\**" />
|
<EmbeddedResource Remove="SaneTsvTest\**" />
|
||||||
|
<None Remove="ExtraTsvTest\**" />
|
||||||
|
<None Remove="ExtraTsv\**" />
|
||||||
<None Remove="SaneTsvTest\**" />
|
<None Remove="SaneTsvTest\**" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
@ -5,7 +5,11 @@ VisualStudioVersion = 17.7.34024.191
|
|||||||
MinimumVisualStudioVersion = 10.0.40219.1
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
|
||||||
EndProject
|
EndProject
|
||||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
|
||||||
|
EndProject
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ExtraTsv", "ExtraTsv\ExtraTsv.csproj", "{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}"
|
||||||
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ExtraTsvTest", "ExtraTsvTest\ExtraTsvTest.csproj", "{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
@ -21,6 +25,14 @@ Global
|
|||||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
|
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
|
@ -1,38 +1,790 @@
|
|||||||
using NathanMcRae;
|
using NathanMcRae;
|
||||||
|
using System.Reflection;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
|
|
||||||
|
internal class Program : SaneTsv
|
||||||
{
|
{
|
||||||
string testName = "Bool test";
|
/// <summary>
/// Record with one column for each scalar kind exercised by the "Serde test" round trip
/// (string, bool, float32/float64 in default and little-endian form, 32/64-bit integers, binary).
/// </summary>
public class TestRecord : TsvRecord
{
    [TypedTsvColumn("string-test")]
    public string StringTest { get; set; }

    [TypedTsvColumn("bool-test")]
    public bool BoolTest { get; set; }

    [TypedTsvColumn("float32-test")]
    public float Float32Test { get; set; }

    // Explicit column type: Float32LEType instead of the default chosen for float.
    [TypedTsvColumn("float32-le-test", typeof(Float32LEType))]
    public float Float32LETest { get; set; }

    [TypedTsvColumn("float64-test")]
    public double Float64Test { get; set; }

    // Explicit column type: Float64LEType instead of the default chosen for double.
    [TypedTsvColumn("float64-le-test", typeof(Float64LEType))]
    public double Float64LETest { get; set; }

    [TypedTsvColumn("uint32-test")]
    public uint UInt32Test { get; set; }

    [TypedTsvColumn("uint64-test")]
    public ulong UInt64Test { get; set; }

    [TypedTsvColumn("int32-test")]
    public int Int32Test { get; set; }

    [TypedTsvColumn("int64-test")]
    public long Int64Test { get; set; }

    [TypedTsvColumn("binary-test")]
    public byte[] BinaryTest { get; set; }

    /// <summary>Creates a record with every column set from the matching argument.</summary>
    public TestRecord(string stringTest, bool boolTest, float float32Test, float float32LETest, double float64Test, double float64LETest, UInt32 uInt32Test, UInt64 uInt64Test, Int32 int32Test, Int64 int64Test, byte[] binaryTest)
    {
        this.StringTest = stringTest;
        this.BoolTest = boolTest;

        this.Float32Test = float32Test;
        this.Float32LETest = float32LETest;
        this.Float64Test = float64Test;
        this.Float64LETest = float64LETest;

        this.UInt32Test = uInt32Test;
        this.UInt64Test = uInt64Test;
        this.Int32Test = int32Test;
        this.Int64Test = int64Test;

        this.BinaryTest = binaryTest;
    }

    /// <summary>Parameterless constructor; leaves every property at its default value.</summary>
    public TestRecord() { }
}
|
||||||
else
|
|
||||||
|
/// <summary>
/// Commented-TSV record used by the "Bool test" and "Bad bool test" cases: a boolean column
/// named <c>column1:ty#pe</c>, a binary column and a string column whose name contains a newline.
/// </summary>
public class BoolTestRecord : CommentedTsvRecord
{
    [TypedTsvColumn("column1:ty#pe")]
    public bool Column1 { get; set; }

    // No explicit column name is given for this property.
    [TypedTsvColumn]
    public byte[] column2 { get; set; }

    [TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Commented-TSV record with the same shape as BoolTestRecord, except that the boolean
/// column is named <c>column1:type</c>. Used by the comment and format-mismatch tests.
/// </summary>
public class BoolTestRecord2 : CommentedTsvRecord
{
    [TypedTsvColumn("column1:type")]
    public bool Column1 { get; set; }

    // No explicit column name is given for this property.
    [TypedTsvColumn]
    public byte[] column2 { get; set; }

    [TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// All-string record declared with the untyped <c>TsvColumn</c> attribute; used by the
/// simple-TSV cases ("End of file comment" 1C and "Partial parsing").
/// </summary>
public class BoolTestRecord3 : CommentedTsvRecord
{
    [TsvColumn("column1")]
    public string Column1 { get; set; }

    // No explicit column name is given for this property.
    [TsvColumn]
    public string column2 { get; set; }

    [TsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Record with a boolean, a binary and a string column. In the visible code it is only
/// referenced from the commented-out "Serde test" block.
/// </summary>
public class SerdeTestRecord : CommentedTsvRecord
{
    [TypedTsvColumn("column1")]
    public bool Column1 { get; set; }

    // No explicit column name is given for this property.
    [TypedTsvColumn]
    public byte[] column2 { get; set; }

    [TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Record for the "Float binary test": the same double value once in a plain
/// <c>float64</c> column and once in a <c>float64-le</c> column fed with BitConverter bytes.
/// </summary>
public class FloatTestRecord : CommentedTsvRecord
{
    [TypedTsvColumn("somefloat")]
    public double SomeFloat { get; set; }

    // Explicit column type: Float64LEType instead of the default chosen for double.
    [TypedTsvColumn("binfloat", typeof(Float64LEType))]
    public double BinFloat { get; set; }
}
|
||||||
|
|
||||||
|
/// <summary>
/// Three-string-column record used by the serialization/parse timing comparison.
/// </summary>
public class StringTestRecord : TsvRecord
{
    [TypedTsvColumn("column1")]
    public string Column1 { get; set; }

    // No explicit column name is given for this property.
    [TypedTsvColumn]
    public string column2 { get; set; }

    [TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
|
||||||
|
|
||||||
|
private static void Main(string[] args)
|
||||||
|
{
|
||||||
|
{
|
||||||
|
string testName = "Bool test";
|
||||||
|
string testString1 = "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
Tsv<BoolTestRecord> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
if (parsed.Records[0].Column1)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Bad bool test";
|
||||||
|
try
|
||||||
|
{
|
||||||
|
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
Tsv<BoolTestRecord> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
catch (Exception)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Comment test";
|
||||||
|
string testString1 = "#This is a file comment\n" +
|
||||||
|
"#One more file comment line\n" +
|
||||||
|
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\n#This is a comment" +
|
||||||
|
"\n#Another comment line" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
}
|
||||||
|
|
||||||
|
//{
|
||||||
|
// string testName = "Serde test";
|
||||||
|
// string testString1 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||||
|
// "\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||||
|
// "\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
// Tsv<SerdeTestRecord> parsed = SaneTsv.ParseSimpleTsv<SerdeTestRecord>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
// string serialized = Encoding.UTF8.GetString(SaneTsv.SerializeSimpleTsv(parsed.ColumnNames, parsed.Records.Select(r => r.Fields.Select(f => f.ToString()).ToArray()).ToArray()));
|
||||||
|
|
||||||
|
// if (testString1 == serialized)
|
||||||
|
// {
|
||||||
|
// Console.WriteLine($"Passed {testName}");
|
||||||
|
// }
|
||||||
|
// else
|
||||||
|
// {
|
||||||
|
// Console.WriteLine($"Failed {testName}");
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Float binary test";
|
||||||
|
var bytes = new List<byte>();
|
||||||
|
bytes.AddRange(Encoding.UTF8.GetBytes("somefloat:float64\tbinfloat:float64-le" +
|
||||||
|
"\n1.5\t")); bytes.AddRange(BitConverter.GetBytes(1.5));
|
||||||
|
bytes.AddRange(Encoding.UTF8.GetBytes("\n-8.0000005E-14\t")); bytes.AddRange(BitConverter.GetBytes(-8.0000005E-14));
|
||||||
|
|
||||||
|
Tsv<FloatTestRecord> parsed = SaneTsv.ParseTypedTsv<FloatTestRecord>(bytes.ToArray());
|
||||||
|
if (parsed.Records[0].BinFloat == parsed.Records[0].SomeFloat)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Serde test";
|
||||||
|
|
||||||
|
TestRecord[] data =
|
||||||
|
{
|
||||||
|
new TestRecord("test", true, 44.5f, 44.5f, -88e-3, -88e-3, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3 }),
|
||||||
|
new TestRecord("test2", false, 44.5000005f, 44.5000005f, -88e-30, -88e-30, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
|
||||||
|
new TestRecord("test2", false, float.NaN, float.NaN, double.NaN, double.NaN, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
|
||||||
|
new TestRecord("test2", false, float.NegativeInfinity, float.NegativeInfinity, double.NegativeInfinity, double.NegativeInfinity, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
|
||||||
|
new TestRecord("test2", false, float.PositiveInfinity, float.PositiveInfinity, double.PositiveInfinity, double.PositiveInfinity, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
|
||||||
|
};
|
||||||
|
|
||||||
|
byte[] serialized = SaneTsv.SerializeTypedTsv(data);
|
||||||
|
|
||||||
|
Tsv<TestRecord> parsed = SaneTsv.ParseTypedTsv<TestRecord>(serialized);
|
||||||
|
|
||||||
|
if ((float)parsed.Records[1].Float32Test == 44.5000005f)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Trying to parse a not commented record as a Commented TSV test";
|
||||||
|
|
||||||
|
// These should not compile:
|
||||||
|
|
||||||
|
//byte[] serialized = SaneTsv.SerializeCommentedTsv(data);
|
||||||
|
// Gives this error: error CS7036: There is no argument given that corresponds to the required parameter 'fileComment' of 'SaneTsv.SerializeCommentedTsv<T>(IList<T>, string)'
|
||||||
|
|
||||||
|
//Tsv<TestRecord> parsed = SaneTsv.ParseCommentedTsv<TestRecord>(serialized);
|
||||||
|
// Gives this error: error CS0311: The type 'Program.TestRecord' cannot be used as type parameter 'T' in the generic type or method 'SaneTsv.ParseCommentedTsv<T>(byte[])'. There is no implicit reference conversion from 'Program.TestRecord' to 'NathanMcRae.SaneTsv.CommentedTsvRecord'.
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Try to parsed a Commented TSV as a Simple TSV";
|
||||||
|
|
||||||
|
string testString1 = "#This is a file comment\n" +
|
||||||
|
"#One more file comment line\n" +
|
||||||
|
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\n#This is a comment" +
|
||||||
|
"\n#Another comment line" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Try to parsed a Commented TSV as a Typed TSV";
|
||||||
|
|
||||||
|
string testString1 = "#This is a file comment\n" +
|
||||||
|
"#One more file comment line\n" +
|
||||||
|
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\n#This is a comment" +
|
||||||
|
"\n#Another comment line" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Try to parsed a Typed TSV as a Simple TSV";
|
||||||
|
|
||||||
|
string testString1 =
|
||||||
|
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Timing comparison of simple parse methods and comparison of simple serialization methods";
|
||||||
|
|
||||||
|
int N = 1000000;
|
||||||
|
var records = new StringTestRecord[N];
|
||||||
|
var rand = new Random(1);
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
records[i] = new StringTestRecord()
|
||||||
|
{
|
||||||
|
Column1 = rand.Next().ToString(),
|
||||||
|
column2 = rand.Next().ToString(),
|
||||||
|
Column3 = rand.Next().ToString(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
string[][] recordStrings = records.Select(record => new string[] { record.Column1, record.column2, record.Column3 }).ToArray();
|
||||||
|
|
||||||
|
DateTime lastTime = DateTime.Now;
|
||||||
|
byte[] serialized1 = SaneTsv.SerializeSimpleTsv<StringTestRecord>(records);
|
||||||
|
|
||||||
|
TimeSpan speccedSerializationTime = DateTime.Now - lastTime;
|
||||||
|
Console.WriteLine($"Specced serialization time: {speccedSerializationTime}");
|
||||||
|
lastTime = DateTime.Now;
|
||||||
|
|
||||||
|
byte[] serialized2 = SaneTsv.SerializeSimpleTsv(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
|
||||||
|
|
||||||
|
TimeSpan unspeccedSerializationTime = DateTime.Now - lastTime;
|
||||||
|
Console.WriteLine($"Unspecced serialization time: {unspeccedSerializationTime}");
|
||||||
|
lastTime = DateTime.Now;
|
||||||
|
|
||||||
|
Tsv<StringTestRecord> parsed = SaneTsv.ParseSimpleTsv<StringTestRecord>(serialized1);
|
||||||
|
|
||||||
|
TimeSpan speccedParseTime = DateTime.Now - lastTime;
|
||||||
|
Console.WriteLine($"Specced parse time: {speccedParseTime}");
|
||||||
|
lastTime = DateTime.Now;
|
||||||
|
|
||||||
|
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(serialized2);
|
||||||
|
|
||||||
|
TimeSpan unspeccedParseTime = DateTime.Now - lastTime;
|
||||||
|
Console.WriteLine($"Unspecced parse time: {unspeccedParseTime}");
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "With and without file comment";
|
||||||
|
|
||||||
|
string testString1 = "#This is a file comment\n" +
|
||||||
|
"#One more file comment line\n" +
|
||||||
|
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\n#This is a comment" +
|
||||||
|
"\n#Another comment line" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
string testString2 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\n#This is a comment" +
|
||||||
|
"\n#Another comment line" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed2 = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
|
||||||
|
if (parsed.FileComment == "This is a file comment\nOne more file comment line" && parsed2.FileComment == null)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "With and without types";
|
||||||
|
|
||||||
|
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Passed {testName} 1A");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName} 1A");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1B");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1B");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1C");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1C");
|
||||||
|
}
|
||||||
|
|
||||||
|
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||||
|
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
Console.WriteLine($"Failed {testName} 2A");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 2A");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 2B");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 2B");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 2C");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 2C");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "With and without line comment";
|
||||||
|
|
||||||
|
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\n#This is a comment" +
|
||||||
|
"\n#Another comment line" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Passed {testName} 1A");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName} 1A");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1B");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1B");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1C");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1C");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1D");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1D");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "End of file comment";
|
||||||
|
|
||||||
|
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther" +
|
||||||
|
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1A");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1A");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1B");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1B");
|
||||||
|
}
|
||||||
|
|
||||||
|
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther" +
|
||||||
|
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
Console.WriteLine($"Failed {testName} 1C");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1C");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
Console.WriteLine($"Failed {testName} 1D");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1D");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "Partial parsing";
|
||||||
|
|
||||||
|
string line1 = "column1\tcolumn2\tcolumnthree\\nyep";
|
||||||
|
string line2 = "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee";
|
||||||
|
string line3 = "\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
byte[] inputBuffer = Encoding.UTF8.GetBytes(line1 + line2 + line3);
|
||||||
|
|
||||||
|
var headerTypes = new List<Type>();
|
||||||
|
var headerNames = new List<string>();
|
||||||
|
var headerPropertyInfos = new List<PropertyInfo>();
|
||||||
|
int columnCount = 0;
|
||||||
|
|
||||||
|
foreach (PropertyInfo property in typeof(BoolTestRecord3).GetProperties())
|
||||||
|
{
|
||||||
|
TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute));
|
||||||
|
if (attribute == null)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
headerNames.Add(attribute.ColumnName ?? property.Name);
|
||||||
|
headerTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType));
|
||||||
|
headerPropertyInfos.Add(property);
|
||||||
|
// TODO: Check that the property type and given column type are compatible
|
||||||
|
columnCount++;
|
||||||
|
}
|
||||||
|
|
||||||
|
BoolTestRecord3[] records = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||||
|
FormatType.SIMPLE_TSV,
|
||||||
|
headerPropertyInfos.ToArray(),
|
||||||
|
headerTypes.ToArray(),
|
||||||
|
line1.Length + line2.Length + 1,
|
||||||
|
inputBuffer.Length);
|
||||||
|
|
||||||
|
if (records.Length == 0 )
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName} 1");
|
||||||
|
}
|
||||||
|
|
||||||
|
BoolTestRecord3[] records2 = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||||
|
FormatType.SIMPLE_TSV,
|
||||||
|
headerPropertyInfos.ToArray(),
|
||||||
|
headerTypes.ToArray(),
|
||||||
|
line1.Length,
|
||||||
|
line1.Length + 3);
|
||||||
|
|
||||||
|
if (records2[0].Column3 == "valuetrhee")
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 2");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName} 2");
|
||||||
|
}
|
||||||
|
|
||||||
|
string[][] data = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length + line2.Length + 1, inputBuffer.Length);
|
||||||
|
|
||||||
|
if (data[0][1] == "nother")
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 3");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName} 3");
|
||||||
|
}
|
||||||
|
|
||||||
|
string[][] data2 = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length, line1.Length + 3);
|
||||||
|
|
||||||
|
if (data2.Length == 0)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 4");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName} 4");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "End of file \\n";
|
||||||
|
|
||||||
|
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther" +
|
||||||
|
"\n";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1A");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1A");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1B");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1B");
|
||||||
|
}
|
||||||
|
|
||||||
|
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther" +
|
||||||
|
"\n";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
Console.WriteLine($"Failed {testName} 1C");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1C");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
Console.WriteLine($"Failed {testName} 1D");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1D");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "End of file partial record";
|
||||||
|
|
||||||
|
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther" +
|
||||||
|
"\nTRUE\t";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1A");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1A");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
Console.WriteLine($"Failed {testName} 1B");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1B");
|
||||||
|
}
|
||||||
|
|
||||||
|
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther" +
|
||||||
|
"\nTRUE\t";
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
Console.WriteLine($"Failed {testName} 1C");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1C");
|
||||||
|
}
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||||
|
Console.WriteLine($"Failed {testName} 1D");
|
||||||
|
}
|
||||||
|
catch (Exception e)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName} 1D");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
string testName = "File comment serde";
|
||||||
|
|
||||||
|
string testString1 = "#this is a file comment" +
|
||||||
|
"\n# and one more line since you're such a good customer" +
|
||||||
|
"\ncolumn1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||||
|
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||||
|
"\nFALSE\tnother\tno\\ther";
|
||||||
|
|
||||||
|
|
||||||
|
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||||
|
|
||||||
|
string reserialized = Encoding.UTF8.GetString(SaneTsv.SerializeCommentedTsv<BoolTestRecord2>(parsed.Records, parsed.FileComment));
|
||||||
|
|
||||||
|
if (reserialized == testString1)
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Passed {testName}");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Console.WriteLine($"Failed {testName}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Console.WriteLine("Done with tests");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
|
||||||
string testName = "Bad bool test";
|
|
||||||
try
|
|
||||||
{
|
|
||||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
|
||||||
"\nTUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
|
||||||
"\nFALSE\tnother\tno\\ther";
|
|
||||||
|
|
||||||
SaneTsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1));
|
|
||||||
Console.WriteLine($"Failed {testName}");
|
|
||||||
}
|
|
||||||
catch (Exception)
|
|
||||||
{
|
|
||||||
Console.WriteLine($"Passed {testName}");
|
|
||||||
}
|
|
||||||
|
|
||||||
Console.WriteLine("Done with tests");
|
|
||||||
}
|
|
||||||
|
@ -1,10 +1,14 @@
|
|||||||
# Sane TSV
|
# Sane TSV
|
||||||
|
|
||||||
Sane TSV is a strict format for tabular data.
|
Sane Tab-Separated Values is a series of tabular formats offered as an alternative to the under-specified TSV / CSV quagmire.
|
||||||
|
|
||||||
|
# Simple TSV
|
||||||
|
|
||||||
|
Simple TSV is a strict format for tabular data.
|
||||||
|
|
||||||
'\n' (0x0A) characters delimit lines, and '\t' (0x09) characters delimit fields within a line.
|
'\n' (0x0A) characters delimit lines, and '\t' (0x09) characters delimit fields within a line.
|
||||||
|
|
||||||
'\n' and '\t' characters are allowed within fields by escaping them with a backslash character (0x5C) followed by 'n' (0x6E) and 't' (0x74) respectively. Additionaly, '\\' and '#' (0x23) must also be escaped. The '#' character is escaped for compatility with [Commented TSVs](#commented-tsv).
|
'\n' and '\t' characters are allowed within fields by escaping them with a backslash character (0x5C) followed by 'n' (0x6E) and 't' (0x74) respectively. Additionally, '\\' and '#' (0x23) must also be escaped. The '#' character is escaped for compatibility with [Commented TSVs](#commented-tsv).
|
||||||
|
|
||||||
All fields must be UTF-8 encoded text. All escaping can be done before decoding (and after encoding).
|
All fields must be UTF-8 encoded text. All escaping can be done before decoding (and after encoding).
|
||||||
|
|
||||||
@ -12,7 +16,7 @@ Empty fields (i.e. two subsequent '\t' characters) are allowed.
|
|||||||
|
|
||||||
The first line is always the header and the fields of the header are the column names for the file. Column names must be unique within the file and must not contain ':' characters (for compatibility with [Typed TSVs](#typed-tsv)).
|
The first line is always the header and the fields of the header are the column names for the file. Column names must be unique within the file and must not contain ':' characters (for compatibility with [Typed TSVs](#typed-tsv)).
|
||||||
|
|
||||||
All lines in the file must have the same number of fields.
|
All lines in the file must have the same number of fields as are in the header.
|
||||||
|
|
||||||
The file must not end with '\n'. That will be treated as if there is an empty row at the end of a file and cause an error.
|
The file must not end with '\n'. That will be treated as if there is an empty row at the end of a file and cause an error.
|
||||||
|
|
||||||
@ -20,12 +24,14 @@ Implementations of the format do not need to handle file reading and writing dir
|
|||||||
|
|
||||||
# Typed TSV
|
# Typed TSV
|
||||||
|
|
||||||
Typed TSV allows for typing of columns. All column names in a typed TSV must end with ':' (0x3A) and then one of the following types:
|
Typed TSV builds on Simple TSV to allow for typing of columns. All column names in a typed TSV must end with ':' (0x3A) and then one of the following types:
|
||||||
|
|
||||||
- 'string'
|
- 'string'
|
||||||
- 'boolean'
|
- 'boolean'
|
||||||
- 'float32'
|
- 'float32'
|
||||||
|
- 'float32-le'
|
||||||
- 'float64'
|
- 'float64'
|
||||||
|
- 'float64-le'
|
||||||
- 'uint32'
|
- 'uint32'
|
||||||
- 'uint64'
|
- 'uint64'
|
||||||
- 'int32'
|
- 'int32'
|
||||||
@ -34,9 +40,9 @@ Typed TSV allows for typing of columns. All column names in a typed TSV must end
|
|||||||
|
|
||||||
Any other value is an error; however, the portion of the name prior to the last ':' may be anything and may include ':' characters.
|
Any other value is an error; however, the portion of the name prior to the last ':' may be anything and may include ':' characters.
|
||||||
|
|
||||||
All fields in the rest of the file must be of the type corresponding the their column.
|
All fields in the rest of the file must be of the type corresponding to their column.
|
||||||
|
|
||||||
Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each type has the following restrictions:
|
Aside from the 'binary', 'float32-le', and 'float64-le' column types, all fields must be UTF-8 encoded text. Each type has the following restrictions:
|
||||||
|
|
||||||
- 'boolean' fields must contain only and exactly the text "TRUE" or "FALSE".
|
- 'boolean' fields must contain only and exactly the text "TRUE" or "FALSE".
|
||||||
- 'float32' and 'float64' correspond to single and double precision IEEE 754 floating-point numbers respectively. They should be formatted like this regex: `-?[0-9]\.([0-9]|[0-9]+[1-9])E-?[1-9][0-9]*`
|
- 'float32' and 'float64' correspond to single and double precision IEEE 754 floating-point numbers respectively. They should be formatted like this regex: `-?[0-9]\.([0-9]|[0-9]+[1-9])E-?[1-9][0-9]*`
|
||||||
@ -46,13 +52,56 @@ Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each
|
|||||||
- 'qNaN'
|
- 'qNaN'
|
||||||
- '+inf'
|
- '+inf'
|
||||||
- '-inf'
|
- '-inf'
|
||||||
|
- 'float32-le' and 'float64-le' are also IEEE 754 floating-point, but are stored as binary. They must always be stored in little-endian order.
|
||||||
|
|
||||||
|
The reason for having a separate binary format for them is that round-tripping floating-point text values between different parsers is not likely to work for all cases. The text-based format should be fine for general use, but when exact value transfer is needed, the binary formats are available.
|
||||||
- 'uint32' and 'uint64' are unsigned 32 and 64 bit integers respectively. They should be formatted like this regex: `[1-9][0-9]*`
|
- 'uint32' and 'uint64' are unsigned 32 and 64 bit integers respectively. They should be formatted like this regex: `[1-9][0-9]*`
|
||||||
- 'int32' and 'int64' are signed 32 and 64 bit integers respectively. They should be formatted like this regex: `-?[1-9][0-9]*` (except that '-0' is not allowed)
|
- 'int32' and 'int64' are signed 32 and 64 bit integers respectively. They should be formatted like this regex: `-?[1-9][0-9]*` (except that '-0' is not allowed)
|
||||||
|
|
||||||
|
Binary fields are left as-is (after unescaping is performed).
|
||||||
|
|
||||||
|
Typed TSV files should have the .ytsv extension (.ttsv is already used).
|
||||||
|
|
||||||
# Commented TSV
|
# Commented TSV
|
||||||
|
|
||||||
Commented lines start with a '#' character at the beginning of the line. Unescaped '#' characters are not allowed on a line that does not start with a '#'. Any '#' characters in fields must be escaped.
|
Commented TSV builds on Typed TSV and allows for more flexibility in the format by including line comments. The formats are kept distinct so that some applications can take advantage of the extra flexibility comments allow, while others can stick with the more restricted Typed TSV format.
|
||||||
|
|
||||||
|
Commented lines start with a '#' character at the beginning of the line. Unescaped '#' characters are not allowed on a line that does not start with a '#'. Any '#' characters in fields must be escaped. Note that the '#' character is excluded from the comment data.
|
||||||
|
|
||||||
|
Multiple consecutive comment lines are considered a single comment, with each line separated by a '\n'.
|
||||||
|
|
||||||
Comments must be UTF-8 encoded text.
|
Comments must be UTF-8 encoded text.
|
||||||
|
|
||||||
|
Comments are associated with the record beneath them. If a comment appears at the top of the file, it is associated with the file as a whole.
|
||||||
|
|
||||||
Comments after the last record are an error.
|
Comments after the last record are an error.
|
||||||
|
|
||||||
|
Commented TSV files should have the .ctsv extension.
|
||||||
|
|
||||||
|
## Extending the Commented TSV Format
|
||||||
|
|
||||||
|
Because it can include comments, this format lends itself well to extension. For example, if we wanted to extend the type system to include physical units, we could do so like this:
|
||||||
|
|
||||||
|
```
|
||||||
|
# UnitsTSV V1.0.0
|
||||||
|
id:uint32\tdatetime:string\tmeasurement1:m:float64\tmeasurement2:V:float64\tmeasurement3:1/s:float64
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that extended formats must remain parseable by baseline parsers, hence we must include the base types after the new types.
|
||||||
|
|
||||||
|
Extending formats may also have restrictions. For example, they could disallow record comments and only allow the file comment above the header.
|
||||||
|
|
||||||
|
Extended formats may still use the .ctsv extension, though they could use a dedicated one instead.
|
||||||
|
|
||||||
|
## Ideas for Extension
|
||||||
|
|
||||||
|
- Physical units
|
||||||
|
- Multiformats
|
||||||
|
- Instead of multihashes, maybe have a column type for each hash type. That way we can avoid wasting data on the type within each field.
|
||||||
|
- ISO 8601
|
||||||
|
- https://github.com/multiformats/unsigned-varint
|
||||||
|
- Color codes (e.g. #E359FF)
|
||||||
|
- Both binary and string-based
|
||||||
|
- JSON
|
||||||
|
- XML
|
||||||
|
- URL
|
||||||
|
45
readme.md
Normal file
45
readme.md
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# Sane TSV
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
- Improve error reporting by including line/column information in exceptions
|
||||||
|
- Use this to get line numbers for parallel parsing implementations
|
||||||
|
- [x] Come up with a static-typing interface
|
||||||
|
|
||||||
|
Something that doesn't require an array of objects
|
||||||
|
|
||||||
|
Use a class with SaneTsv attributes
|
||||||
|
|
||||||
|
- Check numeric formatting matches spec
|
||||||
|
- [x] Maybe add a binary representation for f32/f64. It should specify that it is Little-endian (since we have to pick one). That way we can guarantee bit-compatibility between implementations where an application might require that.
|
||||||
|
- [x] Add Column name/type specification to API
|
||||||
|
- So you can tell it what columns to expect
|
||||||
|
- [ ] Lax/strict versions
|
||||||
|
|
||||||
|
See the attributes thing above
|
||||||
|
- Generate test cases
|
||||||
|
- [x] File comment / no file comment
|
||||||
|
- [x] header types / no header types
|
||||||
|
- [x] Line comments / no line comments
|
||||||
|
- [x] end of file comment
|
||||||
|
- [x] Test with the start index of parallel methods in last record
|
||||||
|
- end index in first record
|
||||||
|
- [x] Extra \n at end of file
|
||||||
|
- [x] Wrong number of fields
|
||||||
|
- Wrong number of fields at end of file
|
||||||
|
|
||||||
|
- [x] Do parallel parsing / serializing implementation
|
||||||
|
- [x] Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
||||||
|
- ~~More optimization and making parsing modular:~~
|
||||||
|
- Have callbacks for header parsing and field parsing
|
||||||
|
- That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again.
|
||||||
|
- [x] Make untyped Simple TSV (De)serialization
|
||||||
|
- [x] ~~Finish~~ Minimal ExtraTSV implementation
|
||||||
|
- [ ] Do zig implementation
|
||||||
|
- Make a c interface from that
|
||||||
|
- Make a commandline interface
|
||||||
|
- Make a viewer / editor
|
||||||
|
- Streaming interface
|
||||||
|
So you can start processing your data while it finishes parsing?
|
||||||
|
- [ ] Decoding a binary stream with a \0 in it via UTF-8 doesn't seem to cause any issues. I thought that valid UTF-8 wouldn't have a \0?
|
||||||
|
- [ ] Instead of exceptions when parsing, we should parse as much as possible and reflect parsing errors in the returned data structure
|
Reference in New Issue
Block a user