Compare commits

...

2 Commits

Author SHA1 Message Date
Nathan McRae
78574d1872 Add basic implementation of physical units
Serde still isn't working, need to store the exact format of the unit as specified in the attribute
2024-03-15 18:58:07 -07:00
Nathan McRae
77b679bbdc Remove old ExtraTsv stuff 2024-03-10 23:26:42 -07:00
9 changed files with 236 additions and 261 deletions

View File

@ -1,125 +0,0 @@

using System.Globalization;
using System.Text.RegularExpressions;
namespace NathanMcRae;
/// <summary>
/// Parser for the ExtraTSV layer on top of <see cref="SaneTsv"/>. The input is parsed
/// as a commented TSV whose file comment must carry an <c>ExtraTSV Vx.y.z</c> version
/// marker; string columns whose name ends in <c>:iso8601</c> are then converted to
/// <see cref="DateTime"/> values, and float columns whose name ends in
/// <c>:&lt;unit&gt;:units</c> have their unit validated against <see cref="ValidUnits"/>.
/// </summary>
public class ExtraTsv : SaneTsv
{
    /// <summary>Marker type for columns holding ISO 8601 timestamps.</summary>
    public class Iso8601Type : ColumnType { }

    /// <summary>Column type for a numeric column annotated with a physical unit.</summary>
    public class PhysicalUnitsType : ColumnType
    {
        /// <summary>The unit text this column type was constructed with.</summary>
        public string Units { get; }

        /// <param name="Units">Unit text to expose through <see cref="Units"/>.</param>
        public PhysicalUnitsType(string Units)
        {
            // The parameter shadows the property, so the target must be qualified.
            // (Previously the constructor body was empty and the property stayed null.)
            this.Units = Units;
        }
    }

    /// <summary>Unit symbols accepted in a <c>:units</c> column annotation.</summary>
    public static readonly string[] ValidUnits =
    {
        // SI base units
        "m",
        "s",
        "A",
        "K",
        "cd",
        "mol",
        "kg",
        // SI derived units
        "Hz",
        "rad",
        "sr",
        "N",
        "Pa",
        "J",
        "W",
        "C",
        "V",
        "F",
        "Ω",
        "S",
        "Wb",
        "T",
        "H",
        "°C",
        "lm",
        "lx",
        "Bq",
        "Gy",
        "Sv",
        "kat"
    };

    public static readonly int MajorVersion = 0;
    public static readonly int MinorVersion = 0;
    public static readonly int PatchVersion = 1;

    /// <summary>
    /// Matches the version marker at the start of the file comment; the three groups
    /// capture the major, minor and patch numbers.
    /// </summary>
    public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");

    /// <summary>
    /// Parses <paramref name="inputBuffer"/> as ExtraTSV.
    /// </summary>
    /// <param name="inputBuffer">Raw bytes of the file, handed to <c>ParseCommentedTsv</c>.</param>
    /// <returns>The parsed table, with ISO 8601 fields replaced by <see cref="DateTime"/> values.</returns>
    /// <exception cref="Exception">
    /// The version marker is missing or has a different major version, a typed column is
    /// malformed, a record carries a comment, or a timestamp field does not match
    /// <c>yyyy-MM-ddTHH:mm:ss.ffff</c>.
    /// </exception>
    public static ExtraTsv ParseExtraTsv(byte[] inputBuffer)
    {
        SaneTsv tsv = ParseCommentedTsv(inputBuffer);

        // A missing file comment and a non-matching one are reported identically.
        string versionError = $"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}";
        if (tsv.FileComment == null)
        {
            throw new Exception(versionError);
        }

        Match match = VersionRegex.Match(tsv.FileComment);
        if (!match.Success)
        {
            throw new Exception(versionError);
        }

        int fileMajorVersion = int.Parse(match.Groups[1].Value);
        if (fileMajorVersion != MajorVersion)
        {
            throw new Exception($"File has major version ({fileMajorVersion}) which is newer than this parser's version {MajorVersion}");
        }

        for (int i = 0; i < tsv.ColumnNames.Count(); i++)
        {
            string[] typeParts = tsv.ColumnNames[i].Split(":");
            string lastPart = typeParts[typeParts.Length - 1];

            if (lastPart == "iso8601" && tsv.ColumnTypes[i] == typeof(StringType))
            {
                // A header of just "iso8601:string" leaves nothing to use as the column
                // name; without this guard the Substring below gets a negative length.
                if (typeParts.Length < 2)
                {
                    throw new Exception($"Column {i} has type 'iso8601' but no column name");
                }

                string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - ":iso8601".Length);
                tsv.ColumnNames[i] = columnName;
                tsv.ColumnTypes[i] = typeof(Iso8601Type);
            }
            // TODO: ISO8601 time spans
            // TODO: ISO8601 time durations
            else if (lastPart == "units" && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type)))
            {
                // Guard first: with a single part there is no unit to read, and indexing
                // Length - 2 would throw IndexOutOfRangeException instead of a parse error.
                if (typeParts.Length < 2)
                {
                    throw new Exception($"Missing units for column {i}");
                }

                string unitName = typeParts[typeParts.Length - 2];
                if (!ValidUnits.Contains(unitName))
                {
                    throw new Exception($"Invalid units type '{unitName}' for column {i}");
                }

                // TODO: How to store type information since the ColumnTypes is of type Type?
            }
        }

        CultureInfo provider = CultureInfo.InvariantCulture;
        for (int i = 0; i < tsv.Records.Count; i++)
        {
            if (tsv.Records[i].Comment != null)
            {
                throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed");
            }

            for (int j = 0; j < tsv.ColumnNames.Count(); j++)
            {
                if (tsv.ColumnTypes[j] == typeof(Iso8601Type))
                {
                    if (!DateTime.TryParseExact((string)tsv.Records[i][j], "yyyy-MM-ddTHH:mm:ss.ffff", provider, DateTimeStyles.None, out DateTime parsed))
                    {
                        throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}");
                    }

                    // Replace the raw string with the parsed timestamp in place.
                    tsv.Records[i].Fields[j] = parsed;
                }
            }
        }

        // NOTE(review): this downcast only succeeds if ParseCommentedTsv returns an object
        // whose runtime type is ExtraTsv; otherwise it throws InvalidCastException — confirm.
        return (ExtraTsv)tsv;
    }
}

View File

@ -1,14 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<RootNamespace>NathanMcRae</RootNamespace>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\SaneTsv.csproj" />
</ItemGroup>
</Project>

View File

@ -1,44 +0,0 @@
Extra TSV adds many convenience types to Sane TSV:
- Timestamps
Just this format for now: yyyy-MM-ddTHH:mm:ss.ffff
- Timespans
- Time durations
- Multiformats
- Multihashes
- Multiprotocols
- ...
- Physical units
To start with, just use SI base and derived units
- Base units
- m
- s
- A
- K
- cd
- mol
- kg
- Derived units
- Hz
- rad
- sr
- N
- Pa
- J
- W
- C
- V
- F
- Ω
- S
- Wb
- T
- H
- °C
- lm
- lx
- Bq
- Gy
- Sv
- kat
How to handle derived units?

View File

@ -1,14 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\ExtraTsv\ExtraTsv.csproj" />
</ItemGroup>
</Project>

View File

@ -1,20 +0,0 @@
using NathanMcRae;
using System.Text;
{
    // Smoke test: parse a small three-column ExtraTSV document and report whether the
    // first record's boolean column comes back as true.
    string label = "Bool test";
    string document = "# ExtraTSV V0.0.1\n" +
        "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string" +
        "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" +
        "\nFALSE\tnother\t2024-02-15T18:03:39.0001";

    byte[] encoded = Encoding.UTF8.GetBytes(document);
    ExtraTsv table = ExtraTsv.ParseExtraTsv(encoded);

    // `is true` matches only a bool whose value is true, same as `is bool b && b`.
    bool passed = table.Records[0]["column1:ty#pe"] is true;
    Console.WriteLine(passed ? $"Passed {label}" : $"Failed {label}");
}

View File

@ -2,6 +2,8 @@
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using UnitsNet;
using UnitsNet.Units;
namespace NathanMcRae;
@ -37,8 +39,14 @@ public class SaneTsv
public class Iso8601Type : ColumnType { }
public class PhysicalUnitsType : ColumnType
{
public string Units { get; }
public PhysicalUnitsType(string Units) { }
public UnitsNet.UnitInfo Units { get; }
public ColumnType BaseType { get; internal set; }
public PhysicalUnitsType(UnitsNet.UnitInfo units, ColumnType baseType)
{
Units = units;
BaseType = baseType;
}
}
public static readonly string[] ValidUnits =
@ -271,7 +279,7 @@ public class SaneTsv
type = new StringType();
break;
case "string":
if (columnTypeStrings[columnTypeStrings.Length - 2] == "iso8601")
if (columnTypeStrings.Length > 2 && columnTypeStrings[columnTypeStrings.Length - 2] == "iso8601")
{
type = new Iso8601Type();
columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 2));
@ -293,10 +301,44 @@ public class SaneTsv
type = new Float32LEType();
break;
case "float64":
type = new Float64Type();
if (columnTypeStrings.Length > 3 && columnTypeStrings[columnTypeStrings.Length - 2] == UnitsTypeText)
{
string unitName = columnTypeStrings[columnTypeStrings.Length - 3];
if (UnitsNet.Quantity.TryFromUnitAbbreviation(1, unitName, out UnitsNet.IQuantity quantity))
{
type = new PhysicalUnitsType(UnitsNet.Quantity.GetUnitInfo(quantity.Unit), new Float64Type());
}
else
{
throw new Exception($"Invalid units: {unitName}");
}
columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 3));
}
else
{
type = new Float64Type();
}
break;
case "float64-le":
type = new Float64LEType();
if (columnTypeStrings.Length > 3 && columnTypeStrings[columnTypeStrings.Length - 2] == UnitsTypeText)
{
string unitName = columnTypeStrings[columnTypeStrings.Length - 3];
if (UnitsNet.Quantity.TryFromUnitAbbreviation(1, unitName, out UnitsNet.IQuantity quantity))
{
type = new PhysicalUnitsType(UnitsNet.Quantity.GetUnitInfo(quantity.Unit), new Float64LEType());
}
else
{
throw new Exception($"Invalid units: {unitName}");
}
columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 3));
}
else
{
type = new Float64LEType();
}
break;
case "uint32":
type = new UInt32Type();
@ -317,8 +359,6 @@ public class SaneTsv
throw new Exception($"Invalid type '{columnTypeStrings.Last()}' for column {j}");
}
// TODO: physical unit types
// TODO: Allow lax parsing (only worry about parsing columns that are given in the specifying type
if (columnNames[j] != columnName)
@ -644,7 +684,7 @@ public class SaneTsv
continue;
}
else if (columnTypes[j].GetType() == typeof(Float64LEType))
else if (columnTypes[j].GetType() == typeof(Float64LEType) || (columnTypes[j] is PhysicalUnitsType f64PhUnit && f64PhUnit.BaseType is Float64LEType))
{
byte[] floatBytes;
if (!LittleEndian)
@ -659,7 +699,15 @@ public class SaneTsv
{
floatBytes = fields[j];
}
properties[j].SetValue(record, BitConverter.ToDouble(floatBytes, 0));
double value = BitConverter.ToDouble(floatBytes, 0);
if (columnTypes[j] is PhysicalUnitsType unit)
{
properties[j].SetValue(record, UnitsNet.Quantity.From(value, unit.Units.Value));
}
else
{
properties[j].SetValue(record, value);
}
continue;
}
@ -719,7 +767,7 @@ public class SaneTsv
properties[j].SetValue(record, parsedFloat);
}
else if (columnTypes[j].GetType() == typeof(Float64Type))
else if (columnTypes[j].GetType() == typeof(Float64Type) || (columnTypes[j] is PhysicalUnitsType f64PhUnit && f64PhUnit.BaseType is Float64Type))
{
double parsedDouble;
if (!double.TryParse(fieldString, out parsedDouble))
@ -738,7 +786,14 @@ public class SaneTsv
}
}
properties[j].SetValue(record, parsedDouble);
if (columnTypes[j] is PhysicalUnitsType unit)
{
properties[j].SetValue(record, UnitsNet.Quantity.From(parsedDouble, unit.Units.Value));
}
else
{
properties[j].SetValue(record, parsedDouble);
}
}
else if (columnTypes[j].GetType() == typeof(UInt32Type))
{
@ -1159,6 +1214,9 @@ public class SaneTsv
return records.ToArray();
}
/// <summary>Type-name part that marks a column as carrying a physical unit.</summary>
public static string UnitsTypeText = "ph-unit";

/// <summary>
/// Matches a full physical-unit type string of the form
/// <c>&lt;unit&gt;:ph-unit:&lt;base type&gt;</c>. Group 1 is the unit text, group 2 the base type.
/// The pattern is anchored at both ends: unanchored, <c>float64-le</c> was captured as
/// <c>float64</c> (the shorter alternative is tried first) and strings with extra leading
/// or trailing text were accepted.
/// </summary>
// Declared after UnitsTypeText on purpose: static initializers run in textual order.
public static Regex UnitsRegex = new Regex("^([^:]+):" + Regex.Escape(UnitsTypeText) + ":(float32|float32-le|float64|float64-le|uint32|uint64|int32|int64)$");
public static ColumnType GetColumnFromString(string type)
{
if (type == "string")
@ -1205,6 +1263,26 @@ public class SaneTsv
{
return new BinaryType();
}
else if (type == "iso8601")
{
return new Iso8601Type();
}
else if (UnitsRegex.IsMatch(type))
{
Match match = UnitsRegex.Match(type);
string unitName = match.Groups[1].Value;
string baseType = match.Groups[2].Value;
return new PhysicalUnitsType(ParseUnit(unitName), GetColumnFromString(baseType));
//if (UnitsNet.Quantity.TryFromUnitAbbreviation(1, unitName, out UnitsNet.IQuantity quantity))
//{
// return new PhysicalUnitsType(UnitsNet.Quantity.GetUnitInfo(quantity.Unit), GetColumnFromString(baseType));
//}
//else
//{
// throw new Exception($"Invalid units: {unitName}");
//}
}
else
{
throw new Exception($"Invalid type: {type.GetType()}");
@ -1253,6 +1331,13 @@ public class SaneTsv
{
return new Iso8601Type();
}
else if (type == typeof(UnitsNet.Mass))
{
// TODO
//UnitsNet.UnitInfo a = new UnitsNet.UnitInfo([d])
var a = new UnitsNet.UnitInfo<UnitsNet.Units.MassUnit>(UnitsNet.Units.MassUnit.Kilogram, "kgs", new UnitsNet.BaseUnits(mass: UnitsNet.Units.MassUnit.Kilogram));
return new PhysicalUnitsType(a, new Float64Type());
}
else
{
throw new Exception($"Invalid type: {type.GetType()}");
@ -1309,6 +1394,10 @@ public class SaneTsv
{
return "iso8601:string";
}
else if (type is PhysicalUnitsType unit)
{
return $"{unit.Units.Name}:{UnitsTypeText}:{GetNameFromColumn(unit.BaseType)}";
}
else
{
throw new Exception($"Invalid type: {type.GetType()}");
@ -1510,39 +1599,63 @@ public class SaneTsv
}
}
}
else if (columnTypes[j].GetType() == typeof(Float64Type))
else if (columnTypes[j].GetType() == typeof(Float64Type) || (columnTypes[j] is PhysicalUnitsType f64PhUnit && f64PhUnit.BaseType is Float64Type))
{
double value;
if (datum is double d)
{
if (double.IsNegativeInfinity(d))
{
bytes.AddRange(Encoding.UTF8.GetBytes("-inf"));
}
else if (double.IsPositiveInfinity(d))
{
bytes.AddRange(Encoding.UTF8.GetBytes("+inf"));
}
else
{
// See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r
bytes.AddRange(Encoding.UTF8.GetBytes((d).ToString("G17")));
}
value = d;
}
// TODO: check units match
else if (datum is UnitsNet.IQuantity quantity)
{
value = quantity.Value;
}
else
{
throw new InvalidCastException();
}
skipEscaping = true;
}
else if (columnTypes[j].GetType() == typeof(Float64LEType))
{
if (LittleEndian)
if (double.IsNegativeInfinity(value))
{
fieldEncoded = BitConverter.GetBytes((double)datum);
bytes.AddRange(Encoding.UTF8.GetBytes("-inf"));
}
else if (double.IsPositiveInfinity(value))
{
bytes.AddRange(Encoding.UTF8.GetBytes("+inf"));
}
else
{
byte[] doubleBytes = BitConverter.GetBytes((double)datum);
// See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r
bytes.AddRange(Encoding.UTF8.GetBytes((value).ToString("G17")));
}
skipEscaping = true;
}
else if (columnTypes[j].GetType() == typeof(Float64LEType) || (columnTypes[j] is PhysicalUnitsType f64LEPhUnit && f64LEPhUnit.BaseType is Float64LEType))
{
double value;
if (datum is double d)
{
value = d;
}
// TODO: check units match
else if (datum is UnitsNet.IQuantity quantity)
{
value = quantity.Value;
}
else
{
throw new InvalidCastException();
}
if (LittleEndian)
{
fieldEncoded = BitConverter.GetBytes((double)value);
}
else
{
byte[] doubleBytes = BitConverter.GetBytes((double)value);
fieldEncoded = new byte[sizeof(double)];
for (int k = 0; k < sizeof(double); k++)
{
@ -1578,6 +1691,10 @@ public class SaneTsv
{
fieldEncoded = Encoding.UTF8.GetBytes(((DateTime)datum).ToString("yyyy-MM-ddTHH:mm:ss.ffff"));
}
else if (columnTypes[j] is PhysicalUnitsType phUnits)
{
throw new NotImplementedException($"Physical units types don't support {GetNameFromColumn(phUnits.BaseType)} as a base type");
}
else
{
throw new Exception($"Unexpected column type {columnTypes[j]} for column {j}");
@ -1716,4 +1833,23 @@ public class SaneTsv
ColumnType = columnType;
}
}
/// <summary>
/// Resolves a unit abbreviation to its <see cref="UnitInfo"/> by trying it against every
/// enum declared in the same assembly and namespace as <see cref="LengthUnit"/>.
/// </summary>
/// <param name="unitName">Unit abbreviation to look up.</param>
/// <returns>The <see cref="UnitInfo"/> for the first enum type that parses the abbreviation.</returns>
/// <exception cref="ArgumentException">No candidate enum type parses the abbreviation.</exception>
public static UnitInfo ParseUnit(string unitName)
{
    Type anchor = typeof(LengthUnit);

    // Walk the assembly's types in their reported order; the first enum that accepts
    // the abbreviation wins.
    foreach (Type candidate in anchor.Assembly.GetTypes())
    {
        if (!candidate.IsEnum || candidate.Namespace != anchor.Namespace)
        {
            continue;
        }

        if (UnitParser.Default.TryParse(unitName, candidate, out Enum parsedUnit))
        {
            return Quantity.GetUnitInfo(parsedUnit);
        }
    }

    throw new ArgumentException($"Unable to parse unit abbreviation: {unitName}");
}
}

View File

@ -21,4 +21,8 @@
<None Remove="SaneTsvTest\**" />
</ItemGroup>
<ItemGroup>
<PackageReference Include="UnitsNet" Version="6.0.0-pre004" />
</ItemGroup>
</Project>

View File

@ -7,10 +7,6 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj",
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ExtraTsv", "ExtraTsv\ExtraTsv.csproj", "{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ExtraTsvTest", "ExtraTsvTest\ExtraTsvTest.csproj", "{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -25,14 +21,6 @@ Global
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.Build.0 = Release|Any CPU
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -15,6 +15,15 @@ internal class Program : SaneTsv
public DateTime Column3 { get; set; }
}
/// <summary>
/// Record shape for the physical-unit tests: an unsigned integer id column and a
/// "value" column declared with the type string "m/s:ph-unit:float64".
/// </summary>
public class UnitTest : SaneTsv.CommentedTsvRecord
{
    /// <summary>Bound to the "id" column.</summary>
    [SaneTsv.TypedTsvColumn("id")]
    public uint Id { get; set; }

    /// <summary>Bound to the "value" column; exposed as a UnitsNet speed quantity.</summary>
    [SaneTsv.TypedTsvColumn("value", "m/s:ph-unit:float64")]
    public UnitsNet.Speed Value { get; set; }
}
private static void Main(string[] args)
{
{
@ -35,6 +44,24 @@ internal class Program : SaneTsv
}
}
{
string testName = "Bad date column name";
string testString1 = "# ExtraTSV V0.0.1\n" +
"column1:ty\\#pe:boolean\tcolumn2:binary\tiso8601:string" +
"\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" +
"\nFALSE\tnother\t2024-02-15T18:03:39.0001";
try
{
CommentedTsv<DateTest> parsed = SaneTsv.ParseExtraTsv<DateTest>(Encoding.UTF8.GetBytes(testString1));
Console.WriteLine($"Failed {testName}");
}
catch (Exception e)
{
Console.WriteLine($"Passed {testName}");
}
}
{
string testName = "Serde date";
string testString1 = "# ExtraTSV V0.0.1\n" +
@ -54,6 +81,43 @@ internal class Program : SaneTsv
}
}
{
string testName = "Parse unit";
string testString1 = "# ExtraTSV V0.0.1\n" +
"id:uint32\tvalue:m/s:ph-unit:float64\n" +
"0\t1.5\n" +
"1\t5.4e3";
CommentedTsv<UnitTest> parsed = SaneTsv.ParseExtraTsv<UnitTest>(Encoding.UTF8.GetBytes(testString1));
if (parsed.Records[0].Value.Value == 1.5)
{
Console.WriteLine($"Passed {testName}");
}
else
{
Console.WriteLine($"Failed {testName}");
}
}
{
string testName = "Serde unit";
string testString1 = "# ExtraTSV V0.0.1\n" +
"id:uint32\tvalue:m/s:ph-unit:float64\n" +
"0\t1.5\n" +
"1\t5.4e3";
CommentedTsv<UnitTest> parsed = SaneTsv.ParseExtraTsv<UnitTest>(Encoding.UTF8.GetBytes(testString1));
string serialized = Encoding.UTF8.GetString(SaneTsv.SerializeExtraTsv<UnitTest>(parsed.Records));
if (testString1 == serialized)
{
Console.WriteLine($"Passed {testName}");
}
else
{
Console.WriteLine($"Failed {testName}");
}
}
Console.WriteLine("Done with tests");
}
}