From 78574d1872cb9be8421d9a9748915e6b53545715 Mon Sep 17 00:00:00 2001 From: Nathan McRae Date: Fri, 15 Mar 2024 18:58:07 -0700 Subject: [PATCH] Add basic implementation of physical units Serde still isn't working, need to store the exact format of the unit as specified in the attribute --- SaneTsv.cs | 200 ++++++++++++++++++++++++++++++++++------- SaneTsv.csproj | 4 + SaneTsvTest/Program.cs | 64 +++++++++++++ 3 files changed, 236 insertions(+), 32 deletions(-) diff --git a/SaneTsv.cs b/SaneTsv.cs index 86bf535..8c0ce2c 100644 --- a/SaneTsv.cs +++ b/SaneTsv.cs @@ -2,6 +2,8 @@ using System.Reflection; using System.Text; using System.Text.RegularExpressions; +using UnitsNet; +using UnitsNet.Units; namespace NathanMcRae; @@ -37,8 +39,14 @@ public class SaneTsv public class Iso8601Type : ColumnType { } public class PhysicalUnitsType : ColumnType { - public string Units { get; } - public PhysicalUnitsType(string Units) { } + public UnitsNet.UnitInfo Units { get; } + public ColumnType BaseType { get; internal set; } + + public PhysicalUnitsType(UnitsNet.UnitInfo units, ColumnType baseType) + { + Units = units; + BaseType = baseType; + } } public static readonly string[] ValidUnits = @@ -271,7 +279,7 @@ public class SaneTsv type = new StringType(); break; case "string": - if (columnTypeStrings[columnTypeStrings.Length - 2] == "iso8601") + if (columnTypeStrings.Length > 2 && columnTypeStrings[columnTypeStrings.Length - 2] == "iso8601") { type = new Iso8601Type(); columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 2)); @@ -293,10 +301,44 @@ public class SaneTsv type = new Float32LEType(); break; case "float64": - type = new Float64Type(); + if (columnTypeStrings.Length > 3 && columnTypeStrings[columnTypeStrings.Length - 2] == UnitsTypeText) + { + string unitName = columnTypeStrings[columnTypeStrings.Length - 3]; + if (UnitsNet.Quantity.TryFromUnitAbbreviation(1, unitName, out UnitsNet.IQuantity quantity)) + { + type = new PhysicalUnitsType(UnitsNet.Quantity.GetUnitInfo(quantity.Unit), new Float64Type()); + } + else + { + throw new Exception($"Invalid units: {unitName}"); + } + + columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 3)); + } + else + { + type = new Float64Type(); + } break; case "float64-le": - type = new Float64LEType(); + if (columnTypeStrings.Length > 3 && columnTypeStrings[columnTypeStrings.Length - 2] == UnitsTypeText) + { + string unitName = columnTypeStrings[columnTypeStrings.Length - 3]; + if (UnitsNet.Quantity.TryFromUnitAbbreviation(1, unitName, out UnitsNet.IQuantity quantity)) + { + type = new PhysicalUnitsType(UnitsNet.Quantity.GetUnitInfo(quantity.Unit), new Float64LEType()); + } + else + { + throw new Exception($"Invalid units: {unitName}"); + } + + columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 3)); + } + else + { + type = new Float64LEType(); + } break; case "uint32": type = new UInt32Type(); @@ -317,8 +359,6 @@ public class SaneTsv throw new Exception($"Invalid type '{columnTypeStrings.Last()}' for column {j}"); } - // TODO: physical unit types - // TODO: Allow lax parsing (only worry about parsing columns that are given in the specifying type if (columnNames[j] != columnName) @@ -644,7 +684,7 @@ public class SaneTsv continue; } - else if (columnTypes[j].GetType() == typeof(Float64LEType)) + else if (columnTypes[j].GetType() == typeof(Float64LEType) || (columnTypes[j] is PhysicalUnitsType f64PhUnit && f64PhUnit.BaseType is Float64LEType)) { byte[] floatBytes; if (!LittleEndian) @@ -659,7 +699,15 @@ public class SaneTsv { floatBytes = fields[j]; } - properties[j].SetValue(record, BitConverter.ToDouble(floatBytes, 0)); + double value = BitConverter.ToDouble(floatBytes, 0); + if (columnTypes[j] is PhysicalUnitsType unit) + { + properties[j].SetValue(record, UnitsNet.Quantity.From(value, unit.Units.Value)); + } + else + { + properties[j].SetValue(record, value); + } continue; } @@ -719,7 +767,7 @@ public class SaneTsv properties[j].SetValue(record, parsedFloat); } - else if (columnTypes[j].GetType() == typeof(Float64Type)) + else if (columnTypes[j].GetType() == typeof(Float64Type) || (columnTypes[j] is PhysicalUnitsType f64PhUnit && f64PhUnit.BaseType is Float64Type)) { double parsedDouble; if (!double.TryParse(fieldString, out parsedDouble)) @@ -738,7 +786,14 @@ public class SaneTsv } } - properties[j].SetValue(record, parsedDouble); + if (columnTypes[j] is PhysicalUnitsType unit) + { + properties[j].SetValue(record, UnitsNet.Quantity.From(parsedDouble, unit.Units.Value)); + } + else + { + properties[j].SetValue(record, parsedDouble); + } } else if (columnTypes[j].GetType() == typeof(UInt32Type)) { @@ -1159,6 +1214,9 @@ public class SaneTsv return records.ToArray(); } + public static string UnitsTypeText = "ph-unit"; + public static Regex UnitsRegex = new Regex("([^:]+):" + UnitsTypeText + ":(float32|float32-le|float64|float64-le|uint32|uint64|int32|int64)"); + public static ColumnType GetColumnFromString(string type) { if (type == "string") @@ -1205,6 +1263,26 @@ public class SaneTsv { return new BinaryType(); } + else if (type == "iso8601") + { + return new Iso8601Type(); + } + else if (UnitsRegex.IsMatch(type)) + { + Match match = UnitsRegex.Match(type); + string unitName = match.Groups[1].Value; + string baseType = match.Groups[2].Value; + + return new PhysicalUnitsType(ParseUnit(unitName), GetColumnFromString(baseType)); + //if (UnitsNet.Quantity.TryFromUnitAbbreviation(1, unitName, out UnitsNet.IQuantity quantity)) + //{ + // return new PhysicalUnitsType(UnitsNet.Quantity.GetUnitInfo(quantity.Unit), GetColumnFromString(baseType)); + //} + //else + //{ + // throw new Exception($"Invalid units: {unitName}"); + //} + } else { throw new Exception($"Invalid type: {type.GetType()}"); @@ -1253,6 +1331,13 @@ public class SaneTsv { return new Iso8601Type(); } + else if (type == typeof(UnitsNet.Mass)) + { + // TODO + //UnitsNet.UnitInfo a = new UnitsNet.UnitInfo([d]) + var a = new UnitsNet.UnitInfo(UnitsNet.Units.MassUnit.Kilogram, "kgs", new UnitsNet.BaseUnits(mass: UnitsNet.Units.MassUnit.Kilogram)); + return new PhysicalUnitsType(a, new Float64Type()); + } else { throw new Exception($"Invalid type: {type.GetType()}"); @@ -1309,6 +1394,10 @@ public class SaneTsv { return "iso8601:string"; } + else if (type is PhysicalUnitsType unit) + { + return $"{unit.Units.Name}:{UnitsTypeText}:{GetNameFromColumn(unit.BaseType)}"; + } else { throw new Exception($"Invalid type: {type.GetType()}"); @@ -1510,39 +1599,63 @@ public class SaneTsv } } } - else if (columnTypes[j].GetType() == typeof(Float64Type)) + else if (columnTypes[j].GetType() == typeof(Float64Type) || (columnTypes[j] is PhysicalUnitsType f64PhUnit && f64PhUnit.BaseType is Float64Type)) { + double value; if (datum is double d) { - if (double.IsNegativeInfinity(d)) - { - bytes.AddRange(Encoding.UTF8.GetBytes("-inf")); - } - else if (double.IsPositiveInfinity(d)) - { - bytes.AddRange(Encoding.UTF8.GetBytes("+inf")); - } - else - { - // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r - bytes.AddRange(Encoding.UTF8.GetBytes((d).ToString("G17"))); - } + value = d; + } + // TODO: check units match + else if (datum is UnitsNet.IQuantity quantity) + { + value = quantity.Value; } else { throw new InvalidCastException(); } - skipEscaping = true; - } - else if (columnTypes[j].GetType() == typeof(Float64LEType)) - { - if (LittleEndian) + + if (double.IsNegativeInfinity(value)) { - fieldEncoded = BitConverter.GetBytes((double)datum); + bytes.AddRange(Encoding.UTF8.GetBytes("-inf")); + } + else if (double.IsPositiveInfinity(value)) + { + bytes.AddRange(Encoding.UTF8.GetBytes("+inf")); } else { - byte[] doubleBytes = BitConverter.GetBytes((double)datum); + // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r + bytes.AddRange(Encoding.UTF8.GetBytes((value).ToString("G17"))); + } + + skipEscaping = true; + } + else if (columnTypes[j].GetType() == typeof(Float64LEType) || (columnTypes[j] is PhysicalUnitsType f64LEPhUnit && f64LEPhUnit.BaseType is Float64LEType)) + { + double value; + if (datum is double d) + { + value = d; + } + // TODO: check units match + else if (datum is UnitsNet.IQuantity quantity) + { + value = quantity.Value; + } + else + { + throw new InvalidCastException(); + } + + if (LittleEndian) + { + fieldEncoded = BitConverter.GetBytes((double)value); + } + else + { + byte[] doubleBytes = BitConverter.GetBytes((double)value); fieldEncoded = new byte[sizeof(double)]; for (int k = 0; k < sizeof(double); k++) { @@ -1578,6 +1691,10 @@ public class SaneTsv { fieldEncoded = Encoding.UTF8.GetBytes(((DateTime)datum).ToString("yyyy-MM-ddTHH:mm:ss.ffff")); } + else if (columnTypes[j] is PhysicalUnitsType phUnits) + { + throw new NotImplementedException($"Physical units types don't support {GetNameFromColumn(phUnits.BaseType)} as a base type"); + } else { throw new Exception($"Unexpected column type {columnTypes[j]} for column {j}"); @@ -1716,4 +1833,23 @@ public class SaneTsv ColumnType = columnType; } } + + public static UnitInfo ParseUnit(string unitName) + { + // Find all unit enum types in the UnitsNet namespace + var unitEnumTypes = Assembly.GetAssembly(typeof(LengthUnit)) + .GetTypes() + .Where(t => t.IsEnum && t.Namespace == typeof(LengthUnit).Namespace); + + foreach (var unitEnumType in unitEnumTypes) + { + if (UnitParser.Default.TryParse(unitName, unitEnumType, out Enum unitEnum)) + { + // Successfully parsed the abbreviation, retrieve UnitInfo + return Quantity.GetUnitInfo(unitEnum); + } + } + + throw new ArgumentException($"Unable to parse unit abbreviation: {unitName}"); + } } diff --git a/SaneTsv.csproj b/SaneTsv.csproj index c231904..05b85ff 100644 --- a/SaneTsv.csproj +++ b/SaneTsv.csproj @@ -21,4 +21,8 @@ + + + + diff --git a/SaneTsvTest/Program.cs b/SaneTsvTest/Program.cs index ddfecbd..e8eddec 100644 --- a/SaneTsvTest/Program.cs +++ b/SaneTsvTest/Program.cs @@ -15,6 +15,15 @@ internal class Program : SaneTsv public DateTime Column3 { get; set; } } + public class UnitTest : SaneTsv.CommentedTsvRecord + { + [SaneTsv.TypedTsvColumn("id")] + public UInt32 Id { get; set; } + + [SaneTsv.TypedTsvColumn("value", "m/s:ph-unit:float64")] + public UnitsNet.Speed Value { get; set; } + } + private static void Main(string[] args) { { @@ -35,6 +44,24 @@ internal class Program : SaneTsv } } + { + string testName = "Bad date column name"; + string testString1 = "# ExtraTSV V0.0.1\n" + + "column1:ty\\#pe:boolean\tcolumn2:binary\tiso8601:string" + + "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" + + "\nFALSE\tnother\t2024-02-15T18:03:39.0001"; + + try + { + CommentedTsv parsed = SaneTsv.ParseExtraTsv(Encoding.UTF8.GetBytes(testString1)); + Console.WriteLine($"Failed {testName}"); + } + catch (Exception e) + { + Console.WriteLine($"Passed {testName}"); + } + } + { string testName = "Serde date"; string testString1 = "# ExtraTSV V0.0.1\n" + @@ -54,6 +81,43 @@ internal class Program : SaneTsv } } + { + string testName = "Parse unit"; + string testString1 = "# ExtraTSV V0.0.1\n" + + "id:uint32\tvalue:m/s:ph-unit:float64\n" + + "0\t1.5\n" + + "1\t5.4e3"; + + CommentedTsv parsed = SaneTsv.ParseExtraTsv(Encoding.UTF8.GetBytes(testString1)); + if (parsed.Records[0].Value.Value == 1.5) + { + Console.WriteLine($"Passed {testName}"); + } + else + { + Console.WriteLine($"Failed {testName}"); + } + } + + { + string testName = "Serde unit"; + string testString1 = "# ExtraTSV V0.0.1\n" + + "id:uint32\tvalue:m/s:ph-unit:float64\n" + + "0\t1.5\n" + + "1\t5.4e3"; + + CommentedTsv parsed = SaneTsv.ParseExtraTsv(Encoding.UTF8.GetBytes(testString1)); + string serialized = Encoding.UTF8.GetString(SaneTsv.SerializeExtraTsv(parsed.Records)); + if (testString1 == serialized) + { + Console.WriteLine($"Passed {testName}"); + } + else + { + Console.WriteLine($"Failed {testName}"); + } + } + Console.WriteLine("Done with tests"); } }