extra-tsv/SaneTsv.cs

using System.Globalization;
using System.Reflection;
using System.Text;
using System.Text.RegularExpressions;
using UnitsNet;
using UnitsNet.Units;

namespace NathanMcRae;

/// <summary>
/// Container for the result of parsing a TSV document into records of type <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Record type; must derive from <see cref="SaneTsv.TsvRecord"/>.</typeparam>
public class Tsv<T> where T : SaneTsv.TsvRecord
{
  /// <summary>The parsed records. Virtual so derived containers can override it.</summary>
  public virtual List<T> Records { get; set; }
}

/// <summary>
/// A <see cref="Tsv{T}"/> that additionally carries the comment found before the header line.
/// </summary>
/// <typeparam name="T">Record type; must derive from <see cref="SaneTsv.TsvRecord"/>.</typeparam>
public class CommentedTsv<T> : Tsv<T> where T : SaneTsv.TsvRecord
{
  /// <summary>The parsed records (overrides <see cref="Tsv{T}.Records"/>).</summary>
  public override List<T> Records { get; set; }
  /// <summary>
  /// Text of the '#' comment lines that precede the header, without the leading '#' and joined
  /// with '\n'. Stays null when the file has no such comment.
  /// </summary>
  public string FileComment { get; set; } = null;
}

/// <summary>
/// Sane Tab-Separated Values
/// </summary>
public class SaneTsv
{
  // Like an enum, but more extensible
  // Each marker class below identifies one column type; the header parser maps the last
  // ':'-separated part of a header field to one of them.
  public class ColumnType { }
  // "string" (or no type at all): field is decoded as UTF-8 text
  public class StringType : ColumnType { }
  // "boolean": field must be exactly "TRUE" or "FALSE"
  public class BooleanType : ColumnType { }
  // "float32": single-precision float written as text
  public class Float32Type : ColumnType { }
  // "float32-le": single-precision float stored as raw little-endian bytes
  public class Float32LEType : ColumnType { }
  // "float64": double-precision float written as text
  public class Float64Type : ColumnType { }
  // "float64-le": double-precision float stored as raw little-endian bytes
  public class Float64LEType : ColumnType { }
  // "uint32": unsigned 32-bit integer written as text
  public class UInt32Type : ColumnType { }
  // "uint64": unsigned 64-bit integer written as text
  public class UInt64Type : ColumnType { }
  // "int32": signed 32-bit integer written as text
  public class Int32Type : ColumnType { }
  // "int64": signed 64-bit integer written as text
  public class Int64Type : ColumnType { }
  // "binary": field bytes are handed to the property unchanged (no UTF-8 decoding)
  public class BinaryType : ColumnType { }
  // "iso8601:string": timestamp text parsed into a DateTime
  public class Iso8601Type : ColumnType { }
  /// <summary>
  /// Column type for a numeric column whose values carry a physical unit.
  /// </summary>
  public class PhysicalUnitsType : ColumnType
  {
    /// <summary>The unit text this column type was created from.</summary>
    public string UnitString { get; }

    /// <summary>UnitsNet description of the unit.</summary>
    public UnitsNet.UnitInfo Units { get; }

    /// <summary>Column type that determines how the numeric value itself is encoded.</summary>
    public ColumnType BaseType { get; internal set; }

    /// <summary>Creates a physical-units column type from its three parts.</summary>
    /// <param name="unitString">Unit text, stored in <see cref="UnitString"/>.</param>
    /// <param name="units">Unit description, stored in <see cref="Units"/>.</param>
    /// <param name="baseType">Encoding of the numeric value, stored in <see cref="BaseType"/>.</param>
    public PhysicalUnitsType(string unitString, UnitsNet.UnitInfo units, ColumnType baseType)
      => (UnitString, Units, BaseType) = (unitString, units, baseType);
  }

  /// <summary>
  /// Table of unit abbreviations.
  /// </summary>
  /// <remarks>
  /// NOTE(review): the header parsing visible in this file validates units through
  /// UnitsNet.Quantity.TryFromUnitAbbreviation and does not consult this table; confirm where
  /// (or whether) it is used.
  /// </remarks>
  public static readonly string[] ValidUnits =
  {
    "m",
    "s",
    "A",
    "K",
    "cd",
    "mol",
    "kg",
    "Hz",
    "rad",
    "sr",
    "N",
    "Pa",
    "J",
    "W",
    "C",
    "V",
    "F",
    "Ω",
    "S",
    "Wb",
    "T",
    "H",
    "°C",
    "lm",
    "lx",
    "Bq",
    "Gy",
    "Sv",
    "kat"
  };

  // Version of the ExtraTSV format implemented by this parser (major.minor.patch).
  // ParseExtraTsv rejects files whose major version differs from MajorVersion.
  public static readonly int MajorVersion = 0;
  public static readonly int MinorVersion = 0;
  public static readonly int PatchVersion = 1;

  // Matched against the file comment, which has the leading '#' already removed; that is why the
  // pattern starts with a space. Groups 1-3 capture the major, minor and patch numbers.
  public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");

  /// <summary>
  /// The TSV dialect being parsed. The numeric order matters: the parser uses comparisons such as
  /// <c>format &gt; SIMPLE_TSV</c> (typed headers required) and <c>format &gt;= COMMENTED_TSV</c>
  /// ('#' comment lines allowed).
  /// </summary>
  protected enum FormatType
  {
    SIMPLE_TSV = 0,
    TYPED_TSV = 1,
    COMMENTED_TSV = 2,
  }

  // UTF-8 bytes of the literals "TRUE" and "FALSE" (the only texts the boolean parser accepts).
  public static readonly byte[] TrueEncoded = Encoding.UTF8.GetBytes("TRUE");
  public static readonly byte[] FalseEncoded = Encoding.UTF8.GetBytes("FALSE");

  // Cached answer for LittleEndian; null until the property is read for the first time.
  protected static bool? _littleEndian = null;

  /// <summary>
  /// True when this machine stores the most significant byte of a double last. Determined once,
  /// on first access, by inspecting the bytes of negative infinity (whose top byte is 0xFF).
  /// </summary>
  public static bool LittleEndian
    => _littleEndian ??= BitConverter.GetBytes(double.NegativeInfinity)[7] == 255;

  /// <summary>
  /// Parses <paramref name="inputBuffer"/> as Simple TSV: header fields are plain column names
  /// (no ':' type suffix) and '#' comment lines are not accepted.
  /// </summary>
  /// <typeparam name="T">Record type whose TsvColumn-attributed properties describe the columns.</typeparam>
  /// <param name="inputBuffer">The complete file contents.</param>
  /// <returns>The parsed records.</returns>
  public static Tsv<T> ParseSimpleTsv<T>(byte[] inputBuffer) where T : TsvRecord, new()
    => Parse<T>(inputBuffer, FormatType.SIMPLE_TSV);

  /// <summary>
  /// Parses <paramref name="inputBuffer"/> as Typed TSV: every header field must end in a
  /// ':'-separated type, and '#' comment lines are not accepted.
  /// </summary>
  /// <typeparam name="T">Record type whose TsvColumn-attributed properties describe the columns.</typeparam>
  /// <param name="inputBuffer">The complete file contents.</param>
  /// <returns>The parsed records.</returns>
  public static Tsv<T> ParseTypedTsv<T>(byte[] inputBuffer) where T : TsvRecord, new()
    => Parse<T>(inputBuffer, FormatType.TYPED_TSV);

  /// <summary>
  /// Parses <paramref name="inputBuffer"/> as Commented TSV: typed headers plus '#' comment lines
  /// before the header and before records.
  /// </summary>
  /// <typeparam name="T">Commented record type whose TsvColumn-attributed properties describe the columns.</typeparam>
  /// <param name="inputBuffer">The complete file contents.</param>
  /// <returns>The parsed records together with the file comment, if any.</returns>
  public static CommentedTsv<T> ParseCommentedTsv<T>(byte[] inputBuffer) where T : CommentedTsvRecord, new()
    => (CommentedTsv<T>)Parse<T>(inputBuffer, FormatType.COMMENTED_TSV);

  /// <summary>
  /// Parses <paramref name="inputBuffer"/> as ExtraTSV: a Commented TSV whose file comment starts
  /// with " ExtraTSV Vx.y.z" and whose major version equals <see cref="MajorVersion"/>.
  /// </summary>
  /// <typeparam name="T">Commented record type whose TsvColumn-attributed properties describe the columns.</typeparam>
  /// <param name="inputBuffer">The complete file contents.</param>
  /// <returns>The parsed records together with the file comment.</returns>
  /// <exception cref="Exception">
  /// The file comment is missing, does not match <see cref="VersionRegex"/>, or declares a
  /// different major version.
  /// </exception>
  public static CommentedTsv<T> ParseExtraTsv<T>(byte[] inputBuffer) where T : CommentedTsvRecord, new()
  {
    var parsed = (CommentedTsv<T>)Parse<T>(inputBuffer, FormatType.COMMENTED_TSV);

    // A missing comment and a comment without a version line are reported identically
    string fileComment = parsed.FileComment;
    Match versionMatch = fileComment == null ? Match.Empty : VersionRegex.Match(fileComment);
    if (!versionMatch.Success)
    {
      throw new Exception($"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}");
    }

    int fileMajorVersion = int.Parse(versionMatch.Groups[1].Value);
    if (fileMajorVersion == MajorVersion)
    {
      return parsed;
    }

    throw new Exception($"File has major version ({fileMajorVersion}) which is newer than this parser's version {MajorVersion}");
  }

  /// <summary>
  /// Parses a complete TSV document into records of type <typeparamref name="T"/>.
  /// </summary>
  /// <remarks>
  /// The expected columns are the public properties of <typeparamref name="T"/> that carry a
  /// <see cref="TsvColumnAttribute"/>. The header line must list exactly those columns, in the
  /// same order, with matching names and types. Inputs of 10000 bytes or more are divided into
  /// chunks that start on record boundaries and are parsed with Parallel.For; exceptions thrown
  /// while parsing a chunk are therefore surfaced by Parallel.For rather than thrown directly.
  /// </remarks>
  /// <typeparam name="T">Record type to create for every data line.</typeparam>
  /// <param name="inputBuffer">The complete file contents.</param>
  /// <param name="format">Dialect to accept; decides whether typed headers and comments are allowed.</param>
  /// <returns>
  /// A <see cref="CommentedTsv{T}"/> when <paramref name="format"/> is COMMENTED_TSV, otherwise a
  /// plain <see cref="Tsv{T}"/>.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="inputBuffer"/> is null.</exception>
  /// <exception cref="Exception">The header or a record is malformed.</exception>
  protected static Tsv<T> Parse<T>(byte[] inputBuffer, FormatType format) where T : TsvRecord, new()
  {
    // Resolves the column type of a "float64" / "float64-le" header field. When the field has the
    // form "<name>:<unit>:<UnitsTypeText>:<base>", the result is a PhysicalUnitsType and
    // columnName is shortened to "<name>"; otherwise baseType is returned as is.
    static ColumnType ResolveFloat64Type(string[] typeParts, ColumnType baseType, ref string columnName)
    {
      if (typeParts.Length > 3 && typeParts[typeParts.Length - 2] == UnitsTypeText)
      {
        string unitName = typeParts[typeParts.Length - 3];
        if (!UnitsNet.Quantity.TryFromUnitAbbreviation(1, unitName, out UnitsNet.IQuantity quantity))
        {
          throw new Exception($"Invalid units: {unitName}");
        }

        var unitsType = new PhysicalUnitsType(unitName, UnitsNet.Quantity.GetUnitInfo(quantity.Unit), baseType);
        columnName = string.Join(":", typeParts.Take(typeParts.Length - 3));
        return unitsType;
      }

      return baseType;
    }

    // Returns the index of the first byte of the first record (together with the comment lines
    // that lead into it) that begins at or after `from`, or buffer.Length when there is none.
    // dataStart is the first byte after the header line.
    static int FindRecordStart(byte[] buffer, int from, int dataStart)
    {
      if (from <= dataStart)
      {
        return dataStart;
      }

      // A raw '\n' always ends a line, because newlines inside fields are escaped
      int newline = Array.IndexOf(buffer, (byte)'\n', from - 1);
      if (newline < 0)
      {
        return buffer.Length;
      }
      int lineStart = newline + 1;

      // Locate the start of the line that ends at `newline` to see whether it is a comment
      int previousLineStart = newline;
      while (previousLineStart > dataStart && buffer[previousLineStart - 1] != '\n')
      {
        previousLineStart--;
      }
      bool previousLineIsComment = buffer[previousLineStart] == '#';

      // A line that follows a comment line belongs to the same record as that comment, so keep
      // moving forward until the preceding line is a record line
      while (previousLineIsComment)
      {
        previousLineIsComment = lineStart < buffer.Length && buffer[lineStart] == '#';
        newline = Array.IndexOf(buffer, (byte)'\n', lineStart);
        if (newline < 0)
        {
          return buffer.Length;
        }
        lineStart = newline + 1;
      }

      return lineStart;
    }

    Tsv<T> parsed;
    if (format == FormatType.COMMENTED_TSV)
    {
      parsed = new CommentedTsv<T>();
    }
    else
    {
      parsed = new Tsv<T>();
    }
    parsed.Records = new List<T>();

    // Expected columns, taken from the attributed properties of T
    var columnTypes = new List<ColumnType>();
    var columnNames = new List<string>();
    var columnPropertyInfos = new List<PropertyInfo>();

    foreach (PropertyInfo property in typeof(T).GetProperties())
    {
      TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute));
      if (attribute == null)
      {
        continue;
      }

      columnNames.Add(attribute.ColumnName ?? property.Name);
      columnTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType));
      columnPropertyInfos.Add(property);
      // TODO: Check that the property type and given column type are compatible
    }

    if (inputBuffer == null)
    {
      throw new ArgumentNullException(nameof(inputBuffer));
    }

    var fieldBytes = new List<byte>();
    var fields = new List<byte[]>();
    var currentComment = new StringBuilder();

    int line = 1;
    int currentLineStart = 0;

    // Read the file comment (if any) and the header line.
    // NOTE(review): if the input contains no header-terminating '\n' this loop ends without
    // validating anything and record parsing starts from currentLineStart - 1 regardless.
    for (int i = 0; i < inputBuffer.Length; i++)
    {
      if (inputBuffer[i] == '\\')
      {
        if (i + 1 == inputBuffer.Length)
        {
          throw new Exception($"Found '\\' at end of input");
        }

        switch (inputBuffer[i + 1])
        {
          case (byte)'n':
            fieldBytes.Add((byte)'\n');
            break;
          case (byte)'\\':
            fieldBytes.Add((byte)'\\');
            break;
          case (byte)'t':
            fieldBytes.Add((byte)'\t');
            break;
          case (byte)'#':
            fieldBytes.Add((byte)'#');
            break;
          default:
            throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {line} column {i - currentLineStart}");
        }

        // Skip the escaped character
        i++;
      }
      else if (inputBuffer[i] == '\t')
      {
        // end of field
        fields.Add(fieldBytes.ToArray());
        fieldBytes.Clear();
      }
      else if (inputBuffer[i] == '\n')
      {
        fields.Add(fieldBytes.ToArray());
        fieldBytes.Clear();

        // Every header field is compared with the expected column at the same index, so the
        // counts have to agree before the comparison starts
        if (fields.Count != columnNames.Count)
        {
          throw new Exception($"Expected {columnNames.Count} columns in header, but found {fields.Count}");
        }

        for (int j = 0; j < fields.Count; j++)
        {
          string columnString;
          try
          {
            columnString = Encoding.UTF8.GetString(fields[j]);
          }
          catch (Exception e)
          {
            throw new Exception($"Header field {j} is not valid UTF-8", e);
          }

          string[] columnTypeStrings;
          string columnName;
          if (columnString.Contains(':'))
          {
            if (format == FormatType.SIMPLE_TSV)
            {
              throw new Exception($"Header field {j} contains ':', which is not allowed for column names");
            }
            // The type is the part after the last ':'; everything before it is the name
            columnTypeStrings = columnString.Split(":");
            columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 1));
          }
          else
          {
            if (format > FormatType.SIMPLE_TSV)
            {
              throw new Exception($"Header field {j} has no type");
            }
            columnTypeStrings = new string[] { "" };
            columnName = columnString;
          }

          ColumnType type;

          switch (columnTypeStrings.Last())
          {
            case "":
              type = new StringType();
              break;
            case "string":
              if (columnTypeStrings.Length > 2 && columnTypeStrings[columnTypeStrings.Length - 2] == "iso8601")
              {
                type = new Iso8601Type();
                columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 2));
              }
              // TODO: ISO8601 time spans
              // TODO: ISO8601 time durations
              else
              {
                type = new StringType();
              }
              break;
            case "boolean":
              type = new BooleanType();
              break;
            case "float32":
              type = new Float32Type();
              break;
            case "float32-le":
              type = new Float32LEType();
              break;
            case "float64":
              type = ResolveFloat64Type(columnTypeStrings, new Float64Type(), ref columnName);
              break;
            case "float64-le":
              type = ResolveFloat64Type(columnTypeStrings, new Float64LEType(), ref columnName);
              break;
            case "uint32":
              type = new UInt32Type();
              break;
            case "uint64":
              type = new UInt64Type();
              break;
            case "int32":
              type = new Int32Type();
              break;
            case "int64":
              type = new Int64Type();
              break;
            case "binary":
              type = new BinaryType();
              break;
            default:
              throw new Exception($"Invalid type '{columnTypeStrings.Last()}' for column {j}");
          }

          // TODO: Allow lax parsing (only worry about parsing columns that are given in the specifying type

          if (columnNames[j] != columnName)
          {
            throw new Exception($"Column {j} has name {columnName}, but expected {columnNames[j]}");
          }

          // NOTE(review): only the type class is compared; the unit and base type of a
          // PhysicalUnitsType column are not checked against the header.
          if (columnTypes[j].GetType() != type.GetType())
          {
            throw new Exception($"Column {j} has type {type}, but expected {columnTypes[j]}");
          }
        }

        // Comment lines seen before the header form the file comment
        if (currentComment.Length > 0)
        {
          if (parsed is CommentedTsv<T> commentedParsed)
          {
            commentedParsed.FileComment = currentComment.ToString();
            currentComment.Clear();
          }
          else
          {
            throw new Exception("Found a file comment, but parser wasn't expecting a comment");
          }
        }

        fields.Clear();

        line++;
        currentLineStart = i + 1;

        // Done parsing header
        break;
      }
      else if (inputBuffer[i] == '#')
      {
        if (i == currentLineStart && format >= FormatType.COMMENTED_TSV)
        {
          // Consume the whole comment line, excluding the leading '#'
          int commentEnd = i;
          for (; commentEnd < inputBuffer.Length && inputBuffer[commentEnd] != '\n'; commentEnd++) { }
          if (commentEnd < inputBuffer.Length)
          {
            var commentBytes = new byte[commentEnd - i - 1];
            Array.Copy(inputBuffer, i + 1, commentBytes, 0, commentEnd - i - 1);
            if (currentComment.Length > 0)
            {
              currentComment.Append('\n');
            }
            currentComment.Append(Encoding.UTF8.GetString(commentBytes));
            i = commentEnd;
            currentLineStart = i + 1;
            line++;
          }
          else
          {
            throw new Exception("Comments at end of file are not allowed");
          }
        }
        else
        {
          throw new Exception($"Found unescaped '#' at line {line}, column {i - currentLineStart}");
        }
      }
      else
      {
        fieldBytes.Add(inputBuffer[i]);
      }
    }

    // The record parser only reads these arrays, so one copy can be shared by every chunk
    PropertyInfo[] propertyArray = columnPropertyInfos.ToArray();
    ColumnType[] typeArray = columnTypes.ToArray();

    // TODO: need to figure out where the crossover is
    // Complication: it probably depends on processor count
    if (inputBuffer.Length < 10000)
    {
      parsed.Records.AddRange(Parse<T>(inputBuffer, format, propertyArray, typeArray, currentLineStart - 1, inputBuffer.Length));
      return parsed;
    }

    int parseStart = currentLineStart;
    // ProcessorCount - 1 is 0 on a single-core machine, which would divide by zero below
    int tasks = Math.Max(1, Environment.ProcessorCount - 1);
    int splitCount = (inputBuffer.Length - parseStart) / tasks;

    // chunkStarts[k] is the first byte of chunk k and always the start of a record (including
    // its leading comments); chunkStarts[tasks] is the end of the input. Aligning the chunks
    // this way guarantees that every record is parsed by exactly one chunk.
    int[] chunkStarts = new int[tasks + 1];
    chunkStarts[0] = parseStart;
    chunkStarts[tasks] = inputBuffer.Length;
    for (int k = 1; k < tasks; k++)
    {
      int candidate = FindRecordStart(inputBuffer, parseStart + k * splitCount, parseStart);
      // A chunk of exactly one byte (a lone empty line) cannot be expressed with the chunk
      // parser's start/end convention, so such a line is left for the following chunk
      chunkStarts[k] = candidate - chunkStarts[k - 1] == 1 ? chunkStarts[k - 1] : candidate;
    }

    T[][] parsedValues = new T[tasks][];
    Parallel.For(0, tasks, chunk =>
    {
      int chunkStart = chunkStarts[chunk];
      int chunkEnd = chunkStarts[chunk + 1];
      if (chunkStart == chunkEnd)
      {
        parsedValues[chunk] = Array.Empty<T>();
        return;
      }

      // The chunk parser starts after the '\n' found at startIndex and, for a chunk that does not
      // reach the end of the input, finishes the record that contains byte endIndex - 1.
      // Passing the '\n' that precedes the chunk and the '\n' that ends it selects exactly the
      // records in [chunkStart, chunkEnd).
      int endIndex = chunkEnd >= inputBuffer.Length ? inputBuffer.Length : chunkEnd - 1;
      parsedValues[chunk] = Parse<T>(inputBuffer, format, propertyArray, typeArray, chunkStart - 1, endIndex);
    });

    // TODO: Handle relative line numbers
    for (int chunk = 0; chunk < tasks; chunk++)
    {
      parsed.Records.AddRange(parsedValues[chunk]);
    }

    return parsed;
  }

  // This approach is slightly different than others. We skip the record that startIndex is in and
  // include the record that endIndex is in. We do this because in order to include the record
  // startIndex is in we'd have to go back to the start of the record's comment, and to know
  // exactly where that comment started we'd have to go back to the start of the record before that
  // (not including that other record's comment).
  /// <summary>
  /// Parses the records of one section of <paramref name="inputBuffer"/>.
  /// </summary>
  /// <param name="inputBuffer">The complete file contents.</param>
  /// <param name="format">Dialect being parsed; '#' comment lines require COMMENTED_TSV.</param>
  /// <param name="columnPropertyInfos">Property that receives each column's value.</param>
  /// <param name="columnTypes">Expected type of each column; its length is the required field count.</param>
  /// <param name="startIndex">
  /// Position from which the start scan runs; record parsing begins on the byte after the
  /// position where the scan stops.
  /// </param>
  /// <param name="endIndex">
  /// Once byte <c>endIndex - 1</c> has been processed, parsing continues only until the next
  /// line end. When equal to the buffer length, the unterminated final line is parsed too.
  /// </param>
  /// <returns>The records parsed from the section; line numbers are relative to the section.</returns>
  /// <exception cref="Exception">A line is malformed or has the wrong number of fields.</exception>
  protected static T[] Parse<T>(byte[] inputBuffer, FormatType format, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, int startIndex, int endIndex) where T : TsvRecord, new()
  {
    var fieldBytes = new List<byte>();
    var fields = new List<byte[]>();
    var currentComment = new StringBuilder();
    var parsed = new List<T>();
    bool parsingLastRecord = false;

    int relativeLine = 0;

    // Turns the collected fields (and pending comment, if any) into a record
    void AddRecord()
    {
      string comment = null;
      if (currentComment.Length > 0)
      {
        comment = currentComment.ToString();
        currentComment.Clear();
      }
      // The arrays are only read by ParseCurrentRecord, so they are passed without copying
      parsed.Add(ParseCurrentRecord<T>(columnTypes, columnPropertyInfos, fields, comment, relativeLine));
      fields.Clear();
    }

    // NOTE(review): besides stopping at '\n', this scan also stops at any byte that is followed
    // by '#' (for example the backslash of an escaped "\#"), in which case parsing would begin
    // inside a line. Callers should pass a startIndex that points at a '\n'.
    int i = startIndex;
    while (i < inputBuffer.Length - 1 && inputBuffer[i] != '\n' && inputBuffer[i + 1] != '#')
    {
      i++;
    }

    if (i >= inputBuffer.Length - 1)
    {
      return Array.Empty<T>();
    }

    // Start parsing after \n
    i++;

    int currentLineStart = i;

    for (; i < inputBuffer.Length && (i < endIndex || parsingLastRecord); i++)
    {
      if (inputBuffer[i] == '\\')
      {
        if (i + 1 == inputBuffer.Length)
        {
          throw new Exception($"Found '\\' at end of input");
        }
        if (inputBuffer[i + 1] == 'n')
        {
          fieldBytes.Add((byte)'\n');
          i++;
        }
        else if (inputBuffer[i + 1] == '\\')
        {
          fieldBytes.Add((byte)'\\');
          i++;
        }
        else if (inputBuffer[i + 1] == 't')
        {
          fieldBytes.Add((byte)'\t');
          i++;
        }
        else if (inputBuffer[i + 1] == '#')
        {
          fieldBytes.Add((byte)'#');
          i++;
        }
        else
        {
          throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {relativeLine} column {i - currentLineStart}");
        }
      }
      else if (inputBuffer[i] == '\t')
      {
        // end of field
        fields.Add(fieldBytes.ToArray());
        fieldBytes.Clear();
      }
      else if (inputBuffer[i] == '\n')
      {
        // end of record
        fields.Add(fieldBytes.ToArray());
        fieldBytes.Clear();

        if (columnTypes.Length != fields.Count)
        {
          throw new Exception($"Expected {columnTypes.Length} fields on line {relativeLine}, but found {fields.Count}");
        }
        AddRecord();

        parsingLastRecord = false;
        relativeLine++;
        currentLineStart = i + 1;
      }
      else if (inputBuffer[i] == '#')
      {
        if (i == currentLineStart && format >= FormatType.COMMENTED_TSV)
        {
          // Consume the whole comment line, excluding the leading '#'; it is attached to the
          // next record that is completed
          int j = i;
          for (; j < inputBuffer.Length && inputBuffer[j] != '\n'; j++) { }
          if (j < inputBuffer.Length)
          {
            var commentBytes = new byte[j - i - 1];
            Array.Copy(inputBuffer, i + 1, commentBytes, 0, j - i - 1);
            if (currentComment.Length > 0)
            {
              currentComment.Append('\n');
            }
            currentComment.Append(Encoding.UTF8.GetString(commentBytes));
            i = j;
            currentLineStart = i + 1;
            relativeLine++;
          }
          else
          {
            throw new Exception("Comments at end of file are not allowed");
          }
        }
        else
        {
          throw new Exception($"Found unescaped '#' at line {relativeLine}, column {i - currentLineStart}");
        }
      }
      else
      {
        fieldBytes.Add(inputBuffer[i]);
      }

      // From here on the loop keeps running only until the current line ends
      if (i == endIndex - 1)
      {
        parsingLastRecord = true;
      }
    }

    // Only the section that reaches the end of the input handles the unterminated final line
    if (endIndex < inputBuffer.Length)
    {
      return parsed.ToArray();
    }

    fields.Add(fieldBytes.ToArray());

    if (fields.Count != columnTypes.Length)
    {
      throw new Exception($"Expected {columnTypes.Length} fields on line {relativeLine}, but found {fields.Count}");
    }
    AddRecord();

    return parsed.ToArray();
  }

  /// <summary>
  /// Same as <c>ParseCurrentRecord</c>, with <typeparamref name="T"/> constrained to commented
  /// record types.
  /// </summary>
  protected static T ParseCurrentCommentedRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : CommentedTsvRecord, new()
    => ParseCurrentRecord<T>(columnTypes, properties, fields, comment, line);

  /// <summary>
  /// Builds one record of type <typeparamref name="T"/> from the unescaped bytes of its fields.
  /// </summary>
  /// <remarks>
  /// Numbers written as text are parsed with the invariant culture so that a file is read the
  /// same way regardless of the machine's regional settings.
  /// </remarks>
  /// <param name="columnTypes">Type of each column, indexed like <paramref name="fields"/>.</param>
  /// <param name="properties">Property of the record that receives each field's value.</param>
  /// <param name="fields">Raw bytes of each field of the record.</param>
  /// <param name="comment">Comment attached to the record, or null when there is none.</param>
  /// <param name="line">Line number stored on the record and used in error messages.</param>
  /// <returns>The populated record.</returns>
  /// <exception cref="Exception">
  /// A field does not match its column type, or a comment was supplied for a record type that
  /// is not a <see cref="CommentedTsvRecord"/>.
  /// </exception>
  protected static T ParseCurrentRecord<T>(ColumnType[] columnTypes, PropertyInfo[] properties, List<byte[]> fields, string comment, int line) where T : TsvRecord, new()
  {
    // Checks that a little-endian binary number has the expected width and returns its bytes in
    // this machine's byte order
    static byte[] LittleEndianToNative(byte[] raw, int width, int fieldIndex, int lineNumber)
    {
      if (raw.Length != width)
      {
        throw new Exception($"Field {fieldIndex} on line {lineNumber} must be exactly {width} bytes, but found {raw.Length}");
      }
      if (LittleEndian)
      {
        return raw;
      }

      byte[] reversed = new byte[width];
      for (int k = 0; k < width; k++)
      {
        reversed[k] = raw[width - 1 - k];
      }
      return reversed;
    }

    T record = new T();

    if (record is CommentedTsvRecord commentedRecord)
    {
      commentedRecord.Comment = comment;
    }
    else if (comment != null)
    {
      throw new Exception($"Found comment for line {line}, but format does not support comments");
    }

    record.Line = line;

    for (int j = 0; j < fields.Count; j++)
    {
      Type columnKind = columnTypes[j].GetType();
      // Non-null when the column carries a physical unit on top of its numeric base type
      PhysicalUnitsType physicalUnits = columnTypes[j] as PhysicalUnitsType;

      // All other types require the content to be UTF-8. Binary fields can ignore that.
      if (columnKind == typeof(BinaryType))
      {
        // TODO: Use faster method for property setting
        // e.g. https://blog.marcgravell.com/2012/01/playing-with-your-member.html
        // or https://stackoverflow.com/questions/1027980/improving-performance-reflection-what-alternatives-should-i-consider
        // or https://stackoverflow.com/questions/12767091/why-are-propertyinfo-setvalue-and-getvalue-so-slow
        properties[j].SetValue(record, fields[j]);
        continue;
      }

      if (columnKind == typeof(Float32LEType))
      {
        byte[] floatBytes = LittleEndianToNative(fields[j], sizeof(float), j, line);
        properties[j].SetValue(record, BitConverter.ToSingle(floatBytes, 0));
        continue;
      }

      if (columnKind == typeof(Float64LEType) || physicalUnits?.BaseType is Float64LEType)
      {
        byte[] doubleBytes = LittleEndianToNative(fields[j], sizeof(double), j, line);
        double value = BitConverter.ToDouble(doubleBytes, 0);
        if (physicalUnits != null)
        {
          properties[j].SetValue(record, UnitsNet.Quantity.From(value, physicalUnits.Units.Value));
        }
        else
        {
          properties[j].SetValue(record, value);
        }
        continue;
      }

      // NOTE(review): Encoding.UTF8 substitutes U+FFFD for invalid bytes instead of throwing,
      // so this catch is not expected to fire; a throwing UTF8Encoding would be needed to
      // actually reject invalid UTF-8. Left as is to avoid rejecting files accepted today.
      string fieldString;
      try
      {
        fieldString = Encoding.UTF8.GetString(fields[j]);
      }
      catch (Exception e)
      {
        throw new Exception($"Field {j} on line {line} is not valid UTF-8", e);
      }

      // TODO: Add checking for numeric types format

      if (columnKind == typeof(StringType))
      {
        properties[j].SetValue(record, fieldString);
      }
      else if (columnKind == typeof(BooleanType))
      {
        bool parsedBool;
        if (fieldString == "TRUE")
        {
          parsedBool = true;
        }
        else if (fieldString == "FALSE")
        {
          parsedBool = false;
        }
        else
        {
          throw new Exception($"Field {j} on line {line} is not valid boolean. Must be 'TRUE' or 'FALSE' exactly");
        }

        properties[j].SetValue(record, parsedBool);
      }
      else if (columnKind == typeof(Float32Type))
      {
        // Same number styles as float.TryParse(string, out float), but culture-independent
        if (!float.TryParse(fieldString, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out float parsedFloat))
        {
          if (fieldString == "-inf")
          {
            parsedFloat = float.NegativeInfinity;
          }
          else if (fieldString == "+inf")
          {
            parsedFloat = float.PositiveInfinity;
          }
          else
          {
            throw new Exception($"Field {j} on line {line} is not valid single-precision float");
          }
        }

        properties[j].SetValue(record, parsedFloat);
      }
      else if (columnKind == typeof(Float64Type) || physicalUnits?.BaseType is Float64Type)
      {
        // Same number styles as double.TryParse(string, out double), but culture-independent
        if (!double.TryParse(fieldString, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out double parsedDouble))
        {
          if (fieldString == "-inf")
          {
            parsedDouble = double.NegativeInfinity;
          }
          else if (fieldString == "+inf")
          {
            parsedDouble = double.PositiveInfinity;
          }
          else
          {
            throw new Exception($"Field {j} on line {line} is not valid double-precision float");
          }
        }

        if (physicalUnits != null)
        {
          properties[j].SetValue(record, UnitsNet.Quantity.From(parsedDouble, physicalUnits.Units.Value));
        }
        else
        {
          properties[j].SetValue(record, parsedDouble);
        }
      }
      else if (columnKind == typeof(UInt32Type))
      {
        if (!uint.TryParse(fieldString, NumberStyles.Integer, CultureInfo.InvariantCulture, out uint parsedUInt32))
        {
          throw new Exception($"Field {j} on line {line} is not valid UInt32");
        }

        properties[j].SetValue(record, parsedUInt32);
      }
      else if (columnKind == typeof(UInt64Type))
      {
        if (!ulong.TryParse(fieldString, NumberStyles.Integer, CultureInfo.InvariantCulture, out ulong parsedUInt64))
        {
          throw new Exception($"Field {j} on line {line} is not valid UInt64");
        }

        properties[j].SetValue(record, parsedUInt64);
      }
      else if (columnKind == typeof(Int32Type))
      {
        if (!int.TryParse(fieldString, NumberStyles.Integer, CultureInfo.InvariantCulture, out int parsedInt32))
        {
          throw new Exception($"Field {j} on line {line} is not valid Int32");
        }

        properties[j].SetValue(record, parsedInt32);
      }
      else if (columnKind == typeof(Int64Type))
      {
        if (!long.TryParse(fieldString, NumberStyles.Integer, CultureInfo.InvariantCulture, out long parsedInt64))
        {
          throw new Exception($"Field {j} on line {line} is not valid Int64");
        }

        properties[j].SetValue(record, parsedInt64);
      }
      else if (columnKind == typeof(Iso8601Type))
      {
        if (!DateTime.TryParseExact(fieldString, "yyyy-MM-ddTHH:mm:ss.ffff", CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime parsedTimestamp))
        {
          throw new Exception($"ISO 8601 timestamp format error on line {line}, field {j}");
        }

        properties[j].SetValue(record, parsedTimestamp);
      }
      else
      {
        throw new Exception($"Unexpected type {columnTypes[j]}");
      }
    }

    return record;
  }

  /// <summary>
  /// Serializes a header and rows of strings as Simple TSV encoded in UTF-8.
  /// </summary>
  /// <remarks>
  /// Newline, tab, backslash and '#' characters are written as the escapes \n, \t, \\ and \#.
  /// When <paramref name="data"/> has 100 rows or more, the rows are serialized in parallel.
  /// </remarks>
  /// <param name="header">Column names; must be unique and must not contain ':'.</param>
  /// <param name="data">Rows to write; each row is a list of field values.</param>
  /// <returns>The serialized document.</returns>
  /// <exception cref="Exception">A column name contains ':' or appears more than once.</exception>
  public static byte[] SerializeSimpleTsv(IList<string> header, IList<IList<string>> data)
  {
    var serialized = new List<byte>();
    var escapedString = new StringBuilder();

    // Serialize header
    for (int i = 0; i < header.Count; i++)
    {
      if (header[i].Contains(':'))
      {
        throw new Exception($"Column {i} contains the character ':'");
      }

      for (int j = i + 1; j < header.Count; j++)
      {
        if (header[i] == header[j])
        {
          throw new Exception("Column names in header must be unique");
        }
      }

      for (int j = 0; j < header[i].Length; j++)
      {
        if (header[i][j] == '\n')
        {
          escapedString.Append("\\n");
        }
        else if (header[i][j] == '\t')
        {
          escapedString.Append("\\t");
        }
        else if (header[i][j] == '\\')
        {
          escapedString.Append("\\\\");
        }
        else if (header[i][j] == '#')
        {
          escapedString.Append("\\#");
        }
        else
        {
          escapedString.Append(header[i][j]);
        }
      }

      // Tab between columns, newline after the last one
      if (i == header.Count - 1)
      {
        escapedString.Append('\n');
      }
      else
      {
        escapedString.Append('\t');
      }
    }

    serialized.AddRange(Encoding.UTF8.GetBytes(escapedString.ToString()));

    // TODO: need to figure out where the crossover it
    // Complication: it probably depends on processor count
    if (data.Count < 100)
    {
      serialized.AddRange(Encoding.UTF8.GetBytes(SerializeSimpleTsv(data, 0, data.Count)));
    }
    else
    {
      // ProcessorCount - 1 is 0 on a single-core machine, which would divide by zero below
      int tasks = Math.Max(1, Environment.ProcessorCount - 1);
      int splitCount = data.Count / tasks;
      byte[][] bytes = new byte[tasks][];
      Parallel.For(0, tasks, i =>
      {
        // The last task also takes the rows left over by the integer division
        int endIndex;
        if (i == tasks - 1)
        {
          endIndex = data.Count;
        }
        else
        {
          endIndex = (i + 1) * splitCount;
        }
        string chunk = SerializeSimpleTsv(data, i * splitCount, endIndex);
        bytes[i] = Encoding.UTF8.GetBytes(chunk);
      });

      // Concatenate the chunks in row order
      for (int i = 0; i < tasks; i++)
      {
        serialized.AddRange(bytes[i]);
      }
    }

    return serialized.ToArray();
  }

  /// <summary>
  /// Serializes the rows <paramref name="startIndex"/> (inclusive) to <paramref name="endIndex"/>
  /// (exclusive) of <paramref name="data"/> as escaped TSV text.
  /// </summary>
  /// <param name="data">All rows of the document.</param>
  /// <param name="startIndex">Index of the first row to write.</param>
  /// <param name="endIndex">Index one past the last row to write.</param>
  /// <returns>
  /// The rows as text: fields separated by tabs, and a newline after every row except the last
  /// row of <paramref name="data"/>.
  /// </returns>
  public static string SerializeSimpleTsv(IList<IList<string>> data, int startIndex, int endIndex)
  {
    var output = new StringBuilder();

    for (int row = startIndex; row < endIndex; row++)
    {
      for (int column = 0; column < data[row].Count; column++)
      {
        // Escape the characters that have a structural meaning in the format
        foreach (char character in data[row][column])
        {
          switch (character)
          {
            case '\n':
              output.Append("\\n");
              break;
            case '\t':
              output.Append("\\t");
              break;
            case '\\':
              output.Append("\\\\");
              break;
            case '#':
              output.Append("\\#");
              break;
            default:
              output.Append(character);
              break;
          }
        }

        bool isLastColumn = column >= data[row].Count - 1;
        if (!isLastColumn)
        {
          output.Append('\t');
        }
        else if (row < data.Count - 1)
        {
          // No newline after the final row of the whole document
          output.Append('\n');
        }
      }
    }

    return output.ToString();
  }

  /// <summary>
  /// Parses a Simple TSV document into its column names and rows of strings.
  /// </summary>
  /// <param name="inputBuffer">The complete file contents.</param>
  /// <returns>
  /// The column names from the header line and the data rows. The rows are empty when nothing
  /// follows the header line.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="inputBuffer"/> is null.</exception>
  /// <exception cref="Exception">
  /// The header is not terminated by a newline, contains an invalid escape, an unescaped '#',
  /// or a column name with ':'; or a data row is malformed.
  /// </exception>
  public static (string[] columns, string[][] data) ParseSimpleTsv(byte[] inputBuffer)
  {
    if (inputBuffer == null)
    {
      throw new ArgumentNullException(nameof(inputBuffer));
    }

    string[] columnNames = null;
    var headerFields = new List<byte[]>();
    var fieldBytes = new List<byte>();
    int startOfData = -1;
    for (int i = 0; i < inputBuffer.Length; i++)
    {
      if (inputBuffer[i] == '\\')
      {
        if (i + 1 == inputBuffer.Length)
        {
          throw new Exception($"Found '\\' at end of input");
        }

        switch (inputBuffer[i + 1])
        {
          case (byte)'n':
            fieldBytes.Add((byte)'\n');
            break;
          case (byte)'\\':
            fieldBytes.Add((byte)'\\');
            break;
          case (byte)'t':
            fieldBytes.Add((byte)'\t');
            break;
          case (byte)'#':
            fieldBytes.Add((byte)'#');
            break;
          default:
            throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {1} column {i}");
        }

        // Skip the escaped character
        i++;
      }
      else if (inputBuffer[i] == '\t')
      {
        // end of field
        headerFields.Add(fieldBytes.ToArray());
        fieldBytes.Clear();
      }
      else if (inputBuffer[i] == '\n')
      {
        // This is the end of the header
        headerFields.Add(fieldBytes.ToArray());
        startOfData = i + 1;

        columnNames = new string[headerFields.Count];
        fieldBytes.Clear();

        for (int j = 0; j < headerFields.Count; j++)
        {
          string columnString;
          try
          {
            columnString = Encoding.UTF8.GetString(headerFields[j]);
          }
          catch (Exception e)
          {
            throw new Exception($"Column {j} name is not valid UTF-8", e);
          }

          if (columnString.Contains(':'))
          {
            throw new Exception($"Header field {j} contain ':', which is not allowed for column names");
          }

          columnNames[j] = columnString;
        }

        // Done parsing header
        break;
      }
      else if (inputBuffer[i] == '#')
      {
        throw new Exception($"Found unescaped '#' at line 1, column {i}");
      }
      else
      {
        fieldBytes.Add(inputBuffer[i]);
      }
    }

    // No '\n' was found, so there is no complete header line to take column names from
    if (columnNames == null)
    {
      throw new Exception("Expected the header line to be terminated by a newline");
    }

    // Nothing follows the header; the row parser would index past the end of the buffer
    if (startOfData >= inputBuffer.Length)
    {
      return (columnNames, Array.Empty<string[]>());
    }

    return (columnNames, ParseSimpleTsv(inputBuffer, columnNames.Length, startOfData, inputBuffer.Length));
  }

  /// <summary>
  /// Parses the record lines of a TSV buffer into arrays of unescaped field strings.
  /// </summary>
  /// <remarks>
  /// Parsing begins at the first byte of the line that contains <paramref name="startIndex"/>
  /// (found by scanning backwards for a '\n') and stops before <paramref name="endIndex"/>.
  /// The escapes \n, \t, \\ and \# are decoded; a bare '#' is rejected.
  /// When <paramref name="endIndex"/> is before the end of the buffer, a trailing line that does
  /// not have exactly <paramref name="numFields"/> fields is dropped instead of raising an error.
  /// Reported line numbers start at 2, i.e. they assume parsing starts right after the header line.
  /// </remarks>
  /// <param name="inputBuffer">Raw file bytes.</param>
  /// <param name="numFields">Number of fields every record must have.</param>
  /// <param name="startIndex">Index inside the first line to parse.</param>
  /// <param name="endIndex">Exclusive index at which to stop reading.</param>
  /// <returns>One string array per record, in file order.</returns>
  /// <exception cref="Exception">
  /// Thrown for an invalid escape sequence, an unescaped '#', a wrong field count or a field that is not valid UTF-8.
  /// </exception>
  public static string[][] ParseSimpleTsv(byte[] inputBuffer, int numFields, int startIndex, int endIndex)
  {
    // Encoding.UTF8 silently substitutes U+FFFD for malformed input; a strict decoder is
    // required for the "not valid UTF-8" errors below to ever be raised.
    var strictUtf8 = new UTF8Encoding(false, true);

    var fieldBytes = new List<byte>();
    var fields = new List<byte[]>();
    var records = new List<string[]>();

    int line = 2;

    // Go back to the start of the current line. The clamp and the lower bound keep the scan
    // inside the buffer when startIndex is at the end of the input or no earlier '\n' exists.
    int i = Math.Min(startIndex, inputBuffer.Length - 1);
    while (i >= 0 && inputBuffer[i] != '\n')
    {
      i--;
    }

    // We want to start at the first byte of the current line
    i++;

    // Columns in error messages are relative to the start of the line being parsed
    int currentLineStart = i;

    // Decodes the fields collected for the current line as strict UTF-8
    string[] DecodeCurrentFields()
    {
      var fieldStrings = new string[fields.Count];
      for (int j = 0; j < fields.Count; j++)
      {
        try
        {
          fieldStrings[j] = strictUtf8.GetString(fields[j]);
        }
        catch (DecoderFallbackException e)
        {
          throw new Exception($"Line {line}, column {j} is not valid UTF-8", e);
        }
      }
      return fieldStrings;
    }

    for (; i < endIndex; i++)
    {
      byte current = inputBuffer[i];

      if (current == '\\')
      {
        if (i + 1 == inputBuffer.Length)
        {
          throw new Exception($"Found '\\' at end of input");
        }

        byte unescaped = inputBuffer[i + 1] switch
        {
          (byte)'n' => (byte)'\n',
          (byte)'\\' => (byte)'\\',
          (byte)'t' => (byte)'\t',
          (byte)'#' => (byte)'#',
          _ => throw new Exception($"Expected 'n', 't', '#', or '\\' after '\\' at line {line} column {i - currentLineStart}"),
        };

        fieldBytes.Add(unescaped);

        // Skip the escaped character as well
        i++;
      }
      else if (current == '\t')
      {
        // end of field
        fields.Add(fieldBytes.ToArray());
        fieldBytes.Clear();
      }
      else if (current == '\n')
      {
        // end of record
        fields.Add(fieldBytes.ToArray());
        fieldBytes.Clear();

        if (numFields != fields.Count)
        {
          throw new Exception($"Expected {numFields} fields on line {line}, but found {fields.Count}");
        }

        records.Add(DecodeCurrentFields());
        fields.Clear();

        line++;
        currentLineStart = i + 1;
      }
      else if (current == '#')
      {
        throw new Exception($"Found unescaped '#' at line {line}, column {i - currentLineStart}");
      }
      else
      {
        fieldBytes.Add(current);
      }
    }

    // The last line is not terminated by '\n', so whatever was collected forms its final field.
    // (This always yields at least one field, so a "0 fields" case cannot occur here.)
    fields.Add(fieldBytes.ToArray());

    if (numFields != fields.Count)
    {
      if (endIndex == inputBuffer.Length)
      {
        throw new Exception($"Expected {numFields} fields on line {line}, but found {fields.Count}");
      }

      // Parsing stopped part-way through the buffer: the incomplete trailing line is dropped
      return records.ToArray();
    }

    records.Add(DecodeCurrentFields());
    fields.Clear();

    return records.ToArray();
  }

  /// <summary>
  /// Marker text that identifies a physical-units column type in a type string.
  /// </summary>
  public static string UnitsTypeText = "ph-unit";

  /// <summary>
  /// Matches a physical-units type string of the form "&lt;unit&gt;:ph-unit:&lt;base type&gt;".
  /// Group 1 is the unit text and group 2 is the base type name.
  /// </summary>
  /// <remarks>
  /// The pattern is not anchored. The "-le" variants are listed before their prefixes because
  /// regex alternation takes the first alternative that matches; with "float32" first, group 2
  /// would capture "float32" for a "float32-le" column.
  /// </remarks>
  public static Regex UnitsRegex = new Regex("([^:]+):" + UnitsTypeText + ":(float32-le|float32|float64-le|float64|uint32|uint64|int32|int64)");

  /// <summary>
  /// Converts the textual type of a column (e.g. "float64" or "kg:ph-unit:float64") into a <see cref="ColumnType"/>.
  /// </summary>
  /// <param name="type">Type text without the column name.</param>
  /// <returns>A new <see cref="ColumnType"/> instance describing the column.</returns>
  /// <exception cref="Exception">Thrown when <paramref name="type"/> is not a recognised type.</exception>
  public static ColumnType GetColumnFromString(string type)
  {
    switch (type)
    {
      case "string":
        return new StringType();
      case "boolean":
        return new BooleanType();
      case "float32":
        return new Float32Type();
      case "float32-le":
        return new Float32LEType();
      case "float64":
        return new Float64Type();
      case "float64-le":
        return new Float64LEType();
      case "uint32":
        return new UInt32Type();
      case "uint64":
        return new UInt64Type();
      case "int32":
        return new Int32Type();
      case "int64":
        return new Int64Type();
      case "binary":
        return new BinaryType();
      case "iso8601":
      // GetNameFromColumn writes this type as "iso8601:string", so accept that spelling too
      case "iso8601:string":
        return new Iso8601Type();
    }

    Match match = UnitsRegex.Match(type);
    if (match.Success)
    {
      string unitName = match.Groups[1].Value;

      // Use everything from the start of the base-type group to the end of the text rather than
      // the group value: the pattern is unanchored, so the group alone may be only a prefix
      // (e.g. "float32" out of "float32-le"). Trailing garbage is rejected by the recursive call.
      string baseType = type.Substring(match.Groups[2].Index);

      return new PhysicalUnitsType(unitName, ParseUnit(unitName), GetColumnFromString(baseType));
    }

    throw new Exception($"Invalid type: {type}");
  }

  /// <summary>
  /// Chooses the default <see cref="ColumnType"/> for a CLR property type.
  /// </summary>
  /// <param name="type">CLR type of the property being serialized.</param>
  /// <returns>A new <see cref="ColumnType"/> instance for <paramref name="type"/>.</returns>
  /// <exception cref="Exception">Thrown when no column type is defined for <paramref name="type"/>.</exception>
  public static ColumnType GetColumnFromType(Type type)
  {
    if (type == typeof(string))
    {
      return new StringType();
    }
    if (type == typeof(bool))
    {
      return new BooleanType();
    }
    if (type == typeof(float))
    {
      return new Float32Type();
    }
    if (type == typeof(double))
    {
      return new Float64Type();
    }
    if (type == typeof(UInt32))
    {
      return new UInt32Type();
    }
    if (type == typeof(UInt64))
    {
      return new UInt64Type();
    }
    if (type == typeof(Int32))
    {
      return new Int32Type();
    }
    if (type == typeof(Int64))
    {
      return new Int64Type();
    }
    if (type == typeof(byte[]))
    {
      return new BinaryType();
    }
    if (type == typeof(DateTime))
    {
      return new Iso8601Type();
    }
    if (type == typeof(UnitsNet.Mass))
    {
      // TODO: only Mass is handled, and it is always mapped to kilograms stored as float64
      var a = new UnitsNet.UnitInfo<UnitsNet.Units.MassUnit>(UnitsNet.Units.MassUnit.Kilogram, "kgs", new UnitsNet.BaseUnits(mass: UnitsNet.Units.MassUnit.Kilogram));
      return new PhysicalUnitsType("kg", a, new Float64Type());
    }

    // Report the unsupported type itself; type.GetType() would only name the runtime's Type class
    throw new Exception($"Invalid type: {type}");
  }

  /// <summary>
  /// Produces the type text written to a header for the given column type.
  /// </summary>
  /// <param name="type">Column type to name.</param>
  /// <returns>
  /// The type text; for physical units this is "&lt;unit&gt;:ph-unit:&lt;base type&gt;".
  /// </returns>
  /// <exception cref="Exception">Thrown when <paramref name="type"/> is not a known column type.</exception>
  public static string GetNameFromColumn(ColumnType type)
  {
    // The built-in types are matched on their exact runtime type, not by assignability
    Type exactType = type.GetType();

    return type switch
    {
      _ when exactType == typeof(StringType) => "string",
      _ when exactType == typeof(BooleanType) => "boolean",
      _ when exactType == typeof(Float32Type) => "float32",
      _ when exactType == typeof(Float32LEType) => "float32-le",
      _ when exactType == typeof(Float64Type) => "float64",
      _ when exactType == typeof(Float64LEType) => "float64-le",
      _ when exactType == typeof(UInt32Type) => "uint32",
      _ when exactType == typeof(UInt64Type) => "uint64",
      _ when exactType == typeof(Int32Type) => "int32",
      _ when exactType == typeof(Int64Type) => "int64",
      _ when exactType == typeof(BinaryType) => "binary",
      _ when exactType == typeof(Iso8601Type) => "iso8601:string",
      PhysicalUnitsType unit => $"{unit.UnitString}:{UnitsTypeText}:{GetNameFromColumn(unit.BaseType)}",
      _ => throw new Exception($"Invalid type: {exactType}"),
    };
  }

  /// <summary>
  /// Serializes <paramref name="data"/> using the Simple TSV format.
  /// </summary>
  /// <param name="data">Records to serialize.</param>
  /// <returns>The serialized bytes.</returns>
  public static byte[] SerializeSimpleTsv<T>(IList<T> data) where T : TsvRecord
    => SerializeTsv<T>(data, FormatType.SIMPLE_TSV);

  /// <summary>
  /// Serializes <paramref name="data"/> using the Typed TSV format.
  /// </summary>
  /// <param name="data">Records to serialize.</param>
  /// <returns>The serialized bytes.</returns>
  public static byte[] SerializeTypedTsv<T>(IList<T> data) where T : TsvRecord
    => SerializeTsv<T>(data, FormatType.TYPED_TSV);

  /// <summary>
  /// Serializes <paramref name="data"/> using the Commented TSV format, preceded by a file comment.
  /// </summary>
  /// <param name="data">Records to serialize.</param>
  /// <param name="fileComment">Comment passed through as the file comment.</param>
  /// <returns>The serialized bytes.</returns>
  public static byte[] SerializeCommentedTsv<T>(IList<T> data, string fileComment) where T : CommentedTsvRecord
    => SerializeTsv<T>(data, FormatType.COMMENTED_TSV, fileComment);

  /// <summary>
  /// Serializes <paramref name="data"/> as Commented TSV whose file comment is the
  /// " ExtraTSV V&lt;major&gt;.&lt;minor&gt;.&lt;patch&gt;" version line.
  /// </summary>
  /// <param name="data">Records to serialize.</param>
  /// <returns>The serialized bytes.</returns>
  public static byte[] SerializeExtraTsv<T>(IList<T> data) where T : TsvRecord
  {
    string versionComment = $" ExtraTSV V{MajorVersion}.{MinorVersion}.{PatchVersion}";
    return SerializeTsv<T>(data, FormatType.COMMENTED_TSV, versionComment);
  }

  /// <summary>
  /// Serializes an optional file comment, the header line and all records of <paramref name="data"/>.
  /// </summary>
  /// <remarks>
  /// Only properties of <typeparamref name="T"/> that carry a <see cref="TsvColumnAttribute"/> become columns.
  /// The column name defaults to the property name, and the column type defaults to the one
  /// derived from the property's CLR type when the attribute does not supply one.
  /// </remarks>
  /// <param name="data">Records to serialize.</param>
  /// <param name="tsvFormat">Format to write.</param>
  /// <param name="fileComment">Optional file comment; only allowed for <see cref="FormatType.COMMENTED_TSV"/>.</param>
  /// <returns>The serialized bytes.</returns>
  /// <exception cref="ArgumentNullException">Thrown when <paramref name="data"/> is null.</exception>
  /// <exception cref="Exception">
  /// Thrown when a file comment is given for a format without comments, when a Simple TSV column is not a string,
  /// when column names are not unique, or when a column type has no textual name.
  /// </exception>
  protected static byte[] SerializeTsv<T>(IList<T> data, FormatType tsvFormat, string fileComment = null)
  {
    if (data == null)
    {
      throw new ArgumentNullException(nameof(data));
    }

    var bytes = new List<byte>();

    if (fileComment != null)
    {
      if (tsvFormat != FormatType.COMMENTED_TSV)
      {
        throw new Exception($"File comments are not valid for {tsvFormat}");
      }

      // Every line of the comment is prefixed with '#'
      bytes.AddRange(Encoding.UTF8.GetBytes("#" + fileComment.Replace("\n", "\n#") + "\n"));
    }

    var columnTypes = new List<ColumnType>();
    var columnNames = new List<string>();
    var columnPropertyInfos = new List<PropertyInfo>();

    // Collect the columns from the attributed properties of T
    foreach (PropertyInfo property in typeof(T).GetProperties())
    {
      TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute));
      if (attribute == null)
      {
        continue;
      }

      string headerName = attribute.ColumnName ?? property.Name;
      columnNames.Add(headerName);
      ColumnType headerType = attribute.ColumnType ?? GetColumnFromType(property.PropertyType);
      if (tsvFormat == FormatType.SIMPLE_TSV && headerType.GetType() != typeof(StringType))
      {
        throw new Exception($"Serializing Simple TSV requires all columns be of type string, but column '{headerName}' has type '{headerType}'");
      }
      columnTypes.Add(headerType);
      columnPropertyInfos.Add(property);
      // TODO: Check that the property type and given column type are compatible
    }

    // A single linear pass replaces comparing every pair of names
    if (columnNames.Distinct().Count() != columnNames.Count)
    {
      throw new Exception("Column names in header must be unique");
    }

    // Serialize header
    for (int i = 0; i < columnNames.Count; i++)
    {
      // Escape the bytes that have a structural meaning in the file
      foreach (byte nameByte in Encoding.UTF8.GetBytes(columnNames[i]))
      {
        switch (nameByte)
        {
          case (byte)'\n':
            bytes.Add((byte)'\\');
            bytes.Add((byte)'n');
            break;
          case (byte)'\t':
            bytes.Add((byte)'\\');
            bytes.Add((byte)'t');
            break;
          case (byte)'\\':
            bytes.Add((byte)'\\');
            bytes.Add((byte)'\\');
            break;
          case (byte)'#':
            bytes.Add((byte)'\\');
            bytes.Add((byte)'#');
            break;
          default:
            bytes.Add(nameByte);
            break;
        }
      }

      // Every format except Simple TSV writes "name:type"
      if (tsvFormat != FormatType.SIMPLE_TSV)
      {
        bytes.Add((byte)':');
        try
        {
          bytes.AddRange(Encoding.UTF8.GetBytes(GetNameFromColumn(columnTypes[i])));
        }
        catch (Exception e)
        {
          throw new Exception($"Invalid column type for column {i}", e);
        }
      }

      // Tab between header fields, newline after the last one
      bytes.Add(i == columnNames.Count - 1 ? (byte)'\n' : (byte)'\t');
    }

    // Serialize data
    SerializeTsv<T>(data, bytes, columnPropertyInfos.ToArray(), columnTypes.ToArray(), tsvFormat, 0, data.Count);

    return bytes.ToArray();
  }

  /// <summary>
  /// Appends the records of <paramref name="data"/> from <paramref name="startIndex"/> (inclusive) to
  /// <paramref name="endIndex"/> (exclusive) to <paramref name="bytes"/>.
  /// </summary>
  /// <remarks>
  /// Fields are separated by tabs. A newline follows every record except the last record of
  /// <paramref name="data"/>, so the output of consecutive ranges can be concatenated.
  /// Numbers and timestamps are formatted with the invariant culture so the output does not depend
  /// on the locale of the machine.
  /// </remarks>
  /// <param name="data">Records to serialize.</param>
  /// <param name="bytes">Buffer the serialized bytes are appended to.</param>
  /// <param name="columnPropertyInfos">Property read for each column, in column order.</param>
  /// <param name="columnTypes">Column type for each column, in column order.</param>
  /// <param name="tsvFormat">Format being written (not used by this method).</param>
  /// <param name="startIndex">Index of the first record to write.</param>
  /// <param name="endIndex">Index one past the last record to write.</param>
  /// <exception cref="Exception">Thrown when a value does not fit its column type or the column type is unknown.</exception>
  /// <exception cref="NotImplementedException">Thrown for a physical-units column whose base type is not float64 or float64-le.</exception>
  protected static void SerializeTsv<T>(IList<T> data, List<byte> bytes, PropertyInfo[] columnPropertyInfos, ColumnType[] columnTypes, FormatType tsvFormat, int startIndex, int endIndex)
  {
    CultureInfo invariant = CultureInfo.InvariantCulture;

    // Serialize data
    for (int i = startIndex; i < endIndex; i++)
    {
      for (int j = 0; j < columnTypes.Length; j++)
      {
        object datum = columnPropertyInfos[j].GetValue(data[i]);

        try
        {
          byte[] fieldEncoded = null;
          // Some fields definitely don't need escaping, so we add them directly to bytes
          bool skipEscaping = false;

          if (columnTypes[j].GetType() == typeof(StringType))
          {
            fieldEncoded = Encoding.UTF8.GetBytes((string)datum);
          }
          else if (columnTypes[j].GetType() == typeof(BooleanType))
          {
            bytes.AddRange((bool)datum ? TrueEncoded : FalseEncoded);
            skipEscaping = true;
          }
          else if (columnTypes[j].GetType() == typeof(Float32Type))
          {
            if (datum is float f)
            {
              if (float.IsNegativeInfinity(f))
              {
                bytes.AddRange(Encoding.UTF8.GetBytes("-inf"));
              }
              else if (float.IsPositiveInfinity(f))
              {
                bytes.AddRange(Encoding.UTF8.GetBytes("+inf"));
              }
              else
              {
                // G9 is enough digits for a float to round-trip.
                // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r
                bytes.AddRange(Encoding.UTF8.GetBytes(f.ToString("G9", invariant)));
              }
            }
            else
            {
              throw new InvalidCastException();
            }
            skipEscaping = true;
          }
          else if (columnTypes[j].GetType() == typeof(Float32LEType))
          {
            // Raw little-endian bytes; reversed when the platform byte order is big-endian
            fieldEncoded = BitConverter.GetBytes((float)datum);
            if (!LittleEndian)
            {
              Array.Reverse(fieldEncoded);
            }
          }
          else if (columnTypes[j].GetType() == typeof(Float64Type) || (columnTypes[j] is PhysicalUnitsType f64PhUnit && f64PhUnit.BaseType is Float64Type))
          {
            double value;
            if (datum is double d)
            {
              value = d;
            }
            // TODO: check units match
            else if (datum is UnitsNet.IQuantity quantity)
            {
              value = quantity.Value;
            }
            else
            {
              throw new InvalidCastException();
            }

            if (double.IsNegativeInfinity(value))
            {
              bytes.AddRange(Encoding.UTF8.GetBytes("-inf"));
            }
            else if (double.IsPositiveInfinity(value))
            {
              bytes.AddRange(Encoding.UTF8.GetBytes("+inf"));
            }
            else
            {
              // G17 is enough digits for a double to round-trip.
              // See https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-numeric-format-strings#round-trip-format-specifier-r
              bytes.AddRange(Encoding.UTF8.GetBytes(value.ToString("G17", invariant)));
            }

            skipEscaping = true;
          }
          else if (columnTypes[j].GetType() == typeof(Float64LEType) || (columnTypes[j] is PhysicalUnitsType f64LEPhUnit && f64LEPhUnit.BaseType is Float64LEType))
          {
            double value;
            if (datum is double d)
            {
              value = d;
            }
            // TODO: check units match
            else if (datum is UnitsNet.IQuantity quantity)
            {
              value = quantity.Value;
            }
            else
            {
              throw new InvalidCastException();
            }

            // Raw little-endian bytes; reversed when the platform byte order is big-endian
            fieldEncoded = BitConverter.GetBytes((double)value);
            if (!LittleEndian)
            {
              Array.Reverse(fieldEncoded);
            }
          }
          else if (columnTypes[j].GetType() == typeof(UInt32Type))
          {
            bytes.AddRange(Encoding.UTF8.GetBytes(((UInt32)datum).ToString(invariant)));
            skipEscaping = true;
          }
          else if (columnTypes[j].GetType() == typeof(UInt64Type))
          {
            bytes.AddRange(Encoding.UTF8.GetBytes(((UInt64)datum).ToString(invariant)));
            skipEscaping = true;
          }
          else if (columnTypes[j].GetType() == typeof(Int32Type))
          {
            bytes.AddRange(Encoding.UTF8.GetBytes(((Int32)datum).ToString(invariant)));
            skipEscaping = true;
          }
          else if (columnTypes[j].GetType() == typeof(Int64Type))
          {
            bytes.AddRange(Encoding.UTF8.GetBytes(((Int64)datum).ToString(invariant)));
            skipEscaping = true;
          }
          else if (columnTypes[j].GetType() == typeof(BinaryType))
          {
            fieldEncoded = (byte[])datum;
          }
          else if (columnTypes[j].GetType() == typeof(Iso8601Type))
          {
            // The invariant culture pins the Gregorian calendar and ':' as the time separator
            fieldEncoded = Encoding.UTF8.GetBytes(((DateTime)datum).ToString("yyyy-MM-ddTHH:mm:ss.ffff", invariant));
          }
          else if (columnTypes[j] is PhysicalUnitsType phUnits)
          {
            throw new NotImplementedException($"Physical units types don't support {GetNameFromColumn(phUnits.BaseType)} as a base type");
          }
          else
          {
            throw new Exception($"Unexpected column type {columnTypes[j]} for column {j}");
          }

          if (!skipEscaping)
          {
            // Escape the bytes that have a structural meaning in the file
            for (int k = 0; k < fieldEncoded.Length; k++)
            {
              if (fieldEncoded[k] == '\n')
              {
                bytes.Add((byte)'\\');
                bytes.Add((byte)'n');
              }
              else if (fieldEncoded[k] == '\t')
              {
                bytes.Add((byte)'\\');
                bytes.Add((byte)'t');
              }
              else if (fieldEncoded[k] == '\\')
              {
                bytes.Add((byte)'\\');
                bytes.Add((byte)'\\');
              }
              else if (fieldEncoded[k] == '#')
              {
                bytes.Add((byte)'\\');
                bytes.Add((byte)'#');
              }
              else
              {
                bytes.Add(fieldEncoded[k]);
              }
            }
          }

          if (j < columnTypes.Length - 1)
          {
            bytes.Add((byte)'\t');
          }
          else if (i < data.Count - 1)
          {
            // No newline after the very last record of the data set
            bytes.Add((byte)'\n');
          }
        }
        catch (InvalidCastException e)
        {
          throw new Exception($"Record {i}, field {j} expected type compatible with {GetNameFromColumn(columnTypes[j])}", e);
        }
      }
    }
  }

  /// <summary>
  /// One record of untyped string fields together with the column names it belongs to.
  /// </summary>
  public class SimpleTsvRecord
  {
    /// <summary>Column names, in the same order as <see cref="Fields"/>.</summary>
    public string[] ColumnNames { get; }

    /// <summary>Comment supplied for this record.</summary>
    public string Comment { get; }

    /// <summary>Field values of this record.</summary>
    public string[] Fields { get; }

    /// <summary>Line number supplied for this record.</summary>
    public int? Line { get; }

    /// <summary>Gets the field whose column has the given name.</summary>
    public string this[string columnName]
    {
      get
      {
        int position = Array.IndexOf(ColumnNames, columnName);
        return Fields[position];
      }
    }

    /// <summary>Gets the field at the given column position.</summary>
    public string this[int columnIndex]
    {
      get { return Fields[columnIndex]; }
    }

    /// <summary>
    /// Creates a record from its column names, field values, comment and line number.
    /// </summary>
    public SimpleTsvRecord(string[] columnNames, string[] fields, string comment, int line)
    {
      (ColumnNames, Fields, Comment, Line) = (columnNames, fields, comment, line);
    }
  }

  /// <summary>
  /// Base class for strongly typed records; carries an optional line number.
  /// </summary>
  public class TsvRecord
  {
    /// <summary>Line number associated with this record, or null when none was set.</summary>
    public int? Line { get; set; }

    /// <summary>Creates a record without a line number.</summary>
    public TsvRecord() { }

    /// <summary>Creates a record with the given line number.</summary>
    public TsvRecord(int? line) => Line = line;
  }

  /// <summary>
  /// A <see cref="TsvRecord"/> that additionally carries a comment.
  /// </summary>
  public class CommentedTsvRecord : TsvRecord
  {
    /// <summary>Comment attached to this record.</summary>
    public string Comment { get; set; }

    /// <summary>Creates a record with neither a comment nor a line number.</summary>
    public CommentedTsvRecord() { }

    /// <summary>Creates a record with the given comment and line number.</summary>
    public CommentedTsvRecord(string comment, int? line) : base(line) => Comment = comment;
  }

  /// <summary>
  /// Commented record with a single column named "my-column".
  /// NOTE(review): presumably a sample or test fixture; confirm whether it belongs in the library.
  /// </summary>
  public class TestRecord : CommentedTsvRecord
  {
    /// <summary>Value of the "my-column" column.</summary>
    [TypedTsvColumn("my-column")]
    public string MyColumn { get; set; }
  }

  // TODO: Add column ordering
  /// <summary>
  /// Marks a property as a column. The column type set by this attribute is always string.
  /// </summary>
  public class TsvColumnAttribute : Attribute
  {
    /// <summary>Explicit column name, or null when none was given.</summary>
    public string ColumnName { get; }

    /// <summary>Column type of the property.</summary>
    public virtual ColumnType ColumnType { get; }

    /// <summary>Creates a string column without an explicit name.</summary>
    public TsvColumnAttribute() => ColumnType = new StringType();

    /// <summary>Creates a string column with the given name.</summary>
    public TsvColumnAttribute(string columnName) : this() => ColumnName = columnName;
  }

  // TODO: Add column ordering
  /// <summary>
  /// Marks a property as a column whose type may be given explicitly.
  /// <see cref="ColumnType"/> stays null for the constructors that take no type.
  /// </summary>
  public class TypedTsvColumnAttribute : TsvColumnAttribute
  {
    /// <summary>Explicit column type, or null when none was given.</summary>
    public override ColumnType ColumnType { get; }

    /// <summary>Creates a column with neither an explicit name nor an explicit type.</summary>
    public TypedTsvColumnAttribute() : base() { }

    /// <summary>Creates a column with the given name and no explicit type.</summary>
    public TypedTsvColumnAttribute(string columnName) : base(columnName) { }

    /// <summary>Creates a column with the given name and a type parsed from its textual form.</summary>
    public TypedTsvColumnAttribute(string columnName, string columnType) : base(columnName)
      => ColumnType = GetColumnFromString(columnType);

    /// <summary>Creates a column with the given type and no explicit name.</summary>
    public TypedTsvColumnAttribute(ColumnType columnType) => ColumnType = columnType;
  }

  /// <summary>
  /// Resolves a unit abbreviation to its <see cref="UnitInfo"/> by trying every unit enum that
  /// lives in the same assembly and namespace as <see cref="LengthUnit"/>.
  /// </summary>
  /// <param name="unitName">Unit abbreviation to look up.</param>
  /// <returns>The unit information of the first enum that parses the abbreviation.</returns>
  /// <exception cref="ArgumentException">Thrown when no unit enum recognises the abbreviation.</exception>
  public static UnitInfo ParseUnit(string unitName)
  {
    Type referenceEnum = typeof(LengthUnit);
    string unitsNamespace = referenceEnum.Namespace;

    foreach (Type candidate in Assembly.GetAssembly(referenceEnum).GetTypes())
    {
      // Only the unit enums are of interest
      if (!candidate.IsEnum || candidate.Namespace != unitsNamespace)
      {
        continue;
      }

      if (UnitParser.Default.TryParse(unitName, candidate, out Enum parsedUnit))
      {
        // The first enum that accepts the abbreviation wins
        return Quantity.GetUnitInfo(parsedUnit);
      }
    }

    throw new ArgumentException($"Unable to parse unit abbreviation: {unitName}");
  }
}