From 38d324738ebd4bdb6ebf9492fdd84a75c7c4ce16 Mon Sep 17 00:00:00 2001
From: Nathan McRae
Date: Thu, 15 Feb 2024 11:57:45 -0800
Subject: [PATCH] Add basic serialization

---
 SaneTsv/SaneTsv.cs | 100 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/SaneTsv/SaneTsv.cs b/SaneTsv/SaneTsv.cs
index 7eccd88..b1b1756 100644
--- a/SaneTsv/SaneTsv.cs
+++ b/SaneTsv/SaneTsv.cs
@@ -311,6 +311,8 @@ public class SaneTsv
                     throw new Exception($"Field {j} on line {line} is not valid UTF-8", e);
                 }
 
+                // TODO: Add checking for numeric types format
+
                 switch (parsed.ColumnTypes[j])
                 {
                     case ColumnType.STRING:
@@ -399,6 +401,104 @@ public class SaneTsv
         return parsedFields;
     }
 
+    /// <summary>
+    /// Serializes a header row and data rows to UTF-8 encoded SaneTsv text.
+    /// </summary>
+    /// <remarks>
+    /// Fields are joined with tabs and the last header field is followed by a newline.
+    /// Data rows are separated by newlines, with none after the last row.
+    /// </remarks>
+    /// <param name="header">Column names; each must be unique and free of ':'.</param>
+    /// <param name="data">Rows of field values, written in the order given.</param>
+    /// <returns>The serialized text encoded as UTF-8.</returns>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="header"/> or <paramref name="data"/> is null.
+    /// </exception>
+    /// <exception cref="Exception">A column name contains ':' or is repeated.</exception>
+    public static byte[] SerializeSaneTsv(IList<string> header, IList<IList<string>> data)
+    {
+        if (header == null)
+        {
+            throw new ArgumentNullException(nameof(header));
+        }
+
+        if (data == null)
+        {
+            throw new ArgumentNullException(nameof(data));
+        }
+
+        var escapedString = new StringBuilder();
+        var seenColumnNames = new HashSet<string>();
+
+        // Serialize header
+        for (int i = 0; i < header.Count; i++)
+        {
+            if (header[i].Contains(':'))
+            {
+                throw new Exception($"Column {i} contains the character ':'");
+            }
+
+            // HashSet.Add returns false when the name was already present.
+            if (!seenColumnNames.Add(header[i]))
+            {
+                throw new Exception("Column names in header must be unique");
+            }
+
+            AppendEscapedField(escapedString, header[i]);
+            escapedString.Append(i == header.Count - 1 ? '\n' : '\t');
+        }
+
+        // Serialize data
+        for (int i = 0; i < data.Count; i++)
+        {
+            for (int j = 0; j < data[i].Count; j++)
+            {
+                AppendEscapedField(escapedString, data[i][j]);
+
+                if (j < data[i].Count - 1)
+                {
+                    escapedString.Append('\t');
+                }
+                else if (i < data.Count - 1)
+                {
+                    // Newlines separate rows, so the last row gets none.
+                    escapedString.Append('\n');
+                }
+            }
+        }
+
+        return Encoding.UTF8.GetBytes(escapedString.ToString());
+    }
+
+    /// <summary>
+    /// Appends <paramref name="field"/> to <paramref name="builder"/>, writing newline,
+    /// tab, backslash and '#' as the two-character sequences \n, \t, \\ and \#.
+    /// </summary>
+    private static void AppendEscapedField(StringBuilder builder, string field)
+    {
+        foreach (char c in field)
+        {
+            switch (c)
+            {
+                case '\n':
+                    builder.Append("\\n");
+                    break;
+                case '\t':
+                    builder.Append("\\t");
+                    break;
+                case '\\':
+                    builder.Append("\\\\");
+                    break;
+                case '#':
+                    builder.Append("\\#");
+                    break;
+                default:
+                    builder.Append(c);
+                    break;
+            }
+        }
+    }
+
     public SaneTsvRecord this[int i] => Records[i];
 
     public class SaneTsvRecord