Add basic serialization

This commit is contained in:
Nathan McRae 2024-02-15 11:57:45 -08:00
parent 73bc3485ee
commit 34c69c1c69

View File

@ -311,6 +311,8 @@ public class SaneTsv
throw new Exception($"Field {j} on line {line} is not valid UTF-8", e);
}
// TODO: Add checking for numeric types format
switch (parsed.ColumnTypes[j])
{
case ColumnType.STRING:
@ -399,6 +401,103 @@ public class SaneTsv
return parsedFields;
}
public static byte[] SerializeSaneTsv(IList<string> header, IList<IList<string>> data)
{
var escapedString = new StringBuilder();
// Serialize header
for (int i = 0; i < header.Count; i++)
{
if (header[i].Contains(':'))
{
throw new Exception($"Column {i} contains the character ':'");
}
for (int j = i + 1; j < header.Count; j++)
{
if (header[i] == header[j])
{
throw new Exception("Column names in header must be unique");
}
}
for (int j = 0; j < header[i].Count(); j++)
{
if (header[i][j] == '\n')
{
escapedString.Append("\\n");
}
else if (header[i][j] == '\t')
{
escapedString.Append("\\t");
}
else if (header[i][j] == '\\')
{
escapedString.Append("\\\\");
}
else if (header[i][j] == '#')
{
escapedString.Append("\\#");
}
else
{
escapedString.Append(header[i][j]);
}
}
if (i == header.Count - 1)
{
escapedString.Append('\n');
}
else
{
escapedString.Append('\t');
}
}
// Serialize data
for (int i = 0; i < data.Count; i++)
{
for (int j = 0; j < data[i].Count; j++)
{
for (int k = 0; k < data[i][j].Length; k++)
{
if (data[i][j][k] == '\n')
{
escapedString.Append("\\n");
}
else if (data[i][j][k] == '\t')
{
escapedString.Append("\\t");
}
else if (data[i][j][k] == '\\')
{
escapedString.Append("\\\\");
}
else if (data[i][j][k] == '#')
{
escapedString.Append("\\#");
}
else
{
escapedString.Append(data[i][j][k]);
}
}
if (j < data[i].Count - 1)
{
escapedString.Append('\t');
}
else if (i < data.Count - 1)
{
escapedString.Append('\n');
}
}
}
return Encoding.UTF8.GetBytes(escapedString.ToString());
}
public SaneTsvRecord this[int i] => Records[i];
public class SaneTsvRecord