using System;
using System.Collections.Generic;
using System.Text;

namespace NathanMcRae;

/// <summary>
/// Sane Tab-Separated Values
/// </summary>
/// <remarks>
/// A table is a sequence of lines separated by <c>'\n'</c>; each line is a sequence of
/// fields separated by <c>'\t'</c>. The first line is the header and names the columns.
/// Inside a field the two-character sequences <c>\n</c>, <c>\t</c> and <c>\\</c> stand
/// for a literal newline, tab and backslash respectively; any other use of a backslash
/// is an error.
/// </remarks>
public class Stsv
{
    // Strict decoder: unlike Encoding.UTF8 (which silently substitutes U+FFFD), this
    // instance throws DecoderFallbackException when it meets an invalid byte sequence.
    private static readonly UTF8Encoding StrictUtf8 =
        new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    // TODO: We need to be able to update all these in tandem somehow

    /// <summary>Column names in header order.</summary>
    public string[] ColumnNames { get; protected set; } = Array.Empty<string>();

    /// <summary>Field values per column, keyed by column name, in record order.</summary>
    public Dictionary<string, List<string>> Columns { get; protected set; } =
        new Dictionary<string, List<string>>();

    /// <summary>Data records (every line after the header), in input order.</summary>
    public List<StsvRecord> Records { get; protected set; } = new List<StsvRecord>();

    /// <summary>
    /// Parses a UTF-8 encoded STSV document.
    /// </summary>
    /// <param name="inputBuffer">Raw bytes of the document.</param>
    /// <returns>The parsed table. Input consisting only of a header line yields zero records.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputBuffer"/> is null.</exception>
    /// <exception cref="FormatException">
    /// The input is empty, contains an invalid escape sequence, contains a field that is
    /// not valid UTF-8, repeats a column name, or has a record whose field count differs
    /// from the header's.
    /// </exception>
    /// <remarks>
    /// A <c>'\n'</c> always starts a new line, so input that ends with a newline is read as
    /// having one more line containing a single empty field.
    /// </remarks>
    public static Stsv Parse(byte[] inputBuffer)
    {
        if (inputBuffer is null)
        {
            throw new ArgumentNullException(nameof(inputBuffer));
        }

        if (inputBuffer.Length == 0)
        {
            throw new FormatException("Input is empty; expected a header line");
        }

        var parsed = new Stsv();
        var fieldBytes = new List<byte>();
        var fields = new List<string>();
        bool headerSeen = false;
        int lineNumber = 1; // 1-based; the header is line 1

        // Decodes the bytes gathered so far as one field and appends it to the current line.
        void EndField()
        {
            try
            {
                fields.Add(StrictUtf8.GetString(fieldBytes.ToArray()));
            }
            catch (DecoderFallbackException e)
            {
                throw new FormatException($"Field {fields.Count} on line {lineNumber} is not valid UTF-8", e);
            }

            fieldBytes.Clear();
        }

        // Finishes the current line: the first line defines the columns, later lines become records.
        void EndLine()
        {
            EndField();

            if (!headerSeen)
            {
                headerSeen = true;
                var names = new string[fields.Count];
                for (int j = 0; j < fields.Count; j++)
                {
                    string columnName = fields[j];
                    if (!parsed.Columns.TryAdd(columnName, new List<string>()))
                    {
                        throw new FormatException($"Column name {columnName} is not unique");
                    }

                    names[j] = columnName;
                }

                parsed.ColumnNames = names;
            }
            else if (fields.Count != parsed.ColumnNames.Length)
            {
                throw new FormatException(
                    $"Expected {parsed.ColumnNames.Length} fields on line {lineNumber}, but found {fields.Count}");
            }
            else
            {
                for (int j = 0; j < fields.Count; j++)
                {
                    parsed.Columns[parsed.ColumnNames[j]].Add(fields[j]);
                }

                parsed.Records.Add(new StsvRecord(parsed, fields.ToArray()));
            }

            fields.Clear();
            lineNumber++;
        }

        for (int i = 0; i < inputBuffer.Length; i++)
        {
            byte current = inputBuffer[i];
            switch (current)
            {
                case (byte)'\\':
                    if (i + 1 == inputBuffer.Length)
                    {
                        throw new FormatException($"Found '\\' at end of input");
                    }

                    switch (inputBuffer[i + 1])
                    {
                        case (byte)'n':
                            fieldBytes.Add((byte)'\n');
                            break;
                        case (byte)'\\':
                            fieldBytes.Add((byte)'\\');
                            break;
                        case (byte)'t':
                            fieldBytes.Add((byte)'\t');
                            break;
                        default:
                            throw new FormatException($"Expected 'n', 't', or '\\' after '\\' at {i}");
                    }

                    i++; // the escaped character has been consumed as well
                    break;

                case (byte)'\t':
                    EndField();
                    break;

                case (byte)'\n':
                    EndLine();
                    break;

                default:
                    fieldBytes.Add(current);
                    break;
            }
        }

        // The last line has no terminating '\n'; it is handled exactly like any other line.
        EndLine();

        return parsed;
    }

    /// <summary>Gets the record at zero-based index <paramref name="i"/>.</summary>
    public StsvRecord this[int i] => Records[i];

    /// <summary>
    /// One data line of an <see cref="Stsv"/> table.
    /// </summary>
    public class StsvRecord
    {
        /// <summary>The table this record was created for; used to resolve column names.</summary>
        public Stsv Parent { get; }

        /// <summary>Field values, positionally aligned with <see cref="Stsv.ColumnNames"/> of <see cref="Parent"/>.</summary>
        public string[] Fields { get; }

        /// <summary>
        /// Gets the field in the column called <paramref name="columnName"/>.
        /// </summary>
        /// <remarks>
        /// The name is looked up with a linear search of the parent's column names. A name
        /// that is not found produces index -1, so the array access throws
        /// <see cref="IndexOutOfRangeException"/>.
        /// </remarks>
        public string this[string columnName] => Fields[Array.IndexOf(Parent.ColumnNames, columnName)];

        /// <summary>Creates a record holding <paramref name="fields"/> for the table <paramref name="parent"/>.</summary>
        public StsvRecord(Stsv parent, string[] fields)
        {
            Parent = parent;
            Fields = fields;
        }
    }
}