# NOTE: reconstructed from a whitespace-collapsed unified diff of libwyag.py
# (index ca57c01..d9e106b, hunk "@@ -783,3 +783,177 @@", appended right
# after the end of cmd_rev_parse).  Only the added definitions are
# reproduced below.  The same diff also adds a gitlink named "test"
# (new file mode 160000) pointing at
# "Subproject commit d53851a324df2b0b1090b51b5f8a7c19dacb3fcc".


class GitIndexEntry (object):
    """A single entry of the index file: one staged path and its metadata.

    Every field defaults to None; index_read() fills all of them in.
    """

    def __init__(self, ctime=None, mtime=None, dev=None, ino=None,
                 mode_type=None, mode_perms=None, uid=None, gid=None,
                 fsize=None, sha=None, flag_assume_valid=None,
                 flag_stage=None, name=None):
        # The last time a file's metadata changed.  This is a pair
        # (timestamp in seconds, nanoseconds)
        self.ctime = ctime
        # The last time a file's data changed.  This is a pair
        # (timestamp in seconds, nanoseconds)
        self.mtime = mtime
        # The ID of device containing this file
        self.dev = dev
        # The file's inode number
        self.ino = ino
        # The object type, either b1000 (regular), b1010 (symlink),
        # b1110 (gitlink).
        self.mode_type = mode_type
        # The object permissions, an integer
        self.mode_perms = mode_perms
        # User ID of the owner
        self.uid = uid
        # Group ID of the owner
        self.gid = gid
        # Size of this object, in bytes
        # (this attribute used to be misspelled "fize")
        self.fsize = fsize
        # The object's SHA
        self.sha = sha
        self.flag_assume_valid = flag_assume_valid
        # Stage bits exactly as masked out of the flags field (not shifted)
        self.flag_stage = flag_stage
        # Name of the object (full path)
        self.name = name


class GitIndex (object):
    """In-memory form of an index file: a format version and its entries."""

    # Placeholders only; __init__ always sets both on the instance.  No
    # mutable object is kept at class level so instances can never share it.
    version = None
    entries = None
    # ext = None
    # sha = None

    def __init__(self, version=2, entries=None):
        # Only substitute a fresh list when nothing was passed, so a
        # caller-supplied (even empty) list is kept as-is.
        if entries is None:
            entries = list()

        self.version = version
        self.entries = entries


def index_read(repo):
    """Read and parse the repository's index file.

    Returns a GitIndex.  When the index file does not exist, an empty
    GitIndex() is returned.  Malformed content (bad signature, a version
    other than 2, extended flags, truncated entries, missing name
    terminator) trips an assertion.
    """
    # NOTE(review): repo_file is reached through GitRepository here; the
    # rest of the file is not visible from this diff, so confirm it is
    # exposed there and not only as a module-level helper.
    index_file = GitRepository.repo_file(repo, "index")

    # New repositories have no index!
    if not os.path.exists(index_file):
        return GitIndex()

    with open(index_file, 'rb') as f:
        raw = f.read()

    header = raw[:12]
    signature = header[:4]
    assert signature == b"DIRC"  # Stands for "DIR Cache"
    version = int.from_bytes(header[4:8], "big")
    assert version == 2, "wyag only supports index file version 2"
    count = int.from_bytes(header[8:12], "big")

    entries = list()

    content = raw[12:]
    idx = 0
    for _ in range(count):
        # Every entry starts with 62 bytes of fixed-size fields.  Slicing
        # past the end of the data would silently yield short (zero-valued)
        # fields, so check the length up front.
        fixed = content[idx:idx + 62]
        assert len(fixed) == 62, "truncated index entry"

        # Metadata change time, as a unix timestamp (seconds since
        # 1970-01-01 00:00:00, the "epoch")
        ctime_s = int.from_bytes(fixed[0:4], "big")
        # Metadata change time, nanoseconds after the unix seconds
        ctime_ns = int.from_bytes(fixed[4:8], "big")
        # Modification time, unix timestamp
        mtime_s = int.from_bytes(fixed[8:12], "big")
        # Modification time, nanoseconds
        mtime_ns = int.from_bytes(fixed[12:16], "big")
        # Device ID
        dev = int.from_bytes(fixed[16:20], "big")
        # Inode
        ino = int.from_bytes(fixed[20:24], "big")
        # Ignored
        unused = int.from_bytes(fixed[24:26], "big")
        assert 0 == unused
        mode = int.from_bytes(fixed[26:28], "big")
        # Top four bits are the object type, low nine bits the permissions
        mode_type = mode >> 12
        assert mode_type in [0b1000, 0b1010, 0b1110]
        mode_perms = mode & 0b0000000111111111
        # User ID
        uid = int.from_bytes(fixed[28:32], "big")
        # Group ID
        gid = int.from_bytes(fixed[32:36], "big")
        # Size
        fsize = int.from_bytes(fixed[36:40], "big")
        # SHA (object ID).  We'll store it as a lowercase hex string
        # for consistency
        sha = fixed[40:60].hex()
        # Flags: one 16-bit field, split up below
        flags = int.from_bytes(fixed[60:62], "big")
        flag_assume_valid = (flags & 0b1000000000000000) != 0
        flag_extended = (flags & 0b0100000000000000) != 0
        assert not flag_extended
        flag_stage = flags & 0b0011000000000000
        # Length of the name.  This is stored on 12 bits, so the max
        # value is 0xFFF, 4095.
        name_length = flags & 0b0000111111111111

        idx += 62

        if name_length < 0xFFF:
            assert content[idx + name_length] == 0x00
            raw_name = content[idx:idx + name_length]
            idx += name_length + 1
        else:
            print(f"Notice: Name is 0x{name_length:X} bytes long")
            # TODO: This probably wasn't tested enough.  It works with a
            # path of exactly 0xFFF bytes.  Any extra bytes broke
            # something between git, my shell and my filesystem
            null_idx = content.find(b'\x00', idx + 0xFFF)
            # find() returns -1 when there is no terminator; going on would
            # produce a bogus name and send idx back to the start.
            assert null_idx != -1, "unterminated name in index entry"
            raw_name = content[idx:null_idx]
            idx = null_idx + 1

        name = raw_name.decode("utf8")

        # Data is padded on multiples of eight bytes for pointer
        # alignment, so we skip as many bytes as we need for the next
        # read to start at the right position.  (Integer round-up to the
        # next multiple of 8; no float arithmetic involved.)
        idx = (idx + 7) // 8 * 8

        entries.append(GitIndexEntry(ctime=(ctime_s, ctime_ns),
                                     mtime=(mtime_s, mtime_ns),
                                     dev=dev,
                                     ino=ino,
                                     mode_type=mode_type,
                                     mode_perms=mode_perms,
                                     uid=uid,
                                     gid=gid,
                                     fsize=fsize,
                                     sha=sha,
                                     flag_assume_valid=flag_assume_valid,
                                     flag_stage=flag_stage,
                                     name=name))

    return GitIndex(version=version, entries=entries)


argsp = argsubparsers.add_parser("ls-files", help="List all the staged files")
argsp.add_argument("--verbose", action="store_true", help="Show everything.")


def cmd_ls_files(args):
    """Print the name of every index entry; with --verbose, its metadata too."""
    repo = repo_find()
    index = index_read(repo)
    if args.verbose:
        print(f"Index file format v{index.version}, containing {len(index.entries)} entries.")

    # Human-readable label for each mode_type value index_read() accepts.
    mode_type_names = {0b1000: "regular file",
                       0b1010: "symlink",
                       0b1110: "git link"}

    # NOTE(review): the leading whitespace inside the messages below is
    # reproduced as it appears in the collapsed diff; confirm the intended
    # indent width against the real file.
    for e in index.entries:
        print(e.name)
        if args.verbose:
            print(" {} with perms: {:o}".format(
                mode_type_names[e.mode_type],
                e.mode_perms))
            print(f" on blob: {e.sha}")
            print(" created: {}.{}, modified: {}.{}".format(
                datetime.fromtimestamp(e.ctime[0]),
                e.ctime[1],
                datetime.fromtimestamp(e.mtime[0]),
                e.mtime[1]))
            print(f" device: {e.dev}, inode: {e.ino}")
            print(f" uid: {e.uid} group: {e.gid}")
            print(f" flags: stage={e.flag_stage} assume_valid={e.flag_assume_valid}")