diff --git a/libwyag.py b/libwyag.py index 35e0483..29395eb 100644 --- a/libwyag.py +++ b/libwyag.py @@ -185,3 +185,145 @@ argsp.add_argument("path", def cmd_status(args): # TODO: actually get status print(repo_find(args.path).worktree) + +class GitObject (object): + def __init__(self, data=None): + if data != None: + self.deserialize(data) + else: + self.init() + + def serialize(self, repo): + """This function MUST be implemented by subclasses. + +It must read the object's contents from self.data, a byte string, and +do whatever it takes to convert it into a meaningful representation. What exactly that means depends on each subclass.""" + raise Exception("Unimplemented!") + + def deserialize(self, repo): + raise Exception("Unimplemented!") + + def init(self): + pass # other implementations might do something here + +def object_read(repo, sha): + """Read object sha from Git repository repo. Return a + GitObject whose exact type depends on the object.""" + + path = GitRepository.repo_file(repo, "objects", sha[0:2], sha[2:]) + + if not os.path.isfile(path): + return None + + with open(path, "rb") as f: + raw = zlib.decompress(f.read()) + + # Read object type + x = raw.find(b' ') + fmt = raw[0:x] + + # Read and validate object size + y = raw.find(b'\x00', x) + size = int(raw[x:y].decode("ascii")) + if size != len(raw)-y-1: + raise Exception(f"Malformed object {sha}: bad length") + + match fmt: + case b'commit' : c=GitCommit + case b'tree' : c=GitTree + case b'tag' : c=GitTag + case b'blob' : c=GitBlob + case _: + raise Exception(f"Unknown type {fmt.decode('ascii')} for object {sha}") + + return c(raw[y+1:]) + +def object_write(obj, repo=None): + data = obj.serialize() + # Add header + result = obj.fmt + b' ' + str(len(data)).encode() + b'\x00' + data + # Compute hash + sha = hashlib.sha1(result).hexdigest() + + if repo: + # Compute path + path=GitRepository.repo_file(repo, "objects", sha[0:2], sha[2:], mkdir=True) + + if not os.path.exists(path): + with open(path, 'wb') as f: + f.write(zlib.compress(result)) + return sha + +class GitBlob(GitObject): + fmt=b'blob' + + def serialize(self): + return self.blobdata + + def deserialize(self, data): + self.blobdata = data + +argsp = argsubparsers.add_parser("cat-file", + help="Provide content of repository objects") + +argsp.add_argument("type", + metavar="type", + choices=["blob", "commit", "tag", "tree"], + help="Specify the type") + +argsp.add_argument("object", + metavar="object", + help="The object to display") + +def cmd_cat_file(args): + repo = repo_find() + cat_file(repo, args.object, fmt=args.type.encode()) + +def cat_file(repo, obj, fmt=None): + obj = object_read(repo, object_find(repo, obj, fmt=fmt)) + sys.stdout.buffer.write(obj.serialize()) + +def object_find(repo, name, fmt=None, follow=True): + return name + +argsp = argsubparsers.add_parser( + "hash-object", + help="Compute object ID an optionally creates a blob from a file") + +argsp.add_argument("-t", + metavar="type", + dest="type", + choices=["blob", "commit", "tag", "tree"], + default="blob", + help="Specify the type") + +argsp.add_argument("-w", + dest="write", + action="store_true", + help="Actually write the object into the database") + +argsp.add_argument("path", + help="Read object from ") + +def cmd_hash_object(args): + if args.write: + repo = repo_find() + else: + repo = None + + with open(args.path, "rb") as fd: + sha = object_hash(fd, args.type.encode(), repo) + print(sha) + +def object_hash(fd, fmt, repo=None): + """Hash object, writing it to repo if provided""" + data = fd.read() + + match fmt: + case b'commit' : obj=GitCommit(data) + case b'tree' : obj=GitTree(data) + case b'tag' : obj=GitTag(data) + case b'blob' : obj=GitBlob(data) + case _ : raise Exception(f"Unknown type {fmt}") + + return object_write(obj, repo)