diff --git a/README.md b/README.md
index 2aedea7..a8411ea 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,14 @@ Therefore, to restore `Partition #0` in our example, you need to run:
 
 The files will be saved inside the output directory specified by `-o`.
 
+## Mounting Partitions
+RecuperaBit along with python-fuse can mount the recovered partitions for easier access to the contents. Run:
+
+    mount [mountpoint]
+
+to mount all of the partitions in folders inside of the mountpoint.
+
+
 ## License
 
 This software is released under the GNU GPLv3. See `LICENSE` for more details.
diff --git a/main.py b/main.py
index 8ad41dd..0f7fde0 100755
--- a/main.py
+++ b/main.py
@@ -29,10 +29,28 @@
 import pickle
 import sys
+import traceback
+
+
+
 
 from recuperabit import logic, utils
 # scanners
 from recuperabit.fs.ntfs import NTFSScanner
 
+try:
+    import fuse
+    fuseSupport = True
+except ImportError:
+    print("python-fuse not found, disabling fuse support!")
+    fuseSupport = False
+    pass
+
+if fuseSupport:
+    fuse.fuse_python_api = (0, 2)
+    from fuse import Fuse
+    from recuperabit.ifuse import PartView, MultiPartView
+
+
 __author__ = "Andrea Lazzarotto"
 __copyright__ = "(c) 2014-2021, Andrea Lazzarotto"
 __license__ = "GPLv3"
@@ -247,6 +265,17 @@ def interpret(cmd, arguments, parts, shorthands, outdir):
         list_parts(parts, shorthands, lambda x: True)
     elif cmd == 'quit':
         exit(0)
+    elif cmd == 'mount':
+        if fuseSupport:
+            try:
+                fuse = MultiPartView(parts, shorthands, rebuilt)
+                fuse.parse(arguments, errex=0)
+                fuse.main()
+            except Exception as e:
+                print(e)
+                print(traceback.format_exc())
+        else:
+            print('FUSE mounting not available')
     else:
         print('Unknown command.')
 
diff --git a/recuperabit/fs/core_types.py b/recuperabit/fs/core_types.py
index 87dda78..e81efa1 100644
--- a/recuperabit/fs/core_types.py
+++ b/recuperabit/fs/core_types.py
@@ -50,6 +50,7 @@ def __init__(self, index, name, size, is_directory=False,
         self.children = set()
         self.children_names = set()  # Avoid name clashes breaking restore
         self.offset = None  # Offset from beginning of disk
+        self.isopen = False # if the file is currently open
 
     def set_parent(self, parent):
         """Set a pointer to the parent directory."""
@@ -107,6 +108,13 @@ def get_content(self, partition):
         if self.is_directory or self.is_ghost:
             return None
         raise NotImplementedError
+
+    def open(self, partition):
+        raise NotImplementedError
+    def close(self, partition):
+        pass
+    def read(self, partition, offset, size):
+        raise NotImplementedError
 
     # pylint: disable=R0201
     def ignore(self):
diff --git a/recuperabit/fs/ntfs.py b/recuperabit/fs/ntfs.py
index d66c3e9..6eb219d 100644
--- a/recuperabit/fs/ntfs.py
+++ b/recuperabit/fs/ntfs.py
@@ -321,28 +321,26 @@ def _padded_bytes(image, offset, size):
                 '{}'.format(offset, size))
             dump += bytearray('\x00' * (size - len(dump)))
         return dump
-
-    def content_iterator(self, partition, image, datas):
-        """Return an iterator for the contents of this file."""
+
+    def parse_data(self, partition, datas):
         vcn = 0
         spc = partition.sec_per_clus
+
+        output = [] # (vcn start, vcn end, sector offset (-1 if empty))
         for attr in datas:
             diff = attr['start_VCN'] - vcn
             if diff > 0:
-                # We do not try to fill with zeroes as this might produce huge useless files
                 logging.warning(
                     u'Missing part for {}, {} clusters skipped'.format(self, diff)
                 )
-                vcn += diff
-                yield b''
-
+                output.append((vcn, attr['start_VCN'], -1))
+
+            vcn = attr['start_VCN']
             clusters_pos = 0
             size = attr['real_size']
 
             if 'runlist' not in attr:
-                logging.error(
-                    u'Cannot restore {}, missing runlist'.format(self)
-                )
+                raise ValueError(u'Cannot restore {}, missing runlist'.format(self))
                 break
 
             for entry in attr['runlist']:
@@ -350,40 +348,35 @@ def content_iterator(self, partition, image, datas):
                 size -= length
                 # Sparse runlist
                 if entry['offset'] is None:
-                    while length > 0:
-                        amount = min(max_sectors*sector_size, length)
-                        length -= amount
-                        yield b'\x00' * amount
-                    continue
-                # Normal runlists
-                clusters_pos += entry['offset']
-                real_pos = clusters_pos * spc + partition.offset
-                # Avoid to fill memory with huge blocks
-                offset = 0
-                while length > 0:
-                    amount = min(max_sectors*sector_size, length)
-                    position = real_pos*sector_size + offset
-                    partial = self._padded_bytes(image, position, amount)
-                    length -= amount
-                    offset += amount
-                    yield bytes(partial)
+                    output.append((vcn, vcn+entry['length'], -1))
+                else:
+                    # Normal runlists
+                    clusters_pos += entry['offset']
+                    real_pos = clusters_pos * spc + partition.offset
+                    output.append((vcn, vcn+entry['length'], real_pos))
+                vcn += entry['length']
+            if vcn != attr['end_VCN'] + 1:
+                logging.error("VCN miscalcuation! {} {}".format(vcn, attr['end_VCN'] + 1))
             vcn = attr['end_VCN'] + 1
-
-    def get_content(self, partition):
-        """Extract the content of the file.
+        return output
+
+    def open(self, partition):
+        """Opens the file and get the sector locations of the file.
 
         This method works by extracting the $DATA attribute."""
+        if self.isopen:
+            logging.warning(u'Tried to open already open file {}!'.format(self))
+            return # already open!
+
         if self.is_ghost:
-            logging.error(u'Cannot restore ghost file {}'.format(self))
-            return None
+            raise ValueError(u'Cannot open ghost file {}'.format(self))
 
         image = DiskScanner.get_image(partition.scanner)
         dump = sectors(image, File.get_offset(self), FILE_size)
         parsed = parse_file_record(dump)
 
         if not parsed['valid'] or 'attributes' not in parsed:
-            logging.error(u'Invalid MFT entry for {}'.format(self))
-            return None
+            raise ValueError(u'Invalid MFT entry for {}'.format(self))
         attrs = parsed['attributes']
         if ('$ATTRIBUTE_LIST' in attrs and
                 partition.sec_per_clus is not None):
@@ -393,32 +386,30 @@ def get_content(self, partition):
         datas = [d for d in attrs['$DATA'] if d['name'] == self.ads]
         if not len(datas):
             if not self.is_directory:
-                logging.error(u'Cannot restore $DATA attribute(s) '
+                raise ValueError(u'Cannot restore $DATA attribute(s) '
                               'for {}'.format(self))
-            return None
 
         # TODO implemented compressed attributes
         for d in datas:
             if d['flags'] & 0x01:
-                logging.error(u'Cannot restore compressed $DATA attribute(s) '
+                raise ValueError(u'Cannot restore compressed $DATA attribute(s) '
                               'for {}'.format(self))
-                return None
             elif d['flags'] & 0x4000:
                 logging.warning(u'Found encrypted $DATA attribute(s) '
                                 'for {}'.format(self))
-
         # Handle resident file content
         if len(datas) == 1 and not datas[0]['non_resident']:
             single = datas[0]
             start = single['dump_offset'] + single['content_off']
             end = start + single['content_size']
-            content = dump[start:end]
-            return bytes(content)
+            self.resident = True
+            self.content = dump[start:end]
+            self.isopen = True  # mark open only once resident/content are set
+            return
         else:
             if partition.sec_per_clus is None:
-                logging.error(u'Cannot restore non-resident $DATA '
+                raise ValueError(u'Cannot restore non-resident $DATA '
                               'attribute(s) for {}'.format(self))
-                return None
             non_resident = sorted(
                 (d for d in attrs['$DATA'] if d['non_resident']),
                 key=lambda x: x['start_VCN']
@@ -428,7 +419,77 @@ def get_content(self, partition):
                     u'Found leftover resident $DATA attributes for '
                     '{}'.format(self)
                 )
-            return self.content_iterator(partition, image, non_resident)
+            self.resident = False
+            self.content = self.parse_data(partition, non_resident)
+            self.isopen = True  # a failed open must not leave the file half-open
+
+    def content_iterator(self, partition, image, datas):
+        """Return an iterator for the contents of this file."""
+
+        spc = partition.sec_per_clus
+        bpc = sector_size*spc # bytes per cluster
+
+        curlen = 0
+        for attr in self.content:
+            (attr_start, attr_end, sectoroff) = attr
+            curoff = (attr_end - attr_start) * bpc
+            length = min(self.size - curlen, curoff)
+            if length <= 0:
+                break
+            if sectoroff == -1:
+                yield b'\x00' * length  # bytes, like every other chunk
+            else:
+                yield bytes(self._padded_bytes(image, sectoroff*sector_size, length))
+            curlen += length  # advance so the last run is trimmed to the file size
+
+    def get_content(self, partition):
+        """Extract the entire content of the file."""
+        self.open(partition)
+        assert self.isopen
+        if self.resident:
+            return bytes(self.content) # typecast from bytearray -> bytes
+        else:
+            image = DiskScanner.get_image(partition.scanner)
+            return self.content_iterator(partition, image, self.content)
+
+    # TODO it can technically read off the end of the file a bit....
+    def read(self, partition, roffset, rsize):
+        if not self.isopen:
+            raise RuntimeError("tried to read file that wasn't open!")
+        if self.resident:
+            trim = self.content[roffset:roffset+rsize]
+            return bytes(trim) # typecast from bytearray -> bytes
+
+        image = DiskScanner.get_image(partition.scanner)
+        spc = partition.sec_per_clus
+        bpc = sector_size*spc # bytes per cluster
+
+        start_vcn = roffset // bpc
+        offset_startvcn = roffset % bpc
+        end_vcn = (roffset+rsize) // bpc
+
+        value = bytearray()
+        for attr in self.content:
+            (attr_start, attr_end, sectoroff) = attr
+            vcn_off = 0
+            if start_vcn >= attr_end:  # runs are half-open: [attr_start, attr_end)
+                continue
+            elif start_vcn >= attr_start:
+                vcn_off = start_vcn - attr_start
+
+
+            length = min(attr_end, end_vcn + 1) - (attr_start + vcn_off)  # stop at the last cluster needed
+            offset = sectoroff + (spc*vcn_off)
+            if sectoroff == -1:
+                value.extend(b'\x00' * (bpc * length))  # bytearray.extend needs bytes, not str
+            else:
+                value.extend(self._padded_bytes(image, offset*sector_size, length*bpc))
+
+            if end_vcn < attr_end:
+                break
+
+        trim = value[offset_startvcn:offset_startvcn+rsize]
+        return bytes(trim) # typecast from bytearray -> bytes
 
     def ignore(self):
         """Determine which files should be ignored."""
diff --git a/recuperabit/ifuse.py b/recuperabit/ifuse.py
new file mode 100644
index 0000000..80bb882
--- /dev/null
+++ b/recuperabit/ifuse.py
@@ -0,0 +1,191 @@
+import errno  # the handlers below use errno.ENOENT / errno.EIO, so the module name must be bound
+import fuse
+from fuse import Fuse
+
+from stat import S_IFDIR, S_IFLNK, S_IFREG
+
+import os, sys
+import logging
+from .fs.constants import max_sectors, sector_size
+import time
+from datetime import datetime
+from .fs.core_types import File
+import traceback
+
+# was originally named fuse.py until i realized it conflicted with fusepy
+
+def split_all_path(path):
+    allpath = []
+    while True:
+        (head, tail) = os.path.split(path)
+        if head == path: # end of absolute path
+            allpath.insert(0, head)
+            break
+        elif tail == path: # end of relative path
+            allpath.insert(0, tail)
+            break
+        else:
+            path = head
+            allpath.insert(0, tail)
+    return allpath
+
+def recurse_path(spath, node):
+    if len(spath) == 1:
+        return node
+    if node.is_directory:
+        for entry in node.children:
+            if _file_view_repr(entry) == spath[1]:  # match the name readdir() actually lists
+                return recurse_path(spath[1:], entry)
+    return None
+
+def date2utc(dt):
+    if dt is None:
+        return time.time()
+    return (dt - datetime(1970, 1, 1)).total_seconds()
+
+def _file_view_repr(node):
+    """Give the file a name with some metadata about it"""
+    desc = ""
+    if node.is_ghost:
+        desc = desc + '[GHOST]'
+    if node.is_deleted:
+        desc = desc + '[DELETED]'
+    return desc + node.name
+
+class AbstractView(Fuse):
+    def __init__(self, *args, **kw):
+        Fuse.__init__(self, *args, **kw)
+        self.fd = 0
+        self.files = {}
+
+    def get_part_from_path(self, path):
+        raise NotImplementedError
+    def get_file_from_path(self, path):
+        raise NotImplementedError
+
+    def readdir(self, path, offset):
+        file = self.get_file_from_path(path)
+
+        dirents = ['.', '..']
+        if file is not None and file.is_directory:
+            for entry in file.children:
+                dirents.append(_file_view_repr(entry))
+        for r in dirents:
+            yield fuse.Direntry(r)
+
+    def getattr(self, path):
+        file = self.get_file_from_path(path)
+        if file is None:
+            return -errno.ENOENT
+
+        attrs = fuse.Stat()
+        attrs.st_nlink=1
+        attrs.st_blksize=sector_size
+
+        if file.is_directory:
+            attrs.st_mode = S_IFDIR | 0o555  # read-only view: r-x for directories
+        else:
+            attrs.st_mode = S_IFREG | 0o444  # read-only view: r-- for files
+
+        if file.size is not None:
+            attrs.st_size = file.size
+        else:
+            attrs.st_size = 0
+
+        #TODO grab actual info?
+        attrs.st_blocks = (attrs.st_size + (attrs.st_blksize - 1)) // attrs.st_blksize
+
+        mac = file.get_mac()
+        if mac is not None:
+            attrs.st_mtime = date2utc(mac[0])
+            attrs.st_atime = date2utc(mac[1])
+            attrs.st_ctime = date2utc(mac[2])
+        else:
+            attrs.st_mtime = time.time()
+            attrs.st_atime = time.time()
+            attrs.st_ctime = time.time()
+
+        return attrs
+
+    def open(self, path, mode):
+        file = self.get_file_from_path(path)
+        if file is None:
+            return -errno.ENOENT
+        part = self.get_part_from_path(path)
+
+        try:
+            file.open(part)
+        except Exception as e:
+            track = traceback.format_exc()
+            logging.error(e)
+            logging.error(track)
+            return -errno.EIO
+
+        self.fd += 1
+        self.files[self.fd] = file
+        return (0, self.fd)
+
+    def release(self, path, flags, fh):
+        del self.files[fh]
+        return 0
+
+    def read(self, path, size, offset, fh):
+        file = self.get_file_from_path(path)
+        part = self.get_part_from_path(path)
+        try:
+            return file.read(part, offset, size)
+        except Exception as e:
+            track = traceback.format_exc()
+            logging.error(e)
+            logging.error(track)
+            return -errno.EIO
+
+class PartView(AbstractView):
+    def __init__(self, part, root, *args, **kw):
+        AbstractView.__init__(self, *args, **kw)
+        self.part = part
+        self.root = root
+
+    def get_part_from_path(self, path):
+        return self.part
+    def get_file_from_path(self, path):
+        spath = split_all_path(path)
+        return recurse_path(spath, self.root)
+
+
+class MultiPartView(AbstractView):
+    def __init__(self, parts, shorthands, rebuilt, *args, **kw):
+        AbstractView.__init__(self, *args, **kw)
+        self.partdict = {}
+        self.root = File(0, "ROOT", 0, True)
+        self.build_tree(parts, shorthands, rebuilt)
+
+    def build_tree(self, parts, shorthands, rebuilt):
+        for i in range(len(shorthands)):
+            i, par = shorthands[i]
+            part = parts[par]
+            if par not in rebuilt:
+                print('Rebuilding partition...')
+                part.rebuild()
+                rebuilt.add(par)
+                print('Done')
+            partname = 'Partition ' + str(i)
+            file = File(0, partname, 0, True)
+            file.set_mac(datetime.now(), datetime.now(), datetime.now())
+
+            file.add_child(part.root)
+            file.add_child(part.lost)
+            self.root.add_child(file)
+
+            self.partdict[partname] = part
+        self.root.set_mac(datetime.now(), datetime.now(), datetime.now())
+
+
+    def get_part_from_path(self, path):
+        spath = split_all_path(path)
+        return self.partdict[spath[1]]
+
+    def get_file_from_path(self, path):
+        spath = split_all_path(path)
+        # todo include lost files as well?
+        return recurse_path(spath, self.root)