From 6b857527bb55bc4537ca442a177fdc412b1577b4 Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 11:45:48 +0200 Subject: [PATCH 1/8] First version of the split plugin with FULL file download --- acdcli/plugins/split.py | 144 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 acdcli/plugins/split.py diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py new file mode 100644 index 0000000..028f745 --- /dev/null +++ b/acdcli/plugins/split.py @@ -0,0 +1,144 @@ +""" +This is a template that you can use for adding custom plugins. +""" + +import io +import os +import sys +import hashlib +import logging + +from . import * + +logger = logging.getLogger(__name__) + + +class SplitPlugin(Plugin): + MIN_VERSION = '0.3.1' + + @classmethod + def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs): + """ Attaches this plugin to the top-level argparse subparser group + :param subparsers the action subparser group + :param log a list to put initialization log messages in + """ + arg_parser = subparsers.add_parser('split-upload', add_help=False) + arg_parser.add_argument('localpath') + arg_parser.add_argument('remotepath') + arg_parser.add_argument('splitsize', type=int) + arg_parser.set_defaults(func=cls.split_upload) + + arg_parser = subparsers.add_parser('split-download', add_help=False) + arg_parser.add_argument('remotepath') + arg_parser.add_argument('localpath') + arg_parser.set_defaults(func=cls.split_download) + + log.append(str(cls) + ' attached.') + + @classmethod + def split_upload(cls, args: argparse.Namespace) -> int: + """ This is where the magic happens. + Return a zero for success, a non-zero int for failure. """ + + local_path = os.path.abspath(args.localpath) + remote_path = args.remotepath + chunk_size = args.splitsize + + client = args.acd_client + cache = args.cache + + if not os.path.exists(local_path): + logger.critical("File %s doe not exist", local_path) + sys.exit(-1) + + local_file_size = os.path.getsize(local_path) + logger.debug("Local file size: %d", local_file_size) + + chunk_count = local_file_size // chunk_size + last_chunk_size = local_file_size % chunk_size + if 0 != last_chunk_size: + chunk_count += 1 + + logger.debug("Chunk count: %d", chunk_count) + logger.debug("Last chunk size: %d", last_chunk_size) + + remote_node = cache.resolve(remote_path) + if not remote_node: + parent_name = os.path.dirname(remote_path) + parent_node = cache.resolve(parent_name) + if not parent_node: + logger.critical("Parent %s for %s does not exist", parent_name, remote_path) + sys.exit(-1) + + remote_node = client.create_folder(os.path.basename(remote_path), parent_node.id) + cache.insert_node(remote_node) + remote_node = cache.resolve(remote_path) + + if not remote_node.is_folder: + logger.critical("Remote path %s is not a directory", remote_path) + sys.exit(-1) + + with open(local_path, "rb") as file_stream: + for chunk_id in range(chunk_count): + chunk = file_stream.read(chunk_size) + md5_digest = hashlib.md5(chunk).hexdigest() + + upload_chunk = True + chunk_name = "%08x" % chunk_id + remote_child_node = cache.get_child(remote_node.id, chunk_name) + if remote_child_node: + logger.info('%s: Exists', chunk_name) + upload_chunk = remote_child_node.md5 != md5_digest + if upload_chunk: + logger.info('%s: MD5 mis-match (%s / %s)' % (chunk_name, remote_child_node.md5, md5_digest)) + else: + logger.info('%s: MD5 match', chunk_name) + + if upload_chunk: + if remote_child_node: + # if it exists, overwrite + logger.info("%s: Overwriting %d bytes to node %s", chunk_name, len(chunk), remote_child_node.id) + remote_child_node = client.overwrite_stream(io.BytesIO(chunk), remote_child_node.id) + else: + logger.info("%s: Uploading %d bytes", chunk_name, len(chunk)) + remote_child_node = client.upload_stream(io.BytesIO(chunk), chunk_name, remote_node.id) + cache.insert_node(remote_child_node) + return 0 + + @classmethod + def split_download(cls, args: argparse.Namespace) -> int: + """ This is where the magic happens. + Return a zero for success, a non-zero int for failure. """ + + remote_path = args.remotepath + local_path = os.path.abspath(args.localpath) + + client = args.acd_client + cache = args.cache + + remote_node = cache.resolve(remote_path) + if not remote_node: + logger.critical("Remote %s does not exist", remote_path) + sys.exit(-1) + + if not remote_node.is_folder: + logger.critical("Remote path %s is not a directory", remote_path) + sys.exit(-1) + + chunk_id = 0 + with open(local_path, "wb") as file_stream: + while True: + chunk_name = "%08x" % chunk_id + chunk_node = cache.get_child(remote_node.id, chunk_name) + if not chunk_node: + logger.debug("Chunk %s not found, stopping", chunk_name) + break + + logger.info("Download chunk %s with %d bytes", chunk_name, chunk_node.size) + chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size) + + file_stream.write(chunk) + + chunk_id += 1 + + return 0 From c45cddcc752fcce8a569d59c6e0c175f9104c45e Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 14:37:23 +0200 Subject: [PATCH 2/8] Changed to full streaming --- acdcli/plugins/split.py | 57 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py index 028f745..bb8fb6e 100644 --- a/acdcli/plugins/split.py +++ b/acdcli/plugins/split.py @@ -31,6 +31,7 @@ def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs): arg_parser = subparsers.add_parser('split-download', add_help=False) arg_parser.add_argument('remotepath') arg_parser.add_argument('localpath') + arg_parser.add_argument('splitsize', type=int) arg_parser.set_defaults(func=cls.split_download) log.append(str(cls) + ' attached.') @@ -51,17 +52,6 @@ def split_upload(cls, args: argparse.Namespace) -> int: logger.critical("File %s doe not exist", local_path) sys.exit(-1) - local_file_size = os.path.getsize(local_path) - logger.debug("Local file size: %d", local_file_size) - - chunk_count = local_file_size // chunk_size - last_chunk_size = local_file_size % chunk_size - if 0 != last_chunk_size: - chunk_count += 1 - - logger.debug("Chunk count: %d", chunk_count) - logger.debug("Last chunk size: %d", last_chunk_size) - remote_node = cache.resolve(remote_path) if not remote_node: parent_name = os.path.dirname(remote_path) @@ -79,20 +69,21 @@ def split_upload(cls, args: argparse.Namespace) -> int: sys.exit(-1) with open(local_path, "rb") as file_stream: - for chunk_id in range(chunk_count): + chunk_id = 0 + while True: chunk = file_stream.read(chunk_size) + if not chunk: + break + md5_digest = hashlib.md5(chunk).hexdigest() upload_chunk = True chunk_name = "%08x" % chunk_id remote_child_node = cache.get_child(remote_node.id, chunk_name) if remote_child_node: - logger.info('%s: Exists', chunk_name) upload_chunk = remote_child_node.md5 != md5_digest if upload_chunk: logger.info('%s: MD5 mis-match (%s / %s)' % (chunk_name, remote_child_node.md5, md5_digest)) - else: - logger.info('%s: MD5 match', chunk_name) if upload_chunk: if remote_child_node: @@ -103,6 +94,11 @@ def split_upload(cls, args: argparse.Namespace) -> int: logger.info("%s: Uploading %d bytes", chunk_name, len(chunk)) remote_child_node = client.upload_stream(io.BytesIO(chunk), chunk_name, remote_node.id) cache.insert_node(remote_child_node) + else: + logger.info("%s: Keeping %d bytes", chunk_name, len(chunk)) + + chunk_id += 1 + return 0 @classmethod @@ -112,6 +108,7 @@ def split_download(cls, args: argparse.Namespace) -> int: remote_path = args.remotepath local_path = os.path.abspath(args.localpath) + chunk_size = args.splitsize client = args.acd_client cache = args.cache @@ -125,8 +122,12 @@ def split_download(cls, args: argparse.Namespace) -> int: logger.critical("Remote path %s is not a directory", remote_path) sys.exit(-1) + open_mode = "wb" + if os.path.exists(local_path): + open_mode = "rb+" + chunk_id = 0 - with open(local_path, "wb") as file_stream: + with open(local_path, open_mode) as file_stream: while True: chunk_name = "%08x" % chunk_id chunk_node = cache.get_child(remote_node.id, chunk_name) @@ -134,10 +135,26 @@ def split_download(cls, args: argparse.Namespace) -> int: logger.debug("Chunk %s not found, stopping", chunk_name) break - logger.info("Download chunk %s with %d bytes", chunk_name, chunk_node.size) - chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size) - - file_stream.write(chunk) + overwrite_chunk = False + current_position = file_stream.tell() + current_chunk = file_stream.read(chunk_size) + if len(current_chunk) != chunk_node.size: + logger.debug("%s: Size mis-match %d / %d", chunk_name, len(current_chunk), chunk_node.size) + overwrite_chunk = True + else: + current_md5 = hashlib.md5(current_chunk).hexdigest() + if current_md5 != chunk_node.md5: + logger.debug("%s: MD5 mis-match %s / %s", chunk_name, current_md5, chunk_node.md5) + overwrite_chunk = True + + if not overwrite_chunk: + logger.info("%s: Keeping chunk with %d bytes", chunk_name, chunk_node.size) + else: + logger.info("Download chunk %s with %d bytes", chunk_name, chunk_node.size) + chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size) + + file_stream.seek(current_position) + file_stream.write(chunk) chunk_id += 1 From 304b52017dda90227f36640ccf7e36dbea9123d3 Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 15:44:48 +0200 Subject: [PATCH 3/8] Upload only lzma compression added --- acdcli/plugins/split.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py index bb8fb6e..5abfafe 100644 --- a/acdcli/plugins/split.py +++ b/acdcli/plugins/split.py @@ -5,6 +5,7 @@ import io import os import sys +import lzma import hashlib import logging @@ -12,6 +13,8 @@ logger = logging.getLogger(__name__) +lzma_filters = [{"id": lzma.FILTER_LZMA2, "preset": lzma.PRESET_EXTREME}] + class SplitPlugin(Plugin): MIN_VERSION = '0.3.1' @@ -23,6 +26,7 @@ def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs): :param log a list to put initialization log messages in """ arg_parser = subparsers.add_parser('split-upload', add_help=False) + arg_parser.add_argument('--lzma-compress', '-lc', action='store_true') arg_parser.add_argument('localpath') arg_parser.add_argument('remotepath') arg_parser.add_argument('splitsize', type=int) @@ -85,6 +89,14 @@ def split_upload(cls, args: argparse.Namespace) -> int: if upload_chunk: logger.info('%s: MD5 mis-match (%s / %s)' % (chunk_name, remote_child_node.md5, md5_digest)) + if args.lzma_compress: + chunk_length_original = len(chunk) + chunk = lzma.compress(chunk, filters=lzma_filters) + logger.debug("Compressed %d bytes to %s, saved %02.2f%%", + chunk_length_original, + len(chunk), + 100.0 * ((chunk_length_original - len(chunk)) / chunk_length_original)) + if upload_chunk: if remote_child_node: # if it exists, overwrite From e8fb06d2962117894233c4721b80077fc895ac28 Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 20:00:37 +0200 Subject: [PATCH 4/8] Fixed downloading and added OpenSSL compatible encryption --- acdcli/plugins/split.py | 51 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 5 deletions(-) diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py index 5abfafe..063ff84 100644 --- a/acdcli/plugins/split.py +++ b/acdcli/plugins/split.py @@ -1,5 +1,9 @@ """ This is a template that you can use for adding custom plugins. + +openssl aes-256-cbc -d -in 00000000 > 00000000.xz +7za x 00000000.xz + """ import io @@ -9,6 +13,9 @@ import hashlib import logging +from Crypto import Random +from Crypto.Cipher import AES + from . import * logger = logging.getLogger(__name__) @@ -16,6 +23,14 @@ lzma_filters = [{"id": lzma.FILTER_LZMA2, "preset": lzma.PRESET_EXTREME}] +def derive_key_and_iv(password, salt, key_length, iv_length): + d = d_i = b'' + while len(d) < key_length + iv_length: + d_i = hashlib.md5(d_i + str.encode(password) + salt).digest() + d += d_i + return d[:key_length], d[key_length:key_length+iv_length] + + class SplitPlugin(Plugin): MIN_VERSION = '0.3.1' @@ -27,12 +42,14 @@ def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs): """ arg_parser = subparsers.add_parser('split-upload', add_help=False) arg_parser.add_argument('--lzma-compress', '-lc', action='store_true') + arg_parser.add_argument('--password', '-p', type=str) arg_parser.add_argument('localpath') arg_parser.add_argument('remotepath') arg_parser.add_argument('splitsize', type=int) arg_parser.set_defaults(func=cls.split_upload) arg_parser = subparsers.add_parser('split-download', add_help=False) + arg_parser.add_argument('--password', '-p', type=str) arg_parser.add_argument('remotepath') arg_parser.add_argument('localpath') arg_parser.add_argument('splitsize', type=int) @@ -97,6 +114,16 @@ def split_upload(cls, args: argparse.Namespace) -> int: len(chunk), 100.0 * ((chunk_length_original - len(chunk)) / chunk_length_original)) + if args.password: + salt = Random.new().read(8) + key, iv = derive_key_and_iv(args.password, salt, 32, 16) + cipher_header = b'Salted__' + salt + + cipher = AES.new(key, AES.MODE_CBC, iv) + padding_length = (AES.block_size - len(chunk) % AES.block_size) or AES.block_size + chunk += str.encode(padding_length * chr(padding_length)) + chunk = cipher_header + cipher.encrypt(chunk) + if upload_chunk: if remote_child_node: # if it exists, overwrite @@ -134,12 +161,8 @@ def split_download(cls, args: argparse.Namespace) -> int: logger.critical("Remote path %s is not a directory", remote_path) sys.exit(-1) - open_mode = "wb" - if os.path.exists(local_path): - open_mode = "rb+" - chunk_id = 0 - with open(local_path, open_mode) as file_stream: + with open(local_path, "wb+") as file_stream: while True: chunk_name = "%08x" % chunk_id chunk_node = cache.get_child(remote_node.id, chunk_name) @@ -165,6 +188,24 @@ def split_download(cls, args: argparse.Namespace) -> int: logger.info("Download chunk %s with %d bytes", chunk_name, chunk_node.size) chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size) + if b'Salted__' == chunk[:8]: + # it's encrypted + if not args.password: + logger.critical("Found encrypted chunk but no password specified") + sys.exit(-1) + + salt = chunk[8:16] + key, iv = derive_key_and_iv(args.password, salt, 32, 16) + cipher = AES.new(key, AES.MODE_CBC, iv) + chunk = cipher.decrypt(bytes(chunk[16:])) + padding_length = chunk[-1] + chunk = chunk[:-padding_length] + + try: + chunk = lzma.decompress(chunk) + except: + pass + file_stream.seek(current_position) file_stream.write(chunk) From c53db63c45fa6cf804d41303763dd2a54dff75bd Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 21:38:06 +0200 Subject: [PATCH 5/8] Changed chunk name to just number without leading zeros --- acdcli/plugins/split.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py index 063ff84..8429f3f 100644 --- a/acdcli/plugins/split.py +++ b/acdcli/plugins/split.py @@ -99,7 +99,7 @@ def split_upload(cls, args: argparse.Namespace) -> int: md5_digest = hashlib.md5(chunk).hexdigest() upload_chunk = True - chunk_name = "%08x" % chunk_id + chunk_name = "%x" % chunk_id remote_child_node = cache.get_child(remote_node.id, chunk_name) if remote_child_node: upload_chunk = remote_child_node.md5 != md5_digest @@ -164,7 +164,7 @@ def split_download(cls, args: argparse.Namespace) -> int: chunk_id = 0 with open(local_path, "wb+") as file_stream: while True: - chunk_name = "%08x" % chunk_id + chunk_name = "%x" % chunk_id chunk_node = cache.get_child(remote_node.id, chunk_name) if not chunk_node: logger.debug("Chunk %s not found, stopping", chunk_name) From 063dbf651114fd4d9d038592bbcc34399ab3bcfc Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 22:14:16 +0200 Subject: [PATCH 6/8] download working --- acdcli/plugins/split.py | 175 +++++++++++++++++++++++++++++----------- 1 file changed, 127 insertions(+), 48 deletions(-) diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py index 8429f3f..0244b61 100644 --- a/acdcli/plugins/split.py +++ b/acdcli/plugins/split.py @@ -9,6 +9,7 @@ import io import os import sys +import json import lzma import hashlib import logging @@ -31,6 +32,70 @@ def derive_key_and_iv(password, salt, key_length, iv_length): return d[:key_length], d[key_length:key_length+iv_length] +def prepare_chunk(chunk, compress=False, encrypt_password=None): + if compress: + chunk_length_original = len(chunk) + chunk = lzma.compress(chunk, filters=lzma_filters) + logger.debug("Compressed %d bytes to %s, saved %02.2f%%", + chunk_length_original, + len(chunk), + 100.0 * ((chunk_length_original - len(chunk)) / chunk_length_original)) + + if encrypt_password: + salt = Random.new().read(8) + key, iv = derive_key_and_iv(encrypt_password, salt, 32, 16) + cipher_header = b'Salted__' + salt + + cipher = AES.new(key, AES.MODE_CBC, iv) + padding_length = (AES.block_size - len(chunk) % AES.block_size) or AES.block_size + chunk += str.encode(padding_length * chr(padding_length)) + chunk = cipher_header + cipher.encrypt(chunk) + + return io.BytesIO(chunk) + + +def unprepare_chunk(chunk, decrypt_password=None): + if b'Salted__' == chunk[:8]: + # it's encrypted + if not decrypt_password: + logger.critical("Found encrypted chunk but no password specified") + sys.exit(-1) + + salt = chunk[8:16] + key, iv = derive_key_and_iv(decrypt_password, salt, 32, 16) + cipher = AES.new(key, AES.MODE_CBC, iv) + chunk = cipher.decrypt(bytes(chunk[16:])) + padding_length = chunk[-1] + chunk = chunk[:-padding_length] + + try: + chunk = lzma.decompress(chunk) + except: + pass + + return chunk + + +def load_chunks(client, cache, remote_path, remote_node): + chunks_data = {"chunks": []} + chunks_node = cache.resolve(remote_path + "/chunks") + if chunks_node: + chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id, + 0, + chunks_node.size), + decrypt_password=args.password).decode("utf8")) + else: + result = client.upload_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"), + args.lzma_compress, + args.password), + "chunks", + remote_node.id) + cache.insert_node(result) + chunks_node = cache.resolve(remote_path + "/chunks") + + return chunks_data, chunks_node + + class SplitPlugin(Plugin): MIN_VERSION = '0.3.1' @@ -89,6 +154,22 @@ def split_upload(cls, args: argparse.Namespace) -> int: logger.critical("Remote path %s is not a directory", remote_path) sys.exit(-1) + chunks_data = {"chunks": []} + chunks_node = cache.resolve(remote_path + "/chunks") + if chunks_node: + chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id, + 0, + chunks_node.size), + decrypt_password=args.password).decode("utf8")) + else: + result = client.upload_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"), + args.lzma_compress, + args.password), + "chunks", + remote_node.id) + cache.insert_node(result) + chunks_node = cache.resolve(remote_path + "/chunks") + with open(local_path, "rb") as file_stream: chunk_id = 0 while True: @@ -101,38 +182,38 @@ def split_upload(cls, args: argparse.Namespace) -> int: upload_chunk = True chunk_name = "%x" % chunk_id remote_child_node = cache.get_child(remote_node.id, chunk_name) + if remote_child_node: - upload_chunk = remote_child_node.md5 != md5_digest + if len(chunks_data["chunks"]) > chunk_id and (args.lzma_compress or args.password): + upload_chunk = md5_digest != chunks_data["chunks"][chunk_id] + else: + upload_chunk = md5_digest != remote_child_node.md5 + if upload_chunk: logger.info('%s: MD5 mis-match (%s / %s)' % (chunk_name, remote_child_node.md5, md5_digest)) - if args.lzma_compress: - chunk_length_original = len(chunk) - chunk = lzma.compress(chunk, filters=lzma_filters) - logger.debug("Compressed %d bytes to %s, saved %02.2f%%", - chunk_length_original, - len(chunk), - 100.0 * ((chunk_length_original - len(chunk)) / chunk_length_original)) - - if args.password: - salt = Random.new().read(8) - key, iv = derive_key_and_iv(args.password, salt, 32, 16) - cipher_header = b'Salted__' + salt + if upload_chunk: + if len(chunks_data["chunks"]) > chunk_id: + chunks_data["chunks"][chunk_id] = md5_digest + else: + chunks_data["chunks"].append(md5_digest) - cipher = AES.new(key, AES.MODE_CBC, iv) - padding_length = (AES.block_size - len(chunk) % AES.block_size) or AES.block_size - chunk += str.encode(padding_length * chr(padding_length)) - chunk = cipher_header + cipher.encrypt(chunk) + chunk = prepare_chunk(chunk, compress=args.lzma_compress, encrypt_password=args.password) - if upload_chunk: if remote_child_node: # if it exists, overwrite - logger.info("%s: Overwriting %d bytes to node %s", chunk_name, len(chunk), remote_child_node.id) - remote_child_node = client.overwrite_stream(io.BytesIO(chunk), remote_child_node.id) + logger.info("%s: Overwriting %d bytes to node %s", chunk_name, len(chunk.getvalue()), remote_child_node.id) + remote_child_node = client.overwrite_stream(chunk, remote_child_node.id) else: - logger.info("%s: Uploading %d bytes", chunk_name, len(chunk)) - remote_child_node = client.upload_stream(io.BytesIO(chunk), chunk_name, remote_node.id) + logger.info("%s: Uploading %d bytes", chunk_name, len(chunk.getvalue())) + remote_child_node = client.upload_stream(chunk, chunk_name, remote_node.id) cache.insert_node(remote_child_node) + + result = client.overwrite_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"), + compress=args.lzma_compress, + encrypt_password=args.password), + chunks_node.id) + cache.insert_node(result) else: logger.info("%s: Keeping %d bytes", chunk_name, len(chunk)) @@ -161,8 +242,20 @@ def split_download(cls, args: argparse.Namespace) -> int: logger.critical("Remote path %s is not a directory", remote_path) sys.exit(-1) + chunks_data = {"chunks": []} + chunks_node = cache.resolve(remote_path + "/chunks") + if chunks_node: + chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id, + 0, + chunks_node.size), + decrypt_password=args.password).decode("utf8")) + + open_mode = "rb+" + if not os.path.exists(local_path): + open_mode = "wb+" + chunk_id = 0 - with open(local_path, "wb+") as file_stream: + with open(local_path, open_mode) as file_stream: while True: chunk_name = "%x" % chunk_id chunk_node = cache.get_child(remote_node.id, chunk_name) @@ -173,14 +266,18 @@ def split_download(cls, args: argparse.Namespace) -> int: overwrite_chunk = False current_position = file_stream.tell() current_chunk = file_stream.read(chunk_size) - if len(current_chunk) != chunk_node.size: - logger.debug("%s: Size mis-match %d / %d", chunk_name, len(current_chunk), chunk_node.size) - overwrite_chunk = True + current_md5 = hashlib.md5(current_chunk).hexdigest() + + if len(chunks_data["chunks"]) > chunk_id: + overwrite_chunk = current_md5 != chunks_data["chunks"][chunk_id] else: - current_md5 = hashlib.md5(current_chunk).hexdigest() - if current_md5 != chunk_node.md5: - logger.debug("%s: MD5 mis-match %s / %s", chunk_name, current_md5, chunk_node.md5) + if len(current_chunk) != chunk_node.size: + logger.debug("%s: Size mis-match %d / %d", chunk_name, len(current_chunk), chunk_node.size) overwrite_chunk = True + else: + if current_md5 != chunk_node.md5: + logger.debug("%s: MD5 mis-match %s / %s", chunk_name, current_md5, chunk_node.md5) + overwrite_chunk = True if not overwrite_chunk: logger.info("%s: Keeping chunk with %d bytes", chunk_name, chunk_node.size) @@ -188,26 +285,8 @@ def split_download(cls, args: argparse.Namespace) -> int: logger.info("Download chunk %s with %d bytes", chunk_name, chunk_node.size) chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size) - if b'Salted__' == chunk[:8]: - # it's encrypted - if not args.password: - logger.critical("Found encrypted chunk but no password specified") - sys.exit(-1) - - salt = chunk[8:16] - key, iv = derive_key_and_iv(args.password, salt, 32, 16) - cipher = AES.new(key, AES.MODE_CBC, iv) - chunk = cipher.decrypt(bytes(chunk[16:])) - padding_length = chunk[-1] - chunk = chunk[:-padding_length] - - try: - chunk = lzma.decompress(chunk) - except: - pass - file_stream.seek(current_position) - file_stream.write(chunk) + file_stream.write(unprepare_chunk(chunk, decrypt_password=args.password)) chunk_id += 1 From 79761d0331b8d0fe318bbca00eec003d9a9fd792 Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 22:41:31 +0200 Subject: [PATCH 7/8] Cleanup and comments --- acdcli/plugins/split.py | 171 +++++++++++++++++++++++++++++----------- 1 file changed, 125 insertions(+), 46 deletions(-) diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py index 0244b61..ece7580 100644 --- a/acdcli/plugins/split.py +++ b/acdcli/plugins/split.py @@ -1,5 +1,5 @@ """ -This is a template that you can use for adding custom plugins. +Plugin to upload and download bug files in chunks openssl aes-256-cbc -d -in 00000000 > 00000000.xz 7za x 00000000.xz @@ -21,10 +21,15 @@ logger = logging.getLogger(__name__) +# compress settings lzma_filters = [{"id": lzma.FILTER_LZMA2, "preset": lzma.PRESET_EXTREME}] def derive_key_and_iv(password, salt, key_length, iv_length): + """ + Helper function to get the key and IV from a password and optional salt + OpenSSL compatible + """ d = d_i = b'' while len(d) < key_length + iv_length: d_i = hashlib.md5(d_i + str.encode(password) + salt).digest() @@ -33,69 +38,85 @@ def derive_key_and_iv(password, salt, key_length, iv_length): def prepare_chunk(chunk, compress=False, encrypt_password=None): + """ + Compresses and encrypt a chunk of data + """ if compress: + # save original chunk size for comparing + # to the compressed size chunk_length_original = len(chunk) + + # compress chunk = lzma.compress(chunk, filters=lzma_filters) + logger.debug("Compressed %d bytes to %s, saved %02.2f%%", chunk_length_original, len(chunk), 100.0 * ((chunk_length_original - len(chunk)) / chunk_length_original)) if encrypt_password: + # create a new Random salt for each chunk salt = Random.new().read(8) + # get key and IV based on password and the salt key, iv = derive_key_and_iv(encrypt_password, salt, 32, 16) + # header for an OpenSSL encrypted file + # the term "Salted__" followed by 8 bytes salt cipher_header = b'Salted__' + salt + # create the cipher, AES-256 cipher = AES.new(key, AES.MODE_CBC, iv) + # we need to padd padding_length = (AES.block_size - len(chunk) % AES.block_size) or AES.block_size + # add the padding at the end of the chunk + # for OpenSSL compatibility and to get original + # size of the chunk after decryption chunk += str.encode(padding_length * chr(padding_length)) + # finally add the header and the encrypted chunk = cipher_header + cipher.encrypt(chunk) + # return a fake stream return io.BytesIO(chunk) def unprepare_chunk(chunk, decrypt_password=None): + """ + Helper function that decrypted and decompresses + a chunk of data + """ + if b'Salted__' == chunk[:8]: - # it's encrypted + # the chunk starts with "Salted__" + # so it's encrypted if not decrypt_password: + # no dice without password logger.critical("Found encrypted chunk but no password specified") + # we can't recover from that sys.exit(-1) + # read the salt, the 8 bytes following the "Salted__" salt = chunk[8:16] + # again generate key and IV key, iv = derive_key_and_iv(decrypt_password, salt, 32, 16) + # and the AES-256 cipher cipher = AES.new(key, AES.MODE_CBC, iv) + # decrypt chunk = cipher.decrypt(bytes(chunk[16:])) + # get the length of the padding bytes padding_length = chunk[-1] + # cut them off chunk = chunk[:-padding_length] try: + # decompress chunk = lzma.decompress(chunk) except: + # not so nice code, should check if it at least + # looks like a LZMA compressed chunk pass return chunk -def load_chunks(client, cache, remote_path, remote_node): - chunks_data = {"chunks": []} - chunks_node = cache.resolve(remote_path + "/chunks") - if chunks_node: - chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id, - 0, - chunks_node.size), - decrypt_password=args.password).decode("utf8")) - else: - result = client.upload_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"), - args.lzma_compress, - args.password), - "chunks", - remote_node.id) - cache.insert_node(result) - chunks_node = cache.resolve(remote_path + "/chunks") - - return chunks_data, chunks_node - - class SplitPlugin(Plugin): MIN_VERSION = '0.3.1' @@ -124,80 +145,107 @@ def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs): @classmethod def split_upload(cls, args: argparse.Namespace) -> int: - """ This is where the magic happens. - Return a zero for success, a non-zero int for failure. """ + """ + Upload method + """ + # extract the importanr value for easier access local_path = os.path.abspath(args.localpath) remote_path = args.remotepath chunk_size = args.splitsize - client = args.acd_client cache = args.cache + # does the local file exists at all? if not os.path.exists(local_path): + # Nope, we are done logger.critical("File %s doe not exist", local_path) - sys.exit(-1) + return 1 + # does the remote already exists remote_node = cache.resolve(remote_path) if not remote_node: + # But we need at least the parent node + # to create a directory in it parent_name = os.path.dirname(remote_path) parent_node = cache.resolve(parent_name) if not parent_node: + # No parent, no go logger.critical("Parent %s for %s does not exist", parent_name, remote_path) - sys.exit(-1) + return 1 + # create a directory we can put the chunks in remote_node = client.create_folder(os.path.basename(remote_path), parent_node.id) cache.insert_node(remote_node) remote_node = cache.resolve(remote_path) + # sanity check if the remote is a directory if not remote_node.is_folder: logger.critical("Remote path %s is not a directory", remote_path) - sys.exit(-1) + return 1 + # default for chunks data chunks_data = {"chunks": []} chunks_node = cache.resolve(remote_path + "/chunks") if chunks_node: + # load from ACD chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id, - 0, - chunks_node.size), - decrypt_password=args.password).decode("utf8")) + 0, + chunks_node.size), + decrypt_password=args.password).decode("utf8")) else: + # chunks file doesn't existsi yet, create it with the defalt values' result = client.upload_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"), args.lzma_compress, args.password), - "chunks", - remote_node.id) + "chunks", + remote_node.id) cache.insert_node(result) chunks_node = cache.resolve(remote_path + "/chunks") + # okay, time to get started with open(local_path, "rb") as file_stream: chunk_id = 0 while True: + # read the local file chunk by chunk chunk = file_stream.read(chunk_size) if not chunk: + # nothing to ready anymore, WE ARE DONE break + # calculate the hash md5_digest = hashlib.md5(chunk).hexdigest() upload_chunk = True + # the name for our current chunk chunk_name = "%x" % chunk_id - remote_child_node = cache.get_child(remote_node.id, chunk_name) + # does it already exist on ACD + remote_child_node = cache.get_child(remote_node.id, chunk_name) if remote_child_node: + # YES + + # if we have a MD5 hash in the chunks file and the user has requested compression + # or encryption us the cached MD5 to compare if len(chunks_data["chunks"]) > chunk_id and (args.lzma_compress or args.password): upload_chunk = md5_digest != chunks_data["chunks"][chunk_id] else: + # otherwise compare against the MD5 of ACD upload_chunk = md5_digest != remote_child_node.md5 if upload_chunk: logger.info('%s: MD5 mis-match (%s / %s)' % (chunk_name, remote_child_node.md5, md5_digest)) if upload_chunk: + # So, we need to upload something + + # update the chunks data if len(chunks_data["chunks"]) > chunk_id: chunks_data["chunks"][chunk_id] = md5_digest else: chunks_data["chunks"].append(md5_digest) + # prepate the chunk for ulpoad (maybe compress and encrypt) chunk = prepare_chunk(chunk, compress=args.lzma_compress, encrypt_password=args.password) if remote_child_node: @@ -205,76 +253,102 @@ def split_upload(cls, args: argparse.Namespace) -> int: logger.info("%s: Overwriting %d bytes to node %s", chunk_name, len(chunk.getvalue()), remote_child_node.id) remote_child_node = client.overwrite_stream(chunk, remote_child_node.id) else: + # if not, create a new file logger.info("%s: Uploading %d bytes", chunk_name, len(chunk.getvalue())) remote_child_node = client.upload_stream(chunk, chunk_name, remote_node.id) cache.insert_node(remote_child_node) + # always update the chunks file on ACD result = client.overwrite_stream(prepare_chunk(json.dumps(chunks_data).encode("utf8"), - compress=args.lzma_compress, - encrypt_password=args.password), - chunks_node.id) + compress=args.lzma_compress, + encrypt_password=args.password), + chunks_node.id) cache.insert_node(result) else: logger.info("%s: Keeping %d bytes", chunk_name, len(chunk)) + # and the next chunk chunk_id += 1 return 0 @classmethod def split_download(cls, args: argparse.Namespace) -> int: - """ This is where the magic happens. - Return a zero for success, a non-zero int for failure. """ + """ + Download method + """ + # extract the importanr value for easier access remote_path = args.remotepath local_path = os.path.abspath(args.localpath) chunk_size = args.splitsize - client = args.acd_client cache = args.cache + # get the remote node remote_node = cache.resolve(remote_path) if not remote_node: + # does not exist? logger.critical("Remote %s does not exist", remote_path) - sys.exit(-1) + return 1 + # at least it's folder if not remote_node.is_folder: + # NO?!?!? WTF?! logger.critical("Remote path %s is not a directory", remote_path) - sys.exit(-1) + return 1 + # default for chunks data chunks_data = {"chunks": []} chunks_node = cache.resolve(remote_path + "/chunks") if chunks_node: + # load from ACD chunks_data = json.loads(unprepare_chunk(client.download_chunk(chunks_node.id, - 0, - chunks_node.size), - decrypt_password=args.password).decode("utf8")) + 0, + chunks_node.size), + decrypt_password=args.password).decode("utf8")) + # if the file already exists use this so we can also read from it open_mode = "rb+" if not os.path.exists(local_path): + # if there is no local file we just need to write open_mode = "wb+" - chunk_id = 0 + # okay, time to get started with open(local_path, open_mode) as file_stream: + chunk_id = 0 while True: + # the name for our current chunk chunk_name = "%x" % chunk_id chunk_node = cache.get_child(remote_node.id, chunk_name) if not chunk_node: + # no chunk file on ACD, we are done logger.debug("Chunk %s not found, stopping", chunk_name) break overwrite_chunk = False + # save the current position so we can jump back when we + # need to overwrite this current_position = file_stream.tell() + # read the chunk current_chunk = file_stream.read(chunk_size) + # get the hash current_md5 = hashlib.md5(current_chunk).hexdigest() + # Do we have a MD5 from the chunks file? if len(chunks_data["chunks"]) > chunk_id: + # Yes, compare the local hash against the one from + # the caches file overwrite_chunk = current_md5 != chunks_data["chunks"][chunk_id] else: + # no data + + # overwrite if the filesizes don't match if len(current_chunk) != chunk_node.size: logger.debug("%s: Size mis-match %d / %d", chunk_name, len(current_chunk), chunk_node.size) overwrite_chunk = True else: + # or if the hash from ACD is different if current_md5 != chunk_node.md5: logger.debug("%s: MD5 mis-match %s / %s", chunk_name, current_md5, chunk_node.md5) overwrite_chunk = True @@ -282,12 +356,17 @@ def split_download(cls, args: argparse.Namespace) -> int: if not overwrite_chunk: logger.info("%s: Keeping chunk with %d bytes", chunk_name, chunk_node.size) else: + # we need to overwrite our local file chunk logger.info("Download chunk %s with %d bytes", chunk_name, chunk_node.size) + # download the chunk from ACD chunk = client.download_chunk(chunk_node.id, 0, chunk_node.size) + # jump back to the start of the chunk file_stream.seek(current_position) + # overwrite it file_stream.write(unprepare_chunk(chunk, decrypt_password=args.password)) + # and the next chunk chunk_id += 1 return 0 From caa7489e4826b8128891a722959030057dbc8c3d Mon Sep 17 00:00:00 2001 From: Oliver Heyme Date: Wed, 17 Aug 2016 22:45:29 +0200 Subject: [PATCH 8/8] More cosmetic changes --- acdcli/plugins/split.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/acdcli/plugins/split.py b/acdcli/plugins/split.py index ece7580..66aa9f3 100644 --- a/acdcli/plugins/split.py +++ b/acdcli/plugins/split.py @@ -146,7 +146,7 @@ def attach(cls, subparsers: argparse.ArgumentParser, log: list, **kwargs): @classmethod def split_upload(cls, args: argparse.Namespace) -> int: """ - Upload method + Upload method """ # extract the importanr value for easier access @@ -308,7 +308,7 @@ def split_download(cls, args: argparse.Namespace) -> int: chunks_node.size), decrypt_password=args.password).decode("utf8")) - # if the file already exists use this so we can also read from it + # if the file already exists use this so we can also read from it open_mode = "rb+" if not os.path.exists(local_path): # if there is no local file we just need to write @@ -343,7 +343,7 @@ def split_download(cls, args: argparse.Namespace) -> int: else: # no data - # overwrite if the filesizes don't match + # overwrite if the filesizes don't match if len(current_chunk) != chunk_node.size: logger.debug("%s: Size mis-match %d / %d", chunk_name, len(current_chunk), chunk_node.size) overwrite_chunk = True