From 502492308e7d839bf8d7a47fa9784d13abe4f501 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 7 Aug 2016 19:45:05 -0400 Subject: [PATCH 01/63] support for ecryptfs; this requires sequential file writing using multiple file descriptors, as well as support for getting and setting xattrs on files. --- acd_cli.py | 32 ++++----- acdcli/acd_fuse.py | 145 ++++++++++++++++++++++++++++++++++++----- acdcli/cache/db.py | 13 +++- acdcli/cache/query.py | 34 ++++++++++ acdcli/cache/schema.py | 9 +++ acdcli/cache/sync.py | 32 +++++++++ 6 files changed, 231 insertions(+), 34 deletions(-) diff --git a/acd_cli.py b/acd_cli.py index 52f4b4b..74e224e 100755 --- a/acd_cli.py +++ b/acd_cli.py @@ -1,35 +1,33 @@ #!/usr/bin/env python3 -import sys -import os -import json import argparse +import json import logging import logging.handlers +import os +import re import signal +import sys import time -import re -import appdirs - from collections import namedtuple from configparser import ConfigParser from functools import partial from multiprocessing import Event - from pkgutil import walk_packages + +import appdirs from pkg_resources import iter_entry_points import acdcli +from acdcli import plugins from acdcli.api import client from acdcli.api.common import RequestError, is_valid_id from acdcli.cache import format, db +from acdcli.cache.db import CacheConsts from acdcli.utils import hashing, progress from acdcli.utils.conf import get_conf from acdcli.utils.threading import QueuedLoader from acdcli.utils.time import * -# load local plugin modules (default ones, for developers) -from acdcli import plugins - for importer, modname, ispkg in walk_packages(path=plugins.__path__, prefix=plugins.__name__ + '.', onerror=lambda x: None): if not ispkg: @@ -120,11 +118,13 @@ def pprint(d: dict): # Glue functions (API, cache) # - -class CacheConsts(object): - CHECKPOINT_KEY = 'checkpoint' - LAST_SYNC_KEY = 'last_sync' - MAX_AGE = 30 +def sync_owner_id(): + global cache + owner_id = cache.KeyValueStorage.get(CacheConsts.OWNER_ID) + if not owner_id: + owner_id = acd_client.get_owner_id() + cache.KeyValueStorage[CacheConsts.OWNER_ID] = owner_id + return owner_id def sync_node_list(full=False, to_file=None, from_file=None) -> 'Union[int, None]': @@ -185,12 +185,14 @@ def sync_node_list(full=False, to_file=None, from_file=None) -> 'Union[int, None print() if to_file: out.close() + sync_owner_id() def old_sync() -> 'Union[int, None]': global cache cache.drop_all() cache = db.NodeCache(CACHE_PATH) + sync_owner_id() try: folders = acd_client.get_folder_list() folders.extend(acd_client.get_trashed_folders()) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 68e026c..24465b9 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -2,6 +2,7 @@ import configparser import errno +import json import logging import os import stat @@ -14,6 +15,10 @@ from time import time, sleep import ctypes.util +import binascii + +from acdcli.cache.db import CacheConsts + ctypes.util.__find_library = ctypes.util.find_library def find_library(*args): @@ -43,6 +48,7 @@ def find_library(*args): errno.EREMOTEIO = errno.EIO _SETTINGS_FILENAME = 'fuse.ini' +_XATTR_PROPERTY_NAME = 'xattrs' _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5) @@ -211,7 +217,7 @@ class WriteStream(object): """A WriteStream is a binary file-like object that is backed by a Queue. 
It will remember its current offset.""" - __slots__ = ('q', 'offset', 'error', 'closed', 'done', 'timeout') + __slots__ = ('q', 'offset', 'error', 'closed', 'done', 'timeout', 'lock') def __init__(self, buffer_size, timeout): self.q = Queue(maxsize=buffer_size) @@ -224,6 +230,8 @@ def __init__(self, buffer_size, timeout): self.done = Event() """done event is triggered when file is successfully read and transferred""" self.timeout = timeout + self.lock = Lock() + """make sure only one writer is appending to the queue at once""" def write(self, data: bytes): """Writes data into queue. @@ -309,35 +317,36 @@ def write(self, node_id, fh, offset, bytes_): :raises: FuseOSError: wrong offset or writing failed""" - f = self.files[fh] + f = self.files[node_id] - if f.offset == offset: - f.write(bytes_) - else: - f.error = True # necessary? - logger.error('Wrong offset for writing to fh %s.' % fh) - raise FuseOSError(errno.ESPIPE) + with f.lock: + if f.offset == offset: + f.write(bytes_) + else: + f.error = True # necessary? + logger.error('Wrong offset for writing to fh %s.' % fh) + raise FuseOSError(errno.ESPIPE) if offset == 0: t = Thread(target=self.write_n_sync, args=(f, node_id)) t.daemon = True t.start() - def flush(self, fh): - f = self.files.get(fh) + def flush(self, node_id, fh): + f = self.files.get(node_id) if f: f.flush() - def release(self, fh): + def release(self, node_id, fh): """:raises: FuseOSError""" - f = self.files.get(fh) + f = self.files.get(node_id) if f: try: f.close() except: raise finally: - del self.files[fh] + del self.files[node_id] class LoggingMixIn(object): @@ -377,15 +386,20 @@ def __init__(self, **kwargs): :param kwargs: cache (NodeCache), acd_client (ACDClient), autosync (partial)""" + self.xattr_cache = {} + self.xattr_dirty = set() + self.xattr_cache_lock = Lock() + self.cache = kwargs['cache'] self.acd_client = kwargs['acd_client'] + self.acd_client_owner = self.cache.KeyValueStorage.get(CacheConsts.OWNER_ID) autosync = kwargs['autosync'] conf = kwargs['conf'] - self.rp = ReadProxy(self.acd_client, + self.rp = ReadProxy(self.acd_client, conf.getint('read', 'open_chunk_limit'), conf.getint('read', 'timeout')) """collection of files opened for reading""" - self.wp = WriteProxy(self.acd_client, self.cache, + self.wp = WriteProxy(self.acd_client, self.cache, conf.getint('write', 'buffer_size'), conf.getint('write', 'timeout')) """collection of files opened for writing""" try: @@ -403,6 +417,8 @@ def __init__(self, **kwargs): """file handle counter\n\n :type: int""" self.handles = {} """map fh->node\n\n :type: dict""" + self.node_to_fh = defaultdict(lambda: set()) + """map node_id to list of interested file handles""" self.fh_lock = Lock() """lock for fh counter increment and handle dict writes""" self.nlinks = kwargs.get('nlinks', False) @@ -415,6 +431,7 @@ def __init__(self, **kwargs): p.start() def destroy(self, path): + self._xattr_write_and_sync() self.destroyed.set() def readdir(self, path, fh) -> 'List[str]': @@ -455,6 +472,87 @@ def getattr(self, path, fh=None) -> dict: st_size=node.size, **times) + # def listxattr(self, path): + # node_id = self.cache.resolve_id(path) + # if not node_id: + # raise FuseOSError(errno.ENOENT) + # self._xattr_load(node_id) + # + # with self.xattr_cache_lock: + # try: + # return [k for k, v in self.xattr_cache[node_id].items()] + # except: + # return [] + + def getxattr(self, path, name, position=0): + node_id = self.cache.resolve_id(path) + if not node_id: + raise FuseOSError(errno.ENOENT) + self._xattr_load(node_id) + + with 
self.xattr_cache_lock: + try: + ret = self.xattr_cache[node_id][name] + if ret: + return ret + except: + raise FuseOSError(errno.ENODATA) # should be ENOATTR + else: + raise FuseOSError(errno.ENODATA) # should be ENOATTR + + # def removexattr(self, path, name): + # node_id = self.cache.resolve_id(path) + # if not node_id: + # raise FuseOSError(errno.ENOENT) + # self._xattr_load(node_id) + # + # with self.xattr_cache_lock: + # try: + # if self.xattr_cache[node_id][name]: + # del self.xattr_cache[node_id][name] + # self.properties_dirty.add(node_id) + # except: + # raise FuseOSError(errno.ENODATA) # should be ENOATTR + + def setxattr(self, path, name, value, options, position=0): + node_id = self.cache.resolve_id(path) + if not node_id: + raise FuseOSError(errno.ENOENT) + self._xattr_load(node_id) + + with self.xattr_cache_lock: + try: + self.xattr_cache[node_id][name] = value + self.xattr_dirty.add(node_id) + except: + raise FuseOSError(errno.ENOTSUP) + + def _xattr_load(self, node_id): + with self.xattr_cache_lock: + if node_id not in self.xattr_cache: + xattrs_str = self.cache.get_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME) + try: self.xattr_cache[node_id] = json.loads(xattrs_str) + except: self.xattr_cache[node_id] = {} + for k, v in self.xattr_cache[node_id].items(): + self.xattr_cache[node_id][k] = binascii.a2b_base64(v) + + def _xattr_write_and_sync(self): + with self.xattr_cache_lock: + for node_id in self.xattr_dirty: + try: + xattrs = {} + for k, v in self.xattr_cache[node_id].items(): + xattrs[k] = binascii.b2a_base64(v).decode("utf-8") + xattrs_str = json.dumps(xattrs) + + self.acd_client.add_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, + xattrs_str) + except (RequestError, IOError) as e: + logger.error('Error writing node xattrs "%s". %s' % (node_id, str(e))) + else: + self.cache.insert_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, xattrs_str) + self.xattr_dirty.clear() + def read(self, path, length, offset, fh) -> bytes: """Read ```length`` bytes from ``path`` at ``offset``.""" @@ -550,6 +648,7 @@ def create(self, path, mode) -> int: with self.fh_lock: self.fh += 1 self.handles[self.fh] = node + self.node_to_fh[node.id].add(self.fh) return self.fh def rename(self, old, new): @@ -618,6 +717,7 @@ def open(self, path, flags) -> int: with self.fh_lock: self.fh += 1 self.handles[self.fh] = node + self.node_to_fh[node.id].add(self.fh) return self.fh def write(self, path, data, offset, fh) -> int: @@ -631,7 +731,8 @@ def write(self, path, data, offset, fh) -> int: def flush(self, path, fh): """Flushes ``fh`` in WriteProxy.""" - self.wp.flush(fh) + node_id = self.handles[fh].id + self.wp.flush(node_id, fh) def truncate(self, path, length, fh=None): """Pseudo-truncates a file, i.e. clears content if ``length``==0 or does nothing @@ -666,8 +767,18 @@ def release(self, path, fh): node = self.cache.resolve(path, trash=False) if node: self.rp.release(node.id) - self.wp.release(fh) with self.fh_lock: + """release the writer if there's no more interest. This allows many file + handles to write to a single node provided they do it in order, enabling + sequential writes using mmap. 
+ """ + interest = self.node_to_fh.get(node.id) + if interest: + interest.discard(fh) + if not interest: + self.wp.release(node.id, fh) + self._xattr_write_and_sync() + del self.node_to_fh[node.id] del self.handles[fh] else: raise FuseOSError(errno.ENOENT) diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index 994a925..6ac66f2 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -3,7 +3,7 @@ import os import re import sqlite3 -from threading import local +from threading import local, Lock from acdcli.utils.conf import get_conf @@ -24,7 +24,11 @@ _def_conf['sqlite'] = dict(filename='nodes.db', busy_timeout=30000, journal_mode='wal') _def_conf['blacklist'] = dict(folders= []) - +class CacheConsts(object): + CHECKPOINT_KEY = 'checkpoint' + LAST_SYNC_KEY = 'last_sync' + OWNER_ID = 'owner_id' + MAX_AGE = 30 class IntegrityError(Exception): def __init__(self, msg): @@ -61,6 +65,11 @@ def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckTyp self._conn.create_function('REGEXP', _regex_match.__code__.co_argcount, _regex_match) + self.path_to_node_id = {} + self.path_to_node_id_lock = Lock() + """There are a huge number of repeated path lookups, + so cache results and invalidate on new nodes.""" + with cursor(self._conn) as c: c.execute(_ROOT_ID_SQL) row = c.fetchone() diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 44ea869..04a0d6a 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -45,6 +45,8 @@ def datetime_from_string(dt: str) -> datetime: NODE_BY_ID_SQL = """SELECT n.*, f.* FROM nodes n LEFT OUTER JOIN files f ON n.id = f.id WHERE n.id = (?)""" +PROPERTY_BY_ID_SQL = """SELECT * FROM properties WHERE id=? AND owner=? AND key=?""" + USAGE_SQL = 'SELECT SUM(size) FROM files' FIND_BY_NAME_SQL = """SELECT n.*, f.* FROM nodes n @@ -151,7 +153,31 @@ def get_conflicting_node(self, name: str, parent_id: str): if n.is_available and n.name.lower() == name.lower(): return n + def resolve_id(self, path: str, trash=False) -> 'Union[str|None]': + """Gets a node's id from a path + This is far faster than the below method if the id is cached; + there are zero sqlite queries.""" + with self.path_to_node_id_lock: + try: return self.path_to_node_id[path] + except: pass + n = self._resolve(path, trash) + if n: + self.path_to_node_id[path] = n.id + return n.id + return None + def resolve(self, path: str, trash=False) -> 'Union[Node|None]': + """Gets a node from a path""" + with self.path_to_node_id_lock: + try: return self.get_node(self.path_to_node_id[path]) + except: pass + n = self._resolve(path,trash) + if n: + self.path_to_node_id[path] = n.id + return n + return None + + def _resolve(self, path: str, trash=False) -> 'Union[Node|None]': segments = list(filter(bool, path.split('/'))) if not segments: if not self.root_id: @@ -312,3 +338,11 @@ def file_size_exists(self, size) -> bool: no = c.fetchone()[0] return bool(no) + + def get_property(self, node_id, owner_id, key) -> 'Union[str|None]': + with cursor(self._conn) as c: + c.execute(PROPERTY_BY_ID_SQL, [node_id, owner_id, key]) + r = c.fetchone() + if r: + return r['value'] + return None diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index d5e138b..9939af1 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -28,6 +28,15 @@ CHECK (status IN ('AVAILABLE', 'TRASH', 'PURGED', 'PENDING')) ); + CREATE TABLE properties ( + id VARCHAR(50) NOT NULL, + owner TEXT NOT NULL, + key TEXT NOT NULL, + value TEXT, + PRIMARY KEY (id), + FOREIGN KEY(id) REFERENCES nodes (id) + ); + 
CREATE TABLE labels ( id VARCHAR(50) NOT NULL, name VARCHAR(256) NOT NULL, diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index d6dbe80..1ff4672 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -42,12 +42,16 @@ def remove_purged(self, purged: list): c.execute('DELETE FROM files WHERE id IN %s' % placeholders(slice_), slice_) c.execute('DELETE FROM parentage WHERE parent IN %s' % placeholders(slice_), slice_) c.execute('DELETE FROM parentage WHERE child IN %s' % placeholders(slice_), slice_) + c.execute('DELETE FROM properties WHERE id IN %s' % placeholders(slice_), slice_) c.execute('DELETE FROM labels WHERE id IN %s' % placeholders(slice_), slice_) logger.info('Purged %i node(s).' % len(purged)) def insert_nodes(self, nodes: list, partial=True): """Inserts mixed list of files and folders into cache.""" + with self.path_to_node_id_lock: + self.path_to_node_id.clear() + files = [] folders = [] for node in nodes: @@ -72,6 +76,7 @@ def insert_nodes(self, nodes: list, partial=True): self.insert_files(files) self.insert_parentage(files + folders, partial) + self.insert_properties(files + folders) def insert_node(self, node: dict): """Inserts single file or folder into cache.""" @@ -143,3 +148,30 @@ def insert_parentage(self, nodes: list, partial=True): c.execute('INSERT OR IGNORE INTO parentage VALUES (?, ?)', [p, n['id']]) logger.info('Parented %d node(s).' % len(nodes)) + + def insert_properties(self, nodes: list): + if not nodes: + return + + with mod_cursor(self._conn) as c: + for n in nodes: + if 'properties' not in n: + continue + id = n['id'] + for owner_id, key_value in n['properties'].items(): + for key, value in key_value.items(): + c.execute('INSERT OR REPLACE INTO properties ' + '(id, owner, key, value) ' + 'VALUES (?, ?, ?, ?)', + [id, owner_id, key, value] + ) + + logger.info('Applied properties to %d node(s).' % len(nodes)) + + def insert_property(self, node_id, owner_id, key, value): + with mod_cursor(self._conn) as c: + c.execute('INSERT OR REPLACE INTO properties ' + '(id, owner, key, value) ' + 'VALUES (?, ?, ?, ?)', + [node_id, owner_id, key, value] + ) \ No newline at end of file From 6a1ee48be9274adea11b1dd450dec0926c816e8c Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 8 Aug 2016 14:13:26 -0400 Subject: [PATCH 02/63] prevent binary blob spam in the logs when using set/getxattr --- acdcli/acd_fuse.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 24465b9..636f3ca 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -361,6 +361,8 @@ def __call__(self, op, path, *args): targs = (len(args[0]),) + args[1:] elif op == 'chmod': targs = (oct(args[0]),) + args[1:] + elif op == 'setxattr': + targs = (args[0], "binary") logger.debug('-> %s %s %s', op, path, repr(args if not targs else targs)) @@ -374,6 +376,8 @@ def __call__(self, op, path, *args): finally: if op == 'read': ret = len(ret) + elif op == 'getxattr' and ret: + ret = "binary" logger.debug('<- %s %s', op, repr(ret)) From 15ac1e798687f954e3dbcfe2e70243ac92d50e34 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 10 Aug 2016 18:31:58 -0400 Subject: [PATCH 03/63] fix requirements --- requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/requirements.txt b/requirements.txt index d59ad89..e161b1b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,9 @@ +appdirs +colorama +fusepy +python-dateutil +requests +requests_toolbelt # adds sphinx module for rtfd.org build process -e . 
From db9f9b066edaf1888a2e0886c9aa26b589f1fd47 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 10 Aug 2016 18:41:46 -0400 Subject: [PATCH 04/63] implement proper mtime handling so rsync can work over acd_cli. --- acdcli/acd_fuse.py | 89 +++++++++++++++++++++++++++++-------------- acdcli/cache/query.py | 7 +++- 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 636f3ca..f93ef95 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -6,6 +6,7 @@ import logging import os import stat +import struct import sys from collections import deque, defaultdict @@ -49,6 +50,7 @@ def find_library(*args): _SETTINGS_FILENAME = 'fuse.ini' _XATTR_PROPERTY_NAME = 'xattrs' +_XATTR_MTIME_OVERRIDE_NAME = 'fuse.mtime' _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5) @@ -462,8 +464,11 @@ def getattr(self, path, fh=None) -> dict: if not node: raise FuseOSError(errno.ENOENT) + try: mtime = self._getxattr_f(node.id, _XATTR_MTIME_OVERRIDE_NAME) + except: mtime = node.modified.timestamp() + times = dict(st_atime=time(), - st_mtime=node.modified.timestamp(), + st_mtime=mtime, st_ctime=node.created.timestamp()) if node.is_folder: @@ -476,24 +481,28 @@ def getattr(self, path, fh=None) -> dict: st_size=node.size, **times) - # def listxattr(self, path): - # node_id = self.cache.resolve_id(path) - # if not node_id: - # raise FuseOSError(errno.ENOENT) - # self._xattr_load(node_id) - # - # with self.xattr_cache_lock: - # try: - # return [k for k, v in self.xattr_cache[node_id].items()] - # except: - # return [] + def listxattr(self, path): + node_id = self.cache.resolve_id(path) + if not node_id: + raise FuseOSError(errno.ENOENT) + return self._listxattr(node_id) + + def _listxattr(self, node_id): + self._xattr_load(node_id) + with self.xattr_cache_lock: + try: + return [k for k, v in self.xattr_cache[node_id].items()] + except: + return [] def getxattr(self, path, name, position=0): node_id = self.cache.resolve_id(path) if not node_id: raise FuseOSError(errno.ENOENT) - self._xattr_load(node_id) + return self._getxattr(node_id, name) + def _getxattr(self, node_id, name): + self._xattr_load(node_id) with self.xattr_cache_lock: try: ret = self.xattr_cache[node_id][name] @@ -504,26 +513,30 @@ def getxattr(self, path, name, position=0): else: raise FuseOSError(errno.ENODATA) # should be ENOATTR - # def removexattr(self, path, name): - # node_id = self.cache.resolve_id(path) - # if not node_id: - # raise FuseOSError(errno.ENOENT) - # self._xattr_load(node_id) - # - # with self.xattr_cache_lock: - # try: - # if self.xattr_cache[node_id][name]: - # del self.xattr_cache[node_id][name] - # self.properties_dirty.add(node_id) - # except: - # raise FuseOSError(errno.ENODATA) # should be ENOATTR + def _getxattr_f(self, node_id, name): + return struct.unpack('d', self._getxattr(node_id, name))[0] - def setxattr(self, path, name, value, options, position=0): + def removexattr(self, path, name): node_id = self.cache.resolve_id(path) if not node_id: raise FuseOSError(errno.ENOENT) + self._removexattr(node_id, name) + + def _removexattr(self, node_id, name): self._xattr_load(node_id) + with self.xattr_cache_lock: + if name in self.xattr_cache[node_id]: + del self.xattr_cache[node_id][name] + self.properties_dirty.add(node_id) + def setxattr(self, path, name, value, options, position=0): + node_id = self.cache.resolve_id(path) + if not node_id: + raise FuseOSError(errno.ENOENT) + self._setxattr(node_id, name, value) + + def 
_setxattr(self, node_id, name, value): + self._xattr_load(node_id) with self.xattr_cache_lock: try: self.xattr_cache[node_id][name] = value @@ -531,6 +544,9 @@ def setxattr(self, path, name, value, options, position=0): except: raise FuseOSError(errno.ENOTSUP) + def _setxattr_f(self, node_id, name, value: float): + self._setxattr(node_id, name, struct.pack('d', value)) + def _xattr_load(self, node_id): with self.xattr_cache_lock: if node_id not in self.xattr_cache: @@ -731,6 +747,8 @@ def write(self, path, data, offset, fh) -> int: node_id = self.handles[fh].id self.wp.write(node_id, fh, offset, data) + """on a write, we can use amazon's modified time""" + self._removexattr(node_id, _XATTR_MTIME_OVERRIDE_NAME) return len(data) def flush(self, path, fh): @@ -788,11 +806,16 @@ def release(self, path, fh): raise FuseOSError(errno.ENOENT) def utimens(self, path, times=None): - """Not functional. Should set node atime and mtime to values as passed in ``times`` - or current time (see :manpage:`utimesat(2)`). + """Should set node atime and mtime to values as passed in ``times`` + or current time (see :manpage:`utimensat(2)`). + Note that this is only implemented for modified time. :param times: [atime, mtime]""" + node_id = self.cache.resolve_id(path) + if not node_id: + raise FuseOSError(errno.ENOENT) + if times: # atime = times[0] mtime = times[1] @@ -800,6 +823,14 @@ def utimens(self, path, times=None): # atime = time() mtime = time() + try: + self._setxattr_f(node_id, _XATTR_MTIME_OVERRIDE_NAME, mtime) + self._xattr_write_and_sync() + except: + raise FuseOSError(errno.ENOTSUP) + + return 0 + def chmod(self, path, mode): """Not implemented.""" pass diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 04a0d6a..842189e 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -1,5 +1,8 @@ import logging from datetime import datetime + +from dateutil.tz import tzutc + from .cursors import cursor logger = logging.getLogger(__name__) @@ -7,9 +10,9 @@ def datetime_from_string(dt: str) -> datetime: try: - dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f+00:00') + dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f+00:00').replace(tzinfo=tzutc()) except ValueError: - dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S+00:00') + dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S+00:00').replace(tzinfo=tzutc()) return dt From 076dec0542bf8dca08f43325832878c9c7de5d44 Mon Sep 17 00:00:00 2001 From: "benjamin.gemmill@gmail.com" Date: Thu, 11 Aug 2016 18:32:08 -0400 Subject: [PATCH 05/63] bugfix: turns out reinterpret casting floats to bytes and back via structs leads to epsilon problems, causing rsync to think that mtime is different when it isn't. 
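For illustration, a rough sketch of the storage strategy this moves to (standard json module only; the names below are illustrative, not the exact code in this patch): the override mtime is kept as a plain number inside the JSON-encoded xattrs property, so it round-trips exactly with no byte-level reinterpretation:

    import json
    mtime = 1470680705.123456                        # float timestamp from utimens()
    blob = json.dumps({'fuse.mtime': mtime})         # stored in the node's xattrs property
    assert json.loads(blob)['fuse.mtime'] == mtime   # exact round-trip in Python 3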
--- acdcli/acd_fuse.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index f93ef95..0013045 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -464,7 +464,7 @@ def getattr(self, path, fh=None) -> dict: if not node: raise FuseOSError(errno.ENOENT) - try: mtime = self._getxattr_f(node.id, _XATTR_MTIME_OVERRIDE_NAME) + try: mtime = self._getxattr(node.id, _XATTR_MTIME_OVERRIDE_NAME) except: mtime = node.modified.timestamp() times = dict(st_atime=time(), @@ -499,7 +499,7 @@ def getxattr(self, path, name, position=0): node_id = self.cache.resolve_id(path) if not node_id: raise FuseOSError(errno.ENOENT) - return self._getxattr(node_id, name) + return self._getxattr_bytes(node_id, name) def _getxattr(self, node_id, name): self._xattr_load(node_id) @@ -513,8 +513,8 @@ def _getxattr(self, node_id, name): else: raise FuseOSError(errno.ENODATA) # should be ENOATTR - def _getxattr_f(self, node_id, name): - return struct.unpack('d', self._getxattr(node_id, name))[0] + def _getxattr_bytes(self, node_id, name): + return binascii.a2b_base64(self._getxattr(node_id, name)) def removexattr(self, path, name): node_id = self.cache.resolve_id(path) @@ -533,7 +533,7 @@ def setxattr(self, path, name, value, options, position=0): node_id = self.cache.resolve_id(path) if not node_id: raise FuseOSError(errno.ENOENT) - self._setxattr(node_id, name, value) + self._setxattr_bytes(node_id, name, value) def _setxattr(self, node_id, name, value): self._xattr_load(node_id) @@ -544,8 +544,8 @@ def _setxattr(self, node_id, name, value): except: raise FuseOSError(errno.ENOTSUP) - def _setxattr_f(self, node_id, name, value: float): - self._setxattr(node_id, name, struct.pack('d', value)) + def _setxattr_bytes(self, node_id, name, value: bytes): + self._setxattr(node_id, name, binascii.b2a_base64(value).decode("utf-8")) def _xattr_load(self, node_id): with self.xattr_cache_lock: @@ -553,24 +553,19 @@ def _xattr_load(self, node_id): xattrs_str = self.cache.get_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME) try: self.xattr_cache[node_id] = json.loads(xattrs_str) except: self.xattr_cache[node_id] = {} - for k, v in self.xattr_cache[node_id].items(): - self.xattr_cache[node_id][k] = binascii.a2b_base64(v) def _xattr_write_and_sync(self): with self.xattr_cache_lock: for node_id in self.xattr_dirty: try: - xattrs = {} - for k, v in self.xattr_cache[node_id].items(): - xattrs[k] = binascii.b2a_base64(v).decode("utf-8") - xattrs_str = json.dumps(xattrs) - + xattrs_str = json.dumps(self.xattr_cache[node_id]) self.acd_client.add_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, xattrs_str) except (RequestError, IOError) as e: logger.error('Error writing node xattrs "%s". 
%s' % (node_id, str(e))) else: self.cache.insert_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, xattrs_str) + logger.debug('_xattr_write_and_sync: node: %s xattrs: %s: ' % (node_id, xattrs_str)) self.xattr_dirty.clear() def read(self, path, length, offset, fh) -> bytes: @@ -747,8 +742,6 @@ def write(self, path, data, offset, fh) -> int: node_id = self.handles[fh].id self.wp.write(node_id, fh, offset, data) - """on a write, we can use amazon's modified time""" - self._removexattr(node_id, _XATTR_MTIME_OVERRIDE_NAME) return len(data) def flush(self, path, fh): @@ -824,7 +817,7 @@ def utimens(self, path, times=None): mtime = time() try: - self._setxattr_f(node_id, _XATTR_MTIME_OVERRIDE_NAME, mtime) + self._setxattr(node_id, _XATTR_MTIME_OVERRIDE_NAME, mtime) self._xattr_write_and_sync() except: raise FuseOSError(errno.ENOTSUP) From 9184a803668ea56e44007606d27ad40658ee9cb7 Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 13 Aug 2016 15:53:28 -0400 Subject: [PATCH 06/63] workaround: due to ecryptfs' bug, reporting incorrect file sizes when using xattrs for crypto headers, we have to allow re-writing the first bytes of a file to make ecryptfs happy. once they fix their bug, this can be removed and we can go back to xattrs. --- acdcli/acd_fuse.py | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 0013045..b15673c 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -51,6 +51,7 @@ def find_library(*args): _SETTINGS_FILENAME = 'fuse.ini' _XATTR_PROPERTY_NAME = 'xattrs' _XATTR_MTIME_OVERRIDE_NAME = 'fuse.mtime' +_XATTR_HEADER_OVERRIDE_NAME = 'fuse.header' _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5) @@ -363,8 +364,6 @@ def __call__(self, op, path, *args): targs = (len(args[0]),) + args[1:] elif op == 'chmod': targs = (oct(args[0]),) + args[1:] - elif op == 'setxattr': - targs = (args[0], "binary") logger.debug('-> %s %s %s', op, path, repr(args if not targs else targs)) @@ -378,8 +377,6 @@ def __call__(self, op, path, *args): finally: if op == 'read': ret = len(ret) - elif op == 'getxattr' and ret: - ret = "binary" logger.debug('<- %s %s', op, repr(ret)) @@ -584,7 +581,17 @@ def read(self, path, length, offset, fh) -> bytes: if node.size < offset + length: length = node.size - offset - return self.rp.get(node.id, offset, length, node.size) + ret = self.rp.get(node.id, offset, length, node.size) + + """Check if we're overwriting the file's header, and splice that into the read bytes""" + try: + header = self._getxattr_bytes(node.id, _XATTR_HEADER_OVERRIDE_NAME) + if offset < len(header): + header = header[offset:] + ret = header + ret[len(header):] + except: + pass + return ret def statfs(self, path) -> dict: """Gets some filesystem statistics as specified in :manpage:`stat(2)`.""" @@ -741,6 +748,21 @@ def write(self, path, data, offset, fh) -> int: :returns: number of bytes written""" node_id = self.handles[fh].id + + """Allow overwriting a file's header. 
This is useful to support encrypted + filesystems that leave a header at the start of each file, and write to + it while writing to the body..""" + f = self.wp.files[node_id] + with f.lock: + if f.offset > 0 and offset == 0: + """sanity check that all headers must be the same size, + or we could end up overwriting the file in an xattr""" + try: header_sz = len(self._getxattr_bytes(node_id, _XATTR_HEADER_OVERRIDE_NAME)) + except: header_sz = len(data) + if header_sz == len(data): + self._setxattr_bytes(node_id, _XATTR_HEADER_OVERRIDE_NAME, data) + return len(data) + self.wp.write(node_id, fh, offset, data) return len(data) From 45ed3fa617853b1fa430abf4578a309b55ad579e Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 15 Aug 2016 14:41:53 -0400 Subject: [PATCH 07/63] bugfix: who's sick of index errors when writing? this guy. turns out there are some rsync flags that write multiple times to the same memory location, for reasons unknown. this keeps the whole file in a buffer until it's flushed to amazon on file handle closed. future work will be for super large files, we should use a temp file as backing. --- acdcli/acd_fuse.py | 88 ++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index b15673c..507ff8d 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -51,7 +51,6 @@ def find_library(*args): _SETTINGS_FILENAME = 'fuse.ini' _XATTR_PROPERTY_NAME = 'xattrs' _XATTR_MTIME_OVERRIDE_NAME = 'fuse.mtime' -_XATTR_HEADER_OVERRIDE_NAME = 'fuse.header' _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5) @@ -215,6 +214,37 @@ def __init__(self, acd_client, cache, buffer_size, timeout): self.acd_client = acd_client self.cache = cache self.files = defaultdict(lambda: WriteProxy.WriteStream(buffer_size, timeout)) + self.buffers = defaultdict(lambda: WriteProxy.WriteBuffer()) + + class WriteBuffer(object): + """An in-memory segment of a file. This gets pushed out to amazon via a WriteStream on + flush() calls. Anything that hasn't been flushed yet can be rewritten in place any + number of times.""" + + def __init__(self): + self.b = bytearray() + """The memory backing""" + self.lock = Lock() + + def write(self, offset, bytes_: bytes): + """Writes to the buffer and returns the old buffer length""" + with self.lock: + old_len = len(self.b) + if offset > old_len: + logger.error('Wrong offset for writing to buffer; writing gap detected') + raise FuseOSError(errno.ESPIPE) + self.b[offset:offset + len(bytes_)] = bytes_ + return old_len + + def flush(self) -> bytes: + with self.lock: + ret = self.b + self.b = bytearray() + return ret + + def __len__(self): + with self.lock: + return len(self.b) class WriteStream(object): """A WriteStream is a binary file-like object that is backed by a Queue. @@ -320,28 +350,26 @@ def write(self, node_id, fh, offset, bytes_): :raises: FuseOSError: wrong offset or writing failed""" + b = self.buffers[node_id] f = self.files[node_id] - with f.lock: - if f.offset == offset: - f.write(bytes_) - else: - f.error = True # necessary? - logger.error('Wrong offset for writing to fh %s.' 
% fh) - raise FuseOSError(errno.ESPIPE) - - if offset == 0: + if b.write(offset, bytes_) == 0: t = Thread(target=self.write_n_sync, args=(f, node_id)) t.daemon = True t.start() - def flush(self, node_id, fh): + def _flush(self, node_id, fh): f = self.files.get(node_id) - if f: - f.flush() + b = self.buffers.get(node_id) + if f and b: + if len(b): + data = b.flush() + with f.lock: + f.write(data) def release(self, node_id, fh): """:raises: FuseOSError""" + self._flush(node_id, fh) f = self.files.get(node_id) if f: try: @@ -350,6 +378,7 @@ def release(self, node_id, fh): raise finally: del self.files[node_id] + del self.buffers[node_id] class LoggingMixIn(object): @@ -581,17 +610,7 @@ def read(self, path, length, offset, fh) -> bytes: if node.size < offset + length: length = node.size - offset - ret = self.rp.get(node.id, offset, length, node.size) - - """Check if we're overwriting the file's header, and splice that into the read bytes""" - try: - header = self._getxattr_bytes(node.id, _XATTR_HEADER_OVERRIDE_NAME) - if offset < len(header): - header = header[offset:] - ret = header + ret[len(header):] - except: - pass - return ret + return self.rp.get(node.id, offset, length, node.size) def statfs(self, path) -> dict: """Gets some filesystem statistics as specified in :manpage:`stat(2)`.""" @@ -748,28 +767,13 @@ def write(self, path, data, offset, fh) -> int: :returns: number of bytes written""" node_id = self.handles[fh].id - - """Allow overwriting a file's header. This is useful to support encrypted - filesystems that leave a header at the start of each file, and write to - it while writing to the body..""" - f = self.wp.files[node_id] - with f.lock: - if f.offset > 0 and offset == 0: - """sanity check that all headers must be the same size, - or we could end up overwriting the file in an xattr""" - try: header_sz = len(self._getxattr_bytes(node_id, _XATTR_HEADER_OVERRIDE_NAME)) - except: header_sz = len(data) - if header_sz == len(data): - self._setxattr_bytes(node_id, _XATTR_HEADER_OVERRIDE_NAME, data) - return len(data) - self.wp.write(node_id, fh, offset, data) return len(data) def flush(self, path, fh): - """Flushes ``fh`` in WriteProxy.""" - node_id = self.handles[fh].id - self.wp.flush(node_id, fh) + """noop since we need to keep the whole buffer in memory; + acd only supports sequentual writes otherwise""" + pass def truncate(self, path, length, fh=None): """Pseudo-truncates a file, i.e. clears content if ``length``==0 or does nothing From 6e30a93934307e8df2eb466e691b438645d09a47 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 15 Aug 2016 18:00:35 -0400 Subject: [PATCH 08/63] implement truncate and ftruncate since rsync wants these too sometimes. 
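Roughly, the intended buffer-level semantics are: pad with NUL bytes when growing, drop the tail when shrinking. A minimal standalone sketch (a plain bytearray standing in for the write buffer, not the actual WriteBuffer code):

    buf = bytearray(b'abcdef')
    length = 4
    if len(buf) < length:
        buf = buf.ljust(length, b'\0')   # grow: pad with NUL bytes
    else:
        buf = buf[:length]               # shrink: cut the tail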
--- acdcli/acd_fuse.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 507ff8d..18f2623 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -6,7 +6,6 @@ import logging import os import stat -import struct import sys from collections import deque, defaultdict @@ -236,6 +235,13 @@ def write(self, offset, bytes_: bytes): self.b[offset:offset + len(bytes_)] = bytes_ return old_len + def truncate(self, length): + with self.lock: + if len(self.b) < length: + self.b = self.b.ljust(length, '\0') + else: + self.b = self.b[:length] + def flush(self) -> bytes: with self.lock: ret = self.b @@ -358,6 +364,15 @@ def write(self, node_id, fh, offset, bytes_): t.daemon = True t.start() + def truncate(self, node_id, fh, length): + """truncates a buffer if it exists to a given length and returns true. + If not, does nothing (we don't preallocate) and returns false""" + b = self.buffers.get(node_id) + if b: + b.trunate(length) + return True + return False + def _flush(self, node_id, fh): f = self.files.get(node_id) b = self.buffers.get(node_id) @@ -796,8 +811,11 @@ def truncate(self, path, length, fh=None): else: self.cache.insert_node(r) elif length > 0: - if node.size != length: - raise FuseOSError(errno.ENOSYS) + if not self.wp.truncate(node.id, fh, length): + if node.size != length: + """from man 2 truncate; the file is not open for writing""" + raise FuseOSError(errno.EINVAL) + return 0 def release(self, path, fh): """Releases an open ``path``.""" From e80636264e57acdfd000a71aa40713193f066b3e Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 15 Aug 2016 18:47:19 -0400 Subject: [PATCH 09/63] only truncates that shorten are safe without reading the file from acd first and filling in the gaps --- acdcli/acd_fuse.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 18f2623..f5a564a 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -237,10 +237,10 @@ def write(self, offset, bytes_: bytes): def truncate(self, length): with self.lock: - if len(self.b) < length: - self.b = self.b.ljust(length, '\0') - else: + if len(self.b) >= length: self.b = self.b[:length] + return True + return False def flush(self) -> bytes: with self.lock: @@ -369,8 +369,7 @@ def truncate(self, node_id, fh, length): If not, does nothing (we don't preallocate) and returns false""" b = self.buffers.get(node_id) if b: - b.trunate(length) - return True + return b.trunate(length) return False def _flush(self, node_id, fh): @@ -812,9 +811,8 @@ def truncate(self, path, length, fh=None): self.cache.insert_node(r) elif length > 0: if not self.wp.truncate(node.id, fh, length): - if node.size != length: - """from man 2 truncate; the file is not open for writing""" - raise FuseOSError(errno.EINVAL) + logger.debug("truncate: attempting to skip ahead, ignoring") + # raise FuseOSError(errno.EINVAL) return 0 def release(self, path, fh): From 8d87e06a0ec36e10156cd84de13988cf5eec554e Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 16 Aug 2016 00:48:32 -0400 Subject: [PATCH 10/63] The corner cases around implementing truncate when we only have a (partial) buffer in the middle of writing is hard enough that we bail on it. We don't care about pre-allocating files since we have infinite space, and shortening a file is only possible when it's being written to.... so we can only catch the rare use case of file overwrites and truncate back. Neither are worth it. 
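For the record, the behaviour that remains is tiny; a standalone sketch of the contract (illustrative only, not the actual FUSE method):

    def truncate_contract(current_size: int, length: int) -> int:
        # length == 0: clear the remote content; anything positive: leave the node alone
        return 0 if length == 0 else current_size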
--- acdcli/acd_fuse.py | 27 +++++++-------------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index f5a564a..2991e5d 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -235,13 +235,6 @@ def write(self, offset, bytes_: bytes): self.b[offset:offset + len(bytes_)] = bytes_ return old_len - def truncate(self, length): - with self.lock: - if len(self.b) >= length: - self.b = self.b[:length] - return True - return False - def flush(self) -> bytes: with self.lock: ret = self.b @@ -364,14 +357,6 @@ def write(self, node_id, fh, offset, bytes_): t.daemon = True t.start() - def truncate(self, node_id, fh, length): - """truncates a buffer if it exists to a given length and returns true. - If not, does nothing (we don't preallocate) and returns false""" - b = self.buffers.get(node_id) - if b: - return b.trunate(length) - return False - def _flush(self, node_id, fh): f = self.files.get(node_id) b = self.buffers.get(node_id) @@ -791,7 +776,7 @@ def flush(self, path, fh): def truncate(self, path, length, fh=None): """Pseudo-truncates a file, i.e. clears content if ``length``==0 or does nothing - if ``length`` is equal to current file size. + if ``length`` is positive. :raises FuseOSError: if pseudo-truncation to length is not supported""" @@ -809,10 +794,12 @@ def truncate(self, path, length, fh=None): raise FuseOSError.convert(e) else: self.cache.insert_node(r) - elif length > 0: - if not self.wp.truncate(node.id, fh, length): - logger.debug("truncate: attempting to skip ahead, ignoring") - # raise FuseOSError(errno.EINVAL) + + """No good way to deal with positive lengths at the moment; since we can only do + something about it in the middle of writing, this means the only use case we can + capture is when a program over-writes and then truncates back. In the future, if + we can get cached file backing instead of memory backing, there would be more to + do here. In the mean time we ignore.""" return 0 def release(self, path, fh): From 06efeca565720d0e74fadbdbc6f1ff2b2ccaeea4 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 17 Aug 2016 02:57:17 -0400 Subject: [PATCH 11/63] Use memory as a backing for buffers only if the writing size is 1G (configurable) or smaller, disk otherwise. 
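The mechanism is Python's tempfile.SpooledTemporaryFile: writes stay in an in-memory buffer until the file grows past max_size, then the data is rolled over to a real temporary file on disk. Quick illustration with a tiny threshold (the patch passes the configured buffer_size, default 1e9, as max_size):

    import tempfile

    f = tempfile.SpooledTemporaryFile(max_size=8)
    f.write(b'1234')           # 4 bytes: still held in memory
    print(f._rolled)           # False (CPython implementation detail, shown only to illustrate)
    f.write(b'56789abc')       # grows past max_size
    print(f._rolled)           # True: now backed by an on-disk temp file
    f.seek(0)
    print(f.read())            # b'123456789abc'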
--- acdcli/acd_fuse.py | 167 ++++++------------------------------------ acdcli/api/content.py | 20 +++++ 2 files changed, 43 insertions(+), 144 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 2991e5d..1d0bdc3 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -7,12 +7,12 @@ import os import stat import sys +import tempfile from collections import deque, defaultdict from multiprocessing import Process -from queue import Queue, Full as QueueFull -from threading import Thread, Lock, Event -from time import time, sleep +from threading import Thread, Lock +from time import time import ctypes.util import binascii @@ -53,7 +53,7 @@ def find_library(*args): _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5) -_def_conf['write'] = dict(buffer_size = 32, timeout=30) +_def_conf['write'] = dict(buffer_size=int(1e9), timeout=30) class FuseOSError(FuseError): @@ -212,172 +212,51 @@ class WriteProxy(object): def __init__(self, acd_client, cache, buffer_size, timeout): self.acd_client = acd_client self.cache = cache - self.files = defaultdict(lambda: WriteProxy.WriteStream(buffer_size, timeout)) - self.buffers = defaultdict(lambda: WriteProxy.WriteBuffer()) + self.buffers = defaultdict(lambda: WriteProxy.WriteBuffer(buffer_size)) class WriteBuffer(object): - """An in-memory segment of a file. This gets pushed out to amazon via a WriteStream on - flush() calls. Anything that hasn't been flushed yet can be rewritten in place any - number of times.""" - - def __init__(self): - self.b = bytearray() - """The memory backing""" + def __init__(self, buffer_size): + self.f = tempfile.SpooledTemporaryFile(max_size=buffer_size) self.lock = Lock() def write(self, offset, bytes_: bytes): - """Writes to the buffer and returns the old buffer length""" with self.lock: - old_len = len(self.b) + self.f.seek(0, os.SEEK_END) + old_len = self.f.tell() if offset > old_len: logger.error('Wrong offset for writing to buffer; writing gap detected') raise FuseOSError(errno.ESPIPE) - self.b[offset:offset + len(bytes_)] = bytes_ + self.f.seek(offset) + self.f.write(bytes_) return old_len - def flush(self) -> bytes: - with self.lock: - ret = self.b - self.b = bytearray() - return ret - - def __len__(self): - with self.lock: - return len(self.b) - - class WriteStream(object): - """A WriteStream is a binary file-like object that is backed by a Queue. - It will remember its current offset.""" - - __slots__ = ('q', 'offset', 'error', 'closed', 'done', 'timeout', 'lock') - - def __init__(self, buffer_size, timeout): - self.q = Queue(maxsize=buffer_size) - """a queue that buffers written blocks""" - self.offset = 0 - """the beginning fpos""" - self.error = False - """whether the read or write failed""" - self.closed = False - self.done = Event() - """done event is triggered when file is successfully read and transferred""" - self.timeout = timeout - self.lock = Lock() - """make sure only one writer is appending to the queue at once""" - - def write(self, data: bytes): - """Writes data into queue. - - :raises: FuseOSError on timeout""" - - if self.error: - raise FuseOSError(errno.EREMOTEIO) - try: - self.q.put(data, timeout=self.timeout) - except QueueFull: - logger.error('Write timeout.') - raise FuseOSError(errno.ETIMEDOUT) - self.offset += len(data) - - def read(self, ln=0) -> bytes: - """Returns as much byte data from queue as possible. - Returns empty bytestring (EOF) if queue is empty and file was closed. 
- - :raises: IOError""" - - if self.error: - raise IOError(errno.EIO, errno.errorcode[errno.EIO]) - - if self.closed and self.q.empty(): - return b'' - - b = [self.q.get()] - self.q.task_done() - while not self.q.empty(): - b.append(self.q.get()) - self.q.task_done() - - return b''.join(b) - - def flush(self): - """Waits until the queue is emptied. - - :raises: FuseOSError""" - - while True: - if self.error: - raise FuseOSError(errno.EREMOTEIO) - if self.q.empty(): - return - sleep(1) - - def close(self): - """Sets the closed flag to signal 'EOF' to the read function. - Then, waits until :attr:`done` event is triggered. - - :raises: FuseOSError""" - - self.closed = True - # prevent read deadlock - self.q.put(b'') - - # wait until read is complete - while True: - if self.error: - raise FuseOSError(errno.EREMOTEIO) - if self.done.wait(1): - return - - def write_n_sync(self, stream: WriteStream, node_id: str): - """Try to overwrite file with id ``node_id`` with content from ``stream``. - Triggers the :attr:`WriteStream.done` event on success. - - :param stream: a file-like object""" + def get_file(self): + self.f.seek(0) + return self.f + def _write_and_sync(self, buffer: WriteBuffer, node_id: str): try: - r = self.acd_client.overwrite_stream(stream, node_id) + r = self.acd_client.overwrite_tempfile(node_id, buffer.get_file()) except (RequestError, IOError) as e: - stream.error = True logger.error('Error writing node "%s". %s' % (node_id, str(e))) else: self.cache.insert_node(r) - stream.done.set() def write(self, node_id, fh, offset, bytes_): - """Gets WriteStream from defaultdict. Creates overwrite thread if offset is 0, - tries to continue otherwise. + """Gets WriteBuffer from defaultdict. :raises: FuseOSError: wrong offset or writing failed""" b = self.buffers[node_id] - f = self.files[node_id] - - if b.write(offset, bytes_) == 0: - t = Thread(target=self.write_n_sync, args=(f, node_id)) - t.daemon = True - t.start() - - def _flush(self, node_id, fh): - f = self.files.get(node_id) - b = self.buffers.get(node_id) - if f and b: - if len(b): - data = b.flush() - with f.lock: - f.write(data) + b.write(offset, bytes_) def release(self, node_id, fh): """:raises: FuseOSError""" - self._flush(node_id, fh) - f = self.files.get(node_id) - if f: - try: - f.close() - except: - raise - finally: - del self.files[node_id] - del self.buffers[node_id] + + b = self.buffers.get(node_id) + if b: + self._write_and_sync(b, node_id) + del self.buffers[node_id] class LoggingMixIn(object): diff --git a/acdcli/api/content.py b/acdcli/api/content.py index ae5d9fd..1218501 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -3,6 +3,7 @@ import json import io import mimetypes +import tempfile from collections import OrderedDict import logging from urllib.parse import quote_plus @@ -211,6 +212,25 @@ def overwrite_file(self, node_id: str, file_name: str, return r.json() + def overwrite_tempfile(self, node_id: str, file, + read_callbacks: list = None, deduplication=False) -> dict: + params = {} if deduplication else {'suppress': 'deduplication'} + + basename = "file.bin" + mime_type = _get_mimetype(basename) + f = _TeeBufferedReader(file, callbacks=read_callbacks) + + # basename is ignored + m = MultipartEncoder(fields={('content', (quote_plus(basename), f, mime_type))}) + + r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params=params, + data=m, stream=True, headers={'Content-Type': m.content_type}) + + if r.status_code not in OK_CODES: + raise RequestError(r.status_code, r.text) + 
+ return r.json() + def overwrite_stream(self, stream, node_id: str, read_callbacks: list = None) -> dict: """Overwrite content of node with ID *node_id* with content of *stream*. From f1428f84379e7f010cee85062dd1bb023b4cb3fd Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 18 Aug 2016 11:26:49 -0400 Subject: [PATCH 12/63] expand the path -> node caching to solve large delays with readdir + many getattr calls for very full directories --- acdcli/acd_fuse.py | 33 +++++++++++++++++---------------- acdcli/cache/db.py | 4 ++-- acdcli/cache/query.py | 29 ++++++++++++----------------- acdcli/cache/sync.py | 7 +++++-- 4 files changed, 36 insertions(+), 37 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 1d0bdc3..8b9c28f 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -355,7 +355,8 @@ def readdir(self, path, fh) -> 'List[str]': if not node.type == 'folder': raise FuseOSError(errno.ENOTDIR) - return [_ for _ in ['.', '..'] + [c for c in self.cache.childrens_names(node.id)]] + folders, files = self.cache.list_children(folder_id=node.id, folder_path=path) + return [_ for _ in ['.', '..'] + [c.name for c in folders + files]] def getattr(self, path, fh=None) -> dict: """Creates a stat-like attribute dict, see :manpage:`stat(2)`. @@ -386,10 +387,10 @@ def getattr(self, path, fh=None) -> dict: **times) def listxattr(self, path): - node_id = self.cache.resolve_id(path) - if not node_id: + node = self.cache.resolve(path) + if not node: raise FuseOSError(errno.ENOENT) - return self._listxattr(node_id) + return self._listxattr(node.id) def _listxattr(self, node_id): self._xattr_load(node_id) @@ -400,10 +401,10 @@ def _listxattr(self, node_id): return [] def getxattr(self, path, name, position=0): - node_id = self.cache.resolve_id(path) - if not node_id: + node = self.cache.resolve(path) + if not node: raise FuseOSError(errno.ENOENT) - return self._getxattr_bytes(node_id, name) + return self._getxattr_bytes(node.id, name) def _getxattr(self, node_id, name): self._xattr_load(node_id) @@ -421,10 +422,10 @@ def _getxattr_bytes(self, node_id, name): return binascii.a2b_base64(self._getxattr(node_id, name)) def removexattr(self, path, name): - node_id = self.cache.resolve_id(path) - if not node_id: + node = self.cache.resolve(path) + if not node: raise FuseOSError(errno.ENOENT) - self._removexattr(node_id, name) + self._removexattr(node.id, name) def _removexattr(self, node_id, name): self._xattr_load(node_id) @@ -434,10 +435,10 @@ def _removexattr(self, node_id, name): self.properties_dirty.add(node_id) def setxattr(self, path, name, value, options, position=0): - node_id = self.cache.resolve_id(path) - if not node_id: + node = self.cache.resolve(path) + if not node: raise FuseOSError(errno.ENOENT) - self._setxattr_bytes(node_id, name, value) + self._setxattr_bytes(node.id, name, value) def _setxattr(self, node_id, name, value): self._xattr_load(node_id) @@ -713,8 +714,8 @@ def utimens(self, path, times=None): :param times: [atime, mtime]""" - node_id = self.cache.resolve_id(path) - if not node_id: + node = self.cache.resolve(path) + if not node: raise FuseOSError(errno.ENOENT) if times: @@ -725,7 +726,7 @@ def utimens(self, path, times=None): mtime = time() try: - self._setxattr(node_id, _XATTR_MTIME_OVERRIDE_NAME, mtime) + self._setxattr(node.id, _XATTR_MTIME_OVERRIDE_NAME, mtime) self._xattr_write_and_sync() except: raise FuseOSError(errno.ENOTSUP) diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index 6ac66f2..7cb4b6b 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py 
@@ -65,8 +65,8 @@ def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckTyp self._conn.create_function('REGEXP', _regex_match.__code__.co_argcount, _regex_match) - self.path_to_node_id = {} - self.path_to_node_id_lock = Lock() + self.path_to_node_cache = {} + self.path_to_node_cache_lock = Lock() """There are a huge number of repeated path lookups, so cache results and invalidate on new nodes.""" diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 842189e..6e9c05e 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -156,27 +156,14 @@ def get_conflicting_node(self, name: str, parent_id: str): if n.is_available and n.name.lower() == name.lower(): return n - def resolve_id(self, path: str, trash=False) -> 'Union[str|None]': - """Gets a node's id from a path - This is far faster than the below method if the id is cached; - there are zero sqlite queries.""" - with self.path_to_node_id_lock: - try: return self.path_to_node_id[path] - except: pass - n = self._resolve(path, trash) - if n: - self.path_to_node_id[path] = n.id - return n.id - return None - def resolve(self, path: str, trash=False) -> 'Union[Node|None]': """Gets a node from a path""" - with self.path_to_node_id_lock: - try: return self.get_node(self.path_to_node_id[path]) + with self.path_to_node_cache_lock: + try: return self.path_to_node_cache[path] except: pass n = self._resolve(path,trash) if n: - self.path_to_node_id[path] = n.id + self.path_to_node_cache[path] = n return n return None @@ -270,7 +257,7 @@ def get_child(self, folder_id, child_name) -> 'Union[Node|None]': if r.is_available: return r - def list_children(self, folder_id, trash=False) -> 'Tuple[List[Node], List[Node]]': + def list_children(self, folder_id, trash=False, folder_path=None) -> 'Tuple[List[Node], List[Node]]': files = [] folders = [] @@ -286,6 +273,14 @@ def list_children(self, folder_id, trash=False) -> 'Tuple[List[Node], List[Node] folders.append(node) node = c.fetchone() + """If the caller provides the folder_path, we can add all the children to the + path->node_id cache for faster lookup after a directory listing""" + if folder_path: + children = folders + files + with self.path_to_node_cache_lock: + for c in children: + self.path_to_node_cache[folder_path + '/' + c.name] = c + return folders, files def list_trashed_children(self, folder_id) -> 'Tuple[List[Node], List[Node]]': diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index 1ff4672..daacc90 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -49,8 +49,11 @@ def remove_purged(self, purged: list): def insert_nodes(self, nodes: list, partial=True): """Inserts mixed list of files and folders into cache.""" - with self.path_to_node_id_lock: - self.path_to_node_id.clear() + + """Flush the path cache since these new nodes may be deletes, moves, or renames + that affect the path cache, or overwrites that would invalidate the data in it.""" + with self.path_to_node_cache_lock: + self.path_to_node_cache.clear() files = [] folders = [] From 4f9bc8b1da32256fdf3a3ba9081e5746e0494c6b Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 21 Aug 2016 08:51:34 -0400 Subject: [PATCH 13/63] remove tz-aware timestamp references since we're handing mtime in xattrs now. 
--- acdcli/cache/query.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 6e9c05e..46fe0a4 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -1,8 +1,5 @@ import logging from datetime import datetime - -from dateutil.tz import tzutc - from .cursors import cursor logger = logging.getLogger(__name__) @@ -10,9 +7,9 @@ def datetime_from_string(dt: str) -> datetime: try: - dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f+00:00').replace(tzinfo=tzutc()) + dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S.%f+00:00') except ValueError: - dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S+00:00').replace(tzinfo=tzutc()) + dt = datetime.strptime(dt, '%Y-%m-%d %H:%M:%S+00:00') return dt From d75fc545f694ebff32d1221745cec400fbc97f01 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 21 Aug 2016 11:28:40 -0400 Subject: [PATCH 14/63] helper methods for debugging, cleanup of log and comments --- acdcli/acd_fuse.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 8b9c28f..25cb315 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -219,6 +219,11 @@ def __init__(self, buffer_size): self.f = tempfile.SpooledTemporaryFile(max_size=buffer_size) self.lock = Lock() + def read(self, offset, length: int): + with self.lock: + self.f.seek(offset) + return self.f.read(length) + def write(self, offset, bytes_: bytes): with self.lock: self.f.seek(0, os.SEEK_END) @@ -231,18 +236,25 @@ def write(self, offset, bytes_: bytes): return old_len def get_file(self): + """Return the file for direct access. Be sure to lock from the outside when doing so""" self.f.seek(0) return self.f def _write_and_sync(self, buffer: WriteBuffer, node_id: str): try: - r = self.acd_client.overwrite_tempfile(node_id, buffer.get_file()) + with buffer.lock: + r = self.acd_client.overwrite_tempfile(node_id, buffer.get_file()) except (RequestError, IOError) as e: logger.error('Error writing node "%s". %s' % (node_id, str(e))) else: self.cache.insert_node(r) - def write(self, node_id, fh, offset, bytes_): + def read(self, node_id, fh, offset, length: int): + b = self.buffers.get(node_id) + if b: + return b.read(offset, length) + + def write(self, node_id, fh, offset, bytes_: bytes): """Gets WriteBuffer from defaultdict. :raises: FuseOSError: wrong offset or writing failed""" @@ -271,6 +283,8 @@ def __call__(self, op, path, *args): targs = (len(args[0]),) + args[1:] elif op == 'chmod': targs = (oct(args[0]),) + args[1:] + elif op == 'setxattr': + targs = (len(args[0]),) + args[1:] logger.debug('-> %s %s %s', op, path, repr(args if not targs else targs)) @@ -284,6 +298,8 @@ def __call__(self, op, path, *args): finally: if op == 'read': ret = len(ret) + elif op == 'getxattr' and ret and ret != '[Errno 61] No data available': + ret = len(ret) logger.debug('<- %s %s', op, repr(ret)) @@ -677,9 +693,7 @@ def truncate(self, path, length, fh=None): """No good way to deal with positive lengths at the moment; since we can only do something about it in the middle of writing, this means the only use case we can - capture is when a program over-writes and then truncates back. In the future, if - we can get cached file backing instead of memory backing, there would be more to - do here. 
In the mean time we ignore.""" + capture is when a program over-writes and then truncates back.""" return 0 def release(self, path, fh): From c25a2eadc4c50b0b567504b6111decbc064ec3fc Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 21 Aug 2016 23:14:04 -0400 Subject: [PATCH 15/63] duplicity support: opportunistic reading and file size query if we're writing at the same time but have not yet flushed to amazon --- acdcli/acd_fuse.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 25cb315..19a5b0c 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -235,6 +235,11 @@ def write(self, offset, bytes_: bytes): self.f.write(bytes_) return old_len + def length(self): + with self.lock: + self.f.seek(0, os.SEEK_END) + return self.f.tell() + def get_file(self): """Return the file for direct access. Be sure to lock from the outside when doing so""" self.f.seek(0) @@ -262,6 +267,11 @@ def write(self, node_id, fh, offset, bytes_: bytes): b = self.buffers[node_id] b.write(offset, bytes_) + def length(self, node_id, fh): + b = self.buffers.get(node_id) + if b: + return b.length() + def release(self, node_id, fh): """:raises: FuseOSError""" @@ -388,6 +398,9 @@ def getattr(self, path, fh=None) -> dict: try: mtime = self._getxattr(node.id, _XATTR_MTIME_OVERRIDE_NAME) except: mtime = node.modified.timestamp() + size = self.wp.length(node.id, fh) + if not size: size = node.size + times = dict(st_atime=time(), st_mtime=mtime, st_ctime=node.created.timestamp()) @@ -399,7 +412,7 @@ def getattr(self, path, fh=None) -> dict: elif node.is_file: return dict(st_mode=stat.S_IFREG | 0o0666, st_nlink=self.cache.num_parents(node.id) if self.nlinks else 1, - st_size=node.size, + st_size=size, **times) def listxattr(self, path): @@ -505,6 +518,11 @@ def read(self, path, length, offset, fh) -> bytes: if node.size < offset + length: length = node.size - offset + """If we attempt to read something we just wrote, give it back""" + ret = self.wp.read(node.id, fh, offset, length) + if ret and len(ret) == length: + return ret + return self.rp.get(node.id, offset, length, node.size) def statfs(self, path) -> dict: From 1f2b4c698c58e1b940dd51b488845a33521fabd4 Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 10 Sep 2016 23:16:41 -0500 Subject: [PATCH 16/63] borgbackup support by allowing manual flushes, deduplicated with releases --- acdcli/acd_fuse.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 19a5b0c..916a773 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -218,6 +218,7 @@ class WriteBuffer(object): def __init__(self, buffer_size): self.f = tempfile.SpooledTemporaryFile(max_size=buffer_size) self.lock = Lock() + self.dirty = True def read(self, offset, length: int): with self.lock: @@ -226,6 +227,7 @@ def read(self, offset, length: int): def write(self, offset, bytes_: bytes): with self.lock: + self.dirty = True self.f.seek(0, os.SEEK_END) old_len = self.f.tell() if offset > old_len: @@ -248,7 +250,10 @@ def get_file(self): def _write_and_sync(self, buffer: WriteBuffer, node_id: str): try: with buffer.lock: + if not buffer.dirty: + return r = self.acd_client.overwrite_tempfile(node_id, buffer.get_file()) + buffer.dirty = False except (RequestError, IOError) as e: logger.error('Error writing node "%s". 
%s' % (node_id, str(e))) else: @@ -272,9 +277,12 @@ def length(self, node_id, fh): if b: return b.length() - def release(self, node_id, fh): - """:raises: FuseOSError""" + def flush(self, node_id, fh): + b = self.buffers.get(node_id) + if b: + self._write_and_sync(b, node_id) + def release(self, node_id, fh): b = self.buffers.get(node_id) if b: self._write_and_sync(b, node_id) @@ -684,9 +692,13 @@ def write(self, path, data, offset, fh) -> int: return len(data) def flush(self, path, fh): - """noop since we need to keep the whole buffer in memory; - acd only supports sequentual writes otherwise""" - pass + if fh: + node = self.handles[fh] + else: + node = self.cache.resolve(path) + if not node: + raise FuseOSError(errno.ENOENT) + self.wp.flush(node.id, fh) def truncate(self, path, length, fh=None): """Pseudo-truncates a file, i.e. clears content if ``length``==0 or does nothing From d135b519bd1a92ee60e30a91a79aa944e42c44f3 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 11 Sep 2016 22:14:06 -0500 Subject: [PATCH 17/63] retry fuse operations if there's a failure on amazon's side and the return code says to retry. --- acdcli/api/content.py | 94 +++++++++++++++++++++++------------------- acdcli/api/metadata.py | 28 +++++++------ acdcli/api/trash.py | 10 +++-- 3 files changed, 73 insertions(+), 59 deletions(-) diff --git a/acdcli/api/content.py b/acdcli/api/content.py index 1218501..357d77f 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -61,57 +61,63 @@ class ContentMixin(object): """Implements content portion of the ACD API.""" def create_folder(self, name: str, parent=None) -> dict: - body = {'kind': 'FOLDER', 'name': name} - if parent: - body['parents'] = [parent] - body_str = json.dumps(body) + while True: + body = {'kind': 'FOLDER', 'name': name} + if parent: + body['parents'] = [parent] + body_str = json.dumps(body) - acc_codes = [http.CREATED] + acc_codes = [http.CREATED] - r = self.BOReq.post(self.metadata_url + 'nodes', acc_codes=acc_codes, data=body_str) + r = self.BOReq.post(self.metadata_url + 'nodes', acc_codes=acc_codes, data=body_str) + if r.status_code == 500: continue # the fault lies not in our stars, but in amazon - if r.status_code not in acc_codes: - raise RequestError(r.status_code, r.text) + if r.status_code not in acc_codes: + raise RequestError(r.status_code, r.text) - return r.json() + return r.json() def create_file(self, file_name: str, parent: str = None) -> dict: - params = {'suppress': 'deduplication'} + while True: + params = {'suppress': 'deduplication'} - basename = os.path.basename(file_name) - metadata = {'kind': 'FILE', 'name': basename} - if parent: - metadata['parents'] = [parent] - mime_type = _get_mimetype(basename) - f = io.BytesIO() + basename = os.path.basename(file_name) + metadata = {'kind': 'FILE', 'name': basename} + if parent: + metadata['parents'] = [parent] + mime_type = _get_mimetype(basename) + f = io.BytesIO() - # basename is ignored - m = MultipartEncoder(fields=OrderedDict([('metadata', json.dumps(metadata)), - ('content', (quote_plus(basename), f, mime_type))]) - ) + # basename is ignored + m = MultipartEncoder(fields=OrderedDict([('metadata', json.dumps(metadata)), + ('content', (quote_plus(basename), f, mime_type))]) + ) - ok_codes = [http.CREATED] - r = self.BOReq.post(self.content_url + 'nodes', params=params, data=m, - acc_codes=ok_codes, headers={'Content-Type': m.content_type}) + ok_codes = [http.CREATED] + r = self.BOReq.post(self.content_url + 'nodes', params=params, data=m, + acc_codes=ok_codes, 
headers={'Content-Type': m.content_type}) + if r.status_code == 500: continue # the fault lies not in our stars, but in amazon - if r.status_code not in ok_codes: - raise RequestError(r.status_code, r.text) - return r.json() + if r.status_code not in ok_codes: + raise RequestError(r.status_code, r.text) + return r.json() def clear_file(self, node_id: str) -> dict: """Clears a file's content by overwriting it with an empty BytesIO. :param node_id: valid file node ID""" - m = MultipartEncoder(fields={('content', (' ', io.BytesIO(), _get_mimetype()))}) + while True: + m = MultipartEncoder(fields={('content', (' ', io.BytesIO(), _get_mimetype()))}) - r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params={}, - data=m, stream=True, headers={'Content-Type': m.content_type}) + r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params={}, + data=m, stream=True, headers={'Content-Type': m.content_type}) + if r.status_code == 500: continue # the fault lies not in our stars, but in amazon - if r.status_code not in OK_CODES: - raise RequestError(r.status_code, r.text) + if r.status_code not in OK_CODES: + raise RequestError(r.status_code, r.text) - return r.json() + return r.json() def upload_file(self, file_name: str, parent: str = None, read_callbacks=None, deduplication=False) -> dict: @@ -214,22 +220,24 @@ def overwrite_file(self, node_id: str, file_name: str, def overwrite_tempfile(self, node_id: str, file, read_callbacks: list = None, deduplication=False) -> dict: - params = {} if deduplication else {'suppress': 'deduplication'} + while True: + params = {} if deduplication else {'suppress': 'deduplication'} - basename = "file.bin" - mime_type = _get_mimetype(basename) - f = _TeeBufferedReader(file, callbacks=read_callbacks) + basename = "file.bin" + mime_type = _get_mimetype(basename) + f = _TeeBufferedReader(file, callbacks=read_callbacks) - # basename is ignored - m = MultipartEncoder(fields={('content', (quote_plus(basename), f, mime_type))}) + # basename is ignored + m = MultipartEncoder(fields={('content', (quote_plus(basename), f, mime_type))}) - r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params=params, - data=m, stream=True, headers={'Content-Type': m.content_type}) + r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params=params, + data=m, stream=True, headers={'Content-Type': m.content_type}) + if r.status_code == 500: continue # the fault lies not in our stars, but in amazon - if r.status_code not in OK_CODES: - raise RequestError(r.status_code, r.text) + if r.status_code not in OK_CODES: + raise RequestError(r.status_code, r.text) - return r.json() + return r.json() def overwrite_stream(self, stream, node_id: str, read_callbacks: list = None) -> dict: """Overwrite content of node with ID *node_id* with content of *stream*. 
diff --git a/acdcli/api/metadata.py b/acdcli/api/metadata.py index fdfc34e..8071e9c 100644 --- a/acdcli/api/metadata.py +++ b/acdcli/api/metadata.py @@ -153,11 +153,13 @@ def get_metadata(self, node_id: str, assets=False, temp_link=True) -> dict: # this will increment the node's version attribute def update_metadata(self, node_id: str, properties: dict) -> dict: """Update a node's properties like name, description, status, parents, ...""" - body = json.dumps(properties) - r = self.BOReq.patch(self.metadata_url + 'nodes/' + node_id, data=body) - if r.status_code not in OK_CODES: - raise RequestError(r.status_code, r.text) - return r.json() + while True: + body = json.dumps(properties) + r = self.BOReq.patch(self.metadata_url + 'nodes/' + node_id, data=body) + if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code not in OK_CODES: + raise RequestError(r.status_code, r.text) + return r.json() def get_root_id(self) -> str: """Gets the ID of the root node @@ -249,13 +251,15 @@ def add_property(self, node_id: str, owner_id: str, key: str, value: str) -> dic :returns dict: {'key': '', 'location': '/properties/', 'value': ''}""" - ok_codes = [requests.codes.CREATED] - r = self.BOReq.put(self.metadata_url + 'nodes/' + node_id + - '/properties/' + owner_id + '/' + key, - data=json.dumps({'value': value}), acc_codes=ok_codes) - if r.status_code not in ok_codes: - raise RequestError(r.status_code, r.text) - return r.json() + while True: + ok_codes = [requests.codes.CREATED] + r = self.BOReq.put(self.metadata_url + 'nodes/' + node_id + + '/properties/' + owner_id + '/' + key, + data=json.dumps({'value': value}), acc_codes=ok_codes) + if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code not in ok_codes: + raise RequestError(r.status_code, r.text) + return r.json() def delete_property(self, node_id: str, owner_id: str, key: str): """Deletes *key* property from node with ID *node_id*.""" diff --git a/acdcli/api/trash.py b/acdcli/api/trash.py index 36c7186..d42cdb0 100644 --- a/acdcli/api/trash.py +++ b/acdcli/api/trash.py @@ -12,10 +12,12 @@ def list_trash(self) -> list: return self.BOReq.paginated_get(self.metadata_url + 'trash') def move_to_trash(self, node_id: str) -> dict: - r = self.BOReq.put(self.metadata_url + 'trash/' + node_id) - if r.status_code not in OK_CODES: - raise RequestError(r.status_code, r.text) - return r.json() + while True: + r = self.BOReq.put(self.metadata_url + 'trash/' + node_id) + if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code not in OK_CODES: + raise RequestError(r.status_code, r.text) + return r.json() def restore(self, node_id: str) -> dict: r = self.BOReq.post(self.metadata_url + 'trash/' + node_id + '/restore') From d867f531ee32422062dfe99f251169ad6f8dca17 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 12 Sep 2016 19:04:39 -0500 Subject: [PATCH 18/63] clean up codes when we retry --- acdcli/api/common.py | 4 ++-- acdcli/api/content.py | 17 +++++++++-------- acdcli/api/metadata.py | 6 +++--- acdcli/api/trash.py | 2 +- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/acdcli/api/common.py b/acdcli/api/common.py index 0a4d71c..527f729 100644 --- a/acdcli/api/common.py +++ b/acdcli/api/common.py @@ -1,6 +1,6 @@ -import requests import re +import requests from requests.exceptions import ConnectionError try: @@ -14,7 +14,7 @@ class ReadTimeoutError(Exception): # status codes that indicate request success OK_CODES = 
[requests.codes.OK] - +RETRY_CODES = [requests.codes.server_error, requests.codes.gateway_timeout] class RequestError(Exception): """Catch-all exception class for various connection and ACD server errors.""" diff --git a/acdcli/api/content.py b/acdcli/api/content.py index 357d77f..283f5a9 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -1,12 +1,12 @@ import http.client as http -import os -import json import io +import json +import logging import mimetypes -import tempfile +import os from collections import OrderedDict -import logging from urllib.parse import quote_plus + from requests import Response from requests_toolbelt import MultipartEncoder @@ -70,7 +70,7 @@ def create_folder(self, name: str, parent=None) -> dict: acc_codes = [http.CREATED] r = self.BOReq.post(self.metadata_url + 'nodes', acc_codes=acc_codes, data=body_str) - if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code not in acc_codes: raise RequestError(r.status_code, r.text) @@ -96,7 +96,7 @@ def create_file(self, file_name: str, parent: str = None) -> dict: ok_codes = [http.CREATED] r = self.BOReq.post(self.content_url + 'nodes', params=params, data=m, acc_codes=ok_codes, headers={'Content-Type': m.content_type}) - if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code not in ok_codes: raise RequestError(r.status_code, r.text) @@ -112,7 +112,7 @@ def clear_file(self, node_id: str) -> dict: r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params={}, data=m, stream=True, headers={'Content-Type': m.content_type}) - if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code not in OK_CODES: raise RequestError(r.status_code, r.text) @@ -221,6 +221,7 @@ def overwrite_file(self, node_id: str, file_name: str, def overwrite_tempfile(self, node_id: str, file, read_callbacks: list = None, deduplication=False) -> dict: while True: + file.seek(0) params = {} if deduplication else {'suppress': 'deduplication'} basename = "file.bin" @@ -232,7 +233,7 @@ def overwrite_tempfile(self, node_id: str, file, r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params=params, data=m, stream=True, headers={'Content-Type': m.content_type}) - if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code not in OK_CODES: raise RequestError(r.status_code, r.text) diff --git a/acdcli/api/metadata.py b/acdcli/api/metadata.py index 8071e9c..8ee60f6 100644 --- a/acdcli/api/metadata.py +++ b/acdcli/api/metadata.py @@ -1,8 +1,8 @@ """Node metadata operations""" +import http.client import json import logging -import http.client import tempfile from collections import namedtuple @@ -156,7 +156,7 @@ def update_metadata(self, node_id: str, properties: dict) -> dict: while True: body = json.dumps(properties) r = self.BOReq.patch(self.metadata_url + 'nodes/' + node_id, data=body) - if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code 
not in OK_CODES: raise RequestError(r.status_code, r.text) return r.json() @@ -256,7 +256,7 @@ def add_property(self, node_id: str, owner_id: str, key: str, value: str) -> dic r = self.BOReq.put(self.metadata_url + 'nodes/' + node_id + '/properties/' + owner_id + '/' + key, data=json.dumps({'value': value}), acc_codes=ok_codes) - if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code not in ok_codes: raise RequestError(r.status_code, r.text) return r.json() diff --git a/acdcli/api/trash.py b/acdcli/api/trash.py index d42cdb0..904f203 100644 --- a/acdcli/api/trash.py +++ b/acdcli/api/trash.py @@ -14,7 +14,7 @@ def list_trash(self) -> list: def move_to_trash(self, node_id: str) -> dict: while True: r = self.BOReq.put(self.metadata_url + 'trash/' + node_id) - if r.status_code == 500: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code not in OK_CODES: raise RequestError(r.status_code, r.text) return r.json() From d7bc91748d5f30e0dc235565ca469bb4fc2b1c48 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 12 Sep 2016 22:07:53 -0500 Subject: [PATCH 19/63] Amazon has any number of ways to make file operations fail. --- acdcli/api/content.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/acdcli/api/content.py b/acdcli/api/content.py index 283f5a9..4f31526 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -377,19 +377,21 @@ def chunked_download(self, node_id: str, file: io.BufferedWriter, **kwargs): return def response_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> Response: - ok_codes = [http.PARTIAL_CONTENT] - end = offset + length - 1 - logger.debug('chunk o %d l %d' % (offset, length)) - - r = self.BOReq.get(self.content_url + 'nodes/' + node_id + '/content', - acc_codes=ok_codes, stream=True, - headers={'Range': 'bytes=%d-%d' % (offset, end)}, **kwargs) - # if r.status_code == http.REQUESTED_RANGE_NOT_SATISFIABLE: - # return - if r.status_code not in ok_codes: - raise RequestError(r.status_code, r.text) - - return r + while True: + ok_codes = [http.PARTIAL_CONTENT] + retry_codes = [1000] + end = offset + length - 1 + logger.debug('chunk o %d l %d' % (offset, length)) + + r = self.BOReq.get(self.content_url + 'nodes/' + node_id + '/content', + acc_codes=ok_codes, stream=True, + headers={'Range': 'bytes=%d-%d' % (offset, end)}, **kwargs) + # if r.status_code == http.REQUESTED_RANGE_NOT_SATISFIABLE: + # return + if r.status_code in retry_codes: continue # the fault lies not in our stars, but in amazon + if r.status_code not in ok_codes: + raise RequestError(r.status_code, r.text) + return r def download_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> bytearray: """Load a file chunk into memory. 
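The retry idiom that patches 17 through 22 converge on is the same at every call site; a condensed sketch follows (not part of the series; OK_CODES, RETRY_CODES and RequestError are the names defined in acdcli/api/common.py, and the helper name _request_with_retry is illustrative only):

    from acdcli.api.common import OK_CODES, RETRY_CODES, RequestError

    def _request_with_retry(do_request, ok_codes=OK_CODES):
        """Re-issue a request while Amazon reports a transient failure."""
        while True:
            try:
                r = do_request()  # e.g. lambda: BOReq.put(url, data=m, stream=True)
            except RequestError as e:
                if e.status_code == RequestError.CODE.CONN_EXCEPTION:
                    continue  # dropped connection: just try again
                raise
            if r.status_code in RETRY_CODES:
                continue  # the fault lies not in our stars, but in amazon
            if r.status_code not in ok_codes:
                raise RequestError(r.status_code, r.text)
            return r

Each call site then differs only in the request it passes in and in whether it returns r or r.json().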
From f4121e894e024339511e17e1a0d63a0d6eaa05b6 Mon Sep 17 00:00:00 2001
From: Ben
Date: Wed, 14 Sep 2016 07:53:39 -0500
Subject: [PATCH 20/63] turns out the 1000 response is thrown rather than returned

---
 acdcli/api/content.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/acdcli/api/content.py b/acdcli/api/content.py
index 4f31526..a943929 100644
--- a/acdcli/api/content.py
+++ b/acdcli/api/content.py
@@ -379,13 +379,17 @@ def chunked_download(self, node_id: str, file: io.BufferedWriter, **kwargs):
     def response_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> Response:
         while True:
             ok_codes = [http.PARTIAL_CONTENT]
-            retry_codes = [1000]
+            retry_codes = [400]
             end = offset + length - 1
             logger.debug('chunk o %d l %d' % (offset, length))
 
-            r = self.BOReq.get(self.content_url + 'nodes/' + node_id + '/content',
-                               acc_codes=ok_codes, stream=True,
-                               headers={'Range': 'bytes=%d-%d' % (offset, end)}, **kwargs)
+            try:
+                r = self.BOReq.get(self.content_url + 'nodes/' + node_id + '/content',
+                                   acc_codes=ok_codes, stream=True,
+                                   headers={'Range': 'bytes=%d-%d' % (offset, end)}, **kwargs)
+            except RequestError as e:
+                if e.status_code == RequestError.CODE.CONN_EXCEPTION: continue
+                raise
             # if r.status_code == http.REQUESTED_RANGE_NOT_SATISFIABLE:
             #     return
             if r.status_code in retry_codes: continue # the fault lies not in our stars, but in amazon

From 18671ea0c190df1655ed5d22f6415d40c42f6d0d Mon Sep 17 00:00:00 2001
From: Ben Gemmill
Date: Sun, 6 Nov 2016 23:34:40 -0600
Subject: [PATCH 21/63] we can get 1000 error codes on writing too.

---
 acdcli/api/content.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/acdcli/api/content.py b/acdcli/api/content.py
index a943929..cc19721 100644
--- a/acdcli/api/content.py
+++ b/acdcli/api/content.py
@@ -231,8 +231,13 @@ def overwrite_tempfile(self, node_id: str, file,
             # basename is ignored
             m = MultipartEncoder(fields={('content', (quote_plus(basename), f, mime_type))})
 
-            r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params=params,
-                               data=m, stream=True, headers={'Content-Type': m.content_type})
+            try:
+                r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params=params,
+                                   data=m, stream=True, headers={'Content-Type': m.content_type})
+            except RequestError as e:
+                if e.status_code == RequestError.CODE.CONN_EXCEPTION: continue
+                raise
+
             if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon
 
             if r.status_code not in OK_CODES:

From 58c6bd47c40e68478a88c9cb09db635a273a4187 Mon Sep 17 00:00:00 2001
From: Ben Gemmill
Date: Tue, 8 Nov 2016 20:28:16 -0600
Subject: [PATCH 22/63] handle more error codes when reading, and generally retry on 400s.
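For reference, a small sketch (not part of the patch) of what the symbolic names below resolve to in requests' status-code registry:

    import requests

    RETRY_CODES = [requests.codes.server_error,     # 500
                   requests.codes.gateway_timeout,  # 504
                   requests.codes.bad_request]      # 400
    # the follow-up commit (PATCH 23) adds requests.codes.service_unavailable (503)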
--- acdcli/api/common.py | 2 +- acdcli/api/content.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/acdcli/api/common.py b/acdcli/api/common.py index 527f729..1512235 100644 --- a/acdcli/api/common.py +++ b/acdcli/api/common.py @@ -14,7 +14,7 @@ class ReadTimeoutError(Exception): # status codes that indicate request success OK_CODES = [requests.codes.OK] -RETRY_CODES = [requests.codes.server_error, requests.codes.gateway_timeout] +RETRY_CODES = [requests.codes.server_error, requests.codes.gateway_timeout, requests.codes.bad_request] class RequestError(Exception): """Catch-all exception class for various connection and ACD server errors.""" diff --git a/acdcli/api/content.py b/acdcli/api/content.py index cc19721..e9be28c 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -384,7 +384,6 @@ def chunked_download(self, node_id: str, file: io.BufferedWriter, **kwargs): def response_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> Response: while True: ok_codes = [http.PARTIAL_CONTENT] - retry_codes = [400] end = offset + length - 1 logger.debug('chunk o %d l %d' % (offset, length)) @@ -397,7 +396,7 @@ def response_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> Re raise # if r.status_code == http.REQUESTED_RANGE_NOT_SATISFIABLE: # return - if r.status_code in retry_codes: continue # the fault lies not in our stars, but in amazon + if r.status_code in RETRY_CODES: continue # the fault lies not in our stars, but in amazon if r.status_code not in ok_codes: raise RequestError(r.status_code, r.text) return r From c1f58a497751f6464c05080f645884b3b0ceadd4 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Thu, 1 Dec 2016 23:17:28 -0500 Subject: [PATCH 23/63] handle 503 codes when amazon doesn't get a request body --- acdcli/api/common.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/acdcli/api/common.py b/acdcli/api/common.py index 1512235..b0ddd53 100644 --- a/acdcli/api/common.py +++ b/acdcli/api/common.py @@ -14,7 +14,10 @@ class ReadTimeoutError(Exception): # status codes that indicate request success OK_CODES = [requests.codes.OK] -RETRY_CODES = [requests.codes.server_error, requests.codes.gateway_timeout, requests.codes.bad_request] +RETRY_CODES = [requests.codes.server_error, + requests.codes.gateway_timeout, + requests.codes.bad_request, + requests.codes.service_unavailable] class RequestError(Exception): """Catch-all exception class for various connection and ACD server errors.""" From 5124699d9a1dfb6126d66f72ed681827a91e26c0 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 25 Dec 2016 21:48:44 -0500 Subject: [PATCH 24/63] verify that file renames and moves happen since amazon can drop some despite returning success --- acdcli/api/metadata.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/acdcli/api/metadata.py b/acdcli/api/metadata.py index 4850ee8..1dd0c7d 100644 --- a/acdcli/api/metadata.py +++ b/acdcli/api/metadata.py @@ -227,11 +227,24 @@ def move_node_from(self, node_id: str, old_parent_id: str, new_parent_id: str) - return r.json() def move_node(self, node_id: str, parent_id: str) -> dict: - return self.update_metadata(node_id, {'parents': [parent_id]}) + properties = {'parents': [parent_id]} + # logger.debug('MOVE: parents: %s' % str([parent_id])) + while True: + ret = self.update_metadata(node_id, properties) + metadata = self.get_metadata(node_id, False, False) + # logger.debug('MOVE: metadata: %s' % str(metadata)) + if metadata['parents'] == [parent_id]: break 
+ return ret def rename_node(self, node_id: str, new_name: str) -> dict: properties = {'name': new_name} - return self.update_metadata(node_id, properties) + # logger.debug('RENAME: new_name: %s' % new_name) + while True: + ret = self.update_metadata(node_id, properties) + metadata = self.get_metadata(node_id, False, False) + # logger.debug('RENAME: metadata: %s' % str(metadata)) + if metadata['name'] == new_name: break + return ret def set_available(self, node_id: str) -> dict: """Sets node status from 'PENDING' to 'AVAILABLE'.""" From 6fb1a06016682049b87d2368434013a08006728d Mon Sep 17 00:00:00 2001 From: Ben Date: Sat, 7 Jan 2017 23:57:18 -0500 Subject: [PATCH 25/63] move tempfile uploading to multipart streams to see if that helps with large uploads --- acdcli/api/content.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/acdcli/api/content.py b/acdcli/api/content.py index ef81f85..565227a 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -217,21 +217,27 @@ def overwrite_file(self, node_id: str, file_name: str, return r.json() def overwrite_tempfile(self, node_id: str, file, - read_callbacks: list = None, deduplication=False) -> dict: + read_callbacks: list = None) -> dict: + """Overwrite content of node with ID *node_id* with content of *file*. + + :param file: readable and seekable object""" + while True: + # logger.debug('OVERWRITE: node_id: %s' % node_id) file.seek(0) - params = {} if deduplication else {'suppress': 'deduplication'} - basename = "file.bin" - mime_type = _get_mimetype(basename) - f = _TeeBufferedReader(file, callbacks=read_callbacks) + if _stream_is_empty(file): + return self.clear_file(node_id) - # basename is ignored - m = MultipartEncoder(fields={('content', (quote_plus(basename), f, mime_type))}) + metadata = {} + import uuid + boundary = uuid.uuid4().hex try: - r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', params=params, - data=m, stream=True, headers={'Content-Type': m.content_type}) + r = self.BOReq.put(self.content_url + 'nodes/' + node_id + '/content', + data=self._multipart_stream(metadata, file, boundary, read_callbacks), + headers={'Content-Type': 'multipart/form-data; boundary=%s' + % boundary}) except RequestError as e: if e.status_code == RequestError.CODE.CONN_EXCEPTION: continue raise From 09289761726ce127159e52946ca7a94988b45882 Mon Sep 17 00:00:00 2001 From: Ben Date: Sun, 8 Jan 2017 18:08:26 -0500 Subject: [PATCH 26/63] extra logging around renames to catch when amazon drops rename requests --- acdcli/acd_fuse.py | 13 +++++++++++++ acdcli/api/metadata.py | 14 ++++++-------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 79ec005..8087c17 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -17,6 +17,8 @@ import ctypes.util import binascii +import requests + from acdcli.cache.db import CacheConsts ctypes.util.__find_library = ctypes.util.find_library @@ -605,6 +607,17 @@ def create(self, path, mode) -> int: self.cache.insert_node(r) node = self.cache.get_node(r['id']) except RequestError as e: + # file all ready exists, see what we know about it since the + # cache may be out of sync or amazon missed a rename + if e.status_code == requests.codes.conflict: + prior_node_id = json.loads(e.msg)["info"]["nodeId"] + logger.error('create: duplicate name: %s prior_node_id: %s' % (name, prior_node_id)) + prior_node_amazon = self.acd_client.get_metadata(prior_node_id, False, False) + logger.error('create: 
prior_node(amazon): %s' % str(prior_node_amazon)) + prior_node_cache = self.cache.get_node(prior_node_id) + logger.error('create: prior_node(cache): %s' % str(prior_node_cache)) + # if prior_node_cache.name != prior_node_amazon["name"]: + # self._rename(prior_node_id, prior_node_cache.name) FuseOSError.convert(e) with self.fh_lock: diff --git a/acdcli/api/metadata.py b/acdcli/api/metadata.py index 1dd0c7d..7645473 100644 --- a/acdcli/api/metadata.py +++ b/acdcli/api/metadata.py @@ -228,22 +228,20 @@ def move_node_from(self, node_id: str, old_parent_id: str, new_parent_id: str) - def move_node(self, node_id: str, parent_id: str) -> dict: properties = {'parents': [parent_id]} - # logger.debug('MOVE: parents: %s' % str([parent_id])) + logger.debug('move_node: node_id: %s parents: %s' % (node_id, str([parent_id]))) while True: ret = self.update_metadata(node_id, properties) - metadata = self.get_metadata(node_id, False, False) - # logger.debug('MOVE: metadata: %s' % str(metadata)) - if metadata['parents'] == [parent_id]: break + logger.debug('move_node: metadata: %s' % str(ret)) + if ret['parents'] == [parent_id]: break return ret def rename_node(self, node_id: str, new_name: str) -> dict: properties = {'name': new_name} - # logger.debug('RENAME: new_name: %s' % new_name) + logger.debug('rename_node: node_id: %s new_name: %s' % (node_id, new_name)) while True: ret = self.update_metadata(node_id, properties) - metadata = self.get_metadata(node_id, False, False) - # logger.debug('RENAME: metadata: %s' % str(metadata)) - if metadata['name'] == new_name: break + logger.debug('rename_node: metadata: %s' % str(ret)) + if ret['name'] == new_name: break return ret def set_available(self, node_id: str) -> dict: From 6843dc19b4b3b73cea13644e2612c0935f6b6709 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 11 Jan 2017 21:02:30 -0500 Subject: [PATCH 27/63] mode, uid, and gid --- acd_cli.py | 7 +++-- acdcli/acd_fuse.py | 66 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/acd_cli.py b/acd_cli.py index ca76841..47d4156 100755 --- a/acd_cli.py +++ b/acd_cli.py @@ -1147,11 +1147,14 @@ def mount_action(args: argparse.Namespace): import acdcli.acd_fuse acdcli.acd_fuse.mount(args.path, dict(acd_client=acd_client, cache=cache, nlinks=args.nlinks, autosync=asp, - settings_path=SETTINGS_PATH), + settings_path=SETTINGS_PATH, + umask = args.umask, + uid = args.uid, + gid = args.gid + ), ro=args.read_only, foreground=args.foreground, nothreads=args.single_threaded, nonempty=args.nonempty, modules=args.modules, - umask=args.umask,gid=args.gid,uid=args.uid, allow_root=args.allow_root, allow_other=args.allow_other, volname=args.volname) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 8087c17..93eff1a 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -52,6 +52,9 @@ def find_library(*args): _SETTINGS_FILENAME = 'fuse.ini' _XATTR_PROPERTY_NAME = 'xattrs' _XATTR_MTIME_OVERRIDE_NAME = 'fuse.mtime' +_XATTR_MODE_OVERRIDE_NAME = 'fuse.mode' +_XATTR_UID_OVERRIDE_NAME = 'fuse.uid' +_XATTR_GID_OVERRIDE_NAME = 'fuse.gid' _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5) @@ -369,6 +372,12 @@ def __init__(self, **kwargs): """lock for fh counter increment and handle dict writes""" self.nlinks = kwargs.get('nlinks', False) """whether to calculate the number of hardlinks for folders""" + self.uid = kwargs['uid'] + """sets the default uid""" + self.gid = kwargs['gid'] + """sets the default gid""" + self.umask = 
kwargs['umask'] + """sets the default umask""" self.destroyed = autosync.keywords['stop'] """:type: multiprocessing.Event""" @@ -411,19 +420,30 @@ def getattr(self, path, fh=None) -> dict: size = self.wp.length(node.id, fh) if not size: size = node.size - times = dict(st_atime=time(), + try: uid = self._getxattr(node.id, _XATTR_UID_OVERRIDE_NAME) + except: uid = self.uid + + try: gid = self._getxattr(node.id, _XATTR_GID_OVERRIDE_NAME) + except: gid = self.gid + + attrs = dict(st_atime=time(), st_mtime=mtime, - st_ctime=node.created.timestamp()) + st_ctime=node.created.timestamp(), + st_uid=uid, + st_gid=gid) + + try: mode = stat.S_IMODE(self._getxattr(node.id, _XATTR_MODE_OVERRIDE_NAME)) + except: mode = None if node.is_folder: - return dict(st_mode=stat.S_IFDIR | 0o0777, + return dict(st_mode=stat.S_IFDIR | (mode if mode else 0o0777 & ~self.umask), st_nlink=self.cache.num_children(node.id) if self.nlinks else 1, - **times) + **attrs) elif node.is_file: - return dict(st_mode=stat.S_IFREG | 0o0666, + return dict(st_mode=stat.S_IFREG | (mode if mode else 0o0666 & ~self.umask), st_nlink=self.cache.num_parents(node.id) if self.nlinks else 1, st_size=size, - **times) + **attrs) def listxattr(self, path): node = self.cache.resolve(path) @@ -450,7 +470,7 @@ def _getxattr(self, node_id, name): with self.xattr_cache_lock: try: ret = self.xattr_cache[node_id][name] - if ret: + if ret is not None: return ret except: raise FuseOSError(errno.ENODATA) # should be ENOATTR @@ -548,9 +568,7 @@ def statfs(self, path) -> dict: ) def mkdir(self, path, mode): - """Creates a directory at ``path`` (see :manpage:`mkdir(2)`). - - :param mode: not used""" + """Creates a directory at ``path`` (see :manpage:`mkdir(2)`).""" name = os.path.basename(path) ppath = os.path.dirname(path) @@ -564,6 +582,8 @@ def mkdir(self, path, mode): FuseOSError.convert(e) else: self.cache.insert_node(r) + node = self.cache.get_node(r['id']) + self._chmod(node, mode) def _trash(self, path): logger.debug('trash %s' % path) @@ -593,7 +613,6 @@ def unlink(self, path): def create(self, path, mode) -> int: """Creates an empty file at ``path``. 
- :param mode: not used :returns int: file handle""" name = os.path.basename(path) @@ -620,6 +639,8 @@ def create(self, path, mode) -> int: # self._rename(prior_node_id, prior_node_cache.name) FuseOSError.convert(e) + self._chmod(node, mode) + with self.fh_lock: self.fh += 1 self.handles[self.fh] = node @@ -791,12 +812,27 @@ def utimens(self, path, times=None): return 0 def chmod(self, path, mode): - """Not implemented.""" - pass + node = self.cache.resolve(path) + if not node: + raise FuseOSError(errno.ENOENT) + return self._chmod(node, mode) + + def _chmod(self, node, mode): + self._setxattr(node.id, _XATTR_MODE_OVERRIDE_NAME, mode) + self._xattr_write_and_sync() + return 0 def chown(self, path, uid, gid): - """Not implemented.""" - pass + node = self.cache.resolve(path) + if not node: + raise FuseOSError(errno.ENOENT) + return self._chown(node, uid, gid) + + def _chown(self, node, uid, gid): + if uid != -1: self._setxattr(node.id, _XATTR_UID_OVERRIDE_NAME, uid) + if gid != -1: self._setxattr(node.id, _XATTR_GID_OVERRIDE_NAME, gid) + self._xattr_write_and_sync() + return 0 def mount(path: str, args: dict, **kwargs) -> 'Union[int, None]': From cf3cbb721c9d3406303bbbf6432fa187c0fe888f Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 11 Jan 2017 21:19:19 -0500 Subject: [PATCH 28/63] blksize, blocks --- acdcli/acd_fuse.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 93eff1a..af85b46 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -378,6 +378,8 @@ def __init__(self, **kwargs): """sets the default gid""" self.umask = kwargs['umask'] """sets the default umask""" + self.blksize = self.acd_client._conf.getint('transfer', 'fs_chunk_size') + """size of the filesystem blocks for stat queries""" self.destroyed = autosync.keywords['stop'] """:type: multiprocessing.Event""" @@ -443,6 +445,8 @@ def getattr(self, path, fh=None) -> dict: return dict(st_mode=stat.S_IFREG | (mode if mode else 0o0666 & ~self.umask), st_nlink=self.cache.num_parents(node.id) if self.nlinks else 1, st_size=size, + st_blksize=self.blksize, + st_blocks=(node.size + 511) // 512, **attrs) def listxattr(self, path): @@ -558,12 +562,11 @@ def read(self, path, length, offset, fh) -> bytes: def statfs(self, path) -> dict: """Gets some filesystem statistics as specified in :manpage:`stat(2)`.""" - bs = 512 * 1024 # no effect? 
- return dict(f_bsize=bs, - f_frsize=bs, - f_blocks=self.total // bs, # total no of blocks - f_bfree=self.free // bs, # free blocks - f_bavail=self.free // bs, + return dict(f_bsize=self.blksize, + f_frsize=self.blksize, + f_blocks=self.total // self.blksize, # total no of blocks + f_bfree=self.free // self.blksize, # free blocks + f_bavail=self.free // self.blksize, f_namemax=256 ) From be9a0a206253e9f5ef4ce14a31cd570e0a2962ad Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Jan 2017 21:45:27 -0500 Subject: [PATCH 29/63] symlink, readlink --- acdcli/acd_fuse.py | 42 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index af85b46..9f5232a 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -415,7 +415,9 @@ def getattr(self, path, fh=None) -> dict: node = self.cache.resolve(path) if not node: raise FuseOSError(errno.ENOENT) + return self._getattr(node, fh) + def _getattr(self, node, fh=None) -> dict: try: mtime = self._getxattr(node.id, _XATTR_MTIME_OVERRIDE_NAME) except: mtime = node.modified.timestamp() @@ -434,15 +436,23 @@ def getattr(self, path, fh=None) -> dict: st_uid=uid, st_gid=gid) - try: mode = stat.S_IMODE(self._getxattr(node.id, _XATTR_MODE_OVERRIDE_NAME)) + try: mode = self._getxattr(node.id, _XATTR_MODE_OVERRIDE_NAME) except: mode = None if node.is_folder: - return dict(st_mode=stat.S_IFDIR | (mode if mode else 0o0777 & ~self.umask), + # directory + mode = stat.S_IFDIR | (stat.S_IMODE(mode) if mode else 0o0777 & ~self.umask) + + return dict(st_mode=mode, st_nlink=self.cache.num_children(node.id) if self.nlinks else 1, **attrs) elif node.is_file: - return dict(st_mode=stat.S_IFREG | (mode if mode else 0o0666 & ~self.umask), + # symlink + if mode and stat.S_ISLNK(stat.S_IFMT(mode)): mode = stat.S_IFLNK | 0o0777 + # file + else: mode = stat.S_IFREG | (stat.S_IMODE(mode) if mode else 0o0666 & ~self.umask) + + return dict(st_mode=mode, st_nlink=self.cache.num_parents(node.id) if self.nlinks else 1, st_size=size, st_blksize=self.blksize, @@ -536,7 +546,7 @@ def _xattr_write_and_sync(self): logger.debug('_xattr_write_and_sync: node: %s xattrs: %s: ' % (node_id, xattrs_str)) self.xattr_dirty.clear() - def read(self, path, length, offset, fh) -> bytes: + def read(self, path, length, offset, fh=None) -> bytes: """Read ```length`` bytes from ``path`` at ``offset``.""" if fh: @@ -642,7 +652,8 @@ def create(self, path, mode) -> int: # self._rename(prior_node_id, prior_node_cache.name) FuseOSError.convert(e) - self._chmod(node, mode) + if mode is not None: + self._chmod(node, mode) with self.fh_lock: self.fh += 1 @@ -821,7 +832,9 @@ def chmod(self, path, mode): return self._chmod(node, mode) def _chmod(self, node, mode): - self._setxattr(node.id, _XATTR_MODE_OVERRIDE_NAME, mode) + mode_perms = stat.S_IMODE(mode) + mode_type = stat.S_IFMT(self._getattr(node)['st_mode']) + self._setxattr(node.id, _XATTR_MODE_OVERRIDE_NAME, mode_type | mode_perms) self._xattr_write_and_sync() return 0 @@ -837,6 +850,23 @@ def _chown(self, node, uid, gid): self._xattr_write_and_sync() return 0 + def symlink(self, target, source): + fh = self.create(target, None) + node = self.handles[fh] + self._setxattr(node.id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFLNK | 0o0777) + # While it may be tempting to store the link's source in xattr space, note that encrypting file + # systems like gocryptfs pass xattrs straight through to the native file system; so amazon would + # have a look at unencrypted file names via links. 
So we must place this in the contents. + #TODO: have a cache of node -> link source somewhere in sql and memory so we don't need to read from amazon + self.write(target, source.encode('utf-8'), 0, fh) + self.release(target, fh) + return 0 + + def readlink(self, path): + attr = self.getattr(path) + source = self.read(path, attr['st_size'], 0).decode('utf-8') + return source + def mount(path: str, args: dict, **kwargs) -> 'Union[int, None]': """Fusermounts Amazon Cloud Drive to specified mountpoint. From b2b6d4bfbd8d92e0e15b27ca07341cef44af7d9c Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 18 Jan 2017 22:27:41 -0500 Subject: [PATCH 30/63] caching of symlink targts --- acdcli/acd_fuse.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 9f5232a..026a530 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -55,6 +55,7 @@ def find_library(*args): _XATTR_MODE_OVERRIDE_NAME = 'fuse.mode' _XATTR_UID_OVERRIDE_NAME = 'fuse.uid' _XATTR_GID_OVERRIDE_NAME = 'fuse.gid' +_XATTR_SYMLINK_OVERRIDE_NAME = 'fuse.symlink' _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5) @@ -854,17 +855,15 @@ def symlink(self, target, source): fh = self.create(target, None) node = self.handles[fh] self._setxattr(node.id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFLNK | 0o0777) - # While it may be tempting to store the link's source in xattr space, note that encrypting file - # systems like gocryptfs pass xattrs straight through to the native file system; so amazon would - # have a look at unencrypted file names via links. So we must place this in the contents. - #TODO: have a cache of node -> link source somewhere in sql and memory so we don't need to read from amazon - self.write(target, source.encode('utf-8'), 0, fh) + self._setxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME, source) self.release(target, fh) return 0 def readlink(self, path): - attr = self.getattr(path) - source = self.read(path, attr['st_size'], 0).decode('utf-8') + node = self.cache.resolve(path) + if not node: + raise FuseOSError(errno.ENOENT) + source = self._getxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) return source From 879d652072229b05dfc2519e5b474ca395c2a4d7 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 21 Jan 2017 00:41:06 -0500 Subject: [PATCH 31/63] invalidate the path resolving cache less to help with rsync --partial file moves --- acdcli/acd_fuse.py | 29 ++++++++++++++++++----------- acdcli/cache/sync.py | 22 +++++++++++++++------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 026a530..4f76370 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -263,7 +263,7 @@ def _write_and_sync(self, buffer: WriteBuffer, node_id: str): except (RequestError, IOError) as e: logger.error('Error writing node "%s". 
%s' % (node_id, str(e))) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_cache=False) def read(self, node_id, fh, offset, length: int): b = self.buffers.get(node_id) @@ -595,7 +595,7 @@ def mkdir(self, path, mode): except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_cache=False) node = self.cache.get_node(r['id']) self._chmod(node, mode) @@ -614,7 +614,11 @@ def _trash(self, path): except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + if node.is_file: + self.cache.insert_node(r, flush_cache=False) + self.cache.cache_del(path) + else: + self.cache.insert_node(r) def rmdir(self, path): """Moves a directory into ACD trash.""" @@ -637,7 +641,7 @@ def create(self, path, mode) -> int: try: r = self.acd_client.create_file(name, p.id) - self.cache.insert_node(r) + self.cache.insert_node(r, flush_cache=False) node = self.cache.get_node(r['id']) except RequestError as e: # file all ready exists, see what we know about it since the @@ -688,30 +692,33 @@ def rename(self, old, new): raise FuseOSError(errno.EEXIST) if new_bn != old_bn: - self._rename(node.id, new_bn) + self._rename(node.id, new_bn, not node.is_file) if new_dn != old_dn: # odir_id = self.cache.resolve_path(old_dn, False) ndir = self.cache.resolve(new_dn, False) if not ndir: raise FuseOSError(errno.ENOTDIR) - self._move(node.id, ndir.id) + self._move(node.id, ndir.id, not node.is_file) - def _rename(self, id, name): + if node.is_file: + self.cache.cache_del(old) + + def _rename(self, id, name, flush_cache:bool=True): try: r = self.acd_client.rename_node(id, name) except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_cache=flush_cache) - def _move(self, id, new_folder): + def _move(self, id, new_folder, flush_cache:bool=True): try: r = self.acd_client.move_node(id, new_folder) except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_cache=flush_cache) def open(self, path, flags) -> int: """Opens a file. @@ -768,7 +775,7 @@ def truncate(self, path, length, fh=None): except RequestError as e: raise FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_cache=False) """No good way to deal with positive lengths at the moment; since we can only do something about it in the middle of writing, this means the only use case we can diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index daacc90..c473d45 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -47,13 +47,21 @@ def remove_purged(self, purged: list): logger.info('Purged %i node(s).' 
% len(purged)) - def insert_nodes(self, nodes: list, partial=True): - """Inserts mixed list of files and folders into cache.""" - - """Flush the path cache since these new nodes may be deletes, moves, or renames - that affect the path cache, or overwrites that would invalidate the data in it.""" + def cache_flush(self): with self.path_to_node_cache_lock: self.path_to_node_cache.clear() + logger.warning("path_to_node_cache flushed") + + def cache_del(self, path:str): + with self.path_to_node_cache_lock: + try: del self.path_to_node_cache[path] + except: pass + + def insert_nodes(self, nodes: list, partial:bool=True, flush_cache:bool=True): + """Inserts mixed list of files and folders into cache.""" + + if flush_cache: + self.cache_flush() files = [] folders = [] @@ -81,11 +89,11 @@ def insert_nodes(self, nodes: list, partial=True): self.insert_parentage(files + folders, partial) self.insert_properties(files + folders) - def insert_node(self, node: dict): + def insert_node(self, node:dict, flush_cache:bool=True): """Inserts single file or folder into cache.""" if not node: return - self.insert_nodes([node]) + self.insert_nodes([node], flush_cache=flush_cache) def insert_folders(self, folders: list): """ Inserts list of folders into cache. Sets 'update' column to current date. From bb1bec8f2847eff5360db70202dc02ae79f76484 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 21 Jan 2017 17:42:16 -0500 Subject: [PATCH 32/63] clean log spam --- acdcli/api/content.py | 8 +++++--- acdcli/cache/sync.py | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/acdcli/api/content.py b/acdcli/api/content.py index 565227a..e31c455 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -222,13 +222,15 @@ def overwrite_tempfile(self, node_id: str, file, :param file: readable and seekable object""" + # If we're writing 0 bytes, clear instead + file.seek(0, os.SEEK_END) + if file.tell() == 0: + return self.clear_file(node_id) + while True: # logger.debug('OVERWRITE: node_id: %s' % node_id) file.seek(0) - if _stream_is_empty(file): - return self.clear_file(node_id) - metadata = {} import uuid boundary = uuid.uuid4().hex diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index c473d45..f7d9317 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -50,7 +50,6 @@ def remove_purged(self, purged: list): def cache_flush(self): with self.path_to_node_cache_lock: self.path_to_node_cache.clear() - logger.warning("path_to_node_cache flushed") def cache_del(self, path:str): with self.path_to_node_cache_lock: From 1ef5657d8c5192c81dc4bde8a1f02c74951b7bce Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sun, 22 Jan 2017 12:07:07 -0500 Subject: [PATCH 33/63] add elements to the resolve cache on creation --- acdcli/acd_fuse.py | 11 +++++------ acdcli/cache/query.py | 4 ++++ acdcli/cache/sync.py | 6 +++--- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 4f76370..1c4d21b 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -597,6 +597,7 @@ def mkdir(self, path, mode): else: self.cache.insert_node(r, flush_cache=False) node = self.cache.get_node(r['id']) + self.cache.resolve_cache_add(path, node) self._chmod(node, mode) def _trash(self, path): @@ -614,11 +615,8 @@ def _trash(self, path): except RequestError as e: FuseOSError.convert(e) else: - if node.is_file: - self.cache.insert_node(r, flush_cache=False) - self.cache.cache_del(path) - else: - self.cache.insert_node(r) + self.cache.insert_node(r, not node.is_file) + 
self.cache.resolve_cache_del(path) def rmdir(self, path): """Moves a directory into ACD trash.""" @@ -643,6 +641,7 @@ def create(self, path, mode) -> int: r = self.acd_client.create_file(name, p.id) self.cache.insert_node(r, flush_cache=False) node = self.cache.get_node(r['id']) + self.cache.resolve_cache_add(path, node) except RequestError as e: # file all ready exists, see what we know about it since the # cache may be out of sync or amazon missed a rename @@ -702,7 +701,7 @@ def rename(self, old, new): self._move(node.id, ndir.id, not node.is_file) if node.is_file: - self.cache.cache_del(old) + self.cache.resolve_cache_del(old) def _rename(self, id, name, flush_cache:bool=True): try: diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 795e37f..487982c 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -142,6 +142,10 @@ def simple_name(self): class QueryMixin(object): + def resolve_cache_add(self, path:str, node:Node): + with self.path_to_node_cache_lock: + self.path_to_node_cache[path] = node + def get_node(self, id) -> 'Union[Node|None]': with cursor(self._conn) as c: c.execute(NODE_BY_ID_SQL, [id]) diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index f7d9317..c2a3f6d 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -47,11 +47,11 @@ def remove_purged(self, purged: list): logger.info('Purged %i node(s).' % len(purged)) - def cache_flush(self): + def resolve_cache_flush(self): with self.path_to_node_cache_lock: self.path_to_node_cache.clear() - def cache_del(self, path:str): + def resolve_cache_del(self, path:str): with self.path_to_node_cache_lock: try: del self.path_to_node_cache[path] except: pass @@ -60,7 +60,7 @@ def insert_nodes(self, nodes: list, partial:bool=True, flush_cache:bool=True): """Inserts mixed list of files and folders into cache.""" if flush_cache: - self.cache_flush() + self.resolve_cache_flush() files = [] folders = [] From 067a8872cec740896804f965b6e6d1679ca039a0 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Tue, 24 Jan 2017 17:48:52 -0500 Subject: [PATCH 34/63] The entirety of resolve caching can be removed if we apply gerph's PR. Thanks gerph! --- acdcli/acd_fuse.py | 28 +++++++++++----------------- acdcli/cache/db.py | 5 ----- acdcli/cache/query.py | 23 ----------------------- acdcli/cache/schema.py | 18 +++++++++++++++--- acdcli/cache/sync.py | 18 +++--------------- docs/contributors.rst | 2 ++ 6 files changed, 31 insertions(+), 63 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 1c4d21b..026a530 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -263,7 +263,7 @@ def _write_and_sync(self, buffer: WriteBuffer, node_id: str): except (RequestError, IOError) as e: logger.error('Error writing node "%s". 
%s' % (node_id, str(e))) else: - self.cache.insert_node(r, flush_cache=False) + self.cache.insert_node(r) def read(self, node_id, fh, offset, length: int): b = self.buffers.get(node_id) @@ -595,9 +595,8 @@ def mkdir(self, path, mode): except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r, flush_cache=False) + self.cache.insert_node(r) node = self.cache.get_node(r['id']) - self.cache.resolve_cache_add(path, node) self._chmod(node, mode) def _trash(self, path): @@ -615,8 +614,7 @@ def _trash(self, path): except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r, not node.is_file) - self.cache.resolve_cache_del(path) + self.cache.insert_node(r) def rmdir(self, path): """Moves a directory into ACD trash.""" @@ -639,9 +637,8 @@ def create(self, path, mode) -> int: try: r = self.acd_client.create_file(name, p.id) - self.cache.insert_node(r, flush_cache=False) + self.cache.insert_node(r) node = self.cache.get_node(r['id']) - self.cache.resolve_cache_add(path, node) except RequestError as e: # file all ready exists, see what we know about it since the # cache may be out of sync or amazon missed a rename @@ -691,33 +688,30 @@ def rename(self, old, new): raise FuseOSError(errno.EEXIST) if new_bn != old_bn: - self._rename(node.id, new_bn, not node.is_file) + self._rename(node.id, new_bn) if new_dn != old_dn: # odir_id = self.cache.resolve_path(old_dn, False) ndir = self.cache.resolve(new_dn, False) if not ndir: raise FuseOSError(errno.ENOTDIR) - self._move(node.id, ndir.id, not node.is_file) + self._move(node.id, ndir.id) - if node.is_file: - self.cache.resolve_cache_del(old) - - def _rename(self, id, name, flush_cache:bool=True): + def _rename(self, id, name): try: r = self.acd_client.rename_node(id, name) except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r, flush_cache=flush_cache) + self.cache.insert_node(r) - def _move(self, id, new_folder, flush_cache:bool=True): + def _move(self, id, new_folder): try: r = self.acd_client.move_node(id, new_folder) except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r, flush_cache=flush_cache) + self.cache.insert_node(r) def open(self, path, flags) -> int: """Opens a file. 
@@ -774,7 +768,7 @@ def truncate(self, path, length, fh=None): except RequestError as e: raise FuseOSError.convert(e) else: - self.cache.insert_node(r, flush_cache=False) + self.cache.insert_node(r) """No good way to deal with positive lengths at the moment; since we can only do something about it in the middle of writing, this means the only use case we can diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index b7af974..2f78ddb 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -69,11 +69,6 @@ def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckTyp self._conn.create_function('REGEXP', _regex_match.__code__.co_argcount, _regex_match) - self.path_to_node_cache = {} - self.path_to_node_cache_lock = Lock() - """There are a huge number of repeated path lookups, - so cache results and invalidate on new nodes.""" - with cursor(self._conn) as c: c.execute(_ROOT_ID_SQL) row = c.fetchone() diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 487982c..e79c51b 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -142,10 +142,6 @@ def simple_name(self): class QueryMixin(object): - def resolve_cache_add(self, path:str, node:Node): - with self.path_to_node_cache_lock: - self.path_to_node_cache[path] = node - def get_node(self, id) -> 'Union[Node|None]': with cursor(self._conn) as c: c.execute(NODE_BY_ID_SQL, [id]) @@ -165,17 +161,6 @@ def get_conflicting_node(self, name: str, parent_id: str): return Node(r) def resolve(self, path: str, trash=False) -> 'Union[Node|None]': - """Gets a node from a path""" - with self.path_to_node_cache_lock: - try: return self.path_to_node_cache[path] - except: pass - n = self._resolve(path,trash) - if n: - self.path_to_node_cache[path] = n - return n - return None - - def _resolve(self, path: str, trash=False) -> 'Union[Node|None]': segments = list(filter(bool, path.split('/'))) if not segments: if not self.root_id: @@ -281,14 +266,6 @@ def list_children(self, folder_id, trash=False, folder_path=None) -> 'Tuple[List folders.append(node) node = c.fetchone() - """If the caller provides the folder_path, we can add all the children to the - path->node_id cache for faster lookup after a directory listing""" - if folder_path: - children = folders + files - with self.path_to_node_cache_lock: - for c in children: - self.path_to_node_cache[folder_path + '/' + c.name] = c - return folders, files def list_trashed_children(self, folder_id) -> 'Tuple[List[Node], List[Node]]': diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index 9939af1..71b5c5e 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -61,8 +61,9 @@ FOREIGN KEY(child) REFERENCES nodes (id) ); + CREATE INDEX ix_parentage_child ON parentage(child); CREATE INDEX ix_nodes_names ON nodes(name); - PRAGMA user_version = 2; + PRAGMA user_version = 3; """ _GEN_DROP_TABLES_SQL = \ @@ -88,12 +89,23 @@ def _1_to_2(conn): conn.commit() -_migrations = [_0_to_1, _1_to_2] +def _2_to_3(conn): + conn.executescript( + 'CREATE INDEX IF NOT EXISTS ix_parentage_child ON parentage(child);' + # Having changed the schema, the queries can be optimised differently. + # In order to be aware of that, re-analyze the type of data and indexes, + # allowing SQLite3 to make better decisions. 
+ 'ANALYZE;' + 'PRAGMA user_version = 3;' + ) + conn.commit() + +_migrations = [_0_to_1, _1_to_2, _2_to_3] """list of all migrations from index -> index+1""" class SchemaMixin(object): - _DB_SCHEMA_VER = 2 + _DB_SCHEMA_VER = 3 def init(self): try: diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index c2a3f6d..393c551 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -47,21 +47,9 @@ def remove_purged(self, purged: list): logger.info('Purged %i node(s).' % len(purged)) - def resolve_cache_flush(self): - with self.path_to_node_cache_lock: - self.path_to_node_cache.clear() - - def resolve_cache_del(self, path:str): - with self.path_to_node_cache_lock: - try: del self.path_to_node_cache[path] - except: pass - - def insert_nodes(self, nodes: list, partial:bool=True, flush_cache:bool=True): + def insert_nodes(self, nodes: list, partial:bool=True): """Inserts mixed list of files and folders into cache.""" - if flush_cache: - self.resolve_cache_flush() - files = [] folders = [] for node in nodes: @@ -88,11 +76,11 @@ def insert_nodes(self, nodes: list, partial:bool=True, flush_cache:bool=True): self.insert_parentage(files + folders, partial) self.insert_properties(files + folders) - def insert_node(self, node:dict, flush_cache:bool=True): + def insert_node(self, node:dict): """Inserts single file or folder into cache.""" if not node: return - self.insert_nodes([node], flush_cache=flush_cache) + self.insert_nodes([node]) def insert_folders(self, folders: list): """ Inserts list of folders into cache. Sets 'update' column to current date. diff --git a/docs/contributors.rst b/docs/contributors.rst index ce1944a..89cd829 100644 --- a/docs/contributors.rst +++ b/docs/contributors.rst @@ -23,6 +23,8 @@ Thanks to - `memoz `_ for amending proxy documentation +- `gerph `_ for making file searches faster, particularly on large repositories + Also thanks to - `fibersnet `_ for pointing out a possible deadlock in ACDFuse. From cc478117b5bd516974eebf264ed568745a211672 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Tue, 24 Jan 2017 22:32:13 -0500 Subject: [PATCH 35/63] turns out it's faster with both --- acdcli/acd_fuse.py | 29 ++++++++++++++++------------- acdcli/cache/db.py | 5 +++++ acdcli/cache/query.py | 20 ++++++++++++++++++++ acdcli/cache/sync.py | 15 ++++++++++++--- 4 files changed, 53 insertions(+), 16 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 026a530..738b64f 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -263,7 +263,7 @@ def _write_and_sync(self, buffer: WriteBuffer, node_id: str): except (RequestError, IOError) as e: logger.error('Error writing node "%s". 
%s' % (node_id, str(e))) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_resolve_cache=False) def read(self, node_id, fh, offset, length: int): b = self.buffers.get(node_id) @@ -595,7 +595,7 @@ def mkdir(self, path, mode): except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_resolve_cache=False) node = self.cache.get_node(r['id']) self._chmod(node, mode) @@ -614,7 +614,8 @@ def _trash(self, path): except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_resolve_cache=node.is_folder) + self.cache.resolve_cache_del(path) def rmdir(self, path): """Moves a directory into ACD trash.""" @@ -637,7 +638,7 @@ def create(self, path, mode) -> int: try: r = self.acd_client.create_file(name, p.id) - self.cache.insert_node(r) + self.cache.insert_node(r, flush_resolve_cache=False) node = self.cache.get_node(r['id']) except RequestError as e: # file all ready exists, see what we know about it since the @@ -687,31 +688,33 @@ def rename(self, old, new): else: raise FuseOSError(errno.EEXIST) + self.cache.resolve_cache_del(old) + if new_bn != old_bn: - self._rename(node.id, new_bn) + self._rename(node, new_bn) if new_dn != old_dn: # odir_id = self.cache.resolve_path(old_dn, False) ndir = self.cache.resolve(new_dn, False) if not ndir: raise FuseOSError(errno.ENOTDIR) - self._move(node.id, ndir.id) + self._move(node, ndir.id) - def _rename(self, id, name): + def _rename(self, node, name): try: - r = self.acd_client.rename_node(id, name) + r = self.acd_client.rename_node(node.id, name) except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_resolve_cache=node.is_folder) - def _move(self, id, new_folder): + def _move(self, node, new_folder): try: - r = self.acd_client.move_node(id, new_folder) + r = self.acd_client.move_node(node.id, new_folder) except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_resolve_cache=node.is_folder) def open(self, path, flags) -> int: """Opens a file. 
@@ -768,7 +771,7 @@ def truncate(self, path, length, fh=None): except RequestError as e: raise FuseOSError.convert(e) else: - self.cache.insert_node(r) + self.cache.insert_node(r, flush_resolve_cache=False) """No good way to deal with positive lengths at the moment; since we can only do something about it in the middle of writing, this means the only use case we can diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index 2f78ddb..b7af974 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -69,6 +69,11 @@ def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckTyp self._conn.create_function('REGEXP', _regex_match.__code__.co_argcount, _regex_match) + self.path_to_node_cache = {} + self.path_to_node_cache_lock = Lock() + """There are a huge number of repeated path lookups, + so cache results and invalidate on new nodes.""" + with cursor(self._conn) as c: c.execute(_ROOT_ID_SQL) row = c.fetchone() diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index e79c51b..1955dd6 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -161,6 +161,19 @@ def get_conflicting_node(self, name: str, parent_id: str): return Node(r) def resolve(self, path: str, trash=False) -> 'Union[Node|None]': + """Gets a node from a path""" + with self.path_to_node_cache_lock: + try: + return self.path_to_node_cache[path] + except: + pass + n = self._resolve(path, trash) + if n: + self.path_to_node_cache[path] = n + return n + return None + + def _resolve(self, path: str, trash=False) -> 'Union[Node|None]': segments = list(filter(bool, path.split('/'))) if not segments: if not self.root_id: @@ -266,6 +279,13 @@ def list_children(self, folder_id, trash=False, folder_path=None) -> 'Tuple[List folders.append(node) node = c.fetchone() + """If the caller provides the folder_path, we can add all the children to the + path->node_id cache for faster lookup after a directory listing""" + if folder_path: + with self.path_to_node_cache_lock: + for c in folders + files: + self.path_to_node_cache[folder_path + '/' + c.name] = c + return folders, files def list_trashed_children(self, folder_id) -> 'Tuple[List[Node], List[Node]]': diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index 393c551..7de6b2d 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -47,9 +47,18 @@ def remove_purged(self, purged: list): logger.info('Purged %i node(s).' % len(purged)) - def insert_nodes(self, nodes: list, partial:bool=True): + def resolve_cache_del(self, path:str): + with self.path_to_node_cache_lock: + try: del self.path_to_node_cache[path] + except:pass + + def insert_nodes(self, nodes: list, partial:bool=True, flush_resolve_cache:bool=False): """Inserts mixed list of files and folders into cache.""" + if flush_resolve_cache: + with self.path_to_node_cache_lock: + self.path_to_node_cache.clear() + files = [] folders = [] for node in nodes: @@ -76,11 +85,11 @@ def insert_nodes(self, nodes: list, partial:bool=True): self.insert_parentage(files + folders, partial) self.insert_properties(files + folders) - def insert_node(self, node:dict): + def insert_node(self, node:dict, flush_resolve_cache:bool=False): """Inserts single file or folder into cache.""" if not node: return - self.insert_nodes([node]) + self.insert_nodes([node], flush_resolve_cache=flush_resolve_cache) def insert_folders(self, folders: list): """ Inserts list of folders into cache. Sets 'update' column to current date. 
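The path resolution cache reinstated in this patch is, in essence, a lock-guarded memo table sitting in front of the SQL lookup: hits are served from the dict, misses fall through to _resolve() and are recorded, and entries are dropped selectively (resolve_cache_del) or wholesale (the flush_resolve_cache flag) when nodes change. A minimal standalone sketch of that pattern follows; the class and parameter names (ResolveCache, backing_resolve) are illustrative only and not part of the codebase:

    from threading import Lock

    class ResolveCache:
        """Memoizes path -> node lookups in front of a slower backing store."""

        def __init__(self, backing_resolve):
            self._backing_resolve = backing_resolve   # e.g. the SQL-based lookup
            self._cache = {}
            self._lock = Lock()

        def resolve(self, path):
            with self._lock:
                if path in self._cache:
                    return self._cache[path]
            node = self._backing_resolve(path)        # slow path, outside the lock
            if node is not None:
                with self._lock:
                    self._cache[path] = node
            return node

        def invalidate(self, path):
            """Drop a single entry, e.g. after a rename or trash."""
            with self._lock:
                self._cache.pop(path, None)

        def flush(self):
            """Drop everything, e.g. after a bulk sync inserts nodes with unknown paths."""
            with self._lock:
                self._cache.clear()

Keeping the slow lookup outside the lock means two threads can race to fill the same entry; that is harmless here because both would cache the same result.
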
From 2b75256756bfb2c5d33efbe709ce42ffd2d02af3 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 28 Jan 2017 21:48:31 -0500 Subject: [PATCH 36/63] cache node ids instead of nodes to fix 0 file size issues --- acdcli/acd_fuse.py | 141 ++++++++++++++++++++++-------------------- acdcli/cache/db.py | 6 +- acdcli/cache/query.py | 23 +++++-- acdcli/cache/sync.py | 8 +-- 4 files changed, 98 insertions(+), 80 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 738b64f..d9eaef8 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -308,7 +308,7 @@ def __call__(self, op, path, *args): elif op == 'chmod': targs = (oct(args[0]),) + args[1:] elif op == 'setxattr': - targs = (len(args[0]),) + args[1:] + targs = (args[0],) + (len(args[1]),) logger.debug('-> %s %s %s', op, path, repr(args if not targs else targs)) @@ -365,8 +365,8 @@ def __init__(self, **kwargs): """manually calculated available disk space""" self.fh = 1 """file handle counter\n\n :type: int""" - self.handles = {} - """map fh->node\n\n :type: dict""" + self.fh_to_node = {} + """map fh->node_id\n\n :type: dict""" self.node_to_fh = defaultdict(lambda: set()) """map node_id to list of interested file handles""" self.fh_lock = Lock() @@ -411,7 +411,8 @@ def getattr(self, path, fh=None) -> dict: Calculates correct number of links for folders if :attr:`nlinks` is set.""" if fh: - node = self.handles[fh] + node_id = self.fh_to_node[fh] + node = self.cache.get_node(node_id) else: node = self.cache.resolve(path) if not node: @@ -457,14 +458,14 @@ def _getattr(self, node, fh=None) -> dict: st_nlink=self.cache.num_parents(node.id) if self.nlinks else 1, st_size=size, st_blksize=self.blksize, - st_blocks=(node.size + 511) // 512, + st_blocks=(size + 511) // 512, **attrs) def listxattr(self, path): - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) - return self._listxattr(node.id) + return self._listxattr(node_id) def _listxattr(self, node_id): self._xattr_load(node_id) @@ -475,10 +476,10 @@ def _listxattr(self, node_id): return [] def getxattr(self, path, name, position=0): - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) - return self._getxattr_bytes(node.id, name) + return self._getxattr_bytes(node_id, name) def _getxattr(self, node_id, name): self._xattr_load(node_id) @@ -496,10 +497,10 @@ def _getxattr_bytes(self, node_id, name): return binascii.a2b_base64(self._getxattr(node_id, name)) def removexattr(self, path, name): - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) - self._removexattr(node.id, name) + self._removexattr(node_id, name) def _removexattr(self, node_id, name): self._xattr_load(node_id) @@ -509,10 +510,10 @@ def _removexattr(self, node_id, name): self.properties_dirty.add(node_id) def setxattr(self, path, name, value, options, position=0): - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) - self._setxattr_bytes(node.id, name, value) + self._setxattr_bytes(node_id, name, value) def _setxattr(self, node_id, name, value): self._xattr_load(node_id) @@ -551,21 +552,25 @@ def read(self, path, length, offset, fh=None) -> bytes: """Read ```length`` bytes from ``path`` at ``offset``.""" if fh: - node = self.handles[fh] + node_id = self.fh_to_node[fh] + node = 
self.cache.get_node(node_id) else: node = self.cache.resolve(path, trash=False) if not node: raise FuseOSError(errno.ENOENT) - if node.size <= offset: + size = self.wp.length(node.id, fh) + if size is None: size = node.size + + if size <= offset: return b'' - if node.size < offset + length: - length = node.size - offset + if size < offset + length: + length = size - offset """If we attempt to read something we just wrote, give it back""" ret = self.wp.read(node.id, fh, offset, length) - if ret and len(ret) == length: + if ret is not None: return ret return self.rp.get(node.id, offset, length, node.size) @@ -586,12 +591,12 @@ def mkdir(self, path, mode): name = os.path.basename(path) ppath = os.path.dirname(path) - p = self.cache.resolve(ppath) - if not p: + p_id = self.cache.resolve_id(ppath) + if not p_id: raise FuseOSError(errno.ENOTDIR) try: - r = self.acd_client.create_folder(name, p.id) + r = self.acd_client.create_folder(name, p_id) except RequestError as e: FuseOSError.convert(e) else: @@ -632,14 +637,14 @@ def create(self, path, mode) -> int: name = os.path.basename(path) ppath = os.path.dirname(path) - p = self.cache.resolve(ppath, False) - if not p: + p_id = self.cache.resolve_id(ppath, False) + if not p_id: raise FuseOSError(errno.ENOTDIR) try: - r = self.acd_client.create_file(name, p.id) + r = self.acd_client.create_file(name, p_id) self.cache.insert_node(r, flush_resolve_cache=False) - node = self.cache.get_node(r['id']) + node_id = r['id'] except RequestError as e: # file all ready exists, see what we know about it since the # cache may be out of sync or amazon missed a rename @@ -655,12 +660,12 @@ def create(self, path, mode) -> int: FuseOSError.convert(e) if mode is not None: - self._chmod(node, mode) + self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFREG | (stat.S_IMODE(mode))) with self.fh_lock: self.fh += 1 - self.handles[self.fh] = node - self.node_to_fh[node.id].add(self.fh) + self.fh_to_node[self.fh] = node_id + self.node_to_fh[node_id].add(self.fh) return self.fh def rename(self, old, new): @@ -725,13 +730,13 @@ def open(self, path, flags) -> int: if (flags & os.O_APPEND) == os.O_APPEND: raise FuseOSError(errno.EFAULT) - node = self.cache.resolve(path, False) - if not node: + node_id = self.cache.resolve_id(path, False) + if not node_id: raise FuseOSError(errno.ENOENT) with self.fh_lock: self.fh += 1 - self.handles[self.fh] = node - self.node_to_fh[node.id].add(self.fh) + self.fh_to_node[self.fh] = node_id + self.node_to_fh[node_id].add(self.fh) return self.fh def write(self, path, data, offset, fh) -> int: @@ -739,18 +744,18 @@ def write(self, path, data, offset, fh) -> int: :returns: number of bytes written""" - node_id = self.handles[fh].id + node_id = self.fh_to_node[fh] self.wp.write(node_id, fh, offset, data) return len(data) def flush(self, path, fh): if fh: - node = self.handles[fh] + node_id = self.fh_to_node[fh] else: - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) - self.wp.flush(node.id, fh) + self.wp.flush(node_id, fh) def truncate(self, path, length, fh=None): """Pseudo-truncates a file, i.e. 
clears content if ``length``==0 or does nothing @@ -759,15 +764,15 @@ def truncate(self, path, length, fh=None): :raises FuseOSError: if pseudo-truncation to length is not supported""" if fh: - node = self.handles[fh] + node_id = self.fh_to_node[fh] else: - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) if length == 0: try: - r = self.acd_client.clear_file(node.id) + r = self.acd_client.clear_file(node_id) except RequestError as e: raise FuseOSError.convert(e) else: @@ -782,24 +787,24 @@ def release(self, path, fh): """Releases an open ``path``.""" if fh: - node = self.handles[fh] + node_id = self.fh_to_node[fh] else: - node = self.cache.resolve(path, trash=False) - if node: - self.rp.release(node.id) + node_id = self.cache.resolve_id(path) + if node_id: + self.rp.release(node_id) with self.fh_lock: """release the writer if there's no more interest. This allows many file handles to write to a single node provided they do it in order, enabling sequential writes using mmap. """ - interest = self.node_to_fh.get(node.id) + interest = self.node_to_fh.get(node_id) if interest: interest.discard(fh) if not interest: - self.wp.release(node.id, fh) + self.wp.release(node_id, fh) self._xattr_write_and_sync() - del self.node_to_fh[node.id] - del self.handles[fh] + del self.node_to_fh[node_id] + del self.fh_to_node[fh] else: raise FuseOSError(errno.ENOENT) @@ -810,8 +815,8 @@ def utimens(self, path, times=None): :param times: [atime, mtime]""" - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) if times: @@ -822,7 +827,7 @@ def utimens(self, path, times=None): mtime = time() try: - self._setxattr(node.id, _XATTR_MTIME_OVERRIDE_NAME, mtime) + self._setxattr(node_id, _XATTR_MTIME_OVERRIDE_NAME, mtime) self._xattr_write_and_sync() except: raise FuseOSError(errno.ENOTSUP) @@ -843,30 +848,30 @@ def _chmod(self, node, mode): return 0 def chown(self, path, uid, gid): - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) - return self._chown(node, uid, gid) + return self._chown(node_id, uid, gid) - def _chown(self, node, uid, gid): - if uid != -1: self._setxattr(node.id, _XATTR_UID_OVERRIDE_NAME, uid) - if gid != -1: self._setxattr(node.id, _XATTR_GID_OVERRIDE_NAME, gid) + def _chown(self, node_id, uid, gid): + if uid != -1: self._setxattr(node_id, _XATTR_UID_OVERRIDE_NAME, uid) + if gid != -1: self._setxattr(node_id, _XATTR_GID_OVERRIDE_NAME, gid) self._xattr_write_and_sync() return 0 def symlink(self, target, source): fh = self.create(target, None) - node = self.handles[fh] - self._setxattr(node.id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFLNK | 0o0777) - self._setxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME, source) + node_id = self.fh_to_node[fh] + self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFLNK | 0o0777) + self._setxattr(node_id, _XATTR_SYMLINK_OVERRIDE_NAME, source) self.release(target, fh) return 0 def readlink(self, path): - node = self.cache.resolve(path) - if not node: + node_id = self.cache.resolve_id(path) + if not node_id: raise FuseOSError(errno.ENOENT) - source = self._getxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) + source = self._getxattr(node_id, _XATTR_SYMLINK_OVERRIDE_NAME) return source diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index b7af974..304d60b 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ 
-69,10 +69,10 @@ def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckTyp self._conn.create_function('REGEXP', _regex_match.__code__.co_argcount, _regex_match) - self.path_to_node_cache = {} - self.path_to_node_cache_lock = Lock() + self.path_to_node_id_cache = {} + self.path_to_node_id_cache_lock = Lock() """There are a huge number of repeated path lookups, - so cache results and invalidate on new nodes.""" + so cache results and selectively invalidate.""" with cursor(self._conn) as c: c.execute(_ROOT_ID_SQL) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 1955dd6..6a9eb7d 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -160,16 +160,29 @@ def get_conflicting_node(self, name: str, parent_id: str): if r: return Node(r) + def resolve_id(self, path: str, trash=False) -> str: + with self.path_to_node_id_cache_lock: + try: + return self.path_to_node_id_cache[path] + except: + pass + n = self._resolve(path, trash) + if n: + self.path_to_node_id_cache[path] = n.id + return n.id + return None + def resolve(self, path: str, trash=False) -> 'Union[Node|None]': """Gets a node from a path""" - with self.path_to_node_cache_lock: + with self.path_to_node_id_cache_lock: try: - return self.path_to_node_cache[path] + node_id = self.path_to_node_id_cache[path] + return self.get_node(node_id) except: pass n = self._resolve(path, trash) if n: - self.path_to_node_cache[path] = n + self.path_to_node_id_cache[path] = n.id return n return None @@ -282,9 +295,9 @@ def list_children(self, folder_id, trash=False, folder_path=None) -> 'Tuple[List """If the caller provides the folder_path, we can add all the children to the path->node_id cache for faster lookup after a directory listing""" if folder_path: - with self.path_to_node_cache_lock: + with self.path_to_node_id_cache_lock: for c in folders + files: - self.path_to_node_cache[folder_path + '/' + c.name] = c + self.path_to_node_id_cache[folder_path + '/' + c.name] = c.id return folders, files diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index 7de6b2d..12a9e9c 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -48,16 +48,16 @@ def remove_purged(self, purged: list): logger.info('Purged %i node(s).' 
% len(purged)) def resolve_cache_del(self, path:str): - with self.path_to_node_cache_lock: - try: del self.path_to_node_cache[path] + with self.path_to_node_id_cache_lock: + try: del self.path_to_node_id_cache[path] except:pass def insert_nodes(self, nodes: list, partial:bool=True, flush_resolve_cache:bool=False): """Inserts mixed list of files and folders into cache.""" if flush_resolve_cache: - with self.path_to_node_cache_lock: - self.path_to_node_cache.clear() + with self.path_to_node_id_cache_lock: + self.path_to_node_id_cache.clear() files = [] folders = [] From 4c191846ab8ec8d21e1fa655aa3c002bf876d76d Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 3 Feb 2017 20:14:46 -0500 Subject: [PATCH 37/63] cache write buffer length so long uploads won't hold up getattr calls --- acdcli/acd_fuse.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index d9eaef8..2b389a7 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -225,6 +225,7 @@ def __init__(self, buffer_size): self.f = tempfile.SpooledTemporaryFile(max_size=buffer_size) self.lock = Lock() self.dirty = True + self.len = 0 def read(self, offset, length: int): with self.lock: @@ -234,19 +235,17 @@ def read(self, offset, length: int): def write(self, offset, bytes_: bytes): with self.lock: self.dirty = True - self.f.seek(0, os.SEEK_END) - old_len = self.f.tell() - if offset > old_len: + if offset > self.len: logger.error('Wrong offset for writing to buffer; writing gap detected') raise FuseOSError(errno.ESPIPE) self.f.seek(offset) - self.f.write(bytes_) - return old_len + ret = self.f.write(bytes_) + self.f.seek(0, os.SEEK_END) + self.len = self.f.tell() + return ret def length(self): - with self.lock: - self.f.seek(0, os.SEEK_END) - return self.f.tell() + return self.len def get_file(self): """Return the file for direct access. Be sure to lock from the outside when doing so""" @@ -424,7 +423,7 @@ def _getattr(self, node, fh=None) -> dict: except: mtime = node.modified.timestamp() size = self.wp.length(node.id, fh) - if not size: size = node.size + if size is None: size = node.size try: uid = self._getxattr(node.id, _XATTR_UID_OVERRIDE_NAME) except: uid = self.uid From 3b5044bfcd7e38d09513aba40b77ebc9d8025818 Mon Sep 17 00:00:00 2001 From: Ben Date: Mon, 6 Feb 2017 00:10:13 -0500 Subject: [PATCH 38/63] hit sqlite less --- acdcli/acd_fuse.py | 11 ++++++++--- acdcli/api/common.py | 1 + acdcli/cache/sync.py | 4 ++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 2b389a7..6c81506 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -544,7 +544,6 @@ def _xattr_write_and_sync(self): logger.error('Error writing node xattrs "%s". 
%s' % (node_id, str(e))) else: self.cache.insert_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, xattrs_str) - logger.debug('_xattr_write_and_sync: node: %s xattrs: %s: ' % (node_id, xattrs_str)) self.xattr_dirty.clear() def read(self, path, length, offset, fh=None) -> bytes: @@ -600,8 +599,11 @@ def mkdir(self, path, mode): FuseOSError.convert(e) else: self.cache.insert_node(r, flush_resolve_cache=False) - node = self.cache.get_node(r['id']) - self._chmod(node, mode) + node_id = r['id'] + self.cache.resolve_cache_add(path, node_id) + if mode is not None: + self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFDIR | (stat.S_IMODE(mode))) + self._xattr_write_and_sync() def _trash(self, path): logger.debug('trash %s' % path) @@ -644,6 +646,7 @@ def create(self, path, mode) -> int: r = self.acd_client.create_file(name, p_id) self.cache.insert_node(r, flush_resolve_cache=False) node_id = r['id'] + self.cache.resolve_cache_add(path, node_id) except RequestError as e: # file all ready exists, see what we know about it since the # cache may be out of sync or amazon missed a rename @@ -704,6 +707,8 @@ def rename(self, old, new): raise FuseOSError(errno.ENOTDIR) self._move(node, ndir.id) + self.cache.resolve_cache_add(new, node.id) + def _rename(self, node, name): try: r = self.acd_client.rename_node(node.id, name) diff --git a/acdcli/api/common.py b/acdcli/api/common.py index b0ddd53..d6e2246 100644 --- a/acdcli/api/common.py +++ b/acdcli/api/common.py @@ -16,6 +16,7 @@ class ReadTimeoutError(Exception): OK_CODES = [requests.codes.OK] RETRY_CODES = [requests.codes.server_error, requests.codes.gateway_timeout, + requests.codes.request_timeout, requests.codes.bad_request, requests.codes.service_unavailable] diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index 12a9e9c..f1fdcf4 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -47,6 +47,10 @@ def remove_purged(self, purged: list): logger.info('Purged %i node(s).' % len(purged)) + def resolve_cache_add(self, path:str, node_id:str): + with self.path_to_node_id_cache_lock: + self.path_to_node_id_cache[path] = node_id + def resolve_cache_del(self, path:str): with self.path_to_node_id_cache_lock: try: del self.path_to_node_id_cache[path] From 8c0e506a9dcc7133c831fae6876c5a5c53b24a73 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Feb 2017 12:02:39 -0500 Subject: [PATCH 39/63] tidy and make flush a noop again --- acdcli/acd_fuse.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 6c81506..b4ef701 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -488,9 +488,8 @@ def _getxattr(self, node_id, name): if ret is not None: return ret except: - raise FuseOSError(errno.ENODATA) # should be ENOATTR - else: - raise FuseOSError(errno.ENODATA) # should be ENOATTR + pass + raise FuseOSError(errno.ENODATA) # should be ENOATTR def _getxattr_bytes(self, node_id, name): return binascii.a2b_base64(self._getxattr(node_id, name)) @@ -748,18 +747,15 @@ def write(self, path, data, offset, fh) -> int: :returns: number of bytes written""" - node_id = self.fh_to_node[fh] - self.wp.write(node_id, fh, offset, data) - return len(data) - - def flush(self, path, fh): if fh: node_id = self.fh_to_node[fh] - else: - node_id = self.cache.resolve_id(path) + # This is not resolving by path on purpose, since flushing to + # amazon is done on closing all interested file handles. 
if not node_id: raise FuseOSError(errno.ENOENT) - self.wp.flush(node_id, fh) + + self.wp.write(node_id, fh, offset, data) + return len(data) def truncate(self, path, length, fh=None): """Pseudo-truncates a file, i.e. clears content if ``length``==0 or does nothing @@ -798,8 +794,7 @@ def release(self, path, fh): self.rp.release(node_id) with self.fh_lock: """release the writer if there's no more interest. This allows many file - handles to write to a single node provided they do it in order, enabling - sequential writes using mmap. + handles to write to a single node provided they do it in order. """ interest = self.node_to_fh.get(node_id) if interest: From c5401ac76384e5cb46dd378a4e5f8368e41fdd85 Mon Sep 17 00:00:00 2001 From: Ben Date: Fri, 10 Feb 2017 12:02:48 -0500 Subject: [PATCH 40/63] credit --- docs/contributors.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/contributors.rst b/docs/contributors.rst index 89cd829..b922a66 100644 --- a/docs/contributors.rst +++ b/docs/contributors.rst @@ -25,6 +25,8 @@ Thanks to - `gerph `_ for making file searches faster, particularly on large repositories +- `bgemmill `_ for fuse write-back caching, xattrs, symlinks, and rsync support + Also thanks to - `fibersnet `_ for pointing out a possible deadlock in ACDFuse. From a167351c292dda74c0b4045a8dc94b04cc36b131 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Tue, 14 Feb 2017 12:51:17 -0500 Subject: [PATCH 41/63] cache nodes by id for faster getattr/getxattr calls in large directories --- acdcli/cache/db.py | 3 +- acdcli/cache/query.py | 50 +++++++++++++----------- acdcli/cache/sync.py | 88 +++++++++++++++++++++++++++++++------------ 3 files changed, 94 insertions(+), 47 deletions(-) diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index 304d60b..d03f92e 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -69,8 +69,9 @@ def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckTyp self._conn.create_function('REGEXP', _regex_match.__code__.co_argcount, _regex_match) + self.node_id_to_node_cache = {} self.path_to_node_id_cache = {} - self.path_to_node_id_cache_lock = Lock() + self.node_cache_lock = Lock() """There are a huge number of repeated path lookups, so cache results and selectively invalidate.""" diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index 6a9eb7d..d3ea90e 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -128,11 +128,15 @@ def is_trashed(self): @property def created(self): - return datetime_from_string(self.cre) + if isinstance(self.cre, str): + self.cre = datetime_from_string(self.cre) + return self.cre @property def modified(self): - return datetime_from_string(self.mod) + if isinstance(self.mod, str): + self.mod = datetime_from_string(self.mod) + return self.mod @property def simple_name(self): @@ -143,11 +147,19 @@ def simple_name(self): class QueryMixin(object): def get_node(self, id) -> 'Union[Node|None]': - with cursor(self._conn) as c: - c.execute(NODE_BY_ID_SQL, [id]) - r = c.fetchone() - if r: - return Node(r) + with self.node_cache_lock: + try: + return self.node_id_to_node_cache[id] + except: + pass + with cursor(self._conn) as c: + c.execute(NODE_BY_ID_SQL, [id]) + r = c.fetchone() + if r: + n = Node(r) + if n.is_available: + self.node_id_to_node_cache[n.id] = n + return n def get_root_node(self): return self.get_node(self.root_id) @@ -161,30 +173,22 @@ def get_conflicting_node(self, name: str, parent_id: str): return Node(r) def resolve_id(self, path: str, trash=False) -> str: - with 
self.path_to_node_id_cache_lock: + with self.node_cache_lock: try: return self.path_to_node_id_cache[path] except: pass n = self._resolve(path, trash) if n: + self.node_id_to_node_cache[n.id] = n self.path_to_node_id_cache[path] = n.id return n.id return None def resolve(self, path: str, trash=False) -> 'Union[Node|None]': """Gets a node from a path""" - with self.path_to_node_id_cache_lock: - try: - node_id = self.path_to_node_id_cache[path] - return self.get_node(node_id) - except: - pass - n = self._resolve(path, trash) - if n: - self.path_to_node_id_cache[path] = n.id - return n - return None + id = self.resolve_id(path=path, trash=trash) + return self.get_node(id=id) if id else None def _resolve(self, path: str, trash=False) -> 'Union[Node|None]': segments = list(filter(bool, path.split('/'))) @@ -294,9 +298,11 @@ def list_children(self, folder_id, trash=False, folder_path=None) -> 'Tuple[List """If the caller provides the folder_path, we can add all the children to the path->node_id cache for faster lookup after a directory listing""" - if folder_path: - with self.path_to_node_id_cache_lock: - for c in folders + files: + with self.node_cache_lock: + for c in folders + files: + if c.is_available: + self.node_id_to_node_cache[c.id] = c + if folder_path: self.path_to_node_id_cache[folder_path + '/' + c.name] = c.id return folders, files diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index f1fdcf4..a1756dc 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -5,6 +5,8 @@ import logging from datetime import datetime from itertools import islice + +from acdcli.cache.query import Node from .cursors import mod_cursor import dateutil.parser as iso_date @@ -48,11 +50,11 @@ def remove_purged(self, purged: list): logger.info('Purged %i node(s).' 
% len(purged)) def resolve_cache_add(self, path:str, node_id:str): - with self.path_to_node_id_cache_lock: + with self.node_cache_lock: self.path_to_node_id_cache[path] = node_id def resolve_cache_del(self, path:str): - with self.path_to_node_id_cache_lock: + with self.node_cache_lock: try: del self.path_to_node_id_cache[path] except:pass @@ -60,7 +62,7 @@ def insert_nodes(self, nodes: list, partial:bool=True, flush_resolve_cache:bool= """Inserts mixed list of files and folders into cache.""" if flush_resolve_cache: - with self.path_to_node_id_cache_lock: + with self.node_cache_lock: self.path_to_node_id_cache.clear() files = [] @@ -105,14 +107,32 @@ def insert_folders(self, folders: list): with mod_cursor(self._conn) as c: for f in folders: + n = Node(dict(id=f['id'], + type="folder", + name=f.get('name'), + description=f.get('description'), + created=iso_date.parse(f['createdDate']), + modified=iso_date.parse(f['modifiedDate']), + updated=datetime.utcnow(), + status=f['status'], + md5=None, + size=0, + )) + + with self.node_cache_lock: + if n.is_available: + self.node_id_to_node_cache[n.id] = n + else: + self.node_id_to_node_cache.clear() + c.execute( 'INSERT OR REPLACE INTO nodes ' '(id, type, name, description, created, modified, updated, status) ' - 'VALUES (?, "folder", ?, ?, ?, ?, ?, ?)', - [f['id'], f.get('name'), f.get('description'), - iso_date.parse(f['createdDate']), iso_date.parse(f['modifiedDate']), - datetime.utcnow(), - f['status'] + 'VALUES (?, ?, ?, ?, ?, ?, ?, ?)', + [n.id, n.type, n.name, n.description, + n.created, n.modified, + n.updated, + n.status ] ) @@ -124,22 +144,42 @@ def insert_files(self, files: list): with mod_cursor(self._conn) as c: for f in files: - c.execute('INSERT OR REPLACE INTO nodes ' - '(id, type, name, description, created, modified, updated, status)' - 'VALUES (?, "file", ?, ?, ?, ?, ?, ?)', - [f['id'], f.get('name'), f.get('description'), - iso_date.parse(f['createdDate']), iso_date.parse(f['modifiedDate']), - datetime.utcnow(), - f['status'] - ] - ) - c.execute('INSERT OR REPLACE INTO files (id, md5, size) VALUES (?, ?, ?)', - [f['id'], - f.get('contentProperties', {}).get('md5', - 'd41d8cd98f00b204e9800998ecf8427e'), - f.get('contentProperties', {}).get('size', 0) - ] - ) + n = Node(dict(id=f['id'], + type="file", + name=f.get('name'), + description=f.get('description'), + created=iso_date.parse(f['createdDate']), + modified=iso_date.parse(f['modifiedDate']), + updated=datetime.utcnow(), + status=f['status'], + md5=f.get('contentProperties', {}).get('md5', 'd41d8cd98f00b204e9800998ecf8427e'), + size=f.get('contentProperties', {}).get('size', 0), + )) + + with self.node_cache_lock: + if n.is_available: + self.node_id_to_node_cache[n.id] = n + else: + try: del self.node_id_to_node_cache[n.id] + except: pass + + c.execute( + 'INSERT OR REPLACE INTO nodes ' + '(id, type, name, description, created, modified, updated, status) ' + 'VALUES (?, ?, ?, ?, ?, ?, ?, ?)', + [n.id, n.type, n.name, n.description, + n.created, n.modified, + n.updated, + n.status + ] + ) + c.execute( + 'INSERT OR REPLACE INTO files (id, md5, size) VALUES (?, ?, ?)', + [n.id, + n.md5, + n.size + ] + ) logger.info('Inserted/updated %d file(s).' 
% len(files)) From 885b4d0b73d2b9a8b0dcbad20b5c29938dc97439 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Wed, 15 Feb 2017 15:10:31 -0500 Subject: [PATCH 42/63] store symlinks targets in file contents to ultimately allow for longer targets than amazon's max xattr size --- acdcli/acd_fuse.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index b4ef701..e02493b 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -863,14 +863,19 @@ def symlink(self, target, source): node_id = self.fh_to_node[fh] self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFLNK | 0o0777) self._setxattr(node_id, _XATTR_SYMLINK_OVERRIDE_NAME, source) + self.write(target, source.encode('utf-8'), 0, fh) self.release(target, fh) return 0 def readlink(self, path): - node_id = self.cache.resolve_id(path) - if not node_id: + node = self.cache.resolve(path) + if not node: raise FuseOSError(errno.ENOENT) - source = self._getxattr(node_id, _XATTR_SYMLINK_OVERRIDE_NAME) + source = self._getxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) + if source is None: + size = self.wp.length(node.id, None) + if size is None: size = node.size + source = self.read(path, size, 0).decode('utf-8') return source From dc47fe2d873692ddd29bdef08b42d85fa0d436ba Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 18 Feb 2017 21:09:57 -0500 Subject: [PATCH 43/63] cache files' content, starting with small files and symlinks --- acdcli/acd_fuse.py | 73 +++++++++++++++++++++++++++++++----------- acdcli/api/content.py | 4 +-- acdcli/cache/query.py | 16 +++++++++ acdcli/cache/schema.py | 45 ++++++++++++++++++++++++-- acdcli/cache/sync.py | 30 ++++++++++++++--- 5 files changed, 140 insertions(+), 28 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index e02493b..bfacbd5 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -56,9 +56,10 @@ def find_library(*args): _XATTR_UID_OVERRIDE_NAME = 'fuse.uid' _XATTR_GID_OVERRIDE_NAME = 'fuse.gid' _XATTR_SYMLINK_OVERRIDE_NAME = 'fuse.symlink' +_FS_BLOCK_SIZE = 4096 # for stat and statfs calls. 
This could be anything as long as it's consistent _def_conf = configparser.ConfigParser() -_def_conf['read'] = dict(open_chunk_limit=10, timeout=5) +_def_conf['read'] = dict(open_chunk_limit=10, timeout=5, cache_small_file_size=1024) _def_conf['write'] = dict(buffer_size=int(1e9), timeout=30) @@ -378,8 +379,8 @@ def __init__(self, **kwargs): """sets the default gid""" self.umask = kwargs['umask'] """sets the default umask""" - self.blksize = self.acd_client._conf.getint('transfer', 'fs_chunk_size') - """size of the filesystem blocks for stat queries""" + self.cache_small_file_size = conf.getint('read', 'cache_small_file_size') + """size of files under which we cache the contents automatically""" self.destroyed = autosync.keywords['stop'] """:type: multiprocessing.Event""" @@ -456,8 +457,8 @@ def _getattr(self, node, fh=None) -> dict: return dict(st_mode=mode, st_nlink=self.cache.num_parents(node.id) if self.nlinks else 1, st_size=size, - st_blksize=self.blksize, - st_blocks=(size + 511) // 512, + st_blksize=_FS_BLOCK_SIZE, + st_blocks=(size + 511) // 512, # this field always expects a 512 block size **attrs) def listxattr(self, path): @@ -505,7 +506,7 @@ def _removexattr(self, node_id, name): with self.xattr_cache_lock: if name in self.xattr_cache[node_id]: del self.xattr_cache[node_id][name] - self.properties_dirty.add(node_id) + self.xattr_dirty.add(node_id) def setxattr(self, path, name, value, options, position=0): node_id = self.cache.resolve_id(path) @@ -570,17 +571,29 @@ def read(self, path, length, offset, fh=None) -> bytes: if ret is not None: return ret + """Next, check our local cache""" + content = self.cache.get_content(node.id, node.version) + if content is not None: + return content[offset:offset+length] + + """For small files, read and cache the whole file""" + if node.size <= self.cache_small_file_size: + content = self.acd_client.download_chunk(node.id, 0, node.size) + self.cache.insert_content(node.id, node.version, content) + return content[offset:offset+length] + + """For all other files, stream from amazon""" return self.rp.get(node.id, offset, length, node.size) def statfs(self, path) -> dict: - """Gets some filesystem statistics as specified in :manpage:`stat(2)`.""" - - return dict(f_bsize=self.blksize, - f_frsize=self.blksize, - f_blocks=self.total // self.blksize, # total no of blocks - f_bfree=self.free // self.blksize, # free blocks - f_bavail=self.free // self.blksize, - f_namemax=256 + """Gets some filesystem statistics as specified in :manpage:`statfs(2)`.""" + + return dict(f_bsize=_FS_BLOCK_SIZE, + f_frsize=_FS_BLOCK_SIZE, + f_blocks=self.total // _FS_BLOCK_SIZE, # total no of blocks + f_bfree=self.free // _FS_BLOCK_SIZE, # free blocks + f_bavail=self.free // _FS_BLOCK_SIZE, + f_namemax=256 # from amazon's spec ) def mkdir(self, path, mode): @@ -619,7 +632,7 @@ def _trash(self, path): except RequestError as e: FuseOSError.convert(e) else: - self.cache.insert_node(r, flush_resolve_cache=node.is_folder) + self.cache.insert_node(r, flush_resolve_cache=False) self.cache.resolve_cache_del(path) def rmdir(self, path): @@ -859,11 +872,12 @@ def _chown(self, node_id, uid, gid): return 0 def symlink(self, target, source): + source_bytes = source.encode('utf-8') fh = self.create(target, None) node_id = self.fh_to_node[fh] self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFLNK | 0o0777) - self._setxattr(node_id, _XATTR_SYMLINK_OVERRIDE_NAME, source) - self.write(target, source.encode('utf-8'), 0, fh) + # self._setxattr(node_id, _XATTR_SYMLINK_OVERRIDE_NAME, 
source) + self.write(target, source_bytes, 0, fh) self.release(target, fh) return 0 @@ -871,11 +885,32 @@ def readlink(self, path): node = self.cache.resolve(path) if not node: raise FuseOSError(errno.ENOENT) - source = self._getxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) + + source = None + + # amazon reduced property size (all our xattr space) to 500 characters or less, + # so we're moving symlinks to file bodies. + try: source = self._getxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) + except: pass + if source is not None: + logger.debug("readlink: upgrading node: %s path: %s" % (node.id, path)) + source_bytes = source.encode('utf-8') + fh = self.open(path, 0) + self.write(path, source_bytes, 0, fh) + self.release(path, fh) + self._removexattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) + + if source is None: + source_bytes = self.cache.get_content(node.id, node.version) + if source_bytes is not None: + source = source_bytes.decode('utf-8') + if source is None: size = self.wp.length(node.id, None) if size is None: size = node.size - source = self.read(path, size, 0).decode('utf-8') + source_bytes = self.read(path, size, 0) + source = source_bytes.decode('utf-8') + self.cache.insert_content(node.id, node.version, source_bytes) return source diff --git a/acdcli/api/content.py b/acdcli/api/content.py index e31c455..710b13a 100644 --- a/acdcli/api/content.py +++ b/acdcli/api/content.py @@ -416,7 +416,7 @@ def response_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> Re raise RequestError(r.status_code, r.text) return r - def download_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> bytearray: + def download_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> bytes: """Load a file chunk into memory. :param length: the length of the download chunk""" @@ -432,7 +432,7 @@ def download_chunk(self, node_id: str, offset: int, length: int, **kwargs) -> by buffer.extend(chunk) finally: r.close() - return buffer + return bytes(buffer) def download_thumbnail(self, node_id: str, file_name: str, max_dim=128): """Download a movie's or picture's thumbnail into a file. diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index d3ea90e..d2a5c39 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -53,6 +53,9 @@ def datetime_from_string(dt: str) -> datetime: PROPERTY_BY_ID_SQL = """SELECT * FROM properties WHERE id=? AND owner=? AND key=?""" +CONTENT_BY_ID_SQL = """SELECT * FROM content WHERE id=? AND version=?""" +CONTENT_ACCESSED_SQL = """UPDATE content SET accessed=? 
WHERE id=?""" + USAGE_SQL = 'SELECT SUM(size) FROM files' FIND_BY_NAME_SQL = """SELECT n.*, f.* FROM nodes n @@ -100,6 +103,10 @@ def __init__(self, row): self.size = row['size'] except IndexError: self.size = 0 + try: + self.version = row['version'] + except IndexError: + self.version = 0 def __lt__(self, other): return self.name < other.name @@ -368,3 +375,12 @@ def get_property(self, node_id, owner_id, key) -> 'Union[str|None]': if r: return r['value'] return None + + def get_content(self, node_id:str, version:int) -> 'Union[bytes|None]': + if version == 0: return None + with cursor(self._conn) as c: + c.execute(CONTENT_ACCESSED_SQL, [datetime.utcnow(), node_id]) + c.execute(CONTENT_BY_ID_SQL, [node_id, version]) + r = c.fetchone() + if r: + return r['value'] diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index 71b5c5e..5bf6a4c 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -48,6 +48,7 @@ id VARCHAR(50) NOT NULL, md5 VARCHAR(32), size BIGINT, + version BIGINT, PRIMARY KEY (id), UNIQUE (id), FOREIGN KEY(id) REFERENCES nodes (id) @@ -61,9 +62,22 @@ FOREIGN KEY(child) REFERENCES nodes (id) ); + CREATE TABLE content ( + id VARCHAR(50) NOT NULL, + value BLOB, + size BIGINT, + version BIGINT, + accessed DATETIME, + PRIMARY KEY (id), + UNIQUE (id), + FOREIGN KEY(id) REFERENCES nodes (id) + ); + + CREATE INDEX ix_content_size ON content(size); + CREATE INDEX ix_content_accessed ON content(accessed); CREATE INDEX ix_parentage_child ON parentage(child); CREATE INDEX ix_nodes_names ON nodes(name); - PRAGMA user_version = 3; + PRAGMA user_version = 4; """ _GEN_DROP_TABLES_SQL = \ @@ -91,6 +105,12 @@ def _1_to_2(conn): def _2_to_3(conn): conn.executescript( + # For people upgrading from the main branch to PR374, this line should make the db queries work. + # The user would also need to old-sync if they had multiple databases *and* were all ready using + # properties in some of them. It's not clear how to do that from here aside from dropping all data. + 'CREATE TABLE IF NOT EXISTS properties (id VARCHAR(50) NOT NULL, owner TEXT NOT NULL, ' + 'key TEXT NOT NULL, value TEXT, PRIMARY KEY (id), FOREIGN KEY(id) REFERENCES nodes (id));' + 'CREATE INDEX IF NOT EXISTS ix_parentage_child ON parentage(child);' # Having changed the schema, the queries can be optimised differently. # In order to be aware of that, re-analyze the type of data and indexes, @@ -100,12 +120,31 @@ def _2_to_3(conn): ) conn.commit() -_migrations = [_0_to_1, _1_to_2, _2_to_3] + +def _3_to_4(conn): + conn.executescript( + 'ALTER TABLE files ADD version BIGINT;' + + 'DROP TABLE IF EXISTS content;' + 'CREATE TABLE content (id VARCHAR(50) NOT NULL, value BLOB, size BIGINT, version BIGINT, accessed DATETIME,' + 'PRIMARY KEY (id), UNIQUE (id), FOREIGN KEY(id) REFERENCES nodes (id)); ' + + 'CREATE INDEX IF NOT EXISTS ix_content_size ON content(size);' + 'CREATE INDEX IF NOT EXISTS ix_content_accessed ON content(accessed);' + # Having changed the schema, the queries can be optimised differently. + # In order to be aware of that, re-analyze the type of data and indexes, + # allowing SQLite3 to make better decisions. 
+ 'ANALYZE;' + 'PRAGMA user_version = 4;' + ) + conn.commit() + +_migrations = [_0_to_1, _1_to_2, _2_to_3, _3_to_4] """list of all migrations from index -> index+1""" class SchemaMixin(object): - _DB_SCHEMA_VER = 3 + _DB_SCHEMA_VER = 4 def init(self): try: diff --git a/acdcli/cache/sync.py b/acdcli/cache/sync.py index a1756dc..c9cfae1 100644 --- a/acdcli/cache/sync.py +++ b/acdcli/cache/sync.py @@ -42,6 +42,7 @@ def remove_purged(self, purged: list): with mod_cursor(self._conn) as c: c.execute('DELETE FROM nodes WHERE id IN %s' % placeholders(slice_), slice_) c.execute('DELETE FROM files WHERE id IN %s' % placeholders(slice_), slice_) + c.execute('DELETE FROM content WHERE id IN %s' % placeholders(slice_), slice_) c.execute('DELETE FROM parentage WHERE parent IN %s' % placeholders(slice_), slice_) c.execute('DELETE FROM parentage WHERE child IN %s' % placeholders(slice_), slice_) c.execute('DELETE FROM properties WHERE id IN %s' % placeholders(slice_), slice_) @@ -117,13 +118,15 @@ def insert_folders(self, folders: list): status=f['status'], md5=None, size=0, + version=0, )) with self.node_cache_lock: if n.is_available: self.node_id_to_node_cache[n.id] = n else: - self.node_id_to_node_cache.clear() + try: del self.node_id_to_node_cache[n.id] + except: pass c.execute( 'INSERT OR REPLACE INTO nodes ' @@ -154,6 +157,7 @@ def insert_files(self, files: list): status=f['status'], md5=f.get('contentProperties', {}).get('md5', 'd41d8cd98f00b204e9800998ecf8427e'), size=f.get('contentProperties', {}).get('size', 0), + version=f.get('contentProperties', {}).get('version', 0), )) with self.node_cache_lock: @@ -163,6 +167,9 @@ def insert_files(self, files: list): try: del self.node_id_to_node_cache[n.id] except: pass + if not n.is_available: + self.remove_content(n.id) + c.execute( 'INSERT OR REPLACE INTO nodes ' '(id, type, name, description, created, modified, updated, status) ' @@ -174,10 +181,11 @@ def insert_files(self, files: list): ] ) c.execute( - 'INSERT OR REPLACE INTO files (id, md5, size) VALUES (?, ?, ?)', + 'INSERT OR REPLACE INTO files (id, md5, size, version) VALUES (?, ?, ?, ?)', [n.id, n.md5, - n.size + n.size, + n.version, ] ) @@ -225,4 +233,18 @@ def insert_property(self, node_id, owner_id, key, value): '(id, owner, key, value) ' 'VALUES (?, ?, ?, ?)', [node_id, owner_id, key, value] - ) \ No newline at end of file + ) + + def insert_content(self, node_id:str, version:int, value:bytes): + with mod_cursor(self._conn) as c: + c.execute('INSERT OR REPLACE INTO content ' + '(id, value, size, version, accessed) ' + 'VALUES (?, ?, ?, ?, ?)', + [node_id, value, len(value), version, datetime.utcnow()] + ) + + def remove_content(self, node_id:str): + with mod_cursor(self._conn) as c: + c.execute('DELETE FROM content WHERE id=?', + [node_id] + ) From d42854490e490f41d0f69e46d52eba17ba0adb53 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Tue, 21 Feb 2017 12:28:57 -0500 Subject: [PATCH 44/63] retry on 429 rate throttling errors --- acdcli/api/common.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/acdcli/api/common.py b/acdcli/api/common.py index d6e2246..505f9a3 100644 --- a/acdcli/api/common.py +++ b/acdcli/api/common.py @@ -14,11 +14,14 @@ class ReadTimeoutError(Exception): # status codes that indicate request success OK_CODES = [requests.codes.OK] -RETRY_CODES = [requests.codes.server_error, - requests.codes.gateway_timeout, - requests.codes.request_timeout, - requests.codes.bad_request, - requests.codes.service_unavailable] +RETRY_CODES = 
[requests.codes.server_error, # 500 + requests.codes.service_unavailable, # 503 + requests.codes.gateway_timeout, # 504 + requests.codes.bad_request, # 400 + requests.codes.request_timeout, # 408 + requests.codes.too_many_requests, # 429 + ] + class RequestError(Exception): """Catch-all exception class for various connection and ACD server errors.""" From 3a88b053e810a8c42abf24b3f91175bf9be0e076 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Wed, 22 Feb 2017 21:52:06 -0500 Subject: [PATCH 45/63] release file handle lock during file writes to amazon --- acdcli/acd_fuse.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index bfacbd5..0290ae9 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -294,6 +294,9 @@ def release(self, node_id, fh): self._write_and_sync(b, node_id) del self.buffers[node_id] + def remove(self, node_id, fh): + try: del self.buffers[node_id] + except: pass class LoggingMixIn(object): """Modified fusepy LoggingMixIn that does not log read or written bytes @@ -803,22 +806,32 @@ def release(self, path, fh): node_id = self.fh_to_node[fh] else: node_id = self.cache.resolve_id(path) - if node_id: + if not node_id: + raise FuseOSError(errno.ENOENT) + + flush = False + with self.fh_lock: + """release the writer if there's no more interest. This allows many file + handles to write to a single node provided they do it in order. + """ + interest = self.node_to_fh.get(node_id) + if interest: + interest.discard(fh) + if not interest: + flush = True + del self.node_to_fh[node_id] + del self.fh_to_node[fh] + + if flush: self.rp.release(node_id) + self.wp.flush(node_id, None) + self._xattr_write_and_sync() + """make sure no additional file handles showed interest before we get rid of the write buffer""" with self.fh_lock: - """release the writer if there's no more interest. This allows many file - handles to write to a single node provided they do it in order. - """ interest = self.node_to_fh.get(node_id) - if interest: - interest.discard(fh) if not interest: - self.wp.release(node_id, fh) - self._xattr_write_and_sync() - del self.node_to_fh[node_id] - del self.fh_to_node[fh] - else: - raise FuseOSError(errno.ENOENT) + self.wp.remove(node_id, None) + return 0 def utimens(self, path, times=None): """Should set node atime and mtime to values as passed in ``times`` From f760f46b07dbed0ff03b404beed6abeb2cd07be3 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 25 Feb 2017 22:41:31 -0500 Subject: [PATCH 46/63] rely on rsync's later chmod to save an amazon call per file/folder create. --- acdcli/acd_fuse.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 0290ae9..0acdd3b 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -616,7 +616,10 @@ def mkdir(self, path, mode): self.cache.insert_node(r, flush_resolve_cache=False) node_id = r['id'] self.cache.resolve_cache_add(path, node_id) - if mode is not None: + + # TODO: Set properties in the node creation call. Doing it here means we call amazon twice; + # and if we're rsyncing chmod does it a third time. + if False and mode is not None: self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFDIR | (stat.S_IMODE(mode))) self._xattr_write_and_sync() @@ -676,7 +679,9 @@ def create(self, path, mode) -> int: # self._rename(prior_node_id, prior_node_cache.name) FuseOSError.convert(e) - if mode is not None: + # TODO: Set properties in the node creation call. 
Doing it here means we call amazon twice; + # and if we're rsyncing chmod does it a third time. + if False and mode is not None: self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFREG | (stat.S_IMODE(mode))) with self.fh_lock: From 6098ebcc7358a903814ff930c7d26bf95c0251e4 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 25 Feb 2017 23:01:45 -0500 Subject: [PATCH 47/63] the folders table no longer exists --- acdcli/cache/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index 5bf6a4c..ca26d51 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -58,7 +58,7 @@ parent VARCHAR(50) NOT NULL, child VARCHAR(50) NOT NULL, PRIMARY KEY (parent, child), - FOREIGN KEY(parent) REFERENCES folders (id), + FOREIGN KEY(parent) REFERENCES nodes (id), FOREIGN KEY(child) REFERENCES nodes (id) ); From a3e291e7d62120e0c5fadc4e398e8facd295ac80 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Mon, 27 Feb 2017 13:33:15 -0500 Subject: [PATCH 48/63] turns out rsync needs this for proper change detection in some cases. --- acdcli/acd_fuse.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 0acdd3b..0290ae9 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -616,10 +616,7 @@ def mkdir(self, path, mode): self.cache.insert_node(r, flush_resolve_cache=False) node_id = r['id'] self.cache.resolve_cache_add(path, node_id) - - # TODO: Set properties in the node creation call. Doing it here means we call amazon twice; - # and if we're rsyncing chmod does it a third time. - if False and mode is not None: + if mode is not None: self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFDIR | (stat.S_IMODE(mode))) self._xattr_write_and_sync() @@ -679,9 +676,7 @@ def create(self, path, mode) -> int: # self._rename(prior_node_id, prior_node_cache.name) FuseOSError.convert(e) - # TODO: Set properties in the node creation call. Doing it here means we call amazon twice; - # and if we're rsyncing chmod does it a third time. - if False and mode is not None: + if mode is not None: self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFREG | (stat.S_IMODE(mode))) with self.fh_lock: From ac80bec9d252170ab88954ebef3af5565cbbf04b Mon Sep 17 00:00:00 2001 From: bgemmill Date: Tue, 28 Feb 2017 19:41:25 -0500 Subject: [PATCH 49/63] releasing the fh lock can lead to 409 concurrent modification errors in huge directory trees with rsync --- acdcli/acd_fuse.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 0290ae9..3862f6e 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -294,9 +294,6 @@ def release(self, node_id, fh): self._write_and_sync(b, node_id) del self.buffers[node_id] - def remove(self, node_id, fh): - try: del self.buffers[node_id] - except: pass class LoggingMixIn(object): """Modified fusepy LoggingMixIn that does not log read or written bytes @@ -809,7 +806,6 @@ def release(self, path, fh): if not node_id: raise FuseOSError(errno.ENOENT) - flush = False with self.fh_lock: """release the writer if there's no more interest. This allows many file handles to write to a single node provided they do it in order. 
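The release path tightened up in this patch relies on a simple interest count: each open() or create() maps a new file handle to a node id in fh_to_node and adds it to the node_to_fh set, and only the handle that empties that set triggers the final flush of the write buffer. A reduced sketch of that bookkeeping, assuming the caller flushes to the remote side when release() reports the last handle (HandleTracker and its method names are illustrative, not the module's API):

    from collections import defaultdict
    from threading import Lock

    class HandleTracker:
        """Maps file handles to node ids and reports when the last handle
        on a node is released, so its write buffer is flushed exactly once."""

        def __init__(self):
            self._lock = Lock()
            self._next_fh = 0
            self._fh_to_node = {}
            self._node_to_fh = defaultdict(set)

        def open(self, node_id) -> int:
            with self._lock:
                self._next_fh += 1
                fh = self._next_fh
                self._fh_to_node[fh] = node_id
                self._node_to_fh[node_id].add(fh)
                return fh

        def release(self, fh) -> bool:
            """Return True if this was the last open handle on the node."""
            with self._lock:
                node_id = self._fh_to_node.pop(fh)
                interest = self._node_to_fh[node_id]
                interest.discard(fh)
                if not interest:
                    del self._node_to_fh[node_id]
                    return True
                return False

Performing the flush while still holding the lock, as the hunk below reverts to, serializes releases on the same node and avoids the 409 concurrent-modification errors mentioned in the commit message, at the cost of blocking other handle operations during the upload.
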
@@ -818,19 +814,11 @@ def release(self, path, fh): if interest: interest.discard(fh) if not interest: - flush = True + self.rp.release(node_id) + self.wp.release(node_id, None) + self._xattr_write_and_sync() del self.node_to_fh[node_id] del self.fh_to_node[fh] - - if flush: - self.rp.release(node_id) - self.wp.flush(node_id, None) - self._xattr_write_and_sync() - """make sure no additional file handles showed interest before we get rid of the write buffer""" - with self.fh_lock: - interest = self.node_to_fh.get(node_id) - if not interest: - self.wp.remove(node_id, None) return 0 def utimens(self, path, times=None): From c9c07193b6a5b9016ad83c8689681e325e4b899d Mon Sep 17 00:00:00 2001 From: bgemmill Date: Fri, 3 Mar 2017 16:16:55 -0500 Subject: [PATCH 50/63] sparse file support --- acdcli/acd_fuse.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 3862f6e..ee512ef 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -236,9 +236,6 @@ def read(self, offset, length: int): def write(self, offset, bytes_: bytes): with self.lock: self.dirty = True - if offset > self.len: - logger.error('Wrong offset for writing to buffer; writing gap detected') - raise FuseOSError(errno.ESPIPE) self.f.seek(offset) ret = self.f.write(bytes_) self.f.seek(0, os.SEEK_END) From 3d2c26a8565a2fc79780078554edc4c318c6bfd7 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 4 Mar 2017 00:59:19 -0500 Subject: [PATCH 51/63] sparse file support at ends of files too --- acdcli/acd_fuse.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index ee512ef..2ab3100 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -768,30 +768,40 @@ def write(self, path, data, offset, fh) -> int: return len(data) def truncate(self, path, length, fh=None): - """Pseudo-truncates a file, i.e. clears content if ``length``==0 or does nothing - if ``length`` is positive. + """Pseudo-truncates a file, i.e. clears content if ``length``==0 or grows + newly created nodes if ``length`` is greater than the write-back cache size. 
:raises FuseOSError: if pseudo-truncation to length is not supported""" if fh: node_id = self.fh_to_node[fh] + node = self.cache.get_node(node_id) else: - node_id = self.cache.resolve_id(path) - if not node_id: + node = self.cache.resolve(path, trash=False) + if not node: raise FuseOSError(errno.ENOENT) - if length == 0: + # cut file size to 0 + if length == 0 and node.size: try: - r = self.acd_client.clear_file(node_id) + r = self.acd_client.clear_file(node.id) except RequestError as e: raise FuseOSError.convert(e) else: self.cache.insert_node(r, flush_resolve_cache=False) + return 0 - """No good way to deal with positive lengths at the moment; since we can only do - something about it in the middle of writing, this means the only use case we can - capture is when a program over-writes and then truncates back.""" - return 0 + # grow newly created files + if node.size == 0 and length: + size = self.wp.length(node.id, fh) + if size is None: size = node.size + if length > size: + # amazon doesn't understand sparse files, so we send zeros + self.wp.write(node.id, fh, size, bytes(length - size)) + return 0 + + # throw until there's an api for modifying existing files' length + raise FuseOSError(errno.ENOSYS) def release(self, path, fh): """Releases an open ``path``.""" From 96735d5dac20de37c59daf3a890d6c0230d33d93 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sun, 5 Mar 2017 20:25:52 -0500 Subject: [PATCH 52/63] put flush back to try to solve plex issues --- acdcli/acd_fuse.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 2ab3100..7adf5ee 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -767,6 +767,15 @@ def write(self, path, data, offset, fh) -> int: self.wp.write(node_id, fh, offset, data) return len(data) + def flush(self, path, fh): + if fh: + node_id = self.fh_to_node[fh] + else: + node_id = self.cache.resolve_id(path) + if not node_id: + raise FuseOSError(errno.ENOENT) + self.wp.flush(node_id, fh) + def truncate(self, path, length, fh=None): """Pseudo-truncates a file, i.e. clears content if ``length``==0 or grows newly created nodes if ``length`` is greater than the write-back cache size. 
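The two patches above settle on one convention for writes that fall outside what a program has actually sent: growth from truncate() is materialized as literal zero bytes (the in-diff comment notes the remote side has no sparse-file concept), and flush() pushes whatever a node has buffered. A minimal, self-contained sketch of that idea follows; the class and the upload callable are illustrative stand-ins under those assumptions, not the module's WriteProxy API.

import io
from threading import Lock

class NodeWriteBuffer:
    """Toy per-node write-back buffer (a stand-in for the real tempfile-backed one)."""
    def __init__(self):
        self.f = io.BytesIO()
        self.lock = Lock()
        self.dirty = False

    def write(self, offset: int, data: bytes) -> int:
        with self.lock:
            self.dirty = True
            self.f.seek(offset)  # BytesIO zero-fills any gap when writing past EOF
            return self.f.write(data)

    def grow(self, length: int):
        # Emulate an upward truncate(): the upload path sends literal bytes and the
        # remote end has no sparse-file notion, so move EOF by writing one zero byte.
        with self.lock:
            end = self.f.seek(0, io.SEEK_END)
            if length > end:
                self.f.seek(length - 1)
                self.f.write(b'\0')
                self.dirty = True

    def flush(self, upload):
        # 'upload' is any callable taking the full payload, e.g. an overwrite request.
        with self.lock:
            if self.dirty:
                upload(self.f.getvalue())
                self.dirty = False

buf = NodeWriteBuffer()
buf.write(0, b'header')
buf.grow(4096)  # zero-fill out to 4 KiB, the way an rsync-style truncate expects
buf.flush(lambda payload: print('uploading %d bytes' % len(payload)))

Keeping the flush decision per node rather than per file handle is the same bookkeeping the release()/flush() changes above are juggling.
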
From 26325db0f3df4d4b5adf12d35c33ed5496079d64 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Mon, 6 Mar 2017 20:41:06 -0500 Subject: [PATCH 53/63] fix hanging write buffers full of 0s after a truncate if no fh's are open --- acdcli/acd_fuse.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 7adf5ee..284d5fe 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -291,6 +291,11 @@ def release(self, node_id, fh): self._write_and_sync(b, node_id) del self.buffers[node_id] + def remove(self, node_id, fh): + b = self.buffers.get(node_id) + if b: + del self.buffers[node_id] + class LoggingMixIn(object): """Modified fusepy LoggingMixIn that does not log read or written bytes @@ -746,6 +751,9 @@ def open(self, path, flags) -> int: node_id = self.cache.resolve_id(path, False) if not node_id: raise FuseOSError(errno.ENOENT) + return self._open(node_id) + + def _open(self, node_id): with self.fh_lock: self.fh += 1 self.fh_to_node[self.fh] = node_id @@ -791,14 +799,16 @@ def truncate(self, path, length, fh=None): raise FuseOSError(errno.ENOENT) # cut file size to 0 - if length == 0 and node.size: - try: - r = self.acd_client.clear_file(node.id) - except RequestError as e: - raise FuseOSError.convert(e) - else: - self.cache.insert_node(r, flush_resolve_cache=False) - return 0 + if length == 0: + if node.size: + try: + r = self.acd_client.clear_file(node.id) + except RequestError as e: + raise FuseOSError.convert(e) + else: + self.cache.insert_node(r, flush_resolve_cache=False) + self.wp.remove(node.id, None) + return 0 # grow newly created files if node.size == 0 and length: @@ -806,7 +816,9 @@ def truncate(self, path, length, fh=None): if size is None: size = node.size if length > size: # amazon doesn't understand sparse files, so we send zeros - self.wp.write(node.id, fh, size, bytes(length - size)) + internal_fh = self._open(node.id) + self.wp.write(node.id, fh, length-1, bytes(1)) + self.release(path, internal_fh) return 0 # throw until there's an api for modifying existing files' length From 89c33e362368d3fd0c7f70768de63d14bcc31efc Mon Sep 17 00:00:00 2001 From: bgemmill Date: Tue, 7 Mar 2017 18:36:35 -0500 Subject: [PATCH 54/63] lazy xattr writing and general cleanup --- acdcli/acd_fuse.py | 71 +++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 284d5fe..e8f01d4 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -11,8 +11,8 @@ from collections import deque, defaultdict from multiprocessing import Process -from threading import Thread, Lock -from time import time +from threading import Lock, Thread +from time import time, sleep import ctypes.util import binascii @@ -21,6 +21,10 @@ from acdcli.cache.db import CacheConsts +from fuse import FUSE, FuseOSError as FuseError, Operations +from acdcli.api.common import RequestError +from acdcli.utils.conf import get_conf + ctypes.util.__find_library = ctypes.util.find_library def find_library(*args): @@ -32,12 +36,6 @@ def find_library(*args): return ctypes.util.__find_library(*args) ctypes.util.find_library = find_library - -from fuse import FUSE, FuseOSError as FuseError, Operations -from acdcli.api.common import RequestError -from acdcli.utils.conf import get_conf -from acdcli.utils.time import * - logger = logging.getLogger(__name__) try: @@ -56,7 +54,8 @@ def find_library(*args): _XATTR_UID_OVERRIDE_NAME = 'fuse.uid' _XATTR_GID_OVERRIDE_NAME = 
'fuse.gid' _XATTR_SYMLINK_OVERRIDE_NAME = 'fuse.symlink' -_FS_BLOCK_SIZE = 4096 # for stat and statfs calls. This could be anything as long as it's consistent +_XATTR_DELAY = 2 # seconds to wait for additional xattr changes before flushing to amazon +_FS_BLOCK_SIZE = 4096 # for stat and statfs calls. Needs to be consistent and may affect read sizes from fuse _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5, cache_small_file_size=1024) @@ -391,7 +390,6 @@ def __init__(self, **kwargs): p.start() def destroy(self, path): - self._xattr_write_and_sync() self.destroyed.set() def readdir(self, path, fh) -> 'List[str]': @@ -535,9 +533,18 @@ def _xattr_load(self, node_id): try: self.xattr_cache[node_id] = json.loads(xattrs_str) except: self.xattr_cache[node_id] = {} - def _xattr_write_and_sync(self): + def _xattr_flush(self, node_id): + # collect all xattr changes while any fh's are open so we talk to amazon less + with self.fh_lock: + if self.node_to_fh.get(node_id): + return + Thread(target=self._xattr_write_and_sync, args=(node_id,)).start() + + def _xattr_write_and_sync(self, node_id): + # try to collect many xattr changes at once so we talk to amazon less + sleep(_XATTR_DELAY) with self.xattr_cache_lock: - for node_id in self.xattr_dirty: + if node_id in self.xattr_dirty: try: xattrs_str = json.dumps(self.xattr_cache[node_id]) self.acd_client.add_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, @@ -546,7 +553,7 @@ def _xattr_write_and_sync(self): logger.error('Error writing node xattrs "%s". %s' % (node_id, str(e))) else: self.cache.insert_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, xattrs_str) - self.xattr_dirty.clear() + self.xattr_dirty.discard(node_id) def read(self, path, length, offset, fh=None) -> bytes: """Read ```length`` bytes from ``path`` at ``offset``.""" @@ -555,7 +562,7 @@ def read(self, path, length, offset, fh=None) -> bytes: node_id = self.fh_to_node[fh] node = self.cache.get_node(node_id) else: - node = self.cache.resolve(path, trash=False) + node = self.cache.resolve(path) if not node: raise FuseOSError(errno.ENOENT) @@ -617,7 +624,7 @@ def mkdir(self, path, mode): self.cache.resolve_cache_add(path, node_id) if mode is not None: self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFDIR | (stat.S_IMODE(mode))) - self._xattr_write_and_sync() + self._xattr_flush(node_id) def _trash(self, path): logger.debug('trash %s' % path) @@ -645,9 +652,11 @@ def unlink(self, path): """Moves a file into ACD trash.""" self._trash(path) - def create(self, path, mode) -> int: - """Creates an empty file at ``path``. + def create(self, path, mode, **kwargs) -> int: + """Creates an empty file at ``path`` with access ``mode``. + :param mode: + :param path: :returns int: file handle""" name = os.path.basename(path) @@ -742,6 +751,7 @@ def _move(self, node, new_folder): def open(self, path, flags) -> int: """Opens a file. + :param path: :param flags: flags defined as in :manpage:`open(2)` :returns: file handle""" @@ -767,8 +777,10 @@ def write(self, path, data, offset, fh) -> int: if fh: node_id = self.fh_to_node[fh] - # This is not resolving by path on purpose, since flushing to - # amazon is done on closing all interested file handles. + else: + # This is not resolving by path on purpose, since flushing to + # amazon is done on closing all interested file handles. 
+ node_id = None if not node_id: raise FuseOSError(errno.ENOENT) @@ -794,7 +806,7 @@ def truncate(self, path, length, fh=None): node_id = self.fh_to_node[fh] node = self.cache.get_node(node_id) else: - node = self.cache.resolve(path, trash=False) + node = self.cache.resolve(path) if not node: raise FuseOSError(errno.ENOENT) @@ -834,6 +846,7 @@ def release(self, path, fh): if not node_id: raise FuseOSError(errno.ENOENT) + last_handle = False with self.fh_lock: """release the writer if there's no more interest. This allows many file handles to write to a single node provided they do it in order. @@ -842,11 +855,15 @@ def release(self, path, fh): if interest: interest.discard(fh) if not interest: - self.rp.release(node_id) - self.wp.release(node_id, None) - self._xattr_write_and_sync() + last_handle = True del self.node_to_fh[node_id] del self.fh_to_node[fh] + + if last_handle: + self.rp.release(node_id) + self.wp.release(node_id, None) + self._xattr_flush(node_id) + return 0 def utimens(self, path, times=None): @@ -854,6 +871,7 @@ def utimens(self, path, times=None): or current time (see :manpage:`utimensat(2)`). Note that this is only implemented for modified time. + :param path: :param times: [atime, mtime]""" node_id = self.cache.resolve_id(path) @@ -869,7 +887,7 @@ def utimens(self, path, times=None): try: self._setxattr(node_id, _XATTR_MTIME_OVERRIDE_NAME, mtime) - self._xattr_write_and_sync() + self._xattr_flush(node_id) except: raise FuseOSError(errno.ENOTSUP) @@ -885,7 +903,7 @@ def _chmod(self, node, mode): mode_perms = stat.S_IMODE(mode) mode_type = stat.S_IFMT(self._getattr(node)['st_mode']) self._setxattr(node.id, _XATTR_MODE_OVERRIDE_NAME, mode_type | mode_perms) - self._xattr_write_and_sync() + self._xattr_flush(node.id) return 0 def chown(self, path, uid, gid): @@ -897,7 +915,7 @@ def chown(self, path, uid, gid): def _chown(self, node_id, uid, gid): if uid != -1: self._setxattr(node_id, _XATTR_UID_OVERRIDE_NAME, uid) if gid != -1: self._setxattr(node_id, _XATTR_GID_OVERRIDE_NAME, gid) - self._xattr_write_and_sync() + self._xattr_flush(node_id) return 0 def symlink(self, target, source): @@ -946,6 +964,7 @@ def readlink(self, path): def mount(path: str, args: dict, **kwargs) -> 'Union[int, None]': """Fusermounts Amazon Cloud Drive to specified mountpoint. + :param path: :raises: RuntimeError :param args: args to pass on to ACDFuse init :param kwargs: fuse mount options as described in :manpage:`fuse(8)`""" From b084226a7b3f1428580feac84cc2395e6c58be73 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Wed, 15 Mar 2017 18:49:08 -0400 Subject: [PATCH 55/63] tidy --- acdcli/acd_fuse.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index e8f01d4..272c948 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -2,6 +2,7 @@ import configparser import errno +import io import json import logging import os @@ -55,7 +56,7 @@ def find_library(*args): _XATTR_GID_OVERRIDE_NAME = 'fuse.gid' _XATTR_SYMLINK_OVERRIDE_NAME = 'fuse.symlink' _XATTR_DELAY = 2 # seconds to wait for additional xattr changes before flushing to amazon -_FS_BLOCK_SIZE = 4096 # for stat and statfs calls. Needs to be consistent and may affect read sizes from fuse +_FS_BLOCK_SIZE = io.DEFAULT_BUFFER_SIZE # for stat and statfs calls. 
Needs to be consistent and may affect read sizes from fuse _def_conf = configparser.ConfigParser() _def_conf['read'] = dict(open_chunk_limit=10, timeout=5, cache_small_file_size=1024) @@ -450,7 +451,7 @@ def _getattr(self, node, fh=None) -> dict: **attrs) elif node.is_file: # symlink - if mode and stat.S_ISLNK(stat.S_IFMT(mode)): mode = stat.S_IFLNK | 0o0777 + if mode and stat.S_ISLNK(mode): mode = stat.S_IFLNK | 0o0777 # file else: mode = stat.S_IFREG | (stat.S_IMODE(mode) if mode else 0o0666 & ~self.umask) From 1a4b91e6092b272b7f29e61c7bdb03c2d8592bc6 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Wed, 15 Mar 2017 22:07:44 -0400 Subject: [PATCH 56/63] speed up smaller syncs by not using the disk --- acdcli/api/metadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/acdcli/api/metadata.py b/acdcli/api/metadata.py index 7645473..c5edcd3 100644 --- a/acdcli/api/metadata.py +++ b/acdcli/api/metadata.py @@ -53,7 +53,7 @@ def get_changes(self, checkpoint='', include_purged=False, silent=True, file=Non if file: tmp = open(file, 'w+b') else: - tmp = tempfile.TemporaryFile('w+b') + tmp = tempfile.SpooledTemporaryFile(max_size=1e9, mode='w+b') try: for line in r.iter_lines(chunk_size=10 * 1024 ** 2, decode_unicode=False): if line: From 1d68ecfc22e2e5b2aa6a310e7ece6fde800beda7 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Wed, 15 Mar 2017 23:17:59 -0400 Subject: [PATCH 57/63] make resolve recursive to cache intermediate results --- acdcli/cache/db.py | 4 +- acdcli/cache/query.py | 125 +++++++++++++++++++++++++----------------- 2 files changed, 76 insertions(+), 53 deletions(-) diff --git a/acdcli/cache/db.py b/acdcli/cache/db.py index d03f92e..d72dbae 100644 --- a/acdcli/cache/db.py +++ b/acdcli/cache/db.py @@ -4,7 +4,7 @@ import re import sqlite3 import sys -from threading import local, Lock +from threading import local, RLock from acdcli.utils.conf import get_conf @@ -71,7 +71,7 @@ def __init__(self, cache_path: str='', settings_path='', check=IntegrityCheckTyp self.node_id_to_node_cache = {} self.path_to_node_id_cache = {} - self.node_cache_lock = Lock() + self.node_cache_lock = RLock() """There are a huge number of repeated path lookups, so cache results and selectively invalidate.""" diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index d2a5c39..b3bffba 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -1,4 +1,5 @@ import logging +import os from datetime import datetime from .cursors import cursor @@ -25,6 +26,12 @@ def datetime_from_string(dt: str) -> datetime: WHERE p.parent = (?) ORDER BY n.name""" +PARENTS_SQL = """SELECT n.*, f.* FROM nodes n + JOIN parentage p ON n.id = p.parent + LEFT OUTER JOIN files f ON n.id = f.id + WHERE p.child = (?) + ORDER BY n.name""" + CHILDRENS_NAMES_SQL = """SELECT n.name FROM nodes n JOIN parentage p ON n.id = p.child WHERE p.parent = (?) 
AND n.status == 'AVAILABLE' @@ -159,14 +166,15 @@ def get_node(self, id) -> 'Union[Node|None]': return self.node_id_to_node_cache[id] except: pass - with cursor(self._conn) as c: - c.execute(NODE_BY_ID_SQL, [id]) - r = c.fetchone() - if r: - n = Node(r) - if n.is_available: + with cursor(self._conn) as c: + c.execute(NODE_BY_ID_SQL, [id]) + r = c.fetchone() + if r: + n = Node(r) + if n.is_available: + with self.node_cache_lock: self.node_id_to_node_cache[n.id] = n - return n + return n def get_root_node(self): return self.get_node(self.root_id) @@ -179,59 +187,50 @@ def get_conflicting_node(self, name: str, parent_id: str): if r: return Node(r) - def resolve_id(self, path: str, trash=False) -> str: + def resolve_id(self, path: str, trash=False) -> 'Union[str|None]': + n = self.resolve(path, trash) + if n: + return n.id + + def resolve(self, path: str, trash=False) -> 'Union[Node|None]': with self.node_cache_lock: try: - return self.path_to_node_id_cache[path] + return self.get_node(self.path_to_node_id_cache[path]) except: pass - n = self._resolve(path, trash) - if n: - self.node_id_to_node_cache[n.id] = n - self.path_to_node_id_cache[path] = n.id - return n.id - return None - def resolve(self, path: str, trash=False) -> 'Union[Node|None]': - """Gets a node from a path""" - id = self.resolve_id(path=path, trash=trash) - return self.get_node(id=id) if id else None - - def _resolve(self, path: str, trash=False) -> 'Union[Node|None]': - segments = list(filter(bool, path.split('/'))) - if not segments: - if not self.root_id: - return - with cursor(self._conn) as c: - c.execute(NODE_BY_ID_SQL, [self.root_id]) - r = c.fetchone() - return Node(r) + parent_path, name = os.path.split(path) + if not name: + r = self.get_root_node() + with self.node_cache_lock: + self.node_id_to_node_cache[r.id] = r + self.path_to_node_id_cache[path] = r.id + return r - parent = self.root_id - for i, segment in enumerate(segments): - with cursor(self._conn) as c: - c.execute(CHILD_OF_SQL, [segment, parent]) - r = c.fetchone() - r2 = c.fetchone() + parent = self.resolve(parent_path, trash=trash) + if not parent: + return - if not r: - return - r = Node(r) + with cursor(self._conn) as c: + c.execute(CHILD_OF_SQL, [name, parent.id]) + r = c.fetchone() + r2 = c.fetchone() + if not r: + return + r = Node(r) - if not r.is_available: - if not trash: - return - if r2: - logger.debug('None-unique trash name "%s" in %s.' % (segment, parent)) - return - if i + 1 == len(segments): - return r - if r.is_folder: - parent = r.id - continue - else: + if not r.is_available: + if not trash: + return + if r2: + logger.debug('None-unique trash name "%s" in %s.' 
% (name, parent)) return + with self.node_cache_lock: + self.node_id_to_node_cache[r.id] = r + self.path_to_node_id_cache[path] = r.id + return r + def childrens_names(self, folder_id) -> 'List[str]': with cursor(self._conn) as c: c.execute(CHILDRENS_NAMES_SQL, [folder_id]) @@ -331,6 +330,29 @@ def first_path(self, node_id: str) -> str: return node.simple_name return self.first_path(node.id) + node.name + '/' + def all_path(self, node_id: str, path_suffix=None) -> 'List[str]': + if node_id == self.root_id: + return ["/" + path_suffix] + + n = self.get_node(node_id) + if not n: + return [] + if path_suffix: + path_suffix = os.path.join(n.name, path_suffix) + else: + path_suffix = n.name + + ret = [] + with cursor(self._conn) as c: + c.execute(PARENTS_SQL, [n.id]) + parent = c.fetchone() + while parent: + parent = Node(parent) + if parent.is_available: + ret += self.all_path(parent.id, path_suffix) + parent = c.fetchone() + return ret + def find_by_name(self, name: str) -> 'List[Node]': nodes = [] with cursor(self._conn) as c: @@ -379,7 +401,8 @@ def get_property(self, node_id, owner_id, key) -> 'Union[str|None]': def get_content(self, node_id:str, version:int) -> 'Union[bytes|None]': if version == 0: return None with cursor(self._conn) as c: - c.execute(CONTENT_ACCESSED_SQL, [datetime.utcnow(), node_id]) + # Uncomment if/when we want to purge the cache based on LRU. Until then reduce the db load. + # c.execute(CONTENT_ACCESSED_SQL, [datetime.utcnow(), node_id]) c.execute(CONTENT_BY_ID_SQL, [node_id, version]) r = c.fetchone() if r: From a5fa452f474e55044acb415e519fa35ff7284b95 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Wed, 15 Mar 2017 23:18:55 -0400 Subject: [PATCH 58/63] fix schema for properties table --- acdcli/cache/schema.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index ca26d51..0a8686f 100644 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -33,7 +33,7 @@ owner TEXT NOT NULL, key TEXT NOT NULL, value TEXT, - PRIMARY KEY (id), + PRIMARY KEY (id, owner, key), FOREIGN KEY(id) REFERENCES nodes (id) ); @@ -76,6 +76,7 @@ CREATE INDEX ix_content_size ON content(size); CREATE INDEX ix_content_accessed ON content(accessed); CREATE INDEX ix_parentage_child ON parentage(child); + CREATE INDEX ix_parentage_parent ON parentage(parent); CREATE INDEX ix_nodes_names ON nodes(name); PRAGMA user_version = 4; """ @@ -109,9 +110,10 @@ def _2_to_3(conn): # The user would also need to old-sync if they had multiple databases *and* were all ready using # properties in some of them. It's not clear how to do that from here aside from dropping all data. 'CREATE TABLE IF NOT EXISTS properties (id VARCHAR(50) NOT NULL, owner TEXT NOT NULL, ' - 'key TEXT NOT NULL, value TEXT, PRIMARY KEY (id), FOREIGN KEY(id) REFERENCES nodes (id));' + 'key TEXT NOT NULL, value TEXT, PRIMARY KEY (id, owner, key), FOREIGN KEY(id) REFERENCES nodes (id));' 'CREATE INDEX IF NOT EXISTS ix_parentage_child ON parentage(child);' + 'CREATE INDEX IF NOT EXISTS ix_parentage_parent ON parentage(parent);' # Having changed the schema, the queries can be optimised differently. # In order to be aware of that, re-analyze the type of data and indexes, # allowing SQLite3 to make better decisions. 
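The primary-key change above is the substantive part of the schema fix: with PRIMARY KEY (id) the properties table could hold exactly one row per node, so a second (owner, key) pair for the same node hit a constraint violation. A throwaway sqlite3 session makes the difference visible; the table mirrors the schema above, while the UPSERT helper is only for illustration and assumes SQLite 3.24 or newer.

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE properties ('
             'id VARCHAR(50) NOT NULL, owner TEXT NOT NULL, '
             'key TEXT NOT NULL, value TEXT, '
             'PRIMARY KEY (id, owner, key))')

def set_property(node_id, owner, key, value):
    # Idempotent write: one row per (id, owner, key), updated in place on conflict.
    conn.execute('INSERT INTO properties (id, owner, key, value) VALUES (?, ?, ?, ?) '
                 'ON CONFLICT (id, owner, key) DO UPDATE SET value = excluded.value',
                 (node_id, owner, key, value))

set_property('node-1', 'owner-a', 'xattrs', '{"fuse.mode": 33188}')
set_property('node-1', 'owner-a', 'xattrs', '{"fuse.mode": 33261}')  # overwrite, no IntegrityError
set_property('node-1', 'owner-a', 'another-key', 'fits too')  # impossible under the old single-column key
print(conn.execute("SELECT key, value FROM properties WHERE id = 'node-1'").fetchall())
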
From f60ba358f91c0fb0da0f90d9c6b5b516f86ec898 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Tue, 21 Mar 2017 17:36:35 -0400 Subject: [PATCH 59/63] prevent xattr cache from falling out of sync on failed acd calls --- acdcli/acd_fuse.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 272c948..10e03c6 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -552,6 +552,8 @@ def _xattr_write_and_sync(self, node_id): xattrs_str) except (RequestError, IOError) as e: logger.error('Error writing node xattrs "%s". %s' % (node_id, str(e))) + try: del self.xattr_cache[node_id] + except: pass else: self.cache.insert_property(node_id, self.acd_client_owner, _XATTR_PROPERTY_NAME, xattrs_str) self.xattr_dirty.discard(node_id) From 2583cb0909b49f229cdf2697525b77ec475a81ef Mon Sep 17 00:00:00 2001 From: bgemmill Date: Wed, 29 Mar 2017 16:40:55 -0400 Subject: [PATCH 60/63] the fuse subdir module sometimes leaves trailing slashes on directory paths --- acdcli/cache/query.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/acdcli/cache/query.py b/acdcli/cache/query.py index b3bffba..b7c66fe 100644 --- a/acdcli/cache/query.py +++ b/acdcli/cache/query.py @@ -193,6 +193,7 @@ def resolve_id(self, path: str, trash=False) -> 'Union[str|None]': return n.id def resolve(self, path: str, trash=False) -> 'Union[Node|None]': + path = path.rstrip('/') with self.node_cache_lock: try: return self.get_node(self.path_to_node_id_cache[path]) @@ -304,6 +305,8 @@ def list_children(self, folder_id, trash=False, folder_path=None) -> 'Tuple[List """If the caller provides the folder_path, we can add all the children to the path->node_id cache for faster lookup after a directory listing""" + if folder_path: + folder_path = folder_path.rstrip('/') with self.node_cache_lock: for c in folders + files: if c.is_available: @@ -332,7 +335,7 @@ def first_path(self, node_id: str) -> str: def all_path(self, node_id: str, path_suffix=None) -> 'List[str]': if node_id == self.root_id: - return ["/" + path_suffix] + return ['/' + path_suffix] n = self.get_node(node_id) if not n: From e96644071f88e893350f889e7e256b4e6714dc03 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Fri, 26 May 2017 19:12:46 -0400 Subject: [PATCH 61/63] Merge branch 'master' of https://github.com/yadayada/acd_cli # Conflicts: # acdcli/cache/schema.py # docs/contributors.rst --- acdcli/cache/schema.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/acdcli/cache/schema.py b/acdcli/cache/schema.py index be5cf27..6c278c1 100755 --- a/acdcli/cache/schema.py +++ b/acdcli/cache/schema.py @@ -133,25 +133,16 @@ def _3_to_4(conn): # properties in some of them. It's not clear how to do that from here aside from dropping all data. 'CREATE TABLE IF NOT EXISTS properties (id VARCHAR(50) NOT NULL, owner TEXT NOT NULL, ' 'key TEXT NOT NULL, value TEXT, PRIMARY KEY (id, owner, key), FOREIGN KEY(id) REFERENCES nodes (id));' - - 'CREATE INDEX IF NOT EXISTS ix_parentage_child ON parentage(child);' - 'CREATE INDEX IF NOT EXISTS ix_parentage_parent ON parentage(parent);' - # Having changed the schema, the queries can be optimised differently. - # In order to be aware of that, re-analyze the type of data and indexes, - # allowing SQLite3 to make better decisions. 
- 'ANALYZE;' - 'PRAGMA user_version = 3;' - ) - - conn.executescript( + 'ALTER TABLE files ADD version BIGINT;' - 'DROP TABLE IF EXISTS content;' 'CREATE TABLE content (id VARCHAR(50) NOT NULL, value BLOB, size BIGINT, version BIGINT, accessed DATETIME,' 'PRIMARY KEY (id), UNIQUE (id), FOREIGN KEY(id) REFERENCES nodes (id)); ' 'CREATE INDEX IF NOT EXISTS ix_content_size ON content(size);' 'CREATE INDEX IF NOT EXISTS ix_content_accessed ON content(accessed);' + 'CREATE INDEX IF NOT EXISTS ix_parentage_parent ON parentage(parent);' + # Having changed the schema, the queries can be optimised differently. # In order to be aware of that, re-analyze the type of data and indexes, # allowing SQLite3 to make better decisions. From 3f3bed85ea8227297bc0bbb53c4405344d9121db Mon Sep 17 00:00:00 2001 From: bgemmill Date: Sat, 27 May 2017 08:58:28 -0400 Subject: [PATCH 62/63] remove legacy symlink handling since amazon purged properties during acdcli's ban --- acdcli/acd_fuse.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/acdcli/acd_fuse.py b/acdcli/acd_fuse.py index 10e03c6..9aef97a 100644 --- a/acdcli/acd_fuse.py +++ b/acdcli/acd_fuse.py @@ -926,7 +926,6 @@ def symlink(self, target, source): fh = self.create(target, None) node_id = self.fh_to_node[fh] self._setxattr(node_id, _XATTR_MODE_OVERRIDE_NAME, stat.S_IFLNK | 0o0777) - # self._setxattr(node_id, _XATTR_SYMLINK_OVERRIDE_NAME, source) self.write(target, source_bytes, 0, fh) self.release(target, fh) return 0 @@ -938,18 +937,6 @@ def readlink(self, path): source = None - # amazon reduced property size (all our xattr space) to 500 characters or less, - # so we're moving symlinks to file bodies. - try: source = self._getxattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) - except: pass - if source is not None: - logger.debug("readlink: upgrading node: %s path: %s" % (node.id, path)) - source_bytes = source.encode('utf-8') - fh = self.open(path, 0) - self.write(path, source_bytes, 0, fh) - self.release(path, fh) - self._removexattr(node.id, _XATTR_SYMLINK_OVERRIDE_NAME) - if source is None: source_bytes = self.cache.get_content(node.id, node.version) if source_bytes is not None: From e2554a0210c2f2530e9fedc07b6bbb3274d5e018 Mon Sep 17 00:00:00 2001 From: bgemmill Date: Mon, 29 May 2017 23:02:06 -0400 Subject: [PATCH 63/63] docs --- docs/configuration.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 4d8658b..1f8540b 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -81,7 +81,7 @@ fuse.ini :: [fs] ;block size used for size info - block_size = 512 + block_size = io.DEFAULT_BUFFER_SIZE [read] ;maximal number of simultaneously opened chunks per file
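On the block_size setting the final docs patch touches: inside the FUSE layer that value is used for the stat and statfs replies (and, per the comment on _FS_BLOCK_SIZE earlier in the series, may influence the read sizes FUSE issues), so the arithmetic involved is small. The sketch below shows the usual derivation of those fields; the helper names are placeholders rather than acd_fuse functions, and note that st_blocks is defined in 512-byte units regardless of the advertised block size.

import io

BLOCK_SIZE = io.DEFAULT_BUFFER_SIZE  # typically 8192 on CPython

def stat_blocks(size: int) -> int:
    # st_blocks counts 512-byte units, independent of st_blksize (see stat(2)).
    return -(-size // 512)  # ceiling division

def statfs_fields(total_bytes: int, free_bytes: int, block_size: int = BLOCK_SIZE) -> dict:
    # statvfs reports capacity in units of f_frsize; keep f_bsize consistent with it.
    return dict(f_bsize=block_size,
                f_frsize=block_size,
                f_blocks=total_bytes // block_size,
                f_bfree=free_bytes // block_size,
                f_bavail=free_bytes // block_size)

print(stat_blocks(10000))  # 20 blocks of 512 bytes
print(statfs_fields(100 * 2 ** 30, 40 * 2 ** 30))  # a 100 GiB volume with 40 GiB free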