Skip to content
This repository was archived by the owner on Feb 4, 2020. It is now read-only.

Commit c99332f

Browse files
authored
Merge pull request #222 from siu/atomicAndMultipleEntriesPerManifest
Atomic manifest updates and multiple entries per manifest
2 parents bdf41a6 + 66fa5e8 commit c99332f

File tree

8 files changed

+491
-90
lines changed

8 files changed

+491
-90
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ clcache changelog
1818
`CLCACHE_OBJECT_CACHE_TIMEOUT_MS` environment variable.
1919
* Improvement: Greatly improved concurrency of clcache such that concurrent
2020
invocations of the tool no longer block each other.
21+
* Improvement: Improve hit rate when alternating between two identical
22+
versions of the same source file that transitively get different contents of
23+
the included files (a common case when switching back and forth between
24+
branches).
2125

2226
## clcache 3.2.0 (2016-07-28)
2327

clcache.py

Lines changed: 86 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -58,11 +58,11 @@
5858
# to use it as mark for relative path.
5959
BASEDIR_REPLACEMENT = '?'
6060

61+
# ManifestEntry: an entry in a manifest file
6162
# `includeFiles`: list of paths to include files, which this source file uses
62-
# `includesContentToObjectMap`: dictionary
63-
# key: cumulative hash of all include files' content in includeFiles
64-
# value: key in the cache, under which the object file is stored
65-
Manifest = namedtuple('Manifest', ['includeFiles', 'includesContentToObjectMap'])
63+
# `includesContentHash`: hash of the contents of the includeFiles
64+
# `objectHash`: hash of the object in cache
65+
ManifestEntry = namedtuple('ManifestEntry', ['includeFiles', 'includesContentHash', 'objectHash'])
6666

6767
CompilerArtifacts = namedtuple('CompilerArtifacts', ['objectFilePath', 'stdout', 'stderr'])
6868

@@ -108,10 +108,6 @@ class IncludeNotFoundException(Exception):
108108
pass
109109

110110

111-
class IncludeChangedException(Exception):
112-
pass
113-
114-
115111
class CacheLockException(Exception):
116112
pass
117113

@@ -125,6 +121,24 @@ def __str__(self):
125121
return repr(self.message)
126122

127123

124+
class Manifest(object):
    """Ordered collection of manifest entries.

    New entries and touched entries are moved to the front of the list, so
    the entry at index 0 is the one added or touched most recently.
    """

    def __init__(self, entries=None):
        """Creates a manifest holding a shallow copy of `entries`.

        `entries` may be None (empty manifest) or any iterable of entries;
        the caller's container is never mutated by this object.
        """
        # list() copies lists and also accepts tuples/generators, whereas
        # calling .copy() would fail on iterables that lack that method.
        self._entries = [] if entries is None else list(entries)

    def entries(self):
        """Returns the internal list of entries (not a copy)."""
        return self._entries

    def addEntry(self, entry):
        """Adds entry at the top of the entries"""
        self._entries.insert(0, entry)

    def touchEntry(self, entryIndex):
        """Moves entry in entryIndex position to the top of entries()"""
        self._entries.insert(0, self._entries.pop(entryIndex))
140+
141+
128142
class ManifestSection(object):
129143
def __init__(self, manifestSectionDir):
130144
self.manifestSectionDir = manifestSectionDir
@@ -137,10 +151,14 @@ def manifestFiles(self):
137151
return filesBeneath(self.manifestSectionDir)
138152

139153
def setManifest(self, manifestHash, manifest):
154+
manifestPath = self.manifestPath(manifestHash)
155+
printTraceStatement("Writing manifest with manifestHash = {} to {}".format(manifestHash, manifestPath))
140156
ensureDirectoryExists(self.manifestSectionDir)
141-
with open(self.manifestPath(manifestHash), 'w') as outFile:
157+
with open(manifestPath, 'w') as outFile:
142158
# Converting namedtuple to JSON via OrderedDict preserves key names and keys order
143-
json.dump(manifest._asdict(), outFile, sort_keys=True, indent=2)
159+
entries = [e._asdict() for e in manifest.entries()]
160+
jsonobject = {'entries': entries}
161+
json.dump(jsonobject, outFile, sort_keys=True, indent=2)
144162

145163
def getManifest(self, manifestHash):
146164
fileName = self.manifestPath(manifestHash)
@@ -149,7 +167,8 @@ def getManifest(self, manifestHash):
149167
try:
150168
with open(fileName, 'r') as inFile:
151169
doc = json.load(inFile)
152-
return Manifest(doc['includeFiles'], doc['includesContentToObjectMap'])
170+
return Manifest([ManifestEntry(e['includeFiles'], e['includesContentHash'], e['objectHash'])
171+
for e in doc['entries']])
153172
except IOError:
154173
return None
155174

@@ -172,7 +191,7 @@ class ManifestRepository(object):
172191
# invalidation, such that a manifest that was stored using the old format is not
173192
# interpreted using the new format. Instead the old file will not be touched
174193
# again due to a new manifest hash and is cleaned away after some time.
175-
MANIFEST_FILE_FORMAT_VERSION = 4
194+
MANIFEST_FILE_FORMAT_VERSION = 5
176195

177196
def __init__(self, manifestsRootDir):
178197
self._manifestsRootDir = manifestsRootDir
@@ -219,26 +238,19 @@ def getManifestHash(compilerBinary, commandLine, sourceFile):
219238

220239
@staticmethod
221240
def getIncludesContentHashForFiles(includes):
222-
listOfIncludesHashes = []
223-
includeMissing = False
241+
listOfHashes = []
224242

225-
for path in sorted(includes.keys()):
243+
for path in includes:
226244
try:
227-
fileHash = getFileHash(path)
228-
if fileHash != includes[path]:
229-
raise IncludeChangedException()
230-
listOfIncludesHashes.append(fileHash)
245+
listOfHashes.append(getFileHash(path))
231246
except FileNotFoundError:
232-
includeMissing = True
233-
234-
if includeMissing:
235-
raise IncludeNotFoundException()
247+
raise IncludeNotFoundException
236248

237-
return ManifestRepository.getIncludesContentHashForHashes(listOfIncludesHashes)
249+
return ManifestRepository.getIncludesContentHashForHashes(listOfHashes)
238250

239251
@staticmethod
240-
def getIncludesContentHashForHashes(listOfIncludesHashes):
241-
return HashAlgorithm(','.join(listOfIncludesHashes).encode()).hexdigest()
252+
def getIncludesContentHashForHashes(listOfHashes):
253+
return HashAlgorithm(','.join(listOfHashes).encode()).hexdigest()
242254

243255

244256
class CacheLock(object):
@@ -754,7 +766,8 @@ def getStringHash(dataString):
754766
return hasher.hexdigest()
755767

756768

757-
def expandBasedirPlaceholder(path, baseDir):
769+
def expandBasedirPlaceholder(path):
770+
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))
758771
if path.startswith(BASEDIR_REPLACEMENT):
759772
if not baseDir:
760773
raise LogicException('No CLCACHE_BASEDIR set, but found relative path ' + path)
@@ -763,13 +776,17 @@ def expandBasedirPlaceholder(path, baseDir):
763776
return path
764777

765778

766-
def collapseBasedirToPlaceholder(path, baseDir):
767-
assert path == os.path.normcase(path)
768-
assert baseDir == os.path.normcase(baseDir)
769-
if path.startswith(baseDir):
770-
return path.replace(baseDir, BASEDIR_REPLACEMENT, 1)
771-
else:
779+
def collapseBasedirToPlaceholder(path):
780+
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))
781+
if baseDir is None:
772782
return path
783+
else:
784+
assert path == os.path.normcase(path)
785+
assert baseDir == os.path.normcase(baseDir)
786+
if path.startswith(baseDir):
787+
return path.replace(baseDir, BASEDIR_REPLACEMENT, 1)
788+
else:
789+
return path
773790

774791

775792
def ensureDirectoryExists(path):
@@ -1371,24 +1388,20 @@ def processCacheHit(cache, objectFile, cachekey):
13711388
return 0, cachedArtifacts.stdout, cachedArtifacts.stderr, False
13721389

13731390

1374-
def createManifest(manifestHash, includePaths):
1375-
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))
1376-
1377-
includes = {path:getFileHash(path) for path in includePaths}
1378-
includesContentHash = ManifestRepository.getIncludesContentHashForFiles(includes)
1391+
def createManifestEntry(manifestHash, includePaths):
1392+
includesWithHash = {path:getFileHash(path) for path in includePaths}
1393+
includesContentHash = ManifestRepository.getIncludesContentHashForHashes(includesWithHash.values())
13791394
cachekey = CompilerArtifactsRepository.computeKeyDirect(manifestHash, includesContentHash)
13801395

1381-
# Create new manifest
1382-
if baseDir:
1383-
relocatableIncludePaths = {
1384-
collapseBasedirToPlaceholder(path, baseDir):contentHash
1385-
for path, contentHash in includes.items()
1386-
}
1387-
manifest = Manifest(relocatableIncludePaths, {})
1388-
else:
1389-
manifest = Manifest(includes, {})
1390-
manifest.includesContentToObjectMap[includesContentHash] = cachekey
1391-
return manifest, cachekey
1396+
safeIncludes = [collapseBasedirToPlaceholder(path) for path in includesWithHash.keys()]
1397+
return ManifestEntry(safeIncludes, includesContentHash, cachekey)
1398+
1399+
1400+
def createOrUpdateManifest(manifestSection, manifestHash, entry):
    """Puts `entry` at the top of the manifest stored under `manifestHash`.

    The manifest is read from `manifestSection`; if none is stored yet a new
    empty one is started. The resulting manifest is written back to the
    section and returned.
    """
    existing = manifestSection.getManifest(manifestHash)
    updated = existing if existing else Manifest()
    updated.addEntry(entry)
    manifestSection.setManifest(manifestHash, updated)
    return updated
13921405

13931406

13941407
def postprocessUnusableManifestMiss(
@@ -1401,8 +1414,8 @@ def postprocessUnusableManifestMiss(
14011414
returnCode, compilerOutput, compilerStderr = invokeRealCompiler(compiler, cmdLine, captureOutput=True)
14021415
includePaths, compilerOutput = parseIncludesSet(compilerOutput, sourceFile, stripIncludes)
14031416

1404-
if returnCode == 0 and os.path.exists(objectFile):
1405-
manifest, cachekey = createManifest(manifestHash, includePaths)
1417+
entry = createManifestEntry(manifestHash, includePaths)
1418+
cachekey = entry.objectHash
14061419

14071420
cleanupRequired = False
14081421
section = cache.compilerArtifactsRepository.section(cachekey)
@@ -1411,6 +1424,7 @@ def postprocessUnusableManifestMiss(
14111424
if returnCode == 0 and os.path.exists(objectFile):
14121425
artifacts = CompilerArtifacts(objectFile, compilerOutput, compilerStderr)
14131426
cleanupRequired = addObjectToCache(stats, cache, section, cachekey, artifacts)
1427+
manifest = createOrUpdateManifest(manifestSection, manifestHash, entry)
14141428
manifestSection.setManifest(manifestHash, manifest)
14151429

14161430
return returnCode, compilerOutput, compilerStderr, cleanupRequired
@@ -1551,7 +1565,6 @@ def processCompileRequest(cache, compiler, args):
15511565

15521566

15531567
def processDirect(cache, objectFile, compiler, cmdLine, sourceFile):
1554-
baseDir = normalizeBaseDir(os.environ.get('CLCACHE_BASEDIR'))
15551568
manifestHash = ManifestRepository.getManifestHash(compiler, cmdLine, sourceFile)
15561569
manifestSection = cache.manifestRepository.section(manifestHash)
15571570
with manifestSection.lock:
@@ -1561,21 +1574,27 @@ def processDirect(cache, objectFile, compiler, cmdLine, sourceFile):
15611574
cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine,
15621575
Statistics.registerSourceChangedMiss)
15631576

1564-
# NOTE: command line options already included in hash for manifest name
1565-
try:
1566-
includesContentHash = ManifestRepository.getIncludesContentHashForFiles({
1567-
expandBasedirPlaceholder(path, baseDir):contentHash
1568-
for path, contentHash in manifest.includeFiles.items()
1569-
})
1570-
except IncludeChangedException:
1571-
return postprocessUnusableManifestMiss(
1572-
cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine,
1573-
Statistics.registerHeaderChangedMiss)
1574-
1575-
cachekey = manifest.includesContentToObjectMap.get(includesContentHash)
1576-
assert cachekey is not None
1577-
1578-
return getOrSetArtifacts(cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss)
1577+
for entryIndex, entry in enumerate(manifest.entries()):
1578+
# NOTE: command line options already included in hash for manifest name
1579+
try:
1580+
includesContentHash = ManifestRepository.getIncludesContentHashForFiles(
1581+
[expandBasedirPlaceholder(path) for path in entry.includeFiles])
1582+
1583+
if entry.includesContentHash == includesContentHash:
1584+
cachekey = entry.objectHash
1585+
assert cachekey is not None
1586+
# Move manifest entry to the top of the entries in the manifest
1587+
manifest.touchEntry(entryIndex)
1588+
manifestSection.setManifest(manifestHash, manifest)
1589+
1590+
return getOrSetArtifacts(
1591+
cache, cachekey, objectFile, compiler, cmdLine, Statistics.registerEvictedMiss)
1592+
except IncludeNotFoundException:
1593+
pass
1594+
1595+
return postprocessUnusableManifestMiss(
1596+
cache, objectFile, manifestSection, manifestHash, sourceFile, compiler, cmdLine,
1597+
Statistics.registerHeaderChangedMiss)
15791598

15801599

15811600
def processNoDirect(cache, objectFile, compiler, cmdLine, environment):

0 commit comments

Comments
 (0)