Skip to content

Commit 9cf3d68

Browse files
committed
Improves estimator speed (at the cost of some accuracy) when estimating on HDDs. The new method can be >10x faster.
1 parent 233769d commit 9cf3d68

File tree

2 files changed

+72
-4
lines changed

2 files changed

+72
-4
lines changed

CompactGUI.Core/Estimator.vb

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ Public Class Estimator
1414
Private Const ErrorMargin As Single = 0.1
1515
Private Const SampleSize As Single = 100 '0.25 * (ZScore / ErrorMargin) ^ 2
1616

17+
' Cluster size in bytes used by EstimateCompressabilityHDD to align its sampling window.
' Overwritten from the clusterSize argument on every EstimateCompressability call.
Private DiskClusterSize As Integer = 4096
18+
' Set from the ishdd argument on every EstimateCompressability call. When True,
' EstimateCompressabilityLZ4 routes to EstimateCompressabilityHDD instead of the default estimator.
Private IsHDD As Boolean = False
19+
1720
Public Sub New()
1821
End Sub
1922

@@ -27,14 +30,15 @@ Public Class Estimator
2730
End Class
2831

2932

30-
Public Function EstimateCompressability(analysisResult As List(Of AnalysedFileDetails), Optional MaxParallelism As Integer = 1, Optional clusterSize As Integer = 4096, Optional cancellationToken As Threading.CancellationToken = Nothing) As List(Of (AnalysedFile As AnalysedFileDetails, CompressionRatio As Single))
33+
Public Function EstimateCompressability(analysisResult As List(Of AnalysedFileDetails), ishdd As Boolean, Optional MaxParallelism As Integer = 1, Optional clusterSize As Integer = 4096, Optional cancellationToken As Threading.CancellationToken = Nothing) As List(Of (AnalysedFile As AnalysedFileDetails, CompressionRatio As Single))
3134

35+
DiskClusterSize = clusterSize
3236
Dim _filesList As New Concurrent.ConcurrentBag(Of FileDetails)
3337
If MaxParallelism <= 0 Then MaxParallelism = Environment.ProcessorCount
3438

35-
Dim paraOptions As New ParallelOptions With {.MaxDegreeOfParallelism = MaxParallelism}
36-
39+
Me.IsHDD = ishdd
3740

41+
Dim paraOptions As New ParallelOptions With {.MaxDegreeOfParallelism = MaxParallelism}
3842

3943
Parallel.ForEach(analysisResult, parallelOptions:=paraOptions, Sub(fl)
4044

@@ -79,6 +83,9 @@ Public Class Estimator
7983
Public Function EstimateCompressabilityLZ4(path As String, filesize As Long, Optional cancellationToken As Threading.CancellationToken = Nothing) As Double
8084
Try
8185
Using fs As FileStream = File.OpenRead(path)
86+
If IsHDD Then
87+
Return EstimateCompressabilityHDD(fs, filesize, Function(output) LZ4Stream.Encode(output, LZ4Level.L00_FAST, 0, True), cancellationToken)
88+
End If
8289
Return EstimateCompressability(fs, filesize, Function(output) LZ4Stream.Encode(output, LZ4Level.L00_FAST, 0, True), cancellationToken)
8390
End Using
8491
Catch cancelledEx As OperationCanceledException
@@ -134,6 +141,63 @@ Public Class Estimator
134141
Return Math.Min(compressed.Length / Math.Max(totalWritten, 1), 1.0)
135142
End Function
136143

144+
'Private Function EstimateCompressabilityHDD(input As FileStream, fileSize As Long, compressionFactory As CompressionStreamFactory, Optional cancellationToken As Threading.CancellationToken = Nothing) As Double
145+
' Dim MiddleChunkSize As Integer = SampleSize * BlockSize ' 10KB
146+
147+
' Dim totalWritten As Long = 0
148+
' Dim compressed = New MemoryStream()
149+
150+
' Using compressionStream As Stream = compressionFactory(compressed)
151+
' ' If file is smaller than 10KB, just use the whole file
152+
' Dim chunkSize As Integer = CInt(Math.Min(MiddleChunkSize, fileSize))
153+
' Dim middleStart As Long = Math.Max(0, (fileSize \ 2) - (chunkSize \ 2))
154+
155+
' Dim buffer(chunkSize - 1) As Byte
156+
' input.Position = middleStart
157+
' Dim bytesRead As Integer = input.Read(buffer, 0, chunkSize)
158+
159+
' If cancellationToken <> Nothing AndAlso cancellationToken.IsCancellationRequested Then
160+
' Throw New OperationCanceledException(cancellationToken)
161+
' End If
162+
163+
' If bytesRead > 0 Then
164+
' compressionStream.Write(buffer, 0, bytesRead)
165+
' totalWritten += bytesRead
166+
' End If
167+
' End Using
168+
169+
' Return Math.Min(compressed.Length / Math.Max(totalWritten, 1), 1.0)
170+
'End Function
171+
172+
''' <summary>
''' Fast compressibility estimate intended for files on HDDs. Instead of sampling many
''' scattered blocks, it reads one contiguous, cluster-aligned run that starts at the
''' cluster containing the file's midpoint, compresses that run, and returns
''' compressed size / sampled size, capped at 1.0.
''' </summary>
''' <param name="input">Open, seekable stream over the file to sample.</param>
''' <param name="fileSize">Size of the file in bytes.</param>
''' <param name="compressionFactory">Wraps the in-memory output stream in the compressor being measured.</param>
''' <param name="cancellationToken">Checked before each disk read; cancellation raises OperationCanceledException.</param>
''' <returns>Math.Min(compressed bytes / Math.Max(sampled bytes, 1), 1.0).</returns>
Private Function EstimateCompressabilityHDD(input As FileStream, fileSize As Long, compressionFactory As CompressionStreamFactory, Optional cancellationToken As Threading.CancellationToken = Nothing) As Double
    Dim sampleClusters As Integer = CInt(SampleSize) ' number of consecutive clusters to sample

    ' A non-positive cluster size would make the integer division below throw;
    ' fall back to the same 4096 default the class uses elsewhere.
    Dim clusterSize As Integer = DiskClusterSize
    If clusterSize <= 0 Then clusterSize = 4096

    ' Start on the cluster boundary at or just before the file's midpoint.
    ' NOTE(review): the window runs forward from the midpoint, so a file smaller than the
    ' sample budget is only sampled over its second half - confirm this is intended.
    Dim middleCluster As Long = (fileSize \ 2) \ clusterSize
    Dim alignedStart As Long = middleCluster * clusterSize

    ' Widen to Long before multiplying so a large cluster size cannot overflow Integer,
    ' and clamp at zero so a bogus fileSize cannot produce an invalid array bound.
    Dim wanted As Long = Math.Min(CLng(clusterSize) * sampleClusters, fileSize - alignedStart)
    Dim chunkSize As Integer = CInt(Math.Max(0L, wanted))

    Dim buffer(chunkSize - 1) As Byte
    input.Position = alignedStart

    ' Stream.Read may legally return fewer bytes than requested, so keep reading until the
    ' chunk is full or the stream ends. Cancellation is checked before each read so a
    ' cancelled run does not pay for the disk access first.
    Dim totalRead As Integer = 0
    While totalRead < chunkSize
        cancellationToken.ThrowIfCancellationRequested()
        Dim justRead As Integer = input.Read(buffer, totalRead, chunkSize - totalRead)
        If justRead <= 0 Then Exit While
        totalRead += justRead
    End While

    cancellationToken.ThrowIfCancellationRequested()

    Using compressed As New MemoryStream()
        ' The compressor must be disposed before measuring so it flushes its final block.
        ' Like the original, this relies on the factory leaving the output stream open.
        Using compressionStream As Stream = compressionFactory(compressed)
            If totalRead > 0 Then
                compressionStream.Write(buffer, 0, totalRead)
            End If
        End Using

        Return Math.Min(compressed.Length / Math.Max(totalRead, 1), 1.0)
    End Using
End Function
200+
137201

138202

139203

CompactGUI/Models/NewModels/ICompressableFolder.vb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ Public MustInherit Class CompressableFolder : Inherits ObservableObject
177177
Try
178178
Dim sw As New Stopwatch
179179
sw.Start()
180-
estimatedData = Await Task.Run(Function() estimator.EstimateCompressability(AnalysisResults.ToList, GetThreadCount, GetClusterSize(FolderName), CancellationTokenSource.Token))
180+
estimatedData = Await Task.Run(Function() estimator.EstimateCompressability(AnalysisResults.ToList, IsHDD, GetThreadCount, GetClusterSize(FolderName), CancellationTokenSource.Token))
181181
sw.Stop()
182182
Debug.WriteLine($"Estimated compression took {sw.ElapsedMilliseconds}ms")
183183
Catch ex As AggregateException
@@ -256,6 +256,10 @@ Public MustInherit Class CompressableFolder : Inherits ObservableObject
256256
End Try
257257
End Function
258258

259+
''' <summary>
''' Reports whether GetDiskType() returns HardwareType.Hdd.
''' </summary>
''' <returns>True when the detected hardware type is Hdd; otherwise False.</returns>
Private Function IsHDD() As Boolean
    Dim detectedType As DiskDetector.Models.HardwareType = GetDiskType()
    If detectedType = DiskDetector.Models.HardwareType.Hdd Then
        Return True
    End If
    Return False
End Function
259263

260264
Protected Overridable Function GetSkipList() As String()
261265
Dim exclist As String() = Array.Empty(Of String)()

0 commit comments

Comments
 (0)