1
1
using System ;
2
2
using System . Collections . Generic ;
3
+ using System . Diagnostics ;
3
4
using System . Globalization ;
4
5
using System . Linq ;
5
6
using System . Runtime . CompilerServices ;
6
7
using BenchmarkDotNet . Characteristics ;
8
+ using BenchmarkDotNet . Environments ;
7
9
using BenchmarkDotNet . Jobs ;
10
+ using BenchmarkDotNet . Mathematics ;
8
11
using BenchmarkDotNet . Portability ;
9
12
using BenchmarkDotNet . Reports ;
10
13
using JetBrains . Annotations ;
@@ -102,8 +105,14 @@ public void Dispose()
102
105
}
103
106
}
104
107
108
+ // AggressiveOptimization forces the method to go straight to tier1 JIT, so that it is never re-jitted,
109
+ // eliminating tiered JIT as a potential variable in measurements.
110
+ [ MethodImpl ( CodeGenHelper . AggressiveOptimizationOption ) ]
105
111
public RunResults Run ( )
106
112
{
113
+ // This method is huge, because all stages are inlined. This ensures the stack size
114
+ // remains constant for each benchmark invocation, eliminating stack size as a potential variable in measurements.
115
+ // #1120
107
116
var measurements = new List < Measurement > ( ) ;
108
117
measurements . AddRange ( jittingMeasurements ) ;
109
118
@@ -116,23 +125,187 @@ public RunResults Run()
116
125
{
117
126
if ( Strategy != RunStrategy . Monitoring )
118
127
{
119
- var pilotStageResult = pilotStage . Run ( ) ;
120
- invokeCount = pilotStageResult . PerfectInvocationCount ;
121
- measurements . AddRange ( pilotStageResult . Measurements ) ;
128
+ // Pilot Stage
129
+ {
130
+ long Autocorrect ( long count ) => ( count + UnrollFactor - 1 ) / UnrollFactor * UnrollFactor ;
131
+
132
+ // If InvocationCount is specified, the pilot stage should be skipped
133
+ if ( TargetJob . HasValue ( RunMode . InvocationCountCharacteristic ) )
134
+ {
135
+ }
136
+ // Here we want to guess the "perfect" number of invocations
137
+ else if ( TargetJob . HasValue ( RunMode . IterationTimeCharacteristic ) )
138
+ {
139
+ // Perfect invocation count
140
+ invokeCount = Autocorrect ( MinInvokeCount ) ;
141
+
142
+ int iterationCounter = 0 ;
143
+
144
+ int downCount = 0 ; // Amount of iterations where newInvokeCount < invokeCount
145
+ while ( true )
146
+ {
147
+ iterationCounter ++ ;
148
+ var measurement = RunIteration ( new IterationData ( IterationMode . Workload , IterationStage . Pilot , iterationCounter , invokeCount , UnrollFactor ) ) ;
149
+ measurements . Add ( measurement ) ;
150
+ double actualIterationTime = measurement . Nanoseconds ;
151
+ long newInvokeCount = Autocorrect ( Math . Max ( pilotStage . minInvokeCount , ( long ) Math . Round ( invokeCount * pilotStage . targetIterationTime / actualIterationTime ) ) ) ;
152
+
153
+ if ( newInvokeCount < invokeCount )
154
+ downCount ++ ;
155
+
156
+ if ( Math . Abs ( newInvokeCount - invokeCount ) <= 1 || downCount >= 3 )
157
+ break ;
158
+
159
+ invokeCount = newInvokeCount ;
160
+ }
161
+ WriteLine ( ) ;
162
+ }
163
+ else
164
+ {
165
+ // A case where we don't have a specific iteration time.
166
+ invokeCount = Autocorrect ( pilotStage . minInvokeCount ) ;
167
+
168
+ int iterationCounter = 0 ;
169
+ while ( true )
170
+ {
171
+ iterationCounter ++ ;
172
+ var measurement = RunIteration ( new IterationData ( IterationMode . Workload , IterationStage . Pilot , iterationCounter , invokeCount , UnrollFactor ) ) ;
173
+ measurements . Add ( measurement ) ;
174
+ double iterationTime = measurement . Nanoseconds ;
175
+ double operationError = 2.0 * pilotStage . resolution / invokeCount ; // An operation error which has arisen due to the Chronometer precision
176
+
177
+ // Max acceptable operation error
178
+ double operationMaxError1 = iterationTime / invokeCount * pilotStage . maxRelativeError ;
179
+ double operationMaxError2 = pilotStage . maxAbsoluteError ? . Nanoseconds ?? double . MaxValue ;
180
+ double operationMaxError = Math . Min ( operationMaxError1 , operationMaxError2 ) ;
181
+
182
+ bool isFinished = operationError < operationMaxError && iterationTime >= pilotStage . minIterationTime . Nanoseconds ;
183
+ if ( isFinished )
184
+ break ;
185
+ if ( invokeCount >= EnginePilotStage . MaxInvokeCount )
186
+ break ;
187
+
188
+ if ( UnrollFactor == 1 && invokeCount < EnvironmentResolver . DefaultUnrollFactorForThroughput )
189
+ invokeCount += 1 ;
190
+ else
191
+ invokeCount *= 2 ;
192
+ }
193
+ WriteLine ( ) ;
194
+ }
195
+ }
196
+ // End Pilot Stage
122
197
123
198
if ( EvaluateOverhead )
124
199
{
125
- measurements . AddRange ( warmupStage . RunOverhead ( invokeCount , UnrollFactor ) ) ;
126
- measurements . AddRange ( actualStage . RunOverhead ( invokeCount , UnrollFactor ) ) ;
200
+ // Warmup Overhead
201
+ {
202
+ var warmupMeasurements = new List < Measurement > ( ) ;
203
+
204
+ var criteria = DefaultStoppingCriteriaFactory . Instance . CreateWarmup ( TargetJob , Resolver , IterationMode . Overhead , RunStrategy . Throughput ) ;
205
+ int iterationCounter = 0 ;
206
+ while ( ! criteria . Evaluate ( warmupMeasurements ) . IsFinished )
207
+ {
208
+ iterationCounter ++ ;
209
+ warmupMeasurements . Add ( RunIteration ( new IterationData ( IterationMode . Overhead , IterationStage . Warmup , iterationCounter , invokeCount , UnrollFactor ) ) ) ;
210
+ }
211
+ WriteLine ( ) ;
212
+
213
+ measurements . AddRange ( warmupMeasurements ) ;
214
+ }
215
+ // End Warmup Overhead
216
+
217
+ // Actual Overhead
218
+ {
219
+ var measurementsForStatistics = new List < Measurement > ( actualStage . maxIterationCount ) ;
220
+
221
+ int iterationCounter = 0 ;
222
+ double effectiveMaxRelativeError = EngineActualStage . MaxOverheadRelativeError ;
223
+ while ( true )
224
+ {
225
+ iterationCounter ++ ;
226
+ var measurement = RunIteration ( new IterationData ( IterationMode . Overhead , IterationStage . Actual , iterationCounter , invokeCount , UnrollFactor ) ) ;
227
+ measurements . Add ( measurement ) ;
228
+ measurementsForStatistics . Add ( measurement ) ;
229
+
230
+ var statistics = MeasurementsStatistics . Calculate ( measurementsForStatistics , actualStage . outlierMode ) ;
231
+ double actualError = statistics . LegacyConfidenceInterval . Margin ;
232
+
233
+ double maxError1 = effectiveMaxRelativeError * statistics . Mean ;
234
+ double maxError2 = actualStage . maxAbsoluteError ? . Nanoseconds ?? double . MaxValue ;
235
+ double maxError = Math . Min ( maxError1 , maxError2 ) ;
236
+
237
+ if ( iterationCounter >= actualStage . minIterationCount && actualError < maxError )
238
+ break ;
239
+
240
+ if ( iterationCounter >= actualStage . maxIterationCount || iterationCounter >= EngineActualStage . MaxOverheadIterationCount )
241
+ break ;
242
+ }
243
+ WriteLine ( ) ;
244
+ }
245
+ // End Actual Overhead
127
246
}
128
247
}
129
248
130
- measurements . AddRange ( warmupStage . RunWorkload ( invokeCount , UnrollFactor , Strategy ) ) ;
249
+ // Warmup Workload
250
+ {
251
+ var workloadMeasurements = new List < Measurement > ( ) ;
252
+
253
+ var criteria = DefaultStoppingCriteriaFactory . Instance . CreateWarmup ( TargetJob , Resolver , IterationMode . Workload , Strategy ) ;
254
+ int iterationCounter = 0 ;
255
+ while ( ! criteria . Evaluate ( workloadMeasurements ) . IsFinished )
256
+ {
257
+ iterationCounter ++ ;
258
+ workloadMeasurements . Add ( RunIteration ( new IterationData ( IterationMode . Workload , IterationStage . Warmup , iterationCounter , invokeCount , UnrollFactor ) ) ) ;
259
+ }
260
+ WriteLine ( ) ;
261
+
262
+ measurements . AddRange ( workloadMeasurements ) ;
263
+ }
264
+ // End Warmup Workload
131
265
}
132
266
133
267
Host . BeforeMainRun ( ) ;
134
268
135
- measurements . AddRange ( actualStage . RunWorkload ( invokeCount , UnrollFactor , forceSpecific : Strategy == RunStrategy . Monitoring ) ) ;
269
+ // Actual Workload
270
+ {
271
+ if ( actualStage . iterationCount == null && Strategy != RunStrategy . Monitoring )
272
+ {
273
+ // RunAuto
274
+ var measurementsForStatistics = new List < Measurement > ( actualStage . maxIterationCount ) ;
275
+
276
+ int iterationCounter = 0 ;
277
+ double effectiveMaxRelativeError = actualStage . maxRelativeError ;
278
+ while ( true )
279
+ {
280
+ iterationCounter ++ ;
281
+ var measurement = RunIteration ( new IterationData ( IterationMode . Workload , IterationStage . Actual , iterationCounter , invokeCount , UnrollFactor ) ) ;
282
+ measurements . Add ( measurement ) ;
283
+ measurementsForStatistics . Add ( measurement ) ;
284
+
285
+ var statistics = MeasurementsStatistics . Calculate ( measurementsForStatistics , actualStage . outlierMode ) ;
286
+ double actualError = statistics . LegacyConfidenceInterval . Margin ;
287
+
288
+ double maxError1 = effectiveMaxRelativeError * statistics . Mean ;
289
+ double maxError2 = actualStage . maxAbsoluteError ? . Nanoseconds ?? double . MaxValue ;
290
+ double maxError = Math . Min ( maxError1 , maxError2 ) ;
291
+
292
+ if ( iterationCounter >= actualStage . minIterationCount && actualError < maxError )
293
+ break ;
294
+
295
+ if ( iterationCounter >= actualStage . maxIterationCount )
296
+ break ;
297
+ }
298
+ }
299
+ else
300
+ {
301
+ // RunSpecific
302
+ var iterationCount = actualStage . iterationCount ?? EngineActualStage . DefaultWorkloadCount ;
303
+ for ( int i = 0 ; i < iterationCount ; i ++ )
304
+ measurements . Add ( RunIteration ( new IterationData ( IterationMode . Workload , IterationStage . Actual , i + 1 , invokeCount , UnrollFactor ) ) ) ;
305
+ }
306
+ WriteLine ( ) ;
307
+ }
308
+ // End Actual Workload
136
309
137
310
Host . AfterMainRun ( ) ;
138
311
@@ -148,11 +321,15 @@ public RunResults Run()
148
321
return new RunResults ( measurements , outlierMode , workGcHasDone , threadingStats , exceptionFrequency ) ;
149
322
}
150
323
324
+ [ MethodImpl ( CodeGenHelper . AggressiveOptimizationOption ) ]
151
325
public Measurement RunIteration ( IterationData data )
152
326
{
153
327
// Initialization
154
328
long invokeCount = data . InvokeCount ;
155
329
int unrollFactor = data . UnrollFactor ;
330
+ if ( invokeCount % unrollFactor != 0 )
331
+ throw new ArgumentOutOfRangeException ( $ "InvokeCount({ invokeCount } ) should be a multiple of UnrollFactor({ unrollFactor } ).") ;
332
+
156
333
long totalOperations = invokeCount * OperationsPerInvoke ;
157
334
bool isOverhead = data . IterationMode == IterationMode . Overhead ;
158
335
bool randomizeMemory = ! isOverhead && MemoryRandomization ;
@@ -167,7 +344,7 @@ public Measurement RunIteration(IterationData data)
167
344
EngineEventSource . Log . IterationStart ( data . IterationMode , data . IterationStage , totalOperations ) ;
168
345
169
346
var clockSpan = randomizeMemory
170
- ? MeasureWithRandomMemory ( action , invokeCount / unrollFactor )
347
+ ? MeasureWithRandomStack ( action , invokeCount / unrollFactor )
171
348
: Measure ( action , invokeCount / unrollFactor ) ;
172
349
173
350
if ( EngineEventSource . Log . IsEnabled ( ) )
@@ -193,8 +370,8 @@ public Measurement RunIteration(IterationData data)
193
370
// This is in a separate method, because stackalloc can affect code alignment,
194
371
// resulting in unexpected measurements on some AMD CPUs,
195
372
// even if the stackalloc branch isn't executed. (#2366)
196
- [ MethodImpl ( MethodImplOptions . NoInlining ) ]
197
- private unsafe ClockSpan MeasureWithRandomMemory ( Action < long > action , long invokeCount )
373
+ [ MethodImpl ( MethodImplOptions . NoInlining | CodeGenHelper . AggressiveOptimizationOption ) ]
374
+ private unsafe ClockSpan MeasureWithRandomStack ( Action < long > action , long invokeCount )
198
375
{
199
376
byte * stackMemory = stackalloc byte [ random . Next ( 32 ) ] ;
200
377
var clockSpan = Measure ( action , invokeCount ) ;
@@ -205,6 +382,7 @@ private unsafe ClockSpan MeasureWithRandomMemory(Action<long> action, long invok
205
382
[ MethodImpl ( MethodImplOptions . NoInlining ) ]
206
383
private unsafe void Consume ( byte * _ ) { }
207
384
385
+ [ MethodImpl ( MethodImplOptions . NoInlining | CodeGenHelper . AggressiveOptimizationOption ) ]
208
386
private ClockSpan Measure ( Action < long > action , long invokeCount )
209
387
{
210
388
var clock = Clock . Start ( ) ;
0 commit comments