256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
@pytest.mark.stub_parametrize("token_name", "bloated_eoa_")
@pytest.mark.parametrize("distinct_senders", [False, True])
@pytest.mark.parametrize("existing_slots", [False, True])
def test_sload_bloated_prefetch_miss(
    benchmark_test: BenchmarkTestFiller,
    pre: Alloc,
    fork: Fork,
    gas_benchmark_value: int,
    tx_gas_limit: int,
    token_name: str,
    existing_slots: bool,
    distinct_senders: bool,
) -> None:
    """
    Benchmark SLOAD where each tx's read range is set by the previous tx.

    The benchmark block opens with a small transaction that stores an
    initial offset (taken from its calldata) in the authority's slot 0.
    Every following max-gas transaction loads the offset left in slot 0
    by its predecessor, replaces it with a fresh offset from its own
    calldata, and then SLOADs consecutive slots starting at the offset
    it loaded.

    Since the slots a transaction touches are determined by a write
    made earlier in the same block, a prefetcher that derives SLOAD
    targets from pre-block state without replaying intra-block writes
    warms the wrong slots. The small opening transaction is what makes
    this hold for every max-gas transaction: it sits inside the
    benchmark block, so slot 0 already differs from the pre-block
    snapshot when the first max-gas transaction runs, giving a 100%
    miss rate.

    With ``distinct_senders`` set, each transaction is sent from a
    fresh account. That also defeats prewarming schemes which
    serialize transactions per sender to propagate state changes
    (e.g. Nethermind), because every transaction's prewarm scope then
    starts again from pre-block state.
    """
    # Runtime code, in execution order:
    #   1. push the offset currently stored in slot 0,
    #   2. overwrite slot 0 with the first calldata word,
    #   3. SLOAD consecutive slots from the pushed offset while more
    #      than 0xFFFF gas remains.
    load_previous_offset = Op.SLOAD(Op.PUSH0)
    store_next_offset = Op.SSTORE(Op.PUSH0, Op.CALLDATALOAD(Op.PUSH0))
    sequential_reads = While(
        body=(Op.DUP1 + Op.SLOAD + Op.POP + Op.PUSH1(1) + Op.ADD),
        condition=Op.GT(Op.GAS, 0xFFFF),
    )
    runtime_code = load_previous_offset + store_next_offset + sequential_reads

    authority = pre.stub_eoa(token_name)
    runtime_address = pre.deploy_contract(code=runtime_code)

    # Setup block: delegate the authority to the runtime contract.
    # The delegation tx carries Hash(0) as calldata, leaving slot 0 at
    # 0 in the benchmark block's pre-state. The offset is planted
    # inside the benchmark block instead, so the pre-block snapshot
    # disagrees with what the max-gas transactions actually read.
    delegation_tx = delegate_with_calldata(
        pre, authority, runtime_address, Hash(0)
    )
    setup_block = Block(txs=[delegation_tx])

    # Upper bound on SLOADs per transaction; used as the stride
    # between offsets so no two transactions share a slot range.
    max_sloads_per_tx = _max_sloads_per_tx(tx_gas_limit, fork)

    # With existing slots the base offset sits a full stride away from
    # the pre-block slot 0 value (0), so the range predicted from
    # pre-block state and the range actually read do not overlap.
    if existing_slots:
        base_offset = max_sloads_per_tx
    else:
        base_offset = START_SLOT

    intrinsic_cost = fork.transaction_intrinsic_cost_calculator()
    intrinsic_gas = intrinsic_cost(calldata=b"\xff" * 32)

    # One sender is drawn per transaction: a fresh one each time in
    # distinct mode, otherwise the same shared one.
    senders_iter = _sender_generator(pre, distinct_senders)

    # Plan (gas_limit, offset) for every benchmark transaction before
    # creating any of them.
    #
    # The opening entry gets just enough gas for the SLOAD + SSTORE
    # prologue, so the gas left afterwards is under the 0xFFFF loop
    # threshold and the read loop does not run. Its only purpose is to
    # change slot 0 inside the benchmark block.
    opening_gas = min(gas_benchmark_value, intrinsic_gas + 30_000)
    plan: list[tuple[int, int]] = [(opening_gas, base_offset)]
    gas_left = gas_benchmark_value - opening_gas

    # Remaining entries take as much gas as allowed; entry k (1-based
    # after the opening one) plants base_offset + k * stride, which is
    # the range start for the transaction that follows it.
    while gas_left >= intrinsic_gas:
        limit = min(gas_left, tx_gas_limit)
        plan.append((limit, base_offset + len(plan) * max_sloads_per_tx))
        gas_left -= limit

    # Create the transactions in plan order, recording the sender of
    # each so nonce changes can be grouped per sender below.
    senders: list[EOA] = []
    txs: list[Transaction] = []
    for limit, offset in plan:
        tx_sender = next(senders_iter)
        senders.append(tx_sender)
        txs.append(
            Transaction(
                gas_limit=limit,
                to=authority,
                data=Hash(offset),
                sender=tx_sender,
            )
        )

    # Block access list expectations: the authority reads the slot at
    # base_offset and changes slot 0; each sender's nonce increases
    # once per transaction it sent.
    expectations: dict[Address, BalAccountExpectation] = {
        authority: BalAccountExpectation(
            storage_reads=[base_offset],
            storage_changes=[
                BalStorageSlot(
                    slot=0,
                    validate_any_change=True,
                ),
            ],
        ),
    }
    nonce_history: dict[Address, list[BalNonceChange]] = {}
    for access_index, account in enumerate(senders, start=1):
        history = nonce_history.setdefault(account, [])
        history.append(
            BalNonceChange(
                block_access_index=access_index,
                post_nonce=len(history) + 1,
            )
        )
    expectations.update(
        {
            account: BalAccountExpectation(nonce_changes=history)
            for account, history in nonce_history.items()
        }
    )

    benchmark_block = Block(
        txs=txs,
        expected_block_access_list=BlockAccessListExpectation(
            account_expectations=expectations,
        ),
    )

    benchmark_test(
        pre=pre,
        blocks=[setup_block, benchmark_block],
        skip_gas_used_validation=True,
        expected_receipt_status=True,
    )
|