From ee5e110f163bfc6d9fe6d6e12665c79f8987d853 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Fri, 4 Oct 2024 15:25:32 +0700 Subject: [PATCH 1/3] core: add insert chain benchmark with many storage updates --- core/blockchain_test.go | 73 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/core/blockchain_test.go b/core/blockchain_test.go index 804919abd..1bb72f594 100644 --- a/core/blockchain_test.go +++ b/core/blockchain_test.go @@ -17,6 +17,7 @@ package core import ( + "encoding/binary" "errors" "fmt" "io/ioutil" @@ -2474,6 +2475,78 @@ func BenchmarkBlockChain_1x1000Executions(b *testing.B) { benchmarkLargeNumberOfValueToNonexisting(b, numTxs, numBlocks, recipientFn, dataFn) } +func BenchmarkBlockChain_1000StorageUpdate(b *testing.B) { + var ( + numTxs = 1000 + signer = types.HomesteadSigner{} + testBankKey, _ = crypto.HexToECDSA("b71c71a67e1177ad4e901695e1b4b9ee17ae16c6668d313eac2f96dbcda3f291") + testBankAddress = crypto.PubkeyToAddress(testBankKey.PublicKey) + bankFunds = big.NewInt(100000000000000000) + contractAddress = common.HexToAddress("0x1234") + gspec = Genesis{ + Config: params.TestChainConfig, + Alloc: GenesisAlloc{ + testBankAddress: {Balance: bankFunds}, + contractAddress: { + Nonce: 1, + Balance: common.Big0, + // Store 1 into slot passed by calldata + Code: []byte{ + byte(vm.PUSH0), + byte(vm.CALLDATALOAD), + byte(vm.PUSH1), + byte(0x1), + byte(vm.SWAP1), + byte(vm.SSTORE), + byte(vm.STOP), + }, + }, + }, + GasLimit: 100e6, // 100 M + } + ) + // Generate the original common chain segment and the two competing forks + engine := ethash.NewFaker() + db := rawdb.NewMemoryDatabase() + genesis := gspec.MustCommit(db) + + blockGenerator := func(i int, block *BlockGen) { + block.SetCoinbase(common.Address{1}) + for txi := 0; txi < numTxs; txi++ { + var calldata [32]byte + binary.BigEndian.PutUint64(calldata[:], uint64(txi)) + tx, err := types.SignTx( + types.NewTransaction(uint64(txi), contractAddress, common.Big0, 100_000, + block.header.BaseFee, calldata[:]), + signer, + testBankKey) + if err != nil { + b.Error(err) + } + block.AddTx(tx) + } + } + + shared, _ := GenerateChain(params.TestChainConfig, genesis, engine, db, 1, blockGenerator, true) + b.StopTimer() + b.ResetTimer() + for i := 0; i < b.N; i++ { + // Import the shared chain and the original canonical one + diskdb := rawdb.NewMemoryDatabase() + gspec.MustCommit(diskdb) + + chain, err := NewBlockChain(diskdb, nil, params.TestChainConfig, engine, vm.Config{}, nil, nil) + if err != nil { + b.Fatalf("failed to create tester chain: %v", err) + } + b.StartTimer() + if _, err := chain.InsertChain(shared, nil); err != nil { + b.Fatalf("failed to insert shared chain: %v", err) + } + b.StopTimer() + } +} + // Tests that importing a some old blocks, where all blocks are before the // pruning point. // This internally leads to a sidechain import, since the blocks trigger an From c988694fcafb46d731a7ee15c7aff3f7acb026dd Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 3 Oct 2024 13:55:47 +0700 Subject: [PATCH 2/3] trie: don't copy when we are the exclusive owner of the trie MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, to make the trie copy perform well, we only copy the trie's node on write. However, there is no tracking on whether the trie is copied or not, so we always perform copy-on-write even though we are the only owner of the trie. This commit adds a shared bool to trie, this is set when trie copy is performed. When the trie is not shared, it is safe to do all the modification in-place without the need of creating a new copy. > go test -test.v -test.run=^$ -test.bench=BenchmarkUpdate │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ UpdateBE-8 1247.0n ± 36% 378.4n ± 34% -69.66% (p=0.000 n=10) UpdateLE-8 1675.5n ± 1% 430.8n ± 31% -74.29% (p=0.000 n=10) geomean 1.445µ 403.7n -72.07% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ UpdateBE-8 1796.0 ± 0% 226.0 ± 0% -87.42% (p=0.000 n=10) UpdateLE-8 1848.0 ± 0% 228.5 ± 1% -87.64% (p=0.000 n=10) geomean 1.779Ki 227.2 -87.53% │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ UpdateBE-8 9.000 ± 0% 4.000 ± 0% -55.56% (p=0.000 n=10) UpdateLE-8 9.000 ± 0% 4.000 ± 0% -55.56% (p=0.000 n=10) geomean 9.000 4.000 -55.56% --- trie/secure_trie.go | 8 +++++ trie/trie.go | 82 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/trie/secure_trie.go b/trie/secure_trie.go index 18be12d34..50880fe81 100644 --- a/trie/secure_trie.go +++ b/trie/secure_trie.go @@ -64,6 +64,12 @@ func NewSecure(root common.Hash, db *Database) (*SecureTrie, error) { return &SecureTrie{trie: *trie}, nil } +// UnshareTrie marks this trie as unshared, the trie exclusively +// owns all of its nodes. +func (t *SecureTrie) UnshareTrie() { + t.trie.shared = false +} + // Get returns the value for key stored in the trie. // The value bytes must not be modified by the caller. func (t *SecureTrie) Get(key []byte) []byte { @@ -185,6 +191,8 @@ func (t *SecureTrie) Hash() common.Hash { // Copy returns a copy of SecureTrie. func (t *SecureTrie) Copy() *SecureTrie { + // Mark both trie as shared + t.trie.shared = true cpy := *t return &cpy } diff --git a/trie/trie.go b/trie/trie.go index 13343112b..858ea0110 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -66,6 +66,11 @@ type Trie struct { // hashing operation. This number will not directly map to the number of // actually unhashed nodes unhashed int + + // The trie is already copied so we might share the trie node with + // other tries. If shared is false, we are the exclusive owner of + // the trie so we can modify it in place without copying. + shared bool } // newFlag returns the cache flag value for a newly created node. @@ -136,14 +141,22 @@ func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode } value, newnode, didResolve, err = t.tryGet(n.Val, key, pos+len(n.Key)) if err == nil && didResolve { - n = n.copy() + // This node might be shared with other tries, we must copy before + // modifying + if t.shared { + n = n.copy() + } n.Val = newnode } return value, n, didResolve, err case *fullNode: value, newnode, didResolve, err = t.tryGet(n.Children[key[pos]], key, pos+1) if err == nil && didResolve { - n = n.copy() + // This node might be shared with other tries, we must copy before + // modifying + if t.shared { + n = n.copy() + } n.Children[key[pos]] = newnode } return value, n, didResolve, err @@ -210,7 +223,11 @@ func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, new } item, newnode, resolved, err = t.tryGetNode(n.Val, path, pos+len(n.Key)) if err == nil && resolved > 0 { - n = n.copy() + // This node might be shared with other tries, we must copy before + // modifying + if t.shared { + n = n.copy() + } n.Val = newnode } return item, n, resolved, err @@ -218,7 +235,11 @@ func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, new case *fullNode: item, newnode, resolved, err = t.tryGetNode(n.Children[path[pos]], path, pos+1) if err == nil && resolved > 0 { - n = n.copy() + // This node might be shared with other tries, we must copy before + // modifying + if t.shared { + n = n.copy() + } n.Children[path[pos]] = newnode } return item, n, resolved, err @@ -300,7 +321,14 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error if !dirty || err != nil { return false, n, err } - return true, &shortNode{n.Key, nn, t.newFlag()}, nil + if t.shared { + // This node might be shared with other tries, we must create a new node + return true, &shortNode{n.Key, nn, t.newFlag()}, nil + } else { + n.Val = nn + n.flags = t.newFlag() + return true, n, nil + } } // Otherwise branch out at the index where they differ. branch := &fullNode{flags: t.newFlag()} @@ -317,15 +345,28 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error if matchlen == 0 { return true, branch, nil } - // Otherwise, replace it with a short node leading up to the branch. - return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil + // Otherwise, replace it with a new short node leading up to the branch if + // trie is shared; otherwise, modify the current short node to point to the + // new branch. + if t.shared { + return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil + } else { + n.Key = key[:matchlen] + n.Val = branch + n.flags = t.newFlag() + return true, n, nil + } case *fullNode: dirty, nn, err := t.insert(n.Children[key[0]], append(prefix, key[0]), key[1:], value) if !dirty || err != nil { return false, n, err } - n = n.copy() + // This node might be shared with other tries, we must copy before + // modifying + if t.shared { + n = n.copy() + } n.flags = t.newFlag() n.Children[key[0]] = nn return true, n, nil @@ -401,9 +442,24 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { // always creates a new slice) instead of append to // avoid modifying n.Key since it might be shared with // other nodes. - return true, &shortNode{concat(n.Key, child.Key...), child.Val, t.newFlag()}, nil + if t.shared { + // This node might be shared with other tries, we must create a new node + return true, &shortNode{concat(n.Key, child.Key...), child.Val, t.newFlag()}, nil + } else { + n.Key = concat(n.Key, child.Key...) + n.Val = child.Val + n.flags = t.newFlag() + return true, n, nil + } default: - return true, &shortNode{n.Key, child, t.newFlag()}, nil + if t.shared { + // This node might be shared with other tries, we must create a new node + return true, &shortNode{n.Key, child, t.newFlag()}, nil + } else { + n.Val = child + n.flags = t.newFlag() + return true, n, nil + } } case *fullNode: @@ -411,7 +467,11 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { if !dirty || err != nil { return false, n, err } - n = n.copy() + // This node might be shared with other tries, we must copy before + // modifying + if t.shared { + n = n.copy() + } n.flags = t.newFlag() n.Children[key[0]] = nn From 8441ee193cae2d7002193b79d843af6bc3962945 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Fri, 4 Oct 2024 10:19:54 +0700 Subject: [PATCH 3/3] core/state/trie_prefetcher: mark the returned trie as unshared when possible Trie's modification is more optimized when it's marked as unshared. When the prefetched trie is requested, the prefetcher is stopped and the caller can become the exclusive owner of the trie in most case. When the prefetcher is not copied (active prefetcher), when possible, the returned trie is marked as unshared. However, we need to be careful as there might be the case that 2 same storage trie are prefetched. In that case, don't mark the trie as unshared when returning to caller. --- core/state/trie_prefetcher.go | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/core/state/trie_prefetcher.go b/core/state/trie_prefetcher.go index 472c125b7..6970acd0d 100644 --- a/core/state/trie_prefetcher.go +++ b/core/state/trie_prefetcher.go @@ -22,6 +22,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/metrics" + "github.com/ethereum/go-ethereum/trie" ) var ( @@ -40,6 +41,9 @@ type triePrefetcher struct { fetches map[common.Hash]Trie // Partially or fully fetcher tries fetchers map[common.Hash]*subfetcher // Subfetchers for each trie + shared bool // shared is set if the prefetcher is copied + sharedTrie map[common.Hash]struct{} + deliveryMissMeter metrics.Meter accountLoadMeter metrics.Meter accountDupMeter metrics.Meter @@ -55,9 +59,10 @@ type triePrefetcher struct { func newTriePrefetcher(db Database, root common.Hash, namespace string) *triePrefetcher { prefix := triePrefetchMetricsPrefix + namespace p := &triePrefetcher{ - db: db, - root: root, - fetchers: make(map[common.Hash]*subfetcher), // Active prefetchers use the fetchers map + db: db, + root: root, + fetchers: make(map[common.Hash]*subfetcher), // Active prefetchers use the fetchers map + sharedTrie: make(map[common.Hash]struct{}), deliveryMissMeter: metrics.GetOrRegisterMeter(prefix+"/deliverymiss", nil), accountLoadMeter: metrics.GetOrRegisterMeter(prefix+"/account/load", nil), @@ -109,10 +114,12 @@ func (p *triePrefetcher) close() { // is mostly used in the miner which creates a copy of it's actively mutated // state to be sealed while it may further mutate the state. func (p *triePrefetcher) copy() *triePrefetcher { + p.shared = true copy := &triePrefetcher{ db: p.db, root: p.root, fetches: make(map[common.Hash]Trie), // Active prefetchers use the fetches map + shared: true, deliveryMissMeter: p.deliveryMissMeter, accountLoadMeter: p.accountLoadMeter, @@ -152,6 +159,12 @@ func (p *triePrefetcher) prefetch(root common.Hash, keys [][]byte) { if fetcher == nil { fetcher = newSubfetcher(p.db, root) p.fetchers[root] = fetcher + } else { + // The same trie root is scheduled to be prefetched more than once. + // It might be the case that storage tries of 2 different accounts + // are the same. Mark this trie as shared so that we don't mark the + // trie as unshared in the bellow trie() function. + p.sharedTrie[root] = struct{}{} } fetcher.schedule(keys) } @@ -178,12 +191,21 @@ func (p *triePrefetcher) trie(root common.Hash) Trie { // a copy of any pre-loaded trie. fetcher.abort() // safe to do multiple times - trie := fetcher.peek() - if trie == nil { + tr := fetcher.peek() + if tr == nil { p.deliveryMissMeter.Mark(1) return nil } - return trie + + if !p.shared { + if _, shared := p.sharedTrie[root]; !shared { + if tr, ok := tr.(*trie.SecureTrie); ok { + tr.UnshareTrie() + } + } + } + + return tr } // used marks a batch of state items used to allow creating statistics as to