From db95c4f180b39b16eeedf360d429c92e58a1c203 Mon Sep 17 00:00:00 2001 From: Luqun Lou Date: Wed, 21 Jun 2023 12:13:23 -0700 Subject: [PATCH] check upper and lower bound for writebatchwithindex Upstream commit ID: https://github.com/facebook/mysql-5.6/commit/0bfdb056c66e3f1e9168087c735744e21283a658 PS-8951: Merge percona-202305 (https://jira.percona.com/browse/PS-8951) Summary: rocksdb use Writebatchwithindex(WBWI) to support read your own data. But there are two issues for implementations: 1. https://github.com/facebook/rocksdb/issues/11606: Rocksdb may return deleted row or out of range row during iterating WBWI, see code https://github.com/facebook/rocksdb/blob/main/utilities/write_batch_with_index/write_batch_with_index_internal.cc#L311, when it return due out of range(bound), its DeltaIterator may still valid but point to out of range row. 2. https://github.com/facebook/rocksdb/issues/11607: it doesn't check lower_bound_ even lower_bound_ values is passed to rocksdb with readoptions. see https://github.com/facebook/rocksdb/blob/main/utilities/write_batch_with_index/write_batch_with_index_internal.cc#L306 To workaround these issue, add a variable rocksdb_check_iterate_bounds to control whether we should check iterate bounds and check these bounds inside myrocks if rocksdb_check_iterate_bounds is true. Differential Revision: D46908478 fbshipit-source-id: 765f562928a3ad117d23a177b1b2d9e551b0c0ae --- .../suite/rocksdb/r/iterator_bounds.result | 41 +++++++ mysql-test/suite/rocksdb/r/rocksdb.result | 1 + .../suite/rocksdb/t/iterator_bounds.test | 55 ++++++++++ .../rocksdb_check_iterate_bounds_basic.result | 100 ++++++++++++++++++ .../t/rocksdb_check_iterate_bounds_basic.test | 18 ++++ storage/rocksdb/ha_rocksdb.cc | 10 +- storage/rocksdb/ha_rocksdb.h | 3 +- storage/rocksdb/rdb_iterator.cc | 25 ++++- storage/rocksdb/rdb_iterator.h | 1 + 9 files changed, 250 insertions(+), 4 deletions(-) create mode 100644 mysql-test/suite/rocksdb_sys_vars/r/rocksdb_check_iterate_bounds_basic.result create mode 100644 mysql-test/suite/rocksdb_sys_vars/t/rocksdb_check_iterate_bounds_basic.test diff --git a/mysql-test/suite/rocksdb/r/iterator_bounds.result b/mysql-test/suite/rocksdb/r/iterator_bounds.result index 600f19e0d613..77ec66e232d7 100644 --- a/mysql-test/suite/rocksdb/r/iterator_bounds.result +++ b/mysql-test/suite/rocksdb/r/iterator_bounds.result @@ -13,3 +13,44 @@ select j from t order by j desc; j 1 drop table t; +create table t( +a int unsigned not null, +b int unsigned not null, +c varchar(64) not null COLLATE utf8_bin, +primary key(a), +key(b,c) +) engine=rocksdb DEFAULT CHARSET=utf8 COLLATE=utf8_bin; +Warnings: +Warning 3778 'utf8mb3_bin' is a collation of the deprecated character set UTF8MB3. Please consider using UTF8MB4 with an appropriate collation instead. +Warning 3719 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. +Warning 3778 'utf8mb3_bin' is a collation of the deprecated character set UTF8MB3. Please consider using UTF8MB4 with an appropriate collation instead. +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; +begin; +insert into t values(101, 101, 'view_routine_usage'); +update t set b = 100 where b = 101 and c like 'view_routine_usage'; +update t set b = 101 where b = 100 and c like 'view_routine_usage'; +select a from t where b = 101 and c like 'view_routine_usage'; +a +101 +rollback; +drop table t; +create table t( +a int unsigned not null, +b int unsigned not null, +c varchar(64) not null COLLATE utf8_bin, +primary key(a), +key(b,c) comment 'cfname=rev:bc' +) engine=rocksdb DEFAULT CHARSET=utf8 COLLATE=utf8_bin; +Warnings: +Warning 3778 'utf8mb3_bin' is a collation of the deprecated character set UTF8MB3. Please consider using UTF8MB4 with an appropriate collation instead. +Warning 3719 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. +Warning 3778 'utf8mb3_bin' is a collation of the deprecated character set UTF8MB3. Please consider using UTF8MB4 with an appropriate collation instead. +begin; +insert into t values(110, 110, 'view_routine_usage'); +insert into t values(100, 100, 'view_routine_usage'); +update t set b = 101 where b = 100 and c like 'view_routine_usage'; +update t set b = 100 where b = 101 and c like 'view_routine_usage'; +select a from t where b = 101 and c like 'view_routine_usage'; +a +rollback; +drop table t; diff --git a/mysql-test/suite/rocksdb/r/rocksdb.result b/mysql-test/suite/rocksdb/r/rocksdb.result index 39ae9f93a077..a1afb6eee69b 100644 --- a/mysql-test/suite/rocksdb/r/rocksdb.result +++ b/mysql-test/suite/rocksdb/r/rocksdb.result @@ -920,6 +920,7 @@ rocksdb_cache_index_and_filter_blocks ON rocksdb_cache_index_and_filter_with_high_priority ON rocksdb_cancel_manual_compactions OFF rocksdb_charge_memory OFF +rocksdb_check_iterate_bounds ON rocksdb_checksums_pct 100 rocksdb_collect_sst_properties ON rocksdb_column_default_value_as_expression ON diff --git a/mysql-test/suite/rocksdb/t/iterator_bounds.test b/mysql-test/suite/rocksdb/t/iterator_bounds.test index 51981bbfc520..57ac87205321 100644 --- a/mysql-test/suite/rocksdb/t/iterator_bounds.test +++ b/mysql-test/suite/rocksdb/t/iterator_bounds.test @@ -28,3 +28,58 @@ select j from t order by j asc; select j from t order by j desc; drop table t; + +# +# check bounds for writebatch(forward cf), all data changes are in writebatch +# +create table t( + a int unsigned not null, + b int unsigned not null, + c varchar(64) not null COLLATE utf8_bin, + primary key(a), + key(b,c) +) engine=rocksdb DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED; + +begin; +insert into t values(101, 101, 'view_routine_usage'); +# SD(101, 'view_routine_usage',101) +update t set b = 100 where b = 101 and c like 'view_routine_usage'; +# dring iterating, writebatchwithindex may return "out of range" record after +# checking with upper bounds. sometimes the "out of range" record is a SD record. +# For SD record, its value slice will be empty. Try to decode a key slice +# which contains varchar with empty value slice, decoder will crash due missing +# upack_info in value slice +update t set b = 101 where b = 100 and c like 'view_routine_usage'; +select a from t where b = 101 and c like 'view_routine_usage'; +rollback; + +drop table t; + + +# +# check bounds for writebatch(rev cf), all data changes are in writebatch +# +create table t( + a int unsigned not null, + b int unsigned not null, + c varchar(64) not null COLLATE utf8_bin, + primary key(a), + key(b,c) comment 'cfname=rev:bc' +) engine=rocksdb DEFAULT CHARSET=utf8 COLLATE=utf8_bin; + +begin; +insert into t values(110, 110, 'view_routine_usage'); +insert into t values(100, 100, 'view_routine_usage'); +# SD(100, 'view_routine_usage',100) +update t set b = 101 where b = 100 and c like 'view_routine_usage'; +# during iterating, we don't check with lower bound in writebatchwithindex +# in rev cf, +update t set b = 100 where b = 101 and c like 'view_routine_usage'; +select a from t where b = 101 and c like 'view_routine_usage'; +rollback; + +drop table t; + + diff --git a/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_check_iterate_bounds_basic.result b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_check_iterate_bounds_basic.result new file mode 100644 index 000000000000..e41ea9600a97 --- /dev/null +++ b/mysql-test/suite/rocksdb_sys_vars/r/rocksdb_check_iterate_bounds_basic.result @@ -0,0 +1,100 @@ +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); +SET @start_global_value = @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +SELECT @start_global_value; +@start_global_value +1 +SET @start_session_value = @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +SELECT @start_session_value; +@start_session_value +1 +'# Setting to valid values in global scope#' +"Trying to set variable @@global.ROCKSDB_CHECK_ITERATE_BOUNDS to 1" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = 1; +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = DEFAULT; +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Trying to set variable @@global.ROCKSDB_CHECK_ITERATE_BOUNDS to 0" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = 0; +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +0 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = DEFAULT; +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Trying to set variable @@global.ROCKSDB_CHECK_ITERATE_BOUNDS to on" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = on; +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Setting the global scope variable back to default" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = DEFAULT; +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +'# Setting to valid values in session scope#' +"Trying to set variable @@session.ROCKSDB_CHECK_ITERATE_BOUNDS to 1" +SET @@session.ROCKSDB_CHECK_ITERATE_BOUNDS = 1; +SELECT @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@session.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Setting the session scope variable back to default" +SET @@session.ROCKSDB_CHECK_ITERATE_BOUNDS = DEFAULT; +SELECT @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@session.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Trying to set variable @@session.ROCKSDB_CHECK_ITERATE_BOUNDS to 0" +SET @@session.ROCKSDB_CHECK_ITERATE_BOUNDS = 0; +SELECT @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@session.ROCKSDB_CHECK_ITERATE_BOUNDS +0 +"Setting the session scope variable back to default" +SET @@session.ROCKSDB_CHECK_ITERATE_BOUNDS = DEFAULT; +SELECT @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@session.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Trying to set variable @@session.ROCKSDB_CHECK_ITERATE_BOUNDS to on" +SET @@session.ROCKSDB_CHECK_ITERATE_BOUNDS = on; +SELECT @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@session.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Setting the session scope variable back to default" +SET @@session.ROCKSDB_CHECK_ITERATE_BOUNDS = DEFAULT; +SELECT @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@session.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +'# Testing with invalid values in global scope #' +"Trying to set variable @@global.ROCKSDB_CHECK_ITERATE_BOUNDS to 'aaa'" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = 'aaa'; +Got one of the listed errors +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +"Trying to set variable @@global.ROCKSDB_CHECK_ITERATE_BOUNDS to 'bbb'" +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = 'bbb'; +Got one of the listed errors +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +SET @@global.ROCKSDB_CHECK_ITERATE_BOUNDS = @start_global_value; +SELECT @@global.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@global.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +SET @@session.ROCKSDB_CHECK_ITERATE_BOUNDS = @start_session_value; +SELECT @@session.ROCKSDB_CHECK_ITERATE_BOUNDS; +@@session.ROCKSDB_CHECK_ITERATE_BOUNDS +1 +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_check_iterate_bounds_basic.test b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_check_iterate_bounds_basic.test new file mode 100644 index 000000000000..1673eacdbfee --- /dev/null +++ b/mysql-test/suite/rocksdb_sys_vars/t/rocksdb_check_iterate_bounds_basic.test @@ -0,0 +1,18 @@ +--source include/have_rocksdb.inc + +CREATE TABLE valid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO valid_values VALUES(1); +INSERT INTO valid_values VALUES(0); +INSERT INTO valid_values VALUES('on'); + +CREATE TABLE invalid_values (value varchar(255)) ENGINE=myisam; +INSERT INTO invalid_values VALUES('\'aaa\''); +INSERT INTO invalid_values VALUES('\'bbb\''); + +--let $sys_var=ROCKSDB_CHECK_ITERATE_BOUNDS +--let $read_only=0 +--let $session=1 +--source ../include/rocksdb_sys_var.inc + +DROP TABLE valid_values; +DROP TABLE invalid_values; diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc index 74cb9a5d578d..96490e3464b0 100644 --- a/storage/rocksdb/ha_rocksdb.cc +++ b/storage/rocksdb/ha_rocksdb.cc @@ -1337,6 +1337,11 @@ static MYSQL_THDVAR_BOOL( "Enable rocksdb iterator upper/lower bounds in read options.", nullptr, nullptr, true); +static MYSQL_THDVAR_BOOL( + check_iterate_bounds, PLUGIN_VAR_OPCMDARG, + "Check rocksdb iterator upper/lower bounds during iterating.", nullptr, + nullptr, true); + static const char *const DEFAULT_READ_FREE_RPL_TABLES = ".*"; static std::regex_constants::syntax_option_type get_regex_flags() { @@ -2635,6 +2640,7 @@ static struct SYS_VAR *rocksdb_system_variables[] = { MYSQL_SYSVAR(commit_in_the_middle), MYSQL_SYSVAR(blind_delete_primary_key), MYSQL_SYSVAR(enable_iterate_bounds), + MYSQL_SYSVAR(check_iterate_bounds), #if defined(ROCKSDB_INCLUDE_RFR) && ROCKSDB_INCLUDE_RFR MYSQL_SYSVAR(read_free_rpl_tables), MYSQL_SYSVAR(read_free_rpl), @@ -15986,11 +15992,13 @@ bool ha_rocksdb::can_assume_tracked(THD *thd) { bool ha_rocksdb::check_bloom_and_set_bounds( THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, size_t bound_len, uchar *const lower_bound, uchar *const upper_bound, - rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice) { + rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice, + bool *check_iterate_bounds) { bool can_use_bloom = can_use_bloom_filter(thd, kd, eq_cond); if (!can_use_bloom && (THDVAR(thd, enable_iterate_bounds))) { setup_iterator_bounds(kd, eq_cond, bound_len, lower_bound, upper_bound, lower_bound_slice, upper_bound_slice); + *check_iterate_bounds = THDVAR(thd, check_iterate_bounds); } return can_use_bloom; } diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h index 7393d0257d16..c15c75c9910e 100644 --- a/storage/rocksdb/ha_rocksdb.h +++ b/storage/rocksdb/ha_rocksdb.h @@ -649,7 +649,8 @@ class ha_rocksdb : public my_core::handler, public blob_buffer { static bool check_bloom_and_set_bounds( THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, size_t bound_len, uchar *const lower_bound, uchar *const upper_bound, - rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice); + rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice, + bool *check_iterate_bounds); static bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond); diff --git a/storage/rocksdb/rdb_iterator.cc b/storage/rocksdb/rdb_iterator.cc index 193900fa81d8..e3a1c8a77153 100644 --- a/storage/rocksdb/rdb_iterator.cc +++ b/storage/rocksdb/rdb_iterator.cc @@ -40,7 +40,8 @@ Rdb_iterator_base::Rdb_iterator_base(THD *thd, m_scan_it_snapshot(nullptr), m_scan_it_lower_bound(nullptr), m_scan_it_upper_bound(nullptr), - m_prefix_buf(nullptr) {} + m_prefix_buf(nullptr), + m_check_iterate_bounds(false) {} Rdb_iterator_base::~Rdb_iterator_base() { release_scan_iterator(); @@ -134,7 +135,8 @@ void Rdb_iterator_base::setup_scan_iterator(const rocksdb::Slice *const slice, m_thd, *m_kd, eq_cond, std::max(eq_cond_len, (uint)Rdb_key_def::INDEX_NUMBER_SIZE), m_scan_it_lower_bound, m_scan_it_upper_bound, - &m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice)) { + &m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice, + &m_check_iterate_bounds)) { skip_bloom = false; } @@ -214,6 +216,8 @@ int Rdb_iterator_base::next_with_direction(bool move_forward, bool skip_next) { const auto &kd = *m_kd; Rdb_transaction *const tx = get_tx_from_thd(m_thd); + const rocksdb::Comparator *kd_comp = kd.get_cf()->GetComparator(); + for (;;) { DEBUG_SYNC(m_thd, "rocksdb.check_flags_nwd"); if (thd_killed(m_thd)) { @@ -251,6 +255,23 @@ int Rdb_iterator_base::next_with_direction(bool move_forward, bool skip_next) { break; } + // Check specified lower/upper bounds + // For example, retrieved key is 00077 + // in cf, lower_bound: 0076 and uppper bound: 0078 + // cf->Compare(0077, 0078) > 0 ==> False + // cf->Compare(0077, 0076) < 0 ==> False + // in rev cf, lower_bound: 0078 and uppper bound: 0076 + // revcf->Compare(0077, 0076) > 0 ==> False + // revcf->Compare(0077, 0078) < 0 ==> False + if (m_check_iterate_bounds && + ((!m_scan_it_upper_bound_slice.empty() && + kd_comp->Compare(key, m_scan_it_upper_bound_slice) > 0) || + (!m_scan_it_lower_bound_slice.empty() && + kd_comp->Compare(key, m_scan_it_lower_bound_slice) < 0))) { + rc = HA_ERR_END_OF_FILE; + break; + } + // Record is not visible due to TTL, move to next record. if (m_pkd->has_ttl() && rdb_should_hide_ttl_rec(kd, value, tx)) { continue; diff --git a/storage/rocksdb/rdb_iterator.h b/storage/rocksdb/rdb_iterator.h index 89b87838697f..00ab6fab740b 100644 --- a/storage/rocksdb/rdb_iterator.h +++ b/storage/rocksdb/rdb_iterator.h @@ -120,6 +120,7 @@ class Rdb_iterator_base : public Rdb_iterator { uchar *m_prefix_buf; rocksdb::Slice m_prefix_tuple; + bool m_check_iterate_bounds; }; class Rdb_iterator_partial : public Rdb_iterator_base {