From d15172155380980856a47c4f21e9b2944f47d556 Mon Sep 17 00:00:00 2001 From: Zhicheng Pan Date: Mon, 24 Apr 2023 13:27:29 +0800 Subject: [PATCH 1/3] fix --- README.md | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f73168b..1cd9570 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,36 @@ -# join-order-benchmark +# Join-Order-Benchmark This package contains the Join Order Benchmark (JOB) queries from: -"How Good Are Query Optimizers, Really?" +"[How Good Are Query Optimizers, Really?](http://www.vldb.org/pvldb/vol9/p204-leis.pdf)" by Viktor Leis, Andrey Gubichev, Atans Mirchev, Peter Boncz, Alfons Kemper, Thomas Neumann PVLDB Volume 9, No. 3, 2015 -http://www.vldb.org/pvldb/vol9/p204-leis.pdf -the csv_files/schematext.sql and queries/*.sql is modified to MySQL syntax. -IMDB Data Set +The `csv_files/schematext.sql` and `queries/*.sql` are modified to MySQL syntax. -The CSV files used in the paper, which are from May 2013, can be found at http://homepages.cwi.nl/~boncz/job/imdb.tgz -### load +## Quick Start -use `load_data.sh` to load data into mysql or TiDB. +1. Get the data: +```shell +cd csv_files/ +wget http://homepages.cwi.nl/~boncz/job/imdb.tgz +tar -xvzf imdb.tgz +``` -### order problem +2. Launch the database server: +3. Run script: +``` +./load_data.sh +``` +If you meets access restriction, please modify the config: +``` +mysql --local-infile=1 -h 127.0.0.1 -P 4000 -u root -D imdbload < $sql_file +``` + +## order problem `queries/17b.sql` and `queries/8d.sql` have order problem, because we use diffrent order rule from MySQL, so it is not a real bug. 
-### use analyze table to make TiDB faster +## use analyze table to make TiDB faster exec `analyze_table.sql` From d7d26214073611dbe6cb7a4ed9f74866fac2803d Mon Sep 17 00:00:00 2001 From: Ethan Date: Mon, 24 Apr 2023 14:11:53 +0800 Subject: [PATCH 2/3] Fix: permission issues and separator issues --- README.md | 16 ++++++++-------- load_data.sh | 4 ++-- load_data_mysql.sh | 4 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 1cd9570..085052c 100644 --- a/README.md +++ b/README.md @@ -11,26 +11,26 @@ The `csv_files/schematext.sql` and `queries/*.sql` are modified to MySQL syntax. ## Quick Start -1. Get the data: +1. Obtain the data: ```shell cd csv_files/ wget http://homepages.cwi.nl/~boncz/job/imdb.tgz tar -xvzf imdb.tgz ``` -2. Launch the database server: -3. Run script: +2. Launch the database server. +3. Run the script: ``` ./load_data.sh ``` -If you meets access restriction, please modify the config: +If you encounter any access restrictions, please modify the configuration as follows: ``` mysql --local-infile=1 -h 127.0.0.1 -P 4000 -u root -D imdbload < $sql_file ``` -## order problem +## Order Problem -`queries/17b.sql` and `queries/8d.sql` have order problem, because we use diffrent order rule from MySQL, so it is not a real bug. +Please note that `queries/17b.sql` and `queries/8d.sql` may exhibit order issues due to the use of different order rules from MySQL. This is not a real bug. -## use analyze table to make TiDB faster -exec `analyze_table.sql` +## Improving TiDB Performance using Analyze Table +Please execute analyze_table.sql to optimize the TiDB performance. 
diff --git a/load_data.sh b/load_data.sh index fe7e878..1153bf0 100755 --- a/load_data.sh +++ b/load_data.sh @@ -22,11 +22,11 @@ load_data() { bname=${csv_file%.*} sql_file="$bname.sql" table=${bname#$PREFIX} - sql="LOAD DATA LOCAL INFILE '$csv_file' INTO TABLE $table FIELDS TERMINATED BY ',';" + sql="LOAD DATA LOCAL INFILE '$csv_file' INTO TABLE $table FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"';" echo $sql > $sql_file echo $sql_file { - mysql -h 127.0.0.1 -P 4000 -u root -D imdbload < $sql_file + mysql --local-infile=1 -h 127.0.0.1 -P 4000 -u root -D imdbload < $sql_file echo >&1000 }& done diff --git a/load_data_mysql.sh b/load_data_mysql.sh index f119504..0f4c8fe 100755 --- a/load_data_mysql.sh +++ b/load_data_mysql.sh @@ -22,11 +22,11 @@ load_data() { bname=${csv_file%.*} sql_file="$bname.sql" table=${bname#$PREFIX} - sql="LOAD DATA LOCAL INFILE '$csv_file' INTO TABLE $table FIELDS TERMINATED BY ',';" + sql="LOAD DATA LOCAL INFILE '$csv_file' INTO TABLE $table FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"';" echo $sql > $sql_file echo $sql_file { - mysql -h 127.0.0.1 -u root -p123456 -D imdbload < $sql_file + mysql --local-infile=1 -h 127.0.0.1 -u root -p123456 -D imdbload < $sql_file echo >&1000 }& done From 767c2cb14b6071589995d948d66cc5a4385ff71a Mon Sep 17 00:00:00 2001 From: ethan Date: Mon, 13 Jan 2025 11:30:11 +0000 Subject: [PATCH 3/3] update --- README.md | 12 +-- csv_files/schema-tidb.sql | 170 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 5 deletions(-) create mode 100644 csv_files/schema-tidb.sql diff --git a/README.md b/README.md index 085052c..0d84aed 100644 --- a/README.md +++ b/README.md @@ -11,14 +11,16 @@ The `csv_files/schematext.sql` and `queries/*.sql` are modified to MySQL syntax. ## Quick Start -1. Obtain the data: -```shell +1. Get the `imdb` dataset: +```bash cd csv_files/ -wget http://homepages.cwi.nl/~boncz/job/imdb.tgz +wget https://event.cwi.nl/da/job/imdb.tgz tar -xvzf imdb.tgz ``` - -2. 
Launch the database server. +2. Create the database `imdbload` and load the schema: +```bash +mysql -h 127.0.0.1 -P 4000 -u root -e "CREATE DATABASE IF NOT EXISTS imdbload" && mysql -h 127.0.0.1 -P 4000 -u root -D imdbload < csv_files/schema-tidb.sql +``` 3. Run the script: ``` ./load_data.sh diff --git a/csv_files/schema-tidb.sql b/csv_files/schema-tidb.sql new file mode 100644 index 0000000..b3111c7 --- /dev/null +++ b/csv_files/schema-tidb.sql @@ -0,0 +1,170 @@ +CREATE TABLE aka_name ( -- IMDB (JOB) schema for TiDB/MySQL; free-text columns are TEXT instead of a catch-all VARCHAR(255) so over-long values are not truncated or rejected by LOAD DATA + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + person_id INT NOT NULL, + name TEXT, + imdb_index VARCHAR(3), + name_pcode_cf VARCHAR(11), + name_pcode_nf VARCHAR(11), + surname_pcode VARCHAR(11), + md5sum VARCHAR(65) +); + +CREATE TABLE aka_title ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + movie_id INT NOT NULL, + title TEXT, + imdb_index VARCHAR(4), + kind_id INT NOT NULL, + production_year INT, + phonetic_code VARCHAR(5), + episode_of_id INT, + season_nr INT, + episode_nr INT, + note VARCHAR(72), + md5sum VARCHAR(32) +); + +CREATE TABLE cast_info ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + person_id INT NOT NULL, + movie_id INT NOT NULL, + person_role_id INT, + note TEXT, + nr_order INT, + role_id INT NOT NULL +); + +CREATE TABLE char_name ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + name TEXT NOT NULL, + imdb_index VARCHAR(2), + imdb_id INT, + name_pcode_nf VARCHAR(5), + surname_pcode VARCHAR(5), + md5sum VARCHAR(32) +); + +CREATE TABLE comp_cast_type ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + kind VARCHAR(32) NOT NULL +); + +CREATE TABLE company_name ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + name TEXT NOT NULL, + country_code VARCHAR(6), + imdb_id INT, + name_pcode_nf VARCHAR(5), + name_pcode_sf VARCHAR(5), + md5sum VARCHAR(32) +); + +CREATE TABLE company_type ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + kind VARCHAR(32) +); + +CREATE TABLE complete_cast ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + movie_id INT, + subject_id INT NOT NULL, + status_id INT NOT NULL +); + +CREATE TABLE info_type ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + 
info VARCHAR(32) NOT NULL +); + +CREATE TABLE keyword ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + keyword TEXT NOT NULL, + phonetic_code VARCHAR(5) +); + +CREATE TABLE kind_type ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + kind VARCHAR(15) +); + +CREATE TABLE link_type ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + link VARCHAR(32) NOT NULL +); + +CREATE TABLE movie_companies ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + movie_id INT NOT NULL, + company_id INT NOT NULL, + company_type_id INT NOT NULL, + note TEXT +); + +CREATE TABLE movie_info_idx ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + movie_id INT NOT NULL, + info_type_id INT NOT NULL, + info TEXT NOT NULL, + note VARCHAR(1) +); + +CREATE TABLE movie_keyword ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + movie_id INT NOT NULL, + keyword_id INT NOT NULL +); + +CREATE TABLE movie_link ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + movie_id INT NOT NULL, + linked_movie_id INT NOT NULL, + link_type_id INT NOT NULL +); + +CREATE TABLE name ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + name TEXT NOT NULL, + imdb_index VARCHAR(9), + imdb_id INT, + gender VARCHAR(1), + name_pcode_cf VARCHAR(5), + name_pcode_nf VARCHAR(5), + surname_pcode VARCHAR(5), + md5sum VARCHAR(32) +); + +CREATE TABLE role_type ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + role VARCHAR(32) NOT NULL +); + +CREATE TABLE title ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + title TEXT NOT NULL, + imdb_index VARCHAR(5), + kind_id INT NOT NULL, + production_year INT, + imdb_id INT, + phonetic_code VARCHAR(5), + episode_of_id INT, + season_nr INT, + episode_nr INT, + series_years VARCHAR(49), + md5sum VARCHAR(32) +); + +CREATE TABLE movie_info ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + movie_id INT NOT NULL, + info_type_id INT NOT NULL, + info TEXT NOT NULL, -- NOTE(review): presumably holds long free-form values (plots, quotes); confirm max length against movie_info.csv + note TEXT +); + +CREATE TABLE person_info ( + id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, + 
person_id INT NOT NULL, + info_type_id INT NOT NULL, + info TEXT NOT NULL, -- NOTE(review): presumably holds long free-form values (biographies); confirm max length against person_info.csv + note TEXT +);