From 1ea9627b1ef257a4882d312bf35449faff5e388a Mon Sep 17 00:00:00 2001 From: zhangli20 Date: Sun, 30 Jun 2024 23:07:40 +0800 Subject: [PATCH] fix stackoverflow fix decompression error in latest nightly rust remove unused header --- .github/workflows/enwik8-benchmark.yml | 4 + Cargo.lock | 267 ++++++++++++++++++------- Cargo.toml | 3 +- src/lib.rs | 72 +++---- src/lz.rs | 58 ++++-- src/matchfinder.rs | 24 ++- src/mem.rs | 4 +- src/symrank.rs | 12 +- 8 files changed, 292 insertions(+), 152 deletions(-) diff --git a/.github/workflows/enwik8-benchmark.yml b/.github/workflows/enwik8-benchmark.yml index 5a6b535..ee8c7de 100644 --- a/.github/workflows/enwik8-benchmark.yml +++ b/.github/workflows/enwik8-benchmark.yml @@ -12,6 +12,10 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: nightly + rustflags: --allow warnings -C target-feature=+aes + - name: Build run: cargo build --release diff --git a/Cargo.lock b/Cargo.lock index 71e3d5b..6df32fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,9 +15,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "bitflags" @@ -27,21 +27,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cfg-if" -version = "1.0.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "clap" -version = "3.2.17" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29e724a68d9319343bb3328c9cc2dfde263f4b3142ee1059a9980580171c954b" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "atty", "bitflags", @@ -56,15 +50,15 @@ dependencies = [ [[package]] name = "clap_derive" -version = "3.2.17" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13547f7012c01ab4a0e8f8967730ada8f9fdf419e8b6c792788f39cf4e46eefa" +checksum = "ae6371b8bdc8b7d3959e9cf7b22d4435ef3e79e138688421ec654acf8c81b008" dependencies = [ "heck", "proc-macro-error", "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] [[package]] @@ -77,12 +71,21 @@ dependencies = [ ] [[package]] -name = "crc32c-hw" -version = "0.1.3" +name = "deranged" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "994372cb0a1ba4395a9f61bb00552d93862e82d42da598460b55944d0f4d97f8" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" dependencies = [ - "byteorder", + "powerfmt", +] + +[[package]] +name = "gxhash" +version = "3.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a197c9b654827513cf53842c5c6d3da2b4b35a785f8e0eff78bdf8e445aba1bb" +dependencies = [ + "rustversion", ] [[package]] @@ -93,9 +96,9 @@ checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" [[package]] name = "heck" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" @@ -108,9 +111,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "1.9.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg", "hashbrown", @@ -118,24 +121,21 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "libc" -version = "0.2.152" +version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" [[package]] name = "log" -version = "0.4.17" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "modular-bitfield" @@ -155,23 +155,29 @@ checksum = "5a7d5f7076603ebc68de2dc6a650ec331a062a13abaa346975be747bbfa4b789" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.109", ] +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + [[package]] name = "num_threads" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" dependencies = [ "libc", ] [[package]] name = "once_cell" -version = "1.13.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "orz" @@ -179,20 +185,25 @@ version = "1.6.1" dependencies = [ "byteorder", "clap", - "crc32c-hw", + "gxhash", "libc", "log", "modular-bitfield", "simplelog", - "smart-default", "unchecked-index", ] [[package]] name = "os_str_bytes" -version = "6.3.0" +version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "proc-macro-error" @@ -203,7 +214,7 @@ dependencies = [ "proc-macro-error-attr", "proc-macro2", "quote", - "syn", + "syn 1.0.109", "version_check", ] @@ -220,42 +231,57 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.43" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] [[package]] -name = "simplelog" -version = "0.12.0" +name = "rustversion" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dfff04aade74dd495b007c831cd6f4e0cee19c344dd9dc0884c0289b70a786" +checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ - "log", - "termcolor", - "time", + "serde_derive", ] [[package]] -name = "smart-default" -version = "0.6.0" +name = "serde_derive" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "133659a15339456eeeb07572eb02a91c91e9815e9cbc89566944d2c8d3efdbf6" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.68", +] + +[[package]] +name = "simplelog" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16257adbfaef1ee58b1363bdc0664c9b8e1e30aed86049635fb5f147d065a9c0" +dependencies = [ + "log", + "termcolor", + "time", ] [[package]] @@ -272,9 +298,20 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.99" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" dependencies = [ "proc-macro2", "quote", @@ -283,36 +320,51 @@ dependencies = [ [[package]] name = "termcolor" -version = "1.1.3" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" dependencies = [ "winapi-util", ] [[package]] name = "textwrap" -version = "0.15.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "time" -version = "0.3.13" +version = "0.3.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db76ff9fa4b1458b3c7f077f3ff9887394058460d21e634355b273aaf11eea45" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ + "deranged", "itoa", "libc", + "num-conv", "num_threads", + "powerfmt", + "serde", + "time-core", "time-macros", ] +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + [[package]] name = "time-macros" -version = "0.2.4" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +dependencies = [ + "num-conv", + "time-core", +] [[package]] name = "unchecked-index" @@ -322,9 +374,9 @@ checksum = "eeba86d422ce181a719445e51872fa30f1f7413b62becb52e95ec91aa262d85c" [[package]] name = "unicode-ident" -version = "1.0.3" +version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "version_check" @@ -350,11 +402,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.5" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys", ] [[package]] @@ -362,3 +414,76 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/Cargo.toml b/Cargo.toml index 44fe63c..1d252a7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,10 +14,9 @@ log = "0.4.17" simplelog = "0.12.0" unchecked-index = "0.2.2" libc = "0.2.152" -crc32c-hw = "0.1.3" modular-bitfield = "0.11.2" clap = { version = "3.2.17", features = ["derive"] } -smart-default = "0.6.0" +gxhash = "3.4.1" [profile] [profile.release] diff --git a/src/lib.rs b/src/lib.rs index 8d9d105..3605ca5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,7 +23,6 @@ use crate::lz::LZEncoder; use byteorder::ReadBytesExt; use byteorder::WriteBytesExt; -use smart_default::SmartDefault; const LZ_BLOCK_SIZE: usize = (1 << 25) - 1; // 32MB const LZ_CHUNK_SIZE: usize = 1 << 20; // 1MB @@ -33,14 +32,24 @@ const LZ_MF_BUCKET_ITEM_HASH_SIZE: usize = (LZ_MF_BUCKET_ITEM_SIZE as f64 * 1.13 /// Compression size info: source/target sizes. #[repr(C)] -#[derive(Clone, Copy, SmartDefault)] +#[derive(Clone, Copy)] pub struct Stat { pub source_size: u64, pub target_size: u64, - #[default(_code = "Instant::now()")] pub start_time: Instant, + pub start_time: Instant, pub duration: Duration, } + impl Stat { + pub fn new() -> Self { + Self { + source_size: 0, + target_size: 0, + start_time: Instant::now(), + duration: Duration::ZERO, + } + } + pub fn log_progress(&mut self, source_size_inc: u64, target_size_inc: u64, is_encode: bool) { self.source_size += source_size_inc; self.target_size += target_size_inc; @@ -103,15 +112,14 @@ const SBVEC_PREMATCH_LEN: usize = LZ_BLOCK_SIZE / 2; /// Encode the source into a target ORZ stream. pub fn encode(source: &mut dyn Read, target: &mut dyn Write, cfg: &LZCfg) -> std::io::Result { - let mut stat = Stat::default(); - let mut lzenc = LZEncoder::default(); + let mut stat = Stat::new(); + let mut lzenc = LZEncoder::new(); - #[allow(unused_allocation)] - let sbvec = &mut Box::new([0u8; LZ_BLOCK_SIZE + SBVEC_SENTINEL_LEN])[..LZ_BLOCK_SIZE]; - #[allow(unused_allocation)] - let tbvec = &mut Box::new([0u8; SBVEC_PREMATCH_LEN * 3]); + let mut sbvec_buf = vec![0u8; LZ_BLOCK_SIZE + SBVEC_SENTINEL_LEN * 2]; + let mut tbvec_buf = vec![0u8; SBVEC_PREMATCH_LEN * 3]; + let sbvec = &mut sbvec_buf[SBVEC_SENTINEL_LEN..][..LZ_BLOCK_SIZE]; + let tbvec = &mut tbvec_buf; - stat.target_size += write_version(target)? as u64; loop { let sbvec_read_size = read_repeatedly(source, &mut sbvec[SBVEC_PREMATCH_LEN..])?; let mut spos = SBVEC_PREMATCH_LEN; @@ -156,15 +164,14 @@ pub fn encode(source: &mut dyn Read, target: &mut dyn Write, cfg: &LZCfg) -> std } pub fn decode(target: &mut dyn Read, source: &mut dyn Write) -> std::io::Result { - let mut stat = Stat::default(); - let mut lzdec = LZDecoder::default(); + let mut stat = Stat::new(); + let mut lzdec = LZDecoder::new(); - #[allow(unused_allocation)] - let sbvec = &mut Box::new([0u8; LZ_BLOCK_SIZE + SBVEC_SENTINEL_LEN])[..LZ_BLOCK_SIZE]; - #[allow(unused_allocation)] - let tbvec = &mut Box::new([0u8; SBVEC_PREMATCH_LEN * 3]); + let mut sbvec_buf = vec![0u8; LZ_BLOCK_SIZE * 2 + SBVEC_SENTINEL_LEN * 2]; + let mut tbvec_buf = vec![0u8; SBVEC_PREMATCH_LEN * 3]; + let sbvec = &mut sbvec_buf[SBVEC_SENTINEL_LEN..][..LZ_BLOCK_SIZE]; + let tbvec = &mut tbvec_buf; - stat.target_size += check_version(target)? as u64; let mut spos = SBVEC_PREMATCH_LEN; let mut tpos = 0usize; loop { @@ -212,33 +219,4 @@ pub fn decode(target: &mut dyn Read, source: &mut dyn Write) -> std::io::Result< } } Ok(stat) -} - -fn write_version(target: &mut dyn Write) -> std::io::Result { - let version_bytes = env!("CARGO_PKG_VERSION").as_bytes(); - let mut version_str_buf = [0u8; 10]; // to store version string like xx.yy.zz - version_str_buf[..version_bytes.len()].copy_from_slice(version_bytes); - target.write_all(&version_str_buf)?; - Ok(version_str_buf.len()) -} - -fn check_version(target: &mut dyn Read) -> std::io::Result { - let current_version_str = env!("CARGO_PKG_VERSION"); - let mut version_bytes = [0u8; 10]; - target.read_exact(&mut version_bytes)?; - let version_str = std::str::from_utf8(&version_bytes) - .map_err(|_| { - std::io::Error::new(std::io::ErrorKind::InvalidData, "invalid utf-8 version str") - })? - .trim_end_matches('\u{0}'); - - // print a warning message rather than exit the decompression - if !version_str.to_owned().eq(current_version_str) { - log::warn!( - "version mismatched ({} vs {}), decoding may not work correctly", - version_str, - current_version_str, - ); - } - Ok(version_bytes.len()) -} +} \ No newline at end of file diff --git a/src/lz.rs b/src/lz.rs index 22a8f56..e14c948 100644 --- a/src/lz.rs +++ b/src/lz.rs @@ -18,7 +18,6 @@ use crate::LZ_MF_BUCKET_ITEM_SIZE; use crate::LZ_ROID_SIZE; use crate::SYMRANK_NUM_SYMBOLS; -use smart_default::SmartDefault; use unchecked_index::unchecked_index; const LZ_ROID_ENCODING_ARRAY: [(u8, u8, u16); LZ_MF_BUCKET_ITEM_SIZE] = @@ -36,22 +35,39 @@ pub struct LZCfg { pub lazy_match_depth2: usize, } -#[derive(SmartDefault)] struct LZContext { - #[default(_code = "Box::new([Bucket::default(); 256])")] buckets: Box<[Bucket; 256]>, - #[default(_code = "Box::new([SymRankCoder::default(); 512])")] symranks: Box<[SymRankCoder; 512]>, - #[default(_code = "Box::new([[0, 0]; 32768])")] words: Box<[[u8; 2]; 32768]>, - #[default = true] first_block: bool, - #[default = true] after_literal: bool, + buckets: Vec, + symranks: Vec, + words: Vec<[u8; 2]>, + first_block: bool, + after_literal: bool, +} + +impl LZContext { + pub fn new() -> Self { + Self { + buckets: vec![Bucket::new(); 256], + symranks: vec![SymRankCoder::new(); 512], + words: vec![[0, 0]; 32768], + first_block: true, + after_literal: true, + } + } } -#[derive(SmartDefault)] pub struct LZEncoder { ctx: LZContext, - #[default(_code = "Box::new([BucketMatcher::default(); 256])")] bucket_matchers: Box<[BucketMatcher; 256]>, + bucket_matchers: Vec, } impl LZEncoder { + pub fn new() -> Self { + Self { + ctx: LZContext::new(), + bucket_matchers: vec![BucketMatcher::new(); 256], + } + } + pub fn forward(&mut self, forward_len: usize) { for i in 0..self.bucket_matchers.len() { self.ctx.buckets[i].forward(forward_len); @@ -289,11 +305,17 @@ impl LZEncoder { } } -#[derive(Default)] pub struct LZDecoder { ctx: LZContext, } + impl LZDecoder { + pub fn new() -> Self { + Self { + ctx: LZContext::new(), + } + } + pub fn forward(&mut self, forward_len: usize) { self.ctx .buckets @@ -308,11 +330,9 @@ impl LZDecoder { spos: usize, ) -> Result<(usize, usize), Box> { let roid_decoding_array = &unchecked_index(&LZ_ROID_DECODING_ARRAY); - let sbuf = &mut unchecked_index(sbuf); - let tbuf = &unchecked_index(tbuf); - let ctx_words = &mut unchecked_index(&mut self.ctx.words); - let ctx_buckets = &mut unchecked_index(&mut self.ctx.buckets); - let ctx_symranks = &mut unchecked_index(&mut self.ctx.symranks); + let ctx_words = &mut (&mut self.ctx.words); + let ctx_buckets = &mut (&mut self.ctx.buckets); + let ctx_symranks = &mut (&mut self.ctx.symranks); let mut bits: BitQueue = Default::default(); let mut spos = spos; @@ -416,12 +436,12 @@ impl LZDecoder { #[inline] unsafe fn hash1(buf: &[u8], pos: usize) -> usize { - let buf = unchecked_index(buf); - buf[pos] as usize & 0x7f | (buf[pos - 1].is_ascii_alphanumeric() as usize) << 7 + let ptr = buf.as_ptr(); + ptr.add(pos).read_unaligned() as usize & 0x7f | (ptr.add(pos - 1).read_unaligned().is_ascii_alphanumeric() as usize) << 7 } #[inline] unsafe fn hash2(buf: &[u8], pos: usize) -> usize { - let buf = unchecked_index(buf); - buf[pos] as usize & 0x7f | hash1(&buf[..], pos - 1) << 7 + let ptr = buf.as_ptr(); + ptr.add(pos).read_unaligned() as usize & 0x7f | hash1(buf, pos - 1) << 7 } diff --git a/src/matchfinder.rs b/src/matchfinder.rs index 97f9a7a..3865b00 100644 --- a/src/matchfinder.rs +++ b/src/matchfinder.rs @@ -7,7 +7,6 @@ use crate::LZ_MF_BUCKET_ITEM_HASH_SIZE; use crate::LZ_MF_BUCKET_ITEM_SIZE; use modular_bitfield::prelude::*; -use smart_default::SmartDefault; use unchecked_index::unchecked_index; #[derive(Clone, Copy, Default)] // Match::default = unmatched @@ -25,9 +24,8 @@ pub struct MatchInfo { pub match_len_min: usize, } -#[derive(Clone, Copy, SmartDefault)] +#[derive(Clone, Copy)] pub struct Bucket { - #[default(_code = "[Node::default(); LZ_MF_BUCKET_ITEM_SIZE]")] nodes: [Node; LZ_MF_BUCKET_ITEM_SIZE], // pos:25 | match_len_expected:7 head: i16, /* match_len_expected: @@ -55,6 +53,13 @@ pub struct Bucket { } impl Bucket { + pub fn new() -> Self { + Self { + nodes: [Node::default(); LZ_MF_BUCKET_ITEM_SIZE], + head: 0, + } + } + pub unsafe fn update(&mut self, pos: usize, reduced_offset: u16, match_len: usize) { let mut nodes = unchecked_index(&mut self.nodes); let new_head = node_size_bounded_add(self.head as u16, 1); @@ -96,15 +101,20 @@ impl Bucket { } } -#[derive(Clone, Copy, SmartDefault)] +#[derive(Clone, Copy)] pub struct BucketMatcher { - #[default(_code = "[-1; LZ_MF_BUCKET_ITEM_HASH_SIZE]")] heads: [i16; LZ_MF_BUCKET_ITEM_HASH_SIZE], - #[default(_code = "[-1; LZ_MF_BUCKET_ITEM_SIZE]")] nexts: [i16; LZ_MF_BUCKET_ITEM_SIZE], } impl BucketMatcher { + pub fn new() -> Self { + Self { + heads: [-1; LZ_MF_BUCKET_ITEM_HASH_SIZE], + nexts: [-1; LZ_MF_BUCKET_ITEM_SIZE], + } + } + pub unsafe fn update(&mut self, bucket: &Bucket, buf: &[u8], pos: usize) { let mut heads = unchecked_index(&mut self.heads); let mut nexts = unchecked_index(&mut self.nexts); @@ -285,5 +295,5 @@ fn node_size_bounded_sub(v1: u16, v2: u16) -> u16 { #[inline] unsafe fn hash_dword(buf: &[u8], pos: usize) -> usize { - crc32c_hw::update(0, mem_get::<[u8; 4]>(buf.as_ptr(), pos)) as usize + gxhash::gxhash32(&mem_get::<[u8; 4]>(buf.as_ptr(), pos), 0x9efa2b21) as usize } diff --git a/src/mem.rs b/src/mem.rs index 425d2fb..0202881 100644 --- a/src/mem.rs +++ b/src/mem.rs @@ -1,11 +1,11 @@ #[inline(always)] pub unsafe fn mem_get(buf: *const u8, pos: usize) -> T { - *(buf.add(pos) as *const T) + std::ptr::read_unaligned(buf.add(pos) as *const T) } #[inline(always)] pub unsafe fn mem_put(buf: *mut u8, pos: usize, value: T) { - *(buf.add(pos) as *mut T) = value + std::ptr::write_unaligned(buf.add(pos) as *mut T, value); } // requires max_len = 16n diff --git a/src/symrank.rs b/src/symrank.rs index 8fb5835..f052565 100644 --- a/src/symrank.rs +++ b/src/symrank.rs @@ -2,21 +2,25 @@ use std::cmp::Ordering; use crate::SYMRANK_NUM_SYMBOLS; -use smart_default::SmartDefault; use unchecked_index::unchecked_index; const SYMRANK_NEXT_ARRAY: [u16; SYMRANK_NUM_SYMBOLS] = include!(concat!(env!("OUT_DIR"), "/", "SYMRANK_NEXT_ARRAY.txt")); -#[derive(Clone, Copy, SmartDefault)] +#[derive(Clone, Copy)] pub struct SymRankCoder { - #[default(_code = "[0; SYMRANK_NUM_SYMBOLS]")] value_array: [u16; SYMRANK_NUM_SYMBOLS], - #[default(_code = "[0; SYMRANK_NUM_SYMBOLS]")] index_array: [u16; SYMRANK_NUM_SYMBOLS], } impl SymRankCoder { + pub fn new() -> Self { + SymRankCoder { + value_array: [0; SYMRANK_NUM_SYMBOLS], + index_array: [0; SYMRANK_NUM_SYMBOLS], + } + } + pub fn init(&mut self, value_array: &[u16]) { for (i, &value) in value_array.iter().enumerate() { self.value_array[i] = value;