From 24bd965fe88080b03166e15f8d2f28e46ed46a08 Mon Sep 17 00:00:00 2001 From: Andrej Orsula Date: Tue, 5 Mar 2024 22:02:57 +0100 Subject: [PATCH] Development towards 0.3.0 (#4) * Allow `clippy::{nursery, pedantic}` for generated bindings Signed-off-by: Andrej Orsula * Add support for tuples with ellipsis Signed-off-by: Andrej Orsula * Fix bindings for `__init__()` and `__call__()` Signed-off-by: Andrej Orsula * Fix return type for `__init__()` bindings Signed-off-by: Andrej Orsula * Improve semantics of positional `*args` parameters Signed-off-by: Andrej Orsula * Update test case Signed-off-by: Andrej Orsula * Apply clippy suggestions Signed-off-by: Andrej Orsula * Refactoring: Add parsing [skip ci] Signed-off-by: Andrej Orsula * Refactoring: Add generators for imports & prelude [skip ci] Signed-off-by: Andrej Orsula * Refactoring: Add remaining generators Signed-off-by: Andrej Orsula * Refactoring: Add type mapping Signed-off-by: Andrej Orsula * CI: Format dependabot workflow Signed-off-by: Andrej Orsula * Improve ergonomics of generated bindings Signed-off-by: Andrej Orsula --------- Signed-off-by: Andrej Orsula --- .github/workflows/dependabot.yml | 41 + .github/workflows/rust.yml | 4 +- Cargo.lock | 291 +++-- Cargo.toml | 13 +- README.md | 51 +- pyo3_bindgen/src/lib.rs | 80 +- pyo3_bindgen_cli/src/main.rs | 50 +- pyo3_bindgen_cli/tests/cli.rs | 4 +- pyo3_bindgen_engine/Cargo.toml | 8 +- pyo3_bindgen_engine/benches/bindgen.rs | 155 ++- pyo3_bindgen_engine/src/bindgen.rs | 150 --- pyo3_bindgen_engine/src/bindgen/attribute.rs | 131 -- pyo3_bindgen_engine/src/bindgen/class.rs | 254 ---- pyo3_bindgen_engine/src/bindgen/function.rs | 245 ---- pyo3_bindgen_engine/src/bindgen/module.rs | 513 -------- pyo3_bindgen_engine/src/build_utils.rs | 54 - pyo3_bindgen_engine/src/codegen.rs | 370 ++++++ pyo3_bindgen_engine/src/config.rs | 106 ++ pyo3_bindgen_engine/src/lib.rs | 21 +- pyo3_bindgen_engine/src/syntax/class.rs | 261 ++++ .../src/syntax/common/attribute_variant.rs | 79 
++ .../src/syntax/common/ident.rs | 112 ++ pyo3_bindgen_engine/src/syntax/common/mod.rs | 7 + pyo3_bindgen_engine/src/syntax/common/path.rs | 354 ++++++ pyo3_bindgen_engine/src/syntax/function.rs | 702 +++++++++++ pyo3_bindgen_engine/src/syntax/import.rs | 92 ++ pyo3_bindgen_engine/src/syntax/mod.rs | 15 + pyo3_bindgen_engine/src/syntax/module.rs | 653 ++++++++++ pyo3_bindgen_engine/src/syntax/property.rs | 328 +++++ pyo3_bindgen_engine/src/syntax/type_var.rs | 20 + pyo3_bindgen_engine/src/types.rs | 1098 ----------------- pyo3_bindgen_engine/src/typing/from_py.rs | 525 ++++++++ pyo3_bindgen_engine/src/typing/into_rs.rs | 287 +++++ pyo3_bindgen_engine/src/typing/mod.rs | 90 ++ pyo3_bindgen_engine/src/utils/error.rs | 18 + pyo3_bindgen_engine/src/utils/io.rs | 43 + pyo3_bindgen_engine/src/utils/mod.rs | 5 + pyo3_bindgen_engine/src/utils/result.rs | 5 + pyo3_bindgen_engine/tests/bindgen.rs | 265 ++-- pyo3_bindgen_macros/src/lib.rs | 33 +- pyo3_bindgen_macros/src/parser.rs | 10 +- 41 files changed, 4631 insertions(+), 2912 deletions(-) create mode 100644 .github/workflows/dependabot.yml delete mode 100644 pyo3_bindgen_engine/src/bindgen.rs delete mode 100644 pyo3_bindgen_engine/src/bindgen/attribute.rs delete mode 100644 pyo3_bindgen_engine/src/bindgen/class.rs delete mode 100644 pyo3_bindgen_engine/src/bindgen/function.rs delete mode 100644 pyo3_bindgen_engine/src/bindgen/module.rs delete mode 100644 pyo3_bindgen_engine/src/build_utils.rs create mode 100644 pyo3_bindgen_engine/src/codegen.rs create mode 100644 pyo3_bindgen_engine/src/config.rs create mode 100644 pyo3_bindgen_engine/src/syntax/class.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/ident.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/mod.rs create mode 100644 pyo3_bindgen_engine/src/syntax/common/path.rs create mode 100644 pyo3_bindgen_engine/src/syntax/function.rs create mode 100644 
pyo3_bindgen_engine/src/syntax/import.rs create mode 100644 pyo3_bindgen_engine/src/syntax/mod.rs create mode 100644 pyo3_bindgen_engine/src/syntax/module.rs create mode 100644 pyo3_bindgen_engine/src/syntax/property.rs create mode 100644 pyo3_bindgen_engine/src/syntax/type_var.rs delete mode 100644 pyo3_bindgen_engine/src/types.rs create mode 100644 pyo3_bindgen_engine/src/typing/from_py.rs create mode 100644 pyo3_bindgen_engine/src/typing/into_rs.rs create mode 100644 pyo3_bindgen_engine/src/typing/mod.rs create mode 100644 pyo3_bindgen_engine/src/utils/error.rs create mode 100644 pyo3_bindgen_engine/src/utils/io.rs create mode 100644 pyo3_bindgen_engine/src/utils/mod.rs create mode 100644 pyo3_bindgen_engine/src/utils/result.rs diff --git a/.github/workflows/dependabot.yml b/.github/workflows/dependabot.yml new file mode 100644 index 0000000..544bfee --- /dev/null +++ b/.github/workflows/dependabot.yml @@ -0,0 +1,41 @@ +name: Dependabot automation +on: + pull_request: + check_run: + types: [completed] + +permissions: + contents: write + pull-requests: write + +jobs: + approve: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'pull_request' }} + steps: + - name: Fetch metadata + id: metadata + uses: dependabot/fetch-metadata@v1 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + - name: Approve PR + env: + PR_URL: ${{github.event.pull_request.html_url}} + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh pr review --approve "$PR_URL" + + auto_merge: + runs-on: ubuntu-latest + if: ${{ github.actor == 'dependabot[bot]' && github.event_name == 'check_run' }} + steps: + - name: Fetch metadata + id: metadata + uses: dependabot/fetch-metadata@v1 + with: + github-token: "${{ secrets.GITHUB_TOKEN }}" + - name: Enable PR auto-merge + if: steps.metadata.outputs.update-type == 'version-update:semver-patch' + env: + PR_URL: ${{github.event.pull_request.html_url}} + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} + run: gh pr merge --auto 
--merge "$PR_URL" diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 52ffcb7..bf4eaa6 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -41,7 +41,7 @@ jobs: fail-fast: false matrix: toolchain: - - "1.70" # Minimal supported Rust version (MSRV) + - "1.74" # Minimal supported Rust version (MSRV) - stable - beta steps: @@ -106,7 +106,7 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} files: lcov.info - fail_ci_if_error: true + fail_ci_if_error: false deny: runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index 80f3b93..76a9513 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.11" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb" dependencies = [ "anstyle", "anstyle-parse", @@ -33,9 +33,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anstyle-parse" @@ -67,9 +67,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.13" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00ad3f3a942eee60335ab4342358c161ee296829e0d16ff42fc1d6cb07815467" +checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" dependencies = [ "anstyle", "bstr", @@ -92,17 +92,11 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = 
"bitflags" -version = "2.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" - [[package]] name = "bstr" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", "regex-automata", @@ -111,9 +105,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" [[package]] name = "cast" @@ -129,9 +123,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -140,15 +134,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", @@ -261,6 +255,12 @@ version = 
"0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "difflib" version = "0.4.0" @@ -275,19 +275,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" - -[[package]] -name = "errno" -version = "0.3.8" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" -dependencies = [ - "libc", - "windows-sys", -] +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "float-cmp" @@ -300,9 +290,13 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" +dependencies = [ + "cfg-if", + "crunchy", +] [[package]] name = "heck" @@ -312,9 +306,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" -version = "0.3.4" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "indoc" @@ -324,12 +318,12 @@ checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" [[package]] name = 
"is-terminal" -version = "0.4.10" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", - "rustix", + "libc", "windows-sys", ] @@ -359,24 +353,18 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "js-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] [[package]] name = "libc" -version = "0.2.152" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" - -[[package]] -name = "linux-raw-sys" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "lock_api" @@ -390,9 +378,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" @@ -417,9 +405,9 @@ checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = 
"da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", ] @@ -487,6 +475,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + [[package]] name = "predicates" version = "3.1.0" @@ -538,23 +532,24 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a89dc7a5850d0e983be1ec2a463a171d20990487c3cfcd68b5363f1ee3d6fe0" +checksum = "53bdbb96d49157e65d45cc287af5f32ffadd5f4761438b527b055fb0d4bb8233" dependencies = [ "cfg-if", "libc", "memoffset", "parking_lot", + "portable-atomic", "pyo3-build-config", "pyo3-ffi", ] [[package]] name = "pyo3-build-config" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07426f0d8fe5a601f26293f300afd1a7b1ed5e78b2a705870c5f30893c5163be" +checksum = "deaa5745de3f5231ce10517a1f5dd97d53e5a2fd77aa6b5842292085831d48d7" dependencies = [ "once_cell", "target-lexicon", @@ -562,9 +557,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbb7dec17e17766b46bca4f1a4215a85006b4c2ecde122076c562dd058da6cf1" +checksum = "62b42531d03e08d4ef1f6e85a2ed422eb678b8cd62b762e53891c05faf0d4afa" dependencies = [ "libc", "pyo3-build-config", @@ -572,7 +567,7 @@ dependencies = [ [[package]] name = "pyo3_bindgen" -version = "0.2.0" +version = "0.3.0" dependencies = [ "pyo3_bindgen_engine", "pyo3_bindgen_macros", @@ -580,7 +575,7 @@ dependencies = [ [[package]] name = "pyo3_bindgen_cli" -version = "0.2.0" +version = "0.3.0" dependencies = [ "assert_cmd", "clap", @@ -592,7 +587,7 @@ dependencies = [ [[package]] name = "pyo3_bindgen_engine" -version = 
"0.2.0" +version = "0.3.0" dependencies = [ "criterion", "indoc", @@ -602,12 +597,15 @@ dependencies = [ "pyo3", "pyo3-build-config", "quote", + "rustc-hash", "syn", + "thiserror", + "typed-builder", ] [[package]] name = "pyo3_bindgen_macros" -version = "0.2.0" +version = "0.3.0" dependencies = [ "proc-macro2", "pyo3", @@ -627,9 +625,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" +checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" dependencies = [ "either", "rayon-core", @@ -651,7 +649,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" dependencies = [ - "bitflags 1.3.2", + "bitflags", ] [[package]] @@ -668,9 +666,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -684,23 +682,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] -name = "rustix" -version = "0.38.30" +name = "rustc-hash" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322394588aaf33c24007e8bb3238ee3e4c5c09c084ab32bc73890b99ff326bca" -dependencies = [ - "bitflags 2.4.2", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -719,18 +710,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.195" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.195" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", @@ -739,9 +730,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.111" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -762,9 +753,9 @@ checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" [[package]] name = "syn" -version = "2.0.49" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915aea9e586f80826ee59f8453c1101f9d1c4b3964cd2460185ee8e299ada496" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -773,9 +764,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.13" +version = "0.12.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" [[package]] name = "termtree" @@ -783,6 +774,26 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" +[[package]] +name = "thiserror" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -793,6 +804,26 @@ dependencies = [ "serde_json", ] +[[package]] +name = "typed-builder" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "444d8748011b93cb168770e8092458cb0f8854f931ff82fdf6ddfbd72a9c933e" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "563b3b88238ec95680aef36bdece66896eaa7ce3c0f1b4f39d38fb2435261352" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -816,9 +847,9 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", @@ -826,9 +857,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = 
"0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -836,9 +867,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" dependencies = [ "bumpalo", "log", @@ -851,9 +882,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -861,9 +892,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", @@ -874,15 +905,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" [[package]] name = "web-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" dependencies = [ "js-sys", "wasm-bindgen", @@ -925,7 +956,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -945,17 +976,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -966,9 +997,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" @@ -978,9 +1009,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" @@ -990,9 +1021,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" @@ -1002,9 +1033,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" @@ -1014,9 +1045,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" @@ -1026,9 +1057,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" @@ -1038,6 +1069,6 @@ checksum = 
"ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" diff --git a/Cargo.toml b/Cargo.toml index 1b8fe0f..178b4d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,13 +19,13 @@ keywords = ["bindgen", "ffi", "pyo3", "python"] license = "MIT OR Apache-2.0" readme = "README.md" repository = "https://github.com/AndrejOrsula/pyo3_bindgen" -rust-version = "1.70" -version = "0.2.0" +rust-version = "1.74" +version = "0.3.0" [workspace.dependencies] -pyo3_bindgen = { path = "pyo3_bindgen", version = "0.2.0" } -pyo3_bindgen_engine = { path = "pyo3_bindgen_engine", version = "0.2.0" } -pyo3_bindgen_macros = { path = "pyo3_bindgen_macros", version = "0.2.0" } +pyo3_bindgen = { path = "pyo3_bindgen", version = "0.3.0" } +pyo3_bindgen_engine = { path = "pyo3_bindgen_engine", version = "0.3.0" } +pyo3_bindgen_macros = { path = "pyo3_bindgen_macros", version = "0.3.0" } assert_cmd = { version = "2" } clap = { version = "4.5", features = ["derive"] } @@ -38,4 +38,7 @@ proc-macro2 = { version = "1" } pyo3 = { version = "0.20", default-features = false } pyo3-build-config = { version = "0.20", features = ["resolve-config"] } quote = { version = "1" } +rustc-hash = { version = "1" } syn = { version = "2" } +thiserror = { version = "1" } +typed-builder = { version = "0.18" } diff --git a/README.md b/README.md index f536dc0..2d0103e 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ pub fn main() -> pyo3::PyResult<()> { This project is intended to simplify the integration or transition of existing Python codebases into Rust. 
You, as a developer, gain immediate access to the Rust type system and countless other benefits of modern compiled languages with the generated bindings. Furthermore, the entire stock of high-quality crates from [crates.io](https://crates.io) becomes at your disposal. -On its own, the generated Rust code does not provide any performance benefits over using the Python code (it might actually be slower — yet to be benchmarked). However, it can be used as a starting point for further optimization if you decide to rewrite performance-critical parts of your codebase in pure Rust. +On its own, the generated Rust code does not provide any performance benefits over using the Python code. However, it can be used as a starting point for further optimization if you decide to rewrite performance-critical parts of your codebase in pure Rust. ## Overview @@ -92,23 +92,24 @@ Add `pyo3` as a dependency and `pyo3_bindgen` as a build dependency to your [`Ca pyo3 = { version = "0.20", features = ["auto-initialize"] } [build-dependencies] -pyo3_bindgen = { version = "0.1" } +pyo3_bindgen = { version = "0.3" } ``` ### Option 1: Build script -Create a [`build.rs`](https://doc.rust-lang.org/cargo/reference/build-scripts.html) script in the root of your crate that generates bindings to the `target_module` Python module. +Create a [`build.rs`](https://doc.rust-lang.org/cargo/reference/build-scripts.html) script in the root of your crate that generates bindings to the `py_module` Python module. ```rs // build.rs - -fn main() { - // Generate Rust bindings to the Python module - pyo3_bindgen::build_bindings( - "target_module", - std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("bindings.rs"), - ) - .unwrap(); +use pyo3_bindgen::{Codegen, Config}; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + // Generate Rust bindings to Python modules + Codegen::new(Config::default())? + .module_name("py_module")? + .module_names(&["other_module.core", "other_module.utils.io"])? 
+ .build(std::path::Path::new(&std::env::var("OUT_DIR")?).join("bindings.rs"))?; + Ok(()) } ``` @@ -116,7 +117,7 @@ Afterwards, include the generated bindings anywhere in your crate. ```rs include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -pub use target_module::*; +pub use py_module::*; ``` ### Option 2: CLI tool @@ -131,7 +132,7 @@ Afterwards, run the `pyo3_bindgen` executable while passing the name of the targ ```bash # Pass `--help` to show the usage and available options -pyo3_bindgen -m target_module -o bindings.rs +pyo3_bindgen -m py_module other_module.core -o bindings.rs ``` ### Option 3 \[Experimental\]: Procedural macros @@ -142,28 +143,30 @@ Enable the `macros` feature of `pyo3_bindgen`. ```toml [build-dependencies] -pyo3_bindgen = { version = "0.1", features = ["macros"] } +pyo3_bindgen = { version = "0.3", features = ["macros"] } ``` Then, you can call the `import_python!` macro anywhere in your crate. ```rs -pyo3_bindgen::import_python!("target_module"); -pub use target_module::*; +pyo3_bindgen::import_python!("py_module"); +pub use py_module::*; ``` ## Status This project is in early development, and as such, the API of the generated bindings is not yet stable. -- Not all Python types are mapped to their Rust equivalents yet. For this reason, some additional typecasting might be currently required when using the generated bindings (e.g. `let typed_value: target_module::Class = any_value.extract()?;`). -- The binding generation is primarily designed to be used inside build scripts or via procedural macros. Therefore, the performance of the codegen process is [benchmarked](./pyo3_bindgen_engine/benches/bindgen.rs) to understand the potential impact on build times. Although there is currently plenty of room for optimization in the current naive implementation, even the largest modules are processed in less than a second on a *modern* laptop. 
-- The generation of bindings should never panic as long as the target Python module can be successfully imported. If it does, it is a bug resulting from an unexpected edge-case Python module structure or an unforeseen combination of enabled PyO3 features. -- However, the generated bindings might not directly compile in some specific cases. Currently, there are two known issue; bindings will contain duplicate function definitions if present in the original code, and function parameters might use the same name as a class defined in the same scope (allowed in Python but not in Rust). If you encounter any other issues, consider manually rewriting the problematic parts of the bindings. -- Although implemented, the procedural macros might not work in all cases - especially when some PyO3 features are enabled. In most cases, PyO3 fails to import the target Python module when used from within a `proc_macro` crate. Therefore, it is recommended to use build scripts instead for now. -- The code will be refactored and cleaned up in the upcoming releases. The current implementation is a result of a very quick prototype that was built to test the feasibility of the idea. For example, configurability of the generated bindings is planned (e.g. allowlist/ignorelist of attributes). Furthermore, automatic generation of dependent Python modules will be considered in order to provide a more complete typing experience. - -Please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) any issues that you might encounter. Contributions are more than welcome! If you are looking for a place to start, consider searching for `TODO` comments in the codebase. +- Not all Python types are mapped to their Rust equivalents yet. For this reason, some additional typecasting might be currently required when using the generated bindings (e.g. `let typed_value: py_module::MyClass = get_value()?.extract()?;`). 
+- The binding generation is primarily designed to be used inside build scripts or via procedural macros. Therefore, the performance of the codegen process is [benchmarked](./pyo3_bindgen_engine/benches/bindgen.rs) to understand the potential impact on build times. Here are some preliminary results for version `0.3.0` with the default configuration (measured: parsing IO & codegen | not measured: compilation of the generated bindings, which takes much longer): + - `sys`: 1.24 ms (0.66k total LoC) + - `os`: 8.38 ms (3.88k total LoC) + - `numpy`: 1.02 s (294k total LoC) + - `torch`: 7.05 s (1.08M total LoC) +- The generation of bindings should never panic as long as the target Python module can be successfully imported. If it does, please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. +- The generated bindings should always be compilable and usable in Rust. If you encounter any issues, consider manually fixing the problematic parts of the bindings and please [report](https://github.com/AndrejOrsula/pyo3_bindgen/issues/new) this as a bug. +- However, the generated bindings are based on the introspection of the target Python module. Therefore, the correctness of the generated bindings is directly dependent on the quality of the type annotations and docstrings in the target Python module. Ideally, the generated bindings should be considered unsafe and serve as a starting point for safe and idiomatic Rust APIs. +- Although implemented, the procedural macro does not work in many cases because PyO3 fails to import the target Python module when used from within a `proc_macro` crate. Therefore, it is recommended to use build scripts instead for now. ## License diff --git a/pyo3_bindgen/src/lib.rs b/pyo3_bindgen/src/lib.rs index 67ae92a..c337265 100644 --- a/pyo3_bindgen/src/lib.rs +++ b/pyo3_bindgen/src/lib.rs @@ -1,78 +1,8 @@ -//! Public API library for automatic generation of Rust FFI bindings to Python modules. -//! -//! 
## Instructions -//! -//! Add `pyo3` as a dependency and `pyo3_bindgen` as a build dependency to your [`Cargo.toml`](https://doc.rust-lang.org/cargo/reference/manifest.html) manifest (`auto-initialize` feature of `pyo3` is optional and shown here for your convenience). -//! -//! ```toml -//! [dependencies] -//! pyo3 = { version = "0.20", features = ["auto-initialize"] } -//! -//! [build-dependencies] -//! pyo3_bindgen = { version = "0.1" } -//! ``` -//! -//! ### Option 1: Build script -//! -//! Create a [`build.rs`](https://doc.rust-lang.org/cargo/reference/build-scripts.html) script in the root of your crate that generates bindings to the `target_module` Python module. -//! -//! ```rs -//! // build.rs -//! -//! fn main() { -//! // Generate Rust bindings to the Python module -//! pyo3_bindgen::build_bindings( -//! "target_module", -//! std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("bindings.rs"), -//! ) -//! .unwrap(); -//! } -//! ``` -//! -//! Afterwards, include the generated bindings anywhere in your crate. -//! -//! ```rs -//! include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -//! pub use target_module::*; -//! ``` -//! -//! ### Option 2: CLI tool -//! -//! Install the `pyo3_bindgen` executable with `cargo`. -//! -//! ```bash -//! cargo install --locked pyo3_bindgen_cli -//! ``` -//! -//! Afterwards, run the `pyo3_bindgen` executable while passing the name of the target Python module. -//! -//! ```bash -//! # Pass `--help` to show the usage and available options -//! pyo3_bindgen -m target_module -o bindings.rs -//! ``` -//! -//! ### Option 3 \[Experimental\]: Procedural macros -//! -//! > **Note:** This feature is experimental and will probably fail in many cases. It is recommended to use build scripts instead. -//! -//! Enable the `macros` feature of `pyo3_bindgen`. -//! -//! ```toml -//! [build-dependencies] -//! pyo3_bindgen = { version = "0.1", features = ["macros"] } -//! ``` -//! -//! 
Then, you can call the `import_python!` macro anywhere in your crate. -//! -//! ```rs -//! pyo3_bindgen::import_python!("target_module"); -//! pub use target_module::*; -//! ``` +#![doc = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/../README.md"))] -pub use pyo3_bindgen_engine::{ - self as engine, build_bindings, generate_bindings, generate_bindings_for_module, - generate_bindings_from_str, -}; +// Public API re-exports from engine +pub use pyo3_bindgen_engine::{pyo3, Codegen, Config, PyBindgenError, PyBindgenResult}; +// Public API re-exports from macros #[cfg(feature = "macros")] -pub use pyo3_bindgen_macros::{self as macros, import_python}; +pub use pyo3_bindgen_macros::import_python; diff --git a/pyo3_bindgen_cli/src/main.rs b/pyo3_bindgen_cli/src/main.rs index 86e0306..ee7e8d1 100644 --- a/pyo3_bindgen_cli/src/main.rs +++ b/pyo3_bindgen_cli/src/main.rs @@ -1,19 +1,25 @@ //! CLI tool for automatic generation of Rust FFI bindings to Python modules. use clap::Parser; +use std::io::Write; fn main() { // Parse the CLI arguments let args = Args::parse(); // Generate the bindings for the module specified by the `--module-name` argument - let bindings = pyo3_bindgen::generate_bindings(&args.module_name).unwrap_or_else(|_| { - panic!( - "Failed to generate bindings for module: {}", - args.module_name - ) - }); - + let bindings = args + .module_names + .iter() + .fold(pyo3_bindgen::Codegen::default(), |codegen, module_name| { + codegen.module_name(module_name).unwrap_or_else(|err| { + panic!("Failed to parse the content of '{module_name}' Python module:\n{err}") + }) + }) + .generate() + .unwrap_or_else(|err| panic!("Failed to generate bindings for Python modules:\n{err}")); + + // Format the bindings with prettyplease let bindings = prettyplease::unparse(&syn::parse2(bindings).unwrap()); if let Some(output) = args.output { @@ -27,7 +33,7 @@ fn main() { .unwrap_or_else(|_| panic!("Failed to write to file: {}", output.display())); } else { // Otherwise, print the 
bindings to STDOUT - println!("{bindings}"); + std::io::stdout().write_all(bindings.as_bytes()).unwrap(); } } @@ -35,9 +41,9 @@ fn main() { #[derive(Parser)] #[command(author, version, about)] struct Args { - #[arg(short, long)] + #[arg(short='m', long="module-name", required=true, num_args=1..)] /// Name of the Python module for which to generate the bindings - pub module_name: String, + pub module_names: Vec, #[arg(short, long)] /// Name of the output file to which to write the bindings [default: STDOUT] pub output: Option, @@ -50,37 +56,49 @@ mod tests { #[test] fn test_parser_all() { // Arrange - let input = ["", "-m", "pip", "--output", "bindings.rs"]; + let input = ["", "-m", "os", "--output", "bindings.rs"]; // Act let args = Args::parse_from(input); // Assert - assert_eq!(args.module_name, "pip"); + assert_eq!(args.module_names, ["os"]); assert_eq!(args.output, Some("bindings.rs".into())); } #[test] fn test_parser_short() { // Arrange - let input = ["", "-m", "numpy"]; + let input = ["", "-m", "sys"]; // Act let args = Args::parse_from(input); // Assert - assert_eq!(args.module_name, "numpy"); + assert_eq!(args.module_names, ["sys"]); } #[test] fn test_parser_long() { // Arrange - let input = ["", "--module-name", "setuptools"]; + let input = ["", "--module-name", "io"]; + + // Act + let args = Args::parse_from(input); + + // Assert + assert_eq!(args.module_names, ["io"]); + } + + #[test] + fn test_parser_multiple() { + // Arrange + let input = ["", "-m", "os", "sys", "--module-name", "io"]; // Act let args = Args::parse_from(input); // Assert - assert_eq!(args.module_name, "setuptools"); + assert_eq!(args.module_names, ["os", "sys", "io"]); } } diff --git a/pyo3_bindgen_cli/tests/cli.rs b/pyo3_bindgen_cli/tests/cli.rs index 221034f..e0c64a8 100644 --- a/pyo3_bindgen_cli/tests/cli.rs +++ b/pyo3_bindgen_cli/tests/cli.rs @@ -17,7 +17,7 @@ mod test_cli { assert.success().stdout( predicate::str::contains(format!("Usage: {BIN_NAME}")) 
.and(predicate::str::contains("Options:")) - .and(predicate::str::contains("--module-name ")) + .and(predicate::str::contains("--module-name ")) .and(predicate::str::contains("--output ")), ); } @@ -33,7 +33,7 @@ mod test_cli { // Assert assert.failure().stderr( predicate::str::contains("error: the following required arguments") - .and(predicate::str::contains("--module-name ")) + .and(predicate::str::contains("--module-name ")) .and(predicate::str::contains(format!("Usage: {BIN_NAME}"))), ); } diff --git a/pyo3_bindgen_engine/Cargo.toml b/pyo3_bindgen_engine/Cargo.toml index 99275c3..90983d3 100644 --- a/pyo3_bindgen_engine/Cargo.toml +++ b/pyo3_bindgen_engine/Cargo.toml @@ -2,7 +2,7 @@ name = "pyo3_bindgen_engine" authors.workspace = true categories.workspace = true -description = "Engine behind automatic generation of Rust bindings to Python modules" +description = "Engine for automatic generation of Rust bindings to Python modules" edition.workspace = true keywords.workspace = true license.workspace = true @@ -16,7 +16,10 @@ itertools = { workspace = true } proc-macro2 = { workspace = true } pyo3 = { workspace = true } quote = { workspace = true } +rustc-hash = { workspace = true } syn = { workspace = true } +thiserror = { workspace = true } +typed-builder = { workspace = true } [dev-dependencies] criterion = { workspace = true } @@ -31,6 +34,9 @@ name = "pyo3_bindgen_engine" path = "src/lib.rs" crate-type = ["rlib"] +[features] +default = [] + [[bench]] name = "bindgen" harness = false diff --git a/pyo3_bindgen_engine/benches/bindgen.rs b/pyo3_bindgen_engine/benches/bindgen.rs index 1383b99..59ad1e2 100644 --- a/pyo3_bindgen_engine/benches/bindgen.rs +++ b/pyo3_bindgen_engine/benches/bindgen.rs @@ -1,68 +1,61 @@ -macro_rules! bench_bindgen_from_str { - { - |$criterion:ident| $(,)? - $bench_name:ident $(,)? - $(py)?$(python)? $(:)? $code_py:literal $(,)? - } => { - { - const CODE_PY: &str = indoc::indoc! 
{ $code_py }; - $criterion.bench_function(stringify!($bench_name), |b| { - b.iter(|| { - pyo3_bindgen_engine::generate_bindings_from_str( - criterion::black_box(CODE_PY), - criterion::black_box(concat!("bench_mod_", stringify!($bench_name))), - ) - .unwrap() - }); - }); - } - }; -} +criterion::criterion_group!(benches, criterion_benchmark); +criterion::criterion_main!(benches); -macro_rules! try_bench_bindgen_for_module { - { - |$py:ident, $criterion:ident| $(,)? - $(module)? $(:)? $module_name:literal $(,)? - } => { - if let Ok(module) = $py.import($module_name) { - $criterion.bench_function(concat!("bench_bindgen_module_", $module_name), |b| { - b.iter(|| { - pyo3_bindgen_engine::generate_bindings_for_module( - criterion::black_box($py), - criterion::black_box(module), - ) - .unwrap() - }); - }); - } - }; +fn criterion_benchmark(crit: &mut criterion::Criterion) { + bench_from_str(crit); + bench_mod(crit); } -fn criterion_benchmark(crit: &mut criterion::Criterion) { - let mut group_from_str = crit.benchmark_group("generate_bindings_from_str"); +fn bench_from_str(crit: &mut criterion::Criterion) { + let mut group_from_str = crit.benchmark_group("bindgen_str"); group_from_str .warm_up_time(std::time::Duration::from_millis(250)) .sample_size(100); - bench_bindgen_from_str! { + + macro_rules! bench_impl { + { + |$criterion:ident| $(,)? + $bench_name:ident $(,)? + $(py)?$(python)?$(:)? $code_py:literal $(,)? + } => { + { + const CODE_PY: &str = indoc::indoc! { $code_py }; + $criterion.bench_function(stringify!($bench_name), |b| { + b.iter(|| { + pyo3_bindgen_engine::Codegen::default() + .module_from_str( + criterion::black_box(CODE_PY), + criterion::black_box(concat!("bench_mod_", stringify!($bench_name))) + ) + .unwrap() + .generate() + .unwrap() + }); + }); + } + }; + } + + bench_impl! { |group_from_str| - bench_bindgen_attribute - py: r#" + attribute + r#" t_const_float: float = 0.42 "# } - bench_bindgen_from_str! { + bench_impl! 
{ |group_from_str| - bench_bindgen_function - py: r#" + function + r#" def t_fn(t_arg1: str) -> int: """t_docs""" ... "# } - bench_bindgen_from_str! { + bench_impl! { |group_from_str| - bench_bindgen_class - py: r#" + class + r#" from typing import Dict, Optional class t_class: """t_docs""" @@ -80,28 +73,54 @@ fn criterion_benchmark(crit: &mut criterion::Criterion) { ... "# } + group_from_str.finish(); +} - let mut group_for_module = crit.benchmark_group("generate_bindings_for_module"); - group_for_module +fn bench_mod(crit: &mut criterion::Criterion) { + let mut group_module = crit.benchmark_group("bindgen_mod"); + group_module .warm_up_time(std::time::Duration::from_secs(2)) .sample_size(10); - pyo3::Python::with_gil(|py| { - try_bench_bindgen_for_module! { - |py, group_for_module| - module: "os" - } - try_bench_bindgen_for_module! { - |py, group_for_module| - module: "sys" - } - try_bench_bindgen_for_module! { - |py, group_for_module| - module: "numpy" - } - }); - group_for_module.finish(); -} -criterion::criterion_group!(benches, criterion_benchmark); -criterion::criterion_main!(benches); + macro_rules! bench_impl { + ( + |$criterion:ident| $(,)? + $(module:)? $module_name:literal $(,)? + ) => { + $criterion.bench_function($module_name, |b| { + b.iter(|| { + pyo3_bindgen_engine::Codegen::default() + .module_name( + criterion::black_box($module_name) + ) + .unwrap() + .generate() + .unwrap() + }); + }); + }; + { + |$criterion:ident| $(,)? + $(modules:)? [ $($module:literal),+ $(,)? ] $(,)? + } => { + $( + bench_impl!(|$criterion| $module); + )+ + }; + } + + bench_impl! { + |group_module| + modules: [ + "io", + "math", + "os", + "re", + "sys", + "time", + ] + } + + group_module.finish(); +} diff --git a/pyo3_bindgen_engine/src/bindgen.rs b/pyo3_bindgen_engine/src/bindgen.rs deleted file mode 100644 index 5c73d0f..0000000 --- a/pyo3_bindgen_engine/src/bindgen.rs +++ /dev/null @@ -1,150 +0,0 @@ -//! Module for handling the binding generation process. 
- -pub mod attribute; -pub mod class; -pub mod function; -pub mod module; - -pub use attribute::bind_attribute; -pub use class::bind_class; -pub use function::bind_function; -pub use module::{bind_module, bind_reexport}; - -// TODO: Refactor everything into a large configurable struct that keeps track of all the -// important information needed to properly generate the bindings -// - Use builder pattern for the configuration of the struct -// - Keep track of all the types/classes that have been generated -// - Keep track of all imports to understand where each type is coming from -// - Keep track of all the external types that are used as parameters/return types and consider generating bindings for them as well - -// TODO: Ensure there are no duplicate entries in the generated code - -/// Generate Rust bindings to a Python module specified by its name. Generating bindings to -/// submodules such as `os.path` is also supported as long as the module can be directly imported -/// from the Python interpreter via `import os.path`. -/// -/// # Arguments -/// -/// * `module_name` - Name of the Python module to generate bindings for. -/// -/// # Returns -/// -/// `Result` containing the generated bindings as a `proc_macro2::TokenStream` on success, or a -/// `pyo3::PyErr` on failure. -/// -/// # Example -/// -/// ``` -/// // use pyo3_bindgen::generate_bindings; -/// use pyo3_bindgen_engine::generate_bindings; -/// -/// fn main() -> Result<(), pyo3::PyErr> { -/// let bindings: proc_macro2::TokenStream = generate_bindings("os")?; -/// Ok(()) -/// } -/// ``` -pub fn generate_bindings(module_name: &str) -> Result { - #[cfg(not(PyPy))] - pyo3::prepare_freethreaded_python(); - - pyo3::Python::with_gil(|py| { - let module = py.import(module_name)?; - generate_bindings_for_module(py, module) - }) -} - -/// Generate Rust bindings to an instance of `pyo3::types::PyModule` Python module. -/// -/// # Arguments -/// -/// * `py` - Python interpreter instance. 
-/// * `module` - Python module to generate bindings for. -/// -/// # Returns -/// -/// `Result` containing the generated bindings as a `proc_macro2::TokenStream` on success, or a -/// `pyo3::PyErr` on failure. -/// -/// # Example -/// -/// ``` -/// // use pyo3_bindgen::generate_bindings_for_module; -/// use pyo3_bindgen_engine::generate_bindings_for_module; -/// -/// fn main() -> Result<(), pyo3::PyErr> { -/// pyo3::prepare_freethreaded_python(); -/// let bindings: proc_macro2::TokenStream = pyo3::Python::with_gil(|py| { -/// let module = py.import("os")?; -/// generate_bindings_for_module(py, module) -/// })?; -/// Ok(()) -/// } -/// ``` -pub fn generate_bindings_for_module( - py: pyo3::Python, - module: &pyo3::types::PyModule, -) -> Result { - let all_types = module::collect_types_of_module( - py, - module, - module, - &mut std::collections::HashSet::new(), - &mut std::collections::HashSet::default(), - )?; - - bind_module( - py, - module, - module, - &mut std::collections::HashSet::new(), - &all_types, - ) -} - -/// Generate Rust bindings to a Python module specified by its `source_code`. The module will be -/// named `new_module_name` in the generated bindings. However, the generated bindings might not -/// be immediately functional if the module represented by its `source_code` is not a known Python -/// module in the current Python interpreter. -/// -/// # Arguments -/// -/// * `source_code` - Source code of the Python module to generate bindings for. -/// * `new_module_name` - Name of the Python module to generate bindings for. -/// -/// # Returns -/// -/// `Result` containing the generated bindings as a `proc_macro2::TokenStream` on success, or a -/// `pyo3::PyErr` on failure. 
-/// -/// # Example -/// -/// ``` -/// // use pyo3_bindgen::generate_bindings_from_str; -/// use pyo3_bindgen_engine::generate_bindings_from_str; -/// -/// fn main() -> Result<(), pyo3::PyErr> { -/// const PYTHON_SOURCE_CODE: &str = r#" -/// def string_length(string: str) -> int: -/// return len(string) -/// "#; -/// let bindings = generate_bindings_from_str(PYTHON_SOURCE_CODE, "utils")?; -/// Ok(()) -/// } -/// ``` -pub fn generate_bindings_from_str( - source_code: &str, - new_module_name: &str, -) -> Result { - #[cfg(not(PyPy))] - pyo3::prepare_freethreaded_python(); - - pyo3::Python::with_gil(|py| { - let module = pyo3::types::PyModule::from_code( - py, - source_code, - &format!("{new_module_name}/__init__.py"), - new_module_name, - )?; - generate_bindings_for_module(py, module) - }) -} diff --git a/pyo3_bindgen_engine/src/bindgen/attribute.rs b/pyo3_bindgen_engine/src/bindgen/attribute.rs deleted file mode 100644 index 57ca6bf..0000000 --- a/pyo3_bindgen_engine/src/bindgen/attribute.rs +++ /dev/null @@ -1,131 +0,0 @@ -use crate::types::Type; - -/// Generate Rust bindings to a Python attribute. The attribute can be a standalone -/// attribute or a property of a class. 
-pub fn bind_attribute( - py: pyo3::Python, - module_name: &str, - is_class: bool, - name: &str, - attr: &pyo3::PyAny, - attr_type: &pyo3::PyAny, - all_types: &std::collections::HashSet, -) -> Result { - let mut token_stream = proc_macro2::TokenStream::new(); - - let mut has_setter = true; - let mut getter_type = attr_type; - let mut setter_type = attr_type; - let getter_doc = py.None(); - let mut getter_doc = getter_doc.as_ref(py); - let setter_doc = py.None(); - let mut setter_doc = setter_doc.as_ref(py); - - // Check if the attribute has a getter and setter (is a property) - if let Ok(getter) = attr.getattr("fget") { - let inspect = py.import("inspect")?; - let signature = inspect.call_method1("signature", (getter,))?; - let empty_return_annotation = signature.getattr("empty")?; - let return_annotation = signature.getattr("return_annotation")?; - if !return_annotation.is(empty_return_annotation) { - getter_type = return_annotation; - } - if let Ok(doc) = getter.getattr("__doc__") { - getter_doc = doc; - } - has_setter = false; - } - if let Ok(setter) = attr.getattr("fset") { - if !setter.is_none() { - let inspect = py.import("inspect")?; - let signature = inspect.call_method1("signature", (setter,))?; - let empty_return_annotation = signature.getattr("empty")?; - let value_annotation = signature - .getattr("parameters")? - .call_method0("values")? - .iter()? - .last() - .unwrap()? 
- .getattr("annotation")?; - if !value_annotation.is(empty_return_annotation) { - setter_type = value_annotation; - } - if let Ok(doc) = setter.getattr("__doc__") { - setter_doc = doc; - } - has_setter = true; - } - } - - let mut getter_doc = getter_doc.to_string(); - if getter_doc == "None" || getter_doc.is_empty() { - getter_doc = format!("Getter for the `{name}` attribute"); - }; - - let mut setter_doc = setter_doc.to_string(); - if setter_doc == "None" || setter_doc.is_empty() { - setter_doc = format!("Setter for the `{name}` attribute"); - }; - - let getter_ident = if syn::parse_str::(name).is_ok() { - quote::format_ident!("{}", name) - } else { - quote::format_ident!("r#{}", name) - }; - let setter_ident = quote::format_ident!("set_{}", name); - - let getter_type = Type::try_from(getter_type)?.into_rs_owned(module_name, all_types); - let setter_type = Type::try_from(setter_type)?.into_rs_borrowed(module_name, all_types); - - if is_class { - token_stream.extend(quote::quote! { - #[doc = #getter_doc] - pub fn #getter_ident<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - ) -> ::pyo3::PyResult<#getter_type> { - self.getattr(::pyo3::intern!(py, #name))? - .extract() - } - }); - if has_setter { - token_stream.extend(quote::quote! { - #[doc = #setter_doc] - pub fn #setter_ident<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - value: #setter_type, - ) -> ::pyo3::PyResult<()> { - self.setattr(::pyo3::intern!(py, #name), value)?; - Ok(()) - } - }); - } - } else { - token_stream.extend(quote::quote! { - #[doc = #getter_doc] - pub fn #getter_ident<'py>( - py: ::pyo3::marker::Python<'py>, - ) -> ::pyo3::PyResult<#getter_type> { - py.import(::pyo3::intern!(py, #module_name))? - .getattr(::pyo3::intern!(py, #name))? - .extract() - } - }); - if has_setter { - token_stream.extend(quote::quote! 
{ - #[doc = #setter_doc] - pub fn #setter_ident<'py>( - py: ::pyo3::marker::Python<'py>, - value: #setter_type, - ) -> ::pyo3::PyResult<()> { - py.import(::pyo3::intern!(py, #module_name))? - .setattr(::pyo3::intern!(py, #name), value)?; - Ok(()) - } - }); - } - } - - Ok(token_stream) -} diff --git a/pyo3_bindgen_engine/src/bindgen/class.rs b/pyo3_bindgen_engine/src/bindgen/class.rs deleted file mode 100644 index 0b5ea46..0000000 --- a/pyo3_bindgen_engine/src/bindgen/class.rs +++ /dev/null @@ -1,254 +0,0 @@ -use crate::bindgen::{bind_attribute, bind_function}; - -/// Generate Rust bindings to a Python class with all its methods and attributes (properties). -/// This function will call itself recursively to generate bindings to all nested classes. -pub fn bind_class( - py: pyo3::Python, - root_module: &pyo3::types::PyModule, - class: &pyo3::types::PyType, - all_types: &std::collections::HashSet, -) -> Result { - let inspect = py.import("inspect")?; - - // Extract the names of the modules - let root_module_name = root_module.name()?; - let class_full_name = class.name()?; - let class_name = class_full_name.split('.').last().unwrap(); - let class_module_name = format!( - "{}{}{}", - class.getattr("__module__")?, - if class_full_name.contains('.') { - "." 
- } else { - "" - }, - class_full_name.trim_end_matches(&format!(".{class_name}")) - ); - - // Create the Rust class identifier (raw string if it is a keyword) - let class_ident = if syn::parse_str::(class_name).is_ok() { - quote::format_ident!("{class_name}") - } else { - quote::format_ident!("r#{class_name}") - }; - - let mut fn_names = Vec::new(); - - // Iterate over all attributes of the module while updating the token stream - let mut impl_token_stream = proc_macro2::TokenStream::new(); - class - .dir() - .iter() - .map(|name| { - let name = name.str().unwrap().to_str().unwrap(); - let attr = class.getattr(name).unwrap(); - let attr_type = attr.get_type(); - (name, attr, attr_type) - }) - .filter(|&(_, _, attr_type)| { - // Skip builtin functions - !attr_type - .is_subclass_of::() - .unwrap_or(false) - }) - .filter(|&(name, _, _)| { - // Skip private attributes (except for __init__ and __call__) - !name.starts_with('_') || name == "__init__" || name == "__call__" - }) - .filter(|(_, attr, attr_type)| { - // Skip typing attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("typing")) - && !attr_type.to_string().contains("typing") - }) - .filter(|(_, attr, _)| { - // Skip __future__ attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("__future__")) - }) - .filter(|&(_, attr, _)| { - // Skip classes and functions that are not part of the package - // However, this should keep instances of classes and builtins even if they are builtins or from other packages - if let Ok(module) = attr.getattr("__module__") { - if module.to_string().starts_with(root_module_name) { - true - } else { - !(inspect - .call_method1("isclass", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap()) - } - } else { - true - } - }) - .filter(|&(_, attr, attr_type)| { - // Skip external modules - if attr_type - .is_subclass_of::() - 
.unwrap_or(false) - { - let is_submodule = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(root_module_name)); - is_submodule - } else { - true - } - }) - .for_each(|(name, attr, attr_type)| { - let is_internal = attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .starts_with(root_module_name); - let is_reexport = is_internal - && attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .ne(&class_module_name); - - let is_class = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_function = inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("ismethod", (attr,)) - .unwrap() - .is_true() - .unwrap(); - - // Make sure that only one of the three is true - debug_assert!(![is_class, is_function].iter().all(|&v| v)); - - if is_class && !is_reexport { - impl_token_stream.extend(bind_class( - py, - root_module, - attr.downcast().unwrap(), - all_types, - )); - } else if is_function { - fn_names.push(name.to_string()); - impl_token_stream.extend(bind_function( - py, - &class_module_name, - name, - attr, - all_types, - )); - } else if !name.starts_with('_') { - impl_token_stream.extend(bind_attribute( - py, - &class_module_name, - true, - name, - attr, - attr_type, - all_types, - )); - } - }); - - // Add new and call aliases (currently a reimplemented versions of the function) - // TODO: Call the Rust `self.__init__()` and `self.__call__()` functions directly instead of reimplementing it - if fn_names.contains(&"__init__".to_string()) && !fn_names.contains(&"new".to_string()) { - impl_token_stream.extend(bind_function( - py, - &class_module_name, - "new", - class.getattr("__init__")?, - all_types, - )); - } - if fn_names.contains(&"__call__".to_string()) && !fn_names.contains(&"call".to_string()) { - impl_token_stream.extend(bind_function( - py, - &class_module_name, - "call", - 
class.getattr("__call__")?, - all_types, - )); - } - - let mut doc = class.getattr("__doc__")?.to_string(); - if doc == "None" { - doc = String::new(); - }; - - Ok(quote::quote! { - #[doc = #doc] - #[repr(transparent)] - pub struct #class_ident(::pyo3::PyAny); - // Note: Using these macros is probably not the best idea, but it makes possible wrapping around ::pyo3::PyAny instead of ::pyo3::PyObject, which improves usability - ::pyo3::pyobject_native_type_named!(#class_ident); - ::pyo3::pyobject_native_type_info!(#class_ident, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), ::std::option::Option::Some(#class_module_name)); - ::pyo3::pyobject_native_type_extract!(#class_ident); - #[automatically_derived] - impl #class_ident { - #impl_token_stream - } - }) - - // Ok(quote::quote! { - // #[doc = #doc] - // #[repr(transparent)] - // #[derive(Clone, Debug)] - // pub struct #class_ident(pub ::pyo3::PyObject); - // #[automatically_derived] - // impl ::std::ops::Deref for #class_ident { - // type Target = ::pyo3::PyObject; - // fn deref(&self) -> &Self::Target { - // &self.0 - // } - // } - // #[automatically_derived] - // impl ::std::ops::DerefMut for #class_ident { - // fn deref_mut(&mut self) -> &mut Self::Target { - // &mut self.0 - // } - // } - // #[automatically_derived] - // impl<'py> ::pyo3::FromPyObject<'py> for #class_ident { - // fn extract(value: &'py ::pyo3::PyAny) -> ::pyo3::PyResult { - // Ok(Self(value.into())) - // } - // } - // #[automatically_derived] - // impl ::pyo3::ToPyObject for #class_ident { - // fn to_object<'py>(&'py self, py: ::pyo3::Python<'py>) -> ::pyo3::PyObject { - // self.as_ref(py).to_object(py) - // } - // } - // #[automatically_derived] - // impl From<::pyo3::PyObject> for #class_ident { - // fn from(value: ::pyo3::PyObject) -> Self { - // Self(value) - // } - // } - // #[automatically_derived] - // impl<'py> From<&'py ::pyo3::PyAny> for #class_ident { - // fn from(value: &'py ::pyo3::PyAny) -> Self { - // 
Self(value.into()) - // } - // } - // #[automatically_derived] - // impl #class_ident { - // #impl_token_stream - // } - // }) -} diff --git a/pyo3_bindgen_engine/src/bindgen/function.rs b/pyo3_bindgen_engine/src/bindgen/function.rs deleted file mode 100644 index 54ccfda..0000000 --- a/pyo3_bindgen_engine/src/bindgen/function.rs +++ /dev/null @@ -1,245 +0,0 @@ -use itertools::Itertools; -use pyo3::PyTypeInfo; - -use crate::types::Type; - -/// Generate Rust bindings to a Python function. The function can be a standalone function or a -/// method of a class. -pub fn bind_function( - py: pyo3::Python, - module_name: &str, - name: &str, - function: &pyo3::PyAny, - all_types: &std::collections::HashSet, -) -> Result { - let inspect = py.import("inspect")?; - - let signature = inspect.call_method1("signature", (function,))?; - - let empty_return_annotation = signature.getattr("empty")?; - - let parameters = signature.getattr("parameters")?; - let return_annotation = signature.getattr("return_annotation")?; - - let return_annotation = if return_annotation.is(empty_return_annotation) { - None - } else { - Some(return_annotation) - }; - - let mut positional_args_idents = Vec::new(); - let mut keyword_args_idents = Vec::new(); - let mut keyword_args_names = Vec::new(); - let mut var_positional_ident = None; - let mut var_keyword_ident = None; - - let parameters = parameters - .call_method0("values")? - .iter()? 
- .map(|parameter| { - let parameter = parameter.unwrap(); - - let empty_param_annotation = parameter.getattr("empty").unwrap(); - - let param_name = parameter.getattr("name").unwrap().to_string(); - - let param_default = parameter.getattr("default").unwrap(); - let param_annotation = parameter.getattr("annotation").unwrap(); - let param_kind = parameter.getattr("kind").unwrap(); - - let param_annotation = if param_annotation.is(empty_param_annotation) { - None - } else { - Some(param_annotation) - }; - let param_default = if param_default.is(empty_param_annotation) { - None - } else { - Some(param_default) - }; - // TODO: Turn into enum or process in-place - let param_kind = match param_kind.extract::().unwrap() { - 0 => "POSITIONAL_ONLY", - 1 => "POSITIONAL_OR_KEYWORD", - 2 => "VAR_POSITIONAL", // args - 3 => "KEYWORD_ONLY", - 4 => "VAR_KEYWORD", // kwargs - _ => unreachable!(), - }; - - if param_name != "self" { - match param_kind { - "POSITIONAL_ONLY" => { - positional_args_idents.push( - if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }, - ); - } - "KEYWORD_ONLY" | "POSITIONAL_OR_KEYWORD" => { - keyword_args_idents.push( - if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }, - ); - keyword_args_names.push(param_name.clone()); - } - "VAR_POSITIONAL" => { - var_positional_ident = - Some(if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }); - positional_args_idents.push( - if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }, - ); - } - "VAR_KEYWORD" => { - var_keyword_ident = - Some(if syn::parse_str::(¶m_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - }); - } 
- _ => unreachable!(), - } - } - - let param_annotation = match param_kind { - "VAR_POSITIONAL" => Some(pyo3::types::PyTuple::type_object(py).downcast().unwrap()), - "VAR_KEYWORD" => Some(pyo3::types::PyDict::type_object(py).downcast().unwrap()), - _ => param_annotation, - }; - - (param_name, param_annotation, param_default, param_kind) - }) - .collect_vec(); - - let function_ident = if syn::parse_str::(name).is_ok() { - quote::format_ident!("{}", name) - } else { - quote::format_ident!("r#{}", name) - }; - let function_name = function.getattr("__name__")?.to_string(); - - // Check if `self` is the first parameter - let has_self_param = parameters - .iter() - .any(|(param_name, _, _, _)| param_name == "self"); - - let param_idents = parameters - .iter() - .skip(usize::from(has_self_param)) - .map(|(param_name, _, _, _)| { - if syn::parse_str::(param_name).is_ok() { - quote::format_ident!("{}", param_name) - } else { - quote::format_ident!("r#{}", param_name) - } - }) - .collect_vec(); - let pynone = py.None(); - let pynone = pynone.as_ref(py); - let param_types = parameters - .iter() - .skip(usize::from(has_self_param)) - .map(|(_, param_annotation, _, _)| { - Type::try_from(param_annotation.unwrap_or_else(|| pynone)) - .unwrap() - .into_rs_borrowed(module_name, all_types) - }) - .collect_vec(); - let return_annotation = - Type::try_from(return_annotation.unwrap_or(pynone))?.into_rs_owned(module_name, all_types); - - let mut doc = function.getattr("__doc__")?.to_string(); - if doc == "None" { - doc = String::new(); - }; - - let (maybe_ref_self, callable_object) = if has_self_param { - (quote::quote! { &'py self, }, quote::quote! { self }) - } else { - ( - quote::quote! {}, - quote::quote! { py.import(::pyo3::intern!(py, #module_name))? }, - ) - }; - - let has_positional_args = !positional_args_idents.is_empty(); - let set_args = match ( - positional_args_idents.len() > 1, - var_positional_ident.is_some(), - ) { - (true, _) => { - quote::quote! 
{ - let __internal_args = ::pyo3::types::PyTuple::new( - py, - [#(::pyo3::IntoPy::<::pyo3::PyObject>::into_py(#positional_args_idents.to_owned(), py).as_ref(py),)*] - ); - } - } - (false, true) => { - let var_positional_ident = var_positional_ident.unwrap(); - quote::quote! { - let __internal_args = #var_positional_ident; - } - } - (false, false) => { - quote::quote! { let __internal_args = (); } - } - }; - - let has_kwargs = !keyword_args_idents.is_empty(); - let kwargs_initial = if let Some(var_keyword_ident) = var_keyword_ident { - quote::quote! { #var_keyword_ident } - } else { - quote::quote! { ::pyo3::types::PyDict::new(py) } - }; - let set_kwargs = quote::quote! { - let __internal_kwargs = #kwargs_initial; - #(__internal_kwargs.set_item(::pyo3::intern!(py, #keyword_args_names), #keyword_args_idents)?;)* - }; - - let call_method = match (has_positional_args, has_kwargs) { - (_, true) => { - quote::quote! { - #set_args - #set_kwargs - #callable_object.call_method(::pyo3::intern!(py, #function_name), __internal_args, Some(__internal_kwargs))? - } - } - (true, false) => { - quote::quote! { - #set_args - #callable_object.call_method1(::pyo3::intern!(py, #function_name), __internal_args)? - } - } - (false, false) => { - quote::quote! { - #callable_object.call_method0(::pyo3::intern!(py, #function_name))? - } - } - }; - - Ok(quote::quote! { - #[doc = #doc] - pub fn #function_ident<'py>( - #maybe_ref_self - py: ::pyo3::marker::Python<'py>, - #(#param_idents: #param_types),* - ) -> ::pyo3::PyResult<#return_annotation> { - #call_method.extract() - } - }) -} diff --git a/pyo3_bindgen_engine/src/bindgen/module.rs b/pyo3_bindgen_engine/src/bindgen/module.rs deleted file mode 100644 index ccd0655..0000000 --- a/pyo3_bindgen_engine/src/bindgen/module.rs +++ /dev/null @@ -1,513 +0,0 @@ -use itertools::Itertools; - -use crate::bindgen::{bind_attribute, bind_class, bind_function}; - -/// Generate a Rust module from a Python module. 
This function is called recursively to generate -/// bindings for all submodules. The generated module will contain all classes, functions, and -/// attributes of the Python module. During the first call, the `root_module` argument should be -/// the same as the `module` argument and the `processed_modules` argument should be an empty -/// `HashSet`. -pub fn bind_module( - py: pyo3::Python, - root_module: &pyo3::types::PyModule, - module: &pyo3::types::PyModule, - processed_modules: &mut std::collections::HashSet, - all_types: &std::collections::HashSet, -) -> Result { - let inspect = py.import("inspect")?; - - // Extract the names of the modules - let root_module_name = root_module.name()?; - let full_module_name = module.name()?; - let module_name: &str = full_module_name.split('.').last().unwrap(); - - // Create the Rust module identifier (raw string if it is a keyword) - let module_ident = if syn::parse_str::(module_name).is_ok() { - quote::format_ident!("{module_name}") - } else { - quote::format_ident!("r#{module_name}") - }; - - // Iterate over all attributes of the module while updating the token stream - let mut mod_token_stream = proc_macro2::TokenStream::new(); - module - .dir() - .iter() - .map(|name| { - let name = name.str().unwrap().to_str().unwrap(); - let attr = module.getattr(name).unwrap(); - let attr_type = attr.get_type(); - (name, attr, attr_type) - }) - .filter(|&(_, _, attr_type)| { - // Skip builtin functions - !attr_type - .is_subclass_of::() - .unwrap_or(false) - }) - .filter(|&(name, _, _)| { - // Skip private attributes - !name.starts_with('_') || name == "__init__" || name == "__call__" - }) - .filter(|(_, attr, attr_type)| { - // Skip typing attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("typing")) - && !attr_type.to_string().contains("typing") - }) - .filter(|(_, attr, _)| { - // Skip __future__ attributes - !attr - .getattr("__module__") - .is_ok_and(|module| 
module.to_string().contains("__future__")) - }) - .filter(|&(_, attr, _)| { - // Skip classes and functions that are not part of the package - // However, this should keep instances of classes and builtins even if they are builtins or from other packages - if let Ok(module) = attr.getattr("__module__") { - if module.to_string().starts_with(root_module_name) { - true - } else { - !(inspect - .call_method1("isclass", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap()) - } - } else { - true - } - }) - .filter(|&(_, attr, attr_type)| { - // Skip external modules - if attr_type - .is_subclass_of::() - .unwrap_or(false) - { - let is_part_of_package = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(root_module_name)); - is_part_of_package - } else { - true - } - }) - .for_each(|(name, attr, attr_type)| { - let is_internal = attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .starts_with(root_module_name); - let is_reexport = is_internal - && attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .ne(full_module_name); - - let is_module = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_class = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_function = inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("ismethod", (attr,)) - .unwrap() - .is_true() - .unwrap(); - - // Process hidden modules (shadowed by re-exported attributes of the same name) - if (is_class || is_function) - && is_reexport - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .last() - .unwrap() - == name - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .take(full_module_name.split('.').count()) - .join(".") - == full_module_name - { - let content = if 
is_class { - bind_class(py, root_module, attr.downcast().unwrap(), all_types).unwrap() - } else if is_function { - bind_function(py, full_module_name, name, attr, all_types).unwrap() - } else { - unreachable!() - }; - - let shadowed_module_name = attr.getattr("__module__").unwrap().to_string(); - let shadowed_module_name = shadowed_module_name.split('.').last().unwrap(); - let shadowed_module_ident = - if syn::parse_str::(shadowed_module_name).is_ok() { - quote::format_ident!("{}", shadowed_module_name) - } else { - quote::format_ident!("r#{}", shadowed_module_name) - }; - - mod_token_stream.extend(quote::quote! { - pub mod #shadowed_module_ident { - #content - } - }); - } - - if is_module { - let is_submodule_of_current_module = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(full_module_name)); - - if is_submodule_of_current_module { - if processed_modules.insert(format!( - "{}.{}", - attr.getattr("__package__").unwrap(), - name - )) { - mod_token_stream.extend(bind_module( - py, - root_module, - attr.downcast().unwrap(), - processed_modules, - all_types, - )); - } - } else { - mod_token_stream.extend(bind_reexport( - root_module_name, - full_module_name, - name, - attr, - )); - } - } else if is_reexport { - mod_token_stream.extend(bind_reexport( - root_module_name, - full_module_name, - name, - attr, - )); - } else if is_class { - mod_token_stream.extend(bind_class( - py, - root_module, - attr.downcast().unwrap(), - all_types, - )); - } else if is_function { - mod_token_stream.extend(bind_function(py, full_module_name, name, attr, all_types)); - } else { - mod_token_stream.extend(bind_attribute( - py, - full_module_name, - false, - name, - attr, - attr_type, - all_types, - )); - } - }); - - let mut doc = module.getattr("__doc__")?.to_string(); - if doc == "None" { - doc = String::new(); - }; - - Ok(if module_name == root_module_name { - quote::quote! 
{ - #[doc = #doc] - #[allow( - clippy::all, - non_camel_case_types, - non_snake_case, - non_upper_case_globals, - unused - )] - mod #module_ident { - #mod_token_stream - } - } - } else { - quote::quote! { - #[doc = #doc] - pub mod #module_ident { - #mod_token_stream - } - } - }) -} - -/// Generate a re-export of an attribute from a submodule. This is commonly used in Python to -/// re-export attributes from submodules in the parent module. For example, `from os import path` -/// makes the `os.path` submodule available in the current module as just `path`. -pub fn bind_reexport( - root_module_name: &str, - module_name: &str, - name: &str, - attr: &pyo3::PyAny, -) -> Result { - let full_attr_name = attr.getattr("__name__")?.to_string(); - let attr_name = if full_attr_name.contains('.') { - full_attr_name.split('.').last().unwrap() - } else { - full_attr_name.as_str() - }; - let is_module; - let attr_origin_module = if let Ok(module) = attr.getattr("__module__") { - is_module = false; - module.to_string() - } else { - is_module = true; - full_attr_name - .clone() - .split('.') - .take((full_attr_name.split('.').count() - 1).max(1)) - .join(".") - }; - - let n_common_ancestors = module_name - .split('.') - .zip(attr_origin_module.split('.')) - .take_while(|(a, b)| a == b) - .count(); - let current_module_depth = module_name.split('.').count(); - let reexport_path = if (current_module_depth - n_common_ancestors) > 0 { - std::iter::repeat("super".to_string()).take( - current_module_depth - n_common_ancestors - + usize::from(is_module && !full_attr_name.contains('.')), - ) - } else { - std::iter::repeat("self".to_string()).take(1) - }; - let reexport_path: String = reexport_path - .chain( - attr_origin_module - .split('.') - .skip(n_common_ancestors) - .map(|s| { - if syn::parse_str::(s).is_ok() { - s.to_owned() - } else { - format!("r#{s}") - } - }), - ) - .chain(std::iter::once(attr_name).map(|s| { - if syn::parse_str::(s).is_ok() { - s.to_owned() - } else { - 
format!("r#{s}") - } - })) - .join("::"); - - // The path contains both ident and "::", combine into something that can be quoted - let reexport_path = syn::parse_str::(&reexport_path).unwrap(); - - let visibility = if attr_name == root_module_name { - quote::quote! {} - } else { - quote::quote! { - pub - } - }; - - if attr_name == name { - Ok(quote::quote! { - #visibility use #reexport_path; - }) - } else { - let name = if syn::parse_str::(name).is_ok() { - quote::format_ident!("{}", name) - } else { - quote::format_ident!("r#{}", name) - }; - Ok(quote::quote! { - #visibility use #reexport_path as #name; - }) - } -} - -pub fn collect_types_of_module( - py: pyo3::Python, - root_module: &pyo3::types::PyModule, - module: &pyo3::types::PyModule, - processed_modules: &mut std::collections::HashSet, - all_types: &mut std::collections::HashSet, -) -> Result, pyo3::PyErr> { - let inspect = py.import("inspect")?; - - // Extract the names of the modules - let root_module_name = root_module.name()?; - let full_module_name = module.name()?; - - // Iterate over all attributes of the module while updating the token stream - module - .dir() - .iter() - .map(|name| { - let name = name.str().unwrap().to_str().unwrap(); - let attr = module.getattr(name).unwrap(); - let attr_type = attr.get_type(); - (name, attr, attr_type) - }) - .filter(|&(_, _, attr_type)| { - // Skip builtin functions - !attr_type - .is_subclass_of::() - .unwrap_or(false) - }) - .filter(|&(name, _, _)| { - // Skip private attributes - !name.starts_with('_') || name == "__init__" || name == "__call__" - }) - .filter(|(_, attr, attr_type)| { - // Skip typing attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("typing")) - && !attr_type.to_string().contains("typing") - }) - .filter(|(_, attr, _)| { - // Skip __future__ attributes - !attr - .getattr("__module__") - .is_ok_and(|module| module.to_string().contains("__future__")) - }) - .filter(|&(_, attr, _)| { - // Skip 
classes and functions that are not part of the package - // However, this should keep instances of classes and builtins even if they are builtins or from other packages - if let Ok(module) = attr.getattr("__module__") { - if module.to_string().starts_with(root_module_name) { - true - } else { - !(inspect - .call_method1("isclass", (attr,)) - .unwrap() - .is_true() - .unwrap() - || inspect - .call_method1("isfunction", (attr,)) - .unwrap() - .is_true() - .unwrap()) - } - } else { - true - } - }) - .filter(|&(_, attr, attr_type)| { - // Skip external modules - if attr_type - .is_subclass_of::() - .unwrap_or(false) - { - let is_part_of_package = attr - .getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(root_module_name)); - is_part_of_package - } else { - true - } - }) - .for_each(|(name, attr, attr_type)| { - let is_internal = attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .starts_with(root_module_name); - let is_reexport = is_internal - && attr - .getattr("__module__") - .unwrap_or(pyo3::types::PyString::new(py, "")) - .to_string() - .ne(full_module_name); - - let is_module = attr_type - .is_subclass_of::() - .unwrap_or(false); - - let is_class = attr_type - .is_subclass_of::() - .unwrap_or(false); - - // Process hidden modules (shadowed by re-exported attributes of the same name) - if is_class - && is_reexport - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .last() - .unwrap() - == name - && attr - .getattr("__module__") - .unwrap() - .to_string() - .split('.') - .take(full_module_name.split('.').count()) - .join(".") - == full_module_name - { - let full_class_name = - format!("{}.{}", full_module_name, attr.getattr("__name__").unwrap()); - all_types.insert(full_class_name.clone()); - let full_class_name = format!("{full_module_name}.{name}"); - all_types.insert(full_class_name.clone()); - } - - if is_module { - let is_submodule_of_current_module = attr - 
.getattr("__package__") - .is_ok_and(|package| package.to_string().starts_with(full_module_name)); - - if is_submodule_of_current_module - && processed_modules.insert(format!( - "{}.{}", - attr.getattr("__package__").unwrap(), - name - )) - { - let _ = collect_types_of_module( - py, - root_module, - attr.downcast().unwrap(), - processed_modules, - all_types, - ); - } - } else if is_class { - let full_class_name = - format!("{}.{}", full_module_name, attr.getattr("__name__").unwrap()); - all_types.insert(full_class_name.clone()); - let full_class_name = format!("{full_module_name}.{name}"); - all_types.insert(full_class_name.clone()); - } - }); - - Ok(all_types.clone()) -} diff --git a/pyo3_bindgen_engine/src/build_utils.rs b/pyo3_bindgen_engine/src/build_utils.rs deleted file mode 100644 index fd9c1b6..0000000 --- a/pyo3_bindgen_engine/src/build_utils.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Module with utilities for generating bindings in build scripts. - -/// Convenience function for generating bindings in build scripts. This function is equivalent to -/// calling `generate_bindings` and writing the result to a file. -/// -/// # Arguments -/// -/// * `module_name` - Name of the Python module to generate bindings for. -/// * `output_path` - Path to write the generated bindings to. -/// -/// # Returns -/// -/// `Result` containing `std::io::Error` on failure. -/// -/// # Example -/// -/// 1. Generate bindings using `build.rs` script. -/// -/// ```ignore -/// // build.rs -/// -/// // use pyo3_bindgen::build_bindings; -/// use pyo3_bindgen_engine::build_bindings; -/// -/// fn main() { -/// build_bindings( -/// "os", -/// std::path::Path::new(&std::env::var("OUT_DIR").unwrap()).join("bindings.rs"), -/// ) -/// .unwrap(); -/// } -/// ``` -/// -/// 2. Include the generated bindings in `src/lib.rs`. 
-/// -/// ```ignore -/// // src/lib.rs -/// -/// include!(concat!(env!("OUT_DIR"), "/bindings.rs")); -/// pub use os::*; -/// ``` -// TODO: Add `println!("cargo:rerun-if-changed={}.py");` for all files of the target Python module -pub fn build_bindings( - module_name: &str, - output_path: impl AsRef, -) -> std::io::Result<()> { - let bindings = crate::generate_bindings(module_name).map_err(|err| { - std::io::Error::new( - std::io::ErrorKind::Other, - format!("Failed to generate bindings for Python module '{module_name}': {err}"), - ) - })?; - std::fs::write(output_path, bindings.to_string()) -} diff --git a/pyo3_bindgen_engine/src/codegen.rs b/pyo3_bindgen_engine/src/codegen.rs new file mode 100644 index 0000000..241ff48 --- /dev/null +++ b/pyo3_bindgen_engine/src/codegen.rs @@ -0,0 +1,370 @@ +use crate::{ + syntax::{Ident, Import, Module, Path}, + Config, Result, +}; +use itertools::Itertools; +use rustc_hash::FxHashSet as HashSet; + +/// Engine for automatic generation of Rust FFI bindings to Python modules. +/// +/// # Examples +/// +/// Here is a simple example of how to use the `Codegen` engine to generate +/// Rust FFI bindings for the full `os` and `sys` Python modules. With the +/// default configuration, all submodules, classes, functions, and parameters +/// will be recursively parsed and included in the generated bindings. +/// +/// ```no_run +/// # use pyo3_bindgen_engine::{Codegen, Config}; +/// fn main() -> Result<(), Box> { +/// Codegen::new(Config::default())? +/// .module_name("os")? +/// .module_name("sys")? +/// .generate()?; +/// Ok(()) +/// } +/// ``` +/// +/// For more focused generation, paths to specific submodules can be provided. +/// In the following example, only the `core` and `utils.io` submodules of the +/// `other_module` module will be included in the generated bindings alongside +/// their respective submodules, classes, functions, and parameters. 
+/// +/// ```no_run +/// # use pyo3_bindgen_engine::{Codegen, Config}; +/// fn main() -> Result<(), Box> { +/// Codegen::new(Config::default())? +/// .module_names(&["other_module.core", "other_module.utils.io"])? +/// .generate()?; +/// Ok(()) +/// } +/// ``` +#[derive(Debug, Default, Clone)] +pub struct Codegen { + cfg: Config, + modules: Vec, +} + +impl Codegen { + /// Create a new `Codegen` engine with the given configuration. + pub fn new(cfg: Config) -> Result { + Ok(Self { + cfg, + ..Default::default() + }) + } + + /// Add a Python module to the list of modules for which to generate bindings. + pub fn module(mut self, module: &pyo3::types::PyModule) -> Result { + crate::io_utils::with_suppressed_python_output( + module.py(), + self.cfg.suppress_python_stdout, + self.cfg.suppress_python_stderr, + || { + self.modules.push(Module::parse(&self.cfg, module)?); + Ok(()) + }, + )?; + Ok(self) + } + + /// Add a Python module by its name to the list of modules for which to generate bindings. + pub fn module_name(self, module_name: &str) -> Result { + #[cfg(not(PyPy))] + pyo3::prepare_freethreaded_python(); + pyo3::Python::with_gil(|py| { + let module = py.import(module_name)?; + self.module(module) + }) + } + + /// Add a Python module from its source code and name to the list of modules for which to generate bindings. + pub fn module_from_str(self, source_code: &str, new_module_name: &str) -> Result { + #[cfg(not(PyPy))] + pyo3::prepare_freethreaded_python(); + pyo3::Python::with_gil(|py| { + let module = pyo3::types::PyModule::from_code( + py, + source_code, + &format!("{new_module_name}/__init__.py"), + new_module_name, + )?; + self.module(module) + }) + } + + /// Add multiple Python modules to the list of modules for which to generate bindings. 
+ pub fn modules(mut self, modules: &[&pyo3::types::PyModule]) -> Result { + self.modules.reserve(modules.len()); + for module in modules { + self = self.module(module)?; + } + Ok(self) + } + + /// Add multiple Python modules by their names to the list of modules for which to generate bindings. + pub fn module_names(mut self, module_names: &[&str]) -> Result { + self.modules.reserve(module_names.len()); + for module_name in module_names { + self = self.module_name(module_name)?; + } + Ok(self) + } + + /// Generate the Rust FFI bindings for all modules added to the engine. + pub fn generate(mut self) -> Result { + assert!( + !self.modules.is_empty(), + "There are no modules for which to generate bindings" + ); + + // Parse external modules (if enabled) + if self.cfg.generate_dependencies { + self.parse_dependencies()?; + } + + // Canonicalize the module tree + self.canonicalize(); + + // Generate the bindings for all modules + self.modules + .iter() + .map(|module| module.generate(&self.cfg, &self.modules, &self.get_all_types())) + .collect::>() + } + + /// Generate the Rust FFI bindings for all modules added to the engine and write them to the given file. + /// This is a convenience method that combines `generate` and `std::fs::write`. + pub fn build(self, output_path: impl AsRef) -> Result<()> { + Ok(std::fs::write(output_path, self.generate()?.to_string())?) + } + + fn parse_dependencies(&mut self) -> Result<()> { + fn get_imports_recursive(input: &[Module]) -> Vec { + let mut imports = Vec::new(); + for module in input { + imports.extend( + module + .imports + .iter() + .filter(|import| import.is_external()) + .cloned(), + ); + imports.extend(get_imports_recursive(&module.submodules)); + } + imports + } + + // Get a unique list of all external imports (these could be modules, classes, functions, etc.) 
+ let external_imports = get_imports_recursive(&self.modules) + .into_iter() + .filter(super::syntax::import::Import::is_external) + .map(|import| import.origin.clone()) + .unique() + .collect_vec(); + + // Parse the external imports and add them to the module tree + pyo3::Python::with_gil(|py| { + external_imports + .iter() + // Get the last valid module within the path of the import + .map(|import| { + let mut last_module = py + .import( + import + .root() + .unwrap_or_else(|| unreachable!()) + .to_py() + .as_str(), + ) + .unwrap(); + for path in &import[1..] { + if let Ok(attr) = last_module.getattr(path.as_py()) { + if let Ok(module) = attr.extract::<&pyo3::types::PyModule>() { + last_module = module; + } else { + break; + } + } else { + break; + } + } + last_module + }) + // Parse the module and add it to the module tree + .unique_by(|module| module.name().unwrap().to_string()) + // Filter attributes based on various configurable conditions + .filter(|module| { + self.cfg.is_attr_allowed( + &Ident::from_py(module.name().unwrap()), + &Path::from_py( + &module + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ), + py.get_type::(), + ) + }) + .try_for_each(|module| { + crate::io_utils::with_suppressed_python_output( + module.py(), + self.cfg.suppress_python_stdout, + self.cfg.suppress_python_stderr, + || { + self.modules.push(Module::parse(&self.cfg, module)?); + Ok(()) + }, + ) + })?; + Ok(()) + }) + } + + fn canonicalize(&mut self) { + // Canonicalize the module tree, such that no submodules remain at the top-level + // Example: If `mod.submod.subsubmod` is currently top-level, it will be embedded as submodule into `mod.submod` + // and `mod.submod` will be embedded in top-level `mod` + pyo3::Python::with_gil(|py| { + self.modules.iter_mut().for_each(|module| { + if module.name.len() > 1 { + *module = + (0..module.name.len() - 1) + .rev() + .fold(module.clone(), |package, i| { + let name = 
Path::from(&module.name[0..=i]); + let mut parent_package = + Module::empty(py, name).unwrap_or_else(|_| unreachable!()); + parent_package.submodules.push(package); + parent_package + }); + } + }); + }); + + // Merge duplicate modules in the tree + self.merge_duplicate_modules(); + } + + fn merge_duplicate_modules(&mut self) { + fn get_duplicate_modules(modules: &mut [Module]) -> Vec> { + modules.sort_by(|a, b| a.name.cmp(&b.name)); + let mut i = 0; + let mut duplicates = Vec::new(); + while i < modules.len() { + let name = modules[i].name.clone(); + let span = modules + .iter() + .skip(i) + .take_while(|module| module.name == name) + .count(); + if span > 1 { + duplicates.push(i..i + span); + } + i += span; + } + duplicates + } + + fn merge_duplicate_submodules_recursive(input: &[Module]) -> Module { + Module { + prelude: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.prelude.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + imports: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.imports.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + submodules: { + let mut submodules = + input.iter().fold(Vec::default(), |mut submodule, module| { + submodule.extend(module.submodules.iter().cloned()); + submodule + }); + get_duplicate_modules(&mut submodules) + .into_iter() + .rev() + .for_each(|range| { + submodules[range.start] = + merge_duplicate_submodules_recursive(&submodules[range.clone()]); + submodules.drain(range.start + 1..range.end); + }); + submodules + }, + classes: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.classes.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + functions: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.functions.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + type_vars: input + .iter() + 
.fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.type_vars.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + properties: input + .iter() + .fold(HashSet::default(), |mut prelude, module| { + prelude.extend(module.properties.iter().cloned()); + prelude + }) + .into_iter() + .collect(), + ..input[0].clone() + } + } + + get_duplicate_modules(&mut self.modules) + .into_iter() + .rev() + .for_each(|range| { + self.modules[range.start] = + merge_duplicate_submodules_recursive(&self.modules[range.clone()]); + self.modules.drain(range.start + 1..range.end); + }); + } + + fn get_all_types(&self) -> Vec { + fn get_types_recursive(input: &[Module]) -> Vec { + let mut types = Vec::new(); + for module in input { + types.extend(module.classes.iter().map(|class| class.name.clone())); + types.extend( + module + .type_vars + .iter() + .map(|type_var| type_var.name.clone()), + ); + types.extend(get_types_recursive(&module.submodules)); + } + types + } + + get_types_recursive(&self.modules) + .into_iter() + .unique() + .collect() + } +} diff --git a/pyo3_bindgen_engine/src/config.rs b/pyo3_bindgen_engine/src/config.rs new file mode 100644 index 0000000..3bc7d1f --- /dev/null +++ b/pyo3_bindgen_engine/src/config.rs @@ -0,0 +1,106 @@ +use crate::syntax::{Ident, Path}; + +/// Array of forbidden attribute names that are reserved for internal use by derived traits +pub const FORBIDDEN_FUNCTION_NAMES: [&str; 4] = ["get_type", "obj", "repr", "str"]; +/// Array of forbidden type names +pub const FORBIDDEN_TYPE_NAMES: [&str; 6] = [ + "_collections._tuplegetter", + "AsyncState", + "getset_descriptor", + "member_descriptor", + "method_descriptor", + "property", +]; + +/// Default array of blocklisted attribute names +const DEFAULT_BLOCKLIST_ATTRIBUTE_NAMES: [&str; 4] = ["builtins", "testing", "tests", "test"]; + +/// Configuration for `Codegen` engine. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, typed_builder::TypedBuilder)] +pub struct Config { + /// Flag that determines whether to recursively generate code for all submodules of the target modules. + #[builder(default = true)] + pub(crate) traverse_submodules: bool, + + /// Flag that determines whether to generate code for prelude modules (Python `__all__` attribute). + #[builder(default = true)] + pub(crate) generate_preludes: bool, + /// Flag that determines whether to generate code for imports. + #[builder(default = true)] + pub(crate) generate_imports: bool, + /// Flag that determines whether to generate code for classes. + #[builder(default = true)] + pub(crate) generate_classes: bool, + /// Flag that determines whether to generate code for type variables. + #[builder(default = true)] + pub(crate) generate_type_vars: bool, + /// Flag that determines whether to generate code for functions. + #[builder(default = true)] + pub(crate) generate_functions: bool, + /// Flag that determines whether to generate code for properties. + #[builder(default = true)] + pub(crate) generate_properties: bool, + /// Flag that determines whether to generate documentation for the generated code. + /// The documentation is based on Python docstrings. + #[builder(default = true)] + pub(crate) generate_docs: bool, + + /// List of blocklisted attribute names that are skipped during the code generation. + #[builder(default = DEFAULT_BLOCKLIST_ATTRIBUTE_NAMES.iter().map(|&s| s.to_string()).collect())] + pub(crate) blocklist_names: Vec, + /// Flag that determines whether private attributes are considered while parsing the Python code. + #[builder(default = false)] + pub(crate) include_private: bool, + + /// Flag that determines whether to generate code for all dependencies of the target modules. + /// The list of dependent modules is derived from the imports of the target modules. + /// + /// Warning: This feature is not fully supported yet. 
+ #[builder(default = false)] + pub(crate) generate_dependencies: bool, + + /// Flag that suppresses the generation of Python STDOUT while parsing the Python code. + #[builder(default = true)] + pub(crate) suppress_python_stdout: bool, + /// Flag that suppresses the generation of Python STDERR while parsing the Python code. + #[builder(default = true)] + pub(crate) suppress_python_stderr: bool, +} + +impl Default for Config { + fn default() -> Self { + Self::builder().build() + } +} + +impl Config { + pub(crate) fn is_attr_allowed( + &self, + attr_name: &Ident, + attr_module: &Path, + attr_type: &pyo3::types::PyType, + ) -> bool { + if + // Skip always forbidden attribute names + FORBIDDEN_FUNCTION_NAMES.contains(&attr_name.as_py()) || + // Skip private attributes if `include_private` is disabled + (!self.include_private && + (attr_name.as_py().starts_with('_') || + attr_module.iter().any(|segment| segment.as_py().starts_with('_')))) || + // Skip blocklisted attributes + self.blocklist_names.iter().any(|blocklist_match| { + attr_name.as_py() == blocklist_match + }) || + // Skip builtin functions + attr_type.is_subclass_of::().unwrap_or(false) || + // Skip `__future__` attributes + attr_module.iter().any(|segment| segment.as_py() == "__future__") || + // Skip `typing` attributes + attr_module.iter().any(|segment| segment.as_py() == "typing") + { + false + } else { + true + } + } +} diff --git a/pyo3_bindgen_engine/src/lib.rs b/pyo3_bindgen_engine/src/lib.rs index d4400ff..4ccbcb4 100644 --- a/pyo3_bindgen_engine/src/lib.rs +++ b/pyo3_bindgen_engine/src/lib.rs @@ -1,8 +1,19 @@ //! Engine for automatic generation of Rust FFI bindings to Python modules. 
-pub mod bindgen; -pub mod build_utils; -pub mod types; +mod codegen; +mod config; +mod syntax; +mod typing; +mod utils; -pub use bindgen::{generate_bindings, generate_bindings_for_module, generate_bindings_from_str}; -pub use build_utils::build_bindings; +// Internal re-exports for convenience +use utils::io as io_utils; +use utils::result::Result; + +// Public API re-exports +pub use codegen::Codegen; +pub use config::Config; +pub use utils::{error::PyBindgenError, result::PyBindgenResult}; + +// Public re-export of PyO3 for convenience +pub use pyo3; diff --git a/pyo3_bindgen_engine/src/syntax/class.rs b/pyo3_bindgen_engine/src/syntax/class.rs new file mode 100644 index 0000000..95ff3fe --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/class.rs @@ -0,0 +1,261 @@ +use super::{ + AttributeVariant, Function, FunctionType, Ident, MethodType, Path, Property, PropertyOwner, +}; +use crate::{Config, Result}; +use itertools::Itertools; +use rustc_hash::FxHashMap as HashMap; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Class { + pub name: Path, + // subclasses: Vec, + methods: Vec, + properties: Vec, + docstring: Option, +} + +impl Class { + pub fn parse(cfg: &Config, class: &pyo3::types::PyType, name: Path) -> Result { + let py = class.py(); + + // Initialize lists for all members of the class + // let mut subclasses = Vec::new(); + let mut methods = Vec::new(); + let mut properties = Vec::new(); + + // Extract the list of all attribute names of the class (as reported by its `dir()`) + class + .dir() + .iter() + // Convert each attribute name to an identifier + .map(|attr_name| Ident::from_py(&attr_name.to_string())) + .unique() + // TODO: Try to first access the attribute via __dict__ because Python's descriptor protocol might change the attributes obtained via getattr() + // - For example, classmethod and staticmethod are converted to method/function + // - However, this might also change some of the parsing and it would need to be fixed + // Expand each attribute to a tuple of 
(attr, attr_name, attr_module, attr_type) + .filter_map(|attr_name| { + if let Ok(attr) = class.getattr(attr_name.as_py()) { + + let attr_module = Path::from_py( + &attr + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + let attr_type = attr.get_type(); + + Some((attr, attr_name, attr_module, attr_type)) + } else { + eprintln!( + "WARN: Cannot get attribute '{attr_name}' of '{name}' even though it is listed in its `__dir__`. Bindings will not be generated.", + ); + None + } + }) + // Filter attributes based on various configurable conditions + .filter(|(_attr, attr_name, attr_module, attr_type)| { + cfg.is_attr_allowed(attr_name, attr_module, attr_type) + || ["__init__", "__call__"].contains(&attr_name.as_py()) + }) + // Iterate over the remaining attributes and parse them + .try_for_each(|(attr, attr_name, attr_module, attr_type)| { + let attr_name_full = name.join(&attr_name.clone().into()); + match AttributeVariant::determine(py, attr, attr_type, &attr_module, &name, false) + ? + { + AttributeVariant::Import => { + eprintln!("WARN: Imports in classes are not supported: '{name}.{attr_name}'. Bindings will not be generated."); + } + AttributeVariant::Module => { + eprintln!( + "WARN: Submodules in classes are not supported: '{name}.{attr_name}'. Bindings will not be generated.", + ); + } + AttributeVariant::Class => { + // let subclass = + // Self::parse(cfg, attr.downcast()?, attr_name_full)?; + // subclasses.push(subclass); + eprintln!( + "WARN: Subclasses in classes are not supported: '{name}.{attr_name}'. 
Bindings will not be generated.", + ); + } + AttributeVariant::Function | AttributeVariant::Method => { + let method = Function::parse( + cfg, + attr, + attr_name_full, + FunctionType::Method { + class_path: name.clone(), + typ: match attr_name.as_py() { + "__init__" => MethodType::Constructor, + "__call__" => MethodType::Callable, + _ => MethodType::Unknown, + }, + }, + ) + ?; + methods.push(method); + } + AttributeVariant::Closure => { + eprintln!("WARN: Closures are not supported in classes: '{attr_name}'. Bindings will not be generated."); + } + AttributeVariant::TypeVar => { + eprintln!("WARN: TypesVars are not supported in classes: '{attr_name}'. Bindings will not be generated."); + } + AttributeVariant::Property => { + let property = Property::parse( + cfg, + attr, + attr_name_full, + PropertyOwner::Class, + ) + ?; + properties.push(property); + } + } + Result::Ok(()) + })?; + + // Extract the docstring of the class + let docstring = { + let docstring = class.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + Ok(Self { + name, + // subclasses, + methods, + properties, + docstring, + }) + } + + pub fn generate( + &self, + cfg: &Config, + local_types: &HashMap, + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! 
{ + #[doc = #docstring] + }); + } + } + + // Generate the struct + let struct_ident: syn::Ident = { + let name = self.name.name(); + if let Ok(ident) = name.try_into() { + ident + } else { + // Sanitize the struct name + let new_name = Ident::from_py(&format!( + "s_{}", + name.as_py().replace(|c: char| !c.is_alphanumeric(), "_") + )); + if let Ok(sanitized_ident) = new_name.clone().try_into() { + eprintln!( + "WARN: Struct '{}' is an invalid Rust ident for a struct name. Renamed to '{}'.", + self.name, self.name.parent().unwrap_or_default().join(&new_name.into()) + ); + sanitized_ident + } else { + eprintln!( + "WARN: Struct '{}' is an invalid Rust ident for a struct name. Renaming failed. Bindings will not be generated.", + self.name + ); + return Ok(proc_macro2::TokenStream::new()); + } + } + }; + output.extend(quote::quote! { + #[repr(transparent)] + pub struct #struct_ident(::pyo3::PyAny); + }); + + // Employ pyo3 macros for native types + // Note: Using these macros is probably not the best idea, but it makes possible wrapping around ::pyo3::PyAny instead of ::pyo3::PyObject, which improves usability + let object_name = self.name.to_py(); + output.extend(quote::quote! 
{ + ::pyo3::pyobject_native_type_named!(#struct_ident); + ::pyo3::pyobject_native_type_info!(#struct_ident, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), ::std::option::Option::Some(#object_name)); + ::pyo3::pyobject_native_type_extract!(#struct_ident); + }); + + // Get the names of all methods to avoid name clashes + let mut scoped_function_idents = self + .methods + .iter() + .map(|method| method.name.name()) + .collect::>(); + + // Generate the struct implementation block + let mut struct_impl = proc_macro2::TokenStream::new(); + // Methods + struct_impl.extend( + self.methods + .iter() + .map(|method| method.generate(cfg, &scoped_function_idents, local_types)) + .collect::>()?, + ); + // Properties + { + let mut scoped_function_idents_extra = Vec::with_capacity(2); + if self.methods.iter().any(|method| { + matches!( + method.typ, + FunctionType::Method { + typ: MethodType::Constructor, + .. + } + ) + }) { + scoped_function_idents_extra.push(Ident::from_py("new")); + } + if self.methods.iter().any(|method| { + matches!( + method.typ, + FunctionType::Method { + typ: MethodType::Callable, + .. + } + ) + }) { + scoped_function_idents_extra.push(Ident::from_py("call")); + } + scoped_function_idents.extend(scoped_function_idents_extra.iter()); + struct_impl.extend( + self.properties + .iter() + .map(|property| property.generate(cfg, &scoped_function_idents, local_types)) + .collect::>()?, + ); + } + + // Finalize the implementation block of the struct + output.extend(quote::quote! 
{ + #[automatically_derived] + impl #struct_ident { + #struct_impl + } + }); + + Ok(output) + } +} diff --git a/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs new file mode 100644 index 0000000..70771fa --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/attribute_variant.rs @@ -0,0 +1,79 @@ +use crate::{ + syntax::{Ident, Path}, + Result, +}; + +pub enum AttributeVariant { + Import, + Module, + Class, + Function, + Method, + Closure, + TypeVar, + Property, +} + +impl AttributeVariant { + pub fn determine( + py: pyo3::prelude::Python, + attr: &pyo3::prelude::PyAny, + attr_type: &pyo3::types::PyType, + attr_module: &Path, + owner_name: &Path, + consider_import: bool, + ) -> Result { + let inspect = py.import("inspect")?; + + // Get the name and module of the attribute type + let attr_type_name = Ident::from_py(attr_type.name().unwrap_or_default()); + let attr_type_module = Path::from_py( + &attr_type + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + + // Determine the type of the attribute + let is_submodule = attr_type + .is_subclass_of::() + .unwrap_or(false); + let is_class = attr_type + .is_subclass_of::() + .unwrap_or(false); + let is_function = inspect + .call_method1(pyo3::intern!(py, "isfunction"), (attr,))? + .is_true()?; + let is_method = inspect + .call_method1(pyo3::intern!(py, "ismethod"), (attr,))? 
+ .is_true()?; + let is_closure = + attr_type_module.to_py().as_str() == "functools" && attr_type_name.as_py() == "partial"; + let is_type = ["typing", "types"].contains(&attr_type_module.to_py().as_str()); + + // Some decorators might make a class look external, but they tend to include "" in their name + let is_in_locals = attr.to_string().contains(""); + + // Determine if the attribute is imported + let is_external = !is_in_locals && (attr_module != owner_name); + let is_imported = is_external && (is_submodule || is_class || is_function || is_method); + + Ok(if consider_import && is_imported { + AttributeVariant::Import + } else if is_submodule { + AttributeVariant::Module + } else if is_class { + AttributeVariant::Class + } else if is_function { + AttributeVariant::Function + } else if is_method { + AttributeVariant::Method + } else if is_closure { + AttributeVariant::Closure + } else if is_type { + AttributeVariant::TypeVar + } else { + AttributeVariant::Property + }) + } +} diff --git a/pyo3_bindgen_engine/src/syntax/common/ident.rs b/pyo3_bindgen_engine/src/syntax/common/ident.rs new file mode 100644 index 0000000..b6fda6a --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/ident.rs @@ -0,0 +1,112 @@ +#[repr(transparent)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Ident(String); + +impl Ident { + pub fn from_rs(value: &str) -> Self { + debug_assert!(!value.is_empty()); + Self(value.to_owned()) + } + + pub fn from_py(value: &str) -> Self { + debug_assert!(!value.is_empty()); + Self(Self::py_to_rs(value)) + } + + pub fn into_rs(self) -> String { + self.0 + } + + pub fn as_rs(&self) -> &str { + &self.0 + } + + pub fn as_py(&self) -> &str { + Self::rs_as_py(&self.0) + } + + fn rs_as_py(value: &str) -> &str { + value.strip_prefix("r#").unwrap_or(value) + } + + fn py_to_rs(value: &str) -> String { + if syn::parse_str::(value).is_ok() { + value.to_owned() + } else { + format!("r#{value}") + } + } +} + +impl TryFrom for syn::Ident { + type Error 
= syn::Error; + fn try_from(value: Ident) -> Result { + syn::parse_str::(&value.into_rs()) + } +} + +impl TryFrom<&Ident> for syn::Ident { + type Error = syn::Error; + fn try_from(value: &Ident) -> Result { + syn::parse_str::(value.as_rs()) + } +} + +impl std::cmp::PartialOrd for Ident { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl std::cmp::Ord for Ident { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.as_py().cmp(other.as_py()) + } +} + +impl std::ops::Deref for Ident { + type Target = str; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl std::fmt::Display for Ident { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.as_py()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_rs() { + let ident = Ident::from_rs("ident"); + assert_eq!(ident.as_rs(), "ident"); + assert_eq!(ident.as_py(), "ident"); + assert_eq!(ident.into_rs(), "ident"); + } + + #[test] + fn test_from_py() { + let ident = Ident::from_py("ident"); + assert_eq!(ident.as_rs(), "ident"); + assert_eq!(ident.as_py(), "ident"); + } + + #[test] + fn test_from_py_keyword() { + let ident = Ident::from_py("struct"); + assert_eq!(ident.as_rs(), "r#struct"); + assert_eq!(ident.as_py(), "struct"); + } + + #[test] + fn test_into_syn() { + let ident = Ident::from_rs("ident"); + let _syn_ident: syn::Ident = (&ident).try_into().unwrap(); + let _syn_ident: syn::Ident = ident.try_into().unwrap(); + } +} diff --git a/pyo3_bindgen_engine/src/syntax/common/mod.rs b/pyo3_bindgen_engine/src/syntax/common/mod.rs new file mode 100644 index 0000000..42a109c --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/mod.rs @@ -0,0 +1,7 @@ +pub(crate) mod attribute_variant; +pub(crate) mod ident; +pub(crate) mod path; + +pub use attribute_variant::AttributeVariant; +pub use ident::Ident; +pub use path::Path; diff --git a/pyo3_bindgen_engine/src/syntax/common/path.rs 
b/pyo3_bindgen_engine/src/syntax/common/path.rs new file mode 100644 index 0000000..175df8a --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/common/path.rs @@ -0,0 +1,354 @@ +use super::Ident; +use itertools::Itertools; + +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] +pub struct Path { + pub leading_colon: bool, + segments: Vec, +} + +impl Path { + pub fn from_rs(value: &str) -> Self { + if value.is_empty() { + return Self::default(); + } + debug_assert!(!value.contains('.'), "Invalid Rust path: {value}"); + Self { + leading_colon: value.starts_with("::"), + segments: value + .split("::") + .filter(|s| !s.is_empty()) + .map(Ident::from_rs) + .collect(), + } + } + + pub fn from_py(value: &str) -> Self { + if value.is_empty() { + return Self::default(); + } + debug_assert!(!value.contains("::"), "Invalid Python path: {value}"); + Self { + leading_colon: false, + segments: std::iter::repeat(Ident::from_rs("super")) + .take(value.chars().take_while(|&c| c == '.').count()) + .chain( + value + .split('.') + .filter(|s| !s.is_empty()) + .map(Ident::from_py), + ) + .collect_vec(), + } + } + + pub fn into_rs(self) -> String { + std::iter::repeat(String::new()) + .take(usize::from(self.leading_colon)) + .chain(self.segments.into_iter().map(Ident::into_rs)) + .collect_vec() + .join("::") + } + + pub fn to_rs(&self) -> String { + std::iter::repeat("") + .take(usize::from(self.leading_colon)) + .chain(self.segments.iter().map(Ident::as_rs)) + .collect_vec() + .join("::") + } + + pub fn to_py(&self) -> String { + self.segments + .iter() + .map(Ident::as_py) + .map(|s| if s == "super" { "" } else { s }) + .collect_vec() + .join(".") + } + + pub fn join(&self, other: &Path) -> Self { + assert!( + !other.leading_colon, + "Leading colon is not allowed in the second path when joining" + ); + Self { + leading_colon: self.leading_colon, + segments: self + .segments + .iter() + .cloned() + .chain(other.iter().cloned()) + .collect(), + } + } + + pub fn concat(&self, other: 
&Path) -> Self { + assert!( + !other.leading_colon, + "Leading colon is not allowed in the second path when concatenating" + ); + Self { + leading_colon: self.leading_colon, + segments: self + .segments + .iter() + .chain(&other.segments) + .cloned() + .collect(), + } + } + + pub fn name(&self) -> &Ident { + self.segments.last().unwrap() + } + + pub fn root(&self) -> Option { + if self.segments.is_empty() { + None + } else { + Some(Self { + leading_colon: self.leading_colon, + segments: vec![self.segments[0].clone()], + }) + } + } + + pub fn parent(&self) -> Option { + if self.segments.len() > 1 { + Some(Self { + leading_colon: self.leading_colon, + segments: self.segments[..self.segments.len() - 1].to_vec(), + }) + } else { + None + } + } + + /// Define a fully qualified path from self to target. + /// Use self if they start at the same point. + /// Use super to go up the hierarchy. + /// If they do not share any common prefix, use super once for every segment of self before appending the segments of target. + pub fn relative_to(&self, target: &Path, fully_unambiguous: bool) -> Self { + if self == target { + return if fully_unambiguous { + Path { + leading_colon: false, + segments: vec![Ident::from_rs("super"), target.name().clone()], + } + } else { + Path { + leading_colon: false, + segments: vec![Ident::from_rs("self")], + } + }; + } + + // Find the length of the common prefix + let common_prefix_length = self + .segments + .iter() + .zip(target.segments.iter()) + .take_while(|(a, b)| a == b) + .count(); + + // Determine the relative path + let mut relative_segments = if fully_unambiguous { + match common_prefix_length { + n if n < self.segments.len() => std::iter::repeat(Ident::from_rs("super")) + .take(self.segments.len() - n) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + n if n == self.segments.len() => std::iter::once(Ident::from_rs("self")) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + _ => { + unreachable!() + } + } + } else { + match 
common_prefix_length { + n if n < self.segments.len() => std::iter::repeat(Ident::from_rs("super")) + .take(self.segments.len() - n) + .chain(target.segments.iter().skip(n).cloned()) + .collect_vec(), + n if n == self.segments.len() => { + target.segments.iter().skip(n).cloned().collect_vec() + } + _ => { + unreachable!() + } + } + }; + + if fully_unambiguous { + // If the relative segment ends with "super", fully specify the path by adding another "super" and the name of the target + if relative_segments.last().map(Ident::as_rs) == Some("super") { + relative_segments.extend([Ident::from_rs("super"), target.name().clone()]); + } + } + + Path { + leading_colon: false, + segments: relative_segments, + } + } + + pub fn import_quote(&self, py: pyo3::marker::Python) -> proc_macro2::TokenStream { + // Find the last package and import it via py.import, then get the rest of the path via getattr() + let mut package_path = self.root().unwrap_or_else(|| unreachable!()); + for i in (1..self.len()).rev() { + let module_name = Self::from(&self[..i]); + if py.import(module_name.to_py().as_str()).is_ok() { + package_path = module_name; + break; + } + } + + // Resolve the remaining path + let remaining_path = self + .strip_prefix(package_path.segments.as_slice()) + .unwrap_or_else(|| unreachable!()); + + // Convert paths to strings + let package_path = package_path.to_py(); + let remaining_path = remaining_path + .iter() + .map(|ident| ident.as_py().to_owned()) + .collect_vec(); + + // Generate the import code + quote::quote! 
{ + py.import(::pyo3::intern!(py, #package_path))?#(.getattr(::pyo3::intern!(py, #remaining_path))?)* + } + } +} + +impl From for Path { + fn from(ident: Ident) -> Self { + Self { + leading_colon: false, + segments: vec![ident], + } + } +} + +impl From<&[Ident]> for Path { + fn from(segments: &[Ident]) -> Self { + Self { + leading_colon: false, + segments: segments.to_owned(), + } + } +} + +impl std::cmp::PartialOrd for Path { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl std::cmp::Ord for Path { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.to_py().cmp(&other.to_py()) + } +} + +impl TryFrom for syn::Path { + type Error = syn::Error; + fn try_from(value: Path) -> Result { + syn::parse_str::(&value.into_rs()) + } +} + +impl TryFrom<&Path> for syn::Path { + type Error = syn::Error; + fn try_from(value: &Path) -> Result { + syn::parse_str::(&value.to_rs()) + } +} + +impl std::ops::Deref for Path { + type Target = [Ident]; + fn deref(&self) -> &Self::Target { + &self.segments + } +} + +impl std::ops::DerefMut for Path { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.segments + } +} + +impl std::fmt::Display for Path { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.to_py()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_from_rs() { + let path = Path::from_rs("long::path::to"); + assert_eq!(path.to_rs(), "long::path::to"); + assert_eq!(path.to_py(), "long.path.to"); + assert_eq!(path.into_rs(), "long::path::to"); + } + + #[test] + fn test_from_rs_leading_colon() { + let path = Path::from_rs("::long::path::to"); + assert_eq!(path.to_rs(), "::long::path::to"); + assert_eq!(path.to_py(), "long.path.to"); + } + + #[test] + fn test_from_py() { + let path = Path::from_py("long.path.to"); + assert_eq!(path.to_py(), "long.path.to"); + assert_eq!(path.to_rs(), "long::path::to"); + } + + #[test] + fn test_from_py_relative() { + let path 
= Path::from_py("..long.path.to"); + assert_eq!(path.to_py(), "..long.path.to"); + assert_eq!(path.to_rs(), "super::super::long::path::to"); + } + + #[test] + fn test_from_py_keyword() { + let path = Path::from_py("mod.struct"); + assert_eq!(path.to_py(), "mod.struct"); + assert_eq!(path.to_rs(), "r#mod::r#struct"); + } + + #[test] + fn test_name() { + let path = Path::from_rs("long::path::to"); + assert_eq!(path.name().as_rs(), "to"); + } + + #[test] + fn test_root() { + let path = Path::from_rs("long::path::to"); + assert_eq!(path.root().unwrap().to_rs(), "long"); + } + + #[test] + fn test_parent() { + let path = Path::from_rs("long::path::to"); + assert_eq!(path.parent().unwrap().to_rs(), "long::path"); + } + + #[test] + fn test_into_syn() { + let path = Path::from_rs("long::path::to"); + let _syn_path: syn::Path = (&path).try_into().unwrap(); + let _syn_path: syn::Path = path.try_into().unwrap(); + } +} diff --git a/pyo3_bindgen_engine/src/syntax/function.rs b/pyo3_bindgen_engine/src/syntax/function.rs new file mode 100644 index 0000000..93ef5d2 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/function.rs @@ -0,0 +1,702 @@ +use super::{Ident, Path}; +use crate::{typing::Type, Config, Result}; +use itertools::Itertools; +use pyo3::{types::IntoPyDict, ToPyObject}; +use rustc_hash::FxHashMap as HashMap; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Function { + pub name: Path, + pub typ: FunctionType, + parameters: Vec, + return_annotation: Type, + docstring: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum FunctionType { + Function, + Method { class_path: Path, typ: MethodType }, + Closure, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum MethodType { + InstanceMethod, + ClassMethod, + StaticMethod, + Constructor, + Callable, + Unknown, +} + +impl Function { + pub fn parse( + _cfg: &Config, + function: &pyo3::types::PyAny, + name: Path, + mut typ: FunctionType, + ) -> Result { + let py = function.py(); + + // 
Extract the docstring of the function + let docstring = { + let docstring = function.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + // Extract the signature of the function + if let Ok(function_signature) = py + .import(pyo3::intern!(py, "inspect"))? + .call_method1(pyo3::intern!(py, "signature"), (function,)) + { + // Extract the parameters of the function + let mut parameters = function_signature + .getattr(pyo3::intern!(py, "parameters"))? + .call_method0(pyo3::intern!(py, "values"))? + .iter()? + .map(|param| { + let param = param?; + + let name = + Ident::from_py(¶m.getattr(pyo3::intern!(py, "name"))?.to_string()); + let kind = ParameterKind::from( + param.getattr(pyo3::intern!(py, "kind"))?.extract::()?, + ); + let annotation = match kind { + ParameterKind::VarPositional => Type::PyTuple(vec![Type::Unknown]), + ParameterKind::VarKeyword => Type::PyDict { + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), + }, + _ => { + let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; + if annotation.is(param.getattr(pyo3::intern!(py, "empty"))?) { + Type::Unknown + } else { + annotation.try_into()? + } + } + }; + + let default = { + let default = param.getattr(pyo3::intern!(py, "default"))?; + if default.is(param.getattr(pyo3::intern!(py, "empty"))?) { + None + } else { + Some(default.to_object(py)) + } + }; + + Result::Ok(Parameter { + name, + kind, + annotation, + default, + }) + }) + .collect::>>()?; + + // Retain only used parameters (discard unused `_` parameters) + parameters.retain(|param| param.name.as_rs() != "r#_"); + + // Extract the return annotation of the function + let return_annotation = { + let return_annotation = + function_signature.getattr(pyo3::intern!(py, "return_annotation"))?; + if return_annotation.is(function_signature.getattr(pyo3::intern!(py, "empty"))?) 
{ + Type::Unknown + } else { + return_annotation.try_into()? + } + }; + + // If marked as an unknown method, try to infer the method type + match &typ { + FunctionType::Method { + class_path, + typ: method_typ, + } if *method_typ == MethodType::Unknown => { + // Get the class object from its class path + let class = py + .import( + class_path + .root() + .unwrap_or_else(|| unreachable!()) + .to_py() + .as_str(), + ) + .and_then(|root_module| { + class_path.iter().skip(1).try_fold( + root_module.extract::<&pyo3::types::PyAny>()?, + |module, name| module.getattr(name.as_py()), + ) + }); + + // Try to get the static object of the method (from __dict__), which still contains information about what kind of method it is + if let Ok(static_fn_obj) = class.and_then(|class| { + class + .getattr(pyo3::intern!(py, "__dict__"))? + .get_item(name.name().as_py()) + }) { + let locals = [("obj", static_fn_obj)].into_py_dict(py); + let method_type = if py + .eval("isinstance(obj, classmethod)", None, Some(locals))? + .is_true()? + { + MethodType::ClassMethod + } else if py + .eval("isinstance(obj, staticmethod)", None, Some(locals))? + .is_true()? 
+ { + MethodType::StaticMethod + } else { + MethodType::InstanceMethod + }; + typ = FunctionType::Method { + class_path: class_path.clone(), + typ: method_type, + }; + } else { + // Cannot determine the method type, default to static method (will be changed to instance method if the first parameter is named 'self') + typ = FunctionType::Method { + class_path: class_path.clone(), + typ: MethodType::StaticMethod, + }; + } + } + _ => {} + }; + + // As a final step in determining the method type, check parameters for all non-instance/callable methods + // Note: This is not 100% reliable, because Python does not enforce the first parameter to be named "self" + // TODO: See if there is a better way to infer the method type from parameters alone + match &typ { + FunctionType::Method { + typ: MethodType::InstanceMethod | MethodType::Constructor | MethodType::Callable, + .. + } => {} + FunctionType::Method { class_path, typ: _ } => { + if parameters.first().map(|p| p.name.as_rs()) == Some("r#self") { + typ = FunctionType::Method { + class_path: class_path.clone(), + typ: MethodType::InstanceMethod, + }; + } + } + FunctionType::Function | FunctionType::Closure => { + if parameters.first().map(|p| p.name.as_rs()) == Some("r#self") { + if [ + ParameterKind::PositionalOnly, + ParameterKind::PositionalOrKeyword, + ] + .contains(¶meters[0].kind) + { + eprintln!( + "WARN: Function '{name}' has the first parameter named 'self', but is not marked as a method. The parameter is renamed to '__unknown_self__'." + ); + parameters[0].name = Ident::from_rs("__unknown_self__"); + parameters[0].annotation = Type::Unknown; + } else { + eprintln!( + "WARN: Function '{name}' has the first parameter named 'self', but is not marked as a method. All parameters are replaced with '*args' and '**kwargs'." 
+ ); + parameters = vec![ + Parameter { + name: Ident::from_rs("args"), + kind: ParameterKind::VarPositional, + annotation: Type::PyTuple(vec![Type::Unknown]), + default: None, + }, + Parameter { + name: Ident::from_rs("kwargs"), + kind: ParameterKind::VarKeyword, + annotation: Type::PyDict { + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), + }, + default: None, + }, + ]; + } + } + } + }; + + // Hack: Reassign InstanceMethod with no parameter to StaticMethod + // This should not be necessary as every InstanceMethod should have at least one parameter (self), but it does for certain complex Python modules + if let FunctionType::Method { + typ: MethodType::InstanceMethod, + .. + } = &typ + { + if parameters.is_empty() { + eprintln!( + "WARN: Method '{name}' is marked as an instance method, but has no parameters. Changed to static method.", + ); + typ = FunctionType::Method { + class_path: name.clone(), + typ: MethodType::StaticMethod, + }; + } + }; + + // Skip the first parameter if it's an instance method (or `__init__`/`__call__`) + if let FunctionType::Method { + typ: MethodType::InstanceMethod | MethodType::Constructor | MethodType::Callable, + .. + } = typ + { + parameters.remove(0); + }; + + // If any of the parameters is still called 'self', do not handle the parameters + if parameters + .iter() + .any(|param| param.name.as_rs() == "r#self") + { + eprintln!( + "WARN: Method '{name}' has a non-first parameter named 'self'. 
All parameters are replaced with '*args' and '**kwargs'.", + + ); + parameters = vec![ + Parameter { + name: Ident::from_rs("args"), + kind: ParameterKind::VarPositional, + annotation: Type::PyTuple(vec![Type::Unknown]), + default: None, + }, + Parameter { + name: Ident::from_rs("kwargs"), + kind: ParameterKind::VarKeyword, + annotation: Type::PyDict { + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), + }, + default: None, + }, + ]; + } + + Ok(Self { + name, + typ, + parameters, + return_annotation, + docstring, + }) + } else { + Ok(Self { + name, + typ, + parameters: vec![ + Parameter { + name: Ident::from_rs("args"), + kind: ParameterKind::VarPositional, + annotation: Type::PyTuple(vec![Type::Unknown]), + default: None, + }, + Parameter { + name: Ident::from_rs("kwargs"), + kind: ParameterKind::VarKeyword, + annotation: Type::PyDict { + key_type: Box::new(Type::Unknown), + value_type: Box::new(Type::Unknown), + }, + default: None, + }, + ], + return_annotation: Type::Unknown, + docstring, + }) + } + } + + pub fn generate( + &self, + cfg: &Config, + scoped_function_idents: &[&Ident], + local_types: &HashMap, + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! 
{ + #[doc = #docstring] + }); + } + } + + // Function signature + let function_ident: syn::Ident = { + let name = self.name.name(); + if let Ok(ident) = name.try_into() { + if crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&name.as_py()) { + return Ok(proc_macro2::TokenStream::new()); + } else { + ident + } + } else { + // Sanitize the function name + let new_name = Ident::from_py(&format!( + "f_{}", + name.as_py().replace(|c: char| !c.is_alphanumeric(), "_") + )); + if let Ok(sanitized_ident) = new_name.clone().try_into() { + eprintln!( + "WARN: Function '{}' is an invalid Rust ident for a function name. Renamed to '{}'.", + self.name, self.name.parent().unwrap_or_default().join(&new_name.into()) + ); + sanitized_ident + } else { + eprintln!( + "WARN: Function '{}' is an invalid Rust ident for a function name. Renaming failed. Bindings will not be generated.", + self.name + ); + return Ok(proc_macro2::TokenStream::new()); + } + } + }; + let param_idents: Vec = self + .parameters + .iter() + .map(|param| Ok(Ident::from_py(&format!("p_{}", param.name)).try_into()?)) + .collect::>>()?; + // Pre-process parameters that require it + let param_preprocessing: proc_macro2::TokenStream = self + .parameters + .iter() + .zip(param_idents.iter()) + .map(|(param, param_ident)| { + param + .annotation + .preprocess_borrowed(param_ident, local_types) + }) + .collect(); + let param_types: Vec = self + .parameters + .iter() + .map(|param| Result::Ok(param.annotation.clone().into_rs_borrowed(local_types))) + .collect::>>()?; + let return_type = self.return_annotation.clone().into_rs_owned(local_types); + output.extend(match &self.typ { + FunctionType::Method { + typ: MethodType::InstanceMethod, + .. + } => { + quote::quote! { + pub fn #function_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<#return_type> + } + } + FunctionType::Method { + typ: MethodType::Callable, + .. 
+ } => { + let call_fn_ident: syn::Ident = { + let mut i = 0; + loop { + let ident = Ident::from_py(&format!( + "call{}", + (i > 0).then(|| i.to_string()).unwrap_or_default() + )); + if !scoped_function_idents.contains(&&ident) { + break ident; + } + i += 1; + } + } + .try_into()?; + quote::quote! { + pub fn #call_fn_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<#return_type> + } + } + FunctionType::Method { + typ: MethodType::Constructor, + .. + } => { + let new_fn_ident: syn::Ident = { + let mut i = 0; + loop { + let ident = Ident::from_py(&format!( + "new{}", + (i > 0).then(|| i.to_string()).unwrap_or_default() + )); + if !scoped_function_idents.contains(&&ident) { + break ident; + } + i += 1; + } + } + .try_into()?; + quote::quote! { + pub fn #new_fn_ident<'py>( + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<&'py Self> + } + } + _ => { + quote::quote! { + pub fn #function_ident<'py>( + py: ::pyo3::marker::Python<'py>, + #(#param_idents: #param_types),* + ) -> ::pyo3::PyResult<#return_type> + } + } + }); + + // Function body (function dispatcher) + let function_dispatcher = match &self.typ { + FunctionType::Function | FunctionType::Closure => pyo3::Python::with_gil(|py| { + self.name + .parent() + .unwrap_or_else(|| unreachable!()) + .import_quote(py) + }), + FunctionType::Method { + class_path, + typ: MethodType::ClassMethod | MethodType::StaticMethod | MethodType::Constructor, + } => pyo3::Python::with_gil(|py| class_path.import_quote(py)), + FunctionType::Method { + typ: MethodType::InstanceMethod | MethodType::Callable, + .. + } => { + quote::quote! { + self.0 + } + } + FunctionType::Method { + typ: MethodType::Unknown, + .. + } => { + eprintln!( + "WARN: Method '{}' has an unknown type. 
Bindings will not be generated.", + self.name + ); + return Ok(proc_macro2::TokenStream::new()); + } + }; + + // Function body: positional args + let positional_args_idents: Vec = self + .parameters + .iter() + .filter(|param| { + [ + ParameterKind::PositionalOnly, + ParameterKind::PositionalOrKeyword, + ] + .contains(&param.kind) + }) + .map(|param| Ok(Ident::from_py(&format!("p_{}", param.name)).try_into()?)) + .collect::>()?; + let var_positional_args_ident: Option = self + .parameters + .iter() + .find(|param| param.kind == ParameterKind::VarPositional) + .and_then(|param| Ident::from_py(&format!("p_{}", param.name)).try_into().ok()); + let has_positional_args = + !positional_args_idents.is_empty() || var_positional_args_ident.is_some(); + let positional_args = if let Some(var_positional_args_ident) = var_positional_args_ident { + if positional_args_idents.is_empty() { + quote::quote! { + #var_positional_args_ident + } + } else { + let n_args_fixed = positional_args_idents.len(); + // TODO: The reference here might be incorrect (&#positional_args_idents could cause double reference) - check + quote::quote! { + { + let mut __internal__args = Vec::with_capacity(#n_args_fixed + #var_positional_args_ident.len()); + __internal__args.extend([#(::pyo3::ToPyObject::to_object(&#positional_args_idents, py),)*]); + __internal__args.extend(#var_positional_args_ident.iter().map(|__internal__arg| ::pyo3::ToPyObject::to_object(__internal__arg, py))); + ::pyo3::types::PyTuple::new( + py, + __internal__args, + ) + } + } + } + } else if positional_args_idents.is_empty() { + quote::quote! { + () + } + } else { + // TODO: The reference here might be incorrect (&#positional_args_idents could cause double reference) - check + quote::quote! 
{ + ::pyo3::types::PyTuple::new( + py, + [#(::pyo3::ToPyObject::to_object(&#positional_args_idents, py),)*], + ) + } + }; + // Function body: keyword args + let keyword_args: Vec<&Parameter> = self + .parameters + .iter() + .filter(|param| [ParameterKind::KeywordOnly].contains(&param.kind)) + .collect_vec(); + let keyword_args_names: Vec<&str> = keyword_args + .iter() + .map(|param| param.name.as_py()) + .collect(); + let keyword_args_idents: Vec = keyword_args + .iter() + .map(|param| Ok(Ident::from_py(&format!("p_{}", param.name)).try_into()?)) + .collect::>()?; + let var_keyword_args_ident: Option = self + .parameters + .iter() + .find(|param| param.kind == ParameterKind::VarKeyword) + .and_then(|param| Ident::from_py(&format!("p_{}", param.name)).try_into().ok()); + let has_keyword_args = !keyword_args_idents.is_empty() || var_keyword_args_ident.is_some(); + let keyword_args = if let Some(var_keyword_args_ident) = var_keyword_args_ident { + if keyword_args_idents.is_empty() { + quote::quote! { + #var_keyword_args_ident + } + } else { + quote::quote! { + { + let __internal__kwargs = #var_keyword_args_ident; + #( + __internal__kwargs.set_item(::pyo3::intern!(py, #keyword_args_names), #keyword_args_idents); + )* + __internal__kwargs + } + } + } + } else if keyword_args_idents.is_empty() { + quote::quote! { + ::pyo3::types::PyDict::new(py) + } + } else { + quote::quote! { + { + let __internal__kwargs = ::pyo3::types::PyDict::new(py); + #( + __internal__kwargs.set_item(::pyo3::intern!(py, #keyword_args_names), #keyword_args_idents); + )* + __internal__kwargs + } + } + }; + // Function body: call + let call = if let FunctionType::Method { + typ: MethodType::Constructor | MethodType::Callable, + .. + } = &self.typ + { + if has_keyword_args { + quote::quote! { + call(#positional_args, Some(#keyword_args)) + } + } else if has_positional_args { + quote::quote! { + call1(#positional_args) + } + } else { + quote::quote! 
{ + call0() + } + } + } else { + let method_name = self.name.name().as_py(); + if has_keyword_args { + quote::quote! { + call_method(::pyo3::intern!(py, #method_name), #positional_args, Some(#keyword_args)) + } + } else if has_positional_args { + quote::quote! { + call_method1(::pyo3::intern!(py, #method_name), #positional_args) + } + } else { + quote::quote! { + call_method0(::pyo3::intern!(py, #method_name)) + } + } + }; + + // Function body + output.extend(quote::quote! { + { + #param_preprocessing + ::pyo3::FromPyObject::extract( + #function_dispatcher.#call? + ) + } + }); + + Ok(output) + } +} + +#[derive(Debug, Clone)] +struct Parameter { + name: Ident, + kind: ParameterKind, + annotation: Type, + default: Option>, +} + +impl PartialEq for Parameter { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + && self.kind == other.kind + && self.annotation == other.annotation + && self.default.is_some() == other.default.is_some() + } +} + +impl Eq for Parameter {} + +impl std::hash::Hash for Parameter { + fn hash(&self, state: &mut H) { + self.name.hash(state); + self.kind.hash(state); + self.annotation.hash(state); + self.default.is_some().hash(state); + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum ParameterKind { + PositionalOnly, + PositionalOrKeyword, + VarPositional, + KeywordOnly, + VarKeyword, +} + +impl From for ParameterKind { + fn from(kind: u8) -> Self { + match kind { + 0 => Self::PositionalOnly, + 1 => Self::PositionalOrKeyword, + 2 => Self::VarPositional, + 3 => Self::KeywordOnly, + 4 => Self::VarKeyword, + _ => unreachable!(), + } + } +} diff --git a/pyo3_bindgen_engine/src/syntax/import.rs b/pyo3_bindgen_engine/src/syntax/import.rs new file mode 100644 index 0000000..a3e3b5b --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/import.rs @@ -0,0 +1,92 @@ +use super::Path; +use crate::{Config, Result}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Import { + pub origin: Path, + pub target: Path, + pub 
import_type: ImportType, +} + +impl Import { + pub fn new(origin: Path, target: Path) -> Self { + let import_type = ImportType::from_paths(&origin, &target); + Self { + origin, + target, + import_type, + } + } + + pub fn is_external(&self) -> bool { + self.import_type == ImportType::ExternalImport + } + + pub fn generate(&self, _cfg: &Config) -> Result { + // For now, we only generate imports for submodule reexports + if self.import_type != ImportType::SubmoduleReexport { + return Ok(proc_macro2::TokenStream::new()); + } + + // Skip identity imports + if self.origin == self.target { + return Ok(proc_macro2::TokenStream::new()); + } + + // Determine the visibility of the import based on its type + let visibility = match self.import_type { + ImportType::ExternalImport | ImportType::PackageReexport => { + proc_macro2::TokenStream::new() + } + ImportType::SubmoduleReexport => quote::quote! { pub }, + }; + + // Generate the path to the target module + let relative_path: std::result::Result = self + .target + .parent() + .unwrap_or_default() + .relative_to(&self.origin, true) + .try_into(); + if let Ok(relative_path) = relative_path { + // Use alias for the target module if it has a different name than the last segment of its path + let maybe_alias = if self.origin.name() == self.target.name() { + proc_macro2::TokenStream::new() + } else { + let alias: syn::Ident = self.target.name().try_into()?; + quote::quote! { as #alias } + }; + + Ok(quote::quote! 
{ + #visibility use #relative_path #maybe_alias; + }) + } else { + Ok(proc_macro2::TokenStream::new()) + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum ImportType { + ExternalImport, + PackageReexport, + SubmoduleReexport, +} + +impl ImportType { + fn from_paths(origin: &Path, target: &Path) -> Self { + let is_package_reexport = target + .root() + .is_some_and(|root_module| origin.starts_with(&root_module)); + let is_submodule_reexport = is_package_reexport + && target + .parent() + .is_some_and(|parent_module| origin.starts_with(&parent_module)); + match (is_package_reexport, is_submodule_reexport) { + (false, false) => Self::ExternalImport, + (true, false) => Self::PackageReexport, + (true, true) => Self::SubmoduleReexport, + _ => unreachable!(), + } + } +} diff --git a/pyo3_bindgen_engine/src/syntax/mod.rs b/pyo3_bindgen_engine/src/syntax/mod.rs new file mode 100644 index 0000000..2ad8f1e --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/mod.rs @@ -0,0 +1,15 @@ +pub(crate) mod class; +pub(crate) mod common; +pub(crate) mod function; +pub(crate) mod import; +pub(crate) mod module; +pub(crate) mod property; +pub(crate) mod type_var; + +pub use class::Class; +pub use common::{AttributeVariant, Ident, Path}; +pub use function::{Function, FunctionType, MethodType}; +pub use import::Import; +pub use module::Module; +pub use property::{Property, PropertyOwner}; +pub use type_var::TypeVar; diff --git a/pyo3_bindgen_engine/src/syntax/module.rs b/pyo3_bindgen_engine/src/syntax/module.rs new file mode 100644 index 0000000..038cbb7 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/module.rs @@ -0,0 +1,653 @@ +use super::{ + AttributeVariant, Class, Function, FunctionType, Ident, Import, Path, Property, PropertyOwner, + TypeVar, +}; +use crate::{Config, Result}; +use itertools::Itertools; +use rustc_hash::FxHashSet as HashSet; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Module { + pub name: Path, + pub prelude: Vec, + pub imports: 
Vec, + pub submodules: Vec, + pub classes: Vec, + pub type_vars: Vec, + pub functions: Vec, + pub properties: Vec, + pub docstring: Option, + pub is_package: bool, +} + +impl Module { + pub fn empty(py: pyo3::Python, name: Path) -> Result { + let module = py.import(name.to_py().as_str())?; + + // Extract the docstring of the module + let docstring = { + let docstring = module.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + Ok(Self { + name, + prelude: Vec::default(), + imports: Vec::default(), + submodules: Vec::default(), + classes: Vec::default(), + type_vars: Vec::default(), + functions: Vec::default(), + properties: Vec::default(), + docstring, + is_package: true, + }) + } + + pub fn parse(cfg: &Config, module: &pyo3::types::PyModule) -> Result { + let py = module.py(); + + // Extract the name of the module + let name = Path::from_py(module.name()?); + + // Extract the index of the module as prelude (if enabled) + let prelude = if cfg.generate_preludes { + Self::extract_prelude(cfg, module, &name) + } else { + Vec::new() + }; + + // Determine if the module is a package that contains submodules + let is_package = module.hasattr(pyo3::intern!(py, "__path__"))?; + + // Extract the list of all submodules for packages + let mut submodules_to_process = if is_package { + Self::extract_submodules(cfg, module)? 
+ } else { + HashSet::default() + }; + + // Initialize lists for all other members of the module + let mut imports = Vec::new(); + let mut conflicting_imports = Vec::new(); + let mut classes: Vec = Vec::new(); + let mut type_vars = Vec::new(); + let mut functions = Vec::new(); + let mut properties = Vec::new(); + + // Extract the list of all attribute names in the module + module + .dir() + .iter() + // Convert each attribute name to an identifier + .map(|attr_name| Ident::from_py(&attr_name.to_string())) + // Remove duplicates + .unique() + // TODO: Try to first access the attribute via __dict__ because Python's descriptor protocol might change the attributes obtained via getattr() + // - For example, classmethod and staticmethod are converted to method/function + // - However, this might also change some of the parsing and it would need to be fixed + // Expand each attribute to a tuple of (attr, attr_name, attr_module, attr_type) + .filter_map(|attr_name| { + if let Ok(attr) = module.getattr(attr_name.as_py()) { + + let attr_module = Path::from_py( + &attr + .getattr(pyo3::intern!(py, "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + let attr_type = attr.get_type(); + + Some((attr, attr_name, attr_module, attr_type)) + } else { + eprintln!( + "WARN: Cannot get attribute '{attr_name}' of '{name}' even though it is listed in its `__dir__`. Bindings will not be generated.", + ); + None + } + }) + // Filter attributes based on various configurable conditions + .filter(|(_attr, attr_name, attr_module, attr_type)| { + cfg.is_attr_allowed(attr_name, attr_module, attr_type) + }) + // Iterate over the remaining attributes and parse them + .try_for_each(|(attr, attr_name, attr_module, attr_type)| { + let attr_name_full = name.join(&attr_name.clone().into()); + match AttributeVariant::determine(py, attr, attr_type, &attr_module, &name, true) + ? 
+ { + AttributeVariant::Import => { + let origin = attr_module.join(&Path::from_py( + &attr + .getattr(pyo3::intern!(py, "__name__")) + .map(std::string::ToString::to_string) + .unwrap_or(attr_name.as_py().to_owned()), + )); + + // Skip if the origin is the same as the target + if origin == attr_name_full { + return Ok(()); + } + + // Make sure the origin attribute is allowed (each segment of the path) + let is_origin_attr_allowed = (0..origin.len()).all(|i| { + let attr_name = &origin[i]; + let attr_module = origin[..i].into(); + let attr_type = if i == origin.len() - 1 { + attr_type + } else { + py.get_type::() + }; + cfg.is_attr_allowed(attr_name, &attr_module, attr_type) + }); + if !is_origin_attr_allowed { + return Ok(()); + } + + // Determine if the import overwrites a submodule + let import_overwrites_submodule = submodules_to_process.contains(&attr_name); + + // Generate the import + let import = Import::new(origin, attr_name_full); + + // Add the import to the appropriate list + if import_overwrites_submodule { + conflicting_imports.push(import); + } else { + imports.push(import); + } + } + AttributeVariant::Module => { + // Note: This should technically not be necessary as `Self::extract_submodules` is supposed to extract all submodules + submodules_to_process.insert(attr_name.clone()); + } + AttributeVariant::Class => { + let class = + Class::parse(cfg, attr.downcast().unwrap_or_else(|_| unreachable!( + "The attribute is known to be a class at this point" + )), attr_name_full)?; + classes.push(class); + } + AttributeVariant::TypeVar => { + let type_var = TypeVar::new(attr_name_full); + type_vars.push(type_var); + } + AttributeVariant::Function => { + let function = + Function::parse(cfg, attr, attr_name_full, FunctionType::Function) + ?; + functions.push(function); + } + AttributeVariant::Method => { + eprintln!("WARN: Methods in modules are not supported: '{name}.{attr_name}'. 
Bindings will not be generated."); + } + AttributeVariant::Closure => { + let function = + Function::parse(cfg, attr, attr_name_full, FunctionType::Closure) + ?; + functions.push(function); + } + AttributeVariant::Property => { + let property = Property::parse( + cfg, + attr, + attr_name_full, + PropertyOwner::Module, + ) + ?; + properties.push(property); + } + } + Result::Ok(()) + })?; + + // Process submodules + let submodules = if cfg.traverse_submodules { + submodules_to_process + .into_iter() + .filter_map(|submodule_name| { + let full_submodule_name = name.join(&submodule_name.clone().into()); + + // Handle submodules that are overwritten by imports separately + if let Some(conflicting_import) = conflicting_imports + .iter() + .find(|import| import.target == full_submodule_name) + { + if let Ok(submodule) = py + .import(full_submodule_name.to_py().as_str()) + .map_err(crate::PyBindgenError::from) + .and_then(|attr| Ok(attr.downcast::()?)) + .and_then(|module| Self::parse(cfg, module)) + { + // It could be any attribute, so all of them need to be checked + if let Some(mut import) = submodule + .imports + .into_iter() + .find(|import| import.target == conflicting_import.origin) + { + import.target = conflicting_import.target.clone(); + imports.push(import); + } + if let Some(mut class) = submodule + .classes + .into_iter() + .find(|class| class.name == conflicting_import.origin) + { + class.name = conflicting_import.target.clone(); + classes.push(class); + } + if let Some(mut type_var) = submodule + .type_vars + .into_iter() + .find(|type_var| type_var.name == conflicting_import.origin) + { + type_var.name = conflicting_import.target.clone(); + type_vars.push(type_var); + } + if let Some(mut function) = submodule + .functions + .into_iter() + .find(|function| function.name == conflicting_import.origin) + { + function.name = conflicting_import.target.clone(); + functions.push(function); + } + if let Some(mut property) = submodule + .properties + .into_iter() + 
.find(|property| property.name == conflicting_import.origin) + { + property.name = conflicting_import.target.clone(); + properties.push(property); + } + } + return None; + } + + // Try to import both as a package and as a attribute of the current module + py.import(full_submodule_name.to_py().as_str()) + .or_else(|_| { + module + .getattr(submodule_name.as_py()) + .and_then(|attr| Ok(attr.downcast::()?)) + }) + .ok() + }) + .map(|submodule| Self::parse(cfg, submodule)) + .collect::>()? + } else { + Vec::default() + }; + + // Extract the docstring of the module + let docstring = { + let docstring = module.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + Ok(Self { + name, + prelude, + imports, + submodules, + classes, + type_vars, + functions, + properties, + docstring, + is_package, + }) + } + + pub fn generate( + &self, + cfg: &Config, + top_level_modules: &[Self], + all_types: &[Path], + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Extra configuration for top-level modules + let is_top_level = top_level_modules.contains(self); + if is_top_level { + output.extend(quote::quote! { + #[allow( + clippy::all, + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused + )] + }); + } + + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! 
{ + #[doc = #docstring] + }); + } + } + + // Get the names of all functions to avoid name clashes + let scoped_function_idents = self + .functions + .iter() + .map(|function| function.name.name()) + .collect::>(); + + // Get all local types mapped to the full path + let local_types = all_types + .iter() + .cloned() + .map(|path| { + let relative_path = self.name.relative_to(&path, false); + (path, relative_path) + }) + .chain(self.imports.iter().flat_map(|import| { + all_types + .iter() + .filter(|&path| path.starts_with(&import.origin)) + .cloned() + .map(|path| { + let imported_path = { + if let Some(stripped_path) = path + .to_py() + .strip_prefix(&format!("{}.", import.origin.to_py())) + { + let mut path = Path::from_py(stripped_path); + // Overwrite the first segment with the target name to support aliasing + path[0] = import.target.name().to_owned(); + path + } else { + import.target.name().to_owned().into() + } + }; + let relative_path = self.name.relative_to(&path, false); + (imported_path, relative_path) + }) + })) + .collect(); + + // Generate the module content + let mut module_content = proc_macro2::TokenStream::new(); + // Imports + if cfg.generate_imports { + module_content.extend( + self.imports + .iter() + .filter(|import| { + top_level_modules + .iter() + .any(|module| module.check_path_exists_recursive(&import.origin, false)) + }) + .map(|import| import.generate(cfg)) + .collect::>()?, + ); + } + // Prelude + if cfg.generate_preludes { + module_content.extend(self.generate_prelude()); + } + // Type variables + if cfg.generate_type_vars { + module_content.extend( + self.type_vars + .iter() + .map(|type_var| type_var.generate(cfg)) + .collect::>()?, + ); + } + // Classes + if cfg.generate_classes { + module_content.extend( + self.classes + .iter() + .map(|class| class.generate(cfg, &local_types)) + .collect::>()?, + ); + } + // Functions + if cfg.generate_functions { + module_content.extend( + self.functions + .iter() + .map(|function| 
function.generate(cfg, &scoped_function_idents, &local_types)) + .collect::>()?, + ); + } + // Properties + if cfg.generate_properties { + module_content.extend( + self.properties + .iter() + .map(|property| property.generate(cfg, &scoped_function_idents, &local_types)) + .collect::>()?, + ); + } + // Submodules + if cfg.traverse_submodules { + module_content.extend( + self.submodules + .iter() + .map(|module| module.generate(cfg, top_level_modules, all_types)) + .collect::>()?, + ); + } + + // Finalize the module with its content + let module_ident: syn::Ident = self.name.name().try_into()?; + output.extend(quote::quote! { + pub mod #module_ident { + #module_content + } + }); + + Ok(output) + } + + fn extract_submodules(cfg: &Config, module: &pyo3::types::PyModule) -> Result> { + let py = module.py(); + let pkgutil = py.import(pyo3::intern!(py, "pkgutil"))?; + + // Extract the paths of the module + let module_paths = module + .getattr(pyo3::intern!(py, "__path__"))? + .extract::<&pyo3::types::PyList>()? + .iter() + .map(|x| std::path::PathBuf::from(x.to_string())) + .collect_vec(); + + // Extract the names of all submodules via `pkgutil.iter_modules` + let module_name = Path::from_py(module.name()?); + pkgutil + .call_method1(pyo3::intern!(py, "iter_modules"), (module_paths,))? + .iter()? 
+ .map(|submodule| { + Ok(Ident::from_py( + &submodule?.getattr(pyo3::intern!(py, "name"))?.to_string(), + )) + }) + // Filter based on various configurable conditions + .filter(|submodule_name| { + submodule_name.as_ref().is_ok_and(|submodule_name| { + cfg.is_attr_allowed( + submodule_name, + &module_name, + py.get_type::(), + ) + }) + }) + .collect() + } + + fn extract_prelude( + cfg: &Config, + module: &pyo3::types::PyModule, + module_name: &Path, + ) -> Vec { + // Extract the index (__all__) of the module if it exists + let mut index_attr_names = if let Ok(index) = module.index() { + index + .iter() + .map(|x| Ident::from_py(&x.to_string())) + .unique() + .collect() + } else { + Vec::default() + }; + + // Compare the index with public attrs of the module + // Return an empty vector if they are identical (no need to generate a prelude) + { + let public_attr_names_set: HashSet<_> = module + .dir() + .iter() + .map(|attr_name| Ident::from_py(&attr_name.to_string())) + .filter(|attr_name| !attr_name.as_py().starts_with('_')) + .collect(); + let index_attr_names_set: HashSet<_> = index_attr_names.iter().cloned().collect(); + + if index_attr_names_set == public_attr_names_set { + return Vec::new(); + } + } + + // If the generation of dependencies is disabled, retain only reexports + if !cfg.generate_dependencies { + index_attr_names.retain(|attr_name| { + if let Ok(attr) = module.getattr(attr_name.as_py()) { + let is_reexport = module_name.root().is_some_and(|root_module| { + let attr_module = Path::from_py( + &attr + .getattr(pyo3::intern!(module.py(), "__module__")) + .map(std::string::ToString::to_string) + .unwrap_or_default(), + ); + attr_module.starts_with(&root_module) + }); + is_reexport + } else { + false + } + }); + } + + // Retain only allowed attributes + index_attr_names.retain(|attr_name| { + if let Ok(attr) = module.getattr(attr_name.as_py()) { + let attr_type = attr.get_type(); + cfg.is_attr_allowed(attr_name, module_name, attr_type) + } else { + 
false + } + }); + + index_attr_names + } + + fn generate_prelude(&self) -> Result { + // Skip if the prelude is empty + if self.prelude.is_empty() { + return Ok(proc_macro2::TokenStream::new()); + } + + // Generate the prelude content (re-export all prelude items) + let exports = self + .prelude + .iter() + // Retain only attributes that are within self.modules, self.classes, self.functions, self.type_vars, self.properties + .filter(|&ident| self.check_ident_exists_immediate(ident, false)) + .map(|ident| { + let ident: syn::Ident = ident.try_into()?; + Ok(quote::quote! { + #ident, + }) + }) + .collect::>()?; + + // Return empty prelude if there are no exports + if exports.is_empty() { + return Ok(proc_macro2::TokenStream::new()); + } + + // Finalize the prelude with its content: the module is named `prelude`, with a numeric suffix appended if that name is already taken in this module + let prelude_ident: syn::Ident = { + let mut i = 0; + loop { + let ident = Ident::from_py(&format!( + "prelude{}", + (i > 0).then(|| i.to_string()).unwrap_or_default() + )); + if !self.check_ident_exists_immediate(&ident, true) { + break ident; + } + i += 1; + } + } + .try_into()?; + Ok(quote::quote! 
{ + pub mod #prelude_ident { + pub use super::{#exports}; + } + }) + } + + fn check_path_exists_recursive(&self, path: &Path, consider_imports: bool) -> bool { + (consider_imports && self.imports.iter().any(|import| import.target == *path)) + || self.submodules.iter().any(|module| module.name == *path) + || self.classes.iter().any(|class| class.name == *path) + || self.functions.iter().any(|function| function.name == *path) + || self.type_vars.iter().any(|type_var| type_var.name == *path) + || self + .properties + .iter() + .any(|property| property.name == *path) + || self + .submodules + .iter() + .any(|module| module.check_path_exists_recursive(path, consider_imports)) + } + + fn check_ident_exists_immediate(&self, ident: &Ident, consider_imports: bool) -> bool { + (consider_imports + && self + .imports + .iter() + .any(|import| import.target.name() == ident)) + || self + .submodules + .iter() + .any(|module| module.name.name() == ident) + || self.classes.iter().any(|class| class.name.name() == ident) + || self + .functions + .iter() + .any(|function| function.name.name() == ident) + || self + .type_vars + .iter() + .any(|type_var| type_var.name.name() == ident) + || self + .properties + .iter() + .any(|property| property.name.name() == ident) + } +} diff --git a/pyo3_bindgen_engine/src/syntax/property.rs b/pyo3_bindgen_engine/src/syntax/property.rs new file mode 100644 index 0000000..c564504 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/property.rs @@ -0,0 +1,328 @@ +use super::{Ident, Path}; +use crate::{typing::Type, Config, Result}; +use rustc_hash::FxHashMap as HashMap; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Property { + pub name: Path, + owner: PropertyOwner, + is_mutable: bool, + annotation: Type, + setter_annotation: Type, + docstring: Option, + setter_docstring: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PropertyOwner { + Module, + Class, +} + +impl Property { + pub fn parse( + _cfg: &Config, + 
property: &pyo3::types::PyAny, + name: Path, + owner: PropertyOwner, + ) -> Result { + let py = property.py(); + + // Extract the type of the property + let typ = property.get_type(); + + // Do not extract the docstring of the property, because it would point to the docstring of the type/class itself, not this property + let mut docstring = None; + + // Determine the mutability and type of the property + let (is_mutable, annotation, setter_annotation, mut setter_docstring); + match owner { + PropertyOwner::Module => { + is_mutable = true; + annotation = Type::try_from(typ)?; + setter_annotation = annotation.clone(); + setter_docstring = docstring.clone(); + } + PropertyOwner::Class => { + let signature = py + .import(pyo3::intern!(py, "inspect"))? + .getattr(pyo3::intern!(py, "signature"))?; + + if let Ok(getter) = property.getattr(pyo3::intern!(py, "fget")) { + // Extract the annotation from the return of the function (if available) + if let Ok(function_signature) = signature.call1((getter,)) { + annotation = { + let return_annotation = function_signature + .getattr(pyo3::intern!(py, "return_annotation"))?; + if return_annotation + .is(function_signature.getattr(pyo3::intern!(py, "empty"))?) + { + Type::Unknown + } else { + return_annotation.try_into()? 
+ } + }; + } else { + annotation = Type::try_from(typ)?; + } + + // Update the docstring if it is empty + if docstring.is_none() { + docstring = { + let docstring = + getter.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + } + } else { + annotation = Type::try_from(typ)?; + } + + match property.getattr(pyo3::intern!(py, "fset")) { + Ok(setter) if !setter.is_none() => { + is_mutable = true; + + // Extract the annotation from the parameter of the function (if available) + if let Ok(function_signature) = signature.call1((setter,)) { + setter_annotation = { + let param = function_signature + .getattr(pyo3::intern!(py, "parameters"))? + .call_method0(pyo3::intern!(py, "values"))? + .iter()? + .nth(1) + .unwrap()?; + let annotation = param.getattr(pyo3::intern!(py, "annotation"))?; + if annotation.is(param.getattr(pyo3::intern!(py, "empty"))?) { + Type::Unknown + } else { + annotation.try_into()? 
+ } + }; + } else { + setter_annotation = Type::Unknown; + } + + setter_docstring = { + let docstring = + setter.getattr(pyo3::intern!(py, "__doc__"))?.to_string(); + if docstring.is_empty() || docstring == "None" { + None + } else { + Some(docstring) + } + }; + + if docstring.is_none() { + // Update the getter docstring to match setter docstring if it is still empty + docstring = setter_docstring.clone(); + } else if setter_docstring.is_none() { + // Otherwise, update the setter docstring to match the getter docstring if it is still empty + setter_docstring = docstring.clone(); + } + } + _ => { + is_mutable = false; + setter_annotation = Type::Unknown; + setter_docstring = None; + } + } + } + } + + Ok(Self { + name, + owner, + is_mutable, + annotation, + setter_annotation, + docstring, + setter_docstring, + }) + } + + /// Generates the bindings for this property: the getter, followed by the setter if the property is mutable. + pub fn generate( + &self, + cfg: &Config, + scoped_function_idents: &[&Ident], + local_types: &HashMap, + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Getter + output.extend(self.generate_getter(cfg, scoped_function_idents, local_types)?); + + // Setter (if mutable) + if self.is_mutable { + output.extend(self.generate_setter(cfg, scoped_function_idents, local_types)?); + } + + Ok(output) + } + + /// Generates the getter function for this property. + /// + /// The docstring is emitted as a `#[doc]` attribute only if `cfg.generate_docs` is enabled. + /// The function is named after the property; `get_<name>` is used instead when that name cannot be converted into a Rust identifier, is already listed in `scoped_function_idents`, or is a forbidden function name. + /// An empty token stream is returned if `get_<name>` is unavailable as well. + pub fn generate_getter( + &self, + cfg: &Config, + scoped_function_idents: &[&Ident], + local_types: &HashMap, + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Documentation + if cfg.generate_docs { + if let Some(docstring) = &self.docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! 
{ + #[doc = #docstring] + }); + } + } + + // Function + let function_ident: syn::Ident = { + let name = self.name.name(); + if let Ok(ident) = name.try_into() { + if scoped_function_idents.contains(&name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&name.as_py()) + { + // The plain name is already taken or forbidden, so try `get_<name>` instead + let getter_name = Ident::from_py(&format!("get_{}", name.as_py())); + if scoped_function_idents.contains(&&getter_name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&getter_name.as_py()) + { + return Ok(proc_macro2::TokenStream::new()); + } else { + getter_name.try_into()? + } + } else { + ident + } + } else { + // The plain name cannot be converted into an identifier, so try `get_<name>` instead + let getter_name = Ident::from_py(&format!("get_{}", name.as_py())); + if scoped_function_idents.contains(&&getter_name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&getter_name.as_py()) + { + return Ok(proc_macro2::TokenStream::new()); + } else { + getter_name.try_into()? + } + } + }; + let param_name = self.name.name().as_py(); + let param_type = self.annotation.clone().into_rs_owned(local_types); + match &self.owner { + PropertyOwner::Module => { + let import = pyo3::Python::with_gil(|py| { + self.name + .parent() + .unwrap_or_else(|| unreachable!()) + .import_quote(py) + }); + output.extend(quote::quote! { + pub fn #function_ident<'py>( + py: ::pyo3::marker::Python<'py>, + ) -> ::pyo3::PyResult<#param_type> { + ::pyo3::FromPyObject::extract( + #import.getattr(::pyo3::intern!(py, #param_name))? + ) + } + }); + } + PropertyOwner::Class => { + // NOTE(review): re-binds the same expression as the outer `param_name`; the shadowing looks redundant + let param_name = self.name.name().as_py(); + + output.extend(quote::quote! { + pub fn #function_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + ) -> ::pyo3::PyResult<#param_type> { + self.0.getattr(::pyo3::intern!(py, #param_name))? 
+ .extract() + } + }); + } + } + + Ok(output) + } + + /// Generates the setter function (`set_<name>`) for this property. + /// + /// The setter docstring is emitted as a `#[doc]` attribute only if `cfg.generate_docs` is enabled, consistently with `generate_getter()`. + /// The value parameter is typed by the setter annotation; the getter annotation is used when the setter annotation is unknown. + /// An empty token stream is returned if `set_<name>` is already listed in `scoped_function_idents` or is a forbidden function name. + /// + /// # Errors + /// + /// Returns an error if `set_<name>` cannot be converted into a Rust identifier. + pub fn generate_setter( + &self, + cfg: &Config, + scoped_function_idents: &[&Ident], + local_types: &HashMap, + ) -> Result { + let mut output = proc_macro2::TokenStream::new(); + + // Documentation (only if enabled in the config, same as for the getter) + if cfg.generate_docs { + if let Some(docstring) = &self.setter_docstring { + // Trim the docstring and add a leading whitespace (looks better in the generated code) + let mut docstring = docstring.trim().trim_end_matches('/').to_owned(); + docstring.insert(0, ' '); + // Replace all double quotes with single quotes + docstring = docstring.replace('"', "'"); + + output.extend(quote::quote! { + #[doc = #docstring] + }); + } + } + + // Function + let function_ident: syn::Ident = { + let setter_name = Ident::from_py(&format!("set_{}", self.name.name().as_py())); + if scoped_function_idents.contains(&&setter_name) + || crate::config::FORBIDDEN_FUNCTION_NAMES.contains(&setter_name.as_py()) + { + return Ok(proc_macro2::TokenStream::new()); + } else { + setter_name.try_into()? + } + }; + let param_name = self.name.name().as_py(); + // Type the value by the annotation of the setter itself; fall back to the getter annotation if the setter is not annotated + let value_annotation = if matches!(self.setter_annotation, Type::Unknown) { + &self.annotation + } else { + &self.setter_annotation + }; + let param_preprocessing = value_annotation.preprocess_borrowed( + &syn::Ident::new("p_value", proc_macro2::Span::call_site()), + local_types, + ); + let param_type = value_annotation.clone().into_rs_borrowed(local_types); + match &self.owner { + PropertyOwner::Module => { + let import = pyo3::Python::with_gil(|py| { + self.name + .parent() + .unwrap_or_else(|| unreachable!()) + .import_quote(py) + }); + output.extend(quote::quote! { + pub fn #function_ident<'py>( + py: ::pyo3::marker::Python<'py>, + p_value: #param_type, + ) -> ::pyo3::PyResult<()> { + #param_preprocessing + #import.setattr(::pyo3::intern!(py, #param_name), p_value) + } + }); + } + PropertyOwner::Class => { + output.extend(quote::quote! 
{ + pub fn #function_ident<'py>( + &'py self, + py: ::pyo3::marker::Python<'py>, + p_value: #param_type, + ) -> ::pyo3::PyResult<()> { + #param_preprocessing + self.0.setattr(::pyo3::intern!(py, #param_name), p_value) + } + }); + } + } + + Ok(output) + } +} diff --git a/pyo3_bindgen_engine/src/syntax/type_var.rs b/pyo3_bindgen_engine/src/syntax/type_var.rs new file mode 100644 index 0000000..7813c58 --- /dev/null +++ b/pyo3_bindgen_engine/src/syntax/type_var.rs @@ -0,0 +1,20 @@ +use super::Path; +use crate::{Config, Result}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct TypeVar { + pub name: Path, +} + +impl TypeVar { + pub fn new(name: Path) -> Self { + Self { name } + } + + pub fn generate(&self, _cfg: &Config) -> Result { + let typevar_ident: syn::Ident = self.name.name().try_into()?; + Ok(quote::quote! { + pub type #typevar_ident = ::pyo3::types::PyAny; + }) + } +} diff --git a/pyo3_bindgen_engine/src/types.rs b/pyo3_bindgen_engine/src/types.rs deleted file mode 100644 index b36d862..0000000 --- a/pyo3_bindgen_engine/src/types.rs +++ /dev/null @@ -1,1098 +0,0 @@ -//! Module for handling Rust, Python and `PyO3` types. -// TODO: Remove allow once impl is finished -#![allow(unused)] - -use itertools::Itertools; -use std::str::FromStr; - -/// Enum that maps Python types to Rust types. -/// -/// Note that this is not a complete mapping at the moment. The public API is -/// subject to large changes. 
-#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Type { - PyAny, - Unhandled(String), - Unknown, - - // Primitives - PyBool, - PyByteArray, - PyBytes, - PyFloat, - PyLong, - PyString, - - // Enums - Optional(Box), - Union(Vec), - PyNone, - - // Collections - PyDict { - t_key: Box, - t_value: Box, - }, - PyFrozenSet(Box), - PyList(Box), - PySet(Box), - PyTuple(Vec), - - // Additional types - std - IpV4Addr, - IpV6Addr, - Path, - // TODO: Map `PySlice` to `std::ops::Range` if possible - PySlice, - - // Additional types - num-complex - // TODO: Support conversion of `PyComplex`` to `num_complex::Complex` if enabled via `num-complex` feature - PyComplex, - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - PyDate, - #[cfg(not(Py_LIMITED_API))] - PyDateTime, - PyDelta, - #[cfg(not(Py_LIMITED_API))] - PyTime, - #[cfg(not(Py_LIMITED_API))] - PyTzInfo, - - // Python-specific types - PyCapsule, - PyCFunction, - #[cfg(not(Py_LIMITED_API))] - PyCode, - PyEllipsis, - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - PyFrame, - PyFunction, - PyModule, - #[cfg(not(PyPy))] - PySuper, - PyTraceback, - PyType, -} - -impl TryFrom<&pyo3::types::PyAny> for Type { - type Error = pyo3::PyErr; - fn try_from(value: &pyo3::types::PyAny) -> Result { - Ok(match value { - t if t.is_instance_of::() => { - let t = t.downcast::()?; - Self::try_from(t)? - } - s if s.is_instance_of::() => { - let s = s.downcast::()?; - Self::from_str(s.to_str()?)? - } - typing if typing.get_type().getattr("__module__")?.to_string() == "typing" => { - Self::from_typing(typing)? - } - none if none.is_none() => Self::Unknown, - // Unknown | Handle as string if possible - _ => { - let value = value.to_string(); - match &value { - _class if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? - } - _enum if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? 
- } - _ => Self::from_str(&value)?, - } - } - }) - } -} - -impl TryFrom<&pyo3::types::PyType> for Type { - type Error = pyo3::PyErr; - fn try_from(value: &pyo3::types::PyType) -> Result { - Ok(match value { - // Primitives - t if t.is_subclass_of::()? => Self::PyBool, - t if t.is_subclass_of::()? => Self::PyByteArray, - t if t.is_subclass_of::()? => Self::PyBytes, - t if t.is_subclass_of::()? => Self::PyFloat, - t if t.is_subclass_of::()? => Self::PyLong, - t if t.is_subclass_of::()? => Self::PyString, - - // Collections - t if t.is_subclass_of::()? => Self::PyDict { - t_key: Box::new(Self::Unknown), - t_value: Box::new(Self::Unknown), - }, - t if t.is_subclass_of::()? => { - Self::PyFrozenSet(Box::new(Self::Unknown)) - } - t if t.is_subclass_of::()? => { - Self::PyList(Box::new(Self::Unknown)) - } - t if t.is_subclass_of::()? => Self::PySet(Box::new(Self::Unknown)), - t if t.is_subclass_of::()? => Self::PyTuple(vec![Self::Unknown]), - - // Additional types - std - t if t.is_subclass_of::()? => Self::PySlice, - - // Additional types - num-complex - t if t.is_subclass_of::()? => Self::PyComplex, - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyDate, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyDateTime, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyDelta, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyTime, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyTzInfo, - - // Python-specific types - t if t.is_subclass_of::()? => Self::PyCapsule, - t if t.is_subclass_of::()? => Self::PyCFunction, - #[cfg(not(Py_LIMITED_API))] - t if t.is_subclass_of::()? => Self::PyCode, - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - t if t.is_subclass_of::()? => Self::PyFrame, - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - t if t.is_subclass_of::()? => Self::PyFunction, - t if t.is_subclass_of::()? 
=> Self::PyModule, - #[cfg(not(PyPy))] - t if t.is_subclass_of::()? => Self::PySuper, - t if t.is_subclass_of::()? => Self::PyTraceback, - t if t.is_subclass_of::()? => Self::PyType, - - // Unknown | Handle as string if possible - _ => { - let value = value.to_string(); - match &value { - _class if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? - } - _enum if value.starts_with("") => { - let value = value - .strip_prefix("") - .unwrap(); - Self::from_str(value)? - } - _ => Self::Unhandled(value), - } - } - }) - } -} - -impl std::str::FromStr for Type { - type Err = pyo3::PyErr; - fn from_str(value: &str) -> Result { - Ok(match value { - "Any" => Self::PyAny, - - // Primitives - "bool" => Self::PyBool, - "bytearray" => Self::PyByteArray, - "bytes" => Self::PyBytes, - "float" => Self::PyFloat, - "int" => Self::PyLong, - "str" => Self::PyString, - - // Enums - optional - if optional.matches('|').count() == 1 && optional.matches("None").count() == 1 => - { - let t = optional - .split('|') - .map(str::trim) - .find(|x| *x != "None") - .unwrap(); - Self::Optional(Box::new(Self::from_str(t)?)) - } - r#union if r#union.contains('|') => { - let mut t_sequence = r#union - .split('|') - .map(|x| x.trim().to_string()) - .collect::>(); - ugly_hack_repair_complex_split_sequence(&mut t_sequence); - Self::Union( - t_sequence - .iter() - .map(|x| Self::from_str(x)) - .collect::, _>>()?, - ) - } - "None" | "NoneType" => Self::PyNone, - - // Collections - dict if dict.starts_with("dict[") && dict.ends_with(']') => { - let (key, value) = dict - .strip_prefix("dict[") - .unwrap() - .strip_suffix(']') - .unwrap() - .split_once(',') - .unwrap(); - let key = key.trim(); - let value = value.trim(); - Self::PyDict { - t_key: Box::new(Self::from_str(key)?), - t_value: Box::new(Self::from_str(value)?), - } - } - "dict" | "Dict" => Self::PyDict { - t_key: Box::new(Self::Unknown), - t_value: Box::new(Self::Unknown), - }, - frozenset if 
frozenset.starts_with("frozenset[") && frozenset.ends_with(']') => { - let t = frozenset - .strip_prefix("frozenset[") - .unwrap() - .strip_suffix(']') - .unwrap(); - Self::PyFrozenSet(Box::new(Self::from_str(t)?)) - } - list if list.starts_with("list[") && list.ends_with(']') => { - let t = list - .strip_prefix("list[") - .unwrap() - .strip_suffix(']') - .unwrap(); - Self::PyList(Box::new(Self::from_str(t)?)) - } - "list" => Self::PyList(Box::new(Self::Unknown)), - sequence if sequence.starts_with("Sequence[") && sequence.ends_with(']') => { - let t = sequence - .strip_prefix("Sequence[") - .unwrap() - .strip_suffix(']') - .unwrap(); - Self::PyList(Box::new(Self::from_str(t)?)) - } - set if set.starts_with("set[") && set.ends_with(']') => { - let t = set.strip_prefix("set[").unwrap().strip_suffix(']').unwrap(); - Self::PySet(Box::new(Self::from_str(t)?)) - } - tuple if tuple.starts_with("tuple[") && tuple.ends_with(']') => { - let mut t_sequence = tuple - .strip_prefix("tuple[") - .unwrap() - .strip_suffix(']') - .unwrap() - .split(',') - .map(|x| x.trim().to_string()) - .collect::>(); - ugly_hack_repair_complex_split_sequence(&mut t_sequence); - Self::PyTuple( - t_sequence - .iter() - .map(|x| Self::from_str(x)) - .collect::, _>>()?, - ) - } - - // Additional types - std - "ipaddress.IPv4Address" => Self::IpV4Addr, - "ipaddress.IPv6Address" => Self::IpV6Addr, - "os.PathLike" | "pathlib.Path" => Self::Path, - "slice" => Self::PySlice, - - // Additional types - num-complex - "complex" => Self::PyComplex, - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - "datetime.date" => Self::PyDate, - #[cfg(not(Py_LIMITED_API))] - "datetime.datetime" => Self::PyDateTime, - "timedelta" => Self::PyDelta, - #[cfg(not(Py_LIMITED_API))] - "datetime.time" => Self::PyTime, - #[cfg(not(Py_LIMITED_API))] - "datetime.tzinfo" => Self::PyTzInfo, - - // Python-specific types - "capsule" => Self::PyCapsule, - "cfunction" => Self::PyCFunction, - #[cfg(not(Py_LIMITED_API))] - 
"code" => Self::PyCode, - "Ellipsis" | "..." => Self::PyEllipsis, - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - "frame" => Self::PyFrame, - "function" => Self::PyFunction, - callable if callable.starts_with("Callable[") && callable.ends_with(']') => { - // TODO: Use callable types for something if useful - // let (args, return_value) = callable - // .strip_prefix("Callable[") - // .unwrap() - // .strip_suffix(']') - // .unwrap() - // .split_once(',') - // .unwrap(); - // let args = args - // .strip_prefix("[") - // .unwrap() - // .strip_suffix("]") - // .unwrap() - // .split(',') - // .map(|x| x.trim()) - // .collect::>(); - // let return_value = return_value.trim(); - Self::PyFunction - } - "Callable" | "callable" => Self::PyFunction, - "module" => Self::PyModule, - #[cfg(not(PyPy))] - "super" => Self::PySuper, - "traceback" => Self::PyTraceback, - typ if typ.starts_with("type[") && typ.ends_with(']') => { - // TODO: Use inner type for something if useful - // let t = typ - // .strip_prefix("type[") - // .unwrap() - // .strip_suffix(']') - // .unwrap(); - Self::PyType - } - - // typing - typing if typing.starts_with("typing.") => { - let s = typing.strip_prefix("typing.").unwrap(); - Self::from_str(s)? - } - - // collection.abc - collection if collection.starts_with("collection.abc.") => { - let s = collection.strip_prefix("collection.abc.").unwrap(); - Self::from_str(s)? - } - - unhandled => Self::Unhandled(unhandled.to_owned()), - }) - } -} - -impl Type { - pub fn from_typing(value: &pyo3::types::PyAny) -> pyo3::PyResult { - if let (Ok(t), Ok(t_inner)) = (value.getattr("__origin__"), value.getattr("__args__")) { - let t_inner = t_inner.downcast::()?; - - if t.is_instance_of::() { - let t = t.downcast::()?; - match Self::try_from(t)? { - Self::PyDict { .. 
} => { - let (t_key, t_value) = ( - Self::try_from(t_inner.get_item(0)?)?, - Self::try_from(t_inner.get_item(1)?)?, - ); - return Ok(Self::PyDict { - t_key: Box::new(t_key), - t_value: Box::new(t_value), - }); - } - Self::PyList(..) => { - let t_inner = Self::try_from(t_inner.get_item(0)?)?; - return Ok(Self::PyList(Box::new(t_inner))); - } - Self::PyTuple(..) => { - let t_sequence = t_inner - .iter() - .map(Self::try_from) - .collect::, _>>()?; - return Ok(Self::PyTuple(t_sequence)); - } - Self::PyType => { - // TODO: See if the inner type is useful for something here - return Ok(Self::PyType); - } - _ => { - // Noop - processed as string below - // eprintln!( - // "Warning: Unexpected type encountered: {value}\n \ - // Bindings could be improved by handling the type here \ - // Please report this as a bug. [scope: Type::from_typing()]", - // ); - } - } - } - - let t = t.to_string(); - Ok(match &t { - _typing if t.starts_with("typing.") => { - let t = t.strip_prefix("typing.").unwrap(); - match t { - "Union" => { - let t_sequence = t_inner - .iter() - .map(Self::try_from) - .collect::, _>>()?; - - if t_sequence.len() == 2 && t_sequence.contains(&Self::PyNone) { - let t = t_sequence - .iter() - .find(|x| **x != Self::PyNone) - .unwrap() - .clone(); - Self::Optional(Box::new(t)) - } else { - Self::Union(t_sequence) - } - } - _ => Self::Unhandled(value.to_string()), - } - } - _collections if t.starts_with("") => { - let t = t - .strip_prefix("") - .unwrap(); - match t { - "Iterable" | "Sequence" => { - let t_inner = Self::try_from(t_inner.get_item(0)?)?; - Self::PyList(Box::new(t_inner)) - } - "Callable" => { - // TODO: Use callable types for something if useful (t_inner) - Self::PyFunction - } - _ => Self::Unhandled(value.to_string()), - } - } - // Unknown | Handle the type as string if possible - _ => { - // TODO: Handle also the inner type here if possible - let t = t.to_string(); - match &t { - _class if t.starts_with("") => { - let t = t - .strip_prefix("") - 
.unwrap(); - Self::from_str(t)? - } - _enum if t.starts_with("") => { - let t = t - .strip_prefix("") - .unwrap(); - Self::from_str(t)? - } - _ => Self::from_str(&t)?, - } - } - }) - } else { - let value = value.to_string(); - Type::from_str(&value) - } - } - - #[must_use] - pub fn into_rs( - self, - owned: bool, - module_name: &str, - all_types: &std::collections::HashSet, - ) -> proc_macro2::TokenStream { - if owned { - self.into_rs_owned(module_name, all_types) - } else { - self.into_rs_borrowed(module_name, all_types) - } - } - - #[must_use] - pub fn into_rs_owned( - self, - module_name: &str, - all_types: &std::collections::HashSet, - ) -> proc_macro2::TokenStream { - match self { - Self::PyAny => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::Unhandled(..) => self.try_into_module_path(module_name, all_types), - - Self::Unknown => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - - // Primitives - Self::PyBool => { - quote::quote! {bool} - } - Self::PyByteArray | Self::PyBytes => { - quote::quote! {Vec} - } - Self::PyFloat => { - quote::quote! {f64} - } - Self::PyLong => { - quote::quote! {i64} - } - Self::PyString => { - quote::quote! {::std::string::String} - } - - // Enums - Self::Optional(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::option::Option<#inner> - } - } - Self::Union(t_alternatives) => { - // TODO: Support Rust enum where possible - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - Self::PyNone => { - // TODO: Not sure what to do with None - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - - // Collections - Self::PyDict { t_key, t_value } => { - if t_key.is_owned_hashable() { - let t_key = t_key.into_rs_owned(module_name, all_types); - let t_value = t_value.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::collections::HashMap<#t_key, #t_value> - } - } else { - quote::quote! 
{ - &'py ::pyo3::types::PyDict - } - } - } - Self::PyFrozenSet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::collections::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PyFrozenSet - } - } - } - Self::PyList(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - Vec<#inner> - } - } - Self::PySet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::collections::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PySet - } - } - } - Self::PyTuple(t_sequence) => { - if t_sequence.is_empty() - || (t_sequence.len() == 1 && t_sequence[0] == Self::Unknown) - { - quote::quote! { - &'py ::pyo3::types::PyTuple - } - } else { - let inner = t_sequence - .into_iter() - .map(|x| x.into_rs_owned(module_name, all_types)) - .collect::>(); - quote::quote! { - (#(#inner),*) - } - } - } - - // Additional types - std - Self::IpV4Addr => { - quote::quote! {::std::net::IpV4Addr} - } - Self::IpV6Addr => { - quote::quote! {::std::net::IpV6Addr} - } - Self::Path => { - quote::quote! {::std::path::PathBuf} - } - Self::PySlice => { - quote::quote! {&'py ::pyo3::types::PySlice} - } - - // Additional types - num-complex - Self::PyComplex => { - quote::quote! {&'py ::pyo3::types::PyComplex} - } - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - Self::PyDate => { - quote::quote! {&'py ::pyo3::types::PyDate} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyDateTime => { - quote::quote! {&'py ::pyo3::types::PyDateTime} - } - Self::PyDelta => { - quote::quote! {::std::time::Duration} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTime => { - quote::quote! {&'py ::pyo3::types::PyTime} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTzInfo => { - quote::quote! {&'py ::pyo3::types::PyTzInfo} - } - - // Python-specific types - Self::PyCapsule => { - quote::quote! 
{&'py ::pyo3::types::PyCapsule} - } - Self::PyCFunction => { - quote::quote! {&'py ::pyo3::types::PyCFunction} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyCode => { - quote::quote! {&'py ::pyo3::types::PyCode} - } - Self::PyEllipsis => { - // TODO: Not sure what to do with ellipsis - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFrame => { - quote::quote! {&'py ::pyo3::types::PyFrame} - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyFunction} - } - #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::PyModule => { - quote::quote! {&'py ::pyo3::types::PyModule} - } - #[cfg(not(PyPy))] - Self::PySuper => { - quote::quote! {&'py ::pyo3::types::PySuper} - } - Self::PyTraceback => { - quote::quote! {&'py ::pyo3::types::PyTraceback} - } - Self::PyType => { - quote::quote! {&'py ::pyo3::types::PyType} - } - } - } - - #[must_use] - pub fn into_rs_borrowed( - self, - module_name: &str, - all_types: &std::collections::HashSet, - ) -> proc_macro2::TokenStream { - match self { - Self::PyAny => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::Unhandled(..) => self.try_into_module_path(module_name, all_types), - Self::Unknown => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - - // Primitives - Self::PyBool => { - quote::quote! {bool} - } - Self::PyByteArray | Self::PyBytes => { - quote::quote! {&[u8]} - } - Self::PyFloat => { - quote::quote! {f64} - } - Self::PyLong => { - quote::quote! {i64} - } - Self::PyString => { - quote::quote! {&str} - } - - // Enums - Self::Optional(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - ::std::option::Option<#inner> - } - } - Self::Union(t_alternatives) => { - // TODO: Support Rust enum where possible - quote::quote! 
{ - &'py ::pyo3::types::PyAny - } - } - Self::PyNone => { - // TODO: Not sure what to do with None - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - - // Collections - Self::PyDict { t_key, t_value } => { - if t_key.is_owned_hashable() { - let t_key = t_key.into_rs_owned(module_name, all_types); - let t_value = t_value.into_rs_owned(module_name, all_types); - quote::quote! { - &::std::collections::HashMap<#t_key, #t_value> - } - } else { - quote::quote! { - &'py ::pyo3::types::PyDict - } - } - } - Self::PyFrozenSet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - &::std::collections::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PyFrozenSet - } - } - } - Self::PyList(t) => { - let inner = t.into_rs_owned(module_name, all_types); - quote::quote! { - &[#inner] - } - } - Self::PySet(t) => { - if t.is_owned_hashable() { - let t = t.into_rs_owned(module_name, all_types); - quote::quote! { - &::std::collections::HashSet<#t> - } - } else { - quote::quote! { - &'py ::pyo3::types::PySet - } - } - } - Self::PyTuple(t_sequence) => { - if t_sequence.is_empty() - || (t_sequence.len() == 1 && t_sequence[0] == Self::Unknown) - { - quote::quote! { - &'py ::pyo3::types::PyTuple - } - } else { - let inner = t_sequence - .into_iter() - .map(|x| x.into_rs_owned(module_name, all_types)) - .collect::>(); - quote::quote! { - (#(#inner),*) - } - } - } - - // Additional types - std - Self::IpV4Addr => { - quote::quote! {::std::net::IpV4Addr} - } - Self::IpV6Addr => { - quote::quote! {::std::net::IpV6Addr} - } - Self::Path => { - quote::quote! {::std::path::PathBuf} - } - Self::PySlice => { - quote::quote! {&'py ::pyo3::types::PySlice} - } - - // Additional types - num-complex - Self::PyComplex => { - quote::quote! {&'py ::pyo3::types::PyComplex} - } - - // Additional types - datetime - #[cfg(not(Py_LIMITED_API))] - Self::PyDate => { - quote::quote! 
{&'py ::pyo3::types::PyDate} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyDateTime => { - quote::quote! {&'py ::pyo3::types::PyDateTime} - } - Self::PyDelta => { - quote::quote! {::std::time::Duration} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTime => { - quote::quote! {&'py ::pyo3::types::PyTime} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyTzInfo => { - quote::quote! {&'py ::pyo3::types::PyTzInfo} - } - - // Python-specific types - Self::PyCapsule => { - quote::quote! {&'py ::pyo3::types::PyCapsule} - } - Self::PyCFunction => { - quote::quote! {&'py ::pyo3::types::PyCFunction} - } - #[cfg(not(Py_LIMITED_API))] - Self::PyCode => { - quote::quote! {&'py ::pyo3::types::PyCode} - } - Self::PyEllipsis => { - // TODO: Not sure what to do with ellipsis - quote::quote! { - &'py ::pyo3::types::PyAny - } - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFrame => { - quote::quote! {&'py ::pyo3::types::PyFrame} - } - #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyFunction} - } - #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] - Self::PyFunction => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - Self::PyModule => { - quote::quote! {&'py ::pyo3::types::PyModule} - } - #[cfg(not(PyPy))] - Self::PySuper => { - quote::quote! {&'py ::pyo3::types::PySuper} - } - Self::PyTraceback => { - quote::quote! {&'py ::pyo3::types::PyTraceback} - } - Self::PyType => { - quote::quote! 
{&'py ::pyo3::types::PyType} - } - } - } - - fn try_into_module_path( - self, - module_name: &str, - all_types: &std::collections::HashSet, - ) -> proc_macro2::TokenStream { - let Self::Unhandled(value) = self else { - unreachable!() - }; - let module_root = if module_name.contains('.') { - module_name.split('.').next().unwrap() - } else { - module_name - }; - match value.as_str() { - // Ignorelist - "property" - | "member_descriptor" - | "method_descriptor" - | "getset_descriptor" - | "_collections._tuplegetter" - | "AsyncState" => { - quote::quote! {&'py ::pyo3::types::PyAny} - } - module_member_full if module_member_full.starts_with(module_root) => { - // Ignore unknown types - if !all_types.contains(module_member_full) { - return quote::quote! {&'py ::pyo3::types::PyAny}; - } - - let value_name = module_member_full.split('.').last().unwrap(); - - let n_common_ancestors = module_name - .split('.') - .zip(module_member_full.split('.')) - .take_while(|(a, b)| a == b) - .count(); - let current_module_depth = module_name.split('.').count(); - let reexport_path = if (current_module_depth - n_common_ancestors) > 0 { - std::iter::repeat("super".to_string()) - .take(current_module_depth - n_common_ancestors) - } else { - std::iter::repeat("self".to_string()).take(1) - }; - let reexport_path: String = reexport_path - .chain( - module_member_full - .split('.') - .skip(n_common_ancestors) - .map(|s| { - if syn::parse_str::(s).is_ok() { - s.to_owned() - } else { - format!("r#{s}") - } - }), - ) - .join("::"); - - // The path contains both ident and "::", combine into something that can be quoted - let reexport_path = syn::parse_str::(&reexport_path).unwrap(); - quote::quote! 
{ - &'py #reexport_path - } - } - _ => { - let value_without_brackets = value.split_once('[').unwrap_or((&value, "")).0; - let module_scopes = value_without_brackets.split('.'); - let n_module_scopes = module_scopes.clone().count(); - - // Approach: Find types without a module scope (no dot) and check if the type is local (or imported in the current module) - if !value_without_brackets.contains('.') { - if let Some(member) = all_types - .iter() - .filter(|member| { - member - .split('.') - .take(member.split('.').count() - 1) - .join(".") - == module_name - }) - .find(|&member| { - member.trim_start_matches(&format!("{module_name}.")) - == value_without_brackets - }) - { - return Self::Unhandled(member.to_owned()) - .try_into_module_path(module_name, all_types); - } - } - - // Approach: Find the shallowest match that contains the value - // TODO: Fix this! The matching might be wrong in many cases - let mut possible_matches = std::collections::HashSet::::default(); - for i in 0..n_module_scopes { - let module_member_scopes_end = module_scopes.clone().skip(i).join("."); - all_types - .iter() - .filter(|member| member.ends_with(&module_member_scopes_end)) - .for_each(|member| { - possible_matches.insert(member.to_owned()); - }); - if !possible_matches.is_empty() { - let shallowest_match = possible_matches - .iter() - .min_by(|m1, m2| m1.split('.').count().cmp(&m2.split('.').count())) - .unwrap(); - return Self::Unhandled(shallowest_match.to_owned()) - .try_into_module_path(module_name, all_types); - } - } - - // Unsupported - // TODO: Support more types - // dbg!(value); - quote::quote! {&'py ::pyo3::types::PyAny} - } - } - } - - fn is_owned_hashable(&self) -> bool { - matches!( - self, - Self::PyBool - | Self::IpV4Addr - | Self::IpV6Addr - | Self::Path - | Self::PyDelta - | Self::PyDict { .. } - | Self::PyFrozenSet(..) - | Self::PyLong - | Self::PySet(..) 
- | Self::PyString - ) - } -} - -// TODO: Replace this with something more sensible -fn ugly_hack_repair_complex_split_sequence(sequence: &mut Vec) { - let mut traversed_all_elements = false; - let mut start_index = 0; - 'outer: while !traversed_all_elements { - traversed_all_elements = true; - 'inner: for i in start_index..(sequence.len() - 1) { - let mut n_scopes = sequence[i].matches('[').count() - sequence[i].matches(']').count(); - if n_scopes == 0 { - continue; - } - for j in (i + 1)..sequence.len() { - n_scopes += sequence[j].matches('[').count(); - n_scopes -= sequence[j].matches(']').count(); - if n_scopes == 0 { - let mut new_element = sequence[i].clone(); - for relevant_element in sequence.iter().take(j + 1).skip(i + 1) { - new_element = format!("{new_element},{relevant_element}"); - } - - // Update sequence and remove the elements that were merged - sequence[i] = new_element; - sequence.drain((i + 1)..=j); - - if j < sequence.len() - 1 { - traversed_all_elements = false; - start_index = i; - break 'inner; - } else { - break 'outer; - } - } - } - } - } -} diff --git a/pyo3_bindgen_engine/src/typing/from_py.rs b/pyo3_bindgen_engine/src/typing/from_py.rs new file mode 100644 index 0000000..357ae4a --- /dev/null +++ b/pyo3_bindgen_engine/src/typing/from_py.rs @@ -0,0 +1,525 @@ +use super::Type; +use crate::{PyBindgenError, Result}; +use itertools::Itertools; +use std::str::FromStr; + +/// Maps an arbitrary Python object to a `Type`. +/// +/// `None` becomes `Type::Unknown`, objects whose type reports `typing` as its `__module__` are resolved via `Type::from_typing()`, and all remaining objects are parsed from their string form. +impl TryFrom<&pyo3::types::PyAny> for Type { + type Error = PyBindgenError; + fn try_from(value: &pyo3::types::PyAny) -> Result { + match value { + // None -> Unknown type + none if none.is_none() => Ok(Self::Unknown), + // Handle PyType + t if t.is_instance_of::() => { + Self::try_from(t.downcast::()?) + } + // Handle typing + typing + if typing + .get_type() + .getattr(pyo3::intern!(value.py(), "__module__"))? 
+ .to_string() + == "typing" => + { + Self::from_typing(typing) + } + // Handle everything else as string + _ => { + if value.is_instance_of::() { + Self::from_str(value.downcast::()?.to_str()?) + } else { + Self::from_str(&value.to_string()) + } + } + } + } +} + +/// Maps a Python type object to a `Type`. +/// +/// The arms are tested in order with `is_subclass_of` and the first match wins; a type matching none of them is parsed from its string form. +impl TryFrom<&pyo3::types::PyType> for Type { + type Error = PyBindgenError; + fn try_from(value: &pyo3::types::PyType) -> Result { + Ok(match value { + // Primitives + t if t.is_subclass_of::()? => Self::PyBool, + t if t.is_subclass_of::()? => Self::PyByteArray, + t if t.is_subclass_of::()? => Self::PyBytes, + t if t.is_subclass_of::()? => Self::PyFloat, + t if t.is_subclass_of::()? => Self::PyLong, + t if t.is_subclass_of::()? => Self::PyString, + + // Collections + t if t.is_subclass_of::()? => Self::PyDict { + key_type: Box::new(Self::Unknown), + value_type: Box::new(Self::Unknown), + }, + t if t.is_subclass_of::()? => { + Self::PyFrozenSet(Box::new(Self::Unknown)) + } + t if t.is_subclass_of::()? => { + Self::PyList(Box::new(Self::Unknown)) + } + t if t.is_subclass_of::()? => Self::PySet(Box::new(Self::Unknown)), + t if t.is_subclass_of::()? => Self::PyTuple(vec![Self::Unknown]), + + // Additional types - std + t if t.is_subclass_of::()? => Self::PySlice, + + // Additional types - num-complex + t if t.is_subclass_of::()? => Self::PyComplex, + + // Additional types - datetime + // NOTE(review): Python's `datetime.datetime` is a subclass of `datetime.date`; if the type arguments of these arms follow the order of their result variants, the `PyDate` arm shadows the `PyDateTime` arm - confirm and consider swapping them + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyDate, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyDateTime, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyDelta, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyTime, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? => Self::PyTzInfo, + + // Python-specific types + t if t.is_subclass_of::()? => Self::PyCapsule, + t if t.is_subclass_of::()? => Self::PyCFunction, + #[cfg(not(Py_LIMITED_API))] + t if t.is_subclass_of::()? 
=> Self::PyCode, + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + t if t.is_subclass_of::()? => Self::PyFrame, + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + t if t.is_subclass_of::()? => Self::PyFunction { + param_types: vec![Self::PyEllipsis], + return_annotation: Box::new(Self::Unknown), + }, + t if t.is_subclass_of::()? => Self::PyModule, + #[cfg(not(PyPy))] + t if t.is_subclass_of::()? => Self::PySuper, + t if t.is_subclass_of::()? => Self::PyTraceback, + t if t.is_subclass_of::()? => Self::PyType, + + // Handle everything else as string + _ => Self::from_str(&value.to_string())?, + }) + } +} + +impl Type { + fn from_typing(value: &pyo3::types::PyAny) -> Result { + let py = value.py(); + debug_assert_eq!( + value + .get_type() + .getattr(pyo3::intern!(py, "__module__"))? + .to_string(), + "typing" + ); + + if let Ok(wrapping_type) = value.getattr(pyo3::intern!(py, "__origin__")) { + let wrapping_type = Self::try_from(wrapping_type)?; + Ok( + if let Ok(inner_types) = value + .getattr(pyo3::intern!(py, "__args__")) + .and_then(|inner_types| Ok(inner_types.downcast::()?)) + { + let inner_types = inner_types + .iter() + .map(Self::try_from) + .collect::>>()?; + match wrapping_type { + Self::Union(..) => { + if inner_types.len() == 2 && inner_types.contains(&Self::PyNone) { + Self::Optional(Box::new( + inner_types + .iter() + .find(|x| **x != Self::PyNone) + .unwrap_or_else(|| unreachable!()) + .to_owned(), + )) + } else { + Self::Union(inner_types) + } + } + Self::Optional(..) => { + debug_assert_eq!(inner_types.len(), 1); + Self::Optional(Box::new(inner_types[0].clone())) + } + Self::PyDict { .. } => { + debug_assert_eq!(inner_types.len(), 2); + Self::PyDict { + key_type: Box::new(inner_types[0].clone()), + value_type: Box::new(inner_types[1].clone()), + } + } + Self::PyFrozenSet(..) => { + debug_assert_eq!(inner_types.len(), 1); + Self::PyFrozenSet(Box::new(inner_types[0].clone())) + } + Self::PyList(..) 
=> { + debug_assert_eq!(inner_types.len(), 1); + Self::PyList(Box::new(inner_types[0].clone())) + } + Self::PySet(..) => { + debug_assert_eq!(inner_types.len(), 1); + Self::PySet(Box::new(inner_types[0].clone())) + } + Self::PyTuple(..) => Self::PyTuple(inner_types), + Self::PyFunction { .. } => { + debug_assert!(!inner_types.is_empty()); + Self::PyFunction { + param_types: match inner_types.len() { + 1 => Vec::default(), + _ => inner_types[..inner_types.len() - 1].to_owned(), + }, + return_annotation: Box::new( + inner_types + .last() + .unwrap_or_else(|| unreachable!()) + .to_owned(), + ), + } + } + Self::PyType => { + debug_assert_eq!(inner_types.len(), 1); + inner_types[0].clone() + } + _ => { + // TODO: Handle other types with inner types if useful (e.g. Generator) + wrapping_type + } + } + } else { + // If there are no inner types, return just the wrapping type + wrapping_type + }, + ) + } else { + // Handle everything else as string + Type::from_str(&value.to_string()) + } + } +} + +impl std::str::FromStr for Type { + type Err = PyBindgenError; + fn from_str(value: &str) -> Result { + Ok(match value { + "Any" => Self::PyAny, + + // Primitives + "bool" => Self::PyBool, + "bytearray" => Self::PyByteArray, + "bytes" => Self::PyBytes, + "float" => Self::PyFloat, + "int" => Self::PyLong, + "str" => Self::PyString, + + // Enums + optional + if optional.matches('|').count() == 1 && optional.matches("None").count() == 1 => + { + let inner_type = Self::from_str( + optional + .split('|') + .map(str::trim) + .find(|x| *x != "None") + .unwrap_or_else(|| unreachable!()), + )?; + Self::Optional(Box::new(inner_type)) + } + r#union if r#union.contains('|') => { + let mut inner_types = r#union + .split('|') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + let inner_types = inner_types + .iter() + .map(|x| Self::from_str(x)) + .collect::>()?; + Self::Union(inner_types) + } + "Union" => Self::Union(vec![Self::Unknown]), + "" 
| "None" | "NoneType" => Self::PyNone, + + // Collections + dict if dict.starts_with("dict[") && dict.ends_with(']') => { + let mut inner_types = dict + .strip_prefix("dict[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()) + .split(',') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + debug_assert_eq!(inner_types.len(), 2); + let inner_types = inner_types + .iter() + .map(|x| Self::from_str(x)) + .collect::>>()?; + Self::PyDict { + key_type: Box::new(inner_types[0].clone()), + value_type: Box::new(inner_types[1].clone()), + } + } + "dict" | "Dict" | "Mapping" => Self::PyDict { + key_type: Box::new(Self::Unknown), + value_type: Box::new(Self::Unknown), + }, + frozenset if frozenset.starts_with("frozenset[") && frozenset.ends_with(']') => { + let inner_type = Self::from_str( + frozenset + .strip_prefix("frozenset[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyFrozenSet(Box::new(inner_type)) + } + list if list.starts_with("list[") && list.ends_with(']') => { + let inner_type = Self::from_str( + list.strip_prefix("list[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } + "list" => Self::PyList(Box::new(Self::Unknown)), + sequence if sequence.starts_with("Sequence[") && sequence.ends_with(']') => { + let inner_type = Self::from_str( + sequence + .strip_prefix("Sequence[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } + "Sequence" | "Iterable" | "Iterator" => Self::PyList(Box::new(Self::Unknown)), + iterable if iterable.starts_with("Iterable[") && iterable.ends_with(']') => { + let inner_type = Self::from_str( + iterable + .strip_prefix("Iterable[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + 
.unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } + iterator if iterator.starts_with("Iterator[") && iterator.ends_with(']') => { + let inner_type = Self::from_str( + iterator + .strip_prefix("Iterator[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PyList(Box::new(inner_type)) + } + set if set.starts_with("set[") && set.ends_with(']') => { + let inner_type = Self::from_str( + set.strip_prefix("set[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?; + Self::PySet(Box::new(inner_type)) + } + tuple if tuple.starts_with("tuple[") && tuple.ends_with(']') => { + let mut inner_types = tuple + .strip_prefix("tuple[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()) + .split(',') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + let inner_types = inner_types + .iter() + .map(|x| Self::from_str(x)) + .collect::>()?; + Self::PyTuple(inner_types) + } + "tuple" => Self::PyTuple(vec![Self::Unknown]), + + // Additional types - std + "ipaddress.IPv4Address" => Self::IpV4Addr, + "ipaddress.IPv6Address" => Self::IpV6Addr, + "os.PathLike" | "pathlib.Path" => Self::Path, + "slice" => Self::PySlice, + + // Additional types - num-complex + "complex" => Self::PyComplex, + + // Additional types - datetime + #[cfg(not(Py_LIMITED_API))] + "datetime.date" => Self::PyDate, + #[cfg(not(Py_LIMITED_API))] + "datetime.datetime" => Self::PyDateTime, + "timedelta" => Self::PyDelta, + #[cfg(not(Py_LIMITED_API))] + "datetime.time" => Self::PyTime, + #[cfg(not(Py_LIMITED_API))] + "datetime.tzinfo" => Self::PyTzInfo, + + // Python-specific types + "capsule" => Self::PyCapsule, + "cfunction" => Self::PyCFunction, + #[cfg(not(Py_LIMITED_API))] + "code" => Self::PyCode, + "Ellipsis" | "..." 
=> Self::PyEllipsis, + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + "frame" => Self::PyFrame, + "function" => Self::PyFunction { + param_types: vec![Self::PyEllipsis], + return_annotation: Box::new(Self::Unknown), + }, + callable if callable.starts_with("Callable[") && callable.ends_with(']') => { + let mut inner_types = callable + .strip_prefix("Callable[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()) + .split(',') + .map(|x| x.trim().to_owned()) + .collect_vec(); + repair_complex_sequence(&mut inner_types, ','); + debug_assert!(!inner_types.is_empty()); + let inner_types = inner_types + .iter() + .map(|x| Self::from_str(x)) + .collect::>>()?; + Self::PyFunction { + param_types: match inner_types.len() { + 1 => Vec::default(), + _ => inner_types[..inner_types.len() - 1].to_owned(), + }, + return_annotation: Box::new( + inner_types + .last() + .unwrap_or_else(|| unreachable!()) + .to_owned(), + ), + } + } + "Callable" | "callable" => Self::PyFunction { + param_types: vec![Self::PyEllipsis], + return_annotation: Box::new(Self::Unknown), + }, + "module" => Self::PyModule, + #[cfg(not(PyPy))] + "super" => Self::PySuper, + "traceback" => Self::PyTraceback, + typ if typ.starts_with("type[") && typ.ends_with(']') => Self::from_str( + typ.strip_prefix("type[") + .unwrap_or_else(|| unreachable!()) + .strip_suffix(']') + .unwrap_or_else(|| unreachable!()), + )?, + + // classes + class if class.starts_with("") => Self::from_str( + class + .strip_prefix("") + .unwrap_or_else(|| unreachable!()), + )?, + + // enums + enume if enume.starts_with("") => Self::from_str( + enume + .strip_prefix("") + .unwrap_or_else(|| unreachable!()), + )?, + + // typing + typing if typing.starts_with("typing.") => Self::from_str( + typing + .strip_prefix("typing.") + .unwrap_or_else(|| unreachable!()), + )?, + + // collections.abc + collections_abc if collections_abc.starts_with("collections.abc.") => Self::from_str( + collections_abc + 
.strip_prefix("collections.abc.") + .unwrap_or_else(|| unreachable!()), + )?, + // collections + collections if collections.starts_with("collections.") => Self::from_str( + collections + .strip_prefix("collections.") + .unwrap_or_else(|| unreachable!()), + )?, + + // Forbidden types + forbidden if crate::config::FORBIDDEN_TYPE_NAMES.contains(&forbidden) => Self::PyAny, + + // Other types, that might be known (custom types of modules) + other => Self::Other(other.to_owned()), + }) + } +} + +// TODO: Refactor `repair_complex_sequence()` into something more sensible +/// Repairs complex wrapped sequences. +fn repair_complex_sequence(sequence: &mut Vec, separator: char) { + debug_assert!(!sequence.is_empty()); + debug_assert!({ + let merged_sequence = sequence.iter().join(""); + merged_sequence.matches('[').count() == merged_sequence.matches(']').count() + }); + + let mut traversed_all_elements = false; + let mut start_index = 0; + 'outer: while !traversed_all_elements { + traversed_all_elements = true; + 'inner: for i in start_index..(sequence.len() - 1) { + let mut n_scopes = sequence[i].matches('[').count() - sequence[i].matches(']').count(); + if n_scopes == 0 { + continue; + } + for j in (i + 1)..sequence.len() { + n_scopes += sequence[j].matches('[').count(); + n_scopes -= sequence[j].matches(']').count(); + if n_scopes == 0 { + let mut new_element = sequence[i].clone(); + for relevant_element in sequence.iter().take(j + 1).skip(i + 1) { + new_element = format!("{new_element}{separator}{relevant_element}"); + } + sequence[i] = new_element; + sequence.drain((i + 1)..=j); + if j < sequence.len() - 1 { + traversed_all_elements = false; + start_index = i; + break 'inner; + } else { + break 'outer; + } + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_repair_complex_sequence() { + // Arrange + let mut sequence = vec!["dict[str".to_string(), "Any]".to_string()]; + + // Act + repair_complex_sequence(&mut sequence, ','); + + // Assert 
+ assert_eq!(sequence, vec!["dict[str,Any]".to_string()]); + } +} diff --git a/pyo3_bindgen_engine/src/typing/into_rs.rs b/pyo3_bindgen_engine/src/typing/into_rs.rs new file mode 100644 index 0000000..93d0e35 --- /dev/null +++ b/pyo3_bindgen_engine/src/typing/into_rs.rs @@ -0,0 +1,287 @@ +use super::Type; +use crate::syntax::Path; +use itertools::Itertools; +use quote::quote; +use rustc_hash::FxHashMap as HashMap; +use std::rc::Rc; + +impl Type { + pub fn into_rs_owned(self, local_types: &HashMap) -> proc_macro2::TokenStream { + let owned = self.into_rs(local_types).owned; + Rc::into_inner(owned).unwrap_or_else(|| unreachable!()) + } + + pub fn into_rs_borrowed(self, local_types: &HashMap) -> proc_macro2::TokenStream { + let borrowed = self.into_rs(local_types).borrowed; + Rc::into_inner(borrowed).unwrap_or_else(|| unreachable!()) + } + + pub fn preprocess_borrowed( + &self, + ident: &syn::Ident, + local_types: &HashMap, + ) -> proc_macro2::TokenStream { + match self { + Self::PyDict { + key_type, + value_type, + } if !key_type.is_hashable() + || value_type + .clone() + .into_rs(local_types) + .owned + .to_string() + .contains("PyAny") => + { + quote! { + let #ident = ::pyo3::types::IntoPyDict::into_py_dict(#ident, py); + } + } + Self::PyTuple(inner_types) if inner_types.len() < 2 => { + quote! { + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyTuple>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } + } + Self::PyAny + | Self::Unknown + | Self::Union(..) + | Self::PyNone + | Self::PyDelta + | Self::PyEllipsis => { + quote! { + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyAny>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } + } + #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] + Self::PyFunction { .. } => { + quote! 
{ + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyAny>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } + } + Self::Other(type_name) + if Self::try_map_external_type(type_name).is_none() + && !local_types.contains_key(&Path::from_py(type_name)) => + { + quote! { + let #ident = ::pyo3::IntoPy::<::pyo3::Py<::pyo3::types::PyAny>>::into_py(#ident, py); + let #ident = #ident.as_ref(py); + } + } + _ => proc_macro2::TokenStream::new(), + } + } + + fn into_rs(self, local_types: &HashMap) -> OutputType { + match self { + Self::PyAny | Self::Unknown => OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ), + Self::Other(..) => self.map_type(local_types), + + // Primitives + Self::PyBool => OutputType::new_identical(quote!(bool)), + Self::PyByteArray | Self::PyBytes => OutputType::new(quote!(Vec), quote!(&[u8])), + Self::PyFloat => OutputType::new_identical(quote!(f64)), + Self::PyLong => OutputType::new_identical(quote!(i64)), + Self::PyString => OutputType::new(quote!(::std::string::String), quote!(&str)), + + // Enums + Self::Optional(inner_type) => { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new_identical(quote!(::std::option::Option<#inner_type>)) + } + Self::Union(_inner_types) => { + // TODO: Support Rust enums where possible | alternatively, overload functions for each variant + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) + } + Self::PyNone => { + // TODO: Determine if PyNone is even possible + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) + } + + // Collections + Self::PyDict { + key_type, + value_type, + } => { + let value_type = value_type.into_rs(local_types).owned; + if key_type.is_hashable() && !value_type.to_string().contains("PyAny") { + let key_type = 
key_type.into_rs(local_types).owned; + OutputType::new( + quote!(::std::collections::HashMap<#key_type, #value_type>), + quote!(&::std::collections::HashMap<#key_type, #value_type>), + ) + } else { + OutputType::new( + quote!(&'py ::pyo3::types::PyDict), + quote!(impl ::pyo3::types::IntoPyDict), + ) + } + } + Self::PyFrozenSet(inner_type) => { + if inner_type.is_hashable() { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new( + quote!(::std::collections::HashSet<#inner_type>), + quote!(&::std::collections::HashSet<#inner_type>), + ) + } else { + OutputType::new_identical(quote!(&'py ::pyo3::types::PyFrozenSet)) + } + } + Self::PyList(inner_type) => { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new(quote!(Vec<#inner_type>), quote!(&[#inner_type])) + } + Self::PySet(inner_type) => { + if inner_type.is_hashable() { + let inner_type = inner_type.into_rs(local_types).owned; + OutputType::new( + quote!(::std::collections::HashSet<#inner_type>), + quote!(&::std::collections::HashSet<#inner_type>), + ) + } else { + OutputType::new_identical(quote!(&'py ::pyo3::types::PySet)) + } + } + Self::PyTuple(inner_types) => { + if inner_types.len() < 2 { + OutputType::new( + quote!(&'py ::pyo3::types::PyTuple), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyTuple>>), + ) + } else if inner_types.len() == 2 + && *inner_types.last().unwrap_or_else(|| unreachable!()) == Self::PyEllipsis + { + Self::PyList(Box::new(inner_types[0].clone())).into_rs(local_types) + } else { + let inner_types = inner_types + .into_iter() + .map(|inner_type| inner_type.into_rs(local_types).owned) + .collect_vec(); + OutputType::new_identical(quote!((#(#inner_types),*))) + } + } + + // Additional types - std + Self::IpV4Addr => OutputType::new_identical(quote!(::std::net::IpV4Addr)), + Self::IpV6Addr => OutputType::new_identical(quote!(::std::net::IpV6Addr)), + Self::Path => OutputType::new(quote!(::std::path::PathBuf), quote!(&::std::path::Path)), 
+ // TODO: Map `PySlice` to `std::ops::Range` if possible + Self::PySlice => OutputType::new_identical(quote!(&'py ::pyo3::types::PySlice)), + + // Additional types - num-complex + // TODO: Support conversion of `PyComplex` to `num_complex::Complex` if enabled via `num-complex` feature + Self::PyComplex => OutputType::new_identical(quote!(&'py ::pyo3::types::PyComplex)), + + // Additional types - datetime + #[cfg(not(Py_LIMITED_API))] + Self::PyDate => OutputType::new_identical(quote!(&'py ::pyo3::types::PyDate)), + #[cfg(not(Py_LIMITED_API))] + Self::PyDateTime => OutputType::new_identical(quote!(&'py ::pyo3::types::PyDateTime)), + Self::PyDelta => { + // The trait `ToPyObject` is not implemented for `Duration`, so we can't use it here yet + // OutputType::new_identical(quote!(::std::time::Duration)) + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) + } + #[cfg(not(Py_LIMITED_API))] + Self::PyTime => OutputType::new_identical(quote!(&'py ::pyo3::types::PyTime)), + #[cfg(not(Py_LIMITED_API))] + Self::PyTzInfo => OutputType::new_identical(quote!(&'py ::pyo3::types::PyTzInfo)), + + // Python-specific types + Self::PyCapsule => OutputType::new_identical(quote!(&'py ::pyo3::types::PyCapsule)), + Self::PyCFunction => OutputType::new_identical(quote!(&'py ::pyo3::types::PyCFunction)), + #[cfg(not(Py_LIMITED_API))] + Self::PyCode => OutputType::new_identical(quote!(&'py ::pyo3::types::PyCode)), + Self::PyEllipsis => { + // TODO: Determine if PyEllipsis is even possible + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) + } + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + Self::PyFrame => OutputType::new_identical(quote!(&'py ::pyo3::types::PyFrame)), + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + Self::PyFunction { .. 
} => { + OutputType::new_identical(quote!(&'py ::pyo3::types::PyFunction)) + } + #[cfg(not(all(not(Py_LIMITED_API), not(PyPy))))] + Self::PyFunction { .. } => OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ), + Self::PyModule => OutputType::new_identical(quote!(&'py ::pyo3::types::PyModule)), + #[cfg(not(PyPy))] + Self::PySuper => OutputType::new_identical(quote!(&'py ::pyo3::types::PySuper)), + Self::PyTraceback => OutputType::new_identical(quote!(&'py ::pyo3::types::PyTraceback)), + Self::PyType => OutputType::new_identical(quote!(&'py ::pyo3::types::PyType)), + } + } + + fn map_type(self, local_types: &HashMap) -> OutputType { + // Get the inner name of the type + let Self::Other(type_name) = self else { + unreachable!() + }; + + // Try to map the external types + if let Some(external_type) = Self::try_map_external_type(&type_name) { + return external_type; + } + + // Try to map the local types + if let Some(relative_path) = local_types.get(&Path::from_py(&type_name)) { + let relative_path: syn::Path = relative_path.try_into().unwrap(); + return OutputType::new_identical(quote!(&'py #relative_path)); + } + + // Unhandled types + OutputType::new( + quote!(&'py ::pyo3::types::PyAny), + quote!(impl ::pyo3::IntoPy<::pyo3::Py<::pyo3::types::PyAny>>), + ) + } + + fn try_map_external_type(_type_name: &str) -> Option { + // TODO: Handle types from other packages with Rust bindings here (e.g. 
NumPy) + None + } +} + +#[derive(Debug, Clone)] +struct OutputType { + owned: Rc, + borrowed: Rc, +} + +impl OutputType { + fn new(own: proc_macro2::TokenStream, bor: proc_macro2::TokenStream) -> Self { + Self { + owned: Rc::new(own), + borrowed: Rc::new(bor), + } + } + + fn new_identical(output_type: proc_macro2::TokenStream) -> Self { + let output_type = Rc::new(output_type); + Self { + owned: output_type.clone(), + borrowed: output_type, + } + } +} diff --git a/pyo3_bindgen_engine/src/typing/mod.rs b/pyo3_bindgen_engine/src/typing/mod.rs new file mode 100644 index 0000000..c5ec710 --- /dev/null +++ b/pyo3_bindgen_engine/src/typing/mod.rs @@ -0,0 +1,90 @@ +pub(crate) mod from_py; +pub(crate) mod into_rs; + +/// Enum that maps Python types to Rust types. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Type { + PyAny, + Other(String), + Unknown, + + // Primitives + PyBool, + PyByteArray, + PyBytes, + PyFloat, + PyLong, + PyString, + + // Enums + Optional(Box), + Union(Vec), + PyNone, + + // Collections + PyDict { + key_type: Box, + value_type: Box, + }, + PyFrozenSet(Box), + PyList(Box), + PySet(Box), + PyTuple(Vec), + + // Additional types - std + IpV4Addr, + IpV6Addr, + Path, + PySlice, + + // Additional types - num-complex + PyComplex, + + // Additional types - datetime + #[cfg(not(Py_LIMITED_API))] + PyDate, + #[cfg(not(Py_LIMITED_API))] + PyDateTime, + PyDelta, + #[cfg(not(Py_LIMITED_API))] + PyTime, + #[cfg(not(Py_LIMITED_API))] + PyTzInfo, + + // Python-specific types + PyCapsule, + PyCFunction, + #[cfg(not(Py_LIMITED_API))] + PyCode, + PyEllipsis, + #[cfg(all(not(Py_LIMITED_API), not(PyPy)))] + PyFrame, + PyFunction { + param_types: Vec, + return_annotation: Box, + }, + PyModule, + #[cfg(not(PyPy))] + PySuper, + PyTraceback, + #[allow(clippy::enum_variant_names)] + PyType, +} + +impl Type { + fn is_hashable(&self) -> bool { + matches!( + self, + Self::PyBool + | Self::IpV4Addr + | Self::IpV6Addr + | Self::Path + | Self::PyDelta + | Self::PyDict { .. 
} + | Self::PyFrozenSet(..) + | Self::PyLong + | Self::PySet(..) + | Self::PyString + ) + } +} diff --git a/pyo3_bindgen_engine/src/utils/error.rs b/pyo3_bindgen_engine/src/utils/error.rs new file mode 100644 index 0000000..f1f445e --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/error.rs @@ -0,0 +1,18 @@ +/// Error type for `pyo3_bindgen` operations. +#[derive(thiserror::Error, Debug)] +pub enum PyBindgenError { + #[error(transparent)] + IoError(#[from] std::io::Error), + #[error(transparent)] + PyError(#[from] pyo3::PyErr), + #[error("Failed to downcast Python object")] + PyDowncastError, + #[error(transparent)] + SynError(#[from] syn::Error), +} + +impl From> for PyBindgenError { + fn from(value: pyo3::PyDowncastError) -> Self { + pyo3::PyErr::from(value).into() + } +} diff --git a/pyo3_bindgen_engine/src/utils/io.rs b/pyo3_bindgen_engine/src/utils/io.rs new file mode 100644 index 0000000..06f898f --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/io.rs @@ -0,0 +1,43 @@ +use crate::Result; + +pub fn with_suppressed_python_output( + py: pyo3::Python, + suppress_stdout: bool, + suppress_stderr: bool, + f: impl FnOnce() -> Result, +) -> Result { + // If both stdout and stderr are suppressed, there's no need to do anything + if !suppress_stdout && !suppress_stderr { + return f(); + } + + let sys = py.import(pyo3::intern!(py, "sys"))?; + let stdout_ident = pyo3::intern!(py, "stdout"); + let stderr_ident = pyo3::intern!(py, "stderr"); + + // Record the original stdout and stderr + let original_stdout = sys.getattr(stdout_ident)?; + let original_stderr = sys.getattr(stderr_ident)?; + + // Suppress the output + let supressed_output = py.eval(r"lambda: type('SupressedOutput', (), {'write': lambda self, x: None, 'flush': lambda self: None})", None, None)?; + if suppress_stdout { + sys.setattr(stdout_ident, supressed_output)?; + } + if suppress_stderr { + sys.setattr(stderr_ident, supressed_output)?; + } + + // Run the function + let ret = f()?; + + // Restore the original 
stdout and stderr + if suppress_stdout { + sys.setattr(stdout_ident, original_stdout)?; + } + if suppress_stderr { + sys.setattr(stderr_ident, original_stderr)?; + } + + Ok(ret) +} diff --git a/pyo3_bindgen_engine/src/utils/mod.rs b/pyo3_bindgen_engine/src/utils/mod.rs new file mode 100644 index 0000000..09821bb --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/mod.rs @@ -0,0 +1,5 @@ +//! Various utilities. + +pub mod error; +pub(crate) mod io; +pub mod result; diff --git a/pyo3_bindgen_engine/src/utils/result.rs b/pyo3_bindgen_engine/src/utils/result.rs new file mode 100644 index 0000000..35673bc --- /dev/null +++ b/pyo3_bindgen_engine/src/utils/result.rs @@ -0,0 +1,5 @@ +/// Result wrapper for `PyBindgenError`. +pub type PyBindgenResult = std::result::Result; + +/// Crate-local alias for `PyBindgenResult`. +pub(crate) type Result = PyBindgenResult; diff --git a/pyo3_bindgen_engine/tests/bindgen.rs b/pyo3_bindgen_engine/tests/bindgen.rs index befbc2c..d65e682 100644 --- a/pyo3_bindgen_engine/tests/bindgen.rs +++ b/pyo3_bindgen_engine/tests/bindgen.rs @@ -1,9 +1,9 @@ macro_rules! test_bindgen { { $(#[$meta:meta])* - $test_name:ident $(,)? - $(py)?$(python)? $(:)? $code_py:literal $(,)? - $(rs)?$(rust)? $(:)? $code_rs:literal $(,)? + $test_name:ident $(,)? + $(py)?$(python)?$(:)? $code_py:literal $(,)? + $(rs)?$(rust)?$(:)? $code_rs:literal $(,)? } => { #[test] $(#[$meta])* @@ -13,13 +13,16 @@ macro_rules! test_bindgen { const CODE_RS: &str = indoc::indoc! 
{ $code_rs }; // Act - let bindings = pyo3_bindgen_engine::generate_bindings_from_str( - CODE_PY, - concat!("t_mod_", stringify!($test_name)), - ) - .unwrap(); + let bindings = pyo3_bindgen_engine::Codegen::default() + .module_from_str(CODE_PY, concat!("mod_", stringify!($test_name))) + .unwrap() + .generate() + .unwrap(); // Assert + fn format_code(input: &str) -> String { + prettyplease::unparse(&syn::parse_str(input).unwrap()) + } let generated_code = format_code(&bindings.to_string()); let target_code = format_code(CODE_RS); assert_eq!( @@ -30,192 +33,202 @@ macro_rules! test_bindgen { }; } -fn format_code(input: &str) -> String { - prettyplease::unparse(&syn::parse_str(input).unwrap()) -} - test_bindgen! { - test_bindgen_attribute + bindgen_property - py:r#" - t_const_float: float = 0.42 + py: r#" + my_property: float = 0.42 "# - rs:r#" - /// - #[allow(clippy::all, non_camel_case_types, non_snake_case, non_upper_case_globals, unused)] - mod t_mod_test_bindgen_attribute { - ///Getter for the `t_const_float` attribute - pub fn t_const_float<'py>(py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_attribute"))? - .getattr(::pyo3::intern!(py, "t_const_float"))? - .extract() + rs: r#" + #[allow( + clippy::all, + clippy::nursery, + clippy::pedantic, + non_camel_case_types, + non_snake_case, + non_upper_case_globals, + unused + )] + pub mod mod_bindgen_property { + pub fn my_property<'py>(py: ::pyo3::marker::Python<'py>) -> ::pyo3::PyResult { + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "mod_bindgen_property"))? + .getattr(::pyo3::intern!(py, "my_property"))?, + ) } - ///Setter for the `t_const_float` attribute - pub fn set_t_const_float<'py>( + pub fn set_my_property<'py>( py: ::pyo3::marker::Python<'py>, - value: f64, + p_value: f64, ) -> ::pyo3::PyResult<()> { - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_attribute"))? 
- .setattr(::pyo3::intern!(py, "t_const_float"), value)?; - Ok(()) + py.import(::pyo3::intern!(py, "mod_bindgen_property"))? + .setattr(::pyo3::intern!(py, "my_property"), p_value) } } "# } test_bindgen! { - test_bindgen_function + bindgen_function - py:r#" - def t_fn(t_arg1: str) -> int: - """t_docs""" + py: r#" + def my_function(my_arg1: str) -> int: + """My docstring for `my_function`""" ... "# - rs:r#" - /// + rs: r#" #[allow( clippy::all, + clippy::nursery, + clippy::pedantic, non_camel_case_types, non_snake_case, non_upper_case_globals, unused )] - mod t_mod_test_bindgen_function { - ///t_docs - pub fn t_fn<'py>( + pub mod mod_bindgen_function { + /// My docstring for `my_function` + pub fn my_function<'py>( py: ::pyo3::marker::Python<'py>, - t_arg1: &str, + p_my_arg1: &str, ) -> ::pyo3::PyResult { - let __internal_args = (); - let __internal_kwargs = ::pyo3::types::PyDict::new(py); - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - py.import(::pyo3::intern!(py, "t_mod_test_bindgen_function"))? - .call_method( - ::pyo3::intern!(py, "t_fn"), - __internal_args, - Some(__internal_kwargs), - )? - .extract() + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "mod_bindgen_function"))? + .call_method1( + ::pyo3::intern!(py, "my_function"), + ::pyo3::types::PyTuple::new( + py, + [::pyo3::ToPyObject::to_object(&p_my_arg1, py)], + ), + )?, + ) } } "# } test_bindgen! { - test_bindgen_class + bindgen_class - py:r#" + py: r#" from typing import Dict, Optional - class t_class: - """t_docs""" - def __init__(self, t_arg1: str, t_arg2: Optional[int] = None): - """t_docs_init""" + class MyClass: + """My docstring for `MyClass`""" + def __init__(self, my_arg1: str, my_arg2: Optional[int] = None): + """My docstring for __init__""" ... - def t_method(self, t_arg1: Dict[str, int], **kwargs): - """t_docs_method""" + def my_method(self, my_arg1: Dict[str, int], **kwargs): + """My docstring for `my_method`""" ... 
@property - def t_prop(self) -> int: + def my_property(self) -> int: ... - @t_prop.setter - def t_prop(self, value: int): + @my_property.setter + def my_property(self, value: int): ... + + def my_function_with_class_param(my_arg1: MyClass): + ... + + def my_function_with_class_return() -> MyClass: + ... "# - rs:r#" - /// + rs: r#" #[allow( clippy::all, + clippy::nursery, + clippy::pedantic, non_camel_case_types, non_snake_case, non_upper_case_globals, unused )] - mod t_mod_test_bindgen_class { - ///t_docs + pub mod mod_bindgen_class { + /// My docstring for `MyClass` #[repr(transparent)] - pub struct t_class(::pyo3::PyAny); - ::pyo3::pyobject_native_type_named!(t_class); + pub struct MyClass(::pyo3::PyAny); + ::pyo3::pyobject_native_type_named!(MyClass); ::pyo3::pyobject_native_type_info!( - t_class, + MyClass, ::pyo3::pyobject_native_static_type_object!(::pyo3::ffi::PyBaseObject_Type), - ::std::option::Option::Some("t_mod_test_bindgen_classt_class") + ::std::option::Option::Some("mod_bindgen_class.MyClass") ); - ::pyo3::pyobject_native_type_extract!(t_class); + ::pyo3::pyobject_native_type_extract!(MyClass); #[automatically_derived] - impl t_class { - ///t_docs_init - pub fn __init__<'py>( - &'py self, + impl MyClass { + /// My docstring for __init__ + pub fn new<'py>( py: ::pyo3::marker::Python<'py>, - t_arg1: &str, - t_arg2: ::std::option::Option, - ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { - let __internal_args = (); - let __internal_kwargs = ::pyo3::types::PyDict::new(py); - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg2"), t_arg2)?; - self.call_method( - ::pyo3::intern!(py, "__init__"), - __internal_args, - Some(__internal_kwargs), - )? - .extract() + p_my_arg1: &str, + p_my_arg2: ::std::option::Option, + ) -> ::pyo3::PyResult<&'py Self> { + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "mod_bindgen_class"))? + .getattr(::pyo3::intern!(py, "MyClass"))? 
+ .call1(::pyo3::types::PyTuple::new( + py, + [ + ::pyo3::ToPyObject::to_object(&p_my_arg1, py), + ::pyo3::ToPyObject::to_object(&p_my_arg2, py), + ], + ))?, + ) } - ///t_docs_method - pub fn t_method<'py>( + /// My docstring for `my_method` + pub fn my_method<'py>( &'py self, py: ::pyo3::marker::Python<'py>, - t_arg1: &::std::collections::HashMap<::std::string::String, i64>, - kwargs: &'py ::pyo3::types::PyDict, + p_my_arg1: &::std::collections::HashMap<::std::string::String, i64>, + p_kwargs: impl ::pyo3::types::IntoPyDict, ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { - let __internal_args = (); - let __internal_kwargs = kwargs; - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - self.call_method( - ::pyo3::intern!(py, "t_method"), - __internal_args, - Some(__internal_kwargs), - )? - .extract() + let p_kwargs = ::pyo3::types::IntoPyDict::into_py_dict(p_kwargs, py); + ::pyo3::FromPyObject::extract(self.0.call_method( + ::pyo3::intern!(py, "my_method"), + ::pyo3::types::PyTuple::new(py, [::pyo3::ToPyObject::to_object(&p_my_arg1, py)]), + Some(p_kwargs), + )?) } - ///Getter for the `t_prop` attribute - pub fn t_prop<'py>( + pub fn my_property<'py>( &'py self, py: ::pyo3::marker::Python<'py>, ) -> ::pyo3::PyResult { - self.getattr(::pyo3::intern!(py, "t_prop"))?.extract() + self.0 + .getattr(::pyo3::intern!(py, "my_property"))? 
+ .extract() } - ///Setter for the `t_prop` attribute - pub fn set_t_prop<'py>( + pub fn set_my_property<'py>( &'py self, py: ::pyo3::marker::Python<'py>, - value: i64, + p_value: i64, ) -> ::pyo3::PyResult<()> { - self.setattr(::pyo3::intern!(py, "t_prop"), value)?; - Ok(()) - } - ///t_docs_init - pub fn new<'py>( - &'py self, - py: ::pyo3::marker::Python<'py>, - t_arg1: &str, - t_arg2: ::std::option::Option, - ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { - let __internal_args = (); - let __internal_kwargs = ::pyo3::types::PyDict::new(py); - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg1"), t_arg1)?; - __internal_kwargs.set_item(::pyo3::intern!(py, "t_arg2"), t_arg2)?; - self.call_method( - ::pyo3::intern!(py, "__init__"), - __internal_args, - Some(__internal_kwargs), - )? - .extract() + self.0.setattr(::pyo3::intern!(py, "my_property"), p_value) } } + pub fn my_function_with_class_param<'py>( + py: ::pyo3::marker::Python<'py>, + p_my_arg1: &'py MyClass, + ) -> ::pyo3::PyResult<&'py ::pyo3::types::PyAny> { + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "mod_bindgen_class"))? + .call_method1( + ::pyo3::intern!(py, "my_function_with_class_param"), + ::pyo3::types::PyTuple::new( + py, + [::pyo3::ToPyObject::to_object(&p_my_arg1, py)], + ), + )?, + ) + } + pub fn my_function_with_class_return<'py>( + py: ::pyo3::marker::Python<'py>, + ) -> ::pyo3::PyResult<&'py MyClass> { + ::pyo3::FromPyObject::extract( + py.import(::pyo3::intern!(py, "mod_bindgen_class"))? + .call_method0(::pyo3::intern!(py, "my_function_with_class_return"))?, + ) + } } "# } diff --git a/pyo3_bindgen_macros/src/lib.rs b/pyo3_bindgen_macros/src/lib.rs index 5fad22a..fe6954b 100644 --- a/pyo3_bindgen_macros/src/lib.rs +++ b/pyo3_bindgen_macros/src/lib.rs @@ -8,24 +8,41 @@ mod parser; /// /// Panics if the bindings cannot be generated. 
/// -/// # Example +/// # Examples /// -/// ```ignore -/// // use pyo3_bindgen::import_python; -/// use pyo3_bindgen_macros::import_python; +/// Here is a simple example of how to use the macro to generate bindings for the `sys` module. /// +/// ```ignore /// import_python!("sys"); /// pub use sys::*; +/// ``` /// +/// For consistency, the top-level package is always included in the generated bindings. +/// +/// ```ignore +/// import_python!("mod.submod.subsubmod"); +/// pub use mod::submod::subsubmod::*; +/// ``` +/// +/// Furthermore, the actual name of the package is always used regardless of how it is aliased. +/// +/// ```ignore /// import_python!("os.path"); -/// pub use path::*; +/// pub use posixpath::*; /// ``` #[proc_macro] pub fn import_python(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - let parser::Args { module_name_py } = syn::parse_macro_input!(input as parser::Args); + let parser::Args { module_name } = syn::parse_macro_input!(input as parser::Args); // Generate the bindings - pyo3_bindgen_engine::generate_bindings(&module_name_py) - .unwrap_or_else(|_| panic!("Failed to generate bindings for module: {module_name_py}")) + pyo3_bindgen_engine::Codegen::default() + .module_name(&module_name) + .unwrap_or_else(|err| { + panic!("Failed to parse the content of '{module_name}' Python module:\n{err}") + }) + .generate() + .unwrap_or_else(|err| { + panic!("Failed to generate bindings for '{module_name}' Python module:\n{err}") + }) .into() } diff --git a/pyo3_bindgen_macros/src/parser.rs b/pyo3_bindgen_macros/src/parser.rs index 1110419..5ed890e 100644 --- a/pyo3_bindgen_macros/src/parser.rs +++ b/pyo3_bindgen_macros/src/parser.rs @@ -5,16 +5,16 @@ use syn::{ LitStr, }; -/// Arguments for the `import_python` procedural macro +/// Arguments for the `import_python` procedural macro. 
pub struct Args { - /// Name of the Python module to generate bindings for - pub module_name_py: String, + /// Name of the Python module for which to generate the bindings. + pub module_name: String, } impl Parse for Args { fn parse(input: ParseStream) -> Result<Self> { // Python module name might contain dots, so it is parsed as a string literal - let module_name_py = input.parse::<LitStr>()?.value(); - Ok(Args { module_name_py }) + let module_name = input.parse::<LitStr>()?.value(); + Ok(Args { module_name }) } }