From c4994b3093e02231339d22e12be08418b2af781f Mon Sep 17 00:00:00 2001 From: Seunghoon Lee Date: Wed, 15 Jan 2025 15:11:27 +0900 Subject: [PATCH] ZLUDA v3.8.7 (#66) * Add dummy cuFFTW library. * Bump version. * Implement fft functions required to run torch fftn, ifftn, and rfftn. --- Cargo.lock | 6 +- Cargo.toml | 131 +++---- hipblaslt-sys/src/lib.rs | 1 + zluda_blas/src/lib.rs | 107 +----- zluda_fft/src/cufft.rs | 4 +- zluda_fft/src/cufftxt.rs | 19 +- zluda_fft/src/lib.rs | 138 +++++++- zluda_fftw/Cargo.toml | 15 + zluda_fftw/README | 2 + zluda_fftw/src/cufftw.rs | 736 +++++++++++++++++++++++++++++++++++++++ zluda_fftw/src/lib.rs | 3 + zluda_inject/src/bin.rs | 2 +- zluda_rtc/src/lib.rs | 72 +++- zluda_rtc/src/nvrtc.rs | 6 +- 14 files changed, 1049 insertions(+), 193 deletions(-) create mode 100644 zluda_fftw/Cargo.toml create mode 100644 zluda_fftw/README create mode 100644 zluda_fftw/src/cufftw.rs create mode 100644 zluda_fftw/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 39309981..2805aa7a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2140,7 +2140,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2523,6 +2523,10 @@ dependencies = [ "zluda_dark_api", ] +[[package]] +name = "zluda_fftw" +version = "0.0.0" + [[package]] name = "zluda_inject" version = "0.0.0" diff --git a/Cargo.toml b/Cargo.toml index e4a65c6c..30b30f05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,65 +1,66 @@ -[workspace] - -resolver = "2" - -# Remember to also update the project's Cargo.toml -# if it's a top-level project -members = [ - "atiadlxx-sys", - "comgr", - "cuda_base", - "cuda_types", - "detours-sys", - "ext/llvm-sys.rs", - "hip_common", - "hip_runtime-sys", - "hipblaslt-sys", - "hipfft-sys", - "hiprt-sys", - "miopen-sys", - "offline_compiler", - "optix_base", - "optix_dump", - "process_address_table", - "ptx", - "rocblas-sys", - "rocm_smi-sys", - "rocsparse-sys", - "xtask", - "zluda", - "zluda_api", - "zluda_blas", - "zluda_blaslt", - "zluda_ccl", - "zluda_dark_api", - "zluda_dnn", - "zluda_dump", - "zluda_fft", - "zluda_inject", - "zluda_lib", - "zluda_llvm", - "zluda_ml", - "zluda_redirect", - "zluda_rt", - "zluda_rtc", - "zluda_runtime", - "zluda_sparse", -] - -# Cargo does not support OS-specific or profile-specific -# targets. We keep list here to bare minimum and rely on xtask -default-members = [ - "zluda_lib", - "zluda_ml", - "zluda_inject", - "zluda_redirect" -] - -[profile.dev.package.blake3] -opt-level = 3 - -[profile.dev.package.lz4-sys] -opt-level = 3 - -[profile.dev.package.xtask] -opt-level = 2 +[workspace] + +resolver = "2" + +# Remember to also update the project's Cargo.toml +# if it's a top-level project +members = [ + "atiadlxx-sys", + "comgr", + "cuda_base", + "cuda_types", + "detours-sys", + "ext/llvm-sys.rs", + "hip_common", + "hip_runtime-sys", + "hipblaslt-sys", + "hipfft-sys", + "hiprt-sys", + "miopen-sys", + "offline_compiler", + "optix_base", + "optix_dump", + "process_address_table", + "ptx", + "rocblas-sys", + "rocm_smi-sys", + "rocsparse-sys", + "xtask", + "zluda", + "zluda_api", + "zluda_blas", + "zluda_blaslt", + "zluda_ccl", + "zluda_dark_api", + "zluda_dnn", + "zluda_dump", + "zluda_fft", + "zluda_fftw", + "zluda_inject", + "zluda_lib", + "zluda_llvm", + "zluda_ml", + "zluda_redirect", + "zluda_rt", + "zluda_rtc", + "zluda_runtime", + "zluda_sparse", +] + +# Cargo does not support OS-specific or profile-specific +# targets. We keep list here to bare minimum and rely on xtask +default-members = [ + "zluda_lib", + "zluda_ml", + "zluda_inject", + "zluda_redirect" +] + +[profile.dev.package.blake3] +opt-level = 3 + +[profile.dev.package.lz4-sys] +opt-level = 3 + +[profile.dev.package.xtask] +opt-level = 2 diff --git a/hipblaslt-sys/src/lib.rs b/hipblaslt-sys/src/lib.rs index 4acbf932..f46a12cc 100644 --- a/hipblaslt-sys/src/lib.rs +++ b/hipblaslt-sys/src/lib.rs @@ -11,6 +11,7 @@ impl hipblasOperation_t { impl hipblasOperation_t { pub const HIPBLAS_OP_C: hipblasOperation_t = hipblasOperation_t(113); } +#[allow(non_camel_case_types)] #[repr(transparent)] #[derive(Copy, Clone, Hash, PartialEq, Eq)] pub struct hipblasOperation_t(pub ::std::os::raw::c_int); diff --git a/zluda_blas/src/lib.rs b/zluda_blas/src/lib.rs index eba9f2b0..6afccdef 100644 --- a/zluda_blas/src/lib.rs +++ b/zluda_blas/src/lib.rs @@ -1,6 +1,8 @@ -#![allow(warnings)] +#[allow(warnings)] mod common; +#[allow(warnings)] mod cublas; +#[allow(warnings)] mod cublasxt; pub use common::*; @@ -13,7 +15,7 @@ use rocsolver_sys::{ rocsolver_cgetrf_batched, rocsolver_cgetri_outofplace_batched, rocsolver_dgetrs_batched, rocsolver_sgetrs_batched, rocsolver_zgetrf_batched, rocsolver_zgetri_outofplace_batched, }; -use std::{mem, ptr}; +use std::ptr; #[cfg(debug_assertions)] pub(crate) fn unsupported() -> cublasStatus_t { @@ -223,61 +225,20 @@ unsafe fn set_stream(handle: cublasHandle_t, stream_id: cudaStream_t) -> cublasS ) -> CUresult>(b"cuGetExportTable\0") .unwrap(); let mut export_table = ptr::null(); - (cu_get_export_table)(&mut export_table, &zluda_dark_api::ZludaExt::GUID); + assert_eq!( + (cu_get_export_table)(&mut export_table, &zluda_dark_api::ZludaExt::GUID), + CUresult::CUDA_SUCCESS + ); let zluda_ext = zluda_dark_api::ZludaExt::new(export_table); let stream: Result<_, _> = zluda_ext.get_hip_stream(stream_id as _).into(); to_cuda(rocblas_set_stream(handle as _, stream.unwrap() as _)) } -fn set_math_mode(handle: cublasHandle_t, mode: cublasMath_t) -> cublasStatus_t { +fn set_math_mode(_handle: cublasHandle_t, _mode: cublasMath_t) -> cublasStatus_t { // llama.cpp uses CUBLAS_TF32_TENSOR_OP_MATH cublasStatus_t::CUBLAS_STATUS_SUCCESS } -unsafe fn sgemm( - transa: std::ffi::c_char, - transb: std::ffi::c_char, - m: i32, - n: i32, - k: i32, - alpha: f32, - a: *const f32, - lda: i32, - b: *const f32, - ldb: i32, - beta: f32, - c: *mut f32, - ldc: i32, -) -> cublasStatus_t { - let mut handle = mem::zeroed(); - let mut status = to_cuda(rocblas_create_handle(handle)); - if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS { - return status; - } - let transa = op_from_cuda(cublasOperation_t(transa as _)); - let transb = op_from_cuda(cublasOperation_t(transb as _)); - status = to_cuda(rocblas_sgemm( - handle.cast(), - transa, - transb, - m, - n, - k, - &alpha, - a, - lda, - b, - ldb, - &beta, - c, - ldc, - )); - if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS { - return status; - } - to_cuda(rocblas_destroy_handle(*handle)) -} - unsafe fn sgemm_v2( handle: cublasHandle_t, transa: cublasOperation_t, @@ -495,7 +456,7 @@ unsafe fn gemm_ex( )) } -fn to_algo(algo: cublasGemmAlgo_t) -> rocblas_gemm_algo_ { +fn to_algo(_algo: cublasGemmAlgo_t) -> rocblas_gemm_algo_ { // only option rocblas_gemm_algo::rocblas_gemm_algo_standard } @@ -807,7 +768,7 @@ unsafe fn sgetrs_batched( dev_ipiv: *const i32, b: *const *mut f32, ldb: i32, - info: *mut i32, + _info: *mut i32, batch_size: i32, ) -> cublasStatus_t { let trans = op_from_cuda_for_solver(trans); @@ -837,7 +798,7 @@ unsafe fn dgetrs_batched( dev_ipiv: *const i32, b: *const *mut f64, ldb: i32, - info: *mut i32, + _info: *mut i32, batch_size: i32, ) -> cublasStatus_t { let trans = op_from_cuda_for_solver(trans); @@ -1048,50 +1009,6 @@ unsafe fn dger( )) } -unsafe fn dgemm( - transa: std::ffi::c_char, - transb: std::ffi::c_char, - m: i32, - n: i32, - k: i32, - alpha: f64, - a: *const f64, - lda: i32, - b: *const f64, - ldb: i32, - beta: f64, - c: *mut f64, - ldc: i32, -) -> cublasStatus_t { - let mut handle = mem::zeroed(); - let mut status = to_cuda(rocblas_create_handle(handle)); - if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS { - return status; - } - let transa = op_from_cuda(cublasOperation_t(transa as _)); - let transb = op_from_cuda(cublasOperation_t(transb as _)); - status = to_cuda(rocblas_dgemm( - handle.cast(), - transa, - transb, - m, - n, - k, - &alpha, - a, - lda, - b, - ldb, - &beta, - c, - ldc, - )); - if status != cublasStatus_t::CUBLAS_STATUS_SUCCESS { - return status; - } - to_cuda(rocblas_destroy_handle(*handle)) -} - unsafe fn dgemm_v2( handle: *mut cublasContext, transa: cublasOperation_t, diff --git a/zluda_fft/src/cufft.rs b/zluda_fft/src/cufft.rs index 9fb5dd9d..7a1b61f7 100644 --- a/zluda_fft/src/cufft.rs +++ b/zluda_fft/src/cufft.rs @@ -380,7 +380,7 @@ pub unsafe extern "system" fn cufftSetWorkArea( plan: cufftHandle, workArea: *mut ::std::os::raw::c_void, ) -> cufftResult { - crate::unsupported() + crate::set_work_area(plan, workArea) } #[no_mangle] @@ -388,7 +388,7 @@ pub unsafe extern "system" fn cufftSetAutoAllocation( plan: cufftHandle, autoAllocate: ::std::os::raw::c_int, ) -> cufftResult { - crate::unsupported() + crate::set_auto_allocation(plan, autoAllocate) } #[no_mangle] diff --git a/zluda_fft/src/cufftxt.rs b/zluda_fft/src/cufftxt.rs index 6aa21914..a97506ae 100644 --- a/zluda_fft/src/cufftxt.rs +++ b/zluda_fft/src/cufftxt.rs @@ -376,7 +376,22 @@ pub unsafe extern "system" fn cufftXtMakePlanMany( workSize: *mut usize, executiontype: cudaDataType, ) -> cufftResult { - crate::unsupported() + crate::xt_make_plan_many( + plan, + rank, + n, + inembed, + istride, + idist, + inputtype, + onembed, + ostride, + odist, + outputtype, + batch, + workSize, + executiontype, + ) } #[no_mangle] @@ -406,7 +421,7 @@ pub unsafe extern "system" fn cufftXtExec( output: *mut ::std::os::raw::c_void, direction: ::std::os::raw::c_int, ) -> cufftResult { - crate::unsupported() + crate::xt_exec(plan, input, output, direction) } #[no_mangle] diff --git a/zluda_fft/src/lib.rs b/zluda_fft/src/lib.rs index 20962c15..14dc596f 100644 --- a/zluda_fft/src/lib.rs +++ b/zluda_fft/src/lib.rs @@ -22,11 +22,34 @@ pub(crate) fn unsupported() -> cufftResult { cufftResult::CUFFT_NOT_SUPPORTED } +#[no_mangle] +pub extern "system" fn cufftLeaveCS() { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn cufftEnterCS() { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn cufftMakePlanGuru64() { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn cufftXtMakePlanGuru64() { + unimplemented!() +} + lazy_static! { static ref PLANS: Mutex> = Mutex::new(Slab::new()); } -struct Plan(hipfftHandle); +struct Plan { + handle: hipfftHandle, + xt_typ: Option, +} unsafe impl Send for Plan {} unsafe fn create(handle: *mut cufftHandle) -> cufftResult { @@ -37,7 +60,10 @@ unsafe fn create(handle: *mut cufftHandle) -> cufftResult { } let plan_key = { let mut plans = PLANS.lock().unwrap(); - plans.insert(Plan(hip_handle)) + plans.insert(Plan { + handle: hip_handle, + xt_typ: None, + }) }; *handle = plan_key as i32; cufftResult::CUFFT_SUCCESS @@ -86,11 +112,23 @@ fn cuda_type(type_: cufftType) -> hipfftType_t { } } +fn xt_type(input: cudaDataType, output: cudaDataType) -> cufftType { + match (input, output) { + (cudaDataType::CUDA_R_32F, cudaDataType::CUDA_C_32F) => cufftType::CUFFT_R2C, + (cudaDataType::CUDA_C_32F, cudaDataType::CUDA_R_32F) => cufftType::CUFFT_C2R, + (cudaDataType::CUDA_C_32F, cudaDataType::CUDA_C_32F) => cufftType::CUFFT_C2C, + _ => panic!( + "[ZLUDA] Unknown type combination: ({}, {})", + input.0, output.0 + ), + } +} + fn get_hip_plan(plan: cufftHandle) -> Result { let plans = PLANS.lock().unwrap(); plans .get(plan as usize) - .map(|p| p.0) + .map(|p| p.handle) .ok_or(cufftResult_t::CUFFT_INVALID_PLAN) } @@ -159,7 +197,10 @@ unsafe fn plan_many( } let plan_key = { let mut plans = PLANS.lock().unwrap(); - plans.insert(Plan(hip_plan)) + plans.insert(Plan { + handle: hip_plan, + xt_typ: None, + }) }; *plan = plan_key as i32; result @@ -185,6 +226,22 @@ unsafe fn set_stream(plan: i32, stream: *mut cufft::CUstream_st) -> cufftResult_ to_cuda(hipfftSetStream(hip_plan, stream.unwrap() as _)) } +unsafe fn set_work_area(plan: i32, work_area: *mut ::std::os::raw::c_void) -> cufftResult_t { + let plan = match get_hip_plan(plan) { + Ok(p) => p, + Err(e) => return e, + }; + to_cuda(hipfftSetWorkArea(plan, work_area)) +} + +unsafe fn set_auto_allocation(plan: i32, auto_allocate: i32) -> cufftResult_t { + let plan = match get_hip_plan(plan) { + Ok(p) => p, + Err(e) => return e, + }; + to_cuda(hipfftSetAutoAllocation(plan, auto_allocate)) +} + unsafe fn exec_c2c( plan: i32, idata: *mut cufft::float2, @@ -246,8 +303,79 @@ unsafe fn plan_3d(plan: *mut i32, nx: i32, ny: i32, nz: i32, type_: cufftType) - } let plan_key = { let mut plans = PLANS.lock().unwrap(); - plans.insert(Plan(hip_plan)) + plans.insert(Plan { + handle: hip_plan, + xt_typ: None, + }) }; *plan = plan_key as i32; result } + +fn set_xt_type(plan: cufftHandle, typ: cufftType_t) -> Result<(), cufftResult_t> { + let mut plans = PLANS.lock().unwrap(); + plans + .get_mut(plan as usize) + .map(|x| { + x.xt_typ = Some(typ); + }) + .ok_or(cufftResult_t::CUFFT_INVALID_PLAN) +} + +fn get_xt_type(plan: cufftHandle) -> Result { + let plans = PLANS.lock().unwrap(); + plans + .get(plan as usize) + .map(|x| x.xt_typ) + .flatten() + .ok_or(cufftResult_t::CUFFT_INVALID_PLAN) +} + +unsafe fn xt_make_plan_many( + plan: i32, + rank: i32, + n: *mut i64, + inembed: *mut i64, + istride: i64, + idist: i64, + inputtype: cudaDataType, + onembed: *mut i64, + ostride: i64, + odist: i64, + outputtype: cudaDataType, + batch: i64, + work_size: *mut usize, + _executiontype: cudaDataType, +) -> cufftResult_t { + let typ = xt_type(inputtype, outputtype); + if let Err(result) = set_xt_type(plan, typ) { + return result; + } + let plan = match get_hip_plan(plan) { + Ok(p) => p, + Err(e) => return e, + }; + let typ = cuda_type(typ); + to_cuda(hipfftMakePlanMany64( + plan, rank, n, inembed, istride, idist, onembed, ostride, odist, typ, batch, work_size, + )) +} + +unsafe fn xt_exec( + plan: i32, + input: *mut ::std::os::raw::c_void, + output: *mut ::std::os::raw::c_void, + direction: i32, +) -> cufftResult_t { + let typ = match get_xt_type(plan) { + Ok(t) => t, + Err(e) => return e, + }; + match typ { + cufftType_t::CUFFT_R2C => exec_r2c(plan, input.cast(), output.cast()), + cufftType_t::CUFFT_C2R => exec_c2r(plan, input.cast(), output.cast()), + cufftType_t::CUFFT_C2C => exec_c2c(plan, input.cast(), output.cast(), direction), + cufftType_t::CUFFT_Z2Z => exec_z2z(plan, input.cast(), output.cast(), direction), + _ => unimplemented!(), + } +} diff --git a/zluda_fftw/Cargo.toml b/zluda_fftw/Cargo.toml new file mode 100644 index 00000000..31d81ad9 --- /dev/null +++ b/zluda_fftw/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "zluda_fftw" +version = "0.0.0" +authors = ["Seunghoon Lee "] +edition = "2021" + +[lib] +name = "cufftw" +crate-type = ["cdylib"] + +[dependencies] + +[package.metadata.zluda] +linux_names = ["libcufftw.so.10"] +dump_names = ["libcufftw.so"] diff --git a/zluda_fftw/README b/zluda_fftw/README new file mode 100644 index 00000000..da857fb4 --- /dev/null +++ b/zluda_fftw/README @@ -0,0 +1,2 @@ +bindgen /usr/local/cuda/targets/x86_64-linux/include/cufftw.h -o src/cufftw.rs --allowlist-function="^fftw.*" --default-enum-style=newtype --no-layout-tests --no-derive-debug -- -I/usr/local/cuda/targets/x86_64-linux/include +sed -i -e 's/extern "C" {//g' -e 's/-> fftw_plan;/-> fftw_plan { unimplemented!()/g' -e 's/-> fftwf_plan;/-> fftwf_plan { unimplemented!()/g' -e 's/pub fn /#[no_mangle] pub extern "system" fn /g' src/cufftw.rs \ No newline at end of file diff --git a/zluda_fftw/src/cufftw.rs b/zluda_fftw/src/cufftw.rs new file mode 100644 index 00000000..35b1bc65 --- /dev/null +++ b/zluda_fftw/src/cufftw.rs @@ -0,0 +1,736 @@ +/* automatically generated by rust-bindgen 0.69.4 */ + +#[repr(C)] +#[derive(Copy, Clone)] +pub struct _iobuf { + pub _Placeholder: *mut ::std::os::raw::c_void, +} +pub type FILE = _iobuf; +pub type fftw_complex = [f64; 2usize]; +pub type fftwf_complex = [f32; 2usize]; +pub type fftw_plan = *mut ::std::os::raw::c_void; +pub type fftwf_plan = *mut ::std::os::raw::c_void; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct fftw_iodim { + pub n: ::std::os::raw::c_int, + pub is: ::std::os::raw::c_int, + pub os: ::std::os::raw::c_int, +} +pub type fftwf_iodim = fftw_iodim; +#[repr(C)] +#[derive(Copy, Clone)] +pub struct fftw_iodim64 { + pub n: isize, + pub is: isize, + pub os: isize, +} +pub type fftwf_iodim64 = fftw_iodim64; + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_1d( + n: ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut fftw_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_2d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut fftw_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_3d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + n2: ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut fftw_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut fftw_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_r2c_1d( + n: ::std::os::raw::c_int, + in_: *mut f64, + out: *mut fftw_complex, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_r2c_2d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + in_: *mut f64, + out: *mut fftw_complex, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_r2c_3d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + n2: ::std::os::raw::c_int, + in_: *mut f64, + out: *mut fftw_complex, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_r2c( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + in_: *mut f64, + out: *mut fftw_complex, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_c2r_1d( + n: ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut f64, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_c2r_2d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut f64, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_c2r_3d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + n2: ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut f64, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_dft_c2r( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + in_: *mut fftw_complex, + out: *mut f64, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_many_dft( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + batch: ::std::os::raw::c_int, + in_: *mut fftw_complex, + inembed: *const ::std::os::raw::c_int, + istride: ::std::os::raw::c_int, + idist: ::std::os::raw::c_int, + out: *mut fftw_complex, + onembed: *const ::std::os::raw::c_int, + ostride: ::std::os::raw::c_int, + odist: ::std::os::raw::c_int, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_many_dft_r2c( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + batch: ::std::os::raw::c_int, + in_: *mut f64, + inembed: *const ::std::os::raw::c_int, + istride: ::std::os::raw::c_int, + idist: ::std::os::raw::c_int, + out: *mut fftw_complex, + onembed: *const ::std::os::raw::c_int, + ostride: ::std::os::raw::c_int, + odist: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_many_dft_c2r( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + batch: ::std::os::raw::c_int, + in_: *mut fftw_complex, + inembed: *const ::std::os::raw::c_int, + istride: ::std::os::raw::c_int, + idist: ::std::os::raw::c_int, + out: *mut f64, + onembed: *const ::std::os::raw::c_int, + ostride: ::std::os::raw::c_int, + odist: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_guru_dft( + rank: ::std::os::raw::c_int, + dims: *const fftw_iodim, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftw_iodim, + in_: *mut fftw_complex, + out: *mut fftw_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_guru_dft_r2c( + rank: ::std::os::raw::c_int, + dims: *const fftw_iodim, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftw_iodim, + in_: *mut f64, + out: *mut fftw_complex, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_guru_dft_c2r( + rank: ::std::os::raw::c_int, + dims: *const fftw_iodim, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftw_iodim, + in_: *mut fftw_complex, + out: *mut f64, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_execute(plan: fftw_plan) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_execute_dft( + plan: fftw_plan, + idata: *mut fftw_complex, + odata: *mut fftw_complex, +) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_execute_dft_r2c( + plan: fftw_plan, + idata: *mut f64, + odata: *mut fftw_complex, +) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_execute_dft_c2r( + plan: fftw_plan, + idata: *mut fftw_complex, + odata: *mut f64, +) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_1d( + n: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut fftwf_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_2d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut fftwf_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_3d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + n2: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut fftwf_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut fftwf_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_r2c_1d( + n: ::std::os::raw::c_int, + in_: *mut f32, + out: *mut fftwf_complex, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_r2c_2d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + in_: *mut f32, + out: *mut fftwf_complex, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_r2c_3d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + n2: ::std::os::raw::c_int, + in_: *mut f32, + out: *mut fftwf_complex, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_r2c( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + in_: *mut f32, + out: *mut fftwf_complex, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_c2r_1d( + n: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut f32, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_c2r_2d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut f32, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_c2r_3d( + n0: ::std::os::raw::c_int, + n1: ::std::os::raw::c_int, + n2: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut f32, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_dft_c2r( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + in_: *mut fftwf_complex, + out: *mut f32, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_many_dft( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + batch: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + inembed: *const ::std::os::raw::c_int, + istride: ::std::os::raw::c_int, + idist: ::std::os::raw::c_int, + out: *mut fftwf_complex, + onembed: *const ::std::os::raw::c_int, + ostride: ::std::os::raw::c_int, + odist: ::std::os::raw::c_int, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_many_dft_r2c( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + batch: ::std::os::raw::c_int, + in_: *mut f32, + inembed: *const ::std::os::raw::c_int, + istride: ::std::os::raw::c_int, + idist: ::std::os::raw::c_int, + out: *mut fftwf_complex, + onembed: *const ::std::os::raw::c_int, + ostride: ::std::os::raw::c_int, + odist: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_many_dft_c2r( + rank: ::std::os::raw::c_int, + n: *const ::std::os::raw::c_int, + batch: ::std::os::raw::c_int, + in_: *mut fftwf_complex, + inembed: *const ::std::os::raw::c_int, + istride: ::std::os::raw::c_int, + idist: ::std::os::raw::c_int, + out: *mut f32, + onembed: *const ::std::os::raw::c_int, + ostride: ::std::os::raw::c_int, + odist: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_guru_dft( + rank: ::std::os::raw::c_int, + dims: *const fftwf_iodim, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftwf_iodim, + in_: *mut fftwf_complex, + out: *mut fftwf_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_guru_dft_r2c( + rank: ::std::os::raw::c_int, + dims: *const fftwf_iodim, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftwf_iodim, + in_: *mut f32, + out: *mut fftwf_complex, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_guru_dft_c2r( + rank: ::std::os::raw::c_int, + dims: *const fftwf_iodim, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftwf_iodim, + in_: *mut fftwf_complex, + out: *mut f32, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_execute(plan: fftw_plan) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_execute_dft( + plan: fftwf_plan, + idata: *mut fftwf_complex, + odata: *mut fftwf_complex, +) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_execute_dft_r2c( + plan: fftwf_plan, + idata: *mut f32, + odata: *mut fftwf_complex, +) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_execute_dft_c2r( + plan: fftwf_plan, + idata: *mut fftwf_complex, + odata: *mut f32, +) { + unimplemented!() +} + +#[doc = " CUFFTW 64-bit Guru Interface\n dp"] +#[no_mangle] +pub extern "system" fn fftw_plan_guru64_dft( + rank: ::std::os::raw::c_int, + dims: *const fftw_iodim64, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftw_iodim64, + in_: *mut fftw_complex, + out: *mut fftw_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_guru64_dft_r2c( + rank: ::std::os::raw::c_int, + dims: *const fftw_iodim64, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftw_iodim64, + in_: *mut f64, + out: *mut fftw_complex, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_plan_guru64_dft_c2r( + rank: ::std::os::raw::c_int, + dims: *const fftw_iodim64, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftw_iodim64, + in_: *mut fftw_complex, + out: *mut f64, + flags: ::std::os::raw::c_uint, +) -> fftw_plan { + unimplemented!() +} + +#[doc = " sp"] +#[no_mangle] +pub extern "system" fn fftwf_plan_guru64_dft( + rank: ::std::os::raw::c_int, + dims: *const fftwf_iodim64, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftwf_iodim64, + in_: *mut fftwf_complex, + out: *mut fftwf_complex, + sign: ::std::os::raw::c_int, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_guru64_dft_r2c( + rank: ::std::os::raw::c_int, + dims: *const fftwf_iodim64, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftwf_iodim64, + in_: *mut f32, + out: *mut fftwf_complex, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_plan_guru64_dft_c2r( + rank: ::std::os::raw::c_int, + dims: *const fftwf_iodim64, + batch_rank: ::std::os::raw::c_int, + batch_dims: *const fftwf_iodim64, + in_: *mut fftwf_complex, + out: *mut f32, + flags: ::std::os::raw::c_uint, +) -> fftwf_plan { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_malloc(n: usize) -> *mut ::std::os::raw::c_void { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_malloc(n: usize) -> *mut ::std::os::raw::c_void { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_free(pointer: *mut ::std::os::raw::c_void) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_free(pointer: *mut ::std::os::raw::c_void) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_export_wisdom_to_file(output_file: *mut FILE) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_export_wisdom_to_file(output_file: *mut FILE) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_import_wisdom_from_file(input_file: *mut FILE) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_import_wisdom_from_file(input_file: *mut FILE) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_print_plan(plan: fftw_plan) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_print_plan(plan: fftwf_plan) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_set_timelimit(seconds: f64) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_set_timelimit(seconds: f64) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_cost(plan: fftw_plan) -> f64 { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_cost(plan: fftw_plan) -> f64 { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_flops(plan: fftw_plan, add: *mut f64, mul: *mut f64, fma: *mut f64) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_flops(plan: fftw_plan, add: *mut f64, mul: *mut f64, fma: *mut f64) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_destroy_plan(plan: fftw_plan) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_destroy_plan(plan: fftwf_plan) { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftw_cleanup() { + unimplemented!() +} + +#[no_mangle] +pub extern "system" fn fftwf_cleanup() { + unimplemented!() +} diff --git a/zluda_fftw/src/lib.rs b/zluda_fftw/src/lib.rs new file mode 100644 index 00000000..9170fe35 --- /dev/null +++ b/zluda_fftw/src/lib.rs @@ -0,0 +1,3 @@ +#[allow(warnings)] +mod cufftw; +pub use cufftw::*; diff --git a/zluda_inject/src/bin.rs b/zluda_inject/src/bin.rs index fd0e8aaf..bec96db4 100644 --- a/zluda_inject/src/bin.rs +++ b/zluda_inject/src/bin.rs @@ -83,7 +83,7 @@ pub fn main_impl() -> Result<(), Box> { match argument.to_str() { Some(argument) => match argument { "--version" => { - println!("ZLUDA 3.8.6"); + println!("ZLUDA 3.8.7"); process::exit(0); } "--" => break, diff --git a/zluda_rtc/src/lib.rs b/zluda_rtc/src/lib.rs index df2b329d..971161ed 100644 --- a/zluda_rtc/src/lib.rs +++ b/zluda_rtc/src/lib.rs @@ -1,4 +1,4 @@ -#![allow(warnings)] +#[allow(warnings)] mod nvrtc; use std::{env, ffi::c_char, ptr, result, sync::Mutex}; @@ -105,9 +105,16 @@ impl Nvrtc { prog: nvrtcProgram, num_options: i32, options: *const *const c_char, - ) -> Result, nvrtcResult> { + ) -> Result<(), nvrtcResult> { + let program = unsafe { Program::from(prog) }; + if program.is_none() { + return Err(nvrtcResult::NVRTC_ERROR_INVALID_PROGRAM); + } + let program = program.unwrap(); + let nvrtc = self.get()?; call!(nvrtc.nvrtcCompileProgram(prog, num_options, options)); + let mut size = 0; call!(nvrtc.nvrtcGetPTXSize(prog, &mut size)); let mut ptx = { @@ -115,14 +122,17 @@ impl Nvrtc { unsafe { ptx.assume_init() } }; call!(nvrtc.nvrtcGetPTX(prog, ptx.as_mut_ptr())); - Ok(ptx) - } + program.set_ptx(ptx); - pub fn get_program_log_size(&self, prog: nvrtcProgram) -> Result { - let nvrtc = self.get()?; - let mut log_size_ret = 0; - call!(nvrtc.nvrtcGetProgramLogSize(prog, &mut log_size_ret)); - Ok(log_size_ret) + call!(nvrtc.nvrtcGetProgramLogSize(prog, &mut size)); + let mut log = { + let log = Box::<[c_char]>::new_uninit_slice(size); + unsafe { log.assume_init() } + }; + call!(nvrtc.nvrtcGetProgramLog(prog, log.as_mut_ptr())); + program.set_log(log); + + Ok(()) } } @@ -143,11 +153,16 @@ const NVRTC_VERSION_MINOR: i32 = 2; struct Program { base: nvrtcProgram, ptx: Option>, + log: Option>, } impl Program { fn new(base: nvrtcProgram) -> Self { - Program { base, ptx: None } + Program { + base, + ptx: None, + log: None, + } } unsafe fn from<'a>(ptr: nvrtcProgram) -> Option<&'a mut Program> { @@ -157,6 +172,10 @@ impl Program { fn set_ptx(&mut self, ptx: Box<[c_char]>) { self.ptx = Some(ptx); } + + fn set_log(&mut self, log: Box<[c_char]>) { + self.log = Some(log); + } } trait IntoBox { @@ -227,9 +246,8 @@ fn compile_program( nvrtc .compile_program(prog.base, num_options, options) - .then(|ptx| { - prog.set_ptx(ptx); - }) + .err() + .unwrap_or(nvrtcResult::NVRTC_SUCCESS) } unsafe fn get_ptx_size(prog: nvrtcProgram, code_size_ret: *mut usize) -> nvrtcResult { @@ -256,10 +274,26 @@ unsafe fn get_ptx(prog: nvrtcProgram, code: *mut c_char) -> nvrtcResult { nvrtcResult::NVRTC_ERROR_INVALID_PROGRAM } -fn get_program_log_size(prog: nvrtcProgram, log_size_ret: *mut usize) -> nvrtcResult { - let nvrtc_mutex = &*NVRTC; - let nvrtc = &*nvrtc_mutex.lock().unwrap(); - nvrtc.get_program_log_size(prog).then(|size| unsafe { - *log_size_ret = size; - }) +unsafe fn get_program_log_size(prog: nvrtcProgram, log_size_ret: *mut usize) -> nvrtcResult { + let prog = Program::from(prog); + if let Some(prog) = prog { + if let Some(log) = &prog.log { + *log_size_ret = log.len(); + return nvrtcResult::NVRTC_SUCCESS; + } + } + nvrtcResult::NVRTC_ERROR_INVALID_PROGRAM +} + +unsafe fn get_program_log(prog: nvrtcProgram, log: *mut ::std::os::raw::c_char) -> nvrtcResult { + let prog = Program::from(prog); + if let Some(prog) = prog { + if let Some(ptr) = &prog.log { + for (i, &c) in ptr.iter().enumerate() { + *log.add(i) = c; + } + return nvrtcResult::NVRTC_SUCCESS; + } + } + nvrtcResult::NVRTC_ERROR_INVALID_PROGRAM } diff --git a/zluda_rtc/src/nvrtc.rs b/zluda_rtc/src/nvrtc.rs index db58e28d..09ac2ade 100644 --- a/zluda_rtc/src/nvrtc.rs +++ b/zluda_rtc/src/nvrtc.rs @@ -164,7 +164,7 @@ pub extern "system" fn nvrtcGetNVVM( #[doc = " \\ingroup compilation\n \\brief nvrtcGetProgramLogSize sets \\p logSizeRet with the size of the\n log generated by the previous compilation of \\p prog (including the\n trailing \\c NULL).\n\n Note that compilation log may be generated with warnings and informative\n messages, even when the compilation of \\p prog succeeds.\n\n \\param [in] prog CUDA Runtime Compilation program.\n \\param [out] logSizeRet Size of the compilation log\n (including the trailing \\c NULL).\n \\return\n - \\link #nvrtcResult NVRTC_SUCCESS \\endlink\n - \\link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \\endlink\n - \\link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \\endlink\n\n \\see ::nvrtcGetProgramLog"] #[no_mangle] -pub extern "system" fn nvrtcGetProgramLogSize( +pub unsafe extern "system" fn nvrtcGetProgramLogSize( prog: nvrtcProgram, logSizeRet: *mut usize, ) -> nvrtcResult { @@ -173,11 +173,11 @@ pub extern "system" fn nvrtcGetProgramLogSize( #[doc = " \\ingroup compilation\n \\brief nvrtcGetProgramLog stores the log generated by the previous\n compilation of \\p prog in the memory pointed by \\p log.\n\n \\param [in] prog CUDA Runtime Compilation program.\n \\param [out] log Compilation log.\n \\return\n - \\link #nvrtcResult NVRTC_SUCCESS \\endlink\n - \\link #nvrtcResult NVRTC_ERROR_INVALID_INPUT \\endlink\n - \\link #nvrtcResult NVRTC_ERROR_INVALID_PROGRAM \\endlink\n\n \\see ::nvrtcGetProgramLogSize"] #[no_mangle] -pub extern "system" fn nvrtcGetProgramLog( +pub unsafe extern "system" fn nvrtcGetProgramLog( prog: nvrtcProgram, log: *mut ::std::os::raw::c_char, ) -> nvrtcResult { - crate::unsupported() + crate::get_program_log(prog, log) } #[doc = " \\ingroup compilation\n \\brief nvrtcAddNameExpression notes the given name expression\n denoting the address of a __global__ function\n or __device__/__constant__ variable.\n\n The identical name expression string must be provided on a subsequent\n call to nvrtcGetLoweredName to extract the lowered name.\n \\param [in] prog CUDA Runtime Compilation program.\n \\param [in] name_expression constant expression denoting the address of\n a __global__ function or __device__/__constant__ variable.\n \\return\n - \\link #nvrtcResult NVRTC_SUCCESS \\endlink\n - \\link #nvrtcResult NVRTC_ERROR_NO_NAME_EXPRESSIONS_AFTER_COMPILATION \\endlink\n\n \\see ::nvrtcGetLoweredName"]