diff --git a/delta/Cargo.toml b/delta/Cargo.toml index e64ab10..26407ea 100644 --- a/delta/Cargo.toml +++ b/delta/Cargo.toml @@ -24,6 +24,8 @@ reqwest = "0.12.9" ndarray = "0.16.1" tar = "0.4.43" serde_json = "1.0.133" +serde = { version = "1.0", features = ["derive"] } +approx = "0.5.1" walkdir = "2.5.0" image = "0.25.5" serial_test = "3.2" diff --git a/delta/src/neuralnet/layers/conv_2d.rs b/delta/src/neuralnet/layers/conv_2d.rs new file mode 100644 index 0000000..2e33d44 --- /dev/null +++ b/delta/src/neuralnet/layers/conv_2d.rs @@ -0,0 +1,928 @@ +//! BSD 3-Clause License +//! +//! Copyright (c) 2025, BlackPortal ○ +//! +//! Redistribution and use in source and binary forms, with or without +//! modification, are permitted provided that the following conditions are met: +//! +//! 1. Redistributions of source code must retain the above copyright notice, this +//! list of conditions and the following disclaimer. +//! +//! 2. Redistributions in binary form must reproduce the above copyright notice, +//! this list of conditions and the following disclaimer in the documentation +//! and/or other materials provided with the distribution. +//! +//! 3. Neither the name of the copyright holder nor the names of its +//! contributors may be used to endorse or promote products derived from +//! this software without specific prior written permission. +//! +//! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +//! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +//! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +//! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +//! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +//! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +//! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +//! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +//! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +//! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +use log::debug; +use ndarray::{ Array, Dim, Dimension, Ix4, IxDyn, IxDynImpl, Shape, s }; +use serde::Deserialize; +use serde_json; + +use crate::activations::Activation; +use crate::common::Tensor; +use crate::devices::Device; +use crate::neuralnet::layers::Layer; +use crate::neuralnet::layers::error::LayerError; +use crate::optimizers::Optimizer; + +#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize, Deserialize)] +pub enum PaddingMode { + Valid, // No padding + Same, // Pad to keep same spatial dimensions + Custom(usize, usize), // Custom padding for height and width +} + +/// A 2D convolutional layer. +#[derive(Debug)] +pub struct Conv2D { + name: String, + weights: Option, + bias: Option, + kernel_units: usize, + kernel_size: (usize, usize), + stride: (usize, usize), + padding: PaddingMode, + dilation: (usize, usize), // Dilation rate for each dimension + groups: usize, // Number of groups for grouped convolution + include_bias: bool, + activation: Option>, + trainable: bool, + weights_grad: Option, + bias_grad: Option, + input: Option, + input_shape: Option>, + device: Device, +} + +impl Conv2D { + /// Creates a new Conv2D layer with advanced features. + pub fn new( + kernel_units: usize, + kernel_size: (usize, usize), + stride: (usize, usize), + padding: PaddingMode, + dilation: (usize, usize), + groups: usize, + activation: Option, + trainable: bool, + include_bias: bool + ) -> Self { + if groups > kernel_units { + panic!("Number of groups can not be higher than number of kernel units"); + } + Conv2D { + name: format!("conv2d_{}", kernel_units), + weights: None, + bias: None, + kernel_units, + kernel_size, + stride, + padding, + dilation, + groups, + include_bias, + activation: activation.map(|a| Box::new(a) as Box), + trainable, + weights_grad: None, + bias_grad: None, + input: None, + input_shape: None, + device: Device::default(), + } + } + + fn get_input_shape(&self) -> Result, LayerError> { + let shape = self.input_shape.as_ref().ok_or(LayerError::MissingInput)?; + let raw = shape.raw_dim(); + if raw.ndim() != 4 { + return Err(LayerError::InvalidInputShape); + } + Ok(raw.clone()) + } + + /// Calculate padding sizes based on padding mode + fn get_padding_sizes(&self, input_height: usize, input_width: usize) -> (usize, usize) { + match self.padding { + PaddingMode::Valid => (0, 0), + PaddingMode::Same => { + let pad_height = + ((input_height - 1) * self.stride.0 + + (self.kernel_size.0 - 1) * self.dilation.0 + + 1 - + input_height) / + 2; + let pad_width = + ((input_width - 1) * self.stride.1 + + (self.kernel_size.1 - 1) * self.dilation.1 + + 1 - + input_width) / + 2; + (pad_height, pad_width) + } + PaddingMode::Custom(h, w) => (h, w), + } + } + + /// Apply padding to input tensor + fn pad_input(&self, input: &Array) -> Array { + let (batch_size, channels, height, width) = input.dim(); + let (pad_h, pad_w) = self.get_padding_sizes(height, width); + + if pad_h == 0 && pad_w == 0 { + return input.clone(); + } + + let mut padded = Array::zeros(( + batch_size, + channels, + height + 2 * pad_h, + width + 2 * pad_w, + )); + + padded.slice_mut(s![.., .., pad_h..pad_h + height, pad_w..pad_w + width]).assign(input); + + padded + } +} + +/// 2D Convolution operation +/// +/// # Arguments +/// +/// * `input` - Input feature map [batch_size, channels, height, width] +/// * `kernel` - Convolution kernel [out_channels, in_channels, kernel_height, kernel_width] +/// * `stride` - (height_stride, width_stride) +/// +/// # Returns +/// +/// Output feature map +fn conv2d_raw( + input: &Array, + kernel: &Array, + stride: (usize, usize), + dilation: (usize, usize), + groups: usize +) -> Array { + let (batch_size, in_channels, in_height, in_width) = input.dim(); + let (out_channels, _, kernel_height, kernel_width) = kernel.dim(); + + let out_height = (in_height - (kernel_height - 1) * dilation.0 - 1) / stride.0 + 1; + let out_width = (in_width - (kernel_width - 1) * dilation.1 - 1) / stride.1 + 1; + + if in_channels % groups != 0 { + panic!("Input channels must be divisible by the number of groups"); + } + + if out_channels % groups != 0 { + panic!("Output channels must be divisible by the number of groups"); + } + + let channels_per_group = in_channels / groups; + let out_channels_per_group = out_channels / groups; + + let mut output = Array::zeros((batch_size, out_channels, out_height, out_width)); + + for g in 0..groups { + let in_start = g * channels_per_group; + let in_end = (g + 1) * channels_per_group; + let out_start = g * out_channels_per_group; + let out_end = (g + 1) * out_channels_per_group; + + let input_slice = input.slice(s![.., in_start..in_end, .., ..]); + let kernel_slice = kernel.slice(s![out_start..out_end, .., .., ..]); + + for b in 0..batch_size { + for oc in 0..out_channels_per_group { + for oh in 0..out_height { + for ow in 0..out_width { + let mut sum = 0.0; + for ic in 0..channels_per_group { + for kh in 0..kernel_height { + for kw in 0..kernel_width { + let ih = oh * stride.0 + kh * dilation.0; + let iw = ow * stride.1 + kw * dilation.1; + if ih < in_height && iw < in_width { + sum += + input_slice[[b, ic, ih, iw]] * + kernel_slice[[oc, ic, kh, kw]]; + } + } + } + } + output[[b, out_start + oc, oh, ow]] = sum; + } + } + } + } + } + + output +} + +impl Layer for Conv2D { + fn build(&mut self, input_shape: Shape) -> Result<(), LayerError> { + debug!( + "Building Conv2D layer with input shape: {:?}, kernel_units: {}, kernel_size: {:?}, stride: {:?}, padding: {:?}", + input_shape, + self.kernel_units, + self.kernel_size, + self.stride, + self.padding + ); + + self.input_shape = Some(input_shape); + let raw = self.get_input_shape()?; + let (batch_size, in_channels, height, width) = (raw[0], raw[1], raw[2], raw[3]); + + // Initialize weights using He or Xavier initialization + let fan_in = in_channels * self.kernel_size.0 * self.kernel_size.1; + let stddev = if let Some(ref activation) = self.activation { + match activation.name() { + "relu" | "leaky_relu" => (2.0 / (fan_in as f32)).sqrt(), // He + _ => (1.0 / (fan_in as f32)).sqrt(), // Xavier + } + } else { + (1.0 / (fan_in as f32)).sqrt() + }; + + if self.kernel_units % self.groups != 0 { + return Err(LayerError::InvalidParameters); + } + if in_channels % self.groups != 0 { + return Err(LayerError::InvalidParameters); + } + + // Shape: [out_channels, in_channels/groups, kernel_height, kernel_width] + self.weights = Some( + Tensor::random_normal( + Shape::from( + IxDyn( + &[ + self.kernel_units, + in_channels / self.groups, // Correct shape for grouped convolutions + self.kernel_size.0, + self.kernel_size.1, + ] + ) + ), + 0.0, + stddev + ) + ); + + self.weights.as_mut().unwrap().device = self.device.clone(); + + // Initialize bias + if self.include_bias { + self.bias = Some( + Tensor::zeros( + Shape::from(IxDyn(&[1, self.kernel_units, 1, 1])), + self.device.clone() + ) + ); + } + + Ok(()) + } + + fn forward(&mut self, input: &Tensor) -> Result { + self.input = Some(input.clone()); + + let weights = self.weights.as_ref().expect("Weights must be initialized"); + let bias = self.bias.as_ref(); + + // Convert input to 4D + let input_4d = input.data + .clone() + .into_dimensionality::() + .map_err(|_| LayerError::InvalidInputShape)?; + + // Apply padding to the input data + let padded_input = self.pad_input(&input_4d); + + // Perform convolution + let mut output = conv2d_raw( + &padded_input, + &weights.data + .clone() + .into_dimensionality::() + .map_err(|_| LayerError::InvalidInputShape)?, + self.stride, + self.dilation, + self.groups + ); + + // Add bias if present + if let Some(bias) = bias { + output = output + bias.data.clone().into_dimensionality::().unwrap(); + } + + // Convert to Tensor and apply activation + let mut z = Tensor { + data: output.into_dyn(), + device: input.device.clone(), + }; + + if let Some(ref activation) = self.activation { + z = activation.activate(&z); + } + + Ok(z) + } + + fn backward(&mut self, grad: &Tensor) -> Result { + if !self.trainable { + // If not trainable, return zero gradient tensor + return Ok(Tensor::zeros(self.input_shape.clone().unwrap(), self.device.clone())); + } + + let input = self.input.as_ref().ok_or(LayerError::MissingInput)?; + let weights = self.weights.as_ref().ok_or(LayerError::UninitializedWeights)?; + + // Convert tensors to 4D arrays + let grad_4d = grad.data + .clone() + .into_dimensionality::() + .map_err(|_| LayerError::InvalidInputShape)?; + let input_4d = input.data + .clone() + .into_dimensionality::() + .map_err(|_| LayerError::InvalidInputShape)?; + let weights_4d = weights.data + .clone() + .into_dimensionality::() + .map_err(|_| LayerError::InvalidInputShape)?; + + let (batch_size, in_channels, in_height, in_width) = input_4d.dim(); + let (out_channels, _, kernel_height, kernel_width) = weights_4d.dim(); + let channels_per_group = in_channels / self.groups; + + // Initialize gradients + let mut dx = Array::zeros(input_4d.dim()); + let mut dw = Array::zeros(weights_4d.dim()); + let mut db = if self.include_bias { + Some(Array::zeros((1, out_channels, 1, 1))) + } else { + None + }; + + // Apply padding to input if needed + let padded_input = self.pad_input(&input_4d); + + // Calculate bias gradient if needed + if let Some(db_mut) = db.as_mut() { + for c in 0..out_channels { + db_mut[[0, c, 0, 0]] = grad_4d.slice(s![.., c, .., ..]).sum(); + } + } + + // Process each group separately + for g in 0..self.groups { + let in_start = g * channels_per_group; + let in_end = (g + 1) * channels_per_group; + let out_start = g * (out_channels / self.groups); + let out_end = (g + 1) * (out_channels / self.groups); + + // Extract slices for current group + let input_slice = padded_input.slice(s![.., in_start..in_end, .., ..]); + let grad_slice = grad_4d.slice(s![.., out_start..out_end, .., ..]); + let weights_slice = weights_4d.slice(s![out_start..out_end, .., .., ..]); + + // Calculate weights gradient + for b in 0..batch_size { + for oc in 0..out_channels / self.groups { + for ic in 0..channels_per_group { + for kh in 0..kernel_height { + for kw in 0..kernel_width { + let mut grad_sum = 0.0; + + // Sum over output spatial dimensions + for oh in 0..grad_slice.shape()[2] { + for ow in 0..grad_slice.shape()[3] { + let ih = oh * self.stride.0 + kh * self.dilation.0; + let iw = ow * self.stride.1 + kw * self.dilation.1; + + if ih < in_height && iw < in_width { + grad_sum += + grad_slice[[b, oc, oh, ow]] * + input_slice[[b, ic, ih, iw]]; + } + } + } + + dw[[out_start + oc, ic, kh, kw]] += grad_sum; + } + } + } + } + } + + // Calculate input gradient + for b in 0..batch_size { + for ic in 0..channels_per_group { + for ih in 0..in_height { + for iw in 0..in_width { + let mut grad_sum = 0.0; + + // Sum over output channels and kernel dimensions + for oc in 0..out_channels / self.groups { + for kh in 0..kernel_height { + for kw in 0..kernel_width { + let oh = + ((ih as i32) - (kh as i32) * (self.dilation.0 as i32)) / + (self.stride.0 as i32); + let ow = + ((iw as i32) - (kw as i32) * (self.dilation.1 as i32)) / + (self.stride.1 as i32); + + if + oh >= 0 && + ow >= 0 && + oh < (grad_slice.shape()[2] as i32) && + ow < (grad_slice.shape()[3] as i32) + { + grad_sum += + grad_slice[[b, oc, oh as usize, ow as usize]] * + weights_slice[[oc, ic, kh, kw]]; + } + } + } + } + + dx[[b, in_start + ic, ih, iw]] = grad_sum; + } + } + } + } + } + + // Store gradients + self.weights_grad = Some(Tensor { + data: dw.into_dyn(), + device: self.device.clone(), + }); + + if let Some(db) = db { + self.bias_grad = Some(Tensor { + data: db.into_dyn(), + device: self.device.clone(), + }); + } + + // Return input gradient + Ok(Tensor { + data: dx.into_dyn(), + device: self.device.clone(), + }) + } + + fn update_weights(&mut self, optimizer: &mut Box) -> Result<(), LayerError> { + if !self.trainable { + return Ok(()); + } + + if let Some(ref weights_grad) = self.weights_grad { + optimizer + .step(self.weights.as_mut().unwrap(), weights_grad) + .map_err(LayerError::OptimizerError)?; + } + + if let Some(ref bias_grad) = self.bias_grad { + if let Some(ref mut bias) = self.bias { + optimizer.step(bias, bias_grad).map_err(LayerError::OptimizerError)?; + } + } + + self.weights_grad = None; + self.bias_grad = None; + + Ok(()) + } + + fn output_shape(&self) -> Result, LayerError> { + let raw = self.get_input_shape()?; + let (batch_size, _, height, width) = (raw[0], raw[1], raw[2], raw[3]); + + let (pad_h, pad_w) = self.get_padding_sizes(height, width); + let out_height = + (height + 2 * pad_h - (self.kernel_size.0 - 1) * self.dilation.0 - 1) / self.stride.0 + + 1; + let out_width = + (width + 2 * pad_w - (self.kernel_size.1 - 1) * self.dilation.1 - 1) / self.stride.1 + + 1; + + Ok(Shape::from(IxDyn(&[batch_size, self.kernel_units, out_height, out_width]))) + } + + fn param_count(&self) -> Result<(usize, usize), LayerError> { + let weights_count = self.weights.as_ref().map_or(0, |w| w.data.len()); + let bias_count = self.bias.as_ref().map_or(0, |b| b.data.len()); + Ok((weights_count, bias_count)) + } + + fn name(&self) -> &str { + &self.name + } + + fn set_device(&mut self, device: &Device) { + self.device = device.clone(); + + if let Some(ref mut weights) = self.weights { + weights.device = device.clone(); + } + if let Some(ref mut bias) = self.bias { + bias.device = device.clone(); + } + if let Some(ref mut input) = self.input { + input.device = device.clone(); + } + } + + fn get_weights(&self) -> serde_json::Value { + serde_json::json!({ + "weights": self.weights.as_ref().map(|w| w.to_vec()), + "bias": self.bias.as_ref().map(|b| b.to_vec()) + }) + } + + fn get_config(&self) -> serde_json::Value { + serde_json::json!({ + "kernel_units": self.kernel_units, + "kernel_size": self.kernel_size, + "stride": self.stride, + "padding": serde_json::to_value(self.padding.clone()).unwrap(), + "dilation": self.dilation, + "groups": self.groups, + "trainable": self.trainable, + "include_bias": self.include_bias, + "activation": self.activation.as_ref().map(|a| a.name()) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::activations::ReluActivation; + use approx::assert_abs_diff_eq; // Import approx for floating-point comparisons + + #[test] + fn test_conv2d_forward_pass_with_precision() { + // Initialize Conv2D layer + let mut conv = Conv2D::new( + 1, + (1, 1), + (2, 2), + PaddingMode::Valid, + (1, 1), + 1, + None::, + true, + false + ); // No padding, stride 1 + + // Set weights and bias manually + conv.weights = Some( + Tensor::new(vec![0.5, -0.5, 1.0, -1.0], Shape::from(IxDyn(&[1, 1, 2, 2]))) + ); + conv.bias = Some(Tensor::new(vec![0.1], Shape::from(IxDyn(&[1, 1, 1, 1])))); + + // Input tensor + let input = Tensor::new( + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], + Shape::from(IxDyn(&[1, 1, 3, 3])) + ); + + // Expected output (calculated manually or with a trusted library) + let expected_output = vec![ + vec![ + vec![0.1, 0.2], // Example values with potential rounding + vec![0.3, 0.4] + ] + ]; + + // Perform forward pass + let output = conv.forward(&input).expect("Forward pass failed"); + + // Compare each value with tolerance + for (out_row, exp_row) in output.data.outer_iter().zip(expected_output.iter()) { + for (out_val, exp_val) in out_row.iter().zip(exp_row.iter()) { + for (out_inner, exp_inner) in exp_val.iter().zip(exp_val.iter()) { + assert_abs_diff_eq!(out_inner, exp_inner, epsilon = 1e-6); + } + } + } + } + + #[test] + fn test_padding_with_precision() { + // Initialize Conv2D with padding + let mut conv = Conv2D::new( + 1, + (1, 1), + (2, 2), + PaddingMode::Valid, + (1, 1), + 1, + None::, + true, + false + ); + + // Input tensor + let input = vec![vec![vec![1.0, 2.0, 3.0], vec![4.0, 5.0, 6.0], vec![7.0, 8.0, 9.0]]]; + + // Expected padded input + let expected_padded = vec![ + vec![ + vec![0.0, 0.0, 0.0, 0.0, 0.0], + vec![0.0, 1.0, 2.0, 3.0, 0.0], + vec![0.0, 4.0, 5.0, 6.0, 0.0], + vec![0.0, 7.0, 8.0, 9.0, 0.0], + vec![0.0, 0.0, 0.0, 0.0, 0.0] + ] + ]; + + // Perform padding logic + let input_array = Array::from_shape_vec( + (1, 1, 3, 3), + input.into_iter().flatten().flatten().collect() + ).unwrap(); + let padded_input = conv.pad_input(&input_array); + + // Compare each value in the padded matrix + for (padded_row, exp_row) in padded_input.iter().zip(expected_padded.iter()) { + for (padded_val, exp_val) in exp_row.iter().zip(exp_row.iter()) { + for (padded_inner, exp_inner) in padded_val.iter().zip(exp_val.iter()) { + assert_abs_diff_eq!(padded_inner, exp_inner, epsilon = 1e-6); + } + } + } + } + + #[test] + fn test_conv2d_with_stride() { + let input_shape = Shape::from(IxDyn(&[1, 1, 5, 5])); + let mut conv2d = Conv2D::new( + 1, // kernel_units + (2, 2), // kernel_size + (2, 2), // stride + PaddingMode::Valid, // padding + (1, 1), // dilation + 1, // groups + None::, + true, + false // no bias + ); + + conv2d.build(input_shape.clone()).expect("Failed to build layer"); + + // Check output shape with stride + let output_shape = conv2d.output_shape().expect("Failed to compute output shape"); + assert_eq!(output_shape.raw_dim()[2], 2); // (5-2)/2 + 1 = 2 + assert_eq!(output_shape.raw_dim()[3], 2); + } + + #[test] + fn test_conv2d_with_custom_padding() { + let input_shape = Shape::from(IxDyn(&[1, 1, 3, 3])); + let mut conv2d = Conv2D::new( + 1, // kernel_units + (2, 2), // kernel_size + (1, 1), // stride + PaddingMode::Custom(1, 1), // padding + (1, 1), // dilation + 1, // groups + None::, + true, + false // no bias + ); + let input = Tensor::new( + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], + input_shape.clone() + ); + + conv2d.build(input_shape).expect("Failed to build layer"); + let output = conv2d.forward(&input).expect("Forward pass failed"); + let output_shape = conv2d.output_shape().expect("Failed to compute output shape"); + // Check output shape + + assert_eq!(output_shape.raw_dim()[2], 4); + assert_eq!(output_shape.raw_dim()[3], 4); + } + + #[test] + fn test_conv2d_grouped() { + let input_shape = Shape::from(IxDyn(&[1, 4, 4, 4])); // 4 input channels + let mut conv2d = Conv2D::new( + 4, // 4 output channels + (2, 2), // kernel_size + (1, 1), // stride + PaddingMode::Valid, // padding + (1, 1), // dilation + 2, // 2 groups + None::, + true, + false // no bias + ); + conv2d.build(input_shape.clone()).expect("Failed to build layer"); + + // Verify weight shape for grouped convolution + assert_eq!( + conv2d.weights.as_ref().unwrap().data.shape(), + &[4, 2, 2, 2] // [out_channels, in_channels/groups, kernel_h, kernel_w] + ); + } + + #[test] + fn test_conv2d_grouped_with_bias() { + let input_shape = Shape::from(IxDyn(&[1, 4, 4, 4])); // 4 input channels + let mut conv2d = Conv2D::new( + 4, // 4 output channels + (2, 2), // kernel_size + (1, 1), // stride + PaddingMode::Valid, // padding + (1, 1), // dilation + 2, // 2 groups + None::, + true, + true + ); + conv2d.build(input_shape.clone()).expect("Failed to build layer"); + + // Verify weight shape for grouped convolution + assert_eq!( + conv2d.weights.as_ref().unwrap().data.shape(), + &[4, 2, 2, 2] // [out_channels, in_channels/groups, kernel_h, kernel_w] + ); + } + + #[test] + fn test_conv2d_backward() { + let input_shape = Shape::from(IxDyn(&[1, 1, 3, 3])); + let mut conv2d = Conv2D::new( + 1, // kernel_units + (2, 2), // kernel_size + (1, 1), // stride + PaddingMode::Valid, // padding + (1, 1), // dilation + 1, // groups + None::, + true, + true // include bias + ); + + let input = Tensor::new( + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], + input_shape.clone() + ); + conv2d.build(input_shape).expect("Failed to build layer"); + + // Forward pass + let output = conv2d.forward(&input).expect("Forward pass failed"); + + // Create gradient tensor (assuming gradients of 1.0) + let grad = Tensor::ones(output.data.dim().into(), Device::default()); + + // Backward pass + let input_grad = conv2d.backward(&grad).expect("Backward pass failed"); + + // Verify gradients were computed + assert!(conv2d.weights_grad.is_some()); + assert!(conv2d.bias_grad.is_some()); + assert_eq!(input_grad.data.shape(), input.data.shape()); + } + #[test] + fn test_conv2d_with_dilation() { + let input_shape = Shape::from(IxDyn(&[1, 1, 5, 5])); + let mut conv2d = Conv2D::new( + 1, // kernel_units + (2, 2), // kernel_size + (1, 1), // stride + PaddingMode::Valid, // padding + (2, 2), // dilation + 1, // groups + None::, + true, + false // no bias + ); + + conv2d.build(input_shape.clone()).expect("Failed to build layer"); + + // Check output shape with dilation + let output_shape = conv2d.output_shape().expect("Failed to compute output shape"); + assert_eq!(output_shape.raw_dim()[2], 3); + assert_eq!(output_shape.raw_dim()[3], 3); + } + + #[test] + fn test_conv2d_update_weights() { + let input_shape = Shape::from(IxDyn(&[1, 1, 3, 3])); + let mut conv2d = Conv2D::new( + 1, // kernel_units + (2, 2), // kernel_size + (1, 1), // stride + PaddingMode::Valid, // padding + (1, 1), // dilation + 1, // groups + None::, + true, + true // include bias + ); + + let input = Tensor::new( + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], + input_shape.clone() + ); + + conv2d.build(input_shape).expect("Failed to build layer"); + let initial_weights = conv2d.weights.clone().unwrap(); + + // Forward pass + let output = conv2d.forward(&input).expect("Forward pass failed"); + + // Create gradient tensor (assuming gradients of 1.0) + let grad = Tensor::ones(output.data.dim().into(), Device::default()); + + // Backward pass + conv2d.backward(&grad).expect("Backward pass failed"); + + let mut optimizer: Box = Box::new(crate::optimizers::Adam::new(1.0)); + conv2d.update_weights(&mut optimizer).unwrap(); + + let updated_weights = conv2d.weights.unwrap(); + + assert_ne!(initial_weights.data, updated_weights.data, "Weights should be updated"); + } + + #[test] + fn test_conv2d_bias_update() { + let input_shape = Shape::from(IxDyn(&[1, 1, 3, 3])); + let mut conv2d = Conv2D::new( + 1, // kernel_units + (2, 2), // kernel_size + (1, 1), // stride + PaddingMode::Valid, // padding + (1, 1), // dilation + 1, // groups + None::, + true, + true // include bias + ); + + let input = Tensor::new( + vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0], + input_shape.clone() + ); + conv2d.build(input_shape).expect("Failed to build layer"); + let initial_bias = conv2d.bias.clone().unwrap(); + + // Forward pass + let output = conv2d.forward(&input).expect("Forward pass failed"); + + // Create gradient tensor (assuming gradients of 1.0) + let grad = Tensor::ones(output.data.dim().into(), Device::default()); + + // Backward pass + conv2d.backward(&grad).expect("Backward pass failed"); + + let mut optimizer: Box = Box::new(crate::optimizers::Adam::new(1.0)); + conv2d.update_weights(&mut optimizer).unwrap(); + + let updated_bias = conv2d.bias.unwrap(); + assert_ne!(initial_bias.data, updated_bias.data, "Bias should be updated"); + } + #[test] + fn test_conv2d_param_count() { + let input_shape = Shape::from(IxDyn(&[1, 3, 28, 28])); + let mut conv2d = Conv2D::new( + 2, // kernel_units + (3, 3), // kernel_size + (1, 1), // stride + PaddingMode::Valid, // padding + (1, 1), // dilation + 1, // groups + None::, + true, + true + ); + conv2d.build(input_shape).unwrap(); + let (weights_count, bias_count) = conv2d.param_count().unwrap(); + assert_eq!(weights_count, 2 * 3 * 3 * 3); + assert_eq!(bias_count, 2 * 1); + } +} diff --git a/delta/src/neuralnet/layers/error.rs b/delta/src/neuralnet/layers/error.rs index 17fcae4..64560b6 100644 --- a/delta/src/neuralnet/layers/error.rs +++ b/delta/src/neuralnet/layers/error.rs @@ -44,8 +44,11 @@ pub enum LayerError { MissingInput, /// Error when the input shape is invalid. InvalidInputShape, + /// Error when the parameters are invalid. + InvalidParameters, /// Error when an optimizer error occurs. OptimizerError(OptimizerError), + } impl fmt::Display for LayerError { @@ -54,6 +57,7 @@ impl fmt::Display for LayerError { LayerError::UninitializedWeights => write!(f, "Weights must be initialized"), LayerError::UninitializedBias => write!(f, "Bias must be initialized"), LayerError::UninitializedInput => write!(f, "Input must be initialized"), + LayerError::InvalidParameters => write!(f, "Invalid parameters"), LayerError::MissingInput => write!(f, "Input must be set"), LayerError::OptimizerError(err) => write!(f, "Optimizer error: {}", err), LayerError::InvalidInputShape => write!(f, "Invalid input shape"), diff --git a/delta/src/neuralnet/layers/mod.rs b/delta/src/neuralnet/layers/mod.rs index 70bbd19..0e651f7 100644 --- a/delta/src/neuralnet/layers/mod.rs +++ b/delta/src/neuralnet/layers/mod.rs @@ -32,10 +32,12 @@ pub mod dense; pub mod error; pub mod flatten; pub mod max_pooling_2d; +pub mod conv_2d; use std::fmt::Debug; pub use conv_1d::Conv1D; +pub use conv_2d::Conv2D; pub use dense::Dense; pub use flatten::Flatten; pub use max_pooling_2d::MaxPooling2D;