-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactored code, started to work on lsdb syntax
- Loading branch information
Showing
15 changed files
with
215 additions
and
170 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,2 @@ | ||
pub mod loaders; | ||
pub mod parsers; | ||
pub mod routes; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
pub mod parquet; | ||
pub mod parquet; | ||
pub mod parsers; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
use polars::prelude::*; | ||
use std::collections::HashMap; | ||
|
||
/// Returns the column names of a LazyFrame. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `lf` - A reference to a LazyFrame. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A vector of strings representing the column names of the DataFrame. | ||
pub fn get_lazyframe_column_names(lf : &LazyFrame) -> Vec<String> { | ||
let df = lf.clone().first().collect().unwrap(); | ||
df.get_column_names().iter().map(|x| x.to_string()).collect() | ||
} | ||
|
||
/// Parses a filter condition from a string into a Polars expression. | ||
/// | ||
/// The expected format for `condition` is "{column_name} {operator} {value}", where: | ||
/// - `column_name` identifies a DataFrame column. | ||
/// - `operator` is one of `<`, `<=`, `>`, `>=`, or `=`. | ||
/// - `value` is a number compared against the column's values. | ||
/// | ||
/// # Parameters | ||
/// * `condition` - A string slice representing the filter condition. | ||
/// | ||
/// # Returns | ||
/// A `Result` containing either: | ||
/// - `Ok(Expr)`: A Polars expression if the parsing succeeds. | ||
/// - `Err(Box<dyn Error>)`: An error if the format is incorrect or parsing fails. | ||
pub fn str_filter_to_expr(condition: &str) -> Result<Expr, Box<dyn std::error::Error>> { | ||
use regex::Regex; | ||
|
||
// Regex to catch "{column_name} {operator} {value}" | ||
let re = Regex::new(r"([a-zA-Z_]+)([<>=]+)([-+]?[0-9]*\.?[0-9]*)").unwrap(); | ||
let parts = re.captures(condition).unwrap(); | ||
|
||
if parts.len() == 4 { | ||
let column = parts.get(1).unwrap().as_str(); | ||
let operator = parts.get(2).unwrap().as_str(); | ||
let value = parts.get(3).unwrap().as_str(); | ||
|
||
match operator { | ||
"<" => Ok(col(column).lt(lit(value.parse::<f64>()?))), | ||
"<=" => Ok(col(column).lt_eq(lit(value.parse::<f64>()?))), | ||
">" => Ok(col(column).gt(lit(value.parse::<f64>()?))), | ||
">=" => Ok(col(column).gt_eq(lit(value.parse::<f64>()?))), | ||
"=" => Ok(col(column).eq(lit(value.parse::<f64>()?))), | ||
_ => Err("Unsupported operator".into()), | ||
} | ||
} else { | ||
Err("Invalid condition format".into()) | ||
} | ||
} | ||
|
||
|
||
/// Parses filter conditions from a list of tuples into Polars expressions. | ||
/// | ||
/// The expected format for each tuple in `filters` is (column_name, operator, value), where: | ||
/// - `column_name` identifies a DataFrame column. | ||
/// - `operator` is one of "==", "=", ">", ">=", "<", "<=", "!=", "in", "not in". | ||
/// - `value` is a number or a list of values compared against the column's values. | ||
/// | ||
/// # Parameters | ||
/// * `filters` - An optional vector of tuples representing the filter conditions. | ||
/// | ||
/// # Returns | ||
/// A `Result` containing either: | ||
/// - `Ok(Vec<Expr>)`: A vector of Polars expressions if parsing succeeds. | ||
/// - `Err(Box<dyn Error>)`: An error if the format is incorrect or parsing fails. | ||
pub fn filters_to_expr(filters: Option<Vec<(String, String, Vec<f64>)>>) -> Result<Vec<Expr>, Box<dyn std::error::Error>> { | ||
let mut expressions = Vec::new(); | ||
|
||
if let Some(conditions) = filters { | ||
for (column, operator, values) in conditions { | ||
let expression = match operator.as_str() { | ||
"=" | "==" => col(&column).eq(lit(values[0])), | ||
"!=" => col(&column).neq(lit(values[0])), | ||
">" => col(&column).gt(lit(values[0])), | ||
">=" => col(&column).gt_eq(lit(values[0])), | ||
"<" => col(&column).lt(lit(values[0])), | ||
"<=" => col(&column).lt_eq(lit(values[0])), | ||
_ => return Err("Unsupported operator".into()), | ||
}; | ||
expressions.push(expression); | ||
} | ||
} | ||
|
||
Ok(expressions) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
pub mod parse_params; | ||
pub mod helpers; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
use polars::{lazy::dsl::col, prelude::*}; | ||
use std::collections::HashMap; | ||
use crate::loaders::parsers::helpers; | ||
|
||
/// # Arguments | ||
/// | ||
/// * `params` - A reference to a HashMap of parameters containing 'columns' key. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A vector of Polars with the selected columns. | ||
pub fn parse_columns_from_params( params: &HashMap<String, String> ) -> Option<Vec<Expr>> { | ||
// Parse columns from params | ||
if let Some(cols) = params.get("columns") { | ||
let cols = cols.split(",").collect::<Vec<_>>(); | ||
let select_cols = cols.iter().map(|x| col(x)).collect::<Vec<_>>(); | ||
return Some(select_cols); | ||
} | ||
None | ||
} | ||
|
||
/// Parses a list of filter conditions from query parameter of hashmap. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `params` - A reference to a HashMap of parameters. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A Polars expression representing the combined filter conditions. | ||
pub fn parse_filters_from_params(params: &HashMap<String, String>) -> Result<Expr, Box<dyn std::error::Error>> { | ||
let mut filters = Vec::new(); | ||
if let Some(query) = params.get("filters") { | ||
filters = query.split(",").collect::<Vec<_>>(); | ||
} | ||
|
||
//TODO: DEPRECATE | ||
let conditions: Result<Vec<Expr>, _> = filters.iter() | ||
.map(|condition: &&str| helpers::str_filter_to_expr(*condition)) | ||
.collect(); | ||
|
||
let combined_condition = conditions?.into_iter() | ||
.reduce(|acc, cond| acc.and(cond)) | ||
.ok_or(""); // Handle case where no conditions are provided | ||
|
||
match combined_condition { | ||
Ok(_) => { Ok(combined_condition.unwrap()) }, | ||
Err(_) => { Err( "Couldnt parse queries".into() ) }, | ||
} | ||
} | ||
|
||
|
||
/// # Arguments | ||
/// | ||
/// * `params` - The client request HashMap of parameters. | ||
/// * `lf` - A reference to a LazyFrame. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A vector of Polars expressions representing the columns to exclude. | ||
pub fn parse_exclude_columns_from_params( params: &HashMap<String, String>, lf : &LazyFrame ) -> Option<Vec<Expr>> { | ||
// Parse columns from params | ||
if let Some(exclude_cols) = params.get("exclude_cols") { | ||
let exclude_cols = exclude_cols.split(",").collect::<Vec<_>>(); | ||
let exclude_cols = exclude_cols.iter().map(|&x| x).collect::<Vec<_>>(); | ||
|
||
let cols = helpers::get_lazyframe_column_names(&lf); | ||
|
||
let select_cols = cols.iter() | ||
.filter(|&x| !exclude_cols.contains( &x.as_str() )) | ||
.map(|x| col(x)) | ||
.collect::<Vec<_>>(); | ||
|
||
return Some(select_cols); | ||
} | ||
None | ||
} |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.