Skip to content

Commit

Permalink
Merge pull request #58 from jeertmans/split
Browse files Browse the repository at this point in the history
feat(cli/lib): automatically splitting long text into multiple and threading
  • Loading branch information
jeertmans authored Feb 7, 2023
2 parents 2f4636f + 46a8032 commit 8b1f78e
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 111 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added cli requirements for `username`/`api_key` pair. [#16](https://github.com/jeertmans/languagetool-rust/pull/16), [#30](https://github.com/jeertmans/languagetool-rust/pull/30)
- Added a `CommandNotFound` error variant for when docker is not found. [#52](https://github.com/jeertmans/languagetool-rust/pull/52)
- Added a `split_len` function. [#18](https://github.com/jeertmans/languagetool-rust/pull/18)
- Automatically split long text into multiple fragments. [#58](https://github.com/jeertmans/languagetool-rust/pull/58)

### Changed

Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ tokio = {version = "^1.0", features = ["macros"]}

[features]
annotate = ["dep:annotate-snippets"]
cli = ["color", "dep:clap", "dep:is-terminal", "dep:tokio"]
cli = ["annotate", "color", "dep:clap", "dep:is-terminal", "multithreaded"]
cli-complete = ["cli", "clap_complete"]
color = ["annotate-snippets?/color", "dep:termcolor"]
default = ["cli", "native-tls"]
docker = []
full = ["annotate", "cli-complete", "color", "docker", "unstable"]
multithreaded = ["dep:tokio"]
native-tls = ["reqwest/native-tls"]
native-tls-vendored = ["reqwest/native-tls-vendored"]
unstable = []
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
#### Optional Features
- **annotate**: Adds method(s) to annotate results from check request. If **cli** feature is also enabled, the CLI will by default print an annotated output.
- **annotate**: Adds method(s) to annotate results from check request.
- **cli-complete**: Adds commands to generate completion files for various shells. This feature also activates the **cli** feature. Enter `ltrs completions --help` to get help with installing completion files.
- **color**: Enables color outputting in the terminal. If **cli** feature is also enabled, the `--color=<WHEN>` option will be available.
- **full**: Enables all features that are mutually compatible (i.e., `annotate`, `cli`, `cli-complete`, `color`, `docker`, and `unstable`).
- **multithreaded**: Enables multithreaded requests.
- **native-tls-vendored**: Enables the `vendored` feature of `native-tls`. This or `native-tls` should be activated if you are planning to use HTTPS servers.
- **unstable**: Adds more fields to JSON responses that are not present in the [Model | Example Value](https://languagetool.org/http-api/swagger-ui/#!/default/) but might be present in some cases. All added fields are optional, hence the `Option` around them.
Expand Down
104 changes: 94 additions & 10 deletions src/lib/check.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
//! Structures for `check` requests and responses.
use super::error::{Error, Result};
#[cfg(feature = "annotate")]
use annotate_snippets::{
display_list::{DisplayList, FormatOptions},
snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation},
};
#[cfg(feature = "cli")]
use clap::{Args, Parser, ValueEnum};
use serde::{Deserialize, Serialize};
#[cfg(feature = "cli")]
use std::path::PathBuf;

/// Requests
Expand Down Expand Up @@ -520,6 +526,22 @@ impl CheckRequest {
);
}
}

/// Split this request into multiple requests, using the [`split_len`]
/// function to split the text.
///
/// `n` and `pat` are forwarded unchanged to [`split_len`]. Each returned
/// request is a copy of `self` whose text has been replaced, through
/// `with_text`, by one of the resulting fragments.
///
/// # Panics
///
/// If `self.text` is none.
#[must_use]
pub fn split(&self, n: usize, pat: &str) -> Vec<Self> {
    // Clone the request once and move its text out of the copy, so that the
    // per-fragment clones below do not each duplicate the full original text
    // only to have it overwritten right away.
    // NOTE(review): assumes `with_text` overwrites the text without reading
    // the previous value - confirm against its definition.
    let mut template = self.clone();
    let text = template.text.take().unwrap();

    split_len(text.as_str(), n, pat)
        .iter()
        .map(|text_fragment| template.clone().with_text(text_fragment.to_string()))
        .collect()
}
}

/// Parse a string slice into a [`PathBuf`], and error if the file does not
Expand All @@ -540,19 +562,20 @@ fn parse_filename(s: &str) -> Result<PathBuf> {
#[derive(Debug, Parser)]
pub struct CheckCommand {
/// If present, raw JSON output will be printed instead of annotated text.
#[cfg(feature = "annotate")]
/// This has no effect if `--data` is used, because it is never
/// annotated.
#[cfg(feature = "cli")]
#[clap(short = 'r', long)]
pub raw: bool,
/// If present, more context (i.e., line number and line offset) will be
/// added to response.
#[clap(short = 'm', long)]
pub more_context: bool,
/// Sets the maximum number of characters before splitting.
#[clap(long, default_value_t = 1500)]
pub maximum_length: isize,
pub max_length: usize,
/// If text is too long, will split on this pattern.
#[clap(long, default_value = "\n\n")]
pub split_pattern: String,
/// Max. number of suggestions kept. If negative, all suggestions are kept.
#[clap(long, default_value_t = 5, allow_negative_numbers = true)]
pub max_suggestions: isize,
/// Inner [`CheckRequest`].
#[command(flatten)]
pub request: CheckRequest,
Expand Down Expand Up @@ -606,7 +629,6 @@ pub struct Context {
}

/// More context, post-processed in check response.
#[cfg(feature = "cli")]
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Serialize)]
#[non_exhaustive]
pub struct MoreContext {
Expand Down Expand Up @@ -708,7 +730,6 @@ pub struct Match {
/// Error message.
pub message: String,
/// More context to match, post-processed using original text.
#[cfg(feature = "cli")]
#[serde(skip_serializing_if = "Option::is_none")]
pub more_context: Option<MoreContext>,
/// Char index at which the match start.
Expand Down Expand Up @@ -788,6 +809,71 @@ impl CheckResponse {
pub fn iter_matches_mut(&mut self) -> std::slice::IterMut<'_, Match> {
self.matches.iter_mut()
}

/// Creates an annotated string from current response.
///
/// One snippet is rendered per match and the rendered snippets are joined
/// with a newline. Each snippet is titled with the match message and rule
/// id, shows the match context text, and carries two annotations over the
/// same range: the rule description (error) and the comma-separated list of
/// suggested replacements (help).
///
/// - `text`: the original text that was checked; only used to compute the
///   line number at which each match starts.
/// - `origin`: optional origin label attached to every snippet slice.
/// - `color`: whether the rendered output should be colored.
///
/// If the response contains no match, a short message saying so is returned
/// instead.
#[cfg(feature = "annotate")]
#[must_use]
pub fn annotate(&self, text: &str, origin: Option<&str>, color: bool) -> String {
    if self.matches.is_empty() {
        return "No errors were found in provided text".to_string();
    }

    // Build the ", "-separated replacement lists up front: the snippets
    // below only borrow their labels, so the strings must outlive them.
    let replacements: Vec<_> = self
        .matches
        .iter()
        .map(|m| {
            m.replacements.iter().fold(String::new(), |mut acc, r| {
                if !acc.is_empty() {
                    acc.push_str(", ");
                }
                acc.push_str(&r.value);
                acc
            })
        })
        .collect();

    let snippets = self.matches.iter().zip(replacements.iter()).map(|(m, r)| {
        // Both annotations highlight the same span inside the context text.
        let range = (m.context.offset, m.context.offset + m.context.length);

        Snippet {
            title: Some(Annotation {
                label: Some(&m.message),
                id: Some(&m.rule.id),
                annotation_type: AnnotationType::Error,
            }),
            footer: vec![],
            slices: vec![Slice {
                source: &m.context.text,
                // 1-based line number: count the newlines that precede the
                // match offset in the original text.
                line_start: 1 + text.chars().take(m.offset).filter(|c| *c == '\n').count(),
                origin,
                fold: true,
                annotations: vec![
                    SourceAnnotation {
                        label: &m.rule.description,
                        annotation_type: AnnotationType::Error,
                        range,
                    },
                    SourceAnnotation {
                        label: r,
                        annotation_type: AnnotationType::Help,
                        range,
                    },
                ],
            }],
            opt: FormatOptions {
                color,
                ..Default::default()
            },
        }
    });

    let mut annotation = String::new();

    for snippet in snippets {
        // Separate consecutive snippets with a single newline.
        if !annotation.is_empty() {
            annotation.push('\n');
        }
        annotation.push_str(&DisplayList::from(snippet).to_string());
    }
    annotation
}
}

/// Check response with additional context.
Expand Down Expand Up @@ -866,7 +952,6 @@ impl CheckResponseWithContext {
}
}

#[cfg(feature = "cli")]
impl From<CheckResponseWithContext> for CheckResponse {
#[allow(clippy::needless_borrow)]
fn from(mut resp: CheckResponseWithContext) -> Self {
Expand Down Expand Up @@ -930,7 +1015,6 @@ impl<'source, T> MatchPositions<'source, T> {
}

fn update_line_number_and_offset(&mut self, m: &Match) {
// TODO: check cases where newline is actually '\r\n' (Windows platforms)
let n = m.offset - self.offset;
for _ in 0..n {
match self.text_chars.next() {
Expand Down
60 changes: 29 additions & 31 deletions src/lib/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
//! It contains all the content needed to create LTRS's command line interface.
use crate::{
check::CheckResponseWithContext,
error::Result,
server::{ServerCli, ServerClient},
words::WordsSubcommand,
Expand Down Expand Up @@ -114,50 +115,47 @@ impl Cli {
#[cfg(feature = "annotate")]
let color = stdout.supports_color();

type Item<'a> = Result<(Option<String>, Option<&'a str>)>;
let server_client = server_client.with_max_suggestions(cmd.max_suggestions);

let sources_iter: Box<dyn Iterator<Item = Item>> = if cmd.filenames.is_empty() {
if cmd.filenames.is_empty() {
if request.text.is_none() && request.data.is_none() {
let mut text = String::new();
match read_from_stdin(&mut stdout, &mut text) {
Ok(_) => Box::new(vec![Ok((Some(text), None))].into_iter()),
Err(e) => Box::new(vec![Err(e)].into_iter()),
}
} else {
Box::new(vec![Ok((None, None))].into_iter())
}
} else {
Box::new(cmd.filenames.iter().map(|filename| {
let text = std::fs::read_to_string(filename)?;
Ok((Some(text), filename.to_str()))
}))
};

for source in sources_iter {
let (text, _filename) = source?;
if let Some(text) = text {
read_from_stdin(&mut stdout, &mut text)?;
request = request.with_text(text);
}
let mut response = server_client.check(&request).await?;

#[cfg(feature = "annotate")]
if !cmd.raw {
if request.text.is_some() && !cmd.raw {
let text = request.text.unwrap();
response = CheckResponseWithContext::new(text.clone(), response).into();
writeln!(
&mut stdout,
"{}",
&server_client
.annotate_check(&request, _filename, color)
.await?
&response.annotate(text.as_str(), None, color)
)?;
} else {
let mut resp = server_client.check(&request).await?;
writeln!(&mut stdout, "{}", serde_json::to_string_pretty(&response)?)?;
}

return Ok(());
}

if cmd.more_context {
use crate::check::CheckResponseWithContext;
let text = request.get_text();
resp = CheckResponseWithContext::new(text, resp).into();
}
for filename in cmd.filenames.iter() {
let text = std::fs::read_to_string(filename)?;
let requests = request
.clone()
.with_text(text.clone())
.split(cmd.max_length, cmd.split_pattern.as_str());
let response = server_client.check_multiple_and_join(requests).await?;

writeln!(&mut stdout, "{}", serde_json::to_string_pretty(&resp)?)?;
if !cmd.raw {
writeln!(
&mut stdout,
"{}",
&response.annotate(text.as_str(), filename.to_str(), color)
)?;
} else {
writeln!(&mut stdout, "{}", serde_json::to_string_pretty(&response)?)?;
}
}
},
Expand Down
5 changes: 2 additions & 3 deletions src/lib/docker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,11 @@ pub struct DockerCommand {
#[cfg(feature = "cli")]
impl DockerCommand {
/// Execute a Docker command and write output to stdout.
pub fn execute<W>(&self, stdout: &mut W) -> Result<()>
pub fn execute<W>(&self, _stdout: &mut W) -> Result<()>
where
W: std::io::Write,
{
let result = self.docker.run_action()?;
writeln!(stdout, "{result:?}")?;
self.docker.run_action()?;
Ok(())
}
}
5 changes: 5 additions & 0 deletions src/lib/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ pub enum Error {
/// Error from checking if `filename` exists and is actually a file.
#[error("invalid filename (got '{0}', does not exist or is not a file)")]
InvalidFilename(String),

/// Error when joining multiple futures.
#[cfg(feature = "multithreaded")]
#[error(transparent)]
JoinError(#[from] tokio::task::JoinError),
}

/// Result type alias with error type defined above (see [`Error`]).
Expand Down
Loading

0 comments on commit 8b1f78e

Please sign in to comment.