acorn_lib/analyzer/
mod.rs

1//! # Prose analyzer module
2//!
3//! This is where we keep functions and interfaces necessary to execute ACORN's automated editorial style guide as well as content readability analyzer.
4//!
5use crate::analyzer::vale::{ValeOutput, ValeOutputItem};
6use crate::bucket::{Location, Repository};
7use crate::io::{download_binary, extract_zip, file_checksum, make_executable, network_get_request, standard_project_folder};
8use crate::prelude;
9use crate::prelude::{create_dir_all, remove_file, Error, File, HashMap, PathBuf, Write};
10use crate::schema::ProgrammingLanguage;
11use crate::util::constants::{
12    APPLICATION, CUSTOM_VALE_PACKAGE_NAME, DEFAULT_VALE_PACKAGE_URL, DEFAULT_VALE_ROOT, DISABLED_VALE_RULES, ENABLED_VALE_PACKAGES, ORGANIZATION,
13    VALE_RELEASES_URL, VALE_VERSION,
14};
15use crate::util::{command_exists, suffix, Constant, Label, SemanticVersion, ToAbsoluteString};
16use bat::PrettyPrinter;
17use bon::Builder;
18use color_eyre::owo_colors::OwoColorize;
19use convert_case::{Case, Casing};
20use derive_more::Display;
21use duct::cmd;
22use flate2::read::GzDecoder;
23use ini::Ini;
24use lychee_lib::{CacheStatus, Response, Status};
25use polars::datatypes::PlSmallStr;
26use polars::frame::row::Row;
27use polars::prelude::{AnyValue, DataFrame, PolarsResult};
28use tar::Archive;
29use tracing::{debug, error, info, trace, warn};
30use validator::ValidationErrorsKind;
31use which::which;
32
33pub mod readability;
34pub mod vale;
35
36use readability::ReadabilityType;
37use vale::{Vale, ValeConfig};
38
39/// Trait for converting to a ([Polars]) row
40///
41/// [Polars]: https://docs.rs/polars/latest/polars/
42pub trait IntoRow<'a> {
43    /// Convert to a (Polars) row
44    fn to_row<T>(self) -> Row<'a>;
45}
46/// Trait for static analyzers (e.g. Vale)
47pub trait StaticAnalyzer<Config: StaticAnalyzerConfig> {
48    /// Get command name (e.g. "vale")
49    fn command(self) -> String;
50    /// Download binary
51    fn download(self, config: Option<Config>, skip_verify_checksum: bool) -> Self;
52    /// Download checksum values
53    fn download_checksums(self) -> Result<HashMap<String, String>, String>;
54    /// Extract binary
55    fn extract(self, path: PathBuf, destination: Option<PathBuf>) -> PathBuf;
56    /// Resolve analyzer
57    fn resolve(_config: Config, _is_offline: bool, _skip_verify_checksum: bool) -> Self;
58    /// Run analyzer on content
59    fn run(&self, id: String, content: String, output: Option<String>) -> Check;
60    /// Perform sync operation (only applies to Vale)
61    fn sync(self, is_offline: bool) -> Result<(), Error>;
62    /// Set binary
63    fn with_binary<P>(self, path: P) -> Self
64    where
65        P: Into<PathBuf>;
66    /// Set config
67    fn with_config(self, value: Config) -> Self;
68    /// Set system command
69    fn with_system_command(self) -> Self;
70    /// Set version
71    fn with_version(self, value: String) -> Self;
72}
73/// Trait for static analyzer configuration (e.g. .vale.ini)
74pub trait StaticAnalyzerConfig {
75    /// Get default configuration
76    fn default() -> Self;
77    /// Convert to INI
78    fn ini(self) -> Ini;
79    /// Save configuration
80    fn save(self) -> Self;
81    /// Set parent path of configuration
82    fn with_path(self, path: PathBuf) -> Self;
83}
84/// Various check categories available for validating research activity data
85#[derive(Clone, Debug, Display, PartialEq)]
86pub enum CheckCategory {
87    /// Website avaialability check
88    #[display("link")]
89    Link,
90    /// Static analysis of prose
91    #[display("prose")]
92    Prose,
93    /// Readability check using one of several metrics
94    #[display("readability")]
95    Readability,
96    /// Schema validation check
97    #[display("schema")]
98    Schema,
99}
100/// Error kind
101#[derive(Clone, Debug)]
102pub enum ErrorKind {
103    /// Readability issue where calculated index exceeds threshold of associated metric
104    Readability((f64, ReadabilityType)),
105    /// Prose issue found by Vale
106    Vale(Vec<ValeOutputItem>),
107    /// Schema validation issue found by [validator crate]
108    ///
109    /// [validator crate]: https://crates.io/crates/validator
110    Validator(ValidationErrorsKind),
111}
112/// Data structure for holding the result of a schema validation check
113#[derive(Builder, Clone, Debug, Display)]
114#[builder(start_fn = init)]
115#[display("{message}")]
116pub struct Check {
117    /// Check category
118    pub category: CheckCategory,
119    /// Textual context of check (e.g., paragraph where prose issues were found)
120    pub context: Option<String>,
121    /// Whether or not the check was successful
122    #[builder(default = false)]
123    pub success: bool,
124    /// HTTP status code
125    pub status_code: Option<String>,
126    /// Errors and issues found during check
127    pub errors: Option<ErrorKind>,
128    /// Path of file being validated
129    pub uri: Option<String>,
130    /// Message related to or description of validation issue (e.g., key name of invalid value, result of validation, etc.)
131    #[builder(default = "".to_string())]
132    pub message: String,
133}
134impl Check {
135    /// Returns the number of errors
136    pub fn issue_count(&self) -> usize {
137        match self.category {
138            | CheckCategory::Link => 1,
139            | CheckCategory::Prose => {
140                if let Some(kind) = &self.errors {
141                    match kind {
142                        | ErrorKind::Vale(values) => values.len(),
143                        | _ => 0,
144                    }
145                } else {
146                    0
147                }
148            }
149            | CheckCategory::Readability => 1,
150            | CheckCategory::Schema => {
151                if let Some(kind) = &self.errors {
152                    match kind {
153                        | ErrorKind::Validator(values) => match values {
154                            | ValidationErrorsKind::Field(_) => 1,
155                            | ValidationErrorsKind::Struct(values) => values.clone().into_errors().len(),
156                            | ValidationErrorsKind::List(_) => 0,
157                        },
158                        | _ => 0,
159                    }
160                } else {
161                    0
162                }
163            }
164        }
165    }
166    /// Print the schema check results
167    pub fn print(self) {
168        match self.category {
169            | CheckCategory::Link => {
170                let code = match self.status_code {
171                    | Some(code) => format!(" ({code})").dimmed().to_string(),
172                    | None => "".to_string(),
173                };
174                let url = match self.uri {
175                    | Some(value) => value.underline().italic().to_string(),
176                    | None => "Missing".italic().to_string(),
177                };
178                if self.success {
179                    let message = &self.message.to_case(Case::Title).green().bold().to_string();
180                    info!("=> {} \"{url}\" {message}{code}", Label::valid());
181                } else {
182                    let message = &self.message.to_case(Case::Title).red().bold().to_string();
183                    error!("=> {} \"{url}\" {message}{code}", Label::invalid());
184                }
185            }
186            | CheckCategory::Prose => {
187                let Check {
188                    context, errors, message, ..
189                } = self;
190                match &errors {
191                    | Some(ErrorKind::Vale(values)) => {
192                        error!("=> {} {} issues found in {}", Label::fail(), values.len(), message.underline());
193                        for item in values {
194                            let ValeOutputItem {
195                                check,
196                                line,
197                                message,
198                                severity,
199                                span,
200                                ..
201                            } = item;
202                            let location = format!("Line {}, Character {}", line, span[0]);
203                            println!("  {:<24} {:<21} {} {}", location, severity.colored(), message, check.dimmed());
204                        }
205                        let highlight = values.clone().into_iter().map(|item| item.line as usize).collect::<Vec<_>>();
206                        if let Some(content) = &context {
207                            println!();
208                            pretty_print(content, ProgrammingLanguage::Markdown, highlight);
209                            println!("\n");
210                        }
211                    }
212                    | None | Some(_) => {
213                        let message = format!("=> {} {} has {}", Label::pass(), message.underline(), "no prose issues".green(),);
214                        info!("{}", message);
215                    }
216                }
217            }
218            | CheckCategory::Readability => {
219                let Check {
220                    context, errors, message, ..
221                } = self;
222                match &errors {
223                    | Some(ErrorKind::Readability(values)) => {
224                        let (index, readability_type) = values;
225                        error!(
226                            "=> {} {} has {} value of {} (should be less than {})",
227                            Label::fail(),
228                            message,
229                            readability_type.to_string().to_uppercase(),
230                            index.red().bold(),
231                            context.unwrap().cyan(),
232                        );
233                    }
234                    | None | Some(_) => {
235                        if let Some(context) = &context {
236                            info!(
237                                "=> {} {} has {} {}",
238                                Label::pass(),
239                                message,
240                                "no readability issues".green().bold(),
241                                context.dimmed()
242                            );
243                        }
244                    }
245                }
246            }
247            | CheckCategory::Schema => {
248                let path = self.clone().uri.unwrap();
249                if self.success {
250                    info!("=> {} {} has {}", Label::pass(), path, "no schema validation issues".green().bold());
251                } else {
252                    let count = self.issue_count();
253                    error!(
254                        "=> {} Found {} schema validation issue{} in {}: \n{:#?}",
255                        Label::fail(),
256                        count.red(),
257                        suffix(count),
258                        path.italic().underline(),
259                        self.errors.unwrap()
260                    );
261                }
262            }
263        }
264    }
265    /// Returns a new LinkCheckResult with the given URL
266    pub fn with_uri(self, value: String) -> Self {
267        Check::init()
268            .category(self.category)
269            .success(self.success)
270            .uri(value)
271            .message(self.message)
272            .maybe_status_code(self.status_code)
273            .maybe_errors(self.errors)
274            .build()
275    }
276}
277impl<'a> IntoRow<'a> for Check {
278    fn to_row<Check>(self) -> Row<'a> {
279        let Self {
280            success,
281            category,
282            message,
283            uri,
284            status_code,
285            context,
286            ..
287        } = self;
288        let data = [
289            if success { "pass" } else { "fail" },
290            &category.to_string(),
291            &message,
292            &uri.unwrap_or_default(),
293            &status_code.unwrap_or_default(),
294            &context.unwrap_or_default(),
295        ];
296        Row::new(data.into_iter().map(|x| AnyValue::String(x).into_static()).collect::<Vec<_>>())
297    }
298}
299impl StaticAnalyzer<ValeConfig> for Vale {
300    fn command(self) -> String {
301        "vale".to_string()
302    }
303    /// Resolve Vale
304    /// ### Notes
305    /// - Will use system `vale` command if available
306    /// - Will use local `vale` binary if available (will expect local binary if offline)
307    /// - Will download `vale` binary if not available by other means
308    fn resolve(config: ValeConfig, is_offline: bool, skip_verify_checksum: bool) -> Vale {
309        fn any_exist<S>(paths: Vec<S>) -> bool
310        where
311            S: Into<PathBuf>,
312        {
313            paths.into_iter().any(|s| s.into().exists())
314        }
315        let root = DEFAULT_VALE_ROOT;
316        let name = "vale";
317        let init = Vale::init().build();
318        let vale = if command_exists(name) {
319            init.with_config(config).with_system_command()
320        } else if is_offline || any_exist(vec![format!("{root}{name}"), format!("{root}{name}.exe")]) {
321            info!("=> {} Local {} binary", Label::using(), name.green().bold());
322            #[cfg(any(unix, target_os = "wasi", target_os = "redox"))]
323            {
324                init.with_config(config).with_binary(format!("{root}{name}"))
325            }
326            #[cfg(windows)]
327            {
328                init.with_config(config).with_binary(format!("{root}{name}.exe"))
329            }
330        } else {
331            init.download(Some(config), skip_verify_checksum)
332        };
333        vale
334    }
335    fn run(&self, id: String, content: String, output: Option<String>) -> Check {
336        let root = standard_project_folder("check", None);
337        match create_dir_all(root.clone()) {
338            | Ok(_) => {}
339            | Err(why) => error!(path = root.clone().to_absolute_string(), "=> {} Create - {}", Label::fail(), why),
340        }
341        let path = root.join(&id);
342        let mut file = match File::create(&path) {
343            | Ok(file) => file,
344            | Err(why) => panic!("=> {} Create file {} - {}", Label::fail(), path.display(), why),
345        };
346        file.write_all(content.as_bytes())
347            .expect("Unable to write to cache directory project file");
348        let binary = match &self.binary {
349            | Some(value) => value,
350            | None => {
351                error!("=> {} {} binary", Label::not_found(), self.clone().command());
352                &PathBuf::from("./.vale/vale")
353            }
354        };
355        match &self.config {
356            | Some(config) => {
357                let result = match output {
358                    | Some(value) => cmd!(
359                        binary,
360                        "--no-wrap",
361                        "--config",
362                        config.clone().path,
363                        "--output",
364                        value,
365                        path.clone(),
366                        "--ext",
367                        ".md",
368                        "--no-exit",
369                    )
370                    .read(),
371                    | None => cmd!(
372                        binary,
373                        "--no-wrap",
374                        "--config",
375                        config.clone().path,
376                        path.clone(),
377                        "--ext",
378                        ".md",
379                        "--no-exit"
380                    )
381                    .read(),
382                };
383                match result {
384                    | Ok(output) => {
385                        let parsed = ValeOutput::parse(&output, path);
386                        if parsed.is_empty() {
387                            Check::init().category(CheckCategory::Prose).success(true).message(id).build()
388                        } else {
389                            Check::init()
390                                .category(CheckCategory::Prose)
391                                .success(false)
392                                .message(id)
393                                .errors(ErrorKind::Vale(parsed))
394                                .context(content)
395                                .build()
396                        }
397                    }
398                    | Err(output) => {
399                        error!("=> {} Analyze - {}", Label::fail(), output);
400                        Check::init().category(CheckCategory::Prose).success(false).message(id).build()
401                    }
402                }
403            }
404            | None => {
405                let title = self.clone().command().to_case(Case::Title);
406                error!("=> {} {} configuration", Label::not_found(), title);
407                Check::init().category(CheckCategory::Prose).success(false).message(id).build()
408            }
409        }
410    }
411    fn download(self, config: Option<ValeConfig>, skip_verify_checksum: bool) -> Vale {
412        let platform = prelude::vale_release_filename();
413        let release = match self.version {
414            | Some(value) => value,
415            | None => SemanticVersion::from_string(VALE_VERSION),
416        };
417        let url = format!("{VALE_RELEASES_URL}/download/v{release}/{}_{release}_{platform}", self.clone().command());
418        info!(url, "=> {} Vale release v{release}", Label::using());
419        let binary = match download_binary(&url, ".") {
420            | Ok(path) => {
421                if !skip_verify_checksum {
422                    let dowloaded_checksum = match self.clone().download_checksums() {
423                        | Ok(value) => value.get(&platform).unwrap().to_string(),
424                        | Err(_) => "".to_string(),
425                    };
426                    if let Some(calculated) = file_checksum(path.clone()) {
427                        if !dowloaded_checksum.eq(&calculated) {
428                            error!(dowloaded_checksum, calculated, "=> {}", Label::invalid());
429                            let _cleanup = remove_file(path.clone());
430                        } else {
431                            info!(checksum = dowloaded_checksum, "=> {} Checksum verification", Label::pass());
432                        }
433                    };
434                } else {
435                    warn!("=> {} Checksum verification", Label::skip());
436                }
437                let destination = match config.clone() {
438                    | Some(value) => value.path.parent().unwrap().to_path_buf(),
439                    | None => PathBuf::from("./.vale/"),
440                };
441                let binary = self.clone().extract(path.clone(), Some(destination));
442                if make_executable(&binary) {
443                    let _cleanup = remove_file(path);
444                    Some(binary)
445                } else {
446                    error!("=> {} {} not executable", Label::fail(), self.command());
447                    None
448                }
449            }
450            | Err(error) => {
451                error!(error, url, "=> {} {} download", Label::fail(), self.command());
452                None
453            }
454        };
455        let builder = Vale::init().version(release).maybe_binary(binary);
456        match config {
457            | Some(value) => builder.config(value).build(),
458            | None => {
459                let config = ValeConfig::default();
460                builder.config(config).build()
461            }
462        }
463    }
464    fn download_checksums(self) -> Result<HashMap<String, String>, String> {
465        let release = match self.version {
466            | Some(value) => value,
467            | None => SemanticVersion::from_string(VALE_VERSION),
468        };
469        let url = format!(
470            "{VALE_RELEASES_URL}/download/v{release}/{}_{release}_checksums.txt",
471            self.clone().command()
472        );
473        let response = network_get_request(url).send().unwrap();
474        let content = response.text().unwrap();
475        let checksums = content.lines().clone().fold(HashMap::new(), |mut acc: HashMap<String, String>, line| {
476            let mut values = line.split("  ").collect::<Vec<&str>>();
477            let key = values.pop().unwrap()["vale_#.#.#_".len()..].to_string();
478            let value = values.pop().unwrap().to_string();
479            acc.insert(key, value);
480            acc
481        });
482        debug!(
483            "=> {} {} checksums {:#?}",
484            Label::using(),
485            self.command().to_case(Case::Title),
486            checksums.dimmed().cyan()
487        );
488        Ok(checksums)
489    }
490    fn extract(self, path: PathBuf, destination: Option<PathBuf>) -> PathBuf {
491        let command = self.clone().command();
492        let parent = match destination {
493            | Some(value) => value.to_absolute_string(),
494            | None => format!("./.{command}/"),
495        };
496        let extension = path.extension().unwrap_or_default().to_str().unwrap_or_default().to_string();
497        match extension.as_str() {
498            | "zip" => match extract_zip(path, Some(parent.into())) {
499                | Ok(value) => {
500                    let path = value.join(command);
501                    if cfg!(windows) {
502                        path.with_extension("exe")
503                    } else {
504                        path
505                    }
506                }
507                | Err(why) => {
508                    error!("=> {} {command} extract - {why}", Label::fail());
509                    let path = PathBuf::from(DEFAULT_VALE_ROOT).join(command);
510                    if cfg!(windows) {
511                        path.with_extension("exe")
512                    } else {
513                        path
514                    }
515                }
516            },
517            | "gz" => {
518                let tar_gz = File::open(path).unwrap();
519                let tar = GzDecoder::new(tar_gz);
520                let mut archive = Archive::new(tar);
521                let message = format!("Unable to extract {command} binary");
522                archive.unpack(parent.clone()).expect(&message);
523                debug!(parent, "=> {} Extracted {command} binary", Label::using());
524                PathBuf::from(format!("{parent}/{command}"))
525            }
526            | _ => {
527                error!("=> {} {command} extract - Unsupported format", Label::fail());
528                PathBuf::from(DEFAULT_VALE_ROOT).join(command)
529            }
530        }
531    }
532    fn sync(self, is_offline: bool) -> Result<(), Error> {
533        let command = self.clone().command();
534        let path = match self.binary {
535            | Some(value) => value,
536            | None => {
537                error!("=> {} {} binary", Label::not_found(), command);
538                PathBuf::from(DEFAULT_VALE_ROOT).join(command)
539            }
540        };
541        let config_path = self.config.unwrap().path;
542        let result = if is_offline {
543            println!("=> {} Not running vale sync in offline mode", Label::skip());
544            cmd!("").run()
545        } else {
546            cmd!(path.clone(), "--config", config_path.clone(), "sync").run()
547        };
548        match result {
549            | Ok(_) => {
550                let parent = format!("{}/styles/config/vocabularies/{}", config_path.parent().unwrap().display(), APPLICATION);
551                debug!(parent, "=> {} Vocabularies", Label::using());
552                match create_dir_all(parent.clone()) {
553                    | Ok(_) => {}
554                    | Err(why) => error!(directory = parent, "=> {} Create - {why}", Label::fail()),
555                }
556                match File::create(format!("{parent}/accept.txt")) {
557                    | Ok(mut file) => {
558                        // TODO: Concatenate organization alternative names to accept file
559                        let acronyms = Constant::last_values("acronyms");
560                        let partners = Constant::last_values("partners");
561                        let sponsors = Constant::last_values("sponsors");
562                        let words = Constant::read_lines("accept.txt");
563                        let content = acronyms.chain(partners).chain(sponsors).chain(words).collect::<Vec<String>>().join("\n");
564                        file.write_all(content.as_bytes()).expect("Unable to write to accept.txt");
565                    }
566                    | Err(why) => panic!("=> {} Create accept.txt - {}", Label::fail(), why),
567                }
568                match File::create(format!("{parent}/reject.txt")) {
569                    | Ok(mut file) => {
570                        let content = Constant::read_lines("reject.txt").join("\n");
571                        file.write_all(content.as_bytes()).expect("Unable to write to reject.txt");
572                    }
573                    | Err(why) => panic!("=> {} Create reject.txt - {}", Label::fail(), why),
574                }
575                Ok(())
576            }
577            | Err(why) => {
578                error!(config = config_path.to_absolute_string(), "=> {} Vale sync - {}", Label::fail(), why);
579                Err(why)
580            }
581        }
582    }
583    fn with_binary<P>(mut self, path: P) -> Self
584    where
585        P: Into<PathBuf>,
586    {
587        self.binary = Some(path.into());
588        self
589    }
590    fn with_config(mut self, value: ValeConfig) -> Self {
591        self.config = Some(value);
592        self
593    }
594    fn with_system_command(mut self) -> Self {
595        let name = self.clone().command();
596        if command_exists(name.clone()) {
597            let path = which(name.clone()).unwrap().to_path_buf();
598            self.binary = Some(path.clone());
599            let offset = "vale version ".len();
600            let version = cmd!(name.clone(), "--version").read().unwrap()[offset..].to_string();
601            self.version = Some(SemanticVersion::from_string(&version));
602            debug!(
603                path = path.to_absolute_string(),
604                "=> {} System {} (v{}) command",
605                Label::using(),
606                name.green().bold(),
607                version
608            );
609        }
610        self
611    }
612    fn with_version(mut self, value: String) -> Self {
613        self.version = Some(SemanticVersion::from_string(&value));
614        self
615    }
616}
617impl StaticAnalyzerConfig for ValeConfig {
618    fn default() -> Self {
619        fn to_string(values: Vec<&str>) -> Vec<String> {
620            values.iter().map(|s| s.to_string()).collect()
621        }
622        let config = ValeConfig::init()
623            .packages(to_string(ENABLED_VALE_PACKAGES.to_vec()))
624            .vocabularies(to_string(vec![&ORGANIZATION.to_uppercase(), APPLICATION]))
625            .disabled(to_string(DISABLED_VALE_RULES.to_vec()))
626            .build();
627        trace!("=> {} Default - {:#?}", Label::using(), config.dimmed().cyan());
628        config
629    }
630    fn ini(self) -> Ini {
631        let ValeConfig {
632            packages,
633            vocabularies,
634            disabled,
635            ..
636        } = self;
637        let mut conf = Ini::new();
638        let package_repository = Repository::GitLab {
639            id: None,
640            location: Location::Simple("https://code.ornl.gov/research-enablement/vale-package".to_string()),
641        };
642        let package_url = match package_repository.latest_release() {
643            | Some(release) => {
644                let tag = release.tag_name;
645                format!("https://code.ornl.gov/research-enablement/vale-package/-/archive/{tag}/vale-package-{tag}.zip")
646            }
647            | None => DEFAULT_VALE_PACKAGE_URL.to_string(),
648        };
649        // CAUTION: Order of attributes in INI file matter. "StylesPath" must come before "Vocab"
650        conf.with_section::<String>(None)
651            .set("StylesPath", "styles")
652            .set("Vocab", vocabularies.join(", "))
653            .set("Packages", format!("{}, {}", packages.join(", "), package_url));
654        conf.with_section(Some("*"))
655            .set("BasedOnStyles", format!("Vale, {}, {}", CUSTOM_VALE_PACKAGE_NAME, packages.join(", ")));
656        disabled.iter().for_each(|rule| {
657            conf.with_section(Some("*")).set(rule, "NO");
658        });
659        conf
660    }
661    fn save(self) -> ValeConfig {
662        let path = self.clone().path;
663        let parent = path.parent().unwrap().to_path_buf();
664        match create_dir_all(parent.clone()) {
665            | Ok(_) => {}
666            | Err(why) => error!(directory = parent.to_absolute_string(), "=> {} Create - {why}", Label::fail()),
667        }
668        match self.clone().ini().write_to_file(path.clone()) {
669            | Ok(_) => {
670                debug!(path = path.to_absolute_string(), "=> {} Saved configuration", Label::using());
671            }
672            | Err(why) => {
673                error!("=> {} Save configuration - {why}", Label::fail());
674            }
675        }
676        self
677    }
678    fn with_path(mut self, path: PathBuf) -> Self {
679        self.path = path;
680        self
681    }
682}
683/// Convert Lychee response to [`Check`]
684pub fn convert_lychee_response(value: Response) -> Check {
685    match value.status() {
686        | Status::Ok(code) | Status::Redirected(code, _) => Check::init()
687            .category(CheckCategory::Link)
688            .success(true)
689            .status_code(code.to_string())
690            .message("has no HTTP errors".to_string())
691            .build(),
692        | Status::Cached(status) => match status {
693            | CacheStatus::Ok(code) => Check::init()
694                .category(CheckCategory::Link)
695                .success(true)
696                .status_code(code.to_string())
697                .message("has no HTTP errors".to_string())
698                .build(),
699            | CacheStatus::Error(Some(code)) => Check::init()
700                .category(CheckCategory::Link)
701                .success(false)
702                .status_code(code.to_string())
703                .message("has cached HTTP errors".to_string())
704                .build(),
705            | CacheStatus::Unsupported => Check::init()
706                .category(CheckCategory::Link)
707                .success(false)
708                .message("unsupported cached response".to_string())
709                .build(),
710            | _ => Check::init()
711                .category(CheckCategory::Link)
712                .success(true)
713                .message("ignored or otherwise successful (cached response)".to_string())
714                .build(),
715        },
716        | Status::Error(code) => Check::init()
717            .category(CheckCategory::Link)
718            .success(false)
719            .status_code(code.to_string())
720            .message("has HTTP errors".to_string())
721            .build(),
722        | Status::Unsupported(why) => Check::init()
723            .category(CheckCategory::Link)
724            .success(false)
725            .message(format!("unsupported HTTP response - {why}"))
726            .build(),
727        | Status::UnknownStatusCode(code) => Check::init()
728            .category(CheckCategory::Link)
729            .success(false)
730            .status_code(code.to_string())
731            .message("unknown HTTP response".to_string())
732            .build(),
733        | Status::Timeout(_) => Check::init()
734            .category(CheckCategory::Link)
735            .success(false)
736            .message("HTTP timeout".to_string())
737            .build(),
738        | _ => Check::init()
739            .category(CheckCategory::Link)
740            .success(true)
741            .message("ignored or otherwise successful".to_string())
742            .build(),
743    }
744}
745/// Perform link check on given URL using Lychee
746pub async fn link_check(uri: Option<String>) -> Check {
747    match uri {
748        | Some(value) => {
749            let result = lychee_lib::check(value.as_str()).await;
750            match result {
751                | Ok(response) => convert_lychee_response(response).with_uri(value),
752                | Err(_) => Check::init()
753                    .category(CheckCategory::Link)
754                    .success(false)
755                    .uri(value)
756                    .message("unreachable".to_string())
757                    .build(),
758            }
759        }
760        | None => Check::init()
761            .category(CheckCategory::Link)
762            .success(false)
763            .message("missing URL".to_string())
764            .build(),
765    }
766}
767/// Convert vector of [`Check`] values to a Polars [DataFrame]
768pub fn checks_to_dataframe(values: Vec<Check>) -> PolarsResult<DataFrame> {
769    let names = ["success", "category", "message", "uri", "status_code", "context"];
770    to_dataframe::<Check, _, &str>(values, names)
771}
772/// Prints `text` to stdout using syntax highlighting for the specified `syntax`.
773///
774/// `highlight` is an iterator of line numbers to highlight in the output.
775pub fn pretty_print<I: IntoIterator<Item = usize>>(text: &str, syntax: ProgrammingLanguage, highlight: I) {
776    let input = format!("{text}\n");
777    let language = syntax.to_string();
778    let mut printer = PrettyPrinter::new();
779    printer
780        .input_from_bytes(input.as_bytes())
781        .theme("zenburn")
782        .language(&language)
783        .line_numbers(true);
784    for line in highlight {
785        printer.highlight(line);
786    }
787    printer.print().unwrap();
788}
789/// Create summary data table from given issues
790pub fn summary(issues: Vec<Check>) -> Vec<Vec<String>> {
791    [
792        CheckCategory::Schema,
793        CheckCategory::Link,
794        CheckCategory::Prose,
795        CheckCategory::Readability,
796    ]
797    .iter()
798    .map(|category| {
799        let count = issues
800            .iter()
801            .filter(|issue| issue.category == *category)
802            .map(|issue| issue.issue_count())
803            .sum::<usize>()
804            .to_string();
805        vec![category.to_string(), count]
806    })
807    .collect::<Vec<_>>()
808}
809/// Convert vector of values of a given type to a Polars [DataFrame]
810/// ### Example
811/// ```ignore
812/// let df = to_dataframe::<i32, _, str>(vec![1, 2, 3], ["a", "b", "c"]);
813/// ```
814///
815/// [DataFrame]: https://docs.rs/polars/latest/polars/prelude/struct.DataFrame.html
816pub fn to_dataframe<'a, T, I, H>(values: Vec<T>, names: I) -> PolarsResult<DataFrame>
817where
818    T: IntoRow<'a>,
819    H: Into<PlSmallStr>,
820    I: IntoIterator<Item = H>,
821{
822    let rows = values.into_iter().map(|value| value.to_row::<T>()).collect::<Vec<_>>();
823    match DataFrame::from_rows(&rows) {
824        | Ok(mut df) => match df.set_column_names(names) {
825            | Ok(_) => Ok(df),
826            | Err(why) => Err(why),
827        },
828        | Err(why) => Err(why),
829    }
830}
831
832#[cfg(test)]
833mod tests;