1use crate::analyzer::{link_check, Check};
8use color_eyre::eyre;
9use derive_more::Display;
10use indicatif::{ProgressBar, ProgressStyle};
11use owo_colors::OwoColorize;
12use rayon::prelude::*;
13use reqwest::blocking::Client;
14use reqwest::header::{HeaderMap, USER_AGENT};
15use serde::{Deserialize, Serialize};
16use serde_json::Result;
17use serde_with::skip_serializing_none;
18use std::fmt::Debug;
19use std::fs::File;
20use std::io::{copy, Cursor};
21use std::path::PathBuf;
22use std::vec;
23use tracing::{debug, error, trace, warn};
24use uriparse::URI;
25use urlencoding::encode;
26
27pub mod analyzer;
28pub mod constants;
29pub mod doctor;
30pub mod powerpoint;
31pub mod schema;
32pub mod util;
33
34use crate::util::*;
35
/// File names that are skipped when copying or downloading bucket content.
///
/// A path is ignored when it *ends with* any of these names.
pub const IGNORE: [&str; 5] = [
    ".gitignore",
    ".gitlab-ci.yml",
    ".gitkeep",
    ".DS_Store",
    "README.md",
];
42
43#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)]
45#[serde(rename_all = "lowercase")]
46pub enum EntryType {
47 #[display("tree")]
51 Tree,
52 #[display("blob")]
56 Blob,
57}
58#[derive(Clone, Debug, Display, Serialize, Deserialize)]
60#[serde(untagged)]
61pub enum Location {
62 Simple(String),
64 #[display("{uri}")]
66 Detailed {
67 scheme: Scheme,
73 uri: String,
75 },
76}
77#[derive(Clone, Debug, Display, Serialize, Deserialize)]
79#[serde(tag = "provider", rename_all = "lowercase")]
80pub enum Repository {
81 #[display("git")]
85 Git {
86 location: Location,
88 },
89 #[display("github")]
93 GitHub {
94 #[serde(alias = "uri")]
96 location: Location,
97 },
98 #[display("gitlab")]
102 GitLab {
103 id: Option<u64>,
107 #[serde(alias = "uri")]
109 location: Location,
110 },
111}
112#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
118#[serde(rename_all = "lowercase")]
119pub enum Scheme {
120 #[display("https")]
122 HTTPS,
123 #[display("file")]
125 File,
126 Unsupported,
128}
129#[derive(Clone, Debug, Serialize, Deserialize)]
131#[serde(rename_all = "camelCase")]
132pub struct Bucket {
133 pub name: String,
137 pub description: Option<String>,
141 #[serde(alias = "repository")]
145 pub code_repository: Repository,
146}
147#[derive(Clone, Debug, Serialize, Deserialize)]
172pub struct BucketsConfig {
173 pub buckets: Vec<Bucket>,
175}
176#[skip_serializing_none]
180#[derive(Clone, Debug, Serialize, Deserialize)]
181pub struct GithubTreeEntry {
182 pub path: String,
186 pub mode: String,
188 #[serde(rename = "type")]
190 pub entry_type: EntryType,
191 pub sha: String,
195 pub size: Option<u64>,
199 pub url: String,
203}
204#[derive(Clone, Debug, Serialize, Deserialize)]
225pub struct GithubTreeResponse {
226 pub sha: String,
228 pub url: String,
230 pub tree: Vec<GithubTreeEntry>,
232 pub truncated: bool,
234}
235#[derive(Clone, Debug, Serialize, Deserialize)]
239pub struct GitlabTreeEntry {
240 pub id: String,
244 pub name: String,
246 #[serde(rename = "type")]
248 pub entry_type: EntryType,
249 pub path: String,
253 pub mode: String,
255}
256#[derive(Clone, Debug, Serialize, Deserialize)]
258pub struct Release {
259 pub name: String,
261 pub tag_name: String,
265 #[serde(alias = "body")]
267 pub description: String,
268 pub created_at: String,
270 #[serde(alias = "published_at")]
272 pub released_at: String,
273}
274impl Bucket {
275 fn parse_github_response(response: reqwest::blocking::Response) -> Vec<String> {
277 let content = response.text().unwrap();
278 let data: Result<GithubTreeResponse> = serde_json::from_str(&content);
279 match data {
280 | Ok(GithubTreeResponse { tree, .. }) => {
281 debug!("=> {} {} Tree entries", Label::found(), tree.len());
282 tree.into_iter().filter(GithubTreeEntry::is_blob).map(GithubTreeEntry::path).collect()
283 }
284 | Err(why) => {
285 error!("=> {} Process tree entries - {why}", Label::fail());
286 vec![]
287 }
288 }
289 }
290 fn parse_gitlab_response(response: reqwest::blocking::Response) -> Vec<String> {
292 let content = response.text().unwrap();
293 let data: Result<Vec<GitlabTreeEntry>> = serde_json::from_str(&content);
294 debug!("=> {} {} Tree entries", Label::found(), data.as_ref().unwrap().len());
295 match data {
296 | Ok(entries) => entries.into_iter().filter(GitlabTreeEntry::is_blob).map(GitlabTreeEntry::path).collect(),
297 | Err(why) => {
298 error!("=> {} Process tree entries - {why}", Label::fail());
299 vec![]
300 }
301 }
302 }
303 fn domain(&self) -> String {
305 match &self.code_repository {
306 | Repository::GitHub { location } => match location.uri() {
307 | Some(uri) => match uri.scheme() {
308 | uriparse::Scheme::HTTPS => uri.host().unwrap().to_string(),
309 | _ => todo!("Add support for file:///"),
310 },
311 | None => todo!("Handle invalid GitHub URI"),
312 },
313 | Repository::GitLab { location, .. } => match location.uri() {
314 | Some(uri) => match uri.scheme() {
315 | uriparse::Scheme::HTTPS => uri.host().unwrap().to_string(),
316 | _ => todo!("Add support for file:///"),
317 },
318 | None => todo!("Handle invalid GitLab URI"),
319 },
320 | Repository::Git { .. } => todo!("Add support for generic repositories"),
321 }
322 }
323 fn tree(&self, directory: &str, page: Option<u32>) -> eyre::Result<reqwest::blocking::Response, reqwest::Error> {
324 let url = self.tree_url(directory, page);
325 let client = Client::new();
326 client.get(url.unwrap_or_default()).header(USER_AGENT, "rust-web-api-client").send()
327 }
328 fn tree_url(&self, directory: &str, page: Option<u32>) -> Option<String> {
329 match &self.code_repository {
330 | Repository::Git { .. } => None,
331 | Repository::GitHub { location } => {
332 let parsed = match location.uri() {
333 | Some(value) => value,
334 | None => {
335 warn!("=> {} Parse GitHub URI", Label::fail());
336 return None;
337 }
338 };
339 let path = parsed.path();
340 let url = format!("https://api.{}/repos{}/git/trees/main?recursive=1", self.domain(), path);
341 debug!(url = url.as_str(), "=> {}", Label::using());
342 Some(url)
343 }
344 | Repository::GitLab { .. } => {
345 if let Some(id) = &self.code_repository.id() {
346 let per_page = 100;
347 let url = format!(
348 "https://{}/api/v4/projects/{}/repository/tree?&per_page={}&page={}&recursive=true&path={}",
349 self.domain(),
350 id,
351 per_page,
352 page.unwrap_or_default(),
353 directory
354 );
355 debug!(url = url.as_str(), "=> {}", Label::using());
356 Some(url)
357 } else {
358 None
359 }
360 }
361 }
362 }
363 pub fn copy_files(self: Bucket, output: PathBuf) -> usize {
367 let paths = self
368 .clone()
369 .file_paths("")
370 .into_iter()
371 .filter(|path| !IGNORE.iter().any(|x| path.ends_with(x)))
372 .collect::<Vec<String>>();
373 let total_data: usize = count_json_files(paths.clone());
374 let total_images: usize = count_image_files(paths.clone());
375 let message = operations_complete_message(self.name, total_data, total_images);
376 let progress = ProgressBar::new(paths.len() as u64);
377 paths.par_iter().for_each(|path| {
378 progress.set_style(ProgressStyle::with_template(Label::PROGRESS_BAR_TEMPLATE).unwrap());
379 progress.set_message(format!("Copying {path}"));
380 let folder = format!("{}/{}", output.display(), parent(path.clone()).display());
381 std::fs::create_dir_all(folder.clone()).unwrap();
382 if let Ok(mut file) = File::create(format!("{}/{}", output.display(), path)) {
383 if self.code_repository.clone().is_local() {
384 let bytes = std::fs::read(path.clone()).unwrap();
385 println!("{path} = {bytes:?}");
386 let mut content = Cursor::new(bytes);
387 let _ = copy(&mut content, &mut file);
388 progress.inc(1);
389 }
390 }
391 });
392 progress.set_style(ProgressStyle::with_template("{msg}").unwrap());
393 progress.finish_with_message(message);
394 total_data + total_images
395 }
396 pub fn download_files(self: Bucket, output: PathBuf) -> usize {
400 let paths = self
401 .clone()
402 .file_paths("")
403 .into_iter()
404 .filter(|path| !IGNORE.iter().any(|x| path.ends_with(x)))
405 .collect::<Vec<String>>();
406 let total_data: usize = count_json_files(paths.clone());
407 let total_images: usize = count_image_files(paths.clone());
408 let message = operations_complete_message(self.name, total_data, total_images);
409 let progress = ProgressBar::new(paths.len() as u64);
410 let client = Client::new();
411 paths.par_iter().for_each(|path| {
412 progress.set_style(ProgressStyle::with_template(Label::PROGRESS_BAR_TEMPLATE).unwrap());
413 progress.set_message(format!("Downloading {path}"));
414 let folder = format!("{}/{}", output.display(), parent(path.clone()).display());
415 std::fs::create_dir_all(folder.clone()).unwrap();
416 if let Ok(mut file) = File::create(format!("{}/{}", output.display(), path)) {
417 if let Some(url) = self.code_repository.raw_url(path.to_string()) {
418 match client.get(url).header(USER_AGENT, "rust-web-api-client").send() {
419 | Ok(response) => match response.bytes() {
420 | Ok(bytes) => {
421 let mut content = Cursor::new(bytes);
422 let _ = copy(&mut content, &mut file);
423 }
424 | Err(why) => {
425 error!(path, "=> {} Convert to bytes - {why}", Label::fail());
426 }
427 },
428 | Err(why) => {
429 error!(path, "=> {} Download file - {why}", Label::fail());
430 }
431 }
432 }
433 };
434 progress.inc(1);
435 });
436 progress.set_style(ProgressStyle::with_template("{msg}").unwrap());
437 progress.finish_with_message(message);
438 total_data + total_images
439 }
440 fn file_paths(self: Bucket, directory: &str) -> Vec<String> {
441 const FIRST_PAGE: Option<u32> = Some(1);
442 fn page_count(response: &reqwest::blocking::Response) -> u32 {
443 fn parse_header(headers: &HeaderMap, key: &str) -> u32 {
444 match headers.get(key) {
445 | Some(val) if !val.is_empty() => {
446 let value = val.to_str().unwrap().parse::<u32>().unwrap();
447 debug!("=> {} {} = {}", Label::using(), key, value);
448 value
449 }
450 | Some(_) | None => 0,
451 }
452 }
453 let headers = response.headers();
454 parse_header(headers, "x-total-pages")
455 }
456 match self.code_repository {
457 | Repository::Git { .. } => {
458 let path = PathBuf::from(self.code_repository.location().to_string());
459 files_all(path, None).into_iter().map(|x| x.display().to_string()).collect()
460 }
461 | Repository::GitHub { .. } => match self.tree(directory, None) {
462 | Ok(response) if response.status().is_success() => Bucket::parse_github_response(response),
463 | Ok(_) | Err(_) => {
464 let url = self.tree_url(directory, None);
465 debug!(url, "=> {}", Label::using());
466 error!("=> {} Get file paths for {} bucket", Label::fail(), self.name.to_uppercase().red());
467 vec![]
468 }
469 },
470 | Repository::GitLab { .. } => match self.tree(directory, FIRST_PAGE) {
471 | Ok(response) if response.status().is_success() => {
472 let paths = (FIRST_PAGE.unwrap_or_default()..=page_count(&response))
473 .into_par_iter()
474 .map(|page| self.clone().file_paths_for_page(directory, Some(page)))
475 .reduce(std::vec::Vec::new, |a, b| [a, b].concat());
476 trace!("{:#?}", response);
477 paths
478 }
479 | Ok(_) | Err(_) => {
480 let url = self.tree_url(directory, FIRST_PAGE);
481 debug!(url, "=> {}", Label::using());
482 error!("=> {} Get file paths for {} bucket", Label::fail(), self.name.to_uppercase().red());
483 vec![]
484 }
485 },
486 }
487 }
488 fn file_paths_for_page(self: Bucket, directory: &str, page: Option<u32>) -> Vec<String> {
489 match self.tree(directory, page) {
490 | Ok(response) if response.status().is_success() => match self.tree(directory, page) {
491 | Ok(response) if response.status().is_success() => Bucket::parse_gitlab_response(response),
492 | Ok(_) | Err(_) => {
493 let url = self.tree_url(directory, Some(1));
494 error!(url, page, "=> {} Failed to get paths", Label::fail());
495 vec![]
496 }
497 },
498 | Ok(_) | Err(_) => {
499 let url = self.tree_url(directory, page);
500 error!(url, page, "=> {} Failed to get paths", Label::fail());
501 vec![]
502 }
503 }
504 }
505}
506impl BucketsConfig {
507 pub fn read(path: PathBuf) -> Option<BucketsConfig> {
509 let content = match MimeType::from_path(path.clone()) {
510 | MimeType::Json => match BucketsConfig::read_json(path.clone()) {
511 | Ok(value) => Some(value),
512 | Err(_) => None,
513 },
514 | MimeType::Yaml => match BucketsConfig::read_yaml(path.clone()) {
515 | Ok(value) => Some(value),
516 | Err(_) => None,
517 },
518 | _ => unimplemented!("Unsupported configuration file extension"),
519 };
520 if let Some(content) = content {
521 Some(content)
522 } else {
523 error!(path = path.to_str().unwrap(), "=> {} Import configuration", Label::fail());
524 std::process::exit(exitcode::UNAVAILABLE);
525 }
526 }
527 fn read_json(path: PathBuf) -> Result<BucketsConfig> {
529 let content = match read_file(path.clone()) {
530 | Ok(value) if !value.is_empty() => value,
531 | Ok(_) | Err(_) => {
532 error!(
533 path = path.to_str().unwrap(),
534 "=> {} Bucket configuration content is not valid",
535 Label::fail()
536 );
537 "{}".to_owned()
538 }
539 };
540 let data: Result<BucketsConfig> = serde_json::from_str(&content);
541 let label = match data {
542 | Ok(_) => Label::using(),
543 | Err(_) => Label::invalid(),
544 };
545 trace!("=> {} Bucket configuration = {:#?}", label, data.dimmed());
546 data
547 }
548 fn read_yaml(path: PathBuf) -> serde_yml::Result<BucketsConfig> {
550 let content = match read_file(path.clone()) {
551 | Ok(value) => value,
552 | Err(_) => {
553 error!(
554 path = path.to_str().unwrap(),
555 "=> {} Bucket configuration content is not valid",
556 Label::fail()
557 );
558 "".to_owned()
559 }
560 };
561 let data: serde_yml::Result<BucketsConfig> = serde_yml::from_str(&content);
562 let label = match data {
563 | Ok(_) => Label::output(),
564 | Err(_) => Label::fail(),
565 };
566 debug!("=> {} Bucket configuration = {:#?}", label, data.dimmed());
567 data
568 }
569}
570impl GithubTreeEntry {
571 fn path(self) -> String {
572 self.path
573 }
574 fn is_blob(&self) -> bool {
575 self.entry_type.eq(&EntryType::Blob)
576 }
577}
578impl GitlabTreeEntry {
579 fn path(self) -> String {
580 self.path
581 }
582 fn is_blob(&self) -> bool {
583 self.entry_type.eq(&EntryType::Blob)
584 }
585}
586impl Location {
587 pub fn hash(&self) -> String {
597 let uri = self.uri().unwrap();
598 let host = match uri.host() {
599 | Some(value) => value.clone().to_string().replace('.', "_"),
600 | None => "".to_string(),
601 };
602 let segments = uri
603 .path()
604 .segments()
605 .iter()
606 .map(|s| s.to_string())
607 .filter(|s| !(s.is_empty() || s.eq(".")))
608 .collect::<Vec<_>>();
609 [host, segments.join("_").to_lowercase()]
610 .into_iter()
611 .filter(|x| !x.is_empty())
612 .collect::<Vec<String>>()
613 .join("_")
614 }
615 pub fn scheme(&self) -> Scheme {
626 match self {
627 | Location::Simple(value) => match URI::try_from(value.as_str()) {
628 | Ok(uri) => match uri.scheme() {
629 | uriparse::Scheme::HTTPS => Scheme::HTTPS,
630 | uriparse::Scheme::File => Scheme::File,
631 | _ => Scheme::Unsupported,
632 },
633 | Err(_) => Scheme::Unsupported,
634 },
635 | Location::Detailed { scheme, .. } => scheme.clone(),
636 }
637 }
638 pub async fn exists(self) -> bool {
640 let uri = self.uri();
641 match self.scheme() {
642 | Scheme::HTTPS => match uri {
643 | Some(uri) => match link_check(Some(uri.into())).await {
644 | Check { success, .. } if success => true,
645 | _ => false,
646 },
647 | None => false,
648 },
649 | Scheme::File => match uri {
650 | Some(value) => PathBuf::from(value.path().to_string()).exists(),
651 | None => false,
652 },
653 | Scheme::Unsupported => false,
654 }
655 }
656 pub fn uri(&self) -> Option<URI<'_>> {
658 fn parse_uri(value: &str) -> Option<URI<'_>> {
659 match URI::try_from(value) {
660 | Ok(value) => Some(value),
661 | Err(why) => {
662 warn!("=> {} Parse URI - {why}", Label::fail());
663 None
664 }
665 }
666 }
667 match self {
668 | Location::Simple(value) => parse_uri(value),
669 | Location::Detailed { uri, .. } => parse_uri(uri),
670 }
671 }
672}
673impl Repository {
674 pub fn is_local(self) -> bool {
676 let local_schemes = [Scheme::File];
677 local_schemes.contains(&self.location().scheme())
678 }
679 pub fn latest_release(self) -> Option<Release> {
681 match self.releases() {
682 | releases if releases.is_empty() => None,
683 | releases => {
684 let release = releases[0].clone();
685 trace!("=> {} Latest {:#?}", Label::using(), release);
686 Some(release)
687 }
688 }
689 }
690 pub fn location(self) -> Location {
692 match self {
693 | Repository::Git { location, .. } => location,
694 | Repository::GitHub { location, .. } => location,
695 | Repository::GitLab { location, .. } => location,
696 }
697 }
698 fn id(&self) -> Option<String> {
699 match self {
700 | Repository::Git { .. } => None,
701 | Repository::GitHub { .. } => None,
702 | Repository::GitLab { id, location } => match location.uri() {
703 | Some(value) => {
704 let mut path = value.path().to_string();
705 path.remove(0);
706 let encoded = encode(&path).to_string();
707 trace!(encoded, "=> {} ID", Label::using());
708 Some(encoded)
709 }
710 | None => {
711 warn!("=> {} Parse GitLab URI", Label::fail());
712 match id {
713 | Some(value) => Some(value.to_string()),
714 | None => None,
715 }
716 }
717 },
718 }
719 }
720 fn releases(self) -> Vec<Release> {
721 let maybe_url = match &self {
722 | Repository::Git { .. } => None,
723 | Repository::GitHub { location } => match location.uri() {
724 | Some(uri) => {
725 let host = uri.host().unwrap().to_string();
726 let path = uri.path();
727 let endpoint = Some(format!("https://api.{host}/repos{path}/releases"));
728 endpoint
729 }
730 | None => {
731 error!("=> {} Parse GitHub URI", Label::fail());
732 None
733 }
734 },
735 | Repository::GitLab { location, .. } => match self.id() {
736 | Some(id) => match location.uri() {
737 | Some(uri) => {
738 let host = uri.host().unwrap().to_string();
739 Some(format!("https://{host}/api/v4/projects/{id}/releases"))
740 }
741 | None => {
742 error!("=> {} Parse GitLab URI", Label::fail());
743 None
744 }
745 },
746 | None => None,
747 },
748 };
749 if let Some(url) = maybe_url {
750 debug!(url, "=> {}", Label::using());
751 let client = Client::new();
752 match client.get(url).header(USER_AGENT, "rust-web-api-client").send() {
753 | Ok(response) => match response.text() {
754 | Ok(text) => {
755 let releases: Vec<Release> = match serde_json::from_str(&text) {
756 | Ok(values) => values,
757 | Err(why) => {
758 error!("=> {} Parse {} API JSON response - {why}", self, Label::fail());
759 vec![]
760 }
761 };
762 releases
763 }
764 | Err(why) => {
765 error!("=> {} Parse {} API text response - {why}", self, Label::fail());
766 vec![]
767 }
768 },
769 | Err(why) => {
770 error!("=> {} Download {} releases - {why}", self, Label::fail());
771 vec![]
772 }
773 }
774 } else {
775 vec![]
776 }
777 }
778 fn raw_url(&self, path: String) -> Option<String> {
780 match self {
781 | Repository::GitHub { location, .. } => match location.uri() {
782 | Some(ref value) => Some(format!("https://raw.githubusercontent.com{}/refs/heads/main/{path}", value.path())),
783 | None => {
784 error!("=> {} Parse GitHub URI", Label::fail());
785 None
786 }
787 },
788 | Repository::GitLab { location, .. } => Some(format!("{location}/-/raw/main/{path}")),
789 | Repository::Git { .. } => None,
790 }
791 }
792}
/// Counts the paths whose name ends in `.json`, ignoring case.
fn count_json_files(paths: Vec<String>) -> usize {
    paths.iter().filter(|path| path.to_lowercase().ends_with(".json")).count()
}
796fn count_image_files(paths: Vec<String>) -> usize {
797 paths.into_iter().filter(has_image_extension).count()
798}
799fn operations_complete_message(name: String, json_count: usize, image_count: usize) -> String {
800 let total = json_count + image_count;
801 let message = if json_count != image_count {
802 let recommendation = if json_count > image_count {
803 "Do you need to add some images?"
804 } else {
805 "Do you need to add some JSON files?"
806 };
807 format!(
808 " ({} data file{}, {} image{} - {})",
809 json_count.yellow(),
810 suffix(json_count),
811 image_count.yellow(),
812 suffix(image_count),
813 recommendation.italic(),
814 )
815 } else {
816 "".to_string()
817 };
818 format!(
819 " {}Downloaded {} {} file{}{}",
820 if total > 0 { Label::CHECKMARK } else { Label::CAUTION },
821 if total > 0 {
822 total.green().to_string()
823 } else {
824 total.yellow().to_string()
825 },
826 name.to_uppercase(),
827 suffix(total),
828 message,
829 )
830}
/// Returns `true` when `path` ends in `.png` or `.jpg`, ignoring case.
// `&String` rather than `&str`: the function is passed by name to
// `Iterator::filter` over owned `String` items, which requires this exact
// parameter type.
#[allow(clippy::ptr_arg)]
fn has_image_extension(path: &String) -> bool {
    let lowered = path.to_lowercase();
    [".png", ".jpg"].iter().any(|extension| lowered.ends_with(*extension))
}
835
836#[cfg(test)]
837mod tests;