acorn_lib/schema/
mod.rs

1use crate::analyzer::vale::{Vale, ValeConfig};
2use crate::analyzer::{StaticAnalyzer, StaticAnalyzerConfig};
3use crate::constants::*;
4use crate::util::*;
5use bon::Builder;
6use derive_more::Display;
7use fancy_regex::Regex;
8use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
9use nucleo_matcher::{Config, Matcher};
10use owo_colors::OwoColorize;
11use percy_dom::prelude::{html, IterableNodes, View, VirtualNode};
12use petgraph::graph::Graph;
13use rayon::prelude::*;
14use schemars::{schema_for, JsonSchema};
15use serde::{Deserialize, Serialize};
16use serde_repr::*;
17use serde_trim::*;
18use serde_with::skip_serializing_none;
19use std::collections::HashMap;
20use std::path::PathBuf;
21use std::result::Result;
22use titlecase::Titlecase;
23use tracing::{debug, error, info, trace};
24use validator::{Validate, ValidationErrorsKind};
25
26pub mod graph;
27pub mod validate;
28use graph::*;
29use validate::*;
30
/// ### Core concepts related to the associated research activity
///
/// Could be used to filter research activity data and/or power data analytics through concept composition
///
/// ### Guidelines for creating keywords
/// - **Shall**
///     - Be officially sanctioned by responsible parties
///     - Be in lower-kebab-case
///     - Be unique relative to other keywords
///     - Contain three or more characters
/// - **Should**
///     - Not be too specific
///     - Be one or two words (ex. `foo` or `foo-bar`)
///
/// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/keywords.csv">Full list of keywords</a></div>
// NOTE: this is a plain alias for `String`, so none of the guidelines above are enforced by the type itself.
pub type Keyword = String;
/// U.S. Classified National Security Information Level
///
/// See [President Executive Order 13526](https://www.archives.gov/isoo/policy-documents/cnsi-eo.html)
// Serde names are the lowercase variant names (`unclassified`, `confidential`, `secret`, `topsecret`);
// `Display` prints the upper-case marking given in each `#[display(...)]` attribute instead.
// The derived `PartialOrd` follows declaration order, so levels compare from least to most restrictive.
#[derive(Clone, Debug, Default, Display, Serialize, Deserialize, PartialEq, PartialOrd, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum ClassificationLevel {
    /// ### Unclassified (U)
    #[default]
    #[display("UNCLASSIFIED")]
    Unclassified,
    /// ### Confidential (C)
    ///
    /// Shall be applied to information, the unauthorized disclosure of which reasonably could be expected to cause ***damage*** to the national security that the original classification authority is able to identify or describe.
    #[display("CONFIDENTIAL")]
    Confidential,
    /// ### Secret (S)
    ///
    /// Shall be applied to information, the unauthorized disclosure of which reasonably could be expected to cause ***serious damage*** to the national security that the original classification authority is able to identify or describe.
    #[display("SECRET")]
    Secret,
    /// ### Top Secret (TS)
    ///
    /// Shall be applied to information, the unauthorized disclosure of which reasonably could be expected to cause ***exceptionally grave damage*** to the national security that the original classification authority is able to identify or describe.
    #[display("TOP SECRET")]
    // `rename_all` yields `topsecret`; the alias additionally accepts the spaced spelling on input.
    #[serde(alias = "top secret")]
    TopSecret,
}
/// Category of known values, displayed as the lowercase label given on each variant
///
/// `Display` yields `partners`, `keywords`, `sponsors` and `technology` (note the last one is singular).
// NOTE(review): presumably selects which list of known values a fuzzy search runs against — the usage is not
// visible in this part of the file, confirm against callers.
// No serde renaming is applied, so the serialized form is the variant name itself (e.g. `Partner`).
#[derive(Clone, Debug, Serialize, Deserialize, Display)]
pub enum FuzzyValue {
    /// Displayed as `partners`
    #[display("partners")]
    Partner,
    /// See [Keyword]
    #[display("keywords")]
    Keyword,
    /// Displayed as `sponsors`
    #[display("sponsors")]
    Sponsor,
    /// Displayed as `technology`
    #[display("technology")]
    Technology,
}
// Free-form or structured notes (used as the type of `ResearchActivity::notes`).
// The enum is untagged: serde tries the variants in declaration order, so a JSON string becomes
// `Unformatted` and anything else must deserialize as `Notes` to become `Formatted`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum Other {
    // Plain text notes
    Unformatted(String),
    // Structured notes, see `Notes`
    Formatted(Notes),
}
// Section content of a research activity, one variant per kind of section layout.
// The enum is untagged: serde tries `Highlight`, then `Project`, then `Organization` and keeps the first
// variant whose fields deserialize successfully, so variant order is significant.
// NOTE(review): nothing in this definition ties the selected variant to `Metadata::schema_type`.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
#[serde(untagged)]
pub enum Sections {
    Highlight(HighlightSections),
    Project(ProjectSections),
    Organization(OrganizationSections),
}
// Serialized as the lowercase variant name, which matches the `Display` text of every variant.
#[derive(Clone, Debug, Default, Serialize, Deserialize, Display, PartialEq, JsonSchema)]
#[serde(rename_all = "lowercase")]
/// The schema type informs the sections and fields that apply to the associated research activity data
pub enum SchemaType {
    /// ### WIP
    /// <div class="warning">May be revised/removed in the future</div>
    #[display("highlight")]
    Highlight,
    /// Projects describe research capability applications to solve a challenge or challenges resulting in measurable outcomes and impact
    // `Project` is the value produced by `SchemaType::default()`.
    #[default]
    #[display("project")]
    Project,
    /// Organizations describe research-related groupings of people and resources
    #[display("organization")]
    Organization,
}
115/// Technology readiness levels (TRLs) are a method for estimating the maturity of technologies during the acquisition phase of a program.
116///
117/// The "optimal point" to introduce technology depends on technology maturity (TRL) and program requirements. That point can be virtually anywhere in the acquisition process.
118///
119/// See [Technology Readiness for Machine Learning Systems](https://doi.org/10.1038/s41467-022-33128-9) for applying TRLs to machine learning (ML) systems
120#[derive(Clone, Debug, Default, Display, Serialize_repr, Deserialize_repr, PartialEq, PartialOrd, JsonSchema)]
121#[repr(u8)]
122pub enum TechnologyReadinessLevel {
123    /// A stage for greenfield research
124    ///
125    /// Not a standard TRL
126    #[display("Greenfield Research")]
127    Principles = 0,
128    /// Basic principles observed and reported
129    ///
130    /// ML: Goal-oriented research
131    #[default]
132    #[display("Basic Research")]
133    Research = 1,
134    /// Technology concept and/or application formulated
135    ///
136    /// ML: Proof of principle development
137    #[display("Technology Concept")]
138    Concept = 2,
139    /// Analytical and experimental critical function and/or characteristic proof-of-concept
140    ///
141    /// ML: Systems development
142    #[display("Feasible")]
143    Feasible = 3,
144    /// Component and/or breadboard validation in laboratory environment (low fidelity)
145    ///
146    /// ML: Proof of concept development
147    #[display("Developing")]
148    Developing = 4,
149    /// Component and/or breadboard validation in relevant environment (high fidelity)
150    ///
151    /// ML: Machine learning "capability"
152    #[display("Developed")]
153    Developed = 5,
154    /// System/subsystem model or prototype demonstration in a relevant environment (high fidelity)
155    ///
156    /// ML: Application development
157    #[display("Prototype")]
158    Prototype = 6,
159    /// System prototype demonstration in an operational environment
160    ///
161    /// ML: Integrations
162    #[display("Operational")]
163    Operational = 7,
164    /// Actual system completed and qualified through test and demonstration
165    ///
166    /// ML: Mission-ready
167    #[display("Mission Ready")]
168    MissionReady = 8,
169    /// Actual system proven through successful mission operation
170    ///
171    /// ML: Deployment
172    #[display("Mission Capable")]
173    MissionCapable = 9,
174}
/// Contact point (i.e. "point of contact") for research activity
///
/// See <https://schema.org/ContactPoint>
// Keys are camelCase (`jobTitle`, `givenName`, `familyName`, ...). The `alias` names below are accepted
// when deserializing only; serialization always uses the camelCase key.
#[derive(Clone, Debug, Serialize, Deserialize, Validate, JsonSchema)]
#[serde(rename_all = "camelCase")]
pub struct ContactPoint {
    /// ### Job title (e.g. "Group Lead") of role that the contact fills related to the associated research activity.
    ///
    /// When the nearest associated title is unclear, job role of the contact can be used (e.g. "Senior Scientist").
    ///
    /// ***Example*** Ideal contact title for a project would be "Primary Investigator"
    ///
    /// ***Example*** Ideal contact title for a group organization would be "Group Lead"
    ///
    /// See <https://schema.org/jobTitle>
    #[serde(alias = "title", deserialize_with = "string_trim")]
    pub job_title: String,
    /// ### First (given) name of contact
    ///
    /// See <https://schema.org/givenName>
    #[serde(alias = "first", deserialize_with = "string_trim")]
    pub given_name: String,
    /// ### Last (family) name of contact
    ///
    /// See <https://schema.org/familyName>
    #[serde(alias = "last", deserialize_with = "string_trim")]
    pub family_name: String,
    /// ### Email address of contact point
    ///
    /// See <https://schema.org/email>
    #[validate(email(message = "Please provide a valid email"))]
    #[serde(deserialize_with = "string_trim")]
    pub email: String,
    /// ### Phone number of contact point
    ///
    /// See <https://schema.org/telephone>
    #[validate(custom(function = "is_phone_number"))]
    #[serde(alias = "phone", deserialize_with = "string_trim")]
    pub telephone: String,
    /// ### Profile URL of contact point
    ///
    /// ***Example*** Profile URL for "Jason Wohlgemuth" could be <https://impact.ornl.gov/en/persons/jason-wohlgemuth>
    #[validate(url(message = "Please provide a valid profile URL"))]
    #[serde(alias = "profile", deserialize_with = "string_trim")]
    pub url: String,
    /// ### Organization of contact point
    ///
    /// See [Organization]
    #[serde(deserialize_with = "string_trim")]
    pub organization: String,
    /// ### Affiliation of associated research activity data
    ///
    /// Where organization applies to the contact point, affiliation applies to the research activity the contact point is associated with
    ///
    /// See <https://schema.org/affiliation>
    // Unlike the other string fields, this one has no trimming deserializer, so surrounding whitespace is kept.
    pub affiliation: Option<String>,
}
// Image reference with a caption (element type of `Metadata::graphics`).
// `skip_serializing_none` omits `href` from the output when it is `None`.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct Graphic {
    // Caption text, trimmed on input. The limit comes from `MAX_LENGTH_GRAPHIC_CAPTION`, while the message
    // hard-codes "100" — NOTE(review): keep the two in sync if the constant changes.
    #[validate(length(max = "MAX_LENGTH_GRAPHIC_CAPTION", message = "Caption is too long, please reduce the length below 100."))]
    #[serde(deserialize_with = "string_trim")]
    pub caption: String,
    // Optional location of the image, checked by the custom `has_image_extension` validator (defined elsewhere).
    // `default` lets the key be absent from the input.
    #[validate(custom(function = "has_image_extension"))]
    #[serde(default, deserialize_with = "option_string_trim")]
    pub href: Option<String>,
}
// Descriptive metadata of a research activity (the `meta` field of `ResearchActivity`).
// Keys are camelCase. The `#[builder(default = ...)]` values only apply when building through `Metadata::init()`;
// non-`Option` fields without `#[serde(default)]` (`identifier`, `type`, `archive`, `draft`, `keywords`,
// `technology`) must still be present in deserialized input.
#[skip_serializing_none]
#[derive(Clone, Builder, Debug, Serialize, Deserialize, JsonSchema, Validate)]
#[builder(start_fn = init)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
    pub classification: Option<ClassificationLevel>,
    /// ### Identifier for associated research activity data
    ///
    /// Should be [lower-kebab-case](https://developer.mozilla.org/en-US/docs/Glossary/Kebab_case)
    ///
    /// ***Example*** `my-research-project`
    #[validate(custom(function = "is_kebabcase"))]
    // Written as `identifier`; `id` is accepted as an input alias.
    #[serde(alias = "id", rename = "identifier", deserialize_with = "string_trim")]
    pub identifier: String,
    // Stored under the key `type`.
    #[builder(default = SchemaType::default())]
    #[serde(alias = "type", rename = "type")]
    pub schema_type: SchemaType,
    pub additional_type: Option<OrganizationType>,
    /// ### Digital Object Identifier
    ///
    /// See <https://www.doi.org/> for more information
    #[validate(custom(function = "is_doi"))]
    #[serde(default, deserialize_with = "option_string_trim")]
    pub doi: Option<String>,
    /// ### Research Activity Identifier
    ///
    /// See <https://www.raid.org/> for more information
    #[validate(custom(function = "is_raid"))]
    #[serde(default, deserialize_with = "option_string_trim")]
    pub raid: Option<String>,
    /// ### Research Organization Registry
    ///
    /// See <https://www.ror.org/> for more information
    #[validate(custom(function = "is_ror"))]
    #[serde(default, deserialize_with = "option_string_trim")]
    pub ror: Option<String>,
    /// ### URL of internet location where associated publication can be found
    #[validate(url(message = "Please provide a valid URL"))]
    #[serde(default, deserialize_with = "option_string_trim")]
    pub publication: Option<String>,
    /// ### Describes the active status of the associated research activity data
    ///
    /// Archived content typically will be omitted from public artifacts such as <https://research.ornl.gov>
    #[builder(default = false)]
    pub archive: bool,
    /// ### Describes the draft status of the associated research activity data
    ///
    /// Draft content typically will be omitted from public artifacts such as <https://research.ornl.gov>
    #[builder(default = true)]
    pub draft: bool,
    /// <abbr title="Technology Readiness Level">TRL</abbr> is applicable to acquisition, machine learning, and more
    pub trl: Option<TechnologyReadinessLevel>,
    // `nested` makes validation descend into each `Website`.
    #[validate(nested)]
    pub websites: Option<Vec<Website>>,
    // `nested` makes validation descend into each `Graphic`.
    #[validate(nested)]
    pub graphics: Option<Vec<Graphic>>,
    // See `Keyword` for the keyword guidelines. No trimming deserializer is attached to this list.
    #[builder(default = Vec::<String>::new())]
    pub keywords: Vec<Keyword>,
    /// ### Software, programming languages, and digital resources (e.g. tools, libraries, frameworks, data) related to the associated research activity data
    ///
    /// ***Examples***
    /// - Rust
    /// - Polars
    /// - gdal
    /// - matplotlib
    /// - LaTeX
    ///
    /// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/technology.csv">Full list of technologies</a></div>
    #[builder(default = Vec::<String>::new())]
    #[serde(deserialize_with = "vec_string_trim")]
    pub technology: Vec<String>,
    /// ### Organization(s) responsible for funding associated research activity data
    ///
    /// Includes any office within a US cabinet-level department that has leadership appointed by the president and confirmed by the Senate, e.g., NNSA or Office of Science.
    ///
    /// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/sponsors.csv">Full list of sponsors</a></div>
    pub sponsors: Option<Vec<String>>,
    /// ### Organization(s) related to the associated research activity data
    ///
    /// ***Examples***
    /// - Los Alamos National Laboratory
    /// - University of Tennessee
    /// - IBM
    /// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/partners.csv">Full list of partners</a></div>
    pub partners: Option<Vec<String>>,
    /// Related research activity data
    ///
    /// <div class="warning">WIP</div>
    pub related: Option<Vec<String>>,
}
// Structured notes, carried by `Other::Formatted`. Every field is optional and `None` values are not serialized.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct Notes {
    /// ### [ASCR](https://www.energy.gov/science/ascr/advanced-scientific-computing-research) highlight attribute
    pub managers: Option<Vec<String>>,
    /// ### Collection of capabilities aimed at achieving a specific cross-cutting research outcome
    pub programs: Option<Vec<String>>,
    // Trimmed on input; may be absent. NOTE(review): the expected content (title, link, ...) is not visible
    // here — confirm and document.
    #[serde(default, deserialize_with = "option_string_trim")]
    pub presentation: Option<String>,
}
342#[skip_serializing_none]
343#[derive(Clone, Debug, Serialize, Deserialize, Display, Hash, PartialEq, PartialOrd)]
344#[display("Organization ({additional_type}) - {name})")]
345#[serde(rename_all = "camelCase")]
346pub struct Organization {
347    /// ### Full name of the organization
348    ///
349    /// See <https://schema.org/name>
350    #[serde(deserialize_with = "string_trim")]
351    pub name: String,
352    /// ### Research Organization Registry
353    ///
354    /// See <https://www.ror.org/> for more information
355    #[serde(default, deserialize_with = "option_string_trim")]
356    pub ror: Option<String>,
357    /// ### Organization alias (e.g. acronym or nickname)
358    ///
359    /// See <https://schema.org/alternateName>
360    #[serde(default, deserialize_with = "option_string_trim")]
361    pub alternative_name: Option<String>,
362    /// ### Organization sub-type
363    ///
364    /// See <https://schema.org/additionalType>
365    pub additional_type: OrganizationType,
366    pub keywords: Option<Vec<Keyword>>,
367    /// ### Distinct part(s) of the associated containing organization
368    ///
369    /// See <https://schema.org/member>
370    pub member: Vec<Organization>,
371}
// Kind of organization. Serde uses the lowercase variant name (`ffrdc` for `Ffrdc`), whereas `Display`
// prints the text in each `#[display(...)]` attribute (`FFRDC` is upper-case there).
// The derived `PartialOrd` follows declaration order (alphabetical here), not organizational hierarchy.
#[derive(Clone, Debug, Serialize, Deserialize, Display, Hash, PartialEq, PartialOrd, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum OrganizationType {
    #[display("agency")]
    Agency,
    #[display("center")]
    Center,
    #[display("directorate")]
    Directorate,
    #[display("division")]
    Division,
    #[display("facility")]
    Facility,
    /// Federally Funded Research and Development Center
    #[display("FFRDC")]
    Ffrdc,
    #[display("group")]
    Group,
    #[display("office")]
    Office,
    #[display("program")]
    Program,
}
/// ### Research activity is an identifiable package of work involving organized, systematic investigation.
///
/// See <https://www.raid.org/> for more information
// Built through `ResearchActivity::init()`; `None` values are not serialized.
#[skip_serializing_none]
#[derive(Builder, Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
#[builder(start_fn = init)]
pub struct ResearchActivity {
    // Validation descends into the metadata.
    #[validate(nested)]
    pub meta: Metadata,
    // Trimmed on input; length must be between 4 and `MAX_LENGTH_TITLE`.
    #[validate(length(min = 4, max = "MAX_LENGTH_TITLE"))]
    #[serde(deserialize_with = "string_trim")]
    pub title: String,
    // Optional, trimmed on input. The message hard-codes "75" while the limit is `MAX_LENGTH_SUBTITLE`.
    #[validate(length(max = "MAX_LENGTH_SUBTITLE", message = "Subtitle is too long, please reduce the length below 75."))]
    #[serde(default, deserialize_with = "option_string_trim")]
    pub subtitle: Option<String>,
    // NOTE(review): unlike `meta` and `contact`, this field carries no `#[validate(nested)]`, so the derived
    // `Validate` does not descend into the sections — presumably they are validated separately; confirm.
    pub sections: Sections,
    // Validation descends into the contact point.
    #[validate(nested)]
    pub contact: ContactPoint,
    // Either plain text or structured notes, see `Other`.
    pub notes: Option<Other>,
}
/// Website link and title description
///
/// **Example**: When deserializing research activity data, websites can be provided as a list of JSON objects.
/// ```json
/// {
///     "websites": [
///       {
///         "title": "Home Page",
///         "url": "https://example.com"
///       },
///       {
///         "title": "Job Listing",
///         "url": "https://www.example.com/jobs"
///       }
///     ]
/// }
/// ```
///
#[derive(Clone, Debug, Serialize, Deserialize, Validate, JsonSchema)]
pub struct Website {
    /// Brief description of webpage content
    ///
    /// See <https://schema.org/description>
    // `title` (as used in the example above) is an input alias; the value is serialized as `description`.
    #[serde(alias = "title", deserialize_with = "string_trim")]
    pub description: String,
    // Address of the website, trimmed on input and checked by the `url` validator.
    #[validate(url(message = "Please provide a valid URL"))]
    #[serde(deserialize_with = "string_trim")]
    pub url: String,
}
// Sections of a highlight (see `Sections::Highlight`).
// Each field also accepts a longer, human-readable input key through its serde alias.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct HighlightSections {
    // Input alias: `scientific achievement`. Trimmed on input.
    #[serde(alias = "scientific achievement", deserialize_with = "string_trim")]
    pub achievement: String,
    // Input alias: `significance & impact`. Trimmed on input.
    #[serde(alias = "significance & impact", deserialize_with = "string_trim")]
    pub impact: String,
    // Input alias: `technical approach`. At most 6 entries, each trimmed on input; the list is additionally
    // checked by the custom `validate_attribute_technical` validator (defined elsewhere).
    #[validate(
        length(max = 6, message = "Please limit the number of methods to 6"),
        custom(function = "validate_attribute_technical")
    )]
    #[serde(alias = "technical approach")]
    #[serde(deserialize_with = "vec_string_trim")]
    pub technical: Vec<String>,
}
// Sections of a project (see `Sections::Project`). All strings are trimmed on input.
// NOTE(review): each text field sets `min = 10` as well as a maximum, but carries a single "too long" message;
// a value shorter than 10 characters is presumably reported with that same message — confirm and consider
// rewording. The numbers quoted in the messages are hard-coded, while the limits come from constants.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct ProjectSections {
    #[validate(length(
        min = 10,
        max = "MAX_LENGTH_SECTION_INTRODUCTION",
        message = "Introduction is too long, please reduce the length below 250."
    ))]
    #[serde(deserialize_with = "string_trim")]
    pub introduction: String,
    #[validate(length(
        min = 10,
        max = "MAX_LENGTH_SECTION_CHALLENGE",
        message = "Challenge is too long, please reduce the length below 500."
    ))]
    #[serde(deserialize_with = "string_trim")]
    pub challenge: String,
    #[validate(length(
        min = 10,
        max = "MAX_LENGTH_SECTION_APPROACH",
        message = "Approach is too long, please reduce the length below 500."
    ))]
    #[serde(deserialize_with = "string_trim")]
    pub approach: String,
    // Between 1 and `MAX_COUNT_OUTCOMES` entries, plus the custom `validate_attribute_outcomes` check.
    #[validate(length(min = 1, max = "MAX_COUNT_OUTCOMES"), custom(function = "validate_attribute_outcomes"))]
    #[serde(deserialize_with = "vec_string_trim")]
    pub outcomes: Vec<String>,
    // Validation descends into the research focus and areas.
    #[validate(nested)]
    pub research: Research,
}
// Sections of an organization (see `Sections::Organization`).
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct OrganizationSections {
    // Reuses the introduction limit (`MAX_LENGTH_SECTION_INTRODUCTION`) for the mission statement.
    // NOTE(review): `min = 10` shares the single "too long" message — confirm the wording for short values.
    #[validate(length(
        min = 10,
        max = "MAX_LENGTH_SECTION_INTRODUCTION",
        message = "Mission is too long, please reduce the length below 250."
    ))]
    #[serde(deserialize_with = "string_trim")]
    pub mission: String,
    // Between 1 and `MAX_COUNT_CAPABILITIES` entries, plus the custom `validate_attribute_capabilities` check.
    #[validate(length(min = 1, max = "MAX_COUNT_CAPABILITIES"), custom(function = "validate_attribute_capabilities"))]
    #[serde(deserialize_with = "vec_string_trim")]
    pub capabilities: Vec<String>,
    // At least one entry (no upper bound is set here), plus the custom `validate_attribute_impact` check.
    #[validate(length(min = 1), custom(function = "validate_attribute_impact"))]
    #[serde(deserialize_with = "vec_string_trim")]
    pub impact: Vec<String>,
    // Optional; unlike the other lists, no trimming deserializer is attached.
    #[validate(custom(function = "validate_attribute_facilities"))]
    pub facilities: Option<Vec<String>>,
}
// Research focus and areas of a project (the `research` field of `ProjectSections`).
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
pub struct Research {
    // Trimmed on input; between 10 and `MAX_LENGTH_RESEARCH_FOCUS` characters.
    // NOTE(review): the single "too long" message is also attached to the `min = 10` bound.
    #[validate(length(
        min = 10,
        max = "MAX_LENGTH_RESEARCH_FOCUS",
        message = "Focus is too long, please reduce the length below 150."
    ))]
    #[serde(deserialize_with = "string_trim")]
    pub focus: String,
    // Between 1 and `MAX_COUNT_RESEARCH_AREAS` entries, plus the custom `validate_attribute_areas` check.
    #[validate(length(min = 1, max = "MAX_COUNT_RESEARCH_AREAS"), custom(function = "validate_attribute_areas"))]
    #[serde(deserialize_with = "vec_string_trim")]
    pub areas: Vec<String>,
}
/// Renders a contact point as a single-line HTML contact section.
///
/// Only the name, job title, email and telephone are rendered; `url`, `organization` and `affiliation`
/// are ignored by this view.
impl View for ContactPoint {
    /// Build a `<section id="contact">` holding a "Contact" label followed by the full name
    /// (`given_name family_name`), job title, email and telephone, separated by `spacer` spans.
    fn render(&self) -> VirtualNode {
        // `job_title` is bound as `role` because it fills the element with class `title`.
        let ContactPoint {
            given_name,
            family_name,
            job_title: role,
            email,
            telephone,
            ..
        } = self;
        html! {
            <section id="contact">
                <div>
                    <span class="label">Contact</span>
                    <span class="spacer"> </span>
                    <span class="name">{ format!("{} {}", given_name, family_name) }</span>
                    <span class="spacer">|</span>
                    <span class="title">{ role }</span>
                    <span class="spacer">|</span>
                    <span class="email">{ email }</span>
                    <span class="spacer">|</span>
                    <span class="phone">{ telephone }</span>
                </div>
            </section>
        }
    }
}
545impl Metadata {
546    fn get_first_graphic(self) -> Option<Graphic> {
547        match self.graphics {
548            | Some(values) => values.first().cloned(),
549            | None => None,
550        }
551    }
552    pub fn get_first_graphic_href(self) -> String {
553        match self.get_first_graphic() {
554            | Some(Graphic { href, .. }) => match href.clone() {
555                | Some(href) => href.clone(),
556                | None => DEFAULT_GRAPHIC_HREF.to_string(),
557            },
558            | None => DEFAULT_GRAPHIC_HREF.to_string(),
559        }
560    }
561    pub fn get_first_graphic_caption(self) -> String {
562        match self.get_first_graphic() {
563            | Some(Graphic { caption, .. }) => match caption.clone() {
564                | value if !value.is_empty() => value.clone(),
565                | _ => DEFAULT_GRAPHIC_CAPTION.to_string(),
566            },
567            | None => DEFAULT_GRAPHIC_CAPTION.to_string(),
568        }
569    }
570}
571impl Organization {
572    pub fn load() -> Vec<Organization> {
573        serde_json::from_str(&Constant::from_asset("organization.json")).unwrap()
574    }
575    pub fn visit<F: Copy + for<'a> Fn(&'a mut Organization)>(&mut self, f: F) {
576        f(self);
577        for child in self.member.iter_mut() {
578            child.visit(f);
579        }
580    }
581    pub fn get_member(self, label: &str) -> Option<Organization> {
582        self.get_members().into_iter().find(|Organization { name, .. }| name == label)
583    }
584    pub fn get_members(self) -> Vec<Organization> {
585        let organization = self;
586        let mut items = vec![organization.clone()];
587        let directorates = organization.member.clone();
588        for directorate in &directorates {
589            items.push(directorate.clone());
590            let divisions = directorate.member.clone();
591            for division in &divisions {
592                items.push(division.clone());
593                let groups = division.member.clone();
594                for group in &groups {
595                    items.push(group.clone());
596                }
597            }
598        }
599        items
600    }
601    pub fn get_nearest(self, organization_type: OrganizationType) -> Option<Organization> {
602        let a = self.clone().additional_type.order();
603        let b = organization_type.order();
604        if a > b {
605            None
606        } else {
607            let ornl = Organization::load()[0].clone();
608            let graph = ornl.clone().to_graph();
609            let name = match b - a {
610                | 3 => Some(ornl.clone().name),
611                | 2 => match get_node_from_label(&graph, &self.name) {
612                    | Some(node) => match get_node_parent(&graph, node) {
613                        | Some(parent) => match get_node_parent(&graph, parent) {
614                            | Some(grandparent) => get_node_name(&graph, grandparent),
615                            | None => None,
616                        },
617                        | None => None,
618                    },
619                    | None => None,
620                },
621                | 1 => match get_node_from_label(&graph, &self.name) {
622                    | Some(node) => match get_node_parent(&graph, node) {
623                        | Some(parent) => get_node_name(&graph, parent),
624                        | None => None,
625                    },
626                    | None => None,
627                },
628                | 0 => Some(self.name),
629                | _ => None,
630            };
631            match name {
632                | Some(value) => match ornl.get_member(&value) {
633                    | Some(organization) => Some(organization),
634                    | None => None,
635                },
636                | None => None,
637            }
638        }
639    }
640    pub fn to_graph(self) -> Graph<String, u8> {
641        let mut graph: Graph<String, u8, petgraph::Directed> = Graph::new();
642        let organization = &self;
643        let root = graph.add_node(organization.name.clone());
644        for directorate in organization.member.iter() {
645            let a = graph.add_node(directorate.name.clone());
646            graph.add_edge(root, a, 0);
647            for division in directorate.member.iter() {
648                let b = graph.add_node(division.name.clone());
649                graph.add_edge(a, b, 0);
650                for group in division.member.iter() {
651                    let c = graph.add_node(group.name.clone());
652                    graph.add_edge(b, c, 0);
653                }
654            }
655        }
656        graph
657    }
658}
659impl OrganizationType {
660    pub fn from_string(value: String) -> OrganizationType {
661        match value.to_lowercase().as_str() {
662            | "agency" => OrganizationType::Agency,
663            | "center" => OrganizationType::Center,
664            | "division" => OrganizationType::Division,
665            | "directorate" => OrganizationType::Directorate,
666            | "group" => OrganizationType::Group,
667            | "office" => OrganizationType::Office,
668            | "program" => OrganizationType::Program,
669            | "facility" => OrganizationType::Facility,
670            | "ffrdc" => OrganizationType::Ffrdc,
671            | _ => unreachable!(),
672        }
673    }
674    pub fn order(self) -> u8 {
675        match self {
676            | OrganizationType::Ffrdc | OrganizationType::Agency | OrganizationType::Office => 4,
677            | OrganizationType::Directorate => 3,
678            | OrganizationType::Division | OrganizationType::Center | OrganizationType::Program | OrganizationType::Facility => 2,
679            | OrganizationType::Group => 1,
680        }
681    }
682}
683impl ResearchActivity {
684    pub fn to_schema() {
685        let schema = schema_for!(ResearchActivity);
686        println!("{}", serde_json::to_string_pretty(&schema).unwrap());
687    }
688    pub fn analyze(paths: Vec<PathBuf>) -> usize {
689        let config = ValeConfig::default().save();
690        let init = Vale::init().build();
691        let vale = if test_command("vale".into()) {
692            init.with_config(config).with_system_command()
693        } else {
694            init.download(Some(config))
695        };
696        match vale.clone().sync() {
697            | Ok(_) => {
698                let results = paths.iter().map(|path| match ResearchActivity::read(path.into()) {
699                    | Ok(data) => vale
700                        .clone()
701                        .analyze(data.clone().meta.identifier, data.extract_prose(), Some("JSON".into())),
702                    | Err(err) => {
703                        error!("=> {} Read research activity data at - {}", Label::fail(), err);
704                        1
705                    }
706                });
707                let output = results.collect::<Vec<usize>>();
708                output.into_iter().sum()
709            }
710            | Err(err) => {
711                error!("=> {} Vale sync - {}", Label::fail(), err);
712                1
713            }
714        }
715    }
716    pub fn check(paths: Vec<PathBuf>) -> usize {
717        paths
718            .par_iter()
719            .map(|path| match ResearchActivity::read(path.into()) {
720                | Ok(data) => match data.clone().get_errors() {
721                    | Ok(_) => {
722                        info!("=> {} {} has {}", Label::pass(), path.display(), "no schema errors".green().bold());
723                        0
724                    }
725                    | Err(found) => {
726                        let count = get_error_count(found.clone());
727                        error!("=> {} Found {} errors in {}: {:#?}", Label::fail(), count, path.display(), found,);
728                        count
729                    }
730                },
731                | Err(err) => {
732                    error!("=> {} Read research activity data at {} - {}", Label::fail(), path.display(), err);
733                    0
734                }
735            })
736            .sum()
737    }
738    pub fn copy(self) -> ResearchActivity {
739        let ResearchActivity {
740            meta,
741            title,
742            subtitle,
743            sections,
744            contact,
745            notes,
746        } = self.clone();
747        ResearchActivity::init()
748            .meta(meta)
749            .title(title)
750            .maybe_subtitle(subtitle)
751            .sections(sections)
752            .contact(contact)
753            .maybe_notes(notes)
754            .build()
755    }
756    pub fn extract_prose(self) -> String {
757        let sections = match self.sections {
758            | Sections::Highlight(HighlightSections {
759                achievement,
760                impact,
761                technical,
762            }) => {
763                format!(
764                    r#"
765<!-- Achievement -->
766{}
767
768<!-- Impact -->
769{}
770
771<!-- Technical Approach -->
772{}"#,
773                    achievement,
774                    impact,
775                    technical.into_iter().map(|x| format!("- {}", x)).collect::<Vec<String>>().join("\n")
776                )
777            }
778            | Sections::Project(ProjectSections {
779                introduction,
780                challenge,
781                approach,
782                outcomes,
783                research,
784                ..
785            }) => {
786                let Research { focus, areas } = research;
787                format!(
788                    r#"
789<!-- Introduction -->
790{}
791
792<!-- Challenge -->
793{}
794
795<!-- Approach -->
796{}
797
798<!-- Outcomes -->
799{}
800
801<!-- Focus -->
802{}
803
804<!-- Areas -->
805{}"#,
806                    introduction,
807                    challenge,
808                    approach,
809                    outcomes.into_iter().map(|x| format!("- {}", x)).collect::<Vec<String>>().join("\n"),
810                    focus,
811                    areas.into_iter().map(|x| format!("- {}", x)).collect::<Vec<String>>().join("\n")
812                )
813            }
814            | Sections::Organization(OrganizationSections {
815                mission,
816                capabilities,
817                impact,
818                facilities,
819            }) => {
820                format!(
821                    r#"
822<!-- Mission -->
823{}
824
825<!-- Capabilities -->
826{}
827
828<!-- Impact -->
829{}
830
831<!-- Facilities -->
832{}"#,
833                    mission,
834                    capabilities.into_iter().map(|x| format!("- {}", x)).collect::<Vec<String>>().join("\n"),
835                    impact.into_iter().map(|x| format!("- {}", x)).collect::<Vec<String>>().join("\n"),
836                    facilities
837                        .unwrap_or_default()
838                        .into_iter()
839                        .map(|x| format!("- {}", x))
840                        .collect::<Vec<String>>()
841                        .join("\n")
842                )
843            }
844        };
845        match self.subtitle {
846            | Some(subtitle) => format!(
847                r#"# {}
848> {}
849{}"#,
850                self.title, subtitle, sections
851            ),
852            | None => sections.to_string(),
853        }
854    }
855    pub fn format(self, path: Option<PathBuf>) -> ResearchActivity {
856        let parent = match path {
857            | Some(value) => value.parent().unwrap().to_path_buf(),
858            | None => PathBuf::from("."),
859        };
860        let name = match get_image_paths(parent.clone()) {
861            | value if !value.is_empty() => value[0].file_name().unwrap().to_string_lossy().to_string(),
862            | _ => DEFAULT_GRAPHIC_HREF.to_string(),
863        };
864        debug!(name, "=> {} First image", Label::using());
865        let first_graphic = match self.meta.clone().graphics {
866            | Some(values) if !values.is_empty() => {
867                let caption = match values.first() {
868                    | Some(Graphic { caption, .. }) if !caption.is_empty() => {
869                        let trimmed = caption.trim();
870                        trace!(caption = trimmed, "=> {}", Label::using());
871                        trimmed
872                    }
873                    | Some(_) | None => {
874                        error!(path = parent.to_str().unwrap(), "=> {} Caption", Label::not_found());
875                        &"".to_string()
876                    }
877                };
878                Graphic {
879                    href: Some(name.clone()),
880                    caption: caption.to_string(),
881                }
882            }
883            | Some(_) | None => Graphic {
884                href: Some(name.clone()),
885                caption: "".to_string(),
886            },
887        };
888        let mut clone = self.clone().copy();
889        clone.meta.graphics = Some(vec![first_graphic]);
890        clone.meta.keywords = self.clone().resolve(FuzzyValue::Keyword);
891        clone.meta.technology = self.clone().resolve(FuzzyValue::Technology);
892        clone.contact.organization = match resolve_from_organization_json(self.clone().contact.organization) {
893            | Some(value) => value,
894            | None => "".to_string(),
895        };
896        clone.contact.affiliation = match self.clone().contact.affiliation {
897            | Some(ref affiliation) => match resolve_from_organization_json(affiliation.to_string()) {
898                | Some(resolved) => Some(resolved),
899                | None => {
900                    error!(affiliation, "=> {} Affiliation", Label::not_found());
901                    Some(DEFAULT_AFFILIATION.to_string())
902                }
903            },
904            | None => {
905                let ornl = &Organization::load()[0];
906                match ornl.clone().get_member(&clone.contact.organization) {
907                    | Some(organization) => match organization.get_nearest(OrganizationType::Directorate) {
908                        | Some(Organization { name, .. }) => Some(name),
909                        | None => Some(DEFAULT_AFFILIATION.to_string()),
910                    },
911                    | None => {
912                        error!("=> {} Nearest directorate", Label::not_found());
913                        Some(DEFAULT_AFFILIATION.to_string())
914                    }
915                }
916            }
917        };
918        clone.meta.partners = match self.clone().resolve(FuzzyValue::Partner) {
919            | values if !values.is_empty() => Some(values),
920            | _ => None,
921        };
922        clone.meta.sponsors = match self.clone().resolve(FuzzyValue::Sponsor) {
923            | values if !values.is_empty() => Some(values),
924            | _ => None,
925        };
926        clone
927    }
928    pub fn get_errors(self) -> Result<(), HashMap<String, ValidationErrorsKind>> {
929        let mut found: Vec<Option<HashMap<String, ValidationErrorsKind>>> = vec![];
930        found.push(get_validation_errors::<ResearchActivity>(self.clone()));
931        match self.clone().sections {
932            | Sections::Highlight(sections) => {
933                found.push(get_validation_errors::<HighlightSections>(sections));
934            }
935            | Sections::Project(sections) => {
936                found.push(get_validation_errors::<ProjectSections>(sections));
937            }
938            | Sections::Organization(sections) => {
939                found.push(get_validation_errors::<OrganizationSections>(sections));
940            }
941        };
942        let errors = format_errors(found.clone());
943        if !errors.is_empty() {
944            Err(errors)
945        } else {
946            Ok(())
947        }
948    }
949    pub fn read(path: PathBuf) -> serde_json::Result<ResearchActivity> {
950        let content = match read_file(path.clone()) {
951            | Ok(value) if !value.is_empty() => value,
952            | Ok(_) | Err(_) => {
953                error!(path = path.to_str().unwrap(), "=> {} Project content is not valid", Label::fail());
954                "{}".to_owned()
955            }
956        };
957        let parsed: serde_json::Result<ResearchActivity> = serde_json::from_str(&content);
958        let label = match parsed {
959            | Ok(_) => Label::using(),
960            | Err(_) => Label::invalid(),
961        };
962        match parsed {
963            | Ok(data) => {
964                debug!(path = path.to_str().unwrap(), "=> {}", label);
965                trace!("=> {} Research activity data = {:#?}", label, data.dimmed().cyan());
966                Ok(data)
967            }
968            | Err(err) => {
969                error!(path = path.to_str().unwrap(), "=> {}", label);
970                Err(err)
971            }
972        }
973    }
974    fn resolve(self, value_type: FuzzyValue) -> Vec<String> {
975        let values: Vec<_> = match value_type {
976            | FuzzyValue::Keyword => self.meta.keywords,
977            | FuzzyValue::Partner => match self.meta.partners {
978                | Some(values) => values,
979                | None => vec![],
980            },
981            | FuzzyValue::Sponsor => match self.meta.sponsors {
982                | Some(values) => values,
983                | None => vec![],
984            },
985            | FuzzyValue::Technology => self.meta.technology,
986        };
987        let mut data: Vec<_> = values
988            .into_iter()
989            .flat_map(|x| resolve_from_csv_asset(format!("{}", value_type), x))
990            .collect();
991        data.sort();
992        data.dedup();
993        data
994    }
995    pub fn to_markdown(self) -> String {
996        let ResearchActivity { title, .. } = self.clone();
997        format!("# {}", title)
998    }
999}
1000fn get_match_list(value: String, values: Vec<String>) -> Vec<(String, u32)> {
1001    let pattern = Pattern::parse(&value, CaseMatching::Ignore, Normalization::Smart);
1002    let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
1003    pattern.match_list(values.clone(), &mut matcher)
1004}
1005fn print_resolution(output: Option<String>, value: String, name: String) {
1006    let label = name.titlecase();
1007    match output {
1008        | Some(resolved) => {
1009            if resolved.eq(&value.to_string()) {
1010                trace!("=> {} {} = \"{}\"", Label::using(), label, value.clone());
1011            } else {
1012                debug!(input = value.clone(), resolved, "=> {} {}", Label::found(), label);
1013            }
1014        }
1015        | None => {
1016            debug!(value = value.clone(), "=> {} {}", Label::not_found(), label);
1017        }
1018    };
1019}
1020fn resolve_from_csv_asset(name: String, value: String) -> Option<String> {
1021    let data = Constant::csv(&name);
1022    resolve_from_list_of_lists(value, data, name)
1023}
1024fn resolve_from_list_of_lists(value: String, data: Vec<Vec<String>>, name: String) -> Option<String> {
1025    let output = data
1026        .into_iter()
1027        .flat_map(|values| {
1028            let sanitized = sanitize(value.clone());
1029            let matched = get_match_list(sanitized, values.clone());
1030            trace!("{} => {:?}", value.clone(), matched.clone());
1031            if matched.clone().is_empty() {
1032                None
1033            } else {
1034                match values.first() {
1035                    | Some(x) => {
1036                        if value.eq(x) {
1037                            Some((x.into(), 10000))
1038                        } else {
1039                            let score = matched.into_iter().map(|(_, score)| score).max();
1040                            match score {
1041                                | Some(value) if value > 0 => Some((x.to_string(), value)),
1042                                | Some(_) | None => None,
1043                            }
1044                        }
1045                    }
1046                    | None => None,
1047                }
1048            }
1049        })
1050        .max_by_key(|(_, score)| *score)
1051        .map(|(x, _)| x.to_string());
1052    print_resolution(output.clone(), value, name);
1053    output
1054}
1055fn resolve_from_organization_json(value: String) -> Option<String> {
1056    let organization = &Organization::load()[0];
1057    let mut items = vec![organization.clone()];
1058    let directorates = organization.member.clone();
1059    for directorate in &directorates {
1060        items.push(directorate.clone());
1061        let divisions = directorate.member.clone();
1062        for division in &divisions {
1063            items.push(division.clone());
1064        }
1065    }
1066    let data = items
1067        .into_iter()
1068        .map(|x| (x.name.clone(), x.alternative_name.clone()))
1069        .filter(|(name, alias)| !(name.is_empty() || alias.is_none()))
1070        .map(|(name, alias)| {
1071            let alternative_name = match alias {
1072                | Some(x) => x.to_string(),
1073                | None => name.clone(),
1074            };
1075            vec![name, alternative_name]
1076        })
1077        .collect::<Vec<Vec<String>>>();
1078    resolve_from_list_of_lists(value, data, "organization".to_string())
1079}
/// Normalize a value before fuzzy matching
///
/// Removes every `-`, `_`, `.`, and `,`, replaces `&` with `and`, and trims surrounding whitespace.
fn sanitize(value: String) -> String {
    // Plain string operations: no regular expression to compile on every call and no failure path
    value
        .replace(|character: char| matches!(character, '-' | '_' | '.' | ','), "")
        .replace('&', "and")
        .trim()
        .to_string()
}
1086
1087#[cfg(test)]
1088mod tests;
acorn_lib/schema/mod.rs

acorn_lib/schema/
mod.rs