acorn_lib/schema/
mod.rs

1//! ## Research activity schema
2//!
3//! Here you'll find everything needed to build and use the research activity data schema, including metadata fields, section information, media objects, formats, and functions that power ACORN CLI commands.
4//!
5use crate::analyzer::readability::ReadabilityType;
6use crate::analyzer::vale::{Vale, ValeConfig};
7use crate::analyzer::{link_check, Check, CheckCategory, ErrorKind, StaticAnalyzer, StaticAnalyzerConfig};
8use crate::constants::*;
9use crate::util::*;
10use bon::{builder, Builder};
11use convert_case::{Case, Casing};
12use core::hash::{Hash, Hasher};
13use core::num::NonZeroU64;
14use derive_more::Display;
15use fancy_regex::Regex;
16use nucleo_matcher::pattern::{CaseMatching, Normalization, Pattern};
17use nucleo_matcher::{Config, Matcher};
18use owo_colors::OwoColorize;
19use percy_dom::prelude::{html, IterableNodes, View, VirtualNode};
20use petgraph::graph::Graph;
21use rayon::prelude::*;
22use schemars::{schema_for, JsonSchema};
23use serde::{Deserialize, Serialize};
24use serde_repr::*;
25use serde_trim::*;
26use serde_with::skip_serializing_none;
27use std::path::PathBuf;
28use tracing::{debug, error, trace};
29use validator::Validate;
30
31pub mod graph;
32pub mod pid;
33pub mod validate;
34use graph::*;
35use pid::raid;
36use validate::*;
37
38/// ## Keywords
39/// > Core concepts related to the associated research activity
40///
41/// Can be used to filter research activity data and/or power data analytics through concept composition
42///
43/// ### Guidelines for creating keywords
44/// - **Shall**
45///     - Be officially sanctioned by responsible parties
46///     - Be in lower-kebab-case
47///     - Be unique relative to other keywords
48///     - Contain three or more characters
49/// - **Should**
50///     - Not be too specific
51///     - Be one or two words (e.g., `foo` or `foo-bar`)
52///
53/// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/keywords.csv">Full list of keywords</a></div>
// Plain alias for `String`: the alias itself does not enforce any of the guidelines listed above.
54pub type Keyword = String;
55/// U.S. Classified National Security Information Level
56///
57/// See [President Executive Order 13526](https://www.archives.gov/isoo/policy-documents/cnsi-eo.html)
///
/// Variant names are (de)serialized in lowercase, while `Display` renders the upper-case marking (e.g., `SECRET`).
58#[derive(Clone, Debug, Default, Display, Serialize, Deserialize, PartialEq, PartialOrd, JsonSchema)]
59#[serde(rename_all = "lowercase")]
60pub enum ClassificationLevel {
61    /// ### Unclassified (U)
    ///
    /// Default classification level
62    #[default]
63    #[display("UNCLASSIFIED")]
64    Unclassified,
65    /// ### Confidential (C)
66    ///
67    /// Shall be applied to information, the unauthorized disclosure of which reasonably could be expected to cause ***damage*** to the national security that the original classification authority is able to identify or describe.
68    #[display("CONFIDENTIAL")]
69    Confidential,
70    /// ### Secret (S)
71    ///
72    /// Shall be applied to information, the unauthorized disclosure of which reasonably could be expected to cause ***serious damage*** to the national security that the original classification authority is able to identify or describe.
73    #[display("SECRET")]
74    Secret,
75    /// ### Top Secret (TS)
76    ///
77    /// Shall be applied to information, the unauthorized disclosure of which reasonably could be expected to cause ***exceptionally grave damage*** to the national security that the original classification authority is able to identify or describe.
78    #[display("TOP SECRET")]
    // Accept the spaced spelling in addition to the lowercase variant name (`topsecret`) when deserializing
79    #[serde(alias = "top secret")]
80    TopSecret,
81}
/// Category of value; `Display` renders the lowercase label of each category (e.g., `partners`)
// NOTE(review): usage is not visible in this part of the file; presumably selects the curated list that a value
// is fuzzy-matched against (the module imports `nucleo_matcher`) — confirm against the call sites.
82#[derive(Clone, Debug, Serialize, Deserialize, Display)]
83enum FuzzyValue {
    /// Displayed as `partners`
84    #[display("partners")]
85    Partner,
86    /// See [Keyword]
    ///
    /// Displayed as `keywords`
87    #[display("keywords")]
88    Keyword,
    /// Displayed as `sponsors`
89    #[display("sponsors")]
90    Sponsor,
    /// Displayed as `technology` (singular, unlike the other labels)
91    #[display("technology")]
92    Technology,
93}
94/// Media object such as image or video
95///
96/// See <https://schema.org/MediaObject>
// Untagged: the serialized form carries no variant name, so deserialization tries `Image` first and falls back to `Video`.
97#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
98#[serde(untagged)]
99pub enum MediaObject {
100    /// Image format media
    ///
    /// See [ImageObject]
101    Image(ImageObject),
102    /// Video format media
    ///
    /// See [VideoObject]
103    Video(VideoObject),
104}
105/// Organization sub-type
///
/// Variant names are (de)serialized in lowercase (e.g., `ffrdc`); `Display` uses the same text except for `FFRDC`, which is upper case.
106#[derive(Clone, Debug, Serialize, Deserialize, Display, Hash, PartialEq, PartialOrd, JsonSchema)]
107#[serde(rename_all = "lowercase")]
108pub enum OrganizationType {
109    /// Agency
110    #[display("agency")]
111    Agency,
112    /// Initiative that involves multiple DOE laboratories partnering together for a shared purpose
113    #[display("center")]
114    Center,
115    /// Laboratory, public, and private partners
116    #[display("consortium")]
117    Consortium,
118    /// Top-level organizational unit that contains one or more divisions
119    #[display("directorate")]
120    Directorate,
121    /// Mid-level organizational unit that contains one or more sections and groups
122    #[display("division")]
123    Division,
124    /// Building, room, array of equipment, or a number of such things, designed to serve a particular function
125    ///
126    /// Includes DOE-designated user facilities
127    #[display("facility")]
128    Facility,
129    /// Federally Funded Research and Development Center
130    #[display("FFRDC")]
131    Ffrdc,
132    /// Low-level organizational unit that contains a small number of people that function as a team
133    #[display("group")]
134    Group,
135    /// Office
136    #[display("office")]
137    Office,
138    /// Program
139    #[display("program")]
140    Program,
141}
142/// "Other" content not easily placed into the schema
// Untagged: a plain string deserializes as `Unformatted`; otherwise the value is tried as a `Notes` structure.
143#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
144#[serde(untagged)]
145pub enum Other {
146    /// Free-form text
147    Unformatted(String),
148    /// Structured container for miscellaneous things
    ///
    /// See [Notes]
149    Formatted(Notes),
150}
151/// Provides a small subset of common programming languages available for syntax highlighting and contextual actions
///
/// Variant names are (de)serialized in lowercase, matching the text rendered by `Display`.
152#[derive(Clone, Copy, Debug, Deserialize, Display, Serialize, JsonSchema)]
153#[serde(rename_all = "lowercase")]
154pub enum ProgrammingLanguage {
155    /// HyperText Markup Language (HTML)
156    #[display("html")]
157    Html,
158    /// JavaScript (JS) / ECMAScript (ES)
159    ///
160    /// See [MDN JavaScript docs](https://developer.mozilla.org/en-US/docs/Web/JavaScript) for more information
161    #[display("javascript")]
162    JavaScript,
163    /// Julia
164    ///
165    /// See <https://julialang.org/> for more information
166    #[display("julia")]
167    Julia,
168    /// Markdown
169    ///
170    /// See <https://www.markdownguide.org/> for more information
171    #[display("markdown")]
172    Markdown,
173    /// JavaScript Object Notation (JSON)
174    ///
175    /// See <https://www.json.org/json-en.html> for more information
176    #[display("json")]
177    Json,
178    /// Rust
179    ///
180    /// See <https://rust-lang.org/> for more information
181    #[display("rust")]
182    Rust,
183    /// Shell
184    ///
185    /// Catch-all for shell scripts (e.g., Bash, Zsh, etc.)
186    #[display("shell")]
    // Specific shell names are accepted on input but are always written back out as `shell`
187    #[serde(alias = "bash", alias = "zsh", alias = "fish", alias = "powershell")]
188    Shell,
189    /// YAML Ain't Markup Language (YAML)
190    ///
191    /// See <https://yaml.org/> for more information
192    #[display("yaml")]
193    Yaml,
194}
195/// ## TRL
196/// > Technology readiness levels (TRLs) are a method for estimating the maturity of technologies during the acquisition phase of a program.
197///
198/// The "optimal point" to introduce technology depends on technology maturity (TRL) and program requirements. That point can be virtually anywhere in the acquisition process.
199///
200/// See [Technology Readiness for Machine Learning Systems](https://doi.org/10.1038/s41467-022-33128-9) for applying TRLs to machine learning (ML) systems
///
/// (De)serialized as the numeric level (`0` through `9`) rather than the variant name; `Display` renders a short human-readable label.
201#[derive(Clone, Debug, Default, Display, Serialize_repr, Deserialize_repr, PartialEq, PartialOrd, JsonSchema)]
202#[repr(u8)]
203#[serde(deny_unknown_fields)]
204pub enum TechnologyReadinessLevel {
205    /// A stage for greenfield research
206    ///
207    /// Not a standard TRL
208    #[display("Greenfield Research")]
209    Principles = 0,
210    /// Basic principles observed and reported
211    ///
212    /// ML: Goal-oriented research
    ///
    /// Default level
213    #[default]
214    #[display("Basic Research")]
215    Research = 1,
216    /// Technology concept and/or application formulated
217    ///
218    /// ML: Proof of principle development
219    #[display("Technology Concept")]
220    Concept = 2,
221    /// Analytical and experimental critical function and/or characteristic proof-of-concept
222    ///
223    /// ML: Systems development
224    #[display("Feasible")]
225    Feasible = 3,
226    /// Component and/or breadboard validation in laboratory environment (low fidelity)
227    ///
228    /// ML: Proof of concept development
229    #[display("Developing")]
230    Developing = 4,
231    /// Component and/or breadboard validation in relevant environment (high fidelity)
232    ///
233    /// ML: Machine learning "capability"
234    #[display("Developed")]
235    Developed = 5,
236    /// System/subsystem model or prototype demonstration in a relevant environment (high fidelity)
237    ///
238    /// ML: Application development
239    #[display("Prototype")]
240    Prototype = 6,
241    /// System prototype demonstration in an operational environment
242    ///
243    /// ML: Integrations
244    #[display("Operational")]
245    Operational = 7,
246    /// Actual system completed and qualified through test and demonstration
247    ///
248    /// ML: Mission-ready
249    #[display("Mission Ready")]
250    MissionReady = 8,
251    /// Actual system proven through successful mission operation
252    ///
253    /// ML: Deployment
254    #[display("Mission Capable")]
255    MissionCapable = 9,
256}
257/// Contact point (i.e. "point of contact") for research activity
258///
259/// See <https://schema.org/ContactPoint>
// Fields are (de)serialized in camelCase (e.g., `jobTitle`) and unknown fields are rejected.
// The builder defaults below are placeholder values, not real contact data.
260#[derive(Builder, Clone, Debug, Serialize, Deserialize, Validate, JsonSchema)]
261#[builder(start_fn = init)]
262#[serde(deny_unknown_fields, rename_all = "camelCase")]
263pub struct ContactPoint {
264    /// Job title (e.g., "Group Lead") of role that the contact fills related to the associated research activity.
265    /// ### Example
266    /// > Ideal contact title for a project would be "Primary Investigator"
267    ///
268    /// ### Example
269    /// > Ideal contact title for a group organization would be "Group Lead"
270    ///
271    /// <div class="warning">When the nearest associated title is unclear, job role of the contact can be used (e.g., "Senior Scientist").</div>
272    ///
273    /// See <https://schema.org/jobTitle> for more information
274    #[builder(default = "Researcher".to_string())]
275    #[serde(alias = "title", deserialize_with = "string_trim")]
276    pub job_title: String,
277    /// First (given) name of contact point
278    ///
279    /// See <https://schema.org/givenName> for more information
280    #[builder(default = "First".to_string())]
281    #[serde(alias = "first", deserialize_with = "string_trim")]
282    pub given_name: String,
283    /// Last (family) name of contact point
284    ///
285    /// See <https://schema.org/familyName> for more information
286    #[builder(default = "Last".to_string())]
287    #[serde(alias = "last", deserialize_with = "string_trim")]
288    pub family_name: String,
289    /// ORCiD of contact point
290    /// ### Example
291    /// > "<https://orcid.org/0000-0002-2057-9115>"
292    #[validate(custom(function = "is_orcid"))]
293    #[serde(alias = "orcid")]
294    pub identifier: Option<String>,
295    /// Email address of contact point
296    ///
297    /// See <https://schema.org/email> for more information
298    #[validate(email(message = "Please provide a valid email"))]
299    #[builder(default = "first_last@example.com".to_string())]
300    #[serde(deserialize_with = "string_trim")]
301    pub email: String,
302    /// Phone number of contact point
303    ///
304    /// See <https://schema.org/telephone> for more information
305    #[validate(custom(function = "is_phone_number"))]
306    #[builder(default = "123-456-7890".to_string())]
307    #[serde(alias = "phone", deserialize_with = "string_trim")]
308    pub telephone: String,
309    /// Profile URL of contact point
310    /// ### Example
311    /// > Profile URL for "Jason Wohlgemuth" could be <https://impact.ornl.gov/en/persons/jason-wohlgemuth>
312    #[validate(url(message = "Please provide a valid profile URL"))]
313    #[builder(default = "https://example.com".to_string())]
314    #[serde(alias = "profile", deserialize_with = "string_trim")]
315    pub url: String,
316    /// Organization of contact point
317    ///
318    /// See [Organization]
319    #[builder(default = "Some Organization".to_string())]
320    #[serde(deserialize_with = "string_trim")]
321    pub organization: String,
322    /// Affiliation of associated research activity data
323    ///
324    /// <div class="warning">Where organization applies to the contact point, affiliation applies to the research activity the contact point is associated with</div>
325    ///
326    /// See <https://schema.org/affiliation> for more information
327    pub affiliation: Option<String>,
328}
329/// Image format media (e.g., PNG, JPEG, SVG, etc.)
330///
331/// See <https://schema.org/ImageObject>
// Fields are (de)serialized in camelCase (e.g., `contentUrl`); `None` values are omitted on output and unknown fields are rejected on input.
332#[skip_serializing_none]
333#[derive(Builder, Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
334#[builder(start_fn = init)]
335#[serde(deny_unknown_fields, rename_all = "camelCase")]
336pub struct ImageObject {
337    /// Image caption
    // NOTE(review): the message hard-codes "100" while the limit comes from `MAX_LENGTH_IMAGE_CAPTION` — confirm the two agree.
338    #[validate(length(max = "MAX_LENGTH_IMAGE_CAPTION", message = "Caption is too long, please reduce the length below 100."))]
339    #[serde(deserialize_with = "string_trim")]
340    pub caption: String,
341    /// File size (in kilobytes)
342    ///
343    /// <div class="warning">Will be overwritten by running <pre>acorn format</pre></div>
344    ///
345    /// See <https://schema.org/contentSize> for more information
346    #[serde(alias = "size")]
347    pub content_size: Option<NonZeroU64>,
348    /// Content URL
    ///
    /// Also accepted as `url` or `href` when deserializing
349    #[validate(custom(function = "has_image_extension"))]
350    #[serde(alias = "url", alias = "href")]
351    pub content_url: Option<String>,
352    /// Image height (in pixels)
353    ///
354    /// <div class="warning">Will be overwritten by running <pre>acorn format</pre></div>
355    ///
356    /// See <https://schema.org/height> for more information
357    pub height: Option<NonZeroU64>,
358    /// Image width (in pixels)
359    ///
360    /// <div class="warning">Will be overwritten by running <pre>acorn format</pre></div>
361    ///
362    /// See <https://schema.org/width> for more information
363    pub width: Option<NonZeroU64>,
364}
365/// ## Research Activity Metadata
// Fields are (de)serialized in camelCase (e.g., `additionalType`); `None` values are omitted on output and unknown fields are rejected on input.
366#[skip_serializing_none]
367#[derive(Builder, Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
368#[builder(start_fn = init)]
369#[serde(deny_unknown_fields, rename_all = "camelCase")]
370pub struct Metadata {
371    /// Classification level of associated research activity data
372    pub classification: Option<ClassificationLevel>,
373    /// <abbr title="Technology Readiness Level">TRL</abbr> is applicable to acquisition, machine learning, and more
374    pub trl: Option<TechnologyReadinessLevel>,
375    /// Describes the active status of the associated research activity data
376    ///
377    /// <div class="warning">Archived content typically will be omitted from public artifacts such as <a href="https://research.ornl.gov">the ORNL research activity index</a></div>
378    #[builder(default = false)]
379    pub archive: bool,
380    /// Describes the draft status of the associated research activity data
381    ///
382    /// <div class="warning">Draft content typically will be omitted from public artifacts such as <a href="https://research.ornl.gov">the ORNL research activity index</a></div>
    // Built activities start out as drafts unless the builder is told otherwise
383    #[builder(default = true)]
384    pub draft: bool,
385    /// Identifier for associated research activity data
386    /// ### Example
387    /// > `my-research-project`
388    ///
389    /// <div class="warning">Should be <a href="https://developer.mozilla.org/en-US/docs/Glossary/Kebab_case">lower-kebab-case</a></div>
390    ///
391    #[validate(custom(function = "is_kebabcase"))]
392    #[builder(default = "some-research-project".to_string())]
393    #[serde(alias = "id", rename = "identifier", deserialize_with = "string_trim")]
394    pub identifier: String,
395    /// Digital Object Identifier(s) related to the associated research activity data
396    ///
397    /// See <https://www.doi.org/> for more information
398    #[validate(custom(function = "validate_attribute_doi"))]
399    #[serde(default)]
400    pub doi: Option<Vec<String>>,
401    /// URL(s) of internet location where associated publication(s) can be found
402    #[validate(custom(function = "is_list_url"))]
403    #[serde(default)]
404    pub publications: Option<Vec<String>>,
405    /// Research Activity Identifier
406    ///
407    /// See <https://www.raid.org/> for more information
408    #[validate(nested)]
409    #[serde(default)]
410    pub raid: Option<raid::Metadata>,
411    /// Research Organization Registry
412    ///
413    /// See <https://www.ror.org/> for more information
414    #[validate(custom(function = "validate_attribute_ror"))]
415    #[serde(default)]
416    pub ror: Option<Vec<String>>,
417    /// Additional type
418    ///
419    /// Type of associated research activity data when directly associated with an organization
420    pub additional_type: Option<OrganizationType>,
421    /// Images, videos, and other media related to the associated research activity data
    ///
    /// Also accepted as `graphics` when deserializing
422    #[serde(alias = "graphics")]
423    pub media: Option<Vec<MediaObject>>,
424    /// Websites related to the associated research activity data
425    #[validate(nested)]
426    pub websites: Option<Vec<Website>>,
427    /// See [Keyword]
    // NOTE(review): unlike `technology`, keywords are not passed through `vec_string_trim` on input — confirm this is intentional.
428    #[builder(default = Vec::<String>::new())]
429    pub keywords: Vec<Keyword>,
430    /// Software, programming languages, and digital resources (e.g., tools, libraries, frameworks, data) related to the associated research activity data
431    /// ### Examples
432    /// - Rust
433    /// - Polars
434    /// - gdal
435    /// - matplotlib
436    /// - LaTeX
437    ///
438    /// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/technology.csv">Full list of technologies</a></div>
439    #[builder(default = Vec::<String>::new())]
440    #[serde(deserialize_with = "vec_string_trim")]
441    pub technology: Vec<String>,
442    /// Organization(s) responsible for funding associated research activity data
443    ///
444    /// Includes any office within a US cabinet-level department that has leadership appointed by the president and confirmed by the Senate, e.g., NNSA or Office of Science.
445    ///
446    /// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/sponsors.csv">Full list of sponsors</a></div>
447    pub sponsors: Option<Vec<String>>,
448    /// Organization(s) related to the associated research activity data
449    /// ### Examples
450    /// - Los Alamos National Laboratory
451    /// - University of Tennessee
452    /// - IBM
453    /// <div class="warning"><a href="https://code.ornl.gov/research-enablement/acorn/-/blob/main/acorn-lib/assets/constants/partners.csv">Full list of partners</a></div>
454    pub partners: Option<Vec<String>>,
455    /// Identifiers of related research activity data
456    ///
457    /// <div class="warning">WIP</div>
458    pub related: Option<Vec<String>>,
459}
460/// Notes
461///
462/// Structured container for information not easily captured in other fields
///
/// Every field is optional; `None` values are omitted when serializing.
463#[skip_serializing_none]
464#[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema, Validate)]
465#[serde(deny_unknown_fields)]
466pub struct Notes {
467    /// [ASCR](https://www.energy.gov/science/ascr/advanced-scientific-computing-research) highlight attribute
468    pub managers: Option<Vec<String>>,
469    /// Collection of capabilities aimed at achieving a specific cross-cutting research outcome
470    pub programs: Option<Vec<String>>,
471    /// (PowerPoint) presentation notes
472    #[serde(default, deserialize_with = "option_string_trim")]
473    pub presentation: Option<String>,
474}
475/// ### Organization
476///
477/// Structured container for information about an organization
478///
479/// See also [OrganizationType]
480#[skip_serializing_none]
481#[derive(Clone, Debug, Serialize, Deserialize, Display, Hash, PartialEq, PartialOrd)]
// NOTE(review): the display format ends with an unmatched `)` (renders like `Organization (group) - Name)`) — confirm whether that is intended.
482#[display("Organization ({additional_type}) - {name})")]
483#[serde(deny_unknown_fields, rename_all = "camelCase")]
484pub struct Organization {
485    /// Full name of the organization
486    ///
487    /// See <https://schema.org/name> for more information
488    #[serde(deserialize_with = "string_trim")]
489    pub name: String,
490    /// Research Organization Registry
491    ///
492    /// See <https://www.ror.org/> for more information
493    #[serde(default, deserialize_with = "option_string_trim")]
494    pub ror: Option<String>,
495    /// Organization alias (e.g., acronym or nickname)
496    ///
497    /// See <https://schema.org/alternateName> for more information
498    #[serde(default, deserialize_with = "option_string_trim")]
499    pub alternative_name: Option<String>,
500    /// Organization sub-type
501    ///
502    /// See <https://schema.org/additionalType> for more information
503    pub additional_type: OrganizationType,
504    /// See [Keyword]
505    pub keywords: Option<Vec<Keyword>>,
506    /// Distinct part(s) of the associated containing organization
507    ///
508    /// See <https://schema.org/member> for more information
    // Recursive: each member is itself an `Organization`. There is no `#[serde(default)]`, so the field has to be
    // present (an empty list is fine) when deserializing.
509    pub member: Vec<Organization>,
510}
511/// ## Research Activity
512/// > Research activity is an identifiable package of work involving organized, systematic investigation.
513///
514/// See <https://www.raid.org/> for more information
// `Display` renders as `Research Activity (<title>)`; `None` values are omitted on output and unknown fields are rejected on input.
515#[skip_serializing_none]
516#[derive(Builder, Clone, Debug, Display, Deserialize, Serialize, JsonSchema, Validate)]
517#[builder(start_fn = init)]
518#[display("Research Activity ({title})")]
519#[serde(deny_unknown_fields)]
520pub struct ResearchActivity {
521    /// Associated metadata
    ///
    /// See [Metadata]
522    #[validate(nested)]
523    #[builder(default)]
524    pub meta: Metadata,
525    /// Heading that identifies and describes the associated research activity
526    #[validate(length(min = 4, max = "MAX_LENGTH_TITLE"))]
527    #[builder(default = "Research Activity Title".to_string())]
528    #[serde(deserialize_with = "string_trim")]
529    pub title: String,
530    /// Short description that augments the title of the associated research activity
    // NOTE(review): the message hard-codes "75" while the limit comes from `MAX_LENGTH_SUBTITLE` — confirm the two agree.
531    #[validate(length(max = "MAX_LENGTH_SUBTITLE", message = "Subtitle is too long, please reduce the length below 75."))]
532    #[serde(default, deserialize_with = "option_string_trim")]
533    pub subtitle: Option<String>,
534    /// Prose components of associated research activity
    ///
    /// See [Sections]
535    #[validate(nested)]
536    #[builder(default)]
537    pub sections: Sections,
538    /// Contact point (i.e. "point of contact") for research activity
    ///
    /// See [ContactPoint]
539    #[validate(nested)]
540    #[builder(default)]
541    pub contact: ContactPoint,
542    /// Other information related to the associated research activity not easily captured in structured areas of the schema
    ///
    /// See [Other]
543    pub notes: Option<Other>,
544}
545/// Video format media (e.g., MP4, AVI, MOV, GIF, etc.)
546///
547/// See <https://schema.org/VideoObject> for more information
// Fields are (de)serialized in camelCase (e.g., `contentUrl`); `None` values are omitted on output and unknown fields are rejected on input.
548#[skip_serializing_none]
549#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
550#[serde(deny_unknown_fields, rename_all = "camelCase")]
551pub struct VideoObject {
552    /// File size (in kilobytes)
553    ///
554    /// See <https://schema.org/contentSize> for more information
555    #[serde(alias = "size")]
556    pub content_size: Option<NonZeroU64>,
557    /// Video URL
    ///
    /// Also accepted as `url` or `href` when deserializing
558    #[validate(url)]
559    #[serde(alias = "url", alias = "href")]
560    pub content_url: Option<String>,
561    /// Video description
562    ///
563    /// See <https://schema.org/description> for more information
564    #[serde(deserialize_with = "string_trim")]
565    pub description: String,
566    // TODO: Create ISO 8601 struct and/or validator
567    /// Duration of video in [ISO 8601 format](https://en.wikipedia.org/wiki/ISO_8601)
568    ///
569    /// See <https://schema.org/duration> for more information
    // Stored as a plain string; no validation attribute is attached to this field (see the TODO above)
570    pub duration: Option<String>,
571    /// Video height (in pixels)
572    ///
573    /// See <https://schema.org/height> for more information
574    pub height: Option<NonZeroU64>,
575    /// Video width (in pixels)
576    ///
577    /// See <https://schema.org/width> for more information
578    pub width: Option<NonZeroU64>,
579}
580/// ## Website
581/// > Website link and title description
582/// ### Example
583/// When deserializing research activity data, websites can be provided as a list of JSON objects.
584/// ```json
585/// {
586///     "websites": [
587///       {
588///         "title": "Home Page",
589///         "url": "https://example.com"
590///       },
591///       {
592///         "title": "Job Listing",
593///         "url": "https://www.example.com/jobs"
594///       }
595///     ]
596/// }
597/// ```
598///
599#[derive(Clone, Debug, Serialize, Deserialize, Validate, JsonSchema)]
600#[serde(deny_unknown_fields)]
601pub struct Website {
602    /// Brief description of webpage content
603    ///
604    /// See <https://schema.org/description> for more information
    // `title` (as used in the example above) is accepted as an alias on input; output always uses `description`
605    #[serde(alias = "title", deserialize_with = "string_trim")]
606    pub description: String,
607    /// Associated website URL
608    #[validate(url(message = "Please provide a valid URL"))]
609    #[serde(deserialize_with = "string_trim")]
610    pub url: String,
611}
612/// Research activity prose components that describe the activity using natural language
// NOTE(review): several `length` rules below set both `min` and `max` but carry a "too long" message with a
// hard-coded number; presumably the same message is reported when a value is too short — confirm against
// `validator` behavior and the referenced constants.
613#[skip_serializing_none]
614#[derive(Builder, Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
615#[builder(start_fn = init)]
616#[serde(deny_unknown_fields)]
617pub struct Sections {
618    /// The reason for the research or research organization to exist
619    /// ### Example
620    /// > "Develop the first atomic bombs in the world to assist the Allied forces and bring an end to WWII"
621    #[validate(length(
622        min = 10,
623        max = "MAX_LENGTH_SECTION_MISSION",
624        message = "Mission is too long, please reduce the length below 250."
625    ))]
626    #[builder(default = "Purpose of the research".to_string())]
627    #[serde(alias = "introduction", deserialize_with = "string_trim")]
628    pub mission: String,
629    /// A problem or situation within a research field requiring scientific effort, resources, and/or innovation to overcome
630    /// ### Example
631    /// > "During WWII, there was a fear that Germany was researching and developing nuclear weapons, giving them a decisive advantage over Allied forces, including the United States, Great Britain, and Canada."
632    #[validate(length(
633        min = 10,
634        max = "MAX_LENGTH_SECTION_CHALLENGE",
635        message = "Challenge is too long, please reduce the length below 500."
636    ))]
637    #[builder(default = "Reason for the research".to_string())]
638    #[serde(deserialize_with = "string_trim")]
639    pub challenge: String,
640    /// The plan, resources and actions taken to perform the research in a given project or organization
641    /// ### Examples
642    /// - "Production across four different sites in the United States, each with a different focus, for security and safety purposes"
643    /// - "Research into new fields including nuclear fission, isotope separation methods, uranium enrichment, plutonium development, and weapons design"
644    /// - "Military coordination for project construction and security management as well as defense communications to national leaders"
645    #[validate(
646        length(min = 1, max = "MAX_COUNT_APPROACH", message = "Please limit the number of approaches to 6"),
647        custom(function = "validate_attribute_approach")
648    )]
649    #[builder(default = vec!["List of actions taken to perform the research".to_string()])]
650    #[serde(deserialize_with = "vec_string_trim")]
651    pub approach: Vec<String>,
652    /// Tangible effects the research approach has on areas outside academia, such as industry, society, the surrounding environment, or culture
653    /// ### Examples
654    /// - "Development of the world's first atomic weapons"
655    /// - "Introduction of the nuclear age, including advancements in nuclear science, engineering and a new source of energy"
656    /// - "The end of WWII, along with many ethical and moral considerations related to use of atomic weapons"
657    #[validate(length(min = 1, max = "MAX_COUNT_IMPACT"), custom(function = "validate_attribute_impact"))]
658    #[builder(default = vec!["List of tangible proof that validates the research approach".to_string()])]
659    #[serde(alias = "outcomes", deserialize_with = "vec_string_trim")]
660    pub impact: Vec<String>,
661    /// Notable recognition or awards given to the research team, organization, or research products
662    /// ### Examples
663    /// - "At least six Nobel Prizes awarded to Manhattan Project researchers in the years following the end of the project"
664    /// - "Creation of the Atomic Energy Commission in 1946, later becoming the Department of Energy and Nuclear Regulatory Commission"
    // Unlike the other list limits, this maximum is a literal (4) rather than a named constant
665    #[validate(length(min = 1, max = 4, message = "Please limit the number of achievements to 4"))]
666    pub achievement: Option<Vec<String>>,
667    /// Expertise as applied to technology in a given mission space
668    /// ### Examples
669    /// - "Gaseous diffusion and electromagnetic separation to create fissionable materials"
670    /// - "Mechanisms for achieving supercritical mass for nuclear detonation"
671    /// - "Nuclear reactor development, which paved the way for nuclear power"
672    /// - "Radiochemistry for nuclear detonation analysis and advanced medical research with radioisotopes"
673    /// - "Large-scale multidisciplinary scientific collaboration"
674    #[validate(length(min = 1, max = "MAX_COUNT_CAPABILITIES"), custom(function = "validate_attribute_capabilities"))]
675    pub capabilities: Option<Vec<String>>,
676    /// Overview of research focus and areas
677    /// ### Example Focus
678    /// > "Developing fissionable materials for nuclear reactions to develop the world's first atomic weapons"
679    /// ### Example Areas
680    /// - "Nuclear fission"
681    /// - "Radiochemistry"
682    /// - "Uranium enrichment"
683    /// - "Electromagnetic separation"
684    /// - "Weapon design"
685    #[validate(nested)]
686    #[builder(default = Research::init().build())]
687    pub research: Research,
688}
689/// Overview of research focus and areas
///
/// Both fields are required; the builder fills them with placeholder text when left unset.
690#[derive(Builder, Clone, Debug, Serialize, Deserialize, JsonSchema, Validate)]
691#[builder(start_fn = init)]
692#[serde(deny_unknown_fields)]
693pub struct Research {
694    /// Brief overview of the project or organization's research
    // NOTE(review): the rule sets `min = 10` but the message only mentions "too long" with a hard-coded 150 —
    // confirm it matches `MAX_LENGTH_RESEARCH_FOCUS` and reads sensibly for too-short input.
695    #[validate(length(
696        min = 10,
697        max = "MAX_LENGTH_RESEARCH_FOCUS",
698        message = "Focus is too long, please reduce the length below 150."
699    ))]
700    #[builder(default = "Focus of the research".to_string())]
701    #[serde(deserialize_with = "string_trim")]
702    pub focus: String,
703    /// Topics related to and encapsulated within the project or organization
704    #[validate(length(min = 1, max = "MAX_COUNT_RESEARCH_AREAS"), custom(function = "validate_attribute_areas"))]
705    #[builder(default = vec!["Areas of research".to_string()])]
706    #[serde(deserialize_with = "vec_string_trim")]
707    pub areas: Vec<String>,
708}
709impl Default for ContactPoint {
710    fn default() -> Self {
711        Self::init().build()
712    }
713}
714impl Default for Metadata {
715    fn default() -> Self {
716        Metadata::init().build()
717    }
718}
719impl Default for ResearchActivity {
720    fn default() -> Self {
721        ResearchActivity::init().build()
722    }
723}
724impl Default for Sections {
725    fn default() -> Self {
726        Sections::init().build()
727    }
728}
729impl Hash for ResearchActivity {
730    fn hash<H: Hasher>(&self, state: &mut H) {
731        self.meta.identifier.hash(state);
732    }
733}
734impl MediaObject {
735    /// Returns the content URL of the media object
736    pub fn content_url(self) -> Option<String> {
737        match self {
738            | MediaObject::Image(ImageObject { content_url, .. }) => content_url,
739            | MediaObject::Video(VideoObject { content_url, .. }) => content_url,
740        }
741    }
742    /// Returns the description of the media object
743    pub fn description(self) -> String {
744        match self {
745            | MediaObject::Image(ImageObject { caption, .. }) => caption,
746            | MediaObject::Video(VideoObject { description, .. }) => description,
747        }
748    }
749    /// Returns true if the media object is an image, false otherwise
750    pub fn is_image(self) -> bool {
751        match self {
752            | MediaObject::Image(_) => true,
753            | _ => false,
754        }
755    }
756}
757impl Metadata {
758    fn first_image(self) -> Option<MediaObject> {
759        match self.media {
760            | Some(values) => values.into_iter().filter(|x| x.clone().is_image()).collect::<Vec<_>>().first().cloned(),
761            | None => None,
762        }
763    }
764    /// Returns the content URL of the first image in the list of media objects, or a default value if none are present.
765    pub fn first_image_content_url(self) -> String {
766        match self.first_image() {
767            | Some(media) => match media {
768                | MediaObject::Image(ImageObject { content_url, .. }) => match content_url {
769                    | Some(value) if !value.is_empty() => value.clone().trim().to_string(),
770                    | Some(_) | None => DEFAULT_GRAPHIC_HREF.to_string(),
771                },
772                | _ => DEFAULT_GRAPHIC_HREF.to_string(),
773            },
774            | None => DEFAULT_GRAPHIC_HREF.to_string(),
775        }
776    }
777    /// Returns the caption of the first image in the list of media objects, or a default value if none are present.
778    pub fn first_image_caption(self) -> String {
779        match self.first_image() {
780            | Some(MediaObject::Image(ImageObject { caption, .. })) => match caption.clone() {
781                | value if !value.is_empty() => value.clone(),
782                | _ => DEFAULT_GRAPHIC_CAPTION.to_string(),
783            },
784            | Some(_) | None => DEFAULT_GRAPHIC_CAPTION.to_string(),
785        }
786    }
787}
788impl Organization {
789    /// Returns a list of all organizations, loaded from the organization.json asset file
790    pub fn load() -> Vec<Organization> {
791        serde_json::from_str(&Constant::from_asset("organization.json")).unwrap()
792    }
793    /// Finds the first organization in the hierarchy with the given label.
794    pub fn member(self, label: &str) -> Option<Organization> {
795        self.members().into_iter().find(|Organization { name, .. }| name == label)
796    }
797    /// Returns a flattened vector of the organization hierarchy.
798    ///
799    /// This function collects the organization, its directorates, divisions, and groups
800    /// into a single vector, maintaining their hierarchical order.
801    pub fn members(self) -> Vec<Organization> {
802        let organization = self;
803        let mut items = vec![organization.clone()];
804        let directorates = organization.member.clone();
805        for directorate in &directorates {
806            items.push(directorate.clone());
807            let divisions = directorate.member.clone();
808            for division in &divisions {
809                items.push(division.clone());
810                let groups = division.member.clone();
811                for group in &groups {
812                    items.push(group.clone());
813                }
814            }
815        }
816        items
817    }
818    /// Returns the nearest organization of the given type in the organization hierarchy.
819    pub fn nearest(self, organization_type: OrganizationType) -> Option<Organization> {
820        let a = self.clone().additional_type.order();
821        let b = organization_type.order();
822        if a > b {
823            None
824        } else {
825            let ornl = Organization::load()[0].clone();
826            let graph = ornl.clone().to_graph();
827            let name = match b - a {
828                | 3 => Some(ornl.clone().name),
829                | 2 => match node_from_label(&graph, &self.name) {
830                    | Some(node) => match node_parent(&graph, node) {
831                        | Some(parent) => match node_parent(&graph, parent) {
832                            | Some(grandparent) => node_name(&graph, grandparent),
833                            | None => None,
834                        },
835                        | None => None,
836                    },
837                    | None => None,
838                },
839                | 1 => match node_from_label(&graph, &self.name) {
840                    | Some(node) => match node_parent(&graph, node) {
841                        | Some(parent) => node_name(&graph, parent),
842                        | None => None,
843                    },
844                    | None => None,
845                },
846                | 0 => Some(self.name),
847                | _ => None,
848            };
849            match name {
850                | Some(value) => match ornl.member(&value) {
851                    | Some(organization) => Some(organization),
852                    | None => None,
853                },
854                | None => None,
855            }
856        }
857    }
858    /// Returns a graph representation of the organization hierarchy.
859    pub fn to_graph(self) -> Graph<String, u8> {
860        let mut graph: Graph<String, u8, petgraph::Directed> = Graph::new();
861        let organization = &self;
862        let root = graph.add_node(organization.name.clone());
863        for directorate in organization.member.iter() {
864            let a = graph.add_node(directorate.name.clone());
865            graph.add_edge(root, a, 0);
866            for division in directorate.member.iter() {
867                let b = graph.add_node(division.name.clone());
868                graph.add_edge(a, b, 0);
869                for group in division.member.iter() {
870                    let c = graph.add_node(group.name.clone());
871                    graph.add_edge(b, c, 0);
872                }
873            }
874        }
875        graph
876    }
877}
878impl OrganizationType {
879    /// Parses a string into an `OrganizationType` value
880    pub fn from_string(value: String) -> OrganizationType {
881        match value.to_lowercase().as_str() {
882            | "agency" => OrganizationType::Agency,
883            | "center" => OrganizationType::Center,
884            | "consortium" => OrganizationType::Consortium,
885            | "division" => OrganizationType::Division,
886            | "directorate" => OrganizationType::Directorate,
887            | "group" => OrganizationType::Group,
888            | "office" => OrganizationType::Office,
889            | "program" => OrganizationType::Program,
890            | "facility" => OrganizationType::Facility,
891            | "ffrdc" => OrganizationType::Ffrdc,
892            | _ => unreachable!(),
893        }
894    }
895    /// Returns the order of an `OrganizationType` value
896    pub fn order(self) -> u8 {
897        match self {
898            | OrganizationType::Ffrdc | OrganizationType::Agency | OrganizationType::Consortium | OrganizationType::Office => 4,
899            | OrganizationType::Directorate => 3,
900            | OrganizationType::Division | OrganizationType::Center | OrganizationType::Program | OrganizationType::Facility => 2,
901            | OrganizationType::Group => 1,
902        }
903    }
904}
905impl ResearchActivity {
906    /// Creates a new `ResearchActivity`
907    pub fn new() -> Self {
908        ResearchActivity::default()
909    }
910    /// Print research activity schema as JSON schema
911    pub fn to_schema() {
912        let schema = schema_for!(ResearchActivity);
913        println!("{}", serde_json::to_string_pretty(&schema).unwrap());
914    }
915    /// Analyzes a list of research activity files
916    pub fn analyze_prose(paths: Vec<PathBuf>, is_offline: bool, skip_verify_checksum: bool) -> Vec<Check> {
917        let config = ValeConfig::default().save();
918        let vale = Vale::resolve(config, is_offline, skip_verify_checksum);
919        match vale.clone().sync(is_offline) {
920            | Ok(_) => {
921                let results = paths.iter().map(|path| match ResearchActivity::read(path.into()) {
922                    | Some(data) => vale.clone().run(data.clone().meta.identifier, data.extract_prose(), Some("JSON".into())),
923                    | None => {
924                        error!("=> {} Read research activity data", Label::fail());
925                        Check::init().category(CheckCategory::Prose).success(false).build()
926                    }
927                });
928                results.collect::<Vec<Check>>()
929            }
930            | Err(why) => {
931                error!("=> {} Vale sync - {why}", Label::fail());
932                vec![Check::init().category(CheckCategory::Prose).success(false).build()]
933            }
934        }
935    }
936    /// Calculate readability based on passed options for a list of research activity files
937    pub fn calculate_readability(paths: Vec<PathBuf>, readability_type: ReadabilityType) -> Vec<Check> {
938        paths
939            .par_iter()
940            .map(|path| match ResearchActivity::read(path.into()) {
941                | Some(data) => {
942                    let index = readability_type.calculate(&data.extract_prose());
943                    let maximum = match readability_type.maximum_allowed_from_env() {
944                        | Some(value) => {
945                            debug!(value, "=> {} Maximum allowed readability from .env", Label::using());
946                            value
947                        }
948                        | None => readability_type.maximum_allowed(),
949                    };
950                    debug!(value = index, "=> {} Readability index", Label::using());
951                    if index > maximum {
952                        let errors = ErrorKind::Readability((index, readability_type));
953                        Check::init()
954                            .category(CheckCategory::Readability)
955                            .success(false)
956                            .message(path.display().to_string())
957                            .errors(errors)
958                            .context(maximum.to_string())
959                            .build()
960                    } else {
961                        let score = format!("({} = {}/{})", readability_type.to_string().to_uppercase(), index, maximum);
962                        Check::init()
963                            .category(CheckCategory::Readability)
964                            .success(true)
965                            .message(path.display().to_string())
966                            .context(score)
967                            .build()
968                    }
969                }
970                | None => {
971                    error!("=> {} Read research activity data", Label::fail());
972                    Check::init().category(CheckCategory::Readability).success(false).build()
973                }
974            })
975            .collect::<Vec<Check>>()
976    }
977    /// Checks a list of research activity files
978    pub fn check(paths: Vec<PathBuf>, is_offline: bool) -> Vec<Check> {
979        let runtime = tokio_runtime();
980        paths
981            .par_iter()
982            .map(|path| match ResearchActivity::read(path.into()) {
983                | Some(data) => {
984                    let offline_issues = data
985                        .clone()
986                        .validation_issues()
987                        .into_iter()
988                        .map(|issue| issue.with_uri(path.display().to_string()))
989                        .collect::<Vec<_>>();
990                    let online_issues = runtime.block_on(async {
991                        let mut issues: Vec<Check> = vec![];
992                        if !is_offline {
993                            let dois = match data.clone().meta.doi {
994                                | Some(values) => values.into_iter().map(|doi| format!("https://doi.org/{doi}")).collect(),
995                                | None => vec![],
996                            };
997                            let websites = match data.clone().meta.websites {
998                                | Some(values) => values.into_iter().map(|Website { url, .. }| url).collect(),
999                                | None => vec![],
1000                            };
1001                            for url in dois.into_iter().chain(websites.into_iter()) {
1002                                let result = link_check(Some(url)).await;
1003                                issues.push(result);
1004                            }
1005                        }
1006                        issues
1007                    });
1008                    offline_issues.into_iter().chain(online_issues).collect::<Vec<Check>>()
1009                }
1010                | None => {
1011                    error!("=> {} Read research activity data at {}", Label::fail(), path.display());
1012                    vec![Check::init().category(CheckCategory::Schema).success(false).build()]
1013                }
1014            })
1015            .flatten()
1016            .collect()
1017    }
1018    /// Creates a copy of a `ResearchActivity`
1019    pub fn copy(self) -> ResearchActivity {
1020        let ResearchActivity {
1021            meta,
1022            title,
1023            subtitle,
1024            sections,
1025            contact,
1026            notes,
1027        } = self.clone();
1028        ResearchActivity::init()
1029            .meta(meta)
1030            .title(title)
1031            .maybe_subtitle(subtitle)
1032            .sections(sections)
1033            .contact(contact)
1034            .maybe_notes(notes)
1035            .build()
1036    }
1037    /// Extracts prose from a `ResearchActivity`
1038    pub fn extract_prose(self) -> String {
1039        let Sections {
1040            mission,
1041            challenge,
1042            approach,
1043            impact,
1044            research,
1045            ..
1046        } = self.sections;
1047        let Research { focus, areas } = research;
1048        let sections = format!(
1049            r#"
1050<!-- Introduction -->
1051{}
1052
1053<!-- Challenge -->
1054{}
1055
1056<!-- Approach -->
1057{}
1058
1059<!-- Impact -->
1060{}
1061
1062<!-- Focus -->
1063{}
1064
1065<!-- Areas -->
1066{}"#,
1067            mission,
1068            challenge,
1069            approach.into_iter().map(|x| format!("- {x}")).collect::<Vec<String>>().join("\n"),
1070            impact.into_iter().map(|x| format!("- {x}")).collect::<Vec<String>>().join("\n"),
1071            focus,
1072            areas.into_iter().map(|x| format!("- {x}")).collect::<Vec<String>>().join("\n")
1073        );
1074        match self.subtitle {
1075            | Some(subtitle) => format!(
1076                r#"# {}
1077> {}
1078{}"#,
1079                self.title, subtitle, sections
1080            ),
1081            | None => sections.to_string(),
1082        }
1083    }
    /// Formats research activity data
    ///
    /// Returns a rebuilt copy of the research activity with the adjustments below applied. `path`
    /// is the location of the data file; its parent directory is searched for images (the
    /// current directory is used when no path is given).
    /// ### Actions
    /// - Resolves URL of first media object (if found) and add empty caption
    /// - Resolves keywords, technology, organization, partners, sponsors, and affiliation using fuzzy matching against controlled vocabularies
    /// - Formats telephone number
    pub fn format(self, path: Option<PathBuf>) -> ResearchActivity {
        // All changes are applied to this copy; `self` is only read from
        let mut clone = self.clone().copy();
        let path_parent = match path {
            | Some(value) => parent(value),
            | None => PathBuf::from("."),
        };
        // File name of the first path reported by `image_paths`, if any
        // NOTE(review): `file_name().unwrap()` panics if that path has no final component — confirm `image_paths` never returns such a path
        let name = match image_paths(&path_parent) {
            | value if !value.is_empty() => Some(value[0].file_name().unwrap().to_string_lossy().to_string()),
            | _ => None,
        };
        debug!(path = to_absolute_string(path_parent), "=> {} Parent directory", Label::using());
        if let Some(value) = name {
            debug!(value, "=> {} First image", Label::using());
            // Make sure first graphic is well formed with a resolved image URL and caption
            // With existing media, the caption of the first image (or the default caption) is reused;
            // without media, the caption is left empty
            let first_graphic = match self.meta.clone().media {
                | Some(values) if !values.is_empty() => {
                    let caption = self.meta.clone().first_image_caption();
                    let image_data = ImageObject::init().caption(caption.to_string()).content_url(value.clone()).build();
                    MediaObject::Image(image_data)
                }
                | Some(_) | None => {
                    let image_data = ImageObject::init().caption("".to_string()).content_url(value.clone()).build();
                    MediaObject::Image(image_data)
                }
            };
            // Get the rest of the media objects
            // NOTE(review): `skip(1)` drops the first entry whatever its kind, so a leading video would be discarded — confirm
            let rest = match self.clone().meta.media {
                | Some(values) if !values.is_empty() => values.into_iter().skip(1).collect::<Vec<_>>(),
                | Some(_) | None => vec![],
            };
            clone.meta.media = Some([vec![first_graphic], rest].concat());
        };
        clone.meta.keywords = self.clone().resolve(FuzzyValue::Keyword);
        clone.meta.technology = self.clone().resolve(FuzzyValue::Technology);
        // An unparsable phone number is logged and kept unchanged
        clone.contact.telephone = match format_phone_number(&self.contact.telephone) {
            | Ok(value) => value,
            | Err(_) => {
                error!(value = self.contact.telephone, "=> {} Phone number", Label::invalid());
                self.contact.telephone.to_string()
            }
        };
        // An organization that cannot be resolved is replaced with an empty string
        clone.contact.organization = match resolve_from_organization_json(self.clone().contact.organization) {
            | Some(value) => value,
            | None => "".to_string(),
        };
        // A given affiliation is resolved, falling back to the default affiliation. A missing
        // affiliation is derived from the nearest directorate of the organization resolved just
        // above, so this statement must stay after the organization assignment
        clone.contact.affiliation = match self.clone().contact.affiliation {
            | Some(ref affiliation) => match resolve_from_organization_json(affiliation.to_string()) {
                | Some(resolved) => Some(resolved),
                | None => {
                    error!(affiliation, "=> {} Affiliation", Label::not_found());
                    Some(DEFAULT_AFFILIATION.to_string())
                }
            },
            | None => {
                let ornl = &Organization::load()[0];
                match ornl.clone().member(&clone.contact.organization) {
                    | Some(organization) => match organization.nearest(OrganizationType::Directorate) {
                        | Some(Organization { name, .. }) => Some(name),
                        | None => Some(DEFAULT_AFFILIATION.to_string()),
                    },
                    | None => {
                        error!("=> {} Nearest directorate", Label::not_found());
                        Some(DEFAULT_AFFILIATION.to_string())
                    }
                }
            }
        };
        // Partners and sponsors are stored as `None` when nothing could be resolved
        clone.meta.partners = match self.clone().resolve(FuzzyValue::Partner) {
            | values if !values.is_empty() => Some(values),
            | _ => None,
        };
        clone.meta.sponsors = match self.clone().resolve(FuzzyValue::Sponsor) {
            | values if !values.is_empty() => Some(values),
            | _ => None,
        };
        clone
    }
1166    /// Read and parse research activity data (JSON or YAML)
1167    pub fn read(path: PathBuf) -> Option<ResearchActivity> {
1168        let content = match MimeType::from_path(path.clone()) {
1169            | MimeType::Json => match ResearchActivity::read_json(path.clone()) {
1170                | Ok(value) => Some(value),
1171                | Err(_) => None,
1172            },
1173            | MimeType::Yaml => match ResearchActivity::read_yaml(path.clone()) {
1174                | Ok(value) => Some(value),
1175                | Err(_) => None,
1176            },
1177            | _ => unimplemented!("Unsupported research activity data file extension"),
1178        };
1179        let label = match content {
1180            | Some(_) => Label::using(),
1181            | _ => Label::invalid(),
1182        };
1183        match content {
1184            | Some(data) => {
1185                debug!(path = path.to_str().unwrap(), "=> {}", label);
1186                trace!("=> {} Research activity data = {:#?}", label, data.dimmed().cyan());
1187                Some(data)
1188            }
1189            | None => {
1190                error!(path = path.to_str().unwrap(), "=> {}", label);
1191                None
1192            }
1193        }
1194    }
1195    /// Read research activity data using Serde and [`ResearchActivity`] struct
1196    fn read_json(path: PathBuf) -> serde_json::Result<ResearchActivity> {
1197        let content = match read_file(path.clone()) {
1198            | Ok(value) if !value.is_empty() => value,
1199            | Ok(_) | Err(_) => {
1200                error!(path = path.to_str().unwrap(), "=> {} RAD content is not valid", Label::fail());
1201                "{}".to_owned()
1202            }
1203        };
1204        let data: serde_json::Result<ResearchActivity> = serde_json::from_str(&content);
1205        let label = match data {
1206            | Ok(_) => Label::using(),
1207            | Err(_) => Label::invalid(),
1208        };
1209        match &data {
1210            | Ok(_) => trace!("=> {} RAD content = {:#?}", label, data.dimmed()),
1211            | Err(why) => error!("=> {} Parse RAD content - {}", label, why.red()),
1212        }
1213        data
1214    }
1215    /// Read research activity data (e.g., `buckets.yaml`) using Serde and [`ResearchActivity`] struct
1216    fn read_yaml(path: PathBuf) -> serde_yml::Result<ResearchActivity> {
1217        let content = match read_file(path.clone()) {
1218            | Ok(value) => value,
1219            | Err(_) => {
1220                error!(path = path.to_str().unwrap(), "=> {} RAD content is not valid", Label::fail());
1221                "".to_owned()
1222            }
1223        };
1224        let data: serde_yml::Result<ResearchActivity> = serde_yml::from_str(&content);
1225        let label = match data {
1226            | Ok(_) => Label::output(),
1227            | Err(_) => Label::fail(),
1228        };
1229        match &data {
1230            | Ok(_) => trace!("=> {} RAD content = {:#?}", label, data.dimmed()),
1231            | Err(why) => error!("=> {} Parse RAD content - {}", label, why.red()),
1232        }
1233        data
1234    }
1235    /// Resolve values to intended values according to controlled vocabularies and conventions
1236    fn resolve(self, value_type: FuzzyValue) -> Vec<String> {
1237        let values: Vec<_> = match value_type {
1238            | FuzzyValue::Keyword => self.meta.keywords,
1239            | FuzzyValue::Partner => match self.meta.partners {
1240                | Some(values) => values,
1241                | None => vec![],
1242            },
1243            | FuzzyValue::Sponsor => match self.meta.sponsors {
1244                | Some(values) => values,
1245                | None => vec![],
1246            },
1247            | FuzzyValue::Technology => self.meta.technology,
1248        };
1249        let mut data: Vec<_> = values
1250            .into_iter()
1251            .flat_map(|x| resolve_from_csv_asset(format!("{value_type}"), x))
1252            .collect();
1253        data.sort();
1254        data.dedup();
1255        data
1256    }
1257    /// Export to markdown
1258    pub fn to_markdown(self) -> String {
1259        let ResearchActivity { title, .. } = self.clone();
1260        format!("# {title}")
1261    }
1262    fn validation_issues(self) -> Vec<Check> {
1263        fn errors_collect<T: Validate>(attribute: T) -> Option<Vec<Check>> {
1264            match attribute.validate() {
1265                | Ok(_) => None,
1266                | Err(err) => Some(
1267                    err.into_errors()
1268                        .into_iter()
1269                        .map(|(key, value)| {
1270                            Check::init()
1271                                .category(CheckCategory::Schema)
1272                                .success(false)
1273                                .errors(ErrorKind::Validator(value))
1274                                .message(key.to_string())
1275                                .build()
1276                        })
1277                        .collect::<Vec<Check>>(),
1278                ),
1279            }
1280        }
1281        let mut found = vec![errors_collect::<ResearchActivity>(self.clone())];
1282        match self.meta.media {
1283            | Some(values) => values.iter().for_each(|media| match media {
1284                | MediaObject::Image(x) => found.push(errors_collect::<ImageObject>(x.clone())),
1285                | MediaObject::Video(x) => found.push(errors_collect::<VideoObject>(x.clone())),
1286            }),
1287            | None => {}
1288        }
1289        found.into_iter().flatten().flatten().collect::<Vec<_>>()
1290    }
1291}
impl View for ContactPoint {
    /// Renders the contact point as a `<section id="contact">` element
    ///
    /// The section shows, in order, the full name (given name followed by family name), the job
    /// title, the email address, and the telephone number, separated by `|` spacers.
    fn render(&self) -> VirtualNode {
        // Only the displayed fields are bound; `job_title` is referred to as `role` in the markup
        let ContactPoint {
            given_name,
            family_name,
            job_title: role,
            email,
            telephone,
            ..
        } = self;
        html! {
            <section id="contact">
                <div>
                    <span class="label">Contact</span>
                    <span class="spacer"> </span>
                    <span class="name">{ format!("{} {}", given_name, family_name) }</span>
                    <span class="spacer">|</span>
                    <span class="title">{ role }</span>
                    <span class="spacer">|</span>
                    <span class="email">{ email }</span>
                    <span class="spacer">|</span>
                    <span class="phone">{ telephone }</span>
                </div>
            </section>
        }
    }
}
1319fn match_list<I: IntoIterator<Item = String> + Clone>(value: String, values: I) -> Vec<(String, u32)> {
1320    let pattern = Pattern::parse(&value, CaseMatching::Ignore, Normalization::Smart);
1321    let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
1322    pattern.match_list(values.clone(), &mut matcher)
1323}
1324fn print_resolution(output: Option<String>, value: String, name: String) {
1325    let label = name.to_case(Case::Title);
1326    match output {
1327        | Some(resolved) => {
1328            if resolved.eq(&value.to_string()) {
1329                trace!("=> {} {} = \"{}\"", Label::using(), label, value.clone());
1330            } else {
1331                debug!(input = value.clone(), resolved, "=> {} {}", Label::found(), label);
1332            }
1333        }
1334        | None => {
1335            debug!(value = value.clone(), "=> {} {}", Label::not_found(), label);
1336        }
1337    };
1338}
1339fn resolve_from_csv_asset(name: String, value: String) -> Option<String> {
1340    let data = Constant::csv(&name);
1341    resolve_from_list_of_lists(value, data, name)
1342}
1343fn resolve_from_list_of_lists<I: IntoIterator<Item = Vec<String>>>(value: String, data: I, name: String) -> Option<String> {
1344    let output = data
1345        .into_iter()
1346        .flat_map(|values| {
1347            let sanitized = sanitize(value.clone());
1348            let matched = match_list(sanitized, values.clone());
1349            trace!("{} => {:?}", value.clone(), matched.clone());
1350            if matched.clone().is_empty() {
1351                None
1352            } else {
1353                match values.first() {
1354                    | Some(x) => {
1355                        if value.eq(x) {
1356                            Some((x.into(), 10000))
1357                        } else {
1358                            let score = matched.into_iter().map(|(_, score)| score).max();
1359                            match score {
1360                                | Some(value) if value > 0 => Some((x.to_string(), value)),
1361                                | Some(_) | None => None,
1362                            }
1363                        }
1364                    }
1365                    | None => None,
1366                }
1367            }
1368        })
1369        .max_by_key(|(_, score)| *score)
1370        .map(|(x, _)| x.to_string());
1371    print_resolution(output.clone(), value, name);
1372    output
1373}
1374fn resolve_from_organization_json(value: String) -> Option<String> {
1375    let organization = &Organization::load()[0];
1376    let mut items = vec![organization.clone()];
1377    let directorates = organization.member.clone();
1378    for directorate in &directorates {
1379        items.push(directorate.clone());
1380        let divisions = directorate.member.clone();
1381        for division in &divisions {
1382            items.push(division.clone());
1383        }
1384    }
1385    let data = items
1386        .into_iter()
1387        .map(|x| (x.name.clone(), x.alternative_name.clone()))
1388        .filter(|(name, alias)| !(name.is_empty() && alias.is_none()))
1389        .map(|(name, alias)| {
1390            let alternative_name = match alias {
1391                | Some(x) => x.to_string(),
1392                | None => name.clone(),
1393            };
1394            vec![name, alternative_name]
1395        })
1396        .collect::<Vec<Vec<String>>>();
1397    resolve_from_list_of_lists(value, data, "organization".to_string())
1398}
/// Normalizes a value before fuzzy matching
///
/// Removes every `-`, `_`, `.`, and `,` character, replaces `&` with `and`, and trims
/// surrounding whitespace. A plain character filter is used, so no pattern is compiled per call
/// and there is no failure path that could leak an error message into the result.
fn sanitize(value: String) -> String {
    let stripped: String = value
        .chars()
        .filter(|character| !matches!(character, '-' | '_' | '.' | ','))
        .collect();
    stripped.replace('&', "and").trim().to_string()
}
1405
1406#[cfg(test)]
1407mod tests;