acorn_lib/schema/pid/mod.rs
1//! Persistent Identifiers (PID)
2//!
3//! Contains functions for working with persistent identifiers (PID) such as [`ORCID`], [`DOI`]s, and [RAiD](`raid`)s
4//!
5//! ### Features
6//! - Best in class validation
7//! - Convert persistent identifiers into standard formats
8//! - Access the sub parts of a persistent identifier
9//!
10//! [RAiDs]: https://www.raid.org/
11use crate::constants::{DEFAULT_ORCID_SCHEMA_URI, RE_ARK_TEXT, RE_DOI, RE_DOI_TEXT, RE_ORCID, RE_ORCID_TEXT};
12use crate::util::{regex_capture_lookup, ToStringChunks};
13use bon::{builder, Builder};
14use core::fmt::Display;
15
16pub mod raid;
17
/// Betanumeric alphabet: the ten decimal digits followed by a set of lowercase consonants
///
/// A character's zero-based position in this string is its betanumeric ordinal
const BETANUMERIC_DIGITS: &str = "0123456789bcdfghjkmnpqrstvwxz";
19
/// Helpers for values drawn from the betanumeric alphabet
///
/// Primarily exists to support the [NCDA](`noid_check_digit`)
pub trait Betanumeric {
    /// Report whether `self` is a betanumeric value
    ///
    /// The provided implementation always answers `false`; implementors override it
    fn is_betanumeric(&self) -> bool {
        false
    }
    /// Map `self` to its betanumeric ordinal value (`None` when it has none)
    /// ### Example
    /// > `w` -> `26`
    fn to_betanumeric_ordinal(&self) -> Option<usize>;
}
/// Behaviour shared by the persistent identifier (PID) types of this module
pub trait PersistentIdentifier: Display {
    /// Create a new PID
    fn new() -> Self;
    /// Schema URI of the PID in standardized form
    /// ### Examples
    /// - `https://doi.org`
    /// - `https://orcid.org`
    fn schema_uri(&self) -> String;
    /// Identifier section of the PID
    /// ### Examples
    /// - `ark:1234/x5678` for [`ARK`]
    /// - `10.1234/5678` for [`DOI`]
    /// - `0000-0002-2057-9115` for [`ORCID`]
    fn identifier(&self) -> String;
    /// PID prefix (interpreted differently by each PID type)
    ///
    /// Not every PID type has a prefix, but generally every PID has a "first" part that can loosely be considered a "prefix".
    /// The provided implementation returns `None`.
    fn prefix(&self) -> Option<String> {
        None
    }
    /// PID suffix (interpreted differently by each PID type)
    ///
    /// Not every PID type has a suffix, but generally every PID has a "second" part that can loosely be considered a "suffix".
    /// The provided implementation returns `None`.
    fn suffix(&self) -> Option<String> {
        None
    }
    /// PID check digit (when applicable); the provided implementation returns `None`
    fn check_digit(&self) -> Option<char> {
        None
    }
    /// Render `self` as a string in its standard format
    ///
    /// The provided implementation defers to the type's [`Display`] output
    fn format(&self) -> String {
        self.to_string()
    }
    /// Check if PID is valid; the provided implementation always returns `false`
    fn is_valid(&self) -> bool {
        false
    }
}
73/// Add coercion to persistent identifier (PID) functionality to string values
74pub trait PersistentIdentifierConvert<T: AsRef<str>> {
75    /// Convert `self` into a string standard format PID of a certain type
76    /// ```rust
77    /// use acorn_lib::schema::pid::{PID, PersistentIdentifier};
78    ///
79    /// assert_eq!("https://doi.org/10.1234/5678".format_as(PID::DOI), "10.1234/5678");
80    /// assert_eq!("0000-0002-2057-9115".format_as(PID::ORCID), "https://orcid.org/0000-0002-2057-9115");
81    /// ```
82    fn format_as(&self, pid_type: PID) -> String;
83    /// Coerce `self` into given PID type.
84    /// ```rust
85    /// use acorn_lib::schema::pid::{PID, PersistentIdentifier};
86    ///
87    /// let doi = "https://doi.org/10.1234/5678".to_pid(PID::DOI).to_doi();
88    /// assert_eq!(doi.suffix(), "5678");
89    /// ```
90    fn to_pid(&self, pid_type: PID) -> PersistentIdentifierInternal;
91    /// Determines if `self` is of the given PID type.
92    /// ```rust
93    /// use acorn_lib::schema::pid::{PID, PersistentIdentifier};
94    /// assert!("https://doi.org/10.1234/5678".is_pid(PID::DOI));
95    /// ```
96    fn is_pid(&self, _pid_type: PID) -> bool {
97        false
98    }
99    /// Determines if `self` is an archival resource key (ARK)
100    /// ```rust
101    /// use acorn_lib::schema::pid::{PID, PersistentIdentifier};
102    ///
103    /// assert!("https://n2t.net/ark:12148/btv1b8449691v/f29".is_ark());
104    /// ```
105    fn is_ark(&self) -> bool {
106        false
107    }
108    /// Determines if `self` is a DOI
109    /// ```rust
110    /// use acorn_lib::schema::pid::{PID, PersistentIdentifier};
111    ///
112    /// assert!("https://doi.org/10.1234/5678".is_doi());
113    /// ```
114    fn is_doi(&self) -> bool {
115        false
116    }
117    /// Determines if `self` is a ORCID
118    /// ```rust
119    /// use acorn_lib::schema::pid::{PID, PersistentIdentifier};
120    ///
121    /// assert!("https://orcid.org/0000-0000-0000-0000".is_orcid());
122    /// ```
123    fn is_orcid(&self) -> bool {
124        false
125    }
126}
127/// Internal representation of a persistent identifier
128#[derive(Default)]
129pub struct PersistentIdentifierInternal {
130    /// Raw string content of the (possible) PID
131    value: String,
132    /// Type of PID
133    pid_type: PID,
134}
/// Persistent Identifier (PID) types
///
/// PIDs are globally unique identifiers, resolvable on the Web, and associated with a set of additional descriptive metadata (ex. [`raid::Metadata`])
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum PID {
    /// Archival Resource Key (ARK)
    ///
    /// Widely used persistent identifier, supported by the California Digital Library, in collaboration with DuraSpace. ARKs work similarly to DOIs, but are more permissive in design.[^ark]
    ///
    /// [^ark]: `M. Stocker et al., "Persistent Identification of Instruments," Data Science Journal, vol. 19, p. 18, May 2020, doi: 10.5334/dsj-2020-018.`
    ARK,
    /// Digital Object Identifier (DOI)
    ///
    /// See [`DOI`]
    DOI,
    /// Open Researcher and Contributor ID (ORCiD)
    ///
    /// See [`ORCID`]
    ORCID,
    /// Persistent Identification of Instruments (PIDINST)
    /// ### Citation
    /// ```text
    /// M. Stocker et al., "Persistent Identification of Instruments," Data Science Journal, vol. 19, p. 18, May 2020, doi: 10.5334/dsj-2020-018.
    /// ```
    PIDINST,
    /// Research Activity Identifier (RAiD)
    ///
    /// Developed by the Australian Research Data Commons (ARDC), used to identify research projects and activities for access by research communities worldwide
    ///
    /// The ARDC and [DataCite](https://datacite.org/) have entered an agreement to use DataCite [`DOI`]s as RAiD identifiers
    ///
    /// See [`raid`] module
    RAID,
    /// Research Organization Registry (ROR)
    ///
    /// Global, community-led registry of open persistent identifiers for research organizations
    ///
    /// See <https://www.ror.org/> for more information
    ROR,
    /// Unknown PID (the default variant)
    #[default]
    Unknown,
}
178/// Archival Resource Key (ARK)
179/// ### Notes
180/// - ARKs are the only mainstream, non-siloed, non-paywalled identifiers that you can register to use in about 48 hours
181/// - ARKs are decentralized
182/// - There are no fees for ARKs, PURLs, and URNs
183/// - ARKs give access to almost any kind of thing, whether digital, physical, abstract, person, group, etc.
184/// - ARKs can be deleted
185/// - ARKs support early object development
186/// - ARKs that differ only by hyphens are considered identical
187///
188/// See the [ARK specification](https://datatracker.ietf.org/doc/draft-kunze-ark/) and <https://wiki.lyrasis.org/display/ARKs/ARK+Identifiers+FAQ> for more information
189#[derive(Builder, Clone, Debug)]
190#[builder(start_fn = init)]
191pub struct ARK {
192    /// The part of the ARK string that your organization is responsible for making unique.
193    ///
194    /// The first 2 or more characters constitue the shoulder of the ARK which must meet the following criteria:
195    /// - Must start with one or more lowercase letters
196    /// - Must end with a digit (non-zero preferred)
197    /// - Must not contain vowels or the letter "l" (ell)
198    /// - Must not contain any `/` characters (being opaque is part of the shoulder design)
199    pub assigned_name: Option<String>,
200    /// Prefix for NAAN (e.g., "ark:" or the older, "ark:/")
201    ///
202    /// <div class="warning">Label is mandatory</div>
203    #[builder(default = "ark:".to_string())]
204    pub label: String,
205    /// Number (here represented as a string) identifying an organization that creates or assigns identifiers
206    /// ### Notes
207    /// - Since 2001, every assigned name assigning authority number (NAAN) has consisted of exactly five digits, specifically five [beta-numeric digits](`BETA_NUMERIC_DIGITS`)
208    /// - Any given identifier will have exactly one NAAN but may have more than one NMA (at a time or over time)
209    /// - Similar to registration authority or prefix for [`DOI`]s, naming authority for [Handles], and namespace identifier for [URNs]
210    ///
211    /// [Handles]: https://handle.net/
212    /// [URNs]: https://en.wikipedia.org/wiki/Uniform_Resource_Name
213    pub name_assigning_authority_number: Option<String>,
214    /// String identifying a service that accepts names and returns information about them
215    /// ### Notes
216    /// > Any given identifier will have exactly one NAAN but may have more than one NMA (at a time or over time)
217    pub name_mapping_authority: Option<String>,
218    /// First section of optional "qualifier" part of ARK
219    ///
220    /// Generally serve as sub-namespaces to enabling grouping ARKs
221    #[builder(default = Vec::new())]
222    pub parts: Vec<String>,
223    /// Last section of optional "qualifier" part of ARK
224    ///
225    /// Typically is used to identify a specific version of a resource (i.e., "pdf", "fr", "v3", etc.)
226    #[builder(default = Vec::new())]
227    pub variants: Vec<String>,
228}
229/// Digital Object Identifier (DOI)
230///
231/// DOIs consist of a DOI name which is resolved at <https://doi.org>, with the full URI formulated according to the pattern `https://doi.org/{DOI_name}`. DOI names in turn consist of a prefix and a suffix, separated by a forward slash. The prefix is a code indicating the registrant who issues the DOI, e.g., Harvard University Dataverse - 10.7910; Dryad Digital Repository - 10.5061. The suffix is the identifier, in any form, assigned by the registrant.[^doi]
232///
233/// See <https://www.doi.org/doi-handbook/HTML/index.html> for more information
234///
235/// [^doi]: `N. Juty, S. M. Wimalaratne, S. Soiland-Reyes, J. Kunze, C. A. Goble, and T. Clark, "Unique, Persistent, Resolvable: Identifiers as the Foundation of FAIR," Data Intellegence, vol. 2, no. 1-2, pp. 30-39, Jan. 2020, doi: 10.1162/dint_a_00025.`
236#[derive(Builder, Clone, Debug)]
237#[builder(start_fn = init)]
238pub struct DOI {
239    /// Schema URI (i.e., <https://doi.org/>)
240    pub schema_uri: Option<String>,
241    /// Directory indicator
242    /// ### Rules
243    /// - Can contain only numeric values
244    /// - usually 10 but other indicators may be designated as compliant by the DOI Foundation
245    pub directory_indicator: Option<String>,
246    /// Registrant code
247    /// ### Rules
248    /// - Can contain only numeric values and one or several full stops which are used to subdivide the code
249    /// - If the directory indicator is 10 then a registrant code is mandatory
250    pub registrant_code: Option<String>,
251    /// Suffix
252    /// ### Rules
253    /// - Shall be unique to the prefix element that precedes it
254    /// - Can be a sequential number
255    /// - Can be an identifier generated from or based on another system used by the registrant
256    /// - No length limit is set to the suffix by the DOI System
257    pub suffix: Option<String>,
258}
259/// Open Researcher and Contributor ID (ORCiD)[^orcid]
260///
261/// Disambiguates researchers, and connects people with their research activities. This includes employment affiliations, research outputs, funding, peer review activity, research resources, society membership, distinctions and other scholarly infrastructure.
262///
263/// See <https://orcid.org/> for more information
264///
265/// [^orcid]: `L. L. Haak, M. Fenner, L. Paglione, E. Pentz, and H. Ratner, "ORCID: a system to uniquely identify researchers," Learned Publishing, vol. 25, no. 4, pp. 259-264, 2012, doi: 10.1087/20120404.`
266#[derive(Builder, Clone, Debug)]
267#[builder(start_fn = init)]
268pub struct ORCID {
269    /// Schema URI (i.e., <https://orcid.org/>)
270    pub schema_uri: Option<String>,
271    /// 16 digit string with hyphens every 4 digits (for readability)
272    /// <div class="warning">This value can be stored with or without hyphens. To ensure compliancy, use <code>ORCID::identifier</code> method to access ORCiD identifier.</div>
273    pub identifier: Option<String>,
274    /// The check digit is the last (16th) digit of the identifier
275    /// ### Note
276    /// Check digit should be verified IAW by [ISO 7064, MOD 11-2](https://www.iso.org/standard/31531.html) (see [`iso7064_check_digit`])
277    pub check_digit: Option<String>,
278}
279impl ARK {
280    /// Convenience method for easily parsing and formatting an ARK from a string value
281    /// ### Example
282    /// ```rust
283    /// use acorn_lib::schema::pid::ARK;
284    ///
285    /// assert_eq!(ARK::format("ark:/1234/5678"), "ark:/1234/5678");
286    /// assert_eq!(ARK::format("https://n2t.net/ark:12148/btv1b8449691v/f29"), "ark:12148/btv1b8449691v/f29");
287    /// ```
288    pub fn format<S>(value: S) -> String
289    where
290        S: AsRef<str>,
291    {
292        ARK::from_string(value).to_string()
293    }
294    /// Create new ARK by parsing raw string value
295    /// ### Example
296    /// ```rust
297    /// use acorn_lib::schema::pid::ARK;
298    ///
299    /// let doi = ARK::from_string("");
300    ///
301    /// ```
302    pub fn from_string<S>(value: S) -> ARK
303    where
304        S: AsRef<str>,
305    {
306        let names = ["nma", "schema_uri", "label", "naan", "assigned_name", "parts", "variants"];
307        let lookup = regex_capture_lookup(RE_ARK_TEXT, value.as_ref(), names.to_vec());
308        let parts = match lookup.get("parts") {
309            | Some(value) => value.split('/').map(String::from).collect(),
310            | None => vec![],
311        };
312        let variants = match lookup.get("variants") {
313            | Some(value) => value.split('.').map(String::from).collect(),
314            | None => vec![],
315        };
316        ARK::init()
317            .maybe_assigned_name(lookup.get("assigned_name").cloned())
318            .maybe_label(lookup.get("label").cloned())
319            .maybe_name_assigning_authority_number(lookup.get("naan").cloned())
320            .maybe_name_mapping_authority(lookup.get("nma").cloned())
321            .parts(parts)
322            .variants(variants)
323            .build()
324    }
325    /// Check if value is a valid ARK
326    /// ### Conditions
327    /// - ARKs are preferred to be "actionable" with the inclusion of a NMA URL, but are not required to be so (NMA is optional)
328    /// - If ARK is to contain a URL, "https" is the only allowed scheme
329    /// - Should have only one instance of "ark:" label
330    /// - NAAN should be an integer
331    /// - [Assigned name](`ARK::assigned_name`) should start with a valid [shoulder](https://arks.org/about/shoulders/)
332    /// - Last character should be valid check digit (see [`noid_check_digit`])
333    /// ### Example
334    /// ```rust
335    /// use acorn_lib::schema::pid::ARK;
336    ///
337    /// assert!(ARK::is_valid("https://n2t.net/ark:99166/w66d60p2"));
338    /// assert!(ARK::is_valid("https://n2t.net/ark:12148/btv1b8449691v/f29"));
339    /// ```
340    pub fn is_valid<S>(value: S) -> bool
341    where
342        S: AsRef<str>,
343    {
344        let ark = ARK::from_string(value);
345        // TODO: Test NAAN length? Test betanumeric?
346        let naan_is_integer = match ark.name_assigning_authority_number {
347            | Some(value) => match value.parse::<u32>() {
348                | Ok(_) => true,
349                | Err(_) => false,
350            },
351            | None => false,
352        };
353        let shoulder_starts_with_lowercase_letter = match ark.assigned_name {
354            | Some(value) => match value.chars().next() {
355                | Some(value) => value.is_ascii_lowercase() && !value.eq(&'l'),
356                | None => false,
357            },
358            | None => false,
359        };
360        naan_is_integer && shoulder_starts_with_lowercase_letter
361    }
362}
363impl DOI {
364    /// Convenience method for easily parsing and formatting a DOI from a string value
365    /// ### Example
366    /// ```rust
367    /// use acorn_lib::schema::pid::DOI;
368    ///
369    /// assert_eq!(DOI::format("https://doi.org/10.1000/182"), "10.1000/182");
370    /// assert_eq!(DOI::format("10.1000/182"), "10.1000/182");
371    /// ```
372    pub fn format<S>(value: S) -> String
373    where
374        S: AsRef<str>,
375    {
376        DOI::from_string(value).to_string()
377    }
378    /// Create new DOI by parsing raw string value
379    /// ### Example
380    /// ```rust
381    /// use acorn_lib::schema::pid::DOI;
382    ///
383    /// let doi = DOI::from_string("https://doi.org/10.1000/182");
384    /// assert_eq!(doi.prefix(), "10.1000");
385    /// assert_eq!(doi.suffix(), "182");
386    /// ```
387    pub fn from_string<S>(value: S) -> DOI
388    where
389        S: AsRef<str>,
390    {
391        let names = ["schema_uri", "directory_indicator", "registrant_code", "suffix"];
392        let lookup = regex_capture_lookup(RE_DOI_TEXT, value.as_ref(), names.to_vec());
393        DOI::init()
394            .maybe_schema_uri(lookup.get("schema_uri").cloned())
395            .maybe_directory_indicator(lookup.get("directory_indicator").cloned())
396            .maybe_registrant_code(lookup.get("registrant_code").cloned())
397            .maybe_suffix(lookup.get("suffix").cloned())
398            .build()
399    }
400    /// Check if value is a valid DOI
401    /// ### Conditions
402    /// - Must match DOI regular expression (see [`RE_DOI_TEXT`])
403    /// - Is valid with or without schema URI[^format]
404    /// - `10.5555/` is not a valid DOI prefix
405    /// ### Example
406    /// ```rust
407    /// use acorn_lib::schema::pid::DOI;
408    ///
409    /// assert!(DOI::is_valid("https://doi.org/10.1000/182"));
410    /// assert!(DOI::is_valid("10.1000/182"));
411    /// ```
412    ///
413    /// [^format]: Use `DOI::format(value)` to ensure value is formatted correctly
414    pub fn is_valid<S>(value: S) -> bool
415    where
416        S: AsRef<str>,
417    {
418        match RE_DOI.is_match(value.as_ref()) {
419            | Ok(x) if x && !value.as_ref().contains("10.5555/") => true,
420            | _ => false,
421        }
422    }
423}
424impl ORCID {
425    /// Convenience method for easily parsing and formatting a ORCID from a string value
426    /// ### Example
427    /// ```rust
428    /// use acorn_lib::schema::pid::ORCID;
429    ///
430    /// assert_eq!(ORCID::format("https://orcid.org/0000-0002-2057-9115"), "https://orcid.org/0000-0002-2057-9115");
431    /// assert_eq!(ORCID::format("0000-0002-2057-9115"), "https://orcid.org/0000-0002-2057-9115");
432    /// ```
433    pub fn format<S>(value: S) -> String
434    where
435        S: AsRef<str>,
436    {
437        ORCID::from_string(value).to_string()
438    }
439    /// Create new ORCID by parsing raw string value
440    /// ### Example
441    /// ```rust
442    /// use acorn_lib::schema::pid::ORCID;
443    ///
444    /// let orcid = ORCID::from_string("https://orcid.org/0000-0002-2057-9115");
445    /// assert_eq!(orcid.identifier(), "0000-0002-2057-9115");
446    /// ```
447    pub fn from_string<S>(value: S) -> ORCID
448    where
449        S: AsRef<str>,
450    {
451        let names = ["schema_uri", "identifier", "check_digit"];
452        let lookup = regex_capture_lookup(RE_ORCID_TEXT, value.as_ref(), names.to_vec());
453        ORCID::init()
454            .maybe_schema_uri(lookup.get("schema_uri").cloned())
455            .maybe_identifier(lookup.get("identifier").cloned())
456            .maybe_check_digit(lookup.get("check_digit").cloned())
457            .build()
458    }
459    /// Check if value is a valid ORCiD
460    /// ### Conditions
461    /// - ORCiD identifier must be 16 characters, 0 thru 9, or "X"
462    /// - Last character of identifier must be a valid ISO 7064 check digit (see [`iso7064_check_digit`])
463    /// - Value can be valid with or without hyphens in the ORCiD identifier[^format]
464    /// - Value can be valid with or without schema URI[^format]
465    /// ### Example
466    /// ```rust
467    /// use acorn_lib::schema::pid::ORCID;
468    ///
469    /// assert!(ORCID::is_valid("https://orcid.org/0000-0002-2057-9115"));
470    /// assert!(ORCID::is_valid("0000-0002-2057-9115"));
471    /// assert!(ORCID::is_valid("0000000220579115"));
472    /// ```
473    ///
474    /// [^format]: Use `ORCID::format(value)` to ensure value is formatted correctly
475    pub fn is_valid<S>(value: S) -> bool
476    where
477        S: AsRef<str>,
478    {
479        let orcid = ORCID::from_string(value.as_ref());
480        let identifier = orcid.identifier();
481        let last = identifier.chars().last().unwrap_or_default();
482        match iso7064_check_digit(identifier.as_str()) {
483            | Some(check_digit) => match RE_ORCID.is_match(value.as_ref()) {
484                | Ok(true) if check_digit.eq(&last) && identifier.len() == 19 => true,
485                | _ => false,
486            },
487            | _ => false,
488        }
489    }
490}
491impl Default for ARK {
492    fn default() -> Self {
493        Self::new()
494    }
495}
496impl Default for DOI {
497    fn default() -> Self {
498        Self::new()
499    }
500}
501impl Default for ORCID {
502    fn default() -> Self {
503        Self::new()
504    }
505}
506impl Display for ARK {
507    /// Format a ARK into a standard format of `"{NMA}{label}{NAAN}/{Assigned Name}/{Parts}{Variants}"`
508    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
509        let nma = self.name_mapping_authority.clone().unwrap_or_default().trim_end_matches('/').to_string();
510        let identifier = self.identifier();
511        let result = [nma, identifier].into_iter().filter(|x| !x.is_empty()).collect::<Vec<String>>().join("/");
512        write!(f, "{result}")
513    }
514}
515impl Display for DOI {
516    /// Format a DOI into a standard format of `"{prefix}/{suffix}"`
517    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
518        let result = self.identifier();
519        write!(f, "{result}")
520    }
521}
522impl Display for ORCID {
523    /// Format a ORCiD into a standard format of `"{schema_uri}{identifier}"`
524    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
525        let schema_uri = self.schema_uri();
526        let identifier = self.identifier();
527        let uri = if schema_uri.is_empty() { DEFAULT_ORCID_SCHEMA_URI } else { &schema_uri };
528        let values = match &self.identifier {
529            | Some(_) => [uri, &identifier].to_vec(),
530            | None => vec![],
531        };
532        let result = values
533            .into_iter()
534            .filter(|x| !x.is_empty())
535            .map(String::from)
536            .collect::<Vec<String>>()
537            .join("");
538        write!(f, "{result}")
539    }
540}
541impl PersistentIdentifier for ARK {
542    fn new() -> Self {
543        ARK::init().build()
544    }
545    fn format(&self) -> String {
546        self.to_string()
547    }
548    fn schema_uri(&self) -> String {
549        let uri = match &self.name_mapping_authority {
550            | Some(value) => value,
551            | None => "",
552        };
553        uri.to_string()
554    }
555    fn identifier(&self) -> String {
556        let values = [self.prefix(), self.suffix()];
557        values
558            .iter()
559            .flatten()
560            .filter(|x| !x.is_empty())
561            .map(String::from)
562            .collect::<Vec<String>>()
563            .join("/")
564    }
565    fn prefix(&self) -> Option<String> {
566        let Self {
567            label,
568            name_assigning_authority_number,
569            assigned_name,
570            ..
571        } = self;
572        if [name_assigning_authority_number.clone(), assigned_name.clone()]
573            .iter()
574            .all(|x| x.is_some())
575        {
576            let result = format!(
577                "{}{}/{}",
578                label.trim_end_matches('/'),
579                name_assigning_authority_number.as_ref().unwrap(),
580                assigned_name.as_ref().unwrap()
581            );
582            Some(result)
583        } else {
584            None
585        }
586    }
587    /// Returns consistent string representation of ARK qualifiers which often name subobjects
588    /// of a persistent object that are less stable and less opaquely named than the parent object
589    fn suffix(&self) -> Option<String> {
590        let parts = self.parts.join("/");
591        let variants = self.variants.join(".");
592        let qualifiers = [parts, variants];
593        let result = qualifiers
594            .iter()
595            .filter(|x| !x.is_empty())
596            .map(String::from)
597            .collect::<Vec<String>>()
598            .join(".");
599        Some(result)
600    }
601    fn check_digit(&self) -> Option<char> {
602        let Self {
603            name_assigning_authority_number: naan,
604            assigned_name: name,
605            ..
606        } = self;
607        let values = [naan.clone(), name.clone()];
608        if values.iter().all(|x| x.is_some()) {
609            let value = values.iter().flatten().map(String::from).collect::<Vec<String>>().join("/");
610            noid_check_digit(value)
611        } else {
612            None
613        }
614    }
615}
616impl PersistentIdentifier for DOI {
617    fn new() -> Self {
618        DOI::init().build()
619    }
620    fn schema_uri(&self) -> String {
621        self.schema_uri.as_ref().cloned().unwrap_or_default()
622    }
623    fn identifier(&self) -> String {
624        let values = [self.prefix(), self.suffix()];
625        values
626            .iter()
627            .flatten()
628            .filter(|x| !x.is_empty())
629            .map(String::from)
630            .collect::<Vec<String>>()
631            .join("/")
632    }
633    /// Get DOI prefix (i.e., "{directory_indicator}.{registrant_code}")
634    fn prefix(&self) -> Option<String> {
635        let values = [
636            self.directory_indicator.as_ref().cloned().unwrap_or_default(),
637            self.registrant_code.as_ref().cloned().unwrap_or_default(),
638        ];
639        let result = values
640            .iter()
641            .filter(|x| !x.is_empty())
642            .map(String::from)
643            .collect::<Vec<String>>()
644            .join(".");
645        Some(result)
646    }
647    /// Get DOI suffix
648    fn suffix(&self) -> Option<String> {
649        let result = self.suffix.as_ref().cloned().unwrap_or_default();
650        if !result.is_empty() {
651            Some(result)
652        } else {
653            None
654        }
655    }
656}
657impl PersistentIdentifier for ORCID {
658    fn new() -> Self {
659        ORCID::init().build()
660    }
661    /// Get ORCID schema URI
662    /// ### Notes
663    /// - Should always be "<https://orcid.org/>"
664    fn schema_uri(&self) -> String {
665        self.schema_uri.as_ref().cloned().unwrap_or_default()
666    }
667    /// Get ORCID identifier
668    /// ### Notes
669    /// - Will return an empty string if no identifier is present
670    /// - Will always return a 19 character string with a hyphen every 4 characters (i.e., "0000-0000-0000-0000")
671    fn identifier(&self) -> String {
672        let stripped = self.identifier.as_ref().cloned().unwrap_or_default().replace("-", "");
673        stripped.chunk(4).join("-")
674    }
675    fn suffix(&self) -> Option<String> {
676        Some(self.identifier())
677    }
678    fn check_digit(&self) -> Option<char> {
679        iso7064_check_digit(self.identifier())
680    }
681}
682impl<T: AsRef<str>> PersistentIdentifierConvert<T> for T
683where
684    T: ToString,
685{
686    fn format_as(&self, pid_type: PID) -> String {
687        match pid_type {
688            | PID::ARK => ARK::format(self.as_ref()),
689            | PID::DOI => DOI::format(self.as_ref()),
690            | PID::ORCID => ORCID::format(self.as_ref()),
691            | _ => self.as_ref().to_string(),
692        }
693    }
694    fn to_pid(&self, _pid_type: PID) -> PersistentIdentifierInternal {
695        match _pid_type {
696            | PID::ARK => PersistentIdentifierInternal {
697                value: self.as_ref().to_string(),
698                pid_type: PID::ARK,
699            },
700            | PID::DOI => PersistentIdentifierInternal {
701                value: self.as_ref().to_string(),
702                pid_type: PID::DOI,
703            },
704            | PID::ORCID => PersistentIdentifierInternal {
705                value: self.as_ref().to_string(),
706                pid_type: PID::ORCID,
707            },
708            | _ => PersistentIdentifierInternal::default(),
709        }
710    }
711    fn is_pid(&self, pid_type: PID) -> bool {
712        match pid_type {
713            | PID::ARK => self.is_ark(),
714            | PID::DOI => self.is_doi(),
715            | PID::ORCID => self.is_orcid(),
716            | _ => false,
717        }
718    }
719    fn is_ark(&self) -> bool {
720        ARK::is_valid(self.as_ref())
721    }
722    fn is_doi(&self) -> bool {
723        DOI::is_valid(self.as_ref())
724    }
725    fn is_orcid(&self) -> bool {
726        ORCID::is_valid(self.as_ref())
727    }
728}
729impl PersistentIdentifierInternal {
730    /// Convert a `PersistentIdentifierInternal` to an `ARK`
731    pub fn to_ark(&self) -> ARK {
732        let PersistentIdentifierInternal { value, pid_type } = self;
733        match pid_type {
734            | PID::ARK => ARK::from_string(value),
735            | _ => ARK::default(),
736        }
737    }
738    /// Convert a `PersistentIdentifierInternal` to a `DOI`
739    pub fn to_doi(&self) -> DOI {
740        let PersistentIdentifierInternal { value, pid_type } = self;
741        match pid_type {
742            | PID::DOI => DOI::from_string(value),
743            | _ => DOI::default(),
744        }
745    }
746    /// Convert a `PersistentIdentifierInternal` to a `ORCID`
747    pub fn to_orcid(&self) -> ORCID {
748        let PersistentIdentifierInternal { value, pid_type } = self;
749        match pid_type {
750            | PID::ORCID => ORCID::from_string(value),
751            | _ => ORCID::default(),
752        }
753    }
754}
755impl Betanumeric for char {
756    fn is_betanumeric(&self) -> bool {
757        BETANUMERIC_DIGITS.contains(*self)
758    }
759    fn to_betanumeric_ordinal(&self) -> Option<usize> {
760        BETANUMERIC_DIGITS.chars().position(|x| x.eq(self))
761    }
762}
/// Calculate check digit IAW [ISO 7064, MOD 11-2](https://www.iso.org/standard/31531.html)
///
/// "MOD 11-2" means modulus = 11 and radix = 2
///
/// Hyphens are ignored and at most the first 15 remaining characters contribute to the checksum, so the value
/// may be passed with or without its trailing check digit. Any contributing character that is not a decimal
/// digit counts as `0`. A computed value of 10 is returned as `'X'`.
///
/// ### Example
/// ```rust
/// use acorn_lib::schema::pid::iso7064_check_digit;
///
/// assert_eq!(iso7064_check_digit("0000000220579115"), Some('5'));
/// assert_eq!(iso7064_check_digit("0000-0002-2057-9115"), Some('5'));
/// ```
pub fn iso7064_check_digit<S>(value: S) -> Option<char>
where
    S: AsRef<str>,
{
    const MODULUS: u32 = 11;
    const RADIX: u32 = 2;
    // Only the 15 payload characters take part; a 16th character, when present, is the check digit itself
    let sum = value
        .as_ref()
        .chars()
        .filter(|x| *x != '-')
        .take(15)
        .fold(0, |acc, x| (acc + x.to_digit(10).unwrap_or_default()) * RADIX);
    let result = (MODULUS + 1 - sum % MODULUS) % MODULUS;
    match result {
        | 10 => Some('X'),
        | digit => char::from_digit(digit, 10),
    }
}
793/// Calculate check xdigit ("extended digit") IAW [NOID check digit algorithm (NCDA)](https://metacpan.org/dist/Noid/view/noid#NOID-CHECK-DIGIT-ALGORITHM)
794/// ### Notes
795/// - Check digits are not expected to cover qualifiers
796/// - If check digit is present in an ARK, by convention it is the right-most character of the so called "check zone"
797/// - The "check zone" is composed of the NAAN and assigned name, separated by a forward slash
798/// - Forward slashes do not contribute to the check digit sum, but do impact the character position index
799/// - NCDA is guaranteed against single-character errors
800/// - NCDA is guaranteed against transposition of two single characters
801/// ### References
802/// - <https://github.com/internetarchive/arklet>
803/// - <https://github.com/no-reply/pynoid>
804pub fn noid_check_digit<S>(value: S) -> Option<char>
805where
806    S: AsRef<str>,
807{
808    const RADIX: usize = 29;
809    let sum = value.as_ref().chars().enumerate().fold(0, |acc, (i, val)| {
810        let position = i + 1;
811        let ordinal = match val.is_betanumeric() {
812            | true => val.to_betanumeric_ordinal().unwrap(),
813            | false => 0,
814        };
815        acc + (position * ordinal)
816    });
817    let remainder = sum % RADIX;
818    to_betanumeric(remainder as u8)
819}
820fn to_betanumeric(value: u8) -> Option<char> {
821    match BETANUMERIC_DIGITS.chars().enumerate().find(|(i, _)| *i == value as usize) {
822        | Some((_, x)) => Some(x),
823        | None => None,
824    }
825}
826
827#[cfg(test)]
828mod tests;