// spellabet/lib.rs

1#![deny(clippy::all)]
2#![warn(clippy::nursery, clippy::pedantic)]
3
//! # Spelling Alphabet
//!
//! A Rust library for transforming text strings into corresponding code words
//! based on predefined [spelling alphabets][], like the NATO phonetic alphabet.
//! These alphabets are designed to enhance verbal clarity, especially when
//! spelling out words over low-fidelity voice channels. This library supports
//! several standard alphabets and allows for customization to suit specific
//! communication needs.
//!
//! In operation, spellabet preserves the original capitalization of letters by
//! returning either lowercase or uppercase code words. It similarly converts
//! known digits and other symbols into code words, while unrecognized
//! characters are returned unconverted.
//!
//! This library powers the command line utility `spellout`, which provides a
//! handy interface for phonetic conversions. Check out [spellout on GitHub][]
//! for more information.
//!
//! [spelling alphabets]: https://en.wikipedia.org/wiki/Spelling_alphabet
//! [spellout on GitHub]: https://github.com/EarthmanMuons/spellout/
//!
//! # Example
//!
//! ```
//! use spellabet::{PhoneticConverter, SpellingAlphabet};
//!
//! let converter = PhoneticConverter::new(&SpellingAlphabet::Nato);
//! println!("{}", converter.convert("Example123!"));
//! ```
//!
//! ```text
//! ECHO x-ray alfa mike papa lima echo One Two Tree Exclamation
//! ```
37
38use std::char;
39use std::cmp::Ordering;
40use std::collections::HashMap;
41use std::fmt::Write;
42
43use code_words::{
44    DEFAULT_DIGITS_AND_SYMBOLS, JAN_ALPHABET, LAPD_ALPHABET, NATO_ALPHABET, ROYAL_NAVY_ALPHABET,
45    US_FINANCIAL_ALPHABET, WESTERN_UNION_ALPHABET,
46};
47use convert_case::{Case, Casing};
48
49mod code_words;
50
/// A phonetic converter.
///
/// Holds a character-to-code-word table together with the output format flag
/// used when converting text.
#[derive(Clone, Debug)]
pub struct PhoneticConverter {
    /// The map of characters to code words.
    conversion_map: HashMap<char, String>,
    /// Is set when the code word output will be in "nonce form".
    nonce_form: bool,
}
59
/// A spelling alphabet.
///
/// Each variant selects one of the built-in letter tables. The default
/// variant is [`SpellingAlphabet::Nato`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Default)]
pub enum SpellingAlphabet {
    /// The JAN (Joint Army/Navy) spelling alphabet.
    Jan,
    /// The LAPD (Los Angeles Police Department) spelling alphabet.
    Lapd,
    /// The NATO (North Atlantic Treaty Organization) spelling alphabet.
    /// This is the default.
    #[default]
    Nato,
    /// The Royal Navy spelling alphabet.
    RoyalNavy,
    /// The United States Financial Industry spelling alphabet.
    UsFinancial,
    /// The Western Union spelling alphabet.
    WesternUnion,
}
78
79impl PhoneticConverter {
80    /// Creates and returns a new instance of `PhoneticConverter` using the
81    /// desired spelling alphabet character mappings.
82    ///
83    /// # Arguments
84    ///
85    /// * `alphabet` - The [`SpellingAlphabet`] to use for character
86    ///   conversions.
87    ///
88    /// # Examples
89    ///
90    ///
91    /// ```
92    /// # use spellabet::{PhoneticConverter, SpellingAlphabet};
93    /// let converter = PhoneticConverter::new(&SpellingAlphabet::default());
94    /// ```
95    #[must_use]
96    pub fn new(alphabet: &SpellingAlphabet) -> Self {
97        let conversion_map = alphabet.initialize();
98
99        Self {
100            conversion_map,
101            nonce_form: false,
102        }
103    }
104
105    /// Get the current character mappings of the `PhoneticConverter` instance.
106    #[must_use]
107    pub const fn mappings(&self) -> &HashMap<char, String> {
108        &self.conversion_map
109    }
110
111    /// Configures the current `PhoneticConverter` instance to either output
112    /// code words in "nonce form" or not, based on the given boolean value.
113    ///
114    /// Nonce form means each letter character is expanded into the form "'A' as
115    /// in ALFA". Digits and symbols are always returned using the normal output
116    /// format.
117    ///
118    /// # Arguments
119    ///
120    /// * `nonce_form` - If true, enables nonce form output. Otherwise, the
121    ///   normal output format is used.
122    ///
123    /// # Examples
124    ///
125    /// ```
126    /// # use spellabet::{PhoneticConverter, SpellingAlphabet};
127    /// let converter = PhoneticConverter::new(&SpellingAlphabet::default()).nonce_form(true);
128    /// println!("{}", converter.convert("Hello"));
129    /// ```
130    ///
131    /// ```text
132    /// 'H' as in HOTEL, 'e' as in echo, 'l' as in lima, 'l' as in lima, 'o' as in oscar
133    /// ```
134    #[must_use]
135    pub const fn nonce_form(mut self, nonce_form: bool) -> Self {
136        self.nonce_form = nonce_form;
137        self
138    }
139
140    /// Modifies the conversion map of the current `PhoneticConverter` instance
141    /// by adding or replacing mappings based on the given overrides map.
142    ///
143    /// # Arguments
144    ///
145    /// * `overrides_map` - The desired character to code word mappings to
146    ///   override. The capitalization of the keys and values will be
147    ///   automatically normalized. For Unicode keys, normalization only
148    ///   lowercases when the result is a single Unicode scalar; otherwise the
149    ///   original key is preserved.
150    ///
151    /// # Examples
152    ///
153    /// ```
154    /// use std::collections::HashMap;
155    /// # use spellabet::{PhoneticConverter, SpellingAlphabet};
156    ///
157    /// let mut converter = PhoneticConverter::new(&SpellingAlphabet::default());
158    ///
159    /// let mut overrides_map = HashMap::new();
160    /// overrides_map.insert('a', "Apple".to_string());
161    /// overrides_map.insert('b', "Banana".to_string());
162    ///
163    /// println!("BEFORE: {}", converter.convert("abcd"));
164    /// ```
165    ///
166    /// ```text
167    /// BEFORE: alfa bravo charlie delta
168    /// ```
169    ///
170    /// ```
171    /// # use std::collections::HashMap;
172    /// # use spellabet::{PhoneticConverter, SpellingAlphabet};
173    /// # let mut converter = PhoneticConverter::new(&SpellingAlphabet::default());
174    /// # let mut overrides_map = HashMap::new();
175    /// # overrides_map.insert('a', "Apple".to_string());
176    /// # overrides_map.insert('b', "Banana".to_string());
177    /// converter = converter.with_overrides(overrides_map);
178    /// println!("AFTER: {}", converter.convert("abcd"));
179    /// ```
180    ///
181    /// ```text
182    /// AFTER: apple banana charlie delta
183    /// ```
184    #[must_use]
185    pub fn with_overrides(mut self, overrides_map: HashMap<char, String>) -> Self {
186        let normalized_overrides: HashMap<char, String> = overrides_map
187            .into_iter()
188            .map(|(k, v)| (normalize_key(k), v.to_case(Case::Pascal)))
189            .collect();
190
191        self.conversion_map.extend(normalized_overrides);
192        self
193    }
194
195    /// Converts the given text into a string of code words using the current
196    /// character mappings of the `PhoneticConverter` instance.
197    ///
198    /// # Arguments
199    ///
200    /// * `text` - The text to convert into code words.
201    ///
202    /// # Examples
203    ///
204    /// ```
205    /// # use spellabet::{PhoneticConverter, SpellingAlphabet};
206    /// let converter = PhoneticConverter::new(&SpellingAlphabet::default());
207    /// assert_eq!(converter.convert("Hello"), "HOTEL echo lima lima oscar");
208    /// ```
209    #[must_use]
210    pub fn convert(&self, text: &str) -> String {
211        let mut result = String::new();
212
213        for (i, c) in text.chars().enumerate() {
214            // add separator between converted characters
215            if i != 0 {
216                if self.nonce_form {
217                    result.push_str(", ");
218                } else {
219                    result.push(' ');
220                }
221            }
222            self.convert_char(c, &mut result);
223        }
224        result
225    }
226
227    fn convert_char(&self, character: char, result: &mut String) {
228        match self.conversion_map.get(&normalize_key(character)) {
229            Some(word) => {
230                let code_word = match character {
231                    _ if character.is_lowercase() => word.to_lowercase(),
232                    _ if character.is_uppercase() => word.to_uppercase(),
233                    _ => word.clone(),
234                };
235
236                if self.nonce_form && character.is_alphabetic() {
237                    write!(result, "'{character}' as in {code_word}").unwrap();
238                } else {
239                    result.push_str(&code_word);
240                }
241            }
242            None => result.push(character),
243        }
244    }
245
246    /// Writes the current character mappings of the `PhoneticConverter`
247    /// instance to the given writer.
248    ///
249    /// # Arguments
250    ///
251    /// * `writer` - The output destination.
252    /// * `verbose` - If true, dumps all characters. Otherwise, dumps only
253    ///   letter characters.
254    ///
255    /// # Errors
256    ///
257    /// This function will return an error if writing to the provided writer
258    /// fails. The specific conditions under which this may occur depend on the
259    /// nature of the writer.
260    ///
261    /// # Examples
262    ///
263    /// ```
264    /// # use spellabet::{PhoneticConverter, SpellingAlphabet};
265    /// let converter = PhoneticConverter::new(&SpellingAlphabet::default());
266    ///
267    /// let mut buf = Vec::new();
268    /// let verbose = false;
269    /// converter.dump_alphabet(&mut buf, verbose)?;
270    /// let output = String::from_utf8(buf)?;
271    /// println!("{output}");
272    /// # Ok::<(), Box<dyn std::error::Error>>(())
273    /// ```
274    ///
275    /// ```text
276    /// a -> Alfa
277    /// b -> Bravo
278    /// c -> Charlie
279    /// ...
280    /// ```
281    pub fn dump_alphabet(
282        &self,
283        mut writer: impl std::io::Write,
284        verbose: bool,
285    ) -> std::io::Result<()> {
286        for (character, code_word) in self.sorted_mappings() {
287            if verbose || character.is_alphabetic() {
288                writeln!(writer, "{character} -> {code_word}")?;
289            }
290        }
291        Ok(())
292    }
293
294    /// Returns the current character mappings sorted by letters, then digits,
295    /// then symbols, with each group sorted by natural character order.
296    #[must_use]
297    pub fn sorted_mappings(&self) -> Vec<(char, String)> {
298        let mut entries: Vec<_> = self
299            .conversion_map
300            .iter()
301            .map(|(character, code_word)| (*character, code_word.clone()))
302            .collect();
303        entries.sort_by(|(a, _), (b, _)| custom_char_ordering(*a, *b));
304        entries
305    }
306}
307
308impl Default for PhoneticConverter {
309    fn default() -> Self {
310        Self::new(&SpellingAlphabet::default())
311    }
312}
313
// Sort characters in the order of letters before digits before symbols.
// Within each group, characters will be sorted in their natural order.
fn custom_char_ordering(a: char, b: char) -> Ordering {
    // Group rank: 0 for letters, 1 for digits, 2 for everything else. A
    // character that is both alphabetic and numeric is ranked as a letter,
    // because the alphabetic test is applied first.
    fn group(character: char) -> u8 {
        if character.is_alphabetic() {
            0
        } else if character.is_numeric() {
            1
        } else {
            2
        }
    }

    // Compare by group first; fall back to the characters themselves only
    // when both belong to the same group.
    group(a).cmp(&group(b)).then_with(|| a.cmp(&b))
}
328
// Normalize keys to lowercase when that produces a single Unicode scalar.
// If lowercasing expands to multiple scalars, keep the original character.
fn normalize_key(character: char) -> char {
    let mut lowered = character.to_lowercase();
    // Pull the first two scalars of the lowercase mapping (tuple elements are
    // evaluated left to right): exactly one scalar means the mapping is safe
    // to use as a key; anything else falls back to the input.
    match (lowered.next(), lowered.next()) {
        (Some(single), None) => single,
        _ => character,
    }
}
340
341impl SpellingAlphabet {
342    /// Generates and returns a character to code word map based on the current
343    /// `SpellingAlphabet`.
344    #[must_use]
345    pub fn initialize(&self) -> HashMap<char, String> {
346        let mut map: HashMap<char, String> = HashMap::with_capacity(69);
347
348        let extend_map = |map: &mut HashMap<char, String>, source_map: &[(char, &str)]| {
349            for (k, v) in source_map {
350                map.insert(*k, (*v).to_string());
351            }
352        };
353
354        extend_map(&mut map, &DEFAULT_DIGITS_AND_SYMBOLS);
355
356        match self {
357            Self::Jan => extend_map(&mut map, &JAN_ALPHABET),
358            Self::Lapd => extend_map(&mut map, &LAPD_ALPHABET),
359            Self::Nato => extend_map(&mut map, &NATO_ALPHABET),
360            Self::RoyalNavy => extend_map(&mut map, &ROYAL_NAVY_ALPHABET),
361            Self::UsFinancial => extend_map(&mut map, &US_FINANCIAL_ALPHABET),
362            Self::WesternUnion => extend_map(&mut map, &WESTERN_UNION_ALPHABET),
363        }
364
365        map
366    }
367}