Skip to main content

cryptography_breaker/
normalizer.rs

1//! Normalizer
2//!
//! This module provides normalization functions and types which normalize characters.
4
5use std::fmt::Display;
6
7use crate::algorithms::CipherError;
8use clap::ValueEnum;
9use derive_builder::Builder;
10
/// Possible results of [`Normalizer::normalize`]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NormalizerResult {
    /// Character wasn't changed
    Preserved(char),
    /// Character was normalized
    Normalized {
        /// The character after the case strategy was applied
        c: char,
        /// `true` only when the `Preserve` case strategy saw a lowercase input
        /// character; `false` in every other situation
        was_lowercase: bool,
    },
}
18
19/// Which strategy to use for case
20#[derive(Clone, Copy, Debug, Default, ValueEnum)]
21pub enum CaseStrategy {
22    /// A ≠ a
23    ///
24    /// Alphabet can have same letters if in different case.
25    ///
26    /// # Example
27    ///
28    /// ```
29    /// let alphabet = &['A', 'a', 'B', 'b', 'C', 'c'];
30    /// ```
31    /// "A, a, B, b, C, c,…"
32    CaseSensitive,
33    /// A = a
34    ///
35    /// Alphabet can only have uppercase letters.
36    ///
37    /// # Example
38    ///
39    /// ```
40    /// let alphabet = &['A', 'B', 'C'];
41    /// ```
42    #[default]
43    CaseUnsensitive,
44    ///
45    /// The case of the letter will remain the same after encrypting or decrypting:
46    ///
47    /// ggzLCB ⇒ cipHER
48    ///
49    /// Alphabet can only have uppercase letters.
50    ///
51    /// # Example
52    ///
53    /// ```
54    /// let alphabet = &['A', 'B', 'C'];
55    /// ```
56    Preserve,
57}
58
59impl Display for CaseStrategy {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        self.to_possible_value()
62            .expect("no values are skipped")
63            .get_name()
64            .fmt(f)
65    }
66}
67
68/// Normalize character
69#[derive(Builder, Clone, Default)]
70#[builder(default)]
71pub struct Normalizer {
72    /// Case strategy to use
73    pub case: CaseStrategy,
74    /// Preserve whitespaces
75    ///
76    /// But the alphabet cannot have any
77    preserve_whitespaces: bool,
78    /// Preserver characters not present in alphabet (including whitespaces)
79    preserve_other_characters: bool,
80}
81
82impl Normalizer {
83    /// Normalize character to match preferences. Uses alphabet to determinates if alphabet contains given character
84    pub fn normalize(&self, c: char, alphabet: &[char]) -> Result<NormalizerResult, CipherError> {
85        if self.preserve_whitespaces && c.is_whitespace() {
86            return Ok(NormalizerResult::Preserved(c));
87        }
88
89        if self.preserve_other_characters && !alphabet.contains(&c) {
90            return Ok(NormalizerResult::Preserved(c));
91        }
92
93        let (normalized, was_lowercase) = match self.case {
94            CaseStrategy::CaseSensitive => (c, false),
95            CaseStrategy::CaseUnsensitive => (
96                c.to_uppercase()
97                    .next()
98                    .ok_or(CipherError::InvalidInputChar(c))?,
99                false,
100            ),
101            CaseStrategy::Preserve => {
102                let was_lowercase = c.is_lowercase();
103                let upper = c
104                    .to_uppercase()
105                    .next()
106                    .ok_or(CipherError::InvalidInputChar(c))?;
107                (upper, was_lowercase)
108            }
109        };
110        if !alphabet.contains(&normalized) {
111            return Err(CipherError::InvalidInputChar(c));
112        }
113
114        Ok(NormalizerResult::Normalized {
115            c: normalized,
116            was_lowercase,
117        })
118    }
119}
120
121/// Normalize given key
122pub fn normalize_key(key: &str, case_strategy: CaseStrategy) -> String {
123    match case_strategy {
124        CaseStrategy::CaseSensitive => key.to_string(),
125        CaseStrategy::CaseUnsensitive => key.to_uppercase(),
126        CaseStrategy::Preserve => key.to_uppercase(),
127    }
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    /// Uppercase-only alphabet, as the case-insensitive strategies expect.
    const ALPHABET: &[char] = &['A', 'B', 'C'];

    /// Builds a normalizer directly from its fields.
    fn normalizer(
        case: CaseStrategy,
        preserve_whitespaces: bool,
        preserve_other_characters: bool,
    ) -> Normalizer {
        Normalizer {
            case,
            preserve_whitespaces,
            preserve_other_characters,
        }
    }

    #[test]
    fn test_normalize_preserves_whitespace_when_enabled() {
        let result = normalizer(CaseStrategy::CaseUnsensitive, true, false).normalize(' ', ALPHABET);

        assert!(matches!(result, Ok(NormalizerResult::Preserved(' '))));
    }

    #[test]
    fn test_normalize_rejects_whitespace_when_not_preserved() {
        let result = Normalizer::default().normalize(' ', ALPHABET);

        assert!(matches!(result, Err(CipherError::InvalidInputChar(' '))));
    }

    #[test]
    fn test_normalize_case_unsensitive_uppercases() {
        let result =
            normalizer(CaseStrategy::CaseUnsensitive, false, false).normalize('b', ALPHABET);

        assert!(matches!(
            result,
            Ok(NormalizerResult::Normalized {
                c: 'B',
                was_lowercase: false,
            })
        ));
    }

    #[test]
    fn test_normalize_preserve_records_original_case() {
        let normalizer = normalizer(CaseStrategy::Preserve, false, false);

        assert!(matches!(
            normalizer.normalize('c', ALPHABET),
            Ok(NormalizerResult::Normalized {
                c: 'C',
                was_lowercase: true,
            })
        ));
        assert!(matches!(
            normalizer.normalize('C', ALPHABET),
            Ok(NormalizerResult::Normalized {
                c: 'C',
                was_lowercase: false,
            })
        ));
    }

    #[test]
    fn test_normalize_case_sensitive_keeps_character() {
        let normalizer = normalizer(CaseStrategy::CaseSensitive, false, false);

        assert!(matches!(
            normalizer.normalize('a', &['A', 'a']),
            Ok(NormalizerResult::Normalized {
                c: 'a',
                was_lowercase: false,
            })
        ));
        assert!(matches!(
            normalizer.normalize('a', ALPHABET),
            Err(CipherError::InvalidInputChar('a'))
        ));
    }

    #[test]
    fn test_normalize_preserve_other_character() {
        let result = normalizer(CaseStrategy::CaseUnsensitive, false, true).normalize('!', ALPHABET);

        assert!(matches!(result, Ok(NormalizerResult::Preserved('!'))));
    }

    #[test]
    fn test_normalize_dont_preserve_other_character() {
        let result =
            normalizer(CaseStrategy::CaseUnsensitive, false, false).normalize('!', ALPHABET);

        assert!(matches!(result, Err(CipherError::InvalidInputChar('!'))));
    }

    #[test]
    fn test_normalize_key() {
        assert_eq!("Key", normalize_key("Key", CaseStrategy::CaseSensitive));
        assert_eq!("KEY", normalize_key("Key", CaseStrategy::CaseUnsensitive));
        assert_eq!("KEY", normalize_key("Key", CaseStrategy::Preserve));
    }
}