Skip to main content

cryptography_breaker/
algorithms.rs

1//! Algorithms
2//!
3//! This module has shared functions that other cryptographic algorithms may need
4
5use std::{collections::HashMap, fmt};
6
7use freq_calc::{
8    calctype::{CalcType, Letters},
9    frequencies::Analyzer,
10    tokenizer::Tokenizer,
11};
12
13pub mod double_vigenere;
14pub mod one_time_pad;
15pub mod vigenere;
16
/// Errors reported when the key, the input text or the alphabet handed to a
/// cipher is rejected.
///
/// `Clone`, `PartialEq` and `Eq` are derived so that callers and tests can
/// compare errors directly (e.g. with `assert_eq!`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CipherError {
    /// The key is empty.
    EmptyKey,
    /// The key is invalid; the payload is printed after "Invalid key: ".
    InvalidKey(String),
    /// The key contains the given invalid character.
    InvalidKeyChar(char),
    /// The input text is empty.
    EmptyInputText,
    /// The input text is invalid; the payload is printed after "Invalid input text: ".
    InvalidInputText(String),
    /// The input text contains the given invalid character.
    InvalidInputChar(char),
    /// The alphabet is invalid.
    InvalidAlphabet,
}
27
28impl fmt::Display for CipherError {
29    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
30        match self {
31            CipherError::InvalidKey(key) => write!(f, "Invalid key: {}", key),
32            CipherError::EmptyKey => write!(f, "Empty key"),
33            CipherError::InvalidInputText(input_text) => {
34                write!(f, "Invalid input text: {}", input_text)
35            }
36            CipherError::InvalidInputChar(c) => {
37                write!(f, "Invalid character in input text: {}", c)
38            }
39            CipherError::EmptyInputText => write!(f, "Empty input text"),
40            CipherError::InvalidAlphabet => write!(f, "Invalid alphabet"),
41            CipherError::InvalidKeyChar(c) => write!(f, "Invalid character in key: {}", c),
42        }
43    }
44}
45
46impl std::error::Error for CipherError {}
47
48/// text should be normalized
49/// It return some number, every language has their own result from IoC
50/// Random text normally is much lower than what languages returns
51/// If text.len() <2 it will return 0
52pub fn index_of_coincidence(text: &str) -> f64 {
53    let text_len = text.len() as f64;
54
55    if text_len < 2.0 {
56        return 0.0;
57    }
58
59    let letters_count = Analyzer::new(text, Letters).count();
60
61    let sum: usize = letters_count
62        .values()
63        .map(|value| value * (value - 1))
64        .sum();
65
66    sum as f64 / (text_len * (text_len - 1.0))
67}
68
69/// Calculate closeness of given text to reference data. the closer the result is to zero, the closer the text is to the reference data. (the less probably ones are closer to -infty\
70///
71/// reference_data is frequency of given S type \
72/// text should be normalized as it will check what it see, so for most cases upper latters and no spaces<
73pub fn fitness<S: CalcType + Tokenizer + Default>(
74    text: &str,
75    reference_data: &HashMap<S::Key, f64>,
76    penalty_score: f64,
77) -> f64 {
78    // HashMap containing frequency of text we want to check
79    let text_frequency = Analyzer::new(text, S::default()).frequency();
80
81    // Check every key (Words, Quadgrams, etc) against referenced HashMap
82    let sum: f64 = text_frequency
83        .iter()
84        .map(|(key, freq)| {
85            // Check if given key in text exist in referenced data
86            if let Some(ref_freq) = reference_data.get(key) {
87                // Add to result log10 of ref_freq and multiply it by times the key appeared in text
88                // The bigger the number, the higher chance it's the text we're looking for (-0.20 > -6)
89                freq * ref_freq.log10()
90            }
91            // If it doesn't, subtract some big number (e.g. 10). (We can't use formula above as 0.log10() = -inf)
92            // And it doesn't make sens to use (0.01 / N).log10() as we would need to calculate N (the number of occurence of all keys) and it provide no benefit
93            else {
94                -penalty_score
95            }
96        })
97        .sum();
98    sum / S::divisor(S::default(), text) as f64
99}
100
/// Calculate the non-negative (Euclidean) modulus of two numbers.
///
/// The result `r` satisfies `0 <= r < |b|`, also for a negative `a` or a
/// negative `b`.
///
/// # Panics
///
/// Panics if `b` is zero.
///
/// # Example
///
/// ```
/// use cryptography_breaker::algorithms::modulus;
///
/// let a = -2;
/// let b = 5;
///
/// assert_eq!(3, modulus(a, b))
/// ```
pub fn modulus(a: isize, b: isize) -> usize {
    // `rem_euclid` never goes through the intermediate `(a % b) + b`, which
    // overflows for large `b`, and its result is never negative, so the
    // conversion to `usize` is lossless.
    a.rem_euclid(b).unsigned_abs()
}
116
#[cfg(test)]
mod tests {
    use std::{collections::HashMap, fs::File, path::PathBuf};

    use freq_calc::{
        calctype::Quadgrams,
        normalizer::{Normalizer, NormalizerBuilder},
        transformer::PolishToAscii,
    };
    use once_cell::sync::Lazy;

    use crate::algorithms::{fitness, index_of_coincidence};

    /// Polish quadgram reference data, loaded on first use from
    /// `resources/frequencies/polish/quadgrams/hashmap.yaml`.
    static QUADGRAMS_FREQUENCIES: Lazy<HashMap<String, f64>> = Lazy::new(|| {
        let yaml_path: PathBuf = [
            "resources",
            "frequencies",
            "polish",
            "quadgrams",
            "hashmap.yaml",
        ]
        .iter()
        .collect();

        let yaml_file = File::open(yaml_path).unwrap();

        yaml_serde::from_reader(yaml_file).unwrap()
    });

    /// Normalizer shared by the tests; built with the `PolishToAscii` transformer.
    static NORMALIZER: Lazy<Normalizer<PolishToAscii>> = Lazy::new(|| {
        let builder = NormalizerBuilder::default()._transform(PolishToAscii);

        builder.build().unwrap()
    });

    /// Smoke test: only checks that `fitness` runs and prints the score.
    #[test]
    fn test_fitness() {
        let score = fitness::<Quadgrams>("JAKISZASZYFROWANYTEKST", &QUADGRAMS_FREQUENCIES, 10.0);

        println!("fitness: {}", score);
    }

    mod test_index_of_coincidence {
        use freq_calc::Alphabet;

        use crate::IoC;

        use super::*;

        /// English sample must land within 10% of the expected English IoC.
        #[test]
        fn english_text() {
            let sample: &str = "L did you know that why is this below one dot seven. Maybe if I start typing more and more it will eventually reach the one seven Its better the longer TEXT i write,.[]';'/.  2423525224. Eh, it should work, but still need to write test eventually";
            // NOTE(review): the English sample is normalized with the Polish
            // alphabet - confirm this is intended.
            let normalized = NORMALIZER.normalize(sample, Alphabet::ClassicPl.value());

            let expected_ioc = IoC::En.value();
            let tolerance = 0.10 * expected_ioc;
            let ioc = index_of_coincidence(&normalized);

            println!(
                "IoC: {}\nExpected IoC: {}\nTolerance: {}",
                ioc, expected_ioc, tolerance
            );
            assert!((ioc - expected_ioc).abs() <= tolerance);
        }

        /// Polish sample must land within 5% of the expected Polish IoC.
        #[test]
        fn polish_text() {
            let sample: &str = "Zbrodnie na pacjentach w Państwowym Zakładzie dla Umysłowo i Nerwowo Chorych w Kobierzynie – dokonane w czasie II wojny światowej przez urzędników III Rzeszy Niemieckiej zabójstwo ponad tysiąca pacjentów szpitala psychiatrycznego w Kobierzynie. Łańcuch zbrodni składał się z trzech etapów: głodzenia pacjentów, wywiezienia we wrześniu 1941 roku chorych narodowości żydowskiej oraz zbrodni masowego zabójstwa i podstępnego wywiezienia pacjentów do obozu koncentracyjnego Auschwitz-Birkenau 23 czerwca 1942 roku. Akcję wywózki przetrwała tylko jedna pacjentka zakładu. Łącznie, wliczając także ofiary zagłodzenia, uśmiercono około 900–1000 osób. Główny sprawca zbrodni, niemiecki dyrektor szpitala Alex Kroll, uniknął kary. Ofiary hitlerowskich zbrodni na pacjentach zakładu upamiętnia pomnik zlokalizowany na terenie Szpitala Klinicznego im. dr. Józefa Babińskiego w Krakowie. Czytaj więcej…";
            let normalized = NORMALIZER.normalize(sample, Alphabet::ClassicPl.value());

            let expected_ioc = IoC::Pl.value();
            let tolerance = 0.05 * expected_ioc;
            let ioc = index_of_coincidence(&normalized);

            println!(
                "IoC: {}\nExpected IoC: {}\nTolerance: {}",
                ioc, expected_ioc, tolerance
            );

            assert!((ioc - expected_ioc).abs() <= tolerance);
        }

        /// Random letters must not score above 0.04.
        #[test]
        fn random_text() {
            let sample: &str = "yxerkmivukbegbzkdyzusbiotcqdxjkycdcjfjueoryntavvmqveabgrpsvhevoglfzpqcoueemiicyxrjzsuhbbvpeobrikuigrwthunypwhwgnrwnwkqszsjcpaxzs";
            let normalized = NORMALIZER.normalize(sample, Alphabet::ClassicEn.value());

            let ioc = index_of_coincidence(&normalized);
            println!("IoC: {}", ioc);

            assert!(ioc <= 0.0400, "IoC: {}", ioc);
        }

        /// Each of the 26 letters appears exactly twice.
        #[test]
        fn every_letter() {
            let sample =
                "aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx yy zz";
            let normalized = NORMALIZER.normalize(sample, Alphabet::ClassicEn.value());

            assert_eq!(0.0196078431372549, index_of_coincidence(&normalized));
        }

        /// A single letter yields 0.
        #[test]
        fn one_letter() {
            let sample = "A";

            assert_eq!(0.0, index_of_coincidence(sample));
        }
    }
}