Skip to main content

bouncycastle_base64/
lib.rs

1//! Good old fashioned base64 encoder and decoder.
2//!
3//! It should just work the way you expect: [encode] takes any bytes-like rust type
4//! and returns a String, while [decode] takes a String (which can be in any bytes-like container)
5//! and returns a `Vec<u8>`.
6//!
7//!```
8//! use bouncycastle_base64 as base64;
9//!
10//! let out = base64::encode(b"\x00"); // "AA=="
11//! let out = base64::encode(b"Hello, World!"); // "SGVsbG8sIFdvcmxkIQ=="
12//! let out = base64::encode(b"\x00\x01\x02\x03\x04\x05\x06"); // "AAECAwQFBg=="
13//!
14//! let out = base64::decode("AA==").unwrap(); // b"\x00"
15//! let out = base64::decode("SGVsbG8sIFdvcmxkIQ==").unwrap(); // b"Hello, World!"
16//! let out = base64::decode("AAECAwQFBg==").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
17//!
18//! // note that the decoder automatically ignores whitespace in the b64 input
19//! let out1 = base64::decode("AAEC   Aw QFB\ng==").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
20//! assert_eq!(out, out1);
21//!
22//! // it is also tolerant of missing padding characters
23//! let out = base64::decode("AAECAwQFBg==").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
24//! let out1 = base64::decode("AAECAwQFBg=").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
25//! assert_eq!(out, out1);
26//! let out2 = base64::decode("AAECAwQFBg").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
27//! assert_eq!(out, out2);
28//! ```
29//!
30//! # Streaming
31//! Unlike Hex, Base64 does not align cleanly to byte boundaries.
32//! That means that the above one-shot APIs should only be used if you have the entire content to
33//! process at the same time.
34//! In other words, if you arbitrarily break your data into chunks and hand it to the one-shot [encode] and [decode] APIs,
35//! you will get incorrect results.
36//! If you need to process your data in chunks, you need to use the streaming API that allows
37//! repeated calls to `do_update`, producing output as it goes, and correctly holds on to the unprocessed
38//! partial block until either `do_update` or `do_final` is called.
39//!
40//! ```
41//! use bouncycastle_base64 as base64;
42//!
43//! let mut b64_str: String = String::new();
44//! let mut encoder = base64::Base64Encoder::new();
45//! b64_str.push_str( encoder.do_update(b"Hello,").as_str() );
46//! b64_str.push_str( encoder.do_final(b" World!").as_str() );
47//! assert_eq!(b64_str, "SGVsbG8sIFdvcmxkIQ==");
48//!
49//! let mut out_bytes = Vec::<u8>::new();
50//! let mut decoder = base64::Base64Decoder::new(/*skip_whitespace*/ false);
51//! out_bytes.extend( decoder.do_update("SGVs").unwrap() );
52//! out_bytes.extend( decoder.do_final("bG8sIFdvcmxkIQ==").unwrap() );
53//! assert_eq!(out_bytes, b"Hello, World!");
54//! ```
55//!
56//! # Security and constant-time
57//!
//! The following paper demonstrates that clever attacks can recover private keys
//! when the attacker is able to closely observe side channels of the base64 decoding process.
//!
//! > [Util::Lookup: Exploiting key decoding in cryptographic libraries (Sieck, 2021)](https://arxiv.org/pdf/2108.04600.pdf)
62//!
63//! As this is a cryptography library, we are assuming that this base64 implementation will be used to encode
64//! and decode private keys in PEM and JWK formats and so we are only providing a constant-time implementation
65//! in order to remove the temptation to shoot yourself in the foot in the name of a small performance gain.
66//!
67//! In our testing, a naïve lookup table-based implementation of base64::decode was 1.7x faster than
68//! our constant-time implementation, and we are quite sure that optimized base64 implementations exist that
69//! provide still better performance.
70//! So if you find yourself in a position of needing to base64 encode gigabytes of non-sensitive data, then
71//! we recommend you use one of the good, fast, but non-constant-time base64 implementations available from other projects.
72//!
73//!
74//! # Alphabets:
75//!
76//! At the present time, this base64 implementation only supports the standard alphabet with "+" and "/", specifically:
77//! ```text
78//! ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=
79//! ```
80//! but additional alphabets such as the URLSafe alphabet will likely be added in future versions.
81//     /// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_="
82//     URLSafe,
83
84
85use bouncycastle_utils::ct::Condition;
86
87/// One-shot encode from bytes to a base64-encoded string using a constant-time implementation.
88pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
89    Base64Encoder::new().do_final(input)
90}
91
92/// One-shot decode from a base64-encoded string to bytes using a constant-time implementation.
93pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, Base64Error> {
94    Base64Decoder::new(true).do_final(input)
95}
96
/// Errors reported while decoding base64 input.
///
/// The type implements [`std::fmt::Display`] and [`std::error::Error`], so it can be boxed,
/// logged, or propagated with `?` into a `Box<dyn Error>`, and it can be compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Base64Error {
    /// the do_update() method must not be called on a block that contains padding.
    /// If this error is returned, then the provided input has not been processed and the caller must instead
    /// pass the same input to do_final(). Note that do_final() is tolerant of incomplete padding blocks,
    /// so even if an additional padding character is contained in the next chunk of input, do_final() will still produce
    /// the correct output -- i.e. any additional chunks held by the caller can be discarded.
    PaddingEnconteredDuringDoUpdate,

    /// Input contained a character that was not in the base64 alphabet.
    /// The payload is the index of the offending character within the input passed to the failing call.
    InvalidB64Character(usize),
}

impl std::fmt::Display for Base64Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::PaddingEnconteredDuringDoUpdate => f.write_str(
                "padding encountered during do_update(); pass the same input to do_final() instead",
            ),
            Self::InvalidB64Character(index) => {
                write!(f, "invalid base64 character at index {}", index)
            }
        }
    }
}

impl std::error::Error for Base64Error {}
109
110/// The stateful base64 encoder that supports streaming.
111pub struct Base64Encoder {
112    buf: [u8; 3],
113    vals_in_buf: usize,
114}
115
116impl Base64Encoder {
117    /// Create a new instance.
118    pub fn new() -> Self {
119        Self { buf: [0; 3], vals_in_buf: 0 }
120    }
121
122    fn ct_bin_to_b64(c: u8) -> u8 {
123        let in_az = Condition::<i64>::is_within_range(c as i64, 26, 51);
124        let in_09 = Condition::<i64>::is_within_range(c as i64, 52, 61);
125        let eq_plus = Condition::<i64>::is_equal(c as i64, 62);
126        let eq_slash = Condition::<i64>::is_equal(c as i64, 63);
127
128        // TODO: redo this once we have ct::u8 implemented ... the i64 is wasteful
129
130        #[allow(non_snake_case)]
131        let c_AZ: i64 = 'A' as i64 + c as i64;
132        let c_az: i64 = 'a' as i64 + (c as i64 - 26);
133        let c_09: i64 = '0' as i64 + (c as i64 - 2 * 26);
134        let c_plus: i64 = '+' as i64;
135        let c_slash: i64 = '/' as i64;
136
137        let mut ret: i64 = c_AZ as i64;
138        ret = in_az.select(c_az as i64, ret);
139        ret = in_09.select(c_09 as i64, ret);
140        ret = eq_plus.select(c_plus, ret);
141        ret = eq_slash.select(c_slash, ret);
142        ret as u8
143    }
144
145    pub fn do_update<T: AsRef<[u8]>>(&mut self, input: T) -> String {
146        let inref = input.as_ref();
147        let mut out: Vec<u8> = Vec::with_capacity(inref.len() * 4 / 3 + 4);
148        let mut out_buf: [u8; 4] = [0; 4];
149
150        for i in 0..inref.len() {
151            self.buf[self.vals_in_buf] = inref[i];
152            self.vals_in_buf += 1;
153
154            if self.vals_in_buf == 3 {
155                // process a block
156                Self::encode_block(&self.buf, &mut out_buf);
157                out.append(&mut out_buf.to_vec());
158                self.vals_in_buf = 0;
159            }
160        }
161
162        String::from_utf8(out).unwrap()
163    }
164
165    /// As you would expect, do_final() consumes the object along with a final block.
166    /// do_final may be called with the entire content; ie without any do_update's before it.
167    pub fn do_final<T: AsRef<[u8]>>(mut self, input: T) -> String {
168        let mut out = self.do_update(input);
169
170        // pad the last block.
171        if self.vals_in_buf != 0 {
172            let mut out_buf: [u8; 4] = [0; 4];
173            if self.vals_in_buf == 1 {
174                self.buf[1] = 0;
175            }
176            if self.vals_in_buf <= 2 {
177                self.buf[2] = 0;
178            }
179            Self::encode_block(&self.buf, &mut out_buf);
180            if self.vals_in_buf <= 2 {
181                out_buf[3] = b'=';
182            }
183            if self.vals_in_buf == 1 {
184                out_buf[2] = b'=';
185            }
186            out.push_str(std::str::from_utf8(&out_buf).unwrap());
187        }
188        out
189    }
190
191    fn encode_block<T: AsRef<[u8]>>(input: T, out: &mut [u8]) {
192        let inref = input.as_ref();
193        assert!(inref.len() >= 3);
194        assert!(out.len() >= 4);
195
196        out[0] = Self::ct_bin_to_b64(inref[0] >> 2);
197        out[1] = Self::ct_bin_to_b64(((inref[0] & 0x03) << 4) | inref[1] >> 4);
198        out[2] = Self::ct_bin_to_b64(((inref[1] & 0x0F) << 2) | inref[2] >> 6);
199        out[3] = Self::ct_bin_to_b64(inref[2] & 0x3F);
200    }
201}
202
203/// The stateful base64 decoder that supports streaming.
204pub struct Base64Decoder {
205    buf: [u8; 4],
206    vals_in_buf: usize,
207    skip_whitespace: bool,
208}
209
210impl Base64Decoder {
211    /// Create a new instance.
212    pub fn new(skip_whitespace: bool) -> Self {
213        Base64Decoder { buf: [0; 4], vals_in_buf: 0, skip_whitespace }
214    }
215
216    fn ct_b64_to_bin(b: u8) -> u8 {
217        let in_az = Condition::<i64>::is_within_range(b as i64, 97, 122);
218        #[allow(non_snake_case)]
219        let in_AZ = Condition::<i64>::is_within_range(b as i64, 65, 90);
220        let in_09 = Condition::<i64>::is_within_range(b as i64, 48, 57);
221        let is_plus = Condition::<i64>::is_equal(b as i64, 43);
222        let is_slash = Condition::<i64>::is_equal(b as i64, 47);
223        let is_padding = Condition::<i64>::is_equal(b as i64, 61);
224        let is_whitespace = Condition::<i64>::is_in_list(
225            b as i64,
226            &[' ' as i64, '\t' as i64, '\n' as i64, '\r' as i64],
227        );
228
229        #[allow(non_snake_case)]
230        let c_AZ: i64 = b as i64 - 'A' as i64;
231        let c_az: i64 = b as i64 - 'a' as i64 + 26;
232        let c_09: i64 = b as i64 - '0' as i64 + 2*26;
233
234        let mut ret: i64 = 0xFFi64;
235
236        ret = in_AZ.select(c_AZ, ret);
237        ret = in_az.select(c_az, ret);
238        ret = in_09.select(c_09, ret);
239        ret = is_plus.select(62, ret);
240        ret = is_slash.select(63, ret);
241        ret = is_padding.select(0x81, ret);
242        ret = is_whitespace.select(0x80, ret);
243
244        ret as u8
245    }
246
247    pub fn do_update<T: AsRef<[u8]>>(&mut self, input: T) -> Result<Vec<u8>, Base64Error> {
248        self.decode_internal(input, true)
249    }
250
251    fn decode_internal<T: AsRef<[u8]>>(
252        &mut self,
253        input: T,
254        rollback_if_padding: bool,
255    ) -> Result<Vec<u8>, Base64Error> {
256        // copy the current state so that we can restore it if we encounter a padding character.
257        let starting_state: [u8; 4] = self.buf.clone();
258        let starting_vals_in_block: usize = self.vals_in_buf;
259
260        let inref = input.as_ref();
261        let mut out: Vec<u8> = vec![];
262
263        let mut i: usize = 0;
264        while i < inref.len() {
265            self.buf[self.vals_in_buf] = Self::ct_b64_to_bin(inref[i]);
266            if self.buf[self.vals_in_buf] == 0xFF {
267                return Err(Base64Error::InvalidB64Character(i));
268            }
269            if self.buf[self.vals_in_buf] == 0x80 {
270                if self.skip_whitespace {
271                    i += 1;
272                    continue;
273                } else {
274                    return Err(Base64Error::InvalidB64Character(i));
275                }
276            }
277            if self.buf[self.vals_in_buf] == 0x81 {
278                // Error: we found padding.
279                if rollback_if_padding {
280                    // Roll back and return Base64Error::NonFinalBlockContainsPadding.
281                    self.buf = starting_state.clone();
282                    self.vals_in_buf = starting_vals_in_block;
283                }
284                return Ok(out);
285            }
286
287            i += 1;
288            self.vals_in_buf += 1;
289
290            // here we get to assume that the buffer contains no padding.
291            if self.vals_in_buf == 4 {
292                // decode block
293                out.push(self.buf[0] << 2 | self.buf[1] >> 4);
294                out.push(self.buf[1] << 4 | self.buf[2] >> 2);
295                out.push(self.buf[2] << 6 | self.buf[3]);
296                self.vals_in_buf = 0;
297                continue;
298            }
299        }
300
301        Ok(out)
302    }
303
304    /// As you would expect, do_final() consumes the object.
305    pub fn do_final<T: AsRef<[u8]>>(mut self, input: T) -> Result<Vec<u8>, Base64Error> {
306        // process as much as we can the usual way.
307        let mut out = match self.decode_internal(input, false) {
308            Ok(out) => out,
309            Err(Base64Error::PaddingEnconteredDuringDoUpdate) => {
310                panic!("rollback_if_padding = false should not produce a Base64Error::PaddingEnconteredDuringDoUpdate");
311            }
312            Err(e) => return Err(e),
313        };
314
315        // now we only, maybe, have a single block containing padding to deal with.
316        if self.vals_in_buf != 0 {
317            // be tolerant of missing padding
318            // if we're at the end and it's not a complete block, then imagine the missing padding.
319            let pad_count: u8 = 3 - (self.vals_in_buf as u8 - 1);
320
321            out.push(self.buf[0] << 2 | self.buf[1] >> 4);
322            if pad_count != 2 {
323                out.push(self.buf[1] << 4 | self.buf[2] >> 2);
324            }
325            if pad_count == 0 {
326                out.push(self.buf[2] << 6 | self.buf[3]);
327            }
328        }
329
330        Ok(out)
331    }
332}