bouncycastle_base64/lib.rs
1//! Good old fashioned base64 encoder and decoder.
2//!
//! It should just work the way you expect: [encode] takes any bytes-like Rust type
//! and returns a `String`, while [decode] takes base64 text in any bytes-like container
//! (for example `&str`, `String` or `&[u8]`) and returns a `Vec<u8>`.
6//!
7//!```
8//! use bouncycastle_base64 as base64;
9//!
10//! let out = base64::encode(b"\x00"); // "AA=="
11//! let out = base64::encode(b"Hello, World!"); // "SGVsbG8sIFdvcmxkIQ=="
12//! let out = base64::encode(b"\x00\x01\x02\x03\x04\x05\x06"); // "AAECAwQFBg=="
13//!
14//! let out = base64::decode("AA==").unwrap(); // b"\x00"
15//! let out = base64::decode("SGVsbG8sIFdvcmxkIQ==").unwrap(); // b"Hello, World!"
16//! let out = base64::decode("AAECAwQFBg==").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
17//!
18//! // note that the decoder automatically ignores whitespace in the b64 input
19//! let out1 = base64::decode("AAEC Aw QFB\ng==").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
20//! assert_eq!(out, out1);
21//!
22//! // it is also tolerant of missing padding characters
23//! let out = base64::decode("AAECAwQFBg==").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
24//! let out1 = base64::decode("AAECAwQFBg=").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
25//! assert_eq!(out, out1);
26//! let out2 = base64::decode("AAECAwQFBg").unwrap(); // b"\x00\x01\x02\x03\x04\x05\x06"
27//! assert_eq!(out, out2);
28//! ```
29//!
30//! # Streaming
31//! Unlike Hex, Base64 does not align cleanly to byte boundaries.
32//! That means that the above one-shot APIs should only be used if you have the entire content to
33//! process at the same time.
34//! In other words, if you arbitrarily break your data into chunks and hand it to the one-shot [encode] and [decode] APIs,
35//! you will get incorrect results.
36//! If you need to process your data in chunks, you need to use the streaming API that allows
37//! repeated calls to `do_update`, producing output as it goes, and correctly holds on to the unprocessed
38//! partial block until either `do_update` or `do_final` is called.
39//!
40//! ```
41//! use bouncycastle_base64 as base64;
42//!
43//! let mut b64_str: String = String::new();
44//! let mut encoder = base64::Base64Encoder::new();
45//! b64_str.push_str( encoder.do_update(b"Hello,").as_str() );
46//! b64_str.push_str( encoder.do_final(b" World!").as_str() );
47//! assert_eq!(b64_str, "SGVsbG8sIFdvcmxkIQ==");
48//!
49//! let mut out_bytes = Vec::<u8>::new();
50//! let mut decoder = base64::Base64Decoder::new(/*skip_whitespace*/ false);
51//! out_bytes.extend( decoder.do_update("SGVs").unwrap() );
52//! out_bytes.extend( decoder.do_final("bG8sIFdvcmxkIQ==").unwrap() );
53//! assert_eq!(out_bytes, b"Hello, World!");
54//! ```
55//!
56//! # Security and constant-time
57//!
//! The following paper demonstrates that extremely clever attack algorithms exist to recover private keys
//! if the attacker is able to closely observe side channels of the base64 decode process.
//!
//! > [Util::Lookup: Exploiting key decoding in cryptographic libraries (Sieck, 2021)](https://arxiv.org/pdf/2108.04600.pdf)
62//!
63//! As this is a cryptography library, we are assuming that this base64 implementation will be used to encode
64//! and decode private keys in PEM and JWK formats and so we are only providing a constant-time implementation
65//! in order to remove the temptation to shoot yourself in the foot in the name of a small performance gain.
66//!
67//! In our testing, a naïve lookup table-based implementation of base64::decode was 1.7x faster than
68//! our constant-time implementation, and we are quite sure that optimized base64 implementations exist that
69//! provide still better performance.
70//! So if you find yourself in a position of needing to base64 encode gigabytes of non-sensitive data, then
71//! we recommend you use one of the good, fast, but non-constant-time base64 implementations available from other projects.
72//!
73//!
74//! # Alphabets:
75//!
76//! At the present time, this base64 implementation only supports the standard alphabet with "+" and "/", specifically:
77//! ```text
78//! ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=
79//! ```
80//! but additional alphabets such as the URLSafe alphabet will likely be added in future versions.
81// /// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_="
82// URLSafe,
83
84
85use bouncycastle_utils::ct::Condition;
86
87/// One-shot encode from bytes to a base64-encoded string using a constant-time implementation.
88pub fn encode<T: AsRef<[u8]>>(input: T) -> String {
89 Base64Encoder::new().do_final(input)
90}
91
92/// One-shot decode from a base64-encoded string to bytes using a constant-time implementation.
93pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, Base64Error> {
94 Base64Decoder::new(true).do_final(input)
95}
96
/// Errors reported by the base64 decoder.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be propagated with `?`
/// into `Box<dyn Error>` and similar error containers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Base64Error {
    /// The do_update() method must not be called on a block that contains padding.
    /// If this error is returned, then the provided input has not been processed and the caller must instead
    /// pass the same input to do_final(). Note that do_final() is tolerant of incomplete padding blocks,
    /// so even if an additional padding character is contained in the next chunk of input, do_final() will still produce
    /// the correct output -- ie any additional chunks held by the caller can be discarded.
    ///
    /// (The misspelling in the variant name is kept for API compatibility.)
    PaddingEnconteredDuringDoUpdate,

    /// Input contained a character that was not in the base64 alphabet.
    /// The payload is the index of the illegal character within the input slice that was passed
    /// to the call which returned the error.
    InvalidB64Character(usize),
}

impl std::fmt::Display for Base64Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Base64Error::PaddingEnconteredDuringDoUpdate => f.write_str(
                "padding encountered during do_update(); pass the same input to do_final() instead",
            ),
            Base64Error::InvalidB64Character(index) => {
                write!(f, "invalid base64 character at index {}", index)
            }
        }
    }
}

impl std::error::Error for Base64Error {}
109
/// The stateful base64 encoder that supports streaming.
///
/// Input bytes are consumed three at a time; bytes that do not yet fill a complete
/// 3-byte group are carried over between calls in `buf`.
pub struct Base64Encoder {
    // Pending input bytes that have not yet formed a complete 3-byte group.
    buf: [u8; 3],
    // Number of leading entries of `buf` that currently hold pending bytes.
    vals_in_buf: usize,
}
115
116impl Base64Encoder {
117 /// Create a new instance.
118 pub fn new() -> Self {
119 Self { buf: [0; 3], vals_in_buf: 0 }
120 }
121
122 fn ct_bin_to_b64(c: u8) -> u8 {
123 let in_az = Condition::<i64>::is_within_range(c as i64, 26, 51);
124 let in_09 = Condition::<i64>::is_within_range(c as i64, 52, 61);
125 let eq_plus = Condition::<i64>::is_equal(c as i64, 62);
126 let eq_slash = Condition::<i64>::is_equal(c as i64, 63);
127
128 // TODO: redo this once we have ct::u8 implemented ... the i64 is wasteful
129
130 #[allow(non_snake_case)]
131 let c_AZ: i64 = 'A' as i64 + c as i64;
132 let c_az: i64 = 'a' as i64 + (c as i64 - 26);
133 let c_09: i64 = '0' as i64 + (c as i64 - 2 * 26);
134 let c_plus: i64 = '+' as i64;
135 let c_slash: i64 = '/' as i64;
136
137 let mut ret: i64 = c_AZ as i64;
138 ret = in_az.select(c_az as i64, ret);
139 ret = in_09.select(c_09 as i64, ret);
140 ret = eq_plus.select(c_plus, ret);
141 ret = eq_slash.select(c_slash, ret);
142 ret as u8
143 }
144
145 pub fn do_update<T: AsRef<[u8]>>(&mut self, input: T) -> String {
146 let inref = input.as_ref();
147 let mut out: Vec<u8> = Vec::with_capacity(inref.len() * 4 / 3 + 4);
148 let mut out_buf: [u8; 4] = [0; 4];
149
150 for i in 0..inref.len() {
151 self.buf[self.vals_in_buf] = inref[i];
152 self.vals_in_buf += 1;
153
154 if self.vals_in_buf == 3 {
155 // process a block
156 Self::encode_block(&self.buf, &mut out_buf);
157 out.append(&mut out_buf.to_vec());
158 self.vals_in_buf = 0;
159 }
160 }
161
162 String::from_utf8(out).unwrap()
163 }
164
165 /// As you would expect, do_final() consumes the object along with a final block.
166 /// do_final may be called with the entire content; ie without any do_update's before it.
167 pub fn do_final<T: AsRef<[u8]>>(mut self, input: T) -> String {
168 let mut out = self.do_update(input);
169
170 // pad the last block.
171 if self.vals_in_buf != 0 {
172 let mut out_buf: [u8; 4] = [0; 4];
173 if self.vals_in_buf == 1 {
174 self.buf[1] = 0;
175 }
176 if self.vals_in_buf <= 2 {
177 self.buf[2] = 0;
178 }
179 Self::encode_block(&self.buf, &mut out_buf);
180 if self.vals_in_buf <= 2 {
181 out_buf[3] = b'=';
182 }
183 if self.vals_in_buf == 1 {
184 out_buf[2] = b'=';
185 }
186 out.push_str(std::str::from_utf8(&out_buf).unwrap());
187 }
188 out
189 }
190
191 fn encode_block<T: AsRef<[u8]>>(input: T, out: &mut [u8]) {
192 let inref = input.as_ref();
193 assert!(inref.len() >= 3);
194 assert!(out.len() >= 4);
195
196 out[0] = Self::ct_bin_to_b64(inref[0] >> 2);
197 out[1] = Self::ct_bin_to_b64(((inref[0] & 0x03) << 4) | inref[1] >> 4);
198 out[2] = Self::ct_bin_to_b64(((inref[1] & 0x0F) << 2) | inref[2] >> 6);
199 out[3] = Self::ct_bin_to_b64(inref[2] & 0x3F);
200 }
201}
202
/// The stateful base64 decoder that supports streaming.
///
/// Input symbols are consumed four at a time; symbols that do not yet fill a complete
/// 4-symbol group are carried over between calls in `buf`.
pub struct Base64Decoder {
    // Decoded 6-bit values of the symbols that have not yet formed a complete 4-symbol group.
    buf: [u8; 4],
    // Number of leading entries of `buf` that currently hold pending values.
    vals_in_buf: usize,
    // When true, whitespace in the input is skipped; when false it is reported as an
    // invalid character.
    skip_whitespace: bool,
}
209
210impl Base64Decoder {
211 /// Create a new instance.
212 pub fn new(skip_whitespace: bool) -> Self {
213 Base64Decoder { buf: [0; 4], vals_in_buf: 0, skip_whitespace }
214 }
215
216 fn ct_b64_to_bin(b: u8) -> u8 {
217 let in_az = Condition::<i64>::is_within_range(b as i64, 97, 122);
218 #[allow(non_snake_case)]
219 let in_AZ = Condition::<i64>::is_within_range(b as i64, 65, 90);
220 let in_09 = Condition::<i64>::is_within_range(b as i64, 48, 57);
221 let is_plus = Condition::<i64>::is_equal(b as i64, 43);
222 let is_slash = Condition::<i64>::is_equal(b as i64, 47);
223 let is_padding = Condition::<i64>::is_equal(b as i64, 61);
224 let is_whitespace = Condition::<i64>::is_in_list(
225 b as i64,
226 &[' ' as i64, '\t' as i64, '\n' as i64, '\r' as i64],
227 );
228
229 #[allow(non_snake_case)]
230 let c_AZ: i64 = b as i64 - 'A' as i64;
231 let c_az: i64 = b as i64 - 'a' as i64 + 26;
232 let c_09: i64 = b as i64 - '0' as i64 + 2*26;
233
234 let mut ret: i64 = 0xFFi64;
235
236 ret = in_AZ.select(c_AZ, ret);
237 ret = in_az.select(c_az, ret);
238 ret = in_09.select(c_09, ret);
239 ret = is_plus.select(62, ret);
240 ret = is_slash.select(63, ret);
241 ret = is_padding.select(0x81, ret);
242 ret = is_whitespace.select(0x80, ret);
243
244 ret as u8
245 }
246
247 pub fn do_update<T: AsRef<[u8]>>(&mut self, input: T) -> Result<Vec<u8>, Base64Error> {
248 self.decode_internal(input, true)
249 }
250
251 fn decode_internal<T: AsRef<[u8]>>(
252 &mut self,
253 input: T,
254 rollback_if_padding: bool,
255 ) -> Result<Vec<u8>, Base64Error> {
256 // copy the current state so that we can restore it if we encounter a padding character.
257 let starting_state: [u8; 4] = self.buf.clone();
258 let starting_vals_in_block: usize = self.vals_in_buf;
259
260 let inref = input.as_ref();
261 let mut out: Vec<u8> = vec![];
262
263 let mut i: usize = 0;
264 while i < inref.len() {
265 self.buf[self.vals_in_buf] = Self::ct_b64_to_bin(inref[i]);
266 if self.buf[self.vals_in_buf] == 0xFF {
267 return Err(Base64Error::InvalidB64Character(i));
268 }
269 if self.buf[self.vals_in_buf] == 0x80 {
270 if self.skip_whitespace {
271 i += 1;
272 continue;
273 } else {
274 return Err(Base64Error::InvalidB64Character(i));
275 }
276 }
277 if self.buf[self.vals_in_buf] == 0x81 {
278 // Error: we found padding.
279 if rollback_if_padding {
280 // Roll back and return Base64Error::NonFinalBlockContainsPadding.
281 self.buf = starting_state.clone();
282 self.vals_in_buf = starting_vals_in_block;
283 }
284 return Ok(out);
285 }
286
287 i += 1;
288 self.vals_in_buf += 1;
289
290 // here we get to assume that the buffer contains no padding.
291 if self.vals_in_buf == 4 {
292 // decode block
293 out.push(self.buf[0] << 2 | self.buf[1] >> 4);
294 out.push(self.buf[1] << 4 | self.buf[2] >> 2);
295 out.push(self.buf[2] << 6 | self.buf[3]);
296 self.vals_in_buf = 0;
297 continue;
298 }
299 }
300
301 Ok(out)
302 }
303
304 /// As you would expect, do_final() consumes the object.
305 pub fn do_final<T: AsRef<[u8]>>(mut self, input: T) -> Result<Vec<u8>, Base64Error> {
306 // process as much as we can the usual way.
307 let mut out = match self.decode_internal(input, false) {
308 Ok(out) => out,
309 Err(Base64Error::PaddingEnconteredDuringDoUpdate) => {
310 panic!("rollback_if_padding = false should not produce a Base64Error::PaddingEnconteredDuringDoUpdate");
311 }
312 Err(e) => return Err(e),
313 };
314
315 // now we only, maybe, have a single block containing padding to deal with.
316 if self.vals_in_buf != 0 {
317 // be tolerant of missing padding
318 // if we're at the end and it's not a complete block, then imagine the missing padding.
319 let pad_count: u8 = 3 - (self.vals_in_buf as u8 - 1);
320
321 out.push(self.buf[0] << 2 | self.buf[1] >> 4);
322 if pad_count != 2 {
323 out.push(self.buf[1] << 4 | self.buf[2] >> 2);
324 }
325 if pad_count == 0 {
326 out.push(self.buf[2] << 6 | self.buf[3]);
327 }
328 }
329
330 Ok(out)
331 }
332}