1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#[macro_use] extern crate matches;
mod polyfill;
mod lossy;
pub use lossy::LossyDecoder;
use std::cmp;
use std::str;
/// The Unicode replacement character, U+FFFD, as a one-character string slice.
///
/// NOTE(review): presumably what lossy decoding substitutes for invalid byte
/// sequences; the use site is not in this file — confirm in the `lossy` module.
pub const REPLACEMENT_CHARACTER: &'static str = "\u{FFFD}";
/// The reason `decode` could not turn its whole input into a `&str`.
///
/// Both variants carry the part of the input, before the problem, that was
/// successfully decoded.
#[derive(Debug, Copy, Clone)]
pub enum DecodeError<'a> {
/// The input contains a byte sequence that is not valid UTF-8.
Invalid {
/// The well-formed text that precedes the invalid sequence.
valid_prefix: &'a str,
/// The offending bytes, starting right after `valid_prefix`.
invalid_sequence: &'a [u8],
/// Every byte of the input that follows `invalid_sequence`.
remaining_input: &'a [u8],
},
/// The input ends in the middle of a byte sequence, so the final bytes
/// could not be decoded yet.
Incomplete {
/// The well-formed text that precedes the unfinished sequence.
valid_prefix: &'a str,
/// A copy of the trailing bytes that follow `valid_prefix`.
incomplete_suffix: Incomplete,
},
}
/// A small fixed-size buffer holding the leading bytes of a byte sequence
/// that was cut off at the end of an input chunk.
#[derive(Debug, Copy, Clone)]
pub struct Incomplete {
/// Storage for the buffered bytes; only the first `buffer_len` are meaningful.
pub buffer: [u8; 4],
/// Number of bytes of `buffer` currently in use.
pub buffer_len: u8,
}
/// Decodes `input` as UTF-8, reporting where decoding stopped on failure.
///
/// Returns `Ok` with the whole input viewed as a `&str` when every byte is
/// part of well-formed UTF-8.
///
/// # Errors
///
/// * `DecodeError::Invalid` when an ill-formed byte sequence is found; it
///   carries the valid text before it, the bad bytes, and the bytes after.
/// * `DecodeError::Incomplete` when the input stops part-way through a
///   sequence; it carries the valid text and a copy of the trailing bytes.
pub fn decode(input: &[u8]) -> Result<&str, DecodeError> {
    let utf8_error = match str::from_utf8(input) {
        Ok(text) => return Ok(text),
        Err(e) => e,
    };

    let boundary = utf8_error.valid_up_to();
    // SAFETY: `valid_up_to` is the length of the longest prefix of `input`
    // that `from_utf8` verified as well-formed UTF-8.
    let prefix = unsafe { str::from_utf8_unchecked(&input[..boundary]) };
    let tail = &input[boundary..];

    Err(match polyfill::utf8error_error_len(&utf8_error, input) {
        // A definite length means the bytes at `boundary` can never be valid.
        Some(bad_len) => DecodeError::Invalid {
            valid_prefix: prefix,
            invalid_sequence: &tail[..bad_len],
            remaining_input: &tail[bad_len..],
        },
        // No length means the input simply ended too early.
        None => DecodeError::Incomplete {
            valid_prefix: prefix,
            incomplete_suffix: Incomplete::new(tail),
        },
    })
}
impl Incomplete {
    /// Creates an `Incomplete` holding a copy of `bytes`.
    ///
    /// # Panics
    ///
    /// Panics if `bytes` is longer than the 4-byte internal buffer.
    pub fn new(bytes: &[u8]) -> Self {
        let mut storage = [0u8; 4];
        storage[..bytes.len()].copy_from_slice(bytes);
        Incomplete {
            buffer: storage,
            buffer_len: bytes.len() as u8,
        }
    }

    /// Tries to finish the buffered sequence using the start of `input`.
    ///
    /// As many bytes of `input` as fit are appended to the buffer, and the
    /// combined bytes are decoded:
    ///
    /// * `Some((Ok(text), rest))` — the buffer now starts with well-formed
    ///   UTF-8; `text` is that decoded part and `rest` is the unconsumed
    ///   tail of `input`. The buffer is reset to empty.
    /// * `Some((Err(bytes), rest))` — the buffer starts with an invalid
    ///   sequence; `bytes` is that sequence and `rest` is the unconsumed
    ///   tail of `input`. The buffer is reset to empty.
    /// * `None` — the combined bytes are still an unfinished sequence; the
    ///   bytes copied from `input` stay in the buffer for the next call.
    ///
    /// # Panics
    ///
    /// Panics if `buffer_len` exceeds the buffer size, or if the decoded or
    /// invalid portion is shorter than the bytes that were already buffered
    /// before the call.
    pub fn try_complete<'char, 'input>(&'char mut self, input: &'input [u8])
                                       -> Option<(Result<&'char str, &'char [u8]>, &'input [u8])> {
        let held = self.buffer_len as usize;

        // Top up the free tail of the buffer with the front of `input`.
        let taken = {
            let spare = &mut self.buffer[held..];
            let count = cmp::min(spare.len(), input.len());
            spare[..count].copy_from_slice(&input[..count]);
            count
        };

        let spliced = &self.buffer[..held + taken];
        let error = match str::from_utf8(spliced) {
            Ok(text) => {
                self.buffer_len = 0;
                return Some((Ok(text), &input[taken..]));
            }
            Err(error) => error,
        };

        let decoded_len = error.valid_up_to();
        if decoded_len > 0 {
            // SAFETY: `valid_up_to` is the length of the prefix of `spliced`
            // that `from_utf8` verified as well-formed UTF-8.
            let text = unsafe { str::from_utf8_unchecked(&spliced[..decoded_len]) };
            // Only the bytes beyond what was already buffered came from `input`.
            let used = decoded_len.checked_sub(held).unwrap();
            self.buffer_len = 0;
            return Some((Ok(text), &input[used..]));
        }

        match polyfill::utf8error_error_len(&error, spliced) {
            Some(bad_len) => {
                let bad_bytes = &spliced[..bad_len];
                let used = bad_len.checked_sub(held).unwrap();
                let rest = &input[used..];
                self.buffer_len = 0;
                Some((Err(bad_bytes), rest))
            }
            None => {
                // Still unfinished: remember the bytes taken from `input`.
                self.buffer_len = spliced.len() as u8;
                None
            }
        }
    }
}