mirror of
https://github.com/chatmail/core.git
synced 2026-05-22 16:26:31 +03:00
fix: Correctly sanitize input everywhere (#5697)
Best reviewed commit-by-commit; the commit messages explain what is done.
This commit is contained in:
@@ -22,7 +22,8 @@
|
||||
clippy::bool_assert_comparison,
|
||||
clippy::manual_split_once,
|
||||
clippy::format_push_string,
|
||||
clippy::bool_to_int_with_if
|
||||
clippy::bool_to_int_with_if,
|
||||
clippy::manual_range_contains
|
||||
)]
|
||||
|
||||
use std::fmt;
|
||||
@@ -36,7 +37,6 @@ use once_cell::sync::Lazy;
|
||||
use regex::Regex;
|
||||
|
||||
// TODOs to clean up:
|
||||
// - Check if sanitizing is done correctly everywhere
|
||||
// - Apply lints everywhere (https://doc.rust-lang.org/cargo/reference/workspaces.html#the-lints-table)
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -263,27 +263,27 @@ impl rusqlite::types::ToSql for ContactAddress {
|
||||
}
|
||||
}
|
||||
|
||||
/// Make the name and address
|
||||
/// Takes a name and an address and sanitizes them:
|
||||
/// - Extracts a name from the addr if the addr is in form "Alice <alice@example.org>"
|
||||
/// - Removes special characters from the name, see [`sanitize_name()`]
|
||||
/// - Removes the name if it is equal to the address by setting it to ""
|
||||
pub fn sanitize_name_and_addr(name: &str, addr: &str) -> (String, String) {
|
||||
static ADDR_WITH_NAME_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new("(.*)<(.*)>").unwrap());
|
||||
let (name, addr) = if let Some(captures) = ADDR_WITH_NAME_REGEX.captures(addr.as_ref()) {
|
||||
(
|
||||
if name.is_empty() {
|
||||
strip_rtlo_characters(captures.get(1).map_or("", |m| m.as_str()))
|
||||
captures.get(1).map_or("", |m| m.as_str())
|
||||
} else {
|
||||
strip_rtlo_characters(name)
|
||||
name
|
||||
},
|
||||
captures
|
||||
.get(2)
|
||||
.map_or("".to_string(), |m| m.as_str().to_string()),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
strip_rtlo_characters(&normalize_name(name)),
|
||||
addr.to_string(),
|
||||
)
|
||||
(name, addr.to_string())
|
||||
};
|
||||
let mut name = normalize_name(&name);
|
||||
let mut name = sanitize_name(name);
|
||||
|
||||
// If the 'display name' is just the address, remove it:
|
||||
// Otherwise, the contact would sometimes be shown as "alice@example.com (alice@example.com)" (see `get_name_n_addr()`).
|
||||
@@ -295,31 +295,77 @@ pub fn sanitize_name_and_addr(name: &str, addr: &str) -> (String, String) {
|
||||
(name, addr)
|
||||
}
|
||||
|
||||
/// Normalize a name.
|
||||
/// Sanitizes a name.
|
||||
///
|
||||
/// - Remove quotes (come from some bad MUA implementations)
|
||||
/// - Trims the resulting string
|
||||
///
|
||||
/// Typically, this function is not needed as it is called implicitly by `Contact::add_address_book`.
|
||||
pub fn normalize_name(full_name: &str) -> String {
|
||||
let full_name = full_name.trim();
|
||||
if full_name.is_empty() {
|
||||
return full_name.into();
|
||||
}
|
||||
/// - Removes newlines and trims the string
|
||||
/// - Removes quotes (come from some bad MUA implementations)
|
||||
/// - Removes potentially-malicious bidi characters
|
||||
pub fn sanitize_name(name: &str) -> String {
|
||||
let name = sanitize_single_line(name);
|
||||
|
||||
match full_name.as_bytes() {
|
||||
[b'\'', .., b'\''] | [b'\"', .., b'\"'] | [b'<', .., b'>'] => full_name
|
||||
.get(1..full_name.len() - 1)
|
||||
match name.as_bytes() {
|
||||
[b'\'', .., b'\''] | [b'\"', .., b'\"'] | [b'<', .., b'>'] => name
|
||||
.get(1..name.len() - 1)
|
||||
.map_or("".to_string(), |s| s.trim().to_string()),
|
||||
_ => full_name.to_string(),
|
||||
_ => name.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Sanitizes user input
|
||||
///
|
||||
/// - Removes newlines and trims the string
|
||||
/// - Removes potentially-malicious bidi characters
|
||||
pub fn sanitize_single_line(input: &str) -> String {
|
||||
sanitize_bidi_characters(input.replace(['\n', '\r'], " ").trim())
|
||||
}
|
||||
|
||||
const RTLO_CHARACTERS: [char; 5] = ['\u{202A}', '\u{202B}', '\u{202C}', '\u{202D}', '\u{202E}'];
|
||||
/// This method strips all occurrences of the RTLO Unicode character.
|
||||
/// [Why is this needed](https://github.com/deltachat/deltachat-core-rust/issues/3479)?
|
||||
pub fn strip_rtlo_characters(input_str: &str) -> String {
|
||||
input_str.replace(|char| RTLO_CHARACTERS.contains(&char), "")
|
||||
const ISOLATE_CHARACTERS: [char; 3] = ['\u{2066}', '\u{2067}', '\u{2068}'];
|
||||
const POP_ISOLATE_CHARACTER: char = '\u{2069}';
|
||||
/// Some control unicode characters can influence whether adjacent text is shown from
|
||||
/// left to right or from right to left.
|
||||
///
|
||||
/// Since user input is not supposed to influence how adjacent text looks,
|
||||
/// this function removes some of these characters.
|
||||
///
|
||||
/// Also see https://github.com/deltachat/deltachat-core-rust/issues/3479.
|
||||
pub fn sanitize_bidi_characters(input_str: &str) -> String {
|
||||
// RTLO_CHARACTERS are apparently rarely used in practice.
|
||||
// They can impact all following text, so, better remove them all:
|
||||
let input_str = input_str.replace(|char| RTLO_CHARACTERS.contains(&char), "");
|
||||
|
||||
// If the ISOLATE characters are not ended with a POP DIRECTIONAL ISOLATE character,
|
||||
// we regard the input as potentially malicious and simply remove all ISOLATE characters.
|
||||
// See https://en.wikipedia.org/wiki/Bidirectional_text#Unicode_bidi_support
|
||||
// and https://www.w3.org/International/questions/qa-bidi-unicode-controls.en
|
||||
// for an explanation about ISOLATE characters.
|
||||
fn isolate_characters_are_valid(input_str: &str) -> bool {
|
||||
let mut isolate_character_nesting: i32 = 0;
|
||||
for char in input_str.chars() {
|
||||
if ISOLATE_CHARACTERS.contains(&char) {
|
||||
isolate_character_nesting += 1;
|
||||
} else if char == POP_ISOLATE_CHARACTER {
|
||||
isolate_character_nesting -= 1;
|
||||
}
|
||||
|
||||
// According to Wikipedia, 125 levels are allowed:
|
||||
// https://en.wikipedia.org/wiki/Unicode_control_characters
|
||||
// (although, in practice, we could also significantly lower this number)
|
||||
if isolate_character_nesting < 0 || isolate_character_nesting > 125 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
isolate_character_nesting == 0
|
||||
}
|
||||
|
||||
if isolate_characters_are_valid(&input_str) {
|
||||
input_str
|
||||
} else {
|
||||
input_str.replace(
|
||||
|char| ISOLATE_CHARACTERS.contains(&char) || POP_ISOLATE_CHARACTER == char,
|
||||
"",
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns false if addr is an invalid address, otherwise true.
|
||||
@@ -668,4 +714,64 @@ END:VCARD
|
||||
assert_eq!(contacts[0].profile_image.as_deref().unwrap(), "/9j/4AAQSkZJRgABAQAAAQABAAD/4gIoSUNDX1BST0ZJTEUAAQEAAAIYAAAAAAQwAABtbnRyUkdCIFhZWiAAAAAAAAAAAAAAAABhY3NwAAAAAAAAAAAAAAAAL8bRuAJYoZUYrI4ZY3VWwxw4Ay28AAGBISScmf/2Q==");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sanitize_name() {
|
||||
assert_eq!(&sanitize_name(" hello world "), "hello world");
|
||||
assert_eq!(&sanitize_name("<"), "<");
|
||||
assert_eq!(&sanitize_name(">"), ">");
|
||||
assert_eq!(&sanitize_name("'"), "'");
|
||||
assert_eq!(&sanitize_name("\""), "\"");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sanitize_single_line() {
|
||||
assert_eq!(sanitize_single_line("Hi\naiae "), "Hi aiae");
|
||||
assert_eq!(sanitize_single_line("\r\nahte\n\r"), "ahte");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sanitize_bidi_characters() {
|
||||
// Legit inputs:
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Tes\u{2067}ting Delta Chat\u{2069}"),
|
||||
"Tes\u{2067}ting Delta Chat\u{2069}"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Tes\u{2067}ting \u{2068} Delta Chat\u{2069}\u{2069}"),
|
||||
"Tes\u{2067}ting \u{2068} Delta Chat\u{2069}\u{2069}"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Tes\u{2067}ting\u{2069} Delta Chat\u{2067}\u{2069}"),
|
||||
"Tes\u{2067}ting\u{2069} Delta Chat\u{2067}\u{2069}"
|
||||
);
|
||||
|
||||
// Potentially-malicious inputs:
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Tes\u{202C}ting Delta Chat"),
|
||||
"Testing Delta Chat"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Testing Delta Chat\u{2069}"),
|
||||
"Testing Delta Chat"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Tes\u{2067}ting Delta Chat"),
|
||||
"Testing Delta Chat"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Tes\u{2069}ting Delta Chat\u{2067}"),
|
||||
"Testing Delta Chat"
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&sanitize_bidi_characters("Tes\u{2068}ting Delta Chat"),
|
||||
"Testing Delta Chat"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user