mirror of
https://github.com/mrfluffy-dev/kami.git
synced 2026-01-17 12:50:32 +00:00
62 lines
2.5 KiB
Rust
62 lines
2.5 KiB
Rust
use regex::Regex;
|
|
|
|
//function that takes a vector called ln_chapters of strings and removes everyting after the first occurence of "-" and all \ and "
|
|
pub fn remove_after_dash(ln_chapters: &Vec<String>) -> Vec<String> {
|
|
let mut ln_chapters_new: Vec<String> = Vec::new();
|
|
let re = Regex::new(r#"\\"(.*?) -"#).unwrap();
|
|
for ln in ln_chapters {
|
|
for cap in re.captures_iter(ln) {
|
|
ln_chapters_new.push(cap.get(1).unwrap().as_str().trim().to_string());
|
|
}
|
|
}
|
|
ln_chapters_new = replace_unicode(&ln_chapters_new);
|
|
ln_chapters_new
|
|
}
|
|
|
|
/// Decodes `\uXXXX` escape sequences (backslash, `u`, four hex digits) in
/// every string of `ln_chapters` and returns the decoded strings in order.
///
/// * Every escape in a line is decoded, not just the last distinct one.
/// * A high surrogate immediately followed by a low surrogate
///   (e.g. `\ud83d\ude00`) is combined into the single character it encodes.
/// * An escape that does not name a valid character on its own (a lone
///   surrogate) and any malformed sequence (fewer than four hex digits) are
///   left in the output unchanged instead of panicking.
///
/// Strings without escapes are returned as-is (copied).
pub fn replace_unicode(ln_chapters: &Vec<String>) -> Vec<String> {
    ln_chapters
        .iter()
        .map(|ln| decode_unicode_escapes(ln))
        .collect()
}

/// Returns `line` with every decodable `\uXXXX` escape replaced by its character.
fn decode_unicode_escapes(line: &str) -> String {
    // Byte length of one `\uXXXX` escape; all six bytes are ASCII.
    const ESCAPE_LEN: usize = 6;

    let mut decoded = String::with_capacity(line.len());
    let mut rest = line;

    while let Some(start) = rest.find("\\u") {
        decoded.push_str(&rest[..start]);
        let tail = &rest[start..];

        let consumed = match parse_unicode_escape(tail) {
            // `\u` not followed by four hex digits: keep it literally and move on.
            None => {
                decoded.push_str(&tail[..2]);
                2
            }
            Some(unit) => {
                // A high surrogate is only meaningful together with the low
                // surrogate escape that directly follows it.
                let low = if (0xD800..=0xDBFF).contains(&unit) {
                    tail.get(ESCAPE_LEN..)
                        .and_then(parse_unicode_escape)
                        .filter(|l| (0xDC00..=0xDFFF).contains(l))
                } else {
                    None
                };

                let (code_point, len) = match low {
                    Some(low) => (
                        0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00),
                        2 * ESCAPE_LEN,
                    ),
                    None => (unit, ESCAPE_LEN),
                };

                match char::from_u32(code_point) {
                    Some(ch) => decoded.push(ch),
                    // Lone surrogate: not a valid char, keep the escape text.
                    None => decoded.push_str(&tail[..len]),
                }
                len
            }
        };

        rest = &tail[consumed..];
    }

    decoded.push_str(rest);
    decoded
}

/// Parses a `\uXXXX` escape at the very start of `text` into its 16-bit value.
fn parse_unicode_escape(text: &str) -> Option<u32> {
    let hex = text.strip_prefix("\\u")?.get(..4)?;
    // `from_str_radix` would also accept a leading `+`, so check digits first.
    if hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        u32::from_str_radix(hex, 16).ok()
    } else {
        None
    }
}
|
|
|
|
/// Replaces a fixed set of typographic characters in every string of
/// `ln_text` with ASCII stand-ins and returns the rewritten strings in order.
///
/// Dashes and bar-like characters become `--`, curly and prime quotes become
/// `'` or `"`, and the ellipsis character becomes `...`.
pub fn fix_html_encoding(ln_text: &Vec<String>) -> Vec<String> {
    // (character to find, ASCII replacement), applied in this order.
    const SUBSTITUTIONS: [(&str, &str); 11] = [
        ("―", "--"),
        ("‖", "--"),
        ("‘", "'"),
        ("’", "'"),
        ("“", "\""),
        ("”", "\""),
        ("…", "..."),
        ("′", "'"),
        ("″", "\""),
        ("⁄", "--"),
        ("—", "--"),
    ];

    ln_text
        .iter()
        .map(|line| {
            SUBSTITUTIONS
                .iter()
                .fold(line.clone(), |text, &(from, to)| text.replace(from, to))
        })
        .collect()
}
|