use regex::Regex; use std::vec::Vec; use std::fs::File; use std::io::prelude::*; use colored::Colorize; use isahc::prelude::*; use std::process::{Command, ExitStatus}; use std::io::Result; fn main() { let mut search_path = String::new(); println!("What ln do you want to read?"); std::io::stdin().read_line(&mut search_path).expect("Failed to read line"); let search_path = search_path.replace(" ", "+"); let url = "https://readlightnovels.net/?s=".to_string(); let url = format!("{}{}", url, search_path.trim()).trim().to_string(); let html = get_html(&url).trim().to_string(); let ln_list = get_ln_list(&html); //remove first element of ln_list let ln_list = ln_list.iter().skip(1).map(|x| x.to_string()).collect::>(); let ln_titles = get_ln_titles(&ln_list); let ln_urls = get_ln_urls(&ln_list); let mut count = 0; for ln in ln_titles { if count % 2 == 0 { println!("({})\t{}",count, format!("{}", ln.blue())); } else { println!("({})\t{}",count, format!("{}", ln.yellow())); } count += 1; } println!("Which ln do you want to read?"); let mut ln_number = String::new(); std::io::stdin().read_line(&mut ln_number).expect("Failed to read line"); let ln_number = ln_number.trim().to_string(); let ln_number = ln_number.parse::().unwrap(); let ln_url = &ln_urls[ln_number]; let ln_url = ln_url.trim().to_string(); let ln_html = get_html(&ln_url); let ln_id = get_ln_id(&ln_html); let ln_last_page = get_ln_last_page(&ln_html); let ln_page_html = page_selector(&ln_last_page, &ln_id); let ln_chapters = get_ln_chapters(&ln_page_html); let ln_chapters_urls = get_ln_chapters_urls(&ln_page_html); count = 0; for chaprer in ln_chapters { if count % 2 == 0 { println!("({})\t{}",count, format!("{}", chaprer.blue())); } else { println!("({})\t{}",count, format!("{}", chaprer.yellow())); } count += 1; } println!("Which chapter do you want to read?"); let mut chaprer_number = String::new(); std::io::stdin().read_line(&mut chaprer_number).expect("Failed to read line"); let chaprer_number = chaprer_number.trim().to_string(); let chaprer_number = chaprer_number.parse::().unwrap(); let chaprer_url = &ln_chapters_urls[chaprer_number]; let chaprer_url = chaprer_url.trim().to_string(); let ln_text = get_ln_text(&chaprer_url); let mut full_text: String = String::new(); for line in ln_text { let text = format!("{}\n\n", line); full_text.push_str(&text); } //write full_text to file called temp.txt let mut file = File::create("/tmp/log_e").expect("Unable to create file"); file.write_all(full_text.as_bytes()).expect("Unable to write to file"); //close file file.sync_all().expect("Unable to sync file"); //open temp.txt in cat for user to read let _com = open_bat(); } pub fn open_bat() -> Result { Command::new("bat").arg("-p").arg("/tmp/log_e").spawn()?.wait() } //gets the full html of the page fn get_html(url: &str) -> String { let mut resp = isahc::get(url).unwrap(); let html = resp.text().unwrap(); html } //gets the list of ln's from the html and returns it as a vector of the ln's name and href fn get_ln_list(html: &str) -> Vec { let re = Regex::new(r#"(?m)^\s*( = Vec::new(); for cap in re.captures_iter(html) { ln_list.push(cap.get(1).unwrap().as_str().trim().to_string()); } ln_list } //gets the titles of the ln's from the html and returns it as a vector of the ln's name fn get_ln_titles(ln_list: &Vec) -> Vec { let re = Regex::new(r#"(?m)^\s* = Vec::new(); for ln in ln_list { for cap in re.captures_iter(ln) { ln_title.push(cap.get(1).unwrap().as_str().to_string()); } } ln_title } //gets the urls of the ln's from the html and returns it as a vector of the ln's href fn get_ln_urls(ln_list: &Vec) -> Vec { let re = Regex::new(r#"(?m)^\s* = Vec::new(); for ln in ln_list { for cap in re.captures_iter(ln) { ln_url.push(cap.get(1).unwrap().as_str().to_string()); } } ln_url } //gets the chapter titles from the html and returns it as a vector of the chapter's name fn get_ln_chapters(html: &str) -> Vec { let re = Regex::new(r#"title=(.*?)>"#).unwrap(); let mut ln_list:Vec = Vec::new(); for cap in re.captures_iter(html) { ln_list.push(cap.get(1).unwrap().as_str().trim().to_string()); } ln_list = remove_after_dash(&ln_list); ln_list } //function that takes a vector called ln_chapters of strings and removes everyting after the first occurence of "-" and all \ and " fn remove_after_dash(ln_chapters: &Vec) -> Vec { let mut ln_chapters_new:Vec = Vec::new(); let re = Regex::new(r#"\\"(.*?) -"#).unwrap(); for ln in ln_chapters { for cap in re.captures_iter(ln) { ln_chapters_new.push(cap.get(1).unwrap().as_str().trim().to_string()); } } ln_chapters_new = replace_unicode(&ln_chapters_new); ln_chapters_new } //function that takes a vector called ln_chapters and looks for unicode characters and replaces them with the ascii version fn replace_unicode(ln_chapters: &Vec) -> Vec { let mut ln_chapters_new:Vec = Vec::new(); for ln in ln_chapters { //make regex to find all \uxxxx and save it in to a vector let re = Regex::new(r#"(\\u[0-9a-fA-F]{4})"#).unwrap(); let mut vec_unicode:Vec = Vec::new(); for cap in re.captures_iter(ln) { vec_unicode.push(cap.get(1).unwrap().as_str().to_string()); } let mut ln_new :String = String::new(); if vec_unicode.len() > 0 { //loop through the vector and replace the unicode characters with the ascii version for unicode in vec_unicode { //convert the unicode to char let unicode_char = char::from_u32(u32::from_str_radix(&unicode[2..6], 16).unwrap()).unwrap(); let unicode_str = unicode_char as char; ln_new = ln.replace(&unicode, &unicode_str.to_string()); } } else { ln_new = ln.to_string(); } ln_chapters_new.push(ln_new); } ln_chapters_new } fn fix_html_encoding(ln_text: &Vec) -> Vec{ let mut ln_text_new:Vec = Vec::new(); for ln in ln_text{ let ln = ln.replace("―", "--"); let ln = ln.replace("‖", "--"); let ln = ln.replace("‘", "'"); let ln = ln.replace("’", "'"); let ln = ln.replace("“", "\""); let ln = ln.replace("”", "\""); let ln = ln.replace("…", "..."); let ln = ln.replace("′", "'"); let ln = ln.replace("″", "\""); let ln = ln.replace("⁄", "--"); let ln = ln.replace("—", "--"); ln_text_new.push(ln); } ln_text_new } //gets the chapter urls from the html and returns it as a vector of the chapter's href fn get_ln_chapters_urls(html: &str) -> Vec { let re = Regex::new(r#"href=\\"(h.*?)""#).unwrap(); let mut ln_list:Vec = Vec::new(); for cap in re.captures_iter(html) { ln_list.push(cap.get(1).unwrap().as_str().trim().to_string()); } ln_list = url_clean(&ln_list); ln_list }// take a vector of srings called ln_chapters_url and remove all the \ fn url_clean(ln_chapters_url: &Vec) -> Vec { let mut ln_chapters_url_new:Vec = Vec::new(); for ln in ln_chapters_url { let ln = ln.replace("\\", ""); ln_chapters_url_new.push(ln); } ln_chapters_url_new } //take a html string and return the ln id fn get_ln_last_page(html: &str) -> String { let re = Regex::new(r#"(?m)Last"#).unwrap(); let mut ln_last_page:String = String::new(); for cap in re.captures_iter(html) { ln_last_page = cap.get(1).unwrap().as_str().to_string(); } ln_last_page } //take a html string and return the ln id fn get_ln_id(html: &str) -> String { let re = Regex::new(r#"(?m)^\s*"#).unwrap(); let mut ln_id:String = String::new(); for cap in re.captures_iter(html) { ln_id = cap.get(1).unwrap().as_str().to_string(); } ln_id } //using isahc::prelude::* make a php reqest to get the next page of the ln fn get_ln_next_page(ln_id: &str, page: &str) -> String { let url = "https://readlightnovels.net/wp-admin/admin-ajax.php".to_string(); let form = format!("action=tw_ajax&type=pagination&id={}.html&page={}", ln_id, page); let mut resp = isahc::post(url,form).unwrap(); let html = resp.text().unwrap(); html } fn page_selector(max_page: &str, ln_id: &str) -> String { println!("Please select a page go to"); println!("1..{}", max_page); let mut page_select = String::new(); std::io::stdin().read_line(&mut page_select).unwrap(); let page_select: u32 = page_select.trim().parse().unwrap(); while page_select > max_page.parse::().unwrap() { println!("Please select a page go to"); println!("1..{}", max_page); let mut page_select = String::new(); std::io::stdin().read_line(&mut page_select).unwrap(); } get_ln_next_page(&ln_id, &page_select.to_string()) } fn get_ln_text(chapter_url: &str) -> Vec { let mut resp = isahc::get(chapter_url).unwrap(); let html = String::from(resp.text().unwrap()); let re = Regex::new(r#"(?m)

(.*?)

"#).unwrap(); let mut ln_text:Vec = Vec::new(); for cap in re.captures_iter(&html) { ln_text.push(cap.get(1).unwrap().as_str().trim().to_string()); } // remove last 3 indexes of ln_text ln_text.truncate(ln_text.len() - 3); let ln_text = fix_html_encoding(&ln_text); ln_text }