refactor(wip): refactor scraper

This commit is contained in:
newbee1905
2023-02-05 03:25:06 +07:00
parent 3ebb06cc1b
commit 7b605f40b8
8 changed files with 50 additions and 36 deletions

1
Cargo.lock generated
View File

@@ -629,6 +629,7 @@ dependencies = [
"crossterm 0.24.0",
"dirs",
"isahc",
"lazy_static",
"regex",
"rust_cast",
"serde_json",

View File

@@ -20,3 +20,4 @@ crossterm = "0.24.0"
unicode-width = "0.1.9"
rust_cast = "0.17.0"
viuer = { version = "0.6", features = ["sixel"] }
lazy_static = "1.4.0"

View File

@@ -3,43 +3,39 @@ use isahc::{ReadResponseExt, Request, RequestExt};
use std::fs::File;
use std::io::prelude::*;
use crate::helpers::{fixing_text::htmlise, scraper};
//use serde_json::json;
pub fn search_anime(query: String) -> (Vec<String>, Vec<String>, Vec<String>) {
let req = Request::builder()
.uri(format!(
"https://api.consumet.org/meta/anilist/{}",
query
.replace(" ", "%20")
.replace(":", "%3A")
.replace("!", "%21")
.replace("?", "%3F")
.replace("'", "%27")
htmlise(query)
))
.redirect_policy(isahc::config::RedirectPolicy::Follow)
.header(
"user-agent",
"Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/100.0",
)
.header("user-agent", *scraper::USER_AGENT)
.body(())
.unwrap();
let json = req.send().unwrap().text().unwrap();
let json: serde_json::Value = serde_json::from_str(&json).unwrap();
let mut titles = Vec::new();
let mut ids = Vec::new();
let mut images = Vec::new();
for i in 0..json["results"].as_array().unwrap().len() {
for anime in json["results"].as_array().unwrap().iter() {
titles.push(
json["results"][i]["title"]["userPreferred"]
anime["title"]["userPreferred"]
.as_str()
.unwrap()
.to_string(),
);
ids.push(json["results"][i]["id"].as_str().unwrap().to_string());
//convert ids to i32
images.push(json["results"][i]["image"].as_str().unwrap().to_string());
ids.push(anime["id"].as_str().unwrap().to_string());
images.push(anime["image"].as_str().unwrap().to_string());
}
(ids, titles, images)
}
@@ -50,19 +46,18 @@ pub fn get_episodes(id: &i32, provider: &str) -> (Vec<String>, Vec<String>) {
id, provider
))
.redirect_policy(isahc::config::RedirectPolicy::Follow)
.header(
"user-agent",
"Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/100.0",
)
.header("user-agent", *scraper::USER_AGENT)
.body(())
.unwrap();
let json = req.send().unwrap().text().unwrap();
let json: serde_json::Value = serde_json::from_str(&json).unwrap();
let mut titles = Vec::new();
let mut ids = Vec::new();
for i in 0..json["episodes"].as_array().unwrap().len() {
titles.push(json["episodes"][i]["title"].as_str().unwrap().to_string());
ids.push(json["episodes"][i]["id"].as_str().unwrap().to_string());
for episode in json["episodes"].as_array().unwrap().iter() {
titles.push(episode["title"].as_str().unwrap().to_string());
ids.push(episode["id"].as_str().unwrap().to_string());
}
(titles, ids)
}
@@ -74,33 +69,31 @@ pub fn get_episode_link(ep_id: &str, provider: &str) -> (String, String) {
ep_id, provider
))
.redirect_policy(isahc::config::RedirectPolicy::Follow)
.header(
"user-agent",
"Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/100.0",
)
.header("user-agent", *scraper::USER_AGENT)
.body(())
.unwrap();
let json = req.send().unwrap().text().unwrap();
let json: serde_json::Value = serde_json::from_str(&json).unwrap();
let mut url = String::new();
std::fs::write("test.json", json.to_string()).unwrap();
let mut subtitle = String::new();
let _error_vec = Vec::new();
let sub_array = json["subtitles"].as_array().unwrap_or(&_error_vec);
for i in 0..sub_array.len() {
for sub in sub_array.iter() {
//set subtitle to lang = English
if json["subtitles"][i]["lang"].as_str().unwrap_or("null") == "English" {
subtitle = json["subtitles"][i]["url"]
.as_str()
.unwrap_or("null")
.to_string();
if sub["lang"].as_str().unwrap_or("null") == "English" {
subtitle = sub["url"].as_str().unwrap_or("null").to_string();
// add \ before the first : in the url
subtitle = subtitle.replace(":", "\\:");
}
}
let mut highest_quality = 0;
for i in 0..json["sources"].as_array().unwrap().len() {
let quality = json["sources"][i]["quality"]
for source in json["sources"].as_array().unwrap().iter() {
let quality = source["quality"]
.as_str()
.unwrap()
.replace("p", "")
@@ -108,10 +101,10 @@ pub fn get_episode_link(ep_id: &str, provider: &str) -> (String, String) {
.unwrap_or(0);
if quality > highest_quality {
highest_quality = quality;
url = json["sources"][i]["url"].as_str().unwrap().to_string();
url = source["url"].as_str().unwrap().to_string();
}
}
(url.to_string(), subtitle)
(url, subtitle)
}
pub fn get_image(url: &str, path: &str) {

View File

@@ -59,3 +59,12 @@ pub fn fix_html_encoding(ln_text: &Vec<String>) -> Vec<String> {
}
ln_text_new
}
pub fn htmlise(query: String) -> String {
query
.replace(" ", "%20")
.replace(":", "%3A")
.replace("!", "%21")
.replace("?", "%3F")
.replace("'", "%27")
}

View File

@@ -1,2 +1,3 @@
pub mod fixing_text;
pub mod scraper;
pub mod take_input;

4
src/helpers/scraper.rs Normal file
View File

@@ -0,0 +1,4 @@
lazy_static! {
pub static ref USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/100.0";
}

View File

@@ -3,6 +3,9 @@ mod helpers;
mod ln;
mod ui;
#[macro_use]
extern crate lazy_static;
use anime::anime::anime_ui;
use colored::Colorize;
//use ln::ui::ln_ui;
@@ -11,6 +14,7 @@ use ln::ln::ln_ui;
use crate::anime::trackers::*;
use crate::get_token;
use crate::helpers::take_input::{int_input, string_input};
fn main() {
let mut help = false;
let mut anime = false;

1
test.json Normal file
View File

@@ -0,0 +1 @@
{"message":"Something went wrong. Contact developer for help."}