diff --git a/Cargo.lock b/Cargo.lock index c7db5a9..b0e2f44 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -64,6 +64,12 @@ version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cfg-if" version = "1.0.0" @@ -163,6 +169,25 @@ dependencies = [ "winapi", ] +[[package]] +name = "ducc" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41bc1f8a30712eb6a7454f85747f218d9dfb41d173bb223a8c4f18daff829207" +dependencies = [ + "cesu8", + "ducc-sys", +] + +[[package]] +name = "ducc-sys" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cdea834bf6a0fde522374db4404695c5f0465fc0ee814f2878d76eaabd4ffed" +dependencies = [ + "cc", +] + [[package]] name = "encoding_rs" version = "0.8.31" @@ -189,6 +214,7 @@ dependencies = [ "chrono", "clap", "crossterm", + "ducc", "futures", "percent-encoding", "regex", diff --git a/Cargo.toml b/Cargo.toml index abf3bd8..3b9479f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,3 +16,4 @@ chrono = "0.4.19" thiserror = "1.0.30" anyhow = "1.0.57" crossterm = "0.24.0" +ducc = "0.1.5" diff --git a/src/integrations/zippy.rs b/src/integrations/zippy.rs index 425367f..93e05f8 100644 --- a/src/integrations/zippy.rs +++ b/src/integrations/zippy.rs @@ -1,17 +1,18 @@ use std::io::{Error, ErrorKind}; use anyhow::Result; +use ducc::Ducc; use regex::Regex; pub fn is_zippyshare_url(url: &str) -> bool { - Regex::new(r"^https?://(?:www\d*\.)?zippyshare\.com/v/[0-9a-zA-Z]+/file\.html$") + Regex::new(r#"^https?://(?:www\d*\.)?zippyshare\.com/v/[0-9a-zA-Z]+/file\.html$"#) .unwrap() .is_match(url) } pub async fn resolve_link(url: &str) -> Result { // Regex to check if the provided url is a zippyshare download url - let re = Regex::new(r"^(https?://(?:www\d*\.)?zippyshare\.com)/v/[0-9a-zA-Z]+/file\.html$")?; + let re = Regex::new(r#"^(https?://(?:www\d*\.)?zippyshare\.com)/v/[0-9a-zA-Z]+/file\.html$"#)?; if !re.is_match(url) { return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into()); } @@ -22,224 +23,50 @@ pub async fn resolve_link(url: &str) -> Result { // Download the html body for the download page let body = reqwest::get(url).await?.text().await?; - // Try to extract the link using the latest extractor - let link = extract_dl_link_2022_08_16(&host, &body).await; + let re_script = + Regex::new(r#"(?ms)(.*getElementById\('dlbutton'\).*?)"#).unwrap(); + let re_script_start = Regex::new(r#"(?ms)"#).unwrap(); - // Try the previous extractors as fallback if it didn't work - let link = match link { - Err(_) => extract_dl_link_2022_07_24(&host, &body).await, - ok => ok, - }; - let link = match link { - Err(_) => extract_dl_link_2022_07_17(&host, &body).await, - ok => ok, - }; - let link = match link { - Err(_) => extract_dl_link_2022_03_07(&host, &body).await, - ok => ok, - }; - - link -} - -/* -Updated: 16.08.2022 -Link generation code: -- `a` and `b` are random -- `omg` is always `f` -- the number used in the middle part `XXX%b` seems to be always the same as `a` - -``` -var a = 634851; -var b = 958673; -document.getElementById('dlbutton').omg = "f"; -if (document.getElementById('dlbutton').omg != 'f') { - a = Math.ceil(a/3); -} else { - a = Math.floor(a/3); -} -document.getElementById('dlbutton').href = "/d/gue47sk7/"+(a + 634851%b)+"/some-file-name.part1.rar"; -``` - */ -pub async fn extract_dl_link_2022_08_16(host: &str, body: &str) -> Result { - let re_a = Regex::new(r#"var a = (\d+);"#)?; - let re_b = Regex::new(r#"var b = (\d+);"#)?; - - let re_link = Regex::new( - r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\(a \+ (\d+)%b\)\+"(.+)";"#, - )?; - - if !body.contains( - r#"document.getElementById('dlbutton').omg = "f"; - if (document.getElementById('dlbutton').omg != 'f') { - a = Math.ceil(a/3); - } else { - a = Math.floor(a/3); - }"#, - ) { - return Err(Error::new(ErrorKind::Other, "omg part of the link-gen not found").into()); - } - - let cap_a = match re_a.captures(body) { + // Extract the script. This will end at the correct script end, but has stuff before the start + let cap_tmp = match re_script.captures(&body) { Some(cap) => cap, None => return Err(Error::new(ErrorKind::Other, "Link not found").into()), }; + let temp = &cap_tmp[1]; - let cap_b = match re_b.captures(body) { + // Find the correct script start + let pos_script_start = match re_script_start.find_iter(&temp).last() { Some(cap) => cap, None => return Err(Error::new(ErrorKind::Other, "Link not found").into()), }; + // Cut off the beginning to get only the script contents + let raw_script = &temp[pos_script_start.end()..]; - let cap_link = match re_link.captures(body) { - Some(cap) => cap, - None => return Err(Error::new(ErrorKind::Other, "Link not found").into()), - }; + // Preprocess the script + let script = preprocess_js(raw_script); - let a: i64 = cap_a[1].parse()?; - let b: i64 = cap_b[1].parse()?; + // Calculate the link + let link = eval_js_link_calculation(&script) + .map_err(|_| Error::new(ErrorKind::Other, "Link not found: JS eval error"))?; - let url_start = &cap_link[1]; - let n1: i64 = cap_link[2].parse()?; - let url_end = &cap_link[3]; - - let middle = (a / 3) + n1 % b; - - let dl_url = format!("{}{}{}{}", &host, url_start, middle, url_end); - - Ok(dl_url) + let url = format!("{}{}", host, link); + Ok(url) } -/* -Updated: 24.07.2022 -Link generation code: +fn preprocess_js(js_src: &str) -> String { + let mut processed_src = js_src + .replace("document.getElementById('dlbutton').href", "href") + .replace("document.getElementById('fimage')", "false") + // Fix for antiscrape 24.07.2022 + .replace("document.getElementById('omg').getAttribute('class')", "2") + // Fix for antiscrape 16.08.2022 + .replace("document.getElementById('dlbutton').omg", "omg"); -``` - -