Implement zippy resolver with js evaluation
- Add the ducc crate (Duktape bindings) as the JavaScript engine
- Extract the script tag from the zippyshare page contents
- Preprocess the script and execute the JS to get the link
- This removes the need for a full regex-based implementation for each anti-scrape challenge
This commit is contained in:
parent
bc2d312ce9
commit
f9659246ca
26
Cargo.lock
generated
26
Cargo.lock
generated
@ -64,6 +64,12 @@ version = "1.0.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
|
||||
[[package]]
|
||||
name = "cesu8"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
@ -163,6 +169,25 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ducc"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41bc1f8a30712eb6a7454f85747f218d9dfb41d173bb223a8c4f18daff829207"
|
||||
dependencies = [
|
||||
"cesu8",
|
||||
"ducc-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ducc-sys"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0cdea834bf6a0fde522374db4404695c5f0465fc0ee814f2878d76eaabd4ffed"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.31"
|
||||
@ -189,6 +214,7 @@ dependencies = [
|
||||
"chrono",
|
||||
"clap",
|
||||
"crossterm",
|
||||
"ducc",
|
||||
"futures",
|
||||
"percent-encoding",
|
||||
"regex",
|
||||
|
||||
@ -16,3 +16,4 @@ chrono = "0.4.19"
|
||||
thiserror = "1.0.30"
|
||||
anyhow = "1.0.57"
|
||||
crossterm = "0.24.0"
|
||||
ducc = "0.1.5"
|
||||
|
||||
@ -1,17 +1,18 @@
|
||||
use std::io::{Error, ErrorKind};
|
||||
|
||||
use anyhow::Result;
|
||||
use ducc::Ducc;
|
||||
use regex::Regex;
|
||||
|
||||
pub fn is_zippyshare_url(url: &str) -> bool {
|
||||
Regex::new(r"^https?://(?:www\d*\.)?zippyshare\.com/v/[0-9a-zA-Z]+/file\.html$")
|
||||
Regex::new(r#"^https?://(?:www\d*\.)?zippyshare\.com/v/[0-9a-zA-Z]+/file\.html$"#)
|
||||
.unwrap()
|
||||
.is_match(url)
|
||||
}
|
||||
|
||||
pub async fn resolve_link(url: &str) -> Result<String> {
|
||||
// Regex to check if the provided url is a zippyshare download url
|
||||
let re = Regex::new(r"^(https?://(?:www\d*\.)?zippyshare\.com)/v/[0-9a-zA-Z]+/file\.html$")?;
|
||||
let re = Regex::new(r#"^(https?://(?:www\d*\.)?zippyshare\.com)/v/[0-9a-zA-Z]+/file\.html$"#)?;
|
||||
if !re.is_match(url) {
|
||||
return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into());
|
||||
}
|
||||
@ -22,224 +23,50 @@ pub async fn resolve_link(url: &str) -> Result<String> {
|
||||
// Download the html body for the download page
|
||||
let body = reqwest::get(url).await?.text().await?;
|
||||
|
||||
// Try to extract the link using the latest extractor
|
||||
let link = extract_dl_link_2022_08_16(&host, &body).await;
|
||||
let re_script =
|
||||
Regex::new(r#"(?ms)<script.*?>(.*getElementById\('dlbutton'\).*?)</script>"#).unwrap();
|
||||
let re_script_start = Regex::new(r#"(?ms)<script.*?>"#).unwrap();
|
||||
|
||||
// Try the previous extractors as fallback if it didn't work
|
||||
let link = match link {
|
||||
Err(_) => extract_dl_link_2022_07_24(&host, &body).await,
|
||||
ok => ok,
|
||||
};
|
||||
let link = match link {
|
||||
Err(_) => extract_dl_link_2022_07_17(&host, &body).await,
|
||||
ok => ok,
|
||||
};
|
||||
let link = match link {
|
||||
Err(_) => extract_dl_link_2022_03_07(&host, &body).await,
|
||||
ok => ok,
|
||||
};
|
||||
|
||||
link
|
||||
}
|
||||
|
||||
/*
|
||||
Updated: 16.08.2022
|
||||
Link generation code:
|
||||
- `a` and `b` are random
|
||||
- `omg` is always `f`
|
||||
- the number used in the middle part `XXX%b` seems to be always the same as `a`
|
||||
|
||||
```
|
||||
var a = 634851;
|
||||
var b = 958673;
|
||||
document.getElementById('dlbutton').omg = "f";
|
||||
if (document.getElementById('dlbutton').omg != 'f') {
|
||||
a = Math.ceil(a/3);
|
||||
} else {
|
||||
a = Math.floor(a/3);
|
||||
}
|
||||
document.getElementById('dlbutton').href = "/d/gue47sk7/"+(a + 634851%b)+"/some-file-name.part1.rar";
|
||||
```
|
||||
*/
|
||||
pub async fn extract_dl_link_2022_08_16(host: &str, body: &str) -> Result<String> {
|
||||
let re_a = Regex::new(r#"var a = (\d+);"#)?;
|
||||
let re_b = Regex::new(r#"var b = (\d+);"#)?;
|
||||
|
||||
let re_link = Regex::new(
|
||||
r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\(a \+ (\d+)%b\)\+"(.+)";"#,
|
||||
)?;
|
||||
|
||||
if !body.contains(
|
||||
r#"document.getElementById('dlbutton').omg = "f";
|
||||
if (document.getElementById('dlbutton').omg != 'f') {
|
||||
a = Math.ceil(a/3);
|
||||
} else {
|
||||
a = Math.floor(a/3);
|
||||
}"#,
|
||||
) {
|
||||
return Err(Error::new(ErrorKind::Other, "omg part of the link-gen not found").into());
|
||||
}
|
||||
|
||||
let cap_a = match re_a.captures(body) {
|
||||
// Extract the script. This will end at the correct script end, but has stuff before the start
|
||||
let cap_tmp = match re_script.captures(&body) {
|
||||
Some(cap) => cap,
|
||||
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
|
||||
};
|
||||
let temp = &cap_tmp[1];
|
||||
|
||||
let cap_b = match re_b.captures(body) {
|
||||
// Find the correct script start
|
||||
let pos_script_start = match re_script_start.find_iter(&temp).last() {
|
||||
Some(cap) => cap,
|
||||
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
|
||||
};
|
||||
// Cut off the beginning to get only the script contents
|
||||
let raw_script = &temp[pos_script_start.end()..];
|
||||
|
||||
let cap_link = match re_link.captures(body) {
|
||||
Some(cap) => cap,
|
||||
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
|
||||
};
|
||||
// Preprocess the script
|
||||
let script = preprocess_js(raw_script);
|
||||
|
||||
let a: i64 = cap_a[1].parse()?;
|
||||
let b: i64 = cap_b[1].parse()?;
|
||||
// Calculate the link
|
||||
let link = eval_js_link_calculation(&script)
|
||||
.map_err(|_| Error::new(ErrorKind::Other, "Link not found: JS eval error"))?;
|
||||
|
||||
let url_start = &cap_link[1];
|
||||
let n1: i64 = cap_link[2].parse()?;
|
||||
let url_end = &cap_link[3];
|
||||
|
||||
let middle = (a / 3) + n1 % b;
|
||||
|
||||
let dl_url = format!("{}{}{}{}", &host, url_start, middle, url_end);
|
||||
|
||||
Ok(dl_url)
|
||||
let url = format!("{}{}", host, link);
|
||||
Ok(url)
|
||||
}
|
||||
|
||||
/*
|
||||
Updated: 24.07.2022
|
||||
Link generation code:
|
||||
fn preprocess_js(js_src: &str) -> String {
|
||||
let mut processed_src = js_src
|
||||
.replace("document.getElementById('dlbutton').href", "href")
|
||||
.replace("document.getElementById('fimage')", "false")
|
||||
// Fix for antiscrape 24.07.2022
|
||||
.replace("document.getElementById('omg').getAttribute('class')", "2")
|
||||
// Fix for antiscrape 16.08.2022
|
||||
.replace("document.getElementById('dlbutton').omg", "omg");
|
||||
|
||||
```
|
||||
<span id="omg" class="2" style="display:none;"></span>
|
||||
<script type="text/javascript">
|
||||
var a = function() {return 1};
|
||||
var b = function() {return a() + 1};
|
||||
var c = function() {return b() + 1};
|
||||
var d = document.getElementById('omg').getAttribute('class');
|
||||
if (true) { d = d*2;}
|
||||
document.getElementById('dlbutton').href = "/d/gue47sk7/"+(34556%1000 + a() + b() + c() + d + 5/5)+"/some-file-name.part1.rar";
|
||||
```
|
||||
*/
|
||||
pub async fn extract_dl_link_2022_07_24(host: &str, body: &str) -> Result<String> {
|
||||
let re_link = Regex::new(
|
||||
r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\((\d+)%1000 \+ a\(\) \+ b\(\) \+ c\(\) \+ d \+ 5/5\)\+"(.+)";"#,
|
||||
)?;
|
||||
|
||||
if !body.contains(r#"<span id="omg" class="2" style="display:none;"></span>"#) {
|
||||
return Err(Error::new(ErrorKind::Other, "span part of the link-gen not found").into());
|
||||
}
|
||||
|
||||
if !body.contains(
|
||||
r#"var a = function() {return 1};
|
||||
var b = function() {return a() + 1};
|
||||
var c = function() {return b() + 1};
|
||||
var d = document.getElementById('omg').getAttribute('class');
|
||||
if (true) { d = d*2;}"#,
|
||||
) {
|
||||
return Err(Error::new(ErrorKind::Other, "script part of the link-gen not found").into());
|
||||
}
|
||||
|
||||
let cap_link = match re_link.captures(&body) {
|
||||
Some(cap) => cap,
|
||||
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
|
||||
};
|
||||
|
||||
let url_start = &cap_link[1];
|
||||
let n1: u64 = cap_link[2].parse()?;
|
||||
let url_end = &cap_link[3];
|
||||
|
||||
let middle = n1 % 1000 + 11;
|
||||
|
||||
let dl_url = format!("{}{}{}{}", &host, url_start, middle, url_end);
|
||||
|
||||
Ok(dl_url)
|
||||
processed_src.push_str(";href");
|
||||
processed_src
|
||||
}
|
||||
|
||||
/*
|
||||
Updated: 17.07.2022
|
||||
Link generation code:
|
||||
- `var a = $1`
|
||||
- $1 is the only variable that actually changes
|
||||
- effectively: `var b = "asdasd".substr(0, 3).length` seems to be fixed
|
||||
- evaluates to: `var b = 3`
|
||||
- `document.getElementById('dlbutton').href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+b)+"/some-file-name.part1.rar"`
|
||||
- evaluates to: `href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+3)+"/some-file-name.part1.rar"`
|
||||
|
||||
```
|
||||
var a = 114;
|
||||
document.getElementById('dlbutton').omg = "asdasd".substr(0, 3);
|
||||
var b = document.getElementById('dlbutton').omg.length;
|
||||
document.getElementById('dlbutton').href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+b)+"/some-file-name.part1.rar";
|
||||
```
|
||||
*/
|
||||
pub async fn extract_dl_link_2022_07_17(host: &str, body: &str) -> Result<String> {
|
||||
let re_var_a = Regex::new(r#"var a = (\d+);"#)?;
|
||||
|
||||
// Regex to match the javascript part of the html that generates the real download link
|
||||
let re_link = Regex::new(
|
||||
r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\(Math\.pow\(a, 3\)\+b\)\+"(.+)";"#,
|
||||
)?;
|
||||
|
||||
let cap_var_a = match re_var_a.captures(&body) {
|
||||
Some(cap) => cap,
|
||||
None => return Err(Error::new(ErrorKind::Other, "Var a not found").into()),
|
||||
};
|
||||
|
||||
let cap_link = match re_link.captures(&body) {
|
||||
Some(cap) => cap,
|
||||
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
|
||||
};
|
||||
|
||||
let url_start = &cap_link[1];
|
||||
let url_end = &cap_link[2];
|
||||
let var_a: i64 = cap_var_a[1].parse()?;
|
||||
|
||||
let middle = var_a.pow(3) + 3;
|
||||
|
||||
let dl_url = format!("{}{}{}{}", &host, url_start, middle, url_end);
|
||||
|
||||
Ok(dl_url)
|
||||
}
|
||||
|
||||
/*
|
||||
Updated: 07.03.2022
|
||||
Link generation code:
|
||||
- `href = $1 + ($2 % $3 + $4 % $5) + $6`
|
||||
- `$1` is always `/d/XXX` where XXX is dependent on the file
|
||||
- `$2`, `$3`, `$4` and `$5` are dynamic and randomly generated on each reload
|
||||
- `$2` is always the same as `$4`
|
||||
- `$6` is dependent on the file
|
||||
- The numbers in the calculation part (`$2`, `$3`, `$4` and `$5`) are hard-coded
|
||||
|
||||
```
|
||||
document.getElementById('dlbutton').href = "/d/0Ky7p1C6/" + (186549 % 51245 + 186549 % 913) + "/some-file-name.part1.rar";
|
||||
```
|
||||
*/
|
||||
pub async fn extract_dl_link_2022_03_07(host: &str, body: &str) -> Result<String> {
|
||||
// Regex to match the javascript part of the html that generates the real download link
|
||||
let re_link = Regex::new(
|
||||
r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)" \+ \((\d+) % (\d+) \+ \d+ % (\d+)\) \+ "(.+)";"#,
|
||||
)?;
|
||||
|
||||
let cap_link = match re_link.captures(&body) {
|
||||
Some(cap) => cap,
|
||||
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
|
||||
};
|
||||
|
||||
let url_start = &cap_link[1];
|
||||
let url_end = &cap_link[5];
|
||||
let n2: i32 = cap_link[2].parse()?;
|
||||
let n3: i32 = cap_link[3].parse()?;
|
||||
let n4 = n2;
|
||||
let n5: i32 = cap_link[4].parse()?;
|
||||
|
||||
let mixed = n2 % n3 + n4 % n5;
|
||||
|
||||
let dl_url = format!("{}{}{}{}", &host, url_start, mixed, url_end);
|
||||
|
||||
Ok(dl_url)
|
||||
/// Executes the given JavaScript source and returns the script's result as a `String`.
///
/// A new `Ducc` engine instance is created on every call, so no state is carried
/// over between evaluations.
///
/// `js_src` is the complete script text to run. The value it evaluates to is
/// converted to the `String` return type by `Ducc::exec`.
///
/// # Errors
///
/// Any error produced by `Ducc::exec` is returned unchanged as a `ducc::Result`
/// error; this function adds no handling of its own.
fn eval_js_link_calculation(js_src: &str) -> ducc::Result<String> {
|
||||
let ducc = Ducc::new();
|
||||
// NOTE(review): `None` is presumably the optional script name and
// `Default::default()` the default execution settings — confirm against the ducc docs.
ducc.exec(js_src, None, Default::default())
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user