Update zippyshare resolver 2022-07-17

This commit is contained in:
Daniel M 2022-07-17 23:24:11 +02:00
parent e7dca54b2b
commit 33d772c9e3

View File

@ -9,6 +9,78 @@ pub fn is_zippyshare_url(url: &str) -> bool {
.is_match(url)
}
/// Resolves a zippyshare page url to the direct download link.
///
/// `url` has to look like `http(s)://[www<N>.]zippyshare.com/v/<id>/file.html`.
/// The page is downloaded and the link is extracted with the newest extractor
/// first; if that one fails, its error is discarded and the previous extractor
/// is tried on the same page body.
///
/// # Errors
///
/// Returns an error if `url` does not match the expected pattern, if the
/// request or reading the response body fails, or if the fallback extractor
/// fails as well.
pub async fn resolve_link(url: &str) -> Result<String> {
    // Regex to check if the provided url is a zippyshare download url
    let re = Regex::new(r"^(https?://(?:www\d*\.)?zippyshare\.com)/v/[0-9a-zA-Z]+/file\.html$")?;

    // Run the regex only once: this validates the url and yields the host
    // capture at the same time, so no `unwrap` is needed afterwards.
    let caps = match re.captures(url) {
        Some(caps) => caps,
        None => return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into()),
    };

    // Hostname (with the http(s):// prefix), used to build the absolute link
    let host = &caps[1];

    // Download the html body for the download page
    let body = reqwest::get(url).await?.text().await?;

    // Try the latest extractor first, the previous one only as fallback
    match extract_dl_link_2022_07_17(host, &body).await {
        Ok(link) => Ok(link),
        Err(_) => extract_dl_link_2022_03_07(host, &body).await,
    }
}
/*
Updated: 17.07.2022
Link generation code:
- `var a = $1`
- $1 is the only variable that actually changes
- effectively: `var b = "asdasd".substr(0, 3).length` seems to be fixed
- evaluates to: `var b = 3`
- `document.getElementById('dlbutton').href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+b)+"/some-file-name.part1.rar"`
- evaluates to: `href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+3)+"/some-file-name.part1.rar"`
```
var a = 114;
document.getElementById('dlbutton').omg = "asdasd".substr(0, 3);
var b = document.getElementById('dlbutton').omg.length;
document.getElementById('dlbutton').href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+b)+"/some-file-name.part1.rar";
```
*/
/// Extracts the download link from a zippyshare page (page layout of 2022-07-17).
///
/// Searches `body` for `var a = <number>;` and for the `dlbutton` href
/// assignment of the form `"/d/<id>/"+(Math.pow(a, 3)+b)+"<file name>"`.
/// The middle expression is evaluated as `a^3 + 3` (`b` is hard-coded as 3
/// here) and the result is `host` + path prefix + number + path suffix.
///
/// # Errors
///
/// Returns an error if either pattern is not found in `body`, if the captured
/// number does not fit into an `i64`, or if `a^3 + 3` would overflow an `i64`.
pub async fn extract_dl_link_2022_07_17(host: &str, body: &str) -> Result<String> {
    let re_var_a = Regex::new(r#"var a = (\d+);"#)?;

    // Regex to match the javascript part of the html that generates the real download link
    let re_link = Regex::new(
        r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\(Math\.pow\(a, 3\)\+b\)\+"(.+)";"#,
    )?;

    let cap_var_a = match re_var_a.captures(body) {
        Some(cap) => cap,
        None => return Err(Error::new(ErrorKind::Other, "Var a not found").into()),
    };

    let cap_link = match re_link.captures(body) {
        Some(cap) => cap,
        None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
    };

    let url_start = &cap_link[1];
    let url_end = &cap_link[2];
    let var_a: i64 = cap_var_a[1].parse()?;

    // The value comes from the downloaded page, so do not trust its size:
    // a plain `pow(3) + 3` panics in debug builds and wraps in release builds.
    let middle = match var_a.checked_pow(3).and_then(|cube| cube.checked_add(3)) {
        Some(value) => value,
        None => return Err(Error::new(ErrorKind::Other, "Var a is too large").into()),
    };

    Ok(format!("{}{}{}{}", host, url_start, middle, url_end))
}
/*
Updated: 07.03.2022
Link generation code:
@ -23,19 +95,7 @@ Link generation code:
document.getElementById('dlbutton').href = "/d/0Ky7p1C6/" + (186549 % 51245 + 186549 % 913) + "/some-file-name.part1.rar";
```
*/
pub async fn resolve_link(url: &str) -> Result<String> {
// Regex to check if the provided url is a zippyshare download url
let re = Regex::new(r"(https://www\d*\.zippyshare\.com)")?;
if !re.is_match(url) {
return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into());
}
// Extract the hostname (with https:// prefix) for later
let base_host = &re.captures(url).unwrap()[0];
// Download the html body for the download page
let body = reqwest::get(url).await?.text().await?;
pub async fn extract_dl_link_2022_03_07(host: &str, body: &str) -> Result<String> {
// Regex to match the javascript part of the html that generates the real download link
let re_link = Regex::new(
r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)" \+ \((\d+) % (\d+) \+ \d+ % (\d+)\) \+ "(.+)";"#,
@ -55,7 +115,7 @@ pub async fn resolve_link(url: &str) -> Result<String> {
let mixed = n2 % n3 + n4 % n5;
let dl_url = format!("{}{}{}{}", &base_host, url_start, mixed, url_end);
let dl_url = format!("{}{}{}{}", &host, url_start, mixed, url_end);
Ok(dl_url)
}