ffdl/src/integrations/zippy.rs
2022-08-16 21:42:08 +02:00

246 lines
8.0 KiB
Rust

use std::io::{Error, ErrorKind};
use anyhow::Result;
use regex::Regex;
/// Reports whether `url` has the shape of a zippyshare file page link.
///
/// The whole string has to match: an `http` or `https` scheme, an optional
/// `www` subdomain followed by any number of digits, the `zippyshare.com`
/// host and a `/v/<alphanumeric id>/file.html` path.
pub fn is_zippyshare_url(url: &str) -> bool {
    // The pattern is a fixed literal, so compiling it is not expected to fail.
    let page_pattern =
        Regex::new(r"^https?://(?:www\d*\.)?zippyshare\.com/v/[0-9a-zA-Z]+/file\.html$").unwrap();
    page_pattern.is_match(url)
}
/// Resolves a zippyshare file page URL to the direct download link.
///
/// Downloads the page and runs the known link extractors from newest to
/// oldest, returning the first link that one of them produces.
///
/// # Errors
/// Fails when `url` is not a zippyshare file page URL, when requesting the
/// page or reading its text fails, or when no extractor succeeds. In the last
/// case only the error of the oldest extractor is returned.
pub async fn resolve_link(url: &str) -> Result<String> {
    // Regex to check if the provided url is a zippyshare download url
    let re = Regex::new(r"^(https?://(?:www\d*\.)?zippyshare\.com)/v/[0-9a-zA-Z]+/file\.html$")?;

    // A single `captures` call both validates the url and yields the host,
    // so there is no separate `is_match` pass and no `unwrap` panic path.
    let caps = re
        .captures(url)
        .ok_or_else(|| Error::new(ErrorKind::Other, "URL is not a zippyshare url"))?;

    // Extract the hostname (with https:// prefix) for later
    let host = &caps[1];

    // Download the html body for the download page
    let body = reqwest::get(url).await?.text().await?;

    // Try to extract the link using the latest extractor
    let mut link = extract_dl_link_2022_08_16(host, &body).await;

    // Try the previous extractors as fallback if it didn't work
    if link.is_err() {
        link = extract_dl_link_2022_07_24(host, &body).await;
    }
    if link.is_err() {
        link = extract_dl_link_2022_07_17(host, &body).await;
    }
    if link.is_err() {
        link = extract_dl_link_2022_03_07(host, &body).await;
    }

    link
}
/*
Updated: 16.08.2022
Link generation code:
- `a` and `b` are random
- `omg` is always `f`
- the number used in the middle part `XXX%b` seems to be always the same as `a`
```
var a = 634851;
var b = 958673;
document.getElementById('dlbutton').omg = "f";
if (document.getElementById('dlbutton').omg != 'f') {
a = Math.ceil(a/3);
} else {
a = Math.floor(a/3);
}
document.getElementById('dlbutton').href = "/d/gue47sk7/"+(a + 634851%b)+"/some-file-name.part1.rar";
```
*/
/// Builds the direct download URL from a page that uses the link-generation
/// script introduced on 16.08.2022.
///
/// The page computes the middle path segment as `floor(a / 3) + N % b`, where
/// `a`, `b` and `N` are integer literals embedded in the script.
///
/// # Arguments
/// * `host` - scheme and host to prefix the link with,
///   e.g. `https://www12.zippyshare.com`.
/// * `body` - HTML of the download page.
///
/// # Errors
/// Returns an error when the expected script fragments are not present in
/// `body`, when a number does not fit into an `i64`, when `b` is zero, or when
/// the arithmetic would overflow.
pub async fn extract_dl_link_2022_08_16(host: &str, body: &str) -> Result<String> {
    let re_a = Regex::new(r#"var a = (\d+);"#)?;
    let re_b = Regex::new(r#"var b = (\d+);"#)?;
    let re_link = Regex::new(
        r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\(a \+ (\d+)%b\)\+"(.+)";"#,
    )?;

    // Only the script variant whose `omg` check always ends up in the
    // `Math.floor` branch is supported. The literal is compared verbatim,
    // line breaks included, so it must not be re-indented.
    if !body.contains(
        r#"document.getElementById('dlbutton').omg = "f";
if (document.getElementById('dlbutton').omg != 'f') {
a = Math.ceil(a/3);
} else {
a = Math.floor(a/3);
}"#,
    ) {
        return Err(Error::new(ErrorKind::Other, "omg part of the link-gen not found").into());
    }

    let link_not_found = || Error::new(ErrorKind::Other, "Link not found");
    let cap_a = re_a.captures(body).ok_or_else(link_not_found)?;
    let cap_b = re_b.captures(body).ok_or_else(link_not_found)?;
    let cap_link = re_link.captures(body).ok_or_else(link_not_found)?;

    let a: i64 = cap_a[1].parse()?;
    let b: i64 = cap_b[1].parse()?;

    let url_start = &cap_link[1];
    let n1: i64 = cap_link[2].parse()?;
    let url_end = &cap_link[3];

    // The numbers come from the downloaded page: `b` may be zero and the sum
    // may exceed `i64`, so both steps are checked instead of panicking.
    let remainder = n1
        .checked_rem(b)
        .ok_or_else(|| Error::new(ErrorKind::Other, "Modulus b of the link-gen is zero"))?;
    // `a` is parsed from digits only, so integer division equals `Math.floor(a/3)`.
    let middle = (a / 3)
        .checked_add(remainder)
        .ok_or_else(|| Error::new(ErrorKind::Other, "Link-gen arithmetic overflowed"))?;

    let dl_url = format!("{}{}{}{}", host, url_start, middle, url_end);
    Ok(dl_url)
}
/*
Updated: 24.07.2022
Link generation code:
```
<span id="omg" class="2" style="display:none;"></span>
<script type="text/javascript">
var a = function() {return 1};
var b = function() {return a() + 1};
var c = function() {return b() + 1};
var d = document.getElementById('omg').getAttribute('class');
if (true) { d = d*2;}
document.getElementById('dlbutton').href = "/d/gue47sk7/"+(34556%1000 + a() + b() + c() + d + 5/5)+"/some-file-name.part1.rar";
```
*/
/// Builds the direct download URL from a page that uses the link-generation
/// script introduced on 24.07.2022.
///
/// The page has to contain the hidden `omg` span with class `2` and the fixed
/// helper definitions `a`, `b`, `c` and `d`. With those in place the middle
/// path segment is `N % 1000 + 11`, where `N` is the number embedded in the
/// `href` assignment.
///
/// # Errors
/// Returns an error when the span, the helper script or the `href` assignment
/// is missing from `body`, or when `N` does not fit into a `u64`.
pub async fn extract_dl_link_2022_07_24(host: &str, body: &str) -> Result<String> {
    // Both fragments are compared verbatim, so the multi-line one must keep
    // its exact line breaks and must not be re-indented.
    const OMG_SPAN: &str = r#"<span id="omg" class="2" style="display:none;"></span>"#;
    const HELPER_SCRIPT: &str = r#"var a = function() {return 1};
var b = function() {return a() + 1};
var c = function() {return b() + 1};
var d = document.getElementById('omg').getAttribute('class');
if (true) { d = d*2;}"#;

    let missing = |what: &str| Error::new(ErrorKind::Other, what);

    let href_pattern = Regex::new(
        r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\((\d+)%1000 \+ a\(\) \+ b\(\) \+ c\(\) \+ d \+ 5/5\)\+"(.+)";"#,
    )?;

    if !body.contains(OMG_SPAN) {
        return Err(missing("span part of the link-gen not found").into());
    }
    if !body.contains(HELPER_SCRIPT) {
        return Err(missing("script part of the link-gen not found").into());
    }

    let pieces = href_pattern
        .captures(body)
        .ok_or_else(|| missing("Link not found"))?;

    let prefix = &pieces[1];
    let seed: u64 = pieces[2].parse()?;
    let suffix = &pieces[3];

    // a() + b() + c() + d + 5/5 evaluates to 1 + 2 + 3 + 4 + 1 = 11.
    let segment = seed % 1000 + 11;

    Ok(format!("{}{}{}{}", host, prefix, segment, suffix))
}
/*
Updated: 17.07.2022
Link generation code:
- `var a = $1`
- $1 is the only variable that actually changes
- effectively: `var b = "asdasd".substr(0, 3).length` seems to be fixed
- evaluates to: `var b = 3`
- `document.getElementById('dlbutton').href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+b)+"/some-file-name.part1.rar"`
- evaluates to: `href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+3)+"/some-file-name.part1.rar"`
```
var a = 114;
document.getElementById('dlbutton').omg = "asdasd".substr(0, 3);
var b = document.getElementById('dlbutton').omg.length;
document.getElementById('dlbutton').href = "/d/0Ky7p1C6/"+(Math.pow(a, 3)+b)+"/some-file-name.part1.rar";
```
*/
/// Builds the direct download URL from a page that uses the link-generation
/// script introduced on 17.07.2022.
///
/// The middle path segment is `a^3 + 3`, where `a` is the integer literal of
/// the page's `var a = ...;` statement and `3` is the fixed value of `b`.
///
/// # Errors
/// Returns an error when `var a` or the `href` assignment is missing from
/// `body`, when `a` does not fit into an `i64`, or when `a^3 + 3` would
/// overflow an `i64`.
pub async fn extract_dl_link_2022_07_17(host: &str, body: &str) -> Result<String> {
    let re_var_a = Regex::new(r#"var a = (\d+);"#)?;
    // Regex to match the javascript part of the html that generates the real download link
    let re_link = Regex::new(
        r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\(Math\.pow\(a, 3\)\+b\)\+"(.+)";"#,
    )?;

    let cap_var_a = re_var_a
        .captures(body)
        .ok_or_else(|| Error::new(ErrorKind::Other, "Var a not found"))?;
    let cap_link = re_link
        .captures(body)
        .ok_or_else(|| Error::new(ErrorKind::Other, "Link not found"))?;

    let url_start = &cap_link[1];
    let url_end = &cap_link[2];
    let var_a: i64 = cap_var_a[1].parse()?;

    // `a` comes from the downloaded page; cubing a large value must produce an
    // error rather than a panic or a silently wrapped number.
    let middle = var_a
        .checked_pow(3)
        .and_then(|cube| cube.checked_add(3))
        .ok_or_else(|| Error::new(ErrorKind::Other, "Link-gen arithmetic overflowed"))?;

    let dl_url = format!("{}{}{}{}", host, url_start, middle, url_end);
    Ok(dl_url)
}
/*
Updated: 07.03.2022
Link generation code:
- `href = $1 + ($2 % $3 + $4 % $5) + $6`
- `$1` is always `/d/XXX` where XXX is dependent on the file
- `$2`, `$3`, `$4` and `$5` are dynamic and randomly generated on each reload
- `$2` is always the same as `$4`
- `$6` is dependent on the file
- The numbers in the calculation part (`$2`, `$3`, `$4` and `$5`) are hard coded
```
document.getElementById('dlbutton').href = "/d/0Ky7p1C6/" + (186549 % 51245 + 186549 % 913) + "/some-file-name.part1.rar";
```
*/
/// Builds the direct download URL from a page that uses the link-generation
/// script introduced on 07.03.2022.
///
/// The middle path segment is `n2 % n3 + n4 % n5`, with all four numbers
/// taken from the `href` assignment found in `body`.
///
/// # Errors
/// Returns an error when the `href` assignment is missing from `body`, when a
/// number does not fit into an `i32`, when a divisor is zero, or when the sum
/// would overflow an `i32`.
pub async fn extract_dl_link_2022_03_07(host: &str, body: &str) -> Result<String> {
    // Regex to match the javascript part of the html that generates the real download link.
    // All four numbers are captured so the second dividend is read from the
    // page instead of being assumed equal to the first one.
    let re_link = Regex::new(
        r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)" \+ \((\d+) % (\d+) \+ (\d+) % (\d+)\) \+ "(.+)";"#,
    )?;

    let cap_link = re_link
        .captures(body)
        .ok_or_else(|| Error::new(ErrorKind::Other, "Link not found"))?;

    let url_start = &cap_link[1];
    let url_end = &cap_link[6];
    let n2: i32 = cap_link[2].parse()?;
    let n3: i32 = cap_link[3].parse()?;
    let n4: i32 = cap_link[4].parse()?;
    let n5: i32 = cap_link[5].parse()?;

    // The divisors come from the downloaded page and may be zero; report that
    // as an error instead of panicking on the remainder operation.
    let zero_divisor = || Error::new(ErrorKind::Other, "Divisor in link-gen is zero");
    let left = n2.checked_rem(n3).ok_or_else(zero_divisor)?;
    let right = n4.checked_rem(n5).ok_or_else(zero_divisor)?;
    let mixed = left
        .checked_add(right)
        .ok_or_else(|| Error::new(ErrorKind::Other, "Link-gen arithmetic overflowed"))?;

    let dl_url = format!("{}{}{}{}", host, url_start, mixed, url_end);
    Ok(dl_url)
}