Initial version

This commit is contained in:
Daniel M 2021-03-24 18:46:24 +01:00
commit a8474aab1e
8 changed files with 2114 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/target
/tmp
/list.lst

1249
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

16
Cargo.toml Normal file
View File

@@ -0,0 +1,16 @@
[package]
name = "fdl"
version = "0.1.0"
authors = ["daniel m <danielm@dnml.de>"]
edition = "2018"
[dependencies]
tokio = { version = "1.2.0", features = [ "full" ] }
reqwest = { version = "0.11.2", features = [ "stream" ] }
futures = "0.3.12"
percent-encoding = "2.1.0"
regex = "1.4.3"
bytes = "1.0.1"
crossterm = "0.19.0"
clap = "2.33.3"
#futures-util = "0.3.13"

48
src/dlreport.rs Normal file
View File

@@ -0,0 +1,48 @@
use tokio::sync::mpsc;
use crate::errors::*;
#[derive(Clone, Debug)]
pub enum DlStatus {
Init {
bytes_total: u64,
filename: String
},
Update {
speed_mbps: f64,
bytes_curr: u64
},
Done {
duration_ms: u64
}
}
#[derive(Clone, Debug)]
pub struct DlReport {
pub id: i32,
pub status: DlStatus
}
pub struct DlReporter {
id: i32,
transmitter: mpsc::UnboundedSender<DlReport>
}
impl DlReporter {
pub fn new(id: i32, transmitter: mpsc::UnboundedSender<DlReport>) -> DlReporter {
DlReporter {
id: id,
transmitter: transmitter
}
}
pub fn send(& self, status: DlStatus) -> ResBE<()> {
self.transmitter.send(
DlReport {
id: self.id,
status: status
}
).map_err(|e| e.into())
}
}

277
src/download.rs Normal file
View File

@@ -0,0 +1,277 @@
use std::path::Path;
use tokio::io::AsyncWriteExt;
use std::time::SystemTime;
use percent_encoding::percent_decode_str;
use crate::errors::*;
use crate::dlreport::*;
struct RollingAverage {
index: usize,
data: Vec<f64>
}
impl RollingAverage {
fn new(size: usize) -> Self {
RollingAverage {
index: 0,
data: Vec::with_capacity(size)
}
}
fn value(&self) -> f64 {
if self.data.len() == 0 {
0.0
} else {
let sum: f64 = self.data.iter().sum();
sum / self.data.len() as f64
}
}
fn add(&mut self, val: f64) {
if self.data.capacity() == self.data.len() {
self.data[self.index] = val;
self.index += 1;
if self.index >= self.data.capacity() {
self.index = 0;
}
} else {
self.data.push(val);
}
}
}
/// Get the filename at the end of the given URL. This will decode the URL Encoding.
pub fn url_to_filename(url: &str) -> String {
let url_dec = percent_decode_str(&url).decode_utf8_lossy().to_owned().to_string();
let file_name = std::path::Path::new(&url_dec).file_name().unwrap().to_str().unwrap();
file_name.to_string()
}
#[allow(unused)]
pub async fn download(url: &str, into_file: &str) -> ResBE<()> {
let into_file = Path::new(into_file);
let mut resp = reqwest::Client::new()
.get(url)
.send().await?;
let mut ofile = tokio::fs::OpenOptions::new()
// Open in write mode
.write(true)
// Delete and overwrite the file
.truncate(true)
// Create the file if not existant
.create(true)
.open(into_file).await?;
// Read data from server as long as new data is available
while let Some(chunk) = resp.chunk().await? {
// Write the received data into the file
ofile.write_all(&chunk).await?;
}
// Ensure that IO is completed
ofile.flush().await?;
Ok(())
}
pub async fn download_feedback(url: &str, into_file: &str, rep: DlReporter) -> ResBE<()> {
let into_file = Path::new(into_file);
// Send the HTTP request to download the given link
let mut resp = reqwest::Client::new()
.get(url)
.send().await?;
// Error out if the server response is not success (something went wrong)
if !resp.status().is_success() {
return Err(DlError::BadHttpStatus.into());
}
// Get the content length for status update. If not present, error out cause
// without progress everything sucks anyways
let content_length = match resp.headers().get(reqwest::header::CONTENT_LENGTH) {
Some(cl) => cl.to_str()?.parse::<u64>()?,
None => return Err(DlError::ContentLengthUnknown.into())
};
// Open the local output file
let mut ofile = tokio::fs::OpenOptions::new()
// Open in write mode
.write(true)
// Delete and overwrite the file
.truncate(true)
// Create the file if not existant
.create(true)
.open(into_file).await?;
let filename = into_file.file_name().unwrap().to_str().unwrap();
// Report the download start
rep.send(
DlStatus::Init {
bytes_total: content_length,
filename: filename.to_string()
}
)?;
let mut curr_progress = 0;
let mut speed_mbps = 0.0;
let t_start = SystemTime::now();
let mut t_last_speed = SystemTime::now();
let mut last_bytecount = 0;
let mut average_speed = RollingAverage::new(5);
// Read data from server as long as new data is available
while let Some(chunk) = resp.chunk().await? {
// Write the received data into the file
ofile.write_all(&chunk).await?;
let datalen = chunk.len() as u64;
// Update progress
curr_progress += datalen;
// Update the number of bytes downloaded since the last report
last_bytecount += datalen;
// Update the reported download speed after every 10MB
if last_bytecount > 10_000_000 {
let t_elapsed = t_last_speed.elapsed()?.as_millis();
// Update rolling average
average_speed.add(
(last_bytecount as f64) / (1000.0 * t_elapsed as f64)
);
speed_mbps = average_speed.value();
// Reset the time and bytecount
last_bytecount = 0;
t_last_speed = SystemTime::now();
}
// Send status update report
rep.send(
DlStatus::Update {
speed_mbps: speed_mbps,
bytes_curr: curr_progress
}
)?;
}
// Ensure that IO is completed
ofile.flush().await?;
let duration_ms = t_start.elapsed()?.as_millis() as u64;
// Send report that the download is finished
rep.send(
DlStatus::Done {
duration_ms: duration_ms
}
)?;
Ok(())
}
pub async fn http_get_filesize_and_range_support(url: &str) -> ResBE<(u64, bool)> {
let resp = reqwest::Client::new()
.head(url)
.send().await?;
if let Some(filesize) = resp.headers().get(reqwest::header::CONTENT_LENGTH) {
if let Ok(val_str) = filesize.to_str() {
if let Ok(val) = val_str.parse::<u64>() {
let mut range_supported = false;
if let Some(range) = resp.headers().get(reqwest::header::ACCEPT_RANGES) {
if let Ok(range) = range.to_str() {
if range == "bytes" {
range_supported = true;
}
}
}
return Ok((val, range_supported));
}
}
}
Err(DlError::ContentLengthUnknown.into())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn rolling_average() {
let mut ra = RollingAverage::new(3);
assert_eq!(0, ra.data.len());
assert_eq!(3, ra.data.capacity());
assert_eq!(0.0, ra.value());
// 10 / 1 = 10
ra.add(10.0);
assert_eq!(1, ra.data.len());
assert_eq!(10.0, ra.value());
// (10 + 20) / 2 = 15
ra.add(20.0);
assert_eq!(2, ra.data.len());
assert_eq!(15.0, ra.value());
// (10 + 20 + 30) / 3 = 20
ra.add(30.0);
assert_eq!(3, ra.data.len());
assert_eq!(20.0, ra.value());
assert_eq!(10.0, ra.data[0]);
assert_eq!(20.0, ra.data[1]);
assert_eq!(30.0, ra.data[2]);
// This should replace the oldest value (index 1)
ra.add(40.0);
assert_eq!(3, ra.data.len());
assert_eq!(3, ra.data.capacity());
// (40 + 20 + 30) / 3 = 30
assert_eq!(30.0, ra.value());
assert_eq!(40.0, ra.data[0]);
assert_eq!(20.0, ra.data[1]);
assert_eq!(30.0, ra.data[2]);
ra.add(50.0);
ra.add(60.0);
ra.add(70.0);
assert_eq!(70.0, ra.data[0]);
assert_eq!(50.0, ra.data[1]);
assert_eq!(60.0, ra.data[2]);
}
}

26
src/errors.rs Normal file
View File

@@ -0,0 +1,26 @@
use std::error::Error;
use std::fmt::{ self, Display, Formatter };
/// Result Boxed Error
pub type ResBE<T> = Result<T, Box<dyn Error>>;
#[derive(Clone, Debug)]
pub enum DlError {
BadHttpStatus,
ContentLengthUnknown,
Other
}
impl Error for DlError {}
impl Display for DlError {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
DlError::BadHttpStatus => write!(f, "Bad http response status"),
DlError::ContentLengthUnknown => write!(f, "Content-Length is unknown"),
DlError::Other => write!(f, "Unknown download error")
}
}
}

404
src/main.rs Normal file
View File

@@ -0,0 +1,404 @@
use std::path::Path;
use std::process::exit;
use clap::{ App, Arg, ArgGroup, crate_version };
use tokio::sync::mpsc;
use futures::future::join_all;
use std::time::SystemTime;
use std::io::BufRead;
use std::collections::HashMap;
use dlreport::{ DlReport, DlStatus, DlReporter };
use errors::ResBE;
mod zippy;
mod download;
mod errors;
mod dlreport;
#[tokio::main]
async fn main() -> ResBE<()> {
let arguments = App::new("FDL - Fast/File Downloader")
.version(crate_version!())
.about("Download files fast")
.arg(
Arg::with_name("outdir")
.short("o")
.long("outdir")
.value_name("OUTPUT DIR")
.takes_value(true)
.help("Set the output directory")
)
.arg(
Arg::with_name("numdl")
.short("n")
.long("numdl")
.value_name("NUMBER OF CONCURRENT DOWNLOADS")
.takes_value(true)
.help("Specify the number concurrent downloads")
)
.arg(
Arg::with_name("zippyshare")
.short("z")
.long("zippy")
.takes_value(false)
.help("The provided URLs are zippyshare URLs and need to be resolved")
)
.group(
ArgGroup::with_name("action")
.required(true)
)
.arg(
Arg::with_name("listfile")
.short("l")
.long("listfile")
.value_name("URL LIST")
.takes_value(true)
.group("action")
.help("Download all files form the specified url list")
)
.arg(
Arg::with_name("download")
.short("d")
.long("download")
.value_name("URL")
.takes_value(true)
.group("action")
.help("Download only the specified URL")
)
.arg(
Arg::with_name("resolve")
.short("r")
.long("resolve")
.value_name("URL")
.takes_value(true)
.group("action")
.help("Resolve the zippyshare url to real download url")
)
.get_matches();
let outdir = match arguments.value_of("outdir") {
Some(it) => it,
None => "./"
};
let numparal = match arguments.value_of("numdl") {
Some(it) => it,
None => "1"
};
let numparal: i32 = match numparal.parse() {
Ok(it) => it,
Err(_) => {
eprintln!("Invalid value for numdl: {}", numparal);
exit(1);
}
};
let is_zippy = arguments.is_present("zippyshare");
if arguments.is_present("listfile") {
let listfile = arguments.value_of("listfile").unwrap();
let ifile = std::fs::File::open(listfile)?;
let mut urls: Vec<String> = std::io::BufReader::new(ifile)
.lines()
.map(|l| l.unwrap())
.filter(|url| url.len() > 0 && !url.starts_with("#"))
.collect();
if is_zippy {
let mut zippy_urls = Vec::new();
for url in urls {
zippy_urls.push(
match zippy::resolve_link(&url).await {
Ok(url) => url,
Err(e) => {
println!("Zippyshare link could not be resolved");
eprintln!("{}", e);
exit(1);
}
}
)
}
urls = zippy_urls;
}
download_multiple(urls, outdir, numparal).await?;
} else if arguments.is_present("download") {
let url = arguments.value_of("download").unwrap();
let url = if is_zippy {
match zippy::resolve_link(&url).await {
Ok(url) => url,
Err(e) => {
println!("Zippyshare link could not be resolved");
eprintln!("{}", e);
exit(1);
}
}
} else {
url.to_string()
};
download_one(&url, outdir).await?;
} else if arguments.is_present("resolve") {
let url = arguments.value_of("resolve").unwrap();
match zippy::resolve_link(&url).await {
Ok(resolved_url) => {
println!("{}", resolved_url);
},
Err(e) => {
println!("Zippyshare link could not be resolved");
eprintln!("{}", e);
exit(1);
}
}
} else {
println!("Something went very wrong...");
}
Ok(())
}
async fn download_one(url: &str, outdir: &str) -> ResBE<()> {
let outdir = Path::new(outdir);
if !outdir.exists() {
std::fs::create_dir_all(outdir)?;
}
let file_name = download::url_to_filename(url);
let into_file = outdir.join(Path::new(&file_name));
let into_file = into_file.to_str().unwrap().to_string();
let path_into_file = Path::new(&into_file);
// If file with same name is present locally, check filesize
if path_into_file.exists() {
let (filesize, _) = download::http_get_filesize_and_range_support(&url).await?;
let local_filesize = std::fs::metadata(path_into_file)?.len();
if filesize == local_filesize {
println!("Skipping file '{}': already present", &file_name);
return Ok(());
} else {
println!("Replacing file '{}': present but not completed", &file_name);
}
}
// Create com channel to get feedback on download progress
let (tx, mut rx) = mpsc::unbounded_channel::<DlReport>();
// Start download nonblocking
let url = url.to_string();
let jh_download = tokio::spawn(async move {
// Create reporter with id 0 since there is only one anyways
let rep = DlReporter::new(0, tx);
if let Err(e) = download::download_feedback(&url, &into_file, rep).await {
eprintln!("Error while downloading");
eprintln!("{}", e);
}
});
let mut t_last = SystemTime::UNIX_EPOCH;
let mut filesize = 0;
// Handle download status updates until all transmitters are closed
// this happens when the download is completed
while let Some(update) = rx.recv().await {
match update.status {
DlStatus::Init {
bytes_total,
filename
} => {
println!("Starting download for file '{}'", &filename);
filesize = bytes_total;
},
DlStatus::Update {
speed_mbps,
bytes_curr
} => {
// Print update every second, otherwise ignore the updates
if t_last.elapsed()?.as_millis() > 1000 {
let percent_complete = bytes_curr as f64 / filesize as f64 * 100.0;
println!("Status: {:6.2} mb/s {:5.2}% completed", speed_mbps, percent_complete);
t_last = SystemTime::now();
}
},
DlStatus::Done {
duration_ms
} => {
println!("Status: 100% completed");
println!("Download took {} seconds", (duration_ms / 1000));
}
}
}
// Await the download just to make sure
jh_download.await?;
Ok(())
}
async fn download_multiple(urls: Vec<String>, outdir: &str, numparal: i32) -> ResBE<()> {
let outdir = Path::new(outdir);
if !outdir.exists() {
std::fs::create_dir_all(outdir)?;
}
let mut joiners = Vec::new();
let (tx, mut rx) = mpsc::unbounded_channel::<DlReport>();
for offset in 0..numparal {
let urls: Vec<String> = urls
.iter()
.enumerate()
.filter(|(index, _)| (index) % numparal as usize == offset as usize)
.map(|(_, v)| v.to_owned())
.collect();
let tx = tx.clone();
let outdir = outdir.to_owned();
let offset = offset;
joiners.push(tokio::task::spawn(async move {
for (i, url) in urls.iter().enumerate() {
// Recalculated index in the main url vector, used as id
let global_url_index = i as i32 * numparal + offset;
let file_name = download::url_to_filename(&url);
let into_file = outdir.join(Path::new(&file_name));
let into_file = into_file.to_str().unwrap().to_string();
let path_into_file = Path::new(&into_file);
// If file with same name is present locally, check filesize
if path_into_file.exists() {
let (filesize, _) = download::http_get_filesize_and_range_support(&url).await.unwrap();
let local_filesize = std::fs::metadata(path_into_file).unwrap().len();
if filesize == local_filesize {
println!("Skipping file '{}': already present", &file_name);
continue;
} else {
println!("Replacing file '{}': present but not completed", &file_name);
}
}
let rep = DlReporter::new(global_url_index, tx.clone());
if let Err(e) = download::download_feedback(&url, &into_file, rep).await {
eprintln!("Error while downloading '{}'", file_name);
eprintln!("{}", e);
}
}
}))
}
drop(tx);
// filename, total size bytes, current size bytes, download speed mbps
let mut statuses: HashMap<i32, (String, u64, u64, f64)> = HashMap::new();
let mut t_last = SystemTime::now();
while let Some(update) = rx.recv().await {
match update.status {
DlStatus::Init {
bytes_total,
filename
} => {
println!("Starting download for file '{}'", &filename);
statuses.insert(update.id, (filename, bytes_total, 0, 0.0));
},
DlStatus::Update {
speed_mbps,
bytes_curr
} => {
// Scope the reference to prevent borrowing conflict later
{
let s = &mut statuses.get_mut(&update.id).unwrap();
s.2 = bytes_curr;
s.3 = speed_mbps;
}
if t_last.elapsed().unwrap().as_millis() > 2000 {
let mut dl_speed_sum = 0.0;
for (_k, v) in &statuses {
let filename = &v.0;
let filesize = v.1;
let bytes_curr = v.2;
let speed_mbps = v.3;
let percent_complete = bytes_curr as f64 / filesize as f64 * 100.0;
println!("Status: {:6.2} mb/s {:5.2}% completed '{}'", speed_mbps, percent_complete, filename);
dl_speed_sum += speed_mbps;
}
println!("Accumulated download speed: {:6.2} mb/s\n", dl_speed_sum);
t_last = SystemTime::now();
}
},
DlStatus::Done {
duration_ms
} => {
println!(
"Status: 100% completed '{}'\nDownload took {} seconds",
&statuses.get(&update.id).unwrap().0,
(duration_ms / 1000)
);
statuses.remove(&update.id);
}
}
}
join_all(joiners).await;
Ok(())
}

91
src/zippy.rs Normal file
View File

@@ -0,0 +1,91 @@
use regex::Regex;
use std::io::{ Error, ErrorKind };
use crate::errors::ResBE;
#[allow(dead_code)]
pub async fn resolve_link_old(url: &str) -> ResBE<String> {
// Regex to check if the provided url is a zippyshare download url
let re = Regex::new(r"(https://www\d*\.zippyshare\.com)")?;
if !re.is_match(&url) {
return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into());
}
// Extract the hostname (with https:// prefix) for later
let base_host = &re.captures(&url).unwrap()[0];
// Download the html body for the download page
let body = reqwest::get(url).await?
.text().await?;
// Regex to match the javascript part of the html that generates the real download link
let re = Regex::new(r#""(/d/\w+/)" \+ \((\d+) % (\d+) \+ (\d+) % (\d+)\) \+ "(/.+\.rar)";"#)?;
if let Some(cap) = re.captures(&body) {
// Extract the magic numbers used to generate the download link
let n1: i32 = i32::from_str_radix(&cap[2], 10)?;
let n2: i32 = i32::from_str_radix(&cap[3], 10)?;
let n3: i32 = i32::from_str_radix(&cap[4], 10)?;
let n4: i32 = i32::from_str_radix(&cap[5], 10)?;
// Mix the numbers together
let mixed = n1 % n2 + n3 % n4;
// Assemble the download link
let dl_url = format!("{}{}{}{}", base_host, &cap[1], mixed, &cap[6]);
Ok(dl_url)
} else {
Err(Error::new(ErrorKind::Other, "Link not found").into())
}
}
pub async fn resolve_link(url: &str) -> ResBE<String> {
// Regex to check if the provided url is a zippyshare download url
let re = Regex::new(r"(https://www\d*\.zippyshare\.com)")?;
if !re.is_match(&url) {
return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into());
}
// Extract the hostname (with https:// prefix) for later
let base_host = &re.captures(&url).unwrap()[0];
// Download the html body for the download page
let body = reqwest::get(url).await?
.text().await?;
// Regex to match the javascript part of the html that generates the real download link
let re_a = Regex::new(r#"var a = (\d+);"#)?;
let re_b = Regex::new(r#"var b = (\d+);"#)?;
let re_concat = Regex::new(r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)"\+\(a \+ (\d+)%b\)\+"(/.+\.rar)";"#)?;
let cap_a = match re_a.captures(&body) {
Some(cap) => cap,
None => return Err(Error::new(ErrorKind::Other, "Link not found").into())
};
let cap_b = match re_b.captures(&body) {
Some(cap) => cap,
None => return Err(Error::new(ErrorKind::Other, "Link not found").into())
};
let cap_concat = match re_concat.captures(&body) {
Some(cap) => cap,
None => return Err(Error::new(ErrorKind::Other, "Link not found").into())
};
let a: i32 = i32::from_str_radix(&cap_a[1], 10)?;
let b: i32 = i32::from_str_radix(&cap_b[1], 10)?;
let c: i32 = i32::from_str_radix(&cap_concat[2], 10)?;
let mixed = (a/3) + (c%b);
let dl_url = format!("{}{}{}{}", &base_host, &cap_concat[1], mixed, &cap_concat[3]);
Ok(dl_url)
}