Compare commits

..

14 Commits

Author SHA1 Message Date
f9659246ca Implement zippy resolver with js evaluation
- Add ducc crate (duktape bindings) as javascript engine
- Extract the script tag from zippyshare page contents
- Preprocess the script and execute the js to get the link
- This removes the need for full regex based implementations for each
  antiscrape challenge
2022-09-13 15:37:20 +02:00
bc2d312ce9 Update zippyshare resolver 2022-07-24 + bump
- Bump version to 0.1.6
2022-08-16 21:42:08 +02:00
0f7e05a71d Update zippyshare resolver 2022-07-24 + bump
- Bump version to 0.1.5
2022-07-24 15:39:00 +02:00
2e0c12ee56 Bump version to 0.1.4 2022-07-17 23:34:19 +02:00
7606f90384 Update crossterm dependency
- Crossterm 0.24 was released with the merged bugfix, so no need for
  the pinned git dependency anymore
2022-07-17 23:33:21 +02:00
33d772c9e3 Update zippyshare resolver 2022-07-17 2022-07-17 23:24:11 +02:00
e7dca54b2b Add README 2022-06-16 01:40:11 +02:00
4fb19a4f5b Use latest git version for crossterm to fix bug
- Fix visual bugs on windows using git-bash that are caused by crossterm
- The crossterm bug was fixed in upstream by PR-657 but is not yet
  released to crates.io
- Bump version to 0.1.3
2022-06-15 17:51:07 +02:00
276ff194db Fix broken terminal output
- Replaced println by report message call
2022-04-26 00:21:29 +02:00
9d976b49ab Actually fix the parallel download 2022-04-01 01:22:13 +02:00
3e7601db1d Fix parallel file downloads not working 2022-04-01 01:09:37 +02:00
a46bc063ff Add support for sendcm
- Change the special url resolver code to better (still not optimally)
  support different services besides zippyshare
- Implement support for sendcm
2022-04-01 00:40:42 +02:00
e2c4d3572b More refactoring for dlreport 2022-03-31 23:13:43 +02:00
e6360153d6 More refactoring 2022-03-31 20:25:24 +02:00
13 changed files with 849 additions and 594 deletions

240
Cargo.lock generated
View File

@@ -13,9 +13,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.56"
version = "1.0.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4361135be9122e0870de935d7c439aef945b9f9ddd4199a553b5270b49c82a27"
checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc"
[[package]]
name = "atty"
@@ -64,6 +64,12 @@ version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
[[package]]
name = "cesu8"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c"
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -85,16 +91,16 @@ dependencies = [
[[package]]
name = "clap"
version = "3.1.6"
version = "3.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8c93436c21e4698bacadf42917db28b23017027a4deccb35dbe47a7e7840123"
checksum = "7c167e37342afc5f33fd87bbc870cedd020d2a6dffa05d45ccd9241fbdd146db"
dependencies = [
"atty",
"bitflags",
"clap_derive",
"clap_lex",
"indexmap",
"lazy_static",
"os_str_bytes",
"strsim",
"termcolor",
"textwrap",
@@ -102,9 +108,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "3.1.4"
version = "3.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da95d038ede1a964ce99f49cbe27a7fb538d1da595e4b4f70b8c8f338d17bf16"
checksum = "a3aab4734e083b809aaf5794e14e756d1c798d2c69c7f7de7a09a2f5214993c1"
dependencies = [
"heck",
"proc-macro-error",
@@ -113,6 +119,15 @@ dependencies = [
"syn",
]
[[package]]
name = "clap_lex"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "189ddd3b5d32a70b35e7686054371742a937b0d99128e76dde6340210e966669"
dependencies = [
"os_str_bytes",
]
[[package]]
name = "core-foundation"
version = "0.9.3"
@@ -131,14 +146,14 @@ checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"
[[package]]
name = "crossterm"
version = "0.23.1"
version = "0.24.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1fd7173631a4e9e2ca8b32ae2fad58aab9843ea5aaf56642661937d87e28a3e"
checksum = "ab9f7409c70a38a56216480fba371ee460207dd8926ccf5b4160591759559170"
dependencies = [
"bitflags",
"crossterm_winapi",
"libc",
"mio 0.7.14",
"mio",
"parking_lot",
"signal-hook",
"signal-hook-mio",
@@ -155,10 +170,29 @@ dependencies = [
]
[[package]]
name = "encoding_rs"
version = "0.8.30"
name = "ducc"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7896dc8abb250ffdda33912550faa54c88ec8b998dec0b2c55ab224921ce11df"
checksum = "41bc1f8a30712eb6a7454f85747f218d9dfb41d173bb223a8c4f18daff829207"
dependencies = [
"cesu8",
"ducc-sys",
]
[[package]]
name = "ducc-sys"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0cdea834bf6a0fde522374db4404695c5f0465fc0ee814f2878d76eaabd4ffed"
dependencies = [
"cc",
]
[[package]]
name = "encoding_rs"
version = "0.8.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b"
dependencies = [
"cfg-if",
]
@@ -174,12 +208,13 @@ dependencies = [
[[package]]
name = "ffdl"
version = "0.1.2"
version = "0.1.6"
dependencies = [
"anyhow",
"chrono",
"clap",
"crossterm",
"ducc",
"futures",
"percent-encoding",
"regex",
@@ -310,9 +345,9 @@ dependencies = [
[[package]]
name = "h2"
version = "0.3.12"
version = "0.3.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62eeb471aa3e3c9197aa4bfeabfe02982f6dc96f750486c0bb0009ac58b26d2b"
checksum = "37a82c6d637fc9515a4694bbf1cb2457b79d81ce52b3108bdeea58b07dd34a57"
dependencies = [
"bytes",
"fnv",
@@ -323,7 +358,7 @@ dependencies = [
"indexmap",
"slab",
"tokio",
"tokio-util",
"tokio-util 0.7.1",
"tracing",
]
@@ -372,9 +407,9 @@ dependencies = [
[[package]]
name = "httparse"
version = "1.6.0"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9100414882e15fb7feccb4897e5f0ff0ff1ca7d1a86a23208ada4d7a18e6c6c4"
checksum = "6330e8a36bd8c859f3fa6d9382911fbb7147ec39807f63b923933a247240b9ba"
[[package]]
name = "httpdate"
@@ -432,9 +467,9 @@ dependencies = [
[[package]]
name = "indexmap"
version = "1.8.0"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282a6247722caba404c065016bbfa522806e51714c34f5dfc3e4a3a46fcb4223"
checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee"
dependencies = [
"autocfg",
"hashbrown",
@@ -451,9 +486,9 @@ dependencies = [
[[package]]
name = "ipnet"
version = "2.4.0"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35e70ee094dc02fd9c13fdad4940090f22dbd6ac7c9e7094a46cf0232a50bc7c"
checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b"
[[package]]
name = "itoa"
@@ -463,9 +498,9 @@ checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "js-sys"
version = "0.3.56"
version = "0.3.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a38fc24e30fd564ce974c02bf1d337caddff65be6cc4735a1f7eab22a7440f04"
checksum = "671a26f820db17c2a2750743f1dd03bafd15b98c9f30c7c2628c024c05d73397"
dependencies = [
"wasm-bindgen",
]
@@ -478,16 +513,17 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.121"
version = "0.2.124"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
[[package]]
name = "lock_api"
version = "0.4.6"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88943dd7ef4a2e5a4bfa2753aaab3013e34ce2533d1996fb18ef591e315e2b3b"
checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
dependencies = [
"autocfg",
"scopeguard",
]
@@ -518,19 +554,6 @@ version = "0.3.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
[[package]]
name = "mio"
version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8067b404fe97c70829f082dec8bcf4f71225d7eaea1d8645349cb76fa06205cc"
dependencies = [
"libc",
"log",
"miow",
"ntapi",
"winapi",
]
[[package]]
name = "mio"
version = "0.8.2"
@@ -556,9 +579,9 @@ dependencies = [
[[package]]
name = "native-tls"
version = "0.2.9"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09bf6f32a3afefd0b587ee42ed19acd945c6d1f3b5424040f50b2f24ab16be77"
checksum = "fd7e2f3618557f980e0b17e8856252eee3c97fa12c54dff0ca290fb6266ca4a9"
dependencies = [
"lazy_static",
"libc",
@@ -654,9 +677,6 @@ name = "os_str_bytes"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
dependencies = [
"memchr",
]
[[package]]
name = "parking_lot"
@@ -670,9 +690,9 @@ dependencies = [
[[package]]
name = "parking_lot_core"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28141e0cc4143da2443301914478dc976a61ffdb3f043058310c70df2fed8954"
checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37"
dependencies = [
"cfg-if",
"libc",
@@ -701,9 +721,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.24"
version = "0.3.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
[[package]]
name = "proc-macro-error"
@@ -731,27 +751,27 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.36"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.17"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "632d02bff7f874a36f33ea8bb416cd484b90cc66c1194b1a1110d067a7013f58"
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.2.12"
version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae183fc1b06c149f0c1793e1eb447c8b04bfe46d48e9e48bfb8d2d7ed64ecf0"
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
dependencies = [
"bitflags",
]
@@ -811,7 +831,7 @@ dependencies = [
"serde_urlencoded",
"tokio",
"tokio-native-tls",
"tokio-util",
"tokio-util 0.6.9",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
@@ -910,7 +930,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29ad2e15f37ec9a6cc544097b78a1ec90001e9f71b81338ca39f430adaca99af"
dependencies = [
"libc",
"mio 0.7.14",
"mio",
"signal-hook",
]
@@ -925,9 +945,9 @@ dependencies = [
[[package]]
name = "slab"
version = "0.4.5"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5"
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
[[package]]
name = "smallvec"
@@ -953,9 +973,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.89"
version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea297be220d52398dcc07ce15a209fce436d361735ac1db700cab3b6cdfb9f54"
checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d"
dependencies = [
"proc-macro2",
"quote",
@@ -1024,9 +1044,9 @@ dependencies = [
[[package]]
name = "tinyvec"
version = "1.5.1"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2"
checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50"
dependencies = [
"tinyvec_macros",
]
@@ -1046,7 +1066,7 @@ dependencies = [
"bytes",
"libc",
"memchr",
"mio 0.8.2",
"mio",
"num_cpus",
"once_cell",
"parking_lot",
@@ -1092,6 +1112,20 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0edfdeb067411dba2044da6d1cb2df793dd35add7888d73c16e3381ded401764"
dependencies = [
"bytes",
"futures-core",
"futures-sink",
"pin-project-lite",
"tokio",
"tracing",
]
[[package]]
name = "tower-service"
version = "0.3.1"
@@ -1100,20 +1134,32 @@ checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"
[[package]]
name = "tracing"
version = "0.1.32"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a1bdf54a7c28a2bbf701e1d2233f6c77f473486b94bee4f9678da5a148dca7f"
checksum = "5d0ecdcb44a79f0fe9844f0c4f33a342cbcbb5117de8001e6ba0dc2351327d09"
dependencies = [
"cfg-if",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-core"
version = "0.1.23"
name = "tracing-attributes"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa31669fa42c09c34d94d8165dd2012e8ff3c66aca50f3bb226b68f216f2706c"
checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f"
dependencies = [
"lazy_static",
]
@@ -1193,9 +1239,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
version = "0.2.79"
version = "0.2.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25f1af7423d8588a3d840681122e72e6a24ddbcb3f0ec385cac0d12d24256c06"
checksum = "27370197c907c55e3f1a9fbe26f44e937fe6451368324e009cba39e139dc08ad"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@@ -1203,9 +1249,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.79"
version = "0.2.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b21c0df030f5a177f3cba22e9bc4322695ec43e7257d865302900290bcdedca"
checksum = "53e04185bfa3a779273da532f5025e33398409573f348985af9a1cbf3774d3f4"
dependencies = [
"bumpalo",
"lazy_static",
@@ -1218,9 +1264,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.29"
version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eb6ec270a31b1d3c7e266b999739109abce8b6c87e4b31fcfcd788b65267395"
checksum = "6f741de44b75e14c35df886aff5f1eb73aa114fa5d4d00dcd37b5e01259bf3b2"
dependencies = [
"cfg-if",
"js-sys",
@@ -1230,9 +1276,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.79"
version = "0.2.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f4203d69e40a52ee523b2529a773d5ffc1dc0071801c87b3d270b471b80ed01"
checksum = "17cae7ff784d7e83a2fe7611cfe766ecf034111b49deb850a3dc7699c08251f5"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -1240,9 +1286,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.79"
version = "0.2.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa8a30d46208db204854cadbb5d4baf5fcf8071ba5bf48190c3e59937962ebc"
checksum = "99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b"
dependencies = [
"proc-macro2",
"quote",
@@ -1253,15 +1299,15 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.79"
version = "0.2.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d958d035c4438e28c70e4321a2911302f10135ce78a9c7834c0cab4123d06a2"
checksum = "d554b7f530dee5964d9a9468d95c1f8b8acae4f282807e7d27d4b03099a46744"
[[package]]
name = "web-sys"
version = "0.3.56"
version = "0.3.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c060b319f29dd25724f09a2ba1418f142f539b2be99fbf4d2d5a8f7330afb8eb"
checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -1300,9 +1346,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.32.0"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3df6e476185f92a12c072be4a189a0210dcdcf512a1891d6dff9edb874deadc6"
checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825"
dependencies = [
"windows_aarch64_msvc",
"windows_i686_gnu",
@@ -1313,33 +1359,33 @@ dependencies = [
[[package]]
name = "windows_aarch64_msvc"
version = "0.32.0"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8e92753b1c443191654ec532f14c199742964a061be25d77d7a96f09db20bf5"
checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d"
[[package]]
name = "windows_i686_gnu"
version = "0.32.0"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a711c68811799e017b6038e0922cb27a5e2f43a2ddb609fe0b6f3eeda9de615"
checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed"
[[package]]
name = "windows_i686_msvc"
version = "0.32.0"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "146c11bb1a02615db74680b32a68e2d61f553cc24c4eb5b4ca10311740e44172"
checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956"
[[package]]
name = "windows_x86_64_gnu"
version = "0.32.0"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c912b12f7454c6620635bbff3450962753834be2a594819bd5e945af18ec64bc"
checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4"
[[package]]
name = "windows_x86_64_msvc"
version = "0.32.0"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "504a2476202769977a040c6364301a3f65d0cc9e3fb08600b2bda150a0488316"
checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9"
[[package]]
name = "winreg"

View File

@@ -1,8 +1,8 @@
[package]
name = "ffdl"
version = "0.1.2"
version = "0.1.6"
authors = ["daniel m <danielm@dnml.de>"]
edition = "2018"
edition = "2021"
description = "Download files fast"
[dependencies]
@@ -11,8 +11,9 @@ reqwest = { version = "0.11.10", features = [ "stream" ] }
futures = "0.3.21"
percent-encoding = "2.1.0"
regex = "1.5.5"
crossterm = "0.23.1"
clap = { version = "3.1.6", features = [ "derive" ] }
clap = { version = "3.1.12", features = [ "derive" ] }
chrono = "0.4.19"
thiserror = "1.0.30"
anyhow = "1.0.56"
anyhow = "1.0.57"
crossterm = "0.24.0"
ducc = "0.1.5"

86
README.md Normal file
View File

@@ -0,0 +1,86 @@
# Fast File Downloader
A simple CLI application to download files over HTTP with the goal of maximum throughput by parallelizing downloads. Optimized for downloading large quantities of files from [zippyshare](https://zippyshare.com/) as fast as possible.
## Features
* Automatically download files from a URL list
* Automatically fetch actual download URLs for [zippyshare](https://zippyshare.com/) and [sendcm](https://send.cm/) links
* Download multiple files at the same time
* Use multiple connections to download non-overlapping chunks of the same file in parallel
* This can be used to gain more download speed than a server would normally provide to a single connection
* Works really well with zippyshare
* This requires high random write performance for the local storage and will bottleneck on HDDs
* Only works if the host supports the HTTP Range header and doesn't otherwise block it
## Performance
On a 1000Mbit connection (actual speed ~800Mbit) I can download with about 90MB/s through a wireguard VPN from zippyshare when using 5 files in parallel with 10 connections each (`-n 5 -c 10`).
When downloading the same content manually through a browser I get about 5MB/s per file, dropping to 0.5-1MB/s after a few seconds. Also I of course have to start all the downloads by hand which is quite annoying.
## Usage
Simply create a text file and enter the URLs one per line. Comments using `#` and empty lines are allowed.
```
$ cat list.txt
# My encrypted documents backup
https://www71.zippyshare.com/v/XXXXXXX/file.html
https://www72.zippyshare.com/v/XXXXYYY/file.html
https://www73.zippyshare.com/v/XXXXZZZ/file.html
https://www74.zippyshare.com/v/Xagdgfh/file.html
# My encrypted video backup
https://www75.zippyshare.com/v/sdsgfds/file.html
https://www76.zippyshare.com/v/dfsdxfd/file.html
https://www75.zippyshare.com/v/dsgsdgf/file.html
https://www76.zippyshare.com/v/drtdrtd/file.html
https://www75.zippyshare.com/v/erdfghd/file.html
https://www76.zippyshare.com/v/87654rd/file.html
https://www75.zippyshare.com/v/dfghdfg/file.html
https://www76.zippyshare.com/v/hkjghjk/file.html
```
And then download the files using the desired levels of concurrency (e.g. 5 files at once with 10 connections each):
```
$ ffdl -l list.txt -o ~/Downloads/ -c 10 -n 5
```
## CLI Arguments
```
$ ffdl --help
FFDL - Fast File Downloader 0.1.2
Download files fast
USAGE:
ffdl.exe [OPTIONS]
OPTIONS:
-c, --connections <CONNECTIONS PER FILE>
The number concurrent connections per file download. Increasing this number will
increase the download speed of individual files if supported by the server but setting
this number too high may cause the download to fail.
NOTE: This mode will write cause random writes and for that reason won't work on HDDs.
WARNING: Files started with multiple connections currently can't be continued. [default:
1]
-d, --download <URL>
Download only the one file from the specified url
-h, --help
Print help information
-l, --listfile <URL LISTFILE>
Download all files from the specified url list file
-n, --num-files <PARALLEL DOWNLOADS>
Specify the number of files from that should be downloaded in parallel. Increasing this
number will increase the total download speed but won't improve the download speed for
individual files [default: 1]
-o, --outdir <OUTPUT DIR>
Set the output directory in which the downloads will be stored. The directory will be
created if it doesn't exit yet [default: ./]
-V, --version
Print version information
```

View File

@@ -1,4 +1,5 @@
use std::{num::NonZeroU32, path::PathBuf};
use clap::Parser;
#[derive(Parser, Clone, Debug)]
@@ -14,28 +15,32 @@ pub struct CLIArgs {
long = "outdir",
value_name = "OUTPUT DIR",
default_value = "./",
help = "Set the output directory. The directory will be created if it doesn't exit yet",
help = "Set the output directory in which the downloads will be stored. \
The directory will be created if it doesn't exit yet",
)]
pub outdir: PathBuf,
#[clap(
short = 'n',
long = "num-files",
value_name = "NUMBER OF CONCURRENT FILE DOWNLOADS",
value_name = "PARALLEL DOWNLOADS",
default_value = "1",
help = "Specify the number of concurrent downloads",
help = "Specify the number of files from that should be downloaded in parallel. Increasing \
this number will increase the total download speed but won't improve the download speed \
for individual files",
)]
pub file_count: NonZeroU32,
#[clap(
short = 'c',
long = "connections",
value_name = "NUMBER OF CONCURRENT CONNECTIONS",
value_name = "CONNECTIONS PER FILE",
default_value = "1",
help = "The number concurrent connections per file download. \
Downloads might fail when the number of connections is too high. \
Files started with multiple connections currently can't be continued. \
NOTE: This will likely cause IO bottlenecks on HDDs",
help = "The number concurrent connections per file download. Increasing this number will \
increase the download speed of individual files if supported by the server but \
setting this number too high may cause the download to fail. \n\
NOTE: This mode will write cause random writes and for that reason won't work on HDDs. \
WARNING: Files started with multiple connections currently can't be continued.",
)]
pub conn_count: NonZeroU32,

208
src/clireporter.rs Normal file
View File

@@ -0,0 +1,208 @@
use std::collections::{HashMap, VecDeque};
use std::io::stdout;
use std::time::SystemTime;
use anyhow::Result;
use crossterm::cursor::MoveToPreviousLine;
use crossterm::execute;
use crossterm::style::Print;
use crossterm::terminal::{Clear, ClearType};
use tokio::sync::mpsc;
use crate::dlreport::{DlReport, DlStatus, InfoHolder};
/// Redraw the dynamic status area on stdout.
///
/// The cursor is hidden and moved up by `moved_lines` lines, then the following is printed:
/// every queued message prefixed with the current local time (the queue is drained), a divider
/// line, one status line per entry in `statuses`, an empty line and a summary line with the
/// summed download speed and the `file_count_completed`/`file_count_total` ratio. Everything
/// below the summary is cleared and the cursor is shown again.
///
/// Returns the number of lines the cursor has to be moved up on the next call. Printed messages
/// are not included in that number, so the next redraw starts at the divider and leaves them
/// in place.
///
/// # Errors
///
/// Returns an error if writing any of the terminal commands to stdout fails.
fn print_accumulated_report(
    statuses: &HashMap<u32, InfoHolder>,
    msg_queue: &mut VecDeque<String>,
    moved_lines: u16,
    file_count_completed: i32,
    file_count_total: i32,
) -> Result<u16> {
    let mut dl_speed_sum = 0.0;

    execute!(
        stdout(),
        crossterm::cursor::Hide,
        MoveToPreviousLine(moved_lines)
    )?;

    for msg in msg_queue.drain(..) {
        let ct_now = chrono::Local::now();

        execute!(
            stdout(),
            Print(format!("{} > {}", ct_now.format("%H:%M:%S"), msg)),
            Clear(ClearType::UntilNewLine),
            Print("\n")
        )?;
    }

    execute!(
        stdout(),
        Print("----------------------------------------"),
        Clear(ClearType::UntilNewLine),
        Print("\n")
    )?;

    for v in statuses.values() {
        // A total size of 0 would turn the division into NaN/inf and print garbage
        let percent_complete = if v.total_size == 0 {
            0.0
        } else {
            v.progress as f64 / v.total_size as f64 * 100.0
        };

        execute!(
            stdout(),
            Print(format!(
                "Status: {:6.2} mb/s {:5.2}% completed '{}'",
                v.speed_mbps, percent_complete, v.filename
            )),
            Clear(ClearType::UntilNewLine),
            Print("\n")
        )?;

        dl_speed_sum += v.speed_mbps;
    }

    // Same guard for the file ratio: with no files at all there is nothing to divide by
    let file_percent_completed = if file_count_total == 0 {
        0.0
    } else {
        file_count_completed as f32 / file_count_total as f32 * 100.0
    };

    execute!(
        stdout(),
        Clear(ClearType::CurrentLine),
        Print("\n"),
        Print(format!(
            " =>> Accumulated download speed: {:6.2} mb/s {}/{} files, {:.0}%",
            dl_speed_sum, file_count_completed, file_count_total, file_percent_completed
        )),
        Clear(ClearType::UntilNewLine),
        Print("\n"),
        Clear(ClearType::FromCursorDown),
        crossterm::cursor::Show
    )?;

    // Next time go up 1 line for each printed status, +2 for divider & space, +1 for accumulated
    Ok(statuses.len() as u16 + 3)
}
/// Receive download reports from the provided receiver and print them to stdout using dynamic
/// refreshes of the terminal. This will block until all senders are closed.
///
/// `file_count_total` is the number of files shown as the total in the summary line.
///
/// Reports for ids that were never announced with `DlStatus::Init` are tolerated: their progress
/// updates are dropped and no "finished" message is printed for them, but they are still counted.
///
/// # Errors
///
/// Returns an error if writing to the terminal fails.
pub async fn cli_print_reports(
    mut receiver: mpsc::UnboundedReceiver<DlReport>,
    file_count_total: i32,
) -> Result<()> {
    let mut statuses: HashMap<u32, InfoHolder> = HashMap::new();
    let mut moved_lines = 0;
    let mut msg_queue = VecDeque::new();

    let mut t_last = SystemTime::now();

    let mut file_count_completed = 0;
    let mut file_count_failed = 0;
    let mut file_count_done = 0;

    while let Some(update) = receiver.recv().await {
        // Every arm updates the bookkeeping first and then tells whether the display has to be
        // redrawn right away. This way the redraw always sees the up-to-date counters.
        let redraw = match update.status {
            DlStatus::Init {
                bytes_total,
                filename,
            } => {
                msg_queue.push_back(format!("Starting download for file '{}'", &filename));
                statuses.insert(update.id, InfoHolder::new(filename, bytes_total));
                true
            }
            DlStatus::Update {
                speed_mbps,
                bytes_curr,
            } => {
                // Ignore progress for unknown ids instead of panicking
                if let Some(info) = statuses.get_mut(&update.id) {
                    info.progress = bytes_curr;
                    info.speed_mbps = speed_mbps;
                }

                // Throttle progress redraws to one per 500ms. `elapsed` fails if the system
                // clock went backwards; just redraw in that case, which also resets `t_last`.
                t_last.elapsed().map_or(true, |d| d.as_millis() > 500)
            }
            DlStatus::Done { duration_ms } => {
                if let Some(info) = statuses.remove(&update.id) {
                    msg_queue.push_back(format!(
                        "Finished downloading '{}' with {:.2} mb in {:.2} seconds",
                        &info.filename,
                        (info.total_size as f32 / 1_000_000.0),
                        (duration_ms as f32 / 1_000.0)
                    ));
                }

                file_count_completed += 1;
                file_count_done += 1;

                // The queued message is shown with the next redraw
                false
            }
            DlStatus::DoneErr { filename } => {
                msg_queue.push_back(format!("Error: Download failed: '{}'", filename));

                // Don't care if it exists, just make sure it is gone
                statuses.remove(&update.id);

                file_count_failed += 1;
                file_count_done += 1;
                true
            }
            DlStatus::Message(msg) => {
                msg_queue.push_back(msg);
                true
            }
            DlStatus::Skipped => {
                file_count_completed += 1;
                file_count_done += 1;
                false
            }
        };

        if redraw {
            moved_lines = print_accumulated_report(
                &statuses,
                &mut msg_queue,
                moved_lines,
                file_count_done,
                file_count_total,
            )?;
            t_last = SystemTime::now();
        }
    }

    // Final redraw to flush all messages that are still queued
    print_accumulated_report(
        &statuses,
        &mut msg_queue,
        moved_lines,
        file_count_done,
        file_count_total,
    )?;

    // Replace the empty line and the summary line of the last redraw with the final result
    execute!(
        stdout(),
        MoveToPreviousLine(2),
        Print(format!(
            "All done! {}/{} completed, {} failed\n",
            file_count_completed, file_count_total, file_count_failed
        )),
        Clear(ClearType::FromCursorDown)
    )?;

    Ok(())
}

View File

@@ -1,15 +1,8 @@
use std::collections::{HashMap, VecDeque};
use std::io::stdout;
use std::time::SystemTime;
use std::{collections::HashMap, time::SystemTime};
use tokio::sync::mpsc;
use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender};
use crossterm::cursor::MoveToPreviousLine;
use crossterm::execute;
use crossterm::style::Print;
use crossterm::terminal::{Clear, ClearType};
use anyhow::Result;
use crate::misc::RollingAverage;
#[derive(Clone, Debug)]
pub enum DlStatus {
@@ -82,22 +75,22 @@ impl DlReporter {
#[macro_export]
macro_rules! report_msg {
($rep:ident, $fmt:expr) => {
DlReporter::msg(&$rep, $fmt.to_string());
DlReporter::msg(&$rep, format!($fmt));
};
($rep:ident, $fmt:expr, $($fmt2:expr),+) => {
DlReporter::msg(&$rep, format!($fmt, $($fmt2,)+));
};
}
struct InfoHolder {
filename: String,
total_size: u64,
progress: u64,
speed_mbps: f32,
pub struct InfoHolder {
pub filename: String,
pub total_size: u64,
pub progress: u64,
pub speed_mbps: f32,
}
impl InfoHolder {
fn new(filename: String, total_size: u64) -> InfoHolder {
pub fn new(filename: String, total_size: u64) -> InfoHolder {
InfoHolder {
filename,
total_size,
@@ -107,196 +100,59 @@ impl InfoHolder {
}
}
fn print_accumulated_report(
statuses: &HashMap<u32, InfoHolder>,
msg_queue: &mut VecDeque<String>,
moved_lines: u16,
file_count_completed: i32,
file_count_total: i32,
) -> Result<u16> {
let mut dl_speed_sum = 0.0;
execute!(
stdout(),
crossterm::cursor::Hide,
MoveToPreviousLine(moved_lines)
)?;
for msg in msg_queue.drain(..) {
let ct_now = chrono::Local::now();
execute!(
stdout(),
Print(format!("{} > {}", ct_now.format("%H:%M:%S"), msg)),
Clear(ClearType::UntilNewLine),
Print("\n")
)?;
}
execute!(
stdout(),
Print("----------------------------------------".to_string()),
Clear(ClearType::UntilNewLine),
Print("\n")
)?;
for v in statuses.values() {
let percent_complete = v.progress as f64 / v.total_size as f64 * 100.0;
execute!(
stdout(),
Print(format!(
"Status: {:6.2} mb/s {:5.2}% completed '{}'",
v.speed_mbps, percent_complete, v.filename
)),
Clear(ClearType::UntilNewLine),
Print("\n")
)?;
dl_speed_sum += v.speed_mbps;
}
let file_percent_completed = file_count_completed as f32 / file_count_total as f32 * 100.0;
execute!(
stdout(),
Clear(ClearType::CurrentLine),
Print("\n"),
Print(format!(
" =>> Accumulated download speed: {:6.2} mb/s {}/{} files, {:.0}%",
dl_speed_sum, file_count_completed, file_count_total, file_percent_completed
)),
Clear(ClearType::UntilNewLine),
Print("\n"),
Clear(ClearType::FromCursorDown),
crossterm::cursor::Show
)?;
// Next time go up 1 line for each printed status, +2 for divider & space, +1 for accumulated
Ok(statuses.len() as u16 + 3)
pub struct DlReportAccumulator {
parent: DlReporter,
rec: UnboundedReceiver<DlReport>,
}
pub async fn watch_and_print_reports(
mut receiver: mpsc::UnboundedReceiver<DlReport>,
file_count_total: i32,
) -> Result<()> {
let mut statuses: HashMap<u32, InfoHolder> = HashMap::new();
let mut moved_lines = 0;
let mut msg_queue = VecDeque::new();
impl DlReportAccumulator {
pub fn new(parent: DlReporter) -> (DlReportAccumulator, UnboundedSender<DlReport>) {
let (tx, rec) = mpsc::unbounded_channel();
(DlReportAccumulator { parent, rec }, tx)
}
let mut t_last = SystemTime::now();
pub async fn accumulate(mut self) {
let mut progresses: HashMap<u32, u64> = HashMap::new();
let mut file_count_completed = 0;
let mut file_count_failed = 0;
let mut file_count_done = 0;
let mut progress_last: u64 = 0;
while let Some(update) = receiver.recv().await {
match update.status {
DlStatus::Init {
bytes_total,
filename,
} => {
msg_queue.push_back(format!("Starting download for file '{}'", &filename));
statuses.insert(update.id, InfoHolder::new(filename, bytes_total));
let mut t_last = SystemTime::now();
moved_lines = print_accumulated_report(
&statuses,
&mut msg_queue,
moved_lines,
file_count_done,
file_count_total,
)?;
}
DlStatus::Update {
speed_mbps,
bytes_curr,
} => {
// Scope the reference to prevent borrowing conflict later
{
let s = &mut statuses.get_mut(&update.id).unwrap();
s.progress = bytes_curr;
s.speed_mbps = speed_mbps;
let mut average_speed = RollingAverage::new(10);
while let Some(update) = self.rec.recv().await {
match update.status {
DlStatus::Init {
bytes_total: _,
filename: _,
} => {}
DlStatus::Update {
speed_mbps: _,
bytes_curr,
} => {
*progresses.entry(update.id).or_insert(0) = bytes_curr;
let progress_curr = progresses.values().sum();
let progress_delta = progress_curr - progress_last;
let t_elapsed = t_last.elapsed().unwrap().as_secs_f64();
let speed_mbps = average_speed.value() as f32;
// currently executes always, but might change
if progress_delta >= 5_000_000 {
average_speed.add(((progress_delta as f64) / 1_000_000.0) / t_elapsed);
progress_last = progress_curr;
t_last = SystemTime::now();
}
self.parent.update(speed_mbps, progress_curr);
}
DlStatus::Done { duration_ms: _ } => {}
if t_last.elapsed().unwrap().as_millis() > 500 {
moved_lines = print_accumulated_report(
&statuses,
&mut msg_queue,
moved_lines,
file_count_done,
file_count_total,
)?;
t_last = SystemTime::now();
}
}
DlStatus::Done { duration_ms } => {
msg_queue.push_back(format!(
"Finished downloading '{}' with {:.2} mb in {:.2} seconds",
&statuses.get(&update.id).unwrap().filename,
(statuses.get(&update.id).unwrap().total_size as f32 / 1_000_000.0),
(duration_ms as f32 / 1_000.0)
));
statuses.remove(&update.id);
file_count_completed += 1;
file_count_done += 1;
}
DlStatus::DoneErr { filename } => {
msg_queue.push_back(format!("Error: Download failed: '{}'", filename));
// Don't care if it exists, just make sure it is gone
statuses.remove(&update.id);
// Refresh display
moved_lines = print_accumulated_report(
&statuses,
&mut msg_queue,
moved_lines,
file_count_done,
file_count_total,
)?;
t_last = SystemTime::now();
file_count_failed += 1;
file_count_done += 1;
}
DlStatus::Message(msg) => {
msg_queue.push_back(msg);
moved_lines = print_accumulated_report(
&statuses,
&mut msg_queue,
moved_lines,
file_count_done,
file_count_total,
)?;
t_last = SystemTime::now();
}
DlStatus::Skipped => {
file_count_completed += 1;
file_count_done += 1;
// Just forward everything else to the calling receiver
_ => self.parent.send(update.status),
}
}
}
print_accumulated_report(
&statuses,
&mut msg_queue,
moved_lines,
file_count_done,
file_count_total,
)?;
execute!(
stdout(),
MoveToPreviousLine(2),
Print(format!(
"All done! {}/{} completed, {} failed\n",
file_count_completed, file_count_total, file_count_failed
)),
Clear(ClearType::FromCursorDown)
)?;
Ok(())
}

View File

@@ -1,66 +1,16 @@
use std::io::SeekFrom;
use std::path::Path;
use std::time::SystemTime;
use anyhow::Result;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use percent_encoding::percent_decode_str;
use std::io::SeekFrom;
use std::path::Path;
use std::time::SystemTime;
use tokio::io::{AsyncSeekExt, AsyncWriteExt};
use tokio::sync::mpsc;
use crate::dlreport::*;
use crate::errors::*;
/// Rolling average over the most recent `size` samples.
///
/// Once at least three samples are stored, the largest sample is left out of
/// the average so that a single peak does not dominate the result.
struct RollingAverage {
    /// Slot in `data` that the next sample overwrites once the window is full
    index: usize,
    /// Maximum number of samples kept in the window
    size: usize,
    /// The stored samples (at most `size` of them)
    data: Vec<f64>,
}

impl RollingAverage {
    /// Create an empty window that keeps at most `size` samples.
    ///
    /// A window of size 0 stores nothing; its `value()` is always `0.0`.
    fn new(size: usize) -> Self {
        RollingAverage {
            index: 0,
            size,
            data: Vec::with_capacity(size),
        }
    }

    /// Current average of the stored samples.
    ///
    /// Returns `0.0` while no samples are stored. With three or more samples
    /// the largest one is excluded from both the sum and the count.
    fn value(&self) -> f64 {
        if self.data.is_empty() {
            return 0.0;
        }

        let sum: f64 = self.data.iter().sum();

        if self.data.len() >= 3 {
            let max = self
                .data
                .iter()
                .fold(self.data[0], |max, &v| if v > max { v } else { max });
            (sum - max) / (self.data.len() - 1) as f64
        } else {
            sum / self.data.len() as f64
        }
    }

    /// Add a sample, overwriting the oldest one once the window is full.
    fn add(&mut self, val: f64) {
        if self.data.len() < self.size {
            self.data.push(val);
        } else if self.size > 0 {
            // The explicit `size` is used instead of `Vec::capacity`, which is
            // only guaranteed to be *at least* the requested capacity.
            self.data[self.index] = val;
            self.index = (self.index + 1) % self.size;
        }
        // size == 0: nothing can be stored, so the sample is dropped
    }
}
use crate::dlreport::{DlReportAccumulator, DlReporter};
use crate::errors::DlError;
use crate::misc::RollingAverage;
/// Get the filename at the end of the given URL. This will decode the URL Encoding.
pub fn url_to_filename(url: &str) -> String {
@@ -81,7 +31,7 @@ pub async fn download_feedback(
url: &str,
into_file: &Path,
rep: DlReporter,
content_length: Option<u64>,
content_length: u64,
) -> Result<()> {
download_feedback_chunks(url, into_file, rep, None, content_length).await
}
@@ -91,14 +41,9 @@ pub async fn download_feedback_chunks(
into_file: &Path,
rep: DlReporter,
from_to: Option<(u64, u64)>,
content_length: Option<u64>,
mut content_length: u64,
) -> Result<()> {
let mut content_length = match content_length {
Some(it) => it,
None => http_get_filesize_and_range_support(url).await?.filesize,
};
// Send the HTTP request to download the given link
// Build the HTTP request to download the given link
let mut req = reqwest::Client::new().get(url);
// Add range header if needed
@@ -213,13 +158,8 @@ pub async fn download_feedback_multi(
into_file: &Path,
rep: DlReporter,
conn_count: u32,
content_length: Option<u64>,
content_length: u64,
) -> Result<()> {
let content_length = match content_length {
Some(it) => it,
None => http_get_filesize_and_range_support(url).await?.filesize,
};
// Create zeroed file with 1 byte too much. This will be truncated on download
// completion and can indicate that the file is not suitable for continuation
create_zeroed_file(into_file, content_length as usize + 1).await?;
@@ -229,7 +169,7 @@ pub async fn download_feedback_multi(
let mut joiners = Vec::new();
let (tx, mut rx) = mpsc::unbounded_channel::<DlReport>();
let (rep_accum, tx) = DlReportAccumulator::new(rep.clone());
let t_start = SystemTime::now();
@@ -258,7 +198,7 @@ pub async fn download_feedback_multi(
&into_file,
rep,
Some(from_to),
Some(specific_content_length),
specific_content_length,
)
.await
}))
@@ -270,58 +210,7 @@ pub async fn download_feedback_multi(
rep.init(content_length, filename.to_string());
let rep_task = rep.clone();
let mut t_last = t_start;
let manager_handle = tokio::task::spawn(async move {
let rep = rep_task;
//let mut dl_speeds = vec![0.0_f32; conn_count as usize];
let mut progresses = vec![0; conn_count as usize];
let mut progress_last: u64 = 0;
let mut average_speed = RollingAverage::new(10);
while let Some(update) = rx.recv().await {
match update.status {
DlStatus::Init {
bytes_total: _,
filename: _,
} => {}
DlStatus::Update {
speed_mbps: _,
bytes_curr,
} => {
//dl_speeds[update.id as usize] = speed_mbps;
progresses[update.id as usize] = bytes_curr;
let progress_curr = progresses.iter().sum();
let progress_delta = progress_curr - progress_last;
let t_elapsed = t_last.elapsed().unwrap().as_secs_f64();
let speed_mbps = average_speed.value() as f32;
// currently executes always, but might change
if progress_delta >= 5_000_000 {
average_speed.add(((progress_delta as f64) / 1_000_000.0) / t_elapsed);
progress_last = progress_curr;
t_last = SystemTime::now();
}
rep.update(speed_mbps, progress_curr);
}
DlStatus::Done { duration_ms: _ } => {
//dl_speeds[update.id as usize] = 0.0;
}
// Just forward everything else to the calling receiver
_ => rep.send(update.status),
}
}
});
let manager_handle = tokio::task::spawn(rep_accum.accumulate());
let mut joiners: FuturesUnordered<_> = joiners.into_iter().collect();
// Validate if the tasks were successful. This will always grab the next completed
@@ -377,7 +266,7 @@ pub struct HttpFileInfo {
pub filename: String,
}
pub async fn http_get_filesize_and_range_support(url: &str) -> Result<HttpFileInfo> {
pub async fn http_file_info(url: &str) -> Result<HttpFileInfo> {
let resp = reqwest::Client::new().head(url).send().await?;
let filesize = resp
@@ -402,59 +291,3 @@ pub async fn http_get_filesize_and_range_support(url: &str) -> Result<HttpFileIn
Ok(info)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the fill phase, the wrap-around overwrite order and the
    /// peak-dropping average of `RollingAverage`.
    #[test]
    fn rolling_average() {
        let mut ra = RollingAverage::new(3);

        assert_eq!(0, ra.data.len());
        assert_eq!(3, ra.data.capacity());

        assert_eq!(0.0, ra.value());

        // 10 / 1 = 10
        ra.add(10.0);
        assert_eq!(1, ra.data.len());
        assert_eq!(10.0, ra.value());

        // (10 + 20) / 2 = 15
        ra.add(20.0);
        assert_eq!(2, ra.data.len());
        assert_eq!(15.0, ra.value());

        // With three samples the peak (30) is left out: (10 + 20) / 2 = 15
        ra.add(30.0);
        assert_eq!(3, ra.data.len());
        assert_eq!(15.0, ra.value());

        assert_eq!(10.0, ra.data[0]);
        assert_eq!(20.0, ra.data[1]);
        assert_eq!(30.0, ra.data[2]);

        // The window is full, so this replaces the oldest value (index 0)
        ra.add(40.0);
        assert_eq!(3, ra.data.len());
        assert_eq!(3, ra.data.capacity());

        // The peak (40) is left out: (20 + 30) / 2 = 25
        assert_eq!(25.0, ra.value());

        assert_eq!(40.0, ra.data[0]);
        assert_eq!(20.0, ra.data[1]);
        assert_eq!(30.0, ra.data[2]);

        // Three more samples wrap around the whole window once
        ra.add(50.0);
        ra.add(60.0);
        ra.add(70.0);

        assert_eq!(70.0, ra.data[0]);
        assert_eq!(50.0, ra.data[1]);
        assert_eq!(60.0, ra.data[2]);
    }
}

26
src/integrations.rs Normal file
View File

@@ -0,0 +1,26 @@
mod zippy;
mod sendcm;
use anyhow::Result;
/// File hosting services whose page URLs can be resolved to direct download
/// URLs by this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegratedService {
    /// Links handled by the `zippy` submodule
    ZippyShare,
    /// Links handled by the `sendcm` submodule
    SendCm,
}
/// Determine which integrated service (if any) the given URL belongs to.
///
/// Zippyshare is checked first, then sendcm. Returns `None` when the URL
/// matches neither service.
pub fn is_integrated_url(url: &str) -> Option<IntegratedService> {
    if zippy::is_zippyshare_url(url) {
        return Some(IntegratedService::ZippyShare);
    }

    if sendcm::is_sendcm_url(url) {
        return Some(IntegratedService::SendCm);
    }

    None
}
pub async fn resolve_integrated_url(url: &str, service: IntegratedService) -> Result<String> {
match service {
IntegratedService::ZippyShare => zippy::resolve_link(url).await,
IntegratedService::SendCm => sendcm::resolve_link(url).await,
}
}

View File

@@ -0,0 +1,84 @@
use std::io::{Error, ErrorKind};
use anyhow::Result;
use regex::Regex;
/// Check whether `url` is a send.cm file URL, either `send.cm/<id>` or
/// `send.cm/d/<id>` with an alphanumeric id.
pub fn is_sendcm_url(url: &str) -> bool {
    // The pattern is a constant, so compiling it cannot fail
    let file_url = Regex::new(r"^https?://send\.cm/(?:d/)?[0-9a-zA-Z]+$").unwrap();

    file_url.is_match(url)
}
/*
Updated: 01.04.2022
Link generation code:
- A post request is sent to the server using the form described below
- The id field is the value which is used to generate the link
- If the id is not found, the link is not generated
- The id is the same as the url suffix when NOT using a /d/ prefix url
- The response to the post request is a 302 redirect to the generated link
```
<form name="F1" method="POST" action="https://send.cm">
<input type="hidden" name="op" value="download2">
<input type="hidden" name="id" value="xxxxxxxxxx">
<input type="hidden" name="rand" value="">
<input type="hidden" name="referer" value="">
<input type="hidden" name="method_free" value="">
<input type="hidden" name="method_premium" value="">
......
```
*/
pub async fn resolve_link(url: &str) -> Result<String> {
let user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:77.0) Gecko/20100101 Firefox/77.0";
let accept =
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8";
// Add a few extra headers to the request in order to be less suspicious
let body = reqwest::Client::new()
.get(url)
.header("User-Agent", user_agent)
.header("Accept", accept)
.send()
.await?
.text()
.await?;
let re_link = Regex::new(r#"<input type="hidden" name="id" value="([0-9a-zA-Z]+)">"#)?;
let cap_link = match re_link.captures(&body) {
Some(cap) => cap,
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
};
let id = &match cap_link.get(1) {
Some(id) => id.as_str(),
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
};
let resp = reqwest::ClientBuilder::new()
.redirect(reqwest::redirect::Policy::none())
.build()?
.post("https://send.cm")
.header("User-Agent", user_agent)
.header("Accept", accept)
.form(&[
("op", "download2"),
("id", id),
("rand", ""),
("referer", ""),
("method_free", ""),
("method_premium", ""),
])
.send()
.await?;
if resp.status().is_redirection() {
match resp.headers().get(reqwest::header::LOCATION) {
Some(location) => Ok(location.to_str()?.to_string()),
None => Err(Error::new(ErrorKind::Other, "Location header not found").into()),
}
} else {
Err(Error::new(ErrorKind::Other, "Link not found").into())
}
}

72
src/integrations/zippy.rs Normal file
View File

@@ -0,0 +1,72 @@
use std::io::{Error, ErrorKind};
use anyhow::Result;
use ducc::Ducc;
use regex::Regex;
/// Check whether `url` is a zippyshare file page URL of the form
/// `http(s)://[wwwNN.]zippyshare.com/v/<id>/file.html`.
pub fn is_zippyshare_url(url: &str) -> bool {
    // The pattern is a constant, so compiling it cannot fail
    let page_url =
        Regex::new(r#"^https?://(?:www\d*\.)?zippyshare\.com/v/[0-9a-zA-Z]+/file\.html$"#)
            .unwrap();

    page_url.is_match(url)
}
pub async fn resolve_link(url: &str) -> Result<String> {
// Regex to check if the provided url is a zippyshare download url
let re = Regex::new(r#"^(https?://(?:www\d*\.)?zippyshare\.com)/v/[0-9a-zA-Z]+/file\.html$"#)?;
if !re.is_match(url) {
return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into());
}
// Extract the hostname (with https:// prefix) for later
let host = &re.captures(url).unwrap()[1];
// Download the html body for the download page
let body = reqwest::get(url).await?.text().await?;
let re_script =
Regex::new(r#"(?ms)<script.*?>(.*getElementById\('dlbutton'\).*?)</script>"#).unwrap();
let re_script_start = Regex::new(r#"(?ms)<script.*?>"#).unwrap();
// Extract the script. This will end at the correct script end, but has stuff before the start
let cap_tmp = match re_script.captures(&body) {
Some(cap) => cap,
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
};
let temp = &cap_tmp[1];
// Find the correct script start
let pos_script_start = match re_script_start.find_iter(&temp).last() {
Some(cap) => cap,
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
};
// Cut off the beginning to get only the script contents
let raw_script = &temp[pos_script_start.end()..];
// Preprocess the script
let script = preprocess_js(raw_script);
// Calculate the link
let link = eval_js_link_calculation(&script)
.map_err(|_| Error::new(ErrorKind::Other, "Link not found: JS eval error"))?;
let url = format!("{}{}", host, link);
Ok(url)
}
/// Rewrite the extracted page script so that it can be evaluated without a DOM.
///
/// Every DOM access the script is known to make is replaced by a plain
/// variable or a constant. `;href` is appended so that the value assigned to
/// the download button link becomes the result of the script.
fn preprocess_js(js_src: &str) -> String {
    // (DOM expression, replacement), applied one after another in this order
    const SUBSTITUTIONS: [(&str, &str); 4] = [
        ("document.getElementById('dlbutton').href", "href"),
        ("document.getElementById('fimage')", "false"),
        // Fix for antiscrape 24.07.2022
        ("document.getElementById('omg').getAttribute('class')", "2"),
        // Fix for antiscrape 16.08.2022
        ("document.getElementById('dlbutton').omg", "omg"),
    ];

    let mut script = SUBSTITUTIONS
        .iter()
        .fold(js_src.to_string(), |src, &(dom_expr, replacement)| {
            src.replace(dom_expr, replacement)
        });

    script.push_str(";href");
    script
}
/// Evaluate the preprocessed script in a fresh javascript engine and return
/// its result as a string.
///
/// NOTE(review): `js_src` originates from a downloaded web page, so this
/// executes remote code inside the embedded engine.
fn eval_js_link_calculation(js_src: &str) -> ducc::Result<String> {
    let engine = Ducc::new();

    engine.exec(js_src, None, Default::default())
}

View File

@@ -1,35 +1,29 @@
use std::{
collections::VecDeque,
path::{Path, PathBuf},
process::exit,
sync::Arc,
time::SystemTime,
};
use clap::Parser;
use futures::future::join_all;
use tokio::{
fs::create_dir_all,
sync::{
mpsc::{unbounded_channel, UnboundedSender},
Mutex,
},
};
use crate::{
args::CLIArgs,
dlreport::{watch_and_print_reports, DlReport, DlReporter},
download::{download_feedback, download_feedback_multi, http_get_filesize_and_range_support},
zippy::is_zippyshare_url,
};
use std::collections::VecDeque;
use std::path::Path;
use std::process::exit;
use std::sync::Arc;
use std::time::SystemTime;
use anyhow::Result;
use clap::Parser;
use futures::future::join_all;
use tokio::fs::create_dir_all;
use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};
use tokio::sync::Mutex;
use crate::args::CLIArgs;
use crate::clireporter::cli_print_reports;
use crate::dlreport::{DlReport, DlReporter};
use crate::download::{download_feedback, download_feedback_multi, http_file_info};
use crate::integrations::{is_integrated_url, resolve_integrated_url};
mod args;
mod clireporter;
mod dlreport;
mod download;
mod errors;
mod zippy;
mod integrations;
mod misc;
struct DlRequest {
id: usize,
@@ -100,12 +94,12 @@ async fn download_multiple(args: CLIArgs, raw_urls: Vec<String>) -> Result<()> {
let t_start = SystemTime::now();
let jobs = (0..args.file_count.get())
.map(|_| tokio::task::spawn(download_job(urls.clone(), tx.clone(), args.clone())))
.map(|_| tokio::task::spawn(download_job(Arc::clone(&urls), tx.clone(), args.clone())))
.collect::<Vec<_>>();
drop(tx);
watch_and_print_reports(rx, num_urls as i32).await?;
cli_print_reports(rx, num_urls as i32).await?;
join_all(jobs).await;
@@ -115,12 +109,17 @@ async fn download_multiple(args: CLIArgs, raw_urls: Vec<String>) -> Result<()> {
}
async fn download_job(urls: SyncQueue, reporter: UnboundedSender<DlReport>, cli_args: CLIArgs) {
while let Some(dlreq) = urls.lock().await.pop_front() {
// The mutex access must be in its own scope to ensure that the lock is dropped
while let Some(dlreq) = {
let mut urls = urls.lock().await;
urls.pop_front().take()
} {
let reporter = DlReporter::new(dlreq.id as u32, reporter.clone());
report_msg!(reporter, "Downloading {}", dlreq.url);
// Resolve the url of an integrated service to the direct download url if necessary
let url = if is_zippyshare_url(&dlreq.url) {
match zippy::resolve_link(&dlreq.url).await {
let url = match is_integrated_url(&dlreq.url) {
Some(service) => match resolve_integrated_url(&dlreq.url, service).await {
Ok(url) => url,
Err(_e) => {
report_msg!(
@@ -130,12 +129,11 @@ async fn download_job(urls: SyncQueue, reporter: UnboundedSender<DlReport>, cli_
);
continue;
}
}
} else {
dlreq.url.to_string()
},
None => dlreq.url,
};
let info = match http_get_filesize_and_range_support(&url).await {
let info = match http_file_info(&url).await {
Ok(it) => it,
Err(_e) => {
report_msg!(reporter, "Error while querying metadata: {url}");
@@ -143,13 +141,7 @@ async fn download_job(urls: SyncQueue, reporter: UnboundedSender<DlReport>, cli_
}
};
let into_file: PathBuf = cli_args
.outdir
.join(Path::new(&info.filename))
.to_str()
.unwrap()
.to_string()
.into();
let into_file = cli_args.outdir.join(Path::new(&info.filename));
// If file with same name is present locally, check filesize
if into_file.exists() {
@@ -173,20 +165,20 @@ async fn download_job(urls: SyncQueue, reporter: UnboundedSender<DlReport>, cli_
}
let dl_status = if cli_args.conn_count.get() == 1 {
download_feedback(&url, &into_file, reporter.clone(), Some(info.filesize)).await
download_feedback(&url, &into_file, reporter.clone(), info.filesize).await
} else if !info.range_support {
report_msg!(
reporter,
"Server does not support range headers. Downloading with single connection: {url}"
);
download_feedback(&url, &into_file, reporter.clone(), Some(info.filesize)).await
download_feedback(&url, &into_file, reporter.clone(), info.filesize).await
} else {
download_feedback_multi(
&url,
&into_file,
reporter.clone(),
cli_args.conn_count.get(),
Some(info.filesize),
info.filesize,
)
.await
};

106
src/misc.rs Normal file
View File

@@ -0,0 +1,106 @@
/// Rolling average over the most recent `size` samples.
///
/// Once at least three samples are stored, the largest sample is left out of
/// the average so that a single peak does not dominate the result.
pub struct RollingAverage {
    /// Slot in `data` that the next sample overwrites once the window is full
    index: usize,
    /// Maximum number of samples kept in the window
    size: usize,
    /// The stored samples (at most `size` of them)
    data: Vec<f64>,
}

impl RollingAverage {
    /// Create an empty window that keeps at most `size` samples.
    ///
    /// A window of size 0 stores nothing; its `value()` is always `0.0`.
    pub fn new(size: usize) -> Self {
        RollingAverage {
            index: 0,
            size,
            data: Vec::with_capacity(size),
        }
    }

    /// Current average of the stored samples.
    ///
    /// Returns `0.0` while no samples are stored. With three or more samples
    /// the largest one is excluded from both the sum and the count.
    pub fn value(&self) -> f64 {
        if self.data.is_empty() {
            return 0.0;
        }

        let sum: f64 = self.data.iter().sum();

        if self.data.len() >= 3 {
            let max = self
                .data
                .iter()
                .fold(self.data[0], |max, &v| if v > max { v } else { max });
            (sum - max) / (self.data.len() - 1) as f64
        } else {
            sum / self.data.len() as f64
        }
    }

    /// Add a sample, overwriting the oldest one once the window is full.
    pub fn add(&mut self, val: f64) {
        if self.data.len() < self.size {
            self.data.push(val);
        } else if self.size > 0 {
            // The explicit `size` is used instead of `Vec::capacity`, which is
            // only guaranteed to be *at least* the requested capacity.
            self.data[self.index] = val;
            self.index = (self.index + 1) % self.size;
        }
        // size == 0: nothing can be stored, so the sample is dropped
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the fill phase, the wrap-around overwrite order and the
    /// peak-dropping average of `RollingAverage`.
    #[test]
    fn rolling_average() {
        let mut ra = RollingAverage::new(3);

        assert_eq!(0, ra.data.len());
        assert_eq!(3, ra.data.capacity());

        assert_eq!(0.0, ra.value());

        // 10 / 1 = 10
        ra.add(10.0);
        assert_eq!(1, ra.data.len());
        assert_eq!(10.0, ra.value());

        // (10 + 20) / 2 = 15
        ra.add(20.0);
        assert_eq!(2, ra.data.len());
        assert_eq!(15.0, ra.value());

        // With three samples the peak (30) is left out: (10 + 20) / 2 = 15
        ra.add(30.0);
        assert_eq!(3, ra.data.len());
        assert_eq!(15.0, ra.value());

        assert_eq!(10.0, ra.data[0]);
        assert_eq!(20.0, ra.data[1]);
        assert_eq!(30.0, ra.data[2]);

        // The window is full, so this replaces the oldest value (index 0)
        ra.add(40.0);
        assert_eq!(3, ra.data.len());
        assert_eq!(3, ra.data.capacity());

        // The peak (40) is left out: (20 + 30) / 2 = 25
        assert_eq!(25.0, ra.value());

        assert_eq!(40.0, ra.data[0]);
        assert_eq!(20.0, ra.data[1]);
        assert_eq!(30.0, ra.data[2]);

        // Three more samples wrap around the whole window once
        ra.add(50.0);
        ra.add(60.0);
        ra.add(70.0);

        assert_eq!(70.0, ra.data[0]);
        assert_eq!(50.0, ra.data[1]);
        assert_eq!(60.0, ra.data[2]);
    }
}

View File

@@ -1,60 +0,0 @@
use anyhow::Result;
use regex::Regex;
use std::io::{Error, ErrorKind};
/// Check whether `url` is a zippyshare file page URL of the form
/// `http(s)://[wwwNN.]zippyshare.com/v/<id>/file.html`.
pub fn is_zippyshare_url(url: &str) -> bool {
    // The pattern is a constant, so compiling it cannot fail
    let page_url =
        Regex::new(r"^https?://(?:www\d*\.)?zippyshare\.com/v/[0-9a-zA-Z]+/file\.html$").unwrap();

    page_url.is_match(url)
}
/*
Updated: 07.03.2022
Link generation code:
- `href = $1 + ($2 % $3 + $4 % $5) + $6`
- `$1` is always `/d/XXX` where XXX is dependent on the file
- `$2`, `$3`, `$4` and `$5` are dynamic and randomly generated on each reload
- `$2` is always the same as `$4`
- `$6` is dependent on the file
- The numbers in the calculation part (`$2`, `$3`, `$4` and `$5`) are hard coded
```
document.getElementById('dlbutton').href = "/d/0Ky7p1C6/" + (186549 % 51245 + 186549 % 913) + "/some-file-name.part1.rar";
```
*/
pub async fn resolve_link(url: &str) -> Result<String> {
// Regex to check if the provided url is a zippyshare download url
let re = Regex::new(r"(https://www\d*\.zippyshare\.com)")?;
if !re.is_match(url) {
return Err(Error::new(ErrorKind::Other, "URL is not a zippyshare url").into());
}
// Extract the hostname (with https:// prefix) for later
let base_host = &re.captures(url).unwrap()[0];
// Download the html body for the download page
let body = reqwest::get(url).await?.text().await?;
// Regex to match the javascript part of the html that generates the real download link
let re_link = Regex::new(
r#"document\.getElementById\('dlbutton'\)\.href = "(/d/.+/)" \+ \((\d+) % (\d+) \+ \d+ % (\d+)\) \+ "(.+)";"#,
)?;
let cap_link = match re_link.captures(&body) {
Some(cap) => cap,
None => return Err(Error::new(ErrorKind::Other, "Link not found").into()),
};
let url_start = &cap_link[1];
let url_end = &cap_link[5];
let n2: i32 = cap_link[2].parse()?;
let n3: i32 = cap_link[3].parse()?;
let n4 = n2;
let n5: i32 = cap_link[4].parse()?;
let mixed = n2 % n3 + n4 % n5;
let dl_url = format!("{}{}{}{}", &base_host, url_start, mixed, url_end);
Ok(dl_url)
}