Collect data chunks before writing to disk

- Buffer the downloaded data into 4 MB buffers and only write to disk
  after the buffer is full.
- This reduces the number of small writes and greatly improves
  performance on HDDs with multiple concurrent downloads.
- Fixes #3
This commit is contained in:
Daniel M 2021-03-26 22:36:43 +01:00
parent d12c174a8b
commit 92f6c2699c

View File

@ -57,36 +57,6 @@ pub fn url_to_filename(url: &str) -> String {
file_name.to_string()
}
/// Downloads the resource at `url` and stores the response body in the file
/// at `into_file`.
///
/// The request is sent first; afterwards the target file is opened for
/// writing, created if it does not exist yet and truncated if it does. The
/// body is then streamed chunk by chunk into the file, and the file is
/// flushed before returning.
///
/// # Errors
///
/// Returns early with the error of whichever step fails: sending the
/// request, opening the file, receiving a chunk, writing it, or the final
/// flush.
#[allow(unused)]
pub async fn download(url: &str, into_file: &str) -> ResBE<()> {
    let target = Path::new(into_file);

    let client = reqwest::Client::new();
    let mut response = client.get(url).send().await?;

    // write + truncate + create: always start from an empty, writable file.
    let mut open_opts = tokio::fs::OpenOptions::new();
    open_opts.write(true).truncate(true).create(true);
    let mut out = open_opts.open(target).await?;

    // Pull chunks from the server until the body is exhausted, writing each
    // one straight to the file.
    loop {
        match response.chunk().await? {
            Some(data) => out.write_all(&data).await?,
            None => break,
        }
    }

    // Make sure all pending IO has completed before reporting success.
    out.flush().await?;

    Ok(())
}
pub async fn download_feedback(url: &str, into_file: &str, rep: DlReporter) -> ResBE<()> {
download_feedback_chunks(url, into_file, rep, None, false).await
@ -161,14 +131,27 @@ pub async fn download_feedback_chunks(url: &str, into_file: &str, rep: DlReporte
let mut average_speed = RollingAverage::new(10);
let mut buff: Vec<u8> = Vec::new();
// Read data from server as long as new data is available
while let Some(chunk) = resp.chunk().await? {
// Write the received data into the file
ofile.write_all(&chunk).await?;
let datalen = chunk.len() as u64;
buff.extend(chunk);
// Buffer in memory first and only write to disk if the threshold is reached.
// This reduces the number of small disk writes and thereby reduces the
// io bottleneck that occurs on HDDs with many small writes in different
// files and offsets at the same time
if buff.len() >= 4_000_000 {
// Write the received data into the file
ofile.write_all(&buff).await?;
buff.clear();
}
// Update progress
curr_progress += datalen;
@ -200,6 +183,10 @@ pub async fn download_feedback_chunks(url: &str, into_file: &str, rep: DlReporte
)?;
}
if buff.len() > 0 {
ofile.write_all(&buff).await?;
}
// Ensure that IO is completed
//ofile.flush().await?;