prefetch-npm-deps: deduplicate dependencies when parsing lockfile

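A `package-lock.json` file can contain multiple entries for the same dependency (that is, several entries with the same `resolved` URL). Previously, each such entry was fetched and cached separately, which caused unnecessary downloads and duplicate index entries in the generated cache. The lockfile's packages are now deduplicated by `resolved` URL before anything is fetched.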
This commit is contained in:
Winter 2022-11-15 19:52:50 -05:00
parent b117b359db
commit 091d039b12
3 changed files with 52 additions and 41 deletions

View file

@ -46,7 +46,7 @@
hash = "sha256-uQmc+S+V1co1Rfc4d82PpeXjmd1UqdsG492ADQFcZGA=";
};
hash = "sha256-fk7L9vn8EHJsGJNMAjYZg9h0PT6dAwiahdiEeXVrMB8=";
hash = "sha256-wca1QvxUw3OrLStfYN9Co6oVBR1LbfcNUKlDqvObps4=";
};
lockfileV2 = makeTest {
@ -57,7 +57,7 @@
hash = "sha256-qS29tq5QPnGxV+PU40VgMAtdwVLtLyyhG2z9GMeYtC4=";
};
hash = "sha256-s8SpZY/1tKZVd3vt7sA9vsqHvEaNORQBMrSyhWpj048=";
hash = "sha256-tuEfyePwlOy2/mOPdXbqJskO6IowvAP4DWg8xSZwbJw=";
};
hashPrecedence = makeTest {
@ -68,7 +68,7 @@
hash = "sha256-1+0AQw9EmbHiMPA/H8OP8XenhrkhLRYBRhmd1cNPFjk=";
};
hash = "sha256-KRxwrEij3bpZ5hbQhX67KYpnY2cRS7u2EVZIWO1FBPM=";
hash = "sha256-oItUls7AXcCECuyA+crQO6B0kv4toIr8pBubNwB7kAM=";
};
hostedGitDeps = makeTest {
@ -79,7 +79,7 @@
hash = "sha256-X9mCwPqV5yP0S2GonNvpYnLSLJMd/SUIked+hMRxDpA=";
};
hash = "sha256-oIM05TGHstX1D4k2K4TJ+SHB7H/tNKzxzssqf0GJwvY=";
hash = "sha256-ri8qvYjn420ykmCC2Uy5P3jxVVrKWJG3ug/qLIGcR7o=";
};
linkDependencies = makeTest {
@ -90,7 +90,7 @@
hash = "sha256-6ZTBMyuyPP/63gpQugggHhKVup6OB4hZ2rmSvPJ0yEs=";
};
hash = "sha256-uQx8F5OXKm+fqx6hP6obVYTlQIYcJwtO52j6VQNo7Sk=";
hash = "sha256-VzQhArHoznYSXUT7l9HkJV4yoSOmoP8eYTLel1QwmB4=";
};
# This package contains both hosted Git shorthand, and a bundled dependency that happens to override an existing one.
@ -102,7 +102,7 @@
hash = "sha256-1fGNxYJi1I4cXK/jinNG+Y6tPEOhP3QAqWOBEQttS9E=";
};
hash = "sha256-TzUFykASDjXlfmgq2bUIJjUkfLlGLUkRTVFPIYPCmyc=";
hash = "sha256-73rLcSBgsZRJFELaKK++62hVbt1QT8JgLu2hyDSmIZE=";
};
};

View file

@ -109,7 +109,7 @@ impl Cache {
let mut file = File::options().append(true).create(true).open(index_path)?;
write!(file, "\n{:x}\t{data}", Sha1::new().chain(&data).finalize())?;
write!(file, "{:x}\t{data}", Sha1::new().chain(&data).finalize())?;
Ok(())
}

View file

@ -5,7 +5,7 @@ use anyhow::{anyhow, Context};
use rayon::prelude::*;
use serde::Deserialize;
use std::{
collections::HashMap,
collections::{HashMap, HashSet},
env, fmt, fs,
path::Path,
process::{self, Command},
@ -292,47 +292,58 @@ fn main() -> anyhow::Result<()> {
return Ok(());
}
let packages = {
let mut seen = HashSet::new();
let mut new_packages = HashMap::new();
for (dep, package) in packages.unwrap().drain() {
if let (false, Some(UrlOrString::Url(resolved))) = (dep.is_empty(), &package.resolved) {
if !seen.contains(resolved) {
seen.insert(resolved.clone());
new_packages.insert(dep, package);
}
}
}
new_packages
};
let cache = Cache::new(out.join("_cacache"));
packages
.unwrap()
.into_par_iter()
.filter(|(dep, _)| !dep.is_empty())
.filter(|(_, package)| matches!(package.resolved, Some(UrlOrString::Url(_))))
.try_for_each(|(dep, package)| {
eprintln!("{dep}");
packages.into_par_iter().try_for_each(|(dep, package)| {
eprintln!("{dep}");
let mut resolved = match package.resolved {
Some(UrlOrString::Url(url)) => url,
_ => unreachable!(),
};
let mut resolved = match package.resolved {
Some(UrlOrString::Url(url)) => url,
_ => unreachable!(),
};
if let Some(hosted_git_url) = get_hosted_git_url(&resolved) {
resolved = hosted_git_url;
}
if let Some(hosted_git_url) = get_hosted_git_url(&resolved) {
resolved = hosted_git_url;
}
let mut data = Vec::new();
let mut data = Vec::new();
agent
.get(resolved.as_str())
.call()?
.into_reader()
.read_to_end(&mut data)?;
agent
.get(resolved.as_str())
.call()?
.into_reader()
.read_to_end(&mut data)?;
cache
.put(
format!("make-fetch-happen:request-cache:{resolved}"),
resolved,
&data,
package
.integrity
.map(|i| Ok::<String, anyhow::Error>(get_ideal_hash(&i)?.to_string()))
.transpose()?,
)
.map_err(|e| anyhow!("couldn't insert cache entry for {dep}: {e:?}"))?;
cache
.put(
format!("make-fetch-happen:request-cache:{resolved}"),
resolved,
&data,
package
.integrity
.map(|i| Ok::<String, anyhow::Error>(get_ideal_hash(&i)?.to_string()))
.transpose()?,
)
.map_err(|e| anyhow!("couldn't insert cache entry for {dep}: {e:?}"))?;
Ok::<_, anyhow::Error>(())
})?;
Ok::<_, anyhow::Error>(())
})?;
fs::write(out.join("package-lock.json"), lock_content)?;