prefetch-npm-deps: look up hashes from cache when fixing up lockfiles

This commit is contained in:
Winter 2023-04-30 10:29:46 -04:00 committed by Lily Foster
parent 7efebca89c
commit ac35d7ea86
No known key found for this signature in database
GPG key ID: 49340081E484C893
7 changed files with 329 additions and 62 deletions

View file

@ -56,6 +56,9 @@ npmConfigHook() {
exit 1 exit 1
fi fi
export CACHE_MAP_PATH="$TMP/MEOW"
@prefetchNpmDeps@ --map-cache
@prefetchNpmDeps@ --fixup-lockfile "$srcLockfile" @prefetchNpmDeps@ --fixup-lockfile "$srcLockfile"
local cachePath local cachePath
@ -109,6 +112,9 @@ npmConfigHook() {
patchShebangs node_modules patchShebangs node_modules
rm "$CACHE_MAP_PATH"
unset CACHE_MAP_PATH
echo "Finished npmConfigHook" echo "Finished npmConfigHook"
} }

View file

@ -305,6 +305,7 @@ dependencies = [
"tempfile", "tempfile",
"ureq", "ureq",
"url", "url",
"walkdir",
] ]
[[package]] [[package]]
@ -400,6 +401,15 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.1.0" version = "1.1.0"
@ -583,6 +593,17 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]] [[package]]
name = "wasm-bindgen" name = "wasm-bindgen"
version = "0.2.82" version = "0.2.82"
@ -682,6 +703,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]] [[package]]
name = "winapi-x86_64-pc-windows-gnu" name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0" version = "0.4.0"

View file

@ -17,3 +17,4 @@ sha2 = "0.10.6"
tempfile = "3.3.0" tempfile = "3.3.0"
ureq = { version = "2.5.0" } ureq = { version = "2.5.0" }
url = { version = "2.3.1", features = ["serde"] } url = { version = "2.3.1", features = ["serde"] }
walkdir = "2.3.2"

View file

@ -1,5 +1,5 @@
use digest::{Digest, Update}; use digest::{Digest, Update};
use serde::Serialize; use serde::{Deserialize, Serialize};
use sha1::Sha1; use sha1::Sha1;
use sha2::{Sha256, Sha512}; use sha2::{Sha256, Sha512};
use std::{ use std::{
@ -9,24 +9,24 @@ use std::{
}; };
use url::Url; use url::Url;
#[derive(Serialize)] #[derive(Serialize, Deserialize)]
struct Key { pub(super) struct Key {
key: String, pub(super) key: String,
integrity: String, pub(super) integrity: String,
time: u8, pub(super) time: u8,
size: usize, pub(super) size: usize,
metadata: Metadata, pub(super) metadata: Metadata,
} }
#[derive(Serialize)] #[derive(Serialize, Deserialize)]
struct Metadata { pub(super) struct Metadata {
url: Url, pub(super) url: Url,
options: Options, pub(super) options: Options,
} }
#[derive(Serialize)] #[derive(Serialize, Deserialize)]
struct Options { pub(super) struct Options {
compress: bool, pub(super) compress: bool,
} }
pub struct Cache(PathBuf); pub struct Cache(PathBuf);

View file

@ -1,67 +1,176 @@
#![warn(clippy::pedantic)] #![warn(clippy::pedantic)]
use crate::cacache::Cache; use crate::cacache::{Cache, Key};
use anyhow::anyhow; use anyhow::{anyhow, bail};
use rayon::prelude::*; use rayon::prelude::*;
use serde_json::{Map, Value}; use serde_json::{Map, Value};
use std::{ use std::{
collections::HashMap,
env, fs, env, fs,
path::Path, path::{Path, PathBuf},
process::{self, Command}, process::{self, Command},
}; };
use tempfile::tempdir; use tempfile::tempdir;
use url::Url;
use walkdir::WalkDir;
mod cacache; mod cacache;
mod parse; mod parse;
/// `fixup_lockfile` removes the `integrity` field from Git dependencies. fn cache_map_path() -> Option<PathBuf> {
env::var_os("CACHE_MAP_PATH").map(PathBuf::from)
}
/// `fixup_lockfile` rewrites `integrity` hashes to match cache and removes the `integrity` field from Git dependencies.
///
/// Sometimes npm has multiple instances of a given `resolved` URL that have different types of `integrity` hashes (e.g. SHA-1
/// and SHA-512) in the lockfile. Given we only cache one version of these, the `integrity` field must be normalized to the hash
/// we cache as (which is the strongest available one).
/// ///
/// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features. /// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features.
/// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that /// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that
/// tarball into the lockfile. /// tarball into the lockfile.
/// ///
/// Thus, we remove this hash, to replace it with our own deterministic copies of dependencies from hosted Git providers. /// Thus, we remove this hash, to replace it with our own deterministic copies of dependencies from hosted Git providers.
fn fixup_lockfile(mut lock: Map<String, Value>) -> anyhow::Result<Option<Map<String, Value>>> { ///
if lock /// If no fixups were performed, `None` is returned and the lockfile structure should be left as-is. If fixups were performed, the
/// `dependencies` key in v2 lockfiles designed for backwards compatibility with v1 parsers is removed because of inconsistent data.
fn fixup_lockfile(
mut lock: Map<String, Value>,
cache: &Option<HashMap<String, String>>,
) -> anyhow::Result<Option<Map<String, Value>>> {
let mut fixed = false;
match lock
.get("lockfileVersion") .get("lockfileVersion")
.ok_or_else(|| anyhow!("couldn't get lockfile version"))? .ok_or_else(|| anyhow!("couldn't get lockfile version"))?
.as_i64() .as_i64()
.ok_or_else(|| anyhow!("lockfile version isn't an int"))? .ok_or_else(|| anyhow!("lockfile version isn't an int"))?
< 2
{ {
return Ok(None); 1 => fixup_v1_deps(
} lock.get_mut("dependencies")
.unwrap()
.as_object_mut()
.unwrap(),
cache,
&mut fixed,
),
2 | 3 => {
for package in lock
.get_mut("packages")
.ok_or_else(|| anyhow!("couldn't get packages"))?
.as_object_mut()
.ok_or_else(|| anyhow!("packages isn't a map"))?
.values_mut()
{
if let Some(Value::String(resolved)) = package.get("resolved") {
if let Some(Value::String(integrity)) = package.get("integrity") {
if resolved.starts_with("git+ssh://") {
fixed = true;
let mut fixed = false; package
.as_object_mut()
.ok_or_else(|| anyhow!("package isn't a map"))?
.remove("integrity");
} else if let Some(cache_hashes) = cache {
let cache_hash = cache_hashes
.get(resolved)
.expect("dependency should have a hash");
for package in lock if integrity != cache_hash {
.get_mut("packages") fixed = true;
.ok_or_else(|| anyhow!("couldn't get packages"))?
.as_object_mut()
.ok_or_else(|| anyhow!("packages isn't a map"))?
.values_mut()
{
if let Some(Value::String(resolved)) = package.get("resolved") {
if resolved.starts_with("git+ssh://") && package.get("integrity").is_some() {
fixed = true;
package *package
.as_object_mut() .as_object_mut()
.ok_or_else(|| anyhow!("package isn't a map"))? .ok_or_else(|| anyhow!("package isn't a map"))?
.remove("integrity"); .get_mut("integrity")
.unwrap() = Value::String(cache_hash.clone());
}
}
}
}
}
if fixed {
lock.remove("dependencies");
} }
} }
v => bail!("unsupported lockfile version {v}"),
} }
if fixed { if fixed {
lock.remove("dependencies");
Ok(Some(lock)) Ok(Some(lock))
} else { } else {
Ok(None) Ok(None)
} }
} }
// Recursive helper to fixup v1 lockfile deps
fn fixup_v1_deps(
dependencies: &mut serde_json::Map<String, Value>,
cache: &Option<HashMap<String, String>>,
fixed: &mut bool,
) {
for dep in dependencies.values_mut() {
if let Some(Value::String(resolved)) = dep
.as_object()
.expect("v1 dep must be object")
.get("resolved")
{
if let Some(Value::String(integrity)) = dep
.as_object()
.expect("v1 dep must be object")
.get("integrity")
{
if resolved.starts_with("git+ssh://") {
*fixed = true;
dep.as_object_mut()
.expect("v1 dep must be object")
.remove("integrity");
} else if let Some(cache_hashes) = cache {
let cache_hash = cache_hashes
.get(resolved)
.expect("dependency should have a hash");
if integrity != cache_hash {
*fixed = true;
*dep.as_object_mut()
.expect("v1 dep must be object")
.get_mut("integrity")
.unwrap() = Value::String(cache_hash.clone());
}
}
}
}
if let Some(Value::Object(more_deps)) = dep.as_object_mut().unwrap().get_mut("dependencies")
{
fixup_v1_deps(more_deps, cache, fixed);
}
}
}
fn map_cache() -> anyhow::Result<HashMap<Url, String>> {
let mut hashes = HashMap::new();
let content_path = Path::new(&env::var_os("npmDeps").unwrap()).join("_cacache/index-v5");
for entry in WalkDir::new(content_path) {
let entry = entry?;
if entry.file_type().is_file() {
let content = fs::read_to_string(entry.path())?;
let key: Key = serde_json::from_str(content.split_ascii_whitespace().nth(1).unwrap())?;
hashes.insert(key.metadata.url, key.integrity);
}
}
Ok(hashes)
}
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
let args = env::args().collect::<Vec<_>>(); let args = env::args().collect::<Vec<_>>();
@ -76,12 +185,25 @@ fn main() -> anyhow::Result<()> {
if args[1] == "--fixup-lockfile" { if args[1] == "--fixup-lockfile" {
let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?; let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?;
if let Some(fixed) = fixup_lockfile(lock)? { let cache = cache_map_path()
.map(|map_path| Ok::<_, anyhow::Error>(serde_json::from_slice(&fs::read(map_path)?)?))
.transpose()?;
if let Some(fixed) = fixup_lockfile(lock, &cache)? {
println!("Fixing lockfile"); println!("Fixing lockfile");
fs::write(&args[2], serde_json::to_string(&fixed)?)?; fs::write(&args[2], serde_json::to_string(&fixed)?)?;
} }
return Ok(());
} else if args[1] == "--map-cache" {
let map = map_cache()?;
fs::write(
cache_map_path().expect("CACHE_MAP_PATH environment variable must be set"),
serde_json::to_string(&map)?,
)?;
return Ok(()); return Ok(());
} }
@ -133,6 +255,8 @@ fn main() -> anyhow::Result<()> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::collections::HashMap;
use super::fixup_lockfile; use super::fixup_lockfile;
use serde_json::json; use serde_json::json;
@ -147,12 +271,20 @@ mod tests {
}, },
"foo": { "foo": {
"resolved": "https://github.com/NixOS/nixpkgs", "resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "aaa" "integrity": "sha1-aaa"
}, },
"bar": { "bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"integrity": "bbb" "integrity": "sha512-aaa"
} },
"foo-bad": {
"resolved": "foo",
"integrity": "sha1-foo"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
} }
}); });
@ -165,22 +297,112 @@ mod tests {
}, },
"foo": { "foo": {
"resolved": "https://github.com/NixOS/nixpkgs", "resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "aaa" "integrity": ""
}, },
"bar": { "bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
} },
"foo-bad": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
} }
}); });
let mut hashes = HashMap::new();
hashes.insert(
String::from("https://github.com/NixOS/nixpkgs"),
String::new(),
);
hashes.insert(
String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
String::new(),
);
hashes.insert(String::from("foo"), String::from("sha512-foo"));
assert_eq!( assert_eq!(
fixup_lockfile(input.as_object().unwrap().clone())?, fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
Some(expected.as_object().unwrap().clone()) Some(expected.as_object().unwrap().clone())
); );
Ok(())
}
#[test]
fn lockfile_v1_fixup() -> anyhow::Result<()> {
let input = json!({
"lockfileVersion": 1,
"name": "foo",
"dependencies": {
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "sha512-aaa"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"integrity": "sha512-bbb",
"dependencies": {
"foo-bad": {
"resolved": "foo",
"integrity": "sha1-foo"
},
},
},
}
});
let expected = json!({
"lockfileVersion": 1,
"name": "foo",
"dependencies": {
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": ""
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"dependencies": {
"foo-bad": {
"resolved": "foo",
"integrity": "sha512-foo"
},
},
},
}
});
let mut hashes = HashMap::new();
hashes.insert(
String::from("https://github.com/NixOS/nixpkgs"),
String::new(),
);
hashes.insert(
String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
String::new(),
);
hashes.insert(String::from("foo"), String::from("sha512-foo"));
assert_eq!( assert_eq!(
fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?, fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
None Some(expected.as_object().unwrap().clone())
); );
Ok(()) Ok(())

View file

@ -97,10 +97,20 @@ impl fmt::Display for UrlOrString {
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub(super) struct HashCollection(HashSet<Hash>); pub struct HashCollection(HashSet<Hash>);
impl HashCollection { impl HashCollection {
pub(super) fn into_best(self) -> Option<Hash> { pub fn from_str(s: impl AsRef<str>) -> anyhow::Result<HashCollection> {
let hashes = s
.as_ref()
.split_ascii_whitespace()
.map(Hash::new)
.collect::<anyhow::Result<_>>()?;
Ok(HashCollection(hashes))
}
pub fn into_best(self) -> Option<Hash> {
self.0.into_iter().max() self.0.into_iter().max()
} }
} }
@ -136,17 +146,11 @@ impl<'de> Visitor<'de> for HashCollectionVisitor {
where where
E: de::Error, E: de::Error,
{ {
let hashes = value HashCollection::from_str(value).map_err(E::custom)
.split_ascii_whitespace()
.map(Hash::new)
.collect::<anyhow::Result<_>>()
.map_err(E::custom)?;
Ok(HashCollection(hashes))
} }
} }
#[derive(Debug, Deserialize, PartialEq, Eq, Hash)] #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct Hash(String); pub struct Hash(String);
// Hash algorithms, in ascending preference. // Hash algorithms, in ascending preference.
@ -166,11 +170,15 @@ impl Hash {
Err(anyhow!("unknown hash algorithm {algo:?}")) Err(anyhow!("unknown hash algorithm {algo:?}"))
} }
} }
pub fn as_str(&self) -> &str {
&self.0
}
} }
impl fmt::Display for Hash { impl fmt::Display for Hash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f) self.as_str().fmt(f)
} }
} }

View file

@ -9,7 +9,7 @@ use std::{
use tempfile::{tempdir, TempDir}; use tempfile::{tempdir, TempDir};
use url::Url; use url::Url;
mod lock; pub mod lock;
pub fn lockfile(content: &str, force_git_deps: bool) -> anyhow::Result<Vec<Package>> { pub fn lockfile(content: &str, force_git_deps: bool) -> anyhow::Result<Vec<Package>> {
let mut packages = lock::packages(content) let mut packages = lock::packages(content)