From ac35d7ea860d691d68f6e856202bad890478fe0b Mon Sep 17 00:00:00 2001 From: Winter Date: Sun, 30 Apr 2023 10:29:46 -0400 Subject: [PATCH] prefetch-npm-deps: look up hashes from cache when fixing up lockfiles --- .../hooks/npm-config-hook.sh | 6 + .../node/fetch-npm-deps/Cargo.lock | 30 ++ .../node/fetch-npm-deps/Cargo.toml | 1 + .../node/fetch-npm-deps/src/cacache.rs | 30 +- .../node/fetch-npm-deps/src/main.rs | 292 +++++++++++++++--- .../node/fetch-npm-deps/src/parse/lock.rs | 30 +- .../node/fetch-npm-deps/src/parse/mod.rs | 2 +- 7 files changed, 329 insertions(+), 62 deletions(-) diff --git a/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh b/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh index a3111862687..c1acdc3ac3f 100644 --- a/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh +++ b/pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh @@ -56,6 +56,9 @@ npmConfigHook() { exit 1 fi + export CACHE_MAP_PATH="$TMP/MEOW" + @prefetchNpmDeps@ --map-cache + @prefetchNpmDeps@ --fixup-lockfile "$srcLockfile" local cachePath @@ -109,6 +112,9 @@ npmConfigHook() { patchShebangs node_modules + rm "$CACHE_MAP_PATH" + unset CACHE_MAP_PATH + echo "Finished npmConfigHook" } diff --git a/pkgs/build-support/node/fetch-npm-deps/Cargo.lock b/pkgs/build-support/node/fetch-npm-deps/Cargo.lock index ba832d115e6..ff636e7e596 100644 --- a/pkgs/build-support/node/fetch-npm-deps/Cargo.lock +++ b/pkgs/build-support/node/fetch-npm-deps/Cargo.lock @@ -305,6 +305,7 @@ dependencies = [ "tempfile", "ureq", "url", + "walkdir", ] [[package]] @@ -400,6 +401,15 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -583,6 +593,17 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + [[package]] name = "wasm-bindgen" version = "0.2.82" @@ -682,6 +703,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/pkgs/build-support/node/fetch-npm-deps/Cargo.toml b/pkgs/build-support/node/fetch-npm-deps/Cargo.toml index bebdaad2952..ebb631c0955 100644 --- a/pkgs/build-support/node/fetch-npm-deps/Cargo.toml +++ b/pkgs/build-support/node/fetch-npm-deps/Cargo.toml @@ -17,3 +17,4 @@ sha2 = "0.10.6" tempfile = "3.3.0" ureq = { version = "2.5.0" } url = { version = "2.3.1", features = ["serde"] } +walkdir = "2.3.2" diff --git a/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs 
b/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs index 5326c3e858b..3c49ef18702 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/cacache.rs @@ -1,5 +1,5 @@ use digest::{Digest, Update}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use sha1::Sha1; use sha2::{Sha256, Sha512}; use std::{ @@ -9,24 +9,24 @@ use std::{ }; use url::Url; -#[derive(Serialize)] -struct Key { - key: String, - integrity: String, - time: u8, - size: usize, - metadata: Metadata, +#[derive(Serialize, Deserialize)] +pub(super) struct Key { + pub(super) key: String, + pub(super) integrity: String, + pub(super) time: u8, + pub(super) size: usize, + pub(super) metadata: Metadata, } -#[derive(Serialize)] -struct Metadata { - url: Url, - options: Options, +#[derive(Serialize, Deserialize)] +pub(super) struct Metadata { + pub(super) url: Url, + pub(super) options: Options, } -#[derive(Serialize)] -struct Options { - compress: bool, +#[derive(Serialize, Deserialize)] +pub(super) struct Options { + pub(super) compress: bool, } pub struct Cache(PathBuf); diff --git a/pkgs/build-support/node/fetch-npm-deps/src/main.rs b/pkgs/build-support/node/fetch-npm-deps/src/main.rs index 2ce03cfd2aa..e6e44fc9d92 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/main.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/main.rs @@ -1,67 +1,176 @@ #![warn(clippy::pedantic)] -use crate::cacache::Cache; -use anyhow::anyhow; +use crate::cacache::{Cache, Key}; +use anyhow::{anyhow, bail}; use rayon::prelude::*; use serde_json::{Map, Value}; use std::{ + collections::HashMap, env, fs, - path::Path, + path::{Path, PathBuf}, process::{self, Command}, }; use tempfile::tempdir; +use url::Url; +use walkdir::WalkDir; mod cacache; mod parse; -/// `fixup_lockfile` removes the `integrity` field from Git dependencies. +fn cache_map_path() -> Option { + env::var_os("CACHE_MAP_PATH").map(PathBuf::from) +} + +/// `fixup_lockfile` rewrites `integrity` hashes to match cache and removes the `integrity` field from Git dependencies. +/// +/// Sometimes npm has multiple instances of a given `resolved` URL that have different types of `integrity` hashes (e.g. SHA-1 +/// and SHA-512) in the lockfile. Given we only cache one version of these, the `integrity` field must be normalized to the hash +/// we cache as (which is the strongest available one). /// /// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features. /// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that /// tarball into the lockfile. /// /// Thus, we remove this hash, to replace it with our own determinstic copies of dependencies from hosted Git providers. -fn fixup_lockfile(mut lock: Map) -> anyhow::Result>> { - if lock +/// +/// If no fixups were performed, `None` is returned and the lockfile structure should be left as-is. If fixups were performed, the +/// `dependencies` key in v2 lockfiles designed for backwards compatibility with v1 parsers is removed because of inconsistent data. +fn fixup_lockfile( + mut lock: Map, + cache: &Option>, +) -> anyhow::Result>> { + let mut fixed = false; + + match lock .get("lockfileVersion") .ok_or_else(|| anyhow!("couldn't get lockfile version"))? .as_i64() .ok_or_else(|| anyhow!("lockfile version isn't an int"))? 
- < 2 { - return Ok(None); - } + 1 => fixup_v1_deps( + lock.get_mut("dependencies") + .unwrap() + .as_object_mut() + .unwrap(), + cache, + &mut fixed, + ), + 2 | 3 => { + for package in lock + .get_mut("packages") + .ok_or_else(|| anyhow!("couldn't get packages"))? + .as_object_mut() + .ok_or_else(|| anyhow!("packages isn't a map"))? + .values_mut() + { + if let Some(Value::String(resolved)) = package.get("resolved") { + if let Some(Value::String(integrity)) = package.get("integrity") { + if resolved.starts_with("git+ssh://") { + fixed = true; - let mut fixed = false; + package + .as_object_mut() + .ok_or_else(|| anyhow!("package isn't a map"))? + .remove("integrity"); + } else if let Some(cache_hashes) = cache { + let cache_hash = cache_hashes + .get(resolved) + .expect("dependency should have a hash"); - for package in lock - .get_mut("packages") - .ok_or_else(|| anyhow!("couldn't get packages"))? - .as_object_mut() - .ok_or_else(|| anyhow!("packages isn't a map"))? - .values_mut() - { - if let Some(Value::String(resolved)) = package.get("resolved") { - if resolved.starts_with("git+ssh://") && package.get("integrity").is_some() { - fixed = true; + if integrity != cache_hash { + fixed = true; - package - .as_object_mut() - .ok_or_else(|| anyhow!("package isn't a map"))? - .remove("integrity"); + *package + .as_object_mut() + .ok_or_else(|| anyhow!("package isn't a map"))? + .get_mut("integrity") + .unwrap() = Value::String(cache_hash.clone()); + } + } + } + } + } + + if fixed { + lock.remove("dependencies"); } } + v => bail!("unsupported lockfile version {v}"), } if fixed { - lock.remove("dependencies"); - Ok(Some(lock)) } else { Ok(None) } } +// Recursive helper to fixup v1 lockfile deps +fn fixup_v1_deps( + dependencies: &mut serde_json::Map, + cache: &Option>, + fixed: &mut bool, +) { + for dep in dependencies.values_mut() { + if let Some(Value::String(resolved)) = dep + .as_object() + .expect("v1 dep must be object") + .get("resolved") + { + if let Some(Value::String(integrity)) = dep + .as_object() + .expect("v1 dep must be object") + .get("integrity") + { + if resolved.starts_with("git+ssh://") { + *fixed = true; + + dep.as_object_mut() + .expect("v1 dep must be object") + .remove("integrity"); + } else if let Some(cache_hashes) = cache { + let cache_hash = cache_hashes + .get(resolved) + .expect("dependency should have a hash"); + + if integrity != cache_hash { + *fixed = true; + + *dep.as_object_mut() + .expect("v1 dep must be object") + .get_mut("integrity") + .unwrap() = Value::String(cache_hash.clone()); + } + } + } + } + + if let Some(Value::Object(more_deps)) = dep.as_object_mut().unwrap().get_mut("dependencies") + { + fixup_v1_deps(more_deps, cache, fixed); + } + } +} + +fn map_cache() -> anyhow::Result> { + let mut hashes = HashMap::new(); + + let content_path = Path::new(&env::var_os("npmDeps").unwrap()).join("_cacache/index-v5"); + + for entry in WalkDir::new(content_path) { + let entry = entry?; + + if entry.file_type().is_file() { + let content = fs::read_to_string(entry.path())?; + let key: Key = serde_json::from_str(content.split_ascii_whitespace().nth(1).unwrap())?; + + hashes.insert(key.metadata.url, key.integrity); + } + } + + Ok(hashes) +} + fn main() -> anyhow::Result<()> { let args = env::args().collect::>(); @@ -76,12 +185,25 @@ fn main() -> anyhow::Result<()> { if args[1] == "--fixup-lockfile" { let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?; - if let Some(fixed) = fixup_lockfile(lock)? 
{ + let cache = cache_map_path() + .map(|map_path| Ok::<_, anyhow::Error>(serde_json::from_slice(&fs::read(map_path)?)?)) + .transpose()?; + + if let Some(fixed) = fixup_lockfile(lock, &cache)? { println!("Fixing lockfile"); fs::write(&args[2], serde_json::to_string(&fixed)?)?; } + return Ok(()); + } else if args[1] == "--map-cache" { + let map = map_cache()?; + + fs::write( + cache_map_path().expect("CACHE_MAP_PATH environment variable must be set"), + serde_json::to_string(&map)?, + )?; + return Ok(()); } @@ -133,6 +255,8 @@ fn main() -> anyhow::Result<()> { #[cfg(test)] mod tests { + use std::collections::HashMap; + use super::fixup_lockfile; use serde_json::json; @@ -147,12 +271,20 @@ mod tests { }, "foo": { "resolved": "https://github.com/NixOS/nixpkgs", - "integrity": "aaa" + "integrity": "sha1-aaa" }, "bar": { "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", - "integrity": "bbb" - } + "integrity": "sha512-aaa" + }, + "foo-bad": { + "resolved": "foo", + "integrity": "sha1-foo" + }, + "foo-good": { + "resolved": "foo", + "integrity": "sha512-foo" + }, } }); @@ -165,22 +297,112 @@ mod tests { }, "foo": { "resolved": "https://github.com/NixOS/nixpkgs", - "integrity": "aaa" + "integrity": "" }, "bar": { "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", - } + }, + "foo-bad": { + "resolved": "foo", + "integrity": "sha512-foo" + }, + "foo-good": { + "resolved": "foo", + "integrity": "sha512-foo" + }, } }); + let mut hashes = HashMap::new(); + + hashes.insert( + String::from("https://github.com/NixOS/nixpkgs"), + String::new(), + ); + + hashes.insert( + String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"), + String::new(), + ); + + hashes.insert(String::from("foo"), String::from("sha512-foo")); + assert_eq!( - fixup_lockfile(input.as_object().unwrap().clone())?, + fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?, Some(expected.as_object().unwrap().clone()) ); + Ok(()) + } + + #[test] + fn lockfile_v1_fixup() -> anyhow::Result<()> { + let input = json!({ + "lockfileVersion": 1, + "name": "foo", + "dependencies": { + "foo": { + "resolved": "https://github.com/NixOS/nixpkgs", + "integrity": "sha512-aaa" + }, + "foo-good": { + "resolved": "foo", + "integrity": "sha512-foo" + }, + "bar": { + "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", + "integrity": "sha512-bbb", + "dependencies": { + "foo-bad": { + "resolved": "foo", + "integrity": "sha1-foo" + }, + }, + }, + } + }); + + let expected = json!({ + "lockfileVersion": 1, + "name": "foo", + "dependencies": { + "foo": { + "resolved": "https://github.com/NixOS/nixpkgs", + "integrity": "" + }, + "foo-good": { + "resolved": "foo", + "integrity": "sha512-foo" + }, + "bar": { + "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", + "dependencies": { + "foo-bad": { + "resolved": "foo", + "integrity": "sha512-foo" + }, + }, + }, + } + }); + + let mut hashes = HashMap::new(); + + hashes.insert( + String::from("https://github.com/NixOS/nixpkgs"), + String::new(), + ); + + hashes.insert( + String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"), + String::new(), + ); + + hashes.insert(String::from("foo"), String::from("sha512-foo")); + assert_eq!( - fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?, - None + fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?, + Some(expected.as_object().unwrap().clone()) ); Ok(()) diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs index 
572510bd82d..f50a31651d0 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs @@ -97,10 +97,20 @@ impl fmt::Display for UrlOrString { } #[derive(Debug, PartialEq, Eq)] -pub(super) struct HashCollection(HashSet); +pub struct HashCollection(HashSet); impl HashCollection { - pub(super) fn into_best(self) -> Option { + pub fn from_str(s: impl AsRef) -> anyhow::Result { + let hashes = s + .as_ref() + .split_ascii_whitespace() + .map(Hash::new) + .collect::>()?; + + Ok(HashCollection(hashes)) + } + + pub fn into_best(self) -> Option { self.0.into_iter().max() } } @@ -136,17 +146,11 @@ impl<'de> Visitor<'de> for HashCollectionVisitor { where E: de::Error, { - let hashes = value - .split_ascii_whitespace() - .map(Hash::new) - .collect::>() - .map_err(E::custom)?; - - Ok(HashCollection(hashes)) + HashCollection::from_str(value).map_err(E::custom) } } -#[derive(Debug, Deserialize, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Hash(String); // Hash algorithms, in ascending preference. @@ -166,11 +170,15 @@ impl Hash { Err(anyhow!("unknown hash algorithm {algo:?}")) } } + + pub fn as_str(&self) -> &str { + &self.0 + } } impl fmt::Display for Hash { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) + self.as_str().fmt(f) } } diff --git a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs index d96f7d87879..3dd6b7da497 100644 --- a/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs +++ b/pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs @@ -9,7 +9,7 @@ use std::{ use tempfile::{tempdir, TempDir}; use url::Url; -mod lock; +pub mod lock; pub fn lockfile(content: &str, force_git_deps: bool) -> anyhow::Result> { let mut packages = lock::packages(content)
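
Note for reviewers (not part of the patch): the two new modes work as a pair. `--map-cache` walks `$npmDeps/_cacache/index-v5`, deserializes each index entry's JSON into the now-public `Key` struct, and writes a flat JSON object mapping each `resolved` URL to the integrity string the tarball is stored under; `--fixup-lockfile` then reads that map from `$CACHE_MAP_PATH` and rewrites lockfile entries to match. Below is a minimal, illustrative sketch of the per-entry rule that `fixup_lockfile` (v2/v3 lockfiles) and `fixup_v1_deps` (v1 lockfiles) apply. It is not code from this patch, and it differs in one hedge: the real code panics ("dependency should have a hash") when a non-Git `resolved` URL is missing from the map, whereas the sketch falls back to the existing value.

    // Sketch only, assuming `cache` is the URL -> integrity map produced by
    // `--map-cache`. Returns the integrity value a lockfile entry should end
    // up with, or None if the field should be removed entirely.
    use std::collections::HashMap;

    fn normalized_integrity(
        resolved: &str,
        current: &str,
        cache: Option<&HashMap<String, String>>,
    ) -> Option<String> {
        if resolved.starts_with("git+ssh://") {
            // Tarballs for hosted Git dependencies are regenerated
            // deterministically, so the hash npm recorded for the provider's
            // generated tarball is dropped.
            None
        } else {
            // Otherwise prefer the hash the dependency is cached under (the
            // strongest one available); keep the current value if no cache
            // map was provided (unlike the patch, which requires a match).
            Some(
                cache
                    .and_then(|map| map.get(resolved).cloned())
                    .unwrap_or_else(|| current.to_owned()),
            )
        }
    }

Normalizing every entry to the cached hash is what lets lockfiles that record the same `resolved` URL with both sha1- and sha512- integrity values resolve against a cache that only stores one copy per URL.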