prefetch-npm-deps: look up hashes from cache when fixing up lockfiles

This commit is contained in:
Winter 2023-04-30 10:29:46 -04:00 committed by Lily Foster
parent 7efebca89c
commit ac35d7ea86
No known key found for this signature in database
GPG key ID: 49340081E484C893
7 changed files with 329 additions and 62 deletions

View file

@ -56,6 +56,9 @@ npmConfigHook() {
exit 1
fi
export CACHE_MAP_PATH="$TMP/MEOW"
@prefetchNpmDeps@ --map-cache
@prefetchNpmDeps@ --fixup-lockfile "$srcLockfile"
local cachePath
@ -109,6 +112,9 @@ npmConfigHook() {
patchShebangs node_modules
rm "$CACHE_MAP_PATH"
unset CACHE_MAP_PATH
echo "Finished npmConfigHook"
}

View file

@ -305,6 +305,7 @@ dependencies = [
"tempfile",
"ureq",
"url",
"walkdir",
]
[[package]]
@ -400,6 +401,15 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
@ -583,6 +593,17 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.82"
@ -682,6 +703,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"

View file

@ -17,3 +17,4 @@ sha2 = "0.10.6"
tempfile = "3.3.0"
ureq = { version = "2.5.0" }
url = { version = "2.3.1", features = ["serde"] }
walkdir = "2.3.2"

View file

@ -1,5 +1,5 @@
use digest::{Digest, Update};
use serde::Serialize;
use serde::{Deserialize, Serialize};
use sha1::Sha1;
use sha2::{Sha256, Sha512};
use std::{
@ -9,24 +9,24 @@ use std::{
};
use url::Url;
#[derive(Serialize)]
struct Key {
key: String,
integrity: String,
time: u8,
size: usize,
metadata: Metadata,
#[derive(Serialize, Deserialize)]
pub(super) struct Key {
pub(super) key: String,
pub(super) integrity: String,
pub(super) time: u8,
pub(super) size: usize,
pub(super) metadata: Metadata,
}
#[derive(Serialize)]
struct Metadata {
url: Url,
options: Options,
#[derive(Serialize, Deserialize)]
pub(super) struct Metadata {
pub(super) url: Url,
pub(super) options: Options,
}
#[derive(Serialize)]
struct Options {
compress: bool,
#[derive(Serialize, Deserialize)]
pub(super) struct Options {
pub(super) compress: bool,
}
pub struct Cache(PathBuf);

View file

@ -1,67 +1,176 @@
#![warn(clippy::pedantic)]
use crate::cacache::Cache;
use anyhow::anyhow;
use crate::cacache::{Cache, Key};
use anyhow::{anyhow, bail};
use rayon::prelude::*;
use serde_json::{Map, Value};
use std::{
collections::HashMap,
env, fs,
path::Path,
path::{Path, PathBuf},
process::{self, Command},
};
use tempfile::tempdir;
use url::Url;
use walkdir::WalkDir;
mod cacache;
mod parse;
/// `fixup_lockfile` removes the `integrity` field from Git dependencies.
/// Returns the path named by the `CACHE_MAP_PATH` environment variable, or `None` when the variable is unset.
fn cache_map_path() -> Option<PathBuf> {
    let raw_path = env::var_os("CACHE_MAP_PATH")?;
    Some(PathBuf::from(raw_path))
}
/// `fixup_lockfile` rewrites `integrity` hashes to match cache and removes the `integrity` field from Git dependencies.
///
/// Sometimes npm has multiple instances of a given `resolved` URL that have different types of `integrity` hashes (e.g. SHA-1
/// and SHA-512) in the lockfile. Given we only cache one version of these, the `integrity` field must be normalized to the hash
/// we cache as (which is the strongest available one).
///
/// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features.
/// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that
/// tarball into the lockfile.
///
/// Thus, we remove this hash, to replace it with our own deterministic copies of dependencies from hosted Git providers.
fn fixup_lockfile(mut lock: Map<String, Value>) -> anyhow::Result<Option<Map<String, Value>>> {
if lock
///
/// If no fixups were performed, `None` is returned and the lockfile structure should be left as-is. If fixups were performed, the
/// `dependencies` key in v2 lockfiles designed for backwards compatibility with v1 parsers is removed because of inconsistent data.
fn fixup_lockfile(
mut lock: Map<String, Value>,
cache: &Option<HashMap<String, String>>,
) -> anyhow::Result<Option<Map<String, Value>>> {
let mut fixed = false;
match lock
.get("lockfileVersion")
.ok_or_else(|| anyhow!("couldn't get lockfile version"))?
.as_i64()
.ok_or_else(|| anyhow!("lockfile version isn't an int"))?
< 2
{
return Ok(None);
}
1 => fixup_v1_deps(
lock.get_mut("dependencies")
.unwrap()
.as_object_mut()
.unwrap(),
cache,
&mut fixed,
),
2 | 3 => {
for package in lock
.get_mut("packages")
.ok_or_else(|| anyhow!("couldn't get packages"))?
.as_object_mut()
.ok_or_else(|| anyhow!("packages isn't a map"))?
.values_mut()
{
if let Some(Value::String(resolved)) = package.get("resolved") {
if let Some(Value::String(integrity)) = package.get("integrity") {
if resolved.starts_with("git+ssh://") {
fixed = true;
let mut fixed = false;
package
.as_object_mut()
.ok_or_else(|| anyhow!("package isn't a map"))?
.remove("integrity");
} else if let Some(cache_hashes) = cache {
let cache_hash = cache_hashes
.get(resolved)
.expect("dependency should have a hash");
for package in lock
.get_mut("packages")
.ok_or_else(|| anyhow!("couldn't get packages"))?
.as_object_mut()
.ok_or_else(|| anyhow!("packages isn't a map"))?
.values_mut()
{
if let Some(Value::String(resolved)) = package.get("resolved") {
if resolved.starts_with("git+ssh://") && package.get("integrity").is_some() {
fixed = true;
if integrity != cache_hash {
fixed = true;
package
.as_object_mut()
.ok_or_else(|| anyhow!("package isn't a map"))?
.remove("integrity");
*package
.as_object_mut()
.ok_or_else(|| anyhow!("package isn't a map"))?
.get_mut("integrity")
.unwrap() = Value::String(cache_hash.clone());
}
}
}
}
}
if fixed {
lock.remove("dependencies");
}
}
v => bail!("unsupported lockfile version {v}"),
}
if fixed {
lock.remove("dependencies");
Ok(Some(lock))
} else {
Ok(None)
}
}
/// Recursively applies integrity fixups to a v1 lockfile's `dependencies` map.
///
/// For every dependency that has both a string `resolved` and a string `integrity`:
///
/// * if `resolved` starts with `git+ssh://`, the `integrity` field is removed;
/// * otherwise, when a cache map is supplied, `integrity` is overwritten with the cached hash
///   for `resolved` whenever the two differ.
///
/// `fixed` is set to `true` whenever a dependency is modified; it is never reset to `false`.
/// Nested `dependencies` objects are processed the same way.
///
/// # Panics
///
/// Panics if a dependency is not a JSON object, or if a cache map is supplied but has no entry
/// for a non-Git dependency's `resolved` value.
fn fixup_v1_deps(
    dependencies: &mut serde_json::Map<String, Value>,
    cache: &Option<HashMap<String, String>>,
    fixed: &mut bool,
) {
    /// The change to apply to a dependency's `integrity` field.
    enum Fixup {
        Remove,
        Replace(String),
    }

    for dep in dependencies.values_mut() {
        let entry = dep.as_object_mut().expect("v1 dep must be object");

        // Work out the change using shared borrows only, so the map can be mutated afterwards.
        let fixup = match (entry.get("resolved"), entry.get("integrity")) {
            (Some(Value::String(resolved)), Some(Value::String(integrity))) => {
                if resolved.starts_with("git+ssh://") {
                    Some(Fixup::Remove)
                } else {
                    cache.as_ref().and_then(|cache_hashes| {
                        let cache_hash = cache_hashes
                            .get(resolved)
                            .expect("dependency should have a hash");

                        (integrity != cache_hash).then(|| Fixup::Replace(cache_hash.clone()))
                    })
                }
            }
            _ => None,
        };

        match fixup {
            Some(Fixup::Remove) => {
                *fixed = true;
                entry.remove("integrity");
            }
            Some(Fixup::Replace(hash)) => {
                *fixed = true;
                // `integrity` was matched as present above, so this lookup cannot fail.
                *entry.get_mut("integrity").unwrap() = Value::String(hash);
            }
            None => {}
        }

        // v1 lockfiles nest transitive dependencies under each entry.
        if let Some(Value::Object(more_deps)) = entry.get_mut("dependencies") {
            fixup_v1_deps(more_deps, cache, fixed);
        }
    }
}
fn map_cache() -> anyhow::Result<HashMap<Url, String>> {
let mut hashes = HashMap::new();
let content_path = Path::new(&env::var_os("npmDeps").unwrap()).join("_cacache/index-v5");
for entry in WalkDir::new(content_path) {
let entry = entry?;
if entry.file_type().is_file() {
let content = fs::read_to_string(entry.path())?;
let key: Key = serde_json::from_str(content.split_ascii_whitespace().nth(1).unwrap())?;
hashes.insert(key.metadata.url, key.integrity);
}
}
Ok(hashes)
}
fn main() -> anyhow::Result<()> {
let args = env::args().collect::<Vec<_>>();
@ -76,12 +185,25 @@ fn main() -> anyhow::Result<()> {
if args[1] == "--fixup-lockfile" {
let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?;
if let Some(fixed) = fixup_lockfile(lock)? {
let cache = cache_map_path()
.map(|map_path| Ok::<_, anyhow::Error>(serde_json::from_slice(&fs::read(map_path)?)?))
.transpose()?;
if let Some(fixed) = fixup_lockfile(lock, &cache)? {
println!("Fixing lockfile");
fs::write(&args[2], serde_json::to_string(&fixed)?)?;
}
return Ok(());
} else if args[1] == "--map-cache" {
let map = map_cache()?;
fs::write(
cache_map_path().expect("CACHE_MAP_PATH environment variable must be set"),
serde_json::to_string(&map)?,
)?;
return Ok(());
}
@ -133,6 +255,8 @@ fn main() -> anyhow::Result<()> {
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use super::fixup_lockfile;
use serde_json::json;
@ -147,12 +271,20 @@ mod tests {
},
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "aaa"
"integrity": "sha1-aaa"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"integrity": "bbb"
}
"integrity": "sha512-aaa"
},
"foo-bad": {
"resolved": "foo",
"integrity": "sha1-foo"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
}
});
@ -165,22 +297,112 @@ mod tests {
},
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "aaa"
"integrity": ""
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
}
},
"foo-bad": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
}
});
let mut hashes = HashMap::new();
hashes.insert(
String::from("https://github.com/NixOS/nixpkgs"),
String::new(),
);
hashes.insert(
String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
String::new(),
);
hashes.insert(String::from("foo"), String::from("sha512-foo"));
assert_eq!(
fixup_lockfile(input.as_object().unwrap().clone())?,
fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
Some(expected.as_object().unwrap().clone())
);
Ok(())
}
#[test]
fn lockfile_v1_fixup() -> anyhow::Result<()> {
let input = json!({
"lockfileVersion": 1,
"name": "foo",
"dependencies": {
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": "sha512-aaa"
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"integrity": "sha512-bbb",
"dependencies": {
"foo-bad": {
"resolved": "foo",
"integrity": "sha1-foo"
},
},
},
}
});
let expected = json!({
"lockfileVersion": 1,
"name": "foo",
"dependencies": {
"foo": {
"resolved": "https://github.com/NixOS/nixpkgs",
"integrity": ""
},
"foo-good": {
"resolved": "foo",
"integrity": "sha512-foo"
},
"bar": {
"resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
"dependencies": {
"foo-bad": {
"resolved": "foo",
"integrity": "sha512-foo"
},
},
},
}
});
let mut hashes = HashMap::new();
hashes.insert(
String::from("https://github.com/NixOS/nixpkgs"),
String::new(),
);
hashes.insert(
String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
String::new(),
);
hashes.insert(String::from("foo"), String::from("sha512-foo"));
assert_eq!(
fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?,
None
fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
Some(expected.as_object().unwrap().clone())
);
Ok(())

View file

@ -97,10 +97,20 @@ impl fmt::Display for UrlOrString {
}
#[derive(Debug, PartialEq, Eq)]
pub(super) struct HashCollection(HashSet<Hash>);
pub struct HashCollection(HashSet<Hash>);
impl HashCollection {
pub(super) fn into_best(self) -> Option<Hash> {
pub fn from_str(s: impl AsRef<str>) -> anyhow::Result<HashCollection> {
let hashes = s
.as_ref()
.split_ascii_whitespace()
.map(Hash::new)
.collect::<anyhow::Result<_>>()?;
Ok(HashCollection(hashes))
}
pub fn into_best(self) -> Option<Hash> {
self.0.into_iter().max()
}
}
@ -136,17 +146,11 @@ impl<'de> Visitor<'de> for HashCollectionVisitor {
where
E: de::Error,
{
let hashes = value
.split_ascii_whitespace()
.map(Hash::new)
.collect::<anyhow::Result<_>>()
.map_err(E::custom)?;
Ok(HashCollection(hashes))
HashCollection::from_str(value).map_err(E::custom)
}
}
#[derive(Debug, Deserialize, PartialEq, Eq, Hash)]
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)]
pub struct Hash(String);
// Hash algorithms, in ascending preference.
@ -166,11 +170,15 @@ impl Hash {
Err(anyhow!("unknown hash algorithm {algo:?}"))
}
}
/// Borrows the underlying hash string.
pub fn as_str(&self) -> &str {
    self.0.as_str()
}
}
impl fmt::Display for Hash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
self.as_str().fmt(f)
}
}

View file

@ -9,7 +9,7 @@ use std::{
use tempfile::{tempdir, TempDir};
use url::Url;
mod lock;
pub mod lock;
pub fn lockfile(content: &str, force_git_deps: bool) -> anyhow::Result<Vec<Package>> {
let mut packages = lock::packages(content)