copy-tarballs: Use an S3 bucket for tarballs.nixos.org

Tarballs.nixos.org is now stored in an S3 bucket rather than an EBS
volume. Redirects are used to simulate symlinks.

The function find-tarballs.nix now filters out fetchzip, fetchpatch and the
like, whose outputs are unpacked or post-processed and so no longer match the
hash of the plain upstream file.
Eelco Dolstra 2015-12-10 15:53:48 +01:00
parent bb672805cd
commit 567e002545
2 changed files with 141 additions and 89 deletions
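The "redirects are used to simulate symlinks" point works like this: each tarball is stored once under its sha512 key, and zero-byte objects under the md5/sha1/sha256 keys carry an x-amz-website-redirect-location pointing at that canonical key, so the S3 website endpoint answers them with an HTTP redirect much as the old symlinks resolved to the shared file. The sketch below is illustrative only and not part of this commit; it assumes the bucket is reachable through S3 website hosting at tarballs.nixos.org and uses core Perl's HTTP::Tiny.

#! /usr/bin/env perl
# Illustrative sketch (not part of this commit): check whether a tarball with
# a given sha256 is mirrored, the way fetchurl's fallback lookup would see it.
# Assumes the redirect objects are served via the S3 website endpoint behind
# tarballs.nixos.org, so a HEAD request is redirected to the sha512 key.
use strict;
use warnings;
use HTTP::Tiny;

my $hash = $ARGV[0] // die "usage: $0 SHA256-HASH\n";
my $res = HTTP::Tiny->new->head("http://tarballs.nixos.org/sha256/$hash");

if ($res->{success}) {
    # HTTP::Tiny follows the redirect chain; $res->{url} is the final URL,
    # i.e. the canonical sha512/<base-16> object.
    print "mirrored, canonical object: $res->{url}\n";
} else {
    print "not mirrored (HTTP $res->{status})\n";
}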

maintainers/scripts/copy-tarballs.pl

@@ -1,97 +1,144 @@
#! /usr/bin/env nix-shell
#! nix-shell -i perl -p perl perlPackages.NetAmazonS3 nixUnstable

# This command uploads tarballs to tarballs.nixos.org, the
# content-addressed cache used by fetchurl as a fallback for when
# upstream tarballs disappear or change.
#
# Usage:
#
# 1) To upload a single file:
#
#    $ copy-tarballs.pl --file /path/to/tarball.tar.gz
#
# 2) To upload all files obtained via calls to fetchurl in a Nix derivation:
#
#    $ copy-tarballs.pl --expr '(import <nixpkgs> {}).hello'

use strict;
use warnings;
use File::Basename;
use File::Path;
use JSON;
use Net::Amazon::S3;
use Nix::Store;

# S3 setup.
my $aws_access_key_id = $ENV{'AWS_ACCESS_KEY_ID'} or die;
my $aws_secret_access_key = $ENV{'AWS_SECRET_ACCESS_KEY'} or die;

my $s3 = Net::Amazon::S3->new(
    { aws_access_key_id     => $aws_access_key_id,
      aws_secret_access_key => $aws_secret_access_key,
      retry                 => 1,
    });

my $bucket = $s3->bucket("nixpkgs-tarballs") or die;

sub alreadyMirrored {
    my ($algo, $hash) = @_;
    return defined $bucket->get_key("$algo/$hash");
}

sub uploadFile {
    my ($fn, $name) = @_;

    my $md5_16 = hashFile("md5", 0, $fn) or die;
    my $sha1_16 = hashFile("sha1", 0, $fn) or die;
    my $sha256_32 = hashFile("sha256", 1, $fn) or die;
    my $sha256_16 = hashFile("sha256", 0, $fn) or die;
    my $sha512_32 = hashFile("sha512", 1, $fn) or die;
    my $sha512_16 = hashFile("sha512", 0, $fn) or die;

    my $mainKey = "sha512/$sha512_16";

    return if alreadyMirrored("sha512", $sha512_16);

    # Upload the file as sha512/<hash-in-base-16>.
    print STDERR "uploading $fn to $mainKey...\n";
    $bucket->add_key_filename($mainKey, $fn, { 'x-amz-meta-original-name' => $name })
        or die "failed to upload $fn to $mainKey\n";

    # Create redirects from the other hash types.
    sub redirect {
        my ($name, $dest) = @_;
        #print STDERR "linking $name to $dest...\n";
        $bucket->add_key($name, "", { 'x-amz-website-redirect-location' => "/" . $dest })
            or die "failed to create redirect from $name to $dest\n";
    }
    redirect "md5/$md5_16", $mainKey;
    redirect "sha1/$sha1_16", $mainKey;
    redirect "sha256/$sha256_32", $mainKey;
    redirect "sha256/$sha256_16", $mainKey;
    redirect "sha512/$sha512_32", $mainKey;
}

my $op = $ARGV[0] // "";

if ($op eq "--file") {
    my $fn = $ARGV[1] // die "$0: --file requires a file name\n";
    if (alreadyMirrored("sha512", hashFile("sha512", 0, $fn))) {
        print STDERR "$fn is already mirrored\n";
    } else {
        uploadFile($fn, basename $fn);
    }
}

elsif ($op eq "--expr") {

    # Evaluate find-tarballs.nix.
    my $expr = $ARGV[1] // die "$0: --expr requires a Nix expression\n";
    my $pid = open(JSON, "-|", "nix-instantiate", "--eval-only", "--json", "--strict",
        "<nixpkgs/maintainers/scripts/find-tarballs.nix>",
        "--arg", "expr", $expr);
    my $stdout = <JSON>;
    waitpid($pid, 0);
    die "$0: evaluation failed\n" if $?;
    close JSON;

    my $fetches = decode_json($stdout);

    print STDERR "evaluation returned ", scalar(@{$fetches}), " tarballs\n";

    # Check every fetchurl call discovered by find-tarballs.nix.
    my $mirrored = 0;
    my $have = 0;
    foreach my $fetch (@{$fetches}) {
        my $url = $fetch->{url};
        my $algo = $fetch->{type};
        my $hash = $fetch->{hash};

        if ($url !~ /^http:/ && $url !~ /^https:/ && $url !~ /^ftp:/ && $url !~ /^mirror:/) {
            print STDERR "skipping $url (unsupported scheme)\n";
            next;
        }

        if (alreadyMirrored($algo, $hash)) {
            $have++;
            next;
        }

        print STDERR "mirroring $url...\n";

        next if $ENV{DRY_RUN};

        # Download the file using nix-prefetch-url.
        $ENV{QUIET} = 1;
        $ENV{PRINT_PATH} = 1;
        my $fh;
        my $pid = open($fh, "-|", "nix-prefetch-url", "--type", $algo, $url, $hash) or die;
        waitpid($pid, 0) or die;
        if ($? != 0) {
            print STDERR "failed to fetch $url: $?\n";
            next;
        }
        <$fh>; my $storePath = <$fh>; chomp $storePath;

        uploadFile($storePath, $url);
        $mirrored++;
    }

    print STDERR "mirrored $mirrored files, already have $have files\n";
}

else {
    die "Syntax: $0 --file FILENAME | --expr EXPR\n";
}
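One detail worth noting in uploadFile: the same hash can be written in base-16 or in Nix's base-32, and fetchurl callers use both spellings, which is why the script registers a sha256 redirect for each. The snippet below is a small illustrative sketch, not part of this commit; it only assumes that Nix::Store's hashFile (already used above) takes the base-32 flag as its second argument.

#! /usr/bin/env nix-shell
#! nix-shell -i perl -p perl nixUnstable
# Illustrative sketch: print both encodings of a file's sha256, i.e. the two
# keys that the script above maps to the same sha512 object via redirects.
use strict;
use warnings;
use Nix::Store;

my $fn = $ARGV[0] // die "usage: $0 FILE\n";
print "base-16: ", hashFile("sha256", 0, $fn), "\n";   # 64 hex characters
print "base-32: ", hashFile("sha256", 1, $fn), "\n";   # 52-character Nix base-32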

maintainers/scripts/find-tarballs.nix

@@ -4,9 +4,11 @@
with import ../.. { };
with lib;

{ expr ? removeAttrs (import ../../pkgs/top-level/release.nix { }) [ "tarball" "unstable" ] }:

let

  root = expr;

  uniqueUrls = map (x: x.file) (genericClosure {
    startSet = map (file: { key = file.url; inherit file; }) urls;

@@ -15,7 +17,10 @@ let

  urls = map (drv: { url = head drv.urls; hash = drv.outputHash; type = drv.outputHashAlgo; }) fetchurlDependencies;

  fetchurlDependencies =
    filter
      (drv: drv.outputHash or "" != "" && drv.outputHashMode == "flat" && drv.postFetch or "" == "" && drv ? urls)
      dependencies;

  dependencies = map (x: x.value) (genericClosure {
    startSet = map keyDrv (derivationsIn' root);