From f6ca99b86d2e75b4fc416f04ad138aef3c747aae Mon Sep 17 00:00:00 2001 From: 06kellyjac Date: Fri, 2 Dec 2022 11:45:03 +0000 Subject: [PATCH] semgrep{,-core}: 0.112.1 -> 1.0.0 Added update script --- pkgs/tools/security/semgrep/common.nix | 54 ++++--- pkgs/tools/security/semgrep/default.nix | 88 +++++++----- pkgs/tools/security/semgrep/semgrep-core.nix | 3 +- pkgs/tools/security/semgrep/update.sh | 139 +++++++++++++++++++ 4 files changed, 228 insertions(+), 56 deletions(-) create mode 100755 pkgs/tools/security/semgrep/update.sh diff --git a/pkgs/tools/security/semgrep/common.nix b/pkgs/tools/security/semgrep/common.nix index a721448124e..0b3524dca56 100644 --- a/pkgs/tools/security/semgrep/common.nix +++ b/pkgs/tools/security/semgrep/common.nix @@ -1,41 +1,55 @@ { lib, fetchFromGitHub, fetchzip, stdenv }: rec { - version = "0.112.1"; + version = "1.0.0"; src = fetchFromGitHub { owner = "returntocorp"; repo = "semgrep"; rev = "v${version}"; - sha256 = "sha256-SZtxZz4x6YUKw1uO5HQTU4lRY989SoCNsPQphJr+L0Y="; + sha256 = "sha256-4fNBpokHKCtMB3P0ot1TzcuzOs5hlyH8nIw+bCGqThA="; }; # submodule dependencies # these are fetched so we: # 1. don't fetch the many submodules we don't need # 2. 
avoid fetchSubmodules since it's prone to impurities - langsSrc = fetchFromGitHub { - owner = "returntocorp"; - repo = "semgrep-langs"; - rev = "91e288062eb794e8a5e6967d1009624237793491"; - sha256 = "sha256-z2t2bTRyj5zu9h/GBg2YeRFimpJsd3dA7dK8VBaKzHo="; - }; - - interfacesSrc = fetchFromGitHub { - owner = "returntocorp"; - repo = "semgrep-interfaces"; - rev = "7bc457a32e088ef21adf1529fa0ddeea634b9131"; - sha256 = "sha256-xN8Qm1/YLa49k9fZKDoPPmHASI2ipI3mkKlwEK2ajO4="; + submodules = { + "cli/src/semgrep/lang" = fetchFromGitHub { + owner = "returntocorp"; + repo = "semgrep-langs"; + rev = "65cb2ed80e31e01b122f893fef8428d14432da75"; + sha256 = "sha256-HdPJdOlMM1l7vNSATkEu5KrCkpt2feEAH8LFDU84KUM="; + }; + "cli/src/semgrep/semgrep_interfaces" = fetchFromGitHub { + owner = "returntocorp"; + repo = "semgrep-interfaces"; + rev = "c69e30a4cf39f11cab5378700f5e193e8282079e"; + sha256 = "sha256-Wr3/TWx/LHiTFCoGY4sqdsn3dHvMsEIVYA3RGiv88xQ="; + }; }; # fetch pre-built semgrep-core since the ocaml build is complex and relies on # the opam package manager at some point - coreRelease = if stdenv.isDarwin then fetchzip { - url = "https://github.com/returntocorp/semgrep/releases/download/v${version}/semgrep-v${version}-osx.zip"; - sha256 = "sha256-JiOH39vMDL6r9WKuPO0CDkRwGZtzl/GIFoSegVddFpw="; - } else fetchzip { - url = "https://github.com/returntocorp/semgrep/releases/download/v${version}/semgrep-v${version}-ubuntu-16.04.tgz"; - sha256 = "sha256-V6r+VQrgz8uVSbRa2AmW4lnLxovk63FL7LqVKD46RBw="; + core = rec { + data = { + x86_64-linux = { + suffix = "-ubuntu-16.04.tgz"; + sha256 = "sha256-SsaAuhcDyO3nr6H2xOtdxzOoEQd6aIe0mlpehvDWzU0="; + }; + x86_64-darwin = { + suffix = "-osx.zip"; + sha256 = "sha256-DAcAB/q6XeljCp4mVljIJB4AUjUuzMSRMFzIuyjWMew="; + }; + }; + src = let + inherit (stdenv.hostPlatform) system; + selectSystemData = data: data.${system} or (throw "Unsupported system: ${system}"); + inherit (selectSystemData data) suffix sha256; + in fetchzip { + url = 
"https://github.com/returntocorp/semgrep/releases/download/v${version}/semgrep-v${version}${suffix}"; + inherit sha256; + }; }; meta = with lib; { diff --git a/pkgs/tools/security/semgrep/default.nix b/pkgs/tools/security/semgrep/default.nix index 1df7c515e30..c97d877b483 100644 --- a/pkgs/tools/security/semgrep/default.nix +++ b/pkgs/tools/security/semgrep/default.nix @@ -15,12 +15,26 @@ let in buildPythonApplication rec { pname = "semgrep"; - inherit (common) version; - src = "${common.src}/cli"; + inherit (common) src version; - SEMGREP_CORE_BIN = "${semgrep-core}/bin/semgrep-core"; + postPatch = (lib.concatStringsSep "\n" (lib.mapAttrsToList ( + path: submodule: '' + # substitute ${path} + # remove git submodule placeholder + rm -r ${path} + # link submodule + ln -s ${submodule}/ ${path} + '' + ) common.submodules)) + '' + cd cli + ''; nativeBuildInputs = [ pythonRelaxDepsHook ]; + # tell cli/setup.py to not copy semgrep-core into the result + # this means we can share a copy of semgrep-core and avoid an issue where it + # copies the binary but doesn't retain the executable bit + SEMGREP_SKIP_BIN = true; + pythonRelaxDeps = [ "attrs" "boltons" @@ -28,37 +42,6 @@ buildPythonApplication rec { "typing-extensions" ]; - postPatch = '' - # remove git submodule placeholders - rm -r ./src/semgrep/{lang,semgrep_interfaces} - # link submodule dependencies - ln -s ${common.langsSrc}/ ./src/semgrep/lang - ln -s ${common.interfacesSrc}/ ./src/semgrep/semgrep_interfaces - ''; - - doCheck = true; - checkInputs = [ git pytestCheckHook ] ++ (with pythonPackages; [ - pytest-snapshot - pytest-mock - pytest-freezegun - types-freezegun - ]); - disabledTests = [ - # requires networking - "tests/unit/test_metric_manager.py" - ]; - preCheck = '' - # tests need a home directory - export HOME="$(mktemp -d)" - - # disabledTestPaths doesn't manage to avoid the e2e tests - # remove them from pyproject.toml - # and remove need for pytest-split - substituteInPlace pyproject.toml \ - 
--replace '"tests/e2e",' "" \ - --replace 'addopts = "--splitting-algorithm=least_duration"' "" - ''; - propagatedBuildInputs = with pythonPackages; [ attrs boltons @@ -77,8 +60,45 @@ buildPythonApplication rec { urllib3 typing-extensions python-lsp-jsonrpc + tomli ]; + doCheck = true; + checkInputs = [ git pytestCheckHook ] ++ (with pythonPackages; [ + pytest-snapshot + pytest-mock + pytest-freezegun + types-freezegun + ]); + disabledTests = [ + # requires networking + "test_send" + # requires networking + "test_parse_exclude_rules_auto" + ]; + preCheck = '' + # tests need a home directory + export HOME="$(mktemp -d)" + + # disabledTestPaths doesn't manage to avoid the e2e tests + # remove them from pyproject.toml + # and remove need for pytest-split + substituteInPlace pyproject.toml \ + --replace '"tests/e2e",' "" \ + --replace 'addopts = "--splitting-algorithm=least_duration"' "" + ''; + + # since we stop cli/setup.py from finding semgrep-core and copying it into + # the result we need to provide it on the PATH + preFixup = '' + makeWrapperArgs+=(--prefix PATH : ${lib.makeBinPath [ semgrep-core ]}) + ''; + + passthru = { + inherit common; + updateScript = ./update.sh; + }; + meta = common.meta // { description = common.meta.description + " - cli"; }; diff --git a/pkgs/tools/security/semgrep/semgrep-core.nix b/pkgs/tools/security/semgrep/semgrep-core.nix index 3a9c904ad73..e5ce941298a 100644 --- a/pkgs/tools/security/semgrep/semgrep-core.nix +++ b/pkgs/tools/security/semgrep/semgrep-core.nix @@ -6,8 +6,7 @@ in stdenvNoCC.mkDerivation rec { pname = "semgrep-core"; inherit (common) version; - - src = common.coreRelease; + inherit (common.core) src; installPhase = '' runHook preInstall diff --git a/pkgs/tools/security/semgrep/update.sh b/pkgs/tools/security/semgrep/update.sh new file mode 100755 index 00000000000..2b464d60201 --- /dev/null +++ b/pkgs/tools/security/semgrep/update.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env nix-shell +#!nix-shell -i bash -p curl gnused jq 
+
+set -euxo pipefail
+
+# provide a github token so you don't get rate limited
+# if you use gh cli you can use:
+#     `export GITHUB_TOKEN="$(cat ~/.config/gh/config.yml | yq '.hosts."github.com".oauth_token' -r)"`
+# or just set your token by hand:
+#     `read -s -p "Enter your token: " GITHUB_TOKEN; export GITHUB_TOKEN`
+# (we use read so it doesn't show in our shell history and in secret mode so the token you paste isn't visible)
+if [ -z "${GITHUB_TOKEN:-}" ]; then
+  echo "no GITHUB_TOKEN provided - you could meet API request limiting" >&2
+fi
+
+ROOT="$(dirname "$(readlink -f "$0")")"
+NIXPKGS_ROOT="$ROOT/../../../.."
+NIX_DRV="$ROOT/default.nix"
+
+COMMON_FILE="$ROOT/common.nix"
+# evaluate the attribute path $1 and print the text between its first pair of double quotes
+instantiateClean() {
+  nix-instantiate -A "$1" --eval --strict | cut -d\" -f2
+}
+
+# get latest version
+NEW_VERSION=$(
+  curl -s -H \
+    "Accept: application/vnd.github.v3+json" \
+    ${GITHUB_TOKEN:+ -H "Authorization: bearer $GITHUB_TOKEN"} \
+    https://api.github.com/repos/returntocorp/semgrep/releases/latest \
+  | jq -r '.tag_name'
+)
+# trim v prefix
+NEW_VERSION="${NEW_VERSION:1}"
+OLD_VERSION="$(instantiateClean semgrep.common.version)"
+
+if [[ "$OLD_VERSION" == "$NEW_VERSION" ]]; then
+  echo "Already up to date"
+  exit
+fi
+# replace every occurrence of $1 with $2 in file $3 (sed with '@' as the delimiter)
+replace() {
+  sed -i "s@$1@$2@g" "$3"
+}
+# build attribute $1 and print what nix reports after "got:"; errexit/pipefail are off around the build
+fetchgithub() {
+  set +eo pipefail
+  nix-build -A "$1" 2>&1 >/dev/null | grep "got:" | cut -d':' -f2 | sed 's| ||g'
+  set -eo pipefail
+}
+# same as fetchgithub, but for a fetchzip of URL $1 that uses lib.fakeSha256 as its hash
+fetchzip() {
+  set +eo pipefail
+  nix-build -E "with import $NIXPKGS_ROOT {}; fetchzip {url = \"$1\"; sha256 = lib.fakeSha256; }" 2>&1 >/dev/null | grep "got:" | cut -d':' -f2 | sed 's| ||g'
+  set -eo pipefail
+}
+
+replace "$OLD_VERSION" "$NEW_VERSION" "$COMMON_FILE"
+
+echo "Updating src"
+
+OLD_HASH="$(instantiateClean semgrep.common.src.outputHash)"
+echo "Old hash $OLD_HASH"
+TMP_HASH="sha256-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+replace "$OLD_HASH" "$TMP_HASH" "$COMMON_FILE"
+NEW_HASH="$(fetchgithub semgrep.common.src)"
+echo "New hash $NEW_HASH"
+replace "$TMP_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+echo "Updated src"
+
+# loop through platforms for core
+nix-instantiate -E "with import $NIXPKGS_ROOT {}; builtins.attrNames semgrep.common.core.data" --eval --strict --json \
+| jq '.[]' -r \
+| while read -r PLATFORM; do
+  echo "Updating core for $PLATFORM"
+  SUFFIX=$(instantiateClean semgrep.common.core.data."$PLATFORM".suffix)
+  OLD_HASH=$(instantiateClean semgrep.common.core.data."$PLATFORM".sha256)
+  echo "Old hash $OLD_HASH"
+
+  NEW_URL="https://github.com/returntocorp/semgrep/releases/download/v$NEW_VERSION/semgrep-v$NEW_VERSION$SUFFIX"
+  NEW_HASH="$(fetchzip "$NEW_URL")"
+  echo "New hash $NEW_HASH"
+
+  replace "$OLD_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+  echo "Updated core for $PLATFORM"
+done
+# NOTE: git is used below but is not in this script's nix-shell -p list, so it must already be on PATH
+OLD_PWD=$PWD
+TMPDIR="$(mktemp -d)"
+# shallow clone to check submodule commits, don't actually need the submodules
+git clone https://github.com/returntocorp/semgrep "$TMPDIR/semgrep" --depth 1 --branch "v$NEW_VERSION"
+# print the object id recorded at path $1 in HEAD of the clone
+get_submodule_commit() {
+  OLD_PWD=$PWD
+  (
+    cd "$TMPDIR/semgrep"
+    git ls-tree --object-only HEAD "$1"
+    cd "$OLD_PWD"
+  )
+}
+
+# loop through submodules
+nix-instantiate -E "with import $NIXPKGS_ROOT {}; builtins.attrNames semgrep.passthru.common.submodules" --eval --strict --json \
+| jq '.[]' -r \
+| while read -r SUBMODULE; do
+  echo "Updating $SUBMODULE"
+  OLD_REV=$(instantiateClean semgrep.passthru.common.submodules."$SUBMODULE".rev)
+  echo "Old commit $OLD_REV"
+  OLD_HASH=$(instantiateClean semgrep.passthru.common.submodules."$SUBMODULE".outputHash)
+  echo "Old hash $OLD_HASH"
+
+  NEW_REV=$(get_submodule_commit "$SUBMODULE")
+  echo "New commit $NEW_REV"
+
+  if [[ "$OLD_REV" == "$NEW_REV" ]]; then
+    echo "$SUBMODULE already up to date"
+    continue
+  fi
+
+  NEW_URL=$(instantiateClean semgrep.passthru.common.submodules."$SUBMODULE".url | sed "s@$OLD_REV@$NEW_REV@g")
+  NEW_HASH=$(nix --experimental-features nix-command hash to-sri "sha256:$(nix-prefetch-url "$NEW_URL")")
+
+  TMP_HASH="sha256-ABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA="
+  replace "$OLD_REV" "$NEW_REV" "$COMMON_FILE"
+  replace "$OLD_HASH" "$TMP_HASH" "$COMMON_FILE"
+  NEW_HASH="$(fetchgithub semgrep.passthru.common.submodules."$SUBMODULE")"
+  echo "New hash $NEW_HASH"
+  replace "$TMP_HASH" "$NEW_HASH" "$COMMON_FILE"
+
+  echo "Updated $SUBMODULE"
+done
+
+rm -rf "$TMPDIR"
+
+echo "Finished"
+