Merge pull request #154004 from illustris/hadoop

This commit is contained in:
Artturi 2022-03-16 14:05:56 +02:00 committed by GitHub
commit b734f40478
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 639 additions and 380 deletions

View file

@ -569,6 +569,25 @@
because Python 2 is being retired from nixpkgs.
</para>
</listitem>
<listitem>
<para>
Services in the <literal>hadoop</literal> module previously
set <literal>openFirewall</literal> to true by default. This
has now been changed to false. Node definitions for multi-node
clusters would need <literal>openFirewall = true;</literal> to
be added to to hadoop services when upgrading from NixOS
21.11.
</para>
</listitem>
<listitem>
<para>
<literal>services.hadoop.yarn.nodemanager</literal> now uses
cgroup-based CPU limit enforcement by default. Additionally,
the option <literal>useCGroups</literal> was added to
nodemanagers as an easy way to switch back to the old
behavior.
</para>
</listitem>
<listitem>
<para>
The <literal>wafHook</literal> hook now honors
@ -1173,6 +1192,33 @@
using the PyPy interpreter were added.
</para>
</listitem>
<listitem>
<para>
Some improvements have been made to the
<literal>hadoop</literal> module:
</para>
<itemizedlist spacing="compact">
<listitem>
<para>
A <literal>gatewayRole</literal> option has been added,
for deploying hadoop cluster configuration files to a node
that does not have any active services
</para>
</listitem>
<listitem>
<para>
Support for older versions of hadoop have been added to
the module
</para>
</listitem>
<listitem>
<para>
Overriding and extending site XML files has been made
easier
</para>
</listitem>
</itemizedlist>
</listitem>
<listitem>
<para>
If you are using Wayland you can choose to use the Ozone

View file

@ -244,6 +244,14 @@ In addition to numerous new and upgraded packages, this release has the followin
- The MoinMoin wiki engine (`services.moinmoin`) has been removed, because Python 2 is being retired from nixpkgs.
- Services in the `hadoop` module previously set `openFirewall` to true by default.
This has now been changed to false. Node definitions for multi-node clusters would need
`openFirewall = true;` to be added to to hadoop services when upgrading from NixOS 21.11.
- `services.hadoop.yarn.nodemanager` now uses cgroup-based CPU limit enforcement by default.
Additionally, the option `useCGroups` was added to nodemanagers as an easy way to switch
back to the old behavior.
- The `wafHook` hook now honors `NIX_BUILD_CORES` when `enableParallelBuilding` is not set explicitly. Packages can restore the old behaviour by setting `enableParallelBuilding=false`.
- `pkgs.claws-mail-gtk2`, representing Claws Mail's older release version three, was removed in order to get rid of Python 2.
@ -436,6 +444,11 @@ In addition to numerous new and upgraded packages, this release has the followin
- The `writers.writePyPy2`/`writers.writePyPy3` and corresponding `writers.writePyPy2Bin`/`writers.writePyPy3Bin` convenience functions to create executable Python 2/3 scripts using the PyPy interpreter were added.
- Some improvements have been made to the `hadoop` module:
- A `gatewayRole` option has been added, for deploying hadoop cluster configuration files to a node that does not have any active services
- Support for older versions of hadoop have been added to the module
- Overriding and extending site XML files has been made easier
- If you are using Wayland you can choose to use the Ozone Wayland support
in Chrome and several Electron apps by setting the environment variable
`NIXOS_OZONE_WL=1` (for example via

View file

@ -1,6 +1,6 @@
{ cfg, pkgs, lib }:
let
propertyXml = name: value: ''
propertyXml = name: value: lib.optionalString (value != null) ''
<property>
<name>${name}</name>
<value>${builtins.toString value}</value>
@ -29,16 +29,16 @@ let
export HADOOP_LOG_DIR=/tmp/hadoop/$USER
'';
in
pkgs.runCommand "hadoop-conf" {} ''
pkgs.runCommand "hadoop-conf" {} (with cfg; ''
mkdir -p $out/
cp ${siteXml "core-site.xml" cfg.coreSite}/* $out/
cp ${siteXml "hdfs-site.xml" cfg.hdfsSite}/* $out/
cp ${siteXml "mapred-site.xml" cfg.mapredSite}/* $out/
cp ${siteXml "yarn-site.xml" cfg.yarnSite}/* $out/
cp ${siteXml "httpfs-site.xml" cfg.httpfsSite}/* $out/
cp ${cfgFile "container-executor.cfg" cfg.containerExecutorCfg}/* $out/
cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/
cp ${cfgFile "container-executor.cfg" containerExecutorCfg}/* $out/
cp ${pkgs.writeTextDir "hadoop-user-functions.sh" userFunctions}/* $out/
cp ${pkgs.writeTextDir "hadoop-env.sh" hadoopEnv}/* $out/
cp ${cfg.log4jProperties} $out/log4j.properties
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") cfg.extraConfDirs}
''
cp ${log4jProperties} $out/log4j.properties
${lib.concatMapStringsSep "\n" (dir: "cp -r ${dir}/* $out/") extraConfDirs}
'')

View file

@ -21,24 +21,50 @@ with lib;
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/core-default.xml"/>
'';
};
coreSiteInternal = mkOption {
default = {};
type = types.attrsOf types.anything;
internal = true;
description = ''
Internal option to add configs to core-site.xml based on module options
'';
};
hdfsSite = mkOption {
hdfsSiteDefault = mkOption {
default = {
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-address" = "0.0.0.0:9870";
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-bind-host" = "0.0.0.0";
};
type = types.attrsOf types.anything;
description = ''
Default options for hdfs-site.xml
'';
};
hdfsSite = mkOption {
default = {};
type = types.attrsOf types.anything;
example = literalExpression ''
{
"dfs.nameservices" = "namenode1";
}
'';
description = ''
Hadoop hdfs-site.xml definition
Additional options and overrides for hdfs-site.xml
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml"/>
'';
};
hdfsSiteInternal = mkOption {
default = {};
type = types.attrsOf types.anything;
internal = true;
description = ''
Internal option to add configs to hdfs-site.xml based on module options
'';
};
mapredSite = mkOption {
mapredSiteDefault = mkOption {
default = {
"mapreduce.framework.name" = "yarn";
"yarn.app.mapreduce.am.env" = "HADOOP_MAPRED_HOME=${cfg.package}/lib/${cfg.package.untarDir}";
@ -54,18 +80,25 @@ with lib;
}
'';
type = types.attrsOf types.anything;
description = ''
Default options for mapred-site.xml
'';
};
mapredSite = mkOption {
default = {};
type = types.attrsOf types.anything;
example = literalExpression ''
options.services.hadoop.mapredSite.default // {
{
"mapreduce.map.java.opts" = "-Xmx900m -XX:+UseParallelGC";
}
'';
description = ''
Hadoop mapred-site.xml definition
Additional options and overrides for mapred-site.xml
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml"/>
'';
};
yarnSite = mkOption {
yarnSiteDefault = mkOption {
default = {
"yarn.nodemanager.admin-env" = "PATH=$PATH";
"yarn.nodemanager.aux-services" = "mapreduce_shuffle";
@ -77,19 +110,34 @@ with lib;
"yarn.nodemanager.linux-container-executor.path" = "/run/wrappers/yarn-nodemanager/bin/container-executor";
"yarn.nodemanager.log-dirs" = "/var/log/hadoop/yarn/nodemanager";
"yarn.resourcemanager.bind-host" = "0.0.0.0";
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler";
};
type = types.attrsOf types.anything;
description = ''
Default options for yarn-site.xml
'';
};
yarnSite = mkOption {
default = {};
type = types.attrsOf types.anything;
example = literalExpression ''
options.services.hadoop.yarnSite.default // {
{
"yarn.resourcemanager.hostname" = "''${config.networking.hostName}";
}
'';
description = ''
Hadoop yarn-site.xml definition
Additional options and overrides for yarn-site.xml
<link xlink:href="https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-common/yarn-default.xml"/>
'';
};
yarnSiteInternal = mkOption {
default = {};
type = types.attrsOf types.anything;
internal = true;
description = ''
Internal option to add configs to yarn-site.xml based on module options
'';
};
httpfsSite = mkOption {
default = { };
@ -123,6 +171,7 @@ with lib;
"yarn.nodemanager.linux-container-executor.group"="hadoop";
"min.user.id"=1000;
"feature.terminal.enabled"=1;
"feature.mount-cgroup.enabled" = 1;
};
type = types.attrsOf types.anything;
example = literalExpression ''
@ -148,6 +197,8 @@ with lib;
description = "Directories containing additional config files to be added to HADOOP_CONF_DIR";
};
gatewayRole.enable = mkEnableOption "gateway role for deploying hadoop configs";
package = mkOption {
type = types.package;
default = pkgs.hadoop;
@ -157,20 +208,16 @@ with lib;
};
config = mkMerge [
(mkIf (builtins.hasAttr "yarn" config.users.users ||
builtins.hasAttr "hdfs" config.users.users ||
builtins.hasAttr "httpfs" config.users.users) {
users.groups.hadoop = {
gid = config.ids.gids.hadoop;
};
environment = {
systemPackages = [ cfg.package ];
etc."hadoop-conf".source = let
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
in "${hadoopConf}";
};
})
];
config = mkIf cfg.gatewayRole.enable {
users.groups.hadoop = {
gid = config.ids.gids.hadoop;
};
environment = {
systemPackages = [ cfg.package ];
etc."hadoop-conf".source = let
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
in "${hadoopConf}";
variables.HADOOP_CONF_DIR = "/etc/hadoop-conf/";
};
};
}

View file

@ -1,191 +1,191 @@
{ config, lib, pkgs, ...}:
{ config, lib, pkgs, ... }:
with lib;
let
cfg = config.services.hadoop;
# Config files for hadoop services
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
restartIfChanged = mkOption {
type = types.bool;
description = ''
Automatically restart the service on config change.
This can be set to false to defer restarts on clusters running critical applications.
Please consider the security implications of inadvertently running an older version,
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
'';
default = false;
};
# Generator for HDFS service options
hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }: {
enable = mkEnableOption serviceName;
restartIfChanged = mkOption {
type = types.bool;
description = ''
Automatically restart the service on config change.
This can be set to false to defer restarts on clusters running critical applications.
Please consider the security implications of inadvertently running an older version,
and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
'';
default = false;
};
extraFlags = mkOption{
type = with types; listOf str;
default = [];
description = "Extra command line flags to pass to ${serviceName}";
example = [
"-Dcom.sun.management.jmxremote"
"-Dcom.sun.management.jmxremote.port=8010"
];
};
extraEnv = mkOption{
type = with types; attrsOf str;
default = {};
description = "Extra environment variables for ${serviceName}";
};
} // (optionalAttrs firewallOption {
openFirewall = mkOption {
type = types.bool;
default = false;
description = "Open firewall ports for ${serviceName}.";
};
}) // (optionalAttrs (extraOpts != null) extraOpts);
# Generator for HDFS service configs
hadoopServiceConfig =
{ name
, serviceOptions ? cfg.hdfs."${toLower name}"
, description ? "Hadoop HDFS ${name}"
, User ? "hdfs"
, allowedTCPPorts ? [ ]
, preStart ? ""
, environment ? { }
, extraConfig ? { }
}: (
mkIf serviceOptions.enable ( mkMerge [{
systemd.services."hdfs-${toLower name}" = {
inherit description preStart;
environment = environment // serviceOptions.extraEnv;
wantedBy = [ "multi-user.target" ];
inherit (serviceOptions) restartIfChanged;
serviceConfig = {
inherit User;
SyslogIdentifier = "hdfs-${toLower name}";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${toLower name} ${escapeShellArgs serviceOptions.extraFlags}";
Restart = "always";
};
};
services.hadoop.gatewayRole.enable = true;
networking.firewall.allowedTCPPorts = mkIf
((builtins.hasAttr "openFirewall" serviceOptions) && serviceOptions.openFirewall)
allowedTCPPorts;
} extraConfig])
);
in
{
options.services.hadoop.hdfs = {
namenode = {
enable = mkEnableOption "Whether to run the HDFS NameNode";
namenode = hadoopServiceOption { serviceName = "HDFS NameNode"; } // {
formatOnInit = mkOption {
type = types.bool;
default = false;
description = ''
Format HDFS namenode on first start. This is useful for quickly spinning up ephemeral HDFS clusters with a single namenode.
For HA clusters, initialization involves multiple steps across multiple nodes. Follow [this guide](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html)
to initialize an HA cluster manually.
'';
};
inherit restartIfChanged;
openFirewall = mkOption {
type = types.bool;
default = true;
description = ''
Open firewall ports for namenode
Format HDFS namenode on first start. This is useful for quickly spinning up
ephemeral HDFS clusters with a single namenode.
For HA clusters, initialization involves multiple steps across multiple nodes.
Follow this guide to initialize an HA cluster manually:
<link xlink:href="https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html"/>
'';
};
};
datanode = {
enable = mkEnableOption "Whether to run the HDFS DataNode";
inherit restartIfChanged;
openFirewall = mkOption {
type = types.bool;
default = true;
description = ''
Open firewall ports for datanode
'';
datanode = hadoopServiceOption { serviceName = "HDFS DataNode"; } // {
dataDirs = mkOption {
default = null;
description = "Tier and path definitions for datanode storage.";
type = with types; nullOr (listOf (submodule {
options = {
type = mkOption {
type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
description = ''
Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
'';
};
path = mkOption {
type = path;
example = [ "/var/lib/hadoop/hdfs/dn" ];
description = "Determines where on the local filesystem a data node should store its blocks.";
};
};
}));
};
};
journalnode = {
enable = mkEnableOption "Whether to run the HDFS JournalNode";
inherit restartIfChanged;
openFirewall = mkOption {
type = types.bool;
default = true;
description = ''
Open firewall ports for journalnode
'';
};
journalnode = hadoopServiceOption { serviceName = "HDFS JournalNode"; };
zkfc = hadoopServiceOption {
serviceName = "HDFS ZooKeeper failover controller";
firewallOption = false;
};
zkfc = {
enable = mkEnableOption "Whether to run the HDFS ZooKeeper failover controller";
inherit restartIfChanged;
};
httpfs = {
enable = mkEnableOption "Whether to run the HDFS HTTPfs server";
httpfs = hadoopServiceOption { serviceName = "HDFS JournalNode"; } // {
tempPath = mkOption {
type = types.path;
default = "/tmp/hadoop/httpfs";
description = ''
HTTPFS_TEMP path used by HTTPFS
'';
};
inherit restartIfChanged;
openFirewall = mkOption {
type = types.bool;
default = true;
description = ''
Open firewall ports for HTTPFS
'';
description = "HTTPFS_TEMP path used by HTTPFS";
};
};
};
config = mkMerge [
(mkIf cfg.hdfs.namenode.enable {
systemd.services.hdfs-namenode = {
description = "Hadoop HDFS NameNode";
wantedBy = [ "multi-user.target" ];
inherit (cfg.hdfs.namenode) restartIfChanged;
preStart = (mkIf cfg.hdfs.namenode.formatOnInit ''
${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true
'');
serviceConfig = {
User = "hdfs";
SyslogIdentifier = "hdfs-namenode";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode";
Restart = "always";
};
};
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.namenode.openFirewall [
(hadoopServiceConfig {
name = "NameNode";
allowedTCPPorts = [
9870 # namenode.http-address
8020 # namenode.rpc-address
8022 # namenode. servicerpc-address
]);
8022 # namenode.servicerpc-address
8019 # dfs.ha.zkfc.port
];
preStart = (mkIf cfg.hdfs.namenode.formatOnInit
"${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true"
);
})
(mkIf cfg.hdfs.datanode.enable {
systemd.services.hdfs-datanode = {
description = "Hadoop HDFS DataNode";
wantedBy = [ "multi-user.target" ];
inherit (cfg.hdfs.datanode) restartIfChanged;
serviceConfig = {
User = "hdfs";
SyslogIdentifier = "hdfs-datanode";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} datanode";
Restart = "always";
};
};
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.datanode.openFirewall [
(hadoopServiceConfig {
name = "DataNode";
# port numbers for datanode changed between hadoop 2 and 3
allowedTCPPorts = if versionAtLeast cfg.package.version "3" then [
9864 # datanode.http.address
9866 # datanode.address
9867 # datanode.ipc.address
]);
] else [
50075 # datanode.http.address
50010 # datanode.address
50020 # datanode.ipc.address
];
extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" = let d = cfg.hdfs.datanode.dataDirs; in
if (d!= null) then (concatMapStringsSep "," (x: "["+x.type+"]file://"+x.path) cfg.hdfs.datanode.dataDirs) else d;
})
(mkIf cfg.hdfs.journalnode.enable {
systemd.services.hdfs-journalnode = {
description = "Hadoop HDFS JournalNode";
wantedBy = [ "multi-user.target" ];
inherit (cfg.hdfs.journalnode) restartIfChanged;
serviceConfig = {
User = "hdfs";
SyslogIdentifier = "hdfs-journalnode";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} journalnode";
Restart = "always";
};
};
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.journalnode.openFirewall [
(hadoopServiceConfig {
name = "JournalNode";
allowedTCPPorts = [
8480 # dfs.journalnode.http-address
8485 # dfs.journalnode.rpc-address
]);
];
})
(mkIf cfg.hdfs.zkfc.enable {
systemd.services.hdfs-zkfc = {
description = "Hadoop HDFS ZooKeeper failover controller";
wantedBy = [ "multi-user.target" ];
inherit (cfg.hdfs.zkfc) restartIfChanged;
serviceConfig = {
User = "hdfs";
SyslogIdentifier = "hdfs-zkfc";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} zkfc";
Restart = "always";
};
};
(hadoopServiceConfig {
name = "zkfc";
description = "Hadoop HDFS ZooKeeper failover controller";
})
(mkIf cfg.hdfs.httpfs.enable {
systemd.services.hdfs-httpfs = {
description = "Hadoop httpfs";
wantedBy = [ "multi-user.target" ];
inherit (cfg.hdfs.httpfs) restartIfChanged;
environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;
preStart = ''
mkdir -p $HTTPFS_TEMP
'';
serviceConfig = {
User = "httpfs";
SyslogIdentifier = "hdfs-httpfs";
ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} httpfs";
Restart = "always";
};
};
networking.firewall.allowedTCPPorts = (mkIf cfg.hdfs.httpfs.openFirewall [
(hadoopServiceConfig {
name = "HTTPFS";
environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;
preStart = "mkdir -p $HTTPFS_TEMP";
User = "httpfs";
allowedTCPPorts = [
14000 # httpfs.http.port
]);
];
})
(mkIf (
cfg.hdfs.namenode.enable || cfg.hdfs.datanode.enable || cfg.hdfs.journalnode.enable || cfg.hdfs.zkfc.enable
) {
(mkIf cfg.gatewayRole.enable {
users.users.hdfs = {
description = "Hadoop HDFS user";
group = "hadoop";
@ -199,5 +199,6 @@ in
isSystemUser = true;
};
})
];
}

View file

@ -13,23 +13,77 @@ let
'';
default = false;
};
extraFlags = mkOption{
type = with types; listOf str;
default = [];
description = "Extra command line flags to pass to the service";
example = [
"-Dcom.sun.management.jmxremote"
"-Dcom.sun.management.jmxremote.port=8010"
];
};
extraEnv = mkOption{
type = with types; attrsOf str;
default = {};
description = "Extra environment variables";
};
in
{
options.services.hadoop.yarn = {
resourcemanager = {
enable = mkEnableOption "Whether to run the Hadoop YARN ResourceManager";
inherit restartIfChanged;
enable = mkEnableOption "Hadoop YARN ResourceManager";
inherit restartIfChanged extraFlags extraEnv;
openFirewall = mkOption {
type = types.bool;
default = true;
default = false;
description = ''
Open firewall ports for resourcemanager
'';
};
};
nodemanager = {
enable = mkEnableOption "Whether to run the Hadoop YARN NodeManager";
inherit restartIfChanged;
enable = mkEnableOption "Hadoop YARN NodeManager";
inherit restartIfChanged extraFlags extraEnv;
resource = {
cpuVCores = mkOption {
description = "Number of vcores that can be allocated for containers.";
type = with types; nullOr ints.positive;
default = null;
};
maximumAllocationVCores = mkOption {
description = "The maximum virtual CPU cores any container can be allocated.";
type = with types; nullOr ints.positive;
default = null;
};
memoryMB = mkOption {
description = "Amount of physical memory, in MB, that can be allocated for containers.";
type = with types; nullOr ints.positive;
default = null;
};
maximumAllocationMB = mkOption {
description = "The maximum physical memory any container can be allocated.";
type = with types; nullOr ints.positive;
default = null;
};
};
useCGroups = mkOption {
type = types.bool;
default = true;
description = ''
Use cgroups to enforce resource limits on containers
'';
};
localDir = mkOption {
description = "List of directories to store localized files in.";
type = with types; nullOr (listOf path);
example = [ "/var/lib/hadoop/yarn/nm" ];
default = null;
};
addBinBash = mkOption {
type = types.bool;
default = true;
@ -39,7 +93,7 @@ in
};
openFirewall = mkOption {
type = types.bool;
default = true;
default = false;
description = ''
Open firewall ports for nodemanager.
Because containers can listen on any ephemeral port, TCP ports 102465535 will be opened.
@ -49,10 +103,7 @@ in
};
config = mkMerge [
(mkIf (
cfg.yarn.resourcemanager.enable || cfg.yarn.nodemanager.enable
) {
(mkIf cfg.gatewayRole.enable {
users.users.yarn = {
description = "Hadoop YARN user";
group = "hadoop";
@ -65,15 +116,19 @@ in
description = "Hadoop YARN ResourceManager";
wantedBy = [ "multi-user.target" ];
inherit (cfg.yarn.resourcemanager) restartIfChanged;
environment = cfg.yarn.resourcemanager.extraEnv;
serviceConfig = {
User = "yarn";
SyslogIdentifier = "yarn-resourcemanager";
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
" resourcemanager";
" resourcemanager ${escapeShellArgs cfg.yarn.resourcemanager.extraFlags}";
Restart = "always";
};
};
services.hadoop.gatewayRole.enable = true;
networking.firewall.allowedTCPPorts = (mkIf cfg.yarn.resourcemanager.openFirewall [
8088 # resourcemanager.webapp.address
8030 # resourcemanager.scheduler.address
@ -94,6 +149,7 @@ in
description = "Hadoop YARN NodeManager";
wantedBy = [ "multi-user.target" ];
inherit (cfg.yarn.nodemanager) restartIfChanged;
environment = cfg.yarn.nodemanager.extraEnv;
preStart = ''
# create log dir
@ -101,8 +157,9 @@ in
chown yarn:hadoop /var/log/hadoop/yarn/nodemanager
# set up setuid container executor binary
umount /run/wrappers/yarn-nodemanager/cgroup/cpu || true
rm -rf /run/wrappers/yarn-nodemanager/ || true
mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop}
mkdir -p /run/wrappers/yarn-nodemanager/{bin,etc/hadoop,cgroup/cpu}
cp ${cfg.package}/lib/${cfg.package.untarDir}/bin/container-executor /run/wrappers/yarn-nodemanager/bin/
chgrp hadoop /run/wrappers/yarn-nodemanager/bin/container-executor
chmod 6050 /run/wrappers/yarn-nodemanager/bin/container-executor
@ -114,11 +171,26 @@ in
SyslogIdentifier = "yarn-nodemanager";
PermissionsStartOnly = true;
ExecStart = "${cfg.package}/bin/yarn --config ${hadoopConf} " +
" nodemanager";
" nodemanager ${escapeShellArgs cfg.yarn.nodemanager.extraFlags}";
Restart = "always";
};
};
services.hadoop.gatewayRole.enable = true;
services.hadoop.yarnSiteInternal = with cfg.yarn.nodemanager; {
"yarn.nodemanager.local-dirs" = localDir;
"yarn.scheduler.maximum-allocation-vcores" = resource.maximumAllocationVCores;
"yarn.scheduler.maximum-allocation-mb" = resource.maximumAllocationMB;
"yarn.nodemanager.resource.cpu-vcores" = resource.cpuVCores;
"yarn.nodemanager.resource.memory-mb" = resource.memoryMB;
} // mkIf useCGroups {
"yarn.nodemanager.linux-container-executor.cgroups.hierarchy" = "/hadoop-yarn";
"yarn.nodemanager.linux-container-executor.resources-handler.class" = "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler";
"yarn.nodemanager.linux-container-executor.cgroups.mount" = "true";
"yarn.nodemanager.linux-container-executor.cgroups.mount-path" = "/run/wrappers/yarn-nodemanager/cgroup";
};
networking.firewall.allowedTCPPortRanges = [
(mkIf (cfg.yarn.nodemanager.openFirewall) {from = 1024; to = 65535;})
];

View file

@ -189,9 +189,9 @@ in
grocy = handleTest ./grocy.nix {};
grub = handleTest ./grub.nix {};
gvisor = handleTest ./gvisor.nix {};
hadoop.all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/hadoop.nix {};
hadoop.hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/hdfs.nix {};
hadoop.yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop/yarn.nix {};
hadoop = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop; };
hadoop_3_2 = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop_3_2; };
hadoop2 = import ./hadoop { inherit handleTestOn; package=pkgs.hadoop2; };
haka = handleTest ./haka.nix {};
haproxy = handleTest ./haproxy.nix {};
hardened = handleTest ./hardened.nix {};

View file

@ -0,0 +1,7 @@
{ handleTestOn, package, ... }:
{
all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop.nix { inherit package; };
hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hdfs.nix { inherit package; };
yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./yarn.nix { inherit package; };
}

View file

@ -1,121 +1,148 @@
# This test is very comprehensive. It tests whether all hadoop services work well with each other.
# Run this when updating the Hadoop package or making significant changes to the hadoop module.
# For a more basic test, see hdfs.nix and yarn.nix
import ../make-test-python.nix ({pkgs, ...}: {
import ../make-test-python.nix ({ package, ... }: {
name = "hadoop-combined";
nodes = let
package = pkgs.hadoop;
coreSite = {
"fs.defaultFS" = "hdfs://ns1";
};
hdfsSite = {
"dfs.namenode.rpc-bind-host" = "0.0.0.0";
"dfs.namenode.http-bind-host" = "0.0.0.0";
"dfs.namenode.servicerpc-bind-host" = "0.0.0.0";
nodes =
let
coreSite = {
"fs.defaultFS" = "hdfs://ns1";
};
hdfsSite = {
# HA Quorum Journal Manager configuration
"dfs.nameservices" = "ns1";
"dfs.ha.namenodes.ns1" = "nn1,nn2";
"dfs.namenode.shared.edits.dir.ns1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
"dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
"dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
"dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
"dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
"dfs.namenode.http-address.ns1.nn2" = "nn2:9870";
# HA Quorum Journal Manager configuration
"dfs.nameservices" = "ns1";
"dfs.ha.namenodes.ns1" = "nn1,nn2";
"dfs.namenode.shared.edits.dir.ns1.nn1" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.shared.edits.dir.ns1.nn2" = "qjournal://jn1:8485;jn2:8485;jn3:8485/ns1";
"dfs.namenode.rpc-address.ns1.nn1" = "nn1:8020";
"dfs.namenode.rpc-address.ns1.nn2" = "nn2:8020";
"dfs.namenode.servicerpc-address.ns1.nn1" = "nn1:8022";
"dfs.namenode.servicerpc-address.ns1.nn2" = "nn2:8022";
"dfs.namenode.http-address.ns1.nn1" = "nn1:9870";
"dfs.namenode.http-address.ns1.nn2" = "nn2:9870";
# Automatic failover configuration
"dfs.client.failover.proxy.provider.ns1" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
"dfs.ha.automatic-failover.enabled.ns1" = "true";
"dfs.ha.fencing.methods" = "shell(true)";
"ha.zookeeper.quorum" = "zk1:2181";
};
yarnSite = {
"yarn.resourcemanager.zk-address" = "zk1:2181";
"yarn.resourcemanager.ha.enabled" = "true";
"yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
"yarn.resourcemanager.hostname.rm1" = "rm1";
"yarn.resourcemanager.hostname.rm2" = "rm2";
"yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
"yarn.resourcemanager.cluster-id" = "cluster1";
# yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set. This shouldn't be necessary, but there's a bug in
# hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
# that causes AM containers to fail otherwise.
"yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
"yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
};
in
{
zk1 = { ... }: {
services.zookeeper.enable = true;
networking.firewall.allowedTCPPorts = [ 2181 ];
};
# Automatic failover configuration
"dfs.client.failover.proxy.provider.ns1" = "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider";
"dfs.ha.automatic-failover.enabled.ns1" = "true";
"dfs.ha.fencing.methods" = "shell(true)";
"ha.zookeeper.quorum" = "zk1:2181";
};
yarnSiteHA = {
"yarn.resourcemanager.zk-address" = "zk1:2181";
"yarn.resourcemanager.ha.enabled" = "true";
"yarn.resourcemanager.ha.rm-ids" = "rm1,rm2";
"yarn.resourcemanager.hostname.rm1" = "rm1";
"yarn.resourcemanager.hostname.rm2" = "rm2";
"yarn.resourcemanager.ha.automatic-failover.enabled" = "true";
"yarn.resourcemanager.cluster-id" = "cluster1";
# yarn.resourcemanager.webapp.address needs to be defined even though yarn.resourcemanager.hostname is set. This shouldn't be necessary, but there's a bug in
# hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/amfilter/AmFilterInitializer.java:70
# that causes AM containers to fail otherwise.
"yarn.resourcemanager.webapp.address.rm1" = "rm1:8088";
"yarn.resourcemanager.webapp.address.rm2" = "rm2:8088";
};
in {
zk1 = { ... }: {
services.zookeeper.enable = true;
networking.firewall.allowedTCPPorts = [ 2181 ];
};
# HDFS cluster
nn1 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode = {
enable = true;
openFirewall = true;
};
hdfs.zkfc.enable = true;
};
};
nn2 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode = {
enable = true;
openFirewall = true;
};
hdfs.zkfc.enable = true;
};
};
# HDFS cluster
nn1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode.enable = true;
hdfs.zkfc.enable = true;
jn1 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
};
};
};
};
nn2 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.namenode.enable = true;
hdfs.zkfc.enable = true;
jn2 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
};
};
};
jn3 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode = {
enable = true;
openFirewall = true;
};
};
};
};
jn1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode.enable = true;
dn1 = { ... }: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.datanode = {
enable = true;
openFirewall = true;
};
};
};
};
jn2 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode.enable = true;
};
};
jn3 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.journalnode.enable = true;
};
};
dn1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
hdfs.datanode.enable = true;
# YARN cluster
rm1 = { options, ... }: {
services.hadoop = {
inherit package coreSite hdfsSite yarnSite;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
};
};
};
};
# YARN cluster
rm1 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.resourcemanager.enable = true;
rm2 = { options, ... }: {
services.hadoop = {
inherit package coreSite hdfsSite yarnSite;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
};
};
};
};
rm2 = {pkgs, options, ...}: {
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.resourcemanager.enable = true;
nm1 = { options, ... }: {
virtualisation.memorySize = 2048;
services.hadoop = {
inherit package coreSite hdfsSite yarnSite;
yarn.nodemanager = {
enable = true;
openFirewall = true;
};
};
};
};
nm1 = {pkgs, options, ...}: {
virtualisation.memorySize = 2048;
services.hadoop = {
inherit package coreSite hdfsSite;
yarnSite = options.services.hadoop.yarnSite.default // yarnSiteHA;
yarn.nodemanager.enable = true;
client = { options, ... }: {
services.hadoop = {
gatewayRole.enable = true;
inherit package coreSite hdfsSite yarnSite;
};
};
};
};
testScript = ''
@ -173,26 +200,26 @@ import ../make-test-python.nix ({pkgs, ...}: {
# DN should have started by now, but confirm anyway
dn1.wait_for_unit("hdfs-datanode")
# Print states of namenodes
dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
# Wait for cluster to exit safemode
dn1.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
client.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
# test R/W
dn1.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
assert "testfilecontents" in dn1.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
client.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
# Test NN failover
nn1.succeed("systemctl stop hdfs-namenode")
assert "active" in dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
assert "testfilecontents" in dn1.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
assert "active" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
assert "testfilecontents" in client.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
nn1.succeed("systemctl start hdfs-namenode")
nn1.wait_for_open_port(9870)
nn1.wait_for_open_port(8022)
nn1.wait_for_open_port(8020)
assert "standby" in dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
dn1.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
assert "standby" in client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState")
client.succeed("sudo -u hdfs hdfs haadmin -getAllServiceState | systemd-cat")
#### YARN tests ####
@ -208,21 +235,21 @@ import ../make-test-python.nix ({pkgs, ...}: {
nm1.wait_for_unit("yarn-nodemanager")
nm1.wait_for_open_port(8042)
nm1.wait_for_open_port(8040)
nm1.wait_until_succeeds("yarn node -list | grep Nodes:1")
nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
nm1.succeed("sudo -u yarn yarn node -list | systemd-cat")
client.wait_until_succeeds("yarn node -list | grep Nodes:1")
client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
client.succeed("sudo -u yarn yarn node -list | systemd-cat")
# Test RM failover
rm1.succeed("systemctl stop yarn-resourcemanager")
assert "standby" not in nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
assert "standby" not in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
rm1.succeed("systemctl start yarn-resourcemanager")
rm1.wait_for_unit("yarn-resourcemanager")
rm1.wait_for_open_port(8088)
assert "standby" in nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
nm1.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
assert "standby" in client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState")
client.succeed("sudo -u yarn yarn rmadmin -getAllServiceState | systemd-cat")
assert "Estimated value of Pi is" in nm1.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
assert "SUCCEEDED" in nm1.succeed("yarn application -list -appStates FINISHED")
assert "Estimated value of Pi is" in client.succeed("HADOOP_USER_NAME=hdfs yarn jar $(readlink $(which yarn) | sed -r 's~bin/yarn~lib/hadoop-*/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar~g') pi 2 10")
assert "SUCCEEDED" in client.succeed("yarn application -list -appStates FINISHED")
'';
})

View file

@ -1,32 +1,46 @@
# Test a minimal HDFS cluster with no HA
import ../make-test-python.nix ({...}: {
nodes = {
namenode = {pkgs, ...}: {
import ../make-test-python.nix ({ package, lib, ... }:
with lib;
{
name = "hadoop-hdfs";
nodes = let
coreSite = {
"fs.defaultFS" = "hdfs://namenode:8020";
"hadoop.proxyuser.httpfs.groups" = "*";
"hadoop.proxyuser.httpfs.hosts" = "*";
};
in {
namenode = { pkgs, ... }: {
services.hadoop = {
package = pkgs.hadoop;
inherit package;
hdfs = {
namenode = {
enable = true;
openFirewall = true;
formatOnInit = true;
};
httpfs.enable = true;
};
coreSite = {
"fs.defaultFS" = "hdfs://namenode:8020";
"hadoop.proxyuser.httpfs.groups" = "*";
"hadoop.proxyuser.httpfs.hosts" = "*";
httpfs = {
# The NixOS hadoop module only support webHDFS on 3.3 and newer
enable = mkIf (versionAtLeast package.version "3.3") true;
openFirewall = true;
};
};
inherit coreSite;
};
};
datanode = {pkgs, ...}: {
datanode = { pkgs, ... }: {
services.hadoop = {
package = pkgs.hadoop;
hdfs.datanode.enable = true;
coreSite = {
"fs.defaultFS" = "hdfs://namenode:8020";
"hadoop.proxyuser.httpfs.groups" = "*";
"hadoop.proxyuser.httpfs.hosts" = "*";
inherit package;
hdfs.datanode = {
enable = true;
openFirewall = true;
dataDirs = [{
type = "DISK";
path = "/tmp/dn1";
}];
};
inherit coreSite;
};
};
};
@ -37,21 +51,32 @@ import ../make-test-python.nix ({...}: {
namenode.wait_for_unit("hdfs-namenode")
namenode.wait_for_unit("network.target")
namenode.wait_for_open_port(8020)
namenode.succeed("ss -tulpne | systemd-cat")
namenode.succeed("cat /etc/hadoop*/hdfs-site.xml | systemd-cat")
namenode.wait_for_open_port(9870)
datanode.wait_for_unit("hdfs-datanode")
datanode.wait_for_unit("network.target")
'' + ( if versionAtLeast package.version "3" then ''
datanode.wait_for_open_port(9864)
datanode.wait_for_open_port(9866)
datanode.wait_for_open_port(9867)
namenode.succeed("curl -f http://namenode:9870")
datanode.succeed("curl -f http://datanode:9864")
'' else ''
datanode.wait_for_open_port(50075)
datanode.wait_for_open_port(50010)
datanode.wait_for_open_port(50020)
datanode.succeed("curl -f http://datanode:50075")
'' ) + ''
namenode.succeed("curl -f http://namenode:9870")
datanode.succeed("sudo -u hdfs hdfs dfsadmin -safemode wait")
datanode.succeed("echo testfilecontents | sudo -u hdfs hdfs dfs -put - /testfile")
assert "testfilecontents" in datanode.succeed("sudo -u hdfs hdfs dfs -cat /testfile")
'' + optionalString ( versionAtLeast package.version "3.3" ) ''
namenode.wait_for_unit("hdfs-httpfs")
namenode.wait_for_open_port(14000)
assert "testfilecontents" in datanode.succeed("curl -f \"http://namenode:14000/webhdfs/v1/testfile?user.name=hdfs&op=OPEN\" 2>&1")

View file

@ -1,22 +1,30 @@
# This only tests if YARN is able to start its services
import ../make-test-python.nix ({...}: {
nodes = {
resourcemanager = {pkgs, ...}: {
services.hadoop.package = pkgs.hadoop;
services.hadoop.yarn.resourcemanager.enable = true;
services.hadoop.yarnSite = {
"yarn.resourcemanager.scheduler.class" = "org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler";
};
};
nodemanager = {pkgs, ...}: {
services.hadoop.package = pkgs.hadoop;
services.hadoop.yarn.nodemanager.enable = true;
services.hadoop.yarnSite = {
"yarn.resourcemanager.hostname" = "resourcemanager";
"yarn.nodemanager.log-dirs" = "/tmp/userlogs";
};
};
import ../make-test-python.nix ({ package, ... }: {
name = "hadoop-yarn";
nodes = {
resourcemanager = { ... }: {
services.hadoop = {
inherit package;
yarn.resourcemanager = {
enable = true;
openFirewall = true;
};
};
};
nodemanager = { options, lib, ... }: {
services.hadoop = {
inherit package;
yarn.nodemanager = {
enable = true;
openFirewall = true;
};
yarnSite = options.services.hadoop.yarnSite.default // {
"yarn.resourcemanager.hostname" = "resourcemanager";
"yarn.nodemanager.log-dirs" = "/tmp/userlogs";
};
};
};
};
testScript = ''

View file

@ -15,6 +15,8 @@
, zlib
, zstd
, openssl
, glibc
, nixosTests
}:
with lib;
@ -22,7 +24,7 @@ with lib;
assert elem stdenv.system [ "x86_64-linux" "x86_64-darwin" "aarch64-linux" "aarch64-darwin" ];
let
common = { pname, version, untarDir ? "${pname}-${version}", sha256, jdk, openssl ? null, nativeLibs ? [ ], libPatches ? "" }:
common = { pname, version, untarDir ? "${pname}-${version}", sha256, jdk, openssl ? null, nativeLibs ? [ ], libPatches ? "", tests }:
stdenv.mkDerivation rec {
inherit pname version jdk libPatches untarDir openssl;
src = fetchurl {
@ -38,7 +40,10 @@ let
installPhase = ''
mkdir -p $out/{lib/${untarDir}/conf,bin,lib}
mv * $out/lib/${untarDir}
'' + optionalString stdenv.isLinux ''
# All versions need container-executor, but some versions can't use autoPatchelf because of broken SSL versions
patchelf --set-interpreter ${glibc.out}/lib64/ld-linux-x86-64.so.2 $out/lib/${untarDir}/bin/container-executor
'' + ''
for n in $(find $out/lib/${untarDir}/bin -type f ! -name "*.*"); do
makeWrapper "$n" "$out/bin/$(basename $n)"\
--set-default JAVA_HOME ${jdk.home}\
@ -49,6 +54,8 @@ let
done
'' + libPatches;
passthru = { inherit tests; };
meta = {
homepage = "https://hadoop.apache.org/";
description = "Framework for distributed processing of large data sets across clusters of computers";
@ -73,30 +80,34 @@ in
{
# Different version of hadoop support different java runtime versions
# https://cwiki.apache.org/confluence/display/HADOOP/Hadoop+Java+Versions
hadoop_3_3 =
common
(rec {
pname = "hadoop";
version = "3.3.1";
untarDir = "${pname}-${version}";
sha256 = rec {
x86_64-linux = "1b3v16ihysqaxw8za1r5jlnphy8dwhivdx2d0z64309w57ihlxxd";
x86_64-darwin = x86_64-linux;
aarch64-linux = "00ln18vpi07jq2slk3kplyhcj8ad41n0yl880q5cihilk7daclxz";
aarch64-darwin = aarch64-linux;
};
inherit openssl;
nativeLibs = [ stdenv.cc.cc.lib protobuf3_7 zlib snappy ];
libPatches = ''
ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
'' + optionalString stdenv.isLinux "patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0";
jdk = jdk11_headless;
});
hadoop_3_3 = common rec {
pname = "hadoop";
version = "3.3.1";
untarDir = "${pname}-${version}";
sha256 = rec {
x86_64-linux = "1b3v16ihysqaxw8za1r5jlnphy8dwhivdx2d0z64309w57ihlxxd";
x86_64-darwin = x86_64-linux;
aarch64-linux = "00ln18vpi07jq2slk3kplyhcj8ad41n0yl880q5cihilk7daclxz";
aarch64-darwin = aarch64-linux;
};
jdk = jdk11_headless;
inherit openssl;
# TODO: Package and add Intel Storage Acceleration Library
nativeLibs = [ stdenv.cc.cc.lib protobuf3_7 zlib snappy ];
libPatches = ''
ln -s ${getLib cyrus_sasl}/lib/libsasl2.so $out/lib/${untarDir}/lib/native/libsasl2.so.2
ln -s ${getLib openssl}/lib/libcrypto.so $out/lib/${untarDir}/lib/native/
ln -s ${getLib zlib}/lib/libz.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib zstd}/lib/libzstd.so.1 $out/lib/${untarDir}/lib/native/
ln -s ${getLib bzip2}/lib/libbz2.so.1 $out/lib/${untarDir}/lib/native/
'' + optionalString stdenv.isLinux ''
# libjvm.so for Java >=11
patchelf --add-rpath ${jdk.home}/lib/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0
# Java 8 has libjvm.so at a different path
patchelf --add-rpath ${jdk.home}/jre/lib/amd64/server $out/lib/${untarDir}/lib/native/libnativetask.so.1.0.0
'';
tests = nixosTests.hadoop;
};
hadoop_3_2 = common rec {
pname = "hadoop";
version = "3.2.2";
@ -104,11 +115,13 @@ in
jdk = jdk8_headless;
# not using native libs because of broken openssl_1_0_2 dependency
# can be manually overriden
tests = nixosTests.hadoop_3_2;
};
hadoop2 = common rec {
pname = "hadoop";
version = "2.10.1";
sha256.x86_64-linux = "1w31x4bk9f2swnx8qxx0cgwfg8vbpm6cy5lvfnbbpl3rsjhmyg97";
jdk = jdk8_headless;
tests = nixosTests.hadoop2;
};
}