nixos/hadoop: add HBase submodule

illustris 2022-05-07 18:00:04 +05:30 committed by pennae
parent cb2576c1b6
commit ac403b83fb
7 changed files with 297 additions and 16 deletions

View file

@@ -33,6 +33,7 @@ pkgs.runCommand "hadoop-conf" {} (with cfg; ''
mkdir -p $out/
cp ${siteXml "core-site.xml" (coreSite // coreSiteInternal)}/* $out/
cp ${siteXml "hdfs-site.xml" (hdfsSiteDefault // hdfsSite // hdfsSiteInternal)}/* $out/
cp ${siteXml "hbase-site.xml" (hbaseSiteDefault // hbaseSite // hbaseSiteInternal)}/* $out/
cp ${siteXml "mapred-site.xml" (mapredSiteDefault // mapredSite)}/* $out/
cp ${siteXml "yarn-site.xml" (yarnSiteDefault // yarnSite // yarnSiteInternal)}/* $out/
cp ${siteXml "httpfs-site.xml" httpfsSite}/* $out/

View file

@@ -5,7 +5,7 @@ let
in
with lib;
{
imports = [ ./yarn.nix ./hdfs.nix ];
imports = [ ./yarn.nix ./hdfs.nix ./hbase.nix ];
options.services.hadoop = {
coreSite = mkOption {

View file

@@ -0,0 +1,200 @@
{ config, lib, pkgs, ...}:
with lib;
let
cfg = config.services.hadoop;
hadoopConf = "${import ./conf.nix { inherit cfg pkgs lib; }}/";
mkIfNotNull = x: mkIf (x != null) x;
in
{
options.services.hadoop = {
gatewayRole.enableHbaseCli = mkOption {
description = "Whether to enable HBase CLI tools";
default = false;
type = types.bool;
};
hbaseSiteDefault = mkOption {
default = {
"hbase.regionserver.ipc.address" = "0.0.0.0";
"hbase.master.ipc.address" = "0.0.0.0";
"hbase.master.info.bindAddress" = "0.0.0.0";
"hbase.regionserver.info.bindAddress" = "0.0.0.0";
"hbase.cluster.distributed" = "true";
};
type = types.attrsOf types.anything;
description = ''
Default options for hbase-site.xml
'';
};
hbaseSite = mkOption {
default = {};
type = with types; attrsOf anything;
example = literalExpression ''
'';
description = ''
Additional options and overrides for hbase-site.xml
<link xlink:href="https://github.com/apache/hbase/blob/rel/2.4.11/hbase-common/src/main/resources/hbase-default.xml"/>
'';
};
hbaseSiteInternal = mkOption {
default = {};
type = with types; attrsOf anything;
internal = true;
description = ''
Internal option to add configs to hbase-site.xml based on module options
'';
};
hbase = {
package = mkOption {
type = types.package;
default = pkgs.hbase;
defaultText = literalExpression "pkgs.hbase";
description = "HBase package";
};
rootdir = mkOption {
description = ''
This option will set "hbase.rootdir" in hbase-site.xml and determine
the directory shared by region servers and into which HBase persists its data.
The URL should be 'fully-qualified' to include the filesystem scheme.
If a core-site.xml is provided, the FS scheme defaults to the value
of "fs.defaultFS".
Filesystems other than HDFS (like S3, QFS, Swift) are also supported.
'';
type = types.str;
example = "hdfs://nameservice1/hbase";
default = "/hbase";
};
zookeeperQuorum = mkOption {
description = ''
This option will set "hbase.zookeeper.quorum" in hbase-site.xml.
Comma separated list of servers in the ZooKeeper ensemble.
'';
type = with types; nullOr commas;
example = "zk1.internal,zk2.internal,zk3.internal";
default = null;
};
master = {
enable = mkEnableOption "HBase Master";
initHDFS = mkEnableOption "initialization of the hbase directory on HDFS";
openFirewall = mkOption {
type = types.bool;
default = false;
description = ''
Open firewall ports for HBase master.
'';
};
};
regionServer = {
enable = mkEnableOption "HBase RegionServer";
overrideHosts = mkOption {
type = types.bool;
default = true;
description = ''
Remove the /etc/hosts entries for "127.0.0.2" and "::1" defined in nixos/modules/config/networking.nix.
Region servers must be able to resolve their hostnames to their IP addresses, through PTR records
or /etc/hosts entries.
'';
};
openFirewall = mkOption {
type = types.bool;
default = false;
description = ''
Open firewall ports for HBase RegionServer.
'';
};
};
};
};
config = mkMerge [
(mkIf cfg.hbase.master.enable {
services.hadoop.gatewayRole = {
enable = true;
enableHbaseCli = mkDefault true;
};
systemd.services.hbase-master = {
description = "HBase master";
wantedBy = [ "multi-user.target" ];
preStart = mkIf cfg.hbase.master.initHDFS ''
HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfsadmin -safemode wait
HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfs -mkdir -p ${cfg.hbase.rootdir}
HADOOP_USER_NAME=hdfs ${cfg.package}/bin/hdfs --config ${hadoopConf} dfs -chown hbase ${cfg.hbase.rootdir}
'';
serviceConfig = {
User = "hbase";
SyslogIdentifier = "hbase-master";
ExecStart = "${cfg.hbase.package}/bin/hbase --config ${hadoopConf} " +
"master start";
Restart = "always";
};
};
services.hadoop.hbaseSiteInternal."hbase.rootdir" = cfg.hbase.rootdir;
networking.firewall.allowedTCPPorts = (mkIf cfg.hbase.master.openFirewall [
16000 16010
]);
})
(mkIf cfg.hbase.regionServer.enable {
services.hadoop.gatewayRole = {
enable = true;
enableHbaseCli = mkDefault true;
};
systemd.services.hbase-regionserver = {
description = "HBase RegionServer";
wantedBy = [ "multi-user.target" ];
serviceConfig = {
User = "hbase";
SyslogIdentifier = "hbase-regionserver";
ExecStart = "${cfg.hbase.package}/bin/hbase --config /etc/hadoop-conf/ " +
"regionserver start";
Restart = "always";
};
};
services.hadoop.hbaseSiteInternal."hbase.rootdir" = cfg.hbase.rootdir;
networking = {
firewall.allowedTCPPorts = (mkIf cfg.hbase.regionServer.openFirewall [
16020 16030
]);
hosts = mkIf cfg.hbase.regionServer.overrideHosts {
"127.0.0.2" = mkForce [ ];
"::1" = mkForce [ ];
};
};
})
(mkIf cfg.gatewayRole.enable {
environment.systemPackages = mkIf cfg.gatewayRole.enableHbaseCli [ cfg.hbase.package ];
services.hadoop.hbaseSiteInternal = with cfg.hbase; {
"hbase.zookeeper.quorum" = mkIfNotNull zookeeperQuorum;
};
users.users.hbase = {
description = "Hadoop HBase user";
group = "hadoop";
isSystemUser = true;
};
})
];
}
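
Taken together, the module lets a machine act as an HBase master, a RegionServer, or just a gateway host with the CLI tools; the hbase user and the generated hbase-site.xml follow from the gateway role once either service role is enabled. The snippet below is only a sketch of how a deployment might consume these options; the hostnames and the ZooKeeper quorum are placeholders, not part of this commit:

# Hypothetical master node built on the options added above.
# "namenode", "zk1", "zk2" and "zk3" are placeholder hostnames.
{
  services.hadoop = {
    coreSite."fs.defaultFS" = "hdfs://namenode:8020";
    hbase = {
      zookeeperQuorum = "zk1,zk2,zk3";
      master = {
        enable = true;
        initHDFS = true;      # create and chown hbase.rootdir on HDFS before the first start
        openFirewall = true;  # opens TCP 16000 and 16010
      };
    };
  };
}
# A RegionServer host would instead set services.hadoop.hbase.regionServer.enable
# (plus openFirewall for TCP 16020 and 16030 if needed).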

View file

@@ -3,8 +3,8 @@
with lib;
let
cfg = config.services.hbase;
opt = options.services.hbase;
cfg = config.services.hbase-standalone;
opt = options.services.hbase-standalone;
buildProperty = configAttr:
(builtins.concatStringsSep "\n"
@@ -35,16 +35,12 @@ in {
###### interface
options = {
services.hbase-standalone = {
services.hbase = {
enable = mkOption {
type = types.bool;
default = false;
description = lib.mdDoc ''
Whether to run HBase.
'';
};
enable = mkEnableOption ''
HBase master in standalone mode with embedded regionserver and zookeeper.
Do not use this configuration for production or for evaluating HBase performance.
'';
package = mkOption {
type = types.package;
@@ -108,12 +104,11 @@
};
};
};
###### implementation
config = mkIf config.services.hbase.enable {
config = mkIf cfg.enable {
systemd.tmpfiles.rules = [
"d '${cfg.dataDir}' - ${cfg.user} ${cfg.group} - -"

View file

@@ -4,4 +4,5 @@
all = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hadoop.nix { inherit package; };
hdfs = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hdfs.nix { inherit package; };
yarn = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./yarn.nix { inherit package; };
hbase = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./hbase.nix { inherit package; };
}
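
With this registration the HBase test sits next to the existing HDFS and YARN tests. Assuming the usual wiring of nixos/tests/all-tests.nix, it should then be buildable as the nixosTests.hadoop.hbase attribute (for example, nix-build -A nixosTests.hadoop.hbase from a nixpkgs checkout) on x86_64-linux and aarch64-linux.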

View file

@@ -0,0 +1,84 @@
# Test a minimal HBase cluster
{ pkgs, ... }:
import ../make-test-python.nix ({ hadoop ? pkgs.hadoop, hbase ? pkgs.hbase, ... }:
with pkgs.lib;
{
name = "hadoop-hbase";
nodes = let
coreSite = {
"fs.defaultFS" = "hdfs://namenode:8020";
};
defOpts = {
enable = true;
openFirewall = true;
};
zookeeperQuorum = "zookeeper";
in {
zookeeper = { ... }: {
services.zookeeper.enable = true;
networking.firewall.allowedTCPPorts = [ 2181 ];
};
namenode = { ... }: {
services.hadoop = {
hdfs = {
namenode = defOpts // { formatOnInit = true; };
};
inherit coreSite;
};
};
datanode = { ... }: {
virtualisation.diskSize = 8192;
services.hadoop = {
hdfs.datanode = defOpts;
inherit coreSite;
};
};
master = { ... }:{
services.hadoop = {
inherit coreSite;
hbase = {
inherit zookeeperQuorum;
master = defOpts // { initHDFS = true; };
};
};
};
regionserver = { ... }:{
services.hadoop = {
inherit coreSite;
hbase = {
inherit zookeeperQuorum;
regionServer = defOpts;
};
};
};
};
testScript = ''
start_all()
# wait for HDFS cluster
namenode.wait_for_unit("hdfs-namenode")
namenode.wait_for_unit("network.target")
namenode.wait_for_open_port(8020)
namenode.wait_for_open_port(9870)
datanode.wait_for_unit("hdfs-datanode")
datanode.wait_for_unit("network.target")
datanode.wait_for_open_port(9864)
datanode.wait_for_open_port(9866)
datanode.wait_for_open_port(9867)
# wait for ZK
zookeeper.wait_for_unit("zookeeper")
zookeeper.wait_for_open_port(2181)
# wait for HBase to start up
master.wait_for_unit("hbase-master")
regionserver.wait_for_unit("hbase-regionserver")
assert "1 active master, 0 backup masters, 1 servers" in master.succeed("echo status | HADOOP_USER_NAME=hbase hbase shell -n")
regionserver.wait_until_succeeds("echo \"create 't1','f1'\" | HADOOP_USER_NAME=hbase hbase shell -n")
assert "NAME => 'f1'" in regionserver.succeed("echo \"describe 't1'\" | HADOOP_USER_NAME=hbase hbase shell -n")
'';
})

View file

@@ -1,6 +1,6 @@
import ./make-test-python.nix ({ pkgs, lib, package ? pkgs.hbase, ... }:
{
name = "hbase";
name = "hbase-standalone";
meta = with lib.maintainers; {
maintainers = [ illustris ];
@@ -8,7 +8,7 @@ import ./make-test-python.nix ({ pkgs, lib, package ? pkgs.hbase, ... }:
nodes = {
hbase = { pkgs, ... }: {
services.hbase = {
services.hbase-standalone = {
enable = true;
inherit package;
# Needed for standalone mode in hbase 2+