Appendix A. Source Code for the addnode.pl Perl Script


#!/usr/bin/perl -w

################################################################################
#
#  Program: Add Node
#
#  File:    addnode.pl
#
#  Author:  Aaron Lees
#           alees@uvic.ca
#
#  Date:    September 15, 1999
#
#  Description:
#
#  Configures a beowulf cluster master node so that a new node can be added.
#
#  Usage:
#
#  addnode.pl -n NodeNumber -a EthernetAddress -k KernelImage
#
################################################################################

use strict;
use Getopt::Std;

# Global constants.
# Directory on the master node that holds each node's root file system (in a
# sub-directory named after the node), the "template" tree the nodes are
# cloned from, and the tagged kernel images (under "kernels").
my $FILE_SYSTEM_DIR = "/tftpboot";
# A node's name is this prefix followed by its node number.
my $NAME_PREFIX = "node";
# A node's ip address is this prefix followed by ".NodeNumber"; the same
# prefix is used for the network (.0) and broadcast (.255) addresses.
my $IP_PREFIX = "192.168.1";
# Domain appended to the node name to form its full host name.
my $DOMAIN = "tribec";

# Global variables.
# Taken from the command line (-a, -k and -n respectively).
my $nodeHWAddress;
my $kernel;
my $nodeNumber;
# Derived from $nodeNumber using the prefixes above.
my $nodeName;
my $nodeIPAddress;


#-------------------------------------------------------------------------------
# Check initial conditions.
#-------------------------------------------------------------------------------

# Adding a node is only allowed for root.
# $> holds the effective uid of this process; root is uid 0.
if( $> != 0 ) {
    die( "Error: you must be root to add a new node to the cluster.\n" );
}

# Parse the command line. All three switches take a value and all three are
# required.
my %args;
my $usage = "Usage: addnode.pl -n NodeNumber -a EthernetAddress -k KernelImage\n";

# getopts returns false when it meets an unknown switch or a switch that is
# missing its value; treat that like a missing argument instead of silently
# carrying on with a half-understood command line.
unless( getopts( "n:a:k:", \%args ) ) {
    die( $usage );
}

unless( $args{n} && $args{a} && $args{k} ) {
    die( $usage );
}

$nodeNumber = $args{n};
$nodeHWAddress = $args{a};
$kernel = $args{k};

# The node number becomes the last part of the node's ip address and part of
# its host name, so it must be a plain decimal integer.  Only 1 to 254 can be
# used: .0 is written out as the network address and .255 as the broadcast
# address of every node.
unless( $nodeNumber =~ /\A[0-9]+\z/ && $nodeNumber >= 1 && $nodeNumber <= 254 ) {
    die( "Error: the node number must be an integer between 1 and 254.\n" );
}

# An ethernet hardware address is exactly six pairs of hex digits separated by
# colons.  The pattern is anchored so that nothing may precede or follow it.
my $hexPair = qr/[0-9A-Fa-f]{2}/;
unless( $nodeHWAddress =~ /\A $hexPair (?: : $hexPair ){5} \z/x ) {
    die( "Error: $nodeHWAddress is not a valid ethernet hardware address.\n" );
}

# The kernel image must be given as an absolute path...
unless( $kernel =~ /^\// ) {
    die( "Error: the path to the kernel image must be absolute.\n" );
}

# ...and must name an existing plain file.
unless( -f $kernel ) {
    die( "Error: the kernel image $kernel does not exist.\n" );
}


#-------------------------------------------------------------------------------
# Clone the root file system from template.
#-------------------------------------------------------------------------------

print( "Cloning root file system...\n" );

$nodeName = "$NAME_PREFIX$nodeNumber";
$nodeIPAddress = "$IP_PREFIX.$nodeNumber";

print( "    Making directories...\n" );

# Everything below uses relative paths, so a failed chdir must stop the script
# before anything is created in the wrong place.
chdir( $FILE_SYSTEM_DIR )
    or die( "Error: cannot change directory to $FILE_SYSTEM_DIR: $!\n" );

# An already existing directory is accepted, as it always has been.
unless( -d $nodeName ) {
    mkdir( $nodeName, 0755 )
        or die( "Error: cannot create $FILE_SYSTEM_DIR/$nodeName: $!\n" );
}
chdir( $nodeName )
    or die( "Error: cannot change directory to $FILE_SYSTEM_DIR/$nodeName: $!\n" );

# Create directories
foreach my $dir ("proc", "tmp", "home", "usr") {
    unless( -d $dir ) {
        mkdir( $dir, 0755 )
            or die( "Error: cannot create $FILE_SYSTEM_DIR/$nodeName/$dir: $!\n" );
    }
}

# tmp must be rw to all with the sticky bit set.  chmod takes the mode as a
# number, so it has to be written as an octal literal (leading 0).
chmod( 01777, "tmp" )
    or die( "Error: cannot set permissions on $FILE_SYSTEM_DIR/$nodeName/tmp: $!\n" );

print( "    Copying directories...\n" );

# All nodes have separate copies of these directories.  The list form of
# system runs cp directly, without a shell.
foreach my $dir ("dev", "etc", "var", "root") {
    system( "cp", "-a", "../template/$dir", "." ) == 0
        or die( "Error: failed to copy $FILE_SYSTEM_DIR/template/$dir.\n" );
}

print( "    Linking directories...\n" );

# These directories are shared throughout all nodes: cpio -pl links the files
# into the node's tree instead of copying them.  The pipeline needs a shell.
chdir( "$FILE_SYSTEM_DIR/template" )
    or die( "Error: cannot change directory to $FILE_SYSTEM_DIR/template: $!\n" );
foreach my $dir ("bin", "sbin", "lib") {
    system( "find $dir | cpio -pl ../$nodeName 2> /dev/null" ) == 0
        or warn( "Warning: linking $dir into $FILE_SYSTEM_DIR/$nodeName reported an error.\n" );
}

# hosts and hosts.equiv is the same on all nodes.
foreach my $file ("/etc/hosts", "/etc/hosts.equiv") {
    system( "ln", "-f", $file, "$FILE_SYSTEM_DIR/$nodeName/etc" ) == 0
        or warn( "Warning: could not link $file into $FILE_SYSTEM_DIR/$nodeName/etc.\n" );
}


#-------------------------------------------------------------------------------
# Configure the root file system.
#-------------------------------------------------------------------------------

print( "Configuring root file system...\n" );

# The files below are named relative to the node's root file system, so stop
# if we cannot get there rather than append to files somewhere else.
chdir( "$FILE_SYSTEM_DIR/$nodeName" )
    or die( "Error: cannot change directory to $FILE_SYSTEM_DIR/$nodeName: $!\n" );

# Tell the node what its host name and domain name are.
print( "    Editing network...\n" );
my $networkFile = "etc/sysconfig/network";
open( my $network, ">>", $networkFile )
    or die( "Error: cannot open $networkFile: $!\n" );
print( $network "HOSTNAME=$nodeName.$DOMAIN\n" );
# Buffered write errors only show up when the file is closed.
close( $network )
    or die( "Error: cannot write $networkFile: $!\n" );

# Tell the node what its ip address, network address and broadcast address
# are.
print( "    Editing ifcfg-eth0...\n" );
my $ifcfgFile = "etc/sysconfig/network-scripts/ifcfg-eth0";
open( my $ifcfg, ">>", $ifcfgFile )
    or die( "Error: cannot open $ifcfgFile: $!\n" );
print( $ifcfg "IPADDR=$nodeIPAddress\n" );
print( $ifcfg "NETWORK=$IP_PREFIX.0\n" );
print( $ifcfg "BROADCAST=$IP_PREFIX.255\n" );
close( $ifcfg )
    or die( "Error: cannot write $ifcfgFile: $!\n" );


#-------------------------------------------------------------------------------
# Configure files on the master node.
#-------------------------------------------------------------------------------

print( "Configuring master node...\n" );

# Create the aliases so nodes can be referred to as "nodeXX" instead of ip
# addresses.
print( "    Editing hosts...\n" );
open( my $hosts, ">>", "/etc/hosts" )
    or die( "Error: cannot open /etc/hosts: $!\n" );
print( $hosts "$nodeIPAddress\t$nodeName.$DOMAIN\t$nodeName\n" );
# Buffered write errors only show up when the file is closed.
close( $hosts )
    or die( "Error: cannot write /etc/hosts: $!\n" );

# Let all nodes in the cluster rsh between each other without the need for
# passwords.  Each host goes on a line of its own, so the entry must end with
# a newline or the next node added would be glued onto this one.
print( "    Editing hosts.equiv...\n" );
open( my $hostsEquiv, ">>", "/etc/hosts.equiv" )
    or die( "Error: cannot open /etc/hosts.equiv: $!\n" );
print( $hostsEquiv "$nodeName\n" );
close( $hostsEquiv )
    or die( "Error: cannot write /etc/hosts.equiv: $!\n" );

# Create a tagged kernel image in the correct location.  The list form of
# system passes the kernel path to mknbi as a single argument without going
# through a shell.  Stop on failure: the dhcpd.conf entry written below would
# otherwise name an image that does not exist.
my $taggedKernel = "$FILE_SYSTEM_DIR/kernels/$nodeName";
system( "mknbi", "-x", "-k", $kernel, "-o", $taggedKernel ) == 0
    or die( "Error: mknbi failed to create $taggedKernel.\n" );

# Tell the DHCP server what ip address corresponds to this node's hardware
# address.
print( "    Editing dhcpd.conf...\n" );
open( my $dhcp, ">>", "/etc/dhcpd.conf" )
    or die( "Error: cannot open /etc/dhcpd.conf: $!\n" );
print( $dhcp "\nhost $nodeName {\n" );
print( $dhcp "    hardware ethernet $nodeHWAddress;\n" );
print( $dhcp "    fixed-address $nodeIPAddress;\n" );
print( $dhcp "    filename \"$FILE_SYSTEM_DIR/kernels/$nodeName\";\n" );
print( $dhcp "}\n" );
close( $dhcp )
    or die( "Error: cannot write /etc/dhcpd.conf: $!\n" );

# Put the root file system in the export database.
print( "    Editing exports...\n" );
open( my $exports, ">>", "/etc/exports" )
    or die( "Error: cannot open /etc/exports: $!\n" );
print( $exports "$FILE_SYSTEM_DIR/$nodeName\t$nodeIPAddress(rw,no_root_squash)\n" );
close( $exports )
    or die( "Error: cannot write /etc/exports: $!\n" );


#-------------------------------------------------------------------------------
# Restart affected services.
#-------------------------------------------------------------------------------

# Run each command in turn, without a shell.  A command that exits non-zero
# is reported but does not stop the remaining ones from being run.
foreach my $command (
    [ "/usr/lib/yp/ypinit", "-m" ],
    [ "/etc/rc.d/init.d/dhcpd", "restart" ],
    [ "/etc/rc.d/init.d/nfs", "restart" ],
) {
    system( @$command ) == 0
        or warn( "Warning: '@$command' did not complete successfully.\n" );
}

print( "Done.\n" );