#!/usr/bin/perl

#----------------------------------------------------------------------
# If you run your apache in a chroot environment on OpenBSD, or plan to,
# you may find this script useful.
#
# It does one of two things:
#	1. it copies all necessary files from
#		/bin,/sbin,/usr/bin,/usr/libdata/perl5, etc
#		to a target-directory, satisfying dependencies on shared-libraries
#		as it goes.
#		Addtionally, you can also copy some files necessary to enable
#		you to manage packages in your chroot environment, i.e.:
#			# chroot /var/www /usr/local/bin/bash
#			(chroot-bash)# export PKG_PATH=ftp://ftp.bytemine.net/pub/OpenBSD/4.3/packages/i386/
#			(chroot-bash)# pkg_add <some-package>
#			
#			if you choose to set up package management ("-pkg"), beware
#			that cat, bash, cp, ls, mount, ping, hostname, df, sh, stty, ftp,
#			rm, gzip and less are all copied into the chroot-hierarchy. 
#			That may be a security risk. You have to decide.
#
#	2. it checks a previously copied target-hierarchy for unsatisfied
#		dependencies.  This is especially useful if you're performing
#		package management in your chroot-env.  It is possible to
#		install packages that require additional libraries from the
#		standard distribution.
#		Use the "-check" flag for this mode.  If you want to copy all
#		dependencies, use "-check -fix".
#		Since the purpose of this script is to "install" Perl in
#		a chroot-env, the "-check" functionality only checks the Perl
#		hierarchies, (/usr/libdata/perl5, /usr/local/libdata/perl5).
#
# This script will copy all necessary (and currently installed)
# Perl components on an OpenBSD system to a target directory in order
# to create a hierarchy for a chroot environment.
#
# By default, the target directory is "/var/www", but that can be
# changed with "-t".
#
# in debug-mode ("-d") the script will print all steps to STDOUT without
# actually doing anything.  Of course, some of the later steps depend on
# the target hierarchy existing, so it isn't entirely representative.
#
# To get more output use "-v".  For even more output, "-v -v".
#
# "-v -d" will more-or-less show you what the script would do.
#
# The "-pkg" flag tells the script to copy some additional files
# to the target hierarchy to allow package management ("pkg_add",
# "pkg_info", "pkg_delete") to function.
#
# Author: Robert Urban <urban (at) tru64 [dot] org>
# September, 2008
#----------------------------------------------------------------------

use FileHandle;
use File::Basename;
use strict;
use Getopt::Long;

my $DEBUG;
my $VERBOSE;

my $MODE				= 'copy';
my $FIX					= 0;

my $TARGET_PATH		= '/var/www';
my $COPY_PACKAGE_ENV	= 0;
my %COMMANDS = (
	sum		=> '/bin/sum',
	tar		=> '/bin/tar',
	cp		=> '/bin/cp',
	file	=> '/usr/bin/file',
	ldd		=> '/usr/bin/ldd',
);

#----------------------------------------------------------------------
# %FILES_BASIS
# these files are required for the chroot-environment.
# they will be copied using "cp -p <src> <dest>"
# for each file copied, a list of recursive dependencies is generated.
# when all regular files have been copied, the dependencies are copied.
# directories are created as necessary.
#
# dependencies are determined using 'ldd'
#----------------------------------------------------------------------
my %FILES_BASIS = (
	sbin			=> [ qw{ldconfig} ],
	etc				=> [ qw{resolv.conf services} ],
	'usr/bin'		=> [ qw{arch env perl tty} ],
	'usr/libexec'	=> [ 'ld.so', ],
	'var/run'		=> [ 'ld.so.hints', ],
);

#----------------------------------------------------------------------
# %FILES_PACKAGE
#
# these files are required for the package-environment.
# they will be copied using "cp -p <src> <dest>"
# dependencies are handled as above.
#----------------------------------------------------------------------
my %FILES_PACKAGE = (
	bin				=> [ qw{cat ls cp df hostname rm sh stty} ],
	sbin			=> [ qw{mount ping} ],
	'usr/bin'		=> [ qw{ftp gzip less} ],
	'usr/local/bin'	=> [ 'bash', ],
	'usr/sbin'		=> [qw{
		pkg
		pkg_add
		pkg_create
		pkg_delete
		pkg_info
		pkg_merge
		pkg_mklocatedb
		traceroute
		bgpctl
		}
	],
);

#----------------------------------------------------------------------
# /dev/ files required for chroot-env
#----------------------------------------------------------------------
my @DEV_FILES = qw(null tty);

#----------------------------------------------------------------------
# @DIRS
#
# some directories that need to exist in the target hierarchy for the
# chroot-env
#----------------------------------------------------------------------
my @DIRS = qw(
	var/tmp
	var/db/pkg
	usr/lib
	usr/local/lib
	tmp
);

#----------------------------------------------------------------------
# @COPY
#
# a list of hiararchies that need to be copied for the chroot-env
# they are copied using
#
#	(cd <base_dir>; tar cf - <sub_dir>) | tar xpf - -C $TARGET_PATH$base_dir
#
# after the hiararchies have been transferred, each target hierarchy is
# scanned for files with dependencies on shared objects. sub-dependencies
# are also handled.  All dependencies are subsequently copied to their
# respective target paths.  Directories are created as necessary.
#----------------------------------------------------------------------
my @COPY = (
	{
		base_dir	=> '/usr',
		sub_dir		=> 'libdata/perl5',
	},
	{
		base_dir	=> '/usr/local',
		sub_dir		=> 'libdata/perl5',
	},
	{
		base_dir	=> '/usr/share',
		sub_dir		=> 'nls',
	},
	{
		base_dir	=> '/usr/lib',
		sub_dir		=> 'apache/modules',
	},
	{
		base_dir	=> '/usr/share',
		sub_dir		=> 'zoneinfo',
	},
);

#=======================================================================
# main
#=======================================================================

$ENV{PATH} = '/bin:/usr/bin';
my $res = GetOptions(
	'p|pkg|package'	=> \$COPY_PACKAGE_ENV,
	't|target=s'	=> \$TARGET_PATH,
	'd|debug+'		=> \$DEBUG,
	'v|verbose+'	=> \$VERBOSE,
	'h|help'		=> \&usage,
	'c|check'		=> sub { $MODE = 'check'; },
	'f|fix'			=> \$FIX,
);

-d $TARGET_PATH || die "targer-hierarchy [$TARGET_PATH] must exist.\n";

if ($MODE eq 'check') {
	scan_hierarchies();
	exit;
}

check_target_paths();
copy_files(\%FILES_BASIS);
$COPY_PACKAGE_ENV && copy_files(\%FILES_PACKAGE);
copy_hierarchies();
scan_hierarchies();
copy_dev_files();

print "don't forget to create a symlink from <root>/etc/localtime to\n";
print "the appropriate file in /usr/share/zoneinfo/<file>\n";

exit;

#=======================================================================
# subs
#=======================================================================

sub usage
{
	print <<_EOF_;
usage: $0 [-check] [-fix] [-t <path>] [-pkg] [-d] [-v]
	-t <path>	top of target-hierarchy. default "/var/www"
	-pkg		copy files necessary for pkg-mgmt
	-d		increment debug level
	-v		increment verbose level
	-check		scans target hierarchies for unsatisfied dependencies
	-fix		combined with "-check" copies missing deps to target
_EOF_
	exit;
}

sub scan_hierarchies
{
	my %deps;

	$VERBOSE && print "\n%% SCAN_HIERARCHIES %%\n";

	my @dirs = map "$_->{base_dir}/$_->{sub_dir}", @COPY;

	foreach my $dir (@dirs) {
		$VERBOSE && print "  scanning: [$dir]\n";
		my $targ = "$TARGET_PATH$dir";
		map { $deps{$_} = 1; } scan_hierarchy($targ, 1);
	}

	$VERBOSE && print "found perl deps:\n\t",join("\n\t", keys(%deps)),"\n";
	copy_deps(keys(%deps));
}

sub scan_hierarchy
{
	my ($dir, $depth) = @_;

	$VERBOSE && print ' ' x ($depth * 2), "[$dir]\n";

	my $dh;
	my %deps;
	if (!opendir($dh, $dir)) {
		if ($DEBUG) {
			print "opendir failed. skipping\n";
			return;
		}
		die "opendir [$dir] failed: $!";
	}
	while(my $entry = readdir($dh)) {
		if ($entry =~ /^\.\.?$/) { next; }
		my $path = "$dir/$entry";
		($DEBUG > 1) && print "- path=[$path]\n";
		if (-l $path) { next; }
		if (-d $path) {
			($DEBUG > 1) && print "- D: recursing...\n";
			map { $deps{$_} = 1; } scan_hierarchy($path, $depth + 1);
		}
		#------------------------------------------------------------
		# if regular file, and is either executable, or ends in ".so",
		# and "file" says it's dynamically linked, then scan for deps
		#------------------------------------------------------------
		if ((-f $path)
			&& ((-x $path) || ($entry =~ /\.so$/))
			&& (file_type($path) eq 'dynamic')
		) {
			($DEBUG > 1) && print "- F: looking for deps.\n";
			#map { $deps{$_} = 1; } find_deps($path);
			my @d = find_deps($path);
			($DEBUG > 1) && print "- F: found:\n\t", join("\n\t", @d),"\n";
			map { $deps{$_} = 1; } @d;
		}
	}
	closedir($dh);

	return(keys(%deps));
}

sub check_target_paths
{
	$VERBOSE && print "\n%% CHECK_TARGET_PATHS %%\n";
	my %dirs;

	# collect unique list of target-dirs to check
	foreach my $dir (keys(%FILES_BASIS), keys(%FILES_PACKAGE), @DIRS) {
		$dirs{$dir} = 1;
	}
	foreach my $hier (@COPY) {
		my $dir = $hier->{base_dir};
		$dir =~ s!^/!!;
		$dirs{$dir} = 1;
	}
	$dirs{dev} = 1;

	foreach my $dir (keys(%dirs)) {
		check_target_dir($dir);
	}
}

sub check_target_dir
{
	my $dir = shift;

	($VERBOSE > 1) && print "    checking [$dir]\n";
	$dir =~ s!^/!!;
	my $path = "$TARGET_PATH";
	my @comps = split('/', $dir);
	foreach my $comp (@comps) {
		$path .= "/$comp";
		if (! -e $path) {
			create_dir($path);
		}
	}
}

#-------------------------------------------------------------------
# create_dir()
#
# creates a target directory using the owner and permissions of the
# source-directory
#-------------------------------------------------------------------
sub create_dir
{
	my $dir = shift;

	my $src_dir = $dir;
	$src_dir =~ s!^$TARGET_PATH!!;
	my ($mode, $uid, $gid) = (stat($src_dir))[2,4,5];
	$mode &= 07777;
	if ($VERBOSE) {
		printf("    mkdir [$dir], mode=0%o, uid=$uid, gid=$gid\n", $mode);
	}
	$DEBUG && return;

	mkdir($dir, $mode) || die "mkdir [$dir]: $!";
	chown($uid, $gid, $dir) || die "chown dir [$dir]: $!";
	#chmod($mode, $dir) || die "chmod dir [$dir]: $!";
}

#-------------------------------------------------------------------
# copy_files()
#
# copies a number of files from src-hierarchy to target-hierarchy
# and checks for dependencies, which are copied with "copy_deps()"
#-------------------------------------------------------------------
sub copy_files
{
	my $href = shift;

	my ($src, $targ);

	# unique list of dependencies that need to be copied
	my %deps;

	$VERBOSE && print "\n%% COPY_FILES %%\n";
	foreach my $tdir (keys(%{ $href })) {
		foreach my $file (@{ $href->{$tdir} }) {
			$src = "/$tdir/$file";
			$targ = "$TARGET_PATH/$tdir/$file";
			if ((! -e $targ) || files_differ($src, $targ)) {
				copy_file($src, $targ);
				my $ftype = file_type($src);
				if ($ftype eq 'dynamic') {
					my @deps = find_deps($src);
					map { $deps{$_} = 1; } @deps;
				}
			}
		}
	}
	copy_deps(keys(%deps));
}

#-------------------------------------------------------------------
# copy a list of files with absolute paths from src-hier to target-hier
# it is assumed that dependencies have already been resolved.
#-------------------------------------------------------------------
sub copy_deps
{
	my @files = @_;

	my $targ;
	foreach my $file (@files) {
		$targ = "$TARGET_PATH$file";
		if ((! -e $targ) || files_differ($file, $targ)) {
			if (($MODE eq 'check') && !$FIX) {
				print "  DEP MISSING: $targ\n";
			} else {
				copy_file($file, $targ);
			}
		}
	}
}

sub get_checksum
{
	my $file = shift;

	my $cmd = "$COMMANDS{sum} $file";
	open(CS, '-|', $cmd) || die "popen [$cmd]: $!";
	my $line = <CS>;
	close(CS);

	return (split(' ', $line))[0];
}

sub files_differ
{
	my ($src, $targ) = @_;

	($VERBOSE > 1) && print "  diffing [$src] and [$targ]\n";
	my $src_size = -s $src;
	my $targ_size = -s $targ;
	if ($src_size != $targ_size) {
		($VERBOSE > 1) && print "  sizes differ\n";
		return 1;
	}
	my $src_cs = get_checksum($src);
	my $targ_cs = get_checksum($targ);
	if ($src_cs != $targ_cs) {
		($VERBOSE > 1) && print "  cs differ\n";
		return 1;
	}

	($VERBOSE > 1) && print "  same\n";
	return 0;
}

sub copy_dev_files
{
	$VERBOSE && print "\n%% COPY_DEV_FILES %%\n";
	my $files = join(' ', @DEV_FILES);
	my $target_dir = "$TARGET_PATH/dev";
	my $tar_params = "xpf - -C $target_dir";
	if ($DEBUG) {
		$tar_params = 'tf -';
	}
	my $cmd = "cd /dev; $COMMANDS{tar} cf - $files | $COMMANDS{tar} $tar_params";
	$VERBOSE && print "  cmd=[$cmd]\n";
	
	if (system($cmd)) {
		print "cmd failed: [$cmd]. error=$!\n";
	}
}

sub find_deps
{
	my ($so, $seen_ref) = @_;

	($VERBOSE > 1) && print "[finding deps for <$so>]\n";

	my $return;
	if (!defined($seen_ref)) {
		$seen_ref = {};
		$return = 1;
	}

	my $cmd = "$COMMANDS{ldd} $so";
	my $fh = FileHandle->new;

	open($fh, "$cmd|") || die "popen: $!";
	my $found = 0;

	my @deps;

	while(<$fh>) {
		if ($found) {
			my ($start, $end, $type, $open, $ref, $grpref, $name) = split;
			if ($type eq 'rlib') {
				push(@deps, $name);
			}
		} else {
			if (/^\s+Start\s+End/) {
				$found = 1;
			}
		}
	}
	close($fh);

	foreach my $dep (@deps) {
		if (!exists($seen_ref->{$dep})) {
			$seen_ref->{$dep} = 1;
			find_deps($dep, $seen_ref);
		} else {
			($VERBOSE > 1) && print "dep [$dep] already seen\n";
		}
	}
	$return && return (keys(%{ $seen_ref }));
}

sub file_type
{
	my $path = shift;

	my $cmd = "$COMMANDS{file} $path";
	my $fh = FileHandle->new;

	open($fh, "$cmd|") || die "popen: $!";
	my $out = <$fh>;
	close($fh);

	if ($out =~ /statically linked/) { return 'static'; }
	if ($out =~ /(dynamically linked|shared object)/) { return 'dynamic'; }
	return 'unknown';
}

sub copy_file
{
	my ($source, $target) = @_;

	my $cmd = "$COMMANDS{cp} -p $source $target";

	$VERBOSE && print "  COPY: $source -> $target\n";
	$DEBUG && return 1;

	check_target_dir(dirname($source));

	if (system($cmd)) {
		print "copy failed: $!\n";
		return 0;
	 }
	return 1;
}

sub copy_hierarchies
{
	$VERBOSE && print "\n%% COPY_HIERARCHIES %%\n";
	foreach my $hier (@COPY) {
		my $src = "$hier->{base_dir}/$hier->{sub_dir}";
		my $targ = "$TARGET_PATH$hier->{base_dir}";
		my $test = "$TARGET_PATH$src";
		if (-d $test) {
			$VERBOSE && print " [$test] exists, skipping.\n";
			next;
		}
		$VERBOSE && print "  copy_hierarchy: base=$hier->{base_dir}, sub=$hier->{sub_dir}, targ=[$targ]\n";
		copy_hierarcy($hier->{base_dir}, $hier->{sub_dir}, $targ);
	}
}

sub copy_hierarcy
{
	my ($base, $sub, $target) = @_;

	!$DEBUG && ! -d $target && die "  FATAL: target [$target] not there\n";

	$VERBOSE && print "    COPY_HIER: [$base/$sub -> $target]\n";

	if (! -d $base) {
		die "base-dir [$base] not directory or does not exist\n";
	}
	($DEBUG == 1) && return;

	my $parms = $DEBUG ? 'tf -' : "xpf - -C $target";

	my $cmd = "cd $base; $COMMANDS{tar} cf - $sub | $COMMANDS{tar} $parms";
	$VERBOSE && print "    cmd=[$cmd]\n";
	$DEBUG && $VERBOSE && print "    would have added [-C $target]\n";

	if (system($cmd)) {
		print "cmd failed: [$cmd]. error=$!\n";
	}
}
