#!/usr/bin/perl
#	shtracer: Perl utility to identify processes connecting to specific remote
#		  IP addresses (eg: botnet sinkholes)
#		Version 0.01 Ray for CBL
#		Version 0.02 Ray for CBL Major revisions
#
#	In most cases, you merely need to test the script with the current
#	settings while accessing the abuseat.org web site with "wget" or "curl",
#	then change the sinkhole IP address to the IP shown in the CBL lookup
#	page.  If the script fails to show the capture, you will need to look
#	at the other configurable settings.  You may need to run this script
#	as root.

#	This should work unchanged on most Linux systems.  You may need to set
#	some of the command paths for Linux, FreeBSD and other LINUX/UNIX-like
#	systems, including MacOS.  This is unlikely to function on Windows without
#	a lot of work.

#	This script should be run as root, eg: "sudo ./shtracer.pl".
#	It _may_ work without it, but that depends on the system it's being
#	run on.

#	Basic functionality: this program loops quickly taking a look for connections
#	to the target sinkhole IP address via any port or protocol using the "ss"
#	command.  When it sees such a connection, it spits out the socket detail
#	including processid and userid.  

#	If the callsof variable is set to non-zero, it attempts to find out
#	more information about the process.  If the /proc file system exists,
#	it will use that.  Otherwise, it will use the "lsof" command to find out
#	added information about the process that has
#	initiated the connection, which should show the actual file name etc.

#	To run the script, make sure that it's executable ("chmod 755 shtracer.pl")
#	and run it ("./shtracer.pl").  It will keep running until you hit ^C.
#	While it's running it should catch and display details about any connections
#	made to the sinkhole IP address.   We recommend running this script for at least
#	12 hours before assuming that such connections are not being made.

#	Dependencies: "ss", "lsof" and "grep" commands with the usual command
#	line conventions, and the POSIX Perl module - which is default in Perl.

#	WARNING WARNING WARNING: never do the wget/curl test against the sinkhole
#	IP addresses given by the CBL lookup page.  While this is usually harmless,
#	depending on the sinkhole, doing a wget/curl test against the sinkhole MAY
#	cause a CBL relisting.  This is why we recommend testing against abuseat.org's
#	web site before setting up the capture for the real sinkhole - don't poke the
#	sinkhole!

#	-----------------------------------------------

#	IP address of sinkhole (from CBL lookup) is set by the "sinkhole" variable.

#	By default, the IP address is set to the IP address of the CBL's
#	web site.

#	You can use this setting to test this script. Eg: you start this
#	script using this value, and then use "wget" or "curl" from another
#	window to test, like so:

#		wget http://192.42.118.110

#	Running the above wget command should trigger the script to tell you
#	your userid, processid, and name of the script ("wget"):

#	Once you've verified that the script is working, comment out the sinkhole setting
#	and use the sinkhole IP address from the CBL lookup page.  We've
#	included the EITEST sinkhole as a convenience, which you merely
#	need to uncomment.  But you should double check that the sinkhole IP address
#	agrees with the IP address shown in the CBL lookup.
#
my $sinkhole = '192.42.118.110';	# http://abuseat.org TEST address

#my $sinkhole = '192.42.119.41';	# current EITEST sinkhole

#	Set to one to get more information about the offending process via /proc
#	if it exists, lsof otherwise.
my $callsof = 1;

#	Loop time in seconds.  Since this program has to poll for events, you have to
#	make this loop time short enough to catch the short botnet connections.
#	.01 pretty much guarantees catching it.
#	.1 the probability is lower of catching it every time
#	1 might work if the network is slow or the botnet C&C is very slow
#	(Declared with "my" like every other setting, so it is a file-scoped
#	lexical rather than an undeclared package global.)
my $loop = .01;

#	Locations of various utilities we use.  Adjust these if "which ss",
#	"which lsof" or "which grep" report different paths on your system.
my $ss = '/bin/ss';
my $lsof = '/usr/bin/lsof';
my $grep = '/bin/grep';

use POSIX qw/strftime/;

#	Refuse to start if a required utility is not executable at the
#	configured path.
die "No ss utility at $ss, try a 'which ss' and adjust script \$ss variable" if ! -x $ss;
#	lsof is only run from showpid(), and showpid() is only called when
#	$callsof is set, so do not insist on lsof when $callsof is 0.
die "No lsof utility at $lsof, try a 'which lsof' and adjust script \$lsof variable" if $callsof && ! -x $lsof;
#	NOTE(review): nothing in this script appears to invoke $grep; the check
#	is kept because grep is listed as a dependency - confirm it is still needed.
die "No grep utility at $grep, try a 'which grep' and adjust script \$grep variable" if ! -x $grep;

#	The actual work, stop this script by control-C:

#	Poll forever: run "ss -p" filtered on the sinkhole address, print every
#	matching socket line (with a timestamp banner before the first one of
#	each pass), collect the owning process ids, and optionally hand each
#	pid to showpid() for more detail.  Stop with control-C.
while(1) {

    my @pids;
    my %seen;		# pids already queued during this pass
    my $hit = 0;

    # List-form pipe open: ss receives exactly the argument vector the old
    # shell command line produced, but no shell is involved.
    open(my $ss_out, '-|', $ss, '-p', " ( dst $sinkhole ) ")
	or die "$ss -p ' ( dst $sinkhole ) ': cannot run: $!";
    while(my $line = <$ss_out>) {
	next if $line =~ /State/;	# skip the column header line
	chomp($line);
	print strftime("(%F %T) HIT:\n", localtime(time)) if !$hit;
	$hit = 1;
	print "\t$line\n";

	# The users field looks like users:(("name",pid=123,fd=4),...) or,
	# in the older style, users:(("name",123,4),...).  A socket can be
	# shared by several processes, so collect every pid, and accept
	# digits only because the pids are later used in paths and commands.
	if ($line =~ /users:\((.*)\)/) {
	    my $users = $1;
	    while ($users =~ /\([^,]+,(?:pid=)?(\d+),/g) {
		push(@pids, $1) if !$seen{$1}++;
	    }
	}
    }
    # When the child merely exits non-zero, close() fails with $! unset and
    # the status in $?, so report whichever one carries the information.
    close($ss_out)
	|| die "$ss -p ' ( dst $sinkhole ) ': failed: " . ($! ? "$!" : "exit status " . ($? >> 8));


    if ($hit && $callsof) {
	for my $pid (@pids) {
	    showpid($pid);
	}
    }
    # The built-in sleep() only honours whole seconds, so a fractional
    # $loop would become sleep(0) and busy-loop.  Four-argument select()
    # sleeps for fractional seconds without needing any extra module.
    select(undef, undef, undef, $loop);
}

#	showpid(PID): print extra detail about process PID on standard output.
#
#	If /proc/PID exists, prints one "\t<name>\t<value>" line for each of
#	cmdline, comm, cwd and exe, always in that order.  cwd and exe are
#	symlink targets; cmdline has its NUL separators replaced by spaces.
#	A value that cannot be read (process already gone, no permission) is
#	printed as an empty string.
#	Otherwise the output of "lsof -p PID" is printed, each line prefixed
#	with three spaces.
#	A PID that is not a plain decimal number is ignored and nothing is
#	printed.  Returns nothing useful.
sub showpid {
    my ($pid) = @_;

    # The pid is parsed out of "ss" output, which also embeds process
    # names, so treat it as untrusted: it is about to be placed in a
    # /proc path and on an lsof command line.
    return if !defined($pid) || $pid !~ /\A\d+\z/;

    if (-d "/proc/$pid") {
	for my $attr (qw/cmdline comm cwd exe/) {
	    my $value;
	    if ($attr eq 'cwd' || $attr eq 'exe') {
		$value = readlink("/proc/$pid/$attr");
	    } elsif (open(my $fh, '<', "/proc/$pid/$attr")) {
		# Slurp the whole file; $/ is localised to this one read
		# so the chomp below still strips a trailing newline.
		$value = do { local $/; <$fh> };
		close($fh);
	    }
	    $value = '' if !defined($value);

	    # /proc/PID/cmdline separates the arguments with NUL bytes.
	    $value = join(' ', split(/\x00/, $value)) if $attr eq 'cmdline';
	    chomp($value);

	    print "\t$attr\t$value\n";
	}

    } else {
	# List-form pipe open: lsof is run directly, without a shell.
	my $lsof_out;
	if (!open($lsof_out, '-|', $lsof, '-p', $pid)) {
	    warn "$lsof -p $pid: cannot run: $!\n";
	    return;
	}
	while(my $line = <$lsof_out>) {
	    print "   ", $line;
	}
	close($lsof_out);
    }
    return;
}
