Other Stuff

UoM::RCS::Talby


Page Contents:


Page Group:

2010:

2009: 2008:


Related Pages:





Perl HTTPD Proxy. . .  

/etc/apache2/apache2.conf:

  # KeepAlive: Whether or not to allow persistent connections (more than
  # one request per connection). Set to "Off" to deactivate.
  #
  #### commented out, line below added 2010 May 06 ##############KeepAlive On
  KeepAlive Off

The Script!

#!/usr/bin/perl -T -w

# -- Notes : ----------------------------------------------------------------------

#  1. do not mix sysread and read on a file handle (similarly for writes/porints);
#  2. 
#
#
use strict;


# -- Get the remote host : --------------------------------------------------------

use Socket;

# -- xinetd on RedHat 9.0 passes REMOTE_HOST through, inetd on Debian Woody
#    does not;  getpeername is more reliable...
#
#unless ($ENV{"REMOTE_HOST"} =~ m/^(\d\d?\d?\.\d\d?\d?\.\d\d?\d?\.\d\d?\d?)$/) {
#    die "\n\n R \n\n";
#  }
#
#$ENV{"REMOTE_HOST"} = $1;

my $peername = getpeername(STDIN);
my ($peerport, $peeraddr) = Socket::unpack_sockaddr_in($peername);
$peeraddr = inet_ntoa($peeraddr);

$ENV{"REMOTE_HOST"} = $peeraddr;

##foreach (keys %ENV) {
##    print "\n $_ " . $ENV{$_};
##  }
##exit;


# -- Tighten up environment : -----------------------------------------------------

$ENV{PATH} = "";


# -- Config : ---------------------------------------------------------------------

my $EMERGENCY = 0;    # ...if we've shagged it up, set = 1 for a temp workaround...
my $DEBUG     = 1;    # ...if we're stuck...

my $IPT = "/sbin/iptables";

my $apache_port = 8999;


# -- Imports : --------------------------------------------------------------------

use Sys::Syslog;
use IPC::Open2;

##use Fcntl;
##use POSIX;
    # ...Fcntl, with POSIX, used for when non-blocking IO was being played with...


# -- The "Oh I've shagged it up section : -----------------------------------------

    # ...simple pass-through case for when have fucked-up the real version, 
    #    i.e., all that code below :

if ($EMERGENCY) {
###    system("/home/si2/netcat/nc -w 10 talby.csu.umist.ac.uk 8999");
    system("/home/si2/netcat/nc -w 10 talby.rcs.manchester.ac.uk 8999");
    exit 0;
  }


# -- Log who's calling us : -------------------------------------------------------

    # ...do this right near the beginning just in case we get shagged (and note 
    #    that with any luck xinetd is already doing some logging too)...

openlog("httpproxy", "ndelay", "daemon");
syslog("notice", $ENV{"REMOTE_HOST"} . " ...is making a request...");
closelog(); 
  


# -- Debug initialisation : -------------------------------------------------------
 
if ($DEBUG) {
    `echo "" > /tmp/HTTPPROXY_ETC`;
    `echo "" > /tmp/HTTPPROXY_ERR`;
    `echo "" > /tmp/HTTPPROXY_OUT`;
  }


# -- Read request from STDIN : ----------------------------------------------------

    # ...this was required when using std perl read to get request;  not required
    #    with sysread...

# my $flags = '';
# fcntl(STDIN, F_GETFL, $flags) or die "\n\n Couldn't get flags for STDIN \n\n";
# $flags |= O_NONBLOCK;
# fcntl(STDIN, F_SETFL, $flags) or die "\n\n Couldn't set flags for STDIN \n\n";

    # ...here be a suggestion of that std read type code :

# while ($xx = <STDIN>) {
#     unless (defined $xx) { exit }
#     $input .= $xx;  
#   }


# -- Read request from STDIN using nice low-level sys stuff: ----------------------

my $request = "";
my $nbytes  = 0;

$nbytes = sysread(STDIN, $request, 10240);  
        # ...10k should be enough for any conceivable request...


if ($DEBUG) {
    open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
    print TMP "\n" . time . " -- Read request from STDIN...";
    close(TMP);
  }


if ($DEBUG) {
    open(TMP, ">/tmp/HTTPPROXY_REQ") || die "\n\n Can't open REQ \n\n";
    print TMP "\n" . time . " -- $request";
    close(TMP);
        # ...save everything about the request, including all headers (debug)...
  }


# -- Grab main parts of request : -------------------------------------------------

my ($req_0, $req_1, $req_2) = split(/\s+/, $request);
my (undef, @req_rest)       = split(/\n/, $request);

unless (defined $req_0) {$req_0 = ""}
unless (defined $req_1) {$req_1 = ""}
unless (defined $req_2) {$req_2 = ""}


# -- Log main parts of request : --------------------------------------------------

openlog("httpproxy", "ndelay", "daemon");
my $ssss = $ENV{"REMOTE_HOST"} . " $req_0 $req_1 $req_2";
$ssss =~ s/%//g;
syslog("notice", $ssss);
closelog(); 


# -- We don't do these : ----------------------------------------------------------

if (    ($req_0 eq "OPTIONS") || ($req_0 eq "SEARCH") || ($req_0 eq "TRACE")
     || ($req_0 eq "LINK")    || ($req_0 eq "UNLINK") 
     || ($req_0 eq "PUT")     || ($req_0 eq "DELETE")){
    &when_in_doubt_do_nowt("DOUBTFUL METHOD", "$req_0 $req_1 $req_2");
  }

# -- Be a safety-girl : -----------------------------------------------------------

    # ...check out that request for nasties they are attempting to stick us with...

unless (($req_0 =~ m/^GET$/) || ($req_0 =~ m/^HEAD$/) || ($req_0 =~ m/^POST$/)) {
    &deathblock("UNKNOWN HEADER", "$req_0 $req_1 $req_2");
            # ...we handle only GET, HEAD and POST at present...
  }

unless (($req_2 =~ m/^HTTP\/1.0$/) || ($req_2 =~ m/^HTTP\/1\.1$/)) {
    &deathblock("UNKNOWN HTTP [$req_2]", "$req_0 $req_1 $req_2");
            # ...ensure we are talking the same language...
  }

unless (length($req_1) < 150) {
    &deathblock("TOO LONG", "$req_0 $req_1 $req_2");
            # ...buffer overloads...
  }

if ($req_1 =~ m/\.\./) {
    &deathblock("DOTS", "$req_0 $req_1 $req_2");
            # ...don't allow this "dir/../other" sort of thing...
  }


# -- Nuke chars we don't like : ---------------------------------------------------

$req_1 =~ s/[{}|\`^\\\[\]]//g;  
        # ...unsafe chars which should always be encoded:  {  } | \ ^ ~ [ ] `  
        #    so we should not see them;  we simply erase them, except for ~ which 
        #    we need...
   
$req_1 =~ s/%5B|%5C|%5D|%7B|%7D|%5E|%60|%7C//g;
        # ...browsers automatically convert these chars to hex-encodings, often, 
        #    so we erase them too:  [ %5B,  ] %5D,  \ %5C,  { %7B,  } %7D,  
        #    ^ %5E,  ` %60,  | %7C  (again, leave  ~ %7E  alone)...
         
$req_1 =~ s/[:;@]//g;
$req_1 =~ s/%3A|%3B|%40//g;
        # ...reserved chars:   ; / ? : @ = &   so should be allowed, but we don't 
        #    like   ; %3B   : %3A   @ %40   so we nuke them...
        #    

$req_1 =~ s/[\$!\*\,]//g;
$req_1 =~ s/%21|%2A|%2C|%24//g;
        # ...okay chars are supposed to be:  
        #
        #        1. alphanumerics 
        #        2. $ - _ . + ! * ' , ( )   
        #        3. reserved chars used for their reserved purposes
        #
        #    so should be allowed, but we don't like   $ %24  
        #    ! %21   * %2A  , %2C    so we nuke them (apparently 
        #    + can be used to imply space, so we allow that and we allow
        #    parentheses since we have some e-Learning stuff with them)...    

$req_1 =~ s/[\"\<\>]//g;
$req_1 =~ s/%22|%3C|%3E//g;
        # ...special chars   % # " < >   ...we allow the first two but don't like 
        #    the other three...


    ### __TODO__
    ### __TODO__
        # ...log nuking...


if ($DEBUG) {
    open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
    print TMP "\n" . time . " -- //$req_1//";
    close(TMP);
  }


# -- Send nice, safe request on its way : -----------------------------------------

my $output = "$req_0 $req_1 $req_2\n";
foreach (@req_rest) {$output .= "$_\n"}

if ($DEBUG) {
#    print "<HTML><BODY><PRE>$output" . "EEOOTT</PRE></BODY></HTML>";
#    exit 0;
  }


#open(OUTPUT, "| /home/simonh/src/nc-110/nc -w 10 127.0.0.1 8999");
#print OUTPUT "$output\n\n\n";
    # ...old version:  the sending of the request to apache seemed ok, but we did 
    #    not always get everything expected back using std "read" --- perhaps 
    #    should have used sysread (and the latter carefully)... 


if ($DEBUG) {
    open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
    print TMP "\n" . time . " -- Gonna send request to apache2 via nc...";
    close(TMP);
  }

###open2(*READ2, *WRITE2, "/home/si2/netcat/nc -w 10 talby.csu.umist.ac.uk 8999");
############################open2(*READ2, *WRITE2, "/home/si2/netcat/nc -w 10 mctalby.mc.man.ac.uk 8999");
open2(*READ2, *WRITE2, "/bin/nc -w 10 talby.rcs.manchester.ac.uk 8999");
print WRITE2 $output;
        # ...send request to apache via netcat...


if ($DEBUG) {
    open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
    print TMP "\n" . time . " -- Request sent to apache2 via nc...";
    close(TMP);
  }

if ($DEBUG) {
    open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
    print TMP "\n" . time . " -- Gonna read reply from apache2 via nc...";
    close(TMP);
  }


# -- Read stuff sent back from netcat : -------------------------------------------

my $r2_blocksize = (stat READ2)[11] || 16384;
my $buffer    = "";
my $read_2    = "";

while (my $len = sysread READ2, $buffer, $r2_blocksize) {
    if (!defined $len) {
        openlog("httpproxy", "ndelay", "daemon");
        syslog("notice", "System read error: $!");
        closelog(); 

        next if $! =~ /^Interrupted/;
        die "\n\n System read error: $! \n\n";
      }
    $read_2 .= $buffer;
    
    if ($DEBUG) {
        open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
        print TMP "\n" . time . " -- Read a block...";
        close(TMP);
      }
  }

close(WRITE2);
close(READ2);


if ($DEBUG) {
    open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
    print TMP "\n" . time . " -- Read reply from apache2 via nc...";
    close(TMP);
  }


# -- Debug : ----------------------------------------------------------------------

if ($DEBUG) {
    open(TMP, ">/tmp/HTTPPROXY_OUT") || die "\n\n Can't open TMP. \n\n";
    print TMP $read_2;
    close(TMP);
        # ...don't use `echo...` for this as may contain binary which screws up 
        #    in the shell... 
  }


# -- Send output on its way to remote host : --------------------------------------

$read_2 =~ s/\:$apache_port//;
    # ...some versions of apache (e.g., 1.3.31, Debian Sarge) attach, unasked,
    #    port number on URLs (e.g., 8999) which messes things up...

my $written = syswrite STDOUT, $read_2, length($read_2), 0;

  ## -- error handling??
  ##     -- if ($written < length($read_2)) {
  ##            do something;


if ($DEBUG) {
    open(TMP, ">>/tmp/HTTPPROXY_ETC") || die "\n\n Can't open ETC \n\n";
    print TMP "\n" . time . " -- Reply sent to remote host...";
    close(TMP);
  }


# ---------------------------------------------------------------------------------
# -- The End :
exit 0;
# ---------------------------------------------------------------------------------

sub when_in_doubt_do_nowt() {
    my $error  = shift;
    my $string = shift;

    $error  =~ s/%//g;
    $string =~ s/%//g;
        # ...don't want to confuse syslog's implementation with "%"s...

    openlog("httpproxy", "ndelay", "daemon");
    syslog("notice", $ENV{"REMOTE_HOST"} . " * " . $error . " * ");
    syslog("notice", $ENV{"REMOTE_HOST"} . " " . $string);
    closelog();

    die "\n\n Why can't you say something righteous and hopeful for a change? \n\n";
  }

sub deathblock() {
    my $error  = shift;
    my $string = shift;

    $error  =~ s/\%//g;
    $string =~ s/\%//g;
        # ...don't want to confuse syslog's implementation with "%"s...

    openlog("httpproxy", "ndelay", "daemon");
    syslog("notice", $ENV{"REMOTE_HOST"} . " *** " . $error . " ***");
    syslog("notice", $ENV{"REMOTE_HOST"} . " *** " . $string);
    closelog(); 

    # -- they've been naughty so invoke IPTables block : 

    print "\n\n Woof-woof-woof! \n\n";
    system("$IPT -t filter -I INPUT -s $ENV{REMOTE_HOST} -j DROP");
    die "\n\n That's my other dog impersonation! \n\n";
  }
 
# ---------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------