#!/usr/bin/env ruby
# == Synopsis
#
# A Ruby script designed to do the following.
#
# Start 2 threads.
# 1 thread: run snoop, listen on output, fill new connections
#       into table with timestamp now
# 1 thread: run netstat every n seconds.
#        determine age for every connection
#        mark every active connection as seen with flag
#        list connections with age over limit to STDOUT
#        if connection is not yet in table (after a fresh start), then
#        fill it also in table with timestamp now
#        when done, remove every connection from table without the seen
#        flag
#
# Possible additional features
#        limit age of connection per country
#        count number of connections per IP and limit per country
#        have a whitelist of IPs (or ranges)
#        have a dynamic whitelist of IPs, where IP is being whitelisted
#                as soon as IP has a positive response to login request in access log
#                (-> 3rd thread)
#
# == Usage
#
# Options
# -c  --config STR       Path to config file
# -h  --help             This help text
# -t  --test             Run unit tests (not implemented by the option parser)
# -u  --usage -?         This help text
# -v  --verbose          Be verbose
#
# == Usage examples
#
# None
#
# == Defense methods remove item from array
# - Number of connections of IP is too high
#     refine filter with
#        - geoip
#        - authstate
# - Age of individual connection is too high
#     refine filter with
#        - geoip
#        - authstate
#
# == Caveat
# * Make sure server's IP is in the ignore regex, otherwise, outgoing connections to other servers could lead to
#   blacklisting your own IP
#
#
# TODO
# - Check parameters should check all parameters
# - pretty-print
# - document all functions
# - document config file
# - IPv6 support




# -----------------------------------------------------------
# INIT
# -----------------------------------------------------------

require "getoptlong"
require 'test/unit/assertions'
include Test::Unit::Assertions

require 'rubygems'
require 'geoip'


#require "tempfile"
#require "fileutils"
#require "find"
#require "pathname"


# require "rdoc/usage"  # see below under "def usage()"


# Global state shared between the monitor threads and the main loop.
$params      = {}   # configuration parameters (command line and config file)
$connections = []   # Connection objects currently being tracked
$ips         = []   # per-IP counters; replaced by count_ips on every pass
$authips     = []   # IPs reported by the auth log

# Flush every write immediately so that consumers of STDOUT see lines at once.
STDOUT.sync = true

# Aggregated view of all tracked connections that share one client IP.
# Instances are collected in the global $ips list.
class IPcounter
  attr_accessor :ip, :totalage, :sumconnections, :country

  # Input  : ip            - client IP address (String)
  #          country       - country code associated with the IP
  #          connectionage - initial value of the accumulated age in seconds
  # Remarks: the connection count always starts at 0
  def initialize(ip, country, connectionage)
    @ip = ip
    @country = country
    @sumconnections = 0
    @totalage = connectionage
  end

  # Return : "auth" if the IP is listed in $authips, "none" otherwise.
  # Remarks: evaluated on every call, so it follows changes of $authips.
  def authstate
    @authstate = $authips.include?(@ip) ? "auth" : "none"
  end

  # Purpose: decide whether this IP holds more connections than allowed
  # Return : true if sumconnections exceeds the applicable limit
  # Remarks: The limit starts at $params["basemaxconnection"]. Every line of
  #          $params["maxconnectionconditions"] has the form
  #          "<filter> [<filter> ...] <limit>"; if all filters of a line match
  #          and its limit is higher than the current one, it replaces it.
  #          The conditions may be an Array of lines or one multi-line String.
  #          Blank lines are skipped.
  def too_many_connections?
    state = authstate
    connectionlimit = $params["basemaxconnection"]

    conditions = $params["maxconnectionconditions"]
    # A heredoc-style String has no #each on Ruby >= 1.9; iterate its lines.
    conditions = conditions.each_line.to_a if conditions.respond_to?(:each_line)

    conditions.each do |conditionline|
      tokens = conditionline.split(" ")
      next if tokens.empty?   # a blank line carries neither filter nor limit

      filters = tokens[0...-1]
      # A filter only rejects the line if it addresses the attribute
      # ("country:" / "auth:") and does not contain this IP's value.
      hit = filters.none? do |condition|
        (condition =~ /country:/ && condition !~ /country:#{@country}/) ||
          (condition =~ /auth:/ && condition !~ /auth:#{state}/)
      end
      next unless hit

      myconnectionlimit = tokens.last.to_i
      connectionlimit = myconnectionlimit if myconnectionlimit > connectionlimit
    end

    vprint "Determined connectionlimit #{connectionlimit} based on country:#{@country} authstate:#{state}"

    @sumconnections > connectionlimit
  end

  def to_s
    "IP #{@ip} (from #{@country}) with #{@sumconnections} connections, totalage #{@totalage},"
  end

  # Purpose: look up the counter of an IP in $ips
  # Return : the matching IPcounter or nil. If several entries share the IP,
  #          the one added last is returned.
  def self.find_by_ip(ip)
    $ips.reverse_each.find { |counter| counter.ip == ip }
  end

end

# A single tracked connection. The key has the dotted form
# "<a>.<b>.<c>.<d>.<port>.<dport>", e.g. "127.0.0.1.34566.80".
# Instances are kept in the global $connections list.
class Connection
  attr_accessor :key, :starttime, :seen, :ip, :port, :dport, :country, :authstate, :timeout

  # Input  : key  - dotted connection key (see class comment)
  #          seen - initial value of the seen flag
  # Remarks: The start time is the moment of construction. Country comes
  #          from the global $geoip handle, the auth state from $authips,
  #          and the timeout is computed once via determine_timeout.
  def initialize(key, seen=true)
    segments = key.split('.')

    @key = key
    @seen = seen
    @starttime = Time.now.to_i

    # The first four segments form the IPv4 address; missing segments
    # become empty strings.
    @ip = segments.values_at(0, 1, 2, 3).join('.')
    @port = segments[4]
    @dport = segments[5]

    code = $geoip.country(@ip).country_code2
    @country = code.nil? ? "unknown" : code

    @authstate = $authips.include?(@ip) ? "auth" : "none"

    @timeout = determine_timeout(@country, @dport, @authstate)
  end

  def to_s
    age = Time.now.to_i - @starttime
    "ip #{@ip} port #{@port} dport #{@dport} country #{@country} authstate #{@authstate} age #{age}s"
  end

  # Return : true once the connection is older than its timeout (in seconds)
  def timeouted?
    (Time.now.to_i - @starttime) > @timeout
  end

  # Return : the connection stored under key in $connections, or nil.
  #          With duplicate keys the entry added last is returned.
  def self.find_by_key(key)
    $connections.select { |candidate| candidate.key == key }.last
  end

  # Purpose: clear the seen flag of every tracked connection
  # Return : true
  def self.set_all_unseen()
    $connections.each { |candidate| candidate.seen = false }
    true
  end

  # Return : number of tracked connections
  def self.num_instances()
    $connections.size
  end
end

# -----------------------------------------------------------
# SUB-FUNCTIONS (those that are specific to this script)
# -----------------------------------------------------------

def load_config()
  # Purpose: load the configuration file, open the GeoIP database and derive
  #          the tcpdump / netstat command lines from the configured ports
  # Input  : none ($params["config"] may name an explicit config file)
  # Output : error messages on stderr
  # Return : true if an error occurred, false otherwise
  # Remarks: Without an explicit config file every default location that
  #          exists is loaded, in list order, so a later file can override
  #          values set by an earlier one.
  #          File.exist? is used because File.exists? is gone in Ruby >= 3.2.

  err_status = false
  loaded = false

  if $params["config"]
    if File.exist?($params["config"])
      load $params["config"]
      loaded = true
    else
      $stderr.puts "Passed config file #{$params['config']}, but file does not exist. This is fatal. Aborting."
      err_status = true
    end
  else
    configlocations = ["#{ENV['HOME']}/.flying-frog.rc", "/root/.flying-frog.rc", "/etc/flying-frog.rc"]

    configlocations.each do |item|
      if File.exist?(item)
        load item
        loaded = true
      end
    end
  end

  unless loaded
    $stderr.puts "Could not load config file. This is fatal. Aborting."
    return true
  end

  begin
    vprint "Initialising geoip db"
    $geoip = GeoIP.new($params["geoip-db"])
  rescue StandardError
    $stderr.puts "Could not load GeoIP database at #{$params['geoip-db']}. This is fatal. Aborting."
    return true
  end

  # Normalise the timeoutconditions block to an Array of non-blank lines.
  # It may be configured as an Array or as one multi-line String; the latter
  # has no #each on Ruby >= 1.9, so its lines are enumerated explicitly.
  raw_conditions = $params["timeoutconditions"]
  condition_lines = raw_conditions.respond_to?(:each_line) ? raw_conditions.each_line.to_a : raw_conditions.to_a
  condition_lines = condition_lines.reject { |line| line.strip.empty? }
  $params["timeoutconditions_transform"] = condition_lines
  $params["timeoutconditions"] = condition_lines

  # to_s keeps a missing "ports" parameter from raising here, so that
  # check_parameters can report it properly afterwards.
  ports = $params["ports"].to_s.split(" ")

  # tcpdump cmd, e.g. "(port 80 or port 443)"
  tcpdump_ports = "(" + ports.map { |port| "port " + port }.join(" or ") + ")"
  $params['tcpdump_cmd'] = $params["tcpdump_cmd_base"].gsub("__PORTS__", tcpdump_ports)

  # netstat cmd, e.g. "(:80|:443)"
  # FIXME: colon is platform dependent. On Solaris it is a dot
  netstat_ports = "(" + ports.map { |port| ":" + port }.join("|") + ")"
  $params['netstat_cmd'] = $params["netstat_cmd_base"].gsub("__PORTS__", netstat_ports)

  return err_status

end

def determine_timeout(country, port, authstate)
  # Purpose: Determine the timeout that applies to a given connection
  # Input  : connection's origin country, port and authstate
  # Output : verbose message with the result
  # Return : timeout in seconds
  # Remarks: The timeout starts at $params["basetimeout"]. Every line of
  #          $params["timeoutconditions"] has the form
  #          "<filter> [<filter> ...] <timeout>" with filters addressing
  #          - port
  #          - country (-> geoip)
  #          - authstate
  #          If all filters of a line match and its timeout is higher than
  #          the current one, it replaces it. Blank lines are skipped.
  #          NOTE that the timeout is determined when the connection is
  #          initialized. A later authstate change is not taken into
  #          consideration anymore.
  #          NOTE(review): filters are matched as substrings, so port 80
  #          also satisfies a "port:8080" filter - confirm this is intended.

  timeout = $params["basetimeout"]

  $params["timeoutconditions"].each do |conditionline|
    tokens = conditionline.split(" ")
    next if tokens.empty?   # a blank line carries neither filter nor timeout

    filters = tokens[0...-1]
    # A filter only rejects the line if it addresses the attribute
    # ("port:", "country:", "auth:") and does not contain the given value.
    hit = filters.none? do |condition|
      (condition =~ /port:/ && condition !~ /port:#{port}/) ||
        (condition =~ /country:/ && condition !~ /country:#{country}/) ||
        (condition =~ /auth:/ && condition !~ /auth:#{authstate}/)
    end
    next unless hit

    mytimeout = tokens.last.to_i
    timeout = mytimeout if mytimeout > timeout
  end

  vprint "Determined timeout #{timeout} based on country:#{country} port:#{port} authstate:#{authstate}"

  return timeout

end

def start_new_connection_monitor
  # Purpose: start a background thread that reads the output of the tcpdump
  #          command and adds newly observed connections to $connections
  # Input  : none (command is taken from $params['tcpdump_cmd'])
  # Output : error message on stderr if the command is not running
  # Return : none; exits with status 1 if tcpdump is not alive after 2s
  # Remarks: The thread keeps running for as long as tcpdump produces output.
  #          The child pid is shared with the thread through a local variable
  #          and is only read after the grace period, so the thread has had
  #          time to launch the command. No helper method is defined here,
  #          which keeps Kernel#caller intact.
  #          Assumes tcpdump lines of the form
  #          "<time> IP <src>.<port> > <dst>.<port>: ..." - TODO confirm
  #          against tcpdump_cmd_base in the config file.

  pid = nil

  Thread.new do
    IO.popen($params['tcpdump_cmd']) do |pipe|
      pid = pipe.pid

      pipe.each do |line|
        fields = line.split(' ')
        # Skip lines that are too short to hold both address fields
        next if fields[2].nil? || fields[4].nil?

        # covers Linux and Solaris. The latter is separating by dot, not colon
        pair = fields[4].gsub(/[.:]$/, "")

        # ignore our own IP (outgoing connections) and portnum <= 1024
        # (listen socket on server side)
        next unless pair.match($params['ignore-ip-regex']).nil?
        next unless pair.split(".")[4].to_i > 1024

        dport = fields[2].split(".")[4]
        next if dport.nil?
        dport = dport.gsub(/[.:]$/, "")

        key = pair + "." + dport    # -> example 127.0.0.1.34566.80

        # A known key means the packet belongs to an existing connection
        # (e.g. a retransmission); nothing to do then.
        next unless Connection.find_by_key(key).nil?

        conn = Connection.new(key, true)
        vprint "New connection by #{conn} added to table."
        $connections.push(conn)
      end
    end
  end

  # Give the thread time to launch the command before checking on it
  sleep 2

  unless pid && pid_exists?(pid)
    $stderr.puts "Launch of forked tcpdump failed. Are you root? This is fatal. Aborting."
    exit 1
  end

end

def start_new_login_monitor
  # Purpose: start a background thread that follows the auth log with
  #          "tail -f" and records every new line as an authenticated IP
  # Input  : none (log path is taken from $params["auth-log"])
  # Output : none
  # Return : the Thread object running the monitor
  # Remarks: Each line of the log, with the trailing newline removed, is
  #          treated as an IP address. Connections of that IP which are
  #          already tracked are switched to authstate "auth".

  Thread.new do
    IO.popen("tail -f #{$params["auth-log"]}") do |pipe|
      pipe.each do |authip|
        authip.chomp!
        next if $authips.include?(authip)

        vprint "New IP address added to list of authenticated client IPs: #{authip}"
        $authips.push(authip)

        $connections.each do |conn|
          next unless conn.ip == authip

          conn.authstate = "auth"
          vprint "Existing connection changed state to authenticated: #{conn}"
        end
      end
    end
  end

end

def run_netstat
  # Purpose: run the netstat command, mark every connection it lists as
  #          seen and report connections that exceeded their timeout
  # Input  : none (command is taken from $params["netstat_cmd"])
  # Output : BLACKIP suggestions on stdout
  # Return : none
  # Remarks: The output is walked with each_line, since String has no #each
  #          on Ruby >= 1.9. Lines with too few fields are skipped.

  netstat_output = `#{$params["netstat_cmd"]}`
  nowstrftime = Time.now.strftime('%d/%b/%Y:%H:%M:%S %z')

  netstat_output.each_line do |line|
    # i.e. Linux: tcp        0      0 192.168.1.35:80         192.168.1.101:39427     ESTABLISHED
    items = line.split(" ")
    next if items[3].nil? || items[4].nil?

    ip_sport = items[4].gsub(":", ".")              # -> i.e. 93.122.234.178.55949
    dport = items[3].gsub(":", ".").split(".")[4]   # i.e. 80
    next if dport.nil?
    key = ip_sport + "." + dport                    # -> i.e. 93.122.234.178.34566.80

    conn = Connection.find_by_key(key)
    if conn.nil?
      if ip_sport.match($params['ignore-ip-regex']).nil?
        conn = Connection.new(key, true)
        vprint "New existing connection from #{conn} found via netstat, adding to table (has script been restarted?)"
        $connections.push(conn)
      end
    else
      if conn.timeouted?
        print "[#{nowstrftime}] Connection #{conn} too old. BLACKIP suggestion: #{conn.ip}\n"
        # Black IP script should be called externally, after
        # being triggered by STDOUT
        #
        # Attention: blackip script is unlikely to drop existing
        # connections. This means that they keep appearing
        # in netstat and thus in this monitoring script as well
        # until they are closed.
      else
        vprint "Known connection #{conn} found active and within limits."
      end

      conn.seen = true
    end
  end

end

def cleanup_connections()
  # Purpose: remove every connection whose seen flag is false from
  #          $connections
  # Input  : none
  # Output : verbose message per removed connection
  # Return : $connections
  # Remarks: delete_if removes entries during a single pass. Deleting inside
  #          a plain each loop shifts the remaining elements and skips the
  #          entry that follows each removed one.
  #          Note that the maxage is likely too big. Take sleeptime into
  #          consideration.

  $connections.delete_if do |conn|
    stale = (conn.seen == false)
    vprint "Cleanup found an old closed connection #{conn}. Removing." if stale
    stale
  end
end

def count_ips()
  # Purpose: rebuild $ips with one IPcounter per client IP found in
  #          $connections
  # Input  : none
  # Output : none
  # Return : [sum of all connection ages, array of the individual ages],
  #          ages in seconds
  # Remarks: Each counter starts with age 0 and count 0 and is then
  #          incremented once per connection, so the first connection of an
  #          IP is not counted twice in its total age. The current time is
  #          read once so all ages of a pass share the same reference.

  $ips = []
  counters = {}          # ip => IPcounter, avoids rescanning $ips
  totalage = 0
  totalages = []
  now = Time.now.to_i

  $connections.each do |conn|
    age = now - conn.starttime

    ipitem = counters[conn.ip]
    if ipitem.nil?
      ipitem = IPcounter.new(conn.ip, conn.country, 0)
      counters[conn.ip] = ipitem
      $ips.push(ipitem)
    end

    ipitem.sumconnections = ipitem.sumconnections + 1
    ipitem.totalage = ipitem.totalage + age
    totalage = totalage + age
    totalages << age
  end

  return totalage, totalages
end

def report_ips()
  # Purpose: print a BLACKIP suggestion for every entry of $ips that holds
  #          too many connections
  # Input  : none
  # Output : stdout; one verbose message per examined entry
  # Return : $ips

  $ips.each do |counter|
    if counter.too_many_connections?
      stamp = Time.now.strftime('%d/%b/%Y:%H:%M:%S %z')
      print "[#{stamp}] #{counter} has too many open connections. BLACKIP suggestion: #{counter.ip}\n"
    end

    vprint "#{counter} examined."
  end
end

def report_stats(totalage, totalages)
  # Purpose: print one summary line about all tracked connections
  # Input  : totalage  - sum of all connection ages in seconds
  #          totalages - array of the individual ages, used for the median
  # Output : stdout
  # Return : none

  stamp = Time.now.strftime('%d/%b/%Y:%H:%M:%S %z')
  active = Connection.num_instances
  middle = median(totalages)

  print "[#{stamp}] #{active} active connections; median age #{middle} secs; total age #{totalage} secs.\n"
end

def do_sleep()
  # Purpose: pause the main loop for $params['sleeptime'] seconds
  # Input  : none
  # Output : verbose message
  # Return : whatever Kernel#sleep returns

  pause = $params['sleeptime']
  vprint "Sleep #{pause}s ..."
  sleep pause
end



# -----------------------------------------------------------
# GENERIC SUB-FUNCTIONS (those that come with every script)
# -----------------------------------------------------------

def vprint(text)
  # Purpose: output text with a timestamp prefix if $params["verbose"] is set
  # Input  : String input
  # Output : stdout
  # Return : nil
  # Remarks: none

  return unless $params["verbose"]

  stamp = Time.now.strftime('%d/%b/%Y:%H:%M:%S %z')
  print "[#{stamp}] #{text}\n"

end

def usage()
  # Purpose: output usage information
  # Input  : none
  # Output : stdout
  # Return : true
  # Remarks: rdoc/usage is not available in Ruby 1.9, so the text is
  #          hardcoded here. Only options that the command line parser
  #          accepts are listed.

  puts <<EOF

Synopsis
--------

Monitors TCP connections on the configured ports and prints BLACKIP
suggestions for connections that stay open too long and for IPs that
hold too many open connections.

Usage
-----

Options
-c  --config STR       Path to config file (see below for default locations)
-h  --help             This help text
-u  --usage -?         This help text
-v  --verbose          Be verbose

Default locations of Config File
--------------------------------
$HOME/.flying-frog.rc
/root/.flying-frog.rc
/etc/flying-frog.rc

EOF

  return true

end



def check_parameters()
  # Purpose: check parameters
  # Input  : global variable params
  # Output : stderr in case there is a problem with one of the parameters
  # Return : true if there is an error with one of the parameters; or false in absence of errors
  # Remarks: Checks auth-log (set and existing), geoip-db (set and existing)
  #          and ports (set and not empty); returns at the first problem.
  #          File.exist? is used because File.exists? is gone in Ruby >= 3.2.

  err_status = false

  unless /^.+$/.match($params["auth-log"])
    $stderr.puts "Error in parameter auth-log. This is fatal. Aborting."
    return true
  end
  unless File.exist? $params["auth-log"]
    $stderr.puts "Error in parameter auth-log: file #{$params["auth-log"]} does not exist. This is fatal. Aborting."
    return true
  end

  # A missing geoip-db is reported like a non-existing file; passing nil to
  # File.exist? would raise instead.
  if $params["geoip-db"].nil? or not File.exist? $params["geoip-db"]
    $stderr.puts "Error in parameter geoip-db: file #{$params["geoip-db"]} does not exist. This is fatal. Aborting."
    return true
  end

  if $params["ports"].nil? or $params["ports"] == ""
    $stderr.puts "Parameter 'ports' not passed. You have to pass at least one port."
    return true
  end

  # unless /^foo$/.match($params["x"])
  #  $stderr.puts "Error in parameter x ..."
  #  err_status = true
  # end

  return err_status

end

def median(array, already_sorted=false)
  # Purpose: calculate median value from an array of values
  # Input  : array          - the values
  #          already_sorted - pass true to skip sorting
  # Output : none
  # Return : median; 0 for an empty array. For an even number of values
  #          the two middle values are added and divided by 2.
  # Remarks: The passed array is not modified.

  return 0 if array.empty?

  values = already_sorted ? array : array.sort
  middle, remainder = values.size.divmod(2)

  if remainder == 1
    values[middle]
  else
    (values[middle - 1] + values[middle]) / 2
  end

end

def pid_exists?(pid)
  # Purpose: check whether a given pid is still active
  # Input  : pid
  # Output : none
  # Return : true if signal 0 could be sent to the pid, false if sending
  #          raised any error
  # Remarks: None

  Process.kill(0, pid)
  true
rescue
  false
end

# -----------------------------------------------------------
# COMMAND LINE PARAMETER EXTRACTION
# -----------------------------------------------------------

def read_command_line_params
  # Purpose: read the command line options into $params
  # Input  : command line (processed by GetoptLong)
  # Output : usage text for the help options; stderr on errors
  # Return : result of the option iteration; exits on help and on errors
  # Remarks: Sets $params["verbose"] and $params["config"].

  parser = GetoptLong.new(
    [ '-h', '--help', '-?', '-u', '--usage',  GetoptLong::NO_ARGUMENT ],
    [ '-v', '--verbose',                      GetoptLong::NO_ARGUMENT ],
    [ '-c', '--config',                       GetoptLong::REQUIRED_ARGUMENT ]
  )

  parser.each do |opt, arg|
    case opt
    when '-h', '--help', '-?', '-u', '--usage'
      usage
      exit
    when '-v', '--verbose'
      $params["verbose"] = true
    when '-c', '--config'
      $params["config"] = arg
    end
  end

rescue GetoptLong::InvalidOption
  $stderr.puts "Unknown command line option encountered. This is fatal. Aborting."
  exit 1
rescue
  $stderr.puts "Unknown error in command line parameter extraction. This is fatal. Aborting."
  exit 1
end

# -----------------------------------------------------------
# MAIN
# -----------------------------------------------------------

# Startup: each step returns a true value on failure, which aborts the script.
exit 1 if read_command_line_params

vprint "Loading configuration"
exit 1 if load_config

vprint "Starting parameter checking"
exit 1 if check_parameters

vprint "Starting monitor for new connections"
start_new_connection_monitor

vprint "Starting monitor for new logins"
start_new_login_monitor

vprint "Starting main loop"

# One pass: clear the seen flags, let netstat set them again for live
# connections, drop the connections that stayed unseen, then report.
while true

  Connection.set_all_unseen

  run_netstat

  cleanup_connections

  totalage, totalages = count_ips

  report_stats(totalage, totalages)

  report_ips

  do_sleep

end

