/* 
   qrblcheck is basically rblcheck 1.4 with a chunk of code tacked on that
   will extract the ip address of the last relay mail server from a message 
   input on stdin -- handy for use in a .qmail file
   
   The level of blocking desired is specified as the only command line 
   argument and should be 16 or higher (no checks made), 8 (RBL only), 
   4 (RBL and DUL), 2 (RBL, DUL and RSS), 1 (RBL, DUL, RSS, and ORBS 
   outputs) or 0 (all + ORBS relays). Obviously, more will be blocked 
   the lower the number you use, with 0 blocking a lot of mail, both spam and
   legit.
   
   qrblcheck will return 0 if no matches were made at the specified level
   or 100 if a match is made at the specified level. A return code of 100
   tells qmail that delivery failed with a hard error. The message will be
   returned. For safety's sake, if no IP address is matched, qrblcheck will
   return 0. While not perfect, it's better than dropping mail just 
   because I couldn't foresee all the different ways IPs would present
   themselves in headers. Comments welcome.
   
   rblcheck 1.4 is Copyright (C) 1997, 1998 Edward S. Marshall

   rblcheck's main() was renamed to mainrbl(), and my main() was added
   in its place. mainrbl() was modified some to handle multiple lists
   a bit differently.
   
   2003/05/28 -- .94: I didn't realize people were not only still using it
                      but also debugging it. Thanks to webservice.be for
                      not only finding a bug, but submitting the fix.
   
   2000/11/21 -- .93: cleaned up some code
   
   2000/11/03 -- .92: cleaned up some code
   
   2000/11/03 -- .91: whoops, first version had a nasty bug. poor
                      memory handling on my part.

   2000/10/31 -- .9: first version. Not as clean as I want, but it'll do
                     problem: could match invalid IPs

   

   If you have problems compiling try including the resolv library on the 
   gcc command line, like so:

          gcc -o qrblcheck qrblcheck.c -lresolv
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/nameser.h>
#include <arpa/inet.h>
#include <resolv.h>
#include <netdb.h>
#include <regex.h>

#define VERSION "qrblcheck 0.94"
#define RESULT_SIZE 4096 /* What is the longest result text we support? */

/* POSIX extended regex that picks the parenthesised dotted-quad out of a
   "Received: from ... (a.b.c.d)" header line; the address is capture
   group 1. The dots are escaped so that they only match a literal '.',
   otherwise a string such as "(1234567)" would be accepted as an address.
   The REGEXMATCH is still not perfect... it will match 999.999.999.999 */
#define REGEXMATCH "^Received: from.*\\(([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})\\)"


/* One DNS blocklist: the zone that is queried and the weight that
   mainrbl() adds to its running total when an address is listed there. */
struct rbl
{
	char * site;
	unsigned int rating;
};

/* Table of the blocklists we check. This is a plain array, not a linked
   list; the entry whose site is NULL marks the end of the table. */
struct rbl rblsites[] = {
	{ "rbl.maps.vix.com",      16 },
	{ "dul.maps.vix.com",       8 },
	{ "relays.mail-abuse.org",  4 },
	{ "outputs.orbs.org",       2 },
	{ "relays.orbs.org",        1 },
	{ NULL,                     0 }
};

/*
** PORTABILITY STUFF.
*/

/* Fallback definitions, unabashedly borrowed from the bind 8.1.1 sources,
   just in case, since bind 4.x defines these differently, and some
   systems don't even have them. Each one is only defined here when the
   headers included above have not already provided it. */

/* Size in bytes of a 16-bit field in a DNS message. */
#ifndef NS_INT16SZ
#define NS_INT16SZ 2
#endif

/* Size in bytes of a 32-bit field in a DNS message. */
#ifndef NS_INT32SZ
#define NS_INT32SZ 4
#endif

/* NS_GET16(s, cp): store the big-endian 16-bit value found at cp in s
   and advance cp past it. Wrapped in do { } while (0) so that the macro
   behaves as a single statement (e.g. inside an unbraced if/else), and
   reads through a const pointer so it can be used on const buffers. */
#ifndef NS_GET16
#define NS_GET16(s, cp) do { \
	const unsigned char *t_cp = (const unsigned char *)(cp); \
	(s) = ((unsigned short)t_cp[0] << 8) \
	    | ((unsigned short)t_cp[1]) \
	    ; \
	(cp) += NS_INT16SZ; \
} while (0)
#endif

/* DNS resource record type number for TXT records. */
#ifndef T_TXT
#define T_TXT 16
#endif

/* Size of the fixed buffer used for a DNS reply. */
#ifndef PACKETSZ
#define PACKETSZ 512
#endif

/* Some compilers don't understand 'const'. */
#ifndef __STDC__
#define const
#endif

/* rblcheck()
 * Checks the specified dotted-quad address against the provided RBL
 * domain by looking up an A record for "d.c.b.a.<rbldomain>".
 *
 * a         - the IPv4 address to check (network byte order).
 * rbldomain - the blocklist zone to query.
 * txt       - if non-zero, a TXT record lookup is also performed and its
 *             text is returned.
 *
 * Returns a malloc()ed string on a successful match: the (escaped) text
 * of the TXT record, or an empty string when no text was requested or
 * available. The caller owns the string and must free() it. Returns NULL
 * on an unsuccessful match (the A lookup failed) or when memory could
 * not be allocated. */
char * rblcheck( struct in_addr a, char * rbldomain, int txt )
{
	char * domain;
	char * result = NULL;
	u_char fixedans[ PACKETSZ ];
	u_char * answer = fixedans;
	u_char * bigans = NULL;		/* heap buffer for oversized replies */
	int anslen = PACKETSZ;		/* size of the buffer behind "answer" */
	const u_char * cp;
	const u_char * aend;		/* end of the received reply */
	const u_char * cend;		/* end of the TXT record data */
	char * rp;
	const char * rend;
	const u_char * p = ( const u_char * )&a;
	size_t domainsz;
	int len;
	int i, j;
	int DEBUG = 0;

	/* 16 characters max in a dotted-quad address plus the joining
	   dot, plus 1 for null */
	domainsz = 17 + strlen( rbldomain );
	domain = malloc( domainsz );
	if( domain == NULL )
		return NULL;

	/* Create a domain name, in reverse. */
	snprintf( domain, domainsz, "%d.%d.%d.%d.%s",
	  p[3], p[2], p[1], p[0], rbldomain );
	if (DEBUG) printf( "%s\n", domain );

	/* Make our DNS query. */
	res_init();
	len = res_query( domain, C_IN, T_A, answer, anslen );

	/* Was there a problem? If so, the domain doesn't exist. */
	if( len < 0 )
		goto done;

	/* The reply did not fit; retry with a buffer that is big enough. */
	if( len > anslen )
	{
		bigans = malloc( len );
		if( bigans == NULL )
			goto done;
		answer = bigans;
		anslen = len;
		len = res_query( domain, C_IN, T_A, answer, anslen );
		if( len < 0 )
			goto done;
	}

	/* From here on the address is known to be listed. */
	result = malloc( RESULT_SIZE );
	if( result == NULL )
		goto done;
	result[ 0 ] = '\0';
	if( !txt )
		goto done;

	/* Make another DNS query for textual data; this shouldn't
	   be a performance hit, since it'll now be cached at the
	   nameserver we're using. */
	len = res_query( domain, C_IN, T_TXT, answer, anslen );

	/* Just in case there's no TXT record... */
	if( len < 0 )
		goto done;

	/* Only parse what actually landed in our buffer. */
	if( len > anslen )
		len = anslen;
	if( len < ( int )sizeof( HEADER ) )
		goto done;
	aend = answer + len;

	/* Skip the header and the labels of the name we queried. */
	cp = answer + sizeof( HEADER );
	while( cp < aend && *cp != '\0' )
	{
		i = *cp;
		if( i >= aend - cp )
			goto done;	/* label runs past the reply */
		cp += 1 + i;
	}

	/* We still need: the root label, query type and class, the
	   answer's name, its type, class and ttl, and the data length. */
	if( aend - cp < 1 + NS_INT16SZ + NS_INT32SZ
	  + ( NS_INT16SZ * 2 ) + NS_INT32SZ + NS_INT16SZ )
		goto done;

	/* Skip the zero byte that ends the queried name (1), the query
	   type and class (2 + 2), and the owner name of the first
	   answer, which is assumed to be a 2-byte compression pointer. */
	cp += 1 + NS_INT16SZ + NS_INT32SZ;

	/* Skip the type, class and ttl. */
	cp += ( NS_INT16SZ * 2 ) + NS_INT32SZ;

	/* Get the length and end of the record data, clamped to what we
	   actually received. */
	NS_GET16( i, cp );
	if( i > aend - cp )
		i = ( int )( aend - cp );
	cend = cp + i;

	/* Iterate over any multiple character-strings we might have. In
	   this context, it's unlikely, but anyway. Each one is a length
	   byte followed by that many bytes of text. */
	rp = result;
	rend = result + RESULT_SIZE - 1;	/* keep room for the null */
	while( cp < cend && rp < rend )
	{
		i = *cp++;
		for( j = i; j > 0 && cp < cend && rp < rend; j-- )
		{
			if( *cp == '\n' || *cp == '"' || *cp == '\\' )
			{
				/* An escaped character takes two bytes;
				   stop rather than overrun the buffer. */
				if( rend - rp < 2 )
					goto terminate;
				*rp++ = '\\';
			}
			*rp++ = ( char )*cp++;
		}
	}
terminate:
	*rp = '\0';

done:
	free( bigans );
	free( domain );
	return result;
}

/* mainrbl()
 * Checks one address against every list in rblsites[] whose rating is
 * at least max_rating.
 *
 * address    - host name or dotted-quad address to check.
 * max_rating - lists with a rating below this value are skipped.
 *
 * Returns the sum of the ratings of all consulted lists on which the
 * address was found, or 0 if the address could not be turned into an
 * IPv4 address (so that, for safety's sake, nothing is blocked). */
int mainrbl( char *address, int max_rating )
{
        struct hostent *ent;
        struct in_addr a;
        int txt = 0;            /* we never need the TXT explanation */
        int rblfiltered = 0;
        char * response;
        struct rbl * ptr;
        int DEBUG = 0;

        if (DEBUG) printf( "before gethostbyname: %s\n", address );
        ent = gethostbyname(address);
        if (ent != NULL && ent->h_addr_list[0] != NULL
            && ent->h_length == (int)sizeof(a)) {
                memcpy(&a, ent->h_addr_list[0], sizeof(a));
                if (ent->h_addr_list[1]) {
                        fprintf(stderr, "%s resolved to multiple addresses;"
                                " checking only the first\n", address);
                }
                /* fprintf(stderr, "checking %s\n", inet_ntoa(a)); */
        } else if (!inet_aton(address, &a)) {
                /* No usable address: querying with an uninitialised
                   in_addr would be meaningless, so report no match. */
                return 0;
        }

        for( ptr = rblsites; ptr->site != NULL; ptr++ )
        {
           if (DEBUG) printf( "site: %s %u ", ptr->site, ptr->rating );

           /* Compare as signed values so that a negative max_rating is
              not silently converted to a huge unsigned number. */
           if (max_rating > (int)ptr->rating)
              continue;

           response = rblcheck( a, ptr->site, txt );
           if (DEBUG) printf("%s\n", response ? response : "(not listed)");
           if( response != NULL ) {
              rblfiltered += ptr->rating;
              /* rblcheck() hands us a malloc()ed string. */
              free( response );
           }
        }
        return rblfiltered;
}

/* print_usage()
 * Prints the version banner and the command line help to stdout. */
static void print_usage( const char *progname )
{
   printf( "-- %s --\n", VERSION );
   printf( "usage: %s level\nwhere level = 0-16\n", progname );
   printf( "16 being no blocking and 0 being block as much as possible\n" );
}

/* main()
 * Reads a mail message from stdin, takes the address from the first
 * header line that matches REGEXMATCH, and checks it with mainrbl() at
 * the level given as the only command line argument.
 *
 * Returns 1 on a usage error, 100 if the rating returned by mainrbl()
 * exceeds the requested level, and 0 otherwise (including when no
 * address was found before the end of the headers). */
int main (int argc, char *argv[])
{
   char line[256];
   char addr[16];            /* "255.255.255.255" plus the null */
   regex_t reg;
   regmatch_t matches[2];    /* [0] whole match, [1] the address */
   regoff_t addr_l;
   int rating, max_rating;
   int found = 0;
   int DEBUG = 0;
   int DEBUG_counter = 0;

   /* if user didn't supply the single argument required */
   if (argc != 2) {
      print_usage( argc > 0 ? argv[0] : "qrblcheck" );
      return(1);
   }

   /* change the character-based arg into an integer; refuse anything
      that is not a number, and negative levels (with a negative level
      even an unlisted relay's rating of 0 would count as a hit) */
   if (sscanf(argv[1], "%d", &max_rating) != 1 || max_rating < 0) {
      print_usage( argv[0] );
      return(1);
   }

   /* compile our REGEXMATCH #define once, up front;             */
   /* regcomp and regexec both return 0 on success               */
   if (regcomp(&reg, REGEXMATCH, REG_EXTENDED)) {
      printf( "whoa, failed to compile regex\n" );
      return(0);
   }

   /* read the message w/headers from stdin, until EOF */
   while (fgets(line, sizeof line, stdin)) {
      if (DEBUG) printf( "line %d:  %s", ++DEBUG_counter, line );

      if (regexec( &reg, line, sizeof matches / sizeof matches[0],
                   matches, 0 )) {
         /* no match: stop if we're done with the headers, */
         /* otherwise grab the next line                   */
         if (line[0] == '\n')
            break;
         continue;
      }

      /* we found a match */
      if (DEBUG) printf ( "match\n" );

      /* copy the captured address out of the line; the regex cannot */
      /* produce more than 15 characters, but check anyway            */
      addr_l = matches[1].rm_eo - matches[1].rm_so;
      if (addr_l > 0 && (size_t)addr_l < sizeof addr) {
         memcpy( addr, line + matches[1].rm_so, (size_t)addr_l );
         addr[addr_l] = '\0';
         found = 1;
      }
      break;
   }

   /* release memory created by regcomp() above */
   regfree(&reg);

   /* return 0 if no IP was found */
   if (!found)
      return(0);

   if (DEBUG) printf( "address matched: %s\n", addr );
   /* call mainrbl with address and supplied max rating */
   rating = mainrbl( addr, max_rating );

   /* if mainrbl returns num > supplied max rating       */
   /* print err msg and return 100, otherwise return 0   */
   if (rating > max_rating) {
      printf( "mail denied- potential spam rating =" );
      printf( " %d based on relay %s\n", rating, addr );
      return(100);
   }
   return(0);
}
