/* ===================================================================
 * 
 * match.c -- wildcard matching functions
 *   (rename to reg.c for ircII)
 *
 * Shortly after it was discovered that ircd could be sent into the
 *  wildcard recursion abyss by using bans like:
 *     *?*?*?*?!*?*?*?*?!*?*??**?*??*?*?*?*?*?*??*??*?*?*?*?*?*?*?*?*
 *  an iterative re-write of wild_match() was posted on operlist.
 *  Robey Pointer grabbed that and adapted it to the Eggdrop bot, which
 *  is where I found it.  After Robey added a wrapper to catch obvious
 *  host name mismatches, I reversed the string matcher entirely so that
 *  it would not have to redundantly verify the ends of the strings.
 * Anyway, I took a look at the IrcII version which used a table to
 *  speed up tolower(c), but that seemed pretty inefficient still.
 *  Remembering having seen (SOMEWHERE) an xor test being used for case
 *  insensitivity, I put that together with a tabular version of the
 *  !isalpha(c) function, which yielded a noticeable improvement.
 * Once that was working, I added support for % so that I could use
 *  the same code both in Eggdrop and in my IrcII client.  Pleased
 *  with this, I added the option of a fourth wildcard, ~, which
 *  matches varying amounts of whitespace (at LEAST one space, though,
 *  for sanity reasons).
 * Naturally, it was NOT quite that simple :), and this code would not
 *  have been possible without the prior work and suggestions of all
 *  the sources mentioned above.  Also, special thanks to Robey for
 *  all his time/help tracking down bugs and his ever-helpful advice.
 *
 *   Chris Fuller  (aka Fred1@IRC & Fwitz@IRC)
 *     crf@cfox.bchs.uh.edu
 * 
 * I hereby release this code into the public domain
 *
 * =================================================================== */

/* Build selector.  While EGGDROP is defined, the forward matcher below is
 * compiled under the name wild_match_per(), the ~ wildcard (WILDT) stays
 * enabled, and the additional matchers in the final #else section of this
 * file are built.  Without it, the forward matcher is named wild_match()
 * and the IrcII compatibility wrappers are built instead.
 *
 * Remove the next line to use this in IrcII */
#define EGGDROP


/* ===================================================================
 * Best to leave stuff after this point alone, but go on and change
 * it if you're adventurous...
 * =================================================================== */

/* The quoting character: the mask character that follows it is compared
 * literally instead of being interpreted as a wildcard (do not undef)  */
#define QUOTE '\\'

/* The "matches ANYTHING" wildcard (do not undef)                       */
#define WILDS '*'

/* The "matches ANY NUMBER OF NON-SPACE CHARS" wildcard; only the
 * forward, case-insensitive matcher gives it this meaning
 * (do not undef)                                                       */
#define WILDP '%'

/* The "matches EXACTLY ONE CHARACTER" wildcard (do not undef)          */
#define WILDQ '?'

/* The "matches AT LEAST ONE SPACE" wildcard (undef me to disable!)     */
#define WILDT '~'


/* This makes sure WILDT doesn't get used in the IrcII version of
 * this code.  If ya wanna live dangerously, you can remove these 3
 * lines, but WARNING: IT WOULD MAKE THIS CODE INCOMPATIBLE WITH THE
 * CURRENT reg.c OF IrcII!!!  Support for ~ is NOT in the reg.c of
 * IrcII, and adding it may cause compatibility problems, especially
 * in scripts.  If you don't think you have to worry about that, go
 * for it!
 */
#ifndef EGGDROP
#undef WILDT
#endif


/* ===================================================================
 * If you edit below this line and it stops working, don't even THINK
 * about whining to *ME* about it!
 * =================================================================== */

/* Tabular nisalpha: one table lookup replaces the comparison and not
 * operations that are normally used, in exchange for 256 bytes of memory.
 *
 * tab_nisalpha[c] is 0 when byte c is an ASCII letter (A-Z, a-z) and 1
 * for every other byte value.
 *
 * Both macros index the table through an unsigned char cast so that a
 * plain or signed char holding a value above 127 cannot become a
 * negative (out-of-bounds) index. */
#undef isalpha
#define isalpha(x) (!tab_nisalpha[(unsigned char)(x)])
#define nisalpha(x) (tab_nisalpha[(unsigned char)(x)])
unsigned char tab_nisalpha[256]=  {
   /* 0x00 - 0x3F: control characters, punctuation, digits */
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   /* 0x40 - 0x5F: '@', then A-Z, then [ \ ] ^ _ */
   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
   /* 0x60 - 0x7F: '`', then a-z, then { | } ~ DEL */
   1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
   /* 0x80 - 0xFF: never treated as letters */
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};

/* Matcher return values.  MATCH is not a constant: it expands to an
 * expression over the local counters match, saved and sofar, so it can
 * only be used inside a function that declares all three.  The matcher
 * below initialises match to 1, so a successful result is at least 1.
 *
 * Changing these is probably counter-productive :) */
#define MATCH (match+saved+sofar)
#define NOMATCH 0

/* ======================================================================
 * EGGDROP:   wild_match_per(char *ma, char *na)
 * IrcII:     wild_match(char *ma, char *na)
 * 
 * Features:  Forward, case-insensitive, ?, *, %, ~(optional)
 * Best use:  Generic string matching, such as in IrcII-esque bindings
 *
 * ma is the mask (the string whose wildcards are interpreted) and na is
 * the string tested against it.  Returns NOMATCH (0) when either pointer
 * is null, either string is empty, or the strings do not match; returns
 * MATCH otherwise, i.e. 1 plus the counters that are incremented for
 * characters matched without the help of *, % or ?.
 *
 * The matcher is iterative.  It remembers one restart point for the most
 * recent * (lsm/lsn) and one for the most recent % (lpm/lpn) and, on a
 * mismatch, resumes from there with the name advanced by one character.
 * ====================================================================== */
#ifdef EGGDROP
int wild_match_per(ma,na)
#else
int wild_match(ma,na)
#endif
unsigned char *ma, *na;
{
   /* m/n: current positions in mask and name.
    * lsm/lsn: mask/name positions saved at the last *.
    * lpm/lpn: mask/name positions saved at the last %. */
   unsigned char *m=ma, *n=na, *lsm=0, *lsn=0, *lpm=0, *lpn=0;
   /* match/saved/sofar are the counters summed by the MATCH macro. */
   int match=1, saved=0, sofar=0;
#ifdef WILDT
   int space;
#endif
   
   /* take care of null strings (should never match) */
   if ((ma==0) || (na==0) || (!*ma) || (!*na)) return NOMATCH;

   while (1)  {
      if (!*m)  {
	 if (!*n) return MATCH;  /* Made it through both strings! */
	 /* mask used up but name is not: drop down to the backtracking code */
      }
      else if (!*n)  {
	 /* name used up: the rest of the mask may only consist of * and % */
	 while ((*m==WILDS)||(*m==WILDP)) m++;
	 return (*m)? NOMATCH : MATCH;
      }
#ifdef WILDT
      else if (*m==WILDT)  {
	 /* Count the run of ~ and literal spaces in the mask, then consume
	  * spaces from the name.  The run is satisfied when the name
	  * supplied at least as many spaces as the run is long; otherwise
	  * control drops down to the backtracking code. */
	 space=0; while ((*m==WILDT)||(*m==' '))  { m++; space++; }
	 sofar+=space;
	 while (*n==' ') { n++; space--; }
	 if (space<=0) continue;
      }
#endif
      else  {
	 switch (*m)  {
	  case WILDP:
	    /* collapse a run of % into one */
	    while (*(++m)==WILDP);
	    if (*m!=WILDS)  {
	       /* Record a % restart point unless the name is sitting on a
	        * space, in which case % simply matches nothing here. */
	       if (*n!=' ')  {
		  lpm=m; lpn=n;
		  saved+=sofar; sofar=0;
	       } 
	       continue;
	    }
	    /* a % run followed by * is handled as a * */
	    /* FALL THROUGH */
	  case WILDS:
	    /* skip every wildcard adjacent to the * */
#ifdef WILDT
	    do m++; while ((*m==WILDS)||(*m==WILDP)||(*m==WILDT));
#else
	    do m++; while ((*m==WILDS)||(*m==WILDP));
#endif
	    /* record the * restart point and cancel any pending % restart */
	    lsm=m; lsn=n; lpm=0;
	    match+=(saved+sofar); saved=sofar=0;
	    continue;
	  case WILDQ:  m++; n++; continue;  /* ? consumes one name character */
	  case QUOTE:  m++;  /* step over the quote; next mask char is literal */
	 }

	 /* Case-insensitive comparison: an XOR of 0 means identical bytes;
	  * an XOR of 32 means they differ only in bit 0x20, which is
	  * accepted only when the name character is a letter. */
    	 switch (*m ^ *n)  {
	  case 32:  if (nisalpha(*n)) break;
	  /* letter differing only in case: fall through to the match case */
	  case 0:   m++; n++; sofar++; continue;
	 }
      }
      /* Reached on any mismatch.  First try the last %: resume after it
       * with the name one character further along. */
      if (lpm)  {
	 n=++lpn;
	 m=lpm; sofar=0;
	 /* (*n|32)==32 holds only for NUL and space: % cannot go further */
	 if ((*n|32)==32) lpm=0;
	 continue; 
      }
      /* Otherwise try the last *; once the name is exhausted the restart
       * point is dropped. */
      if (lsm)  {
	 n=++lsn; m=lsm; if (!*n) lsm=0;
	 saved=sofar=0; continue;
      }
      return NOMATCH;
   }
}


#ifndef EGGDROP

/* For IrcII compatibility */
/* Returns wild_match()'s result lowered by one, so that a failed match
 * (0) is reported as -1. */
int _wild_match(ma,na)
unsigned char *ma, *na;
{
   int score = wild_match(ma,na);

   return score - 1;
}
/* Boolean form of wild_match(): 1 when it reports a match, 0 otherwise. */
int match(ma,na)
unsigned char *ma, *na;
{
   if (wild_match(ma,na))
      return 1;
   return 0;
}

#else


/* ====================================================================
 * Remaining code is not used by IrcII 
 * ==================================================================== */


/* The next two matchers don't support %, which changes MATCH's def:
 * they declare no `saved' counter, so the result is just match+sofar. */
#undef MATCH
#define MATCH (match+sofar)


/* ====================================================================
 * EGGDROP:   wild_match(char *ma, char *na)
 * IrcII:     NOT USED
 * 
 * Features:  Backwards, case-insensitive, ?, *
 * Best use:  Matching of hostmasks (since they are likely to begin with
 *             a * rather than end with one).
 *
 * ma is the mask and na the string tested against it.  Both strings are
 * walked from their last character toward their first.  Returns NOMATCH
 * (0) when either pointer is null, either string is empty, or the
 * strings do not match; returns MATCH (1 plus the number of characters
 * matched without the help of * or ?) otherwise.
 *
 * A mask character preceded by QUOTE is compared literally.  % gets no
 * matching semantics of its own here; it is merely skipped when it is
 * adjacent to a * run or left over at the front of the mask.
 *
 * "Exhausted" is represented by m (or n) standing one element before
 * the start of its string; the pointers are never dereferenced there.
 * ==================================================================== */

int wild_match(ma,na)
unsigned char *ma, *na;
{
   /* lsm/lsn: mask/name positions saved at the most recent * */
   unsigned char *m=ma, *n=na, *lsm=0, *lsn=0;
   /* q: nonzero when the current mask character is quoted */
   int match=1, sofar=0, q;

   /* take care of null strings (should never match) */
   if ((ma==0) || (na==0) || (!*ma) || (!*na)) return NOMATCH;
   /* find the end of each string (both are known to be non-empty) */
   while (*(++m));  m--;  while (*(++n));  n--;

   while (1)  {
      if (m<ma)  {
         if (n<na) return MATCH;  /* Made it through both strings! */
         /* mask used up but name is not: drop down to backtracking */
      }
      else if (n<na)  {
         /* name used up: what is left of the mask may only be * and % */
         while ((m>=ma) && ((*m==WILDS)||(*m==WILDP))) m--;
         return (m>=ma)? NOMATCH : MATCH;
      }
      else  {
         /* Only look at the preceding character when there is one: at
          * m==ma the byte before the mask is outside the string and
          * must not be read. */
         q= (m>ma)? (*(m-1)==QUOTE) : 0;
         if (!q)  {
            switch (*m)  {
             case WILDS:
               /* skip the whole run of adjacent * and % */
               do m--; while ((m>=ma) && ((*m==WILDS)||(*m==WILDP)));
               /* the run began with a quoted wildcard: step back onto
                * it so it is compared literally on the next pass */
               if ((m>=ma) && (*m==QUOTE)) m++;
               lsm=m; lsn=n;
               match+=sofar; sofar=0;
               continue;
             case WILDQ:  m--; n--; continue;
            }
         }
         /* Case-insensitive comparison: an XOR of 0 means identical
          * bytes; an XOR of 32 means they differ only in bit 0x20,
          * accepted only when the name character is a letter. */
         switch (*m ^ *n)  {
          case 32:  if (nisalpha(*n)) break;
          /* FALL THROUGH */
          case 0:
            if (q) m--;          /* also step over the quote character */
            m--; n--; sofar++;
            continue;
         }
      }
      /* Mismatch: resume from the last * with the name moved one
       * character toward its start; give up on that * once the name
       * is exhausted. */
      if (lsm)  {
         n=--lsn; m=lsm; if (n<na) lsm=0;
         sofar=0; continue;
      }
      return NOMATCH;
   }
}


/* ======================================================================
 * EGGDROP:   wild_match_file(char *ma, char *na)
 * IrcII:     NOT USED
 * 
 * Features:  Forward, case-sensitive, ?, *
 * Best use:  File mask matching, as it is case-sensitive
 *
 * ma is the mask, na the string tested against it.  Returns NOMATCH (0)
 * for null pointers, empty strings or a failed match, and MATCH (1 plus
 * the number of characters matched without the help of * or ?) on
 * success.  A mask character preceded by QUOTE is compared literally.
 * ====================================================================== */
int wild_match_file(ma,na)
unsigned char *ma, *na;
{
   unsigned char *mask, *name;
   /* restart point recorded at the most recent * */
   unsigned char *star_mask = 0, *star_name = 0;
   /* these two names are required by the MATCH macro */
   int match = 1, sofar = 0;

   /* null or empty strings never match */
   if (ma == 0 || na == 0)
      return NOMATCH;
   if (*ma == 0 || *na == 0)
      return NOMATCH;

   mask = ma;
   name = na;

   for (;;) {
      /* both strings consumed together: success */
      if (*mask == 0 && *name == 0)
         return MATCH;

      /* name consumed first: only trailing * may remain in the mask */
      if (*mask != 0 && *name == 0) {
         while (*mask == WILDS)
            mask++;
         if (*mask != 0)
            return NOMATCH;
         return MATCH;
      }

      if (*mask != 0) {
         if (*mask == WILDS) {
            /* collapse a run of * and remember where to restart */
            while (*++mask == WILDS)
               ;
            star_mask = mask;
            star_name = name;
            match += sofar;
            sofar = 0;
            continue;
         }
         if (*mask == WILDQ) {
            mask++;
            name++;
            continue;
         }
         /* a quote makes the following mask character literal */
         if (*mask == QUOTE)
            mask++;
         if (*mask == *name) {
            mask++;
            name++;
            sofar++;
            continue;
         }
      }

      /* Mismatch (or mask consumed with name left over): without a
       * previous * there is nothing to retry. */
      if (star_mask == 0)
         return NOMATCH;

      /* let the last * swallow one more name character and retry */
      name = ++star_name;
      mask = star_mask;
      if (*name == 0)
         star_mask = 0;
      sofar = 0;
   }
}


#endif