Regexp (slightly OT)

Wed Sep 28 16:19:21 IST 2005

On Wed, Sep 28, 2005 at 11:09:47AM +1000, Peter Russell wrote:
> I have had a need to start making my own custom rules for SA. Seems like 
> most people can already do this easily, but I kept making typos and 
> having issues making them work without retrying a few times.
> 
> Does anyone use any kind of validator, other than their live system or 
> lint, to see if their regexps are failing and if so where?
> 
> I found 
> http://desktopengineer.com/home/all_articles/free_regular_expression_tester
> this tool that does a fair job, but it's not a Linux shell tool and it's 
> almost too overwhelming.
> 
> Anyone got any ideas on a shell tool that can do a similar job?

I built an _extremely_ quick-and-dirty Perl script to help me
build rules. It's immediately below. It isn't pretty, it doesn't
do meta-rules, and it lacks any semblance of elegance, nicety, or
refinement. But it's useful. 

#!/usr/bin/perl

#   SpamAssassin rule builder
#
#   Interactively asks for a rule type (header, body, uri, rawbody), a
#   header name when the type is "header", a rule name, the pattern to be
#   matched, a description and a score, then writes the resulting
#   rule / describe / score lines.
#
#   Does *NO* parsing of patterns beyond optionally compiling the pattern
#   as a Perl regex so it can be tried against sample text.
#   Does not run SpamAssassin --lint.
#   Does not build meta-rules.
#
#   Saved rules are appended to ~/buildrule.rules and echoed to the
#   standard output.

use strict;
use warnings;

# Name reported in the banner and in each rule's "generated by" comment.
my $SCRIPT_NAME = '/home/mikea/bin/buildrule';

# Rules go to ~/buildrule.rules; when HOME is unset we fall back to the
# location this script has always used.
my $HOME_DIR = ( defined( $ENV{HOME} ) && length( $ENV{HOME} ) )
    ? $ENV{HOME}
    : '/home/mikea';
my $RULES_FILE = "$HOME_DIR/buildrule.rules";

# User entries are recognised by their first letter only, case-insensitively.
my %RULE_TYPE_FOR = (
    h => 'header',
    b => 'body',
    u => 'uri',
    r => 'rawbody',
);
my %HEADER_NAME_FOR = (
    s => 'Subject',
    t => 'To',
    r => 'Received',
    f => 'From',
    m => 'Message-ID',
);

my @DAY_NAMES   = qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday);
my @MONTH_NAMES = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

# Return the lower-cased first character of $text ('' for an empty string).
sub first_char {
    my ($text) = @_;
    return lc substr( $text, 0, 1 );
}

# Map a user entry to header/body/uri/rawbody; undef when unrecognised.
sub canonical_rule_type {
    my ($entry) = @_;
    return $RULE_TYPE_FOR{ first_char($entry) };
}

# Map a user entry to Subject/To/Received/From/Message-ID; undef when
# unrecognised.
sub canonical_header_name {
    my ($entry) = @_;
    return $HEADER_NAME_FOR{ first_char($entry) };
}

# Build the final rule name: ASCII letters are upper-cased; backslash, dot,
# blank and dash become "_"; "ODOT_<RULETYPE>_" is prepended.
sub normalize_rule_name {
    my ( $rule_type, $entry ) = @_;
    my $prefix = $rule_type;
    $prefix =~ tr/a-z/A-Z/;
    my $name = $entry;
    $name =~ tr/a-z/A-Z/;
    $name =~ tr/\\. -/____/;
    return 'ODOT_' . $prefix . '_' . $name;
}

# Interpret a yes/no answer by its first character.
# Returns 1 for y/Y/1, 0 for n/N/0, and undef for anything else.
sub parse_yes_no {
    my ($answer) = @_;
    return unless defined $answer;
    my $c = first_char($answer);
    return 1 if $c eq 'y' || $c eq '1';
    return 0 if $c eq 'n' || $c eq '0';
    return;
}

# Decide whether to open the editor. The prompt is "(Y or n)", so yes is the
# default: only an answer starting with n/N/0 declines.
sub wants_edit {
    my ($answer) = @_;
    my $c = first_char($answer);
    return !( $c eq 'n' || $c eq '0' );
}

# Compile $pattern as a case-insensitive regex. Returns the compiled regex,
# or undef (with the reason left in $@) when the pattern is not valid.
sub compile_pattern {
    my ($pattern) = @_;
    my $regex = eval { qr/$pattern/i };
    return $regex;
}

# Format a localtime()-style list (default: now) as
# "HH:MM:SS on Dayname YYYY.Mon.D".
sub timestamp {
    my @t = @_ ? @_ : localtime();
    my ( $sec, $min, $hour, $mday, $mon, $year, $wday ) = @t;

    # $year is 1900-based and $mon is 0-based.
    return sprintf '%02d:%02d:%02d on %s %d.%s.%d',
        $hour, $min, $sec, $DAY_NAMES[$wday], $year + 1900,
        $MONTH_NAMES[$mon], $mday;
}

# Return the text of one rule: a "generated by" comment followed by the
# rule, describe and score lines. $header_str is '' for non-header rules
# and "<Header> =~ " for header rules.
sub format_rule {
    my ( $rule_type, $rule_name, $header_str, $pattern, $desc, $score, $stamp )
        = @_;
    return "# generated by $SCRIPT_NAME at $stamp\n"
        . "$rule_type $rule_name\t\t$header_str/$pattern/i\n"
        . "describe $rule_name\t\t\t(LOCAL RULE) $desc\n"
        . "score $rule_name\t\t$score\n";
}

# Read one line of input with the trailing newline removed.
# Returns undef at end of input.
sub read_line {
    my $line = <>;
    return unless defined $line;
    chomp $line;
    return $line;
}

# Ask for the rule type until a valid one is given.
# Returns the canonical type, or undef when the user quits (ENTER or EOF).
sub prompt_rule_type {
    my $entry = '';
    while (1) {
        print "SpamAssassin rulebuilder: $SCRIPT_NAME\n";
        print "version of 20050217 10:00 CST\n";
        print "$entry is not a valid entry.\n" if $entry ne '';
        print "ruletype entry. <ENTER> to quit; else one of header, body, uri, rawbody: ";
        $entry = read_line();
        return if !defined($entry) || $entry eq '';

        my $rule_type = canonical_rule_type($entry);
        if ( defined $rule_type ) {
            print "\$ruletype=$rule_type\n";
            return $rule_type;
        }
    }
}

# Ask for the header name until a valid one is given.
# Returns the canonical header name, or undef when the user quits
# (ENTER, as the prompt promises, or EOF).
sub prompt_header_type {
    my $entry = '';
    while (1) {
        print "$entry is not a valid entry.\n" if $entry ne '';
        print "headertype entry. <ENTER> to quit; else one of Subject, To, From, Received, Message-ID:\n";
        $entry = read_line();
        return if !defined($entry) || $entry eq '';

        my $header_name = canonical_header_name($entry);
        return $header_name if defined $header_name;
    }
}

# Let the user try $pattern against sample lines until an empty line or EOF.
sub test_pattern {
    my ($pattern) = @_;

    # Compiling once up front means a bad pattern is reported instead of
    # killing the session, and an empty pattern really is an empty pattern
    # (a bare m/$pattern/ with an empty $pattern would silently reuse the
    # last successful match).
    my $regex = compile_pattern($pattern);
    if ( !defined $regex ) {
        print "pattern /$pattern/i does not compile: $@";
        return;
    }

    print "enter the body or header text to test your rule, or just <ENTER> to stop testing: ";
    while ( defined( my $sample = read_line() ) ) {

        # The terminating empty line is not itself a test case.
        last if $sample eq '';
        if ( $sample =~ $regex ) {
            print "$sample matched $pattern\n";
        }
        else {
            print "$sample failed to match $pattern\n";
        }
    }
    print "end of eval loop\n";
    return;
}

# Interactive entry point: build rules until the user quits, then offer to
# open the rules file in vim. Returns the process exit status.
sub main {
    -e $RULES_FILE
        or warn "$RULES_FILE does not exist; creating it.\n";
    open( my $rules_fh, '>>', $RULES_FILE )
        or die "Can't create $RULES_FILE: $!";

    RULE:
    while (1) {
        my $rule_type = prompt_rule_type();
        last RULE unless defined $rule_type;

        # Empty for body/uri/rawbody rules; "<Header> =~ " for header rules,
        # e.g. "Subject =~ ".
        my $header_str = '';
        if ( $rule_type eq 'header' ) {
            my $header_name = prompt_header_type();
            last RULE unless defined $header_name;
            print "Header rule: \$headertype=$header_name\n";
            $header_str = "$header_name =~ ";
        }

        print "Rulename entry. Blanks and - will be replaced with _, \n";
        print "and all letters will be capitalized. Specify rulename: ";
        my $name_entry = read_line();
        last RULE unless defined $name_entry;
        my $rule_name = normalize_rule_name( $rule_type, $name_entry );
        print "Rulename will be $rule_name\n";

        print "pattern or string entry. Enter the pattern *EXACTLY* \n";
        print "as you want it to appear in the rule. Delimiters will be \"/\".\n";
        print "matching will be caseless (/pattern/i).\n";
        my $pattern = read_line();
        last RULE unless defined $pattern;

        print "description entry. Enter the description exactly as you want it\n";
        print "to appear in the rule: ";
        my $desc = read_line();
        last RULE unless defined $desc;

        print "score entry. Enter the score part of the rule exactly as you want it\n";
        print "to appear in the rule: ";
        my $score = read_line();
        last RULE unless defined $score;

        print "Want to evaluate the rule? Y (or y or 1 or yes) / N (or n or 0 or no)? :";
        my $test_answer = read_line();
        last RULE unless defined $test_answer;
        test_pattern($pattern) if parse_yes_no($test_answer);

        # Keep asking until the answer is recognisable; a bare ENTER means
        # "no", as the prompt says.
        my $save;
        until ( defined $save ) {
            print "Want to save the rule? Y (or y or 1 or yes) / N (or n or 0 or no or just <ENTER>): ";
            my $save_answer = read_line();
            last RULE unless defined $save_answer;
            $save = $save_answer eq '' ? 0 : parse_yes_no($save_answer);
        }
        next RULE unless $save;

        my $rule_text = format_rule( $rule_type, $rule_name, $header_str,
            $pattern, $desc, $score, timestamp() );

        print {$rules_fh} $rule_text, "\n"
            or die "Can't write to $RULES_FILE: $!";

        print "--------> appended to $RULES_FILE \n";
        print "\n\n\n\n", $rule_text, "\n\n\n\n";
    }

    close $rules_fh or die "Unable to close $RULES_FILE: $!\n";
    print "any rules saved have been appended to $RULES_FILE\n";
    print "do you want to edit $RULES_FILE? (Y or n): ";
    my $edit_answer = read_line();
    if ( defined($edit_answer) && wants_edit($edit_answer) ) {

        # List form: the path is handed to vim directly, never to a shell.
        if ( system( 'vim', $RULES_FILE ) == -1 ) {
            warn "Unable to run vim: $!\n";
        }
    }
    return 0;
}

# Run interactively only when executed as a script, so the helpers above
# can be loaded and exercised on their own.
exit( main() ) unless caller;

1;

-- 
Mike Andrews, W5EGO
mikea at mikea.ath.cx
Tired old sysadmin 

------------------------ MailScanner list ------------------------
To unsubscribe, email jiscmail at jiscmail.ac.uk with the words:
'leave mailscanner' in the body of the email.
Before posting, read the Wiki (http://wiki.mailscanner.info/) and
the archives (http://www.jiscmail.ac.uk/lists/mailscanner.html).

Support MailScanner development - buy the book off the website!