Regexp (slightly OT)
mikea
mikea at MIKEA.ATH.CX
Wed Sep 28 16:19:21 IST 2005
On Wed, Sep 28, 2005 at 11:09:47AM +1000, Peter Russell wrote:
> I have had a need to start making my own custom rules for SA. Seems like
> most people can already do this easily, but i kept making typos and
> having issues making them work without retrying a few times.
>
> Does anyone use any kind of validator, other than their live system or
> lint, to see if their regexps are failing and, if so, where?
>
> I found
> http://desktopengineer.com/home/all_articles/free_regular_expression_tester
> this tool that does a fair job, but it's not a Linux shell tool and it's
> almost too overwhelming.
>
> Anyone got any ideas on a shell tool that can do a similar job?
I built an _extremely_ quick-and-dirty Perl script to help me
build rules. It's immediately below. It isn't pretty, it doesn't
do meta-rules, and it lacks any semblance of elegance, nicety, or
refinement. But it's useful.
#! /usr/bin/perl
# SpamAssassin rule builder
#
# Interactively asks for a rule type (header, body, rawbody, uri),
# a rule name, the pattern or string to be matched, a description,
# and a score, then writes out the finished rule.
#
# Limitations:
#   - the pattern is *NOT* parsed
#   - spamassassin --lint is not run
#
# Every saved ruleset is appended to /home/mikea/buildrule.rules and
# is also echoed to the standard output.
#
# Idea: test the rules with eval? Hmmmmmmm! This could be cool!

# Tell the user when the rules file is about to be created from scratch;
# the append-mode open below creates it if it is missing.
unless (-e "/home/mikea/buildrule.rules")
{
    warn "/home/mikea/buildrule.rules does not exist; creating it.\n";
}
open(RULES, ">>", "/home/mikea/buildrule.rules")
    or die "Can't create /home/mikea/buildrule.rules: $!";
# Main interactive loop.  Each pass collects one rule from the user
# (type, optional header name, rule name, pattern, description, score),
# optionally tests the pattern against sample text, and optionally
# appends the finished rule to the RULES filehandle and echoes it to
# the standard output.
#
# The loop ends -- and control falls through to the wrap-up code that
# follows it -- when the user answers the ruletype or headertype prompt
# with just <ENTER>, or when input reaches end-of-file at any prompt.

# Print $prompt, read one line of input, and return it without its
# trailing newline.  Returns undef at end-of-file so callers can stop
# instead of re-prompting forever.
sub _ask
{
    my ($prompt) = @_;
    print $prompt;
    my $answer = <>;
    return unless defined $answer;
    chomp $answer;
    return $answer;
}

# Only the first letter of an entry is significant; these tables map it
# to the canonical spelling used in the generated rule.
my %ruletype_for = (
    h => "header",
    b => "body",
    u => "uri",
    r => "rawbody",
);
my %headertype_for = (
    s => "Subject",
    t => "To",
    r => "Received",
    f => "From",
    m => "Message-ID",
);

RULE:
while (1)
{
    # ---- rule type: header, body, uri, or rawbody ----
    my $ruletype = "";
    while (!exists $ruletype_for{ lc substr($ruletype, 0, 1) })
    {
        print "SpamAssassin rulebuilder: /home/mikea/bin/buildrule\n";
        print "version of 20050217 10:00 CST\n";
        print "$ruletype is not a valid entry.\n" if $ruletype ne "";
        my $entry = _ask("ruletype entry. <ENTER> to quit; else one of header, body, uri, rawbody: ");
        last RULE if !defined($entry) or length($entry) == 0;
        my $key = lc substr($entry, 0, 1);
        $ruletype = exists $ruletype_for{$key} ? $ruletype_for{$key} : $entry;
        print "\$ruletype=$ruletype\n";
    }
    my $ucruletype = uc $ruletype;

    # ---- header name (header rules only) ----
    # $headerstr stays empty for non-header rules; for header rules it
    # becomes "<Headername> =~ ", e.g. "Subject =~ ".
    my $headerstr = "";
    if ($ruletype eq "header")
    {
        my $headertype = "";
        while (!exists $headertype_for{ lc substr($headertype, 0, 1) })
        {
            print "$headertype is not a valid entry.\n" if $headertype ne "";
            my $entry = _ask("headertype entry. <ENTER> to quit; else one of Subject, To, From, Received, Message-ID:\n");
            # Honour the "<ENTER> to quit" promise made in the prompt.
            last RULE if !defined($entry) or length($entry) == 0;
            $headertype = $entry;
        }
        $headertype = $headertype_for{ lc substr($headertype, 0, 1) };
        print "Header rule: \$headertype=$headertype\n";
        $headerstr = "$headertype =~ ";
    }

    # ---- rule name ----
    print "Rulename entry. Blanks and - will be replaced with _, \n";
    my $rulename = _ask("and all letters will be capitalized. Specify rulename: ");
    last RULE unless defined $rulename;
    $rulename =~ tr/a-z/A-Z/;
    # Backslash, dot, blank and dash all become underscores.
    $rulename =~ tr/\\. -/____/;
    # The final name is ODOT_<RULETYPE>_<name>.
    $rulename = "ODOT_" . $ucruletype . "_" . $rulename;
    print "Rulename will be $rulename\n";

    # ---- pattern, description, score ----
    print "pattern or string entry. Enter the pattern *EXACTLY* \n";
    print "as you want it to appear in the rule. Delimiters will be \"/\",.\n";
    my $pattern_string = _ask("matching will be caseless (/pattern/i).\n");
    last RULE unless defined $pattern_string;

    print "description entry. Enter the description exactly as you want it\n";
    my $desc = _ask("to appear in the rule: ");
    last RULE unless defined $desc;

    print "score entry. Enter the score part of the rule exactly as you want it\n";
    my $score = _ask("to appear in the rule: ");
    last RULE unless defined $score;

    # ---- optional interactive test of the pattern ----
    my $eval = _ask("Want to evaluate the rule? Y (or y or 1 or yes) / N (or n or 0 or no)? :");
    last RULE unless defined $eval;
    if ($eval =~ /^[1y]/i)
    {
        # Compile once, inside eval, so that a typo in the pattern is
        # reported instead of killing the whole session.  An empty
        # pattern is compiled as an explicit empty group: a bare empty
        # m// would silently reuse the last successful pattern.
        my $regex = eval {
            length($pattern_string) ? qr/$pattern_string/i : qr/(?:)/i;
        };
        if (!defined $regex)
        {
            print "pattern does not compile, cannot test it: $@";
        }
        else
        {
            print "enter the body or header text to test your rule, or just <ENTER> to stop testing: ";
            while (1)
            {
                my $evalstring = <>;
                last unless defined $evalstring;
                chomp $evalstring;
                # The empty line only ends testing; it is not a sample.
                last if length($evalstring) == 0;
                if ($evalstring =~ $regex)
                {
                    print "$evalstring matched $pattern_string\n";
                }
                else
                {
                    print "$evalstring failed to match $pattern_string\n";
                }
            }
        }
        print "end of eval loop\n";
    }

    # ---- save? ----
    my $saveit;
    while (1)
    {
        $saveit = _ask("Want to save the rule? Y (or y or 1 or yes) / N (or n or 0 or no or just <ENTER>): ");
        last RULE unless defined $saveit;
        $saveit = lc $saveit;
        # Just <ENTER> means "no", as the prompt says.
        $saveit = "n" if $saveit eq "";
        last if $saveit =~ /^[01ny]/;
    }

    if ($saveit =~ /^[1y]/)
    {
        # localtime: $year is 1900-based, $mon is 0-based (January is 0),
        # $wday is 0-based starting at Sunday.
        my ($sec, $min, $hour, $mday, $mon, $year, $wday) = localtime;
        my @dayname   = qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday);
        my @monthname = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
        my $stamp = sprintf("%02d:%02d:%02d on %s %d.%s.%d",
                            $hour, $min, $sec, $dayname[$wday],
                            $year + 1900, $monthname[$mon], $mday);

        # Build the rule text once so the file and the screen cannot
        # drift apart.
        my $rule_text =
              "# generated by /home/mikea/bin/buildrule at $stamp\n"
            . "$ruletype $rulename ${headerstr}/$pattern_string/i\n"
            . "describe $rulename (LOCAL RULE) $desc\n"
            . "score $rulename $score\n";

        print RULES $rule_text, "\n"
            or die "Can't write to /home/mikea/buildrule.rules: $!";

        print "--------> appended to /home/mikea/buildrule.rules \n";
        print "\n\n\n\n";
        print $rule_text;
        print "\n\n\n\n";
    }
} # RULE loop
QUIT:
# Wrap-up: flush and close the rules file, then offer to open it in vim.
# Closing is checked because buffered write errors only surface here.
close RULES or die "Unable to close RULES: $!\n";
print "any rules saved have been appended to /home/mikea/buildrule.rules\n";
print "do you want to edit /home/mikea/buildrule.rules? (Y or n): ";
my $edit = <>;
# At end-of-file there is nobody to answer, so do not start an editor.
if (defined $edit)
{
    chomp $edit;
    # Only the first character counts.  "Y" is the advertised default,
    # so anything other than an explicit "n" or "0" -- including just
    # <ENTER> -- opens the editor.
    my $first = lc substr($edit, 0, 1);
    if ($first ne "0" and $first ne "n")
    {
        # List form of system: no shell is involved.
        if (system("vim", "/home/mikea/buildrule.rules") != 0)
        {
            warn "vim did not exit cleanly (status $?)\n";
        }
    }
}
exit(0);
--
Mike Andrews, W5EGO
mikea at mikea.ath.cx
Tired old sysadmin
------------------------ MailScanner list ------------------------
To unsubscribe, email jiscmail at jiscmail.ac.uk with the words:
'leave mailscanner' in the body of the email.
Before posting, read the Wiki (http://wiki.mailscanner.info/) and
the archives (http://www.jiscmail.ac.uk/lists/mailscanner.html).
Support MailScanner development - buy the book off the website!
More information about the MailScanner
mailing list