Xref: feenix.metronet.com comp.mail.uucp:1136
Newsgroups: comp.mail.uucp
Path: feenix.metronet.com!news.utdallas.edu!tamsun.tamu.edu!cs.utexas.edu!uunet!mcsun!uknet!bnr.co.uk!bnrgate!bcars6a8!clewis
From: clewis@ferret.ocunix.on.ca (Chris Lewis)
Subject: snarf.p NNTP article retriever (was Re: how to search for unused UUCP system names?)
Message-ID: <1993Jun17.160523.9189@bcars6a8.bnr.ca>
Originator: clewis@bcarh2ec
Sender: usenet@bcars6a8.bnr.ca (Use Net)
Nntp-Posting-Host: bcarh2ec
Organization: Huh?
References: <Wn3BsAzCBh107h@pos.apana.org.au> <4438@ecicrl.ocunix.on.ca> <C8pw22.6w@ra.nrl.navy.mil> <4442@ecicrl.ocunix.on.ca>
Date: Thu, 17 Jun 1993 16:05:23 GMT
Lines: 276

In article <4442@ecicrl.ocunix.on.ca> clewis@ferret.ocunix.on.ca (Chris Lewis) writes:
>I'll try to remember to post snarf.p tomorrow.

Here it is...

snarf.p is used to retrieve comp.mail.maps articles from an NNTP
server into a directory.  Then, unpackmaps can be used to
construct a uuwhere/paths database.

Here are some sample command lines:

First we do the snarf - which retrieves new comp.mail.maps articles
into the spool under /mnt/users/clewis/maps/mapspool, with a togo
file in /mnt/users/clewis/maps:

perl ./snarf.p -v -S <server> -o /mnt/users/clewis/maps \
	-s /mnt/users/clewis/maps/mapspool comp.mail.maps

Then the unpackmaps:

unpackmaps -n 'mail clewis' -d /mnt/users/clewis/maps -cvuwp

The unpackmaps invocation deletes the retrieved articles, compresses
the maps in the /mnt/users/clewis/maps directory, and constructs
the uuwhere and paths databases.

In order to use this, you need snarf.p (included here), perl,
unpackmaps and pathalias.  Plus whatever hackery you're going
to do with your mailer.  Eg: on SunOS/stock sendmail, get
smail 2.5 and change the uux entry to point to smail which uses
a "sendmail .... path@forwarder" line to actually transmit
the message.

Snarf.p is a gross hack, but it works.

# This is a shell archive.  Remove anything before this line,
# then unpack it by saving it in a file and typing "sh file".
#
# Wrapped by Chris Lewis <clewis@bcarh2ec> on Thu Jun 10 12:09:16 1993
#
# This archive contains:
#	snarf.p	
#

# Clear LANG for the commands that follow.
LANG=""; export LANG
# Search /bin and /usr/bin ahead of whatever PATH the caller supplied.
PATH=/bin:/usr/bin:$PATH; export PATH

# Announce the file, then copy everything up to the @EOF marker into snarf.p.
# The delimiter is quoted, so the shell performs no expansion on the script text.
echo x - snarf.p
cat >snarf.p <<'@EOF'
#!/usr/bin/perl

#	A simple NNTP article retrieval client.
#	Chris Lewis
#
#	usage: snarf [options] groups
#
#	-d		debug
#	-v		verbose
#	-s dir		spool directory for retrieved articles,
#			defaults to /tmp/spool - the newsgroup
#			name is expanded out as the directories
#			underneath.  Ie: "comp.mail.maps" ->
#			dir/comp/mail/maps
#	-S server	Defaults to NNTPSERVER or $serverfile.
#	-o dir	a togo file is created under this directory if
#		specified.
#
#	Will create directories as needed.
#
#	Example:
#	    perl ./snarf.p -v -S freeple -o /mnt/users/clewis/maps \
#			-s /mnt/users/clewis/maps/mapspool \
#			comp.mail.maps can.general
#
#	retrieve articles from comp.mail.maps and can.general into
#	spool /mnt/users/clewis/maps/mapspool, being verbose, server
#	is freeple, togo is in /mnt/users/clewis/maps.
#
#	Bugs: doesn't maintain a real .newsrc properly.  It
#	merely creates an "ACT" file under the directory it
#	extracts things to.  This contains the article number of
#	the last article received.  You will "miss" articles
#	that time out for some reason.
#

require 'getopts.pl';

# Built-in defaults.
$serverfile = "/usr/local/lib/news/server";
$spool = "/tmp/spool";
$timer = 10;	# argument given to alarm() around each read from the server

# -v and -d are flags; -s, -o and -S each take a value.
&Getopts('vds:o:S:');

($verbose, $debug) = ($opt_v, $opt_d);
if ($opt_s) {
    $spool = $opt_s;
}
if ($opt_o) {
    $mapdir = $opt_o;
}

#($nntpserver, $port) = @ARGV;
if (!$port) {
    $port = 119;
}

# Server name, weakest source first: the contents of $serverfile, then the
# NNTPSERVER environment variable, then the -S option.
if (-r $serverfile) {
    chop($nntpserver = `cat $serverfile`);
}
if ($ENV{'NNTPSERVER'}) {
    $nntpserver = $ENV{'NNTPSERVER'};
}
if ($opt_S) {
    $nntpserver = $opt_S;
}

# Numeric values passed to socket() further down.
($AF_INET, $SOCK_STREAM) = (2, 1);

# Handlers are named by string: interrupt -> dokill, alarm -> timeout.
$SIG{'INT'} = 'dokill';
$SIG{'ALRM'} = 'timeout';

# SIGALRM handler: report the time-out and raise the global $timedout
# flag, which the read routines inspect after their read returns.
sub timeout {
    print("Timed out!\n");
    return ($timedout = 1);
}

# SIGINT handler: an interrupt ends the program at once with exit status 1.
sub dokill {
    exit(1);
}
# pack() template for an Internet socket address: a native unsigned short
# (address family), a network-order short (port), a 4-byte address and
# 8 bytes of null padding.
$sockaddr = 'S n a4 x8';
chop($hostname = `hostname`);

# Without a server name gethostbyname() has nothing to look up, and the
# all-zero address that pack() would then produce is not the server the
# user meant.  Stop with a clear message instead.
die "No NNTP server given (use -S, NNTPSERVER or $serverfile)\n"
    unless $nntpserver;

($name, $aliases, $proto) = getprotobyname('tcp');
($name, $aliases, $port) = getservbyname($port, 'tcp') unless $port =~ /^\d+$/;
($name, $aliases, $type, $len, $thisaddr) = gethostbyname($hostname);
($name, $aliases, $type, $len, $thataddr) = gethostbyname($nntpserver);

# A failed lookup returns an empty list, which leaves $thataddr undefined.
die "Can't resolve NNTP server $nntpserver\n" unless defined($thataddr);

# Local end: port 0.  Remote end: the NNTP port on the server.
$this = pack($sockaddr, $AF_INET, 0, $thisaddr);
$that = pack($sockaddr, $AF_INET, $port, $thataddr);

# Each step names itself on failure so the errno text has some context.
socket(S, $AF_INET, $SOCK_STREAM, $proto) || die "socket: $!";
print "socket okay\n" if $debug;

bind(S, $this) || die "bind: $!";
print "bind ok\n" if $debug;

connect(S, $that) || die "connect to $nntpserver port $port: $!";
print "connect ok\n" if $debug;

# Read one status line from the server (filehandle S), waiting at most
# $timer seconds.
#
# Sets the globals:
#   $response - the line without its terminator, or a synthetic
#               "999 ..." line after a time-out or end of file
#   $retcode  - first whitespace-delimited word (the numeric status)
#   $retdata  - the rest of the line after that word
#   $timedout - 1 if the alarm fired during the read, else 0
# Returns $response.
sub getresponse {
    local($line);
    $timedout = 0;
    $SIG{'ALRM'} = 'timeout';
    alarm($timer);
    $line = <S>;
    alarm(0);
    if ($timedout) {
	$response = "999 NNTP timed out";
    } elsif (!defined($line)) {
	# End of file: the server has gone away.  Give callers something
	# printable rather than an empty string.
	$response = "999 NNTP connection closed";
    } else {
	# Remove exactly the terminator that is there (CR-LF or bare LF);
	# chopping two characters blindly eats data after a bare LF.
	($response = $line) =~ s/\r?\n$//;
    }
    ($retcode, $retdata) = ($response =~ /^(\S+)\s+(.*)$/);
    $response;
}

# Read one line of a multi-line reply from the server (filehandle S),
# waiting at most $timer seconds.
#
# On success, sets the global $response to the line without its
# terminator and returns it.  On a time-out or end of file, $response is
# set to '' and $timedout to 1, and nothing is returned.  Treating end
# of file as a time-out lets a caller that loops until it sees "." or
# $timedout stop instead of spinning on empty reads.
sub getdata {
    local($line);
    $timedout = 0;
    $SIG{'ALRM'} = 'timeout';
    alarm($timer);
    $line = <S>;
    alarm(0);
    if ($timedout) {
	$response = '';
	return;
    }
    if (!defined($line)) {
	print "Connection closed by server\n";
	$response = '';
	$timedout = 1;
	return;
    }
    # Remove exactly the terminator that is there (CR-LF or bare LF), so
    # that a "." line is recognised either way.
    ($response = $line) =~ s/\r?\n$//;
    $response;
}

# Send one command line to the server on filehandle S.
#   $_[0] - the command text, without a line terminator
# NNTP command lines end in CR-LF, so both characters are sent.
sub sendcommand {
    print S $_[0], "\r\n";
}

# Ensure that a directory exists.
#   $_[0] - path of the directory
# An existing directory is left alone; otherwise it is created with
# mode 0777, and the program dies if that fails.
sub mmkdir {
    local($path) = $_[0];
    !-d $path && (mkdir($path, 0777) || die "Can't create directory $path: $!");
}

# Read back the article number stored by putfirst.
#   $_[0] - group directory holding the ACT file
# Returns the first line of <dir>/ACT without its trailing newline, or
# nothing at all when the file cannot be opened (for instance on the
# first run for a group).
sub getfirst {
    local($dir) = $_[0];
    local($number);
    # Leave at once if there is no ACT file; the old "|| $return" was a
    # typo that fell through to a read on an unopened handle.
    open(O, "<$dir/ACT") || return;
    chop($number = <O>);
    close(O);
    return $number;
}

# Store an article number in a group directory.
#   $_[0] - group directory
#   $_[1] - article number to record
# Overwrites <dir>/ACT with the number followed by a newline; dies if the
# file cannot be opened for writing.
sub putfirst {
    local($where, $number) = @_;
    local($actfile) = "$where/ACT";
    open(O, ">$actfile") || die "Can't open $actfile";
    print O $number . "\n";
    close(O);
}

# Create the spool directory tree for a newsgroup.
#   $_[0] - newsgroup name, e.g. "comp.mail.maps"
# Each dot-separated component becomes one directory level beneath
# $spool; any level that is missing is created through mmkdir.
# Returns the path of the deepest directory.
sub makedirs {
    local($newsgroup) = @_;
    local($path) = $spool;
    &mmkdir($path);
    @names = split(/\./, $newsgroup);
    foreach $component (@names) {
	$path .= "/$component";
	&mmkdir($path);
    }
    $path;
}

# Fetch every not-yet-retrieved article of one newsgroup.
#   $_[0] - newsgroup name
#
# Selects the group on the server, works out the range of article
# numbers still to fetch (using the number saved by putfirst on an
# earlier run), and writes each article to <groupdir>/<number>.  When
# $mapdir is set, the path of every completely received article is
# appended to $mapdir/togo.  Dies if the GROUP command fails, if an
# ARTICLE command returns anything other than 220 or 423, or if a file
# cannot be written.
sub retrievegroup {
    local($group) = $_[0];
    $groupdir = &makedirs($group);
    &sendcommand("GROUP $group");
    &getresponse;
    if ($retcode != 211) {
	die "GROUP command failed: $response";
    }
    print $retdata, "\n" if $debug;
    ($count, $first, $last) = ($retdata =~ /(\d+)\s+(\d+)\s+(\d+)/);
    #print "count: $count, first: $first, last: $last\n";
    $ff = &getfirst($groupdir);
    # $ff is the last article already fetched, so resume after it.  The
    # test is >= because article $ff itself must not be fetched again
    # when it is also the server's first article.
    $first = $ff+1 if $ff && $ff >= $first;

    if ($debug || $verbose) {
	print "server has $count articles from $first to $last\n";
	print "last article retrieved: $ff\n" if $ff;
    }

    if ($mapdir) {
	open(TOGO, ">>$mapdir/togo") || die "Can't open $mapdir/togo: $!";
    }
    for ($i = $first; $i <= $last; $i++) {
	&sendcommand("ARTICLE $i");
	&getresponse;
	# 423: the server has no article with this number; skip it.
	next if $retcode == 423;
	if ($retcode != 220) {
	    die "ARTICLE command failed; $response";
	}
	$file = "$groupdir/$i";
	open(OUT, ">$file") || die "Can't open $file: $!";
	while(1) {
	    &getdata;
	    # A line holding only "." ends the article.
	    last if $response eq '.' || $timedout;
	    # The server doubles a leading "." on text lines; undo that.
	    $response =~ s/^\.\././;
	    print OUT $response, "\n";
	}
	# Close first, so the file is complete on disk before it is listed
	# in togo and a failed write is noticed.
	close(OUT) || die "Can't write $file: $!";
	print TOGO "$groupdir/$i\n" if $mapdir && !$timedout;
	unlink $file if $timedout;
	print "Timed-out $file\n" if $timedout;
	print "$file\n" if $verbose;
	# The number is recorded even after a time-out, so a timed-out
	# article is not retried on the next run.
	&putfirst($groupdir, $i);
    }
    close(TOGO) if $mapdir;
}


# Turn on autoflush for the server socket, then make STDOUT the default
# output handle again.
select(S);
$| = 1;
select(STDOUT);

# The server speaks first; only a 200 or 201 greeting lets us continue.
&getresponse;
unless ($retcode == 200 || $retcode == 201) {
    print "Server not ready: $retdata\n";
    exit 1;
}
if ($debug || $verbose) {
    print "Server ready\n";
}
if ($mapdir) {
    &mmkdir($mapdir);
}

# Every remaining command-line argument is a newsgroup to fetch.
foreach $newsgroup (@ARGV) {
    if ($verbose) {
	print "Retrieving from group $newsgroup\n";
    }
    &retrievegroup($newsgroup);
}
@EOF

# Mode 640: read/write for the owner, read-only for the group.  The script
# is not marked executable, so run it as "perl ./snarf.p".
chmod 640 snarf.p

exit 0
