#! /usr/local/bin/perl -w
# $Id: urlgrep.pl,v 1.5 1998/02/10 19:56:56 user Exp $
#
# list all embedded URLs in plaintext, being
# careful of trailing punctuation, like in this line:
#   Visit http://www.xor.com/.  Maybe http://internet-plaza.net/?
#
# Input is taken from the files named on the command line (or from
# standard input when none are named) and is read one paragraph at a
# time.  Every URL found is printed on a line of its own.

require 5.002;      # not imperative

use strict;

# cannot use IO::Handle
use FileHandle;

# Paragraph mode: a record ends at one or more blank lines.  The record
# separator is a single global setting, not a per-handle one, so it is
# assigned directly rather than through a method call on ARGV.
$/ = '';

# Recognised URL schemes, as a non-capturing alternation so that the
# only capture group in the pattern below is the URL itself.
my $urls = '(?:' . join('|', qw{
                http
                ftp
                file
                telnet
                gopher
                mailto
                about
                wais
            }) . ')';

# Character-class fragments; each is interpolated inside [...] below.
my $ltrs = '\w';                    # word characters
my $gunk = '/#~:.?+=&%@!\-';        # other characters allowed in a URL
my $punc = '.:?\-';                 # may end a URL, but is more likely
                                    # sentence punctuation when trailing
my $any  = "$ltrs$gunk$punc";       # everything that may appear in a URL

# Test for definedness, not truth, so that a record consisting of "0"
# does not end the loop early.
while (defined(my $paragraph = ARGV->getline())) {
    while ($paragraph =~ m{
            \b              # start at word boundary
            (               # open capture group 1: the URL
                $urls :     # need resource and a literal colon
                [$any]+?    # followed by one or more of any valid
                            # character, but be conservative and
                            # take only what is needed, using +?
            )               # close capture group 1
            (?=             # look-ahead non-consumptive assertion
                [$punc]*    # either 0 or more punctuation
                [^$any]     #   followed by a non-url char
              |             # or else
                $           #   then end of the string
            )
        }igox)  # /i means case-insensitive
                # /g means find every match, not just the first
                # /o is a hack to avoid extra regcomps
                #    for the interpolated variables
                # /x is for embedded comments and whitespace
                #
                # other cool switches include
                # /s make . also match newlines
                # /m make ^ and $ multiline match
                # /e RHS now full expr, not string:
                #    s/([0-9]+)/3 * $1 + 1/eg;
    {
        print "$1\n";
    }
}

exit 0;