home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
OS/2 Shareware BBS: 35 Internet
/
35-Internet.zip
/
cheklink.zip
/
cheklink.cmd
< prev
next >
Wrap
OS/2 REXX Batch file
|
1998-05-11
|
96KB
|
3,130 lines
/* Check htmllinks
See CHEKLINK.DOC for installation and useage details.
*/
cheklink:
/*** BEGIN USER CONFIGURABLE PARAMETERS */
/* these can be used to tune performance and modify the output. */
/* used to in <BODY back_n> element (n=1 part 1 or 2) */
back_1='bgcolor="#668a78"'
back_2='bgcolor="#bbbbdd"' /* used for both if use_multi=0 */
/* If check_robot=1, then check starter-url site for a /robots.txt file, and use it to
control extent of search. */
check_robot=1
/* URL pointing to cheklink.htm (used for a "do it again" option)
set cheklink_htm='' to not include this option.
Should be fully qualified */
cheklink_htm='/cheklink.htm'
/* if 0, do NOT double check "n.a. servers" */
double_check=1
/* If get_query=0, then use HEAD request for querying.
Although more efficient, some servers do not support HEAD requests.
If you are likely to encounter such sites, set get_query=1 and
short GET requests will be used
*/
get_query=0
linkfile_dir='' /* directory to store "linkages" file */
maxatonce=6 /* max threads active (in QUERY section */
maxatonce_get=2 /* max active threads (in GET section */
maxage=60 /* maximum age of a HEAD thread */
maxage2=80 /* maximum age of a GET thread */
/* used to bgcolor (or background) the rows of the results TABLEs */
row_color1='bgcolor="#bbcc66"' /* odd rows, on-site */
row_color2='bgcolor="#aaccdd"' /* even rows */
row_color1a='bgcolor="#bbaa44"' /* odd rows, off-site */
row_color2a='bgcolor="#aaccdd"' /* even rows */
/* standalone mode intermediate output : 0=none, 1=tiny bit 2 = just a little, 3=steady stream
Note that verbose output is sent to the "pmprintf" window */
standalone_verbose=3
/* if =1, then only SUPERUSERs can invoke CHEKLINK. Otherwise, anyone can
(give other sre-http access rights are satisfied). This is ignored
in standalone mode */
superusers_only=0
/* A fully qualified file containing "header" information for each part.
If ='', then a generic header is used
If specified, the file MUST contain at least:
<HTML><HEAD>.... </HEAD> <BODY ...> <h1>... </h1>
Note: use of user_intro1a (user_intro1b) means that back_1 (back_2) are NOT used
*/
user_intro1a=''
user_intro1b=''
/**************** END USER CONFIGURABLE PARAMETERS */
parse arg ddir, tempfile, reqstrg,list,verb ,uri,user, ,
basedir ,workdir,privset,enmadd,transaction,verbose, ,
servername,host_nickname,homedir,aparam,semqueue,prog_file
servername=strip(servername)
call load /* load dlls */
if linkfile_dir=0 | linkfile_dir='' then
linkfile_dir=value('TEMP',,'os2environment')
linkfile_dir=strip(linkfile_dir,'t','\')'\'
foo=time('r')
call make_get_url
second_output=''
dscmax=300
crlf='0d0a'x
imgs.0=0 ; hrefs.0=0 ; hrefs.!start=1
totgot=0
ascgi=0
doing_results=0
parse var semqueue mysem myqueue
if get_query<>1 then
query_method='HEAD'
else
query_method='HEADGET'
/* aparam is obsolete. Semqueue is semaphore/queue information
that can be used by some sre-daemon procedures */
standalone=0
if verb=" " then do
call ask_opts
end /* Do */
if standalone=0 then do
if superusers_only=1 & wordpos('SUPERUSER',privset)=0 then do
call lineout tempfile, '<!doctype html public "-//IETF//DTD HTML 3.0//EN">'
call lineout tempfile, "<html><head><title>Not authorized </title>"
call lineout tempfile, '</head><body> '
call lineout tempfile,' </body> </html> '
call lineout tempfile
iia=dosdir(tempfile,'s')
is13=value('SREF_PREFIX',,'os2environment')
if is13='' then do
'RESPONSE HTTP/1.0 401 Unauthorized ' /* Set HTTP response line */
'header add WWW-Authenticate: Basic Realm=<CheckLink>' /* challenge */
return 'FILE ERASE TYPE text/html NAME' tempfile
end
else do
foo=sref_response('unauth CheckLink','You do not have privileges to use CheckLink',servername,1)
return foo
end
end
end
if standalone=0 then do
isauth=reqfield('Authorization')
isref=reqfield('Refered')
end
/* read parameters from request */
if verb='GET' then parse var uri . '?' list
list=strip(list)
baseonly=0 /* check url's relative to the base of the request (NOT to the root */
siteonly=1 /* if =1 ,no HEAD check on off-site urls' */
aurl=''
exclusion_list='!* *?* *MAPIMAGE/* CGI*' /* space delimited list of wildcardable selectors to NOT check */
exclusion_list2=''
use_multi=1 /* use multi-part documents */
outtype='ALL'
linkfile=''
outfilel=''
queryonly=0 /* just query, do not read (subsumes baseonly */
treename=''
make_descrip=2 /*1=non, 2=text/html, 3=text/html and text/plain */
result_file=''
do until list=''
parse var list a1 '&' list
parse var a1 avar '=' aval ; tavar=translate(avar)
aaval=packur2(translate(aval,' ','+'))
select
when tavar='URL' then aurl=packur2(translate(aval,' ','+'))
when abbrev(tavar,'BASE')=1 then baseonly=is_yes_no(aaval,baseonly)
when abbrev(tavar,'QUERY')=1 then queryonly=is_yes_no(aaval,queryonly)
when abbrev(tavar,'USEMULTI')=1 then do
ag=wordpos(aaval,'0 1 2')
if ag>0 then use_multi=ag-1
end /* do */
when abbrev(tavar,'SITE')=1 then siteonly=is_yes_no(aaval,siteonly)
when abbrev(tavar,'EXCLUS')=1 then exclusion_list=aaval
when abbrev(tavar,'OUTTYPE')=1 then do
if aaval<>'' & aaval<>0 then outtype=translate(aaval)
end
when abbrev(tavar,'LINKFILE')=1 then linkfile=translate(aaval)
when abbrev(tavar,'NAME')=1 then treename=aaval
when abbrev(tavar,'RESULT')=1 then result_file=aaval
when abbrev(tavar,'DESCRIP')=1 then make_descrip=wordpos(aaval,'0 1 2')
otherwise nop
end
end /* do */
if make_descrip=0 then make_descrip=1
if result_File=0 then result_file=''
/* if result_file<>'', then just send it */
if result_file<>'' then do
outfilel=linkfile_dir||result_file
return 'FILE type text/html nocache name ' outfilel
end
if linkfile=0 then linkfile=''
if linkfile<>'' then do
outfilel=linkfile_dir||linkfile||'.STM'
if pos('.',outfilel)=0 then outfilel=outfilel'.STM'
end /* do */
if pos('?',outfilel)>0 then do
outfilel=dostempname(outfilel)
eek=filespec('n',outfilel); parse var eek linkfile '.' .
end
hold_doing=do_doing(linkfile_dir,use_multi) /* instructions for multi_send */
if exclusion_list=0 then exclusion_list=''
aurl=strip(aurl)
if standalone<>0 then use_multi=0 /* simplify my life */
/* check to see if the browser understands multi-part documents */
if use_multi=1 then do
a=translate(strip(reqfield('Connection')))
a2=translate(strip(reqfield('PROXY-Connection')))
if a<>'KEEP-ALIVE' & a<>'MAINTAIN' & a2<>'KEEP-ALIVE' & a2<>'MAINTAIN' then do
use_multi=2 /* multi-part not supported by browser */
end
end /* Do */
if use_multi=0 then back_1=back_2
if abbrev(translate(aurl),'HTTP://')=0 then do
request=aurl
server=servername
end
else do
parse var aurl . '//' server '/' request
if server='' then server=servername
end
server=strip(server)
fixexpire=value(enmadd||'FIX_EXPIRE',,'os2environment')
stype='1S'
if use_multi=1 then stype='SS'
/* send start of part1 */
user_intro1=''
if user_intro1a<>'' then do
afil=stream(user_intro1a,'c','query exists')
if afil='' then do
user_intro1=''
end
else do
foo=stream(afil,'c','open read')
user_intro1=charin(afil,1,chars(afil))
foo=stream(afil,'c','close')
end
end
if user_intro1='' then do /* the generic intro */
foo='<html><head><title> Running: CheckLink of ' server ' </title> ' crlf
/* add "refresh" meta-http? */
if use_multi=2 then do
parse var hold_doing . clm
second_output=filespec('n',clm)
clm='http://'servername'/cheklink?result='||filespec('n',clm)
foo=foo' <META HTTP-EQUIV="Refresh" Content="9 ; URL='clm'">'
end /* do */
foo=foo'</head> <body ' back_1'>'
user_intro1=foo||crlf'<h2 align="center"> CheckLink: creating a web-tree ... </h2>' crlf
end
rcode=multi_send(user_intro1,'text/html',stype,0,verbose,fixexpire,'CheckLink')
noyes.0='NO' ; noyes.1='YES'
is_descrip.1='None created' ; is_descrip.2='text/html only' ;is_descrip.3='text/html & text/plain '
/* intro1 is also used in part2 */
intro2=' <h3>Parameters</h3>'crlf'<ul>' ,
' <li>Name= 'treename || crlf ,
' <li>BASEONLY = ' noyes.baseonly ' (YES= only read text/htmls in/under <em>base-url</em>)'crlf ,
' <li>QUERYONLY = ' noyes.queryonly ' (YES= query, but do not read, links)' crlf ,
' <li>SITEONLY = ' noyes.siteonly ' (YES= do <em>not </em> verify off-site links)'crlf ,
' <li>DESCRIPTIONS= ' is_descrip.make_descrip || crlf ,
' <LI>EXCLUSION_LIST = <b>' exclusion_list '</b> (* are wildcards)'crlf ,
' <LI>USE_MULTI = ' use_multi ' (0=1 part doc, 1=2 part doc, 2=two docs 'crlf ,
' <li>OUTTYPE = <b>' outtype '</b> (types of results to report 'crlf
if linkfile<>'' then intro2=intro2'<li> LinkFile= 'linkfile ||crlf
if second_output<>'' then intro2=intro2'<li> Temporary Output to= 'second_output||crlf
intro2=intro2||'</ul>' crlf ,
' <b> CheckLink start at: ' aurl' </b><br>' crlf ,
' <tt>server</tt>=<u>' server '</u>, <tt>selector</tt>=<u>' request '</u><br>'
rcode=multi_send(intro2)
stuff=get_url(query_method,server,request,isauth) /* get HEAD info */
/* no such resource or no such server? */
if stuff="" then do
vop='<B>No such resource:</b><tt> 'aurl' </tt></body></html>'
if use_multi=1 then
rcode=multi_send(vop,,'E')
else
rcode=multi_send(vop,,'1E')
call outdone
return '200 '||extract2('bytessent')
end /* do */
call extracts /* create headers. and body variables */
parse var response ht num amess
/* error code (or redirect) */
if num<200 | num>399 then do
vop='<p><B>Resource not available</b>: 'num ' 'amess
if use_multi=1 then
rcode=multi_send(vop,,'E')
else
rcode=multi_send(vop,,'1E')
call outdone
return '200 '||extract2('bytessent')
end /* do */
/* extract basic info */
type='text/html'
asize=''
if wordpos('!CONTENT-TYPE',headers.0)>0 then do
foo='!CONTENT-TYPE'
type=headers.foo
asize=0
end
if wordpos('!CONTENT-LENGTH',headers.0)>0 then do
foo='!CONTENT-LENGTH'
asize=headers.foo
end
if translate(type)<>'TEXT/HTML' then do
vop='<h3>Not an HTML document </h3> <em>Nothing to check! </em> </body></html>'
if use_multi=1 then
rcode=multi_send(vop,,'E')
else
rcode=multi_send(vop,,'1E')
call outdone
return '200 '||extract2('bytessent'
end /* do */
/* text/html: get the body and find links */
stuff=get_url('GET',server,request,isauth) /* get head and body */
call extracts /* get body (skip headers) */
call set_base_root
rc=multi_send(intro3)
rc=multi_send('<p> For ' request': Mime type= ' type ', size='||length(body))
if use_multi=2 then do
aa='<blockquote><b>Output note:</b> The <em>tables of results </em> will be written ' crlf ,
' to an output file. On most browsers, this file will be automatically retrieved ' crlf,
' about 10 seconds after CheckLink finishes processing. Alternatively, you can ' crlf ,
' manually click on a link to this output file. This link <font color="RED">will</font> be ' ,
' placed at the <a href="#BOTTOM">bottom of this page</a> ' crlf ,
' (but wait until processing is complete and all the status info has been written!) ' crlf ,
' </blockquote> ' crlf
rcode=multi_send(aa)
end /* do */
if asize='' then asize=length(body)
hrefs.0=1
hrefs.1='http://'server'/'||strip(request,'l','/')
hrefs.1.!type='text/html' ; hrefs.1.!size=asize ; hrefs.1.!refered='!starter-URL!'
hrefs.1.!status=0 ; hrefs.1.!nrefs=0 ; hrefs.1.!queried=0
hrefs.1.!nlinks=0
hrefs.1.!reflist='' ; hrefs.1.!appearin='' ; hrefs.1.!Imglist=''
arf=strip(translate(hrefs.1))
hrefs.!list.arf=1
/* check for robots.txt, and augment exclusion list */
if check_robot=1 then do
stuff=get_url('GET',server,'ROBOTS.TXT',isauth)
if stuff<>'' then do
call extracts
parse var response . hcode .
if datatype(hcode)<>'NUM' then hcode=400
if hcode>199 & hcode<300 then do
exclusion_list2=add_robot(exclusion_list,body)
aa='<p><b>ROBOTS.TXT found.<br></b> Modified exclusion_list= <tt>' exclusion_list2 '</tt>'
rc=multi_send(aa)
end /* do */
end
end /* do */
if standalone <>0 then do /* suppress output of status info (standalone mode) */
call lineout outfilex,' <br> <a href="#SUMMARY">Skip to Results </a> '
end
/* now recurse down list of links (in hrefs list ================ */
/* start with the "starter-url" */
mustpre=rooturl
if baseonly=1 then mustpre=base
mustpre=strip(translate(mustpre))
rc=multi_send('<HR><H2>Traversing links -- displaying status information ...</h2> <ul>')
if rc<0 then return ''
/* Prepare for thread launchs... clean up quque */
myqueue2=translate(myqueue||'_HREF')
oo=rxqueue('c',myqueue2)
if oo<>myqueue2 then foo=rxqueue('d',oo)
foy=rxqueue('s',myqueue2)
ii=queued()
do ii0=1 to ii ; pull gg ; end
liminact=extract2('limittimeinactive')
/* do this batch of hrefs */
do forever
call get_Url_q /* launch, and get whatever may be on queue */
if result=-1 then return '' /* client killed connection */
if result=0 then do
call syssleep 1
iterate /* nothing to do, loop */
end
if result=-2 then leave /* all done! */
/* if here, stuff and anind have been set as globals */
if stuff="" then do
rc=multi_send(crlf'<br> No body! ' hrefs.anind)
if rc<0 then return ' '
iterate
end
call extracts /* get body variable */
rc=multi_send('<br><tt>'anind')</tt> Length ('hrefs.anind')=== '||length(body))
if rc<0 then return ''
nowimg=imgs.0 ; nowhref=hrefs.0
parse var hrefs.anind . '//' .'/' request
ijoe=lastpos('/',hrefs.anind)
base=delstr(hrefs.anind,ijoe+1)
oo=findurls(body,base,rooturl,request,anind) /* find links in this document */
hrefs.anind.!nlinks=oo
hrefs.anind.!queried=1
if nowimg=imgs.0 & nowhref=hrefs.0 then iterate /* no new links */
oo=query_types(rooturl,nowimg+1,nowhref+1,hrefs.anind,anind) /* determine types of these links*/
if queryonly=1 then leave /* finish after querying starter-url */
end /* do */
rc=multi_send('</ul>')
/* double check? */
if double_check<>0 then call double_check_it
/* get text/plain descriptions */
if queryonly=0 & make_descrip=3 then call make_text_descrip
/* !!!!! At this point, we start a new document (if use_multi=1 )
If use_multi=2, save results to temporary file */
doing_results=hold_doing /* created at top of program */
if use_multi=1 then do
rc=multi_send('</body></html>',,'SE') /* close first part */
end
if use_Multi>0 then do
user_intro1=''
/* send start of part2 */
if user_intro1b<>'' then do
afil=stream(user_intro1b,'c','query exists')
if afil='' then do
user_intro1=''
end
else do
foo=stream(afil,'c','open read')
user_intro1=charin(afil,1,chars(afil))
foo=stream(afil,'c','close')
end
end
if user_intro1='' then do
foo='<html><head><title> Results: CheckLink of ' server ' </title></head><body 'back_2'>'
if linkfile<>'' then foo=foo||'<A name="TOP">'jump_bar(linkfile,cheklink_htm)'</a>'
user_intro1=foo||'<h1 align="center"> CheckLink results </h1>' crlf
end
if use_multi=1 then
rcode=multi_send(user_intro1,'text/html','ES')
else
rcode=multi_send(user_intro1)
/* repeat basic info */
rc=multi_send(intro2)
if rc<0 then do
call outdone
return ' '
end /* do */
rc=multi_send(intro3)
end /* if not multi part, don't do any of the above */
/* ready to write tables of results */
fop='<hr> '
if use_multi=0 then
fop=fop||'<center><h2>Anchors and Imgs</h2> </center>' crlf
rc=multi_send(fop)
if rc<0 then do
call outdone
return ' '
end /* do */
call write_summary
if result<0 then do
call outdone
return ' '
end
/* do several sets of tables */
do ut=1 to words(outtype)
aut=strip(word(outtype,ut))
typedo=wordpos(aut,'OK NOSITE NOURL OFFSITE x EXCLUDED ALL')
if typedo=0 then do
typedo=wordpos(aut,'0 1 2 3 4 5 6')
if typedo=0 then iterate
end
tcode=strip(word('!OK 1 2 3 4 5 !ALL',typedo))
foo=write_img_href(1,1,tcode)
if foo<0 then do
call outdone end /* do */
return ' '
end
end /* do */
/* write column descriptions */
vop='<hr><a name="DESCRIBE"><h3>Description of Columns</h3></a>'crlf'<dl>'crlf
vop=vop' Note that each row of the tables describes a "resource on the web-tree", ',
' where "resources" can be documents, images, scripts, etc. <p>'crlf
if linkfile<>'' then
vop=vop'<dt><u>?</u> <dd> Examine links <b>to</b> and <u>from</u> this resource 'crlf
vop=vop'<dt> Image Location, or URL <dd> A link to the resource, as encounted while building ' ,
' the web-tree. If the resource is inaccessible, it will ',
' just be underlined; but the immediately preceding number will be linked ' ,
' to the resource (so as you can double check) '
vop=vop'<dt><b>#</b><dd><em>for text/html documents...</em> Number of links contained in this html document ' crlf
vop=vop'<dt>Mimetype <dd> The mime type of the resource' crlf
vop=vop'<dt>Size or error code <dd>The size (in bytes) of the resource (as reported ' crlf ,
' by it''s server); or an error code indicating why the resource could not be accessed. ' crlf ,
'<br> Error codes include: <menu> ' crlf ,
'<li><tt>Server n.a.</tt> : Server was inaccessible. Since this might be a ' ,
' temporary condition (say, if the server was exceptionally busy), you probably should ' crlf ,
' double-check these links (i.e.; click on the number immediately preceding the URL) ' crlf ,
' <li><tt>Missing resource </tt> The server reports that this link is unavailable ' crlf ,
'<li><tt>Off-site :</tt> This URL is off-site, and off-site URLs were not checked ' crlf ,
'<li><tt>Excluded </tt>: This is a CGI-BIN, or some other, "excluded" URL that are not checked ' crlf ,
'</menu> ' crlf
vop=vop'<dt>Number of references <dd>Number of times that links (URLs) pointing to <b>this resource</b> </u> appeared ' crlf ,
'in other html documents (on this web-tree) ' crlf
vop=vop'<dt>First reference <dd>Link to an HTML document that contains a URL pointing to this resource 'crlf ,
' (the first one encountered when building the web-tree) </dl>' crlf
vop=vop'<p><a href="#TOP">Top of document</a> 'crlf
oo=time('e')
vop=vop'<hr>Elapsed time= '||addcomma(oo,1) ' seconds.' crlf ,
' Total bytes downloaded='ADDCOMMA(totgot)||crlf
rcode=multi_send(vop,,'EE')
if use_multi=2 then do
parse var doing_results . dd1
og=filespec('n',dd1)
doing_results=1 /* 'VAR, but not lineout */
vop=vop' <hr> <a name="BOTTOM"> View the </a> <a href="/CHEKLINK?result='og'"> results tables? </a>'crlf
end /* do */
vop=vop'</body></html>'
if use_multi=1 then
rcode=multi_send(vop,,'EE')
else
rcode=multi_send(vop,,'1E')
call outdone 1
return '200 '||extract2('bytessent')
/********** END OF MAIN ***************/
/****/
outdone:
parse arg isdone
if use_multi=2 then do
parse var doing_results d1 d2 ; d2=strip(d2)
call lineout d2
if isdone<>1 then foo=sysfiledelete(d2) /* premature error */
end /* do */
if standalone<>0 then do
call lineout outfilex
say
say bold " Reminder: Result tables were written to "||stream(outfilex,'c','query exists')||normal
end
if outfilel<>'' & isdone=1 then do
aa=stream(outfilel,'c','close')
hh=cvtails(imgs,kins) /* drop some superfulous stuff */
if hh>0 then do
do nz=1 to kins.0
pup=translate(kins.nz)
if abbrev(pup,'!LIST.')=1 | right(pup,7)='.!BIRTH' | ,
right(pup,8)='.!STATUS' | right(pup,6)='.!SOCK' | ,
right(pup,9)='.!REFERED' then
drop imgs.pup
end /* do */
end /* do */
a1=cvcopy(imgs,bbg.!imgs)
hh=cvtails(hrefs,kins) /* drop some superfulous stuff */
if hh>0 then do
do nz=1 to kins.0
pup=translate(kins.nz)
if abbrev(pup,'!LIST.')=1 | right(pup,7)='.!BIRTH' | ,
right(pup,8)='.!STATUS' | right(pup,6)='.!SOCK' | ,
right(pup,9)='.!REFERED' then
drop hrefs.pup
end /* do */
end /* do */
/* add name */
if name='' then do
parse var hrefs.1 . '//' sname '/' rname
treename='Starting at /'rname ' on ' sname
end /* do */
hrefs.!name=treename
a2=cvcopy(hrefs,bbg.!hrefs)
if pos('.',outfilel)=0 then outfilel=outfilel'.stm'
a3=cvwrite(outfilel,BBG)
IF STANDALONE<>0 then DO
if (a1*a2*a3)=0 then
say " Warning: could not save IMGS and HREFS to " outfilel
else
SAY BOLD "Saved images and anchors to " normal outfilel
END
end /* do */
return 1
/*********************/
/* set up the doing_results variable -- perhaps with a temp file name
if use-multi=2 */
do_doing:procedure
parse arg ldir,bb
if bb<2 then return 1
lfile=dostempname(ldir||'LNKCH???.HTM')
return '2 'lfile
/****************/
/* make a jumpbar */
jump_bar:procedure expose crlf
parse arg aff,af2
foo='<a href="#SUMMARY">Summary</a> || ' crlf ,
'<a href="#DESCRIBE">Description</a>   ||   ' crlf ,
'<a href="/CHEKLNK2?linkfile='aff'&entrynum=1">Synopsis of starter-URL</a> || ' crlf ,
'<a href="/CHEKLNK2?linkfile='aff'&entrynum=0">View all HTMLs in this web-tree </a> || ' crlf
if af2<>'' then
foo=foo'<a href="'af2'">Create another web-tree </a>   || '
return foo
/*******************/
/* write summayr info */
write_summary:
ioki=0
do jj=1 to imgs.0
if imgs.jj.!size>=0 then ioki=ioki+1
end /* do */
iok.0=0;iok.1=0;iok.2=0;iok.3=0;iok.4=0;iok.5=0;iok.!html=0
do mm=1 to hrefs.0
select
when hrefs.mm.!size>=0 then do
iok.0=iok.0+1
if translate(strip(hrefs.mm.!type))='TEXT/HTML' then iok.!html=iok.!html+1
end /* do */
otherwise do
if datatype(hrefs.mm.!size)='NUM' then do
ool=abs(hrefs.mm.!size)
iok.ool=iok.ool+1
end
end /* otherwise */
end /* select */
end /* hrefs. */
/* NOW display this summary */
codes.1='<u>Server not available</u> '
codes.2='<b>No such resource on server</b>'
codes.3='Off-site (did not check) '
codes.4=''
codes.5='Excluded selectors (did not check) '
anames.!OK='OKS'
anames.1='NOSITE'
anames.2='NOURL'
anames.3='OFFSITE'
anames.4=''
anames.5='EXCLUDED'
anames.!ALL='ALL'
vl1='OK NOSITE NOURL OFFSITE x EXCLUDED ALL'
fop='<center><a name="SUMMARY"><h3>Summary of Results </h3></a></center>' crlf ,
' Starter-URL: <b> ' aurl '</b> <p>' crlf ,
'<blockquote><tt><b>Title</b>:' hrefs.1.!title '</tt>' crlf
if symbol('HREFS.1.!DESCRIP')='VAR' then
fop=fop'<br><b>Description</b>:' hrefs.1.!descrip '</tt>' crlf
fop=fop||'</blockquote><B>Images</b>: 'ioki', of ' imgs.0 ', images were readable.' crlf
if pos('ALL',outtype)+pos('6',outtype)>0 then
fop2='<a href="#ALL">Anchors</a>:'
else
fop2='<B>Anchors</b>:'
fop=fop||' <p>'fop2' of ' hrefs.0' anchors:' crlf
if pos('OK',outtype)+pos('0',outtype)>0 then
fop2='<a href="#OKS">obtainable</a>'
else
fop2='obtainable'
fop=fop||'<ul> <li> 'iok.0 ' were 'fop2' ( text/html='iok.!html')' crlf
do mmk=1 to 5
if mmk=4 then iterate
aa2=word('OK NOSITE NOURL OFFSITE x EXCLUDED ALL',mmk+1)
if wordpos(aa2,outtype)+wordpos(mmk,outtype)>0 then
ttc='<a href="#'||anames.mmk'">'codes.mmk'</a>'
else
ttc=codes.mmk
fop=fop||'<li>' ttc ': ' iok.mmk
end
fop=fop'</ul>'
rc=multi_send(fop)
if rc<0 then return -1
return 1
/************/
/* ADD COMMAS TO A NUMBER */
addcomma:procedure
parse arg aval,ndec
parse var aval p1 '.' p2
if ndec='' then do
p2=''
end
else do
p2='.'||left(p2,ndec,'0')
end /* do */
plen=length(p1)
p1new=''
do i=1 to 10000 while plen>3
p1new=','right(p1,3)||p1new
p1=delstr(p1,plen-2)
plen=plen-3
end /* do */
return p1||p1new||p2
/******************************/
/* parse a robots.txt file, and add appropriate disallows to the exclusion_list.
The algorithim:
1 ignore # lines (comments)
2a look for user-agent: checklink lines
2b if none, look for user-agent:* lines
3 if 2a or 2b don't work, exit with no changes
4 otherwise, from the look for disallow lines going starting from
the user-agent line, until the first empty line (use 0a as line delimiter,
and throw away the 0d)
5 take asel from each disallow: asel, add a * to the end, and append to
exclusion_list
---------------
# samples robots.txt -- will add cgi-* to exclusion_list
user-agent: mozilla
Disallow: /samples
Disallow: /stuff/
#user-agent: checklink
user-agent:gizmo
disallow:fes/
user-agent:*
disallow:cgi-
---------------
*/
add_robot:procedure expose standalone verbose
parse arg exlist,abody
cr='0a'x
nn=0
do forever
if abody='' then leave
parse var abody al1 (cr) abody
al1=strip(al1,,'0d'x)
if al1='#' then iterate
parse var al1 al1a '#' .
nn=nn+1
lins.nn=al1a
end
if nn=0 then return exlist /* empty, so ignore */
lins.0=nn
/* look for CHECKLINK or * user-agent */
iat=0
do mm=1 to lins.0
al=strip(lins.mm)
if abbrev(translate(al),'USER-AGENT')=0 then iterate
parse var al . ':' dagent ; dagent=translate(strip(dagent))
if abbrev(dagent,'CHECKLINK')=1 then do
iat=mm
leave
end
if dagent='*' then do
iat=mm
end /* do */
end /* do */
exlist2=''
if iat=0 then return exlist /* no matching user-agent */
do mm=iat+1 to lins.0
al=translate(strip(lins.mm))
if al='' then leave /* blank line signals end of "record" */
if abbrev(al,'DISALLOW')<>1 then iterate
parse var al . ':' dasel ; dasel=strip(dasel)
if dasel<>'' then exlist2=exlist2||' '||dasel||'* '
end /* do */
do ik=1 to words(exlist2)
aw=strip(word(exlist2,ik))
aw=strip(aw,'l','/')
exlist=exlist' 'aw
end /* do */
return exlist
/**************************************/
/* multi threaded GETs */
get_url_q:
lastgoo=basesec
nowactive=0
alldone=0
stuff=''
lastgoo=time('e')
ii1=hrefs.!start
do oj=hrefs.!start to hrefs.0 /* keep maxatonce_get threads busy */
nowsec=time('e')
if hrefs.oj.!status=2 then do /* done */
alldone=alldone+1 ; iterate
if oj=ii1 then ii1=oj+1
end /* do */
if hrefs.oj.!status=1 then do /*being done */
nowactive=nowactive+1
iterate
end /* do */
iss=is_this1(oj) /* shouldn't do ? */
if iss<0 then return -1 /* client killed the connection */
if iss=0 then do
hrefs.oj.!status=2 /* can't be done */
iterate
end /* do */
if nowactive>(maxatonce_get) then leave
tmp=get_url_0('GET',hrefs.oj,isauth,oj,myqueue2)
parse var tmp hrefs.oj.!sock','.
IF VERBOSE>2 THEN say ' CheckLink:GET 'hrefs.oj ' on socket: ' hrefs.oj.!sock
rc=multi_send('<li><b> checking:</b> ' hrefs.oj)
if rc<0 then return -1
hrefs.oj.!status=1
hrefs.oj.!birth=nowsec
nowactive=nowactive+1
end /* or leave when at end of hrefs. */
a=rxqueue('s',myqueue2)
nq=queued()
hrefs.!start=ii1
if alldone=hrefs.0 then return -2 /* all done with hrefs */
if (nowsec-lastgoo)> min(15,(0.75*liminact)) then do /* intermediate status report? */
rc=multi_send('<br> ...('alldone' of 'hrefs.0')')
if rc<0 then return 0
lastgoo=nowsec
end /* do */
/* any new results? */
if nq=0 then do /* nothing to do -- so check for old age */
do bb=1 to hrefs.0
if hrefs.bb.!status=1 then do /* it's active, check age */
if (nowsec-hrefs.bb.!birth)>maxage2 then do
ssk=hrefs.bb.!sock
rcc0=sockshutdown(ssk,2)
rcc=sockclose(ssk)
hrefs.bb.!status=2 /* done */
if verbose>2 then
say 'CheckLink: Killing socket 'ssk '(' hrefs.mm
hrefs.mm.!size=-1 /* server n.a. */
end /* do >maxage */
end /* do status=1 */
end /* do bb */
return 0
end /* do nq=0*/
/* if here, something in queue */
parse pull yow
totgot=totgot+length(yow)
anid=left(yow,25)
parse var anid atrans','anind ; atrans=strip(atrans); anind=strip(anind)
if anind>hrefs.0 then return 0 /* ignore -- impossible hrefs index */
if transaction<>atrans then return 0 /* ignore -- bad transaction */
hrefs.anind.!status=2 /* mark that this is done */
stuff=substr(yow,26)
return 1
/**************************************/
/* double check n.a. servers */
make_text_descrip:procedure expose myqueue hrefs. stuff imgs. isauth siteonly verbose ,
standalone query_method mustpre dscmax ,
exclusion_list2 exclusion_list maxatonce maxage totgot thread_string badsites. doing_results
liminact=extract2('limittimeinactive')
tocheck.0=0
/* find all local text/plain hrefs to lookup; copy to the tocheck array */
drop tocheck.
tocheck.0=0
do mm=1 to hrefs.0
att=strip(translate(hrefs.mm.!type))
if att<>'TEXT/PLAIN' then iterate
if abbrev(translate(hrefs.mm),mustpre)<>1 then iterate /* offsite or offdir */
/* check this href */
uu=tocheck.0+1
tocheck.uu=hrefs.mm
tocheck.uu.!indx=mm
tocheck.uu.!status=0 /* 0=not done,1=being done, 2=done */
tocheck.0=uu
end
if tocheck.0=0 then return 1
if verbose>2 then say 'Checklink. ' tocheck.0 ' text/plain descriptions '
rc=multi_send('<br>Checklink. ' tocheck.0 ' text/plain descriptions ')
if rc<0 then return 0
/* check all of the "tochecks" -- do atonce "threads" at a time */
nowactive=0
basesec=time('e') ; lastgoo=basesec
alldone=0
/* Prepare for thread launchs... clean up quque */
foy=rxqueue('s',myqueue)
ii=queued()
do ii0=1 to ii ; pull gg ; end
do forever /* until all tochecks are complete or timedout */
nq=queued()
nowsec=time('e')
alldone=0
do oj=1 to tocheck.0 /* keep maxatonce threads busy */
if nowactive>maxatonce then leave
astat=tocheck.oj.!status
if astat=2 then alldone=alldone+1
if astat<>0 then iterate /* active or done, ignore */
tmp=get_url_0('DSCGET',tocheck.oj,isauth,oj,myqueue)
parse var tmp tocheck.oj.!sock','tocheck.oj.!trans','.
IF VERBOSE>2 THEN say ' CheckLink: text/plain description 'tocheck.oj ' on socket: ' tocheck.oj.!sock
tocheck.oj.!status=1
tocheck.oj.!birth=nowsec
nowactive=nowactive+1
end /* or leave when at end of tocheck */
if alldone=tocheck.0 then leave /* all done with tocheck hrefs */
if (nowsec-lastgoo)> min(15,(0.75*liminact)) then do /* intermediate status report? */
rc=multi_send('<br> ...('alldone' of 'tocheck.0')')
if rc<0 then return 0
lastgoo=nowsec
end /* do */
/* any new results? */
if nq=0 then do /* nothing to do -- so check for old age */
do bb=1 to tocheck.0
if tocheck.bb.!status=1 then do /* it's active, check age */
if (nowsec-tocheck.bb.!birth)>maxage then do
ssk=tocheck.bb.!sock
rcc=sockshutdown(ssk,2)
rcc=sockclose(ssk)
tocheck.bb.!status=2 /* done */
mm=tocheck.bb.!indx
if verbose>2 then
say 'CheckLink: Killing socket 'ssk '(' hrefs.mm
nowactive=nowactive-1 /* not active any more! */
end /* do >maxage */
end /* do status=1 */
end /* do bb */
call syssleep 1 /* sleep for a second */
iterate /* and back to top of forever loop */
end /* do nq=0*/
/* if here, something in queue */
parse pull yow
totgot=totgot+length(yow)
anid=left(yow,25)
parse var anid atrans','anind ; atrans=strip(atrans); anind=strip(anind)
if anind>tocheck.0 then iterate /* ignore -- impossible tocheck index */
if tocheck.anind.!trans<>atrans then iterate /* ignore -- bad transaction */
tocheck.anind.!status=2 /* mark that this is done */
nowactive=nowactive-1 /* not active any more! */
stuff=substr(yow,26)
mm=tocheck.anind.!indx
/* process stuff */
if stuff="" then iterate
/* extract type and length */
call extracts /* create headers. and body */
parse var response ht num amess
if num<200 | num>399 then iterate
hrefs.mm.!descrip=translate(left(body,min(dscmax,length(body))),' ','0d0a0009'x)
end /* OF TOCHECKS */
return 1
/*****************************/
/* a text/html to be GETten */
is_this1:procedure expose hrefs. mustpre standalone verbose doing_results
parse arg jj
if hrefs.jj.!size<0 then return 0 /* not on site */
if translate(hrefs.jj.!type)<>'TEXT/HTML' then do
rc=multi_send('<li> <em>'jj') </em> Not text/html: ' hrefs.jj '=' hrefs.jj.!type)
if rc<0 then return -1
return 0
end /* do */
/* compare against root or baseurl */
if abbrev(translate(hrefs.jj),mustpre)=0 then do
rc=multi_send('<li> <em> ' JJ ') </em> Not checking contents: ' hrefs.jj)
if rc<0 then return -1
return 0
end /* do */
return 1
/*******************/
/* head to find out types */
query_types:procedure expose myqueue hrefs. stuff imgs. isauth siteonly verbose standalone query_method ,
exclusion_list2 exclusion_list maxatonce maxage totgot thread_string badsites. doing_results
parse upper arg daroot,img1,href1,paurl,anind
tmpanind=anind
liminact=extract2('limittimeinactive')
if href1<=hrefs.0 then call query_types_a
anind=tmpanind
if img1<=imgs.0 then call query_types_i
return 1
/**************************************/
/* query types of anchors */
query_types_a:
rc=multi_send('<br> Getting header info (for anchors ' href1 ' to ' hrefs.0 ')' )
IF RC<0 then EXIT ''
tocheck.0=0
/* find all hrefs to lookup; copy to the tocheck array */
do mm=href1 to hrefs.0
hrefs.mm.!type='n.a.' ; hrefs.mm.!size=-1 ; hrefs.mm.!refered=paurl
hrefs.mm.!status=0 ; hrefs.mm.!nrefs=1 ; hrefs.mm.!queried=0
hrefs.mm.!nlinks=0
hrefs.mm.!appearin=anind
hrefs.mm.!imglist=''
hrefs.mm.!reflist=''
/* special size codes:
-1 : server not available
-2 : no such resource on sever
-3 : siteonly violation
-4 : reserved
-5 : excluded
*/
/* suppress this link? */
if siteonly=1 then do
if abbrev(translate(hrefs.mm),daroot)=0 then do
hrefs.mm.!size=-3
iterate
end /* do */
end
if exclusion_list||exclusion_list2<>'' then do
parse var hrefs.mm . '//' . '/' arr
if exclude_me(arr,exclusion_list,exclusion_list2,hrefs.mm,daroot)=1 then do
hrefs.mm.!size=-5
iterate
end
end /* do */
/* is this server known to be down? */
parse var hrefs.mm '//' aserv '/'
aserv=translate(aserv)
if datatype('BADSITES.'aserv)='NUM' then do
if badsites.aserv>1 then do /* check twice before marking as bad */
hrefs.mm.!size=-1
iterate
end
end /* do */
/* check this href */
uu=tocheck.0+1
tocheck.uu=hrefs.mm
tocheck.uu.!indx=mm
tocheck.uu.!status=0 /* 0=not done,1=being done, 2=done */
tocheck.0=uu
end
/* check all of the "tochecks" -- do atonce "threads" at a time */
nowactive=0
basesec=time('e') ; lastgoo=basesec
alldone=0
/* Prepare for thread launchs... clean up quque */
foy=rxqueue('s',myqueue)
ii=queued()
do ii0=1 to ii ; pull gg ; end
do forever /* until all tochecks are complete or timedout */
nq=queued()
nowsec=time('e')
alldone=0
do oj=1 to tocheck.0 /* keep maxatonce threads busy */
if nowactive>maxatonce then leave
astat=tocheck.oj.!status
if astat=2 then alldone=alldone+1
if astat<>0 then iterate /* active or done, ignore */
tmp=get_url_0(query_method,tocheck.oj,isauth,oj,myqueue)
parse var tmp tocheck.oj.!sock','tocheck.oj.!trans','.
IF VERBOSE>2 THEN say ' CheckLink: 'query_method ' 'tocheck.oj ' on socket: ' tocheck.oj.!sock
tocheck.oj.!status=1
tocheck.oj.!birth=nowsec
nowactive=nowactive+1
end /* or leave when at end of tocheck */
if alldone=tocheck.0 then leave /* all done with tocheck hrefs */
if (nowsec-lastgoo)> min(15,(0.75*liminact)) then do /* intermediate status report? */
rc=multi_send('<br> ...('alldone' of 'tocheck.0')')
if rc<0 then return 0
lastgoo=nowsec
end /* do */
/* any new results? */
if nq=0 then do /* nothing to do -- so check for old age */
do bb=1 to tocheck.0
if tocheck.bb.!status=1 then do /* it's active, check age */
if (nowsec-tocheck.bb.!birth)>maxage then do
ssk=tocheck.bb.!sock
rcc=sockshutdown(ssk,2)
rcc=sockclose(ssk)
tocheck.bb.!status=2 /* done */
mm=tocheck.bb.!indx
if verbose>2 then
say 'CheckLink: Killing socket 'ssk '(' hrefs.mm
hrefs.mm.!size=-1 /* server n.a. */
if datatype('BADSITES.'aserv)<>'NUM' then
badsites.aserv=1
else
badsites.aserv=badsites.aserv+1
nowactive=nowactive-1 /* not active any more! */
end /* do >maxage */
end /* do status=1 */
end /* do bb */
call syssleep 1 /* sleep for a second */
iterate /* and back to top of forever loop */
end /* do nq=0*/
/* if here, something in queue */
parse pull yow
totgot=totgot+length(yow)
anid=left(yow,25)
parse var anid atrans','anind ; atrans=strip(atrans); anind=strip(anind)
if anind>tocheck.0 then iterate /* ignore -- impossible tocheck index */
if tocheck.anind.!trans<>atrans then iterate /* ignore -- bad transaction */
tocheck.anind.!status=2 /* mark that this is done */
nowactive=nowactive-1 /* not active any more! */
stuff=substr(yow,26)
mm=tocheck.anind.!indx
/* process stuff */
if stuff="" then do
hrefs.mm.!size=-1
if datatype('BADSITES.'aserv)<>'NUM' then
badsites.aserv=1
else
badsites.aserv=badsites.aserv+1
hrefs.mm.!type='n.a.'
iterate
end /* do */
/* extract type and length */
call extracts /* create headers. (there should not be a body( */
parse var response ht num amess
if num<200 | num>399 then do
hrefs.mm.!size=-2
iterate
end
hrefs.mm.!type='unknown'
hrefs.mm.!size=0
if wordpos('!CONTENT-TYPE',headers.0)>0 then do
foo='!CONTENT-TYPE'
hrefs.mm.!type=headers.foo
hrefs.mm.!size=0
end
if wordpos('!CONTENT-LENGTH',headers.0)>0 then do
foo='!CONTENT-LENGTH'
hrefs.mm.!size=headers.foo
end
end /* OF TOCHECKS */
return 1
/**************************************/
/* query types of images */
query_types_i:
rc=multi_send('<br> Getting header info (for in-line images ' img1 ' to ' imgs.0 ')' )
IF RC<0 then EXIT ''
tocheck.0=0
/* find all hrefs to lookup; copy to the tocheck array */
do mm=img1 to imgs.0
imgs.mm.!type='n.a.' ; imgs.mm.!size=0 ; imgs.mm.!refered=paurl
imgs.mm.!nrefs=1
imgs.mm.!appearin=anind
/* special size codes:
-1 : server not available
-2 : no such resource on sever
-3 : siteonly violation
-4 : reserved
-5 : exclusion violate
*/
/* suppress this link? */
if siteonly=1 then do
if abbrev(translate(imgs.mm),daroot)=0 then do
imgs.mm.!size=-3
iterate
end /* do */
end
if exclusion_list||exclusion_list2<>'' then do
parse var imgs.mm . '//' . '/' arr
if exclude_me(arr,exclusion_list,exclusion_list2,hrefs.mm,daroot)=1 then do
imgs.mm.!size=-5
iterate
end
end /* do */
/* is this server known to be down? */
parse var imgs.mm '//' aserv '/'
aserve=translate(aserv)
if datatype('BADSITES.'aserv)='NUM' then do
if badsites.aserv>1 then do /* check twice before marking as bad */
imgs.mm.!size=-1
iterate
end
end /* do */
/* check this href */
uu=tocheck.0+1
tocheck.uu=imgs.mm
tocheck.uu.!indx=mm
tocheck.uu.!status=0 /* 0=not done,1=being done, 2=done */
tocheck.0=uu
end
/* check all of the "tochecks" -- do atonce "threads" at a time */
nowactive=0
basesec=time('e') ; lastgoo=basesec
alldone=0
/* Prepare for thread launchs... clean up quque */
foy=rxqueue('s',myqueue)
ii=queued()
do ii0=1 to ii ; pull gg ; end
do forever /* until all tochecks are complete or timedout */
nq=queued()
nowsec=time('e')
alldone=0
do oj=1 to tocheck.0 /* keep maxatonce threads busy */
if nowactive>maxatonce then leave
astat=tocheck.oj.!status
if astat=2 then alldone=alldone+1
if astat<>0 then iterate /* active or done, ignore */
tmp=get_url_0(query_method,tocheck.oj,isauth,oj,myqueue)
parse var tmp tocheck.oj.!sock','tocheck.oj.!trans','.
IF VERBOSE>2 THEN say ' CheckLink: HEAD 'tocheck.oj ' on socket: ' tocheck.oj.!sock
tocheck.oj.!status=1
tocheck.oj.!birth=nowsec
nowactive=nowactive+1
end /* or leave when at end of tocheck */
if alldone=tocheck.0 then leave /* all done with tocheck imgss */
if (nowsec-lastgoo)> min(15,(0.75*liminact)) then do /* intermediate status report? */
rc=multi_send('<br> ...('alldone' of 'tocheck.0')')
if rc<0 then return 0
lastgoo=nowsec
end /* do */
/* any new results? */
if nq=0 then do /* nothing to do -- so check for old age */
do bb=1 to tocheck.0
if tocheck.bb.!status=1 then do /* it's active, check age */
if (nowsec-tocheck.bb.!birth)>maxage then do
ssk=tocheck.bb.!sock
rcc=sockshutdown(ssk,2)
rcc=sockclose(ssk)
tocheck.bb.!status=2 /* done */
mm=tocheck.bb.!indx
if verbose>2 then
say 'CheckLink: Killing socket 'ssk '(' imgs.mm
imgs.mm.!size=-1 /* server n.a. */
if datatype('BADSITES.'aserv)<>'NUM' then
badsites.aserv=1
else
badsites.aserv=badsites.aserv+1
nowactive=nowactive-1 /* not active any more! */
end /* do >maxage */
end /* do status=1 */
end /* do bb */
call syssleep 1 /* sleep for a second */
iterate /* and back to top of forever loop */
end /* do nq=0*/
/* if here, something in queue */
parse pull yow
totgot=totgot+length(yow)
anid=left(yow,25)
parse var anid atrans','anind ; atrans=strip(atrans); anind=strip(anind)
if anind>tocheck.0 then iterate /* ignore -- impossible tocheck index */
if tocheck.anind.!trans<>atrans then iterate /* ignore -- bad transaction */
tocheck.anind.!status=2 /* mark that this is done */
nowactive=nowactive-1 /* not active any more! */
stuff=substr(yow,26)
mm=tocheck.anind.!indx
/* process stuff */
if stuff="" then do
imgs.mm.!type='n.a.' ; imgs.mm.!size=-1
if datatype('BADSITES.'aserv)<>'NUM' then
badsites.aserv=1
else
badsites.aserv=badsites.aserv+1
iterate
end /* do */
/* extract type and length */
call extracts /* create headers. (there should not be a body( */
parse var response ht num amess
if num<200 | num>399 then do
imgs.mm.!size=-2
iterate
end
imgs.mm.!type='unknown'
imgs.mm.!size=0
if wordpos('!CONTENT-TYPE',headers.0)>0 then do
foo='!CONTENT-TYPE'
imgs.mm.!type=headers.foo
imgs.mm.!size=0
end
if wordpos('!CONTENT-LENGTH',headers.0)>0 then do
foo='!CONTENT-LENGTH'
imgs.mm.!size=headers.foo
end
end /* OF TOCHECKS */
return 1
/**************************************/
/* double check n.a. servers */
double_check_it:procedure expose myqueue hrefs. stuff imgs. isauth siteonly verbose ,
standalone query_method ,
exclusion_list2 exclusion_list maxatonce maxage totgot thread_string badsites. doing_results
liminact=extract2('limittimeinactive')
tocheck.0=0
/* find all hrefs to lookup; copy to the tocheck array */
drop tocheck.
tocheck.0=0
do mm=1 to hrefs.0
if hrefs.mm.!size<>-1 then iterate
/* check this href */
uu=tocheck.0+1
tocheck.uu=hrefs.mm
tocheck.uu.!indx=mm
tocheck.uu.!status=0 /* 0=not done,1=being done, 2=done */
tocheck.0=uu
end
if tocheck.0=0 then return 1
if verbose>2 then say 'Checklink. Double checking ' tocheck.0 ' server n.a. URLS.'
rc=multi_send('<br>Double checking 'tocheck.0)
if rc<0 then return 0
/* check all of the "tochecks" -- do atonce "threads" at a time */
nowactive=0
basesec=time('e') ; lastgoo=basesec
alldone=0
/* Prepare for thread launchs... clean up quque */
foy=rxqueue('s',myqueue)
ii=queued()
do ii0=1 to ii ; pull gg ; end
do forever /* until all tochecks are complete or timedout */
nq=queued()
nowsec=time('e')
alldone=0
do oj=1 to tocheck.0 /* keep maxatonce threads busy */
if nowactive>maxatonce then leave
astat=tocheck.oj.!status
if astat=2 then alldone=alldone+1
if astat<>0 then iterate /* active or done, ignore */
tmp=get_url_0('HEADGET',tocheck.oj,isauth,oj,myqueue)
parse var tmp tocheck.oj.!sock','tocheck.oj.!trans','.
IF VERBOSE>2 THEN say ' CheckLink: Double check 'tocheck.oj ' on socket: ' tocheck.oj.!sock
tocheck.oj.!status=1
tocheck.oj.!birth=nowsec
nowactive=nowactive+1
end /* or leave when at end of tocheck */
if alldone=tocheck.0 then leave /* all done with tocheck hrefs */
if (nowsec-lastgoo)> min(15,(0.75*liminact)) then do /* intermediate status report? */
rc=multi_send('<br> ...('alldone' of 'tocheck.0')')
if rc<0 then return 0
lastgoo=nowsec
end /* do */
/* any new results? */
if nq=0 then do /* nothing to do -- so check for old age */
do bb=1 to tocheck.0
if tocheck.bb.!status=1 then do /* it's active, check age */
if (nowsec-tocheck.bb.!birth)>maxage then do
ssk=tocheck.bb.!sock
rcc=sockshutdown(ssk,2)
rcc=sockclose(ssk)
tocheck.bb.!status=2 /* done */
mm=tocheck.bb.!indx
if verbose>2 then
say 'CheckLink: Killing socket 'ssk '(' hrefs.mm
nowactive=nowactive-1 /* not active any more! */
end /* do >maxage */
end /* do status=1 */
end /* do bb */
call syssleep 1 /* sleep for a second */
iterate /* and back to top of forever loop */
end /* do nq=0*/
/* if here, something in queue */
parse pull yow
totgot=totgot+length(yow)
anid=left(yow,25)
parse var anid atrans','anind ; atrans=strip(atrans); anind=strip(anind)
if anind>tocheck.0 then iterate /* ignore -- impossible tocheck index */
if tocheck.anind.!trans<>atrans then iterate /* ignore -- bad transaction */
tocheck.anind.!status=2 /* mark that this is done */
nowactive=nowactive-1 /* not active any more! */
stuff=substr(yow,26)
mm=tocheck.anind.!indx
/* process stuff */
if stuff="" then iterate
/* extract type and length */
call extracts /* create headers. */
parse var response ht num amess
if num<200 | num>399 then do
hrefs.mm.!size=-2
iterate
end
hrefs.mm.!type='unknown'
hrefs.mm.!size=0
if wordpos('!CONTENT-TYPE',headers.0)>0 then do
foo='!CONTENT-TYPE'
hrefs.mm.!type=headers.foo
hrefs.mm.!size=0
end
if wordpos('!CONTENT-LENGTH',headers.0)>0 then do
foo='!CONTENT-LENGTH'
hrefs.mm.!size=headers.foo
end
end /* OF TOCHECKS */
return 1
/************************/
/* write stuff */
write_img_href:procedure expose imgs. hrefs. crlf totgot baseonly standalone linkfile ,
row_color1 row_color2 row_color1a row_color2a verbose doing_results ascgi ,
rooturl server
parse arg i1,h1,outtype
acodes.!OK='<tt><b>Successfully</b> checked links</tt> '
acodes.1='<tt>Problem links: <u>Server not available</u></tt> '
acodes.2='<tt>Problem links:<u>No such resource on server</u></tt>'
acodes.3='<tt>Not checked links: <em>Off-site </em></tt> '
acodes.4=''
acodes.5='<tt>Not checked links:<em>Excluded selectors </em></tt> '
acodes.!ALL='<tt><b>All the links</b></tt> '
anames.!OK='OKS'
anames.1='NOSITE'
anames.2='NOURL'
anames.3='OFFSITE'
anames.4=''
anames.5='EXCLUDED'
anames.!ALL='ALL'
codesb.0='<tt>size n.a.</tt>'
codesb.1='Server n.a.'
codesb.2='Missing resource'
codesb.3='Off-site '
codesb.4=''
codesb.5='Excluded '
chlink='CHEKLNK2'
if ascgi=1 then chlink='/CGI-BIN/CHEKLNK2'
aa='<P><hr width="66%"> ' crlf ,
' <center> <a name="'anames.outtype'"> ' acodes.outtype '</a></center> <p>' crlf ,
'<b>IM</b>a<b>G</b>es: ' crlf
rc=multi_send(aa)
if rc<0 then return rc
stable0=' There are <b>no</b> "<em> 'acodes.outtype '"</em> Image links.'
/* write this if not any matches */
stable='<table> '
if linkfile<>'' then do
stable='<table><th>? </th> '
end /* do */
stable=stable'<th>IMG Location</th><th>mimetype</th><th>size<br><em>or error code</em></th> ' crlf ,
'<th><tt>number of references, <em>1st reference </em></th> ' crlf
call sort_nhref 1 /* sort imgs */
iwrote=0
do mm0=i1 to imgs.0
mm=sortlist.mm0
/* skip this one ? */
ssiz=imgs.mm.!size
if outtype<>'!ALL' then do /* not an ALL links report */
if ssiz>0 then do
if outtype<>'!OK' then iterate
end /* do */
else do
if abs(ssiz)<>outtype then iterate
end /* do */
end /* do */
/* write stuff to table */
if stable<>'' then do /*write table header */
rc=multi_send(stable); stable=''
if rc<0 then return rc
end
iwrote=iwrote+1
ismiss=0
if imgs.mm.!size=-2 | imgs.mm.!size=-1 then ismiss=1
ack=breakup(imgs.mm,36,rooturl)
bgc=choose_row_color(iwrote,row_color1,row_color2,row_color1a,row_color2a,ack)
aa=crlf'<TR ' bgc '> <td>'
if linkfile<>'' then do
cl2=' <a href="'chlink'?linkfile='linkfile'&isimg=1&entrynum='mm'"> ? </a> '
aa=aa||cl2 '</td><td> '
end
if imgs.mm.!size>=0 then do
aa=aa||'<font size=-1>'mm'. </font> <a href="'imgs.mm'">'ack'</a></td>' crlf
end
else do
iwrote2='<a href="'imgs.mm'">'mm'</a>'
aa=aa||' <font size=-1>'iwrote2'. </font> <u>'ack'</u></td>' crlf
end
ack=imgs.mm.!type
if length(ack)>20 then do
parse var ack a1 '/' a2 ; ack=a1'/<br>'a2
end /* do */
aa=aa||'<td> <tt>'ack'</tt></td> ' crlf
if imgs.mm.!size>0 then do
aa=aa||'<td> <tt>'imgs.mm.!size '</tt> ' crlf
end
else do
mam=abs(imgs.mm.!size)
mamo=codesb.mam
if ismiss=1 then
mamo='<b>'mamo'</b>'
else
mamo='<em>'mamo'</em>'
aa=aa||'<td> 'mamo' </td>' crlf
end
nhh=addcomma(imgs.mm.!nrefs) /* the refered by stuff */
lhh=lower(imgs.mm.!refered) ; lhh2=lhh
lhh2=breakup(lhh,35,rooturl)
aa=aa||'<td><tt>'nhh'</tt>, <em><a href="'lhh'">'lhh2'</a></em></td>'
rc=multi_send(aa)
if rc<0 then return rc
end /* do */
if stable='' then
rc=multi_send('</table>')
else
rc=multi_send(stable0)
if rc<0 then return rc
/* --------------- now do anchors */
ifc=''
/* if wordpos(outtype,'!ALL !OK 0 6')>0 then
ifc=' (if checked, # <tt><A href</tt>s)' */
aa='<P><hr width="30%">' crlf
aa=aa||'<b>A</b>nchors:'
rc=multi_send(aa)
if rc<0 then return rc
stable0=' There are <b>no</b> "<em> 'acodes.outtype '"</em> Anchor links.'
stable='<table> '
if linkfile<>'' then do
stable='<table><th>? </th> '
end /* do */
stable=stable'<th>URL 'ifc' </th><th><u>#</u></th> ' crlf ,
' <th>mimetype</th><th>size<br><em>or error code</em></th> ' crlf ,
'<th><tt>number of references, <em>1st reference </em></th> ' crlf
iwrote=0
/* sort 'em */
call sort_nhref 0
do mmn=h1 to hrefs.0
mm=strip(sortlist.mmn)
/* skip this one ? */
ssiz=hrefs.mm.!size
if outtype<>'!ALL' then do /* not an ALL links report */
if ssiz>0 then do
if outtype<>'!OK' then iterate
end /* do */
else do
if abs(ssiz)<>outtype then iterate
end /* do */
end /* do */
/* write stuff */
if stable<>'' then do /*write table header */
rc=multi_send(stable); stable=''
if rc<0 then return -1
end
/* write this one */
iwrote=iwrote+1
ack=breakup(hrefs.mm,36,rooturl)
bgc=choose_row_color(iwrote,row_color1,row_color2,row_color1a,row_color2a,ack)
aa=crlf'<TR ' bgc '> <td>' /* write a link to cheklnk2 ? */
if linkfile<>'' then do
cl2=' <a href="'chlink'?linkfile='linkfile'&isimg=0&entrynum='mm'"> ? </a> '
aa=aa||cl2 '</td><td> '
end
if hrefs.mm.!size>=0 then do /* number, or linked number */
aa=aa||' <font size=-1>'mm'. </font> <a href="'hrefs.mm'">'ack'</a></td>' crlf
end
else do
iwrote2=' <a href="'hrefs.mm'">'mm'</a>'
aa=aa||' <font size=-1>'iwrote2'. </font> <u>'ack'</u></td>' crlf
end
xx=' ' ;if hrefs.mm.!queried=1 then xx='<em>'hrefs.mm.!nlinks'</em>'; /* links in this document */
aa=aa||'<td>'xx'</td>'||crlf
ack=hrefs.mm.!type
if length(ack)>20 then do
parse var ack a1 '/' a2 ; ack=a1'/<br>'a2
end /* do */
aa=aa||'<td> <tt>'ack'</tt></td> ' crlf
if hrefs.mm.!size>0 then do
aa=aa||'<td> <tt>'hrefs.mm.!size '</tt> ' crlf
end
else do
mam=abs(hrefs.mm.!size)
mamo=codesb.mam
if hrefs.mm.!size=-2 | hrefs.mm.!size=-1 then
mamo='<b>'mamo'</b>'
else
mamo='<em>'mamo'</em>'
aa=aa||'<td> 'mamo' </td>' crlf
end
nhh=addcomma(hrefs.mm.!nrefs) /* the refered by column */
lhh=lower(hrefs.mm.!refered);lhh2=lhh
lhh2=breakup(lhh,36,rooturl)
aa=aa||'<td> <tt>'nhh'</tt>, <em><a href="'lhh'">'lhh2'</a></em></td>'
rc=multi_send(aa)
if rc<0 then return rc
end /* do hrefs.mm */
if stable='' then
rc=multi_send('</table>')
else
rc=multi_send(stable0)
if rc<0 then return rc
rc=multi_send('<br><a href="#SUMMARY">... back to summary </a>')
if rc<0 then return rc
return 1
/* end of WRITE_IMG_HREF */
/***********/
/* choose color for this row, depending on row# and type of lin (off or on site */
choose_row_color:procedure
parse arg nth,c1,c2,c3,c4,alink
if abbrev(alink,'/')=1 then do /* on-site */
bgc=c3
if nth//2=0 then bgc=c4
end
else do
bgc=c1
if nth//2=0 then bgc=c2
end /* do */
return bgc
/*********************************/
/********************************/
/* sort nhrefs. list of urls --- subdirectory sensitive */
sort_nhref:procedure expose hrefs. sortlist. crlf imgs. server
parse arg iimg
if iimg<>1 then do
do mn=1 to hrefs.0
nhrefs.mn=hrefs.mn
end /* do */
nhrefs.0=hrefs.0 ;maxssn=0
end
else do
do mn=1 to imgs.0
nhrefs.mn=imgs.mn
end /* do */
nhrefs.0=imgs.0 ;maxssn=0
end /* do */
/* make an array with sortable elements in portions of each record */
elemsizes.0=0 ; maxfname=0
do jj=1 to 40
elemsizes.jj=0
end /* do */
do is=1 to nhrefs.0
aa1=strip(strip(nhrefs.is,'l','/'))
parse var aa1 . '//' ssn '/' a1
if translate(ssn)=translate(server) then ssn=''
biglist.is.!srv=ssn
maxssn=max(maxssn,length(ssn))
h1=lastpos('/',a1) /* pluck off "file name" */
biglist.is.0=0
if h1>0 then do
biglist.is=substr(a1,h1+1)
maxfname=max(maxfname,length(biglist.is))
end
else do
biglist.is=a1
maxfname=max(maxfname,length(biglist.is))
iterate /* no dirs, get net entry */
end
a1=delstr(a1,h1) /* the remainder is the path */
idirs=0
do forever /* pluck out directories in path */
if a1='' then leave /* got all directories */
parse var a1 dx '/' a1
idirs=idirs+1
biglist.is.idirs=dx
elemsizes.idirs=max(length(dx),elemsizes.idirs)
end /* do */
biglist.is.0=idirs
elemsizes.0=max(elemsizes.0,idirs)
end
do ipp=1 to elemsizes.0
elemsizes.ipp=elemsizes.ipp+1
end /* do */
/* make the big elements array */
do ii=1 to NHREFS.0
oo.ii=left(ii,6)' 'left(biglist.ii.!srv,maxssn+1)
do mm=1 to ELEMSIZES.0
if mm<=biglist.ii.0 then
oo.ii=oo.ii||left(biglist.ii.mm,elemsizes.mm)
else
oo.ii=oo.ii||left(' ',elemsizes.mm)
end /* do */
oo.ii=oo.ii||left(biglist.ii,maxfname)
end /* do */
OO.0=NHREFS.0
sortlist.0=0
if oo.0=0 then return 0
EEF=ARRAYSORT(OO,,,7,,'A','I') /* sort the names */
DO MM=1 TO NHREFS.0
sortlist.mm=strip(left(oo.mm,6))
end /* do */
sortlist.0=nhrefs.0
return 1
/*********************************/
/* search for a <BASE element in the HEAD */
base_element:procedure expose standalone verbose
parse arg stuff
crlf='0d0a'x
if stuff=0 | stuff="" then return ""
dowrite=0
do until stuff=""
parse var stuff p1 '<' tag '>' stuff
if translate(word(tag,1))="HEAD" then do /* now in head !*/
dowrite=1
iterate
end
if dowrite=0 then iterate /* wait till we get into head .. */
if translate(word(tag,1))="/HEAD" then /* out of head, all done ! */
leave
if (translate(word(tag,1)))='BASE' then do
parse var tag . '=' . '"' ee '"'
return ee
end
end
return ""
/*******************************/
/* get/head a url, do not wait for response */
GET_URL_0:procedure expose verbose totgot thread_string standalone doing_results
parse arg type,a1,isauth,indid,aqueue
crlf='0d0a'x
parse var a1 . '//' server '/' request
tt=extract2('transaction') /* used as a queue entry id */
family ='AF_INET'
gosock = SockSocket(family, "SOCK_STREAM", 0)
iid=left(tt','indid,25,' ')
att=rexxthread('t',thread_string,gosock,type,server,request,,
isauth,verbose,aqueue,iid)
return gosock','iid
/*******************************/
/* get/head a url */
get_url:procedure expose verbose myqueue totgot thread_string maxage standalone doing_results
parse arg type,server,request,isauth,amaxage
crlf='0d0a'x
if amaxage<>'' & datatype(amaxage)='NUM' then
mxage=amaxage
else
mxage=maxage
stuff=''
/* clear the queue, then launch a thread */
foo=rxqueue('s',myqueue)
ii=queued()
do ii0=1 to ii ; pull gg ; end
tt=extract2('transaction')
family ='AF_INET'
gosock = SockSocket(family, "SOCK_STREAM", "IPPROTO_TCP")
tt=left(tt,25,' ')
att=rexxthread('t',thread_string,gosock,type,server,request,,
isauth,verbose,myqueue,tt)
do mm=1 to mxage /*wait maxage */
ll=queued()
if queued()>0 then do
parse pull yow
totgot=totgot+length(yow)
anid=left(yow,25)
if strip(anid)<>tt then iterate
return substr(yow,26)
end /* do */
call syssleep 1
end /* do */
return ""
/*************************************/
/* extract headers and body */
extracts:
parse arg noheaders
cr='0a'x
parse var stuff response (cr) stuff
response=strip(response,,'0d'x)
headers.0=''
do forever
parse var stuff ahead (cr) stuff
ahead=strip(ahead,,'0d'x)
if ahead='' then leave
parse var ahead name ':' aval
nn=translate('!'||name)
headers.0=headers.0' 'nn
headers.nn=aval
end /* do */
/* remove html comments */
body=""
stuff2x=stuff
do forever /*no comments within comments are allowed */
if stuff2x="" then leave
parse var stuff2x t1 '<!-- ' t2 '-->' stuff2x
body=body||t1
end /* do */
return 1
/* --- Load the function library, if necessary --- */
load:
if \RxFuncQuery("SockLoadFuncs") then return /* already there */
call RxFuncAdd "SockLoadFuncs","rxSock","SockLoadFuncs"
call SockLoadFuncs
foo=rxfuncquery('sysloadfuncs')
if foo=1 then do
call RxFuncAdd 'SysLoadFuncs', 'RexxUtil', 'SysLoadFuncs'
call SysLoadFuncs
end
foo=rxfuncquery('rexxlibregister')
if foo=1 then do
call rxfuncadd 'rexxlibregister','rexxlib', 'rexxlibregister'
call rexxlibregister
end
foo=rxfuncquery('rexxlibregister')
if foo=1 then do
say " Could not find REXXLIB "
exit
end /* do */
return 1
/***********************************/
/* search a file, find IMG SRC=, FRAME SRC=, and A HREF= urls. Add BASEURL if
no / or http://.../ at beginning of URL
Return results in hrefs. and imgs. */
findurls:procedure expose imgs. hrefs. totgot crlf standalone verbose doing_results dscmax make_descrip
parse arg stuff, baseurl,rooturl,burl,the_anind
nf=0
liminact=extract2('limittimeinactive')
basegoo=time('e')
/* convert '< x' to '<x' */
stuff=translate(stuff,' ','0d0a0900'x)
do forever
wow=pos('< ',stuff)
if wow=0 then leave
newstuff=''
do forever
parse var stuff a1 '< ' stuff
newstuff=newstuff||a1
if stuff<>"" then
newstuff=newstuff||'<'
else
leave
end /* do */
stuff=newstuff
end
tstuff=translate(stuff)
/* find TITLE element */
a1=pos('</HEAD',tstuff)
if a1>0 then do
a2=pos('<TITLE',tstuff)
if a2<a1 & a2<>0 then do /* <TITLE in <HEAD */
a3=pos('</TITLE',tstuff,a2)
IF A3=0 then DO /* NOT </TITLE ! */
HREFS.THE_ANIND.!TITLE='no_title'
end /* do */
else DO
a4=substr(stuff,a2,1+a3-a2)
parse var a4 . '>' atitle '<' .
atitle=space(strip(atitle),1)
hrefs.the_anind.!title=left(atitle,min(80,length(atitle)))
END
end /* do */
end /* do */
/* find description */
if make_descrip>1 then do
goo=fig_descript(a1)
if goo<>'' then hrefs.the_anind.!descrip=goo
end /* do */
/* find all FRAME SRC=, IMG SRC= and A HREF=, throw away internal links */
lookfor.1='<BODY '
lookfor.2='<IMG '
lookfor.3='<A '
lookfor.4='<FRAME '
lookfor.5='<AREA '
lookfor.6='<EMBED '
lookfor.7='<LINK '
lookfor.8='<APPLET '
lookfor.9='<OBJECT '
do anctype=1 to 9
nowtarg=lookfor.anctype
strt=1
do forever
s1a=pos(nowtarg,tstuff,strt)
if s1a=0 then leave
s2a=pos('>',tstuff,s1a)
if s2a=0 then leave /* error, give up on this one */
anarg=substr(stuff,s1a+1,(s2a-s1a)-1)
anarg=translate(anarg,' ','0d0a0900'x)
strt=s2a+1
select
when anctype=1 then do /* body background */
do forever
if anarg='' then leave
parse var anarg a1 anarg ; a1=strip(a1)
if abbrev(translate(a1),'BACKGROUND=')=0 then iterate
parse var a1 . '=' gotimg . ; gotimg=strip(strip(gotimg),,'"')
if left(gotimg,2)='//' then gotimg='http://'gotimg
nf=nf+1
fileurls.nf=fix_url(gotimg,baseurl,rooturl)
fileurls.nf.!img=1
leave
end /* do */
end /* i3>0 */
when anctype=2 then do /* img */
do forever
if anarg='' then leave
parse var anarg a1 anarg ; a1=strip(a1)
if abbrev(translate(a1),'SRC=')=0 then iterate
parse var a1 . '=' gotimg . ; gotimg=strip(strip(gotimg),,'"')
if left(gotimg,2)='//' then gotimg='http://'gotimg
nf=nf+1
fileurls.nf=fix_url(gotimg,baseurl,rooturl)
fileurls.nf.!img=1
leave
end /* do */
end
when anctype=3 | anctype=5 | anctype=7 then do /* A AREA LINK */
do forever
if anarg='' then leave
parse var anarg a1 anarg ; a1=strip(a1)
if abbrev(translate(a1),'HREF=')=0 then iterate
parse var a1 . '=' gothref . ; gothref=strip(strip(gothref),,'"')
parse var gothref gothref '#' . /* toss out internal jumps */
if gothref="" then iterate
if abbrev(upper(gothref),'JAVASCRIPT:')=1 then iterate /* don't do "javascript:" entries */
if left(gothref,2)='//' then gothref='http://'gothref
parse upper var gothref uaref ':' . /* non -http are discarded */
if wordpos(uaref,'MAILTO FTP FILE GOPHER TELNET')>0 then iterate
nf=nf+1
fileurls.nf=fix_url(gothref,baseurl,rooturl)
fileurls.nf.!img=0
leave
end /* do */
end
when anctype=4 | anctype=6 then do /* FRAME EMBED */
do forever
if anarg='' then leave
parse var anarg a1 anarg ; a1=strip(a1)
if abbrev(translate(a1),'SRC=')=0 then iterate
parse var a1 . '=' gothref . ; gothref=strip(strip(gothref),,'"')
if left(gothref,2)='//' then gothref='http://'gothref
parse var gothref gothref '#' . /* toss out internal jumps */
if gothref="" then iterate
nf=nf+1
fileurls.nf=fix_url(gothref,baseurl,rooturl)
fileurls.nf.!img=0
leave
end /* do */
end
when anctype=8 then do /* APPLET */
abase=''; aref=''
do forever
if anarg='' then leave
parse var anarg a1 anarg ; a1=strip(a1)
if abbrev(translate(a1),'CODE=') + ,
abbrev(translate(a1),'CODEBASE=')=0 then iterate
if abbrev(translate(a1),'CODEBASE=')=1 then do
parse var a1 '"' abase '"' .
end /* do */
else do /* CODE */
parse var a1 '"' aref '"'
end /* do */
if aref<>'' & abase<>'' then leave
end
if aref='' then iterate /* no CODE= found */
if left(abase,2)='//' then abase='http://'abase
if abase<>'' then
tmp1=abase||strip(aref,'l','/')
else
tmp1=fix_url(aref,baseurl,rooturl)
nf=nf+1
fileurls.nf=tmp1
fileurls.nf.!img=0
end
when anctype=9 then do /* OBJECT */
do forever
if anarg='' then leave
parse var anarg a1 anarg ; a1=strip(a1)
if abbrev(translate(a1),'CODEBASE=')=0 then iterate
parse var a1 . '=' gothref . ; gothref=strip(strip(gothref),,'"')
if left(gothref,2)='//' then gothref='http://'gothref
parse var gothref gothref '#' . /* toss out internal jumps */
if gothref="" then iterate
nf=nf+1
fileurls.nf=fix_url(gothref,baseurl,rooturl)
fileurls.nf.!img=0
leave
end /* do */
end
otherwise nop
end /* select */
goo=time('e')
if (goo-basegoo)> min(15,(0.75*liminact)) then do
rc=multi_send('<br> .... found 'nf)
if rc<0 then return 0
basegoo=goo
end /* do */
end /* search tstuff */
end /* anctype */
iurls=nf
if iurls=0 then return 0 /* no links */
/* remove duplicates */
rc=multi_send('<br> .... removing duplicates from ' iurls ' links ')
if rc<0 then return 0
okays=make_isdup(iurls)
if okays=0 then return 0
/* isdup=1 means "this is a duplicate of prior entry in this document"
use this, and list of prior hrefs and imgs, to remove duplicates */
iurls1=0 ; nimgs=imgs.0 ; nhrefs=hrefs.0
fop='<br> Check 'okays' links against (' nimgs ' & ' nhrefs ') prior links '
rc=multi_send(fop)
oo=time('e')
/* if prior exists, don't add new entry, but do augment "!nrefs" field */
do mm=1 to iurls
if isdup.mm=1 then iterate /* this is duplicated in this document, so ignore it */
if fileurls.mm.!img=1 then do /* check image list */
arf=strip(translate(fileurls.mm))
if datatype(imgs.!list.arf)='NUM' then do /* match, so don't add */
nn=imgs.!list.arf
imgs.nn.!nrefs=imgs.nn.!nrefs+1
imgs.nn.!appearin=imgs.nn.!appearin' 'the_anind
hrefs.the_anind.!imglist=hrefs.the_anind.!imglist' 'nn
iterate
end /* do */
nimgs=nimgs+1 /* no match, so add */
imgs.nimgs=fileurls.mm
imgs.!list.arf=nimgs
hrefs.the_anind.!imglist=hrefs.the_anind.!imglist' 'nimgs
end
else do /* check hrefs list */
arf=strip(translate(fileurls.mm))
if datatype(hrefs.!list.arf)='NUM' then do /* match, so don't add */
nn=hrefs.!list.arf
hrefs.nn.!nrefs=hrefs.nn.!nrefs+1 /* # of times this is referenced */
if nn<>the_anind then hrefs.nn.!appearin=hrefs.nn.!appearin' 'the_anind /* this appears in the_anind's */
if nn<>the_anind then hrefs.the_anind.!reflist=hrefs.the_anind.!reflist' 'nn
iterate
end /* do */
nhrefs=nhrefs+1 /* no match, so add */
hrefs.nhrefs=fileurls.mm
hrefs.!list.arf=nhrefs
hrefs.the_anind.!reflist=hrefs.the_anind.!reflist' 'nhrefs
end /* do */
end /* mm */
oo2=time('e')
imgs.0=nimgs ; hrefs.0=nhrefs
return okays
/* end of FINDURLS .. anind hrefs. */
/*****************/
/* extract descripiton from <head> */
fig_descript:procedure expose dscmax make_descrip stuff
parse arg a1
s2=stuff
dowrite=1
do until s2=""
parse var s2 p1 '<' tag '>' s2
/* is it a META HTTP-EQUIV or a META NAME ? */
if translate(word(tag,1))="/HEAD" then leave
if translate(word(tag,1))="META" then do
parse var tag ameta atype '=' rest
tatype=translate(atype)
if tatype="HTTP-EQUIV" | tatype="NAME" then do
parse var rest aval1 rest
REST=STRIP(REST)
aval1=strip(aval1) ;
aval1=strip(aval1,,'"')
if abbrev(translate(aval1),'DESC')<>1 then iterate
aval2=" "
foo1=ABBREV(translate(rest),'CONTENT')
if foo1>0 then do
PARSE VAR REST FOO '=' AVAL2
aval2=strip(aval2)
aval2=strip(aval2,'b','"')
url_content=LEFT(AVAL2,dscmax)
return url_content
end
end /* name or http-equiv */
end /* meta */
end /* stuff */
/* look for <h1 and <h2 headers? */
if make_descrip<>3 then return ''
if s2='' then s2=stuff /* no /head */
bb=look_Htag(s2)
return bb
/* ----------------------------------------------------------------------- */
/* Extract <hn> fields */
/* ----------------------------------------------------------------------- */
look_htag: procedure expose dscmax
parse arg stuff
amessage=""
dowrite=0
do until stuff=""
parse var stuff p1 '<' tag '>' stuff
ttag=translate(word(tag,1))
if wordpos(ttag,' H1 H2 H3 H4 TITLE')>0 THEN DO /* grab stuff */
parse var stuff amess '<' tag2 '>' stuff
amessage=amessage||amess||'<b> | </b>'
if length(amessage)>dscmax then leave
end
end
if amessage="" then do /* getting desperate -- grab any old words! */
stuff0=left(stuff,1000)
do until stuff0=""
parse var stuff0 p1 '<' tag '>' stuff0
amessage=amessage||' '||p1
if length(amessage)>dscmax then leave
end
end
return amessage
/************************/
/* make the isdup "duplicates" array */
make_isdup:procedure expose isdup. fileurls. standalone verbose
parse arg iurls
oo=time('e')
drop tmps.
okays=0
do mm=1 to iurls
a1=space(translate(fileurls.mm.!img||'_'fileurls.mm),0)
if tmps.a1=1 then do
isdup.mm=1
end
else do
isdup.mm=0
tmps.a1=1
okays=okays+1
end
end
oo2=time('e')
return okays
/****************************/
/* add baseurl if needed */
fix_url:procedure
parse arg aref,baseurl,rooturl
if abbrev(translate(aref),'HTTP://') then return aref
if abbrev(aref,'/')=0 then
aref1=baseurl||aref
else
aref1=rooturl||strip(aref,'l','/')
return aref1
/********************************/
/* set base and root */
set_base_root:
server=strip(server,,'/')
ii=lastpos('/',request)
if ii=0 then
base='http://'server'/'
else
base='http://'server'/'strip(delstr(request,ii+1),'l','/')
base2=base_element(body)
if base2<>'' then base=base2
parse var base . '//' rooturl '/' .
rooturl='http://'rooturl'/'
intro3=' <br> <em>base-url </em> = ' base ' ; root= ' rooturl
return 1
/***************/
/* return 0 for no, 1 for yes, default otherwise */
is_yes_no:procedure
parse arg aval,def
tdef=strip(translate(aval))
if wordpos(tdef,'Y YES 1')>0 then return 1
if wordpos(tdef,'N NO 0')>0 then return 0
return def
/***************/
/* check selector for match to one of the exclusion lists */
exclude_me:procedure
parse upper arg asel,alist1,alist2,ahref,aroot
alist=alist1
if abbrev(translate(ahref),aroot)=1 & alist2<>'' then
alist=alist2
do mm=1 to words(alist)
a1=strip(word(alist,mm))
oo=wild_match(asel,a1)
if oo<>0 then return 1
end
return 0
/*******************/
/* create thread_string -- tokenized string containing get_url thread */
/* this is the "thread" used to get resources from other servers */
make_get_url:
crlf='0d0a'x
dathread=' parse arg gosock,type,server,request,isauth,verbose,myqueue,transaction ' crlf ,
' signal on error name iserr ; signal on syntax name iserr ' crlf ,
' stuff=bget_url(type,server,request,isauth) ' crlf ,
' a10=transaction||stuff ' crlf ,
' foo=rxqueue('s',myqueue) ' crlf ,
' queue a10 ' crlf ,
' arf=queued() ' crlf ,
' exit " " ' crlf ,
' iserr: tt=rxqueue("g") ' crlf ,
' exit "" ' crlf ,
'bget_url:procedure expose verbose gosock ' crlf ,
'parse arg type,server,request,isauth ' crlf ,
' if verbose>4 then call pmprintf(" Bgeturl: look for "server " "request)' crlf,
'crlf="0d0a"x ; maxchar=1000000000 ' crlf ,
'httpport=80 ' crlf ,
' parse var server server ":" bport' crlf ,
'if bport<>"" then httpport=bport ' crlf ,
'family ="AF_INET" ' crlf ,
'rc=1 ;if verify(server,'1234567890.')>0 then do ' crlf ,
' rc=sockgethostbyname(strip(server), "serv.0") ' crlf ,
' end 'crlf,
'else do ' crlf ,
' serv.0addr=strip(server) ' crlf ,
'end 'crlf,
'if verbose>4 & rc=0 then call pmprintf(" rc in bgeturl= "rc)' crlf,
'if rc=0 then do ' crlf ,
' rc1=sockshutdown(gosock,2) ' crlf ,
' rc = SockClose(gosock) ' crlf ,
' return "" ' crlf ,
'end ' crlf ,
'dotserver=serv.0addr ' crlf ,
'gosaddr.0family=family ' crlf ,
'gosaddr.0port =httpport ' crlf ,
'gosaddr.0addr =dotserver ' crlf ,
'if gosock="" then ' crlf ,
' gosock = SockSocket(family, "SOCK_STREAM", 0) ' crlf ,
'request=strip(request,"l","/") ' crlf ,
' if type='HEADGET' then do ; type='GET' ; maxchar=999 ; end ' crlf ,
' if type='DSCGET' then do ; type='GET' ; maxchar=1500 ; end ' crlf ,
' message=type" /"request" HTTP/1.0"crlf"HOST: "server||crlf ' crlf ,
' message=message||"Referer: checklink@"||mehost||crlf ' crlf ,
'if isauth<>"" then message=message||"Authorization: "isauth||crlf ' crlf ,
'message=message||crlf ' crlf ,
' got="" ' crlf ,
'rc = SockConnect(gosock,"gosaddr.0") ' crlf ,
'if rc<0 then do ' crlf ,
' rc1=sockshutdown(gosock,2) ' crlf ,
' rc = SockClose(gosock) ' crlf ,
' if verbose>2 then call pmprintf(" CheckLink: error connecting to " server "=" rc) ' crlf ,
' return "" ' crlf ,
'end ' crlf ,
'rc = SockSend(gosock, message) ' crlf ,
'iok=1 ' crlf ,
'do r=1 by 1 ' crlf ,
' rc = SockRecv(gosock, "response", 1000) ' crlf ,
' got=got||response ' crlf ,
' if rc<0 then iok=0 ' crlf ,
' if rc<=0 then leave ' crlf ,
' if length(got)>maxchar then leave ' crlf ,
' end r ' crlf ,
' if iok=0 then do ' crlf ,
' rc=sockshutdown(gosock,2) ' crlf ,
' rc = SockClose(gosock) ' crlf ,
' return "" ' crlf ,
'end ' crlf ,
'rc=sockshutdown(gosock,2) ' crlf ,
'rc = SockClose(gosock) ' crlf ,
' if verbose > 2 then ' crlf ,
' call pmprintf("CheckLink: ("rc") "type"; got" length(got) "bytes of response from:" server " "request) ' crlf ,
'return got '
aa=tokenizestring(dathread,thread_string)
return
/**********************/
/* send to client, or to screen */
multi_send:procedure expose standalone verbose doing_results
parse arg a1,a2,a3,a4,a5,a6,a7
parse var standalone ss ofile ttime
ofile=strip(ofile) ; ss=strip(ss)
parse var doing_results doo1 doo2
if ss=0 then do
doo2=strip(doo2)
if doo1=2 then do
call lineout doo2,a1
return 1
end
else do
if a2='' & a3='' & a4='' then
rc=sref_multi_send(a1)
else
rc=sref_multi_send(a1,a2,a3,a4,a5,a6,a7)
return rc
end
end /* do */
/* if here, standalone mode... */
call lineout ofile,a1 /* standalone output file*/
/* if verbose>0, write to screen.. but remove <elements> */
if verbose=0 | doo=1 then return 1
aa=''
do forever
if a1='' then leave
parse var a1 t1 '<' t2 '>' a1
aa=aa||t1
if translate(t2)='LI' then aa=aa' * '
if translate(t2)='P' | translate(t2)='BR' then aa=aa||'0d0a'x
if abbrev(strip(translate(t2)),'A')=1 then aa=aa' >> '
end
aaa=''
do forever
if aa=' ' then leave
parse var aa a1 ' ' aa
aaa=aaa' 'a1
end /* do */
say aaa
return 1
/*********************/
/* standalone mode */
ask_opts:
SIGNAL OFF ERROR ; SIGNAL OFF SYNTAX
SIGNAL ON ERROR NAME ASKV
SIGNAL ON SYNTAX NAME ASKV
ansion=checkansi()
if ansion=1 then do
aesc='1B'x
cy_ye=aesc||'[37;46;m'
normal=aesc||'[0;m'
bold=aesc||'[1;m'
re_wh=aesc||'[31;47;m'
reverse=aesc||'[7;m'
end
else do
say " Warning: Could not detect ANSI.... output will look ugly ! "
cy_ye="" ; normal="" ; bold="" ;re_wh="" ;
reverse=""
end /* Do */
cls
say " " ; say
call lineout, bold cy_ye
call lineout, "CheckLink: check & verify HTML links -- stand alone mode "
call lineout, normal
say " Although designed primarily as an SRE-http addon, you can use CheckLink "
say " in a stand-alone mode."
say
say " "||cy_ye||"CheckLink's standalone-mode's final results are written to an "normal
say " "||cy_ye||"HTML file. You should view it with a browser (using Open File). "normal
call lineout, normal
say " "
if yesno(" Are you ready to continue ")=1 then
nop
else do
say " See you later?.. "
exit
end
say
say " We recommend reading the documentation (CHEKLINK.DOC) before "
say " running this program. On the other hand, you can always learn by "
say " making mistakes .... "
say
say "Enter a fully specified "bold "starter-URL: " normal
call charout, bold " ? "normal
parse pull aurl
call charout,bold' Web-tree name:'normal
parse pull treename
isauth=''
call charout, " A (space seperated) "bold "USERNAME PASSWORD "normal " (ENTER=None):"
parse pull upwd
if upwd<>' ' then do
upwd=space(strip(upwd))
upwd=mk_base64(translate(upwd,':',' '))
isauth='Authorization: Basic '||upwd
end
call charout, " URL pointing to " bold " CHEKLINK.HTM "normal ":"
parse pull cheklink_htm
queryonly=0
baseonly=yesno(" Only read documents in or under this starter-URL ",'Y')
if baseonly=1 then do
queryonly=yesno(normal" ..."reverse"only look at this URL (query links, but do NOT recurse)",'Y')
end /* do */
siteonly=yesno(' Query resources on other sites (N=do not)','Y')
siteonly=1-siteonly
call charout,' Make descriptions (1=NO, 2=text/html, 3=html and plaintext):'
pull maked
call charout , "Exclusion list (ENTER="bold'!* *?* *MAPIMAGE/* CGI*'normal'): '
parse pull exclus
if exclus='' then exclus='!* *?* *MAPIMAGE/* CGI*'
do forever
call charout,' Select output tables (?=Help,ENTER='bold'ALL'normal'): '
pull outtype
if outtype='' then outtype='ALL'
if outtype='?' then do
say cy_ye" Valid codes for output tables"normal' (you can use them in any combination): 'normal
say ' 'bold'OK'normal' ) Display succesfully found links'
say ' 'bold'NOSITE'normal' ) Display links to unreachable sites'
say ' 'bold'NOURL'normal' ) Display links missing resources>'
say ' 'bold'OFFSITE'normal' ) Display links to off-site URLs'
say ' 'bold'EXCLUDED'normal' ) Display links to excluded URLs (specified in the EXCLUSION_LIST)'
say ' 'bold'ALL'normal' ) Display all links'
iterate
end /* do */
leave
end
myqueue='CHEKLINK_STD1'
foo=rxqueue('C',myqueue)
if foo<>myqueue then aa=rxqueue('d',foo)
servername=get_hostname()
say "The output file will be an HTML document containing the tables of results."
do forever
call charout, 'Output file: '
pull outfileX
if outfilex='' then iterate
if pos('.',outfilex)=0 then outfilex=outfilex'.htm'
if stream(outfileX,'c','query exists')<>' ' then do
goo=yesno(' File exists. Overwrite? ')
if goo=1 then do
goo=sysfiledelete(outfilex)
leave
end
iterate
end
oo=stream(outfileX,'c','open write')
if abbrev(translate(oo),'READY')=1 then leave
say "Can't open file, try a different name"
end
say
say 'The "links" file stores information on what links appear in the HTML'
say 'documents, and what HTML documents each resource "appears in"'
say " (note: the links file will be stored in: " linkfile_dir ')'
do forever
call charout, ' Links file, name only (0=do not create): '
pull linkfile
if linkfile=0 then leave
outfilel=linkfile_dir||linkfile'.STM'
if pos('.',outfilel)=0 then outfilel=outfilel'.STM'
if stream(outfilel,'c','query exists')<>' ' then do
goo=yesno(' File exists. Overwrite? ')
if goo=1 then do
goo=sysfiledelete(outfilel)
leave
end
say " Using: " outfilel
ascgi=yesno(" Use CGI-BIN to specify CHEKLNK2 (web traversal) links ")
iterate
end
oo=stream(outfilel,'c','open write')
if abbrev(translate(oo),'READY')=1 then do
say " Using: " outfilel
ascgi=yesno(" Use CGI-BIN to specify CHEKLNK2 (web traversal) links ")
oo=stream(outfilel,'c','close')
leave
end
say "Can't open " outfilel ", try a different name"
end
treename=translate(treename,'+',' ')
list='url='aurl'&baseonly='baseonly'&siteonly='siteonly'&exclus='exclus'&outtype='outtype|| ,
'&queryonly='queryonly'&linkfile='linkfile'&treename='treename'&make_descrip='maked
verbose=standalone_verbose
transaction=(10*dospid())+dostid()
standalone=1' 'outfilex
return list
/* ------------------------------------------------------------------ */
/* function: Check if ANSI is activated */
/* */
/* call: CheckAnsi */
/* */
/* where: - */
/* */
/* returns: 1 - ANSI support detected */
/* 0 - no ANSI support available */
/* -1 - error detecting ansi */
/* */
/* note: Tested with the German and the US version of OS/2 3.0 */
/* */
/* */
CheckAnsi: PROCEDURE
thisRC = -1
trace off
/* install a local error handler */
SIGNAL ON ERROR Name InitAnsiEnd
"@ANSI 2>NUL | rxqueue 2>NUL"
thisRC = 0
do while queued() <> 0
queueLine = lineIN( "QUEUE:" )
if pos( " on.", queueLine ) <> 0 | , /* USA */
pos( " (ON).", queueLine ) <> 0 then /* GER */
thisRC = 1
end /* do while queued() <> 0 */
InitAnsiEnd:
signal off error
RETURN thisRC
/* -------------------- */
/* get a yes or no , return 1 if yes */
yesno:procedure expose normal reverse bold
parse arg fooa , allopt,altans
if altans<>" " & words(altans)>1 then do
w1=strip(word(altans,1))
w2=strip(word(altans,2))
a1=left(w1,1) ; a2=left(w2,1)
a1a=substr(w1,2) ; a2a=substr(w2,2)
end
else do
a1='Y' ; a1a='es'
a2='N' ; a2a='o'
end /* Do */
ayn=' '||bold||a1||normal||a1a||'\'||bold||a2||normal||a2a
if allopt=1 then ayn=ayn||'\'||bold||'A'||normal||'ll'
do forever
foo1=normal||reverse||fooa||normal||ayn
call charout, foo1 normal ':'
pull anans
if abbrev(anans,a1)=1 then return 1
if abbrev(anans,a2)=1 then return 0
if allopt=1 & abbrev(anans,'A')=1 then return 2
end
nocon:
if rc=-7 then return 0
exit 0
/* get the hostname (aa.bb.cc) for this machine */
get_hostname: procedure
do queued(); pull .; end /* flush */
address cmd '@hostname | rxqueue'
parse pull hostname
return hostname
/*********/
packur2:procedure expose standalone
parse arg a1b0
if standalone=0 then
return packur(translate(a1b0,' ','+'))
else
return decodekeyval(translate(a1b0,' ','+'))
/************************************************/
/* procedure from TEST-CGI.CMD by Frankie Fan <kfan@netcom.com> 7/11/94 */
DecodeKeyVal: procedure
parse arg Code
Text=''
Code=translate(Code, ' ', '+')
rest='%'
do while (rest\='')
Parse var Code T '%' rest
Text=Text || T
if (rest\='' ) then
do
ch = left( rest,2)
if verify(ch,'01234567890ABCDEF')=0 then
c=X2C(ch)
else
c=ch
Text=Text || c
Code=substr( rest, 3)
end
end
return Text
/************************/
/* extract */
extract2:procedure expose standalone
parse upper arg aa
if standalone=0 then do
foo=extract(aa)
return foo
end
select
when aa='TRANSACTION' then return (10*dospid())+dostid()
when aa='LIMITTIMEINACTIVE' then return 20
otherwise return 0
end
/************************/
/* wild card match, with comparision against prior wild card match */
/* needle : what to look for
haytack : what to compare it to. Haystack may contain numerous * wildcard
characters
oldresu : prior return from sref_wild_match; or empty.
Return (depends on oldresu):
If needle is exact match to haystack: return -1
If needle does not match haystack (even with wild card checking) : return 0
If needle wildcard matches haystack, and oldresu='': returns match information
If needle wildcard matches haystack, and if oldresu<>'' (is a prior
return from sref_wild_match), then the current match is compared to
this oldresu. If the current match is "better" (has more matching
characters early in the string), then : return match info
If it's worse (or the same): return 0
Basically, -1 means "exact match", 0 means "no match" or "not better match"
(if oldresu not specified, 0 always means "no match"), and everything else
means "wild card match".
*/
wild_match:procedure
parse upper arg needle, haystack,oldresu
aresu=awild_match(needle,haystack)
if aresu=0 then return aresu /* no match */
if aresu=-1 | oldresu=' ' then return aresu /* exact match, or first wildcard match */
/* Is this a better WILDCARD MATCH */
wrdsnew=words(ARESU);wrdsold=words(oldRESU)
useold=1
do Nmm=1 to max(wrdsold,wrdsnew)
if Nmm>wrdsnew then leave
if Nmm>wrdsold then do
useold=0; leave
end
a1=strip(word(oldresu,Nmm))
a2=strip(word(aresu,Nmm))
if a1=a2 then iterate
if a2>a1 then leave /* new matching element > old matching element, thus new is worse match */
useold=0 /* found a matching element in new < then corresponding element in old*/
leave /* thus, new is better match */
end
IF USEold=0 THEN return aresu
return 0 /* non superior match (might be same, in which case old is used*/
awild_match:procedure
parse upper arg needle, haystack ; haystack=strip(haystack)
needle=strip(needle)
if needle=haystack then return -1 /* -1 signals exact match */
ast1=pos('*',haystack)
if ast1=0 then return 0 /* 0 means no match */
if haystack='*' then do
if length(needle)=0 then
return 100000
else
return length(needle)
end
ff=haystack
ii=0
do until ff=""
ii=ii+1
parse var ff hw.ii '*' ff
hw.ii=strip(hw.ii)
end
if hw.ii='' then ii=ii-1
hw.0=ii
/* check each component of haystackw against needle -- all components
must be there */
resu=' '
istart=1 ; ido=2
if ast1>1 then do /* first check abbrev */
if abbrev(needle,hw.1)=0 then return 0
aresu=length(hw.1)
if hw.0=1 then do
do nm=1 to aresu
resu=resu||' '||nm
end /* do */
return resu /* if haystacy of form abc*, we have a match */
end
ido=2 ; istart=aresu+1
do mm=1 to aresu
resu=resu||' '||mm
end /* do */
end
/* if here, then first part (a non wildcard) of haystack matches first
part of needle
Now check sequentially that each remaining part also exists
*/
do mm=ido to hw.0
igoo=pos(hw.mm,needle,istart)
if igoo=0 then return 0
tres=length(hw.mm)
istart=igoo+tres
do nn=igoo to (istart-1)
resu=resu||' '||nn
end /* do */
end
if istart >= length(needle) | right(haystack,1)='*' then
return resu
return 0
/************/
/* create a base64 packing of a message */
mk_base64:procedure
do mm=0 to 25 /* set base 64 encoding keys */
a.mm=d2c(65+mm)
end /* do */
do mm=26 to 51
a.mm=d2c(97+mm-26)
end /* do */
do mm=52 to 61
a.mm=d2c(48+mm-52)
end /* do */
a.62='+'
a.63='/'
parse arg mess
s2=x2b(c2x(mess))
ith=0
do forever
ith=ith+1
a1=substr(s2,1,6,0)
ms.ith=x2d(b2x(a1))
if length(s2)<7 then leave
s2=substr(s2,7)
end /* do */
pint=""
do kk=1 to ith
oi=ms.kk ; pint=pint||a.oi
end /* do */
j1=length(pint)//4
if j1<>0 then pint=pint||copies('=',4-j1)
return pint
/************/
/* <BR>eak a long url (for use in cell of table as target of link)
alen -- max width (between <BR>
nosn -- strip out http://xxx.yy/ portion
*/
breakup:procedure
parse arg aword,alen,homesite
parse upper var homesite . '//' homesite '/' .
homesite=translate(homesite)
parse var aword . '//' aword
nosn=0
if homesite<>'' then do
if abbrev(translate(aword),homesite)=1 then nosn=1
end /* do */
if nosn=1 then do
parse var aword '/' aword
if length(aword)<=alen then return '/'aword
asn='' ; req='/'aword
end /* do */
else do
if length(aword)<=alen then return aword
parse var aword asn '/' req ; asn=asn'/<br>'
end
parse var req rq '?' opts
if length(rq)>alen then rq=left(rq,alen)||'...<br>'
if length(opts)>alen then opts=left(opts,alen)'...'
if opts<>'' then rq=rq'?'
return asn||rq||opts