home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Dream 52
/
Amiga_Dream_52.iso
/
Linux
/
Magazine
/
wwwoffle-2.1.tar.gz
/
wwwoffle-2.1
/
refresh.c
< prev
next >
Wrap
C/C++ Source or Header
|
1998-02-23
|
20KB
|
775 lines
/***************************************
$Header: /home/amb/wwwoffle/RCS/refresh.c 2.24 1998/02/23 20:10:20 amb Exp $
WWWOFFLE - World Wide Web Offline Explorer - Version 2.1.
The HTML interactive page to refresh a URL.
******************/ /******************
Written by Andrew M. Bishop
This file Copyright 1997,98 Andrew M. Bishop
It may be distributed under the GNU Public License, version 2, or
any higher version. See section COPYING of the GNU Public license
for conditions under which this file may be redistributed.
***************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "wwwoffle.h"
#include "misc.h"
#include "config.h"
#include "sockets.h"
#include "errors.h"
/* Prototypes of the functions that are local to this file (all are defined below). */
static void RefreshFormPage(int fd,char *args);                        /* Send the interactive refresh form. */
static int RefreshFormParse(int fd,char *request_body,int is_online);  /* Parse the reply from that form. */
static void RefreshFormError(int fd,char *body);                       /* Report a form reply that could not be used. */
static int RefreshFormRecursive(int fd,char *url,char *method);        /* Progress page wrapped around a recursive fetch. */
static void IllegalRefreshPage(int fd,char *path);                     /* Report an illegal refresh path. */
static int RecurseFetchPages(int fd,char *url,char *method);           /* Fetch a page and queue what it references. */
/*++++++++++++++++++++++++++++++++++++++
  Send to the client a page to allow refreshes using HTML, or act on a refresh request
  that is encoded in the path.

  char *RefreshPage Returns NULL when there is nothing more for the caller to do,
                    a newly allocated URL for the simple "/refresh/proto/host/path" form,
                    or the marker value (char*)1 which is not a string (it is returned after
                    the illegal page has been sent and when the form parser or the recursive
                    fetch report that there are more pages to get).

  int fd The file descriptor of the client.

  char *path The path that was specified by the user.

  char *args The argument that was appended to the URL.

  char *url The url that was requested (temporarily modified and restored for the simple form).

  char *request_body A pointer to the HTTP request sent by the browser.

  int is_online Set to true if we are online.
  ++++++++++++++++++++++++++++++++++++++*/

char *RefreshPage(int fd,char *path,char *args,char *url,char *request_body,int is_online)
{
 char *newurl=NULL;

 if(!strcmp("/refresh/",path))
    RefreshFormPage(fd,args);
 else if(!strcmp("/refresh-request/",path))
   {
    if(RefreshFormParse(fd,request_body,is_online))
       newurl=(char*)1;
   }
 else if(!strncmp("/refresh/",path,9))
   {
    /* Simple form: /refresh/<proto>/<host and path> is rewritten to <proto>://<host and path>. */

    char *copy,*s=NULL,*p=strstr(url,"/refresh/");

    if(p)
      {
       p+=9;
       s=strchr(p,'/');
      }

    /* Validate before allocating so that the error return has nothing to release. */

    if(!s)
      {IllegalRefreshPage(fd,path);return((char*)1);}

    *s++=0;                     /* split the protocol from the rest inside url itself */

    copy=(char*)malloc(strlen(p)+strlen(s)+4); /* "://" and the terminator */

    strcpy(copy,p);
    strcat(copy,"://");
    strcat(copy,s);

    *--s='/';                   /* put url back the way the caller passed it */

    newurl=copy;
   }
 else
   {
    /* Method form: /<method>/<proto>/<host and path>; copy is cut into those three parts. */

    char *copy=(char*)malloc(strlen(path)+1);
    char *slash,*s;

    strcpy(copy,path);

    slash=strchr(copy+1,'/');
    if(!slash)
      {free(copy);IllegalRefreshPage(fd,path);return((char*)1);}
    *slash++=0;

    s=strchr(slash,'/');
    if(!s)
      {free(copy);IllegalRefreshPage(fd,path);return((char*)1);}
    *s++=0;

    if(is_online)
      {
       /* 8 extra bytes cover "://", "?" and the terminator. */

       char *fetch_url=(char*)malloc(strlen(slash)+strlen(s)+(args?strlen(args):0)+8);

       if(args)
          sprintf(fetch_url,"%s://%s?%s",slash,s,args);
       else
          sprintf(fetch_url,"%s://%s",slash,s);

       if(RecurseFetchPages(fd,fetch_url,copy))
          newurl=(char*)1;

       free(fetch_url);
      }
    else
      {
       /* Offline: write a request for the original url to the outgoing spool. */

       URL *Url=SplitURL(url);

       if(Url->Protocol)
         {
          char *new_request=RequestURL(Url->name,NULL);
          int new_outgoing=OpenOutgoingSpoolFile(0);

          if(new_outgoing==-1)
             PrintMessage(Warning,"Cannot open the new outgoing request to write.");
          else
            {
             write_string(new_outgoing,new_request);
             CloseOutgoingSpoolFile(new_outgoing,Url);
            }

          free(new_request);
         }

       FreeURL(Url);
      }

    free(copy);
   }

 return(newurl);
}
/*++++++++++++++++++++++++++++++++++++++
  The form that the user enters the details on.

  int fd The file descriptor.

  char *args The arguments that were on the request for this URL; when present they are
             URL-decoded and used (HTML escaped) as the initial contents of the URL field.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormPage(int fd,char *args)
{
 char *head=
 "HTTP/1.0 200 WWWOFFLE Refresh Form\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>WWWOFFLE - Interactive Refresh Form</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form</H1>\n"
 "You can use this form to refresh or fetch any URL, either a single one or by following links recursively.\n"
 "<p>\n"
 "<form action=\"/refresh-request/\" method=post>\n"
 "Fetch <select name=\"method\">\n"
 "<option value=\"-none\" selected>this URL only\n"
 "<option value=\"-dir-1\" >recursively to depth 1 in the same directory\n"
 "<option value=\"-dir-2\" >recursively to depth 2 in the same directory\n"
 "<option value=\"-dir-3\" >recursively to depth 3 in the same directory\n"
 "<option value=\"-dir-4\" >recursively to depth 4 in the same directory\n"
 "<option value=\"-dir-5\" >recursively to depth 5 in the same directory\n"
 "<option value=\"-host-1\" >recursively to depth 1 on the same host\n"
 "<option value=\"-host-2\" >recursively to depth 2 on the same host\n"
 "<option value=\"-host-3\" >recursively to depth 3 on the same host\n"
 "<option value=\"-host-4\" >recursively to depth 4 on the same host\n"
 "<option value=\"-any-1\" >recursively to depth 1 on any host\n"
 "<option value=\"-any-2\" >recursively to depth 2 on any host\n"
 "<option value=\"-any-3\" >recursively to depth 3 on any host\n"
 "</select>\n"
 "<br>\n"
 "<input type=\"text\" name=\"url\" value=\"";
 char *middle1=
 "\" size=60>\n"                /* the space keeps 'size' a separate attribute from 'value' */
 "<br>\n"
 "<input name=\"images\" type=\"checkbox\" value=\"-images\" ";
 char *middle2=
 ">Fetch images in the pages\n"
 "<br>\n"
 "<input name=\"frames\" type=\"checkbox\" value=\"-frames\" ";
 char *tail1=
 ">Fetch frames in the pages\n"
 "<br>\n"
 "<input name=\"force\" type=\"checkbox\" value=\"-force\">Force refresh even if already cached"
 "<br>\n"
 "<input type=\"submit\" value=\"Fetch Now\">\n"
 "</form>\n"
 "<p>\n"
 "Notes:\n"
 "<ol>\n"
 "<li>The default protocol is http if none is specified.\n"
 "<li>To get a directory listing using ftp make sure that the path ends with '/'.\n"
 "<li>To finger user@remote.host you should enter the URL as finger://remote.host/user.\n"
 "</ol>\n";
 char *tail2=
 "</BODY>\n"
 "</HTML>\n";

 write_string(fd,head);

 if(args)
   {
    /* The arguments come from the request, so they must not be able to close the
       value="..." attribute or start a tag; the longest replacement is 6 characters. */

    char *decargs=UrlDecode(args,0);
    char *escaped=(char*)malloc(6*strlen(decargs)+1);
    char *in,*out=escaped;

    for(in=decargs;*in;in++)
       switch(*in)
         {
          case '&': strcpy(out,"&amp;");  out+=5; break;
          case '<': strcpy(out,"&lt;");   out+=4; break;
          case '>': strcpy(out,"&gt;");   out+=4; break;
          case '"': strcpy(out,"&quot;"); out+=6; break;
          default:  *out++=*in;
         }
    *out=0;

    write_string(fd,escaped);

    free(escaped);
    free(decargs);
   }

 write_string(fd,middle1);

 /* Pre-tick the boxes according to the FetchImages / FetchFrames settings. */

 if(FetchImages)
    write_string(fd,"checked");

 write_string(fd,middle2);

 if(FetchFrames)
    write_string(fd,"checked");

 write_string(fd,tail1);

 /* The link back to the welcome page is only shown when the form is not pre-filled. */

 if(!args)
    write_string(fd,"<p align=center>[<a href=\"/\">Back to the Welcome page</a>]</p>\n");

 write_string(fd,tail2);
}
/*++++++++++++++++++++++++++++++++++++++
  Parse the reply from the form.

  int RefreshFormParse Returns a true value if there are more pages to get
                       (always 0 when offline or when the reply is rejected).

  int fd The file descriptor of the client.

  char *request_body The body of the HTTP request sent by the browser
                     ("name=value" pairs separated by '&'); may be NULL.

  int is_online Set to true if we are online.
  ++++++++++++++++++++++++++++++++++++++*/

static int RefreshFormParse(int fd,char *request_body,int is_online)
{
 int i;
 int more=0;
 char *copy,*url=NULL,*decurl,*method=NULL,*total_method,*images="",*frames="",*force="";
 URL *Url;

 if(!request_body)
   {
    RefreshFormError(fd,NULL);
    return(0);
   }

 copy=(char*)malloc(strlen(request_body)+1);
 strcpy(copy,request_body);

 /* Cut the copy at each '&' and remember where the value of each known field starts. */

 for(i=0;copy[i];i++)
   {
    if(i!=0 && copy[i-1]=='&')
       copy[i-1]=0;

    if(i==0 || copy[i-1]==0)
      {
       if(!strncmp("method=",&copy[i],7))
          method=&copy[i+7];
       if(!strncmp("images=",&copy[i],7))
          images=&copy[i+7];
       if(!strncmp("frames=",&copy[i],7))
          frames=&copy[i+7];
       if(!strncmp("force=",&copy[i],6))
          force=&copy[i+6];
       if(!strncmp("url=",&copy[i],4))
          url=&copy[i+4];
      }
   }

 if(url==NULL || *url==0 || method==NULL)
   {
    RefreshFormError(fd,request_body);
    free(copy);
    return(0);
   }

 /* The decoded URL is a separate allocation (url itself points into copy). */

 decurl=UrlDecode(url,1);
 Url=SplitURL(decurl);

 /* "refresh" followed by the option values, e.g. "refresh-dir-2-images". */

 total_method=(char*)malloc(strlen("refresh")+strlen(method)+strlen(images)+strlen(frames)+strlen(force)+1);

 strcpy(total_method,"refresh");
 strcat(total_method,method);
 strcat(total_method,images);
 strcat(total_method,frames);
 strcat(total_method,force);

 if(is_online)
    more=RefreshFormRecursive(fd,Url->name,total_method);
 else
   {
    /* Offline: spool a request for /<method>/<proto>/<host and path> to be handled later. */

    URL *new_Url;
    char *new_url;

    new_url=(char*)malloc(strlen(total_method)+strlen(Url->proto)+strlen(Url->hostp)+4); /* 3 '/' and the terminator */
    sprintf(new_url,"/%s/%s/%s",total_method,Url->proto,Url->hostp);

    new_Url=SplitURL(new_url);

    if(new_Url->Protocol)
      {
       char *new_request=RequestURL(new_Url->name,NULL);
       int new_outgoing=OpenOutgoingSpoolFile(0);

       if(new_outgoing==-1)
          PrintMessage(Warning,"Cannot open the new outgoing request to write.");
       else
         {
          write_string(new_outgoing,new_request);
          CloseOutgoingSpoolFile(new_outgoing,new_Url);
          WillGetURL(fd,new_Url,0);
         }

       free(new_request);
      }

    FreeURL(new_Url);
    free(new_url);              /* released whether or not the request was spooled */
   }

 free(total_method);
 free(copy);
 FreeURL(Url);
 free(decurl);

 return(more);
}
/*++++++++++++++++++++++++++++++++++++++
  An error with the form.

  int fd The file descriptor.

  char *body The browser reply that the user entered, or NULL if the reply had no body;
             it is HTML escaped before being shown.
  ++++++++++++++++++++++++++++++++++++++*/

static void RefreshFormError(int fd,char *body)
{
 char *head=
 "HTTP/1.0 404 WWWOFFLE Refresh Form Error\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>WWWOFFLE - Interactive Refresh Form Error</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Interactive Refresh Form Error</H1>\n"
 "<p align=center>\n";
 char *middle1=
 "The reply from the form that your browser sent did not have a body.\n";
 char *middle2=
 "The reply from the form that your browser sent\n"
 "<br><b><tt>\n";
 char *middle3=
 "\n"
 "</tt></b><br>\n"
 "had an error and could not be parsed.\n";
 char *tail=
 "<p align=center>[<a href=\"/refresh/\">Back to the Refresh page</a>]</p>"
 "</BODY>\n"
 "</HTML>\n";

 write_string(fd,head);

 if(!body)
    write_string(fd,middle1);
 else
   {
    /* The body is whatever the browser sent, so it is shown as text and never as markup;
       the longest replacement is 6 characters. */

    char *escaped=(char*)malloc(6*strlen(body)+1);
    char *in,*out=escaped;

    for(in=body;*in;in++)
       switch(*in)
         {
          case '&': strcpy(out,"&amp;");  out+=5; break;
          case '<': strcpy(out,"&lt;");   out+=4; break;
          case '>': strcpy(out,"&gt;");   out+=4; break;
          case '"': strcpy(out,"&quot;"); out+=6; break;
          default:  *out++=*in;
         }
    *out=0;

    write_string(fd,middle2);
    write_string(fd,escaped);
    write_string(fd,middle3);

    free(escaped);
   }

 write_string(fd,tail);
}
/*++++++++++++++++++++++++++++++++++++++
  Fetch pages recursively, and show progress.

  int RefreshFormRecursive Returns a true value if there are more pages to get
                           (the result of the recursive fetch).

  int fd The file descriptor to write into.

  char *url The URL to fetch (HTML escaped for display, passed unchanged to the fetch).

  char *method The method to use.
  ++++++++++++++++++++++++++++++++++++++*/

static int RefreshFormRecursive(int fd,char *url,char *method)
{
 int more;
 char *head=
 "HTTP/1.0 200 WWWOFFLE Refresh Recursive Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>WWWOFFLE - Refresh Recursive Page</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Refresh Recursive Page</H1>\n"
 "Your requested URL\n"
 "<br><b><tt>\n";
 char *middle=
 "\n"
 "</tt></b><br>\n"
 "and the links to the specified depth are being fetched in the background.\n"
 "<pre>\n";
 char *tail=
 "</pre>\n"
 "<p align=center>[<a href=\"/refresh/\">Back to the Refresh page</a>]</p>"
 "</BODY>\n"
 "</HTML>\n";

 /* The URL was typed into the form, so show it as text and never as markup;
    the longest replacement is 6 characters. */

 char *escaped=(char*)malloc(6*strlen(url)+1);
 char *in,*out=escaped;

 for(in=url;*in;in++)
    switch(*in)
      {
       case '&': strcpy(out,"&amp;");  out+=5; break;
       case '<': strcpy(out,"&lt;");   out+=4; break;
       case '>': strcpy(out,"&gt;");   out+=4; break;
       case '"': strcpy(out,"&quot;"); out+=6; break;
       default:  *out++=*in;
      }
 *out=0;

 write_string(fd,head);
 write_string(fd,escaped);
 write_string(fd,middle);

 free(escaped);

 /* The fetch writes its own progress line between the <pre> tags. */

 more=RecurseFetchPages(fd,url,method);

 write_string(fd,tail);

 return(more);
}
/*++++++++++++++++++++++++++++++++++++++
  Inform the user that the specified refresh page is illegal.

  int fd The file descriptor to write to.

  char *path The specified path (HTML escaped before being shown, and shown with
             exactly one leading '/').
  ++++++++++++++++++++++++++++++++++++++*/

static void IllegalRefreshPage(int fd,char *path)
{
 char *head=
 "HTTP/1.0 404 WWWOFFLE Illegal Refresh Page\r\n"
 "Content-type: text/html\r\n"
 "\r\n"
 "<HTML>\n"
 "<HEAD>\n"
 "<TITLE>WWWOFFLE - Illegal Interactive Refresh Page</TITLE>\n"
 "</HEAD>\n"
 "<BODY>\n"
 "<H1 align=center>WWWOFFLE Illegal Interactive Refresh Page</H1>\n"
 "<p align=center>\n"
 "Your request for the refresh URL\n"
 "<br><b><tt>\n";
 char *tail=
 "\n"
 "</tt></b><br>\n"
 "is illegal, select the link below for the main interactive refresh page.\n"
 "<br>\n"
 "<a href=\"/refresh/\">/refresh/</a>\n"
 "</BODY>\n"
 "</HTML>\n";

 /* The path comes straight from the request, so show it as text and never as markup;
    the longest replacement is 6 characters. */

 char *escaped=(char*)malloc(6*strlen(path)+1);
 char *in,*out=escaped;

 for(in=path;*in;in++)
    switch(*in)
      {
       case '&': strcpy(out,"&amp;");  out+=5; break;
       case '<': strcpy(out,"&lt;");   out+=4; break;
       case '>': strcpy(out,"&gt;");   out+=4; break;
       case '"': strcpy(out,"&quot;"); out+=6; break;
       default:  *out++=*in;
      }
 *out=0;

 write_string(fd,head);

 /* Only add the '/' when the path does not already start with one. */

 write_formatted(fd,"%s%s",(*escaped=='/')?"":"/",escaped);

 write_string(fd,tail);

 free(escaped);
}
/*++++++++++++++++++++++++++++++++++++++
  Decide if a path lies outside the directory of the page being fetched.

  int RecurseOutsideDir Returns a true value if otherpath does not start with the directory part of pagepath.

  char *pagepath The path of the page; it is cut short in place just after its last '/'.

  char *otherpath The path of the frame or link to test.
  ++++++++++++++++++++++++++++++++++++++*/

static int RecurseOutsideDir(char *pagepath,char *otherpath)
{
 char *end=pagepath+strlen(pagepath);

 while(end>pagepath && *end!='/')
    end--;

 if(*end)
    *++end=0;

 return(end!=pagepath && strncmp(pagepath,otherpath,end-pagepath));
}


/*++++++++++++++++++++++++++++++++++++++
  Build the "/refresh<options>/<proto>/<host and path>" URL used to continue a recursive fetch.

  char *RecurseBuildRefresh Returns a newly allocated string that the caller must free.

  URL *Url The URL that is to be fetched (its proto and hostp fields are used).

  int images, int frames, int force The options to pass on.

  int mode The recursion mode (1 = dir, 2 = host, anything else = any).

  int depth The depth to pass on; the mode and depth are left out when this is zero.
  ++++++++++++++++++++++++++++++++++++++*/

static char *RecurseBuildRefresh(URL *Url,int images,int frames,int force,int mode,int depth)
{
 /* The options, mode, depth and separators need at most 48 characters. */

 char *refresh=(char*)malloc(strlen(Url->proto)+strlen(Url->hostp)+64);

 strcpy(refresh,"/refresh");

 if(images)
    strcat(refresh,"-images");
 if(frames)
    strcat(refresh,"-frames");
 if(force)
    strcat(refresh,"-force");

 if(depth)
   {
    if(mode==1)
       strcat(refresh,"-dir");
    else if(mode==2)
       strcat(refresh,"-host");
    else
       strcat(refresh,"-any");

    sprintf(&refresh[strlen(refresh)],"-%d",depth);
   }

 if(!images && !frames && !depth && !force)
    strcat(refresh,"-none");

 strcat(refresh,"/");
 strcat(refresh,Url->proto);
 strcat(refresh,"/");
 strcat(refresh,Url->hostp);

 return(refresh);
}


/*++++++++++++++++++++++++++++++++++++++
  Write a request for a URL into a new outgoing spool file.

  int RecurseSpoolRequest Returns 1 if the request was written, 0 if the spool file could not be opened.

  char *url The URL to put in the request.

  char *referer The second argument for RequestURL (the name of the page that referred to the URL).

  URL *Url The URL structure to close the spool file with.
  ++++++++++++++++++++++++++++++++++++++*/

static int RecurseSpoolRequest(char *url,char *referer,URL *Url)
{
 int spooled=0;
 char *new_request=RequestURL(url,referer);
 int new_outgoing=OpenOutgoingSpoolFile(0);

 if(new_outgoing==-1)
    PrintMessage(Warning,"Cannot open the new outgoing request to write.");
 else
   {
    write_string(new_outgoing,new_request);
    CloseOutgoingSpoolFile(new_outgoing,Url);
    spooled=1;
   }

 free(new_request);

 return(spooled);
}


/*++++++++++++++++++++++++++++++++++++++
  Fetch pages recursively.

  The page is requested through the proxy on localhost and, if the reply could be parsed,
  requests for its images, frames and links (as selected by the method) are written to the
  outgoing spool.

  int RecurseFetchPages Returns a true value if there are more pages to get
                        (at least one request was spooled).

  int fd The file descriptor to output to; a progress line is written when it is not negative.

  char *url The url to start at.

  char *method The method to use, encoding the depth and other options as words separated by '-'.
  ++++++++++++++++++++++++++++++++++++++*/

static int RecurseFetchPages(int fd,char *url,char *method)
{
 int recursive_depth=0,recursive_mode=0,images=0,frames=0,force=0;
 int more=0,status;
 char buffer[257];
 char *methodcopy,*word,*dash;
 int server;
 int parsed=0;
 URL *pageUrl;
 char *request,*line;
 char **list;
 int j;

 PrintMessage(Debug,"Refresh method='%s'",method);

 /* Split a private copy of the method at each '-'; the start of the copy is kept so
    that it can be freed once all of the words have been looked at. */

 methodcopy=(char*)malloc(strlen(method)+1);
 strcpy(methodcopy,method);

 word=methodcopy;
 if(*word=='-')
    word++;

 do
   {
    if((dash=strchr(word,'-')))
       *dash=0;

    if(!strcmp(word,"refresh"))
       ;
    else if(!strcmp(word,"none"))
       ;
    else if(!strcmp(word,"dir"))
       recursive_mode=1;
    else if(!strcmp(word,"host"))
       recursive_mode=2;
    else if(!strcmp(word,"any"))
       recursive_mode=3;
    else if(!strcmp(word,"images"))
       images=1;
    else if(!strcmp(word,"frames"))
       frames=1;
    else if(!strcmp(word,"force"))
       force=1;
    else if(atoi(word))
       recursive_depth=atoi(word);

    if(dash)
       word=dash+1;
   }
 while(dash);

 free(methodcopy);

 pageUrl=SplitURL(url);

 /* Get the page */

 server=OpenClientSocket("localhost",HTTP_Port);

 if(server==-1)
   {
    PrintMessage(Warning,"Cannot open connection to wwwoffle proxy.");
    FreeURL(pageUrl);
    return(0);
   }

 init_buffer(server);           /* only once the descriptor is known to be valid */

 if(fd>=0)
    write_formatted(fd,"Getting %s [%s]\n",pageUrl->name,method);

 request=RequestURL(pageUrl->name,NULL);

 if(force)
   {
    /* Insert "Pragma: no-cache" after the first line of the request. */

    char *eol=strchr(request,'\n');

    if(eol)
      {
       char *nocache=(char*)malloc(strlen(request)+24);

       *eol++=0;

       strcpy(nocache,request);
       strcat(nocache,"\nPragma: no-cache\r\n");
       strcat(nocache,eol);

       free(request);
       request=nocache;
      }
   }

 write_string(server,request);

 /* A missing or unreadable status line is treated as a 404. */

 line=read_line_or_timeout(server,NULL);

 if(!line || sscanf(line,"%*s %d",&status)!=1)
    status=404;

 parsed=(status>=200 && status<400) && ParseHTML(server,pageUrl,0);

 /* Read and discard whatever is left of the reply. */

 while(read_data(server,buffer,256)>0)
    ;

 close(server);

 free(line);
 free(request);

 /* The images are requested directly, with this page as the referer. */

 if(images && parsed && (list=ListImages()))
    for(j=0;list[j];j++)
      {
       URL *imageUrl=SplitURL(list[j]);

       if(imageUrl->local)
         {FreeURL(imageUrl);continue;}

       if(imageUrl->Protocol)
         {
          PrintMessage(Debug,"Image=%s",imageUrl->name);

          if(RecurseSpoolRequest(imageUrl->name,pageUrl->name,imageUrl))
             more=1;
         }

       FreeURL(imageUrl);
      }

 /* The frames are fetched with the same options and depth as this page if they are within
    the limits of the recursion mode, otherwise they are requested without a refresh method. */

 if(frames && parsed && (list=ListFrames()))
    for(j=0;list[j];j++)
      {
       char *refresh=NULL;      /* stays NULL unless a refresh URL is allocated for this frame */
       int recurse=1;
       URL *frameUrl=SplitURL(list[j]);

       if(frameUrl->local)
         {FreeURL(frameUrl);continue;}

       PrintMessage(Debug,"Frame=%s",frameUrl->name);

       if(recursive_mode!=3)
         {
          if(strcmp(pageUrl->host,frameUrl->host))
             recurse=0;
          else if(recursive_mode!=2 && RecurseOutsideDir(pageUrl->path,frameUrl->path))
             recurse=0;
         }

       if(recurse)
          refresh=RecurseBuildRefresh(frameUrl,images,frames,force,recursive_mode,recursive_depth);

       /* NOTE(review): the fallback is hostp (no protocol) although the images above are
          requested by name - confirm which of the two RequestURL expects here. */

       if(frameUrl->Protocol &&
          RecurseSpoolRequest(refresh?refresh:frameUrl->hostp,pageUrl->name,frameUrl))
          more=1;

       free(refresh);           /* never hostp, that belongs to frameUrl */
       FreeURL(frameUrl);
      }

 /* The links are fetched with one less level of recursion. */

 if(recursive_depth && parsed && (list=ListLinks()))
    for(j=0;list[j];j++)
      {
       char *refresh;
       URL *linkUrl=SplitURL(list[j]);

       if(linkUrl->local)
         {FreeURL(linkUrl);continue;}

       PrintMessage(Debug,"Link=%s",linkUrl->name);

       if(recursive_mode!=3 &&
          (strcmp(pageUrl->host,linkUrl->host) ||
           (recursive_mode!=2 && RecurseOutsideDir(pageUrl->path,linkUrl->path))))
         {FreeURL(linkUrl);continue;}

       if(IsNotGotRecursive(linkUrl->proto,linkUrl->host,linkUrl->path))
         {FreeURL(linkUrl);continue;}

       refresh=RecurseBuildRefresh(linkUrl,images,frames,force,recursive_mode,recursive_depth-1);

       if(linkUrl->Protocol &&
          RecurseSpoolRequest(refresh,pageUrl->name,linkUrl))
          more=1;

       free(refresh);
       FreeURL(linkUrl);
      }

 FreeURL(pageUrl);

 return(more);
}