home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ARM Club 3
/
TheARMClub_PDCD3.iso
/
programs
/
comms_networking
/
dehtml
/
!DeHTML
/
DeHTML
(
.txt
)
< prev
next >
Wrap
RISC OS BBC BASIC V Source
|
1996-06-26
|
8KB
|
404 lines
> DeHTML
by Philip Banks
convert(line_len%)
in_handle%,out_handle%,line$,writeflag%,indent%,pre_flag%
paragraphflag%,word$,word%,currentchar$,pos%,split%,stop%
list_stack%, lstack%
list_stack%(100,1)
lstack%=0
find(start_file$)
"OS_File",11,end_file$,&FFF
in_handle%=
(start_file$)
out_handle%=
(end_file$)
writeflag%=
indent%=0
pre_flag%=
paragraphflag%=
line$=""
last_line$=" "
indent%=0
#in_handle%)
word$=""
word%=
) currentchar$=
#in_handle%)
B
(currentchar$=" ")
(currentchar$)<32)
word%=
!
currentchar$="<"
L
decode_html(line$, word$, in_handle%, out_handle%, indent%)
word%=
!#
(currentchar$="&")
":
decode_ampersand(currentchar$,in_handle%)
#8
currentchar$="<"
word$+=currentchar$
%D
(word%=
(currentchar$<>"<")
word$+=currentchar$
&2
#in_handle%
word%=
:writeflag%=
word%=
((
word$<>""
line$+=word$+" "
).
(line$)>line_len%)
(writeflag%=
*
(line$)<line_len%
+9
output_line(last_line$, line$, out_handle%)
pos%=
(line$)
split%=0
stop%=
pos%-=1
2>
line$,pos%,1) = " ")
(pos%<77)
stop%=
3#
(pos%=0)
(stop%=
4B
output_line(last_line$,
line$,pos%-1), out_handle%)
59 line$=
indent(indent%)+
line$,
(line$)-pos%)
#in_handle%
#out_handle%
autoquit%
finished%=
nonspace_check(string$)
non_space%, index%
index% = 1
non_space% =
non_space%)
(index% <=
(string$))
C=
string$,index%,1)<>" "
non_space% =
index%+=1
=non_space%
output_line(
last$, lin$, out%)
nonspace_check(lin$)=
lin$ = ""
lin$=""
last$=""
#out%, lin$
last$=lin$
decode_html(
line$,
word$, in_file%, out_file%,
indent%)
currentchar$,token$,fred$,upper_token$
token$=""
currentchar$=""
token$+=currentchar$
V$ currentchar$=
#in_file%)
W0
(currentchar$=">")
(currentchar$=" ")
X' upper_token$=
upper_case(token$)
upper_token$
ZA
"BR":
output_line(last_line$, line$+word$, out_file%)
[# line$=
indent(indent%)
word$=""
^/
"TITLE":
find_end_bracket(in_file%)
`;
"HR":
output_line(last_line$, line$, out_file%)
a:
output_line(last_line$,
76,"-"), out_file%)
word$=""
c# line$=
indent(indent%)
e@
"P":
output_line(last_line$, line$+word$, out_file%)
f3
output_line(last_line$,"", out_file%)
word$=""
h) line$=
indent(indent%)+" "
"H1":
currentchar$=""
m# line$+=currentchar$
n, currentchar$=
#in_handle%)
currentchar$="<"
p5
output_line(last_line$,line$,out_file%)
pos%=0
r#
line$,pos%,1)<=" "
pos%+=1
u4
pos%<>0
#out_handle%,
pos%-1," ");
vE
output_line(last_line$,
(line$)-pos%,"="), out_file%)
x, currentchar$=
#in_handle%)
currentchar$=">"
z# line$=
indent(indent%)
"PRE"
}7
output_line(last_line$, line$, out_file%)
pre_flag%=
line$=""
- currentchar$=
#in_file%)
'
currentchar$="&"
>
decode_ampersand(currentchar$,in_file%)
/
#out_file%,currentchar$;
% currentchar$=""
E
currentchar$<>"<"
#out_file%,currentchar$;
"
currentchar$="<"
token$=""
- currentchar$=
#in_file%)
=
currentchar$<>">"
token$+=currentchar$
"
currentchar$=">"
* token$=
upper_case(token$)
!
token$="/PRE"
pre_flag%=
:
output_line(last_line$, "", out_file%)
last_line$=""
pre_flag%=
# line$=
indent(indent%)
B
"UL","MENU":
output_line(last_line$, line$, out_file%)
lstack%+=1
% list_stack%(lstack%,0)=0
% list_stack%(lstack%,1)=
indent%+=3
) line$=
indent(indent%)
D
"/UL","/MENU":
output_line(last_line$, line$, out_file%)
lstack%-=1
$
lstack%<0
lstack%=0
indent%-=3
# line$=
indent(indent%)
;
"OL":
output_line(last_line$, line$, out_file%)
lstack%+=1
% list_stack%(lstack%,0)=0
% list_stack%(lstack%,1)=
indent%+=3
) line$=
indent(indent%)
<
"/OL":
output_line(last_line$, line$, out_file%)
lstack%-=1
$
lstack%<0
lstack%=0
indent%-=3
# line$=
indent(indent%)
;
"LI":
output_line(last_line$, line$, out_file%)
$
lstack%<0
lstack%=0
)
list_stack%(lstack%,1)=
. line$=
indent(indent%-3)+" * "
H line$=
indent(indent%-3)+" "+
(list_stack%(lstack%,0)+1)
c
(list_stack%(lstack%,0)+1))<2
line$+=
(list_stack%(lstack%,0)+1))," ")
) list_stack%(lstack%,0)+=1
*
"/H1","/H2","/H4","/H5","/H6":
6
(line$+word$)<>"Return to the index."
@
output_line(last_line$, line$+word$, out_file%)
last_line$=""
# line$=
indent(indent%)
word$=""
U
"H2","H3","H4","H5","H6":
output_line(last_line$, line$+word$, out_file%)
# line$=
indent(indent%)
word$=""
"!--":fred$=""
* currentchar$=
#in_file%)
9
currentchar$<>">"
fred$+=currentchar$
currentchar$=">"
#
fred$="FAQ End --"
%
#in_file%=
#in_file%
>
currentchar$<>">"
find_end_bracket(in_file%)
indent(id%)
id%," ")
find_end_bracket(file%)
currentchar%
currentchar%=
#file%
currentchar%=
(">")
decode_ampersand(
return_char$,file%)
segment$, char$, directive$
segment$=""
char$=
#file%)
segment$+=char$
(char$=" ")
(char$=";")
(char$=
(10))
(char$=
(13))
char$=" "
(char$=
(10))
(char$=
(13))
This is not an ampersand directive
( segment$=
segment$,
(segment$)-1)
return_char$="&"+segment$
#file%=
#file%-1
Otherwise we need to know if it a special hex character or a directive
segment$,1)="#"
- directive$=
segment$,
(segment$)-1)
1 directive$=
directive$,
(directive$)-1)
val%=
(directive$)
return_char$=
(val%)
- directive$=
segment$,
(segment$)-1)
, directive$=
upper_case(directive$)
directive$
#
"GT":return_char$=">"
#
"LT":return_char$="<"
$
"AMP":return_char$="&"
'
"QUOT":return_char$=
+ return_char$="&"+directive$+";"
upper_case(victim$)
upper$, I%, char$
upper$=""
I% = 1
(victim$)
char$=
victim$,I%,1)
B
(char$) > 96)
(char$) < 123)
char$=
(char$)-32)
upper$+=char$
=upper$