home *** CD-ROM | disk | FTP | other *** search
/ ARM Club 3 / TheARMClub_PDCD3.iso / programs / comms_networking / dehtml / !DeHTML / DeHTML (.txt) < prev    next >
RISC OS BBC BASIC V Source  |  1996-06-26  |  8KB  |  404 lines

  1.  > DeHTML
  2.  by Philip Banks
  3. convert(line_len%)
  4.  in_handle%,out_handle%,line$,writeflag%,indent%,pre_flag%
  5.  paragraphflag%,word$,word%,currentchar$,pos%,split%,stop%
  6.  list_stack%, lstack%
  7.  list_stack%(100,1)
  8. lstack%=0
  9. find(start_file$) 
  10.  "OS_File",11,end_file$,&FFF
  11. in_handle%=
  12. (start_file$)
  13. out_handle%=
  14. (end_file$)
  15. writeflag%=
  16. indent%=0
  17. pre_flag%=
  18. paragraphflag%=
  19. line$=""
  20. last_line$=" "
  21. indent%=0
  22. #in_handle%)
  23.       word$=""
  24.       word%=
  25.       
  26. )         currentchar$=
  27. #in_handle%)
  28. B         
  29.  (currentchar$=" ") 
  30. (currentchar$)<32) 
  31.  word%=
  32. !         
  33.  currentchar$="<" 
  34. L            
  35. decode_html(line$, word$, in_handle%, out_handle%, indent%)
  36.             word%=
  37.          
  38. !#         
  39.  (currentchar$="&") 
  40. ":            
  41. decode_ampersand(currentchar$,in_handle%)
  42. #8            
  43.  currentchar$="<" 
  44.  word$+=currentchar$
  45.          
  46. %D         
  47.  (word%=
  48.  (currentchar$<>"<") 
  49.  word$+=currentchar$
  50. &2         
  51. #in_handle% 
  52.  word%=
  53. :writeflag%=
  54.       
  55.  word%=
  56. ((      
  57.  word$<>"" 
  58.  line$+=word$+" "
  59. ).   
  60. (line$)>line_len%) 
  61.  (writeflag%=
  62. *       
  63. (line$)<line_len% 
  64. +9         
  65. output_line(last_line$, line$, out_handle%)
  66.         
  67.          pos%=
  68. (line$)
  69.          split%=0
  70.          stop%=
  71.          
  72.             pos%-=1
  73. 2>            
  74. line$,pos%,1) = " ") 
  75.  (pos%<77) 
  76.  stop%=
  77. 3#         
  78.  (pos%=0) 
  79.  (stop%=
  80. 4B         
  81. output_line(last_line$, 
  82. line$,pos%-1), out_handle%)
  83. 59         line$=
  84. indent(indent%)+
  85. line$,
  86. (line$)-pos%)
  87.       
  88. #in_handle%
  89. #out_handle%
  90.  autoquit% 
  91.  finished%=
  92. nonspace_check(string$)
  93.  non_space%, index%
  94. index% = 1
  95. non_space% = 
  96.  non_space%) 
  97.  (index% <= 
  98. (string$))
  99. C=   
  100. string$,index%,1)<>" " 
  101.  non_space% = 
  102.  index%+=1
  103. =non_space%
  104. output_line(
  105.  last$, lin$, out%)
  106. nonspace_check(lin$)=
  107.  lin$ = ""
  108.  lin$="" 
  109.  last$="" 
  110. #out%, lin$
  111.    last$=lin$
  112. decode_html(
  113.  line$, 
  114.  word$, in_file%, out_file%, 
  115.  indent%)
  116.  currentchar$,token$,fred$,upper_token$
  117.    token$=""
  118.    currentchar$=""
  119.       token$+=currentchar$
  120. V$      currentchar$=
  121. #in_file%)
  122. W0   
  123.  (currentchar$=">") 
  124.  (currentchar$=" ")
  125. X'   upper_token$=
  126. upper_case(token$)
  127.  upper_token$ 
  128. ZA      
  129.  "BR":
  130. output_line(last_line$, line$+word$, out_file%)
  131. [#         line$=
  132. indent(indent%)
  133.          word$=""
  134. ^/      
  135.  "TITLE":
  136. find_end_bracket(in_file%)
  137. `;      
  138.  "HR":
  139. output_line(last_line$, line$, out_file%)
  140. a:         
  141. output_line(last_line$, 
  142. 76,"-"), out_file%)
  143.          word$=""
  144. c#         line$=
  145. indent(indent%)
  146. e@      
  147.  "P":
  148. output_line(last_line$, line$+word$, out_file%)
  149. f3         
  150. output_line(last_line$,"", out_file%)
  151.          word$=""
  152. h)         line$=
  153. indent(indent%)+"   "
  154.       
  155.  "H1":
  156.          currentchar$=""
  157.          
  158. m#            line$+=currentchar$
  159. n,            currentchar$=
  160. #in_handle%)
  161.          
  162.  currentchar$="<"
  163. p5         
  164. output_line(last_line$,line$,out_file%)
  165.          pos%=0
  166. r#         
  167. line$,pos%,1)<=" "
  168.             pos%+=1
  169.          
  170. u4         
  171.  pos%<>0 
  172. #out_handle%,
  173. pos%-1," ");
  174. vE         
  175. output_line(last_line$, 
  176. (line$)-pos%,"="), out_file%)
  177.          
  178. x,            currentchar$=
  179. #in_handle%)
  180.          
  181.  currentchar$=">"
  182. z#         line$=
  183. indent(indent%)
  184.       
  185.  "PRE"
  186. }7         
  187. output_line(last_line$, line$, out_file%)
  188.          pre_flag%=
  189.          line$=""
  190.          
  191.             
  192. -               currentchar$=
  193. #in_file%)
  194. '               
  195.  currentchar$="&" 
  196. >                  
  197. decode_ampersand(currentchar$,in_file%)
  198. /                  
  199. #out_file%,currentchar$;
  200. %                  currentchar$=""
  201.                 
  202. E                  
  203.  currentchar$<>"<" 
  204. #out_file%,currentchar$;
  205.                
  206. "            
  207.  currentchar$="<"
  208.             token$=""
  209.             
  210. -               currentchar$=
  211. #in_file%)
  212. =               
  213.  currentchar$<>">" 
  214.  token$+=currentchar$
  215. "            
  216.  currentchar$=">"
  217. *            token$=
  218. upper_case(token$)
  219. !            
  220.  token$="/PRE" 
  221.                pre_flag%=
  222. :               
  223. output_line(last_line$, "", out_file%)
  224.                 last_line$=""
  225.             
  226.          
  227.  pre_flag%=
  228. #         line$=
  229. indent(indent%)
  230. B      
  231.  "UL","MENU":
  232. output_line(last_line$, line$, out_file%)
  233.          lstack%+=1
  234. %         list_stack%(lstack%,0)=0
  235. %         list_stack%(lstack%,1)=
  236.          indent%+=3
  237. )         line$=
  238. indent(indent%)      
  239. D      
  240.  "/UL","/MENU":
  241. output_line(last_line$, line$, out_file%)
  242.          lstack%-=1
  243. $         
  244.  lstack%<0 
  245.  lstack%=0
  246.          indent%-=3
  247. #         line$=
  248. indent(indent%)
  249. ;      
  250.  "OL":
  251. output_line(last_line$, line$, out_file%)
  252.          lstack%+=1
  253. %         list_stack%(lstack%,0)=0
  254. %         list_stack%(lstack%,1)=
  255.          indent%+=3
  256. )         line$=
  257. indent(indent%)      
  258. <      
  259.  "/OL":
  260. output_line(last_line$, line$, out_file%)
  261.          lstack%-=1
  262. $         
  263.  lstack%<0 
  264.  lstack%=0
  265.          indent%-=3
  266. #         line$=
  267. indent(indent%)
  268. ;      
  269.  "LI":
  270. output_line(last_line$, line$, out_file%)
  271. $         
  272.  lstack%<0 
  273.  lstack%=0
  274. )         
  275.  list_stack%(lstack%,1)=
  276. .            line$=
  277. indent(indent%-3)+" * "
  278.            
  279. H            line$=
  280. indent(indent%-3)+" "+
  281. (list_stack%(lstack%,0)+1)
  282. c            
  283. (list_stack%(lstack%,0)+1))<2 
  284.  line$+=
  285. (list_stack%(lstack%,0)+1))," ") 
  286. )            list_stack%(lstack%,0)+=1
  287.          
  288. *      
  289.  "/H1","/H2","/H4","/H5","/H6":
  290. 6         
  291.  (line$+word$)<>"Return to the index." 
  292. @            
  293. output_line(last_line$, line$+word$, out_file%)
  294.            
  295.             last_line$=""
  296.          
  297. #         line$=
  298. indent(indent%)
  299.          word$=""
  300. U      
  301.  "H2","H3","H4","H5","H6":
  302. output_line(last_line$, line$+word$, out_file%)
  303. #         line$=
  304. indent(indent%)
  305.          word$=""
  306.       
  307.  "!--":fred$=""
  308.          
  309. *            currentchar$=
  310. #in_file%)
  311. 9            
  312.  currentchar$<>">" 
  313.  fred$+=currentchar$
  314.          
  315.  currentchar$=">"
  316. #         
  317.  fred$="FAQ End --" 
  318. %            
  319. #in_file%=
  320. #in_file%
  321.          
  322.       
  323. >         
  324.  currentchar$<>">" 
  325. find_end_bracket(in_file%)
  326. indent(id%)
  327. id%," ")
  328. find_end_bracket(file%)
  329.  currentchar%
  330.       currentchar%=
  331. #file%
  332.  currentchar%=
  333. (">")
  334. decode_ampersand(
  335.  return_char$,file%)
  336.  segment$, char$, directive$
  337. segment$=""
  338.   char$=
  339. #file%)
  340.   segment$+=char$
  341.  (char$=" ") 
  342.  (char$=";") 
  343.  (char$=
  344. (10)) 
  345.  (char$=
  346. (13))
  347.  char$=" " 
  348.  (char$=
  349. (10)) 
  350.  (char$=
  351. (13)) 
  352.  This is not an ampersand directive
  353. (   segment$=
  354. segment$,
  355. (segment$)-1)
  356.     return_char$="&"+segment$
  357. #file%=
  358. #file%-1
  359.  Otherwise we need to know if it a special hex character or a directive
  360. segment$,1)="#" 
  361. -      directive$=
  362. segment$,
  363. (segment$)-1)
  364. 1      directive$=
  365. directive$,
  366. (directive$)-1)
  367.       val%=
  368. (directive$)
  369.       return_char$=
  370. (val%)
  371.         
  372. -      directive$=
  373. segment$,
  374. (segment$)-1)
  375. ,      directive$=
  376. upper_case(directive$)
  377.       
  378.  directive$ 
  379. #        
  380.  "GT":return_char$=">"
  381. #        
  382.  "LT":return_char$="<"
  383. $        
  384.  "AMP":return_char$="&"
  385. '        
  386.  "QUOT":return_char$=
  387.         
  388. +        return_char$="&"+directive$+";"
  389.       
  390. upper_case(victim$)
  391.  upper$, I%, char$
  392. upper$=""
  393.  I% = 1 
  394. (victim$)
  395.    char$=
  396. victim$,I%,1)
  397.     B   
  398. (char$) > 96) 
  399. (char$) < 123) 
  400.  char$=
  401. (char$)-32)
  402.    upper$+=char$
  403. =upper$
  404.