home *** CD-ROM | disk | FTP | other *** search
- /* Import_HTML
- $VER: Import_HTML 3.0 by MJ, Adrian Barnett, Heiko Kuschel
- Imports an HTML (Web) file into Wordworth.
- All HTML codes are stripped out, leaving
- the text. Also, codes for different headings,
- and bold, italic and underline styles are
- recognised.
-
- Digita ARexx Script for Wordworth 5
- Copyright ©1996, Digita International Ltd.
-
- Created: 2 July 1996
- Author: MJ
-
- Version 2.0
- Modified: 7 May 1997
- Author: Adrian Barnett (adrian@abarnett.demon.co.uk)
- Changes: Handle long lines of text
- Handle lists
- Deal with newlines correctly
- Center text correctly
- Deal with lower-case html tags
-
- This still needs a lot of work...
- ... ok, so I did some of it...
-
- Version 3.0
- Modified: 11 Nov 1998
- Uploaded: June 1999 (sorry for the big delay)
- Author: Heiko Kuschel <hkuschel@mayn.de>
- Changes: Handle German Umlauts
- Major speed improvement
- Font names and sizes configurable
- to do:
-
- list font
- rework the title handling
- tables using TurboCalc
- tables using Tabs (configurable)
- More speed improvements possible!
- IIRC German Umlauts are always printed UPPERCASE in this version.
- I already had changed this, but lost the code.
- */
-
-
- /* -------------- Configure here --------------- */
-
- StandardFont = "Garamond Antiqua"
- H1=25 /* Font sizes in pt. for <H1> .. <H6> */
- H2=20
- H3=18
- H4=15
- H5=14
- H6=13
- H0=12 /* This is the standard font size. */
-
- /* ----- Nothing to configure after this line. ----- */
-
- /*
-  * Main program.
-  *
-  * Asks for an HTML file, opens a new Wordworth document and copies the
-  * text of the file into it one character at a time.  Tags ("<...>") are
-  * turned into formatting commands, character entities ("&...;") into the
-  * character they stand for, and everything else is inserted as text.
-  */
-
- OPTIONS RESULTS
-
- /* Text is sent to Wordworth in pieces of at most this many characters. */
- MaxChunk = 80
-
- RequestFile TITLE "Select HTML file..." PATTERN "(#?.HTML|#?.HTM)"
-
- /* A non-zero return code means no file was chosen. */
- If RC > 0 THEN
-     Exit
-
- FileName = Result
-
- If Open('MyFile', FileName, 'R') THEN DO
-
-     /* Create the target document and talk to its own ARexx port. */
-     New
-     Address Value Result
-
-     Document A4 "0.5in" "0.5in" "0.6in" "1.0in"
-     Zoom 100
-     Paragraph 0 0 0 LEFT AUTO SINGLE NONE NONE
-
-     Font NAME StandardFont SIZE H0 PLAIN
-
-     Para = ''
-
-     DO Until EOF('MyFile')
-         MyChar = ReadCh('MyFile')
-
-         select
-             /* ---- Handle "<>" tags ---- */
-             when MyChar = '<' THEN DO
-                 /* Flush pending text so it keeps the current formatting. */
-                 IF Length(Para) > 0 THEN
-                     Text Para
-                 Para = ''
-
-                 /* Collect the tag; stop at end of file so that an       */
-                 /* unterminated tag cannot loop forever.                 */
-                 Code = MyChar
-                 DO UNTIL MyChar = '>' | EOF('MyFile')
-                     MyChar = ReadCh('MyFile')
-                     Code = Code || MyChar
-                 END
-
-                 /* Reduce the tag to its upper-case name: brackets, tabs */
-                 /* and line ends become blanks and the first word is the */
-                 /* name, so "<p align=center>" is handled like "<P>".    */
-                 Code = UPPER(Code)
-                 Tag = Word(Translate(Code, '     ', '<>' || '090A0D'x), 1)
-
-                 select
-                     when Tag = 'P' THEN do
-                         NewParagraph
-                         NewParagraph
-                     end
-                     when Tag = 'BR' THEN
-                         NewParagraph
-                     when Tag = 'B' THEN
-                         Bold
-                     when Tag = 'I' | Tag = 'ADDRESS' THEN
-                         Italic
-                     when Tag = 'U' THEN
-                         Underline
-                     when Tag = '/B' | Tag = '/I' | Tag = '/U' | Tag = '/ADDRESS' THEN
-                         Plain
-
-                     /* Headings: new paragraph in the configured size.   */
-                     when Tag = 'H1' | Tag = 'H2' | Tag = 'H3' | Tag = 'H4' | Tag = 'H5' | Tag = 'H6' THEN DO
-                         NewParagraph
-                         select
-                             when Tag = 'H1' THEN
-                                 Font SIZE H1
-                             when Tag = 'H2' THEN
-                                 Font SIZE H2
-                             when Tag = 'H3' THEN
-                                 Font SIZE H3
-                             when Tag = 'H4' THEN
-                                 Font SIZE H4
-                             when Tag = 'H5' THEN
-                                 Font SIZE H5
-                             otherwise
-                                 Font SIZE H6
-                         end
-                     END
-
-                     /* End of heading: back to the standard font size.   */
-                     when Tag = '/H1' | Tag = '/H2' | Tag = '/H3' | Tag = '/H4' | Tag = '/H5' | Tag = '/H6' THEN DO
-                         NewParagraph
-                         Font SIZE H0
-                     END
-
-                     /* Horizontal rule is drawn as a line of underscores. */
-                     when Tag = 'HR' THEN do
-                         NewParagraph
-                         text "___________________________________________________________"
-                         NewParagraph
-                     end
-
-                     when Tag = 'LI' THEN DO
-                         NewParagraph
-                         text "o "
-                     END
-                     when Tag = '/UL' | Tag = '/OL' | Tag = '/DIR' | Tag = '/MENU' THEN
-                         NewParagraph
-
-                     when Tag = 'IMG' THEN
-                         text " [image] "
-
-                     when Tag = 'CENTER' THEN
-                         CentreJustify
-                     when Tag = '/CENTER' THEN do
-                         NewParagraph
-                         LeftJustify
-                     end
-
-                     when Tag = 'TITLE' THEN DO
-                         /* Collect the title text up to the next tag.    */
-                         Title = ''
-                         DO UNTIL MyChar = '<' | EOF('MyFile')
-                             MyChar = ReadCh('MyFile')
-                             select
-                                 when MyChar = '&' THEN
-                                     Title = Title || ReadEntity()
-                                 when MyChar = '<' THEN
-                                     NOP
-                                 otherwise
-                                     Title = Title || MyChar
-                             end
-                         END
-
-                         /* Skip the remainder of the tag that ended it.  */
-                         DO UNTIL MyChar = '>' | EOF('MyFile')
-                             MyChar = ReadCh('MyFile')
-                         END
-
-                         /* The document is saved under the title in RAM: */
-                         /* and the file is deleted again straight away.  */
-                         /* NOTE(review): FileName is passed unquoted, so */
-                         /* a title with blanks or AmigaDOS pattern       */
-                         /* characters is split up by Delete - confirm    */
-                         /* how Wordworth parses SaveAs NAME, then quote  */
-                         /* both commands consistently.                   */
-                         IF Length(Title) > 0 THEN DO
-                             FileName = 'RAM:' || TITLE
-                             SaveAs NAME FileName
-                             Address Command 'Delete >NIL:' FileName
-                         END
-                     END
-                     otherwise NOP
-                 end
-             END
-
-             /* ---- Handle things like &quot; ---- */
-             when MyChar = '&' THEN
-                 Para = Para || ReadEntity()
-
-             /* ---- Handle normal text ---- */
-             otherwise do
-                 /* Replace newlines with spaces */
-                 IF MyChar = '0a'X THEN
-                     MyChar = ' '
-
-                 Para = Para || MyChar
-             end
-         end
-
-         /* Send the text on in blocks instead of one long string. */
-         IF Length(Para) >= MaxChunk THEN DO
-             Text Para
-             Para = ''
-         END
-
-     END
-
-     IF Length(Para) > 0 THEN
-         Text Para
-     Call Close('MyFile')
- END
- ELSE
-     RequestNotify PROMPT "Error: Unable to open file!"
- Exit
-
-
- /*
-  * ReadEntity()
-  *
-  * Called after a '&' has been read from 'MyFile'.  Reads the rest of the
-  * character entity and returns the text to insert for it:
-  *   - the replacement character for a known entity,
-  *   - an empty string for an unknown but properly terminated entity,
-  *   - the characters read so far, unchanged, when no ';' turns up within
-  *     12 characters or before the end of the file (a plain '&' in the
-  *     text), so that nothing is swallowed and the loop always ends.
-  * Entity names are matched without regard to case, except that the case
-  * of the first letter selects the upper- or lower-case umlaut.
-  */
- ReadEntity: PROCEDURE
-     Raw = '&'
-     DO UNTIL Ch = ';' | Length(Raw) >= 12 | EOF('MyFile')
-         Ch = ReadCh('MyFile')
-         Raw = Raw || Ch
-     END
-
-     /* Not terminated: this was ordinary text, hand it back as it is. */
-     IF Ch ~= ';' THEN
-         RETURN Raw
-
-     Entity = UPPER(Raw)
-
-     /* True when the first letter of the name was written in lower case. */
-     IsLower = (SubStr(Raw, 2, 1) ~= SubStr(Entity, 2, 1))
-
-     select
-         when Entity = '&QUOT;' THEN
-             RETURN '"'
-         when Entity = '&GT;' THEN
-             RETURN '>'
-         when Entity = '&LT;' THEN
-             RETURN '<'
-         when Entity = '&AMP;' THEN
-             RETURN '&'
-         when Entity = '&NBSP;' THEN
-             RETURN ' '
-         when Entity = '&POUND;' THEN
-             RETURN "£"
-         when Entity = '&AUML;' THEN DO
-             IF IsLower THEN
-                 RETURN "ä"
-             RETURN "Ä"
-         END
-         when Entity = '&OUML;' THEN DO
-             IF IsLower THEN
-                 RETURN "ö"
-             RETURN "Ö"
-         END
-         when Entity = '&UUML;' THEN DO
-             IF IsLower THEN
-                 RETURN "ü"
-             RETURN "Ü"
-         END
-         when Entity = '&SZLIG;' THEN
-             RETURN "ß"
-         otherwise
-             RETURN ''
-     end
-
-
-