home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
- __license__ = 'GPL v3'
- __copyright__ = '2009, Matthew Briggs <hal.sulphur@gmail.com>'
- __docformat__ = 'restructuredtext en'
-
- '''
- theage.com.au
- '''
- from calibre import strftime
- from calibre.web.feeds.news import BasicNewsRecipe
- from calibre.ebooks.BeautifulSoup import BeautifulSoup
-
-
class TheAge(BasicNewsRecipe):
    """Calibre news recipe for The Age (theage.com.au).

    The index is built from the site's text-only front page: each ``<h3>``
    heading starts a new section and the links that follow it become that
    section's articles.
    """

    title = 'The Age'
    description = 'Business News, World News and Breaking News in Melbourne, Australia'
    __author__ = 'Matthew Briggs'
    language = 'en_AU'

    def get_browser(self):
        """Return the base recipe's browser with refresh handling disabled."""
        # The base method is called through the class, so ``self`` has to be
        # passed explicitly; omitting it raises TypeError.
        br = BasicNewsRecipe.get_browser(self)
        # NOTE(review): presumably the site issues refresh redirects that
        # would otherwise interfere with fetching -- confirm.
        br.set_handle_refresh(False)
        return br

    def parse_index(self):
        """Build the list of sections and articles from the text index page.

        Returns a list of ``(section_title, articles)`` tuples, where
        ``articles`` is a list of dicts with the keys ``title``, ``url``,
        ``date``, ``description`` and ``content``. Links that appear before
        the first ``<h3>`` heading are ignored, and sections without any
        links are omitted.
        """
        soup = BeautifulSoup(
            self.browser.open('http://www.theage.com.au/text/').read())

        feeds, articles = [], []
        feed = None
        # Every article gets the same date stamp, so format it only once.
        date = strftime('%a, %d %b')

        for tag in soup.findAll(['h3', 'a']):
            if tag.name == 'h3':
                # A new heading closes the section collected so far.
                if articles:
                    feeds.append((feed, articles))
                    articles = []
                feed = self.tag_to_string(tag)
                continue

            if feed is None:
                # Link outside of any section (e.g. page navigation).
                continue

            url = (tag.get('href') or '').strip()
            if not url:
                continue
            if url.startswith('/'):
                # Site-relative link: make it absolute.
                url = 'http://www.theage.com.au' + url

            articles.append({
                'title': self.tag_to_string(tag),
                'url': url,
                'date': date,
                'description': '',
                'content': '',
            })

        # The last section has no following <h3> to trigger the append above,
        # so it must be flushed here or it would be silently lost.
        if articles:
            feeds.append((feed, articles))

        return feeds
-
-
-
-