home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
- __license__ = 'GPL v3'
- __copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
- '''
- impreso.milenio.com
- '''
- from calibre import strftime
- from calibre.web.feeds.news import BasicNewsRecipe
-
- import datetime
-
class Milenio(BasicNewsRecipe):
    """Calibre news recipe for the print edition of Milenio.

    The recipe does not use RSS feeds; it builds its own index by scraping
    the dated "Nacional" section page of impreso.milenio.com (see
    ``parse_index``).
    """

    title = u'Milenio-diario'
    __author__ = 'Bmsleight'
    language = 'es_MX'
    description = 'Milenio-diario'
    oldest_article = 10
    max_articles_per_feed = 100
    no_stylesheets = False
    # Site root; the section path and article hrefs are appended to it.
    index = 'http://impreso.milenio.com'

    # Restrict downloaded pages to the <div class="content"> element.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'content'}),
    ]

    def parse_index(self):
        """Build the article list from today's "Nacional" section page.

        The section page is fetched from ``<index>/Nacional/YYYY/MM/DD``
        (for example http://impreso.milenio.com/Nacional/2010/09/01).
        Every link inside that page's ``div.content`` whose href contains
        ``/node/`` is treated as an article.

        Returns:
            A one-element list ``[('Articles', articles)]``. Each article
            is a dict with the keys ``title``, ``date``, ``url`` and
            ``description`` (always empty). ``articles`` is empty when the
            content div is missing or contains no article links.
        """
        section_url = (self.index + "/Nacional/"
                       + datetime.date.today().strftime("%Y/%m/%d"))
        soup = self.index_to_soup(section_url)
        maincontent = soup.find('div', attrs={'class': 'content'})

        articles = []
        if maincontent:
            # Every entry carries the same timestamp, so format it once.
            date = strftime(self.timefmt)
            for link in maincontent.findAll('a', href=True):
                href = link['href']
                # Test the href as-is: wrapping it in str() raises
                # UnicodeEncodeError on Python 2 for non-ASCII links.
                if "/node/" not in href:
                    continue
                articles.append({
                    'title': self.tag_to_string(link),
                    'date': date,
                    # hrefs are site-relative, so prefix the site root.
                    'url': self.index + href,
                    'description': '',
                })
        return [('Articles', articles)]
-