home *** CD-ROM | disk | FTP | other *** search
- #!/usr/bin/env python
- __license__ = 'GPL v3'
- __author__ = 'Tony Stegall'
- __copyright__ = '2010, Tony Stegall or Tonythebookworm on mobiread.com'
- __version__ = '1.03'
- __date__ = '27, September 2010'
- __docformat__ = 'restructuredtext en'
-
-
-
- import datetime
- from calibre.web.feeds.news import BasicNewsRecipe
-
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Calibre news recipe for 'The AJC' (ajc.com and accessatlanta.com feeds).

    The class is purely declarative apart from ``postprocess_html``: it
    sets the publication metadata, the RSS feeds to download, the tag
    filters that select the article header/body, and the CSS applied to
    the result.  How each attribute is interpreted is defined by
    ``BasicNewsRecipe``.
    """

    # ---- Publication metadata -------------------------------------------
    title = 'The AJC'
    __author__ = 'TonytheBookworm'
    publisher = 'The Atlanta Journal'
    description = 'News from Atlanta and USA'
    category = 'news, politics, USA'
    language = 'en'
    masthead_url = 'http://gawand.org/wp-content/uploads/2010/06/ajc-logo.gif'

    # Timestamp captured when the class body executes; nothing else in
    # this class reads it.
    now = datetime.datetime.now()
    timefmt = ' [%a,%d %B %Y %I:%M %p]'

    # ---- Download limits and styling ------------------------------------
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True

    # Built from adjacent literals so the CSS text stays exactly as
    # authored (including its blank lines) while the source stays indented.
    extra_css = (
        '\n'
        'h1.articleHeadline{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2.articleSubheadline{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        '\n'
        'p.byline{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}\n'
        'p.organization{text-align:right; font-size:small;margin-top:0px;margin-bottom: 0px;}\n'
        '\n'
        '\n'
        'p{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )

    # ---- Page clean-up filters ------------------------------------------
    # Article header div plus whichever element carries the article text id.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': ['cxArticleHeader']}},
        {'attrs': {'id': ['cxArticleText']}},
    ]

    # Divs stripped from the kept content.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'cxArticleList'}},
        {'name': 'div', 'attrs': {'class': 'cxFeedTease'}},
        {'name': 'div', 'attrs': {'class': 'cxElementEnlarge'}},
        {'name': 'div', 'attrs': {'id': 'cxArticleTools'}},
    ]

    # ---- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        ('Breaking News', 'http://www.ajc.com/genericList-rss.do?source=61499'),

        # Area feeds.  Uncomment any entry to enable it; Cobb is the only
        # area feed enabled at the moment.
        # ('Atlanta & Fulton', 'http://www.ajc.com/section-rss.do?source=atlanta'),
        # ('Clayton', 'http://www.ajc.com/section-rss.do?source=clayton'),
        # ('DeKalb', 'http://www.ajc.com/section-rss.do?source=dekalb'),
        # ('Gwinnett', 'http://www.ajc.com/section-rss.do?source=gwinnett'),
        # ('North Fulton', 'http://www.ajc.com/section-rss.do?source=north-fulton'),
        # ('Metro', 'http://www.ajc.com/section-rss.do?source=news'),
        # ('Cherokee', 'http://www.ajc.com/section-rss.do?source=cherokee'),
        ('Cobb', 'http://www.ajc.com/section-rss.do?source=cobb'),
        # ('Fayette', 'http://www.ajc.com/section-rss.do?source=fayette'),
        # ('Henry', 'http://www.ajc.com/section-rss.do?source=henry'),
        # ('Q & A', 'http://www.ajc.com/genericList-rss.do?source=77197'),
        ('Opinions', 'http://www.ajc.com/section-rss.do?source=opinion'),
        ('Ga Politics', 'http://www.ajc.com/section-rss.do?source=georgia-politics-elections'),

        # Sports feeds.  Only Falcons and Highschool are enabled; uncomment
        # any other team to follow it as well.
        # ('Sports News', 'http://www.ajc.com/genericList-rss.do?source=61510'),
        # ('Braves', 'http://www.ajc.com/genericList-rss.do?source=61457'),
        ('Falcons', 'http://www.ajc.com/genericList-rss.do?source=61458'),
        # ('Hawks', 'http://www.ajc.com/genericList-rss.do?source=61522'),
        # ('Dawgs', 'http://www.ajc.com/genericList-rss.do?source=61492'),
        # ('Yellowjackets', 'http://www.ajc.com/genericList-rss.do?source=61523'),
        ('Highschool', 'http://www.ajc.com/section-rss.do?source=high-school'),
        ('Events', 'http://www.accessatlanta.com/section-rss.do?source=events'),
        ('Music', 'http://www.accessatlanta.com/section-rss.do?source=music'),
    ]

    def postprocess_html(self, soup, first):
        """Strip image-credit spans from an article and return the soup.

        :param soup: parsed article document; must provide ``findAll``.
        :param first: passed in by the caller; not used here.
        :return: the same ``soup`` object, modified in place.
        """
        credit_filter = {'class': ['imageCredit rightFloat']}
        for span in soup.findAll('span', attrs=credit_filter):
            # extract() detaches the span from the document tree.
            span.extract()
        return soup

    # Disabled print-version hook, kept for reference:
    # def print_version(self, url):
    #     return url.partition('?')[0] + '?printArticle=y'
-
-
-
-
-
-
-
-
-
-
-
-
-