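# home *** CD-ROM | disk | FTP | other *** search
# (The line above is the navigation banner of the archive page this listing
# was extracted from; it is not part of the module.)
# Source Generated with Decompyle++
# File: in.pyc (Python 2.6)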
-
import copy
import os
import re
import urllib
import urllib2

from _html import DefaultFactory
import _request
import _response
import _rfc3986
import _sockettimeout
from _useragent import UserAgentBase
# Version tuple of this module; the leading three fields read as 0.1.10.
# NOTE(review): the meaning of the two trailing None slots is not visible
# in this file.
__version__ = (0, 1, 10, None, None)
-
class BrowserStateError(Exception):
    """Raised when the browser is in the wrong state for an operation.

    In this module it is raised, for example, when no document is being
    viewed, when the current document is not HTML, or when the history
    is already at its start.
    """
-
-
class LinkNotFoundError(Exception):
    """Raised by Browser.find_link() when no link matches the criteria."""
-
-
class FormNotFoundError(Exception):
    """Raised by Browser.select_form() when no form matches the criteria."""
-
-
def sanepathname2url(path):
    """Convert a local filesystem path to the path part of a URL.

    Thin wrapper around the standard library's ``pathname2url``.  On
    Windows (``os.name == 'nt'``) a result starting with three slashes has
    its first two characters dropped, leaving a single leading slash.

    path: local pathname (string).
    Returns the URL-quoted path (string).
    """
    # Resolve pathname2url locally so the function works whether the
    # interpreter exposes it as urllib.pathname2url (Python 2) or
    # urllib.request.pathname2url (Python 3).
    try:
        from urllib import pathname2url
    except ImportError:
        from urllib.request import pathname2url

    urlpath = pathname2url(path)
    if os.name == 'nt' and urlpath.startswith('///'):
        urlpath = urlpath[2:]
    return urlpath
-
-
class History:
    """Stack of (request, response) pairs previously visited by a Browser.

    Entries are appended by add() and removed from the end by back(), so
    the most recently added pair is returned first.
    """

    def __init__(self):
        # List of (request, response) tuples; the newest entry is last.
        self._history = []

    def add(self, request, response):
        """Push a (request, response) pair onto the history."""
        self._history.append((request, response))

    def back(self, n, _response):
        """Pop entries and return the last (request, response) pair popped.

        Popping continues while steps remain (``n > 0``) or the response in
        hand is None, so entries whose response is None are skipped over.

        n: number of steps to go back.
        _response: the caller's current response; if it is None at least
            one entry is popped even when n is not positive.

        Raises BrowserStateError if the history runs out first.
        """
        response = _response
        # NOTE(review): if n <= 0 and _response is not None the loop body
        # never runs and `request` is unbound at the return below.
        while n > 0 or response is None:
            try:
                request, response = self._history.pop()
            except IndexError:
                raise BrowserStateError('already at start of history')
            n -= 1
        return (request, response)

    def clear(self):
        """Forget all entries without closing their responses."""
        del self._history[:]

    def close(self):
        """Close every stored response that is not None, then forget all."""
        for request, response in self._history:
            if response is not None:
                response.close()
        del self._history[:]
-
-
-
class HTTPRefererProcessor(urllib2.BaseHandler):
    """urllib2 handler that lets the owning opener add a Referer header.

    Only requests carrying a ``redirect_dict`` attribute are touched; for
    those, the header decision is delegated to
    ``self.parent._add_referer_header(request, origin_request=False)``.
    """

    def http_request(self, request):
        """Return the request, possibly with a Referer header added."""
        # NOTE(review): presumably redirect_dict is set by urllib2's
        # redirect handling, i.e. this marks redirected requests -- confirm.
        if hasattr(request, 'redirect_dict'):
            request = self.parent._add_referer_header(
                request, origin_request=False)
        return request

    # HTTPS requests get exactly the same treatment.
    https_request = http_request
-
-
class Browser(UserAgentBase):
    """User agent that tracks a current document, history, forms and links.

    BrowserStateError is raised when the browser is in the wrong state for
    the requested operation (no document, non-HTML document, empty
    history).

    Public attributes:

    request: the request for the current response (None before any open)
    form: the currently selected form (None until select_form() succeeds)
    """

    # Extend the base class's handler table with the Referer processor and
    # enable it by default; copies are taken so the base class attributes
    # are not mutated.
    handler_classes = copy.copy(UserAgentBase.handler_classes)
    handler_classes['_referer'] = HTTPRefererProcessor
    default_features = copy.copy(UserAgentBase.default_features)
    default_features.append('_referer')

    def __init__(self, factory=None, history=None, request_class=None):
        """Create a browser.

        factory: object providing set_request_class(), set_response(),
            links(), forms() and the attributes global_form, is_html,
            encoding and title; defaults to DefaultFactory().
        history: History-like object; defaults to a new History().
        request_class: class used to build requests; defaults to
            urllib2.Request when it has add_unredirected_header, otherwise
            _request.Request.

        The factory's request class is always overridden with the one
        chosen here.
        """
        self._handle_referer = True

        if history is None:
            history = History()
        self._history = history

        if request_class is None:
            if not hasattr(urllib2.Request, 'add_unredirected_header'):
                request_class = _request.Request
            else:
                request_class = urllib2.Request

        if factory is None:
            factory = DefaultFactory()
        factory.set_request_class(request_class)
        self._factory = factory
        self.request_class = request_class

        self.request = None
        self._set_response(None, False)

        # NOTE(review): the base initialiser runs last; presumably so that
        # attribute lookups it triggers do not fall into __getattr__ before
        # self.form exists -- confirm before reordering.
        UserAgentBase.__init__(self)

    def close(self):
        """Close the current response and history and disable the browser.

        After this call the public methods are replaced by None on the
        instance, so accidental use after close() fails immediately.
        """
        UserAgentBase.close(self)
        if self._response is not None:
            self._response.close()
        if self._history is not None:
            self._history.close()
            self._history = None

        self.form = None
        self.request = self._response = None
        self.response = self.set_response = None
        self.geturl = self.reload = self.back = None
        self.clear_history = self.set_cookie = None
        self.links = self.forms = None
        self.viewing_html = self.encoding = self.title = None
        self.select_form = self.click = self.submit = None
        self.click_link = self.follow_link = self.find_link = None

    def set_handle_referer(self, handle):
        """Set whether a Referer header is added to requests."""
        self._set_handler('_referer', handle)
        self._handle_referer = bool(handle)

    def _add_referer_header(self, request, origin_request=True):
        """Return `request`, adding a Referer header when appropriate.

        The header names the current document (self.request) with any URL
        fragment removed.  Nothing is added when:

        - there is no current request,
        - `request` is not an http/https request,
        - origin_request is false and the current request itself carries
          no Referer header,
        - Referer handling is switched off,
        - the current document is not http/https, or
        - the current document is https and `request` is not (a secure
          page must not be leaked to an insecure request).
        """
        if self.request is None:
            return request
        scheme = request.get_type()
        original_scheme = self.request.get_type()
        if scheme not in ('http', 'https'):
            return request
        if not origin_request and not self.request.has_header('Referer'):
            return request

        downgrade = original_scheme == 'https' and scheme != 'https'
        if (self._handle_referer
                and original_scheme in ('http', 'https')
                and not downgrade):
            # The fragment is never sent in a Referer (RFC 2616 14.36).
            referer = self.request.get_full_url().split('#', 1)[0]
            request.add_unredirected_header('Referer', referer)
        return request

    def open_novisit(self, url, data=None,
                     timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Open a URL without visiting it.

        The browser's current request, response and history are left
        untouched; the response is simply returned.
        """
        return self._mech_open(url, data, visit=False, timeout=timeout)

    def open(self, url, data=None,
             timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Open a URL (string or request object) and return the response."""
        return self._mech_open(url, data, timeout=timeout)

    def _mech_open(self, url, data=None, update_history=True, visit=None,
                   timeout=_sockettimeout._GLOBAL_DEFAULT_TIMEOUT):
        """Shared implementation of open(), open_novisit() and reload().

        url: URL string (may be relative to the current document) or a
            request object (anything with a get_full_url attribute).
        update_history: whether the current request/response pair is
            pushed onto the history when visiting.
        visit: whether the browser state is updated; None means "use the
            request's own `visit` attribute, defaulting to True".

        Raises BrowserStateError for a relative URL when no document is
        being viewed.  An HTTPError that carries a response body is
        recorded like a normal response and then re-raised.
        """
        try:
            url.get_full_url
        except AttributeError:
            # A string URL: resolve it against the current document if it
            # has no scheme.
            scheme = _rfc3986.urlsplit(url)[0]
            if scheme is None:
                if self._response is None:
                    raise BrowserStateError(
                        "can't fetch relative reference: "
                        "not viewing any document")
                url = _rfc3986.urljoin(self._response.geturl(), url)

        request = self._request(url, data, visit, timeout)
        visit = request.visit
        if visit is None:
            visit = True

        if visit:
            self._visit_request(request, update_history)

        success = True
        try:
            response = UserAgentBase.open(self, request, data)
        except urllib2.HTTPError as error:
            success = False
            if error.fp is None:
                # Not usable as a response; nothing to record.
                raise
            response = error

        if visit:
            self._set_response(response, False)
            response = copy.copy(self._response)
        elif response is not None:
            response = _response.upgrade_response(response)

        if not success:
            raise response
        return response

    def __str__(self):
        text = []
        text.append('<%s ' % self.__class__.__name__)
        if self._response:
            text.append('visiting %s' % self._response.geturl())
        else:
            text.append('(not visiting a URL)')
        if self.form:
            text.append('\n selected form:\n %s\n' % str(self.form))
        text.append('>')
        return ''.join(text)

    def response(self):
        """Return a copy of the current response (None if there is none)."""
        return copy.copy(self._response)

    def open_local_file(self, filename):
        """Open a local file through a file:// URL."""
        path = sanepathname2url(os.path.abspath(filename))
        url = 'file://' + path
        return self.open(url)

    def set_response(self, response):
        """Replace the current response, closing the previous one.

        response may be None.
        """
        self._set_response(response, True)

    def _set_response(self, response, close_current):
        """Install `response` as the current response.

        Clears the selected form, upgrades the response via
        _response.upgrade_response() and hands it to the factory.

        Raises ValueError if response is neither None nor an object with
        info, geturl and read attributes.
        """
        # Sanity check only: the three attributes are necessary, not
        # sufficient, for a usable response.
        if response is not None and not (
                hasattr(response, 'info')
                and hasattr(response, 'geturl')
                and hasattr(response, 'read')):
            raise ValueError('not a response object')

        self.form = None
        if response is not None:
            response = _response.upgrade_response(response)
        if close_current and self._response is not None:
            self._response.close()
        self._response = response
        self._factory.set_response(response)

    def visit_response(self, response, request=None):
        """Visit `response` as if it had been opened.

        Unlike set_response(), the previous request/response pair is pushed
        onto the history.  If request is None one is built from
        response.geturl().
        """
        if request is None:
            request = _request.Request(response.geturl())
        self._visit_request(request, True)
        self._set_response(response, False)

    def _visit_request(self, request, update_history):
        """Make `request` current, optionally recording the previous pair."""
        if self._response is not None:
            self._response.close()
        if self.request is not None and update_history:
            self._history.add(self.request, self._response)
        self._response = None
        # Assigned before the actual fetch, so self.request is set even if
        # opening subsequently fails.
        self.request = request

    def geturl(self):
        """Return the URL of the current document."""
        if self._response is None:
            raise BrowserStateError('not viewing any document')
        return self._response.geturl()

    def reload(self):
        """Re-fetch the current request without touching the history."""
        if self.request is None:
            raise BrowserStateError('no URL has yet been .open()ed')
        if self._response is not None:
            self._response.close()
        return self._mech_open(self.request, update_history=False)

    def back(self, n=1):
        """Go back n steps in history and return the response.

        If the stored response reports read_complete as false, the
        document is reloaded instead of being returned from history.
        """
        if self._response is not None:
            self._response.close()
        self.request, response = self._history.back(n, self._response)
        self.set_response(response)
        if not response.read_complete:
            return self.reload()
        return copy.copy(response)

    def clear_history(self):
        """Discard the browsing history."""
        self._history.clear()

    def set_cookie(self, cookie_string):
        """Set a cookie as if the current response had sent it.

        cookie_string: value for a Set-cookie header.  It is placed on a
        copy of the current response, from which the '_cookies' handler's
        cookiejar then extracts cookies for the current request.

        Raises BrowserStateError if no document is being viewed or the
        current request is not http/https.
        """
        if self._response is None:
            raise BrowserStateError('not viewing any document')
        if self.request.get_type() not in ('http', 'https'):
            raise BrowserStateError(
                "can't set cookie for non-HTTP/HTTPS transactions")
        cookiejar = self._ua_handlers['_cookies'].cookiejar
        response = self.response()
        headers = response.info()
        headers['Set-cookie'] = cookie_string
        cookiejar.extract_cookies(response, self.request)

    def links(self, **kwds):
        """Return an iterable over the document's links.

        Keyword arguments, if any, filter the links as for find_link().
        """
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')
        links = self._factory.links()
        if kwds:
            return self._filter_links(links, **kwds)
        return links

    def forms(self):
        """Return the forms of the current document (from the factory)."""
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')
        return self._factory.forms()

    def global_form(self):
        """Return the factory's global form for the current document."""
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')
        return self._factory.global_form

    def viewing_html(self):
        """Return whether the current response contains HTML."""
        if self._response is None:
            raise BrowserStateError('not viewing any document')
        return self._factory.is_html

    def encoding(self):
        """Return the encoding the factory reports for the document."""
        if self._response is None:
            raise BrowserStateError('not viewing any document')
        return self._factory.encoding

    def title(self):
        """Return the title the factory reports for the HTML document."""
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')
        return self._factory.title

    def select_form(self, name=None, predicate=None, nr=None):
        """Select a form; it becomes self.form.

        name: required form name.
        predicate: callable taking a form and returning true for a match.
        nr: zero-based index among the forms matching the other criteria.

        When only a predicate is given and it accepts the global form, the
        global form is selected.

        Raises ValueError if no argument is supplied, BrowserStateError if
        not viewing HTML, and FormNotFoundError if nothing matches.
        """
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')
        if name is None and predicate is None and nr is None:
            raise ValueError(
                'at least one argument must be supplied to specify form')

        global_form = self._factory.global_form
        if (nr is None and name is None
                and predicate is not None and predicate(global_form)):
            self.form = global_form
            return None

        orig_nr = nr
        for form in self.forms():
            if name is not None and name != form.name:
                continue
            if predicate is not None and not predicate(form):
                continue
            if nr:
                # Skip this match; an earlier index was requested.
                nr -= 1
                continue
            self.form = form
            break  # found: must not fall into the for/else below
        else:
            description = []
            if name is not None:
                description.append("name '%s'" % name)
            if predicate is not None:
                description.append('predicate %s' % predicate)
            if orig_nr is not None:
                description.append('nr %d' % orig_nr)
            description = ', '.join(description)
            raise FormNotFoundError('no form matching ' + description)

    def click(self, *args, **kwds):
        """Return the request produced by clicking the selected form.

        Arguments are passed through to self.form.click(); a Referer
        header is added where appropriate.
        """
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')
        request = self.form.click(*args, **kwds)
        return self._add_referer_header(request)

    def submit(self, *args, **kwds):
        """Click the selected form and open the resulting request."""
        return self.open(self.click(*args, **kwds))

    def click_link(self, link=None, **kwds):
        """Return the request for following a link, without opening it.

        Pass either a link object or find_link() keyword arguments, not
        both (ValueError otherwise).
        """
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')
        if not link:
            link = self.find_link(**kwds)
        elif kwds:
            raise ValueError(
                'either pass a Link, or keyword arguments, not both')
        request = self.request_class(link.absolute_url)
        return self._add_referer_header(request)

    def follow_link(self, link=None, **kwds):
        """Find a link (see click_link) and open it."""
        return self.open(self.click_link(link, **kwds))

    def find_link(self, **kwds):
        """Return the first link matching the given criteria.

        Accepts the keyword arguments of _filter_links().  Raises
        LinkNotFoundError if nothing matches.
        """
        try:
            return next(self._filter_links(self._factory.links(), **kwds))
        except StopIteration:
            raise LinkNotFoundError()

    def __getattr__(self, name):
        """Delegate unknown attributes to the currently selected form."""
        # Read through __dict__ so a missing 'form' cannot recurse back
        # into __getattr__.
        form = self.__dict__.get('form')
        if form is None:
            raise AttributeError(
                '%s instance has no attribute %s (perhaps you forgot to '
                '.select_form()?)' % (self.__class__, name))
        return getattr(form, name)

    def _filter_links(self, links, text=None, text_regex=None, name=None,
                      name_regex=None, url=None, url_regex=None, tag=None,
                      predicate=None, nr=0):
        """Generate the links that satisfy every supplied criterion.

        text / name / url / tag: exact matches against link.text, the
            link's 'name' attribute, link.url and link.tag.
        text_regex / name_regex / url_regex: patterns applied with
            re.search to the same values.
        predicate: callable taking a link and returning true for a match.
        nr: number of matching links to skip before each one yielded.

        This is a generator, so the HTML check (and its BrowserStateError)
        only happens once iteration starts.
        """
        if not self.viewing_html():
            raise BrowserStateError('not viewing HTML')

        orig_nr = nr
        for link in links:
            if url is not None and url != link.url:
                continue
            if url_regex is not None and not re.search(url_regex, link.url):
                continue
            if text is not None and (
                    link.text is None or text != link.text):
                continue
            if text_regex is not None and (
                    link.text is None
                    or not re.search(text_regex, link.text)):
                continue
            if name is not None and name != dict(link.attrs).get('name'):
                continue
            if name_regex is not None:
                link_name = dict(link.attrs).get('name')
                if link_name is None or not re.search(name_regex, link_name):
                    continue
            if tag is not None and tag != link.tag:
                continue
            if predicate is not None and not predicate(link):
                continue
            if nr:
                nr -= 1
                continue
            yield link
            # Restart the skip count for the next yielded link.
            nr = orig_nr
-
-
-
-