#!/usr/bin/env python
"""A web application that retrieves other websites for you.
To start serving the application on port 8088, type
python webproxy.py
To start the server on some other interface/port, use
python -m gevent.wsgi -p 8000 -i 0.0.0.0 webproxy.py
"""
from __future__ import print_function
from gevent import monkey; monkey.patch_all()
import sys
import re
import traceback
try:
from cgi import escape
except ImportError:
# Python 3.8 removed this API
from html import escape
try:
import urllib2
from urlparse import urlparse
from urllib import unquote
except ImportError:
# pylint:disable=import-error,no-name-in-module
from urllib import request as urllib2
from urllib.parse import urlparse
from urllib.parse import unquote
# (interface, port) pair handed to WSGIServer when this file is run as a script.
LISTEN = ('127.0.0.1', 8088)
def _as_bytes(s):
if not isinstance(s, bytes): # Py3
s = s.encode('utf-8')
return s
def _as_str(s):
if not isinstance(s, str): # Py3
s = s.decode('latin-1')
return s
def application(env, start_response):
    """WSGI entry point: serve the URL form or relay the requested page.

    * ``GET /`` returns the HTML form.
    * ``GET /<url>`` fetches ``<url>`` through ``proxy()``.
    * ``POST /`` with a ``url=<value>`` body answers with a 302 redirect to
      the proxied location of that URL; a body without a ``url`` field is
      answered with 400.
    * Any other POST is answered with 404, any other method with 501.

    :param env: the WSGI environment dictionary.
    :param start_response: the WSGI ``start_response`` callable.
    :return: an iterable of byte strings forming the response body.
    """
    proxy_url = 'http://%s/' % env['HTTP_HOST']
    method = env['REQUEST_METHOD']
    path = env['PATH_INFO']
    # QUERY_STRING may be absent altogether, not merely empty.
    query = env.get('QUERY_STRING')
    if query:
        path += '?' + query
    path = path.lstrip('/')

    if (method, path) == ('GET', ''):
        start_response('200 OK', [('Content-Type', 'text/html')])
        return [FORM]
    elif method == 'GET':
        return proxy(path, start_response, proxy_url)
    elif (method, path) == ('POST', ''):
        body = env['wsgi.input'].read().strip()
        # Split on the first '=' only, so a value containing '=' stays intact.
        key, separator, value = body.partition(b'=')
        # Validate explicitly: an ``assert`` would vanish under ``python -O``
        # and would otherwise surface as a server error, not a client error.
        if key != b'url' or not separator:
            start_response('400 Bad Request', [('Content-Type', 'text/plain')])
            return [b'expected a form field named "url"\n']
        value = _as_str(value)
        start_response('302 Found', [('Location', _as_str(join(proxy_url, unquote(value))))])
    elif method == 'POST':
        start_response('404 Not Found', [])
    else:
        start_response('501 Not Implemented', [])
    return []
def proxy(path, start_response, proxy_url):
    """Fetch *path* from its origin server and relay it with rewritten links.

    :param path: the target URL; ``http://`` is prepended when it contains
        no ``://``.
    :param start_response: the WSGI ``start_response`` callable.
    :param proxy_url: base URL of this proxy, used as the prefix for every
        rewritten link.
    :return: a one-element list holding the response body as bytes.  When
        fetching fails, the body is an HTML error page sent with status
        ``502 Bad Gateway``.
    """
    # pylint:disable=too-many-locals
    # NOTE(review): the URL comes straight from the client and is handed to
    # urlopen() unfiltered, so any scheme urlopen() supports is reachable --
    # confirm that is acceptable wherever this is deployed.
    if '://' not in path:
        path = 'http://' + path

    try:
        try:
            response = urllib2.urlopen(path)
        except urllib2.HTTPError as ex:
            # An HTTPError doubles as a response object; relay it like one.
            response = ex
        print('%s: %s %s' % (path, response.code, response.msg))
        # Header names may arrive in any case, so compare them lowercased.
        # Content-Length is dropped as well: fix_links() changes the body
        # size, so the upstream value would be wrong.
        headers = [
            (k, v) for (k, v) in response.headers.items()
            if k.lower() not in drop_headers and k.lower() != 'content-length'
        ]
        scheme, netloc = urlparse(path)[:2]
        host = (scheme or 'http') + '://' + netloc
    except Exception as ex: # pylint:disable=broad-except
        sys.stderr.write('error while reading %s:\n' % path)
        traceback.print_exc()
        tb = traceback.format_exc()
        start_response('502 Bad Gateway', [('Content-Type', 'text/html')])
        # pylint:disable=deprecated-method
        error_str = escape(str(ex) or ex.__class__.__name__ or 'Error')
        error_str = '<h1>%s</h1><h2>%s</h2><pre>%s</pre>' % (error_str, escape(path), escape(tb))
        return [_as_bytes(error_str)]

    try:
        data = response.read()
    finally:
        # Release the upstream connection even if reading fails.
        response.close()
    data = fix_links(data, proxy_url, host)
    headers.append(('Content-Length', str(len(data))))
    start_response('%s %s' % (response.code, response.msg), headers)
    return [data]
def join(url1, *rest):
    """Concatenate URL pieces with exactly one ``/`` at each seam.

    With no extra pieces *url1* is returned untouched.  Otherwise every
    piece is converted with ``_as_bytes`` and the result is bytes: when both
    sides of a seam carry a slash one is dropped, when neither does one is
    inserted.
    """
    if not rest:
        return url1

    joined = _as_bytes(url1)
    for piece in rest:
        piece = _as_bytes(piece)
        left_has_slash = joined.endswith(b'/')
        right_has_slash = piece.startswith(b'/')
        if left_has_slash and right_has_slash:
            # Both sides bring a slash: keep only one.
            joined += piece[1:]
        elif left_has_slash or right_has_slash:
            joined += piece
        else:
            joined += b'/' + piece
    return joined
def fix_links(data, proxy_url, host_url):
    """Rewrite link targets in *data* so that they point back at the proxy.

    Every match of ``_link_re_1`` and then ``_link_re_2`` is replaced by the
    matched attribute prefix followed by the new target in double quotes.
    A target containing ``://`` becomes ``proxy_url/<target>``; any other
    target becomes ``proxy_url/host_url/<target>``.

    Example: with *proxy_url* ``http://127.0.0.1:8088`` and *host_url*
    ``www.google.com``, the fragment ``src=images/hp0.gif`` becomes
    ``src="http://127.0.0.1:8088/www.google.com/images/hp0.gif"``.

    :param data: the document to rewrite, as bytes (the patterns are bytes
        patterns).
    :param proxy_url: base URL of the proxy.
    :param host_url: base URL of the host the document came from.
    :return: the rewritten document.
    """
    def rewrite(match):
        target = match.group('url')
        if b'://' in target:
            # Already absolute: only the proxy prefix is needed.
            proxied = join(proxy_url, target)
        else:
            # Relative: anchor it at the originating host first.
            proxied = join(proxy_url, host_url, target)
        return match.group('before') + b'"' + proxied + b'"'

    for pattern in (_link_re_1, _link_re_2):
        data = pattern.sub(rewrite, data)
    return data
# Matches an href/src/action attribute whose value is wrapped in single or
# double quotes and does not start with '#'.
_link_re_1 = re.compile(br'''(?P<before>(href|src|action)\s*=\s*)(?P<quote>['"])(?P<url>[^#].*?)(?P=quote)''')
# Matches an href/src/action attribute with an unquoted value: the value must
# not start with a quote, '#' or '>', and runs up to the next space or '>'.
_link_re_2 = re.compile(br'''(?P<before>(href|src|action)\s*=\s*)(?P<url>[^'"#>][^ >]*)''')
# Header names (written in lowercase) that proxy() filters out of the
# upstream response headers.
drop_headers = ['transfer-encoding', 'set-cookie']
FORM = b"""<html><head>
<title>Web Proxy - gevent example</title></head><body>
<table width=60% height=100% align=center>
<tr height=30%><td align=center valign=bottom>Type in URL you want to visit and press Enter</td></tr>
<tr><td align=center valign=top>
<form action=/ method=post>
<input size=80 name=url value="http://www.gevent.org"/>
</form>
</td></tr>
</table></body></table>
"""
if __name__ == '__main__':
    # Script entry point: announce the address, then serve until interrupted.
    from gevent.pywsgi import WSGIServer

    print('Serving on %s...' % (LISTEN,))
    server = WSGIServer(LISTEN, application)
    server.serve_forever()
# Next page: Example webpy.py