mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[core] Update redirect handling from yt-dlp
* Thx coletdjnz: https://github.com/yt-dlp/yt-dlp/pull/7094 * add test that redirected `POST` loses its `Content-Type`
This commit is contained in:
		| @@ -8,33 +8,160 @@ import sys | |||||||
| import unittest | import unittest | ||||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||||
|  |  | ||||||
|  | import gzip | ||||||
|  | import io | ||||||
|  | import ssl | ||||||
|  | import tempfile | ||||||
|  | import threading | ||||||
|  | import zlib | ||||||
|  |  | ||||||
|  | # avoid deprecated alias assertRaisesRegexp | ||||||
|  | if hasattr(unittest.TestCase, 'assertRaisesRegex'): | ||||||
|  |     unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex | ||||||
|  |  | ||||||
|  | try: | ||||||
|  |     import brotli | ||||||
|  | except ImportError: | ||||||
|  |     brotli = None | ||||||
|  | try: | ||||||
|  |     from urllib.request import pathname2url | ||||||
|  | except ImportError: | ||||||
|  |     from urllib import pathname2url | ||||||
|  |  | ||||||
|  | from youtube_dl.compat import ( | ||||||
|  |     compat_http_cookiejar_Cookie, | ||||||
|  |     compat_http_server, | ||||||
|  |     compat_str as str, | ||||||
|  |     compat_urllib_error, | ||||||
|  |     compat_urllib_HTTPError, | ||||||
|  |     compat_urllib_parse, | ||||||
|  |     compat_urllib_request, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | from youtube_dl.utils import ( | ||||||
|  |     sanitized_Request, | ||||||
|  |     urlencode_postdata, | ||||||
|  | ) | ||||||
|  |  | ||||||
| from test.helper import ( | from test.helper import ( | ||||||
|  |     FakeYDL, | ||||||
|     FakeLogger, |     FakeLogger, | ||||||
|     http_server_port, |     http_server_port, | ||||||
| ) | ) | ||||||
| from youtube_dl import YoutubeDL | from youtube_dl import YoutubeDL | ||||||
| from youtube_dl.compat import compat_http_server, compat_urllib_request |  | ||||||
| import ssl |  | ||||||
| import threading |  | ||||||
|  |  | ||||||
| TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||||
|  |  | ||||||
|  |  | ||||||
| class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | ||||||
|  |     protocol_version = 'HTTP/1.1' | ||||||
|  |  | ||||||
|  |     # work-around old/new -style class inheritance | ||||||
|  |     def super(self, meth_name, *args, **kwargs): | ||||||
|  |         from types import MethodType | ||||||
|  |         try: | ||||||
|  |             super() | ||||||
|  |             fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k) | ||||||
|  |         except TypeError: | ||||||
|  |             fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k) | ||||||
|  |         self.super = MethodType(fn, self) | ||||||
|  |         return self.super(meth_name, *args, **kwargs) | ||||||
|  |  | ||||||
|     def log_message(self, format, *args): |     def log_message(self, format, *args): | ||||||
|         pass |         pass | ||||||
|  |  | ||||||
|  |     def _headers(self): | ||||||
|  |         payload = str(self.headers).encode('utf-8') | ||||||
|  |         self.send_response(200) | ||||||
|  |         self.send_header('Content-Type', 'application/json') | ||||||
|  |         self.send_header('Content-Length', str(len(payload))) | ||||||
|  |         self.end_headers() | ||||||
|  |         self.wfile.write(payload) | ||||||
|  |  | ||||||
|  |     def _redirect(self): | ||||||
|  |         self.send_response(int(self.path[len('/redirect_'):])) | ||||||
|  |         self.send_header('Location', '/method') | ||||||
|  |         self.send_header('Content-Length', '0') | ||||||
|  |         self.end_headers() | ||||||
|  |  | ||||||
|  |     def _method(self, method, payload=None): | ||||||
|  |         self.send_response(200) | ||||||
|  |         self.send_header('Content-Length', str(len(payload or ''))) | ||||||
|  |         self.send_header('Method', method) | ||||||
|  |         self.end_headers() | ||||||
|  |         if payload: | ||||||
|  |             self.wfile.write(payload) | ||||||
|  |  | ||||||
|  |     def _status(self, status): | ||||||
|  |         payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8') | ||||||
|  |         self.send_response(int(status)) | ||||||
|  |         self.send_header('Content-Type', 'text/html; charset=utf-8') | ||||||
|  |         self.send_header('Content-Length', str(len(payload))) | ||||||
|  |         self.end_headers() | ||||||
|  |         self.wfile.write(payload) | ||||||
|  |  | ||||||
|  |     def _read_data(self): | ||||||
|  |         if 'Content-Length' in self.headers: | ||||||
|  |             return self.rfile.read(int(self.headers['Content-Length'])) | ||||||
|  |  | ||||||
|  |     def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): | ||||||
|  |         return '{0}://{1}:{2}/{3}'.format( | ||||||
|  |             scheme, host, | ||||||
|  |             port if port is not None | ||||||
|  |             else http_server_port(self.server), path) | ||||||
|  |  | ||||||
|  |     def do_POST(self): | ||||||
|  |         data = self._read_data() | ||||||
|  |         if self.path.startswith('/redirect_'): | ||||||
|  |             self._redirect() | ||||||
|  |         elif self.path.startswith('/method'): | ||||||
|  |             self._method('POST', data) | ||||||
|  |         elif self.path.startswith('/headers'): | ||||||
|  |             self._headers() | ||||||
|  |         else: | ||||||
|  |             self._status(404) | ||||||
|  |  | ||||||
|  |     def do_HEAD(self): | ||||||
|  |         if self.path.startswith('/redirect_'): | ||||||
|  |             self._redirect() | ||||||
|  |         elif self.path.startswith('/method'): | ||||||
|  |             self._method('HEAD') | ||||||
|  |         else: | ||||||
|  |             self._status(404) | ||||||
|  |  | ||||||
|  |     def do_PUT(self): | ||||||
|  |         data = self._read_data() | ||||||
|  |         if self.path.startswith('/redirect_'): | ||||||
|  |             self._redirect() | ||||||
|  |         elif self.path.startswith('/method'): | ||||||
|  |             self._method('PUT', data) | ||||||
|  |         else: | ||||||
|  |             self._status(404) | ||||||
|  |  | ||||||
|     def do_GET(self): |     def do_GET(self): | ||||||
|  |  | ||||||
|  |         def respond(payload=b'<html><video src="/vid.mp4" /></html>', | ||||||
|  |                     payload_type='text/html; charset=utf-8', | ||||||
|  |                     payload_encoding=None, | ||||||
|  |                     resp_code=200): | ||||||
|  |             self.send_response(resp_code) | ||||||
|  |             self.send_header('Content-Type', payload_type) | ||||||
|  |             if payload_encoding: | ||||||
|  |                 self.send_header('Content-Encoding', payload_encoding) | ||||||
|  |             self.send_header('Content-Length', str(len(payload)))  # required for persistent connections | ||||||
|  |             self.end_headers() | ||||||
|  |             self.wfile.write(payload) | ||||||
|  |  | ||||||
|  |         def gzip_compress(p): | ||||||
|  |             buf = io.BytesIO() | ||||||
|  |             with gzip.GzipFile(fileobj=buf, mode='wb') as f: | ||||||
|  |                 f.write(p) | ||||||
|  |             return buf.getvalue() | ||||||
|  |  | ||||||
|         if self.path == '/video.html': |         if self.path == '/video.html': | ||||||
|             self.send_response(200) |             respond() | ||||||
|             self.send_header('Content-Type', 'text/html; charset=utf-8') |  | ||||||
|             self.end_headers() |  | ||||||
|             self.wfile.write(b'<html><video src="/vid.mp4" /></html>') |  | ||||||
|         elif self.path == '/vid.mp4': |         elif self.path == '/vid.mp4': | ||||||
|             self.send_response(200) |             respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4') | ||||||
|             self.send_header('Content-Type', 'video/mp4') |  | ||||||
|             self.end_headers() |  | ||||||
|             self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') |  | ||||||
|         elif self.path == '/302': |         elif self.path == '/302': | ||||||
|             if sys.version_info[0] == 3: |             if sys.version_info[0] == 3: | ||||||
|                 # XXX: Python 3 http server does not allow non-ASCII header values |                 # XXX: Python 3 http server does not allow non-ASCII header values | ||||||
| @@ -42,60 +169,284 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | |||||||
|                 self.end_headers() |                 self.end_headers() | ||||||
|                 return |                 return | ||||||
|  |  | ||||||
|             new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server) |             new_url = self._test_url('中文.html') | ||||||
|             self.send_response(302) |             self.send_response(302) | ||||||
|             self.send_header(b'Location', new_url.encode('utf-8')) |             self.send_header(b'Location', new_url.encode('utf-8')) | ||||||
|             self.end_headers() |             self.end_headers() | ||||||
|         elif self.path == '/%E4%B8%AD%E6%96%87.html': |         elif self.path == '/%E4%B8%AD%E6%96%87.html': | ||||||
|             self.send_response(200) |             respond() | ||||||
|             self.send_header('Content-Type', 'text/html; charset=utf-8') |         elif self.path == '/%c7%9f': | ||||||
|  |             respond() | ||||||
|  |         elif self.path.startswith('/redirect_'): | ||||||
|  |             self._redirect() | ||||||
|  |         elif self.path.startswith('/method'): | ||||||
|  |             self._method('GET') | ||||||
|  |         elif self.path.startswith('/headers'): | ||||||
|  |             self._headers() | ||||||
|  |         elif self.path == '/trailing_garbage': | ||||||
|  |             payload = b'<html><video src="/vid.mp4" /></html>' | ||||||
|  |             compressed = gzip_compress(payload) + b'trailing garbage' | ||||||
|  |             respond(compressed, payload_encoding='gzip') | ||||||
|  |         elif self.path == '/302-non-ascii-redirect': | ||||||
|  |             new_url = self._test_url('中文.html') | ||||||
|  |             # actually respond with permanent redirect | ||||||
|  |             self.send_response(301) | ||||||
|  |             self.send_header('Location', new_url) | ||||||
|  |             self.send_header('Content-Length', '0') | ||||||
|             self.end_headers() |             self.end_headers() | ||||||
|             self.wfile.write(b'<html><video src="/vid.mp4" /></html>') |         elif self.path == '/content-encoding': | ||||||
|  |             encodings = self.headers.get('ytdl-encoding', '') | ||||||
|  |             payload = b'<html><video src="/vid.mp4" /></html>' | ||||||
|  |             for encoding in filter(None, (e.strip() for e in encodings.split(','))): | ||||||
|  |                 if encoding == 'br' and brotli: | ||||||
|  |                     payload = brotli.compress(payload) | ||||||
|  |                 elif encoding == 'gzip': | ||||||
|  |                     payload = gzip_compress(payload) | ||||||
|  |                 elif encoding == 'deflate': | ||||||
|  |                     payload = zlib.compress(payload) | ||||||
|  |                 elif encoding == 'unsupported': | ||||||
|  |                     payload = b'raw' | ||||||
|  |                     break | ||||||
|  |                 else: | ||||||
|  |                     self._status(415) | ||||||
|  |                     return | ||||||
|  |             respond(payload, payload_encoding=encodings) | ||||||
|  |  | ||||||
|         else: |         else: | ||||||
|             assert False |             self._status(404) | ||||||
|  |  | ||||||
|  |     def send_header(self, keyword, value): | ||||||
|  |         """ | ||||||
|  |         Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers. | ||||||
|  |         This is against what is defined in RFC 3986: but we need to test that we support this | ||||||
|  |         since some sites incorrectly do this. | ||||||
|  |         """ | ||||||
|  |         if keyword.lower() == 'connection': | ||||||
|  |             return self.super('send_header', keyword, value) | ||||||
|  |  | ||||||
|  |         if not hasattr(self, '_headers_buffer'): | ||||||
|  |             self._headers_buffer = [] | ||||||
|  |  | ||||||
|  |         self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8')) | ||||||
|  |  | ||||||
|  |     def end_headers(self): | ||||||
|  |         if hasattr(self, '_headers_buffer'): | ||||||
|  |             self.wfile.write(b''.join(self._headers_buffer)) | ||||||
|  |             self._headers_buffer = [] | ||||||
|  |         self.super('end_headers') | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestHTTP(unittest.TestCase): | class TestHTTP(unittest.TestCase): | ||||||
|     def setUp(self): |     def setUp(self): | ||||||
|         self.httpd = compat_http_server.HTTPServer( |         # HTTP server | ||||||
|  |         self.http_httpd = compat_http_server.HTTPServer( | ||||||
|             ('127.0.0.1', 0), HTTPTestRequestHandler) |             ('127.0.0.1', 0), HTTPTestRequestHandler) | ||||||
|         self.port = http_server_port(self.httpd) |         self.http_port = http_server_port(self.http_httpd) | ||||||
|         self.server_thread = threading.Thread(target=self.httpd.serve_forever) |  | ||||||
|         self.server_thread.daemon = True |  | ||||||
|         self.server_thread.start() |  | ||||||
|  |  | ||||||
|     def test_unicode_path_redirection(self): |         self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever) | ||||||
|         # XXX: Python 3 http server does not allow non-ASCII header values |         self.http_server_thread.daemon = True | ||||||
|         if sys.version_info[0] == 3: |         self.http_server_thread.start() | ||||||
|             return |  | ||||||
|  |  | ||||||
|         ydl = YoutubeDL({'logger': FakeLogger()}) |         try: | ||||||
|         r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port) |             from http.server import ThreadingHTTPServer | ||||||
|         self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port) |         except ImportError: | ||||||
|  |             try: | ||||||
|  |                 from socketserver import ThreadingMixIn | ||||||
|  |             except ImportError: | ||||||
|  |                 from SocketServer import ThreadingMixIn | ||||||
|  |  | ||||||
|  |             class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer): | ||||||
|  |                 pass | ||||||
|  |  | ||||||
| class TestHTTPS(unittest.TestCase): |         # HTTPS server | ||||||
|     def setUp(self): |  | ||||||
|         certfn = os.path.join(TEST_DIR, 'testcert.pem') |         certfn = os.path.join(TEST_DIR, 'testcert.pem') | ||||||
|         self.httpd = compat_http_server.HTTPServer( |         self.https_httpd = ThreadingHTTPServer( | ||||||
|             ('127.0.0.1', 0), HTTPTestRequestHandler) |             ('127.0.0.1', 0), HTTPTestRequestHandler) | ||||||
|         self.httpd.socket = ssl.wrap_socket( |         try: | ||||||
|             self.httpd.socket, certfile=certfn, server_side=True) |             sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23) | ||||||
|         self.port = http_server_port(self.httpd) |             sslctx.verify_mode = ssl.CERT_NONE | ||||||
|         self.server_thread = threading.Thread(target=self.httpd.serve_forever) |             sslctx.check_hostname = False | ||||||
|         self.server_thread.daemon = True |             sslctx.load_cert_chain(certfn, None) | ||||||
|         self.server_thread.start() |             self.https_httpd.socket = sslctx.wrap_socket( | ||||||
|  |                 self.https_httpd.socket, server_side=True) | ||||||
|  |         except AttributeError: | ||||||
|  |             self.https_httpd.socket = ssl.wrap_socket( | ||||||
|  |                 self.https_httpd.socket, certfile=certfn, server_side=True) | ||||||
|  |  | ||||||
|  |         self.https_port = http_server_port(self.https_httpd) | ||||||
|  |         self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever) | ||||||
|  |         self.https_server_thread.daemon = True | ||||||
|  |         self.https_server_thread.start() | ||||||
|  |  | ||||||
|  |     def tearDown(self): | ||||||
|  |  | ||||||
|  |         def closer(svr): | ||||||
|  |             def _closer(): | ||||||
|  |                 svr.shutdown() | ||||||
|  |                 svr.server_close() | ||||||
|  |             return _closer | ||||||
|  |  | ||||||
|  |         shutdown_thread = threading.Thread(target=closer(self.http_httpd)) | ||||||
|  |         shutdown_thread.start() | ||||||
|  |         self.http_server_thread.join(2.0) | ||||||
|  |  | ||||||
|  |         shutdown_thread = threading.Thread(target=closer(self.https_httpd)) | ||||||
|  |         shutdown_thread.start() | ||||||
|  |         self.https_server_thread.join(2.0) | ||||||
|  |  | ||||||
|  |     def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): | ||||||
|  |         return '{0}://{1}:{2}/{3}'.format( | ||||||
|  |             scheme, host, | ||||||
|  |             port if port is not None | ||||||
|  |             else self.https_port if scheme == 'https' | ||||||
|  |             else self.http_port, path) | ||||||
|  |  | ||||||
|     def test_nocheckcertificate(self): |     def test_nocheckcertificate(self): | ||||||
|         if sys.version_info >= (2, 7, 9):  # No certificate checking anyways |         with FakeYDL({'logger': FakeLogger()}) as ydl: | ||||||
|             ydl = YoutubeDL({'logger': FakeLogger()}) |             with self.assertRaises(compat_urllib_error.URLError): | ||||||
|             self.assertRaises( |                 ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https'))) | ||||||
|                 Exception, |  | ||||||
|                 ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port) |  | ||||||
|  |  | ||||||
|         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) |         with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl: | ||||||
|         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) |             r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https'))) | ||||||
|         self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) |             self.assertEqual(r.getcode(), 200) | ||||||
|  |             r.close() | ||||||
|  |  | ||||||
|  |     def test_percent_encode(self): | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             # Unicode characters should be encoded with uppercase percent-encoding | ||||||
|  |             res = ydl.urlopen(sanitized_Request(self._test_url('中文.html'))) | ||||||
|  |             self.assertEqual(res.getcode(), 200) | ||||||
|  |             res.close() | ||||||
|  |             # don't normalize existing percent encodings | ||||||
|  |             res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f'))) | ||||||
|  |             self.assertEqual(res.getcode(), 200) | ||||||
|  |             res.close() | ||||||
|  |  | ||||||
|  |     def test_unicode_path_redirection(self): | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect'))) | ||||||
|  |             self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html')) | ||||||
|  |             r.close() | ||||||
|  |  | ||||||
|  |     def test_redirect(self): | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             def do_req(redirect_status, method, check_no_content=False): | ||||||
|  |                 data = b'testdata' if method in ('POST', 'PUT') else None | ||||||
|  |                 res = ydl.urlopen(sanitized_Request( | ||||||
|  |                     self._test_url('redirect_{0}'.format(redirect_status)), | ||||||
|  |                     method=method, data=data)) | ||||||
|  |                 if check_no_content: | ||||||
|  |                     self.assertNotIn('Content-Type', res.headers) | ||||||
|  |                 return res.read().decode('utf-8'), res.headers.get('method', '') | ||||||
|  |             # A 303 must either use GET or HEAD for subsequent request | ||||||
|  |             self.assertEqual(do_req(303, 'POST'), ('', 'GET')) | ||||||
|  |             self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD')) | ||||||
|  |  | ||||||
|  |             self.assertEqual(do_req(303, 'PUT'), ('', 'GET')) | ||||||
|  |  | ||||||
|  |             # 301 and 302 turn POST only into a GET, with no Content-Type | ||||||
|  |             self.assertEqual(do_req(301, 'POST', True), ('', 'GET')) | ||||||
|  |             self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD')) | ||||||
|  |             self.assertEqual(do_req(302, 'POST', True), ('', 'GET')) | ||||||
|  |             self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD')) | ||||||
|  |  | ||||||
|  |             self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT')) | ||||||
|  |             self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT')) | ||||||
|  |  | ||||||
|  |             # 307 and 308 should not change method | ||||||
|  |             for m in ('POST', 'PUT'): | ||||||
|  |                 self.assertEqual(do_req(307, m), ('testdata', m)) | ||||||
|  |                 self.assertEqual(do_req(308, m), ('testdata', m)) | ||||||
|  |  | ||||||
|  |             self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD')) | ||||||
|  |             self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD')) | ||||||
|  |  | ||||||
|  |             # These should not redirect and instead raise an HTTPError | ||||||
|  |             for code in (300, 304, 305, 306): | ||||||
|  |                 with self.assertRaises(compat_urllib_HTTPError): | ||||||
|  |                     do_req(code, 'GET') | ||||||
|  |  | ||||||
|  |     def test_content_type(self): | ||||||
|  |         # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28 | ||||||
|  |         with FakeYDL({'nocheckcertificate': True}) as ydl: | ||||||
|  |             # method should be auto-detected as POST | ||||||
|  |             r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'})) | ||||||
|  |  | ||||||
|  |             headers = ydl.urlopen(r).read().decode('utf-8') | ||||||
|  |             self.assertIn('Content-Type: application/x-www-form-urlencoded', headers) | ||||||
|  |  | ||||||
|  |             # test http | ||||||
|  |             r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'})) | ||||||
|  |             headers = ydl.urlopen(r).read().decode('utf-8') | ||||||
|  |             self.assertIn('Content-Type: application/x-www-form-urlencoded', headers) | ||||||
|  |  | ||||||
|  |     def test_cookiejar(self): | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie( | ||||||
|  |                 0, 'test', 'ytdl', None, False, '127.0.0.1', True, | ||||||
|  |                 False, '/headers', True, False, None, False, None, None, {})) | ||||||
|  |             data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read() | ||||||
|  |             self.assertIn(b'Cookie: test=ytdl', data) | ||||||
|  |  | ||||||
|  |     def test_no_compression_compat_header(self): | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             data = ydl.urlopen( | ||||||
|  |                 sanitized_Request( | ||||||
|  |                     self._test_url('headers'), | ||||||
|  |                     headers={'Youtubedl-no-compression': True})).read() | ||||||
|  |             self.assertIn(b'Accept-Encoding: identity', data) | ||||||
|  |             self.assertNotIn(b'youtubedl-no-compression', data.lower()) | ||||||
|  |  | ||||||
|  |     def test_gzip_trailing_garbage(self): | ||||||
|  |         # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5 | ||||||
|  |         # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8') | ||||||
|  |             self.assertEqual(data, '<html><video src="/vid.mp4" /></html>') | ||||||
|  |  | ||||||
|  |     def __test_compression(self, encoding): | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             res = ydl.urlopen( | ||||||
|  |                 sanitized_Request( | ||||||
|  |                     self._test_url('content-encoding'), | ||||||
|  |                     headers={'ytdl-encoding': encoding})) | ||||||
|  |             self.assertEqual(res.headers.get('Content-Encoding'), encoding) | ||||||
|  |             self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') | ||||||
|  |  | ||||||
|  |     @unittest.skipUnless(brotli, 'brotli support is not installed') | ||||||
|  |     @unittest.expectedFailure | ||||||
|  |     def test_brotli(self): | ||||||
|  |         self.__test_compression('br') | ||||||
|  |  | ||||||
|  |     @unittest.expectedFailure | ||||||
|  |     def test_deflate(self): | ||||||
|  |         self.__test_compression('deflate') | ||||||
|  |  | ||||||
|  |     @unittest.expectedFailure | ||||||
|  |     def test_gzip(self): | ||||||
|  |         self.__test_compression('gzip') | ||||||
|  |  | ||||||
|  |     @unittest.expectedFailure  # not yet implemented | ||||||
|  |     def test_multiple_encodings(self): | ||||||
|  |         # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4 | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'): | ||||||
|  |                 res = ydl.urlopen( | ||||||
|  |                     sanitized_Request( | ||||||
|  |                         self._test_url('content-encoding'), | ||||||
|  |                         headers={'ytdl-encoding': pair})) | ||||||
|  |                 self.assertEqual(res.headers.get('Content-Encoding'), pair) | ||||||
|  |                 self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') | ||||||
|  |  | ||||||
|  |     def test_unsupported_encoding(self): | ||||||
|  |         # it should return the raw content | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             res = ydl.urlopen( | ||||||
|  |                 sanitized_Request( | ||||||
|  |                     self._test_url('content-encoding'), | ||||||
|  |                     headers={'ytdl-encoding': 'unsupported'})) | ||||||
|  |             self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported') | ||||||
|  |             self.assertEqual(res.read(), b'raw') | ||||||
|  |  | ||||||
|  |  | ||||||
| def _build_proxy_handler(name): | def _build_proxy_handler(name): | ||||||
| @@ -109,7 +460,7 @@ def _build_proxy_handler(name): | |||||||
|             self.send_response(200) |             self.send_response(200) | ||||||
|             self.send_header('Content-Type', 'text/plain; charset=utf-8') |             self.send_header('Content-Type', 'text/plain; charset=utf-8') | ||||||
|             self.end_headers() |             self.end_headers() | ||||||
|             self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8')) |             self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8')) | ||||||
|     return HTTPTestRequestHandler |     return HTTPTestRequestHandler | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -129,10 +480,30 @@ class TestProxy(unittest.TestCase): | |||||||
|         self.geo_proxy_thread.daemon = True |         self.geo_proxy_thread.daemon = True | ||||||
|         self.geo_proxy_thread.start() |         self.geo_proxy_thread.start() | ||||||
|  |  | ||||||
|  |     def tearDown(self): | ||||||
|  |  | ||||||
|  |         def closer(svr): | ||||||
|  |             def _closer(): | ||||||
|  |                 svr.shutdown() | ||||||
|  |                 svr.server_close() | ||||||
|  |             return _closer | ||||||
|  |  | ||||||
|  |         shutdown_thread = threading.Thread(target=closer(self.proxy)) | ||||||
|  |         shutdown_thread.start() | ||||||
|  |         self.proxy_thread.join(2.0) | ||||||
|  |  | ||||||
|  |         shutdown_thread = threading.Thread(target=closer(self.geo_proxy)) | ||||||
|  |         shutdown_thread.start() | ||||||
|  |         self.geo_proxy_thread.join(2.0) | ||||||
|  |  | ||||||
|  |     def _test_proxy(self, host='127.0.0.1', port=None): | ||||||
|  |         return '{0}:{1}'.format( | ||||||
|  |             host, port if port is not None else self.port) | ||||||
|  |  | ||||||
|     def test_proxy(self): |     def test_proxy(self): | ||||||
|         geo_proxy = '127.0.0.1:{0}'.format(self.geo_port) |         geo_proxy = self._test_proxy(port=self.geo_port) | ||||||
|         ydl = YoutubeDL({ |         ydl = YoutubeDL({ | ||||||
|             'proxy': '127.0.0.1:{0}'.format(self.port), |             'proxy': self._test_proxy(), | ||||||
|             'geo_verification_proxy': geo_proxy, |             'geo_verification_proxy': geo_proxy, | ||||||
|         }) |         }) | ||||||
|         url = 'http://foo.com/bar' |         url = 'http://foo.com/bar' | ||||||
| @@ -146,7 +517,7 @@ class TestProxy(unittest.TestCase): | |||||||
|  |  | ||||||
|     def test_proxy_with_idn(self): |     def test_proxy_with_idn(self): | ||||||
|         ydl = YoutubeDL({ |         ydl = YoutubeDL({ | ||||||
|             'proxy': '127.0.0.1:{0}'.format(self.port), |             'proxy': self._test_proxy(), | ||||||
|         }) |         }) | ||||||
|         url = 'http://中文.tw/' |         url = 'http://中文.tw/' | ||||||
|         response = ydl.urlopen(url).read().decode('utf-8') |         response = ydl.urlopen(url).read().decode('utf-8') | ||||||
| @@ -154,5 +525,25 @@ class TestProxy(unittest.TestCase): | |||||||
|         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') |         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class TestFileURL(unittest.TestCase): | ||||||
|  |     # See https://github.com/ytdl-org/youtube-dl/issues/8227 | ||||||
|  |     def test_file_urls(self): | ||||||
|  |         tf = tempfile.NamedTemporaryFile(delete=False) | ||||||
|  |         tf.write(b'foobar') | ||||||
|  |         tf.close() | ||||||
|  |         url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name)) | ||||||
|  |         with FakeYDL() as ydl: | ||||||
|  |             self.assertRaisesRegexp( | ||||||
|  |                 compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url) | ||||||
|  |         # not yet implemented | ||||||
|  |         """ | ||||||
|  |         with FakeYDL({'enable_file_urls': True}) as ydl: | ||||||
|  |             res = ydl.urlopen(url) | ||||||
|  |             self.assertEqual(res.read(), b'foobar') | ||||||
|  |             res.close() | ||||||
|  |         """ | ||||||
|  |         os.unlink(tf.name) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -41,7 +41,6 @@ import zlib | |||||||
| from .compat import ( | from .compat import ( | ||||||
|     compat_HTMLParseError, |     compat_HTMLParseError, | ||||||
|     compat_HTMLParser, |     compat_HTMLParser, | ||||||
|     compat_HTTPError, |  | ||||||
|     compat_basestring, |     compat_basestring, | ||||||
|     compat_casefold, |     compat_casefold, | ||||||
|     compat_chr, |     compat_chr, | ||||||
| @@ -64,6 +63,7 @@ from .compat import ( | |||||||
|     compat_struct_pack, |     compat_struct_pack, | ||||||
|     compat_struct_unpack, |     compat_struct_unpack, | ||||||
|     compat_urllib_error, |     compat_urllib_error, | ||||||
|  |     compat_urllib_HTTPError, | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     compat_urllib_parse_parse_qs as compat_parse_qs, |     compat_urllib_parse_parse_qs as compat_parse_qs, | ||||||
|     compat_urllib_parse_urlencode, |     compat_urllib_parse_urlencode, | ||||||
| @@ -2614,7 +2614,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | |||||||
|  |  | ||||||
|     Part of this code was copied from: |     Part of this code was copied from: | ||||||
|  |  | ||||||
|     http://techknack.net/python-urllib2-handlers/ |     http://techknack.net/python-urllib2-handlers/, archived at | ||||||
|  |     https://web.archive.org/web/20130527205558/http://techknack.net/python-urllib2-handlers/ | ||||||
|  |  | ||||||
|     Andrew Rowls, the author of that code, agreed to release it to the |     Andrew Rowls, the author of that code, agreed to release it to the | ||||||
|     public domain. |     public domain. | ||||||
| @@ -2672,7 +2673,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | |||||||
|             req._Request__original = req._Request__original.partition('#')[0] |             req._Request__original = req._Request__original.partition('#')[0] | ||||||
|             req._Request__r_type = req._Request__r_type.partition('#')[0] |             req._Request__r_type = req._Request__r_type.partition('#')[0] | ||||||
|  |  | ||||||
|         return req |         # Use the totally undocumented AbstractHTTPHandler per | ||||||
|  |         # https://github.com/yt-dlp/yt-dlp/pull/4158 | ||||||
|  |         return compat_urllib_request.AbstractHTTPHandler.do_request_(self, req) | ||||||
|  |  | ||||||
|     def http_response(self, req, resp): |     def http_response(self, req, resp): | ||||||
|         old_resp = resp |         old_resp = resp | ||||||
| @@ -2683,7 +2686,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | |||||||
|             try: |             try: | ||||||
|                 uncompressed = io.BytesIO(gz.read()) |                 uncompressed = io.BytesIO(gz.read()) | ||||||
|             except IOError as original_ioerror: |             except IOError as original_ioerror: | ||||||
|                 # There may be junk add the end of the file |                 # There may be junk at the end of the file | ||||||
|                 # See http://stackoverflow.com/q/4928560/35070 for details |                 # See http://stackoverflow.com/q/4928560/35070 for details | ||||||
|                 for i in range(1, 1024): |                 for i in range(1, 1024): | ||||||
|                     try: |                     try: | ||||||
| @@ -2710,9 +2713,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | |||||||
|             if location: |             if location: | ||||||
|                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 |                 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 | ||||||
|                 if sys.version_info >= (3, 0): |                 if sys.version_info >= (3, 0): | ||||||
|                     location = location.encode('iso-8859-1').decode('utf-8') |                     location = location.encode('iso-8859-1') | ||||||
|                 else: |                 location = location.decode('utf-8') | ||||||
|                     location = location.decode('utf-8') |  | ||||||
|                 location_escaped = escape_url(location) |                 location_escaped = escape_url(location) | ||||||
|                 if location != location_escaped: |                 if location != location_escaped: | ||||||
|                     del resp.headers['Location'] |                     del resp.headers['Location'] | ||||||
| @@ -2940,17 +2942,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | |||||||
|  |  | ||||||
|     The code is based on HTTPRedirectHandler implementation from CPython [1]. |     The code is based on HTTPRedirectHandler implementation from CPython [1]. | ||||||
|  |  | ||||||
|     This redirect handler solves two issues: |     This redirect handler fixes and improves the logic to better align with RFC7231 | ||||||
|      - ensures redirect URL is always unicode under python 2 |     and what browsers tend to do [2][3] | ||||||
|      - introduces support for experimental HTTP response status code |  | ||||||
|        308 Permanent Redirect [2] used by some sites [3] |  | ||||||
|  |  | ||||||
|     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py |     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py | ||||||
|     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308 |     2. https://datatracker.ietf.org/doc/html/rfc7231 | ||||||
|     3. https://github.com/ytdl-org/youtube-dl/issues/28768 |     3. https://github.com/python/cpython/issues/91306 | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 |     # Supply possibly missing alias | ||||||
|  |     http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 | ||||||
|  |  | ||||||
|     def redirect_request(self, req, fp, code, msg, headers, newurl): |     def redirect_request(self, req, fp, code, msg, headers, newurl): | ||||||
|         """Return a Request or None in response to a redirect. |         """Return a Request or None in response to a redirect. | ||||||
| @@ -2962,19 +2963,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | |||||||
|         else should try to handle this url.  Return None if you can't |         else should try to handle this url.  Return None if you can't | ||||||
|         but another Handler might. |         but another Handler might. | ||||||
|         """ |         """ | ||||||
|         m = req.get_method() |         if code not in (301, 302, 303, 307, 308): | ||||||
|         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD") |             raise compat_urllib_HTTPError(req.full_url, code, msg, headers, fp) | ||||||
|                  or code in (301, 302, 303) and m == "POST")): |  | ||||||
|             raise compat_HTTPError(req.full_url, code, msg, headers, fp) |         new_method = req.get_method() | ||||||
|         # Strictly (according to RFC 2616), 301 or 302 in response to |         new_data = req.data | ||||||
|         # a POST MUST NOT cause a redirection without confirmation |         remove_headers = [] | ||||||
|         # from the user (of urllib.request, in this case).  In practice, |  | ||||||
|         # essentially all clients do redirect in this case, so we do |  | ||||||
|         # the same. |  | ||||||
|  |  | ||||||
|         # On python 2 urlh.geturl() may sometimes return redirect URL |         # On python 2 urlh.geturl() may sometimes return redirect URL | ||||||
|         # as byte string instead of unicode. This workaround allows |         # as a byte string instead of unicode. This workaround forces | ||||||
|         # to force it always return unicode. |         # it to return unicode. | ||||||
|         if sys.version_info[0] < 3: |         if sys.version_info[0] < 3: | ||||||
|             newurl = compat_str(newurl) |             newurl = compat_str(newurl) | ||||||
|  |  | ||||||
| @@ -2983,13 +2981,29 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | |||||||
|         # but it is kept for compatibility with other callers. |         # but it is kept for compatibility with other callers. | ||||||
|         newurl = newurl.replace(' ', '%20') |         newurl = newurl.replace(' ', '%20') | ||||||
|  |  | ||||||
|         CONTENT_HEADERS = ("content-length", "content-type") |         # A 303 must use either GET or HEAD for the subsequent request | ||||||
|  |         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 | ||||||
|  |         if code == 303 and req.get_method() != 'HEAD': | ||||||
|  |             new_method = 'GET' | ||||||
|  |         # 301 and 302 redirects are commonly turned into a GET from a POST | ||||||
|  |         # for subsequent requests by browsers, so we'll do the same. | ||||||
|  |         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 | ||||||
|  |         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 | ||||||
|  |         elif code in (301, 302) and req.get_method() == 'POST': | ||||||
|  |             new_method = 'GET' | ||||||
|  |  | ||||||
|  |         # only remove payload if method changed (e.g. POST to GET) | ||||||
|  |         if new_method != req.get_method(): | ||||||
|  |             new_data = None | ||||||
|  |             remove_headers.extend(['Content-Length', 'Content-Type']) | ||||||
|  |  | ||||||
|         # NB: don't use dict comprehension for python 2.6 compatibility |         # NB: don't use dict comprehension for python 2.6 compatibility | ||||||
|         newheaders = dict((k, v) for k, v in req.headers.items() |         new_headers = dict((k, v) for k, v in req.header_items() | ||||||
|                           if k.lower() not in CONTENT_HEADERS) |                            if k.lower() not in remove_headers) | ||||||
|  |  | ||||||
|         return compat_urllib_request.Request( |         return compat_urllib_request.Request( | ||||||
|             newurl, headers=newheaders, origin_req_host=req.origin_req_host, |             newurl, headers=new_headers, origin_req_host=req.origin_req_host, | ||||||
|             unverifiable=True) |             unverifiable=True, method=new_method, data=new_data) | ||||||
|  |  | ||||||
|  |  | ||||||
| def extract_timezone(date_str): | def extract_timezone(date_str): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user