mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[core] Update redirect handling from yt-dlp
* Thx coletdjnz: https://github.com/yt-dlp/yt-dlp/pull/7094
* add test that redirected `POST` loses its `Content-Type`
This commit is contained in:
		| @@ -8,33 +8,160 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| import gzip | ||||
| import io | ||||
| import ssl | ||||
| import tempfile | ||||
| import threading | ||||
| import zlib | ||||
|  | ||||
| # avoid deprecated alias assertRaisesRegexp | ||||
| if hasattr(unittest.TestCase, 'assertRaisesRegex'): | ||||
|     unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex | ||||
|  | ||||
| try: | ||||
|     import brotli | ||||
| except ImportError: | ||||
|     brotli = None | ||||
| try: | ||||
|     from urllib.request import pathname2url | ||||
| except ImportError: | ||||
|     from urllib import pathname2url | ||||
|  | ||||
| from youtube_dl.compat import ( | ||||
|     compat_http_cookiejar_Cookie, | ||||
|     compat_http_server, | ||||
|     compat_str as str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_HTTPError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
| from youtube_dl.utils import ( | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
| from test.helper import ( | ||||
|     FakeYDL, | ||||
|     FakeLogger, | ||||
|     http_server_port, | ||||
| ) | ||||
| from youtube_dl import YoutubeDL | ||||
| from youtube_dl.compat import compat_http_server, compat_urllib_request | ||||
| import ssl | ||||
| import threading | ||||
|  | ||||
| TEST_DIR = os.path.dirname(os.path.abspath(__file__)) | ||||
|  | ||||
|  | ||||
| class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | ||||
|     protocol_version = 'HTTP/1.1' | ||||
|  | ||||
|     # work-around old/new -style class inheritance | ||||
|     def super(self, meth_name, *args, **kwargs): | ||||
|         from types import MethodType | ||||
|         try: | ||||
|             super() | ||||
|             fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k) | ||||
|         except TypeError: | ||||
|             fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k) | ||||
|         self.super = MethodType(fn, self) | ||||
|         return self.super(meth_name, *args, **kwargs) | ||||
|  | ||||
|     def log_message(self, format, *args): | ||||
|         pass | ||||
|  | ||||
|     def do_GET(self): | ||||
|         if self.path == '/video.html': | ||||
|     def _headers(self): | ||||
|         payload = str(self.headers).encode('utf-8') | ||||
|         self.send_response(200) | ||||
|         self.send_header('Content-Type', 'application/json') | ||||
|         self.send_header('Content-Length', str(len(payload))) | ||||
|         self.end_headers() | ||||
|         self.wfile.write(payload) | ||||
|  | ||||
|     def _redirect(self): | ||||
|         self.send_response(int(self.path[len('/redirect_'):])) | ||||
|         self.send_header('Location', '/method') | ||||
|         self.send_header('Content-Length', '0') | ||||
|         self.end_headers() | ||||
|  | ||||
|     def _method(self, method, payload=None): | ||||
|         self.send_response(200) | ||||
|         self.send_header('Content-Length', str(len(payload or ''))) | ||||
|         self.send_header('Method', method) | ||||
|         self.end_headers() | ||||
|         if payload: | ||||
|             self.wfile.write(payload) | ||||
|  | ||||
|     def _status(self, status): | ||||
|         payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8') | ||||
|         self.send_response(int(status)) | ||||
|         self.send_header('Content-Type', 'text/html; charset=utf-8') | ||||
|         self.send_header('Content-Length', str(len(payload))) | ||||
|         self.end_headers() | ||||
|             self.wfile.write(b'<html><video src="/vid.mp4" /></html>') | ||||
|         self.wfile.write(payload) | ||||
|  | ||||
|     def _read_data(self): | ||||
|         if 'Content-Length' in self.headers: | ||||
|             return self.rfile.read(int(self.headers['Content-Length'])) | ||||
|  | ||||
|     def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): | ||||
|         return '{0}://{1}:{2}/{3}'.format( | ||||
|             scheme, host, | ||||
|             port if port is not None | ||||
|             else http_server_port(self.server), path) | ||||
|  | ||||
|     def do_POST(self): | ||||
|         data = self._read_data() | ||||
|         if self.path.startswith('/redirect_'): | ||||
|             self._redirect() | ||||
|         elif self.path.startswith('/method'): | ||||
|             self._method('POST', data) | ||||
|         elif self.path.startswith('/headers'): | ||||
|             self._headers() | ||||
|         else: | ||||
|             self._status(404) | ||||
|  | ||||
|     def do_HEAD(self): | ||||
|         if self.path.startswith('/redirect_'): | ||||
|             self._redirect() | ||||
|         elif self.path.startswith('/method'): | ||||
|             self._method('HEAD') | ||||
|         else: | ||||
|             self._status(404) | ||||
|  | ||||
|     def do_PUT(self): | ||||
|         data = self._read_data() | ||||
|         if self.path.startswith('/redirect_'): | ||||
|             self._redirect() | ||||
|         elif self.path.startswith('/method'): | ||||
|             self._method('PUT', data) | ||||
|         else: | ||||
|             self._status(404) | ||||
|  | ||||
|     def do_GET(self): | ||||
|  | ||||
|         def respond(payload=b'<html><video src="/vid.mp4" /></html>', | ||||
|                     payload_type='text/html; charset=utf-8', | ||||
|                     payload_encoding=None, | ||||
|                     resp_code=200): | ||||
|             self.send_response(resp_code) | ||||
|             self.send_header('Content-Type', payload_type) | ||||
|             if payload_encoding: | ||||
|                 self.send_header('Content-Encoding', payload_encoding) | ||||
|             self.send_header('Content-Length', str(len(payload)))  # required for persistent connections | ||||
|             self.end_headers() | ||||
|             self.wfile.write(payload) | ||||
|  | ||||
|         def gzip_compress(p): | ||||
|             buf = io.BytesIO() | ||||
|             with gzip.GzipFile(fileobj=buf, mode='wb') as f: | ||||
|                 f.write(p) | ||||
|             return buf.getvalue() | ||||
|  | ||||
|         if self.path == '/video.html': | ||||
|             respond() | ||||
|         elif self.path == '/vid.mp4': | ||||
|             self.send_response(200) | ||||
|             self.send_header('Content-Type', 'video/mp4') | ||||
|             self.end_headers() | ||||
|             self.wfile.write(b'\x00\x00\x00\x00\x20\x66\x74[video]') | ||||
|             respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4') | ||||
|         elif self.path == '/302': | ||||
|             if sys.version_info[0] == 3: | ||||
|                 # XXX: Python 3 http server does not allow non-ASCII header values | ||||
| @@ -42,60 +169,284 @@ class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): | ||||
|                 self.end_headers() | ||||
|                 return | ||||
|  | ||||
|             new_url = 'http://127.0.0.1:%d/中文.html' % http_server_port(self.server) | ||||
|             new_url = self._test_url('中文.html') | ||||
|             self.send_response(302) | ||||
|             self.send_header(b'Location', new_url.encode('utf-8')) | ||||
|             self.end_headers() | ||||
|         elif self.path == '/%E4%B8%AD%E6%96%87.html': | ||||
|             self.send_response(200) | ||||
|             self.send_header('Content-Type', 'text/html; charset=utf-8') | ||||
|             respond() | ||||
|         elif self.path == '/%c7%9f': | ||||
|             respond() | ||||
|         elif self.path.startswith('/redirect_'): | ||||
|             self._redirect() | ||||
|         elif self.path.startswith('/method'): | ||||
|             self._method('GET') | ||||
|         elif self.path.startswith('/headers'): | ||||
|             self._headers() | ||||
|         elif self.path == '/trailing_garbage': | ||||
|             payload = b'<html><video src="/vid.mp4" /></html>' | ||||
|             compressed = gzip_compress(payload) + b'trailing garbage' | ||||
|             respond(compressed, payload_encoding='gzip') | ||||
|         elif self.path == '/302-non-ascii-redirect': | ||||
|             new_url = self._test_url('中文.html') | ||||
|             # actually respond with permanent redirect | ||||
|             self.send_response(301) | ||||
|             self.send_header('Location', new_url) | ||||
|             self.send_header('Content-Length', '0') | ||||
|             self.end_headers() | ||||
|             self.wfile.write(b'<html><video src="/vid.mp4" /></html>') | ||||
|         elif self.path == '/content-encoding': | ||||
|             encodings = self.headers.get('ytdl-encoding', '') | ||||
|             payload = b'<html><video src="/vid.mp4" /></html>' | ||||
|             for encoding in filter(None, (e.strip() for e in encodings.split(','))): | ||||
|                 if encoding == 'br' and brotli: | ||||
|                     payload = brotli.compress(payload) | ||||
|                 elif encoding == 'gzip': | ||||
|                     payload = gzip_compress(payload) | ||||
|                 elif encoding == 'deflate': | ||||
|                     payload = zlib.compress(payload) | ||||
|                 elif encoding == 'unsupported': | ||||
|                     payload = b'raw' | ||||
|                     break | ||||
|                 else: | ||||
|             assert False | ||||
|                     self._status(415) | ||||
|                     return | ||||
|             respond(payload, payload_encoding=encodings) | ||||
|  | ||||
|         else: | ||||
|             self._status(404) | ||||
|  | ||||
|     def send_header(self, keyword, value): | ||||
|         """ | ||||
|         Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers. | ||||
|         This is against what is defined in RFC 3986, but we need to test that we support this | ||||
|         since some sites incorrectly do this. | ||||
|         """ | ||||
|         if keyword.lower() == 'connection': | ||||
|             return self.super('send_header', keyword, value) | ||||
|  | ||||
|         if not hasattr(self, '_headers_buffer'): | ||||
|             self._headers_buffer = [] | ||||
|  | ||||
|         self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8')) | ||||
|  | ||||
|     def end_headers(self): | ||||
|         if hasattr(self, '_headers_buffer'): | ||||
|             self.wfile.write(b''.join(self._headers_buffer)) | ||||
|             self._headers_buffer = [] | ||||
|         self.super('end_headers') | ||||
|  | ||||
|  | ||||
| class TestHTTP(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         self.httpd = compat_http_server.HTTPServer( | ||||
|         # HTTP server | ||||
|         self.http_httpd = compat_http_server.HTTPServer( | ||||
|             ('127.0.0.1', 0), HTTPTestRequestHandler) | ||||
|         self.port = http_server_port(self.httpd) | ||||
|         self.server_thread = threading.Thread(target=self.httpd.serve_forever) | ||||
|         self.server_thread.daemon = True | ||||
|         self.server_thread.start() | ||||
|         self.http_port = http_server_port(self.http_httpd) | ||||
|  | ||||
|     def test_unicode_path_redirection(self): | ||||
|         # XXX: Python 3 http server does not allow non-ASCII header values | ||||
|         if sys.version_info[0] == 3: | ||||
|             return | ||||
|         self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever) | ||||
|         self.http_server_thread.daemon = True | ||||
|         self.http_server_thread.start() | ||||
|  | ||||
|         ydl = YoutubeDL({'logger': FakeLogger()}) | ||||
|         r = ydl.extract_info('http://127.0.0.1:%d/302' % self.port) | ||||
|         self.assertEqual(r['entries'][0]['url'], 'http://127.0.0.1:%d/vid.mp4' % self.port) | ||||
|         try: | ||||
|             from http.server import ThreadingHTTPServer | ||||
|         except ImportError: | ||||
|             try: | ||||
|                 from socketserver import ThreadingMixIn | ||||
|             except ImportError: | ||||
|                 from SocketServer import ThreadingMixIn | ||||
|  | ||||
|             class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer): | ||||
|                 pass | ||||
|  | ||||
| class TestHTTPS(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         # HTTPS server | ||||
|         certfn = os.path.join(TEST_DIR, 'testcert.pem') | ||||
|         self.httpd = compat_http_server.HTTPServer( | ||||
|         self.https_httpd = ThreadingHTTPServer( | ||||
|             ('127.0.0.1', 0), HTTPTestRequestHandler) | ||||
|         self.httpd.socket = ssl.wrap_socket( | ||||
|             self.httpd.socket, certfile=certfn, server_side=True) | ||||
|         self.port = http_server_port(self.httpd) | ||||
|         self.server_thread = threading.Thread(target=self.httpd.serve_forever) | ||||
|         self.server_thread.daemon = True | ||||
|         self.server_thread.start() | ||||
|         try: | ||||
|             sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23) | ||||
|             sslctx.verify_mode = ssl.CERT_NONE | ||||
|             sslctx.check_hostname = False | ||||
|             sslctx.load_cert_chain(certfn, None) | ||||
|             self.https_httpd.socket = sslctx.wrap_socket( | ||||
|                 self.https_httpd.socket, server_side=True) | ||||
|         except AttributeError: | ||||
|             self.https_httpd.socket = ssl.wrap_socket( | ||||
|                 self.https_httpd.socket, certfile=certfn, server_side=True) | ||||
|  | ||||
|         self.https_port = http_server_port(self.https_httpd) | ||||
|         self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever) | ||||
|         self.https_server_thread.daemon = True | ||||
|         self.https_server_thread.start() | ||||
|  | ||||
|     def tearDown(self): | ||||
|  | ||||
|         def closer(svr): | ||||
|             def _closer(): | ||||
|                 svr.shutdown() | ||||
|                 svr.server_close() | ||||
|             return _closer | ||||
|  | ||||
|         shutdown_thread = threading.Thread(target=closer(self.http_httpd)) | ||||
|         shutdown_thread.start() | ||||
|         self.http_server_thread.join(2.0) | ||||
|  | ||||
|         shutdown_thread = threading.Thread(target=closer(self.https_httpd)) | ||||
|         shutdown_thread.start() | ||||
|         self.https_server_thread.join(2.0) | ||||
|  | ||||
|     def _test_url(self, path, host='127.0.0.1', scheme='http', port=None): | ||||
|         return '{0}://{1}:{2}/{3}'.format( | ||||
|             scheme, host, | ||||
|             port if port is not None | ||||
|             else self.https_port if scheme == 'https' | ||||
|             else self.http_port, path) | ||||
|  | ||||
|     def test_nocheckcertificate(self): | ||||
|         if sys.version_info >= (2, 7, 9):  # No certificate checking anyways | ||||
|             ydl = YoutubeDL({'logger': FakeLogger()}) | ||||
|             self.assertRaises( | ||||
|                 Exception, | ||||
|                 ydl.extract_info, 'https://127.0.0.1:%d/video.html' % self.port) | ||||
|         with FakeYDL({'logger': FakeLogger()}) as ydl: | ||||
|             with self.assertRaises(compat_urllib_error.URLError): | ||||
|                 ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https'))) | ||||
|  | ||||
|         ydl = YoutubeDL({'logger': FakeLogger(), 'nocheckcertificate': True}) | ||||
|         r = ydl.extract_info('https://127.0.0.1:%d/video.html' % self.port) | ||||
|         self.assertEqual(r['entries'][0]['url'], 'https://127.0.0.1:%d/vid.mp4' % self.port) | ||||
|         with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl: | ||||
|             r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https'))) | ||||
|             self.assertEqual(r.getcode(), 200) | ||||
|             r.close() | ||||
|  | ||||
|     def test_percent_encode(self): | ||||
|         with FakeYDL() as ydl: | ||||
|             # Unicode characters should be encoded with uppercase percent-encoding | ||||
|             res = ydl.urlopen(sanitized_Request(self._test_url('中文.html'))) | ||||
|             self.assertEqual(res.getcode(), 200) | ||||
|             res.close() | ||||
|             # don't normalize existing percent encodings | ||||
|             res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f'))) | ||||
|             self.assertEqual(res.getcode(), 200) | ||||
|             res.close() | ||||
|  | ||||
|     def test_unicode_path_redirection(self): | ||||
|         with FakeYDL() as ydl: | ||||
|             r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect'))) | ||||
|             self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html')) | ||||
|             r.close() | ||||
|  | ||||
|     def test_redirect(self): | ||||
|         with FakeYDL() as ydl: | ||||
|             def do_req(redirect_status, method, check_no_content=False): | ||||
|                 data = b'testdata' if method in ('POST', 'PUT') else None | ||||
|                 res = ydl.urlopen(sanitized_Request( | ||||
|                     self._test_url('redirect_{0}'.format(redirect_status)), | ||||
|                     method=method, data=data)) | ||||
|                 if check_no_content: | ||||
|                     self.assertNotIn('Content-Type', res.headers) | ||||
|                 return res.read().decode('utf-8'), res.headers.get('method', '') | ||||
|             # A 303 must use either GET or HEAD for the subsequent request | ||||
|             self.assertEqual(do_req(303, 'POST'), ('', 'GET')) | ||||
|             self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD')) | ||||
|  | ||||
|             self.assertEqual(do_req(303, 'PUT'), ('', 'GET')) | ||||
|  | ||||
|             # 301 and 302 turn POST only into a GET, with no Content-Type | ||||
|             self.assertEqual(do_req(301, 'POST', True), ('', 'GET')) | ||||
|             self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD')) | ||||
|             self.assertEqual(do_req(302, 'POST', True), ('', 'GET')) | ||||
|             self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD')) | ||||
|  | ||||
|             self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT')) | ||||
|             self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT')) | ||||
|  | ||||
|             # 307 and 308 should not change method | ||||
|             for m in ('POST', 'PUT'): | ||||
|                 self.assertEqual(do_req(307, m), ('testdata', m)) | ||||
|                 self.assertEqual(do_req(308, m), ('testdata', m)) | ||||
|  | ||||
|             self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD')) | ||||
|             self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD')) | ||||
|  | ||||
|             # These should not redirect and instead raise an HTTPError | ||||
|             for code in (300, 304, 305, 306): | ||||
|                 with self.assertRaises(compat_urllib_HTTPError): | ||||
|                     do_req(code, 'GET') | ||||
|  | ||||
|     def test_content_type(self): | ||||
|         # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28 | ||||
|         with FakeYDL({'nocheckcertificate': True}) as ydl: | ||||
|             # method should be auto-detected as POST | ||||
|             r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'})) | ||||
|  | ||||
|             headers = ydl.urlopen(r).read().decode('utf-8') | ||||
|             self.assertIn('Content-Type: application/x-www-form-urlencoded', headers) | ||||
|  | ||||
|             # test http | ||||
|             r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'})) | ||||
|             headers = ydl.urlopen(r).read().decode('utf-8') | ||||
|             self.assertIn('Content-Type: application/x-www-form-urlencoded', headers) | ||||
|  | ||||
|     def test_cookiejar(self): | ||||
|         with FakeYDL() as ydl: | ||||
|             ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie( | ||||
|                 0, 'test', 'ytdl', None, False, '127.0.0.1', True, | ||||
|                 False, '/headers', True, False, None, False, None, None, {})) | ||||
|             data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read() | ||||
|             self.assertIn(b'Cookie: test=ytdl', data) | ||||
|  | ||||
|     def test_no_compression_compat_header(self): | ||||
|         with FakeYDL() as ydl: | ||||
|             data = ydl.urlopen( | ||||
|                 sanitized_Request( | ||||
|                     self._test_url('headers'), | ||||
|                     headers={'Youtubedl-no-compression': True})).read() | ||||
|             self.assertIn(b'Accept-Encoding: identity', data) | ||||
|             self.assertNotIn(b'youtubedl-no-compression', data.lower()) | ||||
|  | ||||
|     def test_gzip_trailing_garbage(self): | ||||
|         # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5 | ||||
|         # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f | ||||
|         with FakeYDL() as ydl: | ||||
|             data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8') | ||||
|             self.assertEqual(data, '<html><video src="/vid.mp4" /></html>') | ||||
|  | ||||
|     def __test_compression(self, encoding): | ||||
|         with FakeYDL() as ydl: | ||||
|             res = ydl.urlopen( | ||||
|                 sanitized_Request( | ||||
|                     self._test_url('content-encoding'), | ||||
|                     headers={'ytdl-encoding': encoding})) | ||||
|             self.assertEqual(res.headers.get('Content-Encoding'), encoding) | ||||
|             self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') | ||||
|  | ||||
|     @unittest.skipUnless(brotli, 'brotli support is not installed') | ||||
|     @unittest.expectedFailure | ||||
|     def test_brotli(self): | ||||
|         self.__test_compression('br') | ||||
|  | ||||
|     @unittest.expectedFailure | ||||
|     def test_deflate(self): | ||||
|         self.__test_compression('deflate') | ||||
|  | ||||
|     @unittest.expectedFailure | ||||
|     def test_gzip(self): | ||||
|         self.__test_compression('gzip') | ||||
|  | ||||
|     @unittest.expectedFailure  # not yet implemented | ||||
|     def test_multiple_encodings(self): | ||||
|         # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4 | ||||
|         with FakeYDL() as ydl: | ||||
|             for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'): | ||||
|                 res = ydl.urlopen( | ||||
|                     sanitized_Request( | ||||
|                         self._test_url('content-encoding'), | ||||
|                         headers={'ytdl-encoding': pair})) | ||||
|                 self.assertEqual(res.headers.get('Content-Encoding'), pair) | ||||
|                 self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>') | ||||
|  | ||||
|     def test_unsupported_encoding(self): | ||||
|         # it should return the raw content | ||||
|         with FakeYDL() as ydl: | ||||
|             res = ydl.urlopen( | ||||
|                 sanitized_Request( | ||||
|                     self._test_url('content-encoding'), | ||||
|                     headers={'ytdl-encoding': 'unsupported'})) | ||||
|             self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported') | ||||
|             self.assertEqual(res.read(), b'raw') | ||||
|  | ||||
|  | ||||
| def _build_proxy_handler(name): | ||||
| @@ -109,7 +460,7 @@ def _build_proxy_handler(name): | ||||
|             self.send_response(200) | ||||
|             self.send_header('Content-Type', 'text/plain; charset=utf-8') | ||||
|             self.end_headers() | ||||
|             self.wfile.write('{self.proxy_name}: {self.path}'.format(self=self).encode('utf-8')) | ||||
|             self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8')) | ||||
|     return HTTPTestRequestHandler | ||||
|  | ||||
|  | ||||
| @@ -129,10 +480,30 @@ class TestProxy(unittest.TestCase): | ||||
|         self.geo_proxy_thread.daemon = True | ||||
|         self.geo_proxy_thread.start() | ||||
|  | ||||
|     def tearDown(self): | ||||
|  | ||||
|         def closer(svr): | ||||
|             def _closer(): | ||||
|                 svr.shutdown() | ||||
|                 svr.server_close() | ||||
|             return _closer | ||||
|  | ||||
|         shutdown_thread = threading.Thread(target=closer(self.proxy)) | ||||
|         shutdown_thread.start() | ||||
|         self.proxy_thread.join(2.0) | ||||
|  | ||||
|         shutdown_thread = threading.Thread(target=closer(self.geo_proxy)) | ||||
|         shutdown_thread.start() | ||||
|         self.geo_proxy_thread.join(2.0) | ||||
|  | ||||
|     def _test_proxy(self, host='127.0.0.1', port=None): | ||||
|         return '{0}:{1}'.format( | ||||
|             host, port if port is not None else self.port) | ||||
|  | ||||
|     def test_proxy(self): | ||||
|         geo_proxy = '127.0.0.1:{0}'.format(self.geo_port) | ||||
|         geo_proxy = self._test_proxy(port=self.geo_port) | ||||
|         ydl = YoutubeDL({ | ||||
|             'proxy': '127.0.0.1:{0}'.format(self.port), | ||||
|             'proxy': self._test_proxy(), | ||||
|             'geo_verification_proxy': geo_proxy, | ||||
|         }) | ||||
|         url = 'http://foo.com/bar' | ||||
| @@ -146,7 +517,7 @@ class TestProxy(unittest.TestCase): | ||||
|  | ||||
|     def test_proxy_with_idn(self): | ||||
|         ydl = YoutubeDL({ | ||||
|             'proxy': '127.0.0.1:{0}'.format(self.port), | ||||
|             'proxy': self._test_proxy(), | ||||
|         }) | ||||
|         url = 'http://中文.tw/' | ||||
|         response = ydl.urlopen(url).read().decode('utf-8') | ||||
| @@ -154,5 +525,25 @@ class TestProxy(unittest.TestCase): | ||||
|         self.assertEqual(response, 'normal: http://xn--fiq228c.tw/') | ||||
|  | ||||
|  | ||||
| class TestFileURL(unittest.TestCase): | ||||
|     # See https://github.com/ytdl-org/youtube-dl/issues/8227 | ||||
|     def test_file_urls(self): | ||||
|         tf = tempfile.NamedTemporaryFile(delete=False) | ||||
|         tf.write(b'foobar') | ||||
|         tf.close() | ||||
|         url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name)) | ||||
|         with FakeYDL() as ydl: | ||||
|             self.assertRaisesRegexp( | ||||
|                 compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url) | ||||
|         # not yet implemented | ||||
|         """ | ||||
|         with FakeYDL({'enable_file_urls': True}) as ydl: | ||||
|             res = ydl.urlopen(url) | ||||
|             self.assertEqual(res.read(), b'foobar') | ||||
|             res.close() | ||||
|         """ | ||||
|         os.unlink(tf.name) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -41,7 +41,6 @@ import zlib | ||||
| from .compat import ( | ||||
|     compat_HTMLParseError, | ||||
|     compat_HTMLParser, | ||||
|     compat_HTTPError, | ||||
|     compat_basestring, | ||||
|     compat_casefold, | ||||
|     compat_chr, | ||||
| @@ -64,6 +63,7 @@ from .compat import ( | ||||
|     compat_struct_pack, | ||||
|     compat_struct_unpack, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_HTTPError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_parse_qs as compat_parse_qs, | ||||
|     compat_urllib_parse_urlencode, | ||||
| @@ -2614,7 +2614,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | ||||
|  | ||||
|     Part of this code was copied from: | ||||
|  | ||||
|     http://techknack.net/python-urllib2-handlers/ | ||||
|     http://techknack.net/python-urllib2-handlers/, archived at | ||||
|     https://web.archive.org/web/20130527205558/http://techknack.net/python-urllib2-handlers/ | ||||
|  | ||||
|     Andrew Rowls, the author of that code, agreed to release it to the | ||||
|     public domain. | ||||
| @@ -2672,7 +2673,9 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | ||||
|             req._Request__original = req._Request__original.partition('#')[0] | ||||
|             req._Request__r_type = req._Request__r_type.partition('#')[0] | ||||
|  | ||||
|         return req | ||||
|         # Use the totally undocumented AbstractHTTPHandler per | ||||
|         # https://github.com/yt-dlp/yt-dlp/pull/4158 | ||||
|         return compat_urllib_request.AbstractHTTPHandler.do_request_(self, req) | ||||
|  | ||||
|     def http_response(self, req, resp): | ||||
|         old_resp = resp | ||||
| @@ -2683,7 +2686,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | ||||
|             try: | ||||
|                 uncompressed = io.BytesIO(gz.read()) | ||||
|             except IOError as original_ioerror: | ||||
|                 # There may be junk add the end of the file | ||||
|                 # There may be junk at the end of the file | ||||
|                 # See http://stackoverflow.com/q/4928560/35070 for details | ||||
|                 for i in range(1, 1024): | ||||
|                     try: | ||||
| @@ -2710,8 +2713,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | ||||
|             if location: | ||||
|                 # Per RFC 2616 the default charset is iso-8859-1, which Python 3 respects | ||||
|                 if sys.version_info >= (3, 0): | ||||
|                     location = location.encode('iso-8859-1').decode('utf-8') | ||||
|                 else: | ||||
|                     location = location.encode('iso-8859-1') | ||||
|                 location = location.decode('utf-8') | ||||
|                 location_escaped = escape_url(location) | ||||
|                 if location != location_escaped: | ||||
| @@ -2940,17 +2942,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | ||||
|  | ||||
|     The code is based on HTTPRedirectHandler implementation from CPython [1]. | ||||
|  | ||||
|     This redirect handler solves two issues: | ||||
|      - ensures redirect URL is always unicode under python 2 | ||||
|      - introduces support for experimental HTTP response status code | ||||
|        308 Permanent Redirect [2] used by some sites [3] | ||||
|     This redirect handler fixes and improves the logic to better align with RFC 7231 | ||||
|     and what browsers tend to do [2][3] | ||||
|  | ||||
|     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py | ||||
|     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308 | ||||
|     3. https://github.com/ytdl-org/youtube-dl/issues/28768 | ||||
|     2. https://datatracker.ietf.org/doc/html/rfc7231 | ||||
|     3. https://github.com/python/cpython/issues/91306 | ||||
|     """ | ||||
|  | ||||
|     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 | ||||
|     # Supply possibly missing alias | ||||
|     http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302 | ||||
|  | ||||
|     def redirect_request(self, req, fp, code, msg, headers, newurl): | ||||
|         """Return a Request or None in response to a redirect. | ||||
| @@ -2962,19 +2963,16 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | ||||
|         else should try to handle this url.  Return None if you can't | ||||
|         but another Handler might. | ||||
|         """ | ||||
|         m = req.get_method() | ||||
|         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD") | ||||
|                  or code in (301, 302, 303) and m == "POST")): | ||||
|             raise compat_HTTPError(req.full_url, code, msg, headers, fp) | ||||
|         # Strictly (according to RFC 2616), 301 or 302 in response to | ||||
|         # a POST MUST NOT cause a redirection without confirmation | ||||
|         # from the user (of urllib.request, in this case).  In practice, | ||||
|         # essentially all clients do redirect in this case, so we do | ||||
|         # the same. | ||||
|         if code not in (301, 302, 303, 307, 308): | ||||
|             raise compat_urllib_HTTPError(req.full_url, code, msg, headers, fp) | ||||
|  | ||||
|         new_method = req.get_method() | ||||
|         new_data = req.data | ||||
|         remove_headers = [] | ||||
|  | ||||
|         # On python 2 urlh.geturl() may sometimes return redirect URL | ||||
|         # as byte string instead of unicode. This workaround allows | ||||
|         # to force it always return unicode. | ||||
|         # as a byte string instead of unicode. This workaround forces | ||||
|         # it to return unicode. | ||||
|         if sys.version_info[0] < 3: | ||||
|             newurl = compat_str(newurl) | ||||
|  | ||||
| @@ -2983,13 +2981,29 @@ class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): | ||||
|         # but it is kept for compatibility with other callers. | ||||
|         newurl = newurl.replace(' ', '%20') | ||||
|  | ||||
|         CONTENT_HEADERS = ("content-length", "content-type") | ||||
|         # A 303 must either use GET or HEAD for subsequent request | ||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 | ||||
|         if code == 303 and req.get_method() != 'HEAD': | ||||
|             new_method = 'GET' | ||||
|         # 301 and 302 redirects are commonly turned into a GET from a POST | ||||
|         # for subsequent requests by browsers, so we'll do the same. | ||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 | ||||
|         # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 | ||||
|         elif code in (301, 302) and req.get_method() == 'POST': | ||||
|             new_method = 'GET' | ||||
|  | ||||
|         # only remove payload if method changed (e.g. POST to GET) | ||||
|         if new_method != req.get_method(): | ||||
|             new_data = None | ||||
|             remove_headers.extend(['Content-Length', 'Content-Type']) | ||||
|  | ||||
|         # NB: don't use dict comprehension for python 2.6 compatibility | ||||
|         newheaders = dict((k, v) for k, v in req.headers.items() | ||||
|                           if k.lower() not in CONTENT_HEADERS) | ||||
|         new_headers = dict((k, v) for k, v in req.header_items() | ||||
|                            if k.lower() not in remove_headers) | ||||
|  | ||||
|         return compat_urllib_request.Request( | ||||
|             newurl, headers=newheaders, origin_req_host=req.origin_req_host, | ||||
|             unverifiable=True) | ||||
|             newurl, headers=new_headers, origin_req_host=req.origin_req_host, | ||||
|             unverifiable=True, method=new_method, data=new_data) | ||||
|  | ||||
|  | ||||
| def extract_timezone(date_str): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user