mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[jsinterp] Handle new YT players 113ca41c, c57c113c
* add NaN
* allow any white-space character for `after_op`
* align with yt-dlp f26af78a8ac11d9d617ed31ea5282cfaa5bcbcfa (charCodeAt and bitwise overflow)
* allow escaping in regex, fixing player c57c113c
This commit is contained in:
		| @@ -135,6 +135,11 @@ class TestJSInterpreter(unittest.TestCase): | |||||||
|         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) |         self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) | ||||||
|  |  | ||||||
|     def test_builtins(self): |     def test_builtins(self): | ||||||
|  |         jsi = JSInterpreter(''' | ||||||
|  |         function x() { return NaN } | ||||||
|  |         ''') | ||||||
|  |         self.assertTrue(math.isnan(jsi.call_function('x'))) | ||||||
|  |  | ||||||
|         jsi = JSInterpreter(''' |         jsi = JSInterpreter(''' | ||||||
|         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } |         function x() { return new Date('Wednesday 31 December 1969 18:01:26 MDT') - 0; } | ||||||
|         ''') |         ''') | ||||||
| @@ -385,6 +390,22 @@ class TestJSInterpreter(unittest.TestCase): | |||||||
|         ''') |         ''') | ||||||
|         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) |         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) | ||||||
|  |  | ||||||
|  |     def test_char_code_at(self): | ||||||
|  |         jsi = JSInterpreter('function x(i){return "test".charCodeAt(i)}') | ||||||
|  |         self.assertEqual(jsi.call_function('x', 0), 116) | ||||||
|  |         self.assertEqual(jsi.call_function('x', 1), 101) | ||||||
|  |         self.assertEqual(jsi.call_function('x', 2), 115) | ||||||
|  |         self.assertEqual(jsi.call_function('x', 3), 116) | ||||||
|  |         self.assertEqual(jsi.call_function('x', 4), None) | ||||||
|  |         self.assertEqual(jsi.call_function('x', 'not_a_number'), 116) | ||||||
|  |  | ||||||
|  |     def test_bitwise_operators_overflow(self): | ||||||
|  |         jsi = JSInterpreter('function x(){return -524999584 << 5}') | ||||||
|  |         self.assertEqual(jsi.call_function('x'), 379882496) | ||||||
|  |  | ||||||
|  |         jsi = JSInterpreter('function x(){return 1236566549 << 5}') | ||||||
|  |         self.assertEqual(jsi.call_function('x'), 915423904) | ||||||
|  |  | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -111,10 +111,26 @@ _NSIG_TESTS = [ | |||||||
|         'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', |         'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', | ||||||
|         'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', |         'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', | ||||||
|     ), |     ), | ||||||
|  |     ( | ||||||
|  |         'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js', | ||||||
|  |         '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw', | ||||||
|  |     ), | ||||||
|     ( |     ( | ||||||
|         'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js', |         'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js', | ||||||
|         '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ', |         '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ', | ||||||
|     ), |     ), | ||||||
|  |     ( | ||||||
|  |         'https://www.youtube.com/s/player/c2199353/player_ias.vflset/en_US/base.js', | ||||||
|  |         '5EHDMgYLV6HPGk_Mu-kk', 'AD5rgS85EkrE7', | ||||||
|  |     ), | ||||||
|  |     ( | ||||||
|  |         'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js', | ||||||
|  |         'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg', | ||||||
|  |     ), | ||||||
|  |     ( | ||||||
|  |         'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js', | ||||||
|  |         '-Txvy6bT5R6LqgnQNx', 'dcklJCnRUHbgSg', | ||||||
|  |     ), | ||||||
| ] | ] | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
| @@ -23,10 +23,11 @@ from .compat import ( | |||||||
|  |  | ||||||
| def _js_bit_op(op): | def _js_bit_op(op): | ||||||
|  |  | ||||||
|     def wrapped(a, b): |  | ||||||
|     def zeroise(x): |     def zeroise(x): | ||||||
|         return 0 if x in (None, JS_Undefined) else x |         return 0 if x in (None, JS_Undefined) else x | ||||||
|         return op(zeroise(a), zeroise(b)) |  | ||||||
|  |     def wrapped(a, b): | ||||||
|  |         return op(zeroise(a), zeroise(b)) & 0xffffffff | ||||||
|  |  | ||||||
|     return wrapped |     return wrapped | ||||||
|  |  | ||||||
| @@ -44,7 +45,7 @@ def _js_arith_op(op): | |||||||
| def _js_div(a, b): | def _js_div(a, b): | ||||||
|     if JS_Undefined in (a, b) or not (a and b): |     if JS_Undefined in (a, b) or not (a and b): | ||||||
|         return float('nan') |         return float('nan') | ||||||
|     return float('inf') if not b else operator.truediv(a or 0, b) |     return operator.truediv(a or 0, b) if b else float('inf') | ||||||
|  |  | ||||||
|  |  | ||||||
| def _js_mod(a, b): | def _js_mod(a, b): | ||||||
| @@ -260,13 +261,14 @@ class JSInterpreter(object): | |||||||
|                     counters[_MATCHING_PARENS[char]] += 1 |                     counters[_MATCHING_PARENS[char]] += 1 | ||||||
|                 elif char in counters: |                 elif char in counters: | ||||||
|                     counters[char] -= 1 |                     counters[char] -= 1 | ||||||
|             if not escaping and char in _QUOTES and in_quote in (char, None): |             if not escaping: | ||||||
|  |                 if char in _QUOTES and in_quote in (char, None): | ||||||
|                     if in_quote or after_op or char != '/': |                     if in_quote or after_op or char != '/': | ||||||
|                         in_quote = None if in_quote and not in_regex_char_group else char |                         in_quote = None if in_quote and not in_regex_char_group else char | ||||||
|                 elif in_quote == '/' and char in '[]': |                 elif in_quote == '/' and char in '[]': | ||||||
|                     in_regex_char_group = char == '[' |                     in_regex_char_group = char == '[' | ||||||
|             escaping = not escaping and in_quote and char == '\\' |             escaping = not escaping and in_quote and char == '\\' | ||||||
|             after_op = not in_quote and char in cls.OP_CHARS or (char == ' ' and after_op) |             after_op = not in_quote and (char in cls.OP_CHARS or (char.isspace() and after_op)) | ||||||
|  |  | ||||||
|             if char != delim[pos] or any(counters.values()) or in_quote: |             if char != delim[pos] or any(counters.values()) or in_quote: | ||||||
|                 pos = skipping = 0 |                 pos = skipping = 0 | ||||||
| @@ -590,6 +592,8 @@ class JSInterpreter(object): | |||||||
|  |  | ||||||
|         elif expr == 'undefined': |         elif expr == 'undefined': | ||||||
|             return JS_Undefined, should_return |             return JS_Undefined, should_return | ||||||
|  |         elif expr == 'NaN': | ||||||
|  |             return float('NaN'), should_return | ||||||
|  |  | ||||||
|         elif md.get('return'): |         elif md.get('return'): | ||||||
|             return local_vars[m.group('name')], should_return |             return local_vars[m.group('name')], should_return | ||||||
| @@ -635,7 +639,8 @@ class JSInterpreter(object): | |||||||
|             def assertion(cndn, msg): |             def assertion(cndn, msg): | ||||||
|                 """ assert, but without risk of getting optimized out """ |                 """ assert, but without risk of getting optimized out """ | ||||||
|                 if not cndn: |                 if not cndn: | ||||||
|                     raise ExtractorError('{member} {msg}'.format(**locals()), expr=expr) |                     memb = member | ||||||
|  |                     raise self.Exception('{member} {msg}'.format(**locals()), expr=expr) | ||||||
|  |  | ||||||
|             def eval_method(): |             def eval_method(): | ||||||
|                 if (variable, member) == ('console', 'debug'): |                 if (variable, member) == ('console', 'debug'): | ||||||
| @@ -737,6 +742,13 @@ class JSInterpreter(object): | |||||||
|                         return obj.index(idx, start) |                         return obj.index(idx, start) | ||||||
|                     except ValueError: |                     except ValueError: | ||||||
|                         return -1 |                         return -1 | ||||||
|  |                 elif member == 'charCodeAt': | ||||||
|  |                     assertion(isinstance(obj, compat_str), 'must be applied on a string') | ||||||
|  |                     # assertion(len(argvals) == 1, 'takes exactly one argument') # but not enforced | ||||||
|  |                     idx = argvals[0] if isinstance(argvals[0], int) else 0 | ||||||
|  |                     if idx >= len(obj): | ||||||
|  |                         return None | ||||||
|  |                     return ord(obj[idx]) | ||||||
|  |  | ||||||
|                 idx = int(member) if isinstance(obj, list) else member |                 idx = int(member) if isinstance(obj, list) else member | ||||||
|                 return obj[idx](argvals, allow_recursion=allow_recursion) |                 return obj[idx](argvals, allow_recursion=allow_recursion) | ||||||
| @@ -820,11 +832,9 @@ class JSInterpreter(object): | |||||||
|             if mobj is None: |             if mobj is None: | ||||||
|                 break |                 break | ||||||
|             start, body_start = mobj.span() |             start, body_start = mobj.span() | ||||||
|             body, remaining = self._separate_at_paren(code[body_start - 1:]) |             body, remaining = self._separate_at_paren(code[body_start - 1:], '}') | ||||||
|             name = self._named_object( |             name = self._named_object(local_vars, self.extract_function_from_code( | ||||||
|                 local_vars, |                 [x.strip() for x in mobj.group('args').split(',')], | ||||||
|                 self.extract_function_from_code( |  | ||||||
|                     self.build_arglist(mobj.group('args')), |  | ||||||
|                 body, local_vars, *global_stack)) |                 body, local_vars, *global_stack)) | ||||||
|             code = code[:start] + name + remaining |             code = code[:start] + name + remaining | ||||||
|         return self.build_function(argnames, code, local_vars, *global_stack) |         return self.build_function(argnames, code, local_vars, *global_stack) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user