mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[jsinterp] Clean up and pull yt-dlp style
* add compat_re_Pattern * improve compat_collections_chain_map * use class JS_Undefined * remove unused code
This commit is contained in:
		| @@ -11,8 +11,9 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| import math | ||||
| import re | ||||
|  | ||||
| from youtube_dl.jsinterp import JSInterpreter | ||||
| undefined = JSInterpreter.undefined | ||||
| from youtube_dl.compat import compat_re_Pattern | ||||
|  | ||||
| from youtube_dl.jsinterp import JS_Undefined, JSInterpreter | ||||
|  | ||||
|  | ||||
| class TestJSInterpreter(unittest.TestCase): | ||||
| @@ -261,12 +262,12 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { return undefined; } | ||||
|         ''') | ||||
|         self.assertIs(jsi.call_function('x'), undefined) | ||||
|         self.assertIs(jsi.call_function('x'), JS_Undefined) | ||||
|  | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { let v; return v; } | ||||
|         ''') | ||||
|         self.assertIs(jsi.call_function('x'), undefined) | ||||
|         self.assertIs(jsi.call_function('x'), JS_Undefined) | ||||
|  | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { return [undefined === undefined, undefined == undefined, undefined < undefined, undefined > undefined]; } | ||||
| @@ -307,7 +308,7 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { let v; return [v>42, v<=42, v&&42, 42&&v]; } | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('x'), [False, False, undefined, undefined]) | ||||
|         self.assertEqual(jsi.call_function('x'), [False, False, JS_Undefined, JS_Undefined]) | ||||
|  | ||||
|         jsi = JSInterpreter('function x(){return undefined ?? 42; }') | ||||
|         self.assertEqual(jsi.call_function('x'), 42) | ||||
| @@ -326,12 +327,12 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { let a; return a?.qq; } | ||||
|         ''') | ||||
|         self.assertIs(jsi.call_function('x'), undefined) | ||||
|         self.assertIs(jsi.call_function('x'), JS_Undefined) | ||||
|  | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { let a = {m1: 42, m2: 0 }; return a?.qq; } | ||||
|         ''') | ||||
|         self.assertIs(jsi.call_function('x'), undefined) | ||||
|         self.assertIs(jsi.call_function('x'), JS_Undefined) | ||||
|  | ||||
|     def test_regex(self): | ||||
|         jsi = JSInterpreter(''' | ||||
| @@ -342,13 +343,12 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { let a=/,,[/,913,/](,)}/; return a; } | ||||
|         ''') | ||||
|         # Pythons disagree on the type of a pattern | ||||
|         self.assertTrue(isinstance(jsi.call_function('x'), type(re.compile('')))) | ||||
|         self.assertIsInstance(jsi.call_function('x'), compat_re_Pattern) | ||||
|  | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { let a=/,,[/,913,/](,)}/i; return a; } | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('x').flags & re.I, re.I) | ||||
|         self.assertEqual(jsi.call_function('x').flags & ~re.U, re.I) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|   | ||||
| @@ -12,10 +12,11 @@ import io | ||||
| import re | ||||
| import string | ||||
|  | ||||
| from youtube_dl.compat import compat_str, compat_urlretrieve | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.jsinterp import JSInterpreter | ||||
| from youtube_dl.compat import compat_str, compat_urlretrieve | ||||
|  | ||||
| _SIG_TESTS = [ | ||||
|     ( | ||||
|   | ||||
| @@ -3023,18 +3023,34 @@ except ImportError: | ||||
|             self.maps[0].__setitem__(k, v) | ||||
|             return | ||||
|  | ||||
|         def __delitem__(self, k): | ||||
|         def __contains__(self, k): | ||||
|             return any((k in m) for m in self.maps) | ||||
|  | ||||
|         def __delitem(self, k): | ||||
|             if k in self.maps[0]: | ||||
|                 del self.maps[0][k] | ||||
|                 return | ||||
|             raise KeyError(k) | ||||
|  | ||||
|         def __delitem__(self, k): | ||||
|             self.__delitem(k) | ||||
|  | ||||
|         def __iter__(self): | ||||
|             return itertools.chain(*reversed(self.maps)) | ||||
|  | ||||
|         def __len__(self): | ||||
|             return len(iter(self)) | ||||
|  | ||||
|         # to match Py3, don't del directly | ||||
|         def pop(self, k, *args): | ||||
|             if self.__contains__(k): | ||||
|                 off = self.__getitem__(k) | ||||
|                 self.__delitem(k) | ||||
|                 return off | ||||
|             elif len(args) > 0: | ||||
|                 return args[0] | ||||
|             raise KeyError(k) | ||||
|  | ||||
|         def new_child(self, m=None, **kwargs): | ||||
|             m = m or {} | ||||
|             m.update(kwargs) | ||||
| @@ -3044,6 +3060,8 @@ except ImportError: | ||||
|         def parents(self): | ||||
|             return compat_collections_chain_map(*(self.maps[1:])) | ||||
|  | ||||
| # Pythons disagree on the type of a pattern (RegexObject, _sre.SRE_Pattern, Pattern, ...?) | ||||
| compat_re_Pattern = type(re.compile('')) | ||||
|  | ||||
| if sys.version_info < (3, 3): | ||||
|     def compat_b64decode(s, *args, **kwargs): | ||||
| @@ -3110,6 +3128,7 @@ __all__ = [ | ||||
|     'compat_os_name', | ||||
|     'compat_parse_qs', | ||||
|     'compat_print', | ||||
|     'compat_re_Pattern', | ||||
|     'compat_realpath', | ||||
|     'compat_setenv', | ||||
|     'compat_shlex_quote', | ||||
|   | ||||
| @@ -19,16 +19,12 @@ from .compat import ( | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
| _NAME_RE = r'[a-zA-Z_$][\w$]*' | ||||
|  | ||||
| _UNDEFINED = object() | ||||
|  | ||||
|  | ||||
| def _js_bit_op(op): | ||||
|  | ||||
|     def wrapped(a, b): | ||||
|         def zeroise(x): | ||||
|             return 0 if x in (None, _UNDEFINED) else x | ||||
|             return 0 if x in (None, JS_Undefined) else x | ||||
|         return op(zeroise(a), zeroise(b)) | ||||
|  | ||||
|     return wrapped | ||||
| @@ -37,7 +33,7 @@ def _js_bit_op(op): | ||||
| def _js_arith_op(op): | ||||
|  | ||||
|     def wrapped(a, b): | ||||
|         if _UNDEFINED in (a, b): | ||||
|         if JS_Undefined in (a, b): | ||||
|             return float('nan') | ||||
|         return op(a or 0, b or 0) | ||||
|  | ||||
| @@ -45,22 +41,21 @@ def _js_arith_op(op): | ||||
|  | ||||
|  | ||||
| def _js_div(a, b): | ||||
|     if _UNDEFINED in (a, b) or not (a and b): | ||||
|     if JS_Undefined in (a, b) or not (a and b): | ||||
|         return float('nan') | ||||
|     return float('inf') if not b else operator.truediv(a or 0, b) | ||||
|  | ||||
|  | ||||
| def _js_mod(a, b): | ||||
|     if _UNDEFINED in (a, b) or not b: | ||||
|     if JS_Undefined in (a, b) or not b: | ||||
|         return float('nan') | ||||
|     return (a or 0) % b | ||||
|  | ||||
|  | ||||
| def _js_exp(a, b): | ||||
|     if not b: | ||||
|         # even 0 ** 0 !! | ||||
|         return 1 | ||||
|     if _UNDEFINED in (a, b): | ||||
|         return 1  # even 0 ** 0 !! | ||||
|     elif JS_Undefined in (a, b): | ||||
|         return float('nan') | ||||
|     return (a or 0) ** b | ||||
|  | ||||
| @@ -68,7 +63,7 @@ def _js_exp(a, b): | ||||
| def _js_eq_op(op): | ||||
|  | ||||
|     def wrapped(a, b): | ||||
|         if set((a, b)) <= set((None, _UNDEFINED)): | ||||
|         if set((a, b)) <= set((None, JS_Undefined)): | ||||
|             return op(a, a) | ||||
|         return op(a, b) | ||||
|  | ||||
| @@ -78,21 +73,28 @@ def _js_eq_op(op): | ||||
| def _js_comp_op(op): | ||||
|  | ||||
|     def wrapped(a, b): | ||||
|         if _UNDEFINED in (a, b): | ||||
|         if JS_Undefined in (a, b): | ||||
|             return False | ||||
|         return op(a or 0, b or 0) | ||||
|  | ||||
|     return wrapped | ||||
|  | ||||
|  | ||||
| def _js_ternary(cndn, if_true=True, if_false=False): | ||||
|     """Simulate JS's ternary operator (cndn?if_true:if_false)""" | ||||
|     if cndn in (False, None, 0, '', JS_Undefined): | ||||
|         return if_false | ||||
|     try: | ||||
|         if math.isnan(cndn):  # NB: NaN cannot be checked by membership | ||||
|             return if_false | ||||
|     except TypeError: | ||||
|         pass | ||||
|     return if_true | ||||
|  | ||||
|  | ||||
| # (op, definition) in order of binding priority, tightest first | ||||
| # avoid dict to maintain order | ||||
| # definition None => Defined in JSInterpreter._operator | ||||
| _DOT_OPERATORS = ( | ||||
|     ('.', None), | ||||
|     # TODO: ('?.', None), | ||||
| ) | ||||
|  | ||||
| _OPERATORS = ( | ||||
|     ('>>', _js_bit_op(operator.rshift)), | ||||
|     ('<<', _js_bit_op(operator.lshift)), | ||||
| @@ -130,20 +132,13 @@ _SC_OPERATORS = ( | ||||
|  | ||||
| _OPERATOR_RE = '|'.join(map(lambda x: re.escape(x[0]), _OPERATORS + _LOG_OPERATORS)) | ||||
|  | ||||
| _NAME_RE = r'[a-zA-Z_$][\w$]*' | ||||
| _MATCHING_PARENS = dict(zip(*zip('()', '{}', '[]'))) | ||||
| _QUOTES = '\'"/' | ||||
|  | ||||
|  | ||||
| def _ternary(cndn, if_true=True, if_false=False): | ||||
|     """Simulate JS's ternary operator (cndn?if_true:if_false)""" | ||||
|     if cndn in (False, None, 0, '', _UNDEFINED): | ||||
|         return if_false | ||||
|     try: | ||||
|         if math.isnan(cndn):  # NB: NaN cannot be checked by membership | ||||
|             return if_false | ||||
|     except TypeError: | ||||
|         pass | ||||
|     return if_true | ||||
| class JS_Undefined(object): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| class JS_Break(ExtractorError): | ||||
| @@ -167,7 +162,7 @@ class LocalNameSpace(ChainMap): | ||||
|         try: | ||||
|             return super(LocalNameSpace, self).__getitem__(key) | ||||
|         except KeyError: | ||||
|             return _UNDEFINED | ||||
|             return JS_Undefined | ||||
|  | ||||
|     def __setitem__(self, key, value): | ||||
|         for scope in self.maps: | ||||
| @@ -179,24 +174,6 @@ class LocalNameSpace(ChainMap): | ||||
|     def __delitem__(self, key): | ||||
|         raise NotImplementedError('Deleting is not supported') | ||||
|  | ||||
|     # except | ||||
|     def pop(self, key, *args): | ||||
|         try: | ||||
|             off = self.__getitem__(key) | ||||
|             super(LocalNameSpace, self).__delitem__(key) | ||||
|             return off | ||||
|         except KeyError: | ||||
|             if len(args) > 0: | ||||
|                 return args[0] | ||||
|             raise | ||||
|  | ||||
|     def __contains__(self, key): | ||||
|         try: | ||||
|             super(LocalNameSpace, self).__getitem__(key) | ||||
|             return True | ||||
|         except KeyError: | ||||
|             return False | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return 'LocalNameSpace%s' % (self.maps, ) | ||||
|  | ||||
| @@ -204,9 +181,7 @@ class LocalNameSpace(ChainMap): | ||||
| class JSInterpreter(object): | ||||
|     __named_object_counter = 0 | ||||
|  | ||||
|     undefined = _UNDEFINED | ||||
|  | ||||
|     RE_FLAGS = { | ||||
|     _RE_FLAGS = { | ||||
|         # special knowledge: Python's re flags are bitmask values, current max 128 | ||||
|         # invent new bitmask values well above that for literal parsing | ||||
|         # TODO: new pattern class to execute matches with these flags | ||||
| @@ -257,10 +232,10 @@ class JSInterpreter(object): | ||||
|         if not expr: | ||||
|             return flags, expr | ||||
|         for idx, ch in enumerate(expr): | ||||
|             if ch not in cls.RE_FLAGS: | ||||
|             if ch not in cls._RE_FLAGS: | ||||
|                 break | ||||
|             flags |= cls.RE_FLAGS[ch] | ||||
|         return flags, expr[idx:] if idx > 0 else expr | ||||
|             flags |= cls._RE_FLAGS[ch] | ||||
|         return flags, expr[idx + 1:] | ||||
|  | ||||
|     @classmethod | ||||
|     def _separate(cls, expr, delim=',', max_split=None, skip_delims=None): | ||||
| @@ -283,14 +258,6 @@ class JSInterpreter(object): | ||||
|             if not escaping and char in _QUOTES and in_quote in (char, None): | ||||
|                 if in_quote or after_op or char != '/': | ||||
|                     in_quote = None if in_quote and not in_regex_char_group else char | ||||
|                     if in_quote is None and char == '/' and delim != '/': | ||||
|                         # regexp flags | ||||
|                         n_idx = idx + 1 | ||||
|                         while n_idx < len(expr) and expr[n_idx] in cls.RE_FLAGS: | ||||
|                             n_idx += 1 | ||||
|                         skip_re = n_idx - idx - 1 | ||||
|                         if skip_re > 0: | ||||
|                             continue | ||||
|             elif in_quote == '/' and char in '[]': | ||||
|                 in_regex_char_group = char == '[' | ||||
|             escaping = not escaping and in_quote and char == '\\' | ||||
| @@ -336,13 +303,13 @@ class JSInterpreter(object): | ||||
|  | ||||
|     def _operator(self, op, left_val, right_expr, expr, local_vars, allow_recursion): | ||||
|         if op in ('||', '&&'): | ||||
|             if (op == '&&') ^ _ternary(left_val): | ||||
|             if (op == '&&') ^ _js_ternary(left_val): | ||||
|                 return left_val  # short circuiting | ||||
|         elif op == '??': | ||||
|             if left_val not in (None, self.undefined): | ||||
|             if left_val not in (None, JS_Undefined): | ||||
|                 return left_val | ||||
|         elif op == '?': | ||||
|             right_expr = _ternary(left_val, *self._separate(right_expr, ':', 1)) | ||||
|             right_expr = _js_ternary(left_val, *self._separate(right_expr, ':', 1)) | ||||
|  | ||||
|         right_val = self.interpret_expression(right_expr, local_vars, allow_recursion) | ||||
|         opfunc = op and next((v for k, v in self._all_operators() if k == op), None) | ||||
| @@ -361,7 +328,7 @@ class JSInterpreter(object): | ||||
|             return obj[int(idx)] if isinstance(obj, list) else obj[idx] | ||||
|         except Exception as e: | ||||
|             if allow_undefined: | ||||
|                 return self.undefined | ||||
|                 return JS_Undefined | ||||
|             raise self.Exception('Cannot get index {idx}'.format(**locals()), expr=repr(obj), cause=e) | ||||
|  | ||||
|     def _dump(self, obj, namespace): | ||||
| @@ -395,9 +362,8 @@ class JSInterpreter(object): | ||||
|         if expr[0] in _QUOTES: | ||||
|             inner, outer = self._separate(expr, expr[0], 1) | ||||
|             if expr[0] == '/': | ||||
|                 flags, _ = self._regex_flags(outer) | ||||
|                 inner, outer = inner.replace('"', r'\"'), '' | ||||
|                 inner = re.compile(js_to_json(inner + expr[0]), flags=flags)  # , strict=True)) | ||||
|                 flags, outer = self._regex_flags(outer) | ||||
|                 inner = re.compile(inner[1:], flags=flags)  # , strict=True)) | ||||
|             else: | ||||
|                 inner = json.loads(js_to_json(inner + expr[0]))  # , strict=True)) | ||||
|             if not outer: | ||||
| @@ -422,7 +388,7 @@ class JSInterpreter(object): | ||||
|  | ||||
|         if expr.startswith('{'): | ||||
|             inner, outer = self._separate_at_paren(expr, '}') | ||||
|             # try for object expression | ||||
|             # try for object expression (Map) | ||||
|             sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)] | ||||
|             if all(len(sub_expr) == 2 for sub_expr in sub_expressions): | ||||
|                 return dict( | ||||
| @@ -455,7 +421,8 @@ class JSInterpreter(object): | ||||
|             (?P<try>try|finally)\s*| | ||||
|             (?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))| | ||||
|             (?P<switch>switch)\s*\(| | ||||
|             (?P<for>for)\s*\(|'''.format(**globals()), expr) | ||||
|             (?P<for>for)\s*\(| | ||||
|             '''.format(**globals()), expr) | ||||
|         md = m.groupdict() if m else {} | ||||
|         if md.get('try'): | ||||
|             if expr[m.end()] == '{': | ||||
| @@ -500,7 +467,7 @@ class JSInterpreter(object): | ||||
|             start, cndn, increment = self._separate(constructor, ';') | ||||
|             self.interpret_expression(start, local_vars, allow_recursion) | ||||
|             while True: | ||||
|                 if not _ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): | ||||
|                 if not _js_ternary(self.interpret_expression(cndn, local_vars, allow_recursion)): | ||||
|                     break | ||||
|                 try: | ||||
|                     ret, should_abort = self.interpret_statement(body, local_vars, allow_recursion) | ||||
| @@ -587,7 +554,7 @@ class JSInterpreter(object): | ||||
|                 local_vars[m.group('out')] = self._operator( | ||||
|                     m.group('op'), left_val, m.group('expr'), expr, local_vars, allow_recursion) | ||||
|                 return local_vars[m.group('out')], should_return | ||||
|             elif left_val in (None, self.undefined): | ||||
|             elif left_val in (None, JS_Undefined): | ||||
|                 raise self.Exception('Cannot index undefined variable ' + m.group('out'), expr=expr) | ||||
|  | ||||
|             idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion) | ||||
| @@ -607,7 +574,7 @@ class JSInterpreter(object): | ||||
|             raise JS_Continue() | ||||
|  | ||||
|         elif expr == 'undefined': | ||||
|             return self.undefined, should_return | ||||
|             return JS_Undefined, should_return | ||||
|  | ||||
|         elif md.get('return'): | ||||
|             return local_vars[m.group('name')], should_return | ||||
| @@ -663,9 +630,9 @@ class JSInterpreter(object): | ||||
|                     'Math': float, | ||||
|                 } | ||||
|                 obj = local_vars.get(variable) | ||||
|                 if obj in (self.undefined, None): | ||||
|                     obj = types.get(variable, self.undefined) | ||||
|                 if obj is self.undefined: | ||||
|                 if obj in (JS_Undefined, None): | ||||
|                     obj = types.get(variable, JS_Undefined) | ||||
|                 if obj is JS_Undefined: | ||||
|                     try: | ||||
|                         if variable not in self._objects: | ||||
|                             self._objects[variable] = self.extract_object(variable) | ||||
| @@ -674,8 +641,8 @@ class JSInterpreter(object): | ||||
|                         if not nullish: | ||||
|                             raise | ||||
|  | ||||
|                 if nullish and obj is self.undefined: | ||||
|                     return self.undefined | ||||
|                 if nullish and obj is JS_Undefined: | ||||
|                     return JS_Undefined | ||||
|  | ||||
|                 # Member access | ||||
|                 if arg_str is None: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user