mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178)
Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes).
This commit is contained in:
		| @@ -13,8 +13,10 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |||||||
| from youtube_dl.utils import get_filesystem_encoding | from youtube_dl.utils import get_filesystem_encoding | ||||||
| from youtube_dl.compat import ( | from youtube_dl.compat import ( | ||||||
|     compat_getenv, |     compat_getenv, | ||||||
|  |     compat_etree_fromstring, | ||||||
|     compat_expanduser, |     compat_expanduser, | ||||||
|     compat_shlex_split, |     compat_shlex_split, | ||||||
|  |     compat_str, | ||||||
|     compat_urllib_parse_unquote, |     compat_urllib_parse_unquote, | ||||||
|     compat_urllib_parse_unquote_plus, |     compat_urllib_parse_unquote_plus, | ||||||
| ) | ) | ||||||
| @@ -71,5 +73,10 @@ class TestCompat(unittest.TestCase): | |||||||
|     def test_compat_shlex_split(self): |     def test_compat_shlex_split(self): | ||||||
|         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) |         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) | ||||||
|  |  | ||||||
|  |     def test_compat_etree_fromstring(self): | ||||||
|  |         xml = '<el foo="bar"></el>' | ||||||
|  |         doc = compat_etree_fromstring(xml.encode('utf-8')) | ||||||
|  |         self.assertTrue(isinstance(doc.attrib['foo'], compat_str)) | ||||||
|  |  | ||||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||||
|     unittest.main() |     unittest.main() | ||||||
|   | |||||||
| @@ -68,6 +68,9 @@ from youtube_dl.utils import ( | |||||||
|     cli_valueless_option, |     cli_valueless_option, | ||||||
|     cli_bool_option, |     cli_bool_option, | ||||||
| ) | ) | ||||||
|  | from youtube_dl.compat import ( | ||||||
|  |     compat_etree_fromstring, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestUtil(unittest.TestCase): | class TestUtil(unittest.TestCase): | ||||||
| @@ -242,7 +245,7 @@ class TestUtil(unittest.TestCase): | |||||||
|             <node x="b" y="d" /> |             <node x="b" y="d" /> | ||||||
|             <node x="" /> |             <node x="" /> | ||||||
|         </root>''' |         </root>''' | ||||||
|         doc = xml.etree.ElementTree.fromstring(testxml) |         doc = compat_etree_fromstring(testxml) | ||||||
|  |  | ||||||
|         self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None) |         self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None) | ||||||
|         self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) |         self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) | ||||||
| @@ -263,7 +266,7 @@ class TestUtil(unittest.TestCase): | |||||||
|                 <url>http://server.com/download.mp3</url> |                 <url>http://server.com/download.mp3</url> | ||||||
|             </media:song> |             </media:song> | ||||||
|         </root>''' |         </root>''' | ||||||
|         doc = xml.etree.ElementTree.fromstring(testxml) |         doc = compat_etree_fromstring(testxml) | ||||||
|         find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) |         find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'})) | ||||||
|         self.assertTrue(find('media:song') is not None) |         self.assertTrue(find('media:song') is not None) | ||||||
|         self.assertEqual(find('media:song/media:author').text, 'The Author') |         self.assertEqual(find('media:song/media:author').text, 'The Author') | ||||||
| @@ -285,7 +288,7 @@ class TestUtil(unittest.TestCase): | |||||||
|                 <p>Foo</p> |                 <p>Foo</p> | ||||||
|             </div> |             </div> | ||||||
|         </root>''' |         </root>''' | ||||||
|         doc = xml.etree.ElementTree.fromstring(testxml) |         doc = compat_etree_fromstring(testxml) | ||||||
|         self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') |         self.assertEqual(xpath_text(doc, 'div/p'), 'Foo') | ||||||
|         self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default') |         self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default') | ||||||
|         self.assertTrue(xpath_text(doc, 'div/bar') is None) |         self.assertTrue(xpath_text(doc, 'div/bar') is None) | ||||||
| @@ -297,7 +300,7 @@ class TestUtil(unittest.TestCase): | |||||||
|                 <p x="a">Foo</p> |                 <p x="a">Foo</p> | ||||||
|             </div> |             </div> | ||||||
|         </root>''' |         </root>''' | ||||||
|         doc = xml.etree.ElementTree.fromstring(testxml) |         doc = compat_etree_fromstring(testxml) | ||||||
|         self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a') |         self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a') | ||||||
|         self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None) |         self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None) | ||||||
|         self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None) |         self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None) | ||||||
|   | |||||||
| @@ -14,6 +14,7 @@ import socket | |||||||
| import subprocess | import subprocess | ||||||
| import sys | import sys | ||||||
| import itertools | import itertools | ||||||
|  | import xml.etree.ElementTree | ||||||
|  |  | ||||||
|  |  | ||||||
| try: | try: | ||||||
| @@ -212,6 +213,29 @@ try: | |||||||
| except ImportError:  # Python 2.6 | except ImportError:  # Python 2.6 | ||||||
|     from xml.parsers.expat import ExpatError as compat_xml_parse_error |     from xml.parsers.expat import ExpatError as compat_xml_parse_error | ||||||
|  |  | ||||||
|  | if sys.version_info[0] >= 3: | ||||||
|  |     compat_etree_fromstring = xml.etree.ElementTree.fromstring | ||||||
|  | else: | ||||||
|  |     # on python 2.x the attributes of a node are str objects instead of | ||||||
|  |     # unicode | ||||||
|  |     etree = xml.etree.ElementTree | ||||||
|  |  | ||||||
|  |     # on 2.6 XML doesn't have a parser argument, so this function is copied | ||||||
|  |     # from the CPython 2.7 source | ||||||
|  |     def _XML(text, parser=None): | ||||||
|  |         if not parser: | ||||||
|  |             parser = etree.XMLParser(target=etree.TreeBuilder()) | ||||||
|  |         parser.feed(text) | ||||||
|  |         return parser.close() | ||||||
|  |  | ||||||
|  |     def _element_factory(*args, **kwargs): | ||||||
|  |         el = etree.Element(*args, **kwargs) | ||||||
|  |         for k, v in el.items(): | ||||||
|  |             el.set(k, v.decode('utf-8')) | ||||||
|  |         return el | ||||||
|  |  | ||||||
|  |     def compat_etree_fromstring(text): | ||||||
|  |         return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory))) | ||||||
|  |  | ||||||
| try: | try: | ||||||
|     from urllib.parse import parse_qs as compat_parse_qs |     from urllib.parse import parse_qs as compat_parse_qs | ||||||
| @@ -507,6 +531,7 @@ __all__ = [ | |||||||
|     'compat_chr', |     'compat_chr', | ||||||
|     'compat_cookiejar', |     'compat_cookiejar', | ||||||
|     'compat_cookies', |     'compat_cookies', | ||||||
|  |     'compat_etree_fromstring', | ||||||
|     'compat_expanduser', |     'compat_expanduser', | ||||||
|     'compat_get_terminal_size', |     'compat_get_terminal_size', | ||||||
|     'compat_getenv', |     'compat_getenv', | ||||||
|   | |||||||
| @@ -5,10 +5,10 @@ import io | |||||||
| import itertools | import itertools | ||||||
| import os | import os | ||||||
| import time | import time | ||||||
| import xml.etree.ElementTree as etree |  | ||||||
|  |  | ||||||
| from .fragment import FragmentFD | from .fragment import FragmentFD | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|  |     compat_etree_fromstring, | ||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|     compat_urllib_error, |     compat_urllib_error, | ||||||
|     compat_urllib_parse_urlparse, |     compat_urllib_parse_urlparse, | ||||||
| @@ -290,7 +290,7 @@ class F4mFD(FragmentFD): | |||||||
|         man_url = urlh.geturl() |         man_url = urlh.geturl() | ||||||
|         manifest = urlh.read() |         manifest = urlh.read() | ||||||
|  |  | ||||||
|         doc = etree.fromstring(manifest) |         doc = compat_etree_fromstring(manifest) | ||||||
|         formats = [(int(f.attrib.get('bitrate', -1)), f) |         formats = [(int(f.attrib.get('bitrate', -1)), f) | ||||||
|                    for f in self._get_unencrypted_media(doc)] |                    for f in self._get_unencrypted_media(doc)] | ||||||
|         if requested_bitrate is None: |         if requested_bitrate is None: | ||||||
|   | |||||||
| @@ -2,7 +2,6 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
| import xml.etree.ElementTree |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
| @@ -14,7 +13,10 @@ from ..utils import ( | |||||||
|     remove_end, |     remove_end, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
| ) | ) | ||||||
| from ..compat import compat_HTTPError | from ..compat import ( | ||||||
|  |     compat_etree_fromstring, | ||||||
|  |     compat_HTTPError, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class BBCCoUkIE(InfoExtractor): | class BBCCoUkIE(InfoExtractor): | ||||||
| @@ -344,7 +346,7 @@ class BBCCoUkIE(InfoExtractor): | |||||||
|                 url, programme_id, 'Downloading media selection XML') |                 url, programme_id, 'Downloading media selection XML') | ||||||
|         except ExtractorError as ee: |         except ExtractorError as ee: | ||||||
|             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: |             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: | ||||||
|                 media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8')) |                 media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8')) | ||||||
|             else: |             else: | ||||||
|                 raise |                 raise | ||||||
|         return self._process_media_selector(media_selection, programme_id) |         return self._process_media_selector(media_selection, programme_id) | ||||||
|   | |||||||
| @@ -4,9 +4,11 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
| import itertools | import itertools | ||||||
| import json | import json | ||||||
| import xml.etree.ElementTree as ET |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..compat import ( | ||||||
|  |     compat_etree_fromstring, | ||||||
|  | ) | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     unified_strdate, |     unified_strdate, | ||||||
| @@ -88,7 +90,7 @@ class BiliBiliIE(InfoExtractor): | |||||||
|         except ValueError: |         except ValueError: | ||||||
|             pass |             pass | ||||||
|  |  | ||||||
|         lq_doc = ET.fromstring(lq_page) |         lq_doc = compat_etree_fromstring(lq_page) | ||||||
|         lq_durls = lq_doc.findall('./durl') |         lq_durls = lq_doc.findall('./durl') | ||||||
|  |  | ||||||
|         hq_doc = self._download_xml( |         hq_doc = self._download_xml( | ||||||
|   | |||||||
| @@ -3,10 +3,10 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import re | import re | ||||||
| import json | import json | ||||||
| import xml.etree.ElementTree |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|  |     compat_etree_fromstring, | ||||||
|     compat_parse_qs, |     compat_parse_qs, | ||||||
|     compat_str, |     compat_str, | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
| @@ -119,7 +119,7 @@ class BrightcoveIE(InfoExtractor): | |||||||
|         object_str = fix_xml_ampersands(object_str) |         object_str = fix_xml_ampersands(object_str) | ||||||
|  |  | ||||||
|         try: |         try: | ||||||
|             object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8')) |             object_doc = compat_etree_fromstring(object_str.encode('utf-8')) | ||||||
|         except compat_xml_parse_error: |         except compat_xml_parse_error: | ||||||
|             return |             return | ||||||
|  |  | ||||||
|   | |||||||
| @@ -10,7 +10,6 @@ import re | |||||||
| import socket | import socket | ||||||
| import sys | import sys | ||||||
| import time | import time | ||||||
| import xml.etree.ElementTree |  | ||||||
|  |  | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|     compat_cookiejar, |     compat_cookiejar, | ||||||
| @@ -23,6 +22,7 @@ from ..compat import ( | |||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
|     compat_urlparse, |     compat_urlparse, | ||||||
|     compat_str, |     compat_str, | ||||||
|  |     compat_etree_fromstring, | ||||||
| ) | ) | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     NO_DEFAULT, |     NO_DEFAULT, | ||||||
| @@ -461,7 +461,7 @@ class InfoExtractor(object): | |||||||
|             return xml_string |             return xml_string | ||||||
|         if transform_source: |         if transform_source: | ||||||
|             xml_string = transform_source(xml_string) |             xml_string = transform_source(xml_string) | ||||||
|         return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) |         return compat_etree_fromstring(xml_string.encode('utf-8')) | ||||||
|  |  | ||||||
|     def _download_json(self, url_or_request, video_id, |     def _download_json(self, url_or_request, video_id, | ||||||
|                        note='Downloading JSON metadata', |                        note='Downloading JSON metadata', | ||||||
|   | |||||||
| @@ -5,12 +5,12 @@ import re | |||||||
| import json | import json | ||||||
| import base64 | import base64 | ||||||
| import zlib | import zlib | ||||||
| import xml.etree.ElementTree |  | ||||||
|  |  | ||||||
| from hashlib import sha1 | from hashlib import sha1 | ||||||
| from math import pow, sqrt, floor | from math import pow, sqrt, floor | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|  |     compat_etree_fromstring, | ||||||
|     compat_urllib_parse, |     compat_urllib_parse, | ||||||
|     compat_urllib_parse_unquote, |     compat_urllib_parse_unquote, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
| @@ -234,7 +234,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | |||||||
|         return output |         return output | ||||||
|  |  | ||||||
|     def _extract_subtitles(self, subtitle): |     def _extract_subtitles(self, subtitle): | ||||||
|         sub_root = xml.etree.ElementTree.fromstring(subtitle) |         sub_root = compat_etree_fromstring(subtitle) | ||||||
|         return [{ |         return [{ | ||||||
|             'ext': 'srt', |             'ext': 'srt', | ||||||
|             'data': self._convert_subtitles_to_srt(sub_root), |             'data': self._convert_subtitles_to_srt(sub_root), | ||||||
|   | |||||||
| @@ -1,10 +1,10 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import re | import re | ||||||
| import xml.etree.ElementTree |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
|  |     compat_etree_fromstring, | ||||||
|     compat_urllib_request, |     compat_urllib_request, | ||||||
| ) | ) | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
| @@ -97,7 +97,7 @@ class VevoIE(InfoExtractor): | |||||||
|         if last_version['version'] == -1: |         if last_version['version'] == -1: | ||||||
|             raise ExtractorError('Unable to extract last version of the video') |             raise ExtractorError('Unable to extract last version of the video') | ||||||
|  |  | ||||||
|         renditions = xml.etree.ElementTree.fromstring(last_version['data']) |         renditions = compat_etree_fromstring(last_version['data']) | ||||||
|         formats = [] |         formats = [] | ||||||
|         # Already sorted from worst to best quality |         # Already sorted from worst to best quality | ||||||
|         for rend in renditions.findall('rendition'): |         for rend in renditions.findall('rendition'): | ||||||
| @@ -114,7 +114,7 @@ class VevoIE(InfoExtractor): | |||||||
|  |  | ||||||
|     def _formats_from_smil(self, smil_xml): |     def _formats_from_smil(self, smil_xml): | ||||||
|         formats = [] |         formats = [] | ||||||
|         smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8')) |         smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8')) | ||||||
|         els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') |         els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video') | ||||||
|         for el in els: |         for el in els: | ||||||
|             src = el.attrib['src'] |             src = el.attrib['src'] | ||||||
|   | |||||||
| @@ -36,6 +36,7 @@ import zlib | |||||||
| from .compat import ( | from .compat import ( | ||||||
|     compat_basestring, |     compat_basestring, | ||||||
|     compat_chr, |     compat_chr, | ||||||
|  |     compat_etree_fromstring, | ||||||
|     compat_html_entities, |     compat_html_entities, | ||||||
|     compat_http_client, |     compat_http_client, | ||||||
|     compat_kwargs, |     compat_kwargs, | ||||||
| @@ -1974,7 +1975,7 @@ def dfxp2srt(dfxp_data): | |||||||
|  |  | ||||||
|         return out |         return out | ||||||
|  |  | ||||||
|     dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8')) |     dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8')) | ||||||
|     out = [] |     out = [] | ||||||
|     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') |     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p') | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user