mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[InfoExtractor] Support DASH subtitle extraction (yt-dlp back-port)
This commit is contained in:
		| @@ -993,7 +993,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | |||||||
|                     'tbr': 5997.485, |                     'tbr': 5997.485, | ||||||
|                     'width': 1920, |                     'width': 1920, | ||||||
|                     'height': 1080, |                     'height': 1080, | ||||||
|                 }] |                 }], | ||||||
|  |                 {}, | ||||||
|             ), ( |             ), ( | ||||||
|                 # https://github.com/ytdl-org/youtube-dl/pull/14844 |                 # https://github.com/ytdl-org/youtube-dl/pull/14844 | ||||||
|                 'urls_only', |                 'urls_only', | ||||||
| @@ -1076,7 +1077,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | |||||||
|                     'tbr': 4400, |                     'tbr': 4400, | ||||||
|                     'width': 1920, |                     'width': 1920, | ||||||
|                     'height': 1080, |                     'height': 1080, | ||||||
|                 }] |                 }], | ||||||
|  |                 {}, | ||||||
|             ), ( |             ), ( | ||||||
|                 # https://github.com/ytdl-org/youtube-dl/issues/20346 |                 # https://github.com/ytdl-org/youtube-dl/issues/20346 | ||||||
|                 # Media considered unfragmented even though it contains |                 # Media considered unfragmented even though it contains | ||||||
| @@ -1122,18 +1124,119 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | |||||||
|                     'width': 360, |                     'width': 360, | ||||||
|                     'height': 360, |                     'height': 360, | ||||||
|                     'fps': 30, |                     'fps': 30, | ||||||
|                 }] |                 }], | ||||||
|  |                 {}, | ||||||
|  |             ), ( | ||||||
|  |                 'subtitles', | ||||||
|  |                 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/', | ||||||
|  |                 [{ | ||||||
|  |                     'format_id': 'audio=128001', | ||||||
|  |                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'ext': 'm4a', | ||||||
|  |                     'tbr': 128.001, | ||||||
|  |                     'asr': 48000, | ||||||
|  |                     'format_note': 'DASH audio', | ||||||
|  |                     'container': 'm4a_dash', | ||||||
|  |                     'vcodec': 'none', | ||||||
|  |                     'acodec': 'mp4a.40.2', | ||||||
|  |                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||||
|  |                     'protocol': 'http_dash_segments', | ||||||
|  |                 }, { | ||||||
|  |                     'format_id': 'video=100000', | ||||||
|  |                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'ext': 'mp4', | ||||||
|  |                     'width': 336, | ||||||
|  |                     'height': 144, | ||||||
|  |                     'tbr': 100, | ||||||
|  |                     'format_note': 'DASH video', | ||||||
|  |                     'container': 'mp4_dash', | ||||||
|  |                     'vcodec': 'avc1.4D401F', | ||||||
|  |                     'acodec': 'none', | ||||||
|  |                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||||
|  |                     'protocol': 'http_dash_segments', | ||||||
|  |                 }, { | ||||||
|  |                     'format_id': 'video=326000', | ||||||
|  |                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'ext': 'mp4', | ||||||
|  |                     'width': 562, | ||||||
|  |                     'height': 240, | ||||||
|  |                     'tbr': 326, | ||||||
|  |                     'format_note': 'DASH video', | ||||||
|  |                     'container': 'mp4_dash', | ||||||
|  |                     'vcodec': 'avc1.4D401F', | ||||||
|  |                     'acodec': 'none', | ||||||
|  |                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||||
|  |                     'protocol': 'http_dash_segments', | ||||||
|  |                 }, { | ||||||
|  |                     'format_id': 'video=698000', | ||||||
|  |                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'ext': 'mp4', | ||||||
|  |                     'width': 844, | ||||||
|  |                     'height': 360, | ||||||
|  |                     'tbr': 698, | ||||||
|  |                     'format_note': 'DASH video', | ||||||
|  |                     'container': 'mp4_dash', | ||||||
|  |                     'vcodec': 'avc1.4D401F', | ||||||
|  |                     'acodec': 'none', | ||||||
|  |                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||||
|  |                     'protocol': 'http_dash_segments', | ||||||
|  |                 }, { | ||||||
|  |                     'format_id': 'video=1493000', | ||||||
|  |                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'ext': 'mp4', | ||||||
|  |                     'width': 1126, | ||||||
|  |                     'height': 480, | ||||||
|  |                     'tbr': 1493, | ||||||
|  |                     'format_note': 'DASH video', | ||||||
|  |                     'container': 'mp4_dash', | ||||||
|  |                     'vcodec': 'avc1.4D401F', | ||||||
|  |                     'acodec': 'none', | ||||||
|  |                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||||
|  |                     'protocol': 'http_dash_segments', | ||||||
|  |                 }, { | ||||||
|  |                     'format_id': 'video=4482000', | ||||||
|  |                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'ext': 'mp4', | ||||||
|  |                     'width': 1688, | ||||||
|  |                     'height': 720, | ||||||
|  |                     'tbr': 4482, | ||||||
|  |                     'format_note': 'DASH video', | ||||||
|  |                     'container': 'mp4_dash', | ||||||
|  |                     'vcodec': 'avc1.4D401F', | ||||||
|  |                     'acodec': 'none', | ||||||
|  |                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||||
|  |                     'protocol': 'http_dash_segments', | ||||||
|  |                 }], | ||||||
|  |                 { | ||||||
|  |                     'en': [ | ||||||
|  |                         { | ||||||
|  |                             'ext': 'mp4', | ||||||
|  |                             'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                             'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||||
|  |                             'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||||
|  |                             'protocol': 'http_dash_segments', | ||||||
|  |                         } | ||||||
|  |                     ] | ||||||
|  |                 }, | ||||||
|             ) |             ) | ||||||
|         ] |         ] | ||||||
|  |  | ||||||
|         for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: |         for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: | ||||||
|             with open('./test/testdata/mpd/%s.mpd' % mpd_file, |             with open('./test/testdata/mpd/%s.mpd' % mpd_file, | ||||||
|                       mode='r', encoding='utf-8') as f: |                       mode='r', encoding='utf-8') as f: | ||||||
|                 formats = self.ie._parse_mpd_formats( |                 formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( | ||||||
|                     compat_etree_fromstring(f.read().encode('utf-8')), |                     compat_etree_fromstring(f.read().encode('utf-8')), | ||||||
|                     mpd_base_url=mpd_base_url, mpd_url=mpd_url) |                     mpd_base_url=mpd_base_url, mpd_url=mpd_url) | ||||||
|                 self.ie._sort_formats(formats) |                 self.ie._sort_formats(formats) | ||||||
|                 expect_value(self, formats, expected_formats, None) |                 expect_value(self, formats, expected_formats, None) | ||||||
|  |                 expect_value(self, subtitles, expected_subtitles, None) | ||||||
|  |  | ||||||
|     def test_parse_f4m_formats(self): |     def test_parse_f4m_formats(self): | ||||||
|         _TEST_CASES = [ |         _TEST_CASES = [ | ||||||
|   | |||||||
| @@ -2,6 +2,7 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import base64 | import base64 | ||||||
|  | import collections | ||||||
| import datetime | import datetime | ||||||
| import functools | import functools | ||||||
| import hashlib | import hashlib | ||||||
| @@ -58,6 +59,7 @@ from ..utils import ( | |||||||
|     GeoRestrictedError, |     GeoRestrictedError, | ||||||
|     GeoUtils, |     GeoUtils, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     join_nonempty, | ||||||
|     js_to_json, |     js_to_json, | ||||||
|     JSON_LD_RE, |     JSON_LD_RE, | ||||||
|     mimetype2ext, |     mimetype2ext, | ||||||
| @@ -74,6 +76,7 @@ from ..utils import ( | |||||||
|     str_or_none, |     str_or_none, | ||||||
|     str_to_int, |     str_to_int, | ||||||
|     strip_or_none, |     strip_or_none, | ||||||
|  |     T, | ||||||
|     traverse_obj, |     traverse_obj, | ||||||
|     try_get, |     try_get, | ||||||
|     unescapeHTML, |     unescapeHTML, | ||||||
| @@ -1751,6 +1754,12 @@ class InfoExtractor(object): | |||||||
|             'format_note': 'Quality selection URL', |             'format_note': 'Quality selection URL', | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |     def _report_ignoring_subs(self, name): | ||||||
|  |         self.report_warning(bug_reports_message( | ||||||
|  |             'Ignoring subtitle tracks found in the {0} manifest; ' | ||||||
|  |             'if any subtitle tracks are missing,'.format(name) | ||||||
|  |         ), only_once=True) | ||||||
|  |  | ||||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, |     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, | ||||||
|                               entry_protocol='m3u8', preference=None, |                               entry_protocol='m3u8', preference=None, | ||||||
|                               m3u8_id=None, note=None, errnote=None, |                               m3u8_id=None, note=None, errnote=None, | ||||||
| @@ -2191,23 +2200,46 @@ class InfoExtractor(object): | |||||||
|             }) |             }) | ||||||
|         return entries |         return entries | ||||||
|  |  | ||||||
|     def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): |     def _extract_mpd_formats(self, *args, **kwargs): | ||||||
|  |         fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs) | ||||||
|  |         if subs: | ||||||
|  |             self._report_ignoring_subs('DASH') | ||||||
|  |         return fmts | ||||||
|  |  | ||||||
|  |     def _extract_mpd_formats_and_subtitles( | ||||||
|  |             self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, | ||||||
|  |             fatal=True, data=None, headers=None, query=None): | ||||||
|  |  | ||||||
|  |         # TODO: or not? param not yet implemented | ||||||
|  |         if self.get_param('ignore_no_formats_error'): | ||||||
|  |             fatal = False | ||||||
|  |  | ||||||
|         res = self._download_xml_handle( |         res = self._download_xml_handle( | ||||||
|             mpd_url, video_id, |             mpd_url, video_id, | ||||||
|             note=note or 'Downloading MPD manifest', |             note='Downloading MPD manifest' if note is None else note, | ||||||
|             errnote=errnote or 'Failed to download MPD manifest', |             errnote='Failed to download MPD manifest' if errnote is None else errnote, | ||||||
|             fatal=fatal, data=data, headers=headers, query=query) |             fatal=fatal, data=data, headers=headers or {}, query=query or {}) | ||||||
|         if res is False: |         if res is False: | ||||||
|             return [] |             return [], {} | ||||||
|         mpd_doc, urlh = res |         mpd_doc, urlh = res | ||||||
|         if mpd_doc is None: |         if mpd_doc is None: | ||||||
|             return [] |             return [], {} | ||||||
|         mpd_base_url = base_url(urlh.geturl()) |  | ||||||
|  |  | ||||||
|         return self._parse_mpd_formats( |         # We could have been redirected to a new url when we retrieved our mpd file. | ||||||
|  |         mpd_url = urlh.geturl() | ||||||
|  |         mpd_base_url = base_url(mpd_url) | ||||||
|  |  | ||||||
|  |         return self._parse_mpd_formats_and_subtitles( | ||||||
|             mpd_doc, mpd_id, mpd_base_url, mpd_url) |             mpd_doc, mpd_id, mpd_base_url, mpd_url) | ||||||
|  |  | ||||||
|     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): |     def _parse_mpd_formats(self, *args, **kwargs): | ||||||
|  |         fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs) | ||||||
|  |         if subs: | ||||||
|  |             self._report_ignoring_subs('DASH') | ||||||
|  |         return fmts | ||||||
|  |  | ||||||
|  |     def _parse_mpd_formats_and_subtitles( | ||||||
|  |             self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): | ||||||
|         """ |         """ | ||||||
|         Parse formats from MPD manifest. |         Parse formats from MPD manifest. | ||||||
|         References: |         References: | ||||||
| @@ -2215,8 +2247,10 @@ class InfoExtractor(object): | |||||||
|             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip |             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip | ||||||
|          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP |          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP | ||||||
|         """ |         """ | ||||||
|  |         # TODO: param not yet implemented: default like previous yt-dl logic | ||||||
|  |         if not self.get_param('dynamic_mpd', False): | ||||||
|             if mpd_doc.get('type') == 'dynamic': |             if mpd_doc.get('type') == 'dynamic': | ||||||
|             return [] |                 return [], {} | ||||||
|  |  | ||||||
|         namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None) |         namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None) | ||||||
|  |  | ||||||
| @@ -2228,6 +2262,7 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|         def extract_multisegment_info(element, ms_parent_info): |         def extract_multisegment_info(element, ms_parent_info): | ||||||
|             ms_info = ms_parent_info.copy() |             ms_info = ms_parent_info.copy() | ||||||
|  |             base_url = ms_info.get('base_url') | ||||||
|  |  | ||||||
|             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some |             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some | ||||||
|             # common attributes and elements.  We will only extract relevant |             # common attributes and elements.  We will only extract relevant | ||||||
| @@ -2285,7 +2320,8 @@ class InfoExtractor(object): | |||||||
|             return ms_info |             return ms_info | ||||||
|  |  | ||||||
|         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) |         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) | ||||||
|         formats = [] |         formats, subtitles = [], {} | ||||||
|  |         stream_numbers = collections.defaultdict(int) | ||||||
|         for period in mpd_doc.findall(_add_ns('Period')): |         for period in mpd_doc.findall(_add_ns('Period')): | ||||||
|             period_duration = parse_duration(period.get('duration')) or mpd_duration |             period_duration = parse_duration(period.get('duration')) or mpd_duration | ||||||
|             period_ms_info = extract_multisegment_info(period, { |             period_ms_info = extract_multisegment_info(period, { | ||||||
| @@ -2295,7 +2331,7 @@ class InfoExtractor(object): | |||||||
|             for adaptation_set in period.findall(_add_ns('AdaptationSet')): |             for adaptation_set in period.findall(_add_ns('AdaptationSet')): | ||||||
|                 if is_drm_protected(adaptation_set): |                 if is_drm_protected(adaptation_set): | ||||||
|                     continue |                     continue | ||||||
|                 adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) |                 adaptation_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) | ||||||
|                 for representation in adaptation_set.findall(_add_ns('Representation')): |                 for representation in adaptation_set.findall(_add_ns('Representation')): | ||||||
|                     if is_drm_protected(representation): |                     if is_drm_protected(representation): | ||||||
|                         continue |                         continue | ||||||
| @@ -2303,27 +2339,35 @@ class InfoExtractor(object): | |||||||
|                     representation_attrib.update(representation.attrib) |                     representation_attrib.update(representation.attrib) | ||||||
|                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory |                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory | ||||||
|                     mime_type = representation_attrib['mimeType'] |                     mime_type = representation_attrib['mimeType'] | ||||||
|                     content_type = mime_type.split('/')[0] |                     content_type = representation_attrib.get('contentType') or mime_type.split('/')[0] | ||||||
|                     if content_type == 'text': |                     codec_str = representation_attrib.get('codecs', '') | ||||||
|                         # TODO implement WebVTT downloading |                     # Some kind of binary subtitle found in some youtube livestreams | ||||||
|                         pass |                     if mime_type == 'application/x-rawcc': | ||||||
|                     elif content_type in ('video', 'audio'): |                         codecs = {'scodec': codec_str} | ||||||
|                         base_url = '' |                     else: | ||||||
|                         for element in (representation, adaptation_set, period, mpd_doc): |                         codecs = parse_codecs(codec_str) | ||||||
|                             base_url_e = element.find(_add_ns('BaseURL')) |                     if content_type not in ('video', 'audio', 'text'): | ||||||
|                             if base_url_e is not None: |                         if mime_type == 'image/jpeg': | ||||||
|                                 base_url = base_url_e.text + base_url |                             content_type = mime_type | ||||||
|                                 if re.match(r'^https?://', base_url): |                         elif codecs.get('vcodec', 'none') != 'none': | ||||||
|                                     break |                             content_type = 'video' | ||||||
|                         if mpd_base_url and not re.match(r'^https?://', base_url): |                         elif codecs.get('acodec', 'none') != 'none': | ||||||
|                             if not mpd_base_url.endswith('/') and not base_url.startswith('/'): |                             content_type = 'audio' | ||||||
|                                 mpd_base_url += '/' |                         elif codecs.get('scodec', 'none') != 'none': | ||||||
|                             base_url = mpd_base_url + base_url |                             content_type = 'text' | ||||||
|  |                         elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): | ||||||
|  |                             content_type = 'text' | ||||||
|  |                         else: | ||||||
|  |                             self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) | ||||||
|  |                             continue | ||||||
|  |  | ||||||
|                     representation_id = representation_attrib.get('id') |                     representation_id = representation_attrib.get('id') | ||||||
|                     lang = representation_attrib.get('lang') |                     lang = representation_attrib.get('lang') | ||||||
|                     url_el = representation.find(_add_ns('BaseURL')) |                     url_el = representation.find(_add_ns('BaseURL')) | ||||||
|                         filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) |                     filesize = int_or_none(url_el.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) | ||||||
|                     bandwidth = int_or_none(representation_attrib.get('bandwidth')) |                     bandwidth = int_or_none(representation_attrib.get('bandwidth')) | ||||||
|  |                     format_id = join_nonempty(representation_id or content_type, mpd_id) | ||||||
|  |                     if content_type in ('video', 'audio'): | ||||||
|                         f = { |                         f = { | ||||||
|                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, |                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, | ||||||
|                             'manifest_url': mpd_url, |                             'manifest_url': mpd_url, | ||||||
| @@ -2338,8 +2382,27 @@ class InfoExtractor(object): | |||||||
|                             'filesize': filesize, |                             'filesize': filesize, | ||||||
|                             'container': mimetype2ext(mime_type) + '_dash', |                             'container': mimetype2ext(mime_type) + '_dash', | ||||||
|                         } |                         } | ||||||
|                         f.update(parse_codecs(representation_attrib.get('codecs'))) |                         f.update(codecs) | ||||||
|                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) |                     elif content_type == 'text': | ||||||
|  |                         f = { | ||||||
|  |                             'ext': mimetype2ext(mime_type), | ||||||
|  |                             'manifest_url': mpd_url, | ||||||
|  |                             'filesize': filesize, | ||||||
|  |                         } | ||||||
|  |                     elif content_type == 'image/jpeg': | ||||||
|  |                         # See test case in VikiIE | ||||||
|  |                         # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1 | ||||||
|  |                         f = { | ||||||
|  |                             'format_id': format_id, | ||||||
|  |                             'ext': 'mhtml', | ||||||
|  |                             'manifest_url': mpd_url, | ||||||
|  |                             'format_note': 'DASH storyboards (jpeg)', | ||||||
|  |                             'acodec': 'none', | ||||||
|  |                             'vcodec': 'none', | ||||||
|  |                         } | ||||||
|  |                     if is_drm_protected(adaptation_set) or is_drm_protected(representation): | ||||||
|  |                         f['has_drm'] = True | ||||||
|  |                     representation_ms_info = extract_multisegment_info(representation, adaptation_set_ms_info) | ||||||
|  |  | ||||||
|                     def prepare_template(template_name, identifiers): |                     def prepare_template(template_name, identifiers): | ||||||
|                         tmpl = representation_ms_info[template_name] |                         tmpl = representation_ms_info[template_name] | ||||||
| @@ -2391,7 +2454,8 @@ class InfoExtractor(object): | |||||||
|                             segment_duration = None |                             segment_duration = None | ||||||
|                             if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: |                             if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: | ||||||
|                                 segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) |                                 segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) | ||||||
|                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) |                                 representation_ms_info['total_number'] = int(math.ceil( | ||||||
|  |                                     float_or_none(period_duration, segment_duration, default=0))) | ||||||
|                             representation_ms_info['fragments'] = [{ |                             representation_ms_info['fragments'] = [{ | ||||||
|                                 media_location_key: media_template % { |                                 media_location_key: media_template % { | ||||||
|                                     'Number': segment_number, |                                     'Number': segment_number, | ||||||
| @@ -2431,11 +2495,12 @@ class InfoExtractor(object): | |||||||
|                                     add_segment_url() |                                     add_segment_url() | ||||||
|                                     segment_number += 1 |                                     segment_number += 1 | ||||||
|                                 segment_time += segment_d |                                 segment_time += segment_d | ||||||
|                         elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: |                     elif 'segment_urls' in representation_ms_info: | ||||||
|  |                         fragments = [] | ||||||
|  |                         if 's' in representation_ms_info: | ||||||
|                             # No media template |                             # No media template | ||||||
|                             # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI |                             # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI | ||||||
|                             # or any YouTube dashsegments video |                             # or any YouTube dashsegments video | ||||||
|                             fragments = [] |  | ||||||
|                             segment_index = 0 |                             segment_index = 0 | ||||||
|                             timescale = representation_ms_info['timescale'] |                             timescale = representation_ms_info['timescale'] | ||||||
|                             for s in representation_ms_info['s']: |                             for s in representation_ms_info['s']: | ||||||
| @@ -2487,8 +2552,15 @@ class InfoExtractor(object): | |||||||
|                             f['url'] = base_url |                             f['url'] = base_url | ||||||
|                         formats.append(f) |                         formats.append(f) | ||||||
|                     else: |                     else: | ||||||
|                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) |                         # Assuming direct URL to unfragmented media. | ||||||
|         return formats |                         f['url'] = representation_ms_info['base_url'] | ||||||
|  |                     if content_type in ('video', 'audio', 'image/jpeg'): | ||||||
|  |                         f['manifest_stream_number'] = stream_numbers[f['url']] | ||||||
|  |                         stream_numbers[f['url']] += 1 | ||||||
|  |                         formats.append(f) | ||||||
|  |                     elif content_type == 'text': | ||||||
|  |                         subtitles.setdefault(lang or 'und', []).append(f) | ||||||
|  |         return formats, subtitles | ||||||
|  |  | ||||||
|     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): |     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): | ||||||
|         res = self._download_xml_handle( |         res = self._download_xml_handle( | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user