mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[InfoExtractor] Support DASH subtitle extraction (yt-dlp back-port)
This commit is contained in:
		| @@ -993,7 +993,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | ||||
|                     'tbr': 5997.485, | ||||
|                     'width': 1920, | ||||
|                     'height': 1080, | ||||
|                 }] | ||||
|                 }], | ||||
|                 {}, | ||||
|             ), ( | ||||
|                 # https://github.com/ytdl-org/youtube-dl/pull/14844 | ||||
|                 'urls_only', | ||||
| @@ -1076,7 +1077,8 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | ||||
|                     'tbr': 4400, | ||||
|                     'width': 1920, | ||||
|                     'height': 1080, | ||||
|                 }] | ||||
|                 }], | ||||
|                 {}, | ||||
|             ), ( | ||||
|                 # https://github.com/ytdl-org/youtube-dl/issues/20346 | ||||
|                 # Media considered unfragmented even though it contains | ||||
| @@ -1122,18 +1124,119 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ | ||||
|                     'width': 360, | ||||
|                     'height': 360, | ||||
|                     'fps': 30, | ||||
|                 }] | ||||
|                 }], | ||||
|                 {}, | ||||
|             ), ( | ||||
|                 'subtitles', | ||||
|                 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/', | ||||
|                 [{ | ||||
|                     'format_id': 'audio=128001', | ||||
|                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'ext': 'm4a', | ||||
|                     'tbr': 128.001, | ||||
|                     'asr': 48000, | ||||
|                     'format_note': 'DASH audio', | ||||
|                     'container': 'm4a_dash', | ||||
|                     'vcodec': 'none', | ||||
|                     'acodec': 'mp4a.40.2', | ||||
|                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||
|                     'protocol': 'http_dash_segments', | ||||
|                 }, { | ||||
|                     'format_id': 'video=100000', | ||||
|                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'ext': 'mp4', | ||||
|                     'width': 336, | ||||
|                     'height': 144, | ||||
|                     'tbr': 100, | ||||
|                     'format_note': 'DASH video', | ||||
|                     'container': 'mp4_dash', | ||||
|                     'vcodec': 'avc1.4D401F', | ||||
|                     'acodec': 'none', | ||||
|                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||
|                     'protocol': 'http_dash_segments', | ||||
|                 }, { | ||||
|                     'format_id': 'video=326000', | ||||
|                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'ext': 'mp4', | ||||
|                     'width': 562, | ||||
|                     'height': 240, | ||||
|                     'tbr': 326, | ||||
|                     'format_note': 'DASH video', | ||||
|                     'container': 'mp4_dash', | ||||
|                     'vcodec': 'avc1.4D401F', | ||||
|                     'acodec': 'none', | ||||
|                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||
|                     'protocol': 'http_dash_segments', | ||||
|                 }, { | ||||
|                     'format_id': 'video=698000', | ||||
|                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'ext': 'mp4', | ||||
|                     'width': 844, | ||||
|                     'height': 360, | ||||
|                     'tbr': 698, | ||||
|                     'format_note': 'DASH video', | ||||
|                     'container': 'mp4_dash', | ||||
|                     'vcodec': 'avc1.4D401F', | ||||
|                     'acodec': 'none', | ||||
|                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||
|                     'protocol': 'http_dash_segments', | ||||
|                 }, { | ||||
|                     'format_id': 'video=1493000', | ||||
|                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'ext': 'mp4', | ||||
|                     'width': 1126, | ||||
|                     'height': 480, | ||||
|                     'tbr': 1493, | ||||
|                     'format_note': 'DASH video', | ||||
|                     'container': 'mp4_dash', | ||||
|                     'vcodec': 'avc1.4D401F', | ||||
|                     'acodec': 'none', | ||||
|                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||
|                     'protocol': 'http_dash_segments', | ||||
|                 }, { | ||||
|                     'format_id': 'video=4482000', | ||||
|                     'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'ext': 'mp4', | ||||
|                     'width': 1688, | ||||
|                     'height': 720, | ||||
|                     'tbr': 4482, | ||||
|                     'format_note': 'DASH video', | ||||
|                     'container': 'mp4_dash', | ||||
|                     'vcodec': 'avc1.4D401F', | ||||
|                     'acodec': 'none', | ||||
|                     'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                     'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||
|                     'protocol': 'http_dash_segments', | ||||
|                 }], | ||||
|                 { | ||||
|                     'en': [ | ||||
|                         { | ||||
|                             'ext': 'mp4', | ||||
|                             'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                             'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', | ||||
|                             'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', | ||||
|                             'protocol': 'http_dash_segments', | ||||
|                         } | ||||
|                     ] | ||||
|                 }, | ||||
|             ) | ||||
|         ] | ||||
|  | ||||
|         for mpd_file, mpd_url, mpd_base_url, expected_formats in _TEST_CASES: | ||||
|         for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: | ||||
|             with open('./test/testdata/mpd/%s.mpd' % mpd_file, | ||||
|                       mode='r', encoding='utf-8') as f: | ||||
|                 formats = self.ie._parse_mpd_formats( | ||||
|                 formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( | ||||
|                     compat_etree_fromstring(f.read().encode('utf-8')), | ||||
|                     mpd_base_url=mpd_base_url, mpd_url=mpd_url) | ||||
|                 self.ie._sort_formats(formats) | ||||
|                 expect_value(self, formats, expected_formats, None) | ||||
|                 expect_value(self, subtitles, expected_subtitles, None) | ||||
|  | ||||
|     def test_parse_f4m_formats(self): | ||||
|         _TEST_CASES = [ | ||||
|   | ||||
| @@ -2,6 +2,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import collections | ||||
| import datetime | ||||
| import functools | ||||
| import hashlib | ||||
| @@ -58,6 +59,7 @@ from ..utils import ( | ||||
|     GeoRestrictedError, | ||||
|     GeoUtils, | ||||
|     int_or_none, | ||||
|     join_nonempty, | ||||
|     js_to_json, | ||||
|     JSON_LD_RE, | ||||
|     mimetype2ext, | ||||
| @@ -74,6 +76,7 @@ from ..utils import ( | ||||
|     str_or_none, | ||||
|     str_to_int, | ||||
|     strip_or_none, | ||||
|     T, | ||||
|     traverse_obj, | ||||
|     try_get, | ||||
|     unescapeHTML, | ||||
| @@ -1751,6 +1754,12 @@ class InfoExtractor(object): | ||||
|             'format_note': 'Quality selection URL', | ||||
|         } | ||||
|  | ||||
|     def _report_ignoring_subs(self, name): | ||||
|         self.report_warning(bug_reports_message( | ||||
|             'Ignoring subtitle tracks found in the {0} manifest; ' | ||||
|             'if any subtitle tracks are missing,'.format(name) | ||||
|         ), only_once=True) | ||||
|  | ||||
|     def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None, | ||||
|                               entry_protocol='m3u8', preference=None, | ||||
|                               m3u8_id=None, note=None, errnote=None, | ||||
| @@ -2191,23 +2200,46 @@ class InfoExtractor(object): | ||||
|             }) | ||||
|         return entries | ||||
|  | ||||
|     def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): | ||||
|     def _extract_mpd_formats(self, *args, **kwargs): | ||||
|         fmts, subs = self._extract_mpd_formats_and_subtitles(*args, **kwargs) | ||||
|         if subs: | ||||
|             self._report_ignoring_subs('DASH') | ||||
|         return fmts | ||||
|  | ||||
|     def _extract_mpd_formats_and_subtitles( | ||||
|             self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, | ||||
|             fatal=True, data=None, headers=None, query=None): | ||||
|  | ||||
|         # TODO: or not? param not yet implemented | ||||
|         if self.get_param('ignore_no_formats_error'): | ||||
|             fatal = False | ||||
|  | ||||
|         res = self._download_xml_handle( | ||||
|             mpd_url, video_id, | ||||
|             note=note or 'Downloading MPD manifest', | ||||
|             errnote=errnote or 'Failed to download MPD manifest', | ||||
|             fatal=fatal, data=data, headers=headers, query=query) | ||||
|             note='Downloading MPD manifest' if note is None else note, | ||||
|             errnote='Failed to download MPD manifest' if errnote is None else errnote, | ||||
|             fatal=fatal, data=data, headers=headers or {}, query=query or {}) | ||||
|         if res is False: | ||||
|             return [] | ||||
|             return [], {} | ||||
|         mpd_doc, urlh = res | ||||
|         if mpd_doc is None: | ||||
|             return [] | ||||
|         mpd_base_url = base_url(urlh.geturl()) | ||||
|             return [], {} | ||||
|  | ||||
|         return self._parse_mpd_formats( | ||||
|         # We could have been redirected to a new url when we retrieved our mpd file. | ||||
|         mpd_url = urlh.geturl() | ||||
|         mpd_base_url = base_url(mpd_url) | ||||
|  | ||||
|         return self._parse_mpd_formats_and_subtitles( | ||||
|             mpd_doc, mpd_id, mpd_base_url, mpd_url) | ||||
|  | ||||
|     def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): | ||||
|     def _parse_mpd_formats(self, *args, **kwargs): | ||||
|         fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs) | ||||
|         if subs: | ||||
|             self._report_ignoring_subs('DASH') | ||||
|         return fmts | ||||
|  | ||||
|     def _parse_mpd_formats_and_subtitles( | ||||
|             self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None): | ||||
|         """ | ||||
|         Parse formats from MPD manifest. | ||||
|         References: | ||||
| @@ -2215,8 +2247,10 @@ class InfoExtractor(object): | ||||
|             http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip | ||||
|          2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP | ||||
|         """ | ||||
|         if mpd_doc.get('type') == 'dynamic': | ||||
|             return [] | ||||
|         # TODO: param not yet implemented: default like previous yt-dl logic | ||||
|         if not self.get_param('dynamic_mpd', False): | ||||
|             if mpd_doc.get('type') == 'dynamic': | ||||
|                 return [], {} | ||||
|  | ||||
|         namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None) | ||||
|  | ||||
| @@ -2228,6 +2262,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|         def extract_multisegment_info(element, ms_parent_info): | ||||
|             ms_info = ms_parent_info.copy() | ||||
|             base_url = ms_info.get('base_url') | ||||
|  | ||||
|             # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some | ||||
|             # common attributes and elements.  We will only extract relevant | ||||
| @@ -2285,7 +2320,8 @@ class InfoExtractor(object): | ||||
|             return ms_info | ||||
|  | ||||
|         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration')) | ||||
|         formats = [] | ||||
|         formats, subtitles = [], {} | ||||
|         stream_numbers = collections.defaultdict(int) | ||||
|         for period in mpd_doc.findall(_add_ns('Period')): | ||||
|             period_duration = parse_duration(period.get('duration')) or mpd_duration | ||||
|             period_ms_info = extract_multisegment_info(period, { | ||||
| @@ -2295,7 +2331,7 @@ class InfoExtractor(object): | ||||
|             for adaptation_set in period.findall(_add_ns('AdaptationSet')): | ||||
|                 if is_drm_protected(adaptation_set): | ||||
|                     continue | ||||
|                 adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) | ||||
|                 adaptation_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info) | ||||
|                 for representation in adaptation_set.findall(_add_ns('Representation')): | ||||
|                     if is_drm_protected(representation): | ||||
|                         continue | ||||
| @@ -2303,27 +2339,35 @@ class InfoExtractor(object): | ||||
|                     representation_attrib.update(representation.attrib) | ||||
|                     # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory | ||||
|                     mime_type = representation_attrib['mimeType'] | ||||
|                     content_type = mime_type.split('/')[0] | ||||
|                     if content_type == 'text': | ||||
|                         # TODO implement WebVTT downloading | ||||
|                         pass | ||||
|                     elif content_type in ('video', 'audio'): | ||||
|                         base_url = '' | ||||
|                         for element in (representation, adaptation_set, period, mpd_doc): | ||||
|                             base_url_e = element.find(_add_ns('BaseURL')) | ||||
|                             if base_url_e is not None: | ||||
|                                 base_url = base_url_e.text + base_url | ||||
|                                 if re.match(r'^https?://', base_url): | ||||
|                                     break | ||||
|                         if mpd_base_url and not re.match(r'^https?://', base_url): | ||||
|                             if not mpd_base_url.endswith('/') and not base_url.startswith('/'): | ||||
|                                 mpd_base_url += '/' | ||||
|                             base_url = mpd_base_url + base_url | ||||
|                         representation_id = representation_attrib.get('id') | ||||
|                         lang = representation_attrib.get('lang') | ||||
|                         url_el = representation.find(_add_ns('BaseURL')) | ||||
|                         filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) | ||||
|                         bandwidth = int_or_none(representation_attrib.get('bandwidth')) | ||||
|                     content_type = representation_attrib.get('contentType') or mime_type.split('/')[0] | ||||
|                     codec_str = representation_attrib.get('codecs', '') | ||||
|                     # Some kind of binary subtitle found in some youtube livestreams | ||||
|                     if mime_type == 'application/x-rawcc': | ||||
|                         codecs = {'scodec': codec_str} | ||||
|                     else: | ||||
|                         codecs = parse_codecs(codec_str) | ||||
|                     if content_type not in ('video', 'audio', 'text'): | ||||
|                         if mime_type == 'image/jpeg': | ||||
|                             content_type = mime_type | ||||
|                         elif codecs.get('vcodec', 'none') != 'none': | ||||
|                             content_type = 'video' | ||||
|                         elif codecs.get('acodec', 'none') != 'none': | ||||
|                             content_type = 'audio' | ||||
|                         elif codecs.get('scodec', 'none') != 'none': | ||||
|                             content_type = 'text' | ||||
|                         elif mimetype2ext(mime_type) in ('tt', 'dfxp', 'ttml', 'xml', 'json'): | ||||
|                             content_type = 'text' | ||||
|                         else: | ||||
|                             self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) | ||||
|                             continue | ||||
|  | ||||
|                     representation_id = representation_attrib.get('id') | ||||
|                     lang = representation_attrib.get('lang') | ||||
|                     url_el = representation.find(_add_ns('BaseURL')) | ||||
|                     filesize = int_or_none(url_el.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) | ||||
|                     bandwidth = int_or_none(representation_attrib.get('bandwidth')) | ||||
|                     format_id = join_nonempty(representation_id or content_type, mpd_id) | ||||
|                     if content_type in ('video', 'audio'): | ||||
|                         f = { | ||||
|                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, | ||||
|                             'manifest_url': mpd_url, | ||||
| @@ -2338,104 +2382,125 @@ class InfoExtractor(object): | ||||
|                             'filesize': filesize, | ||||
|                             'container': mimetype2ext(mime_type) + '_dash', | ||||
|                         } | ||||
|                         f.update(parse_codecs(representation_attrib.get('codecs'))) | ||||
|                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) | ||||
|                         f.update(codecs) | ||||
|                     elif content_type == 'text': | ||||
|                         f = { | ||||
|                             'ext': mimetype2ext(mime_type), | ||||
|                             'manifest_url': mpd_url, | ||||
|                             'filesize': filesize, | ||||
|                         } | ||||
|                     elif content_type == 'image/jpeg': | ||||
|                         # See test case in VikiIE | ||||
|                         # https://www.viki.com/videos/1175236v-choosing-spouse-by-lottery-episode-1 | ||||
|                         f = { | ||||
|                             'format_id': format_id, | ||||
|                             'ext': 'mhtml', | ||||
|                             'manifest_url': mpd_url, | ||||
|                             'format_note': 'DASH storyboards (jpeg)', | ||||
|                             'acodec': 'none', | ||||
|                             'vcodec': 'none', | ||||
|                         } | ||||
|                     if is_drm_protected(adaptation_set) or is_drm_protected(representation): | ||||
|                         f['has_drm'] = True | ||||
|                     representation_ms_info = extract_multisegment_info(representation, adaptation_set_ms_info) | ||||
|  | ||||
|                         def prepare_template(template_name, identifiers): | ||||
|                             tmpl = representation_ms_info[template_name] | ||||
|                             # First of, % characters outside $...$ templates | ||||
|                             # must be escaped by doubling for proper processing | ||||
|                             # by % operator string formatting used further (see | ||||
|                             # https://github.com/ytdl-org/youtube-dl/issues/16867). | ||||
|                             t = '' | ||||
|                             in_template = False | ||||
|                             for c in tmpl: | ||||
|                     def prepare_template(template_name, identifiers): | ||||
|                         tmpl = representation_ms_info[template_name] | ||||
|                         # First of, % characters outside $...$ templates | ||||
|                         # must be escaped by doubling for proper processing | ||||
|                         # by % operator string formatting used further (see | ||||
|                         # https://github.com/ytdl-org/youtube-dl/issues/16867). | ||||
|                         t = '' | ||||
|                         in_template = False | ||||
|                         for c in tmpl: | ||||
|                             t += c | ||||
|                             if c == '$': | ||||
|                                 in_template = not in_template | ||||
|                             elif c == '%' and not in_template: | ||||
|                                 t += c | ||||
|                                 if c == '$': | ||||
|                                     in_template = not in_template | ||||
|                                 elif c == '%' and not in_template: | ||||
|                                     t += c | ||||
|                             # Next, $...$ templates are translated to their | ||||
|                             # %(...) counterparts to be used with % operator | ||||
|                             t = t.replace('$RepresentationID$', representation_id) | ||||
|                             t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t) | ||||
|                             t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t) | ||||
|                             t.replace('$$', '$') | ||||
|                             return t | ||||
|                         # Next, $...$ templates are translated to their | ||||
|                         # %(...) counterparts to be used with % operator | ||||
|                         t = t.replace('$RepresentationID$', representation_id) | ||||
|                         t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t) | ||||
|                         t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t) | ||||
|                         t.replace('$$', '$') | ||||
|                         return t | ||||
|  | ||||
|                         # @initialization is a regular template like @media one | ||||
|                         # so it should be handled just the same way (see | ||||
|                         # https://github.com/ytdl-org/youtube-dl/issues/11605) | ||||
|                         if 'initialization' in representation_ms_info: | ||||
|                             initialization_template = prepare_template( | ||||
|                                 'initialization', | ||||
|                                 # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and | ||||
|                                 # $Time$ shall not be included for @initialization thus | ||||
|                                 # only $Bandwidth$ remains | ||||
|                                 ('Bandwidth', )) | ||||
|                             representation_ms_info['initialization_url'] = initialization_template % { | ||||
|                                 'Bandwidth': bandwidth, | ||||
|                             } | ||||
|                     # @initialization is a regular template like @media one | ||||
|                     # so it should be handled just the same way (see | ||||
|                     # https://github.com/ytdl-org/youtube-dl/issues/11605) | ||||
|                     if 'initialization' in representation_ms_info: | ||||
|                         initialization_template = prepare_template( | ||||
|                             'initialization', | ||||
|                             # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and | ||||
|                             # $Time$ shall not be included for @initialization thus | ||||
|                             # only $Bandwidth$ remains | ||||
|                             ('Bandwidth', )) | ||||
|                         representation_ms_info['initialization_url'] = initialization_template % { | ||||
|                             'Bandwidth': bandwidth, | ||||
|                         } | ||||
|  | ||||
|                         def location_key(location): | ||||
|                             return 'url' if re.match(r'^https?://', location) else 'path' | ||||
|                     def location_key(location): | ||||
|                         return 'url' if re.match(r'^https?://', location) else 'path' | ||||
|  | ||||
|                         if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: | ||||
|                     if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info: | ||||
|  | ||||
|                             media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) | ||||
|                             media_location_key = location_key(media_template) | ||||
|                         media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time')) | ||||
|                         media_location_key = location_key(media_template) | ||||
|  | ||||
|                             # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ | ||||
|                             # can't be used at the same time | ||||
|                             if '%(Number' in media_template and 's' not in representation_ms_info: | ||||
|                                 segment_duration = None | ||||
|                                 if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: | ||||
|                                     segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) | ||||
|                                     representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) | ||||
|                                 representation_ms_info['fragments'] = [{ | ||||
|                                     media_location_key: media_template % { | ||||
|                                         'Number': segment_number, | ||||
|                                         'Bandwidth': bandwidth, | ||||
|                                     }, | ||||
|                                     'duration': segment_duration, | ||||
|                                 } for segment_number in range( | ||||
|                                     representation_ms_info['start_number'], | ||||
|                                     representation_ms_info['total_number'] + representation_ms_info['start_number'])] | ||||
|                             else: | ||||
|                                 # $Number*$ or $Time$ in media template with S list available | ||||
|                                 # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg | ||||
|                                 # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 | ||||
|                                 representation_ms_info['fragments'] = [] | ||||
|                                 segment_time = 0 | ||||
|                                 segment_d = None | ||||
|                                 segment_number = representation_ms_info['start_number'] | ||||
|                         # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ | ||||
|                         # can't be used at the same time | ||||
|                         if '%(Number' in media_template and 's' not in representation_ms_info: | ||||
|                             segment_duration = None | ||||
|                             if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info: | ||||
|                                 segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale']) | ||||
|                                 representation_ms_info['total_number'] = int(math.ceil( | ||||
|                                     float_or_none(period_duration, segment_duration, default=0))) | ||||
|                             representation_ms_info['fragments'] = [{ | ||||
|                                 media_location_key: media_template % { | ||||
|                                     'Number': segment_number, | ||||
|                                     'Bandwidth': bandwidth, | ||||
|                                 }, | ||||
|                                 'duration': segment_duration, | ||||
|                             } for segment_number in range( | ||||
|                                 representation_ms_info['start_number'], | ||||
|                                 representation_ms_info['total_number'] + representation_ms_info['start_number'])] | ||||
|                         else: | ||||
|                             # $Number*$ or $Time$ in media template with S list available | ||||
|                             # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg | ||||
|                             # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 | ||||
|                             representation_ms_info['fragments'] = [] | ||||
|                             segment_time = 0 | ||||
|                             segment_d = None | ||||
|                             segment_number = representation_ms_info['start_number'] | ||||
|  | ||||
|                                 def add_segment_url(): | ||||
|                                     segment_url = media_template % { | ||||
|                                         'Time': segment_time, | ||||
|                                         'Bandwidth': bandwidth, | ||||
|                                         'Number': segment_number, | ||||
|                                     } | ||||
|                                     representation_ms_info['fragments'].append({ | ||||
|                                         media_location_key: segment_url, | ||||
|                                         'duration': float_or_none(segment_d, representation_ms_info['timescale']), | ||||
|                                     }) | ||||
|                             def add_segment_url(): | ||||
|                                 segment_url = media_template % { | ||||
|                                     'Time': segment_time, | ||||
|                                     'Bandwidth': bandwidth, | ||||
|                                     'Number': segment_number, | ||||
|                                 } | ||||
|                                 representation_ms_info['fragments'].append({ | ||||
|                                     media_location_key: segment_url, | ||||
|                                     'duration': float_or_none(segment_d, representation_ms_info['timescale']), | ||||
|                                 }) | ||||
|  | ||||
|                                 for num, s in enumerate(representation_ms_info['s']): | ||||
|                                     segment_time = s.get('t') or segment_time | ||||
|                                     segment_d = s['d'] | ||||
|                             for num, s in enumerate(representation_ms_info['s']): | ||||
|                                 segment_time = s.get('t') or segment_time | ||||
|                                 segment_d = s['d'] | ||||
|                                 add_segment_url() | ||||
|                                 segment_number += 1 | ||||
|                                 for r in range(s.get('r', 0)): | ||||
|                                     segment_time += segment_d | ||||
|                                     add_segment_url() | ||||
|                                     segment_number += 1 | ||||
|                                     for r in range(s.get('r', 0)): | ||||
|                                         segment_time += segment_d | ||||
|                                         add_segment_url() | ||||
|                                         segment_number += 1 | ||||
|                                     segment_time += segment_d | ||||
|                         elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info: | ||||
|                                 segment_time += segment_d | ||||
|                     elif 'segment_urls' in representation_ms_info: | ||||
|                         fragments = [] | ||||
|                         if 's' in representation_ms_info: | ||||
|                             # No media template | ||||
|                             # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI | ||||
|                             # or any YouTube dashsegments video | ||||
|                             fragments = [] | ||||
|                             segment_index = 0 | ||||
|                             timescale = representation_ms_info['timescale'] | ||||
|                             for s in representation_ms_info['s']: | ||||
| @@ -2487,8 +2552,15 @@ class InfoExtractor(object): | ||||
|                             f['url'] = base_url | ||||
|                         formats.append(f) | ||||
|                     else: | ||||
|                         self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type) | ||||
|         return formats | ||||
|                         # Assuming direct URL to unfragmented media. | ||||
|                         f['url'] = representation_ms_info['base_url'] | ||||
|                     if content_type in ('video', 'audio', 'image/jpeg'): | ||||
|                         f['manifest_stream_number'] = stream_numbers[f['url']] | ||||
|                         stream_numbers[f['url']] += 1 | ||||
|                         formats.append(f) | ||||
|                     elif content_type == 'text': | ||||
|                         subtitles.setdefault(lang or 'und', []).append(f) | ||||
|         return formats, subtitles | ||||
|  | ||||
|     def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): | ||||
|         res = self._download_xml_handle( | ||||
|   | ||||
		Reference in New Issue
	
	Block a user