mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[nova:embed] Fix extraction (closes #24700)
This commit is contained in:
		| @@ -6,6 +6,7 @@ import re | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     clean_html, |     clean_html, | ||||||
|  |     determine_ext, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     js_to_json, |     js_to_json, | ||||||
|     qualities, |     qualities, | ||||||
| @@ -33,42 +34,76 @@ class NovaEmbedIE(InfoExtractor): | |||||||
|  |  | ||||||
|         webpage = self._download_webpage(url, video_id) |         webpage = self._download_webpage(url, video_id) | ||||||
|  |  | ||||||
|         bitrates = self._parse_json( |         duration = None | ||||||
|             self._search_regex( |  | ||||||
|                 r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), |  | ||||||
|             video_id, transform_source=js_to_json) |  | ||||||
|  |  | ||||||
|         QUALITIES = ('lq', 'mq', 'hq', 'hd') |  | ||||||
|         quality_key = qualities(QUALITIES) |  | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         for format_id, format_list in bitrates.items(): |  | ||||||
|             if not isinstance(format_list, list): |         player = self._parse_json( | ||||||
|                 format_list = [format_list] |             self._search_regex( | ||||||
|             for format_url in format_list: |                 r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;', | ||||||
|                 format_url = url_or_none(format_url) |                 webpage, 'player', default='{}'), video_id, fatal=False) | ||||||
|                 if not format_url: |         if player: | ||||||
|                     continue |             for format_id, format_list in player['tracks'].items(): | ||||||
|                 if format_id == 'hls': |                 if not isinstance(format_list, list): | ||||||
|                     formats.extend(self._extract_m3u8_formats( |                     format_list = [format_list] | ||||||
|                         format_url, video_id, ext='mp4', |                 for format_dict in format_list: | ||||||
|                         entry_protocol='m3u8_native', m3u8_id='hls', |                     if not isinstance(format_dict, dict): | ||||||
|                         fatal=False)) |                         continue | ||||||
|                     continue |                     format_url = url_or_none(format_dict.get('src')) | ||||||
|                 f = { |                     format_type = format_dict.get('type') | ||||||
|                     'url': format_url, |                     ext = determine_ext(format_url) | ||||||
|                 } |                     if (format_type == 'application/x-mpegURL' | ||||||
|                 f_id = format_id |                             or format_id == 'HLS' or ext == 'm3u8'): | ||||||
|                 for quality in QUALITIES: |                         formats.extend(self._extract_m3u8_formats( | ||||||
|                     if '%s.mp4' % quality in format_url: |                             format_url, video_id, 'mp4', | ||||||
|                         f_id += '-%s' % quality |                             entry_protocol='m3u8_native', m3u8_id='hls', | ||||||
|                         f.update({ |                             fatal=False)) | ||||||
|                             'quality': quality_key(quality), |                     elif (format_type == 'application/dash+xml' | ||||||
|                             'format_note': quality.upper(), |                           or format_id == 'DASH' or ext == 'mpd'): | ||||||
|  |                         formats.extend(self._extract_mpd_formats( | ||||||
|  |                             format_url, video_id, mpd_id='dash', fatal=False)) | ||||||
|  |                     else: | ||||||
|  |                         formats.append({ | ||||||
|  |                             'url': format_url, | ||||||
|                         }) |                         }) | ||||||
|                         break |             duration = int_or_none(player.get('duration')) | ||||||
|                 f['format_id'] = f_id |         else: | ||||||
|                 formats.append(f) |             # Old path, not actual as of 08.04.2020 | ||||||
|  |             bitrates = self._parse_json( | ||||||
|  |                 self._search_regex( | ||||||
|  |                     r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'), | ||||||
|  |                 video_id, transform_source=js_to_json) | ||||||
|  |  | ||||||
|  |             QUALITIES = ('lq', 'mq', 'hq', 'hd') | ||||||
|  |             quality_key = qualities(QUALITIES) | ||||||
|  |  | ||||||
|  |             for format_id, format_list in bitrates.items(): | ||||||
|  |                 if not isinstance(format_list, list): | ||||||
|  |                     format_list = [format_list] | ||||||
|  |                 for format_url in format_list: | ||||||
|  |                     format_url = url_or_none(format_url) | ||||||
|  |                     if not format_url: | ||||||
|  |                         continue | ||||||
|  |                     if format_id == 'hls': | ||||||
|  |                         formats.extend(self._extract_m3u8_formats( | ||||||
|  |                             format_url, video_id, ext='mp4', | ||||||
|  |                             entry_protocol='m3u8_native', m3u8_id='hls', | ||||||
|  |                             fatal=False)) | ||||||
|  |                         continue | ||||||
|  |                     f = { | ||||||
|  |                         'url': format_url, | ||||||
|  |                     } | ||||||
|  |                     f_id = format_id | ||||||
|  |                     for quality in QUALITIES: | ||||||
|  |                         if '%s.mp4' % quality in format_url: | ||||||
|  |                             f_id += '-%s' % quality | ||||||
|  |                             f.update({ | ||||||
|  |                                 'quality': quality_key(quality), | ||||||
|  |                                 'format_note': quality.upper(), | ||||||
|  |                             }) | ||||||
|  |                             break | ||||||
|  |                     f['format_id'] = f_id | ||||||
|  |                     formats.append(f) | ||||||
|  |  | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         title = self._og_search_title( |         title = self._og_search_title( | ||||||
| @@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor): | |||||||
|             r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, |             r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, | ||||||
|             'thumbnail', fatal=False, group='value') |             'thumbnail', fatal=False, group='value') | ||||||
|         duration = int_or_none(self._search_regex( |         duration = int_or_none(self._search_regex( | ||||||
|             r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False)) |             r'videoDuration\s*:\s*(\d+)', webpage, 'duration', | ||||||
|  |             default=duration)) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user