mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[mediaset] fix extraction (closes #16977)
This commit is contained in:
		| @@ -3,75 +3,75 @@ from __future__ import unicode_literals | |||||||
|  |  | ||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .theplatform import ThePlatformBaseIE | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     determine_ext, |     ExtractorError, | ||||||
|     parse_duration, |     int_or_none, | ||||||
|     try_get, |     update_url_query, | ||||||
|     unified_strdate, |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class MediasetIE(InfoExtractor): | class MediasetIE(ThePlatformBaseIE): | ||||||
|  |     _TP_TLD = 'eu' | ||||||
|     _VALID_URL = r'''(?x) |     _VALID_URL = r'''(?x) | ||||||
|                     (?: |                     (?: | ||||||
|                         mediaset:| |                         mediaset:| | ||||||
|                         https?:// |                         https?:// | ||||||
|                             (?:www\.)?video\.mediaset\.it/ |                             (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ | ||||||
|                             (?: |                             (?: | ||||||
|                                 (?:video|on-demand)/(?:[^/]+/)+[^/]+_| |                                 (?:video|on-demand)/(?:[^/]+/)+[^/]+_| | ||||||
|                                 player/playerIFrame(?:Twitter)?\.shtml\?.*?\bid= |                                 player/index\.html\?.*?\bprogramGuid= | ||||||
|                             ) |                             ) | ||||||
|                     )(?P<id>[0-9]+) |                     )(?P<id>[0-9A-Z]{16}) | ||||||
|                     ''' |                     ''' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         # full episode |         # full episode | ||||||
|         'url': 'http://www.video.mediaset.it/video/hello_goodbye/full/quarta-puntata_661824.html', |         'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824', | ||||||
|         'md5': '9b75534d42c44ecef7bf1ffeacb7f85d', |         'md5': '9b75534d42c44ecef7bf1ffeacb7f85d', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '661824', |             'id': 'FAFU000000661824', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Quarta puntata', |             'title': 'Quarta puntata', | ||||||
|             'description': 'md5:7183696d6df570e3412a5ef74b27c5e2', |             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'duration': 1414, |             'duration': 1414.26, | ||||||
|             'creator': 'mediaset', |  | ||||||
|             'upload_date': '20161107', |             'upload_date': '20161107', | ||||||
|             'series': 'Hello Goodbye', |             'series': 'Hello Goodbye', | ||||||
|             'categories': ['reality'], |             'timestamp': 1478532900, | ||||||
|  |             'uploader': 'Rete 4', | ||||||
|  |             'uploader_id': 'R4', | ||||||
|         }, |         }, | ||||||
|         'expected_warnings': ['is not a supported codec'], |  | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.video.mediaset.it/video/matrix/full_chiambretti/puntata-del-25-maggio_846685.html', |         'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501', | ||||||
|         'md5': '1276f966ac423d16ba255ce867de073e', |         'md5': '288532f0ad18307705b01e581304cd7b', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '846685', |             'id': 'F309013801000501', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Puntata del 25 maggio', |             'title': 'Puntata del 25 maggio', | ||||||
|             'description': 'md5:ee2e456e3eb1dba5e814596655bb5296', |             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||||
|             'thumbnail': r're:^https?://.*\.jpg$', |             'thumbnail': r're:^https?://.*\.jpg$', | ||||||
|             'duration': 6565, |             'duration': 6565.007, | ||||||
|             'creator': 'mediaset', |             'upload_date': '20180526', | ||||||
|             'upload_date': '20180525', |  | ||||||
|             'series': 'Matrix', |             'series': 'Matrix', | ||||||
|             'categories': ['infotainment'], |             'timestamp': 1527326245, | ||||||
|  |             'uploader': 'Canale 5', | ||||||
|  |             'uploader_id': 'C5', | ||||||
|         }, |         }, | ||||||
|         'expected_warnings': ['HTTP Error 403: Forbidden'], |         'expected_warnings': ['HTTP Error 403: Forbidden'], | ||||||
|     }, { |     }, { | ||||||
|         # clip |         # clip | ||||||
|         'url': 'http://www.video.mediaset.it/video/gogglebox/clip/un-grande-classico-della-commedia-sexy_661680.html', |         'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         # iframe simple |         # iframe simple | ||||||
|         'url': 'http://www.video.mediaset.it/player/playerIFrame.shtml?id=665924&autoplay=true', |         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) |         # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/) | ||||||
|         'url': 'https://www.video.mediaset.it/player/playerIFrameTwitter.shtml?id=665104&playrelated=false&autoplay=false&related=true&hidesocial=true', |         'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }, { |     }, { | ||||||
|         'url': 'mediaset:661824', |         'url': 'mediaset:FAFU000000665924', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |  | ||||||
| @@ -84,61 +84,54 @@ class MediasetIE(InfoExtractor): | |||||||
|                 webpage)] |                 webpage)] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         guid = self._match_id(url) | ||||||
|  |         tp_path = 'PR1GhC/media/guid/2702976343/' + guid | ||||||
|         video = self._download_json( |         info = self._extract_theplatform_metadata(tp_path, guid) | ||||||
|             'https://www.video.mediaset.it/html/metainfo.sjson', |  | ||||||
|             video_id, 'Downloading media info', query={ |  | ||||||
|                 'id': video_id |  | ||||||
|             })['video'] |  | ||||||
|  |  | ||||||
|         title = video['title'] |  | ||||||
|         media_id = video.get('guid') or video_id |  | ||||||
|  |  | ||||||
|         video_list = self._download_json( |  | ||||||
|             'http://cdnsel01.mediaset.net/GetCdn2018.aspx', |  | ||||||
|             video_id, 'Downloading video CDN JSON', query={ |  | ||||||
|                 'streamid': media_id, |  | ||||||
|                 'format': 'json', |  | ||||||
|             })['videoList'] |  | ||||||
|  |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         for format_url in video_list: |         subtitles = {} | ||||||
|             ext = determine_ext(format_url) |         first_e = None | ||||||
|             if ext == 'm3u8': |         for asset_type in ('SD', 'HD'): | ||||||
|                 formats.extend(self._extract_m3u8_formats( |             for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'): | ||||||
|                     format_url, video_id, 'mp4', entry_protocol='m3u8_native', |                 try: | ||||||
|                     m3u8_id='hls', fatal=False)) |                     tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||||
|             elif ext == 'mpd': |                         update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), { | ||||||
|                 formats.extend(self._extract_mpd_formats( |                             'mbr': 'true', | ||||||
|                     format_url, video_id, mpd_id='dash', fatal=False)) |                             'formats': f, | ||||||
|             elif ext == 'ism' or '.ism' in format_url: |                             'assetTypes': asset_type, | ||||||
|                 formats.extend(self._extract_ism_formats( |                         }), guid, 'Downloading %s %s SMIL data' % (f, asset_type)) | ||||||
|                     format_url, video_id, ism_id='mss', fatal=False)) |                 except ExtractorError as e: | ||||||
|             else: |                     if not first_e: | ||||||
|                 formats.append({ |                         first_e = e | ||||||
|                     'url': format_url, |                     break | ||||||
|                     'format_id': determine_ext(format_url), |                 for tp_f in tp_formats: | ||||||
|                 }) |                     tp_f['quality'] = 1 if asset_type == 'HD' else 0 | ||||||
|  |                 formats.extend(tp_formats) | ||||||
|  |                 subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||||
|  |         if first_e and not formats: | ||||||
|  |             raise first_e | ||||||
|         self._sort_formats(formats) |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         creator = try_get( |         fields = [] | ||||||
|             video, lambda x: x['brand-info']['publisher'], compat_str) |         for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))): | ||||||
|         category = try_get( |             fields.extend(templ % repl for repl in repls) | ||||||
|             video, lambda x: x['brand-info']['category'], compat_str) |         feed_data = self._download_json( | ||||||
|         categories = [category] if category else None |             'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid, | ||||||
|  |             guid, fatal=False, query={'fields': ','.join(fields)}) | ||||||
|  |         if feed_data: | ||||||
|  |             publish_info = feed_data.get('mediasetprogram$publishInfo') or {} | ||||||
|  |             info.update({ | ||||||
|  |                 'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')), | ||||||
|  |                 'season_number': int_or_none(feed_data.get('tvSeasonNumber')), | ||||||
|  |                 'series': feed_data.get('mediasetprogram$brandTitle'), | ||||||
|  |                 'uploader': publish_info.get('description'), | ||||||
|  |                 'uploader_id': publish_info.get('channel'), | ||||||
|  |                 'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')), | ||||||
|  |             }) | ||||||
|  |  | ||||||
|         return { |         info.update({ | ||||||
|             'id': video_id, |             'id': guid, | ||||||
|             'title': title, |  | ||||||
|             'description': video.get('short-description'), |  | ||||||
|             'thumbnail': video.get('thumbnail'), |  | ||||||
|             'duration': parse_duration(video.get('duration')), |  | ||||||
|             'creator': creator, |  | ||||||
|             'upload_date': unified_strdate(video.get('production-date')), |  | ||||||
|             'webpage_url': video.get('url'), |  | ||||||
|             'series': video.get('brand-value'), |  | ||||||
|             'season': video.get('season'), |  | ||||||
|             'categories': categories, |  | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|         } |             'subtitles': subtitles, | ||||||
|  |         }) | ||||||
|  |         return info | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user