mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[medialaan] add support for DPG Media MyChannels-based websites
closes #14871 closes #15597 closes #16106 closes #16489
This commit is contained in:
		| @@ -1459,6 +1459,7 @@ from .vrv import ( | ||||
|     VRVSeriesIE, | ||||
| ) | ||||
| from .vshare import VShareIE | ||||
| from .vtm import VTMIE | ||||
| from .medialaan import MedialaanIE | ||||
| from .vube import VubeIE | ||||
| from .vuclip import VuClipIE | ||||
|   | ||||
| @@ -128,6 +128,7 @@ from .zype import ZypeIE | ||||
| from .odnoklassniki import OdnoklassnikiIE | ||||
| from .kinja import KinjaEmbedIE | ||||
| from .arcpublishing import ArcPublishingIE | ||||
| from .medialaan import MedialaanIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -2223,6 +2224,20 @@ class GenericIE(InfoExtractor): | ||||
|                 'duration': 1581, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # MyChannels SDK embed | ||||
|             # https://www.24kitchen.nl/populair/deskundige-dit-waarom-sommigen-gevoelig-zijn-voor-voedselallergieen | ||||
|             'url': 'https://www.demorgen.be/nieuws/burgemeester-rotterdam-richt-zich-in-videoboodschap-tot-relschoppers-voelt-het-goed~b0bcfd741/', | ||||
|             'md5': '90c0699c37006ef18e198c032d81739c', | ||||
|             'info_dict': { | ||||
|                 'id': '194165', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Burgemeester Aboutaleb spreekt relschoppers toe', | ||||
|                 'timestamp': 1611740340, | ||||
|                 'upload_date': '20210127', | ||||
|                 'duration': 159, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -2462,6 +2477,9 @@ class GenericIE(InfoExtractor): | ||||
|         webpage = self._webpage_read_content( | ||||
|             full_response, url, video_id, prefix=first_bytes) | ||||
|  | ||||
|         if '<title>DPG Media Privacy Gate</title>' in webpage: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest? | ||||
| @@ -2593,6 +2611,11 @@ class GenericIE(InfoExtractor): | ||||
|         if arc_urls: | ||||
|             return self.playlist_from_matches(arc_urls, video_id, video_title, ie=ArcPublishingIE.ie_key()) | ||||
|  | ||||
|         mychannels_urls = MedialaanIE._extract_urls(webpage) | ||||
|         if mychannels_urls: | ||||
|             return self.playlist_from_matches( | ||||
|                 mychannels_urls, video_id, video_title, ie=MedialaanIE.ie_key()) | ||||
|  | ||||
|         # Look for embedded rtl.nl player | ||||
|         matches = re.findall( | ||||
|             r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"', | ||||
|   | ||||
| @@ -2,268 +2,113 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .gigya import GigyaBaseIE | ||||
|  | ||||
| from ..compat import compat_str | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     extract_attributes, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     try_get, | ||||
|     unified_timestamp, | ||||
|     mimetype2ext, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MedialaanIE(GigyaBaseIE): | ||||
| class MedialaanIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?:www\.|nieuws\.)? | ||||
|                         (?: | ||||
|                             (?P<site_id>vtm|q2|vtmkzoom)\.be/ | ||||
|                             (?:embed\.)?mychannels.video/embed/| | ||||
|                             embed\.mychannels\.video/(?:s(?:dk|cript)/)?production/| | ||||
|                             (?:www\.)?(?: | ||||
|                                 (?: | ||||
|                                 video(?:/[^/]+/id/|/?\?.*?\baid=)| | ||||
|                                 (?:[^/]+/)* | ||||
|                                     7sur7| | ||||
|                                     demorgen| | ||||
|                                     hln| | ||||
|                                     joe| | ||||
|                                     qmusic | ||||
|                                 )\.be| | ||||
|                                 (?: | ||||
|                                     [abe]d| | ||||
|                                     bndestem| | ||||
|                                     destentor| | ||||
|                                     gelderlander| | ||||
|                                     pzc| | ||||
|                                     tubantia| | ||||
|                                     volkskrant | ||||
|                                 )\.nl | ||||
|                             )/video/(?:[^/]+/)*[^/?&#]+~p | ||||
|                         ) | ||||
|                         ) | ||||
|                         (?P<id>[^/?#&]+) | ||||
|                         (?P<id>\d+) | ||||
|                     ''' | ||||
|     _NETRC_MACHINE = 'medialaan' | ||||
|     _APIKEY = '3_HZ0FtkMW_gOyKlqQzW5_0FHRC7Nd5XpXJZcDdXY4pk5eES2ZWmejRW5egwVm4ug-' | ||||
|     _SITE_TO_APP_ID = { | ||||
|         'vtm': 'vtm_watch', | ||||
|         'q2': 'q2', | ||||
|         'vtmkzoom': 'vtmkzoom', | ||||
|     } | ||||
|     _TESTS = [{ | ||||
|         # vod | ||||
|         'url': 'http://vtm.be/video/volledige-afleveringen/id/vtm_20170219_VM0678361_vtmwatch', | ||||
|         'url': 'https://www.bndestem.nl/video/de-terugkeer-van-ally-de-aap-en-wie-vertrekt-er-nog-bij-nac~p193993', | ||||
|         'info_dict': { | ||||
|             'id': 'vtm_20170219_VM0678361_vtmwatch', | ||||
|             'id': '193993', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Allemaal Chris afl. 6', | ||||
|             'description': 'md5:4be86427521e7b07e0adb0c9c554ddb2', | ||||
|             'timestamp': 1487533280, | ||||
|             'upload_date': '20170219', | ||||
|             'duration': 2562, | ||||
|             'series': 'Allemaal Chris', | ||||
|             'season': 'Allemaal Chris', | ||||
|             'season_number': 1, | ||||
|             'season_id': '256936078124527', | ||||
|             'episode': 'Allemaal Chris afl. 6', | ||||
|             'episode_number': 6, | ||||
|             'episode_id': '256936078591527', | ||||
|             'title': 'De terugkeer van Ally de Aap en wie vertrekt er nog bij NAC?', | ||||
|             'timestamp': 1611663540, | ||||
|             'upload_date': '20210126', | ||||
|             'duration': 238, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Requires account credentials', | ||||
|     }, { | ||||
|         # clip | ||||
|         'url': 'http://vtm.be/video?aid=168332', | ||||
|         'info_dict': { | ||||
|             'id': '168332', | ||||
|             'ext': 'mp4', | ||||
|             'title': '"Veronique liegt!"', | ||||
|             'description': 'md5:1385e2b743923afe54ba4adc38476155', | ||||
|             'timestamp': 1489002029, | ||||
|             'upload_date': '20170308', | ||||
|             'duration': 96, | ||||
|         }, | ||||
|     }, { | ||||
|         # vod | ||||
|         'url': 'http://vtm.be/video/volledige-afleveringen/id/257107153551000', | ||||
|         'url': 'https://www.gelderlander.nl/video/kanalen/degelderlander~c320/series/snel-nieuws~s984/noodbevel-in-doetinchem-politie-stuurt-mensen-centrum-uit~p194093', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # vod | ||||
|         'url': 'http://vtm.be/video?aid=163157', | ||||
|         'url': 'https://embed.mychannels.video/sdk/production/193993?options=TFTFF_default', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # vod | ||||
|         'url': 'http://www.q2.be/video/volledige-afleveringen/id/2be_20170301_VM0684442_q2', | ||||
|         'url': 'https://embed.mychannels.video/script/production/193993', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # clip | ||||
|         'url': 'http://vtmkzoom.be/k3-dansstudio/een-nieuw-seizoen-van-k3-dansstudio', | ||||
|         'url': 'https://embed.mychannels.video/production/193993', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # http/s redirect | ||||
|         'url': 'https://vtmkzoom.be/video?aid=45724', | ||||
|         'info_dict': { | ||||
|             'id': '257136373657000', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'K3 Dansstudio Ushuaia afl.6', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Requires account credentials', | ||||
|         'url': 'https://mychannels.video/embed/193993', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # nieuws.vtm.be | ||||
|         'url': 'https://nieuws.vtm.be/stadion/stadion/genk-nog-moeilijk-programma', | ||||
|         'url': 'https://embed.mychannels.video/embed/193993', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._logged_in = False | ||||
|  | ||||
|     def _login(self): | ||||
|         username, password = self._get_login_info() | ||||
|         if username is None: | ||||
|             self.raise_login_required() | ||||
|  | ||||
|         auth_data = { | ||||
|             'APIKey': self._APIKEY, | ||||
|             'sdk': 'js_6.1', | ||||
|             'format': 'json', | ||||
|             'loginID': username, | ||||
|             'password': password, | ||||
|         } | ||||
|  | ||||
|         auth_info = self._gigya_login(auth_data) | ||||
|  | ||||
|         self._uid = auth_info['UID'] | ||||
|         self._uid_signature = auth_info['UIDSignature'] | ||||
|         self._signature_timestamp = auth_info['signatureTimestamp'] | ||||
|  | ||||
|         self._logged_in = True | ||||
|     @staticmethod | ||||
|     def _extract_urls(webpage): | ||||
|         entries = [] | ||||
|         for element in re.findall(r'(<div[^>]+data-mychannels-type="video"[^>]*>)', webpage): | ||||
|             mychannels_id = extract_attributes(element).get('data-mychannels-id') | ||||
|             if mychannels_id: | ||||
|                 entries.append('https://mychannels.video/embed/' + mychannels_id) | ||||
|         return entries | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id, site_id = mobj.group('id', 'site_id') | ||||
|         production_id = self._match_id(url) | ||||
|         production = self._download_json( | ||||
|             'https://embed.mychannels.video/sdk/production/' + production_id, | ||||
|             production_id, query={'options': 'UUUU_default'})['productions'][0] | ||||
|         title = production['title'] | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         config = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'videoJSConfig\s*=\s*JSON\.parse\(\'({.+?})\'\);', | ||||
|                 webpage, 'config', default='{}'), video_id, | ||||
|             transform_source=lambda s: s.replace( | ||||
|                 '\\\\', '\\').replace(r'\"', '"').replace(r"\'", "'")) | ||||
|  | ||||
|         vod_id = config.get('vodId') or self._search_regex( | ||||
|             (r'\\"vodId\\"\s*:\s*\\"(.+?)\\"', | ||||
|              r'"vodId"\s*:\s*"(.+?)"', | ||||
|              r'<[^>]+id=["\']vod-(\d+)'), | ||||
|             webpage, 'video_id', default=None) | ||||
|  | ||||
|         # clip, no authentication required | ||||
|         if not vod_id: | ||||
|             player = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'vmmaplayer\(({.+?})\);', webpage, 'vmma player', | ||||
|                     default=''), | ||||
|                 video_id, transform_source=lambda s: '[%s]' % s, fatal=False) | ||||
|             if player: | ||||
|                 video = player[-1] | ||||
|                 if video['videoUrl'] in ('http', 'https'): | ||||
|                     return self.url_result(video['url'], MedialaanIE.ie_key()) | ||||
|                 info = { | ||||
|                     'id': video_id, | ||||
|                     'url': video['videoUrl'], | ||||
|                     'title': video['title'], | ||||
|                     'thumbnail': video.get('imageUrl'), | ||||
|                     'timestamp': int_or_none(video.get('createdDate')), | ||||
|                     'duration': int_or_none(video.get('duration')), | ||||
|                 } | ||||
|         formats = [] | ||||
|         for source in (production.get('sources') or []): | ||||
|             src = source.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             ext = mimetype2ext(source.get('type')) | ||||
|             if ext == 'm3u8': | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, production_id, 'mp4', 'm3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|             else: | ||||
|                 info = self._parse_html5_media_entries( | ||||
|                     url, webpage, video_id, m3u8_id='hls')[0] | ||||
|                 info.update({ | ||||
|                     'id': video_id, | ||||
|                     'title': self._html_search_meta('description', webpage), | ||||
|                     'duration': parse_duration(self._html_search_meta('duration', webpage)), | ||||
|                 formats.append({ | ||||
|                     'ext': ext, | ||||
|                     'url': src, | ||||
|                 }) | ||||
|         # vod, authentication required | ||||
|         else: | ||||
|             if not self._logged_in: | ||||
|                 self._login() | ||||
|  | ||||
|             settings = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', | ||||
|                     webpage, 'drupal settings', default='{}'), | ||||
|                 video_id) | ||||
|  | ||||
|             def get(container, item): | ||||
|                 return try_get( | ||||
|                     settings, lambda x: x[container][item], | ||||
|                     compat_str) or self._search_regex( | ||||
|                     r'"%s"\s*:\s*"([^"]+)' % item, webpage, item, | ||||
|                     default=None) | ||||
|  | ||||
|             app_id = get('vod', 'app_id') or self._SITE_TO_APP_ID.get(site_id, 'vtm_watch') | ||||
|             sso = get('vod', 'gigyaDatabase') or 'vtm-sso' | ||||
|  | ||||
|             data = self._download_json( | ||||
|                 'http://vod.medialaan.io/api/1.0/item/%s/video' % vod_id, | ||||
|                 video_id, query={ | ||||
|                     'app_id': app_id, | ||||
|                     'user_network': sso, | ||||
|                     'UID': self._uid, | ||||
|                     'UIDSignature': self._uid_signature, | ||||
|                     'signatureTimestamp': self._signature_timestamp, | ||||
|                 }) | ||||
|  | ||||
|             formats = self._extract_m3u8_formats( | ||||
|                 data['response']['uri'], video_id, entry_protocol='m3u8_native', | ||||
|                 ext='mp4', m3u8_id='hls') | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|             info = { | ||||
|                 'id': vod_id, | ||||
|         return { | ||||
|             'id': production_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': production.get('posterUrl'), | ||||
|             'timestamp': parse_iso8601(production.get('publicationDate'), ' '), | ||||
|             'duration': int_or_none(production.get('duration')) or None, | ||||
|         } | ||||
|  | ||||
|             api_key = get('vod', 'apiKey') | ||||
|             channel = get('medialaanGigya', 'channel') | ||||
|  | ||||
|             if api_key: | ||||
|                 videos = self._download_json( | ||||
|                     'http://vod.medialaan.io/vod/v2/videos', video_id, fatal=False, | ||||
|                     query={ | ||||
|                         'channels': channel, | ||||
|                         'ids': vod_id, | ||||
|                         'limit': 1, | ||||
|                         'apikey': api_key, | ||||
|                     }) | ||||
|                 if videos: | ||||
|                     video = try_get( | ||||
|                         videos, lambda x: x['response']['videos'][0], dict) | ||||
|                     if video: | ||||
|                         def get(container, item, expected_type=None): | ||||
|                             return try_get( | ||||
|                                 video, lambda x: x[container][item], expected_type) | ||||
|  | ||||
|                         def get_string(container, item): | ||||
|                             return get(container, item, compat_str) | ||||
|  | ||||
|                         info.update({ | ||||
|                             'series': get_string('program', 'title'), | ||||
|                             'season': get_string('season', 'title'), | ||||
|                             'season_number': int_or_none(get('season', 'number')), | ||||
|                             'season_id': get_string('season', 'id'), | ||||
|                             'episode': get_string('episode', 'title'), | ||||
|                             'episode_number': int_or_none(get('episode', 'number')), | ||||
|                             'episode_id': get_string('episode', 'id'), | ||||
|                             'duration': int_or_none( | ||||
|                                 video.get('duration')) or int_or_none( | ||||
|                                 video.get('durationMillis'), scale=1000), | ||||
|                             'title': get_string('episode', 'title'), | ||||
|                             'description': get_string('episode', 'text'), | ||||
|                             'timestamp': unified_timestamp(get_string( | ||||
|                                 'publication', 'begin')), | ||||
|                         }) | ||||
|  | ||||
|             if not info.get('title'): | ||||
|                 info['title'] = try_get( | ||||
|                     config, lambda x: x['videoConfig']['title'], | ||||
|                     compat_str) or self._html_search_regex( | ||||
|                     r'\\"title\\"\s*:\s*\\"(.+?)\\"', webpage, 'title', | ||||
|                     default=None) or self._og_search_title(webpage) | ||||
|  | ||||
|         if not info.get('description'): | ||||
|             info['description'] = self._html_search_regex( | ||||
|                 r'<div[^>]+class="field-item\s+even">\s*<p>(.+?)</p>', | ||||
|                 webpage, 'description', default=None) | ||||
|  | ||||
|         return info | ||||
|   | ||||
							
								
								
									
										62
									
								
								youtube_dl/extractor/vtm.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								youtube_dl/extractor/vtm.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VTMIE(InfoExtractor): | ||||
|     """Resolve vtm.be video pages to their mychannels.video embed. | ||||
|  | ||||
|     The page UUID is looked up through a GraphQL endpoint and the result is | ||||
|     handed over, as a ``url``-type result, to the extractor registered under | ||||
|     the ``Medialaan`` key. | ||||
|     """ | ||||
|     # Matches https://[www.]vtm.be/<slug>~v<uuid>. The slug group is unnamed and | ||||
|     # is not read below; the ``id`` group is a lowercase-hex UUID (8-4-4-4-12). | ||||
|     _VALID_URL = r'https?://(?:www\.)?vtm\.be/([^/?&#]+)~v(?P<id>[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})' | ||||
|     _TEST = { | ||||
|         'url': 'https://vtm.be/gast-vernielt-genkse-hotelkamer~ve7534523-279f-4b4d-a5c9-a33ffdbe23e1', | ||||
|         'md5': '37dca85fbc3a33f2de28ceb834b071f8', | ||||
|         'info_dict': { | ||||
|             # NOTE(review): the expected id is numeric, not the UUID from the URL; | ||||
|             # presumably it is the id reported by the extractor this one | ||||
|             # delegates to -- confirm. | ||||
|             'id': '192445', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Gast vernielt Genkse hotelkamer', | ||||
|             'timestamp': 1611060180, | ||||
|             'upload_date': '20210119', | ||||
|             'duration': 74, | ||||
|             # TODO: fix url _type result processing | ||||
|             # 'series': 'Op Interventie', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         """Return a ``url``-type result pointing at the mychannels.video embed. | ||||
|  | ||||
|         Raises KeyError when the response lacks ``data``/``getComponent`` or | ||||
|         the component lacks ``myChannelsVideo``, since those are subscripted | ||||
|         directly rather than read with ``.get()``. | ||||
|         """ | ||||
|         uuid = self._match_id(url) | ||||
|         # The GraphQL document is passed through ``query=`` (presumably sent as a | ||||
|         # URL query parameter -- confirm against _download_json); the UUID is | ||||
|         # interpolated into it with ``%s``. | ||||
|         video = self._download_json( | ||||
|             'https://omc4vm23offuhaxx6hekxtzspi.appsync-api.eu-west-1.amazonaws.com/graphql', | ||||
|             uuid, query={ | ||||
|                 'query': '''{ | ||||
|   getComponent(type: Video, uuid: "%s") { | ||||
|     ... on Video { | ||||
|       description | ||||
|       duration | ||||
|       myChannelsVideo | ||||
|       program { | ||||
|         title | ||||
|       } | ||||
|       publishedAt | ||||
|       title | ||||
|     } | ||||
|   } | ||||
| }''' % uuid, | ||||
|             }, headers={ | ||||
|                 'x-api-key': 'da2-lz2cab4tfnah3mve6wiye4n77e', | ||||
|             })['data']['getComponent'] | ||||
|  | ||||
|         # NOTE(review): the metadata set next to 'url' may not all reach the | ||||
|         # final result -- the TODO in _TEST has 'series' disabled for that reason. | ||||
|         return { | ||||
|             '_type': 'url', | ||||
|             'id': uuid, | ||||
|             'title': video.get('title'), | ||||
|             # '%d' requires a number here; a missing key raises KeyError and a | ||||
|             # non-numeric value raises TypeError. | ||||
|             'url': 'http://mychannels.video/embed/%d' % video['myChannelsVideo'], | ||||
|             'description': video.get('description'), | ||||
|             'timestamp': parse_iso8601(video.get('publishedAt')), | ||||
|             'duration': int_or_none(video.get('duration')), | ||||
|             'series': try_get(video, lambda x: x['program']['title']), | ||||
|             'ie_key': 'Medialaan', | ||||
|         } | ||||
		Reference in New Issue
	
	Block a user