mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[mitele] Fix extraction after website redesign (fixes #10824)
This commit is contained in:
		
				
					committed by
					
						Jaime Marquínez Ferrándiz
					
				
			
			
				
	
			
			
			
						parent
						
							98708e6cbd
						
					
				
				
					commit
					cb882540e8
				
			| @@ -1,3 +1,9 @@ | ||||
| version <unreleased> | ||||
|  | ||||
| Extractors | ||||
| * [mitele] Fix extraction after website redesign (#10824) | ||||
|  | ||||
|  | ||||
| version 2016.11.04 | ||||
|  | ||||
| Core | ||||
|   | ||||
| @@ -1,19 +1,20 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import uuid | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     get_element_by_attribute, | ||||
|     int_or_none, | ||||
|     remove_start, | ||||
|     extract_attributes, | ||||
|     determine_ext, | ||||
|     smuggle_url, | ||||
|     parse_duration, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -72,16 +73,14 @@ class MiTeleBaseIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class MiTeleIE(MiTeleBaseIE): | ||||
| class MiTeleIE(InfoExtractor): | ||||
|     IE_DESC = 'mitele.es' | ||||
|     _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/){3}(?P<id>[^/]+)/' | ||||
|     _VALID_URL = r'https?://(?:www\.)?mitele\.es/programas-tv/(?:[^/]+/)(?P<id>[^/]+)/player' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', | ||||
|         # MD5 is unstable | ||||
|         'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player', | ||||
|         'info_dict': { | ||||
|             'id': '0NF1jJnxS1Wu3pHrmvFyw2', | ||||
|             'display_id': 'programa-144', | ||||
|             'id': '57b0dfb9c715da65618b4afa', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
| @@ -91,57 +90,71 @@ class MiTeleIE(MiTeleBaseIE): | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 2913, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }, { | ||||
|         # no explicit title | ||||
|         'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', | ||||
|         'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player', | ||||
|         'info_dict': { | ||||
|             'id': 'eLZSwoEd1S3pVyUm8lc6F', | ||||
|             'display_id': 'programa-226', | ||||
|             'id': '57b0de3dc915da14058b4876', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Cuarto Milenio - Temporada 6 - Programa 226', | ||||
|             'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', | ||||
|             'title': 'Cuarto Milenio Temporada 6 Programa 226', | ||||
|             'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', | ||||
|             'series': 'Cuarto Milenio', | ||||
|             'season': 'Temporada 6', | ||||
|             'episode': 'Programa 226', | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 7312, | ||||
|             'duration': 7313, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         gigya_url = self._search_regex(r'<gigya-api>[^>]*</gigya-api>[^>]*<script\s*src="([^"]*)">[^>]*</script>', webpage, 'gigya', default=None) | ||||
|         gigya_sc = self._download_webpage(compat_urlparse.urljoin(r'http://www.mitele.es/', gigya_url), video_id, 'Downloading gigya script') | ||||
|         # Get an appKey/uuid pair, needed to request the session key | ||||
|         appKey_var = self._search_regex(r'value\("appGridApplicationKey",([0-9a-f]+)\)', gigya_sc, 'appKey variable') | ||||
|         appKey = self._search_regex(r'var %s="([0-9a-f]+)"' % appKey_var, gigya_sc, 'appKey') | ||||
|         uid = compat_str(uuid.uuid4()) | ||||
|         session_url = 'https://appgrid-api.cloud.accedo.tv/session?appKey=%s&uuid=%s' % (appKey, uid) | ||||
|         session_json = self._download_json(session_url, video_id, 'Downloading session keys') | ||||
|         sessionKey = compat_str(session_json['sessionKey']) | ||||
|  | ||||
|         info = self._get_player_info(url, webpage) | ||||
|         paths_url = 'https://appgrid-api.cloud.accedo.tv/metadata/general_configuration,%20web_configuration?sessionKey=' + sessionKey | ||||
|         paths = self._download_json(paths_url, video_id, 'Downloading paths JSON') | ||||
|         ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] | ||||
|         data_p = ( | ||||
|             'http://' + ooyala_s['base_url'] + ooyala_s['full_path'] + ooyala_s['provider_id'] + | ||||
|             '/docs/' + video_id + '?include_titles=Series,Season&product_name=test&format=full') | ||||
|         data = self._download_json(data_p, video_id, 'Downloading data JSON') | ||||
|         source = data['hits']['hits'][0]['_source'] | ||||
|         embedCode = source['offers'][0]['embed_codes'][0] | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', | ||||
|             webpage, 'title', default=None) | ||||
|         titles = source['localizable_titles'][0] | ||||
|         title = titles.get('title_medium') or titles['title_long'] | ||||
|         episode = titles['title_sort_name'] | ||||
|         description = titles['summary_long'] | ||||
|         titles_series = source['localizable_titles_series'][0] | ||||
|         series = titles_series['title_long'] | ||||
|         titles_season = source['localizable_titles_season'][0] | ||||
|         season = titles_season['title_medium'] | ||||
|         duration = parse_duration(source['videos'][0]['duration']) | ||||
|  | ||||
|         mobj = re.search(r'''(?sx) | ||||
|                             class="Destacado-text"[^>]*>.*?<h1>\s* | ||||
|                             <span>(?P<series>[^<]+)</span>\s* | ||||
|                             <span>(?P<season>[^<]+)</span>\s* | ||||
|                             <span>(?P<episode>[^<]+)</span>''', webpage) | ||||
|         series, season, episode = mobj.groups() if mobj else [None] * 3 | ||||
|  | ||||
|         if not title: | ||||
|             if mobj: | ||||
|                 title = '%s - %s - %s' % (series, season, episode) | ||||
|             else: | ||||
|                 title = remove_start(self._search_regex( | ||||
|                     r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ') | ||||
|  | ||||
|         info.update({ | ||||
|             'display_id': display_id, | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             # for some reason only HLS is supported | ||||
|             'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8'}), | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': get_element_by_attribute('class', 'text', webpage), | ||||
|             'description': description, | ||||
|             'series': series, | ||||
|             'season': season, | ||||
|             'episode': episode, | ||||
|         }) | ||||
|         return info | ||||
|             'duration': duration, | ||||
|             'thumbnail': source['images'][0]['url'], | ||||
|         } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ class OoyalaBaseIE(InfoExtractor): | ||||
|     _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/' | ||||
|     _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?' | ||||
|  | ||||
|     def _extract(self, content_tree_url, video_id, domain='example.org'): | ||||
|     def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None): | ||||
|         content_tree = self._download_json(content_tree_url, video_id)['content_tree'] | ||||
|         metadata = content_tree[list(content_tree)[0]] | ||||
|         embed_code = metadata['embed_code'] | ||||
| @@ -29,7 +29,7 @@ class OoyalaBaseIE(InfoExtractor): | ||||
|             self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code) + | ||||
|             compat_urllib_parse_urlencode({ | ||||
|                 'domain': domain, | ||||
|                 'supportedFormats': 'mp4,rtmp,m3u8,hds', | ||||
|                 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds', | ||||
|             }), video_id) | ||||
|  | ||||
|         cur_auth_data = auth_data['authorization_data'][embed_code] | ||||
| @@ -145,8 +145,9 @@ class OoyalaIE(OoyalaBaseIE): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|         embed_code = self._match_id(url) | ||||
|         domain = smuggled_data.get('domain') | ||||
|         supportedformats = smuggled_data.get('supportedformats') | ||||
|         content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code) | ||||
|         return self._extract(content_tree_url, embed_code, domain) | ||||
|         return self._extract(content_tree_url, embed_code, domain, supportedformats) | ||||
|  | ||||
|  | ||||
| class OoyalaExternalIE(OoyalaBaseIE): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user