mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[rtbf] improve extraction(fixes #9267)
This commit is contained in:
		| @@ -4,12 +4,18 @@ from __future__ import unicode_literals | |||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..utils import ( | from ..utils import ( | ||||||
|     int_or_none, |     int_or_none, | ||||||
|     unescapeHTML, |     ExtractorError, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class RTBFIE(InfoExtractor): | class RTBFIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?rtbf\.be/(?:video/[^?]+\?.*\bid=|ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=)(?P<id>\d+)' |     _VALID_URL = r'''(?x) | ||||||
|  |         https?://(?:www\.)?rtbf\.be/ | ||||||
|  |         (?: | ||||||
|  |             video/[^?]+\?.*\bid=| | ||||||
|  |             ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| | ||||||
|  |             auvio/[^/]+\?.*id= | ||||||
|  |         )(?P<id>\d+)''' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', |         'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', | ||||||
|         'md5': '799f334ddf2c0a582ba80c44655be570', |         'md5': '799f334ddf2c0a582ba80c44655be570', | ||||||
| @@ -17,7 +23,11 @@ class RTBFIE(InfoExtractor): | |||||||
|             'id': '1921274', |             'id': '1921274', | ||||||
|             'ext': 'mp4', |             'ext': 'mp4', | ||||||
|             'title': 'Les Diables au coeur (épisode 2)', |             'title': 'Les Diables au coeur (épisode 2)', | ||||||
|  |             'description': 'Football - Diables Rouges', | ||||||
|             'duration': 3099, |             'duration': 3099, | ||||||
|  |             'upload_date': '20140425', | ||||||
|  |             'timestamp': 1398456336, | ||||||
|  |             'uploader': 'rtbfsport', | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         # geo restricted |         # geo restricted | ||||||
| @@ -26,45 +36,63 @@ class RTBFIE(InfoExtractor): | |||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', |         'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', | ||||||
|         'only_matching': True, |         'only_matching': True, | ||||||
|  |     }, { | ||||||
|  |         'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', | ||||||
|  |         'only_matching': True, | ||||||
|     }] |     }] | ||||||
|  |     _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' | ||||||
|  |     _PROVIDERS = { | ||||||
|  |         'YOUTUBE': 'Youtube', | ||||||
|  |         'DAILYMOTION': 'Dailymotion', | ||||||
|  |         'VIMEO': 'Vimeo', | ||||||
|  |     } | ||||||
|     _QUALITIES = [ |     _QUALITIES = [ | ||||||
|         ('mobile', 'mobile'), |         ('mobile', 'SD'), | ||||||
|         ('web', 'SD'), |         ('web', 'MD'), | ||||||
|         ('url', 'MD'), |  | ||||||
|         ('high', 'HD'), |         ('high', 'HD'), | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         video_id = self._match_id(url) |         video_id = self._match_id(url) | ||||||
|  |         data = self._download_json( | ||||||
|  |             'http://www.rtbf.be/api/media/video?method=getVideoDetail&args[]=%s' % video_id, video_id) | ||||||
|  |  | ||||||
|         webpage = self._download_webpage( |         error = data.get('error') | ||||||
|             'http://www.rtbf.be/video/embed?id=%s' % video_id, video_id) |         if error: | ||||||
|  |             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||||
|  |  | ||||||
|         data = self._parse_json( |         data = data['data'] | ||||||
|             unescapeHTML(self._search_regex( |  | ||||||
|                 r'data-media="([^"]+)"', webpage, 'data video')), |         provider = data.get('provider') | ||||||
|             video_id) |         if provider in self._PROVIDERS: | ||||||
|  |             return self.url_result(data['url'], self._PROVIDERS[provider]) | ||||||
|  |  | ||||||
|         if data.get('provider').lower() == 'youtube': |  | ||||||
|             video_url = data.get('downloadUrl') or data.get('url') |  | ||||||
|             return self.url_result(video_url, 'Youtube') |  | ||||||
|         formats = [] |         formats = [] | ||||||
|         for key, format_id in self._QUALITIES: |         for key, format_id in self._QUALITIES: | ||||||
|             format_url = data['sources'].get(key) |             format_url = data.get(key + 'Url') | ||||||
|             if format_url: |             if format_url: | ||||||
|                 formats.append({ |                 formats.append({ | ||||||
|                     'format_id': format_id, |                     'format_id': format_id, | ||||||
|                     'url': format_url, |                     'url': format_url, | ||||||
|                 }) |                 }) | ||||||
|  |  | ||||||
|  |         thumbnails = [] | ||||||
|  |         for thumbnail_id, thumbnail_url in data.get('thumbnail', {}).items(): | ||||||
|  |             if thumbnail_id != 'default': | ||||||
|  |                 thumbnails.append({ | ||||||
|  |                     'url': self._IMAGE_HOST + thumbnail_url, | ||||||
|  |                     'id': thumbnail_id, | ||||||
|  |                 }) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'formats': formats, |             'formats': formats, | ||||||
|             'title': data['title'], |             'title': data['title'], | ||||||
|             'description': data.get('description') or data.get('subtitle'), |             'description': data.get('description') or data.get('subtitle'), | ||||||
|             'thumbnail': data.get('thumbnail'), |             'thumbnails': thumbnails, | ||||||
|             'duration': data.get('duration') or data.get('realDuration'), |             'duration': data.get('duration') or data.get('realDuration'), | ||||||
|             'timestamp': int_or_none(data.get('created')), |             'timestamp': int_or_none(data.get('created')), | ||||||
|             'view_count': int_or_none(data.get('viewCount')), |             'view_count': int_or_none(data.get('viewCount')), | ||||||
|  |             'uploader': data.get('channel'), | ||||||
|  |             'tags': data.get('tags'), | ||||||
|         } |         } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user