mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[seeker] remove Revision3 extractors and fix extraction
This commit is contained in:
		| @@ -932,10 +932,6 @@ from .rentv import ( | |||||||
| from .restudy import RestudyIE | from .restudy import RestudyIE | ||||||
| from .reuters import ReutersIE | from .reuters import ReutersIE | ||||||
| from .reverbnation import ReverbNationIE | from .reverbnation import ReverbNationIE | ||||||
| from .revision3 import ( |  | ||||||
|     Revision3EmbedIE, |  | ||||||
|     Revision3IE, |  | ||||||
| ) |  | ||||||
| from .rice import RICEIE | from .rice import RICEIE | ||||||
| from .rmcdecouverte import RMCDecouverteIE | from .rmcdecouverte import RMCDecouverteIE | ||||||
| from .ro220 import Ro220IE | from .ro220 import Ro220IE | ||||||
|   | |||||||
| @@ -1,170 +0,0 @@ | |||||||
| # coding: utf-8 |  | ||||||
| from __future__ import unicode_literals |  | ||||||
|  |  | ||||||
| import re |  | ||||||
|  |  | ||||||
| from .common import InfoExtractor |  | ||||||
| from ..compat import compat_str |  | ||||||
| from ..utils import ( |  | ||||||
|     int_or_none, |  | ||||||
|     parse_iso8601, |  | ||||||
|     unescapeHTML, |  | ||||||
|     qualities, |  | ||||||
| ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
class Revision3EmbedIE(InfoExtractor):
    """Extractor for Revision3 / Animalist / Seeker Network player embeds.

    Accepts player embed URLs (``.../player/embed?videoId=<id>``) as well as
    the internal ``revision3:[<playlist_type>:]<id>`` scheme, and resolves
    either one through the ``getPlaylist.json`` API.
    """
    IE_NAME = 'revision3:embed'
    _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
    _TEST = {
        'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
        'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
        'info_dict': {
            'id': '67558',
            'ext': 'mp4',
            'title': 'The Pros & Cons Of Zoos',
            'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
            'uploader_id': 'dnews',
            'uploader': 'DNews',
        }
    }
    _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'

    def _real_extract(self, url):
        """Fetch the first playlist item for *url* and return its info dict.

        ``title`` and ``media`` are mandatory (a missing key raises
        ``KeyError``, as does an empty ``items`` list via ``IndexError``);
        every other field is optional and tolerated when absent or null.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('playlist_id')
        # Embed URLs and bare ``revision3:<id>`` carry no explicit type; the
        # ID is then sent under the ``video_id`` query key.
        playlist_type = mobj.group('playlist_type') or 'video_id'
        video_data = self._download_json(
            'http://revision3.com/api/getPlaylist.json', playlist_id, query={
                'api_key': self._API_KEY,
                'codecs': 'h264,vp8,theora',
                playlist_type: playlist_id,
            })['items'][0]

        formats = []
        # ``media`` is walked as {codec name: {quality id: descriptor}}.
        for vcodec, media in video_data['media'].items():
            for quality_id, quality in media.items():
                format_url = quality.get('url')
                if not format_url:
                    # One broken descriptor must not abort the whole
                    # extraction; just skip that format.
                    continue
                if quality_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, playlist_id, 'mp4',
                        'm3u8_native', m3u8_id='hls', fatal=False))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': '%s-%s' % (vcodec, quality_id),
                        'tbr': int_or_none(quality.get('bitrate')),
                        'vcodec': vcodec,
                    })
        self._sort_formats(formats)

        # ``show`` may be absent *or* an explicit null; ``.get('show', {})``
        # only covers the former, so normalise both cases to an empty dict.
        show = video_data.get('show') or {}

        return {
            'id': playlist_id,
            'title': unescapeHTML(video_data['title']),
            'description': unescapeHTML(video_data.get('summary')),
            'uploader': show.get('name'),
            'uploader_id': show.get('slug'),
            'duration': int_or_none(video_data.get('duration')),
            'formats': formats,
        }
|  |  | ||||||
|  |  | ||||||
class Revision3IE(InfoExtractor):
    """Extractor for revision3.com / animalist.com pages.

    Page metadata is fetched from the site's ``apiProxy/ddn`` endpoint.
    Pages of type ``episode`` or ``embed`` yield a single
    ``url_transparent`` result; every other page type (the tests list show
    and tag pages) is treated as a paginated episode listing and returned
    as a playlist.
    """
    IE_NAME = 'revision'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
    _TESTS = [{
        'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
        'md5': 'd94a72d85d0a829766de4deb8daaf7df',
        'info_dict': {
            'id': '71089',
            'display_id': 'technobuffalo/5-google-predictions-for-2016',
            'ext': 'webm',
            'title': '5 Google Predictions for 2016',
            'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
            'upload_date': '20151228',
            'timestamp': 1451325600,
            'duration': 187,
            'uploader': 'TechnoBuffalo',
            'uploader_id': 'technobuffalo',
        }
    }, {
        # Show
        'url': 'http://revision3.com/variant',
        'only_matching': True,
    }, {
        # Tag
        'url': 'http://revision3.com/vr',
        'only_matching': True,
    }]
    _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'

    def _real_extract(self, url):
        """Download the page descriptor and dispatch on its ``type``."""
        domain, display_id = re.match(self._VALID_URL, url).groups()
        # First label of the domain ('revision3' / 'animalist'); passed
        # through as ``extractor_key`` on every result.
        site = domain.split('.')[0]
        page_info = self._download_json(
            self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)

        page_data = page_info['data']
        page_type = page_data['type']
        if page_type in ('episode', 'embed'):
            return self._episode_result(page_data, page_type, display_id, site)
        return self._listing_result(
            page_info, page_type, domain, display_id, site)

    def _episode_result(self, page_data, page_type, display_id, site):
        """Build the ``url_transparent`` result for an episode/embed page."""
        show_data = page_data['show']['data']
        video_data = page_data['video']['data']

        preference = qualities(['mini', 'small', 'medium', 'large'])
        # ``images`` may be missing or null; both mean "no thumbnails".
        thumbnails = [{
            'url': image_url,
            'id': image_id,
            'preference': preference(image_id)
        } for image_id, image_url in (page_data.get('images') or {}).items()]

        if page_type == 'embed':
            # Embed pages point at an external player URL.
            target_url = video_data['embed']
        else:
            # Episodes are delegated via the internal ``revision3:`` scheme.
            # The video id is read only here so that embed pages without
            # one still extract.
            target_url = 'revision3:%s' % compat_str(video_data['id'])

        return {
            '_type': 'url_transparent',
            'url': target_url,
            'id': compat_str(page_data['id']),
            'display_id': display_id,
            'title': unescapeHTML(page_data['name']),
            'description': unescapeHTML(page_data.get('summary')),
            'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
            'author': page_data.get('author'),
            'uploader': show_data.get('name'),
            'uploader_id': show_data.get('slug'),
            'thumbnails': thumbnails,
            'extractor_key': site,
        }

    def _listing_result(self, page_info, page_type, domain, display_id, site):
        """Collect every episode of a listing page into a playlist result."""
        list_data = page_info[page_type]['data']
        episodes_data = page_info['episodes']['data']
        # Coerce so a numeric string still compares correctly below; an
        # unparseable total becomes None and stops paging after page one.
        num_episodes = int_or_none(page_info['meta']['totalEpisodes'])

        entries = []
        processed_episodes = 0
        page_num = 1
        while True:
            entries.extend({
                '_type': 'url',
                'url': 'http://%s%s' % (domain, episode['path']),
                'id': compat_str(episode['id']),
                'ie_key': 'Revision3',
                'extractor_key': site,
            } for episode in episodes_data)
            processed_episodes += len(episodes_data)
            # Stop on an empty page or once the advertised total is reached
            # *or exceeded*. A strict equality check would request pages
            # forever if the counts ever disagree.
            if (not episodes_data or num_episodes is None
                    or processed_episodes >= num_episodes):
                break
            page_num += 1
            episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
                domain, display_id + '/' + compat_str(page_num), domain),
                display_id)['episodes']['data']

        return self.playlist_result(
            entries, compat_str(list_data['id']),
            list_data.get('name'), list_data.get('summary'))
| @@ -4,34 +4,37 @@ from __future__ import unicode_literals | |||||||
| import re | import re | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
|  | from ..utils import ( | ||||||
|  |     get_element_by_class, | ||||||
|  |     strip_or_none, | ||||||
|  | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| class SeekerIE(InfoExtractor): | class SeekerIE(InfoExtractor): | ||||||
|     _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html' |     _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html' | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         # player.loadRevision3Item |  | ||||||
|         'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', |         'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html', | ||||||
|         'md5': '30c1dc4030cc715cf05b423d0947ac18', |         'md5': '897d44bbe0d8986a2ead96de565a92db', | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|             'id': '76243', |             'id': 'Elrn3gnY', | ||||||
|             'ext': 'webm', |             'ext': 'mp4', | ||||||
|             'title': 'Should Trump Be Required To Release His Tax Returns?', |             'title': 'Should Trump Be Required To Release His Tax Returns?', | ||||||
|             'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?', |             'description': 'md5:41efa8cfa8d627841045eec7b018eb45', | ||||||
|             'uploader': 'Seeker Daily', |             'timestamp': 1490090165, | ||||||
|             'uploader_id': 'seekerdaily', |             'upload_date': '20170321', | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
|         'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', |         'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html', | ||||||
|         'playlist': [ |         'playlist': [ | ||||||
|             { |             { | ||||||
|                 'md5': '83bcd157cab89ad7318dd7b8c9cf1306', |                 'md5': '0497b9f20495174be73ae136949707d2', | ||||||
|                 'info_dict': { |                 'info_dict': { | ||||||
|                     'id': '67558', |                     'id': 'FihYQ8AE', | ||||||
|                     'ext': 'mp4', |                     'ext': 'mp4', | ||||||
|                     'title': 'The Pros & Cons Of Zoos', |                     'title': 'The Pros & Cons Of Zoos', | ||||||
|                     'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?', |                     'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c', | ||||||
|                     'uploader': 'DNews', |                     'timestamp': 1490039133, | ||||||
|                     'uploader_id': 'dnews', |                     'upload_date': '20170320', | ||||||
|                 }, |                 }, | ||||||
|             } |             } | ||||||
|         ], |         ], | ||||||
| @@ -45,13 +48,11 @@ class SeekerIE(InfoExtractor): | |||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         display_id, article_id = re.match(self._VALID_URL, url).groups() |         display_id, article_id = re.match(self._VALID_URL, url).groups() | ||||||
|         webpage = self._download_webpage(url, display_id) |         webpage = self._download_webpage(url, display_id) | ||||||
|         mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage) |         entries = [] | ||||||
|         if mobj: |         for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage): | ||||||
|             playlist_type, playlist_id = mobj.groups() |             entries.append(self.url_result( | ||||||
|             return self.url_result( |                 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id)) | ||||||
|                 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id) |         return self.playlist_result( | ||||||
|         else: |             entries, article_id, | ||||||
|             entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall( |             self._og_search_title(webpage), | ||||||
|                 r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)] |             strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage)) | ||||||
|             return self.playlist_result( |  | ||||||
|                 entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage)) |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user