mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2025-10-29 09:26:20 -07:00)
			
		
		
		
	Compare commits
	
		
			46 Commits
		
	
	
		
			2016.02.09
			...
			2016.02.13
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 35ced3985a | ||
|  | 3e18700d45 | ||
|  | f9f49d87c2 | ||
|  | 6863631c26 | ||
|  | 9d939cec48 | ||
|  | 4c77d3f52a | ||
|  | 7be747b921 | ||
|  | bb20526b64 | ||
|  | bcbb1b08b2 | ||
|  | 3d98f97c64 | ||
|  | c349456ef6 | ||
|  | 5a4905924d | ||
|  | b826035dd5 | ||
|  | a7cab4d039 | ||
|  | fc3810f6d1 | ||
|  | 3dc71d82ce | ||
|  | 9c7b38981c | ||
|  | 8b85ac3fd9 | ||
|  | 81e1c4e2fc | ||
|  | 388ae76b52 | ||
|  | b67d63149d | ||
|  | 28280e8ded | ||
|  | 6b3fbd3425 | ||
|  | a7ab46375b | ||
|  | b14d5e26f6 | ||
|  | 9a61dfba0c | ||
|  | 154c209e2d | ||
|  | d1ea5e171f | ||
|  | a1188d0ed0 | ||
|  | 47d205a646 | ||
|  | 80f772c28a | ||
|  | f817d9bec1 | ||
|  | e2effb08a4 | ||
|  | 7fcea295c5 | ||
|  | cc799437ea | ||
|  | 89d23f37f2 | ||
|  | b92071ef00 | ||
|  | 47246ae26c | ||
|  | 9c15869c28 | ||
|  | 51e9094f4a | ||
|  | 5e3a6fec33 | ||
|  | d413095f7e | ||
|  | 1bedf4de06 | ||
|  | 3967a761f4 | ||
|  | b081350bd9 | ||
|  | 16f1430ba6 | ||
| @@ -89,6 +89,8 @@ | ||||
|  - **canalc2.tv** | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
|  - **Canvas** | ||||
|  - **CBC** | ||||
|  - **CBCPlayer** | ||||
|  - **CBS** | ||||
|  - **CBSNews**: CBS News | ||||
|  - **CBSNewsLiveVideo**: CBS News Live Videos | ||||
| @@ -120,6 +122,7 @@ | ||||
|  - **ComedyCentralShows**: The Daily Show / The Colbert Report | ||||
|  - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED | ||||
|  - **Cracked** | ||||
|  - **Crackle** | ||||
|  - **Criterion** | ||||
|  - **CrooksAndLiars** | ||||
|  - **Crunchyroll** | ||||
| @@ -445,6 +448,7 @@ | ||||
|  - **PlanetaPlay** | ||||
|  - **play.fm** | ||||
|  - **played.to** | ||||
|  - **PlaysTV** | ||||
|  - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz | ||||
|  - **Playvid** | ||||
|  - **Playwire** | ||||
|   | ||||
| @@ -1288,6 +1288,9 @@ class YoutubeDL(object): | ||||
|  | ||||
|             if format.get('format_id') is None: | ||||
|                 format['format_id'] = compat_str(i) | ||||
|             else: | ||||
|                 # Sanitize format_id from characters used in format selector expression | ||||
|                 format['format_id'] = re.sub('[\s,/+\[\]()]', '_', format['format_id']) | ||||
|             format_id = format['format_id'] | ||||
|             if format_id not in formats_dict: | ||||
|                 formats_dict[format_id] = [] | ||||
| @@ -1338,7 +1341,6 @@ class YoutubeDL(object): | ||||
|         if req_format is None: | ||||
|             req_format_list = [] | ||||
|             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and | ||||
|                     info_dict['extractor'] in ['youtube', 'ted'] and | ||||
|                     not info_dict.get('is_live')): | ||||
|                 merger = FFmpegMergerPP(self) | ||||
|                 if merger.available and merger.can_merge(): | ||||
| @@ -1795,7 +1797,7 @@ class YoutubeDL(object): | ||||
|             else: | ||||
|                 res = '%sp' % format['height'] | ||||
|         elif format.get('width') is not None: | ||||
|             res = '?x%d' % format['width'] | ||||
|             res = '%dx?' % format['width'] | ||||
|         else: | ||||
|             res = default | ||||
|         return res | ||||
|   | ||||
| @@ -89,6 +89,10 @@ from .camdemy import ( | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .canvas import CanvasIE | ||||
| from .cbc import ( | ||||
|     CBCIE, | ||||
|     CBCPlayerIE, | ||||
| ) | ||||
| from .cbs import CBSIE | ||||
| from .cbsnews import ( | ||||
|     CBSNewsIE, | ||||
| @@ -126,6 +130,7 @@ from .comcarcoff import ComCarCoffIE | ||||
| from .commonmistakes import CommonMistakesIE, UnicodeBOMIE | ||||
| from .condenast import CondeNastIE | ||||
| from .cracked import CrackedIE | ||||
| from .crackle import CrackleIE | ||||
| from .criterion import CriterionIE | ||||
| from .crooksandliars import CrooksAndLiarsIE | ||||
| from .crunchyroll import ( | ||||
| @@ -533,6 +538,7 @@ from .planetaplay import PlanetaPlayIE | ||||
| from .pladform import PladformIE | ||||
| from .played import PlayedIE | ||||
| from .playfm import PlayFMIE | ||||
| from .plays import PlaysTVIE | ||||
| from .playtvak import PlaytvakIE | ||||
| from .playvid import PlayvidIE | ||||
| from .playwire import PlaywireIE | ||||
|   | ||||
							
								
								
									
										113
									
								
								youtube_dl/extractor/cbc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										113
									
								
								youtube_dl/extractor/cbc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,113 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import js_to_json | ||||
|  | ||||
|  | ||||
class CBCIE(InfoExtractor):
    # Handles generic cbc.ca pages. The page is scanned for a Caffeine player
    # initialisation call or, failing that, for player iframes; every video
    # found is handed over to CBCPlayerIE through a ``cbcplayer:<id>`` URL.
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        # with mediaId
        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
        'info_dict': {
            'id': '2682904050',
            'ext': 'flv',
            'title': 'Don Cherry – All-Stars',
            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
            'timestamp': 1454475540,
            'upload_date': '20160203',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # with clipId
        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
        'info_dict': {
            'id': '2487345465',
            'ext': 'flv',
            'title': 'Robin Williams freestyles on 90 Minutes Live',
            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
            'upload_date': '19700101',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }, {
        # multiple iframes
        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
        'playlist': [{
            'info_dict': {
                'id': '2680832926',
                'ext': 'flv',
                'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
                'upload_date': '19700101',
            },
        }, {
            'info_dict': {
                'id': '2658915080',
                'ext': 'flv',
                'title': 'Fly like an eagle!',
                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
                'upload_date': '19700101',
            },
        }],
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Accept *url* only when CBCPlayerIE does not already claim it."""
        if CBCPlayerIE.suitable(url):
            return False
        return super(CBCIE, cls).suitable(url)

    def _cbcplayer_result(self, media_id):
        """Build a url_result that delegates *media_id* to the CBCPlayer extractor."""
        return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)

    def _real_extract(self, url):
        """Resolve a cbc.ca page to a single CBCPlayer video or a playlist of them."""
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)

        init_args = self._search_regex(
            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', html, 'player init',
            default=None)

        if not init_args:
            # No inline player on the page: collect every iframe that carries
            # a mediaId query parameter and return them as a playlist.
            embedded_ids = re.findall(
                r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', html)
            return self.playlist_result(
                [self._cbcplayer_result(embedded_id) for embedded_id in embedded_ids])

        player_config = self._parse_json(init_args, page_id, js_to_json)
        media_id = player_config.get('mediaId')
        if not media_id:
            # Only a clipId is available: look the media id up in the feed,
            # taking the last path component of the first entry's id.
            clip_id = player_config['clipId']
            feed = self._download_json(
                'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
                clip_id)
            media_id = feed['entries'][0]['id'].split('/')[-1]
        return self._cbcplayer_result(media_id)
|  | ||||
|  | ||||
class CBCPlayerIE(InfoExtractor):
    # Matches internal ``cbcplayer:<id>`` URLs as well as cbc.ca player and
    # syndicated-iframe URLs, and forwards the numeric id to ThePlatformFeed.
    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'info_dict': {
            'id': '2683190193',
            'ext': 'flv',
            'title': 'Gerry Runs a Sweat Shop',
            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
            'timestamp': 1455067800,
            'upload_date': '20160210',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Hand the matched id to the 'ThePlatformFeed' extractor as a byGuid query."""
        guid = self._match_id(url)
        feed_url = 'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % guid
        return self.url_result(feed_url, 'ThePlatformFeed', guid)
| @@ -2,6 +2,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
| @@ -14,14 +15,13 @@ class ComCarCoffIE(InfoExtractor): | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/', | ||||
|         'info_dict': { | ||||
|             'id': 'miranda-sings-happy-thanksgiving-miranda', | ||||
|             'id': '2494164', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20141127', | ||||
|             'timestamp': 1417107600, | ||||
|             'duration': 1232, | ||||
|             'title': 'Happy Thanksgiving Miranda', | ||||
|             'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.', | ||||
|             'thumbnail': 'http://ccc.crackle.com/images/s5e4_thumb.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'requires ffmpeg', | ||||
| @@ -39,15 +39,14 @@ class ComCarCoffIE(InfoExtractor): | ||||
|                 r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'), | ||||
|             display_id)['videoData'] | ||||
|  | ||||
|         video_id = full_data['activeVideo']['video'] | ||||
|         video_data = full_data.get('videos', {}).get(video_id) or full_data['singleshots'][video_id] | ||||
|         display_id = full_data['activeVideo']['video'] | ||||
|         video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id] | ||||
|         video_id = compat_str(video_data['mediaId']) | ||||
|         thumbnails = [{ | ||||
|             'url': video_data['images']['thumb'], | ||||
|         }, { | ||||
|             'url': video_data['images']['poster'], | ||||
|         }] | ||||
|         formats = self._extract_m3u8_formats( | ||||
|             video_data['mediaUrl'], video_id, ext='mp4') | ||||
|  | ||||
|         timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601( | ||||
|             video_data.get('pubDate')) | ||||
| @@ -55,6 +54,8 @@ class ComCarCoffIE(InfoExtractor): | ||||
|             video_data.get('duration')) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'url': 'crackle:%s' % video_id, | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': video_data['title'], | ||||
| @@ -62,6 +63,7 @@ class ComCarCoffIE(InfoExtractor): | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'thumbnails': thumbnails, | ||||
|             'formats': formats, | ||||
|             'season_number': int_or_none(video_data.get('season')), | ||||
|             'episode_number': int_or_none(video_data.get('episode')), | ||||
|             'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))), | ||||
|         } | ||||
|   | ||||
| @@ -1186,11 +1186,13 @@ class InfoExtractor(object): | ||||
|         http_count = 0 | ||||
|         m3u8_count = 0 | ||||
|  | ||||
|         srcs = [] | ||||
|         videos = smil.findall(self._xpath_ns('.//video', namespace)) | ||||
|         for video in videos: | ||||
|             src = video.get('src') | ||||
|             if not src: | ||||
|             if not src or src in srcs: | ||||
|                 continue | ||||
|             srcs.append(src) | ||||
|  | ||||
|             bitrate = float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000) | ||||
|             filesize = int_or_none(video.get('size') or video.get('fileSize')) | ||||
| @@ -1222,6 +1224,7 @@ class InfoExtractor(object): | ||||
|                 continue | ||||
|  | ||||
|             src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) | ||||
|             src_url = src_url.strip() | ||||
|  | ||||
|             if proto == 'm3u8' or src_ext == 'm3u8': | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
| @@ -1267,11 +1270,13 @@ class InfoExtractor(object): | ||||
|         return formats | ||||
|  | ||||
|     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): | ||||
|         urls = [] | ||||
|         subtitles = {} | ||||
|         for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): | ||||
|             src = textstream.get('src') | ||||
|             if not src: | ||||
|             if not src or src in urls: | ||||
|                 continue | ||||
|             urls.append(src) | ||||
|             ext = textstream.get('ext') or determine_ext(src) | ||||
|             if not ext: | ||||
|                 type_ = textstream.get('type') | ||||
| @@ -1430,12 +1435,16 @@ class InfoExtractor(object): | ||||
|                                 base_url = base_url_e.text + base_url | ||||
|                                 if re.match(r'^https?://', base_url): | ||||
|                                     break | ||||
|                         if not re.match(r'^https?://', base_url): | ||||
|                         if mpd_base_url and not re.match(r'^https?://', base_url): | ||||
|                             if not mpd_base_url.endswith('/') and not base_url.startswith('/'): | ||||
|                                 mpd_base_url += '/' | ||||
|                             base_url = mpd_base_url + base_url | ||||
|                         representation_id = representation_attrib.get('id') | ||||
|                         lang = representation_attrib.get('lang') | ||||
|                         url_el = representation.find(_add_ns('BaseURL')) | ||||
|                         filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None) | ||||
|                         f = { | ||||
|                             'format_id': mpd_id or representation_id, | ||||
|                             'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, | ||||
|                             'url': base_url, | ||||
|                             'width': int_or_none(representation_attrib.get('width')), | ||||
|                             'height': int_or_none(representation_attrib.get('height')), | ||||
| @@ -1446,6 +1455,7 @@ class InfoExtractor(object): | ||||
|                             'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'), | ||||
|                             'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None, | ||||
|                             'format_note': 'DASH %s' % content_type, | ||||
|                             'filesize': filesize, | ||||
|                         } | ||||
|                         representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info) | ||||
|                         if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info: | ||||
|   | ||||
							
								
								
									
										95
									
								
								youtube_dl/extractor/crackle.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								youtube_dl/extractor/crackle.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,95 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
class CrackleIE(InfoExtractor):
    # Extractor for crackle.com videos, also reachable through internal
    # ``crackle:<id>`` URLs. Metadata comes from the legacy "vidwallcache" XML
    # endpoint; formats are HLS (uplynk) plus fixed-name progressive files.
    _VALID_URL = r'(?:crackle:|https?://(?:www\.)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.crackle.com/the-art-of-more/2496419',
        'info_dict': {
            'id': '2496419',
            'ext': 'mp4',
            'title': 'Heavy Lies the Head',
            'description': 'md5:bb56aa0708fe7b9a4861535f15c3abca',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }

    # extracted from http://legacyweb-us.crackle.com/flash/QueryReferrer.ashx
    _SUBTITLE_SERVER = 'http://web-us-az.crackle.com'
    _UPLYNK_OWNER_ID = 'e8773f7770a44dbd886eee4fca16a66b'
    _THUMBNAIL_TEMPLATE = 'http://images-us-am.crackle.com/%stnl_1920x1080.jpg?ts=20140107233116?c=635333335057637614'

    # extracted from http://legacyweb-us.crackle.com/flash/ReferrerRedirect.ashx
    # Maps the file-name suffix of each progressive rendition to its dimensions.
    _MEDIA_FILE_SLOTS = {
        'c544.flv': {
            'width': 544,
            'height': 306,
        },
        '360p.mp4': {
            'width': 640,
            'height': 360,
        },
        '480p.mp4': {
            'width': 852,
            'height': 478,
        },
        '480p_1mbps.mp4': {
            'width': 852,
            'height': 478,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single Crackle video.

        Reads the first ``<i>`` element of the vidwallcache XML for metadata,
        collects HLS formats (non-fatal) and, when the item exposes a media
        path (``p`` attribute), the progressive formats, the thumbnail and
        the closed-caption subtitle tracks.
        """
        video_id = self._match_id(url)
        item = self._download_xml(
            'http://legacyweb-us.crackle.com/app/revamp/vidwallcache.aspx?flags=-1&fm=%s' % video_id,
            video_id).find('i')
        title = item.attrib['t']

        thumbnail = None
        subtitles = {}
        # HLS is optional; spell the non-fatal flag out as False rather than
        # relying on None being falsy.
        formats = self._extract_m3u8_formats(
            'http://content.uplynk.com/ext/%s/%s.m3u8' % (self._UPLYNK_OWNER_ID, video_id),
            video_id, 'mp4', m3u8_id='hls', fatal=False)
        path = item.attrib.get('p')
        if path:
            thumbnail = self._THUMBNAIL_TEMPLATE % path
            http_base_url = 'http://ahttp.crackle.com/' + path
            for mfs_path, mfs_info in self._MEDIA_FILE_SLOTS.items():
                formats.append({
                    'url': http_base_url + mfs_path,
                    'format_id': 'http-' + mfs_path.split('.')[0],
                    'width': mfs_info['width'],
                    'height': mfs_info['height'],
                })
            for cc in item.findall('cc'):
                locale = cc.attrib.get('l')
                v = cc.attrib.get('v')
                if locale and v:
                    # Append so that several <cc> entries sharing a locale are
                    # all kept; plain assignment would retain only the last.
                    subtitles.setdefault(locale, []).append({
                        'url': '%s/%s%s_%s.xml' % (self._SUBTITLE_SERVER, path, locale, v),
                        'ext': 'ttml',
                    })
        self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))

        # The 'r' attribute is parsed as a base-16 integer when present.
        duration_hex = item.attrib.get('r')

        return {
            'id': video_id,
            'title': title,
            'description': item.attrib.get('d'),
            'duration': int(duration_hex, 16) if duration_hex else None,
            'series': item.attrib.get('sn'),
            'season_number': int_or_none(item.attrib.get('se')),
            'episode_number': int_or_none(item.attrib.get('ep')),
            'thumbnail': thumbnail,
            'subtitles': subtitles,
            'formats': formats,
        }
| @@ -9,6 +9,7 @@ class FOXIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?fox\.com/watch/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.fox.com/watch/255180355939/7684182528', | ||||
|         'md5': 'ebd296fcc41dd4b19f8115d8461a3165', | ||||
|         'info_dict': { | ||||
|             'id': '255180355939', | ||||
|             'ext': 'mp4', | ||||
| @@ -17,10 +18,6 @@ class FOXIE(InfoExtractor): | ||||
|             'duration': 129, | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -29,7 +26,7 @@ class FOXIE(InfoExtractor): | ||||
|  | ||||
|         release_url = self._parse_json(self._search_regex( | ||||
|             r'"fox_pdk_player"\s*:\s*({[^}]+?})', webpage, 'fox_pdk_player'), | ||||
|             video_id)['release_url'] + '&manifest=m3u' | ||||
|             video_id)['release_url'] + '&switch=http' | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|   | ||||
| @@ -224,6 +224,20 @@ class GenericIE(InfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         # MPD from http://dash-mse-test.appspot.com/media.html | ||||
|         { | ||||
|             'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd', | ||||
|             'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53', | ||||
|             'info_dict': { | ||||
|                 'id': 'car-20120827-manifest', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'car-20120827-manifest', | ||||
|                 'formats': 'mincount:9', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'format': 'bestvideo', | ||||
|             }, | ||||
|         }, | ||||
|         # google redirect | ||||
|         { | ||||
|             'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', | ||||
| @@ -1302,7 +1316,8 @@ class GenericIE(InfoExtractor): | ||||
|                 return { | ||||
|                     'id': video_id, | ||||
|                     'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), | ||||
|                     'formats': self._parse_mpd_formats(doc, video_id), | ||||
|                     'formats': self._parse_mpd_formats( | ||||
|                         doc, video_id, mpd_base_url=url.rpartition('/')[0]), | ||||
|                 } | ||||
|         except compat_xml_parse_error: | ||||
|             pass | ||||
| @@ -1413,7 +1428,7 @@ class GenericIE(InfoExtractor): | ||||
|  | ||||
|         # Look for embedded Dailymotion player | ||||
|         matches = re.findall( | ||||
|             r'<(?:embed|iframe)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) | ||||
|             r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) | ||||
|         if matches: | ||||
|             return _playlist_from_matches( | ||||
|                 matches, lambda m: unescapeHTML(m[1])) | ||||
|   | ||||
| @@ -10,8 +10,8 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class HotStarIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/.*?[/-](?P<id>\d{10})' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273', | ||||
|         'info_dict': { | ||||
|             'id': '1000076273', | ||||
| @@ -26,7 +26,13 @@ class HotStarIE(InfoExtractor): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.hotstar.com/1000000515', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _GET_CONTENT_TEMPLATE = 'http://account.hotstar.com/AVS/besc?action=GetAggregatedContentDetails&channel=PCTV&contentId=%s' | ||||
|     _GET_CDN_TEMPLATE = 'http://getcdn.hotstar.com/AVS/besc?action=GetCDN&asJson=Y&channel=%s&id=%s&type=%s' | ||||
|   | ||||
| @@ -4,6 +4,10 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MailRuIE(InfoExtractor): | ||||
| @@ -34,14 +38,30 @@ class MailRuIE(InfoExtractor): | ||||
|                 'id': '46843144_1263', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', | ||||
|                 'timestamp': 1397217632, | ||||
|                 'upload_date': '20140411', | ||||
|                 'uploader': 'hitech', | ||||
|                 'timestamp': 1397039888, | ||||
|                 'upload_date': '20140409', | ||||
|                 'uploader': 'hitech@corp.mail.ru', | ||||
|                 'uploader_id': 'hitech@corp.mail.ru', | ||||
|                 'duration': 245, | ||||
|             }, | ||||
|             'skip': 'Not accessible from Travis CI server', | ||||
|         }, | ||||
|         { | ||||
|             # only available via metaUrl API | ||||
|             'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html', | ||||
|             'md5': '3b26d2491c6949d031a32b96bd97c096', | ||||
|             'info_dict': { | ||||
|                 'id': '56664382_502', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': ':8336', | ||||
|                 'timestamp': 1449094163, | ||||
|                 'upload_date': '20151202', | ||||
|                 'uploader': '720pizle@mail.ru', | ||||
|                 'uploader_id': '720pizle@mail.ru', | ||||
|                 'duration': 6001, | ||||
|             }, | ||||
|             'skip': 'Not accessible from Travis CI server', | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -51,32 +71,55 @@ class MailRuIE(InfoExtractor): | ||||
|         if not video_id: | ||||
|             video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix') | ||||
|  | ||||
|         video_data = self._download_json( | ||||
|             'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, video_id, 'Downloading video JSON') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         author = video_data['author'] | ||||
|         uploader = author['name'] | ||||
|         uploader_id = author.get('id') or author.get('email') | ||||
|         view_count = video_data.get('views_count') | ||||
|         video_data = None | ||||
|  | ||||
|         page_config = self._parse_json(self._search_regex( | ||||
|             r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', | ||||
|             webpage, 'page config', default='{}'), video_id, fatal=False) | ||||
|         if page_config: | ||||
|             meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl') | ||||
|             if meta_url: | ||||
|                 video_data = self._download_json( | ||||
|                     meta_url, video_id, 'Downloading video meta JSON', fatal=False) | ||||
|  | ||||
|         # Fallback old approach | ||||
|         if not video_data: | ||||
|             video_data = self._download_json( | ||||
|                 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id, | ||||
|                 video_id, 'Downloading video JSON') | ||||
|  | ||||
|         formats = [] | ||||
|         for f in video_data['videos']: | ||||
|             video_url = f.get('url') | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             format_id = f.get('key') | ||||
|             height = int_or_none(self._search_regex( | ||||
|                 r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': format_id, | ||||
|                 'height': height, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         meta_data = video_data['meta'] | ||||
|         content_id = '%s_%s' % ( | ||||
|             meta_data.get('accId', ''), meta_data['itemId']) | ||||
|         title = meta_data['title'] | ||||
|         if title.endswith('.mp4'): | ||||
|             title = title[:-4] | ||||
|         thumbnail = meta_data['poster'] | ||||
|         duration = meta_data['duration'] | ||||
|         timestamp = meta_data['timestamp'] | ||||
|         title = remove_end(meta_data['title'], '.mp4') | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': video['url'], | ||||
|                 'format_id': video['key'], | ||||
|                 'height': int(video['key'].rstrip('p')) | ||||
|             } for video in video_data['videos'] | ||||
|         ] | ||||
|         self._sort_formats(formats) | ||||
|         author = video_data.get('author') | ||||
|         uploader = author.get('name') | ||||
|         uploader_id = author.get('id') or author.get('email') | ||||
|         view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count')) | ||||
|  | ||||
|         acc_id = meta_data.get('accId') | ||||
|         item_id = meta_data.get('itemId') | ||||
|         content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id | ||||
|  | ||||
|         thumbnail = meta_data.get('poster') | ||||
|         duration = int_or_none(meta_data.get('duration')) | ||||
|         timestamp = int_or_none(meta_data.get('timestamp')) | ||||
|  | ||||
|         return { | ||||
|             'id': content_id, | ||||
|   | ||||
| @@ -57,7 +57,7 @@ class NBCIE(InfoExtractor): | ||||
|         { | ||||
|             # This video has expired but with an escaped embedURL | ||||
|             'url': 'http://www.nbc.com/parenthood/episode-guide/season-5/just-like-at-home/515', | ||||
|             'skip': 'Expired' | ||||
|             'only_matching': True, | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|   | ||||
| @@ -4,10 +4,12 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_HTTPError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     strip_jsonp, | ||||
|     unified_strdate, | ||||
|     US_RATINGS, | ||||
| @@ -199,7 +201,7 @@ class PBSIE(InfoExtractor): | ||||
|                 'id': '2365006249', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Constitution USA with Peter Sagal - A More Perfect Union', | ||||
|                 'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', | ||||
|                 'description': 'md5:36f341ae62e251b8f5bd2b754b95a071', | ||||
|                 'duration': 3190, | ||||
|             }, | ||||
|             'params': { | ||||
| @@ -213,7 +215,7 @@ class PBSIE(InfoExtractor): | ||||
|                 'id': '2365297690', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'FRONTLINE - Losing Iraq', | ||||
|                 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', | ||||
|                 'description': 'md5:4d3eaa01f94e61b3e73704735f1196d9', | ||||
|                 'duration': 5050, | ||||
|             }, | ||||
|             'params': { | ||||
| @@ -227,7 +229,7 @@ class PBSIE(InfoExtractor): | ||||
|                 'id': '2201174722', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'PBS NewsHour - Cyber Schools Gain Popularity, but Quality Questions Persist', | ||||
|                 'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28', | ||||
|                 'description': 'md5:95a19f568689d09a166dff9edada3301', | ||||
|                 'duration': 801, | ||||
|             }, | ||||
|         }, | ||||
| @@ -237,8 +239,8 @@ class PBSIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': '2365297708', | ||||
|                 'ext': 'mp4', | ||||
|                 'description': 'md5:68d87ef760660eb564455eb30ca464fe', | ||||
|                 'title': 'Great Performances - Dudamel Conducts Verdi Requiem at the Hollywood Bowl - Full', | ||||
|                 'description': 'md5:657897370e09e2bc6bf0f8d2cd313c6b', | ||||
|                 'duration': 6559, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
| @@ -278,7 +280,7 @@ class PBSIE(InfoExtractor): | ||||
|                 'display_id': 'player', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'American Experience - Death and the Civil War, Chapter 1', | ||||
|                 'description': 'American Experience, TV’s most-watched history series, brings to life the compelling stories from our past that inform our understanding of the world today.', | ||||
|                 'description': 'md5:1b80a74e0380ed2a4fb335026de1600d', | ||||
|                 'duration': 682, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
| @@ -287,20 +289,19 @@ class PBSIE(InfoExtractor): | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://video.pbs.org/video/2365367186/', | ||||
|             'url': 'http://www.pbs.org/video/2365245528/', | ||||
|             'info_dict': { | ||||
|                 'id': '2365367186', | ||||
|                 'display_id': '2365367186', | ||||
|                 'id': '2365245528', | ||||
|                 'display_id': '2365245528', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'To Catch A Comet - Full Episode', | ||||
|                 'description': 'On November 12, 2014, billions of kilometers from Earth, spacecraft orbiter Rosetta and lander Philae did what no other had dared to attempt \u2014 land on the volatile surface of a comet as it zooms around the sun at 67,000 km/hr. The European Space Agency hopes this mission can help peer into our past and unlock secrets of our origins.', | ||||
|                 'duration': 3342, | ||||
|                 'title': 'FRONTLINE - United States of Secrets (Part One)', | ||||
|                 'description': 'md5:55756bd5c551519cc4b7703e373e217e', | ||||
|                 'duration': 6851, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True,  # requires ffmpeg | ||||
|             }, | ||||
|             'skip': 'Expired', | ||||
|         }, | ||||
|         { | ||||
|             # Video embedded in iframe containing angle brackets as attribute's value (e.g. | ||||
| @@ -312,7 +313,7 @@ class PBSIE(InfoExtractor): | ||||
|                 'display_id': 'a-chefs-life-season-3-episode-5-prickly-business', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': "A Chef's Life - Season 3, Ep. 5: Prickly Business", | ||||
|                 'description': 'md5:61db2ddf27c9912f09c241014b118ed1', | ||||
|                 'description': 'md5:54033c6baa1f9623607c6e2ed245888b', | ||||
|                 'duration': 1480, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
| @@ -328,7 +329,7 @@ class PBSIE(InfoExtractor): | ||||
|                 'display_id': 'the-atomic-artists', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'FRONTLINE - The Atomic Artists', | ||||
|                 'description': 'md5:f5bfbefadf421e8bb8647602011caf8e', | ||||
|                 'description': 'md5:1a2481e86b32b2e12ec1905dd473e2c1', | ||||
|                 'duration': 723, | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             }, | ||||
| @@ -365,10 +366,14 @@ class PBSIE(InfoExtractor): | ||||
|                 webpage, 'upload date', default=None)) | ||||
|  | ||||
|             # tabbed frontline videos | ||||
|             tabbed_videos = re.findall( | ||||
|                 r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', webpage) | ||||
|             if tabbed_videos: | ||||
|                 return tabbed_videos, presumptive_id, upload_date | ||||
|             MULTI_PART_REGEXES = ( | ||||
|                 r'<div[^>]+class="videotab[^"]*"[^>]+vid="(\d+)"', | ||||
|                 r'<a[^>]+href=["\']#video-\d+["\'][^>]+data-coveid=["\'](\d+)', | ||||
|             ) | ||||
|             for p in MULTI_PART_REGEXES: | ||||
|                 tabbed_videos = re.findall(p, webpage) | ||||
|                 if tabbed_videos: | ||||
|                     return tabbed_videos, presumptive_id, upload_date | ||||
|  | ||||
|             MEDIA_ID_REGEXES = [ | ||||
|                 r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed | ||||
| @@ -432,9 +437,21 @@ class PBSIE(InfoExtractor): | ||||
|                 for vid_id in video_id] | ||||
|             return self.playlist_result(entries, display_id) | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, | ||||
|             display_id) | ||||
|         try: | ||||
|             info = self._download_json( | ||||
|                 'http://player.pbs.org/videoInfo/%s?format=json&type=partner' % video_id, | ||||
|                 display_id, 'Downloading video info JSON') | ||||
|         except ExtractorError as e: | ||||
|             if not isinstance(e.cause, compat_HTTPError) or e.cause.code != 404: | ||||
|                 raise | ||||
|             # The videoInfo API may not work for some videos; fall back to the portalplayer API | ||||
|             player = self._download_webpage( | ||||
|                 'http://player.pbs.org/portalplayer/%s' % video_id, display_id) | ||||
|             info = self._parse_json( | ||||
|                 self._search_regex( | ||||
|                     r'(?s)PBS\.videoData\s*=\s*({.+?});\n', | ||||
|                     player, 'video data', default='{}'), | ||||
|                 display_id, transform_source=js_to_json, fatal=False) | ||||
|  | ||||
|         formats = [] | ||||
|         for encoding_name in ('recommended_encoding', 'alternate_encoding'): | ||||
| @@ -493,7 +510,7 @@ class PBSIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': info['title'], | ||||
|             'description': info['program'].get('description'), | ||||
|             'description': info.get('description') or info.get('program', {}).get('description'), | ||||
|             'thumbnail': info.get('image_url'), | ||||
|             'duration': int_or_none(info.get('duration')), | ||||
|             'age_limit': age_limit, | ||||
|   | ||||
							
								
								
									
										51
									
								
								youtube_dl/extractor/plays.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								youtube_dl/extractor/plays.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,51 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class PlaysTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?plays\.tv/video/(?P<id>[0-9a-f]{18})' | ||||
|     _TEST = { | ||||
|         'url': 'http://plays.tv/video/56af17f56c95335490/when-you-outplay-the-azir-wall', | ||||
|         'md5': 'dfeac1198506652b5257a62762cec7bc', | ||||
|         'info_dict': { | ||||
|             'id': '56af17f56c95335490', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'When you outplay the Azir wall', | ||||
|             'description': 'Posted by Bjergsen', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|         content = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'R\.bindContent\(({.+?})\);', webpage, | ||||
|                 'content'), video_id)['content'] | ||||
|         mpd_url, sources = re.search( | ||||
|             r'(?s)<video[^>]+data-mpd="([^"]+)"[^>]*>(.+?)</video>', | ||||
|             content).groups() | ||||
|         formats = self._extract_mpd_formats( | ||||
|             self._proto_relative_url(mpd_url), video_id, mpd_id='DASH') | ||||
|         for format_id, height, format_url in re.findall(r'<source\s+res="((\d+)h?)"\s+src="([^"]+)"', sources): | ||||
|             formats.append({ | ||||
|                 'url': self._proto_relative_url(format_url), | ||||
|                 'format_id': 'http-' + format_id, | ||||
|                 'height': int_or_none(height), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -20,7 +20,6 @@ from ..utils import ( | ||||
|     int_or_none, | ||||
|     sanitized_Request, | ||||
|     unsmuggle_url, | ||||
|     url_basename, | ||||
|     xpath_with_ns, | ||||
| ) | ||||
|  | ||||
| @@ -283,8 +282,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|         first_video_id = None | ||||
|         duration = None | ||||
|         for item in entry['media$content']: | ||||
|             smil_url = item['plfile$url'] + '&format=SMIL&Tracking=true&Embedded=true&formats=MPEG4,F4M' | ||||
|             cur_video_id = url_basename(smil_url) | ||||
|             smil_url = item['plfile$url'] + '&format=SMIL&mbr=true' | ||||
|             cur_video_id = ThePlatformIE._match_id(smil_url) | ||||
|             if first_video_id is None: | ||||
|                 first_video_id = cur_video_id | ||||
|                 duration = float_or_none(item.get('plfile$duration')) | ||||
|   | ||||
| @@ -1,6 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| @@ -12,10 +16,10 @@ class ViddlerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?viddler\.com/(?:v|embed|player)/(?P<id>[a-z0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.viddler.com/v/43903784', | ||||
|         'md5': 'ae43ad7cb59431ce043f0ff7fa13cbf4', | ||||
|         'md5': '9eee21161d2c7f5b39690c3e325fab2f', | ||||
|         'info_dict': { | ||||
|             'id': '43903784', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'mov', | ||||
|             'title': 'Video Made Easy', | ||||
|             'description': 'md5:6a697ebd844ff3093bd2e82c37b409cd', | ||||
|             'uploader': 'viddler', | ||||
| @@ -29,10 +33,10 @@ class ViddlerIE(InfoExtractor): | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.viddler.com/v/4d03aad9/', | ||||
|         'md5': 'faa71fbf70c0bee7ab93076fd007f4b0', | ||||
|         'md5': 'f12c5a7fa839c47a79363bfdf69404fb', | ||||
|         'info_dict': { | ||||
|             'id': '4d03aad9', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'ts', | ||||
|             'title': 'WALL-TO-GORTAT', | ||||
|             'upload_date': '20150126', | ||||
|             'uploader': 'deadspin', | ||||
| @@ -42,10 +46,10 @@ class ViddlerIE(InfoExtractor): | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.viddler.com/player/221ebbbd/0/', | ||||
|         'md5': '0defa2bd0ea613d14a6e9bd1db6be326', | ||||
|         'md5': '740511f61d3d1bb71dc14a0fe01a1c10', | ||||
|         'info_dict': { | ||||
|             'id': '221ebbbd', | ||||
|             'ext': 'mp4', | ||||
|             'ext': 'mov', | ||||
|             'title': 'LETeens-Grammar-snack-third-conditional', | ||||
|             'description': ' ', | ||||
|             'upload_date': '20140929', | ||||
| @@ -54,16 +58,42 @@ class ViddlerIE(InfoExtractor): | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         # secret protected | ||||
|         'url': 'http://www.viddler.com/v/890c0985?secret=34051570', | ||||
|         'info_dict': { | ||||
|             'id': '890c0985', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Complete Property Training - Traineeships', | ||||
|             'description': ' ', | ||||
|             'upload_date': '20130606', | ||||
|             'uploader': 'TiffanyBowtell', | ||||
|             'timestamp': 1370496993, | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         json_url = ( | ||||
|             'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?video_id=%s&key=v0vhrt7bg2xq1vyxhkct' % | ||||
|             video_id) | ||||
|         query = { | ||||
|             'video_id': video_id, | ||||
|             'key': 'v0vhrt7bg2xq1vyxhkct', | ||||
|         } | ||||
|  | ||||
|         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) | ||||
|         secret = qs.get('secret', [None])[0] | ||||
|         if secret: | ||||
|             query['secret'] = secret | ||||
|  | ||||
|         headers = {'Referer': 'http://static.cdn-ec.viddler.com/js/arpeggio/v2/embed.html'} | ||||
|         request = sanitized_Request(json_url, None, headers) | ||||
|         request = sanitized_Request( | ||||
|             'http://api.viddler.com/api/v2/viddler.videos.getPlaybackDetails.json?%s' | ||||
|             % compat_urllib_parse.urlencode(query), None, headers) | ||||
|         data = self._download_json(request, video_id)['video'] | ||||
|  | ||||
|         formats = [] | ||||
|   | ||||
| @@ -57,7 +57,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): | ||||
|  | ||||
|     def _extract_xsrft_and_vuid(self, webpage): | ||||
|         xsrft = self._search_regex( | ||||
|             r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', | ||||
|             r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', | ||||
|             webpage, 'login token', group='xsrft') | ||||
|         vuid = self._search_regex( | ||||
|             r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1', | ||||
|   | ||||
| @@ -229,6 +229,9 @@ class YoukuIE(InfoExtractor): | ||||
|             if error_note is not None and '因版权原因无法观看此视频' in error_note: | ||||
|                 raise ExtractorError( | ||||
|                     'Youku said: Sorry, this video is available in China only', expected=True) | ||||
|             elif error_note and '该视频被设为私密' in error_note: | ||||
|                 raise ExtractorError( | ||||
|                     'Youku said: Sorry, this video is private', expected=True) | ||||
|             else: | ||||
|                 msg = 'Youku server reported error %i' % error.get('code') | ||||
|                 if error_note is not None: | ||||
|   | ||||
| @@ -375,7 +375,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|     IE_NAME = 'youtube' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9', | ||||
|             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9', | ||||
|             'info_dict': { | ||||
|                 'id': 'BaW_jenozKc', | ||||
|                 'ext': 'mp4', | ||||
| @@ -441,7 +441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&v=UxxajLWwzqY', | ||||
|             'url': 'http://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY', | ||||
|             'note': 'Use the first video ID in the URL', | ||||
|             'info_dict': { | ||||
|                 'id': 'BaW_jenozKc', | ||||
| @@ -704,6 +704,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # Multifeed video with comma in title (see https://github.com/rg3/youtube-dl/issues/8536) | ||||
|             'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo', | ||||
|             'info_dict': { | ||||
|                 'id': 'gVfLd0zydlo', | ||||
|                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', | ||||
|             }, | ||||
|             'playlist_count': 2, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vid.plus/FlRa-iH7PGw', | ||||
|             'only_matching': True, | ||||
| @@ -1196,9 +1205,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             if not self._downloader.params.get('noplaylist'): | ||||
|                 entries = [] | ||||
|                 feed_ids = [] | ||||
|                 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0]) | ||||
|                 multifeed_metadata_list = video_info['multifeed_metadata_list'][0] | ||||
|                 for feed in multifeed_metadata_list.split(','): | ||||
|                     feed_data = compat_parse_qs(feed) | ||||
|                     # Unquote must take place after the split on comma (,) since textual | ||||
|                     # fields may contain commas as well (see | ||||
|                     # https://github.com/rg3/youtube-dl/issues/8536) | ||||
|                     feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed)) | ||||
|                     entries.append({ | ||||
|                         '_type': 'url_transparent', | ||||
|                         'ie_key': 'Youtube', | ||||
|   | ||||
| @@ -56,7 +56,7 @@ from .compat import ( | ||||
| compiled_regex_type = type(re.compile('')) | ||||
|  | ||||
| std_headers = { | ||||
|     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)', | ||||
|     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)', | ||||
|     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', | ||||
|     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', | ||||
|     'Accept-Encoding': 'gzip, deflate', | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2016.02.09' | ||||
| __version__ = '2016.02.13' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user