mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			65 Commits
		
	
	
		
			2015.02.19
			...
			2015.02.23
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | bd61a9e770 | ||
|  | 3438e7acd2 | ||
|  | 09c200acf2 | ||
|  | 716889cab1 | ||
|  | 409693984f | ||
|  | 04e8c11080 | ||
|  | 80af2b73ab | ||
|  | 3cc57f9645 | ||
|  | a65d4e7f14 | ||
|  | 543ec2136b | ||
|  | 93b5071f73 | ||
|  | ddc369f073 | ||
|  | fcc3e6138b | ||
|  | 9fe6ef7ab2 | ||
|  | c010af6f19 | ||
|  | 35b7982303 | ||
|  | f311cfa231 | ||
|  | e086e0eb6c | ||
|  | 314368c822 | ||
|  | c5181ab410 | ||
|  | ea5152cae1 | ||
|  | 255fca5eea | ||
|  | 4aeccadf4e | ||
|  | 93540ee10e | ||
|  | 8fb3ac3649 | ||
|  | 77b2986b5b | ||
|  | 62b013df0d | ||
|  | fad6768bd1 | ||
|  | a78125f925 | ||
|  | a00a8bcc8a | ||
|  | 1e9a9e167d | ||
|  | 3da0db62e6 | ||
|  | e14ced7918 | ||
|  | ab9d02f53b | ||
|  | a461a11989 | ||
|  | 1bd838608f | ||
|  | 365577f567 | ||
|  | 50efb383f0 | ||
|  | 5da6bd0083 | ||
|  | 5e9a033e6e | ||
|  | dd0a58f5f0 | ||
|  | a21420389e | ||
|  | 6140baf4e1 | ||
|  | 8fc642eb5b | ||
|  | e66e1a0046 | ||
|  | d5c69f1da4 | ||
|  | 5c8a3f862a | ||
|  | a3b9157f49 | ||
|  | b88ba05356 | ||
|  | b74d505577 | ||
|  | 9e2d7dca87 | ||
|  | d236b37ac9 | ||
|  | e880c66bd8 | ||
|  | 383456aa29 | ||
|  | 1a13940c8d | ||
|  | 3d54788495 | ||
|  | 71d53ace2f | ||
|  | f37e3f99f0 | ||
|  | bd03ffc16e | ||
|  | 1ac1af9b47 | ||
|  | 3bf5705316 | ||
|  | 1c2528c8a3 | ||
|  | 7bd15b1a03 | ||
|  | 6b961a85fd | ||
|  | 7707004043 | 
							
								
								
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -111,3 +111,4 @@ Paul Hartmann | ||||
| Frans de Jonge | ||||
| Robin de Rooij | ||||
| Ryan Schmidt | ||||
| Leslie P. Polzer | ||||
|   | ||||
							
								
								
									
										2
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,7 +1,7 @@ | ||||
| all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites | ||||
|  | ||||
| clean: | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe | ||||
|  | ||||
| PREFIX ?= /usr/local | ||||
| BINDIR ?= $(PREFIX)/bin | ||||
|   | ||||
| @@ -571,7 +571,7 @@ Support requests for services that **do** purchase the rights to distribute thei | ||||
|  | ||||
| ### How can I detect whether a given URL is supported by youtube-dl? | ||||
|  | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
|  | ||||
| It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. | ||||
|  | ||||
|   | ||||
| @@ -45,12 +45,12 @@ for test in get_testcases(): | ||||
|  | ||||
|         RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) | ||||
|  | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] | ||||
|                    or test['info_dict']['age_limit'] != 18): | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or | ||||
|                    test['info_dict']['age_limit'] != 18): | ||||
|         print('\nPotential missing age_limit check: {0}'.format(test['name'])) | ||||
|  | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] | ||||
|                          and test['info_dict']['age_limit'] == 18): | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and | ||||
|                          test['info_dict']['age_limit'] == 18): | ||||
|         print('\nPotential false negative: {0}'.format(test['name'])) | ||||
|  | ||||
|     else: | ||||
|   | ||||
| @@ -68,9 +68,12 @@ | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
|  - **CBS** | ||||
|  - **CBSNews**: CBS News | ||||
|  - **CBSSports** | ||||
|  - **CeskaTelevize** | ||||
|  - **channel9**: Channel 9 | ||||
|  - **Chilloutzone** | ||||
|  - **chirbit** | ||||
|  - **chirbit:profile** | ||||
|  - **Cinchcast** | ||||
|  - **Cinemassacre** | ||||
|  - **clipfish** | ||||
| @@ -121,6 +124,7 @@ | ||||
|  - **EllenTV** | ||||
|  - **EllenTV:clips** | ||||
|  - **ElPais**: El País | ||||
|  - **Embedly** | ||||
|  - **EMPFlix** | ||||
|  - **Engadget** | ||||
|  - **Eporner** | ||||
| @@ -190,6 +194,7 @@ | ||||
|  - **ign.com** | ||||
|  - **imdb**: Internet Movie Database trailers | ||||
|  - **imdb:list**: Internet Movie Database lists | ||||
|  - **Imgur** | ||||
|  - **Ina** | ||||
|  - **InfoQ** | ||||
|  - **Instagram** | ||||
| @@ -262,6 +267,7 @@ | ||||
|  - **myvideo** | ||||
|  - **MyVidster** | ||||
|  - **n-tv.de** | ||||
|  - **NationalGeographic** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **NBC** | ||||
| @@ -319,12 +325,14 @@ | ||||
|  - **podomatic** | ||||
|  - **PornHd** | ||||
|  - **PornHub** | ||||
|  - **PornHubPlaylist** | ||||
|  - **Pornotube** | ||||
|  - **PornoXO** | ||||
|  - **PromptFile** | ||||
|  - **prosiebensat1**: ProSiebenSat.1 Digital | ||||
|  - **Pyvideo** | ||||
|  - **QuickVid** | ||||
|  - **R7** | ||||
|  - **radio.de** | ||||
|  - **radiobremen** | ||||
|  - **radiofrance** | ||||
| @@ -380,7 +388,8 @@ | ||||
|  - **soundcloud:playlist** | ||||
|  - **soundcloud:set** | ||||
|  - **soundcloud:user** | ||||
|  - **Soundgasm** | ||||
|  - **soundgasm** | ||||
|  - **soundgasm:profile** | ||||
|  - **southpark.cc.com** | ||||
|  - **southpark.de** | ||||
|  - **Space** | ||||
| @@ -446,6 +455,7 @@ | ||||
|  - **Turbo** | ||||
|  - **Tutv** | ||||
|  - **tv.dfb.de** | ||||
|  - **TV4**: tv4.se and tv4play.se | ||||
|  - **tvigle**: Интернет-телевидение Tvigle.ru | ||||
|  - **tvp.pl** | ||||
|  - **tvp.pl:Series** | ||||
| @@ -553,6 +563,7 @@ | ||||
|  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) | ||||
|  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) | ||||
|  - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) | ||||
|  - **Zapiks** | ||||
|  - **ZDF** | ||||
|  - **ZDFChannel** | ||||
|  - **zingmp3:album**: mp3.zing.vn albums | ||||
|   | ||||
| @@ -34,8 +34,8 @@ def _make_testfunc(testfile): | ||||
|     def test_func(self): | ||||
|         as_file = os.path.join(TEST_DIR, testfile) | ||||
|         swf_file = os.path.join(TEST_DIR, test_id + '.swf') | ||||
|         if ((not os.path.exists(swf_file)) | ||||
|                 or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|         if ((not os.path.exists(swf_file)) or | ||||
|                 os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|             # Recompile | ||||
|             try: | ||||
|                 subprocess.check_call([ | ||||
|   | ||||
| @@ -308,8 +308,8 @@ class YoutubeDL(object): | ||||
|                     raise | ||||
|  | ||||
|         if (sys.version_info >= (3,) and sys.platform != 'win32' and | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] | ||||
|                 and not params.get('restrictfilenames', False)): | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and | ||||
|                 not params.get('restrictfilenames', False)): | ||||
|             # On Python 3, the Unicode filesystem API will throw errors (#1474) | ||||
|             self.report_warning( | ||||
|                 'Assuming --restrict-filenames since file system encoding ' | ||||
| @@ -1366,8 +1366,8 @@ class YoutubeDL(object): | ||||
|         """Download a given list of URLs.""" | ||||
|         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|         if (len(url_list) > 1 and | ||||
|                 '%' not in outtmpl | ||||
|                 and self.params.get('max_downloads') != 1): | ||||
|                 '%' not in outtmpl and | ||||
|                 self.params.get('max_downloads') != 1): | ||||
|             raise SameFileError(outtmpl) | ||||
|  | ||||
|         for url in url_list: | ||||
|   | ||||
| @@ -189,14 +189,14 @@ def _real_main(argv=None): | ||||
|         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) | ||||
|         if opts.outtmpl is not None: | ||||
|             opts.outtmpl = opts.outtmpl.decode(preferredencoding()) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) | ||||
|                or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.useid and '%(id)s.%(ext)s') | ||||
|                or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') | ||||
|                or DEFAULT_OUTTMPL) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or | ||||
|                (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.useid and '%(id)s.%(ext)s') or | ||||
|                (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or | ||||
|                DEFAULT_OUTTMPL) | ||||
|     if not os.path.splitext(outtmpl)[1] and opts.extractaudio: | ||||
|         parser.error('Cannot download a video and extract audio into the same' | ||||
|                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' | ||||
|   | ||||
| @@ -311,14 +311,14 @@ class FileDownloader(object): | ||||
|         """ | ||||
|  | ||||
|         nooverwrites_and_exists = ( | ||||
|             self.params.get('nooverwrites', False) | ||||
|             and os.path.exists(encodeFilename(filename)) | ||||
|             self.params.get('nooverwrites', False) and | ||||
|             os.path.exists(encodeFilename(filename)) | ||||
|         ) | ||||
|  | ||||
|         continuedl_and_exists = ( | ||||
|             self.params.get('continuedl', False) | ||||
|             and os.path.isfile(encodeFilename(filename)) | ||||
|             and not self.params.get('nopart', False) | ||||
|             self.params.get('continuedl', False) and | ||||
|             os.path.isfile(encodeFilename(filename)) and | ||||
|             not self.params.get('nopart', False) | ||||
|         ) | ||||
|  | ||||
|         # Check file already present | ||||
|   | ||||
| @@ -325,8 +325,8 @@ class F4mFD(FileDownloader): | ||||
|                 state['frag_index'] += 1 | ||||
|  | ||||
|             estimated_size = ( | ||||
|                 (state['downloaded_bytes'] + frag_total_bytes) | ||||
|                 / (state['frag_index'] + 1) * total_frags) | ||||
|                 (state['downloaded_bytes'] + frag_total_bytes) / | ||||
|                 (state['frag_index'] + 1) * total_frags) | ||||
|             time_now = time.time() | ||||
|             state['total_bytes_estimate'] = estimated_size | ||||
|             state['elapsed'] = time_now - start | ||||
|   | ||||
| @@ -58,10 +58,15 @@ from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .cbsnews import CBSNewsIE | ||||
| from .cbssports import CBSSportsIE | ||||
| from .ccc import CCCIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .chirbit import ( | ||||
|     ChirbitIE, | ||||
|     ChirbitProfileIE, | ||||
| ) | ||||
| from .cinchcast import CinchcastIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| @@ -121,6 +126,7 @@ from .ellentv import ( | ||||
|     EllenTVClipsIE, | ||||
| ) | ||||
| from .elpais import ElPaisIE | ||||
| from .embedly import EmbedlyIE | ||||
| from .empflix import EMPFlixIE | ||||
| from .engadget import EngadgetIE | ||||
| from .eporner import EpornerIE | ||||
| @@ -204,6 +210,7 @@ from .imdb import ( | ||||
|     ImdbIE, | ||||
|     ImdbListIE | ||||
| ) | ||||
| from .imgur import ImgurIE | ||||
| from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE, InstagramUserIE | ||||
| @@ -282,6 +289,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .myvidster import MyVidsterIE | ||||
| from .nationalgeographic import NationalGeographicIE | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nbc import ( | ||||
| @@ -350,13 +358,17 @@ from .playfm import PlayFMIE | ||||
| from .playvid import PlayvidIE | ||||
| from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| from .pornhub import ( | ||||
|     PornHubIE, | ||||
|     PornHubPlaylistIE, | ||||
| ) | ||||
| from .pornotube import PornotubeIE | ||||
| from .pornoxo import PornoXOIE | ||||
| from .promptfile import PromptFileIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .quickvid import QuickVidIE | ||||
| from .r7 import R7IE | ||||
| from .radiode import RadioDeIE | ||||
| from .radiobremen import RadioBremenIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| @@ -417,7 +429,10 @@ from .soundcloud import ( | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE | ||||
| ) | ||||
| from .soundgasm import SoundgasmIE | ||||
| from .soundgasm import ( | ||||
|     SoundgasmIE, | ||||
|     SoundgasmProfileIE | ||||
| ) | ||||
| from .southpark import ( | ||||
|     SouthParkIE, | ||||
|     SouthparkDeIE, | ||||
| @@ -483,6 +498,7 @@ from .tumblr import TumblrIE | ||||
| from .tunein import TuneInIE | ||||
| from .turbo import TurboIE | ||||
| from .tutv import TutvIE | ||||
| from .tv4 import TV4IE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE, TvpSeriesIE | ||||
| from .tvplay import TVPlayIE | ||||
| @@ -604,6 +620,7 @@ from .youtube import ( | ||||
|     YoutubeUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zapiks import ZapiksIE | ||||
| from .zdf import ZDFIE, ZDFChannelIE | ||||
| from .zingmp3 import ( | ||||
|     ZingMp3SongIE, | ||||
|   | ||||
| @@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player = self._parse_json( | ||||
| @@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor): | ||||
|             self._html_search_meta('datepublished', webpage, 'upload date')) | ||||
|  | ||||
|         duration = parse_duration( | ||||
|             self._html_search_meta('duration', webpage, 'duration') | ||||
|             or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) | ||||
|             self._html_search_meta('duration', webpage, 'duration') or | ||||
|             self._search_regex( | ||||
|                 r'Runtime:\s*(\d{2}:\d{2}:\d{2})', | ||||
|                 webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', | ||||
|   | ||||
| @@ -11,8 +11,8 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class AppleTrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         "url": "http://trailers.apple.com/trailers/wb/manofsteel/", | ||||
|         'info_dict': { | ||||
|             'id': 'manofsteel', | ||||
| @@ -63,7 +63,10 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 }, | ||||
|             }, | ||||
|         ] | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://trailers.apple.com/ca/metropole/autrui/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _JSON_RE = r'iTunes.playURL\((.*?)\);' | ||||
|  | ||||
|   | ||||
| @@ -1,40 +1,35 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import remove_start | ||||
| from ..utils import ( | ||||
|     remove_start, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlinkxIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     IE_NAME = 'blinkx' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', | ||||
|         'md5': '2e9a07364af40163a908edbf10bb2492', | ||||
|         'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', | ||||
|         'md5': '337cf7a344663ec79bf93a526a2e06c7', | ||||
|         'info_dict': { | ||||
|             'id': '8aQUy7GV', | ||||
|             'id': 'Da0Gw3xc', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Police Car Rolls Away', | ||||
|             'uploader': 'stupidvideos.com', | ||||
|             'upload_date': '20131215', | ||||
|             'timestamp': 1387068000, | ||||
|             'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!', | ||||
|             'duration': 14.886, | ||||
|             'thumbnails': [{ | ||||
|                 'width': 100, | ||||
|                 'height': 76, | ||||
|                 'resolution': '100x76', | ||||
|                 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', | ||||
|             }], | ||||
|             'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', | ||||
|             'uploader': 'IGN News', | ||||
|             'upload_date': '20150217', | ||||
|             'timestamp': 1424215740, | ||||
|             'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', | ||||
|             'duration': 47.743333, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, rl): | ||||
|         m = re.match(self._VALID_URL, rl) | ||||
|         video_id = m.group('id') | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         display_id = video_id[:8] | ||||
|  | ||||
|         api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + | ||||
| @@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor): | ||||
|             elif m['type'] in ('flv', 'mp4'): | ||||
|                 vcodec = remove_start(m['vcodec'], 'ff') | ||||
|                 acodec = remove_start(m['acodec'], 'ff') | ||||
|                 tbr = (int(m['vbr']) + int(m['abr'])) // 1000 | ||||
|                 vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) | ||||
|                 abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) | ||||
|                 tbr = vbr + abr if vbr and abr else None | ||||
|                 format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': m['link'], | ||||
|                     'vcodec': vcodec, | ||||
|                     'acodec': acodec, | ||||
|                     'abr': int(m['abr']) // 1000, | ||||
|                     'vbr': int(m['vbr']) // 1000, | ||||
|                     'abr': abr, | ||||
|                     'vbr': vbr, | ||||
|                     'tbr': tbr, | ||||
|                     'width': int(m['w']), | ||||
|                     'height': int(m['h']), | ||||
|                     'width': int_or_none(m.get('w')), | ||||
|                     'height': int_or_none(m.get('h')), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -39,8 +37,7 @@ class CBSIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         real_id = self._search_regex( | ||||
|             r"video\.settings\.pid\s*=\s*'([^']+)';", | ||||
|   | ||||
							
								
								
									
										30
									
								
								youtube_dl/extractor/cbssports.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								youtube_dl/extractor/cbssports.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', | ||||
|         'info_dict': { | ||||
|             'id': '_d5_GbO8p1sT', | ||||
|             'ext': 'flv', | ||||
|             'title': 'US Open flashbacks: 1990s', | ||||
|             'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         section = mobj.group('section') | ||||
|         video_id = mobj.group('id') | ||||
|         all_videos = self._download_json( | ||||
|             'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, | ||||
|             video_id) | ||||
|         # The json file contains the info of all the videos in the section | ||||
|         video_info = next(v for v in all_videos if v['pcid'] == video_id) | ||||
|         return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/chirbit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/chirbit.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ChirbitIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://chirb.it/PrIPv5', | ||||
|         'md5': '9847b0dad6ac3e074568bf2cfb197de8', | ||||
|         'info_dict': { | ||||
|             'id': 'PrIPv5', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Фасадстрой', | ||||
|             'duration': 52, | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         audio_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://chirb.it/%s' % audio_id, audio_id) | ||||
|  | ||||
|         audio_url = self._search_regex( | ||||
|             r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'itemprop="name">([^<]+)', webpage, 'title') | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'itemprop="playCount"\s*>(\d+)', webpage, | ||||
|             'listen count', fatal=False)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'>(\d+) Comments?:', webpage, | ||||
|             'comment count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': audio_id, | ||||
|             'url': audio_url, | ||||
|             'title': title, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ChirbitProfileIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit:profile' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://chirbit.com/ScarletBeauty', | ||||
|         'info_dict': { | ||||
|             'id': 'ScarletBeauty', | ||||
|             'title': 'Chirbits by ScarletBeauty', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         profile_id = self._match_id(url) | ||||
|  | ||||
|         rss = self._download_xml( | ||||
|             'http://chirbit.com/rss/%s' % profile_id, profile_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(audio_url.text, 'Chirbit') | ||||
|             for audio_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         title = rss.find('./channel/title').text | ||||
|  | ||||
|         return self.playlist_result(entries, profile_id, title) | ||||
| @@ -391,6 +391,16 @@ class InfoExtractor(object): | ||||
|             if blocked_iframe: | ||||
|                 msg += ' Visit %s for more details' % blocked_iframe | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         if '<title>The URL you requested has been blocked</title>' in content[:512]: | ||||
|             msg = ( | ||||
|                 'Access to this webpage has been blocked by Indian censorship. ' | ||||
|                 'Use a VPN or proxy server (with --proxy) to route around it.') | ||||
|             block_msg = self._html_search_regex( | ||||
|                 r'</h1><p>(.*?)</p>', | ||||
|                 content, 'block message', default=None) | ||||
|             if block_msg: | ||||
|                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         return content | ||||
|  | ||||
| @@ -798,8 +808,8 @@ class InfoExtractor(object): | ||||
|             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') | ||||
|         for i, media_el in enumerate(media_nodes): | ||||
|             if manifest_version == '2.0': | ||||
|                 manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' | ||||
|                                 + (media_el.attrib.get('href') or media_el.attrib.get('url'))) | ||||
|                 manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + | ||||
|                                 (media_el.attrib.get('href') or media_el.attrib.get('url'))) | ||||
|             tbr = int_or_none(media_el.attrib.get('bitrate')) | ||||
|             formats.append({ | ||||
|                 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), | ||||
| @@ -823,7 +833,7 @@ class InfoExtractor(object): | ||||
|             'url': m3u8_url, | ||||
|             'ext': ext, | ||||
|             'protocol': 'm3u8', | ||||
|             'preference': -1, | ||||
|             'preference': preference - 1 if preference else -1, | ||||
|             'resolution': 'multiple', | ||||
|             'format_note': 'Quality selection URL', | ||||
|         }] | ||||
|   | ||||
| @@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor): | ||||
|             r"flashvars.pvg_id=\"(\d+)\";", | ||||
|             webpage, 'ID') | ||||
|  | ||||
|         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' | ||||
|                     + video_id) | ||||
|         json_url = ( | ||||
|             'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % | ||||
|             video_id) | ||||
|         info = self._download_json(json_url, title, 'Downloading JSON config') | ||||
|         video_url = info['renditions'][0]['url'] | ||||
|  | ||||
|   | ||||
							
								
								
									
										16
									
								
								youtube_dl/extractor/embedly.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								youtube_dl/extractor/embedly.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,16 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class EmbedlyIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) | ||||
| @@ -22,6 +22,7 @@ class EscapistIE(InfoExtractor): | ||||
|             'uploader_id': 'the-escapist-presents', | ||||
|             'uploader': 'The Escapist Presents', | ||||
|             'title': "Breaking Down Baldur's Gate", | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -30,19 +31,18 @@ class EscapistIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r"<h1 class='headline'><a href='/videos/view/(.*?)'", | ||||
|             r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'", | ||||
|             webpage, 'uploader ID', fatal=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r"<h1 class='headline'>(.*?)</a>", | ||||
|             r"<h1\s+class='headline'>(.*?)</a>", | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         raw_title = self._html_search_meta('title', webpage, fatal=True) | ||||
|         title = raw_title.partition(' : ')[2] | ||||
|  | ||||
|         player_url = self._og_search_video_url(webpage, name='player URL') | ||||
|         config_url = compat_urllib_parse.unquote(self._search_regex( | ||||
|             r'config=(.*)$', player_url, 'config URL')) | ||||
|         config_url = compat_urllib_parse.unquote(self._html_search_regex( | ||||
|             r'<param\s+name="flashvars"\s+value="config=([^"&]+)', webpage, 'config URL')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
| @@ -81,5 +81,4 @@ class EscapistIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': description, | ||||
|             'player_url': player_url, | ||||
|         } | ||||
|   | ||||
| @@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor): | ||||
|     IE_NAME = '5min' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| | ||||
|             https?://(?:(?:massively|www)\.)?joystiq\.com/video/| | ||||
|             5min:) | ||||
|         (?P<id>\d+) | ||||
|         ''' | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import remove_end | ||||
|  | ||||
|  | ||||
| class GDCVaultIE(InfoExtractor): | ||||
| @@ -68,7 +69,9 @@ class GDCVaultIE(InfoExtractor): | ||||
|         akami_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         slide_video_path = xml_description.find('./metadata/slideVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + slide_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', | ||||
|             'play_path': remove_end(slide_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'slide deck video', | ||||
|             'quality': -2, | ||||
|             'preference': -2, | ||||
| @@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor): | ||||
|         }) | ||||
|         speaker_video_path = xml_description.find('./metadata/speakerVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + speaker_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % 'fms.digitallyspeaking.com/cfx/st', | ||||
|             'play_path': remove_end(speaker_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'speaker video', | ||||
|             'quality': -1, | ||||
|             'preference': -1, | ||||
|   | ||||
| @@ -532,7 +532,7 @@ class GenericIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': 'Mrj4DVp2zeA', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20150204', | ||||
|                 'upload_date': '20150212', | ||||
|                 'uploader': 'The National Archives UK', | ||||
|                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', | ||||
|                 'uploader_id': 'NationalArchives08', | ||||
| @@ -547,7 +547,16 @@ class GenericIE(InfoExtractor): | ||||
|                 'id': 'aanslagen-kopenhagen', | ||||
|                 'title': 'Aanslagen Kopenhagen | RTL Nieuws', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         # Zapiks embed | ||||
|         { | ||||
|             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', | ||||
|             'info_dict': { | ||||
|                 'id': '118046', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -1098,6 +1107,12 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Livestream') | ||||
|  | ||||
|         # Look for Zapiks embed | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Zapiks') | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|   | ||||
							
								
								
									
										97
									
								
								youtube_dl/extractor/imgur.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								youtube_dl/extractor/imgur.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ImgurIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://i.imgur.com/A61SaA1.gifv', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://imgur.com/A61SaA1', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         width = int_or_none(self._search_regex( | ||||
|             r'<param name="width" value="([0-9]+)"', | ||||
|             webpage, 'width', fatal=False)) | ||||
|         height = int_or_none(self._search_regex( | ||||
|             r'<param name="height" value="([0-9]+)"', | ||||
|             webpage, 'height', fatal=False)) | ||||
|  | ||||
|         video_elements = self._search_regex( | ||||
|             r'(?s)<div class="video-elements">(.*?)</div>', | ||||
|             webpage, 'video elements', default=None) | ||||
|         if not video_elements: | ||||
|             raise ExtractorError( | ||||
|                 'No sources found for video %s. Maybe an image?' % video_id, | ||||
|                 expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): | ||||
|             formats.append({ | ||||
|                 'format_id': m.group('type').partition('/')[2], | ||||
|                 'url': self._proto_relative_url(m.group('src')), | ||||
|                 'ext': mimetype2ext(m.group('type')), | ||||
|                 'acodec': 'none', | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         gif_json = self._search_regex( | ||||
|             r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', | ||||
|             webpage, 'GIF code', fatal=False) | ||||
|         if gif_json: | ||||
|             gifd = self._parse_json( | ||||
|                 gif_json, video_id, transform_source=js_to_json) | ||||
|             formats.append({ | ||||
|                 'format_id': 'gif', | ||||
|                 'preference': -10, | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'ext': 'gif', | ||||
|                 'acodec': 'none', | ||||
|                 'vcodec': 'gif', | ||||
|                 'container': 'gif', | ||||
|                 'url': self._proto_relative_url(gifd['gifUrl']), | ||||
|                 'filesize': gifd.get('size'), | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': self._og_search_title(webpage), | ||||
|         } | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/nationalgeographic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/nationalgeographic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NationalGeographicIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', | ||||
|         'info_dict': { | ||||
|             'id': '4DmDACA6Qtk_', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Mating Crabs Busted by Sharks', | ||||
|             'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         name = url_basename(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url') | ||||
|         guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid') | ||||
|  | ||||
|         feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name) | ||||
|         content = feed.find('.//{http://search.yahoo.com/mrss/}content') | ||||
|         theplatform_id = url_basename(content.attrib.get('url')) | ||||
|  | ||||
|         return self.url_result(smuggle_url( | ||||
|             'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id, | ||||
|             # For some reason, the normal links don't work and we must force the use of f4m | ||||
|             {'force_smil_url': True})) | ||||
| @@ -18,13 +18,13 @@ class NBCIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', | ||||
|             'url': 'http://www.nbc.com/the-tonight-show/segments/112966', | ||||
|             # md5 checksum is not stable | ||||
|             'info_dict': { | ||||
|                 'id': 'bTmnLCvIbaaH', | ||||
|                 'id': 'c9xnCo0YPOPH', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'I Am a Firefighter', | ||||
|                 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', | ||||
|                 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', | ||||
|                 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|   | ||||
| @@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor): | ||||
|             'timestamp': 1344858571, | ||||
|             'age_limit': 12, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'Download only works from Germany', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,9 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
| @@ -11,7 +8,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class PatreonIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.patreon.com/creation?hid=743933', | ||||
| @@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor): | ||||
|                 'thumbnail': 're:^https?://.*$', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.patreon.com/creation?hid=1682498', | ||||
|             'info_dict': { | ||||
|                 'id': 'SU4fj_aEMVw', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'I\'m on Patreon!', | ||||
|                 'uploader': 'TraciJHines', | ||||
|                 'thumbnail': 're:^https?://.*$', | ||||
|                 'upload_date': '20150211', | ||||
|                 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', | ||||
|                 'uploader_id': 'TraciJHines', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'noplaylist': True, | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     # Currently Patreon exposes download URL via hidden CSS, so login is not | ||||
| @@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor): | ||||
|     ''' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._og_search_title(webpage).strip() | ||||
|  | ||||
|         attach_fn = self._html_search_regex( | ||||
|             r'<div class="attach"><a target="_blank" href="([^"]+)">', | ||||
|             webpage, 'attachment URL', default=None) | ||||
|         embed = self._html_search_regex( | ||||
|             r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"', | ||||
|             webpage, 'embedded URL', default=None) | ||||
|  | ||||
|         if attach_fn is not None: | ||||
|             video_url = 'http://www.patreon.com' + attach_fn | ||||
|             thumbnail = self._og_search_thumbnail(webpage) | ||||
|             uploader = self._html_search_regex( | ||||
|                 r'<strong>(.*?)</strong> is creating', webpage, 'uploader') | ||||
|         elif embed is not None: | ||||
|             return self.url_result(embed) | ||||
|         else: | ||||
|             playlist_js = self._search_regex( | ||||
|             playlist = self._parse_json(self._search_regex( | ||||
|                 r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', | ||||
|                 webpage, 'playlist JSON') | ||||
|             playlist_json = js_to_json(playlist_js) | ||||
|             playlist = json.loads(playlist_json) | ||||
|                 webpage, 'playlist JSON'), | ||||
|                 video_id, transform_source=js_to_json) | ||||
|             data = playlist[0] | ||||
|             video_url = self._proto_relative_url(data['mp3']) | ||||
|             thumbnail = self._proto_relative_url(data.get('cover')) | ||||
|   | ||||
| @@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor): | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<', | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) | ||||
|         if thumbnail: | ||||
| @@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class PornHubPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhub.com/playlist/6201671', | ||||
|         'info_dict': { | ||||
|             'id': '6201671', | ||||
|             'title': 'P0p4', | ||||
|         }, | ||||
|         'playlist_mincount': 35, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') | ||||
|             for video_url in set(re.findall('href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage)) | ||||
|         ] | ||||
|  | ||||
|         playlist = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), | ||||
|             playlist_id) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, playlist_id, playlist.get('title'), playlist.get('description')) | ||||
|   | ||||
							
								
								
									
										88
									
								
								youtube_dl/extractor/r7.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								youtube_dl/extractor/r7.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class R7IE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|                         (?: | ||||
|                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| | ||||
|                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-| | ||||
|                             player\.r7\.com/video/i/ | ||||
|                         ) | ||||
|                         (?P<id>[\da-f]{24}) | ||||
|                         ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', | ||||
|         'md5': '403c4e393617e8e8ddc748978ee8efde', | ||||
|         'info_dict': { | ||||
|             'id': '54e7050b0cf2ff57e0279389', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 98, | ||||
|             'like_count': int, | ||||
|             'view_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://player.r7.com/video/i/%s' % video_id, video_id) | ||||
|  | ||||
|         item = self._parse_json(js_to_json(self._search_regex( | ||||
|             r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) | ||||
|  | ||||
|         title = unescapeHTML(item['title']) | ||||
|         thumbnail = item.get('init', {}).get('thumbUri') | ||||
|         duration = None | ||||
|  | ||||
|         statistics = item.get('statistics', {}) | ||||
|         like_count = int_or_none(statistics.get('likes')) | ||||
|         view_count = int_or_none(statistics.get('views')) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_key, format_dict in item['playlist'][0].items(): | ||||
|             src = format_dict.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             format_id = format_dict.get('format') or format_key | ||||
|             if duration is None: | ||||
|                 duration = format_dict.get('duration') | ||||
|             if '.f4m' in src: | ||||
|                 formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) | ||||
|             elif src.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': src, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -6,6 +6,7 @@ import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     struct_unpack, | ||||
|     remove_end, | ||||
| @@ -96,12 +97,10 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
|             ).replace('.net.rtve', '.multimedia.cdn.rtve') | ||||
|             video_path = self._download_webpage( | ||||
|                 auth_url, video_id, 'Getting video url') | ||||
|             # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get | ||||
|             # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get | ||||
|             # the right Content-Length header and the mp4 format | ||||
|             video_url = ( | ||||
|                 'http://mvod.akcdn.rtve.es/{0}&v=2.6.8' | ||||
|                 '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path) | ||||
|             ) | ||||
|             video_url = compat_urlparse.urljoin( | ||||
|                 'http://mvod1.akcdn.rtve.es/', video_path) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor): | ||||
|             'id': '437BE28B89D799D7', | ||||
|             'title': 'big_buck_bunny_720p_surround.avi', | ||||
|             'ext': 'avi', | ||||
|             'thumbnail': 're:^http://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor): | ||||
|             ''', webpage, 'hash') | ||||
|  | ||||
|         fields = { | ||||
|             "hash": confirm_hash, | ||||
|             "hash": confirm_hash.encode('utf-8'), | ||||
|             "confirm": "Continue as Free User" | ||||
|         } | ||||
|  | ||||
| @@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor): | ||||
|             webpage, 'title', default=None) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'<img\s+src="([^"]*)".+?name="bg"', | ||||
|             webpage, 'thumbnail') | ||||
|             webpage, 'thumbnail', default=None) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SoundgasmIE(InfoExtractor): | ||||
|     IE_NAME = 'soundgasm' | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://soundgasm.net/u/ytdl/Piano-sample', | ||||
| @@ -38,3 +39,25 @@ class SoundgasmIE(InfoExtractor): | ||||
|             'title': audio_title, | ||||
|             'description': description | ||||
|         } | ||||
|  | ||||
| class SoundgasmProfileIE(InfoExtractor): | ||||
|     IE_NAME = 'soundgasm:profile' | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' | ||||
|     _TEST = { | ||||
|         'url': 'http://soundgasm.net/u/ytdl', | ||||
|         'info_dict': { | ||||
|             'id': 'ytdl', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         profile_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, profile_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(audio_url, 'Soundgasm') | ||||
|             for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] | ||||
|  | ||||
|         return self.playlist_result(entries, profile_id) | ||||
|   | ||||
| @@ -1,8 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import qualities | ||||
|  | ||||
|  | ||||
| class TeamcocoIE(InfoExtractor): | ||||
| @@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': '19705', | ||||
|                 'ext': 'mp4', | ||||
|                 "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", | ||||
|                 "title": "Louis C.K. Interview Pt. 1 11/3/11", | ||||
|                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', | ||||
|                 'title': 'Louis C.K. Interview Pt. 1 11/3/11', | ||||
|                 'age_limit': 0, | ||||
|             } | ||||
|         } | ||||
| @@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor): | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = mobj.group("video_id") | ||||
|         video_id = mobj.group('video_id') | ||||
|         if not video_id: | ||||
|             video_id = self._html_search_regex( | ||||
|                 self._VIDEO_ID_REGEXES, webpage, 'video id') | ||||
|  | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|         data = self._download_xml( | ||||
|             data_url, display_id, 'Downloading data webpage') | ||||
|         embed_url = 'http://teamcoco.com/embed/v/%s' % video_id | ||||
|         embed = self._download_webpage( | ||||
|             embed_url, video_id, 'Downloading embed page') | ||||
|  | ||||
|         encoded_data = self._search_regex( | ||||
|             r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data') | ||||
|         data = self._parse_json( | ||||
|             base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id) | ||||
|  | ||||
|         qualities = ['500k', '480p', '1000k', '720p', '1080p'] | ||||
|         formats = [] | ||||
|         for filed in data.findall('files/file'): | ||||
|             if filed.attrib.get('playmode') == 'all': | ||||
|                 # it just duplicates one of the entries | ||||
|                 break | ||||
|             file_url = filed.text | ||||
|             m_format = re.search(r'(\d+(k|p))\.mp4', file_url) | ||||
|         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) | ||||
|         for filed in data['files']: | ||||
|             m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) | ||||
|             if m_format is not None: | ||||
|                 format_id = m_format.group(1) | ||||
|             else: | ||||
|                 format_id = filed.attrib['bitrate'] | ||||
|                 format_id = filed['bitrate'] | ||||
|             tbr = ( | ||||
|                 int(filed.attrib['bitrate']) | ||||
|                 if filed.attrib['bitrate'].isdigit() | ||||
|                 int(filed['bitrate']) | ||||
|                 if filed['bitrate'].isdigit() | ||||
|                 else None) | ||||
|  | ||||
|             try: | ||||
|                 quality = qualities.index(format_id) | ||||
|             except ValueError: | ||||
|                 quality = -1 | ||||
|             formats.append({ | ||||
|                 'url': file_url, | ||||
|                 'url': filed['url'], | ||||
|                 'ext': 'mp4', | ||||
|                 'tbr': tbr, | ||||
|                 'format_id': format_id, | ||||
|                 'quality': quality, | ||||
|                 'quality': get_quality(format_id), | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
| @@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'formats': formats, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': data['title'], | ||||
|             'thumbnail': data.get('thumb', {}).get('href'), | ||||
|             'description': data.get('teaser'), | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
| @@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # YouTube video | ||||
|         'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond', | ||||
|         'add_ie': ['Youtube'], | ||||
|         'info_dict': { | ||||
|             'id': 'aFBIPO-P7LM', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville', | ||||
|             'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1', | ||||
|             'uploader': 'TEDx Talks', | ||||
|             'uploader_id': 'TEDxTalks', | ||||
|             'upload_date': '20111216', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _NATIVE_FORMATS = { | ||||
| @@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         talk_info = self._extract_info(webpage)['talks'][0] | ||||
|  | ||||
|         if talk_info.get('external') is not None: | ||||
|             self.to_screen('Found video from %s' % talk_info['external']['service']) | ||||
|         external = talk_info.get('external') | ||||
|         if external: | ||||
|             service = external['service'] | ||||
|             self.to_screen('Found video from %s' % service) | ||||
|             ext_url = None | ||||
|             if service.lower() == 'youtube': | ||||
|                 ext_url = external.get('code') | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': talk_info['external']['uri'], | ||||
|                 'url': ext_url or external['uri'], | ||||
|             } | ||||
|  | ||||
|         formats = [{ | ||||
|   | ||||
| @@ -4,11 +4,10 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class TheOnionIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', | ||||
|         'md5': '19eaa9a39cf9b9804d982e654dc791ee', | ||||
| @@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         article_id = mobj.group('article_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, article_id) | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'"videoId":\s(\d+),', webpage, 'video ID') | ||||
| @@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor): | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) | ||||
|         if not sources: | ||||
|             raise ExtractorError( | ||||
|                 'No sources found for video %s' % video_id, expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for src, type_ in sources: | ||||
|             if type_ == 'video/mp4': | ||||
| @@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor): | ||||
|                 }) | ||||
|             elif type_ == 'application/x-mpegURL': | ||||
|                 formats.extend( | ||||
|                     self._extract_m3u8_formats(src, video_id, preference=-1)) | ||||
|                     self._extract_m3u8_formats(src, display_id, preference=-1)) | ||||
|             else: | ||||
|                 self.report_warning( | ||||
|                     'Encountered unexpected format: %s' % type_) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': thumbnail, | ||||
|   | ||||
| @@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor): | ||||
|         if not provider_id: | ||||
|             provider_id = 'dJ5BDC' | ||||
|  | ||||
|         if mobj.group('config'): | ||||
|         if smuggled_data.get('force_smil_url', False): | ||||
|             smil_url = url | ||||
|         elif mobj.group('config'): | ||||
|             config_url = url + '&form=json' | ||||
|             config_url = config_url.replace('swf/', 'config/') | ||||
|             config_url = config_url.replace('onsite/', 'onsite/config/') | ||||
|   | ||||
							
								
								
									
										100
									
								
								youtube_dl/extractor/tv4.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										100
									
								
								youtube_dl/extractor/tv4.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,100 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TV4IE(InfoExtractor): | ||||
|     IE_DESC = 'tv4.se and tv4play.se' | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)? | ||||
|         (?: | ||||
|             tv4\.se/(?:[^/]+)/klipp/(?:.*)-| | ||||
|             tv4play\.se/ | ||||
|             (?: | ||||
|                 (?:program|barn)/(?:[^\?]+)\?video_id=| | ||||
|                 iframe/video/| | ||||
|                 film/| | ||||
|                 sport/| | ||||
|             ) | ||||
|         )(?P<id>[0-9]+)''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', | ||||
|             'md5': '909d6454b87b10a25aa04c4bdd416a9b', | ||||
|             'info_dict': { | ||||
|                 'id': '2491650', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Kalla Fakta 5 (english subtitles)', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'timestamp': int, | ||||
|                 'upload_date': '20131125', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/iframe/video/3054113', | ||||
|             'md5': '77f851c55139ffe0ebd41b6a5552489b', | ||||
|             'info_dict': { | ||||
|                 'id': '3054113', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.', | ||||
|                 'timestamp': int, | ||||
|                 'upload_date': '20150130', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/sport/3060959', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/film/2378136', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') | ||||
|  | ||||
|         # If is_geo_restricted is true, it doesn't necessarily mean we can't download it | ||||
|         if info['is_geo_restricted']: | ||||
|             self.report_warning('This content might not be available in your country due to licensing restrictions.') | ||||
|         if info['requires_subscription']: | ||||
|             raise ExtractorError('This content requires subscription.', expected=True) | ||||
|  | ||||
|         sources_data = self._download_json( | ||||
|             'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') | ||||
|         sources = sources_data['playback'] | ||||
|  | ||||
|         formats = [] | ||||
|         for item in sources.get('items', {}).get('item', []): | ||||
|             ext, bitrate = item['mediaFormat'], item['bitrate'] | ||||
|             formats.append({ | ||||
|                 'format_id': '%s_%s' % (ext, bitrate), | ||||
|                 'tbr': bitrate, | ||||
|                 'ext': ext, | ||||
|                 'url': item['url'], | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['title'], | ||||
|             'formats': formats, | ||||
|             'description': info.get('description'), | ||||
|             'timestamp': parse_iso8601(info.get('broadcast_date_time')), | ||||
|             'duration': info.get('duration'), | ||||
|             'thumbnail': info.get('image'), | ||||
|             'is_live': sources.get('live'), | ||||
|         } | ||||
| @@ -4,6 +4,7 @@ from __future__ import unicode_literals | ||||
| import json | ||||
| import re | ||||
| import itertools | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| @@ -225,6 +226,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if mobj.group('pro') or mobj.group('player'): | ||||
|             url = 'http://player.vimeo.com/video/' + video_id | ||||
|  | ||||
|         password = self._downloader.params.get('videopassword', None) | ||||
|         if password: | ||||
|             headers['Cookie'] = '%s_password=%s' % ( | ||||
|                 video_id, hashlib.md5(password.encode('utf-8')).hexdigest()) | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url, None, headers) | ||||
|         try: | ||||
|   | ||||
| @@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor): | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         story_filename = self._search_regex( | ||||
|             r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') | ||||
|         speaker_id = self._search_regex( | ||||
|             r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') | ||||
|         story_id = self._search_regex( | ||||
|             r'\.storyId\((\d+)\)', webpage, 'story ID') | ||||
|         speaker_type = self._search_regex( | ||||
|             r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') | ||||
|         great_life = self._search_regex( | ||||
|             r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') | ||||
|         embed_params = [s.strip(" \r\n\t'") for s in self._search_regex( | ||||
|             r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)', | ||||
|             webpage, 'embed params').split(',')] | ||||
|  | ||||
|         ( | ||||
|             _, speaker_id, story_id, story_duration, | ||||
|             speaker_type, great_life, _thumbnail, _has_subtitles, | ||||
|             story_filename, _story_order) = embed_params | ||||
|  | ||||
|         is_great_life_series = great_life == 'true' | ||||
|         duration = int_or_none(self._search_regex( | ||||
|             r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) | ||||
|         duration = int_or_none(story_duration) | ||||
|  | ||||
|         # URL building, see: http://www.webofstories.com/scripts/player.js | ||||
|         ms_prefix = '' | ||||
|   | ||||
							
								
								
									
										110
									
								
								youtube_dl/extractor/zapiks.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								youtube_dl/extractor/zapiks.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,110 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     xpath_with_ns, | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ZapiksIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', | ||||
|             'md5': 'aeb3c473b2d564b2d46d664d28d5f050', | ||||
|             'info_dict': { | ||||
|                 'id': '80798', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', | ||||
|                 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 528, | ||||
|                 'timestamp': 1359044972, | ||||
|                 'upload_date': '20130124', | ||||
|                 'view_count': int, | ||||
|                 'comment_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         if not video_id: | ||||
|             video_id = self._search_regex( | ||||
|                 r'data-media-id="(\d+)"', webpage, 'video id') | ||||
|  | ||||
|         playlist = self._download_xml( | ||||
|             'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id, | ||||
|             display_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'jwplayer': 'http://rss.jwpcdn.com/' | ||||
|         } | ||||
|  | ||||
|         def ns(path): | ||||
|             return xpath_with_ns(path, NS_MAP) | ||||
|  | ||||
|         item = playlist.find('./channel/item') | ||||
|  | ||||
|         title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage, default=None) | ||||
|         thumbnail = xpath_text( | ||||
|             item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None) | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration', default=None)) | ||||
|         timestamp = parse_iso8601(self._html_search_meta( | ||||
|             'uploadDate', webpage, 'upload date', default=None), ' ') | ||||
|  | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'UserPlays:(\d+)', webpage, 'view count', default=None)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'UserComments:(\d+)', webpage, 'comment count', default=None)) | ||||
|  | ||||
|         formats = [] | ||||
|         for source in item.findall(ns('./jwplayer:source')): | ||||
|             format_id = source.attrib['label'] | ||||
|             f = { | ||||
|                 'url': source.attrib['file'], | ||||
|                 'format_id': format_id, | ||||
|             } | ||||
|             m = re.search(r'^(?P<height>\d+)[pP]', format_id) | ||||
|             if m: | ||||
|                 f['height'] = int(m.group('height')) | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -900,8 +900,8 @@ def _windows_write_string(s, out): | ||||
|     def not_a_console(handle): | ||||
|         if handle == INVALID_HANDLE_VALUE or handle is None: | ||||
|             return True | ||||
|         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR | ||||
|                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) | ||||
|         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or | ||||
|                 GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) | ||||
|  | ||||
|     if not_a_console(h): | ||||
|         return False | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2015.02.19' | ||||
| __version__ = '2015.02.23' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user