mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			163 Commits
		
	
	
		
			2016.06.14
			...
			2016.07.02
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 7a1e71575e | ||
|  | ac2d8f54d1 | ||
|  | 14ff6baa0e | ||
|  | bb08101ec4 | ||
|  | bc4b2d75ba | ||
|  | 35fc3021ba | ||
|  | 347227237b | ||
|  | 564dc3c6e8 | ||
|  | 9f4576a7eb | ||
|  | f11315e8d4 | ||
|  | 0c2ac64bb8 | ||
|  | a9eede3913 | ||
|  | 9e29ef13a3 | ||
|  | eaaaaec042 | ||
|  | 3cb3b60064 | ||
|  | 044e3d91b5 | ||
|  | c9e538a3b1 | ||
|  | 76dad392f5 | ||
|  | 9617b557aa | ||
|  | bf4fa24414 | ||
|  | 20361b4f25 | ||
|  | 05a0068a76 | ||
|  | 66a42309fa | ||
|  | fd94e2671a | ||
|  | 8ff6697861 | ||
|  | eafa643715 | ||
|  | 049da7cb6c | ||
|  | 7dbeee7e22 | ||
|  | 93ad6c6bfa | ||
|  | 329179073b | ||
|  | 4d86d2008e | ||
|  | ab47b6e881 | ||
|  | df43389ade | ||
|  | 397b305cfe | ||
|  | e496fa50cd | ||
|  | 06a96da15b | ||
|  | 70157c2c43 | ||
|  | c58ed8563d | ||
|  | 4c7821227c | ||
|  | 42362fdb5e | ||
|  | 97124e572d | ||
|  | 32616c14cc | ||
|  | 8174d0fe95 | ||
|  | 8704778d95 | ||
|  | c287f2bc60 | ||
|  | 9ea5c04c0d | ||
|  | fd7a7498a4 | ||
|  | e3a6747d8f | ||
|  | f41ffc00d1 | ||
|  | 81fda15369 | ||
|  | 427cd050a3 | ||
|  | b0c200f1ec | ||
|  | 92747e664a | ||
|  | f1f336322d | ||
|  | bf8dd79045 | ||
|  | c6781156aa | ||
|  | f484c5fa25 | ||
|  | 88d9f6c0c4 | ||
|  | 3c9c088f9c | ||
|  | fc3996bfe1 | ||
|  | 5b6ad8630c | ||
|  | 30105f4ac0 | ||
|  | 1143535d76 | ||
|  | 7d52c052ef | ||
|  | a2406fce3c | ||
|  | 3b34ab538c | ||
|  | ac782306f1 | ||
|  | 0c00e889f3 | ||
|  | ce96ed05f4 | ||
|  | 0463b77a1f | ||
|  | 2d185706ea | ||
|  | b72b44318c | ||
|  | 46f59e89ea | ||
|  | b4241e308e | ||
|  | 3d4b08dfc7 | ||
|  | be49068d65 | ||
|  | 525cedb971 | ||
|  | de3c7fe0d4 | ||
|  | 896cc72750 | ||
|  | c1ff6e1ad0 | ||
|  | fee70322d7 | ||
|  | 8065d6c55f | ||
|  | 494172d2e5 | ||
|  | 6e3c2047f8 | ||
|  | 011bd3221b | ||
|  | b46eabecd3 | ||
|  | 0437307a41 | ||
|  | 22b7ac13ef | ||
|  | 96f88e91b7 | ||
|  | 3331a4644d | ||
|  | adf1921dc1 | ||
|  | 97674f0419 | ||
|  | 73843ae8ac | ||
|  | f2bb8c036a | ||
|  | 75ca6bcee2 | ||
|  | 089657ed1f | ||
|  | b5eab86c24 | ||
|  | c8e3e0974b | ||
|  | dfc8f46e1c | ||
|  | c143ddce5d | ||
|  | 169d836feb | ||
|  | 6ae938b295 | ||
|  | cf40fdf5c1 | ||
|  | 23bdae0955 | ||
|  | ca74c90bf5 | ||
|  | 7cfc1e2a10 | ||
|  | 1ac5705f62 | ||
|  | e4f90ea0a7 | ||
|  | cdfc187cd5 | ||
|  | feef925f49 | ||
|  | 19e2d1cdea | ||
|  | 8369a4fe76 | ||
|  | 1f749b6658 | ||
|  | 819707920a | ||
|  | 43518503a6 | ||
|  | 5839d556e4 | ||
|  | 6c83e583b3 | ||
|  | 6aeb64b673 | ||
|  | 6cd64b6806 | ||
|  | e154c65128 | ||
|  | a50fd6e026 | ||
|  | 6a55bb66ee | ||
|  | 7c05097633 | ||
|  | 589568789f | ||
|  | 7577d849a6 | ||
|  | cb23192bc4 | ||
|  | 41c1023300 | ||
|  | 90b6288cce | ||
|  | c1823c8ad9 | ||
|  | d7c6c656c5 | ||
|  | b0b128049a | ||
|  | e8f13f2637 | ||
|  | b5aad37f6b | ||
|  | 6d0d4fc26d | ||
|  | 0278aa443f | ||
|  | 1f35745758 | ||
|  | 573c35272f | ||
|  | 09e3f91e40 | ||
|  | 1b6cf16be7 | ||
|  | 26264cb056 | ||
|  | a72df5f36f | ||
|  | c878e635de | ||
|  | 0f47cc2e92 | ||
|  | 5fc2757682 | ||
|  | e3944c2621 | ||
|  | 667d96480b | ||
|  | e6fe993c31 | ||
|  | d0d93f76ea | ||
|  | 20a6a154fe | ||
|  | f011876076 | ||
|  | 6929569403 | ||
|  | eb451890da | ||
|  | ded7511a70 | ||
|  | d2161cade5 | ||
|  | 27e5fa8198 | ||
|  | efbd1eb51a | ||
|  | 369ff75081 | ||
|  | 47212f7bcb | ||
|  | 4c93ee8d14 | ||
|  | 8bc4dbb1af | ||
|  | 6c3760292c | ||
|  | 4cef70db6c | ||
|  | ff4af6ec59 | 
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @@ -6,8 +6,8 @@ | ||||
|  | ||||
| --- | ||||
|  | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14** | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.07.02*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.07.02** | ||||
|  | ||||
| ### Before submitting an *issue* make sure you have: | ||||
| - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections | ||||
| @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> | ||||
| [debug] User config: [] | ||||
| [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
| [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
| [debug] youtube-dl version 2016.06.14 | ||||
| [debug] youtube-dl version 2016.07.02 | ||||
| [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
| [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
| [debug] Proxy map: {} | ||||
|   | ||||
							
								
								
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -175,3 +175,4 @@ Tomáš Čech | ||||
| Déstin Reed | ||||
| Roman Tsiupa | ||||
| Artur Krysiak | ||||
| Jakub Adam Wieczorek | ||||
|   | ||||
| @@ -44,7 +44,7 @@ Or with [MacPorts](https://www.macports.org/): | ||||
| Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). | ||||
|  | ||||
| # DESCRIPTION | ||||
| **youtube-dl** is a small command-line program to download videos from | ||||
| **youtube-dl** is a command-line program to download videos from | ||||
| YouTube.com and a few more sites. It requires the Python interpreter, version | ||||
| 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on | ||||
| your Unix box, on Windows or on Mac OS X. It is released to the public domain, | ||||
|   | ||||
| @@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename): | ||||
|     os.remove(lazy_extractors_filename) | ||||
|  | ||||
| from youtube_dl.extractor import _ALL_CLASSES | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor | ||||
|  | ||||
| with open('devscripts/lazy_load_template.py', 'rt') as f: | ||||
|     module_template = f.read() | ||||
|  | ||||
| module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)] | ||||
| module_contents = [ | ||||
|     module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', | ||||
|     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n'] | ||||
|  | ||||
| ie_template = ''' | ||||
| class {name}(LazyLoadExtractor): | ||||
| class {name}({bases}): | ||||
|     _VALID_URL = {valid_url!r} | ||||
|     _module = '{module}' | ||||
| ''' | ||||
| @@ -34,10 +36,20 @@ make_valid_template = ''' | ||||
| ''' | ||||
|  | ||||
|  | ||||
| def get_base_name(base): | ||||
|     if base is InfoExtractor: | ||||
|         return 'LazyLoadExtractor' | ||||
|     elif base is SearchInfoExtractor: | ||||
|         return 'LazyLoadSearchExtractor' | ||||
|     else: | ||||
|         return base.__name__ | ||||
|  | ||||
|  | ||||
| def build_lazy_ie(ie, name): | ||||
|     valid_url = getattr(ie, '_VALID_URL', None) | ||||
|     s = ie_template.format( | ||||
|         name=name, | ||||
|         bases=', '.join(map(get_base_name, ie.__bases__)), | ||||
|         valid_url=valid_url, | ||||
|         module=ie.__module__) | ||||
|     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: | ||||
| @@ -47,12 +59,35 @@ def build_lazy_ie(ie, name): | ||||
|         s += make_valid_template.format(valid_url=ie._make_valid_url()) | ||||
|     return s | ||||
|  | ||||
| # find the correct sorting and add the required base classes so that subclasses | ||||
| # can be correctly created | ||||
| classes = _ALL_CLASSES[:-1] | ||||
| ordered_cls = [] | ||||
| while classes: | ||||
|     for c in classes[:]: | ||||
|         bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor)) | ||||
|         stop = False | ||||
|         for b in bases: | ||||
|             if b not in classes and b not in ordered_cls: | ||||
|                 if b.__name__ == 'GenericIE': | ||||
|                     exit() | ||||
|                 classes.insert(0, b) | ||||
|                 stop = True | ||||
|         if stop: | ||||
|             break | ||||
|         if all(b in ordered_cls for b in bases): | ||||
|             ordered_cls.append(c) | ||||
|             classes.remove(c) | ||||
|             break | ||||
| ordered_cls.append(_ALL_CLASSES[-1]) | ||||
|  | ||||
| names = [] | ||||
| for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]: | ||||
|     name = ie.ie_key() + 'IE' | ||||
| for ie in ordered_cls: | ||||
|     name = ie.__name__ | ||||
|     src = build_lazy_ie(ie, name) | ||||
|     module_contents.append(src) | ||||
|     names.append(name) | ||||
|     if ie in _ALL_CLASSES: | ||||
|         names.append(name) | ||||
|  | ||||
| module_contents.append( | ||||
|     '_ALL_CLASSES = [{0}]'.format(', '.join(names))) | ||||
|   | ||||
| @@ -15,6 +15,7 @@ | ||||
| set -e | ||||
|  | ||||
| skip_tests=true | ||||
| gpg_sign_commits="" | ||||
| buildserver='localhost:8142' | ||||
|  | ||||
| while true | ||||
| @@ -24,6 +25,10 @@ case "$1" in | ||||
|         skip_tests=false | ||||
|         shift | ||||
|     ;; | ||||
|     --gpg-sign-commits|-S) | ||||
|         gpg_sign_commits="-S" | ||||
|         shift | ||||
|     ;; | ||||
|     --buildserver) | ||||
|         buildserver="$2" | ||||
|         shift 2 | ||||
| @@ -69,7 +74,7 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
| /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." | ||||
| make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites | ||||
| git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py | ||||
| git commit -m "release $version" | ||||
| git commit $gpg_sign_commits -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
| git tag -s -m "Release $version" "$version" | ||||
| @@ -116,7 +121,7 @@ git clone --branch gh-pages --single-branch . build/gh-pages | ||||
|     "$ROOT/devscripts/gh-pages/update-copyright.py" | ||||
|     "$ROOT/devscripts/gh-pages/update-sites.py" | ||||
|     git add *.html *.html.in update | ||||
|     git commit -m "release $version" | ||||
|     git commit $gpg_sign_commits -m "release $version" | ||||
|     git push "$ROOT" gh-pages | ||||
|     git push "$ORIGIN_URL" gh-pages | ||||
| ) | ||||
|   | ||||
| @@ -44,7 +44,6 @@ | ||||
|  - **appletrailers:section** | ||||
|  - **archive.org**: archive.org videos | ||||
|  - **ARD** | ||||
|  - **ARD:mediathek**: Saarländischer Rundfunk | ||||
|  - **ARD:mediathek** | ||||
|  - **arte.tv** | ||||
|  - **arte.tv:+7** | ||||
| @@ -74,6 +73,8 @@ | ||||
|  - **bbc**: BBC | ||||
|  - **bbc.co.uk**: BBC iPlayer | ||||
|  - **bbc.co.uk:article**: BBC articles | ||||
|  - **bbc.co.uk:iplayer:playlist** | ||||
|  - **bbc.co.uk:playlist** | ||||
|  - **BeatportPro** | ||||
|  - **Beeg** | ||||
|  - **BehindKink** | ||||
| @@ -104,6 +105,8 @@ | ||||
|  - **canalc2.tv** | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
|  - **Canvas** | ||||
|  - **CarambaTV** | ||||
|  - **CarambaTVPage** | ||||
|  - **CBC** | ||||
|  - **CBCPlayer** | ||||
|  - **CBS** | ||||
| @@ -124,6 +127,7 @@ | ||||
|  - **cliphunter** | ||||
|  - **ClipRs** | ||||
|  - **Clipsyndicate** | ||||
|  - **CloserToTruth** | ||||
|  - **cloudtime**: CloudTime | ||||
|  - **Cloudy** | ||||
|  - **Clubic** | ||||
| @@ -148,6 +152,8 @@ | ||||
|  - **CSNNE** | ||||
|  - **CSpan**: C-SPAN | ||||
|  - **CtsNews**: 華視新聞 | ||||
|  - **CTV** | ||||
|  - **CTVNews** | ||||
|  - **culturebox.francetvinfo.fr** | ||||
|  - **CultureUnplugged** | ||||
|  - **CWTV** | ||||
| @@ -236,6 +242,7 @@ | ||||
|  - **FreeVideo** | ||||
|  - **Funimation** | ||||
|  - **FunnyOrDie** | ||||
|  - **Fusion** | ||||
|  - **GameInformer** | ||||
|  - **Gamekings** | ||||
|  - **GameOne** | ||||
| @@ -243,7 +250,6 @@ | ||||
|  - **Gamersyde** | ||||
|  - **GameSpot** | ||||
|  - **GameStar** | ||||
|  - **Gametrailers** | ||||
|  - **Gazeta** | ||||
|  - **GDCVault** | ||||
|  - **generic**: Generic downloader that works on some sites | ||||
| @@ -269,6 +275,7 @@ | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **HistoricFilms** | ||||
|  - **history:topic**: History.com Topic | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
|  - **HornBunny** | ||||
| @@ -355,6 +362,7 @@ | ||||
|  - **MatchTV** | ||||
|  - **MDR**: MDR.DE and KiKA | ||||
|  - **media.ccc.de** | ||||
|  - **META** | ||||
|  - **metacafe** | ||||
|  - **Metacritic** | ||||
|  - **Mgoon** | ||||
| @@ -381,7 +389,7 @@ | ||||
|  - **MovieFap** | ||||
|  - **Moviezine** | ||||
|  - **MPORA** | ||||
|  - **MSNBC** | ||||
|  - **MSN** | ||||
|  - **MTV** | ||||
|  - **mtv.de** | ||||
|  - **mtviggy.com** | ||||
| @@ -432,8 +440,10 @@ | ||||
|  - **nhl.com:videocenter** | ||||
|  - **nhl.com:videocenter:category**: NHL videocenter category | ||||
|  - **nick.com** | ||||
|  - **nick.de** | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
|  - **NineCNineMedia** | ||||
|  - **njoy**: N-JOY | ||||
|  - **njoy:embed** | ||||
|  - **Noco** | ||||
| @@ -497,8 +507,9 @@ | ||||
|  - **plus.google**: Google Plus | ||||
|  - **pluzz.francetv.fr** | ||||
|  - **podomatic** | ||||
|  - **PolskieRadio** | ||||
|  - **PornHd** | ||||
|  - **PornHub** | ||||
|  - **PornHub**: PornHub and Thumbzilla | ||||
|  - **PornHubPlaylist** | ||||
|  - **PornHubUserVideos** | ||||
|  - **Pornotube** | ||||
| @@ -516,6 +527,7 @@ | ||||
|  - **qqmusic:singer**: QQ音乐 - 歌手 | ||||
|  - **qqmusic:toplist**: QQ音乐 - 排行榜 | ||||
|  - **R7** | ||||
|  - **R7Article** | ||||
|  - **radio.de** | ||||
|  - **radiobremen** | ||||
|  - **radiocanada** | ||||
| @@ -581,8 +593,10 @@ | ||||
|  - **Shared**: shared.sx and vivo.sx | ||||
|  - **ShareSix** | ||||
|  - **Sina** | ||||
|  - **SixPlay** | ||||
|  - **skynewsarabia:article** | ||||
|  - **skynewsarabia:video** | ||||
|  - **skynewsarabia:video** | ||||
|  - **SkySports** | ||||
|  - **Slideshare** | ||||
|  - **Slutload** | ||||
|  - **smotri**: Smotri.com | ||||
| @@ -614,6 +628,7 @@ | ||||
|  - **SportBoxEmbed** | ||||
|  - **SportDeutschland** | ||||
|  - **Sportschau** | ||||
|  - **sr:mediathek**: Saarländischer Rundfunk | ||||
|  - **SRGSSR** | ||||
|  - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites | ||||
|  - **SSA** | ||||
| @@ -714,6 +729,7 @@ | ||||
|  - **UDNEmbed**: 聯合影音 | ||||
|  - **Unistra** | ||||
|  - **Urort**: NRK P3 Urørt | ||||
|  - **URPlay** | ||||
|  - **USAToday** | ||||
|  - **ustream** | ||||
|  - **ustream:channel** | ||||
| @@ -731,6 +747,7 @@ | ||||
|  - **vh1.com** | ||||
|  - **Vice** | ||||
|  - **ViceShow** | ||||
|  - **Vidbit** | ||||
|  - **Viddler** | ||||
|  - **video.google:search**: Google Video search | ||||
|  - **video.mit.edu** | ||||
|   | ||||
							
								
								
									
										63
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										63
									
								
								setup.py
									
									
									
									
									
								
							| @@ -21,25 +21,37 @@ try: | ||||
|     import py2exe | ||||
| except ImportError: | ||||
|     if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe': | ||||
|         print("Cannot import py2exe", file=sys.stderr) | ||||
|         print('Cannot import py2exe', file=sys.stderr) | ||||
|         exit(1) | ||||
|  | ||||
| py2exe_options = { | ||||
|     "bundle_files": 1, | ||||
|     "compressed": 1, | ||||
|     "optimize": 2, | ||||
|     "dist_dir": '.', | ||||
|     "dll_excludes": ['w9xpopen.exe', 'crypt32.dll'], | ||||
|     'bundle_files': 1, | ||||
|     'compressed': 1, | ||||
|     'optimize': 2, | ||||
|     'dist_dir': '.', | ||||
|     'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'], | ||||
| } | ||||
|  | ||||
| # Get the version from youtube_dl/version.py without importing the package | ||||
| exec(compile(open('youtube_dl/version.py').read(), | ||||
|              'youtube_dl/version.py', 'exec')) | ||||
|  | ||||
| DESCRIPTION = 'YouTube video downloader' | ||||
| LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites' | ||||
|  | ||||
| py2exe_console = [{ | ||||
|     "script": "./youtube_dl/__main__.py", | ||||
|     "dest_base": "youtube-dl", | ||||
|     'script': './youtube_dl/__main__.py', | ||||
|     'dest_base': 'youtube-dl', | ||||
|     'version': __version__, | ||||
|     'description': DESCRIPTION, | ||||
|     'comments': LONG_DESCRIPTION, | ||||
|     'product_name': 'youtube-dl', | ||||
|     'product_version': __version__, | ||||
| }] | ||||
|  | ||||
| py2exe_params = { | ||||
|     'console': py2exe_console, | ||||
|     'options': {"py2exe": py2exe_options}, | ||||
|     'options': {'py2exe': py2exe_options}, | ||||
|     'zipfile': None | ||||
| } | ||||
|  | ||||
| @@ -72,7 +84,7 @@ else: | ||||
|         params['scripts'] = ['bin/youtube-dl'] | ||||
|  | ||||
| class build_lazy_extractors(Command): | ||||
|     description = "Build the extractor lazy loading module" | ||||
|     description = 'Build the extractor lazy loading module' | ||||
|     user_options = [] | ||||
|  | ||||
|     def initialize_options(self): | ||||
| @@ -87,16 +99,11 @@ class build_lazy_extractors(Command): | ||||
|             dry_run=self.dry_run, | ||||
|         ) | ||||
|  | ||||
| # Get the version from youtube_dl/version.py without importing the package | ||||
| exec(compile(open('youtube_dl/version.py').read(), | ||||
|              'youtube_dl/version.py', 'exec')) | ||||
|  | ||||
| setup( | ||||
|     name='youtube_dl', | ||||
|     version=__version__, | ||||
|     description='YouTube video downloader', | ||||
|     long_description='Small command-line program to download videos from' | ||||
|     ' YouTube.com and other video sites.', | ||||
|     description=DESCRIPTION, | ||||
|     long_description=LONG_DESCRIPTION, | ||||
|     url='https://github.com/rg3/youtube-dl', | ||||
|     author='Ricardo Garcia', | ||||
|     author_email='ytdl@yt-dl.org', | ||||
| @@ -112,17 +119,17 @@ setup( | ||||
|     # test_requires = ['nosetest'], | ||||
|  | ||||
|     classifiers=[ | ||||
|         "Topic :: Multimedia :: Video", | ||||
|         "Development Status :: 5 - Production/Stable", | ||||
|         "Environment :: Console", | ||||
|         "License :: Public Domain", | ||||
|         "Programming Language :: Python :: 2.6", | ||||
|         "Programming Language :: Python :: 2.7", | ||||
|         "Programming Language :: Python :: 3", | ||||
|         "Programming Language :: Python :: 3.2", | ||||
|         "Programming Language :: Python :: 3.3", | ||||
|         "Programming Language :: Python :: 3.4", | ||||
|         "Programming Language :: Python :: 3.5", | ||||
|         'Topic :: Multimedia :: Video', | ||||
|         'Development Status :: 5 - Production/Stable', | ||||
|         'Environment :: Console', | ||||
|         'License :: Public Domain', | ||||
|         'Programming Language :: Python :: 2.6', | ||||
|         'Programming Language :: Python :: 2.7', | ||||
|         'Programming Language :: Python :: 3', | ||||
|         'Programming Language :: Python :: 3.2', | ||||
|         'Programming Language :: Python :: 3.3', | ||||
|         'Programming Language :: Python :: 3.4', | ||||
|         'Programming Language :: Python :: 3.5', | ||||
|     ], | ||||
|  | ||||
|     cmdclass={'build_lazy_extractors': build_lazy_extractors}, | ||||
|   | ||||
| @@ -11,7 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor import YoutubeIE, get_info_extractor | ||||
| from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError | ||||
| from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError | ||||
|  | ||||
|  | ||||
| class TestIE(InfoExtractor): | ||||
| @@ -66,6 +66,11 @@ class TestInfoExtractor(unittest.TestCase): | ||||
|         self.assertEqual(ie._html_search_meta('d', html), '4') | ||||
|         self.assertEqual(ie._html_search_meta('e', html), '5') | ||||
|         self.assertEqual(ie._html_search_meta('f', html), '6') | ||||
|         self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1') | ||||
|         self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3') | ||||
|         self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3') | ||||
|         self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) | ||||
|         self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) | ||||
|  | ||||
|     def test_download_json(self): | ||||
|         uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from __future__ import unicode_literals | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| import collections | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| @@ -130,6 +131,15 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|             'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html', | ||||
|             ['Yahoo']) | ||||
|  | ||||
|     def test_no_duplicated_ie_names(self): | ||||
|         name_accu = collections.defaultdict(list) | ||||
|         for ie in self.ies: | ||||
|             name_accu[ie.IE_NAME.lower()].append(type(ie).__name__) | ||||
|         for (ie_name, ie_list) in name_accu.items(): | ||||
|             self.assertEqual( | ||||
|                 len(ie_list), 1, | ||||
|                 'Multiple extractors with the same IE_NAME "%s" (%s)' % (ie_name, ', '.join(ie_list))) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -87,6 +87,7 @@ class TestCompat(unittest.TestCase): | ||||
|  | ||||
|     def test_compat_shlex_split(self): | ||||
|         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two']) | ||||
|         self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) | ||||
|  | ||||
|     def test_compat_etree_fromstring(self): | ||||
|         xml = ''' | ||||
|   | ||||
| @@ -60,11 +60,13 @@ from youtube_dl.utils import ( | ||||
|     timeconvert, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     unified_timestamp, | ||||
|     unsmuggle_url, | ||||
|     uppercase_escape, | ||||
|     lowercase_escape, | ||||
|     url_basename, | ||||
|     urlencode_postdata, | ||||
|     urshift, | ||||
|     update_url_query, | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
| @@ -283,8 +285,28 @@ class TestUtil(unittest.TestCase): | ||||
|             '20150202') | ||||
|         self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214') | ||||
|         self.assertEqual(unified_strdate('25-09-2014'), '20140925') | ||||
|         self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227') | ||||
|         self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None) | ||||
|  | ||||
|     def test_unified_timestamps(self): | ||||
|         self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600) | ||||
|         self.assertEqual(unified_timestamp('8/7/2009'), 1247011200) | ||||
|         self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200) | ||||
|         self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598) | ||||
|         self.assertEqual(unified_timestamp('1968 12 10'), -33436800) | ||||
|         self.assertEqual(unified_timestamp('1968-12-10'), -33436800) | ||||
|         self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200) | ||||
|         self.assertEqual( | ||||
|             unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False), | ||||
|             1417001400) | ||||
|         self.assertEqual( | ||||
|             unified_timestamp('2/2/2015 6:47:40 PM', day_first=False), | ||||
|             1422902860) | ||||
|         self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900) | ||||
|         self.assertEqual(unified_timestamp('25-09-2014'), 1411603200) | ||||
|         self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200) | ||||
|         self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None) | ||||
|  | ||||
|     def test_determine_ext(self): | ||||
|         self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4') | ||||
|         self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None) | ||||
| @@ -640,6 +662,9 @@ class TestUtil(unittest.TestCase): | ||||
|             "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} | ||||
|         }''') | ||||
|  | ||||
|         inp = '''{"foo":101}''' | ||||
|         self.assertEqual(js_to_json(inp), '''{"foo":101}''') | ||||
|  | ||||
|     def test_js_to_json_edgecases(self): | ||||
|         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") | ||||
|         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) | ||||
| @@ -956,5 +981,9 @@ The first line | ||||
|         self.assertRaises(ValueError, encode_base_n, 0, 70) | ||||
|         self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) | ||||
|  | ||||
|     def test_urshift(self): | ||||
|         self.assertEqual(urshift(3, 1), 1) | ||||
|         self.assertEqual(urshift(-3, 1), 2147483646) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -2,14 +2,24 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import os.path | ||||
| import re | ||||
| import binascii | ||||
| try: | ||||
|     from Crypto.Cipher import AES | ||||
|     can_decrypt_frag = True | ||||
| except ImportError: | ||||
|     can_decrypt_frag = False | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
| from .external import FFmpegFD | ||||
|  | ||||
| from ..compat import compat_urlparse | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_struct_pack, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     parse_m3u8_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -21,7 +31,7 @@ class HlsFD(FragmentFD): | ||||
|     @staticmethod | ||||
|     def can_download(manifest): | ||||
|         UNSUPPORTED_FEATURES = ( | ||||
|             r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1] | ||||
|             r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1] | ||||
|             r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2] | ||||
|  | ||||
|             # Live streams heuristic does not always work (e.g. geo restricted to Germany | ||||
| @@ -39,7 +49,9 @@ class HlsFD(FragmentFD): | ||||
|             # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 | ||||
|             # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 | ||||
|         ) | ||||
|         return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) | ||||
|         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] | ||||
|         check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) | ||||
|         return all(check_results) | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         man_url = info_dict['url'] | ||||
| @@ -57,36 +69,60 @@ class HlsFD(FragmentFD): | ||||
|                 fd.add_progress_hook(ph) | ||||
|             return fd.real_download(filename, info_dict) | ||||
|  | ||||
|         fragment_urls = [] | ||||
|         total_frags = 0 | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line and not line.startswith('#'): | ||||
|                 segment_url = ( | ||||
|                     line | ||||
|                     if re.match(r'^https?://', line) | ||||
|                     else compat_urlparse.urljoin(man_url, line)) | ||||
|                 fragment_urls.append(segment_url) | ||||
|                 # We only download the first fragment during the test | ||||
|                 if self.params.get('test', False): | ||||
|                     break | ||||
|                 total_frags += 1 | ||||
|  | ||||
|         ctx = { | ||||
|             'filename': filename, | ||||
|             'total_frags': len(fragment_urls), | ||||
|             'total_frags': total_frags, | ||||
|         } | ||||
|  | ||||
|         self._prepare_and_start_frag_download(ctx) | ||||
|  | ||||
|         i = 0 | ||||
|         media_sequence = 0 | ||||
|         decrypt_info = {'METHOD': 'NONE'} | ||||
|         frags_filenames = [] | ||||
|         for i, frag_url in enumerate(fragment_urls): | ||||
|             frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|             success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|             if not success: | ||||
|                 return False | ||||
|             down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|             ctx['dest_stream'].write(down.read()) | ||||
|             down.close() | ||||
|             frags_filenames.append(frag_sanitized) | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line: | ||||
|                 if not line.startswith('#'): | ||||
|                     frag_url = ( | ||||
|                         line | ||||
|                         if re.match(r'^https?://', line) | ||||
|                         else compat_urlparse.urljoin(man_url, line)) | ||||
|                     frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|                     success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|                     if not success: | ||||
|                         return False | ||||
|                     down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|                     frag_content = down.read() | ||||
|                     down.close() | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) | ||||
|                         frag_content = AES.new( | ||||
|                             decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) | ||||
|                     ctx['dest_stream'].write(frag_content) | ||||
|                     frags_filenames.append(frag_sanitized) | ||||
|                     # We only download the first fragment during the test | ||||
|                     if self.params.get('test', False): | ||||
|                         break | ||||
|                     i += 1 | ||||
|                     media_sequence += 1 | ||||
|                 elif line.startswith('#EXT-X-KEY'): | ||||
|                     decrypt_info = parse_m3u8_attributes(line[11:]) | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         if 'IV' in decrypt_info: | ||||
|                             decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) | ||||
|                         if not re.match(r'^https?://', decrypt_info['URI']): | ||||
|                             decrypt_info['URI'] = compat_urlparse.urljoin( | ||||
|                                 man_url, decrypt_info['URI']) | ||||
|                         decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() | ||||
|                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): | ||||
|                     media_sequence = int(line[22:]) | ||||
|  | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|   | ||||
| @@ -156,7 +156,10 @@ class AdobeTVVideoIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         video_data = self._download_json(url + '?format=json', video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_data = self._parse_json(self._search_regex( | ||||
|             r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')), | ||||
|   | ||||
| @@ -7,18 +7,123 @@ from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
|     unescapeHTML, | ||||
|     extract_attributes, | ||||
|     get_element_by_attribute, | ||||
| ) | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AENetworksIE(InfoExtractor): | ||||
| class AENetworksBaseIE(InfoExtractor): | ||||
|     def theplatform_url_result(self, theplatform_url, video_id, query): | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': smuggle_url( | ||||
|                 update_url_query(theplatform_url, query), | ||||
|                 { | ||||
|                     'sig': { | ||||
|                         'key': 'crazyjava', | ||||
|                         'secret': 's3cr3t' | ||||
|                     }, | ||||
|                     'force_smil_url': True | ||||
|                 }), | ||||
|             'ie_key': 'ThePlatform', | ||||
|         } | ||||
|  | ||||
|  | ||||
| class AENetworksIE(AENetworksBaseIE): | ||||
|     IE_NAME = 'aenetworks' | ||||
|     IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?:shows/(?P<show_path>[^/]+(?:/[^/]+){0,2})|movies/(?P<movie_display_id>[^/]+)/full-movie)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', | ||||
|         'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', | ||||
|         'info_dict': { | ||||
|             'id': '22253814', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Winter Is Coming', | ||||
|             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', | ||||
|             'timestamp': 1338306241, | ||||
|             'upload_date': '20120529', | ||||
|             'uploader': 'AENE-NEW', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }, { | ||||
|         'url': 'http://www.history.com/shows/ancient-aliens/season-1', | ||||
|         'info_dict': { | ||||
|             'id': '71889446852', | ||||
|         }, | ||||
|         'playlist_mincount': 5, | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/shows/atlanta-plastic', | ||||
|         'info_dict': { | ||||
|             'id': 'SERIES4317', | ||||
|             'title': 'Atlanta Plastic', | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|     }, { | ||||
|         'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie', | ||||
|         'only_matching': True | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         show_path, movie_display_id = re.match(self._VALID_URL, url).groups() | ||||
|         display_id = show_path or movie_display_id | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         if show_path: | ||||
|             url_parts = show_path.split('/') | ||||
|             url_parts_len = len(url_parts) | ||||
|             if url_parts_len == 1: | ||||
|                 entries = [] | ||||
|                 for season_url_path in re.findall(r'(?s)<li[^>]+data-href="(/shows/%s/season-\d+)"' % url_parts[0], webpage): | ||||
|                     entries.append(self.url_result( | ||||
|                         compat_urlparse.urljoin(url, season_url_path), 'AENetworks')) | ||||
|                 return self.playlist_result( | ||||
|                     entries, self._html_search_meta('aetn:SeriesId', webpage), | ||||
|                     self._html_search_meta('aetn:SeriesTitle', webpage)) | ||||
|             elif url_parts_len == 2: | ||||
|                 entries = [] | ||||
|                 for episode_item in re.findall(r'(?s)<div[^>]+class="[^"]*episode-item[^"]*"[^>]*>', webpage): | ||||
|                     episode_attributes = extract_attributes(episode_item) | ||||
|                     episode_url = compat_urlparse.urljoin( | ||||
|                         url, episode_attributes['data-canonical']) | ||||
|                     entries.append(self.url_result( | ||||
|                         episode_url, 'AENetworks', | ||||
|                         episode_attributes['data-videoid'])) | ||||
|                 return self.playlist_result( | ||||
|                     entries, self._html_search_meta('aetn:SeasonId', webpage)) | ||||
|         video_id = self._html_search_meta('aetn:VideoID', webpage) | ||||
|         media_url = self._search_regex( | ||||
|             r"media_url\s*=\s*'([^']+)'", webpage, 'video url') | ||||
|  | ||||
|         info = self._search_json_ld(webpage, video_id, fatal=False) | ||||
|         info.update(self.theplatform_url_result( | ||||
|             media_url, video_id, { | ||||
|                 'mbr': 'true', | ||||
|                 'assetTypes': 'medium_video_s3' | ||||
|             })) | ||||
|         return info | ||||
|  | ||||
|  | ||||
| class HistoryTopicIE(AENetworksBaseIE): | ||||
|     IE_NAME = 'history:topic' | ||||
|     IE_DESC = 'History.com Topic' | ||||
|     _VALID_URL = r'https?://(?:www\.)?history\.com/topics/(?:[^/]+/)?(?P<topic_id>[^/]+)/videos(?:/(?P<video_display_id>[^/?#]+))?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', | ||||
|         'info_dict': { | ||||
|             'id': 'g12m5Gyt3fdR', | ||||
|             'id': '40700995724', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Bet You Didn't Know: Valentine's Day", | ||||
|             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', | ||||
| @@ -31,57 +136,39 @@ class AENetworksIE(InfoExtractor): | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|         'expected_warnings': ['JSON-LD'], | ||||
|     }, { | ||||
|         'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1', | ||||
|         'md5': '8ff93eb073449f151d6b90c0ae1ef0c7', | ||||
|         'info_dict': { | ||||
|             'id': 'eg47EERs_JsZ', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Winter Is Coming', | ||||
|             'description': 'md5:641f424b7a19d8e24f26dea22cf59d74', | ||||
|             'timestamp': 1338306241, | ||||
|             'upload_date': '20120529', | ||||
|             'uploader': 'AENE-NEW', | ||||
|         'url': 'http://www.history.com/topics/world-war-i/world-war-i-history/videos', | ||||
|         'info_dict': | ||||
|         { | ||||
|             'id': 'world-war-i-history', | ||||
|             'title': 'World War I History', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|         'playlist_mincount': 24, | ||||
|     }, { | ||||
|         'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage', | ||||
|         'only_matching': True | ||||
|     }, { | ||||
|         'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients', | ||||
|         'only_matching': True | ||||
|         'url': 'http://www.history.com/topics/world-war-i-history/videos', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_type, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         topic_id, video_display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if video_display_id: | ||||
|             webpage = self._download_webpage(url, video_display_id) | ||||
|             release_url, video_id = re.search(r"_videoPlayer.play\('([^']+)'\s*,\s*'[^']+'\s*,\s*'(\d+)'\)", webpage).groups() | ||||
|             release_url = unescapeHTML(release_url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url_re = [ | ||||
|             r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, | ||||
|             r"media_url\s*=\s*'([^']+)'" | ||||
|         ] | ||||
|         video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url')) | ||||
|         query = {'mbr': 'true'} | ||||
|         if page_type == 'shows': | ||||
|             query['assetTypes'] = 'medium_video_s3' | ||||
|         if 'switch=hds' in video_url: | ||||
|             query['switch'] = 'hls' | ||||
|  | ||||
|         info = self._search_json_ld(webpage, video_id, fatal=False) | ||||
|         info.update({ | ||||
|             '_type': 'url_transparent', | ||||
|             'url': smuggle_url( | ||||
|                 update_url_query(video_url, query), | ||||
|                 { | ||||
|                     'sig': { | ||||
|                         'key': 'crazyjava', | ||||
|                         'secret': 's3cr3t'}, | ||||
|                     'force_smil_url': True | ||||
|                 }), | ||||
|         }) | ||||
|         return info | ||||
|             return self.theplatform_url_result( | ||||
|                 release_url, video_id, { | ||||
|                     'mbr': 'true', | ||||
|                     'switch': 'hls' | ||||
|                 }) | ||||
|         else: | ||||
|             webpage = self._download_webpage(url, topic_id) | ||||
|             entries = [] | ||||
|             for episode_item in re.findall(r'<a.+?data-release-url="[^"]+"[^>]*>', webpage): | ||||
|                 video_attributes = extract_attributes(episode_item) | ||||
|                 entries.append(self.theplatform_url_result( | ||||
|                     video_attributes['data-release-url'], video_attributes['data-id'], { | ||||
|                         'mbr': 'true', | ||||
|                         'switch': 'hls' | ||||
|                     })) | ||||
|             return self.playlist_result(entries, topic_id, get_element_by_attribute('class', 'show-title', webpage)) | ||||
|   | ||||
| @@ -24,10 +24,10 @@ class AftonbladetIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|         meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | ||||
|         meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json' | ||||
|         player_config = self._parse_json(self._html_search_regex( | ||||
|             r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) | ||||
|         internal_meta_id = player_config['videoId'] | ||||
|         internal_meta_id = player_config['aptomaVideoId'] | ||||
|         internal_meta_url = meta_url % internal_meta_id | ||||
|         internal_meta_json = self._download_json( | ||||
|             internal_meta_url, video_id, 'Downloading video meta data') | ||||
|   | ||||
| @@ -7,6 +7,8 @@ from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -16,7 +18,8 @@ class AppleTrailersIE(InfoExtractor): | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', | ||||
|         'info_dict': { | ||||
|             'id': 'manofsteel', | ||||
|             'id': '5111', | ||||
|             'title': 'Man of Steel', | ||||
|         }, | ||||
|         'playlist': [ | ||||
|             { | ||||
| @@ -70,6 +73,15 @@ class AppleTrailersIE(InfoExtractor): | ||||
|             'id': 'blackthorn', | ||||
|         }, | ||||
|         'playlist_mincount': 2, | ||||
|         'expected_warnings': ['Unable to download JSON metadata'], | ||||
|     }, { | ||||
|         # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json | ||||
|         'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', | ||||
|         'info_dict': { | ||||
|             'id': '15881', | ||||
|             'title': 'Kung Fu Panda 3', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     }, { | ||||
|         'url': 'http://trailers.apple.com/ca/metropole/autrui/', | ||||
|         'only_matching': True, | ||||
| @@ -85,6 +97,45 @@ class AppleTrailersIE(InfoExtractor): | ||||
|         movie = mobj.group('movie') | ||||
|         uploader_id = mobj.group('company') | ||||
|  | ||||
|         webpage = self._download_webpage(url, movie) | ||||
|         film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id') | ||||
|         film_data = self._download_json( | ||||
|             'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id, | ||||
|             film_id, fatal=False) | ||||
|  | ||||
|         if film_data: | ||||
|             entries = [] | ||||
|             for clip in film_data.get('clips', []): | ||||
|                 clip_title = clip['title'] | ||||
|  | ||||
|                 formats = [] | ||||
|                 for version, version_data in clip.get('versions', {}).items(): | ||||
|                     for size, size_data in version_data.get('sizes', {}).items(): | ||||
|                         src = size_data.get('src') | ||||
|                         if not src: | ||||
|                             continue | ||||
|                         formats.append({ | ||||
|                             'format_id': '%s-%s' % (version, size), | ||||
|                             'url': re.sub(r'_(\d+p.mov)', r'_h\1', src), | ||||
|                             'width': int_or_none(size_data.get('width')), | ||||
|                             'height': int_or_none(size_data.get('height')), | ||||
|                             'language': version[:2], | ||||
|                         }) | ||||
|                 self._sort_formats(formats) | ||||
|  | ||||
|                 entries.append({ | ||||
|                     'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(), | ||||
|                     'formats': formats, | ||||
|                     'title': clip_title, | ||||
|                     'thumbnail': clip.get('screen') or clip.get('thumb'), | ||||
|                     'duration': parse_duration(clip.get('runtime') or clip.get('faded')), | ||||
|                     'upload_date': unified_strdate(clip.get('posted')), | ||||
|                     'uploader_id': uploader_id, | ||||
|                 }) | ||||
|  | ||||
|             page_data = film_data.get('page', {}) | ||||
|             return self.playlist_result(entries, film_id, page_data.get('movie_title')) | ||||
|  | ||||
|         playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc') | ||||
|  | ||||
|         def fix_html(s): | ||||
|   | ||||
| @@ -8,7 +8,6 @@ from .generic import GenericIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     get_element_by_attribute, | ||||
|     qualities, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
| @@ -274,41 +273,3 @@ class ARDIE(InfoExtractor): | ||||
|             'upload_date': upload_date, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SportschauIE(ARDMediathekIE): | ||||
|     IE_NAME = 'Sportschau' | ||||
|     _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html', | ||||
|         'info_dict': { | ||||
|             'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         base_url = mobj.group('baseurl') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = get_element_by_attribute('class', 'headline', webpage) | ||||
|         description = self._html_search_meta('description', webpage, 'description') | ||||
|  | ||||
|         info = self._extract_media_info( | ||||
|             base_url + '-mc_defaultQuality-h.json', webpage, video_id) | ||||
|  | ||||
|         info.update({ | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         }) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -180,11 +180,14 @@ class ArteTVBaseIE(InfoExtractor): | ||||
|  | ||||
| class ArteTVPlus7IE(ArteTVBaseIE): | ||||
|     IE_NAME = 'arte.tv:+7' | ||||
|     _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)' | ||||
|     _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -240,10 +243,10 @@ class ArteTVPlus7IE(ArteTVBaseIE): | ||||
|             return self._extract_from_json_url(json_url, video_id, lang, title=title) | ||||
|         # Different kind of embed URL (e.g. | ||||
|         # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) | ||||
|         embed_url = self._search_regex( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'embed url', group='url') | ||||
|         return self.url_result(embed_url) | ||||
|         entries = [ | ||||
|             self.url_result(url) | ||||
|             for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)] | ||||
|         return self.playlist_result(entries) | ||||
|  | ||||
|  | ||||
| # It also uses the arte_vp_url url from the webpage to extract the information | ||||
| @@ -252,22 +255,17 @@ class ArteTVCreativeIE(ArteTVPlus7IE): | ||||
|     _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', | ||||
|         'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', | ||||
|         'info_dict': { | ||||
|             'id': '72176', | ||||
|             'id': '057405-001-A', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Folge 2 - Corporate Design', | ||||
|             'upload_date': '20131004', | ||||
|             'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', | ||||
|             'upload_date': '20150716', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', | ||||
|         'info_dict': { | ||||
|             'id': '160676', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Monty Python live (mostly)', | ||||
|             'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', | ||||
|             'upload_date': '20140805', | ||||
|         } | ||||
|         'playlist_count': 11, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, { | ||||
|         'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', | ||||
|         'only_matching': True, | ||||
| @@ -349,14 +347,13 @@ class ArteTVCinemaIE(ArteTVPlus7IE): | ||||
|     _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://cinema.arte.tv/de/node/38291', | ||||
|         'md5': '6b275511a5107c60bacbeeda368c3aa1', | ||||
|         'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', | ||||
|         'md5': 'a5b9dd5575a11d93daf0e3f404f45438', | ||||
|         'info_dict': { | ||||
|             'id': '055876-000_PWA12025-D', | ||||
|             'id': '062494-000-A', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tod auf dem Nil', | ||||
|             'upload_date': '20160122', | ||||
|             'description': 'md5:7f749bbb77d800ef2be11d54529b96bc', | ||||
|             'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', | ||||
|             'upload_date': '20150807', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -422,6 +419,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): | ||||
|         'info_dict': { | ||||
|             'id': 'PL-013263', | ||||
|             'title': 'Areva & Uramin', | ||||
|             'description': 'md5:a1dc0312ce357c262259139cfd48c9bf', | ||||
|         }, | ||||
|         'playlist_mincount': 6, | ||||
|     }, { | ||||
|   | ||||
| @@ -46,6 +46,7 @@ class AzubuIE(InfoExtractor): | ||||
|                 'uploader_id': 272749, | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|             'skip': 'Channel offline', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
| @@ -56,22 +57,26 @@ class AzubuIE(InfoExtractor): | ||||
|             'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data'] | ||||
|  | ||||
|         title = data['title'].strip() | ||||
|         description = data['description'] | ||||
|         thumbnail = data['thumbnail'] | ||||
|         view_count = data['view_count'] | ||||
|         uploader = data['user']['username'] | ||||
|         uploader_id = data['user']['id'] | ||||
|         description = data.get('description') | ||||
|         thumbnail = data.get('thumbnail') | ||||
|         view_count = data.get('view_count') | ||||
|         user = data.get('user', {}) | ||||
|         uploader = user.get('username') | ||||
|         uploader_id = user.get('id') | ||||
|  | ||||
|         stream_params = json.loads(data['stream_params']) | ||||
|  | ||||
|         timestamp = float_or_none(stream_params['creationDate'], 1000) | ||||
|         duration = float_or_none(stream_params['length'], 1000) | ||||
|         timestamp = float_or_none(stream_params.get('creationDate'), 1000) | ||||
|         duration = float_or_none(stream_params.get('length'), 1000) | ||||
|  | ||||
|         renditions = stream_params.get('renditions') or [] | ||||
|         video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength') | ||||
|         if video: | ||||
|             renditions.append(video) | ||||
|  | ||||
|         if not renditions and not user.get('channel', {}).get('is_live', True): | ||||
|             raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': fmt['url'], | ||||
|             'width': fmt['frameWidth'], | ||||
|   | ||||
| @@ -31,7 +31,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                             music/clips[/#]| | ||||
|                             radio/player/ | ||||
|                         ) | ||||
|                         (?P<id>%s) | ||||
|                         (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) | ||||
|                     ''' % _ID_REGEX | ||||
|  | ||||
|     _MEDIASELECTOR_URLS = [ | ||||
| @@ -192,6 +192,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Now it\'s really geo-restricted', | ||||
|         }, { | ||||
|             # compact player (https://github.com/rg3/youtube-dl/issues/8147) | ||||
|             'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', | ||||
| @@ -698,7 +699,9 @@ class BBCIE(BBCCoUkIE): | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url) | ||||
|         EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE) | ||||
|         return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) | ||||
|                 else super(BBCIE, cls).suitable(url)) | ||||
|  | ||||
|     def _extract_from_media_meta(self, media_meta, video_id): | ||||
|         # Direct links to media in media metadata (e.g. | ||||
| @@ -975,3 +978,72 @@ class BBCCoUkArticleIE(InfoExtractor): | ||||
|             r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)] | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|  | ||||
|  | ||||
| class BBCCoUkPlaylistBaseIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) | ||||
|             for video_id in re.findall( | ||||
|                 self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)] | ||||
|  | ||||
|         title, description = self._extract_title_and_description(webpage) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|  | ||||
|  | ||||
| class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): | ||||
|     IE_NAME = 'bbc.co.uk:iplayer:playlist' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX | ||||
|     _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' | ||||
|     _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', | ||||
|         'info_dict': { | ||||
|             'id': 'b05rcz9v', | ||||
|             'title': 'The Disappearance', | ||||
|             'description': 'French thriller serial about a missing teenager.', | ||||
|         }, | ||||
|         'playlist_mincount': 6, | ||||
|     } | ||||
|  | ||||
|     def _extract_title_and_description(self, webpage): | ||||
|         title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) | ||||
|         description = self._search_regex( | ||||
|             r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>', | ||||
|             webpage, 'description', fatal=False, group='value') | ||||
|         return title, description | ||||
|  | ||||
|  | ||||
| class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE): | ||||
|     IE_NAME = 'bbc.co.uk:playlist' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX | ||||
|     _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s' | ||||
|     _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips', | ||||
|         'info_dict': { | ||||
|             'id': 'b05rcz9v', | ||||
|             'title': 'The Disappearance - Clips - BBC Four', | ||||
|             'description': 'French thriller serial about a missing teenager.', | ||||
|         }, | ||||
|         'playlist_mincount': 7, | ||||
|     }, { | ||||
|         'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _extract_title_and_description(self, webpage): | ||||
|         title = self._og_search_title(webpage, fatal=False) | ||||
|         description = self._og_search_description(webpage) | ||||
|         return title, description | ||||
|   | ||||
| @@ -1,31 +1,27 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
|  | ||||
|  | ||||
| class BetIE(InfoExtractor): | ||||
| class BetIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'news/national/2014/a-conversation-with-president-obama', | ||||
|                 'id': '07e96bd3-8850-3051-b856-271b457f0ab8', | ||||
|                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'A Conversation With President Obama', | ||||
|                 'description': 'md5:699d0652a350cf3e491cd15cc745b5da', | ||||
|                 'description': 'President Obama urges persistence in confronting racism and bias.', | ||||
|                 'duration': 1534, | ||||
|                 'timestamp': 1418075340, | ||||
|                 'upload_date': '20141208', | ||||
|                 'uploader': 'admin', | ||||
|                 'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|                 'subtitles': { | ||||
|                     'en': 'mincount:2', | ||||
|                 } | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
| @@ -35,16 +31,17 @@ class BetIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'news/national/2014/justice-for-ferguson-a-community-reacts', | ||||
|                 'id': '9f516bf1-7543-39c4-8076-dd441b459ba9', | ||||
|                 'display_id': 'justice-for-ferguson-a-community-reacts', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Justice for Ferguson: A Community Reacts', | ||||
|                 'description': 'A BET News special.', | ||||
|                 'duration': 1696, | ||||
|                 'timestamp': 1416942360, | ||||
|                 'upload_date': '20141125', | ||||
|                 'uploader': 'admin', | ||||
|                 'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|                 'subtitles': { | ||||
|                     'en': 'mincount:2', | ||||
|                 } | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
| @@ -53,57 +50,32 @@ class BetIE(InfoExtractor): | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" | ||||
|  | ||||
|     def _get_feed_query(self, uri): | ||||
|         return compat_urllib_parse_urlencode({ | ||||
|             'uuid': uri, | ||||
|         }) | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         mgid = self._extract_mgid(webpage) | ||||
|         videos_info = self._get_videos_info(mgid) | ||||
|  | ||||
|         media_url = compat_urllib_parse_unquote(self._search_regex( | ||||
|             [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], | ||||
|             webpage, 'media URL')) | ||||
|         info_dict = videos_info['entries'][0] | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'/video/(.*)/_jcr_content/', media_url, 'video id') | ||||
|         upload_date = unified_strdate(self._html_search_meta('date', webpage)) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         mrss = self._download_xml(media_url, display_id) | ||||
|  | ||||
|         item = mrss.find('./channel/item') | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'dc': 'http://purl.org/dc/elements/1.1/', | ||||
|             'media': 'http://search.yahoo.com/mrss/', | ||||
|             'ka': 'http://kickapps.com/karss', | ||||
|         } | ||||
|  | ||||
|         title = xpath_text(item, './title', 'title') | ||||
|         description = xpath_text( | ||||
|             item, './description', 'description', fatal=False) | ||||
|  | ||||
|         timestamp = parse_iso8601(xpath_text( | ||||
|             item, xpath_with_ns('./dc:date', NS_MAP), | ||||
|             'upload date', fatal=False)) | ||||
|         uploader = xpath_text( | ||||
|             item, xpath_with_ns('./dc:creator', NS_MAP), | ||||
|             'uploader', fatal=False) | ||||
|  | ||||
|         media_content = item.find( | ||||
|             xpath_with_ns('./media:content', NS_MAP)) | ||||
|         duration = int_or_none(media_content.get('duration')) | ||||
|         smil_url = media_content.get('url') | ||||
|  | ||||
|         thumbnail = media_content.find( | ||||
|             xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') | ||||
|  | ||||
|         formats = self._extract_smil_formats(smil_url, display_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|         info_dict.update({ | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|             'upload_date': upload_date, | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
|   | ||||
| @@ -29,7 +29,8 @@ class BRIE(InfoExtractor): | ||||
|                 'duration': 180, | ||||
|                 'uploader': 'Reinhard Weber', | ||||
|                 'upload_date': '20150422', | ||||
|             } | ||||
|             }, | ||||
|             'skip': '404 not found', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', | ||||
| @@ -40,7 +41,8 @@ class BRIE(InfoExtractor): | ||||
|                 'title': 'Manfred Schreiber ist tot', | ||||
|                 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97', | ||||
|                 'duration': 26, | ||||
|             } | ||||
|             }, | ||||
|             'skip': '404 not found', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html', | ||||
| @@ -51,7 +53,8 @@ class BRIE(InfoExtractor): | ||||
|                 'title': 'Kurzweilig und sehr bewegend', | ||||
|                 'description': 'md5:0351996e3283d64adeb38ede91fac54e', | ||||
|                 'duration': 296, | ||||
|             } | ||||
|             }, | ||||
|             'skip': '404 not found', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', | ||||
|   | ||||
							
								
								
									
										88
									
								
								youtube_dl/extractor/carambatv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								youtube_dl/extractor/carambatv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
# Extractor for videos hosted on video1.carambatv.ru, also reachable through
# the internal ``carambatv:<id>`` pseudo-URL.
class CarambaTVIE(InfoExtractor):
    _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video1.carambatv.ru/v/191910501',
        'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
        'info_dict': {
            'id': '191910501',
            'ext': 'mp4',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            'thumbnail': 're:^https?://.*\.jpg',
            'duration': 2678.31,
        },
    }, {
        'url': 'carambatv:191910501',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single video from its ``videoinfo.js`` JSON."""
        video_id = self._match_id(url)

        info = self._download_json(
            'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
            video_id)

        title = info['title']

        # File names in 'qualities' are relative; prefer the base URL given by
        # the JSON and fall back to the video's own directory.
        base_url = info.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id

        formats = []
        for quality in info['qualities']:
            # Entries without a file name cannot be turned into a URL.
            if not quality.get('fn'):
                continue
            formats.append({
                'url': base_url + quality['fn'],
                'height': int_or_none(quality.get('height')),
                'format_id': '%sp' % quality['height'] if quality.get('height') else None,
            })
        self._sort_formats(formats)

        # Duration is read from the end time of the first annotation, and only
        # when that value is a string (try_get type filter).
        end_time = try_get(
            info, lambda x: x['annotations'][0]['end_time'], compat_str)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': info.get('splash'),
            'duration': float_or_none(end_time),
            'formats': formats,
        }
|  | ||||
|  | ||||
# Extractor for carambatv.ru article/movie pages; it only locates the embedded
# video and delegates the actual extraction to CarambaTVIE.
class CarambaTVPageIE(InfoExtractor):
    _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
        # NOTE(review): empty checksum looks like a placeholder - confirm the
        # real value or drop the key.
        'md5': '',
        'info_dict': {
            'id': '191910501',
            'ext': 'mp4',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            'thumbnail': 're:^https?://.*\.jpg$',
            'duration': 2678.31,
        },
    }

    def _real_extract(self, url):
        """Resolve a page URL to a ``url_result`` handled by ``CarambaTVIE``."""
        page_id = self._match_id(url)

        webpage = self._download_webpage(url, page_id)

        # Preferred source: the og:video:iframe property, used verbatim.
        embed_url = self._og_search_property('video:iframe', webpage, default=None)
        if embed_url:
            return self.url_result(embed_url, CarambaTVIE.ie_key())

        # Fallback: pull the numeric id out of the page and build the
        # internal pseudo-URL understood by CarambaTVIE.
        numeric_id = self._search_regex(
            r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
            webpage, 'video id')
        return self.url_result('carambatv:%s' % numeric_id, CarambaTVIE.ie_key())
| @@ -1,17 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from .theplatform import ThePlatformFeedIE | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_element, | ||||
|     int_or_none, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CBSBaseIE(ThePlatformIE): | ||||
| class CBSBaseIE(ThePlatformFeedIE): | ||||
|     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): | ||||
|         closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') | ||||
|         return { | ||||
| @@ -21,9 +17,22 @@ class CBSBaseIE(ThePlatformIE): | ||||
|             }] | ||||
|         } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] | ||||
|  | ||||
|     def _extract_video_info(self, filter_query, video_id): | ||||
|         return self._extract_feed_info( | ||||
|             'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { | ||||
|                 'series': entry.get('cbs$SeriesTitle'), | ||||
|                 'season_number': int_or_none(entry.get('cbs$SeasonNumber')), | ||||
|                 'episode': entry.get('cbs$EpisodeTitle'), | ||||
|                 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), | ||||
|             }, { | ||||
|                 'StreamPack': { | ||||
|                     'manifest': 'm3u', | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|  | ||||
| class CBSIE(CBSBaseIE): | ||||
|     _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))' | ||||
|     _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', | ||||
| @@ -38,25 +47,7 @@ class CBSIE(CBSBaseIE): | ||||
|             'upload_date': '20131127', | ||||
|             'uploader': 'CBSI-NEW', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         '_skip': 'Blocked outside the US', | ||||
|     }, { | ||||
|         'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', | ||||
|         'info_dict': { | ||||
|             'id': 'WWF_5KqY3PK1', | ||||
|             'display_id': 'st-vincent', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Live on Letterman - St. Vincent', | ||||
|             'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', | ||||
|             'duration': 3221, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         '_skip': 'Blocked outside the US', | ||||
|     }, { | ||||
|         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', | ||||
| @@ -68,44 +59,5 @@ class CBSIE(CBSBaseIE): | ||||
|     TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         content_id, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if not content_id: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             content_id = self._search_regex( | ||||
|                 [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], | ||||
|                 webpage, 'content id') | ||||
|         items_data = self._download_xml( | ||||
|             'http://can.cbs.com/thunder/player/videoPlayerService.php', | ||||
|             content_id, query={'partner': 'cbs', 'contentId': content_id}) | ||||
|         video_data = xpath_element(items_data, './/item') | ||||
|         title = xpath_text(video_data, 'videoTitle', 'title', True) | ||||
|  | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         for item in items_data.findall('.//item'): | ||||
|             pid = xpath_text(item, 'pid') | ||||
|             if not pid: | ||||
|                 continue | ||||
|             tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid | ||||
|             if '.m3u8' in xpath_text(item, 'contentUrl', default=''): | ||||
|                 tp_release_url += '&manifest=m3u' | ||||
|             tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||
|                 tp_release_url, content_id, 'Downloading %s SMIL data' % pid) | ||||
|             formats.extend(tp_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id) | ||||
|         info.update({ | ||||
|             'id': content_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'series': xpath_text(video_data, 'seriesTitle'), | ||||
|             'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), | ||||
|             'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), | ||||
|             'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), | ||||
|             'thumbnail': xpath_text(video_data, 'previewImageURL'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         }) | ||||
|         return info | ||||
|         content_id = self._match_id(url) | ||||
|         return self._extract_video_info('byGuid=%s' % content_id, content_id) | ||||
|   | ||||
| @@ -30,9 +30,12 @@ class CBSNewsIE(CBSBaseIE): | ||||
|         { | ||||
|             'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', | ||||
|             'info_dict': { | ||||
|                 'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack', | ||||
|                 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', | ||||
|                 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', | ||||
|                 'upload_date': '19700101', | ||||
|                 'uploader': 'CBSI-NEW', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 205, | ||||
|                 'subtitles': { | ||||
| @@ -58,30 +61,8 @@ class CBSNewsIE(CBSBaseIE): | ||||
|             webpage, 'video JSON info'), video_id) | ||||
|  | ||||
|         item = video_info['item'] if 'item' in video_info else video_info | ||||
|         title = item.get('articleTitle') or item.get('hed') | ||||
|         duration = item.get('duration') | ||||
|         thumbnail = item.get('mediaImage') or item.get('thumbnail') | ||||
|  | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']: | ||||
|             pid = item.get('media' + format_id) | ||||
|             if not pid: | ||||
|                 continue | ||||
|             release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid | ||||
|             tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid) | ||||
|             formats.extend(tp_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         guid = item['mpxRefId'] | ||||
|         return self._extract_video_info('byGuid=%s' % guid, guid) | ||||
|  | ||||
|  | ||||
| class CBSNewsLiveVideoIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,30 +1,28 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .cbs import CBSBaseIE | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' | ||||
| class CBSSportsIE(CBSBaseIE): | ||||
|     _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', | ||||
|         'info_dict': { | ||||
|             'id': '_d5_GbO8p1sT', | ||||
|             'ext': 'flv', | ||||
|             'title': 'US Open flashbacks: 1990s', | ||||
|             'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', | ||||
|             'id': '708337219968', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ben Simmons the next LeBron? Not so fast', | ||||
|             'description': 'md5:854294f627921baba1f4b9a990d87197', | ||||
|             'timestamp': 1466293740, | ||||
|             'upload_date': '20160618', | ||||
|             'uploader': 'CBSI-NEW', | ||||
|         }, | ||||
|     } | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         section = mobj.group('section') | ||||
|         video_id = mobj.group('id') | ||||
|         all_videos = self._download_json( | ||||
|             'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, | ||||
|             video_id) | ||||
|         # The json file contains the info of all the videos in the section | ||||
|         video_info = next(v for v in all_videos if v['pcid'] == video_id) | ||||
|         return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_video_info('byId=%s' % video_id, video_id) | ||||
|   | ||||
| @@ -58,7 +58,8 @@ class CDAIE(InfoExtractor): | ||||
|         def extract_format(page, version): | ||||
|             unpacked = decode_packed_codes(page) | ||||
|             format_url = self._search_regex( | ||||
|                 r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False) | ||||
|                 r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked, | ||||
|                 '%s url' % version, fatal=False, group='url') | ||||
|             if not format_url: | ||||
|                 return | ||||
|             f = { | ||||
| @@ -75,7 +76,8 @@ class CDAIE(InfoExtractor): | ||||
|             info_dict['formats'].append(f) | ||||
|             if not info_dict['duration']: | ||||
|                 info_dict['duration'] = parse_duration(self._search_regex( | ||||
|                     r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False)) | ||||
|                     r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1", | ||||
|                     unpacked, 'duration', fatal=False, group='duration')) | ||||
|  | ||||
|         extract_format(webpage, 'default') | ||||
|  | ||||
|   | ||||
							
								
								
									
										92
									
								
								youtube_dl/extractor/closertotruth.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								youtube_dl/extractor/closertotruth.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,92 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
# Extractor for closertotruth.com pages. Every video is handed off to the
# Kaltura extractor through ``kaltura:<partner_id>:<entry_id>`` URLs; pages
# offering a "select-version" drop-down are returned as a playlist.
class CloserToTruthIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
        'info_dict': {
            'id': '0_zof1ktre',
            'display_id': 'solutions-the-mind-body-problem',
            'ext': 'mov',
            'title': 'Solutions to the Mind-Body Problem?',
            'upload_date': '20140221',
            'timestamp': 1392956007,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
        'info_dict': {
            'id': '0_iuxai6g6',
            'display_id': 'how-do-brains-work',
            'ext': 'mov',
            'title': 'How do Brains Work?',
            'upload_date': '20140221',
            'timestamp': 1392956024,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/interviews/1725',
        'info_dict': {
            'id': '1725',
            'title': 'AyaFr-002',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Return a playlist or a single ``url_transparent`` Kaltura result.

        The Kaltura partner id and the page title are always required. If the
        page has a ``select-version`` drop-down with usable options, each
        distinct option becomes a playlist entry; otherwise the entry id is
        read from the ``embed-kaltura`` anchor.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        partner_id = self._search_regex(
            r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
            webpage, 'kaltura partner_id')

        title = self._search_regex(
            r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')

        select = self._search_regex(
            r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
            webpage, 'select version', default=None)
        if select:
            entry_ids = set()
            entries = []
            # Scan only the isolated drop-down markup: searching the whole
            # page would also match <option> tags of unrelated <select>
            # elements and turn them into bogus Kaltura entries.
            for mobj in re.finditer(
                    r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
                    select):
                entry_id = mobj.group('id')
                # The same entry may be listed more than once; keep the first.
                if entry_id in entry_ids:
                    continue
                entry_ids.add(entry_id)
                entries.append({
                    '_type': 'url_transparent',
                    'url': 'kaltura:%s:%s' % (partner_id, entry_id),
                    'ie_key': 'Kaltura',
                    'title': mobj.group('title'),
                })
            # An empty drop-down falls through to the single-video path.
            if entries:
                return self.playlist_result(entries, display_id, title)

        entry_id = self._search_regex(
            r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
            webpage, 'kaltura entry_id', group='id')

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': 'Kaltura',
            'title': title
        }
| @@ -53,6 +53,7 @@ from ..utils import ( | ||||
|     mimetype2ext, | ||||
|     update_Request, | ||||
|     update_url_query, | ||||
|     parse_m3u8_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -748,10 +749,12 @@ class InfoExtractor(object): | ||||
|         return self._og_search_property('url', html, **kargs) | ||||
|  | ||||
|     def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): | ||||
|         if not isinstance(name, (list, tuple)): | ||||
|             name = [name] | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|             display_name = name[0] | ||||
|         return self._html_search_regex( | ||||
|             self._meta_regex(name), | ||||
|             [self._meta_regex(n) for n in name], | ||||
|             html, display_name, fatal=fatal, group='content', **kwargs) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
| @@ -875,7 +878,11 @@ class InfoExtractor(object): | ||||
|                 f['ext'] = determine_ext(f['url']) | ||||
|  | ||||
|             if isinstance(field_preference, (list, tuple)): | ||||
|                 return tuple(f.get(field) if f.get(field) is not None else -1 for field in field_preference) | ||||
|                 return tuple( | ||||
|                     f.get(field) | ||||
|                     if f.get(field) is not None | ||||
|                     else ('' if field == 'format_id' else -1) | ||||
|                     for field in field_preference) | ||||
|  | ||||
|             preference = f.get('preference') | ||||
|             if preference is None: | ||||
| @@ -1150,23 +1157,11 @@ class InfoExtractor(object): | ||||
|             }] | ||||
|         last_info = None | ||||
|         last_media = None | ||||
|         kv_rex = re.compile( | ||||
|             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') | ||||
|         for line in m3u8_doc.splitlines(): | ||||
|             if line.startswith('#EXT-X-STREAM-INF:'): | ||||
|                 last_info = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_info[m.group('key')] = v | ||||
|                 last_info = parse_m3u8_attributes(line) | ||||
|             elif line.startswith('#EXT-X-MEDIA:'): | ||||
|                 last_media = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_media[m.group('key')] = v | ||||
|                 last_media = parse_m3u8_attributes(line) | ||||
|             elif line.startswith('#') or not line.strip(): | ||||
|                 continue | ||||
|             else: | ||||
|   | ||||
							
								
								
									
										30
									
								
								youtube_dl/extractor/ctv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								youtube_dl/extractor/ctv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
# Extractor for ctv.ca player URLs; it forwards the numeric video id to the
# NineCNineMedia extractor via a ``9c9media:ctv_web:<id>`` URL.
class CTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?ctv\.ca/video/player\?vid=(?P<id>[0-9.]+)'
    _TESTS = [{
        'url': 'http://www.ctv.ca/video/player?vid=706966',
        'md5': 'ff2ebbeae0aa2dcc32a830c3fd69b7b0',
        'info_dict': {
            'id': '706966',
            'ext': 'mp4',
            'title': 'Larry Day and Richard Jutras on the TIFF red carpet of \'Stonewall\'',
            'description': 'etalk catches up with Larry Day and Richard Jutras on the TIFF red carpet of "Stonewall”.',
            'upload_date': '20150919',
            'timestamp': 1442624700,
        },
        'expected_warnings': ['HTTP Error 404'],
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at the 9c9media extractor."""
        video_id = self._match_id(url)
        delegate_url = '9c9media:ctv_web:%s' % video_id
        return {
            '_type': 'url_transparent',
            'id': video_id,
            'url': delegate_url,
            'ie_key': 'NineCNineMedia',
        }
							
								
								
									
										65
									
								
								youtube_dl/extractor/ctvnews.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								youtube_dl/extractor/ctvnews.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import orderedSet | ||||
|  | ||||
|  | ||||
| class CTVNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?ctvnews\.ca/(?:video\?(?:clip|playlist|bin)Id=|.*?)(?P<id>[0-9.]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.ctvnews.ca/video?clipId=901995', | ||||
|         'md5': '10deb320dc0ccb8d01d34d12fc2ea672', | ||||
|         'info_dict': { | ||||
|             'id': '901995', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Extended: \'That person cannot be me\' Johnson says', | ||||
|             'description': 'md5:958dd3b4f5bbbf0ed4d045c790d89285', | ||||
|             'timestamp': 1467286284, | ||||
|             'upload_date': '20160630', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/video?playlistId=1.2966224', | ||||
|         'info_dict': | ||||
|         { | ||||
|             'id': '1.2966224', | ||||
|         }, | ||||
|         'playlist_mincount': 19, | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/video?binId=1.2876780', | ||||
|         'info_dict': | ||||
|         { | ||||
|             'id': '1.2876780', | ||||
|         }, | ||||
|         'playlist_mincount': 100, | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/1.810401', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.ctvnews.ca/canadiens-send-p-k-subban-to-nashville-in-blockbuster-trade-1.2967231', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         page_id = self._match_id(url) | ||||
|  | ||||
|         def ninecninemedia_url_result(clip_id): | ||||
|             return { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'id': clip_id, | ||||
|                 'url': '9c9media:ctvnews_web:%s' % clip_id, | ||||
|                 'ie_key': 'NineCNineMedia', | ||||
|             } | ||||
|  | ||||
|         if page_id.isdigit(): | ||||
|             return ninecninemedia_url_result(page_id) | ||||
|         else: | ||||
|             webpage = self._download_webpage('http://www.ctvnews.ca/%s' % page_id, page_id, query={ | ||||
|                 'ot': 'example.AjaxPageLayout.ot', | ||||
|                 'maxItemsPerPage': 1000000, | ||||
|             }) | ||||
|             entries = [ninecninemedia_url_result(clip_id) for clip_id in orderedSet( | ||||
|                 re.findall(r'clip\.id\s*=\s*(\d+);', webpage))] | ||||
|             return self.playlist_result(entries, page_id) | ||||
| @@ -20,7 +20,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class DCNIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?show/(?P<show_id>\d+)/[^/]+(?:/(?P<video_id>\d+)/(?P<season_id>\d+))?' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         show_id, video_id, season_id = re.match(self._VALID_URL, url).groups() | ||||
| @@ -55,30 +55,32 @@ class DCNBaseIE(InfoExtractor): | ||||
|             'is_live': is_live, | ||||
|         } | ||||
|  | ||||
|     def _extract_video_formats(self, webpage, video_id, entry_protocol): | ||||
|     def _extract_video_formats(self, webpage, video_id, m3u8_entry_protocol): | ||||
|         formats = [] | ||||
|         m3u8_url = self._html_search_regex( | ||||
|             r'file\s*:\s*"([^"]+)', webpage, 'm3u8 url', fatal=False) | ||||
|         if m3u8_url: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 m3u8_url, video_id, 'mp4', entry_protocol, m3u8_id='hls', fatal=None)) | ||||
|  | ||||
|         rtsp_url = self._search_regex( | ||||
|             r'<a[^>]+href="(rtsp://[^"]+)"', webpage, 'rtsp url', fatal=False) | ||||
|         if rtsp_url: | ||||
|             formats.append({ | ||||
|                 'url': rtsp_url, | ||||
|                 'format_id': 'rtsp', | ||||
|             }) | ||||
|  | ||||
|         format_url_base = 'http' + self._html_search_regex( | ||||
|             [ | ||||
|                 r'file\s*:\s*"https?(://[^"]+)/playlist.m3u8', | ||||
|                 r'<a[^>]+href="rtsp(://[^"]+)"' | ||||
|             ], webpage, 'format url') | ||||
|         # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|         # <SegmentTemplate> not implemented yet | ||||
|         # formats.extend(self._extract_mpd_formats( | ||||
|         #     format_url_base + '/manifest.mpd', | ||||
|         #     video_id, mpd_id='dash', fatal=False)) | ||||
|         formats.extend(self._extract_m3u8_formats( | ||||
|             format_url_base + '/playlist.m3u8', video_id, 'mp4', | ||||
|             m3u8_entry_protocol, m3u8_id='hls', fatal=False)) | ||||
|         formats.extend(self._extract_f4m_formats( | ||||
|             format_url_base + '/manifest.f4m', | ||||
|             video_id, f4m_id='hds', fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|         return formats | ||||
|  | ||||
|  | ||||
| class DCNVideoIE(DCNBaseIE): | ||||
|     IE_NAME = 'dcn:video' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?(?:video/[^/]+|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?(?:video(?:/[^/]+)?|media|catchup/[^/]+/[^/]+)/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.dcndigital.ae/#/video/%D8%B1%D8%AD%D9%84%D8%A9-%D8%A7%D9%84%D8%B9%D9%85%D8%B1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/17375', | ||||
|         'info_dict': | ||||
|         { | ||||
| @@ -94,7 +96,10 @@ class DCNVideoIE(DCNBaseIE): | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://awaan.ae/video/26723981/%D8%AF%D8%A7%D8%B1-%D8%A7%D9%84%D8%B3%D9%84%D8%A7%D9%85:-%D8%AE%D9%8A%D8%B1-%D8%AF%D9%88%D8%B1-%D8%A7%D9%84%D8%A3%D9%86%D8%B5%D8%A7%D8%B1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
| @@ -120,7 +125,7 @@ class DCNVideoIE(DCNBaseIE): | ||||
|  | ||||
| class DCNLiveIE(DCNBaseIE): | ||||
|     IE_NAME = 'dcn:live' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?live/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?live/(?P<id>\d+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         channel_id = self._match_id(url) | ||||
| @@ -147,7 +152,7 @@ class DCNLiveIE(DCNBaseIE): | ||||
|  | ||||
| class DCNSeasonIE(InfoExtractor): | ||||
|     IE_NAME = 'dcn:season' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dcndigital\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:awaan|dcndigital)\.ae/(?:#/)?program/(?:(?P<show_id>\d+)|season/(?P<season_id>\d+))' | ||||
|     _TEST = { | ||||
|         'url': 'http://dcndigital.ae/#/program/205024/%D9%85%D8%AD%D8%A7%D8%B6%D8%B1%D8%A7%D8%AA-%D8%A7%D9%84%D8%B4%D9%8A%D8%AE-%D8%A7%D9%84%D8%B4%D8%B9%D8%B1%D8%A7%D9%88%D9%8A', | ||||
|         'info_dict': | ||||
|   | ||||
| @@ -50,6 +50,14 @@ class EaglePlatformIE(InfoExtractor): | ||||
|         'skip': 'Georestricted', | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//.+?\.media\.eagleplatform\.com/index/player\?.+?)\1', | ||||
|             webpage) | ||||
|         if mobj is not None: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|     @staticmethod | ||||
|     def _handle_error(response): | ||||
|         status = int_or_none(response.get('status', 200)) | ||||
|   | ||||
| @@ -20,7 +20,10 @@ from .adobetv import ( | ||||
|     AdobeTVVideoIE, | ||||
| ) | ||||
| from .adultswim import AdultSwimIE | ||||
| from .aenetworks import AENetworksIE | ||||
| from .aenetworks import ( | ||||
|     AENetworksIE, | ||||
|     HistoryTopicIE, | ||||
| ) | ||||
| from .afreecatv import AfreecaTVIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .airmozilla import AirMozillaIE | ||||
| @@ -44,7 +47,6 @@ from .archiveorg import ArchiveOrgIE | ||||
| from .ard import ( | ||||
|     ARDIE, | ||||
|     ARDMediathekIE, | ||||
|     SportschauIE, | ||||
| ) | ||||
| from .arte import ( | ||||
|     ArteTvIE, | ||||
| @@ -71,6 +73,8 @@ from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| from .bbc import ( | ||||
|     BBCCoUkIE, | ||||
|     BBCCoUkArticleIE, | ||||
|     BBCCoUkIPlayerPlaylistIE, | ||||
|     BBCCoUkPlaylistIE, | ||||
|     BBCIE, | ||||
| ) | ||||
| from .beeg import BeegIE | ||||
| @@ -108,6 +112,10 @@ from .camwithher import CamWithHerIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .canvas import CanvasIE | ||||
| from .carambatv import ( | ||||
|     CarambaTVIE, | ||||
|     CarambaTVPageIE, | ||||
| ) | ||||
| from .cbc import ( | ||||
|     CBCIE, | ||||
|     CBCPlayerIE, | ||||
| @@ -135,6 +143,7 @@ from .cliprs import ClipRsIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| from .clipsyndicate import ClipsyndicateIE | ||||
| from .closertotruth import CloserToTruthIE | ||||
| from .cloudy import CloudyIE | ||||
| from .clubic import ClubicIE | ||||
| from .clyp import ClypIE | ||||
| @@ -162,6 +171,8 @@ from .crunchyroll import ( | ||||
| ) | ||||
| from .cspan import CSpanIE | ||||
| from .ctsnews import CtsNewsIE | ||||
| from .ctv import CTVIE | ||||
| from .ctvnews import CTVNewsIE | ||||
| from .cultureunplugged import CultureUnpluggedIE | ||||
| from .cwtv import CWTVIE | ||||
| from .dailymail import DailyMailIE | ||||
| @@ -270,6 +281,7 @@ from .freespeech import FreespeechIE | ||||
| from .freevideo import FreeVideoIE | ||||
| from .funimation import FunimationIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .fusion import FusionIE | ||||
| from .gameinformer import GameInformerIE | ||||
| from .gamekings import GamekingsIE | ||||
| from .gameone import ( | ||||
| @@ -279,7 +291,6 @@ from .gameone import ( | ||||
| from .gamersyde import GamersydeIE | ||||
| from .gamespot import GameSpotIE | ||||
| from .gamestar import GameStarIE | ||||
| from .gametrailers import GametrailersIE | ||||
| from .gazeta import GazetaIE | ||||
| from .gdcvault import GDCVaultIE | ||||
| from .generic import GenericIE | ||||
| @@ -417,6 +428,7 @@ from .makerschannel import MakersChannelIE | ||||
| from .makertv import MakerTVIE | ||||
| from .matchtv import MatchTVIE | ||||
| from .mdr import MDRIE | ||||
| from .meta import METAIE | ||||
| from .metacafe import MetacafeIE | ||||
| from .metacritic import MetacriticIE | ||||
| from .mgoon import MgoonIE | ||||
| @@ -449,6 +461,7 @@ from .motherless import MotherlessIE | ||||
| from .motorsport import MotorsportIE | ||||
| from .movieclips import MovieClipsIE | ||||
| from .moviezine import MoviezineIE | ||||
| from .msn import MSNIE | ||||
| from .mtv import ( | ||||
|     MTVIE, | ||||
|     MTVServicesEmbeddedIE, | ||||
| @@ -475,7 +488,6 @@ from .nbc import ( | ||||
|     NBCNewsIE, | ||||
|     NBCSportsIE, | ||||
|     NBCSportsVPlayerIE, | ||||
|     MSNBCIE, | ||||
| ) | ||||
| from .ndr import ( | ||||
|     NDRIE, | ||||
| @@ -512,8 +524,12 @@ from .nhl import ( | ||||
|     NHLVideocenterCategoryIE, | ||||
|     NHLIE, | ||||
| ) | ||||
| from .nick import NickIE | ||||
| from .nick import ( | ||||
|     NickIE, | ||||
|     NickDeIE, | ||||
| ) | ||||
| from .niconico import NiconicoIE, NiconicoPlaylistIE | ||||
| from .ninecninemedia import NineCNineMediaIE | ||||
| from .ninegag import NineGagIE | ||||
| from .noco import NocoIE | ||||
| from .normalboots import NormalbootsIE | ||||
| @@ -599,6 +615,7 @@ from .pluralsight import ( | ||||
|     PluralsightCourseIE, | ||||
| ) | ||||
| from .podomatic import PodomaticIE | ||||
| from .polskieradio import PolskieRadioIE | ||||
| from .porn91 import Porn91IE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import ( | ||||
| @@ -622,7 +639,10 @@ from .qqmusic import ( | ||||
|     QQMusicToplistIE, | ||||
|     QQMusicPlaylistIE, | ||||
| ) | ||||
| from .r7 import R7IE | ||||
| from .r7 import ( | ||||
|     R7IE, | ||||
|     R7ArticleIE, | ||||
| ) | ||||
| from .radiocanada import ( | ||||
|     RadioCanadaIE, | ||||
|     RadioCanadaAudioVideoIE, | ||||
| @@ -694,10 +714,12 @@ from .shahid import ShahidIE | ||||
| from .shared import SharedIE | ||||
| from .sharesix import ShareSixIE | ||||
| from .sina import SinaIE | ||||
| from .sixplay import SixPlayIE | ||||
| from .skynewsarabia import ( | ||||
|     SkyNewsArabiaIE, | ||||
|     SkyNewsArabiaArticleIE, | ||||
| ) | ||||
| from .skysports import SkySportsIE | ||||
| from .slideshare import SlideshareIE | ||||
| from .slutload import SlutloadIE | ||||
| from .smotri import ( | ||||
| @@ -738,6 +760,7 @@ from .sportbox import ( | ||||
|     SportBoxEmbedIE, | ||||
| ) | ||||
| from .sportdeutschland import SportDeutschlandIE | ||||
| from .sportschau import SportschauIE | ||||
| from .srgssr import ( | ||||
|     SRGSSRIE, | ||||
|     SRGSSRPlayIE, | ||||
| @@ -878,6 +901,7 @@ from .udn import UDNEmbedIE | ||||
| from .digiteka import DigitekaIE | ||||
| from .unistra import UnistraIE | ||||
| from .urort import UrortIE | ||||
| from .urplay import URPlayIE | ||||
| from .usatoday import USATodayIE | ||||
| from .ustream import UstreamIE, UstreamChannelIE | ||||
| from .ustudio import ( | ||||
| @@ -904,6 +928,7 @@ from .vice import ( | ||||
|     ViceIE, | ||||
|     ViceShowIE, | ||||
| ) | ||||
| from .vidbit import VidbitIE | ||||
| from .viddler import ViddlerIE | ||||
| from .videodetective import VideoDetectiveIE | ||||
| from .videofyme import VideofyMeIE | ||||
|   | ||||
| @@ -239,6 +239,8 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id, f in video_data.items(): | ||||
|             if f and isinstance(f, dict): | ||||
|                 f = [f] | ||||
|             if not f or not isinstance(f, list): | ||||
|                 continue | ||||
|             for quality in ('sd', 'hd'): | ||||
|   | ||||
| @@ -1,7 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FoxSportsIE(InfoExtractor): | ||||
| @@ -9,11 +12,15 @@ class FoxSportsIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.foxsports.com/video?vid=432609859715', | ||||
|         'md5': 'b49050e955bebe32c301972e4012ac17', | ||||
|         'info_dict': { | ||||
|             'id': 'gA0bHB3Ladz3', | ||||
|             'ext': 'flv', | ||||
|             'id': 'i0qKWsk3qJaM', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', | ||||
|             'description': 'Courtney Lee talks about Memphis being focused.', | ||||
|             'upload_date': '20150423', | ||||
|             'timestamp': 1429761109, | ||||
|             'uploader': 'NEWA-FNG-FOXSPORTS', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     } | ||||
| @@ -28,5 +35,8 @@ class FoxSportsIE(InfoExtractor): | ||||
|                 r"data-player-config='([^']+)'", webpage, 'data player config'), | ||||
|             video_id) | ||||
|  | ||||
|         return self.url_result(smuggle_url( | ||||
|             config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True})) | ||||
|         return self.url_result(smuggle_url(update_url_query( | ||||
|             config['releaseURL'], { | ||||
|                 'mbr': 'true', | ||||
|                 'switch': 'http', | ||||
|             }), {'force_smil_url': True})) | ||||
|   | ||||
							
								
								
									
										35
									
								
								youtube_dl/extractor/fusion.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								youtube_dl/extractor/fusion.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
|  | ||||
|  | ||||
| class FusionIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?fusion\.net/video/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://fusion.net/video/201781/u-s-and-panamanian-forces-work-together-to-stop-a-vessel-smuggling-drugs/', | ||||
|         'info_dict': { | ||||
|             'id': 'ZpcWNoMTE6x6uVIIWYpHh0qQDjxBuq5P', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'U.S. and Panamanian forces work together to stop a vessel smuggling drugs', | ||||
|             'description': 'md5:0cc84a9943c064c0f46b128b41b1b0d7', | ||||
|             'duration': 140.0, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     }, { | ||||
|         'url': 'http://fusion.net/video/201781', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         ooyala_code = self._search_regex( | ||||
|             r'data-video-id=(["\'])(?P<code>.+?)\1', | ||||
|             webpage, 'ooyala code', group='code') | ||||
|  | ||||
|         return OoyalaIE._build_url_result(ooyala_code) | ||||
| @@ -1,19 +1,19 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .once import OnceIE | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     url_basename, | ||||
|     dict_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GameSpotIE(InfoExtractor): | ||||
| class GameSpotIE(OnceIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', | ||||
| @@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|         data_video_json = self._search_regex( | ||||
|             r'data-video=["\'](.*?)["\']', webpage, 'data video') | ||||
|         data_video = json.loads(unescapeHTML(data_video_json)) | ||||
|         data_video = self._parse_json(unescapeHTML(data_video_json), page_id) | ||||
|         streams = data_video['videoStreams'] | ||||
|  | ||||
|         manifest_url = None | ||||
|         formats = [] | ||||
|         f4m_url = streams.get('f4m_stream') | ||||
|         if f4m_url is not None: | ||||
|             # Transform the manifest url to a link to the mp4 files | ||||
|             # they are used in mobile devices. | ||||
|             f4m_path = compat_urlparse.urlparse(f4m_url).path | ||||
|             QUALITIES_RE = r'((,\d+)+,?)' | ||||
|             qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') | ||||
|             http_path = f4m_path[1:].split('/', 1)[1] | ||||
|             http_template = re.sub(QUALITIES_RE, r'%s', http_path) | ||||
|             http_template = http_template.replace('.csmil/manifest.f4m', '') | ||||
|             http_template = compat_urlparse.urljoin( | ||||
|                 'http://video.gamespotcdn.com/', http_template) | ||||
|             for q in qualities: | ||||
|                 formats.append({ | ||||
|                     'url': http_template % q, | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': q, | ||||
|                 }) | ||||
|         else: | ||||
|         if f4m_url: | ||||
|             manifest_url = f4m_url | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False)) | ||||
|         m3u8_url = streams.get('m3u8_stream') | ||||
|         if m3u8_url: | ||||
|             manifest_url = m3u8_url | ||||
|             m3u8_formats = self._extract_m3u8_formats( | ||||
|                 m3u8_url, page_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False) | ||||
|             formats.extend(m3u8_formats) | ||||
|         progressive_url = dict_get( | ||||
|             streams, ('progressive_hd', 'progressive_high', 'progressive_low')) | ||||
|         if progressive_url and manifest_url: | ||||
|             qualities_basename = self._search_regex( | ||||
|                 '/([^/]+)\.csmil/', | ||||
|                 manifest_url, 'qualities basename', default=None) | ||||
|             if qualities_basename: | ||||
|                 QUALITIES_RE = r'((,\d+)+,?)' | ||||
|                 qualities = self._search_regex( | ||||
|                     QUALITIES_RE, qualities_basename, | ||||
|                     'qualities', default=None) | ||||
|                 if qualities: | ||||
|                     qualities = list(map(lambda q: int(q), qualities.strip(',').split(','))) | ||||
|                     qualities.sort() | ||||
|                     http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename) | ||||
|                     http_url_basename = url_basename(progressive_url) | ||||
|                     if m3u8_formats: | ||||
|                         self._sort_formats(m3u8_formats) | ||||
|                         m3u8_formats = list(filter( | ||||
|                             lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                             m3u8_formats)) | ||||
|                     if len(qualities) == len(m3u8_formats): | ||||
|                         for q, m3u8_format in zip(qualities, m3u8_formats): | ||||
|                             f = m3u8_format.copy() | ||||
|                             f.update({ | ||||
|                                 'url': progressive_url.replace( | ||||
|                                     http_url_basename, http_template % q), | ||||
|                                 'format_id': f['format_id'].replace('hls', 'http'), | ||||
|                                 'protocol': 'http', | ||||
|                             }) | ||||
|                             formats.append(f) | ||||
|                     else: | ||||
|                         for q in qualities: | ||||
|                             formats.append({ | ||||
|                                 'url': progressive_url.replace( | ||||
|                                     http_url_basename, http_template % q), | ||||
|                                 'ext': 'mp4', | ||||
|                                 'format_id': 'http-%d' % q, | ||||
|                                 'tbr': q, | ||||
|                             }) | ||||
|  | ||||
|         onceux_json = self._search_regex( | ||||
|             r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None) | ||||
|         if onceux_json: | ||||
|             onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') | ||||
|             if onceux_url: | ||||
|                 formats.extend(self._extract_once_formats(re.sub( | ||||
|                     r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) | ||||
|  | ||||
|         if not formats: | ||||
|             for quality in ['sd', 'hd']: | ||||
|                 # It's actually a link to a flv file | ||||
|                 flv_url = streams.get('f4m_{0}'.format(quality)) | ||||
| @@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor): | ||||
|                         'ext': 'flv', | ||||
|                         'format_id': quality, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': data_video['guid'], | ||||
|   | ||||
| @@ -1,62 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GametrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review', | ||||
|         'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a', | ||||
|         'info_dict': { | ||||
|             'id': '2983958', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': '116437-Just-Cause-3-Review', | ||||
|             'title': 'Just Cause 3 - Review', | ||||
|             'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.+?)\|', webpage, 'title').strip() | ||||
|         embed_url = self._proto_relative_url( | ||||
|             self._search_regex( | ||||
|                 r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage, | ||||
|                 'embed url'), | ||||
|             scheme='http:') | ||||
|         video_id = url_basename(embed_url) | ||||
|         embed_page = self._download_webpage(embed_url, video_id) | ||||
|         embed_vars_json = self._search_regex( | ||||
|             r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page, | ||||
|             'embed vars') | ||||
|         info = self._parse_json(embed_vars_json, video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for media in info['media']: | ||||
|             if media['mediaPurpose'] == 'play': | ||||
|                 formats.append({ | ||||
|                     'url': media['uri'], | ||||
|                     'height': media['height'], | ||||
|                     'width:': media['width'], | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': info.get('thumbUri'), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'duration': int_or_none(info.get('videoLengthInSeconds')), | ||||
|             'age_limit': parse_age_limit(info.get('audienceRating')), | ||||
|         } | ||||
| @@ -64,6 +64,8 @@ from .liveleak import LiveLeakIE | ||||
| from .threeqsdn import ThreeQSDNIE | ||||
| from .theplatform import ThePlatformIE | ||||
| from .vessel import VesselIE | ||||
| from .kaltura import KalturaIE | ||||
| from .eagleplatform import EaglePlatformIE | ||||
|  | ||||
|  | ||||
| class GenericIE(InfoExtractor): | ||||
| @@ -920,6 +922,24 @@ class GenericIE(InfoExtractor): | ||||
|             }, | ||||
|             'add_ie': ['Kaltura'], | ||||
|         }, | ||||
|         { | ||||
|             # Kaltura embedded via quoted entry_id | ||||
|             'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures', | ||||
|             'info_dict': { | ||||
|                 'id': '0_utuok90b', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '06_matthew_brender_raj_dutt', | ||||
|                 'timestamp': 1466638791, | ||||
|                 'upload_date': '20160622', | ||||
|             }, | ||||
|             'add_ie': ['Kaltura'], | ||||
|             'expected_warnings': [ | ||||
|                 'Could not send HEAD request' | ||||
|             ], | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         }, | ||||
|         # Eagle.Platform embed (generic URL) | ||||
|         { | ||||
|             'url': 'http://lenta.ru/news/2015/03/06/navalny/', | ||||
| @@ -1091,12 +1111,17 @@ class GenericIE(InfoExtractor): | ||||
|         # Dailymotion Cloud video | ||||
|         { | ||||
|             'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910', | ||||
|             'md5': '49444254273501a64675a7e68c502681', | ||||
|             'md5': 'dcaf23ad0c67a256f4278bce6e0bae38', | ||||
|             'info_dict': { | ||||
|                 'id': '5585de919473990de4bee11b', | ||||
|                 'id': 'x2uy8t3', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Le débat', | ||||
|                 'title': 'Sauvons les abeilles ! - Le débat', | ||||
|                 'description': 'md5:d9082128b1c5277987825d684939ca26', | ||||
|                 'thumbnail': 're:^https?://.*\.jpe?g$', | ||||
|                 'timestamp': 1434970506, | ||||
|                 'upload_date': '20150622', | ||||
|                 'uploader': 'Public Sénat', | ||||
|                 'uploader_id': 'xa9gza', | ||||
|             } | ||||
|         }, | ||||
|         # OnionStudios embed | ||||
| @@ -1220,6 +1245,22 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader': 'www.hudl.com', | ||||
|             }, | ||||
|         }, | ||||
|         # twitter:player embed | ||||
|         { | ||||
|             'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/', | ||||
|             'md5': 'a3e0df96369831de324f0778e126653c', | ||||
|             'info_dict': { | ||||
|                 'id': '4909620399001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'What Do Black Holes Sound Like?', | ||||
|                 'description': 'what do black holes sound like', | ||||
|                 'upload_date': '20160524', | ||||
|                 'uploader_id': '29913724001', | ||||
|                 'timestamp': 1464107587, | ||||
|                 'uploader': 'TheAtlantic', | ||||
|             }, | ||||
|             'add_ie': ['BrightcoveLegacy'], | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -1903,18 +1944,14 @@ class GenericIE(InfoExtractor): | ||||
|             return self.url_result(mobj.group('url'), 'Zapiks') | ||||
|  | ||||
|         # Look for Kaltura embeds | ||||
|         mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or | ||||
|                 re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage)) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(smuggle_url( | ||||
|                 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), | ||||
|                 {'source_url': url}), 'Kaltura') | ||||
|         kaltura_url = KalturaIE._extract_url(webpage) | ||||
|         if kaltura_url: | ||||
|             return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key()) | ||||
|  | ||||
|         # Look for Eagle.Platform embeds | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'EaglePlatform') | ||||
|         eagleplatform_url = EaglePlatformIE._extract_url(webpage) | ||||
|         if eagleplatform_url: | ||||
|             return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key()) | ||||
|  | ||||
|         # Look for ClipYou (uses Eagle.Platform) embeds | ||||
|         mobj = re.search( | ||||
| @@ -2060,6 +2097,11 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader': video_uploader, | ||||
|             } | ||||
|  | ||||
|         # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser | ||||
|         embed_url = self._html_search_meta('twitter:player', webpage, default=None) | ||||
|         if embed_url: | ||||
|             return self.url_result(embed_url) | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|   | ||||
| @@ -12,7 +12,7 @@ from ..utils import ( | ||||
| class ImdbIE(InfoExtractor): | ||||
|     IE_NAME = 'imdb' | ||||
|     IE_DESC = 'Internet Movie Database trailers' | ||||
|     _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.imdb.com/video/imdb/vi2524815897', | ||||
| @@ -25,6 +25,12 @@ class ImdbIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.imdb.com/video/_/vi2524815897', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,30 +1,25 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import binascii | ||||
| import hashlib | ||||
| import itertools | ||||
| import math | ||||
| import os | ||||
| import random | ||||
| import re | ||||
| import time | ||||
| import uuid | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_str, | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_urllib_parse_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     decode_packed_codes, | ||||
|     ExtractorError, | ||||
|     intlist_to_bytes, | ||||
|     ohdave_rsa_encrypt, | ||||
|     remove_start, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
|     url_basename, | ||||
|     urshift, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -171,70 +166,21 @@ class IqiyiIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.iqiyi.com/v_19rrojlavg.html', | ||||
|         'md5': '2cb594dc2781e6c941a110d8f358118b', | ||||
|         'md5': '470a6c160618577166db1a7aac5a3606', | ||||
|         'info_dict': { | ||||
|             'id': '9c1fb1b99d192b21c559e5a1a2cb3c73', | ||||
|             'ext': 'mp4', | ||||
|             'title': '美国德州空中惊现奇异云团 酷似UFO', | ||||
|             'ext': 'f4v', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.iqiyi.com/v_19rrhnnclk.html', | ||||
|         'md5': 'f09f0a6a59b2da66a26bf4eda669a4cc', | ||||
|         'info_dict': { | ||||
|             'id': 'e3f585b550a280af23c98b6cb2be19fb', | ||||
|             'title': '名侦探柯南第752集', | ||||
|         }, | ||||
|         'playlist': [{ | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part1', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part2', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part3', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part4', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part5', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part6', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part7', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }, { | ||||
|             'info_dict': { | ||||
|                 'id': 'e3f585b550a280af23c98b6cb2be19fb_part8', | ||||
|                 'ext': 'f4v', | ||||
|                 'title': '名侦探柯南第752集', | ||||
|             }, | ||||
|         }], | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|             'ext': 'mp4', | ||||
|             'title': '名侦探柯南 国语版', | ||||
|         }, | ||||
|         'skip': 'Geo-restricted to China', | ||||
|     }, { | ||||
|         'url': 'http://www.iqiyi.com/w_19rt6o8t9p.html', | ||||
|         'only_matching': True, | ||||
| @@ -287,13 +233,6 @@ class IqiyiIE(InfoExtractor): | ||||
|         ('10', 'h1'), | ||||
|     ] | ||||
|  | ||||
|     AUTH_API_ERRORS = { | ||||
|         # No preview available (不允许试看鉴权失败) | ||||
|         'Q00505': 'This video requires a VIP account', | ||||
|         # End of preview time (试看结束鉴权失败) | ||||
|         'Q00506': 'Needs a VIP account for full video', | ||||
|     } | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|  | ||||
| @@ -352,177 +291,101 @@ class IqiyiIE(InfoExtractor): | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     def _authenticate_vip_video(self, api_video_url, video_id, tvid, _uuid, do_report_warning): | ||||
|         auth_params = { | ||||
|             # version and platform hard-coded in com/qiyi/player/core/model/remote/AuthenticationRemote.as | ||||
|             'version': '2.0', | ||||
|             'platform': 'b6c13e26323c537d', | ||||
|             'aid': tvid, | ||||
|     @staticmethod | ||||
|     def _gen_sc(tvid, timestamp): | ||||
|         M = [1732584193, -271733879] | ||||
|         M.extend([~M[0], ~M[1]]) | ||||
|         I_table = [7, 12, 17, 22, 5, 9, 14, 20, 4, 11, 16, 23, 6, 10, 15, 21] | ||||
|         C_base = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8388608, 432] | ||||
|  | ||||
|         def L(n, t): | ||||
|             if t is None: | ||||
|                 t = 0 | ||||
|             return trunc(((n >> 1) + (t >> 1) << 1) + (n & 1) + (t & 1)) | ||||
|  | ||||
|         def trunc(n): | ||||
|             n = n % 0x100000000 | ||||
|             if n > 0x7fffffff: | ||||
|                 n -= 0x100000000 | ||||
|             return n | ||||
|  | ||||
|         def transform(string, mod): | ||||
|             num = int(string, 16) | ||||
|             return (num >> 8 * (i % 4) & 255 ^ i % mod) << ((a & 3) << 3) | ||||
|  | ||||
|         C = list(C_base) | ||||
|         o = list(M) | ||||
|         k = str(timestamp - 7) | ||||
|         for i in range(13): | ||||
|             a = i | ||||
|             C[a >> 2] |= ord(k[a]) << 8 * (a % 4) | ||||
|  | ||||
|         for i in range(16): | ||||
|             a = i + 13 | ||||
|             start = (i >> 2) * 8 | ||||
|             r = '03967743b643f66763d623d637e30733' | ||||
|             C[a >> 2] |= transform(''.join(reversed(r[start:start + 8])), 7) | ||||
|  | ||||
|         for i in range(16): | ||||
|             a = i + 29 | ||||
|             start = (i >> 2) * 8 | ||||
|             r = '7038766939776a32776a32706b337139' | ||||
|             C[a >> 2] |= transform(r[start:start + 8], 1) | ||||
|  | ||||
|         for i in range(9): | ||||
|             a = i + 45 | ||||
|             if i < len(tvid): | ||||
|                 C[a >> 2] |= ord(tvid[i]) << 8 * (a % 4) | ||||
|  | ||||
|         for a in range(64): | ||||
|             i = a | ||||
|             I = i >> 4 | ||||
|             C_index = [i, 5 * i + 1, 3 * i + 5, 7 * i][I] % 16 + urshift(a, 6) | ||||
|             m = L(L(o[0], [ | ||||
|                 trunc(o[1] & o[2]) | trunc(~o[1] & o[3]), | ||||
|                 trunc(o[3] & o[1]) | trunc(~o[3] & o[2]), | ||||
|                 o[1] ^ o[2] ^ o[3], | ||||
|                 o[2] ^ trunc(o[1] | ~o[3]) | ||||
|             ][I]), L( | ||||
|                 trunc(int(abs(math.sin(i + 1)) * 4294967296)), | ||||
|                 C[C_index] if C_index < len(C) else None)) | ||||
|             I = I_table[4 * I + i % 4] | ||||
|             o = [o[3], | ||||
|                  L(o[1], trunc(trunc(m << I) | urshift(m, 32 - I))), | ||||
|                  o[1], | ||||
|                  o[2]] | ||||
|  | ||||
|         new_M = [L(o[0], M[0]), L(o[1], M[1]), L(o[2], M[2]), L(o[3], M[3])] | ||||
|         s = [new_M[a >> 3] >> (1 ^ a & 7) * 4 & 15 for a in range(32)] | ||||
|         return binascii.hexlify(intlist_to_bytes(s))[1::2].decode('ascii') | ||||
|  | ||||
|     def get_raw_data(self, tvid, video_id): | ||||
|         tm = int(time.time() * 1000) | ||||
|  | ||||
|         sc = self._gen_sc(tvid, tm) | ||||
|         params = { | ||||
|             'platForm': 'h5', | ||||
|             'rate': 1, | ||||
|             'tvid': tvid, | ||||
|             'uid': '', | ||||
|             'deviceId': _uuid, | ||||
|             'playType': 'main',  # XXX: always main? | ||||
|             'filename': os.path.splitext(url_basename(api_video_url))[0], | ||||
|         } | ||||
|  | ||||
|         qd_items = compat_parse_qs(compat_urllib_parse_urlparse(api_video_url).query) | ||||
|         for key, val in qd_items.items(): | ||||
|             auth_params[key] = val[0] | ||||
|  | ||||
|         auth_req = sanitized_Request( | ||||
|             'http://api.vip.iqiyi.com/services/ckn.action', | ||||
|             urlencode_postdata(auth_params)) | ||||
|         # iQiyi server throws HTTP 405 error without the following header | ||||
|         auth_req.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         auth_result = self._download_json( | ||||
|             auth_req, video_id, | ||||
|             note='Downloading video authentication JSON', | ||||
|             errnote='Unable to download video authentication JSON') | ||||
|  | ||||
|         code = auth_result.get('code') | ||||
|         msg = self.AUTH_API_ERRORS.get(code) or auth_result.get('msg') or code | ||||
|         if code == 'Q00506': | ||||
|             if do_report_warning: | ||||
|                 self.report_warning(msg) | ||||
|             return False | ||||
|         if 'data' not in auth_result: | ||||
|             if msg is not None: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, msg), expected=True) | ||||
|             raise ExtractorError('Unexpected error from Iqiyi auth API') | ||||
|  | ||||
|         return auth_result['data'] | ||||
|  | ||||
|     def construct_video_urls(self, data, video_id, _uuid, tvid): | ||||
|         def do_xor(x, y): | ||||
|             a = y % 3 | ||||
|             if a == 1: | ||||
|                 return x ^ 121 | ||||
|             if a == 2: | ||||
|                 return x ^ 72 | ||||
|             return x ^ 103 | ||||
|  | ||||
|         def get_encode_code(l): | ||||
|             a = 0 | ||||
|             b = l.split('-') | ||||
|             c = len(b) | ||||
|             s = '' | ||||
|             for i in range(c - 1, -1, -1): | ||||
|                 a = do_xor(int(b[c - i - 1], 16), i) | ||||
|                 s += chr(a) | ||||
|             return s[::-1] | ||||
|  | ||||
|         def get_path_key(x, format_id, segment_index): | ||||
|             mg = ')(*&^flash@#$%a' | ||||
|             tm = self._download_json( | ||||
|                 'http://data.video.qiyi.com/t?tn=' + str(random.random()), video_id, | ||||
|                 note='Download path key of segment %d for format %s' % (segment_index + 1, format_id) | ||||
|             )['t'] | ||||
|             t = str(int(math.floor(int(tm) / (600.0)))) | ||||
|             return md5_text(t + mg + x) | ||||
|  | ||||
|         video_urls_dict = {} | ||||
|         need_vip_warning_report = True | ||||
|         for format_item in data['vp']['tkl'][0]['vs']: | ||||
|             if 0 < int(format_item['bid']) <= 10: | ||||
|                 format_id = self.get_format(format_item['bid']) | ||||
|             else: | ||||
|                 continue | ||||
|  | ||||
|             video_urls = [] | ||||
|  | ||||
|             video_urls_info = format_item['fs'] | ||||
|             if not format_item['fs'][0]['l'].startswith('/'): | ||||
|                 t = get_encode_code(format_item['fs'][0]['l']) | ||||
|                 if t.endswith('mp4'): | ||||
|                     video_urls_info = format_item['flvs'] | ||||
|  | ||||
|             for segment_index, segment in enumerate(video_urls_info): | ||||
|                 vl = segment['l'] | ||||
|                 if not vl.startswith('/'): | ||||
|                     vl = get_encode_code(vl) | ||||
|                 is_vip_video = '/vip/' in vl | ||||
|                 filesize = segment['b'] | ||||
|                 base_url = data['vp']['du'].split('/') | ||||
|                 if not is_vip_video: | ||||
|                     key = get_path_key( | ||||
|                         vl.split('/')[-1].split('.')[0], format_id, segment_index) | ||||
|                     base_url.insert(-1, key) | ||||
|                 base_url = '/'.join(base_url) | ||||
|                 param = { | ||||
|                     'su': _uuid, | ||||
|                     'qyid': uuid.uuid4().hex, | ||||
|                     'client': '', | ||||
|                     'z': '', | ||||
|                     'bt': '', | ||||
|                     'ct': '', | ||||
|                     'tn': str(int(time.time())) | ||||
|                 } | ||||
|                 api_video_url = base_url + vl | ||||
|                 if is_vip_video: | ||||
|                     api_video_url = api_video_url.replace('.f4v', '.hml') | ||||
|                     auth_result = self._authenticate_vip_video( | ||||
|                         api_video_url, video_id, tvid, _uuid, need_vip_warning_report) | ||||
|                     if auth_result is False: | ||||
|                         need_vip_warning_report = False | ||||
|                         break | ||||
|                     param.update({ | ||||
|                         't': auth_result['t'], | ||||
|                         # cid is hard-coded in com/qiyi/player/core/player/RuntimeData.as | ||||
|                         'cid': 'afbe8fd3d73448c9', | ||||
|                         'vid': video_id, | ||||
|                         'QY00001': auth_result['u'], | ||||
|                     }) | ||||
|                 api_video_url += '?' if '?' not in api_video_url else '&' | ||||
|                 api_video_url += compat_urllib_parse_urlencode(param) | ||||
|                 js = self._download_json( | ||||
|                     api_video_url, video_id, | ||||
|                     note='Download video info of segment %d for format %s' % (segment_index + 1, format_id)) | ||||
|                 video_url = js['l'] | ||||
|                 video_urls.append( | ||||
|                     (video_url, filesize)) | ||||
|  | ||||
|             video_urls_dict[format_id] = video_urls | ||||
|         return video_urls_dict | ||||
|  | ||||
|     def get_format(self, bid): | ||||
|         matched_format_ids = [_format_id for _bid, _format_id in self._FORMATS_MAP if _bid == str(bid)] | ||||
|         return matched_format_ids[0] if len(matched_format_ids) else None | ||||
|  | ||||
|     def get_bid(self, format_id): | ||||
|         matched_bids = [_bid for _bid, _format_id in self._FORMATS_MAP if _format_id == format_id] | ||||
|         return matched_bids[0] if len(matched_bids) else None | ||||
|  | ||||
|     def get_raw_data(self, tvid, video_id, enc_key, _uuid): | ||||
|         tm = str(int(time.time())) | ||||
|         tail = tm + tvid | ||||
|         param = { | ||||
|             'key': 'fvip', | ||||
|             'src': md5_text('youtube-dl'), | ||||
|             'tvId': tvid, | ||||
|             'vid': video_id, | ||||
|             'vinfo': 1, | ||||
|             'tm': tm, | ||||
|             'enc': md5_text(enc_key + tail), | ||||
|             'qyid': _uuid, | ||||
|             'tn': random.random(), | ||||
|             # In iQiyi's flash player, um is set to 1 if there's a logged user | ||||
|             # Some 1080P formats are only available with a logged user. | ||||
|             # Here force um=1 to trick the iQiyi server | ||||
|             'um': 1, | ||||
|             'authkey': md5_text(md5_text('') + tail), | ||||
|             'k_tag': 1, | ||||
|             'cupid': 'qc_100001_100186', | ||||
|             'type': 'mp4', | ||||
|             'nolimit': 0, | ||||
|             'agenttype': 13, | ||||
|             'src': 'd846d0c32d664d32b6b54ea48997a589', | ||||
|             'sc': sc, | ||||
|             't': tm - 7, | ||||
|             '__jsT': None, | ||||
|         } | ||||
|  | ||||
|         api_url = 'http://cache.video.qiyi.com/vms' + '?' + \ | ||||
|             compat_urllib_parse_urlencode(param) | ||||
|         raw_data = self._download_json(api_url, video_id) | ||||
|         return raw_data | ||||
|  | ||||
|     def get_enc_key(self, video_id): | ||||
|         # TODO: automatic key extraction | ||||
|         # last update at 2016-01-22 for Zombie::bite | ||||
|         enc_key = '4a1caba4b4465345366f28da7c117d20' | ||||
|         return enc_key | ||||
|         headers = {} | ||||
|         cn_verification_proxy = self._downloader.params.get('cn_verification_proxy') | ||||
|         if cn_verification_proxy: | ||||
|             headers['Ytdl-request-proxy'] = cn_verification_proxy | ||||
|         return self._download_json( | ||||
|             'http://cache.m.iqiyi.com/jp/tmts/%s/%s/' % (tvid, video_id), | ||||
|             video_id, transform_source=lambda s: remove_start(s, 'var tvInfoJs='), | ||||
|             query=params, headers=headers) | ||||
|  | ||||
|     def _extract_playlist(self, webpage): | ||||
|         PAGE_SIZE = 50 | ||||
| @@ -571,58 +434,27 @@ class IqiyiIE(InfoExtractor): | ||||
|             r'data-player-tvid\s*=\s*[\'"](\d+)', webpage, 'tvid') | ||||
|         video_id = self._search_regex( | ||||
|             r'data-player-videoid\s*=\s*[\'"]([a-f\d]+)', webpage, 'video_id') | ||||
|         _uuid = uuid.uuid4().hex | ||||
|  | ||||
|         enc_key = self.get_enc_key(video_id) | ||||
|         for _ in range(5): | ||||
|             raw_data = self.get_raw_data(tvid, video_id) | ||||
|  | ||||
|         raw_data = self.get_raw_data(tvid, video_id, enc_key, _uuid) | ||||
|             if raw_data['code'] != 'A00000': | ||||
|                 if raw_data['code'] == 'A00111': | ||||
|                     self.raise_geo_restricted() | ||||
|                 raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) | ||||
|  | ||||
|         if raw_data['code'] != 'A000000': | ||||
|             raise ExtractorError('Unable to load data. Error code: ' + raw_data['code']) | ||||
|             data = raw_data['data'] | ||||
|  | ||||
|         data = raw_data['data'] | ||||
|             # iQiYi sometimes returns Ads | ||||
|             if not isinstance(data['playInfo'], dict): | ||||
|                 self._sleep(5, video_id) | ||||
|                 continue | ||||
|  | ||||
|         title = data['vi']['vn'] | ||||
|             title = data['playInfo']['an'] | ||||
|             break | ||||
|  | ||||
|         # generate video_urls_dict | ||||
|         video_urls_dict = self.construct_video_urls( | ||||
|             data, video_id, _uuid, tvid) | ||||
|  | ||||
|         # construct info | ||||
|         entries = [] | ||||
|         for format_id in video_urls_dict: | ||||
|             video_urls = video_urls_dict[format_id] | ||||
|             for i, video_url_info in enumerate(video_urls): | ||||
|                 if len(entries) < i + 1: | ||||
|                     entries.append({'formats': []}) | ||||
|                 entries[i]['formats'].append( | ||||
|                     { | ||||
|                         'url': video_url_info[0], | ||||
|                         'filesize': video_url_info[-1], | ||||
|                         'format_id': format_id, | ||||
|                         'preference': int(self.get_bid(format_id)) | ||||
|                     } | ||||
|                 ) | ||||
|  | ||||
|         for i in range(len(entries)): | ||||
|             self._sort_formats(entries[i]['formats']) | ||||
|             entries[i].update( | ||||
|                 { | ||||
|                     'id': '%s_part%d' % (video_id, i + 1), | ||||
|                     'title': title, | ||||
|                 } | ||||
|             ) | ||||
|  | ||||
|         if len(entries) > 1: | ||||
|             info = { | ||||
|                 '_type': 'multi_video', | ||||
|                 'id': video_id, | ||||
|                 'title': title, | ||||
|                 'entries': entries, | ||||
|             } | ||||
|         else: | ||||
|             info = entries[0] | ||||
|             info['id'] = video_id | ||||
|             info['title'] = title | ||||
|  | ||||
|         return info | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'url': data['m3u'], | ||||
|         } | ||||
|   | ||||
| @@ -12,9 +12,35 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class JWPlatformBaseIE(InfoExtractor): | ||||
|     @staticmethod | ||||
|     def _find_jwplayer_data(webpage): | ||||
|         # TODO: Merge this with JWPlayer-related codes in generic.py | ||||
|  | ||||
|         mobj = re.search( | ||||
|             'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('options') | ||||
|  | ||||
|     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): | ||||
|         jwplayer_data = self._parse_json( | ||||
|             self._find_jwplayer_data(webpage), video_id) | ||||
|         return self._parse_jwplayer_data( | ||||
|             jwplayer_data, video_id, *args, **kwargs) | ||||
|  | ||||
|     def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): | ||||
|         # JWPlayer backward compatibility: flattened playlists | ||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 | ||||
|         if 'playlist' not in jwplayer_data: | ||||
|             jwplayer_data = {'playlist': [jwplayer_data]} | ||||
|  | ||||
|         video_data = jwplayer_data['playlist'][0] | ||||
|  | ||||
|         # JWPlayer backward compatibility: flattened sources | ||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 | ||||
|         if 'sources' not in video_data: | ||||
|             video_data['sources'] = [video_data] | ||||
|  | ||||
|         formats = [] | ||||
|         for source in video_data['sources']: | ||||
|             source_url = self._proto_relative_url(source['file']) | ||||
|   | ||||
| @@ -64,6 +64,32 @@ class KalturaIE(InfoExtractor): | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         mobj = ( | ||||
|             re.search( | ||||
|                 r"""(?xs) | ||||
|                     kWidget\.(?:thumb)?[Ee]mbed\( | ||||
|                     \{.*? | ||||
|                         (?P<q1>['\"])wid(?P=q1)\s*:\s* | ||||
|                         (?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*? | ||||
|                         (?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s* | ||||
|                         (?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4), | ||||
|                 """, webpage) or | ||||
|             re.search( | ||||
|                 r'''(?xs) | ||||
|                     (?P<q1>["\']) | ||||
|                         (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*? | ||||
|                     (?P=q1).*? | ||||
|                     (?: | ||||
|                         entry_?[Ii]d| | ||||
|                         (?P<q2>["\'])entry_?[Ii]d(?P=q2) | ||||
|                     )\s*:\s* | ||||
|                     (?P<q3>["\'])(?P<id>.+?)(?P=q3) | ||||
|                 ''', webpage)) | ||||
|         if mobj: | ||||
|             return 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict() | ||||
|  | ||||
|     def _kaltura_api_call(self, video_id, actions, *args, **kwargs): | ||||
|         params = actions[0] | ||||
|         if len(actions) > 1: | ||||
|   | ||||
| @@ -23,6 +23,7 @@ from ..utils import ( | ||||
|     sanitized_Request, | ||||
|     str_or_none, | ||||
|     url_basename, | ||||
|     urshift, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -74,15 +75,11 @@ class LeIE(InfoExtractor): | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @staticmethod | ||||
|     def urshift(val, n): | ||||
|         return val >> n if val >= 0 else (val + 0x100000000) >> n | ||||
|  | ||||
|     # ror() and calc_time_key() are reversed from a embedded swf file in KLetvPlayer.swf | ||||
|     def ror(self, param1, param2): | ||||
|         _loc3_ = 0 | ||||
|         while _loc3_ < param2: | ||||
|             param1 = self.urshift(param1, 1) + ((param1 & 1) << 31) | ||||
|             param1 = urshift(param1, 1) + ((param1 & 1) << 31) | ||||
|             _loc3_ += 1 | ||||
|         return param1 | ||||
|  | ||||
|   | ||||
| @@ -95,7 +95,6 @@ class LyndaIE(LyndaBaseIE): | ||||
|     IE_NAME = 'lynda' | ||||
|     IE_DESC = 'lynda.com videos' | ||||
|     _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' | ||||
|     _NETRC_MACHINE = 'lynda' | ||||
|  | ||||
|     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' | ||||
|  | ||||
|   | ||||
| @@ -1,8 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -23,34 +21,5 @@ class M6IE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id, | ||||
|                                  'Downloading video RSS') | ||||
|  | ||||
|         title = rss.find('./channel/item/title').text | ||||
|         description = rss.find('./channel/item/description').text | ||||
|         thumbnail = rss.find('./channel/item/visuel_clip_big').text | ||||
|         duration = int(rss.find('./channel/item/duration').text) | ||||
|         view_count = int(rss.find('./channel/item/nombre_vues').text) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['lq', 'sd', 'hq', 'hd']: | ||||
|             video_url = rss.find('./channel/item/url_video_%s' % format_id) | ||||
|             if video_url is None: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url.text, | ||||
|                 'format_id': format_id, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
|         video_id = self._match_id(url) | ||||
|         return self.url_result('6play:%s' % video_id, 'SixPlay', video_id) | ||||
|   | ||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/meta.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/meta.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .pladform import PladformIE | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class METAIE(InfoExtractor): | ||||
|     # Extractor for video.meta.ua video pages and their /iframe/ variant. | ||||
|     # The video is taken either from the uppod player config embedded in the | ||||
|     # page (st_html5) or, failing that, from an embedded Pladform player. | ||||
|     _VALID_URL = r'https?://video\.meta\.ua/(?:iframe/)?(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://video.meta.ua/5502115.video', | ||||
|         'md5': '71b6f3ee274bef16f1ab410f7f56b476', | ||||
|         'info_dict': { | ||||
|             'id': '5502115', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Sony Xperia Z camera test [HQ]', | ||||
|             'description': 'Xperia Z shoots video in FullHD HDR.', | ||||
|             'uploader_id': 'nomobile', | ||||
|             'uploader': 'CHЁZA.TV', | ||||
|             'upload_date': '20130211', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, { | ||||
|         'url': 'http://video.meta.ua/iframe/5502115', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # pladform embed | ||||
|         'url': 'http://video.meta.ua/7121015.video', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # Optional (default=None): pages without an st_html5 value fall | ||||
|         # through to the Pladform check at the end of the method. | ||||
|         st_html5 = self._search_regex( | ||||
|             r"st_html5\s*=\s*'#([^']+)'", webpage, 'uppod html5 st', default=None) | ||||
|  | ||||
|         if st_html5: | ||||
|             # uppod st decryption algorithm is reverse engineered from function un(s) at uppod.js | ||||
|             # Each 3-character chunk is wrapped as an HTML numeric character | ||||
|             # reference ('&#x0' + chunk + ';'); unescapeHTML then turns the | ||||
|             # sequence of references into the JSON text that is parsed below. | ||||
|             json_str = '' | ||||
|             for i in range(0, len(st_html5), 3): | ||||
|                 json_str += '&#x0%s;' % st_html5[i:i + 3] | ||||
|             uppod_data = self._parse_json(unescapeHTML(json_str), video_id) | ||||
|             # 'customnotfound' carries the site's own error message, reported as an expected error | ||||
|             error = uppod_data.get('customnotfound') | ||||
|             if error: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
|  | ||||
|             # 'file' is mandatory (KeyError if absent); title and thumbnail | ||||
|             # prefer the uppod values and fall back to the page's og: metadata. | ||||
|             video_url = uppod_data['file'] | ||||
|             info = { | ||||
|                 'id': video_id, | ||||
|                 'url': video_url, | ||||
|                 'title': uppod_data.get('comment') or self._og_search_title(webpage), | ||||
|                 'description': self._og_search_description(webpage, default=None), | ||||
|                 'thumbnail': uppod_data.get('poster') or self._og_search_thumbnail(webpage), | ||||
|                 'duration': int_or_none(self._og_search_property( | ||||
|                     'video:duration', webpage, default=None)), | ||||
|             } | ||||
|             # YouTube-hosted files are handed to the Youtube extractor while | ||||
|             # keeping the metadata collected above (url_transparent). | ||||
|             if 'youtube.com/' in video_url: | ||||
|                 info.update({ | ||||
|                     '_type': 'url_transparent', | ||||
|                     'ie_key': 'Youtube', | ||||
|                 }) | ||||
|             return info | ||||
|  | ||||
|         # NOTE(review): when neither an uppod config nor a Pladform embed is | ||||
|         # found, the method falls off the end and returns None. | ||||
|         pladform_url = PladformIE._extract_url(webpage) | ||||
|         if pladform_url: | ||||
|             return self.url_result(pladform_url) | ||||
| @@ -1,5 +1,8 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
| @@ -8,6 +11,7 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     get_element_by_attribute, | ||||
|     int_or_none, | ||||
|     remove_start, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -15,7 +19,7 @@ class MiTeleIE(InfoExtractor): | ||||
|     IE_DESC = 'mitele.es' | ||||
|     _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', | ||||
|         # MD5 is unstable | ||||
|         'info_dict': { | ||||
| @@ -24,10 +28,31 @@ class MiTeleIE(InfoExtractor): | ||||
|             'ext': 'flv', | ||||
|             'title': 'Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
|             'series': 'Diario de', | ||||
|             'season': 'La redacción', | ||||
|             'episode': 'Programa 144', | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 2913, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # no explicit title | ||||
|         'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', | ||||
|         'info_dict': { | ||||
|             'id': 'eLZSwoEd1S3pVyUm8lc6F', | ||||
|             'display_id': 'programa-226', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Cuarto Milenio - Temporada 6 - Programa 226', | ||||
|             'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', | ||||
|             'series': 'Cuarto Milenio', | ||||
|             'season': 'Temporada 6', | ||||
|             'episode': 'Programa 226', | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 7312, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
| @@ -70,7 +95,22 @@ class MiTeleIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') | ||||
|             r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', | ||||
|             webpage, 'title', default=None) | ||||
|  | ||||
|         mobj = re.search(r'''(?sx) | ||||
|                             class="Destacado-text"[^>]*>.*?<h1>\s* | ||||
|                             <span>(?P<series>[^<]+)</span>\s* | ||||
|                             <span>(?P<season>[^<]+)</span>\s* | ||||
|                             <span>(?P<episode>[^<]+)</span>''', webpage) | ||||
|         series, season, episode = mobj.groups() if mobj else [None] * 3 | ||||
|  | ||||
|         if not title: | ||||
|             if mobj: | ||||
|                 title = '%s - %s - %s' % (series, season, episode) | ||||
|             else: | ||||
|                 title = remove_start(self._search_regex( | ||||
|                     r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ') | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-media-id\s*=\s*"([^"]+)"', webpage, | ||||
| @@ -83,6 +123,9 @@ class MiTeleIE(InfoExtractor): | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': get_element_by_attribute('class', 'text', webpage), | ||||
|             'series': series, | ||||
|             'season': season, | ||||
|             'episode': episode, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|   | ||||
| @@ -102,11 +102,11 @@ class MixcloudIE(InfoExtractor): | ||||
|         description = self._og_search_description(webpage) | ||||
|         like_count = parse_count(self._search_regex( | ||||
|             r'\bbutton-favorite[^>]+>.*?<span[^>]+class=["\']toggle-number[^>]+>\s*([^<]+)', | ||||
|             webpage, 'like count', fatal=False)) | ||||
|             webpage, 'like count', default=None)) | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"', | ||||
|              r'/listeners/?">([0-9,.]+)</a>'], | ||||
|             webpage, 'play count', fatal=False)) | ||||
|             webpage, 'play count', default=None)) | ||||
|  | ||||
|         return { | ||||
|             'id': track_id, | ||||
|   | ||||
							
								
								
									
										122
									
								
								youtube_dl/extractor/msn.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								youtube_dl/extractor/msn.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MSNIE(InfoExtractor): | ||||
|     # Extractor for msn.com article/video pages. All metadata comes from the | ||||
|     # JSON stored in the page's data-metadata attribute. | ||||
|     _VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE', | ||||
|         'md5': '8442f66c116cbab1ff7098f986983458', | ||||
|         'info_dict': { | ||||
|             'id': 'BBqQYNE', | ||||
|             'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message', | ||||
|             'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25', | ||||
|             'duration': 104, | ||||
|             'uploader': 'CBS Entertainment', | ||||
|             'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # geo restricted | ||||
|         'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id, display_id = mobj.group('id', 'display_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # The metadata is JSON inside a quoted data-metadata attribute; | ||||
|         # unescapeHTML is passed as transform_source for the JSON parse. | ||||
|         # A page without the attribute yields '{}' and thus an empty dict. | ||||
|         video = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', | ||||
|                 webpage, 'video data', default='{}', group='data'), | ||||
|             display_id, transform_source=unescapeHTML) | ||||
|  | ||||
|         # Empty metadata: report the page's data-error text as an expected error | ||||
|         if not video: | ||||
|             error = unescapeHTML(self._search_regex( | ||||
|                 r'data-error=(["\'])(?P<error>.+?)\1', | ||||
|                 webpage, 'error', group='error')) | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True) | ||||
|  | ||||
|         # 'title' is mandatory (KeyError if absent); other fields are read with .get() | ||||
|         title = video['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for file_ in video.get('videoFiles', []): | ||||
|             format_url = file_.get('url') | ||||
|             if not format_url: | ||||
|                 continue | ||||
|             ext = determine_ext(format_url) | ||||
|             # .ism is not yet supported (see | ||||
|             # https://github.com/rg3/youtube-dl/issues/8118) | ||||
|             if ext == 'ism': | ||||
|                 continue | ||||
|             if 'm3u8' in format_url: | ||||
|                 # m3u8_native should not be used here until | ||||
|                 # https://github.com/rg3/youtube-dl/issues/9913 is fixed | ||||
|                 m3u8_formats = self._extract_m3u8_formats( | ||||
|                     format_url, display_id, 'mp4', | ||||
|                     m3u8_id='hls', fatal=False) | ||||
|                 # Despite metadata in m3u8 all video+audio formats are | ||||
|                 # actually video-only (no audio) | ||||
|                 for f in m3u8_formats: | ||||
|                     if f.get('acodec') != 'none' and f.get('vcodec') != 'none': | ||||
|                         f['acodec'] = 'none' | ||||
|                 formats.extend(m3u8_formats) | ||||
|             else: | ||||
|                 # Any other URL is recorded as a direct mp4 download; every | ||||
|                 # such entry gets the same 'http' format_id. | ||||
|                 formats.append({ | ||||
|                     'url': format_url, | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': 'http', | ||||
|                     'width': int_or_none(file_.get('width')), | ||||
|                     'height': int_or_none(file_.get('height')), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # Entries of video['files'] whose formatCode is 3100 are collected as | ||||
|         # subtitles, keyed by their 'culture' value ('en' when it is absent). | ||||
|         subtitles = {} | ||||
|         for file_ in video.get('files', []): | ||||
|             format_url = file_.get('url') | ||||
|             format_code = file_.get('formatCode') | ||||
|             if not format_url or not format_code: | ||||
|                 continue | ||||
|             if compat_str(format_code) == '3100': | ||||
|                 subtitles.setdefault(file_.get('culture', 'en'), []).append({ | ||||
|                     'ext': determine_ext(format_url, 'ttml'), | ||||
|                     'url': format_url, | ||||
|                 }) | ||||
|  | ||||
|         # NOTE(review): video.get('headlineImage', {}) only covers a missing | ||||
|         # key; a null headlineImage in the JSON would make .get('url') fail. | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': video.get('description'), | ||||
|             'thumbnail': video.get('headlineImage', {}).get('url'), | ||||
|             'duration': int_or_none(video.get('durationSecs')), | ||||
|             'uploader': video.get('sourceFriendly'), | ||||
|             'uploader_id': video.get('providerId'), | ||||
|             'creator': video.get('creator'), | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -6,6 +6,7 @@ from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_str, | ||||
|     compat_xpath, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| @@ -84,9 +85,10 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|                 rtmp_video_url = rendition.find('./src').text | ||||
|                 if rtmp_video_url.endswith('siteunavail.png'): | ||||
|                     continue | ||||
|                 new_url = self._transform_rtmp_url(rtmp_video_url) | ||||
|                 formats.append({ | ||||
|                     'ext': ext, | ||||
|                     'url': self._transform_rtmp_url(rtmp_video_url), | ||||
|                     'ext': 'flv' if new_url.startswith('rtmp') else ext, | ||||
|                     'url': new_url, | ||||
|                     'format_id': rendition.get('bitrate'), | ||||
|                     'width': int(rendition.get('width')), | ||||
|                     'height': int(rendition.get('height')), | ||||
| @@ -139,9 +141,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|                 itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||
|                 'scheme', 'urn:mtvn:video_title') | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') | ||||
|             title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title')) | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//title') or itemdoc.find('./title') | ||||
|             title_el = itemdoc.find(compat_xpath('.//title')) | ||||
|             if title_el.text is None: | ||||
|                 title_el = None | ||||
|  | ||||
|   | ||||
| @@ -9,10 +9,6 @@ from ..utils import ( | ||||
|     lowercase_escape, | ||||
|     smuggle_url, | ||||
|     unescapeHTML, | ||||
|     update_url_query, | ||||
|     int_or_none, | ||||
|     HEADRequest, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -192,9 +188,9 @@ class CSNNEIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class NBCNewsIE(ThePlatformIE): | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today)\.com/ | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/ | ||||
|         (?:video/.+?/(?P<id>\d+)| | ||||
|         ([^/]+/)*(?P<display_id>[^/?]+)) | ||||
|         ([^/]+/)*(?:.*-)?(?P<mpx_id>[^/?]+)) | ||||
|         ''' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -216,13 +212,16 @@ class NBCNewsIE(ThePlatformIE): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'How Twitter Reacted To The Snowden Interview', | ||||
|                 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64', | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|                 'timestamp': 1401363060, | ||||
|                 'upload_date': '20140529', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156', | ||||
|             'md5': 'fdbf39ab73a72df5896b6234ff98518a', | ||||
|             'info_dict': { | ||||
|                 'id': 'Wjf9EDR3A_60', | ||||
|                 'id': '529953347624', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'FULL EPISODE: Family Business', | ||||
|                 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', | ||||
| @@ -237,6 +236,9 @@ class NBCNewsIE(ThePlatformIE): | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', | ||||
|                 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', | ||||
|                 'timestamp': 1423104900, | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|                 'upload_date': '20150205', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -245,10 +247,12 @@ class NBCNewsIE(ThePlatformIE): | ||||
|             'info_dict': { | ||||
|                 'id': '529953347624', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Volkswagen U.S. Chief: We \'Totally Screwed Up\'', | ||||
|                 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301', | ||||
|                 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up', | ||||
|                 'description': 'md5:c8be487b2d80ff0594c005add88d8351', | ||||
|                 'upload_date': '20150922', | ||||
|                 'timestamp': 1442917800, | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|             }, | ||||
|             'expected_warnings': ['http-6000 is not available'] | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788', | ||||
| @@ -260,6 +264,22 @@ class NBCNewsIE(ThePlatformIE): | ||||
|                 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1', | ||||
|                 'upload_date': '20160420', | ||||
|                 'timestamp': 1461152093, | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', | ||||
|             'md5': '6d236bf4f3dddc226633ce6e2c3f814d', | ||||
|             'info_dict': { | ||||
|                 'id': '314487875924', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'The chaotic GOP immigration vote', | ||||
|                 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'timestamp': 1406937606, | ||||
|                 'upload_date': '20140802', | ||||
|                 'uploader': 'NBCU-NEWS', | ||||
|                 'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
| @@ -290,105 +310,28 @@ class NBCNewsIE(ThePlatformIE): | ||||
|             } | ||||
|         else: | ||||
|             # "feature" and "nightly-news" pages use theplatform.com | ||||
|             display_id = mobj.group('display_id') | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             info = None | ||||
|             bootstrap_json = self._search_regex( | ||||
|                 [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', | ||||
|                  r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], | ||||
|                 webpage, 'bootstrap json', default=None) | ||||
|             bootstrap = self._parse_json( | ||||
|                 bootstrap_json, display_id, transform_source=unescapeHTML) | ||||
|             if 'results' in bootstrap: | ||||
|                 info = bootstrap['results'][0]['video'] | ||||
|             elif 'video' in bootstrap: | ||||
|                 info = bootstrap['video'] | ||||
|             else: | ||||
|                 info = bootstrap | ||||
|             video_id = info['mpxId'] | ||||
|             title = info['title'] | ||||
|  | ||||
|             subtitles = {} | ||||
|             caption_links = info.get('captionLinks') | ||||
|             if caption_links: | ||||
|                 for (sub_key, sub_ext) in (('smpte-tt', 'ttml'), ('web-vtt', 'vtt'), ('srt', 'srt')): | ||||
|                     sub_url = caption_links.get(sub_key) | ||||
|                     if sub_url: | ||||
|                         subtitles.setdefault('en', []).append({ | ||||
|                             'url': sub_url, | ||||
|                             'ext': sub_ext, | ||||
|                         }) | ||||
|  | ||||
|             formats = [] | ||||
|             for video_asset in info['videoAssets']: | ||||
|                 video_url = video_asset.get('publicUrl') | ||||
|                 if not video_url: | ||||
|                     continue | ||||
|                 container = video_asset.get('format') | ||||
|                 asset_type = video_asset.get('assetType') or '' | ||||
|                 if container == 'ISM' or asset_type == 'FireTV-Once': | ||||
|                     continue | ||||
|                 elif asset_type == 'OnceURL': | ||||
|                     tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||
|                         video_url, video_id) | ||||
|                     formats.extend(tp_formats) | ||||
|                     subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|             video_id = mobj.group('mpx_id') | ||||
|             if not video_id.isdigit(): | ||||
|                 webpage = self._download_webpage(url, video_id) | ||||
|                 info = None | ||||
|                 bootstrap_json = self._search_regex( | ||||
|                     [r'(?m)(?:var\s+(?:bootstrapJson|playlistData)|NEWS\.videoObj)\s*=\s*({.+});?\s*$', | ||||
|                      r'videoObj\s*:\s*({.+})', r'data-video="([^"]+)"'], | ||||
|                     webpage, 'bootstrap json', default=None) | ||||
|                 bootstrap = self._parse_json( | ||||
|                     bootstrap_json, video_id, transform_source=unescapeHTML) | ||||
|                 if 'results' in bootstrap: | ||||
|                     info = bootstrap['results'][0]['video'] | ||||
|                 elif 'video' in bootstrap: | ||||
|                     info = bootstrap['video'] | ||||
|                 else: | ||||
|                     tbr = int_or_none(video_asset.get('bitRate') or video_asset.get('bitrate'), 1000) | ||||
|                     format_id = 'http%s' % ('-%d' % tbr if tbr else '') | ||||
|                     video_url = update_url_query( | ||||
|                         video_url, {'format': 'redirect'}) | ||||
|                     # resolve the url so that we can check availability and detect the correct extension | ||||
|                     head = self._request_webpage( | ||||
|                         HEADRequest(video_url), video_id, | ||||
|                         'Checking %s url' % format_id, | ||||
|                         '%s is not available' % format_id, | ||||
|                         fatal=False) | ||||
|                     if head: | ||||
|                         video_url = head.geturl() | ||||
|                         formats.append({ | ||||
|                             'format_id': format_id, | ||||
|                             'url': video_url, | ||||
|                             'width': int_or_none(video_asset.get('width')), | ||||
|                             'height': int_or_none(video_asset.get('height')), | ||||
|                             'tbr': tbr, | ||||
|                             'container': video_asset.get('format'), | ||||
|                         }) | ||||
|             self._sort_formats(formats) | ||||
|                     info = bootstrap | ||||
|                 video_id = info['mpxId'] | ||||
|  | ||||
|             return { | ||||
|                 '_type': 'url_transparent', | ||||
|                 'id': video_id, | ||||
|                 'title': title, | ||||
|                 'description': info.get('description'), | ||||
|                 'thumbnail': info.get('thumbnail'), | ||||
|                 'duration': int_or_none(info.get('duration')), | ||||
|                 'timestamp': parse_iso8601(info.get('pubDate') or info.get('pub_date')), | ||||
|                 'formats': formats, | ||||
|                 'subtitles': subtitles, | ||||
|                 # http://feed.theplatform.com/f/2E2eJC/nbcnews also works | ||||
|                 'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byId=%s' % video_id, | ||||
|                 'ie_key': 'ThePlatformFeed', | ||||
|             } | ||||
|  | ||||
|  | ||||
| class MSNBCIE(InfoExtractor): | ||||
|     # https URLs redirect to corresponding http ones | ||||
|     _VALID_URL = r'https?://www\.msnbc\.com/[^/]+/watch/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924', | ||||
|         'md5': '6d236bf4f3dddc226633ce6e2c3f814d', | ||||
|         'info_dict': { | ||||
|             'id': 'n_hayes_Aimm_140801_272214', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The chaotic GOP immigration vote', | ||||
|             'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'timestamp': 1406937606, | ||||
|             'upload_date': '20140802', | ||||
|             'uploader': 'NBCU-NEWS', | ||||
|             'categories': ['MSNBC/Topics/Franchise/Best of last night', 'MSNBC/Topics/General/Congress'], | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         embed_url = self._html_search_meta('embedURL', webpage) | ||||
|         return self.url_result(embed_url) | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
| from ..utils import update_url_query | ||||
|  | ||||
|  | ||||
| class NickIE(MTVServicesInfoExtractor): | ||||
| @@ -61,3 +62,26 @@ class NickIE(MTVServicesInfoExtractor): | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') | ||||
|  | ||||
|  | ||||
| class NickDeIE(MTVServicesInfoExtractor): | ||||
|     # Extractor for nick.de playlist and show pages. It locates the MRSS feed | ||||
|     # URL in the page and delegates to self._get_videos_info_from_url. | ||||
|     IE_NAME = 'nick.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.nick.de/shows/342-icarly', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         # The id is the last path segment of the URL (see _VALID_URL) | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # The feed URL is the quoted http... value of the page's data-mrss | ||||
|         # attribute; {'siteKey': 'nick.de'} is applied to it via update_url_query. | ||||
|         mrss_url = update_url_query(self._search_regex( | ||||
|             r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'), | ||||
|             {'siteKey': 'nick.de'}) | ||||
|  | ||||
|         return self._get_videos_info_from_url(mrss_url, video_id) | ||||
|   | ||||
							
								
								
									
										55
									
								
								youtube_dl/extractor/ninecninemedia.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								youtube_dl/extractor/ninecninemedia.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,55 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     parse_duration, | ||||
|     ExtractorError | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NineCNineMediaIE(InfoExtractor): | ||||
|     # Resolves internal '9c9media:<destination_code>:<id>' URLs through the | ||||
|     # capi.9c9media.com API. Only content with a single content package and | ||||
|     # a single stack is handled; more than one raises ExtractorError. | ||||
|     _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         destination_code, video_id = re.match(self._VALID_URL, url).groups() | ||||
|         api_base_url = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/' % (destination_code, video_id) | ||||
|         # Request the content record with its content packages included | ||||
|         content = self._download_json(api_base_url, video_id, query={ | ||||
|             '$include': '[contentpackages]', | ||||
|         }) | ||||
|         title = content['Name'] | ||||
|         # NOTE(review): only the >1 case is rejected; an empty ContentPackages | ||||
|         # list would raise IndexError at [0] below. | ||||
|         if len(content['ContentPackages']) > 1: | ||||
|             raise ExtractorError('multiple content packages') | ||||
|         content_package = content['ContentPackages'][0] | ||||
|         stacks_base_url = api_base_url + 'contentpackages/%s/stacks/' % content_package['Id'] | ||||
|         stacks = self._download_json(stacks_base_url, video_id)['Items'] | ||||
|         # NOTE(review): same pattern as above, an empty 'Items' list is not handled | ||||
|         if len(stacks) > 1: | ||||
|             raise ExtractorError('multiple stacks') | ||||
|         stack = stacks[0] | ||||
|         # Common prefix of the manifest URLs; the suffix picks the variant | ||||
|         # ('m3u8', 'f4m' or 'pd'). Each variant is requested with fatal=False. | ||||
|         stack_base_url = '%s%s/manifest.' % (stacks_base_url, stack['Id']) | ||||
|         formats = [] | ||||
|         formats.extend(self._extract_m3u8_formats( | ||||
|             stack_base_url + 'm3u8', video_id, 'mp4', | ||||
|             'm3u8_native', m3u8_id='hls', fatal=False)) | ||||
|         formats.extend(self._extract_f4m_formats( | ||||
|             stack_base_url + 'f4m', video_id, | ||||
|             f4m_id='hds', fatal=False)) | ||||
|         # The body of the 'pd' response is itself used as the direct media URL | ||||
|         mp4_url = self._download_webpage(stack_base_url + 'pd', video_id, fatal=False) | ||||
|         if mp4_url: | ||||
|             formats.append({ | ||||
|                 'url': mp4_url, | ||||
|                 'format_id': 'mp4', | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': content.get('Desc') or content.get('ShortDesc'), | ||||
|             'timestamp': parse_iso8601(content.get('BroadcastDateTime')), | ||||
|             'duration': parse_duration(content.get('BroadcastTime')), | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -516,9 +516,14 @@ class PBSIE(InfoExtractor): | ||||
|                 # https://projects.pbs.org/confluence/display/coveapi/COVE+Video+Specifications | ||||
|                 if not bitrate or bitrate not in ('400k', '800k', '1200k', '2500k'): | ||||
|                     continue | ||||
|                 f_url = re.sub(r'\d+k|baseline', bitrate, http_url) | ||||
|                 # This may produce invalid links sometimes (e.g. | ||||
|                 # http://www.pbs.org/wgbh/frontline/film/suicide-plan) | ||||
|                 if not self._is_valid_url(f_url, display_id, 'http-%s video' % bitrate): | ||||
|                     continue | ||||
|                 f = m3u8_format.copy() | ||||
|                 f.update({ | ||||
|                     'url': re.sub(r'\d+k|baseline', bitrate, http_url), | ||||
|                     'url': f_url, | ||||
|                     'format_id': m3u8_format['format_id'].replace('hls', 'http'), | ||||
|                     'protocol': 'http', | ||||
|                 }) | ||||
|   | ||||
| @@ -120,9 +120,12 @@ class PeriscopeUserIE(InfoExtractor): | ||||
|         title = user.get('display_name') or user.get('username') | ||||
|         description = user.get('description') | ||||
|  | ||||
|         broadcast_ids = (data_store.get('UserBroadcastHistory', {}).get('broadcastIds') or | ||||
|                          data_store.get('BroadcastCache', {}).get('broadcastIds', [])) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result( | ||||
|                 'https://www.periscope.tv/%s/%s' % (user_id, broadcast['id'])) | ||||
|             for broadcast in data_store.get('UserBroadcastHistory', {}).get('broadcasts', [])] | ||||
|                 'https://www.periscope.tv/%s/%s' % (user_id, broadcast_id)) | ||||
|             for broadcast_id in broadcast_ids] | ||||
|  | ||||
|         return self.playlist_result(entries, user_id, title, description) | ||||
|   | ||||
| @@ -49,7 +49,7 @@ class PladformIE(InfoExtractor): | ||||
|     @staticmethod | ||||
|     def _extract_url(webpage): | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)"', webpage) | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('url') | ||||
|  | ||||
|   | ||||
							
								
								
									
										95
									
								
								youtube_dl/extractor/polskieradio.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								youtube_dl/extractor/polskieradio.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,95 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse_unquote, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     strip_or_none, | ||||
|     unified_timestamp, | ||||
| ) | ||||
|  | ||||
|  | ||||
class PolskieRadioIE(InfoExtractor):
    """Extractor for polskieradio.pl article pages.

    An article is returned as a playlist with one entry per distinct media
    file found in the ``data-media`` attributes of the article's audio block.
    """
    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
        'info_dict': {
            'id': '1587943',
            'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
            'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
        },
        'playlist': [{
            'md5': '2984ee6ce9046d91fc233bc1a864a09a',
            'info_dict': {
                'id': '1540576',
                'ext': 'mp3',
                'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
                'timestamp': 1456594200,
                'upload_date': '20160227',
                'duration': 2364,
            },
        }],
    }, {
        'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
        'info_dict': {
            'id': '1635803',
            'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
            'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
        'only_matching': True,
    }, {
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
        'only_matching': True,
    }, {
        # with mp4 video
        'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result with the media attached to the article."""
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        content = self._search_regex(
            r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
            webpage, 'content')

        # The article timestamp is shared by every entry of the playlist.
        timestamp = unified_timestamp(self._html_search_regex(
            r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
            webpage, 'timestamp', fatal=False))

        entries = []

        # Used to skip a media URL that was already added as an entry.
        media_urls = set()

        for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
            media = self._parse_json(data_media, playlist_id, fatal=False)
            # With fatal=False a malformed blob yields no dict; skip it
            # instead of failing on the attribute access below.
            if not media:
                continue
            if not media.get('file') or not media.get('desc'):
                continue
            media_url = self._proto_relative_url(media['file'], 'http:')
            if media_url in media_urls:
                continue
            media_urls.add(media_url)
            entries.append({
                'id': compat_str(media['id']),
                'url': media_url,
                'title': compat_urllib_parse_unquote(media['desc']),
                'duration': int_or_none(media.get('length')),
                'vcodec': 'none' if media.get('provider') == 'audio' else None,
                'timestamp': timestamp,
            })

        title = self._og_search_title(webpage).strip()
        description = strip_or_none(self._og_search_description(webpage))

        return self.playlist_result(entries, playlist_id, title, description)
| @@ -1,19 +1,32 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PornHdIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', | ||||
|         'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5', | ||||
|         'info_dict': { | ||||
|             'id': '9864', | ||||
|             'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Restroom selfie masturbation', | ||||
|             'description': 'md5:3748420395e03e31ac96857a8f125b2b', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     }, { | ||||
|         # removed video | ||||
|         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', | ||||
|         'md5': '956b8ca569f7f4d8ec563e2c41598441', | ||||
|         'info_dict': { | ||||
| @@ -25,8 +38,9 @@ class PornHdIE(InfoExtractor): | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|         'skip': 'Not available anymore', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -38,28 +52,38 @@ class PornHdIE(InfoExtractor): | ||||
|         title = self._html_search_regex( | ||||
|             [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)', | ||||
|              r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'(\d+) views\s*</span>', webpage, 'view count', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         quality = qualities(['sd', 'hd']) | ||||
|         sources = json.loads(js_to_json(self._search_regex( | ||||
|         sources = self._parse_json(js_to_json(self._search_regex( | ||||
|             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", | ||||
|             webpage, 'sources'))) | ||||
|             webpage, 'sources', default='{}')), video_id) | ||||
|  | ||||
|         if not sources: | ||||
|             message = self._html_search_regex( | ||||
|                 r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1', | ||||
|                 webpage, 'error message', group='value') | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for qname, video_url in sources.items(): | ||||
|         for format_id, video_url in sources.items(): | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             height = int_or_none(self._search_regex( | ||||
|                 r'^(\d+)[pP]', format_id, 'height', default=None)) | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': qname, | ||||
|                 'quality': quality(qname), | ||||
|                 'format_id': format_id, | ||||
|                 'height': height, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = self._html_search_regex( | ||||
|             r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1', | ||||
|             webpage, 'description', fatal=False, group='value') | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'(\d+) views\s*<', webpage, 'view count', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| @@ -24,7 +25,15 @@ from ..aes import ( | ||||
|  | ||||
|  | ||||
| class PornHubIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)(?P<id>[0-9a-z]+)' | ||||
|     IE_DESC = 'PornHub and Thumbzilla' | ||||
|     _VALID_URL = r'''(?x) | ||||
|                     https?:// | ||||
|                         (?: | ||||
|                             (?:[a-z]+\.)?pornhub\.com/(?:view_video\.php\?viewkey=|embed/)| | ||||
|                             (?:www\.)?thumbzilla\.com/video/ | ||||
|                         ) | ||||
|                         (?P<id>[0-9a-z]+) | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015', | ||||
|         'md5': '1e19b41231a02eba417839222ac9d58e', | ||||
| @@ -39,13 +48,43 @@ class PornHubIE(InfoExtractor): | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|         }, | ||||
|     }, { | ||||
|         # non-ASCII title | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', | ||||
|         'info_dict': { | ||||
|             'id': '1331683002', | ||||
|             'ext': 'mp4', | ||||
|             'title': '重庆婷婷女王足交', | ||||
|             'uploader': 'cj397186295', | ||||
|             'duration': 1753, | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
|             'age_limit': 18, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # removed at the request of cam4.com | ||||
|         'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # removed at the request of the copyright owner | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # removed by uploader | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -68,27 +107,33 @@ class PornHubIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         error_msg = self._html_search_regex( | ||||
|             r'(?s)<div class="userMessageSection[^"]*".*?>(.*?)</div>', | ||||
|             webpage, 'error message', default=None) | ||||
|             r'(?s)<div[^>]+class=(["\']).*?\bremoved\b.*?\1[^>]*>(?P<error>.+?)</div>', | ||||
|             webpage, 'error message', default=None, group='error') | ||||
|         if error_msg: | ||||
|             error_msg = re.sub(r'\s+', ' ', error_msg) | ||||
|             raise ExtractorError( | ||||
|                 'PornHub said: %s' % error_msg, | ||||
|                 expected=True, video_id=video_id) | ||||
|  | ||||
|         # video_title from flashvars contains whitespace instead of non-ASCII (see | ||||
|         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying | ||||
|         # on that anymore. | ||||
|         title = self._html_search_meta( | ||||
|             'twitter:title', webpage, default=None) or self._search_regex( | ||||
|             (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', | ||||
|              r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', | ||||
|              r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), | ||||
|             webpage, 'title', group='title') | ||||
|  | ||||
|         flashvars = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), | ||||
|             video_id) | ||||
|         if flashvars: | ||||
|             video_title = flashvars.get('video_title') | ||||
|             thumbnail = flashvars.get('image_url') | ||||
|             duration = int_or_none(flashvars.get('video_duration')) | ||||
|         else: | ||||
|             video_title, thumbnail, duration = [None] * 3 | ||||
|  | ||||
|         if not video_title: | ||||
|             video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') | ||||
|             title, thumbnail, duration = [None] * 3 | ||||
|  | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', | ||||
| @@ -137,7 +182,7 @@ class PornHubIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': video_uploader, | ||||
|             'title': video_title, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|   | ||||
| @@ -2,22 +2,19 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
| ) | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class R7IE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         https?:// | ||||
|                         (?: | ||||
|                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| | ||||
|                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-| | ||||
|                             player\.r7\.com/video/i/ | ||||
|                         ) | ||||
|                         (?P<id>[\da-f]{24}) | ||||
|                         ''' | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', | ||||
|         'md5': '403c4e393617e8e8ddc748978ee8efde', | ||||
| @@ -25,6 +22,7 @@ class R7IE(InfoExtractor): | ||||
|             'id': '54e7050b0cf2ff57e0279389', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', | ||||
|             'description': 'md5:01812008664be76a6479aa58ec865b72', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 98, | ||||
|             'like_count': int, | ||||
| @@ -44,45 +42,72 @@ class R7IE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://player.r7.com/video/i/%s' % video_id, video_id) | ||||
|         video = self._download_json( | ||||
|             'http://player-api.r7.com/video/i/%s' % video_id, video_id) | ||||
|  | ||||
|         item = self._parse_json(js_to_json(self._search_regex( | ||||
|             r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) | ||||
|  | ||||
|         title = unescapeHTML(item['title']) | ||||
|         thumbnail = item.get('init', {}).get('thumbUri') | ||||
|         duration = None | ||||
|  | ||||
|         statistics = item.get('statistics', {}) | ||||
|         like_count = int_or_none(statistics.get('likes')) | ||||
|         view_count = int_or_none(statistics.get('views')) | ||||
|         title = video['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for format_key, format_dict in item['playlist'][0].items(): | ||||
|             src = format_dict.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             format_id = format_dict.get('format') or format_key | ||||
|             if duration is None: | ||||
|                 duration = format_dict.get('duration') | ||||
|             if '.f4m' in src: | ||||
|                 formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) | ||||
|             elif src.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': src, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|         media_url_hls = video.get('media_url_hls') | ||||
|         if media_url_hls: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|         media_url = video.get('media_url') | ||||
|         if media_url: | ||||
|             f = { | ||||
|                 'url': media_url, | ||||
|                 'format_id': 'http', | ||||
|             } | ||||
|             # m3u8 format always matches the http format, let's copy metadata from | ||||
|             # one to another | ||||
|             m3u8_formats = list(filter( | ||||
|                 lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                 formats)) | ||||
|             if len(m3u8_formats) == 1: | ||||
|                 f_copy = m3u8_formats[0].copy() | ||||
|                 f_copy.update(f) | ||||
|                 f_copy['protocol'] = 'http' | ||||
|                 f = f_copy | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = video.get('description') | ||||
|         thumbnail = video.get('thumb') | ||||
|         duration = int_or_none(video.get('media_duration')) | ||||
|         like_count = int_or_none(video.get('likes')) | ||||
|         view_count = int_or_none(video.get('views')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
class R7ArticleIE(InfoExtractor):
    """Extractor for r7.com article pages that embed an R7 player."""
    _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
    _TEST = {
        'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        # Defer to R7IE for any URL it already accepts.
        if R7IE.suitable(url):
            return False
        return super(R7ArticleIE, cls).suitable(url)

    def _real_extract(self, url):
        """Find the embedded player id on the page and hand off to R7IE."""
        article_id = self._match_id(url)
        page = self._download_webpage(url, article_id)

        media_id = self._search_regex(
            r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
            page, 'video id')

        player_url = 'http://player.r7.com/video/i/%s' % media_id
        return self.url_result(player_url, R7IE.ie_key())
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import( | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     str_to_int, | ||||
| ) | ||||
|   | ||||
| @@ -1,23 +1,23 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     js_to_json, | ||||
| ) | ||||
| from ..compat import compat_str | ||||
|  | ||||
|  | ||||
| class RDSIE(InfoExtractor): | ||||
|     IE_DESC = 'RDS.ca' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799', | ||||
|         'info_dict': { | ||||
|             'id': '3.1132799', | ||||
|             'id': '604333', | ||||
|             'display_id': 'fowler-jr-prend-la-direction-de-jacksonville', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Fowler Jr. prend la direction de Jacksonville', | ||||
| @@ -33,22 +33,17 @@ class RDSIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # TODO: extract f4m from 9c9media.com | ||||
|         video_url = self._search_regex( | ||||
|             r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"', | ||||
|             webpage, 'video url') | ||||
|  | ||||
|         title = self._og_search_title(webpage) or self._html_search_meta( | ||||
|         item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) | ||||
|         video_id = compat_str(item['id']) | ||||
|         title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( | ||||
|             'title', webpage, 'title', fatal=True) | ||||
|         description = self._og_search_description(webpage) or self._html_search_meta( | ||||
|             'description', webpage, 'description') | ||||
|         thumbnail = self._og_search_thumbnail(webpage) or self._search_regex( | ||||
|         thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( | ||||
|             [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', | ||||
|              r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], | ||||
|             webpage, 'thumbnail', fatal=False) | ||||
| @@ -61,13 +56,15 @@ class RDSIE(InfoExtractor): | ||||
|         age_limit = self._family_friendly_search(webpage) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'url': video_url, | ||||
|             'url': '9c9media:rds_web:%s' % video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'age_limit': age_limit, | ||||
|             'ie_key': 'NineCNineMedia', | ||||
|         } | ||||
|   | ||||
							
								
								
									
										60
									
								
								youtube_dl/extractor/sixplay.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								youtube_dl/extractor/sixplay.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     qualities, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
class SixPlayIE(InfoExtractor):
    """Extractor for 6play.fr clips, also reachable as ``6play:<id>``."""
    _VALID_URL = r'(?:6play:|https?://(?:www\.)?6play\.fr/.+?-c_)(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.6play.fr/jamel-et-ses-amis-au-marrakech-du-rire-p_1316/jamel-et-ses-amis-au-marrakech-du-rire-2015-c_11495320',
        'md5': '42310bffe4ba3982db112b9cd3467328',
        'info_dict': {
            'id': '11495320',
            'ext': 'mp4',
            'title': 'Jamel et ses amis au Marrakech du rire 2015',
            'description': 'md5:ba2149d5c321d5201b78070ee839d872',
        },
    }

    def _real_extract(self, url):
        """Fetch the player config for the clip and build its info dict."""
        video_id = self._match_id(url)
        config = self._download_json(
            'https://player.m6web.fr/v2/video/config/6play-auth/FR/%s.json' % video_id,
            video_id)
        info = config['videoInfo']

        rank = qualities(['lq', 'sd', 'hq', 'hd'])
        formats = []
        for entry in config['sources']:
            kind = entry.get('type')
            media_url = entry.get('src')
            # Sources without a URL and 'hls/primetime' sources are ignored.
            if not media_url or kind == 'hls/primetime':
                continue
            if kind == 'application/vnd.apple.mpegURL':
                hls_formats = self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', 'm3u8_native',
                    m3u8_id='hls', fatal=False)
                formats.extend(hls_formats)
                # The HDS manifest is requested at the same URL with the
                # '.m3u8' part swapped for '.f4m'.
                hds_formats = self._extract_f4m_formats(
                    media_url.replace('.m3u8', '.f4m'),
                    video_id, f4m_id='hds', fatal=False)
                formats.extend(hds_formats)
            elif kind == 'video/mp4':
                label = entry.get('quality')
                formats.append({
                    'url': media_url,
                    'format_id': label,
                    'quality': rank(label),
                })
        self._sort_formats(formats)

        title = info['title'].strip()
        return {
            'id': video_id,
            'title': title,
            'description': info.get('description'),
            'duration': int_or_none(info.get('duration')),
            'series': info.get('titlePgm'),
            'formats': formats,
        }
| @@ -67,7 +67,7 @@ class SkyNewsArabiaIE(SkyNewsArabiaBaseIE): | ||||
|  | ||||
|  | ||||
| class SkyNewsArabiaArticleIE(SkyNewsArabiaBaseIE): | ||||
|     IE_NAME = 'skynewsarabia:video' | ||||
|     IE_NAME = 'skynewsarabia:article' | ||||
|     _VALID_URL = r'https?://(?:www\.)?skynewsarabia\.com/web/article/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.skynewsarabia.com/web/article/794549/%D8%A7%D9%94%D8%AD%D8%AF%D8%A7%D8%AB-%D8%A7%D9%84%D8%B4%D8%B1%D9%82-%D8%A7%D9%84%D8%A7%D9%94%D9%88%D8%B3%D8%B7-%D8%AE%D8%B1%D9%8A%D8%B7%D8%A9-%D8%A7%D9%84%D8%A7%D9%94%D9%84%D8%B9%D8%A7%D8%A8-%D8%A7%D9%84%D8%B0%D9%83%D9%8A%D8%A9', | ||||
|   | ||||
							
								
								
									
										33
									
								
								youtube_dl/extractor/skysports.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								youtube_dl/extractor/skysports.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SkySportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?skysports\.com/watch/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.skysports.com/watch/video/10328419/bale-its-our-time-to-shine', | ||||
|         'md5': 'c44a1db29f27daf9a0003e010af82100', | ||||
|         'info_dict': { | ||||
|             'id': '10328419', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Bale: Its our time to shine', | ||||
|             'description': 'md5:9fd1de3614d525f5addda32ac3c482c9', | ||||
|         }, | ||||
|         'add_ie': ['Ooyala'], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'url_transparent', | ||||
|             'id': video_id, | ||||
|             'url': 'ooyala:%s' % self._search_regex( | ||||
|                 r'data-video-id="([^"]+)"', webpage, 'ooyala id'), | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'ie_key': 'Ooyala', | ||||
|         } | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/sportschau.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/sportschau.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .wdr import WDRBaseIE | ||||
| from ..utils import get_element_by_attribute | ||||
|  | ||||
|  | ||||
| class SportschauIE(WDRBaseIE): | ||||
|     IE_NAME = 'Sportschau' | ||||
|     _VALID_URL = r'https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video-?(?P<id>[^/#?]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.sportschau.de/uefaeuro2016/videos/video-dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100.html', | ||||
|         'info_dict': { | ||||
|             'id': 'mdb-1140188', | ||||
|             'display_id': 'dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'DFB-Team geht gut gelaunt ins Spiel gegen Polen', | ||||
|             'description': 'Vor dem zweiten Gruppenspiel gegen Polen herrscht gute Stimmung im deutschen Team. Insbesondere Bastian Schweinsteiger strotzt vor Optimismus nach seinem Tor gegen die Ukraine.', | ||||
|             'upload_date': '20160615', | ||||
|         }, | ||||
|         'skip': 'Geo-restricted to Germany', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = get_element_by_attribute('class', 'headline', webpage) | ||||
|         description = self._html_search_meta('description', webpage, 'description') | ||||
|  | ||||
|         info = self._extract_wdr_video(webpage, video_id) | ||||
|  | ||||
|         info.update({ | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         }) | ||||
|  | ||||
|         return info | ||||
| @@ -9,6 +9,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class SRMediathekIE(ARDMediathekIE): | ||||
|     IE_NAME = 'sr:mediathek' | ||||
|     IE_DESC = 'Saarländischer Rundfunk' | ||||
|     _VALID_URL = r'https?://sr-mediathek\.sr-online\.de/index\.php\?.*?&id=(?P<id>[0-9]+)' | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,6 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
| @@ -45,20 +44,26 @@ class StreamcloudIE(InfoExtractor): | ||||
|             (?:id="[^"]+"\s+)? | ||||
|             value="([^"]*)" | ||||
|             ''', orig_webpage) | ||||
|         post = urlencode_postdata(fields) | ||||
|  | ||||
|         self._sleep(12, video_id) | ||||
|         headers = { | ||||
|             b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|         } | ||||
|         req = sanitized_Request(url, post, headers) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             req, video_id, note='Downloading video page ...') | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1[^>]*>([^<]+)<', webpage, 'title') | ||||
|         video_url = self._search_regex( | ||||
|             r'file:\s*"([^"]+)"', webpage, 'video URL') | ||||
|             url, video_id, data=urlencode_postdata(fields), headers={ | ||||
|                 b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|             }) | ||||
|  | ||||
|         try: | ||||
|             title = self._html_search_regex( | ||||
|                 r'<h1[^>]*>([^<]+)<', webpage, 'title') | ||||
|             video_url = self._search_regex( | ||||
|                 r'file:\s*"([^"]+)"', webpage, 'video URL') | ||||
|         except ExtractorError: | ||||
|             message = self._html_search_regex( | ||||
|                 r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>', | ||||
|                 webpage, 'message', default=None, group='message') | ||||
|             if message: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|             raise | ||||
|         thumbnail = self._search_regex( | ||||
|             r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) | ||||
|  | ||||
|   | ||||
| @@ -6,17 +6,14 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     dict_get, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SVTBaseIE(InfoExtractor): | ||||
|     def _extract_video(self, url, video_id): | ||||
|         info = self._download_json(url, video_id) | ||||
|  | ||||
|         title = info['context']['title'] | ||||
|         thumbnail = info['context'].get('thumbnailImage') | ||||
|  | ||||
|         video_info = info['video'] | ||||
|     def _extract_video(self, video_info, video_id): | ||||
|         formats = [] | ||||
|         for vr in video_info['videoReferences']: | ||||
|             player_type = vr.get('playerType') | ||||
| @@ -40,27 +37,49 @@ class SVTBaseIE(InfoExtractor): | ||||
|                     'format_id': player_type, | ||||
|                     'url': vurl, | ||||
|                 }) | ||||
|         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): | ||||
|             self.raise_geo_restricted('This video is only available in Sweden') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         subtitle_references = video_info.get('subtitleReferences') | ||||
|         subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences')) | ||||
|         if isinstance(subtitle_references, list): | ||||
|             for sr in subtitle_references: | ||||
|                 subtitle_url = sr.get('url') | ||||
|                 subtitle_lang = sr.get('language', 'sv') | ||||
|                 if subtitle_url: | ||||
|                     subtitles.setdefault('sv', []).append({'url': subtitle_url}) | ||||
|                     if determine_ext(subtitle_url) == 'm3u8': | ||||
|                         # TODO(yan12125): handle WebVTT in m3u8 manifests | ||||
|                         continue | ||||
|  | ||||
|         duration = video_info.get('materialLength') | ||||
|         age_limit = 18 if video_info.get('inappropriateForChildren') else 0 | ||||
|                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url}) | ||||
|  | ||||
|         title = video_info.get('title') | ||||
|  | ||||
|         series = video_info.get('programTitle') | ||||
|         season_number = int_or_none(video_info.get('season')) | ||||
|         episode = video_info.get('episodeTitle') | ||||
|         episode_number = int_or_none(video_info.get('episodeNumber')) | ||||
|  | ||||
|         duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) | ||||
|         age_limit = None | ||||
|         adult = dict_get( | ||||
|             video_info, ('inappropriateForChildren', 'blockedForChildren'), | ||||
|             skip_false_values=False) | ||||
|         if adult is not None: | ||||
|             age_limit = 18 if adult else 0 | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'age_limit': age_limit, | ||||
|             'series': series, | ||||
|             'season_number': season_number, | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
|         } | ||||
|  | ||||
|  | ||||
| @@ -68,11 +87,11 @@ class SVTIE(SVTBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', | ||||
|         'md5': '9648197555fc1b49e3dc22db4af51d46', | ||||
|         'md5': '33e9a5d8f646523ce0868ecfb0eed77d', | ||||
|         'info_dict': { | ||||
|             'id': '2900353', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Stjärnorna skojar till det - under SVT-intervjun', | ||||
|             'duration': 27, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
| @@ -89,15 +108,20 @@ class SVTIE(SVTBaseIE): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         widget_id = mobj.group('widget_id') | ||||
|         article_id = mobj.group('id') | ||||
|         return self._extract_video( | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), | ||||
|             article_id) | ||||
|  | ||||
|         info_dict = self._extract_video(info['video'], article_id) | ||||
|         info_dict['title'] = info['context']['title'] | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
| class SVTPlayIE(SVTBaseIE): | ||||
|     IE_DESC = 'SVT Play and Öppet arkiv' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp)/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', | ||||
|         'md5': '2b6704fe4a28801e1a098bbf3c5ac611', | ||||
|         'info_dict': { | ||||
| @@ -113,12 +137,50 @@ class SVTPlayIE(SVTBaseIE): | ||||
|                 }] | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # geo restricted to Sweden | ||||
|         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         host = mobj.group('host') | ||||
|         return self._extract_video( | ||||
|             'http://www.%s.se/video/%s?output=json' % (host, video_id), | ||||
|             video_id) | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         data = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'root\["__svtplay"\]\s*=\s*([^;]+);', | ||||
|                 webpage, 'embedded data', default='{}'), | ||||
|             video_id, fatal=False) | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         if data: | ||||
|             video_info = try_get( | ||||
|                 data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], | ||||
|                 dict) | ||||
|             if video_info: | ||||
|                 info_dict = self._extract_video(video_info, video_id) | ||||
|                 info_dict.update({ | ||||
|                     'title': data['context']['dispatcher']['stores']['MetaStore']['title'], | ||||
|                     'thumbnail': thumbnail, | ||||
|                 }) | ||||
|                 return info_dict | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', | ||||
|             webpage, 'video id', default=None) | ||||
|  | ||||
|         if video_id: | ||||
|             data = self._download_json( | ||||
|                 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) | ||||
|             info_dict = self._extract_video(data, video_id) | ||||
|             if not info_dict.get('title'): | ||||
|                 info_dict['title'] = re.sub( | ||||
|                     r'\s*\|\s*.+?$', '', | ||||
|                     info_dict.get('episode') or self._og_search_title(webpage)) | ||||
|             return info_dict | ||||
|   | ||||
| @@ -48,6 +48,6 @@ class TF1IE(InfoExtractor): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         wat_id = self._html_search_regex( | ||||
|             r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8}).*?\1', | ||||
|             r'(["\'])(?:https?:)?//www\.wat\.tv/embedframe/.*?(?P<id>\d{8})\1', | ||||
|             webpage, 'wat id', group='id') | ||||
|         return self.url_result('wat:%s' % wat_id, 'Wat') | ||||
|   | ||||
| @@ -277,9 +277,9 @@ class ThePlatformIE(ThePlatformBaseIE): | ||||
|  | ||||
|  | ||||
| class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|     _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s' | ||||
|     _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)' | ||||
|     _TEST = { | ||||
|     _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s' | ||||
|     _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))' | ||||
|     _TESTS = [{ | ||||
|         # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207 | ||||
|         'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207', | ||||
|         'md5': '6e32495b5073ab414471b615c5ded394', | ||||
| @@ -295,32 +295,38 @@ class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|             'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'], | ||||
|             'uploader': 'NBCU-NEWS', | ||||
|         }, | ||||
|     } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         provider_id = mobj.group('provider_id') | ||||
|         feed_id = mobj.group('feed_id') | ||||
|  | ||||
|         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id) | ||||
|         feed = self._download_json(real_url, video_id) | ||||
|         entry = feed['entries'][0] | ||||
|     def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}): | ||||
|         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query) | ||||
|         entry = self._download_json(real_url, video_id)['entries'][0] | ||||
|  | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         first_video_id = None | ||||
|         duration = None | ||||
|         asset_types = [] | ||||
|         for item in entry['media$content']: | ||||
|             smil_url = item['plfile$url'] + '&mbr=true' | ||||
|             smil_url = item['plfile$url'] | ||||
|             cur_video_id = ThePlatformIE._match_id(smil_url) | ||||
|             if first_video_id is None: | ||||
|                 first_video_id = cur_video_id | ||||
|                 duration = float_or_none(item.get('plfile$duration')) | ||||
|             cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id) | ||||
|             formats.extend(cur_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, cur_subtitles) | ||||
|             for asset_type in item['plfile$assetTypes']: | ||||
|                 if asset_type in asset_types: | ||||
|                     continue | ||||
|                 asset_types.append(asset_type) | ||||
|                 query = { | ||||
|                     'mbr': 'true', | ||||
|                     'formats': item['plfile$format'], | ||||
|                     'assetTypes': asset_type, | ||||
|                 } | ||||
|                 if asset_type in asset_types_query: | ||||
|                     query.update(asset_types_query[asset_type]) | ||||
|                 cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query( | ||||
|                     smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type) | ||||
|                 formats.extend(cur_formats) | ||||
|                 subtitles = self._merge_subtitles(subtitles, cur_subtitles) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -344,5 +350,17 @@ class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|             'timestamp': timestamp, | ||||
|             'categories': categories, | ||||
|         }) | ||||
|         if custom_fields: | ||||
|             ret.update(custom_fields(entry)) | ||||
|  | ||||
|         return ret | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         provider_id = mobj.group('provider_id') | ||||
|         feed_id = mobj.group('feed_id') | ||||
|         filter_query = mobj.group('filter') | ||||
|  | ||||
|         return self._extract_feed_info(provider_id, feed_id, filter_query, video_id) | ||||
|   | ||||
| @@ -4,6 +4,12 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     clean_html, | ||||
|     get_element_by_attribute, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TVPIE(InfoExtractor): | ||||
| @@ -21,7 +27,7 @@ class TVPIE(InfoExtractor): | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.tvp.pl/there-can-be-anything-so-i-shortened-it/17916176', | ||||
|         'md5': 'c3b15ed1af288131115ff17a17c19dda', | ||||
|         'md5': 'b0005b542e5b4de643a9690326ab1257', | ||||
|         'info_dict': { | ||||
|             'id': '17916176', | ||||
|             'ext': 'mp4', | ||||
| @@ -53,6 +59,11 @@ class TVPIE(InfoExtractor): | ||||
|         webpage = self._download_webpage( | ||||
|             'http://www.tvp.pl/sess/tvplayer.php?object_id=%s' % video_id, video_id) | ||||
|  | ||||
|         error_massage = get_element_by_attribute('class', 'msg error', webpage) | ||||
|         if error_massage: | ||||
|             raise ExtractorError('%s said: %s' % ( | ||||
|                 self.IE_NAME, clean_html(error_massage)), expected=True) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'name\s*:\s*([\'"])Title\1\s*,\s*value\s*:\s*\1(?P<title>.+?)\1', | ||||
|             webpage, 'title', group='title') | ||||
| @@ -66,24 +77,50 @@ class TVPIE(InfoExtractor): | ||||
|             r"poster\s*:\s*'([^']+)'", webpage, 'thumbnail', default=None) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'0:{src:([\'"])(?P<url>.*?)\1', webpage, 'formats', group='url', default=None) | ||||
|         if not video_url: | ||||
|             r'0:{src:([\'"])(?P<url>.*?)\1', webpage, | ||||
|             'formats', group='url', default=None) | ||||
|         if not video_url or 'material_niedostepny.mp4' in video_url: | ||||
|             video_url = self._download_json( | ||||
|                 'http://www.tvp.pl/pub/stat/videofileinfo?video_id=%s' % video_id, | ||||
|                 video_id)['video_url'] | ||||
|  | ||||
|         ext = video_url.rsplit('.', 1)[-1] | ||||
|         if ext != 'ism/manifest': | ||||
|             if '/' in ext: | ||||
|                 ext = 'mp4' | ||||
|         formats = [] | ||||
|         video_url_base = self._search_regex( | ||||
|             r'(https?://.+?/video)(?:\.(?:ism|f4m|m3u8)|-\d+\.mp4)', | ||||
|             video_url, 'video base url', default=None) | ||||
|         if video_url_base: | ||||
|             # TODO: Current DASH formats are broken - $Time$ pattern in | ||||
|             # <SegmentTemplate> not implemented yet | ||||
|             # formats.extend(self._extract_mpd_formats( | ||||
|             #     video_url_base + '.ism/video.mpd', | ||||
|             #     video_id, mpd_id='dash', fatal=False)) | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 video_url_base + '.ism/video.f4m', | ||||
|                 video_id, f4m_id='hds', fatal=False)) | ||||
|             m3u8_formats = self._extract_m3u8_formats( | ||||
|                 video_url_base + '.ism/video.m3u8', video_id, | ||||
|                 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) | ||||
|             self._sort_formats(m3u8_formats) | ||||
|             m3u8_formats = list(filter( | ||||
|                 lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                 m3u8_formats)) | ||||
|             formats.extend(m3u8_formats) | ||||
|             for i, m3u8_format in enumerate(m3u8_formats, 2): | ||||
|                 http_url = '%s-%d.mp4' % (video_url_base, i) | ||||
|                 if self._is_valid_url(http_url, video_id): | ||||
|                     f = m3u8_format.copy() | ||||
|                     f.update({ | ||||
|                         'url': http_url, | ||||
|                         'format_id': f['format_id'].replace('hls', 'http'), | ||||
|                         'protocol': 'http', | ||||
|                     }) | ||||
|                     formats.append(f) | ||||
|         else: | ||||
|             formats = [{ | ||||
|                 'format_id': 'direct', | ||||
|                 'url': video_url, | ||||
|                 'ext': ext, | ||||
|                 'ext': determine_ext(video_url, 'mp4'), | ||||
|             }] | ||||
|         else: | ||||
|             m3u8_url = re.sub('([^/]*)\.ism/manifest', r'\1.ism/\1.m3u8', video_url) | ||||
|             formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4') | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ class TwitchBaseIE(InfoExtractor): | ||||
|     _VALID_URL_BASE = r'https?://(?:www\.)?twitch\.tv' | ||||
|  | ||||
|     _API_BASE = 'https://api.twitch.tv' | ||||
|     _USHER_BASE = 'http://usher.twitch.tv' | ||||
|     _USHER_BASE = 'https://usher.ttvnw.net' | ||||
|     _LOGIN_URL = 'http://www.twitch.tv/login' | ||||
|     _NETRC_MACHINE = 'twitch' | ||||
|  | ||||
|   | ||||
							
								
								
									
										67
									
								
								youtube_dl/extractor/urplay.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								youtube_dl/extractor/urplay.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,67 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class URPlayIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?urplay\.se/program/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde', | ||||
|         'md5': '15ca67b63fd8fb320ac2bcd854bad7b6', | ||||
|         'info_dict': { | ||||
|             'id': '190031', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tripp, Trapp, Träd : Sovkudde', | ||||
|             'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         urplayer_data = self._parse_json(self._search_regex( | ||||
|             r'urPlayer\.init\(({.+?})\);', webpage, 'urplayer data'), video_id) | ||||
|         host = self._download_json('http://streaming-loadbalancer.ur.se/loadbalancer.json', video_id)['redirect'] | ||||
|  | ||||
|         formats = [] | ||||
|         for quality_attr, quality, preference in (('', 'sd', 0), ('_hd', 'hd', 1)): | ||||
|             file_rtmp = urplayer_data.get('file_rtmp' + quality_attr) | ||||
|             if file_rtmp: | ||||
|                 formats.append({ | ||||
|                     'url': 'rtmp://%s/urplay/mp4:%s' % (host, file_rtmp), | ||||
|                     'format_id': quality + '-rtmp', | ||||
|                     'ext': 'flv', | ||||
|                     'preference': preference, | ||||
|                 }) | ||||
|             file_http = urplayer_data.get('file_http' + quality_attr) or urplayer_data.get('file_http_sub' + quality_attr) | ||||
|             if file_http: | ||||
|                 file_http_base_url = 'http://%s/%s' % (host, file_http) | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     file_http_base_url + 'manifest.f4m', video_id, | ||||
|                     preference, '%s-hds' % quality, fatal=False)) | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     file_http_base_url + 'playlist.m3u8', video_id, 'mp4', | ||||
|                     'm3u8_native', preference, '%s-hls' % quality, fatal=False)) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         for subtitle in urplayer_data.get('subtitles', []): | ||||
|             subtitle_url = subtitle.get('file') | ||||
|             kind = subtitle.get('kind') | ||||
|             if subtitle_url or kind and kind != 'captions': | ||||
|                 continue | ||||
|             subtitles.setdefault(subtitle.get('label', 'Svenska'), []).append({ | ||||
|                 'url': subtitle_url, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': urplayer_data['title'], | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': urplayer_data.get('image'), | ||||
|             'series': urplayer_data.get('series_title'), | ||||
|             'subtitles': subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/vidbit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/vidbit.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     remove_end, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VidbitIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?vidbit\.co/(?:watch|embed)\?.*?\bv=(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.vidbit.co/watch?v=jkL2yDOEq2', | ||||
|         'md5': '1a34b7f14defe3b8fafca9796892924d', | ||||
|         'info_dict': { | ||||
|             'id': 'jkL2yDOEq2', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Intro to VidBit', | ||||
|             'description': 'md5:5e0d6142eec00b766cbf114bfd3d16b7', | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'upload_date': '20160618', | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.vidbit.co/embed?v=jkL2yDOEq2&auto=0&water=0', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         """Extract a single VidBit video. | ||||
|  | ||||
|         Downloads the /watch page for the id matched from `url` and returns an | ||||
|         info dict with id, url, title, description, thumbnail, upload_date, | ||||
|         view_count and comment_count. | ||||
|         """ | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         # Always fetch the /watch page on the host of `url`, whatever form of | ||||
|         # URL (e.g. /embed) was passed in. | ||||
|         webpage = self._download_webpage( | ||||
|             compat_urlparse.urljoin(url, '/watch?v=%s' % video_id), video_id) | ||||
|  | ||||
|         video_url, title = [None] * 2 | ||||
|  | ||||
|         # Primary source: the JS object passed to `.setup(...)`, converted to | ||||
|         # JSON with js_to_json. The '{}' default yields an empty config when the | ||||
|         # call is not found, so the fallbacks below take over. | ||||
|         config = self._parse_json(self._search_regex( | ||||
|             r'(?s)\.setup\(({.+?})\);', webpage, 'setup', default='{}'), | ||||
|             video_id, transform_source=js_to_json) | ||||
|         if config: | ||||
|             if config.get('file'): | ||||
|                 # 'file' may be relative, so resolve it against the input URL. | ||||
|                 video_url = compat_urlparse.urljoin(url, config['file']) | ||||
|             title = config.get('title') | ||||
|  | ||||
|         # Fallback: look for a quoted `file: '...'` value anywhere in the page. | ||||
|         # No default is given here; presumably a miss is fatal - confirm | ||||
|         # against _search_regex. | ||||
|         if not video_url: | ||||
|             video_url = compat_urlparse.urljoin(url, self._search_regex( | ||||
|                 r'file\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', | ||||
|                 webpage, 'video URL', group='url')) | ||||
|  | ||||
|         # Fallback title: <h1>, then <title>, then the og title; the | ||||
|         # ' - VidBit' suffix is passed to remove_end. | ||||
|         if not title: | ||||
|             title = remove_end( | ||||
|                 self._html_search_regex( | ||||
|                     (r'<h1>(.+?)</h1>', r'<title>(.+?)</title>'), | ||||
|                     webpage, 'title', default=None) or self._og_search_title(webpage), | ||||
|                 ' - VidBit') | ||||
|  | ||||
|         description = self._html_search_meta( | ||||
|             ('description', 'og:description', 'twitter:description'), | ||||
|             webpage, 'description') | ||||
|  | ||||
|         upload_date = unified_strdate(self._html_search_meta( | ||||
|             'datePublished', webpage, 'upload date')) | ||||
|  | ||||
|         # Both counters are optional (fatal=False) and scraped from page markup. | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'<strong>(\d+)</strong> views', | ||||
|             webpage, 'view count', fatal=False)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'id=["\']cmt_num["\'][^>]*>\((\d+)\)', | ||||
|             webpage, 'comment count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'upload_date': upload_date, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
| @@ -8,6 +8,7 @@ import itertools | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
| @@ -15,6 +16,7 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     NO_DEFAULT, | ||||
|     RegexNotFoundError, | ||||
|     sanitized_Request, | ||||
|     smuggle_url, | ||||
| @@ -24,6 +26,7 @@ from ..utils import ( | ||||
|     urlencode_postdata, | ||||
|     unescapeHTML, | ||||
|     parse_filesize, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -54,6 +57,26 @@ class VimeoBaseInfoExtractor(InfoExtractor): | ||||
|         self._set_vimeo_cookie('vuid', vuid) | ||||
|         self._download_webpage(login_request, None, False, 'Wrong login info') | ||||
|  | ||||
|     def _verify_video_password(self, url, video_id, webpage): | ||||
|         """Submit the user-supplied video password for a protected page. | ||||
|  | ||||
|         Reads the 'videopassword' downloader param, POSTs it together with the | ||||
|         xsrf token taken from `webpage` to `url + '/password'`, and returns | ||||
|         the page downloaded from that request. | ||||
|  | ||||
|         Raises ExtractorError (expected=True) when no password was supplied. | ||||
|         """ | ||||
|         password = self._downloader.params.get('videopassword') | ||||
|         if password is None: | ||||
|             raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) | ||||
|         token, vuid = self._extract_xsrft_and_vuid(webpage) | ||||
|         data = urlencode_postdata({ | ||||
|             'password': password, | ||||
|             'token': token, | ||||
|         }) | ||||
|         if url.startswith('http://'): | ||||
|             # vimeo only supports https now, but the user can give an http url | ||||
|             # NOTE(review): str.replace rewrites every 'http://' occurrence in | ||||
|             # the URL, not only the leading scheme. | ||||
|             url = url.replace('http://', 'https://') | ||||
|         password_request = sanitized_Request(url + '/password', data) | ||||
|         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         password_request.add_header('Referer', url) | ||||
|         # The vuid cookie is set before the request is sent. | ||||
|         self._set_vimeo_cookie('vuid', vuid) | ||||
|         return self._download_webpage( | ||||
|             password_request, video_id, | ||||
|             'Verifying the password', 'Wrong password') | ||||
|  | ||||
|     def _extract_xsrft_and_vuid(self, webpage): | ||||
|         xsrft = self._search_regex( | ||||
|             r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)', | ||||
| @@ -144,7 +167,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|                             \. | ||||
|                         )? | ||||
|                         vimeo(?P<pro>pro)?\.com/ | ||||
|                         (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/) | ||||
|                         (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) | ||||
|                         (?:.*?/)? | ||||
|                         (?: | ||||
|                             (?: | ||||
| @@ -225,8 +248,6 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         { | ||||
|             'url': 'http://vimeo.com/channels/keypeele/75629013', | ||||
|             'md5': '2f86a05afe9d7abc0b9126d229bbe15d', | ||||
|             'note': 'Video is freely available via original URL ' | ||||
|                     'and protected with password when accessed via http://vimeo.com/75629013', | ||||
|             'info_dict': { | ||||
|                 'id': '75629013', | ||||
|                 'ext': 'mp4', | ||||
| @@ -270,7 +291,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         { | ||||
|             # contains original format | ||||
|             'url': 'https://vimeo.com/33951933', | ||||
|             'md5': '53c688fa95a55bf4b7293d37a89c5c53', | ||||
|             'md5': '2d9f5475e0537f013d0073e812ab89e6', | ||||
|             'info_dict': { | ||||
|                 'id': '33951933', | ||||
|                 'ext': 'mp4', | ||||
| @@ -282,6 +303,29 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|                 'description': 'md5:ae23671e82d05415868f7ad1aec21147', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # only available via https://vimeo.com/channels/tributes/6213729 and | ||||
|             # not via https://vimeo.com/6213729 | ||||
|             'url': 'https://vimeo.com/channels/tributes/6213729', | ||||
|             'info_dict': { | ||||
|                 'id': '6213729', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Vimeo Tribute: The Shining', | ||||
|                 'uploader': 'Casey Donahue', | ||||
|                 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue', | ||||
|                 'uploader_id': 'caseydonahue', | ||||
|                 'upload_date': '20090821', | ||||
|                 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'expected_warnings': ['Unable to download JSON metadata'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://vimeo.com/109815029', | ||||
|             'note': 'Video not completely processed, "failed" seed status', | ||||
| @@ -291,6 +335,10 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|             'url': 'https://vimeo.com/groups/travelhd/videos/22439234', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://vimeo.com/album/2632481/video/79010983', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # source file returns 403: Forbidden | ||||
|             'url': 'https://vimeo.com/7809605', | ||||
| @@ -317,26 +365,6 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         if mobj: | ||||
|             return mobj.group(1) | ||||
|  | ||||
|     def _verify_video_password(self, url, video_id, webpage): | ||||
|         password = self._downloader.params.get('videopassword') | ||||
|         if password is None: | ||||
|             raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) | ||||
|         token, vuid = self._extract_xsrft_and_vuid(webpage) | ||||
|         data = urlencode_postdata({ | ||||
|             'password': password, | ||||
|             'token': token, | ||||
|         }) | ||||
|         if url.startswith('http://'): | ||||
|             # vimeo only supports https now, but the user can give an http url | ||||
|             url = url.replace('http://', 'https://') | ||||
|         password_request = sanitized_Request(url + '/password', data) | ||||
|         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         password_request.add_header('Referer', url) | ||||
|         self._set_vimeo_cookie('vuid', vuid) | ||||
|         return self._download_webpage( | ||||
|             password_request, video_id, | ||||
|             'Verifying the password', 'Wrong password') | ||||
|  | ||||
|     def _verify_player_video_password(self, url, video_id): | ||||
|         password = self._downloader.params.get('videopassword') | ||||
|         if password is None: | ||||
| @@ -367,7 +395,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         orig_url = url | ||||
|         if mobj.group('pro') or mobj.group('player'): | ||||
|             url = 'https://player.vimeo.com/video/' + video_id | ||||
|         else: | ||||
|         elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): | ||||
|             url = 'https://vimeo.com/' + video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
| @@ -445,7 +473,18 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|             if config.get('view') == 4: | ||||
|                 config = self._verify_player_video_password(url, video_id) | ||||
|  | ||||
|         if '>You rented this title.<' in webpage: | ||||
|         def is_rented(): | ||||
|             # Return True if any of three signals indicates a rented/purchased | ||||
|             # title; uses `webpage` and `config` from the enclosing scope. | ||||
|             # 1) the rental notice appears literally in the page markup | ||||
|             if '>You rented this title.<' in webpage: | ||||
|                 return True | ||||
|             # 2) the player config flags the user as having purchased it | ||||
|             if config.get('user', {}).get('purchased'): | ||||
|                 return True | ||||
|             # 3) the first purchase option's label starts with the rental notice | ||||
|             #    (try_get presumably yields None when the path is missing or the | ||||
|             #    value is not a compat_str - confirm against utils.try_get) | ||||
|             label = try_get( | ||||
|                 config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str) | ||||
|             if label and label.startswith('You rented this'): | ||||
|                 return True | ||||
|             return False | ||||
|  | ||||
|         if is_rented(): | ||||
|             feature_id = config.get('video', {}).get('vod', {}).get('feature_id') | ||||
|             if feature_id and not data.get('force_feature_id', False): | ||||
|                 return self.url_result(smuggle_url( | ||||
| @@ -617,8 +656,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): | ||||
|                 webpage = self._login_list_password(page_url, list_id, webpage) | ||||
|                 yield self._extract_list_title(webpage) | ||||
|  | ||||
|             for video_id in re.findall(r'id="clip_(\d+?)"', webpage): | ||||
|                 yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') | ||||
|             # Try extracting href first since not all videos are available via | ||||
|             # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) | ||||
|             clips = re.findall( | ||||
|                 r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage) | ||||
|             if clips: | ||||
|                 for video_id, video_url in clips: | ||||
|                     yield self.url_result( | ||||
|                         compat_urlparse.urljoin(base_url, video_url), | ||||
|                         VimeoIE.ie_key(), video_id=video_id) | ||||
|             # More relaxed fallback | ||||
|             else: | ||||
|                 for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): | ||||
|                     yield self.url_result( | ||||
|                         'https://vimeo.com/%s' % video_id, | ||||
|                         VimeoIE.ie_key(), video_id=video_id) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|                 break | ||||
| @@ -655,7 +707,7 @@ class VimeoUserIE(VimeoChannelIE): | ||||
|  | ||||
| class VimeoAlbumIE(VimeoChannelIE): | ||||
|     IE_NAME = 'vimeo:album' | ||||
|     _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))' | ||||
|     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://vimeo.com/album/2632481', | ||||
| @@ -675,6 +727,13 @@ class VimeoAlbumIE(VimeoChannelIE): | ||||
|         'params': { | ||||
|             'videopassword': 'youtube-dl', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # TODO: respect page number | ||||
|         'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _page_url(self, base_url, pagenum): | ||||
| @@ -733,12 +792,39 @@ class VimeoReviewIE(VimeoBaseInfoExtractor): | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'uploader_id': 'user22258446', | ||||
|         } | ||||
|     }, { | ||||
|         'note': 'Password protected', | ||||
|         'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde', | ||||
|         'info_dict': { | ||||
|             'id': '138823582', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1', | ||||
|             'uploader': 'TMB', | ||||
|             'uploader_id': 'user37284429', | ||||
|         }, | ||||
|         'params': { | ||||
|             'videopassword': 'holygrail', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         # Delegates to self._login(); presumably invoked by the InfoExtractor | ||||
|         # framework before extraction starts - confirm against the base class. | ||||
|         self._login() | ||||
|  | ||||
|     def _get_config_url(self, webpage_url, video_id, video_password_verified=False): | ||||
|         """Return the player config URL found in the page at `webpage_url`. | ||||
|  | ||||
|         The URL is read from the page's data-config-url attribute. If it is | ||||
|         missing on the first pass, the video password is verified and the page | ||||
|         is downloaded and searched once more. | ||||
|  | ||||
|         `video_password_verified` is set on the retry, so the method recurses | ||||
|         at most once. | ||||
|         """ | ||||
|         webpage = self._download_webpage(webpage_url, video_id) | ||||
|         # First pass: default=None, so a missing attribute returns None. | ||||
|         # Retry: default=NO_DEFAULT, which presumably makes a miss raise - | ||||
|         # confirm against _html_search_regex. | ||||
|         config_url = self._html_search_regex( | ||||
|             r'data-config-url="([^"]+)"', webpage, 'config URL', | ||||
|             default=NO_DEFAULT if video_password_verified else None) | ||||
|         if config_url is None: | ||||
|             # No config URL in the page: submit the password (this raises if | ||||
|             # none was supplied), then re-fetch the page and search again. | ||||
|             self._verify_video_password(webpage_url, video_id, webpage) | ||||
|             config_url = self._get_config_url( | ||||
|                 webpage_url, video_id, video_password_verified=True) | ||||
|         return config_url | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         config = self._download_json( | ||||
|             'https://player.vimeo.com/video/%s/config' % video_id, video_id) | ||||
|         config_url = self._get_config_url(url, video_id) | ||||
|         config = self._download_json(config_url, video_id) | ||||
|         info_dict = self._parse_config(config, video_id) | ||||
|         self._vimeo_sort_formats(info_dict['formats']) | ||||
|         info_dict['id'] = video_id | ||||
|   | ||||
| @@ -24,6 +24,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20130519', | ||||
|             'uploader': 'Jack Dorsey', | ||||
|             'uploader_id': '76', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -39,6 +40,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20140815', | ||||
|             'uploader': 'Mars Ruiz', | ||||
|             'uploader_id': '1102363502380728320', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -54,6 +56,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20130430', | ||||
|             'uploader': 'Z3k3', | ||||
|             'uploader_id': '936470460173008896', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -71,6 +74,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20150705', | ||||
|             'uploader': 'Pimry_zaa', | ||||
|             'uploader_id': '1135760698325307392', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -86,10 +90,12 @@ class VineIE(InfoExtractor): | ||||
|  | ||||
|         data = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'window\.POST_DATA\s*=\s*{\s*%s\s*:\s*({.+?})\s*};\s*</script>' % video_id, | ||||
|                 r'window\.POST_DATA\s*=\s*({.+?});\s*</script>', | ||||
|                 webpage, 'vine data'), | ||||
|             video_id) | ||||
|  | ||||
|         data = data[list(data.keys())[0]] | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%(format)s-%(rate)s' % f, | ||||
|             'vcodec': f.get('format'), | ||||
| @@ -109,6 +115,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': unified_strdate(data.get('created')), | ||||
|             'uploader': username, | ||||
|             'uploader_id': data.get('userIdStr'), | ||||
|             'view_count': int_or_none(data.get('loops', {}).get('count')), | ||||
|             'like_count': int_or_none(data.get('likes', {}).get('count')), | ||||
|             'comment_count': int_or_none(data.get('comments', {}).get('count')), | ||||
|             'repost_count': int_or_none(data.get('reposts', {}).get('count')), | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import sys | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| @@ -10,7 +11,6 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     orderedSet, | ||||
|     sanitized_Request, | ||||
|     str_to_int, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| @@ -190,7 +190,7 @@ class VKIE(InfoExtractor): | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|         login_page, url_handle = self._download_webpage_handle( | ||||
|             'https://vk.com', None, 'Downloading login page') | ||||
|  | ||||
|         login_form = self._hidden_inputs(login_page) | ||||
| @@ -200,11 +200,26 @@ class VKIE(InfoExtractor): | ||||
|             'pass': password.encode('cp1251'), | ||||
|         }) | ||||
|  | ||||
|         request = sanitized_Request( | ||||
|             'https://login.vk.com/?act=login', | ||||
|             urlencode_postdata(login_form)) | ||||
|         # https://new.vk.com/ serves two cookies with the same name (remixlhk) | ||||
|         # in the Set-Cookie header and expects the first one to be set rather | ||||
|         # than the second (see | ||||
|         # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). | ||||
|         # Per RFC 6265 the newer cookie replaces the older one in the cookie | ||||
|         # store, which is what actually happens. | ||||
|         # We work around this VK issue by manually resetting the remixlhk | ||||
|         # cookie to the first one. | ||||
|         cookies = url_handle.headers.get('Set-Cookie') | ||||
|         if sys.version_info[0] >= 3: | ||||
|             cookies = cookies.encode('iso-8859-1') | ||||
|         cookies = cookies.decode('utf-8') | ||||
|         remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) | ||||
|         if remixlhk: | ||||
|             value, domain = remixlhk.groups() | ||||
|             self._set_cookie(domain, 'remixlhk', value) | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             request, None, note='Logging in as %s' % username) | ||||
|             'https://login.vk.com/?act=login', None, | ||||
|             note='Logging in as %s' % username, | ||||
|             data=urlencode_postdata(login_form)) | ||||
|  | ||||
|         if re.search(r'onLoginFailed', login_page): | ||||
|             raise ExtractorError( | ||||
|   | ||||
| @@ -25,7 +25,8 @@ class VRTIE(InfoExtractor): | ||||
|                 'timestamp': 1414271750.949, | ||||
|                 'upload_date': '20141025', | ||||
|                 'duration': 929, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'HTTP Error 404: Not Found', | ||||
|         }, | ||||
|         # sporza.be | ||||
|         { | ||||
| @@ -39,7 +40,8 @@ class VRTIE(InfoExtractor): | ||||
|                 'timestamp': 1413835980.560, | ||||
|                 'upload_date': '20141020', | ||||
|                 'duration': 3238, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'HTTP Error 404: Not Found', | ||||
|         }, | ||||
|         # cobra.be | ||||
|         { | ||||
| @@ -53,16 +55,39 @@ class VRTIE(InfoExtractor): | ||||
|                 'timestamp': 1413967500.494, | ||||
|                 'upload_date': '20141022', | ||||
|                 'duration': 661, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'HTTP Error 404: Not Found', | ||||
|         }, | ||||
|         { | ||||
|             # YouTube video | ||||
|             'url': 'http://deredactie.be/cm/vrtnieuws/videozone/nieuws/cultuurenmedia/1.2622957', | ||||
|             'only_matching': True, | ||||
|             'md5': 'b8b93da1df1cea6c8556255a796b7d61', | ||||
|             'info_dict': { | ||||
|                 'id': 'Wji-BZ0oCwg', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'ROGUE ONE: A STAR WARS STORY Official Teaser Trailer', | ||||
|                 'description': 'md5:8e468944dce15567a786a67f74262583', | ||||
|                 'uploader': 'Star Wars', | ||||
|                 'uploader_id': 'starwars', | ||||
|                 'upload_date': '20160407', | ||||
|             }, | ||||
|             'add_ie': ['Youtube'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://cobra.canvas.be/cm/cobra/videozone/rubriek/film-videozone/1.2377055', | ||||
|             'only_matching': True, | ||||
|             'md5': '', | ||||
|             'info_dict': { | ||||
|                 'id': '2377055', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Cafe Derby', | ||||
|                 'description': 'Lenny Van Wesemael debuteert met de langspeelfilm Café Derby. Een waar gebeurd maar ook verzonnen verhaal.', | ||||
|                 'upload_date': '20150626', | ||||
|                 'timestamp': 1435305240.769, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
| @@ -98,6 +123,32 @@ class VRTIE(InfoExtractor): | ||||
|                 formats.extend(self._extract_m3u8_formats( | ||||
|                     src, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                     m3u8_id='hls', fatal=False)) | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     src.replace('playlist.m3u8', 'manifest.f4m'), | ||||
|                     video_id, f4m_id='hds', fatal=False)) | ||||
|                 if 'data-video-geoblocking="true"' not in webpage: | ||||
|                     rtmp_formats = self._extract_smil_formats( | ||||
|                         src.replace('playlist.m3u8', 'jwplayer.smil'), | ||||
|                         video_id, fatal=False) | ||||
|                     formats.extend(rtmp_formats) | ||||
|                     for rtmp_format in rtmp_formats: | ||||
|                         rtmp_format_c = rtmp_format.copy() | ||||
|                         rtmp_format_c['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path']) | ||||
|                         del rtmp_format_c['play_path'] | ||||
|                         del rtmp_format_c['ext'] | ||||
|                         http_format = rtmp_format_c.copy() | ||||
|                         http_format.update({ | ||||
|                             'url': rtmp_format_c['url'].replace('rtmp://', 'http://').replace('vod.', 'download.').replace('/_definst_/', '/').replace('mp4:', ''), | ||||
|                             'format_id': rtmp_format['format_id'].replace('rtmp', 'http'), | ||||
|                             'protocol': 'http', | ||||
|                         }) | ||||
|                         rtsp_format = rtmp_format_c.copy() | ||||
|                         rtsp_format.update({ | ||||
|                             'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'), | ||||
|                             'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'), | ||||
|                             'protocol': 'rtsp', | ||||
|                         }) | ||||
|                         formats.extend([http_format, rtsp_format]) | ||||
|             else: | ||||
|                 formats.extend(self._extract_f4m_formats( | ||||
|                     '%s/manifest.f4m' % src, video_id, f4m_id='hds', fatal=False)) | ||||
|   | ||||
| @@ -15,7 +15,87 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WDRIE(InfoExtractor): | ||||
| class WDRBaseIE(InfoExtractor): | ||||
|     """Base extractor holding the WDR media extraction shared by subclasses.""" | ||||
|  | ||||
|     def _extract_wdr_video(self, webpage, display_id): | ||||
|         """Build an info dict from the data-extension metadata in `webpage`. | ||||
|  | ||||
|         Returns None when no data-extension attribute is found. Otherwise | ||||
|         returns a dict with id, display_id, title, alt_title, formats, | ||||
|         subtitles and upload_date. | ||||
|  | ||||
|         The 'mediaObj'/'url', 'trackerData', 'mediaResource' and | ||||
|         'trackerClipTitle' keys are read by direct subscripting, so a missing | ||||
|         key raises KeyError. | ||||
|         """ | ||||
|         # for wdr.de the data-extension is in a tag with the class "mediaLink" | ||||
|         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" | ||||
|         # for wdrmaus it's in a link to the page in a multiline "videoLink"-tag | ||||
|         json_metadata = self._html_search_regex( | ||||
|             r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', | ||||
|             webpage, 'media link', default=None, flags=re.MULTILINE) | ||||
|  | ||||
|         # No embedded player metadata on this page: let the caller decide. | ||||
|         if not json_metadata: | ||||
|             return | ||||
|  | ||||
|         media_link_obj = self._parse_json(json_metadata, display_id, | ||||
|                                           transform_source=js_to_json) | ||||
|         jsonp_url = media_link_obj['mediaObj']['url'] | ||||
|  | ||||
|         # The metadata is served as JSONP; strip_jsonp is applied before | ||||
|         # parsing. NOTE(review): the literal 'metadata' is passed where other | ||||
|         # calls pass display_id - confirm this is intended. | ||||
|         metadata = self._download_json( | ||||
|             jsonp_url, 'metadata', transform_source=strip_jsonp) | ||||
|  | ||||
|         metadata_tracker_data = metadata['trackerData'] | ||||
|         metadata_media_resource = metadata['mediaResource'] | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         # check if the metadata contains a direct URL to a file | ||||
|         for kind, media_resource in metadata_media_resource.items(): | ||||
|             # Only the 'dflt' and 'alt' resources are inspected. | ||||
|             if kind not in ('dflt', 'alt'): | ||||
|                 continue | ||||
|  | ||||
|             for tag_name, medium_url in media_resource.items(): | ||||
|                 if tag_name not in ('videoURL', 'audioURL'): | ||||
|                     continue | ||||
|  | ||||
|                 # Dispatch on the URL's extension: HLS, HDS and SMIL manifests | ||||
|                 # are expanded; anything else is taken as a direct media URL. | ||||
|                 ext = determine_ext(medium_url) | ||||
|                 if ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         medium_url, display_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='hls')) | ||||
|                 elif ext == 'f4m': | ||||
|                     # The hdcore/plugin query parameters are added to the | ||||
|                     # manifest URL before it is requested. | ||||
|                     manifest_url = update_url_query( | ||||
|                         medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         manifest_url, display_id, f4m_id='hds', fatal=False)) | ||||
|                 elif ext == 'smil': | ||||
|                     # NOTE(review): the literal 'stream' is passed where other | ||||
|                     # calls pass display_id - confirm this is intended. | ||||
|                     formats.extend(self._extract_smil_formats( | ||||
|                         medium_url, 'stream', fatal=False)) | ||||
|                 else: | ||||
|                     a_format = { | ||||
|                         'url': medium_url | ||||
|                     } | ||||
|                     # 'ext' is only set explicitly when it could not be | ||||
|                     # determined from the URL; it is then detected from the | ||||
|                     # response handle of an extra request. | ||||
|                     if ext == 'unknown_video': | ||||
|                         urlh = self._request_webpage( | ||||
|                             medium_url, display_id, note='Determining extension') | ||||
|                         ext = urlhandle_detect_ext(urlh) | ||||
|                         a_format['ext'] = ext | ||||
|                     formats.append(a_format) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # An optional caption URL is exposed as a TTML subtitle track under 'de'. | ||||
|         subtitles = {} | ||||
|         caption_url = metadata_media_resource.get('captionURL') | ||||
|         if caption_url: | ||||
|             subtitles['de'] = [{ | ||||
|                 'url': caption_url, | ||||
|                 'ext': 'ttml', | ||||
|             }] | ||||
|  | ||||
|         title = metadata_tracker_data['trackerClipTitle'] | ||||
|  | ||||
|         return { | ||||
|             # Falls back to display_id when the tracker data has no clip id. | ||||
|             'id': metadata_tracker_data.get('trackerClipId', display_id), | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'upload_date': unified_strdate(metadata_tracker_data.get('trackerClipAirTime')), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class WDRIE(WDRBaseIE): | ||||
|     _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' | ||||
|     _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html' | ||||
|     _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL | ||||
| @@ -91,10 +171,10 @@ class WDRIE(InfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', | ||||
|             # HDS download, MD5 is unstable | ||||
|             'md5': '803138901f6368ee497b4d195bb164f2', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-186083', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20130919', | ||||
|                 'title': 'Sachgeschichte - Achterbahn ', | ||||
|                 'description': '- Die Sendung mit der Maus -', | ||||
| @@ -120,14 +200,9 @@ class WDRIE(InfoExtractor): | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # for wdr.de the data-extension is in a tag with the class "mediaLink" | ||||
|         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" | ||||
|         # for wdrmaus its in a link to the page in a multiline "videoLink"-tag | ||||
|         json_metadata = self._html_search_regex( | ||||
|             r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', | ||||
|             webpage, 'media link', default=None, flags=re.MULTILINE) | ||||
|         info_dict = self._extract_wdr_video(webpage, display_id) | ||||
|  | ||||
|         if not json_metadata: | ||||
|         if not info_dict: | ||||
|             entries = [ | ||||
|                 self.url_result(page_url + href[0], 'WDR') | ||||
|                 for href in re.findall( | ||||
| @@ -140,86 +215,22 @@ class WDRIE(InfoExtractor): | ||||
|  | ||||
|             raise ExtractorError('No downloadable streams found', expected=True) | ||||
|  | ||||
|         media_link_obj = self._parse_json(json_metadata, display_id, | ||||
|                                           transform_source=js_to_json) | ||||
|         jsonp_url = media_link_obj['mediaObj']['url'] | ||||
|  | ||||
|         metadata = self._download_json( | ||||
|             jsonp_url, 'metadata', transform_source=strip_jsonp) | ||||
|  | ||||
|         metadata_tracker_data = metadata['trackerData'] | ||||
|         metadata_media_resource = metadata['mediaResource'] | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         # check if the metadata contains a direct URL to a file | ||||
|         for kind, media_resource in metadata_media_resource.items(): | ||||
|             if kind not in ('dflt', 'alt'): | ||||
|                 continue | ||||
|  | ||||
|             for tag_name, medium_url in media_resource.items(): | ||||
|                 if tag_name not in ('videoURL', 'audioURL'): | ||||
|                     continue | ||||
|  | ||||
|                 ext = determine_ext(medium_url) | ||||
|                 if ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         medium_url, display_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='hls')) | ||||
|                 elif ext == 'f4m': | ||||
|                     manifest_url = update_url_query( | ||||
|                         medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         manifest_url, display_id, f4m_id='hds', fatal=False)) | ||||
|                 elif ext == 'smil': | ||||
|                     formats.extend(self._extract_smil_formats( | ||||
|                         medium_url, 'stream', fatal=False)) | ||||
|                 else: | ||||
|                     a_format = { | ||||
|                         'url': medium_url | ||||
|                     } | ||||
|                     if ext == 'unknown_video': | ||||
|                         urlh = self._request_webpage( | ||||
|                             medium_url, display_id, note='Determining extension') | ||||
|                         ext = urlhandle_detect_ext(urlh) | ||||
|                         a_format['ext'] = ext | ||||
|                     formats.append(a_format) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         caption_url = metadata_media_resource.get('captionURL') | ||||
|         if caption_url: | ||||
|             subtitles['de'] = [{ | ||||
|                 'url': caption_url, | ||||
|                 'ext': 'ttml', | ||||
|             }] | ||||
|  | ||||
|         title = metadata_tracker_data.get('trackerClipTitle') | ||||
|         is_live = url_type == 'live' | ||||
|  | ||||
|         if is_live: | ||||
|             title = self._live_title(title) | ||||
|             upload_date = None | ||||
|         elif 'trackerClipAirTime' in metadata_tracker_data: | ||||
|             upload_date = metadata_tracker_data['trackerClipAirTime'] | ||||
|         else: | ||||
|             upload_date = self._html_search_meta('DC.Date', webpage, 'upload date') | ||||
|             info_dict.update({ | ||||
|                 'title': self._live_title(info_dict['title']), | ||||
|                 'upload_date': None, | ||||
|             }) | ||||
|         elif 'upload_date' not in info_dict: | ||||
|             info_dict['upload_date'] = unified_strdate(self._html_search_meta('DC.Date', webpage, 'upload date')) | ||||
|  | ||||
|         if upload_date: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         return { | ||||
|             'id': metadata_tracker_data.get('trackerClipId', display_id), | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), | ||||
|             'formats': formats, | ||||
|             'upload_date': upload_date, | ||||
|         info_dict.update({ | ||||
|             'description': self._html_search_meta('Description', webpage), | ||||
|             'is_live': is_live, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
| class WDRMobileIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,29 +1,33 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from .jwplatform import JWPlatformBaseIE | ||||
|  | ||||
|  | ||||
| class WimpIE(InfoExtractor): | ||||
| class WimpIE(JWPlatformBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.wimp.com/maruexhausted/', | ||||
|         'url': 'http://www.wimp.com/maru-is-exhausted/', | ||||
|         'md5': 'ee21217ffd66d058e8b16be340b74883', | ||||
|         'info_dict': { | ||||
|             'id': 'maruexhausted', | ||||
|             'id': 'maru-is-exhausted', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Maru is exhausted.', | ||||
|             'description': 'md5:57e099e857c0a4ea312542b684a869b8', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.wimp.com/clowncar/', | ||||
|         'md5': '4e2986c793694b55b37cf92521d12bb4', | ||||
|         'md5': '5c31ad862a90dc5b1f023956faec13fe', | ||||
|         'info_dict': { | ||||
|             'id': 'clowncar', | ||||
|             'id': 'cG4CEr2aiSg', | ||||
|             'ext': 'webm', | ||||
|             'title': 'It\'s like a clown car.', | ||||
|             'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2', | ||||
|             'title': 'Basset hound clown car...incredible!', | ||||
|             'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom', | ||||
|             'upload_date': '20140303', | ||||
|             'uploader': 'Gretchen Hoey', | ||||
|             'uploader_id': 'gretchenandjeff1', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -41,14 +45,13 @@ class WimpIE(InfoExtractor): | ||||
|                 'ie_key': YoutubeIE.ie_key(), | ||||
|             } | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'video URL', group='url') | ||||
|         info_dict = self._extract_jwplayer_data( | ||||
|             webpage, video_id, require_title=False) | ||||
|  | ||||
|         return { | ||||
|         info_dict.update({ | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     remove_start, | ||||
| @@ -27,16 +28,17 @@ class WrzutaIE(InfoExtractor): | ||||
|             'uploader_id': 'laboratoriumdextera', | ||||
|             'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', | ||||
|         }, | ||||
|         'skip': 'Redirected to wrzuta.pl', | ||||
|     }, { | ||||
|         'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty', | ||||
|         'md5': 'bc78077859bea7bcfe4295d7d7fc9025', | ||||
|         'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus', | ||||
|         'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d', | ||||
|         'info_dict': { | ||||
|             'id': '063jOPX5ue2', | ||||
|             'ext': 'ogg', | ||||
|             'title': 'Liber & Natalia Szroeder - Teraz Ty', | ||||
|             'duration': 203, | ||||
|             'uploader_id': 'jolka85', | ||||
|             'description': 'md5:2d2b6340f9188c8c4cd891580e481096', | ||||
|             'id': '01xBFabGXu6', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'James Horner - Into The Na\'vi World [Bonus]', | ||||
|             'description': 'md5:30a70718b2cd9df3120fce4445b0263b', | ||||
|             'duration': 95, | ||||
|             'uploader_id': 'vexling', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -46,7 +48,10 @@ class WrzutaIE(InfoExtractor): | ||||
|         typ = mobj.group('typ') | ||||
|         uploader = mobj.group('uploader') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage, urlh = self._download_webpage_handle(url, video_id) | ||||
|  | ||||
|         if urlh.geturl() == 'http://www.wrzuta.pl/': | ||||
|             raise ExtractorError('Video removed', expected=True) | ||||
|  | ||||
|         quality = qualities(['SD', 'MQ', 'HQ', 'HD']) | ||||
|  | ||||
|   | ||||
| @@ -6,17 +6,23 @@ from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class XNXXIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'md5': '0831677e2b4761795f68d417e0b7b445', | ||||
|     _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', | ||||
|         'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0', | ||||
|         'info_dict': { | ||||
|             'id': '1135332', | ||||
|             'id': '55awb78', | ||||
|             'ext': 'flv', | ||||
|             'title': 'lida » Naked Funny Actress  (5)', | ||||
|             'title': 'Skyrim Test Video', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.xnxx.com/video-55awb78/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -501,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|             }, | ||||
|             'skip': 'format 141 not served anymore', | ||||
|         }, | ||||
|         # DASH manifest with encrypted signature | ||||
|         { | ||||
| @@ -517,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             }, | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|                 'format': '141/bestaudio[ext=m4a]', | ||||
|             }, | ||||
|         }, | ||||
|         # JS player signature function name containing $ | ||||
| @@ -537,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             }, | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '141', | ||||
|                 'format': '141/bestaudio[ext=m4a]', | ||||
|             }, | ||||
|         }, | ||||
|         # Controversy video | ||||
| @@ -618,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', | ||||
|                 'license': 'Standard YouTube License', | ||||
|                 'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', | ||||
|                 'uploader': 'Olympics', | ||||
|                 'uploader': 'Olympic', | ||||
|                 'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games', | ||||
|             }, | ||||
|             'params': { | ||||
| @@ -671,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', | ||||
|                 'uploader': 'dorappi2000', | ||||
|                 'license': 'Standard YouTube License', | ||||
|                 'formats': 'mincount:33', | ||||
|                 'formats': 'mincount:32', | ||||
|             }, | ||||
|         }, | ||||
|         # DASH manifest with segment_list | ||||
| @@ -691,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             'params': { | ||||
|                 'youtube_include_dash_manifest': True, | ||||
|                 'format': '135',  # bestvideo | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'This live event has ended.', | ||||
|         }, | ||||
|         { | ||||
|             # Multifeed videos (multiple cameras), URL is for Main Camera | ||||
| @@ -762,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', | ||||
|             }, | ||||
|             'playlist_count': 2, | ||||
|             'skip': 'Not multifeed anymore', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vid.plus/FlRa-iH7PGw', | ||||
| @@ -814,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'This video does not exist.', | ||||
|         }, | ||||
|         { | ||||
|             # Video licensed under Creative Commons | ||||
| @@ -1331,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|                     (?:[a-zA-Z-]+="[^"]*"\s+)*? | ||||
|                     (?:title|href)="([^"]+)"\s+ | ||||
|                     (?:[a-zA-Z-]+="[^"]*"\s+)*? | ||||
|                     class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*> | ||||
|                     class="[^"]*"[^>]*> | ||||
|                 [^<]+\.{3}\s* | ||||
|                 </a> | ||||
|             ''', r'\1', video_description) | ||||
|   | ||||
| @@ -232,7 +232,7 @@ class JSInterpreter(object): | ||||
|     def extract_function(self, funcname): | ||||
|         func_m = re.search( | ||||
|             r'''(?x) | ||||
|                 (?:function\s+%s|[{;,]%s\s*=\s*function|var\s+%s\s*=\s*function)\s* | ||||
|                 (?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s* | ||||
|                 \((?P<args>[^)]*)\)\s* | ||||
|                 \{(?P<code>[^}]+)\}''' % ( | ||||
|                 re.escape(funcname), re.escape(funcname), re.escape(funcname)), | ||||
|   | ||||
| @@ -26,9 +26,7 @@ def parseOpts(overrideArguments=None): | ||||
|         except IOError: | ||||
|             return default  # silently skip if file is not present | ||||
|         try: | ||||
|             res = [] | ||||
|             for l in optionf: | ||||
|                 res += compat_shlex_split(l, comments=True) | ||||
|             res = compat_shlex_split(optionf.read(), comments=True) | ||||
|         finally: | ||||
|             optionf.close() | ||||
|         return res | ||||
|   | ||||
| @@ -76,7 +76,7 @@ class Socks4Error(ProxyError): | ||||
|  | ||||
|     CODES = { | ||||
|         91: 'request rejected or failed', | ||||
|         92: 'request rejected becasue SOCKS server cannot connect to identd on the client', | ||||
|         92: 'request rejected because SOCKS server cannot connect to identd on the client', | ||||
|         93: 'request rejected because the client program and identd report different user-ids' | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -110,6 +110,49 @@ ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙ | ||||
|                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'], | ||||
|                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy'))) | ||||
|  | ||||
# strptime() patterns tried, in order, when parsing free-form date strings.
# These are independent of whether the day or the month is written first.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%b %d %Y',
    # English ordinal suffixes have to be spelled out literally because
    # strptime() has no directive for them.
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
)

# Common formats plus the ambiguous numeric ones read as day-month-year.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Common formats plus the ambiguous numeric ones read as month-day-year.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
|  | ||||
|  | ||||
| def preferredencoding(): | ||||
|     """Get preferred encoding. | ||||
| @@ -975,6 +1018,24 @@ class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor): | ||||
|     https_response = http_response | ||||
|  | ||||
|  | ||||
def extract_timezone(date_str):
    """Split a trailing UTC marker or numeric UTC offset off a date string.

    A suffix is only recognised after at least eight leading characters and
    must be either a literal "Z" or "+HHMM" / "-HH:MM" style offset
    (optionally preceded by one space).

    Returns a (timezone, date_str) tuple: the offset as a
    datetime.timedelta (zero when there is no suffix or the suffix is "Z")
    and the date string with the recognised suffix removed.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        # Nothing to strip: hand the input back untouched.
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # The suffix was the "Z" designator, i.e. a zero offset.
        return datetime.timedelta(), remainder

    factor = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(tz_match.group('hours')),
        minutes=factor * int(tz_match.group('minutes')))
    return offset, remainder
|  | ||||
|  | ||||
| def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|     """ Return a UNIX timestamp from the given date """ | ||||
|  | ||||
| @@ -984,20 +1045,8 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|     date_str = re.sub(r'\.[0-9]+', '', date_str) | ||||
|  | ||||
|     if timezone is None: | ||||
|         m = re.search( | ||||
|             r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', | ||||
|             date_str) | ||||
|         if not m: | ||||
|             timezone = datetime.timedelta() | ||||
|         else: | ||||
|             date_str = date_str[:-len(m.group(0))] | ||||
|             if not m.group('sign'): | ||||
|                 timezone = datetime.timedelta() | ||||
|             else: | ||||
|                 sign = 1 if m.group('sign') == '+' else -1 | ||||
|                 timezone = datetime.timedelta( | ||||
|                     hours=sign * int(m.group('hours')), | ||||
|                     minutes=sign * int(m.group('minutes'))) | ||||
|         timezone, date_str = extract_timezone(date_str) | ||||
|  | ||||
|     try: | ||||
|         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) | ||||
|         dt = datetime.datetime.strptime(date_str, date_format) - timezone | ||||
| @@ -1006,6 +1055,10 @@ def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|         pass | ||||
|  | ||||
|  | ||||
def date_formats(day_first=True):
    """Return the list of strptime() formats for the requested date ordering.

    day_first -- truthy selects DATE_FORMATS_DAY_FIRST, falsy selects
                 DATE_FORMATS_MONTH_FIRST
    """
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
|  | ||||
|  | ||||
| def unified_strdate(date_str, day_first=True): | ||||
|     """Return a string with the date in the format YYYYMMDD""" | ||||
|  | ||||
| @@ -1014,53 +1067,11 @@ def unified_strdate(date_str, day_first=True): | ||||
|     upload_date = None | ||||
|     # Replace commas | ||||
|     date_str = date_str.replace(',', ' ') | ||||
|     # %z (UTC offset) is only supported in python>=3.2 | ||||
|     if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str): | ||||
|         date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) | ||||
|     # Remove AM/PM + timezone | ||||
|     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) | ||||
|     _, date_str = extract_timezone(date_str) | ||||
|  | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|         '%d %b %Y', | ||||
|         '%B %d %Y', | ||||
|         '%b %d %Y', | ||||
|         '%b %dst %Y %I:%M', | ||||
|         '%b %dnd %Y %I:%M', | ||||
|         '%b %dth %Y %I:%M', | ||||
|         '%Y %m %d', | ||||
|         '%Y-%m-%d', | ||||
|         '%Y/%m/%d', | ||||
|         '%Y/%m/%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S', | ||||
|         '%Y-%m-%d %H:%M:%S.%f', | ||||
|         '%d.%m.%Y %H:%M', | ||||
|         '%d.%m.%Y %H.%M', | ||||
|         '%Y-%m-%dT%H:%M:%SZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
|         '%Y-%m-%dT%H:%M:%S', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f', | ||||
|         '%Y-%m-%dT%H:%M', | ||||
|     ] | ||||
|     if day_first: | ||||
|         format_expressions.extend([ | ||||
|             '%d-%m-%Y', | ||||
|             '%d.%m.%Y', | ||||
|             '%d.%m.%y', | ||||
|             '%d/%m/%Y', | ||||
|             '%d/%m/%y', | ||||
|             '%d/%m/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     else: | ||||
|         format_expressions.extend([ | ||||
|             '%m-%d-%Y', | ||||
|             '%m.%d.%Y', | ||||
|             '%m/%d/%Y', | ||||
|             '%m/%d/%y', | ||||
|             '%m/%d/%Y %H:%M:%S', | ||||
|         ]) | ||||
|     for expression in format_expressions: | ||||
|     for expression in date_formats(day_first): | ||||
|         try: | ||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||
|         except ValueError: | ||||
| @@ -1076,6 +1087,29 @@ def unified_strdate(date_str, day_first=True): | ||||
|         return compat_str(upload_date) | ||||
|  | ||||
|  | ||||
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    date_str  -- the date text; None is passed through as None
    day_first -- forwarded to date_formats() to choose between the
                 day-first and month-first lists of candidate formats

    A trailing "Z" / numeric UTC offset is honoured via extract_timezone(),
    and a "PM" marker shifts the parsed time by twelve hours.  Returns None
    when neither the known formats nor email.utils.parsedate_tz() can parse
    the string.
    """
    if date_str is None:
        return None

    date_str = date_str.replace(',', ' ')

    # The AM/PM marker is stripped before parsing, so PM is compensated for
    # by adding twelve hours to whatever time was parsed.
    # NOTE(review): a "12:xx PM" input is therefore pushed into the next
    # day -- confirm whether such inputs need special handling.
    pm_delta = datetime.timedelta(hours=12 if re.search(r'(?i)PM', date_str) else 0)
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + pm_delta
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # Last resort: RFC 2822 style parsing.  parsedate_tz() returns a plain
    # 10-tuple (or None), not a datetime, so it is fed to timegm() directly;
    # its tenth item is the UTC offset in seconds, or None when absent.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        shift = (
            pm_delta - timezone -
            datetime.timedelta(seconds=timetuple[9] or 0))
        return calendar.timegm(timetuple) + shift.days * 86400 + shift.seconds
    return None
|  | ||||
|  | ||||
| def determine_ext(url, default_ext='unknown_video'): | ||||
|     if url is None: | ||||
|         return default_ext | ||||
| @@ -1626,6 +1660,10 @@ def float_or_none(v, scale=1, invscale=1, default=None): | ||||
|         return default | ||||
|  | ||||
|  | ||||
def strip_or_none(v):
    """Return v.strip(), passing None through unchanged."""
    if v is None:
        return None
    return v.strip()
|  | ||||
|  | ||||
| def parse_duration(s): | ||||
|     if not isinstance(s, compat_basestring): | ||||
|         return None | ||||
| @@ -1970,7 +2008,7 @@ def js_to_json(code): | ||||
|         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| | ||||
|         /\*.*?\*/|,(?=\s*[\]}])| | ||||
|         [a-zA-Z_][.a-zA-Z_0-9]*| | ||||
|         (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| | ||||
|         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| | ||||
|         [0-9]+(?=\s*:) | ||||
|         ''', fix_kv, code) | ||||
|  | ||||
| @@ -2852,3 +2890,16 @@ def decode_packed_codes(code): | ||||
|     return re.sub( | ||||
|         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], | ||||
|         obfucasted_code) | ||||
|  | ||||
|  | ||||
def parse_m3u8_attributes(attrib):
    """Parse a comma separated KEY=VALUE attribute list into a dict.

    Values may be bare or double-quoted; surrounding quotes are dropped
    from quoted values (which may themselves contain commas).  When a key
    occurs more than once the last occurrence wins.
    """
    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return dict(
        (name, raw[1:-1] if raw.startswith('"') else raw)
        for name, raw in pairs)
|  | ||||
|  | ||||
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as unsigned 32-bit.

    Negative values have 2**32 added before shifting; non-negative values
    are shifted as they are.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2016.06.14' | ||||
| __version__ = '2016.07.02' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user