mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			75 Commits
		
	
	
		
			2016.06.14
			...
			2016.06.23
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 96f88e91b7 | ||
|  | 3331a4644d | ||
|  | adf1921dc1 | ||
|  | 97674f0419 | ||
|  | 73843ae8ac | ||
|  | f2bb8c036a | ||
|  | 75ca6bcee2 | ||
|  | 089657ed1f | ||
|  | b5eab86c24 | ||
|  | c8e3e0974b | ||
|  | dfc8f46e1c | ||
|  | c143ddce5d | ||
|  | 169d836feb | ||
|  | 6ae938b295 | ||
|  | cf40fdf5c1 | ||
|  | 23bdae0955 | ||
|  | ca74c90bf5 | ||
|  | 7cfc1e2a10 | ||
|  | 1ac5705f62 | ||
|  | e4f90ea0a7 | ||
|  | cdfc187cd5 | ||
|  | feef925f49 | ||
|  | 19e2d1cdea | ||
|  | 8369a4fe76 | ||
|  | 1f749b6658 | ||
|  | 819707920a | ||
|  | 43518503a6 | ||
|  | 5839d556e4 | ||
|  | 6c83e583b3 | ||
|  | 6aeb64b673 | ||
|  | 6cd64b6806 | ||
|  | e154c65128 | ||
|  | a50fd6e026 | ||
|  | 6a55bb66ee | ||
|  | 7c05097633 | ||
|  | 589568789f | ||
|  | 7577d849a6 | ||
|  | cb23192bc4 | ||
|  | 41c1023300 | ||
|  | 90b6288cce | ||
|  | c1823c8ad9 | ||
|  | d7c6c656c5 | ||
|  | b0b128049a | ||
|  | e8f13f2637 | ||
|  | b5aad37f6b | ||
|  | 6d0d4fc26d | ||
|  | 0278aa443f | ||
|  | 1f35745758 | ||
|  | 573c35272f | ||
|  | 09e3f91e40 | ||
|  | 1b6cf16be7 | ||
|  | 26264cb056 | ||
|  | a72df5f36f | ||
|  | c878e635de | ||
|  | 0f47cc2e92 | ||
|  | 5fc2757682 | ||
|  | e3944c2621 | ||
|  | 667d96480b | ||
|  | e6fe993c31 | ||
|  | d0d93f76ea | ||
|  | 20a6a154fe | ||
|  | f011876076 | ||
|  | 6929569403 | ||
|  | eb451890da | ||
|  | ded7511a70 | ||
|  | d2161cade5 | ||
|  | 27e5fa8198 | ||
|  | efbd1eb51a | ||
|  | 369ff75081 | ||
|  | 47212f7bcb | ||
|  | 4c93ee8d14 | ||
|  | 8bc4dbb1af | ||
|  | 6c3760292c | ||
|  | 4cef70db6c | ||
|  | ff4af6ec59 | 
							
								
								
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/ISSUE_TEMPLATE.md
									
									
									
									
										vendored
									
									
								
							| @@ -6,8 +6,8 @@ | ||||
|  | ||||
| --- | ||||
|  | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.14*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.14** | ||||
| ### Make sure you are using the *latest* version: run `youtube-dl --version` and ensure your version is *2016.06.23*. If it's not, read [this FAQ entry](https://github.com/rg3/youtube-dl/blob/master/README.md#how-do-i-update-youtube-dl) and update. Issues with an outdated version will be rejected. | ||||
| - [ ] I've **verified** and **I assure** that I'm running youtube-dl **2016.06.23** | ||||
|  | ||||
| ### Before submitting an *issue* make sure you have: | ||||
| - [ ] At least skimmed through [README](https://github.com/rg3/youtube-dl/blob/master/README.md) and **most notably** [FAQ](https://github.com/rg3/youtube-dl#faq) and [BUGS](https://github.com/rg3/youtube-dl#bugs) sections | ||||
| @@ -35,7 +35,7 @@ $ youtube-dl -v <your command line> | ||||
| [debug] User config: [] | ||||
| [debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj'] | ||||
| [debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251 | ||||
| [debug] youtube-dl version 2016.06.14 | ||||
| [debug] youtube-dl version 2016.06.23 | ||||
| [debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2 | ||||
| [debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4 | ||||
| [debug] Proxy map: {} | ||||
|   | ||||
| @@ -44,7 +44,7 @@ Or with [MacPorts](https://www.macports.org/): | ||||
| Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://rg3.github.io/youtube-dl/download.html). | ||||
|  | ||||
| # DESCRIPTION | ||||
| **youtube-dl** is a small command-line program to download videos from | ||||
| **youtube-dl** is a command-line program to download videos from | ||||
| YouTube.com and a few more sites. It requires the Python interpreter, version | ||||
| 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on | ||||
| your Unix box, on Windows or on Mac OS X. It is released to the public domain, | ||||
|   | ||||
| @@ -14,15 +14,17 @@ if os.path.exists(lazy_extractors_filename): | ||||
|     os.remove(lazy_extractors_filename) | ||||
|  | ||||
| from youtube_dl.extractor import _ALL_CLASSES | ||||
| from youtube_dl.extractor.common import InfoExtractor | ||||
| from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor | ||||
|  | ||||
| with open('devscripts/lazy_load_template.py', 'rt') as f: | ||||
|     module_template = f.read() | ||||
|  | ||||
| module_contents = [module_template + '\n' + getsource(InfoExtractor.suitable)] | ||||
| module_contents = [ | ||||
|     module_template + '\n' + getsource(InfoExtractor.suitable) + '\n', | ||||
|     'class LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n'] | ||||
|  | ||||
| ie_template = ''' | ||||
| class {name}(LazyLoadExtractor): | ||||
| class {name}({bases}): | ||||
|     _VALID_URL = {valid_url!r} | ||||
|     _module = '{module}' | ||||
| ''' | ||||
| @@ -34,10 +36,20 @@ make_valid_template = ''' | ||||
| ''' | ||||
|  | ||||
|  | ||||
| def get_base_name(base): | ||||
|     if base is InfoExtractor: | ||||
|         return 'LazyLoadExtractor' | ||||
|     elif base is SearchInfoExtractor: | ||||
|         return 'LazyLoadSearchExtractor' | ||||
|     else: | ||||
|         return base.__name__ | ||||
|  | ||||
|  | ||||
| def build_lazy_ie(ie, name): | ||||
|     valid_url = getattr(ie, '_VALID_URL', None) | ||||
|     s = ie_template.format( | ||||
|         name=name, | ||||
|         bases=', '.join(map(get_base_name, ie.__bases__)), | ||||
|         valid_url=valid_url, | ||||
|         module=ie.__module__) | ||||
|     if ie.suitable.__func__ is not InfoExtractor.suitable.__func__: | ||||
| @@ -47,12 +59,35 @@ def build_lazy_ie(ie, name): | ||||
|         s += make_valid_template.format(valid_url=ie._make_valid_url()) | ||||
|     return s | ||||
|  | ||||
| # find the correct sorting and add the required base classes so that subclasses | ||||
| # can be correctly created | ||||
| classes = _ALL_CLASSES[:-1] | ||||
| ordered_cls = [] | ||||
| while classes: | ||||
|     for c in classes[:]: | ||||
|         bases = set(c.__bases__) - set((object, InfoExtractor, SearchInfoExtractor)) | ||||
|         stop = False | ||||
|         for b in bases: | ||||
|             if b not in classes and b not in ordered_cls: | ||||
|                 if b.__name__ == 'GenericIE': | ||||
|                     exit() | ||||
|                 classes.insert(0, b) | ||||
|                 stop = True | ||||
|         if stop: | ||||
|             break | ||||
|         if all(b in ordered_cls for b in bases): | ||||
|             ordered_cls.append(c) | ||||
|             classes.remove(c) | ||||
|             break | ||||
| ordered_cls.append(_ALL_CLASSES[-1]) | ||||
|  | ||||
| names = [] | ||||
| for ie in list(sorted(_ALL_CLASSES[:-1], key=lambda cls: cls.ie_key())) + _ALL_CLASSES[-1:]: | ||||
|     name = ie.ie_key() + 'IE' | ||||
| for ie in ordered_cls: | ||||
|     name = ie.__name__ | ||||
|     src = build_lazy_ie(ie, name) | ||||
|     module_contents.append(src) | ||||
|     names.append(name) | ||||
|     if ie in _ALL_CLASSES: | ||||
|         names.append(name) | ||||
|  | ||||
| module_contents.append( | ||||
|     '_ALL_CLASSES = [{0}]'.format(', '.join(names))) | ||||
|   | ||||
| @@ -15,6 +15,7 @@ | ||||
| set -e | ||||
|  | ||||
| skip_tests=true | ||||
| gpg_sign_commits="" | ||||
| buildserver='localhost:8142' | ||||
|  | ||||
| while true | ||||
| @@ -24,6 +25,10 @@ case "$1" in | ||||
|         skip_tests=false | ||||
|         shift | ||||
|     ;; | ||||
|     --gpg-sign-commits|-S) | ||||
|         gpg_sign_commits="-S" | ||||
|         shift | ||||
|     ;; | ||||
|     --buildserver) | ||||
|         buildserver="$2" | ||||
|         shift 2 | ||||
| @@ -69,7 +74,7 @@ sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py | ||||
| /bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..." | ||||
| make README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md supportedsites | ||||
| git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE.md docs/supportedsites.md youtube_dl/version.py | ||||
| git commit -m "release $version" | ||||
| git commit $gpg_sign_commits -m "release $version" | ||||
|  | ||||
| /bin/echo -e "\n### Now tagging, signing and pushing..." | ||||
| git tag -s -m "Release $version" "$version" | ||||
| @@ -116,7 +121,7 @@ git clone --branch gh-pages --single-branch . build/gh-pages | ||||
|     "$ROOT/devscripts/gh-pages/update-copyright.py" | ||||
|     "$ROOT/devscripts/gh-pages/update-sites.py" | ||||
|     git add *.html *.html.in update | ||||
|     git commit -m "release $version" | ||||
|     git commit $gpg_sign_commits -m "release $version" | ||||
|     git push "$ROOT" gh-pages | ||||
|     git push "$ORIGIN_URL" gh-pages | ||||
| ) | ||||
|   | ||||
| @@ -74,6 +74,8 @@ | ||||
|  - **bbc**: BBC | ||||
|  - **bbc.co.uk**: BBC iPlayer | ||||
|  - **bbc.co.uk:article**: BBC articles | ||||
|  - **bbc.co.uk:iplayer:playlist** | ||||
|  - **bbc.co.uk:playlist** | ||||
|  - **BeatportPro** | ||||
|  - **Beeg** | ||||
|  - **BehindKink** | ||||
| @@ -104,6 +106,8 @@ | ||||
|  - **canalc2.tv** | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
|  - **Canvas** | ||||
|  - **CarambaTV** | ||||
|  - **CarambaTVPage** | ||||
|  - **CBC** | ||||
|  - **CBCPlayer** | ||||
|  - **CBS** | ||||
| @@ -124,6 +128,7 @@ | ||||
|  - **cliphunter** | ||||
|  - **ClipRs** | ||||
|  - **Clipsyndicate** | ||||
|  - **CloserToTruth** | ||||
|  - **cloudtime**: CloudTime | ||||
|  - **Cloudy** | ||||
|  - **Clubic** | ||||
| @@ -243,7 +248,6 @@ | ||||
|  - **Gamersyde** | ||||
|  - **GameSpot** | ||||
|  - **GameStar** | ||||
|  - **Gametrailers** | ||||
|  - **Gazeta** | ||||
|  - **GDCVault** | ||||
|  - **generic**: Generic downloader that works on some sites | ||||
| @@ -432,6 +436,7 @@ | ||||
|  - **nhl.com:videocenter** | ||||
|  - **nhl.com:videocenter:category**: NHL videocenter category | ||||
|  - **nick.com** | ||||
|  - **nick.de** | ||||
|  - **niconico**: ニコニコ動画 | ||||
|  - **NiconicoPlaylist** | ||||
|  - **njoy**: N-JOY | ||||
| @@ -516,6 +521,7 @@ | ||||
|  - **qqmusic:singer**: QQ音乐 - 歌手 | ||||
|  - **qqmusic:toplist**: QQ音乐 - 排行榜 | ||||
|  - **R7** | ||||
|  - **R7Article** | ||||
|  - **radio.de** | ||||
|  - **radiobremen** | ||||
|  - **radiocanada** | ||||
|   | ||||
| @@ -640,6 +640,9 @@ class TestUtil(unittest.TestCase): | ||||
|             "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} | ||||
|         }''') | ||||
|  | ||||
|         inp = '''{"foo":101}''' | ||||
|         self.assertEqual(js_to_json(inp), '''{"foo":101}''') | ||||
|  | ||||
|     def test_js_to_json_edgecases(self): | ||||
|         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") | ||||
|         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) | ||||
|   | ||||
| @@ -2,14 +2,24 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import os.path | ||||
| import re | ||||
| import binascii | ||||
| try: | ||||
|     from Crypto.Cipher import AES | ||||
|     can_decrypt_frag = True | ||||
| except ImportError: | ||||
|     can_decrypt_frag = False | ||||
|  | ||||
| from .fragment import FragmentFD | ||||
| from .external import FFmpegFD | ||||
|  | ||||
| from ..compat import compat_urlparse | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
|     compat_struct_pack, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     parse_m3u8_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -21,7 +31,7 @@ class HlsFD(FragmentFD): | ||||
|     @staticmethod | ||||
|     def can_download(manifest): | ||||
|         UNSUPPORTED_FEATURES = ( | ||||
|             r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1] | ||||
|             r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)',  # encrypted streams [1] | ||||
|             r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2] | ||||
|  | ||||
|             # Live streams heuristic does not always work (e.g. geo restricted to Germany | ||||
| @@ -39,7 +49,9 @@ class HlsFD(FragmentFD): | ||||
|             # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 | ||||
|             # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 | ||||
|         ) | ||||
|         return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) | ||||
|         check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] | ||||
|         check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest) | ||||
|         return all(check_results) | ||||
|  | ||||
|     def real_download(self, filename, info_dict): | ||||
|         man_url = info_dict['url'] | ||||
| @@ -57,36 +69,60 @@ class HlsFD(FragmentFD): | ||||
|                 fd.add_progress_hook(ph) | ||||
|             return fd.real_download(filename, info_dict) | ||||
|  | ||||
|         fragment_urls = [] | ||||
|         total_frags = 0 | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line and not line.startswith('#'): | ||||
|                 segment_url = ( | ||||
|                     line | ||||
|                     if re.match(r'^https?://', line) | ||||
|                     else compat_urlparse.urljoin(man_url, line)) | ||||
|                 fragment_urls.append(segment_url) | ||||
|                 # We only download the first fragment during the test | ||||
|                 if self.params.get('test', False): | ||||
|                     break | ||||
|                 total_frags += 1 | ||||
|  | ||||
|         ctx = { | ||||
|             'filename': filename, | ||||
|             'total_frags': len(fragment_urls), | ||||
|             'total_frags': total_frags, | ||||
|         } | ||||
|  | ||||
|         self._prepare_and_start_frag_download(ctx) | ||||
|  | ||||
|         i = 0 | ||||
|         media_sequence = 0 | ||||
|         decrypt_info = {'METHOD': 'NONE'} | ||||
|         frags_filenames = [] | ||||
|         for i, frag_url in enumerate(fragment_urls): | ||||
|             frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|             success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|             if not success: | ||||
|                 return False | ||||
|             down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|             ctx['dest_stream'].write(down.read()) | ||||
|             down.close() | ||||
|             frags_filenames.append(frag_sanitized) | ||||
|         for line in s.splitlines(): | ||||
|             line = line.strip() | ||||
|             if line: | ||||
|                 if not line.startswith('#'): | ||||
|                     frag_url = ( | ||||
|                         line | ||||
|                         if re.match(r'^https?://', line) | ||||
|                         else compat_urlparse.urljoin(man_url, line)) | ||||
|                     frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) | ||||
|                     success = ctx['dl'].download(frag_filename, {'url': frag_url}) | ||||
|                     if not success: | ||||
|                         return False | ||||
|                     down, frag_sanitized = sanitize_open(frag_filename, 'rb') | ||||
|                     frag_content = down.read() | ||||
|                     down.close() | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) | ||||
|                         frag_content = AES.new( | ||||
|                             decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) | ||||
|                     ctx['dest_stream'].write(frag_content) | ||||
|                     frags_filenames.append(frag_sanitized) | ||||
|                     # We only download the first fragment during the test | ||||
|                     if self.params.get('test', False): | ||||
|                         break | ||||
|                     i += 1 | ||||
|                     media_sequence += 1 | ||||
|                 elif line.startswith('#EXT-X-KEY'): | ||||
|                     decrypt_info = parse_m3u8_attributes(line[11:]) | ||||
|                     if decrypt_info['METHOD'] == 'AES-128': | ||||
|                         if 'IV' in decrypt_info: | ||||
|                             decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:]) | ||||
|                         if not re.match(r'^https?://', decrypt_info['URI']): | ||||
|                             decrypt_info['URI'] = compat_urlparse.urljoin( | ||||
|                                 man_url, decrypt_info['URI']) | ||||
|                         decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() | ||||
|                 elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): | ||||
|                     media_sequence = int(line[22:]) | ||||
|  | ||||
|         self._finish_frag_download(ctx) | ||||
|  | ||||
|   | ||||
| @@ -156,7 +156,10 @@ class AdobeTVVideoIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         video_data = self._download_json(url + '?format=json', video_id) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_data = self._parse_json(self._search_regex( | ||||
|             r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')), | ||||
|   | ||||
| @@ -24,10 +24,10 @@ class AftonbladetIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         # find internal video meta data | ||||
|         meta_url = 'http://aftonbladet-play.drlib.aptoma.no/video/%s.json' | ||||
|         meta_url = 'http://aftonbladet-play-metadata.cdn.drvideo.aptoma.no/video/%s.json' | ||||
|         player_config = self._parse_json(self._html_search_regex( | ||||
|             r'data-player-config="([^"]+)"', webpage, 'player config'), video_id) | ||||
|         internal_meta_id = player_config['videoId'] | ||||
|         internal_meta_id = player_config['aptomaVideoId'] | ||||
|         internal_meta_url = meta_url % internal_meta_id | ||||
|         internal_meta_json = self._download_json( | ||||
|             internal_meta_url, video_id, 'Downloading video meta data') | ||||
|   | ||||
| @@ -8,7 +8,6 @@ from .generic import GenericIE | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     get_element_by_attribute, | ||||
|     qualities, | ||||
|     int_or_none, | ||||
|     parse_duration, | ||||
| @@ -274,41 +273,3 @@ class ARDIE(InfoExtractor): | ||||
|             'upload_date': upload_date, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SportschauIE(ARDMediathekIE): | ||||
|     IE_NAME = 'Sportschau' | ||||
|     _VALID_URL = r'(?P<baseurl>https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video(?P<id>[^/#?]+))\.html' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.sportschau.de/tourdefrance/videoseppeltkokainhatnichtsmitklassischemdopingzutun100.html', | ||||
|         'info_dict': { | ||||
|             'id': 'seppeltkokainhatnichtsmitklassischemdopingzutun100', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Seppelt: "Kokain hat nichts mit klassischem Doping zu tun"', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'Der ARD-Doping Experte Hajo Seppelt gibt seine Einschätzung zum ersten Dopingfall der diesjährigen Tour de France um den Italiener Luca Paolini ab.', | ||||
|         }, | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         base_url = mobj.group('baseurl') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = get_element_by_attribute('class', 'headline', webpage) | ||||
|         description = self._html_search_meta('description', webpage, 'description') | ||||
|  | ||||
|         info = self._extract_media_info( | ||||
|             base_url + '-mc_defaultQuality-h.json', webpage, video_id) | ||||
|  | ||||
|         info.update({ | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         }) | ||||
|  | ||||
|         return info | ||||
|   | ||||
| @@ -180,11 +180,14 @@ class ArteTVBaseIE(InfoExtractor): | ||||
|  | ||||
| class ArteTVPlus7IE(ArteTVBaseIE): | ||||
|     IE_NAME = 'arte.tv:+7' | ||||
|     _VALID_URL = r'https?://(?:www\.)?arte\.tv/guide/(?P<lang>fr|de|en|es)/(?:(?:sendungen|emissions|embed)/)?(?P<id>[^/]+)/(?P<name>[^/?#&]+)' | ||||
|     _VALID_URL = r'https?://(?:(?:www|sites)\.)?arte\.tv/[^/]+/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.arte.tv/guide/de/sendungen/XEN/xenius/?vid=055918-015_PLUS7-D', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://sites.arte.tv/karambolage/de/video/karambolage-22', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     @classmethod | ||||
| @@ -240,10 +243,10 @@ class ArteTVPlus7IE(ArteTVBaseIE): | ||||
|             return self._extract_from_json_url(json_url, video_id, lang, title=title) | ||||
|         # Different kind of embed URL (e.g. | ||||
|         # http://www.arte.tv/magazine/trepalium/fr/episode-0406-replay-trepalium) | ||||
|         embed_url = self._search_regex( | ||||
|             r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'embed url', group='url') | ||||
|         return self.url_result(embed_url) | ||||
|         entries = [ | ||||
|             self.url_result(url) | ||||
|             for _, url in re.findall(r'<iframe[^>]+src=(["\'])(?P<url>.+?)\1', webpage)] | ||||
|         return self.playlist_result(entries) | ||||
|  | ||||
|  | ||||
| # It also uses the arte_vp_url url from the webpage to extract the information | ||||
| @@ -252,22 +255,17 @@ class ArteTVCreativeIE(ArteTVPlus7IE): | ||||
|     _VALID_URL = r'https?://creative\.arte\.tv/(?P<lang>fr|de|en|es)/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://creative.arte.tv/de/magazin/agentur-amateur-corporate-design', | ||||
|         'url': 'http://creative.arte.tv/fr/episode/osmosis-episode-1', | ||||
|         'info_dict': { | ||||
|             'id': '72176', | ||||
|             'id': '057405-001-A', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Folge 2 - Corporate Design', | ||||
|             'upload_date': '20131004', | ||||
|             'title': 'OSMOSIS - N\'AYEZ PLUS PEUR D\'AIMER (1)', | ||||
|             'upload_date': '20150716', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://creative.arte.tv/fr/Monty-Python-Reunion', | ||||
|         'info_dict': { | ||||
|             'id': '160676', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Monty Python live (mostly)', | ||||
|             'description': 'Événement ! Quarante-cinq ans après leurs premiers succès, les légendaires Monty Python remontent sur scène.\n', | ||||
|             'upload_date': '20140805', | ||||
|         } | ||||
|         'playlist_count': 11, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }, { | ||||
|         'url': 'http://creative.arte.tv/de/episode/agentur-amateur-4-der-erste-kunde', | ||||
|         'only_matching': True, | ||||
| @@ -349,14 +347,13 @@ class ArteTVCinemaIE(ArteTVPlus7IE): | ||||
|     _VALID_URL = r'https?://cinema\.arte\.tv/(?P<lang>fr|de|en|es)/(?P<id>.+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://cinema.arte.tv/de/node/38291', | ||||
|         'md5': '6b275511a5107c60bacbeeda368c3aa1', | ||||
|         'url': 'http://cinema.arte.tv/fr/article/les-ailes-du-desir-de-julia-reck', | ||||
|         'md5': 'a5b9dd5575a11d93daf0e3f404f45438', | ||||
|         'info_dict': { | ||||
|             'id': '055876-000_PWA12025-D', | ||||
|             'id': '062494-000-A', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tod auf dem Nil', | ||||
|             'upload_date': '20160122', | ||||
|             'description': 'md5:7f749bbb77d800ef2be11d54529b96bc', | ||||
|             'title': 'Film lauréat du concours web - "Les ailes du désir" de Julia Reck', | ||||
|             'upload_date': '20150807', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|   | ||||
| @@ -46,6 +46,7 @@ class AzubuIE(InfoExtractor): | ||||
|                 'uploader_id': 272749, | ||||
|                 'view_count': int, | ||||
|             }, | ||||
|             'skip': 'Channel offline', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
| @@ -56,22 +57,26 @@ class AzubuIE(InfoExtractor): | ||||
|             'http://www.azubu.tv/api/video/%s' % video_id, video_id)['data'] | ||||
|  | ||||
|         title = data['title'].strip() | ||||
|         description = data['description'] | ||||
|         thumbnail = data['thumbnail'] | ||||
|         view_count = data['view_count'] | ||||
|         uploader = data['user']['username'] | ||||
|         uploader_id = data['user']['id'] | ||||
|         description = data.get('description') | ||||
|         thumbnail = data.get('thumbnail') | ||||
|         view_count = data.get('view_count') | ||||
|         user = data.get('user', {}) | ||||
|         uploader = user.get('username') | ||||
|         uploader_id = user.get('id') | ||||
|  | ||||
|         stream_params = json.loads(data['stream_params']) | ||||
|  | ||||
|         timestamp = float_or_none(stream_params['creationDate'], 1000) | ||||
|         duration = float_or_none(stream_params['length'], 1000) | ||||
|         timestamp = float_or_none(stream_params.get('creationDate'), 1000) | ||||
|         duration = float_or_none(stream_params.get('length'), 1000) | ||||
|  | ||||
|         renditions = stream_params.get('renditions') or [] | ||||
|         video = stream_params.get('FLVFullLength') or stream_params.get('videoFullLength') | ||||
|         if video: | ||||
|             renditions.append(video) | ||||
|  | ||||
|         if not renditions and not user.get('channel', {}).get('is_live', True): | ||||
|             raise ExtractorError('%s said: channel is offline.' % self.IE_NAME, expected=True) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': fmt['url'], | ||||
|             'width': fmt['frameWidth'], | ||||
|   | ||||
| @@ -31,7 +31,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                             music/clips[/#]| | ||||
|                             radio/player/ | ||||
|                         ) | ||||
|                         (?P<id>%s) | ||||
|                         (?P<id>%s)(?!/(?:episodes|broadcasts|clips)) | ||||
|                     ''' % _ID_REGEX | ||||
|  | ||||
|     _MEDIASELECTOR_URLS = [ | ||||
| @@ -192,6 +192,7 @@ class BBCCoUkIE(InfoExtractor): | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Now it\'s really geo-restricted', | ||||
|         }, { | ||||
|             # compact player (https://github.com/rg3/youtube-dl/issues/8147) | ||||
|             'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player', | ||||
| @@ -698,7 +699,9 @@ class BBCIE(BBCCoUkIE): | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if BBCCoUkIE.suitable(url) or BBCCoUkArticleIE.suitable(url) else super(BBCIE, cls).suitable(url) | ||||
|         EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE) | ||||
|         return (False if any(ie.suitable(url) for ie in EXCLUDE_IE) | ||||
|                 else super(BBCIE, cls).suitable(url)) | ||||
|  | ||||
|     def _extract_from_media_meta(self, media_meta, video_id): | ||||
|         # Direct links to media in media metadata (e.g. | ||||
| @@ -975,3 +978,72 @@ class BBCCoUkArticleIE(InfoExtractor): | ||||
|             r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)] | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|  | ||||
|  | ||||
| class BBCCoUkPlaylistBaseIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key()) | ||||
|             for video_id in re.findall( | ||||
|                 self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage)] | ||||
|  | ||||
|         title, description = self._extract_title_and_description(webpage) | ||||
|  | ||||
|         return self.playlist_result(entries, playlist_id, title, description) | ||||
|  | ||||
|  | ||||
| class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE): | ||||
|     IE_NAME = 'bbc.co.uk:iplayer:playlist' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/episodes/(?P<id>%s)' % BBCCoUkIE._ID_REGEX | ||||
|     _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s' | ||||
|     _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v', | ||||
|         'info_dict': { | ||||
|             'id': 'b05rcz9v', | ||||
|             'title': 'The Disappearance', | ||||
|             'description': 'French thriller serial about a missing teenager.', | ||||
|         }, | ||||
|         'playlist_mincount': 6, | ||||
|     } | ||||
|  | ||||
|     def _extract_title_and_description(self, webpage): | ||||
|         title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False) | ||||
|         description = self._search_regex( | ||||
|             r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>', | ||||
|             webpage, 'description', fatal=False, group='value') | ||||
|         return title, description | ||||
|  | ||||
|  | ||||
class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
    # Playlist extractor for the episodes / broadcasts / clips listings of a
    # bbc.co.uk/programmes/<id> page.
    IE_NAME = 'bbc.co.uk:playlist'
    _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
    # The base class formats each discovered video id into this template.
    _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
    # The base class fills %s with BBCCoUkIE._ID_REGEX before scanning the page.
    _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
    _TESTS = [{
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'info_dict': {
            'id': 'b05rcz9v',
            'title': 'The Disappearance - Clips - BBC Four',
            'description': 'French thriller serial about a missing teenager.',
        },
        'playlist_mincount': 7,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
        'only_matching': True,
    }, {
        'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
        'only_matching': True,
    }]

    def _extract_title_and_description(self, webpage):
        """Return a ``(title, description)`` tuple from the page's Open Graph data.

        The title lookup is requested as non-fatal; the description lookup
        uses the helper's defaults.
        """
        og_title = self._og_search_title(webpage, fatal=False)
        og_description = self._og_search_description(webpage)
        return og_title, og_description
|   | ||||
| @@ -1,31 +1,27 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_with_ns, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
|  | ||||
|  | ||||
| class BetIE(InfoExtractor): | ||||
| class BetIE(MTVServicesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?bet\.com/(?:[^/]+/)+(?P<id>.+?)\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.bet.com/news/politics/2014/12/08/in-bet-exclusive-obama-talks-race-and-racism.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'news/national/2014/a-conversation-with-president-obama', | ||||
|                 'id': '07e96bd3-8850-3051-b856-271b457f0ab8', | ||||
|                 'display_id': 'in-bet-exclusive-obama-talks-race-and-racism', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'A Conversation With President Obama', | ||||
|                 'description': 'md5:699d0652a350cf3e491cd15cc745b5da', | ||||
|                 'description': 'President Obama urges persistence in confronting racism and bias.', | ||||
|                 'duration': 1534, | ||||
|                 'timestamp': 1418075340, | ||||
|                 'upload_date': '20141208', | ||||
|                 'uploader': 'admin', | ||||
|                 'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|                 'subtitles': { | ||||
|                     'en': 'mincount:2', | ||||
|                 } | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
| @@ -35,16 +31,17 @@ class BetIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://www.bet.com/video/news/national/2014/justice-for-ferguson-a-community-reacts.html', | ||||
|             'info_dict': { | ||||
|                 'id': 'news/national/2014/justice-for-ferguson-a-community-reacts', | ||||
|                 'id': '9f516bf1-7543-39c4-8076-dd441b459ba9', | ||||
|                 'display_id': 'justice-for-ferguson-a-community-reacts', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Justice for Ferguson: A Community Reacts', | ||||
|                 'description': 'A BET News special.', | ||||
|                 'duration': 1696, | ||||
|                 'timestamp': 1416942360, | ||||
|                 'upload_date': '20141125', | ||||
|                 'uploader': 'admin', | ||||
|                 'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|                 'subtitles': { | ||||
|                     'en': 'mincount:2', | ||||
|                 } | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
| @@ -53,57 +50,32 @@ class BetIE(InfoExtractor): | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     _FEED_URL = "http://feeds.mtvnservices.com/od/feed/bet-mrss-player" | ||||
|  | ||||
|     def _get_feed_query(self, uri): | ||||
|         return compat_urllib_parse_urlencode({ | ||||
|             'uuid': uri, | ||||
|         }) | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         return self._search_regex(r'data-uri="([^"]+)', webpage, 'mgid') | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         mgid = self._extract_mgid(webpage) | ||||
|         videos_info = self._get_videos_info(mgid) | ||||
|  | ||||
|         media_url = compat_urllib_parse_unquote(self._search_regex( | ||||
|             [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], | ||||
|             webpage, 'media URL')) | ||||
|         info_dict = videos_info['entries'][0] | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'/video/(.*)/_jcr_content/', media_url, 'video id') | ||||
|         upload_date = unified_strdate(self._html_search_meta('date', webpage)) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         mrss = self._download_xml(media_url, display_id) | ||||
|  | ||||
|         item = mrss.find('./channel/item') | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'dc': 'http://purl.org/dc/elements/1.1/', | ||||
|             'media': 'http://search.yahoo.com/mrss/', | ||||
|             'ka': 'http://kickapps.com/karss', | ||||
|         } | ||||
|  | ||||
|         title = xpath_text(item, './title', 'title') | ||||
|         description = xpath_text( | ||||
|             item, './description', 'description', fatal=False) | ||||
|  | ||||
|         timestamp = parse_iso8601(xpath_text( | ||||
|             item, xpath_with_ns('./dc:date', NS_MAP), | ||||
|             'upload date', fatal=False)) | ||||
|         uploader = xpath_text( | ||||
|             item, xpath_with_ns('./dc:creator', NS_MAP), | ||||
|             'uploader', fatal=False) | ||||
|  | ||||
|         media_content = item.find( | ||||
|             xpath_with_ns('./media:content', NS_MAP)) | ||||
|         duration = int_or_none(media_content.get('duration')) | ||||
|         smil_url = media_content.get('url') | ||||
|  | ||||
|         thumbnail = media_content.find( | ||||
|             xpath_with_ns('./media:thumbnail', NS_MAP)).get('url') | ||||
|  | ||||
|         formats = self._extract_smil_formats(smil_url, display_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|         info_dict.update({ | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'timestamp': timestamp, | ||||
|             'uploader': uploader, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
|             'upload_date': upload_date, | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
|   | ||||
| @@ -29,7 +29,8 @@ class BRIE(InfoExtractor): | ||||
|                 'duration': 180, | ||||
|                 'uploader': 'Reinhard Weber', | ||||
|                 'upload_date': '20150422', | ||||
|             } | ||||
|             }, | ||||
|             'skip': '404 not found', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/nachrichten/oberbayern/inhalt/muenchner-polizeipraesident-schreiber-gestorben-100.html', | ||||
| @@ -40,7 +41,8 @@ class BRIE(InfoExtractor): | ||||
|                 'title': 'Manfred Schreiber ist tot', | ||||
|                 'description': 'md5:b454d867f2a9fc524ebe88c3f5092d97', | ||||
|                 'duration': 26, | ||||
|             } | ||||
|             }, | ||||
|             'skip': '404 not found', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.br-klassik.de/audio/peeping-tom-premierenkritik-dance-festival-muenchen-100.html', | ||||
| @@ -51,7 +53,8 @@ class BRIE(InfoExtractor): | ||||
|                 'title': 'Kurzweilig und sehr bewegend', | ||||
|                 'description': 'md5:0351996e3283d64adeb38ede91fac54e', | ||||
|                 'duration': 296, | ||||
|             } | ||||
|             }, | ||||
|             'skip': '404 not found', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.br.de/radio/bayern1/service/team/videos/team-video-erdelt100.html', | ||||
|   | ||||
							
								
								
									
										88
									
								
								youtube_dl/extractor/carambatv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										88
									
								
								youtube_dl/extractor/carambatv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,88 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
class CarambaTVIE(InfoExtractor):
    # Extracts a single CarambaTV video addressed by its numeric id, either
    # through the internal ``carambatv:<id>`` scheme or a
    # video1.carambatv.ru/v/<id> URL.
    _VALID_URL = r'(?:carambatv:|https?://video1\.carambatv\.ru/v/)(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video1.carambatv.ru/v/191910501',
        'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
        'info_dict': {
            'id': '191910501',
            'ext': 'mp4',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            # Raw string: ``\.`` is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2678.31,
        },
    }, {
        'url': 'carambatv:191910501',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for one video from its ``videoinfo.js`` JSON.

        Requires the ``title`` and ``qualities`` keys to be present in the
        downloaded metadata (a missing key raises ``KeyError``); thumbnail and
        duration are optional.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            'http://video1.carambatv.ru/v/%s/videoinfo.js' % video_id,
            video_id)

        title = video['title']

        # File names listed under ``qualities`` are appended to this prefix;
        # fall back to the per-video directory when the metadata gives none.
        base_url = video.get('video') or 'http://video1.carambatv.ru/v/%s/' % video_id

        formats = []
        for quality in video['qualities']:
            file_name = quality.get('fn')
            if not file_name:
                # An entry without a file name cannot produce a URL.
                continue
            height = quality.get('height')
            formats.append({
                'url': base_url + file_name,
                'height': int_or_none(height),
                'format_id': '%sp' % height if height else None,
            })
        self._sort_formats(formats)

        thumbnail = video.get('splash')
        # Duration is read from the first annotation's ``end_time``; try_get
        # is asked to accept it only when it is a compat_str.
        duration = float_or_none(try_get(
            video, lambda x: x['annotations'][0]['end_time'], compat_str))

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
|  | ||||
|  | ||||
class CarambaTVPageIE(InfoExtractor):
    # Resolves a carambatv.ru article/movie page to the embedded video and
    # hands it over to CarambaTVIE.
    _VALID_URL = r'https?://carambatv\.ru/(?:[^/]+/)+(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'http://carambatv.ru/movie/bad-comedian/razborka-v-manile/',
        # Same video (id 191910501) as the CarambaTVIE test, hence the same
        # checksum; an empty md5 could never match a downloaded file.
        'md5': '2f4a81b7cfd5ab866ee2d7270cb34a2a',
        'info_dict': {
            'id': '191910501',
            'ext': 'mp4',
            'title': '[BadComedian] - Разборка в Маниле (Абсолютный обзор)',
            # Raw string: ``\.`` is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2678.31,
        },
    }

    def _real_extract(self, url):
        """Return a ``url_result`` pointing at the video embedded in the page.

        The ``og:video:iframe`` property is preferred.  When it is absent, the
        numeric video id is searched for in the page source and wrapped in a
        ``carambatv:<id>`` URL.
        """
        # The URL only carries a slug; the numeric video id comes from the page.
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        video_url = self._og_search_property('video:iframe', webpage, default=None)

        if not video_url:
            video_id = self._search_regex(
                r'(?:video_id|crmb_vuid)\s*[:=]\s*["\']?(\d+)',
                webpage, 'video id')
            video_url = 'carambatv:%s' % video_id

        return self.url_result(video_url, CarambaTVIE.ie_key())
| @@ -1,17 +1,13 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .theplatform import ThePlatformIE | ||||
| from .theplatform import ThePlatformFeedIE | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     xpath_element, | ||||
|     int_or_none, | ||||
|     find_xpath_attr, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CBSBaseIE(ThePlatformIE): | ||||
| class CBSBaseIE(ThePlatformFeedIE): | ||||
|     def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): | ||||
|         closed_caption_e = find_xpath_attr(smil, self._xpath_ns('.//param', namespace), 'name', 'ClosedCaptionURL') | ||||
|         return { | ||||
| @@ -21,9 +17,22 @@ class CBSBaseIE(ThePlatformIE): | ||||
|             }] | ||||
|         } if closed_caption_e is not None and closed_caption_e.attrib.get('value') else [] | ||||
|  | ||||
|     def _extract_video_info(self, filter_query, video_id): | ||||
|         return self._extract_feed_info( | ||||
|             'dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id, lambda entry: { | ||||
|                 'series': entry.get('cbs$SeriesTitle'), | ||||
|                 'season_number': int_or_none(entry.get('cbs$SeasonNumber')), | ||||
|                 'episode': entry.get('cbs$EpisodeTitle'), | ||||
|                 'episode_number': int_or_none(entry.get('cbs$EpisodeNumber')), | ||||
|             }, { | ||||
|                 'StreamPack': { | ||||
|                     'manifest': 'm3u', | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|  | ||||
| class CBSIE(CBSBaseIE): | ||||
|     _VALID_URL = r'(?:cbs:(?P<content_id>\w+)|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/(?:video|artist)|colbertlateshow\.com/(?:video|podcasts))/[^/]+/(?P<display_id>[^/]+))' | ||||
|     _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:cbs\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/', | ||||
| @@ -38,25 +47,7 @@ class CBSIE(CBSBaseIE): | ||||
|             'upload_date': '20131127', | ||||
|             'uploader': 'CBSI-NEW', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         '_skip': 'Blocked outside the US', | ||||
|     }, { | ||||
|         'url': 'http://www.cbs.com/shows/liveonletterman/artist/221752/st-vincent/', | ||||
|         'info_dict': { | ||||
|             'id': 'WWF_5KqY3PK1', | ||||
|             'display_id': 'st-vincent', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Live on Letterman - St. Vincent', | ||||
|             'description': 'Live On Letterman: St. Vincent in concert from New York\'s Ed Sullivan Theater on Tuesday, July 16, 2014.', | ||||
|             'duration': 3221, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'expected_warnings': ['Failed to download m3u8 information'], | ||||
|         '_skip': 'Blocked outside the US', | ||||
|     }, { | ||||
|         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/', | ||||
| @@ -68,44 +59,5 @@ class CBSIE(CBSBaseIE): | ||||
|     TP_RELEASE_URL_TEMPLATE = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         content_id, display_id = re.match(self._VALID_URL, url).groups() | ||||
|         if not content_id: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             content_id = self._search_regex( | ||||
|                 [r"video\.settings\.content_id\s*=\s*'([^']+)';", r"cbsplayer\.contentId\s*=\s*'([^']+)';"], | ||||
|                 webpage, 'content id') | ||||
|         items_data = self._download_xml( | ||||
|             'http://can.cbs.com/thunder/player/videoPlayerService.php', | ||||
|             content_id, query={'partner': 'cbs', 'contentId': content_id}) | ||||
|         video_data = xpath_element(items_data, './/item') | ||||
|         title = xpath_text(video_data, 'videoTitle', 'title', True) | ||||
|  | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         for item in items_data.findall('.//item'): | ||||
|             pid = xpath_text(item, 'pid') | ||||
|             if not pid: | ||||
|                 continue | ||||
|             tp_release_url = self.TP_RELEASE_URL_TEMPLATE % pid | ||||
|             if '.m3u8' in xpath_text(item, 'contentUrl', default=''): | ||||
|                 tp_release_url += '&manifest=m3u' | ||||
|             tp_formats, tp_subtitles = self._extract_theplatform_smil( | ||||
|                 tp_release_url, content_id, 'Downloading %s SMIL data' % pid) | ||||
|             formats.extend(tp_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         info = self.get_metadata('dJ5BDC/media/guid/2198311517/%s' % content_id, content_id) | ||||
|         info.update({ | ||||
|             'id': content_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'series': xpath_text(video_data, 'seriesTitle'), | ||||
|             'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')), | ||||
|             'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')), | ||||
|             'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000), | ||||
|             'thumbnail': xpath_text(video_data, 'previewImageURL'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         }) | ||||
|         return info | ||||
|         content_id = self._match_id(url) | ||||
|         return self._extract_video_info('byGuid=%s' % content_id, content_id) | ||||
|   | ||||
| @@ -30,9 +30,12 @@ class CBSNewsIE(CBSBaseIE): | ||||
|         { | ||||
|             'url': 'http://www.cbsnews.com/videos/fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack/', | ||||
|             'info_dict': { | ||||
|                 'id': 'fort-hood-shooting-army-downplays-mental-illness-as-cause-of-attack', | ||||
|                 'id': 'SNJBOYzXiWBOvaLsdzwH8fmtP1SCd91Y', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack', | ||||
|                 'description': 'md5:4a6983e480542d8b333a947bfc64ddc7', | ||||
|                 'upload_date': '19700101', | ||||
|                 'uploader': 'CBSI-NEW', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 205, | ||||
|                 'subtitles': { | ||||
| @@ -58,30 +61,8 @@ class CBSNewsIE(CBSBaseIE): | ||||
|             webpage, 'video JSON info'), video_id) | ||||
|  | ||||
|         item = video_info['item'] if 'item' in video_info else video_info | ||||
|         title = item.get('articleTitle') or item.get('hed') | ||||
|         duration = item.get('duration') | ||||
|         thumbnail = item.get('mediaImage') or item.get('thumbnail') | ||||
|  | ||||
|         subtitles = {} | ||||
|         formats = [] | ||||
|         for format_id in ['RtmpMobileLow', 'RtmpMobileHigh', 'Hls', 'RtmpDesktop']: | ||||
|             pid = item.get('media' + format_id) | ||||
|             if not pid: | ||||
|                 continue | ||||
|             release_url = 'http://link.theplatform.com/s/dJ5BDC/%s?mbr=true' % pid | ||||
|             tp_formats, tp_subtitles = self._extract_theplatform_smil(release_url, video_id, 'Downloading %s SMIL data' % pid) | ||||
|             formats.extend(tp_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, tp_subtitles) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         guid = item['mpxRefId'] | ||||
|         return self._extract_video_info('byGuid=%s' % guid, guid) | ||||
|  | ||||
|  | ||||
| class CBSNewsLiveVideoIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,30 +1,28 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .cbs import CBSBaseIE | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' | ||||
| class CBSSportsIE(CBSBaseIE): | ||||
|     _VALID_URL = r'https?://www\.cbssports\.com/video/player/[^/]+/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.cbssports.com/video/player/videos/708337219968/0/ben-simmons-the-next-lebron?-not-so-fast', | ||||
|         'info_dict': { | ||||
|             'id': '_d5_GbO8p1sT', | ||||
|             'ext': 'flv', | ||||
|             'title': 'US Open flashbacks: 1990s', | ||||
|             'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', | ||||
|             'id': '708337219968', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ben Simmons the next LeBron? Not so fast', | ||||
|             'description': 'md5:854294f627921baba1f4b9a990d87197', | ||||
|             'timestamp': 1466293740, | ||||
|             'upload_date': '20160618', | ||||
|             'uploader': 'CBSI-NEW', | ||||
|         }, | ||||
|     } | ||||
|         'params': { | ||||
|             # m3u8 download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         section = mobj.group('section') | ||||
|         video_id = mobj.group('id') | ||||
|         all_videos = self._download_json( | ||||
|             'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, | ||||
|             video_id) | ||||
|         # The json file contains the info of all the videos in the section | ||||
|         video_info = next(v for v in all_videos if v['pcid'] == video_id) | ||||
|         return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') | ||||
|         video_id = self._match_id(url) | ||||
|         return self._extract_video_info('byId=%s' % video_id, video_id) | ||||
|   | ||||
| @@ -58,7 +58,8 @@ class CDAIE(InfoExtractor): | ||||
|         def extract_format(page, version): | ||||
|             unpacked = decode_packed_codes(page) | ||||
|             format_url = self._search_regex( | ||||
|                 r"url:\\'(.+?)\\'", unpacked, '%s url' % version, fatal=False) | ||||
|                 r"(?:file|url)\s*:\s*(\\?[\"'])(?P<url>http.+?)\1", unpacked, | ||||
|                 '%s url' % version, fatal=False, group='url') | ||||
|             if not format_url: | ||||
|                 return | ||||
|             f = { | ||||
| @@ -75,7 +76,8 @@ class CDAIE(InfoExtractor): | ||||
|             info_dict['formats'].append(f) | ||||
|             if not info_dict['duration']: | ||||
|                 info_dict['duration'] = parse_duration(self._search_regex( | ||||
|                     r"duration:\\'(.+?)\\'", unpacked, 'duration', fatal=False)) | ||||
|                     r"duration\s*:\s*(\\?[\"'])(?P<duration>.+?)\1", | ||||
|                     unpacked, 'duration', fatal=False, group='duration')) | ||||
|  | ||||
|         extract_format(webpage, 'default') | ||||
|  | ||||
|   | ||||
							
								
								
									
										92
									
								
								youtube_dl/extractor/closertotruth.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								youtube_dl/extractor/closertotruth.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,92 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class CloserToTruthIE(InfoExtractor):
    # Extracts Kaltura-hosted videos from closertotruth.com pages.  A page
    # with a "select-version" drop-down yields a playlist; any other page
    # yields the single embedded video.
    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
        'info_dict': {
            'id': '0_zof1ktre',
            'display_id': 'solutions-the-mind-body-problem',
            'ext': 'mov',
            'title': 'Solutions to the Mind-Body Problem?',
            'upload_date': '20140221',
            'timestamp': 1392956007,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
        'info_dict': {
            'id': '0_iuxai6g6',
            'display_id': 'how-do-brains-work',
            'ext': 'mov',
            'title': 'How do Brains Work?',
            'upload_date': '20140221',
            'timestamp': 1392956024,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/interviews/1725',
        'info_dict': {
            'id': '1725',
            'title': 'AyaFr-002',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Return a playlist or a single ``url_transparent`` Kaltura result.

        The Kaltura partner id and the page title are mandatory.  When the
        page has a ``<select id="select-version">`` element, every distinct
        ``<option>`` inside it becomes a playlist entry; if that produces no
        entries, or there is no such element, the entry id is read from the
        ``embed-kaltura`` anchor instead.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        partner_id = self._search_regex(
            r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
            webpage, 'kaltura partner_id')

        title = self._search_regex(
            r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')

        select = self._search_regex(
            r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
            webpage, 'select version', default=None)
        if select:
            seen_ids = set()
            entries = []
            # Scan only the captured drop-down markup, so that <option> tags
            # belonging to other form controls on the page are not picked up.
            for option in re.finditer(
                    r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
                    select):
                entry_id = option.group('id')
                if entry_id in seen_ids:
                    # The same entry may be listed more than once.
                    continue
                seen_ids.add(entry_id)
                entries.append({
                    '_type': 'url_transparent',
                    'url': 'kaltura:%s:%s' % (partner_id, entry_id),
                    'ie_key': 'Kaltura',
                    'title': option.group('title'),
                })
            if entries:
                return self.playlist_result(entries, display_id, title)

        entry_id = self._search_regex(
            r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
            webpage, 'kaltura entry_id', group='id')

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': 'Kaltura',
            'title': title
        }
| @@ -53,6 +53,7 @@ from ..utils import ( | ||||
|     mimetype2ext, | ||||
|     update_Request, | ||||
|     update_url_query, | ||||
|     parse_m3u8_attributes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -1150,23 +1151,11 @@ class InfoExtractor(object): | ||||
|             }] | ||||
|         last_info = None | ||||
|         last_media = None | ||||
|         kv_rex = re.compile( | ||||
|             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') | ||||
|         for line in m3u8_doc.splitlines(): | ||||
|             if line.startswith('#EXT-X-STREAM-INF:'): | ||||
|                 last_info = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_info[m.group('key')] = v | ||||
|                 last_info = parse_m3u8_attributes(line) | ||||
|             elif line.startswith('#EXT-X-MEDIA:'): | ||||
|                 last_media = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_media[m.group('key')] = v | ||||
|                 last_media = parse_m3u8_attributes(line) | ||||
|             elif line.startswith('#') or not line.strip(): | ||||
|                 continue | ||||
|             else: | ||||
|   | ||||
| @@ -44,7 +44,6 @@ from .archiveorg import ArchiveOrgIE | ||||
| from .ard import ( | ||||
|     ARDIE, | ||||
|     ARDMediathekIE, | ||||
|     SportschauIE, | ||||
| ) | ||||
| from .arte import ( | ||||
|     ArteTvIE, | ||||
| @@ -71,6 +70,8 @@ from .bandcamp import BandcampIE, BandcampAlbumIE | ||||
| from .bbc import ( | ||||
|     BBCCoUkIE, | ||||
|     BBCCoUkArticleIE, | ||||
|     BBCCoUkIPlayerPlaylistIE, | ||||
|     BBCCoUkPlaylistIE, | ||||
|     BBCIE, | ||||
| ) | ||||
| from .beeg import BeegIE | ||||
| @@ -108,6 +109,10 @@ from .camwithher import CamWithHerIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .canvas import CanvasIE | ||||
| from .carambatv import ( | ||||
|     CarambaTVIE, | ||||
|     CarambaTVPageIE, | ||||
| ) | ||||
| from .cbc import ( | ||||
|     CBCIE, | ||||
|     CBCPlayerIE, | ||||
| @@ -135,6 +140,7 @@ from .cliprs import ClipRsIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| from .clipsyndicate import ClipsyndicateIE | ||||
| from .closertotruth import CloserToTruthIE | ||||
| from .cloudy import CloudyIE | ||||
| from .clubic import ClubicIE | ||||
| from .clyp import ClypIE | ||||
| @@ -279,7 +285,6 @@ from .gameone import ( | ||||
| from .gamersyde import GamersydeIE | ||||
| from .gamespot import GameSpotIE | ||||
| from .gamestar import GameStarIE | ||||
| from .gametrailers import GametrailersIE | ||||
| from .gazeta import GazetaIE | ||||
| from .gdcvault import GDCVaultIE | ||||
| from .generic import GenericIE | ||||
| @@ -512,7 +517,10 @@ from .nhl import ( | ||||
|     NHLVideocenterCategoryIE, | ||||
|     NHLIE, | ||||
| ) | ||||
| from .nick import NickIE | ||||
| from .nick import ( | ||||
|     NickIE, | ||||
|     NickDeIE, | ||||
| ) | ||||
| from .niconico import NiconicoIE, NiconicoPlaylistIE | ||||
| from .ninegag import NineGagIE | ||||
| from .noco import NocoIE | ||||
| @@ -622,7 +630,10 @@ from .qqmusic import ( | ||||
|     QQMusicToplistIE, | ||||
|     QQMusicPlaylistIE, | ||||
| ) | ||||
| from .r7 import R7IE | ||||
| from .r7 import ( | ||||
|     R7IE, | ||||
|     R7ArticleIE, | ||||
| ) | ||||
| from .radiocanada import ( | ||||
|     RadioCanadaIE, | ||||
|     RadioCanadaAudioVideoIE, | ||||
| @@ -738,6 +749,7 @@ from .sportbox import ( | ||||
|     SportBoxEmbedIE, | ||||
| ) | ||||
| from .sportdeutschland import SportDeutschlandIE | ||||
| from .sportschau import SportschauIE | ||||
| from .srgssr import ( | ||||
|     SRGSSRIE, | ||||
|     SRGSSRPlayIE, | ||||
|   | ||||
| @@ -239,6 +239,8 @@ class FacebookIE(InfoExtractor): | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id, f in video_data.items(): | ||||
|             if f and isinstance(f, dict): | ||||
|                 f = [f] | ||||
|             if not f or not isinstance(f, list): | ||||
|                 continue | ||||
|             for quality in ('sd', 'hd'): | ||||
|   | ||||
| @@ -1,7 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     update_url_query, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class FoxSportsIE(InfoExtractor): | ||||
| @@ -9,11 +12,15 @@ class FoxSportsIE(InfoExtractor): | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.foxsports.com/video?vid=432609859715', | ||||
|         'md5': 'b49050e955bebe32c301972e4012ac17', | ||||
|         'info_dict': { | ||||
|             'id': 'gA0bHB3Ladz3', | ||||
|             'ext': 'flv', | ||||
|             'id': 'i0qKWsk3qJaM', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Courtney Lee on going up 2-0 in series vs. Blazers', | ||||
|             'description': 'Courtney Lee talks about Memphis being focused.', | ||||
|             'upload_date': '20150423', | ||||
|             'timestamp': 1429761109, | ||||
|             'uploader': 'NEWA-FNG-FOXSPORTS', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     } | ||||
| @@ -28,5 +35,8 @@ class FoxSportsIE(InfoExtractor): | ||||
|                 r"data-player-config='([^']+)'", webpage, 'data player config'), | ||||
|             video_id) | ||||
|  | ||||
|         return self.url_result(smuggle_url( | ||||
|             config['releaseURL'] + '&manifest=f4m', {'force_smil_url': True})) | ||||
|         return self.url_result(smuggle_url(update_url_query( | ||||
|             config['releaseURL'], { | ||||
|                 'mbr': 'true', | ||||
|                 'switch': 'http', | ||||
|             }), {'force_smil_url': True})) | ||||
|   | ||||
| @@ -1,19 +1,19 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .once import OnceIE | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_unquote, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     unescapeHTML, | ||||
|     url_basename, | ||||
|     dict_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GameSpotIE(InfoExtractor): | ||||
| class GameSpotIE(OnceIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', | ||||
| @@ -39,29 +39,73 @@ class GameSpotIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|         data_video_json = self._search_regex( | ||||
|             r'data-video=["\'](.*?)["\']', webpage, 'data video') | ||||
|         data_video = json.loads(unescapeHTML(data_video_json)) | ||||
|         data_video = self._parse_json(unescapeHTML(data_video_json), page_id) | ||||
|         streams = data_video['videoStreams'] | ||||
|  | ||||
|         manifest_url = None | ||||
|         formats = [] | ||||
|         f4m_url = streams.get('f4m_stream') | ||||
|         if f4m_url is not None: | ||||
|             # Transform the manifest url to a link to the mp4 files | ||||
|             # they are used in mobile devices. | ||||
|             f4m_path = compat_urlparse.urlparse(f4m_url).path | ||||
|             QUALITIES_RE = r'((,\d+)+,?)' | ||||
|             qualities = self._search_regex(QUALITIES_RE, f4m_path, 'qualities').strip(',').split(',') | ||||
|             http_path = f4m_path[1:].split('/', 1)[1] | ||||
|             http_template = re.sub(QUALITIES_RE, r'%s', http_path) | ||||
|             http_template = http_template.replace('.csmil/manifest.f4m', '') | ||||
|             http_template = compat_urlparse.urljoin( | ||||
|                 'http://video.gamespotcdn.com/', http_template) | ||||
|             for q in qualities: | ||||
|                 formats.append({ | ||||
|                     'url': http_template % q, | ||||
|                     'ext': 'mp4', | ||||
|                     'format_id': q, | ||||
|                 }) | ||||
|         else: | ||||
|         if f4m_url: | ||||
|             manifest_url = f4m_url | ||||
|             formats.extend(self._extract_f4m_formats( | ||||
|                 f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False)) | ||||
|         m3u8_url = streams.get('m3u8_stream') | ||||
|         if m3u8_url: | ||||
|             manifest_url = m3u8_url | ||||
|             m3u8_formats = self._extract_m3u8_formats( | ||||
|                 m3u8_url, page_id, 'mp4', 'm3u8_native', | ||||
|                 m3u8_id='hls', fatal=False) | ||||
|             formats.extend(m3u8_formats) | ||||
|         progressive_url = dict_get( | ||||
|             streams, ('progressive_hd', 'progressive_high', 'progressive_low')) | ||||
|         if progressive_url and manifest_url: | ||||
|             qualities_basename = self._search_regex( | ||||
|                 '/([^/]+)\.csmil/', | ||||
|                 manifest_url, 'qualities basename', default=None) | ||||
|             if qualities_basename: | ||||
|                 QUALITIES_RE = r'((,\d+)+,?)' | ||||
|                 qualities = self._search_regex( | ||||
|                     QUALITIES_RE, qualities_basename, | ||||
|                     'qualities', default=None) | ||||
|                 if qualities: | ||||
|                     qualities = list(map(lambda q: int(q), qualities.strip(',').split(','))) | ||||
|                     qualities.sort() | ||||
|                     http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename) | ||||
|                     http_url_basename = url_basename(progressive_url) | ||||
|                     if m3u8_formats: | ||||
|                         self._sort_formats(m3u8_formats) | ||||
|                         m3u8_formats = list(filter( | ||||
|                             lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                             m3u8_formats)) | ||||
|                     if len(qualities) == len(m3u8_formats): | ||||
|                         for q, m3u8_format in zip(qualities, m3u8_formats): | ||||
|                             f = m3u8_format.copy() | ||||
|                             f.update({ | ||||
|                                 'url': progressive_url.replace( | ||||
|                                     http_url_basename, http_template % q), | ||||
|                                 'format_id': f['format_id'].replace('hls', 'http'), | ||||
|                                 'protocol': 'http', | ||||
|                             }) | ||||
|                             formats.append(f) | ||||
|                     else: | ||||
|                         for q in qualities: | ||||
|                             formats.append({ | ||||
|                                 'url': progressive_url.replace( | ||||
|                                     http_url_basename, http_template % q), | ||||
|                                 'ext': 'mp4', | ||||
|                                 'format_id': 'http-%d' % q, | ||||
|                                 'tbr': q, | ||||
|                             }) | ||||
|  | ||||
|         onceux_json = self._search_regex( | ||||
|             r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None) | ||||
|         if onceux_json: | ||||
|             onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') | ||||
|             if onceux_url: | ||||
|                 formats.extend(self._extract_once_formats(re.sub( | ||||
|                     r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url).replace('ads/vmap/', ''))) | ||||
|  | ||||
|         if not formats: | ||||
|             for quality in ['sd', 'hd']: | ||||
|                 # It's actually a link to a flv file | ||||
|                 flv_url = streams.get('f4m_{0}'.format(quality)) | ||||
| @@ -71,6 +115,7 @@ class GameSpotIE(InfoExtractor): | ||||
|                         'ext': 'flv', | ||||
|                         'format_id': quality, | ||||
|                     }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': data_video['guid'], | ||||
|   | ||||
| @@ -1,62 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     parse_age_limit, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GametrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review', | ||||
|         'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a', | ||||
|         'info_dict': { | ||||
|             'id': '2983958', | ||||
|             'ext': 'mp4', | ||||
|             'display_id': '116437-Just-Cause-3-Review', | ||||
|             'title': 'Just Cause 3 - Review', | ||||
|             'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>(.+?)\|', webpage, 'title').strip() | ||||
|         embed_url = self._proto_relative_url( | ||||
|             self._search_regex( | ||||
|                 r'src=\'(//embed.gametrailers.com/embed/[^\']+)\'', webpage, | ||||
|                 'embed url'), | ||||
|             scheme='http:') | ||||
|         video_id = url_basename(embed_url) | ||||
|         embed_page = self._download_webpage(embed_url, video_id) | ||||
|         embed_vars_json = self._search_regex( | ||||
|             r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page, | ||||
|             'embed vars') | ||||
|         info = self._parse_json(embed_vars_json, video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for media in info['media']: | ||||
|             if media['mediaPurpose'] == 'play': | ||||
|                 formats.append({ | ||||
|                     'url': media['uri'], | ||||
|                     'height': media['height'], | ||||
|                     'width:': media['width'], | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': info.get('thumbUri'), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'duration': int_or_none(info.get('videoLengthInSeconds')), | ||||
|             'age_limit': parse_age_limit(info.get('audienceRating')), | ||||
|         } | ||||
| @@ -12,7 +12,7 @@ from ..utils import ( | ||||
| class ImdbIE(InfoExtractor): | ||||
|     IE_NAME = 'imdb' | ||||
|     IE_DESC = 'Internet Movie Database trailers' | ||||
|     _VALID_URL = r'https?://(?:www|m)\.imdb\.com/video/[^/]+/vi(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video/[^/]+/|title/tt\d+.*?#lb-)vi(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.imdb.com/video/imdb/vi2524815897', | ||||
| @@ -25,6 +25,12 @@ class ImdbIE(InfoExtractor): | ||||
|     }, { | ||||
|         'url': 'http://www.imdb.com/video/_/vi2524815897', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -12,9 +12,35 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class JWPlatformBaseIE(InfoExtractor): | ||||
|     @staticmethod | ||||
|     def _find_jwplayer_data(webpage): | ||||
|         # TODO: Merge this with JWPlayer-related codes in generic.py | ||||
|  | ||||
|         mobj = re.search( | ||||
|             'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\((?P<options>[^)]+)\)', | ||||
|             webpage) | ||||
|         if mobj: | ||||
|             return mobj.group('options') | ||||
|  | ||||
|     def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs): | ||||
|         jwplayer_data = self._parse_json( | ||||
|             self._find_jwplayer_data(webpage), video_id) | ||||
|         return self._parse_jwplayer_data( | ||||
|             jwplayer_data, video_id, *args, **kwargs) | ||||
|  | ||||
|     def _parse_jwplayer_data(self, jwplayer_data, video_id, require_title=True, m3u8_id=None, rtmp_params=None): | ||||
|         # JWPlayer backward compatibility: flattened playlists | ||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96 | ||||
|         if 'playlist' not in jwplayer_data: | ||||
|             jwplayer_data = {'playlist': [jwplayer_data]} | ||||
|  | ||||
|         video_data = jwplayer_data['playlist'][0] | ||||
|  | ||||
|         # JWPlayer backward compatibility: flattened sources | ||||
|         # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35 | ||||
|         if 'sources' not in video_data: | ||||
|             video_data['sources'] = [video_data] | ||||
|  | ||||
|         formats = [] | ||||
|         for source in video_data['sources']: | ||||
|             source_url = self._proto_relative_url(source['file']) | ||||
|   | ||||
| @@ -95,7 +95,6 @@ class LyndaIE(LyndaBaseIE): | ||||
|     IE_NAME = 'lynda' | ||||
|     IE_DESC = 'lynda.com videos' | ||||
|     _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' | ||||
|     _NETRC_MACHINE = 'lynda' | ||||
|  | ||||
|     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
| @@ -8,6 +11,7 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     get_element_by_attribute, | ||||
|     int_or_none, | ||||
|     remove_start, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -15,7 +19,7 @@ class MiTeleIE(InfoExtractor): | ||||
|     IE_DESC = 'mitele.es' | ||||
|     _VALID_URL = r'https?://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<id>[^/]+)/' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/', | ||||
|         # MD5 is unstable | ||||
|         'info_dict': { | ||||
| @@ -24,10 +28,31 @@ class MiTeleIE(InfoExtractor): | ||||
|             'ext': 'flv', | ||||
|             'title': 'Tor, la web invisible', | ||||
|             'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', | ||||
|             'series': 'Diario de', | ||||
|             'season': 'La redacción', | ||||
|             'episode': 'Programa 144', | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 2913, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # no explicit title | ||||
|         'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/temporada-6/programa-226/', | ||||
|         'info_dict': { | ||||
|             'id': 'eLZSwoEd1S3pVyUm8lc6F', | ||||
|             'display_id': 'programa-226', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Cuarto Milenio - Temporada 6 - Programa 226', | ||||
|             'description': 'md5:50daf9fadefa4e62d9fc866d0c015701', | ||||
|             'series': 'Cuarto Milenio', | ||||
|             'season': 'Temporada 6', | ||||
|             'episode': 'Programa 226', | ||||
|             'thumbnail': 're:(?i)^https?://.*\.jpg$', | ||||
|             'duration': 7312, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
| @@ -70,7 +95,22 @@ class MiTeleIE(InfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', webpage, 'title') | ||||
|             r'class="Destacado-text"[^>]*>\s*<strong>([^<]+)</strong>', | ||||
|             webpage, 'title', default=None) | ||||
|  | ||||
|         mobj = re.search(r'''(?sx) | ||||
|                             class="Destacado-text"[^>]*>.*?<h1>\s* | ||||
|                             <span>(?P<series>[^<]+)</span>\s* | ||||
|                             <span>(?P<season>[^<]+)</span>\s* | ||||
|                             <span>(?P<episode>[^<]+)</span>''', webpage) | ||||
|         series, season, episode = mobj.groups() if mobj else [None] * 3 | ||||
|  | ||||
|         if not title: | ||||
|             if mobj: | ||||
|                 title = '%s - %s - %s' % (series, season, episode) | ||||
|             else: | ||||
|                 title = remove_start(self._search_regex( | ||||
|                     r'<title>([^<]+)</title>', webpage, 'title'), 'Ver online ') | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'data-media-id\s*=\s*"([^"]+)"', webpage, | ||||
| @@ -83,6 +123,9 @@ class MiTeleIE(InfoExtractor): | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'description': get_element_by_attribute('class', 'text', webpage), | ||||
|             'series': series, | ||||
|             'season': season, | ||||
|             'episode': episode, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse_urlencode, | ||||
|     compat_str, | ||||
|     compat_xpath, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| @@ -84,9 +85,10 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|                 rtmp_video_url = rendition.find('./src').text | ||||
|                 if rtmp_video_url.endswith('siteunavail.png'): | ||||
|                     continue | ||||
|                 new_url = self._transform_rtmp_url(rtmp_video_url) | ||||
|                 formats.append({ | ||||
|                     'ext': ext, | ||||
|                     'url': self._transform_rtmp_url(rtmp_video_url), | ||||
|                     'ext': 'flv' if new_url.startswith('rtmp') else ext, | ||||
|                     'url': new_url, | ||||
|                     'format_id': rendition.get('bitrate'), | ||||
|                     'width': int(rendition.get('width')), | ||||
|                     'height': int(rendition.get('height')), | ||||
| @@ -139,9 +141,9 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|                 itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||
|                 'scheme', 'urn:mtvn:video_title') | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') | ||||
|             title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title')) | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//title') or itemdoc.find('./title') | ||||
|             title_el = itemdoc.find(compat_xpath('.//title')) | ||||
|             if title_el.text is None: | ||||
|                 title_el = None | ||||
|  | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .mtv import MTVServicesInfoExtractor | ||||
| from ..compat import compat_urllib_parse_urlencode | ||||
| from ..utils import update_url_query | ||||
|  | ||||
|  | ||||
| class NickIE(MTVServicesInfoExtractor): | ||||
| @@ -61,3 +62,26 @@ class NickIE(MTVServicesInfoExtractor): | ||||
|  | ||||
|     def _extract_mgid(self, webpage): | ||||
|         return self._search_regex(r'data-contenturi="([^"]+)', webpage, 'mgid') | ||||
|  | ||||
|  | ||||
| class NickDeIE(MTVServicesInfoExtractor): | ||||
|     IE_NAME = 'nick.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?nick\.de/(?:playlist|shows)/(?:[^/]+/)*(?P<id>[^/?#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.nick.de/playlist/3773-top-videos/videos/episode/17306-zu-wasser-und-zu-land-rauchende-erdnusse', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.nick.de/shows/342-icarly', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         mrss_url = update_url_query(self._search_regex( | ||||
|             r'data-mrss=(["\'])(?P<url>http.+?)\1', webpage, 'mrss url', group='url'), | ||||
|             {'siteKey': 'nick.de'}) | ||||
|  | ||||
|         return self._get_videos_info_from_url(mrss_url, video_id) | ||||
|   | ||||
| @@ -1,19 +1,32 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     qualities, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class PornHdIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', | ||||
|         'md5': 'c8b964b1f0a4b5f7f28ae3a5c9f86ad5', | ||||
|         'info_dict': { | ||||
|             'id': '9864', | ||||
|             'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Restroom selfie masturbation', | ||||
|             'description': 'md5:3748420395e03e31ac96857a8f125b2b', | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     }, { | ||||
|         # removed video | ||||
|         'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', | ||||
|         'md5': '956b8ca569f7f4d8ec563e2c41598441', | ||||
|         'info_dict': { | ||||
| @@ -25,8 +38,9 @@ class PornHdIE(InfoExtractor): | ||||
|             'thumbnail': 're:^https?://.*\.jpg', | ||||
|             'view_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|         'skip': 'Not available anymore', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -38,28 +52,38 @@ class PornHdIE(InfoExtractor): | ||||
|         title = self._html_search_regex( | ||||
|             [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)', | ||||
|              r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="description">([^<]+)</div>', webpage, 'description', fatal=False) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'(\d+) views\s*</span>', webpage, 'view count', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         quality = qualities(['sd', 'hd']) | ||||
|         sources = json.loads(js_to_json(self._search_regex( | ||||
|         sources = self._parse_json(js_to_json(self._search_regex( | ||||
|             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", | ||||
|             webpage, 'sources'))) | ||||
|             webpage, 'sources', default='{}')), video_id) | ||||
|  | ||||
|         if not sources: | ||||
|             message = self._html_search_regex( | ||||
|                 r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1', | ||||
|                 webpage, 'error message', group='value') | ||||
|             raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for qname, video_url in sources.items(): | ||||
|         for format_id, video_url in sources.items(): | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             height = int_or_none(self._search_regex( | ||||
|                 r'^(\d+)[pP]', format_id, 'height', default=None)) | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': qname, | ||||
|                 'quality': quality(qname), | ||||
|                 'format_id': format_id, | ||||
|                 'height': height, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = self._html_search_regex( | ||||
|             r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1', | ||||
|             webpage, 'description', fatal=False, group='value') | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r'(\d+) views\s*<', webpage, 'view count', fatal=False)) | ||||
|         thumbnail = self._search_regex( | ||||
|             r"'poster'\s*:\s*'([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| @@ -39,7 +40,25 @@ class PornHubIE(InfoExtractor): | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|         }, | ||||
|     }, { | ||||
|         # non-ASCII title | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002', | ||||
|         'info_dict': { | ||||
|             'id': '1331683002', | ||||
|             'ext': 'mp4', | ||||
|             'title': '重庆婷婷女王足交', | ||||
|             'uploader': 'cj397186295', | ||||
|             'duration': 1753, | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
|             'age_limit': 18, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d', | ||||
|         'only_matching': True, | ||||
| @@ -76,19 +95,25 @@ class PornHubIE(InfoExtractor): | ||||
|                 'PornHub said: %s' % error_msg, | ||||
|                 expected=True, video_id=video_id) | ||||
|  | ||||
|         # video_title from flashvars contains whitespace instead of non-ASCII (see | ||||
|         # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying | ||||
|         # on that anymore. | ||||
|         title = self._html_search_meta( | ||||
|             'twitter:title', webpage, default=None) or self._search_regex( | ||||
|             (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)', | ||||
|              r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1', | ||||
|              r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'), | ||||
|             webpage, 'title', group='title') | ||||
|  | ||||
|         flashvars = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), | ||||
|             video_id) | ||||
|         if flashvars: | ||||
|             video_title = flashvars.get('video_title') | ||||
|             thumbnail = flashvars.get('image_url') | ||||
|             duration = int_or_none(flashvars.get('video_duration')) | ||||
|         else: | ||||
|             video_title, thumbnail, duration = [None] * 3 | ||||
|  | ||||
|         if not video_title: | ||||
|             video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') | ||||
|             title, thumbnail, duration = [None] * 3 | ||||
|  | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', | ||||
| @@ -137,7 +162,7 @@ class PornHubIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'uploader': video_uploader, | ||||
|             'title': video_title, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|   | ||||
| @@ -2,22 +2,19 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
| ) | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class R7IE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|     _VALID_URL = r'''(?x) | ||||
|                         https?:// | ||||
|                         (?: | ||||
|                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| | ||||
|                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-| | ||||
|                             player\.r7\.com/video/i/ | ||||
|                         ) | ||||
|                         (?P<id>[\da-f]{24}) | ||||
|                         ''' | ||||
|                     ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', | ||||
|         'md5': '403c4e393617e8e8ddc748978ee8efde', | ||||
| @@ -25,6 +22,7 @@ class R7IE(InfoExtractor): | ||||
|             'id': '54e7050b0cf2ff57e0279389', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', | ||||
|             'description': 'md5:01812008664be76a6479aa58ec865b72', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 98, | ||||
|             'like_count': int, | ||||
| @@ -44,45 +42,72 @@ class R7IE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://player.r7.com/video/i/%s' % video_id, video_id) | ||||
|         video = self._download_json( | ||||
|             'http://player-api.r7.com/video/i/%s' % video_id, video_id) | ||||
|  | ||||
|         item = self._parse_json(js_to_json(self._search_regex( | ||||
|             r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) | ||||
|  | ||||
|         title = unescapeHTML(item['title']) | ||||
|         thumbnail = item.get('init', {}).get('thumbUri') | ||||
|         duration = None | ||||
|  | ||||
|         statistics = item.get('statistics', {}) | ||||
|         like_count = int_or_none(statistics.get('likes')) | ||||
|         view_count = int_or_none(statistics.get('views')) | ||||
|         title = video['title'] | ||||
|  | ||||
|         formats = [] | ||||
|         for format_key, format_dict in item['playlist'][0].items(): | ||||
|             src = format_dict.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             format_id = format_dict.get('format') or format_key | ||||
|             if duration is None: | ||||
|                 duration = format_dict.get('duration') | ||||
|             if '.f4m' in src: | ||||
|                 formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) | ||||
|             elif src.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': src, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|         media_url_hls = video.get('media_url_hls') | ||||
|         if media_url_hls: | ||||
|             formats.extend(self._extract_m3u8_formats( | ||||
|                 media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', | ||||
|                 m3u8_id='hls', fatal=False)) | ||||
|         media_url = video.get('media_url') | ||||
|         if media_url: | ||||
|             f = { | ||||
|                 'url': media_url, | ||||
|                 'format_id': 'http', | ||||
|             } | ||||
|             # m3u8 format always matches the http format, let's copy metadata from | ||||
|             # one to another | ||||
|             m3u8_formats = list(filter( | ||||
|                 lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple', | ||||
|                 formats)) | ||||
|             if len(m3u8_formats) == 1: | ||||
|                 f_copy = m3u8_formats[0].copy() | ||||
|                 f_copy.update(f) | ||||
|                 f_copy['protocol'] = 'http' | ||||
|                 f = f_copy | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         description = video.get('description') | ||||
|         thumbnail = video.get('thumb') | ||||
|         duration = int_or_none(video.get('media_duration')) | ||||
|         like_count = int_or_none(video.get('likes')) | ||||
|         view_count = int_or_none(video.get('views')) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class R7ArticleIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015', | ||||
|         'only_matching': True, | ||||
|     } | ||||
|  | ||||
|     @classmethod | ||||
|     def suitable(cls, url): | ||||
|         return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         display_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key()) | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import( | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     str_to_int, | ||||
| ) | ||||
|   | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/sportschau.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/sportschau.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .wdr import WDRBaseIE | ||||
| from ..utils import get_element_by_attribute | ||||
|  | ||||
|  | ||||
| class SportschauIE(WDRBaseIE): | ||||
|     IE_NAME = 'Sportschau' | ||||
|     _VALID_URL = r'https?://(?:www\.)?sportschau\.de/(?:[^/]+/)+video-?(?P<id>[^/#?]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.sportschau.de/uefaeuro2016/videos/video-dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100.html', | ||||
|         'info_dict': { | ||||
|             'id': 'mdb-1140188', | ||||
|             'display_id': 'dfb-team-geht-gut-gelaunt-ins-spiel-gegen-polen-100', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'DFB-Team geht gut gelaunt ins Spiel gegen Polen', | ||||
|             'description': 'Vor dem zweiten Gruppenspiel gegen Polen herrscht gute Stimmung im deutschen Team. Insbesondere Bastian Schweinsteiger strotzt vor Optimismus nach seinem Tor gegen die Ukraine.', | ||||
|             'upload_date': '20160615', | ||||
|         }, | ||||
|         'skip': 'Geo-restricted to Germany', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = get_element_by_attribute('class', 'headline', webpage) | ||||
|         description = self._html_search_meta('description', webpage, 'description') | ||||
|  | ||||
|         info = self._extract_wdr_video(webpage, video_id) | ||||
|  | ||||
|         info.update({ | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|         }) | ||||
|  | ||||
|         return info | ||||
| @@ -6,7 +6,6 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     sanitized_Request, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
|  | ||||
| @@ -45,20 +44,26 @@ class StreamcloudIE(InfoExtractor): | ||||
|             (?:id="[^"]+"\s+)? | ||||
|             value="([^"]*)" | ||||
|             ''', orig_webpage) | ||||
|         post = urlencode_postdata(fields) | ||||
|  | ||||
|         self._sleep(12, video_id) | ||||
|         headers = { | ||||
|             b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|         } | ||||
|         req = sanitized_Request(url, post, headers) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             req, video_id, note='Downloading video page ...') | ||||
|         title = self._html_search_regex( | ||||
|             r'<h1[^>]*>([^<]+)<', webpage, 'title') | ||||
|         video_url = self._search_regex( | ||||
|             r'file:\s*"([^"]+)"', webpage, 'video URL') | ||||
|             url, video_id, data=urlencode_postdata(fields), headers={ | ||||
|                 b'Content-Type': b'application/x-www-form-urlencoded', | ||||
|             }) | ||||
|  | ||||
|         try: | ||||
|             title = self._html_search_regex( | ||||
|                 r'<h1[^>]*>([^<]+)<', webpage, 'title') | ||||
|             video_url = self._search_regex( | ||||
|                 r'file:\s*"([^"]+)"', webpage, 'video URL') | ||||
|         except ExtractorError: | ||||
|             message = self._html_search_regex( | ||||
|                 r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>', | ||||
|                 webpage, 'message', default=None, group='message') | ||||
|             if message: | ||||
|                 raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) | ||||
|             raise | ||||
|         thumbnail = self._search_regex( | ||||
|             r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False) | ||||
|  | ||||
|   | ||||
| @@ -6,17 +6,14 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     dict_get, | ||||
|     int_or_none, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SVTBaseIE(InfoExtractor): | ||||
|     def _extract_video(self, url, video_id): | ||||
|         info = self._download_json(url, video_id) | ||||
|  | ||||
|         title = info['context']['title'] | ||||
|         thumbnail = info['context'].get('thumbnailImage') | ||||
|  | ||||
|         video_info = info['video'] | ||||
|     def _extract_video(self, video_info, video_id): | ||||
|         formats = [] | ||||
|         for vr in video_info['videoReferences']: | ||||
|             player_type = vr.get('playerType') | ||||
| @@ -40,27 +37,49 @@ class SVTBaseIE(InfoExtractor): | ||||
|                     'format_id': player_type, | ||||
|                     'url': vurl, | ||||
|                 }) | ||||
|         if not formats and video_info.get('rights', {}).get('geoBlockedSweden'): | ||||
|             self.raise_geo_restricted('This video is only available in Sweden') | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         subtitle_references = video_info.get('subtitleReferences') | ||||
|         subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences')) | ||||
|         if isinstance(subtitle_references, list): | ||||
|             for sr in subtitle_references: | ||||
|                 subtitle_url = sr.get('url') | ||||
|                 subtitle_lang = sr.get('language', 'sv') | ||||
|                 if subtitle_url: | ||||
|                     subtitles.setdefault('sv', []).append({'url': subtitle_url}) | ||||
|                     if determine_ext(subtitle_url) == 'm3u8': | ||||
|                         # TODO(yan12125): handle WebVTT in m3u8 manifests | ||||
|                         continue | ||||
|  | ||||
|         duration = video_info.get('materialLength') | ||||
|         age_limit = 18 if video_info.get('inappropriateForChildren') else 0 | ||||
|                     subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url}) | ||||
|  | ||||
|         title = video_info.get('title') | ||||
|  | ||||
|         series = video_info.get('programTitle') | ||||
|         season_number = int_or_none(video_info.get('season')) | ||||
|         episode = video_info.get('episodeTitle') | ||||
|         episode_number = int_or_none(video_info.get('episodeNumber')) | ||||
|  | ||||
|         duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration'))) | ||||
|         age_limit = None | ||||
|         adult = dict_get( | ||||
|             video_info, ('inappropriateForChildren', 'blockedForChildren'), | ||||
|             skip_false_values=False) | ||||
|         if adult is not None: | ||||
|             age_limit = 18 if adult else 0 | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'age_limit': age_limit, | ||||
|             'series': series, | ||||
|             'season_number': season_number, | ||||
|             'episode': episode, | ||||
|             'episode_number': episode_number, | ||||
|         } | ||||
|  | ||||
|  | ||||
| @@ -68,11 +87,11 @@ class SVTIE(SVTBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false', | ||||
|         'md5': '9648197555fc1b49e3dc22db4af51d46', | ||||
|         'md5': '33e9a5d8f646523ce0868ecfb0eed77d', | ||||
|         'info_dict': { | ||||
|             'id': '2900353', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Stjärnorna skojar till det - under SVT-intervjun', | ||||
|             'duration': 27, | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
| @@ -89,15 +108,20 @@ class SVTIE(SVTBaseIE): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         widget_id = mobj.group('widget_id') | ||||
|         article_id = mobj.group('id') | ||||
|         return self._extract_video( | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id), | ||||
|             article_id) | ||||
|  | ||||
|         info_dict = self._extract_video(info['video'], article_id) | ||||
|         info_dict['title'] = info['context']['title'] | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
| class SVTPlayIE(SVTBaseIE): | ||||
|     IE_DESC = 'SVT Play and Öppet arkiv' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?P<host>svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/video/(?P<id>[0-9]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2', | ||||
|         'md5': '2b6704fe4a28801e1a098bbf3c5ac611', | ||||
|         'info_dict': { | ||||
| @@ -113,12 +137,47 @@ class SVTPlayIE(SVTBaseIE): | ||||
|                 }] | ||||
|             }, | ||||
|         }, | ||||
|     } | ||||
|     }, { | ||||
|         # geo restricted to Sweden | ||||
|         'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         host = mobj.group('host') | ||||
|         return self._extract_video( | ||||
|             'http://www.%s.se/video/%s?output=json' % (host, video_id), | ||||
|             video_id) | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         data = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'root\["__svtplay"\]\s*=\s*([^;]+);', | ||||
|                 webpage, 'embedded data', default='{}'), | ||||
|             video_id, fatal=False) | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         if data: | ||||
|             video_info = try_get( | ||||
|                 data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'], | ||||
|                 dict) | ||||
|             if video_info: | ||||
|                 info_dict = self._extract_video(video_info, video_id) | ||||
|                 info_dict.update({ | ||||
|                     'title': data['context']['dispatcher']['stores']['MetaStore']['title'], | ||||
|                     'thumbnail': thumbnail, | ||||
|                 }) | ||||
|                 return info_dict | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)', | ||||
|             webpage, 'video id', default=None) | ||||
|  | ||||
|         if video_id: | ||||
|             data = self._download_json( | ||||
|                 'http://www.svt.se/videoplayer-api/video/%s' % video_id, video_id) | ||||
|             info_dict = self._extract_video(data, video_id) | ||||
|             if not info_dict.get('title'): | ||||
|                 info_dict['title'] = re.sub( | ||||
|                     r'\s*\|\s*.+?$', '', | ||||
|                     info_dict.get('episode') or self._og_search_title(webpage)) | ||||
|             return info_dict | ||||
|   | ||||
| @@ -277,9 +277,9 @@ class ThePlatformIE(ThePlatformBaseIE): | ||||
|  | ||||
|  | ||||
| class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|     _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&byGuid=%s' | ||||
|     _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*byGuid=(?P<id>[a-zA-Z0-9_]+)' | ||||
|     _TEST = { | ||||
|     _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s' | ||||
|     _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))' | ||||
|     _TESTS = [{ | ||||
|         # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207 | ||||
|         'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207', | ||||
|         'md5': '6e32495b5073ab414471b615c5ded394', | ||||
| @@ -295,32 +295,38 @@ class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|             'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'], | ||||
|             'uploader': 'NBCU-NEWS', | ||||
|         }, | ||||
|     } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         provider_id = mobj.group('provider_id') | ||||
|         feed_id = mobj.group('feed_id') | ||||
|  | ||||
|         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, video_id) | ||||
|         feed = self._download_json(real_url, video_id) | ||||
|         entry = feed['entries'][0] | ||||
|     def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}): | ||||
|         real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query) | ||||
|         entry = self._download_json(real_url, video_id)['entries'][0] | ||||
|  | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         first_video_id = None | ||||
|         duration = None | ||||
|         asset_types = [] | ||||
|         for item in entry['media$content']: | ||||
|             smil_url = item['plfile$url'] + '&mbr=true' | ||||
|             smil_url = item['plfile$url'] | ||||
|             cur_video_id = ThePlatformIE._match_id(smil_url) | ||||
|             if first_video_id is None: | ||||
|                 first_video_id = cur_video_id | ||||
|                 duration = float_or_none(item.get('plfile$duration')) | ||||
|             cur_formats, cur_subtitles = self._extract_theplatform_smil(smil_url, video_id, 'Downloading SMIL data for %s' % cur_video_id) | ||||
|             formats.extend(cur_formats) | ||||
|             subtitles = self._merge_subtitles(subtitles, cur_subtitles) | ||||
|             for asset_type in item['plfile$assetTypes']: | ||||
|                 if asset_type in asset_types: | ||||
|                     continue | ||||
|                 asset_types.append(asset_type) | ||||
|                 query = { | ||||
|                     'mbr': 'true', | ||||
|                     'formats': item['plfile$format'], | ||||
|                     'assetTypes': asset_type, | ||||
|                 } | ||||
|                 if asset_type in asset_types_query: | ||||
|                     query.update(asset_types_query[asset_type]) | ||||
|                 cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query( | ||||
|                     smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type) | ||||
|                 formats.extend(cur_formats) | ||||
|                 subtitles = self._merge_subtitles(subtitles, cur_subtitles) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
| @@ -344,5 +350,17 @@ class ThePlatformFeedIE(ThePlatformBaseIE): | ||||
|             'timestamp': timestamp, | ||||
|             'categories': categories, | ||||
|         }) | ||||
|         if custom_fields: | ||||
|             ret.update(custom_fields(entry)) | ||||
|  | ||||
|         return ret | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         provider_id = mobj.group('provider_id') | ||||
|         feed_id = mobj.group('feed_id') | ||||
|         filter_query = mobj.group('filter') | ||||
|  | ||||
|         return self._extract_feed_info(provider_id, feed_id, filter_query, video_id) | ||||
|   | ||||
| @@ -8,6 +8,7 @@ import itertools | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
| @@ -24,6 +25,7 @@ from ..utils import ( | ||||
|     urlencode_postdata, | ||||
|     unescapeHTML, | ||||
|     parse_filesize, | ||||
|     try_get, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -144,7 +146,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|                             \. | ||||
|                         )? | ||||
|                         vimeo(?P<pro>pro)?\.com/ | ||||
|                         (?!channels/[^/?#]+/?(?:$|[?#])|[^/]+/review/|(?:album|ondemand)/) | ||||
|                         (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/) | ||||
|                         (?:.*?/)? | ||||
|                         (?: | ||||
|                             (?: | ||||
| @@ -225,8 +227,6 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         { | ||||
|             'url': 'http://vimeo.com/channels/keypeele/75629013', | ||||
|             'md5': '2f86a05afe9d7abc0b9126d229bbe15d', | ||||
|             'note': 'Video is freely available via original URL ' | ||||
|                     'and protected with password when accessed via http://vimeo.com/75629013', | ||||
|             'info_dict': { | ||||
|                 'id': '75629013', | ||||
|                 'ext': 'mp4', | ||||
| @@ -270,7 +270,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         { | ||||
|             # contains original format | ||||
|             'url': 'https://vimeo.com/33951933', | ||||
|             'md5': '53c688fa95a55bf4b7293d37a89c5c53', | ||||
|             'md5': '2d9f5475e0537f013d0073e812ab89e6', | ||||
|             'info_dict': { | ||||
|                 'id': '33951933', | ||||
|                 'ext': 'mp4', | ||||
| @@ -282,6 +282,29 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|                 'description': 'md5:ae23671e82d05415868f7ad1aec21147', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # only available via https://vimeo.com/channels/tributes/6213729 and | ||||
|             # not via https://vimeo.com/6213729 | ||||
|             'url': 'https://vimeo.com/channels/tributes/6213729', | ||||
|             'info_dict': { | ||||
|                 'id': '6213729', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Vimeo Tribute: The Shining', | ||||
|                 'uploader': 'Casey Donahue', | ||||
|                 'uploader_url': 're:https?://(?:www\.)?vimeo\.com/caseydonahue', | ||||
|                 'uploader_id': 'caseydonahue', | ||||
|                 'upload_date': '20090821', | ||||
|                 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'expected_warnings': ['Unable to download JSON metadata'], | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://vimeo.com/109815029', | ||||
|             'note': 'Video not completely processed, "failed" seed status', | ||||
| @@ -291,6 +314,10 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|             'url': 'https://vimeo.com/groups/travelhd/videos/22439234', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://vimeo.com/album/2632481/video/79010983', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             # source file returns 403: Forbidden | ||||
|             'url': 'https://vimeo.com/7809605', | ||||
| @@ -367,7 +394,7 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|         orig_url = url | ||||
|         if mobj.group('pro') or mobj.group('player'): | ||||
|             url = 'https://player.vimeo.com/video/' + video_id | ||||
|         else: | ||||
|         elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): | ||||
|             url = 'https://vimeo.com/' + video_id | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
| @@ -445,7 +472,18 @@ class VimeoIE(VimeoBaseInfoExtractor): | ||||
|             if config.get('view') == 4: | ||||
|                 config = self._verify_player_video_password(url, video_id) | ||||
|  | ||||
|         if '>You rented this title.<' in webpage: | ||||
|         def is_rented(): | ||||
|             if '>You rented this title.<' in webpage: | ||||
|                 return True | ||||
|             if config.get('user', {}).get('purchased'): | ||||
|                 return True | ||||
|             label = try_get( | ||||
|                 config, lambda x: x['video']['vod']['purchase_options'][0]['label_string'], compat_str) | ||||
|             if label and label.startswith('You rented this'): | ||||
|                 return True | ||||
|             return False | ||||
|  | ||||
|         if is_rented(): | ||||
|             feature_id = config.get('video', {}).get('vod', {}).get('feature_id') | ||||
|             if feature_id and not data.get('force_feature_id', False): | ||||
|                 return self.url_result(smuggle_url( | ||||
| @@ -617,8 +655,21 @@ class VimeoChannelIE(VimeoBaseInfoExtractor): | ||||
|                 webpage = self._login_list_password(page_url, list_id, webpage) | ||||
|                 yield self._extract_list_title(webpage) | ||||
|  | ||||
|             for video_id in re.findall(r'id="clip_(\d+?)"', webpage): | ||||
|                 yield self.url_result('https://vimeo.com/%s' % video_id, 'Vimeo') | ||||
|             # Try extracting href first since not all videos are available via | ||||
|             # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729) | ||||
|             clips = re.findall( | ||||
|                 r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage) | ||||
|             if clips: | ||||
|                 for video_id, video_url in clips: | ||||
|                     yield self.url_result( | ||||
|                         compat_urlparse.urljoin(base_url, video_url), | ||||
|                         VimeoIE.ie_key(), video_id=video_id) | ||||
|             # More relaxed fallback | ||||
|             else: | ||||
|                 for video_id in re.findall(r'id=["\']clip_(\d+)', webpage): | ||||
|                     yield self.url_result( | ||||
|                         'https://vimeo.com/%s' % video_id, | ||||
|                         VimeoIE.ie_key(), video_id=video_id) | ||||
|  | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|                 break | ||||
| @@ -655,7 +706,7 @@ class VimeoUserIE(VimeoChannelIE): | ||||
|  | ||||
| class VimeoAlbumIE(VimeoChannelIE): | ||||
|     IE_NAME = 'vimeo:album' | ||||
|     _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)' | ||||
|     _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))' | ||||
|     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://vimeo.com/album/2632481', | ||||
| @@ -675,6 +726,13 @@ class VimeoAlbumIE(VimeoChannelIE): | ||||
|         'params': { | ||||
|             'videopassword': 'youtube-dl', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         # TODO: respect page number | ||||
|         'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _page_url(self, base_url, pagenum): | ||||
|   | ||||
| @@ -24,6 +24,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20130519', | ||||
|             'uploader': 'Jack Dorsey', | ||||
|             'uploader_id': '76', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -39,6 +40,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20140815', | ||||
|             'uploader': 'Mars Ruiz', | ||||
|             'uploader_id': '1102363502380728320', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -54,6 +56,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20130430', | ||||
|             'uploader': 'Z3k3', | ||||
|             'uploader_id': '936470460173008896', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -71,6 +74,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': '20150705', | ||||
|             'uploader': 'Pimry_zaa', | ||||
|             'uploader_id': '1135760698325307392', | ||||
|             'view_count': int, | ||||
|             'like_count': int, | ||||
|             'comment_count': int, | ||||
|             'repost_count': int, | ||||
| @@ -109,6 +113,7 @@ class VineIE(InfoExtractor): | ||||
|             'upload_date': unified_strdate(data.get('created')), | ||||
|             'uploader': username, | ||||
|             'uploader_id': data.get('userIdStr'), | ||||
|             'view_count': int_or_none(data.get('loops', {}).get('count')), | ||||
|             'like_count': int_or_none(data.get('likes', {}).get('count')), | ||||
|             'comment_count': int_or_none(data.get('comments', {}).get('count')), | ||||
|             'repost_count': int_or_none(data.get('reposts', {}).get('count')), | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import sys | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_str | ||||
| @@ -10,7 +11,6 @@ from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     orderedSet, | ||||
|     sanitized_Request, | ||||
|     str_to_int, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| @@ -190,7 +190,7 @@ class VKIE(InfoExtractor): | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|         login_page, url_handle = self._download_webpage_handle( | ||||
|             'https://vk.com', None, 'Downloading login page') | ||||
|  | ||||
|         login_form = self._hidden_inputs(login_page) | ||||
| @@ -200,11 +200,26 @@ class VKIE(InfoExtractor): | ||||
|             'pass': password.encode('cp1251'), | ||||
|         }) | ||||
|  | ||||
|         request = sanitized_Request( | ||||
|             'https://login.vk.com/?act=login', | ||||
|             urlencode_postdata(login_form)) | ||||
|         # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header | ||||
|         # and expects the first one to be set rather than second (see | ||||
|         # https://github.com/rg3/youtube-dl/issues/9841#issuecomment-227871201). | ||||
|         # As of RFC6265 the newer one cookie should be set into cookie store | ||||
|         # what actually happens. | ||||
|         # We will workaround this VK issue by resetting the remixlhk cookie to | ||||
|         # the first one manually. | ||||
|         cookies = url_handle.headers.get('Set-Cookie') | ||||
|         if sys.version_info[0] >= 3: | ||||
|             cookies = cookies.encode('iso-8859-1') | ||||
|         cookies = cookies.decode('utf-8') | ||||
|         remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies) | ||||
|         if remixlhk: | ||||
|             value, domain = remixlhk.groups() | ||||
|             self._set_cookie(domain, 'remixlhk', value) | ||||
|  | ||||
|         login_page = self._download_webpage( | ||||
|             request, None, note='Logging in as %s' % username) | ||||
|             'https://login.vk.com/?act=login', None, | ||||
|             note='Logging in as %s' % username, | ||||
|             data=urlencode_postdata(login_form)) | ||||
|  | ||||
|         if re.search(r'onLoginFailed', login_page): | ||||
|             raise ExtractorError( | ||||
|   | ||||
| @@ -15,7 +15,87 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WDRIE(InfoExtractor): | ||||
| class WDRBaseIE(InfoExtractor): | ||||
|     def _extract_wdr_video(self, webpage, display_id): | ||||
|         # for wdr.de the data-extension is in a tag with the class "mediaLink" | ||||
|         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" | ||||
|         # for wdrmaus its in a link to the page in a multiline "videoLink"-tag | ||||
|         json_metadata = self._html_search_regex( | ||||
|             r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', | ||||
|             webpage, 'media link', default=None, flags=re.MULTILINE) | ||||
|  | ||||
|         if not json_metadata: | ||||
|             return | ||||
|  | ||||
|         media_link_obj = self._parse_json(json_metadata, display_id, | ||||
|                                           transform_source=js_to_json) | ||||
|         jsonp_url = media_link_obj['mediaObj']['url'] | ||||
|  | ||||
|         metadata = self._download_json( | ||||
|             jsonp_url, 'metadata', transform_source=strip_jsonp) | ||||
|  | ||||
|         metadata_tracker_data = metadata['trackerData'] | ||||
|         metadata_media_resource = metadata['mediaResource'] | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         # check if the metadata contains a direct URL to a file | ||||
|         for kind, media_resource in metadata_media_resource.items(): | ||||
|             if kind not in ('dflt', 'alt'): | ||||
|                 continue | ||||
|  | ||||
|             for tag_name, medium_url in media_resource.items(): | ||||
|                 if tag_name not in ('videoURL', 'audioURL'): | ||||
|                     continue | ||||
|  | ||||
|                 ext = determine_ext(medium_url) | ||||
|                 if ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         medium_url, display_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='hls')) | ||||
|                 elif ext == 'f4m': | ||||
|                     manifest_url = update_url_query( | ||||
|                         medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         manifest_url, display_id, f4m_id='hds', fatal=False)) | ||||
|                 elif ext == 'smil': | ||||
|                     formats.extend(self._extract_smil_formats( | ||||
|                         medium_url, 'stream', fatal=False)) | ||||
|                 else: | ||||
|                     a_format = { | ||||
|                         'url': medium_url | ||||
|                     } | ||||
|                     if ext == 'unknown_video': | ||||
|                         urlh = self._request_webpage( | ||||
|                             medium_url, display_id, note='Determining extension') | ||||
|                         ext = urlhandle_detect_ext(urlh) | ||||
|                         a_format['ext'] = ext | ||||
|                     formats.append(a_format) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         caption_url = metadata_media_resource.get('captionURL') | ||||
|         if caption_url: | ||||
|             subtitles['de'] = [{ | ||||
|                 'url': caption_url, | ||||
|                 'ext': 'ttml', | ||||
|             }] | ||||
|  | ||||
|         title = metadata_tracker_data['trackerClipTitle'] | ||||
|  | ||||
|         return { | ||||
|             'id': metadata_tracker_data.get('trackerClipId', display_id), | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), | ||||
|             'formats': formats, | ||||
|             'subtitles': subtitles, | ||||
|             'upload_date': unified_strdate(metadata_tracker_data.get('trackerClipAirTime')), | ||||
|         } | ||||
|  | ||||
|  | ||||
| class WDRIE(WDRBaseIE): | ||||
|     _CURRENT_MAUS_URL = r'https?://(?:www\.)wdrmaus.de/(?:[^/]+/){1,2}[^/?#]+\.php5' | ||||
|     _PAGE_REGEX = r'/(?:mediathek/)?[^/]+/(?P<type>[^/]+)/(?P<display_id>.+)\.html' | ||||
|     _VALID_URL = r'(?P<page_url>https?://(?:www\d\.)?wdr\d?\.de)' + _PAGE_REGEX + '|' + _CURRENT_MAUS_URL | ||||
| @@ -91,10 +171,10 @@ class WDRIE(InfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/achterbahn.php5', | ||||
|             # HDS download, MD5 is unstable | ||||
|             'md5': '803138901f6368ee497b4d195bb164f2', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-186083', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20130919', | ||||
|                 'title': 'Sachgeschichte - Achterbahn ', | ||||
|                 'description': '- Die Sendung mit der Maus -', | ||||
| @@ -120,14 +200,9 @@ class WDRIE(InfoExtractor): | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         # for wdr.de the data-extension is in a tag with the class "mediaLink" | ||||
|         # for wdr.de radio players, in a tag with the class "wdrrPlayerPlayBtn" | ||||
|         # for wdrmaus its in a link to the page in a multiline "videoLink"-tag | ||||
|         json_metadata = self._html_search_regex( | ||||
|             r'class=(?:"(?:mediaLink|wdrrPlayerPlayBtn)\b[^"]*"[^>]+|"videoLink\b[^"]*"[\s]*>\n[^\n]*)data-extension="([^"]+)"', | ||||
|             webpage, 'media link', default=None, flags=re.MULTILINE) | ||||
|         info_dict = self._extract_wdr_video(webpage, display_id) | ||||
|  | ||||
|         if not json_metadata: | ||||
|         if not info_dict: | ||||
|             entries = [ | ||||
|                 self.url_result(page_url + href[0], 'WDR') | ||||
|                 for href in re.findall( | ||||
| @@ -140,86 +215,22 @@ class WDRIE(InfoExtractor): | ||||
|  | ||||
|             raise ExtractorError('No downloadable streams found', expected=True) | ||||
|  | ||||
|         media_link_obj = self._parse_json(json_metadata, display_id, | ||||
|                                           transform_source=js_to_json) | ||||
|         jsonp_url = media_link_obj['mediaObj']['url'] | ||||
|  | ||||
|         metadata = self._download_json( | ||||
|             jsonp_url, 'metadata', transform_source=strip_jsonp) | ||||
|  | ||||
|         metadata_tracker_data = metadata['trackerData'] | ||||
|         metadata_media_resource = metadata['mediaResource'] | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         # check if the metadata contains a direct URL to a file | ||||
|         for kind, media_resource in metadata_media_resource.items(): | ||||
|             if kind not in ('dflt', 'alt'): | ||||
|                 continue | ||||
|  | ||||
|             for tag_name, medium_url in media_resource.items(): | ||||
|                 if tag_name not in ('videoURL', 'audioURL'): | ||||
|                     continue | ||||
|  | ||||
|                 ext = determine_ext(medium_url) | ||||
|                 if ext == 'm3u8': | ||||
|                     formats.extend(self._extract_m3u8_formats( | ||||
|                         medium_url, display_id, 'mp4', 'm3u8_native', | ||||
|                         m3u8_id='hls')) | ||||
|                 elif ext == 'f4m': | ||||
|                     manifest_url = update_url_query( | ||||
|                         medium_url, {'hdcore': '3.2.0', 'plugin': 'aasp-3.2.0.77.18'}) | ||||
|                     formats.extend(self._extract_f4m_formats( | ||||
|                         manifest_url, display_id, f4m_id='hds', fatal=False)) | ||||
|                 elif ext == 'smil': | ||||
|                     formats.extend(self._extract_smil_formats( | ||||
|                         medium_url, 'stream', fatal=False)) | ||||
|                 else: | ||||
|                     a_format = { | ||||
|                         'url': medium_url | ||||
|                     } | ||||
|                     if ext == 'unknown_video': | ||||
|                         urlh = self._request_webpage( | ||||
|                             medium_url, display_id, note='Determining extension') | ||||
|                         ext = urlhandle_detect_ext(urlh) | ||||
|                         a_format['ext'] = ext | ||||
|                     formats.append(a_format) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         subtitles = {} | ||||
|         caption_url = metadata_media_resource.get('captionURL') | ||||
|         if caption_url: | ||||
|             subtitles['de'] = [{ | ||||
|                 'url': caption_url, | ||||
|                 'ext': 'ttml', | ||||
|             }] | ||||
|  | ||||
|         title = metadata_tracker_data.get('trackerClipTitle') | ||||
|         is_live = url_type == 'live' | ||||
|  | ||||
|         if is_live: | ||||
|             title = self._live_title(title) | ||||
|             upload_date = None | ||||
|         elif 'trackerClipAirTime' in metadata_tracker_data: | ||||
|             upload_date = metadata_tracker_data['trackerClipAirTime'] | ||||
|         else: | ||||
|             upload_date = self._html_search_meta('DC.Date', webpage, 'upload date') | ||||
|             info_dict.update({ | ||||
|                 'title': self._live_title(info_dict['title']), | ||||
|                 'upload_date': None, | ||||
|             }) | ||||
|         elif 'upload_date' not in info_dict: | ||||
|             info_dict['upload_date'] = unified_strdate(self._html_search_meta('DC.Date', webpage, 'upload date')) | ||||
|  | ||||
|         if upload_date: | ||||
|             upload_date = unified_strdate(upload_date) | ||||
|  | ||||
|         return { | ||||
|             'id': metadata_tracker_data.get('trackerClipId', display_id), | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'alt_title': metadata_tracker_data.get('trackerClipSubcategory'), | ||||
|             'formats': formats, | ||||
|             'upload_date': upload_date, | ||||
|         info_dict.update({ | ||||
|             'description': self._html_search_meta('Description', webpage), | ||||
|             'is_live': is_live, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
|  | ||||
|  | ||||
| class WDRMobileIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,29 +1,33 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from .jwplatform import JWPlatformBaseIE | ||||
|  | ||||
|  | ||||
| class WimpIE(InfoExtractor): | ||||
| class WimpIE(JWPlatformBaseIE): | ||||
|     _VALID_URL = r'https?://(?:www\.)?wimp\.com/(?P<id>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.wimp.com/maruexhausted/', | ||||
|         'url': 'http://www.wimp.com/maru-is-exhausted/', | ||||
|         'md5': 'ee21217ffd66d058e8b16be340b74883', | ||||
|         'info_dict': { | ||||
|             'id': 'maruexhausted', | ||||
|             'id': 'maru-is-exhausted', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Maru is exhausted.', | ||||
|             'description': 'md5:57e099e857c0a4ea312542b684a869b8', | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'http://www.wimp.com/clowncar/', | ||||
|         'md5': '4e2986c793694b55b37cf92521d12bb4', | ||||
|         'md5': '5c31ad862a90dc5b1f023956faec13fe', | ||||
|         'info_dict': { | ||||
|             'id': 'clowncar', | ||||
|             'id': 'cG4CEr2aiSg', | ||||
|             'ext': 'webm', | ||||
|             'title': 'It\'s like a clown car.', | ||||
|             'description': 'md5:0e56db1370a6e49c5c1d19124c0d2fb2', | ||||
|             'title': 'Basset hound clown car...incredible!', | ||||
|             'description': '5 of my Bassets crawled in this dog loo! www.bellinghambassets.com\n\nFor licensing/usage please contact: licensing(at)jukinmediadotcom', | ||||
|             'upload_date': '20140303', | ||||
|             'uploader': 'Gretchen Hoey', | ||||
|             'uploader_id': 'gretchenandjeff1', | ||||
|         }, | ||||
|         'add_ie': ['Youtube'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -41,14 +45,13 @@ class WimpIE(InfoExtractor): | ||||
|                 'ie_key': YoutubeIE.ie_key(), | ||||
|             } | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'<video[^>]+>\s*<source[^>]+src=(["\'])(?P<url>.+?)\1', | ||||
|             webpage, 'video URL', group='url') | ||||
|         info_dict = self._extract_jwplayer_data( | ||||
|             webpage, video_id, require_title=False) | ||||
|  | ||||
|         return { | ||||
|         info_dict.update({ | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|         }) | ||||
|  | ||||
|         return info_dict | ||||
|   | ||||
| @@ -5,6 +5,7 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     remove_start, | ||||
| @@ -27,16 +28,17 @@ class WrzutaIE(InfoExtractor): | ||||
|             'uploader_id': 'laboratoriumdextera', | ||||
|             'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', | ||||
|         }, | ||||
|         'skip': 'Redirected to wrzuta.pl', | ||||
|     }, { | ||||
|         'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty', | ||||
|         'md5': 'bc78077859bea7bcfe4295d7d7fc9025', | ||||
|         'url': 'http://vexling.wrzuta.pl/audio/01xBFabGXu6/james_horner_-_into_the_na_39_vi_world_bonus', | ||||
|         'md5': 'f80564fb5a2ec6ec59705ae2bf2ba56d', | ||||
|         'info_dict': { | ||||
|             'id': '063jOPX5ue2', | ||||
|             'ext': 'ogg', | ||||
|             'title': 'Liber & Natalia Szroeder - Teraz Ty', | ||||
|             'duration': 203, | ||||
|             'uploader_id': 'jolka85', | ||||
|             'description': 'md5:2d2b6340f9188c8c4cd891580e481096', | ||||
|             'id': '01xBFabGXu6', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'James Horner - Into The Na\'vi World [Bonus]', | ||||
|             'description': 'md5:30a70718b2cd9df3120fce4445b0263b', | ||||
|             'duration': 95, | ||||
|             'uploader_id': 'vexling', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -46,7 +48,10 @@ class WrzutaIE(InfoExtractor): | ||||
|         typ = mobj.group('typ') | ||||
|         uploader = mobj.group('uploader') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         webpage, urlh = self._download_webpage_handle(url, video_id) | ||||
|  | ||||
|         if urlh.geturl() == 'http://www.wrzuta.pl/': | ||||
|             raise ExtractorError('Video removed', expected=True) | ||||
|  | ||||
|         quality = qualities(['SD', 'MQ', 'HQ', 'HD']) | ||||
|  | ||||
|   | ||||
| @@ -6,17 +6,23 @@ from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class XNXXIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:video|www)\.xnxx\.com/video(?P<id>[0-9]+)/(.*)' | ||||
|     _TEST = { | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'md5': '0831677e2b4761795f68d417e0b7b445', | ||||
|     _VALID_URL = r'https?://(?:video|www)\.xnxx\.com/video-?(?P<id>[0-9a-z]+)/' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.xnxx.com/video-55awb78/skyrim_test_video', | ||||
|         'md5': 'ef7ecee5af78f8b03dca2cf31341d3a0', | ||||
|         'info_dict': { | ||||
|             'id': '1135332', | ||||
|             'id': '55awb78', | ||||
|             'ext': 'flv', | ||||
|             'title': 'lida » Naked Funny Actress  (5)', | ||||
|             'title': 'Skyrim Test Video', | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|     } | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.xnxx.com/video-55awb78/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|   | ||||
| @@ -1970,7 +1970,7 @@ def js_to_json(code): | ||||
|         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| | ||||
|         /\*.*?\*/|,(?=\s*[\]}])| | ||||
|         [a-zA-Z_][.a-zA-Z_0-9]*| | ||||
|         (?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| | ||||
|         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:\s*:)?| | ||||
|         [0-9]+(?=\s*:) | ||||
|         ''', fix_kv, code) | ||||
|  | ||||
| @@ -2852,3 +2852,12 @@ def decode_packed_codes(code): | ||||
|     return re.sub( | ||||
|         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], | ||||
|         obfucasted_code) | ||||
|  | ||||
|  | ||||
| def parse_m3u8_attributes(attrib): | ||||
|     info = {} | ||||
|     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib): | ||||
|         if val.startswith('"'): | ||||
|             val = val[1:-1] | ||||
|         info[key] = val | ||||
|     return info | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2016.06.14' | ||||
| __version__ = '2016.06.23' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user