mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			66 Commits
		
	
	
		
			2015.02.10
			...
			2015.02.16
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | a568180441 | ||
|  | 85e80f71cd | ||
|  | bfa6bdcd8b | ||
|  | 03cd72b007 | ||
|  | 5bfd430f81 | ||
|  | 73fac4e911 | ||
|  | 8fb474fb17 | ||
|  | f813928e4b | ||
|  | b9c7a97318 | ||
|  | 9fb2f1cd6d | ||
|  | 6ca7732d5e | ||
|  | b0ab0fac49 | ||
|  | a294bce82f | ||
|  | 76d1466b08 | ||
|  | 1888d3f7b3 | ||
|  | c2787701cc | ||
|  | 52e1d0ccc4 | ||
|  | 10e3c4c221 | ||
|  | 68f2d273bf | ||
|  | 7c86c21662 | ||
|  | ae1580d790 | ||
|  | 3215c50f25 | ||
|  | 36f73e8044 | ||
|  | a4f3d779db | ||
|  | d9aa2b784d | ||
|  | cffcbc02de | ||
|  | 9347fddbfc | ||
|  | 037e9437e4 | ||
|  | 36e7a4ca2e | ||
|  | ae6423d704 | ||
|  | 7105440cec | ||
|  | c80b9cd280 | ||
|  | 171ca612af | ||
|  | c3d64fc1b3 | ||
|  | 7c24ce225d | ||
|  | 08b38d5401 | ||
|  | 024c53694d | ||
|  | 7e6011101f | ||
|  | c40feaba77 | ||
|  | 5277f09dfc | ||
|  | 2d30521ab9 | ||
|  | 050fa43561 | ||
|  | f36f92f4da | ||
|  | 124f3bc67d | ||
|  | d304209a85 | ||
|  | 8367d3f3cb | ||
|  | c56d7d899d | ||
|  | ea5db8469e | ||
|  | 3811c567e7 | ||
|  | 8708d76425 | ||
|  | 054fe3cc40 | ||
|  | af0d11f244 | ||
|  | 9650885be9 | ||
|  | 596ac6e31f | ||
|  | 612ee37365 | ||
|  | 442c37b7a9 | ||
|  | 04bbe41330 | ||
|  | 8f84f57183 | ||
|  | 6a78740211 | ||
|  | c0e1a415fd | ||
|  | bf8f082a90 | ||
|  | 2f543a2142 | ||
|  | 7e5db8c930 | ||
|  | f7a211dcc8 | ||
|  | 845734773d | ||
|  | 347de4931c | 
							
								
								
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								AUTHORS
									
									
									
									
									
								
							| @@ -110,3 +110,4 @@ Shaya Goldberg | ||||
| Paul Hartmann | ||||
| Frans de Jonge | ||||
| Robin de Rooij | ||||
| Ryan Schmidt | ||||
|   | ||||
							
								
								
									
										21
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										21
									
								
								README.md
									
									
									
									
									
								
							| @@ -77,6 +77,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      on Windows) | ||||
|     --flat-playlist                  Do not extract the videos of a playlist, | ||||
|                                      only list them. | ||||
|     --no-color                       Do not emit color codes in output. | ||||
|  | ||||
| ## Network Options: | ||||
|     --proxy URL                      Use the specified HTTP/HTTPS proxy. Pass in | ||||
| @@ -119,6 +120,23 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      COUNT views | ||||
|     --max-views COUNT                Do not download any videos with more than | ||||
|                                      COUNT views | ||||
|     --match-filter FILTER            (Experimental) Generic video filter. | ||||
|                                      Specify any key (see help for -o for a list | ||||
|                                      of available keys) to match if the key is | ||||
|                                      present, !key to check if the key is not | ||||
|                                      present, key > NUMBER (like "comment_count > | ||||
|                                      12", also works with >=, <, <=, !=, =) to | ||||
|                                      compare against a number, and & to require | ||||
|                                      multiple matches. Values which are not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator. For | ||||
|                                      example, to only match videos that have | ||||
|                                      been liked more than 100 times and disliked | ||||
|                                      less than 50 times (or the dislike | ||||
|                                      functionality is not available at the given | ||||
|                                      service), but who also have a description, | ||||
|                                      use  --match-filter "like_count > 100 & | ||||
|                                      dislike_count <? 50 & description" . | ||||
|     --no-playlist                    If the URL refers to a video and a | ||||
|                                      playlist, download only the video. | ||||
|     --age-limit YEARS                download only videos suitable for the given | ||||
| @@ -379,6 +397,9 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      postprocessors (default) | ||||
|     --prefer-ffmpeg                  Prefer ffmpeg over avconv for running the | ||||
|                                      postprocessors | ||||
|     --ffmpeg-location PATH           Location of the ffmpeg/avconv binary; | ||||
|                                      either the path to the binary or its | ||||
|                                      containing directory. | ||||
|     --exec CMD                       Execute a command on the file after | ||||
|                                      downloading, similar to find's -exec | ||||
|                                      syntax. Example: --exec 'adb push {} | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| # Supported sites | ||||
|  - **1tv**: Первый канал | ||||
|  - **1up.com** | ||||
|  - **220.ro** | ||||
|  - **24video** | ||||
| @@ -60,6 +61,8 @@ | ||||
|  - **Brightcove** | ||||
|  - **BuzzFeed** | ||||
|  - **BYUtv** | ||||
|  - **Camdemy** | ||||
|  - **CamdemyFolder** | ||||
|  - **Canal13cl** | ||||
|  - **canalc2.tv** | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
| @@ -134,7 +137,6 @@ | ||||
|  - **fernsehkritik.tv:postecke** | ||||
|  - **Firedrive** | ||||
|  - **Firstpost** | ||||
|  - **firsttv**: Видеоархив - Первый канал | ||||
|  - **Flickr** | ||||
|  - **Folketinget**: Folketinget (ft.dk; Danish parliament) | ||||
|  - **Foxgay** | ||||
| @@ -174,6 +176,7 @@ | ||||
|  - **Helsinki**: helsinki.fi | ||||
|  - **HentaiStigma** | ||||
|  - **HistoricFilms** | ||||
|  - **History** | ||||
|  - **hitbox** | ||||
|  - **hitbox:live** | ||||
|  - **HornBunny** | ||||
| @@ -225,6 +228,7 @@ | ||||
|  - **mailru**: Видео@Mail.Ru | ||||
|  - **Malemotion** | ||||
|  - **MDR** | ||||
|  - **media.ccc.de** | ||||
|  - **metacafe** | ||||
|  - **Metacritic** | ||||
|  - **Mgoon** | ||||
| @@ -286,6 +290,8 @@ | ||||
|  - **nowvideo**: NowVideo | ||||
|  - **npo.nl** | ||||
|  - **npo.nl:live** | ||||
|  - **npo.nl:radio** | ||||
|  - **npo.nl:radio:fragment** | ||||
|  - **NRK** | ||||
|  - **NRKTV** | ||||
|  - **ntv.ru** | ||||
| @@ -526,6 +532,7 @@ | ||||
|  - **XVideos** | ||||
|  - **XXXYMovies** | ||||
|  - **Yahoo**: Yahoo screen and movies | ||||
|  - **Yam** | ||||
|  - **YesJapan** | ||||
|  - **Ynet** | ||||
|  - **YouJizz** | ||||
|   | ||||
| @@ -3,4 +3,4 @@ universal = True | ||||
|  | ||||
| [flake8] | ||||
| exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,setup.py,build,.git | ||||
| ignore = E501 | ||||
| ignore = E402,E501,E731 | ||||
|   | ||||
| @@ -39,5 +39,6 @@ | ||||
|     "writesubtitles": false, | ||||
|     "allsubtitles": false, | ||||
|     "listssubtitles": false, | ||||
|     "socket_timeout": 20 | ||||
|     "socket_timeout": 20, | ||||
|     "fixup": "never" | ||||
| } | ||||
|   | ||||
| @@ -138,7 +138,7 @@ class TestDailymotionSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 5) | ||||
|         self.assertTrue(len(subtitles.keys()) >= 6) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
| @@ -247,7 +247,7 @@ class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '26399116d23ae3cf2c087cea94bc43b4') | ||||
|         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
| @@ -334,7 +334,7 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['cs'])) | ||||
|         self.assertEqual(md5(subtitles['cs']), '9bf52d9549533c32c427e264bf0847d4') | ||||
|         self.assertTrue(len(subtitles['cs']) > 20000) | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning('video doesn\'t have subtitles') | ||||
|   | ||||
| @@ -53,6 +53,7 @@ from youtube_dl.utils import ( | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
|     render_table, | ||||
|     match_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -459,6 +460,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') | ||||
|             '123  4\n' | ||||
|             '9999 51') | ||||
|  | ||||
|     def test_match_str(self): | ||||
|         self.assertRaises(ValueError, match_str, 'xy>foobar', {}) | ||||
|         self.assertFalse(match_str('xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('!xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 1200})) | ||||
|         self.assertFalse(match_str('!x', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {})) | ||||
|         self.assertTrue(match_str('x>?0', {})) | ||||
|         self.assertTrue(match_str('x>1K', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>2K', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) | ||||
|         self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 90, 'description': 'foo'})) | ||||
|         self.assertTrue(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 10})) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -8,11 +8,11 @@ import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| import io | ||||
| import re | ||||
| import string | ||||
|  | ||||
| from test.helper import FakeYDL | ||||
| from youtube_dl.extractor import YoutubeIE | ||||
| from youtube_dl.compat import compat_str, compat_urlretrieve | ||||
|  | ||||
| @@ -88,7 +88,8 @@ def make_tfunc(url, stype, sig_input, expected_sig): | ||||
|         if not os.path.exists(fn): | ||||
|             compat_urlretrieve(url, fn) | ||||
|  | ||||
|         ie = YoutubeIE() | ||||
|         ydl = FakeYDL() | ||||
|         ie = YoutubeIE(ydl) | ||||
|         if stype == 'js': | ||||
|             with io.open(fn, encoding='utf-8') as testf: | ||||
|                 jscode = testf.read() | ||||
|   | ||||
| @@ -228,6 +228,12 @@ class YoutubeDL(object): | ||||
|     external_downloader:  Executable of the external downloader to call. | ||||
|     listformats:       Print an overview of available video formats and exit. | ||||
|     list_thumbnails:   Print a table of all thumbnails and exit. | ||||
|     match_filter:      A function that gets called with the info_dict of | ||||
|                        every video. | ||||
|                        If it returns a message, the video is ignored. | ||||
|                        If it returns None, the video is downloaded. | ||||
|                        match_filter_func in utils.py is one example for this. | ||||
|     no_color:          Do not emit color codes in output. | ||||
|  | ||||
|  | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
| @@ -485,7 +491,7 @@ class YoutubeDL(object): | ||||
|         else: | ||||
|             if self.params.get('no_warnings'): | ||||
|                 return | ||||
|             if self._err_file.isatty() and os.name != 'nt': | ||||
|             if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': | ||||
|                 _msg_header = '\033[0;33mWARNING:\033[0m' | ||||
|             else: | ||||
|                 _msg_header = 'WARNING:' | ||||
| @@ -497,7 +503,7 @@ class YoutubeDL(object): | ||||
|         Do the same as trouble, but prefixes the message with 'ERROR:', colored | ||||
|         in red if stderr is a tty file. | ||||
|         ''' | ||||
|         if self._err_file.isatty() and os.name != 'nt': | ||||
|         if not self.params.get('no_color') and self._err_file.isatty() and os.name != 'nt': | ||||
|             _msg_header = '\033[0;31mERROR:\033[0m' | ||||
|         else: | ||||
|             _msg_header = 'ERROR:' | ||||
| @@ -554,7 +560,7 @@ class YoutubeDL(object): | ||||
|             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') | ||||
|             return None | ||||
|  | ||||
|     def _match_entry(self, info_dict): | ||||
|     def _match_entry(self, info_dict, incomplete): | ||||
|         """ Returns None iff the file should be downloaded """ | ||||
|  | ||||
|         video_title = info_dict.get('title', info_dict.get('id', 'video')) | ||||
| @@ -583,9 +589,17 @@ class YoutubeDL(object): | ||||
|             if max_views is not None and view_count > max_views: | ||||
|                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) | ||||
|         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): | ||||
|             return 'Skipping "%s" because it is age restricted' % title | ||||
|             return 'Skipping "%s" because it is age restricted' % video_title | ||||
|         if self.in_download_archive(info_dict): | ||||
|             return '%s has already been recorded in archive' % video_title | ||||
|  | ||||
|         if not incomplete: | ||||
|             match_filter = self.params.get('match_filter') | ||||
|             if match_filter is not None: | ||||
|                 ret = match_filter(info_dict) | ||||
|                 if ret is not None: | ||||
|                     return ret | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -779,7 +793,7 @@ class YoutubeDL(object): | ||||
|                     'extractor_key': ie_result['extractor_key'], | ||||
|                 } | ||||
|  | ||||
|                 reason = self._match_entry(entry) | ||||
|                 reason = self._match_entry(entry, incomplete=True) | ||||
|                 if reason is not None: | ||||
|                     self.to_screen('[download] ' + reason) | ||||
|                     continue | ||||
| @@ -1153,7 +1167,7 @@ class YoutubeDL(object): | ||||
|         if 'format' not in info_dict: | ||||
|             info_dict['format'] = info_dict['ext'] | ||||
|  | ||||
|         reason = self._match_entry(info_dict) | ||||
|         reason = self._match_entry(info_dict, incomplete=False) | ||||
|         if reason is not None: | ||||
|             self.to_screen('[download] ' + reason) | ||||
|             return | ||||
| @@ -1284,7 +1298,7 @@ class YoutubeDL(object): | ||||
|                     downloaded = [] | ||||
|                     success = True | ||||
|                     merger = FFmpegMergerPP(self, not self.params.get('keepvideo')) | ||||
|                     if not merger._executable: | ||||
|                     if not merger.available(): | ||||
|                         postprocessors = [] | ||||
|                         self.report_warning('You have requested multiple ' | ||||
|                                             'formats but ffmpeg or avconv are not installed.' | ||||
| @@ -1633,7 +1647,7 @@ class YoutubeDL(object): | ||||
|         self._write_string('[debug] Python version %s - %s\n' % ( | ||||
|             platform.python_version(), platform_name())) | ||||
|  | ||||
|         exe_versions = FFmpegPostProcessor.get_versions() | ||||
|         exe_versions = FFmpegPostProcessor.get_versions(self) | ||||
|         exe_versions['rtmpdump'] = rtmpdump_version() | ||||
|         exe_str = ', '.join( | ||||
|             '%s %s' % (exe, v) | ||||
|   | ||||
| @@ -23,9 +23,10 @@ from .compat import ( | ||||
| ) | ||||
| from .utils import ( | ||||
|     DateRange, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     decodeOption, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     DownloadError, | ||||
|     match_filter_func, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
| @@ -247,6 +248,9 @@ def _real_main(argv=None): | ||||
|             xattr  # Confuse flake8 | ||||
|         except ImportError: | ||||
|             parser.error('setting filesize xattr requested but python-xattr is not available') | ||||
|     match_filter = ( | ||||
|         None if opts.match_filter is None | ||||
|         else match_filter_func(opts.match_filter)) | ||||
|  | ||||
|     ydl_opts = { | ||||
|         'usenetrc': opts.usenetrc, | ||||
| @@ -344,6 +348,9 @@ def _real_main(argv=None): | ||||
|         'list_thumbnails': opts.list_thumbnails, | ||||
|         'playlist_items': opts.playlist_items, | ||||
|         'xattr_set_filesize': opts.xattr_set_filesize, | ||||
|         'match_filter': match_filter, | ||||
|         'no_color': opts.no_color, | ||||
|         'ffmpeg_location': opts.ffmpeg_location, | ||||
|     } | ||||
|  | ||||
|     with YoutubeDL(ydl_opts) as ydl: | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] | ||||
|  | ||||
| import base64 | ||||
| from math import ceil | ||||
|  | ||||
| @@ -329,3 +327,5 @@ def inc(data): | ||||
|             data[i] = data[i] + 1 | ||||
|             break | ||||
|     return data | ||||
|  | ||||
| __all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_cbc_decrypt', 'aes_decrypt_text'] | ||||
|   | ||||
| @@ -23,15 +23,14 @@ class HlsFD(FileDownloader): | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|  | ||||
|         ffpp = FFmpegPostProcessor(downloader=self) | ||||
|         program = ffpp._executable | ||||
|         if program is None: | ||||
|         if not ffpp.available: | ||||
|             self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.') | ||||
|             return False | ||||
|         ffpp.check_version() | ||||
|  | ||||
|         args = [ | ||||
|             encodeArgument(opt) | ||||
|             for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] | ||||
|             for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] | ||||
|         args.append(encodeFilename(tmpfilename, True)) | ||||
|  | ||||
|         retval = subprocess.call(args) | ||||
| @@ -48,7 +47,7 @@ class HlsFD(FileDownloader): | ||||
|             return True | ||||
|         else: | ||||
|             self.to_stderr('\n') | ||||
|             self.report_error('%s exited with code %d' % (program, retval)) | ||||
|             self.report_error('%s exited with code %d' % (ffpp.basename, retval)) | ||||
|             return False | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -49,11 +49,16 @@ from .brightcove import BrightcoveIE | ||||
| from .buzzfeed import BuzzFeedIE | ||||
| from .byutv import BYUtvIE | ||||
| from .c56 import C56IE | ||||
| from .camdemy import ( | ||||
|     CamdemyIE, | ||||
|     CamdemyFolderIE | ||||
| ) | ||||
| from .canal13cl import Canal13clIE | ||||
| from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .cbsnews import CBSNewsIE | ||||
| from .ccc import CCCIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| @@ -184,6 +189,7 @@ from .hellporno import HellPornoIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| from .historicfilms import HistoricFilmsIE | ||||
| from .history import HistoryIE | ||||
| from .hitbox import HitboxIE, HitboxLiveIE | ||||
| from .hornbunny import HornBunnyIE | ||||
| from .hostingbulk import HostingBulkIE | ||||
| @@ -313,6 +319,8 @@ from .nowvideo import NowVideoIE | ||||
| from .npo import ( | ||||
|     NPOIE, | ||||
|     NPOLiveIE, | ||||
|     NPORadioIE, | ||||
|     NPORadioFragmentIE, | ||||
|     TegenlichtVproIE, | ||||
| ) | ||||
| from .nrk import ( | ||||
| @@ -571,6 +579,7 @@ from .yahoo import ( | ||||
|     YahooIE, | ||||
|     YahooSearchIE, | ||||
| ) | ||||
| from .yam import YamIE | ||||
| from .yesjapan import YesJapanIE | ||||
| from .ynet import YnetIE | ||||
| from .youjizz import YouJizzIE | ||||
|   | ||||
| @@ -50,7 +50,7 @@ class BambuserIE(InfoExtractor): | ||||
|             'duration': int(info['length']), | ||||
|             'view_count': int(info['views_total']), | ||||
|             'uploader': info['username'], | ||||
|             'uploader_id': info['uid'], | ||||
|             'uploader_id': info['owner']['uid'], | ||||
|         } | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -273,7 +273,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|                     formats, subtitles = self._download_media_selector(programme_id) | ||||
|                 return programme_id, title, description, duration, formats, subtitles | ||||
|         except ExtractorError as ee: | ||||
|             if not isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404: | ||||
|             if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404): | ||||
|                 raise | ||||
|  | ||||
|         # fallback to legacy playlist | ||||
|   | ||||
| @@ -9,7 +9,7 @@ class BeegIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?beeg\.com/(?P<id>\d+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://beeg.com/5416503', | ||||
|         'md5': '634526ae978711f6b748fe0dd6c11f57', | ||||
|         'md5': '1bff67111adb785c51d1b42959ec10e5', | ||||
|         'info_dict': { | ||||
|             'id': '5416503', | ||||
|             'ext': 'mp4', | ||||
|   | ||||
							
								
								
									
										153
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										153
									
								
								youtube_dl/extractor/camdemy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,153 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     parse_iso8601, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CamdemyIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         # single file | ||||
|         'url': 'http://www.camdemy.com/media/5181/', | ||||
|         'md5': '5a5562b6a98b37873119102e052e311b', | ||||
|         'info_dict': { | ||||
|             'id': '5181', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ch1-1 Introduction, Signals (02-23-2012)', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': '', | ||||
|             'creator': 'ss11spring', | ||||
|             'upload_date': '20130114', | ||||
|             'timestamp': 1358154556, | ||||
|             'view_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         # With non-empty description | ||||
|         'url': 'http://www.camdemy.com/media/13885', | ||||
|         'md5': '4576a3bb2581f86c61044822adbd1249', | ||||
|         'info_dict': { | ||||
|             'id': '13885', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'EverCam + Camdemy QuickStart', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'description': 'md5:050b62f71ed62928f8a35f1a41e186c9', | ||||
|             'creator': 'evercam', | ||||
|             'upload_date': '20140620', | ||||
|             'timestamp': 1403271569, | ||||
|         } | ||||
|     }, { | ||||
|         # External source | ||||
|         'url': 'http://www.camdemy.com/media/14842', | ||||
|         'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7', | ||||
|         'info_dict': { | ||||
|             'id': '2vsYQzNIsJo', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20130211', | ||||
|             'uploader': 'Hun Kim', | ||||
|             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', | ||||
|             'uploader_id': 'hunkimtutorials', | ||||
|             'title': 'Excel 2013 Tutorial - How to add Password Protection', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         src_from = self._html_search_regex( | ||||
|             r"<div class='srcFrom'>Source: <a title='([^']+)'", page, | ||||
|             'external source', default=None) | ||||
|         if src_from: | ||||
|             return self.url_result(src_from) | ||||
|  | ||||
|         oembed_obj = self._download_json( | ||||
|             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) | ||||
|  | ||||
|         thumb_url = oembed_obj['thumbnail_url'] | ||||
|         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') | ||||
|         file_list_doc = self._download_xml( | ||||
|             compat_urlparse.urljoin(video_folder, 'fileList.xml'), | ||||
|             video_id, 'Filelist XML') | ||||
|         file_name = file_list_doc.find('./video/item/fileName').text | ||||
|         video_url = compat_urlparse.urljoin(video_folder, file_name) | ||||
|  | ||||
|         timestamp = parse_iso8601(self._html_search_regex( | ||||
|             r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'creation time', fatal=False), | ||||
|             delimiter=' ', timezone=datetime.timedelta(hours=8)) | ||||
|         view_count = str_to_int(self._html_search_regex( | ||||
|             r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", | ||||
|             page, 'view count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': oembed_obj['title'], | ||||
|             'thumbnail': thumb_url, | ||||
|             'description': self._html_search_meta('description', page), | ||||
|             'creator': oembed_obj['author_name'], | ||||
|             'duration': oembed_obj['duration'], | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class CamdemyFolderIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www.camdemy.com/folder/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         # links with trailing slash | ||||
|         'url': 'http://www.camdemy.com/folder/450', | ||||
|         'info_dict': { | ||||
|             'id': '450', | ||||
|             'title': '信號與系統 2012 & 2011 (Signals and Systems)', | ||||
|         }, | ||||
|         'playlist_mincount': 145 | ||||
|     }, { | ||||
|         # links without trailing slash | ||||
|         # and multi-page | ||||
|         'url': 'http://www.camdemy.com/folder/853', | ||||
|         'info_dict': { | ||||
|             'id': '853', | ||||
|             'title': '科學計算 - 使用 Matlab' | ||||
|         }, | ||||
|         'playlist_mincount': 20 | ||||
|     }, { | ||||
|         # with displayMode parameter. For testing the codes to add parameters | ||||
|         'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', | ||||
|         'info_dict': { | ||||
|             'id': '853', | ||||
|             'title': '科學計算 - 使用 Matlab' | ||||
|         }, | ||||
|         'playlist_mincount': 20 | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         folder_id = self._match_id(url) | ||||
|  | ||||
|         # Add displayMode=list so that all links are displayed in a single page | ||||
|         parsed_url = list(compat_urlparse.urlparse(url)) | ||||
|         query = dict(compat_urlparse.parse_qsl(parsed_url[4])) | ||||
|         query.update({'displayMode': 'list'}) | ||||
|         parsed_url[4] = compat_urllib_parse.urlencode(query) | ||||
|         final_url = compat_urlparse.urlunparse(parsed_url) | ||||
|  | ||||
|         page = self._download_webpage(final_url, folder_id) | ||||
|         matches = re.findall(r"href='(/media/\d+/?)'", page) | ||||
|  | ||||
|         entries = [self.url_result('http://www.camdemy.com' + media_path) | ||||
|                    for media_path in matches] | ||||
|  | ||||
|         folder_title = self._html_search_meta('keywords', page) | ||||
|  | ||||
|         return self.playlist_result(entries, folder_id, folder_title) | ||||
| @@ -15,12 +15,13 @@ from ..utils import ( | ||||
|  | ||||
| class CanalplusIE(InfoExtractor): | ||||
|     IE_DESC = 'canalplus.fr, piwiplus.fr and d8.tv' | ||||
|     _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' | ||||
|     _VALID_URL = r'https?://(?:www\.(?P<site>canalplus\.fr|piwiplus\.fr|d8\.tv|itele\.fr)/.*?/(?P<path>.*)|player\.canalplus\.fr/#/(?P<id>[0-9]+))' | ||||
|     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/%s/%s' | ||||
|     _SITE_ID_MAP = { | ||||
|         'canalplus.fr': 'cplus', | ||||
|         'piwiplus.fr': 'teletoon', | ||||
|         'd8.tv': 'd8', | ||||
|         'itele.fr': 'itele', | ||||
|     } | ||||
|  | ||||
|     _TESTS = [{ | ||||
| @@ -53,6 +54,16 @@ class CanalplusIE(InfoExtractor): | ||||
|             'upload_date': '20131108', | ||||
|         }, | ||||
|         'skip': 'videos get deleted after a while', | ||||
|     }, { | ||||
|         'url': 'http://www.itele.fr/france/video/aubervilliers-un-lycee-en-colere-111559', | ||||
|         'md5': '65aa83ad62fe107ce29e564bb8712580', | ||||
|         'info_dict': { | ||||
|             'id': '1213714', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Aubervilliers : un lycée en colère - Le 11/02/2015 à 06h45', | ||||
|             'description': 'md5:8216206ec53426ea6321321f3b3c16db', | ||||
|             'upload_date': '20150211', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										99
									
								
								youtube_dl/extractor/ccc.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								youtube_dl/extractor/ccc.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     qualities, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CCCIE(InfoExtractor): | ||||
|     IE_NAME = 'media.ccc.de' | ||||
|     _VALID_URL = r'https?://(?:www\.)?media\.ccc\.de/[^?#]+/[^?#/]*?_(?P<id>[0-9]{8,})._[^?#/]*\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video', | ||||
|         'md5': '205a365d0d57c0b1e43a12c9ffe8f9be', | ||||
|         'info_dict': { | ||||
|             'id': '20131228183', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Introduction to Processor Design', | ||||
|             'description': 'md5:5ddbf8c734800267f2cee4eab187bc1b', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'view_count': int, | ||||
|             'upload_date': '20131229', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         if self._downloader.params.get('prefer_free_formats'): | ||||
|             preference = qualities(['mp3', 'opus', 'mp4-lq', 'webm-lq', 'h264-sd', 'mp4-sd', 'webm-sd', 'mp4', 'webm', 'mp4-hd', 'h264-hd', 'webm-hd']) | ||||
|         else: | ||||
|             preference = qualities(['opus', 'mp3', 'webm-lq', 'mp4-lq', 'webm-sd', 'h264-sd', 'mp4-sd', 'webm', 'mp4', 'webm-hd', 'mp4-hd', 'h264-hd']) | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'(?s)<h1>(.*?)</h1>', webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r"(?s)<p class='description'>(.*?)</p>", | ||||
|             webpage, 'description', fatal=False) | ||||
|         upload_date = unified_strdate(self._html_search_regex( | ||||
|             r"(?s)<span class='[^']*fa-calendar-o'></span>(.*?)</li>", | ||||
|             webpage, 'upload date', fatal=False)) | ||||
|         view_count = int_or_none(self._html_search_regex( | ||||
|             r"(?s)<span class='[^']*fa-eye'></span>(.*?)</li>", | ||||
|             webpage, 'view count', fatal=False)) | ||||
|  | ||||
|         matches = re.finditer(r'''(?xs) | ||||
|             <(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s* | ||||
|             <a\s+href='(?P<http_url>[^']+)'>\s* | ||||
|             (?: | ||||
|                 .*? | ||||
|                 <a\s+href='(?P<torrent_url>[^']+\.torrent)' | ||||
|             )?''', webpage) | ||||
|         formats = [] | ||||
|         for m in matches: | ||||
|             format = m.group('format') | ||||
|             format_id = self._search_regex( | ||||
|                 r'.*/([a-z0-9_-]+)/[^/]*$', | ||||
|                 m.group('http_url'), 'format id', default=None) | ||||
|             vcodec = 'h264' if 'h264' in format_id else ( | ||||
|                 'none' if format_id in ('mp3', 'opus') else None | ||||
|             ) | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'format': format, | ||||
|                 'url': m.group('http_url'), | ||||
|                 'vcodec': vcodec, | ||||
|                 'preference': preference(format_id), | ||||
|             }) | ||||
|  | ||||
|             if m.group('torrent_url'): | ||||
|                 formats.append({ | ||||
|                     'format_id': 'torrent-%s' % (format if format_id is None else format_id), | ||||
|                     'format': '%s (torrent)' % format, | ||||
|                     'proto': 'torrent', | ||||
|                     'format_note': '(unsupported; will just download the .torrent file)', | ||||
|                     'vcodec': vcodec, | ||||
|                     'preference': -100 + preference(format_id), | ||||
|                     'url': m.group('torrent_url'), | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r"<video.*?poster='([^']+)'", webpage, 'thumbnail', fatal=False) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'view_count': view_count, | ||||
|             'upload_date': upload_date, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -157,6 +157,7 @@ class InfoExtractor(object): | ||||
|     view_count:     How many users have watched the video on the platform. | ||||
|     like_count:     Number of positive ratings of the video | ||||
|     dislike_count:  Number of negative ratings of the video | ||||
|     average_rating: Average rating given by users; the scale used depends on the webpage | ||||
|     comment_count:  Number of comments on the video | ||||
|     comments:       A list of comments, each with one or more of the following | ||||
|                     properties (all but one of text or html optional): | ||||
| @@ -271,7 +272,7 @@ class InfoExtractor(object): | ||||
|             raise | ||||
|         except compat_http_client.IncompleteRead as e: | ||||
|             raise ExtractorError('A network error has occured.', cause=e, expected=True) | ||||
|         except (KeyError,) as e: | ||||
|         except (KeyError, StopIteration) as e: | ||||
|             raise ExtractorError('An extractor error has occured.', cause=e) | ||||
|  | ||||
|     def set_downloader(self, downloader): | ||||
| @@ -514,7 +515,7 @@ class InfoExtractor(object): | ||||
|                 if mobj: | ||||
|                     break | ||||
|  | ||||
|         if os.name != 'nt' and sys.stderr.isatty(): | ||||
|         if not self._downloader.params.get('no_color') and os.name != 'nt' and sys.stderr.isatty(): | ||||
|             _name = '\033[0;34m%s\033[0m' % name | ||||
|         else: | ||||
|             _name = name | ||||
| @@ -664,7 +665,7 @@ class InfoExtractor(object): | ||||
|         return RATING_TABLE.get(rating.lower(), None) | ||||
|  | ||||
|     def _family_friendly_search(self, html): | ||||
|         # See http://schema.org/VideoObj | ||||
|         # See http://schema.org/VideoObject | ||||
|         family_friendly = self._html_search_meta('isFamilyFriendly', html) | ||||
|  | ||||
|         if not family_friendly: | ||||
| @@ -728,6 +729,7 @@ class InfoExtractor(object): | ||||
|                 f.get('language_preference') if f.get('language_preference') is not None else -1, | ||||
|                 f.get('quality') if f.get('quality') is not None else -1, | ||||
|                 f.get('tbr') if f.get('tbr') is not None else -1, | ||||
|                 f.get('filesize') if f.get('filesize') is not None else -1, | ||||
|                 f.get('vbr') if f.get('vbr') is not None else -1, | ||||
|                 f.get('height') if f.get('height') is not None else -1, | ||||
|                 f.get('width') if f.get('width') is not None else -1, | ||||
| @@ -735,7 +737,6 @@ class InfoExtractor(object): | ||||
|                 f.get('abr') if f.get('abr') is not None else -1, | ||||
|                 audio_ext_preference, | ||||
|                 f.get('fps') if f.get('fps') is not None else -1, | ||||
|                 f.get('filesize') if f.get('filesize') is not None else -1, | ||||
|                 f.get('filesize_approx') if f.get('filesize_approx') is not None else -1, | ||||
|                 f.get('source_preference') if f.get('source_preference') is not None else -1, | ||||
|                 f.get('format_id'), | ||||
|   | ||||
| @@ -14,6 +14,10 @@ class DctpTvIE(InfoExtractor): | ||||
|             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Videoinstallation für eine Kaufhausfassade' | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -1,13 +1,14 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class DotsubIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?dotsub\.com/view/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://dotsub.com/view/aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|         'md5': '0914d4d69605090f623b7ac329fea66e', | ||||
| @@ -15,28 +16,37 @@ class DotsubIE(InfoExtractor): | ||||
|             'id': 'aed3b8b2-1889-4df5-ae63-ad85f5572f27', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Pyramids of Waste (2010), AKA The Lightbulb Conspiracy - Planned obsolescence documentary', | ||||
|             'description': 'md5:699a0f7f50aeec6042cb3b1db2d0d074', | ||||
|             'thumbnail': 're:^https?://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', | ||||
|             'duration': 3169, | ||||
|             'uploader': '4v4l0n42', | ||||
|             'description': 'Pyramids of Waste (2010) also known as "The lightbulb conspiracy" is a documentary about how our economic system based on consumerism  and planned obsolescence is breaking our planet down.\r\n\r\nSolutions to this can be found at:\r\nhttp://robotswillstealyourjob.com\r\nhttp://www.federicopistono.org\r\n\r\nhttp://opensourceecology.org\r\nhttp://thezeitgeistmovement.com', | ||||
|             'thumbnail': 'http://dotsub.com/media/aed3b8b2-1889-4df5-ae63-ad85f5572f27/p', | ||||
|             'timestamp': 1292248482.625, | ||||
|             'upload_date': '20101213', | ||||
|             'view_count': int, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         info_url = "https://dotsub.com/api/media/%s/metadata" % video_id | ||||
|         info = self._download_json(info_url, video_id) | ||||
|         date = time.gmtime(info['dateCreated'] / 1000)  # The timestamp is in miliseconds | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'https://dotsub.com/api/media/%s/metadata' % video_id, video_id) | ||||
|         video_url = info.get('mediaURI') | ||||
|  | ||||
|         if not video_url: | ||||
|             webpage = self._download_webpage(url, video_id) | ||||
|             video_url = self._search_regex( | ||||
|                 r'"file"\s*:\s*\'([^\']+)', webpage, 'video url') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': info['mediaURI'], | ||||
|             'url': video_url, | ||||
|             'ext': 'flv', | ||||
|             'title': info['title'], | ||||
|             'thumbnail': info['screenshotURI'], | ||||
|             'description': info['description'], | ||||
|             'uploader': info['user'], | ||||
|             'view_count': info['numberOfViews'], | ||||
|             'upload_date': '%04i%02i%02i' % (date.tm_year, date.tm_mon, date.tm_mday), | ||||
|             'description': info.get('description'), | ||||
|             'thumbnail': info.get('screenshotURI'), | ||||
|             'duration': int_or_none(info.get('duration'), 1000), | ||||
|             'uploader': info.get('user'), | ||||
|             'timestamp': float_or_none(info.get('dateCreated'), 1000), | ||||
|             'view_count': int_or_none(info.get('numberOfViews')), | ||||
|         } | ||||
|   | ||||
| @@ -15,7 +15,7 @@ class DrTuberIE(InfoExtractor): | ||||
|             'id': '1740434', | ||||
|             'display_id': 'hot-perky-blonde-naked-golf', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Hot Perky Blonde Naked Golf', | ||||
|             'title': 'hot perky blonde naked golf', | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|             'comment_count': int, | ||||
| @@ -36,7 +36,8 @@ class DrTuberIE(InfoExtractor): | ||||
|             r'<source src="([^"]+)"', webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<title>([^<]+)\s*-\s*Free', webpage, 'title') | ||||
|             [r'class="hd_title" style="[^"]+">([^<]+)</h1>', r'<title>([^<]+) - \d+'], | ||||
|             webpage, 'title') | ||||
|  | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'poster="([^"]+)"', | ||||
|   | ||||
| @@ -1,18 +1,17 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     js_to_json, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class EscapistIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<id>[0-9]+)-' | ||||
|     _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate', | ||||
|         'md5': 'ab3a706c681efca53f0a35f1415cf0d1', | ||||
| @@ -20,31 +19,30 @@ class EscapistIE(InfoExtractor): | ||||
|             'id': '6618', | ||||
|             'ext': 'mp4', | ||||
|             'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.", | ||||
|             'uploader': 'the-escapist-presents', | ||||
|             'uploader_id': 'the-escapist-presents', | ||||
|             'uploader': 'The Escapist Presents', | ||||
|             'title': "Breaking Down Baldur's Gate", | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         showName = mobj.group('showname') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         videoDesc = self._html_search_regex( | ||||
|             r'<meta name="description" content="([^"]*)"', | ||||
|             webpage, 'description', fatal=False) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r"<h1 class='headline'><a href='/videos/view/(.*?)'", | ||||
|             webpage, 'uploader ID', fatal=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r"<h1 class='headline'>(.*?)</a>", | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         playerUrl = self._og_search_video_url(webpage, name='player URL') | ||||
|         raw_title = self._html_search_meta('title', webpage, fatal=True) | ||||
|         title = raw_title.partition(' : ')[2] | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<meta name="title" content="([^"]*)"', | ||||
|             webpage, 'title').split(' : ')[-1] | ||||
|  | ||||
|         configUrl = self._search_regex('config=(.*)$', playerUrl, 'config URL') | ||||
|         configUrl = compat_urllib_parse.unquote(configUrl) | ||||
|         player_url = self._og_search_video_url(webpage, name='player URL') | ||||
|         config_url = compat_urllib_parse.unquote(self._search_regex( | ||||
|             r'config=(.*)$', player_url, 'config URL')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
| @@ -53,18 +51,21 @@ class EscapistIE(InfoExtractor): | ||||
|                 cfgurl, video_id, | ||||
|                 'Downloading ' + name + ' configuration', | ||||
|                 'Unable to download ' + name + ' configuration', | ||||
|                 transform_source=lambda s: s.replace("'", '"')) | ||||
|                 transform_source=js_to_json) | ||||
|  | ||||
|             playlist = config['playlist'] | ||||
|             video_url = next( | ||||
|                 p['url'] for p in playlist | ||||
|                 if p.get('eventCategory') == 'Video') | ||||
|             formats.append({ | ||||
|                 'url': playlist[1]['url'], | ||||
|                 'url': video_url, | ||||
|                 'format_id': name, | ||||
|                 'quality': quality, | ||||
|             }) | ||||
|  | ||||
|         _add_format('normal', configUrl, quality=0) | ||||
|         hq_url = (configUrl + | ||||
|                   ('&hq=1' if '?' in configUrl else configUrl + '?hq=1')) | ||||
|         _add_format('normal', config_url, quality=0) | ||||
|         hq_url = (config_url + | ||||
|                   ('&hq=1' if '?' in config_url else config_url + '?hq=1')) | ||||
|         try: | ||||
|             _add_format('hq', hq_url, quality=1) | ||||
|         except ExtractorError: | ||||
| @@ -75,9 +76,10 @@ class EscapistIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'uploader': showName, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': videoDesc, | ||||
|             'player_url': playerUrl, | ||||
|             'description': description, | ||||
|             'player_url': player_url, | ||||
|         } | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -20,11 +18,10 @@ class FirstpostIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_meta('twitter:title', page, 'title') | ||||
|  | ||||
|         title = self._html_search_meta('twitter:title', page, 'title', fatal=True) | ||||
|         description = self._html_search_meta('twitter:description', page, 'title') | ||||
|  | ||||
|         data = self._download_xml( | ||||
| @@ -42,6 +39,7 @@ class FirstpostIE(InfoExtractor): | ||||
|                 'height': int(details.find('./height').text.strip()), | ||||
|             } for details in item.findall('./source/file_details') if details.find('./file').text | ||||
|         ] | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -1,52 +1,71 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class FirstTVIE(InfoExtractor): | ||||
|     IE_NAME = 'firsttv' | ||||
|     IE_DESC = 'Видеоархив - Первый канал' | ||||
|     _VALID_URL = r'http://(?:www\.)?1tv\.ru/videoarchive/(?P<id>\d+)' | ||||
|     IE_NAME = '1tv' | ||||
|     IE_DESC = 'Первый канал' | ||||
|     _VALID_URL = r'http://(?:www\.)?1tv\.ru/(?:[^/]+/)+(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.1tv.ru/videoarchive/73390', | ||||
|         'md5': '3de6390cf0cca4a5eae1d1d83895e5ad', | ||||
|         'md5': '777f525feeec4806130f4f764bc18a4f', | ||||
|         'info_dict': { | ||||
|             'id': '73390', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Олимпийские канатные дороги', | ||||
|             'description': 'md5:cc730d2bf4215463e37fff6a1e277b13', | ||||
|             'thumbnail': 'http://img1.1tv.ru/imgsize640x360/PR20140210114657.JPG', | ||||
|             'description': 'md5:d41d8cd98f00b204e9800998ecf8427e', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'duration': 149, | ||||
|             'like_count': int, | ||||
|             'dislike_count': int, | ||||
|         }, | ||||
|         'skip': 'Only works from Russia', | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://www.1tv.ru/prj/inprivate/vypusk/35930', | ||||
|         'md5': 'a1b6b60d530ebcf8daacf4565762bbaf', | ||||
|         'info_dict': { | ||||
|             'id': '35930', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Наедине со всеми. Людмила Сенчина', | ||||
|             'description': 'md5:89553aed1d641416001fe8d450f06cb9', | ||||
|             'thumbnail': 're:^https?://.*\.(?:jpg|JPG)$', | ||||
|             'duration': 2694, | ||||
|         }, | ||||
|         'skip': 'Only works from Russia', | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'''(?s)jwplayer\('flashvideoportal_1'\)\.setup\({.*?'file': '([^']+)'.*?}\);''', webpage, 'video URL') | ||||
|             r'''(?s)(?:jwplayer\('flashvideoportal_1'\)\.setup\({|var\s+playlistObj\s*=).*?'file'\s*:\s*'([^']+)'.*?}\);''', | ||||
|             webpage, 'video URL') | ||||
|  | ||||
|         title = self._html_search_regex( | ||||
|             r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', webpage, 'title') | ||||
|             [r'<div class="tv_translation">\s*<h1><a href="[^"]+">([^<]*)</a>', | ||||
|              r"'title'\s*:\s*'([^']+)'"], webpage, 'title') | ||||
|         description = self._html_search_regex( | ||||
|             r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', webpage, 'description', fatal=False) | ||||
|             r'<div class="descr">\s*<div> </div>\s*<p>([^<]*)</p></div>', | ||||
|             webpage, 'description', default=None) or self._html_search_meta( | ||||
|                 'description', webpage, 'description') | ||||
|  | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         duration = self._og_search_property('video:duration', webpage, 'video duration', fatal=False) | ||||
|         duration = self._og_search_property( | ||||
|             'video:duration', webpage, | ||||
|             'video duration', fatal=False) | ||||
|  | ||||
|         like_count = self._html_search_regex(r'title="Понравилось".*?/></label> \[(\d+)\]', | ||||
|                                              webpage, 'like count', fatal=False) | ||||
|         dislike_count = self._html_search_regex(r'title="Не понравилось".*?/></label> \[(\d+)\]', | ||||
|                                                 webpage, 'dislike count', fatal=False) | ||||
|         like_count = self._html_search_regex( | ||||
|             r'title="Понравилось".*?/></label> \[(\d+)\]', | ||||
|             webpage, 'like count', default=None) | ||||
|         dislike_count = self._html_search_regex( | ||||
|             r'title="Не понравилось".*?/></label> \[(\d+)\]', | ||||
|             webpage, 'dislike count', default=None) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
							
								
								
									
										31
									
								
								youtube_dl/extractor/history.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								youtube_dl/extractor/history.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import smuggle_url | ||||
|  | ||||
|  | ||||
| class HistoryIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?history\.com/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false', | ||||
|         'md5': '6fe632d033c92aa10b8d4a9be047a7c5', | ||||
|         'info_dict': { | ||||
|             'id': 'bLx5Dv5Aka1G', | ||||
|             'ext': 'mp4', | ||||
|             'title': "Bet You Didn't Know: Valentine's Day", | ||||
|             'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_url = self._search_regex( | ||||
|             r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id, | ||||
|             webpage, 'video url') | ||||
|  | ||||
|         return self.url_result(smuggle_url(video_url, {'sig': {'key': 'crazyjava', 'secret': 's3cr3t'}})) | ||||
| @@ -1,7 +1,6 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
| @@ -52,9 +51,9 @@ class NBCIE(InfoExtractor): | ||||
|  | ||||
|  | ||||
| class NBCNewsIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?://www\.nbcnews\.com/ | ||||
|         ((video/.+?/(?P<id>\d+))| | ||||
|         (feature/[^/]+/(?P<title>.+))) | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/ | ||||
|         (?:video/.+?/(?P<id>\d+)| | ||||
|         (?:feature|nightly-news)/[^/]+/(?P<title>.+)) | ||||
|         ''' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -89,6 +88,16 @@ class NBCNewsIE(InfoExtractor): | ||||
|                 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844', | ||||
|             'md5': 'b5dda8cddd8650baa0dcb616dd2cf60d', | ||||
|             'info_dict': { | ||||
|                 'id': 'sekXqyTVnmN3', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)', | ||||
|                 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -107,13 +116,13 @@ class NBCNewsIE(InfoExtractor): | ||||
|                 'thumbnail': find_xpath_attr(info, 'media', 'type', 'thumbnail').text, | ||||
|             } | ||||
|         else: | ||||
|             # "feature" pages use theplatform.com | ||||
|             # "feature" and "nightly-news" pages use theplatform.com | ||||
|             title = mobj.group('title') | ||||
|             webpage = self._download_webpage(url, title) | ||||
|             bootstrap_json = self._search_regex( | ||||
|                 r'var bootstrapJson = ({.+})\s*$', webpage, 'bootstrap json', | ||||
|                 flags=re.MULTILINE) | ||||
|             bootstrap = json.loads(bootstrap_json) | ||||
|                 r'var\s+(?:bootstrapJson|playlistData)\s*=\s*({.+});?\s*$', | ||||
|                 webpage, 'bootstrap json', flags=re.MULTILINE) | ||||
|             bootstrap = self._parse_json(bootstrap_json, video_id) | ||||
|             info = bootstrap['results'][0]['video'] | ||||
|             mpxid = info['mpxId'] | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     fix_xml_ampersands, | ||||
|     parse_duration, | ||||
| @@ -22,7 +23,7 @@ class NPOBaseIE(SubtitlesInfoExtractor): | ||||
|  | ||||
| class NPOIE(NPOBaseIE): | ||||
|     IE_NAME = 'npo.nl' | ||||
|     _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -185,7 +186,7 @@ class NPOIE(NPOBaseIE): | ||||
|  | ||||
| class NPOLiveIE(NPOBaseIE): | ||||
|     IE_NAME = 'npo.nl:live' | ||||
|     _VALID_URL = r'https?://www\.npo\.nl/live/(?P<id>.+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?npo\.nl/live/(?P<id>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.npo.nl/live/npo-1', | ||||
| @@ -260,6 +261,84 @@ class NPOLiveIE(NPOBaseIE): | ||||
|         } | ||||
|  | ||||
|  | ||||
class NPORadioIE(InfoExtractor):
    """Extractor for live radio channel pages under ``npo.nl/radio/``.

    The channel name and the stream description are read from
    single-quoted ``data-channel`` / ``data-streams`` attributes found in
    the downloaded page.
    """

    IE_NAME = 'npo.nl:radio'
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/(?P<id>[^/]+)/?$'

    _TEST = {
        'url': 'http://www.npo.nl/radio/radio-1',
        'info_dict': {
            'id': 'radio-1',
            'ext': 'mp3',
            'title': 're:^NPO Radio 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,
        }
    }

    @staticmethod
    def _html_get_attribute_regex(attribute):
        """Return a regex whose first group is the single-quoted value of ``attribute``."""
        template = r'{0}\s*=\s*\'([^\']+)\''
        return template.format(attribute)

    def _real_extract(self, url):
        """Build the info dict for the live stream of the channel at ``url``."""
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        channel_name = self._html_search_regex(
            self._html_get_attribute_regex('data-channel'), page, 'title')

        # The data-streams attribute holds a JSON object describing the stream.
        streams_json = self._html_search_regex(
            self._html_get_attribute_regex('data-streams'), page, 'data-streams')
        stream_info = self._parse_json(streams_json, channel_id)

        # The reported codec is used both as audio codec and as extension.
        audio_codec = stream_info.get('codec')

        return {
            'id': channel_id,
            'url': stream_info['url'],
            'title': self._live_title(channel_name),
            'acodec': audio_codec,
            'ext': audio_codec,
            'is_live': True,
        }
|  | ||||
|  | ||||
class NPORadioFragmentIE(InfoExtractor):
    """Extractor for radio fragment pages (``npo.nl/radio/<channel>/fragment/<id>``)."""

    IE_NAME = 'npo.nl:radio:fragment'
    _VALID_URL = r'https?://(?:www\.)?npo\.nl/radio/[^/]+/fragment/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.npo.nl/radio/radio-5/fragment/174356',
        'md5': 'dd8cc470dad764d0fdc70a9a1e2d18c2',
        'info_dict': {
            'id': '174356',
            'ext': 'mp3',
            'title': 'Jubileumconcert Willeke Alberti',
        },
    }

    def _real_extract(self, url):
        """Return id, audio URL and title of the fragment at ``url``."""
        fragment_id = self._match_id(url)
        page = self._download_webpage(url, fragment_id)

        # The title is taken from the link that points back at this fragment.
        title_pattern = r'href="/radio/[^/]+/fragment/%s" title="([^"]+)"' % fragment_id
        fragment_title = self._html_search_regex(title_pattern, page, 'title')

        # Here the data-streams attribute value is used directly as the URL.
        stream_url = self._search_regex(
            r"data-streams='([^']+)'", page, 'audio url')

        return {
            'id': fragment_id,
            'url': stream_url,
            'title': fragment_title,
        }
|  | ||||
|  | ||||
| class TegenlichtVproIE(NPOIE): | ||||
|     IE_NAME = 'tegenlicht.vpro.nl' | ||||
|     _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' | ||||
|   | ||||
| @@ -3,7 +3,9 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unescapeHTML | ||||
|     clean_html, | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -14,73 +16,63 @@ class NTVRuIE(InfoExtractor): | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/novosti/863142/', | ||||
|             'md5': 'ba7ea172a91cb83eb734cad18c10e723', | ||||
|             'info_dict': { | ||||
|                 'id': '746000', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|                 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 136, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/video/novosti/750370/', | ||||
|             'md5': 'adecff79691b4d71e25220a191477124', | ||||
|             'info_dict': { | ||||
|                 'id': '750370', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|                 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 172, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416', | ||||
|             'md5': '82dbd49b38e3af1d00df16acbeab260c', | ||||
|             'info_dict': { | ||||
|                 'id': '747480', | ||||
|                 'ext': 'flv', | ||||
|                 'title': '«Сегодня». 21 марта 2014 года. 16:00 ', | ||||
|                 'description': '«Сегодня». 21 марта 2014 года. 16:00 ', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|                 'description': '«Сегодня». 21 марта 2014 года. 16:00', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 1496, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/kino/Koma_film', | ||||
|             'md5': 'f825770930937aa7e5aca0dc0d29319a', | ||||
|             'info_dict': { | ||||
|                 'id': '758100', | ||||
|                 'ext': 'flv', | ||||
|                 'id': '1007609', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Остросюжетный фильм «Кома»', | ||||
|                 'description': 'Остросюжетный фильм «Кома»', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 5592, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/', | ||||
|             'md5': '9320cd0e23f3ea59c330dc744e06ff3b', | ||||
|             'info_dict': { | ||||
|                 'id': '751482', | ||||
|                 'ext': 'flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': '«Дело врачей»: «Деревце жизни»', | ||||
|                 'description': '«Дело врачей»: «Деревце жизни»', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'duration': 2590, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
| @@ -92,45 +84,36 @@ class NTVRuIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         page = self._download_webpage(url, video_id) | ||||
|  | ||||
|         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, page, 'video id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player = self._download_xml('http://www.ntv.ru/vi%s/' % video_id, video_id, 'Downloading video XML') | ||||
|         title = unescapeHTML(player.find('./data/title').text) | ||||
|         description = unescapeHTML(player.find('./data/description').text) | ||||
|         video_id = self._html_search_regex(self._VIDEO_ID_REGEXES, webpage, 'video id') | ||||
|  | ||||
|         player = self._download_xml( | ||||
|             'http://www.ntv.ru/vi%s/' % video_id, | ||||
|             video_id, 'Downloading video XML') | ||||
|         title = clean_html(xpath_text(player, './data/title', 'title', fatal=True)) | ||||
|         description = clean_html(xpath_text(player, './data/description', 'description')) | ||||
|  | ||||
|         video = player.find('./data/video') | ||||
|         video_id = video.find('./id').text | ||||
|         thumbnail = video.find('./splash').text | ||||
|         duration = int(video.find('./totaltime').text) | ||||
|         view_count = int(video.find('./views').text) | ||||
|         puid22 = video.find('./puid22').text | ||||
|         video_id = xpath_text(video, './id', 'video id') | ||||
|         thumbnail = xpath_text(video, './splash', 'thumbnail') | ||||
|         duration = int_or_none(xpath_text(video, './totaltime', 'duration')) | ||||
|         view_count = int_or_none(xpath_text(video, './views', 'view count')) | ||||
|  | ||||
|         apps = { | ||||
|             '4': 'video1', | ||||
|             '7': 'video2', | ||||
|         } | ||||
|  | ||||
|         app = apps.get(puid22, apps['4']) | ||||
|         token = self._download_webpage( | ||||
|             'http://stat.ntv.ru/services/access/token', | ||||
|             video_id, 'Downloading access token') | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['', 'hi', 'webm']: | ||||
|             file = video.find('./%sfile' % format_id) | ||||
|             if file is None: | ||||
|             file_ = video.find('./%sfile' % format_id) | ||||
|             if file_ is None: | ||||
|                 continue | ||||
|             size = video.find('./%ssize' % format_id) | ||||
|             formats.append({ | ||||
|                 'url': 'rtmp://media.ntv.ru/%s' % app, | ||||
|                 'app': app, | ||||
|                 'play_path': file.text, | ||||
|                 'rtmp_conn': 'B:1', | ||||
|                 'player_url': 'http://www.ntv.ru/swf/vps1.swf?update=20131128', | ||||
|                 'page_url': 'http://www.ntv.ru', | ||||
|                 'flash_version': 'LNX 11,2,202,341', | ||||
|                 'rtmp_live': True, | ||||
|                 'ext': 'flv', | ||||
|                 'filesize': int(size.text), | ||||
|                 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token), | ||||
|                 'filesize': int_or_none(size.text if size is not None else None), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|   | ||||
| @@ -46,16 +46,17 @@ class PornHdIE(InfoExtractor): | ||||
|  | ||||
|         quality = qualities(['sd', 'hd']) | ||||
|         sources = json.loads(js_to_json(self._search_regex( | ||||
|             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}\);", webpage, 'sources'))) | ||||
|             r"(?s)'sources'\s*:\s*(\{.+?\})\s*\}[;,)]", | ||||
|             webpage, 'sources'))) | ||||
|         formats = [] | ||||
|         for container, s in sources.items(): | ||||
|             for qname, video_url in s.items(): | ||||
|                 formats.append({ | ||||
|                     'url': video_url, | ||||
|                     'container': container, | ||||
|                     'format_id': '%s-%s' % (container, qname), | ||||
|                     'quality': quality(qname), | ||||
|                 }) | ||||
|         for qname, video_url in sources.items(): | ||||
|             if not video_url: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|                 'format_id': qname, | ||||
|                 'quality': quality(qname), | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -1,14 +1,30 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import hashlib | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| def _get_api_key(api_path): | ||||
|     if api_path.endswith('?'): | ||||
|         api_path = api_path[:-1] | ||||
|  | ||||
|     api_key = 'fb5f58a820353bd7095de526253c14fd' | ||||
|     a = '{0:}{1:}{2:}'.format(api_key, api_path, int(round(time.time() / 24 / 3600))) | ||||
|     return hashlib.md5(a.encode('ascii')).hexdigest() | ||||
|  | ||||
|  | ||||
| class StreamCZIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<id>[0-9]+)' | ||||
|     _API_URL = 'http://www.stream.cz/API' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti', | ||||
| @@ -36,8 +52,11 @@ class StreamCZIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         data = self._download_json( | ||||
|             'http://www.stream.cz/API/episode/%s' % video_id, video_id) | ||||
|         api_path = '/episode/%s' % video_id | ||||
|  | ||||
|         req = compat_urllib_request.Request(self._API_URL + api_path) | ||||
|         req.add_header('Api-Password', _get_api_key(api_path)) | ||||
|         data = self._download_json(req, video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for quality, video in enumerate(data['video_qualities']): | ||||
|   | ||||
| @@ -52,7 +52,7 @@ class SunPornoIE(InfoExtractor): | ||||
|  | ||||
|         formats = [] | ||||
|         quality = qualities(['mp4', 'flv']) | ||||
|         for video_url in re.findall(r'<source src="([^"]+)"', webpage): | ||||
|         for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage): | ||||
|             video_ext = determine_ext(video_url) | ||||
|             formats.append({ | ||||
|                 'url': video_url, | ||||
|   | ||||
| @@ -30,6 +30,11 @@ class TeamcocoIE(InfoExtractor): | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|     _VIDEO_ID_REGEXES = ( | ||||
|         r'"eVar42"\s*:\s*(\d+)', | ||||
|         r'Ginger\.TeamCoco\.openInApp\("video",\s*"([^"]+)"', | ||||
|         r'"id_not"\s*:\s*(\d+)' | ||||
|     ) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
| @@ -40,8 +45,7 @@ class TeamcocoIE(InfoExtractor): | ||||
|         video_id = mobj.group("video_id") | ||||
|         if not video_id: | ||||
|             video_id = self._html_search_regex( | ||||
|                 r'<div\s+class="player".*?data-id="(\d+?)"', | ||||
|                 webpage, 'video id') | ||||
|                 self._VIDEO_ID_REGEXES, webpage, 'video id') | ||||
|  | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|         data = self._download_xml( | ||||
|   | ||||
| @@ -2,6 +2,11 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
| import time | ||||
| import hmac | ||||
| import binascii | ||||
| import hashlib | ||||
|  | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..compat import ( | ||||
| @@ -11,6 +16,7 @@ from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     xpath_with_ns, | ||||
|     unsmuggle_url, | ||||
| ) | ||||
|  | ||||
| _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) | ||||
| @@ -18,7 +24,7 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language | ||||
|  | ||||
| class ThePlatformIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ | ||||
|         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ | ||||
|            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? | ||||
|          |theplatform:)(?P<id>[^/\?&]+)''' | ||||
|  | ||||
| @@ -38,9 +44,33 @@ class ThePlatformIE(SubtitlesInfoExtractor): | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False): | ||||
|         flags = '10' if include_qs else '00' | ||||
|         expiration_date = '%x' % (int(time.time()) + life) | ||||
|  | ||||
|         def str_to_hex(str): | ||||
|             return binascii.b2a_hex(str.encode('ascii')).decode('ascii') | ||||
|  | ||||
|         def hex_to_str(hex): | ||||
|             return binascii.a2b_hex(hex) | ||||
|  | ||||
|         relative_path = url.split('http://link.theplatform.com/s/')[1].split('?')[0] | ||||
|         clear_text = hex_to_str(flags + expiration_date + str_to_hex(relative_path)) | ||||
|         checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest() | ||||
|         sig = flags + expiration_date + checksum + str_to_hex(sig_secret) | ||||
|         return '%s&sig=%s' % (url, sig) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         url, smuggled_data = unsmuggle_url(url, {}) | ||||
|  | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         provider_id = mobj.group('provider_id') | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         if not provider_id: | ||||
|             provider_id = 'dJ5BDC' | ||||
|  | ||||
|         if mobj.group('config'): | ||||
|             config_url = url + '&form=json' | ||||
|             config_url = config_url.replace('swf/', 'config/') | ||||
| @@ -48,8 +78,12 @@ class ThePlatformIE(SubtitlesInfoExtractor): | ||||
|             config = self._download_json(config_url, video_id, 'Downloading config') | ||||
|             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4&manifest=f4m' | ||||
|         else: | ||||
|             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' | ||||
|                         'format=smil&mbr=true'.format(video_id)) | ||||
|             smil_url = ('http://link.theplatform.com/s/{0}/{1}/meta.smil?' | ||||
|                         'format=smil&mbr=true'.format(provider_id, video_id)) | ||||
|  | ||||
|         sig = smuggled_data.get('sig') | ||||
|         if sig: | ||||
|             smil_url = self._sign_url(smil_url, sig['key'], sig['secret']) | ||||
|  | ||||
|         meta = self._download_xml(smil_url, video_id) | ||||
|         try: | ||||
| @@ -62,7 +96,7 @@ class ThePlatformIE(SubtitlesInfoExtractor): | ||||
|         else: | ||||
|             raise ExtractorError(error_msg, expected=True) | ||||
|  | ||||
|         info_url = 'http://link.theplatform.com/s/dJ5BDC/{0}?format=preview'.format(video_id) | ||||
|         info_url = 'http://link.theplatform.com/s/{0}/{1}?format=preview'.format(provider_id, video_id) | ||||
|         info_json = self._download_webpage(info_url, video_id) | ||||
|         info = json.loads(info_json) | ||||
|  | ||||
|   | ||||
| @@ -188,9 +188,9 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         password_request = compat_urllib_request.Request(pass_url + '/password', data) | ||||
|         password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         password_request.add_header('Cookie', 'xsrft=%s' % token) | ||||
|         self._download_webpage(password_request, video_id, | ||||
|                                'Verifying the password', | ||||
|                                'Wrong password') | ||||
|         return self._download_webpage( | ||||
|             password_request, video_id, | ||||
|             'Verifying the password', 'Wrong password') | ||||
|  | ||||
|     def _verify_player_video_password(self, url, video_id): | ||||
|         password = self._downloader.params.get('videopassword', None) | ||||
| @@ -266,7 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): | ||||
|                 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') | ||||
|  | ||||
|             if re.search('<form[^>]+?id="pw_form"', webpage) is not None: | ||||
|             if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None: | ||||
|                 self._verify_video_password(url, video_id, webpage) | ||||
|                 return self._real_extract(url) | ||||
|             else: | ||||
| @@ -412,12 +412,47 @@ class VimeoChannelIE(InfoExtractor): | ||||
|     def _extract_list_title(self, webpage): | ||||
|         return self._html_search_regex(self._TITLE_RE, webpage, 'list title') | ||||
|  | ||||
|     def _login_list_password(self, page_url, list_id, webpage): | ||||
|         login_form = self._search_regex( | ||||
|             r'(?s)<form[^>]+?id="pw_form"(.*?)</form>', | ||||
|             webpage, 'login form', default=None) | ||||
|         if not login_form: | ||||
|             return webpage | ||||
|  | ||||
|         password = self._downloader.params.get('videopassword', None) | ||||
|         if password is None: | ||||
|             raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) | ||||
|         fields = dict(re.findall(r'''(?x)<input\s+ | ||||
|             type="hidden"\s+ | ||||
|             name="([^"]+)"\s+ | ||||
|             value="([^"]*)" | ||||
|             ''', login_form)) | ||||
|         token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') | ||||
|         fields['token'] = token | ||||
|         fields['password'] = password | ||||
|         post = compat_urllib_parse.urlencode(fields) | ||||
|         password_path = self._search_regex( | ||||
|             r'action="([^"]+)"', login_form, 'password URL') | ||||
|         password_url = compat_urlparse.urljoin(page_url, password_path) | ||||
|         password_request = compat_urllib_request.Request(password_url, post) | ||||
|         password_request.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||
|         self._set_cookie('vimeo.com', 'xsrft', token) | ||||
|  | ||||
|         return self._download_webpage( | ||||
|             password_request, list_id, | ||||
|             'Verifying the password', 'Wrong password') | ||||
|  | ||||
|     def _extract_videos(self, list_id, base_url): | ||||
|         video_ids = [] | ||||
|         for pagenum in itertools.count(1): | ||||
|             page_url = self._page_url(base_url, pagenum) | ||||
|             webpage = self._download_webpage( | ||||
|                 self._page_url(base_url, pagenum), list_id, | ||||
|                 page_url, list_id, | ||||
|                 'Downloading page %s' % pagenum) | ||||
|  | ||||
|             if pagenum == 1: | ||||
|                 webpage = self._login_list_password(page_url, list_id, webpage) | ||||
|  | ||||
|             video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) | ||||
|             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: | ||||
|                 break | ||||
| @@ -464,14 +499,24 @@ class VimeoAlbumIE(VimeoChannelIE): | ||||
|             'title': 'Staff Favorites: November 2013', | ||||
|         }, | ||||
|         'playlist_mincount': 13, | ||||
|     }, { | ||||
|         'note': 'Password-protected album', | ||||
|         'url': 'https://vimeo.com/album/3253534', | ||||
|         'info_dict': { | ||||
|             'title': 'test', | ||||
|             'id': '3253534', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|         'params': { | ||||
|             'videopassword': 'youtube-dl', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _page_url(self, base_url, pagenum): | ||||
|         return '%s/page:%d/' % (base_url, pagenum) | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         album_id = mobj.group('id') | ||||
|         album_id = self._match_id(url) | ||||
|         return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id) | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										81
									
								
								youtube_dl/extractor/yam.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										81
									
								
								youtube_dl/extractor/yam.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,81 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     float_or_none, | ||||
|     month_by_abbreviation, | ||||
| ) | ||||
|  | ||||
|  | ||||
class YamIE(InfoExtractor):
    """Extractor for media pages on mymedia.yam.com.

    A page either embeds a YouTube video, in which case extraction is
    handed over to the Youtube extractor, or refers to an MP3 file whose
    location and duration are fetched from the site's API.
    """

    # Dots are escaped so that they only match a literal '.'.
    _VALID_URL = r'http://mymedia\.yam\.com/m/(?P<id>\d+)'

    _TESTS = [{
        # An audio hosted on Yam
        'url': 'http://mymedia.yam.com/m/2283921',
        'md5': 'c011b8e262a52d5473d9c2e3c9963b9c',
        'info_dict': {
            'id': '2283921',
            'ext': 'mp3',
            'title': '發現 - 趙薇 京華煙雲主題曲',
            'uploader_id': 'princekt',
            'upload_date': '20080807',
            'duration': 313.0,
        }
    }, {
        # An external video hosted on YouTube
        'url': 'http://mymedia.yam.com/m/3598173',
        'md5': '0238ceec479c654e8c2f1223755bf3e9',
        'info_dict': {
            'id': 'pJ2Deys283c',
            'ext': 'mp4',
            'upload_date': '20150202',
            'uploader': '新莊社大瑜伽社',
            'description': 'md5:f5cc72f0baf259a70fb731654b0d2eff',
            'uploader_id': '2323agoy',
            'title': '外婆的澎湖灣KTV-潘安邦',
        }
    }]

    def _real_extract(self, url):
        """Return the info dict (or a Youtube URL result) for ``url``.

        The media URL is mandatory (a missing ``mp3file`` entry raises
        KeyError); uploader id, upload date and duration are optional and
        are ``None`` when they cannot be determined.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        # Is it hosted externally on YouTube?
        youtube_url = self._html_search_regex(
            r'<embed src="(http://www\.youtube\.com/[^"]+)"',
            page, 'YouTube url', default=None)
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        # The API answers with a query-string encoded body.
        api_page = self._download_webpage(
            'http://mymedia.yam.com/api/a/?pID=' + video_id, video_id,
            note='Downloading API page')
        api_result_obj = compat_urlparse.parse_qs(api_page)

        uploader_id = self._html_search_regex(
            r'<!-- 發表作者 -->:[\n ]+<a href="/([a-z]+)"',
            page, 'uploader id', fatal=False)

        upload_date = None
        mobj = re.search(r'<!-- 發表於 -->(?P<mon>[A-Z][a-z]{2})  ' +
                         r'(?P<day>\d{1,2}), (?P<year>\d{4})', page)
        if mobj:
            month = month_by_abbreviation(mobj.group('mon'))
            # The pattern accepts any capitalised three-letter word; only
            # format the date when it resolved to a month number, since
            # '%02d' cannot format None.
            if month is not None:
                upload_date = '%s%02d%02d' % (
                    mobj.group('year'), month, int(mobj.group('day')))

        # The duration is optional metadata: tolerate a missing 'totaltime'.
        duration = float_or_none(
            api_result_obj.get('totaltime', [None])[0], scale=1000)

        return {
            'id': video_id,
            'url': api_result_obj['mp3file'][0],
            'title': self._html_search_meta('description', page),
            'duration': duration,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
        }
| @@ -25,6 +25,7 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     get_element_by_attribute, | ||||
|     get_element_by_id, | ||||
|     int_or_none, | ||||
| @@ -1124,6 +1125,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|             'dislike_count': dislike_count, | ||||
|             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]), | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -165,6 +165,11 @@ def parseOpts(overrideArguments=None): | ||||
|         action='store_const', dest='extract_flat', const='in_playlist', | ||||
|         default=False, | ||||
|         help='Do not extract the videos of a playlist, only list them.') | ||||
|     general.add_option( | ||||
|         '--no-color', '--no-colors', | ||||
|         action='store_true', dest='no_color', | ||||
|         default=False, | ||||
|         help='Do not emit color codes in output.') | ||||
|  | ||||
|     network = optparse.OptionGroup(parser, 'Network Options') | ||||
|     network.add_option( | ||||
| @@ -244,6 +249,25 @@ def parseOpts(overrideArguments=None): | ||||
|         '--max-views', | ||||
|         metavar='COUNT', dest='max_views', default=None, type=int, | ||||
|         help='Do not download any videos with more than COUNT views') | ||||
|     selection.add_option( | ||||
|         '--match-filter', | ||||
|         metavar='FILTER', dest='match_filter', default=None, | ||||
|         help=( | ||||
|             '(Experimental) Generic video filter. ' | ||||
|             'Specify any key (see help for -o for a list of available keys) to' | ||||
|             ' match if the key is present, ' | ||||
|             '!key to check if the key is not present,' | ||||
|             'key > NUMBER (like "comment_count > 12", also works with ' | ||||
|             '>=, <, <=, !=, =) to compare against a number, and ' | ||||
|             '& to require multiple matches. ' | ||||
|             'Values which are not known are excluded unless you' | ||||
|             ' put a question mark (?) after the operator.' | ||||
|             'For example, to only match videos that have been liked more than ' | ||||
|             '100 times and disliked less than 50 times (or the dislike ' | ||||
|             'functionality is not available at the given service), but who ' | ||||
|             'also have a description, use  --match-filter ' | ||||
|             '"like_count > 100 & dislike_count <? 50 & description" .' | ||||
|         )) | ||||
|     selection.add_option( | ||||
|         '--no-playlist', | ||||
|         action='store_true', dest='noplaylist', default=False, | ||||
| @@ -533,7 +557,7 @@ def parseOpts(overrideArguments=None): | ||||
|         action='store_true', dest='youtube_print_sig_code', default=False, | ||||
|         help=optparse.SUPPRESS_HELP) | ||||
|     verbosity.add_option( | ||||
|         '--print-traffic', | ||||
|         '--print-traffic', '--dump-headers', | ||||
|         dest='debug_printtraffic', action='store_true', default=False, | ||||
|         help='Display sent and read HTTP traffic') | ||||
|     verbosity.add_option( | ||||
| @@ -711,6 +735,10 @@ def parseOpts(overrideArguments=None): | ||||
|         '--prefer-ffmpeg', | ||||
|         action='store_true', dest='prefer_ffmpeg', | ||||
|         help='Prefer ffmpeg over avconv for running the postprocessors') | ||||
|     postproc.add_option( | ||||
|         '--ffmpeg-location', '--avconv-location', metavar='PATH', | ||||
|         dest='ffmpeg_location', | ||||
|         help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.') | ||||
|     postproc.add_option( | ||||
|         '--exec', | ||||
|         metavar='CMD', dest='exec_cmd', | ||||
|   | ||||
| @@ -30,54 +30,97 @@ class FFmpegPostProcessorError(PostProcessingError): | ||||
| class FFmpegPostProcessor(PostProcessor): | ||||
|     def __init__(self, downloader=None, deletetempfiles=False): | ||||
|         PostProcessor.__init__(self, downloader) | ||||
|         self._versions = self.get_versions() | ||||
|         self._deletetempfiles = deletetempfiles | ||||
|         self._determine_executables() | ||||
|  | ||||
|     def check_version(self): | ||||
|         if not self._executable: | ||||
|         if not self.available(): | ||||
|             raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') | ||||
|  | ||||
|         required_version = '10-0' if self._uses_avconv() else '1.0' | ||||
|         if is_outdated_version( | ||||
|                 self._versions[self._executable], required_version): | ||||
|                 self._versions[self.basename], required_version): | ||||
|             warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( | ||||
|                 self._executable, self._executable, required_version) | ||||
|                 self.basename, self.basename, required_version) | ||||
|             if self._downloader: | ||||
|                 self._downloader.report_warning(warning) | ||||
|  | ||||
|     @staticmethod | ||||
|     def get_versions(): | ||||
|     def get_versions(downloader=None): | ||||
|         return FFmpegPostProcessor(downloader)._versions | ||||
|  | ||||
|     def _determine_executables(self): | ||||
|         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] | ||||
|         return dict((p, get_exe_version(p, args=['-version'])) for p in programs) | ||||
|         prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False) | ||||
|  | ||||
|     @property | ||||
|     def available(self): | ||||
|         return self._executable is not None | ||||
|         self.basename = None | ||||
|         self.probe_basename = None | ||||
|  | ||||
|     @property | ||||
|     def _executable(self): | ||||
|         if self._downloader.params.get('prefer_ffmpeg', False): | ||||
|         self._paths = None | ||||
|         self._versions = None | ||||
|         if self._downloader: | ||||
|             location = self._downloader.params.get('ffmpeg_location') | ||||
|             if location is not None: | ||||
|                 if not os.path.exists(location): | ||||
|                     self._downloader.report_warning( | ||||
|                         'ffmpeg-location %s does not exist! ' | ||||
|                         'Continuing without avconv/ffmpeg.' % (location)) | ||||
|                     self._versions = {} | ||||
|                     return | ||||
|                 elif not os.path.isdir(location): | ||||
|                     basename = os.path.splitext(os.path.basename(location))[0] | ||||
|                     if basename not in programs: | ||||
|                         self._downloader.report_warning( | ||||
|                             'Cannot identify executable %s, its basename should be one of %s. ' | ||||
|                             'Continuing without avconv/ffmpeg.' % | ||||
|                             (location, ', '.join(programs))) | ||||
|                         self._versions = {} | ||||
|                         return None | ||||
|                     location = os.path.dirname(os.path.abspath(location)) | ||||
|                     if basename in ('ffmpeg', 'ffprobe'): | ||||
|                         prefer_ffmpeg = True | ||||
|  | ||||
|                 self._paths = dict( | ||||
|                     (p, os.path.join(location, p)) for p in programs) | ||||
|                 self._versions = dict( | ||||
|                     (p, get_exe_version(self._paths[p], args=['-version'])) | ||||
|                     for p in programs) | ||||
|         if self._versions is None: | ||||
|             self._versions = dict( | ||||
|                 (p, get_exe_version(p, args=['-version'])) for p in programs) | ||||
|             self._paths = dict((p, p) for p in programs) | ||||
|  | ||||
|         if prefer_ffmpeg: | ||||
|             prefs = ('ffmpeg', 'avconv') | ||||
|         else: | ||||
|             prefs = ('avconv', 'ffmpeg') | ||||
|         for p in prefs: | ||||
|             if self._versions[p]: | ||||
|                 return p | ||||
|         return None | ||||
|                 self.basename = p | ||||
|                 break | ||||
|  | ||||
|     @property | ||||
|     def _probe_executable(self): | ||||
|         if self._downloader.params.get('prefer_ffmpeg', False): | ||||
|         if prefer_ffmpeg: | ||||
|             prefs = ('ffprobe', 'avprobe') | ||||
|         else: | ||||
|             prefs = ('avprobe', 'ffprobe') | ||||
|         for p in prefs: | ||||
|             if self._versions[p]: | ||||
|                 return p | ||||
|         return None | ||||
|                 self.probe_basename = p | ||||
|                 break | ||||
|  | ||||
|     def available(self): | ||||
|         return self.basename is not None | ||||
|  | ||||
|     def _uses_avconv(self): | ||||
|         return self._executable == 'avconv' | ||||
|         return self.basename == 'avconv' | ||||
|  | ||||
|     @property | ||||
|     def executable(self): | ||||
|         return self._paths[self.basename] | ||||
|  | ||||
|     @property | ||||
|     def probe_executable(self): | ||||
|         return self._paths[self.probe_basename] | ||||
|  | ||||
|     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): | ||||
|         self.check_version() | ||||
| @@ -88,14 +131,14 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|         files_cmd = [] | ||||
|         for path in input_paths: | ||||
|             files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)]) | ||||
|         cmd = ([encodeFilename(self._executable, True), encodeArgument('-y')] + | ||||
|         cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + | ||||
|                files_cmd + | ||||
|                [encodeArgument(o) for o in opts] + | ||||
|                [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) | ||||
|  | ||||
|         if self._downloader.params.get('verbose', False): | ||||
|             self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) | ||||
|         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
|         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) | ||||
|         stdout, stderr = p.communicate() | ||||
|         if p.returncode != 0: | ||||
|             stderr = stderr.decode('utf-8', 'replace') | ||||
| @@ -127,14 +170,16 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|  | ||||
|     def get_audio_codec(self, path): | ||||
|  | ||||
|         if not self._probe_executable: | ||||
|         if not self.probe_executable: | ||||
|             raise PostProcessingError('ffprobe or avprobe not found. Please install one.') | ||||
|         try: | ||||
|             cmd = [ | ||||
|                 encodeFilename(self._probe_executable, True), | ||||
|                 encodeFilename(self.probe_executable, True), | ||||
|                 encodeArgument('-show_streams'), | ||||
|                 encodeFilename(self._ffmpeg_filename_argument(path), True)] | ||||
|             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) | ||||
|             if self._downloader.params.get('verbose', False): | ||||
|                 self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) | ||||
|             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) | ||||
|             output = handle.communicate()[0] | ||||
|             if handle.wait() != 0: | ||||
|                 return None | ||||
| @@ -223,14 +268,14 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): | ||||
|             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)): | ||||
|                 self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path) | ||||
|             else: | ||||
|                 self._downloader.to_screen('[' + self._executable + '] Destination: ' + new_path) | ||||
|                 self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) | ||||
|                 self.run_ffmpeg(path, new_path, acodec, more_opts) | ||||
|         except: | ||||
|             etype, e, tb = sys.exc_info() | ||||
|             if isinstance(e, AudioConversionError): | ||||
|                 msg = 'audio conversion failed: ' + e.msg | ||||
|             else: | ||||
|                 msg = 'error running ' + self._executable | ||||
|                 msg = 'error running ' + self.basename | ||||
|             raise PostProcessingError(msg) | ||||
|  | ||||
|         # Try to update the date time for extracted audio file. | ||||
|   | ||||
| @@ -17,6 +17,7 @@ import io | ||||
| import json | ||||
| import locale | ||||
| import math | ||||
| import operator | ||||
| import os | ||||
| import pipes | ||||
| import platform | ||||
| @@ -61,6 +62,11 @@ std_headers = { | ||||
| } | ||||
|  | ||||
|  | ||||
| ENGLISH_MONTH_NAMES = [ | ||||
|     'January', 'February', 'March', 'April', 'May', 'June', | ||||
|     'July', 'August', 'September', 'October', 'November', 'December'] | ||||
|  | ||||
|  | ||||
| def preferredencoding(): | ||||
|     """Get preferred encoding. | ||||
|  | ||||
| @@ -665,26 +671,27 @@ class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler): | ||||
|             req, **kwargs) | ||||
|  | ||||
|  | ||||
| def parse_iso8601(date_str, delimiter='T'): | ||||
| def parse_iso8601(date_str, delimiter='T', timezone=None): | ||||
|     """ Return a UNIX timestamp from the given date """ | ||||
|  | ||||
|     if date_str is None: | ||||
|         return None | ||||
|  | ||||
|     m = re.search( | ||||
|         r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', | ||||
|         date_str) | ||||
|     if not m: | ||||
|         timezone = datetime.timedelta() | ||||
|     else: | ||||
|         date_str = date_str[:-len(m.group(0))] | ||||
|         if not m.group('sign'): | ||||
|     if timezone is None: | ||||
|         m = re.search( | ||||
|             r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', | ||||
|             date_str) | ||||
|         if not m: | ||||
|             timezone = datetime.timedelta() | ||||
|         else: | ||||
|             sign = 1 if m.group('sign') == '+' else -1 | ||||
|             timezone = datetime.timedelta( | ||||
|                 hours=sign * int(m.group('hours')), | ||||
|                 minutes=sign * int(m.group('minutes'))) | ||||
|             date_str = date_str[:-len(m.group(0))] | ||||
|             if not m.group('sign'): | ||||
|                 timezone = datetime.timedelta() | ||||
|             else: | ||||
|                 sign = 1 if m.group('sign') == '+' else -1 | ||||
|                 timezone = datetime.timedelta( | ||||
|                     hours=sign * int(m.group('hours')), | ||||
|                     minutes=sign * int(m.group('minutes'))) | ||||
|     date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) | ||||
|     dt = datetime.datetime.strptime(date_str, date_format) - timezone | ||||
|     return calendar.timegm(dt.timetuple()) | ||||
| @@ -1183,11 +1190,18 @@ def get_term_width(): | ||||
| def month_by_name(name): | ||||
|     """ Return the number of a month by (locale-independently) English name """ | ||||
|  | ||||
|     ENGLISH_NAMES = [ | ||||
|         'January', 'February', 'March', 'April', 'May', 'June', | ||||
|         'July', 'August', 'September', 'October', 'November', 'December'] | ||||
|     try: | ||||
|         return ENGLISH_NAMES.index(name) + 1 | ||||
|         return ENGLISH_MONTH_NAMES.index(name) + 1 | ||||
|     except ValueError: | ||||
|         return None | ||||
|  | ||||
|  | ||||
| def month_by_abbreviation(abbrev): | ||||
|     """ Return the number of a month by (locale-independently) English | ||||
|         abbreviations """ | ||||
|  | ||||
|     try: | ||||
|         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1 | ||||
|     except ValueError: | ||||
|         return None | ||||
|  | ||||
| @@ -1678,3 +1692,79 @@ def render_table(header_row, data): | ||||
|     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)] | ||||
|     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s' | ||||
|     return '\n'.join(format_str % tuple(row) for row in table) | ||||
|  | ||||
|  | ||||
| def _match_one(filter_part, dct): | ||||
|     COMPARISON_OPERATORS = { | ||||
|         '<': operator.lt, | ||||
|         '<=': operator.le, | ||||
|         '>': operator.gt, | ||||
|         '>=': operator.ge, | ||||
|         '=': operator.eq, | ||||
|         '!=': operator.ne, | ||||
|     } | ||||
|     operator_rex = re.compile(r'''(?x)\s* | ||||
|         (?P<key>[a-z_]+) | ||||
|         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* | ||||
|         (?: | ||||
|             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| | ||||
|             (?P<strval>(?![0-9.])[a-z0-9A-Z]*) | ||||
|         ) | ||||
|         \s*$ | ||||
|         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) | ||||
|     m = operator_rex.search(filter_part) | ||||
|     if m: | ||||
|         op = COMPARISON_OPERATORS[m.group('op')] | ||||
|         if m.group('strval') is not None: | ||||
|             if m.group('op') not in ('=', '!='): | ||||
|                 raise ValueError( | ||||
|                     'Operator %s does not support string values!' % m.group('op')) | ||||
|             comparison_value = m.group('strval') | ||||
|         else: | ||||
|             try: | ||||
|                 comparison_value = int(m.group('intval')) | ||||
|             except ValueError: | ||||
|                 comparison_value = parse_filesize(m.group('intval')) | ||||
|                 if comparison_value is None: | ||||
|                     comparison_value = parse_filesize(m.group('intval') + 'B') | ||||
|                 if comparison_value is None: | ||||
|                     raise ValueError( | ||||
|                         'Invalid integer value %r in filter part %r' % ( | ||||
|                             m.group('intval'), filter_part)) | ||||
|         actual_value = dct.get(m.group('key')) | ||||
|         if actual_value is None: | ||||
|             return m.group('none_inclusive') | ||||
|         return op(actual_value, comparison_value) | ||||
|  | ||||
|     UNARY_OPERATORS = { | ||||
|         '': lambda v: v is not None, | ||||
|         '!': lambda v: v is None, | ||||
|     } | ||||
|     operator_rex = re.compile(r'''(?x)\s* | ||||
|         (?P<op>%s)\s*(?P<key>[a-z_]+) | ||||
|         \s*$ | ||||
|         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) | ||||
|     m = operator_rex.search(filter_part) | ||||
|     if m: | ||||
|         op = UNARY_OPERATORS[m.group('op')] | ||||
|         actual_value = dct.get(m.group('key')) | ||||
|         return op(actual_value) | ||||
|  | ||||
|     raise ValueError('Invalid filter part %r' % filter_part) | ||||
|  | ||||
|  | ||||
| def match_str(filter_str, dct): | ||||
|     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ | ||||
|  | ||||
|     return all( | ||||
|         _match_one(filter_part, dct) for filter_part in filter_str.split('&')) | ||||
|  | ||||
|  | ||||
| def match_filter_func(filter_str): | ||||
|     def _match_func(info_dict): | ||||
|         if match_str(filter_str, info_dict): | ||||
|             return None | ||||
|         else: | ||||
|             video_title = info_dict.get('title', info_dict.get('id', 'video')) | ||||
|             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str) | ||||
|     return _match_func | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2015.02.10.1' | ||||
| __version__ = '2015.02.16' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user