mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			11 Commits
		
	
	
		
			2015.02.09
			...
			2015.02.10
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 845734773d | ||
|  | 347de4931c | ||
|  | 8829650513 | ||
|  | c73fae1e2e | ||
|  | 834bf069d2 | ||
|  | c06a9fa34f | ||
|  | 753fad4adc | ||
|  | 34814eb66e | ||
|  | 3a5bcd0326 | ||
|  | 99c2398bc6 | ||
|  | 28f1272870 | 
							
								
								
									
										17
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								README.md
									
									
									
									
									
								
							| @@ -119,6 +119,23 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      COUNT views | ||||
|     --max-views COUNT                Do not download any videos with more than | ||||
|                                      COUNT views | ||||
|     --match-filter FILTER            (Experimental) Generic video filter. | ||||
|                                      Specify any key (see help for -o for a list | ||||
|                                      of available keys) to match if the key is | ||||
|                                      present, !key to check if the key is not | ||||
|                                      present, key > NUMBER (like "comment_count > | ||||
|                                      12", also works with >=, <, <=, !=, =) to | ||||
|                                      compare against a number, and & to require | ||||
|                                      multiple matches. Values which are not | ||||
|                                      known are excluded unless you put a | ||||
|                                      question mark (?) after the operator. For | ||||
|                                      example, to only match videos that have | ||||
|                                      been liked more than 100 times and disliked | ||||
|                                      less than 50 times (or the dislike | ||||
|                                      functionality is not available at the given | ||||
|                                      service), but which also have a description, | ||||
|                                      use  --match-filter "like_count > 100 & | ||||
|                                      dislike_count <? 50 & description" . | ||||
|     --no-playlist                    If the URL refers to a video and a | ||||
|                                      playlist, download only the video. | ||||
|     --age-limit YEARS                download only videos suitable for the given | ||||
|   | ||||
| @@ -53,6 +53,7 @@ from youtube_dl.utils import ( | ||||
|     version_tuple, | ||||
|     xpath_with_ns, | ||||
|     render_table, | ||||
|     match_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -459,6 +460,37 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') | ||||
|             '123  4\n' | ||||
|             '9999 51') | ||||
|  | ||||
|     def test_match_str(self): | ||||
|         self.assertRaises(ValueError, match_str, 'xy>foobar', {}) | ||||
|         self.assertFalse(match_str('xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('!xy', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 1200})) | ||||
|         self.assertFalse(match_str('!x', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {'x': 0})) | ||||
|         self.assertFalse(match_str('x>0', {})) | ||||
|         self.assertTrue(match_str('x>?0', {})) | ||||
|         self.assertTrue(match_str('x>1K', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>2K', {'x': 1200})) | ||||
|         self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) | ||||
|         self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) | ||||
|         self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) | ||||
|         self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 90, 'description': 'foo'})) | ||||
|         self.assertTrue(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) | ||||
|         self.assertFalse(match_str( | ||||
|             'like_count > 100 & dislike_count <? 50 & description', | ||||
|             {'like_count': 190, 'dislike_count': 10})) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -228,6 +228,11 @@ class YoutubeDL(object): | ||||
|     external_downloader:  Executable of the external downloader to call. | ||||
|     listformats:       Print an overview of available video formats and exit. | ||||
|     list_thumbnails:   Print a table of all thumbnails and exit. | ||||
|     match_filter:      A function that gets called with the info_dict of | ||||
|                        every video. | ||||
|                        If it returns a message, the video is ignored. | ||||
|                        If it returns None, the video is downloaded. | ||||
|                        match_filter_func in utils.py is one example for this. | ||||
|  | ||||
|  | ||||
|     The following parameters are not used by YoutubeDL itself, they are used by | ||||
| @@ -583,9 +588,16 @@ class YoutubeDL(object): | ||||
|             if max_views is not None and view_count > max_views: | ||||
|                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) | ||||
|         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): | ||||
|             return 'Skipping "%s" because it is age restricted' % title | ||||
|             return 'Skipping "%s" because it is age restricted' % video_title | ||||
|         if self.in_download_archive(info_dict): | ||||
|             return '%s has already been recorded in archive' % video_title | ||||
|  | ||||
|         match_filter = self.params.get('match_filter') | ||||
|         if match_filter is not None: | ||||
|             ret = match_filter(info_dict) | ||||
|             if ret is not None: | ||||
|                 return ret | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     @staticmethod | ||||
|   | ||||
| @@ -23,9 +23,10 @@ from .compat import ( | ||||
| ) | ||||
| from .utils import ( | ||||
|     DateRange, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     decodeOption, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     DownloadError, | ||||
|     match_filter_func, | ||||
|     MaxDownloadsReached, | ||||
|     preferredencoding, | ||||
|     read_batch_urls, | ||||
| @@ -247,6 +248,9 @@ def _real_main(argv=None): | ||||
|             xattr  # Confuse flake8 | ||||
|         except ImportError: | ||||
|             parser.error('setting filesize xattr requested but python-xattr is not available') | ||||
|     match_filter = ( | ||||
|         None if opts.match_filter is None | ||||
|         else match_filter_func(opts.match_filter)) | ||||
|  | ||||
|     ydl_opts = { | ||||
|         'usenetrc': opts.usenetrc, | ||||
| @@ -344,6 +348,7 @@ def _real_main(argv=None): | ||||
|         'list_thumbnails': opts.list_thumbnails, | ||||
|         'playlist_items': opts.playlist_items, | ||||
|         'xattr_set_filesize': opts.xattr_set_filesize, | ||||
|         'match_filter': match_filter, | ||||
|     } | ||||
|  | ||||
|     with YoutubeDL(ydl_opts) as ydl: | ||||
|   | ||||
| @@ -74,7 +74,7 @@ from .collegehumor import CollegeHumorIE | ||||
| from .collegerama import CollegeRamaIE | ||||
| from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE | ||||
| from .comcarcoff import ComCarCoffIE | ||||
| from .commonmistakes import CommonMistakesIE | ||||
| from .commonmistakes import CommonMistakesIE, UnicodeBOMIE | ||||
| from .condenast import CondeNastIE | ||||
| from .cracked import CrackedIE | ||||
| from .criterion import CriterionIE | ||||
|   | ||||
| @@ -72,26 +72,29 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
|         download_link = m_download.group(1) | ||||
|         video_id = self._search_regex( | ||||
|             r'var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id', flags=re.MULTILINE | re.DOTALL) | ||||
|             r'(?ms)var TralbumData = {.*?id: (?P<id>\d+),?$', | ||||
|             webpage, 'video id') | ||||
|  | ||||
|         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') | ||||
|         # We get the dictionary of the track from some javascript code | ||||
|         info = re.search(r'items: (.*?),$', download_webpage, re.MULTILINE).group(1) | ||||
|         info = json.loads(info)[0] | ||||
|         all_info = self._parse_json(self._search_regex( | ||||
|             r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) | ||||
|         info = all_info[0] | ||||
|         # We pick mp3-320 for now, until format selection can be easily implemented. | ||||
|         mp3_info = info['downloads']['mp3-320'] | ||||
|         # If we try to use this url it says the link has expired | ||||
|         initial_url = mp3_info['url'] | ||||
|         re_url = r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$' | ||||
|         m_url = re.match(re_url, initial_url) | ||||
|         m_url = re.match( | ||||
|             r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$', | ||||
|             initial_url) | ||||
|         # We build the url we will use to get the final track url | ||||
|         # This url is build in Bandcamp in the script download_bunde_*.js | ||||
|         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) | ||||
|         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') | ||||
|         # If we could correctly generate the .rand field the url would be | ||||
|         # in the "download_url" key | ||||
|         final_url = re.search(r'"retry_url":"(.*?)"', final_url_webpage).group(1) | ||||
|         final_url = self._search_regex( | ||||
|             r'"retry_url":"(.*?)"', final_url_webpage, 'final video URL') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -264,8 +264,15 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def extract(self, url): | ||||
|         """Extracts URL information and returns it in list of dicts.""" | ||||
|         self.initialize() | ||||
|         return self._real_extract(url) | ||||
|         try: | ||||
|             self.initialize() | ||||
|             return self._real_extract(url) | ||||
|         except ExtractorError: | ||||
|             raise | ||||
|         except compat_http_client.IncompleteRead as e: | ||||
|             raise ExtractorError('A network error has occured.', cause=e, expected=True) | ||||
|         except (KeyError,) as e: | ||||
|             raise ExtractorError('An extractor error has occured.', cause=e) | ||||
|  | ||||
|     def set_downloader(self, downloader): | ||||
|         """Sets the downloader for this IE.""" | ||||
|   | ||||
| @@ -24,6 +24,23 @@ class CommonMistakesIE(InfoExtractor): | ||||
|             'That doesn\'t make any sense. ' | ||||
|             'Simply remove the parameter in your command or configuration.' | ||||
|         ) % url | ||||
|         if self._downloader.params.get('verbose'): | ||||
|         if not self._downloader.params.get('verbose'): | ||||
|             msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.' | ||||
|         raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|  | ||||
| class UnicodeBOMIE(InfoExtractor): | ||||
|         IE_DESC = False | ||||
|         _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$' | ||||
|  | ||||
|         _TESTS = [{ | ||||
|             'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc', | ||||
|             'only_matching': True, | ||||
|         }] | ||||
|  | ||||
|         def _real_extract(self, url): | ||||
|             real_url = self._match_id(url) | ||||
|             self.report_warning( | ||||
|                 'Your URL starts with a Byte Order Mark (BOM). ' | ||||
|                 'Removing the BOM and looking for "%s" ...' % real_url) | ||||
|             return self.url_result(real_url) | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -10,13 +11,13 @@ class SVTPlayIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?svtplay\.se/video/(?P<id>[0-9]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.svtplay.se/video/2609989/sm-veckan/sm-veckan-rally-final-sasong-1-sm-veckan-rally-final', | ||||
|         'md5': '2521cd644e862936cf2e698206e47385', | ||||
|         'md5': 'f4a184968bc9c802a9b41316657aaa80', | ||||
|         'info_dict': { | ||||
|             'id': '3966754', | ||||
|             'id': '2609989', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'FIFA 14 - E3 2013 Trailer', | ||||
|             'title': 'SM veckan vinter, Örebro - Rally, final', | ||||
|             'duration': 4500, | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'thumbnail': 're:^https?://.*[\.-]jpg$', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -244,6 +244,25 @@ def parseOpts(overrideArguments=None): | ||||
|         '--max-views', | ||||
|         metavar='COUNT', dest='max_views', default=None, type=int, | ||||
|         help='Do not download any videos with more than COUNT views') | ||||
|     selection.add_option( | ||||
|         '--match-filter', | ||||
|         metavar='FILTER', dest='match_filter', default=None, | ||||
|         help=( | ||||
|             '(Experimental) Generic video filter. ' | ||||
|             'Specify any key (see help for -o for a list of available keys) to' | ||||
|             ' match if the key is present, ' | ||||
|             '!key to check if the key is not present,' | ||||
|             'key > NUMBER (like "comment_count > 12", also works with ' | ||||
|             '>=, <, <=, !=, =) to compare against a number, and ' | ||||
|             '& to require multiple matches. ' | ||||
|             'Values which are not known are excluded unless you' | ||||
|             ' put a question mark (?) after the operator.' | ||||
|             'For example, to only match videos that have been liked more than ' | ||||
|             '100 times and disliked less than 50 times (or the dislike ' | ||||
|             'functionality is not available at the given service), but who ' | ||||
|             'also have a description, use  --match-filter ' | ||||
|             '"like_count > 100 & dislike_count <? 50 & description" .' | ||||
|         )) | ||||
|     selection.add_option( | ||||
|         '--no-playlist', | ||||
|         action='store_true', dest='noplaylist', default=False, | ||||
| @@ -734,22 +753,22 @@ def parseOpts(overrideArguments=None): | ||||
|         if opts.verbose: | ||||
|             write_string('[debug] Override config: ' + repr(overrideArguments) + '\n') | ||||
|     else: | ||||
|         commandLineConf = sys.argv[1:] | ||||
|         if '--ignore-config' in commandLineConf: | ||||
|             systemConf = [] | ||||
|             userConf = [] | ||||
|         command_line_conf = sys.argv[1:] | ||||
|         if '--ignore-config' in command_line_conf: | ||||
|             system_conf = [] | ||||
|             user_conf = [] | ||||
|         else: | ||||
|             systemConf = _readOptions('/etc/youtube-dl.conf') | ||||
|             if '--ignore-config' in systemConf: | ||||
|                 userConf = [] | ||||
|             system_conf = _readOptions('/etc/youtube-dl.conf') | ||||
|             if '--ignore-config' in system_conf: | ||||
|                 user_conf = [] | ||||
|             else: | ||||
|                 userConf = _readUserConf() | ||||
|         argv = systemConf + userConf + commandLineConf | ||||
|                 user_conf = _readUserConf() | ||||
|         argv = system_conf + user_conf + command_line_conf | ||||
|  | ||||
|         opts, args = parser.parse_args(argv) | ||||
|         if opts.verbose: | ||||
|             write_string('[debug] System config: ' + repr(_hide_login_info(systemConf)) + '\n') | ||||
|             write_string('[debug] User config: ' + repr(_hide_login_info(userConf)) + '\n') | ||||
|             write_string('[debug] Command-line args: ' + repr(_hide_login_info(commandLineConf)) + '\n') | ||||
|             write_string('[debug] System config: ' + repr(_hide_login_info(system_conf)) + '\n') | ||||
|             write_string('[debug] User config: ' + repr(_hide_login_info(user_conf)) + '\n') | ||||
|             write_string('[debug] Command-line args: ' + repr(_hide_login_info(command_line_conf)) + '\n') | ||||
|  | ||||
|     return parser, opts, args | ||||
|   | ||||
| @@ -17,6 +17,7 @@ import io | ||||
| import json | ||||
| import locale | ||||
| import math | ||||
| import operator | ||||
| import os | ||||
| import pipes | ||||
| import platform | ||||
| @@ -1678,3 +1679,79 @@ def render_table(header_row, data): | ||||
|     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)] | ||||
|     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s' | ||||
|     return '\n'.join(format_str % tuple(row) for row in table) | ||||
|  | ||||
|  | ||||
| def _match_one(filter_part, dct): | ||||
|     COMPARISON_OPERATORS = { | ||||
|         '<': operator.lt, | ||||
|         '<=': operator.le, | ||||
|         '>': operator.gt, | ||||
|         '>=': operator.ge, | ||||
|         '=': operator.eq, | ||||
|         '!=': operator.ne, | ||||
|     } | ||||
|     operator_rex = re.compile(r'''(?x)\s* | ||||
|         (?P<key>[a-z_]+) | ||||
|         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* | ||||
|         (?: | ||||
|             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)| | ||||
|             (?P<strval>(?![0-9.])[a-z0-9A-Z]*) | ||||
|         ) | ||||
|         \s*$ | ||||
|         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) | ||||
|     m = operator_rex.search(filter_part) | ||||
|     if m: | ||||
|         op = COMPARISON_OPERATORS[m.group('op')] | ||||
|         if m.group('strval') is not None: | ||||
|             if m.group('op') not in ('=', '!='): | ||||
|                 raise ValueError( | ||||
|                     'Operator %s does not support string values!' % m.group('op')) | ||||
|             comparison_value = m.group('strval') | ||||
|         else: | ||||
|             try: | ||||
|                 comparison_value = int(m.group('intval')) | ||||
|             except ValueError: | ||||
|                 comparison_value = parse_filesize(m.group('intval')) | ||||
|                 if comparison_value is None: | ||||
|                     comparison_value = parse_filesize(m.group('intval') + 'B') | ||||
|                 if comparison_value is None: | ||||
|                     raise ValueError( | ||||
|                         'Invalid integer value %r in filter part %r' % ( | ||||
|                             m.group('intval'), filter_part)) | ||||
|         actual_value = dct.get(m.group('key')) | ||||
|         if actual_value is None: | ||||
|             return m.group('none_inclusive') | ||||
|         return op(actual_value, comparison_value) | ||||
|  | ||||
|     UNARY_OPERATORS = { | ||||
|         '': lambda v: v is not None, | ||||
|         '!': lambda v: v is None, | ||||
|     } | ||||
|     operator_rex = re.compile(r'''(?x)\s* | ||||
|         (?P<op>%s)\s*(?P<key>[a-z_]+) | ||||
|         \s*$ | ||||
|         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) | ||||
|     m = operator_rex.search(filter_part) | ||||
|     if m: | ||||
|         op = UNARY_OPERATORS[m.group('op')] | ||||
|         actual_value = dct.get(m.group('key')) | ||||
|         return op(actual_value) | ||||
|  | ||||
|     raise ValueError('Invalid filter part %r' % filter_part) | ||||
|  | ||||
|  | ||||
| def match_str(filter_str, dct): | ||||
|     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ | ||||
|  | ||||
|     return all( | ||||
|         _match_one(filter_part, dct) for filter_part in filter_str.split('&')) | ||||
|  | ||||
|  | ||||
| def match_filter_func(filter_str): | ||||
|     def _match_func(info_dict): | ||||
|         if match_str(filter_str, info_dict): | ||||
|             return None | ||||
|         else: | ||||
|             video_title = info_dict.get('title', info_dict.get('id', 'video')) | ||||
|             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str) | ||||
|     return _match_func | ||||
|   | ||||
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2015.02.09.3' | ||||
| __version__ = '2015.02.10.2' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user