Mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2025-10-29 09:26:20 -07:00)

Compare commits: 2014.11.02 ... 2014.11.12 (54 commits)
| SHA1 |
|---|
| 7a8cbc72b2 |
| 2774852c2f |
| bbcc21efd1 |
| 60526d6bcb |
| 1d4df56d09 |
| a1cf99d03a |
| 3c6af203cc |
| 1a92e086a7 |
| 519c73f267 |
| a6dae6c09c |
| f866e474f3 |
| 8bb9b97c97 |
| d6fdc38682 |
| c2b61af548 |
| 2fdbf27ad8 |
| 29ed169cd6 |
| 9908e03528 |
| 1fe8fb8c20 |
| 5d63b0aa93 |
| 4164f0117e |
| 37aab27808 |
| 6110bbbfdd |
| cde9b380e6 |
| dab647a7b6 |
| a316a83d2b |
| 81b22aee8b |
| a80c96eab0 |
| 20436c30c9 |
| 3828505646 |
| 11fba1751d |
| 12ea2f30cf |
| 9c3e870393 |
| 44789f2457 |
| 711ede6e1b |
| a32f253112 |
| 94bd361318 |
| acd40f64ed |
| 766306450d |
| e7642ab572 |
| bdf9701729 |
| b5af6fcdad |
| 278143df5b |
| fdca55fe34 |
| 4f195f55f0 |
| ac35c26686 |
| 42f7d2f588 |
| 39f0a2a6b7 |
| ecc0c5ee01 |
| 451948b28c |
| baa708036c |
| 8c25f81bee |
| 4c83c96795 |
| 9580711841 |
| ccdd0ffb80 |

README.md (29 changed lines)

							| @@ -131,17 +131,19 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      %(upload_date)s for the upload date | ||||
|                                      (YYYYMMDD), %(extractor)s for the provider | ||||
|                                      (youtube, metacafe, etc), %(id)s for the | ||||
|                                      video id, %(playlist)s for the playlist the | ||||
|                                      video id, %(playlist_title)s, | ||||
|                                      %(playlist_id)s, or %(playlist)s (=title if | ||||
|                                      present, ID otherwise) for the playlist the | ||||
|                                      video is in, %(playlist_index)s for the | ||||
|                                      position in the playlist and %% for a | ||||
|                                      literal percent. %(height)s and %(width)s | ||||
|                                      for the width and height of the video | ||||
|                                      format. %(resolution)s for a textual | ||||
|                                      position in the playlist. %(height)s and | ||||
|                                      %(width)s for the width and height of the | ||||
|                                      video format. %(resolution)s for a textual | ||||
|                                      description of the resolution of the video | ||||
|                                      format. Use - to output to stdout. Can also | ||||
|                                      be used to download to a different | ||||
|                                      directory, for example with -o '/my/downloa | ||||
|                                      ds/%(uploader)s/%(title)s-%(id)s.%(ext)s' . | ||||
|                                      format. %% for a literal percent. Use - to | ||||
|                                      output to stdout. Can also be used to | ||||
|                                      download to a different directory, for | ||||
|                                      example with -o '/my/downloads/%(uploader)s | ||||
|                                      /%(title)s-%(id)s.%(ext)s' . | ||||
|     --autonumber-size NUMBER         Specifies the number of digits in | ||||
|                                      %(autonumber)s when it is present in output | ||||
|                                      filename template or --auto-number option | ||||
| @@ -239,8 +241,13 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      "worst", "worstvideo" and "worstaudio". By | ||||
|                                      default, youtube-dl will pick the best | ||||
|                                      quality. Use commas to download multiple | ||||
|                                      audio formats, such as  -f | ||||
|                                      136/137/mp4/bestvideo,140/m4a/bestaudio | ||||
|                                      audio formats, such as -f | ||||
|                                      136/137/mp4/bestvideo,140/m4a/bestaudio. | ||||
|                                      You can merge the video and audio of two | ||||
|                                      formats into a single file using -f <video- | ||||
|                                      format>+<audio-format> (requires ffmpeg or | ||||
|                                      avconv), for example -f | ||||
|                                      bestvideo+bestaudio. | ||||
|     --all-formats                    download all available video formats | ||||
|     --prefer-free-formats            prefer free video formats unless a specific | ||||
|                                      one is requested | ||||
|   | ||||
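
The README hunks above document two changes to the `-o` output template: the new `%(playlist_title)s`/`%(playlist_id)s` fields (with `%(playlist)s` now meaning title if present, ID otherwise), and the `%%` escape moving next to the other field descriptions. Since the template is expanded with ordinary `%`-style dict formatting, a minimal sketch of how the new fields behave (all field values below are invented):

```python
# Minimal sketch of -o template expansion; values are invented.
template = '%(playlist_title)s/%(playlist_index)s - %(title)s-%(id)s.%(ext)s'
info = {
    'playlist_title': 'Science Friday',  # new field; see the YoutubeDL.py hunk below
    'playlist_index': 3,
    'title': 'Some Episode',
    'id': 'abc123',
    'ext': 'mp4',
}
print(template % info)  # Science Friday/3 - Some Episode-abc123.mp4
```

The second hunk documents `-f <video-format>+<audio-format>` merging (e.g. `-f bestvideo+bestaudio`, requiring ffmpeg or avconv); the matching selection logic appears in the YoutubeDL.py hunk further down.
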
| @@ -57,7 +57,7 @@ class FakeYDL(YoutubeDL): | ||||
|         # Different instances of the downloader can't share the same dictionary | ||||
|         # some test set the "sublang" parameter, which would break the md5 checks. | ||||
|         params = get_params(override=override) | ||||
|         super(FakeYDL, self).__init__(params) | ||||
|         super(FakeYDL, self).__init__(params, auto_init=False) | ||||
|         self.result = [] | ||||
|          | ||||
|     def to_screen(self, s, skip_eol=None): | ||||
|   | ||||
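
The `FakeYDL` change above (test/helper.py) passes `auto_init=False` so the test harness controls initialization itself; judging from the test_download hunk below, which calls `add_default_info_extractors()` right after constructing the downloader, the flag suppresses the automatic registration of the default extractors. A hedged sketch of the pattern:

```python
# Hedged sketch: with auto_init=False the default info extractors are not
# registered automatically, so the caller does it explicitly (or not at all).
from youtube_dl import YoutubeDL

ydl = YoutubeDL({'skip_download': True}, auto_init=False)
ydl.add_default_info_extractors()  # what auto_init=True would have done
```
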
test/test_compat.py (new file, 44 lines)

							| @@ -0,0 +1,44 @@ | ||||
| #!/usr/bin/env python | ||||
| # coding: utf-8 | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| import sys | ||||
| import unittest | ||||
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
|  | ||||
| from youtube_dl.utils import get_filesystem_encoding | ||||
| from youtube_dl.compat import ( | ||||
|     compat_getenv, | ||||
|     compat_expanduser, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestCompat(unittest.TestCase): | ||||
|     def test_compat_getenv(self): | ||||
|         test_str = 'тест' | ||||
|         os.environ['YOUTUBE-DL-TEST'] = ( | ||||
|             test_str if sys.version_info >= (3, 0) | ||||
|             else test_str.encode(get_filesystem_encoding())) | ||||
|         self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) | ||||
|  | ||||
|     def test_compat_expanduser(self): | ||||
|         test_str = 'C:\Documents and Settings\тест\Application Data' | ||||
|         os.environ['HOME'] = ( | ||||
|             test_str if sys.version_info >= (3, 0) | ||||
|             else test_str.encode(get_filesystem_encoding())) | ||||
|         self.assertEqual(compat_expanduser('~'), test_str) | ||||
|  | ||||
|     def test_all_present(self): | ||||
|         import youtube_dl.compat | ||||
|         all_names = youtube_dl.compat.__all__ | ||||
|         present_names = set(filter( | ||||
|             lambda c: '_' in c and not c.startswith('_'), | ||||
|             dir(youtube_dl.compat))) - set(['unicode_literals']) | ||||
|         self.assertEqual(all_names, sorted(present_names)) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
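
One subtlety in `test_compat_expanduser` above: the Windows path literal uses single backslashes, which only round-trips because `\D`, `\т`, and `\A` are not recognized escape sequences, so Python keeps the backslashes verbatim (newer Python versions warn about such unknown escapes; a raw string avoids that):

```python
# '\D' is not a valid escape sequence, so the backslash survives as-is.
assert 'C:\Documents' == r'C:\Documents'
print(len('\D'))  # 2 -- a backslash plus 'D'
```
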
| @@ -23,10 +23,12 @@ import json | ||||
| import socket | ||||
|  | ||||
| import youtube_dl.YoutubeDL | ||||
| from youtube_dl.utils import ( | ||||
| from youtube_dl.compat import ( | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from youtube_dl.utils import ( | ||||
|     DownloadError, | ||||
|     ExtractorError, | ||||
|     format_bytes, | ||||
| @@ -94,7 +96,7 @@ def generator(test_case): | ||||
|             params.setdefault('extract_flat', True) | ||||
|             params.setdefault('skip_download', True) | ||||
|  | ||||
|         ydl = YoutubeDL(params) | ||||
|         ydl = YoutubeDL(params, auto_init=False) | ||||
|         ydl.add_default_info_extractors() | ||||
|         finished_hook_called = set() | ||||
|         def _hook(status): | ||||
|   | ||||
| @@ -37,7 +37,9 @@ def _make_testfunc(testfile): | ||||
|                 or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|             # Recompile | ||||
|             try: | ||||
|                 subprocess.check_call(['mxmlc', '-output', swf_file, as_file]) | ||||
|                 subprocess.check_call([ | ||||
|                     'mxmlc', '-output', swf_file, | ||||
|                     '-static-link-runtime-shared-libraries', as_file]) | ||||
|             except OSError as ose: | ||||
|                 if ose.errno == errno.ENOENT: | ||||
|                     print('mxmlc not found! Skipping test.') | ||||
|   | ||||
| @@ -20,7 +20,6 @@ from youtube_dl.utils import ( | ||||
|     encodeFilename, | ||||
|     find_xpath_attr, | ||||
|     fix_xml_ampersands, | ||||
|     get_meta_content, | ||||
|     orderedSet, | ||||
|     OnDemandPagedList, | ||||
|     InAdvancePagedList, | ||||
| @@ -46,8 +45,6 @@ from youtube_dl.utils import ( | ||||
|     escape_url, | ||||
|     js_to_json, | ||||
|     get_filesystem_encoding, | ||||
|     compat_getenv, | ||||
|     compat_expanduser, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -157,17 +154,6 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) | ||||
|         self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) | ||||
|  | ||||
|     def test_meta_parser(self): | ||||
|         testhtml = ''' | ||||
|         <head> | ||||
|             <meta name="description" content="foo & bar"> | ||||
|             <meta content='Plato' name='author'/> | ||||
|         </head> | ||||
|         ''' | ||||
|         get_meta = lambda name: get_meta_content(name, testhtml) | ||||
|         self.assertEqual(get_meta('description'), 'foo & bar') | ||||
|         self.assertEqual(get_meta('author'), 'Plato') | ||||
|  | ||||
|     def test_xpath_with_ns(self): | ||||
|         testxml = '''<root xmlns:media="http://example.com/"> | ||||
|             <media:song> | ||||
| @@ -359,17 +345,5 @@ class TestUtil(unittest.TestCase): | ||||
|         on = js_to_json('{"abc": true}') | ||||
|         self.assertEqual(json.loads(on), {'abc': True}) | ||||
|  | ||||
|     def test_compat_getenv(self): | ||||
|         test_str = 'тест' | ||||
|         os.environ['YOUTUBE-DL-TEST'] = (test_str if sys.version_info >= (3, 0) | ||||
|             else test_str.encode(get_filesystem_encoding())) | ||||
|         self.assertEqual(compat_getenv('YOUTUBE-DL-TEST'), test_str) | ||||
|  | ||||
|     def test_compat_expanduser(self): | ||||
|         test_str = 'C:\Documents and Settings\тест\Application Data' | ||||
|         os.environ['HOME'] = (test_str if sys.version_info >= (3, 0) | ||||
|             else test_str.encode(get_filesystem_encoding())) | ||||
|         self.assertEqual(compat_expanduser('~'), test_str) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -22,13 +22,15 @@ import traceback | ||||
| if os.name == 'nt': | ||||
|     import ctypes | ||||
|  | ||||
| from .utils import ( | ||||
| from .compat import ( | ||||
|     compat_cookiejar, | ||||
|     compat_expanduser, | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from .utils import ( | ||||
|     escape_url, | ||||
|     ContentTooShortError, | ||||
|     date_from_str, | ||||
| @@ -62,6 +64,7 @@ from .utils import ( | ||||
| from .cache import Cache | ||||
| from .extractor import get_info_extractor, gen_extractors | ||||
| from .downloader import get_suitable_downloader | ||||
| from .downloader.rtmp import rtmpdump_version | ||||
| from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor | ||||
| from .version import __version__ | ||||
|  | ||||
| @@ -655,6 +658,8 @@ class YoutubeDL(object): | ||||
|                 extra = { | ||||
|                     'n_entries': n_entries, | ||||
|                     'playlist': playlist, | ||||
|                     'playlist_id': ie_result.get('id'), | ||||
|                     'playlist_title': ie_result.get('title'), | ||||
|                     'playlist_index': i + playliststart, | ||||
|                     'extractor': ie_result['extractor'], | ||||
|                     'webpage_url': ie_result['webpage_url'], | ||||
| @@ -832,6 +837,13 @@ class YoutubeDL(object): | ||||
|                         format_1, format_2 = rf.split('+') | ||||
|                         formats_info = (self.select_format(format_1, formats), | ||||
|                             self.select_format(format_2, formats)) | ||||
|                         # The first format must contain the video and the | ||||
|                         # second the audio | ||||
|                         if formats_info[0].get('vcodec') == 'none': | ||||
|                             self.report_error('The first format must contain ' | ||||
|                                 'the video, try using "-f %s+%s"' % | ||||
|                                 (format_2, format_1)) | ||||
|                             return | ||||
|                         if all(formats_info): | ||||
|                             selected_format = { | ||||
|                                 'requested_formats': formats_info, | ||||
| @@ -1321,6 +1333,7 @@ class YoutubeDL(object): | ||||
|             platform.python_version(), platform_name())) | ||||
|  | ||||
|         exe_versions = FFmpegPostProcessor.get_versions() | ||||
|         exe_versions['rtmpdump'] = rtmpdump_version() | ||||
|         exe_str = ', '.join( | ||||
|             '%s %s' % (exe, v) | ||||
|             for exe, v in sorted(exe_versions.items()) | ||||
|   | ||||
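
Two threads meet in the YoutubeDL.py hunks above: the `playlist_id`/`playlist_title` keys added to `extra` feed the new output-template fields from the README, and the `+` merge branch now rejects requests whose first half is audio-only. A toy re-implementation of just that check (the direct `format_id` lookup is a simplified stand-in for the real `select_format`):

```python
# Toy version of the '+' format-merge validation; assumes each format dict
# carries 'format_id' and 'vcodec', as extractor output does.
def select_merge(requested_format, formats):
    format_1, format_2 = requested_format.split('+')
    by_id = {f['format_id']: f for f in formats}
    info_1, info_2 = by_id[format_1], by_id[format_2]
    if info_1.get('vcodec') == 'none':
        raise ValueError(
            'The first format must contain the video, try using "%s+%s"'
            % (format_2, format_1))
    return {'requested_formats': (info_1, info_2)}

formats = [
    {'format_id': 'bestaudio', 'vcodec': 'none'},
    {'format_id': 'bestvideo', 'vcodec': 'avc1'},
]
merged = select_merge('bestvideo+bestaudio', formats)
print(merged['requested_formats'][0]['format_id'])  # bestvideo
```
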
| @@ -13,10 +13,12 @@ import sys | ||||
| from .options import ( | ||||
|     parseOpts, | ||||
| ) | ||||
| from .utils import ( | ||||
| from .compat import ( | ||||
|     compat_expanduser, | ||||
|     compat_getpass, | ||||
|     compat_print, | ||||
| ) | ||||
| from .utils import ( | ||||
|     DateRange, | ||||
|     DEFAULT_OUTTMPL, | ||||
|     decodeOption, | ||||
|   | ||||
| @@ -8,10 +8,8 @@ import re | ||||
| import shutil | ||||
| import traceback | ||||
|  | ||||
| from .utils import ( | ||||
|     compat_expanduser, | ||||
|     write_json_file, | ||||
| ) | ||||
| from .compat import compat_expanduser | ||||
| from .utils import write_json_file | ||||
|  | ||||
|  | ||||
| class Cache(object): | ||||
|   | ||||

youtube_dl/compat.py (new file, 317 lines)

							| @@ -0,0 +1,317 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import getpass | ||||
| import os | ||||
| import subprocess | ||||
| import sys | ||||
|  | ||||
|  | ||||
| try: | ||||
|     import urllib.request as compat_urllib_request | ||||
| except ImportError: # Python 2 | ||||
|     import urllib2 as compat_urllib_request | ||||
|  | ||||
| try: | ||||
|     import urllib.error as compat_urllib_error | ||||
| except ImportError: # Python 2 | ||||
|     import urllib2 as compat_urllib_error | ||||
|  | ||||
| try: | ||||
|     import urllib.parse as compat_urllib_parse | ||||
| except ImportError: # Python 2 | ||||
|     import urllib as compat_urllib_parse | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import urlparse as compat_urllib_parse_urlparse | ||||
| except ImportError: # Python 2 | ||||
|     from urlparse import urlparse as compat_urllib_parse_urlparse | ||||
|  | ||||
| try: | ||||
|     import urllib.parse as compat_urlparse | ||||
| except ImportError: # Python 2 | ||||
|     import urlparse as compat_urlparse | ||||
|  | ||||
| try: | ||||
|     import http.cookiejar as compat_cookiejar | ||||
| except ImportError: # Python 2 | ||||
|     import cookielib as compat_cookiejar | ||||
|  | ||||
| try: | ||||
|     import html.entities as compat_html_entities | ||||
| except ImportError: # Python 2 | ||||
|     import htmlentitydefs as compat_html_entities | ||||
|  | ||||
| try: | ||||
|     import html.parser as compat_html_parser | ||||
| except ImportError: # Python 2 | ||||
|     import HTMLParser as compat_html_parser | ||||
|  | ||||
| try: | ||||
|     import http.client as compat_http_client | ||||
| except ImportError: # Python 2 | ||||
|     import httplib as compat_http_client | ||||
|  | ||||
| try: | ||||
|     from urllib.error import HTTPError as compat_HTTPError | ||||
| except ImportError:  # Python 2 | ||||
|     from urllib2 import HTTPError as compat_HTTPError | ||||
|  | ||||
| try: | ||||
|     from urllib.request import urlretrieve as compat_urlretrieve | ||||
| except ImportError:  # Python 2 | ||||
|     from urllib import urlretrieve as compat_urlretrieve | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from subprocess import DEVNULL | ||||
|     compat_subprocess_get_DEVNULL = lambda: DEVNULL | ||||
| except ImportError: | ||||
|     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import unquote as compat_urllib_parse_unquote | ||||
| except ImportError: | ||||
|     def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): | ||||
|         if string == '': | ||||
|             return string | ||||
|         res = string.split('%') | ||||
|         if len(res) == 1: | ||||
|             return string | ||||
|         if encoding is None: | ||||
|             encoding = 'utf-8' | ||||
|         if errors is None: | ||||
|             errors = 'replace' | ||||
|         # pct_sequence: contiguous sequence of percent-encoded bytes, decoded | ||||
|         pct_sequence = b'' | ||||
|         string = res[0] | ||||
|         for item in res[1:]: | ||||
|             try: | ||||
|                 if not item: | ||||
|                     raise ValueError | ||||
|                 pct_sequence += item[:2].decode('hex') | ||||
|                 rest = item[2:] | ||||
|                 if not rest: | ||||
|                     # This segment was just a single percent-encoded character. | ||||
|                     # May be part of a sequence of code units, so delay decoding. | ||||
|                     # (Stored in pct_sequence). | ||||
|                     continue | ||||
|             except ValueError: | ||||
|                 rest = '%' + item | ||||
|             # Encountered non-percent-encoded characters. Flush the current | ||||
|             # pct_sequence. | ||||
|             string += pct_sequence.decode(encoding, errors) + rest | ||||
|             pct_sequence = b'' | ||||
|         if pct_sequence: | ||||
|             # Flush the final pct_sequence | ||||
|             string += pct_sequence.decode(encoding, errors) | ||||
|         return string | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import parse_qs as compat_parse_qs | ||||
| except ImportError: # Python 2 | ||||
|     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. | ||||
|     # Python 2's version is apparently totally broken | ||||
|  | ||||
|     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, | ||||
|                 encoding='utf-8', errors='replace'): | ||||
|         qs, _coerce_result = qs, unicode | ||||
|         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] | ||||
|         r = [] | ||||
|         for name_value in pairs: | ||||
|             if not name_value and not strict_parsing: | ||||
|                 continue | ||||
|             nv = name_value.split('=', 1) | ||||
|             if len(nv) != 2: | ||||
|                 if strict_parsing: | ||||
|                     raise ValueError("bad query field: %r" % (name_value,)) | ||||
|                 # Handle case of a control-name with no equal sign | ||||
|                 if keep_blank_values: | ||||
|                     nv.append('') | ||||
|                 else: | ||||
|                     continue | ||||
|             if len(nv[1]) or keep_blank_values: | ||||
|                 name = nv[0].replace('+', ' ') | ||||
|                 name = compat_urllib_parse_unquote( | ||||
|                     name, encoding=encoding, errors=errors) | ||||
|                 name = _coerce_result(name) | ||||
|                 value = nv[1].replace('+', ' ') | ||||
|                 value = compat_urllib_parse_unquote( | ||||
|                     value, encoding=encoding, errors=errors) | ||||
|                 value = _coerce_result(value) | ||||
|                 r.append((name, value)) | ||||
|         return r | ||||
|  | ||||
|     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, | ||||
|                 encoding='utf-8', errors='replace'): | ||||
|         parsed_result = {} | ||||
|         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, | ||||
|                         encoding=encoding, errors=errors) | ||||
|         for name, value in pairs: | ||||
|             if name in parsed_result: | ||||
|                 parsed_result[name].append(value) | ||||
|             else: | ||||
|                 parsed_result[name] = [value] | ||||
|         return parsed_result | ||||
|  | ||||
| try: | ||||
|     compat_str = unicode # Python 2 | ||||
| except NameError: | ||||
|     compat_str = str | ||||
|  | ||||
| try: | ||||
|     compat_chr = unichr # Python 2 | ||||
| except NameError: | ||||
|     compat_chr = chr | ||||
|  | ||||
| try: | ||||
|     from xml.etree.ElementTree import ParseError as compat_xml_parse_error | ||||
| except ImportError:  # Python 2.6 | ||||
|     from xml.parsers.expat import ExpatError as compat_xml_parse_error | ||||
|  | ||||
| try: | ||||
|     from shlex import quote as shlex_quote | ||||
| except ImportError:  # Python < 3.3 | ||||
|     def shlex_quote(s): | ||||
|         return "'" + s.replace("'", "'\"'\"'") + "'" | ||||
|  | ||||
|  | ||||
| def compat_ord(c): | ||||
|     if type(c) is int: return c | ||||
|     else: return ord(c) | ||||
|  | ||||
|  | ||||
| if sys.version_info >= (3, 0): | ||||
|     compat_getenv = os.getenv | ||||
|     compat_expanduser = os.path.expanduser | ||||
| else: | ||||
|     # Environment variables should be decoded with filesystem encoding. | ||||
|     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) | ||||
|  | ||||
|     def compat_getenv(key, default=None): | ||||
|         from .utils import get_filesystem_encoding | ||||
|         env = os.getenv(key, default) | ||||
|         if env: | ||||
|             env = env.decode(get_filesystem_encoding()) | ||||
|         return env | ||||
|  | ||||
|     # HACK: The default implementations of os.path.expanduser from cpython do not decode | ||||
|     # environment variables with filesystem encoding. We will work around this by | ||||
|     # providing adjusted implementations. | ||||
|     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib | ||||
|     # for different platforms with correct environment variables decoding. | ||||
|  | ||||
|     if os.name == 'posix': | ||||
|         def compat_expanduser(path): | ||||
|             """Expand ~ and ~user constructions.  If user or $HOME is unknown, | ||||
|             do nothing.""" | ||||
|             if not path.startswith('~'): | ||||
|                 return path | ||||
|             i = path.find('/', 1) | ||||
|             if i < 0: | ||||
|                 i = len(path) | ||||
|             if i == 1: | ||||
|                 if 'HOME' not in os.environ: | ||||
|                     import pwd | ||||
|                     userhome = pwd.getpwuid(os.getuid()).pw_dir | ||||
|                 else: | ||||
|                     userhome = compat_getenv('HOME') | ||||
|             else: | ||||
|                 import pwd | ||||
|                 try: | ||||
|                     pwent = pwd.getpwnam(path[1:i]) | ||||
|                 except KeyError: | ||||
|                     return path | ||||
|                 userhome = pwent.pw_dir | ||||
|             userhome = userhome.rstrip('/') | ||||
|             return (userhome + path[i:]) or '/' | ||||
|     elif os.name == 'nt' or os.name == 'ce': | ||||
|         def compat_expanduser(path): | ||||
|             """Expand ~ and ~user constructs. | ||||
|  | ||||
|             If user or $HOME is unknown, do nothing.""" | ||||
|             if path[:1] != '~': | ||||
|                 return path | ||||
|             i, n = 1, len(path) | ||||
|             while i < n and path[i] not in '/\\': | ||||
|                 i = i + 1 | ||||
|  | ||||
|             if 'HOME' in os.environ: | ||||
|                 userhome = compat_getenv('HOME') | ||||
|             elif 'USERPROFILE' in os.environ: | ||||
|                 userhome = compat_getenv('USERPROFILE') | ||||
|             elif not 'HOMEPATH' in os.environ: | ||||
|                 return path | ||||
|             else: | ||||
|                 try: | ||||
|                     drive = compat_getenv('HOMEDRIVE') | ||||
|                 except KeyError: | ||||
|                     drive = '' | ||||
|                 userhome = os.path.join(drive, compat_getenv('HOMEPATH')) | ||||
|  | ||||
|             if i != 1: #~user | ||||
|                 userhome = os.path.join(os.path.dirname(userhome), path[1:i]) | ||||
|  | ||||
|             return userhome + path[i:] | ||||
|     else: | ||||
|         compat_expanduser = os.path.expanduser | ||||
|  | ||||
|  | ||||
| if sys.version_info < (3, 0): | ||||
|     def compat_print(s): | ||||
|         from .utils import preferredencoding | ||||
|         print(s.encode(preferredencoding(), 'xmlcharrefreplace')) | ||||
| else: | ||||
|     def compat_print(s): | ||||
|         assert type(s) == type(u'') | ||||
|         print(s) | ||||
|  | ||||
|  | ||||
| try: | ||||
|     subprocess_check_output = subprocess.check_output | ||||
| except AttributeError: | ||||
|     def subprocess_check_output(*args, **kwargs): | ||||
|         assert 'input' not in kwargs | ||||
|         p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs) | ||||
|         output, _ = p.communicate() | ||||
|         ret = p.poll() | ||||
|         if ret: | ||||
|             raise subprocess.CalledProcessError(ret, p.args, output=output) | ||||
|         return output | ||||
|  | ||||
| if sys.version_info < (3, 0) and sys.platform == 'win32': | ||||
|     def compat_getpass(prompt, *args, **kwargs): | ||||
|         if isinstance(prompt, compat_str): | ||||
|             from .utils import preferredencoding | ||||
|             prompt = prompt.encode(preferredencoding()) | ||||
|         return getpass.getpass(prompt, *args, **kwargs) | ||||
| else: | ||||
|     compat_getpass = getpass.getpass | ||||
|  | ||||
|  | ||||
| __all__ = [ | ||||
|     'compat_HTTPError', | ||||
|     'compat_chr', | ||||
|     'compat_cookiejar', | ||||
|     'compat_expanduser', | ||||
|     'compat_getenv', | ||||
|     'compat_getpass', | ||||
|     'compat_html_entities', | ||||
|     'compat_html_parser', | ||||
|     'compat_http_client', | ||||
|     'compat_ord', | ||||
|     'compat_parse_qs', | ||||
|     'compat_print', | ||||
|     'compat_str', | ||||
|     'compat_subprocess_get_DEVNULL', | ||||
|     'compat_urllib_error', | ||||
|     'compat_urllib_parse', | ||||
|     'compat_urllib_parse_unquote', | ||||
|     'compat_urllib_parse_urlparse', | ||||
|     'compat_urllib_request', | ||||
|     'compat_urlparse', | ||||
|     'compat_urlretrieve', | ||||
|     'compat_xml_parse_error', | ||||
|     'shlex_quote', | ||||
|     'subprocess_check_output', | ||||
| ] | ||||
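
With `youtube_dl/compat.py` in place, interpreter shims are imported from `youtube_dl.compat` instead of `youtube_dl.utils`, which is what most of the remaining hunks in this compare do. The pattern, illustrated:

```python
# On Python 3 these names alias the stdlib directly; on Python 2 they fall
# back to the legacy module names, so call sites stay version-agnostic.
from youtube_dl.compat import compat_str, compat_urllib_parse

print(compat_str is str)                        # True on Python 3
print(compat_urllib_parse.urlencode({'a': 1}))  # a=1
```
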
| @@ -12,9 +12,15 @@ from ..utils import ( | ||||
|     compat_str, | ||||
|     encodeFilename, | ||||
|     format_bytes, | ||||
|     get_exe_version, | ||||
| ) | ||||
|  | ||||
|  | ||||
| def rtmpdump_version(): | ||||
|     return get_exe_version( | ||||
|         'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)') | ||||
|  | ||||
|  | ||||
| class RtmpFD(FileDownloader): | ||||
|     def real_download(self, filename, info_dict): | ||||
|         def run_rtmpdump(args): | ||||
|   | ||||
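
`rtmpdump_version()` above relies on `get_exe_version`, which runs the binary and applies the regex to its output; the pattern itself can be checked standalone (the banner text is a made-up sample):

```python
import re

# Case-insensitive, with an optional 'v' before the version token.
banner = 'RTMPDump v2.4\nsome trailing banner text'
m = re.search(r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)', banner)
print(m.group(1))  # 2.4
```
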
| @@ -421,6 +421,7 @@ from .vesti import VestiIE | ||||
| from .vevo import VevoIE | ||||
| from .vgtv import VGTVIE | ||||
| from .vh1 import VH1IE | ||||
| from .vice import ViceIE | ||||
| from .viddler import ViddlerIE | ||||
| from .videobam import VideoBamIE | ||||
| from .videodetective import VideoDetectiveIE | ||||
|   | ||||
| @@ -3,12 +3,13 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse, | ||||
|  | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|   | ||||
| @@ -14,6 +14,7 @@ from ..utils import ( | ||||
|     compat_str, | ||||
|     compat_urllib_request, | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse_urlparse, | ||||
|  | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
| @@ -23,7 +24,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class BrightcoveIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' | ||||
|     _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*?\?(?P<query>.*)' | ||||
|     _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -260,11 +261,19 @@ class BrightcoveIE(InfoExtractor): | ||||
|             formats = [] | ||||
|             for rend in renditions: | ||||
|                 url = rend['defaultURL'] | ||||
|                 if not url: | ||||
|                     continue | ||||
|                 if rend['remote']: | ||||
|                     # This type of renditions are served through akamaihd.net, | ||||
|                     # but they don't use f4m manifests | ||||
|                     url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' | ||||
|                     ext = 'flv' | ||||
|                     url_comp = compat_urllib_parse_urlparse(url) | ||||
|                     if url_comp.path.endswith('.m3u8'): | ||||
|                         formats.extend( | ||||
|                             self._extract_m3u8_formats(url, info['id'], 'mp4')) | ||||
|                         continue | ||||
|                     elif 'akamaihd.net' in url_comp.netloc: | ||||
|                         # This type of renditions are served through | ||||
|                         # akamaihd.net, but they don't use f4m manifests | ||||
|                         url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' | ||||
|                         ext = 'flv' | ||||
|                 else: | ||||
|                     ext = determine_ext(url) | ||||
|                 size = rend.get('size') | ||||
|   | ||||
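
The Brightcove rendition loop now parses each URL before dispatching, so the `.m3u8` test looks at the path component and cannot be confused by query strings. A standalone illustration (URL invented; `compat_urllib_parse_urlparse` is the in-tree alias for the stdlib `urlparse`):

```python
from urllib.parse import urlparse  # compat_urllib_parse_urlparse in-tree

url = 'http://example.com/stream/master.m3u8?token=abc'
comp = urlparse(url)
print(comp.path.endswith('.m3u8'))    # True -- url.endswith('.m3u8') would be False
print('akamaihd.net' in comp.netloc)  # False
```
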
| @@ -27,7 +27,7 @@ class Channel9IE(InfoExtractor): | ||||
|                 'title': 'Developer Kick-Off Session: Stuff We Love', | ||||
|                 'description': 'md5:c08d72240b7c87fcecafe2692f80e35f', | ||||
|                 'duration': 4576, | ||||
|                 'thumbnail': 'http://media.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', | ||||
|                 'thumbnail': 'http://video.ch9.ms/ch9/9d51/03902f2d-fc97-4d3c-b195-0bfe15a19d51/KOS002_220.jpg', | ||||
|                 'session_code': 'KOS002', | ||||
|                 'session_day': 'Day 1', | ||||
|                 'session_room': 'Arena 1A', | ||||
| @@ -43,7 +43,7 @@ class Channel9IE(InfoExtractor): | ||||
|                 'title': 'Self-service BI with Power BI - nuclear testing', | ||||
|                 'description': 'md5:d1e6ecaafa7fb52a2cacdf9599829f5b', | ||||
|                 'duration': 1540, | ||||
|                 'thumbnail': 'http://media.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', | ||||
|                 'thumbnail': 'http://video.ch9.ms/ch9/87e1/0300391f-a455-4c72-bec3-4422f19287e1/selfservicenuk_512.jpg', | ||||
|                 'authors': [ 'Mike Wilmot' ], | ||||
|             }, | ||||
|         } | ||||
| @@ -94,7 +94,7 @@ class Channel9IE(InfoExtractor): | ||||
|  | ||||
|     def _extract_title(self, html): | ||||
|         title = self._html_search_meta('title', html, 'title') | ||||
|         if title is None:            | ||||
|         if title is None: | ||||
|             title = self._og_search_title(html) | ||||
|             TITLE_SUFFIX = ' (Channel 9)' | ||||
|             if title is not None and title.endswith(TITLE_SUFFIX): | ||||
| @@ -115,7 +115,7 @@ class Channel9IE(InfoExtractor): | ||||
|         return self._html_search_meta('description', html, 'description') | ||||
|  | ||||
|     def _extract_duration(self, html): | ||||
|         m = re.search(r'data-video_duration="(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) | ||||
|         m = re.search(r'"length": *"(?P<hours>\d{2}):(?P<minutes>\d{2}):(?P<seconds>\d{2})"', html) | ||||
|         return ((int(m.group('hours')) * 60 * 60) + (int(m.group('minutes')) * 60) + int(m.group('seconds'))) if m else None | ||||
|  | ||||
|     def _extract_slides(self, html): | ||||
| @@ -167,7 +167,7 @@ class Channel9IE(InfoExtractor): | ||||
|         return re.findall(r'<a href="/Events/Speakers/[^"]+">([^<]+)</a>', html) | ||||
|  | ||||
|     def _extract_content(self, html, content_path): | ||||
|         # Look for downloadable content         | ||||
|         # Look for downloadable content | ||||
|         formats = self._formats_from_html(html) | ||||
|         slides = self._extract_slides(html) | ||||
|         zip_ = self._extract_zip(html) | ||||
| @@ -258,16 +258,17 @@ class Channel9IE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, content_path, 'Downloading web page') | ||||
|  | ||||
|         page_type_m = re.search(r'<meta name="Search.PageType" content="(?P<pagetype>[^"]+)"/>', webpage) | ||||
|         if page_type_m is None: | ||||
|             raise ExtractorError('Search.PageType not found, don\'t know how to process this page', expected=True) | ||||
|         page_type_m = re.search(r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage) | ||||
|         if page_type_m is not None: | ||||
|             page_type = page_type_m.group('pagetype') | ||||
|             if page_type == 'Entry':      # Any 'item'-like page, may contain downloadable content | ||||
|                 return self._extract_entry_item(webpage, content_path) | ||||
|             elif page_type == 'Session':  # Event session page, may contain downloadable content | ||||
|                 return self._extract_session(webpage, content_path) | ||||
|             elif page_type == 'Event': | ||||
|                 return self._extract_list(content_path) | ||||
|             else: | ||||
|                 raise ExtractorError('Unexpected WT.entryid %s' % page_type, expected=True) | ||||
|  | ||||
|         page_type = page_type_m.group('pagetype') | ||||
|         if page_type == 'List':         # List page, may contain list of 'item'-like objects | ||||
|         else: # Assuming list | ||||
|             return self._extract_list(content_path) | ||||
|         elif page_type == 'Entry.Item': # Any 'item'-like page, may contain downloadable content | ||||
|             return self._extract_entry_item(webpage, content_path) | ||||
|         elif page_type == 'Session':    # Event session page, may contain downloadable content | ||||
|             return self._extract_session(webpage, content_path) | ||||
|         else: | ||||
|             raise ExtractorError('Unexpected Search.PageType %s' % page_type, expected=True) | ||||
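
The Channel 9 hunk above switches page-type detection from the `Search.PageType` meta tag to `WT.entryid`, whose first colon-separated token names the page type, and falls back to list extraction when the tag is absent instead of raising. A quick check of the new pattern (sample HTML invented):

```python
import re

webpage = '<meta name="WT.entryid" content="Entry:KOS002:stuff-we-love"/>'
m = re.search(
    r'<meta name="WT.entryid" content="(?P<pagetype>[^:]+)[^"]+"/>', webpage)
print(m.group('pagetype'))  # Entry
```
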
| @@ -42,11 +42,12 @@ class CinemassacreIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         video_date = mobj.group('date_Y') + mobj.group('date_m') + mobj.group('date_d') | ||||
|         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?:Cinemassacre-)?(?P<video_id>.+?))"', webpage) | ||||
|         mobj = re.search(r'src="(?P<embed_url>http://player\.screenwavemedia\.com/play/[a-zA-Z]+\.php\?[^"]*\bid=(?P<full_video_id>(?:Cinemassacre-)?(?P<video_id>.+?)))"', webpage) | ||||
|         if not mobj: | ||||
|             raise ExtractorError('Can\'t extract embed url and video id') | ||||
|         playerdata_url = mobj.group('embed_url') | ||||
|         video_id = mobj.group('video_id') | ||||
|         full_video_id = mobj.group('full_video_id') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<title>(?P<title>.+?)\|', webpage, 'title') | ||||
| @@ -60,37 +61,52 @@ class CinemassacreIE(InfoExtractor): | ||||
|         vidurl = self._search_regex( | ||||
|             r'\'vidurl\'\s*:\s*"([^\']+)"', playerdata, 'vidurl').replace('\\/', '/') | ||||
|  | ||||
|         videolist_url = self._search_regex( | ||||
|             r"file\s*:\s*'(http.+?/jwplayer\.smil)'", playerdata, 'jwplayer.smil') | ||||
|         videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') | ||||
|         videolist_url = None | ||||
|  | ||||
|         formats = [] | ||||
|         baseurl = vidurl[:vidurl.rfind('/')+1] | ||||
|         for video in videolist.findall('.//video'): | ||||
|             src = video.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             file_ = src.partition(':')[-1] | ||||
|             width = int_or_none(video.get('width')) | ||||
|             height = int_or_none(video.get('height')) | ||||
|             bitrate = int_or_none(video.get('system-bitrate')) | ||||
|             format = { | ||||
|                 'url': baseurl + file_, | ||||
|                 'format_id': src.rpartition('.')[0].rpartition('_')[-1], | ||||
|             } | ||||
|             if width or height: | ||||
|                 format.update({ | ||||
|                     'tbr': bitrate // 1000 if bitrate else None, | ||||
|                     'width': width, | ||||
|                     'height': height, | ||||
|                 }) | ||||
|             else: | ||||
|                 format.update({ | ||||
|                     'abr': bitrate // 1000 if bitrate else None, | ||||
|                     'vcodec': 'none', | ||||
|                 }) | ||||
|             formats.append(format) | ||||
|         self._sort_formats(formats) | ||||
|         mobj = re.search(r"'videoserver'\s*:\s*'(?P<videoserver>[^']+)'", playerdata) | ||||
|         if mobj: | ||||
|             videoserver = mobj.group('videoserver') | ||||
|             mobj = re.search(r'\'vidid\'\s*:\s*"(?P<vidid>[^\']+)"', playerdata) | ||||
|             vidid = mobj.group('vidid') if mobj else full_video_id | ||||
|             videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid) | ||||
|         else: | ||||
|             mobj = re.search(r"file\s*:\s*'(?P<smil>http.+?/jwplayer\.smil)'", playerdata) | ||||
|             if mobj: | ||||
|                 videolist_url = mobj.group('smil') | ||||
|  | ||||
|         if videolist_url: | ||||
|             videolist = self._download_xml(videolist_url, video_id, 'Downloading videolist XML') | ||||
|             formats = [] | ||||
|             baseurl = vidurl[:vidurl.rfind('/')+1] | ||||
|             for video in videolist.findall('.//video'): | ||||
|                 src = video.get('src') | ||||
|                 if not src: | ||||
|                     continue | ||||
|                 file_ = src.partition(':')[-1] | ||||
|                 width = int_or_none(video.get('width')) | ||||
|                 height = int_or_none(video.get('height')) | ||||
|                 bitrate = int_or_none(video.get('system-bitrate')) | ||||
|                 format = { | ||||
|                     'url': baseurl + file_, | ||||
|                     'format_id': src.rpartition('.')[0].rpartition('_')[-1], | ||||
|                 } | ||||
|                 if width or height: | ||||
|                     format.update({ | ||||
|                         'tbr': bitrate // 1000 if bitrate else None, | ||||
|                         'width': width, | ||||
|                         'height': height, | ||||
|                     }) | ||||
|                 else: | ||||
|                     format.update({ | ||||
|                         'abr': bitrate // 1000 if bitrate else None, | ||||
|                         'vcodec': 'none', | ||||
|                     }) | ||||
|                 formats.append(format) | ||||
|             self._sort_formats(formats) | ||||
|         else: | ||||
|             formats = [{ | ||||
|                 'url': vidurl, | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
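
The Cinemassacre change gives the extractor two ways to find the video list: build a SMIL URL from the player's `videoserver`/`vidid` variables, or fall back to the old `jwplayer.smil` link, and finally to the bare `vidurl` when neither is present. The new URL construction, reduced to its essence (values invented):

```python
# Invented values; mirrors the string building shown above.
videoserver = 'player.example.com'
vidid = 'Cinemassacre-19911'
videolist_url = 'http://%s/vod/smil:%s.smil/jwplayer.smil' % (videoserver, vidid)
print(videolist_url)
# http://player.example.com/vod/smil:Cinemassacre-19911.smil/jwplayer.smil
```
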
| @@ -4,14 +4,16 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| from ..compat import ( | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_parse, | ||||
|     remove_end, | ||||
|     HEADRequest, | ||||
|     compat_HTTPError, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     HEADRequest, | ||||
|     remove_end, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CloudyIE(InfoExtractor): | ||||
|   | ||||
| @@ -16,9 +16,10 @@ class CNNIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn', | ||||
|         'file': 'sports_2013_06_09_nadal-1-on-1.cnn.mp4', | ||||
|         'md5': '3e6121ea48df7e2259fe73a0628605c4', | ||||
|         'info_dict': { | ||||
|             'id': 'sports_2013_06_09_nadal-1-on-1.cnn', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Nadal wins 8th French Open title', | ||||
|             'description': 'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.', | ||||
|             'duration': 135, | ||||
| @@ -27,9 +28,10 @@ class CNNIE(InfoExtractor): | ||||
|     }, | ||||
|     { | ||||
|         "url": "http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29", | ||||
|         "file": "us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4", | ||||
|         "md5": "b5cc60c60a3477d185af8f19a2a26f4e", | ||||
|         "info_dict": { | ||||
|             'id': 'us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology', | ||||
|             'ext': 'mp4', | ||||
|             "title": "Student's epic speech stuns new freshmen", | ||||
|             "description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\"", | ||||
|             "upload_date": "20130821", | ||||
|   | ||||
| @@ -12,13 +12,14 @@ import sys | ||||
| import time | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_http_client, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urlparse, | ||||
|     compat_str, | ||||
|  | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     compiled_regex_type, | ||||
|     ExtractorError, | ||||
| @@ -403,7 +404,7 @@ class InfoExtractor(object): | ||||
|             video_info['title'] = playlist_title | ||||
|         return video_info | ||||
|  | ||||
|     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): | ||||
|     def _search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): | ||||
|         """ | ||||
|         Perform a regex search on the given string, using a single or a list of | ||||
|         patterns returning the first matching group. | ||||
| @@ -424,8 +425,11 @@ class InfoExtractor(object): | ||||
|             _name = name | ||||
|  | ||||
|         if mobj: | ||||
|             # return the first matching group | ||||
|             return next(g for g in mobj.groups() if g is not None) | ||||
|             if group is None: | ||||
|                 # return the first matching group | ||||
|                 return next(g for g in mobj.groups() if g is not None) | ||||
|             else: | ||||
|                 return mobj.group(group) | ||||
|         elif default is not _NO_DEFAULT: | ||||
|             return default | ||||
|         elif fatal: | ||||
| @@ -435,11 +439,11 @@ class InfoExtractor(object): | ||||
|                 'please report this issue on http://yt-dl.org/bug' % _name) | ||||
|             return None | ||||
|  | ||||
|     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0): | ||||
|     def _html_search_regex(self, pattern, string, name, default=_NO_DEFAULT, fatal=True, flags=0, group=None): | ||||
|         """ | ||||
|         Like _search_regex, but strips HTML tags and unescapes entities. | ||||
|         """ | ||||
|         res = self._search_regex(pattern, string, name, default, fatal, flags) | ||||
|         res = self._search_regex(pattern, string, name, default, fatal, flags, group) | ||||
|         if res: | ||||
|             return clean_html(res).strip() | ||||
|         else: | ||||
| @@ -533,9 +537,9 @@ class InfoExtractor(object): | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'''(?ix)<meta | ||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']?%s["\']?) | ||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||
|             html, display_name, fatal=fatal, **kwargs) | ||||
|                     (?=[^>]+(?:itemprop|name|property)=(["\']?)%s\1) | ||||
|                     [^>]+content=(["\'])(?P<content>.*?)\1''' % re.escape(name), | ||||
|             html, display_name, fatal=fatal, group='content', **kwargs) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
|         return self._html_search_meta('dc.creator', html, 'uploader') | ||||
|   | ||||
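
The new `group=` parameter in `_search_regex` exists because the rewritten `_html_search_meta` pattern now captures the quote characters as groups, so "return the first matching group" would hand back a quote instead of the content. A standalone demonstration with a simplified pattern of the same shape (sample HTML invented):

```python
import re

html = '<meta name="description" content="foo &amp; bar">'
m = re.search(
    r'<meta(?=[^>]+name=(["\']?)description\1)'
    r'[^>]+content=(["\'])(?P<content>.*?)\2',
    html)
print(m.groups()[0])       # '"' -- the first group is just a quote character
print(m.group('content'))  # foo &amp; bar
```
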
| @@ -17,7 +17,6 @@ from ..utils import ( | ||||
|     bytes_to_intlist, | ||||
|     intlist_to_bytes, | ||||
|     unified_strdate, | ||||
|     clean_html, | ||||
|     urlencode_postdata, | ||||
| ) | ||||
| from ..aes import ( | ||||
|   | ||||
| @@ -5,7 +5,8 @@ import os.path | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_parse_unquote, url_basename | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
| from ..utils import url_basename | ||||
|  | ||||
|  | ||||
| class DropboxIE(InfoExtractor): | ||||
|   | ||||
| @@ -5,12 +5,14 @@ import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     urlencode_postdata, | ||||
|     ExtractorError, | ||||
|     limit_length, | ||||
|   | ||||
| @@ -8,12 +8,11 @@ from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
|     unescapeHTML, | ||||
|     get_meta_content, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GameSpotIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?' | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/', | ||||
|         'md5': 'b2a30deaa8654fcccd43713a6b6a4825', | ||||
| @@ -26,10 +25,10 @@ class GameSpotIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         page_id = mobj.group('page_id') | ||||
|         page_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, page_id) | ||||
|         data_video_json = self._search_regex(r'data-video=["\'](.*?)["\']', webpage, 'data video') | ||||
|         data_video_json = self._search_regex( | ||||
|             r'data-video=["\'](.*?)["\']', webpage, 'data video') | ||||
|         data_video = json.loads(unescapeHTML(data_video_json)) | ||||
|  | ||||
|         # Transform the manifest url to a link to the mp4 files | ||||
| @@ -41,7 +40,8 @@ class GameSpotIE(InfoExtractor): | ||||
|         http_path = f4m_path[1:].split('/', 1)[1] | ||||
|         http_template = re.sub(QUALITIES_RE, r'%s', http_path) | ||||
|         http_template = http_template.replace('.csmil/manifest.f4m', '') | ||||
|         http_template = compat_urlparse.urljoin('http://video.gamespotcdn.com/', http_template) | ||||
|         http_template = compat_urlparse.urljoin( | ||||
|             'http://video.gamespotcdn.com/', http_template) | ||||
|         formats = [] | ||||
|         for q in qualities: | ||||
|             formats.append({ | ||||
| @@ -52,8 +52,9 @@ class GameSpotIE(InfoExtractor): | ||||
|  | ||||
|         return { | ||||
|             'id': data_video['guid'], | ||||
|             'display_id': page_id, | ||||
|             'title': compat_urllib_parse.unquote(data_video['title']), | ||||
|             'formats': formats, | ||||
|             'description': get_meta_content('description', webpage), | ||||
|             'description': self._html_search_meta('description', webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
|   | ||||
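
`_match_id`, adopted in the GameSpot extractor above, is base-class shorthand for matching `_VALID_URL` against the URL and returning its `id` group; a hedged standalone equivalent:

```python
import re

_VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<id>\d+)/?'

def match_id(url):
    # Equivalent of InfoExtractor._match_id for this single pattern.
    return re.match(_VALID_URL, url).group('id')

print(match_id('http://www.gamespot.com/videos/arma-3-community-guide-sitrep-i/2300-6410818/'))
# 6410818
```
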
| @@ -7,11 +7,12 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urlparse, | ||||
|     compat_xml_parse_error, | ||||
|  | ||||
| ) | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
| @@ -99,6 +100,22 @@ class GenericIE(InfoExtractor): | ||||
|                 'uploader': 'Championat', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             # https://github.com/rg3/youtube-dl/issues/3541 | ||||
|             'add_ie': ['Brightcove'], | ||||
|             'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1', | ||||
|             'info_dict': { | ||||
|                 'id': '3866516442001', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Leer mij vrouwen kennen: Aflevering 1', | ||||
|                 'description': 'Leer mij vrouwen kennen: Aflevering 1', | ||||
|                 'uploader': 'SBS Broadcasting', | ||||
|             }, | ||||
|             'skip': 'Restricted to Netherlands', | ||||
|             'params': { | ||||
|                 'skip_download': True,  # m3u8 download | ||||
|             }, | ||||
|         }, | ||||
|         # Direct link to a video | ||||
|         { | ||||
|             'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', | ||||
| @@ -559,6 +576,7 @@ class GenericIE(InfoExtractor): | ||||
|             return { | ||||
|                 'id': video_id, | ||||
|                 'title': os.path.splitext(url_basename(url))[0], | ||||
|                 'direct': True, | ||||
|                 'formats': [{ | ||||
|                     'format_id': m.group('format_id'), | ||||
|                     'url': url, | ||||
|   | ||||
| @@ -5,13 +5,15 @@ import random | ||||
| import math | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_chr, | ||||
|     compat_ord, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class GloboIE(InfoExtractor): | ||||
|   | ||||
| @@ -1,15 +1,11 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     str_to_int, | ||||
|     ExtractorError, | ||||
| ) | ||||
| import json | ||||
|  | ||||
|  | ||||
| class GoshgayIE(InfoExtractor): | ||||
| @@ -27,36 +23,27 @@ class GoshgayIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title') | ||||
|         title = self._og_search_title(webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|         family_friendly = self._html_search_meta( | ||||
|             'isFamilyFriendly', webpage, default='false') | ||||
|         config_url = self._search_regex( | ||||
|             r"'config'\s*:\s*'([^']+)'", webpage, 'config URL') | ||||
|  | ||||
|         player_config = self._search_regex( | ||||
|             r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings') | ||||
|         player_vars = json.loads(player_config.replace("'", '"')) | ||||
|         width = str_to_int(player_vars.get('width')) | ||||
|         height = str_to_int(player_vars.get('height')) | ||||
|         config_uri = player_vars.get('config') | ||||
|         config = self._download_xml( | ||||
|             config_url, video_id, 'Downloading player config XML') | ||||
|  | ||||
|         if config_uri is None: | ||||
|             raise ExtractorError('Missing config URI') | ||||
|         node = self._download_xml(config_uri, video_id, 'Downloading player config XML', | ||||
|                                   errnote='Unable to download XML') | ||||
|         if node is None: | ||||
|         if config is None: | ||||
|             raise ExtractorError('Missing config XML') | ||||
|         if node.tag != 'config': | ||||
|         if config.tag != 'config': | ||||
|             raise ExtractorError('Missing config attribute') | ||||
|         fns = node.findall('file') | ||||
|         imgs = node.findall('image') | ||||
|         if len(fns) != 1: | ||||
|         fns = config.findall('file') | ||||
|         if len(fns) < 1: | ||||
|             raise ExtractorError('Missing media URI') | ||||
|         video_url = fns[0].text | ||||
|         if len(imgs) < 1: | ||||
|             thumbnail = None | ||||
|         else: | ||||
|             thumbnail = imgs[0].text | ||||
|  | ||||
|         url_comp = compat_urlparse.urlparse(url) | ||||
|         ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2]) | ||||
| @@ -65,9 +52,7 @@ class GoshgayIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'width': width, | ||||
|             'height': height, | ||||
|             'thumbnail': thumbnail, | ||||
|             'http_referer': ref, | ||||
|             'age_limit': 18, | ||||
|             'age_limit': 0 if family_friendly == 'true' else 18, | ||||
|         } | ||||
|   | ||||
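Another recurring cleanup, first visible in the GoshgayIE hunk above: the two-line re.match boilerplate becomes self._match_id(url). The helper lives in extractor/common.py; roughly, it behaves like this approximation (hypothetical _VALID_URL):

    import re

    class ExtractorSketch(object):
        # hypothetical pattern; real extractors define their own _VALID_URL
        _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

        @classmethod
        def _match_id(cls, url):
            # match the URL against _VALID_URL and return its 'id' group,
            # replacing the repeated mobj = re.match(...); mobj.group('id') idiom
            m = re.match(cls._VALID_URL, url)
            assert m
            return m.group('id')

    ExtractorSketch._match_id('http://example.com/video/1234')  # -> '1234'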
| @@ -8,12 +8,13 @@ import re | ||||
|  | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError, compat_urllib_request, compat_html_parser | ||||
|  | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_html_parser, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class GroovesharkHtmlParser(compat_html_parser.HTMLParser): | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_meta_content, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
| ) | ||||
| @@ -25,11 +25,11 @@ class HeiseIE(InfoExtractor): | ||||
|             'title': ( | ||||
|                 "Podcast: c't uplink 3.3 – Owncloud / Tastaturen / Peilsender Smartphone" | ||||
|             ), | ||||
|             'format_id': 'mp4_720', | ||||
|             'format_id': 'mp4_720p', | ||||
|             'timestamp': 1411812600, | ||||
|             'upload_date': '20140927', | ||||
|             'description': 'In uplink-Episode 3.3 geht es darum, wie man sich von Cloud-Anbietern emanzipieren kann, worauf man beim Kauf einer Tastatur achten sollte und was Smartphones über uns verraten.', | ||||
|             'thumbnail': 're:https?://.*\.jpg$', | ||||
|             'thumbnail': 're:^https?://.*\.jpe?g$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -49,11 +49,12 @@ class HeiseIE(InfoExtractor): | ||||
|         info = { | ||||
|             'id': video_id, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'timestamp': parse_iso8601(get_meta_content('date', webpage)), | ||||
|             'timestamp': parse_iso8601( | ||||
|                 self._html_search_meta('date', webpage)), | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
|  | ||||
|         title = get_meta_content('fulltitle', webpage) | ||||
|         title = self._html_search_meta('fulltitle', webpage) | ||||
|         if title: | ||||
|             info['title'] = title | ||||
|         else: | ||||
| @@ -64,9 +65,12 @@ class HeiseIE(InfoExtractor): | ||||
|             label = source_node.attrib['label'] | ||||
|             height = int_or_none(self._search_regex( | ||||
|                 r'^(.*?_)?([0-9]+)p$', label, 'height', default=None)) | ||||
|             video_url = source_node.attrib['file'] | ||||
|             ext = determine_ext(video_url, '') | ||||
|             formats.append({ | ||||
|                 'url': source_node.attrib['file'], | ||||
|                 'url': video_url, | ||||
|                 'format_note': label, | ||||
|                 'format_id': '%s_%s' % (ext, label), | ||||
|                 'height': height, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|   | ||||
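In the Heise hunk, format_id is now derived from the container extension plus the quality label, matching the corrected 'mp4_720p' test expectation. determine_ext is an existing utils helper that pulls the extension out of a URL path; a small illustration with a hypothetical URL:

    from youtube_dl.utils import determine_ext

    video_url = 'http://example.com/media/clip.mp4?sig=abc'  # hypothetical
    ext = determine_ext(video_url, '')    # -> 'mp4' ('' is the fallback default)
    format_id = '%s_%s' % (ext, '720p')   # -> 'mp4_720p'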
| @@ -6,7 +6,6 @@ import json | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     get_element_by_attribute, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -27,10 +26,11 @@ class ImdbIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage('http://www.imdb.com/video/imdb/vi%s' % video_id, video_id) | ||||
|         descr = get_element_by_attribute('itemprop', 'description', webpage) | ||||
|         descr = self._html_search_regex( | ||||
|             r'(?s)<span itemprop="description">(.*?)</span>', | ||||
|             webpage, 'description', fatal=False) | ||||
|         available_formats = re.findall( | ||||
|             r'case \'(?P<f_id>.*?)\' :$\s+url = \'(?P<path>.*?)\'', webpage, | ||||
|             flags=re.MULTILINE) | ||||
| @@ -73,9 +73,7 @@ class ImdbListIE(InfoExtractor): | ||||
|     } | ||||
|      | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         list_id = mobj.group('id') | ||||
|  | ||||
|         list_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, list_id) | ||||
|         entries = [ | ||||
|             self.url_result('http://www.imdb.com' + m, 'Imdb') | ||||
|   | ||||
| @@ -5,11 +5,11 @@ import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     get_element_by_id, | ||||
|     parse_iso8601, | ||||
|     determine_ext, | ||||
|     int_or_none, | ||||
|     float_or_none, | ||||
|     get_element_by_id, | ||||
|     int_or_none, | ||||
|     parse_iso8601, | ||||
|     str_to_int, | ||||
| ) | ||||
|  | ||||
| @@ -30,7 +30,7 @@ class IzleseneIE(InfoExtractor): | ||||
|                 'description': 'md5:253753e2655dde93f59f74b572454f6d', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'uploader_id': 'pelikzzle', | ||||
|                 'timestamp': 1404298698, | ||||
|                 'timestamp': 1404302298, | ||||
|                 'upload_date': '20140702', | ||||
|                 'duration': 95.395, | ||||
|                 'age_limit': 0, | ||||
| @@ -46,7 +46,7 @@ class IzleseneIE(InfoExtractor): | ||||
|                 'description': 'Tarkan Dortmund 2006 Konseri', | ||||
|                 'thumbnail': 're:^http://.*\.jpg', | ||||
|                 'uploader_id': 'parlayankiz', | ||||
|                 'timestamp': 1163318593, | ||||
|                 'timestamp': 1163322193, | ||||
|                 'upload_date': '20061112', | ||||
|                 'duration': 253.666, | ||||
|                 'age_limit': 0, | ||||
| @@ -55,10 +55,9 @@ class IzleseneIE(InfoExtractor): | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         url = 'http://www.izlesene.com/video/%s' % video_id | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         url = 'http://www.izlesene.com/video/%s' % video_id | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         title = self._og_search_title(webpage) | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class Laola1TvIE(InfoExtractor): | ||||
|   | ||||
| @@ -33,7 +33,7 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|         m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\..+?/.*)$', rtmp_video_url) | ||||
|         if not m: | ||||
|             return rtmp_video_url | ||||
|         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' | ||||
|         base = 'http://viacommtvstrmfs.fplive.net/' | ||||
|         return base + m.group('finalid') | ||||
|  | ||||
|     def _get_feed_url(self, uri): | ||||
|   | ||||
| @@ -7,11 +7,12 @@ import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_ord, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|  | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|   | ||||
| @@ -67,7 +67,7 @@ class NDRIE(InfoExtractor): | ||||
|  | ||||
|         thumbnail = None | ||||
|  | ||||
|         video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.hi\.mp4', type:"video/mp4"},''', page) | ||||
|         video_url = re.search(r'''3: \{src:'(?P<video>.+?)\.(lo|hi|hq)\.mp4', type:"video/mp4"},''', page) | ||||
|         if video_url: | ||||
|             thumbnails = re.findall(r'''\d+: \{src: "([^"]+)"(?: \|\| '[^']+')?, quality: '([^']+)'}''', page) | ||||
|             if thumbnails: | ||||
|   | ||||
| @@ -12,6 +12,7 @@ from ..utils import ( | ||||
|     unified_strdate, | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -108,6 +109,9 @@ class NiconicoIE(InfoExtractor): | ||||
|                 flv_info_request, video_id, | ||||
|                 note='Downloading flv info', errnote='Unable to download flv info') | ||||
|  | ||||
|         if 'deleted=' in flv_info_webpage: | ||||
|             raise ExtractorError('The video has been deleted.', | ||||
|                 expected=True) | ||||
|         video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] | ||||
|  | ||||
|         # Start extracting information | ||||
| @@ -171,7 +175,8 @@ class NiconicoPlaylistIE(InfoExtractor): | ||||
|         entries = [{ | ||||
|             '_type': 'url', | ||||
|             'ie_key': NiconicoIE.ie_key(), | ||||
|             'url': 'http://www.nicovideo.jp/watch/%s' % entry['item_id'], | ||||
|             'url': ('http://www.nicovideo.jp/watch/%s' % | ||||
|                 entry['item_data']['video_id']), | ||||
|         } for entry in entries] | ||||
|  | ||||
|         return { | ||||
|   | ||||
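The Niconico hunk introduces an early deleted-video check raising ExtractorError with expected=True. Expected errors are attributed to the input (here, a removed video) rather than to a youtube-dl bug, so they are printed without the usual traceback and bug-report notice. The pattern in isolation:

    from youtube_dl.utils import ExtractorError

    def ensure_not_deleted(flv_info_webpage):
        # the flv info response contains 'deleted=' when the video is gone;
        # expected=True keeps youtube-dl from asking the user to file a bug
        if 'deleted=' in flv_info_webpage:
            raise ExtractorError('The video has been deleted.', expected=True)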
| @@ -6,6 +6,7 @@ import os.path | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| @@ -29,6 +30,12 @@ class PlayedIE(InfoExtractor): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         orig_webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         m_error = re.search( | ||||
|             r'(?s)Reason for deletion:.*?<b class="err"[^>]*>(?P<msg>[^<]+)</b>', orig_webpage) | ||||
|         if m_error: | ||||
|             raise ExtractorError(m_error.group('msg'), expected=True) | ||||
|  | ||||
|         fields = re.findall( | ||||
|             r'type="hidden" name="([^"]+)"\s+value="([^"]+)">', orig_webpage) | ||||
|         data = dict(fields) | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_parse_unquote | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class Ro220IE(InfoExtractor): | ||||
|   | ||||
| @@ -13,7 +13,7 @@ from ..utils import ( | ||||
|  | ||||
| class StreamcloudIE(InfoExtractor): | ||||
|     IE_NAME = 'streamcloud.eu' | ||||
|     _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html' | ||||
|     _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html', | ||||
| @@ -27,8 +27,8 @@ class StreamcloudIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         url = 'http://streamcloud.eu/%s' % video_id | ||||
|  | ||||
|         orig_webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|   | ||||
| @@ -80,7 +80,7 @@ class SWRMediathekIE(InfoExtractor): | ||||
|  | ||||
|             if media_type == 'Video': | ||||
|                 fmt.update({ | ||||
|                     'format_note': ['144p', '288p', '544p'][quality-1], | ||||
|                     'format_note': ['144p', '288p', '544p', '720p'][quality-1], | ||||
|                     'vcodec': codec, | ||||
|                 }) | ||||
|             elif media_type == 'Audio': | ||||
| @@ -101,4 +101,4 @@ class SWRMediathekIE(InfoExtractor): | ||||
|             'uploader': attr['channel_title'], | ||||
|             'uploader_id': attr['channel_idkey'], | ||||
|             'formats': formats, | ||||
|         } | ||||
|         } | ||||
|   | ||||
| @@ -50,6 +50,7 @@ class TapelyIE(InfoExtractor): | ||||
|         request = compat_urllib_request.Request(playlist_url) | ||||
|         request.add_header('X-Requested-With', 'XMLHttpRequest') | ||||
|         request.add_header('Accept', 'application/json') | ||||
|         request.add_header('Referer', url) | ||||
|  | ||||
|         playlist = self._download_json(request, display_id) | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,7 @@ class TruTubeIE(InfoExtractor): | ||||
|  | ||||
|         # filehd is always 404 | ||||
|         video_url = xpath_text(config, './file', 'video URL', fatal=True) | ||||
|         title = xpath_text(config, './title', 'title') | ||||
|         title = xpath_text(config, './title', 'title').strip() | ||||
|         thumbnail = xpath_text(config, './image', 'thumbnail') | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -5,7 +5,6 @@ import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     get_meta_content, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -79,7 +78,7 @@ class UstreamChannelIE(InfoExtractor): | ||||
|         m = re.match(self._VALID_URL, url) | ||||
|         display_id = m.group('slug') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         channel_id = get_meta_content('ustream:channel_id', webpage) | ||||
|         channel_id = self._html_search_meta('ustream:channel_id', webpage) | ||||
|  | ||||
|         BASE = 'http://www.ustream.tv' | ||||
|         next_url = '/ajax/socialstream/videos/%s/1.json' % channel_id | ||||
|   | ||||
							
								
								
									
38  youtube_dl/extractor/vice.py  (new file)
							| @@ -0,0 +1,38 @@ | ||||
| from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .ooyala import OoyalaIE | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class ViceIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.vice\.com/.*?/(?P<name>.+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.vice.com/Fringes/cowboy-capitalists-part-1', | ||||
|         'info_dict': { | ||||
|             'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', | ||||
|         }, | ||||
|         'params': { | ||||
|             # Requires ffmpeg (m3u8 manifest) | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         try: | ||||
|             embed_code = self._search_regex( | ||||
|                 r'embedCode=([^&\'"]+)', webpage, | ||||
|                 'ooyala embed code') | ||||
|             ooyala_url = OoyalaIE._url_for_embed_code(embed_code) | ||||
|             print(ooyala_url) | ||||
|         except ExtractorError: | ||||
|             raise ExtractorError('The page doesn\'t contain a video', expected=True) | ||||
|         return self.url_result(ooyala_url, ie='Ooyala') | ||||
|  | ||||
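The new Vice extractor never touches the media itself: it scrapes the Ooyala embed code and defers to OoyalaIE through self.url_result. That helper returns a '_type': 'url' result, which makes the core re-dispatch extraction to the named extractor; simplified, it is roughly equivalent to:

    def url_result(url, ie=None):
        # simplified sketch of InfoExtractor.url_result: a 'url' result
        # tells the core to run extraction again on `url`, pinned to
        # the extractor whose ie_key matches `ie`
        return {'_type': 'url', 'url': url, 'ie_key': ie}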
| @@ -7,11 +7,13 @@ import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|   | ||||
| @@ -37,7 +37,7 @@ class WimpIE(InfoExtractor): | ||||
|         video_id = mobj.group(1) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_url = self._search_regex( | ||||
|             r"'file'\s*:\s*'([^']+)'", webpage, 'video URL') | ||||
|             r"[\"']file[\"']\s*[:,]\s*[\"'](.+?)[\"']", webpage, 'video URL') | ||||
|         if YoutubeIE.suitable(video_url): | ||||
|             self.to_screen('Found YouTube video') | ||||
|             return { | ||||
|   | ||||
| @@ -27,15 +27,15 @@ class WrzutaIE(InfoExtractor): | ||||
|             'description': 'md5:7fb5ef3c21c5893375fda51d9b15d9cd', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://w729.wrzuta.pl/audio/9oXJqdcndqv/david_guetta_amp_showtek_ft._vassy_-_bad', | ||||
|         'md5': '1e546a18e1c22ac6e9adce17b8961ff5', | ||||
|         'url': 'http://jolka85.wrzuta.pl/audio/063jOPX5ue2/liber_natalia_szroeder_-_teraz_ty', | ||||
|         'md5': 'bc78077859bea7bcfe4295d7d7fc9025', | ||||
|         'info_dict': { | ||||
|             'id': '9oXJqdcndqv', | ||||
|             'id': '063jOPX5ue2', | ||||
|             'ext': 'ogg', | ||||
|             'title': 'David Guetta & Showtek ft. Vassy - Bad', | ||||
|             'duration': 270, | ||||
|             'uploader_id': 'w729', | ||||
|             'description': 'md5:4628f01c666bbaaecefa83476cfa794a', | ||||
|             'title': 'Liber & Natalia Szroeder - Teraz Ty', | ||||
|             'duration': 203, | ||||
|             'uploader_id': 'jolka85', | ||||
|             'description': 'md5:2d2b6340f9188c8c4cd891580e481096', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
| @@ -49,16 +49,17 @@ class WrzutaIE(InfoExtractor): | ||||
|  | ||||
|         quality = qualities(['SD', 'MQ', 'HQ', 'HD']) | ||||
|  | ||||
|         audio_table = {'flv': 'mp3', 'webm': 'ogg'} | ||||
|         audio_table = {'flv': 'mp3', 'webm': 'ogg', '???': 'mp3'} | ||||
|  | ||||
|         embedpage = self._download_json('http://www.wrzuta.pl/npp/embed/%s/%s' % (uploader, video_id), video_id) | ||||
|  | ||||
|         formats = [] | ||||
|         for media in embedpage['url']: | ||||
|             fmt = media['type'].split('@')[0] | ||||
|             if typ == 'audio': | ||||
|                 ext = audio_table[media['type'].split('@')[0]] | ||||
|                 ext = audio_table.get(fmt, fmt) | ||||
|             else: | ||||
|                 ext = media['type'].split('@')[0] | ||||
|                 ext = fmt | ||||
|  | ||||
|             formats.append({ | ||||
|                 'format_id': '%s_%s' % (ext, media['quality'].lower()), | ||||
|   | ||||
| @@ -684,7 +684,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         # Get video info | ||||
|         self.report_video_info_webpage_download(video_id) | ||||
|         if re.search(r'player-age-gate-content">', video_webpage) is not None: | ||||
|             self.report_age_confirmation() | ||||
|             age_gate = True | ||||
|             # We simulate the access to the video from www.youtube.com/v/{video_id} | ||||
|             # this can be viewed without login into Youtube | ||||
| @@ -692,12 +691,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 'video_id': video_id, | ||||
|                 'eurl': 'https://youtube.googleapis.com/v/' + video_id, | ||||
|                 'sts': self._search_regex( | ||||
|                     r'"sts"\s*:\s*(\d+)', video_webpage, 'sts'), | ||||
|                     r'"sts"\s*:\s*(\d+)', video_webpage, 'sts', default=''), | ||||
|             }) | ||||
|             video_info_url = proto + '://www.youtube.com/get_video_info?' + data | ||||
|             video_info_webpage = self._download_webpage(video_info_url, video_id, | ||||
|                                     note=False, | ||||
|                                     errnote='unable to download video info webpage') | ||||
|             video_info_webpage = self._download_webpage( | ||||
|                 video_info_url, video_id, | ||||
|                 note='Refetching age-gated info webpage', | ||||
|                 errnote='unable to download video info webpage') | ||||
|             video_info = compat_parse_qs(video_info_webpage) | ||||
|         else: | ||||
|             age_gate = False | ||||
| @@ -1043,6 +1043,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', | ||||
|         'info_dict': { | ||||
|             'title': 'ytdl test PL', | ||||
|             'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', | ||||
|         }, | ||||
|         'playlist_count': 3, | ||||
|     }, { | ||||
|   | ||||
| @@ -5,9 +5,11 @@ import optparse | ||||
| import shlex | ||||
| import sys | ||||
|  | ||||
| from .utils import ( | ||||
| from .compat import ( | ||||
|     compat_expanduser, | ||||
|     compat_getenv, | ||||
| ) | ||||
| from .utils import ( | ||||
|     get_term_width, | ||||
|     write_string, | ||||
| ) | ||||
| @@ -259,7 +261,16 @@ def parseOpts(overrideArguments=None): | ||||
|     video_format.add_option( | ||||
|         '-f', '--format', | ||||
|         action='store', dest='format', metavar='FORMAT', default=None, | ||||
|         help='video format code, specify the order of preference using slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also supported. You can also use the special names "best", "bestvideo", "bestaudio", "worst", "worstvideo" and "worstaudio". By default, youtube-dl will pick the best quality. Use commas to download multiple audio formats, such as  -f  136/137/mp4/bestvideo,140/m4a/bestaudio') | ||||
|         help='video format code, specify the order of preference using' | ||||
|             ' slashes: -f 22/17/18 .  -f mp4 , -f m4a and  -f flv  are also' | ||||
|             ' supported. You can also use the special names "best",' | ||||
|             ' "bestvideo", "bestaudio", "worst", "worstvideo" and' | ||||
|             ' "worstaudio". By default, youtube-dl will pick the best quality.' | ||||
|             ' Use commas to download multiple audio formats, such as' | ||||
|             ' -f  136/137/mp4/bestvideo,140/m4a/bestaudio.' | ||||
|             ' You can merge the video and audio of two formats into a single' | ||||
|             ' file using -f <video-format>+<audio-format> (requires ffmpeg or' | ||||
|             ' avconv), for example -f bestvideo+bestaudio.') | ||||
|     video_format.add_option( | ||||
|         '--all-formats', | ||||
|         action='store_const', dest='format', const='all', | ||||
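The expanded --format help now documents merging a separate video and audio stream into one file. Example invocation (requires ffmpeg or avconv; the video ID is the standard youtube-dl test clip):

    youtube-dl -f bestvideo+bestaudio 'https://www.youtube.com/watch?v=BaW_jenozKc'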
| @@ -479,10 +490,12 @@ def parseOpts(overrideArguments=None): | ||||
|               '%(format_id)s for the unique id of the format (like Youtube\'s itags: "137"), ' | ||||
|               '%(upload_date)s for the upload date (YYYYMMDD), ' | ||||
|               '%(extractor)s for the provider (youtube, metacafe, etc), ' | ||||
|               '%(id)s for the video id, %(playlist)s for the playlist the video is in, ' | ||||
|               '%(playlist_index)s for the position in the playlist and %% for a literal percent. ' | ||||
|               '%(id)s for the video id, ' | ||||
|               '%(playlist_title)s, %(playlist_id)s, or %(playlist)s (=title if present, ID otherwise) for the playlist the video is in, ' | ||||
|               '%(playlist_index)s for the position in the playlist. ' | ||||
|               '%(height)s and %(width)s for the width and height of the video format. ' | ||||
|               '%(resolution)s for a textual description of the resolution of the video format. ' | ||||
|               '%% for a literal percent. ' | ||||
|               'Use - to output to stdout. Can also be used to download to a different directory, ' | ||||
|               'for example with -o \'/my/downloads/%(uploader)s/%(title)s-%(id)s.%(ext)s\' .')) | ||||
|     filesystem.add_option( | ||||
|   | ||||
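With the matching --output help update, the playlist placeholders can be used directly in templates, for example:

    youtube-dl -o '%(playlist_title)s/%(playlist_index)s - %(title)s.%(ext)s' <playlist URL>

%(playlist)s remains the safe fallback: per the help text, it expands to the playlist title when one is present and to the playlist ID otherwise.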
| @@ -6,10 +6,11 @@ import os | ||||
| import subprocess | ||||
|  | ||||
| from .common import PostProcessor | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_urlretrieve, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     check_executable, | ||||
|     compat_urlretrieve, | ||||
|     encodeFilename, | ||||
|     PostProcessingError, | ||||
|     prepend_extension, | ||||
|   | ||||
| @@ -3,10 +3,8 @@ from __future__ import unicode_literals | ||||
| import subprocess | ||||
|  | ||||
| from .common import PostProcessor | ||||
| from ..utils import ( | ||||
|     shlex_quote, | ||||
|     PostProcessingError, | ||||
| ) | ||||
| from ..compat import shlex_quote | ||||
| from ..utils import PostProcessingError | ||||
|  | ||||
|  | ||||
| class ExecAfterDownloadPP(PostProcessor): | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| import sys | ||||
| import time | ||||
| @@ -7,10 +6,13 @@ import time | ||||
|  | ||||
| from .common import AudioConversionError, PostProcessor | ||||
|  | ||||
| from ..utils import ( | ||||
| from ..compat import ( | ||||
|     compat_subprocess_get_DEVNULL, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     encodeArgument, | ||||
|     encodeFilename, | ||||
|     get_exe_version, | ||||
|     is_outdated_version, | ||||
|     PostProcessingError, | ||||
|     prepend_extension, | ||||
| @@ -19,23 +21,6 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| def get_version(executable): | ||||
|     """ Returns the version of the specified executable, | ||||
|     or False if the executable is not present """ | ||||
|     try: | ||||
|         out, err = subprocess.Popen( | ||||
|             [executable, '-version'], | ||||
|             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() | ||||
|     except OSError: | ||||
|         return False | ||||
|     firstline = out.partition(b'\n')[0].decode('ascii', 'ignore') | ||||
|     m = re.search(r'version\s+([0-9._-a-zA-Z]+)', firstline) | ||||
|     if not m: | ||||
|         return u'present' | ||||
|     else: | ||||
|         return m.group(1) | ||||
|  | ||||
|  | ||||
| class FFmpegPostProcessorError(PostProcessingError): | ||||
|     pass | ||||
|  | ||||
| @@ -61,7 +46,7 @@ class FFmpegPostProcessor(PostProcessor): | ||||
|     @staticmethod | ||||
|     def get_versions(): | ||||
|         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] | ||||
|         return dict((program, get_version(program)) for program in programs) | ||||
|         return dict((p, get_exe_version(p, args=['-version'])) for p in programs) | ||||
|  | ||||
|     @property | ||||
|     def _executable(self): | ||||
|   | ||||
| @@ -3,10 +3,12 @@ import subprocess | ||||
| import sys | ||||
|  | ||||
| from .common import PostProcessor | ||||
| from ..compat import ( | ||||
|     subprocess_check_output | ||||
| ) | ||||
| from ..utils import ( | ||||
|     check_executable, | ||||
|     hyphenate_date, | ||||
|     subprocess_check_output | ||||
| ) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| #!/usr/bin/env python | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import calendar | ||||
| import codecs | ||||
| import contextlib | ||||
| @@ -8,7 +10,6 @@ import ctypes | ||||
| import datetime | ||||
| import email.utils | ||||
| import errno | ||||
| import getpass | ||||
| import gzip | ||||
| import itertools | ||||
| import io | ||||
| @@ -29,254 +30,18 @@ import traceback | ||||
| import xml.etree.ElementTree | ||||
| import zlib | ||||
|  | ||||
| try: | ||||
|     import urllib.request as compat_urllib_request | ||||
| except ImportError: # Python 2 | ||||
|     import urllib2 as compat_urllib_request | ||||
|  | ||||
| try: | ||||
|     import urllib.error as compat_urllib_error | ||||
| except ImportError: # Python 2 | ||||
|     import urllib2 as compat_urllib_error | ||||
|  | ||||
| try: | ||||
|     import urllib.parse as compat_urllib_parse | ||||
| except ImportError: # Python 2 | ||||
|     import urllib as compat_urllib_parse | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import urlparse as compat_urllib_parse_urlparse | ||||
| except ImportError: # Python 2 | ||||
|     from urlparse import urlparse as compat_urllib_parse_urlparse | ||||
|  | ||||
| try: | ||||
|     import urllib.parse as compat_urlparse | ||||
| except ImportError: # Python 2 | ||||
|     import urlparse as compat_urlparse | ||||
|  | ||||
| try: | ||||
|     import http.cookiejar as compat_cookiejar | ||||
| except ImportError: # Python 2 | ||||
|     import cookielib as compat_cookiejar | ||||
|  | ||||
| try: | ||||
|     import html.entities as compat_html_entities | ||||
| except ImportError: # Python 2 | ||||
|     import htmlentitydefs as compat_html_entities | ||||
|  | ||||
| try: | ||||
|     import html.parser as compat_html_parser | ||||
| except ImportError: # Python 2 | ||||
|     import HTMLParser as compat_html_parser | ||||
|  | ||||
| try: | ||||
|     import http.client as compat_http_client | ||||
| except ImportError: # Python 2 | ||||
|     import httplib as compat_http_client | ||||
|  | ||||
| try: | ||||
|     from urllib.error import HTTPError as compat_HTTPError | ||||
| except ImportError:  # Python 2 | ||||
|     from urllib2 import HTTPError as compat_HTTPError | ||||
|  | ||||
| try: | ||||
|     from urllib.request import urlretrieve as compat_urlretrieve | ||||
| except ImportError:  # Python 2 | ||||
|     from urllib import urlretrieve as compat_urlretrieve | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from subprocess import DEVNULL | ||||
|     compat_subprocess_get_DEVNULL = lambda: DEVNULL | ||||
| except ImportError: | ||||
|     compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w') | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import unquote as compat_urllib_parse_unquote | ||||
| except ImportError: | ||||
|     def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): | ||||
|         if string == '': | ||||
|             return string | ||||
|         res = string.split('%') | ||||
|         if len(res) == 1: | ||||
|             return string | ||||
|         if encoding is None: | ||||
|             encoding = 'utf-8' | ||||
|         if errors is None: | ||||
|             errors = 'replace' | ||||
|         # pct_sequence: contiguous sequence of percent-encoded bytes, decoded | ||||
|         pct_sequence = b'' | ||||
|         string = res[0] | ||||
|         for item in res[1:]: | ||||
|             try: | ||||
|                 if not item: | ||||
|                     raise ValueError | ||||
|                 pct_sequence += item[:2].decode('hex') | ||||
|                 rest = item[2:] | ||||
|                 if not rest: | ||||
|                     # This segment was just a single percent-encoded character. | ||||
|                     # May be part of a sequence of code units, so delay decoding. | ||||
|                     # (Stored in pct_sequence). | ||||
|                     continue | ||||
|             except ValueError: | ||||
|                 rest = '%' + item | ||||
|             # Encountered non-percent-encoded characters. Flush the current | ||||
|             # pct_sequence. | ||||
|             string += pct_sequence.decode(encoding, errors) + rest | ||||
|             pct_sequence = b'' | ||||
|         if pct_sequence: | ||||
|             # Flush the final pct_sequence | ||||
|             string += pct_sequence.decode(encoding, errors) | ||||
|         return string | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from urllib.parse import parse_qs as compat_parse_qs | ||||
| except ImportError: # Python 2 | ||||
|     # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib. | ||||
|     # Python 2's version is apparently totally broken | ||||
|  | ||||
|     def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False, | ||||
|                 encoding='utf-8', errors='replace'): | ||||
|         qs, _coerce_result = qs, unicode | ||||
|         pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] | ||||
|         r = [] | ||||
|         for name_value in pairs: | ||||
|             if not name_value and not strict_parsing: | ||||
|                 continue | ||||
|             nv = name_value.split('=', 1) | ||||
|             if len(nv) != 2: | ||||
|                 if strict_parsing: | ||||
|                     raise ValueError("bad query field: %r" % (name_value,)) | ||||
|                 # Handle case of a control-name with no equal sign | ||||
|                 if keep_blank_values: | ||||
|                     nv.append('') | ||||
|                 else: | ||||
|                     continue | ||||
|             if len(nv[1]) or keep_blank_values: | ||||
|                 name = nv[0].replace('+', ' ') | ||||
|                 name = compat_urllib_parse_unquote( | ||||
|                     name, encoding=encoding, errors=errors) | ||||
|                 name = _coerce_result(name) | ||||
|                 value = nv[1].replace('+', ' ') | ||||
|                 value = compat_urllib_parse_unquote( | ||||
|                     value, encoding=encoding, errors=errors) | ||||
|                 value = _coerce_result(value) | ||||
|                 r.append((name, value)) | ||||
|         return r | ||||
|  | ||||
|     def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False, | ||||
|                 encoding='utf-8', errors='replace'): | ||||
|         parsed_result = {} | ||||
|         pairs = _parse_qsl(qs, keep_blank_values, strict_parsing, | ||||
|                         encoding=encoding, errors=errors) | ||||
|         for name, value in pairs: | ||||
|             if name in parsed_result: | ||||
|                 parsed_result[name].append(value) | ||||
|             else: | ||||
|                 parsed_result[name] = [value] | ||||
|         return parsed_result | ||||
|  | ||||
| try: | ||||
|     compat_str = unicode # Python 2 | ||||
| except NameError: | ||||
|     compat_str = str | ||||
|  | ||||
| try: | ||||
|     compat_chr = unichr # Python 2 | ||||
| except NameError: | ||||
|     compat_chr = chr | ||||
|  | ||||
| try: | ||||
|     from xml.etree.ElementTree import ParseError as compat_xml_parse_error | ||||
| except ImportError:  # Python 2.6 | ||||
|     from xml.parsers.expat import ExpatError as compat_xml_parse_error | ||||
|  | ||||
| try: | ||||
|     from shlex import quote as shlex_quote | ||||
| except ImportError:  # Python < 3.3 | ||||
|     def shlex_quote(s): | ||||
|         return "'" + s.replace("'", "'\"'\"'") + "'" | ||||
|  | ||||
|  | ||||
| def compat_ord(c): | ||||
|     if type(c) is int: return c | ||||
|     else: return ord(c) | ||||
|  | ||||
|  | ||||
| if sys.version_info >= (3, 0): | ||||
|     compat_getenv = os.getenv | ||||
|     compat_expanduser = os.path.expanduser | ||||
| else: | ||||
|     # Environment variables should be decoded with filesystem encoding. | ||||
|     # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918) | ||||
|  | ||||
|     def compat_getenv(key, default=None): | ||||
|         env = os.getenv(key, default) | ||||
|         if env: | ||||
|             env = env.decode(get_filesystem_encoding()) | ||||
|         return env | ||||
|  | ||||
|     # HACK: The default implementations of os.path.expanduser from cpython do not decode | ||||
|     # environment variables with filesystem encoding. We will work around this by | ||||
|     # providing adjusted implementations. | ||||
|     # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib | ||||
|     # for different platforms with correct environment variables decoding. | ||||
|  | ||||
|     if os.name == 'posix': | ||||
|         def compat_expanduser(path): | ||||
|             """Expand ~ and ~user constructions.  If user or $HOME is unknown, | ||||
|             do nothing.""" | ||||
|             if not path.startswith('~'): | ||||
|                 return path | ||||
|             i = path.find('/', 1) | ||||
|             if i < 0: | ||||
|                 i = len(path) | ||||
|             if i == 1: | ||||
|                 if 'HOME' not in os.environ: | ||||
|                     import pwd | ||||
|                     userhome = pwd.getpwuid(os.getuid()).pw_dir | ||||
|                 else: | ||||
|                     userhome = compat_getenv('HOME') | ||||
|             else: | ||||
|                 import pwd | ||||
|                 try: | ||||
|                     pwent = pwd.getpwnam(path[1:i]) | ||||
|                 except KeyError: | ||||
|                     return path | ||||
|                 userhome = pwent.pw_dir | ||||
|             userhome = userhome.rstrip('/') | ||||
|             return (userhome + path[i:]) or '/' | ||||
|     elif os.name == 'nt' or os.name == 'ce': | ||||
|         def compat_expanduser(path): | ||||
|             """Expand ~ and ~user constructs. | ||||
|  | ||||
|             If user or $HOME is unknown, do nothing.""" | ||||
|             if path[:1] != '~': | ||||
|                 return path | ||||
|             i, n = 1, len(path) | ||||
|             while i < n and path[i] not in '/\\': | ||||
|                 i = i + 1 | ||||
|  | ||||
|             if 'HOME' in os.environ: | ||||
|                 userhome = compat_getenv('HOME') | ||||
|             elif 'USERPROFILE' in os.environ: | ||||
|                 userhome = compat_getenv('USERPROFILE') | ||||
|             elif not 'HOMEPATH' in os.environ: | ||||
|                 return path | ||||
|             else: | ||||
|                 try: | ||||
|                     drive = compat_getenv('HOMEDRIVE') | ||||
|                 except KeyError: | ||||
|                     drive = '' | ||||
|                 userhome = os.path.join(drive, compat_getenv('HOMEPATH')) | ||||
|  | ||||
|             if i != 1: #~user | ||||
|                 userhome = os.path.join(os.path.dirname(userhome), path[1:i]) | ||||
|  | ||||
|             return userhome + path[i:] | ||||
|     else: | ||||
|         compat_expanduser = os.path.expanduser | ||||
| from .compat import ( | ||||
|     compat_chr, | ||||
|     compat_getenv, | ||||
|     compat_html_entities, | ||||
|     compat_parse_qs, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_parse_urlparse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| # This is not clearly defined otherwise | ||||
| @@ -304,14 +69,6 @@ def preferredencoding(): | ||||
|  | ||||
|     return pref | ||||
|  | ||||
| if sys.version_info < (3,0): | ||||
|     def compat_print(s): | ||||
|         print(s.encode(preferredencoding(), 'xmlcharrefreplace')) | ||||
| else: | ||||
|     def compat_print(s): | ||||
|         assert type(s) == type(u'') | ||||
|         print(s) | ||||
|  | ||||
|  | ||||
| def write_json_file(obj, fn): | ||||
|     """ Encode obj as JSON and write it to fn, atomically """ | ||||
| @@ -394,127 +151,32 @@ def xpath_text(node, xpath, name=None, fatal=False): | ||||
|     return n.text | ||||
|  | ||||
|  | ||||
| compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix | ||||
| class BaseHTMLParser(compat_html_parser.HTMLParser): | ||||
|     def __init(self): | ||||
|         compat_html_parser.HTMLParser.__init__(self) | ||||
|         self.html = None | ||||
|  | ||||
|     def loads(self, html): | ||||
|         self.html = html | ||||
|         self.feed(html) | ||||
|         self.close() | ||||
|  | ||||
| class AttrParser(BaseHTMLParser): | ||||
|     """Modified HTMLParser that isolates a tag with the specified attribute""" | ||||
|     def __init__(self, attribute, value): | ||||
|         self.attribute = attribute | ||||
|         self.value = value | ||||
|         self.result = None | ||||
|         self.started = False | ||||
|         self.depth = {} | ||||
|         self.watch_startpos = False | ||||
|         self.error_count = 0 | ||||
|         BaseHTMLParser.__init__(self) | ||||
|  | ||||
|     def error(self, message): | ||||
|         if self.error_count > 10 or self.started: | ||||
|             raise compat_html_parser.HTMLParseError(message, self.getpos()) | ||||
|         self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line | ||||
|         self.error_count += 1 | ||||
|         self.goahead(1) | ||||
|  | ||||
|     def handle_starttag(self, tag, attrs): | ||||
|         attrs = dict(attrs) | ||||
|         if self.started: | ||||
|             self.find_startpos(None) | ||||
|         if self.attribute in attrs and attrs[self.attribute] == self.value: | ||||
|             self.result = [tag] | ||||
|             self.started = True | ||||
|             self.watch_startpos = True | ||||
|         if self.started: | ||||
|             if not tag in self.depth: self.depth[tag] = 0 | ||||
|             self.depth[tag] += 1 | ||||
|  | ||||
|     def handle_endtag(self, tag): | ||||
|         if self.started: | ||||
|             if tag in self.depth: self.depth[tag] -= 1 | ||||
|             if self.depth[self.result[0]] == 0: | ||||
|                 self.started = False | ||||
|                 self.result.append(self.getpos()) | ||||
|  | ||||
|     def find_startpos(self, x): | ||||
|         """Needed to put the start position of the result (self.result[1]) | ||||
|         after the opening tag with the requested id""" | ||||
|         if self.watch_startpos: | ||||
|             self.watch_startpos = False | ||||
|             self.result.append(self.getpos()) | ||||
|     handle_entityref = handle_charref = handle_data = handle_comment = \ | ||||
|     handle_decl = handle_pi = unknown_decl = find_startpos | ||||
|  | ||||
|     def get_result(self): | ||||
|         if self.result is None: | ||||
|             return None | ||||
|         if len(self.result) != 3: | ||||
|             return None | ||||
|         lines = self.html.split('\n') | ||||
|         lines = lines[self.result[1][0]-1:self.result[2][0]] | ||||
|         lines[0] = lines[0][self.result[1][1]:] | ||||
|         if len(lines) == 1: | ||||
|             lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]] | ||||
|         lines[-1] = lines[-1][:self.result[2][1]] | ||||
|         return '\n'.join(lines).strip() | ||||
| # Hack for https://github.com/rg3/youtube-dl/issues/662 | ||||
| if sys.version_info < (2, 7, 3): | ||||
|     AttrParser.parse_endtag = (lambda self, i: | ||||
|         i + len("</scr'+'ipt>") | ||||
|         if self.rawdata[i:].startswith("</scr'+'ipt>") | ||||
|         else compat_html_parser.HTMLParser.parse_endtag(self, i)) | ||||
|  | ||||
| def get_element_by_id(id, html): | ||||
|     """Return the content of the tag with the specified ID in the passed HTML document""" | ||||
|     return get_element_by_attribute("id", id, html) | ||||
|  | ||||
|  | ||||
| def get_element_by_attribute(attribute, value, html): | ||||
|     """Return the content of the tag with the specified attribute in the passed HTML document""" | ||||
|     parser = AttrParser(attribute, value) | ||||
|     try: | ||||
|         parser.loads(html) | ||||
|     except compat_html_parser.HTMLParseError: | ||||
|         pass | ||||
|     return parser.get_result() | ||||
|  | ||||
| class MetaParser(BaseHTMLParser): | ||||
|     """ | ||||
|     Modified HTMLParser that isolates a meta tag with the specified name  | ||||
|     attribute. | ||||
|     """ | ||||
|     def __init__(self, name): | ||||
|         BaseHTMLParser.__init__(self) | ||||
|         self.name = name | ||||
|         self.content = None | ||||
|         self.result = None | ||||
|     m = re.search(r'''(?xs) | ||||
|         <([a-zA-Z0-9:._-]+) | ||||
|          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? | ||||
|          \s+%s=['"]?%s['"]? | ||||
|          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*? | ||||
|         \s*> | ||||
|         (?P<content>.*?) | ||||
|         </\1> | ||||
|     ''' % (re.escape(attribute), re.escape(value)), html) | ||||
|  | ||||
|     def handle_starttag(self, tag, attrs): | ||||
|         if tag != 'meta': | ||||
|             return | ||||
|         attrs = dict(attrs) | ||||
|         if attrs.get('name') == self.name: | ||||
|             self.result = attrs.get('content') | ||||
|     if not m: | ||||
|         return None | ||||
|     res = m.group('content') | ||||
|  | ||||
|     def get_result(self): | ||||
|         return self.result | ||||
|     if res.startswith('"') or res.startswith("'"): | ||||
|         res = res[1:-1] | ||||
|  | ||||
| def get_meta_content(name, html): | ||||
|     """ | ||||
|     Return the content attribute from the meta tag with the given name attribute. | ||||
|     """ | ||||
|     parser = MetaParser(name) | ||||
|     try: | ||||
|         parser.loads(html) | ||||
|     except compat_html_parser.HTMLParseError: | ||||
|         pass | ||||
|     return parser.get_result() | ||||
|     return unescapeHTML(res) | ||||
|  | ||||
|  | ||||
| def clean_html(html): | ||||
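The whole AttrParser/MetaParser machinery above collapses into a single regex: get_element_by_attribute now matches an opening tag carrying the requested attribute, lazily captures up to the corresponding close tag, strips optional quotes, and unescapes entities. Unlike the old depth-tracking parser, the lazy match stops at the first close tag, so nested elements of the same name are truncated. A quick illustration on a hypothetical snippet:

    html = '<p class="intro">Hello &amp; welcome</p>'
    get_element_by_attribute('class', 'intro', html)
    # -> 'Hello & welcome' (content up to </p>, run through unescapeHTML)

Call sites of the removed get_meta_content were migrated to self._html_search_meta in the extractor hunks earlier in this compare.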
| @@ -1472,6 +1134,25 @@ def check_executable(exe, args=[]): | ||||
|     return exe | ||||
|  | ||||
|  | ||||
| def get_exe_version(exe, args=['--version'], | ||||
|                     version_re=r'version\s+([0-9._-a-zA-Z]+)', | ||||
|                     unrecognized=u'present'): | ||||
|     """ Returns the version of the specified executable, | ||||
|     or False if the executable is not present """ | ||||
|     try: | ||||
|         out, err = subprocess.Popen( | ||||
|             [exe] + args, | ||||
|             stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() | ||||
|     except OSError: | ||||
|         return False | ||||
|     firstline = out.partition(b'\n')[0].decode('ascii', 'ignore') | ||||
|     m = re.search(version_re, firstline) | ||||
|     if m: | ||||
|         return m.group(1) | ||||
|     else: | ||||
|         return unrecognized | ||||
|  | ||||
|  | ||||
| class PagedList(object): | ||||
|     def __len__(self): | ||||
|         # This is only useful for tests | ||||
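get_version previously lived in the ffmpeg postprocessor (see its removal above); it is generalized here as utils.get_exe_version with configurable args and version regex, and consumed via get_versions. Usage sketch:

    from youtube_dl.utils import get_exe_version

    v = get_exe_version('ffmpeg', args=['-version'])
    if v is False:
        print('ffmpeg is missing or not on PATH')
    else:
        # a parsed version string, or 'present' if the first output
        # line had nothing matching the version regex
        print('ffmpeg %s' % v)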
| @@ -1562,7 +1243,7 @@ def escape_rfc3986(s): | ||||
|     """Escape non-ASCII characters as suggested by RFC 3986""" | ||||
|     if sys.version_info < (3, 0) and isinstance(s, unicode): | ||||
|         s = s.encode('utf-8') | ||||
|     return compat_urllib_parse.quote(s, "%/;:@&=+$,!~*'()?#[]") | ||||
|     return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") | ||||
|  | ||||
|  | ||||
| def escape_url(url): | ||||
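The one-byte change to escape_rfc3986 follows from the new from __future__ import unicode_literals at the top of utils.py: the safe-characters literal would otherwise become a unicode string, and on Python 2 urllib.quote can raise UnicodeDecodeError when a byte payload (s was just UTF-8-encoded) meets a unicode safe set. The b prefix pins it to bytes on 2.x; observable behavior is unchanged:

    escape_rfc3986(u'http://example.com/\u65e5\u672c\u8a9e')
    # -> 'http://example.com/%E6%97%A5%E6%9C%AC%E8%AA%9E'
    # characters in the safe set (/ : ? # [ ] @ ...) pass through untouched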
| @@ -1636,15 +1317,6 @@ def parse_xml(s): | ||||
|     return tree | ||||
|  | ||||
|  | ||||
| if sys.version_info < (3, 0) and sys.platform == 'win32': | ||||
|     def compat_getpass(prompt, *args, **kwargs): | ||||
|         if isinstance(prompt, compat_str): | ||||
|             prompt = prompt.encode(preferredencoding()) | ||||
|         return getpass.getpass(prompt, *args, **kwargs) | ||||
| else: | ||||
|     compat_getpass = getpass.getpass | ||||
|  | ||||
|  | ||||
| US_RATINGS = { | ||||
|     'G': 0, | ||||
|     'PG': 10, | ||||
| @@ -1702,18 +1374,6 @@ def qualities(quality_ids): | ||||
|  | ||||
| DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s' | ||||
|  | ||||
| try: | ||||
|     subprocess_check_output = subprocess.check_output | ||||
| except AttributeError: | ||||
|     def subprocess_check_output(*args, **kwargs): | ||||
|         assert 'input' not in kwargs | ||||
|         p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs) | ||||
|         output, _ = p.communicate() | ||||
|         ret = p.poll() | ||||
|         if ret: | ||||
|             raise subprocess.CalledProcessError(ret, p.args, output=output) | ||||
|         return output | ||||
|  | ||||
|  | ||||
| def limit_length(s, length): | ||||
|     """ Add ellipses to overly long strings """ | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.11.02.1' | ||||
| __version__ = '2014.11.12' | ||||
|   | ||||