Mirror of https://github.com/ytdl-org/youtube-dl.git
	Add proper support for "gzip" and "deflate" encodings
Showing 1 changed file with 72 additions and 17 deletions: youtube-dl
@@ -8,6 +8,7 @@
 import cookielib
 import ctypes
 import datetime
+import gzip
 import htmlentitydefs
 import httplib
 import locale
@@ -18,11 +19,13 @@ import os.path
 import re
 import socket
 import string
+import StringIO
 import subprocess
 import sys
 import time
 import urllib
 import urllib2
+import zlib
 
 # parse_qs was moved from the cgi module to the urlparse module recently.
 try:
@@ -161,6 +164,56 @@ class ContentTooShortError(Exception):
 		self.downloaded = downloaded
 		self.expected = expected
 
+class YoutubeDLHandler(urllib2.HTTPHandler):
+	"""Handler for HTTP requests and responses.
+
+	This class, when installed with an OpenerDirector, automatically adds
+	the standard headers to every HTTP request and handles gzipped and
+	deflated responses from web servers. If compression is to be avoided in
+	a particular request, the original request in the program code only has
+	to include the HTTP header "Youtubedl-No-Compression", which will be
+	removed before making the real request.
+	
+	Part of this code was copied from:
+
+	  http://techknack.net/python-urllib2-handlers/
+	  
+	Andrew Rowls, the author of that code, agreed to release it to the
+	public domain.
+	"""
+
+	@staticmethod
+	def deflate(data):
+		try:
+			return zlib.decompress(data, -zlib.MAX_WBITS)
+		except zlib.error:
+			return zlib.decompress(data)
+	
+	def http_request(self, req):
+		for h in std_headers:
+			if h in req.headers:
+				del req.headers[h]
+			req.add_header(h, std_headers[h])
+		if 'Youtubedl-no-compression' in req.headers:
+			if 'Accept-encoding' in req.headers:
+				del req.headers['Accept-encoding']
+			del req.headers['Youtubedl-no-compression']
+		return req
+
+	def http_response(self, req, resp):
+		old_resp = resp
+		# gzip
+		if resp.headers.get('Content-encoding', '') == 'gzip':
+			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
+			resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+			resp.msg = old_resp.msg
+		# deflate
+		if resp.headers.get('Content-encoding', '') == 'deflate':
+			gz = StringIO.StringIO(self.deflate(resp.read()))
+			resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+			resp.msg = old_resp.msg
+		return resp
+
 class FileDownloader(object):
 	"""File Downloader class.
 
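Editorial aside, not part of the commit: the deflate() helper in the hunk above accepts both flavours of "deflate" seen in the wild. It first tries a raw DEFLATE stream (negative window size), and falls back to a zlib-wrapped stream when that raises zlib.error. A minimal, self-contained Python 2 sketch of that behaviour, using a local _deflate copy of the helper so it can run on its own:

import zlib

def _deflate(data):
	# Same logic as YoutubeDLHandler.deflate() above: try a raw DEFLATE
	# stream first, then fall back to a zlib-wrapped one.
	try:
		return zlib.decompress(data, -zlib.MAX_WBITS)
	except zlib.error:
		return zlib.decompress(data)

payload = 'x' * 1000

# zlib-wrapped stream (zlib header plus Adler-32 checksum), which is what
# zlib.compress() emits.
wrapped = zlib.compress(payload)

# Raw DEFLATE stream, without the zlib wrapper; some servers send this form
# even though they label the response "deflate".
compressor = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
raw = compressor.compress(payload) + compressor.flush()

print _deflate(raw) == payload      # True
print _deflate(wrapped) == payload  # True, via the zlib.error fallback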
@@ -559,8 +612,11 @@
 		tmpfilename = self.temp_name(filename)
 		stream = None
 		open_mode = 'wb'
-		basic_request = urllib2.Request(url, None, std_headers)
-		request = urllib2.Request(url, None, std_headers)
+
+		# Do not include the Accept-Encoding header
+		headers = {'Youtubedl-no-compression': 'True'}
+		basic_request = urllib2.Request(url, None, headers)
+		request = urllib2.Request(url, None, headers)
 
 		# Establish possible resume length
 		if os.path.isfile(tmpfilename):
@@ -822,7 +878,7 @@ class YoutubeIE(InfoExtractor):
 				return
 
 		# Set language
-		request = urllib2.Request(self._LANG_URL, None, std_headers)
+		request = urllib2.Request(self._LANG_URL)
 		try:
 			self.report_lang()
 			urllib2.urlopen(request).read()
@@ -842,7 +898,7 @@ class YoutubeIE(InfoExtractor):
 				'username':	username,
 				'password':	password,
 				}
-		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
+		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
 		try:
 			self.report_login()
 			login_results = urllib2.urlopen(request).read()
@@ -858,7 +914,7 @@ class YoutubeIE(InfoExtractor):
 				'next_url':		'/',
 				'action_confirm':	'Confirm',
 				}
-		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
+		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
 		try:
 			self.report_age_confirmation()
 			age_results = urllib2.urlopen(request).read()
@@ -876,7 +932,7 @@ class YoutubeIE(InfoExtractor):
 
 		# Get video webpage
 		self.report_video_webpage_download(video_id)
-		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers)
+		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
 		try:
 			video_webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -895,7 +951,7 @@ class YoutubeIE(InfoExtractor):
 		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
 			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
 					   % (video_id, el_type))
-			request = urllib2.Request(video_info_url, None, std_headers)
+			request = urllib2.Request(video_info_url)
 			try:
 				video_info_webpage = urllib2.urlopen(request).read()
 				video_info = parse_qs(video_info_webpage)
@@ -1055,7 +1111,7 @@ class MetacafeIE(InfoExtractor):
 
 	def _real_initialize(self):
 		# Retrieve disclaimer
-		request = urllib2.Request(self._DISCLAIMER, None, std_headers)
+		request = urllib2.Request(self._DISCLAIMER)
 		try:
 			self.report_disclaimer()
 			disclaimer = urllib2.urlopen(request).read()
@@ -1068,7 +1124,7 @@ class MetacafeIE(InfoExtractor):
 			'filters': '0',
 			'submit': "Continue - I'm over 18",
 			}
-		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
+		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
 		try:
 			self.report_age_confirmation()
 			disclaimer = urllib2.urlopen(request).read()
@@ -1771,7 +1827,7 @@ class YoutubeSearchIE(InfoExtractor):
 		while True:
 			self.report_download_page(query, pagenum)
 			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
-			request = urllib2.Request(result_url, None, std_headers)
+			request = urllib2.Request(result_url)
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -1862,7 +1918,7 @@ class GoogleSearchIE(InfoExtractor):
 		while True:
 			self.report_download_page(query, pagenum)
 			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
-			request = urllib2.Request(result_url, None, std_headers)
+			request = urllib2.Request(result_url)
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -1953,7 +2009,7 @@ class YahooSearchIE(InfoExtractor):
 		while True:
 			self.report_download_page(query, pagenum)
 			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
-			request = urllib2.Request(result_url, None, std_headers)
+			request = urllib2.Request(result_url)
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2017,7 +2073,7 @@ class YoutubePlaylistIE(InfoExtractor):
 
 		while True:
 			self.report_download_page(playlist_id, pagenum)
-			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
+			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2079,7 +2135,7 @@ class YoutubeUserIE(InfoExtractor):
 		pagenum = 1
 
 		self.report_download_page(username)
-		request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
+		request = urllib2.Request(self._TEMPLATE_URL % (username))
 		try:
 			page = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2135,7 +2191,7 @@ class DepositFilesIE(InfoExtractor):
 
 		# Retrieve file webpage with 'Free download' button pressed
 		free_download_indication = { 'gateway_result' : '1' }
-		request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
+		request = urllib2.Request(url, urllib.urlencode(free_download_indication))
 		try:
 			self.report_download_webpage(file_id)
 			webpage = urllib2.urlopen(request).read()
@@ -2354,8 +2410,7 @@ if __name__ == '__main__':
 
 		# General configuration
 		cookie_processor = urllib2.HTTPCookieProcessor(jar)
-		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
-		urllib2.install_opener(urllib2.build_opener(cookie_processor))
+		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
 		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 
 		# Batch file verification
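Editorial aside, not part of the commit: a rough sketch of how the pieces above fit together. The opener construction mirrors the install_opener() change in the last hunk, and the Youtubedl-no-compression marker mirrors the header added for the actual file download; the handler strips the marker again before the request leaves the program. The sketch assumes it runs inside the youtube-dl module, where YoutubeDLHandler and std_headers are defined (the std_headers change that presumably advertises Accept-Encoding is not shown in this excerpt), and the example.com URL is a placeholder.

import urllib2

# Hypothetical wiring, mirroring the install_opener() line in the last hunk.
opener = urllib2.build_opener(urllib2.ProxyHandler(),
			      urllib2.HTTPCookieProcessor(),
			      YoutubeDLHandler())
urllib2.install_opener(opener)

# Ordinary request: http_request() injects std_headers, and if the server
# answers with Content-Encoding: gzip or deflate, http_response() hands the
# caller an already-decoded file-like object.
page = urllib2.urlopen('http://www.youtube.com/').read()

# Per-request opt-out, as used for the actual video download above: the
# marker header makes http_request() drop Accept-encoding and is itself
# removed before the request goes out, so the file's raw bytes arrive as-is.
request = urllib2.Request('http://example.com/video.flv', None,
			  {'Youtubedl-no-compression': 'True'})
data = urllib2.urlopen(request).read()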