mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2025-10-29 09:26:20 -07:00)

	Add proper support for "gzip" and "deflate" encodings
 youtube-dl | 89

@@ -8,6 +8,7 @@
 import cookielib
 import ctypes
 import datetime
+import gzip
 import htmlentitydefs
 import httplib
 import locale
@@ -18,11 +19,13 @@ import os.path
 import re
 import socket
 import string
+import StringIO
 import subprocess
 import sys
 import time
 import urllib
 import urllib2
+import zlib
 
 # parse_qs was moved from the cgi module to the urlparse module recently.
 try:
@@ -161,6 +164,56 @@ class ContentTooShortError(Exception):
 		self.downloaded = downloaded
 		self.expected = expected
 
+class YoutubeDLHandler(urllib2.HTTPHandler):
+	"""Handler for HTTP requests and responses.
+
+	This class, when installed with an OpenerDirector, automatically adds
+	the standard headers to every HTTP request and handles gzipped and
+	deflated responses from web servers. If compression is to be avoided in
+	a particular request, the original request in the program code only has
+	to include the HTTP header "Youtubedl-No-Compression", which will be
+	removed before making the real request.
+
+	Part of this code was copied from:
+
+	  http://techknack.net/python-urllib2-handlers/
+
+	Andrew Rowls, the author of that code, agreed to release it to the
+	public domain.
+	"""
+
+	@staticmethod
+	def deflate(data):
+		try:
+			return zlib.decompress(data, -zlib.MAX_WBITS)
+		except zlib.error:
+			return zlib.decompress(data)
+
+	def http_request(self, req):
+		for h in std_headers:
+			if h in req.headers:
+				del req.headers[h]
+			req.add_header(h, std_headers[h])
+		if 'Youtubedl-no-compression' in req.headers:
+			if 'Accept-encoding' in req.headers:
+				del req.headers['Accept-encoding']
+			del req.headers['Youtubedl-no-compression']
+		return req
+
+	def http_response(self, req, resp):
+		old_resp = resp
+		# gzip
+		if resp.headers.get('Content-encoding', '') == 'gzip':
+			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
+			resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+			resp.msg = old_resp.msg
+		# deflate
+		if resp.headers.get('Content-encoding', '') == 'deflate':
+			gz = StringIO.StringIO(self.deflate(resp.read()))
+			resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+			resp.msg = old_resp.msg
+		return resp
+
 class FileDownloader(object):
 	"""File Downloader class.
 
@@ -559,8 +612,11 @@ class FileDownloader(object):
 		tmpfilename = self.temp_name(filename)
 		stream = None
 		open_mode = 'wb'
-		basic_request = urllib2.Request(url, None, std_headers)
-		request = urllib2.Request(url, None, std_headers)
+
+		# Do not include the Accept-Encoding header
+		headers = {'Youtubedl-no-compression': 'True'}
+		basic_request = urllib2.Request(url, None, headers)
+		request = urllib2.Request(url, None, headers)
 
 		# Establish possible resume length
 		if os.path.isfile(tmpfilename):
@@ -822,7 +878,7 @@ class YoutubeIE(InfoExtractor):
 				return
 
 		# Set language
-		request = urllib2.Request(self._LANG_URL, None, std_headers)
+		request = urllib2.Request(self._LANG_URL)
 		try:
 			self.report_lang()
 			urllib2.urlopen(request).read()
@@ -842,7 +898,7 @@ class YoutubeIE(InfoExtractor):
 				'username':	username,
 				'password':	password,
 				}
-		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
+		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
 		try:
 			self.report_login()
 			login_results = urllib2.urlopen(request).read()
@@ -858,7 +914,7 @@ class YoutubeIE(InfoExtractor):
 				'next_url':		'/',
 				'action_confirm':	'Confirm',
 				}
-		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
+		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
 		try:
 			self.report_age_confirmation()
 			age_results = urllib2.urlopen(request).read()
@@ -876,7 +932,7 @@ class YoutubeIE(InfoExtractor):
 
 		# Get video webpage
 		self.report_video_webpage_download(video_id)
-		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id, None, std_headers)
+		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
 		try:
 			video_webpage = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -895,7 +951,7 @@ class YoutubeIE(InfoExtractor):
 		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
 			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
 					   % (video_id, el_type))
-			request = urllib2.Request(video_info_url, None, std_headers)
+			request = urllib2.Request(video_info_url)
 			try:
 				video_info_webpage = urllib2.urlopen(request).read()
 				video_info = parse_qs(video_info_webpage)
@@ -1055,7 +1111,7 @@ class MetacafeIE(InfoExtractor):
 
 	def _real_initialize(self):
 		# Retrieve disclaimer
-		request = urllib2.Request(self._DISCLAIMER, None, std_headers)
+		request = urllib2.Request(self._DISCLAIMER)
 		try:
 			self.report_disclaimer()
 			disclaimer = urllib2.urlopen(request).read()
@@ -1068,7 +1124,7 @@ class MetacafeIE(InfoExtractor):
 			'filters': '0',
 			'submit': "Continue - I'm over 18",
 			}
-		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
+		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
 		try:
 			self.report_age_confirmation()
 			disclaimer = urllib2.urlopen(request).read()
@@ -1771,7 +1827,7 @@ class YoutubeSearchIE(InfoExtractor):
 		while True:
 			self.report_download_page(query, pagenum)
 			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
-			request = urllib2.Request(result_url, None, std_headers)
+			request = urllib2.Request(result_url)
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -1862,7 +1918,7 @@ class GoogleSearchIE(InfoExtractor):
 		while True:
 			self.report_download_page(query, pagenum)
 			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
-			request = urllib2.Request(result_url, None, std_headers)
+			request = urllib2.Request(result_url)
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -1953,7 +2009,7 @@ class YahooSearchIE(InfoExtractor):
 		while True:
 			self.report_download_page(query, pagenum)
 			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
-			request = urllib2.Request(result_url, None, std_headers)
+			request = urllib2.Request(result_url)
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2017,7 +2073,7 @@ class YoutubePlaylistIE(InfoExtractor):
 
 		while True:
 			self.report_download_page(playlist_id, pagenum)
-			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
+			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2079,7 +2135,7 @@ class YoutubeUserIE(InfoExtractor):
 		pagenum = 1
 
 		self.report_download_page(username)
-		request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
+		request = urllib2.Request(self._TEMPLATE_URL % (username))
 		try:
 			page = urllib2.urlopen(request).read()
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2135,7 +2191,7 @@ class DepositFilesIE(InfoExtractor):
 
 		# Retrieve file webpage with 'Free download' button pressed
 		free_download_indication = { 'gateway_result' : '1' }
-		request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
+		request = urllib2.Request(url, urllib.urlencode(free_download_indication))
 		try:
 			self.report_download_webpage(file_id)
 			webpage = urllib2.urlopen(request).read()
@@ -2354,8 +2410,7 @@ if __name__ == '__main__':
 
 		# General configuration
 		cookie_processor = urllib2.HTTPCookieProcessor(jar)
-		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
-		urllib2.install_opener(urllib2.build_opener(cookie_processor))
+		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
 		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
 
 		# Batch file verification
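How the pieces fit together once the patch is applied: the handler is installed into the global opener, ordinary page fetches are decompressed transparently, and the actual file download opts out of compression via the marker header. A minimal usage sketch, assuming the patched script's YoutubeDLHandler and std_headers are in scope (the URL is a placeholder, and main() additionally chains an HTTPCookieProcessor):

import urllib2

# Install the handler globally, as main() does after this change.
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), YoutubeDLHandler()))

# Ordinary page fetch: http_request() injects the standard headers and, if the
# server answers with gzip or deflate, http_response() decodes the body before
# read() returns it.
page = urllib2.urlopen('http://www.youtube.com/').read()

# File download: the marker header makes http_request() drop any
# Accept-Encoding header (and the marker itself), so the payload is
# transferred as-is.
video_url = 'http://example.com/video.flv'  # placeholder
request = urllib2.Request(video_url, None, {'Youtubedl-no-compression': 'True'})
stream = urllib2.urlopen(request)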
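The deflate() fallback exists because some servers that advertise Content-Encoding: deflate send a raw DEFLATE stream rather than a zlib-wrapped one: decompressing with -zlib.MAX_WBITS handles the raw case, and the except branch retries with a normal zlib decode. A small standalone check of that logic (decode_deflate is a stand-in name with the same body as the patch's static method):

import zlib

def decode_deflate(data):
	# Try a raw DEFLATE stream first (negative wbits = no zlib header or
	# checksum), then fall back to a zlib-wrapped stream.
	try:
		return zlib.decompress(data, -zlib.MAX_WBITS)
	except zlib.error:
		return zlib.decompress(data)

payload = 'some response body ' * 20
wrapped = zlib.compress(payload)                          # zlib-wrapped "deflate"
co = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)
raw = co.compress(payload) + co.flush()                   # raw DEFLATE, no header

assert decode_deflate(wrapped) == payload
assert decode_deflate(raw) == payload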