mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	patched to add Google Video and Photobucket support
This commit is contained in:
		
				
					committed by
					
						 Ricardo Garcia
						Ricardo Garcia
					
				
			
			
				
	
			
			
			
						parent
						
							f1b4bee09d
						
					
				
				
					commit
					49c0028a7a
				
			| @@ -1 +1 @@ | ||||
| 2010.01.06 | ||||
| 2010.01.15 | ||||
|   | ||||
							
								
								
									
										158
									
								
								youtube-dl
									
									
									
									
									
								
							
							
						
						
									
										158
									
								
								youtube-dl
									
									
									
									
									
								
							| @@ -2,6 +2,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| # Author: Ricardo Garcia Gonzalez | ||||
| # Author: Danny Colligan | ||||
| # Author: Benjamin Johnson | ||||
| # License: Public domain code | ||||
| import htmlentitydefs | ||||
| import httplib | ||||
| @@ -936,6 +937,159 @@ class MetacafeIE(InfoExtractor): | ||||
| 			self._downloader.trouble(u'ERROR: format not available for video') | ||||
|  | ||||
|  | ||||
| class GoogleIE(InfoExtractor): | ||||
| 	"""Information extractor for video.google.com.""" | ||||
|  | ||||
| 	_VALID_URL = r'(?:http://)?video\.google\.com/videoplay\?docid=([^\&]+).*' | ||||
|  | ||||
| 	def __init__(self, downloader=None): | ||||
| 		InfoExtractor.__init__(self, downloader) | ||||
|  | ||||
| 	@staticmethod | ||||
| 	def suitable(url): | ||||
| 		return (re.match(GoogleIE._VALID_URL, url) is not None) | ||||
|  | ||||
| 	def report_download_webpage(self, video_id): | ||||
| 		"""Report webpage download.""" | ||||
| 		self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) | ||||
|  | ||||
| 	def report_extraction(self, video_id): | ||||
| 		"""Report information extraction.""" | ||||
| 		self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) | ||||
|  | ||||
| 	def _real_initialize(self): | ||||
| 		return | ||||
|  | ||||
| 	def _real_extract(self, url): | ||||
| 		# Extract id from URL | ||||
| 		mobj = re.match(self._VALID_URL, url) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) | ||||
| 			return | ||||
|  | ||||
| 		video_id = mobj.group(1) | ||||
|  | ||||
| 		video_extension = 'mp4' | ||||
|  | ||||
| 		# Retrieve video webpage to extract further information | ||||
| 		request = urllib2.Request('http://video.google.com/videoplay?docid=%s' % video_id) | ||||
| 		try: | ||||
| 			self.report_download_webpage(video_id) | ||||
| 			webpage = urllib2.urlopen(request).read() | ||||
| 		except (urllib2.URLError, httplib.HTTPException, socket.error), err: | ||||
| 			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) | ||||
| 			return | ||||
|  | ||||
| 		# Extract URL, uploader, and title from webpage | ||||
| 		self.report_extraction(video_id) | ||||
| 		mobj = re.search(r"download_url:'(.*)'", webpage) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: unable to extract media URL') | ||||
| 			return | ||||
| 		mediaURL = urllib.unquote(mobj.group(1)) | ||||
| 		mediaURL = mediaURL.replace('\\x3d', '\x3d') | ||||
| 		mediaURL = mediaURL.replace('\\x26', '\x26') | ||||
|  | ||||
| 		video_url = mediaURL | ||||
|  | ||||
| 		mobj = re.search(r'<title>(.*)</title>', webpage) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: unable to extract title') | ||||
| 			return | ||||
| 		video_title = mobj.group(1).decode('utf-8') | ||||
|  | ||||
| 		# Google Video doesn't show uploader nicknames? | ||||
| 		video_uploader = 'uploader' | ||||
|  | ||||
| 		try: | ||||
| 			# Process video information | ||||
| 			self._downloader.process_info({ | ||||
| 				'id':		video_id.decode('utf-8'), | ||||
| 				'url':		video_url.decode('utf-8'), | ||||
| 				'uploader':	video_uploader.decode('utf-8'), | ||||
| 				'title':	video_title.decode('utf-8'), | ||||
| 				'stitle':	video_title.decode('utf-8'), | ||||
| 				'ext':		video_extension.decode('utf-8'), | ||||
| 			}) | ||||
| 		except UnavailableFormatError: | ||||
| 			self._downloader.trouble(u'ERROR: format not available for video') | ||||
|  | ||||
|  | ||||
| class PhotobucketIE(InfoExtractor): | ||||
| 	"""Information extractor for photobucket.com.""" | ||||
|  | ||||
| 	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' | ||||
|  | ||||
| 	def __init__(self, downloader=None): | ||||
| 		InfoExtractor.__init__(self, downloader) | ||||
|  | ||||
| 	@staticmethod | ||||
| 	def suitable(url): | ||||
| 		return (re.match(PhotobucketIE._VALID_URL, url) is not None) | ||||
|  | ||||
| 	def report_download_webpage(self, video_id): | ||||
| 		"""Report webpage download.""" | ||||
| 		self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) | ||||
|  | ||||
| 	def report_extraction(self, video_id): | ||||
| 		"""Report information extraction.""" | ||||
| 		self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) | ||||
|  | ||||
| 	def _real_initialize(self): | ||||
| 		return | ||||
|  | ||||
| 	def _real_extract(self, url): | ||||
| 		# Extract id from URL | ||||
| 		mobj = re.match(self._VALID_URL, url) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) | ||||
| 			return | ||||
|  | ||||
| 		video_id = mobj.group(1) | ||||
|  | ||||
| 		video_extension = 'flv' | ||||
|  | ||||
| 		# Retrieve video webpage to extract further information | ||||
| 		request = urllib2.Request(url) | ||||
| 		try: | ||||
| 			self.report_download_webpage(video_id) | ||||
| 			webpage = urllib2.urlopen(request).read() | ||||
| 		except (urllib2.URLError, httplib.HTTPException, socket.error), err: | ||||
| 			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) | ||||
| 			return | ||||
|  | ||||
| 		# Extract URL, uploader, and title from webpage | ||||
| 		self.report_extraction(video_id) | ||||
| 		mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: unable to extract media URL') | ||||
| 			return | ||||
| 		mediaURL = urllib.unquote(mobj.group(1)) | ||||
|  | ||||
| 		video_url = mediaURL | ||||
|  | ||||
| 		mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage) | ||||
| 		if mobj is None: | ||||
| 			self._downloader.trouble(u'ERROR: unable to extract title') | ||||
| 			return | ||||
| 		video_title = mobj.group(1).decode('utf-8') | ||||
|  | ||||
| 		video_uploader = mobj.group(2).decode('utf-8') | ||||
|  | ||||
| 		try: | ||||
| 			# Process video information | ||||
| 			self._downloader.process_info({ | ||||
| 				'id':		video_id.decode('utf-8'), | ||||
| 				'url':		video_url.decode('utf-8'), | ||||
| 				'uploader':	video_uploader.decode('utf-8'), | ||||
| 				'title':	video_title.decode('utf-8'), | ||||
| 				'stitle':	video_title.decode('utf-8'), | ||||
| 				'ext':		video_extension.decode('utf-8'), | ||||
| 			}) | ||||
| 		except UnavailableFormatError: | ||||
| 			self._downloader.trouble(u'ERROR: format not available for video') | ||||
|  | ||||
|  | ||||
| class YoutubeSearchIE(InfoExtractor): | ||||
| 	"""Information Extractor for YouTube search queries.""" | ||||
| 	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' | ||||
| @@ -1314,6 +1468,8 @@ if __name__ == '__main__': | ||||
| 		youtube_pl_ie = YoutubePlaylistIE(youtube_ie) | ||||
| 		youtube_user_ie = YoutubeUserIE(youtube_ie) | ||||
| 		youtube_search_ie = YoutubeSearchIE(youtube_ie) | ||||
| 		google_ie = GoogleIE() | ||||
| 		photobucket_ie = PhotobucketIE() | ||||
|  | ||||
| 		# File downloader | ||||
| 		fd = FileDownloader({ | ||||
| @@ -1339,6 +1495,8 @@ if __name__ == '__main__': | ||||
| 		fd.add_info_extractor(youtube_user_ie) | ||||
| 		fd.add_info_extractor(metacafe_ie) | ||||
| 		fd.add_info_extractor(youtube_ie) | ||||
| 		fd.add_info_extractor(google_ie) | ||||
| 		fd.add_info_extractor(photobucket_ie) | ||||
|  | ||||
| 		# Update version | ||||
| 		if opts.update_self: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user