mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			39 Commits
		
	
	
		
			2014.02.03
			...
			2014.02.06
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 63424b6233 | ||
|  | 0bf35c5cf5 | ||
|  | 95c29381eb | ||
|  | 94c4abce7f | ||
|  | df872ec4e7 | ||
|  | 5de90176d9 | ||
|  | dcf3eec47a | ||
|  | e9e4f30d26 | ||
|  | 83cebd73d4 | ||
|  | 1df4229bd7 | ||
|  | 3c995527e9 | ||
|  | 7c62b568a2 | ||
|  | ccf9114e84 | ||
|  | d8061908bb | ||
|  | 211e17dd43 | ||
|  | 6cb38a9994 | ||
|  | fa7df757a7 | ||
|  | 8c82077619 | ||
|  | e5d1f9e50a | ||
|  | 7ee50ae7b5 | ||
|  | de563c9da0 | ||
|  | 50451f2a18 | ||
|  | 9bc70948e1 | ||
|  | 5dc733f071 | ||
|  | bc4850908c | ||
|  | 20650c8654 | ||
|  | 56dced2670 | ||
|  | eef726c04b | ||
|  | acf1555d76 | ||
|  | 22e7f1a6ec | ||
|  | 3c49325658 | ||
|  | bb1cd2bea1 | ||
|  | fdf1f8d4ce | ||
|  | 117c8c6b97 | ||
|  | 5cef4ff09b | ||
|  | 91264ce572 | ||
|  | c79ef8e1ae | ||
|  | 58d915df51 | ||
|  | 7881a64499 | 
| @@ -22,6 +22,7 @@ import socket | ||||
|  | ||||
| import youtube_dl.YoutubeDL | ||||
| from youtube_dl.utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_HTTPError, | ||||
| @@ -110,7 +111,7 @@ def generator(test_case): | ||||
|                     ydl.download([test_case['url']]) | ||||
|                 except (DownloadError, ExtractorError) as err: | ||||
|                     # Check if the exception is not a network related one | ||||
|                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): | ||||
|                     if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503): | ||||
|                         raise | ||||
|  | ||||
|                     if try_num == RETRIES: | ||||
|   | ||||
| @@ -34,6 +34,7 @@ from youtube_dl.extractor import ( | ||||
|     KhanAcademyIE, | ||||
|     EveryonesMixtapeIE, | ||||
|     RutubeChannelIE, | ||||
|     GoogleSearchIE, | ||||
|     GenericIE, | ||||
| ) | ||||
|  | ||||
| @@ -240,6 +241,14 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker') | ||||
|         self.assertEqual(len(result['entries']), 3) | ||||
|  | ||||
|     def test_GoogleSearch(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = GoogleSearchIE(dl) | ||||
|         result = ie.extract('gvsearch15:python language') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'python language') | ||||
|         self.assertEqual(result['title'], 'python language') | ||||
|         self.assertTrue(len(result['entries']) == 15) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -27,6 +27,12 @@ _TESTS = [ | ||||
|         85, | ||||
|         u'3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', | ||||
|     ), | ||||
|     ( | ||||
|         u'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', | ||||
|         u'js', | ||||
|         90, | ||||
|         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', | ||||
|     ), | ||||
| ] | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -54,12 +54,14 @@ from .ebaumsworld import EbaumsWorldIE | ||||
| from .ehow import EHowIE | ||||
| from .eighttracks import EightTracksIE | ||||
| from .eitb import EitbIE | ||||
| from .elpais import ElPaisIE | ||||
| from .escapist import EscapistIE | ||||
| from .everyonesmixtape import EveryonesMixtapeIE | ||||
| from .exfm import ExfmIE | ||||
| from .extremetube import ExtremeTubeIE | ||||
| from .facebook import FacebookIE | ||||
| from .faz import FazIE | ||||
| from .firstpost import FirstpostIE | ||||
| from .fktv import ( | ||||
|     FKTVIE, | ||||
|     FKTVPosteckeIE, | ||||
| @@ -96,6 +98,7 @@ from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE | ||||
| from .internetvideoarchive import InternetVideoArchiveIE | ||||
| from .iprima import IPrimaIE | ||||
| from .ivi import ( | ||||
|     IviIE, | ||||
|     IviCompilationIE | ||||
| @@ -117,6 +120,7 @@ from .lynda import ( | ||||
|     LyndaIE, | ||||
|     LyndaCourseIE | ||||
| ) | ||||
| from .m6 import M6IE | ||||
| from .macgamestore import MacGameStoreIE | ||||
| from .malemotion import MalemotionIE | ||||
| from .mdr import MDRIE | ||||
| @@ -126,6 +130,7 @@ from .mit import TechTVMITIE, MITIE | ||||
| from .mixcloud import MixcloudIE | ||||
| from .mpora import MporaIE | ||||
| from .mofosex import MofosexIE | ||||
| from .mooshare import MooshareIE | ||||
| from .mtv import ( | ||||
|     MTVIE, | ||||
|     MTVIggyIE, | ||||
| @@ -139,6 +144,7 @@ from .nba import NBAIE | ||||
| from .nbc import NBCNewsIE | ||||
| from .ndtv import NDTVIE | ||||
| from .newgrounds import NewgroundsIE | ||||
| from .nfb import NFBIE | ||||
| from .nhl import NHLIE, NHLVideocenterIE | ||||
| from .niconico import NiconicoIE | ||||
| from .ninegag import NineGagIE | ||||
|   | ||||
| @@ -1,19 +1,14 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
| import socket | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_http_client, | ||||
|     compat_str, | ||||
|     compat_urllib_error, | ||||
|     compat_urllib_request, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
| ) | ||||
|  | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class CollegeHumorIE(InfoExtractor): | ||||
| @@ -11,22 +12,25 @@ class CollegeHumorIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', | ||||
|         'file': '6902724.mp4', | ||||
|         'md5': 'dcc0f5c1c8be98dc33889a191f4c26bd', | ||||
|         'info_dict': { | ||||
|             'id': '6902724', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Comic-Con Cosplay Catastrophe', | ||||
|             'description': 'Fans get creative this year at San Diego.  Too', | ||||
|             'description': 'Fans get creative this year', | ||||
|             'age_limit': 13, | ||||
|         }, | ||||
|     }, | ||||
|     { | ||||
|         'url': 'http://www.collegehumor.com/video/3505939/font-conference', | ||||
|         'file': '3505939.mp4', | ||||
|         'md5': '72fa701d8ef38664a4dbb9e2ab721816', | ||||
|         'info_dict': { | ||||
|             'id': '3505939', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Font Conference', | ||||
|             'description': 'This video wasn\'t long enough, so we made it double-spaced.', | ||||
|             'description': 'This video wasn\'t long enough,', | ||||
|             'age_limit': 10, | ||||
|             'duration': 179, | ||||
|         }, | ||||
|     }, | ||||
|     # embedded youtube video | ||||
| @@ -38,7 +42,7 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'title': 'Funny Dogs Protecting Babies Compilation 2014 [NEW HD]', | ||||
|             'uploader': 'Funnyplox TV', | ||||
|             'uploader_id': 'funnyploxtv', | ||||
|             'description': 'md5:506f69f7a297ed698ced3375f2363b0e', | ||||
|             'description': 'md5:11812366244110c3523968aa74f02521', | ||||
|             'upload_date': '20140128', | ||||
|         }, | ||||
|         'params': { | ||||
| @@ -82,6 +86,8 @@ class CollegeHumorIE(InfoExtractor): | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         duration = int_or_none(vdata.get('duration'), 1000) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': vdata['title'], | ||||
| @@ -89,4 +95,5 @@ class CollegeHumorIE(InfoExtractor): | ||||
|             'thumbnail': vdata.get('thumbnail'), | ||||
|             'formats': formats, | ||||
|             'age_limit': age_limit, | ||||
|             'duration': duration, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										58
									
								
								youtube_dl/extractor/elpais.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								youtube_dl/extractor/elpais.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
|  | ||||
|  | ||||
| class ElPaisIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:[^.]+\.)?elpais\.com/.*/(?P<id>[^/#?]+)\.html(?:$|[?#])' | ||||
|     IE_DESCR = 'El País' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://blogs.elpais.com/la-voz-de-inaki/2014/02/tiempo-nuevo-recetas-viejas.html', | ||||
|         'md5': '98406f301f19562170ec071b83433d55', | ||||
|         'info_dict': { | ||||
|             'id': 'tiempo-nuevo-recetas-viejas', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Tiempo nuevo, recetas viejas', | ||||
|             'description': 'De lunes a viernes, a partir de las ocho de la mañana, Iñaki Gabilondo nos cuenta su visión de la actualidad nacional e internacional.', | ||||
|             'upload_date': '20140206', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         prefix = self._html_search_regex( | ||||
|             r'var url_cache = "([^"]+)";', webpage, 'URL prefix') | ||||
|         video_suffix = self._search_regex( | ||||
|             r"URLMediaFile = url_cache \+ '([^']+)'", webpage, 'video URL') | ||||
|         video_url = prefix + video_suffix | ||||
|         thumbnail_suffix = self._search_regex( | ||||
|             r"URLMediaStill = url_cache \+ '([^']+)'", webpage, 'thumbnail URL', | ||||
|             fatal=False) | ||||
|         thumbnail = ( | ||||
|             None if thumbnail_suffix is None | ||||
|             else prefix + thumbnail_suffix) | ||||
|         title = self._html_search_regex( | ||||
|             '<h2 class="entry-header entry-title.*?>(.*?)</h2>', | ||||
|             webpage, 'title') | ||||
|         date_str = self._search_regex( | ||||
|             r'<p class="date-header date-int updated"\s+title="([^"]+)">', | ||||
|             webpage, 'upload date', fatal=False) | ||||
|         upload_date = (None if date_str is None else unified_strdate(date_str)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': title, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': thumbnail, | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
							
								
								
									
										38
									
								
								youtube_dl/extractor/firstpost.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								youtube_dl/extractor/firstpost.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class FirstpostIE(InfoExtractor): | ||||
|     IE_NAME = 'Firstpost.com' | ||||
|     _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html', | ||||
|         'md5': 'ee9114957692f01fb1263ed87039112a', | ||||
|         'info_dict': { | ||||
|             'id': '1025403', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'India to launch indigenous aircraft carrier INS Vikrant today', | ||||
|             'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<div.*?name="div_video".*?flashvars="([^"]+)">', | ||||
|             webpage, 'video URL') | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|         } | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import re | ||||
|  | ||||
| @@ -8,32 +10,42 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class GoogleSearchIE(SearchInfoExtractor): | ||||
|     IE_DESC = u'Google Video search' | ||||
|     _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"' | ||||
|     IE_DESC = 'Google Video search' | ||||
|     _MAX_RESULTS = 1000 | ||||
|     IE_NAME = u'video.google:search' | ||||
|     IE_NAME = 'video.google:search' | ||||
|     _SEARCH_KEY = 'gvsearch' | ||||
|  | ||||
|     def _get_n_results(self, query, n): | ||||
|         """Get a specified number of results for a query""" | ||||
|  | ||||
|         entries = [] | ||||
|         res = { | ||||
|             '_type': 'playlist', | ||||
|             'id': query, | ||||
|             'entries': [] | ||||
|             'title': query, | ||||
|         } | ||||
|  | ||||
|         for pagenum in itertools.count(1): | ||||
|             result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (compat_urllib_parse.quote_plus(query), pagenum*10) | ||||
|             webpage = self._download_webpage(result_url, u'gvsearch:' + query, | ||||
|                                              note='Downloading result page ' + str(pagenum)) | ||||
|         for pagenum in itertools.count(): | ||||
|             result_url = ( | ||||
|                 'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' | ||||
|                 % (compat_urllib_parse.quote_plus(query), pagenum * 10)) | ||||
|  | ||||
|             for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage): | ||||
|                 e = { | ||||
|             webpage = self._download_webpage( | ||||
|                 result_url, 'gvsearch:' + query, | ||||
|                 note='Downloading result page ' + str(pagenum + 1)) | ||||
|  | ||||
|             for hit_idx, mobj in enumerate(re.finditer( | ||||
|                     r'<h3 class="r"><a href="([^"]+)"', webpage)): | ||||
|  | ||||
|                 # Skip playlists | ||||
|                 if not re.search(r'id="vidthumb%d"' % (hit_idx + 1), webpage): | ||||
|                     continue | ||||
|  | ||||
|                 entries.append({ | ||||
|                     '_type': 'url', | ||||
|                     'url': mobj.group(1) | ||||
|                 } | ||||
|                 res['entries'].append(e) | ||||
|                 }) | ||||
|  | ||||
|             if (pagenum * 10 > n) or not re.search(self._MORE_PAGES_INDICATOR, webpage): | ||||
|             if (len(entries) >= n) or not re.search(r'class="pn" id="pnnext"', webpage): | ||||
|                 res['entries'] = entries[:n] | ||||
|                 return res | ||||
|   | ||||
| @@ -1,39 +1,36 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class InaIE(InfoExtractor): | ||||
|     """Information Extractor for Ina.fr""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I?[A-F0-9]+)/.*' | ||||
|     _VALID_URL = r'http://(?:www\.)?ina\.fr/video/(?P<id>I?[A-Z0-9]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', | ||||
|         u'file': u'I12055569.mp4', | ||||
|         u'md5': u'a667021bf2b41f8dc6049479d9bb38a3', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Fran\u00e7ois Hollande \"Je crois que c'est clair\"" | ||||
|         'url': 'http://www.ina.fr/video/I12055569/francois-hollande-je-crois-que-c-est-clair-video.html', | ||||
|         'md5': 'a667021bf2b41f8dc6049479d9bb38a3', | ||||
|         'info_dict': { | ||||
|             'id': 'I12055569', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'François Hollande "Je crois que c\'est clair"', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         mrss_url='http://player.ina.fr/notices/%s.mrss' % video_id | ||||
|         video_extension = 'mp4' | ||||
|         webpage = self._download_webpage(mrss_url, video_id) | ||||
|         mrss_url = 'http://player.ina.fr/notices/%s.mrss' % video_id | ||||
|         info_doc = self._download_xml(mrss_url, video_id) | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)', | ||||
|             webpage, u'video URL') | ||||
|         video_url = info_doc.find('.//{http://search.yahoo.com/mrss/}player').attrib['url'] | ||||
|  | ||||
|         video_title = self._search_regex(r'<title><!\[CDATA\[(?P<titre>.*?)]]></title>', | ||||
|             webpage, u'title') | ||||
|  | ||||
|         return [{ | ||||
|             'id':       video_id, | ||||
|             'url':      video_url, | ||||
|             'ext':      video_extension, | ||||
|             'title':    video_title, | ||||
|         }] | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'title': info_doc.find('.//title').text, | ||||
|         } | ||||
|   | ||||
							
								
								
									
										85
									
								
								youtube_dl/extractor/iprima.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								youtube_dl/extractor/iprima.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,85 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| from random import random | ||||
| from math import floor | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import compat_urllib_request | ||||
|  | ||||
|  | ||||
| class IPrimaIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://play\.iprima\.cz/(?P<videogroup>.+)/(?P<videoid>.+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://play.iprima.cz/particka/particka-92', | ||||
|         'info_dict': { | ||||
|             'id': '39152', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Partička (92)', | ||||
|             'description': 'md5:3740fda51464da35a2d4d0670b8e4fd6', | ||||
|             'thumbnail': 'http://play.iprima.cz/sites/default/files/image_crops/image_620x349/3/491483_particka-92_image_620x349.jpg', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('videoid') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player_url = 'http://embed.livebox.cz/iprimaplay/player-embed-v2.js?__tok%s__=%s' % ( | ||||
|                          floor(random()*1073741824), | ||||
|                          floor(random()*1073741824)) | ||||
|  | ||||
|         req = compat_urllib_request.Request(player_url) | ||||
|         req.add_header('Referer', url) | ||||
|         playerpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         base_url = ''.join(re.findall(r"embed\['stream'\] = '(.+?)'.+'(\?auth=)'.+'(.+?)';", playerpage)[1]) | ||||
|  | ||||
|         zoneGEO = self._html_search_regex(r'"zoneGEO":(.+?),', webpage, 'zoneGEO') | ||||
|  | ||||
|         if zoneGEO != '0': | ||||
|             base_url = base_url.replace('token', 'token_'+zoneGEO) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['lq', 'hq', 'hd']: | ||||
|             filename = self._html_search_regex(r'"%s_id":(.+?),' % format_id, webpage, 'filename') | ||||
|  | ||||
|             if filename == 'null': | ||||
|                 continue | ||||
|  | ||||
|             real_id = self._search_regex(r'Prima-[0-9]{10}-([0-9]+)_', filename, 'real video id') | ||||
|  | ||||
|             if format_id == 'lq': | ||||
|                 quality = 0 | ||||
|             elif format_id == 'hq': | ||||
|                 quality = 1 | ||||
|             elif format_id == 'hd': | ||||
|                 quality = 2 | ||||
|                 filename = 'hq/'+filename | ||||
|  | ||||
|             formats.append({ | ||||
|                 'format_id': format_id, | ||||
|                 'url': base_url, | ||||
|                 'quality': quality, | ||||
|                 'play_path': 'mp4:'+filename.replace('"', '')[:-4], | ||||
|                 'rtmp_live': True, | ||||
|                 'ext': 'flv', | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': real_id, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
| @@ -31,7 +31,7 @@ class LifeNewsIE(InfoExtractor): | ||||
|         webpage = self._download_webpage('http://lifenews.ru/mobile/news/%s' % video_id, video_id, 'Downloading page') | ||||
|  | ||||
|         video_url = self._html_search_regex( | ||||
|             r'<video.*?src="([^"]+)"></video>', webpage, 'video URL') | ||||
|             r'<video.*?src="([^"]+)".*?></video>', webpage, 'video URL') | ||||
|          | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'<video.*?poster="([^"]+)".*?"></video>', webpage, 'video thumbnail') | ||||
|   | ||||
							
								
								
									
										56
									
								
								youtube_dl/extractor/m6.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								youtube_dl/extractor/m6.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,56 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class M6IE(InfoExtractor): | ||||
|     IE_NAME = 'm6' | ||||
|     _VALID_URL = r'http://(?:www\.)?m6\.fr/[^/]+/videos/(?P<id>\d+)-[^\.]+\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.m6.fr/emission-les_reines_du_shopping/videos/11323908-emeline_est_la_reine_du_shopping_sur_le_theme_ma_fete_d_8217_anniversaire.html', | ||||
|         'md5': '242994a87de2c316891428e0176bcb77', | ||||
|         'info_dict': { | ||||
|             'id': '11323908', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Emeline est la Reine du Shopping sur le thème « Ma fête d’anniversaire ! »', | ||||
|             'description': 'md5:1212ae8fb4b7baa4dc3886c5676007c2', | ||||
|             'duration': 100, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         rss = self._download_xml('http://ws.m6.fr/v1/video/info/m6/bonus/%s' % video_id, video_id, | ||||
|             'Downloading video RSS') | ||||
|  | ||||
|         title = rss.find('./channel/item/title').text | ||||
|         description = rss.find('./channel/item/description').text | ||||
|         thumbnail = rss.find('./channel/item/visuel_clip_big').text | ||||
|         duration = int(rss.find('./channel/item/duration').text) | ||||
|         view_count = int(rss.find('./channel/item/nombre_vues').text) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_id in ['lq', 'sd', 'hq', 'hd']: | ||||
|             video_url = rss.find('./channel/item/url_video_%s' % format_id) | ||||
|             if video_url is None: | ||||
|                 continue | ||||
|             formats.append({ | ||||
|                 'url': video_url.text, | ||||
|                 'format_id': format_id, | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										114
									
								
								youtube_dl/extractor/mooshare.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								youtube_dl/extractor/mooshare.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class MooshareIE(InfoExtractor): | ||||
|     IE_NAME = 'mooshare' | ||||
|     IE_DESC = 'Mooshare.biz' | ||||
|     _VALID_URL = r'http://mooshare\.biz/(?P<id>[\da-z]{12})' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://mooshare.biz/8dqtk4bjbp8g', | ||||
|             'md5': '4e14f9562928aecd2e42c6f341c8feba', | ||||
|             'info_dict': { | ||||
|                 'id': '8dqtk4bjbp8g', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Comedy Football 2011 - (part 1-2)', | ||||
|                 'duration': 893, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://mooshare.biz/aipjtoc4g95j', | ||||
|             'info_dict': { | ||||
|                 'id': 'aipjtoc4g95j', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Orange Caramel  Dashing Through the Snow', | ||||
|                 'duration': 212, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage(url, video_id, 'Downloading page') | ||||
|  | ||||
|         if re.search(r'>Video Not Found or Deleted<', page) is not None: | ||||
|             raise ExtractorError(u'Video %s does not exist' % video_id, expected=True) | ||||
|  | ||||
|         hash_key = self._html_search_regex(r'<input type="hidden" name="hash" value="([^"]+)">', page, 'hash') | ||||
|         title = self._html_search_regex(r'(?m)<div class="blockTitle">\s*<h2>Watch ([^<]+)</h2>', page, 'title') | ||||
|  | ||||
|         download_form = { | ||||
|             'op': 'download1', | ||||
|             'id': video_id, | ||||
|             'hash': hash_key, | ||||
|         } | ||||
|  | ||||
|         request = compat_urllib_request.Request( | ||||
|             'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form)) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|  | ||||
|         self.to_screen('%s: Waiting for timeout' % video_id) | ||||
|         time.sleep(5) | ||||
|  | ||||
|         video_page = self._download_webpage(request, video_id, 'Downloading video page') | ||||
|  | ||||
|         thumbnail = self._html_search_regex(r'image:\s*"([^"]+)",', video_page, 'thumbnail', fatal=False) | ||||
|         duration_str = self._html_search_regex(r'duration:\s*"(\d+)",', video_page, 'duration', fatal=False) | ||||
|         duration = int(duration_str) if duration_str is not None else None | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
|         # SD video | ||||
|         mobj = re.search(r'(?m)file:\s*"(?P<url>[^"]+)",\s*provider:', video_page) | ||||
|         if mobj is not None: | ||||
|             formats.append({ | ||||
|                 'url': mobj.group('url'), | ||||
|                 'format_id': 'sd', | ||||
|                 'format': 'SD', | ||||
|             }) | ||||
|  | ||||
|         # HD video | ||||
|         mobj = re.search(r'\'hd-2\': { file: \'(?P<url>[^\']+)\' },', video_page) | ||||
|         if mobj is not None: | ||||
|             formats.append({ | ||||
|                 'url': mobj.group('url'), | ||||
|                 'format_id': 'hd', | ||||
|                 'format': 'HD', | ||||
|             }) | ||||
|  | ||||
|         # rtmp video | ||||
|         mobj = re.search(r'(?m)file: "(?P<playpath>[^"]+)",\s*streamer: "(?P<rtmpurl>rtmp://[^"]+)",', video_page) | ||||
|         if mobj is not None: | ||||
|             formats.append({ | ||||
|                 'url': mobj.group('rtmpurl'), | ||||
|                 'play_path': mobj.group('playpath'), | ||||
|                 'rtmp_live': False, | ||||
|                 'ext': 'mp4', | ||||
|                 'format_id': 'rtmp', | ||||
|                 'format': 'HD', | ||||
|             }) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -82,10 +82,13 @@ class MTVServicesInfoExtractor(InfoExtractor): | ||||
|             title_el = find_xpath_attr( | ||||
|                 itemdoc, './/{http://search.yahoo.com/mrss/}category', | ||||
|                 'scheme', 'urn:mtvn:video_title') | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//title') | ||||
|             if title_el.text is None: | ||||
|                 title_el = None | ||||
|         if title_el is None: | ||||
|             title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title') | ||||
|  | ||||
|         title = title_el.text | ||||
|         if title is None: | ||||
|             raise ExtractorError('Could not find video title') | ||||
|   | ||||
							
								
								
									
										76
									
								
								youtube_dl/extractor/nfb.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								youtube_dl/extractor/nfb.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,76 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NFBIE(InfoExtractor): | ||||
|     IE_NAME = 'nfb' | ||||
|     IE_DESC = 'National Film Board of Canada' | ||||
|     _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P<id>[\da-z_-]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', | ||||
|         'info_dict': { | ||||
|             'id': 'qallunaat_why_white_people_are_funny', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Qallunaat! Why White People Are Funny ', | ||||
|             'description': 'md5:836d8aff55e087d04d9f6df554d4e038', | ||||
|             'duration': 3128, | ||||
|             'uploader': 'Mark Sandiford', | ||||
|             'uploader_id': 'mark-sandiford', | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
|             'skip_download': True, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') | ||||
|  | ||||
|         uploader_id = self._html_search_regex(r'<a class="director-link" href="/explore-all-directors/([^/]+)/"', | ||||
|             page, 'director id', fatal=False) | ||||
|         uploader = self._html_search_regex(r'<em class="director-name" itemprop="name">([^<]+)</em>', | ||||
|             page, 'director name', fatal=False) | ||||
|  | ||||
|         request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, | ||||
|             compat_urllib_parse.urlencode({'getConfig': 'true'})) | ||||
|         request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') | ||||
|  | ||||
|         config = self._download_xml(request, video_id, 'Downloading player config XML') | ||||
|  | ||||
|         thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text | ||||
|         video = config.find("./player/stream/media[@type='video']") | ||||
|         duration = int(video.get('duration')) | ||||
|         title = video.find('title').text | ||||
|         description = video.find('description').text | ||||
|  | ||||
|         # It seems assets always go from lower to better quality, so no need to sort | ||||
|         formats = [{ | ||||
|             'url': x.find('default/streamerURI').text + '/', | ||||
|             'play_path': x.find('default/url').text, | ||||
|             'rtmp_live': False, | ||||
|             'ext': 'mp4', | ||||
|             'format_id': x.get('quality'), | ||||
|         } for x in video.findall('assets/asset')] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'uploader': uploader, | ||||
|             'uploader_id': uploader_id, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,34 +1,68 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class PBSIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://video\.pbs\.org/video/(?P<id>\d+)/?' | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|         (?: | ||||
|             # Direct video URL | ||||
|             video\.pbs\.org/video/(?P<id>[0-9]+)/? | | ||||
|             # Article with embedded player | ||||
|            (?:www\.)?pbs\.org/(?:[^/]+/){2,5}(?P<presumptive_id>[^/]+)/?(?:$|[?\#]) | | ||||
|            # Player | ||||
|            video\.pbs\.org/partnerplayer/(?P<player_id>[^/]+)/ | ||||
|         ) | ||||
|     ''' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://video.pbs.org/video/2365006249/', | ||||
|         u'file': u'2365006249.mp4', | ||||
|         u'md5': 'ce1888486f0908d555a8093cac9a7362', | ||||
|         u'info_dict': { | ||||
|             u'title': u'A More Perfect Union', | ||||
|             u'description': u'md5:ba0c207295339c8d6eced00b7c363c6a', | ||||
|             u'duration': 3190, | ||||
|         'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/', | ||||
|         'md5': 'ce1888486f0908d555a8093cac9a7362', | ||||
|         'info_dict': { | ||||
|             'id': '2365006249', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'A More Perfect Union', | ||||
|             'description': 'md5:ba0c207295339c8d6eced00b7c363c6a', | ||||
|             'duration': 3190, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         presumptive_id = mobj.group('presumptive_id') | ||||
|         display_id = presumptive_id | ||||
|         if presumptive_id: | ||||
|             webpage = self._download_webpage(url, display_id) | ||||
|             url = self._search_regex( | ||||
|                 r'<iframe\s+id=["\']partnerPlayer["\'].*?\s+src=["\'](.*?)["\']>', | ||||
|                 webpage, 'player URL') | ||||
|             mobj = re.match(self._VALID_URL, url) | ||||
|  | ||||
|         player_id = mobj.group('player_id') | ||||
|         if not display_id: | ||||
|             display_id = player_id | ||||
|         if player_id: | ||||
|             player_page = self._download_webpage( | ||||
|                 url, display_id, note='Downloading player page', | ||||
|                 errnote='Could not download player page') | ||||
|             video_id = self._search_regex( | ||||
|                 r'<div\s+id="video_([0-9]+)"', player_page, 'video ID') | ||||
|         else: | ||||
|             video_id = mobj.group('id') | ||||
|             display_id = video_id | ||||
|  | ||||
|         info_url = 'http://video.pbs.org/videoInfo/%s?format=json' % video_id | ||||
|         info_page = self._download_webpage(info_url, video_id) | ||||
|         info =json.loads(info_page) | ||||
|         return {'id': video_id, | ||||
|                 'title': info['title'], | ||||
|                 'url': info['alternate_encoding']['url'], | ||||
|                 'ext': 'mp4', | ||||
|                 'description': info['program'].get('description'), | ||||
|                 'thumbnail': info.get('image_url'), | ||||
|                 'duration': info.get('duration'), | ||||
|                 } | ||||
|         info = self._download_json(info_url, display_id) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['title'], | ||||
|             'url': info['alternate_encoding']['url'], | ||||
|             'ext': 'mp4', | ||||
|             'description': info['program'].get('description'), | ||||
|             'thumbnail': info.get('image_url'), | ||||
|             'duration': info.get('duration'), | ||||
|         } | ||||
|   | ||||
| @@ -1,36 +1,38 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class StatigramIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)' | ||||
|     _VALID_URL = r'https?://(www\.)?statigr\.am/p/(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://statigr.am/p/522207370455279102_24101272', | ||||
|         u'file': u'522207370455279102_24101272.mp4', | ||||
|         u'md5': u'6eb93b882a3ded7c378ee1d6884b1814', | ||||
|         u'info_dict': { | ||||
|             u'uploader_id': u'aguynamedpatrick', | ||||
|             u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)', | ||||
|         'url': 'http://statigr.am/p/522207370455279102_24101272', | ||||
|         'md5': '6eb93b882a3ded7c378ee1d6884b1814', | ||||
|         'info_dict': { | ||||
|             'id': '522207370455279102_24101272', | ||||
|             'ext': 'mp4', | ||||
|             'uploader_id': 'aguynamedpatrick', | ||||
|             'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         html_title = self._html_search_regex( | ||||
|             r'<title>(.+?)</title>', | ||||
|             webpage, u'title') | ||||
|             webpage, 'title') | ||||
|         title = re.sub(r'(?: *\(Videos?\))? \| Statigram$', '', html_title) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r'@([^ ]+)', title, u'uploader name', fatal=False) | ||||
|         ext = 'mp4' | ||||
|             r'@([^ ]+)', title, 'uploader name', fatal=False) | ||||
|  | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       self._og_search_video_url(webpage), | ||||
|             'ext':       ext, | ||||
|             'title':     title, | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': self._og_search_video_url(webpage), | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader_id' : uploader_id | ||||
|         }] | ||||
|             'uploader_id': uploader_id | ||||
|         } | ||||
|   | ||||
| @@ -68,13 +68,14 @@ class SubtitlesInfoExtractor(InfoExtractor): | ||||
|     def _request_subtitle_url(self, sub_lang, url): | ||||
|         """ makes the http request for the subtitle """ | ||||
|         try: | ||||
|             return self._download_subtitle_url(sub_lang, url) | ||||
|             sub = self._download_subtitle_url(sub_lang, url) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning(u'unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             self._downloader.report_warning(u'Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         """ | ||||
|   | ||||
| @@ -1,22 +1,23 @@ | ||||
| #coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
| ) | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| class ThisAVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*' | ||||
|     _TEST = { | ||||
|         u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html", | ||||
|         u"file": u"47734.flv", | ||||
|         u"md5": u"0480f1ef3932d901f0e0e719f188f19b", | ||||
|         u"info_dict": { | ||||
|             u"title": u"高樹マリア - Just fit", | ||||
|             u"uploader": u"dj7970", | ||||
|             u"uploader_id": u"dj7970" | ||||
|         'url': 'http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html', | ||||
|         'md5': '0480f1ef3932d901f0e0e719f188f19b', | ||||
|         'info_dict': { | ||||
|             'id': '47734', | ||||
|             'ext': 'flv', | ||||
|             'title': '高樹マリア - Just fit', | ||||
|             'uploader': 'dj7970', | ||||
|             'uploader_id': 'dj7970' | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -25,19 +26,18 @@ class ThisAVIE(InfoExtractor): | ||||
|  | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title') | ||||
|         title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, 'title') | ||||
|         video_url = self._html_search_regex( | ||||
|             r"addVariable\('file','([^']+)'\);", webpage, u'video url') | ||||
|             r"addVariable\('file','([^']+)'\);", webpage, 'video url') | ||||
|         uploader = self._html_search_regex( | ||||
|             r': <a href="http://www.thisav.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>', | ||||
|             webpage, u'uploader name', fatal=False) | ||||
|             webpage, 'uploader name', fatal=False) | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r': <a href="http://www.thisav.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>', | ||||
|             webpage, u'uploader id', fatal=False) | ||||
|             webpage, 'uploader id', fatal=False) | ||||
|         ext = determine_ext(video_url) | ||||
|          | ||||
|         return { | ||||
|             '_type':       'video', | ||||
|             'id':          video_id, | ||||
|             'url':         video_url, | ||||
|             'uploader':    uploader, | ||||
|   | ||||
| @@ -1,4 +1,6 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -9,25 +11,25 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class TouTvIE(InfoExtractor): | ||||
|     IE_NAME = u'tou.tv' | ||||
|     IE_NAME = 'tou.tv' | ||||
|     _VALID_URL = r'https?://www\.tou\.tv/(?P<id>[a-zA-Z0-9_-]+(?:/(?P<episode>S[0-9]+E[0-9]+)))' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.tou.tv/30-vies/S04E41', | ||||
|         u'file': u'30-vies_S04E41.mp4', | ||||
|         u'info_dict': { | ||||
|             u'title': u'30 vies Saison 4 / Épisode 41', | ||||
|             u'description': u'md5:da363002db82ccbe4dafeb9cab039b09', | ||||
|             u'age_limit': 8, | ||||
|             u'uploader': u'Groupe des Nouveaux Médias', | ||||
|             u'duration': 1296, | ||||
|             u'upload_date': u'20131118', | ||||
|             u'thumbnail': u'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg', | ||||
|         'url': 'http://www.tou.tv/30-vies/S04E41', | ||||
|         'file': '30-vies_S04E41.mp4', | ||||
|         'info_dict': { | ||||
|             'title': '30 vies Saison 4 / Épisode 41', | ||||
|             'description': 'md5:da363002db82ccbe4dafeb9cab039b09', | ||||
|             'age_limit': 8, | ||||
|             'uploader': 'Groupe des Nouveaux Médias', | ||||
|             'duration': 1296, | ||||
|             'upload_date': '20131118', | ||||
|             'thumbnail': 'http://static.tou.tv/medias/images/2013-11-18_19_00_00_30VIES_0341_01_L.jpeg', | ||||
|         }, | ||||
|         u'params': { | ||||
|             u'skip_download': True,  # Requires rtmpdump | ||||
|         'params': { | ||||
|             'skip_download': True,  # Requires rtmpdump | ||||
|         }, | ||||
|         u'skip': 'Only available in Canada' | ||||
|         'skip': 'Only available in Canada' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -36,25 +38,25 @@ class TouTvIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         mediaId = self._search_regex( | ||||
|             r'"idMedia":\s*"([^"]+)"', webpage, u'media ID') | ||||
|             r'"idMedia":\s*"([^"]+)"', webpage, 'media ID') | ||||
|  | ||||
|         streams_url = u'http://release.theplatform.com/content.select?pid=' + mediaId | ||||
|         streams_url = 'http://release.theplatform.com/content.select?pid=' + mediaId | ||||
|         streams_doc = self._download_xml( | ||||
|             streams_url, video_id, note=u'Downloading stream list') | ||||
|             streams_url, video_id, note='Downloading stream list') | ||||
|  | ||||
|         video_url = next(n.text | ||||
|                          for n in streams_doc.findall('.//choice/url') | ||||
|                          if u'//ad.doubleclick' not in n.text) | ||||
|                          if '//ad.doubleclick' not in n.text) | ||||
|         if video_url.endswith('/Unavailable.flv'): | ||||
|             raise ExtractorError( | ||||
|                 u'Access to this video is blocked from outside of Canada', | ||||
|                 'Access to this video is blocked from outside of Canada', | ||||
|                 expected=True) | ||||
|  | ||||
|         duration_str = self._html_search_meta( | ||||
|             'video:duration', webpage, u'duration') | ||||
|             'video:duration', webpage, 'duration') | ||||
|         duration = int(duration_str) if duration_str else None | ||||
|         upload_date_str = self._html_search_meta( | ||||
|             'video:release_date', webpage, u'upload date') | ||||
|             'video:release_date', webpage, 'upload date') | ||||
|         upload_date = unified_strdate(upload_date_str) if upload_date_str else None | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -6,6 +6,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class TrailerAddictIE(InfoExtractor): | ||||
|     _WORKING = False | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?traileraddict\.com/(?:trailer|clip)/(?P<movie>.+?)/(?P<trailer_name>.+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.traileraddict.com/trailer/prince-avalanche/trailer', | ||||
|   | ||||
| @@ -1,3 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -10,45 +13,44 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class Vbox7IE(InfoExtractor): | ||||
|     """Information Extractor for Vbox7""" | ||||
|     _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)' | ||||
|     _VALID_URL = r'http://(www\.)?vbox7\.com/play:(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         u'url': u'http://vbox7.com/play:249bb972c2', | ||||
|         u'file': u'249bb972c2.flv', | ||||
|         u'md5': u'99f65c0c9ef9b682b97313e052734c3f', | ||||
|         u'info_dict': { | ||||
|             u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430" | ||||
|         } | ||||
|         'url': 'http://vbox7.com/play:249bb972c2', | ||||
|         'md5': '99f65c0c9ef9b682b97313e052734c3f', | ||||
|         'info_dict': { | ||||
|             'id': '249bb972c2', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Смях! Чудо - чист за секунди - Скрита камера', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self,url): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         if mobj is None: | ||||
|             raise ExtractorError(u'Invalid URL: %s' % url) | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         redirect_page, urlh = self._download_webpage_handle(url, video_id) | ||||
|         new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location') | ||||
|         new_location = self._search_regex(r'window\.location = \'(.*)\';', | ||||
|             redirect_page, 'redirect location') | ||||
|         redirect_url = urlh.geturl() + new_location | ||||
|         webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page') | ||||
|         webpage = self._download_webpage(redirect_url, video_id, | ||||
|             'Downloading redirect page') | ||||
|  | ||||
|         title = self._html_search_regex(r'<title>(.*)</title>', | ||||
|             webpage, u'title').split('/')[0].strip() | ||||
|             webpage, 'title').split('/')[0].strip() | ||||
|  | ||||
|         ext = "flv" | ||||
|         info_url = "http://vbox7.com/play/magare.do" | ||||
|         data = compat_urllib_parse.urlencode({'as3':'1','vid':video_id}) | ||||
|         data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id}) | ||||
|         info_request = compat_urllib_request.Request(info_url, data) | ||||
|         info_request.add_header('Content-Type', 'application/x-www-form-urlencoded') | ||||
|         info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage') | ||||
|         info_response = self._download_webpage(info_request, video_id, 'Downloading info webpage') | ||||
|         if info_response is None: | ||||
|             raise ExtractorError(u'Unable to extract the media url') | ||||
|             raise ExtractorError('Unable to extract the media url') | ||||
|         (final_url, thumbnail_url) = map(lambda x: x.split('=')[1], info_response.split('&')) | ||||
|  | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       final_url, | ||||
|             'ext':       ext, | ||||
|             'title':     title, | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': final_url, | ||||
|             'ext': 'flv', | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail_url, | ||||
|         }] | ||||
|         } | ||||
|   | ||||
| @@ -1,18 +1,21 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class VineIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://)?(?:www\.)?vine\.co/v/(?P<id>\w+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?vine\.co/v/(?P<id>\w+)' | ||||
|     _TEST = { | ||||
|         u'url': u'https://vine.co/v/b9KOOWX7HUx', | ||||
|         u'file': u'b9KOOWX7HUx.mp4', | ||||
|         u'md5': u'2f36fed6235b16da96ce9b4dc890940d', | ||||
|         u'info_dict': { | ||||
|             u"uploader": u"Jack Dorsey",  | ||||
|             u"title": u"Chicken." | ||||
|         } | ||||
|         'url': 'https://vine.co/v/b9KOOWX7HUx', | ||||
|         'md5': '2f36fed6235b16da96ce9b4dc890940d', | ||||
|         'info_dict': { | ||||
|             'id': 'b9KOOWX7HUx', | ||||
|             'ext': 'mp4', | ||||
|             'uploader': 'Jack Dorsey', | ||||
|             'title': 'Chicken.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -24,17 +27,17 @@ class VineIE(InfoExtractor): | ||||
|  | ||||
|         self.report_extraction(video_id) | ||||
|  | ||||
|         video_url = self._html_search_regex(r'<meta property="twitter:player:stream" content="(.+?)"', | ||||
|             webpage, u'video URL') | ||||
|         video_url = self._html_search_meta('twitter:player:stream', webpage, | ||||
|             'video URL') | ||||
|  | ||||
|         uploader = self._html_search_regex(r'<p class="username">(.*?)</p>', | ||||
|             webpage, u'uploader', fatal=False, flags=re.DOTALL) | ||||
|             webpage, 'uploader', fatal=False, flags=re.DOTALL) | ||||
|  | ||||
|         return [{ | ||||
|             'id':        video_id, | ||||
|             'url':       video_url, | ||||
|             'ext':       'mp4', | ||||
|             'title':     self._og_search_title(webpage), | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_url, | ||||
|             'ext': 'mp4', | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'uploader':  uploader, | ||||
|         }] | ||||
|             'uploader': uploader, | ||||
|         } | ||||
|   | ||||
| @@ -502,7 +502,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 return a % b | ||||
|  | ||||
|             m = re.match( | ||||
|                 r'^(?P<func>[a-zA-Z]+)\((?P<args>[a-z0-9,]+)\)$', expr) | ||||
|                 r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr) | ||||
|             if m: | ||||
|                 fname = m.group('func') | ||||
|                 if fname not in functions: | ||||
|   | ||||
| @@ -751,13 +751,14 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | ||||
|     https_request = http_request | ||||
|     https_response = http_response | ||||
|  | ||||
|  | ||||
| def unified_strdate(date_str): | ||||
|     """Return a string with the date in the format YYYYMMDD""" | ||||
|     upload_date = None | ||||
|     #Replace commas | ||||
|     date_str = date_str.replace(',',' ') | ||||
|     # %z (UTC offset) is only supported in python>=3.2 | ||||
|     date_str = re.sub(r' (\+|-)[\d]*$', '', date_str) | ||||
|     date_str = re.sub(r' ?(\+|-)[0-9:]*$', '', date_str) | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|         '%B %d %Y', | ||||
| @@ -771,11 +772,12 @@ def unified_strdate(date_str): | ||||
|         '%Y-%m-%dT%H:%M:%S.%fZ', | ||||
|         '%Y-%m-%dT%H:%M:%S.%f0Z', | ||||
|         '%Y-%m-%dT%H:%M:%S', | ||||
|         '%Y-%m-%dT%H:%M', | ||||
|     ] | ||||
|     for expression in format_expressions: | ||||
|         try: | ||||
|             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | ||||
|         except: | ||||
|         except ValueError: | ||||
|             pass | ||||
|     if upload_date is None: | ||||
|         timetuple = email.utils.parsedate_tz(date_str) | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.02.03.1' | ||||
| __version__ = '2014.02.06.2' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user