Mirror of https://github.com/ytdl-org/youtube-dl.git (synced 2025-10-29 09:26:20 -07:00)

Compare commits: 2015.02.17 ... 2015.02.24 (149 commits)
| SHA1 |
|---|
| 4f3b21e1c7 |
| 54233c9080 |
| db8e13ef71 |
| 5a42414b9c |
| 9c665ab72e |
| b665ba6aa6 |
| ec5913b5cd |
| 25ac63ed71 |
| 99209c2916 |
| 1fbaa0a521 |
| 3037b91e05 |
| ffdf972b91 |
| 459e5fbd5f |
| bfc993cc91 |
| 4432db35d9 |
| 591ab1dff9 |
| 5bca2424bc |
| bd61a9e770 |
| 3438e7acd2 |
| 09c200acf2 |
| 716889cab1 |
| 409693984f |
| 04e8c11080 |
| 80af2b73ab |
| 3cc57f9645 |
| a65d4e7f14 |
| b531cfc019 |
| 543ec2136b |
| 93b5071f73 |
| ddc369f073 |
| fcc3e6138b |
| 9fe6ef7ab2 |
| c010af6f19 |
| 35b7982303 |
| f311cfa231 |
| 80970e531b |
| b7bb76df05 |
| 98c70d6fc7 |
| ab84349b16 |
| 03091e372f |
| 4d17184817 |
| e086e0eb6c |
| 314368c822 |
| c5181ab410 |
| ea5152cae1 |
| 255fca5eea |
| 4aeccadf4e |
| 93540ee10e |
| 8fb3ac3649 |
| 77b2986b5b |
| 62b013df0d |
| fad6768bd1 |
| a78125f925 |
| a00a8bcc8a |
| 1e9a9e167d |
| 3da0db62e6 |
| e14ced7918 |
| ab9d02f53b |
| a461a11989 |
| 1bd838608f |
| 365577f567 |
| 50efb383f0 |
| 5da6bd0083 |
| 5e9a033e6e |
| fb7cb6823e |
| dd0a58f5f0 |
| a21420389e |
| 6140baf4e1 |
| 8fc642eb5b |
| e66e1a0046 |
| d5c69f1da4 |
| f13b1e7d7f |
| 5c8a3f862a |
| 8807f1277f |
| a3b9157f49 |
| b88ba05356 |
| b74d505577 |
| 9e2d7dca87 |
| d236b37ac9 |
| e880c66bd8 |
| 383456aa29 |
| 1a13940c8d |
| 3d54788495 |
| 71d53ace2f |
| f37e3f99f0 |
| bd03ffc16e |
| 1ac1af9b47 |
| 3bf5705316 |
| 1c2528c8a3 |
| 7bd15b1a03 |
| 6b961a85fd |
| 7707004043 |
| a025d3c5a5 |
| c460bdd56b |
| b81a359eb6 |
| d61aefb24c |
| d305dd73a3 |
| 93a16ba238 |
| 4f7cea6c53 |
| afbdd3acc3 |
| 85d5866177 |
| 9789d7535d |
| d8443cd3f7 |
| d47c26e168 |
| 01561da142 |
| 0af25f784b |
| b9b42f2ea0 |
| 311c393838 |
| 18c1c42405 |
| 37dd5d4629 |
| 81975f4693 |
| b8b928d5cb |
| 3eff81fbf7 |
| 785521bf4f |
| 6d1a55a521 |
| 9cad27008b |
| 11e611a7fa |
| 72c1f8de06 |
| 6e99868e4c |
| 4d278fde64 |
| f21e915fb9 |
| 6f53c63df6 |
| 1def5f359e |
| 15ec669374 |
| a3fa5da496 |
| 30965ac66a |
| 09ab40b7d1 |
| edab9dbf4d |
| 9868ea4936 |
| 85920dd01d |
| fa15607773 |
| a91a2c1a83 |
| 16e7711e22 |
| 5cda4eda72 |
| 98f000409f |
| bd7fe0cf66 |
| 48246541da |
| 4a8d4a53b1 |
| 4cd95bcbc3 |
| be24c8697f |
| 0d93378887 |
| 4069766c52 |
| 360e1ca5cc |
| a1f2a06b34 |
| c84dd8a90d |
| 65469a7f8b |
| 6b597516c1 |
| b5857f62e2 |
| a504ced097 |
AUTHORS (1 line changed)
							| @@ -111,3 +111,4 @@ Paul Hartmann | ||||
| Frans de Jonge | ||||
| Robin de Rooij | ||||
| Ryan Schmidt | ||||
| Leslie P. Polzer | ||||
|   | ||||
Makefile (4 lines changed)
							| @@ -1,7 +1,7 @@ | ||||
| all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites | ||||
|  | ||||
| clean: | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe | ||||
| 	rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish *.dump *.part *.info.json *.mp4 *.flv *.mp3 *.avi CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe | ||||
|  | ||||
| PREFIX ?= /usr/local | ||||
| BINDIR ?= $(PREFIX)/bin | ||||
| @@ -43,7 +43,7 @@ test: | ||||
| ot: offlinetest | ||||
|  | ||||
| offlinetest: codetest | ||||
| 	nosetests --verbose test --exclude test_download --exclude test_age_restriction --exclude test_subtitles --exclude test_write_annotations --exclude test_youtube_lists | ||||
| 	nosetests --verbose test --exclude test_download.py --exclude test_age_restriction.py --exclude test_subtitles.py --exclude test_write_annotations.py --exclude test_youtube_lists.py | ||||
|  | ||||
| tar: youtube-dl.tar.gz | ||||
|  | ||||
|   | ||||
README.md (14 lines changed)
							| @@ -351,8 +351,8 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --all-subs                       downloads all the available subtitles of | ||||
|                                      the video | ||||
|     --list-subs                      lists all available subtitles for the video | ||||
|     --sub-format FORMAT              subtitle format (default=srt) ([sbv/vtt] | ||||
|                                      youtube only) | ||||
|     --sub-format FORMAT              subtitle format, accepts formats | ||||
|                                      preference, for example: "ass/srt/best" | ||||
|     --sub-lang LANGS                 languages of the subtitles to download | ||||
|                                      (optional) separated by commas, use IETF | ||||
|                                      language tags like 'en,pt' | ||||
| @@ -515,11 +515,15 @@ If you want to play the video on a machine that is not running youtube-dl, you c | ||||
|  | ||||
| ### ERROR: no fmt_url_map or conn information found in video info | ||||
|  | ||||
| youtube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. | ||||
| YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### ERROR: unable to download video ### | ||||
|  | ||||
| youtube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. You can update youtube-dl with `sudo youtube-dl --update`. | ||||
| YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### ExtractorError: Could not find JS function u'OF' | ||||
|  | ||||
| In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl. | ||||
|  | ||||
| ### SyntaxError: Non-ASCII character ### | ||||
|  | ||||
| @@ -567,7 +571,7 @@ Support requests for services that **do** purchase the rights to distribute thei | ||||
|  | ||||
| ### How can I detect whether a given URL is supported by youtube-dl? | ||||
|  | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
| For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/video/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. | ||||
|  | ||||
| It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. | ||||
|  | ||||
|   | ||||
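The new `--sub-format` value is a preference list, tried left to right; `best` matches whatever format the extractor lists last. A minimal sketch of the equivalent embedding call (the URL and language choices here are placeholders, not from this changeset):

```python
from youtube_dl import YoutubeDL

# 'ass/srt/best' means: prefer ASS, fall back to SRT, otherwise take
# the best (last-listed) subtitle format the extractor offers.
ydl = YoutubeDL({
    'writesubtitles': True,
    'subtitleslangs': ['en', 'pt'],     # IETF tags, as with --sub-lang
    'subtitlesformat': 'ass/srt/best',
})
ydl.download(['https://www.youtube.com/watch?v=EXAMPLE'])  # placeholder URL
```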
devscripts/check-porn.py

| @@ -45,12 +45,12 @@ for test in get_testcases(): | ||||
|  | ||||
|         RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST) | ||||
|  | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] | ||||
|                    or test['info_dict']['age_limit'] != 18): | ||||
|     if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict'] or | ||||
|                    test['info_dict']['age_limit'] != 18): | ||||
|         print('\nPotential missing age_limit check: {0}'.format(test['name'])) | ||||
|  | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] | ||||
|                          and test['info_dict']['age_limit'] == 18): | ||||
|     elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict'] and | ||||
|                          test['info_dict']['age_limit'] == 18): | ||||
|         print('\nPotential false negative: {0}'.format(test['name'])) | ||||
|  | ||||
|     else: | ||||
|   | ||||
docs/supportedsites.md

| @@ -68,9 +68,12 @@ | ||||
|  - **Canalplus**: canalplus.fr, piwiplus.fr and d8.tv | ||||
|  - **CBS** | ||||
|  - **CBSNews**: CBS News | ||||
|  - **CBSSports** | ||||
|  - **CeskaTelevize** | ||||
|  - **channel9**: Channel 9 | ||||
|  - **Chilloutzone** | ||||
|  - **chirbit** | ||||
|  - **chirbit:profile** | ||||
|  - **Cinchcast** | ||||
|  - **Cinemassacre** | ||||
|  - **clipfish** | ||||
| @@ -121,6 +124,7 @@ | ||||
|  - **EllenTV** | ||||
|  - **EllenTV:clips** | ||||
|  - **ElPais**: El País | ||||
|  - **Embedly** | ||||
|  - **EMPFlix** | ||||
|  - **Engadget** | ||||
|  - **Eporner** | ||||
| @@ -190,6 +194,7 @@ | ||||
|  - **ign.com** | ||||
|  - **imdb**: Internet Movie Database trailers | ||||
|  - **imdb:list**: Internet Movie Database lists | ||||
|  - **Imgur** | ||||
|  - **Ina** | ||||
|  - **InfoQ** | ||||
|  - **Instagram** | ||||
| @@ -262,6 +267,7 @@ | ||||
|  - **myvideo** | ||||
|  - **MyVidster** | ||||
|  - **n-tv.de** | ||||
|  - **NationalGeographic** | ||||
|  - **Naver** | ||||
|  - **NBA** | ||||
|  - **NBC** | ||||
| @@ -319,12 +325,14 @@ | ||||
|  - **podomatic** | ||||
|  - **PornHd** | ||||
|  - **PornHub** | ||||
|  - **PornHubPlaylist** | ||||
|  - **Pornotube** | ||||
|  - **PornoXO** | ||||
|  - **PromptFile** | ||||
|  - **prosiebensat1**: ProSiebenSat.1 Digital | ||||
|  - **Pyvideo** | ||||
|  - **QuickVid** | ||||
|  - **R7** | ||||
|  - **radio.de** | ||||
|  - **radiobremen** | ||||
|  - **radiofrance** | ||||
| @@ -352,6 +360,7 @@ | ||||
|  - **rutube:movie**: Rutube movies | ||||
|  - **rutube:person**: Rutube person videos | ||||
|  - **RUTV**: RUTV.RU | ||||
|  - **Sandia**: Sandia National Laboratories | ||||
|  - **Sapo**: SAPO Vídeos | ||||
|  - **savefrom.net** | ||||
|  - **SBS**: sbs.com.au | ||||
| @@ -379,7 +388,8 @@ | ||||
|  - **soundcloud:playlist** | ||||
|  - **soundcloud:set** | ||||
|  - **soundcloud:user** | ||||
|  - **Soundgasm** | ||||
|  - **soundgasm** | ||||
|  - **soundgasm:profile** | ||||
|  - **southpark.cc.com** | ||||
|  - **southpark.de** | ||||
|  - **Space** | ||||
| @@ -445,6 +455,7 @@ | ||||
|  - **Turbo** | ||||
|  - **Tutv** | ||||
|  - **tv.dfb.de** | ||||
|  - **TV4**: tv4.se and tv4play.se | ||||
|  - **tvigle**: Интернет-телевидение Tvigle.ru | ||||
|  - **tvp.pl** | ||||
|  - **tvp.pl:Series** | ||||
| @@ -552,6 +563,7 @@ | ||||
|  - **youtube:subscriptions**: YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication) | ||||
|  - **youtube:user**: YouTube.com user videos (URL or "ytuser" keyword) | ||||
|  - **youtube:watch_later**: Youtube watch later list, ":ytwatchlater" for short (requires authentication) | ||||
|  - **Zapiks** | ||||
|  - **ZDF** | ||||
|  - **ZDFChannel** | ||||
|  - **zingmp3:album**: mp3.zing.vn albums | ||||
|   | ||||
test/helper.py

| @@ -113,6 +113,16 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|             self.assertTrue( | ||||
|                 got.startswith(start_str), | ||||
|                 'field %s (value: %r) should start with %r' % (info_field, got, start_str)) | ||||
|         elif isinstance(expected, compat_str) and expected.startswith('contains:'): | ||||
|             got = got_dict.get(info_field) | ||||
|             contains_str = expected[len('contains:'):] | ||||
|             self.assertTrue( | ||||
|                 isinstance(got, compat_str), | ||||
|                 'Expected a %s object, but got %s for field %s' % ( | ||||
|                     compat_str.__name__, type(got).__name__, info_field)) | ||||
|             self.assertTrue( | ||||
|                 contains_str in got, | ||||
|                 'field %s (value: %r) should contain %r' % (info_field, got, contains_str)) | ||||
|         elif isinstance(expected, type): | ||||
|             got = got_dict.get(info_field) | ||||
|             self.assertTrue(isinstance(got, expected), | ||||
| @@ -163,12 +173,14 @@ def expect_info_dict(self, got_dict, expected_dict): | ||||
|             info_dict_str += ''.join( | ||||
|                 '    %s: %s,\n' % (_repr(k), _repr(v)) | ||||
|                 for k, v in test_info_dict.items() if k not in missing_keys) | ||||
|             info_dict_str += '\n' | ||||
|  | ||||
|             if info_dict_str: | ||||
|                 info_dict_str += '\n' | ||||
|         info_dict_str += ''.join( | ||||
|             '    %s: %s,\n' % (_repr(k), _repr(test_info_dict[k])) | ||||
|             for k in missing_keys) | ||||
|         write_string( | ||||
|             '\n\'info_dict\': {\n' + info_dict_str + '}\n', out=sys.stderr) | ||||
|             '\n\'info_dict\': {\n' + info_dict_str + '},\n', out=sys.stderr) | ||||
|         self.assertFalse( | ||||
|             missing_keys, | ||||
|             'Missing keys in test definition: %s' % ( | ||||
|   | ||||
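The new `contains:` prefix complements the existing `startswith:` and type checks in `expect_info_dict`; a hypothetical extractor test definition could now assert a substring of a long field, for example:

```python
# Hypothetical _TEST entry (URL and values are placeholders): the
# 'contains:' value passes as long as the extracted description
# includes the given substring.
_TEST = {
    'url': 'http://example.com/video/1234567',  # placeholder URL
    'info_dict': {
        'id': '1234567',
        'ext': 'mp4',
        'title': 'startswith:Some title prefix',
        'description': 'contains:one memorable phrase from the description',
    },
}
```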
test/parameters.json

| @@ -28,7 +28,7 @@ | ||||
|     "retries": 10,  | ||||
|     "simulate": false,  | ||||
|     "subtitleslang": null,  | ||||
|     "subtitlesformat": "srt", | ||||
|     "subtitlesformat": "best", | ||||
|     "test": true,  | ||||
|     "updatetime": true,  | ||||
|     "usenetrc": false,  | ||||
|   | ||||
test/test_YoutubeDL.py

| @@ -337,6 +337,65 @@ class TestFormatSelection(unittest.TestCase): | ||||
|         downloaded = ydl.downloaded_info_dicts[0] | ||||
|         self.assertEqual(downloaded['format_id'], 'G') | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         def s_formats(lang, autocaption=False): | ||||
|             return [{ | ||||
|                 'ext': ext, | ||||
|                 'url': 'http://localhost/video.%s.%s' % (lang, ext), | ||||
|                 '_auto': autocaption, | ||||
|             } for ext in ['vtt', 'srt', 'ass']] | ||||
|         subtitles = dict((l, s_formats(l)) for l in ['en', 'fr', 'es']) | ||||
|         auto_captions = dict((l, s_formats(l, True)) for l in ['it', 'pt', 'es']) | ||||
|         info_dict = { | ||||
|             'id': 'test', | ||||
|             'title': 'Test', | ||||
|             'url': 'http://localhost/video.mp4', | ||||
|             'subtitles': subtitles, | ||||
|             'automatic_captions': auto_captions, | ||||
|             'extractor': 'TEST', | ||||
|         } | ||||
|  | ||||
|         def get_info(params={}): | ||||
|             params.setdefault('simulate', True) | ||||
|             ydl = YDL(params) | ||||
|             ydl.report_warning = lambda *args, **kargs: None | ||||
|             return ydl.process_video_result(info_dict, download=False) | ||||
|  | ||||
|         result = get_info() | ||||
|         self.assertFalse(result.get('requested_subtitles')) | ||||
|         self.assertEqual(result['subtitles'], subtitles) | ||||
|         self.assertEqual(result['automatic_captions'], auto_captions) | ||||
|  | ||||
|         result = get_info({'writesubtitles': True}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['en'])) | ||||
|         self.assertTrue(subs['en'].get('data') is None) | ||||
|         self.assertEqual(subs['en']['ext'], 'ass') | ||||
|  | ||||
|         result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertEqual(subs['en']['ext'], 'srt') | ||||
|  | ||||
|         result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['es', 'fr'])) | ||||
|  | ||||
|         result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['es', 'pt'])) | ||||
|         self.assertFalse(subs['es']['_auto']) | ||||
|         self.assertTrue(subs['pt']['_auto']) | ||||
|  | ||||
|         result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) | ||||
|         subs = result['requested_subtitles'] | ||||
|         self.assertTrue(subs) | ||||
|         self.assertEqual(set(subs.keys()), set(['es', 'pt'])) | ||||
|         self.assertTrue(subs['es']['_auto']) | ||||
|         self.assertTrue(subs['pt']['_auto']) | ||||
|  | ||||
|     def test_add_extra_info(self): | ||||
|         test_dict = { | ||||
|             'extractor': 'Foo', | ||||
|   | ||||
test/test_jsinterp.py

| @@ -70,6 +70,8 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         self.assertEqual(jsi.call_function('f'), -11) | ||||
|  | ||||
|     def test_comments(self): | ||||
|         'Skipping: Not yet fully implemented' | ||||
|         return | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { | ||||
|             var x = /* 1 + */ 2; | ||||
| @@ -80,6 +82,15 @@ class TestJSInterpreter(unittest.TestCase): | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('x'), 52) | ||||
|  | ||||
|         jsi = JSInterpreter(''' | ||||
|         function f() { | ||||
|             var x = "/*"; | ||||
|             var y = 1 /* comment */ + 2; | ||||
|             return y; | ||||
|         } | ||||
|         ''') | ||||
|         self.assertEqual(jsi.call_function('f'), 3) | ||||
|  | ||||
|     def test_precedence(self): | ||||
|         jsi = JSInterpreter(''' | ||||
|         function x() { | ||||
|   | ||||
test/test_subtitles.py

| @@ -18,6 +18,14 @@ from youtube_dl.extractor import ( | ||||
|     VimeoIE, | ||||
|     WallaIE, | ||||
|     CeskaTelevizeIE, | ||||
|     LyndaIE, | ||||
|     NPOIE, | ||||
|     ComedyCentralIE, | ||||
|     NRKTVIE, | ||||
|     RaiIE, | ||||
|     VikiIE, | ||||
|     ThePlatformIE, | ||||
|     RTVEALaCartaIE, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -27,42 +35,38 @@ class BaseTestSubtitles(unittest.TestCase): | ||||
|  | ||||
|     def setUp(self): | ||||
|         self.DL = FakeYDL() | ||||
|         self.ie = self.IE(self.DL) | ||||
|         self.ie = self.IE() | ||||
|         self.DL.add_info_extractor(self.ie) | ||||
|  | ||||
|     def getInfoDict(self): | ||||
|         info_dict = self.ie.extract(self.url) | ||||
|         info_dict = self.DL.extract_info(self.url, download=False) | ||||
|         return info_dict | ||||
|  | ||||
|     def getSubtitles(self): | ||||
|         info_dict = self.getInfoDict() | ||||
|         return info_dict['subtitles'] | ||||
|         subtitles = info_dict['requested_subtitles'] | ||||
|         if not subtitles: | ||||
|             return subtitles | ||||
|         for sub_info in subtitles.values(): | ||||
|             if sub_info.get('data') is None: | ||||
|                 uf = self.DL.urlopen(sub_info['url']) | ||||
|                 sub_info['data'] = uf.read().decode('utf-8') | ||||
|         return dict((l, sub_info['data']) for l, sub_info in subtitles.items()) | ||||
|  | ||||
|  | ||||
| class TestYoutubeSubtitles(BaseTestSubtitles): | ||||
|     url = 'QRS8MkLhQmM' | ||||
|     IE = YoutubeIE | ||||
|  | ||||
|     def test_youtube_no_writesubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = False | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_youtube_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') | ||||
|  | ||||
|     def test_youtube_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['it'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') | ||||
|  | ||||
|     def test_youtube_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles.keys()), 13) | ||||
|         self.assertEqual(md5(subtitles['en']), '4cd9278a35ba2305f47354ee13472260') | ||||
|         self.assertEqual(md5(subtitles['it']), '164a51f16f260476a05b50fe4c2f161d') | ||||
|         for lang in ['it', 'fr', 'de']: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|     def test_youtube_subtitles_sbv_format(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
| @@ -76,12 +80,6 @@ class TestYoutubeSubtitles(BaseTestSubtitles): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') | ||||
|  | ||||
|     def test_youtube_list_subtitles(self): | ||||
|         self.DL.expect_warning('Video doesn\'t have automatic captions') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_youtube_automatic_captions(self): | ||||
|         self.url = '8YoUxe5ncPo' | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
| @@ -103,55 +101,22 @@ class TestYoutubeSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|  | ||||
|     def test_youtube_multiple_langs(self): | ||||
|         self.url = 'QRS8MkLhQmM' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['it', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestDailymotionSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.dailymotion.com/video/xczg00' | ||||
|     IE = DailymotionIE | ||||
|  | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) >= 6) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|         self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') | ||||
|         self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') | ||||
|         for lang in ['es', 'fr', 'de']: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning('video doesn\'t have subtitles') | ||||
| @@ -159,61 +124,21 @@ class TestDailymotionSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|  | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestTedSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' | ||||
|     IE = TEDIE | ||||
|  | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) >= 28) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|  | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|         self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') | ||||
|         self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') | ||||
|         for lang in ['es', 'fr', 'de']: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|  | ||||
|  | ||||
| @@ -221,14 +146,7 @@ class TestBlipTVSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://blip.tv/a/a-6603250' | ||||
|     IE = BlipTVIE | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
| @@ -240,39 +158,13 @@ class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://vimeo.com/76979871' | ||||
|     IE = VimeoIE | ||||
|  | ||||
|     def test_no_writesubtitles(self): | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(subtitles, None) | ||||
|  | ||||
|     def test_subtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') | ||||
|  | ||||
|     def test_subtitles_lang(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['subtitleslangs'] = ['fr'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['de', 'en', 'es', 'fr'])) | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_automatic_captions(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writeautomaticsub'] = True | ||||
|         self.DL.params['subtitleslang'] = ['en'] | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertTrue(len(subtitles.keys()) == 0) | ||||
|         self.assertEqual(md5(subtitles['en']), '8062383cf4dec168fc40a088aa6d5888') | ||||
|         self.assertEqual(md5(subtitles['fr']), 'b6191146a6c5d3a452244d853fde6dc8') | ||||
|  | ||||
|     def test_nosubtitles(self): | ||||
|         self.DL.expect_warning('video doesn\'t have subtitles') | ||||
| @@ -280,27 +172,13 @@ class TestVimeoSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|  | ||||
|     def test_multiple_langs(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         langs = ['es', 'fr', 'de'] | ||||
|         self.DL.params['subtitleslangs'] = langs | ||||
|         subtitles = self.getSubtitles() | ||||
|         for lang in langs: | ||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestWallaSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://vod.walla.co.il/movie/2705958/the-yes-men' | ||||
|     IE = WallaIE | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
| @@ -315,19 +193,13 @@ class TestWallaSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestCeskaTelevizeSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' | ||||
|     IE = CeskaTelevizeIE | ||||
|  | ||||
|     def test_list_subtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['listsubtitles'] = True | ||||
|         info_dict = self.getInfoDict() | ||||
|         self.assertEqual(info_dict, None) | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.expect_warning('Automatic Captions not supported by this server') | ||||
|         self.DL.params['writesubtitles'] = True | ||||
| @@ -342,7 +214,110 @@ class TestCeskaTelevizeSubtitles(BaseTestSubtitles): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(len(subtitles), 0) | ||||
|         self.assertFalse(subtitles) | ||||
|  | ||||
|  | ||||
| class TestLyndaSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' | ||||
|     IE = LyndaIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') | ||||
|  | ||||
|  | ||||
| class TestNPOSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860' | ||||
|     IE = NPOIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['nl'])) | ||||
|         self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') | ||||
|  | ||||
|  | ||||
| class TestMTVSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.cc.com/video-clips/kllhuv/stand-up-greg-fitzsimmons--uncensored---too-good-of-a-mother' | ||||
|     IE = ComedyCentralIE | ||||
|  | ||||
|     def getInfoDict(self): | ||||
|         return super(TestMTVSubtitles, self).getInfoDict()['entries'][0] | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), 'b9f6ca22a6acf597ec76f61749765e65') | ||||
|  | ||||
|  | ||||
| class TestNRKSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1' | ||||
|     IE = NRKTVIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['no'])) | ||||
|         self.assertEqual(md5(subtitles['no']), '1d221e6458c95c5494dcd38e6a1f129a') | ||||
|  | ||||
|  | ||||
| class TestRaiSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-cb27157f-9dd0-4aee-b788-b1f67643a391.html' | ||||
|     IE = RaiIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['it'])) | ||||
|         self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') | ||||
|  | ||||
|  | ||||
| class TestVikiSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.viki.com/videos/1060846v-punch-episode-18' | ||||
|     IE = VikiIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a') | ||||
|  | ||||
|  | ||||
| class TestThePlatformSubtitles(BaseTestSubtitles): | ||||
|     # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/ | ||||
|     # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/) | ||||
|     url = 'theplatform:JFUjUE1_ehvq' | ||||
|     IE = ThePlatformIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['en'])) | ||||
|         self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') | ||||
|  | ||||
|  | ||||
| class TestRtveSubtitles(BaseTestSubtitles): | ||||
|     url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/' | ||||
|     IE = RTVEALaCartaIE | ||||
|  | ||||
|     def test_allsubtitles(self): | ||||
|         print('Skipping, only available from Spain') | ||||
|         return | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|         self.assertEqual(set(subtitles.keys()), set(['es'])) | ||||
|         self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|   | ||||
test/test_swfinterp.py

| @@ -34,8 +34,8 @@ def _make_testfunc(testfile): | ||||
|     def test_func(self): | ||||
|         as_file = os.path.join(TEST_DIR, testfile) | ||||
|         swf_file = os.path.join(TEST_DIR, test_id + '.swf') | ||||
|         if ((not os.path.exists(swf_file)) | ||||
|                 or os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|         if ((not os.path.exists(swf_file)) or | ||||
|                 os.path.getmtime(swf_file) < os.path.getmtime(as_file)): | ||||
|             # Recompile | ||||
|             try: | ||||
|                 subprocess.check_call([ | ||||
|   | ||||
test/test_utils.py

| @@ -85,6 +85,8 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual( | ||||
|             sanitize_filename('New World record at 0:12:34'), | ||||
|             'New World record at 0_12_34') | ||||
|         self.assertEqual(sanitize_filename('--gasdgf'), '_-gasdgf') | ||||
|         self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf') | ||||
|  | ||||
|         forbidden = '"\0\\/' | ||||
|         for fc in forbidden: | ||||
| @@ -370,6 +372,10 @@ class TestUtil(unittest.TestCase): | ||||
|             "playlist":[{"controls":{"all":null}}] | ||||
|         }''') | ||||
|  | ||||
|         inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' | ||||
|         json_code = js_to_json(inp) | ||||
|         self.assertEqual(json.loads(json_code), json.loads(inp)) | ||||
|  | ||||
|     def test_js_to_json_edgecases(self): | ||||
|         on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") | ||||
|         self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"}) | ||||
|   | ||||
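The added case guards `js_to_json` against `\uXXXX` escapes inside double-quoted strings (the Sandia SAND-number metadata above). A quick illustration of the helper, assuming the fix in this range is applied:

```python
import json

from youtube_dl.utils import js_to_json

# Unquoted keys get quoted; escapes such as \u0027 survive round-tripping.
src = '{abc: "Sandia\\u0027s"}'
print(json.loads(js_to_json(src)))  # {'abc': "Sandia's"}
```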
test/test_youtube_signature.py

| @@ -64,6 +64,12 @@ _TESTS = [ | ||||
|         'js', | ||||
|         '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', | ||||
|         '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B' | ||||
|     ), | ||||
|     ( | ||||
|         'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', | ||||
|         'js', | ||||
|         '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', | ||||
|         '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', | ||||
|     ) | ||||
| ] | ||||
|  | ||||
|   | ||||
youtube_dl/YoutubeDL.py

| @@ -154,7 +154,7 @@ class YoutubeDL(object): | ||||
|     allsubtitles:      Downloads all the subtitles of the video | ||||
|                        (requires writesubtitles or writeautomaticsub) | ||||
|     listsubtitles:     Lists all available subtitles for the video | ||||
|     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt) | ||||
|     subtitlesformat:   The format code for subtitles | ||||
|     subtitleslangs:    List of languages of the subtitles to download | ||||
|     keepvideo:         Keep the video file after post-processing | ||||
|     daterange:         A DateRange object, download only if the upload_date is in the range. | ||||
| @@ -199,18 +199,25 @@ class YoutubeDL(object): | ||||
|                        postprocessor. | ||||
|     progress_hooks:    A list of functions that get called on download | ||||
|                        progress, with a dictionary with the entries | ||||
|                        * status: One of "downloading" and "finished". | ||||
|                        * status: One of "downloading", "error", or "finished". | ||||
|                                  Check this first and ignore unknown values. | ||||
|  | ||||
|                        If status is one of "downloading" or "finished", the | ||||
|                        If status is one of "downloading", or "finished", the | ||||
|                        following properties may also be present: | ||||
|                        * filename: The final filename (always present) | ||||
|                        * tmpfilename: The filename we're currently writing to | ||||
|                        * downloaded_bytes: Bytes on disk | ||||
|                        * total_bytes: Size of the whole file, None if unknown | ||||
|                        * tmpfilename: The filename we're currently writing to | ||||
|                        * total_bytes_estimate: Guess of the eventual file size, | ||||
|                                                None if unavailable. | ||||
|                        * elapsed: The number of seconds since download started. | ||||
|                        * eta: The estimated time in seconds, None if unknown | ||||
|                        * speed: The download speed in bytes/second, None if | ||||
|                                 unknown | ||||
|                        * fragment_index: The counter of the currently | ||||
|                                          downloaded video fragment. | ||||
|                        * fragment_count: The number of fragments (= individual | ||||
|                                          files that will be merged) | ||||
|  | ||||
|                        Progress hooks are guaranteed to be called at least once | ||||
|                        (with status "finished") if the download is successful. | ||||
| @@ -301,8 +308,8 @@ class YoutubeDL(object): | ||||
|                     raise | ||||
|  | ||||
|         if (sys.version_info >= (3,) and sys.platform != 'win32' and | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] | ||||
|                 and not params.get('restrictfilenames', False)): | ||||
|                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and | ||||
|                 not params.get('restrictfilenames', False)): | ||||
|             # On Python 3, the Unicode filesystem API will throw errors (#1474) | ||||
|             self.report_warning( | ||||
|                 'Assuming --restrict-filenames since file system encoding ' | ||||
| @@ -1001,6 +1008,15 @@ class YoutubeDL(object): | ||||
|                 info_dict['timestamp']) | ||||
|             info_dict['upload_date'] = upload_date.strftime('%Y%m%d') | ||||
|  | ||||
|         if self.params.get('listsubtitles', False): | ||||
|             if 'automatic_captions' in info_dict: | ||||
|                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions') | ||||
|             self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles') | ||||
|             return | ||||
|         info_dict['requested_subtitles'] = self.process_subtitles( | ||||
|             info_dict['id'], info_dict.get('subtitles'), | ||||
|             info_dict.get('automatic_captions')) | ||||
|  | ||||
|         # This extractors handle format selection themselves | ||||
|         if info_dict['extractor'] in ['Youku']: | ||||
|             if download: | ||||
| @@ -1129,6 +1145,55 @@ class YoutubeDL(object): | ||||
|         info_dict.update(formats_to_download[-1]) | ||||
|         return info_dict | ||||
|  | ||||
|     def process_subtitles(self, video_id, normal_subtitles, automatic_captions): | ||||
|         """Select the requested subtitles and their format""" | ||||
|         available_subs = {} | ||||
|         if normal_subtitles and self.params.get('writesubtitles'): | ||||
|             available_subs.update(normal_subtitles) | ||||
|         if automatic_captions and self.params.get('writeautomaticsub'): | ||||
|             for lang, cap_info in automatic_captions.items(): | ||||
|                 if lang not in available_subs: | ||||
|                     available_subs[lang] = cap_info | ||||
|  | ||||
|         if (not self.params.get('writesubtitles') and not | ||||
|                 self.params.get('writeautomaticsub') or not | ||||
|                 available_subs): | ||||
|             return None | ||||
|  | ||||
|         if self.params.get('allsubtitles', False): | ||||
|             requested_langs = available_subs.keys() | ||||
|         else: | ||||
|             if self.params.get('subtitleslangs', False): | ||||
|                 requested_langs = self.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs: | ||||
|                 requested_langs = ['en'] | ||||
|             else: | ||||
|                 requested_langs = [list(available_subs.keys())[0]] | ||||
|  | ||||
|         formats_query = self.params.get('subtitlesformat', 'best') | ||||
|         formats_preference = formats_query.split('/') if formats_query else [] | ||||
|         subs = {} | ||||
|         for lang in requested_langs: | ||||
|             formats = available_subs.get(lang) | ||||
|             if formats is None: | ||||
|                 self.report_warning('%s subtitles not available for %s' % (lang, video_id)) | ||||
|                 continue | ||||
|             for ext in formats_preference: | ||||
|                 if ext == 'best': | ||||
|                     f = formats[-1] | ||||
|                     break | ||||
|                 matches = list(filter(lambda f: f['ext'] == ext, formats)) | ||||
|                 if matches: | ||||
|                     f = matches[-1] | ||||
|                     break | ||||
|             else: | ||||
|                 f = formats[-1] | ||||
|                 self.report_warning( | ||||
|                     'No subtitle format found matching "%s" for language %s, ' | ||||
|                     'using %s' % (formats_query, lang, f['ext'])) | ||||
|             subs[lang] = f | ||||
|         return subs | ||||
|  | ||||
|     def process_info(self, info_dict): | ||||
|         """Process a single resolved IE result.""" | ||||
|  | ||||
| @@ -1231,15 +1296,22 @@ class YoutubeDL(object): | ||||
|         subtitles_are_requested = any([self.params.get('writesubtitles', False), | ||||
|                                        self.params.get('writeautomaticsub')]) | ||||
|  | ||||
|         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']: | ||||
|         if subtitles_are_requested and info_dict.get('requested_subtitles'): | ||||
|             # subtitles download errors are already managed as troubles in relevant IE | ||||
|             # that way it will silently go on when used with unsupporting IE | ||||
|             subtitles = info_dict['subtitles'] | ||||
|             sub_format = self.params.get('subtitlesformat', 'srt') | ||||
|             for sub_lang in subtitles.keys(): | ||||
|                 sub = subtitles[sub_lang] | ||||
|                 if sub is None: | ||||
|                     continue | ||||
|             subtitles = info_dict['requested_subtitles'] | ||||
|             for sub_lang, sub_info in subtitles.items(): | ||||
|                 sub_format = sub_info['ext'] | ||||
|                 if sub_info.get('data') is not None: | ||||
|                     sub_data = sub_info['data'] | ||||
|                 else: | ||||
|                     try: | ||||
|                         uf = self.urlopen(sub_info['url']) | ||||
|                         sub_data = uf.read().decode('utf-8') | ||||
|                     except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||
|                         self.report_warning('Unable to download subtitle for "%s": %s' % | ||||
|                                             (sub_lang, compat_str(err))) | ||||
|                         continue | ||||
|                 try: | ||||
|                     sub_filename = subtitles_filename(filename, sub_lang, sub_format) | ||||
|                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)): | ||||
| @@ -1247,7 +1319,7 @@ class YoutubeDL(object): | ||||
|                     else: | ||||
|                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename) | ||||
|                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: | ||||
|                             subfile.write(sub) | ||||
|                             subfile.write(sub_data) | ||||
|                 except (OSError, IOError): | ||||
|                     self.report_error('Cannot write subtitles file ' + sub_filename) | ||||
|                     return | ||||
| @@ -1359,8 +1431,8 @@ class YoutubeDL(object): | ||||
|         """Download a given list of URLs.""" | ||||
|         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL) | ||||
|         if (len(url_list) > 1 and | ||||
|                 '%' not in outtmpl | ||||
|                 and self.params.get('max_downloads') != 1): | ||||
|                 '%' not in outtmpl and | ||||
|                 self.params.get('max_downloads') != 1): | ||||
|             raise SameFileError(outtmpl) | ||||
|  | ||||
|         for url in url_list: | ||||
| @@ -1527,29 +1599,18 @@ class YoutubeDL(object): | ||||
|         return res | ||||
|  | ||||
|     def list_formats(self, info_dict): | ||||
|         def line(format, idlen=20): | ||||
|             return (('%-' + compat_str(idlen + 1) + 's%-10s%-12s%s') % ( | ||||
|                 format['format_id'], | ||||
|                 format['ext'], | ||||
|                 self.format_resolution(format), | ||||
|                 self._format_note(format), | ||||
|             )) | ||||
|  | ||||
|         formats = info_dict.get('formats', [info_dict]) | ||||
|         idlen = max(len('format code'), | ||||
|                     max(len(f['format_id']) for f in formats)) | ||||
|         formats_s = [ | ||||
|             line(f, idlen) for f in formats | ||||
|         table = [ | ||||
|             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)] | ||||
|             for f in formats | ||||
|             if f.get('preference') is None or f['preference'] >= -1000] | ||||
|         if len(formats) > 1: | ||||
|             formats_s[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)' | ||||
|             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)' | ||||
|  | ||||
|         header_line = line({ | ||||
|             'format_id': 'format code', 'ext': 'extension', | ||||
|             'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen) | ||||
|         header_line = ['format code', 'extension', 'resolution', 'note'] | ||||
|         self.to_screen( | ||||
|             '[info] Available formats for %s:\n%s\n%s' % | ||||
|             (info_dict['id'], header_line, '\n'.join(formats_s))) | ||||
|             '[info] Available formats for %s:\n%s' % | ||||
|             (info_dict['id'], render_table(header_line, table))) | ||||
|  | ||||
|     def list_thumbnails(self, info_dict): | ||||
|         thumbnails = info_dict.get('thumbnails') | ||||
| @@ -1568,6 +1629,17 @@ class YoutubeDL(object): | ||||
|             ['ID', 'width', 'height', 'URL'], | ||||
|             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails])) | ||||
|  | ||||
|     def list_subtitles(self, video_id, subtitles, name='subtitles'): | ||||
|         if not subtitles: | ||||
|             self.to_screen('%s has no %s' % (video_id, name)) | ||||
|             return | ||||
|         self.to_screen( | ||||
|             'Available %s for %s:' % (name, video_id)) | ||||
|         self.to_screen(render_table( | ||||
|             ['Language', 'formats'], | ||||
|             [[lang, ', '.join(f['ext'] for f in reversed(formats))] | ||||
|                 for lang, formats in subtitles.items()])) | ||||
|  | ||||
|     def urlopen(self, req): | ||||
|         """ Start an HTTP download """ | ||||
|  | ||||
|   | ||||
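The expanded docstring makes progress hooks considerably more informative. A minimal hook using the new fields might look like the sketch below; several values can be None or missing, so access is guarded (the URL is a placeholder):

```python
from __future__ import division

from youtube_dl import YoutubeDL


def hook(d):
    # Check 'status' first and ignore values you do not recognise.
    if d['status'] == 'downloading':
        total = d.get('total_bytes') or d.get('total_bytes_estimate')
        done = d.get('downloaded_bytes')
        if total and done is not None:
            print('%s: %.1f%% (eta: %s s, speed: %s B/s)' % (
                d['filename'], 100 * done / total,
                d.get('eta'), d.get('speed')))
    elif d['status'] == 'finished':
        print('Finished downloading %s' % d['filename'])
    elif d['status'] == 'error':
        print('Download failed')


YoutubeDL({'progress_hooks': [hook]}).download(['URL'])  # placeholder URL
```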
youtube_dl/__init__.py

| @@ -189,14 +189,14 @@ def _real_main(argv=None): | ||||
|         # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) | ||||
|         if opts.outtmpl is not None: | ||||
|             opts.outtmpl = opts.outtmpl.decode(preferredencoding()) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) | ||||
|                or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') | ||||
|                or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.usetitle and '%(title)s-%(id)s.%(ext)s') | ||||
|                or (opts.useid and '%(id)s.%(ext)s') | ||||
|                or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') | ||||
|                or DEFAULT_OUTTMPL) | ||||
|     outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or | ||||
|                (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s') or | ||||
|                (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or | ||||
|                (opts.useid and '%(id)s.%(ext)s') or | ||||
|                (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s') or | ||||
|                DEFAULT_OUTTMPL) | ||||
|     if not os.path.splitext(outtmpl)[1] and opts.extractaudio: | ||||
|         parser.error('Cannot download a video and extract audio into the same' | ||||
|                      ' file! Use "{0}.%(ext)s" instead of "{0}" as the output' | ||||
| @@ -226,7 +226,6 @@ def _real_main(argv=None): | ||||
|     if opts.embedsubtitles: | ||||
|         postprocessors.append({ | ||||
|             'key': 'FFmpegEmbedSubtitle', | ||||
|             'subtitlesformat': opts.subtitlesformat, | ||||
|         }) | ||||
|     if opts.xattrs: | ||||
|         postprocessors.append({'key': 'XAttrMetadata'}) | ||||
|   | ||||
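The reindented outtmpl expression is plain short-circuit evaluation: each parenthesised and-clause yields its template string only when all of its flags are truthy, and the first truthy clause wins. A toy rerun with illustrative values (the Opts class is a hypothetical stand-in for the parsed options):

    class Opts(object):  # hypothetical stand-in for the optparse results
        outtmpl = None
        format = None
        usetitle = True
        autonumber = False
        useid = False

    opts = Opts()
    DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
    outtmpl = ((opts.outtmpl is not None and opts.outtmpl) or
               (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s') or
               (opts.usetitle and '%(title)s-%(id)s.%(ext)s') or
               (opts.useid and '%(id)s.%(ext)s') or
               DEFAULT_OUTTMPL)
    print(outtmpl)  # %(title)s-%(id)s.%(ext)s, from the usetitle clause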
| @@ -1,4 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import os | ||||
| import re | ||||
| @@ -54,6 +54,7 @@ class FileDownloader(object): | ||||
|         self.ydl = ydl | ||||
|         self._progress_hooks = [] | ||||
|         self.params = params | ||||
|         self.add_progress_hook(self.report_progress) | ||||
|  | ||||
|     @staticmethod | ||||
|     def format_seconds(seconds): | ||||
| @@ -226,42 +227,64 @@ class FileDownloader(object): | ||||
|             self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) | ||||
|         self.to_console_title('youtube-dl ' + msg) | ||||
|  | ||||
|     def report_progress(self, percent, data_len_str, speed, eta): | ||||
|         """Report download progress.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|     def report_progress(self, s): | ||||
|         if s['status'] == 'finished': | ||||
|             if self.params.get('noprogress', False): | ||||
|                 self.to_screen('[download] Download completed') | ||||
|             else: | ||||
|                 s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|                 if s.get('elapsed') is not None: | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s' | ||||
|                 else: | ||||
|                     msg_template = '100%% of %(_total_bytes_str)s' | ||||
|                 self._report_progress_status( | ||||
|                     msg_template % s, is_last_line=True) | ||||
|  | ||||
|         if self.params.get('noprogress'): | ||||
|             return | ||||
|         if eta is not None: | ||||
|             eta_str = self.format_eta(eta) | ||||
|         else: | ||||
|             eta_str = 'Unknown ETA' | ||||
|         if percent is not None: | ||||
|             percent_str = self.format_percent(percent) | ||||
|         else: | ||||
|             percent_str = 'Unknown %' | ||||
|         speed_str = self.format_speed(speed) | ||||
|  | ||||
|         msg = ('%s of %s at %s ETA %s' % | ||||
|                (percent_str, data_len_str, speed_str, eta_str)) | ||||
|         self._report_progress_status(msg) | ||||
|  | ||||
|     def report_progress_live_stream(self, downloaded_data_len, speed, elapsed): | ||||
|         if self.params.get('noprogress', False): | ||||
|         if s['status'] != 'downloading': | ||||
|             return | ||||
|         downloaded_str = format_bytes(downloaded_data_len) | ||||
|         speed_str = self.format_speed(speed) | ||||
|         elapsed_str = FileDownloader.format_seconds(elapsed) | ||||
|         msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str) | ||||
|         self._report_progress_status(msg) | ||||
|  | ||||
|     def report_finish(self, data_len_str, tot_time): | ||||
|         """Report download finished.""" | ||||
|         if self.params.get('noprogress', False): | ||||
|             self.to_screen('[download] Download completed') | ||||
|         if s.get('eta') is not None: | ||||
|             s['_eta_str'] = self.format_eta(s['eta']) | ||||
|         else: | ||||
|             self._report_progress_status( | ||||
|                 ('100%% of %s in %s' % | ||||
|                  (data_len_str, self.format_seconds(tot_time))), | ||||
|                 is_last_line=True) | ||||
|             s['_eta_str'] = 'Unknown ETA' | ||||
|  | ||||
|         if s.get('total_bytes') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) | ||||
|         elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: | ||||
|             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') == 0: | ||||
|                 s['_percent_str'] = self.format_percent(0) | ||||
|             else: | ||||
|                 s['_percent_str'] = 'Unknown %' | ||||
|  | ||||
|         if s.get('speed') is not None: | ||||
|             s['_speed_str'] = self.format_speed(s['speed']) | ||||
|         else: | ||||
|             s['_speed_str'] = 'Unknown speed' | ||||
|  | ||||
|         if s.get('total_bytes') is not None: | ||||
|             s['_total_bytes_str'] = format_bytes(s['total_bytes']) | ||||
|             msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         elif s.get('total_bytes_estimate') is not None: | ||||
|             s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) | ||||
|             msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|         else: | ||||
|             if s.get('downloaded_bytes') is not None: | ||||
|                 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) | ||||
|                 if s.get('elapsed'): | ||||
|                     s['_elapsed_str'] = self.format_seconds(s['elapsed']) | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' | ||||
|                 else: | ||||
|                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' | ||||
|             else: | ||||
|                 msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s' | ||||
|  | ||||
|         self._report_progress_status(msg_template % s) | ||||
|  | ||||
|     def report_resuming_byte(self, resume_len): | ||||
|         """Report attempt to resume at given byte.""" | ||||
| @@ -288,14 +311,14 @@ class FileDownloader(object): | ||||
|         """ | ||||
|  | ||||
|         nooverwrites_and_exists = ( | ||||
|             self.params.get('nooverwrites', False) | ||||
|             and os.path.exists(encodeFilename(filename)) | ||||
|             self.params.get('nooverwrites', False) and | ||||
|             os.path.exists(encodeFilename(filename)) | ||||
|         ) | ||||
|  | ||||
|         continuedl_and_exists = ( | ||||
|             self.params.get('continuedl', False) | ||||
|             and os.path.isfile(encodeFilename(filename)) | ||||
|             and not self.params.get('nopart', False) | ||||
|             self.params.get('continuedl', False) and | ||||
|             os.path.isfile(encodeFilename(filename)) and | ||||
|             not self.params.get('nopart', False) | ||||
|         ) | ||||
|  | ||||
|         # Check file already present | ||||
|   | ||||
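Because report_progress is now registered through add_progress_hook, it consumes the same status dictionaries as any user-supplied hook passed in the progress_hooks parameter. A hedged sketch of a consumer that relies only on the keys this hunk emits (status, downloaded_bytes, total_bytes, total_bytes_estimate, speed, eta, elapsed):

    def my_hook(s):
        # s['status'] is one of 'downloading', 'finished' or 'error'
        if s['status'] == 'downloading':
            total = s.get('total_bytes') or s.get('total_bytes_estimate')
            if total and s.get('downloaded_bytes') is not None:
                print('%5.1f%% of %d bytes at %s B/s' % (
                    100.0 * s['downloaded_bytes'] / total, total, s.get('speed')))
        elif s['status'] == 'finished':
            print('done in %s s' % s.get('elapsed'))

    # Wiring it up (illustrative):
    #   ydl = youtube_dl.YoutubeDL({'progress_hooks': [my_hook]})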
| @@ -1,4 +1,4 @@ | ||||
| from __future__ import unicode_literals | ||||
| from __future__ import division, unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import io | ||||
| @@ -15,7 +15,6 @@ from ..compat import ( | ||||
| from ..utils import ( | ||||
|     struct_pack, | ||||
|     struct_unpack, | ||||
|     format_bytes, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     xpath_text, | ||||
| @@ -252,17 +251,6 @@ class F4mFD(FileDownloader): | ||||
|         requested_bitrate = info_dict.get('tbr') | ||||
|         self.to_screen('[download] Downloading f4m manifest') | ||||
|         manifest = self.ydl.urlopen(man_url).read() | ||||
|         self.report_destination(filename) | ||||
|         http_dl = HttpQuietDownloader( | ||||
|             self.ydl, | ||||
|             { | ||||
|                 'continuedl': True, | ||||
|                 'quiet': True, | ||||
|                 'noprogress': True, | ||||
|                 'ratelimit': self.params.get('ratelimit', None), | ||||
|                 'test': self.params.get('test', False), | ||||
|             } | ||||
|         ) | ||||
|  | ||||
|         doc = etree.fromstring(manifest) | ||||
|         formats = [(int(f.attrib.get('bitrate', -1)), f) | ||||
| @@ -298,39 +286,65 @@ class F4mFD(FileDownloader): | ||||
|         # For some akamai manifests we'll need to add a query to the fragment url | ||||
|         akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) | ||||
|  | ||||
|         self.report_destination(filename) | ||||
|         http_dl = HttpQuietDownloader( | ||||
|             self.ydl, | ||||
|             { | ||||
|                 'continuedl': True, | ||||
|                 'quiet': True, | ||||
|                 'noprogress': True, | ||||
|                 'ratelimit': self.params.get('ratelimit', None), | ||||
|                 'test': self.params.get('test', False), | ||||
|             } | ||||
|         ) | ||||
|         tmpfilename = self.temp_name(filename) | ||||
|         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') | ||||
|  | ||||
|         write_flv_header(dest_stream) | ||||
|         write_metadata_tag(dest_stream, metadata) | ||||
|  | ||||
|         # This dict stores the download progress; it is updated by the | ||||
|         # progress hook | ||||
|         state = { | ||||
|             'status': 'downloading', | ||||
|             'downloaded_bytes': 0, | ||||
|             'frag_counter': 0, | ||||
|             'frag_index': 0, | ||||
|             'frag_count': total_frags, | ||||
|             'filename': filename, | ||||
|             'tmpfilename': tmpfilename, | ||||
|         } | ||||
|         start = time.time() | ||||
|  | ||||
|         def frag_progress_hook(status): | ||||
|             frag_total_bytes = status.get('total_bytes', 0) | ||||
|             estimated_size = (state['downloaded_bytes'] + | ||||
|                               (total_frags - state['frag_counter']) * frag_total_bytes) | ||||
|             if status['status'] == 'finished': | ||||
|         def frag_progress_hook(s): | ||||
|             if s['status'] not in ('downloading', 'finished'): | ||||
|                 return | ||||
|  | ||||
|             frag_total_bytes = s.get('total_bytes', 0) | ||||
|             if s['status'] == 'finished': | ||||
|                 state['downloaded_bytes'] += frag_total_bytes | ||||
|                 state['frag_counter'] += 1 | ||||
|                 progress = self.calc_percent(state['frag_counter'], total_frags) | ||||
|                 byte_counter = state['downloaded_bytes'] | ||||
|                 state['frag_index'] += 1 | ||||
|  | ||||
|             estimated_size = ( | ||||
|                 (state['downloaded_bytes'] + frag_total_bytes) / | ||||
|                 (state['frag_index'] + 1) * total_frags) | ||||
|             time_now = time.time() | ||||
|             state['total_bytes_estimate'] = estimated_size | ||||
|             state['elapsed'] = time_now - start | ||||
|  | ||||
|             if s['status'] == 'finished': | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|             else: | ||||
|                 frag_downloaded_bytes = status['downloaded_bytes'] | ||||
|                 byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes | ||||
|                 frag_downloaded_bytes = s['downloaded_bytes'] | ||||
|                 frag_progress = self.calc_percent(frag_downloaded_bytes, | ||||
|                                                   frag_total_bytes) | ||||
|                 progress = self.calc_percent(state['frag_counter'], total_frags) | ||||
|                 progress = self.calc_percent(state['frag_index'], total_frags) | ||||
|                 progress += frag_progress / float(total_frags) | ||||
|  | ||||
|             eta = self.calc_eta(start, time.time(), estimated_size, byte_counter) | ||||
|             self.report_progress(progress, format_bytes(estimated_size), | ||||
|                                  status.get('speed'), eta) | ||||
|                 state['eta'] = self.calc_eta( | ||||
|                     start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) | ||||
|                 state['speed'] = s.get('speed') | ||||
|             self._hook_progress(state) | ||||
|  | ||||
|         http_dl.add_progress_hook(frag_progress_hook) | ||||
|  | ||||
|         frags_filenames = [] | ||||
| @@ -354,8 +368,8 @@ class F4mFD(FileDownloader): | ||||
|             frags_filenames.append(frag_filename) | ||||
|  | ||||
|         dest_stream.close() | ||||
|         self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start) | ||||
|  | ||||
|         elapsed = time.time() - start | ||||
|         self.try_rename(tmpfilename, filename) | ||||
|         for frag_file in frags_filenames: | ||||
|             os.remove(frag_file) | ||||
| @@ -366,6 +380,7 @@ class F4mFD(FileDownloader): | ||||
|             'total_bytes': fsize, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|             'elapsed': elapsed, | ||||
|         }) | ||||
|  | ||||
|         return True | ||||
|   | ||||
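The new estimate in frag_progress_hook averages the bytes seen so far over the fragments touched so far (the finished ones plus the one in flight) and scales that up to the whole fragment list; the division import added at the top of the file keeps the averaging in floating point under Python 2. A worked example with made-up sizes:

    downloaded_bytes = 3 * 1000000   # three finished ~1 MB fragments
    frag_total_bytes = 1000000       # reported size of the fragment in flight
    frag_index = 3
    total_frags = 10

    estimated_size = ((downloaded_bytes + frag_total_bytes) /
                      (frag_index + 1) * total_frags)
    print(estimated_size)  # 10000000.0, i.e. ~10 MB for the whole stream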
| @@ -1,10 +1,9 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import os | ||||
| import time | ||||
|  | ||||
| from socket import error as SocketError | ||||
| import errno | ||||
| import os | ||||
| import socket | ||||
| import time | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from ..compat import ( | ||||
| @@ -15,7 +14,6 @@ from ..utils import ( | ||||
|     ContentTooShortError, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
|     format_bytes, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -102,7 +100,7 @@ class HttpFD(FileDownloader): | ||||
|                             resume_len = 0 | ||||
|                             open_mode = 'wb' | ||||
|                             break | ||||
|             except SocketError as e: | ||||
|             except socket.error as e: | ||||
|                 if e.errno != errno.ECONNRESET: | ||||
|                     # Connection reset is no problem, just retry | ||||
|                     raise | ||||
| @@ -137,7 +135,6 @@ class HttpFD(FileDownloader): | ||||
|                 self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) | ||||
|                 return False | ||||
|  | ||||
|         data_len_str = format_bytes(data_len) | ||||
|         byte_counter = 0 + resume_len | ||||
|         block_size = self.params.get('buffersize', 1024) | ||||
|         start = time.time() | ||||
| @@ -196,20 +193,19 @@ class HttpFD(FileDownloader): | ||||
|             # Progress message | ||||
|             speed = self.calc_speed(start, now, byte_counter - resume_len) | ||||
|             if data_len is None: | ||||
|                 eta = percent = None | ||||
|                 eta = None | ||||
|             else: | ||||
|                 percent = self.calc_percent(byte_counter, data_len) | ||||
|                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) | ||||
|             self.report_progress(percent, data_len_str, speed, eta) | ||||
|  | ||||
|             self._hook_progress({ | ||||
|                 'status': 'downloading', | ||||
|                 'downloaded_bytes': byte_counter, | ||||
|                 'total_bytes': data_len, | ||||
|                 'tmpfilename': tmpfilename, | ||||
|                 'filename': filename, | ||||
|                 'status': 'downloading', | ||||
|                 'eta': eta, | ||||
|                 'speed': speed, | ||||
|                 'elapsed': now - start, | ||||
|             }) | ||||
|  | ||||
|             if is_test and byte_counter == data_len: | ||||
| @@ -221,7 +217,13 @@ class HttpFD(FileDownloader): | ||||
|             return False | ||||
|         if tmpfilename != '-': | ||||
|             stream.close() | ||||
|         self.report_finish(data_len_str, (time.time() - start)) | ||||
|  | ||||
|         self._hook_progress({ | ||||
|             'downloaded_bytes': byte_counter, | ||||
|             'total_bytes': data_len, | ||||
|             'tmpfilename': tmpfilename, | ||||
|             'status': 'error', | ||||
|         }) | ||||
|         if data_len is not None and byte_counter != data_len: | ||||
|             raise ContentTooShortError(byte_counter, int(data_len)) | ||||
|         self.try_rename(tmpfilename, filename) | ||||
| @@ -235,6 +237,7 @@ class HttpFD(FileDownloader): | ||||
|             'total_bytes': byte_counter, | ||||
|             'filename': filename, | ||||
|             'status': 'finished', | ||||
|             'elapsed': time.time() - start, | ||||
|         }) | ||||
|  | ||||
|         return True | ||||
|   | ||||
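calc_speed and calc_eta are FileDownloader helpers; the hunk feeds them bytes counted since the resume point so a resumed download does not skew the rate. Their bodies are not part of this diff, but the computation is presumably of this shape:

    def calc_speed(start, now, bytes_done):
        dif = now - start
        if bytes_done == 0 or dif < 0.001:  # too early to measure
            return None
        return float(bytes_done) / dif

    def calc_eta(start, now, total, current):
        if total is None:
            return None
        rate = calc_speed(start, now, current)
        return int((total - current) / rate) if rate else None

    print(calc_eta(0.0, 10.0, 1000, 250))  # 30: 750 bytes left at 25 B/s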
| @@ -11,7 +11,6 @@ from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     check_executable, | ||||
|     encodeFilename, | ||||
|     format_bytes, | ||||
|     get_exe_version, | ||||
| ) | ||||
|  | ||||
| @@ -51,23 +50,23 @@ class RtmpFD(FileDownloader): | ||||
|                     if not resume_percent: | ||||
|                         resume_percent = percent | ||||
|                         resume_downloaded_data_len = downloaded_data_len | ||||
|                     eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent) | ||||
|                     speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len) | ||||
|                     time_now = time.time() | ||||
|                     eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) | ||||
|                     speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) | ||||
|                     data_len = None | ||||
|                     if percent > 0: | ||||
|                         data_len = int(downloaded_data_len * 100 / percent) | ||||
|                     data_len_str = '~' + format_bytes(data_len) | ||||
|                     self.report_progress(percent, data_len_str, speed, eta) | ||||
|                     cursor_in_new_line = False | ||||
|                     self._hook_progress({ | ||||
|                         'status': 'downloading', | ||||
|                         'downloaded_bytes': downloaded_data_len, | ||||
|                         'total_bytes': data_len, | ||||
|                         'total_bytes_estimate': data_len, | ||||
|                         'tmpfilename': tmpfilename, | ||||
|                         'filename': filename, | ||||
|                         'status': 'downloading', | ||||
|                         'eta': eta, | ||||
|                         'elapsed': time_now - start, | ||||
|                         'speed': speed, | ||||
|                     }) | ||||
|                     cursor_in_new_line = False | ||||
|                 else: | ||||
|                     # no percent for live streams | ||||
|                     mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) | ||||
| @@ -75,15 +74,15 @@ class RtmpFD(FileDownloader): | ||||
|                         downloaded_data_len = int(float(mobj.group(1)) * 1024) | ||||
|                         time_now = time.time() | ||||
|                         speed = self.calc_speed(start, time_now, downloaded_data_len) | ||||
|                         self.report_progress_live_stream(downloaded_data_len, speed, time_now - start) | ||||
|                         cursor_in_new_line = False | ||||
|                         self._hook_progress({ | ||||
|                             'downloaded_bytes': downloaded_data_len, | ||||
|                             'tmpfilename': tmpfilename, | ||||
|                             'filename': filename, | ||||
|                             'status': 'downloading', | ||||
|                             'elapsed': time_now - start, | ||||
|                             'speed': speed, | ||||
|                         }) | ||||
|                         cursor_in_new_line = False | ||||
|                     elif self.params.get('verbose', False): | ||||
|                         if not cursor_in_new_line: | ||||
|                             self.to_screen('') | ||||
|   | ||||
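For reference, the rtmpdump progress lines being scraped look like '3456.789 kB / 12.34 sec (5.6%)': the kB counter is converted to bytes and, when a percentage is known, the total size is back-computed from it. A standalone rerun of that arithmetic (the percent value is parsed from the same line in the real code):

    import re

    line = '3456.789 kB / 12.34 sec (5.6%)'
    mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
    downloaded_data_len = int(float(mobj.group(1)) * 1024)  # 3539751 bytes
    percent = 5.6  # taken from the trailing '(5.6%)'
    data_len = int(downloaded_data_len * 100 / percent)     # estimated total
    print(downloaded_data_len, data_len)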
| @@ -58,10 +58,15 @@ from .canalplus import CanalplusIE | ||||
| from .canalc2 import Canalc2IE | ||||
| from .cbs import CBSIE | ||||
| from .cbsnews import CBSNewsIE | ||||
| from .cbssports import CBSSportsIE | ||||
| from .ccc import CCCIE | ||||
| from .ceskatelevize import CeskaTelevizeIE | ||||
| from .channel9 import Channel9IE | ||||
| from .chilloutzone import ChilloutzoneIE | ||||
| from .chirbit import ( | ||||
|     ChirbitIE, | ||||
|     ChirbitProfileIE, | ||||
| ) | ||||
| from .cinchcast import CinchcastIE | ||||
| from .clipfish import ClipfishIE | ||||
| from .cliphunter import CliphunterIE | ||||
| @@ -121,6 +126,7 @@ from .ellentv import ( | ||||
|     EllenTVClipsIE, | ||||
| ) | ||||
| from .elpais import ElPaisIE | ||||
| from .embedly import EmbedlyIE | ||||
| from .empflix import EMPFlixIE | ||||
| from .engadget import EngadgetIE | ||||
| from .eporner import EpornerIE | ||||
| @@ -204,6 +210,7 @@ from .imdb import ( | ||||
|     ImdbIE, | ||||
|     ImdbListIE | ||||
| ) | ||||
| from .imgur import ImgurIE | ||||
| from .ina import InaIE | ||||
| from .infoq import InfoQIE | ||||
| from .instagram import InstagramIE, InstagramUserIE | ||||
| @@ -282,6 +289,7 @@ from .myspace import MySpaceIE, MySpaceAlbumIE | ||||
| from .myspass import MySpassIE | ||||
| from .myvideo import MyVideoIE | ||||
| from .myvidster import MyVidsterIE | ||||
| from .nationalgeographic import NationalGeographicIE | ||||
| from .naver import NaverIE | ||||
| from .nba import NBAIE | ||||
| from .nbc import ( | ||||
| @@ -350,13 +358,17 @@ from .playfm import PlayFMIE | ||||
| from .playvid import PlayvidIE | ||||
| from .podomatic import PodomaticIE | ||||
| from .pornhd import PornHdIE | ||||
| from .pornhub import PornHubIE | ||||
| from .pornhub import ( | ||||
|     PornHubIE, | ||||
|     PornHubPlaylistIE, | ||||
| ) | ||||
| from .pornotube import PornotubeIE | ||||
| from .pornoxo import PornoXOIE | ||||
| from .promptfile import PromptFileIE | ||||
| from .prosiebensat1 import ProSiebenSat1IE | ||||
| from .pyvideo import PyvideoIE | ||||
| from .quickvid import QuickVidIE | ||||
| from .r7 import R7IE | ||||
| from .radiode import RadioDeIE | ||||
| from .radiobremen import RadioBremenIE | ||||
| from .radiofrance import RadioFranceIE | ||||
| @@ -386,6 +398,7 @@ from .rutube import ( | ||||
|     RutubePersonIE, | ||||
| ) | ||||
| from .rutv import RUTVIE | ||||
| from .sandia import SandiaIE | ||||
| from .sapo import SapoIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .sbs import SBSIE | ||||
| @@ -416,7 +429,10 @@ from .soundcloud import ( | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE | ||||
| ) | ||||
| from .soundgasm import SoundgasmIE | ||||
| from .soundgasm import ( | ||||
|     SoundgasmIE, | ||||
|     SoundgasmProfileIE | ||||
| ) | ||||
| from .southpark import ( | ||||
|     SouthParkIE, | ||||
|     SouthparkDeIE, | ||||
| @@ -482,6 +498,7 @@ from .tumblr import TumblrIE | ||||
| from .tunein import TuneInIE | ||||
| from .turbo import TurboIE | ||||
| from .tutv import TutvIE | ||||
| from .tv4 import TV4IE | ||||
| from .tvigle import TvigleIE | ||||
| from .tvp import TvpIE, TvpSeriesIE | ||||
| from .tvplay import TVPlayIE | ||||
| @@ -603,6 +620,7 @@ from .youtube import ( | ||||
|     YoutubeUserIE, | ||||
|     YoutubeWatchLaterIE, | ||||
| ) | ||||
| from .zapiks import ZapiksIE | ||||
| from .zdf import ZDFIE, ZDFChannelIE | ||||
| from .zingmp3 import ( | ||||
|     ZingMp3SongIE, | ||||
|   | ||||
| @@ -28,7 +28,6 @@ class AdobeTVIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         player = self._parse_json( | ||||
| @@ -44,8 +43,10 @@ class AdobeTVIE(InfoExtractor): | ||||
|             self._html_search_meta('datepublished', webpage, 'upload date')) | ||||
|  | ||||
|         duration = parse_duration( | ||||
|             self._html_search_meta('duration', webpage, 'duration') | ||||
|             or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) | ||||
|             self._html_search_meta('duration', webpage, 'duration') or | ||||
|             self._search_regex( | ||||
|                 r'Runtime:\s*(\d{2}:\d{2}:\d{2})', | ||||
|                 webpage, 'duration', fatal=False)) | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'<div class="views">\s*Views?:\s*([\d,.]+)\s*</div>', | ||||
|   | ||||
| @@ -38,6 +38,7 @@ class AdultSwimIE(InfoExtractor): | ||||
|             }, | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'id': 'rQxZvXQ4ROaSOqq-or2Mow', | ||||
|             'title': 'Rick and Morty - Pilot', | ||||
|             'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " | ||||
|         } | ||||
| @@ -55,6 +56,7 @@ class AdultSwimIE(InfoExtractor): | ||||
|             } | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'id': '-t8CamQlQ2aYZ49ItZCFog', | ||||
|             'title': 'American Dad - Putting Francine Out of Business', | ||||
|             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' | ||||
|         }, | ||||
|   | ||||
| @@ -11,9 +11,12 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class AppleTrailersIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TEST = { | ||||
|     _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' | ||||
|     _TESTS = [{ | ||||
|         "url": "http://trailers.apple.com/trailers/wb/manofsteel/", | ||||
|         'info_dict': { | ||||
|             'id': 'manofsteel', | ||||
|         }, | ||||
|         "playlist": [ | ||||
|             { | ||||
|                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8", | ||||
| @@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor): | ||||
|                 }, | ||||
|             }, | ||||
|         ] | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://trailers.apple.com/ca/metropole/autrui/', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     _JSON_RE = r'iTunes.playURL\((.*?)\);' | ||||
|  | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
| import time | ||||
| import hmac | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
|     compat_urllib_parse, | ||||
| @@ -17,7 +17,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AtresPlayerIE(SubtitlesInfoExtractor): | ||||
| class AtresPlayerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html' | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -144,13 +144,12 @@ class AtresPlayerIE(SubtitlesInfoExtractor): | ||||
|         thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail') | ||||
|  | ||||
|         subtitles = {} | ||||
|         subtitle = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') | ||||
|         if subtitle: | ||||
|             subtitles['es'] = subtitle | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|         subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle') | ||||
|         if subtitle_url: | ||||
|             subtitles['es'] = [{ | ||||
|                 'ext': 'srt', | ||||
|                 'url': subtitle_url, | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -159,5 +158,5 @@ class AtresPlayerIE(SubtitlesInfoExtractor): | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(video_id, subtitles), | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
| @@ -109,7 +109,7 @@ class BandcampIE(InfoExtractor): | ||||
|  | ||||
| class BandcampAlbumIE(InfoExtractor): | ||||
|     IE_NAME = 'Bandcamp:album' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<title>[^?#]+)|/?(?:$|[?#]))' | ||||
|     _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^?#]+)|/?(?:$|[?#]))' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1', | ||||
| @@ -133,31 +133,37 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|         ], | ||||
|         'info_dict': { | ||||
|             'title': 'Jazz Format Mixtape vol.1', | ||||
|             'id': 'jazz-format-mixtape-vol-1', | ||||
|             'uploader_id': 'blazo', | ||||
|         }, | ||||
|         'params': { | ||||
|             'playlistend': 2 | ||||
|         }, | ||||
|         'skip': 'Bandcamp imposes download limits. See test_playlists:test_bandcamp_album for the playlist test' | ||||
|         'skip': 'Bandcamp imposes download limits.' | ||||
|     }, { | ||||
|         'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave', | ||||
|         'info_dict': { | ||||
|             'title': 'Hierophany of the Open Grave', | ||||
|             'uploader_id': 'nightbringer', | ||||
|             'id': 'hierophany-of-the-open-grave', | ||||
|         }, | ||||
|         'playlist_mincount': 9, | ||||
|     }, { | ||||
|         'url': 'http://dotscale.bandcamp.com', | ||||
|         'info_dict': { | ||||
|             'title': 'Loom', | ||||
|             'id': 'dotscale', | ||||
|             'uploader_id': 'dotscale', | ||||
|         }, | ||||
|         'playlist_mincount': 7, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         playlist_id = mobj.group('subdomain') | ||||
|         title = mobj.group('title') | ||||
|         display_id = title or playlist_id | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|         uploader_id = mobj.group('subdomain') | ||||
|         album_id = mobj.group('album_id') | ||||
|         playlist_id = album_id or uploader_id | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|         tracks_paths = re.findall(r'<a href="(.*?)" itemprop="url">', webpage) | ||||
|         if not tracks_paths: | ||||
|             raise ExtractorError('The page doesn\'t contain any tracks') | ||||
| @@ -168,8 +174,8 @@ class BandcampAlbumIE(InfoExtractor): | ||||
|             r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False) | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'uploader_id': uploader_id, | ||||
|             'id': playlist_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'entries': entries, | ||||
|         } | ||||
|   | ||||
| @@ -2,12 +2,12 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..compat import compat_HTTPError | ||||
|  | ||||
|  | ||||
| class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
| class BBCCoUkIE(InfoExtractor): | ||||
|     IE_NAME = 'bbc.co.uk' | ||||
|     IE_DESC = 'BBC iPlayer' | ||||
|     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})' | ||||
| @@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|             formats.extend(conn_formats) | ||||
|         return formats | ||||
|  | ||||
|     def _extract_captions(self, media, programme_id): | ||||
|     def _get_subtitles(self, media, programme_id): | ||||
|         subtitles = {} | ||||
|         for connection in self._extract_connections(media): | ||||
|             captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions') | ||||
|             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') | ||||
|             ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}')) | ||||
|             srt = '' | ||||
|  | ||||
|             def _extract_text(p): | ||||
|                 if p.text is not None: | ||||
|                     stripped_text = p.text.strip() | ||||
|                     if stripped_text: | ||||
|                         return stripped_text | ||||
|                 return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span')) | ||||
|             for pos, p in enumerate(ps): | ||||
|                 srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), | ||||
|                                                           p.text.strip() if p.text is not None else '') | ||||
|             subtitles[lang] = srt | ||||
|                 srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p)) | ||||
|             subtitles[lang] = [ | ||||
|                 { | ||||
|                     'url': connection.get('href'), | ||||
|                     'ext': 'ttml', | ||||
|                 }, | ||||
|                 { | ||||
|                     'data': srt, | ||||
|                     'ext': 'srt', | ||||
|                 }, | ||||
|             ] | ||||
|         return subtitles | ||||
|  | ||||
|     def _download_media_selector(self, programme_id): | ||||
| @@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|             elif kind == 'video': | ||||
|                 formats.extend(self._extract_video(media, programme_id)) | ||||
|             elif kind == 'captions': | ||||
|                 subtitles = self._extract_captions(media, programme_id) | ||||
|                 subtitles = self.extract_subtitles(media, programme_id) | ||||
|  | ||||
|         return formats, subtitles | ||||
|  | ||||
| @@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|         else: | ||||
|             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(programme_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|   | ||||
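The new _extract_text covers TTML paragraphs whose text lives in child <span> elements rather than in the <p> itself. A self-contained rerun of the conversion on a toy TTML document; as in the hunk, cue numbers count from zero and the begin/end attributes are copied into the SRT verbatim:

    import xml.etree.ElementTree as etree

    TT_NS = '{http://www.w3.org/2006/10/ttaf1}'
    ttml = (
        '<tt xmlns="http://www.w3.org/2006/10/ttaf1" xml:lang="en"><body><div>'
        '<p begin="00:00:01.000" end="00:00:02.500">Hello</p>'
        '<p begin="00:00:03.000" end="00:00:04.000"><span>split</span><span>text</span></p>'
        '</div></body></tt>')

    captions = etree.fromstring(ttml)
    ps = captions.findall('./{0}body/{0}div/{0}p'.format(TT_NS))

    def _extract_text(p):
        if p.text is not None and p.text.strip():
            return p.text.strip()
        return ' '.join(span.text.strip() for span in p.findall(TT_NS + 'span'))

    srt = ''
    for pos, p in enumerate(ps):
        srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (pos, p.get('begin'), p.get('end'), _extract_text(p))
    print(srt)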
| @@ -1,40 +1,35 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import remove_start | ||||
| from ..utils import ( | ||||
|     remove_start, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlinkxIE(InfoExtractor): | ||||
|     _VALID_URL = r'^(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     _VALID_URL = r'(?:https?://(?:www\.)blinkx\.com/#?ce/|blinkx:)(?P<id>[^?]+)' | ||||
|     IE_NAME = 'blinkx' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.blinkx.com/ce/8aQUy7GVFYgFzpKhT0oqsilwOGFRVXk3R1ZGWWdGenBLaFQwb3FzaWx3OGFRVXk3R1ZGWWdGenB', | ||||
|         'md5': '2e9a07364af40163a908edbf10bb2492', | ||||
|         'url': 'http://www.blinkx.com/ce/Da0Gw3xc5ucpNduzLuDDlv4WC9PuI4fDi1-t6Y3LyfdY2SZS5Urbvn-UPJvrvbo8LTKTc67Wu2rPKSQDJyZeeORCR8bYkhs8lI7eqddznH2ofh5WEEdjYXnoRtj7ByQwt7atMErmXIeYKPsSDuMAAqJDlQZ-3Ff4HJVeH_s3Gh8oQ', | ||||
|         'md5': '337cf7a344663ec79bf93a526a2e06c7', | ||||
|         'info_dict': { | ||||
|             'id': '8aQUy7GV', | ||||
|             'id': 'Da0Gw3xc', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Police Car Rolls Away', | ||||
|             'uploader': 'stupidvideos.com', | ||||
|             'upload_date': '20131215', | ||||
|             'timestamp': 1387068000, | ||||
|             'description': 'A police car gently rolls away from a fight. Maybe it felt weird being around a confrontation and just had to get out of there!', | ||||
|             'duration': 14.886, | ||||
|             'thumbnails': [{ | ||||
|                 'width': 100, | ||||
|                 'height': 76, | ||||
|                 'resolution': '100x76', | ||||
|                 'url': 'http://cdn.blinkx.com/stream/b/41/StupidVideos/20131215/1873969261/1873969261_tn_0.jpg', | ||||
|             }], | ||||
|             'title': 'No Daily Show for John Oliver; HBO Show Renewed - IGN News', | ||||
|             'uploader': 'IGN News', | ||||
|             'upload_date': '20150217', | ||||
|             'timestamp': 1424215740, | ||||
|             'description': 'HBO has renewed Last Week Tonight With John Oliver for two more seasons.', | ||||
|             'duration': 47.743333, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, rl): | ||||
|         m = re.match(self._VALID_URL, rl) | ||||
|         video_id = m.group('id') | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         display_id = video_id[:8] | ||||
|  | ||||
|         api_url = ('https://apib4.blinkx.com/api.php?action=play_video&' + | ||||
| @@ -60,18 +55,20 @@ class BlinkxIE(InfoExtractor): | ||||
|             elif m['type'] in ('flv', 'mp4'): | ||||
|                 vcodec = remove_start(m['vcodec'], 'ff') | ||||
|                 acodec = remove_start(m['acodec'], 'ff') | ||||
|                 tbr = (int(m['vbr']) + int(m['abr'])) // 1000 | ||||
|                 vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000) | ||||
|                 abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000) | ||||
|                 tbr = vbr + abr if vbr and abr else None | ||||
|                 format_id = '%s-%sk-%s' % (vcodec, tbr, m['w']) | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'url': m['link'], | ||||
|                     'vcodec': vcodec, | ||||
|                     'acodec': acodec, | ||||
|                     'abr': int(m['abr']) // 1000, | ||||
|                     'vbr': int(m['vbr']) // 1000, | ||||
|                     'abr': abr, | ||||
|                     'vbr': vbr, | ||||
|                     'tbr': tbr, | ||||
|                     'width': int(m['w']), | ||||
|                     'height': int(m['h']), | ||||
|                     'width': int_or_none(m.get('w')), | ||||
|                     'height': int_or_none(m.get('h')), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|   | ||||
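int_or_none comes from youtube_dl.utils; its scale argument is a divisor, which lets the hunk turn bit rates in bits/s into kbit/s while tolerating absent fields. A rough stand-in under that assumption:

    def int_or_none(v, scale=1, default=None):
        # Sketch of the utils helper: None/'' fall through to the default,
        # anything else is converted and divided by scale.
        if v in (None, ''):
            return default
        return int(v) // scale

    m = {'vbr': '1200000', 'abr': None, 'abitrate': '128000'}
    vbr = int_or_none(m.get('vbr') or m.get('vbitrate'), 1000)  # 1200 kbps
    abr = int_or_none(m.get('abr') or m.get('abitrate'), 1000)  # 128 kbps
    tbr = vbr + abr if vbr and abr else None
    print(vbr, abr, tbr)  # 1200 128 1328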
| @@ -3,7 +3,6 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| @@ -18,7 +17,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class BlipTVIE(SubtitlesInfoExtractor): | ||||
| class BlipTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+_]+)))' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -143,7 +142,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|         categories = [category.text for category in item.findall('category')] | ||||
|  | ||||
|         formats = [] | ||||
|         subtitles = {} | ||||
|         subtitles_urls = {} | ||||
|  | ||||
|         media_group = item.find(media('group')) | ||||
|         for media_content in media_group.findall(media('content')): | ||||
| @@ -161,7 +160,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|                 } | ||||
|                 lang = role.rpartition('-')[-1].strip().lower() | ||||
|                 langcode = LANGS.get(lang, lang) | ||||
|                 subtitles[langcode] = url | ||||
|                 subtitles_urls[langcode] = url | ||||
|             elif media_type.startswith('video/'): | ||||
|                 formats.append({ | ||||
|                     'url': real_url, | ||||
| @@ -175,11 +174,7 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|         subtitles = self.extract_subtitles(video_id, subtitles_urls) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -192,15 +187,22 @@ class BlipTVIE(SubtitlesInfoExtractor): | ||||
|             'thumbnail': thumbnail, | ||||
|             'categories': categories, | ||||
|             'formats': formats, | ||||
|             'subtitles': video_subtitles, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _download_subtitle_url(self, sub_lang, url): | ||||
|         # For some weird reason, blip.tv serves a video instead of subtitles | ||||
|         # when we request with a common UA | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('User-Agent', 'youtube-dl') | ||||
|         return self._download_webpage(req, None, note=False) | ||||
|     def _get_subtitles(self, video_id, subtitles_urls): | ||||
|         subtitles = {} | ||||
|         for lang, url in subtitles_urls.items(): | ||||
|             # For some weird reason, blip.tv serves a video instead of subtitles | ||||
|             # when we request with a common UA | ||||
|             req = compat_urllib_request.Request(url) | ||||
|             req.add_header('User-Agent', 'youtube-dl') | ||||
|             subtitles[lang] = [{ | ||||
|                 # The extension is 'srt' but it's actually an 'ass' file | ||||
|                 'ext': 'ass', | ||||
|                 'data': self._download_webpage(req, None, note=False), | ||||
|             }] | ||||
|         return subtitles | ||||
|  | ||||
|  | ||||
| class BlipTVUserIE(InfoExtractor): | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class BloombergIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<name>.+?)\.html' | ||||
|     _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html', | ||||
| @@ -20,9 +20,9 @@ class BloombergIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         name = mobj.group('name') | ||||
|         name = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, name) | ||||
|  | ||||
|         f4m_url = self._search_regex( | ||||
|             r'<source src="(https?://[^"]+\.f4m.*?)"', webpage, | ||||
|             'f4m url') | ||||
|   | ||||
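Several extractors in this batch swap the manual re.match boilerplate for _match_id, and the Bloomberg pattern renames its group from 'name' to 'id' to make that possible. _match_id is presumably the obvious classmethod on InfoExtractor, along these lines:

    import re

    class InfoExtractor(object):
        _VALID_URL = r'https?://www\.bloomberg\.com/video/(?P<id>.+?)\.html'

        @classmethod
        def _match_id(cls, url):
            # Match the URL against _VALID_URL and return its 'id' group.
            m = re.match(cls._VALID_URL, url)
            assert m is not None, 'Invalid URL: %s' % url
            return m.group('id')

    print(InfoExtractor._match_id(
        'http://www.bloomberg.com/video/some-clip-abc123.html'))  # some-clip-abc123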
| @@ -95,6 +95,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', | ||||
|             'info_dict': { | ||||
|                 'title': 'Sealife', | ||||
|                 'id': '3550319591001', | ||||
|             }, | ||||
|             'playlist_mincount': 7, | ||||
|         }, | ||||
| @@ -247,7 +248,7 @@ class BrightcoveIE(InfoExtractor): | ||||
|         playlist_info = json_data['videoList'] | ||||
|         videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] | ||||
|  | ||||
|         return self.playlist_result(videos, playlist_id=playlist_info['id'], | ||||
|         return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], | ||||
|                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName']) | ||||
|  | ||||
|     def _extract_video_info(self, video_info): | ||||
|   | ||||
| @@ -33,6 +33,7 @@ class BuzzFeedIE(InfoExtractor): | ||||
|             'skip_download': True,  # Got enough YouTube download tests | ||||
|         }, | ||||
|         'info_dict': { | ||||
|             'id': 'look-at-this-cute-dog-omg', | ||||
|             'description': 're:Munchkin the Teddy Bear is back ?!', | ||||
|             'title': 'You Need To Stop What You\'re Doing And Watching This Dog Walk On A Treadmill', | ||||
|         }, | ||||
| @@ -42,8 +43,8 @@ class BuzzFeedIE(InfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20141124', | ||||
|                 'uploader_id': 'CindysMunchkin', | ||||
|                 'description': 're:© 2014 Munchkin the Shih Tzu', | ||||
|                 'uploader': 'Munchkin the Shih Tzu', | ||||
|                 'description': 're:© 2014 Munchkin the', | ||||
|                 'uploader': 're:^Munchkin the', | ||||
|                 'title': 're:Munchkin the Teddy Bear gets her exercise', | ||||
|             }, | ||||
|         }] | ||||
|   | ||||
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -39,8 +37,7 @@ class CBSIE(InfoExtractor): | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         real_id = self._search_regex( | ||||
|             r"video\.settings\.pid\s*=\s*'([^']+)';", | ||||
|   | ||||
youtube_dl/extractor/cbssports.py (new file, +30 lines)
							| @@ -0,0 +1,30 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CBSSportsIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://www\.cbssports\.com/video/player/(?P<section>[^/]+)/(?P<id>[^/]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://www.cbssports.com/video/player/tennis/318462531970/0/us-open-flashbacks-1990s', | ||||
|         'info_dict': { | ||||
|             'id': '_d5_GbO8p1sT', | ||||
|             'ext': 'flv', | ||||
|             'title': 'US Open flashbacks: 1990s', | ||||
|             'description': 'Bill Macatee relives the best moments in US Open history from the 1990s.', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         section = mobj.group('section') | ||||
|         video_id = mobj.group('id') | ||||
|         all_videos = self._download_json( | ||||
|             'http://www.cbssports.com/data/video/player/getVideos/%s?as=json' % section, | ||||
|             video_id) | ||||
|         # The json file contains the info of all the videos in the section | ||||
|         video_info = next(v for v in all_videos if v['pcid'] == video_id) | ||||
|         return self.url_result('theplatform:%s' % video_info['pid'], 'ThePlatform') | ||||
| @@ -3,7 +3,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
| @@ -15,7 +15,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
| class CeskaTelevizeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.ceskatelevize\.cz/(porady|ivysilani)/(.+/)?(?P<id>[^?#]+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -107,13 +107,7 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
|         subtitles = {} | ||||
|         subs = item.get('subtitles') | ||||
|         if subs: | ||||
|             subtitles['cs'] = subs[0]['url'] | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         subtitles = self._fix_subtitles(self.extract_subtitles(video_id, subtitles)) | ||||
|             subtitles = self.extract_subtitles(episode_id, subs) | ||||
|  | ||||
|         return { | ||||
|             'id': episode_id, | ||||
| @@ -125,11 +119,20 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _get_subtitles(self, episode_id, subs): | ||||
|         original_subtitles = self._download_webpage( | ||||
|             subs[0]['url'], episode_id, 'Downloading subtitles') | ||||
|         srt_subs = self._fix_subtitles(original_subtitles) | ||||
|         return { | ||||
|             'cs': [{ | ||||
|                 'ext': 'srt', | ||||
|                 'data': srt_subs, | ||||
|             }] | ||||
|         } | ||||
|  | ||||
|     @staticmethod | ||||
|     def _fix_subtitles(subtitles): | ||||
|         """ Convert millisecond-based subtitles to SRT """ | ||||
|         if subtitles is None: | ||||
|             return subtitles  # subtitles not requested | ||||
|  | ||||
|         def _msectotimecode(msec): | ||||
|             """ Helper utility to convert milliseconds to timecode """ | ||||
| @@ -149,7 +152,4 @@ class CeskaTelevizeIE(SubtitlesInfoExtractor): | ||||
|                 else: | ||||
|                     yield line | ||||
|  | ||||
|         fixed_subtitles = {} | ||||
|         for k, v in subtitles.items(): | ||||
|             fixed_subtitles[k] = "\r\n".join(_fix_subtitle(v)) | ||||
|         return fixed_subtitles | ||||
|         return "\r\n".join(_fix_subtitle(subtitles)) | ||||
|   | ||||
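The body of _msectotimecode falls outside the diff context, but converting milliseconds to an SRT timecode is modular arithmetic; a plausible implementation plus a worked value:

    def _msectotimecode(msec):
        # 3661123 ms -> 1 h, 1 min, 1 s, 123 ms
        secs, msec = divmod(msec, 1000)
        mins, secs = divmod(secs, 60)
        hours, mins = divmod(mins, 60)
        return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)

    print(_msectotimecode(3661123))  # 01:01:01,123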
youtube_dl/extractor/chirbit.py (new file, +84 lines)
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ChirbitIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirb\.it/(?:(?:wp|pl)/|fb_chirbit_player\.swf\?key=)?(?P<id>[\da-zA-Z]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://chirb.it/PrIPv5', | ||||
|         'md5': '9847b0dad6ac3e074568bf2cfb197de8', | ||||
|         'info_dict': { | ||||
|             'id': 'PrIPv5', | ||||
|             'ext': 'mp3', | ||||
|             'title': 'Фасадстрой', | ||||
|             'duration': 52, | ||||
|             'view_count': int, | ||||
|             'comment_count': int, | ||||
|         } | ||||
|     }, { | ||||
|         'url': 'https://chirb.it/fb_chirbit_player.swf?key=PrIPv5', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         audio_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://chirb.it/%s' % audio_id, audio_id) | ||||
|  | ||||
|         audio_url = self._search_regex( | ||||
|             r'"setFile"\s*,\s*"([^"]+)"', webpage, 'audio url') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'itemprop="name">([^<]+)', webpage, 'title') | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration', fatal=False)) | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'itemprop="playCount"\s*>(\d+)', webpage, | ||||
|             'listen count', fatal=False)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'>(\d+) Comments?:', webpage, | ||||
|             'comment count', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': audio_id, | ||||
|             'url': audio_url, | ||||
|             'title': title, | ||||
|             'duration': duration, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class ChirbitProfileIE(InfoExtractor): | ||||
|     IE_NAME = 'chirbit:profile' | ||||
|     _VALID_URL = r'https?://(?:www\.)?chirbit.com/(?:rss/)?(?P<id>[^/]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://chirbit.com/ScarletBeauty', | ||||
|         'info_dict': { | ||||
|             'id': 'ScarletBeauty', | ||||
|             'title': 'Chirbits by ScarletBeauty', | ||||
|         }, | ||||
|         'playlist_mincount': 3, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         profile_id = self._match_id(url) | ||||
|  | ||||
|         rss = self._download_xml( | ||||
|             'http://chirbit.com/rss/%s' % profile_id, profile_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(audio_url.text, 'Chirbit') | ||||
|             for audio_url in rss.findall('./channel/item/link')] | ||||
|  | ||||
|         title = rss.find('./channel/title').text | ||||
|  | ||||
|         return self.playlist_result(entries, profile_id, title) | ||||
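
As a quick illustration of the "setFile" pattern the new extractor keys on, here is a hypothetical fragment of the chirb.it player script and the extraction it permits (the markup is assumed for illustration, not copied from the live site):

    import re

    # Hypothetical player JavaScript; only the "setFile" call matters.
    webpage = 'player.callback("setFile", "http://audio.chirbit.com/PrIPv5.mp3");'
    audio_url = re.search(r'"setFile"\s*,\s*"([^"]+)"', webpage).group(1)
    # audio_url == 'http://audio.chirbit.com/PrIPv5.mp3'
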
| @@ -27,7 +27,6 @@ from ..utils import ( | ||||
|     compiled_regex_type, | ||||
|     ExtractorError, | ||||
|     float_or_none, | ||||
|     HEADRequest, | ||||
|     int_or_none, | ||||
|     RegexNotFoundError, | ||||
|     sanitize_filename, | ||||
| @@ -151,8 +150,14 @@ class InfoExtractor(object): | ||||
|                     If not explicitly set, calculated from timestamp. | ||||
|     uploader_id:    Nickname or id of the video uploader. | ||||
|     location:       Physical location where the video was filmed. | ||||
|     subtitles:      The subtitle file contents as a dictionary in the format | ||||
|                     {language: subtitles}. | ||||
|     subtitles:      The available subtitles as a dictionary in the format | ||||
|                     {language: subformats}. "subformats" is a list sorted from | ||||
|                     lower to higher preference, each element is a dictionary | ||||
|                     with the "ext" entry and one of: | ||||
|                         * "data": The subtitles file contents | ||||
|                         * "url": A url pointing to the subtitles file | ||||
|     automatic_captions: Like 'subtitles', used by the YoutubeIE for | ||||
|                     automatically generated captions | ||||
|     duration:       Length of the video in seconds, as an integer. | ||||
|     view_count:     How many users have watched the video on the platform. | ||||
|     like_count:     Number of positive ratings of the video | ||||
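
To make the new schema concrete, a hedged example of what an extractor can now return (language code, URL and cue text are invented):

    subtitles = {
        'en': [
            # lower preference first: a subtitle file fetched on demand ...
            {'ext': 'vtt', 'url': 'http://example.com/subs/en.vtt'},
            # ... then inline data, listed last and therefore preferred
            {'ext': 'srt', 'data': '1\r\n00:00:00,000 --> 00:00:01,000\r\nHi\r\n'},
        ],
    }
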
| @@ -392,6 +397,16 @@ class InfoExtractor(object): | ||||
|             if blocked_iframe: | ||||
|                 msg += ' Visit %s for more details' % blocked_iframe | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|         if '<title>The URL you requested has been blocked</title>' in content[:512]: | ||||
|             msg = ( | ||||
|                 'Access to this webpage has been blocked by Indian censorship. ' | ||||
|                 'Use a VPN or proxy server (with --proxy) to route around it.') | ||||
|             block_msg = self._html_search_regex( | ||||
|                 r'</h1><p>(.*?)</p>', | ||||
|                 content, 'block message', default=None) | ||||
|             if block_msg: | ||||
|                 msg += ' (Message: "%s")' % block_msg.replace('\n', ' ') | ||||
|             raise ExtractorError(msg, expected=True) | ||||
|  | ||||
|         return content | ||||
|  | ||||
| @@ -753,9 +768,7 @@ class InfoExtractor(object): | ||||
|  | ||||
|     def _is_valid_url(self, url, video_id, item='video'): | ||||
|         try: | ||||
|             self._request_webpage( | ||||
|                 HEADRequest(url), video_id, | ||||
|                 'Checking %s URL' % item) | ||||
|             self._request_webpage(url, video_id, 'Checking %s URL' % item) | ||||
|             return True | ||||
|         except ExtractorError as e: | ||||
|             if isinstance(e.cause, compat_HTTPError): | ||||
| @@ -801,8 +814,8 @@ class InfoExtractor(object): | ||||
|             media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') | ||||
|         for i, media_el in enumerate(media_nodes): | ||||
|             if manifest_version == '2.0': | ||||
|                 manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' | ||||
|                                 + (media_el.attrib.get('href') or media_el.attrib.get('url'))) | ||||
|                 manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + | ||||
|                                 (media_el.attrib.get('href') or media_el.attrib.get('url'))) | ||||
|             tbr = int_or_none(media_el.attrib.get('bitrate')) | ||||
|             formats.append({ | ||||
|                 'format_id': '-'.join(filter(None, [f4m_id, 'f4m-%d' % (i if tbr is None else tbr)])), | ||||
| @@ -826,7 +839,7 @@ class InfoExtractor(object): | ||||
|             'url': m3u8_url, | ||||
|             'ext': ext, | ||||
|             'protocol': 'm3u8', | ||||
|             'preference': -1, | ||||
|             'preference': preference - 1 if preference else -1, | ||||
|             'resolution': 'multiple', | ||||
|             'format_note': 'Quality selection URL', | ||||
|         }] | ||||
| @@ -841,6 +854,7 @@ class InfoExtractor(object): | ||||
|             note='Downloading m3u8 information', | ||||
|             errnote='Failed to download m3u8 information') | ||||
|         last_info = None | ||||
|         last_media = None | ||||
|         kv_rex = re.compile( | ||||
|             r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)') | ||||
|         for line in m3u8_doc.splitlines(): | ||||
| @@ -851,6 +865,13 @@ class InfoExtractor(object): | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_info[m.group('key')] = v | ||||
|             elif line.startswith('#EXT-X-MEDIA:'): | ||||
|                 last_media = {} | ||||
|                 for m in kv_rex.finditer(line): | ||||
|                     v = m.group('val') | ||||
|                     if v.startswith('"'): | ||||
|                         v = v[1:-1] | ||||
|                     last_media[m.group('key')] = v | ||||
|             elif line.startswith('#') or not line.strip(): | ||||
|                 continue | ||||
|             else: | ||||
| @@ -879,6 +900,9 @@ class InfoExtractor(object): | ||||
|                     width_str, height_str = resolution.split('x') | ||||
|                     f['width'] = int(width_str) | ||||
|                     f['height'] = int(height_str) | ||||
|                 if last_media is not None: | ||||
|                     f['m3u8_media'] = last_media | ||||
|                     last_media = None | ||||
|                 formats.append(f) | ||||
|                 last_info = {} | ||||
|         self._sort_formats(formats) | ||||
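
To see what the new #EXT-X-MEDIA branch collects, here is the same kv_rex logic run over a hypothetical playlist line:

    import re

    kv_rex = re.compile(
        r'(?P<key>[a-zA-Z_-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)')

    # Hypothetical EXT-X-MEDIA line, for illustration only:
    line = '#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="audio",NAME="English",URI="eng/audio.m3u8"'
    last_media = {}
    for m in kv_rex.finditer(line):
        v = m.group('val')
        if v.startswith('"'):
            v = v[1:-1]  # strip surrounding quotes
        last_media[m.group('key')] = v
    # last_media == {'TYPE': 'AUDIO', 'GROUP-ID': 'audio',
    #                'NAME': 'English', 'URI': 'eng/audio.m3u8'}

The resulting dict is attached to the next variant as f['m3u8_media'] and then cleared, so each rendition picks up at most one media group.
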
| @@ -993,6 +1017,24 @@ class InfoExtractor(object): | ||||
|             any_restricted = any_restricted or is_restricted | ||||
|         return not any_restricted | ||||
|  | ||||
|     def extract_subtitles(self, *args, **kwargs): | ||||
|         if (self._downloader.params.get('writesubtitles', False) or | ||||
|                 self._downloader.params.get('listsubtitles')): | ||||
|             return self._get_subtitles(*args, **kwargs) | ||||
|         return {} | ||||
|  | ||||
|     def _get_subtitles(self, *args, **kwargs): | ||||
|         raise NotImplementedError("This method must be implemented by subclasses") | ||||
|  | ||||
|     def extract_automatic_captions(self, *args, **kwargs): | ||||
|         if (self._downloader.params.get('writeautomaticsub', False) or | ||||
|                 self._downloader.params.get('listsubtitles')): | ||||
|             return self._get_automatic_captions(*args, **kwargs) | ||||
|         return {} | ||||
|  | ||||
|     def _get_automatic_captions(self, *args, **kwargs): | ||||
|         raise NotImplementedError("This method must be implemented by subclasses") | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
|   | ||||
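
Putting the two new hooks together, a minimal sketch of an extractor written against the new subtitle API, as it would live in youtube_dl/extractor/ (site, URLs and class are hypothetical):

    from .common import InfoExtractor


    class ExampleIE(InfoExtractor):  # hypothetical extractor
        _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'

        def _get_subtitles(self, video_id):
            # Only runs when --write-sub or --list-subs was requested;
            # extract_subtitles() returns {} otherwise.
            return {'en': [{
                'ext': 'vtt',
                'url': 'http://example.com/subs/%s.vtt' % video_id,
            }]}

        def _real_extract(self, url):
            video_id = self._match_id(url)
            return {
                'id': video_id,
                'title': 'Example video',
                'url': 'http://example.com/media/%s.mp4' % video_id,
                'subtitles': self.extract_subtitles(video_id),
            }
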
| @@ -9,7 +9,7 @@ import xml.etree.ElementTree | ||||
|  | ||||
| from hashlib import sha1 | ||||
| from math import pow, sqrt, floor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| @@ -25,10 +25,9 @@ from ..aes import ( | ||||
|     aes_cbc_decrypt, | ||||
|     inc, | ||||
| ) | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class CrunchyrollIE(SubtitlesInfoExtractor): | ||||
| class CrunchyrollIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:[^/]*/[^/?&]*?|media/\?id=)(?P<video_id>[0-9]+))(?:[/?&]|$)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513', | ||||
| @@ -187,6 +186,38 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         subtitles = {} | ||||
|         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): | ||||
|             sub_page = self._download_webpage( | ||||
|                 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, | ||||
|                 video_id, note='Downloading subtitles for ' + sub_name) | ||||
|             id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) | ||||
|             iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) | ||||
|             data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) | ||||
|             if not id or not iv or not data: | ||||
|                 continue | ||||
|             id = int(id) | ||||
|             iv = base64.b64decode(iv) | ||||
|             data = base64.b64decode(data) | ||||
|  | ||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') | ||||
|             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             sub_root = xml.etree.ElementTree.fromstring(subtitle) | ||||
|             subtitles[lang_code] = [ | ||||
|                 { | ||||
|                     'ext': 'srt', | ||||
|                     'data': self._convert_subtitles_to_srt(sub_root), | ||||
|                 }, | ||||
|                 { | ||||
|                     'ext': 'ass', | ||||
|                     'data': self._convert_subtitles_to_ass(sub_root), | ||||
|                 }, | ||||
|             ] | ||||
|         return subtitles | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('video_id') | ||||
| @@ -249,34 +280,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text | ||||
|                 'format_id': video_format, | ||||
|             }) | ||||
|  | ||||
|         subtitles = {} | ||||
|         sub_format = self._downloader.params.get('subtitlesformat', 'srt') | ||||
|         for sub_id, sub_name in re.findall(r'\?ssid=([0-9]+)" title="([^"]+)', webpage): | ||||
|             sub_page = self._download_webpage( | ||||
|                 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id, | ||||
|                 video_id, note='Downloading subtitles for ' + sub_name) | ||||
|             id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False) | ||||
|             iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False) | ||||
|             data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False) | ||||
|             if not id or not iv or not data: | ||||
|                 continue | ||||
|             id = int(id) | ||||
|             iv = base64.b64decode(iv) | ||||
|             data = base64.b64decode(data) | ||||
|  | ||||
|             subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8') | ||||
|             lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False) | ||||
|             if not lang_code: | ||||
|                 continue | ||||
|             sub_root = xml.etree.ElementTree.fromstring(subtitle) | ||||
|             if sub_format == 'ass': | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_ass(sub_root) | ||||
|             else: | ||||
|                 subtitles[lang_code] = self._convert_subtitles_to_srt(sub_root) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|         subtitles = self.extract_subtitles(video_id, webpage) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -6,7 +6,6 @@ import json | ||||
| import itertools | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| @@ -31,7 +30,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): | ||||
|         return request | ||||
|  | ||||
|  | ||||
| class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
| class DailymotionIE(DailymotionBaseInfoExtractor): | ||||
|     """Information Extractor for Dailymotion""" | ||||
|  | ||||
|     _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(embed|#)/)?video/(?P<id>[^/?_]+)' | ||||
| @@ -143,9 +142,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, webpage) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, webpage) | ||||
|             return | ||||
|  | ||||
|         view_count = str_to_int(self._search_regex( | ||||
|             r'video_views_count[^>]+>\s+([\d\.,]+)', | ||||
| @@ -169,7 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             'view_count': view_count, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
|             sub_list = self._download_webpage( | ||||
|                 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id, | ||||
| @@ -179,7 +175,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             return {} | ||||
|         info = json.loads(sub_list) | ||||
|         if (info['total'] > 0): | ||||
|             sub_lang_list = dict((l['language'], l['url']) for l in info['list']) | ||||
|             sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list']) | ||||
|             return sub_lang_list | ||||
|         self._downloader.report_warning('video doesn\'t have subtitles') | ||||
|         return {} | ||||
| @@ -194,6 +190,7 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): | ||||
|         'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q', | ||||
|         'info_dict': { | ||||
|             'title': 'SPORT', | ||||
|             'id': 'xv4bw_nqtv_sport', | ||||
|         }, | ||||
|         'playlist_mincount': 20, | ||||
|     }] | ||||
|   | ||||
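
For reference, a hypothetical response from the Dailymotion subtitles endpoint and the structure the rewritten mapping builds from it:

    import json

    # Hypothetical API payload; only 'language' and 'url' are used.
    sub_list = ('{"total": 1, "list": '
                '[{"language": "en", "url": "http://example.com/en.srt"}]}')
    info = json.loads(sub_list)
    sub_lang_list = dict(
        (l['language'], [{'url': l['url'], 'ext': 'srt'}])
        for l in info['list'])
    # sub_lang_list == {'en': [{'url': 'http://example.com/en.srt',
    #                           'ext': 'srt'}]}
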
| @@ -25,8 +25,9 @@ class DefenseGouvFrIE(InfoExtractor): | ||||
|             r"flashvars.pvg_id=\"(\d+)\";", | ||||
|             webpage, 'ID') | ||||
|  | ||||
|         json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/' | ||||
|                     + video_id) | ||||
|         json_url = ( | ||||
|             'http://static.videos.gouv.fr/brightcovehub/export/json/%s' % | ||||
|             video_id) | ||||
|         info = self._download_json(json_url, title, 'Downloading JSON config') | ||||
|         video_url = info['renditions'][0]['url'] | ||||
|  | ||||
|   | ||||
| @@ -1,11 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import ExtractorError | ||||
| from .common import InfoExtractor, ExtractorError | ||||
| from ..utils import parse_iso8601 | ||||
|  | ||||
|  | ||||
| class DRTVIE(SubtitlesInfoExtractor): | ||||
| class DRTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?dr\.dk/tv/se/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)' | ||||
|  | ||||
|     _TEST = { | ||||
| @@ -76,7 +75,7 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|                     } | ||||
|                     for subs in subtitles_list: | ||||
|                         lang = subs['Language'] | ||||
|                         subtitles[LANGS.get(lang, lang)] = subs['Uri'] | ||||
|                         subtitles[LANGS.get(lang, lang)] = [{'url': subs['Uri'], 'ext': 'vtt'}] | ||||
|  | ||||
|         if not formats and restricted_to_denmark: | ||||
|             raise ExtractorError( | ||||
| @@ -84,10 +83,6 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
| @@ -96,5 +91,5 @@ class DRTVIE(SubtitlesInfoExtractor): | ||||
|             'timestamp': timestamp, | ||||
|             'duration': duration, | ||||
|             'formats': formats, | ||||
|             'subtitles': self.extract_subtitles(video_id, subtitles), | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|   | ||||
16 youtube_dl/extractor/embedly.py Normal file
							| @@ -0,0 +1,16 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urllib_parse_unquote | ||||
|  | ||||
|  | ||||
| class EmbedlyIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         return self.url_result(compat_urllib_parse_unquote(self._match_id(url))) | ||||
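
The extractor is pure URL unwrapping. Against the test URL above, the (?P<id>[^#&]+) group captures the percent-encoded target, which a single unquote restores:

    from youtube_dl.compat import compat_urllib_parse_unquote

    captured = ('https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw'
                '%26list%3DUUGLim4T2loE5rwCMdpCIPVg')
    print(compat_urllib_parse_unquote(captured))
    # https://www.youtube.com/watch?v=SU4fj_aEMVw&list=UUGLim4T2loE5rwCMdpCIPVg
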
| @@ -22,6 +22,7 @@ class EscapistIE(InfoExtractor): | ||||
|             'uploader_id': 'the-escapist-presents', | ||||
|             'uploader': 'The Escapist Presents', | ||||
|             'title': "Breaking Down Baldur's Gate", | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -30,19 +31,25 @@ class EscapistIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         uploader_id = self._html_search_regex( | ||||
|             r"<h1 class='headline'><a href='/videos/view/(.*?)'", | ||||
|             r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'", | ||||
|             webpage, 'uploader ID', fatal=False) | ||||
|         uploader = self._html_search_regex( | ||||
|             r"<h1 class='headline'>(.*?)</a>", | ||||
|             r"<h1\s+class='headline'>(.*?)</a>", | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|  | ||||
|         raw_title = self._html_search_meta('title', webpage, fatal=True) | ||||
|         title = raw_title.partition(' : ')[2] | ||||
|  | ||||
|         player_url = self._og_search_video_url(webpage, name='player URL') | ||||
|         config_url = compat_urllib_parse.unquote(self._search_regex( | ||||
|             r'config=(.*)$', player_url, 'config URL')) | ||||
|         config_url = compat_urllib_parse.unquote(self._html_search_regex( | ||||
|             r'''(?x) | ||||
|             (?: | ||||
|                 <param\s+name="flashvars"\s+value="config=| | ||||
|                 flashvars="config= | ||||
|             ) | ||||
|             ([^"&]+) | ||||
|             ''', | ||||
|             webpage, 'config URL')) | ||||
|  | ||||
|         formats = [] | ||||
|  | ||||
| @@ -81,5 +88,4 @@ class EscapistIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': description, | ||||
|             'player_url': player_url, | ||||
|         } | ||||
|   | ||||
| @@ -126,11 +126,17 @@ class FacebookIE(InfoExtractor): | ||||
|         params_raw = compat_urllib_parse.unquote(data['params']) | ||||
|         params = json.loads(params_raw) | ||||
|         video_data = params['video_data'][0] | ||||
|         video_url = video_data.get('hd_src') | ||||
|         if not video_url: | ||||
|             video_url = video_data['sd_src'] | ||||
|         if not video_url: | ||||
|             raise ExtractorError('Cannot find video URL') | ||||
|  | ||||
|         formats = [] | ||||
|         for quality in ['sd', 'hd']: | ||||
|             src = video_data.get('%s_src' % quality) | ||||
|             if src is not None: | ||||
|                 formats.append({ | ||||
|                     'format_id': quality, | ||||
|                     'url': src, | ||||
|                 }) | ||||
|         if not formats: | ||||
|             raise ExtractorError('Cannot find video formats') | ||||
|  | ||||
|         video_title = self._html_search_regex( | ||||
|             r'<h2 class="uiHeaderTitle">([^<]*)</h2>', webpage, 'title', | ||||
| @@ -146,7 +152,7 @@ class FacebookIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'url': video_url, | ||||
|             'formats': formats, | ||||
|             'duration': int_or_none(video_data.get('video_duration')), | ||||
|             'thumbnail': video_data.get('thumbnail_src'), | ||||
|         } | ||||
|   | ||||
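
Run against hypothetical video_data, the new loop yields a proper formats list, so SD is no longer discarded whenever HD exists and -f sd / -f hd selection becomes possible:

    # Hypothetical payload: SD only, HD explicitly absent.
    video_data = {'sd_src': 'http://example.com/v_sd.mp4', 'hd_src': None}

    formats = []
    for quality in ['sd', 'hd']:
        src = video_data.get('%s_src' % quality)
        if src is not None:
            formats.append({'format_id': quality, 'url': src})
    # formats == [{'format_id': 'sd', 'url': 'http://example.com/v_sd.mp4'}]
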
| @@ -14,6 +14,7 @@ class FiveMinIE(InfoExtractor): | ||||
|     IE_NAME = '5min' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://[^/]*?5min\.com/Scripts/PlayerSeed\.js\?(?:.*?&)?playList=| | ||||
|             https?://(?:(?:massively|www)\.)?joystiq\.com/video/| | ||||
|             5min:) | ||||
|         (?P<id>\d+) | ||||
|         ''' | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
| from ..utils import remove_end | ||||
|  | ||||
|  | ||||
| class GDCVaultIE(InfoExtractor): | ||||
| @@ -65,10 +66,12 @@ class GDCVaultIE(InfoExtractor): | ||||
|  | ||||
|     def _parse_flv(self, xml_description): | ||||
|         video_formats = [] | ||||
|         akami_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         akamai_url = xml_description.find('./metadata/akamaiHost').text | ||||
|         slide_video_path = xml_description.find('./metadata/slideVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + slide_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, | ||||
|             'play_path': remove_end(slide_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'slide deck video', | ||||
|             'quality': -2, | ||||
|             'preference': -2, | ||||
| @@ -76,7 +79,9 @@ class GDCVaultIE(InfoExtractor): | ||||
|         }) | ||||
|         speaker_video_path = xml_description.find('./metadata/speakerVideo').text | ||||
|         video_formats.append({ | ||||
|             'url': 'rtmp://' + akami_url + '/' + speaker_video_path, | ||||
|             'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url, | ||||
|             'play_path': remove_end(speaker_video_path, '.flv'), | ||||
|             'ext': 'flv', | ||||
|             'format_note': 'speaker video', | ||||
|             'quality': -1, | ||||
|             'preference': -1, | ||||
|   | ||||
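
The fix splits each RTMP address into an application URL and a play path; a sketch of remove_end as used here (host and path are made up):

    from youtube_dl.utils import remove_end

    akamai_url = 'fms.example.net'                     # hypothetical host
    slide_video_path = 'gdc2015/slides/talk12345.flv'  # hypothetical path
    fmt = {
        'url': 'rtmp://%s/ondemand?ovpfv=1.1' % akamai_url,
        # the downloader expects the play path without the .flv suffix:
        'play_path': remove_end(slide_video_path, '.flv'),
        'ext': 'flv',
    }
    # fmt['play_path'] == 'gdc2015/slides/talk12345'
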
| @@ -473,6 +473,7 @@ class GenericIE(InfoExtractor): | ||||
|         { | ||||
|             'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', | ||||
|             'info_dict': { | ||||
|                 'id': '1986', | ||||
|                 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', | ||||
|             }, | ||||
|             'playlist_mincount': 2, | ||||
| @@ -531,7 +532,7 @@ class GenericIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': 'Mrj4DVp2zeA', | ||||
|                 'ext': 'mp4', | ||||
|                 'upload_date': '20150204', | ||||
|                 'upload_date': '20150212', | ||||
|                 'uploader': 'The National Archives UK', | ||||
|                 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', | ||||
|                 'uploader_id': 'NationalArchives08', | ||||
| @@ -546,7 +547,16 @@ class GenericIE(InfoExtractor): | ||||
|                 'id': 'aanslagen-kopenhagen', | ||||
|                 'title': 'Aanslagen Kopenhagen | RTL Nieuws', | ||||
|             } | ||||
|         } | ||||
|         }, | ||||
|         # Zapiks embed | ||||
|         { | ||||
|             'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', | ||||
|             'info_dict': { | ||||
|                 'id': '118046', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def report_following_redirect(self, new_url): | ||||
| @@ -1097,6 +1107,12 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Livestream') | ||||
|  | ||||
|         # Look for Zapiks embed | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Zapiks') | ||||
|  | ||||
|         def check_video(vurl): | ||||
|             if YoutubeIE.suitable(vurl): | ||||
|                 return True | ||||
|   | ||||
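
A quick check of the new Zapiks detection against a hypothetical embed snippet (the query parameters are invented):

    import re

    webpage = ('<iframe width="640" height="360" '
               'src="http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046">'
               '</iframe>')
    mobj = re.search(
        r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"',
        webpage)
    # mobj.group('url') == 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046'
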
| @@ -34,6 +34,9 @@ class IGNIE(InfoExtractor): | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://me.ign.com/en/feature/15775/100-little-things-in-gta-5-that-will-blow-your-mind', | ||||
|             'info_dict': { | ||||
|                 'id': '100-little-things-in-gta-5-that-will-blow-your-mind', | ||||
|             }, | ||||
|             'playlist': [ | ||||
|                 { | ||||
|                     'info_dict': { | ||||
|   | ||||
97 youtube_dl/extractor/imgur.py Normal file
							| @@ -0,0 +1,97 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ImgurIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P<id>[a-zA-Z0-9]+)(?:\.mp4|\.gifv)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'https://i.imgur.com/A61SaA1.gifv', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'https://imgur.com/A61SaA1', | ||||
|         'info_dict': { | ||||
|             'id': 'A61SaA1', | ||||
|             'ext': 'mp4', | ||||
|             'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$', | ||||
|             'description': 're:The origin of the Internet\'s most viral images$|The Internet\'s visual storytelling community\. Explore, share, and discuss the best visual stories the Internet has to offer\.$', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         width = int_or_none(self._search_regex( | ||||
|             r'<param name="width" value="([0-9]+)"', | ||||
|             webpage, 'width', fatal=False)) | ||||
|         height = int_or_none(self._search_regex( | ||||
|             r'<param name="height" value="([0-9]+)"', | ||||
|             webpage, 'height', fatal=False)) | ||||
|  | ||||
|         video_elements = self._search_regex( | ||||
|             r'(?s)<div class="video-elements">(.*?)</div>', | ||||
|             webpage, 'video elements', default=None) | ||||
|         if not video_elements: | ||||
|             raise ExtractorError( | ||||
|                 'No sources found for video %s. Maybe an image?' % video_id, | ||||
|                 expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements): | ||||
|             formats.append({ | ||||
|                 'format_id': m.group('type').partition('/')[2], | ||||
|                 'url': self._proto_relative_url(m.group('src')), | ||||
|                 'ext': mimetype2ext(m.group('type')), | ||||
|                 'acodec': 'none', | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         gif_json = self._search_regex( | ||||
|             r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', | ||||
|             webpage, 'GIF code', fatal=False) | ||||
|         if gif_json: | ||||
|             gifd = self._parse_json( | ||||
|                 gif_json, video_id, transform_source=js_to_json) | ||||
|             formats.append({ | ||||
|                 'format_id': 'gif', | ||||
|                 'preference': -10, | ||||
|                 'width': width, | ||||
|                 'height': height, | ||||
|                 'ext': 'gif', | ||||
|                 'acodec': 'none', | ||||
|                 'vcodec': 'gif', | ||||
|                 'container': 'gif', | ||||
|                 'url': self._proto_relative_url(gifd['gifUrl']), | ||||
|                 'filesize': gifd.get('size'), | ||||
|                 'http_headers': { | ||||
|                     'User-Agent': 'youtube-dl (like wget)', | ||||
|                 }, | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': self._og_search_title(webpage), | ||||
|         } | ||||
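
mimetype2ext, used above to pick the container for each <source> element, maps a MIME type to a file extension; a small illustration with the video types such pages are assumed to advertise:

    from youtube_dl.utils import mimetype2ext

    assert mimetype2ext('video/webm') == 'webm'
    assert mimetype2ext('video/mp4') == 'mp4'
    # format_id takes the same tail via partition:
    assert 'video/webm'.partition('/')[2] == 'webm'
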
| @@ -1,23 +1,26 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import random | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     xpath_text, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class Laola1TvIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?laola1\.tv/(?P<lang>[a-z]+)-(?P<portal>[a-z]+)/.*?/(?P<id>[0-9]+)\.html' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.laola1.tv/de-de/live/bwf-bitburger-open-grand-prix-gold-court-1/250019.html', | ||||
|         'url': 'http://www.laola1.tv/de-de/video/straubing-tigers-koelner-haie/227883.html', | ||||
|         'info_dict': { | ||||
|             'id': '250019', | ||||
|             'id': '227883', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Bitburger Open Grand Prix Gold - Court 1', | ||||
|             'categories': ['Badminton'], | ||||
|             'uploader': 'BWF - Badminton World Federation', | ||||
|             'is_live': True, | ||||
|             'title': 'Straubing Tigers - Kölner Haie', | ||||
|             'categories': ['Eishockey'], | ||||
|             'is_live': False, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -43,15 +46,26 @@ class Laola1TvIE(InfoExtractor): | ||||
|             r'flashvars\.([_a-zA-Z0-9]+)\s*=\s*"([^"]*)";', iframe) | ||||
|         flashvars = dict((m[0], m[1]) for m in flashvars_m) | ||||
|  | ||||
|         partner_id = self._search_regex( | ||||
|             r'partnerid\s*:\s*"([^"]+)"', iframe, 'partner id') | ||||
|  | ||||
|         xml_url = ('http://www.laola1.tv/server/hd_video.php?' + | ||||
|                    'play=%s&partner=1&portal=%s&v5ident=&lang=%s' % ( | ||||
|                        video_id, portal, lang)) | ||||
|                    'play=%s&partner=%s&portal=%s&v5ident=&lang=%s' % ( | ||||
|                        video_id, partner_id, portal, lang)) | ||||
|         hd_doc = self._download_xml(xml_url, video_id) | ||||
|  | ||||
|         title = hd_doc.find('.//video/title').text | ||||
|         flash_url = hd_doc.find('.//video/url').text | ||||
|         categories = hd_doc.find('.//video/meta_sports').text.split(',') | ||||
|         uploader = hd_doc.find('.//video/meta_organistation').text | ||||
|         title = xpath_text(hd_doc, './/video/title', fatal=True) | ||||
|         flash_url = xpath_text(hd_doc, './/video/url', fatal=True) | ||||
|         uploader = xpath_text(hd_doc, './/video/meta_organistation') | ||||
|  | ||||
|         is_live = xpath_text(hd_doc, './/video/islive') == 'true' | ||||
|         if is_live: | ||||
|             raise ExtractorError( | ||||
|                 'Live streams are not supported by the f4m downloader.') | ||||
|  | ||||
|         categories = xpath_text(hd_doc, './/video/meta_sports') | ||||
|         if categories: | ||||
|             categories = categories.split(',') | ||||
|  | ||||
|         ident = random.randint(10000000, 99999999) | ||||
|         token_url = '%s&ident=%s&klub=0&unikey=0&timestamp=%s&auth=%s' % ( | ||||
| @@ -60,15 +74,16 @@ class Laola1TvIE(InfoExtractor): | ||||
|         token_doc = self._download_xml( | ||||
|             token_url, video_id, note='Downloading token') | ||||
|         token_attrib = token_doc.find('.//token').attrib | ||||
|         if token_attrib.get('auth') == 'blocked': | ||||
|             raise ExtractorError('Token error: ' % token_attrib.get('comment')) | ||||
|         if token_attrib.get('auth') in ('blocked', 'restricted'): | ||||
|             raise ExtractorError( | ||||
|                 'Token error: %s' % token_attrib.get('comment'), expected=True) | ||||
|  | ||||
|         video_url = '%s?hdnea=%s&hdcore=3.2.0' % ( | ||||
|             token_attrib['url'], token_attrib['auth']) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'is_live': True, | ||||
|             'is_live': is_live, | ||||
|             'title': title, | ||||
|             'url': video_url, | ||||
|             'uploader': uploader, | ||||
|   | ||||
| @@ -37,6 +37,7 @@ class LivestreamIE(InfoExtractor): | ||||
|         'url': 'http://new.livestream.com/tedx/cityenglish', | ||||
|         'info_dict': { | ||||
|             'title': 'TEDCity2.0 (English)', | ||||
|             'id': '2245590', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     }, { | ||||
| @@ -148,7 +149,8 @@ class LivestreamIE(InfoExtractor): | ||||
|                   if is_relevant(video_data, video_id)] | ||||
|         if video_id is None: | ||||
|             # This is an event page: | ||||
|             return self.playlist_result(videos, info['id'], info['full_name']) | ||||
|             return self.playlist_result( | ||||
|                 videos, '%s' % info['id'], info['full_name']) | ||||
|         else: | ||||
|             if not videos: | ||||
|                 raise ExtractorError('Cannot find video %s' % video_id) | ||||
|   | ||||
| @@ -3,7 +3,6 @@ from __future__ import unicode_literals | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| @@ -16,7 +15,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class LyndaIE(SubtitlesInfoExtractor): | ||||
| class LyndaIE(InfoExtractor): | ||||
|     IE_NAME = 'lynda' | ||||
|     IE_DESC = 'lynda.com videos' | ||||
|     _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' | ||||
| @@ -88,11 +87,7 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|         self._check_formats(formats, video_id) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, page) | ||||
|             return | ||||
|  | ||||
|         subtitles = self._fix_subtitles(self.extract_subtitles(video_id, page)) | ||||
|         subtitles = self.extract_subtitles(video_id, page) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -144,38 +139,31 @@ class LyndaIE(SubtitlesInfoExtractor): | ||||
|         if re.search(self._SUCCESSFUL_LOGIN_REGEX, login_page) is None: | ||||
|             raise ExtractorError('Unable to log in') | ||||
|  | ||||
|     def _fix_subtitles(self, subtitles): | ||||
|         if subtitles is None: | ||||
|             return subtitles  # subtitles not requested | ||||
|  | ||||
|         fixed_subtitles = {} | ||||
|         for k, v in subtitles.items(): | ||||
|             subs = json.loads(v) | ||||
|             if len(subs) == 0: | ||||
|     def _fix_subtitles(self, subs): | ||||
|         srt = '' | ||||
|         for pos in range(0, len(subs) - 1): | ||||
|             seq_current = subs[pos] | ||||
|             m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) | ||||
|             if m_current is None: | ||||
|                 continue | ||||
|             srt = '' | ||||
|             for pos in range(0, len(subs) - 1): | ||||
|                 seq_current = subs[pos] | ||||
|                 m_current = re.match(self._TIMECODE_REGEX, seq_current['Timecode']) | ||||
|                 if m_current is None: | ||||
|                     continue | ||||
|                 seq_next = subs[pos + 1] | ||||
|                 m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) | ||||
|                 if m_next is None: | ||||
|                     continue | ||||
|                 appear_time = m_current.group('timecode') | ||||
|                 disappear_time = m_next.group('timecode') | ||||
|                 text = seq_current['Caption'] | ||||
|                 srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) | ||||
|             if srt: | ||||
|                 fixed_subtitles[k] = srt | ||||
|         return fixed_subtitles | ||||
|             seq_next = subs[pos + 1] | ||||
|             m_next = re.match(self._TIMECODE_REGEX, seq_next['Timecode']) | ||||
|             if m_next is None: | ||||
|                 continue | ||||
|             appear_time = m_current.group('timecode') | ||||
|             disappear_time = m_next.group('timecode') | ||||
|             text = seq_current['Caption'] | ||||
|             srt += '%s\r\n%s --> %s\r\n%s' % (str(pos), appear_time, disappear_time, text) | ||||
|         if srt: | ||||
|             return srt | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id | ||||
|         sub = self._download_webpage(url, None, False) | ||||
|         sub_json = json.loads(sub) | ||||
|         return {'en': url} if len(sub_json) > 0 else {} | ||||
|         subs = self._download_json(url, None, False) | ||||
|         if subs: | ||||
|             return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]} | ||||
|         else: | ||||
|             return {} | ||||
|  | ||||
|  | ||||
| class LyndaCourseIE(InfoExtractor): | ||||
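
To make the SRT assembly concrete, a hypothetical two-entry transcript and roughly what _fix_subtitles builds from it (the bracketed Timecode form is assumed to be what _TIMECODE_REGEX, defined elsewhere in this file, expects):

    # Hypothetical Lynda transcript entries:
    subs = [
        {'Timecode': '[00:00:01.000]', 'Caption': 'Hello'},
        {'Timecode': '[00:00:03.500]', 'Caption': 'World'},
    ]
    # _fix_subtitles(subs) would yield approximately:
    #   '0\r\n00:00:01.000 --> 00:00:03.500\r\nHello'
    # The final entry contributes only the end time of its
    # predecessor, since the loop stops at len(subs) - 1.
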
|   | ||||
| @@ -5,9 +5,6 @@ import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .youtube import YoutubeIE | ||||
| from ..compat import ( | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     clean_html, | ||||
|     ExtractorError, | ||||
| @@ -108,7 +105,6 @@ class OCWMITIE(InfoExtractor): | ||||
|                 'upload_date': '20121109', | ||||
|                 'uploader_id': 'MIT', | ||||
|                 'uploader': 'MIT OpenCourseWare', | ||||
|                 # 'subtitles': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/MIT6_041F11_lec07_300k.mp4.srt' | ||||
|             } | ||||
|         }, | ||||
|         { | ||||
| @@ -121,7 +117,6 @@ class OCWMITIE(InfoExtractor): | ||||
|                 'uploader_id': 'MIT', | ||||
|                 'uploader': 'MIT OpenCourseWare', | ||||
|                 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.', | ||||
|                 # 'subtitles': 'http://ocw.mit.edu//courses/mathematics/18-01sc-single-variable-calculus-fall-2010/ocw-18.01-f07-lec01_300k.SRT' | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
| @@ -140,7 +135,6 @@ class OCWMITIE(InfoExtractor): | ||||
|             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1)) | ||||
|             metadata = re.split(r', ?', metadata) | ||||
|             yt = metadata[1] | ||||
|             subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7]) | ||||
|         else: | ||||
|             # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file) | ||||
|             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage) | ||||
| @@ -148,7 +142,6 @@ class OCWMITIE(InfoExtractor): | ||||
|                 metadata = re.sub(r'[\'"]', '', embed_media.group(1)) | ||||
|                 metadata = re.split(r', ?', metadata) | ||||
|                 yt = metadata[1] | ||||
|                 subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5]) | ||||
|             else: | ||||
|                 raise ExtractorError('Unable to find embedded YouTube video.') | ||||
|         video_id = YoutubeIE.extract_id(yt) | ||||
| @@ -159,7 +152,5 @@ class OCWMITIE(InfoExtractor): | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'url': yt, | ||||
|             'url_transparent' | ||||
|             'subtitles': subs, | ||||
|             'ie_key': 'Youtube', | ||||
|         } | ||||
|   | ||||
| @@ -2,7 +2,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| @@ -23,7 +23,7 @@ def _media_xml_tag(tag): | ||||
|     return '{http://search.yahoo.com/mrss/}%s' % tag | ||||
|  | ||||
|  | ||||
| class MTVServicesInfoExtractor(SubtitlesInfoExtractor): | ||||
| class MTVServicesInfoExtractor(InfoExtractor): | ||||
|     _MOBILE_TEMPLATE = None | ||||
|  | ||||
|     @staticmethod | ||||
| @@ -95,25 +95,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): | ||||
|  | ||||
|     def _extract_subtitles(self, mdoc, mtvn_id): | ||||
|         subtitles = {} | ||||
|         FORMATS = { | ||||
|             'scc': 'cea-608', | ||||
|             'eia-608': 'cea-608', | ||||
|             'xml': 'ttml', | ||||
|         } | ||||
|         subtitles_format = FORMATS.get( | ||||
|             self._downloader.params.get('subtitlesformat'), 'ttml') | ||||
|         for transcript in mdoc.findall('.//transcript'): | ||||
|             if transcript.get('kind') != 'captions': | ||||
|                 continue | ||||
|             lang = transcript.get('srclang') | ||||
|             for typographic in transcript.findall('./typographic'): | ||||
|                 captions_format = typographic.get('format') | ||||
|                 if captions_format == subtitles_format: | ||||
|                     subtitles[lang] = compat_str(typographic.get('src')) | ||||
|                     break | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(mtvn_id, subtitles) | ||||
|         return self.extract_subtitles(mtvn_id, subtitles) | ||||
|             subtitles[lang] = [{ | ||||
|                 'url': compat_str(typographic.get('src')), | ||||
|                 'ext': typographic.get('format') | ||||
|             } for typographic in transcript.findall('./typographic')] | ||||
|         return subtitles | ||||
|  | ||||
|     def _get_video_info(self, itemdoc): | ||||
|         uri = itemdoc.find('guid').text | ||||
| @@ -196,8 +186,6 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor): | ||||
|                 webpage, 'mgid') | ||||
|  | ||||
|         videos_info = self._get_videos_info(mgid) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             return | ||||
|         return videos_info | ||||
|  | ||||
|  | ||||
|   | ||||
38 youtube_dl/extractor/nationalgeographic.py Normal file
							| @@ -0,0 +1,38 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     smuggle_url, | ||||
|     url_basename, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NationalGeographicIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://video\.nationalgeographic\.com/video/.*?' | ||||
|  | ||||
|     _TEST = { | ||||
|         'url': 'http://video.nationalgeographic.com/video/news/150210-news-crab-mating-vin?source=featuredvideo', | ||||
|         'info_dict': { | ||||
|             'id': '4DmDACA6Qtk_', | ||||
|             'ext': 'flv', | ||||
|             'title': 'Mating Crabs Busted by Sharks', | ||||
|             'description': 'md5:16f25aeffdeba55aaa8ec37e093ad8b3', | ||||
|         }, | ||||
|         'add_ie': ['ThePlatform'], | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         name = url_basename(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, name) | ||||
|         feed_url = self._search_regex(r'data-feed-url="([^"]+)"', webpage, 'feed url') | ||||
|         guid = self._search_regex(r'data-video-guid="([^"]+)"', webpage, 'guid') | ||||
|  | ||||
|         feed = self._download_xml('%s?byGuid=%s' % (feed_url, guid), name) | ||||
|         content = feed.find('.//{http://search.yahoo.com/mrss/}content') | ||||
|         theplatform_id = url_basename(content.attrib.get('url')) | ||||
|  | ||||
|         return self.url_result(smuggle_url( | ||||
|             'http://link.theplatform.com/s/ngs/%s?format=SMIL&formats=MPEG4&manifest=f4m' % theplatform_id, | ||||
|             # For some reason, the normal links don't work and we must force the use of f4m | ||||
|             {'force_smil_url': True})) | ||||
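
smuggle_url, as used above, piggybacks extra data onto the URL fragment for the target extractor to recover with its counterpart unsmuggle_url; a small sketch, reusing the ThePlatform id from the test:

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    url = smuggle_url(
        'http://link.theplatform.com/s/ngs/4DmDACA6Qtk_',
        {'force_smil_url': True})
    plain_url, data = unsmuggle_url(url)
    # plain_url == 'http://link.theplatform.com/s/ngs/4DmDACA6Qtk_'
    # data == {'force_smil_url': True}
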
| @@ -18,13 +18,13 @@ class NBCIE(InfoExtractor): | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.nbc.com/chicago-fire/video/i-am-a-firefighter/2734188', | ||||
|             'url': 'http://www.nbc.com/the-tonight-show/segments/112966', | ||||
|             # md5 checksum is not stable | ||||
|             'info_dict': { | ||||
|                 'id': 'bTmnLCvIbaaH', | ||||
|                 'id': 'c9xnCo0YPOPH', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'I Am a Firefighter', | ||||
|                 'description': 'An emergency puts Dawson\'sf irefighter skills to the ultimate test in this four-part digital series.', | ||||
|                 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s', | ||||
|                 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|   | ||||
| @@ -29,6 +29,9 @@ class NetzkinoIE(InfoExtractor): | ||||
|             'timestamp': 1344858571, | ||||
|             'age_limit': 12, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': 'Download only works from Germany', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     fix_xml_ampersands, | ||||
| @@ -12,7 +11,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class NPOBaseIE(SubtitlesInfoExtractor): | ||||
| class NPOBaseIE(InfoExtractor): | ||||
|     def _get_token(self, video_id): | ||||
|         token_page = self._download_webpage( | ||||
|             'http://ida.omroep.nl/npoplayer/i.js', | ||||
| @@ -164,13 +163,10 @@ class NPOIE(NPOBaseIE): | ||||
|  | ||||
|         subtitles = {} | ||||
|         if metadata.get('tt888') == 'ja': | ||||
|             subtitles['nl'] = 'http://e.omroep.nl/tt888/%s' % video_id | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|             subtitles['nl'] = [{ | ||||
|                 'ext': 'vtt', | ||||
|                 'url': 'http://e.omroep.nl/tt888/%s' % video_id, | ||||
|             }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -10,7 +10,6 @@ from ..utils import ( | ||||
|     parse_duration, | ||||
|     unified_strdate, | ||||
| ) | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
|  | ||||
| class NRKIE(InfoExtractor): | ||||
| @@ -73,7 +72,7 @@ class NRKIE(InfoExtractor): | ||||
|         } | ||||
|  | ||||
|  | ||||
| class NRKTVIE(SubtitlesInfoExtractor): | ||||
| class NRKTVIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -156,7 +155,7 @@ class NRKTVIE(SubtitlesInfoExtractor): | ||||
|         if self._downloader.params.get('verbose', False): | ||||
|             self.to_screen('[debug] %s' % txt) | ||||
|  | ||||
|     def _extract_captions(self, subtitlesurl, video_id, baseurl): | ||||
|     def _get_subtitles(self, subtitlesurl, video_id, baseurl): | ||||
|         url = "%s%s" % (baseurl, subtitlesurl) | ||||
|         self._debug_print('%s: Subtitle url: %s' % (video_id, url)) | ||||
|         captions = self._download_xml(url, video_id, 'Downloading subtitles') | ||||
| @@ -170,7 +169,10 @@ class NRKTVIE(SubtitlesInfoExtractor): | ||||
|             endtime = self._seconds2str(begin + duration) | ||||
|             text = '\n'.join(p.itertext()) | ||||
|             srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text) | ||||
|         return {lang: srt} | ||||
|         return {lang: [ | ||||
|             {'ext': 'ttml', 'url': url}, | ||||
|             {'ext': 'srt', 'data': srt}, | ||||
|         ]} | ||||
|  | ||||
|     def _extract_f4m(self, manifest_url, video_id): | ||||
|         return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id) | ||||
| @@ -243,10 +245,7 @@ class NRKTVIE(SubtitlesInfoExtractor): | ||||
|             webpage, 'subtitle URL', default=None) | ||||
|         subtitles = None | ||||
|         if subtitles_url: | ||||
|             subtitles = self._extract_captions(subtitles_url, video_id, baseurl) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|             subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
| @@ -1,9 +1,6 @@ | ||||
| # encoding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
| @@ -11,7 +8,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class PatreonIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(.+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.patreon.com/creation?hid=743933', | ||||
| @@ -35,6 +32,23 @@ class PatreonIE(InfoExtractor): | ||||
|                 'thumbnail': 're:^https?://.*$', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'https://www.patreon.com/creation?hid=1682498', | ||||
|             'info_dict': { | ||||
|                 'id': 'SU4fj_aEMVw', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'I\'m on Patreon!', | ||||
|                 'uploader': 'TraciJHines', | ||||
|                 'thumbnail': 're:^https?://.*$', | ||||
|                 'upload_date': '20150211', | ||||
|                 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4', | ||||
|                 'uploader_id': 'TraciJHines', | ||||
|             }, | ||||
|             'params': { | ||||
|                 'noplaylist': True, | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     # Currently Patreon exposes download URL via hidden CSS, so login is not | ||||
| @@ -65,26 +79,29 @@ class PatreonIE(InfoExtractor): | ||||
|     ''' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|  | ||||
|         video_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         title = self._og_search_title(webpage).strip() | ||||
|  | ||||
|         attach_fn = self._html_search_regex( | ||||
|             r'<div class="attach"><a target="_blank" href="([^"]+)">', | ||||
|             webpage, 'attachment URL', default=None) | ||||
|         embed = self._html_search_regex( | ||||
|             r'<div id="watchCreation">\s*<iframe class="embedly-embed" src="([^"]+)"', | ||||
|             webpage, 'embedded URL', default=None) | ||||
|  | ||||
|         if attach_fn is not None: | ||||
|             video_url = 'http://www.patreon.com' + attach_fn | ||||
|             thumbnail = self._og_search_thumbnail(webpage) | ||||
|             uploader = self._html_search_regex( | ||||
|                 r'<strong>(.*?)</strong> is creating', webpage, 'uploader') | ||||
|         elif embed is not None: | ||||
|             return self.url_result(embed) | ||||
|         else: | ||||
|             playlist_js = self._search_regex( | ||||
|             playlist = self._parse_json(self._search_regex( | ||||
|                 r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])', | ||||
|                 webpage, 'playlist JSON') | ||||
|             playlist_json = js_to_json(playlist_js) | ||||
|             playlist = json.loads(playlist_json) | ||||
|                 webpage, 'playlist JSON'), | ||||
|                 video_id, transform_source=js_to_json) | ||||
|             data = playlist[0] | ||||
|             video_url = self._proto_relative_url(data['mp3']) | ||||
|             thumbnail = self._proto_relative_url(data.get('cover')) | ||||
|   | ||||
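The Patreon rewrite folds the old `js_to_json` + `json.loads` pair into a single `_parse_json(..., transform_source=js_to_json)` call, which also attaches the video id to any parse failure. A standalone look at what `js_to_json` normalizes (the input object is made up, loosely modeled on a jPlayer playlist entry):

    import json

    from youtube_dl.utils import js_to_json

    raw = "{title: 'Demo', 'mp3': '//example.com/a.mp3'}"  # made-up JS-style object
    print(json.loads(js_to_json(raw)))
    # {'title': 'Demo', 'mp3': '//example.com/a.mp3'}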
| @@ -56,7 +56,7 @@ class PornHubIE(InfoExtractor): | ||||
|  | ||||
|         video_title = self._html_search_regex(r'<h1 [^>]+>([^<]+)', webpage, 'title') | ||||
|         video_uploader = self._html_search_regex( | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|<span class="username)[^>]+>(.+?)<', | ||||
|             r'(?s)From: .+?<(?:a href="/users/|a href="/channels/|span class="username)[^>]+>(.+?)<', | ||||
|             webpage, 'uploader', fatal=False) | ||||
|         thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) | ||||
|         if thumbnail: | ||||
| @@ -110,3 +110,33 @@ class PornHubIE(InfoExtractor): | ||||
|             'formats': formats, | ||||
|             'age_limit': 18, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class PornHubPlaylistIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?pornhub\.com/playlist/(?P<id>\d+)' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.pornhub.com/playlist/6201671', | ||||
|         'info_dict': { | ||||
|             'id': '6201671', | ||||
|             'title': 'P0p4', | ||||
|         }, | ||||
|         'playlist_mincount': 35, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         playlist_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, playlist_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result('http://www.pornhub.com/%s' % video_url, 'PornHub') | ||||
|             for video_url in set(re.findall(r'href="/?(view_video\.php\?viewkey=\d+[^"]*)"', webpage)) | ||||
|         ] | ||||
|  | ||||
|         playlist = self._parse_json( | ||||
|             self._search_regex( | ||||
|                 r'playlistObject\s*=\s*({.+?});', webpage, 'playlist'), | ||||
|             playlist_id) | ||||
|  | ||||
|         return self.playlist_result( | ||||
|             entries, playlist_id, playlist.get('title'), playlist.get('description')) | ||||
|   | ||||
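The new playlist extractor follows the stock two-helper pattern: deduplicate scraped links with `set()`, wrap each one via `url_result()`, and bundle them via `playlist_result()`. A trimmed illustration of the dicts those helpers produce (ids invented):

    # Invented ids; shows the result shapes of url_result()/playlist_result().
    entries = [
        {'_type': 'url', 'url': 'http://www.pornhub.com/view_video.php?viewkey=903247', 'ie_key': 'PornHub'},
        {'_type': 'url', 'url': 'http://www.pornhub.com/view_video.php?viewkey=913729', 'ie_key': 'PornHub'},
    ]
    playlist = {
        '_type': 'playlist',  # YoutubeDL resolves each 'url' entry lazily
        'id': '6201671',
        'title': 'P0p4',
        'entries': entries,
    }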
							
								
								
									
youtube_dl/extractor/r7.py (new file, 88 lines)
							| @@ -0,0 +1,88 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     js_to_json, | ||||
|     unescapeHTML, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class R7IE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x)https?:// | ||||
|                         (?: | ||||
|                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| | ||||
|                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-| | ||||
|                             player\.r7\.com/video/i/ | ||||
|                         ) | ||||
|                         (?P<id>[\da-f]{24}) | ||||
|                         ''' | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', | ||||
|         'md5': '403c4e393617e8e8ddc748978ee8efde', | ||||
|         'info_dict': { | ||||
|             'id': '54e7050b0cf2ff57e0279389', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', | ||||
|             'thumbnail': 're:^https?://.*\.jpg$', | ||||
|             'duration': 98, | ||||
|             'like_count': int, | ||||
|             'view_count': int, | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage( | ||||
|             'http://player.r7.com/video/i/%s' % video_id, video_id) | ||||
|  | ||||
|         item = self._parse_json(js_to_json(self._search_regex( | ||||
|             r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id) | ||||
|  | ||||
|         title = unescapeHTML(item['title']) | ||||
|         thumbnail = item.get('init', {}).get('thumbUri') | ||||
|         duration = None | ||||
|  | ||||
|         statistics = item.get('statistics', {}) | ||||
|         like_count = int_or_none(statistics.get('likes')) | ||||
|         view_count = int_or_none(statistics.get('views')) | ||||
|  | ||||
|         formats = [] | ||||
|         for format_key, format_dict in item['playlist'][0].items(): | ||||
|             src = format_dict.get('src') | ||||
|             if not src: | ||||
|                 continue | ||||
|             format_id = format_dict.get('format') or format_key | ||||
|             if duration is None: | ||||
|                 duration = format_dict.get('duration') | ||||
|             if '.f4m' in src: | ||||
|                 formats.extend(self._extract_f4m_formats(src, video_id, preference=-1)) | ||||
|             elif src.endswith('.m3u8'): | ||||
|                 formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2)) | ||||
|             else: | ||||
|                 formats.append({ | ||||
|                     'url': src, | ||||
|                     'format_id': format_id, | ||||
|                 }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'like_count': like_count, | ||||
|             'view_count': view_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
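The negative `preference` values above (-1 for f4m, -2 for m3u8) rank the segmented variants below the plain HTTP downloads once `_sort_formats` orders the list; youtube-dl treats the last entry as the best. A simplified stand-in for that ordering (not the real `_sort_formats`, which weighs many more keys):

    # Simplified stand-in: a missing preference is treated as 0 here, and
    # the best format ends up last in the list.
    formats = [
        {'format_id': 'hls', 'preference': -2},
        {'format_id': 'http-540', 'preference': None},
        {'format_id': 'f4m', 'preference': -1},
    ]
    formats.sort(key=lambda f: f['preference'] if f['preference'] is not None else 0)
    print([f['format_id'] for f in formats])  # ['hls', 'f4m', 'http-540']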
| @@ -1,7 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| @@ -10,13 +8,13 @@ class RadioDeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' | ||||
|     _TEST = { | ||||
|         'url': 'http://ndr2.radio.de/', | ||||
|         'md5': '3b4cdd011bc59174596b6145cda474a4', | ||||
|         'info_dict': { | ||||
|             'id': 'ndr2', | ||||
|             'ext': 'mp3', | ||||
|             'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', | ||||
|             'description': 'md5:591c49c702db1a33751625ebfb67f273', | ||||
|             'thumbnail': 're:^https?://.*\.png', | ||||
|             'is_live': True, | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
| @@ -25,16 +23,15 @@ class RadioDeIE(InfoExtractor): | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         radio_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, radio_id) | ||||
|         jscode = self._search_regex( | ||||
|             r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", | ||||
|             webpage, 'broadcast') | ||||
|  | ||||
|         broadcast = json.loads(self._search_regex( | ||||
|             r'_getBroadcast\s*=\s*function\(\s*\)\s*{\s*return\s+({.+?})\s*;\s*}', | ||||
|             webpage, 'broadcast')) | ||||
|  | ||||
|         broadcast = self._parse_json(jscode, radio_id) | ||||
|         title = self._live_title(broadcast['name']) | ||||
|         description = broadcast.get('description') or broadcast.get('shortDescription') | ||||
|         thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') | ||||
|         thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': stream['streamUrl'], | ||||
|   | ||||
| @@ -2,7 +2,7 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_parse, | ||||
| ) | ||||
| @@ -12,7 +12,7 @@ from ..utils import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| class RaiIE(SubtitlesInfoExtractor): | ||||
| class RaiIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?P<url>http://(?:.+?\.)?(?:rai\.it|rai\.tv|rainews\.it)/dl/.+?-(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})(?:-.+?)?\.html)' | ||||
|     _TESTS = [ | ||||
|         { | ||||
| @@ -89,15 +89,7 @@ class RaiIE(SubtitlesInfoExtractor): | ||||
|                 'ext': 'mp4', | ||||
|             }) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             self._list_available_subtitles(video_id, page) | ||||
|             return | ||||
|  | ||||
|         subtitles = {} | ||||
|         if self._have_to_download_any_subtitles: | ||||
|             page = self._download_webpage(url, video_id) | ||||
|             subtitles = self.extract_subtitles(video_id, page) | ||||
|         subtitles = self.extract_subtitles(video_id, url) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -111,7 +103,8 @@ class RaiIE(SubtitlesInfoExtractor): | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|     def _get_subtitles(self, video_id, url): | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         subtitles = {} | ||||
|         m = re.search(r'<meta name="closedcaption" content="(?P<captions>[^"]+)"', webpage) | ||||
|         if m: | ||||
| @@ -120,5 +113,8 @@ class RaiIE(SubtitlesInfoExtractor): | ||||
|             SRT_EXT = '.srt' | ||||
|             if captions.endswith(STL_EXT): | ||||
|                 captions = captions[:-len(STL_EXT)] + SRT_EXT | ||||
|             subtitles['it'] = 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions) | ||||
|             subtitles['it'] = [{ | ||||
|                 'ext': 'srt', | ||||
|                 'url': 'http://www.rai.tv%s' % compat_urllib_parse.quote(captions), | ||||
|             }] | ||||
|         return subtitles | ||||
|   | ||||
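The recurring theme of these hunks is the retirement of `SubtitlesInfoExtractor`: extractors now only implement `_get_subtitles()`, and a shared `extract_subtitles()` wrapper on the base class decides whether subtitle extraction needs to run at all. A hedged reconstruction of that dispatch (paraphrased, not a verbatim copy of common.py):

    # Hedged reconstruction of the new base-class dispatch; paraphrased,
    # not copied verbatim from InfoExtractor in common.py.
    def extract_subtitles(self, *args, **kwargs):
        if (self._downloader.params.get('writesubtitles', False) or
                self._downloader.params.get('listsubtitles')):
            return self._get_subtitles(*args, **kwargs)
        return {}

    def _get_subtitles(self, *args, **kwargs):
        raise NotImplementedError('This method must be implemented by subclasses')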
| @@ -6,6 +6,7 @@ import re | ||||
| import time | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     struct_unpack, | ||||
|     remove_end, | ||||
| @@ -96,12 +97,14 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
|             ).replace('.net.rtve', '.multimedia.cdn.rtve') | ||||
|             video_path = self._download_webpage( | ||||
|                 auth_url, video_id, 'Getting video url') | ||||
|             # Use mvod.akcdn instead of flash.akamaihd.multimedia.cdn to get | ||||
|             # Use mvod1.akcdn instead of flash.akamaihd.multimedia.cdn to get | ||||
|             # the right Content-Length header and the mp4 format | ||||
|             video_url = ( | ||||
|                 'http://mvod.akcdn.rtve.es/{0}&v=2.6.8' | ||||
|                 '&fp=MAC%2016,0,0,296&r=MRUGG&g=OEOJWFXNFGCP'.format(video_path) | ||||
|             ) | ||||
|             video_url = compat_urlparse.urljoin( | ||||
|                 'http://mvod1.akcdn.rtve.es/', video_path) | ||||
|  | ||||
|         subtitles = None | ||||
|         if info.get('sbtFile') is not None: | ||||
|             subtitles = self.extract_subtitles(video_id, info['sbtFile']) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -109,8 +112,17 @@ class RTVEALaCartaIE(InfoExtractor): | ||||
|             'url': video_url, | ||||
|             'thumbnail': info.get('image'), | ||||
|             'page_url': url, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|     def _get_subtitles(self, video_id, sub_file): | ||||
|         subs = self._download_json( | ||||
|             sub_file + '.json', video_id, | ||||
|             'Downloading subtitles info')['page']['items'] | ||||
|         return dict( | ||||
|             (s['lang'], [{'ext': 'vtt', 'url': s['src']}]) | ||||
|             for s in subs) | ||||
|  | ||||
|  | ||||
| class RTVELiveIE(InfoExtractor): | ||||
|     IE_NAME = 'rtve.es:live' | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/sandia.py (new file, 117 lines)
							| @@ -0,0 +1,117 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import itertools | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
| ) | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     js_to_json, | ||||
|     mimetype2ext, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SandiaIE(InfoExtractor): | ||||
|     IE_DESC = 'Sandia National Laboratories' | ||||
|     _VALID_URL = r'https?://digitalops\.sandia\.gov/Mediasite/Play/(?P<id>[0-9a-f]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d', | ||||
|         'md5': '9422edc9b9a60151727e4b6d8bef393d', | ||||
|         'info_dict': { | ||||
|             'id': '24aace4429fc450fb5b38cdbf424a66e1d', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Xyce Software Training - Section 1', | ||||
|             'description': 're:(?s)SAND Number: SAND 2013-7800.{200,}', | ||||
|             'upload_date': '20120904', | ||||
|             'duration': 7794, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         req = compat_urllib_request.Request(url) | ||||
|         req.add_header('Cookie', 'MediasitePlayerCaps=ClientPlugins=4') | ||||
|         webpage = self._download_webpage(req, video_id) | ||||
|  | ||||
|         js_path = self._search_regex( | ||||
|             r'<script type="text/javascript" src="(/Mediasite/FileServer/Presentation/[^"]+)"', | ||||
|             webpage, 'JS code URL') | ||||
|         js_url = compat_urlparse.urljoin(url, js_path) | ||||
|  | ||||
|         js_code = self._download_webpage( | ||||
|             js_url, video_id, note='Downloading player') | ||||
|  | ||||
|         def extract_str(key, **args): | ||||
|             return self._search_regex( | ||||
|                 r'Mediasite\.PlaybackManifest\.%s\s*=\s*(.+);\s*?\n' % re.escape(key), | ||||
|                 js_code, key, **args) | ||||
|  | ||||
|         def extract_data(key, **args): | ||||
|             data_json = extract_str(key, **args) | ||||
|             if data_json is None: | ||||
|                 return data_json | ||||
|             return self._parse_json( | ||||
|                 data_json, video_id, transform_source=js_to_json) | ||||
|  | ||||
|         formats = [] | ||||
|         for i in itertools.count(): | ||||
|             fd = extract_data('VideoUrls[%d]' % i, default=None) | ||||
|             if fd is None: | ||||
|                 break | ||||
|             formats.append({ | ||||
|                 'format_id': '%s' % i, | ||||
|                 'format_note': fd['MimeType'].partition('/')[2], | ||||
|                 'ext': mimetype2ext(fd['MimeType']), | ||||
|                 'url': fd['Location'], | ||||
|                 'protocol': 'f4m' if fd['MimeType'] == 'video/x-mp4-fragmented' else None, | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         slide_baseurl = compat_urlparse.urljoin( | ||||
|             url, extract_data('SlideBaseUrl')) | ||||
|         slide_template = slide_baseurl + re.sub( | ||||
|             r'\{0:D?([0-9+])\}', r'%0\1d', extract_data('SlideImageFileNameTemplate')) | ||||
|         slides = [] | ||||
|         last_slide_time = 0 | ||||
|         for i in itertools.count(1): | ||||
|             sd = extract_str('Slides[%d]' % i, default=None) | ||||
|             if sd is None: | ||||
|                 break | ||||
|             timestamp = int_or_none(self._search_regex( | ||||
|                 r'^Mediasite\.PlaybackManifest\.CreateSlide\("[^"]*"\s*,\s*([0-9]+),', | ||||
|                 sd, 'slide %s timestamp' % i, fatal=False)) | ||||
|             slides.append({ | ||||
|                 'url': slide_template % i, | ||||
|                 'duration': timestamp - last_slide_time, | ||||
|             }) | ||||
|             last_slide_time = timestamp | ||||
|         formats.append({ | ||||
|             'format_id': 'slides', | ||||
|             'protocol': 'slideshow', | ||||
|             'url': json.dumps(slides), | ||||
|             'preference': -10000,  # Downloader not yet written | ||||
|         }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         title = extract_data('Title') | ||||
|         description = extract_data('Description', fatal=False) | ||||
|         duration = int_or_none(extract_data( | ||||
|             'Duration', fatal=False), scale=1000) | ||||
|         upload_date = unified_strdate(extract_data('AirDate', fatal=False)) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'formats': formats, | ||||
|             'upload_date': upload_date, | ||||
|             'duration': duration, | ||||
|         } | ||||
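Mediasite's `SlideImageFileNameTemplate` is a .NET-style format string such as `{0:D4}`, which the `re.sub` above rewrites into a printf template before slide numbers are interpolated. A standalone demonstration (the template value is invented, and the character class is simplified to `[0-9]+`):

    import re

    template = 'slide_{0:D4}.jpg'  # invented Mediasite-style template value
    printf_template = re.sub(r'\{0:D?([0-9]+)\}', r'%0\1d', template)
    print(printf_template)      # slide_%04d.jpg
    print(printf_template % 7)  # slide_0007.jpg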
| @@ -25,7 +25,6 @@ class SockshareIE(InfoExtractor): | ||||
|             'id': '437BE28B89D799D7', | ||||
|             'title': 'big_buck_bunny_720p_surround.avi', | ||||
|             'ext': 'avi', | ||||
|             'thumbnail': 're:^http://.*\.jpg$', | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -45,7 +44,7 @@ class SockshareIE(InfoExtractor): | ||||
|             ''', webpage, 'hash') | ||||
|  | ||||
|         fields = { | ||||
|             "hash": confirm_hash, | ||||
|             "hash": confirm_hash.encode('utf-8'), | ||||
|             "confirm": "Continue as Free User" | ||||
|         } | ||||
|  | ||||
| @@ -68,7 +67,7 @@ class SockshareIE(InfoExtractor): | ||||
|             webpage, 'title', default=None) | ||||
|         thumbnail = self._html_search_regex( | ||||
|             r'<img\s+src="([^"]*)".+?name="bg"', | ||||
|             webpage, 'thumbnail') | ||||
|             webpage, 'thumbnail', default=None) | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class SoundgasmIE(InfoExtractor): | ||||
|     IE_NAME = 'soundgasm' | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://soundgasm.net/u/ytdl/Piano-sample', | ||||
| @@ -38,3 +39,26 @@ class SoundgasmIE(InfoExtractor): | ||||
|             'title': audio_title, | ||||
|             'description': description | ||||
|         } | ||||
|  | ||||
|  | ||||
| class SoundgasmProfileIE(InfoExtractor): | ||||
|     IE_NAME = 'soundgasm:profile' | ||||
|     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$' | ||||
|     _TEST = { | ||||
|         'url': 'http://soundgasm.net/u/ytdl', | ||||
|         'info_dict': { | ||||
|             'id': 'ytdl', | ||||
|         }, | ||||
|         'playlist_count': 1, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         profile_id = self._match_id(url) | ||||
|  | ||||
|         webpage = self._download_webpage(url, profile_id) | ||||
|  | ||||
|         entries = [ | ||||
|             self.url_result(audio_url, 'Soundgasm') | ||||
|             for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)] | ||||
|  | ||||
|         return self.playlist_result(entries, profile_id) | ||||
|   | ||||
| @@ -1,99 +0,0 @@ | ||||
| from __future__ import unicode_literals | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..compat import compat_str | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class SubtitlesInfoExtractor(InfoExtractor): | ||||
|     @property | ||||
|     def _have_to_download_any_subtitles(self): | ||||
|         return any([self._downloader.params.get('writesubtitles', False), | ||||
|                     self._downloader.params.get('writeautomaticsub')]) | ||||
|  | ||||
|     def _list_available_subtitles(self, video_id, webpage): | ||||
|         """ outputs the available subtitles for the video """ | ||||
|         sub_lang_list = self._get_available_subtitles(video_id, webpage) | ||||
|         auto_captions_list = self._get_available_automatic_caption(video_id, webpage) | ||||
|         sub_lang = ",".join(list(sub_lang_list.keys())) | ||||
|         self.to_screen('%s: Available subtitles for video: %s' % | ||||
|                        (video_id, sub_lang)) | ||||
|         auto_lang = ",".join(auto_captions_list.keys()) | ||||
|         self.to_screen('%s: Available automatic captions for video: %s' % | ||||
|                        (video_id, auto_lang)) | ||||
|  | ||||
|     def extract_subtitles(self, video_id, webpage): | ||||
|         """ | ||||
|         returns {sub_lang: sub} ,{} if subtitles not found or None if the | ||||
|         subtitles aren't requested. | ||||
|         """ | ||||
|         if not self._have_to_download_any_subtitles: | ||||
|             return None | ||||
|         available_subs_list = {} | ||||
|         if self._downloader.params.get('writeautomaticsub', False): | ||||
|             available_subs_list.update(self._get_available_automatic_caption(video_id, webpage)) | ||||
|         if self._downloader.params.get('writesubtitles', False): | ||||
|             available_subs_list.update(self._get_available_subtitles(video_id, webpage)) | ||||
|  | ||||
|         if not available_subs_list:  # error, it didn't get the available subtitles | ||||
|             return {} | ||||
|         if self._downloader.params.get('allsubtitles', False): | ||||
|             sub_lang_list = available_subs_list | ||||
|         else: | ||||
|             if self._downloader.params.get('subtitleslangs', False): | ||||
|                 requested_langs = self._downloader.params.get('subtitleslangs') | ||||
|             elif 'en' in available_subs_list: | ||||
|                 requested_langs = ['en'] | ||||
|             else: | ||||
|                 requested_langs = [list(available_subs_list.keys())[0]] | ||||
|  | ||||
|             sub_lang_list = {} | ||||
|             for sub_lang in requested_langs: | ||||
|                 if sub_lang not in available_subs_list: | ||||
|                     self._downloader.report_warning('no closed captions found in the specified language "%s"' % sub_lang) | ||||
|                     continue | ||||
|                 sub_lang_list[sub_lang] = available_subs_list[sub_lang] | ||||
|  | ||||
|         subtitles = {} | ||||
|         for sub_lang, url in sub_lang_list.items(): | ||||
|             subtitle = self._request_subtitle_url(sub_lang, url) | ||||
|             if subtitle: | ||||
|                 subtitles[sub_lang] = subtitle | ||||
|         return subtitles | ||||
|  | ||||
|     def _download_subtitle_url(self, sub_lang, url): | ||||
|         return self._download_webpage(url, None, note=False) | ||||
|  | ||||
|     def _request_subtitle_url(self, sub_lang, url): | ||||
|         """ makes the http request for the subtitle """ | ||||
|         try: | ||||
|             sub = self._download_subtitle_url(sub_lang, url) | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning('unable to download video subtitles for %s: %s' % (sub_lang, compat_str(err))) | ||||
|             return | ||||
|         if not sub: | ||||
|             self._downloader.report_warning('Did not fetch video subtitles') | ||||
|             return | ||||
|         return sub | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         """ | ||||
|         returns {sub_lang: url} or {} if not available | ||||
|         Must be redefined by the subclasses | ||||
|         """ | ||||
|  | ||||
|         # By default, allow implementations to simply pass in the result | ||||
|         assert isinstance(webpage, dict), \ | ||||
|             '_get_available_subtitles not implemented' | ||||
|         return webpage | ||||
|  | ||||
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|         """ | ||||
|         returns {sub_lang: url} or {} if not available | ||||
|         Must be redefined by the subclasses that support automatic captions, | ||||
|         otherwise it will return {} | ||||
|         """ | ||||
|         self._downloader.report_warning('Automatic Captions not supported by this server') | ||||
|         return {} | ||||
| @@ -1,8 +1,10 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import qualities | ||||
|  | ||||
|  | ||||
| class TeamcocoIE(InfoExtractor): | ||||
| @@ -24,8 +26,8 @@ class TeamcocoIE(InfoExtractor): | ||||
|             'info_dict': { | ||||
|                 'id': '19705', | ||||
|                 'ext': 'mp4', | ||||
|                 "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.", | ||||
|                 "title": "Louis C.K. Interview Pt. 1 11/3/11", | ||||
|                 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.', | ||||
|                 'title': 'Louis C.K. Interview Pt. 1 11/3/11', | ||||
|                 'age_limit': 0, | ||||
|             } | ||||
|         } | ||||
| @@ -42,42 +44,39 @@ class TeamcocoIE(InfoExtractor): | ||||
|         display_id = mobj.group('display_id') | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = mobj.group("video_id") | ||||
|         video_id = mobj.group('video_id') | ||||
|         if not video_id: | ||||
|             video_id = self._html_search_regex( | ||||
|                 self._VIDEO_ID_REGEXES, webpage, 'video id') | ||||
|  | ||||
|         data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id | ||||
|         data = self._download_xml( | ||||
|             data_url, display_id, 'Downloading data webpage') | ||||
|         embed_url = 'http://teamcoco.com/embed/v/%s' % video_id | ||||
|         embed = self._download_webpage( | ||||
|             embed_url, video_id, 'Downloading embed page') | ||||
|  | ||||
|         encoded_data = self._search_regex( | ||||
|             r'"preload"\s*:\s*"([^"]+)"', embed, 'encoded data') | ||||
|         data = self._parse_json( | ||||
|             base64.b64decode(encoded_data.encode('ascii')).decode('utf-8'), video_id) | ||||
|  | ||||
|         qualities = ['500k', '480p', '1000k', '720p', '1080p'] | ||||
|         formats = [] | ||||
|         for filed in data.findall('files/file'): | ||||
|             if filed.attrib.get('playmode') == 'all': | ||||
|                 # it just duplicates one of the entries | ||||
|                 break | ||||
|             file_url = filed.text | ||||
|             m_format = re.search(r'(\d+(k|p))\.mp4', file_url) | ||||
|         get_quality = qualities(['500k', '480p', '1000k', '720p', '1080p']) | ||||
|         for filed in data['files']: | ||||
|             m_format = re.search(r'(\d+(k|p))\.mp4', filed['url']) | ||||
|             if m_format is not None: | ||||
|                 format_id = m_format.group(1) | ||||
|             else: | ||||
|                 format_id = filed.attrib['bitrate'] | ||||
|                 format_id = filed['bitrate'] | ||||
|             tbr = ( | ||||
|                 int(filed.attrib['bitrate']) | ||||
|                 if filed.attrib['bitrate'].isdigit() | ||||
|                 int(filed['bitrate']) | ||||
|                 if filed['bitrate'].isdigit() | ||||
|                 else None) | ||||
|  | ||||
|             try: | ||||
|                 quality = qualities.index(format_id) | ||||
|             except ValueError: | ||||
|                 quality = -1 | ||||
|             formats.append({ | ||||
|                 'url': file_url, | ||||
|                 'url': filed['url'], | ||||
|                 'ext': 'mp4', | ||||
|                 'tbr': tbr, | ||||
|                 'format_id': format_id, | ||||
|                 'quality': quality, | ||||
|                 'quality': get_quality(format_id), | ||||
|             }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
| @@ -86,8 +85,8 @@ class TeamcocoIE(InfoExtractor): | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'formats': formats, | ||||
|             'title': self._og_search_title(webpage), | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'title': data['title'], | ||||
|             'thumbnail': data.get('thumb', {}).get('href'), | ||||
|             'description': data.get('teaser'), | ||||
|             'age_limit': self._family_friendly_search(webpage), | ||||
|         } | ||||
|   | ||||
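Teamcoco's XML endpoint is gone; the embed page now carries a base64-encoded JSON blob in its `"preload"` field. The decode step in isolation (the payload below is a made-up stand-in, not real site data):

    import base64
    import json

    encoded = 'eyJ0aXRsZSI6ICJEZW1vIn0='  # made-up stand-in for the "preload" value
    data = json.loads(base64.b64decode(encoded.encode('ascii')).decode('utf-8'))
    print(data)  # {'title': 'Demo'}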
| @@ -3,14 +3,14 @@ from __future__ import unicode_literals | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
|  | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TEDIE(SubtitlesInfoExtractor): | ||||
| class TEDIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?P<proto>https?://) | ||||
|         (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/ | ||||
| @@ -83,6 +83,22 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }, { | ||||
|         # YouTube video | ||||
|         'url': 'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond', | ||||
|         'add_ie': ['Youtube'], | ||||
|         'info_dict': { | ||||
|             'id': 'aFBIPO-P7LM', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'The hidden power of siblings: Jeff Kluger at TEDxAsheville', | ||||
|             'description': 'md5:3d7a4f50d95ca5dd67104e2a20f43fe1', | ||||
|             'uploader': 'TEDx Talks', | ||||
|             'uploader_id': 'TEDxTalks', | ||||
|             'upload_date': '20111216', | ||||
|         }, | ||||
|         'params': { | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     _NATIVE_FORMATS = { | ||||
| @@ -132,11 +148,16 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         talk_info = self._extract_info(webpage)['talks'][0] | ||||
|  | ||||
|         if talk_info.get('external') is not None: | ||||
|             self.to_screen('Found video from %s' % talk_info['external']['service']) | ||||
|         external = talk_info.get('external') | ||||
|         if external: | ||||
|             service = external['service'] | ||||
|             self.to_screen('Found video from %s' % service) | ||||
|             ext_url = None | ||||
|             if service.lower() == 'youtube': | ||||
|                 ext_url = external.get('code') | ||||
|             return { | ||||
|                 '_type': 'url', | ||||
|                 'url': talk_info['external']['uri'], | ||||
|                 'url': ext_url or external['uri'], | ||||
|             } | ||||
|  | ||||
|         formats = [{ | ||||
| @@ -163,11 +184,6 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         video_id = compat_str(talk_info['id']) | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, talk_info) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, talk_info) | ||||
|             return | ||||
|  | ||||
|         thumbnail = talk_info['thumb'] | ||||
|         if not thumbnail.startswith('http'): | ||||
| @@ -178,21 +194,25 @@ class TEDIE(SubtitlesInfoExtractor): | ||||
|             'uploader': talk_info['speaker'], | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': self._og_search_description(webpage), | ||||
|             'subtitles': video_subtitles, | ||||
|             'subtitles': self._get_subtitles(video_id, talk_info), | ||||
|             'formats': formats, | ||||
|             'duration': talk_info.get('duration'), | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, talk_info): | ||||
|     def _get_subtitles(self, video_id, talk_info): | ||||
|         languages = [lang['languageCode'] for lang in talk_info.get('languages', [])] | ||||
|         if languages: | ||||
|             sub_lang_list = {} | ||||
|             for l in languages: | ||||
|                 url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||
|                 sub_lang_list[l] = url | ||||
|                 sub_lang_list[l] = [ | ||||
|                     { | ||||
|                         'url': 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/%s' % (video_id, l, ext), | ||||
|                         'ext': ext, | ||||
|                     } | ||||
|                     for ext in ['ted', 'srt'] | ||||
|                 ] | ||||
|             return sub_lang_list | ||||
|         else: | ||||
|             self._downloader.report_warning('video doesn\'t have subtitles') | ||||
|             return {} | ||||
|  | ||||
|     def _watch_info(self, url, name): | ||||
|   | ||||
| @@ -4,11 +4,10 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class TheOnionIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?x)https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<article_id>[0-9]+)/?' | ||||
|     _VALID_URL = r'https?://(?:www\.)?theonion\.com/video/[^,]+,(?P<id>[0-9]+)/?' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.theonion.com/video/man-wearing-mm-jacket-gods-image,36918/', | ||||
|         'md5': '19eaa9a39cf9b9804d982e654dc791ee', | ||||
| @@ -22,10 +21,8 @@ class TheOnionIE(InfoExtractor): | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         article_id = mobj.group('article_id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, article_id) | ||||
|         display_id = self._match_id(url) | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         video_id = self._search_regex( | ||||
|             r'"videoId":\s(\d+),', webpage, 'video ID') | ||||
| @@ -34,10 +31,6 @@ class TheOnionIE(InfoExtractor): | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         sources = re.findall(r'<source src="([^"]+)" type="([^"]+)"', webpage) | ||||
|         if not sources: | ||||
|             raise ExtractorError( | ||||
|                 'No sources found for video %s' % video_id, expected=True) | ||||
|  | ||||
|         formats = [] | ||||
|         for src, type_ in sources: | ||||
|             if type_ == 'video/mp4': | ||||
| @@ -54,15 +47,15 @@ class TheOnionIE(InfoExtractor): | ||||
|                 }) | ||||
|             elif type_ == 'application/x-mpegURL': | ||||
|                 formats.extend( | ||||
|                     self._extract_m3u8_formats(src, video_id, preference=-1)) | ||||
|                     self._extract_m3u8_formats(src, display_id, preference=-1)) | ||||
|             else: | ||||
|                 self.report_warning( | ||||
|                     'Encountered unexpected format: %s' % type_) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'display_id': display_id, | ||||
|             'title': title, | ||||
|             'formats': formats, | ||||
|             'thumbnail': thumbnail, | ||||
|   | ||||
| @@ -8,7 +8,7 @@ import binascii | ||||
| import hashlib | ||||
|  | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_str, | ||||
| ) | ||||
| @@ -22,7 +22,7 @@ from ..utils import ( | ||||
| _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language'}) | ||||
|  | ||||
|  | ||||
| class ThePlatformIE(SubtitlesInfoExtractor): | ||||
| class ThePlatformIE(InfoExtractor): | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/ | ||||
|            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)? | ||||
| @@ -71,7 +71,9 @@ class ThePlatformIE(SubtitlesInfoExtractor): | ||||
|         if not provider_id: | ||||
|             provider_id = 'dJ5BDC' | ||||
|  | ||||
|         if mobj.group('config'): | ||||
|         if smuggled_data.get('force_smil_url', False): | ||||
|             smil_url = url | ||||
|         elif mobj.group('config'): | ||||
|             config_url = url + '&form=json' | ||||
|             config_url = config_url.replace('swf/', 'config/') | ||||
|             config_url = config_url.replace('onsite/', 'onsite/config/') | ||||
| @@ -104,15 +106,11 @@ class ThePlatformIE(SubtitlesInfoExtractor): | ||||
|         captions = info.get('captions') | ||||
|         if isinstance(captions, list): | ||||
|             for caption in captions: | ||||
|                 lang, src = caption.get('lang'), caption.get('src') | ||||
|                 if lang and src: | ||||
|                     subtitles[lang] = src | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|                 lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type') | ||||
|                 subtitles[lang] = [{ | ||||
|                     'ext': 'srt' if mime == 'text/srt' else 'ttml', | ||||
|                     'url': src, | ||||
|                 }] | ||||
|  | ||||
|         head = meta.find(_x('smil:head')) | ||||
|         body = meta.find(_x('smil:body')) | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/tv4.py (new file, 100 lines)
							| @@ -0,0 +1,100 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     parse_iso8601, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TV4IE(InfoExtractor): | ||||
|     IE_DESC = 'tv4.se and tv4play.se' | ||||
|     _VALID_URL = r'''(?x)https?://(?:www\.)? | ||||
|         (?: | ||||
|             tv4\.se/(?:[^/]+)/klipp/(?:.*)-| | ||||
|             tv4play\.se/ | ||||
|             (?: | ||||
|                 (?:program|barn)/(?:[^\?]+)\?video_id=| | ||||
|                 iframe/video/| | ||||
|                 film/| | ||||
|                 sport/| | ||||
|             ) | ||||
|         )(?P<id>[0-9]+)''' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650', | ||||
|             'md5': '909d6454b87b10a25aa04c4bdd416a9b', | ||||
|             'info_dict': { | ||||
|                 'id': '2491650', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Kalla Fakta 5 (english subtitles)', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'timestamp': int, | ||||
|                 'upload_date': '20131125', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/iframe/video/3054113', | ||||
|             'md5': '77f851c55139ffe0ebd41b6a5552489b', | ||||
|             'info_dict': { | ||||
|                 'id': '3054113', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.', | ||||
|                 'timestamp': int, | ||||
|                 'upload_date': '20150130', | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/sport/3060959', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/film/2378136', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         video_id = self._match_id(url) | ||||
|  | ||||
|         info = self._download_json( | ||||
|             'http://www.tv4play.se/player/assets/%s.json' % video_id, video_id, 'Downloading video info JSON') | ||||
|  | ||||
|         # If is_geo_restricted is true, it doesn't necessarily mean we can't download it | ||||
|         if info['is_geo_restricted']: | ||||
|             self.report_warning('This content might not be available in your country due to licensing restrictions.') | ||||
|         if info['requires_subscription']: | ||||
|             raise ExtractorError('This content requires subscription.', expected=True) | ||||
|  | ||||
|         sources_data = self._download_json( | ||||
|             'https://prima.tv4play.se/api/web/asset/%s/play.json?protocol=http&videoFormat=MP4' % video_id, video_id, 'Downloading sources JSON') | ||||
|         sources = sources_data['playback'] | ||||
|  | ||||
|         formats = [] | ||||
|         for item in sources.get('items', {}).get('item', []): | ||||
|             ext, bitrate = item['mediaFormat'], item['bitrate'] | ||||
|             formats.append({ | ||||
|                 'format_id': '%s_%s' % (ext, bitrate), | ||||
|                 'tbr': bitrate, | ||||
|                 'ext': ext, | ||||
|                 'url': item['url'], | ||||
|             }) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': info['title'], | ||||
|             'formats': formats, | ||||
|             'description': info.get('description'), | ||||
|             'timestamp': parse_iso8601(info.get('broadcast_date_time')), | ||||
|             'duration': info.get('duration'), | ||||
|             'thumbnail': info.get('image'), | ||||
|             'is_live': sources.get('live'), | ||||
|         } | ||||
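`parse_iso8601` converts the `broadcast_date_time` string into the Unix timestamp that the `timestamp` field expects; the `upload_date` values in the tests are derived from it. For example:

    from youtube_dl.utils import parse_iso8601

    print(parse_iso8601('2015-01-30T11:00:00Z'))  # 1422615600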
| @@ -349,6 +349,13 @@ class TwitchStreamIE(TwitchBaseIE): | ||||
|             % (self._USHER_BASE, channel_id, compat_urllib_parse.urlencode(query).encode('utf-8')), | ||||
|             channel_id, 'mp4') | ||||
|  | ||||
|         # prefer the 'source' stream, the others are limited to 30 fps | ||||
|         def _sort_source(f): | ||||
|             if f.get('m3u8_media') is not None and f['m3u8_media'].get('NAME') == 'Source': | ||||
|                 return 1 | ||||
|             return 0 | ||||
|         formats = sorted(formats, key=_sort_source) | ||||
|  | ||||
|         view_count = stream.get('viewers') | ||||
|         timestamp = parse_iso8601(stream.get('created_at')) | ||||
|  | ||||
|   | ||||
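This works because Python's `sorted` is stable and youtube-dl treats the last entry in `formats` as the best: keying Source streams to 1 moves them to the end while the relative order of everything else survives. With made-up entries:

    # Made-up entries; only the keys consulted by the sort matter here.
    formats = [
        {'format_id': 'high', 'm3u8_media': {'NAME': 'High'}},
        {'format_id': 'source', 'm3u8_media': {'NAME': 'Source'}},
        {'format_id': 'low', 'm3u8_media': {'NAME': 'Low'}},
    ]

    def _sort_source(f):
        media = f.get('m3u8_media')
        return 1 if media is not None and media.get('NAME') == 'Source' else 0

    print([f['format_id'] for f in sorted(formats, key=_sort_source)])
    # ['high', 'low', 'source'] -- Source last, i.e. preferred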
| @@ -49,15 +49,31 @@ class VideoLecturesNetIE(InfoExtractor): | ||||
|         thumbnail = ( | ||||
|             None if thumbnail_el is None else thumbnail_el.attrib.get('src')) | ||||
|  | ||||
|         formats = [{ | ||||
|             'url': v.attrib['src'], | ||||
|             'width': int_or_none(v.attrib.get('width')), | ||||
|             'height': int_or_none(v.attrib.get('height')), | ||||
|             'filesize': int_or_none(v.attrib.get('size')), | ||||
|             'tbr': int_or_none(v.attrib.get('systemBitrate')) / 1000.0, | ||||
|             'ext': v.attrib.get('ext'), | ||||
|         } for v in switch.findall('./video') | ||||
|             if v.attrib.get('proto') == 'http'] | ||||
|         formats = [] | ||||
|         for v in switch.findall('./video'): | ||||
|             proto = v.attrib.get('proto') | ||||
|             if proto not in ['http', 'rtmp']: | ||||
|                 continue | ||||
|             f = { | ||||
|                 'width': int_or_none(v.attrib.get('width')), | ||||
|                 'height': int_or_none(v.attrib.get('height')), | ||||
|                 'filesize': int_or_none(v.attrib.get('size')), | ||||
|                 'tbr': int_or_none(v.attrib.get('systemBitrate'), 1000), | ||||
|                 'ext': v.attrib.get('ext'), | ||||
|             } | ||||
|             src = v.attrib['src'] | ||||
|             if proto == 'http': | ||||
|                 if self._is_valid_url(src, video_id): | ||||
|                     f['url'] = src | ||||
|                     formats.append(f) | ||||
|             elif proto == 'rtmp': | ||||
|                 f.update({ | ||||
|                     'url': v.attrib['streamer'], | ||||
|                     'play_path': src, | ||||
|                     'rtmp_real_time': True, | ||||
|                 }) | ||||
|                 formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|   | ||||
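For the RTMP branch above, the SMIL `streamer` attribute is the connection URL and `src` is the play path, and the format dict mirrors that split. A trimmed sketch of the resulting entry (all values invented):

    # Invented values; shows how an RTMP <video> maps onto format-dict keys.
    rtmp_format = {
        'url': 'rtmp://streaming.example.net/vod',  # from the streamer attribute
        'play_path': 'mp4:lectures/demo_talk',      # from the src attribute
        'rtmp_real_time': True,                     # fetch at playback speed
        'width': 640,
        'height': 360,
    }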
| @@ -2,16 +2,17 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from ..compat import compat_urlparse | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
|     US_RATINGS, | ||||
| ) | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class VikiIE(SubtitlesInfoExtractor): | ||||
| class VikiIE(InfoExtractor): | ||||
|     IE_NAME = 'viki' | ||||
|  | ||||
|     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)' | ||||
| @@ -69,9 +70,6 @@ class VikiIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, info_webpage) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, info_webpage) | ||||
|             return | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -85,12 +83,15 @@ class VikiIE(SubtitlesInfoExtractor): | ||||
|             'upload_date': upload_date, | ||||
|         } | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, info_webpage): | ||||
|     def _get_subtitles(self, video_id, info_webpage): | ||||
|         res = {} | ||||
|         for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage): | ||||
|         for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage): | ||||
|             sturl = unescapeHTML(sturl_html) | ||||
|             m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl) | ||||
|             if not m: | ||||
|                 continue | ||||
|             res[m.group('lang')] = sturl | ||||
|             res[m.group('lang')] = [{ | ||||
|                 'url': compat_urlparse.urljoin('http://www.viki.com', sturl), | ||||
|                 'ext': 'vtt', | ||||
|             }] | ||||
|         return res | ||||
|   | ||||
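`compat_urlparse.urljoin` makes the scraped `<track src>` values absolute regardless of whether they arrive relative or already absolute:

    from youtube_dl.compat import compat_urlparse

    print(compat_urlparse.urljoin('http://www.viki.com', '/subtitles/en.vtt'))
    # http://www.viki.com/subtitles/en.vtt
    print(compat_urlparse.urljoin('http://www.viki.com', 'http://cdn.example.com/en.vtt'))
    # http://cdn.example.com/en.vtt  (absolute inputs pass through unchanged)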
| @@ -4,9 +4,9 @@ from __future__ import unicode_literals | ||||
| import json | ||||
| import re | ||||
| import itertools | ||||
| import hashlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..compat import ( | ||||
|     compat_HTTPError, | ||||
|     compat_urllib_parse, | ||||
| @@ -18,6 +18,7 @@ from ..utils import ( | ||||
|     InAdvancePagedList, | ||||
|     int_or_none, | ||||
|     RegexNotFoundError, | ||||
|     smuggle_url, | ||||
|     std_headers, | ||||
|     unsmuggle_url, | ||||
|     urlencode_postdata, | ||||
| @@ -51,7 +52,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): | ||||
|         self._download_webpage(login_request, None, False, 'Wrong login info') | ||||
|  | ||||
|  | ||||
| class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
| class VimeoIE(VimeoBaseInfoExtractor): | ||||
|     """Information extractor for vimeo.com.""" | ||||
|  | ||||
|     # _VALID_URL matches Vimeo URLs | ||||
| @@ -174,7 +175,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     def _verify_video_password(self, url, video_id, webpage): | ||||
|         password = self._downloader.params.get('videopassword', None) | ||||
|         if password is None: | ||||
|             raise ExtractorError('This video is protected by a password, use the --video-password option') | ||||
|             raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) | ||||
|         token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') | ||||
|         data = compat_urllib_parse.urlencode({ | ||||
|             'password': password, | ||||
| @@ -224,6 +225,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if mobj.group('pro') or mobj.group('player'): | ||||
|             url = 'http://player.vimeo.com/video/' + video_id | ||||
|  | ||||
|         password = self._downloader.params.get('videopassword', None) | ||||
|         if password: | ||||
|             headers['Cookie'] = '%s_password=%s' % ( | ||||
|                 video_id, hashlib.md5(password.encode('utf-8')).hexdigest()) | ||||
|  | ||||
|         # Retrieve video webpage to extract further information | ||||
|         request = compat_urllib_request.Request(url, None, headers) | ||||
|         try: | ||||
| @@ -267,8 +273,11 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') | ||||
|  | ||||
|             if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None: | ||||
|                 if data and '_video_password_verified' in data: | ||||
|                     raise ExtractorError('video password verification failed!') | ||||
|                 self._verify_video_password(url, video_id, webpage) | ||||
|                 return self._real_extract(url) | ||||
|                 return self._real_extract( | ||||
|                     smuggle_url(url, {'_video_password_verified': 'verified'})) | ||||
|             else: | ||||
|                 raise ExtractorError('Unable to extract info section', | ||||
|                                      cause=e) | ||||
| @@ -368,12 +377,10 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         text_tracks = config['request'].get('text_tracks') | ||||
|         if text_tracks: | ||||
|             for tt in text_tracks: | ||||
|                 subtitles[tt['lang']] = 'http://vimeo.com' + tt['url'] | ||||
|  | ||||
|         video_subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|                 subtitles[tt['lang']] = [{ | ||||
|                     'ext': 'vtt', | ||||
|                     'url': 'http://vimeo.com' + tt['url'], | ||||
|                 }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
| @@ -389,7 +396,7 @@ class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             'view_count': view_count, | ||||
|             'like_count': like_count, | ||||
|             'comment_count': comment_count, | ||||
|             'subtitles': video_subtitles, | ||||
|             'subtitles': subtitles, | ||||
|         } | ||||
|  | ||||
|  | ||||
| @@ -401,6 +408,7 @@ class VimeoChannelIE(InfoExtractor): | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vimeo.com/channels/tributes', | ||||
|         'info_dict': { | ||||
|             'id': 'tributes', | ||||
|             'title': 'Vimeo Tributes', | ||||
|         }, | ||||
|         'playlist_mincount': 25, | ||||
| @@ -479,6 +487,7 @@ class VimeoUserIE(VimeoChannelIE): | ||||
|         'url': 'http://vimeo.com/nkistudio/videos', | ||||
|         'info_dict': { | ||||
|             'title': 'Nki', | ||||
|             'id': 'nkistudio', | ||||
|         }, | ||||
|         'playlist_mincount': 66, | ||||
|     }] | ||||
| @@ -496,6 +505,7 @@ class VimeoAlbumIE(VimeoChannelIE): | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vimeo.com/album/2632481', | ||||
|         'info_dict': { | ||||
|             'id': '2632481', | ||||
|             'title': 'Staff Favorites: November 2013', | ||||
|         }, | ||||
|         'playlist_mincount': 13, | ||||
| @@ -526,6 +536,7 @@ class VimeoGroupsIE(VimeoAlbumIE): | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vimeo.com/groups/rolexawards', | ||||
|         'info_dict': { | ||||
|             'id': 'rolexawards', | ||||
|             'title': 'Rolex Awards for Enterprise', | ||||
|         }, | ||||
|         'playlist_mincount': 73, | ||||
| @@ -608,6 +619,7 @@ class VimeoLikesIE(InfoExtractor): | ||||
|         'url': 'https://vimeo.com/user755559/likes/', | ||||
|         'playlist_mincount': 293, | ||||
|         "info_dict": { | ||||
|             'id': 'user755559_likes', | ||||
|             "description": "See all the videos urza likes", | ||||
|             "title": 'Videos urza likes', | ||||
|         }, | ||||
|   | ||||
| @@ -217,6 +217,9 @@ class VKUserVideosIE(InfoExtractor): | ||||
|     _TEMPLATE_URL = 'https://vk.com/videos' | ||||
|     _TEST = { | ||||
|         'url': 'http://vk.com/videos205387401', | ||||
|         'info_dict': { | ||||
|             'id': '205387401', | ||||
|         }, | ||||
|         'playlist_mincount': 4, | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -3,14 +3,14 @@ from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class WallaIE(SubtitlesInfoExtractor): | ||||
| class WallaIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one', | ||||
| @@ -52,13 +52,10 @@ class WallaIE(SubtitlesInfoExtractor): | ||||
|         subtitles = {} | ||||
|         for subtitle in item.findall('./subtitles/subtitle'): | ||||
|             lang = xpath_text(subtitle, './title') | ||||
|             subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = xpath_text(subtitle, './src') | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, subtitles) | ||||
|             return | ||||
|  | ||||
|         subtitles = self.extract_subtitles(video_id, subtitles) | ||||
|             subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{ | ||||
|                 'ext': 'srt', | ||||
|                 'url': xpath_text(subtitle, './src'), | ||||
|             }] | ||||
|  | ||||
|         formats = [] | ||||
|         for quality in item.findall('./qualities/quality'): | ||||
|   | ||||
| @@ -45,19 +45,17 @@ class WebOfStoriesIE(InfoExtractor): | ||||
|         description = self._html_search_meta('description', webpage) | ||||
|         thumbnail = self._og_search_thumbnail(webpage) | ||||
|  | ||||
|         story_filename = self._search_regex( | ||||
|             r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename') | ||||
|         speaker_id = self._search_regex( | ||||
|             r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID') | ||||
|         story_id = self._search_regex( | ||||
|             r'\.storyId\((\d+)\)', webpage, 'story ID') | ||||
|         speaker_type = self._search_regex( | ||||
|             r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type') | ||||
|         great_life = self._search_regex( | ||||
|             r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story') | ||||
|         embed_params = [s.strip(" \r\n\t'") for s in self._search_regex( | ||||
|             r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)', | ||||
|             webpage, 'embed params').split(',')] | ||||
|  | ||||
|         ( | ||||
|             _, speaker_id, story_id, story_duration, | ||||
|             speaker_type, great_life, _thumbnail, _has_subtitles, | ||||
|             story_filename, _story_order) = embed_params | ||||
|  | ||||
|         is_great_life_series = great_life == 'true' | ||||
|         duration = int_or_none(self._search_regex( | ||||
|             r'\.duration\((\d+)\)', webpage, 'duration', fatal=False)) | ||||
|         duration = int_or_none(story_duration) | ||||
|  | ||||
|         # URL building, see: http://www.webofstories.com/scripts/player.js | ||||
|         ms_prefix = '' | ||||
|   | ||||
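The WebOfStories change above replaces several page regexes with a single
capture of the getEmbedCode(...) argument list, split on commas. A sketch of
that parsing on a made-up page snippet (all values are illustrative):

    import re

    webpage = ("$(\"#embedCode\").html(getEmbedCode('', '9764', '53462', '529', "
               "'none', 'true', 'thumb.jpg', 'false', 'story.mp4', '1'))")
    embed_params = [s.strip(" \r\n\t'") for s in re.search(
        r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
        webpage).group(1).split(',')]
    (_, speaker_id, story_id, story_duration,
     speaker_type, great_life, _thumbnail, _has_subtitles,
     story_filename, _story_order) = embed_params
    # speaker_id == '9764', story_duration == '529', story_filename == 'story.mp4'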
| @@ -18,8 +18,8 @@ class WSJIE(InfoExtractor): | ||||
|             'id': '1BD01A4C-BFE8-40A5-A42F-8A8AF9898B1A', | ||||
|             'ext': 'mp4', | ||||
|             'upload_date': '20150202', | ||||
|             'uploader_id': 'bbright', | ||||
|             'creator': 'bbright', | ||||
|             'uploader_id': 'jdesai', | ||||
|             'creator': 'jdesai', | ||||
|             'categories': list,  # a long list | ||||
|             'duration': 90, | ||||
|             'title': 'Bills Coach Rex Ryan Updates His Old Jets Tattoo', | ||||
|   | ||||
| @@ -22,7 +22,7 @@ class XTubeIE(InfoExtractor): | ||||
|             'id': 'kVTUy_G222_', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'strange erotica', | ||||
|             'description': 'http://www.xtube.com an ET kind of thing', | ||||
|             'description': 'contains:an ET kind of thing', | ||||
|             'uploader': 'greenshowers', | ||||
|             'duration': 450, | ||||
|             'age_limit': 18, | ||||
|   | ||||
| @@ -24,7 +24,6 @@ class YahooIE(InfoExtractor): | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', | ||||
|             'md5': '4962b075c08be8690a922ee026d05e69', | ||||
|             'info_dict': { | ||||
|                 'id': '2d25e626-2378-391f-ada0-ddaf1417e588', | ||||
|                 'ext': 'mp4', | ||||
|   | ||||
| @@ -11,7 +11,6 @@ import time | ||||
| import traceback | ||||
|  | ||||
| from .common import InfoExtractor, SearchInfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from ..jsinterp import JSInterpreter | ||||
| from ..swfinterp import SWFInterpreter | ||||
| from ..compat import ( | ||||
| @@ -185,7 +184,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | ||||
|             return | ||||
|  | ||||
|  | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
| class YoutubeIE(YoutubeBaseInfoExtractor): | ||||
|     IE_DESC = 'YouTube.com' | ||||
|     _VALID_URL = r"""(?x)^ | ||||
|                      ( | ||||
| @@ -541,26 +540,30 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         if cache_spec is not None: | ||||
|             return lambda s: ''.join(s[i] for i in cache_spec) | ||||
|  | ||||
|         download_note = ( | ||||
|             'Downloading player %s' % player_url | ||||
|             if self._downloader.params.get('verbose') else | ||||
|             'Downloading %s player %s' % (player_type, player_id) | ||||
|         ) | ||||
|         if player_type == 'js': | ||||
|             code = self._download_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note='Downloading %s player %s' % (player_type, player_id), | ||||
|                 note=download_note, | ||||
|                 errnote='Download of %s failed' % player_url) | ||||
|             res = self._parse_sig_js(code) | ||||
|         elif player_type == 'swf': | ||||
|             urlh = self._request_webpage( | ||||
|                 player_url, video_id, | ||||
|                 note='Downloading %s player %s' % (player_type, player_id), | ||||
|                 note=download_note, | ||||
|                 errnote='Download of %s failed' % player_url) | ||||
|             code = urlh.read() | ||||
|             res = self._parse_sig_swf(code) | ||||
|         else: | ||||
|             assert False, 'Invalid player type %r' % player_type | ||||
|  | ||||
|         if cache_spec is None: | ||||
|             test_string = ''.join(map(compat_chr, range(len(example_sig)))) | ||||
|             cache_res = res(test_string) | ||||
|             cache_spec = [ord(c) for c in cache_res] | ||||
|         test_string = ''.join(map(compat_chr, range(len(example_sig)))) | ||||
|         cache_res = res(test_string) | ||||
|         cache_spec = [ord(c) for c in cache_res] | ||||
|  | ||||
|         self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec) | ||||
|         return res | ||||
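The cached spec is just the index permutation the signature function performs,
so later runs can replay it without re-downloading the player. A self-contained
sketch of the round trip (a toy reverse() stands in for the real function):

    # Toy stand-in for the extracted signature function.
    res = lambda s: s[::-1]

    example_sig = 'abcdef'
    # Probe with a string of unique code points to record where each lands ...
    test_string = ''.join(map(chr, range(len(example_sig))))
    cache_spec = [ord(c) for c in res(test_string)]
    # ... then the cheap replay reproduces res() without any player code:
    replay = lambda s: ''.join(s[i] for i in cache_spec)
    assert replay(example_sig) == res(example_sig)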
| @@ -644,7 +647,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             raise ExtractorError( | ||||
|                 'Signature extraction failed: ' + tb, cause=e) | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|     def _get_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
|             subs_doc = self._download_xml( | ||||
|                 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, | ||||
| @@ -658,23 +661,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             lang = track.attrib['lang_code'] | ||||
|             if lang in sub_lang_list: | ||||
|                 continue | ||||
|             params = compat_urllib_parse.urlencode({ | ||||
|                 'lang': lang, | ||||
|                 'v': video_id, | ||||
|                 'fmt': self._downloader.params.get('subtitlesformat', 'srt'), | ||||
|                 'name': track.attrib['name'].encode('utf-8'), | ||||
|             }) | ||||
|             url = 'https://www.youtube.com/api/timedtext?' + params | ||||
|             sub_lang_list[lang] = url | ||||
|             sub_formats = [] | ||||
|             for ext in ['sbv', 'vtt', 'srt']: | ||||
|                 params = compat_urllib_parse.urlencode({ | ||||
|                     'lang': lang, | ||||
|                     'v': video_id, | ||||
|                     'fmt': ext, | ||||
|                     'name': track.attrib['name'].encode('utf-8'), | ||||
|                 }) | ||||
|                 sub_formats.append({ | ||||
|                     'url': 'https://www.youtube.com/api/timedtext?' + params, | ||||
|                     'ext': ext, | ||||
|                 }) | ||||
|             sub_lang_list[lang] = sub_formats | ||||
|         if not sub_lang_list: | ||||
|             self._downloader.report_warning('video doesn\'t have subtitles') | ||||
|             return {} | ||||
|         return sub_lang_list | ||||
|  | ||||
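Each language now gets one timedtext URL per candidate extension instead of a
single URL in the user's preferred format. A standalone sketch of the per-ext
URL construction (the video id is only an example):

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    def timedtext_urls(video_id, lang, name=''):
        # One candidate per extension; the caller later picks by --sub-format.
        formats = []
        for ext in ['sbv', 'vtt', 'srt']:
            params = urlencode({'lang': lang, 'v': video_id,
                                'fmt': ext, 'name': name})
            formats.append({'url': 'https://www.youtube.com/api/timedtext?' + params,
                            'ext': ext})
        return formats

    # timedtext_urls('dQw4w9WgXcQ', 'en') -> three dicts, one per fmt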
|     def _get_available_automatic_caption(self, video_id, webpage): | ||||
|     def _get_automatic_captions(self, video_id, webpage): | ||||
|         """We need the webpage for getting the captions url, pass it as an | ||||
|            argument to speed up the process.""" | ||||
|         sub_format = self._downloader.params.get('subtitlesformat', 'srt') | ||||
|         self.to_screen('%s: Looking for automatic captions' % video_id) | ||||
|         mobj = re.search(r';ytplayer.config = ({.*?});', webpage) | ||||
|         err_msg = 'Couldn\'t find automatic captions for %s' % video_id | ||||
| @@ -704,14 +711,20 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             sub_lang_list = {} | ||||
|             for lang_node in caption_list.findall('target'): | ||||
|                 sub_lang = lang_node.attrib['lang_code'] | ||||
|                 params = compat_urllib_parse.urlencode({ | ||||
|                     'lang': original_lang, | ||||
|                     'tlang': sub_lang, | ||||
|                     'fmt': sub_format, | ||||
|                     'ts': timestamp, | ||||
|                     'kind': caption_kind, | ||||
|                 }) | ||||
|                 sub_lang_list[sub_lang] = caption_url + '&' + params | ||||
|                 sub_formats = [] | ||||
|                 for ext in ['sbv', 'vtt', 'srt']: | ||||
|                     params = compat_urllib_parse.urlencode({ | ||||
|                         'lang': original_lang, | ||||
|                         'tlang': sub_lang, | ||||
|                         'fmt': ext, | ||||
|                         'ts': timestamp, | ||||
|                         'kind': caption_kind, | ||||
|                     }) | ||||
|                     sub_formats.append({ | ||||
|                         'url': caption_url + '&' + params, | ||||
|                         'ext': ext, | ||||
|                     }) | ||||
|                 sub_lang_list[sub_lang] = sub_formats | ||||
|             return sub_lang_list | ||||
|         # An extractor error can be raised by the download process if there are | ||||
|         # no automatic captions but there are subtitles | ||||
| @@ -966,10 +979,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, video_webpage) | ||||
|  | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, video_webpage) | ||||
|             return | ||||
|         automatic_captions = self.extract_automatic_captions(video_id, video_webpage) | ||||
|  | ||||
|         if 'length_seconds' not in video_info: | ||||
|             self._downloader.report_warning('unable to extract video duration') | ||||
| @@ -1118,6 +1128,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|             'description': video_description, | ||||
|             'categories': video_categories, | ||||
|             'subtitles': video_subtitles, | ||||
|             'automatic_captions': automatic_captions, | ||||
|             'duration': video_duration, | ||||
|             'age_limit': 18 if age_gate else 0, | ||||
|             'annotations': video_annotations, | ||||
| @@ -1142,13 +1153,13 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|                         |  p/ | ||||
|                         ) | ||||
|                         ( | ||||
|                             (?:PL|LL|EC|UU|FL|RD)?[0-9A-Za-z-_]{10,} | ||||
|                             (?:PL|LL|EC|UU|FL|RD|UL)?[0-9A-Za-z-_]{10,} | ||||
|                             # Top tracks, they can also include dots | ||||
|                             |(?:MC)[\w\.]* | ||||
|                         ) | ||||
|                         .* | ||||
|                      | | ||||
|                         ((?:PL|LL|EC|UU|FL|RD)[0-9A-Za-z-_]{10,}) | ||||
|                         ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) | ||||
|                      )""" | ||||
|     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' | ||||
|     _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' | ||||
| @@ -1233,7 +1244,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|             for vid_id in ids] | ||||
|  | ||||
|     def _extract_mix(self, playlist_id): | ||||
|         # The mixes are generated from a a single video | ||||
|         # The mixes are generated from a single video | ||||
|         # The mixes are generated from a single video | ||||
|         # the id of the playlist is just 'RD' + video_id | ||||
|         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) | ||||
|         webpage = self._download_webpage( | ||||
| @@ -1269,7 +1280,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): | ||||
|             else: | ||||
|                 self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) | ||||
|  | ||||
|         if playlist_id.startswith('RD'): | ||||
|         if playlist_id.startswith('RD') or playlist_id.startswith('UL'): | ||||
|             # Mixes require a custom extraction process | ||||
|             return self._extract_mix(playlist_id) | ||||
|  | ||||
|   | ||||
							
								
								
									
youtube_dl/extractor/zapiks.py (new file, 110 lines)
									
								
							| @@ -0,0 +1,110 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     parse_duration, | ||||
|     parse_iso8601, | ||||
|     xpath_with_ns, | ||||
|     xpath_text, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class ZapiksIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))' | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', | ||||
|             'md5': 'aeb3c473b2d564b2d46d664d28d5f050', | ||||
|             'info_dict': { | ||||
|                 'id': '80798', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!', | ||||
|                 'description': 'md5:7054d6f6f620c6519be1fe710d4da847', | ||||
|                 'thumbnail': 're:^https?://.*\.jpg$', | ||||
|                 'duration': 528, | ||||
|                 'timestamp': 1359044972, | ||||
|                 'upload_date': '20130124', | ||||
|                 'view_count': int, | ||||
|                 'comment_count': int, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.zapiks.fr/index.php?action=playerIframe&media_id=118046&width=640&height=360&autoStart=false&language=fr', | ||||
|             'only_matching': True, | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         display_id = mobj.group('display_id') or video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         if not video_id: | ||||
|             video_id = self._search_regex( | ||||
|                 r'data-media-id="(\d+)"', webpage, 'video id') | ||||
|  | ||||
|         playlist = self._download_xml( | ||||
|             'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id, | ||||
|             display_id) | ||||
|  | ||||
|         NS_MAP = { | ||||
|             'jwplayer': 'http://rss.jwpcdn.com/' | ||||
|         } | ||||
|  | ||||
|         def ns(path): | ||||
|             return xpath_with_ns(path, NS_MAP) | ||||
|  | ||||
|         item = playlist.find('./channel/item') | ||||
|  | ||||
|         title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage) | ||||
|         description = self._og_search_description(webpage, default=None) | ||||
|         thumbnail = xpath_text( | ||||
|             item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None) | ||||
|         duration = parse_duration(self._html_search_meta( | ||||
|             'duration', webpage, 'duration', default=None)) | ||||
|         timestamp = parse_iso8601(self._html_search_meta( | ||||
|             'uploadDate', webpage, 'upload date', default=None), ' ') | ||||
|  | ||||
|         view_count = int_or_none(self._search_regex( | ||||
|             r'UserPlays:(\d+)', webpage, 'view count', default=None)) | ||||
|         comment_count = int_or_none(self._search_regex( | ||||
|             r'UserComments:(\d+)', webpage, 'comment count', default=None)) | ||||
|  | ||||
|         formats = [] | ||||
|         for source in item.findall(ns('./jwplayer:source')): | ||||
|             format_id = source.attrib['label'] | ||||
|             f = { | ||||
|                 'url': source.attrib['file'], | ||||
|                 'format_id': format_id, | ||||
|             } | ||||
|             m = re.search(r'^(?P<height>\d+)[pP]', format_id) | ||||
|             if m: | ||||
|                 f['height'] = int(m.group('height')) | ||||
|             formats.append(f) | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'timestamp': timestamp, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'formats': formats, | ||||
|         } | ||||
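A quick way to exercise the new Zapiks extractor from Python once this version
is installed (a sketch using the public YoutubeDL API; metadata fetch only):

    import youtube_dl

    ydl = youtube_dl.YoutubeDL()
    info = ydl.extract_info(
        'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html', download=False)
    print(info['id'], info['duration'])  # per the test above: 80798 528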
| @@ -30,13 +30,10 @@ class JSInterpreter(object): | ||||
|     def __init__(self, code, objects=None): | ||||
|         if objects is None: | ||||
|             objects = {} | ||||
|         self.code = self._remove_comments(code) | ||||
|         self.code = code | ||||
|         self._functions = {} | ||||
|         self._objects = objects | ||||
|  | ||||
|     def _remove_comments(self, code): | ||||
|         return re.sub(r'(?s)/\*.*?\*/', '', code) | ||||
|  | ||||
|     def interpret_statement(self, stmt, local_vars, allow_recursion=100): | ||||
|         if allow_recursion < 0: | ||||
|             raise ExtractorError('Recursion limit reached') | ||||
|   | ||||
| @@ -387,8 +387,8 @@ def parseOpts(overrideArguments=None): | ||||
|         help='lists all available subtitles for the video') | ||||
|     subtitles.add_option( | ||||
|         '--sub-format', | ||||
|         action='store', dest='subtitlesformat', metavar='FORMAT', default='srt', | ||||
|         help='subtitle format (default=srt) ([sbv/vtt] youtube only)') | ||||
|         action='store', dest='subtitlesformat', metavar='FORMAT', default='best', | ||||
|         help='subtitle format, accepts formats preference, for example: "ass/srt/best"') | ||||
|     subtitles.add_option( | ||||
|         '--sub-lang', '--sub-langs', '--srt-lang', | ||||
|         action='callback', dest='subtitleslangs', metavar='LANGS', type='str', | ||||
|   | ||||
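--sub-format therefore changes from a single value to a preference list such as
"ass/srt/best". A sketch of how such a list could be resolved against the
per-language format lists introduced above (illustrative logic, not the exact
youtube-dl routine):

    def pick_subtitle(formats, preference='best'):
        # formats: list of {'ext': ..., 'url': ...} dicts for one language.
        for want in preference.split('/'):
            if want == 'best':
                return formats[0]
            for f in formats:
                if f['ext'] == want:
                    return f
        return None

    # pick_subtitle([{'ext': 'vtt'}, {'ext': 'srt'}], 'ass/srt/best')
    # -> the 'srt' entry ('ass' is unavailable, so the next preference wins)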
| @@ -496,10 +496,6 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): | ||||
|         'zu': 'zul', | ||||
|     } | ||||
|  | ||||
|     def __init__(self, downloader=None, subtitlesformat='srt'): | ||||
|         super(FFmpegEmbedSubtitlePP, self).__init__(downloader) | ||||
|         self._subformat = subtitlesformat | ||||
|  | ||||
|     @classmethod | ||||
|     def _conver_lang_code(cls, code): | ||||
|         """Convert language code from ISO 639-1 to ISO 639-2/T""" | ||||
| @@ -509,13 +505,14 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): | ||||
|         if information['ext'] != 'mp4': | ||||
|             self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files') | ||||
|             return True, information | ||||
|         if not information.get('subtitles'): | ||||
|         subtitles = information.get('requested_subtitles') | ||||
|         if not subtitles: | ||||
|             self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed') | ||||
|             return True, information | ||||
|  | ||||
|         sub_langs = [key for key in information['subtitles']] | ||||
|         sub_langs = list(subtitles.keys()) | ||||
|         filename = information['filepath'] | ||||
|         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs] | ||||
|         input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()] | ||||
|  | ||||
|         opts = [ | ||||
|             '-map', '0', | ||||
|   | ||||
| @@ -304,6 +304,8 @@ def sanitize_filename(s, restricted=False, is_id=False): | ||||
|         # Common case of "Foreign band name - English song title" | ||||
|         if restricted and result.startswith('-_'): | ||||
|             result = result[2:] | ||||
|         if result.startswith('-'): | ||||
|             result = '_' + result[len('-'):] | ||||
|         if not result: | ||||
|             result = '_' | ||||
|     return result | ||||
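The new branch keeps results from starting with '-', which downstream shell
commands could otherwise mistake for an option flag. Expected behavior with
this build (illustrative values):

    from youtube_dl.utils import sanitize_filename

    # The leading '-' becomes '_', so the name is safe to pass to CLI tools:
    print(sanitize_filename('-Some Title', restricted=True))  # '_Some_Title'
    print(sanitize_filename('-abc'))                          # '_abc'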
| @@ -900,8 +902,8 @@ def _windows_write_string(s, out): | ||||
|     def not_a_console(handle): | ||||
|         if handle == INVALID_HANDLE_VALUE or handle is None: | ||||
|             return True | ||||
|         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR | ||||
|                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) | ||||
|         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or | ||||
|                 GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0) | ||||
|  | ||||
|     if not_a_console(h): | ||||
|         return False | ||||
| @@ -1560,8 +1562,8 @@ def js_to_json(code): | ||||
|         return '"%s"' % v | ||||
|  | ||||
|     res = re.sub(r'''(?x) | ||||
|         "(?:[^"\\]*(?:\\\\|\\")?)*"| | ||||
|         '(?:[^'\\]*(?:\\\\|\\')?)*'| | ||||
|         "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"| | ||||
|         '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'| | ||||
|         [a-zA-Z_][.a-zA-Z_0-9]* | ||||
|         ''', fix_kv, code) | ||||
|     res = re.sub(r',(\s*\])', lambda m: m.group(1), res) | ||||
| @@ -1616,6 +1618,15 @@ def args_to_str(args): | ||||
|     return ' '.join(shlex_quote(a) for a in args) | ||||
|  | ||||
|  | ||||
| def mimetype2ext(mt): | ||||
|     _, _, res = mt.rpartition('/') | ||||
|  | ||||
|     return { | ||||
|         'x-ms-wmv': 'wmv', | ||||
|         'x-mp4-fragmented': 'mp4', | ||||
|     }.get(res, res) | ||||
|  | ||||
|  | ||||
| def urlhandle_detect_ext(url_handle): | ||||
|     try: | ||||
|         url_handle.headers | ||||
| @@ -1631,7 +1642,7 @@ def urlhandle_detect_ext(url_handle): | ||||
|             if e: | ||||
|                 return e | ||||
|  | ||||
|     return getheader('Content-Type').split("/")[1] | ||||
|     return mimetype2ext(getheader('Content-Type')) | ||||
|  | ||||
|  | ||||
| def age_restricted(content_limit, age_limit): | ||||
|   | ||||
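mimetype2ext maps a Content-Type onto a usable file extension, special-casing
the two MIME types above and otherwise falling back to the subtype; with this
change urlhandle_detect_ext routes through it instead of naively splitting on
'/'. Illustrative calls:

    from youtube_dl.utils import mimetype2ext

    print(mimetype2ext('video/x-ms-wmv'))          # 'wmv'  (mapped)
    print(mimetype2ext('video/x-mp4-fragmented'))  # 'mp4'  (mapped)
    print(mimetype2ext('video/mp4'))               # 'mp4'  (fallback: subtype)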
| @@ -1,3 +1,3 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| __version__ = '2015.02.17.1' | ||||
| __version__ = '2015.02.24.2' | ||||
|   | ||||