mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[ted] Added support for subtitle download
This commit is contained in:
		
							
								
								
									
										63
									
								
								test/test_ted_subtitles.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								test/test_ted_subtitles.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| #!/usr/bin/env python | ||||
|  | ||||
| import sys | ||||
| import unittest | ||||
| import hashlib | ||||
|  | ||||
| # Allow direct execution | ||||
| import os | ||||
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | ||||
|  | ||||
| from youtube_dl.extractor import TEDIE | ||||
| from youtube_dl.utils import * | ||||
| from helper import FakeYDL | ||||
|  | ||||
def md5(s):
    """Return the hexadecimal MD5 digest of the text *s*, encoded as UTF-8."""
    return hashlib.md5(s.encode('utf-8')).hexdigest()
|  | ||||
class TestTedSubtitles(unittest.TestCase):
    """Check how the TED extractor fills the 'subtitles' field.

    Every test configures the parameters of a fresh FakeYDL, runs TEDIE on
    the talk at ``self.url`` and inspects the result.
    """

    def setUp(self):
        self.DL = FakeYDL()
        self.url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html'

    def getInfoDict(self):
        """Run TEDIE on ``self.url`` using the current downloader params."""
        IE = TEDIE(self.DL)
        info_dict = IE.extract(self.url)
        return info_dict

    def getSubtitles(self):
        """Return the 'subtitles' value of the first extracted entry."""
        info_dict = self.getInfoDict()
        return info_dict[0]['subtitles']

    def test_no_writesubtitles(self):
        # Without any subtitle option set, no subtitles are expected.
        subtitles = self.getSubtitles()
        self.assertEqual(subtitles, None)

    def test_subtitles(self):
        self.DL.params['writesubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['en']), '2154f31ff9b9f89a0aa671537559c21d')

    def test_subtitles_lang(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitleslangs'] = ['fr']
        subtitles = self.getSubtitles()
        self.assertEqual(md5(subtitles['fr']), '7616cbc6df20ec2c1204083c83871cf6')

    def test_allsubtitles(self):
        self.DL.params['writesubtitles'] = True
        self.DL.params['allsubtitles'] = True
        subtitles = self.getSubtitles()
        self.assertEqual(len(subtitles), 28)

    def test_list_subtitles(self):
        # With 'listsubtitles' set, the expected extraction result is [None].
        self.DL.params['listsubtitles'] = True
        info_dict = self.getInfoDict()
        self.assertEqual(info_dict, [None])

    def test_automatic_captions(self):
        self.DL.params['writeautomaticsub'] = True
        # Use the same 'subtitleslangs' key as the other tests; the previous
        # spelling 'subtitleslang' matched no key used elsewhere in this file.
        self.DL.params['subtitleslangs'] = ['en']
        subtitles = self.getSubtitles()
        # assertEqual reports the actual length on failure.
        self.assertEqual(len(subtitles), 0)

    def test_multiple_langs(self):
        self.DL.params['writesubtitles'] = True
        langs = ['es', 'fr', 'de']
        self.DL.params['subtitleslangs'] = langs
        subtitles = self.getSubtitles()
        for lang in langs:
            self.assertTrue(subtitles.get(lang) is not None, u'Subtitles for \'%s\' not extracted' % lang)
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
| @@ -1,10 +1,9 @@ | ||||
| import json | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
|  | ||||
|  | ||||
| class TEDIE(InfoExtractor): | ||||
| class TEDIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL=r'''http://www\.ted\.com/ | ||||
|                    ( | ||||
|                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist | ||||
| @@ -82,11 +81,21 @@ class TEDIE(InfoExtractor): | ||||
|             'url': stream['file'], | ||||
|             'format': stream['id'] | ||||
|             } for stream in info['htmlStreams']] | ||||
|  | ||||
|         video_id = info['id'] | ||||
|  | ||||
|         # subtitles | ||||
|         video_subtitles = self.extract_subtitles(video_id, webpage) | ||||
|         if self._downloader.params.get('listsubtitles', False): | ||||
|             self._list_available_subtitles(video_id, webpage) | ||||
|             return | ||||
|  | ||||
|         info = { | ||||
|             'id': info['id'], | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': desc, | ||||
|             'subtitles': video_subtitles, | ||||
|             'formats': formats, | ||||
|         } | ||||
|  | ||||
| @@ -94,3 +103,14 @@ class TEDIE(InfoExtractor): | ||||
|         info.update(info['formats'][-1]) | ||||
|  | ||||
|         return info | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         options = self._search_regex(r'(?:<select name="subtitles_language_select" id="subtitles_language_select">)(.*?)(?:</select>)', webpage, 'subtitles_language_select', flags=re.DOTALL) | ||||
|         languages = re.findall(r'(?:<option value=")(\S+)"', options) | ||||
|         if languages: | ||||
|             sub_lang_list = {} | ||||
|             for l in languages: | ||||
|                 url = 'http://www.ted.com/talks/subtitles/id/%s/lang/%s/format/srt' % (video_id, l) | ||||
|                 sub_lang_list[l] = url | ||||
|             return sub_lang_list | ||||
|         return {} | ||||
|   | ||||
		Reference in New Issue
	
	Block a user