mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[YouTube] Support JSON3 subtitle format
* subtitle tests updated to match
This commit is contained in:
		| @@ -59,6 +59,21 @@ class BaseTestSubtitles(unittest.TestCase): | |||||||
|  |  | ||||||
|  |  | ||||||
| class TestYoutubeSubtitles(BaseTestSubtitles): | class TestYoutubeSubtitles(BaseTestSubtitles): | ||||||
|  |     # Available subtitles for QRS8MkLhQmM: | ||||||
|  |     # Language formats | ||||||
|  |     # ru       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # fr       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # en       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # nl       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # de       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # ko       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # it       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # zh-Hant  vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # hi       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # pt-BR    vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # es-MX    vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # ja       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|  |     # pl       vtt, ttml, srv3, srv2, srv1, json3 | ||||||
|     url = 'QRS8MkLhQmM' |     url = 'QRS8MkLhQmM' | ||||||
|     IE = YoutubeIE |     IE = YoutubeIE | ||||||
|  |  | ||||||
| @@ -67,41 +82,60 @@ class TestYoutubeSubtitles(BaseTestSubtitles): | |||||||
|         self.DL.params['allsubtitles'] = True |         self.DL.params['allsubtitles'] = True | ||||||
|         subtitles = self.getSubtitles() |         subtitles = self.getSubtitles() | ||||||
|         self.assertEqual(len(subtitles.keys()), 13) |         self.assertEqual(len(subtitles.keys()), 13) | ||||||
|         self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') |         self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') | ||||||
|         self.assertEqual(md5(subtitles['it']), '6d752b98c31f1cf8d597050c7a2cb4b5') |         self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9') | ||||||
|         for lang in ['fr', 'de']: |         for lang in ['fr', 'de']: | ||||||
|             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) |             self.assertTrue(subtitles.get(lang) is not None, 'Subtitles for \'%s\' not extracted' % lang) | ||||||
|  |  | ||||||
|     def test_youtube_subtitles_ttml_format(self): |     def _test_subtitles_format(self, fmt, md5_hash, lang='en'): | ||||||
|         self.DL.params['writesubtitles'] = True |         self.DL.params['writesubtitles'] = True | ||||||
|         self.DL.params['subtitlesformat'] = 'ttml' |         self.DL.params['subtitlesformat'] = fmt | ||||||
|         subtitles = self.getSubtitles() |         subtitles = self.getSubtitles() | ||||||
|         self.assertEqual(md5(subtitles['en']), 'e306f8c42842f723447d9f63ad65df54') |         self.assertEqual(md5(subtitles[lang]), md5_hash) | ||||||
|  |  | ||||||
|  |     def test_youtube_subtitles_ttml_format(self): | ||||||
|  |         self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2') | ||||||
|  |  | ||||||
|     def test_youtube_subtitles_vtt_format(self): |     def test_youtube_subtitles_vtt_format(self): | ||||||
|         self.DL.params['writesubtitles'] = True |         self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d') | ||||||
|         self.DL.params['subtitlesformat'] = 'vtt' |  | ||||||
|  |     def test_youtube_subtitles_json3_format(self): | ||||||
|  |         self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b') | ||||||
|  |  | ||||||
|  |     def _test_automatic_captions(self, url, lang): | ||||||
|  |         self.url = url | ||||||
|  |         self.DL.params['writeautomaticsub'] = True | ||||||
|  |         self.DL.params['subtitleslangs'] = [lang] | ||||||
|         subtitles = self.getSubtitles() |         subtitles = self.getSubtitles() | ||||||
|         self.assertEqual(md5(subtitles['en']), '3cb210999d3e021bd6c7f0ea751eab06') |         self.assertTrue(subtitles[lang] is not None) | ||||||
|  |  | ||||||
|     def test_youtube_automatic_captions(self): |     def test_youtube_automatic_captions(self): | ||||||
|         self.url = '8YoUxe5ncPo' |         # Available automatic captions for 8YoUxe5ncPo: | ||||||
|         self.DL.params['writeautomaticsub'] = True |         # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3) | ||||||
|         self.DL.params['subtitleslangs'] = ['it'] |         # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr, | ||||||
|         subtitles = self.getSubtitles() |         # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da, | ||||||
|         self.assertTrue(subtitles['it'] is not None) |         # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv, | ||||||
|  |         # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy, | ||||||
|  |         # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur, | ||||||
|  |         # mt, ms, mr, ug, ta, my, af, sw, is, am,  | ||||||
|  |         #                                         *it*, iw, sv, ar, | ||||||
|  |         # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi, | ||||||
|  |         # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl, | ||||||
|  |         # ky, sd | ||||||
|  |         # ... | ||||||
|  |         self._test_automatic_captions('8YoUxe5ncPo', 'it') | ||||||
|  |  | ||||||
|  |     @unittest.skip('ASR subs all in all supported langs now') | ||||||
|     def test_youtube_translated_subtitles(self): |     def test_youtube_translated_subtitles(self): | ||||||
|         # This video has a subtitles track, which can be translated |         # This video has a subtitles track, which can be translated (#4555) | ||||||
|         self.url = 'Ky9eprVWzlI' |         self._test_automatic_captions('Ky9eprVWzlI', 'it') | ||||||
|         self.DL.params['writeautomaticsub'] = True |  | ||||||
|         self.DL.params['subtitleslangs'] = ['it'] |  | ||||||
|         subtitles = self.getSubtitles() |  | ||||||
|         self.assertTrue(subtitles['it'] is not None) |  | ||||||
|  |  | ||||||
|     def test_youtube_nosubtitles(self): |     def test_youtube_nosubtitles(self): | ||||||
|         self.DL.expect_warning('video doesn\'t have subtitles') |         self.DL.expect_warning('video doesn\'t have subtitles') | ||||||
|         self.url = 'n5BB19UTcdA' |         # Available automatic captions for 8YoUxe5ncPo: | ||||||
|  |         # ... | ||||||
|  |         # 8YoUxe5ncPo has no subtitles | ||||||
|  |         self.url = '8YoUxe5ncPo' | ||||||
|         self.DL.params['writesubtitles'] = True |         self.DL.params['writesubtitles'] = True | ||||||
|         self.DL.params['allsubtitles'] = True |         self.DL.params['allsubtitles'] = True | ||||||
|         subtitles = self.getSubtitles() |         subtitles = self.getSubtitles() | ||||||
|   | |||||||
| @@ -499,7 +499,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): | |||||||
|         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', |         r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', | ||||||
|         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', |         r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.js$', | ||||||
|     ) |     ) | ||||||
|     _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') |     _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') | ||||||
|  |  | ||||||
|     _GEO_BYPASS = False |     _GEO_BYPASS = False | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user