mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			115 Commits
		
	
	
		
			2014.06.24
			...
			2014.07.11
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 09018e19a5 | ||
|  | 345e37831c | ||
|  | 00ac799b68 | ||
|  | 133af9385b | ||
|  | 40c696e5c6 | ||
|  | d6d5028922 | ||
|  | 38ad119f97 | ||
|  | 4e415288d7 | ||
|  | fada438acf | ||
|  | 1df0ae2170 | ||
|  | d96b9d40f0 | ||
|  | fa19dfccf9 | ||
|  | cdc22cb886 | ||
|  | 04c77a54b0 | ||
|  | 64a8c39a1f | ||
|  | 3d55f2806e | ||
|  | 1eb867f33f | ||
|  | e93f4f7578 | ||
|  | 45ead916d1 | ||
|  | 3a0879c8c8 | ||
|  | ebf361ce18 | ||
|  | 953b358668 | ||
|  | 3dfd25b3aa | ||
|  | 6f66eedc5d | ||
|  | 4094b6e36d | ||
|  | c09cbf0ed9 | ||
|  | 391d53e1dd | ||
|  | f64ebfe3e5 | ||
|  | fc040bfd05 | ||
|  | c8bf86d50d | ||
|  | 61989fb5e9 | ||
|  | 6f9d4d542f | ||
|  | b3a8878080 | ||
|  | f4d66a99cf | ||
|  | 537ba6f381 | ||
|  | 411f691b21 | ||
|  | d6aa1967ad | ||
|  | 6e1e0e4b5b | ||
|  | 3941669d69 | ||
|  | 1aac03797e | ||
|  | 459af43494 | ||
|  | f4f7e3cf41 | ||
|  | 1fd015516e | ||
|  | 76bafa8ffe | ||
|  | 8d5797b00f | ||
|  | 7571c02c8a | ||
|  | 49cbe7c8e3 | ||
|  | ba4133c9eb | ||
|  | b67f1840a1 | ||
|  | 165c46690f | ||
|  | 16bc9ab601 | ||
|  | 15ce1338b4 | ||
|  | 0ff30c5333 | ||
|  | 6feb2d5e80 | ||
|  | 1e07fea200 | ||
|  | 7aeb67b39b | ||
|  | 93881db22a | ||
|  | 64ed7a38f9 | ||
|  | 2fd466fcfc | ||
|  | dc2fc73691 | ||
|  | c4808c6009 | ||
|  | c67f584eb3 | ||
|  | 29f6ed78e8 | ||
|  | 7807ee664d | ||
|  | d518d06efd | ||
|  | 25a0cc44b9 | ||
|  | 825cdcec3c | ||
|  | 41b610acab | ||
|  | 0364fa8b65 | ||
|  | 849086a1ae | ||
|  | 36fbc6887f | ||
|  | a8a98e43f2 | ||
|  | 57bdc730e2 | ||
|  | 31a196d7f5 | ||
|  | 9b27e6c3b4 | ||
|  | 62f1f9507f | ||
|  | ee8dda41ae | ||
|  | 01ba178097 | ||
|  | 78ff59d052 | ||
|  | f3f1cd6b3b | ||
|  | 803540e811 | ||
|  | 458ade6361 | ||
|  | a69969ee05 | ||
|  | f2b8db57eb | ||
|  | 331ae266ff | ||
|  | 4242001863 | ||
|  | 78338f71ca | ||
|  | f5172a3084 | ||
|  | c7df67edbd | ||
|  | d410fee91d | ||
|  | ba7aa464de | ||
|  | 8333034dce | ||
|  | 637b6af80f | ||
|  | 1044f8afd2 | ||
|  | 2f775107f9 | ||
|  | 85342674b2 | ||
|  | fd69098a45 | ||
|  | 8867f908fc | ||
|  | b7c33124c8 | ||
|  | 89a8c423c7 | ||
|  | cea2582df2 | ||
|  | e423e0baaa | ||
|  | 60b2dd1285 | ||
|  | 36ddd8b3f7 | ||
|  | e66ab17a36 | ||
|  | cb437dc2ad | ||
|  | 0d933b2ad5 | ||
|  | e5c3a4b549 | ||
|  | 1d0668ed5a | ||
|  | 305d068362 | ||
|  | a231ce87b5 | ||
|  | a84d20fc14 | ||
|  | 9e30092361 | ||
|  | 10d5c7aa5f | ||
|  | 412f356e04 | 
| @@ -70,8 +70,9 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|     --default-search PREFIX          Use this prefix for unqualified URLs. For | ||||
|                                      example "gvsearch2:" downloads two videos | ||||
|                                      from google videos for  youtube-dl "large | ||||
|                                      apple". By default (with value "auto") | ||||
|                                      youtube-dl guesses. | ||||
|                                      apple". Use the value "auto" to let | ||||
|                                      youtube-dl guess. The default value "error" | ||||
|                                      just throws an error. | ||||
|     --ignore-config                  Do not read configuration files. When given | ||||
|                                      in the global configuration file /etc | ||||
|                                      /youtube-dl.conf: do not read the user | ||||
| @@ -254,7 +255,7 @@ which means you can modify it, redistribute it or use it however you like. | ||||
|                                      128K (default 5) | ||||
|     --recode-video FORMAT            Encode the video to another format if | ||||
|                                      necessary (currently supported: | ||||
|                                      mp4|flv|ogg|webm) | ||||
|                                      mp4|flv|ogg|webm|mkv) | ||||
|     -k, --keep-video                 keeps the video file on disk after the | ||||
|                                      post-processing; the video is erased by | ||||
|                                      default | ||||
|   | ||||
| @@ -69,9 +69,6 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|     def test_youtube_show_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) | ||||
|  | ||||
|     def test_youtube_truncated(self): | ||||
|         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) | ||||
|  | ||||
|     def test_youtube_search_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | ||||
|         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | ||||
|   | ||||
| @@ -28,8 +28,9 @@ from youtube_dl.extractor import ( | ||||
|     SoundcloudSetIE, | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE, | ||||
|     TeacherTubeClassroomIE, | ||||
|     TeacherTubeUserIE, | ||||
|     LivestreamIE, | ||||
|     LivestreamOriginalIE, | ||||
|     NHLVideocenterIE, | ||||
|     BambuserChannelIE, | ||||
|     BandcampAlbumIE, | ||||
| @@ -40,6 +41,7 @@ from youtube_dl.extractor import ( | ||||
|     KhanAcademyIE, | ||||
|     EveryonesMixtapeIE, | ||||
|     RutubeChannelIE, | ||||
|     RutubePersonIE, | ||||
|     GoogleSearchIE, | ||||
|     GenericIE, | ||||
|     TEDIE, | ||||
| @@ -109,7 +111,7 @@ class TestPlaylists(unittest.TestCase): | ||||
|         ie = VineUserIE(dl) | ||||
|         result = ie.extract('https://vine.co/Visa') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertTrue(len(result['entries']) >= 50) | ||||
|         self.assertTrue(len(result['entries']) >= 47) | ||||
|  | ||||
|     def test_ustream_channel(self): | ||||
|         dl = FakeYDL() | ||||
| @@ -135,6 +137,14 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['id'], '9615865') | ||||
|         self.assertTrue(len(result['entries']) >= 12) | ||||
|  | ||||
|     def test_soundcloud_likes(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = SoundcloudUserIE(dl) | ||||
|         result = ie.extract('https://soundcloud.com/the-concept-band/likes') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '9615865') | ||||
|         self.assertTrue(len(result['entries']) >= 1) | ||||
|  | ||||
|     def test_soundcloud_playlist(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = SoundcloudPlaylistIE(dl) | ||||
| @@ -154,6 +164,14 @@ class TestPlaylists(unittest.TestCase): | ||||
|         self.assertEqual(result['title'], 'TEDCity2.0 (English)') | ||||
|         self.assertTrue(len(result['entries']) >= 4) | ||||
|  | ||||
|     def test_livestreamoriginal_folder(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = LivestreamOriginalIE(dl) | ||||
|         result = ie.extract('https://www.livestream.com/newplay/folder?dirId=a07bf706-d0e4-4e75-a747-b021d84f2fd3') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'a07bf706-d0e4-4e75-a747-b021d84f2fd3') | ||||
|         self.assertTrue(len(result['entries']) >= 28) | ||||
|  | ||||
|     def test_nhl_videocenter(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = NHLVideocenterIE(dl) | ||||
| @@ -256,10 +274,18 @@ class TestPlaylists(unittest.TestCase): | ||||
|     def test_rutube_channel(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = RutubeChannelIE(dl) | ||||
|         result = ie.extract('http://rutube.ru/tags/video/1409') | ||||
|         result = ie.extract('http://rutube.ru/tags/video/1800/') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '1409') | ||||
|         self.assertTrue(len(result['entries']) >= 34) | ||||
|         self.assertEqual(result['id'], '1800') | ||||
|         self.assertTrue(len(result['entries']) >= 68) | ||||
|  | ||||
|     def test_rutube_person(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = RutubePersonIE(dl) | ||||
|         result = ie.extract('http://rutube.ru/video/person/313878/') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], '313878') | ||||
|         self.assertTrue(len(result['entries']) >= 37) | ||||
|  | ||||
|     def test_multiple_brightcove_videos(self): | ||||
|         # https://github.com/rg3/youtube-dl/issues/2283 | ||||
| @@ -361,13 +387,13 @@ class TestPlaylists(unittest.TestCase): | ||||
|             result['title'], 'Brace Yourself - Today\'s Weirdest News') | ||||
|         self.assertTrue(len(result['entries']) >= 10) | ||||
|  | ||||
|     def test_TeacherTubeClassroom(self): | ||||
|     def test_TeacherTubeUser(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = TeacherTubeClassroomIE(dl) | ||||
|         result = ie.extract('http://www.teachertube.com/view_classroom.php?user=rbhagwati2') | ||||
|         ie = TeacherTubeUserIE(dl) | ||||
|         result = ie.extract('http://www.teachertube.com/user/profile/rbhagwati2') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['id'], 'rbhagwati2') | ||||
|         self.assertTrue(len(result['entries']) >= 20) | ||||
|         self.assertTrue(len(result['entries']) >= 179) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -87,7 +87,7 @@ class TestYoutubeSubtitles(BaseTestSubtitles): | ||||
|  | ||||
|     def test_youtube_nosubtitles(self): | ||||
|         self.DL.expect_warning(u'video doesn\'t have subtitles') | ||||
|         self.url = 'sAjKT8FhjI8' | ||||
|         self.url = 'n5BB19UTcdA' | ||||
|         self.DL.params['writesubtitles'] = True | ||||
|         self.DL.params['allsubtitles'] = True | ||||
|         subtitles = self.getSubtitles() | ||||
|   | ||||
| @@ -33,6 +33,12 @@ _TESTS = [ | ||||
|         90, | ||||
|         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', | ||||
|     ), | ||||
|     ( | ||||
|         u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', | ||||
|         u'js', | ||||
|         u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', | ||||
|         u'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', | ||||
|     ), | ||||
| ] | ||||
|  | ||||
|  | ||||
| @@ -44,7 +50,7 @@ class TestSignature(unittest.TestCase): | ||||
|             os.mkdir(self.TESTDATA_DIR) | ||||
|  | ||||
|  | ||||
| def make_tfunc(url, stype, sig_length, expected_sig): | ||||
| def make_tfunc(url, stype, sig_input, expected_sig): | ||||
|     basename = url.rpartition('/')[2] | ||||
|     m = re.match(r'.*-([a-zA-Z0-9_-]+)\.[a-z]+$', basename) | ||||
|     assert m, '%r should follow URL format' % basename | ||||
| @@ -66,7 +72,9 @@ def make_tfunc(url, stype, sig_length, expected_sig): | ||||
|             with open(fn, 'rb') as testf: | ||||
|                 swfcode = testf.read() | ||||
|             func = ie._parse_sig_swf(swfcode) | ||||
|         src_sig = compat_str(string.printable[:sig_length]) | ||||
|         src_sig = ( | ||||
|             compat_str(string.printable[:sig_input]) | ||||
|             if isinstance(sig_input, int) else sig_input) | ||||
|         got_sig = func(src_sig) | ||||
|         self.assertEqual(got_sig, expected_sig) | ||||
|  | ||||
|   | ||||
| @@ -993,6 +993,8 @@ class YoutubeDL(object): | ||||
|                         fd = get_suitable_downloader(info)(self, self.params) | ||||
|                         for ph in self._progress_hooks: | ||||
|                             fd.add_progress_hook(ph) | ||||
|                         if self.params.get('verbose'): | ||||
|                             self.to_stdout('[debug] Invoking downloader on %r' % info.get('url')) | ||||
|                         return fd.download(name, info) | ||||
|                     if info_dict.get('requested_formats') is not None: | ||||
|                         downloaded = [] | ||||
|   | ||||
| @@ -59,6 +59,10 @@ __authors__  = ( | ||||
|     'Adam Thalhammer', | ||||
|     'Georg Jähnig', | ||||
|     'Ralf Haring', | ||||
|     'Koki Takahashi', | ||||
|     'Ariset Llerena', | ||||
|     'Adam Malcontenti-Wilson', | ||||
|     'Tobias Bell', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
| @@ -269,7 +273,7 @@ def parseOpts(overrideArguments=None): | ||||
|     general.add_option( | ||||
|         '--default-search', | ||||
|         dest='default_search', metavar='PREFIX', | ||||
|         help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". By default (with value "auto") youtube-dl guesses.') | ||||
|         help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for  youtube-dl "large apple". Use the value "auto" to let youtube-dl guess. The default value "error" just throws an error.') | ||||
|     general.add_option( | ||||
|         '--ignore-config', | ||||
|         action='store_true', | ||||
| @@ -505,7 +509,7 @@ def parseOpts(overrideArguments=None): | ||||
|     postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', | ||||
|             help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') | ||||
|     postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, | ||||
|             help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') | ||||
|             help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv)') | ||||
|     postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, | ||||
|             help='keeps the video file on disk after the post-processing; the video is erased by default') | ||||
|     postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, | ||||
|   | ||||
| @@ -3,6 +3,7 @@ from .addanime import AddAnimeIE | ||||
| from .aftonbladet import AftonbladetIE | ||||
| from .anitube import AnitubeIE | ||||
| from .aol import AolIE | ||||
| from .allocine import AllocineIE | ||||
| from .aparat import AparatIE | ||||
| from .appletrailers import AppleTrailersIE | ||||
| from .archiveorg import ArchiveOrgIE | ||||
| @@ -63,6 +64,7 @@ from .dailymotion import ( | ||||
| from .daum import DaumIE | ||||
| from .dotsub import DotsubIE | ||||
| from .dreisat import DreiSatIE | ||||
| from .drtv import DRTVIE | ||||
| from .defense import DefenseGouvFrIE | ||||
| from .discovery import DiscoveryIE | ||||
| from .divxstage import DivxStageIE | ||||
| @@ -103,6 +105,7 @@ from .freesound import FreesoundIE | ||||
| from .freespeech import FreespeechIE | ||||
| from .funnyordie import FunnyOrDieIE | ||||
| from .gamekings import GamekingsIE | ||||
| from .gameone import GameOneIE | ||||
| from .gamespot import GameSpotIE | ||||
| from .gametrailers import GametrailersIE | ||||
| from .gdcvault import GDCVaultIE | ||||
| @@ -110,6 +113,7 @@ from .generic import GenericIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| from .gorillavid import GorillaVidIE | ||||
| from .goshgay import GoshgayIE | ||||
| from .hark import HarkIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hentaistigma import HentaiStigmaIE | ||||
| @@ -147,7 +151,11 @@ from .ku6 import Ku6IE | ||||
| from .la7 import LA7IE | ||||
| from .lifenews import LifeNewsIE | ||||
| from .liveleak import LiveLeakIE | ||||
| from .livestream import LivestreamIE, LivestreamOriginalIE | ||||
| from .livestream import ( | ||||
|     LivestreamIE, | ||||
|     LivestreamOriginalIE, | ||||
|     LivestreamShortenerIE, | ||||
| ) | ||||
| from .lynda import ( | ||||
|     LyndaIE, | ||||
|     LyndaCourseIE | ||||
| @@ -165,6 +173,7 @@ from .mpora import MporaIE | ||||
| from .mofosex import MofosexIE | ||||
| from .mooshare import MooshareIE | ||||
| from .morningstar import MorningstarIE | ||||
| from .motherless import MotherlessIE | ||||
| from .motorsport import MotorsportIE | ||||
| from .moviezine import MoviezineIE | ||||
| from .movshare import MovShareIE | ||||
| @@ -197,6 +206,7 @@ from .normalboots import NormalbootsIE | ||||
| from .novamov import NovaMovIE | ||||
| from .nowness import NownessIE | ||||
| from .nowvideo import NowVideoIE | ||||
| from .npo import NPOIE | ||||
| from .nrk import ( | ||||
|     NRKIE, | ||||
|     NRKTVIE, | ||||
| @@ -238,6 +248,7 @@ from .rutube import ( | ||||
| from .rutv import RUTVIE | ||||
| from .savefrom import SaveFromIE | ||||
| from .scivee import SciVeeIE | ||||
| from .screencast import ScreencastIE | ||||
| from .servingsys import ServingSysIE | ||||
| from .sina import SinaIE | ||||
| from .slideshare import SlideshareIE | ||||
| @@ -255,6 +266,7 @@ from .soundcloud import ( | ||||
|     SoundcloudUserIE, | ||||
|     SoundcloudPlaylistIE | ||||
| ) | ||||
| from .soundgasm import SoundgasmIE | ||||
| from .southparkstudios import ( | ||||
|     SouthParkStudiosIE, | ||||
|     SouthparkDeIE, | ||||
| @@ -274,12 +286,13 @@ from .sztvhu import SztvHuIE | ||||
| from .tagesschau import TagesschauIE | ||||
| from .teachertube import ( | ||||
|     TeacherTubeIE, | ||||
|     TeacherTubeClassroomIE, | ||||
|     TeacherTubeUserIE, | ||||
| ) | ||||
| from .teachingchannel import TeachingChannelIE | ||||
| from .teamcoco import TeamcocoIE | ||||
| from .techtalks import TechTalksIE | ||||
| from .ted import TEDIE | ||||
| from .tenplay import TenPlayIE | ||||
| from .testurl import TestURLIE | ||||
| from .tf1 import TF1IE | ||||
| from .theplatform import ThePlatformIE | ||||
| @@ -327,12 +340,14 @@ from .vimeo import ( | ||||
|     VimeoReviewIE, | ||||
|     VimeoWatchLaterIE, | ||||
| ) | ||||
| from .vimple import VimpleIE | ||||
| from .vine import ( | ||||
|     VineIE, | ||||
|     VineUserIE, | ||||
| ) | ||||
| from .viki import VikiIE | ||||
| from .vk import VKIE | ||||
| from .vodlocker import VodlockerIE | ||||
| from .vube import VubeIE | ||||
| from .vuclip import VuClipIE | ||||
| from .vulture import VultureIE | ||||
|   | ||||
							
								
								
									
										89
									
								
								youtube_dl/extractor/allocine.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								youtube_dl/extractor/allocine.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_str, | ||||
|     qualities, | ||||
|     determine_ext, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class AllocineIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?allocine\.fr/(?P<typ>article|video|film)/(fichearticle_gen_carticle=|player_gen_cmedia=|fichefilm_gen_cfilm=)(?P<id>[0-9]+)(?:\.html)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.allocine.fr/article/fichearticle_gen_carticle=18635087.html', | ||||
|         'md5': '0c9fcf59a841f65635fa300ac43d8269', | ||||
|         'info_dict': { | ||||
|             'id': '19546517', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Astérix - Le Domaine des Dieux Teaser VF', | ||||
|             'description': 'md5:4a754271d9c6f16c72629a8a993ee884', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.allocine.fr/video/player_gen_cmedia=19540403&cfilm=222257.html', | ||||
|         'md5': 'd0cdce5d2b9522ce279fdfec07ff16e0', | ||||
|         'info_dict': { | ||||
|             'id': '19540403', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Planes 2 Bande-annonce VF', | ||||
|             'description': 'md5:c4b1f7bd682a91de6491ada267ec0f4d', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.allocine.fr/film/fichefilm_gen_cfilm=181290.html', | ||||
|         'md5': '101250fb127ef9ca3d73186ff22a47ce', | ||||
|         'info_dict': { | ||||
|             'id': '19544709', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Dragons 2 - Bande annonce finale VF', | ||||
|             'description': 'md5:e74a4dc750894bac300ece46c7036490', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         typ = mobj.group('typ') | ||||
|         display_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         if typ == 'film': | ||||
|             video_id = self._search_regex(r'href="/video/player_gen_cmedia=([0-9]+).+"', webpage, 'video id') | ||||
|         else: | ||||
|             player = self._search_regex(r'data-player=\'([^\']+)\'>', webpage, 'data player') | ||||
|  | ||||
|             player_data = json.loads(player) | ||||
|             video_id = compat_str(player_data['refMedia']) | ||||
|  | ||||
|         xml = self._download_xml('http://www.allocine.fr/ws/AcVisiondataV4.ashx?media=%s' % video_id, display_id) | ||||
|  | ||||
|         video = xml.find('.//AcVisionVideo').attrib | ||||
|         quality = qualities(['ld', 'md', 'hd']) | ||||
|  | ||||
|         formats = [] | ||||
|         for k, v in video.items(): | ||||
|             if re.match(r'.+_path', k): | ||||
|                 format_id = k.split('_')[0] | ||||
|                 formats.append({ | ||||
|                     'format_id': format_id, | ||||
|                     'quality': quality(format_id), | ||||
|                     'url': v, | ||||
|                     'ext': determine_ext(v), | ||||
|                 }) | ||||
|  | ||||
|         self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video['videoTitle'], | ||||
|             'thumbnail': self._og_search_thumbnail(webpage), | ||||
|             'formats': formats, | ||||
|             'description': self._og_search_description(webpage), | ||||
|         } | ||||
| @@ -1,22 +1,24 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class AnitubeIE(InfoExtractor): | ||||
|     IE_NAME = u'anitube.se' | ||||
|     IE_NAME = 'anitube.se' | ||||
|     _VALID_URL = r'https?://(?:www\.)?anitube\.se/video/(?P<id>\d+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.anitube.se/video/36621', | ||||
|         u'md5': u'59d0eeae28ea0bc8c05e7af429998d43', | ||||
|         u'file': u'36621.mp4', | ||||
|         u'info_dict': { | ||||
|             u'id': u'36621', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'Recorder to Randoseru 01', | ||||
|         'url': 'http://www.anitube.se/video/36621', | ||||
|         'md5': '59d0eeae28ea0bc8c05e7af429998d43', | ||||
|         'info_dict': { | ||||
|             'id': '36621', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Recorder to Randoseru 01', | ||||
|             'duration': 180.19, | ||||
|         }, | ||||
|         u'skip': u'Blocked in the US', | ||||
|         'skip': 'Blocked in the US', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -24,13 +26,15 @@ class AnitubeIE(InfoExtractor): | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         key = self._html_search_regex(r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', | ||||
|                                       webpage, u'key') | ||||
|         key = self._html_search_regex( | ||||
|             r'http://www\.anitube\.se/embed/([A-Za-z0-9_-]*)', webpage, 'key') | ||||
|  | ||||
|         config_xml = self._download_xml('http://www.anitube.se/nuevo/econfig.php?key=%s' % key, | ||||
|                                                 key) | ||||
|         config_xml = self._download_xml( | ||||
|             'http://www.anitube.se/nuevo/econfig.php?key=%s' % key, key) | ||||
|  | ||||
|         video_title = config_xml.find('title').text | ||||
|         thumbnail = config_xml.find('image').text | ||||
|         duration = float(config_xml.find('duration').text) | ||||
|  | ||||
|         formats = [] | ||||
|         video_url = config_xml.find('file') | ||||
| @@ -49,5 +53,7 @@ class AnitubeIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video_title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'duration': duration, | ||||
|             'formats': formats | ||||
|         } | ||||
|   | ||||
| @@ -39,7 +39,10 @@ class ArteTvIE(InfoExtractor): | ||||
|  | ||||
|         formats = [{ | ||||
|             'forma_id': q.attrib['quality'], | ||||
|             'url': q.text, | ||||
|             # The playpath starts at 'mp4:', if we don't manually | ||||
|             # split the url, rtmpdump will incorrectly parse them | ||||
|             'url': q.text.split('mp4:', 1)[0], | ||||
|             'play_path': 'mp4:' + q.text.split('mp4:', 1)[1], | ||||
|             'ext': 'flv', | ||||
|             'quality': 2 if q.attrib['quality'] == 'hd' else 1, | ||||
|         } for q in config.findall('./urls/url')] | ||||
| @@ -111,7 +114,7 @@ class ArteTVPlus7IE(InfoExtractor): | ||||
|         if not formats: | ||||
|             # Some videos are only available in the 'Originalversion' | ||||
|             # they aren't tagged as being in French or German | ||||
|             if all(f['versionCode'] == 'VO' for f in all_formats): | ||||
|             if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): | ||||
|                 formats = all_formats | ||||
|             else: | ||||
|                 raise ExtractorError(u'The formats list is empty') | ||||
| @@ -189,9 +192,10 @@ class ArteTVFutureIE(ArteTVPlus7IE): | ||||
|     _TEST = { | ||||
|         'url': 'http://future.arte.tv/fr/sujet/info-sciences#article-anchor-7081', | ||||
|         'info_dict': { | ||||
|             'id': '050940-003', | ||||
|             'id': '5201', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Les champignons au secours de la planète', | ||||
|             'upload_date': '20131101', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class BlipTVIE(SubtitlesInfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z]+)))' | ||||
|     _VALID_URL = r'https?://(?:\w+\.)?blip\.tv/(?:(?:.+-|rss/flash/)(?P<id>\d+)|((?:play/|api\.swf#)(?P<lookup_id>[\da-zA-Z+]+)))' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|   | ||||
| @@ -130,7 +130,7 @@ class ComedyCentralShowsIE(InfoExtractor): | ||||
|                 raise ExtractorError('Invalid redirected URL: ' + url) | ||||
|             if mobj.group('episode') == '': | ||||
|                 raise ExtractorError('Redirected URL is still not specific: ' + url) | ||||
|             epTitle = mobj.group('episode').rpartition('/')[-1] | ||||
|             epTitle = (mobj.group('episode') or mobj.group('videotitle')).rpartition('/')[-1] | ||||
|  | ||||
|         mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) | ||||
|         if len(mMovieParams) == 0: | ||||
|   | ||||
| @@ -1,11 +1,12 @@ | ||||
| import base64 | ||||
| import hashlib | ||||
| import json | ||||
| import netrc | ||||
| import os | ||||
| import re | ||||
| import socket | ||||
| import sys | ||||
| import netrc | ||||
| import time | ||||
| import xml.etree.ElementTree | ||||
|  | ||||
| from ..utils import ( | ||||
| @@ -459,14 +460,17 @@ class InfoExtractor(object): | ||||
|         if secure: regexes = self._og_regexes('video:secure_url') + regexes | ||||
|         return self._html_search_regex(regexes, html, name, **kargs) | ||||
|  | ||||
|     def _html_search_meta(self, name, html, display_name=None, fatal=False): | ||||
|     def _og_search_url(self, html, **kargs): | ||||
|         return self._og_search_property('url', html, **kargs) | ||||
|  | ||||
|     def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): | ||||
|         if display_name is None: | ||||
|             display_name = name | ||||
|         return self._html_search_regex( | ||||
|             r'''(?ix)<meta | ||||
|                     (?=[^>]+(?:itemprop|name|property)=["\']%s["\']) | ||||
|                     [^>]+content=["\']([^"\']+)["\']''' % re.escape(name), | ||||
|             html, display_name, fatal=fatal) | ||||
|             html, display_name, fatal=fatal, **kwargs) | ||||
|  | ||||
|     def _dc_search_uploader(self, html): | ||||
|         return self._html_search_meta('dc.creator', html, 'uploader') | ||||
| @@ -572,6 +576,13 @@ class InfoExtractor(object): | ||||
|         else: | ||||
|             return url | ||||
|  | ||||
|     def _sleep(self, timeout, video_id, msg_template=None): | ||||
|         if msg_template is None: | ||||
|             msg_template = u'%(video_id)s: Waiting for %(timeout)s seconds' | ||||
|         msg = msg_template % {'video_id': video_id, 'timeout': timeout} | ||||
|         self.to_screen(msg) | ||||
|         time.sleep(timeout) | ||||
|  | ||||
|  | ||||
| class SearchInfoExtractor(InfoExtractor): | ||||
|     """ | ||||
| @@ -615,4 +626,3 @@ class SearchInfoExtractor(InfoExtractor): | ||||
|     @property | ||||
|     def SEARCH_KEY(self): | ||||
|         return self._SEARCH_KEY | ||||
|  | ||||
|   | ||||
| @@ -1,40 +1,43 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import determine_ext | ||||
|  | ||||
|  | ||||
| class CriterionIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://www\.criterion\.com/films/(\d*)-.+' | ||||
|     _VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+' | ||||
|     _TEST = { | ||||
|         u'url': u'http://www.criterion.com/films/184-le-samourai', | ||||
|         u'file': u'184.mp4', | ||||
|         u'md5': u'bc51beba55685509883a9a7830919ec3', | ||||
|         u'info_dict': { | ||||
|             u"title": u"Le Samouraï", | ||||
|             u"description" : u'md5:a2b4b116326558149bef81f76dcbb93f', | ||||
|         'url': 'http://www.criterion.com/films/184-le-samourai', | ||||
|         'md5': 'bc51beba55685509883a9a7830919ec3', | ||||
|         'info_dict': { | ||||
|             'id': '184', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Le Samouraï', | ||||
|             'description': 'md5:a2b4b116326558149bef81f76dcbb93f', | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group(1) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         final_url = self._search_regex(r'so.addVariable\("videoURL", "(.+?)"\)\;', | ||||
|                                 webpage, 'video url') | ||||
|         title = self._html_search_regex(r'<meta content="(.+?)" property="og:title" />', | ||||
|                                 webpage, 'video title') | ||||
|         description = self._html_search_regex(r'<meta name="description" content="(.+?)" />', | ||||
|                                 webpage, 'video description') | ||||
|         thumbnail = self._search_regex(r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', | ||||
|                                 webpage, 'thumbnail url') | ||||
|         final_url = self._search_regex( | ||||
|             r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url') | ||||
|         title = self._og_search_title(webpage) | ||||
|         description = self._html_search_regex( | ||||
|             r'<meta name="description" content="(.+?)" />', | ||||
|             webpage, 'video description') | ||||
|         thumbnail = self._search_regex( | ||||
|             r'so.addVariable\("thumbnailURL", "(.+?)"\)\;', | ||||
|             webpage, 'thumbnail url') | ||||
|  | ||||
|         return {'id': video_id, | ||||
|                 'url' : final_url, | ||||
|                 'title': title, | ||||
|                 'ext': determine_ext(final_url), | ||||
|                 'description': description, | ||||
|                 'thumbnail': thumbnail, | ||||
|                 } | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': final_url, | ||||
|             'title': title, | ||||
|             'description': description, | ||||
|             'thumbnail': thumbnail, | ||||
|         } | ||||
|   | ||||
| @@ -150,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|         return { | ||||
|             'id':       video_id, | ||||
|             'formats': formats, | ||||
|             'uploader': info['owner_screenname'], | ||||
|             'uploader': info['owner.screenname'], | ||||
|             'upload_date':  video_upload_date, | ||||
|             'title':    self._og_search_title(webpage), | ||||
|             'subtitles':    video_subtitles, | ||||
|   | ||||
| @@ -7,9 +7,9 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class DiscoveryIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://dsc\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' | ||||
|     _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9\-]*)(.htm)?' | ||||
|     _TEST = { | ||||
|         'url': 'http://dsc.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', | ||||
|         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm', | ||||
|         'md5': 'e12614f9ee303a6ccef415cb0793eba2', | ||||
|         'info_dict': { | ||||
|             'id': '614784', | ||||
|   | ||||
							
								
								
									
										91
									
								
								youtube_dl/extractor/drtv.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								youtube_dl/extractor/drtv.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .subtitles import SubtitlesInfoExtractor | ||||
| from .common import ExtractorError | ||||
| from ..utils import parse_iso8601 | ||||
|  | ||||
|  | ||||
class DRTVIE(SubtitlesInfoExtractor):
    """Extractor for programmes served from dr.dk/tv/se/...

    Title, description, formats, thumbnail and subtitles are all read from
    the "programcard" JSON document fetched for the video id.
    """
    _VALID_URL = r'http://(?:www\.)?dr\.dk/tv/se/[^/]+/(?P<id>[\da-z-]+)'

    _TEST = {
        'url': 'http://www.dr.dk/tv/se/partiets-mand/partiets-mand-7-8',
        'md5': '4a7e1dd65cdb2643500a3f753c942f25',
        'info_dict': {
            'id': 'partiets-mand-7-8',
            'ext': 'mp4',
            'title': 'Partiets mand (7:8)',
            'description': 'md5:a684b90a8f9336cd4aab94b7647d7862',
            'timestamp': 1403047940,
            'upload_date': '20140617',
            'duration': 1299.040,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the programme addressed by ``url``.

        Returns None right after calling ``_list_available_subtitles`` when
        the ``listsubtitles`` option is set.  Raises ExtractorError when no
        formats were collected and an asset is flagged as restricted to
        Denmark.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        programcard = self._download_json(
            'http://www.dr.dk/mu/programcard/expanded/%s' % video_id, video_id, 'Downloading video JSON')

        data = programcard['Data'][0]

        title = data['Title']
        description = data['Description']
        # NOTE(review): the last five characters of CreatedTime are dropped
        # before parsing -- presumably a suffix parse_iso8601 does not
        # accept; confirm against the API output.
        timestamp = parse_iso8601(data['CreatedTime'][:-5])

        thumbnail = None
        duration = None

        restricted_to_denmark = False

        formats = []
        subtitles = {}

        # Language names used in the programcard -> subtitle language codes.
        # Danish is 'da' in ISO 639-1; 'dk' is the country code of Denmark,
        # not a language code.  Names without a mapping are used unchanged.
        LANGS = {
            'Danish': 'da',
        }

        for asset in data['Assets']:
            if asset['Kind'] == 'Image':
                thumbnail = asset['Uri']
            elif asset['Kind'] == 'VideoResource':
                # Convert milliseconds to (float) seconds.
                duration = asset['DurationInMilliseconds'] / 1000.0
                restricted_to_denmark = asset['RestrictedToDenmark']
                for link in asset['Links']:
                    target = link['Target']
                    uri = link['Uri']
                    formats.append({
                        # HDS links get extra query parameters appended;
                        # every other target is used as-is.
                        'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri,
                        'format_id': target,
                        'ext': link['FileFormat'],
                        # HDS is ranked above the other targets.
                        'preference': -1 if target == 'HDS' else -2,
                    })
                subtitles_list = asset.get('SubtitlesList')
                if isinstance(subtitles_list, list):
                    for subs in subtitles_list:
                        lang = subs['Language']
                        subtitles[LANGS.get(lang, lang)] = subs['Uri']

        if not formats and restricted_to_denmark:
            raise ExtractorError(
                'Unfortunately, DR is not allowed to show this program outside Denmark.', expected=True)

        self._sort_formats(formats)

        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, subtitles)
            return

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
            'subtitles': self.extract_subtitles(video_id, subtitles),
        }
							
								
								
									
										90
									
								
								youtube_dl/extractor/gameone.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								youtube_dl/extractor/gameone.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,90 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     xpath_with_ns, | ||||
|     parse_iso8601 | ||||
| ) | ||||
|  | ||||
| NAMESPACE_MAP = { | ||||
|     'media': 'http://search.yahoo.com/mrss/', | ||||
| } | ||||
|  | ||||
| # URL prefix to download the mp4 files directly instead of streaming via rtmp | ||||
| # Credits go to XBox-Maniac | ||||
| # http://board.jdownloader.org/showpost.php?p=185835&postcount=31 | ||||
| RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/' | ||||
|  | ||||
|  | ||||
class GameOneIE(InfoExtractor):
    """Extractor for gameone.de/tv/<id> episode pages.

    The page's og:video URL carries an ``mrss`` parameter; the MRSS feed in
    turn references a media:content document listing the renditions.
    """
    _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.gameone.de/tv/288',
        'md5': '136656b7fb4c9cb4a8e2d500651c499b',
        'info_dict': {
            'id': '288',
            'ext': 'mp4',
            'title': 'Game One - Folge 288',
            'duration': 1238,
            'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
            'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
            'age_limit': 16,
            'upload_date': '20140513',
            'timestamp': 1399980122,
        }
    }

    def _real_extract(self, url):
        """Return the info dict (metadata plus sorted formats) for ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)
        og_video = self._og_search_video_url(webpage, secure=False)
        description = self._html_search_meta('description', webpage)
        # The age rating is embedded as "age=<n>" in a meta label; '0' is the
        # default handed to _search_regex.
        age_label = self._html_search_meta('age-de-meta-label', webpage)
        age_limit = int(
            self._search_regex(r'age=(\d+)', age_label, 'age_limit', '0'))
        mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')

        feed = self._download_xml(mrss_url, video_id, 'Downloading mrss')
        title = feed.find('.//item/title').text
        thumbnail = feed.find('.//item/image').get('url')
        timestamp = parse_iso8601(feed.find('.//pubDate').text, delimiter=' ')
        media_content = feed.find(
            xpath_with_ns('.//media:content', NAMESPACE_MAP))

        renditions_doc = self._download_xml(
            media_content.get('url'), video_id, 'Downloading media:content')
        rendition_items = renditions_doc.findall('.//rendition')
        duration = int(rendition_items[0].get('duration'))

        formats = []
        for rendition in rendition_items:
            # Rewrite everything up to the "r2" path component so the file is
            # fetched from RAW_MP4_URL.
            direct_url = re.sub(
                r'.*/(r2)', RAW_MP4_URL + r'\1', rendition.find('./src').text)
            formats.append({
                'url': direct_url,
                'width': int(rendition.get('width')),
                'height': int(rendition.get('height')),
                'tbr': int(rendition.get('bitrate')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
            'description': description,
            'age_limit': age_limit,
            'timestamp': timestamp,
        }
| @@ -383,7 +383,7 @@ class GenericIE(InfoExtractor): | ||||
|         if not parsed_url.scheme: | ||||
|             default_search = self._downloader.params.get('default_search') | ||||
|             if default_search is None: | ||||
|                 default_search = 'auto_warning' | ||||
|                 default_search = 'error' | ||||
|  | ||||
|             if default_search in ('auto', 'auto_warning'): | ||||
|                 if '/' in url: | ||||
| @@ -397,8 +397,13 @@ class GenericIE(InfoExtractor): | ||||
|                                 expected=True) | ||||
|                         else: | ||||
|                             self._downloader.report_warning( | ||||
|                                 'Falling back to youtube search for  %s . Set --default-search to "auto" to suppress this warning.' % url) | ||||
|                                 'Falling back to youtube search for  %s . Set --default-search "auto" to suppress this warning.' % url) | ||||
|                     return self.url_result('ytsearch:' + url) | ||||
|             elif default_search == 'error': | ||||
|                 raise ExtractorError( | ||||
|                     ('%r is not a valid URL. ' | ||||
|                      'Set --default-search "ytseach" (or run  youtube-dl "ytsearch:%s" ) to search YouTube' | ||||
|                     ) % (url, url), expected=True) | ||||
|             else: | ||||
|                 assert ':' in default_search | ||||
|                 return self.url_result(default_search + url) | ||||
| @@ -620,6 +625,11 @@ class GenericIE(InfoExtractor): | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'VK') | ||||
|  | ||||
|         # Look for embedded ivi player | ||||
|         mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage) | ||||
|         if mobj is not None: | ||||
|             return self.url_result(mobj.group('url'), 'Ivi') | ||||
|  | ||||
|         # Look for embedded Huffington Post player | ||||
|         mobj = re.search( | ||||
|             r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage) | ||||
|   | ||||
| @@ -52,8 +52,7 @@ class GooglePlusIE(InfoExtractor): | ||||
|  | ||||
|         # Extract title | ||||
|         # Get the first line for title | ||||
|         video_title = self._html_search_regex(r'<meta name\=\"Description\" content\=\"(.*?)[\n<"]', | ||||
|             webpage, 'title', default='NA') | ||||
|         video_title = self._og_search_description(webpage).splitlines()[0] | ||||
|  | ||||
|         # Step 2, Simulate clicking the image box to launch video | ||||
|         DOMAIN = 'https://plus.google.com/' | ||||
|   | ||||
| @@ -12,7 +12,12 @@ from ..utils import ( | ||||
|  | ||||
|  | ||||
| class GorillaVidIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?gorillavid\.in/(?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)?' | ||||
|     IE_DESC = 'GorillaVid.in and daclips.in' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         https?://(?:www\.)? | ||||
|             (?:daclips\.in|gorillavid\.in)/ | ||||
|         (?:embed-)?(?P<id>[0-9a-zA-Z]+)(?:-[0-9]+x[0-9]+\.html)? | ||||
|     ''' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://gorillavid.in/06y9juieqpmi', | ||||
| @@ -32,14 +37,20 @@ class GorillaVidIE(InfoExtractor): | ||||
|             'title': 'Say something nice', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://daclips.in/3rso4kdn6f9m', | ||||
|         'info_dict': { | ||||
|             'id': '3rso4kdn6f9m', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Micro Pig piglets ready on 16th July 2009', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         url = 'http://gorillavid.in/%s' % video_id | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         fields = dict(re.findall(r'''(?x)<input\s+ | ||||
|   | ||||
							
								
								
									
										73
									
								
								youtube_dl/extractor/goshgay.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								youtube_dl/extractor/goshgay.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     str_to_int, | ||||
|     ExtractorError, | ||||
| ) | ||||
| import json | ||||
|  | ||||
|  | ||||
class GoshgayIE(InfoExtractor):
    """Extractor for www.goshgay.com/video<id> pages.

    The page embeds a jwplayer setup object whose ``config`` entry points to
    an XML document that holds the media and thumbnail URLs.
    """
    # The dots are escaped so that only the literal host name matches; an
    # unescaped "." would accept any character in those positions.
    _VALID_URL = r'https?://www\.goshgay\.com/video(?P<id>\d+?)($|/)'
    _TEST = {
        'url': 'http://www.goshgay.com/video4116282',
        'md5': '268b9f3c3229105c57859e166dd72b03',
        'info_dict': {
            'id': '4116282',
            'ext': 'flv',
            'title': 'md5:089833a4790b5e103285a07337f245bf',
            'thumbnail': 're:http://.*\.jpg',
            'age_limit': 18,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video page at ``url``.

        Raises ExtractorError when the player setup has no ``config`` URI,
        when the config XML is missing or its root tag is not ``config``, or
        when it does not contain exactly one ``file`` element.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._search_regex(r'class="video-title"><h1>(.+?)<', webpage, 'title')

        player_config = self._search_regex(
            r'(?s)jwplayer\("player"\)\.setup\(({.+?})\)', webpage, 'config settings')
        # The setup object uses single quotes; swap them for double quotes so
        # that json.loads accepts it.
        player_vars = json.loads(player_config.replace("'", '"'))
        width = str_to_int(player_vars.get('width'))
        height = str_to_int(player_vars.get('height'))
        config_uri = player_vars.get('config')

        if config_uri is None:
            raise ExtractorError('Missing config URI')
        node = self._download_xml(config_uri, video_id, 'Downloading player config XML',
                                  errnote='Unable to download XML')
        if node is None:
            raise ExtractorError('Missing config XML')
        if node.tag != 'config':
            raise ExtractorError('Missing config attribute')
        fns = node.findall('file')
        imgs = node.findall('image')
        if len(fns) != 1:
            raise ExtractorError('Missing media URI')
        video_url = fns[0].text
        # The thumbnail is optional; take the first <image> if there is one.
        thumbnail = imgs[0].text if imgs else None

        # Referer sent with the media request: scheme, host and path of the
        # page URL (query string and fragment are left out).
        url_comp = compat_urlparse.urlparse(url)
        ref = "%s://%s%s" % (url_comp[0], url_comp[1], url_comp[2])

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'width': width,
            'height': height,
            'thumbnail': thumbnail,
            'http_referer': ref,
            'age_limit': 18,
        }
| @@ -14,7 +14,7 @@ from ..utils import ( | ||||
| class IviIE(InfoExtractor): | ||||
|     IE_DESC = 'ivi.ru' | ||||
|     IE_NAME = 'ivi' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?ivi\.ru/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<videoid>\d+)' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # Single movie | ||||
|   | ||||
| @@ -9,6 +9,7 @@ from ..utils import ( | ||||
|     compat_urlparse, | ||||
|     xpath_with_ns, | ||||
|     compat_str, | ||||
|     orderedSet, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -64,7 +65,10 @@ class LivestreamIE(InfoExtractor): | ||||
| # The original version of Livestream uses a different system | ||||
| class LivestreamOriginalIE(InfoExtractor): | ||||
|     IE_NAME = 'livestream:original' | ||||
|     _VALID_URL = r'https?://www\.livestream\.com/(?P<user>[^/]+)/video\?.*?clipId=(?P<id>.*?)(&|$)' | ||||
|     _VALID_URL = r'''(?x)https?://www\.livestream\.com/ | ||||
|         (?P<user>[^/]+)/(?P<type>video|folder) | ||||
|         (?:\?.*?Id=|/)(?P<id>.*?)(&|$) | ||||
|         ''' | ||||
|     _TEST = { | ||||
|         'url': 'http://www.livestream.com/dealbook/video?clipId=pla_8aa4a3f1-ba15-46a4-893b-902210e138fb', | ||||
|         'info_dict': { | ||||
| @@ -78,10 +82,7 @@ class LivestreamOriginalIE(InfoExtractor): | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         user = mobj.group('user') | ||||
|     def _extract_video(self, user, video_id): | ||||
|         api_url = 'http://x{0}x.api.channel.livestream.com/2.0/clipdetails?extendedInfo=true&id={1}'.format(user, video_id) | ||||
|  | ||||
|         info = self._download_xml(api_url, video_id) | ||||
| @@ -99,3 +100,44 @@ class LivestreamOriginalIE(InfoExtractor): | ||||
|             'ext': 'flv', | ||||
|             'thumbnail': thumbnail_url, | ||||
|         } | ||||
|  | ||||
|     def _extract_folder(self, url, folder_id): | ||||
|         webpage = self._download_webpage(url, folder_id) | ||||
|         urls = orderedSet(re.findall(r'<a href="(https?://livestre\.am/.*?)"', webpage)) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|             'id': folder_id, | ||||
|             'entries': [{ | ||||
|                 '_type': 'url', | ||||
|                 'url': video_url, | ||||
|             } for video_url in urls], | ||||
|         } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         id = mobj.group('id') | ||||
|         user = mobj.group('user') | ||||
|         url_type = mobj.group('type') | ||||
|         if url_type == 'folder': | ||||
|             return self._extract_folder(url, id) | ||||
|         else: | ||||
|             return self._extract_video(user, id) | ||||
|  | ||||
|  | ||||
| # The server doesn't support HEAD request, the generic extractor can't detect | ||||
| # the redirection | ||||
class LivestreamShortenerIE(InfoExtractor):
    """Resolve livestre.am short links to the og:url of the page they serve."""
    IE_NAME = 'livestream:shortener'
    IE_DESC = False  # Do not list
    _VALID_URL = r'https?://livestre\.am/(?P<id>.+)'

    def _real_extract(self, url):
        """Download the short-link page and hand back its og:url as a url result."""
        short_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, short_id)
        target = self._og_search_url(page)

        return {
            '_type': 'url',
            'url': target,
        }
|   | ||||
							
								
								
									
										87
									
								
								youtube_dl/extractor/motherless.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										87
									
								
								youtube_dl/extractor/motherless.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,87 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import datetime | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     int_or_none, | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class MotherlessIE(InfoExtractor):
    """Extractor for motherless.com/<ID> video pages.

    All fields are scraped from the HTML of the video page.
    """
    _VALID_URL = r'http://(?:www\.)?motherless\.com/(?P<id>[A-Z0-9]+)'
    _TESTS = [
        {
            'url': 'http://motherless.com/AC3FFE1',
            'md5': '5527fef81d2e529215dad3c2d744a7d9',
            'info_dict': {
                'id': 'AC3FFE1',
                'ext': 'flv',
                'title': 'Fucked in the ass while playing PS3',
                'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
                'upload_date': '20100913',
                'uploader_id': 'famouslyfuckedup',
                'thumbnail': 're:http://.*\.jpg',
                'age_limit': 18,
            }
        },
        {
            'url': 'http://motherless.com/532291B',
            'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
            'info_dict': {
                'id': '532291B',
                'ext': 'mp4',
                'title': 'Amazing girl playing the omegle game, PERFECT!',
                'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen', 'game', 'hairy'],
                'upload_date': '20140622',
                'uploader_id': 'Sulivana7x',
                'thumbnail': 're:http://.*\.jpg',
                'age_limit': 18,
            }
        }
    ]

    def _real_extract(self, url):
        """Return the info dict scraped from the video page at ``url``."""
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
        video_url = self._html_search_regex(
            r'setup\(\{\s+"file".+: "([^"]+)",', webpage, 'video_url')
        age_limit = self._rta_search(webpage)

        views_text = self._html_search_regex(
            r'<strong>Views</strong>\s+([^<]+)<', webpage, 'view_count')

        uploaded_text = self._html_search_regex(
            r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload_date')
        if 'Ago' in uploaded_text:
            # Relative date: the first number in the text is taken as a day
            # count and subtracted from the current local time.
            days_back = int(re.search(r'([0-9]+)', uploaded_text).group(1))
            uploaded_on = datetime.datetime.now() - datetime.timedelta(days=days_back)
            upload_date = uploaded_on.strftime('%Y%m%d')
        else:
            upload_date = unified_strdate(uploaded_text)

        favorites_text = self._html_search_regex(
            r'<strong>Favorited</strong>\s+([^<]+)<', webpage, 'like_count')

        # One comment container per comment on the page.
        comment_count = webpage.count('class="media-comment-contents"')
        uploader_id = self._html_search_regex(
            r'"thumb-member-username">\s+<a href="/m/([^"]+)"', webpage, 'uploader_id')

        # The keywords meta tag is a comma separated list; a missing or empty
        # value is passed through unchanged.
        keywords = self._html_search_meta('keywords', webpage)
        categories = (
            [keyword.strip() for keyword in keywords.split(',')]
            if keywords else keywords)

        return {
            'id': video_id,
            'title': title,
            'upload_date': upload_date,
            'uploader_id': uploader_id,
            'thumbnail': self._og_search_thumbnail(webpage),
            'categories': categories,
            # Counts are shown with thousands separators; strip the commas.
            'view_count': int_or_none(views_text.replace(',', '')),
            'like_count': int_or_none(favorites_text.replace(',', '')),
            'comment_count': comment_count,
            'age_limit': age_limit,
            'url': video_url,
        }
| @@ -28,7 +28,7 @@ class MporaIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         data_json = self._search_regex( | ||||
|             r"new FM\.Player\('[^']+',\s*(\{.*?)\);\n", webpage, 'json') | ||||
|             r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') | ||||
|  | ||||
|         data = json.loads(data_json) | ||||
|  | ||||
|   | ||||
| @@ -4,18 +4,19 @@ from __future__ import unicode_literals | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
| class NewstubeIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?newstube\.ru/media/(?P<id>.+)' | ||||
|     _TEST = { | ||||
|         'url': 'http://newstube.ru/media/na-korable-progress-prodolzhaetsya-testirovanie-sistemy-kurs', | ||||
|         'url': 'http://www.newstube.ru/media/telekanal-cnn-peremestil-gorod-slavyansk-v-krym', | ||||
|         'info_dict': { | ||||
|             'id': 'd156a237-a6e9-4111-a682-039995f721f1', | ||||
|             'id': '728e0ef2-e187-4012-bac0-5a081fdcb1f6', | ||||
|             'ext': 'flv', | ||||
|             'title': 'На корабле «Прогресс» продолжается тестирование системы «Курс»', | ||||
|             'description': 'md5:d0cbe7b4a6f600552617e48548d5dc77', | ||||
|             'duration': 20.04, | ||||
|             'title': 'Телеканал CNN переместил город Славянск в Крым', | ||||
|             'description': 'md5:419a8c9f03442bc0b0a794d689360335', | ||||
|             'duration': 31.05, | ||||
|         }, | ||||
|         'params': { | ||||
|             # rtmp download | ||||
| @@ -40,6 +41,10 @@ class NewstubeIE(InfoExtractor): | ||||
|         def ns(s): | ||||
|             return s.replace('/', '/%(ns)s') % {'ns': '{http://app1.newstube.ru/N2SiteWS/player.asmx}'} | ||||
|  | ||||
|         error_message = player.find(ns('./ErrorMessage')) | ||||
|         if error_message is not None: | ||||
|             raise ExtractorError('%s returned error: %s' % (self.IE_NAME, error_message.text), expected=True) | ||||
|  | ||||
|         session_id = player.find(ns('./SessionId')).text | ||||
|         media_info = player.find(ns('./Medias/MediaInfo')) | ||||
|         title = media_info.find(ns('./Name')).text | ||||
|   | ||||
| @@ -8,10 +8,9 @@ from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
|     compat_str, | ||||
|  | ||||
|     ExtractorError, | ||||
|     unified_strdate, | ||||
|     parse_duration, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| @@ -30,6 +29,7 @@ class NiconicoIE(InfoExtractor): | ||||
|             'uploader_id': '2698420', | ||||
|             'upload_date': '20131123', | ||||
|             'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org', | ||||
|             'duration': 33, | ||||
|         }, | ||||
|         'params': { | ||||
|             'username': 'ydl.niconico@gmail.com', | ||||
| @@ -37,17 +37,20 @@ class NiconicoIE(InfoExtractor): | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     _VALID_URL = r'^https?://(?:www\.|secure\.)?nicovideo\.jp/watch/([a-z][a-z][0-9]+)(?:.*)$' | ||||
|     _VALID_URL = r'https?://(?:www\.|secure\.)?nicovideo\.jp/watch/((?:[a-z]{2})?[0-9]+)' | ||||
|     _NETRC_MACHINE = 'niconico' | ||||
|     # Determine whether the downloader uses authentication to download video | ||||
|     _AUTHENTICATE = False | ||||
|  | ||||
|     def _real_initialize(self): | ||||
|         self._login() | ||||
|         if self._downloader.params.get('username', None) is not None: | ||||
|             self._AUTHENTICATE = True | ||||
|  | ||||
|         if self._AUTHENTICATE: | ||||
|             self._login() | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             # Login is required | ||||
|             raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True) | ||||
|  | ||||
|         # Log in | ||||
|         login_form_strs = { | ||||
| @@ -79,44 +82,66 @@ class NiconicoIE(InfoExtractor): | ||||
|             'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id, video_id, | ||||
|             note='Downloading video info page') | ||||
|  | ||||
|         # Get flv info | ||||
|         flv_info_webpage = self._download_webpage( | ||||
|             'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, | ||||
|             video_id, 'Downloading flv info') | ||||
|         if self._AUTHENTICATE: | ||||
|             # Get flv info | ||||
|             flv_info_webpage = self._download_webpage( | ||||
|                 'http://flapi.nicovideo.jp/api/getflv?v=' + video_id, | ||||
|                 video_id, 'Downloading flv info') | ||||
|         else: | ||||
|             # Get external player info | ||||
|             ext_player_info = self._download_webpage( | ||||
|                 'http://ext.nicovideo.jp/thumb_watch/' + video_id, video_id) | ||||
|             thumb_play_key = self._search_regex( | ||||
|                 r'\'thumbPlayKey\'\s*:\s*\'(.*?)\'', ext_player_info, 'thumbPlayKey') | ||||
|  | ||||
|             # Get flv info | ||||
|             flv_info_data = compat_urllib_parse.urlencode({ | ||||
|                 'k': thumb_play_key, | ||||
|                 'v': video_id | ||||
|             }) | ||||
|             flv_info_request = compat_urllib_request.Request( | ||||
|                 'http://ext.nicovideo.jp/thumb_watch', flv_info_data, | ||||
|                 {'Content-Type': 'application/x-www-form-urlencoded'}) | ||||
|             flv_info_webpage = self._download_webpage( | ||||
|                 flv_info_request, video_id, | ||||
|                 note='Downloading flv info', errnote='Unable to download flv info') | ||||
|  | ||||
|         video_real_url = compat_urlparse.parse_qs(flv_info_webpage)['url'][0] | ||||
|  | ||||
|         # Start extracting information | ||||
|         video_title = video_info.find('.//title').text | ||||
|         video_extension = video_info.find('.//movie_type').text | ||||
|         video_format = video_extension.upper() | ||||
|         video_thumbnail = video_info.find('.//thumbnail_url').text | ||||
|         video_description = video_info.find('.//description').text | ||||
|         video_uploader_id = video_info.find('.//user_id').text | ||||
|         video_upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) | ||||
|         video_view_count = video_info.find('.//view_counter').text | ||||
|         video_webpage_url = video_info.find('.//watch_url').text | ||||
|         title = video_info.find('.//title').text | ||||
|         extension = video_info.find('.//movie_type').text | ||||
|         video_format = extension.upper() | ||||
|         thumbnail = video_info.find('.//thumbnail_url').text | ||||
|         description = video_info.find('.//description').text | ||||
|         upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0]) | ||||
|         view_count = int_or_none(video_info.find('.//view_counter').text) | ||||
|         comment_count = int_or_none(video_info.find('.//comment_num').text) | ||||
|         duration = parse_duration(video_info.find('.//length').text) | ||||
|         webpage_url = video_info.find('.//watch_url').text | ||||
|  | ||||
|         # uploader | ||||
|         video_uploader = video_uploader_id | ||||
|         url = 'http://seiga.nicovideo.jp/api/user/info?id=' + video_uploader_id | ||||
|         try: | ||||
|             user_info = self._download_xml( | ||||
|                 url, video_id, note='Downloading user information') | ||||
|             video_uploader = user_info.find('.//nickname').text | ||||
|         except ExtractorError as err: | ||||
|             self._downloader.report_warning('Unable to download user info webpage: %s' % compat_str(err)) | ||||
|         if video_info.find('.//ch_id') is not None: | ||||
|             uploader_id = video_info.find('.//ch_id').text | ||||
|             uploader = video_info.find('.//ch_name').text | ||||
|         elif video_info.find('.//user_id') is not None: | ||||
|             uploader_id = video_info.find('.//user_id').text | ||||
|             uploader = video_info.find('.//user_nickname').text | ||||
|         else: | ||||
|             uploader_id = uploader = None | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'url': video_real_url, | ||||
|             'title': video_title, | ||||
|             'ext': video_extension, | ||||
|             'title': title, | ||||
|             'ext': extension, | ||||
|             'format': video_format, | ||||
|             'thumbnail': video_thumbnail, | ||||
|             'description': video_description, | ||||
|             'uploader': video_uploader, | ||||
|             'upload_date': video_upload_date, | ||||
|             'uploader_id': video_uploader_id, | ||||
|             'view_count': video_view_count, | ||||
|             'webpage_url': video_webpage_url, | ||||
|             'thumbnail': thumbnail, | ||||
|             'description': description, | ||||
|             'uploader': uploader, | ||||
|             'upload_date': upload_date, | ||||
|             'uploader_id': uploader_id, | ||||
|             'view_count': view_count, | ||||
|             'comment_count': comment_count, | ||||
|             'duration': duration, | ||||
|             'webpage_url': webpage_url, | ||||
|         } | ||||
|   | ||||
| @@ -47,7 +47,7 @@ class NineGagIE(InfoExtractor): | ||||
|         webpage = self._download_webpage(url, display_id) | ||||
|  | ||||
|         post_view = json.loads(self._html_search_regex( | ||||
|             r'var postView = new app\.PostView\({\s*post:\s*({.+?}),', webpage, 'post view')) | ||||
|             r'var postView = new app\.PostView\({\s*post:\s*({.+?}),\s*posts:\s*prefetchedCurrentPost', webpage, 'post view')) | ||||
|  | ||||
|         youtube_id = post_view['videoExternalId'] | ||||
|         title = post_view['title'] | ||||
|   | ||||
							
								
								
									
										62
									
								
								youtube_dl/extractor/npo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								youtube_dl/extractor/npo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
| ) | ||||
|  | ||||
|  | ||||
class NPOIE(InfoExtractor):
    """Information extractor for single episodes on www.npo.nl.

    The episode id is the third path component of the page URL; it is used
    to query the omroep.nl metadata and stream services.
    """
    IE_NAME = 'npo.nl'
    _VALID_URL = r'https?://www\.npo\.nl/[^/]+/[^/]+/(?P<id>[^/?]+)'

    _TEST = {
        'url': 'http://www.npo.nl/nieuwsuur/22-06-2014/VPWON_1220719',
        'md5': '4b3f9c429157ec4775f2c9cb7b911016',
        'info_dict': {
            'id': 'VPWON_1220719',
            'ext': 'mp4',
            'title': 'Nieuwsuur',
            'description': 'Dagelijks tussen tien en elf: nieuws, sport en achtergronden.',
            'upload_date': '20140622',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        Four requests are made: episode metadata, the player token, the
        list of streams and finally the description of the first stream.
        The title and the stream URL are mandatory; description, thumbnail
        and upload date are filled in only when the metadata provides them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        metadata = self._download_json(
            'http://e.omroep.nl/metadata/aflevering/%s' % video_id,
            video_id,
            # We have to remove the javascript callback
            transform_source=lambda j: re.sub(r'parseMetadata\((.*?)\);\n//epc', r'\1', j)
        )
        token_page = self._download_webpage(
            'http://ida.omroep.nl/npoplayer/i.js',
            video_id,
            note='Downloading token'
        )
        # The dot is escaped so only the literal "npoplayer.token" matches.
        token = self._search_regex(r'npoplayer\.token = "(.+?)"', token_page, 'token')
        streams_info = self._download_json(
            'http://ida.omroep.nl/odi/?prid=%s&puboptions=h264_std&adaptive=yes&token=%s' % (video_id, token),
            video_id
        )

        stream_info = self._download_json(
            streams_info['streams'][0] + '&type=json',
            video_id,
            'Downloading stream info'
        )

        # Optional fields: a missing or empty value must not abort an
        # extraction that already has a title and a stream URL.
        images = metadata.get('images')
        thumbnail = images[-1].get('url') if images else None
        broadcast_date = metadata.get('gidsdatum')
        upload_date = (
            unified_strdate(broadcast_date)
            if broadcast_date is not None else None)

        return {
            'id': video_id,
            'title': metadata['titel'],
            'ext': 'mp4',
            'url': stream_info['url'],
            'description': metadata.get('info'),
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
| @@ -35,7 +35,8 @@ class RaiIE(SubtitlesInfoExtractor): | ||||
|                 'description': '', | ||||
|                 'upload_date': '20140612', | ||||
|                 'duration': 1758, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'Error 404', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.rainews.it/dl/rainews/media/state-of-the-net-Antonella-La-Carpia-regole-virali-7aafdea9-0e5d-49d5-88a6-7e65da67ae13.html', | ||||
|   | ||||
							
								
								
									
										95
									
								
								youtube_dl/extractor/screencast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								youtube_dl/extractor/screencast.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,95 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_parse_qs, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
class ScreencastIE(InfoExtractor):
    """Information extractor for videos shared as screencast.com/t/<id>."""
    _VALID_URL = r'https?://www\.screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
        'info_dict': {
            'id': '3ZEjQXlT',
            'ext': 'm4v',
            'title': 'Color Measurement with Ocean Optics Spectrometers',
            'description': 'md5:240369cde69d8bed61349a199c5fb153',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
        'info_dict': {
            'id': 'V2uXehPJa1ZI',
            'ext': 'mov',
            'title': 'The Amadeus Spectrometer',
            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/aAB3iowa',
        'md5': 'dedb2734ed00c9755761ccaee88527cd',
        'info_dict': {
            'id': 'aAB3iowa',
            'ext': 'mp4',
            'title': 'Google Earth Export',
            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    },
    ]

    def _real_extract(self, url):
        """Build the info dict for the screencast page at *url*.

        The media URL is looked up in three places, in this order: the
        QuickTime <embed> tag, the "content" entry of the Flash player's
        flashVars, and the "src" parameter of the og:video meta tag.

        Raises ExtractorError when none of them yields a URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'<embed name="Video".*?src="([^"]+)"', webpage,
            'QuickTime embed', default=None)

        if video_url is None:
            flash_vars_s = self._html_search_regex(
                r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
                default=None)
            if flash_vars_s:
                flash_vars = compat_parse_qs(flash_vars_s)
                # flashVars without a "content" entry must fall through to
                # the og:video lookup instead of raising KeyError.
                content = flash_vars.get('content')
                if content:
                    video_url_raw = compat_urllib_request.quote(content[0])
                    # quote() also escapes the colon after the scheme;
                    # restore it for both http and https URLs.
                    video_url = video_url_raw.replace(
                        'http%3A', 'http:').replace('https%3A', 'https:')

        if video_url is None:
            video_meta = self._html_search_meta(
                'og:video', webpage, default=None)
            if video_meta:
                video_url = self._search_regex(
                    r'src=(.*?)(?:$|&)', video_meta,
                    'meta tag video URL', default=None)

        if video_url is None:
            raise ExtractorError('Cannot find video')

        # Prefer the Open Graph title; otherwise use the text of the page's
        # tab header.
        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                r'class="tabSeperator">></span><span class="tabText">(.*?)<',
                webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
        if description is None:
            description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
| @@ -255,7 +255,7 @@ class SoundcloudSetIE(SoundcloudIE): | ||||
|  | ||||
|  | ||||
| class SoundcloudUserIE(SoundcloudIE): | ||||
|     _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)(/?(tracks/)?)?(\?.*)?$' | ||||
|     _VALID_URL = r'https?://(www\.)?soundcloud\.com/(?P<user>[^/]+)/?((?P<rsrc>tracks|likes)/?)?(\?.*)?$' | ||||
|     IE_NAME = 'soundcloud:user' | ||||
|  | ||||
|     # it's in tests/test_playlists.py | ||||
| @@ -264,24 +264,31 @@ class SoundcloudUserIE(SoundcloudIE): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         uploader = mobj.group('user') | ||||
|         resource = mobj.group('rsrc') | ||||
|         if resource is None: | ||||
|             resource = 'tracks' | ||||
|         elif resource == 'likes': | ||||
|             resource = 'favorites' | ||||
|  | ||||
|         url = 'http://soundcloud.com/%s/' % uploader | ||||
|         resolv_url = self._resolv_url(url) | ||||
|         user = self._download_json( | ||||
|             resolv_url, uploader, 'Downloading user info') | ||||
|         base_url = 'http://api.soundcloud.com/users/%s/tracks.json?' % uploader | ||||
|         base_url = 'http://api.soundcloud.com/users/%s/%s.json?' % (uploader, resource) | ||||
|  | ||||
|         entries = [] | ||||
|         for i in itertools.count(): | ||||
|             data = compat_urllib_parse.urlencode({ | ||||
|                 'offset': i * 50, | ||||
|                 'limit': 50, | ||||
|                 'client_id': self._CLIENT_ID, | ||||
|             }) | ||||
|             new_entries = self._download_json( | ||||
|                 base_url + data, uploader, 'Downloading track page %s' % (i + 1)) | ||||
|             entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) | ||||
|             if len(new_entries) < 50: | ||||
|             if len(new_entries) == 0: | ||||
|                 self.to_screen('%s: End page received' % uploader) | ||||
|                 break | ||||
|             entries.extend(self._extract_info_dict(e, quiet=True) for e in new_entries) | ||||
|  | ||||
|         return { | ||||
|             '_type': 'playlist', | ||||
|   | ||||
							
								
								
									
										40
									
								
								youtube_dl/extractor/soundgasm.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								youtube_dl/extractor/soundgasm.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class SoundgasmIE(InfoExtractor):
    """Information extractor for audio pages at soundgasm.net/u/<user>/<title>."""
    _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_\-]+)/(?P<title>[0-9a-zA-Z_\-]+)'
    _TEST = {
        'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
        'md5': '010082a2c802c5275bb00030743e75ad',
        'info_dict': {
            'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
            'ext': 'm4a',
            'title': 'ytdl_Piano-sample',
            'description': 'Royalty Free Sample Music'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the audio page at *url*.

        The title is assembled from the user and title components of the
        URL; the id is derived from the m4a URL found in the page. The
        description lookup is non-fatal.
        """
        mobj = re.match(self._VALID_URL, url)
        display_id = mobj.group('title')
        audio_title = mobj.group('user') + '_' + mobj.group('title')
        webpage = self._download_webpage(url, display_id)
        audio_url = self._html_search_regex(
            r'(?s)m4a\:\s"([^"]+)"', webpage, 'audio URL')
        # Split on "/" and "."; the second-to-last piece is the file name
        # without its extension. A raw string keeps the backslashes from
        # being read as (invalid) string escape sequences.
        audio_id = re.split(r'\/|\.', audio_url)[-2]
        description = self._html_search_regex(
            r'(?s)<li>Description:\s(.*?)<\/li>', webpage, 'description',
            fatal=False)

        return {
            'id': audio_id,
            'display_id': display_id,
            'url': audio_url,
            'title': audio_title,
            'description': description
        }
| @@ -20,13 +20,13 @@ class TagesschauIE(InfoExtractor): | ||||
|             'thumbnail': 're:^http:.*\.jpg$', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.tagesschau.de/multimedia/video/video-196.html', | ||||
|         'md5': '8aaa8bf3ae1ca2652309718c03019128', | ||||
|         'url': 'http://www.tagesschau.de/multimedia/video/video-5964.html', | ||||
|         'md5': '66652566900963a3f962333579eeffcf', | ||||
|         'info_dict': { | ||||
|             'id': '196', | ||||
|             'id': '5964', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Ukraine-Konflikt: Klitschko in Kiew als Bürgermeister vereidigt', | ||||
|             'description': 'md5:f22e4af75821d174fa6c977349682691', | ||||
|             'title': 'Nahost-Konflikt: Israel bombadiert Ziele im Gazastreifen und Westjordanland', | ||||
|             'description': 'md5:07bfc78c48eec3145ed4805299a1900a', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }] | ||||
|   | ||||
| @@ -14,7 +14,7 @@ class TeacherTubeIE(InfoExtractor): | ||||
|     IE_NAME = 'teachertube' | ||||
|     IE_DESC = 'teachertube.com videos' | ||||
|  | ||||
|     _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=)(?P<id>\d+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(viewVideo\.php\?video_id=|music\.php\?music_id=|video/(?:[\da-z-]+-)?|audio/)(?P<id>\d+)' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.teachertube.com/viewVideo.php?video_id=339997', | ||||
| @@ -45,6 +45,15 @@ class TeacherTubeIE(InfoExtractor): | ||||
|             'title': 'PER ASPERA AD ASTRA', | ||||
|             'description': 'RADIJSKA EMISIJA ZRAKOPLOVNE TEHNI?KE ?KOLE P', | ||||
|         }, | ||||
|     }, { | ||||
|         'url': 'http://www.teachertube.com/video/intro-video-schleicher-297790', | ||||
|         'md5': '9c79fbb2dd7154823996fc28d4a26998', | ||||
|         'info_dict': { | ||||
|             'id': '297790', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Intro Video - Schleicher', | ||||
|             'description': 'Intro Video - Why to flip, how flipping will', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -66,6 +75,7 @@ class TeacherTubeIE(InfoExtractor): | ||||
|  | ||||
|         media_urls = re.findall(r'data-contenturl="([^"]+)"', webpage) | ||||
|         media_urls.extend(re.findall(r'var\s+filePath\s*=\s*"([^"]+)"', webpage)) | ||||
|         media_urls.extend(re.findall(r'\'file\'\s*:\s*["\']([^"\']+)["\'],', webpage)) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
| @@ -79,28 +89,36 @@ class TeacherTubeIE(InfoExtractor): | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': self._html_search_regex(r'var\s+thumbUrl\s*=\s*"([^"]+)"', webpage, 'thumbnail'), | ||||
|             'thumbnail': self._html_search_regex(r'\'image\'\s*:\s*["\']([^"\']+)["\']', webpage, 'thumbnail'), | ||||
|             'formats': formats, | ||||
|             'description': description, | ||||
|         } | ||||
|  | ||||
|  | ||||
| class TeacherTubeClassroomIE(InfoExtractor): | ||||
|     IE_NAME = 'teachertube:classroom' | ||||
|     IE_DESC = 'teachertube.com online classrooms' | ||||
| class TeacherTubeUserIE(InfoExtractor): | ||||
|     IE_NAME = 'teachertube:user:collection' | ||||
|     IE_DESC = 'teachertube.com user and collection videos' | ||||
|  | ||||
|     _VALID_URL = r'https?://(?:www\.)?teachertube\.com/view_classroom\.php\?user=(?P<user>[0-9a-zA-Z]+)' | ||||
|     _VALID_URL = r'https?://(?:www\.)?teachertube\.com/(user/profile|collection)/(?P<user>[0-9a-zA-Z]+)/?' | ||||
|  | ||||
|     _MEDIA_RE = r'(?s)"sidebar_thumb_time">[0-9:]+</div>.+?<a href="(https?://(?:www\.)?teachertube\.com/(?:video|audio)/[^"]+)">' | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         user_id = mobj.group('user') | ||||
|  | ||||
|         rss = self._download_xml( | ||||
|             'http://www.teachertube.com/rssclassroom.php?mode=user&username=%s' % user_id, | ||||
|             user_id, 'Downloading classroom RSS') | ||||
|         urls = [] | ||||
|         webpage = self._download_webpage(url, user_id) | ||||
|         urls.extend(re.findall(self._MEDIA_RE, webpage)) | ||||
|          | ||||
|         pages = re.findall(r'/ajax-user/user-videos/%s\?page=([0-9]+)' % user_id, webpage)[1:-1] | ||||
|         for p in pages: | ||||
|             more = 'http://www.teachertube.com/ajax-user/user-videos/%s?page=%s' % (user_id, p) | ||||
|             webpage = self._download_webpage(more, user_id, 'Downloading page %s/%s' % (p, len(pages) + 1)) | ||||
|             urls.extend(re.findall(self._MEDIA_RE, webpage)) | ||||
|  | ||||
|         entries = [] | ||||
|         for url in rss.findall('.//{http://search.yahoo.com/mrss/}player'): | ||||
|             entries.append(self.url_result(url.attrib['url'], 'TeacherTube')) | ||||
|         for url in urls: | ||||
|             entries.append(self.url_result(url, 'TeacherTube')) | ||||
|  | ||||
|         return self.playlist_result(entries, user_id) | ||||
|   | ||||
							
								
								
									
										84
									
								
								youtube_dl/extractor/tenplay.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										84
									
								
								youtube_dl/extractor/tenplay.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,84 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class TenPlayIE(InfoExtractor):
    """Information extractor for ten.com.au / tenplay.com.au pages.

    The page supplies a video id and an API token, which are then used to
    fetch the video description from api.brightcove.com.
    """
    _VALID_URL = r'https?://(?:www\.)?ten(play)?\.com\.au/.+'
    _TEST = {
        'url': 'http://tenplay.com.au/ten-insider/extra/season-2013/tenplay-tv-your-way',
        #'md5': 'd68703d9f73dc8fccf3320ab34202590',
        'info_dict': {
            'id': '2695695426001',
            'ext': 'flv',
            'title': 'TENplay: TV your way',
            'description': 'Welcome to a new TV experience. Enjoy a taste of the TENplay benefits.',
            'timestamp': 1380150606.889,
            'upload_date': '20130925',
            'uploader': 'TENplay',
        },
        'params': {
            'skip_download': True,  # Requires rtmpdump
        }
    }

    # Fields requested from the find_video_by_id call.
    _video_fields = [
        "id", "name", "shortDescription", "longDescription", "creationDate",
        "publishedDate", "lastModifiedDate", "customFields", "videoStillURL",
        "thumbnailURL", "referenceId", "length", "playsTotal",
        "playsTrailingWeek", "renditions", "captioning", "startDate", "endDate"]

    @staticmethod
    def _scaled(value, divisor):
        """Return float(value) / divisor, or None when value is None.

        True division is forced so the result is the same on Python 2 and
        Python 3 (this module does not import `division` from __future__).
        """
        if value is None:
            return None
        return float(value) / divisor

    def _real_extract(self, url):
        """Build the info dict (with one format per rendition) for *url*."""
        webpage = self._download_webpage(url, url)
        video_id = self._html_search_regex(
            r'videoID: "(\d+?)"', webpage, 'video_id')
        api_token = self._html_search_regex(
            r'apiToken: "([a-zA-Z0-9-_\.]+?)"', webpage, 'api_token')
        title = self._html_search_regex(
            r'<meta property="og:title" content="\s*(.*?)\s*"\s*/?\s*>',
            webpage, 'title')

        # The page title is passed as the second argument of the download call.
        video_info = self._download_json(
            'https://api.brightcove.com/services/library?command=find_video_by_id&video_id=%s&token=%s&video_fields=%s' % (
                video_id, api_token, ','.join(self._video_fields)),
            title)

        formats = []
        for rendition in video_info['renditions']:
            media_url = rendition.get('remoteUrl') or rendition.get('url')
            if not media_url:
                # A rendition without any URL cannot be downloaded.
                continue
            protocol = 'rtmp' if media_url.startswith('rtmp') else 'http'
            container = rendition['videoContainer'].lower()
            vcodec = rendition['videoCodec'].lower()
            ext = 'flv' if protocol == 'rtmp' else container

            if protocol == 'rtmp':
                media_url = media_url.replace('&mp4:', '')

            formats.append({
                # NOTE(review): the 'rtmp' prefix is used for http
                # renditions as well; kept as-is so existing format ids
                # stay stable -- confirm whether that is intended.
                'format_id': '_'.join(['rtmp', container, vcodec]),
                'width': rendition.get('frameWidth'),
                'height': rendition.get('frameHeight'),
                'tbr': self._scaled(rendition.get('encodingRate'), 1024),
                'filesize': rendition.get('size'),
                'protocol': protocol,
                'ext': ext,
                'vcodec': vcodec,
                'container': container,
                'url': media_url,
            })

        still_url = video_info.get('videoStillURL')
        # customFields may be absent or null; fall back to an empty mapping.
        custom_fields = video_info.get('customFields') or {}

        return {
            'id': video_id,
            'display_id': video_info.get('referenceId'),
            'title': video_info['name'],
            'description': (
                video_info.get('shortDescription')
                or video_info.get('longDescription')),
            'formats': formats,
            'thumbnails': [
                {'url': image_url}
                for image_url in (still_url, video_info.get('thumbnailURL'))
                if image_url
            ],
            'thumbnail': still_url,
            # presumably both values are in milliseconds -- TODO confirm
            'duration': self._scaled(video_info.get('length'), 1000),
            'timestamp': self._scaled(video_info.get('creationDate'), 1000),
            'uploader': custom_fields.get(
                'production_company_distributor', 'TENplay'),
            'view_count': video_info.get('playsTotal'),
        }
| @@ -1,10 +1,13 @@ | ||||
| # -*- coding:utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| import re | ||||
|  | ||||
|  | ||||
| class ToypicsIE(InfoExtractor): | ||||
|     IE_DESC = 'Toypics user profile' | ||||
|     _VALID_URL = r'http://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*' | ||||
|     _VALID_URL = r'https?://videos\.toypics\.net/view/(?P<id>[0-9]+)/.*' | ||||
|     _TEST = { | ||||
|         'url': 'http://videos.toypics.net/view/514/chancebulged,-2-1/', | ||||
|         'md5': '16e806ad6d6f58079d210fe30985e08b', | ||||
| @@ -61,7 +64,7 @@ class ToypicsUserIE(InfoExtractor): | ||||
|                 note='Downloading page %d/%d' % (n, page_count)) | ||||
|             urls.extend( | ||||
|                 re.findall( | ||||
|                     r'<p class="video-entry-title">\n\s*<a href="(http://videos.toypics.net/view/[^"]+)">', | ||||
|                     r'<p class="video-entry-title">\s+<a href="(https?://videos.toypics.net/view/[^"]+)">', | ||||
|                     lpage)) | ||||
|  | ||||
|         return { | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -10,14 +11,27 @@ from ..utils import ( | ||||
|  | ||||
| class TumblrIE(InfoExtractor): | ||||
|     _VALID_URL = r'http://(?P<blog_name>.*?)\.tumblr\.com/((post)|(video))/(?P<id>\d*)($|/)' | ||||
|     _TEST = { | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', | ||||
|         'file': '54196191430.mp4', | ||||
|         'md5': '479bb068e5b16462f5176a6828829767', | ||||
|         'info_dict': { | ||||
|             "title": "tatiana maslany news" | ||||
|             'id': '54196191430', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...', | ||||
|             'description': 'md5:dfac39636969fe6bf1caa2d50405f069', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         } | ||||
|     } | ||||
|     }, { | ||||
|         'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all', | ||||
|         'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359', | ||||
|         'info_dict': { | ||||
|             'id': '90208453769', | ||||
|             'ext': 'mp4', | ||||
|             'title': '5SOS STRUM ;)', | ||||
|             'description': 'md5:dba62ac8639482759c8eb10ce474586a', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         } | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         m_url = re.match(self._VALID_URL, url) | ||||
| @@ -48,6 +62,7 @@ class TumblrIE(InfoExtractor): | ||||
|         return [{'id': video_id, | ||||
|                  'url': video_url, | ||||
|                  'title': video_title, | ||||
|                  'description': self._html_search_meta('description', webpage), | ||||
|                  'thumbnail': video_thumbnail, | ||||
|                  'ext': ext | ||||
|                  }] | ||||
|   | ||||
| @@ -49,6 +49,7 @@ class VeohIE(InfoExtractor): | ||||
|                 'description': 'md5:f5a11c51f8fb51d2315bca0937526891', | ||||
|                 'uploader': 'newsy-videos', | ||||
|             }, | ||||
|             'skip': 'This video has been deleted.', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,10 @@ import re | ||||
| import base64 | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import unified_strdate | ||||
| from ..utils import ( | ||||
|     unified_strdate, | ||||
|     int_or_none, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VideoTtIE(InfoExtractor): | ||||
| @@ -50,9 +53,9 @@ class VideoTtIE(InfoExtractor): | ||||
|             'thumbnail': settings['config']['thumbnail'], | ||||
|             'upload_date': unified_strdate(video['added']), | ||||
|             'uploader': video['owner'], | ||||
|             'view_count': int(video['view_count']), | ||||
|             'comment_count': int(video['comment_count']), | ||||
|             'like_count': int(video['liked']), | ||||
|             'dislike_count': int(video['disliked']), | ||||
|             'view_count': int_or_none(video['view_count']), | ||||
|             'comment_count': None if video.get('comment_count') == '--' else int_or_none(video['comment_count']), | ||||
|             'like_count': int_or_none(video['liked']), | ||||
|             'dislike_count': int_or_none(video['disliked']), | ||||
|             'formats': formats, | ||||
|         } | ||||
							
								
								
									
										86
									
								
								youtube_dl/extractor/vimple.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										86
									
								
								youtube_dl/extractor/vimple.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,86 @@ | ||||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import re | ||||
| import xml.etree.ElementTree | ||||
| import zlib | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import int_or_none | ||||
|  | ||||
|  | ||||
| class VimpleIE(InfoExtractor): | ||||
|     IE_DESC = 'Vimple.ru' | ||||
|     _VALID_URL = r'https?://(player.vimple.ru/iframe|vimple.ru)/(?P<id>[a-f0-9]{10,})' | ||||
|     _TESTS = [ | ||||
|         # Quality: Large, from iframe | ||||
|         { | ||||
|             'url': 'http://player.vimple.ru/iframe/b132bdfd71b546d3972f9ab9a25f201c', | ||||
|             'info_dict': { | ||||
|                 'id': 'b132bdfd71b546d3972f9ab9a25f201c', | ||||
|                 'title': 'great-escape-minecraft.flv', | ||||
|                 'ext': 'mp4', | ||||
|                 'duration': 352, | ||||
|                 'webpage_url': 'http://vimple.ru/b132bdfd71b546d3972f9ab9a25f201c', | ||||
|             }, | ||||
|         }, | ||||
|         # Quality: Medium, from mainpage | ||||
|         { | ||||
|             'url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', | ||||
|             'info_dict': { | ||||
|                 'id': 'a15950562888453b8e6f9572dc8600cd', | ||||
|                 'title': 'DB 01', | ||||
|                 'ext': 'flv', | ||||
|                 'duration': 1484, | ||||
|                 'webpage_url': 'http://vimple.ru/a15950562888453b8e6f9572dc8600cd', | ||||
|             } | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         iframe_url = 'http://player.vimple.ru/iframe/%s' % video_id | ||||
|  | ||||
|         iframe = self._download_webpage( | ||||
|             iframe_url, video_id, | ||||
|             note='Downloading iframe', errnote='unable to fetch iframe') | ||||
|         player_url = self._html_search_regex( | ||||
|             r'"(http://player.vimple.ru/flash/.+?)"', iframe, 'player url') | ||||
|  | ||||
|         player = self._request_webpage( | ||||
|             player_url, video_id, note='Downloading swf player').read() | ||||
|  | ||||
|         player = zlib.decompress(player[8:]) | ||||
|  | ||||
|         xml_pieces = re.findall(b'([a-zA-Z0-9 =+/]{500})', player) | ||||
|         xml_pieces = [piece[1:-1] for piece in xml_pieces] | ||||
|  | ||||
|         xml_data = b''.join(xml_pieces) | ||||
|         xml_data = base64.b64decode(xml_data) | ||||
|  | ||||
|         xml_data = xml.etree.ElementTree.fromstring(xml_data) | ||||
|  | ||||
|         video = xml_data.find('Video') | ||||
|         quality = video.get('quality') | ||||
|         q_tag = video.find(quality.capitalize()) | ||||
|  | ||||
|         formats = [ | ||||
|             { | ||||
|                 'url': q_tag.get('url'), | ||||
|                 'tbr': int(q_tag.get('bitrate')), | ||||
|                 'filesize': int(q_tag.get('filesize')), | ||||
|                 'format_id': quality, | ||||
|             }, | ||||
|         ] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': video.find('Title').text, | ||||
|             'formats': formats, | ||||
|             'thumbnail': video.find('Poster').get('url'), | ||||
|             'duration': int_or_none(video.get('duration')), | ||||
|             'webpage_url': video.find('Share').get('videoPageUrl'), | ||||
|         } | ||||
| @@ -16,7 +16,7 @@ from ..utils import ( | ||||
|  | ||||
| class VKIE(InfoExtractor): | ||||
|     IE_NAME = 'vk.com' | ||||
|     _VALID_URL = r'https?://vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:videos.*?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' | ||||
|     _VALID_URL = r'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>.*?)(?:\?|%2F|$))' | ||||
|     _NETRC_MACHINE = 'vk' | ||||
|  | ||||
|     _TESTS = [ | ||||
| @@ -27,7 +27,7 @@ class VKIE(InfoExtractor): | ||||
|                 'id': '162222515', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'ProtivoGunz - Хуёвая песня', | ||||
|                 'uploader': 'Noize MC', | ||||
|                 'uploader': 're:Noize MC.*', | ||||
|                 'duration': 195, | ||||
|             }, | ||||
|         }, | ||||
| @@ -62,11 +62,47 @@ class VKIE(InfoExtractor): | ||||
|                 'id': '164049491', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'Триллеры', | ||||
|                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', | ||||
|                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', | ||||
|                 'duration': 8352, | ||||
|             }, | ||||
|             'skip': 'Requires vk account credentials', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', | ||||
|             'md5': 'd82c22e449f036282d1d3f7f4d276869', | ||||
|             'info_dict': { | ||||
|                 'id': '166094326', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'Киномания - лучшее из мира кино', | ||||
|                 'title': 'Запах женщины (1992)', | ||||
|                 'duration': 9392, | ||||
|             }, | ||||
|             'skip': 'Requires vk account credentials', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', | ||||
|             'md5': '4d7a5ef8cf114dfa09577e57b2993202', | ||||
|             'info_dict': { | ||||
|                 'id': '168067957', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'Киномания - лучшее из мира кино', | ||||
|                 'title': ' ', | ||||
|                 'duration': 7291, | ||||
|             }, | ||||
|             'skip': 'Requires vk account credentials', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', | ||||
|             'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', | ||||
|             'note': 'ivi.ru embed', | ||||
|             'info_dict': { | ||||
|                 'id': '60690', | ||||
|                 'ext': 'mp4', | ||||
|                 'title': 'Книга Илая', | ||||
|                 'duration': 6771, | ||||
|             }, | ||||
|             'skip': 'Only works from Russia', | ||||
|         }, | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
| @@ -110,6 +146,16 @@ class VKIE(InfoExtractor): | ||||
|         if m_yt is not None: | ||||
|             self.to_screen('Youtube video detected') | ||||
|             return self.url_result(m_yt.group(1), 'Youtube') | ||||
|  | ||||
|         m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.*?});', info_page) | ||||
|         if m_opts: | ||||
|             m_opts_url = re.search(r"url\s*:\s*'([^']+)", m_opts.group(1)) | ||||
|             if m_opts_url: | ||||
|                 opts_url = m_opts_url.group(1) | ||||
|                 if opts_url.startswith('//'): | ||||
|                     opts_url = 'http:' + opts_url | ||||
|                 return self.url_result(opts_url) | ||||
|  | ||||
|         data_json = self._search_regex(r'var vars = ({.*?});', info_page, 'vars') | ||||
|         data = json.loads(data_json) | ||||
|  | ||||
|   | ||||
							
								
								
									
										63
									
								
								youtube_dl/extractor/vodlocker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								youtube_dl/extractor/vodlocker.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,63 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_parse, | ||||
|     compat_urllib_request, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class VodlockerIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:www\.)?vodlocker.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://vodlocker.com/e8wvyzz4sl42', | ||||
|         'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf', | ||||
|         'info_dict': { | ||||
|             'id': 'e8wvyzz4sl42', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Germany vs Brazil', | ||||
|             'thumbnail': 're:http://.*\.jpg', | ||||
|         }, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|  | ||||
|         fields = dict(re.findall(r'''(?x)<input\s+ | ||||
|             type="hidden"\s+ | ||||
|             name="([^"]+)"\s+ | ||||
|             (?:id="[^"]+"\s+)? | ||||
|             value="([^"]*)" | ||||
|             ''', webpage)) | ||||
|  | ||||
|         if fields['op'] == 'download1': | ||||
|             self._sleep(3, video_id)  # they do detect when requests happen too fast! | ||||
|             post = compat_urllib_parse.urlencode(fields) | ||||
|             req = compat_urllib_request.Request(url, post) | ||||
|             req.add_header('Content-type', 'application/x-www-form-urlencoded') | ||||
|             webpage = self._download_webpage( | ||||
|                 req, video_id, 'Downloading video page') | ||||
|  | ||||
|         title = self._search_regex( | ||||
|             r'id="file_title".*?>\s*(.*?)\s*<span', webpage, 'title') | ||||
|         thumbnail = self._search_regex( | ||||
|             r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail') | ||||
|         url = self._search_regex( | ||||
|             r'file:\s*"(http[^\"]+)",', webpage, 'file url') | ||||
|  | ||||
|         formats = [{ | ||||
|             'format_id': 'sd', | ||||
|             'url': url, | ||||
|         }] | ||||
|  | ||||
|         return { | ||||
|             'id': video_id, | ||||
|             'title': title, | ||||
|             'thumbnail': thumbnail, | ||||
|             'formats': formats, | ||||
|         } | ||||
| @@ -1,3 +1,4 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
| @@ -54,14 +55,14 @@ class WDRIE(InfoExtractor): | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.funkhauseuropa.de/av/audiogrenzenlosleckerbaklava101-audioplayer.html', | ||||
|             'md5': 'cfff440d4ee64114083ac44676df5d15', | ||||
|             'url': 'http://www.funkhauseuropa.de/av/audiosuepersongsoulbossanova100-audioplayer.html', | ||||
|             'md5': '24e83813e832badb0a8d7d1ef9ef0691', | ||||
|             'info_dict': { | ||||
|                 'id': 'mdb-363068', | ||||
|                 'id': 'mdb-463528', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': 'Grenzenlos lecker - Baklava', | ||||
|                 'title': 'Süpersong: Soul Bossa Nova', | ||||
|                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a', | ||||
|                 'upload_date': '20140311', | ||||
|                 'upload_date': '20140630', | ||||
|             }, | ||||
|         }, | ||||
|     ] | ||||
| @@ -127,9 +128,10 @@ class WDRMobileIE(InfoExtractor): | ||||
|         'info_dict': { | ||||
|             'title': '4283021', | ||||
|             'id': '421735', | ||||
|             'ext': 'mp4', | ||||
|             'age_limit': 0, | ||||
|         }, | ||||
|         '_skip': 'Will be depublicized shortly' | ||||
|         'skip': 'Problems with loading data.' | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
| @@ -139,6 +141,7 @@ class WDRMobileIE(InfoExtractor): | ||||
|             'title': mobj.group('title'), | ||||
|             'age_limit': int(mobj.group('age_limit')), | ||||
|             'url': url, | ||||
|             'ext': determine_ext(url), | ||||
|             'user_agent': 'mobile', | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import json | ||||
| import re | ||||
|  | ||||
| @@ -5,14 +7,16 @@ from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
| class WistiaIE(InfoExtractor): | ||||
|     _VALID_URL = r'^https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' | ||||
|     _VALID_URL = r'https?://(?:fast\.)?wistia\.net/embed/iframe/(?P<id>[a-z0-9]+)' | ||||
|  | ||||
|     _TEST = { | ||||
|         u"url": u"http://fast.wistia.net/embed/iframe/sh7fpupwlt", | ||||
|         u"file": u"sh7fpupwlt.mov", | ||||
|         u"md5": u"cafeb56ec0c53c18c97405eecb3133df", | ||||
|         u"info_dict": { | ||||
|             u"title": u"cfh_resourceful_zdkh_final_1" | ||||
|         'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt', | ||||
|         'md5': 'cafeb56ec0c53c18c97405eecb3133df', | ||||
|         'info_dict': { | ||||
|             'id': 'sh7fpupwlt', | ||||
|             'ext': 'mov', | ||||
|             'title': 'Being Resourceful', | ||||
|             'duration': 117, | ||||
|         }, | ||||
|     } | ||||
|  | ||||
| @@ -22,7 +26,7 @@ class WistiaIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, video_id) | ||||
|         data_json = self._html_search_regex( | ||||
|             r'Wistia.iframeInit\((.*?), {}\);', webpage, u'video data') | ||||
|             r'Wistia\.iframeInit\((.*?), {}\);', webpage, 'video data') | ||||
|  | ||||
|         data = json.loads(data_json) | ||||
|  | ||||
| @@ -54,4 +58,5 @@ class WistiaIE(InfoExtractor): | ||||
|             'title': data['name'], | ||||
|             'formats': formats, | ||||
|             'thumbnails': thumbnails, | ||||
|             'duration': data.get('duration'), | ||||
|         } | ||||
|   | ||||
| @@ -865,71 +865,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|     def _decrypt_signature(self, s, video_id, player_url, age_gate=False): | ||||
|         """Turn the encrypted s field into a working signature""" | ||||
|  | ||||
|         if player_url is not None: | ||||
|             if player_url.startswith(u'//'): | ||||
|                 player_url = u'https:' + player_url | ||||
|             try: | ||||
|                 player_id = (player_url, len(s)) | ||||
|                 if player_id not in self._player_cache: | ||||
|                     func = self._extract_signature_function( | ||||
|                         video_id, player_url, len(s) | ||||
|                     ) | ||||
|                     self._player_cache[player_id] = func | ||||
|                 func = self._player_cache[player_id] | ||||
|                 if self._downloader.params.get('youtube_print_sig_code'): | ||||
|                     self._print_sig_code(func, len(s)) | ||||
|                 return func(s) | ||||
|             except Exception: | ||||
|                 tb = traceback.format_exc() | ||||
|                 self._downloader.report_warning( | ||||
|                     u'Automatic signature extraction failed: ' + tb) | ||||
|         if player_url is None: | ||||
|             raise ExtractorError(u'Cannot decrypt signature without player_url') | ||||
|  | ||||
|             self._downloader.report_warning( | ||||
|                 u'Warning: Falling back to static signature algorithm') | ||||
|  | ||||
|         return self._static_decrypt_signature( | ||||
|             s, video_id, player_url, age_gate) | ||||
|  | ||||
|     def _static_decrypt_signature(self, s, video_id, player_url, age_gate): | ||||
|         if age_gate: | ||||
|             # The videos with age protection use another player, so the | ||||
|             # algorithms can be different. | ||||
|             if len(s) == 86: | ||||
|                 return s[2:63] + s[82] + s[64:82] + s[63] | ||||
|  | ||||
|         if len(s) == 93: | ||||
|             return s[86:29:-1] + s[88] + s[28:5:-1] | ||||
|         elif len(s) == 92: | ||||
|             return s[25] + s[3:25] + s[0] + s[26:42] + s[79] + s[43:79] + s[91] + s[80:83] | ||||
|         elif len(s) == 91: | ||||
|             return s[84:27:-1] + s[86] + s[26:5:-1] | ||||
|         elif len(s) == 90: | ||||
|             return s[25] + s[3:25] + s[2] + s[26:40] + s[77] + s[41:77] + s[89] + s[78:81] | ||||
|         elif len(s) == 89: | ||||
|             return s[84:78:-1] + s[87] + s[77:60:-1] + s[0] + s[59:3:-1] | ||||
|         elif len(s) == 88: | ||||
|             return s[7:28] + s[87] + s[29:45] + s[55] + s[46:55] + s[2] + s[56:87] + s[28] | ||||
|         elif len(s) == 87: | ||||
|             return s[6:27] + s[4] + s[28:39] + s[27] + s[40:59] + s[2] + s[60:] | ||||
|         elif len(s) == 86: | ||||
|             return s[80:72:-1] + s[16] + s[71:39:-1] + s[72] + s[38:16:-1] + s[82] + s[15::-1] | ||||
|         elif len(s) == 85: | ||||
|             return s[3:11] + s[0] + s[12:55] + s[84] + s[56:84] | ||||
|         elif len(s) == 84: | ||||
|             return s[78:70:-1] + s[14] + s[69:37:-1] + s[70] + s[36:14:-1] + s[80] + s[:14][::-1] | ||||
|         elif len(s) == 83: | ||||
|             return s[80:63:-1] + s[0] + s[62:0:-1] + s[63] | ||||
|         elif len(s) == 82: | ||||
|             return s[80:37:-1] + s[7] + s[36:7:-1] + s[0] + s[6:0:-1] + s[37] | ||||
|         elif len(s) == 81: | ||||
|             return s[56] + s[79:56:-1] + s[41] + s[55:41:-1] + s[80] + s[40:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] | ||||
|         elif len(s) == 80: | ||||
|             return s[1:19] + s[0] + s[20:68] + s[19] + s[69:80] | ||||
|         elif len(s) == 79: | ||||
|             return s[54] + s[77:54:-1] + s[39] + s[53:39:-1] + s[78] + s[38:34:-1] + s[0] + s[33:29:-1] + s[34] + s[28:9:-1] + s[29] + s[8:0:-1] + s[9] | ||||
|  | ||||
|         else: | ||||
|             raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) | ||||
|         if player_url.startswith(u'//'): | ||||
|             player_url = u'https:' + player_url | ||||
|         try: | ||||
|             player_id = (player_url, len(s)) | ||||
|             if player_id not in self._player_cache: | ||||
|                 func = self._extract_signature_function( | ||||
|                     video_id, player_url, len(s) | ||||
|                 ) | ||||
|                 self._player_cache[player_id] = func | ||||
|             func = self._player_cache[player_id] | ||||
|             if self._downloader.params.get('youtube_print_sig_code'): | ||||
|                 self._print_sig_code(func, len(s)) | ||||
|             return func(s) | ||||
|         except Exception as e: | ||||
|             tb = traceback.format_exc() | ||||
|             raise ExtractorError( | ||||
|                 u'Automatic signature extraction failed: ' + tb, cause=e) | ||||
|  | ||||
|     def _get_available_subtitles(self, video_id, webpage): | ||||
|         try: | ||||
| @@ -1698,14 +1653,14 @@ class YoutubeSearchURLIE(InfoExtractor): | ||||
|  | ||||
|         webpage = self._download_webpage(url, query) | ||||
|         result_code = self._search_regex( | ||||
|             r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML') | ||||
|             r'(?s)<ol class="item-section"(.*?)</ol>', webpage, u'result HTML') | ||||
|  | ||||
|         part_codes = re.findall( | ||||
|             r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) | ||||
|         entries = [] | ||||
|         for part_code in part_codes: | ||||
|             part_title = self._html_search_regex( | ||||
|                 r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) | ||||
|                 [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False) | ||||
|             part_url_snippet = self._html_search_regex( | ||||
|                 r'(?s)href="([^"]+)"', part_code, 'item URL') | ||||
|             part_url = compat_urlparse.urljoin( | ||||
| @@ -1825,10 +1780,21 @@ class YoutubeTruncatedURLIE(InfoExtractor): | ||||
|     IE_NAME = 'youtube:truncated_url' | ||||
|     IE_DESC = False  # Do not list | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| | ||||
|         (?:https?://)?[^/]+/watch\?(?: | ||||
|             feature=[a-z_]+| | ||||
|             annotation_id=annotation_[^&]+ | ||||
|         )?$| | ||||
|         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ | ||||
|     ''' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|         'url': 'http://www.youtube.com/watch?annotation_id=annotation_3951667041', | ||||
|         'only_matching': True, | ||||
|     }, { | ||||
|         'url': 'http://www.youtube.com/watch?', | ||||
|         'only_matching': True, | ||||
|     }] | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|         raise ExtractorError( | ||||
|             u'Did you forget to quote the URL? Remember that & is a meta ' | ||||
|   | ||||
| @@ -59,7 +59,7 @@ class JSInterpreter(object): | ||||
|             if member == 'split("")': | ||||
|                 return list(val) | ||||
|             if member == 'join("")': | ||||
|                 return u''.join(val) | ||||
|                 return ''.join(val) | ||||
|             if member == 'length': | ||||
|                 return len(val) | ||||
|             if member == 'reverse()': | ||||
| @@ -99,7 +99,7 @@ class JSInterpreter(object): | ||||
|  | ||||
|     def extract_function(self, funcname): | ||||
|         func_m = re.search( | ||||
|             (r'(?:function %s|%s\s*=\s*function)' % ( | ||||
|             (r'(?:function %s|[{;]%s\s*=\s*function)' % ( | ||||
|                 re.escape(funcname), re.escape(funcname))) + | ||||
|             r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', | ||||
|             self.code) | ||||
|   | ||||
| @@ -775,7 +775,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler): | ||||
|     https_response = http_response | ||||
|  | ||||
|  | ||||
| def parse_iso8601(date_str): | ||||
| def parse_iso8601(date_str, delimiter='T'): | ||||
|     """ Return a UNIX timestamp from the given date """ | ||||
|  | ||||
|     if date_str is None: | ||||
| @@ -795,8 +795,8 @@ def parse_iso8601(date_str): | ||||
|             timezone = datetime.timedelta( | ||||
|                 hours=sign * int(m.group('hours')), | ||||
|                 minutes=sign * int(m.group('minutes'))) | ||||
|  | ||||
|     dt = datetime.datetime.strptime(date_str, '%Y-%m-%dT%H:%M:%S') - timezone | ||||
|     date_format =  '%Y-%m-%d{0}%H:%M:%S'.format(delimiter) | ||||
|     dt = datetime.datetime.strptime(date_str, date_format) - timezone | ||||
|     return calendar.timegm(dt.timetuple()) | ||||
|  | ||||
|  | ||||
| @@ -816,6 +816,9 @@ def unified_strdate(date_str): | ||||
|         '%d %b %Y', | ||||
|         '%B %d %Y', | ||||
|         '%b %d %Y', | ||||
|         '%b %dst %Y %I:%M%p', | ||||
|         '%b %dnd %Y %I:%M%p', | ||||
|         '%b %dth %Y %I:%M%p', | ||||
|         '%Y-%m-%d', | ||||
|         '%d.%m.%Y', | ||||
|         '%d/%m/%Y', | ||||
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.06.24' | ||||
| __version__ = '2014.07.11.3' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user