mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Compare commits
	
		
			40 Commits
		
	
	
		
			2014.02.13
			...
			2014.02.19
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | c377110539 | ||
|  | a9c7198a0b | ||
|  | f6f01ea17b | ||
|  | f2d0fc6823 | ||
|  | f7000f3a1b | ||
|  | c7f0177fa7 | ||
|  | 09c4d50944 | ||
|  | 2eb5d315d4 | ||
|  | ad5976b4d9 | ||
|  | a0dfcdce5e | ||
|  | 96d1637082 | ||
|  | 960f317171 | ||
|  | 4412ca751d | ||
|  | cbffec0c95 | ||
|  | 0cea52cc18 | ||
|  | 6d784e87f4 | ||
|  | ae6cae78f1 | ||
|  | 0f99566c01 | ||
|  | 2db806b4aa | ||
|  | 3f32c0ba4c | ||
|  | 541cb26c0d | ||
|  | 5544e038ab | ||
|  | 9032dc28a6 | ||
|  | 03635e2a71 | ||
|  | 00cf938aa5 | ||
|  | a5f707c495 | ||
|  | 1824b48169 | ||
|  | 07ad22b8af | ||
|  | b53466e168 | ||
|  | 6a7a389679 | ||
|  | 4edff78531 | ||
|  | 99043c2ea5 | ||
|  | e68abba910 | ||
|  | 3165dc4d9f | ||
|  | 66c43a53e4 | ||
|  | 463b334616 | ||
|  | b71dbc57c4 | ||
|  | 72ca1d7f45 | ||
|  | cf1eb45153 | ||
|  | a97bcd80ba | 
| @@ -281,10 +281,12 @@ Videos can be filtered by their upload date using the options `--date`, `--dateb | ||||
|   | ||||
| Examples: | ||||
|  | ||||
|   $ # Download only the videos uploaded in the last 6 months | ||||
|     # Download only the videos uploaded in the last 6 months | ||||
|     $ youtube-dl --dateafter now-6months | ||||
|   $ # Download only the videos uploaded on January 1, 1970 | ||||
|  | ||||
|     # Download only the videos uploaded on January 1, 1970 | ||||
|     $ youtube-dl --date 19700101 | ||||
|  | ||||
|     $ # will only download the videos uploaded in the 200x decade | ||||
|     $ youtube-dl --dateafter 20000101 --datebefore 20091231 | ||||
|  | ||||
|   | ||||
| @@ -68,6 +68,9 @@ class TestAllURLsMatching(unittest.TestCase): | ||||
|     def test_youtube_show_matching(self): | ||||
|         self.assertMatch('http://www.youtube.com/show/airdisasters', ['youtube:show']) | ||||
|  | ||||
|     def test_youtube_truncated(self): | ||||
|         self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) | ||||
|  | ||||
|     def test_justin_tv_channelid_matching(self): | ||||
|         self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv")) | ||||
|         self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) | ||||
|   | ||||
| @@ -55,10 +55,10 @@ class TestPlaylists(unittest.TestCase): | ||||
|     def test_dailymotion_user(self): | ||||
|         dl = FakeYDL() | ||||
|         ie = DailymotionUserIE(dl) | ||||
|         result = ie.extract('http://www.dailymotion.com/user/generation-quoi/') | ||||
|         result = ie.extract('https://www.dailymotion.com/user/nqtv') | ||||
|         self.assertIsPlaylist(result) | ||||
|         self.assertEqual(result['title'], 'Génération Quoi') | ||||
|         self.assertTrue(len(result['entries']) >= 26) | ||||
|         self.assertEqual(result['title'], 'Rémi Gaillard') | ||||
|         self.assertTrue(len(result['entries']) >= 100) | ||||
|  | ||||
|     def test_vimeo_channel(self): | ||||
|         dl = FakeYDL() | ||||
|   | ||||
| @@ -25,6 +25,7 @@ from youtube_dl.utils import ( | ||||
|     shell_quote, | ||||
|     smuggle_url, | ||||
|     str_to_int, | ||||
|     struct_unpack, | ||||
|     timeconvert, | ||||
|     unescapeHTML, | ||||
|     unified_strdate, | ||||
| @@ -201,7 +202,16 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual(parse_duration('1'), 1) | ||||
|         self.assertEqual(parse_duration('1337:12'), 80232) | ||||
|         self.assertEqual(parse_duration('9:12:43'), 33163) | ||||
|         self.assertEqual(parse_duration('12:00'), 720) | ||||
|         self.assertEqual(parse_duration('00:01:01'), 61) | ||||
|         self.assertEqual(parse_duration('x:y'), None) | ||||
|         self.assertEqual(parse_duration('3h11m53s'), 11513) | ||||
|         self.assertEqual(parse_duration('62m45s'), 3765) | ||||
|         self.assertEqual(parse_duration('6m59s'), 419) | ||||
|         self.assertEqual(parse_duration('49s'), 49) | ||||
|         self.assertEqual(parse_duration('0h0m0s'), 0) | ||||
|         self.assertEqual(parse_duration('0m0s'), 0) | ||||
|         self.assertEqual(parse_duration('0s'), 0) | ||||
|  | ||||
|     def test_fix_xml_ampersands(self): | ||||
|         self.assertEqual( | ||||
| @@ -237,5 +247,8 @@ class TestUtil(unittest.TestCase): | ||||
|         testPL(5, 2, (2, 99), [2, 3, 4]) | ||||
|         testPL(5, 2, (20, 99), []) | ||||
|  | ||||
|     def test_struct_unpack(self): | ||||
|         self.assertEqual(struct_unpack(u'!B', b'\x00'), (0,)) | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     unittest.main() | ||||
|   | ||||
| @@ -41,7 +41,11 @@ __authors__  = ( | ||||
|     'Chris Gahan', | ||||
|     'Saimadhav Heblikar', | ||||
|     'Mike Col', | ||||
|     'Oleg Prutz', | ||||
|     'pulpe', | ||||
|     'Andreas Schmitz', | ||||
|     'Michael Kaiser', | ||||
|     'Niklas Laxström', | ||||
| ) | ||||
|  | ||||
| __license__ = 'Public Domain' | ||||
|   | ||||
| @@ -5,6 +5,7 @@ from .hls import HlsFD | ||||
| from .http import HttpFD | ||||
| from .mplayer import MplayerFD | ||||
| from .rtmp import RtmpFD | ||||
| from .f4m import F4mFD | ||||
|  | ||||
| from ..utils import ( | ||||
|     determine_ext, | ||||
| @@ -22,5 +23,7 @@ def get_suitable_downloader(info_dict): | ||||
|         return HlsFD | ||||
|     if url.startswith('mms') or url.startswith('rtsp'): | ||||
|         return MplayerFD | ||||
|     if determine_ext(url) == 'f4m': | ||||
|         return F4mFD | ||||
|     else: | ||||
|         return HttpFD | ||||
|   | ||||
							
								
								
									
										315
									
								
								youtube_dl/downloader/f4m.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										315
									
								
								youtube_dl/downloader/f4m.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,315 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import base64 | ||||
| import io | ||||
| import itertools | ||||
| import os | ||||
| import time | ||||
| import xml.etree.ElementTree as etree | ||||
|  | ||||
| from .common import FileDownloader | ||||
| from .http import HttpFD | ||||
| from ..utils import ( | ||||
|     struct_pack, | ||||
|     struct_unpack, | ||||
|     compat_urllib_request, | ||||
|     compat_urlparse, | ||||
|     format_bytes, | ||||
|     encodeFilename, | ||||
|     sanitize_open, | ||||
| ) | ||||
|  | ||||
|  | ||||
class FlvReader(io.BytesIO):
    """
    Reader for FLV files.

    The file format is documented in https://www.adobe.com/devnet/f4v.html
    All multi-byte integers are big-endian (the '!' in the struct formats).
    """

    # Utility functions for reading numbers and strings

    def read_unsigned_long_long(self):
        """Read a 64-bit unsigned integer."""
        return struct_unpack('!Q', self.read(8))[0]

    def read_unsigned_int(self):
        """Read a 32-bit unsigned integer."""
        return struct_unpack('!I', self.read(4))[0]

    def read_unsigned_char(self):
        """Read an 8-bit unsigned integer."""
        return struct_unpack('!B', self.read(1))[0]

    def read_string(self):
        """Read a NUL-terminated byte string; the terminator is consumed
        but not included in the result."""
        res = b''
        while True:
            char = self.read(1)
            if char == b'\x00':
                break
            res += char
        return res

    def read_box_info(self):
        """
        Read a box and return the info as a tuple: (box_size, box_type, box_data)
        """
        real_size = size = self.read_unsigned_int()
        box_type = self.read(4)
        header_end = 8
        if size == 1:
            # A size field of 1 means the real size follows in an extended
            # 64-bit field, making the header 16 bytes instead of 8.
            real_size = self.read_unsigned_long_long()
            header_end = 16
        return real_size, box_type, self.read(real_size - header_end)

    def read_asrt(self):
        """Parse a Segment Run Table ('asrt') box body."""
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        quality_entry_count = self.read_unsigned_char()
        # QualityEntryCount
        for _ in range(quality_entry_count):
            self.read_string()

        segment_run_count = self.read_unsigned_int()
        segments = []
        for _ in range(segment_run_count):
            first_segment = self.read_unsigned_int()
            fragments_per_segment = self.read_unsigned_int()
            segments.append((first_segment, fragments_per_segment))

        return {
            'segment_run': segments,
        }

    def read_afrt(self):
        """Parse a Fragment Run Table ('afrt') box body."""
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # time scale
        self.read_unsigned_int()

        quality_entry_count = self.read_unsigned_char()
        # QualitySegmentUrlModifiers
        for _ in range(quality_entry_count):
            self.read_string()

        fragments_count = self.read_unsigned_int()
        fragments = []
        for _ in range(fragments_count):
            first = self.read_unsigned_int()
            first_ts = self.read_unsigned_long_long()
            duration = self.read_unsigned_int()
            if duration == 0:
                # A zero duration marks the end of a run and is followed by
                # a DiscontinuityIndicator byte.
                discontinuity_indicator = self.read_unsigned_char()
            else:
                discontinuity_indicator = None
            fragments.append({
                'first': first,
                'ts': first_ts,
                'duration': duration,
                'discontinuity_indicator': discontinuity_indicator,
            })

        return {
            'fragments': fragments,
        }

    def read_abst(self):
        """Parse a Bootstrap Info ('abst') box body."""
        # version
        self.read_unsigned_char()
        # flags
        self.read(3)
        # BootstrapinfoVersion
        self.read_unsigned_int()
        # Profile,Live,Update,Reserved
        self.read(1)
        # time scale
        self.read_unsigned_int()
        # CurrentMediaTime
        self.read_unsigned_long_long()
        # SmpteTimeCodeOffset
        self.read_unsigned_long_long()
        # MovieIdentifier
        self.read_string()
        server_count = self.read_unsigned_char()
        # ServerEntryTable
        for _ in range(server_count):
            self.read_string()
        quality_count = self.read_unsigned_char()
        # QualityEntryTable
        # Fix: this loop previously iterated server_count times; whenever
        # ServerEntryCount != QualityEntryCount the parser would read the
        # remainder of the box out of sync.
        for _ in range(quality_count):
            self.read_string()
        # DrmData
        self.read_string()
        # MetaData
        self.read_string()

        segments_count = self.read_unsigned_char()
        segments = []
        for _ in range(segments_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'asrt'
            segment = FlvReader(box_data).read_asrt()
            segments.append(segment)
        fragments_run_count = self.read_unsigned_char()
        fragments = []
        for _ in range(fragments_run_count):
            box_size, box_type, box_data = self.read_box_info()
            assert box_type == b'afrt'
            fragments.append(FlvReader(box_data).read_afrt())

        return {
            'segments': segments,
            'fragments': fragments,
        }

    def read_bootstrap_info(self):
        """Read the top-level 'abst' box and return its parsed contents."""
        total_size, box_type, box_data = self.read_box_info()
        assert box_type == b'abst'
        return FlvReader(box_data).read_abst()
|  | ||||
|  | ||||
def read_bootstrap_info(bootstrap_bytes):
    """Parse a raw bootstrap info blob (an 'abst' box) into a dict."""
    reader = FlvReader(bootstrap_bytes)
    return reader.read_bootstrap_info()
|  | ||||
|  | ||||
def build_fragments_list(boot_info):
    """ Return a list of (segment, fragment) for each fragment in the video """
    # I've only found videos with one segment, so the segment number is
    # always 1; only the fragment numbers advance.
    segment_run_table = boot_info['segments'][0]
    first_entry = segment_run_table['segment_run'][0]
    n_frags = first_entry[1]
    fragment_run_entry_table = boot_info['fragments'][0]['fragments']
    first_frag_number = fragment_run_entry_table[0]['first']
    return [(1, first_frag_number + offset) for offset in range(n_frags)]
|  | ||||
|  | ||||
def write_flv_header(stream, metadata):
    """Writes the FLV header and the metadata to stream"""
    # FLV header: signature 'FLV', version 1
    stream.write(b'FLV\x01')
    # TypeFlags byte (0x05 — per the FLV spec this sets the audio and
    # video presence bits)
    stream.write(b'\x05')
    # DataOffset: the header is 9 bytes long
    stream.write(b'\x00\x00\x00\x09')
    # FLV File body
    # PreviousTagSize0, always 0
    stream.write(b'\x00\x00\x00\x00')
    # FLVTAG
    # Script data (tag type 18)
    stream.write(b'\x12')
    # Size of the metadata with 3 bytes (drop the high byte of the
    # 4-byte packed value)
    stream.write(struct_pack('!L', len(metadata))[1:])
    # Timestamp (3 bytes) + TimestampExtended (1) + StreamID (3), all zero
    stream.write(b'\x00\x00\x00\x00\x00\x00\x00')
    stream.write(metadata)
    # Magic numbers extracted from the output files produced by AdobeHDS.php
    # (https://github.com/K-S-V/Scripts)
    # NOTE(review): presumably this stands in for the metadata tag's
    # PreviousTagSize — confirm against the FLV spec before changing it.
    stream.write(b'\x00\x00\x01\x73')
|  | ||||
|  | ||||
| def _add_ns(prop): | ||||
|     return '{http://ns.adobe.com/f4m/1.0}%s' % prop | ||||
|  | ||||
|  | ||||
class HttpQuietDownloader(HttpFD):
    """An HttpFD that swallows all screen output."""

    def to_screen(self, *args, **kwargs):
        # Discard every message; the wrapping downloader does the reporting.
        pass
|  | ||||
|  | ||||
class F4mFD(FileDownloader):
    """
    A downloader for f4m manifests or AdobeHDS.

    Downloads every fragment listed in the manifest's bootstrap info and
    stitches their 'mdat' payloads into a single FLV file.
    """

    def real_download(self, filename, info_dict):
        # Fetch the f4m manifest; it lists the media renditions plus the
        # base64-encoded bootstrap info and FLV metadata.
        man_url = info_dict['url']
        self.to_screen('[download] Downloading f4m manifest')
        manifest = self.ydl.urlopen(man_url).read()
        self.report_destination(filename)
        # Quiet inner downloader for the individual fragments, so only the
        # aggregated progress (frag_progress_hook below) reaches the screen.
        http_dl = HttpQuietDownloader(self.ydl,
            {
                'continuedl': True,
                'quiet': True,
                'noprogress': True,
                'test': self.params.get('test', False),
            })

        doc = etree.fromstring(manifest)
        # Pick the <media> entry with the highest bitrate (-1 when absent).
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
        formats = sorted(formats, key=lambda f: f[0])
        rate, media = formats[-1]
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
        boot_info = read_bootstrap_info(bootstrap)
        fragments_list = build_fragments_list(boot_info)
        if self.params.get('test', False):
            # We only download the first fragment
            fragments_list = fragments_list[:1]
        total_frags = len(fragments_list)

        tmpfilename = self.temp_name(filename)
        (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
        write_flv_header(dest_stream, metadata)

        # This dict stores the download progress, it's updated by the progress
        # hook
        state = {
            'downloaded_bytes': 0,
            'frag_counter': 0,
        }
        start = time.time()

        def frag_progress_hook(status):
            # Estimate the total size by extrapolating the current fragment's
            # size over the fragments not yet downloaded.
            frag_total_bytes = status.get('total_bytes', 0)
            estimated_size = (state['downloaded_bytes'] +
                (total_frags - state['frag_counter']) * frag_total_bytes)
            if status['status'] == 'finished':
                state['downloaded_bytes'] += frag_total_bytes
                state['frag_counter'] += 1
                progress = self.calc_percent(state['frag_counter'], total_frags)
                byte_counter = state['downloaded_bytes']
            else:
                # Mid-fragment update: blend the per-fragment progress into
                # the overall percentage.
                frag_downloaded_bytes = status['downloaded_bytes']
                byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
                frag_progress = self.calc_percent(frag_downloaded_bytes,
                    frag_total_bytes)
                progress = self.calc_percent(state['frag_counter'], total_frags)
                progress += frag_progress / float(total_frags)

            eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
            self.report_progress(progress, format_bytes(estimated_size),
                status.get('speed'), eta)
        http_dl.add_progress_hook(frag_progress_hook)

        frags_filenames = []
        for (seg_i, frag_i) in fragments_list:
            # Fragment URLs follow the HDS SegN-FragM naming convention.
            name = 'Seg%d-Frag%d' % (seg_i, frag_i)
            url = base_url + name
            frag_filename = '%s-%s' % (tmpfilename, name)
            success = http_dl.download(frag_filename, {'url': url})
            if not success:
                return False
            with open(frag_filename, 'rb') as down:
                down_data = down.read()
                reader = FlvReader(down_data)
                while True:
                    # Only the 'mdat' box payload carries media data; skip
                    # any boxes before it.
                    _, box_type, box_data = reader.read_box_info()
                    if box_type == b'mdat':
                        dest_stream.write(box_data)
                        break
            frags_filenames.append(frag_filename)

        self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)

        self.try_rename(tmpfilename, filename)
        # All fragments are stitched into the FLV; drop the temporaries.
        for frag_file in frags_filenames:
            os.remove(frag_file)

        fsize = os.path.getsize(encodeFilename(filename))
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })

        return True
| @@ -73,6 +73,7 @@ from .fktv import ( | ||||
|     FKTVPosteckeIE, | ||||
| ) | ||||
| from .flickr import FlickrIE | ||||
| from .fourtube import FourTubeIE | ||||
| from .franceinter import FranceInterIE | ||||
| from .francetv import ( | ||||
|     PluzzIE, | ||||
| @@ -91,6 +92,7 @@ from .generic import GenericIE | ||||
| from .googleplus import GooglePlusIE | ||||
| from .googlesearch import GoogleSearchIE | ||||
| from .hark import HarkIE | ||||
| from .helsinki import HelsinkiIE | ||||
| from .hotnewhiphop import HotNewHipHopIE | ||||
| from .howcast import HowcastIE | ||||
| from .huffpost import HuffPostIE | ||||
| @@ -209,10 +211,12 @@ from .statigram import StatigramIE | ||||
| from .steam import SteamIE | ||||
| from .streamcloud import StreamcloudIE | ||||
| from .streamcz import StreamCZIE | ||||
| from .syfy import SyfyIE | ||||
| from .sztvhu import SztvHuIE | ||||
| from .teamcoco import TeamcocoIE | ||||
| from .techtalks import TechTalksIE | ||||
| from .ted import TEDIE | ||||
| from .testurl import TestURLIE | ||||
| from .tf1 import TF1IE | ||||
| from .theplatform import ThePlatformIE | ||||
| from .thisav import ThisAVIE | ||||
|   | ||||
| @@ -13,13 +13,13 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|  | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://www.bbc.co.uk/programmes/p01q7wz1', | ||||
|             'url': 'http://www.bbc.co.uk/programmes/b039g8p7', | ||||
|             'info_dict': { | ||||
|                 'id': 'p01q7wz4', | ||||
|                 'id': 'b039d07m', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'Friction: Blu Mar Ten guest mix: Blu Mar Ten - Guest Mix', | ||||
|                 'description': 'Blu Mar Ten deliver a Guest Mix for Friction.', | ||||
|                 'duration': 1936, | ||||
|                 'title': 'Kaleidoscope: Leonard Cohen', | ||||
|                 'description': 'md5:db4755d7a665ae72343779f7dacb402c', | ||||
|                 'duration': 1740, | ||||
|             }, | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
| @@ -38,7 +38,8 @@ class BBCCoUkIE(SubtitlesInfoExtractor): | ||||
|             'params': { | ||||
|                 # rtmp download | ||||
|                 'skip_download': True, | ||||
|             } | ||||
|             }, | ||||
|             'skip': 'Episode is no longer available on BBC iPlayer Radio', | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/', | ||||
|   | ||||
							
								
								
									
										95
									
								
								youtube_dl/extractor/fourtube.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										95
									
								
								youtube_dl/extractor/fourtube.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,95 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     compat_urllib_request, | ||||
|     unified_strdate, | ||||
|     str_to_int, | ||||
|     parse_duration, | ||||
| ) | ||||
| from youtube_dl.utils import clean_html | ||||
|  | ||||
|  | ||||
class FourTubeIE(InfoExtractor):
    """Extractor for videos hosted on 4tube.com."""
    IE_NAME = '4tube'
    _VALID_URL = r'https?://(?:www\.)?4tube\.com/videos/(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.4tube.com/videos/209733/hot-babe-holly-michaels-gets-her-ass-stuffed-by-black',
        'md5': '6516c8ac63b03de06bc8eac14362db4f',
        'info_dict': {
            'id': '209733',
            'ext': 'mp4',
            'title': 'Hot Babe Holly Michaels gets her ass stuffed by black',
            'uploader': 'WCP Club',
            'uploader_id': 'wcp-club',
            'upload_date': '20131031',
            'duration': 583,
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        # Rebuild a canonical page URL from the numeric id (drops any slug).
        webpage_url = 'http://www.4tube.com/videos/' + video_id
        webpage = self._download_webpage(webpage_url, video_id)

        self.report_extraction(video_id)

        # The player config is an inline JavaScript object literal, not
        # JSON, so the individual fields are pulled out with regexes.
        playlist_json = self._html_search_regex(r'var playerConfigPlaylist\s+=\s+([^;]+)', webpage, 'Playlist')
        media_id = self._search_regex(r'idMedia:\s*(\d+)', playlist_json, 'Media Id')
        sources = self._search_regex(r'sources:\s*\[([^\]]*)\]', playlist_json, 'Sources').split(',')
        title = self._search_regex(r'title:\s*"([^"]*)', playlist_json, 'Title')
        thumbnail_url = self._search_regex(r'image:\s*"([^"]*)', playlist_json, 'Thumbnail', fatal=False)

        # The uploader is either a linked site (yields name + id) or plain
        # markup (name only, id left as None).
        uploader_str = self._search_regex(r'<span>Uploaded by</span>(.*?)<span>', webpage, 'uploader', fatal=False)
        mobj = re.search(r'<a href="/sites/(?P<id>[^"]+)"><strong>(?P<name>[^<]+)</strong></a>', uploader_str)
        (uploader, uploader_id) = (mobj.group('name'), mobj.group('id')) if mobj else (clean_html(uploader_str), None)

        # Upload date, view count and duration only appear inside the meta
        # description text, e.g. "Published Date: 31 Oct 2013".
        upload_date = None
        view_count = None
        duration = None
        description = self._html_search_meta('description', webpage, 'description')
        if description:
            upload_date = self._search_regex(r'Published Date: (\d{2} [a-zA-Z]{3} \d{4})', description, 'upload date',
                fatal=False)
            if upload_date:
                upload_date = unified_strdate(upload_date)
            view_count = self._search_regex(r'Views: ([\d,\.]+)', description, 'view count', fatal=False)
            if view_count:
                view_count = str_to_int(view_count)
            duration = parse_duration(self._search_regex(r'Length: (\d+m\d+s)', description, 'duration', fatal=False))

        # The token endpoint maps each source to a tokenized download URL;
        # the b'{}' body makes this a POST request.
        token_url = "http://tkn.4tube.com/{0}/desktop/{1}".format(media_id, "+".join(sources))
        headers = {
                b'Content-Type': b'application/x-www-form-urlencoded',
                b'Origin': b'http://www.4tube.com',
                }
        token_req = compat_urllib_request.Request(token_url, b'{}', headers)
        tokens = self._download_json(token_req, video_id)

        # NOTE(review): 'format' shadows the builtin here; each source looks
        # like a vertical-resolution string (int(format) is used as quality).
        formats = [{
            'url': tokens[format]['token'],
            'format_id': format + 'p',
            'resolution': format + 'p',
            'quality': int(format),
            } for format in sources]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail_url,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'upload_date': upload_date,
            'view_count': view_count,
            'duration': duration,
            'age_limit': 18,
            'webpage_url': webpage_url,
        }
| @@ -184,6 +184,7 @@ class GenerationQuoiIE(InfoExtractor): | ||||
|             # It uses Dailymotion | ||||
|             'skip_download': True, | ||||
|         }, | ||||
|         'skip': 'Only available from France', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
							
								
								
									
										62
									
								
								youtube_dl/extractor/helsinki.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								youtube_dl/extractor/helsinki.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class HelsinkiIE(InfoExtractor):
    """Extractor for videos hosted at video.helsinki.fi."""
    IE_DESC = 'helsinki.fi'
    _VALID_URL = r'https?://video\.helsinki\.fi/Arkisto/flash\.php\?id=(?P<id>\d+)'
    _TEST = {
        'url': 'http://video.helsinki.fi/Arkisto/flash.php?id=20258',
        'info_dict': {
            'id': '20258',
            'ext': 'mp4',
            'title': 'Tietotekniikkafoorumi-iltapäivä',
            'description': 'md5:f5c904224d43c133225130fe156a5ee0',
        },
        'params': {
            'skip_download': True,  # RTMP
        }
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The page embeds an SD play path ('file=...') and, optionally, an
        # HD one ('hd.file=...'); both stream from the same RTMP server.
        # Group 1 is the full play path, group 2 its type prefix (e.g.
        # 'mp4'), which doubles as the container extension.  The two
        # branches were previously duplicated; they differ only in the
        # regex, the format note and the quality ranking.
        formats = []
        for pattern, note, quality in (
                (r'file=((\w+):[^&]+)', 'sd', 0),
                (r'hd\.file=((\w+):[^&]+)', 'hd', 1)):
            mobj = re.search(pattern, webpage)
            if mobj:
                formats.append({
                    'ext': mobj.group(2),
                    'play_path': mobj.group(1),
                    'url': 'rtmp://flashvideo.it.helsinki.fi/vod/',
                    'player_url': 'http://video.helsinki.fi/player.swf',
                    'format_note': note,
                    'quality': quality,
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage).replace('Video: ', ''),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': formats,
        }
| @@ -26,15 +26,15 @@ class NDRIE(InfoExtractor): | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://www.ndr.de/903/audio191719.html', | ||||
|             'md5': '41ed601768534dd18a9ae34d84798129', | ||||
|             'url': 'http://www.ndr.de/info/audio51535.html', | ||||
|             'md5': 'bb3cd38e24fbcc866d13b50ca59307b8', | ||||
|             'note': 'Audio file', | ||||
|             'info_dict': { | ||||
|                 'id': '191719', | ||||
|                 'id': '51535', | ||||
|                 'ext': 'mp3', | ||||
|                 'title': '"Es war schockierend"', | ||||
|                 'description': 'md5:ed7ff8364793545021a6355b97e95f10', | ||||
|                 'duration': 112, | ||||
|                 'title': 'La Valette entgeht der Hinrichtung', | ||||
|                 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536', | ||||
|                 'duration': 884, | ||||
|             } | ||||
|         } | ||||
|     ] | ||||
|   | ||||
| @@ -74,7 +74,8 @@ class NFBIE(InfoExtractor): | ||||
|                 description = media.find('description').text | ||||
|                 # It seems assets always go from lower to better quality, so no need to sort | ||||
|                 formats = [{ | ||||
|                     'url': x.find('default/streamerURI').text + '/', | ||||
|                     'url': x.find('default/streamerURI').text, | ||||
|                     'app': x.find('default/streamerURI').text.split('/', 3)[3], | ||||
|                     'play_path': x.find('default/url').text, | ||||
|                     'rtmp_live': False, | ||||
|                     'ext': 'mp4', | ||||
|   | ||||
| @@ -20,6 +20,7 @@ class SmotriIE(InfoExtractor): | ||||
|     IE_DESC = 'Smotri.com' | ||||
|     IE_NAME = 'smotri' | ||||
|     _VALID_URL = r'^https?://(?:www\.)?(?P<url>smotri\.com/video/view/\?id=(?P<videoid>v(?P<realvideoid>[0-9]+)[a-z0-9]{4}))' | ||||
|     _NETRC_MACHINE = 'smotri' | ||||
|  | ||||
|     _TESTS = [ | ||||
|         # real video id 2610366 | ||||
|   | ||||
| @@ -17,6 +17,7 @@ class SohuIE(InfoExtractor): | ||||
|         u'info_dict': { | ||||
|             u'title': u'MV:Far East Movement《The Illest》', | ||||
|         }, | ||||
|         u'skip': u'Only available from China', | ||||
|     } | ||||
|  | ||||
|     def _real_extract(self, url): | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| @@ -8,14 +10,14 @@ from ..utils import RegexNotFoundError, ExtractorError | ||||
| class SpaceIE(InfoExtractor): | ||||
|     _VALID_URL = r'https?://(?:(?:www|m)\.)?space\.com/\d+-(?P<title>[^/\.\?]*?)-video\.html' | ||||
|     _TEST = { | ||||
|         u'add_ie': ['Brightcove'], | ||||
|         u'url': u'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html', | ||||
|         u'info_dict': { | ||||
|             u'id': u'2780937028001', | ||||
|             u'ext': u'mp4', | ||||
|             u'title': u'Huge Martian Landforms\' Detail Revealed By European Probe | Video', | ||||
|             u'description': u'md5:db81cf7f3122f95ed234b631a6ea1e61', | ||||
|             u'uploader': u'TechMedia Networks', | ||||
|         'add_ie': ['Brightcove'], | ||||
|         'url': 'http://www.space.com/23373-huge-martian-landforms-detail-revealed-by-european-probe-video.html', | ||||
|         'info_dict': { | ||||
|             'id': '2780937028001', | ||||
|             'ext': 'mp4', | ||||
|             'title': 'Huge Martian Landforms\' Detail Revealed By European Probe | Video', | ||||
|             'description': 'md5:db81cf7f3122f95ed234b631a6ea1e61', | ||||
|             'uploader': 'TechMedia Networks', | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|   | ||||
							
								
								
									
										27
									
								
								youtube_dl/extractor/syfy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								youtube_dl/extractor/syfy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
|  | ||||
|  | ||||
class SyfyIE(InfoExtractor):
    """Extractor for www.syfy.com video pages.

    The page itself only embeds the player; extraction is delegated to the
    ThePlatform extractor via the Open Graph video URL found in the page.
    """
    _VALID_URL = r'https?://www\.syfy\.com/videos/.+?vid:(?P<id>\d+)'

    _TEST = {
        'url': 'http://www.syfy.com/videos/Robot%20Combat%20League/Behind%20the%20Scenes/vid:2631458',
        'md5': 'e07de1d52c7278adbb9b9b1c93a66849',
        'info_dict': {
            'id': 'NmqMrGnXvmO1',
            'ext': 'flv',
            'title': 'George Lucas has Advice for his Daughter',
            'description': 'Listen to what insights George Lucas give his daughter Amanda.',
        },
        'add_ie': ['ThePlatform'],
    }

    def _real_extract(self, url):
        # The numeric id from the URL is only used to label download progress;
        # the real media id comes from ThePlatform after the hand-off below.
        page_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, page_id)
        return self.url_result(self._og_search_video_url(webpage))
							
								
								
									
										66
									
								
								youtube_dl/extractor/testurl.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								youtube_dl/extractor/testurl.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| from __future__ import unicode_literals | ||||
|  | ||||
| import re | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ExtractorError | ||||
|  | ||||
|  | ||||
class TestURLIE(InfoExtractor):
    """Allows addressing of the test cases as test:yout.*be_1.

    The part before the optional ``_<num>`` suffix is a case-insensitive
    regular expression matched against extractor IE_NAMEs; ``<num>`` selects
    which of that extractor's test cases to use (default: 0).
    """

    IE_DESC = False  # Do not list
    _VALID_URL = r'test(?:url)?:(?P<id>(?P<extractor>.+?)(?:_(?P<num>[0-9]+))?)$'

    def _real_extract(self, url):
        # Imported lazily: the extractor package imports this module, so a
        # top-level import would be circular.
        from ..extractor import gen_extractors

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        extractor_id = mobj.group('extractor')
        all_extractors = gen_extractors()

        rex = re.compile(extractor_id, flags=re.IGNORECASE)
        matching_extractors = [
            e for e in all_extractors if rex.search(e.IE_NAME)]

        if len(matching_extractors) == 0:
            raise ExtractorError(
                'No extractors matching %r found' % extractor_id,
                expected=True)
        elif len(matching_extractors) > 1:
            # Is it obvious which one to pick?
            try:
                extractor = next(
                    ie for ie in matching_extractors
                    if ie.IE_NAME.lower() == extractor_id.lower())
            except StopIteration:
                raise ExtractorError(
                    ('Found multiple matching extractors: %s' %
                        ' '.join(ie.IE_NAME for ie in matching_extractors)),
                    expected=True)
        else:
            # Fix: with exactly one match, `extractor` was never assigned,
            # causing a NameError below — the most common case.
            extractor = matching_extractors[0]

        num_str = mobj.group('num')
        num = int(num_str) if num_str else 0

        # Collect test cases from both the singular _TEST and plural _TESTS.
        testcases = []
        t = getattr(extractor, '_TEST', None)
        if t:
            testcases.append(t)
        testcases.extend(getattr(extractor, '_TESTS', []))

        try:
            tc = testcases[num]
        except IndexError:
            raise ExtractorError(
                ('Test case %d not found, got only %d tests' %
                    (num, len(testcases))),
                expected=True)

        self.to_screen('Test URL: %s' % tc['url'])

        # Delegate to whichever extractor claims the test case's URL.
        return {
            '_type': 'url',
            'url': tc['url'],
            'id': video_id,
        }
| @@ -11,7 +11,10 @@ _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2005/SMIL21/Language | ||||
|  | ||||
|  | ||||
| class ThePlatformIE(InfoExtractor): | ||||
|     _VALID_URL = r'(?:https?://link\.theplatform\.com/s/[^/]+/|theplatform:)(?P<id>[^/\?]+)' | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://(?:link|player)\.theplatform\.com/[sp]/[^/]+/ | ||||
|            (?P<config>[^/\?]+/(?:swf|config)/select/)? | ||||
|          |theplatform:)(?P<id>[^/\?&]+)''' | ||||
|  | ||||
|     _TEST = { | ||||
|         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/ | ||||
| @@ -29,9 +32,7 @@ class ThePlatformIE(InfoExtractor): | ||||
|         }, | ||||
|     } | ||||
|  | ||||
|     def _get_info(self, video_id): | ||||
|         smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' | ||||
|             'format=smil&mbr=true'.format(video_id)) | ||||
|     def _get_info(self, video_id, smil_url): | ||||
|         meta = self._download_xml(smil_url, video_id) | ||||
|  | ||||
|         try: | ||||
| @@ -50,6 +51,15 @@ class ThePlatformIE(InfoExtractor): | ||||
|  | ||||
|         head = meta.find(_x('smil:head')) | ||||
|         body = meta.find(_x('smil:body')) | ||||
|  | ||||
|         f4m_node = body.find(_x('smil:seq/smil:video')) | ||||
|         if f4m_node is not None: | ||||
|             formats = [{ | ||||
|                 'ext': 'flv', | ||||
|                 # the parameters are from syfy.com, other sites may use others | ||||
|                 'url': f4m_node.attrib['src'] + '?g=UXWGVKRWHFSP&hdcore=3.0.3', | ||||
|             }] | ||||
|         else: | ||||
|             base_url = head.find(_x('smil:meta')).attrib['base'] | ||||
|             switch = body.find(_x('smil:switch')) | ||||
|             formats = [] | ||||
| @@ -68,7 +78,6 @@ class ThePlatformIE(InfoExtractor): | ||||
|                     'height': height, | ||||
|                     'vbr': vbr, | ||||
|                 }) | ||||
|  | ||||
|             self._sort_formats(formats) | ||||
|  | ||||
|         return { | ||||
| @@ -83,4 +92,13 @@ class ThePlatformIE(InfoExtractor): | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|         return self._get_info(video_id) | ||||
|         if mobj.group('config'): | ||||
|             config_url = url+ '&form=json' | ||||
|             config_url = config_url.replace('swf/', 'config/') | ||||
|             config_json = self._download_webpage(config_url, video_id, u'Downloading config') | ||||
|             config = json.loads(config_json) | ||||
|             smil_url = config['releaseUrl'] + '&format=SMIL&formats=MPEG4' | ||||
|         else: | ||||
|             smil_url = ('http://link.theplatform.com/s/dJ5BDC/{0}/meta.smil?' | ||||
|                 'format=smil&mbr=true'.format(video_id)) | ||||
|         return self._get_info(video_id, smil_url) | ||||
|   | ||||
| @@ -57,6 +57,7 @@ class VestiIE(InfoExtractor): | ||||
|                 # m3u8 download | ||||
|                 'skip_download': True, | ||||
|             }, | ||||
|             'skip': 'Blocked outside Russia' | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://sochi2014.vesti.ru/live/play/live_id/301', | ||||
| @@ -86,8 +87,7 @@ class VestiIE(InfoExtractor): | ||||
|             video_id = mobj.group('id') | ||||
|         else: | ||||
|             mobj = re.search( | ||||
|                 r'<div.+?id="current-video-holder".*?>\s*<iframe src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*"', | ||||
|                 page) | ||||
|                 r'<iframe.+?src="http://player\.rutv\.ru/iframe/(?P<type>[^/]+)/id/(?P<id>\d+)[^"]*".*?></iframe>', page) | ||||
|  | ||||
|             if not mobj: | ||||
|                 raise ExtractorError('No media found') | ||||
|   | ||||
| @@ -37,9 +37,10 @@ class VimeoIE(SubtitlesInfoExtractor): | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://vimeo.com/56015672#at=0', | ||||
|             'file': '56015672.mp4', | ||||
|             'md5': '8879b6cc097e987f02484baf890129e5', | ||||
|             'info_dict': { | ||||
|                 'id': '56015672', | ||||
|                 'ext': 'mp4', | ||||
|                 "upload_date": "20121220", | ||||
|                 "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", | ||||
|                 "uploader_id": "user7108434", | ||||
|   | ||||
| @@ -6,6 +6,9 @@ import json | ||||
|  | ||||
| from .common import InfoExtractor | ||||
| from ..utils import ( | ||||
|     ExtractorError, | ||||
|     compat_urllib_request, | ||||
|     compat_urllib_parse, | ||||
|     compat_str, | ||||
|     unescapeHTML, | ||||
| ) | ||||
| @@ -14,31 +17,80 @@ from ..utils import ( | ||||
| class VKIE(InfoExtractor): | ||||
|     IE_NAME = 'vk.com' | ||||
|     _VALID_URL = r'https?://vk\.com/(?:videos.*?\?.*?z=)?video(?P<id>.*?)(?:\?|%2F|$)' | ||||
|     _NETRC_MACHINE = 'vk' | ||||
|  | ||||
|     _TESTS = [{ | ||||
|     _TESTS = [ | ||||
|         { | ||||
|             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', | ||||
|         'file': '162222515.flv', | ||||
|             'md5': '0deae91935c54e00003c2a00646315f0', | ||||
|             'info_dict': { | ||||
|                 'id': '162222515', | ||||
|                 'ext': 'flv', | ||||
|                 'title': 'ProtivoGunz - Хуёвая песня', | ||||
|                 'uploader': 'Noize MC', | ||||
|                 'duration': 195, | ||||
|             }, | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vk.com/video4643923_163339118', | ||||
|         'file': '163339118.mp4', | ||||
|             'md5': 'f79bccb5cd182b1f43502ca5685b2b36', | ||||
|             'info_dict': { | ||||
|                 'id': '163339118', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'Elvira Dzhonik', | ||||
|                 'title': 'Dream Theater - Hollow Years Live at Budokan 720*', | ||||
|                 'duration': 558, | ||||
|             } | ||||
|     }] | ||||
|         }, | ||||
|         { | ||||
|             'url': 'http://vk.com/video-8871596_164049491', | ||||
|             'md5': 'a590bcaf3d543576c9bd162812387666', | ||||
|             'note': 'Only available for registered users', | ||||
|             'info_dict': { | ||||
|                 'id': '164049491', | ||||
|                 'ext': 'mp4', | ||||
|                 'uploader': 'Триллеры', | ||||
|                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]\u00a0', | ||||
|                 'duration': 8352, | ||||
|             }, | ||||
|             'skip': 'Requires vk account credentials', | ||||
|         } | ||||
|     ] | ||||
|  | ||||
|     def _login(self): | ||||
|         (username, password) = self._get_login_info() | ||||
|         if username is None: | ||||
|             return | ||||
|  | ||||
|         login_form = { | ||||
|             'act': 'login', | ||||
|             'role': 'al_frame', | ||||
|             'expire': '1', | ||||
|             'email': username, | ||||
|             'pass': password, | ||||
|         } | ||||
|  | ||||
|         request = compat_urllib_request.Request('https://login.vk.com/?act=login', | ||||
|             compat_urllib_parse.urlencode(login_form).encode('utf-8')) | ||||
|         login_page = self._download_webpage(request, None, note='Logging in as %s' % username) | ||||
|  | ||||
|         if re.search(r'onLoginFailed', login_page): | ||||
|             raise ExtractorError('Unable to login, incorrect username and/or password', expected=True) | ||||
|  | ||||
    def _real_initialize(self):
        # Hook run once before any extraction: attempt login if credentials
        # were provided (--username/--password or the 'vk' .netrc machine).
        self._login()
|  | ||||
|     def _real_extract(self, url): | ||||
|         mobj = re.match(self._VALID_URL, url) | ||||
|         video_id = mobj.group('id') | ||||
|  | ||||
|         info_url = 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id | ||||
|         info_page = self._download_webpage(info_url, video_id) | ||||
|  | ||||
|         if re.search(r'<!>Please log in or <', info_page): | ||||
|             raise ExtractorError('This video is only available for registered users, ' | ||||
|                 'use --username and --password options to provide account credentials.', expected=True) | ||||
|  | ||||
|         m_yt = re.search(r'src="(http://www.youtube.com/.*?)"', info_page) | ||||
|         if m_yt is not None: | ||||
|             self.to_screen(u'Youtube video detected') | ||||
| @@ -60,4 +112,5 @@ class VKIE(InfoExtractor): | ||||
|             'title': unescapeHTML(data['md_title']), | ||||
|             'thumbnail': data.get('jpg'), | ||||
|             'uploader': data.get('md_author'), | ||||
|             'duration': data.get('duration') | ||||
|         } | ||||
|   | ||||
| @@ -138,13 +138,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): | ||||
|                          (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/| | ||||
|                             (?:www\.)?deturl\.com/www\.youtube\.com/| | ||||
|                             (?:www\.)?pwnyoutube\.com/| | ||||
|                             (?:www\.)?yourepeat\.com/| | ||||
|                             tube\.majestyc\.net/| | ||||
|                             youtube\.googleapis\.com/)                        # the various hostnames, with wildcard subdomains | ||||
|                          (?:.*?\#/)?                                          # handle anchor (#/) redirect urls | ||||
|                          (?:                                                  # the various things that can precede the ID: | ||||
|                              (?:(?:v|embed|e)/)                               # v/ or embed/ or e/ | ||||
|                              |(?:                                             # or the v= param in all its forms | ||||
|                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?)?    # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||
|                                  (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)?  # preceding watch(_popup|.php) or nothing (like /?v=xxxx) | ||||
|                                  (?:\?|\#!?)                                  # the params delimiter ? or # or #! | ||||
|                                  (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx) | ||||
|                                  v= | ||||
| @@ -1694,7 +1695,8 @@ class YoutubeSearchIE(SearchInfoExtractor): | ||||
|             api_response = data['data'] | ||||
|  | ||||
|             if 'items' not in api_response: | ||||
|                 raise ExtractorError(u'[youtube] No video results') | ||||
|                 raise ExtractorError( | ||||
|                     u'[youtube] No video results', expected=True) | ||||
|  | ||||
|             new_ids = list(video['id'] for video in api_response['items']) | ||||
|             video_ids += new_ids | ||||
| @@ -1814,7 +1816,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): | ||||
|     IE_NAME = 'youtube:truncated_url' | ||||
|     IE_DESC = False  # Do not list | ||||
|     _VALID_URL = r'''(?x) | ||||
|         (?:https?://)?[^/]+/watch\?feature=[a-z_]+$| | ||||
|         (?:https?://)?[^/]+/watch\?(?:feature=[a-z_]+)?$| | ||||
|         (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ | ||||
|     ''' | ||||
|  | ||||
|   | ||||
| @@ -17,6 +17,7 @@ import platform | ||||
| import re | ||||
| import ssl | ||||
| import socket | ||||
| import struct | ||||
| import subprocess | ||||
| import sys | ||||
| import traceback | ||||
| @@ -761,6 +762,7 @@ def unified_strdate(date_str): | ||||
|     date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str) | ||||
|     format_expressions = [ | ||||
|         '%d %B %Y', | ||||
|         '%d %b %Y', | ||||
|         '%B %d %Y', | ||||
|         '%b %d %Y', | ||||
|         '%Y-%m-%d', | ||||
| @@ -1143,7 +1145,7 @@ def parse_duration(s): | ||||
|         return None | ||||
|  | ||||
|     m = re.match( | ||||
|         r'(?:(?:(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)$', s) | ||||
|         r'(?:(?:(?P<hours>[0-9]+)[:h])?(?P<mins>[0-9]+)[:m])?(?P<secs>[0-9]+)s?$', s) | ||||
|     if not m: | ||||
|         return None | ||||
|     res = int(m.group('secs')) | ||||
| @@ -1220,3 +1222,20 @@ def uppercase_escape(s): | ||||
|     return re.sub( | ||||
|         r'\\U([0-9a-fA-F]{8})', | ||||
|         lambda m: compat_chr(int(m.group(1), base=16)), s) | ||||
|  | ||||
# Compatibility shims for struct.pack/struct.unpack: probe once at import time
# whether this interpreter accepts a text (unicode) format spec.
try:
    struct.pack(u'!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        # Encode a text spec to ASCII bytes before delegating to struct.pack.
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        # Mirror of struct_pack for the unpacking direction.
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # Interpreter handles text specs natively; expose the originals unchanged.
    struct_pack = struct.pack
    struct_unpack = struct.unpack
|   | ||||
| @@ -1,2 +1,2 @@ | ||||
|  | ||||
| __version__ = '2014.02.13' | ||||
| __version__ = '2014.02.19' | ||||
|   | ||||
		Reference in New Issue
	
	Block a user