mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[bandcamp] Fix free downloads extraction and extract all formats (closes #11067)
This commit is contained in:
		| @@ -1,7 +1,9 @@ | |||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
|  |  | ||||||
| import json | import json | ||||||
|  | import random | ||||||
| import re | import re | ||||||
|  | import time | ||||||
|  |  | ||||||
| from .common import InfoExtractor | from .common import InfoExtractor | ||||||
| from ..compat import ( | from ..compat import ( | ||||||
| @@ -12,6 +14,9 @@ from ..utils import ( | |||||||
|     ExtractorError, |     ExtractorError, | ||||||
|     float_or_none, |     float_or_none, | ||||||
|     int_or_none, |     int_or_none, | ||||||
|  |     parse_filesize, | ||||||
|  |     unescapeHTML, | ||||||
|  |     update_url_query, | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -81,35 +86,68 @@ class BandcampIE(InfoExtractor): | |||||||
|             r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$', |             r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$', | ||||||
|             webpage, 'video id') |             webpage, 'video id') | ||||||
|  |  | ||||||
|         download_webpage = self._download_webpage(download_link, video_id, 'Downloading free downloads page') |         download_webpage = self._download_webpage( | ||||||
|         # We get the dictionary of the track from some javascript code |             download_link, video_id, 'Downloading free downloads page') | ||||||
|         all_info = self._parse_json(self._search_regex( |  | ||||||
|             r'(?sm)items: (.*?),$', download_webpage, 'items'), video_id) |         blob = self._parse_json( | ||||||
|         info = all_info[0] |             self._search_regex( | ||||||
|         # We pick mp3-320 for now, until format selection can be easily implemented. |                 r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage, | ||||||
|         mp3_info = info['downloads']['mp3-320'] |                 'blob', group='blob'), | ||||||
|         # If we try to use this url it says the link has expired |             video_id, transform_source=unescapeHTML) | ||||||
|         initial_url = mp3_info['url'] |  | ||||||
|         m_url = re.match( |         info = blob['digital_items'][0] | ||||||
|             r'(?P<server>http://(.*?)\.bandcamp\.com)/download/track\?enc=mp3-320&fsig=(?P<fsig>.*?)&id=(?P<id>.*?)&ts=(?P<ts>.*)$', |  | ||||||
|             initial_url) |         downloads = info['downloads'] | ||||||
|         # We build the url we will use to get the final track url |         track = info['title'] | ||||||
|         # This url is build in Bandcamp in the script download_bunde_*.js |  | ||||||
|         request_url = '%s/statdownload/track?enc=mp3-320&fsig=%s&id=%s&ts=%s&.rand=665028774616&.vrs=1' % (m_url.group('server'), m_url.group('fsig'), video_id, m_url.group('ts')) |         artist = info.get('artist') | ||||||
|         final_url_webpage = self._download_webpage(request_url, video_id, 'Requesting download url') |         title = '%s - %s' % (artist, track) if artist else track | ||||||
|         # If we could correctly generate the .rand field the url would be |  | ||||||
|         # in the "download_url" key |         download_formats = {} | ||||||
|         final_url = self._proto_relative_url(self._search_regex( |         for f in blob['download_formats']: | ||||||
|             r'"retry_url":"(.+?)"', final_url_webpage, 'final video URL'), 'http:') |             name, ext = f.get('name'), f.get('file_extension') | ||||||
|  |             if all(isinstance(x, compat_str) for x in (name, ext)): | ||||||
|  |                 download_formats[name] = ext.strip('.') | ||||||
|  |  | ||||||
|  |         formats = [] | ||||||
|  |         for format_id, f in downloads.items(): | ||||||
|  |             format_url = f.get('url') | ||||||
|  |             if not format_url: | ||||||
|  |                 continue | ||||||
|  |             # Stat URL generation algorithm is reverse engineered from | ||||||
|  |             # download_*_bundle_*.js | ||||||
|  |             stat_url = update_url_query( | ||||||
|  |                 format_url.replace('/download/', '/statdownload/'), { | ||||||
|  |                     '.rand': int(time.time() * 1000 * random.random()), | ||||||
|  |                 }) | ||||||
|  |             format_id = f.get('encoding_name') or format_id | ||||||
|  |             stat = self._download_json( | ||||||
|  |                 stat_url, video_id, 'Downloading %s JSON' % format_id, | ||||||
|  |                 transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1], | ||||||
|  |                 fatal=False) | ||||||
|  |             if not stat: | ||||||
|  |                 continue | ||||||
|  |             retry_url = stat.get('retry_url') | ||||||
|  |             if not isinstance(retry_url, compat_str): | ||||||
|  |                 continue | ||||||
|  |             formats.append({ | ||||||
|  |                 'url': self._proto_relative_url(retry_url, 'http:'), | ||||||
|  |                 'ext': download_formats.get(format_id), | ||||||
|  |                 'format_id': format_id, | ||||||
|  |                 'format_note': f.get('description'), | ||||||
|  |                 'filesize': parse_filesize(f.get('size_mb')), | ||||||
|  |                 'vcodec': 'none', | ||||||
|  |             }) | ||||||
|  |         self._sort_formats(formats) | ||||||
|  |  | ||||||
|         return { |         return { | ||||||
|             'id': video_id, |             'id': video_id, | ||||||
|             'title': info['title'], |             'title': title, | ||||||
|             'ext': 'mp3', |  | ||||||
|             'vcodec': 'none', |  | ||||||
|             'url': final_url, |  | ||||||
|             'thumbnail': info.get('thumb_url'), |             'thumbnail': info.get('thumb_url'), | ||||||
|             'uploader': info.get('artist'), |             'uploader': info.get('artist'), | ||||||
|  |             'artist': artist, | ||||||
|  |             'track': track, | ||||||
|  |             'formats': formats, | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user