mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Add back YoutubeSearchURLIE
				
					
				
			This commit is contained in:
		| @@ -66,9 +66,9 @@ class TestAllURLsMatching(unittest.TestCase): | |||||||
|         self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) |         self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) | ||||||
|         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) |         self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) | ||||||
|  |  | ||||||
|     # def test_youtube_search_matching(self): |     def test_youtube_search_matching(self): | ||||||
|     #     self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) |         self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) | ||||||
|     #     self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) |         self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) | ||||||
|  |  | ||||||
|     def test_youtube_extract(self): |     def test_youtube_extract(self): | ||||||
|         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) |         assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id) | ||||||
|   | |||||||
| @@ -1556,7 +1556,7 @@ from .youtube import ( | |||||||
|     YoutubeRecommendedIE, |     YoutubeRecommendedIE, | ||||||
|     YoutubeSearchDateIE, |     YoutubeSearchDateIE, | ||||||
|     YoutubeSearchIE, |     YoutubeSearchIE, | ||||||
|     #YoutubeSearchURLIE, |     YoutubeSearchURLIE, | ||||||
|     YoutubeSubscriptionsIE, |     YoutubeSubscriptionsIE, | ||||||
|     YoutubeTruncatedIDIE, |     YoutubeTruncatedIDIE, | ||||||
|     YoutubeTruncatedURLIE, |     YoutubeTruncatedURLIE, | ||||||
|   | |||||||
| @@ -308,6 +308,77 @@ class YoutubeBaseInfoExtractor(InfoExtractor): | |||||||
|                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', |                 r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg', | ||||||
|                 default='{}'), video_id, fatal=False) |                 default='{}'), video_id, fatal=False) | ||||||
|  |  | ||||||
|  |     def _search_results(self, query, params): | ||||||
|  |         data = { | ||||||
|  |             'context': { | ||||||
|  |                 'client': { | ||||||
|  |                     'clientName': 'WEB', | ||||||
|  |                     'clientVersion': '2.20201021.03.00', | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             'query': query, | ||||||
|  |         } | ||||||
|  |         if params: | ||||||
|  |             data['params'] = params | ||||||
|  |         for page_num in itertools.count(1): | ||||||
|  |             search = self._download_json( | ||||||
|  |                 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', | ||||||
|  |                 video_id='query "%s"' % query, | ||||||
|  |                 note='Downloading page %s' % page_num, | ||||||
|  |                 errnote='Unable to download API page', fatal=False, | ||||||
|  |                 data=json.dumps(data).encode('utf8'), | ||||||
|  |                 headers={'content-type': 'application/json'}) | ||||||
|  |             if not search: | ||||||
|  |                 break | ||||||
|  |             slr_contents = try_get( | ||||||
|  |                 search, | ||||||
|  |                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], | ||||||
|  |                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), | ||||||
|  |                 list) | ||||||
|  |             if not slr_contents: | ||||||
|  |                 break | ||||||
|  |             isr_contents = try_get( | ||||||
|  |                 slr_contents, | ||||||
|  |                 lambda x: x[0]['itemSectionRenderer']['contents'], | ||||||
|  |                 list) | ||||||
|  |             if not isr_contents: | ||||||
|  |                 break | ||||||
|  |             for content in isr_contents: | ||||||
|  |                 if not isinstance(content, dict): | ||||||
|  |                     continue | ||||||
|  |                 video = content.get('videoRenderer') | ||||||
|  |                 if not isinstance(video, dict): | ||||||
|  |                     continue | ||||||
|  |                 video_id = video.get('videoId') | ||||||
|  |                 if not video_id: | ||||||
|  |                     continue | ||||||
|  |                 title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) | ||||||
|  |                 description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) | ||||||
|  |                 duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) | ||||||
|  |                 view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' | ||||||
|  |                 view_count = int_or_none(self._search_regex( | ||||||
|  |                     r'^(\d+)', re.sub(r'\s', '', view_count_text), | ||||||
|  |                     'view count', default=None)) | ||||||
|  |                 uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) | ||||||
|  |                 yield { | ||||||
|  |                     '_type': 'url_transparent', | ||||||
|  |                     'ie_key': YoutubeIE.ie_key(), | ||||||
|  |                     'id': video_id, | ||||||
|  |                     'url': video_id, | ||||||
|  |                     'title': title, | ||||||
|  |                     'description': description, | ||||||
|  |                     'duration': duration, | ||||||
|  |                     'view_count': view_count, | ||||||
|  |                     'uploader': uploader, | ||||||
|  |                 } | ||||||
|  |             token = try_get( | ||||||
|  |                 slr_contents, | ||||||
|  |                 lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], | ||||||
|  |                 compat_str) | ||||||
|  |             if not token: | ||||||
|  |                 break | ||||||
|  |             data['continuation'] = token | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeIE(YoutubeBaseInfoExtractor): | class YoutubeIE(YoutubeBaseInfoExtractor): | ||||||
|     IE_DESC = 'YouTube.com' |     IE_DESC = 'YouTube.com' | ||||||
| @@ -2454,7 +2525,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): | |||||||
|                         (?: |                         (?: | ||||||
|                             (?:channel|c|user|feed)/| |                             (?:channel|c|user|feed)/| | ||||||
|                             (?:playlist|watch)\?.*?\blist=| |                             (?:playlist|watch)\?.*?\blist=| | ||||||
|                             (?!(?:watch|embed|v|e)\b) |                             (?!(?:watch|embed|v|e|results)\b) | ||||||
|                         ) |                         ) | ||||||
|                         (?P<id>[^/?\#&]+) |                         (?P<id>[^/?\#&]+) | ||||||
|                     ''' |                     ''' | ||||||
| @@ -3379,88 +3450,18 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): | |||||||
|  |  | ||||||
| class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): | class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor): | ||||||
|     IE_DESC = 'YouTube.com searches' |     IE_DESC = 'YouTube.com searches' | ||||||
|     # there doesn't appear to be a real limit, for example if you search for |  | ||||||
|     # 'python' you get more than 8.000.000 results |  | ||||||
|     _MAX_RESULTS = float('inf') |  | ||||||
|     IE_NAME = 'youtube:search' |     IE_NAME = 'youtube:search' | ||||||
|     _SEARCH_KEY = 'ytsearch' |     _SEARCH_KEY = 'ytsearch' | ||||||
|     _SEARCH_PARAMS = None |     _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only | ||||||
|     _TESTS = [] |     _TESTS = [] | ||||||
|  |  | ||||||
|     def _entries(self, query, n): |     def _entries(self, query, n): | ||||||
|         data = { |  | ||||||
|             'context': { |  | ||||||
|                 'client': { |  | ||||||
|                     'clientName': 'WEB', |  | ||||||
|                     'clientVersion': '2.20201021.03.00', |  | ||||||
|                 } |  | ||||||
|             }, |  | ||||||
|             'query': query, |  | ||||||
|         } |  | ||||||
|         if self._SEARCH_PARAMS: |  | ||||||
|             data['params'] = self._SEARCH_PARAMS |  | ||||||
|         total = 0 |         total = 0 | ||||||
|         for page_num in itertools.count(1): |         for entry in self._search_results(query, self._SEARCH_PARAMS): | ||||||
|             search = self._download_json( |             yield entry | ||||||
|                 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', |             total += 1 | ||||||
|                 video_id='query "%s"' % query, |             if total >= n: | ||||||
|                 note='Downloading page %s' % page_num, |                 return | ||||||
|                 errnote='Unable to download API page', fatal=False, |  | ||||||
|                 data=json.dumps(data).encode('utf8'), |  | ||||||
|                 headers={'content-type': 'application/json'}) |  | ||||||
|             if not search: |  | ||||||
|                 break |  | ||||||
|             slr_contents = try_get( |  | ||||||
|                 search, |  | ||||||
|                 (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], |  | ||||||
|                  lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), |  | ||||||
|                 list) |  | ||||||
|             if not slr_contents: |  | ||||||
|                 break |  | ||||||
|             isr_contents = try_get( |  | ||||||
|                 slr_contents, |  | ||||||
|                 lambda x: x[0]['itemSectionRenderer']['contents'], |  | ||||||
|                 list) |  | ||||||
|             if not isr_contents: |  | ||||||
|                 break |  | ||||||
|             for content in isr_contents: |  | ||||||
|                 if not isinstance(content, dict): |  | ||||||
|                     continue |  | ||||||
|                 video = content.get('videoRenderer') |  | ||||||
|                 if not isinstance(video, dict): |  | ||||||
|                     continue |  | ||||||
|                 video_id = video.get('videoId') |  | ||||||
|                 if not video_id: |  | ||||||
|                     continue |  | ||||||
|                 title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) |  | ||||||
|                 description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) |  | ||||||
|                 duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) |  | ||||||
|                 view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' |  | ||||||
|                 view_count = int_or_none(self._search_regex( |  | ||||||
|                     r'^(\d+)', re.sub(r'\s', '', view_count_text), |  | ||||||
|                     'view count', default=None)) |  | ||||||
|                 uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) |  | ||||||
|                 total += 1 |  | ||||||
|                 yield { |  | ||||||
|                     '_type': 'url_transparent', |  | ||||||
|                     'ie_key': YoutubeIE.ie_key(), |  | ||||||
|                     'id': video_id, |  | ||||||
|                     'url': video_id, |  | ||||||
|                     'title': title, |  | ||||||
|                     'description': description, |  | ||||||
|                     'duration': duration, |  | ||||||
|                     'view_count': view_count, |  | ||||||
|                     'uploader': uploader, |  | ||||||
|                 } |  | ||||||
|                 if total == n: |  | ||||||
|                     return |  | ||||||
|             token = try_get( |  | ||||||
|                 slr_contents, |  | ||||||
|                 lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], |  | ||||||
|                 compat_str) |  | ||||||
|             if not token: |  | ||||||
|                 break |  | ||||||
|             data['continuation'] = token |  | ||||||
|  |  | ||||||
|     def _get_n_results(self, query, n): |     def _get_n_results(self, query, n): | ||||||
|         """Get a specified number of results for a query""" |         """Get a specified number of results for a query""" | ||||||
| @@ -3471,18 +3472,19 @@ class YoutubeSearchDateIE(YoutubeSearchIE): | |||||||
|     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' |     IE_NAME = YoutubeSearchIE.IE_NAME + ':date' | ||||||
|     _SEARCH_KEY = 'ytsearchdate' |     _SEARCH_KEY = 'ytsearchdate' | ||||||
|     IE_DESC = 'YouTube.com searches, newest videos first' |     IE_DESC = 'YouTube.com searches, newest videos first' | ||||||
|     _SEARCH_PARAMS = 'CAI%3D' |     _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date | ||||||
|  |     _TESTS = [] | ||||||
|  |  | ||||||
|  |  | ||||||
| r""" | class YoutubeSearchURLIE(YoutubeBaseInfoExtractor): | ||||||
| class YoutubeSearchURLIE(YoutubeSearchIE): |     IE_DESC = 'YouTube search URLs with sorting and filter support' | ||||||
|     IE_DESC = 'YouTube.com search URLs' |     IE_NAME = YoutubeSearchIE.IE_NAME + '_url' | ||||||
|     IE_NAME = 'youtube:search_url' |     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)' | ||||||
|     _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)' |  | ||||||
|     _TESTS = [{ |     _TESTS = [{ | ||||||
|         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', |         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', | ||||||
|         'playlist_mincount': 5, |         'playlist_mincount': 5, | ||||||
|         'info_dict': { |         'info_dict': { | ||||||
|  |             'id': 'youtube-dl test video', | ||||||
|             'title': 'youtube-dl test video', |             'title': 'youtube-dl test video', | ||||||
|         } |         } | ||||||
|     }, { |     }, { | ||||||
| @@ -3491,11 +3493,10 @@ class YoutubeSearchURLIE(YoutubeSearchIE): | |||||||
|     }] |     }] | ||||||
|  |  | ||||||
|     def _real_extract(self, url): |     def _real_extract(self, url): | ||||||
|         mobj = re.match(self._VALID_URL, url) |         qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) | ||||||
|         query = compat_urllib_parse_unquote_plus(mobj.group('query')) |         query = (qs.get('search_query') or qs.get('q'))[0] | ||||||
|         webpage = self._download_webpage(url, query) |         params = qs.get('sp', ('',))[0] | ||||||
|         return self.playlist_result(self._process_page(webpage), playlist_title=query) |         return self.playlist_result(self._search_results(query, params), query, query) | ||||||
| """ |  | ||||||
|  |  | ||||||
|  |  | ||||||
| class YoutubeFeedsInfoExtractor(YoutubeTabIE): | class YoutubeFeedsInfoExtractor(YoutubeTabIE): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user