mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[extractor/common] Introduce expected_status for conveniently accepting failed HTTP requests
Useful when some non-success (non-2xx) HTTP status codes should be considered normal. Previously, this required manually catching the corresponding exceptions and reading the response.
This commit is contained in:
		| @@ -19,6 +19,7 @@ from ..compat import ( | |||||||
|     compat_cookies, |     compat_cookies, | ||||||
|     compat_etree_fromstring, |     compat_etree_fromstring, | ||||||
|     compat_getpass, |     compat_getpass, | ||||||
|  |     compat_integer_types, | ||||||
|     compat_http_client, |     compat_http_client, | ||||||
|     compat_os_name, |     compat_os_name, | ||||||
|     compat_str, |     compat_str, | ||||||
| @@ -548,8 +549,26 @@ class InfoExtractor(object): | |||||||
|     def IE_NAME(self): |     def IE_NAME(self): | ||||||
|         return compat_str(type(self).__name__[:-2]) |         return compat_str(type(self).__name__[:-2]) | ||||||
|  |  | ||||||
|     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}): |     @staticmethod | ||||||
|         """ Returns the response handle """ |     def __can_accept_status_code(err, expected_status): | ||||||
|  |         assert isinstance(err, compat_urllib_error.HTTPError) | ||||||
|  |         if expected_status is None: | ||||||
|  |             return False | ||||||
|  |         if isinstance(expected_status, compat_integer_types): | ||||||
|  |             return err.code == expected_status | ||||||
|  |         elif isinstance(expected_status, (list, tuple)): | ||||||
|  |             return err.code in expected_status | ||||||
|  |         elif callable(expected_status): | ||||||
|  |             return expected_status(err.code) is True | ||||||
|  |         else: | ||||||
|  |             assert False | ||||||
|  |  | ||||||
|  |     def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None): | ||||||
|  |         """ | ||||||
|  |         Return the response handle. | ||||||
|  |  | ||||||
|  |         See _download_webpage docstring for arguments specification. | ||||||
|  |         """ | ||||||
|         if note is None: |         if note is None: | ||||||
|             self.report_download_webpage(video_id) |             self.report_download_webpage(video_id) | ||||||
|         elif note is not False: |         elif note is not False: | ||||||
| @@ -578,6 +597,10 @@ class InfoExtractor(object): | |||||||
|         try: |         try: | ||||||
|             return self._downloader.urlopen(url_or_request) |             return self._downloader.urlopen(url_or_request) | ||||||
|         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: |         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: | ||||||
|  |             if isinstance(err, compat_urllib_error.HTTPError): | ||||||
|  |                 if self.__can_accept_status_code(err, expected_status): | ||||||
|  |                     return err.fp | ||||||
|  |  | ||||||
|             if errnote is False: |             if errnote is False: | ||||||
|                 return False |                 return False | ||||||
|             if errnote is None: |             if errnote is None: | ||||||
| @@ -590,13 +613,17 @@ class InfoExtractor(object): | |||||||
|                 self._downloader.report_warning(errmsg) |                 self._downloader.report_warning(errmsg) | ||||||
|                 return False |                 return False | ||||||
|  |  | ||||||
|     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}): |     def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): | ||||||
|         """ Returns a tuple (page content as string, URL handle) """ |         """ | ||||||
|  |         Return a tuple (page content as string, URL handle). | ||||||
|  |  | ||||||
|  |         See _download_webpage docstring for arguments specification. | ||||||
|  |         """ | ||||||
|         # Strip hashes from the URL (#1038) |         # Strip hashes from the URL (#1038) | ||||||
|         if isinstance(url_or_request, (compat_str, str)): |         if isinstance(url_or_request, (compat_str, str)): | ||||||
|             url_or_request = url_or_request.partition('#')[0] |             url_or_request = url_or_request.partition('#')[0] | ||||||
|  |  | ||||||
|         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query) |         urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status) | ||||||
|         if urlh is False: |         if urlh is False: | ||||||
|             assert not fatal |             assert not fatal | ||||||
|             return False |             return False | ||||||
| @@ -685,13 +712,52 @@ class InfoExtractor(object): | |||||||
|  |  | ||||||
|         return content |         return content | ||||||
|  |  | ||||||
|     def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None, data=None, headers={}, query={}): |     def _download_webpage( | ||||||
|         """ Returns the data of the page as a string """ |             self, url_or_request, video_id, note=None, errnote=None, | ||||||
|  |             fatal=True, tries=1, timeout=5, encoding=None, data=None, | ||||||
|  |             headers={}, query={}, expected_status=None): | ||||||
|  |         """ | ||||||
|  |         Return the data of the page as a string. | ||||||
|  |  | ||||||
|  |         Arguments: | ||||||
|  |         url_or_request -- plain text URL as a string or | ||||||
|  |             a compat_urllib_request.Request object | ||||||
|  |         video_id -- Video/playlist/item identifier (string) | ||||||
|  |  | ||||||
|  |         Keyword arguments: | ||||||
|  |         note -- note printed before downloading (string) | ||||||
|  |         errnote -- note printed in case of an error (string) | ||||||
|  |         fatal -- flag denoting whether error should be considered fatal, | ||||||
|  |             i.e. whether it should cause ExtractionError to be raised, | ||||||
|  |             otherwise a warning will be reported and extraction continued | ||||||
|  |         tries -- number of tries | ||||||
|  |         timeout -- sleep interval between tries | ||||||
|  |         encoding -- encoding for a page content decoding, guessed automatically | ||||||
|  |             when not explicitly specified | ||||||
|  |         data -- POST data (bytes) | ||||||
|  |         headers -- HTTP headers (dict) | ||||||
|  |         query -- URL query (dict) | ||||||
|  |         expected_status -- allows accepting failed HTTP requests (non-2xx | ||||||
|  |             status codes) by explicitly specifying a set of accepted status | ||||||
|  |             codes. Can be any of the following entities: | ||||||
|  |                 - an integer type specifying an exact failed status code to | ||||||
|  |                   accept | ||||||
|  |                 - a list or a tuple of integer types specifying a list of | ||||||
|  |                   failed status codes to accept | ||||||
|  |                 - a callable accepting an actual failed status code and | ||||||
|  |                   returning True if it should be accepted | ||||||
|  |             Note that this argument does not affect success status codes (2xx) | ||||||
|  |             which are always accepted. | ||||||
|  |         """ | ||||||
|  |  | ||||||
|         success = False |         success = False | ||||||
|         try_count = 0 |         try_count = 0 | ||||||
|         while success is False: |         while success is False: | ||||||
|             try: |             try: | ||||||
|                 res = self._download_webpage_handle(url_or_request, video_id, note, errnote, fatal, encoding=encoding, data=data, headers=headers, query=query) |                 res = self._download_webpage_handle( | ||||||
|  |                     url_or_request, video_id, note, errnote, fatal, | ||||||
|  |                     encoding=encoding, data=data, headers=headers, query=query, | ||||||
|  |                     expected_status=expected_status) | ||||||
|                 success = True |                 success = True | ||||||
|             except compat_http_client.IncompleteRead as e: |             except compat_http_client.IncompleteRead as e: | ||||||
|                 try_count += 1 |                 try_count += 1 | ||||||
| @@ -707,11 +773,17 @@ class InfoExtractor(object): | |||||||
|     def _download_xml_handle( |     def _download_xml_handle( | ||||||
|             self, url_or_request, video_id, note='Downloading XML', |             self, url_or_request, video_id, note='Downloading XML', | ||||||
|             errnote='Unable to download XML', transform_source=None, |             errnote='Unable to download XML', transform_source=None, | ||||||
|             fatal=True, encoding=None, data=None, headers={}, query={}): |             fatal=True, encoding=None, data=None, headers={}, query={}, | ||||||
|         """Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle)""" |             expected_status=None): | ||||||
|  |         """ | ||||||
|  |         Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). | ||||||
|  |  | ||||||
|  |         See _download_webpage docstring for arguments specification. | ||||||
|  |         """ | ||||||
|         res = self._download_webpage_handle( |         res = self._download_webpage_handle( | ||||||
|             url_or_request, video_id, note, errnote, fatal=fatal, |             url_or_request, video_id, note, errnote, fatal=fatal, | ||||||
|             encoding=encoding, data=data, headers=headers, query=query) |             encoding=encoding, data=data, headers=headers, query=query, | ||||||
|  |             expected_status=expected_status) | ||||||
|         if res is False: |         if res is False: | ||||||
|             return res |             return res | ||||||
|         xml_string, urlh = res |         xml_string, urlh = res | ||||||
| @@ -719,15 +791,21 @@ class InfoExtractor(object): | |||||||
|             xml_string, video_id, transform_source=transform_source, |             xml_string, video_id, transform_source=transform_source, | ||||||
|             fatal=fatal), urlh |             fatal=fatal), urlh | ||||||
|  |  | ||||||
|     def _download_xml(self, url_or_request, video_id, |     def _download_xml( | ||||||
|                       note='Downloading XML', errnote='Unable to download XML', |             self, url_or_request, video_id, | ||||||
|                       transform_source=None, fatal=True, encoding=None, |             note='Downloading XML', errnote='Unable to download XML', | ||||||
|                       data=None, headers={}, query={}): |             transform_source=None, fatal=True, encoding=None, | ||||||
|         """Return the xml as an xml.etree.ElementTree.Element""" |             data=None, headers={}, query={}, expected_status=None): | ||||||
|  |         """ | ||||||
|  |         Return the xml as an xml.etree.ElementTree.Element. | ||||||
|  |  | ||||||
|  |         See _download_webpage docstring for arguments specification. | ||||||
|  |         """ | ||||||
|         res = self._download_xml_handle( |         res = self._download_xml_handle( | ||||||
|             url_or_request, video_id, note=note, errnote=errnote, |             url_or_request, video_id, note=note, errnote=errnote, | ||||||
|             transform_source=transform_source, fatal=fatal, encoding=encoding, |             transform_source=transform_source, fatal=fatal, encoding=encoding, | ||||||
|             data=data, headers=headers, query=query) |             data=data, headers=headers, query=query, | ||||||
|  |             expected_status=expected_status) | ||||||
|         return res if res is False else res[0] |         return res if res is False else res[0] | ||||||
|  |  | ||||||
|     def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): |     def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True): | ||||||
| @@ -745,11 +823,17 @@ class InfoExtractor(object): | |||||||
|     def _download_json_handle( |     def _download_json_handle( | ||||||
|             self, url_or_request, video_id, note='Downloading JSON metadata', |             self, url_or_request, video_id, note='Downloading JSON metadata', | ||||||
|             errnote='Unable to download JSON metadata', transform_source=None, |             errnote='Unable to download JSON metadata', transform_source=None, | ||||||
|             fatal=True, encoding=None, data=None, headers={}, query={}): |             fatal=True, encoding=None, data=None, headers={}, query={}, | ||||||
|         """Return a tuple (JSON object, URL handle)""" |             expected_status=None): | ||||||
|  |         """ | ||||||
|  |         Return a tuple (JSON object, URL handle). | ||||||
|  |  | ||||||
|  |         See _download_webpage docstring for arguments specification. | ||||||
|  |         """ | ||||||
|         res = self._download_webpage_handle( |         res = self._download_webpage_handle( | ||||||
|             url_or_request, video_id, note, errnote, fatal=fatal, |             url_or_request, video_id, note, errnote, fatal=fatal, | ||||||
|             encoding=encoding, data=data, headers=headers, query=query) |             encoding=encoding, data=data, headers=headers, query=query, | ||||||
|  |             expected_status=expected_status) | ||||||
|         if res is False: |         if res is False: | ||||||
|             return res |             return res | ||||||
|         json_string, urlh = res |         json_string, urlh = res | ||||||
| @@ -760,11 +844,18 @@ class InfoExtractor(object): | |||||||
|     def _download_json( |     def _download_json( | ||||||
|             self, url_or_request, video_id, note='Downloading JSON metadata', |             self, url_or_request, video_id, note='Downloading JSON metadata', | ||||||
|             errnote='Unable to download JSON metadata', transform_source=None, |             errnote='Unable to download JSON metadata', transform_source=None, | ||||||
|             fatal=True, encoding=None, data=None, headers={}, query={}): |             fatal=True, encoding=None, data=None, headers={}, query={}, | ||||||
|  |             expected_status=None): | ||||||
|  |         """ | ||||||
|  |         Return the JSON object as a dict. | ||||||
|  |  | ||||||
|  |         See _download_webpage docstring for arguments specification. | ||||||
|  |         """ | ||||||
|         res = self._download_json_handle( |         res = self._download_json_handle( | ||||||
|             url_or_request, video_id, note=note, errnote=errnote, |             url_or_request, video_id, note=note, errnote=errnote, | ||||||
|             transform_source=transform_source, fatal=fatal, encoding=encoding, |             transform_source=transform_source, fatal=fatal, encoding=encoding, | ||||||
|             data=data, headers=headers, query=query) |             data=data, headers=headers, query=query, | ||||||
|  |             expected_status=expected_status) | ||||||
|         return res if res is False else res[0] |         return res if res is False else res[0] | ||||||
|  |  | ||||||
|     def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): |     def _parse_json(self, json_string, video_id, transform_source=None, fatal=True): | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user