mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[extractor/common] Eliminate media tag name regex duplication
This commit is contained in:
		| @@ -2513,15 +2513,16 @@ class InfoExtractor(object): | |||||||
|         # amp-video and amp-audio are very similar to their HTML5 counterparts |         # amp-video and amp-audio are very similar to their HTML5 counterparts | ||||||
|         # so we will include them right here (see |         # so we will include them right here (see | ||||||
|         # https://www.ampproject.org/docs/reference/components/amp-video) |         # https://www.ampproject.org/docs/reference/components/amp-video) | ||||||
|  |         _MEDIA_TAG_NAME_RE = r'(?:amp-)?(video|audio)' | ||||||
|         media_tags = [(media_tag, media_type, '') |         media_tags = [(media_tag, media_type, '') | ||||||
|                       for media_tag, media_type |                       for media_tag, media_type | ||||||
|                       in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)] |                       in re.findall(r'(?s)(<%s[^>]*/>)' % _MEDIA_TAG_NAME_RE, webpage)] | ||||||
|         media_tags.extend(re.findall( |         media_tags.extend(re.findall( | ||||||
|             # We only allow video|audio followed by a whitespace or '>'. |             # We only allow video|audio followed by a whitespace or '>'. | ||||||
|             # Allowing more characters may result in a significant slowdown (see |             # Allowing more characters may result in a significant slowdown (see | ||||||
|             # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL: |             # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL: | ||||||
|             # http://www.porntrex.com/maps/videositemap.xml). |             # http://www.porntrex.com/maps/videositemap.xml). | ||||||
|             r'(?s)(<(?P<tag>(?:amp-)?(video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage)) |             r'(?s)(<(?P<tag>%s)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>' % _MEDIA_TAG_NAME_RE, webpage)) | ||||||
|         for media_tag, _, media_type, media_content in media_tags: |         for media_tag, _, media_type, media_content in media_tags: | ||||||
|             media_info = { |             media_info = { | ||||||
|                 'formats': [], |                 'formats': [], | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user