mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[utils] add a function to clean podcast URLs
This commit is contained in:
		| @@ -21,6 +21,7 @@ from youtube_dl.utils import ( | ||||
|     encode_base_n, | ||||
|     caesar, | ||||
|     clean_html, | ||||
|     clean_podcast_url, | ||||
|     date_from_str, | ||||
|     DateRange, | ||||
|     detect_exe_version, | ||||
| @@ -1470,6 +1471,10 @@ Line 1 | ||||
|         self.assertEqual(get_elements_by_attribute('class', 'foo', html), []) | ||||
|         self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), []) | ||||
|  | ||||
def test_clean_podcast_url(self):
    """Check that clean_podcast_url() removes tracking prefixes from URLs."""
    # A www.podtrac.com redirect prefix chained with a chtbl.com/track/<id>
    # prefix: both must be removed, leaving only the media host URL.
    self.assertEqual(
        clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'),
        'https://traffic.megaphone.fm/HSW7835899191.mp3')
    # A play.podtrac.com/<id> prefix in front of the media host URL.
    self.assertEqual(
        clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'),
        'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3')
|  | ||||
|  | ||||
# Run the unittest runner when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|   | ||||
| @@ -5706,3 +5706,20 @@ def random_birthday(year_field, month_field, day_field): | ||||
|         month_field: str(random_date.month), | ||||
|         day_field: str(random_date.day), | ||||
|     } | ||||
|  | ||||
|  | ||||
def clean_podcast_url(url):
    """Remove known podcast tracking/analytics prefixes from a URL.

    The listed services are used by prepending their own host (optionally
    followed by an ID segment or a redirect path) to the path of the real
    media URL.  Every occurrence of a recognised prefix is deleted, so
    several chained prefixes are all removed by a single call.

    url -- the podcast media URL as a string

    Returns the URL with all matched prefix segments removed; a URL that
    contains none of them is returned unchanged.
    """
    # (?x) enables verbose mode: whitespace in the pattern is ignored and
    # '#' starts a comment, which lets each service cite its documentation.
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
|   | ||||
		Reference in New Issue
	
	Block a user