mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	[utils] Sanitize look-alike Unicode glyphs in non-ID filename fields when --restrict-filenames is set
Implements https://github.com/ytdl-org/youtube-dl/issues/31216#issuecomment-1236102822, which has a test.
This commit is contained in:
		| @@ -33,6 +33,7 @@ import sys | |||||||
| import tempfile | import tempfile | ||||||
| import time | import time | ||||||
| import traceback | import traceback | ||||||
|  | import unicodedata | ||||||
| import xml.etree.ElementTree | import xml.etree.ElementTree | ||||||
| import zlib | import zlib | ||||||
|  |  | ||||||
| @@ -2118,6 +2119,9 @@ def sanitize_filename(s, restricted=False, is_id=False): | |||||||
|             return '_' |             return '_' | ||||||
|         return char |         return char | ||||||
|  |  | ||||||
|  |     # Replace look-alike Unicode glyphs | ||||||
|  |     if restricted and not is_id: | ||||||
|  |         s = unicodedata.normalize('NFKC', s) | ||||||
|     # Handle timestamps |     # Handle timestamps | ||||||
|     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) |     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) | ||||||
|     result = ''.join(map(replace_insane, s)) |     result = ''.join(map(replace_insane, s)) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user