mirror of
				https://github.com/ytdl-org/youtube-dl.git
				synced 2025-10-29 09:26:20 -07:00 
			
		
		
		
	Merge pull request #9367 from codesparkle/master
Feature: --restrict-filenames: replace accented characters by their unaccented counterpart instead of "_"
This commit is contained in:
		| @@ -139,8 +139,8 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True)) | ||||
|         self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True)) | ||||
|  | ||||
|         tests = 'a\xe4b\u4e2d\u56fd\u7684c' | ||||
|         self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c') | ||||
|         tests = 'aäb\u4e2d\u56fd\u7684c' | ||||
|         self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c') | ||||
|         self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename | ||||
|  | ||||
|         forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#' | ||||
| @@ -155,6 +155,10 @@ class TestUtil(unittest.TestCase): | ||||
|         self.assertTrue(sanitize_filename('-', restricted=True) != '') | ||||
|         self.assertTrue(sanitize_filename(':', restricted=True) != '') | ||||
|  | ||||
|         self.assertEqual(sanitize_filename( | ||||
|             'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True), | ||||
|             'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy') | ||||
|  | ||||
|     def test_sanitize_ids(self): | ||||
|         self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw') | ||||
|         self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw') | ||||
|   | ||||
| @@ -14,8 +14,8 @@ import email.utils | ||||
| import errno | ||||
| import functools | ||||
| import gzip | ||||
| import itertools | ||||
| import io | ||||
| import itertools | ||||
| import json | ||||
| import locale | ||||
| import math | ||||
| @@ -24,8 +24,8 @@ import os | ||||
| import pipes | ||||
| import platform | ||||
| import re | ||||
| import ssl | ||||
| import socket | ||||
| import ssl | ||||
| import struct | ||||
| import subprocess | ||||
| import sys | ||||
| @@ -89,6 +89,11 @@ KNOWN_EXTENSIONS = ( | ||||
|     'wav', | ||||
|     'f4f', 'f4m', 'm3u8', 'smil') | ||||
|  | ||||
| # Maps accented characters to ASCII replacements (e.g. 'Æ' -> 'AE', 'ß' -> 'ss'); used by sanitize_filename in restricted mode | ||||
| ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', | ||||
|                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'], | ||||
|                                         'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy'))) | ||||
|  | ||||
|  | ||||
| def preferredencoding(): | ||||
|     """Get preferred encoding. | ||||
| @@ -365,6 +370,8 @@ def sanitize_filename(s, restricted=False, is_id=False): | ||||
|     Set is_id if this is not an arbitrary string, but an ID that should be kept if possible | ||||
|     """ | ||||
|     def replace_insane(char): | ||||
|         if restricted and char in ACCENT_CHARS: | ||||
|             return ACCENT_CHARS[char] | ||||
|         if char == '?' or ord(char) < 32 or ord(char) == 127: | ||||
|             return '' | ||||
|         elif char == '"': | ||||
|   | ||||
		Reference in New Issue
	
	Block a user