当系统编码与文件编码不一致时，如何确定系统使用的编码？

# -*- coding: utf-8 -*-
"""Tests for the ``scope`` library against a throw-away directory tree.

HOME is redirected to /tmp/test so that every '~' path used below expands
inside a disposable directory that the fixture creates and removes.
"""
import itertools
import os
import shutil
import unicodedata
from os import environ

import pytest as p

# HOME is overridden before the library under test is imported, matching the
# original statement order, so '~' expands inside /tmp/test from the start.
environ['HOME'] = '/tmp/test'

import scope as s  # This is the library I am testing


def _nfc(path):
    """Return *path* normalized to Unicode NFC.

    Some file systems (notably on macOS) hand back decomposed names
    ('e' + COMBINING ACUTE ACCENT) for what was written as a precomposed
    'é'. Normalizing both sides of a comparison makes the two spellings
    compare equal.
    """
    return unicodedata.normalize('NFC', path)


@p.fixture(scope='module')
def scopes(request):
    """Create temporary directories and test files; return the scopes.

    The directory tree is built under the redirected HOME and removed again
    by a finalizer once the module's tests have run.

    Returns:
        A list of three ``s.Scope`` objects: 'home', 'simple', 'recursive'.
    """
    dirs = dict(
        home=['~'],
        simple=['~/simple1', '~/simple2', '~/simple3'],
        recursive=[
            '~/réc',
            '~/réc/sub11',
            '~/réc/sub12',
            '~/réc/sub11/sub111',
            '~/réc/sub11/sub112',
            '~/réc/sub12/sub 121',
            '~/réc/sub12/sub 122',
        ],
    )

    # Create the test directories. makedirs(exist_ok=True) tolerates a tree
    # left behind by a previously aborted run and does not depend on parents
    # being listed before their children.
    os.makedirs(os.path.expanduser('~'), exist_ok=True)
    for pthlist in dirs.values():
        for dirpth in pthlist:
            if dirpth != '~':
                os.makedirs(os.path.expanduser(dirpth), exist_ok=True)

    # The same set of file names goes into every directory, so build it once.
    hidden = ('', '.')
    base = ('test', 'zest', 'hello')
    num = ('1', '2', '3', '4', '5')
    ext = ('.txt', '.jpg', '.pdf', '.todo', '.otl')
    fnames = [''.join(parts)
              for parts in itertools.product(hidden, base, num, ext)]

    # Make a few (empty) files in each directory too.
    for pthlist in dirs.values():
        for dirpth in pthlist:
            target = os.path.expanduser(dirpth)
            for fname in fnames:
                with open(os.path.join(target, fname), 'w'):
                    pass

    def delete_directories():
        """Remove the whole temporary HOME tree."""
        shutil.rmtree('/tmp/test')

    request.addfinalizer(delete_directories)

    return [
        s.Scope('home', 'no scope filtera applied'),
        s.Scope('simple', 'simple scope',
                ['~/simple1', '~/simple2', '~/simple3']),
        s.Scope('recursive', 'recursive scope',
                ['~/r*c/**', '~/réc/sub11/sub111'],
                ['~/r*c/**/*1']),
    ]


class Test_Scope:
    """Tests for directory resolution of ``s.Scope`` objects."""

    def test_recursive_paths(self, scopes):
        """The recursive scope resolves to the expected directories.

        Paths are compared after NFC normalization so the result does not
        depend on whether the file system reports 'é' precomposed or as
        'e' + combining accent.
        """
        expected = [
            os.path.expanduser(item)
            for item in ['~/réc/sub12',
                         '~/réc/sub11/sub111',
                         '~/réc/sub11/sub112',
                         '~/réc/sub12/sub 122']
        ]
        # NOTE(review): assumes get_dir() yields text (str) paths - confirm
        # against the scope library.
        actual = scopes[2].get_dir()
        assert sorted(_nfc(path) for path in actual) == \
            sorted(_nfc(path) for path in expected)

1条回答

网友

1楼 · 发布于 2024-10-03 23:24:13

它们都是 UTF-8，只是同一个字符的两种不同表示方式：

>>> import unicodedata
>>> unicodedata.name(b'\xcc\x81'.decode('utf8'))
'COMBINING ACUTE ACCENT'
>>> unicodedata.name(b'\xc3\xa9'.decode('utf8'))
'LATIN SMALL LETTER E WITH ACUTE'

>>> print(b'\xc3\xa9'.decode('utf8'))
é
>>> print(b'\xcc\x81'.decode('utf8'))
 ́
>>> print(b'e\xcc\x81'.decode('utf8'))
é

所以，当 OS X 写入文件/目录名时，它写的是“e”+“组合锐音符”（分解形式），而你期望的是单个预组合字符“é”。

要解决这个问题，您需要比较规范化后的 unicode 字符串，而不是字节字符串（甚至不是仅仅解码后的 unicode 字符串）。Python 标准库中的 unicodedata.normalize 函数可以完成这一操作：

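>>> import unicodedata
>>> unicodedata.normalize('NFC', b'e\xcc\x81'.decode('utf8')) == b'\xc3\xa9'.decode('utf8')
True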

相关问题更多 >

编程相关推荐

热门问题

热门文章