微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

javascript – 无法从文件名中删除变音符号

在尝试替换变音符号和文件名的特殊字符时,我偶然发现了一种奇怪的行为.

函数可以正常使用字符串,但是当我尝试使用文件名为it doesn’t的完全相同的序列时.

$(document).on('change','input[type=file]',function() {
    var files = this.files;
    for (var i = 0; i < files.length; i++) {
      (function(file) {
        // Assuming the file name is áñǽŦõş
        var _string = 'áñǽŦõş.jpg',_filename = file.name;
        $('.string .result').html(convertAscii(_string));
        $('.filename .result').html(convertAscii(_filename));
      })(files[i]);
    }
});
    
function convertAscii(str) {
    str = str.replace(/Ä/g,'Ae');
    str = str.replace(/æ|ǽ|ä/g,'ae');
    str = str.replace(/À|Á|Â|Ã|Å|Ǻ|Ā|Ă|Ą|Ǎ|А/g,'A');
    str = str.replace(/à|á|â|ã|å|ǻ|ā|ă|ą|ǎ|ª|а/g,'a');
    str = str.replace(/Б/g,'B');
    str = str.replace(/б/g,'b');
    str = str.replace(/Ç|Ć|Ĉ|Ċ|Č|Ц/g,'C');
    str = str.replace(/ç|ć|ĉ|ċ|č|ц/g,'c');
    str = str.replace(/Ð|Ď|Đ/g,'Dj');
    str = str.replace(/ð|ď|đ/g,'dj');
    str = str.replace(/Д/g,'D');
    str = str.replace(/д/g,'d');
    str = str.replace(/È|É|Ê|Ë|Ē|Ĕ|Ė|Ę|Ě|Е|Ё|Э/g,'E');
    str = str.replace(/è|é|ê|ë|ē|ĕ|ė|ę|ě|е|ё|э/g,'e');
    str = str.replace(/Ф/g,'F');
    str = str.replace(/ƒ|ф/g,'f');
    str = str.replace(/Ĝ|Ğ|Ġ|Ģ|Г/g,'G');
    str = str.replace(/ĝ|ğ|ġ|ģ|г/g,'g');
    str = str.replace(/Ĥ|Ħ|Х/g,'H');
    str = str.replace(/ĥ|ħ|х/g,'h');
    str = str.replace(/Ì|Í|Î|Ï|Ĩ|Ī|Ĭ|Ǐ|Į|İ|И/g,'I');
    str = str.replace(/ì|í|î|ï|ĩ|ī|ĭ|ǐ|į|ı|и/g,'i');
    str = str.replace(/Ĵ|Й/g,'J');
    str = str.replace(/ĵ|й/g,'j');
    str = str.replace(/Ķ|К/g,'K');
    str = str.replace(/ķ|к/g,'k');
    str = str.replace(/Ĺ|Ļ|Ľ|Ŀ|Ł|Л/g,'L');
    str = str.replace(/ĺ|ļ|ľ|ŀ|ł|л/g,'l');
    str = str.replace(/М/g,'M');
    str = str.replace(/м/g,'m');
    str = str.replace(/Ñ|Ń|Ņ|Ň|Н/g,'N');
    str = str.replace(/ñ|ń|ņ|ň|ʼn|н/g,'n');
    str = str.replace(/Ö/g,'Oe');
    str = str.replace(/œ|ö/g,'oe');
    str = str.replace(/Ò|Ó|Ô|Õ|Ō|Ŏ|Ǒ|Ő|Ơ|Ø|Ǿ|О/g,'O');
    str = str.replace(/ò|ó|ô|õ|ō|ŏ|ǒ|ő|ơ|ø|ǿ|º|о/g,'o');
    str = str.replace(/П/g,'P');
    str = str.replace(/п/g,'p');
    str = str.replace(/Ŕ|Ŗ|Ř|Р/g,'R');
    str = str.replace(/ŕ|ŗ|ř|р/g,'r');
    str = str.replace(/Ś|Ŝ|Ş|Ș|Š|С/g,'S');
    str = str.replace(/ś|ŝ|ş|ș|š|ſ|с/g,'s');
    str = str.replace(/Ţ|Ț|Ť|Ŧ|Т/g,'T');
    str = str.replace(/ţ|ț|ť|ŧ|т/g,'t');
    str = str.replace(/Ü/g,'Ue');
    str = str.replace(/ü/g,'ue');
    str = str.replace(/Ù|Ú|Û|Ũ|Ū|Ŭ|Ů|Ű|Ų|Ư|Ǔ|Ǖ|Ǘ|Ǚ|Ǜ|У/g,'U');
    str = str.replace(/ù|ú|û|ũ|ū|ŭ|ů|ű|ų|ư|ǔ|ǖ|ǘ|ǚ|ǜ|у/g,'u');
    str = str.replace(/В/g,'V');
    str = str.replace(/в/g,'v');
    str = str.replace(/Ý|Ÿ|Ŷ|Ы/g,'Y');
    str = str.replace(/ý|ÿ|ŷ|ы/g,'y');
    str = str.replace(/Ŵ/g,'W');
    str = str.replace(/ŵ/g,'w');
    str = str.replace(/Ź|Ż|Ž|З/g,'Z');
    str = str.replace(/ź|ż|ž|з/g,'z');
    str = str.replace(/Æ|Ǽ/g,'AE');
    str = str.replace(/ß/g,'ss');
    str = str.replace(/IJ/g,'IJ');
    str = str.replace(/ij/g,'ij');
    str = str.replace(/Œ/g,'OE');
    str = str.replace(/Ч/g,'Ch');
    str = str.replace(/ч/g,'ch');
    str = str.replace(/Ю/g,'Ju');
    str = str.replace(/ю/g,'ju');
    str = str.replace(/Я/g,'Ja');
    str = str.replace(/я/g,'ja');
    str = str.replace(/Ш/g,'Sh');
    str = str.replace(/ш/g,'sh');
    str = str.replace(/Щ/g,'Shch');
    str = str.replace(/щ/g,'shch');
    str = str.replace(/Ж/g,'Zh');
    str = str.replace(/ж/g,'zh');
	return str;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<input type="file" name="pic" accept="image/*">

<div>
  <div class="string">Converted string : <span class="result"></span></div>
  <div class="filename">Converted filename : <span class="result"></span></div>
</div>

我也提出a fiddle显示问题,你会看到我上传一个名为áñaéTõş的jpg的意思.

我从输入中获取文件名.奇怪的行为是这样的:

console.log(convertAscii(_string)); // Correct => anaetos
console.log(convertAscii(_filename)); // Wrong => áñaéTõş

我相信这是因为变音符号被解释为独立的角色,但有人知道如何修复它吗?

解决方法

我确信你的代码因codeppoint问题而无法正常工作.有问题的人物可能看起来像你期望的特殊字符,但最终不相同.因此,字符串替换不起作用

解决此问题,正如@ClasG所建议的那样,您可以使用外部库执行Unicode规范化,因为JavaScript没有内置此功能.

> unorm是低级别的lib,可以为您进行规范化.
>还有更多高级库,如iconv lite,它们具有更多依赖性.

它还需要在代码中使用Unicode字符,以便与规范化的文本工作进行比较.

这是我修改过的fiddle.我希望这能为您解决问题.

PS:您应该正确包含unorm.js.我这样做是为了让它适用于jsfiddle.

<input type="file" name="pic" accept="image/*">
<script type="text/javascript">
  document.write("\<script src='https://raw.githubusercontent.com/walling/unorm/master/lib/unorm.js' type='text/javascript'>\<\/script>");
</script>
<div>
  <div class="string">Converted string : <span class="result"></span></div>
  <div class="filename">Converted filename : <span class="result"></span></div>
</div>

    $(document).on('change',function() {

    var files = this.files;

    for (var i = 0; i < files.length; i++) {
      (function(file) {
        // Assuming the file name is áñǽŦõş
        var _string = 'äöüß',// 'áñǽŦõş.jpg',_filename = file.name;

        $('.string .result').html(convertAscii(_string.normalize('NFC')));
        $('.filename .result').html(convertAscii(_filename.normalize('NFC')));

      })(files[i]);
    }

});

function convertAscii(str) {
    //convert German umlauts (normalized using nfc: Canonical Decomposition,followed by Canonical Composition) to Ascii
    tr = {"\u00e4":"ae","\u00fc":"ue","\u00f6":"oe","\u00df":"ss" }
    str = str.replace(/[\u00e4|\u00fc|\u00f6|\u00df]/g,function($0) { return tr[$0] })
    //... add more..

    return str;
}

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐