정규표현식에 u 플래그(modifier)를 지정하면 유니코드 속성 이스케이프(\p{...})를 사용할 수 있음.
유니코드에 정의된 스크립트(문자 체계: 한글, 히라가나, 가타카나, 한자 등)별로 문자를 매칭할 수 있어 유용할 듯.

 

// Sample text mixing Latin letters with Hangul, Han, Hiragana and Katakana.
const s = 'a가韓あアb나灭いイ';

// For each Unicode script name, print the characters of `s` that belong to it.
for (const script of ['Hangul', 'Hiragana', 'Katakana', 'Han']) {
    // `\\p` in the template literal produces the regex source `\p{Script=...}`.
    // The `u` flag enables \p{...}; the `g` flag makes match() return every hit.
    const pattern = new RegExp(`\\p{Script=${script}}`, 'gu');
    // With a global regex, match() yields an array of matches, or null if none.
    const matches = s.match(pattern);
    console.log(`[${script}]`);
    console.log(matches);
}
 
// 출력
/*
[Hangul]
['가', '나']
 
[Hiragana]
['あ', 'い']
 
[Katakana]
['ア', 'イ']
 
[Han]
['韓', '灭']
*/

 

 

 

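이모지도 같은 방식으로 매칭할 수 있음. (\p{Emoji}는 키캡 이모지의 기반 문자인 숫자 0~9, #, * 도 포함하므로 아래 출력에서 '0'도 매칭됨. 기본적으로 이모지 형태로 표시되는 문자만 원하면 \p{Emoji_Presentation}을 사용.)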

// Sample text mixing letters, emoji and a digit.
const s = 'a❤가✔あ😀ア🍖0';

// For each binary Unicode property, print the characters of `s` that have it.
for (const binaryUnicodeProperty of ['Emoji', 'Emoji_Presentation']) {
    // `\\p` in the template literal produces the regex source `\p{...}`.
    // The `u` flag enables \p{...}; the `g` flag makes match() return every hit.
    const pattern = new RegExp(`\\p{${binaryUnicodeProperty}}`, 'gu');
    // With a global regex, match() yields an array of matches, or null if none.
    const matches = s.match(pattern);
    console.log(`[${binaryUnicodeProperty}]`);
    console.log(matches);
}


// 출력
/*
[Emoji]
['❤', '✔', '😀', '🍖', '0']

[Emoji_Presentation]
['😀', '🍖']
*/

 

 

[참고]

https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Unicode_Property_Escapes

https://www.regular-expressions.info/unicode.html

 

 

Posted by bloodguy
,