commit
This commit is contained in:
@@ -19,6 +19,14 @@ const SENSITIVE_WORDS = {
|
||||
'颠覆国家', '推翻政权', '分裂国家', '恐怖组织', '极端主义',
|
||||
'反动', '暴乱', '煽动颠覆', '分裂势力', '恐怖袭击',
|
||||
'邪教组织', '法轮', '法轮功', '台独', '藏独', '疆独',
|
||||
// Political figure names and common variants (homophone / split-char evasion)
|
||||
'习近平', '刁近平', '习大大', '习主席', '习总',
|
||||
'XiJinping', 'xijinping', '习近', '近平',
|
||||
'李强', '王岐山', '栗战书', '汪洋', '韩正',
|
||||
'李克强', '胡锦涛', '江泽民', '温家宝', '朱镕基',
|
||||
'邓小平', '毛泽东', '周恩来', '刘少奇', '彭德怀',
|
||||
'薄熙来', '周永康', '徐才厚', '郭伯雄', '令计划',
|
||||
'孙政才', '赵乐际', '王沪宁', '丁薛祥', '蔡奇',
|
||||
],
|
||||
|
||||
pornography: [
|
||||
@@ -98,10 +106,17 @@ function checkText(text) {
|
||||
const matchedWords = [];
|
||||
const categories = new Set();
|
||||
const lowerText = text.toLowerCase();
|
||||
// Strip common evasion characters for split-char detection
|
||||
const strippedText = text.replace(/[\s\u3000.,;:!?·…—\-_\|\\/~~`@#$%^&*+=<>()\[\]{}""''「」『』【】()〈〕\u200b\u200c\u200d\ufeff]/g, '').toLowerCase();
|
||||
|
||||
for (const [category, words] of Object.entries(SENSITIVE_WORDS)) {
|
||||
for (const word of words) {
|
||||
if (lowerText.includes(word.toLowerCase())) {
|
||||
const lowerWord = word.toLowerCase();
|
||||
if (lowerText.includes(lowerWord)) {
|
||||
matchedWords.push(word);
|
||||
categories.add(category);
|
||||
} else if (word.length >= 2 && strippedText.includes(lowerWord)) {
|
||||
// Split-char evasion detected
|
||||
matchedWords.push(word);
|
||||
categories.add(category);
|
||||
}
|
||||
@@ -120,7 +135,7 @@ function checkText(text) {
|
||||
* @returns {string}
|
||||
*/
|
||||
function getVersion() {
  // Fixed version tag, bumped by hand. Only the newest value is returned:
  // an earlier `return '2026-05-11-v1';` used to precede this line, which
  // made the updated tag unreachable.
  // NOTE(review): presumably this identifies the revision of the word list /
  // matching rules in this module — confirm against callers.
  return '2026-05-12-v2';
}
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user