This commit is contained in:
jakciehan
2026-05-12 08:03:21 +08:00
parent d263c7bf48
commit c4bd390478
7 changed files with 292 additions and 198 deletions
+17 -2
View File
@@ -19,6 +19,14 @@ const SENSITIVE_WORDS = {
'颠覆国家', '推翻政权', '分裂国家', '恐怖组织', '极端主义',
'反动', '暴乱', '煽动颠覆', '分裂势力', '恐怖袭击',
'邪教组织', '法轮', '法轮功', '台独', '藏独', '疆独',
// Political figure names and common variants (homophone / split-char evasion)
'习近平', '刁近平', '习大大', '习主席', '习总',
'XiJinping', 'xijinping', '习近', '近平',
'李强', '王岐山', '栗战书', '汪洋', '韩正',
'李克强', '胡锦涛', '江泽民', '温家宝', '朱镕基',
'邓小平', '毛泽东', '周恩来', '刘少奇', '彭德怀',
'薄熙来', '周永康', '徐才厚', '郭伯雄', '令计划',
'孙政才', '赵乐际', '王沪宁', '丁薛祥', '蔡奇',
],
pornography: [
@@ -98,10 +106,17 @@ function checkText(text) {
const matchedWords = [];
const categories = new Set();
const lowerText = text.toLowerCase();
// Strip common evasion characters for split-char detection
const strippedText = text.replace(/[\s\u3000.,;:!?·…—\-_\|\\/~`@#$%^&*+=<>()\[\]{}""''「」『』【】()〈〕\u200b\u200c\u200d\ufeff]/g, '').toLowerCase();
for (const [category, words] of Object.entries(SENSITIVE_WORDS)) {
for (const word of words) {
if (lowerText.includes(word.toLowerCase())) {
const lowerWord = word.toLowerCase();
if (lowerText.includes(lowerWord)) {
matchedWords.push(word);
categories.add(category);
} else if (word.length >= 2 && strippedText.includes(lowerWord)) {
// Split-char evasion detected
matchedWords.push(word);
categories.add(category);
}
@@ -120,7 +135,7 @@ function checkText(text) {
* @returns {string}
*/
function getVersion() {
// Returns a hard-coded, date-stamped version string.
// NOTE(review): two consecutive `return` statements appear here. As written,
// only the first one executes and the second is unreachable. The enclosing
// diff hunk reports the same line count before and after, so these are
// presumably the removed/added pair of a one-line change whose +/- markers
// were lost — confirm against the real file which value is current.
return '2026-05-11-v1';
return '2026-05-12-v2';
}
module.exports = {