今天是2024年11月24日 第47周 星期日

代人,时大变了。

我们生活在大地上,但我们的梦想超越天空。

“User:白龙/common.js”的版本间的差异

来自Akarin
跳到导航 跳到搜索
(创建页面,内容为“let s1 = /([0-9A-Za-z_])([\u4e00-\u9fa5]+)/g”)
标签:移动网页编辑 移动版编辑
 
 
(未显示2个用户的7个中间版本)
第1行: 第1行:
let s1 = /([0-9A-Za-z_])([\u4e00-\u9fa5]+)/g
+
// ==UserScript==
 +
// @name     Pangu for Wikipedia
 +
// @source    https://github.com/AlexanderMisel/pangu-for-wikipedia/blob/master/pangu_wiki.user.js
 +
// @author    Alexander Misel, Vinta Chen
 +
// @match     https://zh.wikipedia.org/wiki/*
 +
// ==/UserScript==
 +
 
 +
// CJK is short for Chinese, Japanese, and Korean.
 +
//
 +
// CJK includes following Unicode blocks:
 +
// \u2e80-\u2eff CJK Radicals Supplement
 +
// \u2f00-\u2fdf Kangxi Radicals
 +
// \u3040-\u309f Hiragana
 +
// \u30a0-\u30ff Katakana
 +
// \u3100-\u312f Bopomofo
 +
// \u3200-\u32ff Enclosed CJK Letters and Months
 +
// \u3400-\u4dbf CJK Unified Ideographs Extension A
 +
// \u4e00-\u9fff CJK Unified Ideographs
 +
// \uf900-\ufaff CJK Compatibility Ideographs
 +
//
 +
// For more information about Unicode blocks, see
 +
// http://unicode-table.com/en/
 +
// https://github.com/vinta/pangu
 +
//
 +
// all J below does not include \u30fb
 +
const CJK = '\u2e80-\u2eff\u2f00-\u2fdf\u3040-\u309f\u30a0-\u30fa\u30fc-\u30ff\u3100-\u312f\u3200-\u32ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff';
 +
 
 +
// ANS is short for Alphabets, Numbers, and Symbols.
 +
//
 +
// A includes A-Za-z\u0370-\u03ff
 +
// N includes 0-9
 +
// S includes `~!@#$%^&*()-_=+[]{}\|;'",<.>/?
 +
//
 +
// some S below does not include all symbols
 +
const ANY_CJK = new RegExp(`[${CJK}]`);
 +
 
 +
// the symbol part only includes + - * / = & | < >
 +
const CJK_OPERATOR_ANS = new RegExp(`([${CJK}])([\\+\\-\\*\\/=&\\|<>])([A-Za-z0-9])`, 'g');
 +
const ANS_OPERATOR_CJK = new RegExp(`([A-Za-z0-9])([\\+\\-\\*\\/=&\\|<>])([${CJK}])`, 'g');
 +
 
 +
const FIX_SLASH_AS = /([/]) ([a-z\-_\./]+)/g;
 +
const FIX_SLASH_AS_SLASH = /([/\.])([A-Za-z\-_\./]+) ([/])/g;
 +
 
 +
const CJK_ANS = new RegExp(`([${CJK}])([A-Za-z\u0370-\u03ff0-9@\\$%\\^&\\*\\-\\+\\\\=\\|/\u00a1-\u00ff\u2150-\u218f\u2700—\u27bf])`, 'g');
 +
const ANS_CJK = new RegExp(`([A-Za-z\u0370-\u03ff0-9~\\$%\\^&\\*\\-\\+\\\\=\\|/!;,\\.\\?\u00a1-\u00ff\u2150-\u218f\u2700—\u27bf])([${CJK}])`, 'g');
 +
 
 +
const S_A = /(%)([A-Za-z])/g;
 +
 
 +
class Pangu {
 +
  spacing(text) {
 +
   if (typeof text !== 'string') {
 +
    console.warn(`spacing(text) only accepts string but got ${typeof text}`); // eslint-disable-line no-console
 +
    return text;
 +
   }
 +
   if (text.length <= 1 || !ANY_CJK.test(text)) {
 +
    return text;
 +
   }
 +
 
 +
   const self = this;
 +
 
 +
   // DEBUG
 +
   // String.prototype.rawReplace = String.prototype.replace;
 +
   // String.prototype.replace = function(regexp, newSubstr) {
 +
   //  const oldText = this;
 +
   //  const newText = this.rawReplace(regexp, newSubstr);
 +
   //  if (oldText !== newText) {
 +
   //   console.log(`regexp: ${regexp}`);
 +
   //   console.log(`oldText: ${oldText}`);
 +
   //   console.log(`newText: ${newText}`);
 +
   //  }
 +
   //  return newText;
 +
   // };
 +
 
 +
   let newText = text;
 +
 
 +
   newText = newText.replace(CJK_OPERATOR_ANS, '$1\u2005$2\u2005$3');
 +
   newText = newText.replace(ANS_OPERATOR_CJK, '$1\u2005$2\u2005$3');
 +
 
 +
   newText = newText.replace(FIX_SLASH_AS, '$1$2');
 +
   newText = newText.replace(FIX_SLASH_AS_SLASH, '$1$2$3');
 +
 
 +
   newText = newText.replace(CJK_ANS, '$1\u2005$2');
 +
   newText = newText.replace(ANS_CJK, '$1\u2005$2');
 +
 
 +
   newText = newText.replace(S_A, '$1\u2005$2');
 +
 
 +
   // DEBUG
 +
   // String.prototype.replace = String.prototype.rawReplace;
 +
 
 +
   return newText;
 +
  }
 +
}
 +
 
 +
const pangu = new Pangu();
 +
 
 +
(function() {
 +
  'use strict';
 +
 
 +
  var traverse = function (node) {
 +
   var childNodes = node.childNodes;
 +
   for (let i = 0; i < childNodes.length; i++) {
 +
    var childNode = childNodes[i];
 +
    if (childNode.nodeType === Node.TEXT_NODE) {
 +
     childNode.data = pangu.spacing(childNode.data);
 +
     if (i === 0) {
 +
      var previousSibling = node.previousSibling;
 +
      if (previousSibling && previousSibling.nodeType === Node.TEXT_NODE) {
 +
       var testText = previousSibling.data.substr(-1) + childNode.data.charAt(0);
 +
       var testNewText = pangu.spacing(testText);
 +
       if (testText !== testNewText) {
 +
        previousSibling.data += '\u2005';
 +
       }
 +
      }
 +
     }
 +
     if (i + 1 === childNodes.length) {
 +
      var nextSibling = node.nextSibling;
 +
      if (nextSibling && nextSibling.nodeType === Node.TEXT_NODE) {
 +
       var testText = childNode.data.substr(-1) + nextSibling.data.charAt(0);
 +
       var testNewText = pangu.spacing(testText);
 +
       if (testText !== testNewText) {
 +
        nextSibling.data = '\u2005' + nextSibling.data;
 +
       }
 +
      }
 +
     }
 +
    } else if (childNode.nodeName !== 'CODE') {
 +
     traverse(childNode);
 +
    }
 +
   }
 +
  }
 +
 
 +
  traverse(document.getElementById('mw-content-text'));
 +
})();

2020年8月7日 (五) 00:06的最新版本

// ==UserScript==
// @name         Pangu for Wikipedia
// @source       https://github.com/AlexanderMisel/pangu-for-wikipedia/blob/master/pangu_wiki.user.js
// @author       Alexander Misel, Vinta Chen
// @match        https://zh.wikipedia.org/wiki/*
// ==/UserScript==

// CJK is short for Chinese, Japanese, and Korean.
//
// CJK includes following Unicode blocks:
// \u2e80-\u2eff CJK Radicals Supplement
// \u2f00-\u2fdf Kangxi Radicals
// \u3040-\u309f Hiragana
// \u30a0-\u30ff Katakana
// \u3100-\u312f Bopomofo
// \u3200-\u32ff Enclosed CJK Letters and Months
// \u3400-\u4dbf CJK Unified Ideographs Extension A
// \u4e00-\u9fff CJK Unified Ideographs
// \uf900-\ufaff CJK Compatibility Ideographs
//
// For more information about Unicode blocks, see
// http://unicode-table.com/en/
// https://github.com/vinta/pangu
//
// all J below does not include \u30fb
const CJK = '\u2e80-\u2eff\u2f00-\u2fdf\u3040-\u309f\u30a0-\u30fa\u30fc-\u30ff\u3100-\u312f\u3200-\u32ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff';

// Matches any single CJK character; deliberately non-global so that
// `.test()` carries no `lastIndex` state between calls.
const ANY_CJK = new RegExp(`[${CJK}]`);

// ANS is short for Alphabets, Numbers, and Symbols.
//
// A includes A-Za-z\u0370-\u03ff
// N includes 0-9
// S includes `~!@#$%^&*()-_=+[]{}\|;'",<.>/?
//
// some S below does not include all symbols

// CJK <operator> ANS and ANS <operator> CJK.
// the symbol part only includes + - * / = & | < >
const CJK_OPERATOR_ANS = new RegExp(`([${CJK}])([\\+\\-\\*\\/=&\\|<>])([A-Za-z0-9])`, 'g');
const ANS_OPERATOR_CJK = new RegExp(`([A-Za-z0-9])([\\+\\-\\*\\/=&\\|<>])([${CJK}])`, 'g');

// A slash followed by a space and a lowercase path-like run, and a
// slash/dot + path-like run followed by a space and a slash.
const FIX_SLASH_AS = /([/]) ([a-z\-_\./]+)/g;
const FIX_SLASH_AS_SLASH = /([/\.])([A-Za-z\-_\./]+) ([/])/g;

// CJK directly followed by ANS, and ANS directly followed by CJK.
// The last range is \u2700-\u27bf written with an ASCII hyphen: an em dash
// (U+2014) in that position turns the range into three literal characters
// (\u2700, the em dash itself, \u27bf).
const CJK_ANS = new RegExp(`([${CJK}])([A-Za-z\u0370-\u03ff0-9@\\$%\\^&\\*\\-\\+\\\\=\\|/\u00a1-\u00ff\u2150-\u218f\u2700-\u27bf])`, 'g');
const ANS_CJK = new RegExp(`([A-Za-z\u0370-\u03ff0-9~\\$%\\^&\\*\\-\\+\\\\=\\|/!;,\\.\\?\u00a1-\u00ff\u2150-\u218f\u2700-\u27bf])([${CJK}])`, 'g');

// A percent sign directly followed by a Latin letter.
const S_A = /(%)([A-Za-z])/g;

/**
 * Inserts U+2005 (FOUR-PER-EM SPACE) between CJK characters and adjacent
 * Latin letters, digits and selected symbols.
 */
class Pangu {
  /**
   * Returns `text` with U+2005 inserted at CJK/non-CJK boundaries.
   *
   * Non-string input triggers a console warning and is returned unchanged.
   * Strings of length 0 or 1, and strings containing no CJK character, are
   * returned unchanged as well.
   *
   * @param {string} text - Text to process.
   * @returns {string} The spaced text (or the original value, see above).
   */
  spacing(text) {
    if (typeof text !== 'string') {
      console.warn(`spacing(text) only accepts string but got ${typeof text}`); // eslint-disable-line no-console
      return text;
    }
    if (text.length <= 1 || !ANY_CJK.test(text)) {
      return text;
    }

    let newText = text;

    // Surround an operator sitting between CJK and an alphanumeric.
    newText = newText.replace(CJK_OPERATOR_ANS, '$1\u2005$2\u2005$3');
    newText = newText.replace(ANS_OPERATOR_CJK, '$1\u2005$2\u2005$3');

    // Drop the plain space after / before a slash in path-like runs.
    newText = newText.replace(FIX_SLASH_AS, '$1$2');
    newText = newText.replace(FIX_SLASH_AS_SLASH, '$1$2$3');

    // The core rule: CJK next to letters/digits/symbols, in both orders.
    newText = newText.replace(CJK_ANS, '$1\u2005$2');
    newText = newText.replace(ANS_CJK, '$1\u2005$2');

    // Separate a percent sign from a following Latin letter.
    newText = newText.replace(S_A, '$1\u2005$2');

    return newText;
  }
}

const pangu = new Pangu();

// Walks the article content once and applies pangu spacing to its text nodes.
(function() {
  'use strict';

  // Elements whose text must stay verbatim: code samples, form input, and
  // inline <style>/<script> whose content is CSS/JS rather than prose.
  const SKIPPED_TAGS = new Set(['CODE', 'PRE', 'SCRIPT', 'STYLE', 'TEXTAREA']);

  /**
   * Tells whether pangu would put a space between the last character of
   * `before` and the first character of `after`.
   *
   * @param {string} before - Text ending at the boundary.
   * @param {string} after - Text starting at the boundary.
   * @returns {boolean} True when the two-character junction gets changed.
   */
  const needsSpacing = function (before, after) {
    const junction = before.slice(-1) + after.charAt(0);
    return pangu.spacing(junction) !== junction;
  };

  /**
   * Recursively spaces every text node below `node`, skipping the subtrees
   * of SKIPPED_TAGS elements.
   *
   * Text that is the first/last child of `node` is also checked against the
   * text node directly before/after `node` itself, so that a boundary that
   * falls on an element edge (e.g. `中文<b>abc</b>`) still gets a U+2005.
   *
   * @param {Node} node - Subtree root; its own siblings may be modified.
   */
  const traverse = function (node) {
    const childNodes = node.childNodes;
    for (let i = 0; i < childNodes.length; i++) {
      const childNode = childNodes[i];
      if (childNode.nodeType === Node.TEXT_NODE) {
        childNode.data = pangu.spacing(childNode.data);
        if (i === 0) {
          const previousSibling = node.previousSibling;
          if (previousSibling && previousSibling.nodeType === Node.TEXT_NODE &&
              needsSpacing(previousSibling.data, childNode.data)) {
            previousSibling.data += '\u2005';
          }
        }
        if (i + 1 === childNodes.length) {
          const nextSibling = node.nextSibling;
          if (nextSibling && nextSibling.nodeType === Node.TEXT_NODE &&
              needsSpacing(childNode.data, nextSibling.data)) {
            nextSibling.data = '\u2005' + nextSibling.data;
          }
        }
      } else if (!SKIPPED_TAGS.has(childNode.nodeName)) {
        traverse(childNode);
      }
    }
  };

  // Pages without the content container have nothing to process.
  const root = document.getElementById('mw-content-text');
  if (root) {
    traverse(root);
  }
})();