/// Revision 137475 of "User:PerfektesChaos/js/WikiSyntaxTextMod/dS.js" on testwiki
/// PerfektesChaos/js/WikiSyntaxTextMod/?S.js
/// 2012-05-31 [email protected]   (author address garbled in this copy)
/// Fingerprint: #0#0#
/// <nowiki>
// WikiSyntaxTextMod:  String utility package
/* jshint  bitwise: true, curly:true, latedef:true, laxbreak:true,
           trailing:true, undef:true, white:false                      */
/* global  mw: true, mediaWiki: false, window: false, globalThis: false */
/*jslint   bitwise: true, plusplus: true, regexp: true, sloppy: true,
           unparam: true, vars: true, white: true, maxerr: 50          */
/*globals  mw: true, mediaWiki: false, window: false, globalThis: false */


if (typeof mediaWiki !== "object") {   // disconnected
   // Create the stub explicitly on the global object: a bare `mw = ...`
   // is an implicit global and throws in strict mode / ES modules.
   (function (root) {
      root.mw = { libs: { WikiSyntaxTextMod: { } },
                  log:  function () {} };
   }(typeof window === "object"
        ? window
        : (typeof globalThis === "object" ? globalThis : this)));
}
if (typeof mw.libs.WikiSyntaxTextMod !== "object") {   // isolated
   mw.libs.WikiSyntaxTextMod = { };
}
if (typeof mw.libs.WikiSyntaxTextMod.str !== "object") {
   mw.libs.WikiSyntaxTextMod.str = { };
}
mw.libs.WikiSyntaxTextMod.str.vsn = -4.51;
if (typeof mw.libs.WikiSyntaxTextMod.bb !== "object") {
   mw.libs.WikiSyntaxTextMod.bb = { };
}
if (typeof mw.libs.WikiSyntaxTextMod.debugging !== "object") {
   mw.libs.WikiSyntaxTextMod.debugging = { debugging: false };
}


mw.libs.WikiSyntaxTextMod.bb.str = function (appObj) {
   /* String utility package: equips appObj.str with
      .charEntity()  .charEntityAt()  .charEntityCode()  .charEntityHTML4()
      .deCapitalize()  .decodeOctet()  .escapeLight()  .fromCharCode()
      .fromNum()  .hexcode()  .isBlank()  .isLetter()  .isWhiteBlank()
      .makeString()  .setChar()  .setString()  .sortAppropriate()
      .sortChar()  .sortLocale()  .sortString()  .substrEnd()
      .substrExcept()  .trim()  .trimL()  .trimR()  .uniques()
      >< .locateEntities  >< .sort_lang  >< .sort_mode
      >< .spaces  >< .sticks
      All methods expect to be invoked as appObj.str.method() (they use
      `this` to reach their siblings and the settings above).
      Requires: JavaScript 1.3  (String.charCodeAt String.fromCharCode)
      2012-05-31
    */
   var hasOwn = Object.prototype.hasOwnProperty;

   // Longest entity that is resolved: "&thetasym;", "&#x10FFFF;"
   var ENTITY_MAX = 10;

   // Named entities (HTML4 and a few ML escapes) -> UCS code
   var entityCodes = {
      /* ML escapes */
      "quot": 34, "amp": 38, "apos": 39, "lt": 60, "gt": 62,
      /* Latin-1 */
      "nbsp": 160, "iexcl": 161, "cent": 162, "pound": 163,
      "curren": 164, "yen": 165, "brvbar": 166, "sect": 167,
      "uml": 168, "copy": 169, "ordf": 170, "laquo": 171,
      "not": 172, "shy": 173, "reg": 174, "macr": 175,
      "deg": 176, "plusmn": 177, "sup2": 178, "sup3": 179,
      "acute": 180, "micro": 181, "para": 182, "middot": 183,
      "cedil": 184, "sup1": 185, "ordm": 186, "raquo": 187,
      "frac14": 188, "frac12": 189, "frac34": 190, "iquest": 191,
      "Agrave": 192, "Aacute": 193, "Acirc": 194, "Atilde": 195,
      "Auml": 196, "Aring": 197, "AElig": 198, "Ccedil": 199,
      "Egrave": 200, "Eacute": 201, "Ecirc": 202, "Euml": 203,
      "Igrave": 204, "Iacute": 205, "Icirc": 206, "Iuml": 207,
      "ETH": 208, "Ntilde": 209, "Ograve": 210, "Oacute": 211,
      "Ocirc": 212, "Otilde": 213, "Ouml": 214, "times": 215,
      "Oslash": 216, "Ugrave": 217, "Uacute": 218, "Ucirc": 219,
      "Uuml": 220, "Yacute": 221, "THORN": 222, "szlig": 223,
      "agrave": 224, "aacute": 225, "acirc": 226, "atilde": 227,
      "auml": 228, "aring": 229, "aelig": 230, "ccedil": 231,
      "egrave": 232, "eacute": 233, "ecirc": 234, "euml": 235,
      "igrave": 236, "iacute": 237, "icirc": 238, "iuml": 239,
      "eth": 240, "ntilde": 241, "ograve": 242, "oacute": 243,
      "ocirc": 244, "otilde": 245, "ouml": 246, "divide": 247,
      "oslash": 248, "ugrave": 249, "uacute": 250, "ucirc": 251,
      "uuml": 252, "yacute": 253, "thorn": 254, "yuml": 255,
      /* Latin extended, modifiers */
      "OElig": 338, "oelig": 339, "Scaron": 352, "scaron": 353,
      "Yuml": 376, "fnof": 402, "circ": 710, "tilde": 732,
      /* Greek */
      "Alpha": 913, "Beta": 914, "Gamma": 915, "Delta": 916,
      "Epsilon": 917, "Zeta": 918, "Eta": 919, "Theta": 920,
      "Iota": 921, "Kappa": 922, "Lambda": 923, "Mu": 924,
      "Nu": 925, "Xi": 926, "Omicron": 927, "Pi": 928,
      "Rho": 929, "Sigma": 931, "Tau": 932, "Upsilon": 933,
      "Phi": 934, "Chi": 935, "Psi": 936, "Omega": 937,
      "alpha": 945, "beta": 946, "gamma": 947, "delta": 948,
      "epsilon": 949, "zeta": 950, "eta": 951, "theta": 952,
      "iota": 953, "kappa": 954, "lambda": 955, "mu": 956,
      "nu": 957, "xi": 958, "omicron": 959, "pi": 960,
      "rho": 961, "sigmaf": 962, "sigma": 963, "tau": 964,
      "upsilon": 965, "phi": 966, "chi": 967, "psi": 968,
      "omega": 969, "thetasym": 977, "upsih": 978, "piv": 982,
      /* spacing, marks, punctuation */
      "thinsp": 8201, "zwnj": 8204, "zwj": 8205, "lrm": 8206,
      "rlm": 8207, "ensp": 8194, "emsp": 8195, "ndash": 8211,
      "mdash": 8212, "lsquo": 8216, "rsquo": 8217, "sbquo": 8218,
      "ldquo": 8220, "rdquo": 8221, "bdquo": 8222, "dagger": 8224,
      "Dagger": 8225, "bull": 8226, "hellip": 8230, "permil": 8240,
      "prime": 8242, "Prime": 8243, "lsaquo": 8249, "rsaquo": 8250,
      "oline": 8254, "frasl": 8260, "euro": 8364,
      /* letterlike, arrows */
      "image": 8465, "weierp": 8472, "real": 8476, "trade": 8482,
      "alefsym": 8501, "larr": 8592, "uarr": 8593, "rarr": 8594,
      "darr": 8595, "harr": 8596, "crarr": 8629, "lArr": 8656,
      "uArr": 8657, "rArr": 8658, "dArr": 8659, "hArr": 8660,
      /* mathematical operators */
      "forall": 8704, "part": 8706, "exist": 8707, "empty": 8709,
      "nabla": 8711, "isin": 8712, "notin": 8713, "ni": 8715,
      "prod": 8719, "sum": 8721, "minus": 8722, "lowast": 8727,
      "radic": 8730, "prop": 8733, "infin": 8734, "ang": 8736,
      "and": 8743, "or": 8744, "cap": 8745, "cup": 8746,
      "int": 8747, "there4": 8756, "sim": 8764, "cong": 8773,
      "asymp": 8776, "ne": 8800, "equiv": 8801, "le": 8804,
      "ge": 8805, "sub": 8834, "sup": 8835, "nsub": 8836,
      "sube": 8838, "supe": 8839, "oplus": 8853, "otimes": 8855,
      "perp": 8869, "sdot": 8901,
      /* technical, shapes */
      "lceil": 8968, "rceil": 8969, "lfloor": 8970, "rfloor": 8971,
      "lang": 9001, "rang": 9002, "loz": 9674, "spades": 9824,
      "clubs": 9827, "hearts": 9829, "diams": 9830,
      /* brackets */
      "lsqb": 91,      // [
      "rsqb": 93,      // ]
      "lcub": 123,     // {
      "verbar": 124,   // |
      "rcub": 125      // }
   };

   // .sortChar(): code -> replacement
   //    number: ASCII code;  string: two ASCII letters;  true: remove
   var sortCodes = { };
   // .sortChar(): codes for which .sortAppropriate() is asked first
   var sortLocal = { };

   function sortAssign(table, assign, codes) {
      // Register the same replacement for every code listed
      var i;
      for (i = 0;  i < codes.length;  i++) {
         table[codes[i]] = assign;
      }   // for i
   }   // sortAssign()

   // nbsp, N-SPACE ... HAIR SPACE, NARROW NO-BREAK SPACE -> SPC
   sortAssign(sortCodes, 32, [ 160, 8194, 8195, 8196, 8197, 8198, 8199,
                               8200, 8201, 8202, 8239 ]);
   // HYPHEN, NON-BREAKING HYPHEN, MINUS sign, FIGURE DASH, ndash, mdash,
   // HORIZONTAL BAR, HYPHEN BULLET -> '-'
   sortAssign(sortCodes, 45, [ 8208, 8209, 8722,
                               8210, 8211, 8212, 8213, 8259 ]);
   // sup1 sup2 sup3
   sortAssign(sortCodes,  49, [ 185 ]);
   sortAssign(sortCodes,  50, [ 178 ]);
   sortAssign(sortCodes,  51, [ 179 ]);
   // À Á Â Ã Ä Å Amacron Aogonek
   sortAssign(sortCodes,  65, [ 192, 193, 194, 195, 196, 197, 256, 260 ]);
   // à á â ã ä å, feminine ordinal indicator, amacron aogonek
   sortAssign(sortCodes,  97, [ 224, 225, 226, 227, 228, 229,
                                170, 257, 261 ]);
   // Ç Cacute Ccirc Cdot Ccaron
   sortAssign(sortCodes,  67, [ 199, 262, 264, 266, 268 ]);
   sortAssign(sortCodes,  99, [ 231, 263, 265, 267, 269 ]);
   // Ð Dstroke Dcaron
   sortAssign(sortCodes,  68, [ 208, 272, 270 ]);
   sortAssign(sortCodes, 100, [ 240, 273, 271 ]);
   // È É Ê Ë Emacron Edot Eogonek Ecaron
   sortAssign(sortCodes,  69, [ 200, 201, 202, 203, 274, 278, 280, 282 ]);
   sortAssign(sortCodes, 101, [ 232, 233, 234, 235, 275, 279, 281, 283 ]);
   // Gcirc Gbreve Gdot Gcedil
   sortAssign(sortCodes,  71, [ 284, 286, 288, 290 ]);
   sortAssign(sortCodes, 103, [ 285, 287, 289, 291 ]);
   // Hcirc Hstroke
   sortAssign(sortCodes,  72, [ 292, 294 ]);
   sortAssign(sortCodes, 104, [ 293, 295 ]);
   // Ì Í Î Ï Imacron Iogonek Idot
   sortAssign(sortCodes,  73, [ 204, 205, 206, 207, 298, 302, 304 ]);
   // ì í î ï imacron iogonek inodot
   sortAssign(sortCodes, 105, [ 236, 237, 238, 239, 299, 303, 305 ]);
   // IJlig (Dutch: both capital), ijlig
   sortAssign(sortCodes, "IJ", [ 306 ]);
   sortAssign(sortCodes, "ij", [ 307 ]);
   // Jcirc, Kcedil
   sortAssign(sortCodes,  74, [ 308 ]);
   sortAssign(sortCodes, 106, [ 309 ]);
   sortAssign(sortCodes,  75, [ 310 ]);
   sortAssign(sortCodes, 107, [ 311 ]);
   // Lcedil Lstroke
   sortAssign(sortCodes,  76, [ 315, 321 ]);
   sortAssign(sortCodes, 108, [ 316, 322 ]);
   // Ñ Nacute Ncedil Ncaron
   sortAssign(sortCodes,  78, [ 209, 323, 325, 327 ]);
   sortAssign(sortCodes, 110, [ 241, 324, 326, 328 ]);
   // Ö Ò Ó Ô Õ Ø Omacron Odacute
   sortAssign(sortCodes,  79, [ 214, 210, 211, 212, 213, 216, 332, 336 ]);
   // ö ò ó ô õ ø omacron odacute, masculine ordinal indicator
   sortAssign(sortCodes, 111, [ 246, 242, 243, 244, 245, 248, 333, 337,
                                186 ]);
   // Rcaron
   sortAssign(sortCodes,  82, [ 344 ]);
   sortAssign(sortCodes, 114, [ 345 ]);
   // Sacute Scirc Scedil Scaron, S with comma below
   sortAssign(sortCodes,  83, [ 346, 348, 350, 352, 536 ]);
   sortAssign(sortCodes, 115, [ 347, 349, 351, 353, 537 ]);
   // Tcedil, T with comma below
   sortAssign(sortCodes,  84, [ 354, 538 ]);
   sortAssign(sortCodes, 116, [ 355, 539 ]);
   // Ü Ù Ú Û Umacron Ubreve Uring Udacute Uogonek
   sortAssign(sortCodes,  85, [ 220, 217, 218, 219,
                                362, 364, 366, 368, 370 ]);
   sortAssign(sortCodes, 117, [ 252, 249, 250, 251,
                                363, 365, 367, 369, 371 ]);
   // Ý Yuml;  ý ÿ
   sortAssign(sortCodes,  89, [ 221, 376 ]);
   sortAssign(sortCodes, 121, [ 253, 255 ]);
   // Zacute Zdot Zcaron
   sortAssign(sortCodes,  90, [ 377, 379, 381 ]);
   sortAssign(sortCodes, 122, [ 378, 380, 382 ]);
   // uml macr acute cedil circ caron breve ring ogon tilde dacute,
   // soft hyphen, zwnj, zwj, lrm, rlm -> remove from sort key
   sortAssign(sortCodes, true, [ 168, 175, 180, 184, 710, 711, 728, 730,
                                 731, 732, 733, 173,
                                 8204, 8205, 8206, 8207 ]);
   // Local hook first: Ä Å ä å Ð Dstroke ð dstroke Ö ö Ü ü (fallback to
   // the base letter above), and Æ æ OElig oelig ß Þ þ (hook only)
   sortAssign(sortLocal, true, [ 196, 197, 228, 229, 208, 272, 240, 273,
                                 214, 246, 220, 252,
                                 198, 230, 338, 339, 223, 222, 254 ]);

   if (typeof appObj.str !== "object") {
      appObj.str = { };
   }
   if (typeof appObj.str.locateEntities !== "boolean") {
      appObj.str.locateEntities = false;
   }
   if (appObj.str.sort_lang === undefined) {   // string or false
      appObj.str.sort_lang = false;
   }
   if (appObj.str.sort_mode === undefined) {   // string or false
      appObj.str.sort_mode = false;
   }   // "de-DIN31638" not yet passed to sorting
   if (typeof appObj.str.spaces !== "string") {   // 2012-03-19
      appObj.str.spaces = String.fromCharCode(
                  160,  5760,  8192,  8193,  8194,  8195,  8196,  8197,
                 8198,  8199,  8200,  8201,  8202,  8239,  8287,
                12288, 12351);
   }
   if (typeof appObj.str.sticks !== "string") {   // 2012-03-19
      appObj.str.sticks = String.fromCharCode(
                   45,   173,  8208,  8209,  8210,  8211,  8212,  8213,
                 8722);
   }
   //8204   // zwnj  _re_ZW
   //8205   // zwj   _re_ZW



   appObj.str.charEntity = function (adjust) {
      /**
         Retrieve character code (UCS) for named HTML4 or numeric entity
         @version 2012-03-09
         @param   adjust  string with one entity "&...;"
         @return  information about character
                  <BR><TT>false</TT>   if not resolved
                  <BR><TT>number</TT>  UCS code of single character
         @since   JavaScript 1.3   String.charCodeAt()
       */
      // Uses:
      //    .str.charEntityCode()
      //    .str.charEntityHTML4()
      return (adjust.charCodeAt(1) === 35     // '#'
              ? this.charEntityCode(adjust)
              : this.charEntityHTML4(adjust));
   };   // .str.charEntity()



   appObj.str.charEntityAt = function (adjust, address, advance) {
      /**
         Retrieve character code of ML entity at position
         @version 2012-03-09
         @param   adjust   string to be examined
         @param   address  position in adjust
         @param   advance  true: '&' at address; false: ';' at address
         @return  Array with entity information, or false
                  <BR><TT>[0]</TT>  code value
                  <BR><TT>[1]</TT>  entity position
                  <BR><TT>[2]</TT>  length of entity
         @since   JavaScript 1.3   String.charCodeAt()
       */
      // Uses:
      //    .str.charEntity()
      // Entities up to ENTITY_MAX characters are tried.  The former
      // limit (less than 8) never resolved "&thinsp;", "&#12288;" or
      // "&thetasym;", although .isBlank() knows their codes.
      var code   = adjust.charCodeAt(address);
      var length = 0;
      var r      = false;
      var start  = -1;
      var stop;
      if (advance) {
         if (code === 38) {   // '&'
            stop = adjust.indexOf(";",  address + 2);
            if (stop > address) {
               start  = address;
               length = stop - address + 1;
            }
         }
      } else if (code === 59  &&  address > 2) {   // ';'
         start = adjust.lastIndexOf("&",  address - 2);
         if (start >= 0) {
            length = address - start + 1;
         }
      }
      if (length  &&  length <= ENTITY_MAX) {
         r = this.charEntity(adjust.substr(start, length));
         if (r) {
            r = [ r, start, length ];
         }
      }
      return r;
   };   // .str.charEntityAt()



   appObj.str.charEntityCode = function (adjust) {
      /**
         Retrieve character code (UCS) for numeric ML entity
         @version 2010-03-30
         @param   adjust  string with character entity
                          like "&#xHH;" or "&#NN;"
                          <BR>first two characters are assumed to be '&#'
                          <BR>third character may be 'x', 'X' or digit
                          <BR>last character is assumed to be ';'
         @return  information about character
                  <BR><TT>false</TT>   if not resolved (malformed digits,
                                       zero, or beyond U+10FFFF)
                  <BR><TT>number</TT>  UCS code of single character
         @since   JavaScript 1.3   String.charCodeAt()
       */
      var n = 0;
      var s = adjust.substr(2,  adjust.length - 3);
      // parseInt() alone would accept trailing garbage ("12ab" -> 12)
      if (/^[xX][0-9A-Fa-f]+$/.test(s)) {   // hex
         n = parseInt(s.substr(1), 16);
      } else if (/^[0-9]+$/.test(s)) {      // dec
         n = parseInt(s, 10);
      }
      return (n > 0  &&  n <= 0x10FFFF  ?  n  :  false);
   };   // .str.charEntityCode()



   appObj.str.charEntityHTML4 = function (adjust) {
      /**
         Retrieve character code (UCS) for named HTML4 (or similar) entity
         @version 2012-03-09
         @param   adjust  string with character named entity "&xyz;"
                          <BR>first character is assumed to be '&'
                          <BR>last character is assumed to be ';'
         @return  information about character
                  <BR><TT>false</TT>   if not resolved
                  <BR><TT>number</TT>  UCS code of single character
       */
      var name = adjust.substr(1,  adjust.length - 2);
      // own-property test: "&constructor;" must not hit Object.prototype
      return (hasOwn.call(entityCodes, name)  ?  entityCodes[name]
                                              :  false);
   };   // .str.charEntityHTML4()



   appObj.str.deCapitalize = function (alter) {
      /**
         Downcase first character
         @version 2011-04-26
         @param   alter  string to be decapitalized
         @return  decapitalized string
       */
      return alter.substr(0, 1).toLowerCase()  +  alter.substr(1);
   };   // .str.deCapitalize()



   appObj.str.decodeOctet = function (assembly, address) {
      /**
         Retrieve value of two hexadecimal digits (one octet)
         similar to parseInt() base 16 but consider uppercase A-F only
         @version 2010-06-26
         @param   assembly  string to be analyzed
         @param   address   index of first digit in string
         @return  parsed number 0...255, or -1 if invalid
         @since   JavaScript 1.3   String.charCodeAt()
       */
      var digit = function (c) {
         // value of a single digit character code, or -1
         var v = -1;
         if (c > 47  &&  c < 58) {          // 0-9
            v = c - 48;
         } else if (c > 64  &&  c < 71) {   // A-F
            v = c - 55;
         }
         return v;
      };
      var high;
      var low;
      var r = -1;
      if (assembly.length  >  address + 1) {
         high = digit(assembly.charCodeAt(address));
         if (high >= 0) {
            low = digit(assembly.charCodeAt(address + 1));
            if (low >= 0) {
               r = high * 16  +  low;
            }
         }   // first digit
      }
      return r;
   };   // .str.decodeOctet()



   appObj.str.escapeLight = function (alter) {
      /**
         Minimal escaping for HTML: ampersand and less-than only
         @version 2012-03-07
         @param   alter  string to be escaped
         @return  string with escaping
       */
      // '&' first, otherwise the '&' of "&lt;" would be escaped again
      return alter.replace(/&/g, "&amp;").replace(/</g, "&lt;");
   };   // .str.escapeLight()



   appObj.str.fromCharCode = function (apply) {
      /**
         Extended fromCharCode for UCS > 0xFFFF (4 bytes/char)
         @version 2011-10-27
         @param   apply  number, UCS
         @return  single character, which might have a string length
                  of 2 instead of 1 (surrogate pair)
         @since   JavaScript 1.3   String.fromCharCode() 2 byte chars only
       */
      var c;
      var r;
      if (apply > 0xFFFF) {
         c = apply - 0x10000;
         r = String.fromCharCode( 0xD800 + (c >> 10),      // high
                                  0xDC00 + (c & 0x3FF) );  // low
      } else {
         r = String.fromCharCode(apply);
      }
      return r;
   };   // .str.fromCharCode()



   appObj.str.fromNum = function (adjust) {
      /**
         Format number as string
         @version 2012-03-11
         @param   adjust  number to be formatted
         @return  adjust as string
       */
      return adjust.toString();
   };   // .str.fromNum()



   appObj.str.hexcode = function (amount, align, allow) {
      /**
         Retrieve hexadecimal representation
         @version 2011-01-27
         @param   amount  number: non-negative integer
         @param   align   left padded number of digits, or false
         @param   allow   true: use lowercase letters
         @return  string with hex number; "0" for zero
       */
      var digits = (allow ? "0123456789abcdef" : "0123456789ABCDEF");
      var k = amount;
      var m;
      var r = "";
      while (k > 0) {
         m = k % 16;
         r = digits.charAt(m) + r;   // charAt: s[m] is not ES3
         k = (k - m) / 16;
      }   // while k > 0
      if (r === "") {   // amount is 0: formerly yielded empty string
         r = "0";
      }
      if (align) {
         m = align - r.length;
         while (m > 0) {
            r = "0" + r;
            m--;
         }   // while m > 0
      }   // left padding
      return r;
   };   // .str.hexcode()



   appObj.str.isBlank = function (ask, any) {
      /**
         Test for invisible character
         @version 2012-03-09
         @param   ask  character code to be examined
         @param   any  true: include zero width and marks
         @return  true iff ask is any space
                  or other invisible character code
       */
      var r = false;
      switch (ask) {
         case    32 :   // ' '
         case   160 :   // nbsp
         case  5760 :   // OGHAM SPACE MARK
         case  8192 :   // EN QUAD
         case  8193 :   // EM QUAD
         case  8194 :   // N-SPACE
         case  8195 :   // M-SPACE
         case  8196 :   // THREE-PER-EM SPACE
         case  8197 :   // FOUR-PER-EM SPACE
         case  8198 :   // SIX-PER-EM SPACE
         case  8199 :   // FIGURE SPACE
         case  8200 :   // PUNCTUATION SPACE
         case  8201 :   // thinsp
         case  8202 :   // HAIR SPACE (english typography)
         case  8239 :   // NARROW NO-BREAK SPACE
         case  8287 :   // MEDIUM MATHEMATICAL SPACE
         case 12288 :   // IDEOGRAPHIC SPACE
         case 12351 :   // IDEOGRAPHIC HALF FILL SPACE
         case 917536 :  // TAG SPACE
            r = true;
            break;
         case   133 :   // NEXT LINE (NEL)
         case  6158 :   // MONGOLIAN VOWEL SEPARATOR
         case  8203 :   // ZERO WIDTH SPACE
         case  8204 :   // zwnj
         case  8205 :   // zwj
         case  8206 :   // lrm
         case  8207 :   // rlm
         case  8232 :   // LINE SEPARATOR
         case  8233 :   // PARAGRAPH SEPARATOR
         case  8234 :   // LEFT-TO-RIGHT EMBEDDING
         case  8235 :   // RIGHT-TO-LEFT EMBEDDING
         case  8236 :   // POP DIRECTIONAL FORMATTING
         case  8237 :   // LEFT-TO-RIGHT OVERRIDE
         case  8238 :   // RIGHT-TO-LEFT OVERRIDE
         case  8288 :   // WORD JOINER
         case  8289 :   // FUNCTION APPLICATION
         case  8290 :   // INVISIBLE TIMES
         case  8291 :   // INVISIBLE SEPARATOR
         case  8292 :   // INVISIBLE PLUS
         case 65279 :   // BYTE ORDER MARK
         case 917505 :  // LANGUAGE TAG
            if (any) {
               r = true;
            }
            break;
      }   // switch ask
      return r;
   };   // .str.isBlank()



   appObj.str.isLetter = function (ask) {
      /**
         Test whether a character is a letter (currently Latin based)
         @version 2011-04-27
         @param   ask  character code to be examined,
                       or string (first char)
         @return  true iff ask is identified as any kind of letter
         @since   JavaScript 1.3   String.charCodeAt()
       */
      // Uses:
      //    .str.sortChar()
      var n;
      var r = false;
      var x;
      if (typeof ask === "number") {
         n = ask;
      } else if (typeof ask === "string") {
         n = ask.charCodeAt(0);
      } else {
         n = -1;
      }   // typeof
      if (n > 160) {   // non-ASCII: judge by the sort replacement
         x = this.sortChar(n, false);
         if (typeof x === "number") {
            n = x;
         } else if (typeof x === "string") {   // ligature: two letters
            n = 90;
         } else {   // false: unknown;  true: to be removed
            n = 0;
         }
      }   // non-ASCII
      if (n >= 65  &&  n <= 122) {   // ASCII
         r = (n <= 90  ||  n >= 97);
      }
      return r;
   };   // .str.isLetter()



   appObj.str.isWhiteBlank = function (ask, any, against) {
      /**
         Test for invisible character or newline
         @version 2012-04-25
         @param   ask      character code to be examined
         @param   any      true: include zero width and direction marks
         @param   against  true: behave like .isBlank()
         @return  true iff ask is any whitespace or other invisible
       */
      // Uses:
      //    .str.isBlank()
      return (ask === 10  &&  ! against)   // newline
             ||  this.isBlank(ask, any);
   };   // .str.isWhiteBlank()



   appObj.str.makeString = function (apply, amount) {
      /**
         Return string of certain length with repeated character
         @version 2011-04-27
         @param   apply   character code to be set
         @param   amount  number of repeated characters apply
         @return  new string
       */
      var c = String.fromCharCode(apply);
      var i;
      var s = "";
      for (i = 0;  i < amount;  i++) {
         s = s + c;
      }   // for i
      return s;
   };   // .str.makeString()



   appObj.str.setChar = function (array, apply, address) {
      /**
         Set character or string at certain string position
         @version 2012-02-22
         @param   array    string to be manipulated
         @param   apply    character code or string to be set
         @param   address  single character position to be replaced;
                           array.length: append
         @return  modified string
         @since   JavaScript 1.3   String.fromCharCode()
       */
      var r = (typeof apply === "number"  ?  String.fromCharCode(apply)
                                          :  apply);
      // toString() done by concat
      if (! address) {   // first character
         r = r + array.substr(1);
      } else if (address === array.length) {   // append
         r = array.substr(0, address) + r;
      } else {
         r = array.substr(0, address)  +  r  +  array.substr(address + 1);
      }
      return r;
   };   // .str.setChar()



   appObj.str.setString = function (array, address, adjust, apply) {
      /**
         Modify string in certain range
         @version 2012-05-09
         @param   array    string to be manipulated
         @param   address  character position to start replacement
         @param   adjust   range specification
                           number  of characters to be removed at address
                           string  (adjust.length is used as number)
         @param   apply    string to replace range
         @return  modified string;
                  false if adjust is neither number nor string
       */
      var n = false;
      var r = false;
      if (typeof adjust === "number") {
         n = adjust;
      } else if (typeof adjust === "string") {
         n = adjust.length;
      }   // typeof
      if (n !== false) {
         r = (address > 0  ?  array.substr(0, address) + apply
                           :  apply);   // head
         n += address;
         if (n < array.length) {
            r += array.substr(n);
         }   // tail
      }   // typeof
      return r;
   };   // .str.setString()



   appObj.str.sortAppropriate = function (adjust) {
      /**
         Retrieve sortable character(s)
         in particular local environment (hook)
         <BR>For .sort_lang "dk", "no", "sv" the letters
             Ä Å Æ ä å æ Ö ö are kept, Ü ü become Y y.
         <BR>Otherwise Æ æ OElig oelig ß Þ þ are always expanded
             to "Ae" "ae" "Oe" "oe" "ss" "Th" "th".
         <BR>Not implemented yet: "de" via .sortLocale(), "$expand" in
             .sort_mode (Å -> "Aa", Dstroke -> "Dj"), Ð ð Dstroke.
         @version 2010-08-20
         @param   adjust  character code of a single character
         @return  information about sortable character
                  <BR><TT>false</TT>   no particular local request
                  <BR><TT>true</TT>    remove character from sort key
                  <BR><TT>number</TT>  with code of single character
                  <BR><TT>string</TT>  of two ASCII characters,
                               (first) character case will be kept,
                               second char (if any) downcase.
         @see     .str.sort_lang
         @see     .str.sort_mode
       */
      var r = false;
      if (typeof this.sort_lang === "undefined") {
         this.sort_lang = false;
      }
      switch (this.sort_lang) {
         case "dk" :
         case "no" :
         case "sv" :
            switch (adjust) {
               case 196 :   // Ä
               case 197 :   // Å
               case 198 :   // Æ
               case 228 :   // ä
               case 229 :   // å
               case 230 :   // æ
               case 214 :   // Ö
               case 246 :   // ö
                  r = adjust;
                  break;
               case 220 :   // Ü
                  r = 89;   // Y
                  break;
               case 252 :   // ü
                  r = 121;  // y
                  break;
            }   // switch adjust
            break;   // this.sort_lang
      }   // switch this.sort_lang
      if (! r) {
         switch (adjust) {
            case 198 :   // Æ
               r = "Ae";
               break;
            case 230 :   // æ
               r = "ae";
               break;
            case 338 :   // OElig
               r = "Oe";
               break;
            case 339 :   // oelig
               r = "oe";
               break;
            case 223 :   // ß
               r = "ss";
               break;
            case 222 :   // Þ
               r = "Th";
               break;
            case 254 :   // þ
               r = "th";
               break;
         }   // switch adjust
      }   // not yet defined
      return r;
   };   // .str.sortAppropriate()



   appObj.str.sortChar = function (adjust) {
      /**
         Retrieve sortable character(s)
         for non-ASCII Latin based Unicode
         <BR>(RegExp is not modified)
         @version 2011-01-27
         @param   adjust  character code of a single character
                          <BR>(expecting <TT>adjust</TT> from 160 up;
                               anything below yields false)
         @return  information about sortable character
                  <BR><TT>false</TT>   if nothing to do
                  <BR><TT>true</TT>    remove character from sort key
                  <BR><TT>number</TT>  with ASCII code of single character
                  <BR><TT>string</TT>  of two ASCII characters,
                               (first) character case will be kept,
                               second char (if any) downcase.
                  <BR>Only glyphs used in any (European) language
                      considered.
       */
      // Uses:
      //    .str.sortAppropriate()
      var r = false;
      if (typeof adjust === "number"  &&  adjust >= 160) {
         if (hasOwn.call(sortLocal, adjust)) {   // local rule wins
            r = this.sortAppropriate(adjust);
         }
         if (! r  &&  hasOwn.call(sortCodes, adjust)) {
            r = sortCodes[adjust];
         }
      }   // adjust >= 160
      return r;
   };   // .str.sortChar()



   appObj.str.sortLocale = function (adjust, area) {
      /**
         Retrieve sortcode char or string for Unicode
         @version 2012-03-19
         @param   adjust  character code to be checked
         @param   area    language code, or false
                  <BR><TT>de</TT> German DIN 31638 (DIN 5007) requests
                                  umlaut "Ae" when sorting names of
                                  persons; applied only if .sort_mode
                                  contains "de-DIN31638"
         @return  sortable string
                  <BR><TT>false</TT>   no particular local request
                  <BR>two character string for German umlauts
         @see     .str.sort_mode
       */
      var r = false;
      if (this.sort_mode
          &&  area === "de"
          &&  this.sort_mode.indexOf("de-DIN31638") >= 0) {
         switch (adjust) {
            case 196 :   // Ä
               r = "Ae";
               break;
            case 228 :   // ä
               r = "ae";
               break;
            case 214 :   // Ö
               r = "Oe";
               break;
            case 246 :   // ö
               r = "oe";
               break;
            case 220 :   // Ü
               r = "Ue";
               break;
            case 252 :   // ü
               r = "ue";
               break;
         }   // switch adjust
      }   // de-DIN31638
      return r;
   };   // .str.sortLocale()



   appObj.str.sortString = function (adjust, advanced) {
      /**
         Retrieve sortable string for non-ASCII Latin based Unicode
         <BR>Control characters become SPC, codes 128...159 become '?',
             trailing or multiple SPC shrinks (one heading SPC is kept).
         @version 2012-03-19
         @param   adjust    string to be checked or modified
         @param   advanced  optional, handed over to .sortChar()
                            (which does not evaluate it yet); meant to
                            request two character replacement of German
                            umlauts and scandinavian Aring
         @return  information about sortable string
                  <BR><TT>false</TT>   if nothing to do, adjust is fine
                  <BR><TT>string</TT>  changes against adjust
                  <BR>Only glyphs used in any (European) language
                      considered.
         @since   JavaScript 1.3   String.charCodeAt()
                                   String.fromCharCode()
       */
      // Uses:
      //    .str.setChar()
      //    .str.sortChar()
      var c;
      var i;
      var k;
      var s = adjust;
      // backwards, since replacements may change the length behind i
      for (i = s.length - 1;  i >= 0;  i--) {
         c = s.charCodeAt(i);
         if (c < 32) {   // control character
            s = this.setChar(s, 32, i);   // ' '
         } else if (c >= 128  &&  c < 160) {
            // win-1252 or something like that -- definitively wrong
            s = this.setChar(s, "?", i);
         } else if (c >= 160) {
            k = this.sortChar(c, advanced);
            if (k === true) {   // remove
               s = s.substr(0, i)  +  s.substr(i + 1);
            } else if (typeof k === "number") {
               s = this.setChar(s,  String.fromCharCode(k),  i);
            } else if (typeof k === "string"  &&  k) {
               s = this.setChar(s, k, i);
            }
         }   // cascade c
      }   // for i
      // single heading SPC is kept (i > 0):
      for (i = s.length - 1;  i > 0;  i--) {
         if (s.charCodeAt(i) !== 32) {
            break;   // for i
         }
         s = s.substr(0, i);   // trim right
      }   // for i
      for (i = s.length - 1;  i > 0;  i--) {
         if (s.charCodeAt(i) === 32
             &&  s.charCodeAt(i - 1) === 32) {   // SPC before SPC
            s = s.substr(0, i)  +  s.substr(i + 1);
         }
      }   // for i
      return (s === adjust  ?  false  :  s);
   };   // .str.sortString()



   appObj.str.substrEnd = function (apply, amount, after) {
      /**
         Retrieve last characters from string
         like Mozilla substr(-n, n)
         @version 2012-04-10
         @param   apply   string
         @param   amount  position counted from end
         @param   after   optional: number of chars, if not amount
         @return  string at end
         @since   JavaScript 1.0   String.substr()
       */
      // This function has been included for compatibility reasons.
      // With ECMA.3, String.slice() with negative start argument will
      // work; String.substr() with negative argument does not go with IE.
      // NOTE(review): `after` is honoured only if amount exceeds the
      // string length; possibly a misplaced brace.  Kept as it was,
      // since callers may rely on it -- confirm before changing.
      var j = apply.length - amount;
      var n = amount;
      if (j < 0) {
         j = 0;
         if (typeof after === "number") {
            n = after;
         }
      }
      return apply.substr(j, n);
   };   // .str.substrEnd()



   appObj.str.substrExcept = function (apply, amount) {
      /**
         Retrieve all but last characters from string
         @version 2012-04-10
         @param   apply   string
         @param   amount  number of characters to drop at end
         @return  string without its last amount characters
         @see     #substrEnd()
       */
      // This function has been included for compatibility reasons.
      // String.slice(0, -amount) should work, is permitted by MS JScript.
      var j = apply.length - amount;
      return apply.substr(0,  (j < 0 ? 0 : j));
   };   // .str.substrExcept()



   appObj.str.trim = function (adjust, any, aware) {
      /**
         Remove heading or trailing spacing charcodes of any kind
         @version 2012-05-31
         @param   adjust  string to be trimmed
         @param   any     true: include zero width and direction marks
         @param   aware   true: remove also trailing line breaks
         @return  modified string
       */
      // Uses:
      //    .str.trimL()
      //    .str.trimR()
      var s = this.trimL(adjust, any);
      return this.trimR(s, any, aware);
   };   // .str.trim()



   appObj.str.trimL = function (adjust, any) {
      /**
         Return string without heading spacing charcodes of any kind
         @version 2012-03-10
         @param   adjust  string to be trimmed
         @param   any     true: include zero width and direction marks
         @return  adjust without leading blanks; if .locateEntities
                  is set, entities of blank characters are removed, too
         @see     .str.locateEntities
         @since   JavaScript 1.3   String.charCodeAt()
       */
      // Uses:
      //    .str.isBlank()
      //    .str.charEntityAt()
      var e;
      var i = 0;
      var k = adjust.charCodeAt(0);
      while (k) {   // NaN beyond end of string
         if (this.isBlank(k, any)) {
            i++;
         } else if (this.locateEntities  &&  k === 38) {   // '&'
            e = this.charEntityAt(adjust, i, true);
            if (! e  ||  ! this.isBlank(e[0], any)) {
               break;   // while
            }
            i += e[2];
         } else {
            break;   // while
         }
         k = adjust.charCodeAt(i);
      }   // while k
      return (i  ?  adjust.substr(i)  :  adjust);
   };   // .str.trimL()



   appObj.str.trimR = function (adjust, any, aware, align) {
      /**
         Return string without trailing spaces charcodes of any kind
         @version 2012-05-31
         @param   adjust  string to be trimmed
         @param   any     true: include zero width and direction marks
         @param   aware   true: remove also line breaks
         @param   align   true: re-establish line breaks after trimming
                          (not if nothing but blanks has been found)
         @return  adjust without trailing blanks; if .locateEntities
                  is set, entities of blank characters are removed, too
         @see     .str.locateEntities
         @since   JavaScript 1.3   String.charCodeAt()
       */
      // Uses:
      //    .str.isWhiteBlank()
      //    .str.isBlank()
      //    .str.charEntityAt()
      var breaks = "";
      var e;
      var k;
      var keep = adjust.length;   // number of heading chars to retain
      var n = keep - 1;
      var r = adjust;
      while (n >= 0) {
         k = adjust.charCodeAt(n);
         if (this.isWhiteBlank(k, any, ! aware)) {
            if (align  &&  k === 10) {
               breaks = breaks + "\n";
            }
            keep = n;
            n--;
         } else if (this.locateEntities  &&  k === 59) {   // ';'
            e = this.charEntityAt(adjust, n, false);
            if (! e  ||  ! this.isBlank(e[0], any)) {
               break;   // while
            }
            n -= e[2];
            keep = n + 1;
         } else {
            break;   // while
         }
      }   // while n
      // keep is tested on its own now; formerly a string made of blank
      // entities only ("&nbsp;") was returned untrimmed.
      if (keep === 0) {
         r = "";
      } else if (keep < adjust.length) {
         r = adjust.substr(0, keep) + breaks;
      }
      return r;
   };   // .str.trimR()



   appObj.str.uniques = function (adjust, against) {
      /**
         Return string with unique sequence of items
         @version 2012-03-19
         @param   adjust   string to be reduced,
                           items separated by against
         @param   against  separator (single character or longer string)
         @return  string with all non-empty items in adjust, sorted,
                  without duplicates, separated by against
                  (no leading nor trailing against)
       */
      var i;
      var kept = [ ];
      var swap = "";   // previous item; initial "" drops empty items
      var words = adjust.split(against);
      words.sort();
      for (i = 0;  i < words.length;  i++) {
         if (words[i] !== swap) {
            swap = words[i];
            kept.push(swap);
         }
      }   // for i
      return kept.join(against);
   };   // .str.uniques()



};   // appObj.bb.str
mw.libs.WikiSyntaxTextMod.bb.str(mw.libs.WikiSyntaxTextMod);



// Start on import: callback to waiting ...
(function (lib) {
   // Announce this module to a waiting main module, if there is any,
   // then drop the version number and the bootstrap function,
   // which are both required during start only.
   if (typeof lib.main !== "object") {
      lib.main = { };
   }
   if (typeof lib.main.wait === "function") {
      lib.main.wait("S", lib.str.vsn);
   }
   delete lib.str.vsn;
   delete lib.bb.str;
}(mw.libs.WikiSyntaxTextMod));



// Emacs
// Local Variables:
// encoding: iso-8859-1-dos
// fill-column: 80
// End:

/// EOF </nowiki> WikiSyntaxTextMod/?S.js All content in the above text box is licensed under the Creative Commons Attribution-ShareAlike license Version 4 and was originally sourced from https://test.wikipedia.org/w/index.php?oldid=137475.
![]() ![]() This site is not affiliated with or endorsed in any way by the Wikimedia Foundation or any of its affiliates. In fact, we fucking despise them.
|