// Revision 87410 of "MediaWiki:Gadget-CorrectorOCR.js" on cawikisource

/*Barreja de diferents Mediawikis de Wikisource (ThomasV, Pathoschild, et al.)*/

/**
 * Applies a fixed, ordered list of regular-expression substitutions that
 * repair common OCR misreadings in a fragment of text.
 *
 * The table below is a flat list of (pattern, replacement) pairs. Every
 * pattern is compiled with the "g" flag and the rules run strictly in order,
 * so a later rule sees the output of the earlier ones.
 *
 * @param {string} text Fragment to correct.
 * @returns {string} The corrected fragment (returned unchanged if the table
 *   is malformed, i.e. has an odd number of entries).
 */
function Fragment_Tipo(text) {
 var typo_def = [
  "(  )", " ",
  "(— )", "—",
  "( —)", "—",
  "(- )", "-",
  "( -)", "-",
  "(■)",";",
  "( \,)", "\,",
  "( \;)", ";",
  "( :)", ":",
  "(-[1I!li])", "-l", // e.g. "predre-l"
  "( [\n])", "\n", // original note: "except IE"
  "-\n([a-zéèçà])", "$1", // word hyphenated across a line break; original note: "except IE"
  "(Digitized by)", " ",
  "(VjOOQIC)", " ",
  "(a[bh]a )", "aba ",
  "(a[bh]an )", "aban ",
  "(acccn)", "accen",
  "([an][iïíìl!1I]x)", "aix",
  "(x[6d])", "xó",
  "(a[iïíìl!1I]g[anou][nu])", "algun",
  "(a[iïíìl!1I]g[un]a)", "aigua",
  "(aU)", "all",
  "( attre )", " altre ",
  "( a[iïíìl!1I][rtl]r)", " altr",
  "( a[iïíìl!1I][8s] )", " als ",
  "(anl )", "ant ",
  "b[ce]rt", "bert",
  "(b[6d])", "bó",
  "( calala)", " catala",
  "(c[iïíìl!1I][56d] )", "ció ",
  "(c[iïíìl!1I][56d][ns][8s] )", "cions ",
  "( corn )", " com ",
  "( co[un][iïíìl!1I] )", " com ",
  "(cb)", "ch",
  "(c[iïíìl!1I] )", "ci ", // e.g. "ofici"
  "(cuU)", "cull",
  "(d[*'\"])", "d'",
  "( d[ocs] )", " de ",
  "(d[ce][iïíìl!1I]x)", "deix",
  "( d[ce][iïíìl!1I] )", " del ",
  "( d[ce][iïíìl!1I][8s] )", " dels ",
  "(d[ce][s8])", "des",
  "(D[ce][anu] )", "Deu ",
  "(d6[nu])", "déu",
  "(d[!1I])", "di",
  "(d[iïíìl!1I]g)", "dig",
  "dfs", "dís",
  "([nu][bh][ec][nu])", "uhen",
  "(drc)", "dre",
  "(dr[iïíìl!1I])", "dri",
  "([ce]o[nu][ce][ce][bh])", "conech",
  "(efe[ce][ce])", "efecc",
  "([BE][iïíìl!1I] )", "El ",
  "(EU )", "Ell ",
  "( [ce][iïíìl!1I] )", " el ",
  "([ -][ce][iïíìl!1I][8s] )", " els ",
  "( [ce][iïíìl!1I][iïíìl!1I])", " ell",
  "(eU)", "ell",
  "(cm)", "em",
  "[BE][nqu] ", "En ",
  " cn", " en",
  " [ce][un] "," en ",
  "[ce]n[lt] ", "ent ",
  " [ce]n[it]r[ce]", " entre",
  " [ce]s ", " es ",
  "[ce][8s][ce]ri", "escri",
  "[ce][8s][ce]r ", "eser ",
  " [ceo][8s][8s][ceo]r ", " esser ",
  " [ce][s8][lt]([aáà])([ t])", " est$1$2",
  "exlr", "extr",
  "fc", "fe",
  "f[ce][nu][lt]", "fent",
  "f[1!Iil]([bcdfgjlmnpqrstvxyz])", "fi$1", // "fic", "fim", ...
  "[fí][iïíìl!1I][iïíìl!1I][iïíìl!1I]", "fill",
  "(g[iïíìl!1I][6é]s)", "glés",
  "g[nu][óé]", "gué",
  "gn[ce]", "gne", // e.g. "digne"
  "gu[ce]", "gue",
  "g[nu]t", "gut",
  "[hb]a[nu] ", "han ",
  "[hb]ab[ce]m", "havem",
  "[hb]av[iïíìl!1I]a", "havia",
  "([hb]av[iïíìl!1I][ce][nu])", "havien",
  "([hb][iïíìl!1I] [hb]a)", "hi ha",
  " [bh]a ", " ha ",
  "h[iïìl!1I]", "hi",
  "[bh][oe]m[ce]", "home",
  "(horn)", "hom",
  "( [bh]o[no]t )", " hont ",
  "([iïíìl!1]U)", "ill",
  "([iïíìl!1I]n[lt] )", "int ",
  "J[ce][s8]", "Jes",
  "jomada", "jornada",
  " [iïíìl!1I][nu]r ", " lur ",
  " [*'´\\^][iïíìl!1I] ", " 'l ",
  "L[*´\\^]", "L'",
  "([ .,;])[iïíìl!1I][*'´\\^]", "$1l'",
  "Ta([iy])gua", "l'a$1gua",
  "V a", "l' a",
  " [iïíìl!1]a ", " la ",
  " [iïíìl!1][ec][a8s] ", " les ",
  " [iïíìl!1]i ", " li ",
  "[UI][iïíìl!1I][bh]r[ec]", "Uibre",
  "ii([aáàeéèiíìoóòuúù])", "ll$1", // e.g. "iiibre"
  " [iïíìl!1]o ", " lo ",
  " [iïíìl!1]o[a8s] ", " los ",
  "Uoc", "lloc",
  "U[nu][nu]", "llun",
  "([*'´][iïíìl!1I][8s])", "'ls",
  "( M[8s] )", " 'ls ",
  "( [iïíìl!1][8s] )", " ls ",
  // The replacement keeps the surrounding spaces; without them the
  // neighbouring words were glued together ("que ine diu" -> "quemediu").
  " ine ", " me ",
  "m[ce]([ '])", "me$1",
  "(loient )", "lment ",
  "(ni[ce][nu]y)", "menys",
  "(mo[iïíìl!1]t)", "molt",
  "(—jN)", "—¡N",
  "(n[*'\\^])", "n'",
  "([*'\\^]n)", "'n",
  " [nu]o ", " no ",
  "nl([ ;.:r])","nt$1", // "entre", "-nt"
  "(oU)", "oll",
  "(—jP)", "—¡P",
  "(prc)", "pre",
  "pnn", "pun",
  "(—[ij]Q)", "—¡Q",
  "(Q[anou][ce])", "Que",
  "(Q[anou][iïíìl!1I])", "Qui",
  "([çq][*'\\^])", "q'",
  "([çq][nu][*'\\^])", "qu'",
  "([çq][anou][anou][il])", "qual",
  "([çq][anou][anou][nu])", "quan",
  "([çq][anou][ceo])", "que",
  "([çq]ii[ceo])", "que",
  "qae", "que",
  "([çq]u[ce] [iïíìl!1I] )", "que l ",
  "([çq][anou][óé])", "qué",
  "([çq][anou][ce]s[lt])", "quest",
  "([çq][anou][iïíìl!1I])", "qui",
  "(rcg)", "reg", // could also be "rog"
  "(rcr)", "rer", // could also be "ror"
  "rcy", "rey",
  "(Sl)", "Si",
  "([s8][*'\\^])", "s'",
  "([*'\\^]s)", "'s",
  " [sa8][ce] ", " se ",
  "([8s]cc)", "sec",
  "scd", "sed",
  "scg", "seg",
  "([8s][ceo][çgq][eo][nu])", "segon",
  "( [8s]cr )", " ser ", // could also be " sor "
  "( [8s][ce]r[ce])", " sere",
  "( [8s]o[nD] )", " son ",
  "([8s]lr)", "str", // e.g. "vostra"
  "(t[*'\\^])", "t'",
  "([*'\\^]t)", "'t",
  "[lt]am[6b]([éèe])", "tamb$1",
  "[lt]amp[ceo][ceo]", "tampoc",
  "( tc )", " te ",
  "[lt][ce]rr", "terr",
  "(tU)", "tll",
  " [tl][iIïíìÏÌÍIL][un][ce]([h ;.:])"," tinc$1",
  "(trc)", "tre", // also "tro"
  "( [nu][nu] )", " un ", // also " nu "
  "( [nu][nu][oa] )", " una ", // also " nua "
  "(uU)", "ull",
  "(▼)", "V",
  "v[ce]ll", "vell",
  "(•)", ".",
  "([*\\^])", " "
 ];

 var lng = typo_def.length;
 // A pattern without a replacement means the table is corrupt: do nothing.
 if (lng % 2 != 0) return text;
 for (var i = 0; i < lng; i += 2) {
  // Declared locally: the original assigned to an undeclared `search`,
  // leaking a global (and throwing in strict mode).
  var search = new RegExp(typo_def[i], "g");
  text = text.replace(search, typo_def[i + 1]);
 }
 return text;
}

/**
 * Joins hard-wrapped lines of prose inside a fragment of text.
 *
 * A line break that directly follows a lowercase letter (including the
 * accented vowels listed in the pattern) or a comma is replaced by a single
 * space. Line breaks after anything else (full stop, colon, capitals,
 * digits, ...) are kept.
 *
 * @param {string} text Fragment to reflow.
 * @returns {string} The fragment with the matching line breaks replaced
 *   (returned unchanged if the table has an odd number of entries).
 */
function Fragment_Prosa(text) {
 // Flat list of (pattern, replacement) pairs, applied in order.
 var typo_def = [
  // Replace the line break with a space unless it follows punctuation
  "([a-zàáèéìíòóùú\,])[\n]", "$1 "
 ];

 var lng = typo_def.length;
 if (lng % 2 != 0) return text;
 for (var i = 0; i < lng; i += 2) {
  // Declared locally: the original assigned to an undeclared `search`,
  // leaking a global (and throwing in strict mode).
  var search = new RegExp(typo_def[i], "g");
  text = text.replace(search, typo_def[i + 1]);
 }
 return text;
}

/**
 * Runs the OCR typo corrections (Fragment_Tipo) over the contents of the
 * edit box `wpTextbox1`, but only on pages whose namespace number is 102
 * (presumably the proofreading "Page" namespace of this wiki - confirm).
 *
 * Markup is protected: the text is cut at every match of `splitter`, the
 * matched markup is copied through verbatim and only the text in between is
 * corrected.
 */
function Tipografia() {
  if (mw.config.get("wgNamespaceNumber") != 102) return;

  var editbox = document.getElementById('wpTextbox1');
  if (!editbox) return;

  var text = editbox.value;
  var pieces = [];
  var last_match = 0;
  // Spans that must not be altered: <math>...</math>, tags, style="...",
  // &nbsp;, &mdash;, HTML comments, runs of ":" or ";" at the start of a
  // line, and [[...]] links.
  // NOTE(review): the two tag alternatives look malformed (the `A-z` range
  // and the stray "[" in the second one); the pattern is kept exactly as it
  // was so that the set of protected spans does not change.
  var splitter = new RegExp("<math>.*</math>|<[a-zA-z0-9 =\"']>|[</[a-zA-z0-9 =\"']+>|style=\".*\"|&nbsp;|&mdash;|<!--.*-->|\n:[:]*|\n;[;]*|[[][[].*]]", "gm");
  var result;
  while ((result = splitter.exec(text)) !== null) {
    // Plain text between the previous markup span and this one.
    pieces.push(Fragment_Tipo(text.slice(last_match, result.index)));
    // The markup itself, untouched (result[0] is the matched string).
    pieces.push(result[0]);
    last_match = splitter.lastIndex;
  }
  // Trailing plain text after the last markup span.
  pieces.push(Fragment_Tipo(text.slice(last_match)));
  editbox.value = pieces.join('');
}


/**
 * Toggles the correction form shown above the edit box.
 *
 * If an element with id `regexform` already exists it is removed through
 * TreuFormulari(). Otherwise a <div id="regexform"> containing a
 * <form id="regexformform"> is inserted as the first child of the edit
 * box's parent, with these controls in order: the "Corregeix" button
 * (Tipografia), the "Prosa" button (Fes_Prosa), the labelled text inputs
 * `formsearch` and `formreplace`, and the "Vés!" button (customgo).
 *
 * Does nothing when the edit box `wpTextbox1` is not on the page.
 */
function CreaFormulari() {
 if (document.getElementById('regexform')) {
  TreuFormulari();
  return;
 }

 var editbox = document.getElementById('wpTextbox1');
 if (!editbox) return; // no edit box: there is nowhere to attach the form

 var regexform = document.createElement('div'); // container
 regexform.setAttribute('id', 'regexform');
 editbox.parentNode.insertBefore(regexform, editbox.parentNode.firstChild);

 var formform = document.createElement('form');
 formform.setAttribute('id', 'regexformform');
 regexform.appendChild(formform);

 // Appends an <input type="button"> whose title and value are `caption`.
 // The handler is attached as a function (like addButton2 does) instead of
 // an `onclick="..."` attribute string, so it keeps working when this
 // script is not executed in the global scope.
 function afegeixBoto(caption, handler) {
  var button = document.createElement('input');
  button.setAttribute('type', "button");
  button.setAttribute('title', caption);
  button.setAttribute('value', caption);
  button.onclick = function () { handler(); };
  formform.appendChild(button);
 }

 // Appends a <label for=id> with the given text followed by <input id=id>.
 function afegeixCamp(id, labelText) {
  var field = document.createElement('input');
  field.setAttribute('id', id);
  var label = document.createElement('label');
  label.setAttribute('for', id);
  label.appendChild(document.createTextNode(labelText));
  formform.appendChild(label);
  formform.appendChild(field);
 }

 afegeixBoto("Corregeix", Tipografia);
 afegeixBoto("Prosa", Fes_Prosa);
 afegeixCamp('formsearch', "Canvia ");
 afegeixCamp('formreplace', ' per ');
 afegeixBoto("Vés!", customgo);
}

/**
 * Reflows the prose in the edit box `wpTextbox1` by running Fragment_Prosa
 * over its contents.
 *
 * Markup is protected: the text is cut at every match of `splitter`, the
 * matched markup is copied through verbatim and only the text in between is
 * processed.
 */
function Fes_Prosa() {
 var editbox = document.getElementById('wpTextbox1');
 if (!editbox) return;

 var text = editbox.value;
 var pieces = [];
 var last_match = 0;
 // Spans that must not be altered: <math>...</math>, tags, style="...",
 // &nbsp;, &mdash;, HTML comments, runs of ":" or ";" at the start of a
 // line, and [[...]] links.
 // NOTE(review): the two tag alternatives look malformed (the `A-z` range
 // and the stray "[" in the second one); the pattern is kept exactly as it
 // was so that the set of protected spans does not change.
 var splitter = new RegExp("<math>.*</math>|<[a-zA-z0-9 =\"']>|[</[a-zA-z0-9 =\"']+>|style=\".*\"|&nbsp;|&mdash;|<!--.*-->|\n:[:]*|\n;[;]*|[[][[].*]]", "gm");
 var result;
 while ((result = splitter.exec(text)) !== null) {
  // Plain text between the previous markup span and this one.
  pieces.push(Fragment_Prosa(text.slice(last_match, result.index)));
  // The markup itself, untouched (result[0] is the matched string).
  pieces.push(result[0]);
  last_match = splitter.lastIndex;
 }
 // Trailing plain text after the last markup span.
 pieces.push(Fragment_Prosa(text.slice(last_match)));
 editbox.value = pieces.join('');
}

/**
 * Runs the user's custom search-and-replace over the edit box.
 *
 * Reads the pattern from `formsearch` and the replacement from
 * `formreplace`; a literal backslash followed by "n" in either field is
 * turned into a real line break first. The pattern may be written either
 *   - bare (e.g. `foo+`), which is applied globally, or
 *   - delimited as `/pattern/flags`, where only the flags g, i and m are
 *     honoured (in any letter case, duplicates ignored).
 *
 * Does nothing if any of the three elements is missing. An invalid pattern
 * still makes `RegExp` throw a SyntaxError.
 */
function customgo() {
 var searchField = document.getElementById('formsearch');
 var replaceField = document.getElementById('formreplace');
 var editbox = document.getElementById('wpTextbox1');
 if (!searchField || !replaceField || !editbox) return;

 var msearch = searchField.value.replace(/\\n/g, '\n');
 var mreplace = replaceField.value.replace(/\\n/g, '\n');

 // Without delimiters: the whole input is the pattern, applied globally.
 var regpattern = msearch;
 var regmodifiers = 'g';

 // With delimiters: split into pattern and flags in a single match. The
 // original extracted the flags with a case-sensitive expression, so an
 // input such as "/abc/I" ended up passing invalid flags to RegExp.
 var parts = /^\s*\/([\s\S]*)\/([a-z]*)\s*$/i.exec(msearch);
 if (parts) {
  regpattern = parts[1];
  regmodifiers = '';
  var requested = parts[2].toLowerCase();
  var allowed = 'gim';
  // Keep each supported flag at most once; unsupported ones are dropped.
  for (var i = 0; i < allowed.length; i++) {
   if (requested.indexOf(allowed.charAt(i)) !== -1) {
    regmodifiers += allowed.charAt(i);
   }
  }
 }

 var search_re = new RegExp(regpattern, regmodifiers);
 editbox.value = editbox.value.replace(search_re, mreplace);
}

/**
 * Removes the correction form (the element with id `regexform`) from the
 * page and resets the global `patterncount` to -1.
 *
 * Does nothing when the form is not present.
 */
function TreuFormulari() {
 var regexform = document.getElementById('regexform');
 if (!regexform || !regexform.parentNode) return;
 regexform.parentNode.removeChild(regexform);
 // Nothing in this file reads `patterncount`; the write is kept, but as an
 // explicit global, so that any external script relying on it still sees
 // it. The original bare assignment throws in strict mode.
 window.patterncount = -1;
}

/**
 * Appends a clickable 46x22 image button to the element with id `toolbar`.
 * Does nothing when that element is not on the page.
 *
 * @param {string} id Accepted but not used by this function.
 * @param {string} alt Alternative text of the image.
 * @param {string} comment Tooltip (title) of the image.
 * @param {string} source URL of the image.
 * @param {Function} onclick Click handler assigned to the image.
 */
function addButton2(id,alt,comment,source,onclick){
 var toolbar = document.getElementById("toolbar");
 if (!toolbar) {
  return;
 }

 var boto = document.createElement("img");

 // Geometry and look.
 boto.width = 46;
 boto.height = 22;
 boto.border = 0;
 boto.className = "mw-toolbar-editbutton";
 boto.style.cursor = "pointer";

 // Caller-supplied texts, picture and behaviour.
 boto.alt = alt;
 boto.title = comment;
 boto.src = source;
 boto.onclick = onclick;

 toolbar.appendChild(boto);
}
/**
 * Adds the "automatic corrections" toolbar button, which toggles the
 * correction form (CreaFormulari), on pages whose namespace number is 102.
 * On any other page nothing is added.
 */
function add_Corr_button(){
 var espai = mw.config.get("wgNamespaceNumber");
 if (espai != 102) {
  return;
 }
 var icona = "//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png";
 addButton2("wpRep", "Correccions automàtiques", "Correcció automàtica", icona, CreaFormulari);
}
// Register the toolbar button once the DOM is ready ($(fn) is jQuery's
// shorthand for $(document).ready(fn)).
$(add_Corr_button);