// Revision 58200 of "MediaWiki:Gadget-CorrectorOCR.js" on cawikisource

/* Blend of scripts taken from the MediaWiki namespace of several Wikisource projects (ThomasV, Pathoschild, et al.) */

/**
 * Applies the OCR typo-correction table to a fragment of plain text.
 *
 * The table is a flat list of (regular expression source, replacement) pairs.
 * Every pattern is compiled with the "g" flag and applied in table order, so
 * the output of one rule is the input of the next one.
 *
 * @param {string} text Fragment to correct.
 * @return {string} The corrected fragment; the input is returned untouched if
 *   the table is malformed (odd number of entries).
 */
function Fragment_Tipo(text) {
 var typo_def = [
  "(  )", " ",
  "(— )", "—",
  "( —)", "—",
  "(- )", "-",
  "( -)", "-",
  "(■)",";",
  "( \,)", "\,",
  "( \;)", ";",
  "( :)", ":",
  "(-[1I!li])", "-l", // e.g. predre-l
  "( [\n])", "\n", // except IE (original note)
  "-\n([a-zéèçà])", "$1", // hyphenation at a line break; except IE (original note)
  "(Digitized by)", " ",
  "(VjOOQIC)", " ",
  "(a[bh]a )", "aba ",
  "(a[bh]an )", "aban ",
  "(acccn)", "accen",
  "([an][iïíìl!1I]x)", "aix",
  "(x[6d])", "xó",
  "(a[iïíìl!1I]g[anou][nu])", "algun",
  "(a[iïíìl!1I]g[un]a)", "aigua",
  "(aU)", "all",
  "( attre )", " altre ",
  "( a[iïíìl!1I][rtl]r)", " altr",
  "( a[iïíìl!1I][8s] )", " als ",
  "(anl )", "ant ",
  "b[ce]rt", "bert",
  "(b[6d])", "bó",
  "( calala)", " catala",
  "(c[iïíìl!1I][56d] )", "ció ",
  "(c[iïíìl!1I][56d][ns][8s] )", "cions ",
  "( corn )", " com ",
  "( co[un][iïíìl!1I] )", " com ",
  "(cb)", "ch",
  "(c[iïíìl!1I] )", "ci ", // e.g. ofici
  "(cuU)", "cull",
  "(d[*'\"])", "d'",
  "( d[ocs] )", " de ",
  "(d[ce][iïíìl!1I]x)", "deix",
  "( d[ce][iïíìl!1I] )", " del ",
  "( d[ce][iïíìl!1I][8s] )", " dels ",
  "(d[ce][s8])", "des",
  "(D[ce][anu] )", "Deu ",
  "(d6[nu])", "déu",
  "(d[!1I])", "di",
  "(d[iïíìl!1I]g)", "dig",
  "([nu][bh][ec][nu])", "uhen",
  "(drc)", "dre",
  "(dr[iïíìl!1I])", "dri",
  "([ce]o[nu][ce][ce][bh])", "conech",
  "(efe[ce][ce])", "efecc",
  "([BE][iïíìl!1I] )", "El ",
  "(EU )", "Ell ",
  "( [ce][iïíìl!1I] )", " el ",
  "([ -][ce][iïíìl!1I][8s] )", " els ",
  "( [ce][iïíìl!1I][iïíìl!1I])", " ell",
  "(eU)", "ell",
  "(cm)", "em",
  "([BE][nqu] )", "En ",
  "(cn)", "en",
  "( [ce][un] )"," en ",
  "([ce]n[lt] )", "ent ",
  "( [ce]n[it]r[ce])", " entre",
  " [ce]s ", " es ",
  "[ce][8s][ce]ri", "escri",
  "[ce][8s][ce]r ", "eser ",
  "( [ceo][8s][8s][ceo]r )", " esser ",
  " [ce][s8][lt]([aáà]) ", " est$1 ",
  "exlr", "extr",
  "(fc)", "fe",
  "f[1!Iil]([bcdfgjlmnpqrstvxyz])", "fi$1", // fic, fim...
  "[fí][iïíìl!1I][iïíìl!1I][iïíìl!1I]", "fill",
  "(g[iïíìl!1I][6é]s)", "glés",
  "(g[nu][óé])", "gué",
  "(gn[ce])", "gne", // e.g. digne
  "(gu[ce])", "gue",
  "g[nu]t", "gut",
  "([hb]a[nu] )", "han ",
  "([hb]ab[ce]m)", "havem",
  "([hb]av[iïíìl!1I]a)", "havia",
  "([hb]av[iïíìl!1I][ce][nu])", "havien",
  "([hb][iïíìl!1I] [hb]a)", "hi ha",
  "( [bh]a )", " ha ",
  "(h[iïìl!1I])", "hi",
  "([bh][oe]m[ce])", "home",
  "(horn)", "hom",
  "( [bh]o[no]t )", " hont ",
  "([iïíìl!1]U)", "ill",
  "([iïíìl!1I]n[lt] )", "int ",
  "J[ce][s8]", "Jes",
  "(jomada)", "jornada",
  "( [iïíìl!1I][nu]r )", " lur ",
  "( [*'´\\^][iïíìl!1I] )", " 'l ",
  "(L[*´\\^])", "L'",
  "([ .,;])[iïíìl!1I][*'´\\^]", "$1l'",
  "Ta([iy])gua", "l'a$1gua",
  "(V a)", "l' a",
  "( [iïíìl!1]a )", " la ",
  "( [iïíìl!1][ec][a8s] )", " les ",
  "( [iïíìl!1]i )", " li ",
  "([UI][iïíìl!1I][bh]r[ec])", "Uibre",
  "ii([aáàeéèiíìoóòuúù])", "ll$1", // "iiibre"
  "( [iïíìl!1]o )", " lo ",
  "( [iïíìl!1]o[a8s] )", " los ",
  "(Uoc)", "lloc",
  "(U[nu][nu])", "llun",
  "([*'´][iïíìl!1I][8s])", "'ls",
  "( M[8s] )", " 'ls ",
  "( [iïíìl!1][8s] )", " ls ",
  "(m[ce])", "me",
  "(loient )", "lment ",
  "(ni[ce][nu]y)", "menys",
  "(mo[iïíìl!1]t)", "molt",
  "(—jN)", "—¡N",
  "(n[*'\\^])", "n'",
  "([*'\\^]n)", "'n",
  " [nu]o ", " no ",
  "nl([ ;.:r])","nt$1", // entre, -nt
  "(oU)", "oll",
  "(—jP)", "—¡P",
  "(prc)", "pre",
  "(—[ij]Q)", "—¡Q",
  "(Q[anou][ce])", "Que",
  "(Q[anou][iïíìl!1I])", "Qui",
  "([çq][*'\\^])", "q'",
  "([çq][nu][*'\\^])", "qu'",
  "([çq][anou][anou][il])", "qual",
  "([çq][anou][anou][nu])", "quan",
  "([çq][anou][ceo])", "que",
  "([çq]ii[ceo])", "que",
  "([çq]u[ce] [iïíìl!1I] )", "que l ",
  "([çq][anou][óé])", "qué",
  "([çq][anou][ce]s[lt])", "quest",
  "([çq][anou][iïíìl!1I])", "qui",
  "(rcg)", "reg", // could also be "rog"
  "(rcr)", "rer", // could also be "ror"
  "rcy", "rey",
  "(Sl)", "Si",
  "([s8][*'\\^])", "s'",
  "([*'\\^]s)", "'s",
  "([8s]cc)", "sec",
  "scd", "sed",
  "([8s][ceo][çgq][eo][nu])", "segon",
  "( [8s]cr )", " ser ", // could also be " sor "
  "( [8s][ce]r[ce])", " sere",
  "( [8s]o[nD] )", " son ",
  "([8s]lr)", "str", // e.g. vostra
  "(t[*'\\^])", "t'",
  "([*'\\^]t)", "'t",
  "[lt]amb([éèe])", "tamb$1",
  "( tc )", " te ",
  "[lt][ce]rr", "terr",
  "(tU)", "tll",
  " [tl][iIïíìÏÌÍIL][un][ce]([h ;.:])"," tinc$1",
  "(trc)", "tre", // also "tro"
  "( [nu][nu] )", " un ", // also " nu "
  "( [nu][nu][oa] )", " una ", // also " nua "
  "(uU)", "ull",
  "(▼)", "V",
  "v[ce]ll", "vell",
  "(•)", ".",
  "([*\\^])", " "
 ];

 var lng = typo_def.length;
 // A pattern without its replacement means the table is broken: do nothing.
 if (lng % 2 != 0) return text;
 // Declared locally: the original assigned an undeclared `search`, which
 // leaked a global and throws a ReferenceError in strict mode.
 var search;
 for (var i = 0; i < lng; i += 2) {
  search = new RegExp(typo_def[i], "g");
  text = text.replace(search, typo_def[i + 1]);
 }
 return text;
}

/**
 * Joins hard-wrapped lines of a plain-text fragment into running prose.
 *
 * A line break that directly follows a lowercase letter (including the
 * accented vowels listed in the pattern) or a comma is replaced by a space.
 * Line breaks after any other character (e.g. a full stop) are kept.
 *
 * @param {string} text Fragment to reflow.
 * @return {string} The reflowed fragment; the input is returned untouched if
 *   the table is malformed (odd number of entries).
 */
function Fragment_Prosa(text) {
 var typo_def = [
  // Preliminary clean-up rules, currently disabled:
//  "(  )", " ",
//  "( [\n])", "\n", // except IE (original note)
  // Replace a line break with a space unless it follows punctuation
  "([a-zàáèéìíòóùú\,])[\n]", "$1 "
 ];

 var lng = typo_def.length;
 // A pattern without its replacement means the table is broken: do nothing.
 if (lng % 2 != 0) return text;
 // Declared locally: the original assigned an undeclared `search`, which
 // leaked a global and throws a ReferenceError in strict mode.
 var search;
 for (var i = 0; i < lng; i += 2) {
  search = new RegExp(typo_def[i], "g");
  text = text.replace(search, typo_def[i + 1]);
 }
 return text;
}

/**
 * Runs the OCR typo corrector over the edit box (#wpTextbox1).
 *
 * Only acts when wgNamespaceNumber is 102. The text is split on markup
 * (<math> blocks, HTML-like tags, style="..." attributes, &nbsp;/&mdash;,
 * HTML comments, leading ":"/";" runs after a newline and [[...]] links);
 * the markup is copied verbatim and only the text in between is passed to
 * Fragment_Tipo.
 */
function Tipografia() {
  // NOTE(review): wgNamespaceNumber is read as a bare global defined outside
  // this file; confirm it is still provided by the wiki.
  if (wgNamespaceNumber == 102) {
    var editbox = document.getElementById('wpTextbox1');
    if (editbox) {
      var text = editbox.value;
      var new_text = '';
      var last_match = 0;
      // `result` and `splitter` are declared locally; the original leaked
      // both as globals (a ReferenceError in strict mode).
      var result;
      // Split the text so that no change is applied to the markup itself.
      // The tag alternative used to read `[</[a-zA-z0-9 =\"']+>`: the stray
      // `[` made it a character class, so ordinary words preceding any `>`
      // were swallowed as "markup" and never corrected. It is now anchored on
      // `<` (and still accepts `/`, so `</b>` and `<br />` stay protected).
      // The `A-z` range is kept as it was.
      var splitter = new RegExp("<math>.*</math>|<[/a-zA-z0-9 =\"']+>|style=\".*\"|&nbsp;|&mdash;|<!--.*-->|\n:[:]*|\n;[;]*|[[][[].*]]", "gm");
      while ((result = splitter.exec(text)) != null) {
        // Plain text between the previous markup and this one.
        new_text += Fragment_Tipo(text.slice(last_match, result.index));
        // The markup itself, untouched.
        new_text += result[0];
        last_match = splitter.lastIndex;
      }
      // Trailing text after the last piece of markup.
      new_text += Fragment_Tipo(text.slice(last_match));
      editbox.value = new_text;
    }
  }
}


/**
 * Toggles the correction form shown above the edit box.
 *
 * If an element with id "regexform" already exists, the form is removed via
 * TreuFormulari(). Otherwise a <div id="regexform"> holding a
 * <form id="regexformform"> is inserted as the first child of the edit box's
 * parent, with the "Corregeix" and "Prosa" buttons, the search and replace
 * inputs, and the "Vés!" button, in that order.
 */
function CreaFormulari() {
 if (document.getElementById('regexform')) {
  TreuFormulari();
  return;
 }

 var textarea = document.getElementById('wpTextbox1');

 // Container, placed before everything else next to the edit box
 var container = document.createElement('div');
 container.setAttribute('id', 'regexform');
 textarea.parentNode.insertBefore(container, textarea.parentNode.firstChild);

 var form = document.createElement('form');
 form.setAttribute('id', 'regexformform');
 container.appendChild(form);

 // Appends a push button whose title and caption are the same text.
 function appendButton(handler, caption) {
  var button = document.createElement('input');
  button.setAttribute('type', "button");
  button.setAttribute('onclick', handler);
  button.setAttribute('title', caption);
  button.setAttribute('value', caption);
  form.appendChild(button);
 }

 // Appends a label followed by the text input it is bound to.
 function appendLabelledInput(inputId, caption) {
  var field = document.createElement('input');
  field.setAttribute('id', inputId);
  var label = document.createElement('label');
  label.setAttribute('for', inputId);
  label.appendChild(document.createTextNode(caption));
  form.appendChild(label);
  form.appendChild(field);
 }

 appendButton("Tipografia();", "Corregeix");
 appendButton("Fes_Prosa();", "Prosa");
 appendLabelledInput('formsearch', "Canvia ");
 appendLabelledInput('formreplace', ' per ');
 appendButton("customgo();", "Vés!");
}

/**
 * Reflows the content of the edit box (#wpTextbox1) into prose.
 *
 * The text is split on markup (<math> blocks, HTML-like tags, style="..."
 * attributes, &nbsp;/&mdash;, HTML comments, leading ":"/";" runs after a
 * newline and [[...]] links); the markup is copied verbatim and only the text
 * in between is passed to Fragment_Prosa.
 */
function Fes_Prosa() {
 var editbox = document.getElementById('wpTextbox1');
 if (editbox) {
  var text = editbox.value;
  var new_text = '';
  var last_match = 0;
  // `result` and `splitter` are declared locally; the original leaked both
  // as globals (a ReferenceError in strict mode).
  var result;
  // Split the text so that no change is applied to the markup itself.
  // The tag alternative used to read `[</[a-zA-z0-9 =\"']+>`: the stray `[`
  // made it a character class, so ordinary words preceding any `>` were
  // swallowed as "markup" and never processed. It is now anchored on `<`
  // (and still accepts `/`, so `</b>` and `<br />` stay protected).
  // The `A-z` range is kept as it was.
  var splitter = new RegExp("<math>.*</math>|<[/a-zA-z0-9 =\"']+>|style=\".*\"|&nbsp;|&mdash;|<!--.*-->|\n:[:]*|\n;[;]*|[[][[].*]]", "gm");
  while ((result = splitter.exec(text)) != null) {
   // Plain text between the previous markup and this one.
   new_text += Fragment_Prosa(text.slice(last_match, result.index));
   // The markup itself, untouched.
   new_text += result[0];
   last_match = splitter.lastIndex;
  }
  // Trailing text after the last piece of markup.
  new_text += Fragment_Prosa(text.slice(last_match));
  editbox.value = new_text;
 }
}

/**
 * Runs the user's custom search & replace over the edit box (#wpTextbox1).
 *
 * The search text is read from #formsearch and the replacement from
 * #formreplace; a typed "\n" (backslash + n) in either field stands for a
 * line break. The search text is always used as a regular expression:
 *  - written as /pattern/flags, the pattern is used with the flags given
 *    (only g, i and m are honoured, in any letter case, duplicates ignored);
 *  - otherwise the whole text is the pattern and the replacement is global.
 *
 * An invalid regular expression still throws the SyntaxError raised by the
 * RegExp constructor.
 */
function customgo() {
 var msearch = document.getElementById('formsearch').value;
 msearch = msearch.replace(/\\n/g, '\n');
 var mreplace = document.getElementById('formreplace').value;
 mreplace = mreplace.replace(/\\n/g, '\n');

 // Turn the input into a regular expression
 var search_re;
 var parts = /^\s*\/([\s\S]*)\/([a-z]*)\s*$/i.exec(msearch);
 if (!parts) { // no delimiters
  search_re = new RegExp(msearch, 'g');
 }
 else { // with delimiters: parts[1] is the pattern, parts[2] the flags
  // Keep each supported flag at most once, lower-cased. The original
  // extracted the flags case-sensitively, so "/foo/G" produced garbage flags
  // and made the RegExp constructor throw; duplicated flags threw as well.
  var wanted = parts[2].toLowerCase();
  var supported = 'gim';
  var regmodifiers = '';
  for (var i = 0; i < supported.length; i++) {
   if (wanted.indexOf(supported.charAt(i)) !== -1) {
    regmodifiers += supported.charAt(i);
   }
  }
  search_re = new RegExp(parts[1], regmodifiers);
 }

 var editbox = document.getElementById('wpTextbox1');
 if (!editbox) return; // nothing to edit on this page
 editbox.value = editbox.value.replace(search_re, mreplace);
}

/**
 * Removes the correction form (the element with id "regexform") from the page.
 * Does nothing when the form is not currently attached to the document.
 */
function TreuFormulari() {
 var regexform = document.getElementById('regexform');
 // Guard against a missing form instead of failing with a TypeError.
 if (!regexform || !regexform.parentNode) return;
 regexform.parentNode.removeChild(regexform);
 // NOTE(review): patterncount is not declared anywhere in the visible code,
 // so this writes a global; presumably a leftover from the script this form
 // was adapted from. Kept in case other scripts read it; confirm and remove.
 patterncount = -1;
}

/**
 * Appends a 46x22 image button to the element with id "toolbar".
 * Does nothing when that element does not exist.
 *
 * @param {string} id Id given to the button image.
 * @param {string} alt Alternative text of the image.
 * @param {string} comment Tooltip (title) of the image.
 * @param {string} source URL of the image.
 * @param {Function} onclick Click handler.
 */
function addButton2(id,alt,comment,source,onclick){
 var tb  = document.getElementById("toolbar");
 if(tb){
  var image = document.createElement("img");
  // The id parameter used to be ignored, leaving the button without an id.
  if (id) image.id = id;
  image.width = 46;
  image.height = 22;
  image.border = 0;
  image.className = "mw-toolbar-editbutton";
  image.style.cursor = "pointer";
  image.alt = alt;
  image.title = comment;
  image.src = source;
  image.onclick = onclick;
  tb.appendChild(image);
 }
}
/**
 * Adds the automatic-correction toolbar button, which toggles the correction
 * form (CreaFormulari) when clicked. Only acts when wgNamespaceNumber is 102.
 */
function add_Corr_button(){
 if (wgNamespaceNumber != 102) {
  return;
 }
 addButton2(
  "wpRep",
  "Correccions automàtiques",
  "Correcció automàtica",
  "//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png",
  CreaFormulari
 );
}
// Hand add_Corr_button to addOnloadHook, which is defined outside this file
// (presumably MediaWiki's legacy page-load hook; confirm it is still available).
addOnloadHook(add_Corr_button);