/* Revision 58145 of "MediaWiki:Gadget-CorrectorOCR.js" on cawikisource.
 * A mix of scripts from several Wikisource MediaWiki pages (ThomasV, Pathoschild, et al.) */
/**
 * Applies the OCR typo-correction table to a fragment of plain text.
 *
 * The table is a flat list of (pattern, replacement) pairs. Every pattern is
 * compiled as a global regular expression and the pairs are applied in table
 * order, so later rules see the output of earlier ones.
 *
 * @param {string} text Fragment to correct.
 * @returns {string} The corrected fragment, or the input unchanged when the
 *     table does not contain an even number of entries.
 */
function Fragment_Tipo(text) {
	var typo_def = [
		// NOTE(review): as written this rule replaces one space with one space (a no-op);
		// presumably it was meant to collapse double spaces - confirm against the live gadget.
		"( )", " ",
		"(— )", "—",
		"( —)", "—",
		"(- )", "-",
		"( -)", "-",
		"(■)",";",
		"( \,)", "\,",
		"( \;)", ";",
		"( :)", ":",
		"(-[1I!li])", "-l", // e.g. "predre-l"
		"( [\n])", "\n", // original note: "exc. IE" (presumably: does not work in Internet Explorer)
		"-\n([a-zéèçà])", "$1", // word hyphenated across a line break; original note: "Exc. IE"
		"(Digitized by)", " ",
		"(VjOOQIC)", " ",
		"(a[bh]a )", "aba ",
		"(a[bh]an )", "aban ",
		"(acccn)", "accen",
		"([an][iïíìl!1I]x)", "aix",
		"(x[6d])", "xó",
		"(a[iïíìl!1I]g[anou][nu])", "algun",
		"(a[iïíìl!1I]g[un]a)", "aigua",
		"(aU)", "all",
		"( attre )", " altre ",
		"( a[iïíìl!1I][rtl]r)", " altr",
		"( a[iïíìl!1I][8s] )", " als ",
		"(anl )", "ant ",
		"b[ce]rt", "bert",
		"(b[6d])", "bó",
		"( calala)", " catala",
		"(c[iïíìl!1I][56d] )", "ció ",
		"(c[iïíìl!1I][56d][ns][8s] )", "cions ",
		"( corn )", " com ",
		"( co[un][iïíìl!1I] )", " com ",
		"(cb)", "ch",
		"(c[iïíìl!1I] )", "ci ", // e.g. "ofici"
		"(cuU)", "cull",
		"(d[*'\"])", "d'",
		"( d[oc] )", " de ",
		"(d[ce][iïíìl!1I]x)", "deix",
		"( d[ce][iïíìl!1I] )", " del ",
		"( d[ce][iïíìl!1I][8s] )", " dels ",
		"(d[ce][s8])", "des",
		"(D[ce][anu] )", "Deu ",
		"(d6[nu])", "déu",
		"(d[!1I])", "di",
		"(d[iïíìl!1I]g)", "dig",
		"([nu][bh][ec][nu])", "uhen",
		"(drc)", "dre",
		"(dr[iïíìl!1I])", "dri",
		"([ce]o[nu][ce][ce][bh])", "conech",
		"(efe[ce][ce])", "efecc",
		"([BE][iïíìl!1I] )", "El ",
		"(EU )", "Ell ",
		"( [ce][iïíìl!1I] )", " el ",
		"([ -][ce][iïíìl!1I][8s] )", " els ",
		"( [ce][iïíìl!1I][iïíìl!1I])", " ell",
		"(eU)", "ell",
		"(cm)", "em",
		"([BE][nqu] )", "En ",
		"(cn)", "en",
		"( [ce][un] )"," en ",
		"([ce]n[lt] )", "ent ",
		"( [ce]n[it]r[ce])", " entre",
		" [ce]s ", " es ",
		"[ce][8s][ce]ri", "escri",
		"[ce][8s][ce]r ", "eser ",
		"( [ceo][8s][8s][ceo]r )", " esser ",
		" [ce][s8][lt]([aáà]) ", " est$1 ",
		"(fc)", "fe",
		"f[1!Iil]([bcdfgjlmnpqrstvxyz])", "fi$1", // fic, fim, ...
		"[fí][iïíìl!1I][iïíìl!1I][iïíìl!1I]", "fill",
		"(g[iïíìl!1I][6é]s)", "glés",
		"(g[nu][óé])", "gué",
		"(gn[ce])", "gne", // e.g. "digne"
		"(gu[ce])", "gue",
		"g[nu]t", "gut",
		"([hb]a[nu] )", "han ",
		"([hb]ab[ce]m)", "havem",
		"([hb]av[iïíìl!1I]a)", "havia",
		"([hb]av[iïíìl!1I][ce][nu])", "havien",
		"([hb][iïíìl!1I] [hb]a)", "hi ha",
		"( [bh]a )", " ha ",
		"(h[iïìl!1I])", "hi",
		"([bh][oe]m[ce])", "home",
		"(horn)", "hom",
		"( [bh]o[no]t )", " hont ",
		"([iïíìl!1]U)", "ill",
		"([iïíìl!1I]n[lt] )", "int ",
		"J[ce][s8]", "Jes",
		"(jomada)", "jornada",
		"( [iïíìl!1I][nu]r )", " lur ",
		"( [*'´\\^][iïíìl!1I] )", " 'l ",
		"(L[*´\\^])", "L'",
		"([ .,;])[iïíìl!1I][*'´\\^]", "$1l'",
		"Ta([iy])gua", "l'a$1gua",
		"(V a)", "l' a",
		"( [iïíìl!1]a )", " la ",
		"( [iïíìl!1][ec][a8s] )", " les ",
		"( [iïíìl!1]i )", " li ",
		"([UI][iïíìl!1I][bh]r[ec])", "Uibre",
		"ii([aáàeéèiíìoóòuúù])", "ll$1", // e.g. "iiibre"
		"( [iïíìl!1]o )", " lo ",
		"( [iïíìl!1]o[a8s] )", " los ",
		"(Uoc)", "lloc",
		"(U[nu][nu])", "llun",
		"([*'´][iïíìl!1I][8s])", "'ls",
		"( M[8s] )", " 'ls ",
		"( [iïíìl!1][8s] )", " ls ",
		"(m[ce])", "me",
		"(loient )", "lment ",
		"(ni[ce][nu]y)", "menys",
		"(mo[iïíìl!1]t)", "molt",
		"(—jN)", "—¡N",
		"(n[*'\\^])", "n'",
		"([*'\\^]n)", "'n",
		" [nu]o ", " no ",
		// Fixed: the letter before and the vowel after the match were consumed but not
		// re-emitted, so "contra" became "cntr". They are now captured and restored.
		// NOTE(review): the rule still rewrites a legitimate "ut"/"ul" before these
		// contexts (e.g. "vingut a") - review whether it should be narrowed.
		"([a-z])[un][lt]([ ;.:r])([aeiou])", "$1nt$2$3", // e.g. "entre", "contra"
		"(oU)", "oll",
		"(—jP)", "—¡P",
		"(prc)", "pre",
		"(—[ij]Q)", "—¡Q",
		"(Q[anou][ce])", "Que",
		"(Q[anou][iïíìl!1I])", "Qui",
		"([çq][*'\\^])", "q'",
		"([çq][nu][*'\\^])", "qu'",
		"([çq][anou][anou][il])", "qual",
		"([çq][anou][anou][nu])", "quan",
		"([çq][anou][ceo])", "que",
		"([çq]ii[ceo])", "que",
		"([çq]u[ce] [iïíìl!1I] )", "que l ",
		"([çq][anou][óé])", "qué",
		"([çq][anou][ce]s[lt])", "quest",
		"([çq][anou][iïíìl!1I])", "qui",
		"(rcg)", "reg", // could also be "rog"
		"(rcr)", "rer", // could also be "ror"
		"rcy", "rey",
		"(Sl)", "Si",
		"([s8][*'\\^])", "s'",
		"([*'\\^]s)", "'s",
		"([8s]cc)", "sec",
		"([8s][ceo][çgq][eo][nu])", "segon",
		"( [8s]cr )", " ser ", // could also be " sor "
		"( [8s][ce]r[ce])", " sere",
		"( [8s]o[nD] )", " son ",
		"([8s]lr)", "str", // e.g. "vostra"
		"(t[*'\\^])", "t'",
		"([*'\\^]t)", "'t",
		"[lt]amb([éèe])", "tamb$1",
		"( tc )", " te ",
		"[lt][ce]rr", "terr",
		"(tU)", "tll",
		" [tl][iIïíìÏÌÍIL][un][ce]([h ;.:])"," tinc$1",
		"(trc)", "tre", // could also be "tro"
		"( [nu][nu] )", " un ", // could also be " nu "
		"( [nu][nu][oa] )", " una ", // could also be " nua "
		"(uU)", "ull",
		"(▼)", "V",
		"v[ce]ll", "vell",
		"exlr", "extr",
		"(•)", ".",
		"([*\\^])", " "
	];
	var lng = typo_def.length;
	// A pattern without a matching replacement means the table is malformed:
	// leave the text untouched rather than apply a partial rule set.
	if (lng % 2 != 0) return text;
	for (var i = 0; i < lng; i += 2) {
		// Declared locally: the original assigned an undeclared `search`, which
		// leaked a global and throws a ReferenceError in strict mode.
		var search = new RegExp(typo_def[i], "g");
		text = text.replace(search, typo_def[i + 1]);
	}
	return text;
}
/**
 * Joins hard-wrapped lines of a plain-text fragment into running prose.
 *
 * A line break is replaced by a single space when the character just before
 * it is a lowercase letter listed in the pattern or a comma. Line breaks
 * after any other character (full stop, colon, uppercase letter, ...) are kept.
 *
 * @param {string} text Fragment to reflow.
 * @returns {string} The reflowed fragment, or the input unchanged when the
 *     table does not contain an even number of entries.
 */
function Fragment_Prosa(text) {
	var typo_def = [
		// Clean-up that has to happen beforehand (rules disabled here, kept for reference):
		// "( )", " ",
		// "( [\n])", "\n", // original note: "exc. IE"
		// Replace a line break with a space unless it follows punctuation (commas excepted).
		"([a-zàáèéìíòóùú\,])[\n]", "$1 "
	];
	var lng = typo_def.length;
	// A pattern without a matching replacement means the table is malformed.
	if (lng % 2 != 0) return text;
	for (var i = 0; i < lng; i += 2) {
		// Declared locally: the original assigned an undeclared `search`, which
		// leaked a global and throws a ReferenceError in strict mode.
		var search = new RegExp(typo_def[i], "g");
		text = text.replace(search, typo_def[i + 1]);
	}
	return text;
}
/**
 * Runs the OCR typo corrections (Fragment_Tipo) over the edit box contents.
 *
 * Only acts when wgNamespaceNumber is 102 and an element with id "wpTextbox1"
 * exists. The text is cut at markup tokens; the tokens are copied verbatim and
 * only the text between them is passed to Fragment_Tipo.
 *
 * Fixes relative to the previous version:
 *  - `splitter` and `result` are local (they were implicit globals).
 *  - The matched token is appended as a string (`result[0]`), not the match array.
 *  - The tag alternatives were malformed: one lacked a quantifier and the other
 *    began with a stray "[", turning it into a character class that swallowed
 *    ordinary words in front of any ">". They are merged into one tag pattern.
 *  - A literal space and a literal "—" were split tokens, so no fragment could
 *    contain either and every space/dash-anchored rule in Fragment_Tipo was dead.
 *    NOTE(review): presumably these were the entities "&nbsp;" and "&mdash;"
 *    decoded by a copy/paste; they are restored as entities - confirm against
 *    the live gadget.
 */
function Tipografia() {
	if (wgNamespaceNumber != 102) return;
	var editbox = document.getElementById('wpTextbox1');
	if (!editbox) return;

	var text = editbox.value;
	var new_text = '';
	var last_match = 0;
	// Tokens that must not be altered: <math> blocks, HTML-like tags, style
	// attributes, the two entities, HTML comments, leading ":"/";" runs at the
	// start of a line, and [[...]] links.
	var splitter = /<math>.*<\/math>|<[\/a-zA-Z0-9_ ="']+>|style=".*"|&nbsp;|&mdash;|<!--.*-->|\n:+|\n;+|\[\[.*\]\]/gm;
	var result;
	while ((result = splitter.exec(text)) !== null) {
		// Correct the plain text between the previous token and this one...
		new_text += Fragment_Tipo(text.slice(last_match, result.index));
		// ...and copy the token itself untouched.
		new_text += result[0];
		last_match = splitter.lastIndex;
	}
	new_text += Fragment_Tipo(text.slice(last_match));
	editbox.value = new_text;
}
/**
 * Toggles the correction form shown above the edit box.
 *
 * If an element with id "regexform" already exists, it is removed through
 * TreuFormulari(). Otherwise a container <div id="regexform"> holding a
 * <form id="regexformform"> is inserted as the first child of the edit box's
 * parent, with these controls in order:
 *   "Corregeix" button -> Tipografia()
 *   "Prosa" button     -> Fes_Prosa()
 *   "Canvia " label + input#formsearch
 *   " per " label + input#formreplace
 *   "Vés!" button      -> customgo()
 *
 * Click handlers are attached as functions rather than as strings of code in
 * an "onclick" attribute, so they do not depend on string evaluation. Each
 * wrapper looks the target up when clicked, as the string handlers did.
 * Nothing happens when the edit box ("wpTextbox1") is missing.
 */
function CreaFormulari() {
	if (document.getElementById('regexform')) {
		TreuFormulari();
		return;
	}
	var editbox = document.getElementById('wpTextbox1');
	if (!editbox) return;

	var regexform = document.createElement('div'); // container
	regexform.setAttribute('id', 'regexform');
	editbox.parentNode.insertBefore(regexform, editbox.parentNode.firstChild);

	var formform = document.createElement('form');
	formform.setAttribute('id', 'regexformform');
	regexform.appendChild(formform);

	// Appends an <input type="button"> whose caption doubles as its tooltip.
	function addActionButton(caption, handler) {
		var button = document.createElement('input');
		button.setAttribute('type', "button");
		button.onclick = handler;
		button.setAttribute('title', caption);
		button.setAttribute('value', caption);
		formform.appendChild(button);
	}

	// Appends a <label> followed by the input it labels.
	function addLabelledInput(inputId, labelText) {
		var input = document.createElement('input');
		input.setAttribute('id', inputId);
		var label = document.createElement('label');
		label.setAttribute('for', inputId);
		label.appendChild(document.createTextNode(labelText));
		formform.appendChild(label);
		formform.appendChild(input);
	}

	addActionButton("Corregeix", function () { Tipografia(); });
	addActionButton("Prosa", function () { Fes_Prosa(); });
	addLabelledInput('formsearch', "Canvia ");
	addLabelledInput('formreplace', ' per ');
	addActionButton("Vés!", function () { customgo(); });
}
/**
 * Reflows the edit box contents into prose by running Fragment_Prosa over it.
 *
 * Only acts when an element with id "wpTextbox1" exists. The text is cut at
 * markup tokens; the tokens are copied verbatim and only the text between
 * them is passed to Fragment_Prosa.
 *
 * Fixes relative to the previous version:
 *  - `splitter` and `result` are local (they were implicit globals).
 *  - The matched token is appended as a string (`result[0]`), not the match array.
 *  - The tag alternatives were malformed: one lacked a quantifier and the other
 *    began with a stray "[", turning it into a character class that swallowed
 *    ordinary words in front of any ">". They are merged into one tag pattern.
 *  - A literal space and a literal "—" were split tokens, chopping the text at
 *    every space.
 *    NOTE(review): presumably these were the entities "&nbsp;" and "&mdash;"
 *    decoded by a copy/paste; they are restored as entities - confirm against
 *    the live gadget.
 */
function Fes_Prosa() {
	var editbox = document.getElementById('wpTextbox1');
	if (!editbox) return;

	var text = editbox.value;
	var new_text = '';
	var last_match = 0;
	// Tokens that must not be altered: <math> blocks, HTML-like tags, style
	// attributes, the two entities, HTML comments, leading ":"/";" runs at the
	// start of a line, and [[...]] links.
	var splitter = /<math>.*<\/math>|<[\/a-zA-Z0-9_ ="']+>|style=".*"|&nbsp;|&mdash;|<!--.*-->|\n:+|\n;+|\[\[.*\]\]/gm;
	var result;
	while ((result = splitter.exec(text)) !== null) {
		// Reflow the plain text between the previous token and this one...
		new_text += Fragment_Prosa(text.slice(last_match, result.index));
		// ...and copy the token itself untouched.
		new_text += result[0];
		last_match = splitter.lastIndex;
	}
	new_text += Fragment_Prosa(text.slice(last_match));
	editbox.value = new_text;
}
/**
 * Runs the user's custom search-and-replace on the edit box.
 *
 * Reads the search text from #formsearch and the replacement from #formreplace;
 * a typed backslash followed by "n" in either field stands for a line break.
 * The search text is interpreted as a regular expression:
 *  - "/pattern/flags" uses the given flags; only g, i and m are honoured.
 *  - anything else is used as the pattern itself with the "g" flag.
 *
 * Fixes relative to the previous version:
 *  - Flags are parsed case-insensitively and de-duplicated. "/x/I" or "/x/gg"
 *    used to reach `new RegExp` with invalid flags and throw; with upper-case
 *    flags, g/i/m letters from the pattern itself also leaked into the flags.
 *  - An empty pattern is ignored instead of inserting the replacement between
 *    every character of the text.
 *  - A missing edit box is ignored instead of throwing.
 *
 * An invalid pattern still makes `new RegExp` throw, as before.
 */
function customgo() {
	var msearch = document.getElementById('formsearch').value.replace(/\\n/g, '\n');
	var mreplace = document.getElementById('formreplace').value.replace(/\\n/g, '\n');

	// Without delimiters: the whole input is the pattern, applied globally.
	var pattern = msearch;
	var flags = 'g';

	// With delimiters: split into pattern and modifiers.
	var delimited = /^\s*\/([\s\S]*)\/([a-z]*)\s*$/i.exec(msearch);
	if (delimited) {
		pattern = delimited[1];
		var requested = delimited[2].toLowerCase();
		var allowed = 'gim';
		flags = '';
		// Keep each supported flag at most once; unsupported letters are dropped.
		for (var i = 0; i < allowed.length; i++) {
			if (requested.indexOf(allowed.charAt(i)) !== -1) flags += allowed.charAt(i);
		}
	}

	if (pattern === '') return;
	var editbox = document.getElementById('wpTextbox1');
	if (!editbox) return;
	editbox.value = editbox.value.replace(new RegExp(pattern, flags), mreplace);
}
/**
 * Removes the correction form (element with id "regexform") from the page.
 *
 * Does nothing when the form is not present or is detached.
 */
function TreuFormulari() {
	var regexform = document.getElementById('regexform');
	if (!regexform || !regexform.parentNode) return;
	regexform.parentNode.removeChild(regexform);
	// The original assigned an undeclared `patterncount`, creating an implicit
	// global (a ReferenceError in strict mode). Nothing else in this file reads it.
	// NOTE(review): presumably a leftover from the script this form was adapted
	// from; the global is still set, explicitly, in case another script reads it -
	// confirm and remove.
	window.patterncount = -1;
}
/**
 * Appends a clickable 46x22 image button to the element with id "toolbar".
 *
 * Does nothing when no such element exists.
 *
 * @param {string} id Id for the created <img>. It used to be accepted but
 *     never applied; it is now set when non-empty.
 * @param {string} alt Alternative text of the image.
 * @param {string} comment Tooltip (title) of the image.
 * @param {string} source Image URL.
 * @param {Function} onclick Click handler.
 */
function addButton2(id, alt, comment, source, onclick) {
	var tb = document.getElementById("toolbar");
	if (!tb) return;

	var image = document.createElement("img");
	if (id) image.id = id;
	image.width = 46;
	image.height = 22;
	image.border = 0;
	image.className = "mw-toolbar-editbutton";
	image.style.cursor = "pointer";
	image.alt = alt;
	image.title = comment;
	image.src = source;
	image.onclick = onclick;
	tb.appendChild(image);
}
/**
 * Adds the OCR-correction toolbar button, which toggles the correction form
 * (CreaFormulari) when clicked. Only acts when wgNamespaceNumber equals 102.
 */
function add_Corr_button() {
	var inTargetNamespace = (wgNamespaceNumber == 102);
	if (!inTargetNamespace) {
		return;
	}
	var iconUrl = "//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png";
	addButton2(
		"wpRep",
		"Correccions automàtiques",
		"Correcció automàtica",
		iconUrl,
		CreaFormulari
	);
}
// Register the toolbar button with the host page's load hook. addOnloadHook is
// not defined in this file; presumably it is supplied by the wiki's legacy scripts.
addOnloadHook(add_Corr_button);
/*
 * Footer captured together with this copy of the script (not part of the gadget;
 * it used to be fused onto the statement above, which made the file unparsable):
 *
 * All content in the above text box is licensed under the Creative Commons
 * Attribution-ShareAlike license Version 4 and was originally sourced from
 * https://ca.wikisource.org/w/index.php?oldid=58145.
 *
 * This site is not affiliated with or endorsed in any way by the Wikimedia
 * Foundation or any of its affiliates.
 */