// Revision 117754 of "Utente:Ruthven/PostOCR.js" on napwikisource
/* Automatic post-OCR corrections applied when a new Page is created */
// Bootstrap: after the ProofreadPage and OOUI modules have loaded, add a
// frameless button to the WikiEditor "advanced" section that runs postOCR().
mw.loader.using( [ 'ext.proofreadpage.page', 'oojs-ui-core' ] ).done( function () {
	$( document ).ready( function () {
		var action = mw.config.get( 'wgAction' );
		var inPageNamespace = mw.config.get( 'wgCanonicalNamespace' ) === 'Page';
		var isEditing = action === 'edit' || action === 'submit';

		// Only act in the Page namespace while wgAction is "edit" or "submit".
		if ( !inPageNamespace || !isEditing ) {
			return;
		}

		// Draft of the site-specific functions: they need jQuery and its
		// wikiEditor plugin, so do nothing when either is missing.
		if ( typeof $ === 'undefined' || typeof $.fn.wikiEditor === 'undefined' ) {
			return;
		}

		$( function () {
			// Create the button.
			var fixButton = new OO.ui.ButtonWidget( {
				label: '’’',
				id: 'postOCR-button-widget',
				framed: false,
				title: 'Appara \'o tiesto'
			} );

			fixButton.on( 'click', function () {
				postOCR();
			} );

			$( '#wikiEditor-section-advanced' ).append( fixButton.$element );
		} );
	} );
} );
/**
 * Strips numeric HTML comments from the lines of a page text and records
 * the numbers they carried.
 *
 * Two comment shapes are recognised, each holding whitespace-separated
 * integers: `<!--a b c d e f-->` (six numbers) and `<!--a b c d-->` (four).
 * Nothing is done unless the text contains at least one six-number comment.
 * For every matching line the text before the comment replaces the line, and
 * `[a, b, c, d, textBeforeComment]` is appended to `datiPagina.righe`; a
 * six-number comment additionally stores `[e, f]` in `datiPagina.xypagina`.
 * All captured numbers are kept as strings.
 *
 * NOTE(review): the meaning of the numbers is not visible in this file;
 * presumably they are line/page geometry emitted by the OCR layer - confirm.
 *
 * Side effect: when processing happens, the collected data is published as
 * the global `window.datiPagina` (replacing any previous value).
 *
 * @param {string} testo Full page text, lines separated by "\n".
 * @return {string} The text with the recognised comments removed, or the
 *   input unchanged when no six-number comment is present.
 */
function newDpl(testo) {
	var r6 = /(.*)<!--(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)-->/;
	var r4 = /(.*)<!--(\d+)\s(\d+)\s(\d+)\s(\d+)-->/;

	if (!r6.test(testo)) {
		return testo;
	}

	// Build the record locally and publish it explicitly: a bare
	// `datiPagina = {}` is an implicit global and throws a ReferenceError
	// in strict mode / ES modules.
	var pageData = { righe: [] };
	window.datiPagina = pageData;

	var lines = testo.split("\n");
	for (var i = 0; i < lines.length; i += 1) {
		var m6 = r6.exec(lines[i]);
		if (m6) {
			pageData.xypagina = [ m6[6], m6[7] ];
			pageData.righe.push([ m6[2], m6[3], m6[4], m6[5], m6[1] ]);
			lines[i] = m6[1];
		}

		// Deliberately not an `else`: the four-number pattern is tried on
		// whatever is left of the line after the six-number pass.
		var m4 = r4.exec(lines[i]);
		if (m4) {
			pageData.righe.push([ m4[2], m4[3], m4[4], m4[5], m4[1] ]);
			lines[i] = m4[1];
		}
	}

	return lines.join("\n");
}
/**
 * Automatic post-OCR corrections for the page edit box.
 *
 * Reads the element named "wpTextbox1", runs newDpl() on its text, applies
 * a fixed sequence of regex clean-ups (typographic apostrophes, wiki
 * italic/bold quote markers, re-joining of hyphenated line breaks, common
 * OCR misreadings, spacing around punctuation, ellipsis) and writes the
 * result back. Does nothing when the textbox is not present.
 */
function postOCR() {
	var editbox = document.getElementsByName('wpTextbox1')[0];
	if (!editbox) {
		// No edit box on this page: nothing to correct.
		return;
	}

	var text = newDpl(editbox.value);

	// Turn every straight apostrophe into a typographic one, then put back
	// the runs that are wiki markup: ''' and '' next to words/non-words.
	text = text.replace(/'/g, '’')
		.replace(/’’’/g, '\'\'\'')
		.replace(/(\s|\W)’’(\w+)/g, '$1\'\'$2')
		.replace(/(\b|\W)’’(\W)/g, '$1\'\'$2')
		.replace(/ \n/g, '\n');

	// Re-join words split by "-" or "¬" at a line end (pattern imported from
	// cleanup()); a "|" right before the hyphen is left alone. Used as a
	// temporary dehyphenation until a better regex is built or found.
	text = text.replace(/([^|])[-¬] *\n([^ \n]*)[ ]*[\n]?/g, "$1$2\n");

	text = text.replace(/1’/g, 'l’')
		.replace(/\bdeir\b\s*/g, 'dell’')
		.replace(/\bair\b\s*/g, 'all’')
		.replace(/\bneir\b\s*/g, 'nell’')
		.replace(/\bcoir\b\s*/g, 'coll’')
		.replace(/\bperche\b/g, 'perchè')
		.replace(/\bpoiche\b/g, 'poichè')
		.replace(/\bpiu\b/g, 'più')
		.replace(/\bpiti\b/g, 'più')
		.replace(/\s+([,;.:!?])/g, '$1')
		.replace(/«\s+/g, '«')
		.replace(/\s+»/g, '»')
		.replace(/([bcdfghlmnprstvzBCDFGHLMNPRSTV])’\s+/g, '$1’')
		.replace(/(eh|cb)’/g, 'ch’')
		.replace(/\s+$/, '')
		.replace(/\.\.\./g, '…')
		// The pattern here read /fi/ -> 'fi', which is a no-op; presumably
		// the U+FB01 "fi" ligature was flattened in transit, so match it
		// explicitly by code point.
		.replace(/\uFB01/g, 'fi')
		.replace(/U’/g, 'll’');

	editbox.value = text;
	console.log("Eseguite correzioni post-OCR automatiche");
}
// All content in the above text box is licensed under the Creative Commons Attribution-ShareAlike license Version 4 and was originally sourced from https://nap.wikisource.org/w/index.php?oldid=117754.
// This site is not affiliated with or endorsed in any way by the Wikimedia Foundation or any of its affiliates. In fact, we fucking despise them.