Jump to content

MediaWiki:OCR.js

From Wikisource

Note: After publishing, you may have to bypass your browser's cache to see the changes.

  • Firefox / Safari: Hold Shift while clicking Reload, or press either Ctrl-F5 or Ctrl-R (⌘-R on a Mac)
  • Google Chrome: Press Ctrl-Shift-R (⌘-Shift-R on a Mac)
  • Edge: Hold Ctrl while clicking Refresh, or press Ctrl-F5.
/*jshint boss:true*/
/*global $, mw*/

/*
 * Query the OCR text for a given Page:. First try to get the hOCR text layer, as it is
 * available for most books, is fast and is of better quality. If that fails, fall back
 * to the older and slower OCR method. hOCR fails for around 1 in 5000 books. OCR should
 * never fail, as it uses the image visible on the Page:.
 */

// Language code sent as the `lang` parameter of the OCR requests. It starts out as the
// wiki content language and is temporarily switched to 'de-f' by fraktur_ocr().
var lang = mw.config.get( 'wgContentLanguage' );

/**
 * Lock or unlock the editor while an OCR request is in flight.
 *
 * When locking, the OCR buttons lose their click handlers, the textbox is
 * disabled and pressing ESC unlocks everything again. When unlocking, the
 * click handlers are bound again and the ESC listener is removed.
 *
 * @param {boolean} set true to lock the editor, false to unlock it.
 */
function disable_input(set)
{
	// Namespaced so that only the listener installed by this function is removed.
	var escEvent = 'keyup.wsOcr';

	// Always drop a previously installed listener first: repeated calls must not
	// stack ESC handlers on the document.
	$(document).off(escEvent);
	if (set) {
		$(document).on(escEvent, function(e) {
			// 27 is the key code of ESC.
			if (e.which === 27) { disable_input(false); }
		});
	}

	// Clear the buttons in both directions so that unlocking twice in a row
	// cannot bind the same handler more than once.
	$('#wsOcr1').off('click');
	$('#wsOcr2').off('click');
	if (!set) {
		$('#wsOcr1').on('click', do_hocr);
		// The fraktur handler belongs to the second button, not the first one.
		$('#wsOcr2').on('click', fraktur_ocr);
	}

	$('#wpTextbox1').prop('disabled', set);
}

/**
 * Handle the JSON answer of the slow OCR service.
 *
 * On error the returned text is shown in an alert; otherwise it replaces the
 * content of the edit box. The editor is unlocked in both cases.
 *
 * @param {Object} data Service answer; reads `data.error` and `data.text`.
 */
function ocr_callback(data) {
	var textbox;

	if (!data.error) {
		textbox = document.getElementById('wpTextbox1');
		// With Chrome, ESC does not abort the pending query, so only write the
		// result when the textbox is still locked (i.e. the user did not cancel).
		if (textbox.disabled) {
			textbox.value = data.text;
		}
	} else {
		alert(data.text);
	}

	disable_input(false);
}

/**
 * Handle the JSON answer of the hOCR service.
 *
 * If the service reports an error, or the payload does not start with an XML
 * declaration (workaround for T228594), the editor is unlocked and the slow
 * OCR is started instead. Otherwise the raw hOCR is stored in localStorage,
 * flattened to plain text and written to the edit box.
 *
 * @param {Object} data Service answer; reads `data.error` and `data.text`.
 */
function hocr_callback(data) {
	var textbox, plainText;

	if ( data.error || data.text.substring(0, 5) !== '<?xml' ) {
		// Fall back to the slow way.
		disable_input(false);
		do_ocr();
		return;
	}

	textbox = document.getElementById('wpTextbox1');
	// With Chrome, ESC does not abort the pending query, so only use the result
	// when the textbox is still locked (i.e. the user did not cancel).
	if (textbox.disabled) {
		localStorage.ws_hOCR = data.text;

		plainText = $(data.text).text();
		// Strip leading spaces on every line.
		plainText = plainText.replace(/^ +/mg, '');
		// Park long and short runs of newlines behind placeholders ...
		plainText = plainText.replace(/\n{4,}/g, '@_@_@_@');
		plainText = plainText.replace(/\n{2,}/g, '____SPACE____');
		// ... so that the remaining single newlines can be turned into spaces ...
		plainText = plainText.replace(/\n/g, ' ');
		// ... and then put back one newline, respectively a blank line.
		plainText = plainText.replace(/____SPACE____/g, '\n');
		plainText = plainText.replace(/@_@_@_@/g, '\n\n');

		textbox.value = $.trim(plainText);
	}

	disable_input(false);
}

/**
 * Request the hOCR text layer of the current page from phetools.
 *
 * Locks the editor, then queries the hOCR service. A successful answer is
 * handed to hocr_callback; a failed request unlocks the editor and falls
 * back to the slow OCR.
 */
function do_hocr() {
	disable_input(true);

	// Every value is URL-encoded: page titles and user names may contain
	// spaces, '&', '+' or non-ASCII characters that would corrupt the query.
	var request_url = '//phetools.toolforge.org/hocr_cgi.py?cmd=hocr'
		+ '&book=' + encodeURIComponent(mw.config.get('wgTitle'))
		+ '&lang=' + encodeURIComponent(lang)
		+ '&user=' + encodeURIComponent(mw.config.get('wgUserName'));

	$.getJSON(request_url).done(hocr_callback).fail(function () {
		// Same sequence as the fallback in hocr_callback: unlock first, so the
		// editor is not left disabled when do_ocr finds no page image.
		disable_input(false);
		do_ocr();
	});
}

/**
 * Request an OCR of the page image shown on the Page: (the slow way).
 *
 * Does nothing when no page image with a `src` is present. Otherwise the
 * editor is locked and the OCR service is queried; the answer is handed to
 * ocr_callback, and a failed request unlocks the editor again.
 */
function do_ocr() {
	var $image = $( '.prp-page-image img' );
	var src = $image.length ? $image.attr('src') : null;
	if (!src) {
		return;
	}

	disable_input(true);

	// The server side can't use a protocol-relative URL, so request it as
	// https:. A src that already carries a scheme is passed through untouched.
	var url_image = src.indexOf('//') === 0 ? 'https:' + src : src;

	// NOTE(review): the image URL is still sent unencoded, as before; whether
	// the service copes with an encodeURIComponent()-ed value needs confirming.
	// The language and the user name are encoded: user names may contain
	// spaces, '&' or '+'.
	var request_url = "//phetools.toolforge.org/ocr.php?cmd=ocr&url=" + url_image
		+ "&lang=" + encodeURIComponent(lang)
		+ "&user=" + encodeURIComponent(mw.config.get('wgUserName'));

	$.getJSON( request_url ).done( ocr_callback ).fail( function () {
		// Without this the textbox would stay disabled after a network error.
		disable_input(false);
	} );
}

/**
 * Run the slow OCR with the fraktur language model ('de-f').
 *
 * The hOCR layers for 'de' are all produced with the non-fraktur model, so
 * fraktur always has to go through do_ocr. The shared `lang` variable is only
 * overridden while do_ocr builds its request and is restored afterwards, even
 * when do_ocr throws.
 */
function fraktur_ocr()
{
	lang = 'de-f';
	try {
		do_ocr();
	} finally {
		// Restore the default so that later requests do not inherit 'de-f'.
		lang = mw.config.get( 'wgContentLanguage' );
	}
}

/**
 * Add one button to the WikiEditor toolbar ('main' section, 'insert' group).
 *
 * @param {Object} b Button description; reads `imageId` (tool name),
 *  `speedTip` (label), `imageFile` (icon) and `onClick` (callback).
 */
function addButtonToWikiEditorToolbar( b ){
	var buttonDefinition = {
		label: b.speedTip,
		type: 'button',
		icon: b.imageFile,
		action: {
			type: 'callback',
			execute: b.onClick
		}
	};
	var toolbarSpec = {
		section: 'main',
		group: 'insert',
		tools: {}
	};

	// The tool is registered under the button id.
	toolbarSpec.tools[ b.imageId ] = buttonDefinition;
	$( '#wpTextbox1' ).wikiEditor( 'addToToolbar', toolbarSpec );

	// Set the width of the element carrying the tool name in its rel attribute.
	$( '[rel="' + b.imageId + '"]' ).width( 42 );
}

/**
 * Add one button to the classic edit toolbar.
 *
 * The button is registered through mw.toolbar.addButton; afterwards every
 * existing click handler on it is dropped and replaced by one that runs
 * `b.onClick` and returns false.
 *
 * @param {Object} b Button description; reads `imageFile`, `speedTip`,
 *  `imageId` and `onClick`.
 */
function addButtonToClassicToolbar( b ){
	var buttonSpec = {
		imageFile: b.imageFile,
		speedTip: b.speedTip,
		imageId: b.imageId
	};
	var $button;

	mw.toolbar.addButton( buttonSpec );

	$button = $( '#' + b.imageId );
	$button.off( 'click' );
	$button.on( 'click', function () {
		b.onClick();
		return false;
	} );
	$button.width( 46 );
}

/**
 * Add the OCR button(s) to whichever edit toolbar the user has enabled.
 *
 * Picks the WikiEditor toolbar when the 'usebetatoolbar' option is on, else
 * the classic toolbar when 'showtoolbar' is on, else does nothing. Once the
 * matching module is loaded and the DOM is ready, one OCR button is added;
 * on wikis whose content language is 'de' a second, fraktur button follows.
 */
function customizeToolbar()
{
	var userOptions = mw.user.options;
	var requiredModules, defaultIcon, addButton;

	// Loose comparison on purpose: the option can be the string "0" if the user
	// disabled the preference ([[bugzilla:52542#c3]]).
	if ( userOptions.get( 'usebetatoolbar' ) == 1 ) {
		requiredModules = [ 'ext.wikiEditor' ];
		defaultIcon = '//upload.wikimedia.org/wikipedia/commons/c/c9/Toolbaricon_OCR.png';
		addButton = addButtonToWikiEditorToolbar;
	} else if ( userOptions.get( 'showtoolbar' ) == 1 ) {
		requiredModules = 'mediawiki.toolbar';
		defaultIcon = '//upload.wikimedia.org/wikipedia/commons/e/e0/Button_ocr.png';
		addButton = addButtonToClassicToolbar;
	} else {
		// No toolbar enabled: nothing to attach the buttons to.
		return;
	}

	$.when(
		mw.loader.using( requiredModules ),
		$.ready
	).then( function () {
		var buttons, i;

		if ( mw.config.get( 'wgContentLanguage' ) === 'de' ) {
			// German wikis get a regular and a fraktur button.
			buttons = [
				{
					imageFile: defaultIcon,
					speedTip: 'Normale OCR',
					imageId: 'wsOcr1',
					onClick: do_hocr
				},
				{
					imageFile: '//upload.wikimedia.org/wikipedia/commons/a/af/Button_Fractur_OCR.png',
					speedTip: 'Fraktur OCR',
					imageId: 'wsOcr2',
					onClick: fraktur_ocr
				}
			];
		} else {
			buttons = [
				{
					imageFile: defaultIcon,
					speedTip: 'Get the text by OCR',
					imageId: 'wsOcr1',
					onClick: do_hocr
				}
			];
		}

		for ( i = 0; i < buttons.length; i++ ) {
			addButton( buttons[ i ] );
		}
	} );
}

// Entry point: the OCR buttons are only installed while editing (or previewing
// a submit of) a page in the Page: namespace, and only when
// `proofreadpage_disable_ocr` is not set to a truthy value on the global object.
if ( mw.config.get( 'wgCanonicalNamespace' ) === 'Page' ) {
	if ( [ 'edit', 'submit' ].indexOf( mw.config.get( 'wgAction' ) ) !== -1 ) {
		if ( !self.proofreadpage_disable_ocr ) {
			// The toolbar choice depends on user preferences, so wait for them.
			mw.loader.using( 'user.options' ).done( customizeToolbar );
		}
	}
}