--- /dev/null
+/*
+ rule based OCR engine, partly rewritten for edges (old=pixel)
+ */
+/*
+This is an Optical-Character-Recognition program
+Copyright (C) 2000-2007 Joerg Schulenburg
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ see README for email address
+
+ >>> DO NOT EDIT THIS FILE IF YOU DO NOT REALLY KNOW WHAT YOU ARE DOING! <<<
+
+ I have invested a lot of time to write this part of the program.
+ This engine should recognize chars always right or return UNKNOWN.
+ If you change something, test all other example files too,
+ to be sure that all things work better. (JoergS)
+
+ This engine was pixel-based until 0.40, which was not successful enough.
+ Also code changes always had side effects. The vectorization of the code
+ starts from version 0.41 with the chars XNz and seems to be much better
+ to handle. Vectorization means we frame each character by a chain of
+ vectors and don't care about pixels anymore. Unfortunately I have to
+ replace all the pixel codes, which is a long process. Old code will be lost.
+ (JoergS)
+
+
+ToDo:
+ - if box1->p and b differ, reduce probability
+ - probability makes life much easier here
+ - use only one box!?, maybe bits have useful info
+ - divide this file, suggestion: classify chars:
+ high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz
+ or
+ often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,:
+ or
+ every char (large overhead)
+ - two-pass version (first pass without tolerance)
+ 2nd pass with tolerance (ex: one tiny more in sdata->holes)
+
+ general feature extraction:
+ - white holes at middle, upper, lower position (cost much time)
+ - test lines and triangles instead of rectangles
+
+ char is removed, wchar_t is used (better code)
+
+ making a static global variable-set x.x0,x.x1, and call test_a,
+ test_b ... (faster compilation, but not reentrant!)
+
+ - adding slant-angle (if detected) to distinguish between l and / ?
+ - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/";
+ for better context correction or output: "Ha[lI][lI]o!"
+
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+// #include "pgm2asc.h"
+#include "ocr0.h"
+// #include "ocr1.h"
+#include "pnm.h"
+#include "gocr.h"
+
+#define IFV if(JOB->cfg.verbose&4) /* statement prefix: the following statement runs only if bit 4 of JOB->cfg.verbose is set */
+#define MM {IFV fprintf(stderr,"\nDBG %c L%04d (%d,%d): ",(char)c_ask,__LINE__,box1->x0,box1->y0);} /* prints the debug line prefix; needs c_ask and box1 in scope */
+
+// The old debug mode (0.40) handled only one special char; for another char
+// the code had to be recompiled with C_ASK='char'.
+// The new debug mode (0.41) explains why a char is declined or accepted as ABC...
+// The output can be filtered by external scripts.
+// ToDo: we could reduce output to filter string
+#ifndef DO_DEBUG /* can be defined outside */
+#define DO_DEBUG 0 /* 0 is the default */
+#endif
+
+/* these macros are for debugging output: "if char is declined, why?" */
+#if DO_DEBUG /* 0=Work mode, 1=debugging mode */
+// Setac: print the probability ad, then call setac(box1,ac,ad)
+// Break: print "break" with the source line of the rejection, then break (a bare break, so it must stay inside a loop)
+// MSG: debugging statements for char c_ask, mostly messages; expands to nothing in work mode
+// DBG: definitions useful only for debugging; expands to nothing in work mode
+#define Setac(box1,ac,ad) { MM;IFV fprintf(stderr,"setac %d",ad);setac(box1,ac,ad); }
+#define Break { MM;IFV fprintf(stderr,"break"); break; }
+#define MSG(x) { MM;IFV x }
+#define DBG(x) x
+#else
+#define Setac(box1,ac,ad) setac(box1,ac,ad)
+#define Break break
+#define MSG(x)
+#define DBG(x)
+#endif
+
+/* extern "C"{ */
+
+// static inline int sq(int x) { return x*x; } /* square */
+
+/*
+ * Go from frame vector j1 to frame vector j2 and measure the maximum
+ * deviation of the visited vertices from the straight line segment that
+ * connects vertex j1 and vertex j2.
+ *
+ *  box1   - box whose num_frames, num_frame_vectors[] and frame_vector[]
+ *           are read; x0,x1,y0,y1 are used to normalize the distance
+ *  j1, j2 - start and end index into box1->frame_vector[]
+ *
+ * Returns the squared maximum distance in units of the box size times 1024
+ * (each axis normalized separately), or -1 if the box has no frames or an
+ * index is out of range. The walk wraps around inside the frame that
+ * contains j2.
+ *
+ * ToDo: 1) better give back max-dx and max-dy ???
+ *          errors if j1 and j2 are in different frames or belong to
+ *          more than one frame?
+ *       2) Better get deviation from a complete vector graphic?
+ *          The vector graphic is the ideal test char adapted to the
+ *          extreme vertices of the real char.
+ */
+int line_deviation( struct box *box1, int j1, int j2 ) {
+  int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2;
+  if (!box1->num_frames) return(-1);
+  /* NOTE(review): an index equal to the total vector count passes this
+   * check (">" not ">="); kept as is, confirm whether that is intended */
+  if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
+      j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
+    fprintf(stderr,"Error in "__FILE__" L%d: idx out of range\n",__LINE__);
+    return(-1);
+  }
+  /* fetch the end points only after j1 and j2 passed the range check,
+   * otherwise frame_vector[] would be read with an unchecked index */
+  r1x=box1->frame_vector[j1][0];
+  r1y=box1->frame_vector[j1][1];
+  r2x=box1->frame_vector[j2][0];
+  r2y=box1->frame_vector[j2][1];
+  l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1 (loop invariant)
+  /* get the frame the endvector belongs to */
+  for (i=0;i<box1->num_frames;i++)
+    if (j2<box1->num_frame_vectors[i]) break;
+  frame=i;
+  /* frame(j1)<=frame(j2) possible */
+  for (i=j1;;i++) { // do it for each vector between j1 and j2
+    if (i >= box1->num_frame_vectors[frame])
+      i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */
+    if (i==j2) break;
+    // for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~}
+    r3x=box1->frame_vector[i][0];
+    r3y=box1->frame_vector[i][1];
+    // distance of point r3 from the segment r1-r2 (length of the perpendicular)
+    // segment      : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1
+    // perpendicular: l2=r3+b*[-(r2-r1).y,(r2-r1).x]
+    // intersection : l1=l2,
+    // eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0
+    // eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0
+    // eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x)
+    // eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y)
+    // eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly)
+    if (l2==0) {
+      // r1==r2: degenerate segment, measure against the starting point
+      d=-1024;
+    } else
+      d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x)
+          +((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024..
+    if (d<=-1024) { x=r1x; y=r1y; } // foot point before the start: use starting point
+    else {
+      if (d>=1024) { x=r2x; y=r2y; } // foot point behind the end: use end point
+      else {
+        x=((r1x+r2x)+1)/2+(d*(r2x-r1x))/2048;
+        y=((r1y+r2y)+1)/2+(d*(r2y-r1y))/2048;
+        /* we have the crossing point x,y now */
+      }
+    }
+    dist=sq((x-r3x)*1024/(box1->x1-box1->x0+1))
+        +sq((y-r3y)*1024/(box1->y1-box1->y0+1)); // 0..2*sq(1024)
+    if (dist>maxdist) maxdist=dist;
+    // for debugging:
+    // fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d"
+    //   " vector= %d %d crosspoint= %d %d ",
+    //   j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y);
+  }
+  return maxdist;
+}
+
+/*
+ * Among the frame vectors visited when walking from index j1 to index j2
+ * (both ends included), find the vertex closest to the reference point
+ * (rx,ry) and return its index into box1->frame_vector[].
+ * example:
+ *
+ *    r-> $$...$$   $ - mark vectors
+ *        @@$..@@   @ - black pixels
+ *        @@$..@@   . - white pixels
+ *        @@@@.$@
+ *    a-> @@$@$@@
+ *        @$.@@@@
+ *        @@..$@@
+ *        @@..$@@
+ * j1 --> $$...$$ <-- j2
+ *
+ * Distances are squared and normalized so that the box width and height
+ * each count as 128. The search starts with j2 as the answer and a start
+ * distance of 2*128^2 plus the normalized squared distance of r from the
+ * box center, so a vertex only wins if it is closer than that.
+ * Returns -1 if the box has no frames or an index is out of range.
+ *
+ * ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} instead of rx,ry?
+ *       j1 and j2 must be in the same frame
+ *       return aa?
+ */
+int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) {
+  int bx0=box1->x0, by0=box1->y0, bx1=box1->x1, by1=box1->y1;
+  int width=bx1-bx0+1, height=by1-by0+1;   /* box size used for normalizing */
+  int frame, idx, vx, vy, dist2, best_dist2, best_idx;
+
+  if (box1->num_frames==0) return(-1);
+  if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] ||
+      j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) {
+    fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2);
+    //out_x(box1);
+    return(-1);
+  }
+
+  /* start value: j2 with the maximum distance (distance*128)^2 that is
+   * possible if r is inside the box */
+  best_idx=j2;
+  best_dist2=2*sq(128)+sq((rx-(bx0+bx1)/2)*128/width)
+                      +sq((ry-(by0+by1)/2)*128/height);
+
+  /* find the frame the end vector j2 belongs to */
+  frame=0;
+  while (frame<box1->num_frames && j2>=box1->num_frame_vectors[frame])
+    frame++;
+
+  /* frame(j1)<=frame(j2) possible; walk until j2 has been examined */
+  idx=j1;
+  for (;;) {
+    if (idx >= box1->num_frame_vectors[frame])   /* go around */
+      idx = frame ? box1->num_frame_vectors[frame-1] : 0;
+    vx=box1->frame_vector[idx][0];               /* take a vector */
+    vy=box1->frame_vector[idx][1];
+    /* squared distance to r, box size normalized to 128 */
+    dist2=sq((vx-rx)*128/width)+sq((vy-ry)*128/height);
+    if (dist2<best_dist2) {
+      best_dist2=dist2;
+      best_idx=idx;
+    }
+    if (idx==j2) break;
+    idx++;
+  }
+  return best_idx;
+}
+
+// testumlaut: look for a diacritic mark (dots, accent, ring ...) in the upper part of box1.
+// Returns r: 0 = no mark accepted, 1 = one mark, 2 = two marks side by side (mod=DIAERESIS).
+// The two early "return 0" exits leave *modifier untouched; otherwise it receives mod ('\0' if unclassified).
+/* Parameters:
+ *   box1     - box to inspect; x0,x1,y0,y1,m1,m2,m3 and p are read
+ *   cs       - passed to all pixel helpers and compared against getpixel()
+ *              values (presumably the black/white threshold - TODO confirm)
+ *   m        - modify flags: if nonzero, box1->dots and box1->modifier may be
+ *              written (and box1->y0 in the r==0 branch); if bit 1 (m&2) is
+ *              set, box1->y0 is moved below the mark
+ *   modifier - optional output for the detected modifier, may be NULL
+ * NOTE(review): the old header said "m==2 modify box1->y0", but the r==0
+ *   branch below writes box1->y0 for any nonzero m - confirm which is meant.
+ * called by pgm2asc + ocr0(?)
+ */
+int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){
+ // pix p=*(box1->p);
+ int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3,
+ xl,xr,yu,yl; // left, right, upper and lower border of dots
+ wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */
+ DBG( wchar_t c_ask='"'; )
+ r=0;
+ x0=box1->x0; x1=box1->x1; dx=x1-x0+1;
+ y0=box1->y0; y1=box1->y1; dy=y1-y0+1;
+ m1=box1->m1; m2=box1->m2; m3=box1->m3;
+ xl=x0; xr=x1; yu=yl=y0;
+ if( dy < 5 || 4*y0 > 3*m2+m3 ) return 0; // box too flat or starting too low, no low chars: .,-=
+ /* modifier in box included? */
+ if( 2*y1 > m1+m2 ){
+ /* search the upper half for the first row where get_bw() returns 0 */
+ for(y=y0;2*y<y0+y1;y++)if( get_bw(xl,xr,y,y,box1->p,cs,1)==0 ) break;
+ if( 2*y<y0+y1 ){ /* yes => extract */
+ yl=y; // the mark occupies rows yu..yl-1
+ while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++; // skip the following rows that also return 0
+ if( m&2 ) box1->y0=y; /* set new upper bound */
+ }
+ }
+ if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */
+ if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap?
+ while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xl<x0+x1) xl++;
+ for(;xl<x1;xl++)if( get_bw(xl,xl,yu,yl,box1->p,cs,1)==1 ) break; // move xl right to the first column where get_bw() returns 1
+ for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break; // move xr left to the last column where get_bw() returns 1
+
+ if ( yl-1>yu ) { // mark region is at least 2 rows high: tall box ij"a"o"u
+#if 0
+ x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x;
+ fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
+ fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
+#define DEBUG 1
+#endif
+ {
+
+ x=xl;y=yu;
+ if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap?
+ else
+ if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0
+ || get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gaps to neighbours
+ if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0
+ || get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 )
+ { int i,j,x; // this x shadows the outer x
+ r=1;
+ // ...@@@.... RING_ABOVE // ..@@@..@@. TILDE
+ // ..@...@... // @@.@@@@@..
+ // ..@...@... // @.........
+ // ..@..@@...
+ // ...@@@....
+ for (i=yu;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==1) break;
+ for ( ;i<yl;i++) if (get_bw(xl,xr,i,i,box1->p,cs,1)==0) break;
+ for (j=xl;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==1) break;
+ for ( ;j<xr;j++) if (get_bw(j,j,yu,i,box1->p,cs,1)==0) break;
+ for ( x=j;x<xr;x++) if (get_bw(x,x,yu,i,box1->p,cs,1)==1) break;
+ // vert. gap detected: columns j..x-1 separate two marks
+ if( j<xr && x<xr && j<x && xr-xl>2
+ && num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!!
+ && num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2
+ && num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2
+ ){ // maybe the following lines are not quite ok
+ while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yl<y0+y1) yl++;
+ r=2;
+// out_x(box1);printf(" x,y=%d,%d i=%d xl=%d xr=%d yu=%d yl=%d",x0,y0,i-x0,xl-x0,xr-x0,yu-y0,yl-y0);
+ mod = DIAERESIS;
+ }
+ if( m&2 ) box1->y0=yl;
+/* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */
+ // out_x(box1);
+ }
+ if(r==0){ // divided fr != fi
+ while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yu<y0+y1) yu++;
+ if(m)box1->y0=yu; // written for any nonzero m, not only m&2
+ }
+ if( r==1 ){ yl--; // single mark: yl is now its last row; a later matching test below overrides an earlier one
+// .@@@. ..@@.
+// .@@.. .@@..
+// .@... .@@..
+//
+// if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
+// > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
+// && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
+// < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // é Nov03
+ if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
+ - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
+ > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8
+ - loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // é Nov03
+ mod = ACUTE_ACCENT; // '
+
+ if( xr-xl+1 > 3*(yl-yu+1)
+ && get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 )
+ mod = MACRON; // "-" above, more than 3 times wider than high
+
+// .@@@. .@@..
+// ..@@. ..@@.
+// ...@. ..@@.
+//
+// if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
+// < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
+// && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
+// > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) à Nov03
+ if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
+ - loop(box1->p,xr,yu,xr-xl,cs,0,LE)
+ < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8
+ - loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) à Nov03
+ mod = GRAVE_ACCENT; // `
+
+#ifdef DEBUG
+ fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0);
+ fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0);
+#endif
+ if( (xr-xl+1) < 2*(yl-yu+1)+2
+ && 2*(xr-xl+1)+2 > (yl-yu+1) ) { // width and height of the mark are of similar size
+ int i,i1,i2,i3,i4; // i1,i2: results of the horizontal/vertical loop() pairs; i3,i4: run lengths on the two diagonals
+ i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI);
+ i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI);
+ i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO);
+ i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO);
+ for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
+ if (getpixel(box1->p,xl+i,yu+i)< cs) break; i3=i; // (i3=i executes once, after the loop)
+ for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
+ if (getpixel(box1->p,xl+i,yu+i)>=cs) break; i3=i-i3; // (executes after the loop)
+ for (i=0;i<xr-xl+1 && i<yl-yu+1;i++)
+ if (getpixel(box1->p,xr-i,yu+i)< cs) break; i4=i; // (i4=i executes once, after the loop)
+ for ( ;i<xr-xl+1 && i<yl-yu+1;i++)
+ if (getpixel(box1->p,xr-i,yu+i)>=cs) break; i4=i-i4; // (executes after the loop)
+#ifdef DEBUG
+ fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4);
+#endif
+ if ( (xr-xl<5 && yl-yu<8) /* too small */
+ || (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */
+ && abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2
+ && abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4)
+ )
+ mod = DOT_ABOVE; // "." above, ToDo: improve it!
+ }
+
+ if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
+ > loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8
+ || loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
+ > loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 )
+ && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
+ > loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8
+ || loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
+ > loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 )
+ && num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1
+ && ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2
+ || num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 ))
+ mod = CIRCUMFLEX_ACCENT; // "^": one crossing in the top row, two in one of the bottom rows
+
+ if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
+ < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
+ || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
+ < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
+ && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
+ < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
+ || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
+ < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
+ && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
+ || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
+ && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
+ mod = CARON; // "v" above: two crossings in one of the top rows, one in the bottom row
+
+ if( /* test for bow (new0.3.6) */
+ loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
+ + loop(box1->p,xl,yl ,xr-xl,cs,0,RI)
+ - 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1
+ && xr-xl>10)
+ if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI)
+ < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10
+ || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI)
+ < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 )
+ && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE)
+ < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10
+ || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE)
+ < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 )
+ && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2
+ || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 )
+ && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 )
+ mod = BREVE; // round "u" above: same conditions as CARON plus the bow test
+
+ if( xr-xl>3 && yl-yu>1 )
+ if( loop(box1->p,xl,yu,xr-xl,cs,0,RI)
+ > loop(box1->p,xl,yl,xr-xl,cs,0,RI)
+ && loop(box1->p,xr,yu,xr-xl,cs,0,LE)
+ < loop(box1->p,xr,yl,xr-xl,cs,0,LE)
+ && num_cross(xl,xr,yu,yu,box1->p,cs) == 2
+ && num_cross(xl,xr,yl,yl,box1->p,cs) == 2 )
+ mod = TILDE;
+
+ if( xr-xl>2 && yl-yu>2)
+ if( num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 )
+ if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 )
+ if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 )
+ mod = RING_ABOVE;
+
+#ifdef DEBUG
+ printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s",
+ (int)mod,yu-box1->y0,yl-box1->y0,
+ xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON":
+ ((mod==ACUTE_ACCENT)?"ACUTE":
+ ((mod==TILDE)?"TILDE":"?"))));
+ out_x(box1);
+#endif
+
+ }
+ }
+ if (m) box1->dots=r; // set to 0 also possible after division
+ if (m) box1->modifier=mod; /* should be reset after compose ??? */
+ MSG(fprintf(stderr,"umlaut mod=%s dots=%d y0o=%d",decode(mod,ASCII),r,y0);)
+ }
+// printf(" modifier=%c",mod);
+ if (modifier) *modifier=mod; /* set modifier */
+ return r;
+}
+
+
+static wchar_t ocr0_eE(ocr0_shared_t *sdata){ /* rule-based test of sdata->box1 for 'e' and 'E'
+ * Each test is a one-pass for-block: "Break" rejects the candidate letter and
+ * leaves the block, otherwise the probability ad (starting at 100) is reduced
+ * by a few percent for every weak feature and registered with Setac().
+ * Reads from sdata: box1, hchar, gchar, cs, aa, holes, bp.
+ * Returns 'e' or 'E' directly if that letter reached ad>=100,
+ * otherwise box1->c.
+ */
+ struct box *box1=sdata->box1;
+ int i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad; /* probability in percent */
+ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
+
+ // --- most frequent letter e first!!!
+ // --- test e ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6)
+ DBG( wchar_t c_ask='e'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num != 1) ad=97*ad/100;
+ /* ToDo: maybe a two pass version intolerant/tolerant is better */
+ if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test
+ if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break;
+ if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break;
+ if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 2
+ && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt
+ x=(x0+x1)/2;i= num_cross(x,x,y0,y1,box1->p,cs); // v0.40
+ if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); }
+ if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); }
+ if (i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); }
+ i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break; // i is overwritten here, the crossing count above is not tested
+ j=loop(box1->p,x0,y0 ,x1-x0,cs,0,RI); if( j<i ) Break;
+ j=loop(box1->p,x0,y1 ,x1-x0,cs,0,RI); if( j<i ) Break;
+ i=loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break;
+ j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( j<i ) i=j;
+ j=loop(box1->p,x0 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
+ j=loop(box1->p,x1 ,y0,y1-y0,cs,0,DO); if( j<i ) Break;
+ i=loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break;
+ j=loop(box1->p,x0 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
+ j=loop(box1->p,x1 ,y1,y1-y0,cs,0,UP); if( j<i ) Break;
+ j=2*loop(box1->p,x0, (y0+y1)/2,x1-x0,cs,0,RI)
+ -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI)
+ -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI);
+ if (dx>3 && j>=dx/4) Break; // ~g 4x6font
+ for(y=1;y<dy/2;y++) if( num_cross(x0,x1,y0+y,y0+y,box1->p,cs) == 2 ) break;
+ if( y==dy/2 ) Break; // v0.2.5 ~ bad_t
+ for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++)
+ if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++; // count columns with 3 vertical crossings
+ if( dx>4 && dy>5 && (i<dx/4-1 || i==0) ) Break; // ~g but 4x6-e
+ // look for horizontal white line (right gap) => set x,y
+ for(x=0,y=i=y0+dy/3;i<y1-dy/6;i++){
+ j=loop(box1->p,x1,i,y1-y0,cs,0,LE);
+ if(j>=x) { x=j;y=i; }
+ }
+ if (x<dx/2){ // no gap found, fat font???
+ // check smallest thickness left > 2* smallest thickness right
+ for(i1=dx,i=y0+dy/3;i<y1-dy/6;i++){
+ j =loop(box1->p,x0 ,i,y1-y0,cs,0,RI); if (j>dx/2) break;
+ j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI);
+ if (j<i1) i1=j; // smallest thickness on left bow
+ }
+ for(i2=dx,y=i=y0+dy/3;i<y1-dy/6;i++){
+ j =loop(box1->p,x1 ,i,y1-y0,cs,0,LE);
+ j =loop(box1->p,x1-j,i,y1-y0,cs,1,LE);
+ if(j<i2) { i2=j;y=i; } // smallest thickness on the right side and its row
+ } if (3*i2>2*i1) Break; // not accepted, if right line is not very thin
+ x =loop(box1->p,x1 ,y,y1-y0,cs,0,LE);
+ x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE);
+ x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE);
+ if (3*i2>i1) ad=99*ad/100;
+ if (2*i2>i1) ad=99*ad/100;
+ bad_e=60; // used below to skip the num_cross test for dy>11
+ }
+ if (x<dx/2) Break;
+ for(i=1,j=x0+dx/6;j<x1-dx/6 && i;j++)
+ if( num_cross(j,j,y0,y,box1->p,cs) > 1 ) i=0;
+ if( i ) Break;
+// ..@@@@...<-
+// .@@@@@@;.
+// @@,...@@.
+// @@.....@,
+// @@@@@@@@@
+// @@.,;.@,. <- problem (y) == bad_e>50
+// @@.....@.
+// @@,...@@.
+// .@@@,@@@.
+// ..@@@@;..<-
+ if (dy>11 && bad_e<50)
+ if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except for a curved (swashed) e
+ if ( num_cross(x0,x1-dx/3,y ,y ,box1->p,cs) != 1
+ && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break;
+ // if( num_hole(x0, x1, y0 , y ,box1->p,cs,NULL) < 1 ){
+ if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){
+ if( sdata->hchar ) Break; // ~ \it t
+ // check for thin font (maybe the h-line is broken) May00
+ for(j=0,i=x0+dx/8;i<x1-1;i++)
+ if( get_bw(i,i,y0+dy/4,y,box1->p,cs,1) == 1 ) j++;
+ if(j<2*dx/4) Break;
+ }
+ if( sdata->holes.num>0 && sdata->holes.hole[0].y0 > y-y0) Break;
+ if( sdata->holes.num>1 && sdata->holes.hole[1].y0 > y-y0) Break;
+ if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) {
+ ad=95*ad/100; } /* 8*10 @ (=at) is not an e */
+ // look for horizontal gap
+ for(x=0,y=i=y0+dy/4;i<y1-dy/4;i++){
+ j=loop(box1->p,x0,i,x1-x0,cs,0,RI);
+ if(j>=x) { x=j;y=i; }
+ }
+ if (y>y0+dy/4 && y<y1-dy/4 && x>dx/2) Break; // s
+ if (x>dx/4) ad=99*ad/100;
+
+ if( num_cross(x0+dx/2,x1 ,y1-dy/4,y1 ,box1->p,cs) == 0
+ && num_cross(x0+dx/2,x1-1,y1-dy/4,y1 ,box1->p,cs) == 0
+ && num_cross(x0+dx/2,x1 ,y1-dy/4,y1-1,box1->p,cs) == 0 ) {
+ if (sdata->gchar) Break; // ~p
+ ad=99*ad/100;
+ }
+ /* upper case is for 5x6 box */
+ if( sdata->hchar // broken B ? should also work when linedetection fails
+ && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) {
+ x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI);
+ if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x
+ && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break;
+ if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x
+ && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break;
+ }
+ x = loop(sdata->bp,0,dy-2 ,dx,cs,0,RI); // from here sdata->bp is used with box-relative coordinates
+ if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q
+ if (box1->m2) {
+ if (sdata->gchar) ad=99*ad/100;
+ if (sdata->hchar) ad=99*ad/100;
+ } else ad=99*ad/100;
+
+ Setac(box1,(wchar_t)'e',ad);
+ if (ad>=100) return 'e';
+ break;
+ }
+ // --- test E ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>4 ;){ // min 3x4
+ // rewritten for vectors 0.43
+ int i1, i2, i3, i4, i5; // line derivation + corners (i1,i2 shadow the outer i1,i2)
+ DBG( wchar_t c_ask='E'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ /* half distance to the center */
+ d=2*sq(128/4);
+ /* all four corner points must be close to the box corners: upper right end first */
+ if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... */
+ if (aa[0][2]>d/2) Break; /* upper left end */
+ if (aa[1][2]>d/2) Break; /* lower left end */
+ if (aa[2][2]>d/2) Break; /* lowerright end */
+/*
+ E f near E
+
+ OOOOOOOO OOOO
+ O5 O O
+ O4 O
+ OOOO3 OOOOOO
+ O2 O
+ O O
+ O1 O O
+ OOOOOOOO OOOOOO
+*/
+ // check the bow from below
+ for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
+ if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal!
+ } if (i!=aa[2][3]) Break; // ~AHKMNRX
+ // search most left+down between bottom right and top right
+ i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1);
+ i5=nearest_frame_vector(box1, i1,aa[3][3], x0, y0);
+ i3=nearest_frame_vector(box1, i1, i5, x1, (y0+y1)/2);
+ i2=nearest_frame_vector(box1, i1, i3, x0, (2*y0+y1)/3);
+ i4=nearest_frame_vector(box1, i3, i5, x0, (y0+2*y1)/3);
+ i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2);
+ if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break;
+ if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f
+
+ MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);)
+ // holes right open?
+ for( i=1,y=y0; y<y0+dy/4 && i; y++ ) // long black line
+ if( get_bw(x0+dx/3,x1-dx/6,y,y,box1->p,cs,2) == 0 ) i=0;
+ if( i ) Break;
+ for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line
+ if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0;
+ if( i ) Break;
+ for( i=1,y=y0+dy/3; y<y1-dy/3 && i; y++ ){ // black line
+ j=loop(box1->p,x0 ,y,dx,cs,0,RI);
+ j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0;
+ } if( i ) Break;
+ x=x1-dx/3; y=y0; // drill through from above!
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break;
+ x=x1-dx/3; y=y1; // drill through from below!
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( y<y1-dy/4 ) Break;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( y<y0-dy/3 ) Break; // NOTE(review): the mirror of the test from above would be y<y1-dy/3; as written the bound lies above the box top - confirm intent
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || y<y0+dy/2 ) Break;
+ x=x1-dx/3; y=y0; // drill through from above!
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break;
+ y+=dy/15;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x0 ) Break;
+ if (dx>15 && x==x0) ad=99*ad/100; // too thin
+ x+=dx/15+1;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break;
+ // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) Break;
+ if (sdata->holes.num > 0) Break;
+ i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break;
+ j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break; i=j;
+ j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(j<i-dx/4 || j>i+dx/8) Break;
+ j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE);
+ for( x=dx,y=y0+dy/6; y<y1-dy/9; y++ ) // left border straight
+ { i=loop(box1->p,x0,y,dx,cs,0,RI);
+ if (i>j/2 && ad>98) ad=99*ad/100; // penalize only once (while ad is still above 98)
+ if (i>dx/4) break;
+ if(i<x) x=i;
+ } if( y<y1-dy/9 ) Break; // t
+ if(dy>3*dx) // ~[
+ if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break;
+
+ if (box1->m2) {
+ if (!hchar) ad=ad*99/100;
+ if ( gchar) ad=ad*99/100;
+ }
+ Setac(box1,(wchar_t)'E',ad);
+ if (ad>=100) return 'E';
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_n(ocr0_shared_t *sdata){ /* rule-based test of sdata->box1 for 'n'
+ * The test is a one-pass for-block: "Break" rejects the candidate and leaves
+ * the block, otherwise the probability ad (starting at 100) is reduced for
+ * every weak feature and registered with Setac(box1,'n',ad).
+ * Most probes use sdata->bp with box-relative coordinates (0..dx-1, 0..dy-1).
+ * For very small boxes (dx<5, dy<=4) 'm' is registered as an alternative.
+ * Always returns box1->c.
+ */
+ struct box *box1=sdata->box1;
+ int i,j,d,x,y,i1,i2,i3,handwritten=0,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad; /* probability in percent */
+
+ // --- test n ---------------------------------------------------
+ // glued rm is very similar to glued nn -> thickness of h-line should grow
+ // may02: tested for 8x12 font
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ DBG( wchar_t c_ask='n'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ i= num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs);
+ j= num_cross( 0,dx-1,dy/2,dy/2,sdata->bp,cs);
+ if( (i<2 || i>3) && j!=2 ) Break;
+ if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */
+ y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */
+ if( num_cross( 0,dx/2,y ,y ,sdata->bp,cs) != 1
+ && num_cross( 0,dx/2,y-1,y-1,sdata->bp,cs) != 1
+ && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) < 1 ) Break; // n rr
+ // ~thick_w
+ y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO); if(y>dy/2) Break;
+ if(y>1)if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break;
+
+ y=3*dy/4;
+ if( num_cross(0, dx/2,y ,y ,sdata->bp,cs) == 1
+ && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) == 0 ) Break; // ~p
+ y=dy/2;
+ if( num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2
+ && num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) == 2 ) { // n rr
+ /* printed n */
+ x =loop(sdata->bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line
+ x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap
+ x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x< dx/2) Break; i2=x; // 2nd v-line
+ x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap
+ i=dy/4; y=13*dy/16;
+ if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n
+ if (i<2 && i<dy/2) i++; // correct for small fonts like 8x12
+ // the same game for the lower part =>l1 l2 l3 l4 ???
+ for(x=i1;x<i2;x++) if( loop(sdata->bp,x, 0,dy,cs,0,DO)>=i ) break;
+ if(x <i2) Break; // gap detected
+ for(x=i1;x<i2;x++) if( loop(sdata->bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break;
+ if(x==i2) Break; // no gap detected (glued serifs ??? )
+ // glued rm as nn ???
+ for(y=0,x=(i1+i2)/2;x<i2;x++){
+ i=loop(sdata->bp,x,0,dy,cs,0,DO);
+ i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness
+ if( i>y ) y=i; if( i<y/2 ) break; // two separate statements: track the maximum thickness y, stop if it drops below half
+ }
+ if(x <i2) Break; // unusual property for n
+ if( dy>7 )
+ if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE)
+ +loop(sdata->bp, 0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1
+ > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE)
+ +loop(sdata->bp, 0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o
+ if( dy>7 && dx>7 )
+ if( loop(sdata->bp,dx-1, dy/2,dx,cs,0,LE)==0
+ && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o; NOTE(review): this scan starts at x=dx-1 in direction RI - confirm that LE was not intended
+ } else { /* check handwritten n */
+ if( num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) != 3
+ && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break;
+ i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break;
+ i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break;
+ i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI);
+ if( num_cross(i,i, 0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break;
+ i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI);
+ if( num_cross(i,i,dy/2+1, dy-1,sdata->bp,cs) != 0 ) Break;
+ handwritten=80; // value is not read again in this function
+ }
+
+ i= loop(sdata->bp,dx-1 ,dy/2,dx,cs,0,LE); if(i>5)
+ if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr
+ i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE);
+ if( get_bw(dx-1-i ,dx-1-i ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv
+
+ if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0
+ && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0
+ && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P
+
+ // glued ri ???
+ if( box1->dots>0 && box1->m1 )
+ if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 )
+ if( num_cross( 0,dx-1,0 ,0 ,sdata->bp,cs) >2
+ || num_cross( 0,dx-1,1 ,1 ,sdata->bp,cs) >2 ) Break;
+
+
+ i=loop(sdata->bp,dx-1, dy-1,dx,cs,0,LE); if (i>dx/2)
+ i=loop(sdata->bp,dx-1, dy-2,dx,cs,0,LE);
+ x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
+ if (sdata->hchar && i-x>1) Break; // ß
+ x=loop(sdata->bp, 0,dy-1,dx,cs,0,LE); // check for serifs; NOTE(review): these four scans start at x=0 in direction LE - confirm that RI was not intended
+ i=loop(sdata->bp, 0,dy-2,dx,cs,0,LE); if (i<x) x=i;
+ i=loop(sdata->bp, 0, 1,dx,cs,0,LE); if (i<x) x=i;
+ i=loop(sdata->bp, 0, 2,dx,cs,0,LE); if (i<x) x=i;
+ if (sdata->hchar && x>0) Break; // fl
+
+ if (num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M
+ if (sdata->hchar || 2*y0<box1->m1+box1->m2) ad=96*ad/100;
+ if (sdata->gchar) ad=96*ad/100; // ß fl
+ if (dx<5) { // for small fonts no middle line is possible for m
+ ad=99*ad/100; // 4x6 m
+ if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) {
+ ad=97*ad/100; // ~m
+ if (dy<=4) Setac(box1,'m',97); // only for 4x6 font!
+ }
+ }
+ Setac(box1,'n',ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_M(ocr0_shared_t *sdata){ /* Rule test for the letter 'M'.
+   Runs a chain of pixel rules on the box described by sdata. A failed hard
+   rule leaves the one-pass for-loop through Break (macro defined outside
+   this chunk). If every rule passes, Setac(box1,'M',ad) is called; ad
+   starts at 100 and is scaled down in percent steps by the softer rules.
+   sdata: shared data of the box under test (box1, bp, cs, hchar, gchar, holes).
+   Returns box1->c as it is after the test. */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp; /* used below with box-relative coordinates 0..dx-1 / 0..dy-1 */
+ int d,x,y,i0,i1,i2,i3,t1,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad; /* tmp-vars; ad is the weight finally handed to Setac() */
+
+ // ------------------ test M ---------------------------
+ for(ad=d=100;dx>3 && dy>3;){ // dy<=dx is not perfect! better to search the
+ // middle minimum for m (the loop body runs at most once, it ends with break)
+ DBG( wchar_t c_ask='M'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if( num_cross(0,dx-1, dy/2, dy/2,bp,cs)<3
+ && num_cross(0,dx-1, dy/4, dy/4,bp,cs)<3
+ && num_cross(0,dx-1,5*dy/8,5*dy/8,bp,cs)<3
+ && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<3
+ && dx>4 ) Break;
+ if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<2
+ && num_cross(0,dx-1, dy/8, dy/8,bp,cs)<2 ) Break; /* fat M */
+ if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<2 ) Break;
+
+ x = loop(bp,dx-1 ,dy-1,dx,cs,0,LE); // ~ melted kl
+ x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE); if( x>dx/2 ) Break;
+
+ if( loop(bp, 0,7*dy/16,dx,cs,0,RI)
+ + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2 ) Break; // ~K
+
+ if( dy>8 /* following lines should be extend to range check */
+ && loop(bp, dx/4,dy-1, dy,cs,0,UP)<dy/4
+ && loop(bp,3*dx/8,dy-1, dy,cs,0,UP)<dy/4 )
+ if( loop(bp, 0,dy-1-dy/ 8,dx,cs,0,RI)
+ < loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)-dx/32 ) Break; // ~it_u
+ if( num_cross(0,dx-1, dy/2, dy/2,bp,cs)==2
+ && num_cross(0,dx-1, dy/4, dy/4,bp,cs)> 2
+ && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 2 ) Break; // ~it_u
+ if( num_cross(0 ,dx-1,3*dy/4,3*dy/4,bp,cs)==2
+ && num_cross(dx/2,dx/2,3*dy/4, dy-1,bp,cs)> 0 ) Break; // ~it_v
+
+ if( loop(bp,3*dx/4, 0,dy,cs,0,DO)
+ > loop(bp,2*dx/4, 0,dy,cs,0,DO)
+ && loop(bp,3*dx/4,dy-1,dy,cs,0,UP)
+ < loop(bp,2*dx/4,dy-1,dy,cs,0,UP) ) Break; // ~N
+ if( loop(bp,3*dx/4, dy/8,dy,cs,0,DO)
+ > loop(bp,2*dx/4, dy/8,dy,cs,0,DO)
+ && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP)
+ < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP) ) Break; // ~serif_N
+
+ // i0 is lower end of upper serifen (widest gap? )
+ i0=0; // NOTE(review): i0 is not read again in this function
+
+ if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=4 ){ // Is it a N ?
+ if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==3 ){
+ for(y=dy/2+1;y<dy;y++){ // first row below the middle with fewer than 3 crossings
+ if( num_cross(0,dx-1,y,y,bp,cs)<3 ) break;
+ }
+ if( num_cross(0,dx-1,y,y,bp,cs)==2 ){
+ x =loop(bp,dx-1 ,y-1,dx,cs,0,LE);
+ x+=loop(bp,dx-1-x,y-1,dx,cs,1,LE);
+ x+=loop(bp,dx-1-x,y-1,dx,cs,0,LE);
+ if( loop(bp,dx-x,y-1,dy,cs,0,UP)>y-2 ) Break; // ~N
+ }
+ }
+ }
+ // MNWK
+ for(i2=0,i1=x=dx/2;x<dx-dx/4;x++){ // lowest pixel; scan stops at the first column where the value does not grow
+ y=loop(bp,x,0,dy,cs,0,DO); if(y>i2) {i2=y;i1=x;} else break; }
+ i3=i2+loop(bp,i1,i2,dy-i2,cs,1,DO); // NOTE(review): i3 is not read again in this function
+ if(i2<dy/4) {
+ if (!sdata->hchar) Break; // rm
+ ad=99*ad/100;
+ }
+ if (i2==0 && dx>8 && dy>12) Break; // glued and bad splitted serifen-MN
+
+ // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) != 0 ) Break; // small A
+ if (sdata->holes.num != 0) Break;
+ t1=loop(bp,0 ,3*dy/4,dx,cs,0,RI);
+ t1=loop(bp,t1,3*dy/4,dx,cs,1,RI); // thickness of line?
+ if( 7*(t1+1)<dx ) // NOTE(review): i2 can still be 0 here for small boxes, giving row -1 below; confirm num_cross tolerates it
+ if( num_cross(i1,dx-1,i2-1,i2-1,bp,cs)!=2
+ || num_cross(0 ,i1 ,i2-1,i2-1,bp,cs)!=2 ) Break; // too hard ???
+
+ // ~u_n-pair
+ if( num_cross(0,dx-1,0,0,bp,cs)!=2
+ && num_cross(0,dx-1,1,1,bp,cs)!=2
+ && num_cross(0,dx-1,2,2,bp,cs)!=2 ) Break;
+
+ // ~nn v0.2.4a3
+ if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)==4
+ && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)==4 ){ // i1/i2 are reused here as running x positions on rows dy/4 and 3*dy/4
+ i1 =loop(bp, 0, dy/4,dx,cs,0,RI);
+ i1+=loop(bp,i1, dy/4,dx,cs,1,RI);
+ i1+=loop(bp,i1, dy/4,dx,cs,0,RI);
+ i2 =loop(bp, 0,3*dy/4,dx,cs,0,RI);
+ i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
+ i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
+ if( i1>=i2 ) Break; // no good M
+ i1+=loop(bp,i1, dy/4,dx,cs,1,RI);
+ i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI);
+ if( i1>=i2 ) Break; // no good M
+ i1+=loop(bp,i1, dy/4,dx,cs,0,RI);
+ i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI);
+ if( i1<=i2 ) Break; // no good M
+ }
+ if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==2
+ && num_cross(0,dx-1,dy/4,dy/4,bp,cs)==2 && !hchar ) Break; // ~ \it u
+
+ if (dy<17)
+ if( num_cross(0,dx-1, 0, 0,bp,cs)<2 ) ad=99*ad/100;
+ if (dx>5) /* 4x6 font has only 1 cross at y=1 */
+ if( num_cross(0,dx-1, 1, 1,bp,cs)<2 ) ad=96*ad/100; // kt
+ if( num_cross(dx/2,dx/2, 0, dy-1,bp,cs)!=1) ad=98*ad/100; // kt
+ if (dx<5 && loop(bp,dx/2,0,dy,cs,0,DO)>=3*dy/8) ad=96*ad/100; // 4x6 H
+
+ if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<=2
+ && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<=2
+ && dx>8 && dy>12 ){
+ ad=98*ad/100;
+ for(y=5*dy/16;y<5*dy/8;y++) // look for H-line
+ if( num_cross(0,dx-1,y ,y ,bp,cs)==1 ) break;
+ if( y<5*dy/8 ) ad=95*ad/100;
+ if( y<5*dy/8 )
+ if( num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs)==0
+ || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs)==0 ) Break; // ~H bad!
+ }
+
+ if( loop(bp,3*dx/8, 0,dy,cs,0,DO) >dy/2
+ && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) >dy/2 ) ad=95*ad/100;
+
+ if(!hchar){
+ ad=98*ad/100; /* not sure */
+ if( loop(bp,0, dy/4,dx,cs,0,RI)
+ < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16 ) Break; // ~wi glued
+ }
+ if( gchar ) ad=98*ad/100;
+ if (ad>99 && dx<8) ad=99*ad/100; /* give 5x8 N a chance */
+ Setac(box1,'M',ad);
+ break; // all rules passed: leave the one-pass loop
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_N(ocr0_shared_t *sdata){ /* Rule test for the letter 'N',
+   working on the frame vectors of the box instead of on pixels.
+   It checks the four corner points in sdata->aa, the straightness of the
+   two vertical strokes and of the diagonal (line_deviation), and the
+   position of the upper-left "^" and lower-right "v" turning points.
+   A failed hard rule leaves the one-pass for-loop through Break (macro
+   defined outside this chunk); otherwise Setac(box1,'N',ad) is called.
+   sdata: shared data of the box under test (box1, aa, hchar, gchar, holes).
+   Returns box1->c as it is after the test. */
+ struct box *box1=sdata->box1;
+ int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
+ dbg[9], /* only dbg[0..3] are used below: the four line deviations */
+ ad; /* tmp-vars; ad is the weight finally handed to Setac() */
+
+ // --- test N ------- +hchar -gchar
+ for(ad=d=100;dx>3 && dy>3;){ // 4x6font (the loop body runs at most once, it ends with break)
+ DBG( wchar_t c_ask='N'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num > 0) ad=98*ad/100; /* # */
+ if (dx<6) ad=99*ad/100;
+ if (dx<5) ad=99*ad/100;
+ /* half distance to the center */
+ d=2*sq(128/4);
+ /* now we check for the 4 corner ends of the N */
+ if (aa[0][2]>d) Break;
+ if (aa[1][2]>d) Break;
+ if (aa[2][2]>d) Break;
+ if (aa[3][2]>d) Break;
+ if (aa[3][0]-aa[0][0]<dx/2) Break;
+ if (aa[2][0]-aa[1][0]<dx/2) Break;
+ if (aa[1][1]-aa[0][1]<dy/2) Break;
+ if (aa[2][1]-aa[3][1]<dy/2) Break;
+ if (aa[3][0]-aa[0][0]<4-1) Break; /* too small to hold an N */
+ if (aa[2][0]-aa[1][0]<4-1) Break; /* too small */
+ if (abs(aa[3][1]-aa[0][1])>(dy+2)/5) Break; /* glued tu */
+ if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */
+ /* left and right vertical line */
+ d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
+ ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; /* NOTE(review): assuming sq(x) is x*x and d>=0, d is at most 2*sq(1024/4) here, so the integer quotient looks like 0 and ad stays unchanged -- confirm intent */
+ d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;
+
+ /* search uppermost left ^ (between near 0,0) */
+ i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0);
+ x=box1->frame_vector[i1][0];
+ y=box1->frame_vector[i1][1];
+ MSG( fprintf(stderr,"i1= %d (%d,%d) left ^", i1,x-x0,y-y0);)
+ if (y-y0 > 5*dy/8) Break;
+ if (x-x0 > 5*dx/8) Break;
+ /* search uppermost right ^ ~H */
+ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
+ MSG( fprintf(stderr,"i3= %d (%d,%d) right ^",\
+ i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);)
+
+ /* check if upper left ^ (i1) and lower right end (aa[2]) are joined directly */
+ dbg[0]=d=line_deviation(box1,i1, aa[2][3]);
+ /* check if lower left end (aa[1]) and upper left ^ (i1) are joined directly */
+ dbg[1]=d=line_deviation(box1, aa[1][3],i1);
+ MSG( fprintf(stderr," i1-a2 %d a1-i1 %d",dbg[0],dbg[1]); )
+ if (dbg[0] > sq(1024/4)) Break;
+ if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024)
+ if (dx>4 && dbg[1] > sq(1024/3)) Break; // d=0..2*sq(1024)
+ // serif N has d=sq(1024/3)=116508
+
+ /* search lowest right v, same frame? N-tilde etc.? */
+ i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8);
+ x=box1->frame_vector[i2][0];
+ y=box1->frame_vector[i2][1];
+ MSG( fprintf(stderr,"i2= %d (%d,%d) right v",\
+ i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);)
+ if (y-y0 < 3*dy/8) Break;
+ if (x-x0 < 3*dx/8) Break;
+ // test H
+ if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4
+ && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8
+ && y<=box1->frame_vector[i1][1]) Break;
+ /* check if lower right v (i2) and upper left end (aa[0]) are joined directly */
+ dbg[2]=d=line_deviation(box1,i2, aa[0][3]);
+ /* check if upper right end (aa[3]) and lower right v (i2) are joined directly */
+ dbg[3]=d=line_deviation(box1, aa[3][3],i2);
+ MSG( fprintf(stderr," i2-a0 %d a3-i2 %d",dbg[2],dbg[3]); )
+ if (dbg[2] > sq(1024/4)) Break;
+ if (dbg[3] > sq(1024/4)) ad=97*ad/100; // serif N, ToDo: do it better
+ if (dbg[3] > sq(1024/3)) Break;
+
+ if (abs((box1->frame_vector[i1][1]-y0)
+ -(y1-box1->frame_vector[i2][1]))>dy/8) ad=99*ad/100; /* ~ tu */
+ if (abs(((y0+y1)/2-box1->frame_vector[i1][1])
+ -(box1->frame_vector[i2][1]-(y0+y1)/2))>dy/8) ad=99*ad/100; /* ~ tu */
+ if (box1->frame_vector[i2][0]
+ -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */
+ if (box1->frame_vector[i2][1]
+ -box1->frame_vector[i1][1]<=dy/8) ad=97*ad/100; /* too flat (ff,H) */
+ if (box1->frame_vector[i2][1]
+ -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100;
+ MSG( \
+ fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d %d",\
+ box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
+ box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
+ dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024));)
+ ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; /* NOTE(review): same integer formula as above; with the limits already enforced on dbg[0..3] these four lines look like no-ops -- confirm */
+ ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100;
+ ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100;
+ ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100;
+
+ if (!hchar) ad=99*ad/100;
+ if ( gchar) ad=98*ad/100; // \sc N
+ Setac(box1,'N',ad);
+ break; // all rules passed: leave the one-pass loop
+ }
+ return box1->c;
+}
+
+/* Rule test for the letter 'h'.
+ * First the corner points in sdata->aa and the bow between the two legs are
+ * checked on the frame vectors, then a chain of older pixel rules follows.
+ * A failed hard rule leaves the one-pass for-loop through Break (macro
+ * defined outside this chunk); softer rules scale the weight ad down in
+ * percent steps. If all rules pass, Setac(box1,'h',ad) is called.
+ * bp is used with box-relative coordinates, box1->p with absolute ones.
+ * sdata: shared data of the box under test (box1, bp, cs, aa, hchar, gchar, holes).
+ * Returns box1->c as it is after the test.
+ */
+static wchar_t ocr0_h(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad; /* tmp-vars */
+ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
+
+ // --- test h ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ // rewritten for vectors 0.42
+ int i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners
+ DBG( wchar_t c_ask='h'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ /* half distance to the center */
+ d=2*sq(128/4);
+ /* now we check for the upper right end of the h */
+ if (aa[3][2]<d/4) Break; /* [2] = distance, ~BCDEF... */
+ if (aa[0][2]>d/2) Break; /* upper left end */
+ if (aa[1][2]>d/2) Break; /* lower left end */
+ if (aa[2][2]>d/2) Break; /* lowerright end */
+/*
+ type A B=italic ???
+ 18 OOO
+ O O O
+ O O
+ O7OOO OOOO
+ O4 O O O
+ O O O O
+ O O O O O
+ 2O3 5O6 O OOO
+*/
+ i1=i8=aa[0][3];
+ i2=i3=aa[1][3];
+ i5=i6=aa[2][3];
+ // check the bow from below
+ for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[ i][1]
+ <box1->frame_vector[i4][1]) i4=i; // get next maximum
+ if (box1->frame_vector[ i][1]<=y0) break; // fatal!
+ }
+ if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
+ if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
+ // two steps for i7 to go around pitfalls on italic h
+ i7=nearest_frame_vector(box1, i6, i8, (x0+x1)/2, (y0+y1)/2);
+ i7=nearest_frame_vector(box1, i6, i7, x0, (y0+y1)/2);
+ i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
+ i5=nearest_frame_vector(box1, i4, i6, (x0+x1)/2, y1);
+
+ MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
+ /* ... new part /// old obsolete part ... */
+ if( get_bw(0 ,dx/2,dy/8 ,dy/8 ,bp,cs,1) != 1 ) Break;
+ if( get_bw(0 ,dx/2,dy/2 ,dy/2 ,bp,cs,1) != 1 ) Break;
+ if( get_bw(dx/2 ,dx-1,dy-1-dy/3,dy-1-dy/3,bp,cs,1) != 1 ) Break;
+ if( get_bw(dx/2 ,dx/2,dy/5 ,dy-1-dy/3,bp,cs,1) != 1 ) Break;
+ if( get_bw(dx-1-dx/3,dx-1,0 ,1 ,bp,cs,1) == 1 ) Break;
+ if( get_bw(dx-1-dx/3,dx-1,1 ,dy/6 ,bp,cs,1) == 1 ) Break;
+ if( dy>18 )
+ if( get_bw(dx-1-dx/3,dx-1,dy/6 ,dy/5 ,bp,cs,1) == 1 ) Break;
+ if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1 ,bp,cs,1) == 0 ) Break; // s-
+ // look for a column in the middle third whose lower quarter gives get_bw()==0
+ for( x=x0+dx/3;x<x1-dx/3;x++)
+ if( get_bw(x, x,y1-dy/4, y1, box1->p,cs,1) == 0 ) break;
+ if( x>=x1-dx/3 ) Break;
+ // need dy/4 rows with exactly 2 crossings below y0+dy/3
+ for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){
+ if( num_cross(x0,x1 ,y,y, box1->p,cs) == 2 ) i--;
+ } if( i ) Break;
+ // need dy/4 rows with exactly 1 crossing in the left half of the upper half
+ for(i=dy/4,y=y0;y<=y0+dy/2 && i;y++){
+ if( num_cross(x0,x0+dx/2,y,y, box1->p,cs) == 1 ) i--;
+ } if( i ) Break;
+ // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) // could happen
+ if (sdata->holes.num > 0)
+ if (sdata->holes.hole[0].y0 > dy/3
+ && sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
+ // if( num_hole(x0, x1, y0+dy/3 , y1-dy/3 ,box1->p,cs,NULL) != 1 ) Break; // mini
+ if( loop(bp,dx-1,dy/3,dx,cs,0,LE)+dx/8
+ < loop(bp,dx-1,dy/2,dx,cs,0,LE)
+ && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8
+ < loop(bp,dx-1,dy/2,dx,cs,0,LE)) Break; // ~k Okt00
+ i=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
+ if (i>1 && num_cross(x0,x0,y0+dy/8+2,y0+dy/2, box1->p,cs) == 1 ){ // fi fu
+ /* For i>=7 the factor 99-(1<<i) is negative, so ad would drop below 1
+    and the test would be left a few lines further down anyway. Leaving
+    right here gives the same result and avoids the undefined shift and
+    the signed overflow of the multiplication for wide gaps (large i). */
+ if (i>=7) break;
+ ad=(99-(1<<i))*ad/100;
+ if (num_cross(x0,x0,y0,y0+dy/8+2, box1->p,cs) == 0 ) ad=97*ad/100;
+ if (num_cross(x0+dx/2,x0+dx/2,y0,y0+dy/8+2, box1->p,cs) == 1 ) ad=97*ad/100;
+ if (ad<1) break;
+ }
+ i =loop(bp,0,dy/4,dx,cs,0,RI);
+ i+=loop(bp,i,dy/4,dx,cs,1,RI)+1;
+ // the penalty below is applied once per matching column
+ for ( ; i<dx-dx/3; i++ )
+ if( loop(bp,i,0,dy,cs,0,DO)>5*dy/8 ) {
+ ad=98*ad/100; // melted hi, li, but handwritten h
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+ if( num_cross(x0,x0,y0+(dy+3)/8,y1,box1->p,cs) > 1 ) {
+ ad=98*ad/100; // melted fr
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+
+ i=loop(bp,dx-1,3*dy/4,dx,cs,0,LE); // melted "fr" for vertical letters
+ if (i>dx/4 && loop(bp,dx-1-i,dy-1,dy,cs,1,UP)>dy/2) {
+ ad=94*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
+
+ i=loop(bp,dx-1,1+dy/16,dx,cs,0,LE); if (i<dx/4) {
+ ad=98*ad/100;
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+ if( num_cross(dx-i+1+dx/8,dx-i+1+dx/8,0,1+dy/16,bp,cs) > 0 ) {
+ ad=95*ad/100; // melted fi
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+ if (loop(box1->p,x1,y0+1+dy/16,dx,cs,0,LE)<dx/4) {
+ ad=98*ad/100; // fi
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+ if (loop(box1->p,x1,y0 ,dx,cs,0,LE)<dx/4
+ || loop(box1->p,x1,y0+1,dx,cs,0,LE)<dx/4) {
+ ad=98*ad/100; // li
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+
+
+ if (sdata->holes.num > 0) ad=97*ad/100;
+ if (box1->m2) {
+ if ( gchar) ad=98*ad/100;
+ if (!hchar) ad=97*ad/100;
+ } else ad=99*ad/100;
+ Setac(box1,'h',ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_H(ocr0_shared_t *sdata){ /* Rule test for the letter 'H'.
+   Pixel based: looks for two vertical strokes, one horizontal bar in the
+   middle third and open gaps above and below the bar, then rejects shapes
+   that look like A, K, M or a thick N. A failed rule leaves the one-pass
+   for-loop through Break (macro defined outside this chunk); otherwise
+   Setac(box1,'H',ad) is called with ad starting at 100.
+   bp is used with box-relative coordinates, box1->p with absolute ones.
+   sdata: shared data of the box under test (box1, bp, cs, hchar, gchar, holes).
+   Returns box1->c as it is after the test. */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,j1,d,x,y,ya,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad; /* tmp-vars; ad is the weight finally handed to Setac() */
+
+ // --- test H ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (the loop body runs at most once, it ends with break)
+ DBG( wchar_t c_ask='H'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if( num_cross(0,dx-1,dy/4 ,dy/4 ,bp,cs) != 2
+ && num_cross(0,dx-1,dy/4-1,dy/4-1,bp,cs) != 2 ) Break;
+ if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2
+ && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) Break;
+ if( loop(bp,0 ,dy/8,dx,cs,0,RI)
+ + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) Break; // ~A
+ for( j1=0,i=1,y=y0+dy/10; y<y1-dy/10 && i; y++ ) // 2 vertical lines; j1 collects the largest left+right margin
+ { j=loop(box1->p,x0 ,y,dx,cs,0,RI)
+ +loop(box1->p,x1 ,y,dx,cs,0,LE); if( j>dx/2 ) i=0; if(j>j1)j1=j; }
+ if( !i ) Break;
+ for( i=1,y=dy/4; y<dy-1-dy/4 && i; y++ ) // max - min width
+ { j=loop(bp,0 ,y,dx,cs,0,RI)
+ +loop(bp,dx-1,y,dx,cs,0,LE); if( j1-j>dx/5 ) i=0; }
+ if( !i ) Break; // ~K Jul00
+ for( i=0,ya=y=y0+dy/3; y<y1-dy/3; y++ ) // horizontal line; i = longest run found, ya = its row
+ { j=loop(box1->p,x0 ,y,dx,cs,0,RI);
+ j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } }
+ if( i<=dx/2 ) Break; ya-=y0; // from here on ya is box-relative
+ if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1
+ && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) Break; /* Dec00 */
+ for( y=ya; y<dy-dy/4; y++ ) // ~M Dec00
+ if( num_cross(0,dx-1,y ,y ,bp,cs) > 2
+ && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break;
+ if ( y<dy-dy/4 ) Break;
+ for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ // the five loops below clear i as soon as the wanted pattern is found
+ if( get_bw( x, x,y0 ,y0+dy/4,box1->p,cs,1) == 0 ) i=0;
+ } if( i ) Break;
+ for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
+ if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0;
+ } if( i ) Break;
+ for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
+ if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0;
+ } if( i ) Break;
+ for(i=1,y=y0;y<=y0+dy/4 && i;y++){
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ for(i=1,y=y1-dy/4;y<=y1 && i;y++){
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) Break;
+ i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) Break;
+ i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2<i1-dx/4 || i2>i1+dx/8) Break;
+ i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3<i2-dx/4 || i3>i2+dx/8) Break;
+ if(abs(i1+i3-2*i2)>dx/16+1) Break; // the three right-margin samples must lie nearly on one line
+ // test for thick tall N looking like a H
+ if( num_cross(x0,x1,y0,y1, box1->p,cs) < 2 ) Break; // sure N
+ i1=loop(bp, 0, dy/4,dx,cs,0,RI);
+ i1=loop(bp, i1, dy/4,dx,cs,1,RI);
+ i2=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI);
+ i2=loop(bp, i2,dy-1-dy/4,dx,cs,1,RI);
+ i3=loop(bp,dx-1 ,dy-1-dy/4,dx,cs,0,LE);
+ i3=loop(bp,dx-1-i3,dy-1-dy/4,dx,cs,1,LE);
+ i =loop(bp, 0,dy/2+1+dy/8,dx,cs,0,RI);
+ i+=loop(bp, i,dy/2+1+dy/8,dx,cs,1,RI);
+ i =loop(bp, i,dy/2+1+dy/8,dx,cs,0,RI); // plain assignment here, not +=: i ends up as the length of the third run only
+ if (i<dx/2-1 && 5*i1>6*i2 && 5*i3>6*i2 && i1>i2 && i3>i2 ) Break;
+ if( dx>8 )
+ if ( loop(bp,dx-1, 3*dy/8,dx,cs,0,LE)
+ -loop(bp,dx-1, dy/8,dx,cs,0,LE)>dx/4
+ && loop(bp,dx-1, 3*dy/8,dx,cs,0,LE)
+ -loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)>dx/4 ) Break; // ~K
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) Break;
+ if (sdata->holes.num != 0) Break;
+ if ( gchar) ad=99*ad/100;
+ if (!hchar) ad=98*ad/100;
+ Setac(box1,'H',ad);
+ break; // all rules passed: leave the one-pass loop
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_k(ocr0_shared_t *sdata){ /* Rule test for the letter 'k'.
+   Starts with the same corner and bow checks on the frame vectors as the
+   'h' test, then applies pixel rules for the vertical stroke on the left,
+   the gap at the upper right and the falling line at the lower right.
+   A failed hard rule leaves the one-pass for-loop through Break (macro
+   defined outside this chunk); otherwise Setac(box1,'k',ad) is called.
+   bp is used with box-relative coordinates, box1->p with absolute ones.
+   sdata: shared data of the box under test (box1, bp, cs, aa, hchar, gchar, holes).
+   Returns box1->c as it is after the test. */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad; /* tmp-vars; ad is the weight finally handed to Setac() */
+ int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */
+
+ // --- test k ---------------------------------------------------
+ for(ad=100;dx>2 && dy>3;){ // min 3x4 (the loop body runs at most once, it ends with break)
+ // rewritten for vectors 0.43
+ int d, i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners
+ DBG( wchar_t c_ask='k'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ /* half distance to the center */
+ d=2*sq(128/4);
+ /* now we check for the upper right end of the k (same rules as in the h test) */
+ if (aa[3][2]<d/4) Break; /* [2] = distance, ~BCDEF... */
+ if (aa[0][2]>d/2) Break; /* upper left end */
+ if (aa[1][2]>d/2) Break; /* lower left end */
+ if (aa[2][2]>d/2) Break; /* lowerright end */
+/*
+ type A B=italic ???
+ 18 OOO
+ O O O
+ O O6 O
+ O7 OO O OO
+ O4OO OO OO
+ O OO O O
+ O OO O O O
+ 2O3 O5 O OOO
+*/
+ i1=i8=aa[0][3];
+ i2=i3=aa[1][3];
+ i5= aa[2][3];
+ // check the bow from below
+ for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[ i][1]
+ <box1->frame_vector[i4][1]) i4=i; // get next maximum
+ if (box1->frame_vector[ i][1]<=y0) break; // fatal!
+ }
+ if (box1->frame_vector[i4][1]-y0<dy/4) Break; // ~MN
+ if (y1-box1->frame_vector[i4][1]<dy/4) Break; // ~BCDEGIJLOQSUYZ
+ i6=nearest_frame_vector(box1, i5, i8, x1, (2*y0+y1)/3);
+ // two steps for i7 to go around pitfalls on italic h
+ i7=nearest_frame_vector(box1, i6, i8, x0, y1);
+ i3=nearest_frame_vector(box1, i2, i4, (x0+x1)/2, y1);
+ i =nearest_frame_vector(box1, i5, i6, x0, (y0+2*y1)/3);
+ if (x1-box1->frame_vector[i][0]<dy/4) Break; // h; NOTE(review): an x distance is compared with dy here -- confirm that dx was not meant
+ if (x1-box1->frame_vector[i][0]<dy/2) ad=98*ad/100;
+
+ MSG(fprintf(stderr,"i1-7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
+ if( num_cross(0, dx-1,0,0,bp,cs) != 1
+ && num_cross(0, dx-1,1,1,bp,cs) != 1 ) Break;
+ if( num_cross(0,3*dx/4, dy/8 , dy/8 ,bp,cs) != 1
+ || num_cross(0,3*dx/4,3*dy/16,3*dy/16,bp,cs) != 1 ) Break;
+ if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
+ && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
+ if( dx<8
+ && num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) != 2
+ && num_cross(dx-2,dx-2,dy/4,dy-1,bp,cs) != 2 ) Break;
+ i1=loop(bp,0,dy/2-dy/4,dx,cs,0,RI); // i1..i3 are reused from here on for left-margin samples
+ i2=loop(bp,0,dy/2 ,dx,cs,0,RI);if(i2>dx/2) Break;
+ i3=loop(bp,0,dy/2+dy/4,dx,cs,0,RI);
+ if(abs(i1+i3-2*i2)>dx/16+1 || i1<i3-1) Break; // v-line on left side?
+ if( get_bw(x0 ,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2,x1, y1-dy/3,y1 ,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/4,x1, y0 ,y0+3*dy/16,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x1-dx/4,x1, y0+dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; //~1
+ if( get_bw(x1-dx/4,x1, y1-dy/8,y1 ,box1->p,cs,1) != 1 ) Break;
+ if (sdata->holes.num > 0)
+ if (sdata->holes.hole[0].y0 > dy/4) Break;
+ // if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) Break;
+ for(y=y0+1;y<y0+dy/2;y++) // gap ???
+ if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
+ if( y<y0+dy/2 ) Break;
+ for(i=1,x=x0;x<=x0+dx/2 && i;x++)
+ if(get_line(x,y0 ,x ,y1,box1->p,cs,100)>50) i=0;
+ if( i ) Break; // no vertical line!
+
+ /* check for falling line in the lower left corner */
+ for (j=x=0,y=5*dy/8;y<7*dy/8;y++) { // x = largest right margin in rows 5/8..7/8, j = its row
+ i= loop(bp,dx-1,y,dx,cs,0,LE); if(i>x) { x=i;j=y; }
+ } // x=dx/6 on fat k
+ if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 <dx/4) Break; // NOTE(review): y is 7*dy/8 here (loop end value), not the row j -- confirm intent
+ if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 <dx/2) ad=98*ad/100;
+ x=dx-1-x; y=j;
+ i =loop(bp,dx-1,dy-1,dx,cs,0,LE); if(i>dx/2)
+ i =loop(bp,dx-1,dy-2,dx,cs,0,LE); if(i>dx/2) Break;
+ i+=loop(bp,dx-1-i,dy-1,dx,cs,1,LE)/2;
+ if( get_line(x,y,dx-1-i,dy-1,bp,cs,100)<60 ) Break;
+
+ for(y=y0+dy/3;y<y1;y++) if( num_cross(x0,x1,y,y,box1->p,cs)==2 ) break;
+ if( y==y1 ) Break;
+ if(
+ // num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL)>0 // ~A happens!
+ sdata->holes.num > 0 )
+ if (sdata->holes.hole[0].x1>dx-1-dx/4
+ || sdata->holes.hole[0].y1>dy-1-dy/4
+ || sdata->holes.hole[0].y0< dy/4) Break;
+ // if ( num_hole(x0,x1-dx/4,y0+dy/4,y1-dy/4,box1->p,cs,NULL)==0 ) Break;
+ i=loop(bp,0,dy-1,dx,cs,0,RI);
+ i=loop(bp,i,dy-1,dx,cs,1,RI); if (dx>8 && 4*i>3*dx) Break; // ~glued_tz
+ i =loop(bp,0,dy/4,dx,cs,0,RI);
+ if (i>dx/4
+ && i+loop(bp,i,dy/4,dx,cs,1,RI)>dx/2
+ && loop(bp, 0,0,dx,cs,0,RI)<=dx/4
+ && loop(bp,dx-1,0,dx,cs,0,LE)>=dx/2 ) ad=90*ad/100; // divided Q
+
+ if( 2*y0>(box1->m1+box1->m2) ) ad=99*ad/100;
+
+ if ( gchar) ad=98*ad/100;
+ if (!hchar) ad=98*ad/100;
+ Setac(box1,'k',ad);
+ break; // all rules passed: leave the one-pass loop
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_K(ocr0_shared_t *sdata){ /* Rule test for the letter 'K'.
+   Pixel based: checks the vertical stroke on the left, the gaps at the top
+   and bottom between the arms, and then fits lines through the six main
+   points a..f (see the sketch further down) with get_line2().
+   A failed hard rule leaves the one-pass for-loop through Break (macro
+   defined outside this chunk); otherwise Setac(box1,'K',ad) is called.
+   bp is used with box-relative coordinates, box1->p with absolute ones.
+   sdata: shared data of the box under test (box1, bp, cs, hchar, gchar, holes).
+   Returns box1->c as it is after the test. */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,i1,i2,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad,ya,xa,yb,xb,yc,xc,yd,xd,ye,xe,yf,xf; /* tmp-vars; (xa,ya)..(xf,yf) are the points a..f of the sketch below */
+
+ // --- test K ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // updated 29 Mar 2000 perfect???
+ DBG( wchar_t c_ask='K'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ for(y=dy/8;y<dy-dy/8;y++) // every row between 1/8 and 7/8 must give a nonzero get_bw() in the left half
+ if( !get_bw(0,dx/2,y,y,bp,cs,1) ) break;
+ if( y<dy-dy/8 ) Break;
+ for(j=0,i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
+ y= loop(box1->p,x,y0,y1-y0,cs,0,DO); if (y>3*dy/4) { i=1;break; }
+ if (dy>15 && j>dy/8){ // NOTE(review): j is 0 from the loop init and only assigned inside this branch, so the branch looks unreachable -- confirm
+ j =loop(box1->p,x-1,y0+y-1,x1-x0,cs,0,LE)/2;
+ y+=loop(box1->p,x-j,y0+y-1,y1-y0,cs,0,DO)-1;
+ }
+ if(y>=dy/4) i=0; /* ok, found gap */
+ } if( i ) Break;
+ for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap
+ i=loop(box1->p,x,y1,dy,cs,0,UP);
+ /* on small chars bypass possible low left serifs */
+ if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
+ if (i2>1) i+=i2-1; }
+ if (i>y) { y=i; i1=x; } // NOTE(review): i1 is not read again in this function
+ } if( y<=dy/8 ) Break; if (y<dy/4) ad=80*ad/100;
+ for(i=1,x=x0+dx/3;x<=x1-dx/8 && i;x++){ // the loops below clear i as soon as the wanted crossing count is found
+ if( num_cross(x,x,y0,y1, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ for(i=1,y=y0;y<=y0+dy/4 && i;y++){
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ if( dx<10 ){
+ for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0;
+ } if( i ) Break;
+ }
+ for(i=1,y=y1-dy/4;y<=y1 && i;y++){
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ if( get_bw(x1-dx/3,x1,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; // ~k
+ if( dy>16
+ && loop(bp,0, dy/4,dx,cs,0,RI)
+ +loop(bp,0,3*dy/4,dx,cs,0,RI)
+ <2*loop(bp,0, dy/2,dx,cs,0,RI)-2-dx/32 ) Break; // ~X
+
+ i=loop(box1->p,x1,y0+ dy/4,x1-x0+1,cs,0,LE); if(i>dx/2) Break;
+ j=loop(box1->p,x1,y0+ dy/2,x1-x0+1,cs,0,LE);
+ x=loop(box1->p,x1,y0+3*dy/8,x1-x0+1,cs,0,LE); if(x>j) j=x;
+ if(j<=i ) Break; i=j; // right margin must be larger near the middle than at 1/4 height ...
+ j=loop(box1->p,x1,y1-dy/4,x1-x0+1,cs,0,LE); if(j>=i ) Break; // ... and smaller again at 3/4 height
+ // out_x(box1); // detailed analysis
+ //
+ // a d <= that are main points of K
+ // | /
+ // b/e
+ // | \ .
+ // c f
+ ya= dy/4;xa=loop(bp,0,ya,dx,cs,0,RI);xa+=loop(bp,xa,ya,dx,cs,1,RI)/2;
+ yc=dy-dy/4;xc=loop(bp,0,yc,dx,cs,0,RI);xc+=loop(bp,xc,yc,dx,cs,1,RI)/2;
+ yb=dy/2; xb=dx-1-loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ for(yd=ye=yf=xe=y=i=0,xf=xd=dx;y<dy/4;y++){ // range 0..1/4
+ x =loop(bp,dx-1, y,dx,cs,0,LE); if(x<xd){ xd=x;yd= y; }
+ x =loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if(x<xf){ xf=x;yf=dy-1-y; }
+ x =loop(bp,dx-1,dy/2+y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2+y; }
+ x =loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2-y; }
+#if 0 // removed v0.2.4a2
+ x =loop(bp,0 ,dy/2+y,dx,cs,0,RI); // middle left border
+ x+=loop(bp,x ,dy/2+y,dx,cs,1,RI); // test 2nd cross
+ x+=loop(bp,x ,dy/2+y,dx,cs,0,RI); if(x<xb){ xb=x;yb=dy/2+y; }
+#endif
+ x =loop(bp,0 ,dy/2-y,dx,cs,0,RI);
+ x+=loop(bp,x ,dy/2-y,dx,cs,1,RI); // test 2nd cross
+ x+=loop(bp,x ,dy/2-y,dx,cs,0,RI); if(x<xb){ xb=x;yb=dy/2-y; }
+ x =dx-1-loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x<xb){ xb=x;yb=dy/2-y; }
+ }
+ xd=dx-1-xd;xe=dx-1-xe;xf=dx-1-xf; // turn the right margins into x positions
+ xb+=loop(bp,xb,yb,dx,cs,1,RI)/4; // detect center of line
+ xe-=loop(bp,xe,ye,dx,cs,1,LE)/4;
+ xd-=loop(bp,xd,yd,dx,cs,1,LE)/4;
+ xf-=loop(bp,xf,yf,dx,cs,1,LE)/4;
+#if 0
+ MSG( \
+ printf("a=%d %d b=%d %d c=%d %d d=%d %d e=%d %d f=%d %d dxdy %d %d",\
+ xa,ya,xb,yb,xc,yc,xd,yd,xe,ye,xf,yf,dx,dy);\
+ )
+#endif
+ if( get_line2(xa,ya,xc,yc,bp,cs,100)<95 ) Break; // line a-c: the vertical stroke
+ if( dx>8 ){ // example szaka0103
+ if( xe>5*dx/8 || xb>5*dx/8 ) Break; // ~{\it n}
+ i=loop(bp,xb,yb,xb,cs,1,LE); // thick center? see font22
+ if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) // right up
+ if( get_line2(xb-i/2,yb,xd,yd,bp,cs,100)<95 ) Break;
+ if( get_line2(xe,ye,xf,yf,bp,cs,100)<95 ) Break; // right down
+ xe+=loop(bp,xe,ye,dx,cs,1,RI); if( xe>=xf ) Break; // ~{\it n}
+ } else {
+ if( dy<16 && !hchar ) Break;
+ if( loop(bp,0,1,dy,cs,1,DO)<=3*dx/4
+ && loop(bp,1,1,dy,cs,1,DO)<=3*dx/4
+ && loop(bp,2,1,dy,cs,1,DO)<=3*dx/4 ) Break; // ~x
+ }
+ if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)<=dx/8){
+ ad=99*ad/100; /* broken B ? */
+ if (sdata->holes.num > 0)
+ if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break;
+ // if( num_hole(x0,x1,y0,(y0+2*y1)/3,box1->p,cs,NULL)>0) Break; // broken B
+ }
+ if(box1->m3 && !hchar) ad=99*ad/100;
+ if(box1->m3 && gchar) ad=99*ad/100;
+ // printf(" ok xe=%d",xe);
+ Setac(box1,'K',ad);
+ break; // all rules passed: leave the one-pass loop
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_f(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */
+ ab[8][4], /* special points (x,y,dist^2,vector_idx) */
+ ad; /* tmp-vars */
+ /* x=mindist_to_a y=0 "t"
+ 0>..$$. 0>..$$ 0>..$$ end right bow a--..$$ a--.$7. y>0 "f"
+ 1>.$..$ 1>.$.. 1>.$$$ start right bow .$7. .$..
+ .@... .@.. 2>.@@. start upper end .@.. .@..
+ 2>.$... 2>.$.. 3>$$$$ crossing bar .$.. $$$.
+ 3>$@$$. 3>$@$. $@@$ $@$. .@..
+ 4>.$... 4>.$.. 4>.$$. lower end .$.. .$..
+ .@... .@.. .@@. .@.. .@..
+ .@... .@.. .@@. .@.. .@..
+ 5>.$... 5>.$.. 5>.$$. lower start .$.. .$..
+ 6>..... 6>$... 6>.... optional left bow
+ */
+ // --- test f like t ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>5;){ // sometimes no hchar!
+ // rewritten for vectors 0.43
+ int d, i1, i2, i3, i4, i5, i6, i7, i8, i9; // line derivation + corners
+ DBG( wchar_t c_ask='f'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ /* half distance to the center */
+ d=2*sq(128/4);
+ /* now we check for the upper right end of the h */
+ if (aa[3][2]>d/2) Break; /* [2] = distance, ~BCDEF... */
+ if (aa[0][2]>d ) Break; /* upper left end */
+/*
+ 9
+ OOO
+ O 7 O8
+ O6
+ 1OOOO5
+ O4
+ O
+ 2O3
+ OOOOO
+*/
+ i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0-dx/2,(5*y0+3*y1)/8);
+ /* we need i for 4x6 font, where left side of h-bar is near (x0,y1) */
+ i =aa[1][3]; if (box1->frame_vector[i][1]<y1-dy/8)
+ i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y1+dy/4);
+ i2=nearest_frame_vector(box1, i1, i, x1, y1);
+ i =nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y1+dy/4);
+ i3=nearest_frame_vector(box1, i,aa[3][3], x0, y1);
+ i7=nearest_frame_vector(box1, i3,aa[3][3],(x0+x1)/2, y0);
+ i8=nearest_frame_vector(box1, i7,aa[0][3], x1, (3*y0+y1)/4);
+ i9=nearest_frame_vector(box1,aa[3][3],aa[0][3],(x0+2*x1)/3,y0-dy/4);
+ i5=nearest_frame_vector(box1, i3, i7, x1+dx/4, (5*y0+3*y1)/8);
+ i4=nearest_frame_vector(box1, i3, i5, x0, (3*y0+y1)/4);
+ i6=nearest_frame_vector(box1, i5, i7, x0, (y0+3*y1)/4);
+
+ MSG(fprintf(stderr,"i1-9 %d %d %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7,i8,i9);)
+
+ // check if vertical line is near to the left side
+ if (box1->frame_vector[i2][0]-x0>dx/2) Break; // ~3
+ i =nearest_frame_vector(box1, aa[0][3], i2, x1+2*dx, (y0+y1)/2);
+ // MSG(fprintf(stderr,"i %d",i);)
+ if (box1->frame_vector[i ][0]
+ -box1->frame_vector[i9][0]>dx/8) Break; // ~3
+
+ if( (box1->dots) ) Break; // Bold-face is gchar
+ if (dy<=box1->m3-box1->m2+1) Break;
+ for(x=0,j=y=2+(3*dy+4)/32;y<=5*dy/8;y++){ // upper cross line min=2
+ i=loop(bp,0,y,dx,cs,0,RI); if( y>dy/4 && i>5*dx/8 ) break;
+ i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;j=y; }
+ if( y<3*dy/4 && y>dy/4
+ && num_cross(0,dx-1,y ,y ,bp,cs) != 1
+ && num_cross(0,dx-1,y+1,y+1,bp,cs) != 1 // against noise
+ ) break;
+ } if( y<=5*dy/8 ) Break; y=j;// if( y>dy/2 || y<dy/8 ) Break;
+ // x is thickest width of vertical line here
+ i=loop(bp,(dx+1)/2,0,dy,cs,0,DO)/2;
+ if( i>dy/8
+ && num_cross( 0, (dx+1)/2,i,i,bp,cs) > 0
+ && num_cross((dx+1)/2,dx-1,i,i,bp,cs) > 0 ) Break; // ~Y
+
+ if (loop(bp,3*dx/4, 0,dy,cs,0,DO)>dy/8
+ && loop(bp,3*dx/4-1,0,dy,cs,0,DO)>dy/8) Break; // upper bow
+ i=3*dy/4; if (box1->m3 && i>=box1->m3) i=box1->m3-1;
+ if (num_cross(0,dx-1,i,i,bp,cs)!=1) Break;
+
+ // the middle bar appear in a wide vertical range, get part below
+ for (i1=dx,i2=y,j=y+1;j<dy-dy/4;j++){
+ i=loop(bp,0,j,dx,cs,0,RI);
+ i=loop(bp,i,j,dx,cs,1,RI); // thickness vert. line
+ if (i<i1) { i1=i; i2=j; if (2*i<=x) break; }
+ } i=i1; j=i2; /* i=dx, j=y below horiz-bar */
+ MSG(fprintf(stderr,"j=%d i=%d y=%d x=%d",j,i,y,x);)
+ // bar should have twice of the thickness of v-line
+ if (x<2*i && x<dx) Break;
+ if (x<i+2+dx/8) ad=97*ad/100; // fat f
+
+ // check for the upper bow to the right top side
+ i3=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y0);
+ MSG(fprintf(stderr,"xy= %d %d %d %d",x0,y0,\
+ box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);)
+ ab[7][0]=box1->frame_vector[i3][0];
+ ab[7][1]=box1->frame_vector[i3][1];
+ ab[7][3]=i3;
+ if (ab[7][1]-y0<=dy/16) ad=95*ad/100; // ~t
+ // because of the dx,dy scaling the horiz. bar could be nearer to (x1,y0)
+ // as the upper right end of the "t"
+ if (aa[3][0]-x0>3*dx/4 && aa[3][1]-y0>3*dy/16) ad=99*ad/100; // ~t
+
+
+ j=loop(bp,0,dy/8,dx,cs,0,RI); // if j>dx/2 we have italic f
+ if ((2*x<dx && j<=dx/2) || 3*x<dx) Break; // bar should be not to small
+ for(i=dy/8;i<dy;i++)
+ if (loop(bp,0,i,dx,cs,0,RI)>(j+dx/4)) break;
+ if (i<dy) Break; // check for v-line
+
+ if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<dx/2 )
+ if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-1
+ <=loop(bp,dx-1, y ,dx,cs,0,LE) )
+ if( loop(bp,dx-1, y-1,dx,cs,0,LE)
+ <=loop(bp,dx-1, y ,dx,cs,0,LE) ) Break; // ~1
+
+ if( loop(bp,0,dy/2,dx,cs,0,RI)-1
+ >loop(bp,0, 1,dx,cs,0,RI) ) Break; // ~X
+
+ i=y;j=1; // j used as flag
+ if( num_cross(0,dx-1,0,0,bp,cs)==1 && hchar) //~r
+ if( num_cross(0,dx-1,dy-1,dy-1,bp,cs)!=1
+ && num_cross(0,dx-1,dy-2,dy-2,bp,cs)!=1 ) Break; // ~* etc.
+ // check for upper bow to right
+ for(y=1;j && y<i; y++) // no @@ pattern
+ if( num_cross(0,dx-1,y ,y ,bp,cs) ==2 ) j=0;
+ if (j==0) { ad=(ad+101)/2; }
+ for(y=1;j && y<i; y++) // no @@ pattern, try to detect it
+ for(x=0;j && x<dx ;x++){ // ..
+ if( (getpixel(bp,x ,y )>=cs || dx<7) && getpixel(bp,x+1,y )>=cs
+ && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs )
+ { j=0;break; }
+ } if(j) ad=98*ad/100; // not detected
+
+ // if( num_hole (x0 , x1 , y0, y1,box1->p,cs,NULL) != 0 ) Break; // ~e
+ if (sdata->holes.num != 0) Break; // ~e
+ for(i1=i2=dx,y=7*dy/8;y<dy;y++){
+ x=loop(bp,0 ,y,dx,cs,0,RI);if(x<i1)i1=x;
+ x=loop(bp,dx-1,y,dx,cs,0,LE);if(x<i2)i2=x;
+ }
+ if(i1>i2+dx/4) Break; // ~t ~e
+ if(i1>i2+1) ad=96*ad/100; // ~t ~e
+ if( loop(bp,0,3*dy/4,dx,cs,0,RI)<i1-dx/4 ) Break;
+ if( dx>5 && !hchar)
+ if( loop(bp,dx-1,dy/2,dx,cs,0,LE)>3*dx/4 )
+ if( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dx/2 ) Break; // ~c
+ if( dx>8 )
+ if( loop(bp, 0,2*dy/3 ,dx,cs,0,RI)>2*dx/3
+ || loop(bp, 0,2*dy/3-1,dx,cs,0,RI)>2*dx/3 )
+ if( loop(bp,dx-1, dy/4 ,dx,cs,0,LE)>2*dx/3 ) Break; // ~5 ~S
+
+ if (!hchar)
+ if ( get_bw(x0+dx/8,x0+dx/8,y0+dy/4,y1-dy/16,box1->p,cs,2) == 0
+ && num_cross(x1-dx/4,x1-dx/4,y0,y1,box1->p,cs)!=2
+ && num_cross(x1-dx/8,x1-dx/8,y0,y1,box1->p,cs)!=2 ) Break; // ~r
+
+ if (dy>15)
+ if( num_cross(x0,x1,y1-dy/4,y1-dy/4,box1->p,cs)>1
+ && num_cross(x0,x1,y0+dy/4,y0+dy/4,box1->p,cs)>1 ) Break; // ~H
+
+ if( dx>4 )
+ if( loop(bp,dx-1 ,3*dy/4,dx,cs,0,LE)-
+ loop(bp,0 ,3*dy/4,dx,cs,0,RI)>dx/5+1
+ && loop(bp,dx-1-dx/8,dy-1 ,dy,cs,0,UP)<dy/4 ) {
+ if( loop(bp,dx-1 ,5*dy/16,dx,cs,0,LE)-
+ loop(bp,0 ,5*dy/16,dx,cs,0,RI)>=dx/5+1) ad=98*ad/100; // ~E
+ i=loop(bp,dx/8,0,dy,cs,0,DO);
+ if (i<dy/8 || i>dy/2) {
+ ad=98*ad/100; // ~E, could also be a "f" with big serifs
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+ if (!gchar) { ad=98*ad/100;
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+ }
+ i = loop(bp,dx-1 ,3*dy/4,dx ,cs,0,LE)/2;
+ if (loop(bp,dx-1-i , dy-1,dy/2,cs,0,UP)<dy/4)
+ if (loop(bp,0 ,3*dy/4,dx ,cs,0,RI)<dx/4) {
+ ad=98*ad/100; // ~E but serif-f
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+
+ if( loop(bp,0,dy/4,dx ,cs,0,RI)>1
+ && loop(bp,0, 0,dy/4,cs,0,DO)<dy/4 ) {
+ ad=95*ad/100; // ~I
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+
+ if (get_bw(x0+dx/16,x1-dx/16,y0,y0,box1->p,cs,2) == 0) { // white pixels?
+ ad=98*ad/100; // F
+ MSG(fprintf(stderr,"ad=%d",ad);) }
+
+ if (!hchar) ad=ad*98/100; // d*=100;d/=128 // not 100% !
+ if (box1->m4>0 && gchar && ad<99 &&
+ 8*box1->y1 >= box1->m4*7+box1->m3) ad++;
+ Setac(box1,'f',ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_bB(ocr0_shared_t *sdata){ /* Rule tests for 'B' and then 'b' on the box in sdata. A test that survives all its rules calls Setac() with its score ad. Returns box1->c, or 'b' at once when the 'b' score is still >=100. */
+ struct box *box1=sdata->box1; /* box under test; box1->p is addressed with x0/y0-based coordinates below */
+ pix *bp=sdata->bp; /* addressed with 0..dx-1 / 0..dy-1 below -- presumably a cropped copy of the box, TODO confirm */
+ int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent: starts at 100 and is scaled down by the soft rules */
+
+ // --- test B --------------------------------------------------- (notes like "~8" presumably mean "reject look-alike 8" -- TODO confirm)
+ for(ad=d=100;dx>2 && dy>4;){ // needs at least 3x5 (old note said 3x4); the body ends in break, so it runs at most once. Break is a macro defined outside this chunk -- presumably a logging break, TODO confirm
+ DBG( wchar_t c_ask='B'; )
+ if (sdata->holes.num < 2) Break; /* need at least two holes (tolerant against an extra tiny hole); exactly two is enforced further down */
+ for(i=1,y=y0;y<y1-dy/2 && i;y++) // upper half: every row is probed over the left half of the box (left stem)
+ if( get_bw(x0,x0+dx/2, y , y ,box1->p,cs,1) != 1 ) i=0; // get_bw(..,1)==1 presumably means "black pixel found" -- TODO confirm
+ if( !i ) Break;
+ for(i=1,y=y1-dy/2;y<y1 && i;y++) // lower half: same probe over the left third
+ if( get_bw(x0,x0+dx/3, y , y ,box1->p,cs,1) != 1 ) i=0;
+ if( !i ) Break;
+ if( get_bw(x1,x1 , y0 , y0 ,box1->p,cs,1) == 1 ) Break; // probe of the single top-right corner pixel must not return 1
+ if( num_cross(x0+dx/2, x0+dx/2,y0,y1 ,box1->p,cs) != 3 ) // a vertical cut at the middle or at the right third must give 3
+ if( num_cross(x1-dx/3, x1-dx/3,y0,y1 ,box1->p,cs) != 3 ) Break;
+ /* --- detect center of lower hole --- (scanning upward from the bottom at the middle column; y ends as an absolute row) */
+ y = loop(box1->p,x0+dx/2,y1 ,dy,cs,0,UP); if (y>1+dy/8) Break;
+ y+= loop(box1->p,x0+dx/2,y1-y,dy,cs,1,UP); if (y>dy/3) Break;
+ y=y1-y-loop(box1->p,x0+dx/2,y1-y,dy,cs,0,UP)/2; if (y<y0+3*dy/8) Break;
+ if (y<y0+dy/2) ad=96*ad/100; // centre row lies above mid height: small penalty
+ if( num_cross(0,dx-1,y-y0 ,y-y0 ,bp,cs) != 2 ) // row through that centre (or the next row) must give 2
+ if( num_cross(0,dx-1,y-y0+1,y-y0+1,bp,cs) != 2 ) Break;
+ if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) // same check at row dy/4, tolerating one row up or down
+ if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 )
+ if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 2 ) Break;
+ for( y=dy/4;y<3*dy/4;y++ ) if( num_cross(0,dx-1,y,y,bp,cs)==1 ) break; // find a row with num_cross==1 between the quarter rows; y (box-relative) is reused below
+ if( y==3*dy/4 ) Break; // no such row found
+
+ if( loop(box1->p,x0,y0+ y ,dx,cs,0,RI)
+ > loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)+dx/32 )
+ if( get_bw(x0,x0,y0,y0,box1->p,cs,1) == 0 )
+ if( get_bw(x0,x0,y1,y1,box1->p,cs,1) == 0 ) Break; // ~8
+ i1=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); // left-side loop() value at the upper quarter row
+ i2=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); // ... at mid height; the next two lines take the maximum with two rows above
+ i =loop(box1->p,x0,y0+dy/2-dy/ 8,dx,cs,0,RI); if(i>i2) i2=i;
+ i =loop(box1->p,x0,y0+dy/2-dy/16,dx,cs,0,RI); if(i>i2) i2=i;
+ i3=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); // ... at the lower quarter row
+ if(dy>16 && i3<i2 && i1+i3<2*i2){ // mid-height value exceeds the mean of the two quarter rows
+ if (i3+i1<2*i2-dx/16) ad=98*ad/100; // ~8
+ if (i3+i1<2*i2-dx/8 ) ad=96*ad/100;
+ if( loop(box1->p,x0,y0+ 1 ,dx,cs,0,RI) // all four comparisons (top two rows and bottom two rows against the rows 3 further in) must hold to reject
+ >= loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
+ if( loop(box1->p,x0,y0+ 0 ,dx,cs,0,RI)
+ > loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 )
+ if( loop(box1->p,x0,y1- 0 ,dx,cs,0,RI)
+ > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 )
+ if( loop(box1->p,x0,y1- 1 ,dx,cs,0,RI)
+ > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) Break; // ~8 Aug00
+ }
+
+ if (sdata->holes.num != 2) Break; // exactly two holes from here on
+ if (sdata->holes.hole[0].y0 < y-1 // reject when both holes start above row y-1 ...
+ && sdata->holes.hole[1].y0 < y-1 ) Break;
+ if (sdata->holes.hole[0].y1 > y+1 // ... or both end below row y+1 (y is the num_cross==1 row found above)
+ && sdata->holes.hole[1].y1 > y+1 ) Break;
+ // if( num_hole(0,dx-1,0 ,y+1 ,bp,cs,NULL) != 1 ) Break;
+ // if( num_hole(0,dx-1,y-1,dy-1,bp,cs,NULL) != 1 ) Break;
+ // out_x(box1);
+
+ for( x=dx,y=dy/6; y<dy-dy/8; y++ ) // left border straight: x tracks the smallest left-side value seen so far
+ { i=loop(box1->p,x0,y0+y,dx,cs,0,RI); if( i>x+dx/9 ) break;
+ if(i<x) x=i;
+ } if( y<dy-dy/8 ) Break; // ~8 bad_a
+
+ for( x=dx,y=1;y<dy/4;y++ ) // right border straight: rows dy-1, dy-2, ... (bottom quarter, moving up); the right-side value must never grow
+ { i=loop(bp,dx-1,dy-y,dx,cs,0,LE);
+ if( i<x ) x=i; else if( i>x )break;
+ } if( y<dy/4 ) Break; // ~ff (serifs?)
+
+ x=loop(bp,0,dy/2 ,dx,cs,0,RI);
+ i=loop(bp,0,dy/2-1,dx,cs,0,RI); if (i>x) x=i; // allow dust
+ i=loop(bp,0,dy/2+1,dx,cs,0,RI); if (i>x) x=i;
+ if ( loop(bp,0, dy/8,dx,cs,0,RI)
+ +loop(bp,0,7*dy/8,dx,cs,0,RI) > 2*x+1 ) Break; // not convex!
+
+ if(!hchar){ // ~ fat_a
+ ad=99*ad/100;
+ x =loop(bp,0,dy/4,dx,cs,0,RI);
+ if(loop(bp,0,dy/2,dx,cs,0,RI)>x+dx/8) ad=97*ad/100;
+ }
+
+ if ( (!hchar) && (dx<=10 || dy<=10) ) ad=97*ad/100; // hchar or good_quality
+ if (gchar) ad=99*ad/100;
+ Setac(box1,'B',ad); // Setac is defined outside this chunk -- presumably records 'B' with weight ad on the box, TODO confirm
+ break;
+ }
+ // --- test b ---------------------------------------------------
+ for(ad=d=100;dx>3 && dy>4;){ // needs at least 4x5 (old note said 3x4)
+ DBG( wchar_t c_ask='b'; )
+ if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole; exactly one hole is enforced further down */
+ for(y=y0;y<y1;y++) // left stem: walk down while the left-half probe returns 1
+ if( get_bw(x0 , x0+dx/2, y , y ,box1->p,cs,1) != 1 ) Break; // NOTE(review): this Break sits inside the inner for -- presumably it only leaves that loop, and the next line judges how far y got
+ if(y<y1-dy/32-1) Break; // the walk stopped too far above the bottom
+ if( get_bw(x0+ dx/2, x0+dx/2, y1-dy/3, y1 ,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1- dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1- dx/3, x1 , y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break; // upper-right region probe must not return 1
+ if( get_bw(x1-4*dx/9, x1 , y0+dy/5, y0+dy/5,box1->p,cs,1) == 1 ) Break;
+ if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 1 ) // &
+ if( num_cross(x0,x1,y0+dy/4-1,y0+dy/4-1,box1->p,cs) > 1 )
+ if( dy<16 ||
+ num_cross(x0,x1,y0+dy/5 ,y0+dy/5 ,box1->p,cs) > 1 ) Break; // fat b
+ for(i=j=0,y=dy/2;y<dy-dy/8;y++) // count rows with num_cross==2 (i) against all other rows (j) in the lower part
+ if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i++; else j++;
+ if( i<2*j ) Break; // v024a4
+ if (sdata->holes.num != 1) Break; // exactly one hole from here on
+ if (sdata->holes.hole[0].y0 < dy/4) Break; // the hole must not begin within the upper quarter
+ if ((sdata->holes.hole[0].y1-sdata->holes.hole[0].y0+1)
+ *(sdata->holes.hole[0].x1-sdata->holes.hole[0].x0+1)*16
+ < dx*dy) ad=90*ad/100; // hole too small: its bounding box covers less than 1/16 of the box area
+ if( num_hole( x0, x1 , y0+dy/4, y1,box1->p,cs,NULL) != 1 ) Break;
+ i=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); // right-side value at the bottom row ...
+ j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(j>i) Break; // ... must not be smaller than the one dy/8 rows higher
+ if (!hchar) ad=99*ad/100;
+ if ( gchar) ad=99*ad/100;
+ Setac(box1,'b',ad);
+ if (ad>=100) return 'b'; // score untouched by any penalty: return 'b' directly
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_dD(ocr0_shared_t *sdata){ /* Rule tests for 'D' and then 'd' on the box in sdata. A test that survives all its rules calls Setac() with its score ad. Returns box1->c. */
+ struct box *box1=sdata->box1; /* box under test; box1->p is addressed with x0/y0-based coordinates below */
+ pix *bp=sdata->bp; /* addressed with 0-based coordinates below -- presumably a cropped copy of the box, TODO confirm */
+ int i,d,x,y,ya,yb,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent: starts at 100 and is scaled down by the soft rules */
+
+ // --- test D --------------------------------------------------- (notes like "~A" presumably mean "reject look-alike A" -- TODO confirm)
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4; the body ends in break, so it runs at most once. Break is a macro defined outside this chunk -- presumably a logging break, TODO confirm
+ DBG( wchar_t c_ask='D'; )
+ if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole; exactly one hole is enforced further down */
+ if( get_bw(x0 ,x0+dx/3,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break; // mid-height row: probes of the left third and of the right third must both return 1
+ if( get_bw(x1-dx/3,x1 ,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1 ,x1 ,y0 ,y0+dy/16,box1->p,cs,1) == 1 ) Break; // probe of the top of the rightmost column must not return 1
+ if( get_bw(x1-dx/2,x1 ,y0+dy/4,y0+dy/4 ,box1->p,cs,1) != 1 ) Break;
+ if( num_cross(x0+dx/2,x0+dx/2,y0 ,y1 ,box1->p,cs) != 2 ) // a vertical cut at the middle or at the right third must give 2
+ if( num_cross(x1-dx/3,x1-dx/3,y0 ,y1 ,box1->p,cs) != 2 ) Break;
+ if( num_cross(x0 ,x1 ,y0+dy/3,y0+dy/3,box1->p,cs) != 2 ) Break; // horizontal cuts at 1/3 and 2/3 height must give 2
+ if( num_cross(x0 ,x1 ,y1-dy/3,y1-dy/3,box1->p,cs) != 2 ) Break;
+ if (sdata->holes.num != 1) Break; // exactly one hole from here on
+ if (sdata->holes.hole[0].y0 > dy/3) Break; // the hole must start within the upper third ...
+ if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // ... and reach into the lower third
+ // if( num_hole (x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break;
+ // test if left edge is straight (x tracks the largest left-side value while moving up; a row falling clearly below it fails)
+ for(x=0,y=bp->y-1-dy/8;y>=dy/5;y--){
+ i=loop(bp,0,y,x1-x0,cs,0,RI);
+ if( i+2+dx/16<=x ) break;
+ if( i>x ) x=i;
+ }
+ if (y>=dy/5 ) Break; // the scan stopped early
+ /* test if right edge is falling (upper third, top to bottom; x tracks the smallest right-side value) */
+ for(x=dx,y=0;y<dy/3;y++){
+ i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
+ if( i>x+dx/16 ) break;
+ if( i<x ) x=i;
+ }
+ if (y<dy/3 ) Break;
+ /* test if right edge is rising (lower third, bottom to top; same tracking) */
+ for(x=dx,y=bp->y-1;y>2*dy/3;y--){
+ i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
+ if( i>x+dx/16 ) break;
+ if( i<x ) x=i;
+ }
+ if (y>2*dy/3 ) Break;
+ if( loop(bp,dx-1,dy-1 ,dx,cs,0,LE) <=
+ loop(bp,dx-1,dy-2-dy/16,dx,cs,0,LE) ) Break; // P
+
+ y=loop(bp,dx/2,dy-1,dy,cs,0,UP)-1; if (dy>16) y/=2; // upward scan from the bottom at the middle column, minus one; halved for tall boxes
+ if ( y>=dy/16 ) { y-=dy/16;
+ if (get_bw(dx/2,dx-1,dy-1-y,dy-1-y,bp,cs,1)==1) Break; // ~A
+ }
+
+ ya=loop(bp, 0,dy-1,dy,cs,0,UP); // upward scans from the bottom row at column 0 ...
+ yb=loop(bp,dx/16+1,dy-1,dy,cs,0,UP); // ... and slightly further right
+ if( ya<dy/2 && ya>dy/16 && ya>yb ) Break; // ~O
+
+ if ( loop(bp, dx/2, 0,dy,cs,0,DO)
+ -loop(bp, dx/2,dy-1,dy,cs,0,UP) > dy/8 ) ad=97*ad/100; // ~b
+
+
+
+ if (loop(bp, 0, 0,dx,cs,0,RI)>=dx/2
+ && loop(bp,dx-1,dy-1,dx,cs,0,LE)>=dx/2
+ && loop(bp, 0,dy/2,dx,cs,0,RI)< 2 ) ad=96*ad/100; // thin O
+
+ if(box1->dots) ad=ad*94/100;
+ if ( gchar) ad=99*ad/100;
+ if (!hchar) ad=99*ad/100;
+ Setac(box1,'D',ad); // Setac is defined outside this chunk -- presumably records 'D' with weight ad on the box, TODO confirm
+ break;
+ }
+ // --- test d ---------------------------------------------------
+ for(d=100;dx>2 && dy>3;){ // min 3x4
+ DBG( wchar_t c_ask='d'; )
+ ad=100; // here the score is reset inside the body instead of in the for header
+ if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0 , x0+dx/2, y1-dy/6, y1-dy/9,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/4, x1 , y0+dy/8, y0+dy/8,box1->p,cs,1) != 1 ) Break; // upper-right probe must return 1
+ if( get_bw(x0+dx/2, x0+dx/2, y1-dy/4, y1 ,box1->p,cs,1) != 1 ) Break;
+ if(dy>19) // guards only the next statement
+ if( get_bw(x0 , x0+dx/3, y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x0 , x0+dx/3, y0 , y0+dy/6,box1->p,cs,1) == 1 ) Break; // upper-left region probe must not return 1
+ if( get_bw(x0 , x0+dx/4, y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2-1,x0+dx/2,y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; // ~"A
+ if( loop(bp,bp->x-1, bp->y/4,x1-x0,cs,0,LE) >
+ loop(bp,bp->x-1,3*bp->y/4,x1-x0,cs,0,LE)+1 ) Break; // right-side value at 1/4 height must not exceed the one at 3/4 height by more than 1
+ for(i=dx/8+1,x=0;x<dx && i;x++){ // need at least dx/8+1 columns with num_cross==2
+ if( num_cross(x ,x ,0 ,dy-1, bp,cs) == 2 ) i--;
+ } if( i ) Break;
+ for(i=dy/6+1,y=dy/4;y<dy && i;y++){ // rows with num_cross==2 count down, rows with more than 3 count back up
+ if( num_cross(0 ,dx-1,y ,y , bp,cs) == 2 ) i--;
+ if( num_cross(0 ,dx-1,y ,y , bp,cs) > 3 ) i++; // ~al
+ } if( i ) ad=98*ad/100;
+ for(i=dy/8+1,y=0;y<dy/2 && i;y++){ // upper half: need dy/8+1 rows where the full row and its right half both give 1
+ if( num_cross(0 ,dx-1,y ,y , bp,cs) == 1 )
+ if( num_cross(dx/2,dx-1,y ,y , bp,cs) == 1 ) i--;
+ } if( i ) Break;
+ if (sdata->holes.num<1) Break; // NOTE(review): same test as at the top of this block, so it cannot fire here
+ if (sdata->holes.num>1) {
+ if (dx<6) Break; ad=95*ad/100; } // glued j above 8 (4x6 sample)
+ MSG(fprintf(stderr,"hole[0].y0,y1= %d %d",sdata->holes.hole[0].y0,sdata->holes.hole[0].y1););
+ if ( sdata->holes.hole[0].y0 < dy/4 ) Break; // the first hole must not begin within the upper quarter
+ if (dy-sdata->holes.hole[0].y1 > dy/4+1) Break; // glued et
+ // if( num_hole(x0 , x1 , y0+dy/4 , y1 ,box1->p,cs,NULL) !=1 ) Break;
+ if( num_cross(0 ,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) != 2 ) { // glued al
+ if (dy>15) { Break; } else ad=96*ad/100; // hard reject only on tall boxes, otherwise a penalty
+ }
+ if (!hchar) ad=98*ad/100;
+ if ( gchar) ad=99*ad/100;
+ Setac(box1,'d',ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_F(ocr0_shared_t *sdata){ /* Rule test for 'F' on the box in sdata. If all rules pass, Setac() is called with the score ad. Returns box1->c. */
+ struct box *box1=sdata->box1; /* box under test; box1->p is addressed with x0/y0-based coordinates below */
+ pix *bp=sdata->bp; /* addressed with 0-based coordinates below -- presumably a cropped copy of the box, TODO confirm */
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent: starts at 100 and is scaled down by the soft rules */
+
+ // --- test F --------------------------------------------------- (notes like "~E" presumably mean "reject look-alike E" -- TODO confirm)
+ for(ad=d=100;dx>2 && dy>4;){ // needs at least 3x5 (old note: dx>1 dy>2*dx); the body runs at most once. Break is a macro defined outside this chunk -- presumably a logging break, TODO confirm
+ DBG( wchar_t c_ask='F'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole -- NOTE(review): a later check rejects holes.num>0, which makes this tolerance moot */
+ if( get_bw(x0+dx/2,x0+dx/2,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; // probe of the top of the middle column must return 1
+ if( get_bw(x0,x0+dx/4,y1-dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0,x0+dx/2,y0+dy/4,y0+dy/4,box1->p,cs,1) != 1 ) Break;
+
+ for (x=0,y=0;y<dy/4;y++) { // bottom quarter, rows dy-1-y: the right-side value j must be >=3 and >=dx/3 in every row
+ j=loop(bp,dx-1,dy-1-y,dx,cs,0,LE); if(j<3 || 3*j<dx) break; // ~f Jun00
+ if (j>x) x=j;
+ } if (y<dy/4 || x<dx/2) Break; // ... and its maximum must reach dx/2
+
+ for( i=1,y=0; y<dy/4 && i; y++ ){ // long black line: the top quarter needs a row whose second loop() value exceeds dx/2
+ j=loop(bp,0,y,dx,cs,0,RI);
+ j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/2 ) i=0; }
+ if( i ) Break;
+
+ x=loop(bp,0,dy-1-dy/4,dx,cs,0,RI);
+ x=loop(bp,x,dy-1-dy/4,dx,cs,1,RI); // stroke thickness (measured at row dy-1-dy/4)
+ for( i=1,y=dy/3; y<dy-1-dy/3 && i; y++ ) // black line: the middle third needs a row clearly wider than that thickness
+ { j=loop(bp,0,y,dx,cs,0,RI);
+ j=loop(bp,j,y,dx,cs,1,RI); if( j>dx/3 && ((j>2*x && dx>8) || j>x+1)) i=0; }
+ if( i ) Break;
+
+ y=dy/8; if (y<1) y=1;
+ for( i=1; y<dy-1-dy/2; y++ ){ // search horizontal white gap
+ x =loop(bp,dx-1,y,dx,cs,0,LE); if(x<2) continue; // skip serifs
+ j =loop(bp,dx-x,y,dy/4,cs,0,UP);
+ x+=loop(bp,dx-x,y-j+1,dx,cs,0,LE); if (x>=dx/3) { i=0; break; }
+ }
+ if( i ) Break; // no gap found
+
+ // check for vertical line on left side
+ for(i=1,y=1;y<=dy/2 && i;y++) // upper half: probe over the left half of each row
+ if( get_bw(0,dx/2,y,y,bp,cs,1) != 1 ) i=0;
+ if( !i ) Break;
+
+ for(i=1,y=dy/2;y<dy && i;y++) // lower half: probe over the left third of each row
+ if( get_bw(0,dx/3,y,y,bp,cs,1) != 1 ) i=0;
+ if( !i ) Break;
+
+ i=loop(bp,dx-1,dy-1,dx,cs,0,LE); // serif or E ?
+ if (i<=dx/3) {
+ if (loop(bp,dx-1,(dy+4)/8,dx,cs,0,LE)>dx/8 // no serif
+ || loop(bp, 0, dy-3,dx,cs,0,RI)<1) break; // NOTE(review): lowercase break here, while the other rejections use the Break macro
+ ad=99*ad/100;
+ }
+ if( get_bw(dx-1-dx/4,dx-1,dy-1-dy/4,dy-1,bp,cs,1) == 1 ) Break; // ~E
+ if( get_bw(dx-1 ,dx-1,0 ,dy/3,bp,cs,1) != 1 ) Break; // probe of the upper third of the rightmost column must return 1
+
+ if( loop(bp,0, bp->y/4,dx,cs,0,RI) <
+ loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1 ) Break;
+ // if( num_hole(x0 , x1 , y0 , y1 ,box1->p,cs,NULL) >0 ) Break;
+ if (sdata->holes.num > 0) Break; // no hole allowed at all
+ for(i=0,x=dx/4;x<dx-1;x++) // count columns with num_cross==2
+ if( num_cross(x,x,0,dy-2,bp,cs) == 2 ) i++;
+ if ( i<1 ) Break; // 0.2.4a4
+
+ if(dy<20) /* special case of small fi, not very elegant */
+ if( get_bw( 1, 1,1,1,bp,cs,1) == 1
+ && get_bw( 0, 0,2,2,bp,cs,1) == 1
+ && get_bw(dx-2,dx-1,0,0,bp,cs,1) == 0
+ && get_bw( 0, 1,0,0,bp,cs,1) == 0
+ && get_bw( 0, 0,0,1,bp,cs,1) == 0 ) Break;
+
+ // check for screen font f
+ i= loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1;
+ if (i>=0 && loop(bp,dy-1,i,dy,cs,0,UP)<=3*dy/4 ) ad=ad*98/100; // NOTE(review): passes dy-1 as x and i (from a horizontal scan) as y -- looks swapped compared with the other loop() calls; confirm
+
+ // check for screen font P
+ i= loop(bp,bp->x-1,bp->y/4,dx,cs,0,LE);
+ if (i<1) {
+ j=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE);
+ j= loop(bp,bp->x-1-j,bp->y/4,3*dy/4,cs,0,DO);
+ if (j<=dy/2) {
+ i=loop(bp,bp->x-1,0,dx,cs,0,LE);
+ ad=ad*98/100;
+ if (i>dx/8) Break;
+ if (i) ad=98*ad/100;
+ }
+ }
+
+ if (!hchar) if ((box1->m2-box1->y0)*8>=dy) { // ignore bad m1..4
+ if ( num_cross(2*dx/3,2*dx/3,0,dy-1,bp,cs) < 2 ) ad=90*ad/100; // ~r
+ }
+ if (gchar) ad=99*ad/100;
+ Setac(box1,'F',ad); // Setac is defined outside this chunk -- presumably records 'F' with weight ad on the box, TODO confirm
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_uU(ocr0_shared_t *sdata){ /* Rule test for 'u' / 'U' on the box in sdata. If all rules pass, Setac() is called with 'U' when hchar is set and 'u' otherwise. Returns box1->c. */
+ struct box *box1=sdata->box1; /* box under test; box1->p is addressed with x0/y0-based coordinates below */
+ pix *bp=sdata->bp; /* addressed with 0-based coordinates below -- presumably a cropped copy of the box, TODO confirm */
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent: starts at 100 and is scaled down by the soft rules */
+ wchar_t bc=UNKNOWN; /* character finally passed to Setac() */
+
+ // --- test uU --------------------------------------------------- (notes like "~ll" presumably mean "reject look-alike ll" -- TODO confirm)
+ // in the middle as wide as at the top (a V gets continuously narrower)
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4; the body ends in break, so it runs at most once. Break is a macro defined outside this chunk -- presumably a logging break, TODO confirm
+ DBG( wchar_t c_ask='u'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ for(y=y0+dy/4;y<y1-dy/4;y++) /* also handwritten u: every row between the quarter rows must give num_cross >= 2 */
+ if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
+ if( y<y1-dy/4 ) Break;
+ if( get_bw(dx/2,dx/2,dy/2,dy-1,bp,cs,1)==0 ) Break; // probe of the lower half of the middle column must not return 0
+ if( get_bw(dx/2,dx-1,dy/2,dy/2,bp,cs,1)==0 ) Break;
+ for(i=0,x=3*dx/8;x<dx-dx/4;x++){ // scan downward from the top over the middle columns; i keeps the largest value, stop once it shrinks again
+ y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
+ } if( i<dy/4 ) Break; x--; // largest value must reach dy/4; x steps back one column
+ if( get_bw(0,x ,i-1,i-1,bp,cs,1)==0 ) Break; // row i-1: probes left and right of column x must not return 0
+ if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) Break;
+
+ for(i=dy/8+2,y=dy/8;y<dy-(dy+2)/4 && i;y++){ // 12%+1 errors allowed (i is the remaining error budget)
+ j=num_cross(0,dx/2-((y>dy/2)?dx/8:0),y,y,bp,cs);
+ if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
+ if( y<dy/2 && (j<1 && j>2) ) { i--; ad=90*ad/100; } // NOTE(review): (j<1 && j>2) can never be true, so this branch is dead; "||" may have been intended -- confirm before changing, it would alter recognition results
+ if( y>dy/2 && j!=1 ) { i--; ad=95*ad/100; }
+ } if( !i ) Break;
+ for(i=dy/16+1,y=dy/8;y<dy-dy/4 && i;y++){ // 12%+1 errors allowed, same idea for the right half
+ j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
+ if( y>dy/2 && (j<1 && j>2) ) i--; // NOTE(review): always false as well, see the note in the previous loop
+ if( y<dy/2 && j!=1 ) i--;
+ } if( !i ) Break;
+ for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ // middle third: at least one column whose upper-third probe does not return 1
+ if( get_bw( x, x, y0, y0+dy/3,box1->p,cs,1) != 1 ) i=0;
+ } if( i ) Break;
+ for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ // budget of dx/4+1 columns whose mode-3 probe of the middle third is not 2 (meaning of mode 3 is not visible here)
+ if( get_bw( x, x,y0+dy/3,y1-dy/3,box1->p,cs,3) != 2 ) i--;
+ } if( !i ) Break;
+ for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ // a mode-3 result of 2 over the lower half rejects, over the lower third only penalises
+ if( get_bw( x, x,y1-dy/2,y1,box1->p,cs,3) == 2 ) i=0;
+ if( get_bw( x, x,y1-dy/3,y1,box1->p,cs,3) == 2 ) ad=98*ad/100;
+ } if( !i ) Break;
+ if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2
+ && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==1 ) Break; // ~{\it v}
+
+ i=loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
+ j=loop(bp,0,dy-1-dy/8 ,dx,cs,0,RI);
+ if( i<j ) Break; // ~ll v0.2.4a3
+ if(dy>15)
+ if( loop(bp,dx-1,dy/16,dx,cs,0,LE)
+ > loop(bp,dx-1,dy/8 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad 0 (thin)
+ if( hchar && dy>7)
+ if( loop(bp, 0, dy-1,dx,cs,1,RI)==dx
+ && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/16
+ && loop(bp, 0,3*dy/4,dx,cs,0,RI)>dx/16
+ && loop(bp,dx-1, dy/2,dx,cs,0,LE)>dx/16
+ && loop(bp, 0, dy/2,dx,cs,0,RI)>dx/16
+ ) Break; // melted ll
+
+ i=loop(bp, 0,dy-2-dy/8,dx,cs,0,RI); // left-side and right-side values near the bottom
+ j=loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE);
+ if ( i>dx/4 && j>dx/4 && i+j>=dx/2) Break; // v
+ if (i+j>=dx/2) ad=97*ad/100;
+
+ if ( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=2 ) ad=96*ad/100; // w
+ if ( loop(bp,dx/2,dy-1,dy,cs,0,UP)>0 ) ad=98*ad/100; // w
+
+ if (ad==100) ad=99; // ToDo: only if lines.wt<100 (as written, this test never hands out a full 100)
+ bc='u';
+ if (gchar) ad=98*ad/100;
+ if (hchar) bc='U'; // hchar switches the result to the capital letter
+ if (box1->dots>0) ad=99*ad/100;
+ Setac(box1,bc,ad); // Setac is defined outside this chunk -- presumably records bc with weight ad on the box, TODO confirm
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_micro(ocr0_shared_t *sdata){ /* Rule test for MICRO_SIGN on the box in sdata, run only when gchar is set and hchar is not. If all rules pass, Setac() is called with the score ad. Returns box1->c. */
+ struct box *box1=sdata->box1; /* box under test; box1->p is addressed with x0/y0-based coordinates below */
+ pix *bp=sdata->bp; /* addressed with 0-based coordinates below -- presumably a cropped copy of the box, TODO confirm */
+ int i,j,d,x,y,i2,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent; no rule in this test lowers it, so Setac() receives the initial 100 */
+
+ // --- test \mu µ MICRO_SIGN --------------------------------------
+ // in the middle as wide as at the top (a V gets continuously narrower)
+ if( gchar && !hchar )
+ for(ad=d=100;dx>2 && dy>4;){ // needs at least 3x5 (old note said 3x4); this test uses plain break where its siblings use the Break macro
+ DBG( wchar_t c_ask='u'; ) // NOTE(review): c_ask is 'u' although this test is for MICRO_SIGN -- looks copied from ocr0_uU; confirm
+ if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
+ for(y=y0+dy/8;y<box1->m3-dy/4;y++) // rows from y0+dy/8 up to m3-dy/4 (absolute coordinates) must give num_cross >= 2
+ if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break;
+ if( y<box1->m3-dy/4 ) break;
+ if( get_bw(dx/2,dx/2,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
+ if( get_bw(dx/2,dx-1,3*dy/8,7*dy/8,bp,cs,1)==0 ) break;
+ for(y=dy/2;y<dy;y++){ // find the first row in the lower half whose right-side value exceeds 5/8 of the width
+ x=loop(bp,dx-1,y,dx,cs,0,LE); if(8*x>5*dx) break;
+ } if( y>=dy || 2*y>box1->m3+box1->m4) break; i2=y; // NOTE(review): y is box-relative here while m3 is compared against absolute rows above -- possibly mixed coordinates, confirm. i2 is stored but not read again in this function
+ for(i=0,x=2*dx/8;x<dx-1-dx/4;x++){ // scan downward from the top over the middle columns; i keeps the largest value, stop once it shrinks again
+ y=loop(bp,x,0,dy,cs,0,DO); if(y>i)i=y; if(y<i && i>1) break;
+ } if( i<dy/4 ) break; x--;
+ if( get_bw(0,x ,i-1,i-1,bp,cs,1)==0 ) break;
+ if( get_bw(x,dx-1,i-1,i-1,bp,cs,1)==0 ) break;
+ for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 errors allowed (i is the remaining error budget)
+ j=num_cross(0,dx/2,y,y,bp,cs);
+ if( y<dy/2 && num_cross(dx/2,dx-1,y,y,bp,cs)>1 ) i--; // ~{\it v}
+ if( y<dy/2 && (j<1 && j>2) ) i--; // NOTE(review): (j<1 && j>2) can never be true; "||" may have been intended -- confirm before changing, it would alter recognition results
+ if( y>dy/2 && j!=1 ) i--;
+ } if( !i ) break;
+ for(i=dy/16+1,y=dy/8;y<dy-(box1->m4-box1->m3)-dy/4 && i;y++){ // 12%+1 errors allowed, same idea for the right half
+ j=num_cross(dx-dx/2,dx-1,y,y,bp,cs);
+ if( y>dy/2 && (j<1 && j>2) ) i--; // NOTE(review): always false as well, see the note in the previous loop
+ if( y<dy/2 && j!=1 ) i--;
+ } if( !i ) break;
+ for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ // middle third: at least one column whose upper-quarter probe does not return 1
+ if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
+ } if( i ) break;
+ for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ // budget of dx/4+1 columns whose mode-3 probe is not 2 (meaning of mode 3 is not visible here)
+ if( get_bw( x, x,y0+dy/4,y1-dy/2,box1->p,cs,3) != 2 ) i--;
+ } if( !i ) break;
+ if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)!=1 ) break; // left half at 1/4 height and right half at mid height must each give exactly 1
+ if( num_cross(dx-dx/2,dx-1,dy-dy/2,dy-dy/2,bp,cs)!=1 ) break;
+ if( get_bw( (dx+2)/4,dx-1,dy-2-3*dy/16,dy-1,bp,cs,1) == 1 ) break; // probe of the bottom band right of the first quarter must not return 1
+ if( num_cross(0,dx/4,dy-1,dy-1,bp,cs)!=1 ) break; // bottom row, left quarter, must give exactly 1
+
+ Setac(box1,MICRO_SIGN,ad); // Setac is defined outside this chunk -- presumably records the char with weight ad on the box, TODO confirm
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_vV(ocr0_shared_t *sdata){ /* Rule test for 'v' / 'V' on the box in sdata. If all rules pass, Setac() is called with 'V' when hchar is set and 'v' otherwise. Returns box1->c. */
+ struct box *box1=sdata->box1; /* box under test; box1->p is addressed with x0/y0-based coordinates below */
+ pix *bp=sdata->bp; /* addressed with 0-based coordinates below -- presumably a cropped copy of the box, TODO confirm */
+ int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent: starts at 100 and is scaled down by the soft rules */
+ wchar_t bc=UNKNOWN; /* character finally passed to Setac() */
+
+ // --- test v ------------------------------------------------- (notes like "~Y" presumably mean "reject look-alike Y" -- TODO confirm)
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4; the body ends in break, so it runs at most once. Break is a macro defined outside this chunk -- presumably a logging break, TODO confirm
+ DBG( wchar_t c_ask='v'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ x=loop(bp,dx/2,0,dx,cs,1,RI)+dx/2; // be sure in the upper gap
+ y=loop(bp, x,0,(dy+1)/2,cs,0,DO)-1; // (x,y) should be in the gap
+ if (x>3*dx/4 || y<dy/4) Break; // gap too far right or not deep enough
+ if( get_bw(x0,x0+x,y0+y,y0+y,box1->p,cs,1) != 1 ) Break; // row y: probes left and right of the gap must both return 1
+ if( get_bw(x0+x,x1,y0+y,y0+y,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+x,x0+x,y1-dy/2,y1, box1->p,cs,1) != 1 ) Break; // column x, lower half, must return 1
+ if( get_bw(x0+x, x0+x ,y0, y0+dy/3,box1->p,cs,1) == 1 ) // it v?
+ if( get_bw(x0+x+1,x0+x+1,y0, y0+dy/3,box1->p,cs,1) == 1 ) Break;
+
+ // UVW
+ if(((num_cross( 0,dx/2+1,dy/ 8,dy/ 8,bp,cs)!=1)
+ && (num_cross( 0,dx/2+1,dy/16,dy/16,bp,cs)!=1) // it v
+ && (num_cross(dx/2+1,dx -1,dy/ 8,dy/ 8,bp,cs)!=1)) /* () added on Sep00 */
+ || ((num_cross( 0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs)> 1)
+ && (num_cross( 0,dx-1,dy-1 ,dy-1 ,bp,cs)> 1)) ) Break;
+ // UV
+ if( get_bw(0 ,dx/8,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break; // bottom-left and bottom-right corner regions: probes must not return 1
+ if( get_bw(dx-1-dx/8,dx-1,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break;
+ if( loop(bp,0 ,dy/6 ,dx,cs,0,RI)
+ >=loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) && dy>6 ) Break; // left-side value near the top must be smaller than at 2/3 height
+ if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI)
+ >loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI)
+ && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
+ >loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) Break; // better OR ?
+ if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI)
+ >=loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI)
+ && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE)
+ >=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) ad=99*ad/100; // font21
+ if( loop(bp,dx-1,dy/6 ,dx,cs,0,LE)
+ >=loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) && dy>6 ) Break; // same as above for the right side
+ x=loop(bp,0,dy-1,dx,cs,0,RI); // 3*x>dx changed to 2*x>dx May2001 JS
+ x=loop(bp,x,dy-1,dx,cs,1,RI); if ( dx>14 && 2*x>dx ) Break; // U
+ if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2
+ && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==2 ) Break; // ~{\it u}
+
+#if 0
+ // measure thickness of lower v
+ i=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)
+ +loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
+ j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI)
+ +loop(bp,dx-1,dy-1-dy/4 ,dx,cs,0,LE);
+ if( box1->m1 && hchar && dy>15 && j>=i-dx/32 ) Break; // ~Y
+#endif
+ /* V has serifs only on the upper side! Y also on the bottom, check it. Okt00 */
+ i=loop(bp, 0, 0,dx,cs,0,RI);
+ i=loop(bp, i, 0,dx,cs,1,RI); i1=i; // thickness
+ i=loop(bp, 0, 1,dx,cs,0,RI);
+ i=loop(bp, i, 1,dx,cs,1,RI); if(i>i1) i1=i; // thickest (of rows 0 and 1)
+ i=loop(bp, 0,dy/4,dx,cs,0,RI);
+ i=loop(bp, i,dy/4,dx,cs,1,RI); i2=i; // same measurement at row dy/4
+ i=loop(bp, 0,dy ,dx,cs,0,RI); // NOTE(review): row dy is one past the last row (dy-1) used elsewhere with bp -- presumably relies on loop() tolerating it; confirm
+ i=loop(bp, i,dy ,dx,cs,1,RI); i3=i; // thickness
+ i=loop(bp, 0,dy-1,dx,cs,0,RI);
+ i=loop(bp, i,dy-1,dx,cs,1,RI); if(i>i3) i3=i; // thickest (of rows dy and dy-1)
+ if( y0 < box1->m2 )
+ if( i1-i2 > dx/32+2
+ && i3-i2 > dx/32+2 ) Break; // ~serif_Y
+
+ if( y0 < box1->m2 ) // uppercase V ?
+ if( i1-i2 < dx/32+2 ) /* no serif detected */
+ if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs)==1 ){
+ j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI);
+ j=loop(bp, j,dy-1-dy/4 ,dx,cs,1,RI);
+ if (j<i2+1) Break; // ~Y
+ if (j<=i2+1) ad=99*ad/100; // ~Y (after the line above, this only fires when j==i2+1)
+ }
+
+ ad=99*ad/100; // be careful (remove later)
+
+ if( loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI)
+ >loop(bp,0 ,dy-1 ,dx,cs,0,RI) ) ad=96*ad/100;
+
+ if (gchar) ad=99*ad/100;
+ bc='v';
+ if( hchar ) bc='V'; // hchar switches the result to the capital letter
+ Setac(box1, bc, ad); // Setac is defined outside this chunk -- presumably records bc with weight ad on the box, TODO confirm
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_rR(ocr0_shared_t *sdata){ /* Rule tests for 'r' and then 'R' on the box in sdata. A test that survives all its rules calls Setac() with its score ad. Returns box1->c. */
+ struct box *box1=sdata->box1; /* box under test; box1->p is addressed with x0/y0-based coordinates below */
+ pix *bp=sdata->bp; /* addressed with 0-based coordinates below -- presumably a cropped copy of the box, TODO confirm */
+ int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent: starts at 100 and is scaled down by the soft rules */
+
+ // --- test r ------- (notes like "~t" presumably mean "reject or penalise look-alike t" -- TODO confirm)
+ for(ad=d=100;dy>3 && dx>1;){ // dy>dx, 4x6 font, dx=2 smallest prop-font; the body runs at most once. Break is a macro defined outside this chunk -- presumably a logging break, TODO confirm
+ DBG( wchar_t c_ask='r'; )
+ if (sdata->holes.num > 0
+ && ( sdata->holes.hole[0].y1 > dy/2 // tiny hole in upper left
+ || sdata->holes.hole[0].x1 > dx/2 ) // is tolerated, ~Pp
+ ) Break; /* tolerant against a tiny hole */
+ if( 2*dy<box1->m3-box1->m1) Break; // box is less than half as tall as m3-m1
+
+ if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8 ) Break; // right-side value at mid height must exceed dx/8
+ x= loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x<=dx/2) ad=99*ad/100; // ~t
+ if (loop(bp,dx-1-x/2,0,dy,cs,0,DO)>dy/8) ad=99*ad/100; // ~t
+ if( dx>4 )
+ if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8+2 ) Break; // ~v Jun00
+
+ i=dy-(dy+20)/32; // ignore dust on the ground
+
+ for( y=4*dy/8; y<i; y++ ){ // center down v-line
+ if( y<dy-2*dy/8 && num_cross(0,dx-1,y,y,bp,cs) !=1 ) break;
+ i1= loop(bp,0 ,y,dx,cs,0,RI); if(i1>3*dx/8) break;
+ i2= loop(bp,dx-1,y,dx,cs,0,LE); if(i1>i2) break;
+ if( (i1+(dx-i2
+ -1))/2 >= 4*dx/8 ) break; // mass middle should be left
+ }
+ if (y<i) Break; // the scan stopped early
+
+ for( x=4*dx/8; x<dx-dx/8; x++ ){ // right upper h-line
+ if( get_bw(x,x,0,(dy+2)/4,bp,cs,1) !=1 ) break; }
+ if (x<dx-dx/8) Break;
+
+ if( loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)>5*dx/8 // not a C
+ && get_bw(dx-1-dx/8,dx-1,dy-1-dy/4,dy-1,bp,cs,1) ==1 ) Break;
+
+ if( loop(bp, 0,5*dy/8,dx,cs,0,RI)<=dx/8
+ && loop(bp,dx-1,5*dy/8,dx,cs,0,LE)>=5*dy/8 // NOTE(review): a horizontal scan compared against 5*dy/8 -- the neighbouring tests use dx fractions; possible typo, confirm
+ && loop(bp,dx/2, dy-1,dy,cs,0,UP)<=dy/8 ) Break; // ~c
+
+ if( loop(bp, 0,3*dy/8,dx,cs,0,RI)
+ > loop(bp,dx-1,3*dy/8,dx,cs,0,LE)+dx/8 ) {
+ if( loop(bp, 0, dy/8,dx,cs,0,RI)<dx/8 ) Break; // ~z (broken)
+ ad=98*ad/100;
+ }
+
+ if( loop(bp,0,dy/3,dx,cs,0,RI)>3*dx/4 ) Break; // ~i
+ if( loop(bp,0,dy/4,dx,cs,0,RI)>3*dx/8 // ~I
+ && get_bw(0,dx/8,0,dy/4,bp,cs,1) ==1 ) Break;
+ if( num_cross(0,dx-1,dy/2, dy/2 ,bp,cs)!=1
+ && num_cross(0,dx-1,dy/2+1,dy/2+1,bp,cs)!=1 ) Break; // ~n 024a3
+
+ // italic t is sometimes not high enough, look for v-like shape
+ for(y=3*dy/4;y<dy-1;y++)
+ if( num_cross(0,dx-1,y, y ,bp,cs)==2
+ && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) break; // ~t
+ if(y<dy-1) Break; // such a pair of rows was found
+ if (loop(bp,dx-1-dx/4,dy-1,dx,cs,0,UP)<dy/4) ad=98*ad/100; // ~f (serif)
+ if( num_cross(dx-1,dx-1,0,3*dy/4,bp,cs)>1 ) ad=95*ad/100; // ~f
+ if( num_cross(dx/2 ,dx/2 ,0,dy-1,bp,cs)>2
+ && num_cross(dx/2+1,dx/2+1,0,dy-1,bp,cs)>2 ) Break; // ~f
+
+ if (box1->dots) ad=98*ad/100; /* could be modified latin2-r */
+ if (hchar) ad=96*ad/100;
+ if (gchar) ad=97*ad/100;
+ Setac(box1,'r',ad); // Setac is defined outside this chunk -- presumably records 'r' with weight ad on the box, TODO confirm
+ break; // not 100% sure!
+ }
+ // --- test R ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ DBG( wchar_t c_ask='R'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 2 ) Break; // ~P
+ if (loop(bp, dx/2, dy/4,dy,cs,0,DO)>dy/2) Break; // ~C
+ if (loop(bp, dx/2, 0,dy,cs,0,DO)>dy/8
+ && loop(bp, dx/2,dy/16,dx,cs,0,RI)<dx/2
+ && dy>=16 ) Break;
+ for(i=1,y=y0+dy/8;y<=y1-dy/8 && i;y++){ // left v-line
+ if( get_bw(x0 , x0+dx/2,y, y,box1->p,cs,1) != 1 ) i=0;
+ } if( !i ) Break;
+ for(i=1,x=x0+3*dx/8;x<=x1-dx/4 && i;x++){ // upper h-line
+ if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0;
+ } if( !i ) Break;
+ for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap: y collects the largest upward scan value from the bottom row
+ i=loop(box1->p,x,y1,dy,cs,0,UP);
+ /* on small chars bypass possible low left serifs */
+ if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP);
+ if (i2>1) i+=i2-1; }
+ if (i>y) { y=i; i1=x; } // NOTE(review): i1 is stored here but overwritten further down before it is read
+ } if( y<=dy/8 ) Break; if (y<dy/4) ad=80*ad/100;
+ for(i=1,x=x0+dx/3;x<=x1-dx/8 && i;x++){ // vert crossed 2 ??? (some column must give num_cross==2)
+ if( num_cross(x,x,y0,y1, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ for(i=1,y=y0;y<=y0+3*dy/8 && i;y++){ // upper 2 vert lines
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ // middle h line
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0;
+ } if( i ) ad=95*ad/100; /* sometimes there is a small gap */
+ for(i=1,y=y1-dy/4;y<=y1 && i;y++){ // lower 2 vert lines
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) Break;
+ if( get_bw(x1-dx/3,x1,y0,y0+dy/4,box1->p,cs,1) != 1 ) Break; // pixel ru
+ x=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(x>dx/2) Break; i=x; // ru
+ x=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(x<=i ) Break; i=x; // rc
+ x=loop(bp,dx-1, 5*dy/8,dx,cs,0,LE); if(x>i ) i=x;
+ x=loop(bp,dx-1, 6*dy/8,dx,cs,0,LE); if(x>i ) i=x;
+ x=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(x>=i ) Break; // rd
+
+ i1=loop(bp,0, dy/4,dx,cs,0,RI); // straight (left edge: the mid value must be close to the mean of the two quarter values)
+ i2=loop(bp,0, dy/2,dx,cs,0,RI);
+ i3=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if( abs(i1+i3-2*i2)>1+dx/16 ) Break;
+ if (dy>15)
+ if (loop(bp,dx-1, dy/2,dx,cs,0,LE)>=loop(bp,dx-1, dy-1,dx,cs,0,LE)
+ && loop(bp,dx-1,3*dy/16,dx,cs,0,LE)>=loop(bp,dx-1,dy/16,dx,cs,0,LE)+dx/8 ) Break; // ~ff
+ if (dy>7)
+ if (loop(bp,dx-1,dy-2 ,dx,cs,0,LE)
+ >loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)) {
+ ad=98*ad/100;
+ if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)==0
+ && loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)>0 ) Break; // broken B ??
+ }
+ j=sdata->holes.num;
+ if (j != 1) { // zero or two holes: recount the holes of the upper two thirds
+ i=num_hole (x0,x1,y0,y1-dy/3,box1->p,cs,NULL);
+ // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL);
+ if (i==0) ad=90*ad/100; /* sometimes there is a small gap */
+ if (j>1 || j>i) Break;
+ }
+ if (sdata->holes.num < 1) ad=90*ad/100;
+ if (sdata->holes.num==1)
+ if (sdata->holes.hole[0].y1 > 3*dy/4) ad=95*ad/100; // alpha
+
+ if (!hchar) ad=98*ad/100;
+ if ( gchar) ad=98*ad/100;
+ Setac(box1,'R',ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_m(ocr0_shared_t *sdata){ /* Rule test for 'm' on the box in sdata. If all rules pass, Setac() is called with the score ad. Returns 'm' directly when ad is still >=100, otherwise box1->c. */
+ struct box *box1=sdata->box1; /* box under test */
+ pix *bp=sdata->bp; /* addressed with 0-based coordinates below -- presumably a cropped copy of the box, TODO confirm */
+ int i,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
+ handwritten=0, /* raised to 10 or 60 below, depending on the row at which the three-stroke scan stops */
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box (bounds are inclusive) */
+ ad; /* score in percent: starts at 100 and is scaled down by the soft rules */
+
+ // --- test m ------- (notes like "~K" presumably mean "reject look-alike K" -- TODO confirm)
+ for(ad=d=100;dx>4 && dy>3;){ // needs at least 5x4; the body runs at most once. Break is a macro defined outside this chunk -- presumably a logging break, TODO confirm
+ DBG( wchar_t c_ask='m'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num > 0) ad=96*ad/100;
+ x =loop(bp,dx-1,dy/2,dx,cs,0,LE); if(3*x>dx) Break; // ~K
+ y=dy/2;
+ i=num_cross(0,dx-1,y ,y ,bp,cs); if (i!=3) // take the mid row, or the row below it when the mid row does not give 3
+ i=num_cross(0,dx-1,y+1,y+1,bp,cs);
+ if (i<3 && i>5) Break; // m ru rn, handwritten m -- NOTE(review): (i<3 && i>5) can never be true, so this never rejects; "||" may have been intended -- confirm before changing, it would alter recognition results
+ // im or glued.mm cut to nm
+ if (i>3) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) }
+ for (i=0,y=dy-1-dy/8;y>dy/2;y--) { // move up from near the bottom to the first row with num_cross > 2
+ i=num_cross(0,dx-1,y,y,bp,cs); if (i>2) break;
+ } if (i>3) Break;
+ for ( ;y>dy/2;y--) { // keep moving up while the rows give exactly 3
+ i=num_cross(0,dx-1,y,y,bp,cs); if (i!=3) break;
+ } if (i>5) Break; y++; i5=y; // i5 = row one below the stop row (see sketch)
+ if (y> dy/2) handwritten=10;
+ if (y>3*dy/4) handwritten=60;
+ /* @@...............
+ @@......,........
+ @@,...@@@....@@@.
+ @@,,.@@@@..@@@@@,
+ @@@.@@@@@.@@@@@@,
+ @@;@@@@@@@@@;,@@,
+ @@@@@,.@@@@,,,@@@ <- i5
+ ,@@@...;@@....@@@
+ .@;...........,@@
+ ...............@@
+ i1 i2 i3 i4
+ */
+ x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line
+ x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // first gap
+ x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i2=x; // 2nd v-line
+ x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i3=x; // 2nd gap
+ x+=loop(bp,x,y,dx-x,cs,0,RI); if(x<5*dx/8) Break; i4=x; // 3rd v-line
+ if (x>=dx) Break; // missing 3rd v-line, ~W
+ MSG(fprintf(stderr,"y=%d x=%d %d %d %d",y,i1,i2,i3,i4);)
+ if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/4 ) Break; // same gap width? rn
+ if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/8 ) ad=98*ad/100; // same gap width? rn
+ // the same game for the lower part =>l1 l2 l3 l4 ???
+ i =loop(bp,0,5*dy/8,dx,cs,0,RI);
+ i =loop(bp,i,5*dy/8,dx,cs,1,RI);
+ x =loop(bp,0,dy-dy/32-1,dx,cs,0,RI);
+ x =loop(bp,x,dy-dy/32-1,dx,cs,1,RI);
+ if( x > i+1 ) i=1; else i=0; /* looks like serif m, Okt00 -- NOTE(review): this value of i is overwritten below before it is ever read */
+ for(y=0,x=i1;x<i2;x++) { // first gap: y = largest upward scan value from the bottom row
+ i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
+ }
+ if(y<dy/4 || y<y1-y0-i5-1-dy/16) Break; // no gap detected
+ for(y=0,x=i3;x<i4;x++) { // second gap, same measurement
+ i=loop(bp,x,dy-1,dy,cs,0,UP); if (i>y) y=i;
+ }
+ if(y<dy/4) Break; // no gap detected
+ for(x=i1;x<i4;x++) if( loop(bp,x,0,dy,cs,0,DO)>=dy/2 ) break; // look for a column between i1 and i4 whose downward scan from the top reaches dy/2
+ if(x<i4 && handwritten<10) Break; // gap detected
+ // glued rn as m ??? hmm seems a balance act
+ if(i2-i1>i4-i3+dx/16){ // first gap clearly wider than the second
+ for(y=0,x=(i1+i2)/2;x<i2;x++){
+ i=loop(bp,x,0,dy,cs,0,DO);
+ i=loop(bp,x,i,dy,cs,1,DO); // measure thickness
+ if( i>y ) y=i; if( 2*i<y ) Break; // NOTE(review): Break inside the inner for -- presumably it only leaves this loop, and the test after the loop does the real reject; confirm
+ }
+ if(x <i2) Break; // unusual property for m (see n)
+ }
+ if(gchar) ad=99*ad/100;
+ if(hchar) ad=99*ad/100;
+
+ if( loop(bp,dx-1,dy/16,dx,cs,0,LE)<2
+ && loop(bp,dx-1,dy/4 ,dx,cs,0,LE)>3 ) Break; // melted WT
+
+ x=loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ if (x>2 && loop(bp,dx-1-x/2,0,dy,cs,0,DO)<dy/2) Break; // melt toc
+ if (loop(bp,(i3+i4)/2,0,dy,cs,0,DO)>dy/2) Break; // N
+
+ // {\it m}
+ if( loop(bp,1, dy/4,dx,cs,0,RI)
+ >loop(bp,0,7*dy/8,dx,cs,0,RI) )
+ Setac(box1,'m',98*ad/100); // NOTE(review): Setac('m') is called again further down with ad unless a Break intervenes -- how the two calls combine depends on Setac, which is not visible here
+
+ if (handwritten<10){
+ x =loop(bp,0,dy/4,dx,cs,0,RI);
+ x+=loop(bp,x,dy/4,dx,cs,1,RI);
+ for( ;x<i4;x++){ // x=i1 ?
+ i=loop(bp,x,0,dy,cs,0,DO);
+ if (i>=dy/4) ad=99*ad/100;
+ if (i>(dy+2)/4) ad=95*ad/100;
+ if (3*i>dy) Break; // same inner-loop Break pattern as above; the test after the loop rejects
+ }
+ if(x<i4) Break; // gap detected
+ }
+
+ if (box1->dots) ad=99*ad/100;
+ Setac(box1,'m',ad); // Setac is defined outside this chunk -- presumably records 'm' with weight ad on the box, TODO confirm
+ if (ad>=100) return 'm'; // score untouched by any penalty: return 'm' directly
+ break;
+
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_tT(ocr0_shared_t *sdata){ /*
+ * Rule-based tests for the shapes 'T' and 't' on the box sdata->box1.
+ * Each test is a one-pass for-block: a failing hard rule leaves it through
+ * Break (a macro defined outside this chunk), soft rules only scale the
+ * score ad (starts at 100) down, and the surviving score is handed to Setac().
+ * Returns 'T' at once if the T test ends with ad>=100, otherwise box1->c.
+ * NOTE(review): loop(), num_cross(), get_bw(), getpixel() and Setac() are
+ * defined elsewhere; remarks about them below are read off their usage here
+ * (loop() presumably returns a run length along the given direction) and
+ * should be confirmed against their definitions.
+ * "~X" in the rule comments apparently marks a rule that rejects or
+ * penalizes shapes looking like X.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,i1,i2,i3,i4,j,d,x,y,yb,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* width and height of the box */
+ ad; /* score in percent, reduced by the soft rules */
+
+ // --- test T ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // one-pass block, needs at least 3x4; old note: dx>1 dy>2*dx
+ DBG( wchar_t c_ask='T'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ // upper horizontal line
+ i1= loop (bp, dx/8,0,dy,cs,0,DO); // left side, measured downwards from the top row
+ i2= loop (bp,dx-1-dx/8,0,dy,cs,0,DO); // right side
+ i3= loop (bp, dx/8,i1,dy,cs,1,DO); // left side; NOTE(review): i3 is not used below
+ i4= loop (bp,dx-1-dx/8,i2,dy,cs,1,DO); // right side; NOTE(review): i4 is not used below
+ if (i1>dy/4 || i2>dy/4) Break; // both measurements must stay within the upper quarter
+ for (x=dx/8;x<dx-1-dx/8;x++) { // scan the columns between both measuring points
+ i= loop (bp,x,0,dy,cs,0,DO);
+ if (i>i1+dy/8 && i>i2+dy/8) break; // more than dy/8 below both references
+ if (i<i1-dy/8 && i<i2-dy/8) break; // more than dy/8 above both references
+ } if (x<dx-1-dx/8) Break; // scan stopped early; on success x stays dx-1-dx/8
+ if( get_bw( 0,dx-1, dy/2, dy/2,bp,cs,1) != 1 ) Break; // middle row
+ if( get_bw( 0,(dx-1)/8, dy/2,dy-1-dy/8,bp,cs,1) == 1 ) Break; // lower left strip
+ if( get_bw( 0,3*dx/16, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break; // wider lower left strip
+ if( get_bw(dx-1-dx/4,dx-1, dy/2,dy-1-dy/4,bp,cs,1) == 1 ) Break; // lower right strip
+ // center width
+ for( y=dy/4;y<3*dy/4;y++){ // upper bar?
+ i=dx/4+loop(bp,dx/4,y,dx,cs,0,RI); // left side of vertical line
+ j= loop(bp, i,y,dx,cs,1,RI); // width of vertical line
+ if (3*j>dx+1 || i+j>=dx || i+j/2<dx/2-1) break; // ~r?7
+ } if (y<3*dy/4) Break; // Jan07
+ // down width
+ for( y=3*dy/4;y<dy;y++){
+ i= loop(bp,dx/4,y,dx,cs,0,RI);
+ i= loop(bp, i,y,dx,cs,1,RI);if(4*i>3*x) break; //~I NOTE(review): x is still dx-1-dx/8 from the bar scan, and the second loop starts at i, not dx/4+i - confirm intent
+ } if( y<dy ) Break;
+
+ i =dx/4+loop(bp,dx/4,dy/4,dx,cs,0,RI);if(i>3*dx/4) Break; // ~7
+ i+= loop(bp,i ,dy/4,dx,cs,1,RI);if(i>3*dx/4) Break;
+
+ if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1
+ && num_cross(0,dx-1, dy-2, dy-2,bp,cs) != 1 ) Break; // one of the two lowest rows must give 1
+ if( num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1
+ && num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 ) Break; // NOTE(review): both operands are identical, a second row was probably intended
+ if (box1->m3 && 2*y1>box1->m3+box1->m4
+ && loop(bp,0, 0,dy/2,cs,0,DO)>=dy/4
+ && loop(bp,0,dy-1,dy ,cs,0,UP)<=dy/2) ad=96*ad/100; // ~J
+ if (gchar) ad=98*ad/100;
+ if( loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8) ad=99*ad/100; // ~J
+ i = loop(bp,0,dy/2,dx,cs,0,RI);
+ j = loop(bp,i,dy/2,dx,cs,1,RI);
+ if( 2*i>=dx || 2*(dx-j-i)<i) ad=95*ad/100; // ~J
+
+ Setac(box1,'T',ad);
+ if (ad>=100) return 'T'; // no penalty applied: skip the t test
+ break;
+ }
+ // --- test t ---------------------------------------------------
+ // written t can look like a + or even with missing right side
+ // smallest t found in win-screenshot (prop-font) dx=2
+ for(ad=d=100;dx>1 && dy>=box1->m3-box1->m2-1;){ // sometimes no hchar!
+ DBG( wchar_t c_ask='t'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (dy<=box1->m3-box1->m2+1) ad=96*ad/100; // bad line detection?
+ for(x=0,yb=j=y=dy/32+3*dy/16;y<5*dy/8;y++)if(y>0){ // upper cross line
+ i=loop(bp,0,y,dx,cs,0,RI);
+ i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;yb=j=y; } // hor. line: x=longest run so far, yb=its row
+ i=num_cross(0,dx-1,y ,y ,bp,cs);
+ j=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i>2 && j>2) break;
+ if( y<11*dy/16
+ && num_cross(0,dx-1,y ,y ,bp,cs) != 1
+ && ( num_cross(0,dx-1,y+dy/8,y+dy/8,bp,cs) != 1 || dy<13) // against noise
+ ) break;
+ } if( y<4*dy/8 ) Break; // scan was stopped above the middle
+ if (dy>12 && x>4 && x>dx/2 && yb<=(dy+4)/8)
+ if ( loop(bp,dx-1-3*x/4,yb,dy,cs,1,UP)
+ <=loop(bp,dx-1-1*x/4,yb,dy,cs,1,UP)+1 )
+ if ( loop(bp,0 ,dy/2,dy,cs,1,UP)>dx/8 ) Break; // ~C
+
+ if (x<dx/2) ad=95*ad/100; // unusual small ?
+ if (x>=dx && 9*dx>=8*dy) { ad=99*ad/100; } // +
+
+ i=loop(bp,dx-1,0,dx,cs,0,LE);
+ for(y=0;y<dy/4;y++){ // upper quarter, looking in from the right edge
+ if( num_cross(0,dx-1,y ,y ,bp,cs) == 2
+ && num_cross(0,dx-1,y+1,y+1,bp,cs) == 2 ) break;
+ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j-i>1) break; i=j; // right-side measure grows by more than 1
+ }
+ if( y<dy/4 ) Break; // ~f
+
+ i=loop(bp,dx-1,yb,dx,cs,0,LE); // right-side measure at the cross line row
+ for(y=dy/8;y<yb;y++)
+ if( loop(bp,dx-1,y,dx,cs,0,LE)>i ) break;
+ if( y==yb ) break; // NOTE(review): plain break, not Break: leaves the t test without Setac and without the Break macro
+
+ j=loop(bp,0, dy/2,dx,cs,0,RI);
+ j=loop(bp,j, dy/2,dx,cs,1,RI); i=j; // thickness
+ j=loop(bp,0, dy/4,dx,cs,0,RI);
+ j=loop(bp,j, dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
+ j=loop(bp,0,3*dy/4,dx,cs,0,RI);
+ j=loop(bp,j,3*dy/4,dx,cs,1,RI); if (j<i) i=j; // thickness
+ if( 2*x<3*i ) Break; // cross line x must be at least 1.5 times the smallest thickness i
+
+ if( loop(bp,dx-1,dy/2,dx,cs,0,LE)-dx/8
+ <=loop(bp,dx-1, yb ,dx,cs,0,LE) )
+ if( loop(bp,dx-1, yb ,dx,cs,0,LE)-dx/8
+ >=loop(bp,dx-1,yb/2,dx,cs,0,LE) ) Break; // ~1 ???
+
+ j=1; // stays 1 while the pattern below has not been found
+ for(y=1;j && y<yb; y++) // no @@ pattern
+ for(x=0;j && x<dx-2;x++){ // ..
+ if( getpixel(bp,x ,y )>=cs && getpixel(bp,x+1,y )>=cs
+ && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; }
+ } if(!j) Break;
+
+ if( num_cross(0,dx-1,dy-2,dy-2,bp,cs) == 2
+ && num_cross(0,dx-1,dy-1,dy-1,bp,cs) == 2 ) Break; // ~* (5er)
+
+ if( dy>= 16
+ && loop(bp, 0, 3*dy/4,dx,cs,0,RI)
+ >=loop(bp, 0, dy-2,dx,cs,0,RI)
+ && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
+ <=loop(bp,dx-1, dy-2,dx,cs,0,LE)
+ && loop(bp,dx-1, 1,dx,cs,0,LE)+dx/16
+ <loop(bp,dx-1,3*dy/16,dx,cs,0,LE)
+ && ( loop(bp, 0, 1,dx,cs,0,RI)
+ >loop(bp, 0,3*dy/16,dx,cs,0,RI)+dx/16
+ || loop(bp,dx-1, 0,dx,cs,0,LE)==0
+ || loop(bp,dx-1, 1,dx,cs,0,LE)==0) ) ad=96*ad/100; // ~f Jan02
+ if(dx<8 && dy>12){ // thin f's could easily confound with t
+ x=loop(bp,dx-1,3*dy/16,dx,cs,0,LE);
+ if (x)
+ if (loop(bp,dx-x,0,dy,cs,0,DO)<3*dy/16
+ && loop(bp, 0, 3*dy/4,dx,cs,0,RI)+1
+ >=loop(bp, 0, dy-2,dx,cs,0,RI)
+ && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE)
+ <=loop(bp,dx-1, dy-2,dx,cs,0,LE) ) Break;
+ }
+ if (dx>7)
+ if( num_cross( 0,dx-1,2*dy/3,2*dy/3,bp,cs) > 1
+ && num_cross( 0,dx/2,2*dy/3,2*dy/3,bp,cs) > 0
+ && num_cross(dx/2,dx-1,2*dy/3,2*dy/3,bp,cs) > 0 )
+ if (sdata->holes.num > 0)
+ if (sdata->holes.hole[0].y0 > dy/4) Break; // ~6
+ // if ( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; // ~6
+
+ if( num_cross(0,dx-1,3*dy/4, 3*dy/4, bp,cs) >= 2
+ && num_cross(0,dx-1,3*dy/4-1,3*dy/4-1,bp,cs) >= 2 ){
+ ad=99*ad/100; /* italic t ? */
+ if (loop(bp,dx/2 ,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
+ if (loop(bp,dx/2+1,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h
+ }
+
+ x= loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ i= loop(bp,dx-1,dy/8,dx,cs,0,LE);
+ if (i>x && loop(bp,dx-x,0,dy,cs,0,DO)>=dy/2) ad=90*ad/100; /* ~\ */
+
+ x= loop(bp,0, 0,dx,cs,0,RI);
+ i= loop(bp,0, 1,dx,cs,0,RI); if (i<x) x=i; // x=smaller left-side measure of rows 0 and 1
+ i= loop(bp,0,dy/4,dx,cs,0,RI);
+ if (i-x>1) Break; // l
+
+ // this happens quite often, do not be too strict
+ if (!box1->m2) ad=99*ad/100;
+ if (box1->m2) {
+ if (!hchar) ad=99*ad/100; /* sometimes t is not long enough */
+ if( y0>=box1->m2-(box1->m2-box1->m1)/4 ) ad=99*ad/100; /* too short */
+ if( y0>=box1->m2 ) ad=99*ad/100; /* too short */
+ }
+
+ if (sdata->holes.num > 0) ad=95*ad/100;
+ if (gchar) ad=99*ad/100;
+ if (box1->dots) ad=90*ad/100;
+ Setac(box1,'t',ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_sS(ocr0_shared_t *sdata){ /*
+ * Rule-based test for the shapes 's' and 'S' (which are close to '5') on
+ * the box sdata->box1. The test is a one-pass for-block: a failing hard
+ * rule leaves it through Break (a macro defined outside this chunk), soft
+ * rules scale the score ad (starts at 100) down. The surviving candidate,
+ * 'S' if sdata->hchar is set and 's' otherwise, is handed to Setac().
+ * Always returns box1->c.
+ * NOTE(review): loop(), num_cross(), get_bw(), joined() and Setac() are
+ * defined elsewhere; remarks about them below are read off their usage here.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* width and height of the box */
+ ad; /* score in percent, reduced by the soft rules */
+ wchar_t ac; /* candidate character, 's' or 'S' */
+
+ // --- test sS near 5 ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (4x6 font)
+ DBG( wchar_t c_ask='s'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if( num_cross( dx/2, dx/2,0,dy-1,bp,cs)!=3
+ && num_cross(5*dx/8,3*dx/8,0,dy-1,bp,cs)!=3
+ && dy>4 ) Break; // neither the middle column nor the line from 5*dx/8 (top) to 3*dx/8 (bottom) gives 3
+ if( num_cross(0,dx-1,dy/2 ,dy/2 ,bp,cs)!=1
+ && num_cross(0,dx-1,dy/2-1,dy/2-1,bp,cs)!=1 ) Break;
+ // get the upper and lower hole coords
+ y=dy/4;
+ x =loop(bp,0,y,dx,cs,0,RI); if(x>3*dx/8) Break; /* slanted too */
+ x +=loop(bp,x,y,dx,cs,1,RI); if(x>5*dx/8) Break; /* fat too */
+ i1 =loop(bp,x,y,dx,cs,0,RI); i1=(i1+2*x)/2; // upper center x
+ y=11*dy/16;
+ x =loop(bp,dx-1 ,y,dx,cs,0,LE); if(x>dx/4) Break;
+ x +=loop(bp,dx-1-x,y,dx,cs,1,LE); if(dx>5 && dy>7 && x>dx/2) Break;
+ if (x>3*dx/4) Break; if(x>dx/2) { ad=98*ad/100; MSG({})}
+ i2 =loop(bp,dx-1-x,y,dx,cs,0,LE); i2=dx-1-(i2+2*x)/2; // lower center x (measured at y=11*dy/16 from the right)
+ for( y=dy/4;y<dy/2;y++ ) // May00 ~3
+ if( get_bw(0,i1,y,y,bp,cs,1) != 1 ) break;
+ if( y<dy/2 ) Break;
+ y=dy/2-loop(bp,dx-1,dy/2,dy/2,cs,1,UP); // NOTE(review): dead store, y is reset by the for loop below; only the disabled joined() test used it
+// if( !joined(bp,i1,dy/4,dx-1,y,cs) ){
+ // break; // sometimes thick small fonts have no gap
+// }
+ for(y=dy/4;y<dy/2;y++){ // look for a row whose right-side measure exceeds dx/8
+ x=loop(bp,dx-1,y,dx,cs,0,LE);if(x>dx/8) break;
+ }
+ if(y==dy/2) Break; // May00: no such row found
+
+ y=dy/2+loop(bp,0,dy/2,dy/2,cs,1,DO);
+ if( !joined(bp,0,y,i2,11*dy/16,cs) ) Break;
+
+ if (sdata->holes.num > 0)
+ if (sdata->holes.hole[0].y0 > dy/4) Break; // ???
+ // if( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break;
+
+ i1=loop(bp,dx-1,dy-1,dx,cs,0,LE);
+ i2=loop(bp,dx-1,dy-2,dx,cs,0,LE);
+ if (i2-i1 >= dx/4) Break; // ~{ 5x7font
+
+ i1=loop(bp, 0, 0,dx,cs,0,RI);
+ i2=loop(bp, 0, 1,dx,cs,0,RI);
+ if (i2-i1 >= dx/4) Break; // ~} 5x7font
+
+ // sS5 \sl z left upper v-bow ?
+
+ i1=loop(bp, 0,dy/2,dx,cs,0,RI);
+ i1=loop(bp, i1,dy/2,dx,cs,1,RI);
+ if (4*i1>=3*dx) ad=97*ad/100; // ~5 7-segment
+
+ i1=loop(bp,0, dy/16,dx,cs,0,RI); // left-side measures at three heights of the upper half
+ i2=loop(bp,0,4*dy/16,dx,cs,0,RI);
+ i3=loop(bp,0,7*dy/16,dx,cs,0,RI);
+ if( 2*i2+dx/32 >= i1+i3 ){ // middle measure is not smaller than the mean of the outer two
+ if( 2*i2+dx/32 > i1+i3 || dx>9 ) Break;
+ // very small s?
+ i1+=loop(bp,i1, dy/16,dx,cs,1,RI); // repeat the comparison with the following run added
+ i2+=loop(bp,i2,4*dy/16,dx,cs,1,RI);
+ i3+=loop(bp,i3,7*dy/16,dx,cs,1,RI);
+ if( 2*i2+dx/32 >= i1+i3 ) Break;
+ }
+
+ for(y=7*dy/16;y<5*dy/8;y++){
+ if( num_cross( 0,dx-1,y ,y ,bp,cs)==2
+ if( num_cross( 0,dx-1,y+1,y+1,bp,cs)==1 )
+ if( num_cross( 0,dx/4,y,y,bp,cs)==1 ) break; // ~5
+ } if(y<5*dy/8) Break; // v0.2.4a5
+ if ( loop(bp, dx-1,dy-2-dy/32,dx,cs,0,LE)
+ > loop(bp, 0, 1+dy/32,dx,cs,0,RI) + dx/4 ) Break; // ~5 Dec00
+ ac='s';
+ if (gchar) { ad=98*ad/100; MSG({}) }
+ if( hchar ){ // S but 5 is very similar! check it
+ ac='S';
+ if ( loop(bp, dx-1,dy-1-dy/32,dx,cs,0,LE)
+ > loop(bp, 0, 0+dy/32,dx,cs,0,RI) ) ad=99*ad/100; // ~5
+ if ( loop(bp, 0,dy-1-dy/32,dx,cs,0,RI)
+ > loop(bp, dx-1, 0+dy/32,dx,cs,0,LE) ) ad=99*ad/100; // ~5
+ }
+ Setac(box1,ac,ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_gG(ocr0_shared_t *sdata){ /*
+ * Rule-based tests on the box sdata->box1 for three shapes, run one after
+ * the other: 'g', round 'G', and italic 'g' that looks like '9'.
+ * Each test is a one-pass for-block: a failing hard rule leaves it through
+ * Break (a macro defined outside this chunk), soft rules scale the score ad
+ * (starts at 100) down, and the surviving score is handed to Setac().
+ * Always returns box1->c.
+ * Calls on box1->p use coordinates based on x0/y0/x1/y1, calls on bp use
+ * 0..dx-1 and 0..dy-1.
+ * NOTE(review): loop(), num_cross(), num_hole(), get_bw(), getpixel(),
+ * turmite() and Setac() are defined elsewhere; remarks about them below are
+ * read off their usage here.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* width and height of the box */
+ ad; /* score in percent, reduced by the soft rules */
+
+ // --- test g ---------------------------------------------------
+ /* some g's have crotchet at upper right end, so hchar can be set */
+ // ~italic g
+ for(ad=d=100;dx>2 && dy>4;){ // min 3x5
+ DBG( wchar_t c_ask='g'; )
+ if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/4, x1 , y1-dy/4, y1,box1->p,cs,1) != 1 ) Break; // ~p
+ if( get_bw(x0+dx/2, x0+dx/2, y0, y0+dy/2,box1->p,cs,1) != 1 ) Break;
+
+ if( num_cross(x0+dx/2, x0+dx/2, y0, y1, box1->p,cs) < 3 )
+ if( num_cross(x1-dx/2, x1-dx/2, y0, y1, box1->p,cs) < 3 ) Break;
+ if (sdata->holes.num < 1) Break;
+ for (i=0;i<sdata->holes.num;i++){ // look for a hole that ends above 5*dy/8+1
+ if (sdata->holes.hole[i].y1 < 5*dy/8+1) break;
+ } if (i==sdata->holes.num) Break; // no upper hole found
+ // if( num_hole ( x0, x1, y0, y0+5*dy/8, box1->p,cs,NULL) != 1 ) Break;
+ for(y=dy/4;y<dy;y++) if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
+ if( y==dy ) Break; // ~q
+ if( get_bw(0,dx/2,7*dy/8,7*dy/8,bp,cs,1) != 1 ) Break; // ~q
+ y =loop(bp,dx/16,0,dy,cs,0,DO); if(y<=dy/8) // this if guards only the += on the next line
+ y+=loop(bp,dx/16,y,dy,cs,1,DO); if(16*y>=15*dy) Break; // ~B
+
+ if (num_cross(x1, x1, (y0+y1)/2, y1, box1->p,cs)>1) {
+ ad=98*ad/100; // ~&
+ if (num_cross(x1 , x1 , y0, (y0+y1)/2, box1->p,cs)<1 ) ad=96*ad/100;
+ if (num_cross(x1-1, x1-1, y0, (y0+y1)/2, box1->p,cs)<1 ) ad=95*ad/100;
+ }
+ // looking for a gap
+ for (x=0,y=dy/4;y<dy-dy/4;y++){
+ i=loop(bp,dx-1,y,dy,cs,0,LE); if (i>x) x=i; // x=largest right-side measure; NOTE(review): length limit is dy although the direction is LE
+ } // in a good font x is greater dx/2
+
+ if (x<dx/2) { // bad font? or %
+ if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) > 2
+ || num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) > 2) ad=90*ad/100;
+ if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
+ || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100; // scan reaches dx/4 beyond x1
+ }
+ if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) >2 ) ad=99*ad/100; // ~/o
+
+ /* test for horizontal symmetry ~8 */
+ for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
+ if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; } // first mismatch pushes y past dy
+ if (y==dy) Break; /* ~8: no mirrored pixel pair differs */
+
+ if (box1->m4==0) ad=98*ad/100;
+ if ( hchar) ad=96*ad/100;
+ if (!gchar) ad=96*ad/100;
+ ad=98*ad/100; // unconditional penalty
+ Setac(box1,'g',ad);
+ break;
+ }
+ // --- test round G ---------------------------------------------
+ for(ad=d=100;dx>3 && dy>4;){ // needs at least 4x5 (old note: min 3x4)
+ DBG( wchar_t c_ask='G'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0 ,x0+dx/2,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2,x1-dx/4,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2,x0+dx/2,y1-dy/4,y1 ,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0 ,x0+dx/2,y1-dy/3,y1-dy/3,box1->p,cs,1) != 1 ) Break; // ~S
+ for( y=y0+dy/4;y<y1-dy/3;y++ )
+ if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
+ if( y==y1-dy/3 ) Break; // no gap
+
+ if( num_cross(x0+dx/2 , x0+dx/2 , y0, y, box1->p,cs) != 1
+ || num_cross(x0+dx/2+1, x0+dx/2+1, y0, y, box1->p,cs) != 1 ) Break; // ~e
+
+ x=x0; y=y1;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); // left bow?
+ if( y<y0+dy/4 ) Break; // filter W
+
+ x=x1; y=y1-dy/3; // upper right open bow
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
+ if( x<x1-3*dx/8 ) Break;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
+ if( x<x0+dx/2 ){ // not sure, try again (not best)
+ x=x1; y=y1-dy/4;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST);
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
+ if( x<x0+dx/2 ) Break;
+ }
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,UP); // upper end right middle
+ if( x<=x1 ) Break; // the walk must end right of x1
+ if( y<y0+3*dy/8 ) Break;
+ if( y>y1-dy/4 ) Break;
+
+ x=x1-dx/3;y=y1; // follow left C-bow, filter S
+ turmite(box1->p,&x,&y,x0,x1,y0+dy/4,y1,cs,LE,UP); // w=LE b=UP
+ if( y>y0+dy/4+1 ) Break; /* leave box below for S or on top for CG */
+ MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
+ /* if (y<y0) y++; else x++; */ /* enter the box again */
+ turmite(box1->p,&x,&y,x0,x1,y0 ,y1,cs,RI,UP);
+ MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);)
+ if( y>y0 ) Break;
+ if (sdata->holes.num > 0) Break;
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) > 0 ) Break;
+ if( dx>4 && dy>6){ // no (<[
+ for(i=1,y=0;i && y<dy/3;y++) // i is cleared by a row with 2 crossings in the upper third
+ if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
+ if( i ) ad=98*ad/100;
+ for(i=1,y=0;i && y<dy/3;y++) // same for the lower third, counted from the bottom row
+ if( num_cross(0,dx-1,dy-1-y,dy-1-y,bp,cs) == 2 ) i=0;
+ if( i ) Break;
+ }
+ for(i=1,y=dy/2;i && y<dy;y++) // the lower half needs a row with 2 crossings
+ if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) i=0;
+ if( i ) Break;
+ for(i=0,y=3*dy/4;y<dy;y++){
+ x=loop(bp,0,y,dx,cs,0,RI); // edge sloping down <=> Z
+ if( x<i-dx/20 ) break;
+ if( x>i ) i=x;
+ } if( y<dy ) Break;
+
+ // only check the middle!
+ for(i=0,i1=y=dy/4;y<dy-dy/4;y++){ // look for horizontal line
+ x=loop(bp,dx-1 ,y,dx/4,cs,0,LE);
+ x=loop(bp,dx-1-x,y,dx/2,cs,1,LE); if(x>i){ i=x;i1=y; } // i=longest run so far, i1=its row
+ } if( i1<=dy/4 || i1>=dy-dy/4 ) Break; // around the middle ?
+ // check from above for gap and left vertical line (~S)
+ x =loop(bp,0,i1,dx ,cs,0,RI);
+ x+=loop(bp,x,i1,dx-x,cs,1,RI); // left vertical bow
+ x+=loop(bp,x,i1,dx-x,cs,0,RI); if (x>=dx) ad=90*ad/100;
+ MSG(fprintf(stderr,"h-bar y dx %d %d ad= %d",i1,i,ad);)
+
+ i=1; // Mar06: adapted to 4x6 font
+ for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
+ for(y=dy/2;y<dy-1 && i;y++){ // .@
+ if( getpixel(bp,x ,y )>=cs
+ && getpixel(bp,x+1,y )< cs
+ && getpixel(bp,x+1,y-1)< cs
+ && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
+ }
+ if(i) ad=95*ad/100; // ~C
+ if(!hchar) ad=98*ad/100;
+ if( gchar) ad=98*ad/100;
+
+ Setac(box1,'G',ad);
+ break;
+ }
+ // --- test \it g like 9 ----------------------------------------------
+ for(ad=d=100;dx>2 && dy>4;){ // needs at least 3x5; old note: dx>1 dy>2*dx
+ DBG( wchar_t c_ask='g'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( num_cross(x0+dx/2,x0+dx/2,y0,y1,box1->p,cs) != 3 // pre select
+ && num_cross(x0+dx/4,x1-dx/4,y0,y1,box1->p,cs) != 3 ) Break;
+ for( x=0,i=y=y0+dy/2;y<=y1-3*dy/16;y++){ // search notch (from the left)
+ j=loop(box1->p,x0,y,dx,cs,0,RI);
+ if( j>2 && j>dx/4 && y<y1-3 && j<dx/2 ) // long bow
+ j+=loop(box1->p,x0+j-2,y+1,dx,cs,0,RI)-2;
+ if( j>x ) { x=j; i=y; } // x=deepest notch so far, i=its row
+ }
+ if( x<4*dx/8 ) Break;
+ if( num_cross(x0+dx/2,x1,i ,y1,box1->p,cs) != 1
+ && num_cross(x0+dx/2,x1,i+1,y1,box1->p,cs) != 1 ) Break;
+ if( num_hole(x0,x1,y0,i+1,box1->p,cs,NULL)!=1 ) Break; // part above the notch: exactly one hole
+ if( num_hole(x0,x1,i-1,y1,box1->p,cs,NULL)!=0 ) Break; // part below the notch: no hole
+ if( loop(box1->p,x0,y1 ,dy,cs,0,RI)>dx/3 &&
+ loop(box1->p,x0,y1-1,dy,cs,0,RI)>dx/3) Break; // no q
+ for( x=0,i=y=y0+dy/3;y<=y1-dy/3;y++){ // search notch (from the right)
+ j=loop(box1->p,x1,y,dx,cs,0,LE);
+ if( j>x ) { x=j; i=y; }
+ } if( x>dx/2 ) Break; // no g
+ i1=loop(bp,dx-1,dy/8 ,dx,cs,0,LE); if(i1>dx/2) Break;
+ i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
+ i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if(i1+i3<2*i2-dx/8) Break; // convex
+ i1=loop(bp,dx-1,dy/4 ,dx,cs,0,LE); if(i1>dx/2) Break;
+ i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE);
+ for(y=dy/4;y<dy-1-dy/4;y++){
+ i2=loop(bp,dx-1,y,dx,cs,0,LE);
+ if(i1+i3-2*i2<-1-dx/16) break; // convex from right ~g ~3
+ } if(y<dy-1-dy/4) Break;
+ x=loop(bp,dx -1,6*dy/8,dx,cs,0,LE); if(x>0){ x--; // robust
+ y=loop(bp,dx-x-1, dy-1,dy,cs,0,UP);
+ if(y<dy/8) Break; // ~q (serif!)
+ }
+ // %
+ if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) > 2) ad=90*ad/100;
+ if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2
+ || num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100;
+
+ if (box1->m4==0) ad=98*ad/100;
+ if ( hchar) ad=96*ad/100;
+ if (!gchar) ad=96*ad/100;
+ if (ad>99) ad=99; // never be sure to have a 9
+ Setac(box1,'g',ad);
+ break;
+ }
+ return box1->c;
+}
+
+/* rewritten for vector usage v0.41
+ * Rule-based test for the shapes 'x' and 'X' on the box sdata->box1, working
+ * on the frame vectors of the box instead of pixels.
+ * sdata->aa holds four line ends; the coordinate checks at the start of the
+ * test fix their order as 0=upper left, 1=lower left, 2=lower right,
+ * 3=upper right. Between each pair of neighbouring ends the frame is walked
+ * to find the notch pointing to the center, and both line pieces around that
+ * notch are checked with line_deviation().
+ * A failing hard rule leaves the test through Break (a macro defined outside
+ * this chunk); soft rules scale the score ad (starts at 100) down. The
+ * candidate, 'X' if sdata->hchar is set and 'x' otherwise, is handed to
+ * Setac(). Always returns box1->c.
+ * NOTE(review): line_deviation(), sq() and Setac() are defined elsewhere;
+ * the unit of the line_deviation() result is not visible here.
+ */
+static wchar_t ocr0_xX(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ // pix *bp=sdata->bp; // obsolete
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0, x1=box1->x1, y0=box1->y0, y1=box1->y1; // ,cs=sdata->cs;
+ int dx=x1-x0+1, dy=y1-y0+1, /* width and height of the box */
+ (*aa)[4]=sdata->aa, /* the four line ends, each (x,y,dist^2,vector_idx) */
+ ad; /* score in percent, reduced by the soft rules */
+ wchar_t bc=UNKNOWN;
+
+ // --- test xX ---------------------------------------------------
+ // rewritten for vectors 0.41
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ int ld, i1, i2, i3, i4; // line deviation, vector indices of the 4 inner notches
+ DBG( wchar_t c_ask='x'; )
+ if (sdata->holes.num > 0) Break; /* # */
+ /* half distance to the center */
+ d=2*sq(128/4);
+ /* now we check for the 4 ends of the x */
+ if (aa[0][2]>d) Break;
+ if (aa[1][2]>d) Break;
+ if (aa[2][2]>d) Break;
+ if (aa[3][2]>d) Break;
+ if (aa[3][0]-aa[0][0]<dx/2) Break; /* end 3 lies right of end 0 */
+ if (aa[2][0]-aa[1][0]<dx/2) Break; /* end 2 lies right of end 1 */
+ if (aa[1][1]-aa[0][1]<dy/2) Break; /* end 1 lies below end 0 */
+ if (aa[2][1]-aa[3][1]<dy/2) Break; /* end 2 lies below end 3 */
+ /* searching for 4 notches between neighbouring ends */
+
+ /* only left side */
+ for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[i][0]
+ >=box1->frame_vector[j][0]) j=i; /* remember the rightmost vector */
+ } if (j==i) Break; /* only possible if the walk was empty (both ends share a vector) */
+ /* calculate the distance to the center */
+ x=box1->frame_vector[j][0];
+ y=box1->frame_vector[j][1]; i1=j;
+ if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; /* x too far from the mean x of the 4 ends */
+ if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; /* y too far from the mean y of the 4 ends */
+ if ( aa[0][0]+aa[1][0]-2*x>=0) Break; /* notch must lie right of the mean of both left ends */
+ if ( aa[1][0] >= x ) Break;
+ if ( aa[0][0] > x ) Break;
+ if ( aa[0][0] >= x ) ad=99*ad/100;
+ if (x-x0<dx/8) Break;
+ if (x-x0<dx/4) ad=99*ad/100;
+ /* check if upper left and center point are joined directly */
+ ld=line_deviation(box1, aa[0][3], j);
+ MSG(fprintf(stderr," 0-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+ /* check if lower left and center point are joined directly */
+ ld=line_deviation(box1, j, aa[1][3]);
+ MSG(fprintf(stderr," X-1 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+
+ /* only lower side */
+ for (j=i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[i][1]
+ <=box1->frame_vector[j][1]) j=i; /* remember the uppermost vector */
+ } if (j==i) Break;
+ /* calculate the distance to the center */
+ x=box1->frame_vector[j][0];
+ y=box1->frame_vector[j][1]; i2=j;
+ if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
+ if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
+ if ( aa[1][1]+aa[2][1]-2*y<=0) Break; /* notch must lie above the mean of both lower ends */
+ /* check if lower left and center point are joined directly */
+ ld=line_deviation(box1, aa[1][3], j);
+ MSG(fprintf(stderr," 1-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+ /* check if lower right and center point are joined directly */
+ ld=line_deviation(box1, j, aa[2][3]);
+ MSG(fprintf(stderr," X-2 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+
+ /* only right side */
+ for (j=i=aa[2][3];i!=aa[3][3];i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[i][0]
+ <=box1->frame_vector[j][0]) j=i; /* remember the leftmost vector */
+ } if (j==i) Break;
+ /* calculate the distance to the center */
+ x=box1->frame_vector[j][0];
+ y=box1->frame_vector[j][1]; i3=j;
+ if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
+ if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
+ if ( aa[2][0]+aa[3][0]-2*x<=0) Break; /* notch must lie left of the mean of both right ends */
+ if ( aa[3][0] <= x ) Break;
+ if ( aa[2][0] < x ) Break;
+ if ( aa[2][0] <= x ) ad=99*ad/100;
+ if (dx-(x-x0)<dx/8) Break;
+ if (dx-(x-x0)<dx/4) ad=99*ad/100;
+ /* check if lower right and center point are joined directly */
+ ld=line_deviation(box1, aa[2][3], j);
+ MSG(fprintf(stderr," 2-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+ /* check if upper right and center point are joined directly */
+ ld=line_deviation(box1, j, aa[3][3]);
+ MSG(fprintf(stderr," X-3 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+
+ /* only upper side */
+ for (j=i=aa[3][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[i][1]
+ >=box1->frame_vector[j][1]) j=i; /* remember the lowest vector */
+ } if (j==i) Break;
+ /* calculate the distance to the center */
+ x=box1->frame_vector[j][0];
+ y=box1->frame_vector[j][1]; i4=j;
+ if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break;
+ if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break;
+ if ( aa[3][1]+aa[0][1]-2*y>=0) Break; /* notch must lie below the mean of both upper ends */
+ /* check if upper right and center point are joined directly */
+ ld=line_deviation(box1, aa[3][3], j);
+ MSG(fprintf(stderr," 3-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+ /* check if center point and upper left are joined directly */
+ ld=line_deviation(box1, j, aa[0][3]);
+ MSG(fprintf(stderr," X-0 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+
+ // center crossing of diagonal lines is small? (right notch i3 minus left notch i1)
+ if (box1->frame_vector[i3][0] - box1->frame_vector[i1][0] > dx/2) Break;
+
+ if (gchar) ad=99*ad/100;
+ bc='x'; if(hchar) bc='X';
+ Setac(box1,bc,ad);
+ break;
+ }
+ // --- test \it x --------------------------------------------------- (disabled by #if 0)
+#if 0
+ for(ad=d=99;dx>4 && dy>4;){ // min 3x4
+ DBG( wchar_t c_ask='x'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0,x0+dx/4,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
+ if( get_bw(x1-dx/4,x1,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break;
+ if( num_cross(x0+dx/4,x1-dx/4,y0+dy/2,y0+dy/2, box1->p,cs) != 1 ) Break;
+ if( num_cross(x0,x1,y0+dy/4,y0+dy/4, box1->p,cs) != 3
+ && num_cross(x0,x1,y0+dy/8,y0+dy/8, box1->p,cs) < 3 ) Break;
+ if( num_cross(x0,x1,y1-dy/4,y1-dy/4, box1->p,cs) != 3
+ && num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 3 ) Break;
+ if( gchar ) ad=97*ad/100;
+ if( hchar ) ad=96*ad/100;
+ bc='x';
+ Setac(box1,bc,ad);
+ break;
+ }
+#endif
+ return box1->c;
+}
+
+static wchar_t ocr0_yY(ocr0_shared_t *sdata){ /*
+ * Rule-based tests on the box sdata->box1 for 'y' and 'Y', run one after
+ * the other: first an italic variant, then the upright one.
+ * Each test is a one-pass for-block: a failing hard rule leaves it through
+ * Break (a macro defined outside this chunk), soft rules scale the score ad
+ * (starts at 100) down, and the surviving candidate is handed to Setac().
+ * Always returns box1->c.
+ * The points a,b,c,d used below are sketched in the debug messages of each
+ * test: a,b = upper ends, c = junction, d = lower end.
+ * NOTE(review): loop(), num_cross(), get_bw(), get_line2() and Setac() are
+ * defined elsewhere; remarks about them below are read off their usage here.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* width and height of the box */
+ ad,xa,ya,xb,yb,xc,yc,xd,yd; /* score in percent and the points a,b,c,d */
+ wchar_t bc=UNKNOWN;
+
+ // --- test italic yY --------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ DBG( wchar_t c_ask='y'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num > 0) ad=97*ad/100;
+ if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2
+ && num_cross(0,dx-1, 1, 1,bp,cs) < 2 ) Break;
+ if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
+ && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break;
+ if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
+ && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 ) Break;
+ if( num_cross(dx/3,dx/3,dy/4,dy-1,bp,cs) != 2
+ && num_cross(dx/2,dx/2,dy/4,dy-1,bp,cs) != 2 ) Break;
+ for(yc=y=0,xc=x=dx/4;x<dx-dx/4;x++){ // search deepest point
+ i=loop(bp,x,0,dy,cs,0,DO); if(i>y){ yc=y=i;xc=x; }
+ } if( y>12*dy/16 || y<3*dy/8 ) Break;
+ ya=dy/8; xa=xc-loop(bp,xc,ya,dx,cs,0,LE); if(xa< 0) Break; // a: left of xc on row dy/8
+ yb=dy/8; xb=xc+loop(bp,xc,yb,dx,cs,0,RI); if(xb>=dx) Break; // b: right of xc on row dy/8
+ for(y=dy/8;y<yc-dy/8;y++){ // every row above c must cross on both sides of xc
+ if( num_cross(xc,dx-1,y,y,bp,cs) != 1 ) break;
+ if( num_cross(0 ,xc ,y,y,bp,cs) < 1 ) break;
+ } if(y<yc-dy/8) Break;
+ yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); // d: seen from the right on row dy-1-dy/8
+ /* the backslash in the sketch is written as \\, a lone \e is not a standard C escape */
+ g_debug(fprintf(stderr," debug_yY: \n"
+ " /a b \n"
+ " | | \n"
+ " -c/ \n"
+ " \\e-d \n");)
+ g_debug(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
+ xa,ya,xb,yb,xc,yc,xd,yd);)
+ if(xd>6*dx/8) ad=99*ad/100; // why this???
+ if (loop(bp,dx-1,dy-1,dx,cs,0,LE)<1) Break;
+ // printf(" abcd=%d %d %d %d %d %d %d %d -",xa,ya,xb,yb,xc,yc,xd,yd);
+ if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) Break; // line b-d must score at least 95
+ // if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break;
+ // printf("ok");
+ bc='y';
+ if(gchar && !hchar) bc='y'; else
+ if(hchar && (!gchar || dy<14)) bc='Y'; else ad=98*ad/100; // SMALL-CAPS ???
+ Setac(box1,bc,ad);
+ break;
+ }
+ // --- test yY ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ DBG( wchar_t c_ask='y'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0,x0,y1-dy/8,y1,box1->p,cs,1) == 1 ) {
+ if( get_bw(x0,x0+4*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
+ } else {
+ if( get_bw(x0,x0+3*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break;
+ }
+ if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) != 2
+ && num_cross(0,dx-1, 1, 1,bp,cs) != 2 ) Break;
+ if( num_cross(dx/2,dx/2,0, 1,bp,cs) != 0 ) Break;
+ if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1
+ && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break;
+ if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1
+ && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1
+ && num_cross(dx-dx/8-1,dx-dx/8-1,0,dy-1,bp,cs) != 1 ) Break;
+ if( loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8+1 // Jul00
+ < loop(bp, 0,dy-1-dy/8,dx,cs,0,RI) ) Break;
+ for(y=0,x=dx/4;x<dx-dx/4;x++){ // search lowest point
+ i=loop(bp,x,0,dy,cs,0,DO); if(i>y) y=i;
+ } if( y>10*dy/16 || y<2*dy/8 ) Break;
+ for(xc=xb=xa=dx,yc=yb=ya=y=0;y<dy/4;y++){ // a,b: rows of the upper quarter with the smallest left/right measure
+ x =loop(bp, 0 , y,dx,cs,0,RI); if(x<xa){ xa=x;ya=y; }
+ x =loop(bp,dx-1 , y,dx,cs,0,LE); if(x<xb){ xb=x;yb=y; }
+ }
+ if(yb>dy/8) Break;
+ for(i=dx,yc=y=dy/4;y<3*dy/4;y++){ // c: row where the third run seen from the right (j) is smallest
+ if( num_cross(0,dx-1,y,y,bp,cs) < 2 ) break;
+ x =loop(bp,dx-1 ,y,dx,cs,0,LE);
+ x+=loop(bp,dx-1-x,y,dx,cs,1,LE);
+ j =loop(bp,dx-1-x,y,dx,cs,0,LE); if(j<=i){ i=j;yc=y;xc=dx-1-x-j/2; }
+ } yc+=dy/16+1;
+ yc+=loop(bp,xc,yc,i,cs,1,DO)/2;
+ xa+= loop(bp,xa ,ya,dx,cs,1,RI)/2;
+ xb=dx-1-loop(bp,dx-1,yb,dx,cs,1,LE)/2;
+ yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); if(xd>6*dx/8) Break;
+ /* check for serif at lower end */
+ for (i=0,x=dx-1;i<dy/4;i++) {
+ j=loop(bp,dx-1,dy-1-i,dx,cs,0,LE);
+ if (j>x+dx/16+1) break; /* detect serif */
+ if (j<x) x=j;
+ } if (i<dy/4) xd-=loop(bp,xd,yd,dx,cs,1,LE)/2; /* serif found: shift d left by half the run at d */
+ MSG( fprintf(stderr," debug_yY: \n"
+ " a b \n"
+ " \\ / \n"
+ " c \n"
+ " ed ");)
+ MSG(fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d",
+ xa,ya,xb,yb,xc,yc,xd,yd);)
+ // check upper left line
+ if( get_line2(xa,ya,xc ,yc,bp,cs,100)<95
+ && get_line2(xa,ya,xc-1,yc,bp,cs,100)<95 ) Break;
+ // check upper right line
+ if( get_line2(xb,yb,xc ,yc,bp,cs,100)<95
+ && get_line2(xb,yb,xc-1,yc,bp,cs,100)<95 ) {
+ // Times-Italic y ???
+ xb+=loop(bp,xb,yb,dx/4,cs,1,RI)-1;
+ yb+=loop(bp,xb,yb,dy/8,cs,1,DO)-1;
+ if( get_line2(xb,yb,xc ,yc,bp,cs,100)<95 ) Break;
+ }
+ if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break; // check the lower line c-d
+
+ // decision between V and Y is sometimes very difficult
+ // hope that the following code is the ultimate solution
+ if( yc>=5*dy/8 && !gchar)
+ if( get_line2(xa,ya,xd ,yd,bp,cs,100)>95 )
+ if( get_line2(xb,yb,xd ,yd,bp,cs,100)>95 )
+ { if (dx>4) { Break; } else ad=ad*98/100; } // ~V
+ xa=loop(bp,0,dy/8,dx,cs,0,RI); // xa,xb,xc are reused as left-side measures at three heights
+ xb=loop(bp,0,dy/2,dx,cs,0,RI);
+ xc=loop(bp,0,dy-1,dx,cs,0,RI);
+ if( 2*xb< xa+xc ) ad=98*ad/100; // ~V
+ if( 2*xb<=xa+xc ) ad=98*ad/100; // the three penalties accumulate
+ if( 2*xb<=xa+xc+1 ) ad=98*ad/100;
+
+ bc='y';
+ if ((!gchar) && (!hchar)) ad=98*ad/100;
+ if(y0<box1->m2-(box1->m2-box1->m1)/4)
+ { bc='Y'; if(gchar) ad=98*ad/100; }
+ // SMALL-CAPS ???
+ Setac(box1,bc,ad);
+ break;
+ }
+ return box1->c;
+}
+/* ocr0_zZ: vector based test whether the box looks like 'z' or 'Z'.
+ * Only the four corner entries of sdata->aa and box1->frame_vector are
+ * inspected (no pixel access). The weight ad starts at 100 and is lowered
+ * by percentage penalties; if no check rejects the shape, 'Z' (when
+ * sdata->hchar is set) or 'z' is passed to Setac() together with ad.
+ * Every path ends at the final return of box1->c.
+ */
+static wchar_t ocr0_zZ(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ int i1,i2,i3,i4,i5,dbg[9],
+ d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ (*aa)[4]=sdata->aa, /* the four line ends, (x,y,dist^2,vector_idx); the checks below treat 0=upper left, 1=lower left, 2=lower right, 3=upper right */
+ ad; /* tmp-vars; ad is the weight handed to Setac() */
+ wchar_t bc=UNKNOWN;
+
+ // --- test zZ -------
+ for(ad=d=100;dx>3 && dy>3;){ // dy>dx; the for is a one-pass block, every path leaves via break/Break
+ DBG( wchar_t c_ask='z'; ) /* for debugging purpose */
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num > 0) ad=98*ad/100; /* # */
+ /* half distance to the center */
+ d=2*sq(128/4); /* upper limit for the dist^2 entry aa[i][2] of each corner */
+ /* now we check for the 4 corners of the z */
+ if (aa[0][2]>d) Break;
+ if (aa[1][2]>d) Break;
+ if (aa[2][2]>d) Break;
+ if (aa[3][2]>d) Break;
+ if (aa[3][0]-aa[0][0]<dx/2) Break; /* upper corners less than dx/2 apart in x */
+ if (aa[2][0]-aa[1][0]<dx/2) Break; /* lower corners less than dx/2 apart in x */
+ if (aa[1][1]-aa[0][1]<dy/2) Break; /* left corners less than dy/2 apart in y */
+ if (aa[2][1]-aa[3][1]<dy/2) Break; /* right corners less than dy/2 apart in y */
+ if (aa[3][0]-aa[0][0]<4-1) Break; /* to small to hold a z */
+ if (aa[2][0]-aa[1][0]<4-1) Break; /* to small */
+ if (aa[3][1]-y0>dy/8) ad=99*ad/100; /* upper right corner more than dy/8 below the top */
+ if (aa[0][1]-y0>dy/8) ad=99*ad/100; /* upper left corner more than dy/8 below the top */
+ if (2*dx<dy) ad=99*ad/100; /* more than twice as high as wide */
+ MSG( \
+ fprintf(stderr,"xy= %d %d aa %d %d %d %d %d %d %d %d", \
+ x0,y0,aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\
+ aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0);)
+ /* upper and lower horizontal line */
+ d=line_deviation(box1, aa[3][3], aa[0][3]); if (d>2*sq(1024/4)) Break;
+ ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; /* NOTE(review): if sq(x) is x*x and d>=0, the Break above bounds d so that this factor is always 100 -- confirm */
+ d=line_deviation(box1, aa[1][3], aa[2][3]); if (d>2*sq(1024/4)) Break; /* lower line: only used to reject, ad is not changed */
+
+ /* search uppermost right > */
+ i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0);
+ x=box1->frame_vector[i1][0];
+ y=box1->frame_vector[i1][1];
+ if (y-y0 > 5*dy/8) Break;
+ if (x-x0 < 3*dx/8) Break;
+ if (x-aa[0][0]<=dx/4) Break; // ~lI
+ if (x-aa[0][0]<=dx/3) ad=98*ad/100; // ~lI
+ if (x-aa[0][0]<=dx/2) ad=99*ad/100; // ~lI
+ /* search most right > ~2 */
+ i3=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1+2*dx, (y0+y1)/2);
+ MSG(fprintf(stderr,"xy= %d %d %d %d %d %d",x0,y0,x-x0,y-y0,box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);)
+ if ( box1->frame_vector[i3][1]-y0> dy/4
+ && box1->frame_vector[i3][0]-x>=0) Break;
+ if ( box1->frame_vector[i3][1]-y> dy/8
+ && box1->frame_vector[i3][0]-x>=-dx/8) ad=98*ad/100;
+ if ( box1->frame_vector[i3][1]-y> dy/8
+ && box1->frame_vector[i3][0]-x>= 0) ad=97*ad/100;
+ if (box1->frame_vector[i3][0]-aa[0][0]
+ < aa[3][0]-box1->frame_vector[i3][0]) break; // ~lI NOTE(review): plain break, the other rejections use the Break macro -- confirm intended
+ if (box1->frame_vector[i3][0]-aa[0][0]
+ <(aa[3][0]-box1->frame_vector[i3][0])*2) ad=98*ad/100; // ~lI
+ /* better test for a bow or peaked angle */
+ /* upper part of a 2, on a Z a and b should be at c
+ .....$@@@@@@a...c. o1 (o1-a)=(dx+5)^2 =dx^2+10*dx+25
+ ...$$@@@@@@@@@.... (o1-b)=(dx+1)^2+4^2=dx^2+ 2*dx+18
+ ..$@@$@@@$@@@@@...
+ ..@@@.....$$@@@@..
+ ..@@.......@$@@@b.
+ ..$.........$@@@@.
+ .$$..........$@@@.
+ .$...........@@@@.
+ .............@@@@.<
+ .............$@@$.
+ ............$@@@..
+ ............@@$...
+ ............$@$...
+ --- snip ----
+ */
+ i4=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1+dx, y0);
+ i5=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1, y0-dx);
+ d=sq(box1->frame_vector[i5][0]-box1->frame_vector[i4][0])
+ +sq(box1->frame_vector[i5][1]-box1->frame_vector[i4][1]);
+ if (d>2*sq(dx/8+1)) break; /* the two points found for i4 and i5 are too far apart (see sketch); NOTE(review): plain break, not Break */
+ /* the following checks require the four z strokes to be direct connections */
+ /* check if upper left and upper right point are joined directly */
+ dbg[0]=d=line_deviation(box1, aa[0][3], i1); if (d >2*sq(1024/4)) Break;
+ /* check if upper right point (i1) and lower left corner (aa[1]) are joined directly */
+ dbg[1]=d=line_deviation(box1, i1, aa[1][3]); if (d >2*sq(1024/4)) Break;
+
+ /* search lowest left < */
+ i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y1);
+ x=box1->frame_vector[i2][0];
+ y=box1->frame_vector[i2][1];
+ if (y-y0 < 3*dy/8) Break;
+ if (x-x0 > 5*dx/8) Break;
+ if (aa[2][0]-x<=dx/4) Break; // ~lI
+ if (aa[2][0]-x<=dx/3) ad=98*ad/100; // ~lI
+ if (aa[2][0]-x<=dx/2) ad=99*ad/100; // ~lI
+ /* check if upper right and lower left point are joined directly */
+ dbg[2]=d=line_deviation(box1,i2, aa[3][3]); if (d >2*sq(1024/4)) Break;
+ /* check if lower left and lower right point are joined directly */
+ dbg[3]=d=line_deviation(box1, aa[2][3],i2); if (d >2*sq(1024/4)) Break;
+
+ if (box1->frame_vector[i1][0]
+ -box1->frame_vector[i2][0]<=dx/8) Break; /* nonsignificant distance */
+ MSG( \
+ fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d %d",\
+ box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\
+ box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\
+ dbg[0],dbg[1],dbg[2],dbg[3],2*sq(1024/4),2*sq(1024));)
+ ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; /* same penalty formula as for the upper line; NOTE(review): likely always a factor of 100, see above */
+ ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100;
+ ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100;
+ ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100;
+
+ if ( gchar) ad=98*ad/100;
+ bc='z';
+ if( hchar ) bc='Z';
+ Setac(box1,bc,ad);
+ break;
+ }
+ return box1->c;
+}
+/* ocr0_wW: pixel based tests whether the box looks like 'w' or 'W'.
+ * Two one-pass blocks are run one after the other: the first checks the
+ * strokes xa-xb-xc-xd-xe (see sketch) with get_line2(), the second is meant
+ * for italic/handwritten shapes and works with num_cross() only. A block
+ * that passes all checks calls Setac() with 'W' (if sdata->hchar is set) or
+ * 'w' and the weight ad (starts at 100, lowered by percentage penalties).
+ * Returns box1->c.
+ */
+static wchar_t ocr0_wW(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,handwritten=0,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad,ya,yb,xa,xb,xc,xd,xe,t1; /* tmp-vars */
+ wchar_t ac;
+
+ // ------- test w ~{\it w} ---------------
+ for(ad=d=100;dx>3 && dy>3;){ // dy<=dx; one-pass block, every path leaves via break/Break
+ DBG( wchar_t c_ask='w'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ // xa xe
+ // \ xc / <=ya connected xa-xb-xc-xd-xe
+ // xb xd <=yb
+ // get two lowest points i3,i4,ya
+ // out_x(box1);
+ // ~ul ~uf
+ // out_x(box1);
+ for(y=dy/8;y< dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs)< 2 ) break;
+ if(y<dy/2) Break; /* a row in [dy/8,dy/2) had fewer than 2 crossings */
+ yb=dy-1; /* overwritten by yb=y below */
+ if (dx>4) { /* 4x6 is to small */
+ for(y=dy-1-dy/16;y>3*dy/4;y--)
+ if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break;
+ if(y==3*dy/4) Break; /* no row with exactly 2 crossings near the bottom */
+ }
+ yb=y;
+ t1=loop(bp,0 ,dy/4,dx,cs,0,RI);
+ t1=loop(bp,t1,dy/4,dx,cs,1,RI); // thickness of line?
+ for(i=j=0 ;y> dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==4 ) i++;
+ else if( num_cross(0,dx-1,y,y,bp,cs)>=3 ) j++; /* i: rows with 4 crossings, j: other rows with >=3 */
+ if(i+5<dy/4 && 7*t1<dy) Break; // only for large letters
+ if(i+j==0 && (dy>6 || dx>4)) Break;
+ if(i+j==0 && dx<=4){
+ if (abs(loop(bp, 1,dy-1,dy,cs,0,UP)
+ -loop(bp,dx-2,dy-1,dy,cs,0,UP))>dy/8+1) Break; // 4x6 N
+ if ( ( loop(bp, 1, 0,dy,cs,0,DO)>=dy-2
+ && loop(bp, 0,dy-1,dy,cs,0,UP)>0)
+ || ( loop(bp,dx-2, 0,dy,cs,0,DO)>=dy-2
+ && loop(bp,dx-1,dy-1,dy,cs,0,UP)>0)) Break; // 4x6 UV
+ ad=ad*99/100; // 4x6 font
+ MSG(fprintf(stderr,"ad=%d",ad);)
+ }
+ if( num_cross(0,dx-1, 1, 1,bp,cs)< 2
+ && num_cross(0,dx-1,dy/16,dy/16,bp,cs)< 2 ) Break;
+ x =loop(bp,0 ,yb,dx,cs,0,RI);
+ xb=loop(bp,x ,yb,dx,cs,1,RI);xb=x+xb/2; if(xb>dx/2) Break; /* xb: first value plus half the second, taken on row yb from the left */
+ x =loop(bp,dx-1 ,yb,dx,cs,0,LE);
+ xd=loop(bp,dx-1-x,yb,dx,cs,1,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break; /* xd: same from the right */
+ for(y=0,xc=x=xb+1;x<xd;x++)
+ if((i=loop(bp,x,dy-1,dy,cs,0,UP))>y){xc=x;y=i;} /* xc: column between xb and xd with the largest upward loop() value y */
+ if(dx>4 && !y) Break;
+ ya=dy-1-y; // flat
+ y=loop(bp,xc,ya,dy,cs,1,UP);if(y)y--;
+ if (dy>6 || dx>4) { // ~4x6 font
+ if( num_cross(0 ,xc ,ya-y ,ya-y ,bp,cs)!= 2
+ && num_cross(0 ,xc ,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
+ if( num_cross(xc,dx-1,ya-y ,ya-y ,bp,cs)!= 2
+ && num_cross(xc,dx-1,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break;
+ }
+ ya-=y/2;
+ x =loop(bp,0 ,1 ,dx,cs,0,RI);
+ xa=loop(bp,x ,1 ,dx,cs,1,RI);
+ if( x+xa>xb ){ // may be, here is a small but thick letter
+ // later add some proofs
+ xa=x+xa/4;
+ } else {
+ xa=x+xa/2;
+ }
+ x =loop(bp,dx-1 ,1 ,dx,cs,0,LE);
+ xe=loop(bp,dx-1-x,1 ,dx,cs,1,LE);xe=dx-1-x-xe/2;
+ MSG( fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d %d %d",
+ xa,1,xb,yb,xc,ya,xd,yb,xe,1);)
+ if (ya<dy/2 && xc<dx/2) ad=95*ad/100; /* ~N */
+ i= loop(bp,xa ,1 ,dx,cs,1,RI);
+ for (x=xa;x<xa+i;x++)
+ if( get_line2(x,1,xb,yb,bp,cs,100)>94 ) break;
+ if (x==xa+i) Break; // no vert. line found
+ if( get_line2(xb,yb-1,xc,ya ,bp,cs,100)<95
+ && get_line2(xb,yb-1,xc,ya+dy/32,bp,cs,100)<95
+ && get_line2(xb,yb-1,xc,ya+dy/16,bp,cs,100)<95 ) Break; /* stroke xb-xc: none of three end points reaches 95 */
+ if( get_line2(xc, ya,xd, yb,bp,cs,100)<95
+ && get_line2(xc+1,ya,xd, yb,bp,cs,100)<95 ) Break; /* stroke xc-xd */
+ if( get_line2(xd,yb,xe ,1+dy/16,bp,cs,100)<95
+ && get_line2(xd,yb,dx-1 ,1+dy/8 ,bp,cs,100)<95 // round w
+ && get_line2(xd,yb,xe+dx/20,1+dy/16,bp,cs,100)<95 ) Break; /* stroke xd-xe */
+ // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
+ // ~ur
+ MSG(fprintf(stderr,"ad=%d",ad);)
+ for(i=0,y=5*dy/8;y<dy;y++){ /* i tracks the largest x seen so far; a drop of more than 2 below it rejects, a smaller drop costs 2% per row */
+ x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
+ if (x<i) ad=98*ad/100;
+ } if( y<dy ) Break;
+ MSG(fprintf(stderr,"ad=%d",ad);)
+ ac=((hchar)?'W':'w');
+ if (gchar) ad=98*ad/100;
+ Setac(box1,ac,ad);
+ break;
+ }
+ // --- test ~w {\it w} ohmega? also handwritten -------
+ // italic
+ for(ad=d=100;dx>3 && dy>3;){ // dy<=dx 4x6font (like a H with fat bar)
+ DBG( wchar_t c_ask='w'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ // ~ul ~uf
+ if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)<2 ) Break;
+ if( num_cross(0,dx-1,dy/8,dy/8,bp,cs)<2 ) handwritten=40; /* handwritten is only written, never read in this function */
+ if( num_cross(0,dx-1,dy/4,dy/4,bp,cs)<2 ) handwritten=80;
+ for(i=0,y=0;y<dy-1;y++)
+ if( num_cross(0,dx-1,y,y,bp,cs)==3 ) i++;
+ if(i<=dy/8) Break; // three legs
+ // xa xe
+ // \ xc / <=yb connected xa-xb-xc-xd-xe
+ // xb xd
+ for(y=dy/2;y<dy-1-dy/8;y++)
+ if( num_cross(0,dx-1,y,y,bp,cs)==3 ) break;
+ yb=y; /* first row from dy/2 downwards with exactly 3 crossings (or dy-1-dy/8 if none) */
+ x =loop(bp,0 ,yb,dx,cs,0,RI);
+ x+=loop(bp,x ,yb,dx,cs,1,RI); if(x>dx/2) Break;
+ xb=loop(bp,x ,yb,dx,cs,0,RI);xb=x+xb/2; if(xb>dx/2) Break;
+ x =loop(bp,dx-1 ,yb,dx,cs,0,LE);
+ x+=loop(bp,dx-1-x,yb,dx,cs,1,LE);
+ xd=loop(bp,dx-1-x,yb,dx,cs,0,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break;
+ if( num_cross(xb,xd,yb,yb ,bp,cs)!= 1 ) Break;
+ if( num_cross(xb,xb,yb,dy-1,bp,cs)!= 1 ) Break;
+ if( num_cross(xd,xd,yb,dy-1,bp,cs)!= 1 ) Break;
+ if( num_cross(xb,xb, 0,yb ,bp,cs)!= 0 ) Break;
+ if( num_cross(xd,xd, 0,yb ,bp,cs)!= 0 ) Break;
+ // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break;
+ if (sdata->holes.num != 0) Break; /* stricter than the num>1 check at the top of this block */
+ // ~ur
+ for(i=0,y=3*dy/4;y<dy;y++){
+ x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>i ) i=x; if( x<i-2 ) break;
+ } if( y<dy ) Break; // fail for overlapping neighbouring slanted chars?
+ ac=((hchar)?'W':'w');
+ if (gchar) ad=98*ad/100;
+ Setac(box1,ac,ad);
+ Break; /* NOTE(review): Break macro on the success path, the other blocks end with a plain break -- confirm intended */
+ }
+ return box1->c;
+}
+/* ocr0_aA: pixel based tests for 'A', 'a', a handwritten 'a' (no bow above
+ * the circle) and LATIN_CAPITAL_LETTER_A_WITH_OGONEK.
+ * The four one-pass blocks run one after the other; each block that passes
+ * all of its checks calls Setac() with its character and the weight ad
+ * (starts at 100, lowered by percentage penalties). Coordinates passed with
+ * bp are box relative (0..dx-1, 0..dy-1), those passed with box1->p use
+ * x0..x1, y0..y1. Returns box1->c.
+ */
+static wchar_t ocr0_aA(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad,ya; /* tmp-vars */
+
+ // --- test A ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ DBG( wchar_t c_ask='A'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ // first selection (rough sieve)
+ if( get_bw(dx/2 ,dx/2 ,dy-1-dy/8,dy-1,bp,cs,1) == 1
+ && get_bw(dx/2-1,dx/2-1,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) Break; // ~B
+ ya=0; /* upper end, not 0 for modified A etc. */
+ if (box1->modifier)
+ for (ya=0;ya<dy/2;ya++)
+ if (num_cross(0,dx-1,ya,ya,bp,cs)==0) break;
+ if (ya>=dy/2) ya=0; // already subtracted?
+ if( num_cross(0,dx-1,ya+ 1 ,ya+ 1 ,bp,cs)!=1 // 600dpi
+ && num_cross(0,dx-1,ya+ dy/8 ,ya+ dy/8 ,bp,cs)!=1
+ && num_cross(0,dx-1,ya+ dy/16 ,ya+ dy/16 ,bp,cs)!=1
+ && num_cross(0,dx-1,ya+ dy/8+1,ya+ dy/8+1,bp,cs)!=1 ) Break;
+ if( num_cross(0,dx-1, 7*dy/8 , 7*dy/8 ,bp,cs)!=2
+ && num_cross(0,dx-1, 7*dy/8-1, 7*dy/8-1,bp,cs)!=2 ) Break;
+ if ( num_cross( 0,dx/8,ya+dy/8,ya+0,bp,cs)>0 ) Break; // ~R
+ for(y=ya+dy/8;y<ya+dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
+ if( y==ya+dy/2 ) Break; i1=y; /* i1: first row with more than one crossing */
+ if (dy>20) i1++; /* get around some noise fat font */
+
+ x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) Break;
+ x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) Break; i2=x;
+ x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) Break; i2=(x+i2)/2;
+ // hole (i2,i1)
+ y+=loop(bp,i2,y,dy,cs,1,DO);
+ y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
+ if (y>5*dy/6) { MSG(fprintf(stderr,"x,y,i1,i2= %d %d %d %d",x,y,i1,i2);) }
+ if (y>5*dy/6) Break;
+
+ if( sdata->holes.num != ((box1->modifier==RING_ABOVE)?2:1)
+ || sdata->holes.hole[0].y1-ya >= dy-1-dy/4) Break; /* exactly one hole (two with RING_ABOVE) and hole[0] must end above the lower quarter */
+ // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) Break;
+ // out_x(box1);
+ i3=0;i4=0;
+ for(x=dx/3;x<2*dx/3;x++){
+ i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
+ i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
+ if(i4==1) i3=x;
+ } if(i4<1 || i4>2 || i3==0){
+// ToDo: MSG(fprintf(stderr,"x,y,i4,i3= %d %d %d %d",x,y,i4,i3);)
+ Break;
+ }
+ if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) Break;
+
+ i1=loop(bp,dx-1,ya+ (dy-ya)/4,dx,cs,0,LE); /* right side profile at 1/4, 1/2 and 3/4 height */
+ i2=loop(bp,dx-1,ya+ (dy-ya)/2,dx,cs,0,LE);
+ i3=loop(bp,dx-1,dy-1-(dy-ya)/4,dx,cs,0,LE);
+ if( 2*i2-dx/8>i1+i3 ) ad=99*ad/100; /* 6*8 font */
+ if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 ) Break; /* middle value too far from the mean of the outer two */
+
+ i1=loop(bp,0 ,ya+ (dy-ya)/4,dx,cs,0,RI); // left vertical line
+ i2=loop(bp,0 ,ya+ (dy-ya)/2,dx,cs,0,RI);
+ i3=loop(bp,0 ,dy-1-(dy-ya)/4,dx,cs,0,RI);
+ if( 2*i2-dx/8>i1+i3 ) ad=98*ad/100; /* 6*8 font */
+ if( 2*i2+dx/4<i1+i3 || 2*i2-dx/4>i1+i3 || i1<i3) Break;
+
+ // lower ends could be round on thick fonts
+ for(i3=dx,y=ya+(dy-ya)/4;y<7*dy/8;y++){ // increasing width
+ i1=loop(bp, 0, y,dx,cs,0,RI);
+ i2=loop(bp,dx-1, y,dx,cs,0,LE);
+ if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
+ } if(y<7*dy/8) Break;
+ if ( loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)
+ -loop(bp, 0,dy/2 ,dx,cs,0,RI)>0) ad=97*ad/100; // italic-a
+
+ if (!hchar) ad=99*ad/100; // italic-a
+ Setac(box1,'A',ad);
+ break;
+ }
+ // --- test a -------------------------------------------
+ // with a open bow above the circle starting
+ // on the right side of the circle
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4
+ DBG( wchar_t c_ask='a'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/3, x1 , y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/3, x1 , y0+dy/4, y0+dy/4,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1, box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2, x0+dx/2, y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/3, x1-dx/3, y0 , y0 ,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/4, x1-dx/2, y1 , y1 ,box1->p,cs,1) != 1 )
+ if( get_bw(x0+dx/4, x1-dx/3, y1-1 , y1-1 ,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0 , x0 , y0+dy/2, y1 ,box1->p,cs,1) != 1 )
+ if( get_bw(x0+dx/8, x0+dx/8, y0+dy/2, y1 ,box1->p,cs,1) != 1 ) Break;
+ if( loop(bp,3*dx/8,0,dy,cs,0,DO) > 3*dy/16 ) Break; // ~d
+ if( num_cross(0,dx-1,dy/4 ,dy/4 , bp,cs) >2 // ~glued am != an
+ && num_cross(0,dx-1,dy/4+1,dy/4+1, bp,cs) >2 ) Break;
+
+ for( x=dx/4;x<dx-dx/4;x++ ){ // ar
+ i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
+ i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
+ } if( x<dx-dx/4 ) Break;
+
+ for(i=dx/8+1,x=dx/4;x<=dx-1-dx/4 && i;x++){ /* need dx/8+1 columns with exactly 3 vertical crossings */
+ if( num_cross(x,x,0,bp->y-1, bp,cs) == 3 ) i--;
+ } if( i ) Break;
+
+ i1=loop(bp,0, dy/8,dx,cs,0,RI);
+ i3=loop(bp,0,3*dy/4,dx,cs,0,RI);
+ for(y=dy/8+1;y<3*dy/4;y++){
+ i2=loop(bp,0,y,dx,cs,0,RI);if(2*i2>i1+i3+1) break;
+ } if(y==3*dy/4) Break; // ~6
+ // ~ s (small thick s), look for vertical line piece
+ for(x=3*dx/4;x<dx;x++)
+ if( loop(bp,x,dy/4,dy/2,cs,1,DO)>dy/4 ) break;
+ if( x==dx ) Break;
+
+ if (sdata->holes.num != 1) ad=96*ad/100; else
+ if (sdata->holes.num == 1)
+ if( num_hole ( x0, x1, y0+dy/3, y1 ,box1->p,cs,NULL) != 1 ) Break;
+ // if( num_hole ( x0, x1, y0, y1, box1->p,cs,NULL) != 1 ) Break;
+ if( num_hole ( x0, x1, y0, y1-dy/3 ,box1->p,cs,NULL) != 0 ){
+ i =loop(bp,0,dy/4,dx,cs,0,RI);
+ i =loop(bp,i,dy/4,dx,cs,1,RI);
+ if(i<dx/4+1) Break; // fat a
+ i =loop(bp,0,dy/4,dx,cs,0,RI);
+ i+=loop(bp,i,dy/4,dx,cs,1,RI);
+ for(y=dy/4;y<dy/2;y++)
+ if( num_cross(0,dx-1,y,y, bp,cs) !=2 ) break;
+ x =loop(bp,0,y-1,dx,cs,0,RI);
+ x+=loop(bp,x,y-1,dx,cs,1,RI);
+ if(x>i) Break; // ~ 8
+ }
+ /* test for left/right mirror symmetry ~8; y is set to dy+1 on the first mismatch */
+ for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
+ if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
+ if (y==dy) Break; /* ~8 */
+
+ if (hchar) ad=96*ad/100;
+ if (gchar) ad=96*ad/100;
+ Setac(box1,'a',ad);
+ break;
+ }
+ // --- test hand written a ---------------------------------------------------
+ // rarely char, without bow above the circle
+ for(ad=d=100;dx>3 && dy>3;){ // min 4x4
+ DBG( wchar_t c_ask='a'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/3 , x0+dx/3,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ i = loop(bp,dx/2, 0 ,dy,cs,0,DO); if (i>dy/4) Break;
+ i+= loop(bp,dx/2, i ,dy,cs,1,DO); if (i>dy/2) Break;
+ i = loop(bp,dx/2, i ,dy,cs,0,DO); if (i<dy/4) Break;
+ if( get_bw(x0 , x0 ,y1 , y1 ,box1->p,cs,1) == 1 ) Break;
+
+ if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) != 2 ) Break;
+ if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
+ if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
+ i = loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (i>dy/3) Break;
+ y = i+loop(bp,dx/2,dy-1-i,dy,cs,1,UP); if (i>dy/2) Break; /* NOTE(review): i<=dy/3 here, so this test can never fire; possibly y was meant -- confirm */
+ // normal 'a' has a well separated vertical line right from the circle
+ // but fat 'a' is like a 'o', only bigger on the right side
+ if( num_cross(x0+dx/2-1,x1,y1 ,y1 ,box1->p,cs) < 2 /* 4x6font */
+ && num_cross(x0+dx/2-1,x1,y1-i,y1-i ,box1->p,cs) < 2 /* 2 or 3 */
+ && num_cross(x0+dx/2-1,x1,y1-y,y1-y ,box1->p,cs) < 2 )
+ { if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI)
+ <4*loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) { Break;}
+ else ad=98*ad/100;
+ }
+ if( num_cross(x0,x1,y0+dy/2 , y0+dy/2,box1->p,cs) < 2
+ || num_cross(x0,x1,y0+dy/3 , y0+dy/3,box1->p,cs) < 2 ) Break; // Jun00
+
+ if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 )
+ if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) Break;
+ if (sdata->holes.num != 1)
+ if( num_hole(x0,x1-2,y0 ,y1 ,box1->p,cs,NULL) != 1 )
+ // if( num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 )
+ Break;
+ if( num_hole(x0,x1 ,y0+dy/3,y1-1 ,box1->p,cs,NULL) != 0 ) Break;
+
+ if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
+ loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
+
+ if( loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)> dx/4
+ && loop(bp,dx-1,dy-2,x1-x0,cs,0,LE)> (dx+4)/8 ) ad=97*ad/100;
+
+ x=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
+ i=loop(bp,dx-1, dy/4,dx,cs,0,LE); if (abs(x-i)>dx/4) Break;
+
+ for( x=dx/4;x<dx-dx/4;x++ ){ // ar
+ i=loop(bp,x, 0,y1-y0,cs,0,DO); if (i>dy/2) break;
+ i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break;
+ } if( x<dx-dx/4 ) Break;
+
+ if( num_cross(x0 , x1, y1, y1,box1->p,cs) == 1 )
+ if( num_cross(x0 , x1, y0, y0,box1->p,cs) == 1 )
+ if( loop(bp,dx-1, 0,y1-y0,cs,0,DO)> dy/4
+ && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~o
+ if( loop(bp,dx/2,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~q
+
+ if (hchar) ad=98*ad/100;
+ if (gchar) ad=98*ad/100;
+ // handwritten-a (alpha)
+ Setac(box1,'a',ad);
+ break;
+ }
+ // --- test A_A_WITH_OGONEK 0x0104 Centr.Eur.Font -------------------------
+ /* not sure if we should move this to a get_CentralEuropean-function */
+ for(ad=d=100;dx>2 && dy>4;){ // min 3x5; this block uses plain break, so rejections are not reported via the Break macro
+ DBG( wchar_t c_ask='A'; )
+ if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
+ // first selection (rough sieve)
+ if( get_bw(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) break; // ~B
+ if( num_cross(0,dx-1, 1 , 1 ,bp,cs)!=1 // 600dpi
+ && num_cross(0,dx-1, dy/8 , dy/8 ,bp,cs)!=1
+ && num_cross(0,dx-1, dy/16 , dy/16 ,bp,cs)!=1
+ && num_cross(0,dx-1, dy/8+1, dy/8+1,bp,cs)!=1 ) break;
+ if( num_cross(0,dx-1, dy-1 , dy-1 ,bp,cs)!=1 ) break; /* exactly one crossing on the last row (the plain A test above wants 2 near the bottom) */
+ if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs)!=2
+ && num_cross(0,dx-1, dy/3 , dy/3 ,bp,cs)!=2 ) break;
+ if ( num_cross( 0,dx/8,dy/8, 0,bp,cs)>0 ) break; // ~R
+ for(y=dy/8;y<dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs) > 1 ) break;
+ if( y==dy/2 ) break; i1=y;
+ if (dy>20) i1++; /* get around some noise fat font */
+
+ x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) break;
+ x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) break; i2=x;
+ x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) break; i2=(x+i2)/2;
+ // hole (i2,i1)
+ y+=loop(bp,i2,y,dy,cs,1,DO);
+ y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100;
+ if (y>5*dy/6) break;
+
+ if( sdata->holes.num != 1 || sdata->holes.hole[0].y1 >= dy-1-dy/4) break;
+ // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) break;
+ // out_x(box1);
+ i3=0;i4=0;
+ for(x=dx/3;x<2*dx/3;x++){
+ i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2)
+ i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break;
+ if(i4==1) i3=x;
+ } if(i4<1 || i4>2 || i3==0){
+// ToDo: g_debug_A(printf(" A: x,y,i4,i3= %d %d %d %d\n",x,y,i4,i3);)
+ break;
+ }
+ if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) break;
+ /* dy/4 changed to dy/6 because of screenfonts */
+ /* there are strange fonts, one has a serif on the upper end of A */
+ if ( num_cross( 0,dx/8,dy/6, 0,bp,cs)>0 ) break;
+ if ( num_cross(dx-1-dx/4,dx-1, 0,dy/6,bp,cs)>0 ) break;
+
+ i1=loop(bp,dx-1, dy/4,dx,cs,0,LE);
+ i2=loop(bp,dx-1, dy/2,dx,cs,0,LE);
+ i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE);
+ if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 ) break;
+
+ i1=loop(bp,0 , dy/4,dx,cs,0,RI); // left vertical line
+ i2=loop(bp,0 , dy/2,dx,cs,0,RI);
+ i3=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI);
+ if( 2*i2+dx/4<i1+i3 || 2*i2-dx/8>i1+i3 || i1<i3) break;
+
+ // lower ends could be round on thick fonts
+ for(i3=dx,y=dy/4;y<6*dy/8;y++){ // increasing width
+ i1=loop(bp, 0, y,dx,cs,0,RI);
+ i2=loop(bp,dx-1, y,dx,cs,0,LE);
+ if(i1+i2>i3+dx/16) break; if( i1+12<i3 ) i3=i1+i2;
+ } if(y<6*dy/8) break;
+
+ if (!hchar) ad=96*ad/100;
+ if (!gchar) ad=98*ad/100; /* penalty when gchar is NOT set (the plain tests above penalize gchar being set) */
+ Setac(box1,(wchar_t)LATIN_CAPITAL_LETTER_A_WITH_OGONEK,ad);
+ break;
+ }
+ return box1->c;
+}
+/* ocr0_cC: pixel based test whether the box looks like 'c' or 'C'.
+ * One one-pass block: rough sieve, search of the gap on the right side
+ * (rows i1..i2), then a series of rejections/penalties against similar
+ * shapes (G, r, e, [, (, t, <). The weight ad starts at 100 and is lowered
+ * by percentage penalties; on success 'C' (if sdata->hchar is set) or 'c'
+ * is passed to Setac() with ad. One path registers 'G' with weight 90
+ * instead. Coordinates passed with bp are box relative, those passed with
+ * box1->p use x0..x1, y0..y1. Returns box1->c.
+ */
+static wchar_t ocr0_cC(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad,t1; /* tmp-vars */
+ wchar_t bc=UNKNOWN;
+
+ // --- test c,C ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>2;){ // min 3x3; one-pass block, every path leaves via break/Break
+ DBG( wchar_t c_ask='c'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0 , x0+dx/3,y0+dy/2, y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2, x0+dx/2,y1-dy/3, y1, box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2, x0+dx/2,y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( num_cross(x0,(x0+x1)/2,(y0+y1)/2,(y0+y1)/2,box1->p,cs) > 1 ) Break; // ~ocr-a-[
+
+ for(y=y0+dy/4;y<y0+3*dy/4;y++)
+ if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 0 ) break;
+ if( y==y0+3*dy/4 ) Break; i1=y; // i1: upper end of right gap
+
+ // measure thickness of line!
+ t1=loop(bp, 0,dy/2,dx,cs,0,RI);
+ t1=loop(bp,t1,dy/2,dx,cs,1,RI);
+ if (t1>dx/2) Break;
+
+ for(y=i1,i2=0,x=x0+dx/2;x<x0+6*dx/8;x++){ /* NOTE(review): the body never uses x, every pass probes column x0+dx/2 -- left unchanged, confirm intent */
+ i=y-1+loop(box1->p,x0+dx/2,i1,dy,cs,0,DO);
+ if( i>i2 ) { i2=i; }
+ } if(i2<y0+5*dy/8-t1/2) Break; // i2: lowest white point above lower bow
+
+ i3=y+1-loop(box1->p,x0+5*dx/8,i1,dy,cs,0,UP);
+ i =y+1-loop(box1->p,x0+4*dx/8,i1,dy,cs,0,UP); if(i<i3) i3=i;
+ if(i3>y0+ dy/4+t1/2) Break; // highest
+
+ for(y=i1;y<y1-dy/8;y++)
+ if( get_bw(x0+dx/2,x1,y,y,box1->p,cs,1) == 1 ) break;
+ if( y-i1<dy/6 ) Break; i2=y-1; // lower end of right gap
+ // pixelbased num_cross for streight lines could fail on small fonts
+ if( num_cross(x1-dx/4,x1-dx/4,i2,y0,box1->p,cs) < 1 ) Break; // ~L
+ if (loop(box1->p,x0,y0+3*dy/4,dx,cs,0,RI)>dx/16)
+ if( num_cross(x0+dx/2,x1,i3 ,y1,box1->p,cs) < 1
+ && num_cross(x0+dx/2,x1,y1-dy/4,y1,box1->p,cs) < 1 ) Break; // ~r
+
+ i=1; /* i is cleared when the 2x2 pixel pattern below is found in the lower right part */
+ for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
+ for(y=dy/2;y<dy-1-dy/8 && i;y++){ // .@
+ if( getpixel(bp,x ,y )>=cs
+ && getpixel(bp,x+1,y )< cs
+ && getpixel(bp,x+1,y-1)< cs
+ && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
+ }
+ if(!i) ad=95*ad/100; // ~G
+
+ i=loop(bp,0,dy/2,dx,cs,0,RI);
+ for(y=0;y<dy;y++)if( loop(bp,0,y,dx,cs,0,RI)<i-1-dx/32 ) break;
+ if( y<dy ) Break; // ~r
+ // out_x(box1);
+ for(i5=0,i4=dx,y=dy/2;y>=dy/4;y--){
+ x =loop(bp,0,y,dx,cs,0,RI);
+ x+=loop(bp,x,y,dx,cs,1,RI); if(x>i5) i5=x;
+ i =loop(bp,x,y,dx,cs,0,RI); if(i<i4) i4=i;
+ if( i5<x-dx/32 && i>i4+dx/32 ) break; // unusual for c, more a bad e?
+ } if( y>=dy/4 ) Break;
+
+ if( !hchar ){ // test for e where the middle line is partly removed
+ x= loop(bp,0,dy/2,dx,cs,0,RI);
+ x=x +loop(bp,x,dy/2,dx,cs,1,RI);
+ y=dy/2-loop(bp,x,dy/2,dy,cs,0,UP)-1;
+ i=x +loop(bp,x,y,dx,cs,1,RI);
+ i=i +loop(bp,i,y,dx,cs,0,RI);
+ if( num_cross(x ,x ,1,dy/2,bp,cs) > 1
+ || num_cross(x+1,x+1,1,dy/2,bp,cs) > 1 )
+ if( num_cross(i-1,i-1,1,dy/2,bp,cs) > 1
+ || num_cross(i ,i ,1,dy/2,bp,cs) > 1 ) Break; // ~bad e
+ }
+ if( dy>16 && dy>3*dx && hchar ){ // ~[
+ /* x: sum of the left distances near top and bottom, i: twice the left
+ distance in the middle; was "x=+loop(...)", which dropped the top row */
+ x= loop(bp,0, dy/16,dx,cs,0,RI);
+ x+=loop(bp,0,dy-1-dy/16,dx,cs,0,RI);
+ i= loop(bp,0, dy/2 ,dx,cs,0,RI)*2;
+ if( i>=x ) /* left side not curved inwards at the ends */
+ if( num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2 ) Break;
+
+ }
+ if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x1,x1,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~[ */
+
+ x =loop(bp, 0,dy/2,dx,cs,0,RI);
+ i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ if( (i<dx/2 || i<3) && hchar && dy>7 )
+ if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
+ && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
+ && loop(bp,dx-1,dy-1-dy/ 8,dx,cs,0,LE)
+ > loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)
+ && loop(bp,dx-1, dy/ 8,dx,cs,0,LE)
+ > loop(bp,dx-1, dy/16,dx,cs,0,LE) ) Break; // ~(
+
+// printf(" hchar=%d i1=%d i2=%d %d\n",hchar,i1-y0,i2-y0,9*dy/16);
+ // ~G without characteristic crotchet
+ if (hchar && dy>15 && dx>7 && i2-y0<9*dy/16 && i1-y0<=dy/4)
+ if ( loop(bp,5*dx/8,i2-y0,dy,cs,0,DO) > 2*dy/8 ){
+ Setac(box1,'G',90); /* registers 'G' with fixed weight 90 and leaves without registering c/C */
+ Break;
+ }
+
+ if (hchar){
+ i=1; /* same 2x2 pattern search as above, now in the upper right part */
+ for(x=dx/2;x<dx-1 && i;x++) // look for @@ (instead +1 use +delta?)
+ for(y= 1;y<dy/4 && i;y++){ // .@
+ if( getpixel(bp,x ,y )>=cs
+ && getpixel(bp,x+1,y )< cs
+ && getpixel(bp,x+1,y-1)< cs
+ && getpixel(bp,x ,y-1)< cs ) { i=0;break; }
+ }
+ if (i) ad=98*ad/100; // ~(
+ if (dy>2*dx) ad=99*ad/100;
+ }
+ if( loop(bp,dx-1,dy/2,dx,cs,0,LE) < 6*dx/8 ) ad=98*ad/100;
+
+ i= loop(bp,dx-1,dy/16,dx,cs,0,LE);
+ j= loop(bp,dx/2,0 ,dy,cs,0,DO);
+ if (i>=dx/2 && j>dy/8 && j>2 && j<dy/2) Break; // t
+
+ if (dy>=3*dx && dy>12) ad=99*ad/100; // (
+ i= loop(bp,dx-1,dy-1,dy,cs,0,UP);
+ j= loop(bp,dx/2,dy-1,dy,cs,0,UP);
+ if (i==0 && j>dy/8) ad=95*ad/100; // <
+ i= loop(bp,dx-1, 0,dy,cs,0,DO);
+ j= loop(bp,dx/2, 0,dy,cs,0,DO);
+ if (i==0 && j>dy/8) ad=95*ad/100; // <
+ if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>= 3*dx/4) ad=98*ad/100; // <
+ if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>=(dx+1)/2) ad=98*ad/100; // <
+ if (loop(bp,0, dy/8,dx,cs,0,RI)>=dx/2) ad=98*ad/100; // <
+
+ if (gchar) ad=98*ad/100; // could happen for 5x7 font
+ bc=((hchar)?'C':'c');
+ Setac(box1,bc,ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_lL(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,i0,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad; /* tmp-vars */
+
+ // --- test L ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>4;){ // min 3x4
+ DBG( wchar_t c_ask='L'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ i=loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ if (i<3 && dy>8) {Break;}
+ if (i<dx/2) ad=98*ad/100; // ~G
+
+ if (dx<8 && 3*loop(bp,dx-1,0,dy,cs,0,DO)<=dy) break; // ~G
+ for( i=i1=0,y=y1-dy/4;y<=y1;y++){ // check bottom line (i1)
+ j=loop(box1->p,x0 ,y,dx,cs,0,RI);
+ j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ){ i=j;i1=y; }
+ } if( i<3*dx/4 ) Break; i1=i; // length of horizontal line
+ // line thickness (i2)
+ i=loop(box1->p,x0 ,y0+dy/2,dx,cs,0,RI); if( i>dx/2 ) Break;
+ j=loop(box1->p,x0+i,y0+dy/2,dx,cs,1,RI); if( i+j>dx/2 ) Break; i2=j;
+ if (loop(bp,dx-1, 0,dx,cs,0,LE)<dx/8
+ && loop(bp,dx-1, dy/4,dx,cs,0,LE)>dx/2
+ && loop(bp, 0,5*dy/8,dx,cs,0,RI)<dx/4
+ && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<dx/4) Break; // ~G
+ for( i=1,y=y0;y<=y1-dy/4 && i;y++){ // check vertical line
+ j=loop(box1->p,x0 ,y,dx,cs,0,RI);
+ if ( j>(dx+2)/4+(y1-dy/4-y)*dx/2/dy ) { i=0; break; }
+ x=loop(box1->p,x0+j,y,dx,cs,1,RI);
+ if( ((x>i2+1 || 4*x<3*i2) && y>y0+dy/8) || 4*x>3*i1 ) i=0;
+ } if( !i ) Break;
+ if( num_cross(0, dx-1-dx/8, dy-1-dy/2, dy-1-dy/2,bp,cs) != 1 ) Break;
+ if( num_cross(0, dx-1 , dy/3 , dy/3,bp,cs) != 1 ) Break;
+ if( num_cross(0, dx-1 , dy/8 , dy/8,bp,cs) != 1 ) Break;
+ if (loop(bp,0,dy-1,dx,cs,0,RI)
+ -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c
+ if (loop(box1->p,x0+dx/4,y1,dy,cs,0,UP)>1+dy/16) ad=99*ad/100; // ~4
+
+ if ( gchar) ad=98*ad/100;
+ if (!hchar) ad=99*ad/100;
+ if (5*dx<2*dy && loop(box1->p,x0,y1,dx,cs,0,RI)>dx/4) ad=99*ad/100; // ~l
+ Setac(box1,'L',ad);
+ break;
+ }
+ // --- test l ---------------------------------------------------
+ // recognizing an "l" is a never-ending problem, because there are lots of
+ // variants and the char is not very distinctive (under construction)
+ // --- test italic l ---------------------------------------------------
+ // --- test l ~italic (set flag-italic) --------------------------------
+ // if unsure d should be multiplied by 80..90%
+ for(ad=d=100; dy>dx && dy>5;){ // min 3x4
+ DBG( wchar_t c_ask='l'; )
+ if( box1->dots>0 ) Break;
+ if( num_cross(0, dx-1,dy/2,dy/2,bp,cs) != 1
+ || num_cross(0, dx-1,dy/4,dy/4,bp,cs) != 1 ) Break;
+ // measure thickness
+ for(i1=0,i2=dx,y=dy/4;y<dy-dy/4;y++){
+ j = loop(bp,0,y,dx,cs,0,RI);
+ j = loop(bp,j,y,dx,cs,1,RI);
+ if( j>i1 ) { i1=j; } // thickest
+ if( j<i2 ) { i2=j; } // thinnest
+ }
+ if ( i1>2*i2 ) Break;
+ if(box1->m3 && dy<=box1->m3-box1->m2) ad=94*ad/100;
+ if( box1->m2-box1->m1>1 && y0>=box1->m2 ) ad=94*ad/100;
+ for(i0=0,i3=0,y=0;y<dy/4;y++){
+ j = loop(bp,0,y,dx,cs,0,RI);
+ if( j>i3 ) { i3=j; } // widest space
+ j = loop(bp,j,y,dx,cs,1,RI);
+ if( j>i0 ) { i0=j;i3=0; } // thickest
+ }
+ if ( i0>4*i2 || 3*i3>2*dx)
+ if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/8
+ || loop(bp, 0,dy-1,dx,cs,0,RI)>3*dx/8) Break; // ~7
+
+ // detect serifs
+ x =loop(bp,0, 0,dx,cs,0,RI);
+ i3=loop(bp,x, 0,dx,cs,0,RI);
+ x =loop(bp,0, 1,dx,cs,0,RI);
+ x =loop(bp,x, 1,dx,cs,0,RI); if(x>i3) i3=x;
+ x =loop(bp,0,dy-1,dx,cs,0,RI);
+ i4=loop(bp,x,dy-1,dx,cs,0,RI);
+ x =loop(bp,0,dy-2,dx,cs,0,RI);
+ x =loop(bp,x,dy-2,dx,cs,0,RI); if(x>i4) i4=x;
+ if( i3>i1+dx/8+1 && i4>i1+dx/8+1 ) Break; // ~I
+
+ for(i=dx,j=0,y=1;y<dy/4;y++){
+ x=loop(bp,dx-1,y,dx,cs,0,LE); if(x>i+1) break; i=x;
+ if( num_cross(0,dx-1,y ,y ,bp,cs)==2
+ && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) j=1;
+ } if ( y<dy/4 ) Break;
+ if(j){ // if loop at the upper end, look also on bottom
+ for(y=3*dy/4;y<dy;y++){
+ if( num_cross(0,dx-1,y ,y ,bp,cs)==2
+ && num_cross(0,dx-1,y-1-dy/32,y-1-dy/32,bp,cs)==2 ) break;
+ } if ( y==dy ) Break;
+ }
+
+ // if( get_bw(x0,x1,y0,y1,p,cs,2) == 0 ) Break; // unsure !I|
+
+ if(dx>3)
+ if( get_bw(dx-1-dx/8,dx-1,0,dy/6,bp,cs,1) != 1 )
+ if( get_bw(dx-1-dx/8,dx-1,0,dy/2,bp,cs,1) == 1 ) Break;
+
+ if( get_bw(dx-1-dx/8,dx-1,dy/4,dy/3,bp,cs,1) != 1 ) // large I ???
+ if( get_bw(0 ,dx/8,dy/4,dy/3,bp,cs,1) != 1 )
+ if( get_bw(dx-1-dx/8,dx-1,0 ,dy/8,bp,cs,1) == 1 )
+ if( get_bw(0 ,dx/8,0 ,dy/8,bp,cs,1) == 1 ) ad=ad*97/100;
+ if( get_bw(dx-1-dx/8,dx-1,dy/2,dy-1,bp,cs,1) != 1 ) // r ???
+ if( get_bw(0 ,dx/8,dy/2,dy-1,bp,cs,1) == 1 )
+ if( get_bw(dx-1-dx/8,dx-1,0 ,dy/3,bp,cs,1) == 1 )
+ if( get_bw(0 ,dx/8,0 ,dy/3,bp,cs,1) == 1 ) Break;
+
+ for( y=1;y<12*dy/16;y++ )
+ if( num_cross(0, dx-1, y , y ,bp,cs) != 1 // sure ?
+ && num_cross(0, dx-1, y-1, y-1,bp,cs) != 1 ) break;
+ if( y<12*dy/16 ) Break;
+
+ if(dx>3){
+ for( y=dy/2;y<dy-1;y++ )
+ if( get_bw(dx/4,dx-1-dx/4,y,y,bp,cs,1) != 1 ) break;
+ if( y<dy-1 ) Break;
+ }
+ // test whether the right edge is straight
+ for(x=dx,y=bp->y-1-5*dy/16;y>=dy/5;y--){ // rechts abfallende Kante/Knick?
+ i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE);
+ if( i-2-dx/16>=x ) break;
+ if( i<x ) x=i;
+ }
+ if (y>=dy/5 ) Break;
+
+ // test whether the left edge is straight
+ for(x=0,y=bp->y-1-dy/5;y>=dy/5;y--){ // rechts abfallende Kante/Knick?
+ i=loop(bp,0,y,x1-x0,cs,0,RI);
+ if( i+2+dx/16<x ) break;
+ if( i>x ) x=i;
+ }
+ if (y>=dy/5 ) Break;
+ if (box1->m4 && y1<box1->m4)
+ if ( get_bw(x0,x1,y1+1,box1->m4+dy/8,box1->p,cs,1) == 1 )
+ ad=ad*97/100; // unsure !l|
+ i=loop(bp,dx-1,dy/16,dx,cs,0,LE);
+ j=loop(bp,dx-1,dy/2 ,dx,cs,0,LE);
+ if( i>3 && j>3 )
+ if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,bp,cs,1) == 1 ) Break; // ~t
+
+ for(y=5*dy/8;y<dy;y++)
+ if( num_cross(0,dx-1,y,y,bp,cs) == 2 ) break;
+ if( y<dy ){
+ i =loop(bp,0,y,dx,cs,0,RI);
+ i+=loop(bp,i,y,dx,cs,1,RI);
+ i+=loop(bp,i,y,dx,cs,0,RI)/2; // middle of v-gap
+ if( num_cross(0,i,5*dy/8,5*dy/8,bp,cs)==0
+ && num_cross(i,i,5*dy/8, y,bp,cs)==0 ) Break; // ~J
+ }
+ if ( dx>8
+ && loop(bp, 0,3*dy/4,dx,cs,0,RI)>=dx/4
+ && loop(bp, 0,7*dy/8,dx,cs,0,RI)<=dx/8
+ && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<=dx/8
+ && loop(bp,dx-1,7*dy/8,dx,cs,0,LE)<=dx/8 ) Break; // ~J
+
+ if ( 2*i3>5*i1 ) // hmm \tt l can look very similar to 7
+ if ( loop(bp,0,dy/4,dx,cs,0,RI)>dx/2
+ && get_bw(0,dx/8,0,dy/4,bp,cs,1) == 1 ) Break; // ~7
+
+ if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/2
+ && get_bw(3*dx/4,dx-1,3*dy/4,dy-1,bp,cs,1) == 1) {
+ if (loop(bp,0,dy-1,dx,cs,0,RI)<dx/8) ad=99*ad/100; // ~L
+ if(5*dx>2*dy) ad=99*ad/100; // ~L
+ if(5*dx>3*dy) ad=99*ad/100; // ~L
+ }
+ if(!hchar){ // right part (bow) of h is never a l
+ if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1
+ && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break;
+ }
+ if( dx>3 && dy>3*dx )
+ if( loop(bp,dx/4,dy-1 ,dy,cs,0,UP)< dy/4
+ && loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)>=dx/2
+ && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)<=dx/4 ){
+ ad=98*ad/100; // ~]
+ if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)==0 ) Break;
+ }
+
+ for(x=0;x<dx/2;x++)
+ if( get_bw( x, x, 0,dy/4 ,bp,cs,1) == 1 ) break;
+ // works only for perpenticular char
+ if( get_bw( x,x+dx/16, 0,dy/16,bp,cs,1) == 0
+ && get_bw( x,x+dx/16,dy/4 ,dy/2 ,bp,cs,1) == 0
+ && get_bw( x,x+dx/16,dy/16,dy/4 ,bp,cs,1) == 1 ){
+ for(i=dx,y=0;y<dy/4;y++){
+ x=loop(bp,0,y,dx,cs,0,RI);
+ if( x>i ) break;
+ }
+ if( x>=loop(bp,0,y+1,dx,cs,0,RI) )
+ if( loop(bp,0 ,0,dy,cs,0,DO)>1 )
+ if( loop(bp,0 ,0,dy,cs,0,DO)
+ - loop(bp,dx/16+1,0,dy,cs,0,DO) < dx/16+1 ) Break; // ~1 Jul00,Nov00
+ if( num_cross(0,dx/2,y-1,y-1,bp,cs)==2 ) Break; // ~1
+ }
+ if(dx<8 && dy<12){ // screen font
+ i= loop(bp,0,0,dy,cs,0,DO);
+ if( loop(bp,dx/2,1,dy,cs,1,DO)>=dy-2
+ && loop(bp,0,dy/2,dx,cs,0,RI)>=2
+ && i>1 && i<dy/2 ) Break; // ~1
+ }
+ if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x0,x0+dx/4,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~] */
+ i=loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ if( loop(bp, 0,dy/2,dx,cs,0,RI)>=dx/2
+ && (i<dx/2 || i==0) ) ad=98*ad/100; // ~]
+ if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x1-dx/4,x1,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~[ */
+
+ x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? ~()
+ i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
+ && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
+ && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8
+ && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~(
+ if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8
+ && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8
+ && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8
+ && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~)
+
+ i= loop(bp, 0, 0,dy,cs,0,DO); // horizontal line?
+ if(dy>=12 && i>dy/8 && i<dy/2){
+ if( loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8
+ >loop(bp,dx-1, i,dx,cs,0,LE)
+ || loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8
+ >loop(bp,dx-1, i+1,dx,cs,0,LE) )
+ if( loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8
+ >loop(bp,dx-1, i,dx,cs,0,LE)
+ || loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8
+ >loop(bp,dx-1, i+1,dx,cs,0,LE) )
+ if( loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8
+ >loop(bp, 0, i,dx,cs,0,RI)
+ || loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8
+ >loop(bp, 0, i+1,dx,cs,0,RI) )
+ if( loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8
+ >loop(bp, 0, i,dx,cs,0,RI)
+ || loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8
+ >loop(bp, 0, i+1,dx,cs,0,RI) ) Break; // ~t
+ if( loop(bp, 0,i-1,dx,cs,0,RI)>1 && dx<6 ) Break; // ~t
+ if( loop(bp, 0,8*dy/16,dx,cs,0,RI)>dx/8
+ && loop(bp, 0, i,dx,cs,1,RI)>=dx-1
+ && loop(bp,dx-1,8*dy/16,dx,cs,0,LE)>dx/8
+ && loop(bp,dx-1, i-1,dx,cs,0,LE)>dx/8 ) Break; // ~t
+ }
+// if( vertical_detected && dx>5 )
+ if( loop(bp,0, 1,dx,cs,0,RI)>=dx/2
+ && ( loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8
+ || loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8 ) )
+ if( ( loop(bp,dx-1, 0,dx,cs,0,LE)<=dx/8
+ || loop(bp,dx-1, 1,dx,cs,0,LE)<=dx/8 )
+ && loop(bp,dx-1,dy-2,dx,cs,0,LE)>=dx/2 ) ad=98*ad/100; // ~/
+
+ if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
+
+ if (!hchar || loop(bp,0,dy/4,dx,cs,0,RI)>dx/2){ // ~z
+ i=loop(bp,0,dy/16 ,dx,cs,0,RI);
+ i=loop(bp,i,dy/16 ,dx,cs,1,RI); j=i;
+ i=loop(bp,0,dy/16+1,dx,cs,0,RI);
+ i=loop(bp,i,dy/16+1,dx,cs,1,RI); if (i>j) j=i;
+ i=loop(bp,0,dy/16+2,dx,cs,0,RI);
+ i=loop(bp,i,dy/16+2,dx,cs,1,RI); if (i>j) j=i;
+ if (j*4>=dx*3) ad=98*ad/100; // ~z
+ if (j*8>=dx*7) ad=96*ad/100; // ~z
+ }
+
+ if( get_bw(x0,x0,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
+ if( get_bw(x1,x1,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
+ if (ad==100) ad--; /* I have to fix that:
+ .@@@@.<-
+ @@..@@
+ ....@@
+ ....@@<
+ ...@@.
+ ..@@@.
+ ..@@..
+ .@@...
+ @@....
+ @@@@@@<-
+ */
+ if(!hchar) ad=ad*99/100;
+ if( gchar) ad=ad*99/100;
+ Setac(box1,'l',ad);
+// if( i<100 ) Break; ????
+// if( loop(bp,0, 1,dx,cs,0,RI)<=dx/8
+// && loop(bp,0,dy/2,dx,cs,0,RI)<=dx/8
+// && loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 ) vertical_detected=1;
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_oO(ocr0_shared_t *sdata){ /*
+ Rule-based test whether the box described by sdata looks like 'o' or 'O'.
+ An accepted candidate is registered with Setac(box1,bc,ad), and '0' (zero)
+ is registered as an alternative; the function returns box1->c.
+ Calls taking bp use box-relative coordinates (0..dx-1, 0..dy-1), calls
+ taking box1->p use absolute coordinates (x0..x1, y0..y1).
+ NOTE(review): get_bw, num_cross, loop, Setac, Break, MSG and DBG are defined
+ outside this chunk; remarks about them below are assumptions to confirm.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box */
+ ad; /* weight handed to Setac; starts at 100 and is only scaled down here */
+ wchar_t bc=UNKNOWN;
+
+ // --- test o,O ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4; body runs at most once (it ends with break)
+ DBG( wchar_t c_ask='o'; )
+ if (sdata->holes.num !=1 ) Break; // exactly one hole is required
+ if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; // probes left half of the middle row
+ if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; // probes right half of the middle row
+ if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break; // probes lower half of the middle column
+ if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; // probes upper half of the middle column
+ if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/2 , y1-dy/3,box1->p,cs,1) != 0 ) Break; // probes the center (presumably must be empty)
+ if (sdata->holes.hole[0].y0 > dy/3 // the hole has to start within the upper third ...
+ || sdata->holes.hole[0].y1 < dy-1-dy/3) Break; // ... and reach into the lower third
+ // in each of the following pairs it is enough if one of two neighbouring lines matches
+ if( num_cross(x0+dx/2 ,x0+dx/2 ,y0, y1 ,box1->p,cs) != 2
+ && num_cross(x0+dx/2+1,x0+dx/2+1,y0, y1 ,box1->p,cs) != 2 ) Break;
+ if( num_cross(x0+dx/3,x1-dx/4,y0 , y0 ,box1->p,cs) != 1 ) // AND
+ if( num_cross(x0+dx/3,x1-dx/4,y0+1 , y0+1,box1->p,cs) != 1 ) Break;
+ if( num_cross(x0+dx/4,x1-dx/3,y1 , y1 ,box1->p,cs) != 1 ) // tolerance against noise (German: "Rauschen")
+ if( num_cross(x0+dx/4,x1-dx/3,y1-1 , y1-1,box1->p,cs) != 1 ) Break;
+ if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
+ if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
+ if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
+ if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
+ // reject unless the value measured from the left in row 0 exceeds the one in row 2
+ if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
+ loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
+
+ x=loop(bp,dx-1,dy-1-dy/3,x1-x0,cs,0,LE); // should be minimum
+ for( y=dy-1-dy/3;y<dy;y++ ){
+ i=loop(bp,dx-1,y,x1-x0,cs,0,LE);
+ if( i<x ) break; x=i; // x=i is executed only when the loop was not left
+ }
+ if( y<dy ) Break; // the value from the right decreased somewhere in the lower third
+
+ // ~D
+ if( loop(bp,0, dy/16,dx,cs,0,RI)
+ + loop(bp,0,dy-1-dy/16,dx,cs,0,RI)
+ <= 2*loop(bp,0, dy/2 ,dx,cs,0,RI)+dx/8 ) Break; // not convex
+ if( loop(bp,0 , 1+dy/16,dx,cs,0,RI) + dx/4
+ <= loop(bp,dx-1, 1+dy/16,dx,cs,0,LE) ) Break; // Dec00
+
+ if( loop(bp,dx-1, dy/16,dx,cs,0,LE)>dx/8 )
+ if( loop(bp,0 , dy/16,dx,cs,0,RI)<dx/16 ) Break;
+ if( loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)>dx/8 )
+ if( loop(bp,0 ,dy-1-dy/16,dx,cs,0,RI)<dx/16 ) Break;
+ if( get_bw(x1-dx/32,x1,y0,y0+dy/32,box1->p,cs,1) == 0
+ && get_bw(x1-dx/32,x1,y1-dy/32,y1,box1->p,cs,1) == 0
+// && ( get_bw(x0,x0+dx/32,y0,y0+dy/32,box1->p,cs,1) == 1
+ && ( get_bw(0,dx/32,0,dy/32,bp,cs,1) == 1
+ || get_bw(x0,x0+dx/32,y1-dy/32,y1,box1->p,cs,1) == 1 ) ) Break; // ~D
+
+ // search lowest inner white point
+ for(y=dy,j=x=0;x<dx;x++) {
+ i =loop(bp,x,dy-1 ,y1-y0,cs,0,UP);
+ i+=loop(bp,x,dy-1-i,y1-y0,cs,1,UP);
+ if (i<=y) { y=i; j=x; }
+ } i=y; // i = smallest sum found, j = column where it was found
+ // italic a
+ for(y=dy-1-i;y<dy-1;y++)
+ if( num_cross(j,dx-1,y,y,bp,cs) > 1 ) ad=99*ad/100; // ~a \it a
+ for(y=0;y<dy-1-i;y++)
+ if( num_cross(0,dx-1,y,y,bp,cs) > 2 ) ad=98*ad/100; // ~a \it a
+ if (loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)<dx/8) ad=98*ad/100; // \it a
+ if (loop(bp,dx-1, 0,x1-x0,cs,0,LE)<dx/8) ad=98*ad/100; // \it a
+ if (loop(bp,dx-1,dy-1-dy/8,x1-x0,cs,0,LE)+1+dx/16
+ <loop(bp, 0,dy-1-dy/8,x1-x0,cs,0,RI))
+ { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // \it a
+ if (loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)+1+(dy+3)/8
+ <loop(bp, 0,dy-1,y1-y0,cs,0,UP))
+ { ad=98*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // \it a
+
+ if (abs(loop(bp,dx/2, 0,dy,cs,0,DO)
+ -loop(bp,dx/2,dy-1,dy,cs,0,UP))>dy/8
+ || num_cross(0,dx-1, 0, 0,bp,cs) > 1
+ || num_cross(0,dx-1,dy-1,dy-1,bp,cs) > 1
+ ) ad=98*ad/100; // ~bq
+
+ if( hchar && 2*y0<box1->m1+box1->m2 ) i=1; else i=0; // i=1 selects the capital 'O' below
+ if (gchar) ad=99*ad/100;
+ bc='o';
+ if( i ){ bc='O'; }
+ if ( bc=='O' && ad>99) ad=99; /* we can never be 100% sure, 0O */
+ Setac(box1,bc,ad);
+ if (bc=='O') Setac(box1,'0',ad); // zero is offered with the same weight as 'O'
+ if (bc=='o') Setac(box1,'0',98*ad/100); // and with a reduced weight next to 'o'
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_pP(ocr0_shared_t *sdata){ /*
+ Rule-based test whether the box described by sdata looks like 'p' or 'P'.
+ The candidate is 'p' unless hchar is set and (gchar is clear or dy<14),
+ which selects 'P'. An accepted candidate is registered with
+ Setac(box1,bc,ad); the function returns box1->c.
+ Calls taking bp use box-relative coordinates (0..dx-1, 0..dy-1), calls
+ taking box1->p use absolute coordinates (x0..x1, y0..y1).
+ NOTE(review): get_bw, num_cross, num_hole, loop, Setac, Break and DBG are
+ defined outside this chunk; remarks about them are assumptions to confirm.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box */
+ ad; /* weight handed to Setac; starts at 100 and is only scaled down here */
+ wchar_t bc=UNKNOWN;
+
+ // --- test pP ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4; body runs at most once (it ends with break)
+ DBG( wchar_t c_ask='p'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( get_bw(0 , dx/2,3*dy/4,3*dy/4,bp,cs,1) != 1 ) Break;
+ if( get_bw(0 , dx/2, dy/2, dy/2,bp,cs,1) < 1 ) Break;
+ if( get_bw(dx/4, dx-1, dy/4, dy/4,bp,cs,1) != 1 ) Break;
+ i= loop(bp,dx-1,3*dy/4,dx,cs,0,LE); if (i<dx/4) Break;
+ if( num_cross(x1-3*i/4,x1-3*i/4, y0, y1-3*dy/16,box1->p,cs) != 2 ) // one of these three columns has to give 2
+ if( num_cross(x0+dx/2 ,x0+dx/2 , y0, y1-3*dy/16,box1->p,cs) != 2 )
+ if( num_cross(x0+dx/2+1,x0+dx/2+1, y0, y1-3*dy/16,box1->p,cs) != 2 ) Break;
+ if( num_cross(0,dx-1,7*dy/8 ,7*dy/8 ,bp,cs) != 1 )
+ if( num_cross(0,dx-1,7*dy/8-1,7*dy/8-1,bp,cs) != 1 ) Break;
+ if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 )
+ if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 3 ) // \it p with nice curve
+ if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) // same test as two lines above
+ if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break;
+
+ i= loop(bp,0,dy/2,dx,cs,0,RI); if(i<1) i++; // keeps i-1 below from becoming negative
+ if( num_cross(i-1,dx-1, dy/4 , dy/4 ,bp,cs) != 2 )
+ if( num_cross(i-1,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break;
+
+ i1= loop(bp, 0,3*dy/8,dx,cs,0,RI); if (i1>=dx/2) ad=90*ad/100;
+ i2=i1+loop(bp,i1,3*dy/8,dx,cs,1,RI); // upper x-position of v line
+ i3= loop(bp, 0,7*dy/8,dx,cs,0,RI);
+ i4=i3+loop(bp,i3,7*dy/8,dx,cs,1,RI); // lower x-position of v line
+ // out_x(box1);printf(" p:");
+ for ( y=dy/8; y<7*dy/8; y++ ){
+ x=i2+ (8*y-3*dy)*(i4-i2)/(4*dy); // right limit of line (interpolated between i2 at 3*dy/8 and i4 at 7*dy/8)
+ i= loop(bp,0,y,dx,cs,0,RI); if(i>x+dx/16) break;
+ } if ( y<7*dy/8 ) Break; // a row exceeded the limit, the scan was left early
+ for ( x=0,j=y=dy/3; y<dy-dy/8; y++ ){ // search for the lower edge (also for 4x6 fonts)
+ i=loop(bp,dx-1,y,dx,cs,0,LE);
+ if ( i>x ) { x=i; j=y; } if(x>dx/2) break;
+ } if ( x<dx/2 || x>=dx) Break;
+ if( get_bw(3*dx/4,dx-1, y , dy-1,bp,cs,1) == 1 ) Break;
+
+ i=num_hole (x0,x1,y0,y1-dy/5,box1->p,cs,NULL); // region without the lowest dy/5 rows
+ // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL);
+ j=sdata->holes.num; // hole count taken from sdata instead of recounting
+
+ if (j!=1 && dx< 8) ad=96*ad/100;
+ if (j!=1 && dx>=8) ad=98*ad/100;
+ if (i==0 && j==0) ad=90*ad/100; /* some times there is a small gap */
+ if (i>1 || j>1 || j>i) Break;
+
+ // check for serif F
+ i= loop(bp,bp->x-1, bp->y/4, dx ,cs,0,LE);
+ i=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE);
+ j= loop(bp,bp->x-1-i,bp->y/4,3*dy/4,cs,0,DO);
+ if (j>dy/2) ad=80*ad/100; // it is a serif F
+
+ if( ((!hchar) && (!gchar)) || (hchar && gchar)) ad=95*ad/100;
+ bc='p';
+ if( hchar && ((!gchar) || dy<14)) bc='P';
+ if ( hchar && gchar) ad=98*ad/100; // \ss sz
+ if ((!hchar) && !gchar) ad=98*ad/100;
+
+ Setac(box1,bc,ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_qQ(ocr0_shared_t *sdata){ /*
+ Rule-based tests for 'Q' and 'q'. The two tests run one after the other
+ and are independent: each starts with ad=100 and registers its own
+ candidate with Setac(box1,char,ad) if it is not rejected. The function
+ returns box1->c.
+ Calls taking bp use box-relative coordinates (0..dx-1, 0..dy-1), calls
+ taking box1->p use absolute coordinates (x0..x1, y0..y1).
+ NOTE(review): get_bw, num_cross, num_hole, loop, turmite, Setac, Break and
+ DBG are defined outside this chunk; remarks about them are assumptions.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box */
+ ad; /* weight handed to Setac; starts at 100 and is only scaled down here */
+
+ // --- test Q ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>4;){ // min 3x5 (dx>2, dy>4); body runs at most once
+ DBG( wchar_t c_ask='Q'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( get_bw(x0 ,x0+dx/3,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/3,x1 ,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2,x0+dx/2,y1-dy/3,y1, box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2,x0+dx/2,y0+dy/3,y1-dy/2,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x1 ,x1 ,y0 ,y0 ,box1->p,cs,1) == 1 ) Break; //alpha
+ if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) < 2 ) Break;
+ if( num_cross(x0+dx/5,x1-dx/5,y0 , y0 ,box1->p,cs) != 1 ) // AND
+ if( num_cross(x0+dx/5,x1-dx/5,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
+ if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
+ if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
+ if( get_bw(x1 ,x1 ,y1-dy/8 , y1 ,box1->p,cs,1) == 0 )
+ if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
+ if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
+ // i=num_hole(x0,x1,y0,y1,box1->p,cs,NULL);
+ i=sdata->holes.num;
+ if(!i) Break; // at least one hole is required
+ if( i!=1 && (i!=2 || num_hole(x0,x1,y0+dy/2,y1,box1->p,cs,NULL)!=1) ) Break; // 2 holes only if the lower half contains exactly 1
+ x=x1;y=y1; // start at the lower right corner of the box
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x<x1-dx/2 ) Break;
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE);
+ if( x<x1-dx/2 ) { if (gchar) ad=98*ad/100; else ad=90*ad/100; }
+ if( loop(bp,0 ,0 ,dx,cs,0,RI)
+ < loop(bp,0 ,2 ,dx,cs,0,RI) ) Break;
+ if( loop(bp,0 ,dy/8+2,dx,cs,0,RI)
+ +loop(bp,dx-1,dy/8+2,dx,cs,0,LE) > 5*dx/8 ) Break; // ~4 Oct00
+
+ x= loop(bp,dx-1,3*dy/8,dy,cs,0,LE); if( x>dx/4 ) Break;
+ if( loop(bp,dx-1-x,0 ,dy,cs,0,DO)
+ <= loop(bp,dx-2-x,0 ,dy,cs,0,DO) ) Break; // 4
+
+ if( loop(bp,dx-1,dy-2,dx,cs,0,LE)
+ <= loop(bp,dx-1,dy/2,dx,cs,0,LE) )
+ if( loop(bp, 1,dy-1,dy,cs,0,UP)
+ <= loop(bp,dx/2,dy-1,dy,cs,0,UP) )
+ if( loop(bp, 0,dy-2,dx,cs,0,RI)>dx/2 )
+ if( loop(bp, 0, 0,dx,cs,0,RI)>dx/2 ) Break; // 4
+
+ if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)
+ + loop(bp, 0,3*dy/4,dx,cs,0,RI)
+ < loop(bp,dx-1,2*dy/4,dx,cs,0,LE)
+ + loop(bp, 0,2*dy/4,dx,cs,0,RI) ) ad=94*ad/100; // 4
+ if( loop(bp,0 ,3*dy/4,dx,cs,1,RI) >= dx ) ad=94*ad/100; // 4
+
+
+ if( loop(bp,dx-1,dy/3,dx,cs,0,LE)> dx/4 ) Break;
+ j=loop(bp,dx/2,dy-1,dy,cs,0,UP);
+ if (j>1 && j>dy/8) {
+ if( get_bw(0,dx/2,dy-1-j/2,dy-1-j/2,bp,cs,1) == 1 ) { // ~RA
+ if (j<5) ad=95*ad/100;
+ else Break;
+ }
+ }
+
+ // italic a
+ for(i=0,y=0;y<dy/2;y++)
+ if( num_cross(0,dx-1,y,y,bp,cs) > 2 ) i++; if(i>dy/8) Break; // ~a \it a (the i>dy/8 test runs once, after the counting loop)
+ if (i>0) ad=99*ad/100;
+
+ // ~o look at the lower right side for falling line
+ for(j=x=0,y=dy/2;y<dy;y++){
+ i=loop(bp,dx-1,y,dx,cs,0,LE);if(i>x){ x=i; }
+ if (x-i>j) j=x-i; // j = largest drop below the running maximum x
+ if( j>dx/16 ) Break; // falling line detected (leaves only this scan loop)
+ }
+ if (j==0) Break; // no falling line => no Q
+ if (j<=dx/16) ad=98*ad/100;
+ if(y1<=box1->m3) ad=98*ad/100; // ~q no underlength! rare
+ if(!hchar) ad=96*ad/100;
+ Setac(box1,'Q',ad);
+ break;
+ }
+ // --- test q ---------------------------------------------------
+ for(ad=d=100;dx>2 && dy>3;){ // min 3x4; body runs at most once
+ DBG( wchar_t c_ask='q'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ for ( y=y0; 2*y<=y0+y1; y++ ){ // detect ring
+ if( num_cross(x0,x1, y, y,box1->p,cs) == 2 ) Break; // found (leaves only this scan loop)
+ } if (2*y>y0+y1) Break; /* < */
+ for ( y=(y0+y1)/2; y<=y1; y++ ){ // detect vert line
+ if( num_cross(x0, x1, y, y,box1->p,cs) == 1
+ && num_cross(x0,x0+dx/2, y, y,box1->p,cs) == 0 ) Break; // found (leaves only this scan loop)
+ } if (y>y1) Break; /* O (y==y1 for 4x6font-q) */
+ for ( x=0,j=y=y0+dy/3; y<=y1-dy/8; y++ ){ // detect baseline
+ i=loop(box1->p,x0,y,dx,cs,0,RI);
+ if ( i>x ) { x=i; j=y; } // x = largest value so far, j = row where it occurred
+ if ( x>dx/2 ) break;
+ } if ( x<dx/2 || x>=dx) Break;
+ if (y1-j+1<dy/4) ad=96*ad/100; // ~\it{a}
+ if( num_cross(x0+x/2,x0+x/2, j, y1,box1->p,cs) != 0 ) ad=96*ad/100; // ~g
+ if( loop(box1->p,x0+dx/16,j,dy,cs,0,UP)<1+dy/16 ){
+ ad=97*ad/100;
+ if (hchar || !gchar) Break; // 4
+ }
+ if( loop(box1->p,x0+dx/16,j-dy/32-1,dy,cs,1,RI)>=dx-dx/8
+ || loop(box1->p,x0+dx/16,j-dy/16-1,dy,cs,1,RI)>=dx-dx/8 ){
+ ad=96*ad/100; // 4
+ }
+ if( get_bw(x1-dx/3, x1, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0, x0+dx/3, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0, x0+dx/4, y1-dy/8, y1-dy/9,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x0, x0+dx/4, y1-dy/5, y1-dy/9,box1->p,cs,1) == 1 ) ad=99*ad/100;
+ if( num_cross(x0+dx/2,x0+dx/2, y0, j ,box1->p,cs) != 2 ) Break;
+ // if( num_hole (x0 ,x1 , y0, y1 ,box1->p,cs,NULL) != 1 )
+ if (sdata->holes.num != 1)
+ { if (dx<16) ad=98*ad/100; else Break; } // small boxes only lose weight, larger ones are rejected
+ if( num_hole (x0 ,x1 , y0, j ,box1->p,cs,NULL) != 1 )
+ { if (dx<16) ad=98*ad/100; else Break; }
+ // ~\it g
+ if( loop(bp,0,dy-1-dy/4,dx,cs,0,RI)>5*dx/8
+ && get_bw(dx/4,dx/4,dy-1-dy/4,dy-1,bp,cs,1)==1 ) Break; // ~\it g
+ // what about unsure m1-m4?
+ if(!gchar){ ad=ad*99/100; } // ~4
+ if( hchar){ ad=ad*99/100; } // ~49
+ Setac(box1,'q',ad);
+ break;
+ }
+ return box1->c;
+}
+
+static wchar_t ocr0_iIjJ(ocr0_shared_t *sdata){ /*
+ Rule-based tests for 'i', 'j', 'I' and 'J', executed in this order.
+ Each test is a one-pass for loop that starts with ad=100, scales ad down
+ on weak evidence and registers its candidate with Setac(box1,char,ad)
+ unless it is rejected. The 'i' test contains one early "return 'i'";
+ in every other case the function returns box1->c.
+ Calls taking bp use box-relative coordinates (0..dx-1, 0..dy-1), calls
+ taking box1->p use absolute coordinates (x0..x1, y0..y1).
+ NOTE(review): get_bw, num_cross, loop, get_line2, nearest_frame_vector,
+ line_deviation, sq, Setac, Break, MSG and DBG are defined outside this
+ chunk; remarks about them are assumptions to confirm.
+ */
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar,
+ ax,ay,bx,by,cx,cy,ex,ey,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ ad,ya,yb,yc,yd,ye,yf,xa,xb, /* tmp-vars */
+ (*aa)[4]=sdata->aa; /* the four line ends, (x,y,dist^2,vector_idx) */
+
+ // --- test i ---------------------------------------------------
+ // if(box1->dots==1) // what about \it neighbouring ij
+ for(ad=d=100;dy>3 && dx>0;){ // min 3x4 without dot (the code only requires dy>3, dx>0)
+ DBG( wchar_t c_ask='i'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ // ToDo: ':' check that high of dot is smaller than the vert. line!
+ /*
+ * o <== ya
+ * o
+ *
+ * ooo <== yb
+ * o
+ * o
+ * o
+ * ooo
+ */
+ ya=y0;
+ if (box1->dots!=1) ad=98*ad/100;
+ while(dy>3*dx && box1->m2){ // test for vertical i without detected dot (body runs at most once: every path ends in break or return)
+ i= loop(bp,dx/2,dy-1 ,dy,cs,0,UP);
+ if (dy-1-i<box1->m3-2) break;
+ i+=loop(bp,dx/2,dy-1-i,dy,cs,1,UP);
+ // distance upper end to m2 > (m2-m1)/3
+ if (3*abs(dy-1-i-box1->m2)>box1->m2-box1->m1) break;
+ if( get_bw(x0,x1,y0,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
+ if( get_bw(x0,x1,y1-i ,y1-i ,box1->p,cs,1) == 0
+ || get_bw(x0,x1,y1-i-1,y1-i-1,box1->p,cs,1) == 0
+ || get_bw(x0,x1,y1-i-2,y1-i-2,box1->p,cs,1) == 0 )
+ {
+ Setac(box1,'i',ad);
+ return 'i'; /* believe me, that's an "i"! */
+ } break;
+ }
+// if( box1->dots!=1 ) Break;
+ if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1; // start the scan for the dot at m1 instead of y0
+
+// out_x(box1);
+ for (y=ya;2*y<ya+y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
+ if (2*y>=ya+y1) Break; // hmm, gap only, no dot?
+ ya=y; // first row of the dot (see sketch above)
+ if (box1->m2 && ya>box1->m2+2) Break;
+ for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
+ if (2*y>=ya+y1) Break; // hmm no gap
+ for ( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
+ yb=y; // first row below the gap (see sketch above)
+ if (5*yb>=3*ya+2*y1) ad=99*ad/100; // large gap
+ if (2*yb>= ya+ y1) ad=97*ad/100; // very large gap, ~:
+ if (5*yb>=2*ya+3*y1) Break; // huge gap, ~:
+ if (loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2) // unusual (right part of ouml)
+ ad=95*ad/100;
+
+ // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
+ // printf(" dots=%d\n",box1->dots); out_x(box1);
+ // \sl ~f. !
+ for (y=y1;y>ya;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
+ if (y>(ya+3*y1)/4) Break;
+ if (y>(ya+2*y1)/3) ad=96*ad/100;
+
+ y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */
+ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) Break;
+ for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y; // scan downwards from the middle of the lower part
+ for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
+ if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
+ y=(y1-yb+1)/2+yb-y0;
+ for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y; // same scan, upwards
+ for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
+ if( yf>(y1-yb+1)/4+yb-y0 ) Break;
+ if(yd>yc+2){
+ xa=loop(bp, 0,yc-1,dx,cs,0,RI);
+ xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
+ if(
+ xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
+ > xa-loop(bp, 0,yc,dx,cs,0,RI) ){
+ y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
+ if(y>0){
+ i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
+ if( i>0 ) y+=i-1;
+ }
+ if( yc-1+y < yd-1 ) Break;
+ } else {
+ y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
+ if( yc-1+y < yd-2 ) Break;
+ }
+ }
+ if(yf<ye-2){
+ x=loop(bp,0 ,ye+1,dx,cs,0,RI);
+ y=loop(bp,x-1,ye+1,dy,cs,0,UP);
+ i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
+ if( i>0 ) y+=i-1;
+ if( ye+1-y > yf+1 ) Break;
+ }
+ if( 2*y0 <= box1->m1+box1->m2
+ && loop(bp,0, 0,dx,cs,0,RI)+1
+ < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100; // NOTE(review): the row argument of the second loop() is dx/2, not dy/2 - confirm intent
+
+ if( gchar ) // i is more often than j, be sure that really correct May00
+ if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
+ -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)>dx/8 ) Break;
+
+ // could be a broken + or similar thing?
+ if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=90*ad/100;
+
+ if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/2
+ && loop(bp,dx-1, dy-1,dx,cs,0,LE)<dx/4 ) Break; // ~d=cl
+
+ // test for é
+ if( dx>5 && num_cross(x0+dx/2,x0+dx/2, ya, y1 ,box1->p,cs) >= 3 )
+ ad=95*ad/100;
+
+ Setac(box1,'i',ad);
+ break;
+ }
+ // --- test j ---------------------------------------------------
+ // if(box1->dots==1) // what about \it neighbouring ij
+ for(ad=d=100;dy>4 && dx>0;){ // min 3x4 (the code only requires dy>4, dx>0)
+ DBG( wchar_t c_ask='j'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ ya=y0;
+ if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1;
+
+ for(y=ya;2*y<ya+y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
+ if(2*y>=ya+y1) Break; // hmm only gap
+ ya=y; // first row of the dot
+ if( box1->m2 && ya>box1->m2+2 ) Break;
+ for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
+ if(2*y>=ya+y1) Break; // hmm no gap
+ for( ;2*y<y1+ya;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
+ if(2*y>=ya+y1) Break; // hmm very large gap
+ yb=y; // first row below the gap
+ if( loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2 ) Break; // unusual (right part of ouml)
+
+ // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs));
+ // printf(" dots=%d\n",box1->dots); out_x(box1);
+ // \sl ~f. !
+ for(y=(ya+y1)/2;y<=y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break;
+ if(y<=y1) Break;
+
+ y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */
+ if( num_cross(0,dx-1,y,y,bp,cs) >2 ) Break;
+ for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y;
+ for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y;
+ if( yd<3*(y1-yb+1)/4+yb-y0 ) Break;
+ y=(y1-yb+1)/2+yb-y0;
+ for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y;
+ for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y;
+ if( yf>(y1-yb+1)/4+yb-y0 ) Break;
+ if(yd>yc+2){
+ xa=loop(bp, 0,yc-1,dx,cs,0,RI);
+ xb=loop(bp,dx-1,yc-1,dx,cs,0,LE);
+ if(
+ xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */
+ > xa-loop(bp, 0,yc,dx,cs,0,RI) ){
+ y= loop(bp,dx-xb,yc-1,dy,cs,0,DO);
+ if(y>0){
+ i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO);
+ if( i>0 ) y+=i-1;
+ }
+ if( yc-1+y < yd-1 ) Break;
+ } else {
+ y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO);
+ if( yc-1+y < yd-2 ) Break;
+ }
+ }
+ if(yf<ye-2){
+ x=loop(bp,0 ,ye+1,dx,cs,0,RI);
+ y=loop(bp,x-1,ye+1,dy,cs,0,UP);
+ i=loop(bp,x ,ye+2-y,dy,cs,0,UP);
+ if( i>0 ) y+=i-1;
+ if( ye+1-y > yf+1 ) Break;
+ }
+ if( 2*y0 <= box1->m1+box1->m2
+ && loop(bp,0, 0,dx,cs,0,RI)+1
+ < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100;
+ if (loop(bp,0,dy-1,dx,cs,0,RI)
+ -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c
+
+ if( gchar ) // i is more often than j, be sure that really correct May00 (inverse of the condition in the i test)
+ if( loop(bp, 0,2*dy/4,dx,cs,0,RI)
+ -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)<=dx/8 ) Break;
+ // could be a broken + or similar thing?
+ if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=80*ad/100;
+ if (!gchar) ad=96*ad/100;
+ if( box1->dots!=1 ) ad=98*ad/100;
+
+ Setac(box1,'j',ad);
+
+ break;
+ }
+ // --- test I ---------------------------------------------------
+ for(ad=d=100;dy>4 && dy>dx && 5*dy>4*(box1->m3-box1->m2);){ // min 3x4
+ DBG( wchar_t c_ask='I'; )
+ if( box1->dots==1 ) Break;
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+
+ x =loop(bp,0, dy/2,dx,cs,0,RI); // convex? divided Q
+ if(loop(bp,0,7*dy/8,dx,cs,0,RI) > x+dx/8) Break;
+ for( y=dy/16;y<dy-1-dy/16;y++ )
+ if( num_cross(0, dx-1, y , y ,bp,cs) != 1 )
+ if( num_cross(0, dx-1, y+dy/16 , y+dy/16 ,bp,cs) != 1 ) break;
+ if( y<dy-1-dy/16 ) Break;
+ x =loop(bp,0, dy/2,dx,cs,0,RI);
+ i5=loop(bp,x, dy/2,dx,cs,1,RI); // center width
+ for(y=dy/4;y<3*dy/4;y++ ){ // same width ?
+ x =loop(bp,0, y,dx,cs,0,RI);
+ x =loop(bp,x, y,dx,cs,1,RI); // width
+ if( abs(x-i5)>1+dx/8 ) break;
+ } if( y<3*dy/4 ) Break;
+ // out_x(box1);
+
+ // upper max width
+ for(i2=i1=0,y=0;y<dy/4;y++ ){
+ x =loop(bp,0, y,dx,cs,0,RI);
+ x =loop(bp,x, y,dx,cs,1,RI); if(x>i1){ i1=x;i2=y; }
+ }
+ for(i4=i3=0,y=3*dy/4;y<dy;y++ ){ // lower max width
+ x =loop(bp,0, y,dx,cs,0,RI);
+ x =loop(bp,x, y,dx,cs,1,RI); if(x>i3){ i3=x;i4=y; }
+ }
+ if( abs(i3-i1)>1+dx/8 ) Break; // if i3>>i5 more sure!
+ if( i1>i5 ){ // look for edges else *80% (empty placeholder, has no effect)
+ }
+ if(i1+1<i5 && !hchar) Break; // Jun00
+
+ // calculate upper and lower mass center
+ x =loop(bp,0, dy/8,dx,cs,0,RI); i1=x;
+ x+=loop(bp,x, dy/8,dx,cs,1,RI); i1=(i1+x-1)/2;
+
+ x =loop(bp,0,dy-1-dy/8,dx,cs,0,RI); i2=x;
+ x+=loop(bp,x,dy-1-dy/8,dx,cs,1,RI); i2=(i2+x-1)/2;
+ x =loop(bp,0,dy-2-dy/8,dx,cs,0,RI); i=x;
+ x+=loop(bp,x,dy-2-dy/8,dx,cs,1,RI); i=(i+x-1)/2; if( i>i2 ) i2=i;
+
+ // printf(" get_line(%d,%d) %d\n",i1,i2,
+ // get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100));
+ if( get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)<95 ) Break;
+ x =(i1-i2+4)/8; i1+=x; i2-=x;
+
+ // upper and lower width (what about serifs?)
+ y=dy/8;
+ x =loop(bp,i1, y+0,dx,cs,1,LE); i=x;
+ x =loop(bp,i1, y+1,dx,cs,1,LE); if(x>i)i=x;
+ x =loop(bp,i1, y+0,dx,cs,1,RI); j=x;
+ x =loop(bp,i1, y+1,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
+ x =loop(bp,i2,dy-y-1,dx,cs,1,LE); j=x;
+ x =loop(bp,i2,dy-y-2,dx,cs,1,LE); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
+ x =loop(bp,i2,dy-y-1,dx,cs,1,RI); j=x;
+ x =loop(bp,i2,dy-y-2,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break;
+
+ if(dy>15) // v024a4
+ if( loop(bp,dx-1,dy/16 ,dx,cs,0,LE)
+ > loop(bp,dx-1,dy/4 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad ) (thin)
+
+ for(i=0,y=dy/16;y<15*dy/16 && i<2;y++)
+ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
+ if( i>1 ) Break; // at most one row may deviate
+
+ if(!hchar){ // right part (bow) of h is never a l
+ if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1
+ && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break;
+ if( loop(bp, 0,dy/4,dx,cs,0,RI)> dx/4
+ && loop(bp,dx-1,dy/4,dx,cs,0,LE)<=dx/4
+ && loop(bp, 1, 0,dy,cs,0,DO)<=dy/4 ) Break; // ~z
+ }
+
+ if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2
+ && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2
+ && get_bw(x0,x0,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~] */
+
+ if ( loop(bp,dx-1, dy/4,dx,cs,0,LE) > dx/2
+ && loop(bp,dx-1,3*dy/4,dx,cs,0,LE) > dx/2
+ && loop(bp, 0, dy/2,dx,cs,0,RI) < dx/4 ) Break; /* ~[ */
+
+ x =loop(bp, 0,dy/2,dx,cs,0,RI); // convex/concave? ~()
+ i =loop(bp,dx-1,dy/2,dx,cs,0,LE);
+ if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8
+ && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8
+ && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8
+ && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~(
+ if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8
+ && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8
+ && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8
+ && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~)
+ if( loop(bp, 0, dy/8,dx,cs,0,RI)
+ -(dx-loop(bp,dx-1,7*dy/8,dx,cs,0,LE)) > dx/4 ) Break; // ~/
+ if( loop(bp, 0, 0,dx,cs,0,RI) > dx/2 // ToDo: check for serifs
+ && loop(bp, 0, dy/8,dx,cs,0,RI) > dx/2
+ && loop(bp,dx-1,dy-1 ,dx,cs,0,LE) > dx/2
+ && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/2 ) ad=99*ad/100; // ~/
+
+ if (box1->m2 && 3*y0>box1->m1+2*box1->m2)
+ if( get_bw(x0+dx/8,x1-dx/8,box1->m1,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 )
+ Break; // ~i
+
+ if(i1+1<i5 && !hchar){ ad=65*ad/100; MSG({}) } // ~ slanted I (i1 holds the shifted upper mass center here)
+
+ // be sure only for serif
+ i3=loop(bp,dx-1, dy/4,dx,cs,0,LE);
+ i4=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI);
+ if (i3<2 || i4<2
+ || get_bw(x1-i3/4,x1-i3/4,y0,y0+dy/4,box1->p,cs,1) != 1
+ || get_bw(x0+i4/4,x0+i4/4,y1-dy/4,y1,box1->p,cs,1) != 1 )
+ { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ToDo: improve it
+ if(!hchar){ ad=96*ad/100; MSG({}) } // ~bad_small_r
+ if (box1->m4 && y1<box1->m4) { // probably lower dot?
+ if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
+ || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1)) {
+ ad=96*ad/100;
+ }
+ } // ~!
+ // a---b
+ // I
+ // I
+ // c---e
+ // check against Z
+ for(bx=0,ax=dx,ay=by=y=0;y<dy/4;y++){ // scan the upper quarter for the corners a and b
+ i =loop(bp,dx-1 ,y,dx,cs,0,LE); if (dx-i-1>bx) { bx=dx-1-i; by=y; }
+ i+=loop(bp,dx-1-i,y,dx,cs,1,LE); if (dx-i-1<ax) { ax=dx-i; ay=y; }
+ }
+ for(cx=dx,ex=0,ey=cy=y=dy-1;y>dy-1-dy/4;y--){ // scan the lower quarter for the corners c and e
+ i =loop(bp,0,y,dx,cs,0,RI); if (i<cx) { cx=i; cy=y; }
+ i+=loop(bp,i,y,dx,cs,1,RI); if (i>ex) { ex=i; ey=y; }
+ }
+ x=(3*ax+cx)/4; y=(3*ay+cy)/4; i= loop(bp,x,y,dx,cs,0,RI); // start a quarter of the way from a to c
+ x=(3*bx+ex)/4; y=(3*by+ey)/4; j= loop(bp,x,y,dx,cs,0,LE); // start a quarter of the way from b to e
+ if (j>0 && (2*i>3*j || 3*i<2*j )) ad=99*ad/100;
+ if (j>0 && ( i>2*j || 2*i< j )) ad=97*ad/100;
+ i=loop(bp,0,0,dy,cs,0,DO);
+ if (i>dy/8 && i<dy/2) ad=99*ad/100; // ~1
+ if (loop(bp,dx-1,0,dx,cs,0,LE)
+ -loop(bp, 0,0,dx,cs,0,RI)>dx/4) ad=96*ad/100; // ~l 5x7
+
+ if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100;
+ if (gchar) ad=98*ad/100; // J
+ if (box1->m3 && 2*y1<=box1->m2+box1->m3) ad=96*ad/100; // '
+
+ Setac(box1,'I',ad);
+ break;
+ }
+ // --- test J --------------------------------------------------- 22Nov06
+ for(ad=d=100;dy>4 && dy>=dx && dx>2;){ // min 3x4 ~Y)]d',
+ // rewritten for vectors 0.42
+ int ld, i1, i2, i3, i4, i5, i6, i7; // line deviation + corners (i1..i5 shadow the outer variables)
+ DBG( wchar_t c_ask='J'; )
+ if (sdata->holes.num > 0) Break; /* no hole */
+ /* half distance to the center */
+ d=2*sq(128/4);
+ /* now we check for the upper right end of the J */
+ if (aa[3][2]>d) Break; /* [2] = distance */
+ /* searching for 4 notches between neighbouring ends */
+
+/*
+ type A B
+
+ 6OOOO 6O5
+ 7O5 7O
+ O O
+ O O
+ 2O 1O4 1O4
+ OO 2OO
+ 3 3
+*/
+
+ /* Warning: aa0 can be left upper or left lower point for type B */
+ /* get a point on the inner low left side of the J */
+ i =nearest_frame_vector(box1,aa[3][3],aa[1][3],(x0+x1)/2,y0);
+ i1=nearest_frame_vector(box1,i ,aa[1][3], x1+dx,(y0+3*y1)/4);
+ /* get the most left point on the lower part of the J */
+ i2=nearest_frame_vector(box1,i1,aa[3][3], x0-2*dx, y1-dy/8);
+ /* get a point on the middle of the bottom of the J */
+ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], (x0+x1)/2, y1);
+ /* get a point on the outer low right side of the J */
+ i4=nearest_frame_vector(box1,aa[1][3],aa[3][3], x1, (y0+2*y1)/3);
+ /* get a point on the outer right side below top serif */
+ i5=nearest_frame_vector(box1,aa[2][3],aa[3][3], (x0+2*x1)/3,y0);
+ /* get a point on the left side of upper serif */
+ i6=nearest_frame_vector(box1,aa[3][3],i1, x0, y0);
+ /* get a point on the most right left side of upper serif */
+ i7=nearest_frame_vector(box1,i6,i1, x1, y0);
+ MSG(fprintf(stderr," i1-i7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);)
+
+ /* check the highest point on lower left area */
+ i =nearest_frame_vector(box1,i1,i3,x0,y0);
+ if (box1->frame_vector[i ][1]-y0<dy/4) Break; // U
+ if (box1->frame_vector[i ][1]-y0<=dy/2) ad=97*ad/100; // imperfect a
+ /* check the lowest point on upper left area, serif? */
+ j =nearest_frame_vector(box1,i6,i7,x0,y1);
+ if (box1->frame_vector[i ][1]
+ -box1->frame_vector[j ][1]<=dy/4) Break; // imperfect a
+ if (box1->frame_vector[i7][1]>y0+dy/4) Break; // not too low
+ if (box1->frame_vector[i1][1]
+ -box1->frame_vector[i7][1]<dy/2) Break;
+ if (box1->frame_vector[i4][1]
+ -box1->frame_vector[i5][1]<dy/2) Break;
+ if (box1->frame_vector[i7][0]<x0+dx/2) Break;
+ if (box1->frame_vector[i1][0]
+ -box1->frame_vector[i2][0]<=dx/8) Break; // ~1
+ if (box1->frame_vector[i1][0]
+ -box1->frame_vector[i2][0]<=dx/4) ad=ad*99/100; // ~1
+ if (box1->frame_vector[i6][1]>y0+dy/8) ad=99*ad/100; // ~1
+ if (aa[0][2]==0) { // ]?
+ ad=99*ad/100;
+ if (aa[1][2]==0) ad=98*ad/100;
+ if (aa[2][2]<=aa[3][2]) ad=97*ad/100;
+ }
+
+ /* check for left bow */
+ for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[ i][0] /* [0]=x */
+ <box1->frame_vector[i1][0]) break; /* curve? */
+ } if (i==i4) Break; // ~I
+ /* check for no right bow */
+ for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[ i][0] /* [0]=x */
+ >box1->frame_vector[i4][0]) break;
+ } if (i!=i4) Break; // ~I
+ /* check that no vector between i5 and i6 lies below the upper quarter */
+ for (j=i=i5;i!=i6;i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[ i][1] > y0+dy/4) break;
+ } if (i!=i6) Break; // ~Y
+ /* check if upper left and lower left points are joined directly */
+ ld=line_deviation(box1, i7, i1);
+ MSG(fprintf(stderr," i7,i1 %d %d linedist= %d/%d",i7,i1,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+ if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
+ if (6*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
+ if (7*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
+ if (8*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3
+ /* check if lower right and upper right points are joined directly */
+ ld=line_deviation(box1, i4, i5);
+ MSG(fprintf(stderr," i4,i5 %d %d linedist= %d/%d",i4,i5,ld,2*sq(1024/4));)
+ if (ld >2*sq(1024/4)) Break;
+ if (5*ld >4*2*sq(1024/4)) ad=99*ad/100;
+
+ // J exists as gchar and ~gchar
+ if(!hchar){ ad=99*ad/100; }
+ Setac(box1,'J',ad);
+ break;
+ }
+ return box1->c;
+}
+
+/* ocr0_brackets: rule based tests for the bracket-like characters
+ * > / \ ( ) < [ ] { } on the box sdata->box1.
+ *
+ * Every test is written as a for(init;size-condition;){...} block which is
+ * always left by break or Break at the end of its first pass, so it acts
+ * like an "if" with early exits. A test which accepts the box reports its
+ * candidate through Setac(box1,character,certainty).
+ * NOTE(review): Break, Setac, MSG, DBG, loop, num_cross, get_bw,
+ * line_deviation and sq are defined outside of this function; Break is
+ * presumably "break" plus optional debug output - confirm.
+ *
+ * sdata   shared per-box data; read here: box1, bp, cs, aa, holes.num,
+ *         hchar and gchar
+ * returns box1->c as it is after all tests have run
+ */
+static wchar_t ocr0_brackets(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,i1,i2,i3,i4,i5,i6,hchar=sdata->hchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */
+ ad,r1,r2; /* tmp-vars */
+ wchar_t bc=UNKNOWN;
+
+ // --- test > derived from xX ---------------------------------------------------
+ // rewritten for vectors v0.41
+ for(ad=d=100;dx>1 && dy>2;){ // min 2x3 (dx>=2, dy>=3)
+ // 0 - indices 0,1,i1,i2 pointing to edges of the char
+ // \ .
+ // \ .
+ // i1,i2
+ // /
+ // /
+ // 1
+ DBG( wchar_t c_ask='>'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num > 0 && (dx<6 || dy<6)) Break; /* # */
+ /* upper limit for the squared distance of an end to its box corner */
+ d=2*sq(128/4);
+ /* now we check for the 2 left ends of the > */
+ if (aa[0][2]>d) Break; /* upper left end */
+ if (aa[1][2]>d) Break; /* lower left end */
+ if (aa[1][1]-aa[0][1]<dy/2) Break;
+ /* searching for 4 notches between neighbouring ends */
+
+ /* run along left side from top to bottom */
+ for (j=i=aa[0][3];i!=aa[1][3];i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[i][0]
+ >=box1->frame_vector[j][0]) j=i; /* notice most right vector */
+ } if (j==i || j==aa[0][3]) Break;
+ /* calculate the distance to the center */
+ x=box1->frame_vector[j][0];
+ y=box1->frame_vector[j][1];
+ if (2*x-aa[0][0]-aa[1][0]<dx) ad=99*ad/100;
+ if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)) Break;
+ if ( aa[0][0]+aa[1][0]-2*x>=0) Break;
+ i1=j;
+ /* fixed: d has to hold the deviation itself; the old code stored the 0/1
+ result of "deviation > sq(1024/4)", so the limit test below could never
+ fire, ad was never reduced and the debug message printed 0 or 1 */
+ d=line_deviation(box1, aa[0][3], j);
+ /* check if upper left and center point are joined directly */
+ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
+ if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
+ MSG(fprintf(stderr,"ad=%d", ad);)
+ d=line_deviation(box1, j, aa[1][3]);
+ /* check if lower left and center point are joined directly */
+ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
+ if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
+ MSG(fprintf(stderr,"ad=%d", ad);)
+
+ /* run along right side from bottom to top */
+ for (j=i=aa[1][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) {
+ if (box1->frame_vector[i][0]
+ >=box1->frame_vector[j][0]) j=i; /* notice most right vector */
+ // MSG(fprintf(stderr,"search right: %d %d %d %d",i,j,aa[1][3],aa[0][3]);)
+ } if (j==i || j==aa[1][3]) Break;
+ /* calculate the distance to the center */
+ x=box1->frame_vector[j][0];
+ y=box1->frame_vector[j][1];
+ if ( (aa[0][0]+aa[1][0]-2*x)>= 0 ) Break;
+ if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)/4) Break;
+ if (aa[0][0]>=x || aa[1][0]>=x) Break;
+ i2=j;
+ d=line_deviation(box1, j, aa[0][3]);
+ /* check if upper left and center point are joined directly */
+ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
+ if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
+ MSG(fprintf(stderr,"ad=%d", ad);)
+ d=line_deviation(box1, aa[1][3], j);
+ /* check if lower left and center point are joined directly */
+ MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));)
+ if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024);
+ MSG(fprintf(stderr,"ad=%d", ad);)
+
+ /*
+ ToDo: calculate momentums or max deviations
+ along lines to distinguish )]}>
+ i1,i2 (stored above, not used yet)
+ */
+
+ if (sdata->gchar) ad=98*ad/100;
+ if (sdata->hchar) ad=99*ad/100;
+ bc='>';
+ Setac(box1,bc,ad);
+ break;
+ }
+ // --- test /\\ ------------------------------------------------
+// if(bc==UNKNOWN)
+// if(!box1->dots)
+ for(ad=d=100;dx>3 && dy>3;){ // min 4x4 for 4x6 font
+ DBG( wchar_t c_ask='/'; )
+ if (sdata->holes.num > 0) Break; /* no hole accepted here */
+#if 1
+ /* i counts the rows which do not have exactly one crossing; the scan stops
+ at a row whose left plus right result of loop() is below 3*dx/8 */
+ for(i=y=0;y<dy;y++){
+ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
+ if( loop(bp, 0,y,dx,cs,0,RI)
+ + loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
+ }
+ if( y<dy ) Break;
+ if ( i>2 || (i>0 && dy<16)) Break;
+#endif
+ /* get the center as exact as possible */
+ i2=dx-1-loop(bp,dx-1,dy/2 ,dx,cs,0,LE) // be exact for small fonts
+ +dx-1-loop(bp,dx-1,dy/2+dy%2-1,dx,cs,0,LE)
+ + loop(bp, 0,dy/2 ,dx,cs,0,RI)
+ + loop(bp, 0,dy/2+dy%2-1,dx,cs,0,RI);
+ if (abs(i2-2*dx)>1+dx/2) Break;
+ if (abs(i2-2*dx)> dx/2) ad=99*ad/100;
+
+ i1=loop(bp,dx-1,dy/16,dx,cs,0,LE); // right side
+ i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
+ i4=loop(bp, 0,0 ,dx,cs,0,RI); // left side
+ i6=loop(bp, 0,dy-1 ,dx,cs,0,RI);
+ i=(box1->m4+box1->m3)/2-box1->m2; // value is overwritten by the loops below
+ //
+ // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
+
+ // ~lI, reject if the outline jumps by more than dx/6+1 between two rows
+ for(i=i4,y=0;y<dy;y++){
+ x=loop(bp,0 ,y,dx,cs,0,RI);if(abs(x-i)>dx/6+1 ) break; i=x;
+ } if( y<dy ) Break;
+ for(i=i1,y=0;y<dy;y++){
+ x=loop(bp,dx-1,y,dx,cs,0,LE);if(abs(x-i)>dx/6+1 ) break; i=x;
+ } if( y<dy ) Break;
+ if(i1<=dx/8 && i6<=dx/8 && i4-(dx-i3)>dx/4 ) { Setac(box1,(bc='/'),ad);break; }
+ if(i4<=dx/8 && i3<=dx/8 && i6-(dx-i1)>dx/4 ) { Setac(box1,(bc='\\'),ad);break; }
+ Break;
+ }
+ // --- test ()<> ------------------------------------------------
+// if(bc==UNKNOWN)
+// if(!box1->dots)
+ for(ad=d=100;dx>1 && dy>4;){ // min 2x5 (dx>=2, dy>=5)
+ DBG( wchar_t c_ask='('; )
+ if (sdata->holes.num > 1) {Break;}; /* tolerant against a tiny hole */
+#if 1
+ /* same row scan as in the test for / and \ above */
+ for(i=y=0;y<dy;y++){
+ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++;
+ if( loop(bp, 0,y,dx,cs,0,RI)
+ + loop(bp,dx-1,y,dx,cs,0,LE)<3*dx/8 ) break;
+ }
+ if( y<dy ) {Break;};
+ if ( i>2 || (i>0 && dy<16)) {Break;};
+#endif
+ /* look for the extrema => r1..r2 */
+ for(i=dx,r1=r2=y=dy/2-dy/8;y<=dy/2+dy/8;y++){
+ j=loop(bp, 0,y,dx,cs,0,RI); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
+ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j==i) r2=y; if(j<i){ r2=r1=y; i=j; }
+ } y=(r1+r2)/2;
+ /* i1..i3 from the right side, i4..i6 from the left side; each triple is
+ taken near the top, at row y and near the bottom */
+ i1=loop(bp,dx-1, dy/16,dx,cs,0,LE);
+ i2=loop(bp,dx-1,y ,dx,cs,0,LE);
+ i3=loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE);
+ i4=loop(bp, 0,dy/16 ,dx,cs,0,RI);
+ i5=loop(bp, 0,y ,dx,cs,0,RI);
+ i6=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI);
+ if(dx>dy){
+// from Aug06 vector-version of greater is used
+// if(i2==0 && 3*i5>dx && i4<=dx/8 && i6<=dx/8) { Setac(box1,(bc='>'),98);{Break;}; }
+ if(i5==0 && 3*i2>dx && i1<=dx/8 && i3<=dx/8) { Setac(box1,(bc='<'),98);{Break;}; }
+ }
+ if( dx > 2 && 9*dx>=5*dy ){ // 4x6 screen-font (3*5)
+ ad=98;
+ if (dx<8) ad=99*ad/100;
+ if (dx<6) ad=96*ad/100;
+ if( 2*dx > JOB->res.avX && 4*dx>dy ) ad=98;
+// printf(" %d %d %d %d %d %d\n",i5,i1,i3,i2,i4,i6);
+ if( i5==0 && i1<=dx/8+1 && i3<=dx/8+1 && i1+i3<=dx/8+1
+ && i2>=dx/2 && i4>=3*dx/4 && i6>=3*dx/4 ) {
+ if (2*loop(bp, 0, y/2,dx,cs,0,RI)+1+dx/16<i4+i5) ad=95*ad/100;
+ if (2*loop(bp, 0,dy-1-y/2,dx,cs,0,RI)+1+dx/16<i6+i5) ad=95*ad/100;
+ Setac(box1,(bc='<'),ad);{Break;};
+ }
+/* obsolete code Aug06, will be removed if new code is stable
+ if( i2==0 && i4<=dx/8 && i6<=dx/8
+ && i5>=dx/2 && i1>=3*dx/4 && i3>=3*dx/4 ) {
+ if (2*loop(bp,dx-1, y/2,dx,cs,0,LE)+1+dx/16<i1+i2) ad=95*ad/100;
+ if (2*loop(bp,dx-1,dy-1-y/2,dx,cs,0,LE)+1+dx/16<i3+i2) ad=95*ad/100;
+ Setac(box1,(bc='>'),ad);{Break;};
+ }
+*/
+ }
+
+ /* measure again, now at the first/last row and at the fixed middle row dy/2 */
+ i1=loop(bp,dx-1,dy/16,dx,cs,0,LE);
+ i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE);
+ i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE);
+ i4=loop(bp, 0,0 ,dx,cs,0,RI);
+ i5=loop(bp, 0,dy/2,dx,cs,0,RI);
+ i6=loop(bp, 0,dy-1,dx,cs,0,RI);
+ i=(box1->m4+box1->m3)/2-box1->m2; // minimum height demanded via dy>=i below
+ //
+ // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6);
+ if(2*i2<i1+i3 && 2*i5>i4+i6 && 2*dx<dy && dy>=i){
+ Setac(box1,(bc=')'),98);break; }
+ if(2*i2>i1+i3 && 2*i5<i4+i6 && 2*dx<dy && dy>=i){
+ if(2*i2<=i1+i3+1 || 2*i5>=i4+i6-1) ad=98*ad/100;
+ if(2*i2<=i1+i3+2 || 2*i5>=i4+i6-2) ad=98*ad/100;
+ for(x=y=0;y<dy/4;y++){
+ i=loop(bp,0,y,dx,cs,0,RI);if( i>x ) x=i;
+ }
+ for(y=0;y<(dy+2)/4;y++){
+ i=loop(bp,0,y+dy/8,dx,cs,0,RI);if( i<x ) break;
+ }
+ if( y==(dy+2)/4 ) {Break;}; // ~l (left upper side must be convex) Jul00
+ Setac(box1,(bc='('),ad); break;
+ }
+ Break;
+ }
+ // --------- test [] --------------------------------
+ for(ad=d=98;dx>2 && dy>4 && dy>=2*dx;){ // (3,6) on 4x6 font
+ DBG( wchar_t c_ask=']'; )
+ if (sdata->holes.num > 1) { Break;} /* tolerant against a tiny hole */
+ if (!hchar) ad=97*ad/100;
+ /* every row must have exactly one crossing */
+ for(y=0;y<dy;y++){
+ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
+ } if (y<dy) {Break;};
+ if( get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) == 2
+ && get_bw(x0,x1,y0+1,y0+1,box1->p,cs,2) == 2 ) {Break;};
+ if( get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) == 2
+ && get_bw(x0,x1,y1-1,y1-1,box1->p,cs,2) == 2 ) {Break;};
+ if( get_bw(x0 ,x0,y0 ,y1 ,box1->p,cs,2) == 0
+ || get_bw(x0+1 ,x0+1,y0 ,y1 ,box1->p,cs,2) == 0 )
+ if( get_bw(x0+dx/2,x1,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
+ { Setac(box1,(bc='['),ad);break; }
+ if( get_bw(x1 ,x1,y0 ,y1 ,box1->p,cs,2) == 0
+ || get_bw(x1-1 ,x1-1,y0 ,y1 ,box1->p,cs,2) == 0 )
+ if( get_bw(x0,x1-dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 )
+ { Setac(box1,(bc=']'),ad);break; }
+ break;
+ }
+
+#if CODE_NOT_COMPLETED
+ // --- test ] -------
+ // disabled draft: it uses dbg, gchar and ac, which are not declared in this function
+ for(ad=d=100;dx>2 && dy>3;){
+ DBG( wchar_t c_ask=']'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num > 0) ad=98*ad/100; /* # */
+ /* 1/8 distance to the center */
+ d=2*sq(128/16);
+ /* now we check for the 4 ends of the x */
+ if (aa[0][2]>d) Break;
+ if (aa[1][2]>d) Break;
+ if (aa[2][2]>d) Break;
+ if (aa[3][2]>d) Break;
+ if (aa[3][0]-aa[0][0]<7*dx/8) Break;
+ if (aa[2][0]-aa[1][0]<7*dx/8) Break;
+ if (aa[1][1]-aa[0][1]<7*dy/8) Break;
+ if (aa[2][1]-aa[3][1]<7*dy/8) Break;
+ if (aa[3][0]-aa[0][0]<2) Break; /* too small */
+ if (aa[2][0]-aa[1][0]<2) Break; /* too small */
+ MSG( fprintf(stderr," aa %d %d %d %d %d %d %d %d d %d %d %d %d",\
+ aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\
+ aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,\
+ aa[0][2],aa[1][2],aa[2][2],aa[3][2]);)
+ /* left and right vertical line */
+ d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break;
+ ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100;
+ d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break;
+
+ /* search uppermost left ^ */
+ i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y0);
+ x=box1->frame_vector[i1][0];
+ y=box1->frame_vector[i1][1];
+ if (y-y0 > 5*dy/8) Break;
+ if (x-x0 > 5*dx/8) Break;
+ /* search uppermost right ^ ~H */
+ i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0);
+ if ( box1->frame_vector[i3][0]-x> dx/4
+ && box1->frame_vector[i3][1]-y<=dy/8) Break;
+
+ /* check if upper left and lower right point are joined directly */
+ dbg[0]=d=line_deviation(box1,i1, aa[2][3]); if (d >2*sq(1024/4)) Break;
+ /* check if lower left and upper left point are joined directly */
+ dbg[1]=d=line_deviation(box1, aa[1][3],i1); if (d >2*sq(1024/4)) Break;
+
+ if (!hchar) ad=99*ad/100;
+ if ( gchar) ad=98*ad/100; // \sc N
+ ac=(wchar_t) ']';
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+#endif
+ // --------- test ocr-a-[] --------------------------------
+ // only tried if none of the tests above has set bc
+ if(bc==UNKNOWN)
+ for(ad=d=98;dx>5 && dy>7 && 2*dy>3*dx;){ // only for accurate font at the moment
+ DBG( wchar_t c_ask='['; )
+ if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
+ if (!hchar) ad=97*ad/100;
+ if( num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) break;
+ if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
+ if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)
+ +loop(bp, 0,dy/2,dx,cs,0,RI) <= dx/4 ) break; // O
+ for(y=dy/8;y<dy-dy/8;y++){
+ /* NOTE(review): scans up to x=dx while all other row scans of this
+ function stop at dx-1 - confirm that num_cross tolerates this */
+ if( num_cross(0,dx,y,y,bp,cs) != 2 ) break;
+ } if (y<dy-dy/8) break;
+ if( get_bw((3*x0+5*x1)/8,x1,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
+ { Setac(box1,(bc='['),ad);break; }
+ if( get_bw(x0,(5*x0+3*x1)/8,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0)
+ { Setac(box1,(bc=']'),ad);break; }
+ break;
+ }
+ // --------- test {} --------------------------------
+ for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
+ DBG( wchar_t c_ask='{'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if (!hchar) ad=97*ad/100;
+ for(y=0;y<dy;y++){
+ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
+ } if (y<dy) Break;
+ for(x=0;x<dx/2;x++){
+ if( num_cross(dx-1-x,dx-1-x,0,dy-1,bp,cs) != 2 ) break;
+ }
+ /* NOTE(review): the loop above steps x, but y is tested here. If Break
+ leaves the test, y equals dy at this point and 2*dy>3*dx holds, so this
+ exit never fires. Probably x was meant; left unchanged because fixing
+ it alters the recognition results and needs a run over the examples. */
+ if (y<dx/2) Break;
+ if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
+ if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
+ if ( loop(bp,0,dy-1,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100;
+ if ( loop(bp,0, 0,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100; // <
+ if ( loop(bp,0, 0,dy,cs,0,DO)<dy/2-1 ) ad=98*ad/100;
+ if ( loop(bp,0,dy-1,dy,cs,0,UP)<dy/2-2 ) ad=98*ad/100; // (
+ if ( loop(bp,dx-1,0,dx,cs,0,LE)
+ + loop(bp,dx-1,2,dx,cs,0,LE)
+ - 2*loop(bp,dx-1,1,dx,cs,0,LE) >=dx/8 ) ad=98*ad/100; // <
+ if ( loop(bp,dx-2,dy-1,dy,cs,0,UP)>dy/4 ) Break; // f
+ if ( get_bw(x0,x0,y0,y0+dy/4,box1->p,cs,1) == 1
+ || get_bw(x0,x0,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
+ Setac(box1,(bc='{'),ad);Break;
+ }
+ /* mirrored version of the test above; note that it has no check of holes.num */
+ for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){
+ DBG( wchar_t c_ask='}'; )
+ if (!hchar) ad=97*ad/100;
+ for(y=0;y<dy;y++){
+ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break;
+ } if (y<dy) Break;
+ for(x=0;x<dx/2;x++){
+ if( num_cross(x,x,0,dy-1,bp,cs) != 2 ) break;
+ }
+ /* NOTE(review): same y-instead-of-x test as in the test for { above,
+ deliberately left unchanged */
+ if (y<dx/2) Break;
+ if ( num_cross( 0, 0,dy/4,dy-1-dy/4,bp,cs) != 0 ) Break;
+ if ( num_cross(dx-1,dx-1,dy/4,dy-1-dy/4,bp,cs) != 1 ) Break;
+ if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;}
+ if ( loop(bp,dx-1, 0,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;} // >
+ if ( loop(bp,dx-1, 0,dy,cs,0,DO)<dy/2-1 ) {ad=98*ad/100;}
+ if ( loop(bp,dx-1,dy-1,dy,cs,0,UP)<dy/2-2 ) {ad=98*ad/100;} // )
+ if ( loop(bp,0,0,dx,cs,0,RI)
+ + loop(bp,0,2,dx,cs,0,RI)
+ - 2*loop(bp,0,1,dx,cs,0,RI) >=dx/8 ) ad=98*ad/100; // <
+ if ( loop(bp,1,dy-1,dy,cs,0,UP)>dy/4 ) Break; // ???
+ if ( get_bw(x1,x1,y0,y0+dy/4,box1->p,cs,1) == 1
+ || get_bw(x1,x1,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break;
+ Setac(box1,(bc='}'),ad);Break;
+ }
+ return box1->c;
+}
+
+#if 0
+/* ---------- empty prototype function for copy and expand ----------
+ * Template only, the enclosing "#if 0" keeps it out of the build.
+ * It unpacks the per-box values from sdata into locals and returns
+ * box1->c without running any test. To add a new group of tests copy it,
+ * rename ocr0_XXX and insert the tests at the marked place.
+ */
+static wchar_t ocr0_XXX(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,i0,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar,
+ x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size of the box in pixels */
+ ac,ad; /* tmp-vars */
+
+ // --- test XXX --- (put the new tests here) ----------------------
+ return box1->c;
+}
+#endif
+
+
+/* ----------------------- part9 --------------------------------
+ * ocr0p9: tests for special characters, in this order: sharp s, +, $, &,
+ * italic &, ?, ! (also |), * with five and with six rays, @ and the
+ * section sign.
+ * Every test is a for(init;size-condition;){...} block which is left by
+ * break, Break or return during its first pass. An accepted candidate is
+ * reported through Setac(box1,character,certainty).
+ * NOTE(review): Break, Setac, MSG, DBG, loop, num_cross, num_hole, get_bw
+ * and get_line2 are defined outside of this function; Break is presumably
+ * "break" plus optional debug output - confirm.
+ *
+ * sdata   shared per-box data; read here: box1, bp, cs and holes.num
+ *         (hchar and gchar are recomputed locally from box1->m1..m4)
+ * returns the accepted char directly from the +, $, & and italic & tests
+ *         if ad>=100 and from the ? test; otherwise bc at the end, which
+ *         is only set by the sharp s, & and ? tests and is UNKNOWN else
+ */
+static wchar_t ocr0p9(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ i1,i2,i3,i4; /* tmp-vars */
+ int xa,xb, /* used for store significant points of char */
+ dbg[9]={0,0,0,0,0,0,0,0,0}, /* debugging space */
+ ya,ad,cs=sdata->cs;
+ wchar_t ac,bc=UNKNOWN; // bestletter
+ int hchar; // char is higher than e
+ int gchar; // char has ink lower than m3
+ // --- hchar --- gchar -------------------------
+ hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
+ gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
+ // if the char is slightly moved down correction can be done
+ if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
+ if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
+
+ /* reserved for the future */
+ // --- test beta,\3,sz,"s ---------------------------------------------
+ if(bc==UNKNOWN && hchar)
+ for(ad=d=100;dx>3 && dy>6;){ // min 4x7
+ DBG( wchar_t c_ask='S'; )
+ if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
+ /* this part is provisional, should be changed!
+ a-\
+ | d
+ b| /
+ | \
+ -c /
+ */
+ if( num_cross(x0 ,x1 ,y0+dy/4 ,y0+dy/4 ,box1->p,cs) != 2
+ && num_cross(x0 ,x1 ,y0+dy/4+1,y0+dy/4+1,box1->p,cs) != 2 ) break;
+ for(i=1+dy/16,y=y0+dy/8;y<y1-dy/4 && i>0;y++){ // i = number of tolerated failing checks
+ if( y<y1-6*dy/16 ){ if( num_cross(x0 ,x1 ,y,y,box1->p,cs) != 2 ) i--;}
+ else { if( num_cross(x0 ,x1 ,y,y,box1->p,cs) < 2 ) i--;}
+ if( get_bw(x0,x0+dx/2,y,y,box1->p,cs,1) == 0 ) i--;
+ if( y<y1-5*dy/16 )
+ if( get_bw(x1-dx/2,x1,y,y,box1->p,cs,1) == 0 ) i--;
+ } if( i<=0 ) break;
+ // out_x(box1);
+
+ for(y=y0+dy/3;y<y1-dy/3;y++){
+ i =loop(box1->p,x1,y,dx,cs,0,LE);
+ if( i>=dx/8 ) break;
+ i+=loop(box1->p,x1-i,y,dx,cs,1,LE);
+ if( i>=dx/2 ) break;
+ } if( y>=y1-dy/3 ) break;
+
+ for(y=y0+dy/5;y<y0+dy/3;y++)
+ if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
+ if( y>=y0+dy/3 ) break;
+
+ for(y=y0+dy/2;y<y1;y++)
+ if( get_bw(x1-dx/6,x1,y,y,box1->p,cs,1) == 1 ) break;
+ if( y>=y1 ) break;
+
+ for(y=y1-dy/3;y<y1-dy/8;y++){
+ i=loop(box1->p,x1,y,dx,cs,0,LE);
+ if( i>dx/4
+ && get_bw(x1-dx/8,x1-dx/8,y,y1,box1->p,cs,1) == 1 ) break;
+ } if( y<y1-dy/8 ) break; // ~Q
+
+ if( box1->m3==0 || 2*y1<box1->m3+box1->m4 )
+ if( loop(box1->p,x1,y1, dx,cs,0,LE)==0
+ && loop(box1->p,x1,y1-dy/4,dx,cs,0,LE)>dx/8 ) break; // ~R
+
+
+ for(x=x0+dx/4;x<x1-dx/4;x++)
+ if( num_cross(x,x,y0,y1,box1->p,cs) == 3 ) break;
+ if( x>=x1-dx/4 ) break;
+
+ i=loop(bp,dx/2,dy-1,dy,cs,0,UP)+dy/64; // Jul00
+ for(x=dx/5;x<dx/2;x++)
+ if( loop(bp,x,dy-1,dy,cs,0,UP) > i ) break;
+ if( x==dx/2 ) break;
+
+ x=x0+loop(bp,0,dy/4,dx,cs,0,RI);
+ for(;x<x1-dx/3;x++)
+ if( get_bw(x,x,y0,y0+dy/4,box1->p,cs,1) == 0 ) break;
+ if( x<x1-dx/3 ) break;
+
+ if( !gchar )
+ // if( num_hole( x0, x1, y0, y1,box1->p,cs,NULL) != 0 ) break;
+ if (sdata->holes.num != 0) break;
+
+ bc=LATIN_SMALL_LETTER_SHARP_S;
+ Setac(box1,(wchar_t)bc,98);
+ break;
+ }
+ // --- test + ------------------------------------------------
+ for(ad=d=100;dx>2 && dy>2;){ // min 3x3
+ DBG( wchar_t c_ask='+'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ xa=(dx+1)/3-1; ya=(dy+1)/3-1;
+ xb=(dx+1)/4;
+ if( get_bw(x0,x0+xa,y0,y0+ya,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x0,x0+xa,y1-ya,y1,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x1-xb,x1,y0,y0+ya,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x1-xa,x1,y1-ya,y1,box1->p,cs,1) == 1 ) Break;
+ for(i=0,y=y0+ya;y<=y1-ya;y++){ // horizontal line, i = its row or 0 if not found
+ if( get_bw(x0+dx/9,x1-dx/9,y,y,box1->p,cs,2) == 0 ) { i=y; break; }
+ }
+ if (3*dx<2*dy) ad=99*ad/100; // ~t
+ if( !i ) Break;
+ ac=(wchar_t) '+';
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+ // --- test $ ------------------------------------------------
+ for(ad=d=99;dx>3 && dy>5;){ // min 4x6 (dx>=4, dy>=6)
+ DBG( wchar_t c_ask='$'; )
+ if (sdata->holes.num != 2) Break;
+
+ if( get_bw(x0,x0+dx/5,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break;
+ if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break;
+ i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1<x0+dx/3 || i1>x1-dx/5 ) Break;
+ i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2<x0+dx/5 || i2>i1 ) Break;
+ ad= get_line2(i1,y0,i2,y1,box1->p,cs,100)*ad/100;
+ // check upper left and lower right half circle, $
+ for (x=0,i3=y=0;y<dy/3;y++)
+ if( num_cross(x0,x1,y0+dy/2-y,y0+dy/2-y,box1->p,cs) == 2 ) {
+ i = loop(box1->p,x0,y0+dy/2-y,dx,cs,0,RI);
+ if (i>x) { x=i; i3=y0+dy/2-y; }
+ } if (x<=dx/4) Break;
+ for (x=0,i4=y=0;y<dy/3;y++)
+ if( num_cross(x0,x1,y0+dy/2+y,y0+dy/2+y,box1->p,cs) == 2 ) {
+ i = loop(box1->p,x0,y0+dy/2+y,dx,cs,0,RI);
+ if (i>x) { x=i; i4=y0+dy/2+y; }
+ } if (x<=dx/4) Break;
+ if (ad<95) Break;
+ ac=(wchar_t) '$';
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+ // --- test & ------------------------------------------------
+ for(ad=d=99;dx>3 && dy>4;){ /* 4x6 font */
+ DBG( wchar_t c_ask='&'; )
+ if (sdata->holes.num != 2) Break;
+ if( get_bw(x1-dx/9,x1,y0,y0+dy/4,box1->p,cs,1) == 1 ) Break; // g
+ if( loop(bp,dx/2,0,dy,cs,0,DO)>dy/2) Break;
+ i1=loop(bp,0,dy/8 ,dx,cs,0,RI); if (i1>dx/2) Break;
+ i =loop(bp,0,dy/4 ,dx,cs,0,RI); if (i1>dx/2) Break; if (i<i1) i1=i; /* NOTE(review): i1 is tested a second time here, perhaps i was meant - confirm */
+ i3=loop(bp,0,dy-dy/4 ,dx,cs,0,RI); if (i3>dx/2) Break;
+ i =loop(bp,0,dy-dy/4-1,dx,cs,0,RI); if (i3>dx/2) Break; if (i<i3) i3=i; /* NOTE(review): i3 is tested a second time here, perhaps i was meant - confirm */
+ if (i3>i1) Break;
+ for( i2=0, y=dy/4; y<=dy/2+1; y++ ){ // i2 = largest value between dy/4 and dy/2+1
+ i =loop(bp,0,y,dx,cs,0,RI); if( i>i2 ) i2=i;
+ }
+ if(2*i2-i1-i3<1) Break;
+ // if( num_hole(x0,x1 ,y0,y1,box1->p,cs,NULL)!=2 ) Break;
+ if( num_hole(x0,x1-dx/4,y0,y1,box1->p,cs,NULL)!=2 ) Break;
+ if( num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) < 1 ) Break;
+ for( x=dx-1; x>=dx/2; x-- ){
+ if( num_cross(x,x,dy/4,dy-1,bp,cs) > 1 ) break;
+ } if( x<=3*dx/4 && x<dx-2) Break;
+ if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) > 3 ) { // glued ah
+ if (dy>15) { Break; } else ad=96*ad/100;
+ }
+ if (!hchar) ad=98*ad/100;
+ bc=(wchar_t) '&';
+ Setac(box1,bc,ad);
+ if (ad>=100) return bc;
+ break;
+ }
+ // --- test \it & like \epsilon\tau ------------------------------
+ if(bc==UNKNOWN)
+ for(ad=d=100;dx>7 && dy>7;){
+ DBG( wchar_t c_ask='&'; )
+ if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
+ if( num_cross(0,dx-1, dy/4, dy/4,bp,cs) != 3 ) break;
+ if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 4 ) break;
+ if( num_cross(dx/2,dx-1,dy/2, dy/2,bp,cs) != 2 ) break;
+ if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) != 2 ) break;
+ if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 ) break;
+ if( num_cross( 0, 0,0,dy-1,bp,cs) != 1 ) break;
+ if( num_cross( dx/3, dx/3,0,dy-1,bp,cs) != 4 ) break;
+ if( num_cross(13*dx/16,13*dx/16,0,dy/8,bp,cs) != 0 ) break;
+ if( num_cross(4*dx/8,4*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
+ if( num_cross(3*dx/8,3*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
+ if( num_cross(5*dx/8,5*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break;
+ if( num_hole(x0 ,(x0+x1)/2,y0, y1,box1->p,cs,NULL) != 1 ) break;
+ if( num_hole(x0+dx/8,x1-dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
+ ac=(wchar_t) '&';
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+ // --- test ? ---------------------------------------------------
+ for(ad=d=98;dx>2 && dy>5;){ // min 3x(4+2)
+ DBG( wchar_t c_ask='?'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ if ( num_cross(x0, x1, y0, y0, box1->p, cs) !=1 ) Break; // ~?
+ if ( num_cross(x0, x1, y1, y1, box1->p, cs) > 1 ) Break; // ~?
+ for(y=y0;y<y1;y++) // new y1
+ if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
+ if (2*y<y0+y1) Break;
+ i1=y1;
+ if (y==y1 && box1->m4) { // probably lower dot not caught in box?
+ if (get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) != 1 ) Break;
+ i1=box1->m4;
+ for(;i1>y1;i1--) // new y1
+ if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
+ }
+ y--; i=y-y0+1; // new dy (i is not read again before it is overwritten)
+ for (y=0;y<dy/2;y++)
+ if( num_cross(x0, x1, y0+y, y0+y, box1->p, cs) == 2 ) break;
+ if (y==dy/2) Break;
+ // if( num_hole( x0, x1, y0, y1, box1->p,cs,NULL) > 0 ) Break;
+ if (sdata->holes.num > 0) Break;
+ for(y=y0+dy/2;y<=i1;y++)
+ if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break;
+ if( y==i1 ) Break;
+ for( ;y<=i1;y++)
+ if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break;
+ if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+7*dx/8,x1,y,i1,box1->p,cs,1) == 1 ) Break; // broken thin 2
+ bc='?';
+ Setac(box1,(wchar_t)bc,98);
+ return bc;
+ }
+ // --- test !| ---------------------------------------------------
+ for(ad=d=99; dy>4 && dy>2*dx;){ // dy>=5 and more than twice as high as wide
+ DBG( wchar_t c_ask='!'; )
+ if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */
+ // measure thickness
+ if (num_cross(x0,x1,y0 ,y0 ,box1->p,cs)!=1) Break;
+ if (num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs)!=1) Break;
+ for(y=y0;y<y1;y++) // new y1
+ if( get_bw(x0, x1, y, y,box1->p,cs,1) != 1 ) break; // lower end
+ if (2*y<y0+y1) Break;
+ if (y==y1 && y>box1->m3-dy/8) ad=ad*97/100; /* missing dot? */
+ i1=y1;
+ if (y==y1 && box1->m4) { // probably lower dot not caught in box?
+ if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1)
+ || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1 )) {
+ i1=box1->m4;
+ for(;i1>y1;i1--) // new y1
+ if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot
+ }
+ } i2=i1;
+ for( i1=0,y=y0;y<=i2;y++){ // i1 = first row without crossing, 0 if there is none
+ i=num_cross(x0,x1,y,y,box1->p,cs); if(i>1) break;
+ if(i==0 && i1==0) i1=y;
+ } if(y<=i2 || i1==0 || i1<y0+dy/2) Break;
+
+ if( loop(bp,dx-1,dy/8,dx,cs,0,LE)
+ -loop(bp,dx-1, 0,dx,cs,0,LE)>dx/4+1 ) Break; // f
+
+ if (!hchar) ad=96*ad/100;
+ Setac(box1,(wchar_t)'!',ad);
+ break;
+ }
+ // --- test * with five rays (the author was unsure of the English word: edges? jags? beams?) ----
+ for(ad=d=99;dx>2 && dy>4;){
+ DBG( wchar_t c_ask='*'; )
+ if (sdata->holes.num > 0) Break; /* no hole accepted here */
+ if( num_cross(0,dx-1, 0,dy-1,bp,cs) != 1 /* NOTE(review): y runs from 0 to dy-1 here while the next test scans single rows - confirm that this scan is intended */
+ && num_cross(0,dx-1, 1,dy-2,bp,cs) != 1 ) Break;
+ if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2
+ && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break;
+ x=dx/2;y=(6*dy+8)/16; // center point 6/8=6/2^3 rounded
+ /* upwards from center */
+ dbg[0]=i=get_line2(x,y,x ,0,bp,cs,100); if(i<95) Break;
+ if (dx<8) /* be exact on small fonts, where get_line2 returns 100 (ToDo change) */
+ if (get_bw(x,x,0,y,bp,cs,2)==2) Break;
+ /* horizontal */
+ dbg[1]=i=get_line2(0,y,dx-1,y,bp,cs,100); if(i<95) Break;
+ if (dy<8)
+ if (get_bw(0,dx-1,y ,y ,bp,cs,2)==2
+ && get_bw(0,dx-1,y+1,y+1,bp,cs,2)==2) Break;
+ /* down (right), the better of two end points is taken */
+ i=get_line2(x,y,(5*dx+4)/8,dy-1,bp,cs,100);
+ j=get_line2(x,y,(6*dx+4)/8,dy-1,bp,cs,100); if(j>i) dbg[2]=i=j;
+ if(i<95) Break;
+ /* down (left) */
+ dbg[3]=i=get_line2(x, y,(2*dx+4)/8,dy-1,bp,cs,100); if(i<95) Break; // straight up
+ /* check for lower gap at bottom */
+ dbg[4]=i=get_bw( x, x,dy-1-dy/8,dy-1,bp,cs,1); if(i==1) Break;
+ dbg[5]=i=get_line2( dx/4,dy/4, 0,0,bp,cs,101); if(i<95) Break; // upper left gap
+ dbg[6]=i=get_line2(dx-1-dx/4,dy/4,dx-1,0,bp,cs,101); if(i<95) Break; // upper right gap
+ MSG(fprintf(stderr,"%d %d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5],dbg[6]);)
+ Setac(box1,(wchar_t)'*',ad);
+ break;
+ }
+ // --- test * with six rays (the author was unsure of the English word: edges? jags? beams?) ----
+ for(ad=d=100;dx>4 && dy>4;){
+ DBG( wchar_t c_ask='*'; )
+ if (sdata->holes.num > 0) Break; /* no hole accepted here */
+ if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 3
+ && num_cross(0,dx-1, 1+dy/8, 1+dy/8,bp,cs) != 3) Break;
+ if( num_cross(0,dx-1,dy-2-dy/8,dy-2-dy/8,bp,cs) != 3) Break;
+ if( num_cross(0 , 0, 0,dy-1,bp,cs) != 2) Break;
+ if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) != 2) Break;
+ if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 1) Break;
+ if( num_cross( 0 ,dx/8,dy/2,dy/2,bp,cs) != 0) Break;
+ if( num_cross(dx-1-dx/8,dx-1,dy/2,dy/2,bp,cs) != 0) Break;
+ if (dx>5) {
+ dbg[0]=i=get_line2(0,dy-2-dy/8,dx-1,dy/8,bp,cs,100); if(i<95) Break; // black upwards beam
+ dbg[1]=i=get_line2(0,dy/8,dx-1,dy-2-dy/8,bp,cs,100); if(i<95) Break; // black downwards beam
+ /* check vertical line */
+ dbg[2]=i=get_line2(dx/2,0,dx/2, dy-1,bp,cs,100); if(i<95) Break;
+ }
+ MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);)
+ Setac(box1,(wchar_t)'*',98);
+ break;
+ }
+ // --- test @ - a popular char should be detectable! added in version v0.2.4a5
+ if(bc==UNKNOWN)
+ for(ad=d=99;dx>5 && dy>7;){
+ DBG( wchar_t c_ask='@'; )
+ if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
+ if (loop(bp, 0,dy/2,dx,cs,0,RI)>dx/4) Break;
+ if (loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/4) Break;
+ if (loop(bp,dx/2,dy-1,dy,cs,0,UP)>dx/8) Break;
+ if (loop(bp,dx/2, 0,dy,cs,0,DO)>dx/8) Break;
+ /* ..@@@@..<- 8*10 example
+ .@@..@@.
+ @@....@@
+ @@..@@@@<
+ @@.@@.@@
+ @@.@@.@@
+ @@..@@@.
+ @@......
+ .@@...@@
+ ..@@@@@.<- */
+ x=6*dx/16;
+ y=dy/2;
+ i=num_cross(0,dx-1,y,y,bp,cs);
+ if (i<3 || i>4) Break;
+ if( i != 4 && dx>8 ) ad=98*ad/100;
+
+ i=num_cross(x,x,0,dy-1,bp,cs); if (i<2) Break;
+ if (i!=4) { j=num_cross(x+1,x+1,0,dy-1,bp,cs); // try the next two columns too and keep the count nearest to 4
+ if (abs(4-j)<abs(i-4)) i=j; }
+ if (i!=4) { j=num_cross(x+2,x+2,0,dy-1,bp,cs);
+ if (abs(4-j)<abs(i-4)) i=j; }
+ if (i<3 || i>4) Break;
+ if (i!=4) ad=97*ad/100;
+ if( num_cross(0, x,y,y,bp,cs) != 2 ) Break;
+ if( num_cross(x,dx-1,y,y,bp,cs) != 2 ) Break;
+ if( num_cross(x,x,0, y,bp,cs) != 2 ) Break;
+ if( num_cross(x,x,y,dy-1,bp,cs) != 2 ) Break;
+ if (dx>7) {
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
+ if (sdata->holes.num != 1) Break;
+ if( num_hole(x0+dx/8,x1-3*dx/16,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
+ }
+ Setac(box1,(wchar_t)'@',ad);
+ break;
+ }
+ // --- test paragraph (section sign) v0.2.6
+ if(bc==UNKNOWN && hchar)
+ for(ad=d=100;dx>4 && dy>15;){
+ DBG( wchar_t c_ask='$'; ) /* NOTE(review): debug char is '$' although the section sign is tested - confirm */
+ if (sdata->holes.num > 3) break; /* tolerant against a tiny hole */
+ if( get_bw( 0,dx/2,3*dy/4,3*dy/4,bp,cs,1) == 1 ) break;
+ if( get_bw(3*dx/4,dx-1,3*dy/4,3*dy/4,bp,cs,1) == 0 ) break;
+ if( get_bw( 0,dx/4, dy/4, dy/4,bp,cs,1) == 0 ) break;
+ if( get_bw( dx/2,dx-1, dy/4, dy/4,bp,cs,1) == 1 ) break;
+ if( get_bw(dx/2,dx/2, 0, dy/4,bp,cs,1) == 0 ) break;
+ if( get_bw(dx/2,dx/2,dy-1-dy/4, dy-1,bp,cs,1) == 0 ) break;
+ if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 4 ) break;
+ if( num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs) != 2 ) break;
+ if( num_hole( x0,x1,y0+dy/4,y1-dy/4,box1->p,cs,NULL) != 1 ) break;
+ Setac(box1,SECTION_SIGN,96);
+ break; // paragraph=0xA7=167
+ }
+
+ return bc;
+}
+
+/* ----------------------- partx --------------------------------
+ * ocr0px: rule tests for ligatures and special symbols, run in this
+ * order: ff, ae, AE, O-with-stroke, cent, euro, C-with-cedilla, '#',
+ * bullet, '|', '%' and capital Omega.
+ *
+ * sdata supplies the box (box1), the bitmap bp, the threshold cs and the
+ * hole table (holes). bp is addressed with box-relative coordinates
+ * (0..dx-1, 0..dy-1), box1->p with the box coordinates x0..x1, y0..y1.
+ *
+ * Every test is a for-loop used as a breakable block: the loop condition
+ * is the minimum box size, each failed check leaves the block, and a test
+ * that passes all checks calls Setac(box1,char,ad) with the weight ad
+ * (ad starts near 100 and is scaled down by the soft checks).
+ * Break (capital B) is a macro defined outside this function; it
+ * presumably acts like break plus debug output -- confirm.
+ *
+ * Returns ac early when a test ends with ad>=100, otherwise bc, which
+ * is only assigned by the Omega test and is UNKNOWN in all other cases.
+ */
+static wchar_t ocr0px(ocr0_shared_t *sdata){
+ struct box *box1=sdata->box1;
+ pix *bp=sdata->bp;
+ int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ i1,i2,i3,i4,j1,cs=sdata->cs; /* tmp-vars */
+ int ya,ad; /* used for store significant points of char */
+ wchar_t ac,bc=UNKNOWN; // bestletter
+ int hchar; // char is higher than e
+ int gchar; // char has ink lower than m3
+ // --- hchar --- gchar ------------------------- (recomputed here from box1->m1..m4, sdata->hchar/gchar are not read)
+ hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
+ gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1;
+ // if the char is slightly moved down correction can be done
+ if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
+ if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
+
+ /* reserved for special chars, to test at the end */
+ // --- test 'ff' ---------------------------------------------------
+ // ToDo: better check and call test 'f' and 'f' with subboxes
+ if( bc==UNKNOWN ) /* always true here: bc is only assigned in the Omega test */
+ for(ad=98;dx>4 && dy>6;){ // Dec00 body copied from H
+ DBG( wchar_t c_ask='f'; )
+ if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
+ if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2
+ && num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 ) break;
+ if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2
+ && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) break;
+ if( loop(bp,0 ,dy/8,dx,cs,0,RI)
+ + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) break; // ~A
+ for( j1=0,i=1,y=y0+dy/10; y<y1-dy/10 && i; y++ ) // 2 vertical lines
+ { j=loop(box1->p,x0 ,y,dx,cs,0,RI)
+ +loop(box1->p,x1 ,y,dx,cs,0,LE);
+ if( j>10*dx/16 ) i=0; if ( j>j1 ) j1=j; }
+ if( !i ) break;
+ for( x=dx/4; x<dx/2; x++ ){ // lower gap
+ y=loop(bp,x ,dy-1,dy,cs,0,UP);
+ if ( y > 3*dy/8 ) break;
+ if ( 10*y > dy ){ /* italic */
+ i=loop(bp,x ,dy-y,dx,cs,0,RI);
+ if( i>1 && y+loop(bp,x+i-1,dy-y,dy,cs,0,UP)>3*dy/8 ) break;
+ }
+ } if( x>=dx/2 ) break;
+ x=loop(box1->p,x0 ,y1-dy/8,dx,cs,0,RI)
+ +loop(box1->p,x1 ,y1-dy/8,dx,cs,0,LE);
+ for( i=1,y=dy/4; y<dy-1-dy/4 && i; y++ ) // max - min width
+ { j=loop(bp,0 ,y,dx,cs,0,RI)
+ +loop(bp,dx-1,y,dx,cs,0,LE); if( j-x>dx/5 ) i=0; }
+ if( !i ) break; // ~K Jul00
+ for( i=0,ya=y=y0+dy/4; y<y1-dy/3; y++ ) // horizontal line
+ { j=loop(box1->p,x0 ,y,dx,cs,0,RI);
+ j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } }
+ if( i<=dx/2 ) break; ya-=y0;
+ if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1
+ && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) break; /* Dec00 */
+ for( y=ya; y<dy-dy/4; y++ ) // ~M Dec00
+ if( num_cross(0,dx-1,y ,y ,bp,cs) > 2
+ && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break;
+ if ( y<dy-dy/4 ) break;
+ for(i=1,x=x0+dx/2;x<=x1-dx/4 && i;x++){
+ if( get_bw( x, x,y0 ,y0+dy/4,box1->p,cs,1) == 0 ) i=0;
+ } if( !i ) break;
+ for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
+ if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0;
+ } if( i ) break;
+ for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){
+ if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0;
+ } if( i ) break;
+ for(i=1,y=y0;y<=y0+dy/4 && i;y++){
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) break;
+ for(i=1,y=y1-dy/4;y<=y1 && i;y++){
+ if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0;
+ } if( i ) break;
+ if( num_cross(x0 ,x0+dx/8 ,y0+dy/8 ,y0 ,box1->p,cs) != 0 ) ad=96*ad/100;
+ if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) break;
+ if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) break;
+ i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) break;
+ i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2<i1-dx/4 || i2>i1+dx/8) break;
+ i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3<i2-dx/4 || i3>i2+dx/8) break;
+ if(abs(i1+i3-2*i2)>dx/16+1) break;
+ if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) break;
+ if (!hchar) ad=96*ad/100;
+ if (!gchar) ad=99*ad/100;
+ ac=LATIN_SMALL_LIGATURE_FF;
+ Setac(box1,ac,ad);
+ break;
+ }
+ // --- test ae ---------------------------------------------------
+ if( bc==UNKNOWN )
+ for(ad=98;dx>4 && dy>6;){ // provisional
+ DBG( wchar_t c_ask=LATIN_SMALL_LETTER_AE; )
+ if (sdata->holes.num > 4) Break; /* tolerant against a tiny hole */
+ if( num_cross( dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 2
+ && num_cross(dx-1-dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 1 ) Break;
+ if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
+ if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
+ if( num_cross(dx-1,0, 0, dy-1,bp,cs) < 3 ) Break;
+ if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) < 2 )
+ if( num_cross(0,dx-1,1+dy/16,1+dy/16,bp,cs) < 2 ) Break;
+ if( num_cross(0,dx-1,dy-1-dy/16,dy-1-dy/16,bp,cs) < 2 ) Break;
+ for( x=0,i2=y=dy/4; y<3*dy/4; y++ ){
+ j=loop(bp,0,y,dx,cs,0,RI); if(j>x) { i2=y; x=j; }
+ } if( x<dx/4 || x>3*dx/4 ) Break;
+ for( x=0,i4=y=dy/4; y<3*dy/4; y++ ){
+ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
+ } if( x<dx/4 || x>3*dx/4 ) Break;
+ for( x=0,i4=y=dy/8; y<3*dy/4; y++ ){
+ j=loop(bp,dx-1 ,y,dx,cs,0,LE);
+ j=loop(bp,dx-1-j,y,dx,cs,1,LE);
+ if(j>x) { i4=y; x=j; }
+ } if( x<dx/4 ) Break;
+ if( num_hole(x0,x0+3*dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
+ if( num_hole(x0+dx/2-1,x1,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
+ ac=LATIN_SMALL_LETTER_AE;
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+
+ }
+ // --- test AE ---------------------------------------------------
+ if( bc==UNKNOWN )
+ for(ad=98;dx>5 && dy>6;){ // provisional
+ DBG( wchar_t c_ask=LATIN_CAPITAL_LETTER_AE; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) < 2 ) Break;
+ if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break;
+ if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break;
+ if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) != 1
+ && num_cross(0,dx-1, dy/32, dy/32,bp,cs) != 1
+ && num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) Break;
+ // check for upper horizontal line
+ j=loop(bp,dx-1 ,0,dx,cs,0,LE); x=j;
+ j=loop(bp,dx-1-j,0,dx,cs,1,LE);
+ i=loop(bp,dx-1 ,1,dx,cs,0,LE); if (i<x) x=i;
+ i=loop(bp,dx-1-i,1,dx,cs,1,LE);
+ if (i>j) j=i;
+ if (x>dx/8) Break;
+ if (j<dx/4) Break;
+ for( x=dx,i1=i3=0,i2=y=dy/4; y<3*dy/4; y++ ){
+ j=loop(bp, 0,y,dx,cs,0,RI); if(j>x) break; x=j;
+ j=loop(bp, j,y,dx,cs,1,RI); if(j>i1) { i1=j; i2=y; }
+ j=loop(bp,dx-1 ,y,dx,cs,0,LE);
+ j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>i3) { i3=j; i4=y; }
+ } if( y<3*dy/4 || i1<dx/4-1 || i3<dx/4-1) Break;
+ for( i1=i3=0,y=0; y<dy/8; y++ ){
+ j=loop(bp,dx-1 , y,dx,cs,0,LE);
+ j=loop(bp,dx-1-j, y,dx,cs,1,LE); if(j>i1) { i1=j; }
+ j=loop(bp,dx-1 ,dy-1-y,dx,cs,0,LE);
+ j=loop(bp,dx-1-j,dy-1-y,dx,cs,1,LE); if(j>i3) { i3=j; }
+ } if( i1<=dx/4 || i3<=dx/4 ) Break;
+ for( x=dx-1-dx/8; x>dx/2; x-- ){ // look for right the E
+ if( num_cross(x,x, 0,dy-1,bp,cs) == 3 )
+ if( num_cross(x,x, 0,dy/4,bp,cs) == 1 )
+ if( num_cross(x-1,dx-1-dx/8,3*dy/4,3*dy/4,bp,cs) == 0 )
+ if( num_cross(x,x,3*dy/4,dy-1,bp,cs) == 1 ) break;
+ } if (x<=dx/2) Break; // not found
+ if (sdata->holes.num != 1) Break;
+ if( num_hole(x0,x0+3*dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
+ // if( num_hole(x0, x1,y0,y1 ,box1->p,cs,NULL) != 1 ) Break;
+ ac=LATIN_CAPITAL_LETTER_AE;
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+
+ }
+ // --- test /0 /o /O O_WITH_STROKE -----------------------------------------
+ for(ad=99;dx>4 && dy>4;){ // provisional
+ DBG( wchar_t c_ask=LATIN_SMALL_LETTER_O_WITH_STROKE; )
+ if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */
+ if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 3 ) Break;
+ if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
+ if (loop(bp,dx-1,3*dy/8,dx,cs,0,RI)>dx/8) Break; /* NOTE(review): starts at dx-1 and scans RI, the next line starts at 0 and scans RI -- LE may be intended here, confirm */
+ if (loop(bp, 0,5*dy/8,dx,cs,0,RI)>dx/8) Break;
+ if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
+ if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
+ if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 2 ) Break;
+ i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/8 ) Break;
+ i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/3 ) Break; i1=dx-1-i1;
+ i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/8 ) Break;
+ for(y=1;y<dy-1;y++){
+ x=i1+y*(i2-i1)/dy-dx/8; if(x<0)x=0;
+ j=loop(bp,x,y,dx,cs,0,RI); if( j>3*dx/16 ) break;
+ } if( y<dy-1 ) Break;
+ if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
+ if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 1 ) Break;
+ if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
+ if (sdata->holes.num != 2) Break;
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 2 ) Break;
+
+ if ( hchar && 2*y0<box1->m1+box1->m2 )
+ ac=LATIN_CAPITAL_LETTER_O_WITH_STROKE;
+ else ac=LATIN_SMALL_LETTER_O_WITH_STROKE;
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+
+ }
+ // --- test /c /C C_WITH_STROKE CENT_SIGN --------------------------
+ // here only the version with a continuously vertical line (not broken variant)
+ if( bc==UNKNOWN )
+ for(ad=98;dx>4 && dy>4;){ // provisional
+ DBG( wchar_t c_ask=CENT_SIGN; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 2 ) Break;
+ if( num_cross(0,dx-1-dx/4,dy/2,dy/2,bp,cs) != 2 ) Break;
+ if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break;
+ if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break;
+ if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break;
+ if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 3 ) Break;
+ if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break;
+ if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 3 ) Break;
+ i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/4 ) Break;
+ i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/4 ) Break; i1=dx-1-i1;
+ i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/4 ) Break;
+ for(y=0;y<dy;y++){
+ x=i1+y*(i2-i1)/dy; if(x>dx/16+1) x-=dx/16+1;
+ j=loop(bp,x,y,dx,cs,0,RI); // fprintf(stderr,"\n x=%d j=%d",x,j);
+ if( j>(dx+4)/8 ) ad=96*ad/100;
+ if( j>(dx+2)/4 ) break;
+ } if( y<dy ) Break;
+ if( num_cross( 0 ,dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
+ if( num_cross(dx-1-dx/4,dx-1,dy/2,dy/2,bp,cs) != 0 ) Break;
+ if( num_cross(dx/4,dx-1-dx/4,dy/2,dy/2,bp,cs) != 1 ) Break;
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break;
+ if (sdata->holes.num != 1) Break;
+
+ ac=CENT_SIGN;
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+
+ }
+ // --- test EURO_CURRENCY_SIGN -----------------------------------------
+ if( bc==UNKNOWN )
+ for(ad=98;dx>4 && dy>6;){ // provisional
+ DBG( wchar_t c_ask='&'; ) /* NOTE(review): debug label is '&' although this block tests the euro sign */
+ if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */
+ if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 4 ) break;
+ if( num_cross( 0,dx-1, 0, 0,bp,cs) != 1 ) break;
+ if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break;
+ if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 1 ) break;
+ for(i=0,y=dy/4;y<dy-dy/4-1;y++){ // check if no gap on left side
+ x=loop(bp,0,y,dx,cs,0,RI); if( x>dx/4 ) break;
+ j=loop(bp,x,y,dx,cs,1,RI); if( j>i ) i=j;
+ } if( y<dy-dy/4-1 || i<dx/2 ) break;
+ for(y=dy/4;y<dy-dy/4-1;y++){ // check for right horizontal gap
+ x=loop(bp,dx-1,y,dx,cs,0,LE); if( x>dx/2 ) break;
+ } if( y>=dy-dy/4-1 ) break;
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
+ if (sdata->holes.num != 0) break;
+ ac=EURO_CURRENCY_SIGN;
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+ // --- test LETTER_C_WITH_CEDILLA ---------------------------------------------------
+ if (bc==UNKNOWN)
+ if (gchar)
+ for(ad=98;dx>3 && dy>6;){ // provisional
+ DBG( wchar_t c_ask='c'; )
+ if (sdata->holes.num > 0) break; /* no tolerant against tiny holes */
+ j=loop(bp,dx-1,dy/16 ,dy,cs,0,LE);
+ x=loop(bp,dx-1,dy/16+1,dy,cs,0,LE); if (x<j) j=x;
+ if (3*x>dx) Break; // ~4 ocr-b NOTE(review): tests x (row dy/16+1) although j holds the minimum of both rows - confirm
+ if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) > 2 ) break;
+ if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 2 ) break;
+ if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) > 2 ) break;
+ for( x=dx,i2=y=dy/4; y<3*dy/4; y++ ){
+ j=loop(bp,0,y,dx,cs,0,RI); if(j<x) { i2=y; x=j; }
+ } if( x>0 ) break; i1=x;
+ for( x=0,i4=y=dy/4; y<5*dy/8; y++ ){
+ j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; }
+ } if( x<dx/2 ) break; i3=x;
+ j =loop(bp,dx/2,0,dy,cs,0,DO);
+ j+=loop(bp,dx/2,j,dy,cs,1,DO); if(j>dy/4) break;
+ j =loop(bp,dx/2,j,dy,cs,0,DO); if(j<dy/2) break;
+ j =loop(bp,dx-1 ,dy-1-dy/8,dx,cs,0,LE); if(j<dx/4 || 4*j>3*dx) break;
+ j =loop(bp,dx-1-j/2,dy-1-dy/8,dy,cs,0,UP); if(j>dy/2) break; // ~()
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
+ if (sdata->holes.num) break;
+ if( hchar ) ac= LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
+ else ac= LATIN_SMALL_LETTER_C_WITH_CEDILLA;
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+
+ }
+ // --- test # ---------------------------------------------------
+ for(ad=99;dx>4 && dy>4;){ // never sure?
+ DBG( wchar_t c_ask='#'; )
+ if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */
+ if (sdata->holes.num < 1) Break;
+ if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 2 ) Break;
+ if( num_cross(0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs) != 2 ) Break;
+ if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 2 ) Break;
+ if( num_cross(0,dx/2, dy/2, dy/2,bp,cs) != 1 ) Break;
+ /* fat "#" have only small ends on left and right side, we tolerate this */
+ j=loop(bp, 0,dy/8,dx,cs,0,RI); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
+ j=loop(bp, 0,dy/2,dx,cs,0,RI); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
+ j=loop(bp,dx-1,dy/2,dx,cs,0,LE); if(j<1 || j<dx/16 || j>=dx/2) Break; if (j<dx/8) {ad=ad*96/100;}
+ j=loop(bp,dx-1,dy-1,dx,cs,0,LE); if(j<1 || j<dx/16) Break; if (j<dx/8) {ad=ad*96/100;}
+ for( i1=i3=0,y=dy/4; y<dy/2; y++ ){
+ j=loop(bp,0, y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
+ j=loop(bp,j, y,dx,cs,1,RI); if(j>i1) { i1=j; }
+ j=loop(bp,0,dy-1-y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; }
+ j=loop(bp,j,dy-1-y,dx,cs,1,RI); if(j>i3) { i3=j; }
+ }
+ if (i1<dx-dx/4 || i3<dx-dx/4) Break;
+ if (i1<dx-dx/8) ad=97*ad/100;
+ if (i3<dx-dx/8) ad=97*ad/100;
+ if (sdata->holes.num != 1) {ad=95*ad/100;}
+ if( num_hole(x0+dx/8,x1-dx/8,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break;
+ // if( num_hole(x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break;
+
+ ac=(wchar_t) '#';
+ if( gchar ) {ad=99*ad/100;}
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+ // --- test bullet, full_box, grabbed cursor, ZapfDingBats_156
+ if (bc==UNKNOWN)
+ for(ad=96;dx>4 && dy>4 && 2*dx>dy;){ // provisional
+ DBG( wchar_t c_ask='#'; ) /* NOTE(review): debug label is '#' although this block sets BULLET */
+ if( get_bw(x0,x1,y0,y1,box1->p,cs,2) != 0 ) break;
+ ac=BULLET;
+ if (gchar && !hchar) ad=80*ad/100;
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+ /* --- test | (vertical line, could be a I or l) --- */
+ for(ad=99;dy>4 && 2*dx<dy;){ /* v0.44 */
+ DBG( wchar_t c_ask='|'; )
+ /* test if everything is filled black */
+ if( get_bw(x0+dx/8,x1-dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) break;
+ /* more unsure if the borders are not exact */
+ if( get_bw(x0 ,x0+dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
+ if( get_bw(x1-dx/8,x1 ,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100;
+ if( get_bw(x0+dx/8,x1-dx/8,y0 ,y0+dy/8,box1->p,cs,2) != 0 ) ad=99*ad/100;
+ if( get_bw(x0+dx/8,x1-dx/8,y1-dy/8,y1 ,box1->p,cs,2) != 0 ) ad=99*ad/100;
+ if (3*dx<dy) ad=98*ad/100;
+ if (4*dx<dy) ad=99*ad/100;
+ if (box1->m2 && 2*y1> box1->m2+box1->m3) Break;
+ if (box1->m2 && 3*y1>2*box1->m2+box1->m3) ad=95*ad/100;
+ ac='|';
+ if (!hchar) ad=98*ad/100;
+ Setac(box1,ac,ad);
+ break;
+ }
+ // --- test % ---------------------------------------------------
+ for(ad=100;dx>5 && dy>7;){ // provisional
+ DBG( wchar_t c_ask='%'; )
+ if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */
+ if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) != 3
+ && num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) != 3 ) Break;
+ if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) != 3
+ && num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) != 3 ) Break;
+ if( num_cross(x0,x1, y0, y1,box1->p,cs) < 4
+ && num_cross(x0+dx/8,x1, y0, y1,box1->p,cs) < 4
+ && num_cross(x0,x1+dx/4, y0, y1,box1->p,cs) < 4
+ && dx>7 && dy>15) Break;
+ if( num_cross(x0,x1, y0, y1,box1->p,cs) !=5 ) ad=99*ad/100;
+
+ if (dx>7 && dy>12) {
+ if( num_hole(x0 ,x1 ,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break;
+ if( num_hole(x0+dx/4,x1+dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break;
+ if( num_hole(x0 ,x1+dx/4,y0,y1 ,box1->p,cs,NULL) != 2 ) Break;
+ } else ad=98*ad/100;
+ // use box1->p instead of b, because % is a sum of 3 objects
+ if ( loop(box1->p,x0,y0 ,dx,cs,0,RI)
+ <= loop(box1->p,x0,y0+dy/16+1,dx,cs,0,RI) ) ad=96*ad/100; // X
+ if ( loop(box1->p,x1,y1 ,dx,cs,0,LE)
+ <= loop(box1->p,x1,y1-1-dy/16,dx,cs,0,LE) ) ad=96*ad/100; // X
+ for (x=0;x<dx;x++) { /* look for a vertical line and break if found */
+ if ( get_bw(x0+x,x0+x,y0+dy/8,y1-dy/8,box1->p,cs,2) != 2 ) break;
+ } if (x<dx) Break; // ~gluedVI
+ if (gchar) ad=98*ad/100;
+ ac=(wchar_t) '%';
+ Setac(box1,ac,ad);
+ if (ad>=100) return ac;
+ break;
+ }
+ // --- test Omega ---------------------------------------------------
+ for(ad=d=99;dx>7 && dy>7;){ // min 3x4
+ DBG( wchar_t c_ask=GREEK_CAPITAL_LETTER_OMEGA; )
+ if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break;
+ if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/3 , y1-dy/3,box1->p,cs,1) != 0 ) Break;
+
+ if( num_cross(x0+dx/2,x0+dx/2,y0 , y1-dy/3,box1->p,cs) != 1 ) Break;
+ if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND
+ if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break;
+ if( num_cross(x0+dx/3,x1-dx/3,y1 , y1 ,box1->p,cs) != 2 ) // against noise ("rauschen")
+ if( num_cross(x0+dx/3,x1-dx/3,y1-1 , y1-1 ,box1->p,cs) != 2 ) Break;
+ if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
+ if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
+ if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 )
+ if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break;
+ if (sdata->holes.num) Break;
+ // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break;
+
+ if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<=
+ loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break;
+ if( loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,RI)>dx/4
+ || loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,LE)>dx/4 ) Break;
+ if( loop(bp,dx/2,3*dy/8,x1-x0,cs,0,RI)<dx/4
+ || loop(bp,dx/2,3*dy/8,x1-x0,cs,0,LE)<dx/4 ) Break;
+
+ i=loop(bp,0,dy-1-dy/16,x1-x0,cs,0,RI); if(i>dx/8) Break;
+ x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<3*dx/8 || i>dx/2) Break;
+ x=loop(bp,i,dy-1-dy/16,x1-x0,cs,0,RI); i+=x; if(i<dx/2 || i>5*dx/8) Break;
+ x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<7*dx/8) Break;
+
+ /* look for a vertical gap at lower end */
+ for( x=dx/4;x<3*dx/4;x++ ){
+ i=loop(bp,x,dy-1,y1-y0,cs,0,UP);
+ if( i>3*dy/4 ) break;
+ }
+ if( x>=3*dx/4 ) Break;
+
+ if( !hchar ) ad=60*ad/100;
+ bc=GREEK_CAPITAL_LETTER_OMEGA; /* the only assignment to bc in this function */
+ Setac(box1,bc,ad);
+ break;
+ }
+
+ return bc;
+}
+
+/* -------------------- OCR engine ;) ----------------------------
+ * ocr0: rule-based recognition of the character framed by box1.
+ *
+ *  box1 - character box; reads x0,x1,y0,y1, the line marks m1..m4,
+ *         the frame vectors, dots, modifier and the alternative list
+ *         (num_ac, tac[], wac[]); box1->y1 and box1->c may be changed
+ *  bp   - bitmap handed on to the sub-tests; addressed here with
+ *         box-relative coordinates (0..dx-1, 0..dy-1)
+ *  cs   - threshold; getpixel(...)<cs is treated as a dark pixel
+ *
+ * Steps: compute hchar/gchar, find the frame vectors nearest to the four
+ * box corners, count the holes once, optionally cut the box of a divided
+ * "Ij"/"Ig", run the cheap punctuation tests (- _ = : ; . , ' " ~ ^) and
+ * then the separated letter/number/symbol tests until one is sure
+ * (first alternative has weight 100).
+ *
+ * Returns the chosen character or UNKNOWN.
+ */
+wchar_t ocr0(struct box *box1, pix *bp, int cs){
+ // pix p=*(box1->p);
+ int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1;
+ int dx=x1-x0+1,dy=y1-y0+1, /* size */
+ rx,ry,r1,r2,i1,i2,ad; /* tmp-vars */
+ // ad,ac will be used in future
+ wchar_t bc = UNKNOWN; // bestletter
+ wchar_t um = SPACE; // modifier '"
+ int hchar; // char is higher than e
+ int gchar; // char has ink lower than m3
+ int aa[4][4]; /* corner points, see xX, (x,y,dist^2,vector_idx) v0.41 */
+ ocr0_shared_t sdata; // data used in all subfunctions
+
+ sdata.box1=box1;
+ sdata.bp=bp;
+ sdata.cs=cs;
+ // --- hchar --- gchar -------------------------
+ hchar=0;if( y0 < box1->m2-(box1->m2-box1->m1)/2 ) hchar=1;
+ gchar=0;if( y1 > box1->m3+(box1->m4-box1->m3)/2 ) gchar=1;
+ // if the char is slightly moved down correction can be done
+ if ( y0<box1->m2 && y1>box1->m3 && 2*y1<box1->m3+box1->m4) // moved
+ if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1;
+
+ sdata.hchar=hchar;
+ sdata.gchar=gchar;
+
+ /* search for nearest points to the 4 corners, typical for xX */
+ /* this is faster as calling nearest_frame_vector 4 times */
+ aa[0][0]=aa[1][0]=aa[2][0]=aa[3][0]=(x0+x1)/2; /* set to center */
+ aa[0][1]=aa[1][1]=aa[2][1]=aa[3][1]=(y0+y1)/2; /* set to center */
+ aa[0][2]=aa[1][2]=aa[2][2]=aa[3][2]=2*sq(128); /* distance to box edges */
+ aa[0][3]=aa[1][3]=aa[2][3]=aa[3][3]=0; /* vector index */
+ /* searching for 4 diagonal line ends */
+ for (i=0;i<box1->num_frame_vectors[0];i++) {
+ x=box1->frame_vector[i][0]; /* take a vector */
+ y=box1->frame_vector[i][1];
+ /* distance to upper left end, normalized to 128 */
+ j=0; d=sq((x-x0)*128/dx)+sq((y-y0)*128/dy);
+ // fprintf(stderr," setaa i= %2d xy= %3d %3d d=%5d aa[3]=%2d\n",i,x-x0,y-y0,d,aa[0][3]);
+ if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
+ /* distance to lower left end */
+ j=1; d=sq((x-x0)*128/dx)+sq((y-y1)*128/dy);
+ if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
+ /* distance to lower right end */
+ j=2; d=sq((x-x1)*128/dx)+sq((y-y1)*128/dy);
+ if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
+ /* distance to upper right end */
+ j=3; d=sq((x-x1)*128/dx)+sq((y-y0)*128/dy);
+ if (d<aa[j][2]) { aa[j][0]=x; aa[j][1]=y; aa[j][2]=d; aa[j][3]=i; }
+ }
+ for (i=0;i<16;i++) sdata.aa[i/4][i%4]=aa[i/4][i%4];
+
+ /* extract number position and size of holes and store in a table
+ * - hole coordinates are relative to box (x-x0,y-y0)
+ */
+ sdata.holes.num=0;
+ if (box1->num_frames>0) // speedup v0.42
+ num_hole(x0,x1,y0,y1,box1->p,cs,&sdata.holes); // call once
+ // printf(" num_holes=%d\n",sdata.holes.num);
+
+ /*
+ after division of two glued chars, boundaries could be wrong,
+ check this first (ToDo: only if a flag set?)
+ */
+ if (2*y0 < box1->m2+box1->m3)
+ if (box1->m4>box1->m3 && 2*box1->y1>box1->m4+box1->m3){
+ /* could be a "I" from divided "Ij" or "Ig" */
+ for(y=(box1->m3+box1->m2)/2;2*y<box1->m3+box1->m4;y++)
+ if( get_bw(x0,x1,y,y,box1->p,cs,1)==0 ) break;
+ if(2*y<box1->m3+box1->m4)
+ if( get_bw((x0+x1)/2,(x0+x1)/2,y,box1->m4,box1->p,cs,1)==0 ){
+ /* be sure, ~_ */
+ /* NOTE(review): dy is not recomputed after y1 changes - confirm this is intended */
+ if (y>y0) y1=box1->y1=y;
+ }
+ }
+
+ DBG( IFV fprintf(stderr,"\nDBG L%d (%d,%d): ",__LINE__,box1->x0,box1->y0); )
+ DBG( IFV out_b(box1,sdata.bp,0,0,dx,dy,160); )
+ DBG( IFV fprintf(stderr,"# aa[] %d %d %d %d %d %d %d %d (4 corners)"
+ " d= %d %d %d %d",
+ aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,
+ aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,
+ aa[0][2], aa[1][2], aa[2][2], aa[3][2]);)
+ DBG( IFV fprintf(stderr,"\n# holes %d gchar=%d hchar=%d",sdata.holes.num, gchar, hchar);)
+
+ // --- test thin lines - ---------------------------------
+ for( ad=100; 2*dy<box1->m3-box1->m2 && 3*dx>=4*dy && dx>2; ){ // min 3x3 (small font)
+ DBG( wchar_t c_ask='-'; )
+ if( get_bw(x0+dx/8+1,x1-dx/8-1,y0+dy/8+((dy>2)?1:0),
+ y1-dy/8-((dy>2)?1:0),box1->p,cs,2)==2 ) break;
+ if( box1->dots ) { Setac(box1,'=',97);break; }
+ if (dx<=2*dy) ad=98*ad/100;
+ if (dx<=3*dy) ad=99*ad/100;
+ if (!box1->m4) ad=96*ad/100;
+ else {
+ if (y1>=box1->m3) {
+ if ( dx<2*dy) ad=98*ad/100;
+ if (2*dx<3*dy) ad=98*ad/100;
+ Setac(box1,'_',ad);
+ break;
+ }
+ }
+ Setac(box1,'-',ad); if (ad>=100) return '-';
+ break;
+ }
+ // --- test thin lines = ---------------------------------
+ for( ; dy>2 && dx>2; ){ // min 3x3 (small font)
+ DBG( wchar_t c_ask='='; )
+ for( y=y0;y<y1;y++) // remove upper empty space
+ if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,1)==1 ) break;
+ if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,2)==2 ) break;
+ if( get_bw(x0 ,x1 ,(y+y1)/2,(y+y1)/2,box1->p,cs,1)==1 ) break;
+ if( get_bw(x0+dx/10,x1-dx/10,y1 ,y1 ,box1->p,cs,2)==2 ) break;
+ Setac(box1,'=',100);
+ return '=';
+ }
+ // --- test dots : ---------------------------------
+ for( ad=100; dy>2 && dy>=2*dx; ){ // max 3x3 (small font)
+
+ DBG( wchar_t c_ask=':'; )
+ // check the gap hight
+ for( i1=dy/16;i1<dy/2;i1++)
+ if( get_bw(x0+dx/8,x1-dx/8,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
+ if (i1>=dy/2) break;
+ for( i2=dy/16;i2<dy/2;i2++)
+ if( get_bw(x0+dx/8,x1-dx/8,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
+ if (i2>=dy/2) Break;
+ MSG(fprintf(stderr,"gap y12 %d %d",i1,i2);)
+
+ if (box1->m3 && y1>box1->m3) ad=98*ad/100; // ~;
+ if (box1->m3 && 2*y0> box1->m2+box1->m1) ad=98*ad/100; // ~i
+ if (gchar) ad=99*ad/100;
+ /* penalty for different heights of the two dots: multiply before
+ * dividing; i1 and i2 are both below dy/2, so abs(i1-i2)/dy was
+ * always 0 in integer arithmetic and the penalty was never applied */
+ ad=ad-abs(i1-i2)*20/dy;
+ if (abs(i1-dx)>dy/4) Break; // round or quadratic dots?
+ if (abs(i1-dx)>dy/8) ad=98*ad/100;
+ if (abs(i2-dx)>dy/4) Break; // round or quadratic dots?
+ if (abs(i2-dx)>dy/8) ad=98*ad/100;
+ if (box1->dots!=1) ad=96*ad/100;
+ Setac(box1,':',ad); // dx<=3 ad--
+ if (ad>=100) return ':';
+ break;
+ }
+ // --- test dots ; ---------------------------------
+ if( 2*y0> box1->m2+box1->m1 ) // ~i
+ if( 4*y1>=3*box1->m3+box1->m2 ) // ~:
+ for( ad=100; dy>5 && dx>1 && dy>2*dx; ){ // max 3x3 (small font)
+ DBG( wchar_t c_ask=';'; )
+ // better would it be to detect round pixelcluster on top
+ // check high of upper and lower dot
+ for( i1=0;i1<dy/2;i1++)
+ if( get_bw(x0,x1,y0+i1,y0+i1,box1->p,cs,1)==0 ) break;
+ if (i1>=dy/2) break;
+ for( i2=0;i2<dy/2;i2++)
+ if( get_bw(x0,x1,y1-i2,y1-i2,box1->p,cs,1)==0 ) break;
+ if (i2<i1) break;
+
+ /* test for horizontal symmetry ~i */
+ for (y=0;y<dy;y++) for (x=0;x<dx/2;x++)
+ if ((getpixel(bp,x,y)<cs)!=(getpixel(bp,dx-1-x,y)<cs)) { y=dy+1; break; }
+ if (y==dy) ad=96*ad/100; /* ~i */
+
+ if (i2==i1 && y1<=box1->m3) ad=97*ad/100;
+ if (i2-i1<dy/8) ad=99*ad/100;
+ Setac(box1,';',ad); // dx<=3 ad--
+ if (ad>=100) return ';';
+ break;
+ }
+ // --- first test small dots . ---------------------------------
+ if( 3*dy<box1->m4-box1->m1 && abs(dx-dy)<(dx+dy)/4+2
+ && 3*y1>=(2*box1->m3+ box1->m2) // dot near baseline?
+ && 5*y0>=(3*box1->m3+2*box1->m2) ){ // Jul00
+ DBG( wchar_t c_ask='.'; )
+ d=0; r1=60;r2=140; ad=99;
+ for(x=x0;x<=x1;x++)for(y=y0;y<=y1;y++){ /* circle equation */
+ rx=100*(2*x-(x0+x1))/dx; // normalize to 15bit number
+ ry=100*(2*y-(y0+y1))/dy;
+ if( rx*rx + ry*ry < r1*r1 ) if( getpixel(box1->p,x,y)>=cs ){ d++;x=x1+1;y=y1+1; }
+ if( rx*rx + ry*ry > r2*r2 ) if( getpixel(box1->p,x,y)< cs ){ d++;x=x1+1;y=y1+1; }
+ // fprintf(stderr,"\nDBG . x= %3d %3d r= %6d %6d %6d", rx, ry, rx*rx+ry*ry, r1*r1, r2*r2);
+ }
+ if(d==0)
+ if( loop(box1->p,x0,y0,x1-x0,cs,0,RI)
+ <= loop(box1->p,x0,y1,x1-x0,cs,0,RI)
+ || loop(box1->p,x1,y0,x1-x0,cs,0,LE)
+ >= loop(box1->p,x1,y1,x1-x0,cs,0,LE) )
+ {
+ bc='.'; if (box1->dots) { Setac(box1,':',ad); ad=98*ad/100; }
+ Setac(box1,bc,ad);
+ }
+ }
+ // --- first test small dots , ---------------------------------
+ if( 3*dy<2*(box1->m4-box1->m1)
+ && 2*y0> box1->m2+box1->m3
+ && (2*dx<3*dy
+ || get_bw(0,dx/2,dy/2,dy-1,bp,cs,1)==0) ){ // ocr-a-,
+ DBG( wchar_t c_ask=','; )
+ ad=100; bc=',';
+ if (dy==1 && dx==1) ad=98*ad/100;
+ if (dy==2 && dx==1) ad=99*ad/100; // this is a problem case
+ if (dx>=dy) ad=99*ad/100;
+ if( 2*dy >= box1->m4-box1->m1) ad=98*ad/100;
+ if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) /* simple line */
+ > loop(box1->p,x0,y1,x1-x0,cs,0,RI)
+ && loop(box1->p,x1,y0,x1-x0,cs,0,LE)
+ < loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { ad=99*ad/100; }
+ else { /* with upper circle */
+ if( loop(box1->p,x0,(y0+y1+1)/2,x1-x0,cs,0,RI)<dx/2 ) ad=98*ad/100;
+ if( loop(box1->p,x1, y1 ,x1-x0,cs,0,LE)<dx/2 ) ad=98*ad/100;
+ /* NOTE(review): the next two scans start at x0 and go LE (away from the box) - confirm */
+ if( loop(box1->p,x0,y1-((dy>5)?1:0),x1-x0,cs,0,LE)>(dx+1)/2 )
+ if( loop(box1->p,x0, y1 ,x1-x0,cs,0,LE)>(dx+1)/2 ) ad=96*ad/100;
+ }
+ if(box1->dots==1) { Setac(box1,';',ad); ad=99*ad/100; }
+ Setac(box1,bc,ad);
+ }
+ // --- first test small dots '" ---------------------------------
+ if( 2*dy < box1->m4 -box1->m1+1
+ && 2*y0 < box1->m2 +box1->m3
+ && 3*y1 < box1->m2+2*box1->m3+2 ){
+ DBG( wchar_t c_ask='\''; )
+ ad=100; bc='\'';
+ if (2*y1 >= box1->m2+box1->m3) { ad=96*ad/100; MSG({}) } // ~!
+ if (3*y1>=2*box1->m2+box1->m3) { ad=96*ad/100; MSG({}) }
+ if (get_bw(x0,x1,(box1->m2+box1->m3)/2,box1->m4,box1->p,cs,1)!=0)
+ { ad=98*ad/100; MSG({}) }
+ if (dx>4
+ && num_cross(x0,x1,y1,y1,box1->p,cs) == 2) { // " "
+ bc='"';
+ // ocr-a-" has no gap!
+ if ( get_bw((x0+x1)/2,(x0+x1)/2,y0,y1,box1->p,cs,1)!=0 ) ad=96*ad/100;
+ } else {
+ if ( num_cross(x0,x1, y0 , y0 ,box1->p,cs)!=1) ad=96*ad/100;
+ if ( num_cross(x0,x1,(y0+y1)/2,(y0+y1)/2,box1->p,cs)!=1) ad=98*ad/100;
+ if (dx>dy) { ad=96*ad/100; MSG({}) }
+ }
+ if (2*y0 > box1->m1+box1->m2) ad=99*ad/100;
+ Setac(box1,bc,ad);
+ if (ad>=100) return bc;
+ }
+ // --- TILDE ~ ---------------------------------
+ if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>3 && dy>1
+ && 2*y0< box1->m1+box1->m2
+ && 3*y1<2*box1->m2+box1->m3 ){
+ if( loop(box1->p,x0,y0,dx,cs,0,RI)
+ > loop(box1->p,x0,y1,dx,cs,0,RI)
+ && loop(box1->p,x1,y0,dx,cs,0,LE)
+ < loop(box1->p,x1,y1,dx,cs,0,LE)
+ && num_cross(x0,x1,y0,y0,box1->p,cs) == 2
+ && num_cross(x0,x1,y1,y1,box1->p,cs) == 2 ) {
+ DBG( wchar_t c_ask='~'; )
+ bc=TILDE;
+ Setac(box1,bc,99);
+ }
+ }
+ // --- CIRCUMFLEX, hat ^ ---------------------------------
+ if( 2*dy<box1->m4-box1->m1 && dx>=dy && dx>2 && dy>1
+ && 2*y0< box1->m1+box1->m2
+ && 3*y1<2*box1->m2+box1->m3 ){
+ DBG( wchar_t c_ask='^'; )
+ if( ( loop(box1->p,x0,y0 ,dx,cs,0,RI)
+ > loop(box1->p,x0,y1 ,dx,cs,0,RI)-dx/8
+ || loop(box1->p,x0,y0 ,dx,cs,0,RI)
+ > loop(box1->p,x0,y1-1,dx,cs,0,RI)-dx/8 )
+ && ( loop(box1->p,x1,y0 ,dx,cs,0,LE)
+ > loop(box1->p,x1,y1 ,dx,cs,0,LE)-dx/8
+ || loop(box1->p,x1,y0 ,dx,cs,0,LE)
+ > loop(box1->p,x1,y1-1,dx,cs,0,LE)-dx/8 )
+ && num_cross(x0,x1,y0 ,y0 ,box1->p,cs) == 1
+ && ( num_cross(x0,x1,y1 ,y1 ,box1->p,cs) == 2
+ || num_cross(x0,x1,y1-1,y1-1,box1->p,cs) == 2 )) {
+ bc='^';
+ Setac(box1,bc,99);
+ }
+ }
+ // ------------------------------------------------------
+// if( dots==1 ){ um='\''; }
+#if 0 /* ToDo: change to vectors, call here or in whatletter */
+ if (box1->dots==0) { // i-dots ??? (if dots==0 is wrong)
+ y=box1->m1;
+ for(;y<y0+dy/2;y++)if( get_bw(x0+dx/4,x1,y,y,box1->p,cs,1)==1) break;
+ { i1=y;
+ if( y<y0+dy/4 )
+ for(;y<y0+dy/2;y++)if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) break;
+ if( y<y0+dy/2 && 5*(y-i1+1)>box1->m2-box1->m1){
+ testumlaut(box1,cs,2,&um); // set modifier + new y0 ???
+
+ }
+ }
+ }
+#else
+ um = box1->modifier;
+#endif
+ if ( /* um==ACUTE_ACCENT || */ um==DIAERESIS){
+ for(y=y1;y>y0;y--)
+ if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) { y0=y; dy=y1-y0+1; break; } // scan "a "o "u
+ }
+
+ // --- test numbers 0..9 --- separated for faster compilation
+ if( JOB->cfg.only_numbers ) return ocr0n(&sdata);
+
+ // bc=ocr1(box1,bp,cs);
+ if(bc!=UNKNOWN && box1->num_ac>0 && box1->wac[0]==100)
+ return bc; // for fast compilable tests
+
+ // ------ separated for faster compilation
+ // ToDo: inser ocr0_shared_t here and split into a,b,cC,d,e,f,g9,...
+ /* run the next sub-test only while no alternative with weight 100 exists */
+#define IF_NOT_SURE if(bc==UNKNOWN || box1->num_ac==0 || box1->wac[0]<100)
+
+ IF_NOT_SURE bc=ocr0_eE(&sdata);
+ IF_NOT_SURE bc=ocr0_f(&sdata);
+ IF_NOT_SURE bc=ocr0_bB(&sdata);
+ IF_NOT_SURE bc=ocr0_dD(&sdata);
+ IF_NOT_SURE bc=ocr0_F(&sdata);
+ IF_NOT_SURE bc=ocr0_uU(&sdata);
+ IF_NOT_SURE bc=ocr0_micro(&sdata);
+ IF_NOT_SURE bc=ocr0_vV(&sdata);
+ IF_NOT_SURE bc=ocr0_rR(&sdata);
+ IF_NOT_SURE bc=ocr0_m(&sdata);
+ IF_NOT_SURE bc=ocr0_tT(&sdata);
+ IF_NOT_SURE bc=ocr0_sS(&sdata);
+ IF_NOT_SURE bc=ocr0_gG(&sdata);
+ IF_NOT_SURE bc=ocr0_xX(&sdata);
+ IF_NOT_SURE bc=ocr0_yY(&sdata);
+ IF_NOT_SURE bc=ocr0_zZ(&sdata);
+ IF_NOT_SURE bc=ocr0_wW(&sdata);
+ IF_NOT_SURE bc=ocr0_aA(&sdata);
+ IF_NOT_SURE bc=ocr0_cC(&sdata);
+ IF_NOT_SURE bc=ocr0_lL(&sdata);
+ IF_NOT_SURE bc=ocr0_oO(&sdata);
+ IF_NOT_SURE bc=ocr0_pP(&sdata);
+ IF_NOT_SURE bc=ocr0_qQ(&sdata);
+ IF_NOT_SURE bc=ocr0_iIjJ(&sdata);
+ IF_NOT_SURE bc=ocr0_n(&sdata);
+ IF_NOT_SURE bc=ocr0_M(&sdata);
+ IF_NOT_SURE bc=ocr0_N(&sdata);
+ IF_NOT_SURE bc=ocr0_h(&sdata);
+ IF_NOT_SURE bc=ocr0_H(&sdata);
+ IF_NOT_SURE bc=ocr0_k(&sdata);
+ IF_NOT_SURE bc=ocr0_K(&sdata);
+ IF_NOT_SURE bc=ocr0n(&sdata);
+ IF_NOT_SURE bc=ocr0_brackets(&sdata);
+ IF_NOT_SURE bc=ocr0p9(&sdata);
+ IF_NOT_SURE bc=ocr0px(&sdata);
+
+
+ if(box1->num_ac==0 && bc!=UNKNOWN) fprintf(stderr,"<!--ERROR 576-->");
+ /* a first alternative with weight above 95 becomes the result */
+ if(box1->num_ac>0 && box1->wac[0]>95) box1->c=bc=box1->tac[0];
+ /* will be removed later, only fix old things */
+ for (i=0;i<box1->num_ac;i++) if (box1->tac[i]==bc) { bc=box1->tac[0]; }
+
+ return bc;
+}
+
+
--- /dev/null
+/*
+This is an Optical-Character-Recognition program
+Copyright (C) 2000-2007 Joerg Schulenburg
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ see README for EMAIL-address
+
+ sometimes I have written comments in german language, sorry for that
+
+ - look for ??? for preliminary code
+ - space: avX=22 11-13 (empirical estimated)
+ avX=16 5-7
+ avX= 7 5-6
+
+ ToDo: - add filter (r/s mismatch) g300c1
+ - better get_line2 function (problems on high resolution)
+ - write parallelizable code!
+ - learnmode (optimize filter)
+ - use ispell for final control or if unsure
+ - better line scanning (if not even)
+ - step 5: same chars differ? => expert mode
+ - chars dx>dy and above 50% hor-crossing > 4 is char-group ?
+ - detect color of chars and background
+ - better word space calculation (look at the examples)
+ (distance: left-left, middle-middle, left-right, thickness of e *0.75)
+
+ GLOBAL DATA (mostly structures)
+ - pix : image - one byte per pixel bits0-2=working
+ - lines : rows of the text (points to pix)
+ - box : list of bounding box for character
+ - obj : objects (lines, splines, etc. building a character)
+ */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include "../../config.h"
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
+#include "list.h"
+#include "pgm2asc.h"
+// #include "pcx.h" /* needed for writebmp (removed later) */
+/* ocr1 is the test-engine - remember: this is development version */
+#include "ocr1.h"
+/* first engine */
+#include "ocr0.h"
+#include "otsu.h"
+#include "progress.h"
+
+#include "gocr.h"
+
+/* MaxBox: buffer size reserved for the largest possible letter.
+ * wew: will be exceeded by capitals at 1200dpi
+ * MAX(a,b): the larger of a and b; the selected argument is evaluated
+ * twice, so do not pass expressions with side effects. */
+#define MaxBox (100*200) // largest possible letter (buffersize)
+#define MAX(a,b) ((a) >= (b) ? (a) : (b))
+
+/* if the system does not know about wchar.h, define functions here */
+#ifndef HAVE_WCHAR_H
+/* typedef unsigned wchar_t; */
+/* Find the first occurrence of WC in WCS.
+ * Returns a pointer to the matching element, or NULL if WC does not occur.
+ * As with the standard wcschr(), the terminating null character counts as
+ * part of the string, so searching for 0 yields a pointer to the
+ * terminator instead of NULL. */
+const wchar_t *wcschr (const wchar_t *wcs, const wchar_t wc) {
+  size_t i;
+  for (i = 0; wcs[i]; i++)
+    if (wcs[i] == wc) return wcs + i;
+  /* wcs[i] is the terminator here; it matches only when wc itself is 0 */
+  return (wc == 0) ? wcs + i : NULL;
+}
+/* Copy the null-terminated wide string src, terminator included, into dest
+ * and return dest. The caller has to provide a large enough buffer. */
+const wchar_t *wcscpy (wchar_t *dest, const wchar_t *src) {
+  wchar_t *out = dest;
+  while (*src) *out++ = *src++;
+  *out = 0;
+  return dest;
+}
+/* Return the number of wide characters in s before the terminating null. */
+size_t wcslen (const wchar_t *s){
+  const wchar_t *end = s;
+  while (*end) end++;
+  return (size_t)(end - s);
+}
+#endif
+#ifndef HAVE_WCSDUP
+/* Duplicate the wide string WS into freshly malloc'ed memory (its a gnu
+ * extension). Returns NULL if the allocation fails; the caller frees the
+ * result with free(). */
+wchar_t * wcsdup (const wchar_t *WS) {
+  /* number of elements to reserve, terminating null included */
+  const size_t count = wcslen(WS) + 1;
+  wchar_t *dup = (wchar_t *) malloc(count * sizeof(wchar_t));
+  if (dup == NULL)
+    return NULL;
+  wcscpy(dup, WS);
+  return dup;
+}
+#endif
+
+// ------------------------ feature extraction -----------------
+// -------------------------------------------------------------
+// detect maxima of line overlaps (returned in %) and line coordinates;
+// the four direction codes below are reserved for future use
+#define HOR 1 // horizontal
+#define VER 2 // vertical
+#define RIS 3 // rising
+#define FAL 4 // falling
+
+/* exchange the values of the two ints a and b point to */
+static void swap(int *a, int *b) {
+  const int first = *a, second = *b;
+  *a = second;
+  *b = first;
+}
+
+// Walk the line from (x0,y0) to (x1,y1) in pixmap p and measure its coverage:
+// a pixel is a hit when (getpixel(p,x,y)<cs) xor (ret&1) is true, so an odd
+// ret counts the inverse pixels. Returns hits*(ret&~1)/(pixels visited), which
+// is a percentage for ret==100. Line: y=dy/dx*x+b, implicit form
+// d=F(x,y)=dy*x-dx*y+b*dx=0; it is stepped incrementally in a loop (nothing
+// here recurses) with the integer decision variable d=2*F(x,y).
+int get_line(int x0, int y0, int x1, int y1, pix *p, int cs, int ret){
+ int dx,dy,incrE,incrNE,d,x,y,r0,r1,ty,tx,
+ *px,*py,*pdx,*pdy,*ptx,*pty,*px1;
+ dx=abs(x1-x0); tx=((x1>x0)?1:-1); // tx=x mirroring: step sign along x (new)
+ dy=abs(y1-y0); ty=((y1>y0)?1:-1); // ty=y mirroring: step sign along y (new)
+ // rotate coordinate system if dy>dx: the p* pointers alias the major axis
+/*bbg: can be faster if instead of pointers we use the variables and swaps? */
+/*js: Do not know, I am happy that the current code is working and is small */
+ if(dx>dy){ pdx=&dx;pdy=&dy;px=&x;py=&y;ptx=&tx;pty=&ty;px1=&x1; }
+ else { pdx=&dy;pdy=&dx;px=&y;py=&x;ptx=&ty;pty=&tx;px1=&y1; }
+ if( *ptx<0 ){ swap(&x0,&x1);swap(&y0,&y1);tx=-tx;ty=-ty; } /* always walk the major axis upwards */
+ d=((*pdy)<<1)-(*pdx); incrE=(*pdy)<<1; incrNE=((*pdy)-(*pdx))<<1;
+ x=x0; y=y0; r0=r1=0; /* r0=hits, r1=misses; dd=tolerance (store max drift) */
+ while( (*px)<=(*px1) ){ /* runs at least once, so r0+r1>0 below */
+ if( ((getpixel(p,x,y)<cs)?1:0)^(ret&1) ) r0++; else r1++;
+ (*px)++; if( d<=0 ){ d+=incrE; } else { d+=incrNE; (*py)+=(*pty); }
+ }
+ return (r0*(ret&~1))/(r0+r1); // ret==100 => percentage %
+}
+
+// Variant of get_line() that should detect whether a direct connection between
+// the two points exists or not; not finally implemented. A miss on the line is
+// tolerated as long as a neighbour one pixel perpendicular to the line is a hit.
+// ret & 1 => inverse pixel! d=2*F(x,y) integer numbers, ideal line: ,I pixel: I@
+// ..@ @@@ .@. ...,@2@. +1..+3 floodfill around line ???
+// ..@ .@@ .@. ...,.@@@ +2..+4 <= that's not implemented yet
+// ..@ ..@ .@. ...,.@@@ +2..+4
+// @.@ @.. .@. ...,@@@. +1..+3
+// @.@ @@. .@. ...I@@@. 0..+3
+// @@@ @@@ .@. ..@1@@.. 0..+2
+// 90% 0% 100% 90% r1-r2
+// Returns r0*(ret&~1)/(r0+r1) like get_line(). (js: I am not satisfied with it)
+int get_line2(int x0, int y0, int x1, int y1, pix *p, int cs, int ret){
+ int dx,dy,incrE,incrNE,d,x,y,r0,r1,ty,tx,q,ddy,rx,ry,
+ *px,*py,*pdx,*pdy,*ptx,*pty,*px1;
+ dx=abs(x1-x0); tx=((x1>x0)?1:-1); // tx=x mirroring: step sign along x (new)
+ dy=abs(y1-y0); ty=((y1>y0)?1:-1); // ty=y mirroring: step sign along y (new)
+ // rotate coordinate system if dy>dx; (ry,rx) is the offset to the side neighbours
+ if(dx>dy){ pdx=&dx;pdy=&dy;px=&x;py=&y;ptx=&tx;pty=&ty;px1=&x1;rx=1;ry=0; }
+ else { pdx=&dy;pdy=&dx;px=&y;py=&x;ptx=&ty;pty=&tx;px1=&y1;rx=0;ry=1; }
+ if( *ptx<0 ){ swap(&x0,&x1);swap(&y0,&y1);tx=-tx;ty=-ty; } /* always walk the major axis upwards */
+ d=((*pdy)<<1)-(*pdx); incrE=(*pdy)<<1; incrNE=((*pdy)-(*pdx))<<1;
+ x=x0; y=y0; r0=r1=0; ddy=3; // tolerance = bit 1 + bit 0 = left+right
+ // int t=(*pdx)/16,tl,tr; // tolerance, left-,right delimiter
+ while( (*px)<=(*px1) ){ // not finally implemented
+ q=((getpixel(p,x,y)<cs)?1:0)^(ret&1); /* 1 = hit on the line itself */
+ if ( !q ){ // tolerance one pixel perpendicular to the line
+ // what about 2 or more pixels tolerance???
+ ddy&=(~1)|(((getpixel(p,x+ry,y+rx)<cs)?1:0)^(ret&1)); /* keep bit 0 only if this side neighbour is a hit */
+ ddy&=(~2)|(((getpixel(p,x-ry,y-rx)<cs)?1:0)^(ret&1))*2; /* keep bit 1 only if the other side neighbour is a hit */
+ } else ddy=3; /* a real hit restores both tolerance bits */
+ if( ddy ) r0++; else r1++; /* counted as covered while any tolerance bit survives */
+ (*px)++; if( d<=0 ){ d+=incrE; } else { d+=incrNE; (*py)+=(*pty); }
+ }
+ return (r0*(ret&~1))/(r0+r1); // ret==100 => percentage %
+}
+
+/* Report which pixel colors occur inside the rectangle x0 <= x <= x1,
+   y0 <= y <= y1 of pixmap p; the rectangle is clipped to the pixmap first.
+   A pixel value below cs is black (bit 0 of the result), every other pixel
+   is white (bit 1). mask selects the colors of interest: 1 = black,
+   2 = white, 3 = both. The result holds the colors found, restricted to
+   mask; the scan stops as soon as all requested colors have been seen.
+   Heavily used by the engine ocr0*.cc */
+char get_bw(int x0, int x1, int y0, int y1, pix * p, int cs, int mask) {
+  const int left   = (x0 < 0) ? 0 : x0;
+  const int right  = (x1 >= p->x) ? p->x - 1 : x1;
+  const int top    = (y0 < 0) ? 0 : y0;
+  const int bottom = (y1 >= p->y) ? p->y - 1 : y1;
+  char seen = 0;      /* bit 0: black seen, bit 1: white seen */
+  int col, row;
+
+  row = top;
+  while (row <= bottom) {
+    for (col = left; col <= right; col++) {
+      if (getpixel(p, col, row) < cs) seen |= 1;
+      else seen |= 2;
+      if ((seen & mask) == mask)
+        return mask;  /* every requested color found, stop early */
+    }
+    row++;
+  }
+  return (seen & mask);
+}
+
+/* more general Mar2000 (x0,x1,y0,y1 instead of x0,y0,x1,y1! (history))
+ * Sample the straight line from x0,y0 to x1,y1, one sample per step of the
+ * longer axis, and count the white-to-black transitions on the way. The
+ * walk starts in state "white", so a black first pixel counts as one.
+ * ex: horizontal num_cross of 'm' would return 3 */
+int num_cross(int x0, int x1, int y0, int y1, pix *p, int cs) {
+  const int dx = x1 - x0, dy = y1 - y0;
+  const int steps = MAX(abs(dx), abs(dy));
+  int crossings = 0;
+  int was_black = 0;   /* color of the previous sample, 0=white 1=black */
+  int step = 0;
+
+  do {
+    int x = x0, y = y0, is_black;
+    if (steps) {       /* a single point has no direction to follow */
+      x += step * dx / steps;
+      y += step * dy / steps;
+    }
+    is_black = (getpixel(p, x, y) < cs) ? 1 : 0;
+    if (is_black == 1 && was_black == 0)
+      crossings++;
+    was_black = is_black;
+  } while (++step <= steps);
+  return crossings;
+}
+
+/* check if the character cc matches pattern
+ * possible pattern: "a-zA-Z0-9+\-\\" (x-y dont work for c>127)
+ *  - a character listed literally in the pattern matches
+ *  - "x-y" matches every character from x to y, unless the '-' is escaped
+ *    by a preceding backslash
+ *  - '-' and '\\' themselves match only where the pattern holds them
+ *    escaped by a preceding backslash; every occurrence is examined
+ * returns 1 on match, 0 otherwise (also for a NULL pattern)
+ * ToDo: wchar_t cc + matching UTF-8 pattern for nonASCII
+ */
+int my_strchr( char *pattern, wchar_t cc ) {
+  const char *needle, *hit, *dash;
+  if (pattern==(char *)NULL) return 0;
+
+  /* cc as string, so that multi-byte characters can be searched too */
+  needle=decode(cc, UTF8);
+  if (cc=='-' || cc=='\\') {
+    /* special characters count only if escaped; an unescaped hit earlier
+       in the pattern must not hide an escaped one later on */
+    for (hit=strstr(pattern,needle); hit; hit=strstr(hit+1,needle))
+      if (hit>pattern && *(hit-1)=='\\') return 1;
+    return 0;
+  }
+  if (strstr(pattern,needle)) return 1; /* cc simply matches */
+  /* look at every real '-' for a range lower-upper enclosing cc */
+  for (dash=strchr(pattern,'-'); dash; dash=strchr(dash+1,'-')) {
+    if (dash==pattern) continue;        /* no lower bound in front of it */
+    if (!dash[1]) return 0;             /* end of string, no upper bound */
+    if (*(dash-1)!='\\' && *(dash-1)<=cc && *(dash+1)>=cc) return 1;
+  }
+  return 0;
+}
+
+/* set alternate chars and its weight, called from the engine
+    if a char is recognized to (weight) percent
+   can be used for filtering (only numbers etc)
+   often usefull if Il1 are looking very similar
+   should this function stay in box.c ???
+   weight is between 0 and 100 in percent, 100 means absolutely sure
+   - not final, not time critical (js)
+   - replace it by a string-function setaobj(*b,"string",weight)
+     and let call setac the setas function
+
+   This variant stores the string as (a private copy is kept in b->tas[],
+   the matching b->tac[] entry is 0). The table b->tac/tas/wac is kept
+   sorted by descending weight and holds at most NumAlt-1 entries grown by
+   this function; when it is full the weakest entry is dropped and its
+   string released. A string already in the table is replaced only by a
+   higher weight. Modifier composition is done in setac(), not here.
+   Always returns 0.
+ */
+
+int setas(struct box *b, char *as, int weight){
+  int i,j;
+  size_t len;
+  char *copy;
+  if (b->num_ac > NumAlt || b->num_ac<0) {
+    fprintf(stderr,"\nDBG: There is something wrong with setas()!");
+    b->num_ac=0;
+  }
+  if (as==NULL) {
+    fprintf(stderr,"\nDBG: setas(NULL) makes no sense!"); return 0; }
+  if (as[0]==0) {
+    fprintf(stderr,"\nDBG: setas(\"\") makes no sense!"
+     " x= %d %d", b->x0, b->y0);
+    // out_x(b);
+    return 0;
+  }
+
+  /* char filter (ex: only numbers) ToDo: cfilter as UTF-8 */
+  if (JOB->cfg.cfilter) {
+    /* do not accept chars which are not in the cfilter string */
+    if ( as[0]>0 && as[1]==0 )
+      if ( !my_strchr(JOB->cfg.cfilter,as[0]) ) return 0;
+  }
+
+  /* only the first run gets the full weight */
+  weight=(100-JOB->tmp.n_run)*weight/100;
+
+  /* look for the same string in the table */
+  for (i=0;i<b->num_ac;i++)
+    if (b->tas[i] && strcmp(as,b->tas[i])==0) break;
+  /* if found + less weight ignore it */
+  if (i<b->num_ac && weight<=b->wac[i]) return 0;
+
+  /* get the private copy before touching the table, so that a failed
+     allocation cannot leave an entry without char and without string */
+  len=strlen(as)+1;
+  copy=(char *)malloc(len);
+  if (!copy) {
+    fprintf(stderr,"\nDBG: setas() malloc failed, entry skipped");
+    return 0;
+  }
+  memcpy(copy,as,len);
+
+  if (i<b->num_ac){
+    /* to insert the new weigth on the right place, we remove it first */
+    free(b->tas[i]);
+    for (j=i;j<b->num_ac-1;j++){  /* shift lower entries */
+      b->tac[j]=b->tac[j+1];      /* copy the char */
+      b->tas[j]=b->tas[j+1];      /* copy the pointer to the string */
+      b->wac[j]=b->wac[j+1];      /* copy the weight */
+    }
+    b->num_ac--;                  /* shrink table */
+  }
+  /* sorting and add it to the table */
+  for (i=0;i<b->num_ac;i++) if (weight>b->wac[i]) break;
+  if (b->num_ac<NumAlt-1) b->num_ac++; /* enlarge table */
+  else if (i<b->num_ac) {
+    /* table is full: the last entry drops out, release its string
+       instead of losing the pointer in the shift below */
+    free(b->tas[b->num_ac-1]);
+    b->tas[b->num_ac-1]=NULL;
+  }
+  for (j=b->num_ac-1;j>i;j--){    /* shift lower entries */
+    b->tac[j]=b->tac[j-1];        /* copy the char */
+    b->tas[j]=b->tas[j-1];        /* copy the pointer to the string */
+    b->wac[j]=b->wac[j-1];        /* copy the weight */
+  }
+  if (i<b->num_ac) {              /* insert new entry */
+    b->tac[i]=0;                  /* insert the char=0 ... */
+    b->tas[i]=copy;               /* ... string */
+    b->wac[i]=weight;             /* ... and its weight */
+  } else free(copy);              /* weaker than a full table, not stored */
+  if (i==0) b->c=b->tac[0];       /* char or 0 for string */
+  return 0;
+}
+
+/* ToDo: this function will be replaced by a call of setas() later
+ *
+ * Register the character ac with the given weight (0..100 percent) as a
+ * candidate for box b. ac is first checked against the optional char
+ * filter and combined with the box modifier via compose(). The table
+ * b->tac/tas/wac is kept sorted by descending weight; a character already
+ * in the table is replaced only by a higher weight, and when the table is
+ * full the weakest entry is dropped (its string, if any, is released).
+ * A NULL box is reported and ignored. Always returns 0.
+ */
+int setac(struct box *b, wchar_t ac, int weight){
+  int i,j;
+  if (!b) {  /* nothing to store into, and nothing to reset either */
+    fprintf(stderr,"\nDBG: This is a bad call to setac()!");
+    return 0;
+  }
+  if (b->num_ac > NumAlt || b->num_ac<0) {
+    fprintf(stderr,"\nDBG: This is a bad call to setac()!");
+    b->num_ac=0;
+  }
+  if (ac==0 || ac==UNKNOWN) {
+    fprintf(stderr,"\nDBG: setac(0) makes no sense!");
+    return 0;
+  }
+  /* char filter (ex: only numbers) ToDo: cfilter as UTF-8 */
+  if (JOB->cfg.cfilter) {
+    /* do not accept chars which are not in the cfilter string */
+    if ( !my_strchr(JOB->cfg.cfilter,ac) ) return 0;
+  }
+  /* not sure that this is the right place, but where else? */
+  if (b->modifier != SPACE && b->modifier != 0) {
+    wchar_t newac;
+    newac = compose(ac, b->modifier);
+    if (newac == ac) { /* nothing composed */
+      if(JOB->cfg.verbose & 7)
+        fprintf(stderr, "\nDBG setac(%s): compose was useless @ %d %d",
+          decode(ac,ASCII), b->x0, b->y0);
+    }
+    ac = newac;
+  }
+
+  /* only the first run gets the full weight */
+  weight=(100-JOB->tmp.n_run)*weight/100;
+
+  /* remove same entries from table */
+  for (i=0;i<b->num_ac;i++) if (ac==b->tac[i]) break;
+  if (i<b->num_ac){
+    if (weight<=b->wac[i]) return 0; /* known with same or more weight */
+    free(b->tas[i]);
+    for (j=i;j<b->num_ac-1;j++){  /* shift lower entries */
+      b->tac[j]=b->tac[j+1];      /* copy the char */
+      b->tas[j]=b->tas[j+1];      /* copy the pointer to the string */
+      b->wac[j]=b->wac[j+1];      /* copy the weight */
+    }
+    b->num_ac--;                  /* shrink table */
+  }
+  /* sorting it to the table */
+  for (i=0;i<b->num_ac;i++) if (weight>b->wac[i]) break;
+  if (b->num_ac<NumAlt-1) b->num_ac++; /* enlarge table */
+  else if (i<b->num_ac) {
+    /* table is full: the last entry drops out, release its string
+       instead of losing the pointer in the shift below */
+    free(b->tas[b->num_ac-1]);
+    b->tas[b->num_ac-1]=NULL;
+  }
+  for (j=b->num_ac-1;j>i;j--){    /* shift lower entries */
+    b->tac[j]=b->tac[j-1];        /* copy the char */
+    b->tas[j]=b->tas[j-1];        /* copy the pointer to the string */
+    b->wac[j]=b->wac[j-1];        /* copy the weight */
+  }
+  if (i<b->num_ac) {              /* insert new entry */
+    b->tac[i]=ac;                 /* insert the char ... */
+    b->tas[i]=NULL;               /* ... no string (?) */
+    b->wac[i]=weight;             /* ... and its weight */
+  }
+  if (i==0) b->c=ac; /* store best result to b->c (will be obsolete) */
+
+  return 0;
+}
+
+/* look ac up in the table of alternative chars of box b
+   usefull for contextcorrection and box-splitting
+   return 0 if not found
+   return the stored weight (wac) of the first matching entry otherwise
+   an implausible table size is reported and the table is reset to empty
+ */
+int testac(struct box *b, wchar_t ac){
+  int idx = 0;
+  if (b->num_ac<0 || b->num_ac > NumAlt) {
+    fprintf(stderr,"\n#DEBUG: There is something wrong with testac()!");
+    b->num_ac=0;
+  }
+  /* search entries in table */
+  while (idx < b->num_ac) {
+    if (b->tac[idx]==ac)
+      return b->wac[idx];
+    idx++;
+  }
+  return 0;
+}
+
+
+/* look for edges: follow a line from x0,y0 to x1,y1, record the
+ * location of each color transition in path (x[], y[] grow on demand
+ * through xrealloc), and return their number.
+ * path->start receives the color of the first pixel (1=black, 0=white),
+ * path->num the number of transitions.
+ * ex: horizontal num_cross of 'm' would return 6
+ * remark: this function is not used, obsolete? ToDo: remove?
+ */
+int follow_path(int x0, int x1, int y0, int y1, pix *p, int cs, path_t *path) {
+  const int dx = x1 - x0, dy = y1 - y0;
+  const int steps = MAX(abs(dx), abs(dy));
+  int found = 0;   /* transitions stored so far */
+  int last, step;
+
+  last = getpixel(p, x0, y0) < cs;   // 0=white 1=black
+  path->start = last;
+  for (step = 1; step <= steps; step++) {
+    /* steps >= 1 whenever the loop body runs, so dividing is safe */
+    const int x = x0 + step * dx / steps;
+    const int y = y0 + step * dy / steps;
+    const int now = getpixel(p, x, y) < cs;   // 0=white 1=black
+    if (now == last)
+      continue;
+    if (found >= path->max) {   /* no room left, enlarge both arrays */
+      const int grown = path->max*2+10;
+      path->x = (int *) xrealloc(path->x, grown*sizeof(int));
+      path->y = (int *) xrealloc(path->y, grown*sizeof(int));
+      path->max = grown;
+    }
+    path->x[found] = x;
+    path->y[found] = y;
+    found++;
+    last = now;
+  }
+  path->num = found;
+  return found;
+}
+
+/* realloc() wrapper that terminates the program with exit(1) when a
+ * non-empty request cannot be satisfied; otherwise the result of
+ * realloc() is passed through unchanged.
+ * ToDo: only used in follow_path, which is obsolete, remove? */
+void *xrealloc(void *ptr, size_t size){
+  void *grown = realloc(ptr, size);
+  if (grown == NULL && size != 0) {
+    fprintf(stderr, "insufficient memory");
+    exit(1);
+  }
+  return grown;
+}
+
+/*
+ * -------------------------------------------------------------
+ * mark edge-points
+ *   - first move forward until b/w-edge
+ *   - more than 2 pixel?
+ *   - loop around
+ *     - if forward pixel : go up, rotate right
+ *     - if forward no pixel : rotate left
+ *   - stop if found first 2 pixel in same order
+ *  go_along_the_right_wall strategy is very similar and used otherwhere
+ * --------------------------------------------------------------
+ * turmite game: the position *x,*y is moved one pixel at a time; on a
+ *   black pixel (value < cs) rule rb is applied, on a white one rule rw.
+ *   A rule is one of UP, DO, RI, LE (move) or ST (stop).
+ *   The walk ends when the position leaves the box x0..x1, y0..y1 or a ST
+ *   rule is hit; nothing happens if x0,y0 is outside the pixmap.
+ *   out: last position in *x,*y
+ *
+ * could be used to extract more features:
+ *   by counting steps, dead-end streets, xmax, ymax, corner edges
+ *
+ *   use this little animal to find features, I first was happy about it
+ *    but now I prefer the loop() function
+ */
+
+void turmite(pix *p, int *x, int *y,
+             int x0, int x1, int y0, int y1, int cs, int rw, int rb) {
+  if (outbounds(p, x0, y0))     // out of pixmap
+    return;
+  for (;;) {
+    int rule;
+    if (*x < x0 || *y < y0 || *x > x1 || *y > y1)
+      return;                   /* left the box */
+    rule = (getpixel(p, *x, *y) < cs) ? rb : rw;   // select rule
+    if (rule == ST)
+      return;                   /* stop rule, position stays */
+    if      (rule == UP) (*y)--;
+    else if (rule == DO) (*y)++;
+    else if (rule == RI) (*x)++;
+    else if (rule == LE) (*x)--;
+    else assert(0);             /* unknown rule */
+  }
+}
+
+/* search a way from x0,y0 to x1,y1 which stays on pixels of the same
+ * color as the start pixel and inside the rectangle spanned by both points
+ * only two directions, useful to test if there is a gap 's'
+ * labyrinth algorithm (keep the wall on the right) - do you know a faster way?
+ * returns 1 if x1,y1 was reached, 0 if the walker is back at the start
+ * with its initial heading */
+int joined(pix *p, int x0, int y0, int x1, int y1, int cs){
+  const int xmin = (x1 > x0) ? x0 : x1, xmax = (x1 > x0) ? x1 : x0;
+  const int ymin = (y1 > y0) ? y0 : y1, ymax = (y1 > y0) ? y1 : y0;
+  const int start = (getpixel(p, x0, y0) < cs) ? 1 : 0; /* color to stay on */
+  int cx = x0, cy = y0;   /* current position */
+  int hx = 1, hy = 0;     /* current heading */
+
+  for (;;) {
+    /* the pixel to the right of the heading */
+    const int nx = cx + hy, ny = cy - hx;
+    const int same = (start == ((getpixel(p, nx, ny) < cs) ? 1 : 0));
+    const int keep = hx;
+    if (same && nx >= xmin && nx <= xmax && ny >= ymin && ny <= ymax) {
+      /* right is free: rotate right and step forward */
+      hx = hy; hy = -keep;
+      cx = nx; cy = ny;
+    } else {
+      /* wall or border: rotate left */
+      hx = -hy; hy = keep;
+    }
+    if (cx == x1 && cy == y1) return 1;
+    if (cx == x0 && cy == y0 && hx == 1) return 0;
+  }
+}
+
+/* move from x,y to direction r (UP, DO, LE or RI) as long as the pixel
+ *  color, (value < cs) xor col, tests as zero, but no more than l steps
+ *  and never beyond the pixmap border
+ * return the number of steps done; 0 if the start is outside the pixmap
+ *  or r is no known direction */
+int loop(pix *p,int x,int y,int l,int cs,int col, DIRECTION r){
+  int steps = 0;
+  int sx = 0, sy = 0;   /* unit step of the chosen direction */
+
+  if (x < 0 || y < 0 || x >= p->x || y >= p->y)
+    return 0;
+  switch (r) {
+    case UP: sy = -1; break;
+    case DO: sy =  1; break;
+    case LE: sx = -1; break;
+    case RI: sx =  1; break;
+    default: return 0;
+  }
+  /* only one coordinate moves, the other one stays in range, so the
+     complete range test equals the single test of the moving coordinate */
+  while (steps < l && x >= 0 && y >= 0 && x < p->x && y < p->y) {
+    if ( (getpixel(p,x,y)<cs)^col )
+      break;
+    steps++;
+    x += sx;
+    y += sy;
+  }
+  return steps;
+}
+
+/* Given a point, frames a rectangle containing all points of the same
+ * color surrounding it, and mark these points.
+ * ToDo: obsolete and replaced by frame_vector
+ *
+ * looking for better algo: go horizontally and look for upper/lower non_marked_pixel/nopixel
+ * use lowest three bits for mark
+ * - recursive version removed! AmigaOS has no Stack-OVL-Event
+ * run around the shape using laby-robot
+ * bad changes can lead to endless loop!
+ * - this is not absolutely sure but mostly works well
+ * p - pixmap; marks are OR-ed into p->p[]
+ * x,y - start point
+ * x0,x1,y0,y1 - frame, only ever enlarged here (the caller initialises it)
+ * cs - threshold, a pixel value below cs belongs to color 0, else color 1
+ * diag - 0: only pi/2 direction, 1: pi/4 directions (diagonal)
+ * mark - 3 bit marker, mark each valid pixel with it
+ * returns the number of pixels marked (active version); 0 if the start
+ * is out of bounds, already carries the mark, or the work buffer could
+ * not be allocated
+ */
+int frame_nn(pix *p, int x, int y,
+ int *x0, int *x1, int *y0, int *y1, // frame to enlarge
+ int cs, int mark,int diag){
+#if 1 /* flood-fill to detect black objects, simple and faster? */
+ int rc = 0, dx, col, maxstack=0; static int overflow=0;
+ int bmax=1024, blen=0, *buf; /* buffer as replacement for recursion stack,
+ holds up to bmax (x,y) pairs; overflow is static, so the warning below
+ is printed only once per program run */
+
+ /* check bounds */
+ if (outbounds(p, x, y)) return 0;
+ /* check if already marked (with mark since v0.4) */
+ if ((marked(p,x,y)&mark)==mark) return 0;
+
+ col = ((getpixel(p, x, y) < cs) ? 0 : 1); /* color of the region to fill */
+ buf=(int *)malloc(bmax*sizeof(int)*2);
+ if (!buf) { fprintf(stderr,"malloc failed (frame_nn)\n");return 0;}
+ buf[0]=x;
+ buf[1]=y;
+ blen=1;
+
+ g_debug(fprintf(stderr,"\nframe_nn x=%4d y=%4d",x,y);)
+ for ( ; blen ; ) {
+ /* max stack depth is complexity of the object */
+ if (blen>maxstack) maxstack=blen;
+ blen--; /* reduce the stack */
+ x=buf[blen*2+0];
+ y=buf[blen*2+1];
+ if (y < *y0) *y0 = y;
+ if (y > *y1) *y1 = y;
+ /* first go to leftmost pixel */
+ for ( ; x>0 && (col == ((getpixel(p, x-1, y) < cs) ? 0 : 1)) ; x--);
+ if ((marked(p,x,y)&mark)==mark) continue; /* already scanned */
+ for (dx=-1;dx<2;dx+=2) /* look at upper and lower line, left */
+ if ( diag && x<p->x && x-1>0 && y+dx >=0 && y+dx < p->y
+ && col != ((getpixel(p, x , y+dx) < cs) ? 0 : 1)
+ && col == ((getpixel(p, x-1, y+dx) < cs) ? 0 : 1)
+ && !((marked(p,x-1,y+dx)&mark)==mark)
+ ) {
+ if (blen+1>=bmax) { overflow|=1; continue; } /* stack full: this seed is dropped */
+ buf[blen*2+0]=x-1;
+ buf[blen*2+1]=y+dx;
+ blen++;
+ }
+ if (x < *x0) *x0 = x;
+ /* second go right, mark and get new starting points */
+ for ( ; x<p->x && (col == ((getpixel(p, x , y) < cs) ? 0 : 1)) ; x++) {
+ p->p[x + y * p->x] |= (mark & 7); rc++; /* mark pixel */
+ /* enlarge frame */
+ if (x > *x1) *x1 = x;
+ for (dx=-1;dx<2;dx+=2) /* look at upper and lower line;
+ NOTE(review): the range test of y+dx is the last term, after
+ getpixel()/marked() were already called with y+dx - safe only if
+ both tolerate out-of-range coordinates, confirm */
+ if ( col == ((getpixel(p, x , y+dx) < cs) ? 0 : 1)
+ && (
+ col != ((getpixel(p, x-1, y ) < cs) ? 0 : 1)
+ || col != ((getpixel(p, x-1, y+dx) < cs) ? 0 : 1) )
+ && !((marked(p,x,y+dx)&mark)==mark) && y+dx<p->y && y+dx>=0
+ ) {
+ if (blen+1>=bmax) { overflow|=1; continue; } /* stack full: this seed is dropped */
+ buf[blen*2+0]=x;
+ buf[blen*2+1]=y+dx;
+ blen++;
+ }
+ }
+ for (dx=-1;dx<2;dx+=2) /* look at upper and lower line, right */
+ if ( diag && x<p->x && x-1>0 && y+dx >=0 && y+dx < p->y
+ && col == ((getpixel(p, x-1, y ) < cs) ? 0 : 1)
+ && col != ((getpixel(p, x , y ) < cs) ? 0 : 1)
+ && col != ((getpixel(p, x-1, y+dx) < cs) ? 0 : 1)
+ && col == ((getpixel(p, x , y+dx) < cs) ? 0 : 1)
+ && !((marked(p,x,y+dx)&mark)==mark)
+ ) {
+ if (blen+1>=bmax) { overflow|=1; continue; } /* stack full: this seed is dropped */
+ buf[blen*2+0]=x;
+ buf[blen*2+1]=y+dx;
+ blen++;
+ }
+ }
+
+ /* debug, ToDo: use info maxstack and pixels for image classification */
+ g_debug(fprintf(stderr," maxstack= %4d pixels= %6d",maxstack,rc);)
+ if (overflow==1){ /* first overflow ever seen: warn once, bit 1 remembers it */
+ overflow|=2;
+ fprintf(stderr,"# Warning: frame_nn stack oerflow\n");
+ }
+ free(buf);
+#else /* old version, ToDo: improve it for tmp04/005*.pgm.gz */
+ int i, j, d, dx, ox, oy, od, nx, ny, rc = 0, rot = 0, x2 = x, y2 = y, ln;
+
+ static const int d0[8][2] = { { 0, -1} /* up */, {-1, -1},
+ {-1, 0} /* left */, {-1, 1},
+ { 0, 1} /* down */, { 1, 1},
+ { 1, 0} /* right */, { 1, -1}};
+
+ /* check bounds */
+ if (outbounds(p, x, y))
+ return 0;
+ /* check if already marked */
+ if ((marked(p,x,y)&mark)==mark)
+ return 0;
+
+ i = ((getpixel(p, x, y) < cs) ? 0 : 1);
+ rc = 0;
+
+ g_debug(fprintf(stderr," start frame:");)
+
+ for (ln = 0; ln < 2 && rot >= 0; ln++) { // repeat if right-loop
+ g_debug(fprintf(stderr," ln=%d diag=%d cs=%d x=%d y=%d - go to border\n",ln,diag,cs,x,y);)
+
+ od=d=(8+4*ln-diag)&7; // start robot looks up, right is a wall
+ // go to right (left) border
+ if (ln==1) {
+ x=x2; y=y2;
+ }
+ /* start on leftmost position */
+ for (dx = 1 - 2*ln; x + dx < p->x && x + dx >= 0 /* bounds */ &&
+ i == ((getpixel(p, x + dx, y) < cs) ? 0 : 1) /* color */;
+ x += dx);
+
+ g_debug(fprintf(stderr," ln=%d diag=%d cs=%d x=%d y=%d\n",ln,diag,cs,x,y);)
+
+ /* robot stores start-position */
+ ox = x; oy = y;
+ for (rot = 0; abs(rot) <= 64; ) { /* for sure max. 8 spirals */
+ /* leftmost position */
+ if (ln == 0 && x < x2) {
+ x2 = x; y2 = y;
+ }
+
+ g_debug(fprintf(stderr," x=%3d y=%3d d=%d i=%d p=%3d rc=%d\n",x,y,d,i,getpixel(p,x,y),rc);)
+
+ if ( abs(d0[d][1]) ) { /* mark left (right) pixels */
+ for (j = 0, dx = d0[d][1]; x + j >= 0 && x + j < p->x
+ && i == ((getpixel(p, x + j, y) < cs) ? 0 : 1); j += dx) {
+ if (!((marked(p, x + j, y)&mark)==mark))
+ rc++;
+ p->p[x + j + y * p->x] |= (mark & 7);
+ }
+ }
+ /* look to the front of robot */
+ nx = x + d0[d][0];
+ ny = y + d0[d][1];
+ /* if right is a wall */
+ if ( outbounds(p, nx, ny) || i != ((getpixel(p,nx,ny)<cs) ? 0 : 1) ) {
+ /* rotate left */
+ d=(d+2-diag) & 7; rot-=2-diag;
+ }
+ else { /* if no wall, go, turn back and rotate left */
+ x=nx; y=ny; d=(d+4+2-diag) & 7; rot+=2-diag+4;
+ /* enlarge frame */
+ if (x < *x0) *x0 = x;
+ if (x > *x1) *x1 = x;
+ if (y < *y0) *y0 = y;
+ if (y > *y1) *y1 = y;
+ }
+ if(x==ox && y==oy && d==od) break; // round trip finished
+ }
+ }
+ g_debug(fprintf(stderr," rot=%d\n",rot);)
+#endif
+ return rc;
+}
+
+/* obsolete! replaced by vectors
+ * mark the region of neighbouring pixels having the same color as x,y and
+ * return the pixel count reported by frame_nn(), which does the work;
+ * the frame it computes is thrown away
+ * parameters: (&~7)-pixmap, start-point, critical_value, mark
+ * diagonal neighbours are used on odd runs (JOB->tmp.n_run & 1)
+ * recursion is removed */
+int mark_nn(pix * p, int x, int y, int cs, int r) {
+  /* frame required by frame_nn, not used afterwards */
+  int left = x, right = x, top = y, bottom = y;
+
+  if (outbounds(p, x, y))          /* out of bounds? */
+    return 0;
+  if ((marked(p, x, y)&r)==r)      /* already marked? */
+    return 0;
+  return frame_nn(p, x, y, &left, &right, &top, &bottom, cs, r,
+                  JOB->tmp.n_run & 1);
+}
+
+/* ToDo: finish to replace old frame by this new one
+ *
+ * @...........#@@@@@@@. # = marked as already scanned black pixels
+ * @........@@@@@@@@@@@# only left and right border
+ * .......#@@@@@@@@@@@@@ left side on even y
+ * ......@@@@@@@@#.@@@@# right side on odd y
+ * .....#@@@@@......#@@@ no border is marked twice
+ * ....@@@@@#......@@@#. works also for thin lines
+ * ...#@@@@........#@@@. - outer loop is stored as first
+ * ..@@@@#........@@@#.. - inner loop is stored as second
+ * .#@@@@........#@@@@.. 1st in an extra box (think on white chars)
+ * @@@@#.......@@@@#.... 2nd merge in an extra step
+ * #@@@@@....#@@@@@.....
+ * @@@@@@@@@@@@@@#......
+ * .#@@@@@@@@@@@@.......
+ *
+ * run around the shape using laby-robot
+ * - used for scanning boxes, look for horizontal b/w transitions
+ * with unmarked black pixels and call this routine
+ * - stop if crossing a marked box in same direction (left=up, right=down)
+ * box - char box, store frame_vectors and box
+ * x,y - starting point
+ * cs - threshold, a pixel value below cs is color 0, otherwise color 1
+ * mark - 3 bit marker, mark each valid pixel with it
+ * diag - 0: only pi/2 direction, 1: pi/4 directions (diagonal)
+ * ds - start direction, 6=right of right border, 2=left of left border
+ * ret - the step counter rc on success (>0), 0=start point out of bounds,
+ * -2=max_num_frames_exceeded,
+ * -7=no border in direction ds
+ * side effects: appends one frame to box1 (frame_vector[],
+ * num_frame_vectors[], frame_vol[], frame_per[], num_frames), enlarges
+ * box1->x0..y1 and ORs mark into the pixels of box1->p along the border
+ */
+#if 0
+#undef g_debug
+#define g_debug(x) x
+#endif
+/* grep keywords: scan_vectors frame_vector */
+int frame_vector(struct box *box1, int x, int y,
+ int cs, int mark, int diag, int ds) {
+ int i1, i2, i2o,
+ new_x=1, /* flag for storing the vector x,y */
+ steps=1, /* steps between stored vectors, speedup for big frames */
+ d, /* direction */
+ ox, oy, /* starting point */
+ nx, ny, mx, my, /* used for simplification */
+ /* ToDo: add periphery to box (circumference) */
+ rc = 1, /* return code, circumference, sum vector lengths */
+ rot = 0, /* memory for rotation, rot=8 means one full rotation */
+ vol = 0; /* volume inside frame, negative for white inside black */
+ pix *p=box1->p;
+
+ /* translate the 8 directions to (x,y) pairs,
+ * if only four directions are used, only every 2nd vector is accessed,
+ * +1 turn left, -1 turn right
+ */
+ static const int d0[8][2] =
+ { { 0, -1}, /* up */ {-1, -1}, /* up-le */
+ {-1, 0}, /* left */ {-1, 1}, /* do-le */
+ { 0, 1}, /* down */ { 1, 1}, /* do-ri */
+ { 1, 0}, /* right */ { 1, -1} }; /* up-ri */
+
+ /* check bounds */
+ if (outbounds(p, x, y))
+ return 0;
+
+ /* pixel color we are looking for, 0=black, 1=white;
+ * i1 is the color of the start pixel, i2 (for now) the color of its
+ * neighbour in direction ds */
+ d = ds;
+ i1 = ((getpixel(p, x, y ) < cs) ? 0 : 1);
+ i2 = ((getpixel(p, x + d0[d][0], y + d0[d][1]) < cs) ? 0 : 1);
+
+ g_debug(fprintf(stderr,"\nLEV2 frame_vector @ %3d %3d d%d %2d %2d"
+ " %d-%d pix=%3d mark=%d cs=%d",\
+ x,y,ds,d0[ds][0],d0[ds][1],i1,i2,getpixel(p,x,y),mark,cs);)
+
+ if (i1==i2){
+ fprintf(stderr,"ERROR frame_vector: no border\n");
+ return -7; /* no border detected */
+ }
+
+ /* initialize boxframe outside this function
+ box1->x0=box1->x1=x;
+ box1->y0=box1->y1=y;
+ */
+
+ /* initialize boxvector outside this function
+ box1->num_frames=0
+ num_frame_vectors[0]=0 ???
+ and store start value
+ */
+ if (box1->num_frames > MaxNumFrames) return -2; /* NOTE(review): with '>' a box already holding MaxNumFrames frames still passes and gets one more; confirm the per-frame arrays are large enough */
+ /* index to next (x,y): from here on i2 is the next free slot in
+ * frame_vector[] and i2o remembers where this frame starts.
+ * NOTE(review): the end index of a frame is written below to
+ * num_frame_vectors[num_frames-1], while this reads
+ * num_frame_vectors[num_frames] for the start of the next frame;
+ * confirm that the caller initialises that element */
+ i2o=i2=( (box1->num_frames==0)?0:
+ box1->num_frame_vectors[ box1->num_frames ] );
+#if 0 // obsolete v0.43
+ box1->frame_vector[i2][0]=x;
+ box1->frame_vector[i2][1]=y;
+ i2++;
+ box1->num_frame_vectors[ box1->num_frames ]=i2;
+#endif
+ box1->num_frames++;
+
+ /* robot stores start-position */
+ ox = x; oy = y; /* look forward to white pixel */
+
+ for (;;) { /* stop if same marked pixel touched */
+
+ g_debug(fprintf(stderr,"\nLEV3: x= %3d %3d d= %d rot= %2d %3d",x,y,d,rot,i2);)
+
+ /* ToDo: store max. abs(rot) ??? for better recognition */
+ if (new_x) {
+ g_debug(fprintf(stderr,"\nLEV2: markB xy= %3d %3d ", x, y);)
+ p->p[x + y * p->x] |= (mark & 7); /* mark black pixel */
+ }
+
+ /* store a new vector or enlarge the predecessor */
+ if (new_x && (rc%steps)==0) { /* dont store everything on big chars */
+ if (i2>=MaxFrameVectors) { /* table full: try to make room first */
+ box1->num_frame_vectors[ box1->num_frames-1 ]=i2;
+ reduce_vectors(box1,1); /* simplify loop */
+ i2=box1->num_frame_vectors[ box1->num_frames-1 ];
+ /* enlarge steps on big chars getting speedup */
+ steps=(box1->y1-box1->y0+box1->x1-box1->x0)/32+1;
+ }
+ /* store frame-vector */
+ if (i2<MaxFrameVectors) {
+ box1->frame_vector[i2][0]=x;
+ box1->frame_vector[i2][1]=y;
+ /* test if older vector points to the same direction */
+ if (i2>1) {
+ /* get predecessor */
+ nx=box1->frame_vector[i2-1][0]-box1->frame_vector[i2-2][0];
+ ny=box1->frame_vector[i2-1][1]-box1->frame_vector[i2-2][1];
+ mx=x -box1->frame_vector[i2-1][0];
+ my=y -box1->frame_vector[i2-1][1];
+ /* same direction? (cross product zero and no component reversed) */
+ if (nx*my-ny*mx==0 && nx*mx>=0 && ny*my>=0) {
+ /* simplify by removing predecessor */
+ i2--;
+ box1->frame_vector[i2][0]=x;
+ box1->frame_vector[i2][1]=y;
+ } /* do not simplify */
+ }
+ i2++;
+ box1->num_frame_vectors[ box1->num_frames-1 ]=i2;
+ }
+ g_debug(fprintf(stderr," stored @ %3d steps= %d", i2-1, steps);)
+ }
+ new_x=0; /* work for new pixel (x,y) done */
+
+ /* check if round trip is finished */
+ if (x==ox && y==oy && abs(rot)>=8) break;
+
+ /* look to the front of robot (turtle or ant) */
+ nx = x + d0[d][0];
+ ny = y + d0[d][1];
+
+ /* next step, if right is a wall turn the turtle left */
+ if ( outbounds(p, nx, ny) || i1 != ((getpixel(p,nx,ny)<cs) ? 0 : 1) ) {
+ if (y==ny && nx>=0 && nx<p->x) { /* if inbound */
+ g_debug(fprintf(stderr,"\nLEV2: markW xy= %3d %3d ", nx, ny);)
+ p->p[nx + ny * p->x] |= (mark & 7); /* mark white pixel */
+ }
+ /* rotate left 90 or 45 degrees */
+ d=(d+2-diag) & 7; rot+=2-diag;
+ /* calculate volume inside frame */
+ switch (d+diag) {
+ case 2+2: vol-=x-1; break;
+ case 6+2: vol+=x; break;
+ }
+ }
+ else { /* if no wall, go forward and turn right (90 or 45 degrees) */
+ x=nx; y=ny;
+ /* turn back and rotate left */
+ d=(d+4+2-diag) & 7; rot+=2-diag-4;
+ rc++; /* counting steps, used for speedup */
+
+ /* enlarge frame */
+ if (x < box1->x0) box1->x0 = x;
+ if (x > box1->x1) box1->x1 = x;
+ if (y < box1->y0) box1->y0 = y;
+ if (y > box1->y1) box1->y1 = y;
+
+ new_x=1;
+ }
+ }
+
+ /* to distinguish inner and outer frames, store volume as +v or -v */
+ box1->frame_vol[ box1->num_frames-1 ] = vol;
+ box1->frame_per[ box1->num_frames-1 ] = rc-1;
+
+ /* dont count and store the first vector twice */
+ if (i2-i2o>1) {
+ i2--; rc--; box1->num_frame_vectors[ box1->num_frames-1 ]=i2;
+ }
+ /* output break conditions */
+ g_debug(fprintf(stderr,"\nLEV2 o= %3d %3d x= %3d %3d r=%d v=%d",ox,oy,x,y,rot,vol);)
+ /* rc=1 for a single point, rc=2 for a two pixel sized point */
+ g_debug(fprintf(stderr," steps= %3d vectors= %3d",rc,i2);)
+ /* out_x(box1); ToDo: output only the first thousand */
+ return rc; /* return number of bordering pixels = periphery? */
+}
+
+
+
+/* clear the lowest 3 bits (they are used for marking) of every pixel in
+ * the rectangle x0 <= x <= x1, y0 <= y <= y1; no clipping is done */
+void clr_bits(pix * p, int x0, int x1, int y0, int y1) {
+  int row = y0;
+  while (row <= y1) {
+    int col = x0;
+    while (col <= x1) {
+      p->p[col + row*p->x] &= ~7;
+      col++;
+    }
+    row++;
+  }
+}
+
+/* look for white holes surrounded by black points
+ * at the moment look for white point with black in all four directions
+ * - store position of hole in coordinates relative to box!
+ * ToDo: count only holes with vol>10% ???
+ * ToDo: rewrite for frame vectors (faster, no malloc)
+ * holes are frames rotating left hand
+ * obsolete, do it with vectors
+ *
+ * Works on a private copy of the region x0..x1, y0..y1 of p: white pixels
+ * (value >= cs) connected to the border of the copy are marked with AT,
+ * every white region left unmarked afterwards is a hole. A hole is counted
+ * if it has more than one pixel or the box is at most 40 pixels in size.
+ * holes - optional (may be NULL); receives the holes sorted by descending
+ * size, at most MAX_HOLES are stored but holes->num counts all of them
+ * returns the number of holes, 0 if the box is smaller than 3x3 or
+ * malloc failed, -1 if copybox() failed
+ */
+int num_hole(int x0, int x1, int y0, int y1, pix * p, int cs, holes_t *holes) {
+ int num_holes = 0, x, y, hole_size;
+ pix b; // temporary mini-page
+ int dx = x1 - x0 + 1, dy = y1 - y0 + 1;
+ unsigned char *buf; // 2nd copy of picture, for working
+
+ if (holes) holes->num=0;
+ if(dx<3 || dy<3) return 0;
+ b.p = buf = (unsigned char *) malloc( dx * dy );
+ if( !buf ){
+ fprintf( stderr, "\nFATAL: malloc(%d) failed, skip num_hole", dx*dy );
+ return 0;
+ }
+ if (copybox(p, x0, y0, dx, dy, &b, dx * dy))
+ { free(b.p); return -1;}
+
+ // printf(" num_hole(");
+ /* --- mark white-points connected with border */
+ for (x = 0; x < b.x; x++) {
+ if (getpixel(&b, x, 0) >= cs)
+ mark_nn(&b, x, 0, cs, AT);
+ if (getpixel(&b, x, b.y - 1) >= cs)
+ mark_nn(&b, x, b.y - 1, cs, AT);
+ }
+ for (y = 0; y < b.y; y++) {
+ if (getpixel(&b, 0, y) >= cs)
+ mark_nn(&b, 0, y, cs, AT);
+ if (getpixel(&b, b.x - 1, y) >= cs)
+ mark_nn(&b, b.x - 1, y, cs, AT);
+ }
+
+ //g_debug(out_b(NULL,&b,0,0,b.x,b.y,cs);)
+ // --- look for unmarked white points => hole
+ for (x = 0; x < b.x; x++)
+ for (y = 0; y < b.y; y++)
+ if (!((marked(&b, x, y)&AT)==AT)) // unmarked
+ if (getpixel(&b, x, y) >= cs) { // hole found
+#if 0
+ hole_size=mark_nn(&b, x, y, cs, AT); /* old version */
+ if (hole_size > 1 || dx * dy <= 40)
+ num_holes++;
+#else
+ { /* new version, for future store of hole characteristics */
+ int x0, x1, y0, y1, i, j; /* these shadow the parameters: frame of this hole inside the copy */
+ x0 = x1 = x;
+ y0 = y1 = y; // start values, frame_nn enlarges them
+ hole_size=frame_nn(&b, x, y, &x0, &x1, &y0, &y1, cs, AT, JOB->tmp.n_run & 1);
+ // store hole for future use, num is initialized with 0
+ if (hole_size > 1 || dx * dy <= 40){
+ num_holes++;
+ if (holes) {
+ // sort in table: i is the first entry smaller than the new hole
+ for (i=0;i<holes->num && i<MAX_HOLES;i++)
+ if (holes->hole[i].size < hole_size) break;
+ for (j=MAX_HOLES-2;j>=i;j--) /* move smaller entries down, the last one drops out */
+ holes->hole[j+1]=holes->hole[j];
+ if (i<MAX_HOLES) {
+ // printf(" i=%d size=%d\n",i,hole_size);
+ holes->hole[i].size=hole_size;
+ holes->hole[i].x=x;
+ holes->hole[i].y=y;
+ holes->hole[i].x0=x0;
+ holes->hole[i].y0=y0;
+ holes->hole[i].x1=x1;
+ holes->hole[i].y1=y1;
+ }
+ holes->num++; /* incremented even if the hole was not stored (i==MAX_HOLES) */
+ }
+ }
+ }
+#endif
+ }
+ free(b.p);
+ // printf(")=%d",num_holes);
+ return num_holes;
+}
+
+/* count black objects not connected with each other --- used for i,auml,ouml,etc.
+   black = pixel value < cs; ToDo: obsolete, replaced by vectors and box.num_boxes */
+int num_obj(int x0, int x1, int y0, int y1, pix * p, int cs) {
+  int col, row, width, height, found = 0;   // found = number of objects
+  pix work;                                // private copy of the box, receives the marks
+
+  if (x1 < x0 || y1 < y0) return 0;         // empty box
+  width = x1 - x0 + 1; height = y1 - y0 + 1;
+  work.p = (unsigned char *) malloc( width * height );
+  if (work.p == NULL) {
+    fprintf( stderr, "\nFATAL: malloc(%d) failed, skip num_obj", width * height );
+    return 0;
+  }
+  if (copybox(p, x0, y0, width, height, &work, width * height) != 0)
+    { free(work.p); return -1; }
+  // each black pixel without the AT mark starts a new object and is handed to mark_nn()
+  for (col = 0; col < work.x; col++)
+    for (row = 0; row < work.y; row++) {
+      if (getpixel(&work, col, row) >= cs) continue;        // white
+      if ((marked(&work, col, row) & AT) == AT) continue;   // belongs to a counted object
+      found++;
+      mark_nn(&work, col, row, cs, AT);
+    }
+  free(work.p);
+  return found;
+}
+
+#if 0
+// ----------------------------------------------------------------------
+// first idea for making recognition based on probability (disabled code)
+// - start with a list of all possible chars
+// - call recognition_of_char(box *)
+// - remove chars from list which could clearly be excluded
+// - reduce probability of chars which have wrong features
+// - font types list could also be built
+// at the moment it is only an idea, I should put it to the todo list
+// wert[i] counts how often list[i] was excluded, numrest = chars not excluded yet
+char *list="0123456789,.\xe4\xf6\xfc" // a=228 o=246 u=252; "\0xe4" was NUL + "xe4" and ended the list
+           "abcdefghijklmnopqrstuvwxyz"
+           "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+int wert[100];
+int listlen=0,numrest=0;
+// initialize a new character list (for future): nothing is excluded
+void ini_list(){ int i;
+  for(i=0;list[i]!=0 && i<100;i++) wert[i]=0;
+  numrest=listlen=i; }
+// exclude all chars of filt from the list (for future), written long time ago
+void exclude(char *filt){ int i,j;
+  for(j=0;filt[j]!=0 && j<100;j++)
+    for(i=0;list[i]!=0 && i<100;i++)
+      if( filt[j]==list[i] ) { if(!wert[i])numrest--; wert[i]++; } }
+// get the result after all the work (for future): the only char left, else '_'
+char getresult(){ int i;
+  if( numrest==1 )
+    for(i=0;list[i]!=0 && i<100;i++) if(!wert[i]) return list[i];
+  return '_';
+  }
+#endif
+
+// distance() - dissimilarity of the chars in two boxes in percent, 0 is ideal fit
+//  = similarity of two chars, for recognition of garbled chars
+//  - boxes whose sizes differ too much return 100 without a pixel compare
+//  - only differing pixels are analysed in detail: the more of the 8 surrounding
+//    positions differ too, the worse (look at contours too! v0.2.4: B==H)
+//  changed for v0.41, Mar06
+int distance( pix *p1, struct box *box1,
+              pix *p2, struct box *box2, int cs){
+  int result, col, row, nb_x, nb_y, ndiff, black1, black2, good = 0, bad = 0;
+  int ox1 = box1->x0, oy1 = box1->y0;                    // origin of box1 in p1
+  int ox2 = box2->x0, oy2 = box2->y0;                    // origin of box2 in p2
+  int w1 = box1->x1 - box1->x0 + 1, w2 = box2->x1 - box2->x0 + 1;
+  int h1 = box1->y1 - box1->y0 + 1, h2 = box2->y1 - box2->y0 + 1;
+  int w = (w1 > w2) ? w1 : w2, h = (h1 > h2) ? h1 : h2;  // compared area = larger extent
+  int step_x = 1 + w/32, step_y = 1 + h/32;              // offset of the neighbour positions
+  if (abs(w1 - w2) > 1 + w/16 || abs(h1 - h2) > 1 + h/16) return 100;
+  // compare relations to baseline and upper line
+  if (2*box1->y1 > box1->m3 + box1->m4 && 2*box2->y1 < box2->m3 + box2->m4) bad += 128;
+  if (2*box1->y0 > box1->m1 + box1->m2 && 2*box2->y0 < box2->m1 + box2->m2) bad += 128;
+  // compare pixels (try global shift too ???)
+  for (row = 0; row < h; row++)
+    for (col = 0; col < w; col++) {
+      black1 = (getpixel(p1, ox1 + col, oy1 + row) < cs);   // better gray?
+      black2 = (getpixel(p2, ox2 + col, oy2 + row) < cs);
+      if (black1 == black2) { good += 8; continue; }        // all things are right!
+      // pixels differ: count how many of the 8 surrounding positions differ too
+      ndiff = -1;                                           // the first one is for free
+      for (nb_x = -1; nb_x <= 1; nb_x++)
+        for (nb_y = -1; nb_y <= 1; nb_y++) {
+          if (nb_x == 0 && nb_y == 0) continue;
+          black1 = (getpixel(p1, ox1 + col + nb_x*step_x, oy1 + row + nb_y*step_y) < cs);
+          black2 = (getpixel(p2, ox2 + col + nb_x*step_x, oy2 + row + nb_y*step_y) < cs);
+          if (black1 != black2) ndiff++;
+        }
+      bad += (ndiff > 0) ? 16*ndiff : 1;
+    }
+  result = (good + bad) ? (100*bad + (good + bad - 1)) / (good + bad) : 99;  // percent, rounded up
+  if (result < 10 && (JOB->cfg.verbose & 7)) {
+    fprintf(stderr,"\n# distance rc=%d good=%d bad=%d",result,good,bad);
+  }
+  return result;
+}
+
+
+
+// ===== call OCR engine: recognize the char in box1 (threshold cs), returns the char or UNKNOWN ===== ;)
+// box1 x0,y0,x1,y1 are restored on every exit; nrun is not used (obsolete: 0=from outside, 1=from inside)
+wchar_t whatletter(struct box *box1, int cs, int nrun){
+ wchar_t bc=UNKNOWN; // best letter
+ wchar_t um=SPACE; // umlaut? '" => modifier
+ pix *p=box1->p; // whole image
+ int x,y,dots,xa,ya,x0,x1,y0,y1,dx,dy,i;
+ pix b; // box (char-only copy of the image part)
+ struct box bbuf=*box1; // restore after modification!
+ b.p=NULL; // nothing allocated yet, so every exit path may call free(b.p)
+ if (box1->num_ac>0 && box1->wac[0]>=JOB->cfg.certainty && bc==UNKNOWN) {
+ bc=box1->tac[0];
+ }
+ // if (bc!=UNKNOWN) return bc;
+ // if whatletter() called again, only unknown chars are processed
+ // bad for splitting!
+
+ // store box data, which can be modified for modified chars in 2nd run
+ bbuf.x0=box1->x0; bbuf.y0=box1->y0;
+ bbuf.x1=box1->x1; bbuf.y1=box1->y1;
+
+ xa=box1->x; ya=box1->y;
+ x0=box1->x0; y0=box1->y0;
+ x1=box1->x1; y1=box1->y1;
+ // int vol=(y1-y0+1)*(x1-x0+1); // volume
+ // crossed l-m , divided chars
+ while( get_bw(x0,x1,y0,y0,p,cs,1)!=1 && y0+1<y1) y0++;
+ while( get_bw(x0,x1,y1,y1,p,cs,1)!=1 && y0+1<y1) y1--;
+ dx=x1-x0+1;
+ dy=y1-y0+1; // size
+
+ // better to proof the white frame too!!! ????
+ // --- test for german umlaut and points above, not robust enough???
+ // if three chars are connected i-dots (ari) sometimes were not detected
+ // - therefore after division a test could be useful
+ // modify y0 only in second run!?
+ // we need it here to have the right copybox
+ if (um==SPACE && dy>5 && box1->num_boxes>1)
+ testumlaut(box1,cs,2,&um); /* set box1->modifier + new y0 */
+
+ dots=box1->dots;
+ y0 =box1->y0; // dots==2 => y0 below double dots
+ dy =y1-y0+1;
+
+ // move upper and lower border (for divided letters)
+ while( get_bw(x0,x1,y0,y0,p,cs,1)==0 && y0+1<y1) y0++;
+ while( get_bw(x0,x1,y1,y1,p,cs,1)==0 && y0+1<y1) y1--;
+ while( get_bw(x0,x0,y0,y1,p,cs,1)==0 && x0+1<x1) x0++;
+ while( get_bw(x1,x1,y0,y1,p,cs,1)==0 && x0+1<x1) x1--;
+ dx=x1-x0+1;
+ dy=y1-y0+1; // size
+ box1->x0=x0; box1->y0=y0; // set reduced frame
+ box1->x1=x1; box1->y1=y1;
+
+ // set good startpoint (probably bad from division)?
+ if( xa<x0 || xa>x1 || ya<y0 || ya>y1
+ || getpixel(p,xa,ya)>=cs /* || 2*ya<y0+y1 */ || dots>0 ){
+ // subfunction? also called after division of two glued chars?
+ for(y=y1;y>=y0;y--) // low to high (not i-dot)
+ for(x=(x0+x1)/2,i=0;x>=x0 && x<=x1;i++,x+=((2*i&2)-1)*i) /* is that ok? */
+ if (getpixel(p,x,y)<cs && (getpixel(p,x+1,y)<cs
+ || getpixel(p,x,y+1)<cs)){ xa=x;ya=y;y=-1;break; }
+ /* should box1->x,y be set? */
+ }
+
+ // ----- create char-only-box -------------------------------------
+ // ToDo: this will be obsolete if vectors are used only
+ if(dx<1 || dy<1) goto restore_box; /* should not happen */
+ b.p = (unsigned char *) malloc( dx * dy );
+ if (!b.p) { fprintf(stderr,"Warning: malloc failed L%d\n",__LINE__); goto restore_box; }
+ if( copybox(p,x0,y0,dx,dy,&b,dx*dy) )
+ goto restore_box; /* b.p is freed there */
+ // clr_bits(&b,0,b.x-1,0,b.y-1);
+ // ------ use diagonal too (only 2nd run?)
+ /* following code fails on ! and ? obsolete if vectors are used
+ ToDo:
+ - mark pixels neighboured to pixels outside and remove them from &b
+ v0.40
+ will be replaced by list of edge vectors
+ - mark accents, dots and remove them from &b
+ */
+#if 1 /* becomes obsolete by vector code */
+ if (y0>0) // mark upper overlap
+ for ( x=x0; x<=x1; x++) {
+ if (getpixel(p,x,y0-1)<cs
+ && getpixel(p,x,y0 )<cs && (marked(&b,x-x0,0)&1)!=1)
+ mark_nn(&b,x-x0,0,cs,1);
+ }
+ if (x0>0) // mark left overlap
+ for ( y=y0; y<=y1; y++) {
+ if (getpixel(p,x0-1,y)<cs
+ && getpixel(p,x0 ,y)<cs && (marked(&b,0,y-y0 )&1)!=1)
+ mark_nn(&b,0,y-y0,cs,1);
+ }
+ if (x1<p->x-1) // mark right overlap
+ for ( y=y0; y<=y1; y++) {
+ if (getpixel(p,x1+1,y)<cs
+ && getpixel(p,x1 ,y)<cs && (marked(&b,x1-x0,y-y0)&1)!=1)
+ mark_nn(&b,x1-x0,y-y0,cs,1);
+ }
+ mark_nn(&b,xa-x0,ya-y0,cs,2); // not glued chars
+ for(x=0;x<b.x;x++)
+ for(y=0;y<b.y;y++){ // black pixels with mark 1 but without mark 2 belong to a neighbour
+ if ( (marked(&b,x,y )&3)==1 && getpixel(&b,x,y )<cs )
+ b.p[x+y*b.x] = 255&~7; /* reset pixel */
+ }
+#endif
+
+ // if (bc == UNKNOWN) // cause split to fail
+ bc=ocr0(box1,&b,cs);
+
+ /* ToDo: try to change pixels near cs?? or melt? */
+ if (box1->num_ac>0 && box1->wac[0]>=JOB->cfg.certainty && bc==UNKNOWN) {
+ bc=box1->tac[0];
+ }
+
+ if (um!=0 && um!=SPACE && bc<127) { /* ToDo: is that obsolete now? */
+ wchar_t newbc;
+ newbc = compose(bc, um );
+ if (newbc == bc) { /* nothing composed */
+ if(JOB->cfg.verbose & 7)
+ fprintf(stderr, "\nDBG whatletter: compose(%s) was useless (%d,%d)",
+ decode(bc,ASCII), box1->x0, box1->y0);
+ // if(JOB->cfg.verbose & 6) out_x(box1);
+ }
+ bc = newbc;
+ }
+restore_box: // restore modified boxes; the early exits above jump here too
+ box1->x0=bbuf.x0; box1->y0=bbuf.y0;
+ box1->x1=bbuf.x1; box1->y1=bbuf.y1;
+// if (box1->c==UNKNOWN) out_b(box1,&b,0,0,dx,dy,cs); // test
+
+ free(b.p);
+ return bc;
+}
+
+/*
+** creates a list of boxes/frames around objects detected on the pixmap p
+** (threshold JOB->cfg.cs) and appends them to JOB->res.boxlist for further work
+** returns number of boxes created; scanning stops if no box can be allocated
+** - by the way: sums box widths/heights for average X, Y (avX=sumX/numC,..)
+*/
+int scan_boxes( pix *p ){
+ int x, y, nx, cs, rc, ds;
+ struct box *box3;
+
+ if (JOB->cfg.verbose)
+ fprintf(stderr,"# scanning boxes");
+
+ cs = JOB->cfg.cs;
+ JOB->res.sumX = JOB->res.sumY = JOB->res.numC = 0;
+
+ /* clear the lowest bits of each pixel, later used as "scanned"-marker */
+ clr_bits( p, 0, p->x - 1, 0, p->y - 1);
+
+ for (y=0; y < p->y; y++)
+ for (x=0; x < p->x; x++)
+ for (ds=2; ds<7; ds+=4) { // NO - dust of size 1 is not removed !!!
+ nx=x+((ds==2)?-1:+1); /* ds=2: left neighbour, ds=6: right neighbour */
+ if (nx<0 || nx>=p->x) continue; /* out of image, ex: recframe */
+ if ( getpixel(p, x,y)>=cs || getpixel(p,nx,y)< cs) // b/w transition?
+ continue;
+ if ((marked(p, x,y) & 1)&&(marked(p, nx, y) & 1))
+ continue;
+ /* check (and mark) only horizontal b/w transitions */
+ box3 = (struct box *)malloc_box(NULL); // --- new box, inserted in list below
+ if (!box3) { fprintf(stderr,"\nERROR scan_boxes: malloc_box failed"); return JOB->res.numC; }
+ box3->x0=box3->x1=box3->x=x;
+ box3->y0=box3->y1=box3->y=y;
+ box3->num_frames=0;
+ box3->dots=0;
+ box3->num_boxes=1;
+ box3->num_subboxes=0;
+ box3->modifier='\0';
+ box3->num=JOB->res.numC;
+ box3->line=0; // not used here
+ box3->m1=0; box3->m2=0; box3->m3=0; box3->m4=0;
+ box3->p=p;
+ box3->num_ac=0; // for future use
+
+/* frame, vectorize and mark only odd/even horizontal b/w transitions
+ * args: box, x,y, cs, mark, diag={0,1}, ds={2,6}
+ * ds - start direction, 6=right of right border, 2=left of left border
+ * ret - 0=ok, -1=already marked, -2=max_num_frames_exceeded
+ * -7=no border in direction ds
+ * ToDo: count errors and print out for debugging
+ */
+ rc=frame_vector(box3, x, y, cs, 1, 1, ds);
+ g_debug(fprintf(stderr,"\n# ... scan xy= %3d %3d rc= %2d", x, y, rc);)
+ if (rc<0) { free_box(box3); continue; }
+ if (box3->num_frames && !box3->num_frame_vectors[0])
+ fprintf(stderr,"\nERROR scan_boxes: no vector in frame (%d,%d)",x,y);
+
+ JOB->res.numC++;
+ JOB->res.sumX += box3->x1 - box3->x0 + 1;
+ JOB->res.sumY += box3->y1 - box3->y0 + 1;
+
+ box3->c=(((box3->y1-box3->y0+1)
+ *(box3->x1-box3->x0+1)>=MaxBox)? PICTURE : UNKNOWN);
+ list_app(&(JOB->res.boxlist), box3); // append to list
+ // ToDo: debug
+ // if (JOB->cfg.verbose && box3->y0==29) out_x(box3);
+ }
+ if(JOB->res.numC){ /* averages are printed only if boxes exist (no division by zero) */
+ if (JOB->cfg.verbose)
+ fprintf(stderr," nC= %3d avD= %2d %2d\n",JOB->res.numC,
+ (JOB->res.sumX+JOB->res.numC/2)/JOB->res.numC,
+ (JOB->res.sumY+JOB->res.numC/2)/JOB->res.numC);
+ }
+ return JOB->res.numC;
+}
+
+/* qsort() comparison function for ints. Returns -1, 0, or 1 according to
+   whether *vr < *vs, *vr == *vs, or *vr > *vs */
+int
+intcompare (const void *vr, const void *vs)
+{
+  const int lhs = *(const int *)vr;
+  const int rhs = *(const int *)vs;
+
+  /* each comparison yields 0 or 1, so the difference is -1, 0 or 1;
+     unlike lhs - rhs this cannot overflow */
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/*
+ * measure_pitch - detect monospaced font and measure the pitch
+ * measure overall pitch for difficult lines (pass l1=0 uses all boxes),
+ * after that measure pitch per line (l1>0, stored only if cfg.spc==0)
+ * results: job->res.lines.pitch[], job->res.lines.mono[], job->cfg.spc
+ * dists arrays are limited to 1024 elements to reduce
+ * cpu usage for qsort on images with extreme high number of objects
+ * insert space if dist>=pitch in list_insert_spaces()
+ * ToDo: - min/max distance-matrix a-a,a-b,a-c,a-d ... etc; td,rd > ie,el,es
+ * - OR measuring distance as min. pixel distance instead of box distance
+ * especially useful for italic font!
+ */
+void measure_pitch( job_t *job ){
+ int numdists=0, spc=0, /* number of stored distances */
+ pitch_p=2, pdist, pdists[1024], /* proportional distances */
+ pitch_m=6, mdist, mdists[1024], /* monospaced distances */
+ monospaced=0, l1;
+ struct box *box2, *prev=NULL;
+
+ if(job->cfg.verbose){ fprintf(stderr,"# check for word pitch"); }
+ for (l1=0; l1<job->res.lines.num; l1++)
+ { /* 0 means all lines */
+ if(job->cfg.verbose){ fprintf(stderr,"\n# line %2d",l1); }
+ numdists = 0; prev = NULL; /* clear distance lists, forget the last box of the previous pass */
+ for_each_data(&(job->res.boxlist)) {
+ box2 = (struct box *)list_get_current(&(job->res.boxlist));
+ if (l1>0 && box2->line!=l1) continue; /* ignore other lines */
+ /* ignore dots and pictures (min. font is 4x6) */
+ if (box2->y1 - box2->y0 + 1 < 4 || box2->c==PICTURE) { prev=NULL; }
+ if (!prev) { prev=box2; continue; } /* we need a predecessor */
+ /* use center distance for monospaced fonts */
+ mdist = ((box2->x0 + box2->x1) - (prev->x0 + prev->x1) + 1)/2;
+ /* use gap for proportional fonts */
+ pdist = box2->x0 - prev->x1 + 1;
+ /* ToDo: better take 3 instead of 2 neighbours?, smallest font 4x6 */
+ /* fonts are expected to be 6 to 60 pixels high, which is about
+ 4 to 50 pixels wide. We allow some extra margin. */
+ if (3 < mdist && mdist < 150) { /* better mdist < 3*Xaverage ? */
+ /* two options for overflow: 1) ignore, 2) store randomly */
+ if (numdists<1024) { /* we do ignore here */
+ mdists[numdists] = mdist;
+ pdists[numdists] = pdist;
+ numdists++;
+ }
+ }
+ prev = box2;
+ } end_for_each(&(job->res.boxlist));
+
+ if(job->cfg.verbose){ fprintf(stderr," num_gaps= %2d",numdists); }
+ if( numdists<8 ){
+ if (job->cfg.verbose && l1==0) /* only for all lines */
+ fprintf(stderr," (WARNING num_gaps<8)");
+ }
+ if (numdists>0) {
+ int i,diff,ni_min,max,best_m,best_p,ni; double v;
+ /* aware: takes long time for big data sets */
+ /* dilute the data? */
+ qsort (mdists, numdists, sizeof (int), intcompare);
+ qsort (pdists, numdists, sizeof (int), intcompare);
+ /* the new method; stored mdists are > 3, so the divisor is not 0 */
+ v = (mdists[numdists*7/10]-mdists[numdists/5])
+ /(double)mdists[numdists/5];
+ /* measurements showed v=.09 for Courier and .44 for Times-Roman */
+ if (l1==0) monospaced = (v < .22);
+ best_m= numdists/5;
+ best_p=4*numdists/5;
+ /* try to find better pitch for monospaced font (ok for prop) */
+ for (i=numdists/5+1;i<numdists;i++) {
+ if (2*mdists[i]>=3*mdists[best_m]) { best_m=i-1; break; }
+ }
+ /* try to find better pitch for proportional font */
+ // the largest diff could be the best, if diff is always 1,
+ // take the diff with the lowest weight
+ for (ni=ni_min=1024,max=0,i=numdists/2+1;i<numdists-numdists/16;i++) {
+ diff=pdists[i]-pdists[i-1];
+ if (diff>max) {
+ max=diff; best_p=i-1;
+ if ((job->cfg.verbose&(32+16))==48)
+ fprintf(stderr," best_p=%d maxdiff=%d\n# ...", pdists[best_p], max);
+ if (max>3 && 3*pdists[i]>=4*pdists[i-1]) { break; }
+ }
+ if (diff) {
+ if (ni<ni_min) {
+ // do not try to divide one word per line
+ ni_min=ni; if (max<=1 && numdists>16) best_p=i-1;
+ if ((job->cfg.verbose&(32+16))==48)
+ fprintf(stderr," best_p=%d ni_min=%d\n# ...", pdists[best_p], ni_min);
+ }
+ ni=1;
+ } else ni++;
+ }
+ if (numdists<16 && max<=1 && ni_min>1) best_p=numdists-1; // one word
+#if 1 /* debugging */
+ if ((job->cfg.verbose&(32+16))==48) {
+ fprintf(stderr,"\n# ...");
+ for (i=0;i<numdists;i++) fprintf(stderr," %2d",mdists[i]);
+ fprintf(stderr," <- mdist[%d]\n# ...",l1);
+ for (i=0;i<numdists;i++) fprintf(stderr," %2d",pdists[i]);
+ fprintf(stderr," <- pdist[%d]\n# ...",l1);
+ fprintf(stderr," maxdiff=%d min_samediffs=%d\n# ...",max,ni_min);
+ }
+#endif
+ /* we measure spaces in two different ways (mono, prop) */
+ /* prop: gap between boxes, mono: distance of middle */
+ if (best_p<numdists-1) pitch_p = ((pdists[best_p]+pdists[best_p+1])/2+1);
+ else pitch_p = (pdists[best_p]+1 );
+ pitch_m = (mdists[best_m]*4/3);
+ if (numdists)
+ if ( pdists[numdists-1]*2 <= pdists[0]*3
+ || pdists[numdists-1] <= pdists[0]+3) {
+ /* line is just a single word */
+ pitch_p = pdists[numdists-1]+10;
+ }
+ if (l1>0 && job->cfg.spc==0) {
+ job->res.lines.pitch[l1]=(monospaced?pitch_m:pitch_p);
+ job->res.lines.mono[l1]=monospaced;
+ }
+ if (job->cfg.verbose) {
+ fprintf(stderr,"\n# ..."
+ " mono: v=%f (v<0.22) line=%d numdists=%d\n# ...",
+ v, l1, numdists);
+ fprintf(stderr," mono: min=%3d max=%3d pitch=%3d @ %2d%%\n# ...",
+ mdists[0],mdists[numdists-1],pitch_m,best_m*100/numdists);
+ fprintf(stderr," prop: min=%3d max=%3d pitch=%3d @ %2d%%\n# ...",
+ pdists[0],pdists[numdists-1],pitch_p,best_p*100/numdists);
+ fprintf(stderr," result: distance >= %d considered space\n# ...",
+ job->res.lines.pitch[l1]);
+ }
+ } /* if (not) enough spaces */
+ if (l1==0) { /* set default spaces to each line */
+ int l2;
+ spc = job->cfg.spc;
+ if (spc==0) /* set only if not set by option */
+ spc = ((monospaced)?pitch_m:pitch_p);
+ for (l2=0; l2<job->res.lines.num; l2++ )
+ job->res.lines.pitch[l2]=spc;
+ }
+ } /* each line */
+ if (job->cfg.spc==0)
+ job->cfg.spc = spc;
+ if (job->cfg.verbose)
+ fprintf(stderr," overall space width is %d %s\n",
+ spc, ((monospaced)?"monospaced":"proportional"));
+
+
+}
+
+/* ---- count subboxes (white holes within black area) --------
+ * for every box of JOB->res.boxlist count the boxes lying inside it (usually
+ * holes, ex: "aeobdg") whose own num_subboxes is 0 at that moment; the count is
+ * stored in box->num_subboxes. needed for glue_boxes: dont glue textboxes, tables
+ * and other complex objects. pp is not used here, the function always returns 0.
+ * ToDo: count only frames of invers spin? do we need sorted list here? -> no */
+int count_subboxes( pix *pp ){
+ int ii=0, num_mini=0, num_same=0, cnt=0; // ii = subboxes found in total, cnt = boxes visited
+ struct box *box2,*box4;
+ progress_counter_t *pc = NULL;
+ if (JOB->cfg.verbose) { fprintf(stderr,"# count subboxes\n# ..."); }
+
+ pc = open_progress(JOB->res.boxlist.n,"count_subboxes");
+ for_each_data(&(JOB->res.boxlist)) {
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist)); // candidate outer box
+ box2->num_subboxes=0;
+ progress(cnt++,pc);
+ if ( (box2->x1 - box2->x0)<2
+ || (box2->y1 - box2->y0)<2) continue; /* speedup for dotted bg */
+ // holes inside box2 char, aoebdqg, 0.41
+ for_each_data(&(JOB->res.boxlist)) {
+ box4=(struct box *)list_get_current(&(JOB->res.boxlist)); // candidate inner box
+ if (box4->y0 > box2->y1) break; // faster, but boxes need to be sorted
+ // ToDo: better use binary tree (above/below x) to find near boxes?
+ if (box4==box2) continue;
+ if( box4->x0==box2->x0 && box4->x1==box2->x1
+ && box4->y0==box2->y0 && box4->y1==box2->y1)
+ num_same++; /* erroneous!? */
+ if ( box4->x0 >= box2->x0 && box4->x1 <= box2->x1
+ && box4->y0 >= box2->y0 && box4->y1 <= box2->y1
+ && box4->num_subboxes==0 ) /* box4 inside box2? */
+ {
+ box2->num_subboxes++; ii++;
+ if ((box4->x1 - box4->x0 + 1)
+ *(box4->y1 - box4->y0 + 1)<17) num_mini++; // subbox area below 17 pixels
+ }
+ } end_for_each(&(JOB->res.boxlist));
+#if 0
+ if (cnt < 1000 && JOB->cfg.verbose)
+ fprintf(stderr," %4d box %4d %4d %+3d %+3d subboxes %4d\n# ...",
+ cnt, box2->x0, box2->y0, box2->x1-box2->x0,
+ box2->y1-box2->y0, box2->num_subboxes);
+#endif
+ } end_for_each(&(JOB->res.boxlist));
+ close_progress(pc);
+ if (JOB->cfg.verbose)
+ fprintf(stderr," %3d subboxes counted (mini=%d, same=%d) nC= %d\n",
+ ii, num_mini, num_same/2 /* counted twice */, cnt);
+ return 0;
+}
+
+/* ---- glue holes to chars ( before step1 ) v0.42 -----------------------
+ glue boxes lying inside another box (usually holes, ex: "aeobdg46890"):
+ the inner box is merged into the outer one if the size test ("skip dust",
+ dont add dust to a char!) passes, and is removed from the list in any case.
+ lines are not detected yet. pp is only passed to count_subboxes. returns 0. */
+int glue_holes_inside_chars( pix *pp ){
+ int ii, cs, x0, y0, x1, y1, cnt=0,
+ glued_same=0, glued_holes=0;
+ struct box *box2, *box4;
+ progress_counter_t *pc = NULL;
+ cs=JOB->cfg.cs;
+ {
+ count_subboxes( pp ); /* move to pgm2asc() later */
+
+ pc = open_progress(JOB->res.boxlist.n,"glue_holes_inside_chars");
+ if (JOB->cfg.verbose)
+ fprintf(stderr,"# glue holes to chars nC= %d\n# ...",JOB->res.numC);
+ ii=0;
+ for_each_data(&(JOB->res.boxlist)) {
+ // box2 is the outer box; boxes lying inside it (box4) are searched below
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ x0 = box2->x0; x1 = box2->x1;
+ y0 = box2->y0; y1 = box2->y1;
+
+ progress(cnt++,pc);
+
+ // would it better than moving vectors to build a sub-box-tree?
+
+ // do not remove chars inside pictures (car plates on photos)
+ if( box2->c == PICTURE || box2->num_subboxes > 7) continue;
+
+ // holes inside char, aoebdqg, 0.41
+ // dont merge boxes which have subboxes by itself!
+ // search boxes inside box2
+ // if (x1-x0+1>2 || y1-y0+1>2) /* skip tiny boxes, bad for 4x6 */
+ for_each_data(&(JOB->res.boxlist)) {
+ box4=(struct box *)list_get_current(&(JOB->res.boxlist));
+ if(box4!=box2 && box4->c != PICTURE )
+ {
+ // ToDo: dont glue, if size differs by big factors (>16?)
+ if ( ( box4->x0==x0 && box4->x1==x1
+ && box4->y0==y0 && box4->y1==y1 ) /* do not happen !? */
+ || ( box4->x0>=x0 && box4->x1<=x1
+ && box4->y0>=y0 && box4->y1<=y1
+ && box4->num_subboxes==0 ) ) /* no or very small subboxes? */
+ { // fkt melt(box2,box4)
+ // same box, if very small but hollow char (4x5 o)
+ if( box4->x0==x0 && box4->x1==x1
+ && box4->y0==y0 && box4->y1==y1) glued_same++; else glued_holes++;
+ // fprintf(stderr,"\n# DEBUG merge:");
+ // out_x(box2); // the outer box here
+ // out_x(box4); // the box inside box2
+ if ((JOB->cfg.verbose & 7)==7) // LEV3
+ fprintf(stderr," glue hole (%4d %4d %+3d %+3d %+4d)"
+ " (%4d %4d %+3d %+3d %+4d) %d\n# ...",
+ x0, y0, x1-x0+1, y1-y0+1, box2->frame_vol[0],
+ box4->x0, box4->y0,
+ box4->x1-box4->x0+1, box4->y1-box4->y0+1,
+ box4->frame_vol[0], glued_same);
+ if ((box4->x1-box4->x0+1)< 8*(x1-x0+1)
+ || (box4->y1-box4->y0+1)<12*(y1-y0+1)) // skip dust
+ merge_boxes( box2, box4 ); // add box4 to box2
+ // out_x(box2);
+ x0 = box2->x0; x1 = box2->x1; // reload the frame of box2 after the merge
+ y0 = box2->y0; y1 = box2->y1;
+ JOB->res.numC--; // dont count fragments as chars
+ ii++; // count removed
+ list_del(&(JOB->res.boxlist), box4); // remove box4 (also if it was not merged)
+ free_box(box4);
+ // now search another hole inside box2
+ }
+ }
+ } end_for_each(&(JOB->res.boxlist));
+
+ } end_for_each(&(JOB->res.boxlist));
+
+ if (JOB->cfg.verbose)
+ fprintf(stderr," glued: %3d holes, %3d same, nC= %d\n",
+ glued_holes, glued_same, JOB->res.numC);
+ close_progress(pc);
+ }
+ return 0;
+}
+
+
+/* ---- glue broken chars ( before step1 ??? ) -----------------------
+ use this carefully, do not destroy previous detection ~fi, broken K=k' g
+ glue if boxes are near or diagonally connected; a glued box4 is merged into
+ box2 (merge_boxes), removed from JOB->res.boxlist and freed. returns 0.
+ other strategy: mark boxes for deleting and delete in extra loop at end
+ faster: check only next two following boxes because list is sorted!
+ ToDo: store m4 of upper line to m4_of_prev_line, and check that "-points are below
+ done: glue boxes lying inside another box (usually holes, ex: "aeobdg")
+ Dont add dust to a char!
+ lines should be detected already (Test it for m1-m4 unknown)
+ ToDo: divide in glue_idots, glue_thin_chars etc. and optimize it */
+int glue_broken_chars( pix *pp ){
+ int ii, y, cs, x0, y0, x1, y1, cnt=0,
+ num_frags=0, glued_frags=0, glued_hor=0;
+ struct box *box2, *box4;
+ progress_counter_t *pc = NULL;
+ cs=JOB->cfg.cs;
+ {
+ count_subboxes( pp ); /* move to pgm2asc() later */
+
+ pc = open_progress(JOB->res.boxlist.n,"glue_broken_chars");
+ if (JOB->cfg.verbose)
+ fprintf(stderr,"# glue broken chars nC= %d\n# ...",JOB->res.numC);
+ ii=0;
+ for_each_data(&(JOB->res.boxlist)) {
+ // get the box which may be extended by boxes around it
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ x0 = box2->x0; x1 = box2->x1;
+ y0 = box2->y0; y1 = box2->y1;
+
+ progress(cnt++,pc);
+
+ // vertical broken (g965T umlauts etc.)
+ // not: f,
+
+ // would it better than moving vectors to build a sub-box-tree?
+
+ // do not remove chars inside pictures (car plates on photos)
+ if( box2->c == PICTURE || box2->num_subboxes > 7) continue;
+
+ /* continue loop if box is below or above line */
+ if( box2->m4>0 && y0>box2->m4 ) continue; /* dust outside ? */
+ if( box2->m1>0 && y0<box2->m1-(box2->m3-box2->m2) ) continue;
+ /* ToDo:
+ * - check that y0 is greater as m3 of the char/line above
+ */
+
+ // check small boxes (box2) whether they belong
+ // to near same size or bigger boxes (box4)
+ if( 2*(y1-y0) < box2->m4 - box2->m1 // care for dots etc.
+ && ( 2*y1<=(box2->m3+box2->m2) // upper fragments
+ || 2*y0>=(box2->m3+box2->m2)) ) { // lower fragments
+ struct box *box5=NULL, *box6=NULL; // nearest and next nearest box (box6 is only set, never read)
+ box4=NULL;
+ num_frags++; /* count for debugging */
+ // get the [2nd] next x-nearest box in the same line
+ for_each_data(&(JOB->res.boxlist)) {
+ box4=(struct box *)list_get_current(&(JOB->res.boxlist));
+ if (box4 == box2 || box4->c == PICTURE) continue;
+ /* 0.42 speed up for background pixel pattern, box4 too small */
+ if ( box4->x1 - box4->x0 + 1 < x1-x0+1
+ && box4->y1 - box4->y0 + 1 < y1-y0+1 ) continue;
+ // have in mind that line number may be wrong for dust
+ if (box4->line>=0 && box2->line>=0 && box4->line==box2->line)
+ {
+ if (!box5) box5=box4;
+ if ( abs(box4->x0 + box4->x1 - 2*box2->x0) // doubled distance: center of box4 to left edge of box2
+ <abs(box5->x0 + box5->x1 - 2*box2->x0))
+ { box6=box5; box5=box4; }
+ }
+ } end_for_each(&(JOB->res.boxlist));
+ box4=box5; // next nearest box within the same line
+ if (box4) {
+#if 0 /* set this to 1 for debugging of melting bugs */
+ if (JOB->cfg.verbose & 7) {
+ fprintf(stderr,"\n# next two boxes are candidates for melting ");
+ out_x(box2);
+ out_x(box4); }
+#endif
+ if( /* umlaut "a "o "u, ij; box2 is the small dot, box4 the body */
+ ( y1 <= box2->m2
+ && box4->y1 >= box2->m2 // dont melt dots together
+ && 2* y1 < box4->y1 + box4->y0 // box2 above box4
+ && box4->x1+JOB->res.avX/2>=x0
+ && box4->x0-JOB->res.avX/2<=x1
+ && (y1 < box4->y0 || x0 < box4->x1) // dont melt "d'"
+ && 3* ( y1 - box4->y0)
+ <= 2* (box4->y1 - box4->y0) // too far away? dust!
+ && 8* ( x1 - x0 + 1)
+ >= (box4->x1 - box4->x0 + 1) // dot must have minimum size
+ && 10* ( y1 - y0 + 1)
+ >= (box4->y1 - box4->y0 + 1) // dot must have minimum size
+ ) || ( 0 && /* broken T, disabled by the leading 0 */
+ 3*(box2->x1 - box2->x0) > 2*JOB->res.avX
+ && 4*box4->x0>3*box2->x0+box2->x1
+ && 4*box4->x1<box2->x0+3*box2->x1
+ )
+ || /* !?; box2 is the dot, box4 the body */
+ ( 2*box4->x1>=x0+x1 /* test if box4 is around box2 */
+ && 2*box4->x0<=2*x1 /* +x0+1 Jan00 */
+ && ( x1-x0 <= box4->x1-box4->x0+2 )
+ && 2*y0>=box2->m2+box2->m3
+ && 4*y1>=box2->m2+3*box2->m3
+ && 4*(y1-y0)<box2->m4-box2->m1
+ && (8*box4->y1 < box4->m2+7*box4->m3
+ || box4->m4-box4->m1<16) /* Jan00 */
+ )
+ || /* =;: box2 is the upper box, box4 the lower box */
+ ( 2*box4->x1>=x0+x1 /* test if box4 is around box2 */
+ && 2*box4->x0<=2*x1 /* +x0+1 */
+ && ( x1-x0 <= box4->x1-box4->x0+4 )
+ && ( 4*x0 <= 3*box4->x1+box4->x0 )
+ && (( box2->m2 && box4->m2
+ && y1< box2->m3
+ && 2*box4->y1 > box4->m3+box4->m2 // can be bigger than m3
+ && 4*box4->y0 >= 3*box4->m2+box4->m3
+ && 2*box2->y0 < box2->m3+box2->m2
+ )
+ || ( (!box2->m2) || (!box4->m2) ) // m2 is 0 for at least one box (line not known?)
+ )
+ )
+ )
+ { // fkt melt(box2,box4)
+ if (JOB->cfg.verbose & 7)
+ fprintf(stderr," glue objects (%3d %3d %+3d %+3d)"
+ " (%3d %3d %+3d %+3d)\n# ...",
+ x0, y0, x1-x0+1, y1-y0+1, box4->x0, box4->y0,
+ box4->x1-box4->x0+1, box4->y1-box4->y0+1);
+ // fprintf(stderr,"\n# DEBUG merge:"); // d=7x34 @ (109,51) ???
+ // out_x(box2);
+ // out_x(box4);
+ merge_boxes( box2, box4 ); // add box4 to box2
+ x0 = box2->x0; x1 = box2->x1;
+ y0 = box2->y0; y1 = box2->y1;
+ // if (JOB->cfg.verbose & 4) out_x(box2);
+ // JOB->res.numC--; // dont count fragments as chars
+ ii++; glued_frags++; // remove
+ // output_list(JOB);
+ list_del(&(JOB->res.boxlist), box4); /* ret&1: error-message ??? */
+ // output_list(JOB);
+ free_box(box4);
+ }
+ }
+ }
+// continue;
+
+ // horizontally broken w' K'
+ if( 2*y1 < (box2->m3+box2->m2) )
+ if( 2*(y1-y0) < (box2->m3+box2->m2) ) // fragment
+ for_each_data(&(JOB->res.boxlist)) {
+ box4=(struct box *)list_get_current(&(JOB->res.boxlist));
+ if(box4!=box2 && box4->c != PICTURE )
+ {
+ if( box4->line>=0 && box4->line==box2->line
+ && box4->x1>=x0-1 && box4->x1<x0 // do not glue 6-
+ && box4->x0+3*box4->x1<4*x0)
+ if( get_bw(x0 ,x0 ,y1,y1 ,pp,cs,1) == 1)
+ if( get_bw(x0-2,x0-1,y1,y1+2,pp,cs,1) == 1)
+ { // fkt melt(box2,box4)
+ put(pp,x0,y1+1,~(128+64),0); // modifies the image at the junction before merging
+ merge_boxes( box2, box4 );
+ x0 = box2->x0; x1 = box2->x1;
+ y0 = box2->y0; y1 = box2->y1;
+ JOB->res.numC--; ii++; // remove
+ glued_hor++;
+ list_del(&(JOB->res.boxlist), box4);
+ free_box(box4);
+ }
+ }
+ } end_for_each(&(JOB->res.boxlist));
+
+ // horizontally broken n h (h=l_) v0.2.5 Jun00
+ if( abs(box2->m2-y0)<=(y1-y0)/8 )
+ if( abs(box2->m3-y1)<=(y1-y0)/8 )
+ if( num_cross(x0, x1,(y0+ y1)/2,(y0+ y1)/2,pp,cs) == 1)
+ if( num_cross(x0, x1,(y0+3*y1)/4,(y0+3*y1)/4,pp,cs) == 1)
+ if( get_bw((3*x0+x1)/4,(3*x0+x1)/4,(3*y0+y1)/4,y1,pp,cs,1) == 0)
+ if( get_bw(x0,(3*x0+x1)/4,(3*y0+y1)/4,(y0+3*y1)/4,pp,cs,1) == 0)
+ if( get_bw(x0, x0, y0,(3*y0+y1)/4,pp,cs,1) == 1)
+ for_each_data(&(JOB->res.boxlist)) {
+ box4=(struct box *)list_get_current(&(JOB->res.boxlist));
+ if(box4!=box2 && box4->c != PICTURE )
+ {
+ if( box4->line>=0 && box4->line==box2->line
+ && box4->x1>x0-3 && box4->x1-2<x0
+ && abs(box4->y1-box2->m3)<2)
+ { // fkt melt(box2,box4)
+ y=loop(pp,x0,y0,y1-y0,cs,0,DO);if(2*y>y1-y0) continue; // skip if loop() result is beyond half of the box height
+ put(pp,x0-1,y0+y ,~(128+64),0);
+ put(pp,x0-1,y0+y+1,~(128+64),0);
+ merge_boxes( box2, box4 ); // add box4 to box2
+ x0 = box2->x0; x1 = box2->x1;
+ y0 = box2->y0; y1 = box2->y1;
+ JOB->res.numC--; ii++; // remove
+ glued_hor++;
+ list_del(&(JOB->res.boxlist), box4);
+ free_box(box4);
+ }
+ }
+ } end_for_each(&(JOB->res.boxlist));
+ } end_for_each(&(JOB->res.boxlist));
+ if (JOB->cfg.verbose)
+ fprintf(stderr," glued: %3d fragments (found %3d), %3d rest, nC= %d\n",
+ glued_frags, num_frags, glued_hor, JOB->res.numC);
+ close_progress(pc);
+ }
+ return 0;
+}
+
+/*
+** this is a simple way to improve results on noisy images:
+** - find similar chars (build clusters of same chars): boxes whose distance()
+**   is below 5 get the same box->num
+** - analyze clusters (could be used for generating unknown font-base)
+** - the quality of the result depends mainly on the distance function
+*/
+ // ---- analyse boxes, compare chars; always returns 0 ---- ToDo: error-correction only on large chars!
+int find_same_chars( pix *pp){
+ int i,k,d,cs,dist,n1,dx; struct box *box2,*box3,*box4,*box5;
+ pix p=(*pp); // struct copy of the pix passed in
+ cs=JOB->cfg.cs;
+ {
+ if(JOB->cfg.verbose)fprintf(stderr,"# packing");
+ i = list_total(&(JOB->res.boxlist)); // i is decremented for every match found below
+ for_each_data(&(JOB->res.boxlist)) {
+ box4 = box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ dist=1000; // 100% maximum
+ dx = box2->x1 - box2->x0 + 1;
+
+ if(JOB->cfg.verbose)fprintf(stderr,"\r# packing %5d",i);
+ if( dx>3 ) // only boxes wider than 3 pixels are compared with the boxes following in the list
+ for(box3=(struct box *)list_next(&(JOB->res.boxlist),box2);box3;
+ box3=(struct box *)list_next(&(JOB->res.boxlist),box3)) {
+ if(box2->num!=box3->num){
+ int d=distance(&p,box2,&p,box3,cs); // shadows the outer d
+ if ( d<dist ) { dist=d; box4=box3; } // best fit
+ if ( d<5 ){ // good limit = 5% ???
+ i--;n1=box3->num; // set all num==box3.num to box2.num
+ for_each_data(&(JOB->res.boxlist)) {
+ box5=(struct box *)(struct box *)list_get_current(&(JOB->res.boxlist));
+ if(box5!=box2)
+ if( box5->num==n1 ) box5->num=box2->num;
+ } end_for_each(&(JOB->res.boxlist));
+ // out_x2(box2,box5);
+ // fprintf(stderr," dist=%d\n",d);
+ }
+ }
+ }
+ // nearest dist to box2 has box4
+ // out_b2(box2,box4);
+ // fprintf(stderr," dist=%d\n",dist);
+ } end_for_each(&(JOB->res.boxlist));
+ k=0; // k sums up the members of all clusters
+ if(JOB->cfg.verbose)fprintf(stderr," %d different chars",i);
+ for_each_data(&(JOB->res.boxlist)) {
+ struct box *box3,*box4; // shadow the outer box3, box4
+ int j,dist; // j = members of the cluster, dist = largest distance to box2
+ box2=(struct box *)list_get_current(&(JOB->res.boxlist));
+ for(box3=(struct box *)list_get_header(&(JOB->res.boxlist));
+ box3!=box2 && box3!=NULL;
+ box3=(struct box *)list_next(&(JOB->res.boxlist), box3))
+ if(box3->num==box2->num)break;
+ if(box3!=box2 && box3!=NULL)continue; // an earlier box has the same num: cluster was handled
+ i++; // NOTE: i is not reset, numbering continues from the count above
+ // count number of same chars
+ dist=0;box4=box2;
+
+ for(box3=box2,j=0;box3;
+ box3=(struct box *)list_next(&(JOB->res.boxlist), box3)) {
+ if(box3->num==box2->num){
+ j++;
+ d=distance(&p,box2,&p,box3,cs);
+ if ( d>dist ) { dist=d; box4=box3; } // worst fit
+ }
+ }
+ if(JOB->cfg.verbose&8){
+ fprintf(stderr," no %d char %4d %5d times maxdist=%d\n",i,box2->num,j,dist);
+ }
+ // calculate mean-char (error-correction)
+ // ToDo: calculate maxdist in group
+ k+=j;
+ // if(j>1)
+ // out_b(box1,NULL,0,0,0,0,cs);
+ if(JOB->cfg.verbose&8)
+ fprintf(stderr," no %d char %4d %5d times sum=%d\n",i,box2->num,j,k);
+ } end_for_each(&(JOB->res.boxlist));
+ if(JOB->cfg.verbose)fprintf(stderr," ok\n");
+ }
+ return 0;
+}
+
+/*
+** char_recognition: run the recognition engine(s) on all boxes of JOB->res.boxlist except PICTUREs.
+** whatletter() is used unless (mo&256), ocr_db() additionally if (mo&2); each only while the box is still UNKNOWN, has no alternative chars or its first weight is below cfg.certainty. The returned code is only counted and logged here; this function itself does not assign box->c.
+** Returns 0 (also when the box list is empty).
+*/
+int char_recognition( pix *pp, int mo){
+ int i,ii,ni,cs,x0,y0,x1,y1;
+ struct box *box2;
+ progress_counter_t *pc;
+ wchar_t cc;
+ cs=JOB->cfg.cs;
+ // ---- analyse boxes, find chars ---------------------------------
+ if (JOB->cfg.verbose)
+ fprintf(stderr,"# char recognition");
+ i=ii=ni=0; // i = unknown boxes, ii = pictures, ni = all boxes
+ for_each_data(&(JOB->res.boxlist)) { /* count boxes */
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ /* wew: isn't this just JOB->res.numC? */
+ /* js: The program is very complex. I am not sure anymore
+ whether numC is the number of boxes or the number of valid
+ characters.
+ Because it's not time consuming I count the boxes here. */
+ if (box2->c==UNKNOWN) i++;
+ if (box2->c==PICTURE) ii++;
+ ni++;
+ } end_for_each(&(JOB->res.boxlist));
+ if(JOB->cfg.verbose)
+ fprintf(stderr," unknown= %d picts= %d boxes= %d\n# ",i,ii,ni);
+ if (!ni) return 0; // nothing to do, no progress counter is opened
+ i=ii=0; // from here: i = still unknown, ii = processed boxes
+ pc = open_progress(ni,"char_recognition");
+ for_each_data(&(JOB->res.boxlist)) {
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ x0=box2->x0;x1=box2->x1;
+ y0=box2->y0;y1=box2->y1; // box (only referenced by the commented-out out_b() call below)
+ cc=box2->c;
+ if (cc==PICTURE) continue; // pictures are neither recognised nor counted in ii
+
+ if ((mo&256)==0) { /* this case should be default (main engine) */
+ if(cc==UNKNOWN || box2->num_ac==0 || box2->wac[0]<JOB->cfg.certainty)
+ cc=whatletter(box2,cs ,0);
+ }
+
+ if(mo&2) // database engine, only if the result is still not good enough
+ if(cc==UNKNOWN || box2->num_ac==0 || box2->wac[0]<JOB->cfg.certainty)
+ cc=ocr_db(box2);
+
+
+ // box2->c=cc; bad idea (May03 removed)
+ // set(box2,cc,95); ToDo: is that better?
+
+ if(cc==UNKNOWN)
+ i++;
+ ii++;
+
+ if(JOB->cfg.verbose&8) {
+ fprintf(stderr,"\n# code= %04lx %c",(long)cc,(char)((cc<255)?cc:'_'));
+ //out_b(box2,pp,x0,y0,x1-x0+1,y1-y0+1,cs);
+ }
+ progress(ii,pc); /* ii = 0..ni */
+
+ } end_for_each(&(JOB->res.boxlist));
+ close_progress(pc);
+ if(JOB->cfg.verbose)fprintf(stderr," %d of %d chars unidentified\n",i,ii);
+ return 0;
+}
+
+
+/*
+** compare_unknown_with_known_chars: compare unknown chars with known chars,
+** very similar to find_same_chars() but here only to improve the result.
+** Unless (mo&8) is set, every box that is UNKNOWN or whose first weight is below 97 (and that is larger than a dot) is compared via distance() with all boxes that are not UNKNOWN and not below cfg.certainty;
+** if the best distance is below 10 the char of the best fitting box is added with setac() using the weight of that box (97 if it has none) minus the distance, at least 1. Returns 0.
+*/
+int compare_unknown_with_known_chars(pix * pp, int mo) {
+ int i, cs = JOB->cfg.cs, dist, d, ad, wac, ni, ii;
+ struct box *box2, *box3, *box4;
+ progress_counter_t *pc=NULL;
+ wchar_t bc;
+ i = ii = 0; // ---- i = number of chars set, ii = number of visited boxes
+ if (JOB->cfg.verbose)
+ fprintf(stderr, "# try to compare unknown with known chars !(mode&8)");
+ if (!(mo & 8))
+ {
+ ii=ni=0;
+ for_each_data(&(JOB->res.boxlist)) { ni++; } end_for_each(&(JOB->res.boxlist)); // count boxes for the progress counter
+ pc = open_progress(ni,"compare_chars");
+ for_each_data(&(JOB->res.boxlist)) {
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist)); ii++;
+ if (box2->c == UNKNOWN || (box2->num_ac>0 && box2->wac[0]<97))
+ if (box2->y1 - box2->y0 > 4 && box2->x1 - box2->x0 > 1) { // no dots!
+ box4 = (struct box *)list_get_header(&(JOB->res.boxlist));; // fallback so that box4 is never unset
+ dist = 1000; /* 100% maximum */
+ bc = UNKNOWN; /* best fit char */
+ for_each_data(&(JOB->res.boxlist)) {
+ box3 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ wac=((box3->num_ac>0)?box3->wac[0]:100); // boxes without alternatives count as 100
+ if (box3 == box2 || box3->c == UNKNOWN
+ || wac<JOB->cfg.certainty) continue;
+ if (box2->y1 - box2->y0 < 5 || box2->x1 - box2->x0 < 3) continue; // NOTE(review): tests box2 although the loop runs over box3 - confirm which box was meant
+ d = distance(pp, box2, pp, box3, cs);
+ if (d < dist) {
+ dist = d; bc = box3->c; box4 = box3;
+ }
+ } end_for_each(&(JOB->res.boxlist));
+ if (dist < 10) {
+ /* sureness can be maximal of box3 */
+ if (box4->num_ac>0) ad = box4->wac[0];
+ else ad = 97;
+ ad-=dist; if(ad<1) ad=1;
+ /* ToDo: ad should depend on ad of bestfit */
+ setac(box2,(wchar_t)bc,ad);
+ i++;
+ } // limit as option???
+ // => better max distance('e','e') ???
+ if (dist < 50 && (JOB->cfg.verbose & 7)) { // only for debugging
+ fprintf(stderr,"\n# L%02d best fit was %04x=%c dist=%3d%% i=%d",
+ box2->line, (int)bc, (char)((bc<128)?bc:'_'), dist, i);
+ if(box4->num_ac>0)fprintf(stderr," w= %3d%%",box4->wac[0]);
+ }
+ progress(ii,pc); // only reached for boxes that were actually compared
+ }
+ } end_for_each(&(JOB->res.boxlist));
+ close_progress(pc);
+ }
+ if (JOB->cfg.verbose)
+ fprintf(stderr, " - found %d (nC=%d)\n", i, ii); // the value printed as nC is the number of visited boxes (ii)
+ return 0;
+}
+
+/*
+// ---- try_to_divide_boxes: divide glued chars which !strchr("_,.:;",c); block-splitting (two or more glued chars, at most 4 parts at the moment).
+// For every box that is UNKNOWN or recognised below cfg.certainty, cutting positions xi[] are moved from left to right; each part is cut out with cut_box() and classified with whatletter(); if all parts are accepted the resulting string is attached to the box with setas().
+// Nothing is done if (mo&16) is set. Always returns 0. Division if dots>0 does not work properly! ???
+//
+// what about glued "be"?
+// what about recursive division?
+// ToDo: mark divided boxes to give the engine a chance to
+// handle wrong divisions
+*/
+int try_to_divide_boxes( pix *pp, int mo){
+ struct box *box2, boxa, boxb;
+ int cs=JOB->cfg.cs, ad=100,
+ a2[8], ar, // certainty of each part, ar = product of all certainties
+ cbest; // best certainty, skip search of certainty<cbest-1 for speed
+ wchar_t ci[8], // split max. 8 chars
+ s1[]={ UNKNOWN, '_', '.', ',', '\'', '!', ';', '?', ':', '-',
+ '=', '(', ')', '/', '\\', '\0' }; // not accepted chars, \0-terminated!
+ int x0, x1, y0, y1,
+ xi[8+1]; // cutting positions
+ int i, ii, n1, dy, dx;
+ // pix p=(*pp); // remove!
+ if (JOB->cfg.verbose)
+ fprintf(stderr,"# try to divide unknown chars !(mode&16)");
+ if(!(mo&16)) // put this to the caller
+ for_each_data(&(JOB->res.boxlist)) {
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ // don't try to split simple structures (ex: 400x30 square)
+ if ((!box2->num_frames)
+ || box2->num_frame_vectors[ box2->num_frames-1 ]<9) continue;
+ if((box2->c==UNKNOWN || (box2->num_ac && box2->wac[0]<JOB->cfg.certainty))
+ && box2->x1-box2->x0>5 && box2->y1-box2->y0>4){
+ x0=box2->x0; x1=box2->x1;
+ y0=box2->y0; y1=box2->y1;
+ ad=100; // start certainty, reduced below if only few crossings are found
+ cbest=0;
+
+ /* get minimum vertical lines */
+ n1 = num_cross(x0,x1,( y1+y0)/2,( y1+y0)/2,pp,cs);
+ ii = num_cross(x0,x1,(3*y1+y0)/4,(3*y1+y0)/4,pp,cs); if (ii<n1) n1=ii;
+ if (box2->m2 && box2->m3 > box2->m2+2)
+ for (i=box2->m2+1;i<=box2->m3-1;i++) {
+ if (loop(pp,x0+1,i,x1-x0,cs,1,RI) > (x1-x0-2)) continue; // ll
+ ii = num_cross(x0,x1,i,i,pp,cs); if (ii<n1) n1=ii;
+ } if (n1<2) continue; // seems to make no sense to divide
+ if (n1<4) ad=99*ad/100; // not too strong because m2+m3 could be wrong
+ if (n1<3) ad=99*ad/100;
+
+ if( 2*y1 < box2->m3+box2->m4 /* baseline char ? */
+ && num_cross(x0,x1,y1-1,y1-1,pp,cs)==1 // -1 for slopes
+ && num_cross((x0+2*x1)/3,(x0+3*x1)/4,y0,y1,pp,cs)<3 // not exclude tz
+ && num_cross((3*x0+x1)/4,(2*x0+x1)/3,y0,y1,pp,cs)<3 // not exclude zl
+ && loop(pp,x0,y1-(y1-y0)/32,x1-x0,cs,0,RI)
+ +loop(pp,x1,y1-(y1-y0)/32,x1-x0,cs,0,LE) > (x1-x0+1)/2
+ ) continue; /* do not try on bvdo"o etc. */
+
+ // one vertical line can not be two glued chars, lc?
+ if ( num_cross(x0,x1,(y1+y0)/2,(y1+y0)/2,pp,cs)<=1 ) continue;
+ { // doublet = 2 letters
+ // char buf[4]="\0\0\0"; // 4th byte is string end == \0
+ // buf[0]=c1; // c1 is wchar_t! (0xbf00 to 0) fails
+ // buf[1]=c2;
+ char buf[64]=""; // end == \0
+ if (JOB->cfg.verbose&2){
+ fprintf(stderr, "\n#\n# divide box: %4d %4d %3d %3d\n",
+ x0, y0, x1-x0+1, y1-y0+1);
+ }
+ // it would be better if testing is only if most right and left char
+ // has no horizontal gap (below m2) ex: be
+ i=0; // num split chars
+ xi[0]=x0; xi[1]=x0+3; xi[2]=x1;
+ for ( ; ; xi[i+1]++) { // x[i] .. x[i+1], slower? but better v0.42
+ /* break if x is too near to the right border */
+ if (xi[i+1]>x1-3) { if (i==0) break; i--; xi[i+2]=x1; continue; } // backtrack: give up the last cut and move the previous one on
+ // ToDo: skip if not a local dy-min for speedup
+ { int ymin=y1, ymax=y0, bow=0, // min max at cutting point
+ max0=y0, max1=y0, // max y on left and right side
+ min0=y1, min1=y1; // min y on left and right side
+ for (dy=0,ii=0;ii<box2->num_frame_vectors[ 0 ];ii++) { // only the vectors up to num_frame_vectors[0] are inspected
+ int pre=ii-1, next=(ii+1)%box2->num_frame_vectors[ 0 ];
+ if (pre<0) pre=box2->num_frame_vectors[ 0 ]-1;
+ // check if vector is inside box to cut
+ if ( box2->frame_vector[ii ][0]<=xi[i ]) continue;
+ if ( box2->frame_vector[ii ][0]> xi[i+2]) continue;
+ // 2nd derivation of y(x)
+ if (abs(box2->frame_vector[ii ][0]-xi[i+1])<2) {
+ dy= 2*box2->frame_vector[ii ][1]
+ -box2->frame_vector[next][1]
+ -box2->frame_vector[pre ][1];
+ dx= box2->frame_vector[next][0]
+ -box2->frame_vector[pre ][0];
+ // rotate 180 degree if dx<0
+ if (((dx>0)?dy:-dy)<-abs(dx)/2) { bow=1; }
+ }
+ // it's not the best if we think of glued fi fo etc.
+ if (( box2->frame_vector[pre ][0]<=xi[i+1]
+ && box2->frame_vector[next][0]>=xi[i+1])
+ || ( box2->frame_vector[pre ][0]>=xi[i+1]
+ && box2->frame_vector[next][0]<=xi[i+1])) {
+ if ( box2->frame_vector[ii ][1]>ymax)
+ ymax= box2->frame_vector[ii ][1];
+ if ( box2->frame_vector[ii ][1]<ymin)
+ ymin= box2->frame_vector[ii ][1];
+ }
+ // min and max of left and right side
+ if ( box2->frame_vector[ii ][1]>max0
+ && box2->frame_vector[ii ][0]<=xi[i+1])
+ max0=box2->frame_vector[ii ][1];
+ if ( box2->frame_vector[ii ][1]>max1
+ && box2->frame_vector[ii ][0]> xi[i+1])
+ max1=box2->frame_vector[ii ][1];
+ if ( box2->frame_vector[ii ][1]<min0
+ && box2->frame_vector[ii ][0]<=xi[i+1])
+ min0=box2->frame_vector[ii ][1];
+ if ( box2->frame_vector[ii ][1]<min1
+ && box2->frame_vector[ii ][0]> xi[i+1])
+ min1=box2->frame_vector[ii ][1];
+ }
+ if(JOB->cfg.verbose&2)
+ fprintf(stderr,"\n# test if to split at x%d= %2d %2d %2d"
+ " bow,(max-min)[i,0,1] %d %3d %3d %3d"
+ , i, xi[i]-x0, xi[i+1]-x0, xi[i+2]-x0, bow, ymax-ymin, max0-min0, max1-min1);
+ /* skip if no local minimum at xi[i+1] or if it's not thin enough */
+ if (bow==0 || 4*(ymax-ymin)>2*(y1-y0)) continue;
+ // cut parts should have about the same height (max-min)
+ // we don't want to cut an 'n' in three parts!
+ if (2*(max0-min0+1)<(y1-y0+1)) continue; // left height
+ if (2*(max1-min1+1)<(y1-y0+1)) continue; // right height
+ // ToDo: thickness on xi[i+1]?
+ }
+ // try to split successive right box if left box is recognised,
+ // else shift the splitting point further to the right border
+ // removing ->dots if dot only above one char !!! ??? not implemented
+ if(JOB->cfg.verbose&2)
+ fprintf(stderr,"\n# try to split, newbox[%d].x= %2d ... %2d "
+ "dy= %d ", i, xi[i]-x0, xi[i+1]-x0, dy);
+ boxa=*box2; // copy contents, ToDo: reset ac-list (in cut_box?)
+ boxa.x=xi[i]; boxa.y=y0; // obsolete? mark pixel, overlap?
+ boxa.x0=xi[i];boxa.x1=xi[i+1]; // new horizontal box range
+ cut_box(&boxa); boxa.num_ac=0;
+ // out_x(&boxa);
+ // get wchar + certainty
+ ci[i]=whatletter(&boxa,cs,0); a2[i]=testac(&boxa,ci[i]);
+ if(JOB->cfg.verbose&2)
+ fprintf(stderr,"\n# certainty %d limit= %d cbest= %d ",
+ a2[i], JOB->cfg.certainty, cbest);
+ if (a2[i]<JOB->cfg.certainty || a2[i]<cbest-1
+ || wcschr(s1,ci[i]) ) { continue; } // don't split here
+
+ for (ar=ad,ii=0;ii<=i;ii++) {
+ ar=a2[ii]*ar/100; } // multiply all probabilities
+ if (ar<98*JOB->cfg.certainty/100 || ar<cbest) {
+ continue; } // don't go deeper, no longer string
+
+ i++; if (i==8) break; // maximum splits
+ if (i==4) break; // at the moment it's too slow to go further
+ if (i+1<8) xi[i+1]=x1; // right border of next box
+ if (i+2<8) xi[i+2]=x1;
+
+ if(JOB->cfg.verbose&2)
+ fprintf(stderr,"\n try end split [%d]=%d [%d]=%d ",
+ i, xi[i]-x0, i+1, xi[i+1]-x0);
+ boxb=*box2; // try rest if it has to be split again
+ boxb.x=xi[i]+1; boxb.y=y0;
+ boxb.x0=xi[i]+1;boxb.x1=xi[i+1];
+ cut_box(&boxb); boxb.num_ac=0;
+ ci[i]=whatletter(&boxb,cs,0); a2[i]=testac(&boxb,ci[i]);
+ if (a2[i]<JOB->cfg.certainty || a2[i]<cbest-1
+ || wcschr(s1,ci[i]) ) { xi[i+1]=xi[i]+2; continue; } // split rest
+ // now we have everything split
+
+ if(JOB->cfg.verbose&2) {
+ fprintf(stderr,"\n split at/to: ");
+ for (ii=0;ii<=i;ii++)
+ fprintf(stderr," %2d %s (%3d)", xi[ii+1]-x0,
+ decode(ci[ii],ASCII), a2[ii]);
+ fprintf(stderr,"\n");
+ }
+ // boxa..c changed!!! dots should be modified!!!
+ // Question: cut it into boxes v0.40 or set a string v0.41?
+ // new way of building a string v0.41 (can call setas multiple)
+ // useful if compare unknown with known strings (except barcode?)
+ // ToDo: also create alternate variants? ex: I <-> l
+ for (buf[0]=0,ar=ad,ii=0;ii<=i;ii++) {
+ ar=a2[ii]*ar/100; // multiply all probabilities
+ if (ii>0 && ci[ii]=='n' && ci[ii-1]=='r') ar--; // m == rn; the guard is on ii (not i) so that ci[-1] is never read
+ { size_t room=sizeof(buf)-strlen(buf)-1; strncat(buf,decode(ci[ii],JOB->cfg.out_format),(room<20)?room:20); } // at most 20 bytes per char and never past the end of buf
+ }
+
+ if (ar>cbest) cbest=ar; // best (highest) certainty found
+ // reduce, but not if we cross certainty border
+ if (99*ar/100 > JOB->cfg.certainty) ar=99*ar/100;
+ if (JOB->cfg.verbose&2)
+ fprintf(stderr,"\n split result= %s (%3d) ",buf, ar);
+ setas(box2,buf,ar); // char *, does it disturb further splitting?
+ buf[0]=0;
+ i--; xi[i+2]=x1; // go on searching for alternative cuts of the last part
+ }
+ }
+ }
+ } end_for_each(&(JOB->res.boxlist));
+ if(JOB->cfg.verbose)fprintf(stderr,", numC %d\n",JOB->res.numC);
+ return 0;
+}
+
+/*
+// ---- divide vertical glued boxes (ex: g above T): an UNKNOWN box that is more than 2 and less than 6 times avY high, starts between m1-2 and m2+2 of its line and ends at or below m4+avY-2 is cut at y=m4 if UNKNOWN boxes are found nearby on the same and on the next line; the upper part becomes a new box inserted before box2, the lower part stays in box2 with line+1. pp and mo are not used; returns 0.
+*/
+int divide_vert_glued_boxes( pix *pp, int mo){
+ struct box *box2,*box3,*box4;
+ int y0,y1,y,dy,flag_found,dx;
+ if(JOB->cfg.verbose)fprintf(stderr,"# divide vertical glued boxes");
+ for_each_data(&(JOB->res.boxlist)) {
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ if (box2->c != UNKNOWN) continue; /* don't try on pictures */
+ y0=box2->y0; y1=box2->y1; dy=y1-y0+1; // dy = box height
+ dx=4*(JOB->res.avX+box2->x1-box2->x0+1); // we want to be sure to look at 4ex distance
+ if ( dy>2*JOB->res.avY && dy<6*JOB->res.avY && box2->m1
+ && y0<=box2->m2+2 && y0>=box2->m1-2
+ && y1>=box2->m4+JOB->res.avY-2)
+ { // test if lower end fits one of the other lines?
+ box4=box2; flag_found=0; // bit 0: neighbour on same line, bit 1: neighbour on next line
+ for_each_data(&(JOB->res.boxlist)) {
+ box4 = (struct box *)list_get_current(&(JOB->res.boxlist)); // note: box2 itself is not excluded and satisfies the same-line test
+ if (box4->c != UNKNOWN) continue; /* don't try on pictures */
+ if (box4->x1<box2->x0-dx || box4->x0>box2->x1+dx) continue; // ignore far boxes
+ if (box4->line==box2->line ) flag_found|=1; // near char on same line
+ if (box4->line==box2->line+1) flag_found|=2; // near char on next line
+ if (flag_found==3) break; // we have two vertical glued chars
+ } end_for_each(&(JOB->res.boxlist));
+ if (flag_found!=3) continue; // do not divide big chars or special symbols
+ y=box2->m4; // lower end of the upper line = cutting row
+ if(JOB->cfg.verbose&2){
+ fprintf(stderr,"\n# divide box below y=%4d",y-y0);
+ }
+ // --- insert box3 before box2
+ box3= (struct box *) malloc_box(box2); // NOTE(review): result is used without a NULL check
+ box3->y1=y;
+ box2->y0=y+1; box2->line++; // m1..m4 should be corrected!
+ if (box4->line == box2->line){ // box4 is the box at which the search stopped; take its line borders if it is on the new line of box2
+ box2->m1=box4->m1; box2->m2=box4->m2;
+ box2->m3=box4->m3; box2->m4=box4->m4;
+ }
+ box3->num=JOB->res.numC;
+ if (list_ins(&(JOB->res.boxlist), box2, box3)) {
+ fprintf(stderr,"ERROR list_ins\n"); };
+ JOB->res.numC++;
+ }
+ } end_for_each(&(JOB->res.boxlist));
+ if(JOB->cfg.verbose)fprintf(stderr,", numC %d\n",JOB->res.numC);
+ return 0;
+}
+
+
+/*
+ ASCII-only wrappers around the <ctype.h> classifiers: on some systems isupper(>255) causes a segmentation fault (SIGSEGV), and negative arguments other than EOF are undefined behaviour as well.
+ Each wrapper returns the <ctype.h> result for 0 <= cc < 128 and 0 for every other value; the unsigned cast rejects negative values where wchar_t is a signed type.
+ ToDo: should be replaced (?) by wctype if available on every system
+ */
+int wisupper(wchar_t cc){ return (((unsigned long)cc<128)?isupper((int)cc):0); }
+int wislower(wchar_t cc){ return (((unsigned long)cc<128)?islower((int)cc):0); }
+int wisalpha(wchar_t cc){ return (((unsigned long)cc<128)?isalpha((int)cc):0); }
+int wisdigit(wchar_t cc){ return (((unsigned long)cc<128)?isdigit((int)cc):0); }
+int wisspace(wchar_t cc){ return (((unsigned long)cc<128)?isspace((int)cc):0); }
+
+/* setc: if testac() reports a non-zero weight for cc in box2 and cc differs from box2->c, call setac(box2,cc,(100+weight)/2); returns 1 if setac() was called, else 0 */
+int setc(struct box *box2, wchar_t cc){
+  int old_weight = 0, new_weight, changed = 0;
+  if (box2->num_ac) old_weight = box2->wac[0]; // weight of replaced char, only used for the message
+  new_weight = testac(box2,cc);
+  if (JOB->cfg.verbose) { // the message is printed even if nothing gets changed below
+    fprintf(stderr, "\n# change %s (%d) to %s (%d) at (%d,%d)", decode(box2->c,ASCII), old_weight, decode(cc,ASCII), new_weight, box2->x0, box2->y0);
+  }
+  if (new_weight != 0 && box2->c != cc) { setac(box2,cc,(100+new_weight)/2); changed = 1; }
+  // if(JOB->cfg.verbose & 4) out_x(box2);
+  // ToDo: modify per setac (shift ac)
+  return changed;
+}
+
+
+/* ---- proof difficult chars Il1 by context view ----
+ context: separator, number, vowel, nonvowel, upper case ????
+ could be also used to find unknown chars if the environment (nonumbers)
+ can be found in other places!
+ What is done per box (only chars <= 0xFF with at least two alternatives): Il1| , O0 and 5S are corrected via setc() depending on the neighbour chars; a missing space in xXx is inserted; a space before punctuation is removed; two neighbouring single quotes are merged to one double quote. Returns 0.
+ ToDo:
+ - box->tac[] as set of possible chars, ac set by engine, example:
+ ac="l/" (not "Il|/\" because serifs detected and slant>0)
+ correction only to one of the ac-set (alternative chars)!
+ - should be language-settable; Unicode compatible
+ - box2->ad and wac should be changed? (not proper yet)
+ * ------------- */
+int context_correction( job_t *job ) {
+ // const static char
+ const char *l_vowel="aeiouy";
+ // *l_Vowel="AEIOU",
+ const char *l_nonvo = "bcdfghjklmnpqrstvwxz";
+ struct box *box4, *box3, *box2, *prev, *next;
+ // pix *pp = &(job->src.p);
+ int nc=0, ns=0; // num corrections, num removed spaces
+
+ if (job->cfg.verbose)
+ fprintf(stderr, "# context correction Il1 0O");
+
+ for_each_data(&(job->res.boxlist)) {
+ box2 = (struct box *)list_get_current(&(job->res.boxlist));
+ if (box2->c > 0xFF) continue; // temporary UNICODE fix
+ prev = (struct box *)list_get_cur_prev(&(job->res.boxlist));
+ next = (struct box *)list_get_cur_next(&(job->res.boxlist));
+ if( (prev) && (prev->c > 0xFF)) continue; // temporary UNICODE fix 2
+ if( (next) && (next->c > 0xFF)) continue; // temporary UNICODE fix 3
+ if (box2->num_ac<2) continue; // no alternatives
+ if (box2->wac[0]==100 && box2->wac[1]<100) continue; // the first alternative is the only one with weight 100
+ if (box2->num_ac && box2->tas[0]) continue; // buggy space_remove 0.42
+
+ /* check for Il1| which are generally difficult to distinguish */
+ /* bbg: not very good. Should add some tests to check if is preceded by '.',
+ spelling, etc */
+ /* ToDo: only correct if not 100% sure (wac[i]<100)
+ and new char is in wat[] */
+ if (strchr("Il1|", box2->c) && next && prev) {
+// if( strchr(" \n",prev->c) // SPC
+// && strchr(" \n",next->c) ) box2->c='I'; else // bad idea! I have ...
+ if (wisalpha(next->c) && next->c!='i' &&
+ ( prev->c == '\n' ||
+ ( prev->c == ' ' &&
+ ( box4=(struct box *)list_prev(&(job->res.boxlist), prev)) &&
+ box4->c == '.' ) ) ) { nc+=setc(box2,(wchar_t)'I'); } // start of line or of a sentence
+ else if (box2->c!='1' && strchr(l_nonvo,next->c) &&
+ strchr("\" \n",prev->c)) /* lnt => Int, but 1st */
+ /* do not change he'll to he'Il! */
+ { nc+=setc(box2,(wchar_t)'I'); } // set box2->c to 'I' if 'I' is in the ac-list
+ else if (strchr(l_vowel,next->c)) /* unusual? Ii Ie Ia Iy Iu */
+ /* && strchr("KkBbFfgGpP",prev->c)) */ /* kle Kla Kli */
+ { nc+=setc(box2,(wchar_t)'l'); }
+ else if (wisupper(next->c)
+ && !strchr("O0I123456789",next->c)
+ && !strchr("O0I123456789",prev->c)) /* avoid lO => IO (10) */
+ { nc+=setc(box2,(wchar_t)'I'); }
+ else if (wislower(prev->c))
+ { nc+=setc(box2,(wchar_t)'l'); }
+ else if (wisdigit(prev->c) || wisdigit(next->c)
+ || (next->c=='O' && !wisalpha(prev->c))) /* lO => 10 */
+ { nc+=setc(box2,(wchar_t)'1'); }
+ }
+
+ /* check for O0 */
+ else if (strchr("O0", box2->c) && next && prev) {
+ if (wisspace(prev->c) && wisalpha(next->c)) /* initial letter */
+ { nc+=setc(box2,(wchar_t)'O'); }
+ else if (wisalpha(prev->c) && wisalpha(next->c)
+ && wisupper(next->c)) /* word in upper case */
+ { nc+=setc(box2,(wchar_t)'O'); }
+ else if (wisdigit(prev->c) || wisdigit(next->c))
+ { nc+=setc(box2,(wchar_t)'0'); }
+ }
+
+ /* check for 5S */
+ else if (strchr("5S", box2->c) && next && prev) {
+ if (wisspace(prev->c) && wisalpha(next->c)) /* initial letter */
+ { nc+=setc(box2,(wchar_t)'S'); }
+ else if (wisalpha(prev->c) && wisalpha(next->c)
+ && wisupper(next->c)) /* word in upper case */
+ { nc+=setc(box2,(wchar_t)'S'); }
+ else if (wisdigit(prev->c) || wisdigit(next->c))
+ { nc+=setc(box2,(wchar_t)'5'); }
+ }
+
+ /* was a space not found? xXx => x Xx ??? */
+ if (wisupper(box2->c) && next && prev) {
+ if (wislower(prev->c) && wislower(next->c)
+ && 2 * (box2->x0 - prev->x1) > 3 * (next->x0 - box2->x1)) { // left gap more than 1.5 times the right gap
+ struct box *box3 = malloc_box((struct box *) NULL); // shadows the outer box3; NOTE(review): used without a NULL check
+ box3->x0 = prev->x1 + 2;
+ box3->x1 = box2->x0 - 2;
+ box3->y0 = box2->y0;
+ box3->y1 = box2->y1;
+ box3->x = box2->x0 - 1;
+ box3->y = box2->y0;
+ box3->dots = 0;
+ box3->num_boxes = 0;
+ box3->num_subboxes = 0;
+ box3->c = ' ';
+ box3->modifier = 0;
+ setac(box3,' ',99); /* ToDo: weight depends from distance */
+ box3->num = -1;
+ box3->line = prev->line;
+ box3->m1 = box3->m2 = box3->m3 = box3->m4 = 0;
+ box3->p = &(job->src.p);
+ list_ins(&(job->res.boxlist), box2, box3); // the new space box goes in front of box2; prev is not refreshed
+ }
+ }
+
+ /* a space before punctuation? but not " ./file" */
+ if ( prev && next)
+ if (prev->c == ' ' && strchr(" \n" , next->c)
+ && strchr(".,;:!?)", box2->c))
+ if (prev->x1 - prev->x0 < 2 * job->res.avX) { // carefully on tables
+ box3 = prev;
+ if ( !list_del(&(job->res.boxlist), box3) ) free_box(box3);
+ prev = (struct box *)list_get_cur_prev(&(job->res.boxlist)); // refresh, the old prev is gone
+ ns++;
+ }
+
+ /* \'\' to \" */
+ if ( prev )
+ if ( (prev->c == '`' || prev->c == '\'')
+ && (box2->c == '`' || box2->c == '\'') )
+ if (box2->x0 - prev->x1 < job->res.avX) { // gap between the two quotes must be small; carefully on tables
+ box2->c='\"';
+ box3 = prev;
+ list_del(&(job->res.boxlist), box3);
+ free_box(box3);
+ }
+ } end_for_each(&(job->res.boxlist));
+ if (job->cfg.verbose)
+ fprintf(stderr, " num_corrected= %d removed_spaces= %d\n", nc, ns);
+ return 0;
+}
+
+
+/* ---- insert spaces ---- walk the (sorted) box list and insert a new box with c=' ' or c='\n' in front of a box when a new line starts or when the distance to its predecessor reaches the pitch of the line
+ * depends strongly on the outcome of measure_pitch() (reads job->res.lines.pitch[] and .mono[]); pp is stored as ->p of the new boxes; always returns 0
+ * ------------------------ */
+int list_insert_spaces( pix *pp, job_t *job ) {
+ int i=0, j1, j2, i1, maxline=-1, dy=0; char cc;
+ struct box *box2, *box3=NULL, *box4=NULL;
+
+ // measure mean line height (dy is only used for the verbose message below)
+ for(i1=1;i1<job->res.lines.num;i1++) {
+ dy+=job->res.lines.m4[i1]-job->res.lines.m1[i1]+1;
+ } if (job->res.lines.num>1) dy/=(job->res.lines.num-1);
+ i=0; j2=0;
+ for(i1=1;i1<job->res.lines.num;i1++) { // second pass: mean over the lines within 80%..120% of the first mean
+ j1=job->res.lines.m4[i1]-job->res.lines.m1[i1]+1;
+ if (j1>dy*120/100 || j1<dy*80/100) continue; // only most frequently
+ j2+=j1; i++;
+ } if (i>0 && j2/i>7) dy=j2/i;
+ if( job->cfg.verbose&1 )
+ fprintf(stderr,"# insert space between words (dy=%d) ...",dy);
+ if (!dy) dy=(job->res.avY)*110/100+1;
+
+ i=0; // from here: number of inserted boxes
+ for_each_data(&(job->res.boxlist)) {
+ box2 =(struct box *)list_get_current(&(job->res.boxlist));
+ cc=0; // char to insert in front of box2, 0 = nothing
+ if (box2->line>maxline) { // lines and chars must be sorted!
+ if (maxline>=0) cc='\n'; // NL
+ maxline=box2->line;
+ }
+ if((box3 = (struct box *)list_prev(&(job->res.boxlist), box2))){
+ if (maxline && !box2->line && cc==0) cc=' '; // box with line 0 after boxes of a real line
+ if (box2->line<=maxline && cc==0) { // lines and chars must be sorted!
+ int thispitch = job->res.lines.pitch[box2->line];
+ int thismono = job->res.lines.mono[box2->line];
+ int mdist = (box2->x1 + box2->x0 - (box3->x1 + box3->x0) + 1)/2; // distance between the box centers
+ int pdist = box2->x0 - box3->x1 + 1; // gap between the boxes
+ if (box2->x1 - box2->x0 < thispitch) pdist=pdist*4/3;
+ /* allow extra pixels around small characters .,'!: etc */
+ // fprintf(stderr,"#\n ... mono= %2d pitch= %2d mdist= %2d pdist= %2d",
+ // thismono, thispitch, mdist, pdist);
+ if ((thismono!=0 && mdist >= thispitch)
+ || (thismono==0 && pdist >= thispitch))
+ cc=' '; // insert SPACE
+ }
+ }
+ if(cc){
+ box4=(struct box *)list_prev(&(job->res.boxlist), box2); // predecessor of box2 (may be NULL)
+ box3=(struct box *)malloc_box(NULL); // NOTE(review): result is used without a NULL check
+ box3->x0=box2->x0-2; box3->x1=box2->x0-2;
+ box3->y0=box2->y0; box3->y1=box2->y1;
+ if(cc!='\n' && box4)
+ box3->x0=box4->x1+2; // a space starts right behind the predecessor
+ if(cc=='\n' || !box4)
+ box3->x0=job->res.lines.x0[box2->line]; // a newline starts at the left border of the line
+ if(cc=='\n' && box4){
+ box3->y0=box4->y1; // better use lines.y1[box2->pre] ???
+ box3->y1=box2->y0;
+ }
+ box3->x =box2->x0-1; box3->y=box2->y0;
+ box3->dots=0; box3->c=cc;
+ box3->num_boxes = 0;
+ box3->num_subboxes = 0;
+ box3->modifier='\0';
+ box3->num=-1; box3->line=box2->line;
+ box3->m1=box2->m1; box3->m2=box2->m2;
+ box3->m3=box2->m3; box3->m4=box2->m4;
+ box3->p=pp;
+ setac(box3,cc,99); /* ToDo: weight depends from distance */
+ list_ins(&(job->res.boxlist),box2,box3); // new box goes in front of box2
+ if( job->cfg.verbose&1 ) {
+ fprintf(stderr,"\n# insert space &%d; at x= %4d %4d box= %p",
+ (int)cc, box3->x0, box3->y0, (void*)box3);
+ /* out_x(box3); */
+ }
+ i++;
+ }
+ } end_for_each(&(job->res.boxlist));
+ if( job->cfg.verbose&1 ) fprintf(stderr," found %d\n",i);
+ return 0;
+}
+
+
+/*
+ add_line_info: add infos where the box is positioned to the box (line number and the line borders m1..m4 of the nearest text line from JOB->res.lines);
+ this is useful for better recognition. Boxes that end up too far above or below their line get line=0 and m1..m4=0. Always returns 0.
+*/
+int add_line_info(/* List *boxlist2 */){
+ // pix *pp=&JOB->src.p;
+ struct tlines *lines = &JOB->res.lines;
+ struct box *box2;
+ int i,xx,m1,m2,m3,m4,num_line_members=0,num_rest=0;
+ if( JOB->cfg.verbose&1 ) fprintf(stderr,"# add line infos to boxes ...");
+ for_each_data(&(JOB->res.boxlist)) {
+ box2 =(struct box *)list_get_current(&(JOB->res.boxlist));
+ for(i=1;i<JOB->res.lines.num;i++) /* line 0 is a place holder */
+ {
+ if (lines->dx) xx=lines->dy*((box2->x1+box2->x0)/2)/lines->dx; else xx=0; // vertical shift of the line at the box center (presumably the detected rotation - TODO confirm)
+ m1=lines->m1[i]+xx;
+ m2=lines->m2[i]+xx;
+ m3=lines->m3[i]+xx;
+ m4=lines->m4[i]+xx;
+ // fprintf(stderr," test line %d m1=%d %d %d %d\n",i,m1,m2,m3,m4);
+ if (m4-m1==0) continue; /* no text line (line==0) */
+#if 0
+ if( box2->y1+2*JOB->res.avY >= m1
+ && box2->y0-2*JOB->res.avY <= m4 ) /* not too far away */
+#endif
+ /* give also a comma behind the line a chance */
+ if( box2->x0 >= lines->x0[i] && box2->x1 <= lines->x1[i]+JOB->res.avX )
+ if( box2->m2==0 || abs(box2->y0-box2->m2) > abs(box2->y0-m2) ) // no line yet, or m2 of this line is nearer to the top of the box
+ { /* found nearest line */
+ box2->m1=m1;
+ box2->m2=m2;
+ box2->m3=m3;
+ box2->m4=m4;
+ box2->line=i;
+ }
+ }
+ if( box2->y1+2 < box2->m1
+ || box2->y0 < box2->m1 - (box2->m3-box2->m1)/2
+ || box2->y0-2 > box2->m4
+ || box2->y1 > box2->m3 + (box2->m3-box2->m1)
+ ) /* too far away */
+ { /* reset */
+ box2->m1=0;
+ box2->m2=0;
+ box2->m3=0;
+ box2->m4=0;
+ box2->line=0;
+ num_rest++;
+ } else num_line_members++;
+ } end_for_each(&(JOB->res.boxlist));
+ if( JOB->cfg.verbose&1 )
+ fprintf(stderr," done, num_line_chars=%d rest=%d\n",
+ num_line_members, num_rest);
+ return 0;
+}
+
+
+/*
+ * Comparison callback for list_sort(): brings the boxes in right order, i.e. by text line first and by left edge x0 second.
+ * Returns 1 if b has to come before a, otherwise -1 (never 0). add_line_info must be executed first because it sets box->line!
+ */
+int sort_box_func (const void *a, const void *b) {
+  const struct box *lhs = (const struct box *)a;
+  const struct box *rhs = (const struct box *)b;
+
+  // different lines: the box with the lower line number goes first
+  if (rhs->line != lhs->line)
+    return (rhs->line < lhs->line) ? 1 : -1;
+
+  // same line: the smaller x0 goes first, equal x0 yields -1 as well
+  return (rhs->x0 < lhs->x0) ? 1 : -1;
+}
+
+// -------------------------------------------------------------
+// ------ use this for entry from other programs
+// include pnm.h pgm2asc.h
+// -------------------------------------------------------------
+// entry point for gocr.c or if it is used as lib
+// better name is call_ocr ???
+// jb: OLD COMMENT: not removed due to set_options_* ()
+// args after pix *pp should be removed and new functions
+// set_option_mode(int mode), set_option_spacewidth() .... etc.
+// should be used instead, before calling pgm2asc(pix *pp)
+// ! change if you can ! - used by X11 frontend
+int pgm2asc(job_t *job)
+{
+ pix *pp;
+ progress_counter_t *pc;
+
+ assert(job);
+ /* FIXME jb: remove pp */
+ pp = &(job->src.p);
+
+ if( job->cfg.verbose )
+ fprintf(stderr, "# db_path= %s\n", job->cfg.db_path);
+
+ pc = open_progress(100,"pgm2asc_main");
+ progress(0,pc); /* start progress output 0% 0% */
+
+ /* ----- count colors ------ create histogram -------
+ - this should be used to create a upper and lower limit for cs
+ - cs is the optimum gray value between cs_min and cs_max
+ - also inverse scans could be detected here later */
+ if (job->cfg.cs==0)
+ job->cfg.cs=otsu( pp->p,pp->y,pp->x,0,0,pp->x,pp->y, job->cfg.verbose & 1 );
+ /* renormalize the image and set the normalized threshold value */
+ job->cfg.cs=thresholding( pp->p,pp->y,pp->x,0,0,pp->x,pp->y, job->cfg.cs );
+ if( job->cfg.verbose )
+ fprintf(stderr, "# thresholding new_threshold= %d\n", job->cfg.cs);
+
+ progress(5,pc); /* progress is only estimated */
+
+#if 0 /* dont vast memory */
+ /* FIXME jb: malloc */
+ if ( job->cfg.verbose & 32 ) {
+ // generate 2nd imagebuffer for debugging output
+ job->tmp.ppo.p = (unsigned char *)malloc(job->src.p.y * job->src.p.x);
+ // buffer
+ assert(job->tmp.ppo.p);
+ copybox(&job->src.p,
+ 0, 0, job->src.p.x, job->src.p.y,
+ &job->tmp.ppo,
+ job->src.p.x * job->src.p.y);
+ }
+#else
+ job->tmp.ppo=job->src.p; /* temporarely, removed later */
+#endif
+
+ /* load character data base */
+ if ( job->cfg.mode&2 )
+ load_db();
+
+ /* this is first step for reorganize the PG
+ ---- look for letters, put rectangular frames around letters
+ letter = connected points near color F
+ should be used by dust removing (faster) and line detection!
+ ---- 0..cs = black letters, last change = Mai99 */
+
+ progress(8,pc); /* progress is only estimated */
+
+ scan_boxes( pp );
+ if ( !job->res.numC ){
+ fprintf( stderr,"# no boxes found - stopped\n" );
+ //if(job->cfg.verbose&32) debug_img("out01",job,8);
+ /***** should free stuff, etc) */
+ return(1);
+ }
+ // if (job->cfg.verbose&32) debug_img("out00",job,4+8);
+
+ progress(10,pc); /* progress is only estimated */
+ // if(job->cfg.verbose&32) debug_img("out01",job,4+8);
+ // output_list(job); // for debugging
+ // ToDo: matrix printer preprocessing
+
+ remove_dust( job ); /* from the &(job->res.boxlist)! */
+// if(job->cfg.verbose&32) debug_img("out02",job,4+8);
+// output_list(job); // for debugging
+ smooth_borders( job ); /* only for big chars */
+ progress(12,pc); /* progress is only estimated */
+// if(job->cfg.verbose&32) debug_img("out03",job,4+8);
+// output_list(job); // for debugging
+
+ //detect_barcode( job ); /* mark barcode */
+// if(job->cfg.verbose&32) debug_img("out04",job,4+8);
+// output_list(job); // for debugging
+
+ detect_pictures( job ); /* mark pictures */
+// if(job->cfg.verbose&32) debug_img("out05",job,4+8);
+// output_list(job); // for debugging
+
+ remove_pictures( job ); /* do this as early as possible, before layout */
+// if(job->cfg.verbose&32) debug_img("out06",job,4+8);
+// output_list(job); // for debugging
+
+ glue_holes_inside_chars( pp ); /* including count subboxes (holes) */
+
+ detect_rotation_angle( job );
+
+#if 1 /* Rotate the whole picture! move boxes */
+ if( job->res.lines.dy!=0 ){ // move down lowest first, move up highest first
+ // in work! ??? (at end set dy=0) think on ppo!
+ }
+#endif
+ detect_text_lines( pp, job->cfg.mode ); /* detect and mark JOB->tmp.ppo */
+// if(job->cfg.verbose&32) debug_img("out07",job,4+8);
+ progress(20,pc); /* progress is only estimated */
+
+ add_line_info(/* &(job->res.boxlist) */);
+ //if (job->cfg.verbose&32) debug_img("out10",job,4+8);
+
+ divide_vert_glued_boxes( pp, job->cfg.mode); /* after add_line_info, before list_sort! */
+// if(job->cfg.verbose&32) debug_img("out11",job,0);
+
+ remove_melted_serifs( pp ); /* make some corrections on pixmap */
+ /* list_ins seems to insert the boxes at the wrong place ??? */
+// if(job->cfg.verbose&32) debug_img("out12",job,4+8);
+
+ glue_broken_chars( pp ); /* 2nd glue */
+// if(job->cfg.verbose&32) debug_img("out14",job,4+8);
+
+ remove_rest_of_dust( );
+// if(job->cfg.verbose&32) debug_img("out15",job,4+8);
+
+ /* better to sort after dust is removed (slow for a lot of pixels) */
+ list_sort(&(job->res.boxlist), sort_box_func);
+
+ measure_pitch( job );
+
+ if(job->cfg.mode&64) find_same_chars( pp );
+ progress(30,pc); /* progress is only estimated */
+// if(job->cfg.verbose&32) debug_img("out16",job,4+8);
+
+ char_recognition( pp, job->cfg.mode);
+ progress(60,pc); /* progress is only estimated */
+// if(job->cfg.verbose&32) debug_img("out17",job,4+8);
+
+ if ( adjust_text_lines( pp, job->cfg.mode ) ) { /* correct using chars */
+ /* characters/pictures may have changed their line number */
+ list_sort(&(job->res.boxlist), sort_box_func);
+ // 2nd recognition call if lines are adjusted
+ char_recognition( pp, job->cfg.mode);
+ }
+
+#define BlownUpDrawing 1 /* german: Explosionszeichnung, temporarly */
+#if BlownUpDrawing == 1 /* german: Explosionszeichnung */
+{ /* just for debugging */
+ int i,ii,ni; struct box *box2;
+ i=ii=ni=0;
+ for_each_data(&(JOB->res.boxlist)) { /* count boxes */
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ if (box2->c==UNKNOWN) i++;
+ if (box2->c==PICTURE) ii++;
+ ni++;
+ } end_for_each(&(JOB->res.boxlist));
+ if (JOB->cfg.verbose)
+ fprintf(stderr,"# debug: unknown= %d picts= %d boxes= %d\n",i,ii,ni);
+}
+#endif
+ // ----------- write out20.pgm ----------- mark lines + boxes
+ //if (job->cfg.verbose&32) debug_img("out20",job,1+4+8);
+
+ compare_unknown_with_known_chars( pp, job->cfg.mode);
+ progress(70,pc); /* progress is only estimated */
+
+ try_to_divide_boxes( pp, job->cfg.mode);
+ progress(80,pc); /* progress is only estimated */
+
+ /* --- list output ---- for debugging --- */
+ //if (job->cfg.verbose&6) output_list(job);
+
+ /* ---- insert spaces ---- */
+ list_insert_spaces( pp , job );
+
+ // ---- check difficult chars (I, l, 1) using their context ----
+ if (JOB->cfg.verbose)
+ fprintf(stderr,"# context correction if !(mode&32)\n");
+ if (!(job->cfg.mode&32)) context_correction( job );
+
+ store_boxtree_lines( job->cfg.mode );
+ progress(90,pc); /* progress is only estimated */
+
+/* 0050002.pgm.gz ca. 109 digits, only 50 recognized (only in lines?)
+ * ./gocr -v 39 -m 56 -e - -m 4 -C 0-9 -f XML tmp0406/0050002.pbm.gz
+ * awk 'BEGIN{num=0}/1<\/box>/{num++;}END{print num}' o
+ * 15*0 24*1 18*2 19*3 15*4 6*5 6*6 6*7 4*8 8*9 sum=125digits counted boxes
+ * 9*0 19*1 14*2 15*3 11*4 6*5 5*6 6*7 4*8 8*9 sum=97digits recognized
+ * 1*1 1*7 not recognized (Oct04)
+ * 33*SPC 76*NL = 109 spaces + 36*unknown sum=241 * 16 missed
+ */
+#if BlownUpDrawing == 1 /* german: Explosionszeichnung */
+{ /* just for debugging */
+ int i,ii,ni; struct box *box2; const char *testc="0123456789ABCDEFGHIJK";
+ i=ii=ni=0;
+ for_each_data(&(JOB->res.boxlist)) { /* count boxes */
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ if (box2->c==UNKNOWN) i++;
+ if (box2->c==PICTURE) ii++;
+ if (box2->c>' ' && box2->c<='z') ni++;
+ } end_for_each(&(JOB->res.boxlist));
+ if(JOB->cfg.verbose)
+ fprintf(stderr,"# debug: (_)= %d picts= %d chars= %d",i,ii,ni);
+ for (i=0;i<20;i++) {
+ ni=0;
+ for_each_data(&(JOB->res.boxlist)) { /* count boxes */
+ box2 = (struct box *)list_get_current(&(JOB->res.boxlist));
+ if (box2->c==testc[i]) ni++;
+ } end_for_each(&(JOB->res.boxlist));
+ if(JOB->cfg.verbose && ni>0)
+ fprintf(stderr," (%c)=%d",testc[i],ni);
+ }
+ if(JOB->cfg.verbose)
+ fprintf(stderr,"\n");
+}
+#endif
+
+ // ---- frame-size-histogram
+ // ---- (my own defined) distance between letters
+ // ---- write internal picture of textsite
+ // ----------- write out30.pgm -----------
+ //if( job->cfg.verbose&32 ) debug_img("out30",job,2+4);
+
+ progress(100,pc); /* progress is only estimated */
+
+ close_progress(pc);
+
+ return 0; /* what should I return? error-state? num-of-chars? */
+}