X-Git-Url: http://git.asbjorn.it/?a=blobdiff_plain;f=lib%2Fgocr%2Focr0.c;fp=lib%2Fgocr%2Focr0.c;h=1066b7cb6e444167f639f3a9df771bfd000c2ff9;hb=fe0ca47a7024b0e592efecc1151962149fe8ce38;hp=0000000000000000000000000000000000000000;hpb=80fabd0e494d8049b3f3f793ad0444c0ea38bdb8;p=swftools.git diff --git a/lib/gocr/ocr0.c b/lib/gocr/ocr0.c new file mode 100644 index 0000000..1066b7c --- /dev/null +++ b/lib/gocr/ocr0.c @@ -0,0 +1,6591 @@ +/* + rule based OCR engine, partly rewritten for edges (old=pixel) + */ +/* +This is a Optical-Character-Recognition program +Copyright (C) 2000-2007 Joerg Schulenburg + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + see README for email address + + >>> DO NOT EDIT THIS FILE IF YOU NOT REALLY KNOW WHAT YOU ARE DOING! <<< + + I have invested lot of time, to write this part of the program. + This engine should recognize chars allways right or return UNKNOWN. + If you change something, test all other example files too, + to be sure that all things work better. (JoergS) + + This engine was pixelbased until 0.40 which was not successfull enough. + Also code changes always hade side effects. The vectorisation of the code + starts from version 0.41 with the chars XNz and seems to be much better + to handle. Vectorization means we frame each character by a chain of + vectors and dont care about pixels anymore. 
Unfortunatly I have to + replace all the pixel codes, which is a long process. Old code will be lost. + (JorgS) + + +ToDo: + - if box1->p and b differ, reduce probability + - probability makes life much easier here + - use only one box!?, may be bits have usefull infos + - divide this file, suggestion: classify chars: + high=ABCDEFGHIJKLMNOPQRSTUVWXYZbdfhklt, low=acegijmnopqrsuvwxyz + or + often_used=etianmsurwdkgo rarely_used=hvjcflpqxyz.,: + or + every char (large overhead) + - two-pass version (first pass without tolerance) + 2nd pass with tolerance (ex: one tiny more in sdata->holes) + + general feature extraction: + - white holes at middle, upper, lower position (cost much time) + - test lines and triangles insteat of rectangles + + char is removed, wchar_t is used (better code) + + making a static global variable-set x.x0,x.x1, and call test_a, + test_b ... (faster compilation, but not reentrant!) + + - adding slant-angle (if detected) to distinguish between l and / ? + - ac (alternate chars) as string add_ac(box1,"/") => box1->ac="Il/"; + for better context correction or output: "Ha[lI][lI]o!" + +*/ + +#include +#include +// #include "pgm2asc.h" +#include "ocr0.h" +// #include "ocr1.h" +#include "pnm.h" +#include "gocr.h" + +#define IFV if(JOB->cfg.verbose&4) +#define MM {IFV fprintf(stderr,"\nDBG %c L%04d (%d,%d): ",(char)c_ask,__LINE__,box1->x0,box1->y0);} + +// the old debug mode (0.40) was only for a special char, for another char +// code must be recompiled with C_ASK='char' +// new debug mode (0.41) explains why char is declined or accepted as ABC... +// the output can be filtered by external scripts +// ToDo: we could reduce output to filter string +#ifndef DO_DEBUG /* can be defined outside */ +#define DO_DEBUG 0 /* 0 is the default */ +#endif + +/* this macro is for debugging output: "if char is declined, why?" 
*/ +#if DO_DEBUG /* 0=Work mode, 1=debugging mode */ +// Setac: output, that char is choosen with a probability +// Break: output, why the char is not choosen +// MSG: debugging functions for char C_ASK, mostly messages +// DBG: definitions usefull only for debugging +#define Setac(box1,ac,ad) { MM;IFV fprintf(stderr,"setac %d",ad);setac(box1,ac,ad); } +#define Break { MM;IFV fprintf(stderr,"break"); break; } +#define MSG(x) { MM;IFV x } +#define DBG(x) x +#else +#define Setac(box1,ac,ad) setac(box1,ac,ad) +#define Break break +#define MSG(x) +#define DBG(x) +#endif + +/* extern "C"{ */ + +// static inline int sq(int x) { return x*x; } /* square */ + +/* + * go from vector j1 to vector j2 and measure maximum deviation of + * the steps from the line connecting j1 and j2 + * return the squared maximum distance + * in units of the box size times 1024 + * ToDo: 1) better give back max-dx and max-dy ??? + * errors if j1 and j2 are in different frames or belong to + * more then one frame? + * 2) Better get deviation from a complete vector graphic? + * The vectorgraphic is the ideal test char adapted to the + * extrem vertices of the real char. 
+ */ +int line_deviation( struct box *box1, int j1, int j2 ) { + int r1x, r1y, r2x, r2y, r3x, r3y, i, x, y, d, dist, maxdist=0, frame, l2; + r1x=box1->frame_vector[j1][0]; + r1y=box1->frame_vector[j1][1]; + r2x=box1->frame_vector[j2][0]; + r2y=box1->frame_vector[j2][1]; + if (!box1->num_frames) return(-1); + if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] || + j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) { + fprintf(stderr,"Error in "__FILE__" L%d: idx out of range",__LINE__); + return(-1); + } + /* get the frame the endvector belongs to */ + for (i=0;inum_frames;i++) + if (j2num_frame_vectors[i]) break; + frame=i; + /* frame(j1)<=frame(j2) possible */ + for (i=j1;;i++) { // do it for each vector between j1 and j2 + if (i >= box1->num_frame_vectors[frame]) + i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */ + if (i==j2) break; + // for (i=j1;i!=j2;i=(i+1)%box1->num_frame_vectors[0]) {~} + r3x=box1->frame_vector[i][0]; + r3y=box1->frame_vector[i][1]; + // Language=german + // german: Abstand Punkt von Strecke, Laenge Lotrechte + // germ.Strecke : l1=(r1+r2)/2+d*(r2-r1)/2 for d=-1..1 + // germ.Lotrechte: l2=r3+b*[-(r2-r1).y,(r2-r1).x] + // Schnittpunkt : l1=l2, + // eq1x: (r1x+r2x)/2-r3x+d*(r2x-r1x)/2+b*(r2y-r1y)=0 + // eq1y: (r1y+r2y)/2-r3y+d*(r2y-r1y)/2-b*(r2x-r1x)=0 + // eq2x: b*(r2x-r1x)*(r2y-r1y)=-((r1x+r2x)/2-r3x+d*(r2x-r1x)/2)*(r2x-r1x) + // eq2y: b*(r2x-r1x)*(r2y-r1y)= ((r1y+r2y)/2-r3y+d*(r2y-r1y)/2)*(r2y-r1y) + // eq2y-eq2x: ... in units of 1024 (fast integer rounded correctly) + l2=sq(r2x-r1x)+sq(r2y-r1y); // square of distance r2-r1 + if (l2==0) { + // fprintf(stderr,"ocr0 L%d: r1==r2 r1= %d %d",__LINE__, r1x, r1y); // debugging + d=-1024; + } else + d=-( ((r1x+r2x)-2*r3x)*(r2x-r1x) + +((r1y+r2y)-2*r3y)*(r2y-r1y))*1024/l2; // ..-1024..+1024.. 
+ if (d<=-1024) { x=r1x; y=r1y; } // starting point + else { + if (d>=1024) { x=r2x; y=r2y; } // end point + else { + x=((r1x+r2x)+1)/2+(d*(r2x-r1x))/2048; + y=((r1y+r2y)+1)/2+(d*(r2y-r1y))/2048; + /* we have the crossing point x,y now */ + } + } + dist=sq((x-r3x)*1024/(box1->x1-box1->x0+1)) + +sq((y-r3y)*1024/(box1->y1-box1->y0+1)); // 0..2*sq(1024) + if (dist>maxdist) maxdist=dist; + // for debugging: + // fprintf(stderr,"\nDBG dev: %d-%d-%d dist=%5d max=%5d d=%d %d,%d-%d,%d" + // " vector= %d %d crosspoint= %d %d ", + // j1,i,j2,dist,maxdist,d,r1x,r1y,r2x,r2y,r3x,r3y,x,y); + } + return maxdist; +} + +/* + * search vectors between j1 and j2 for nearest point a to point r + * example: + * + * r-> $$...$$ $ - mark vectors + * @@$..@@ @ - black pixels + * @@$..@@ . - white pixels + * @@@@.$@ + * a-> @@$@$@@ + * @$.@@@@ + * @@..$@@ + * @@..$@@ + * j1 --> $$...$$ <-- j2 + * + * ToDo: vector aa[5] = {rx,ry,x,y,d^2,idx} statt rx,ry? + * j1 and j2 must be in the same frame + * return aa? + */ +int nearest_frame_vector( struct box *box1, int j1, int j2, int rx, int ry) { + int x,y,d,i,aa[4]; /* x,y,normalized_distance^2,vector_index */ + int frame=0, x0=box1->x0, y0=box1->y0, + x1=box1->x1, y1=box1->y1, + dx=box1->x1-x0+1, dy=box1->y1-y0+1; + if (!box1->num_frames) return(-1); + if (j1<0 || j1>box1->num_frame_vectors[box1->num_frames-1] || + j2<0 || j2>box1->num_frame_vectors[box1->num_frames-1]) { + fprintf(stderr,"Error in "__FILE__" L%d: idx %d-%d out of range\n",__LINE__,j1,j2); + //out_x(box1); + return(-1); + } + aa[0]=x=box1->frame_vector[j2][0]; /* x */ + aa[1]=y=box1->frame_vector[j2][1]; /* y */ + /* maximum is (distance*128)^2 if r is inside the box */ + aa[2]=d=2*sq(128)+sq((rx-(x0+x1)/2)*128/dx)+sq((ry-(y0+y1)/2)*128/dy); + aa[3]=j2; /* vector index */ + /* get the frame the endvector belongs to */ + for (i=0;inum_frames;i++) + if (j2num_frame_vectors[i]) break; + frame=i; + /* frame(j1)<=frame(j2) possible */ + for (i=j1;;i++) { + if (i >= 
box1->num_frame_vectors[frame]) + i=((frame)?box1->num_frame_vectors[frame-1]:0); /* go around */ + x=box1->frame_vector[i][0]; /* take a vector */ + y=box1->frame_vector[i][1]; + /* distance to upper left end, normalized to 128 */ + d=sq((x-rx)*128/dx)+sq((y-ry)*128/dy); + if (d0 and m==1 box1 is changed +// m>0 modify box1->dots +// m==2 modify box1->y0 +// called by pgm2asc + ocr0(?) +int testumlaut(struct box *box1, int cs, int m, wchar_t *modifier){ + // pix p=*(box1->p); + int r,y,x,x0,x1,y0,y1,dx,dy,m1,m2,m3, + xl,xr,yu,yl; // left, right, upper and lower border of dots + wchar_t mod='\0'; /* (TeX-) modifier ~"'` for compose() */ + DBG( wchar_t c_ask='"'; ) + r=0; + x0=box1->x0; x1=box1->x1; dx=x1-x0+1; + y0=box1->y0; y1=box1->y1; dy=y1-y0+1; + m1=box1->m1; m2=box1->m2; m3=box1->m3; + xl=x0; xr=x1; yu=yl=y0; + if( dy < 5 || 4*y0 > 3*m2+m3 ) return 0; // no low chars: .,-= + /* modifier in box included? */ + if( 2*y1 > m1+m2 ){ + /* modifier in box included? */ + for(y=y0;2*yp,cs,1)==0 ) break; + if( 2*y extract */ + yl=y; + while( get_bw(xl,xr,y,y,box1->p,cs,1)==0 && 2*y<=y0+y1) y++; + if( m&2 ) box1->y0=y; /* set new upper bond */ + } + } + if( yu>=yl ) { if(m) box1->dots=0; return 0; } /* nothing found */ + if( get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==1 ) // neighbour overlap? + while( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==1 && 2*xlp,cs,1)==1 ) break; + for(;xr>xl;xr--)if( get_bw(xr,xr,yu,yl,box1->p,cs,1)==1 ) break; + + if ( yl-1>yu ) { // tall box ij"a"o"u +#if 0 + x=box1->y0; box1->y0=m1; out_x(box1); box1->y0=x; + fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0); + fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0); +#define DEBUG 1 +#endif + { + + x=xl;y=yu; + if( get_bw(xl,x1+1,yu,yl-1,box1->p,cs,1)==0 ) r=0; // neighbour overlap? 
+ else + if( get_bw(xl ,xl ,yu,yl-1,box1->p,cs,1)==0 + || get_bw(xl-1,xl-1,yu,yl-1,box1->p,cs,1)==0 ) // be sure there are gap to neighbours + if( get_bw(xr ,xr ,yu,yl-1,box1->p,cs,1)==0 + || get_bw(xr+1,xr+1,yu,yl-1,box1->p,cs,1)==0 ) + { int i,j,x; + r=1; + // ...@@@.... RING_ABOVE // ..@@@..@@. TILDE + // ..@...@... // @@.@@@@@.. + // ..@...@... // @......... + // ..@..@@... + // ...@@@.... + for (i=yu;ip,cs,1)==1) break; + for ( ;ip,cs,1)==0) break; + for (j=xl;jp,cs,1)==1) break; + for ( ;jp,cs,1)==0) break; + for ( x=j;xp,cs,1)==1) break; + // vert. gap detected + if( j2 + && num_obj(xl,xr,yu,yl-1,box1->p,cs)>=2 // not best!!! + && num_cross(xl,xr,yu +(yl-yu)/4,yu+ (yl-yu)/4,box1->p,cs) == 2 + && num_cross(xl,xr,yl-1-(yl-yu)/2,yl-1-(yl-yu)/2,box1->p,cs) == 2 + ){ // may be the following lines are not quite ok + while( get_bw(xl,xr,yl,yl,box1->p,cs,1)==0 && 2*yly0=yl; +/* if( m&2 ) box1->y0= ( (r==1) ? yu : yl ); */ + // out_x(box1); + } + if(r==0){ // divided fr != fi + while( get_bw(x0,x1,yu,yu,box1->p,cs,1)==0 && 2*yuy0=yu; + } + if( r==1 ){ yl--; +// .@@@. ..@@. +// .@@.. .@@.. +// .@... .@@.. +// +// if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) +// > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8 +// && loop(box1->p,xr,yu,xr-xl,cs,0,LE) +// < loop(box1->p,xr,yl,xr-xl,cs,0,LE)) // -dx/8 ) // é Nov03 + if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) + - loop(box1->p,xr,yu,xr-xl,cs,0,LE) + > loop(box1->p,xl,yl,xr-xl,cs,0,RI) // +dx/8 + - loop(box1->p,xr,yl,xr-xl,cs,0,LE)+1) // -dx/8 ) // é Nov03 + mod = ACUTE_ACCENT; // ' + + if( xr-xl+1 > 3*(yl-yu+1) + && get_bw(xl,xr,yu,yl,box1->p,cs,2)==0 ) + mod = MACRON; // "-" above + +// .@@@. .@@.. +// ..@@. ..@@. +// ...@. ..@@. 
+// +// if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) +// < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8 +// && loop(box1->p,xr,yu,xr-xl,cs,0,LE) +// > loop(box1->p,xr,yl,xr-xl,cs,0,LE) ) // +dx/8 ) à Nov03 + if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) + - loop(box1->p,xr,yu,xr-xl,cs,0,LE) + < loop(box1->p,xl,yl,xr-xl,cs,0,RI) // -dx/8 + - loop(box1->p,xr,yl,xr-xl,cs,0,LE) -1 ) // +dx/8 ) à Nov03 + mod = GRAVE_ACCENT; // ` + +#ifdef DEBUG + fprintf(stderr,"\n#testumlaut x= %d %d m1=%d m2=%d",x0,y0,m1-y0,m2-y0); + fprintf(stderr," yu=%d yl=%d xl=%d xr=%d",yu-y0,yl-y0,xl-x0,xr-x0); +#endif + if( (xr-xl+1) < 2*(yl-yu+1)+2 + && 2*(xr-xl+1)+2 > (yl-yu+1) ) { + int i,i1,i2,i3,i4; + i1=loop(box1->p,xl ,(yu+yl)/2,xr-xl+1,cs,0,RI); + i1=loop(box1->p,xl+i1,(yu+yl)/2,xr-xl+1,cs,1,RI); + i2=loop(box1->p,(xl+xr)/2,yu ,yl-yu+1,cs,0,DO); + i2=loop(box1->p,(xl+xr)/2,yu+i2,yl-yu+1,cs,1,DO); + for (i=0;ip,xl+i,yu+i)< cs) break; i3=i; + for ( ;ip,xl+i,yu+i)>=cs) break; i3=i-i3; + for (i=0;ip,xr-i,yu+i)< cs) break; i4=i; + for ( ;ip,xr-i,yu+i)>=cs) break; i4=i-i4; +#ifdef DEBUG + fprintf(stderr,"\n#DEBUG DOT_ABOVE %d %d %d %d",i1,i2,i3,i4); +#endif + if ( (xr-xl<5 && yl-yu<8) /* to small */ + || (i1>=(xr-xl+1)/2+2 && i2>=(yl-yu+1)/2+2 /* symmetrical */ + && abs(i3-i4)<=i1/4+2 && abs(i1-i2)<=i1/4+2 + && abs(i3-i1)<=i1/4+4 && abs(i4-i2)<=i1/4+4) + ) + mod = DOT_ABOVE; // "." above, ToDo: improve it! 
+ } + + if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) + > loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/8 + || loop(box1->p,xl,yu ,xr-xl,cs,0,RI) + > loop(box1->p,xl,yl-1,xr-xl,cs,0,RI)-dx/8 ) + && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) + > loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/8 + || loop(box1->p,xr,yu ,xr-xl,cs,0,LE) + > loop(box1->p,xr,yl-1,xr-xl,cs,0,LE)-dx/8 ) + && num_cross(xl,xr,yu ,yu ,box1->p,cs) == 1 + && ( num_cross(xl,xr,yl ,yl ,box1->p,cs) == 2 + || num_cross(xl,xr,yl-1,yl-1,box1->p,cs) == 2 )) + mod = CIRCUMFLEX_ACCENT; // "^" + + if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) + < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 + || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI) + < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 ) + && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) + < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 + || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE) + < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 ) + && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2 + || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 ) + && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 ) + mod = CARON; // "v" above + + if( /* test for bow (new0.3.6) */ + loop(box1->p,xl,yu ,xr-xl,cs,0,RI) + + loop(box1->p,xl,yl ,xr-xl,cs,0,RI) + - 2*loop(box1->p,xl,(yl+yu)/2,xr-xl,cs,0,RI) > dx/16+1 + && xr-xl>10) + if( ( loop(box1->p,xl,yu ,xr-xl,cs,0,RI) + < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 + || loop(box1->p,xl,yu+1,xr-xl,cs,0,RI) + < loop(box1->p,xl,yl ,xr-xl,cs,0,RI)-dx/10 ) + && ( loop(box1->p,xr,yu ,xr-xl,cs,0,LE) + < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 + || loop(box1->p,xr,yu+1,xr-xl,cs,0,LE) + < loop(box1->p,xr,yl ,xr-xl,cs,0,LE)-dx/10 ) + && ( num_cross(xl,xr,yu ,yu ,box1->p,cs) == 2 + || num_cross(xl,xr,yu+1,yu+1,box1->p,cs) == 2 ) + && num_cross(xl,xr,yl ,yl ,box1->p,cs) == 1 ) + mod = BREVE; // round "u" above + + if( xr-xl>3 && yl-yu>1 ) + if( loop(box1->p,xl,yu,xr-xl,cs,0,RI) + > loop(box1->p,xl,yl,xr-xl,cs,0,RI) + && loop(box1->p,xr,yu,xr-xl,cs,0,LE) + < loop(box1->p,xr,yl,xr-xl,cs,0,LE) + && num_cross(xl,xr,yu,yu,box1->p,cs) 
== 2 + && num_cross(xl,xr,yl,yl,box1->p,cs) == 2 ) + mod = TILDE; + + if( xr-xl>2 && yl-yu>2) + if( num_cross(xl,xr,(yu+yl)/2,(yu+yl)/2,box1->p,cs) >1 ) + if( num_cross((xl+xr)/2,(xl+xr)/2,yu,yl,box1->p,cs) >1 ) + if( num_hole(xl,xr,yu,yl,box1->p,cs,NULL) == 1 ) + mod = RING_ABOVE; + +#ifdef DEBUG + printf("\n#DEBUG umlaut mod=0x%04x x=%d..%d y=%d..%d r=%d %s", + (int)mod,yu-box1->y0,yl-box1->y0, + xl-box1->x0,xr-box1->x0,r,((mod==CARON)?"CARON": + ((mod==ACUTE_ACCENT)?"ACUTE": + ((mod==TILDE)?"TILDE":"?")))); + out_x(box1); +#endif + + } + } + if (m) box1->dots=r; // set to 0 also possible after division + if (m) box1->modifier=mod; /* should be resetted after compose ??? */ + MSG(fprintf(stderr,"umlaut mod=%s dots=%d y0o=%d",decode(mod,ASCII),r,y0);) + } +// printf(" modifier=%c",mod); + if (modifier) *modifier=mod; /* set modifier */ + return r; +} + + +static wchar_t ocr0_eE(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + int i,i1,i2,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,bad_e=0, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ + + // --- most frequent letter e first!!! 
+ // --- test e --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (smallest seen is 5x6) + DBG( wchar_t c_ask='e'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num != 1) ad=97*ad/100; + /* ToDo: may be a two pass version intolerant/tolerant is better */ + if( loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI)>dx/3 ) Break; // rough test + if( loop(box1->p,x0+dx/2,y0,y1-y0,cs,0,DO)>dy/3 ) Break; + if( loop(box1->p,x0+dx/2,y1,y1-y0,cs,0,UP)>dy/3 ) Break; + if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 2 + && num_cross(x0,x1,y0+dy/4+1,y0+dy/4+1,box1->p,cs) > 2 ) Break; // gt + x=(x0+x1)/2;i= num_cross(x,x,y0,y1,box1->p,cs); // v0.40 + if (i!=3) { x=(x0+2*x1)/3;i= num_cross(x,x,y0,y1,box1->p,cs); } + if (i!=3) { x=(x0+3*x1)/4;i= num_cross(x,x,y0,y1,box1->p,cs); } + if (i!=3) { i= num_cross((x0+2*x1)/3,(x0+x1)/2,y0,y1,box1->p,cs); } + i=loop(box1->p,x0,y0+dy/2,x1-x0,cs,0,RI); if( i>dx/2 ) Break; + j=loop(box1->p,x0,y0 ,x1-x0,cs,0,RI); if( jp,x0,y1 ,x1-x0,cs,0,RI); if( jp,x0+dx/2,y0,y1-y0,cs,0,DO); if( i>dx/2 ) Break; + j=loop(box1->p,x1-dx/3,y0,y1-y0,cs,0,DO); if( jp,x0 ,y0,y1-y0,cs,0,DO); if( jp,x1 ,y0,y1-y0,cs,0,DO); if( jp,x0+dx/2,y1,y1-y0,cs,0,UP); if( i>dx/2 ) Break; + j=loop(box1->p,x0 ,y1,y1-y0,cs,0,UP); if( jp,x1 ,y1,y1-y0,cs,0,UP); if( jp,x0, (y0+y1)/2,x1-x0,cs,0,RI) + -loop(box1->p,x0,(3*y0+y1)/4,x1-x0,cs,0,RI) + -loop(box1->p,x0,(y0+3*y1)/4,x1-x0,cs,0,RI); + if (dx>3 && j>=dx/4) Break; // ~g 4x6font + for(y=1;yp,cs) == 2 ) break; + if( y==dy/2 ) Break; // v0.2.5 ~ bad_t + for(i=0,j=x0+dx/4;j<=x1-dx/4 && i<=dx/4;j++) + if( num_cross(j,j,y0,y1,box1->p,cs) == 3 ) i++; + if( dx>4 && dy>5 && (i set x,y + for(x=0,y=i=y0+dy/3;ip,x1,i,y1-y0,cs,0,LE); + if(j>=x) { x=j;y=i; } + } + if (x 2* smallest thickness right + for(i1=dx,i=y0+dy/3;ip,x0 ,i,y1-y0,cs,0,RI); if (j>dx/2) break; + j =loop(box1->p,x0+j,i,y1-y0,cs,1,RI); + if (jp,x1 ,i,y1-y0,cs,0,LE); + j 
=loop(box1->p,x1-j,i,y1-y0,cs,1,LE); + if(j2*i1) Break; // not accepted, if right line is not very thinn + x =loop(box1->p,x1 ,y,y1-y0,cs,0,LE); + x+=loop(box1->p,x1-x,y,y1-y0,cs,1,LE); + x+=loop(box1->p,x1-x,y,y1-y0,cs,0,LE); + if (3*i2>i1) ad=99*ad/100; + if (2*i2>i1) ad=99*ad/100; + bad_e=60; // used later? + } + if (xp,cs) > 1 ) i=0; + if( i ) Break; +// ..@@@@...<- +// .@@@@@@;. +// @@,...@@. +// @@.....@, +// @@@@@@@@@ +// @@.,;.@,. <- problem (y) == bad_e>50 +// @@.....@. +// @@,...@@. +// .@@@,@@@. +// ..@@@@;..<- + if (dy>11 && bad_e<50) + if ( num_cross(x0,x1,y,y,box1->p,cs) != 1 ) Break; // except "geschwungenem e" + if ( num_cross(x0,x1-dx/3,y ,y ,box1->p,cs) != 1 + && num_cross(x0,x1-dx/3,y+1,y+1,box1->p,cs) != 1 ) Break; + // if( num_hole(x0, x1, y0 , y ,box1->p,cs,NULL) < 1 ){ + if( sdata->holes.num == 0 || sdata->holes.hole[0].y1 >= y-y0){ + if( sdata->hchar ) Break; // ~ \it t + // look if thinn font (may be h-line is broken) Mai00 + for(j=0,i=x0+dx/8;ip,cs,1) == 1 ) j++; + if(j<2*dx/4) Break; + } + if( sdata->holes.num>0 && sdata->holes.hole[0].y0 > y-y0) Break; + if( sdata->holes.num>1 && sdata->holes.hole[1].y0 > y-y0) Break; + if( sdata->holes.num==1 && sdata->holes.hole[0].x0 >= dx/2) { + ad=95*ad/100; } /* 8*10 @ (=at) is not an e */ + // look for horizontal gap + for(x=0,y=i=y0+dy/4;ip,x0,i,x1-x0,cs,0,RI); + if(j>=x) { x=j;y=i; } + } + if (y>y0+dy/4 && ydx/2) Break; // s + if (x>dx/4) ad=99*ad/100; + + if( num_cross(x0+dx/2,x1 ,y1-dy/4,y1 ,box1->p,cs) == 0 + && num_cross(x0+dx/2,x1-1,y1-dy/4,y1 ,box1->p,cs) == 0 + && num_cross(x0+dx/2,x1 ,y1-dy/4,y1-1,box1->p,cs) == 0 ) { + if (sdata->gchar) Break; // ~p + ad=99*ad/100; + } + /* upper case is for 5x6 box */ + if( sdata->hchar // broken B ? 
should also work when linedetection fails + && loop(box1->p,x1,y1-dy/3,dx,cs,0,LE)<=dx/8 ) { + x = loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); + if( loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)<=x + && loop(box1->p,x0,y0+dy/8,dx,cs,0,RI)<=x ) Break; + if( loop(box1->p,x0,y1-dy/4,dx,cs,0,RI)<=x + && loop(box1->p,x0,y1-dy/8,dx,cs,0,RI)<=x ) Break; + } + x = loop(sdata->bp,0,dy-2 ,dx,cs,0,RI); + if( loop(sdata->bp,0,dy-1-dy/8,dx,cs,0,RI)>x && dy>16) Break; // some Q + if (box1->m2) { + if (sdata->gchar) ad=99*ad/100; + if (sdata->hchar) ad=99*ad/100; + } else ad=99*ad/100; + + Setac(box1,(wchar_t)'e',ad); + if (ad>=100) return 'e'; + break; + } + // --- test E --------------------------------------------------- + for(ad=d=100;dx>2 && dy>4 ;){ // min 3x4 + // rewritten for vectors 0.43 + int i1, i2, i3, i4, i5; // line derivation + corners + DBG( wchar_t c_ask='E'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the upper right end of the h */ + if (aa[3][2]>d/2) Break; /* [2] = distance, ~dj... */ + if (aa[0][2]>d/2) Break; /* upper left end */ + if (aa[1][2]>d/2) Break; /* lower left end */ + if (aa[2][2]>d/2) Break; /* lowerright end */ +/* + E f near E + + OOOOOOOO OOOO + O5 O O + O4 O + OOOO3 OOOOOO + O2 O + O O + O1 O O + OOOOOOOO OOOOOO +*/ + // check the bow from below + for (i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) { + if (y1-box1->frame_vector[ i][1]>dy/4) break; // fatal! 
+ } if (i!=aa[2][3]) Break; // ~AHKMNRX + // search most left+down between bottom right and top right + i1=nearest_frame_vector(box1, aa[2][3],aa[3][3], x0, y1); + i5=nearest_frame_vector(box1, i1,aa[3][3], x0, y0); + i3=nearest_frame_vector(box1, i1, i5, x1, (y0+y1)/2); + i2=nearest_frame_vector(box1, i1, i3, x0, (2*y0+y1)/3); + i4=nearest_frame_vector(box1, i3, i5, x0, (y0+2*y1)/3); + i =nearest_frame_vector(box1, aa[0][3],aa[1][3], x0-dx/4, (y0+y1)/2); + if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]-1-dx/16) Break; + if (2*box1->frame_vector[i][0] < aa[0][0]+aa[1][0]) ad=99*ad/100; // f + + MSG(fprintf(stderr,"i1-5 %d %d %d %d %d",i1,i2,i3,i4,i5);) + // holes right open? + for( i=1,y=y0; yp,cs,2) == 0 ) i=0; + if( i ) Break; + for( i=1,y=y1; y>y1-dy/4 && i; y-- ) // long black line + if( get_bw(x0+dx/6,x1-dx/4,y,y,box1->p,cs,2) == 0 ) i=0; + if( i ) Break; + for( i=1,y=y0+dy/3; yp,x0 ,y,dx,cs,0,RI); + j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>dx/3 ) i=0; + } if( i ) Break; + x=x1-dx/3; y=y0; // von oben durchbohren! + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break; + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break; + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,RI,DO); if( x<=x1 || y>y0+dy/2 ) Break; + x=x1-dx/3; y=y1; // von unten durchbohren! 
+ turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); if( yp,&x,&y,x0,x1,y0,y1,cs,ST,UP); if( yp,&x,&y,x0,x1,y0,y1,cs,RI,UP); if( x<=x1 || yp,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y0+dy/4 ) Break; + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,DO); if( y>y0+dy/3 ) Break; + y+=dy/15; + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( x15 && x==x0) ad=99*ad/100; // to thin + x+=dx/15+1; + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,DO,ST); if( y>y1-dy/3 ) Break; + // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) Break; + if (sdata->holes.num > 0) Break; + i=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); if(i>dx/2) Break; + j=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); if(ji+dx/8) Break; i=j; + j=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); if(ji+dx/8) Break; + j=loop(box1->p,x1,y1-dy/4,dx,cs,0,LE); + for( x=dx,y=y0+dy/6; yp,x0,y,dx,cs,0,RI); + if (i>j/2 && ad>98) ad=99*ad/100; + if (i>dx/4) break; + if(i3*dx) // ~[ + if( get_bw(x0+dx/2,x0+dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) Break; + + if (box1->m2) { + if (!hchar) ad=ad*99/100; + if ( gchar) ad=ad*99/100; + } + Setac(box1,(wchar_t)'E',ad); + if (ad>=100) return 'E'; + break; + } + return box1->c; +} + +static wchar_t ocr0_n(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + int i,j,d,x,y,i1,i2,i3,handwritten=0, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test n --------------------------------------------------- + // glued rm is very similar to glued nn -> thickness of h-line should grow + // may02: tested for 8x12 font + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='n'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + i= num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs); + j= num_cross( 0,dx-1,dy/2,dy/2,sdata->bp,cs); + if( (i<2 || i>3) && j!=2 ) Break; + if( loop(sdata->bp,dx/2,0,dy,cs,0,DO) > dy/8 && sdata->hchar ) Break; /* tt */ + y=5*dy/8; /* also for handwritten n, where first bow goes not down enough */ + if( 
num_cross( 0,dx/2,y ,y ,sdata->bp,cs) != 1 + && num_cross( 0,dx/2,y-1,y-1,sdata->bp,cs) != 1 + && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) < 1 ) Break; // n rr + // ~thick_w + y=loop(sdata->bp,dx-1-dx/4,0,dy,cs,0,DO); if(y>dy/2) Break; + if(y>1)if( get_bw(dx-1-dx/4,dx-1,0,y-2,sdata->bp,cs,1) == 1 ) Break; + + y=3*dy/4; + if( num_cross(0, dx/2,y ,y ,sdata->bp,cs) == 1 + && num_cross(dx/2,dx-1,y ,y ,sdata->bp,cs) == 0 ) Break; // ~p + y=dy/2; + if( num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) == 2 + && num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) == 2 ) { // n rr + /* printed n */ + x =loop(sdata->bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line + x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // 1st gap + x+=loop(sdata->bp,x,y,dx-x,cs,0,RI); if(x< dx/2) Break; i2=x; // 2nd v-line + x+=loop(sdata->bp,x,y,dx-x,cs,1,RI); if(x<3*dx/4) Break; i3=x; // 2nd gap + i=dy/4; y=13*dy/16; + if( num_cross(dx/2,dx-1,y,y,sdata->bp,cs)==2 ) i=3*dy/8; // \it n + if (i<2 && il1 l2 l3 l4 ??? + for(x=i1;xbp,x, 0,dy,cs,0,DO)>=i ) break; + if(x bp,x,dy-1,dy,cs,0,UP) >dy/4 ) break; + if(x==i2) Break; // no gap detected (glued serifs ??? ) + // glued rm as nn ??? 
+ for(y=0,x=(i1+i2)/2;xbp,x,0,dy,cs,0,DO); + i=loop(sdata->bp,x,i,dy,cs,1,DO); // measure thickness + if( i>y ) y=i; if( i7 ) + if( loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,LE) + +loop(sdata->bp, 0,dy-1-dy/8,dx,cs,0,RI)-dx/8-1 + > loop(sdata->bp,dx-1,dy-1-dy/2,dx,cs,0,LE) + +loop(sdata->bp, 0,dy-1-dy/2,dx,cs,0,RI) ) ad=90*ad/100; // broken o + if( dy>7 && dx>7 ) + if( loop(sdata->bp,dx-1, dy/2,dx,cs,0,LE)==0 + && loop(sdata->bp,dx-1,dy-1-dy/8,dx,cs,0,RI)>dx/8 ) ad=98*ad/100; // broken o + } else { /* check handwritten n */ + if( num_cross(0,dx-1,dy/2, dy/2 ,sdata->bp,cs) != 3 + && num_cross(0,dx-1,dy/2-dy/8,dy/2-dy/8,sdata->bp,cs) != 3 ) Break; + i =loop(sdata->bp,0,dy/2-dy/8,dx,cs,0,RI); if (i>dx/4) Break; + i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); if (i>dx/2) Break; + i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,0,RI); + if( num_cross(i,i, 0,dy/2-2*dy/8,sdata->bp,cs) != 0 ) Break; + i+=loop(sdata->bp,i,dy/2-dy/8,dx,cs,1,RI); + if( num_cross(i,i,dy/2+1, dy-1,sdata->bp,cs) != 0 ) Break; + handwritten=80; + } + + i= loop(sdata->bp,dx-1 ,dy/2,dx,cs,0,LE); if(i>5) + if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,sdata->bp,cs,1) == 1 ) Break; // ~rr + i+=loop(sdata->bp,dx-1-i,dy/2,dx,cs,1,LE); + if( get_bw(dx-1-i ,dx-1-i ,0,dy/2,sdata->bp,cs,1) == 0 ) Break; // ~rv + + if( get_bw(dx/2,dx/2,dy/4,dy/4,sdata->bp,cs,1) == 0 + && get_bw(dx/2,dx-1,dy-2,dy-2,sdata->bp,cs,1) == 0 + && get_bw(dx/2,dx/2,dy/4,dy-2,sdata->bp,cs,1) == 1 ) Break; // ~P + + // glued ri ??? 
+ if( box1->dots>0 && box1->m1 ) + if( get_bw((x1+x0)/2,x1,box1->m1,y0-1,box1->p,cs,1) == 1 ) + if( num_cross( 0,dx-1,0 ,0 ,sdata->bp,cs) >2 + || num_cross( 0,dx-1,1 ,1 ,sdata->bp,cs) >2 ) Break; + + + i=loop(sdata->bp,dx-1, dy-1,dx,cs,0,LE); if (i>dx/2) + i=loop(sdata->bp,dx-1, dy-2,dx,cs,0,LE); + x=loop(sdata->bp,dx-1,dy-1-dy/4,dx,cs,0,LE); + if (sdata->hchar && i-x>1) Break; // ß + x=loop(sdata->bp, 0,dy-1,dx,cs,0,LE); // check for serifs + i=loop(sdata->bp, 0,dy-2,dx,cs,0,LE); if (ibp, 0, 1,dx,cs,0,LE); if (ibp, 0, 2,dx,cs,0,LE); if (ihchar && x>0) Break; // fl + + if (num_cross( 0,dx-1,dy/4,dy/4,sdata->bp,cs)>=3) ad=98*ad/100; // small M + if (sdata->hchar || 2*y0m1+box1->m2) ad=96*ad/100; + if (sdata->gchar) ad=96*ad/100; // ß fl + if (dx<5) { // for small fonts no middle line is possible for m + ad=99*ad/100; // 4x6 m + if (num_cross(0,dx-1,dy/8,dy/8,sdata->bp,cs)>=2) { + ad=97*ad/100; // ~m + if (dy<=4) Setac(box1,'m',97); // only for 4x6 font! + } + } + Setac(box1,'n',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_M(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int d,x,y,i0,i1,i2,i3,t1,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // ------------------ test M --------------------------- + for(ad=d=100;dx>3 && dy>3;){ // dy<=dx nicht perfekt! 
besser mittleres + // min-suchen fuer m + DBG( wchar_t c_ask='M'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if( num_cross(0,dx-1, dy/2, dy/2,bp,cs)<3 + && num_cross(0,dx-1, dy/4, dy/4,bp,cs)<3 + && num_cross(0,dx-1,5*dy/8,5*dy/8,bp,cs)<3 + && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<3 + && dx>4 ) Break; + if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<2 + && num_cross(0,dx-1, dy/8, dy/8,bp,cs)<2 ) Break; /* fat M */ + if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<2 ) Break; + + x = loop(bp,dx-1 ,dy-1,dx,cs,0,LE); // ~ melted kl + x = loop(bp,dx-1-x,dy-1,dx,cs,1,LE); if( x>dx/2 ) Break; + + if( loop(bp, 0,7*dy/16,dx,cs,0,RI) + + loop(bp,dx-1,7*dy/16,dx,cs,0,LE) > dx/2 ) Break; // ~K + + if( dy>8 /* following lines should be extend to range check */ + && loop(bp, dx/4,dy-1, dy,cs,0,UP) 2 + && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)> 2 ) Break; // ~it_u + if( num_cross(0 ,dx-1,3*dy/4,3*dy/4,bp,cs)==2 + && num_cross(dx/2,dx/2,3*dy/4, dy-1,bp,cs)> 0 ) Break; // ~it_v + + if( loop(bp,3*dx/4, 0,dy,cs,0,DO) + > loop(bp,2*dx/4, 0,dy,cs,0,DO) + && loop(bp,3*dx/4,dy-1,dy,cs,0,UP) + < loop(bp,2*dx/4,dy-1,dy,cs,0,UP) ) Break; // ~N + if( loop(bp,3*dx/4, dy/8,dy,cs,0,DO) + > loop(bp,2*dx/4, dy/8,dy,cs,0,DO) + && loop(bp,3*dx/4,dy-1-dy/8,dy,cs,0,UP) + < loop(bp,2*dx/4,dy-1-dy/8,dy,cs,0,UP) ) Break; // ~serif_N + + // i0 is lower end of upper serifen (widest gap? ) + i0=0; + + if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=4 ){ // Is it a N ? + if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==3 ){ + for(y=dy/2+1;yy-2 ) Break; // ~N + } + } + } + // MNWK + for(i2=0,i1=x=dx/2;xi2) {i2=y;i1=x;} else break; } + i3=i2+loop(bp,i1,i2,dy-i2,cs,1,DO); + if(i2hchar) Break; // rm + ad=99*ad/100; + } + if (i2==0 && dx>8 && dy>12) Break; // glued and bad splitted serifen-MN + + // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) != 0 ) Break; // small A + if (sdata->holes.num != 0) Break; + t1=loop(bp,0 ,3*dy/4,dx,cs,0,RI); + t1=loop(bp,t1,3*dy/4,dx,cs,1,RI); // thickness of line? 
+ if( 7*(t1+1)=i2 ) Break; // no good M + i1+=loop(bp,i1, dy/4,dx,cs,1,RI); + i2+=loop(bp,i2,3*dy/4,dx,cs,1,RI); + if( i1>=i2 ) Break; // no good M + i1+=loop(bp,i1, dy/4,dx,cs,0,RI); + i2+=loop(bp,i2,3*dy/4,dx,cs,0,RI); + if( i1<=i2 ) Break; // no good M + } + if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)==2 + && num_cross(0,dx-1,dy/4,dy/4,bp,cs)==2 && !hchar ) Break; // ~ \it u + + if (dy<17) + if( num_cross(0,dx-1, 0, 0,bp,cs)<2 ) ad=99*ad/100; + if (dx>5) /* 4x6 font has only 1 cross at y=1 */ + if( num_cross(0,dx-1, 1, 1,bp,cs)<2 ) ad=96*ad/100; // kt + if( num_cross(dx/2,dx/2, 0, dy-1,bp,cs)!=1) ad=98*ad/100; // kt + if (dx<5 && loop(bp,dx/2,0,dy,cs,0,DO)>=3*dy/8) ad=96*ad/100; // 4x6 H + + if( num_cross(0,dx-1, dy/4, dy/4,bp,cs)<=2 + && num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs)<=2 + && dx>8 && dy>12 ){ + ad=98*ad/100; + for(y=5*dy/16;y<5*dy/8;y++) // look for H-line + if( num_cross(0,dx-1,y ,y ,bp,cs)==1 ) break; + if( y<5*dy/8 ) ad=95*ad/100; + if( y<5*dy/8 ) + if( num_cross(2+dx/6,dx-3-dx/6,y-2,y-2,bp,cs)==0 + || num_cross(2+dx/6,dx-3-dx/6,y-1,y-1,bp,cs)==0 ) Break; // ~H bad! 
+ } + + if( loop(bp,3*dx/8, 0,dy,cs,0,DO) >dy/2 + && loop(bp,5*dx/8,dy-1,dy,cs,0,UP) >dy/2 ) ad=95*ad/100; + + if(!hchar){ + ad=98*ad/100; /* not sure */ + if( loop(bp,0, dy/4,dx,cs,0,RI) + < loop(bp,0,dy-1-dy/8,dx,cs,0,RI)-dx/16 ) Break; // ~wi glued + } + if( gchar ) ad=98*ad/100; + if (ad>99 && dx<8) ad=99*ad/100; /* give 5x8 N a chance */ + Setac(box1,'M',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_N(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ + dbg[9], + ad; /* tmp-vars */ + + // --- test N ------- +hchar -gchar + for(ad=d=100;dx>3 && dy>3;){ // 4x6font + DBG( wchar_t c_ask='N'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num > 0) ad=98*ad/100; /* # */ + if (dx<6) ad=99*ad/100; + if (dx<5) ad=99*ad/100; + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the 4 ends of the x */ + if (aa[0][2]>d) Break; + if (aa[1][2]>d) Break; + if (aa[2][2]>d) Break; + if (aa[3][2]>d) Break; + if (aa[3][0]-aa[0][0](dy+2)/5) Break; /* glued tu */ + if (abs(aa[3][1]-aa[0][1])>(dy+4)/8) ad=98*ad/100; /* glued tu */ + /* left and right vertical line */ + d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break; + ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; + d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break; + + /* search uppermost left ^ (between near 0,0) */ + i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0+dx/8, y0); + x=box1->frame_vector[i1][0]; + y=box1->frame_vector[i1][1]; + MSG( fprintf(stderr,"i1= %d (%d,%d) left ^", i1,x-x0,y-y0);) + if (y-y0 > 5*dy/8) Break; + if (x-x0 > 5*dx/8) Break; + /* search uppermost right ^ ~H */ + i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0); + MSG( fprintf(stderr,"i3= %d (%d,%d) right 
^",\ + i3, box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);) + + /* check if upper left and lower right point are joined directly */ + dbg[0]=d=line_deviation(box1,i1, aa[2][3]); + /* check if lower left and lower left point are joined directly */ + dbg[1]=d=line_deviation(box1, aa[1][3],i1); + MSG( fprintf(stderr," i1-a2 %d a1-i1 %d",dbg[0],dbg[1]); ) + if (dbg[0] > sq(1024/4)) Break; + if (dx>4 && dbg[1] > sq(1024/4)) ad=97*ad/100; // d=0..2*sq(1024) + if (dx>4 && dbg[1] > sq(1024/3)) Break; // d=0..2*sq(1024) + // serif N has d=sq(1024/3)=116508 + + /* serach lowest right v, same frame? N-tilde etc.? */ + i2=nearest_frame_vector(box1,aa[3][3],aa[0][3], x1, y1-dy/8); + x=box1->frame_vector[i2][0]; + y=box1->frame_vector[i2][1]; + MSG( fprintf(stderr,"i2= %d (%d,%d) right v",\ + i2, box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0);) + if (y-y0 < 3*dy/8) Break; + if (x-x0 < 3*dx/8) Break; + // test H + if ( box1->frame_vector[i3][0]-box1->frame_vector[i1][0]> dx/4 + && box1->frame_vector[i3][1]-box1->frame_vector[i1][1]<=dy/8 + && y<=box1->frame_vector[i1][1]) Break; + /* check if upper left and lower right point are joined directly */ + dbg[2]=d=line_deviation(box1,i2, aa[0][3]); + /* check if lower right and lower right point are joined directly */ + dbg[3]=d=line_deviation(box1, aa[3][3],i2); + MSG( fprintf(stderr," i2-a0 %d a3-i2 %d",dbg[2],dbg[3]); ) + if (dbg[2] > sq(1024/4)) Break; + if (dbg[3] > sq(1024/4)) ad=97*ad/100; // serif N, ToDo: do it better + if (dbg[3] > sq(1024/3)) Break; + + if (abs((box1->frame_vector[i1][1]-y0) + -(y1-box1->frame_vector[i2][1]))>dy/8) ad=99*ad/100; /* ~ tu */ + if (abs(((y0+y1)/2-box1->frame_vector[i1][1]) + -(box1->frame_vector[i2][1]-(y0+y1)/2))>dy/8) ad=99*ad/100; /* ~ tu */ + if (box1->frame_vector[i2][0] + -box1->frame_vector[i1][0]<=dx/8) Break; /* nonsignificant distance */ + if (box1->frame_vector[i2][1] + -box1->frame_vector[i1][1]<=dy/8) ad=97*ad/100; /* too flat (ff,H) */ + if 
(box1->frame_vector[i2][1] + -box1->frame_vector[i1][1]<=dy/2) ad=99*ad/100; + MSG( \ + fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d %d",\ + box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\ + box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\ + dbg[0],dbg[1],dbg[2],dbg[3],sq(1024/4),sq(1024));) + ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; + ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100; + ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100; + ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100; + + if (!hchar) ad=99*ad/100; + if ( gchar) ad=98*ad/100; // \sc N + Setac(box1,'N',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_h(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ + + // --- test h --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + // rewritten for vectors 0.42 + int i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners + DBG( wchar_t c_ask='h'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the upper right end of the h */ + if (aa[3][2]d/2) Break; /* upper left end */ + if (aa[1][2]>d/2) Break; /* lower left end */ + if (aa[2][2]>d/2) Break; /* lowerright end */ +/* + type A B=italic ??? + 18 OOO + O O O + O O + O7OOO OOOO + O4 O O O + O O O O + O O O O O + 2O3 5O6 O OOO +*/ + i1=i8=aa[0][3]; + i2=i3=aa[1][3]; + i5=i6=aa[2][3]; + // check the bow from below + for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[ i][1] + frame_vector[i4][1]) i4=i; // get next maximum + if (box1->frame_vector[ i][1]<=y0) break; // fatal! 
+ } + if (box1->frame_vector[i4][1]-y0frame_vector[i4][1]18 ) + if( get_bw(dx-1-dx/3,dx-1,dy/6 ,dy/5 ,bp,cs,1) == 1 ) Break; + if( get_bw(dx-1-dx/3,dx-1,dy-1-dy/4,dy-1 ,bp,cs,1) == 0 ) Break; // s- + for( x=x0+dx/3;xp,cs,1) == 0 ) break; + if( x>=x1-dx/3 ) Break; + for(i=dy/4,y=y0+dy/3;y<=y1 && i;y++){ + if( num_cross(x0,x1 ,y,y, box1->p,cs) == 2 ) i--; + } if( i ) Break; + for(i=dy/4,y=y0;y<=y0+dy/2 && i;y++){ + if( num_cross(x0,x0+dx/2,y,y, box1->p,cs) == 1 ) i--; + } if( i ) Break; + // if( num_hole(x0, x1, y0 , y1 ,box1->p,cs,NULL) > 0 ) // could happen + if (sdata->holes.num > 0) + if (sdata->holes.hole[0].y0 > dy/3 + && sdata->holes.hole[0].y1 < dy-1-dy/3) Break; + // if( num_hole(x0, x1, y0+dy/3 , y1-dy/3 ,box1->p,cs,NULL) != 1 ) Break; // mini + if( loop(bp,dx-1,dy/3,dx,cs,0,LE)+dx/8 + < loop(bp,dx-1,dy/2,dx,cs,0,LE) + && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8 + < loop(bp,dx-1,dy/2,dx,cs,0,LE)) Break; // ~k Okt00 + i=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); + if (i>1 && num_cross(x0,x0,y0+dy/8+2,y0+dy/2, box1->p,cs) == 1 ){ // fi fu + ad=(99-(1<p,cs) == 0 ) ad=97*ad/100; + if (num_cross(x0+dx/2,x0+dx/2,y0,y0+dy/8+2, box1->p,cs) == 1 ) ad=97*ad/100; + if (ad<1) break; + } + i =loop(bp,0,dy/4,dx,cs,0,RI); + i+=loop(bp,i,dy/4,dx,cs,1,RI)+1; + for ( ; i5*dy/8 ) { + ad=98*ad/100; // melted hi, li, but handwritten h + MSG(fprintf(stderr,"ad=%d",ad);) } + if( num_cross(x0,x0,y0+(dy+3)/8,y1,box1->p,cs) > 1 ) { + ad=98*ad/100; // melted fr + MSG(fprintf(stderr,"ad=%d",ad);) } + + i=loop(bp,dx-1,3*dy/4,dx,cs,0,LE); // melted "fr" for vertikal letters + if (i>dx/4 && loop(bp,dx-1-i,dy-1,dy,cs,1,UP)>dy/2) { + ad=94*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } + + i=loop(bp,dx-1,1+dy/16,dx,cs,0,LE); if (i 0 ) { + ad=95*ad/100; // melted fi + MSG(fprintf(stderr,"ad=%d",ad);) } + if (loop(box1->p,x1,y0+1+dy/16,dx,cs,0,LE)p,x1,y0 ,dx,cs,0,LE)p,x1,y0+1,dx,cs,0,LE)holes.num > 0) ad=97*ad/100; + if (box1->m2) { + if ( gchar) ad=98*ad/100; + if (!hchar) ad=97*ad/100; + } else 
ad=99*ad/100; + Setac(box1,'h',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_H(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,j1,d,x,y,ya,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test H --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='H'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if( num_cross(0,dx-1,dy/4 ,dy/4 ,bp,cs) != 2 + && num_cross(0,dx-1,dy/4-1,dy/4-1,bp,cs) != 2 ) Break; + if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2 + && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) Break; + if( loop(bp,0 ,dy/8,dx,cs,0,RI) + + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) Break; // ~A + for( j1=0,i=1,y=y0+dy/10; yp,x0 ,y,dx,cs,0,RI) + +loop(box1->p,x1 ,y,dx,cs,0,LE); if( j>dx/2 ) i=0; if(j>j1)j1=j; } + if( !i ) Break; + for( i=1,y=dy/4; ydx/5 ) i=0; } + if( !i ) Break; // ~K Jul00 + for( i=0,ya=y=y0+dy/3; yp,x0 ,y,dx,cs,0,RI); + j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } } + if( i<=dx/2 ) Break; ya-=y0; + if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1 + && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) Break; /* Dec00 */ + for( y=ya; y 2 + && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break; + if ( yp,cs,1) == 0 ) i=0; + } if( i ) Break; + for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ + if( get_bw( x, x,y1-dy/4,y1 ,box1->p,cs,1) == 0 ) i=0; + } if( i ) Break; + for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ + if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0; + } if( i ) Break; + for(i=1,y=y0;y<=y0+dy/4 && i;y++){ + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) Break; + for(i=1,y=y1-dy/4;y<=y1 && i;y++){ + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) Break; + if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0 , x0+dx/8, 
y1-dy/8, y1,box1->p,cs,1) != 1 ) Break; + i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) Break; + i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2i1+dx/8) Break; + i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3i2+dx/8) Break; + if(abs(i1+i3-2*i2)>dx/16+1) Break; + // test for thick tall N looking like a H + if( num_cross(x0,x1,y0,y1, box1->p,cs) < 2 ) Break; // sure N + i1=loop(bp, 0, dy/4,dx,cs,0,RI); + i1=loop(bp, i1, dy/4,dx,cs,1,RI); + i2=loop(bp, 0,dy-1-dy/4,dx,cs,0,RI); + i2=loop(bp, i2,dy-1-dy/4,dx,cs,1,RI); + i3=loop(bp,dx-1 ,dy-1-dy/4,dx,cs,0,LE); + i3=loop(bp,dx-1-i3,dy-1-dy/4,dx,cs,1,LE); + i =loop(bp, 0,dy/2+1+dy/8,dx,cs,0,RI); + i+=loop(bp, i,dy/2+1+dy/8,dx,cs,1,RI); + i =loop(bp, i,dy/2+1+dy/8,dx,cs,0,RI); + if (i6*i2 && 5*i3>6*i2 && i1>i2 && i3>i2 ) Break; + if( dx>8 ) + if ( loop(bp,dx-1, 3*dy/8,dx,cs,0,LE) + -loop(bp,dx-1, dy/8,dx,cs,0,LE)>dx/4 + && loop(bp,dx-1, 3*dy/8,dx,cs,0,LE) + -loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)>dx/4 ) Break; // ~K + // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) Break; + if (sdata->holes.num != 0) Break; + if ( gchar) ad=99*ad/100; + if (!hchar) ad=98*ad/100; + Setac(box1,'H',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_k(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + int (*aa)[4]=sdata->aa; /* corner-points, (x,y,dist^2,vector_idx) */ + + // --- test k --------------------------------------------------- + for(ad=100;dx>2 && dy>3;){ // min 3x4 + // rewritten for vectors 0.43 + int d, i1, i2, i3, i4, i5, i6, i7, i8; // line derivation + corners + DBG( wchar_t c_ask='k'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the upper right end of the h */ + if (aa[3][2]d/2) Break; /* upper left end */ + if 
(aa[1][2]>d/2) Break; /* lower left end */ + if (aa[2][2]>d/2) Break; /* lowerright end */ +/* + type A B=italic ??? + 18 OOO + O O O + O O6 O + O7 OO O OO + O4OO OO OO + O OO O O + O OO O O O + 2O3 O5 O OOO +*/ + i1=i8=aa[0][3]; + i2=i3=aa[1][3]; + i5= aa[2][3]; + // check the bow from below + for (i4=i=i2;i!=i5;i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[ i][1] + frame_vector[i4][1]) i4=i; // get next maximum + if (box1->frame_vector[ i][1]<=y0) break; // fatal! + } + if (box1->frame_vector[i4][1]-y0frame_vector[i4][1]frame_vector[i][0]frame_vector[i][0]dx/2) Break; + i3=loop(bp,0,dy/2+dy/4,dx,cs,0,RI); + if(abs(i1+i3-2*i2)>dx/16+1 || i1p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2,x1, y1-dy/3,y1 ,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/4,x1, y0 ,y0+3*dy/16,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1-dx/4,x1, y0+dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; //~1 + if( get_bw(x1-dx/4,x1, y1-dy/8,y1 ,box1->p,cs,1) != 1 ) Break; + if (sdata->holes.num > 0) + if (sdata->holes.hole[0].y0 > dy/4) Break; + // if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) Break; + for(y=y0+1;yp,cs,1) == 0 ) break; + if( yp,cs,100)>50) i=0; + if( i ) Break; // no vertikal line! + + /* check for falling line in the lower left corner */ + for (j=x=0,y=5*dy/8;y<7*dy/8;y++) { + i= loop(bp,dx-1,y,dx,cs,0,LE); if(i>x) { x=i;j=y; } + } // x=dx/6 on fat k + if (x + loop(bp,dx-1-x,y,dx,cs,1,LE)/2 dx/2) + i =loop(bp,dx-1,dy-2,dx,cs,0,LE); if(i>dx/2) Break; + i+=loop(bp,dx-1-i,dy-1,dx,cs,1,LE)/2; + if( get_line(x,y,dx-1-i,dy-1,bp,cs,100)<60 ) Break; + + for(y=y0+dy/3;yp,cs)==2 ) break; + if( y==y1 ) Break; + if( + // num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL)>0 // ~A happens! 
+ sdata->holes.num > 0 ) + if (sdata->holes.hole[0].x1>dx-1-dx/4 + || sdata->holes.hole[0].y1>dy-1-dy/4 + || sdata->holes.hole[0].y0< dy/4) Break; + // if ( num_hole(x0,x1-dx/4,y0+dy/4,y1-dy/4,box1->p,cs,NULL)==0 ) Break; + i=loop(bp,0,dy-1,dx,cs,0,RI); + i=loop(bp,i,dy-1,dx,cs,1,RI); if (dx>8 && 4*i>3*dx) Break; // ~glued_tz + i =loop(bp,0,dy/4,dx,cs,0,RI); + if (i>dx/4 + && i+loop(bp,i,dy/4,dx,cs,1,RI)>dx/2 + && loop(bp, 0,0,dx,cs,0,RI)<=dx/4 + && loop(bp,dx-1,0,dx,cs,0,LE)>=dx/2 ) ad=90*ad/100; // divided Q + + if( 2*y0>(box1->m1+box1->m2) ) ad=99*ad/100; + + if ( gchar) ad=98*ad/100; + if (!hchar) ad=98*ad/100; + Setac(box1,'k',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_K(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,i1,i2,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad,ya,xa,yb,xb,yc,xc,yd,xd,ye,xe,yf,xf; /* tmp-vars */ + + // --- test K --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // updated 29 Mar 2000 perfect??? 
+ DBG( wchar_t c_ask='K'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + for(y=dy/8;yp,x,y0,y1-y0,cs,0,DO); if (y>3*dy/4) { i=1;break; } + if (dy>15 && j>dy/8){ + j =loop(box1->p,x-1,y0+y-1,x1-x0,cs,0,LE)/2; + y+=loop(box1->p,x-j,y0+y-1,y1-y0,cs,0,DO)-1; + } + if(y>=dy/4) i=0; /* ok, found gap */ + } if( i ) Break; + for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap + i=loop(box1->p,x,y1,dy,cs,0,UP); + /* on small chars bypass possible low left serifs */ + if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP); + if (i2>1) i+=i2-1; } + if (i>y) { y=i; i1=x; } + } if( y<=dy/8 ) Break; if (yp,cs) == 2 ) i=0; + } if( i ) Break; + for(i=1,y=y0;y<=y0+dy/4 && i;y++){ + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) Break; + if( dx<10 ){ + for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ + if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0; + } if( i ) Break; + } + for(i=1,y=y1-dy/4;y<=y1 && i;y++){ + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) Break; + if( get_bw(x1-dx/3,x1,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; // ~k + if( dy>16 + && loop(bp,0, dy/4,dx,cs,0,RI) + +loop(bp,0,3*dy/4,dx,cs,0,RI) + <2*loop(bp,0, dy/2,dx,cs,0,RI)-2-dx/32 ) Break; // ~X + + i=loop(box1->p,x1,y0+ dy/4,x1-x0+1,cs,0,LE); if(i>dx/2) Break; + j=loop(box1->p,x1,y0+ dy/2,x1-x0+1,cs,0,LE); + x=loop(box1->p,x1,y0+3*dy/8,x1-x0+1,cs,0,LE); if(x>j) j=x; + if(j<=i ) Break; i=j; + j=loop(box1->p,x1,y1-dy/4,x1-x0+1,cs,0,LE); if(j>=i ) Break; + // out_x(box1); // detailed analysis + // + // a d <= that are main points of K + // | / + // b/e + // | \ . 
+ // c f + ya= dy/4;xa=loop(bp,0,ya,dx,cs,0,RI);xa+=loop(bp,xa,ya,dx,cs,1,RI)/2; + yc=dy-dy/4;xc=loop(bp,0,yc,dx,cs,0,RI);xc+=loop(bp,xc,yc,dx,cs,1,RI)/2; + yb=dy/2; xb=dx-1-loop(bp,dx-1,dy/2,dx,cs,0,LE); + for(yd=ye=yf=xe=y=i=0,xf=xd=dx;yxe){ xe=x;ye=dy/2+y; } + x =loop(bp,dx-1,dy/2-y,dx,cs,0,LE); if(x>xe){ xe=x;ye=dy/2-y; } +#if 0 // removed v0.2.4a2 + x =loop(bp,0 ,dy/2+y,dx,cs,0,RI); // middle left border + x+=loop(bp,x ,dy/2+y,dx,cs,1,RI); // test 2nd cross + x+=loop(bp,x ,dy/2+y,dx,cs,0,RI); if(x8 ){ // example szaka0103 + if( xe>5*dx/8 || xb>5*dx/8 ) Break; // ~{\it n} + i=loop(bp,xb,yb,xb,cs,1,LE); // thick center? see font22 + if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) // right up + if( get_line2(xb-i/2,yb,xd,yd,bp,cs,100)<95 ) Break; + if( get_line2(xe,ye,xf,yf,bp,cs,100)<95 ) Break; // right down + xe+=loop(bp,xe,ye,dx,cs,1,RI); if( xe>=xf ) Break; // ~{\it n} + } else { + if( dy<16 && !hchar ) Break; + if( loop(bp,0,1,dy,cs,1,DO)<=3*dx/4 + && loop(bp,1,1,dy,cs,1,DO)<=3*dx/4 + && loop(bp,2,1,dy,cs,1,DO)<=3*dx/4 ) Break; // ~x + } + if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)<=dx/8){ + ad=99*ad/100; /* broken B ? */ + if (sdata->holes.num > 0) + if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break; + // if( num_hole(x0,x1,y0,(y0+2*y1)/3,box1->p,cs,NULL)>0) Break; // broken B + } + if(box1->m3 && !hchar) ad=99*ad/100; + if(box1->m3 && gchar) ad=99*ad/100; + // printf(" ok xe=%d",xe); + Setac(box1,'K',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_f(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ + ab[8][4], /* special points (x,y,dist^2,vector_idx) */ + ad; /* tmp-vars */ + /* x=mindist_to_a y=0 "t" + 0>..$$. 0>..$$ 0>..$$ end right bow a--..$$ a--.$7. y>0 "f" + 1>.$..$ 1>.$.. 
1>.$$$ start right bow .$7. .$.. + .@... .@.. 2>.@@. start upper end .@.. .@.. + 2>.$... 2>.$.. 3>$$$$ crossing bar .$.. $$$. + 3>$@$$. 3>$@$. $@@$ $@$. .@.. + 4>.$... 4>.$.. 4>.$$. lower end .$.. .$.. + .@... .@.. .@@. .@.. .@.. + .@... .@.. .@@. .@.. .@.. + 5>.$... 5>.$.. 5>.$$. lower start .$.. .$.. + 6>..... 6>$... 6>.... optional left bow + */ + // --- test f like t --------------------------------------------------- + for(ad=d=100;dx>2 && dy>5;){ // sometimes no hchar! + // rewritten for vectors 0.43 + int d, i1, i2, i3, i4, i5, i6, i7, i8, i9; // line derivation + corners + DBG( wchar_t c_ask='f'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the upper right end of the h */ + if (aa[3][2]>d/2) Break; /* [2] = distance, ~BCDEF... */ + if (aa[0][2]>d ) Break; /* upper left end */ +/* + 9 + OOO + O 7 O8 + O6 + 1OOOO5 + O4 + O + 2O3 + OOOOO +*/ + i1=nearest_frame_vector(box1,aa[0][3],aa[1][3],x0-dx/2,(5*y0+3*y1)/8); + /* we need i for 4x6 font, where left side of h-bar is near (x0,y1) */ + i =aa[1][3]; if (box1->frame_vector[i][1]frame_vector[i2][0]-x0>dx/2) Break; // ~3 + i =nearest_frame_vector(box1, aa[0][3], i2, x1+2*dx, (y0+y1)/2); + // MSG(fprintf(stderr,"i %d",i);) + if (box1->frame_vector[i ][0] + -box1->frame_vector[i9][0]>dx/8) Break; // ~3 + + if( (box1->dots) ) Break; // Bold-face is gchar + if (dy<=box1->m3-box1->m2+1) Break; + for(x=0,j=y=2+(3*dy+4)/32;y<=5*dy/8;y++){ // upper cross line min=2 + i=loop(bp,0,y,dx,cs,0,RI); if( y>dy/4 && i>5*dx/8 ) break; + i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;j=y; } + if( y<3*dy/4 && y>dy/4 + && num_cross(0,dx-1,y ,y ,bp,cs) != 1 + && num_cross(0,dx-1,y+1,y+1,bp,cs) != 1 // against noise + ) break; + } if( y<=5*dy/8 ) Break; y=j;// if( y>dy/2 || ydy/8 + && num_cross( 0, (dx+1)/2,i,i,bp,cs) > 0 + && num_cross((dx+1)/2,dx-1,i,i,bp,cs) > 0 ) Break; // ~Y + + if (loop(bp,3*dx/4, 0,dy,cs,0,DO)>dy/8 + && 
loop(bp,3*dx/4-1,0,dy,cs,0,DO)>dy/8) Break; // upper bow + i=3*dy/4; if (box1->m3 && i>=box1->m3) i=box1->m3-1; + if (num_cross(0,dx-1,i,i,bp,cs)!=1) Break; + + // the middle bar appear in a wide vertical range, get part below + for (i1=dx,i2=y,j=y+1;jframe_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);) + ab[7][0]=box1->frame_vector[i3][0]; + ab[7][1]=box1->frame_vector[i3][1]; + ab[7][3]=i3; + if (ab[7][1]-y0<=dy/16) ad=95*ad/100; // ~t + // because of the dx,dy scaling the horiz. bar could be nearer to (x1,y0) + // as the upper right end of the "t" + if (aa[3][0]-x0>3*dx/4 && aa[3][1]-y0>3*dy/16) ad=99*ad/100; // ~t + + + j=loop(bp,0,dy/8,dx,cs,0,RI); // if j>dx/2 we have italic f + if ((2*x(j+dx/4)) break; + if (iloop(bp,0, 1,dx,cs,0,RI) ) Break; // ~X + + i=y;j=1; // j used as flag + if( num_cross(0,dx-1,0,0,bp,cs)==1 && hchar) //~r + if( num_cross(0,dx-1,dy-1,dy-1,bp,cs)!=1 + && num_cross(0,dx-1,dy-2,dy-2,bp,cs)!=1 ) Break; // ~* etc. + // check for upper bow to right + for(y=1;j && y=cs || dx<7) && getpixel(bp,x+1,y )>=cs + && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) + { j=0;break; } + } if(j) ad=98*ad/100; // not detected + + // if( num_hole (x0 , x1 , y0, y1,box1->p,cs,NULL) != 0 ) Break; // ~e + if (sdata->holes.num != 0) Break; // ~e + for(i1=i2=dx,y=7*dy/8;yi2+dx/4) Break; // ~t ~e + if(i1>i2+1) ad=96*ad/100; // ~t ~e + if( loop(bp,0,3*dy/4,dx,cs,0,RI)5 && !hchar) + if( loop(bp,dx-1,dy/2,dx,cs,0,LE)>3*dx/4 ) + if( loop(bp,dx-1,dy-1,dy,cs,0,UP)8 ) + if( loop(bp, 0,2*dy/3 ,dx,cs,0,RI)>2*dx/3 + || loop(bp, 0,2*dy/3-1,dx,cs,0,RI)>2*dx/3 ) + if( loop(bp,dx-1, dy/4 ,dx,cs,0,LE)>2*dx/3 ) Break; // ~5 ~S + + if (!hchar) + if ( get_bw(x0+dx/8,x0+dx/8,y0+dy/4,y1-dy/16,box1->p,cs,2) == 0 + && num_cross(x1-dx/4,x1-dx/4,y0,y1,box1->p,cs)!=2 + && num_cross(x1-dx/8,x1-dx/8,y0,y1,box1->p,cs)!=2 ) Break; // ~r + + if (dy>15) + if( num_cross(x0,x1,y1-dy/4,y1-dy/4,box1->p,cs)>1 + && num_cross(x0,x1,y0+dy/4,y0+dy/4,box1->p,cs)>1 ) Break; // ~H + + if( dx>4 ) 
+ if( loop(bp,dx-1 ,3*dy/4,dx,cs,0,LE)- + loop(bp,0 ,3*dy/4,dx,cs,0,RI)>dx/5+1 + && loop(bp,dx-1-dx/8,dy-1 ,dy,cs,0,UP)=dx/5+1) ad=98*ad/100; // ~E + i=loop(bp,dx/8,0,dy,cs,0,DO); + if (idy/2) { + ad=98*ad/100; // ~E, could also be a "f" with big serifs + MSG(fprintf(stderr,"ad=%d",ad);) } + if (!gchar) { ad=98*ad/100; + MSG(fprintf(stderr,"ad=%d",ad);) } + } + i = loop(bp,dx-1 ,3*dy/4,dx ,cs,0,LE)/2; + if (loop(bp,dx-1-i , dy-1,dy/2,cs,0,UP)1 + && loop(bp,0, 0,dy/4,cs,0,DO)p,cs,2) == 0) { // white pixels? + ad=98*ad/100; // F + MSG(fprintf(stderr,"ad=%d",ad);) } + + if (!hchar) ad=ad*98/100; // d*=100;d/=128 // not 100% ! + if (box1->m4>0 && gchar && ad<99 && + 8*box1->y1 >= box1->m4*7+box1->m3) ad++; + Setac(box1,'f',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_bB(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test B --------------------------------------------------- + for(ad=d=100;dx>2 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='B'; ) + if (sdata->holes.num < 2) Break; /* tolerant against a tiny hole */ + for(i=1,y=y0;yp,cs,1) != 1 ) i=0; + if( !i ) Break; + for(i=1,y=y1-dy/2;yp,cs,1) != 1 ) i=0; + if( !i ) Break; + if( get_bw(x1,x1 , y0 , y0 ,box1->p,cs,1) == 1 ) Break; + if( num_cross(x0+dx/2, x0+dx/2,y0,y1 ,box1->p,cs) != 3 ) + if( num_cross(x1-dx/3, x1-dx/3,y0,y1 ,box1->p,cs) != 3 ) Break; + /* --- detect center of lower hole --- */ + y = loop(box1->p,x0+dx/2,y1 ,dy,cs,0,UP); if (y>1+dy/8) Break; + y+= loop(box1->p,x0+dx/2,y1-y,dy,cs,1,UP); if (y>dy/3) Break; + y=y1-y-loop(box1->p,x0+dx/2,y1-y,dy,cs,0,UP)/2; if (yp,x0,y0+ y ,dx,cs,0,RI) + > loop(box1->p,x0,y0+dy/4,dx,cs,0,RI)+dx/32 ) + if( get_bw(x0,x0,y0,y0,box1->p,cs,1) == 0 ) + if( get_bw(x0,x0,y1,y1,box1->p,cs,1) == 0 ) Break; // ~8 + 
i1=loop(box1->p,x0,y0+dy/4,dx,cs,0,RI); + i2=loop(box1->p,x0,y0+dy/2,dx,cs,0,RI); + i =loop(box1->p,x0,y0+dy/2-dy/ 8,dx,cs,0,RI); if(i>i2) i2=i; + i =loop(box1->p,x0,y0+dy/2-dy/16,dx,cs,0,RI); if(i>i2) i2=i; + i3=loop(box1->p,x0,y1-dy/4,dx,cs,0,RI); + if(dy>16 && i3p,x0,y0+ 1 ,dx,cs,0,RI) + >= loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 ) + if( loop(box1->p,x0,y0+ 0 ,dx,cs,0,RI) + > loop(box1->p,x0,y0+ 3 ,dx,cs,0,RI)+dx/32 ) + if( loop(box1->p,x0,y1- 0 ,dx,cs,0,RI) + > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) + if( loop(box1->p,x0,y1- 1 ,dx,cs,0,RI) + > loop(box1->p,x0,y1- 3 ,dx,cs,0,RI)+dx/32 ) Break; // ~8 Aug00 + } + + if (sdata->holes.num != 2) Break; + if (sdata->holes.hole[0].y0 < y-1 + && sdata->holes.hole[1].y0 < y-1 ) Break; + if (sdata->holes.hole[0].y1 > y+1 + && sdata->holes.hole[1].y1 > y+1 ) Break; + // if( num_hole(0,dx-1,0 ,y+1 ,bp,cs,NULL) != 1 ) Break; + // if( num_hole(0,dx-1,y-1,dy-1,bp,cs,NULL) != 1 ) Break; + // out_x(box1); + + for( x=dx,y=dy/6; yp,x0,y0+y,dx,cs,0,RI); if( i>x+dx/9 ) break; + if(ix )break; + } if( yx) x=i; // allow dust + i=loop(bp,0,dy/2+1,dx,cs,0,RI); if (i>x) x=i; + if ( loop(bp,0, dy/8,dx,cs,0,RI) + +loop(bp,0,7*dy/8,dx,cs,0,RI) > 2*x+1 ) Break; // not konvex! 
+ + if(!hchar){ // ~ fat_a + ad=99*ad/100; + x =loop(bp,0,dy/4,dx,cs,0,RI); + if(loop(bp,0,dy/2,dx,cs,0,RI)>x+dx/8) ad=97*ad/100; + } + + if ( (!hchar) && (dx<=10 || dy<=10) ) ad=97*ad/100; // hchar or good_quality + if (gchar) ad=99*ad/100; + Setac(box1,'B',ad); + break; + } + // --- test b --------------------------------------------------- + for(ad=d=100;dx>3 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='b'; ) + if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ + for(y=y0;yp,cs,1) != 1 ) Break; + if(yp,cs,1) != 1 ) Break; + if( get_bw(x1- dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1- dx/3, x1 , y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1-4*dx/9, x1 , y0+dy/5, y0+dy/5,box1->p,cs,1) == 1 ) Break; + if( num_cross(x0,x1,y0+dy/4 ,y0+dy/4 ,box1->p,cs) > 1 ) // & + if( num_cross(x0,x1,y0+dy/4-1,y0+dy/4-1,box1->p,cs) > 1 ) + if( dy<16 || + num_cross(x0,x1,y0+dy/5 ,y0+dy/5 ,box1->p,cs) > 1 ) Break; // fat b + for(i=j=0,y=dy/2;yholes.num != 1) Break; + if (sdata->holes.hole[0].y0 < dy/4) Break; + if ((sdata->holes.hole[0].y1-sdata->holes.hole[0].y0+1) + *(sdata->holes.hole[0].x1-sdata->holes.hole[0].x0+1)*16 + < dx*dy) ad=90*ad/100; // hole to small + if( num_hole( x0, x1 , y0+dy/4, y1,box1->p,cs,NULL) != 1 ) Break; + i=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); + j=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(j>i) Break; + if (!hchar) ad=99*ad/100; + if ( gchar) ad=99*ad/100; + Setac(box1,'b',ad); + if (ad>=100) return 'b'; + break; + } + return box1->c; +} + +static wchar_t ocr0_dD(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,d,x,y,ya,yb,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test D --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='D'; ) + if (sdata->holes.num < 1) Break; /* tolerant against a 
tiny hole */ + if( get_bw(x0 ,x0+dx/3,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/3,x1 ,y0+dy/2,y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1 ,x1 ,y0 ,y0+dy/16,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1-dx/2,x1 ,y0+dy/4,y0+dy/4 ,box1->p,cs,1) != 1 ) Break; + if( num_cross(x0+dx/2,x0+dx/2,y0 ,y1 ,box1->p,cs) != 2 ) + if( num_cross(x1-dx/3,x1-dx/3,y0 ,y1 ,box1->p,cs) != 2 ) Break; + if( num_cross(x0 ,x1 ,y0+dy/3,y0+dy/3,box1->p,cs) != 2 ) Break; + if( num_cross(x0 ,x1 ,y1-dy/3,y1-dy/3,box1->p,cs) != 2 ) Break; + if (sdata->holes.num != 1) Break; + if (sdata->holes.hole[0].y0 > dy/3) Break; + if (sdata->holes.hole[0].y1 < dy-1-dy/3) Break; + // if( num_hole (x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; + // test if left edge is straight + for(x=0,y=bp->y-1-dy/8;y>=dy/5;y--){ + i=loop(bp,0,y,x1-x0,cs,0,RI); + if( i+2+dx/16<=x ) break; + if( i>x ) x=i; + } + if (y>=dy/5 ) Break; + /* test if right edge is falling */ + for(x=dx,y=0;yx-1,y,x1-x0,cs,0,LE); + if( i>x+dx/16 ) break; + if( iy-1;y>2*dy/3;y--){ + i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE); + if( i>x+dx/16 ) break; + if( i2*dy/3 ) Break; + if( loop(bp,dx-1,dy-1 ,dx,cs,0,LE) <= + loop(bp,dx-1,dy-2-dy/16,dx,cs,0,LE) ) Break; // P + + y=loop(bp,dx/2,dy-1,dy,cs,0,UP)-1; if (dy>16) y/=2; + if ( y>=dy/16 ) { y-=dy/16; + if (get_bw(dx/2,dx-1,dy-1-y,dy-1-y,bp,cs,1)==1) Break; // ~A + } + + ya=loop(bp, 0,dy-1,dy,cs,0,UP); + yb=loop(bp,dx/16+1,dy-1,dy,cs,0,UP); + if( yady/16 && ya>yb ) Break; // ~O + + if ( loop(bp, dx/2, 0,dy,cs,0,DO) + -loop(bp, dx/2,dy-1,dy,cs,0,UP) > dy/8 ) ad=97*ad/100; // ~b + + + + if (loop(bp, 0, 0,dx,cs,0,RI)>=dx/2 + && loop(bp,dx-1,dy-1,dx,cs,0,LE)>=dx/2 + && loop(bp, 0,dy/2,dx,cs,0,RI)< 2 ) ad=96*ad/100; // thin O + + if(box1->dots) ad=ad*94/100; + if ( gchar) ad=99*ad/100; + if (!hchar) ad=99*ad/100; + Setac(box1,'D',ad); + break; + } + // --- test d --------------------------------------------------- + for(d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='d'; ) + 
ad=100; + if (sdata->holes.num < 1) Break; /* tolerant against a tiny hole */ + if( get_bw(x0 , x0+dx/2, y1-dy/6, y1-dy/9,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2, x1 , y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/4, x1 , y0+dy/8, y0+dy/8,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2, x0+dx/2, y1-dy/4, y1 ,box1->p,cs,1) != 1 ) Break; + if(dy>19) + if( get_bw(x0 , x0+dx/3, y0 , y0+dy/5,box1->p,cs,1) == 1 ) Break; + if( get_bw(x0 , x0+dx/3, y0 , y0+dy/6,box1->p,cs,1) == 1 ) Break; + if( get_bw(x0 , x0+dx/4, y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2-1,x0+dx/2,y1-dy/8, y1 ,box1->p,cs,1) != 1 ) Break; // ~"A + if( loop(bp,bp->x-1, bp->y/4,x1-x0,cs,0,LE) > + loop(bp,bp->x-1,3*bp->y/4,x1-x0,cs,0,LE)+1 ) Break; + for(i=dx/8+1,x=0;x 3 ) i++; // ~al + } if( i ) ad=98*ad/100; + for(i=dy/8+1,y=0;yholes.num<1) Break; + if (sdata->holes.num>1) { + if (dx<6) Break; ad=95*ad/100; } // glued j above 8 (4x6 sample) + MSG(fprintf(stderr,"hole[0].y0,y1= %d %d",sdata->holes.hole[0].y0,sdata->holes.hole[0].y1);); + if ( sdata->holes.hole[0].y0 < dy/4 ) Break; + if (dy-sdata->holes.hole[0].y1 > dy/4+1) Break; // glued et + // if( num_hole(x0 , x1 , y0+dy/4 , y1 ,box1->p,cs,NULL) !=1 ) Break; + if( num_cross(0 ,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs) != 2 ) { // glued al + if (dy>15) { Break; } else ad=96*ad/100; + } + if (!hchar) ad=98*ad/100; + if ( gchar) ad=99*ad/100; + Setac(box1,'d',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_F(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test F --------------------------------------------------- + for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx + DBG( wchar_t c_ask='F'; ) + if (sdata->holes.num > 1) 
Break; /* tolerant against a tiny hole */ + if( get_bw(x0+dx/2,x0+dx/2,y0,y0+dy/8,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0,x0+dx/4,y1-dy/4,y1-dy/4,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0,x0+dx/2,y0+dy/4,y0+dy/4,box1->p,cs,1) != 1 ) Break; + + for (x=0,y=0;yx) x=j; + } if (ydx/2 ) i=0; } + if( i ) Break; + + x=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); + x=loop(bp,x,dy-1-dy/4,dx,cs,1,RI); // strichdicke + for( i=1,y=dy/3; ydx/3 && ((j>2*x && dx>8) || j>x+1)) i=0; } + if( i ) Break; + + y=dy/8; if (y<1) y=1; + for( i=1; y=dx/3) { i=0; break; } + } + if( i ) Break; + + // check for vertical line on left side + for(i=1,y=1;y<=dy/2 && i;y++) + if( get_bw(0,dx/2,y,y,bp,cs,1) != 1 ) i=0; + if( !i ) Break; + + for(i=1,y=dy/2;ydx/8 // no serif + || loop(bp, 0, dy-3,dx,cs,0,RI)<1) break; + ad=99*ad/100; + } + if( get_bw(dx-1-dx/4,dx-1,dy-1-dy/4,dy-1,bp,cs,1) == 1 ) Break; // ~E + if( get_bw(dx-1 ,dx-1,0 ,dy/3,bp,cs,1) != 1 ) Break; + + if( loop(bp,0, bp->y/4,dx,cs,0,RI) < + loop(bp,0,3*bp->y/4,dx,cs,0,RI)-1 ) Break; + // if( num_hole(x0 , x1 , y0 , y1 ,box1->p,cs,NULL) >0 ) Break; + if (sdata->holes.num > 0) Break; + for(i=0,x=dx/4;xy/4,dx,cs,0,RI)-1; + if (i>=0 && loop(bp,dy-1,i,dy,cs,0,UP)<=3*dy/4 ) ad=ad*98/100; + + // check for screen font P + i= loop(bp,bp->x-1,bp->y/4,dx,cs,0,LE); + if (i<1) { + j=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE); + j= loop(bp,bp->x-1-j,bp->y/4,3*dy/4,cs,0,DO); + if (j<=dy/2) { + i=loop(bp,bp->x-1,0,dx,cs,0,LE); + ad=ad*98/100; + if (i>dx/8) Break; + if (i) ad=98*ad/100; + } + } + + if (!hchar) if ((box1->m2-box1->y0)*8>=dy) { // ignore bad m1..4 + if ( num_cross(2*dx/3,2*dx/3,0,dy-1,bp,cs) < 2 ) ad=90*ad/100; // ~r + } + if (gchar) ad=99*ad/100; + Setac(box1,'F',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_uU(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, 
/* size */ + ad; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test uU --------------------------------------------------- + // in Mitte so breit wie oben (bei V kontinuierlich schmaler) + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='u'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + for(y=y0+dy/4;yp,cs) < 2 ) break; + if( yi)i=y; if(y1) break; + } if( idy/2)?dx/8:0),y,y,bp,cs); + if( y1 ) i--; // ~{\it v} + if( y2) ) { i--; ad=90*ad/100; } + if( y>dy/2 && j!=1 ) { i--; ad=95*ad/100; } + } if( !i ) Break; + for(i=dy/16+1,y=dy/8;ydy/2 && (j<1 && j>2) ) i--; + if( yp,cs,1) != 1 ) i=0; + } if( i ) Break; + for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ + if( get_bw( x, x,y0+dy/3,y1-dy/3,box1->p,cs,3) != 2 ) i--; + } if( !i ) Break; + for(i=1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ + if( get_bw( x, x,y1-dy/2,y1,box1->p,cs,3) == 2 ) i=0; + if( get_bw( x, x,y1-dy/3,y1,box1->p,cs,3) == 2 ) ad=98*ad/100; + } if( !i ) Break; + if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2 + && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==1 ) Break; // ~{\it v} + + i=loop(bp,0,dy-1-dy/16,dx,cs,0,RI); + j=loop(bp,0,dy-1-dy/8 ,dx,cs,0,RI); + if( i15) + if( loop(bp,dx-1,dy/16,dx,cs,0,LE) + > loop(bp,dx-1,dy/8 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad 0 (thinn) + if( hchar && dy>7) + if( loop(bp, 0, dy-1,dx,cs,1,RI)==dx + && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/16 + && loop(bp, 0,3*dy/4,dx,cs,0,RI)>dx/16 + && loop(bp,dx-1, dy/2,dx,cs,0,LE)>dx/16 + && loop(bp, 0, dy/2,dx,cs,0,RI)>dx/16 + ) Break; // melted ll + + i=loop(bp, 0,dy-2-dy/8,dx,cs,0,RI); + j=loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE); + if ( i>dx/4 && j>dx/4 && i+j>=dx/2) Break; // v + if (i+j>=dx/2) ad=97*ad/100; + + if ( num_cross(0,dx-1,dy/2,dy/2,bp,cs)!=2 ) ad=96*ad/100; // w + if ( loop(bp,dx/2,dy-1,dy,cs,0,UP)>0 ) ad=98*ad/100; // w + + if (ad==100) ad=99; // ToDo: only if lines.wt<100 + bc='u'; + if (gchar) ad=98*ad/100; + if (hchar) bc='U'; + if (box1->dots>0) ad=99*ad/100; + Setac(box1,bc,ad); + break; + 
} + return box1->c; +} + +static wchar_t ocr0_micro(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i2,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test \mu µ MICRO_SIGN -------------------------------------- + // in Mitte so breit wie oben (bei V kontinuierlich schmaler) + if( gchar && !hchar ) + for(ad=d=100;dx>2 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='u'; ) + if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */ + for(y=y0+dy/8;ym3-dy/4;y++) + if( num_cross(x0,x1,y,y,box1->p,cs) < 2 ) break; + if( ym3-dy/4 ) break; + if( get_bw(dx/2,dx/2,3*dy/8,7*dy/8,bp,cs,1)==0 ) break; + if( get_bw(dx/2,dx-1,3*dy/8,7*dy/8,bp,cs,1)==0 ) break; + for(y=dy/2;y5*dx) break; + } if( y>=dy || 2*y>box1->m3+box1->m4) break; i2=y; + for(i=0,x=2*dx/8;xi)i=y; if(y1) break; + } if( im4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler + j=num_cross(0,dx/2,y,y,bp,cs); + if( y1 ) i--; // ~{\it v} + if( y2) ) i--; + if( y>dy/2 && j!=1 ) i--; + } if( !i ) break; + for(i=dy/16+1,y=dy/8;ym4-box1->m3)-dy/4 && i;y++){ // 12%+1 Fehler + j=num_cross(dx-dx/2,dx-1,y,y,bp,cs); + if( y>dy/2 && (j<1 && j>2) ) i--; + if( yp,cs,1) != 1 ) i=0; + } if( i ) break; + for(i=dx/4+1,x=x0+dx/3;x<=x1-dx/3 && i;x++){ + if( get_bw( x, x,y0+dy/4,y1-dy/2,box1->p,cs,3) != 2 ) i--; + } if( !i ) break; + if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)!=1 ) break; + if( num_cross(dx-dx/2,dx-1,dy-dy/2,dy-dy/2,bp,cs)!=1 ) break; + if( get_bw( (dx+2)/4,dx-1,dy-2-3*dy/16,dy-1,bp,cs,1) == 1 ) break; + if( num_cross(0,dx/4,dy-1,dy-1,bp,cs)!=1 ) break; + + Setac(box1,MICRO_SIGN,ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_vV(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int 
dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test v ------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='v'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + x=loop(bp,dx/2,0,dx,cs,1,RI)+dx/2; // be sure in the upper gap + y=loop(bp, x,0,(dy+1)/2,cs,0,DO)-1; // (x,y) should be in the gap + if (x>3*dx/4 || yp,cs,1) != 1 ) Break; + if( get_bw(x0+x,x1,y0+y,y0+y,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+x,x0+x,y1-dy/2,y1, box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+x, x0+x ,y0, y0+dy/3,box1->p,cs,1) == 1 ) // it v? + if( get_bw(x0+x+1,x0+x+1,y0, y0+dy/3,box1->p,cs,1) == 1 ) Break; + + // UVW + if(((num_cross( 0,dx/2+1,dy/ 8,dy/ 8,bp,cs)!=1) + && (num_cross( 0,dx/2+1,dy/16,dy/16,bp,cs)!=1) // it v + && (num_cross(dx/2+1,dx -1,dy/ 8,dy/ 8,bp,cs)!=1)) /* () added on Sep00 */ + || ((num_cross( 0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs)> 1) + && (num_cross( 0,dx-1,dy-1 ,dy-1 ,bp,cs)> 1)) ) Break; + // UV + if( get_bw(0 ,dx/8,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break; + if( get_bw(dx-1-dx/8,dx-1,dy-1-dy/6,dy-1,bp,cs,1)==1 ) Break; + if( loop(bp,0 ,dy/6 ,dx,cs,0,RI) + >=loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) && dy>6 ) Break; + if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) + >loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI) + && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) + >loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) Break; // better OR ? 
+ if( loop(bp,0 ,dy-1-dy/3,dx,cs,0,RI) + >=loop(bp,0 ,dy-1-dy/8,dx,cs,0,RI) + && loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) + >=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) ) ad=99*ad/100; // font21 + if( loop(bp,dx-1,dy/6 ,dx,cs,0,LE) + >=loop(bp,dx-1,dy-1-dy/3,dx,cs,0,LE) && dy>6 ) Break; + x=loop(bp,0,dy-1,dx,cs,0,RI); // 3*x>dx changed to 2*x>dx May2001 JS + x=loop(bp,x,dy-1,dx,cs,1,RI); if ( dx>14 && 2*x>dx ) Break; // U + if( num_cross(0 ,dx/2, dy/4, dy/4,bp,cs)==2 + && num_cross(dx-dx/2,dx-1,dy-dy/4,dy-dy/4,bp,cs)==2 ) Break; // ~{\it u} + +#if 0 + // measure thickness of lower v + i=loop(bp, 0,dy-1-dy/16,dx,cs,0,RI) + +loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE); + j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI) + +loop(bp,dx-1,dy-1-dy/4 ,dx,cs,0,LE); + if( box1->m1 && hchar && dy>15 && j>=i-dx/32 ) Break; // ~Y +#endif + /* V has serifs only on upper site! Y also on bottom, check it. Okt00 */ + i=loop(bp, 0, 0,dx,cs,0,RI); + i=loop(bp, i, 0,dx,cs,1,RI); i1=i; // thickness + i=loop(bp, 0, 1,dx,cs,0,RI); + i=loop(bp, i, 1,dx,cs,1,RI); if(i>i1) i1=i; // thiggest + i=loop(bp, 0,dy/4,dx,cs,0,RI); + i=loop(bp, i,dy/4,dx,cs,1,RI); i2=i; + i=loop(bp, 0,dy ,dx,cs,0,RI); + i=loop(bp, i,dy ,dx,cs,1,RI); i3=i; // thickness + i=loop(bp, 0,dy-1,dx,cs,0,RI); + i=loop(bp, i,dy-1,dx,cs,1,RI); if(i>i3) i3=i; // thiggest + if( y0 < box1->m2 ) + if( i1-i2 > dx/32+2 + && i3-i2 > dx/32+2 ) Break; // ~serif_Y + + if( y0 < box1->m2 ) // uppercase V ? 
+ if( i1-i2 < dx/32+2 ) /* no serif detected */ + if( num_cross(0,dx-1,dy-1-dy/4,dy-1-dy/4,bp,cs)==1 ){ + j=loop(bp, 0,dy-1-dy/4 ,dx,cs,0,RI); + j=loop(bp, j,dy-1-dy/4 ,dx,cs,1,RI); + if (jloop(bp,0 ,dy-1 ,dx,cs,0,RI) ) ad=96*ad/100; + + if (gchar) ad=99*ad/100; + bc='v'; + if( hchar ) bc='V'; + Setac(box1, bc, ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_rR(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test r ------- + for(ad=d=100;dy>3 && dx>1;){ // dy>dx, 4x6 font, dx=2 smallest prop-font + DBG( wchar_t c_ask='r'; ) + if (sdata->holes.num > 0 + && ( sdata->holes.hole[0].y1 > dy/2 // tiny hole in upper left + || sdata->holes.hole[0].x1 > dx/2 ) // is tolerated, ~Pp + ) Break; /* tolerant against a tiny hole */ + if( 2*dym3-box1->m1) Break; + + if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8 ) Break; + x= loop(bp,dx-1,dy/2,dx,cs,0,LE); if (x<=dx/2) ad=99*ad/100; // ~t + if (loop(bp,dx-1-x/2,0,dy,cs,0,DO)>dy/8) ad=99*ad/100; // ~t + if( dx>4 ) + if( loop(bp,dx-1,dy/2,dx,cs,0,LE)<=dx/8+2 ) Break; // ~v Jun00 + + i=dy-(dy+20)/32; // ignore dust on the ground + + for( y=4*dy/8; y3*dx/8) break; + i2= loop(bp,dx-1,y,dx,cs,0,LE); if(i1>i2) break; + if( (i1+(dx-i2 + -1))/2 >= 4*dx/8 ) break; // mass middle should be left + } + if (y5*dx/8 // not a C + && get_bw(dx-1-dx/8,dx-1,dy-1-dy/4,dy-1,bp,cs,1) ==1 ) Break; + + if( loop(bp, 0,5*dy/8,dx,cs,0,RI)<=dx/8 + && loop(bp,dx-1,5*dy/8,dx,cs,0,LE)>=5*dy/8 + && loop(bp,dx/2, dy-1,dy,cs,0,UP)<=dy/8 ) Break; // ~c + + if( loop(bp, 0,3*dy/8,dx,cs,0,RI) + > loop(bp,dx-1,3*dy/8,dx,cs,0,LE)+dx/8 ) { + if( loop(bp, 0, dy/8,dx,cs,0,RI)3*dx/4 ) Break; // ~i + if( loop(bp,0,dy/4,dx,cs,0,RI)>3*dx/8 // ~I + && get_bw(0,dx/8,0,dy/4,bp,cs,1) ==1 ) Break; + if( num_cross(0,dx-1,dy/2, dy/2 ,bp,cs)!=1 + && 
num_cross(0,dx-1,dy/2+1,dy/2+1,bp,cs)!=1 ) Break; // ~n 024a3 + + // itallic t is sometimes not high enough, look for v-like shape + for(y=3*dy/4;y1 ) ad=95*ad/100; // ~f + if( num_cross(dx/2 ,dx/2 ,0,dy-1,bp,cs)>2 + && num_cross(dx/2+1,dx/2+1,0,dy-1,bp,cs)>2 ) Break; // ~f + + if (box1->dots) ad=98*ad/100; /* could be modified latin2-r */ + if (hchar) ad=96*ad/100; + if (gchar) ad=97*ad/100; + Setac(box1,'r',ad); + break; // not 100% sure! + } + // --- test R --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='R'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 2 ) Break; // ~P + if (loop(bp, dx/2, dy/4,dy,cs,0,DO)>dy/2) Break; // ~C + if (loop(bp, dx/2, 0,dy,cs,0,DO)>dy/8 + && loop(bp, dx/2,dy/16,dx,cs,0,RI)=16 ) Break; + for(i=1,y=y0+dy/8;y<=y1-dy/8 && i;y++){ // left v-line + if( get_bw(x0 , x0+dx/2,y, y,box1->p,cs,1) != 1 ) i=0; + } if( !i ) Break; + for(i=1,x=x0+3*dx/8;x<=x1-dx/4 && i;x++){ // upper h-line + if( get_bw( x, x, y0, y0+dy/4,box1->p,cs,1) != 1 ) i=0; + } if( !i ) Break; + for(y=0,x=x0+dx/4;x<=x1-dx/4;x++){ // lower h-gap + i=loop(box1->p,x,y1,dy,cs,0,UP); + /* on small chars bypass possible low left serifs */ + if (i>0) { i2=loop(box1->p,x-1,y1-i-1,dy,cs,0,UP); + if (i2>1) i+=i2-1; } + if (i>y) { y=i; i1=x; } + } if( y<=dy/8 ) Break; if (yp,cs) == 2 ) i=0; + } if( i ) Break; + for(i=1,y=y0;y<=y0+3*dy/8 && i;y++){ // upper 2 vert lines + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) Break; + for(i=1,y=y0+dy/3;y<=y1-dy/3 && i;y++){ // midle h line + if( num_cross(x0,x1,y,y, box1->p,cs) == 1 ) i=0; + } if( i ) ad=95*ad/100; /* sometimes there is a small gap */ + for(i=1,y=y1-dy/4;y<=y1 && i;y++){ // lower 2 vert lies + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) Break; + if( get_bw(x1-dx/3,x1,y0,y0+dy/4,box1->p,cs,1) != 1 ) Break; // pixel ru + x=loop(bp,dx-1, dy/4,dx,cs,0,LE); 
if(x>dx/2) Break; i=x; // ru + x=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(x<=i ) Break; i=x; // rc + x=loop(bp,dx-1, 5*dy/8,dx,cs,0,LE); if(x>i ) i=x; + x=loop(bp,dx-1, 6*dy/8,dx,cs,0,LE); if(x>i ) i=x; + x=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); if(x>=i ) Break; // rd + + i1=loop(bp,0, dy/4,dx,cs,0,RI); // straight + i2=loop(bp,0, dy/2,dx,cs,0,RI); + i3=loop(bp,0,dy-1-dy/4,dx,cs,0,RI); if( abs(i1+i3-2*i2)>1+dx/16 ) Break; + if (dy>15) + if (loop(bp,dx-1, dy/2,dx,cs,0,LE)>=loop(bp,dx-1, dy-1,dx,cs,0,LE) + && loop(bp,dx-1,3*dy/16,dx,cs,0,LE)>=loop(bp,dx-1,dy/16,dx,cs,0,LE)+dx/8 ) Break; // ~ff + if (dy>7) + if (loop(bp,dx-1,dy-2 ,dx,cs,0,LE) + >loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)) { + ad=98*ad/100; + if (loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE)==0 + && loop(bp,dx-1,dy-2-dy/8,dx,cs,0,LE)>0 ) Break; // broken B ?? + } + j=sdata->holes.num; + if (j != 1) { + i=num_hole (x0,x1,y0,y1-dy/3,box1->p,cs,NULL); + // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL); + if (i==0) ad=90*ad/100; /* some times there is a small gap */ + if (j>1 || j>i) Break; + } + if (sdata->holes.num < 1) ad=90*ad/100; + if (sdata->holes.num==1) + if (sdata->holes.hole[0].y1 > 3*dy/4) ad=95*ad/100; // alpha + + if (!hchar) ad=98*ad/100; + if ( gchar) ad=98*ad/100; + Setac(box1,'R',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_m(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, + handwritten=0, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test m ------- + for(ad=d=100;dx>4 && dy>3;){ + DBG( wchar_t c_ask='m'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num > 0) ad=96*ad/100; + x =loop(bp,dx-1,dy/2,dx,cs,0,LE); if(3*x>dx) Break; // ~K + y=dy/2; + i=num_cross(0,dx-1,y ,y ,bp,cs); if (i!=3) + i=num_cross(0,dx-1,y+1,y+1,bp,cs); + if (i<3 && i>5) Break; // m ru rn, handwritten m + // 
im or glued.mm cut to nm + if (i>3) { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } + for (i=0,y=dy-1-dy/8;y>dy/2;y--) { + i=num_cross(0,dx-1,y,y,bp,cs); if (i>2) break; + } if (i>3) Break; + for ( ;y>dy/2;y--) { + i=num_cross(0,dx-1,y,y,bp,cs); if (i!=3) break; + } if (i>5) Break; y++; i5=y; + if (y> dy/2) handwritten=10; + if (y>3*dy/4) handwritten=60; + /* @@............... + @@......,........ + @@,...@@@....@@@. + @@,,.@@@@..@@@@@, + @@@.@@@@@.@@@@@@, + @@;@@@@@@@@@;,@@, + @@@@@,.@@@@,,,@@@ <- i5 + ,@@@...;@@....@@@ + .@;...........,@@ + ...............@@ + i1 i2 i3 i4 + */ + x =loop(bp,0,y,dx ,cs,0,RI); if(x> dx/4) Break; // search 1st v-line + x+=loop(bp,x,y,dx-x,cs,1,RI); if(x> dx/2) Break; i1=x; // first gap + x+=loop(bp,x,y,dx-x,cs,0,RI); if(x>3*dx/4) Break; i2=x; // 2nd v-line + x+=loop(bp,x,y,dx-x,cs,1,RI); if(x>6*dx/8) Break; i3=x; // 2nd gap + x+=loop(bp,x,y,dx-x,cs,0,RI); if(x<5*dx/8) Break; i4=x; // 3th v-line + if (x>=dx) Break; // missing 3th v-line, ~W + MSG(fprintf(stderr,"y=%d x=%d %d %d %d",y,i1,i2,i3,i4);) + if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/4 ) Break; // same gap width? rn + if( abs((i2-i1)-(i4-i3)) > 2+((i2-i1)+(i4-i3))/8 ) ad=98*ad/100; // same gap width? rn + // the same game for the lower part =>l1 l2 l3 l4 ??? 
+ i =loop(bp,0,5*dy/8,dx,cs,0,RI); + i =loop(bp,i,5*dy/8,dx,cs,1,RI); + x =loop(bp,0,dy-dy/32-1,dx,cs,0,RI); + x =loop(bp,x,dy-dy/32-1,dx,cs,1,RI); + if( x > i+1 ) i=1; else i=0; /* looks like serif m, Okt00 */ + for(y=0,x=i1;xy) y=i; + } + if(yy) y=i; + } + if(y=dy/2 ) break; + if(xi4-i3+dx/16){ + for(y=0,x=(i1+i2)/2;xy ) y=i; if( 2*i3 ) Break; // melted WT + + x=loop(bp,dx-1,dy/2,dx,cs,0,LE); + if (x>2 && loop(bp,dx-1-x/2,0,dy,cs,0,DO)dy/2) Break; // N + + // {\it m} + if( loop(bp,1, dy/4,dx,cs,0,RI) + >loop(bp,0,7*dy/8,dx,cs,0,RI) ) + Setac(box1,'m',98*ad/100); + + if (handwritten<10){ + x =loop(bp,0,dy/4,dx,cs,0,RI); + x+=loop(bp,x,dy/4,dx,cs,1,RI); + for( ;x=dy/4) ad=99*ad/100; + if (i>(dy+2)/4) ad=95*ad/100; + if (3*i>dy) Break; + } + if(xdots) ad=99*ad/100; + Setac(box1,'m',ad); + if (ad>=100) return 'm'; + break; + + } + return box1->c; +} + +static wchar_t ocr0_tT(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,i1,i2,i3,i4,j,d,x,y,yb,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test T --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // dx>1 dy>2*dx + DBG( wchar_t c_ask='T'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + // upper horizontal line + i1= loop (bp, dx/8,0,dy,cs,0,DO); // left side + i2= loop (bp,dx-1-dx/8,0,dy,cs,0,DO); // right side + i3= loop (bp, dx/8,i1,dy,cs,1,DO); // left side + i4= loop (bp,dx-1-dx/8,i2,dy,cs,1,DO); // right side + if (i1>dy/4 || i2>dy/4) Break; + for (x=dx/8;xi1+dy/8 && i>i2+dy/8) break; + if (idx+1 || i+j>=dx || i+j/23*x) break; //~I + } if( y3*dx/4) Break; // ~7 + i+= loop(bp,i ,dy/4,dx,cs,1,RI);if(i>3*dx/4) Break; + + if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 + && num_cross(0,dx-1, dy-2, dy-2,bp,cs) != 1 ) Break; + if( num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 + && 
num_cross(0,dx-1,2*dy/3,2*dy/3,bp,cs) != 1 ) Break; + if (box1->m3 && 2*y1>box1->m3+box1->m4 + && loop(bp,0, 0,dy/2,cs,0,DO)>=dy/4 + && loop(bp,0,dy-1,dy ,cs,0,UP)<=dy/2) ad=96*ad/100; // ~J + if (gchar) ad=98*ad/100; + if( loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8) ad=99*ad/100; // ~J + i = loop(bp,0,dy/2,dx,cs,0,RI); + j = loop(bp,i,dy/2,dx,cs,1,RI); + if( 2*i>=dx || 2*(dx-j-i)=100) return 'T'; + break; + } + // --- test t --------------------------------------------------- + // written t can look like a + or even with missing right side + // smallest t found in win-screenshot (prop-font) dx=2 + for(ad=d=100;dx>1 && dy>=box1->m3-box1->m2-1;){ // sometimes no hchar! + DBG( wchar_t c_ask='t'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (dy<=box1->m3-box1->m2+1) ad=96*ad/100; // bad line detection? + for(x=0,yb=j=y=dy/32+3*dy/16;y<5*dy/8;y++)if(y>0){ // upper cross line + i=loop(bp,0,y,dx,cs,0,RI); + i=loop(bp,i,y,dx,cs,1,RI); if( i>x ) { x=i;yb=j=y; } // hor. line + i=num_cross(0,dx-1,y ,y ,bp,cs); + j=num_cross(0,dx-1,y+1,y+1,bp,cs); if (i>2 && j>2) break; + if( y<11*dy/16 + && num_cross(0,dx-1,y ,y ,bp,cs) != 1 + && ( num_cross(0,dx-1,y+dy/8,y+dy/8,bp,cs) != 1 || dy<13) // against noise + ) break; + } if( y<4*dy/8 ) Break; + if (dy>12 && x>4 && x>dx/2 && yb<=(dy+4)/8) + if ( loop(bp,dx-1-3*x/4,yb,dy,cs,1,UP) + <=loop(bp,dx-1-1*x/4,yb,dy,cs,1,UP)+1 ) + if ( loop(bp,0 ,dy/2,dy,cs,1,UP)>dx/8 ) Break; // ~C + + if (x=dx && 9*dx>=8*dy) { ad=99*ad/100; } // + + + i=loop(bp,dx-1,0,dx,cs,0,LE); + for(y=0;y1) break; i=j; + } + if( yi ) break; + if( y==yb ) break; + + j=loop(bp,0, dy/2,dx,cs,0,RI); + j=loop(bp,j, dy/2,dx,cs,1,RI); i=j; // thickness + j=loop(bp,0, dy/4,dx,cs,0,RI); + j=loop(bp,j, dy/4,dx,cs,1,RI); if (j=loop(bp,dx-1,yb/2,dx,cs,0,LE) ) Break; // ~1 ??? 
+ + j=1; + for(y=1;j && y=cs && getpixel(bp,x+1,y )>=cs + && getpixel(bp,x ,y-1)< cs && getpixel(bp,x+1,y-1)< cs ) { j=0;break; } + } if(!j) Break; + + if( num_cross(0,dx-1,dy-2,dy-2,bp,cs) == 2 + && num_cross(0,dx-1,dy-1,dy-1,bp,cs) == 2 ) Break; // ~* (5er) + + if( dy>= 16 + && loop(bp, 0, 3*dy/4,dx,cs,0,RI) + >=loop(bp, 0, dy-2,dx,cs,0,RI) + && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE) + <=loop(bp,dx-1, dy-2,dx,cs,0,LE) + && loop(bp,dx-1, 1,dx,cs,0,LE)+dx/16 + loop(bp, 0,3*dy/16,dx,cs,0,RI)+dx/16 + || loop(bp,dx-1, 0,dx,cs,0,LE)==0 + || loop(bp,dx-1, 1,dx,cs,0,LE)==0) ) ad=96*ad/100; // ~f Jan02 + if(dx<8 && dy>12){ // thin f's could easily confound with t + x=loop(bp,dx-1,3*dy/16,dx,cs,0,LE); + if (x) + if (loop(bp,dx-x,0,dy,cs,0,DO)<3*dy/16 + && loop(bp, 0, 3*dy/4,dx,cs,0,RI)+1 + >=loop(bp, 0, dy-2,dx,cs,0,RI) + && loop(bp,dx-1, 3*dy/4,dx,cs,0,LE) + <=loop(bp,dx-1, dy-2,dx,cs,0,LE) ) Break; + } + if (dx>7) + if( num_cross( 0,dx-1,2*dy/3,2*dy/3,bp,cs) > 1 + && num_cross( 0,dx/2,2*dy/3,2*dy/3,bp,cs) > 0 + && num_cross(dx/2,dx-1,2*dy/3,2*dy/3,bp,cs) > 0 ) + if (sdata->holes.num > 0) + if (sdata->holes.hole[0].y0 > dy/4) Break; // ~6 + // if ( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; // ~6 + + if( num_cross(0,dx-1,3*dy/4, 3*dy/4, bp,cs) >= 2 + && num_cross(0,dx-1,3*dy/4-1,3*dy/4-1,bp,cs) >= 2 ){ + ad=99*ad/100; /* italic t ? 
*/ + if (loop(bp,dx/2 ,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h + if (loop(bp,dx/2+1,dy-1,dy,cs,0,UP)>dy/4) Break; // ~h + } + + x= loop(bp,dx-1,dy/2,dx,cs,0,LE); + i= loop(bp,dx-1,dy/8,dx,cs,0,LE); + if (i>x && loop(bp,dx-x,0,dy,cs,0,DO)>=dy/2) ad=90*ad/100; /* ~\ */ + + x= loop(bp,0, 0,dx,cs,0,RI); + i= loop(bp,0, 1,dx,cs,0,RI); if (i1) Break; // l + + // this happens quite often, do not be to strong + if (!box1->m2) ad=99*ad/100; + if (box1->m2) { + if (!hchar) ad=99*ad/100; /* some times t is not long enough */ + if( y0>=box1->m2-(box1->m2-box1->m1)/4 ) ad=99*ad/100; /* to short */ + if( y0>=box1->m2 ) ad=99*ad/100; /* to short */ + } + + if (sdata->holes.num > 0) ad=95*ad/100; + if (gchar) ad=99*ad/100; + if (box1->dots) ad=90*ad/100; + Setac(box1,'t',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_sS(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + wchar_t ac; + + // --- test sS near 5 --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 (4x6 font) + DBG( wchar_t c_ask='s'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if( num_cross( dx/2, dx/2,0,dy-1,bp,cs)!=3 + && num_cross(5*dx/8,3*dx/8,0,dy-1,bp,cs)!=3 + && dy>4 ) Break; + if( num_cross(0,dx-1,dy/2 ,dy/2 ,bp,cs)!=1 + && num_cross(0,dx-1,dy/2-1,dy/2-1,bp,cs)!=1 ) Break; + // get the upper and lower hole koords + y=dy/4; + x =loop(bp,0,y,dx,cs,0,RI); if(x>3*dx/8) Break; /* slanted too */ + x +=loop(bp,x,y,dx,cs,1,RI); if(x>5*dx/8) Break; /* fat too */ + i1 =loop(bp,x,y,dx,cs,0,RI); i1=(i1+2*x)/2; // upper center x + y=11*dy/16; + x =loop(bp,dx-1 ,y,dx,cs,0,LE); if(x>dx/4) Break; + x +=loop(bp,dx-1-x,y,dx,cs,1,LE); if(dx>5 && dy>7 && x>dx/2) Break; + if (x>3*dx/4) Break; if(x>dx/2) { ad=98*ad/100; MSG({})} + i2 
=loop(bp,dx-1-x,y,dx,cs,0,LE); i2=dx-1-(i2+2*x)/2; // upper center x + for( y=dy/4;ydx/8) break; + } + if(y==dy/2) Break; // Mai00 + + y=dy/2+loop(bp,0,dy/2,dy/2,cs,1,DO); + if( !joined(bp,0,y,i2,11*dy/16,cs) ) Break; + + if (sdata->holes.num > 0) + if (sdata->holes.hole[0].y0 > dy/4) Break; // ??? + // if( num_hole( x0, x1, y0+dy/4, y1, box1->p,cs,NULL) > 0 ) Break; + + i1=loop(bp,dx-1,dy-1,dx,cs,0,LE); + i2=loop(bp,dx-1,dy-2,dx,cs,0,LE); + if (i2-i1 >= dx/4) Break; // ~{ 5x7font + + i1=loop(bp, 0, 0,dx,cs,0,RI); + i2=loop(bp, 0, 1,dx,cs,0,RI); + if (i2-i1 >= dx/4) Break; // ~} 5x7font + + // sS5 \sl z left upper v-bow ? + + i1=loop(bp, 0,dy/2,dx,cs,0,RI); + i1=loop(bp, i1,dy/2,dx,cs,1,RI); + if (4*i1>=3*dx) ad=97*ad/100; // ~5 7-segment + + i1=loop(bp,0, dy/16,dx,cs,0,RI); + i2=loop(bp,0,4*dy/16,dx,cs,0,RI); + i3=loop(bp,0,7*dy/16,dx,cs,0,RI); + if( 2*i2+dx/32 >= i1+i3 ){ + if( 2*i2+dx/32 > i1+i3 || dx>9 ) Break; + // very small s? + i1+=loop(bp,i1, dy/16,dx,cs,1,RI); + i2+=loop(bp,i2,4*dy/16,dx,cs,1,RI); + i3+=loop(bp,i3,7*dy/16,dx,cs,1,RI); + if( 2*i2+dx/32 >= i1+i3 ) Break; + } + + for(y=7*dy/16;y<5*dy/8;y++){ + if( num_cross( 0,dx-1,y ,y ,bp,cs)==2 ) + if( num_cross( 0,dx-1,y+1,y+1,bp,cs)==1 ) + if( num_cross( 0,dx/4,y,y,bp,cs)==1 ) break; // ~5 + } if(y<5*dy/8) Break; // v0.2.4a5 + if ( loop(bp, dx-1,dy-2-dy/32,dx,cs,0,LE) + > loop(bp, 0, 1+dy/32,dx,cs,0,RI) + dx/4 ) Break; // ~5 Dec00 + ac='s'; + if (gchar) { ad=98*ad/100; MSG({}) } + if( hchar ){ // S but 5 is very similar! 
check it + ac='S'; + if ( loop(bp, dx-1,dy-1-dy/32,dx,cs,0,LE) + > loop(bp, 0, 0+dy/32,dx,cs,0,RI) ) ad=99*ad/100; // ~5 + if ( loop(bp, 0,dy-1-dy/32,dx,cs,0,RI) + > loop(bp, dx-1, 0+dy/32,dx,cs,0,LE) ) ad=99*ad/100; // ~5 + } + Setac(box1,ac,ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_gG(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test g --------------------------------------------------- + /* some g's have crotchet at upper right end, so hchar can be set */ + // ~italic g + for(ad=d=100;dx>2 && dy>4;){ // min 3x5 + DBG( wchar_t c_ask='g'; ) + if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ + if( get_bw(x0+dx/2, x0+dx/2, y1-dy/2, y1,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/4, x1 , y1-dy/4, y1,box1->p,cs,1) != 1 ) Break; // ~p + if( get_bw(x0+dx/2, x0+dx/2, y0, y0+dy/2,box1->p,cs,1) != 1 ) Break; + + if( num_cross(x0+dx/2, x0+dx/2, y0, y1, box1->p,cs) < 3 ) + if( num_cross(x1-dx/2, x1-dx/2, y0, y1, box1->p,cs) < 3 ) Break; + if (sdata->holes.num < 1) Break; + for (i=0;iholes.num;i++){ + if (sdata->holes.hole[i].y1 < 5*dy/8+1) break; + } if (i==sdata->holes.num) Break; // no upper hole found + // if( num_hole ( x0, x1, y0, y0+5*dy/8, box1->p,cs,NULL) != 1 ) Break; + for(y=dy/4;y=15*dy) Break; // ~B + + if (num_cross(x1, x1, (y0+y1)/2, y1, box1->p,cs)>1) { + ad=98*ad/100; // ~& + if (num_cross(x1 , x1 , y0, (y0+y1)/2, box1->p,cs)<1 ) ad=96*ad/100; + if (num_cross(x1-1, x1-1, y0, (y0+y1)/2, box1->p,cs)<1 ) ad=95*ad/100; + } + // looking for a gap + for (x=0,y=dy/4;yx) x=i; + } // in a good font x is greater dx/2 + + if (xp,cs) > 2 + || num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) > 2) ad=90*ad/100; + if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2 + || 
num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100; + } + if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) >2 ) ad=99*ad/100; // ~/o + + /* test for horizontal symmetry ~8 */ + for (y=0;ym4==0) ad=98*ad/100; + if ( hchar) ad=96*ad/100; + if (!gchar) ad=96*ad/100; + ad=98*ad/100; + Setac(box1,'g',ad); + break; + } + // --- test rundes G --------------------------------------------- + for(ad=d=100;dx>3 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='G'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if( get_bw(x0 ,x0+dx/2,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2,x1-dx/4,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2,x0+dx/2,y1-dy/4,y1 ,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0 ,x0+dx/2,y1-dy/3,y1-dy/3,box1->p,cs,1) != 1 ) Break; // ~S + for( y=y0+dy/4;yp,cs,1) == 0 ) break; + if( y==y1-dy/3 ) Break; // no gap + + if( num_cross(x0+dx/2 , x0+dx/2 , y0, y, box1->p,cs) != 1 + || num_cross(x0+dx/2+1, x0+dx/2+1, y0, y, box1->p,cs) != 1 ) Break; // ~e + + x=x0; y=y1; + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,UP,ST); // left bow? + if( yp,&x,&y,x0,x1,y0,y1,cs,LE,ST); + if( xp,&x,&y,x0,x1,y0,y1,cs,ST,LE); + if( xp,&x,&y,x0,x1,y0,y1,cs,LE,ST); + turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,ST,LE); + if( xp,&x,&y,x0,x1,y0,y1,cs,RI,UP); // upper end right midle + if( x<=x1 ) Break; + if( yy1-dy/4 ) Break; + + x=x1-dx/3;y=y1; // follow left C-bow, filter S + turmite(box1->p,&x,&y,x0,x1,y0+dy/4,y1,cs,LE,UP); // w=LE b=UP + if( y>y0+dy/4+1 ) Break; /* leave box below for S or on top for CG */ + MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);) + /* if (yp,&x,&y,x0,x1,y0 ,y1,cs,RI,UP); + MSG(fprintf(stderr,"xy= %d %d",x-x0,y-y0);) + if( y>y0 ) Break; + if (sdata->holes.num > 0) Break; + // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) > 0 ) Break; + if( dx>4 && dy>6){ // no (<[ + for(i=1,y=0;i && y Z + if( xi ) i=x; + } if( yi){ i=x;i1=y; } + } if( i1<=dy/4 || i1>=dy-dy/4 ) Break; // around the middle ? 
+ // check from above for gap and left vertical line (~S) + x =loop(bp,0,i1,dx ,cs,0,RI); + x+=loop(bp,x,i1,dx-x,cs,1,RI); // left vertical bow + x+=loop(bp,x,i1,dx-x,cs,0,RI); if (x>=dx) ad=90*ad/100; + MSG(fprintf(stderr,"h-bar y dx %d %d ad= %d",i1,i,ad);) + + i=1; // Mar06: adapted to 4x6 font + for(x=dx/2;x=cs + && getpixel(bp,x+1,y )< cs + && getpixel(bp,x+1,y-1)< cs + && getpixel(bp,x ,y-1)< cs ) { i=0;break; } + } + if(i) ad=95*ad/100; // ~C + if(!hchar) ad=98*ad/100; + if( gchar) ad=98*ad/100; + + Setac(box1,'G',ad); + break; + } + // --- test \it g like 9 ---------------------------------------------- + for(ad=d=100;dx>2 && dy>4;){ // dx>1 dy>2*dx + DBG( wchar_t c_ask='g'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( num_cross(x0+dx/2,x0+dx/2,y0,y1,box1->p,cs) != 3 // pre select + && num_cross(x0+dx/4,x1-dx/4,y0,y1,box1->p,cs) != 3 ) Break; + for( x=0,i=y=y0+dy/2;y<=y1-3*dy/16;y++){ // suche kerbe + j=loop(box1->p,x0,y,dx,cs,0,RI); + if( j>2 && j>dx/4 && yp,x0+j-2,y+1,dx,cs,0,RI)-2; + if( j>x ) { x=j; i=y; } + } + if( x<4*dx/8 ) Break; + if( num_cross(x0+dx/2,x1,i ,y1,box1->p,cs) != 1 + && num_cross(x0+dx/2,x1,i+1,y1,box1->p,cs) != 1 ) Break; + if( num_hole(x0,x1,y0,i+1,box1->p,cs,NULL)!=1 ) Break; + if( num_hole(x0,x1,i-1,y1,box1->p,cs,NULL)!=0 ) Break; + if( loop(box1->p,x0,y1 ,dy,cs,0,RI)>dx/3 && + loop(box1->p,x0,y1-1,dy,cs,0,RI)>dx/3) Break; // no q + for( x=0,i=y=y0+dy/3;y<=y1-dy/3;y++){ // suche kerbe + j=loop(box1->p,x1,y,dx,cs,0,LE); + if( j>x ) { x=j; i=y; } + } if( x>dx/2 ) Break; // no g + i1=loop(bp,dx-1,dy/8 ,dx,cs,0,LE); if(i1>dx/2) Break; + i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); + i2=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); if(i1+i3<2*i2-dx/8) Break; // konvex + i1=loop(bp,dx-1,dy/4 ,dx,cs,0,LE); if(i1>dx/2) Break; + i3=loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE); + for(y=dy/4;y0){ x--; // robust + y=loop(bp,dx-x-1, dy-1,dy,cs,0,UP); + if(yp,cs) > 2) ad=90*ad/100; + if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) > 2 + 
|| num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) > 2) ad=90*ad/100; + + if (box1->m4==0) ad=98*ad/100; + if ( hchar) ad=96*ad/100; + if (!gchar) ad=96*ad/100; + if (ad>99) ad=99; // never be sure to have a 9 + Setac(box1,'g',ad); + break; + } + return box1->c; +} + +// rewritten for vector usage v0.41 +static wchar_t ocr0_xX(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + // pix *bp=sdata->bp; // obsolete + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0, x1=box1->x1, y0=box1->y0, y1=box1->y1; // ,cs=sdata->cs; + int dx=x1-x0+1, dy=y1-y0+1, /* size */ + (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ + ad; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test xX --------------------------------------------------- + // rewritten for vectors 0.41 + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + int ld, i1, i2, i3, i4; // lien derivation, 4 inner edges + DBG( wchar_t c_ask='x'; ) + if (sdata->holes.num > 0) Break; /* # */ + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the 4 ends of the x */ + if (aa[0][2]>d) Break; + if (aa[1][2]>d) Break; + if (aa[2][2]>d) Break; + if (aa[3][2]>d) Break; + if (aa[3][0]-aa[0][0]num_frame_vectors[0]) { + if (box1->frame_vector[i][0] + >=box1->frame_vector[j][0]) j=i; /* notice most right vector */ + } if (j==i) Break; + /* calculate the distance to the center */ + x=box1->frame_vector[j][0]; + y=box1->frame_vector[j][1]; i1=j; + if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; + if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; + if ( aa[0][0]+aa[1][0]-2*x>=0) Break; + if ( aa[1][0] >= x ) Break; + if ( aa[0][0] > x ) Break; + if ( aa[0][0] >= x ) ad=99*ad/100; + if (x-x02*sq(1024/4)) Break; + /* check if lower left and center point are joined directly */ + ld=line_deviation(box1, j, aa[1][3]); + MSG(fprintf(stderr," X-1 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + + /* only lower side */ + for 
(j=i=aa[1][3];i!=aa[2][3];i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[i][1] + <=box1->frame_vector[j][1]) j=i; /* notice most upper vector */ + } if (j==i) Break; + /* calculate the distance to the center */ + x=box1->frame_vector[j][0]; + y=box1->frame_vector[j][1]; i2=j; + if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; + if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; + if ( aa[1][1]+aa[2][1]-2*y<=0) Break; + /* check if lower left and center point are joined directly */ + ld=line_deviation(box1, aa[1][3], j); + MSG(fprintf(stderr," 1-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + /* check if lower right and center point are joined directly */ + ld=line_deviation(box1, j, aa[2][3]); + MSG(fprintf(stderr," X-2 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + + /* only right side */ + for (j=i=aa[2][3];i!=aa[3][3];i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[i][0] + <=box1->frame_vector[j][0]) j=i; /* notice most left vector */ + } if (j==i) Break; + /* calculate the distance to the center */ + x=box1->frame_vector[j][0]; + y=box1->frame_vector[j][1]; i3=j; + if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; + if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; + if ( aa[2][0]+aa[3][0]-2*x<=0) Break; + if ( aa[3][0] <= x ) Break; + if ( aa[2][0] < x ) Break; + if ( aa[2][0] <= x ) ad=99*ad/100; + if (dx-(x-x0)2*sq(1024/4)) Break; + /* check if upper right and center point are joined directly */ + ld=line_deviation(box1, j, aa[3][3]); + MSG(fprintf(stderr," X-3 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + + /* only upper side */ + for (j=i=aa[3][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[i][1] + >=box1->frame_vector[j][1]) j=i; /* notice lowest vector */ + } if (j==i) Break; + /* calculate the distance to the center */ + 
x=box1->frame_vector[j][0]; + y=box1->frame_vector[j][1]; i4=j; + if (abs(aa[0][0]+aa[1][0]+aa[2][0]+aa[3][0]-4*x)>(dx+2)) Break; + if (abs(aa[0][1]+aa[1][1]+aa[2][1]+aa[3][1]-4*y)>(dy+2)) Break; + if ( aa[3][1]+aa[0][1]-2*y>=0) Break; + /* check if upper left and center point are joined directly */ + ld=line_deviation(box1, aa[3][3], j); + MSG(fprintf(stderr," 3-X %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + /* check if lower left and center point are joined directly */ + ld=line_deviation(box1, j, aa[0][3]); + MSG(fprintf(stderr," X-0 %d %d dist= %d/%d",x-x0,y-y0,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + + // center crossing of diagonal lines is small? + if (box1->frame_vector[i3][0] - box1->frame_vector[i1][0] > dx/2) Break; + + if (gchar) ad=99*ad/100; + bc='x'; if(hchar) bc='X'; + Setac(box1,bc,ad); + break; + } + // --- test \it x --------------------------------------------------- +#if 0 + for(ad=d=99;dx>4 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='x'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if( get_bw(x0,x0+dx/4,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break; + if( get_bw(x1-dx/4,x1,y0+dy/2,y0+dy/2,box1->p,cs,1) != 0 ) Break; + if( num_cross(x0+dx/4,x1-dx/4,y0+dy/2,y0+dy/2, box1->p,cs) != 1 ) Break; + if( num_cross(x0,x1,y0+dy/4,y0+dy/4, box1->p,cs) != 3 + && num_cross(x0,x1,y0+dy/8,y0+dy/8, box1->p,cs) < 3 ) Break; + if( num_cross(x0,x1,y1-dy/4,y1-dy/4, box1->p,cs) != 3 + && num_cross(x0,x1,y1-dy/8,y1-dy/8, box1->p,cs) < 3 ) Break; + if( gchar ) ad=97*ad/100; + if( hchar ) ad=96*ad/100; + bc='x'; + Setac(box1,bc,ad); + break; + } +#endif + return box1->c; +} + +static wchar_t ocr0_yY(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad,xa,ya,xb,yb,xc,yc,xd,yd; /* tmp-vars */ + wchar_t bc=UNKNOWN; 
+ + // --- test italic yY -------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='y'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num > 0) ad=97*ad/100; + if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) < 2 + && num_cross(0,dx-1, 1, 1,bp,cs) < 2 ) Break; + if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 + && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break; + if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1 + && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 ) Break; + if( num_cross(dx/3,dx/3,dy/4,dy-1,bp,cs) != 2 + && num_cross(dx/2,dx/2,dy/4,dy-1,bp,cs) != 2 ) Break; + for(yc=y=0,xc=x=dx/4;xy){ yc=y=i;xc=x; } + } if( y>12*dy/16 || y<3*dy/8 ) Break; + ya=dy/8; xa=xc-loop(bp,xc,ya,dx,cs,0,LE); if(xa< 0) Break; + yb=dy/8; xb=xc+loop(bp,xc,yb,dx,cs,0,RI); if(xb>=dx) Break; + for(y=dy/8;y6*dx/8) ad=99*ad/100; // why this??? + if (loop(bp,dx-1,dy-1,dx,cs,0,LE)<1) Break; + // printf(" abcd=%d %d %d %d %d %d %d %d -",xa,ya,xb,yb,xc,yc,xd,yd); + if( get_line2(xb,yb,xd,yd,bp,cs,100)<95 ) Break; + // if( get_line2(xc,yc,xd,yd,bp,cs,100)<95 ) Break; + // printf("ok"); + bc='y'; + if(gchar && !hchar) bc='y'; else + if(hchar && (!gchar || dy<14)) bc='Y'; else ad=98*ad/100; // SMALL-CAPS ??? 
+ Setac(box1,bc,ad); + break; + } + // --- test yY --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='y'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if( get_bw(x0,x0,y1-dy/8,y1,box1->p,cs,1) == 1 ) { + if( get_bw(x0,x0+4*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break; + } else { + if( get_bw(x0,x0+3*dx/8,y0+dy/8,y0+dy/8,box1->p,cs,1) != 1 ) Break; + } + if( num_cross(0,dx-1,dy/8,dy/8,bp,cs) != 2 + && num_cross(0,dx-1, 1, 1,bp,cs) != 2 ) Break; + if( num_cross(dx/2,dx/2,0, 1,bp,cs) != 0 ) Break; + if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 + && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 1 ) Break; + if( num_cross(dx-1,dx-1,0,dy-1,bp,cs) != 1 + && num_cross(dx-2,dx-2,0,dy-1,bp,cs) != 1 + && num_cross(dx-dx/8-1,dx-dx/8-1,0,dy-1,bp,cs) != 1 ) Break; + if( loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)+dx/8+1 // Jul00 + < loop(bp, 0,dy-1-dy/8,dx,cs,0,RI) ) Break; + for(y=0,x=dx/4;xy) y=i; + } if( y>10*dy/16 || y<2*dy/8 ) Break; + for(xc=xb=xa=dx,yc=yb=ya=y=0;ydy/8) Break; + for(i=dx,yc=y=dy/4;y<3*dy/4;y++){ + if( num_cross(0,dx-1,y,y,bp,cs) < 2 ) break; + x =loop(bp,dx-1 ,y,dx,cs,0,LE); + x+=loop(bp,dx-1-x,y,dx,cs,1,LE); + j =loop(bp,dx-1-x,y,dx,cs,0,LE); if(j<=i){ i=j;yc=y;xc=dx-1-x-j/2; } + } yc+=dy/16+1; + yc+=loop(bp,xc,yc,i,cs,1,DO)/2; + xa+= loop(bp,xa ,ya,dx,cs,1,RI)/2; + xb=dx-1-loop(bp,dx-1,yb,dx,cs,1,LE)/2; + yd=dy-1-dy/8;xd=dx-1-loop(bp,dx-1,yd,dx,cs,0,LE); if(xd>6*dx/8) Break; + /* check for serife at lower end */ + for (i=0,x=dx-1;ix+dx/16+1) break; /* detect serif */ + if (j=5*dy/8 && !gchar) + if( get_line2(xa,ya,xd ,yd,bp,cs,100)>95 ) + if( get_line2(xb,yb,xd ,yd,bp,cs,100)>95 ) + { if (dx>4) { Break; } else ad=ad*98/100; } // ~V + xa=loop(bp,0,dy/8,dx,cs,0,RI); + xb=loop(bp,0,dy/2,dx,cs,0,RI); + xc=loop(bp,0,dy-1,dx,cs,0,RI); + if( 2*xb< xa+xc ) ad=98*ad/100; // ~V + if( 2*xb<=xa+xc ) ad=98*ad/100; + if( 2*xb<=xa+xc+1 ) ad=98*ad/100; + + bc='y'; + if ((!gchar) && (!hchar)) 
ad=98*ad/100; + if(y0m2-(box1->m2-box1->m1)/4) + { bc='Y'; if(gchar) ad=98*ad/100; } + // SMALL-CAPS ??? + Setac(box1,bc,ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_zZ(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + int i1,i2,i3,i4,i5,dbg[9], + d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + (*aa)[4]=sdata->aa, /* the for line ends, (x,y,dist^2,vector_idx) */ + ad; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test zZ ------- + for(ad=d=100;dx>3 && dy>3;){ // dy>dx + DBG( wchar_t c_ask='z'; ) /* for debugging purpose */ + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num > 0) ad=98*ad/100; /* # */ + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the 4 edges of the z */ + if (aa[0][2]>d) Break; + if (aa[1][2]>d) Break; + if (aa[2][2]>d) Break; + if (aa[3][2]>d) Break; + if (aa[3][0]-aa[0][0]dy/8) ad=99*ad/100; + if (aa[0][1]-y0>dy/8) ad=99*ad/100; + if (2*dx2*sq(1024/4)) Break; + ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; + d=line_deviation(box1, aa[1][3], aa[2][3]); if (d>2*sq(1024/4)) Break; + + /* search uppermost right > */ + i1=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1, y0); + x=box1->frame_vector[i1][0]; + y=box1->frame_vector[i1][1]; + if (y-y0 > 5*dy/8) Break; + if (x-x0 < 3*dx/8) Break; + if (x-aa[0][0]<=dx/4) Break; // ~lI + if (x-aa[0][0]<=dx/3) ad=98*ad/100; // ~lI + if (x-aa[0][0]<=dx/2) ad=99*ad/100; // ~lI + /* search most right > ~2 */ + i3=nearest_frame_vector(box1,aa[0][3],aa[1][3], x1+2*dx, (y0+y1)/2); + MSG(fprintf(stderr,"xy= %d %d %d %d %d %d",x0,y0,x-x0,y-y0,box1->frame_vector[i3][0]-x0,box1->frame_vector[i3][1]-y0);) + if ( box1->frame_vector[i3][1]-y0> dy/4 + && box1->frame_vector[i3][0]-x>=0) Break; + if ( box1->frame_vector[i3][1]-y> dy/8 + && box1->frame_vector[i3][0]-x>=-dx/8) ad=98*ad/100; + if ( box1->frame_vector[i3][1]-y> dy/8 + && 
box1->frame_vector[i3][0]-x>= 0) ad=97*ad/100; + if (box1->frame_vector[i3][0]-aa[0][0] + < aa[3][0]-box1->frame_vector[i3][0]) break; // ~lI + if (box1->frame_vector[i3][0]-aa[0][0] + <(aa[3][0]-box1->frame_vector[i3][0])*2) ad=98*ad/100; // ~lI + /* better test for a bow or peaked angle */ + /* upper part of a 2, on a Z a and b should be at c + .....$@@@@@@a...c. o1 (o1-a)=(dx+5)^2 =dx^2+10*dx+25 + ...$$@@@@@@@@@.... (o1-b)=(dx+1)^2+4^2=dx^2+ 2*dx+18 + ..$@@$@@@$@@@@@... + ..@@@.....$$@@@@.. + ..@@.......@$@@@b. + ..$.........$@@@@. + .$$..........$@@@. + .$...........@@@@. + .............@@@@.< + .............$@@$. + ............$@@@.. + ............@@$... + ............$@$... + --- snip ---- + */ + i4=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1+dx, y0); + i5=nearest_frame_vector(box1,aa[2][3],aa[0][3], x1, y0-dx); + d=sq(box1->frame_vector[i5][0]-box1->frame_vector[i4][0]) + +sq(box1->frame_vector[i5][1]-box1->frame_vector[i4][1]); + if (d>2*sq(dx/8+1)) break; + + /* check if upper left and upper right point are joined directly */ + dbg[0]=d=line_deviation(box1, aa[0][3], i1); if (d >2*sq(1024/4)) Break; + /* check if lower right and upper left point are joined directly */ + dbg[1]=d=line_deviation(box1, i1, aa[1][3]); if (d >2*sq(1024/4)) Break; + + /* search lowest left < */ + i2=nearest_frame_vector(box1,aa[2][3],aa[3][3], x0, y1); + x=box1->frame_vector[i2][0]; + y=box1->frame_vector[i2][1]; + if (y-y0 < 3*dy/8) Break; + if (x-x0 > 5*dx/8) Break; + if (aa[2][0]-x<=dx/4) Break; // ~lI + if (aa[2][0]-x<=dx/3) ad=98*ad/100; // ~lI + if (aa[2][0]-x<=dx/2) ad=99*ad/100; // ~lI + /* check if upper right and lower left point are joined directly */ + dbg[2]=d=line_deviation(box1,i2, aa[3][3]); if (d >2*sq(1024/4)) Break; + /* check if lower left and lower right point are joined directly */ + dbg[3]=d=line_deviation(box1, aa[2][3],i2); if (d >2*sq(1024/4)) Break; + + if (box1->frame_vector[i1][0] + -box1->frame_vector[i2][0]<=dx/8) Break; /* nonsignificant 
distance */ + MSG( \ + fprintf(stderr,"^v %d %d %d %d line deviation %d %d %d %d max %d %d",\ + box1->frame_vector[i1][0]-x0,box1->frame_vector[i1][1]-y0,\ + box1->frame_vector[i2][0]-x0,box1->frame_vector[i2][1]-y0,\ + dbg[0],dbg[1],dbg[2],dbg[3],2*sq(1024/4),2*sq(1024));) + ad=(100-(dbg[0]-sq(1024)/2)/sq(1024)/4)*ad/100; + ad=(100-(dbg[1]-sq(1024)/2)/sq(1024)/4)*ad/100; + ad=(100-(dbg[2]-sq(1024)/2)/sq(1024)/4)*ad/100; + ad=(100-(dbg[3]-sq(1024)/2)/sq(1024)/4)*ad/100; + + if ( gchar) ad=98*ad/100; + bc='z'; + if( hchar ) bc='Z'; + Setac(box1,bc,ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_wW(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar,handwritten=0, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad,ya,yb,xa,xb,xc,xd,xe,t1; /* tmp-vars */ + wchar_t ac; + + // ------- test w ~{\it w} --------------- + for(ad=d=100;dx>3 && dy>3;){ // dy<=dx + DBG( wchar_t c_ask='w'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + // xa xe + // \ xc / <=ya connected xa-xb-xc-xd-xe + // xb xd <=yb + // get two lowest points i3,i4,ya + // out_x(box1); + // ~ul ~uf + // out_x(box1); + for(y=dy/8;y< dy/2;y++) if( num_cross(0,dx-1,y,y,bp,cs)< 2 ) break; + if(y4) { /* 4x6 is to small */ + for(y=dy-1-dy/16;y>3*dy/4;y--) + if( num_cross(0,dx-1,y,y,bp,cs)==2 ) break; + if(y==3*dy/4) Break; + } + yb=y; + t1=loop(bp,0 ,dy/4,dx,cs,0,RI); + t1=loop(bp,t1,dy/4,dx,cs,1,RI); // thickness of line? 
+ for(i=j=0 ;y> dy/4;y--) if( num_cross(0,dx-1,y,y,bp,cs)==4 ) i++; + else if( num_cross(0,dx-1,y,y,bp,cs)>=3 ) j++; + if(i+56 || dx>4)) Break; + if(i+j==0 && dx<=4){ + if (abs(loop(bp, 1,dy-1,dy,cs,0,UP) + -loop(bp,dx-2,dy-1,dy,cs,0,UP))>dy/8+1) Break; // 4x6 N + if ( ( loop(bp, 1, 0,dy,cs,0,DO)>=dy-2 + && loop(bp, 0,dy-1,dy,cs,0,UP)>0) + || ( loop(bp,dx-2, 0,dy,cs,0,DO)>=dy-2 + && loop(bp,dx-1,dy-1,dy,cs,0,UP)>0)) Break; // 4x6 UV + ad=ad*99/100; // 4x6 font + MSG(fprintf(stderr,"ad=%d",ad);) + } + if( num_cross(0,dx-1, 1, 1,bp,cs)< 2 + && num_cross(0,dx-1,dy/16,dy/16,bp,cs)< 2 ) Break; + x =loop(bp,0 ,yb,dx,cs,0,RI); + xb=loop(bp,x ,yb,dx,cs,1,RI);xb=x+xb/2; if(xb>dx/2) Break; + x =loop(bp,dx-1 ,yb,dx,cs,0,LE); + xd=loop(bp,dx-1-x,yb,dx,cs,1,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break; + for(y=0,xc=x=xb+1;xy){xc=x;y=i;} + if(dx>4 && !y) Break; + ya=dy-1-y; // flat + y=loop(bp,xc,ya,dy,cs,1,UP);if(y)y--; + if (dy>6 || dx>4) { // ~4x6 font + if( num_cross(0 ,xc ,ya-y ,ya-y ,bp,cs)!= 2 + && num_cross(0 ,xc ,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break; + if( num_cross(xc,dx-1,ya-y ,ya-y ,bp,cs)!= 2 + && num_cross(xc,dx-1,ya-y/2,ya-y/2,bp,cs)!= 2 ) Break; + } + ya-=y/2; + x =loop(bp,0 ,1 ,dx,cs,0,RI); + xa=loop(bp,x ,1 ,dx,cs,1,RI); + if( x+xa>xb ){ // may be, here is a small but thick letter + // later add some proofs + xa=x+xa/4; + } else { + xa=x+xa/2; + } + x =loop(bp,dx-1 ,1 ,dx,cs,0,LE); + xe=loop(bp,dx-1-x,1 ,dx,cs,1,LE);xe=dx-1-x-xe/2; + MSG( fprintf(stderr,"a-e: %d %d %d %d %d %d %d %d %d %d", + xa,1,xb,yb,xc,ya,xd,yb,xe,1);) + if (ya94 ) break; + if (x==xa+i) Break; // no vert. 
line found + if( get_line2(xb,yb-1,xc,ya ,bp,cs,100)<95 + && get_line2(xb,yb-1,xc,ya+dy/32,bp,cs,100)<95 + && get_line2(xb,yb-1,xc,ya+dy/16,bp,cs,100)<95 ) Break; + if( get_line2(xc, ya,xd, yb,bp,cs,100)<95 + && get_line2(xc+1,ya,xd, yb,bp,cs,100)<95 ) Break; + if( get_line2(xd,yb,xe ,1+dy/16,bp,cs,100)<95 + && get_line2(xd,yb,dx-1 ,1+dy/8 ,bp,cs,100)<95 // round w + && get_line2(xd,yb,xe+dx/20,1+dy/16,bp,cs,100)<95 ) Break; + // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break; + // ~ur + MSG(fprintf(stderr,"ad=%d",ad);) + for(i=0,y=5*dy/8;yi ) i=x; if( x3 && dy>3;){ // dy<=dx 4x6font (like a H with fat bar) + DBG( wchar_t c_ask='w'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + // ~ul ~uf + if( num_cross(0,dx-1,dy/2,dy/2,bp,cs)<2 ) Break; + if( num_cross(0,dx-1,dy/8,dy/8,bp,cs)<2 ) handwritten=40; + if( num_cross(0,dx-1,dy/4,dy/4,bp,cs)<2 ) handwritten=80; + for(i=0,y=0;ydx/2) Break; + xb=loop(bp,x ,yb,dx,cs,0,RI);xb=x+xb/2; if(xb>dx/2) Break; + x =loop(bp,dx-1 ,yb,dx,cs,0,LE); + x+=loop(bp,dx-1-x,yb,dx,cs,1,LE); + xd=loop(bp,dx-1-x,yb,dx,cs,0,LE);xd=dx-1-x-xd/2;if(xd<3*dx/8) Break; + if( num_cross(xb,xd,yb,yb ,bp,cs)!= 1 ) Break; + if( num_cross(xb,xb,yb,dy-1,bp,cs)!= 1 ) Break; + if( num_cross(xd,xd,yb,dy-1,bp,cs)!= 1 ) Break; + if( num_cross(xb,xb, 0,yb ,bp,cs)!= 0 ) Break; + if( num_cross(xd,xd, 0,yb ,bp,cs)!= 0 ) Break; + // if( num_hole(0,dx-1,0,dy-1,bp,cs,NULL) != 0 ) Break; + if (sdata->holes.num != 0) Break; + // ~ur + for(i=0,y=3*dy/4;yi ) i=x; if( xc; +} + +static wchar_t ocr0_aA(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad,ya; /* tmp-vars */ + + // --- test A --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='A'; ) + if (sdata->holes.num > 2) Break; /* 
tolerant against a tiny hole */ + // first selection (rough sieve) + if( get_bw(dx/2 ,dx/2 ,dy-1-dy/8,dy-1,bp,cs,1) == 1 + && get_bw(dx/2-1,dx/2-1,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) Break; // ~B + ya=0; /* upper end, not 0 for modified A etc. */ + if (box1->modifier) + for (ya=0;ya=dy/2) ya=0; // already subtracted? + if( num_cross(0,dx-1,ya+ 1 ,ya+ 1 ,bp,cs)!=1 // 600dpi + && num_cross(0,dx-1,ya+ dy/8 ,ya+ dy/8 ,bp,cs)!=1 + && num_cross(0,dx-1,ya+ dy/16 ,ya+ dy/16 ,bp,cs)!=1 + && num_cross(0,dx-1,ya+ dy/8+1,ya+ dy/8+1,bp,cs)!=1 ) Break; + if( num_cross(0,dx-1, 7*dy/8 , 7*dy/8 ,bp,cs)!=2 + && num_cross(0,dx-1, 7*dy/8-1, 7*dy/8-1,bp,cs)!=2 ) Break; + if ( num_cross( 0,dx/8,ya+dy/8,ya+0,bp,cs)>0 ) Break; // ~R + for(y=ya+dy/8;y 1 ) break; + if( y==ya+dy/2 ) Break; i1=y; + if (dy>20) i1++; /* get arround some noise fat font */ + + x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) Break; + x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) Break; i2=x; + x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) Break; i2=(x+i2)/2; + // hole (i2,i1) + y+=loop(bp,i2,y,dy,cs,1,DO); + y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100; + if (y>5*dy/6) { MSG(fprintf(stderr,"x,y,i1,i2= %d %d %d %d",x,y,i1,i2);) } + if (y>5*dy/6) Break; + + if( sdata->holes.num != ((box1->modifier==RING_ABOVE)?2:1) + || sdata->holes.hole[0].y1-ya >= dy-1-dy/4) Break; + // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) Break; + // out_x(box1); + i3=0;i4=0; + for(x=dx/3;x<2*dx/3;x++){ + i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2) + i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break; + if(i4==1) i3=x; + } if(i4<1 || i4>2 || i3==0){ +// ToDo: MSG(fprintf(stderr,"x,y,i4,i3= %d %d %d %d",x,y,i4,i3);) + Break; + } + if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) Break; + + i1=loop(bp,dx-1,ya+ (dy-ya)/4,dx,cs,0,LE); + i2=loop(bp,dx-1,ya+ (dy-ya)/2,dx,cs,0,LE); + i3=loop(bp,dx-1,dy-1-(dy-ya)/4,dx,cs,0,LE); + if( 2*i2-dx/8>i1+i3 ) ad=99*ad/100; /* 6*8 font */ + if( 2*i2+dx/4i1+i3 ) Break; + + 
i1=loop(bp,0 ,ya+ (dy-ya)/4,dx,cs,0,RI); // linke senkr. linie + i2=loop(bp,0 ,ya+ (dy-ya)/2,dx,cs,0,RI); + i3=loop(bp,0 ,dy-1-(dy-ya)/4,dx,cs,0,RI); + if( 2*i2-dx/8>i1+i3 ) ad=98*ad/100; /* 6*8 font */ + if( 2*i2+dx/4i1+i3 || i1i3+dx/16) break; if( i1+120) ad=97*ad/100; // italic-a + + if (!hchar) ad=99*ad/100; // italic-a + Setac(box1,'A',ad); + break; + } + // --- test a ------------------------------------------- + // with a open bow above the circle starting + // on the right side of the circle + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='a'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( get_bw(x0 , x0+dx/2, y1-dy/3, y1-dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/3, x1 , y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/3, x1 , y0+dy/4, y0+dy/4,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2, x0+dx/2, y1-dy/3, y1, box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2, x0+dx/2, y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/3, x1-dx/3, y0 , y0 ,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/4, x1-dx/2, y1 , y1 ,box1->p,cs,1) != 1 ) + if( get_bw(x0+dx/4, x1-dx/3, y1-1 , y1-1 ,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0 , x0 , y0+dy/2, y1 ,box1->p,cs,1) != 1 ) + if( get_bw(x0+dx/8, x0+dx/8, y0+dy/2, y1 ,box1->p,cs,1) != 1 ) Break; + if( loop(bp,3*dx/8,0,dy,cs,0,DO) > 3*dy/16 ) Break; // ~d + if( num_cross(0,dx-1,dy/4 ,dy/4 , bp,cs) >2 // ~glued am != an + && num_cross(0,dx-1,dy/4+1,dy/4+1, bp,cs) >2 ) Break; + + for( x=dx/4;xdy/2) break; + i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break; + } if( xy-1, bp,cs) == 3 ) i--; + } if( i ) Break; + + i1=loop(bp,0, dy/8,dx,cs,0,RI); + i3=loop(bp,0,3*dy/4,dx,cs,0,RI); + for(y=dy/8+1;y<3*dy/4;y++){ + i2=loop(bp,0,y,dx,cs,0,RI);if(2*i2>i1+i3+1) break; + } if(y==3*dy/4) Break; // ~6 + // ~ s (small thick s), look for vertikal line piece + for(x=3*dx/4;xdy/4 ) break; + if( x==dx ) Break; + + if (sdata->holes.num != 1) ad=96*ad/100; else + if 
(sdata->holes.num == 1) + if( num_hole ( x0, x1, y0+dy/3, y1 ,box1->p,cs,NULL) != 1 ) Break; + // if( num_hole ( x0, x1, y0, y1, box1->p,cs,NULL) != 1 ) Break; + if( num_hole ( x0, x1, y0, y1-dy/3 ,box1->p,cs,NULL) != 0 ){ + i =loop(bp,0,dy/4,dx,cs,0,RI); + i =loop(bp,i,dy/4,dx,cs,1,RI); + if(ii) Break; // ~ 8 + } + /* test for horizontal symmetry ~8 */ + for (y=0;y3 && dy>3;){ // min 4x4 + DBG( wchar_t c_ask='a'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/3 , x0+dx/3,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + i = loop(bp,dx/2, 0 ,dy,cs,0,DO); if (i>dy/4) Break; + i+= loop(bp,dx/2, i ,dy,cs,1,DO); if (i>dy/2) Break; + i = loop(bp,dx/2, i ,dy,cs,0,DO); if (ip,cs,1) == 1 ) Break; + + if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) != 2 ) Break; + if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND + if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; + i = loop(bp,dx/2,dy-1 ,dy,cs,0,UP); if (i>dy/3) Break; + y = i+loop(bp,dx/2,dy-1-i,dy,cs,1,UP); if (i>dy/2) Break; + // normal 'a' has a well separated vertical line right from the circle + // but fat 'a' is like a 'o', only bigger on the right side + if( num_cross(x0+dx/2-1,x1,y1 ,y1 ,box1->p,cs) < 2 /* 4x6font */ + && num_cross(x0+dx/2-1,x1,y1-i,y1-i ,box1->p,cs) < 2 /* 2 or 3 */ + && num_cross(x0+dx/2-1,x1,y1-y,y1-y ,box1->p,cs) < 2 ) + { if (loop(bp, 0,dy-1-dy/16,dx,cs,0,RI) + <4*loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE)) { Break;} + else ad=98*ad/100; + } + if( num_cross(x0,x1,y0+dy/2 , y0+dy/2,box1->p,cs) < 2 + || num_cross(x0,x1,y0+dy/3 , y0+dy/3,box1->p,cs) < 2 ) Break; // Jun00 + + if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) + if( 
num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/4,box1->p,cs) != 1 ) Break; + if (sdata->holes.num != 1) + if( num_hole(x0,x1-2,y0 ,y1 ,box1->p,cs,NULL) != 1 ) + // if( num_hole(x0,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) + Break; + if( num_hole(x0,x1 ,y0+dy/3,y1-1 ,box1->p,cs,NULL) != 0 ) Break; + + if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= + loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; + + if( loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)> dx/4 + && loop(bp,dx-1,dy-2,x1-x0,cs,0,LE)> (dx+4)/8 ) ad=97*ad/100; + + x=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); + i=loop(bp,dx-1, dy/4,dx,cs,0,LE); if (abs(x-i)>dx/4) Break; + + for( x=dx/4;xdy/2) break; + i=loop(bp,x,dy-1,y1-y0,cs,0,UP); if (i>dy/2) break; + } if( xp,cs) == 1 ) + if( num_cross(x0 , x1, y0, y0,box1->p,cs) == 1 ) + if( loop(bp,dx-1, 0,y1-y0,cs,0,DO)> dy/4 + && loop(bp,dx-1,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~o + if( loop(bp,dx/2,dy-1,y1-y0,cs,0,UP)> dy/4 ) Break; // ~q + + if (hchar) ad=98*ad/100; + if (gchar) ad=98*ad/100; + // handwritten-a (alpha) + Setac(box1,'a',ad); + break; + } + // --- test A_A_WITH_OGONEK 0x0104 Centr.Eur.Font ------------------------- + /* not sure if we should move this to a get_CentralEuropean-function */ + for(ad=d=100;dx>2 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='A'; ) + if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ + // first selection (grobes Sieb) + if( get_bw(dx/2,dx/2,dy-1-dy/8,dy-1,bp,cs,1) == 1 ) break; // ~B + if( num_cross(0,dx-1, 1 , 1 ,bp,cs)!=1 // 600dpi + && num_cross(0,dx-1, dy/8 , dy/8 ,bp,cs)!=1 + && num_cross(0,dx-1, dy/16 , dy/16 ,bp,cs)!=1 + && num_cross(0,dx-1, dy/8+1, dy/8+1,bp,cs)!=1 ) break; + if( num_cross(0,dx-1, dy-1 , dy-1 ,bp,cs)!=1 ) break; + if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs)!=2 + && num_cross(0,dx-1, dy/3 , dy/3 ,bp,cs)!=2 ) break; + if ( num_cross( 0,dx/8,dy/8, 0,bp,cs)>0 ) break; // ~R + for(y=dy/8;y 1 ) break; + if( y==dy/2 ) break; i1=y; + if (dy>20) i1++; /* get arround some noise fat font */ + + x =loop(bp,0,i1,dx,cs,0,RI); if(x>3*dx/4) break; + 
x+=loop(bp,x,i1,dx,cs,1,RI); if(x>3*dx/4) break; i2=x; + x+=loop(bp,x,i1,dx,cs,0,RI); if(x<3*dx/8) break; i2=(x+i2)/2; + // hole (i2,i1) + y+=loop(bp,i2,y,dy,cs,1,DO); + y+=loop(bp,i2,y,dy,cs,0,DO); if(y>3*dy/4) ad=ad*99/100; + if (y>5*dy/6) break; + + if( sdata->holes.num != 1 || sdata->holes.hole[0].y1 >= dy-1-dy/4) break; + // if( num_hole ( x0, x1, y0, y1-dy/4 ,box1->p,cs,NULL) != 1 ) break; + // out_x(box1); + i3=0;i4=0; + for(x=dx/3;x<2*dx/3;x++){ + i4=num_cross(i2,x,y ,dy-1,bp,cs);if(i4<1 || i4>2) + i4=num_cross(i2,x,y+dy/16,dy-1,bp,cs);if(i4<1 || i4>2) break; + if(i4==1) i3=x; + } if(i4<1 || i4>2 || i3==0){ +// ToDo: g_debug_A(printf(" A: x,y,i4,i3= %d %d %d %d\n",x,y,i4,i3);) + break; + } + if( get_bw(dx-1-dx/4, dx-1, dy-1-dy/4, dy-1, bp,cs,1) != 1 ) break; + /* dy/4 changed to dy/6 because of screenfonts */ + /* there are strange fonts, one has a serif on the upper end of A */ + if ( num_cross( 0,dx/8,dy/6, 0,bp,cs)>0 ) break; + if ( num_cross(dx-1-dx/4,dx-1, 0,dy/6,bp,cs)>0 ) break; + + i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); + i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); + i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); + if( 2*i2+dx/4i1+i3 ) break; + + i1=loop(bp,0 , dy/4,dx,cs,0,RI); // linke senkr. 
linie + i2=loop(bp,0 , dy/2,dx,cs,0,RI); + i3=loop(bp,0 ,dy-1-dy/4,dx,cs,0,RI); + if( 2*i2+dx/4i1+i3 || i1i3+dx/16) break; if( i1+12c; +} + +static wchar_t ocr0_cC(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad,t1; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test c,C --------------------------------------------------- + for(ad=d=100;dx>2 && dy>2;){ // min 3x4 + DBG( wchar_t c_ask='c'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if( get_bw(x0 , x0+dx/3,y0+dy/2, y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2, x0+dx/2,y1-dy/3, y1, box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2, x0+dx/2,y0 , y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( num_cross(x0,(x0+x1)/2,(y0+y1)/2,(y0+y1)/2,box1->p,cs) > 1 ) Break; // ~ocr-a-[ + + for(y=y0+dy/4;yp,cs,1) == 0 ) break; + if( y==y0+3*dy/4 ) Break; i1=y; // i1: upper end of right gap + + // measure thickness of line! 
+ t1=loop(bp, 0,dy/2,dx,cs,0,RI); + t1=loop(bp,t1,dy/2,dx,cs,1,RI); + if (t1>dx/2) Break; + + for(y=i1,i2=0,x=x0+dx/2;xp,x0+dx/2,i1,dy,cs,0,DO); + if( i>i2 ) { i2=i; } + } if(i2p,x0+5*dx/8,i1,dy,cs,0,UP); + i =y+1-loop(box1->p,x0+4*dx/8,i1,dy,cs,0,UP); if(iy0+ dy/4+t1/2) Break; // highest + + for(y=i1;yp,cs,1) == 1 ) break; + if( y-i1p,cs) < 1 ) Break; // ~L + if (loop(box1->p,x0,y0+3*dy/4,dx,cs,0,RI)>dx/16) + if( num_cross(x0+dx/2,x1,i3 ,y1,box1->p,cs) < 1 + && num_cross(x0+dx/2,x1,y1-dy/4,y1,box1->p,cs) < 1 ) Break; // ~r + + i=1; + for(x=dx/2;x=cs + && getpixel(bp,x+1,y )< cs + && getpixel(bp,x+1,y-1)< cs + && getpixel(bp,x ,y-1)< cs ) { i=0;break; } + } + if(!i) ad=95*ad/100; // ~G + + i=loop(bp,0,dy/2,dx,cs,0,RI); + for(y=0;y=dy/4;y--){ + x =loop(bp,0,y,dx,cs,0,RI); + x+=loop(bp,x,y,dx,cs,1,RI); if(x>i5) i5=x; + i =loop(bp,x,y,dx,cs,0,RI); if(ii4+dx/32 ) break; // unusual for c, more a bad e? + } if( y>=dy/4 ) Break; + + if( !hchar ){ // test for e where the middle line is partly removed + x= loop(bp,0,dy/2,dx,cs,0,RI); + x=x +loop(bp,x,dy/2,dx,cs,1,RI); + y=dy/2-loop(bp,x,dy/2,dy,cs,0,UP)-1; + i=x +loop(bp,x,y,dx,cs,1,RI); + i=i +loop(bp,i,y,dx,cs,0,RI); + if( num_cross(x ,x ,1,dy/2,bp,cs) > 1 + || num_cross(x+1,x+1,1,dy/2,bp,cs) > 1 ) + if( num_cross(i-1,i-1,1,dy/2,bp,cs) > 1 + || num_cross(i ,i ,1,dy/2,bp,cs) > 1 ) Break; // ~bad e + } + if( dy>16 && dy>3*dx && hchar ){ // ~[ + x= loop(bp,0, dy/16,dx,cs,0,RI); + x=+loop(bp,0,dy-1-dy/16,dx,cs,0,RI); + i= loop(bp,0, dy/2 ,dx,cs,0,RI)*2; + if( i>=x ) + if( num_cross(0,dx-1,dy/4,dy/4,bp,cs) < 2 ) Break; + + } + if( get_bw(x0,x0,y0 ,y1 ,box1->p,cs,2) != 2 + && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 + && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 + && get_bw(x1,x1,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~[ */ + + x =loop(bp, 0,dy/2,dx,cs,0,RI); + i =loop(bp,dx-1,dy/2,dx,cs,0,LE); + if( (i7 ) + if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 + && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 + && loop(bp,dx-1,dy-1-dy/ 
8,dx,cs,0,LE) + > loop(bp,dx-1,dy-1-dy/16,dx,cs,0,LE) + && loop(bp,dx-1, dy/ 8,dx,cs,0,LE) + > loop(bp,dx-1, dy/16,dx,cs,0,LE) ) Break; // ~( + +// printf(" hchar=%d i1=%d i2=%d %d\n",hchar,i1-y0,i2-y0,9*dy/16); + // ~G without characteristic crotchet + if (hchar && dy>15 && dx>7 && i2-y0<9*dy/16 && i1-y0<=dy/4) + if ( loop(bp,5*dx/8,i2-y0,dy,cs,0,DO) > 2*dy/8 ){ + Setac(box1,'G',90); + Break; + } + + if (hchar){ + i=1; + for(x=dx/2;x=cs + && getpixel(bp,x+1,y )< cs + && getpixel(bp,x+1,y-1)< cs + && getpixel(bp,x ,y-1)< cs ) { i=0;break; } + } + if (i) ad=98*ad/100; // ~( + if (dy>2*dx) ad=99*ad/100; + } + if( loop(bp,dx-1,dy/2,dx,cs,0,LE) < 6*dx/8 ) ad=98*ad/100; + + i= loop(bp,dx-1,dy/16,dx,cs,0,LE); + j= loop(bp,dx/2,0 ,dy,cs,0,DO); + if (i>=dx/2 && j>dy/8 && j>2 && j=3*dx && dy>12) ad=99*ad/100; // ( + i= loop(bp,dx-1,dy-1,dy,cs,0,UP); + j= loop(bp,dx/2,dy-1,dy,cs,0,UP); + if (i==0 && j>dy/8) ad=95*ad/100; // < + i= loop(bp,dx-1, 0,dy,cs,0,DO); + j= loop(bp,dx/2, 0,dy,cs,0,DO); + if (i==0 && j>dy/8) ad=95*ad/100; // < + if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>= 3*dx/4) ad=98*ad/100; // < + if (loop(bp,0,dy-1-dy/8,dx,cs,0,RI)>=(dx+1)/2) ad=98*ad/100; // < + if (loop(bp,0, dy/8,dx,cs,0,RI)>=dx/2) ad=98*ad/100; // < + + if (gchar) ad=98*ad/100; // could happen for 5x7 font + bc=((hchar)?'C':'c'); + Setac(box1,bc,ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_lL(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i0,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test L --------------------------------------------------- + for(ad=d=100;dx>2 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='L'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + i=loop(bp,dx-1,dy/2,dx,cs,0,LE); + if (i<3 && dy>8) {Break;} + if (ip,x0 ,y,dx,cs,0,RI); + 
j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ){ i=j;i1=y; } + } if( i<3*dx/4 ) Break; i1=i; // length of horizontal line + // line thickness (i2) + i=loop(box1->p,x0 ,y0+dy/2,dx,cs,0,RI); if( i>dx/2 ) Break; + j=loop(box1->p,x0+i,y0+dy/2,dx,cs,1,RI); if( i+j>dx/2 ) Break; i2=j; + if (loop(bp,dx-1, 0,dx,cs,0,LE)dx/2 + && loop(bp, 0,5*dy/8,dx,cs,0,RI)p,x0 ,y,dx,cs,0,RI); + if ( j>(dx+2)/4+(y1-dy/4-y)*dx/2/dy ) { i=0; break; } + x=loop(box1->p,x0+j,y,dx,cs,1,RI); + if( ((x>i2+1 || 4*x<3*i2) && y>y0+dy/8) || 4*x>3*i1 ) i=0; + } if( !i ) Break; + if( num_cross(0, dx-1-dx/8, dy-1-dy/2, dy-1-dy/2,bp,cs) != 1 ) Break; + if( num_cross(0, dx-1 , dy/3 , dy/3,bp,cs) != 1 ) Break; + if( num_cross(0, dx-1 , dy/8 , dy/8,bp,cs) != 1 ) Break; + if (loop(bp,0,dy-1,dx,cs,0,RI) + -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c + if (loop(box1->p,x0+dx/4,y1,dy,cs,0,UP)>1+dy/16) ad=99*ad/100; // ~4 + + if ( gchar) ad=98*ad/100; + if (!hchar) ad=99*ad/100; + if (5*dx<2*dy && loop(box1->p,x0,y1,dx,cs,0,RI)>dx/4) ad=99*ad/100; // ~l + Setac(box1,'L',ad); + break; + } + // --- test l --------------------------------------------------- + // recognize a "l" is a never ending problem, because there are lots of + // variants and the char is not very unique (under construction) + // --- test italic l --------------------------------------------------- + // --- test l ~italic (set flag-italic) -------------------------------- + // if unsure d should be multiplied by 80..90% + for(ad=d=100; dy>dx && dy>5;){ // min 3x4 + DBG( wchar_t c_ask='l'; ) + if( box1->dots>0 ) Break; + if( num_cross(0, dx-1,dy/2,dy/2,bp,cs) != 1 + || num_cross(0, dx-1,dy/4,dy/4,bp,cs) != 1 ) Break; + // mesure thickness + for(i1=0,i2=dx,y=dy/4;yi1 ) { i1=j; } // thickest + if( j2*i2 ) Break; + if(box1->m3 && dy<=box1->m3-box1->m2) ad=94*ad/100; + if( box1->m2-box1->m1>1 && y0>=box1->m2 ) ad=94*ad/100; + for(i0=0,i3=0,y=0;yi3 ) { i3=j; } // widest space + j = loop(bp,j,y,dx,cs,1,RI); + if( j>i0 ) { i0=j;i3=0; } // 
thickest + } + if ( i0>4*i2 || 3*i3>2*dx) + if ( loop(bp,dx-1,dy-1,dx,cs,0,LE)>3*dx/8 + || loop(bp, 0,dy-1,dx,cs,0,RI)>3*dx/8) Break; // ~7 + + // detect serifs + x =loop(bp,0, 0,dx,cs,0,RI); + i3=loop(bp,x, 0,dx,cs,0,RI); + x =loop(bp,0, 1,dx,cs,0,RI); + x =loop(bp,x, 1,dx,cs,0,RI); if(x>i3) i3=x; + x =loop(bp,0,dy-1,dx,cs,0,RI); + i4=loop(bp,x,dy-1,dx,cs,0,RI); + x =loop(bp,0,dy-2,dx,cs,0,RI); + x =loop(bp,x,dy-2,dx,cs,0,RI); if(x>i4) i4=x; + if( i3>i1+dx/8+1 && i4>i1+dx/8+1 ) Break; // ~I + + for(i=dx,j=0,y=1;yi+1) break; i=x; + if( num_cross(0,dx-1,y ,y ,bp,cs)==2 + && num_cross(0,dx-1,y+1+dy/32,y+1+dy/32,bp,cs)==2 ) j=1; + } if ( y3) + if( get_bw(dx-1-dx/8,dx-1,0,dy/6,bp,cs,1) != 1 ) + if( get_bw(dx-1-dx/8,dx-1,0,dy/2,bp,cs,1) == 1 ) Break; + + if( get_bw(dx-1-dx/8,dx-1,dy/4,dy/3,bp,cs,1) != 1 ) // large I ??? + if( get_bw(0 ,dx/8,dy/4,dy/3,bp,cs,1) != 1 ) + if( get_bw(dx-1-dx/8,dx-1,0 ,dy/8,bp,cs,1) == 1 ) + if( get_bw(0 ,dx/8,0 ,dy/8,bp,cs,1) == 1 ) ad=ad*97/100; + if( get_bw(dx-1-dx/8,dx-1,dy/2,dy-1,bp,cs,1) != 1 ) // r ??? + if( get_bw(0 ,dx/8,dy/2,dy-1,bp,cs,1) == 1 ) + if( get_bw(dx-1-dx/8,dx-1,0 ,dy/3,bp,cs,1) == 1 ) + if( get_bw(0 ,dx/8,0 ,dy/3,bp,cs,1) == 1 ) Break; + + for( y=1;y<12*dy/16;y++ ) + if( num_cross(0, dx-1, y , y ,bp,cs) != 1 // sure ? + && num_cross(0, dx-1, y-1, y-1,bp,cs) != 1 ) break; + if( y<12*dy/16 ) Break; + + if(dx>3){ + for( y=dy/2;yy-1-5*dy/16;y>=dy/5;y--){ // rechts abfallende Kante/Knick? + i=loop(bp,bp->x-1,y,x1-x0,cs,0,LE); + if( i-2-dx/16>=x ) break; + if( i=dy/5 ) Break; + + // test ob linke Kante gerade + for(x=0,y=bp->y-1-dy/5;y>=dy/5;y--){ // rechts abfallende Kante/Knick? 
+ i=loop(bp,0,y,x1-x0,cs,0,RI); + if( i+2+dx/16x ) x=i; + } + if (y>=dy/5 ) Break; + if (box1->m4 && y1m4) + if ( get_bw(x0,x1,y1+1,box1->m4+dy/8,box1->p,cs,1) == 1 ) + ad=ad*97/100; // unsure !l| + i=loop(bp,dx-1,dy/16,dx,cs,0,LE); + j=loop(bp,dx-1,dy/2 ,dx,cs,0,LE); + if( i>3 && j>3 ) + if( get_bw(dx-1-i/2,dx-1-i/2,0,dy/2,bp,cs,1) == 1 ) Break; // ~t + + for(y=5*dy/8;y8 + && loop(bp, 0,3*dy/4,dx,cs,0,RI)>=dx/4 + && loop(bp, 0,7*dy/8,dx,cs,0,RI)<=dx/8 + && loop(bp,dx-1,3*dy/4,dx,cs,0,LE)<=dx/8 + && loop(bp,dx-1,7*dy/8,dx,cs,0,LE)<=dx/8 ) Break; // ~J + + if ( 2*i3>5*i1 ) // hmm \tt l can look very similar to 7 + if ( loop(bp,0,dy/4,dx,cs,0,RI)>dx/2 + && get_bw(0,dx/8,0,dy/4,bp,cs,1) == 1 ) Break; // ~7 + + if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/2 + && get_bw(3*dx/4,dx-1,3*dy/4,dy-1,bp,cs,1) == 1) { + if (loop(bp,0,dy-1,dx,cs,0,RI)2*dy) ad=99*ad/100; // ~L + if(5*dx>3*dy) ad=99*ad/100; // ~L + } + if(!hchar){ // right part (bow) of h is never a l + if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1 + && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break; + } + if( dx>3 && dy>3*dx ) + if( loop(bp,dx/4,dy-1 ,dy,cs,0,UP)< dy/4 + && loop(bp, 0,dy-1-dy/8,dx,cs,0,RI)>=dx/2 + && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE)<=dx/4 ){ + ad=98*ad/100; // ~] + if ( loop(bp,dx-1,dy/2,dx,cs,0,LE)==0 ) Break; + } + + for(x=0;xi ) break; + } + if( x>=loop(bp,0,y+1,dx,cs,0,RI) ) + if( loop(bp,0 ,0,dy,cs,0,DO)>1 ) + if( loop(bp,0 ,0,dy,cs,0,DO) + - loop(bp,dx/16+1,0,dy,cs,0,DO) < dx/16+1 ) Break; // ~1 Jul00,Nov00 + if( num_cross(0,dx/2,y-1,y-1,bp,cs)==2 ) Break; // ~1 + } + if(dx<8 && dy<12){ // screen font + i= loop(bp,0,0,dy,cs,0,DO); + if( loop(bp,dx/2,1,dy,cs,1,DO)>=dy-2 + && loop(bp,0,dy/2,dx,cs,0,RI)>=2 + && i>1 && ip,cs,2) != 2 + && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 + && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 + && get_bw(x0,x0+dx/4,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~] */ + i=loop(bp,dx-1,dy/2,dx,cs,0,LE); + if( loop(bp, 0,dy/2,dx,cs,0,RI)>=dx/2 + && (ip,cs,2) != 2 + && 
get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 + && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 + && get_bw(x1-dx/4,x1,y0+1+dy/16,y1-1-dy/16,box1->p,cs,1) != 1 ) Break; /* ~[ */ + + x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? ~() + i =loop(bp,dx-1,dy/2,dx,cs,0,LE); + if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 + && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 + && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8 + && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~( + if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8 + && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8 + && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8 + && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~) + + i= loop(bp, 0, 0,dy,cs,0,DO); // horizontal line? + if(dy>=12 && i>dy/8 && iloop(bp,dx-1, i,dx,cs,0,LE) + || loop(bp,dx-1,3*dy/16,dx,cs,0,LE)-dx/8 + >loop(bp,dx-1, i+1,dx,cs,0,LE) ) + if( loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8 + >loop(bp,dx-1, i,dx,cs,0,LE) + || loop(bp,dx-1,8*dy/16,dx,cs,0,LE)-dx/8 + >loop(bp,dx-1, i+1,dx,cs,0,LE) ) + if( loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8 + >loop(bp, 0, i,dx,cs,0,RI) + || loop(bp, 0,3*dy/16,dx,cs,0,RI)-dx/8 + >loop(bp, 0, i+1,dx,cs,0,RI) ) + if( loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8 + >loop(bp, 0, i,dx,cs,0,RI) + || loop(bp, 0,8*dy/16,dx,cs,0,RI)-dx/8 + >loop(bp, 0, i+1,dx,cs,0,RI) ) Break; // ~t + if( loop(bp, 0,i-1,dx,cs,0,RI)>1 && dx<6 ) Break; // ~t + if( loop(bp, 0,8*dy/16,dx,cs,0,RI)>dx/8 + && loop(bp, 0, i,dx,cs,1,RI)>=dx-1 + && loop(bp,dx-1,8*dy/16,dx,cs,0,LE)>dx/8 + && loop(bp,dx-1, i-1,dx,cs,0,LE)>dx/8 ) Break; // ~t + } +// if( vertical_detected && dx>5 ) + if( loop(bp,0, 1,dx,cs,0,RI)>=dx/2 + && ( loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 + || loop(bp,0,dy-1,dx,cs,0,RI)<=dx/8 ) ) + if( ( loop(bp,dx-1, 0,dx,cs,0,LE)<=dx/8 + || loop(bp,dx-1, 1,dx,cs,0,LE)<=dx/8 ) + && loop(bp,dx-1,dy-2,dx,cs,0,LE)>=dx/2 ) ad=98*ad/100; // ~/ + + if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; + + if (!hchar || loop(bp,0,dy/4,dx,cs,0,RI)>dx/2){ // ~z + i=loop(bp,0,dy/16 ,dx,cs,0,RI); + 
i=loop(bp,i,dy/16 ,dx,cs,1,RI); j=i; + i=loop(bp,0,dy/16+1,dx,cs,0,RI); + i=loop(bp,i,dy/16+1,dx,cs,1,RI); if (i>j) j=i; + i=loop(bp,0,dy/16+2,dx,cs,0,RI); + i=loop(bp,i,dy/16+2,dx,cs,1,RI); if (i>j) j=i; + if (j*4>=dx*3) ad=98*ad/100; // ~z + if (j*8>=dx*7) ad=96*ad/100; // ~z + } + + if( get_bw(x0,x0,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; + if( get_bw(x1,x1,y1,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; + if (ad==100) ad--; /* I have to fix that: + .@@@@.<- + @@..@@ + ....@@ + ....@@< + ...@@. + ..@@@. + ..@@.. + .@@... + @@.... + @@@@@@<- + */ + if(!hchar) ad=ad*99/100; + if( gchar) ad=ad*99/100; + Setac(box1,'l',ad); +// if( i<100 ) Break; ???? +// if( loop(bp,0, 1,dx,cs,0,RI)<=dx/8 +// && loop(bp,0,dy/2,dx,cs,0,RI)<=dx/8 +// && loop(bp,0,dy-2,dx,cs,0,RI)<=dx/8 ) vertical_detected=1; + break; + } + return box1->c; +} + +static wchar_t ocr0_oO(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test o,O --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='o'; ) + if (sdata->holes.num !=1 ) Break; + if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2 , x0+dx/2,y1-dy/2 , y1, box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/2 , y1-dy/3,box1->p,cs,1) != 0 ) Break; + if (sdata->holes.hole[0].y0 > dy/3 + || sdata->holes.hole[0].y1 < dy-1-dy/3) Break; + + if( num_cross(x0+dx/2 ,x0+dx/2 ,y0, y1 ,box1->p,cs) != 2 + && num_cross(x0+dx/2+1,x0+dx/2+1,y0, y1 ,box1->p,cs) != 2 ) Break; + if( num_cross(x0+dx/3,x1-dx/4,y0 , y0 ,box1->p,cs) != 1 ) // AND + if( num_cross(x0+dx/3,x1-dx/4,y0+1 , 
y0+1,box1->p,cs) != 1 ) Break; + if( num_cross(x0+dx/4,x1-dx/3,y1 , y1 ,box1->p,cs) != 1 ) // against "rauschen" + if( num_cross(x0+dx/4,x1-dx/3,y1-1 , y1-1,box1->p,cs) != 1 ) Break; + if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) + if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; + if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) + if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; + + if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= + loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; + + x=loop(bp,dx-1,dy-1-dy/3,x1-x0,cs,0,LE); // should be minimum + for( y=dy-1-dy/3;ydx/8 ) + if( loop(bp,0 , dy/16,dx,cs,0,RI)dx/8 ) + if( loop(bp,0 ,dy-1-dy/16,dx,cs,0,RI)p,cs,1) == 0 + && get_bw(x1-dx/32,x1,y1-dy/32,y1,box1->p,cs,1) == 0 +// && ( get_bw(x0,x0+dx/32,y0,y0+dy/32,box1->p,cs,1) == 1 + && ( get_bw(0,dx/32,0,dy/32,bp,cs,1) == 1 + || get_bw(x0,x0+dx/32,y1-dy/32,y1,box1->p,cs,1) == 1 ) ) Break; // ~D + + // search lowest inner white point + for(y=dy,j=x=0;x 1 ) ad=99*ad/100; // ~a \it a + for(y=0;y 2 ) ad=98*ad/100; // ~a \it a + if (loop(bp,dx-1,dy-1,x1-x0,cs,0,LE)dy/8 + || num_cross(0,dx-1, 0, 0,bp,cs) > 1 + || num_cross(0,dx-1,dy-1,dy-1,bp,cs) > 1 + ) ad=98*ad/100; // ~bq + + if( hchar && 2*y0m1+box1->m2 ) i=1; else i=0; + if (gchar) ad=99*ad/100; + bc='o'; + if( i ){ bc='O'; } + if ( bc=='O' && ad>99) ad=99; /* we can never 100% sure, 0O */ + Setac(box1,bc,ad); + if (bc=='O') Setac(box1,'0',ad); + if (bc=='o') Setac(box1,'0',98*ad/100); + break; + } + return box1->c; +} + +static wchar_t ocr0_pP(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,i4,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test pP --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='p'; ) + if 
(sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( get_bw(0 , dx/2,3*dy/4,3*dy/4,bp,cs,1) != 1 ) Break; + if( get_bw(0 , dx/2, dy/2, dy/2,bp,cs,1) < 1 ) Break; + if( get_bw(dx/4, dx-1, dy/4, dy/4,bp,cs,1) != 1 ) Break; + i= loop(bp,dx-1,3*dy/4,dx,cs,0,LE); if (ip,cs) != 2 ) + if( num_cross(x0+dx/2 ,x0+dx/2 , y0, y1-3*dy/16,box1->p,cs) != 2 ) + if( num_cross(x0+dx/2+1,x0+dx/2+1, y0, y1-3*dy/16,box1->p,cs) != 2 ) Break; + if( num_cross(0,dx-1,7*dy/8 ,7*dy/8 ,bp,cs) != 1 ) + if( num_cross(0,dx-1,7*dy/8-1,7*dy/8-1,bp,cs) != 1 ) Break; + if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) + if( num_cross(0,dx-1, dy/4-1, dy/4-1,bp,cs) != 3 ) // \it p with nice kurve + if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) + if( num_cross(0,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break; + + i= loop(bp,0,dy/2,dx,cs,0,RI); if(i<1) i++; + if( num_cross(i-1,dx-1, dy/4 , dy/4 ,bp,cs) != 2 ) + if( num_cross(i-1,dx-1, dy/4+1, dy/4+1,bp,cs) != 2 ) Break; + + i1= loop(bp, 0,3*dy/8,dx,cs,0,RI); if (i1>=dx/2) ad=90*ad/100; + i2=i1+loop(bp,i1,3*dy/8,dx,cs,1,RI); // upper x-position of v line + i3= loop(bp, 0,7*dy/8,dx,cs,0,RI); + i4=i3+loop(bp,i3,7*dy/8,dx,cs,1,RI); // lower x-position of v line + // out_x(box1);printf(" p:"); + for ( y=dy/8; y<7*dy/8; y++ ){ + x=i2+ (8*y-3*dy)*(i4-i2)/(4*dy); // right limit of line + i= loop(bp,0,y,dx,cs,0,RI); if(i>x+dx/16) break; + } if ( y<7*dy/8 ) Break; + for ( x=0,j=y=dy/3; yx ) { x=i; j=y; } if(x>dx/2) break; + } if ( x=dx) Break; + if( get_bw(3*dx/4,dx-1, y , dy-1,bp,cs,1) == 1 ) Break; + + i=num_hole (x0,x1,y0,y1-dy/5,box1->p,cs,NULL); + // j=num_hole (x0,x1,y0,y1 ,box1->p,cs,NULL); + j=sdata->holes.num; + + if (j!=1 && dx< 8) ad=96*ad/100; + if (j!=1 && dx>=8) ad=98*ad/100; + if (i==0 && j==0) ad=90*ad/100; /* some times there is a small gap */ + if (i>1 || j>1 || j>i) Break; + + // check for serif F + i= loop(bp,bp->x-1, bp->y/4, dx ,cs,0,LE); + i=i+loop(bp,bp->x-1-i,bp->y/4, dx ,cs,1,LE); + j= 
loop(bp,bp->x-1-i,bp->y/4,3*dy/4,cs,0,DO); + if (j>dy/2) ad=80*ad/100; // its an serif-F + + if( ((!hchar) && (!gchar)) || (hchar && gchar)) ad=95*ad/100; + bc='p'; + if( hchar && ((!gchar) || dy<14)) bc='P'; + if ( hchar && gchar) ad=98*ad/100; // \ss sz + if ((!hchar) && !gchar) ad=98*ad/100; + + Setac(box1,bc,ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_qQ(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad; /* tmp-vars */ + + // --- test Q --------------------------------------------------- + for(ad=d=100;dx>2 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='Q'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( get_bw(x0 ,x0+dx/3,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/3,x1 ,y0+dy/3,y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2,x0+dx/2,y1-dy/3,y1, box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2,x0+dx/2,y0 ,y0+dy/4,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2,x0+dx/2,y0+dy/3,y1-dy/2,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1 ,x1 ,y0 ,y0 ,box1->p,cs,1) == 1 ) Break; //alpha + if( num_cross(x0+dx/2,x0+dx/2,y0 , y1 ,box1->p,cs) < 2 ) Break; + if( num_cross(x0+dx/5,x1-dx/5,y0 , y0 ,box1->p,cs) != 1 ) // AND + if( num_cross(x0+dx/5,x1-dx/5,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; + if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) + if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; + if( get_bw(x1 ,x1 ,y1-dy/8 , y1 ,box1->p,cs,1) == 0 ) + if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) + if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; + // i=num_hole(x0,x1,y0,y1,box1->p,cs,NULL); + i=sdata->holes.num; + if(!i) Break; + if( i!=1 && (i!=2 || num_hole(x0,x1,y0+dy/2,y1,box1->p,cs,NULL)!=1) ) Break; + x=x1;y=y1; + 
turmite(box1->p,&x,&y,x0,x1,y0,y1,cs,LE,ST); if( xp,&x,&y,x0,x1,y0,y1,cs,ST,LE); + if( x 5*dx/8 ) Break; // ~4 Okt00 + + x= loop(bp,dx-1,3*dy/8,dy,cs,0,LE); if( x>dx/4 ) Break; + if( loop(bp,dx-1-x,0 ,dy,cs,0,DO) + <= loop(bp,dx-2-x,0 ,dy,cs,0,DO) ) Break; // 4 + + if( loop(bp,dx-1,dy-2,dx,cs,0,LE) + <= loop(bp,dx-1,dy/2,dx,cs,0,LE) ) + if( loop(bp, 1,dy-1,dy,cs,0,UP) + <= loop(bp,dx/2,dy-1,dy,cs,0,UP) ) + if( loop(bp, 0,dy-2,dx,cs,0,RI)>dx/2 ) + if( loop(bp, 0, 0,dx,cs,0,RI)>dx/2 ) Break; // 4 + + if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE) + + loop(bp, 0,3*dy/4,dx,cs,0,RI) + < loop(bp,dx-1,2*dy/4,dx,cs,0,LE) + + loop(bp, 0,2*dy/4,dx,cs,0,RI) ) ad=94*ad/100; // 4 + if( loop(bp,0 ,3*dy/4,dx,cs,1,RI) >= dx ) ad=94*ad/100; // 4 + + + if( loop(bp,dx-1,dy/3,dx,cs,0,LE)> dx/4 ) Break; + j=loop(bp,dx/2,dy-1,dy,cs,0,UP); + if (j>1 && j>dy/8) { + if( get_bw(0,dx/2,dy-1-j/2,dy-1-j/2,bp,cs,1) == 1 ) { // ~RA + if (j<5) ad=95*ad/100; + else Break; + } + } + + // italic a + for(i=0,y=0;y 2 ) i++; if(i>dy/8) Break; // ~a \it a + if (i>0) ad=99*ad/100; + + // ~o look at the lower right side for falling line + for(j=x=0,y=dy/2;yx){ x=i; } + if (x-i>j) j=x-i; + if( j>dx/16 ) Break; // falling line detected + } + if (j==0) Break; // no falling line => no Q + if (j<=dx/16) ad=98*ad/100; + if(y1<=box1->m3) ad=98*ad/100; // ~q no underlength! 
rare + if(!hchar) ad=96*ad/100; + Setac(box1,'Q',ad); + break; + } + // --- test q --------------------------------------------------- + for(ad=d=100;dx>2 && dy>3;){ // min 3x4 + DBG( wchar_t c_ask='q'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + for ( y=y0; 2*y<=y0+y1; y++ ){ // detect ring + if( num_cross(x0,x1, y, y,box1->p,cs) == 2 ) Break; + } if (2*y>y0+y1) Break; /* < */ + for ( y=(y0+y1)/2; y<=y1; y++ ){ // detect vert line + if( num_cross(x0, x1, y, y,box1->p,cs) == 1 + && num_cross(x0,x0+dx/2, y, y,box1->p,cs) == 0 ) Break; + } if (y>y1) Break; /* O (y==y1 for 4x6font-q) */ + for ( x=0,j=y=y0+dy/3; y<=y1-dy/8; y++ ){ // detect baseline + i=loop(box1->p,x0,y,dx,cs,0,RI); + if ( i>x ) { x=i; j=y; } + if ( x>dx/2 ) break; + } if ( x=dx) Break; + if (y1-j+1p,cs) != 0 ) ad=96*ad/100; // ~g + if( loop(box1->p,x0+dx/16,j,dy,cs,0,UP)<1+dy/16 ){ + ad=97*ad/100; + if (hchar || !gchar) Break; // 4 + } + if( loop(box1->p,x0+dx/16,j-dy/32-1,dy,cs,1,RI)>=dx-dx/8 + || loop(box1->p,x0+dx/16,j-dy/16-1,dy,cs,1,RI)>=dx-dx/8 ){ + ad=96*ad/100; // 4 + } + if( get_bw(x1-dx/3, x1, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0, x0+dx/3, y0+dy/3, y0+dy/3,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0, x0+dx/4, y1-dy/8, y1-dy/9,box1->p,cs,1) == 1 ) Break; + if( get_bw(x0, x0+dx/4, y1-dy/5, y1-dy/9,box1->p,cs,1) == 1 ) ad=99*ad/100; + if( num_cross(x0+dx/2,x0+dx/2, y0, j ,box1->p,cs) != 2 ) Break; + // if( num_hole (x0 ,x1 , y0, y1 ,box1->p,cs,NULL) != 1 ) + if (sdata->holes.num != 1) + { if (dx<16) ad=98*ad/100; else Break; } + if( num_hole (x0 ,x1 , y0, j ,box1->p,cs,NULL) != 1 ) + { if (dx<16) ad=98*ad/100; else Break; } + // ~\it g + if( loop(bp,0,dy-1-dy/4,dx,cs,0,RI)>5*dx/8 + && get_bw(dx/4,dx/4,dy-1-dy/4,dy-1,bp,cs,1)==1 ) Break; // ~\it g + // what about unsure m1-m4? 
+ if(!gchar){ ad=ad*99/100; } // ~4 + if( hchar){ ad=ad*99/100; } // ~49 + Setac(box1,'q',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_iIjJ(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,i4,i5,hchar=sdata->hchar,gchar=sdata->gchar, + ax,ay,bx,by,cx,cy,ex,ey, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ad,ya,yb,yc,yd,ye,yf,xa,xb, /* tmp-vars */ + (*aa)[4]=sdata->aa; /* the for line ends, (x,y,dist^2,vector_idx) */ + + // --- test i --------------------------------------------------- + // if(box1->dots==1) // what about \it neighbouring ij + for(ad=d=100;dy>3 && dx>0;){ // min 3x4 without dot + DBG( wchar_t c_ask='i'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + // ToDo: ':' check that high of dot is smaller than the vert. line! + /* + * o <== ya + * o + * + * ooo <== yb + * o + * o + * o + * ooo + */ + ya=y0; + if (box1->dots!=1) ad=98*ad/100; + while(dy>3*dx && box1->m2){ // test for vertical i without detected dot + i= loop(bp,dx/2,dy-1 ,dy,cs,0,UP); + if (dy-1-im3-2) break; + i+=loop(bp,dx/2,dy-1-i,dy,cs,1,UP); + // distance upper end to m2 > (m2-m1)/3 + if (3*abs(dy-1-i-box1->m2)>box1->m2-box1->m1) break; + if( get_bw(x0,x1,y0,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 ) + if( get_bw(x0,x1,y1-i ,y1-i ,box1->p,cs,1) == 0 + || get_bw(x0,x1,y1-i-1,y1-i-1,box1->p,cs,1) == 0 + || get_bw(x0,x1,y1-i-2,y1-i-2,box1->p,cs,1) == 0 ) + { + Setac(box1,'i',ad); + return 'i'; /* beleave me, thats an "i"! */ + } break; + } +// if( box1->dots!=1 ) Break; + if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1; + +// out_x(box1); + for (y=ya;2*yp,cs,1) == 1 ) break; + if (2*y>=ya+y1) Break; // hmm, gap only, no dot? 
+ ya=y; + if (box1->m2 && ya>box1->m2+2) Break; + for ( ;2*yp,cs,1) != 1 ) break; + if (2*y>=ya+y1) Break; // hmm no gap + for ( ;2*yp,cs,1) == 1 ) break; + yb=y; + if (5*yb>=3*ya+2*y1) ad=99*ad/100; // large gap + if (2*yb>= ya+ y1) ad=97*ad/100; // very large gap, ~: + if (5*yb>=2*ya+3*y1) Break; // huge gap, ~: + if (loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2) // unusual (right part of ouml) + ad=95*ad/100; + + // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs)); + // printf(" dots=%d\n",box1->dots); out_x(box1); + // \sl ~f. ! + for (y=y1;y>ya;y--) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break; + if (y>(ya+3*y1)/4) Break; + if (y>(ya+2*y1)/3) ad=96*ad/100; + + y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */ + if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) Break; + for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y; + for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y; + if( yd<3*(y1-yb+1)/4+yb-y0 ) Break; + y=(y1-yb+1)/2+yb-y0; + for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y; + for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y; + if( yf>(y1-yb+1)/4+yb-y0 ) Break; + if(yd>yc+2){ + xa=loop(bp, 0,yc-1,dx,cs,0,RI); + xb=loop(bp,dx-1,yc-1,dx,cs,0,LE); + if( + xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */ + > xa-loop(bp, 0,yc,dx,cs,0,RI) ){ + y= loop(bp,dx-xb,yc-1,dy,cs,0,DO); + if(y>0){ + i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO); + if( i>0 ) y+=i-1; + } + if( yc-1+y < yd-1 ) Break; + } else { + y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO); + if( yc-1+y < yd-2 ) Break; + } + } + if(yf0 ) y+=i-1; + if( ye+1-y > yf+1 ) Break; + } + if( 2*y0 <= box1->m1+box1->m2 + && loop(bp,0, 0,dx,cs,0,RI)+1 + < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100; + + if( gchar ) // i is more often than j, be sure that realy correct Mai00 + if( loop(bp, 0,2*dy/4,dx,cs,0,RI) + -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)>dx/8 ) Break; + + // could be a broken + or similar thing? 
+ if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=90*ad/100; + + if( loop(bp,dx-1,3*dy/4,dx,cs,0,LE)>dx/2 + && loop(bp,dx-1, dy-1,dx,cs,0,LE)5 && num_cross(x0+dx/2,x0+dx/2, ya, y1 ,box1->p,cs) >= 3 ) + ad=95*ad/100; + + Setac(box1,'i',ad); + break; + } + // --- test j --------------------------------------------------- + // if(box1->dots==1) // what about \it neighbouring ij + for(ad=d=100;dy>4 && dx>0;){ // min 3x4 + DBG( wchar_t c_ask='j'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + ya=y0; + if( box1->m2 && 2*y0>=box1->m2+box1->m1 ) ya=box1->m1; + + for(y=ya;2*yp,cs,1) == 1 ) break; + if(2*y>=ya+y1) Break; // hmm only gap + ya=y; + if( box1->m2 && ya>box1->m2+2 ) Break; + for( ;2*yp,cs,1) != 1 ) break; + if(2*y>=ya+y1) Break; // hmm no gap + for( ;2*yp,cs,1) == 1 ) break; + if(2*y>=ya+y1) Break; // hmm very large gap + yb=y; + if( loop(bp,dx-1,y+(y1-ya+1)/32,dx,cs,0,LE)>dx/2 ) Break; // unusual (right part of ouml) + + // printf(" num_cross dy/2=%d %d\n",dy/2, num_cross(0,dx-1,dy/2,dy/2,bp,cs)); + // printf(" dots=%d\n",box1->dots); out_x(box1); + // \sl ~f. ! 
+ for(y=(ya+y1)/2;y<=y1;y++) if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) break; + if(y<=y1) Break; + + y=(y1-yb+1)/2+yb-y0; /* only one vertical line, italic i is more an tall S */ + if( num_cross(0,dx-1,y,y,bp,cs) >2 ) Break; + for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } yc=y; + for(;y<=y1-y0;y++){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yd=y; + if( yd<3*(y1-yb+1)/4+yb-y0 ) Break; + y=(y1-yb+1)/2+yb-y0; + for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) break; } ye=y; + for(;y>0;y--){ if( num_cross(0,dx-1,y,y,bp,cs) != 2 ) break; } yf=y; + if( yf>(y1-yb+1)/4+yb-y0 ) Break; + if(yd>yc+2){ + xa=loop(bp, 0,yc-1,dx,cs,0,RI); + xb=loop(bp,dx-1,yc-1,dx,cs,0,LE); + if( + xb-loop(bp,dx-1,yc,dx,cs,0,LE) /* Dec00 */ + > xa-loop(bp, 0,yc,dx,cs,0,RI) ){ + y= loop(bp,dx-xb,yc-1,dy,cs,0,DO); + if(y>0){ + i=loop(bp,dx-xb-1,yc-1+y-1,dy,cs,0,DO); + if( i>0 ) y+=i-1; + } + if( yc-1+y < yd-1 ) Break; + } else { + y= loop(bp,11*xa/16,yc-1,dy,cs,0,DO); + if( yc-1+y < yd-2 ) Break; + } + } + if(yf0 ) y+=i-1; + if( ye+1-y > yf+1 ) Break; + } + if( 2*y0 <= box1->m1+box1->m2 + && loop(bp,0, 0,dx,cs,0,RI)+1 + < loop(bp,0,dx/2,dx,cs,0,RI) ) ad=97*ad/100; + if (loop(bp,0,dy-1,dx,cs,0,RI) + -loop(bp,0,dy-3,dx,cs,0,RI)>1+dx/16) ad=96*ad/100; // ~c + + if( gchar ) // i is more often than j, be sure that realy correct Mai00 + if( loop(bp, 0,2*dy/4,dx,cs,0,RI) + -loop(bp,dx-1,2*dy/4,dx,cs,0,LE)<=dx/8 ) Break; + // could be a broken + or similar thing? + if( 3 * ya > box1->m1 + 2*box1->m2 ) ad=80*ad/100; + if (!gchar) ad=96*ad/100; + if( box1->dots!=1 ) ad=98*ad/100; + + Setac(box1,'j',ad); + + break; + } + // --- test I --------------------------------------------------- + for(ad=d=100;dy>4 && dy>dx && 5*dy>4*(box1->m3-box1->m2);){ // min 3x4 + DBG( wchar_t c_ask='I'; ) + if( box1->dots==1 ) Break; + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + + x =loop(bp,0, dy/2,dx,cs,0,RI); // konvex? 
divided Q + if(loop(bp,0,7*dy/8,dx,cs,0,RI) > x+dx/8) Break; + for( y=dy/16;y1+dx/8 ) break; + } if( y<3*dy/4 ) Break; + // out_x(box1); + + // upper max width + for(i2=i1=0,y=0;yi1){ i1=x;i2=y; } + } + for(i4=i3=0,y=3*dy/4;yi3){ i3=x;i4=y; } + } + if( abs(i3-i1)>1+dx/8 ) Break; // if i3>>i5 more sure! + if( i1>i5 ){ // look for edges else *80% + } + if(i1+1i2 ) i2=i; + + // printf(" get_line(%d,%d) %d\n",i1,i2, + // get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)); + if( get_line2(i1,dy/8,i2,dy-1-dy/8,bp,cs,100)<95 ) Break; + x =(i1-i2+4)/8; i1+=x; i2-=x; + + // upper and lower width (what about serifs?) + y=dy/8; + x =loop(bp,i1, y+0,dx,cs,1,LE); i=x; + x =loop(bp,i1, y+1,dx,cs,1,LE); if(x>i)i=x; + x =loop(bp,i1, y+0,dx,cs,1,RI); j=x; + x =loop(bp,i1, y+1,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; + x =loop(bp,i2,dy-y-1,dx,cs,1,LE); j=x; + x =loop(bp,i2,dy-y-2,dx,cs,1,LE); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; + x =loop(bp,i2,dy-y-1,dx,cs,1,RI); j=x; + x =loop(bp,i2,dy-y-2,dx,cs,1,RI); if(x>j)j=x; if(abs(i-j)>1+dx/8)Break; + + if(dy>15) // v024a4 + if( loop(bp,dx-1,dy/16 ,dx,cs,0,LE) + > loop(bp,dx-1,dy/4 ,dx,cs,0,LE)+1+dx/32 ) Break; // ~bad ) (thinn) + + for(i=0,y=dy/16;y<15*dy/16 && i<2;y++) + if( num_cross(0,dx-1,y,y,bp,cs) != 1 ) i++; + if( i>1 ) Break; + + if(!hchar){ // right part (bow) of h is never a l + if( get_bw(dx/4,dx/4, 0,dy/4,bp,cs,1) == 1 + && get_bw(dx/4,dx/4,dy/2,dy-1,bp,cs,1) == 0 ) Break; + if( loop(bp, 0,dy/4,dx,cs,0,RI)> dx/4 + && loop(bp,dx-1,dy/4,dx,cs,0,LE)<=dx/4 + && loop(bp, 1, 0,dy,cs,0,DO)<=dy/4 ) Break; // ~z + } + + if( get_bw(x1,x1,y0 ,y1 ,box1->p,cs,2) != 2 + && get_bw(x0,x1,y0 ,y0 ,box1->p,cs,2) != 2 + && get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) != 2 + && get_bw(x0,x0,y0+1,y1-1,box1->p,cs,1) != 1 ) Break; /* ~] */ + + if ( loop(bp,dx-1, dy/4,dx,cs,0,LE) > dx/2 + && loop(bp,dx-1,3*dy/4,dx,cs,0,LE) > dx/2 + && loop(bp, 0, dy/2,dx,cs,0,RI) < dx/4 ) Break; /* ~[ */ + + x =loop(bp, 0,dy/2,dx,cs,0,RI); // konvex/konkav? 
~() + i =loop(bp,dx-1,dy/2,dx,cs,0,LE); + if( loop(bp, 0,7*dy/8,dx,cs,0,RI) > x+dx/8 + && loop(bp, 0, dy/8,dx,cs,0,RI) > x+dx/8 + && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) < i-dx/8 + && loop(bp,dx-1, dy/8,dx,cs,0,LE) < i-dx/8 ) Break; // ~( + if( loop(bp, 0,7*dy/8,dx,cs,0,RI) < x-dx/8 + && loop(bp, 0, dy/8,dx,cs,0,RI) < x-dx/8 + && loop(bp,dx-1,7*dy/8,dx,cs,0,LE) > i+dx/8 + && loop(bp,dx-1, dy/8,dx,cs,0,LE) > i+dx/8 ) Break; // ~) + if( loop(bp, 0, dy/8,dx,cs,0,RI) + -(dx-loop(bp,dx-1,7*dy/8,dx,cs,0,LE)) > dx/4 ) Break; // ~/ + if( loop(bp, 0, 0,dx,cs,0,RI) > dx/2 // ToDo: check for serifs + && loop(bp, 0, dy/8,dx,cs,0,RI) > dx/2 + && loop(bp,dx-1,dy-1 ,dx,cs,0,LE) > dx/2 + && loop(bp,dx-1,dy-1-dy/8,dx,cs,0,LE) > dx/2 ) ad=99*ad/100; // ~/ + + if (box1->m2 && 3*y0>box1->m1+2*box1->m2) + if( get_bw(x0+dx/8,x1-dx/8,box1->m1,(box1->m1+box1->m2)/2,box1->p,cs,1) == 1 ) + Break; // ~i + + if(i1+1p,cs,1) != 1 + || get_bw(x0+i4/4,x0+i4/4,y1-dy/4,y1,box1->p,cs,1) != 1 ) + { ad=99*ad/100; MSG(fprintf(stderr,"ad=%d",ad);) } // ToDo: improve it + if(!hchar){ ad=96*ad/100; MSG({}) } // ~bad_small_r + if (box1->m4 && y1m4) { // probably lower dot? + if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1) + || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1)) { + ad=96*ad/100; + } + } // ~! 
+ // a---b + // I + // I + // c---e + // check against Z + for(bx=0,ax=dx,ay=by=y=0;ybx) { bx=dx-1-i; by=y; } + i+=loop(bp,dx-1-i,y,dx,cs,1,LE); if (dx-i-1dy-1-dy/4;y--){ + i =loop(bp,0,y,dx,cs,0,RI); if (iex) { ex=i; ey=y; } + } + x=(3*ax+cx)/4; y=(3*ay+cy)/4; i= loop(bp,x,y,dx,cs,0,RI); + x=(3*bx+ex)/4; y=(3*by+ey)/4; j= loop(bp,x,y,dx,cs,0,LE); + if (j>0 && (2*i>3*j || 3*i<2*j )) ad=99*ad/100; + if (j>0 && ( i>2*j || 2*i< j )) ad=97*ad/100; + i=loop(bp,0,0,dy,cs,0,DO); + if (i>dy/8 && idx/4) ad=96*ad/100; // ~l 5x7 + + if( get_bw(x0,x1,y0,y1,box1->p,cs,2) == 0 ) ad=99*ad/100; + if (gchar) ad=98*ad/100; // J + if (box1->m3 && 2*y1<=box1->m2+box1->m3) ad=96*ad/100; // ' + + Setac(box1,'I',ad); + break; + } + // --- test J --------------------------------------------------- 22Nov06 + for(ad=d=100;dy>4 && dy>=dx && dx>2;){ // min 3x4 ~Y)]d', + // rewritten for vectors 0.42 + int ld, i1, i2, i3, i4, i5, i6, i7; // line derivation + corners + DBG( wchar_t c_ask='J'; ) + if (sdata->holes.num > 0) Break; /* no hole */ + /* half distance to the center */ + d=2*sq(128/4); + /* now we check for the upper right end of the J */ + if (aa[3][2]>d) Break; /* [2] = distance */ + /* searching for 4 notches between neighbouring ends */ + +/* + type A B + + 6OOOO 6O5 + 7O5 7O + O O + O O + 2O 1O4 1O4 + OO 2OO + 3 3 +*/ + + /* Warning: aa0 can be left upper or left lower point for type B */ + /* get a point on the inner low left side of the J */ + i =nearest_frame_vector(box1,aa[3][3],aa[1][3],(x0+x1)/2,y0); + i1=nearest_frame_vector(box1,i ,aa[1][3], x1+dx,(y0+3*y1)/4); + /* get the most left point on the lower part of the J */ + i2=nearest_frame_vector(box1,i1,aa[3][3], x0-2*dx, y1-dy/8); + /* get a point on the middle of the bottom of the J */ + i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], (x0+x1)/2, y1); + /* get a point on the outer low right side of the J */ + i4=nearest_frame_vector(box1,aa[1][3],aa[3][3], x1, (y0+2*y1)/3); + /* get a point on the outer right side below 
top serif */ + i5=nearest_frame_vector(box1,aa[2][3],aa[3][3], (x0+2*x1)/3,y0); + /* get a point on the left side of upper serif */ + i6=nearest_frame_vector(box1,aa[3][3],i1, x0, y0); + /* get a point on the most right left side of upper serif */ + i7=nearest_frame_vector(box1,i6,i1, x1, y0); + MSG(fprintf(stderr," i1-i7 %d %d %d %d %d %d %d",i1,i2,i3,i4,i5,i6,i7);) + + /* check the highest point on lower left area */ + i =nearest_frame_vector(box1,i1,i3,x0,y0); + if (box1->frame_vector[i ][1]-y0frame_vector[i ][1]-y0<=dy/2) ad=97*ad/100; // imperfect a + /* check the lowest point on upper left area, serife? */ + j =nearest_frame_vector(box1,i6,i7,x0,y1); + if (box1->frame_vector[i ][1] + -box1->frame_vector[j ][1]<=dy/4) Break; // imperfect a + if (box1->frame_vector[i7][1]>y0+dy/4) Break; // not to low + if (box1->frame_vector[i1][1] + -box1->frame_vector[i7][1]frame_vector[i4][1] + -box1->frame_vector[i5][1]frame_vector[i7][0]frame_vector[i1][0] + -box1->frame_vector[i2][0]<=dx/8) Break; // ~1 + if (box1->frame_vector[i1][0] + -box1->frame_vector[i2][0]<=dx/4) ad=ad*99/100; // ~1 + if (box1->frame_vector[i6][1]>y0+dy/8) ad=99*ad/100; // ~1 + if (aa[0][2]==0) { // ]? + ad=99*ad/100; + if (aa[1][2]==0) ad=98*ad/100; + if (aa[2][2]<=aa[3][2]) ad=97*ad/100; + } + + /* check for left bow */ + for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[ i][0] /* [0]=x */ + frame_vector[i1][0]) break; /* curve? 
*/ + } if (i==i4) Break; // ~I + /* check for no right bow */ + for (j=i=i2;i!=i4;i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[ i][0] /* [0]=x */ + >box1->frame_vector[i4][0]) break; + } if (i!=i4) Break; // ~I + /* check for no right bow */ + for (j=i=i5;i!=i6;i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[ i][1] > y0+dy/4) break; + } if (i!=i6) Break; // ~Y + /* check if upper left and lower left points are joined directly */ + ld=line_deviation(box1, i7, i1); + MSG(fprintf(stderr," i7,i1 %d %d linedist= %d/%d",i7,i1,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 + if (6*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 + if (7*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 + if (8*ld >4*2*sq(1024/4)) ad=99*ad/100; // ~3 + /* check if lower right and upper right points are joined directly */ + ld=line_deviation(box1, i4, i5); + MSG(fprintf(stderr," i4,i5 %d %d linedist= %d/%d",i4,i5,ld,2*sq(1024/4));) + if (ld >2*sq(1024/4)) Break; + if (5*ld >4*2*sq(1024/4)) ad=99*ad/100; + + // J exists as gchar and ~gchar + if(!hchar){ ad=99*ad/100; } + Setac(box1,'J',ad); + break; + } + return box1->c; +} + +static wchar_t ocr0_brackets(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i1,i2,i3,i4,i5,i6,hchar=sdata->hchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + (*aa)[4]=sdata->aa, /* corner-points, (x,y,dist^2,vector_idx) */ + ad,r1,r2; /* tmp-vars */ + wchar_t bc=UNKNOWN; + + // --- test > derived from xX --------------------------------------------------- + // rewritten for vectors v0.41 + for(ad=d=100;dx>1 && dy>2;){ // min 3x2 + // 0 - indizes 0,1,i1,i2 pointing to edges of the char + // \ . + // \ . 
+ // i1,i2 + // / + // / + // 1 + DBG( wchar_t c_ask='>'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num > 0 && (dx<6 || dy<6)) Break; /* # */ + /* calculate the half distance to the center */ + d=2*sq(128/4); + /* now we check for the 2 left ends of the > */ + if (aa[0][2]>d) Break; /* upper left end */ + if (aa[1][2]>d) Break; /* lower left end */ + if (aa[1][1]-aa[0][1]num_frame_vectors[0]) { + if (box1->frame_vector[i][0] + >=box1->frame_vector[j][0]) j=i; /* notice most right vector */ + } if (j==i || j==aa[0][3]) Break; + /* calculate the distance to the center */ + x=box1->frame_vector[j][0]; + y=box1->frame_vector[j][1]; + if (2*x-aa[0][0]-aa[1][0](dy+2)) Break; + if ( aa[0][0]+aa[1][0]-2*x>=0) Break; + i1=j; + d=line_deviation(box1, aa[0][3], j) >sq(1024/4); + /* check if upper left and center point are joined directly */ + MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) + if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); + MSG(fprintf(stderr,"ad=%d", ad);) + d=line_deviation(box1, j, aa[1][3]); + /* check if lower left and center point are joined directly */ + MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) + if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); + MSG(fprintf(stderr,"ad=%d", ad);) + + /* run along right side from bottom to top */ + for (j=i=aa[1][3];i!=aa[0][3];i=(i+1)%box1->num_frame_vectors[0]) { + if (box1->frame_vector[i][0] + >=box1->frame_vector[j][0]) j=i; /* notice most right vector */ + // MSG(fprintf(stderr,"search right: %d %d %d %d",i,j,aa[1][3],aa[0][3]);) + } if (j==i || j==aa[1][3]) Break; + /* calculate the distance to the center */ + x=box1->frame_vector[j][0]; + y=box1->frame_vector[j][1]; + if ( (aa[0][0]+aa[1][0]-2*x)>= 0 ) Break; + if (abs(aa[0][1]+aa[1][1]-2*y)>(dy+2)/4) Break; + if (aa[0][0]>=x || aa[1][0]>=x) Break; + i2=j; + d=line_deviation(box1, j, aa[0][3]); + /* check if upper left and center point are directly joined directly */ + 
MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) + if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); + MSG(fprintf(stderr,"ad=%d", ad);) + d=line_deviation(box1, aa[1][3], j); + /* check if lower left and center point are directly joined */ + MSG(fprintf(stderr,"x %d %d dist= %d/%d",x-x0,y-y0,d,sq(1024/4));) + if (d >sq(1024/4)) Break; ad=ad-d*100/sq(1024); + MSG(fprintf(stderr,"ad=%d", ad);) + + /* + ToDo: calculate momentums or max derivations + along lines to distinguish )]}> + i1,i2 + */ + + if (sdata->gchar) ad=98*ad/100; + if (sdata->hchar) ad=99*ad/100; + bc='>'; + Setac(box1,bc,ad); + break; + } + // --- test /\\ ------------------------------------------------ +// if(bc==UNKNOWN) +// if(!box1->dots) + for(ad=d=100;dx>3 && dy>3;){ // min 4x4 for 4x6 font + DBG( wchar_t c_ask='/'; ) + if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ +#if 1 + for(i=y=0;y2 || (i>0 && dy<16)) Break; +#endif + /* get the center as exact as possible */ + i2=dx-1-loop(bp,dx-1,dy/2 ,dx,cs,0,LE) // be exact for small fonts + +dx-1-loop(bp,dx-1,dy/2+dy%2-1,dx,cs,0,LE) + + loop(bp, 0,dy/2 ,dx,cs,0,RI) + + loop(bp, 0,dy/2+dy%2-1,dx,cs,0,RI); + if (abs(i2-2*dx)>1+dx/2) Break; + if (abs(i2-2*dx)> dx/2) ad=99*ad/100; + + i1=loop(bp,dx-1,dy/16,dx,cs,0,LE); // right side + i3=loop(bp,dx-1,dy-1 ,dx,cs,0,LE); + i4=loop(bp, 0,0 ,dx,cs,0,RI); // left side + i6=loop(bp, 0,dy-1 ,dx,cs,0,RI); + i=(box1->m4+box1->m3)/2-box1->m2; + // + // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6); + + // ~lI + for(i=i4,y=0;ydx/6+1 ) break; i=x; + } if( ydx/6+1 ) break; i=x; + } if( ydx/4 ) { Setac(box1,(bc='/'),ad);break; } + if(i4<=dx/8 && i3<=dx/8 && i6-(dx-i1)>dx/4 ) { Setac(box1,(bc='\\'),ad);break; } + Break; + } + // --- test ()<> ------------------------------------------------ +// if(bc==UNKNOWN) +// if(!box1->dots) + for(ad=d=100;dx>1 && dy>4;){ // min 3x4 + DBG( wchar_t c_ask='('; ) + if (sdata->holes.num > 1) {Break;}; /* tolerant against a 
tiny hole */ +#if 1 + for(i=y=0;y2 || (i>0 && dy<16)) {Break;}; +#endif + /* look for the extrema => r1..r2 */ + for(i=dx,r1=r2=y=dy/2-dy/8;y<=dy/2+dy/8;y++){ + j=loop(bp, 0,y,dx,cs,0,RI); if(j==i) r2=y; if(jdy){ +// from Aug06 vector-version of greater is used +// if(i2==0 && 3*i5>dx && i4<=dx/8 && i6<=dx/8) { Setac(box1,(bc='>'),98);{Break;}; } + if(i5==0 && 3*i2>dx && i1<=dx/8 && i3<=dx/8) { Setac(box1,(bc='<'),98);{Break;}; } + } + if( dx > 2 && 9*dx>=5*dy ){ // 4x6 screen-font (3*5) + ad=98; + if (dx<8) ad=99*ad/100; + if (dx<6) ad=96*ad/100; + if( 2*dx > JOB->res.avX && 4*dx>dy ) ad=98; +// printf(" %d %d %d %d %d %d\n",i5,i1,i3,i2,i4,i6); + if( i5==0 && i1<=dx/8+1 && i3<=dx/8+1 && i1+i3<=dx/8+1 + && i2>=dx/2 && i4>=3*dx/4 && i6>=3*dx/4 ) { + if (2*loop(bp, 0, y/2,dx,cs,0,RI)+1+dx/16=dx/2 && i1>=3*dx/4 && i3>=3*dx/4 ) { + if (2*loop(bp,dx-1, y/2,dx,cs,0,LE)+1+dx/16m4+box1->m3)/2-box1->m2; + // + // out_x(box1);printf("() %d %d %d %d %d %d %d\n",i,i1,i2,i3,i4,i5,i6); + if(2*i2i4+i6 && 2*dx=i){ + Setac(box1,(bc=')'),98);break; } + if(2*i2>i1+i3 && 2*i5=i){ + if(2*i2<=i1+i3+1 || 2*i5>=i4+i6-1) ad=98*ad/100; + if(2*i2<=i1+i3+2 || 2*i5>=i4+i6-2) ad=98*ad/100; + for(x=y=0;yx ) x=i; + } + for(y=0;y<(dy+2)/4;y++){ + i=loop(bp,0,y+dy/8,dx,cs,0,RI);if( i2 && dy>4 && dy>=2*dx;){ // (3,6) on 4x6 font + DBG( wchar_t c_ask=']'; ) + if (sdata->holes.num > 1) { Break;} /* tolerant against a tiny hole */ + if (!hchar) ad=97*ad/100; + for(y=0;yp,cs,2) == 2 + && get_bw(x0,x1,y0+1,y0+1,box1->p,cs,2) == 2 ) {Break;}; + if( get_bw(x0,x1,y1 ,y1 ,box1->p,cs,2) == 2 + && get_bw(x0,x1,y1-1,y1-1,box1->p,cs,2) == 2 ) {Break;}; + if( get_bw(x0 ,x0,y0 ,y1 ,box1->p,cs,2) == 0 + || get_bw(x0+1 ,x0+1,y0 ,y1 ,box1->p,cs,2) == 0 ) + if( get_bw(x0+dx/2,x1,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) + { Setac(box1,(bc='['),ad);break; } + if( get_bw(x1 ,x1,y0 ,y1 ,box1->p,cs,2) == 0 + || get_bw(x1-1 ,x1-1,y0 ,y1 ,box1->p,cs,2) == 0 ) + if( get_bw(x0,x1-dx/2,y0+dy/4,y1-dy/4,box1->p,cs,1) == 0 ) + { 
Setac(box1,(bc=']'),ad);break; } + break; + } + +#if CODE_NOT_COMPLETED + // --- test ] ------- + for(ad=d=100;dx>2 && dy>3;){ + DBG( wchar_t c_ask=']'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num > 0) ad=98*ad/100; /* # */ + /* 1/8 distance to the center */ + d=2*sq(128/16); + /* now we check for the 4 ends of the x */ + if (aa[0][2]>d) Break; + if (aa[1][2]>d) Break; + if (aa[2][2]>d) Break; + if (aa[3][2]>d) Break; + if (aa[3][0]-aa[0][0]<7*dx/8) Break; + if (aa[2][0]-aa[1][0]<7*dx/8) Break; + if (aa[1][1]-aa[0][1]<7*dy/8) Break; + if (aa[2][1]-aa[3][1]<7*dy/8) Break; + if (aa[3][0]-aa[0][0]<2) Break; /* to small */ + if (aa[2][0]-aa[1][0]<2) Break; /* to small */ + MSG( fprintf(stderr," aa %d %d %d %d %d %d %d %d d %d %d %d %d",\ + aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0,\ + aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0,\ + aa[0][2],aa[1][2],aa[2][2],aa[3][2]);) + /* left and right vertical line */ + d=line_deviation(box1, aa[0][3], aa[1][3]); if (d>2*sq(1024/4)) Break; + ad=(100-(d-sq(1024)/2)/sq(1024)/4)*ad/100; + d=line_deviation(box1, aa[2][3], aa[3][3]); if (d>2*sq(1024/4)) Break; + + /* search uppermost left ^ */ + i1=nearest_frame_vector(box1,aa[1][3],aa[2][3], x0, y0); + x=box1->frame_vector[i1][0]; + y=box1->frame_vector[i1][1]; + if (y-y0 > 5*dy/8) Break; + if (x-x0 > 5*dx/8) Break; + /* search uppermost right ^ ~H */ + i3=nearest_frame_vector(box1,aa[1][3],aa[2][3], x1, y0); + if ( box1->frame_vector[i3][0]-x> dx/4 + && box1->frame_vector[i3][1]-y<=dy/8) Break; + + /* check if upper left and lower right point are joined directly */ + dbg[0]=d=line_deviation(box1,i1, aa[2][3]); if (d >2*sq(1024/4)) Break; + /* check if lower left and lower left point are joined directly */ + dbg[1]=d=line_deviation(box1, aa[1][3],i1); if (d >2*sq(1024/4)) Break; + + if (!hchar) ad=99*ad/100; + if ( gchar) ad=98*ad/100; // \sc N + ac=(wchar_t) ']'; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + } 
+#endif + // --------- test ocr-a-[] -------------------------------- + if(bc==UNKNOWN) + for(ad=d=98;dx>5 && dy>7 && 2*dy>3*dx;){ // only for accurate font at the moment + DBG( wchar_t c_ask='['; ) + if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ + if (!hchar) ad=97*ad/100; + if( num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) break; + if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break; + if ( loop(bp,dx-1,dy/2,dx,cs,0,LE) + +loop(bp, 0,dy/2,dx,cs,0,RI) <= dx/4 ) break; // O + for(y=dy/8;yp,cs,1) == 0) + { Setac(box1,(bc='['),ad);break; } + if( get_bw(x0,(5*x0+3*x1)/8,y0+3*dy/16,y1-3*dy/16,box1->p,cs,1) == 0) + { Setac(box1,(bc=']'),ad);break; } + break; + } + // --------- test {} -------------------------------- + for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){ + DBG( wchar_t c_ask='{'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if (!hchar) ad=97*ad/100; + for(y=0;y3*dx/4 ) ad=99*ad/100; + if ( loop(bp,0, 0,dx,cs,0,RI)>3*dx/4 ) ad=99*ad/100; // < + if ( loop(bp,0, 0,dy,cs,0,DO)=dx/8 ) ad=98*ad/100; // < + if ( loop(bp,dx-2,dy-1,dy,cs,0,UP)>dy/4 ) Break; // f + if ( get_bw(x0,x0,y0,y0+dy/4,box1->p,cs,1) == 1 + || get_bw(x0,x0,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break; + Setac(box1,(bc='{'),ad);Break; + } + for(ad=d=99;dx>2 && dy>5 && 2*dy>3*dx;){ + DBG( wchar_t c_ask='}'; ) + if (!hchar) ad=97*ad/100; + for(y=0;y3*dx/4 ) {ad=99*ad/100;} + if ( loop(bp,dx-1, 0,dx,cs,0,LE)>3*dx/4 ) {ad=99*ad/100;} // > + if ( loop(bp,dx-1, 0,dy,cs,0,DO)=dx/8 ) ad=98*ad/100; // < + if ( loop(bp,1,dy-1,dy,cs,0,UP)>dy/4 ) Break; // ??? 
+ if ( get_bw(x1,x1,y0,y0+dy/4,box1->p,cs,1) == 1 + || get_bw(x1,x1,y1-dy/4,y1,box1->p,cs,1) == 1 ) Break; + Setac(box1,(bc='}'),ad);Break; + } + return box1->c; +} + +#if 0 +/* ---------- empty prototype function for copy and expand ---------- */ +static wchar_t ocr0_XXX(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,i0,i1,i2,i3,hchar=sdata->hchar,gchar=sdata->gchar, + x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1,cs=sdata->cs; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + ac,ad; /* tmp-vars */ + + // --- test XXX --------------------------------------------------- + return box1->c; +} +#endif + + +/* ----------------------- part9 -------------------------------- */ +static wchar_t ocr0p9(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + i1,i2,i3,i4; /* tmp-vars */ + int xa,xb, /* used for store significant points of char */ + dbg[9]={0,0,0,0,0,0,0,0,0}, /* debugging space */ + ya,ad,cs=sdata->cs; + wchar_t ac,bc=UNKNOWN; // bestletter + int hchar; // char is higher than e + int gchar; // char has ink lower than m3 + // --- hchar --- gchar ------------------------- + hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; + gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1; + // if the char is slightly moved down correction can be done + if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved + if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; + + /* reserved for the future */ + // --- test beta,\3,sz,"s --------------------------------------------- + if(bc==UNKNOWN && hchar) + for(ad=d=100;dx>3 && dy>6;){ // min 4x7 + DBG( wchar_t c_ask='S'; ) + if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ + /* this part is provisorium, should be changed! 
+ a-\ + | d + b| / + | \ + -c / + */ + if( num_cross(x0 ,x1 ,y0+dy/4 ,y0+dy/4 ,box1->p,cs) != 2 + && num_cross(x0 ,x1 ,y0+dy/4+1,y0+dy/4+1,box1->p,cs) != 2 ) break; + for(i=1+dy/16,y=y0+dy/8;y0;y++){ + if( yp,cs) != 2 ) i--;} + else { if( num_cross(x0 ,x1 ,y,y,box1->p,cs) < 2 ) i--;} + if( get_bw(x0,x0+dx/2,y,y,box1->p,cs,1) == 0 ) i--; + if( yp,cs,1) == 0 ) i--; + } if( i<=0 ) break; + // out_x(box1); + + for(y=y0+dy/3;yp,x1,y,dx,cs,0,LE); + if( i>=dx/8 ) break; + i+=loop(box1->p,x1-i,y,dx,cs,1,LE); + if( i>=dx/2 ) break; + } if( y>=y1-dy/3 ) break; + + for(y=y0+dy/5;yp,cs,1) == 1 ) break; + if( y>=y0+dy/3 ) break; + + for(y=y0+dy/2;yp,cs,1) == 1 ) break; + if( y>=y1 ) break; + + for(y=y1-dy/3;yp,x1,y,dx,cs,0,LE); + if( i>dx/4 + && get_bw(x1-dx/8,x1-dx/8,y,y1,box1->p,cs,1) == 1 ) break; + } if( ym3==0 || 2*y1m3+box1->m4 ) + if( loop(box1->p,x1,y1, dx,cs,0,LE)==0 + && loop(box1->p,x1,y1-dy/4,dx,cs,0,LE)>dx/8 ) break; // ~R + + + for(x=x0+dx/4;xp,cs) == 3 ) break; + if( x>=x1-dx/4 ) break; + + i=loop(bp,dx/2,dy-1,dy,cs,0,UP)+dy/64; // Jul00 + for(x=dx/5;x i ) break; + if( x==dx/2 ) break; + + x=x0+loop(bp,0,dy/4,dx,cs,0,RI); + for(;xp,cs,1) == 0 ) break; + if( xp,cs,NULL) != 0 ) break; + if (sdata->holes.num != 0) break; + + bc=LATIN_SMALL_LETTER_SHARP_S; + Setac(box1,(wchar_t)bc,98); + break; + } + // --- test + ------------------------------------------------ + for(ad=d=100;dx>2 && dy>2;){ // min 3x3 + DBG( wchar_t c_ask='+'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + xa=(dx+1)/3-1; ya=(dy+1)/3-1; + xb=(dx+1)/4; + if( get_bw(x0,x0+xa,y0,y0+ya,box1->p,cs,1) == 1 ) Break; + if( get_bw(x0,x0+xa,y1-ya,y1,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1-xb,x1,y0,y0+ya,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1-xa,x1,y1-ya,y1,box1->p,cs,1) == 1 ) Break; + for(i=0,y=y0+ya;y<=y1-ya;y++){ // horizontal line + if( get_bw(x0+dx/9,x1-dx/9,y,y,box1->p,cs,2) == 0 ) { i=y; break; } + } + if (3*dx<2*dy) ad=99*ad/100; // ~t + if( !i ) Break; + 
ac=(wchar_t) '+'; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + } + // --- test $ ------------------------------------------------ + for(ad=d=99;dx>3 && dy>5;){ // min 3x4 + DBG( wchar_t c_ask='$'; ) + if (sdata->holes.num != 2) Break; + + if( get_bw(x0,x0+dx/5,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break; + if( get_bw(x0,x0+dx/9,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1-dx/9,x1,y0 ,y0+dy/18,box1->p,cs,1) == 1 ) Break; + if( get_bw(x1-dx/5,x1,y1-dy/23,y1 ,box1->p,cs,1) == 1 ) Break; + if( get_bw(x0,x0+dx/3,y0+dy/3 ,y0+dy/2 ,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/3,x1,y1-dy/2 ,y1-dy/3 ,box1->p,cs,1) != 1 ) Break; + i1=x0+loop(box1->p,x0,y0,dx,cs,0,RI); if( i1x1-dx/5 ) Break; + i2=x0+loop(box1->p,x0,y1,dx,cs,0,RI); if( i2i1 ) Break; + ad= get_line2(i1,y0,i2,y1,box1->p,cs,100)*ad/100; + // check upper left and lower right half circle, $ + for (x=0,i3=y=0;yp,cs) == 2 ) { + i = loop(box1->p,x0,y0+dy/2-y,dx,cs,0,RI); + if (i>x) { x=i; i3=y0+dy/2-y; } + } if (x<=dx/4) Break; + for (x=0,i4=y=0;yp,cs) == 2 ) { + i = loop(box1->p,x0,y0+dy/2+y,dx,cs,0,RI); + if (i>x) { x=i; i4=y0+dy/2+y; } + } if (x<=dx/4) Break; + if (ad<95) Break; + ac=(wchar_t) '$'; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + } + // --- test & ------------------------------------------------ + for(ad=d=99;dx>3 && dy>4;){ /* 4x6 font */ + DBG( wchar_t c_ask='&'; ) + if (sdata->holes.num != 2) Break; + if( get_bw(x1-dx/9,x1,y0,y0+dy/4,box1->p,cs,1) == 1 ) Break; // g + if( loop(bp,dx/2,0,dy,cs,0,DO)>dy/2) Break; + i1=loop(bp,0,dy/8 ,dx,cs,0,RI); if (i1>dx/2) Break; + i =loop(bp,0,dy/4 ,dx,cs,0,RI); if (i1>dx/2) Break; if (idx/2) Break; + i =loop(bp,0,dy-dy/4-1,dx,cs,0,RI); if (i3>dx/2) Break; if (ii1) Break; + for( i2=0, y=dy/4; y<=dy/2+1; y++ ){ + i =loop(bp,0,y,dx,cs,0,RI); if( i>i2 ) i2=i; + } + if(2*i2-i1-i3<1) Break; + // if( num_hole(x0,x1 ,y0,y1,box1->p,cs,NULL)!=2 ) Break; + if( num_hole(x0,x1-dx/4,y0,y1,box1->p,cs,NULL)!=2 ) Break; + if( 
num_cross(dx-1,dx-1,dy/4,dy-1,bp,cs) < 1 ) Break; + for( x=dx-1; x>=dx/2; x-- ){ + if( num_cross(x,x,dy/4,dy-1,bp,cs) > 1 ) break; + } if( x<=3*dx/4 && x 3 ) { // glued ah + if (dy>15) { Break; } else ad=96*ad/100; + } + if (!hchar) ad=98*ad/100; + bc=(wchar_t) '&'; + Setac(box1,bc,ad); + if (ad>=100) return bc; + break; + } + // --- test \it & like \epsilon\tau ------------------------------ + if(bc==UNKNOWN) + for(ad=d=100;dx>7 && dy>7;){ + DBG( wchar_t c_ask='&'; ) + if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ + if( num_cross(0,dx-1, dy/4, dy/4,bp,cs) != 3 ) break; + if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 4 ) break; + if( num_cross(dx/2,dx-1,dy/2, dy/2,bp,cs) != 2 ) break; + if( num_cross(0,dx-1,3*dy/4,3*dy/4,bp,cs) != 2 ) break; + if( num_cross(0,dx-1, dy-1, dy-1,bp,cs) != 1 ) break; + if( num_cross( 0, 0,0,dy-1,bp,cs) != 1 ) break; + if( num_cross( dx/3, dx/3,0,dy-1,bp,cs) != 4 ) break; + if( num_cross(13*dx/16,13*dx/16,0,dy/8,bp,cs) != 0 ) break; + if( num_cross(4*dx/8,4*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break; + if( num_cross(3*dx/8,3*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break; + if( num_cross(5*dx/8,5*dx/8,dy-dy/4,dy-1,bp,cs) != 1 ) break; + if( num_hole(x0 ,(x0+x1)/2,y0, y1,box1->p,cs,NULL) != 1 ) break; + if( num_hole(x0+dx/8,x1-dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) break; + ac=(wchar_t) '&'; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + } + // --- test ? --------------------------------------------------- + for(ad=d=98;dx>2 && dy>5;){ // min 3x(4+2) + DBG( wchar_t c_ask='?'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + if ( num_cross(x0, x1, y0, y0, box1->p, cs) !=1 ) Break; // ~? + if ( num_cross(x0, x1, y1, y1, box1->p, cs) > 1 ) Break; // ~? + for(y=y0;yp,cs,1) != 1 ) break; // lower end + if (2*ym4) { // probably lower dot not catched in box? 
+ if (get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) != 1 ) Break; + i1=box1->m4; + for(;i1>y1;i1--) // new y1 + if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot + } + y--; i=y-y0+1; // new dy + for (y=0;yp, cs) == 2 ) break; + if (y==dy/2) Break; + // if( num_hole( x0, x1, y0, y1, box1->p,cs,NULL) > 0 ) Break; + if (sdata->holes.num > 0) Break; + for(y=y0+dy/2;y<=i1;y++) + if( get_bw(x0,x1,y,y,box1->p,cs,1) == 0 ) break; + if( y==i1 ) Break; + for( ;y<=i1;y++) + if( get_bw(x0,x1,y,y,box1->p,cs,1) == 1 ) break; + if( get_bw(x0,x1,y,y,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+7*dx/8,x1,y,i1,box1->p,cs,1) == 1 ) Break; // broken thin 2 + bc='?'; + Setac(box1,(wchar_t)bc,98); + return bc; + } + // --- test !| --------------------------------------------------- + for(ad=d=99; dy>4 && dy>2*dx;){ // min 3x4 + DBG( wchar_t c_ask='!'; ) + if (sdata->holes.num > 1) Break; /* tolerant against a tiny hole */ + // measure thickness + if (num_cross(x0,x1,y0 ,y0 ,box1->p,cs)!=1) Break; + if (num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs)!=1) Break; + for(y=y0;yp,cs,1) != 1 ) break; // lower end + if (2*ybox1->m3-dy/8) ad=ad*97/100; /* missing dot? */ + i1=y1; + if (y==y1 && box1->m4) { // probably lower dot not catched in box? + if ((dx>2 && get_bw(x0+1,x1-1,y1+1,box1->m4,box1->p,cs,1) == 1) + || (dx<3 && get_bw(x0 ,x1 ,y1+1,box1->m4,box1->p,cs,1) == 1 )) { + i1=box1->m4; + for(;i1>y1;i1--) // new y1 + if( get_bw(x0, x1,i1,i1,box1->p,cs,1) == 1 ) break; // lower dot + } + } i2=i1; + for( i1=0,y=y0;y<=i2;y++){ + i=num_cross(x0,x1,y,y,box1->p,cs); if(i>1) break; + if(i==0 && i1==0) i1=y; + } if(y<=i2 || i1==0 || i1dx/4+1 ) Break; // f + + if (!hchar) ad=96*ad/100; + Setac(box1,(wchar_t)'!',ad); + break; + } + // --- test * five egdes (jagges? beames?) what is the right english word? 
---- + for(ad=d=99;dx>2 && dy>4;){ + DBG( wchar_t c_ask='*'; ) + if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ + if( num_cross(0,dx-1, 0,dy-1,bp,cs) != 1 + && num_cross(0,dx-1, 1,dy-2,bp,cs) != 1 ) Break; + if( num_cross(0,dx-1,dy-1,dy-1,bp,cs) != 2 + && num_cross(0,dx-1,dy-2,dy-2,bp,cs) != 2 ) Break; + x=dx/2;y=(6*dy+8)/16; // center point 6/8=6/2^3 rounded + /* upwarts from center */ + dbg[0]=i=get_line2(x,y,x ,0,bp,cs,100); if(i<95) Break; + if (dx<8) /* be exact on small fonts, where get_line2 returns 100 (ToDo change) */ + if (get_bw(x,x,0,y,bp,cs,2)==2) Break; + /* horizontal */ + dbg[1]=i=get_line2(0,y,dx-1,y,bp,cs,100); if(i<95) Break; + if (dy<8) + if (get_bw(0,dx-1,y ,y ,bp,cs,2)==2 + && get_bw(0,dx-1,y+1,y+1,bp,cs,2)==2) Break; + /* down (right) */ + i=get_line2(x,y,(5*dx+4)/8,dy-1,bp,cs,100); + j=get_line2(x,y,(6*dx+4)/8,dy-1,bp,cs,100); if(j>i) dbg[2]=i=j; + if(i<95) Break; + /* down (left) */ + dbg[3]=i=get_line2(x, y,(2*dx+4)/8,dy-1,bp,cs,100); if(i<95) Break; // straight up + /* check for lower gap at bottom */ + dbg[4]=i=get_bw( x, x,dy-1-dy/8,dy-1,bp,cs,1); if(i==1) Break; + dbg[5]=i=get_line2( dx/4,dy/4, 0,0,bp,cs,101); if(i<95) Break; // upper left gap + dbg[6]=i=get_line2(dx-1-dx/4,dy/4,dx-1,0,bp,cs,101); if(i<95) Break; // upper right gap + MSG(fprintf(stderr,"%d %d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5],dbg[6]);) + Setac(box1,(wchar_t)'*',ad); + break; + } + // --- test * six egdes (jagges? beames?) what is the right english word? 
---- + for(ad=d=100;dx>4 && dy>4;){ + DBG( wchar_t c_ask='*'; ) + if (sdata->holes.num > 0) Break; /* tolerant against a tiny hole */ + if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 3 + && num_cross(0,dx-1, 1+dy/8, 1+dy/8,bp,cs) != 3) Break; + if( num_cross(0,dx-1,dy-2-dy/8,dy-2-dy/8,bp,cs) != 3) Break; + if( num_cross(0 , 0, 0,dy-1,bp,cs) != 2) Break; + if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) != 2) Break; + if( num_cross(0,dx-1,dy/2,dy/2,bp,cs) != 1) Break; + if( num_cross( 0 ,dx/8,dy/2,dy/2,bp,cs) != 0) Break; + if( num_cross(dx-1-dx/8,dx-1,dy/2,dy/2,bp,cs) != 0) Break; + if (dx>5) { + dbg[0]=i=get_line2(0,dy-2-dy/8,dx-1,dy/8,bp,cs,100); if(i<95) Break; // black upwarts beam + dbg[1]=i=get_line2(0,dy/8,dx-1,dy-2-dy/8,bp,cs,100); if(i<95) Break; // black downwards beam + /* check vertical line */ + dbg[2]=i=get_line2(dx/2,0,dx/2, dy-1,bp,cs,100); if(i<95) Break; + } + MSG(fprintf(stderr,"%d %d %d %d %d %d",dbg[0],dbg[1],dbg[2],dbg[3],dbg[4],dbg[5]);) + Setac(box1,(wchar_t)'*',98); + break; + } + // --- test @ - a popular char should be detectable! added in version v0.2.4a5 + if(bc==UNKNOWN) + for(ad=d=99;dx>5 && dy>7;){ + DBG( wchar_t c_ask='@'; ) + if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ + if (loop(bp, 0,dy/2,dx,cs,0,RI)>dx/4) Break; + if (loop(bp,dx-1,dy/2,dx,cs,0,LE)>dx/4) Break; + if (loop(bp,dx/2,dy-1,dy,cs,0,UP)>dx/8) Break; + if (loop(bp,dx/2, 0,dy,cs,0,DO)>dx/8) Break; + /* ..@@@@..<- 8*10 example + .@@..@@. + @@....@@ + @@..@@@@< + @@.@@.@@ + @@.@@.@@ + @@..@@@. + @@...... 
+ .@@...@@ + ..@@@@@.<- */ + x=6*dx/16; + y=dy/2; + i=num_cross(0,dx-1,y,y,bp,cs); + if (i<3 || i>4) Break; + if( i != 4 && dx>8 ) ad=98*ad/100; + + i=num_cross(x,x,0,dy-1,bp,cs); if (i<2) Break; + if (i!=4) { j=num_cross(x+1,x+1,0,dy-1,bp,cs); + if (abs(4-j)4) Break; + if (i!=4) ad=97*ad/100; + if( num_cross(0, x,y,y,bp,cs) != 2 ) Break; + if( num_cross(x,dx-1,y,y,bp,cs) != 2 ) Break; + if( num_cross(x,x,0, y,bp,cs) != 2 ) Break; + if( num_cross(x,x,y,dy-1,bp,cs) != 2 ) Break; + if (dx>7) { + // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 1 ) Break; + if (sdata->holes.num != 1) Break; + if( num_hole(x0+dx/8,x1-3*dx/16,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break; + } + Setac(box1,(wchar_t)'@',ad); + break; + } + // --- test paragraph v0.2.6 + if(bc==UNKNOWN && hchar) + for(ad=d=100;dx>4 && dy>15;){ + DBG( wchar_t c_ask='$'; ) + if (sdata->holes.num > 3) break; /* tolerant against a tiny hole */ + if( get_bw( 0,dx/2,3*dy/4,3*dy/4,bp,cs,1) == 1 ) break; + if( get_bw(3*dx/4,dx-1,3*dy/4,3*dy/4,bp,cs,1) == 0 ) break; + if( get_bw( 0,dx/4, dy/4, dy/4,bp,cs,1) == 0 ) break; + if( get_bw( dx/2,dx-1, dy/4, dy/4,bp,cs,1) == 1 ) break; + if( get_bw(dx/2,dx/2, 0, dy/4,bp,cs,1) == 0 ) break; + if( get_bw(dx/2,dx/2,dy-1-dy/4, dy-1,bp,cs,1) == 0 ) break; + if( num_cross(dx/2,dx/2,0,dy-1,bp,cs) != 4 ) break; + if( num_cross(x0,x1,y0+dy/2,y0+dy/2,box1->p,cs) != 2 ) break; + if( num_hole( x0,x1,y0+dy/4,y1-dy/4,box1->p,cs,NULL) != 1 ) break; + Setac(box1,SECTION_SIGN,96); + break; // paragraph=0xA7=167 + } + + return bc; +} + +/* ----------------------- partx -------------------------------- */ +static wchar_t ocr0px(ocr0_shared_t *sdata){ + struct box *box1=sdata->box1; + pix *bp=sdata->bp; + int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + i1,i2,i3,i4,j1,cs=sdata->cs; /* tmp-vars */ + int ya,ad; /* used for store significant points of char */ + wchar_t ac,bc=UNKNOWN; // bestletter + int hchar; // char is higher than e + int 
gchar; // char has ink lower than m3 + // --- hchar --- gchar ------------------------- + hchar=0;if( 2*y0<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; + gchar=0;if( 2*y1>=2*box1->m3+(box1->m4-box1->m3) ) gchar=1; + // if the char is slightly moved down correction can be done + if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved + if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; + + /* reserved for special chars, to test at the end */ + // --- test 'ff' --------------------------------------------------- + // ToDo: better check and call test 'f' and 'f' with subboxes + if( bc==UNKNOWN ) + for(ad=98;dx>4 && dy>6;){ // Dec00 body copied from H + DBG( wchar_t c_ask='f'; ) + if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ + if( num_cross(0,dx-1, dy/4 , dy/4 ,bp,cs) != 2 + && num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 ) break; + if( num_cross(0,dx-1,3*dy/4 ,3*dy/4 ,bp,cs) != 2 + && num_cross(0,dx-1,3*dy/4+1,3*dy/4+1,bp,cs) != 2 ) break; + if( loop(bp,0 ,dy/8,dx,cs,0,RI) + + loop(bp,dx-1,dy/8,dx,cs,0,LE)>dx/2 ) break; // ~A + for( j1=0,i=1,y=y0+dy/10; yp,x0 ,y,dx,cs,0,RI) + +loop(box1->p,x1 ,y,dx,cs,0,LE); + if( j>10*dx/16 ) i=0; if ( j>j1 ) j1=j; } + if( !i ) break; + for( x=dx/4; x 3*dy/8 ) break; + if ( 10*y > dy ){ /* italic */ + i=loop(bp,x ,dy-y,dx,cs,0,RI); + if( i>1 && y+loop(bp,x+i-1,dy-y,dy,cs,0,UP)>3*dy/8 ) break; + } + } if( x>=dx/2 ) break; + x=loop(box1->p,x0 ,y1-dy/8,dx,cs,0,RI) + +loop(box1->p,x1 ,y1-dy/8,dx,cs,0,LE); + for( i=1,y=dy/4; ydx/5 ) i=0; } + if( !i ) break; // ~K Jul00 + for( i=0,ya=y=y0+dy/4; yp,x0 ,y,dx,cs,0,RI); + j=loop(box1->p,x0+j,y,dx,cs,1,RI); if( j>i ) { i=j; ya=y; } } + if( i<=dx/2 ) break; ya-=y0; + if( num_cross(0,dx-1,ya ,ya ,bp,cs) != 1 + && num_cross(0,dx-1,ya+1,ya+1,bp,cs) != 1 ) break; /* Dec00 */ + for( y=ya; y 2 + && num_cross(0,dx-1,y+1,y+1,bp,cs) > 2 ) break; + if ( yp,cs,1) == 0 ) i=0; + } if( !i ) break; + for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ + if( get_bw( x, x,y1-dy/4,y1 
,box1->p,cs,1) == 0 ) i=0; + } if( i ) break; + for(i=1,x=x0+dx/4;x<=x1-dx/4 && i;x++){ + if( num_cross(x,x,y0+dy/8,y1-dy/8, box1->p,cs) == 1 ) i=0; + } if( i ) break; + for(i=1,y=y0;y<=y0+dy/4 && i;y++){ + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) break; + for(i=1,y=y1-dy/4;y<=y1 && i;y++){ + if( num_cross(x0,x1,y,y, box1->p,cs) == 2 ) i=0; + } if( i ) break; + if( num_cross(x0 ,x0+dx/8 ,y0+dy/8 ,y0 ,box1->p,cs) != 0 ) ad=96*ad/100; + if( get_bw(x1-dx/8, x1 , y0, y0+dy/8,box1->p,cs,1) != 1 ) break; + if( get_bw(x0 , x0+dx/8, y1-dy/8, y1,box1->p,cs,1) != 1 ) break; + i1=loop(bp,dx-1, dy/4,dx,cs,0,LE); if(i1>dx/2) break; + i2=loop(bp,dx-1, dy/2,dx,cs,0,LE); if(i2i1+dx/8) break; + i3=loop(bp,dx-1,dy-1-dy/4,dx,cs,0,LE); if(i3i2+dx/8) break; + if(abs(i1+i3-2*i2)>dx/16+1) break; + if( num_hole(x0,x1,y0+dy/4,y1,box1->p,cs,NULL) != 0 ) break; + if (!hchar) ad=96*ad/100; + if (!gchar) ad=99*ad/100; + ac=LATIN_SMALL_LIGATURE_FF; + Setac(box1,ac,ad); + break; + } + // --- test ae --------------------------------------------------- + if( bc==UNKNOWN ) + for(ad=98;dx>4 && dy>6;){ // provisorium + DBG( wchar_t c_ask=LATIN_SMALL_LETTER_AE; ) + if (sdata->holes.num > 4) Break; /* tolerant against a tiny hole */ + if( num_cross( dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 2 + && num_cross(dx-1-dx/4,dx-1,3*dy/16,3*dy/16,bp,cs) != 1 ) Break; + if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break; + if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break; + if( num_cross(dx-1,0, 0, dy-1,bp,cs) < 3 ) Break; + if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) < 2 ) + if( num_cross(0,dx-1,1+dy/16,1+dy/16,bp,cs) < 2 ) Break; + if( num_cross(0,dx-1,dy-1-dy/16,dy-1-dy/16,bp,cs) < 2 ) Break; + for( x=0,i2=y=dy/4; y<3*dy/4; y++ ){ + j=loop(bp,0,y,dx,cs,0,RI); if(j>x) { i2=y; x=j; } + } if( x3*dx/4 ) Break; + for( x=0,i4=y=dy/4; y<3*dy/4; y++ ){ + j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; } + } if( x3*dx/4 ) Break; + for( x=0,i4=y=dy/8; y<3*dy/4; y++ ){ + j=loop(bp,dx-1 
,y,dx,cs,0,LE); + j=loop(bp,dx-1-j,y,dx,cs,1,LE); + if(j>x) { i4=y; x=j; } + } if( xp,cs,NULL) != 1 ) Break; + if( num_hole(x0+dx/2-1,x1,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; + ac=LATIN_SMALL_LETTER_AE; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + + } + // --- test AE --------------------------------------------------- + if( bc==UNKNOWN ) + for(ad=98;dx>5 && dy>6;){ // provisorium + DBG( wchar_t c_ask=LATIN_CAPITAL_LETTER_AE; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) < 2 ) Break; + if( num_cross(0,dx-1,3*dy/ 4,3*dy/ 4,bp,cs) < 2 ) Break; + if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 3 ) Break; + if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) != 1 + && num_cross(0,dx-1, dy/32, dy/32,bp,cs) != 1 + && num_cross(0,dx-1, 0, 0,bp,cs) != 1 ) Break; + // check for upper horizontal line + j=loop(bp,dx-1 ,0,dx,cs,0,LE); x=j; + j=loop(bp,dx-1-j,0,dx,cs,1,LE); + i=loop(bp,dx-1 ,1,dx,cs,0,LE); if (ij) j=i; + if (x>dx/8) Break; + if (jx) break; x=j; + j=loop(bp, j,y,dx,cs,1,RI); if(j>i1) { i1=j; i2=y; } + j=loop(bp,dx-1 ,y,dx,cs,0,LE); + j=loop(bp,dx-1-j,y,dx,cs,1,LE); if(j>i3) { i3=j; i4=y; } + } if( y<3*dy/4 || i1i1) { i1=j; } + j=loop(bp,dx-1 ,dy-1-y,dx,cs,0,LE); + j=loop(bp,dx-1-j,dy-1-y,dx,cs,1,LE); if(j>i3) { i3=j; } + } if( i1<=dx/4 || i3<=dx/4 ) Break; + for( x=dx-1-dx/8; x>dx/2; x-- ){ // look for right the E + if( num_cross(x,x, 0,dy-1,bp,cs) == 3 ) + if( num_cross(x,x, 0,dy/4,bp,cs) == 1 ) + if( num_cross(x-1,dx-1-dx/8,3*dy/4,3*dy/4,bp,cs) == 0 ) + if( num_cross(x,x,3*dy/4,dy-1,bp,cs) == 1 ) break; + } if (x<=dx/2) Break; // not found + if (sdata->holes.num != 1) Break; + if( num_hole(x0,x0+3*dx/4,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; + // if( num_hole(x0, x1,y0,y1 ,box1->p,cs,NULL) != 1 ) Break; + ac=LATIN_CAPITAL_LETTER_AE; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + + } + // --- test /0 /o /O O_WITH_STROKE ----------------------------------------- + for(ad=99;dx>4 
&& dy>4;){ // provisorium + DBG( wchar_t c_ask=LATIN_SMALL_LETTER_O_WITH_STROKE; ) + if (sdata->holes.num > 3) Break; /* tolerant against a tiny hole */ + if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 3 ) Break; + if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break; + if (loop(bp,dx-1,3*dy/8,dx,cs,0,RI)>dx/8) Break; + if (loop(bp, 0,5*dy/8,dx,cs,0,RI)>dx/8) Break; + if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break; + if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break; + if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break; + if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break; + if( num_cross( 0, 0, 0,dy-1,bp,cs) > 2 ) Break; + if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 2 ) Break; + if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break; + if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 2 ) Break; + i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/8 ) Break; + i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/3 ) Break; i1=dx-1-i1; + i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/8 ) Break; + for(y=1;y3*dx/16 ) break; + } if( yholes.num != 2) Break; + // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 2 ) Break; + + if ( hchar && 2*y0m1+box1->m2 ) + ac=LATIN_CAPITAL_LETTER_O_WITH_STROKE; + else ac=LATIN_SMALL_LETTER_O_WITH_STROKE; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + + } + // --- test /c /C C_WITH_STROKE CENT_SIGN -------------------------- + // here only the version with a continuously vertical line (not broken variant) + if( bc==UNKNOWN ) + for(ad=98;dx>4 && dy>4;){ // provisorium + DBG( wchar_t c_ask=CENT_SIGN; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 2 ) Break; + if( num_cross(0,dx-1-dx/4,dy/2,dy/2,bp,cs) != 2 ) Break; + if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 3 ) Break; + if( num_cross( 0,dx-1, 0, 0,bp,cs) > 2 ) Break; + if( num_cross(dx/4,dx-1, 0, 0,bp,cs) > 2 ) Break; + if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) > 2 ) Break; + if( num_cross( 0,3*dx/4,dy-1,dy-1,bp,cs) > 2 ) Break; + if( num_cross( 0, 0, 
0,dy-1,bp,cs) > 2 ) Break; + if( num_cross(dx-1,dx-1, 0,dy-1,bp,cs) > 3 ) Break; + if( num_cross( 0, 0,dy/4,dy-1,bp,cs) > 2 ) Break; + if( num_cross(dx-1,dx-1, 0,3*dy/4,bp,cs) > 3 ) Break; + i1 =loop(bp,dx-1 , 0,dx,cs,0,LE); if( i1>dx/4 ) Break; + i1+=loop(bp,dx-1-i1, 0,dx,cs,1,LE); if( i1>dx/4 ) Break; i1=dx-1-i1; + i2 =loop(bp, 0,dy-1,dx,cs,0,RI); if( i2>dx/4 ) Break; + for(y=0;ydx/16+1) x-=dx/16+1; + j=loop(bp,x,y,dx,cs,0,RI); // fprintf(stderr,"\n x=%d j=%d",x,j); + if( j>(dx+4)/8 ) ad=96*ad/100; + if( j>(dx+2)/4 ) break; + } if( yp,cs,NULL) != 1 ) Break; + if (sdata->holes.num != 1) Break; + + ac=CENT_SIGN; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + + } + // --- test EURO_CURRENCY_SIGN ----------------------------------------- + if( bc==UNKNOWN ) + for(ad=98;dx>4 && dy>6;){ // provisorium + DBG( wchar_t c_ask='&'; ) + if (sdata->holes.num > 1) break; /* tolerant against a tiny hole */ + if( num_cross(dx/2,dx/2, 0,dy-1,bp,cs) != 4 ) break; + if( num_cross( 0,dx-1, 0, 0,bp,cs) != 1 ) break; + if( num_cross( 0,dx-1,dy-1,dy-1,bp,cs) != 1 ) break; + if( num_cross( 0,dx-1,dy/2,dy/2,bp,cs) != 1 ) break; + for(i=0,y=dy/4;ydx/4 ) break; + j=loop(bp,x,y,dx,cs,1,RI); if( j>i ) i=j; + } if( ydx/2 ) break; + } if( y>=dy-dy/4-1 ) break; + // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; + if (sdata->holes.num != 0) break; + ac=EURO_CURRENCY_SIGN; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + } + // --- test LETTER_C_WITH_CEDILLA --------------------------------------------------- + if (bc==UNKNOWN) + if (gchar) + for(ad=98;dx>3 && dy>6;){ // provisorium + DBG( wchar_t c_ask='c'; ) + if (sdata->holes.num > 0) break; /* no tolerant against tiny holes */ + j=loop(bp,dx-1,dy/16 ,dy,cs,0,LE); + x=loop(bp,dx-1,dy/16+1,dy,cs,0,LE); if (xdx) Break; // ~4 ocr-b + if( num_cross(0,dx-1,3*dy/16,3*dy/16,bp,cs) > 2 ) break; + if( num_cross(0,dx-1, 0, dy-1,bp,cs) < 2 ) break; + if( num_cross(0,dx-1, dy/16, dy/16,bp,cs) > 2 ) break; + for( 
x=dx,i2=y=dy/4; y<3*dy/4; y++ ){ + j=loop(bp,0,y,dx,cs,0,RI); if(j0 ) break; i1=x; + for( x=0,i4=y=dy/4; y<5*dy/8; y++ ){ + j=loop(bp,dx-1,y,dx,cs,0,LE); if(j>x) { i4=y; x=j; } + } if( xdy/4) break; + j =loop(bp,dx/2,j,dy,cs,0,DO); if(j3*dx) break; + j =loop(bp,dx-1-j/2,dy-1-dy/8,dy,cs,0,UP); if(j>dy/2) break; // ~() + // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; + if (sdata->holes.num) break; + if( hchar ) ac= LATIN_CAPITAL_LETTER_C_WITH_CEDILLA; + else ac= LATIN_SMALL_LETTER_C_WITH_CEDILLA; + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + + } + // --- test # --------------------------------------------------- + for(ad=99;dx>4 && dy>4;){ // never sure? + DBG( wchar_t c_ask='#'; ) + if (sdata->holes.num > 2) Break; /* tolerant against a tiny hole */ + if (sdata->holes.num < 1) Break; + if( num_cross(0,dx-1, dy/8, dy/8,bp,cs) != 2 ) Break; + if( num_cross(0,dx-1,dy-1-dy/8,dy-1-dy/8,bp,cs) != 2 ) Break; + if( num_cross(0,dx-1, dy/2, dy/2,bp,cs) != 2 ) Break; + if( num_cross(0,dx/2, dy/2, dy/2,bp,cs) != 1 ) Break; + /* fat "#" have only small ends on left and right side, we tolerate this */ + j=loop(bp, 0,dy/8,dx,cs,0,RI); if(j<1 || j=dx/2) Break; if (j=dx/2) Break; if (j3*dx/4) { i1=0; break; } + j=loop(bp,j, y,dx,cs,1,RI); if(j>i1) { i1=j; } + j=loop(bp,0,dy-1-y,dx,cs,0,RI); if(j>3*dx/4) { i1=0; break; } + j=loop(bp,j,dy-1-y,dx,cs,1,RI); if(j>i3) { i3=j; } + } + if (i1holes.num != 1) {ad=95*ad/100;} + if( num_hole(x0+dx/8,x1-dx/8,y0+dy/8,y1-dy/8,box1->p,cs,NULL) != 1 ) Break; + // if( num_hole(x0 ,x1 ,y0 ,y1 ,box1->p,cs,NULL) != 1 ) Break; + + ac=(wchar_t) '#'; + if( gchar ) {ad=99*ad/100;} + Setac(box1,ac,ad); + if (ad>=100) return ac; + break; + } + // --- test bullet, full_box, grabbed cursor, ZapfDingBats_156 + if (bc==UNKNOWN) + for(ad=96;dx>4 && dy>4 && 2*dx>dy;){ // provisorium + DBG( wchar_t c_ask='#'; ) + if( get_bw(x0,x1,y0,y1,box1->p,cs,2) != 0 ) break; + ac=BULLET; + if (gchar && !hchar) ad=80*ad/100; + Setac(box1,ac,ad); + if 
(ad>=100) return ac; + break; + } + /* --- test | (vertical line, could be a I or l) --- */ + for(ad=99;dy>4 && 2*dxp,cs,2) != 0 ) break; + /* more unsure if the borders are not exact */ + if( get_bw(x0 ,x0+dx/8,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100; + if( get_bw(x1-dx/8,x1 ,y0+dy/9,y1-dy/9,box1->p,cs,2) != 0 ) ad=99*ad/100; + if( get_bw(x0+dx/8,x1-dx/8,y0 ,y0+dy/8,box1->p,cs,2) != 0 ) ad=99*ad/100; + if( get_bw(x0+dx/8,x1-dx/8,y1-dy/8,y1 ,box1->p,cs,2) != 0 ) ad=99*ad/100; + if (3*dxm2 && 2*y1> box1->m2+box1->m3) Break; + if (box1->m2 && 3*y1>2*box1->m2+box1->m3) ad=95*ad/100; + ac='|'; + if (!hchar) ad=98*ad/100; + Setac(box1,ac,ad); + break; + } + // --- test % --------------------------------------------------- + for(ad=100;dx>5 && dy>7;){ // provisorium + DBG( wchar_t c_ask='%'; ) + if (sdata->holes.num > 2) break; /* tolerant against a tiny hole */ + if( num_cross(x0,x1 ,y0+dy/4,y0+dy/4,box1->p,cs) != 3 + && num_cross(x0,x1 ,y0+dy/8,y0+dy/8,box1->p,cs) != 3 ) Break; + if( num_cross(x0,x1+dx/4,y1-dy/4,y1-dy/4,box1->p,cs) != 3 + && num_cross(x0,x1+dx/4,y1-dy/8,y1-dy/8,box1->p,cs) != 3 ) Break; + if( num_cross(x0,x1, y0, y1,box1->p,cs) < 4 + && num_cross(x0+dx/8,x1, y0, y1,box1->p,cs) < 4 + && num_cross(x0,x1+dx/4, y0, y1,box1->p,cs) < 4 + && dx>7 && dy>15) Break; + if( num_cross(x0,x1, y0, y1,box1->p,cs) !=5 ) ad=99*ad/100; + + if (dx>7 && dy>12) { + if( num_hole(x0 ,x1 ,y0,y1-dy/4,box1->p,cs,NULL) != 1 ) Break; + if( num_hole(x0+dx/4,x1+dx/4,y0+dy/4,y1,box1->p,cs,NULL) != 1 ) Break; + if( num_hole(x0 ,x1+dx/4,y0,y1 ,box1->p,cs,NULL) != 2 ) Break; + } else ad=98*ad/100; + // use box1->p instead of b, because % is a sum of 3 objects + if ( loop(box1->p,x0,y0 ,dx,cs,0,RI) + <= loop(box1->p,x0,y0+dy/16+1,dx,cs,0,RI) ) ad=96*ad/100; // X + if ( loop(box1->p,x1,y1 ,dx,cs,0,LE) + <= loop(box1->p,x1,y1-1-dy/16,dx,cs,0,LE) ) ad=96*ad/100; // X + for (x=0;xp,cs,2) != 2 ) break; + } if (x=100) return ac; + break; + } + // --- test Omega 
--------------------------------------------------- + for(ad=d=99;dx>7 && dy>7;){ // min 3x4 + DBG( wchar_t c_ask=GREEK_CAPITAL_LETTER_OMEGA; ) + if( get_bw(x0 , x0+dx/2,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x1-dx/2 , x1 ,y0+dy/2 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2 , x0+dx/2,y0 , y0+dy/2,box1->p,cs,1) != 1 ) Break; + if( get_bw(x0+dx/2 , x0+dx/2,y0+dy/3 , y1-dy/3,box1->p,cs,1) != 0 ) Break; + + if( num_cross(x0+dx/2,x0+dx/2,y0 , y1-dy/3,box1->p,cs) != 1 ) Break; + if( num_cross(x0+dx/3,x1-dx/3,y0 , y0 ,box1->p,cs) != 1 ) // AND + if( num_cross(x0+dx/3,x1-dx/3,y0+1 , y0+1 ,box1->p,cs) != 1 ) Break; + if( num_cross(x0+dx/3,x1-dx/3,y1 , y1 ,box1->p,cs) != 2 ) // against "rauschen" + if( num_cross(x0+dx/3,x1-dx/3,y1-1 , y1-1 ,box1->p,cs) != 2 ) Break; + if( num_cross(x0 ,x0 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) + if( num_cross(x0+1 ,x0+1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; + if( num_cross(x1 ,x1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) + if( num_cross(x1-1 ,x1-1 ,y0+dy/3 , y1-dy/3,box1->p,cs) != 1 ) Break; + if (sdata->holes.num) Break; + // if( num_hole(x0,x1,y0,y1,box1->p,cs,NULL) != 0 ) break; + + if( loop(bp,0 ,0 ,x1-x0,cs,0,RI)<= + loop(bp,0 ,2 ,x1-x0,cs,0,RI) ) Break; + if( loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,RI)>dx/4 + || loop(bp,dx/2,dy-dy/4,x1-x0,cs,0,LE)>dx/4 ) Break; + if( loop(bp,dx/2,3*dy/8,x1-x0,cs,0,RI)dx/8) Break; + x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<3*dx/8 || i>dx/2) Break; + x=loop(bp,i,dy-1-dy/16,x1-x0,cs,0,RI); i+=x; if(i5*dx/8) Break; + x=loop(bp,i,dy-1-dy/16,x1-x0,cs,1,RI); i+=x; if(i<7*dx/8) Break; + + /* look for a vertikal gap at lower end */ + for( x=dx/4;x<3*dx/4;x++ ){ + i=loop(bp,x,dy-1,y1-y0,cs,0,UP); + if( i>3*dy/4 ) break; + } + if( x>=3*dx/4 ) Break; + + if( !hchar ) ad=60*ad/100; + bc=GREEK_CAPITAL_LETTER_OMEGA; + Setac(box1,bc,ad); + break; + } + + return bc; +} + +// -------------------- OCR engine ;) ---------------------------- +/* + * ocr0 - rule based recognition of the one character framed by box1. + * + * box1: character box; this function reads its coordinates (x0,x1,y0,y1), + * line metrics m1..m4, dots, modifier, frame vectors and the list of + * alternatives (num_ac, wac[], tac[]); it may reset box1->y1 and may set box1->c. + * bp: pixel buffer, stored in sdata.bp for the sub-recognizers. + * cs: threshold handed to get_bw, num_cross, loop and num_hole and compared + * with getpixel results (presumably the black/white gray level - confirm). + * + * Order of work as visible here: + * 1. derive hchar and gchar from y0 and y1 relative to m1..m4, + * 2. search the frame vector points nearest to the four box corners (aa), + * 3. count the holes once with num_hole and keep them in sdata.holes, + * 4. test simple shapes directly: - _ = : ; . , quotes, tilde, circumflex, + * 5. with JOB->cfg.only_numbers set, return the result of ocr0n, + * 6. return bc early if it is already set and box1->wac[0] equals 100, + * 7. otherwise call the per-letter recognizers in turn while IF_NOT_SURE holds, + * 8. if box1->wac[0] is above 95, take box1->tac[0] as the result. + * + * Returns bc, which starts as UNKNOWN; several of the direct tests return + * immediately when their weight ad reaches 100. + */ +wchar_t ocr0(struct box *box1, pix *bp, int
cs){ + // pix p=*(box1->p); + int i,j,d,x,y,x0=box1->x0,x1=box1->x1,y0=box1->y0,y1=box1->y1; + int dx=x1-x0+1,dy=y1-y0+1, /* size */ + rx,ry,r1,r2,i1,i2,ad; /* tmp-vars */ + // ad,ac will be used in future + wchar_t bc = UNKNOWN; // bestletter + wchar_t um = SPACE; // modifier '" + int hchar; // char is higher than e + int gchar; // char has ink lower than m3 + int aa[4][4]; /* corner points, see xX, (x,y,dist^2,vector_idx) v0.41 */ + ocr0_shared_t sdata; // data used in all subfunctions + + sdata.box1=box1; + sdata.bp=bp; + sdata.cs=cs; + // --- hchar --- gchar ------------------------- + hchar=0;if( y0 < box1->m2-(box1->m2-box1->m1)/2 ) hchar=1; + gchar=0;if( y1 > box1->m3+(box1->m4-box1->m3)/2 ) gchar=1; + // if the char is slightly moved down correction can be done + if ( y0m2 && y1>box1->m3 && 2*y1m3+box1->m4) // moved + if( 2*(y0-(y1-box1->m3))<=2*box1->m2-(box1->m2-box1->m1) ) hchar=1; + + sdata.hchar=hchar; + sdata.gchar=gchar; + + /* search for nearest points to the 4 corners, typical for xX */ + /* this is faster than calling nearest_frame_vector 4 times */ + aa[0][0]=aa[1][0]=aa[2][0]=aa[3][0]=(x0+x1)/2; /* set to center */ + aa[0][1]=aa[1][1]=aa[2][1]=aa[3][1]=(y0+y1)/2; /* set to center */ + aa[0][2]=aa[1][2]=aa[2][2]=aa[3][2]=2*sq(128); /* distance to box edges */ + aa[0][3]=aa[1][3]=aa[2][3]=aa[3][3]=0; /* vector index */ + /* searching for 4 diagonal line ends */ + /* NOTE(review): in this copy the text following a less-than sign seems to be missing in places (the loop bound below, most of the corner search body, several later comparisons); compare with the upstream file before relying on it */ + for (i=0;inum_frame_vectors[0];i++) { + x=box1->frame_vector[i][0]; /* take a vector */ + y=box1->frame_vector[i][1]; + /* distance to upper left end, normalized to 128 */ + j=0; d=sq((x-x0)*128/dx)+sq((y-y0)*128/dy); + // fprintf(stderr," setaa i= %2d xy= %3d %3d d=%5d aa[3]=%2d\n",i,x-x0,y-y0,d,aa[0][3]); + if (dnum_frames>0) // speedup v0.42 + num_hole(x0,x1,y0,y1,box1->p,cs,&sdata.holes); // call once + // printf(" num_holes=%d\n",sdata.holes.num); + + /* + after division of two glued chars, boundaries could be wrong, + check this first (ToDo: only if a flag set?)
+ */ + if (2*y0 < box1->m2+box1->m3) + if (box1->m4>box1->m3 && 2*box1->y1>box1->m4+box1->m3){ + /* could be a "I" from divided "Ij" or "Ig" */ + for(y=(box1->m3+box1->m2)/2;2*ym3+box1->m4;y++) + if( get_bw(x0,x1,y,y,box1->p,cs,1)==0 ) break; + if(2*ym3+box1->m4) + if( get_bw((x0+x1)/2,(x0+x1)/2,y,box1->m4,box1->p,cs,1)==0 ){ + /* be sure, ~_ */ + if (y>y0) y1=box1->y1=y; + } + } + + DBG( IFV fprintf(stderr,"\nDBG L%d (%d,%d): ",__LINE__,box1->x0,box1->y0); ) + DBG( IFV out_b(box1,sdata.bp,0,0,dx,dy,160); ) + DBG( IFV fprintf(stderr,"# aa[] %d %d %d %d %d %d %d %d (4 corners)" + " d= %d %d %d %d", + aa[0][0]-x0,aa[0][1]-y0,aa[1][0]-x0,aa[1][1]-y0, + aa[2][0]-x0,aa[2][1]-y0,aa[3][0]-x0,aa[3][1]-y0, + aa[0][2], aa[1][2], aa[2][2], aa[3][2]);) + DBG( IFV fprintf(stderr,"\n# holes %d gchar=%d hchar=%d",sdata.holes.num, gchar, hchar);) + + // --- test thin lines - --------------------------------- + for( ad=100; 2*dym3-box1->m2 && 3*dx>=4*dy && dx>2; ){ // min 3x3 (small font) + DBG( wchar_t c_ask='-'; ) + if( get_bw(x0+dx/8+1,x1-dx/8-1,y0+dy/8+((dy>2)?1:0), + y1-dy/8-((dy>2)?1:0),box1->p,cs,2)==2 ) break; + if( box1->dots ) { Setac(box1,'=',97);break; } + if (dx<=2*dy) ad=98*ad/100; + if (dx<=3*dy) ad=99*ad/100; + if (!box1->m4) ad=96*ad/100; + else { + if (y1>=box1->m3) { + if ( dx<2*dy) ad=98*ad/100; + if (2*dx<3*dy) ad=98*ad/100; + Setac(box1,'_',ad); + break; + } + } + Setac(box1,'-',ad); if (ad>=100) return '-'; + break; + } + // --- test thin lines = --------------------------------- + for( ; dy>2 && dx>2; ){ // min 3x3 (small font) + DBG( wchar_t c_ask='='; ) + for( y=y0;yp,cs,1)==1 ) break; + if( get_bw(x0+dx/10,x1-dx/10,y ,y ,box1->p,cs,2)==2 ) break; + if( get_bw(x0 ,x1 ,(y+y1)/2,(y+y1)/2,box1->p,cs,1)==1 ) break; + if( get_bw(x0+dx/10,x1-dx/10,y1 ,y1 ,box1->p,cs,2)==2 ) break; + Setac(box1,'=',100); + return '='; + } + // --- test dots : --------------------------------- + for( ad=100; dy>2 && dy>=2*dx; ){ // max 3x3 (small font) + + DBG( wchar_t c_ask=':';
) + // check the gap height + for( i1=dy/16;i1p,cs,1)==0 ) break; + if (i1>=dy/2) break; + for( i2=dy/16;i2p,cs,1)==0 ) break; + if (i2>=dy/2) Break; + MSG(fprintf(stderr,"gap y12 %d %d",i1,i2);) + + if (box1->m3 && y1>box1->m3) ad=98*ad/100; // ~; + if (box1->m3 && 2*y0> box1->m2+box1->m1) ad=98*ad/100; // ~i + if (gchar) ad=99*ad/100; + ad=ad-abs(i1-i2)/dy*20; /* NOTE(review): the integer division by dy is evaluated before the multiplication, so this penalty is 0 whenever abs(i1-i2) is smaller than dy */ + if (abs(i1-dx)>dy/4) Break; // round or quadratic dots? + if (abs(i1-dx)>dy/8) ad=98*ad/100; + if (abs(i2-dx)>dy/4) Break; // round or quadratic dots? + if (abs(i2-dx)>dy/8) ad=98*ad/100; + if (box1->dots!=1) ad=96*ad/100; + Setac(box1,':',ad); // dx<=3 ad-- + if (ad>=100) return ':'; + break; + } + // --- test dots ; --------------------------------- + if( 2*y0> box1->m2+box1->m1 ) // ~i + if( 4*y1>=3*box1->m3+box1->m2 ) // ~: + for( ad=100; dy>5 && dx>1 && dy>2*dx; ){ // max 3x3 (small font) + DBG( wchar_t c_ask=';'; ) + // better would it be to detect round pixelcluster on top + // check high of upper and lower dot + for( i1=0;i1p,cs,1)==0 ) break; + if (i1>=dy/2) break; + for( i2=0;i2p,cs,1)==0 ) break; + if (i2m3) ad=97*ad/100; + if (i2-i1=100) return ';'; + break; + } + // --- first test small dots . --------------------------------- + if( 3*dym4-box1->m1 && abs(dx-dy)<(dx+dy)/4+2 + && 3*y1>=(2*box1->m3+ box1->m2) // dot near baseline? + && 5*y0>=(3*box1->m3+2*box1->m2) ){ // Jul00 + DBG( wchar_t c_ask='.'; ) + d=0; r1=60;r2=140; ad=99; + for(x=x0;x<=x1;x++)for(y=y0;y<=y1;y++){ /* circle equation */ + rx=100*(2*x-(x0+x1))/dx; // normalize to 15bit number + ry=100*(2*y-(y0+y1))/dy; + if( rx*rx + ry*ry < r1*r1 ) if( getpixel(box1->p,x,y)>=cs ){ d++;x=x1+1;y=y1+1; } + if( rx*rx + ry*ry > r2*r2 ) if( getpixel(box1->p,x,y)< cs ){ d++;x=x1+1;y=y1+1; } + // fprintf(stderr,"\nDBG .
x= %3d %3d r= %6d %6d %6d", rx, ry, rx*rx+ry*ry, r1*r1, r2*r2); + } + if(d==0) + if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) + <= loop(box1->p,x0,y1,x1-x0,cs,0,RI) + || loop(box1->p,x1,y0,x1-x0,cs,0,LE) + >= loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) + { + bc='.'; if (box1->dots) { Setac(box1,':',ad); ad=98*ad/100; } + Setac(box1,bc,ad); + } + } + // --- first test small dots , --------------------------------- + if( 3*dy<2*(box1->m4-box1->m1) + && 2*y0> box1->m2+box1->m3 + && (2*dx<3*dy + || get_bw(0,dx/2,dy/2,dy-1,bp,cs,1)==0) ){ // ocr-a-, + DBG( wchar_t c_ask=','; ) + ad=100; bc=','; + if (dy==1 && dx==1) ad=98*ad/100; + if (dy==2 && dx==1) ad=99*ad/100; // this is a problem case + if (dx>=dy) ad=99*ad/100; + if( 2*dy >= box1->m4-box1->m1) ad=98*ad/100; + if( loop(box1->p,x0,y0,x1-x0,cs,0,RI) /* simple line */ + > loop(box1->p,x0,y1,x1-x0,cs,0,RI) + && loop(box1->p,x1,y0,x1-x0,cs,0,LE) + < loop(box1->p,x1,y1,x1-x0,cs,0,LE) ) { ad=99*ad/100; } + else { /* with upper circle */ + if( loop(box1->p,x0,(y0+y1+1)/2,x1-x0,cs,0,RI)p,x1, y1 ,x1-x0,cs,0,LE)p,x0,y1-((dy>5)?1:0),x1-x0,cs,0,LE)>(dx+1)/2 ) + if( loop(box1->p,x0, y1 ,x1-x0,cs,0,LE)>(dx+1)/2 ) ad=96*ad/100; + } + if(box1->dots==1) { Setac(box1,';',ad); ad=99*ad/100; } + Setac(box1,bc,ad); + } + // --- first test small dots '" --------------------------------- + if( 2*dy < box1->m4 -box1->m1+1 + && 2*y0 < box1->m2 +box1->m3 + && 3*y1 < box1->m2+2*box1->m3+2 ){ + DBG( wchar_t c_ask='\''; ) + ad=100; bc='\''; + if (2*y1 >= box1->m2+box1->m3) { ad=96*ad/100; MSG({}) } // ~! + if (3*y1>=2*box1->m2+box1->m3) { ad=96*ad/100; MSG({}) } + if (get_bw(x0,x1,(box1->m2+box1->m3)/2,box1->m4,box1->p,cs,1)!=0) + { ad=98*ad/100; MSG({}) } + if (dx>4 + && num_cross(x0,x1,y1,y1,box1->p,cs) == 2) { // " " + bc='"'; + // ocr-a-" has no gap!
+ if ( get_bw((x0+x1)/2,(x0+x1)/2,y0,y1,box1->p,cs,1)!=0 ) ad=96*ad/100; + } else { + if ( num_cross(x0,x1, y0 , y0 ,box1->p,cs)!=1) ad=96*ad/100; + if ( num_cross(x0,x1,(y0+y1)/2,(y0+y1)/2,box1->p,cs)!=1) ad=98*ad/100; + if (dx>dy) { ad=96*ad/100; MSG({}) } + } + if (2*y0 > box1->m1+box1->m2) ad=99*ad/100; + Setac(box1,bc,ad); + if (ad>=100) return bc; + } + // --- TILDE ~ --------------------------------- + if( 2*dym4-box1->m1 && dx>=dy && dx>3 && dy>1 + && 2*y0< box1->m1+box1->m2 + && 3*y1<2*box1->m2+box1->m3 ){ + if( loop(box1->p,x0,y0,dx,cs,0,RI) + > loop(box1->p,x0,y1,dx,cs,0,RI) + && loop(box1->p,x1,y0,dx,cs,0,LE) + < loop(box1->p,x1,y1,dx,cs,0,LE) + && num_cross(x0,x1,y0,y0,box1->p,cs) == 2 + && num_cross(x0,x1,y1,y1,box1->p,cs) == 2 ) { + DBG( wchar_t c_ask='~'; ) + bc=TILDE; + Setac(box1,bc,99); + } + } + // --- CIRCUMFLEX, hat ^ --------------------------------- + if( 2*dym4-box1->m1 && dx>=dy && dx>2 && dy>1 + && 2*y0< box1->m1+box1->m2 + && 3*y1<2*box1->m2+box1->m3 ){ + DBG( wchar_t c_ask='^'; ) + if( ( loop(box1->p,x0,y0 ,dx,cs,0,RI) + > loop(box1->p,x0,y1 ,dx,cs,0,RI)-dx/8 + || loop(box1->p,x0,y0 ,dx,cs,0,RI) + > loop(box1->p,x0,y1-1,dx,cs,0,RI)-dx/8 ) + && ( loop(box1->p,x1,y0 ,dx,cs,0,LE) + > loop(box1->p,x1,y1 ,dx,cs,0,LE)-dx/8 + || loop(box1->p,x1,y0 ,dx,cs,0,LE) + > loop(box1->p,x1,y1-1,dx,cs,0,LE)-dx/8 ) + && num_cross(x0,x1,y0 ,y0 ,box1->p,cs) == 1 + && ( num_cross(x0,x1,y1 ,y1 ,box1->p,cs) == 2 + || num_cross(x0,x1,y1-1,y1-1,box1->p,cs) == 2 )) { + bc='^'; + Setac(box1,bc,99); + } + } + // ------------------------------------------------------ +// if( dots==1 ){ um='\''; } +#if 0 /* ToDo: change to vectors, call here or in whatletter */ + if (box1->dots==0) { // i-dots ??? (if dots==0 is wrong) + y=box1->m1; + for(;yp,cs,1)==1) break; + { i1=y; + if( yp,cs,1)==0) break; + if( ybox1->m2-box1->m1){ + testumlaut(box1,cs,2,&um); // set modifier + new y0 ???
+ + } + } + } +#else + um = box1->modifier; +#endif + if ( /* um==ACUTE_ACCENT || */ um==DIAERESIS){ + for(y=y1;y>y0;y--) + if( get_bw(x0,x1,y,y,box1->p,cs,1)==0) { y0=y; dy=y1-y0+1; break; } // scan "a "o "u + /* NOTE(review): the adjusted y0 and dy are local and are not read again in the visible remainder of this function */ + } + + // --- test numbers 0..9 --- separated for faster compilation + if( JOB->cfg.only_numbers ) return ocr0n(&sdata); + + // bc=ocr1(box1,bp,cs); + if(bc!=UNKNOWN && box1->num_ac>0 && box1->wac[0]==100) + return bc; // for fast compilable tests + + // ------ separated for faster compilation + // ToDo: insert ocr0_shared_t here and split into a,b,cC,d,e,f,g9,... +#define IF_NOT_SURE if(bc==UNKNOWN || box1->num_ac==0 || box1->wac[0]<100) + /* each call below runs only while bc is UNKNOWN, no alternative is stored, or the first alternative weight is below 100 */ + + IF_NOT_SURE bc=ocr0_eE(&sdata); + IF_NOT_SURE bc=ocr0_f(&sdata); + IF_NOT_SURE bc=ocr0_bB(&sdata); + IF_NOT_SURE bc=ocr0_dD(&sdata); + IF_NOT_SURE bc=ocr0_F(&sdata); + IF_NOT_SURE bc=ocr0_uU(&sdata); + IF_NOT_SURE bc=ocr0_micro(&sdata); + IF_NOT_SURE bc=ocr0_vV(&sdata); + IF_NOT_SURE bc=ocr0_rR(&sdata); + IF_NOT_SURE bc=ocr0_m(&sdata); + IF_NOT_SURE bc=ocr0_tT(&sdata); + IF_NOT_SURE bc=ocr0_sS(&sdata); + IF_NOT_SURE bc=ocr0_gG(&sdata); + IF_NOT_SURE bc=ocr0_xX(&sdata); + IF_NOT_SURE bc=ocr0_yY(&sdata); + IF_NOT_SURE bc=ocr0_zZ(&sdata); + IF_NOT_SURE bc=ocr0_wW(&sdata); + IF_NOT_SURE bc=ocr0_aA(&sdata); + IF_NOT_SURE bc=ocr0_cC(&sdata); + IF_NOT_SURE bc=ocr0_lL(&sdata); + IF_NOT_SURE bc=ocr0_oO(&sdata); + IF_NOT_SURE bc=ocr0_pP(&sdata); + IF_NOT_SURE bc=ocr0_qQ(&sdata); + IF_NOT_SURE bc=ocr0_iIjJ(&sdata); + IF_NOT_SURE bc=ocr0_n(&sdata); + IF_NOT_SURE bc=ocr0_M(&sdata); + IF_NOT_SURE bc=ocr0_N(&sdata); + IF_NOT_SURE bc=ocr0_h(&sdata); + IF_NOT_SURE bc=ocr0_H(&sdata); + IF_NOT_SURE bc=ocr0_k(&sdata); + IF_NOT_SURE bc=ocr0_K(&sdata); + IF_NOT_SURE bc=ocr0n(&sdata); + IF_NOT_SURE bc=ocr0_brackets(&sdata); + IF_NOT_SURE bc=ocr0p9(&sdata); + IF_NOT_SURE bc=ocr0px(&sdata); + + + if(box1->num_ac==0 && bc!=UNKNOWN) fprintf(stderr,""); /* NOTE(review): this prints an empty string; the message text may have been lost in this copy */ + if(box1->num_ac>0 && box1->wac[0]>95) box1->c=bc=box1->tac[0]; + /* will be removed later, only
fix old things */ + for (i=0;inum_ac;i++) if (box1->tac[i]==bc) { bc=box1->tac[0]; } + + return bc; +} + +