2 This is an Optical-Character-Recognition program
3 Copyright (C) 2000-2007 Joerg Schulenburg
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 see README for EMAIL-address
26 int warn=0; /* when nonzero, compose() reports undefined compositions on stderr */
28 /* Arguments: the character (main), and the modifier (accent, etc). See the
29 function if you want to know the modifiers.
30 Description: This function is intended as a small helper that avoids having
31 to write switches in functions. It is therefore mainly for accents,
32 especially the most common ones. It supports the basic Greek
33 characters too, which is actually not very helpful.
34 Returns: the unicode character corresponding to the composed character.
37 - It seems to me that tables would be more effective.
38 So we should use tables in future? (js)
40 wchar_t compose(wchar_t main, wchar_t modifier) {
41 /* supported by now: part of ISO8859-1, basic greek characters */
42 if( main == UNKNOWN || main == PICTURE ) return main;
44 if(modifier!=UNICODE_NULL && modifier!=SPACE)
45 printf(" compose(%c,%d)",(char)main,(int)modifier);
47 if(main>127 && modifier!=0 && modifier!=SPACE && warn)
48 fprintf(stderr,"# Warning compose %04x + %04x>127\n",
49 (int)modifier,(int)main);
55 case APOSTROPHE: /* do NOT USE this. It's here for compatibility only.
56 Use ACUTE_ACCENT instead. */
57 fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
59 case ACUTE_ACCENT: /* acute/cedilla */
61 case 'a': return LATIN_SMALL_LETTER_A_WITH_ACUTE;
62 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_ACUTE;
63 case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_ACUTE;
64 case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_ACUTE;
65 case 'c': return LATIN_SMALL_LETTER_C_WITH_ACUTE;
66 case 'C': return LATIN_CAPITAL_LETTER_C_WITH_ACUTE;
67 case 'e': return LATIN_SMALL_LETTER_E_WITH_ACUTE;
68 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_ACUTE;
69 case 'g': return LATIN_SMALL_LETTER_G_WITH_ACUTE;
70 case 'G': return LATIN_CAPITAL_LETTER_G_WITH_ACUTE;
71 case 'i': return LATIN_SMALL_LETTER_I_WITH_ACUTE;
72 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_ACUTE;
73 case 'l': return LATIN_SMALL_LETTER_L_WITH_ACUTE;
74 case 'L': return LATIN_CAPITAL_LETTER_L_WITH_ACUTE;
75 case 'n': return LATIN_SMALL_LETTER_N_WITH_ACUTE;
76 case 'N': return LATIN_CAPITAL_LETTER_N_WITH_ACUTE;
77 case 'o': return LATIN_SMALL_LETTER_O_WITH_ACUTE;
78 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
79 case '0': return LATIN_CAPITAL_LETTER_O_WITH_ACUTE;
80 case 'r': return LATIN_SMALL_LETTER_R_WITH_ACUTE;
81 case 'R': return LATIN_CAPITAL_LETTER_R_WITH_ACUTE;
82 case 's': return LATIN_SMALL_LETTER_S_WITH_ACUTE;
83 case 'S': return LATIN_CAPITAL_LETTER_S_WITH_ACUTE;
84 case 'u': return LATIN_SMALL_LETTER_U_WITH_ACUTE;
85 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_ACUTE;
86 case 'y': return LATIN_SMALL_LETTER_Y_WITH_ACUTE;
87 case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_ACUTE;
88 case 'z': return LATIN_SMALL_LETTER_Z_WITH_ACUTE;
89 case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_ACUTE;
91 if(warn)fprintf( stderr, " COMPOSE: ACUTE_ACCENT+%04x not defined\n",(int)main);
95 case BREVE: /* caron (latin2) "u"-above-... (small bow) */
97 /* FIXME write separate heuristics for breve */
98 case 'a': return LATIN_SMALL_LETTER_A_WITH_BREVE;
99 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_BREVE;
100 case 'e': return LATIN_SMALL_LETTER_E_WITH_BREVE;
101 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_BREVE;
102 case 'g': return LATIN_SMALL_LETTER_G_WITH_BREVE;
103 case 'G': return LATIN_CAPITAL_LETTER_G_WITH_BREVE;
104 case 'i': return LATIN_SMALL_LETTER_I_WITH_BREVE;
105 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_BREVE;
106 case 'o': return LATIN_SMALL_LETTER_O_WITH_BREVE;
107 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_BREVE;
108 case 'u': return LATIN_SMALL_LETTER_U_WITH_BREVE;
109 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_BREVE;
111 if(warn)fprintf( stderr, " COMPOSE: BREVE+%04x not defined\n",(int)main);
115 case CARON: /* caron (latin2) "v"-above-... */
117 case 'a': return LATIN_SMALL_LETTER_A_WITH_CARON;
118 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CARON;
119 case 'c': return LATIN_SMALL_LETTER_C_WITH_CARON;
120 case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CARON;
121 case 'e': return LATIN_SMALL_LETTER_E_WITH_CARON;
122 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CARON;
123 case 'i': return LATIN_SMALL_LETTER_I_WITH_CARON;
124 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CARON;
125 case 'o': return LATIN_SMALL_LETTER_O_WITH_CARON;
126 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
127 case '0': return LATIN_CAPITAL_LETTER_O_WITH_CARON;
128 case 's': return LATIN_SMALL_LETTER_S_WITH_CARON;
129 case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CARON;
130 case 'u': return LATIN_SMALL_LETTER_U_WITH_CARON;
131 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CARON;
132 case 'z': return LATIN_SMALL_LETTER_Z_WITH_CARON;
133 case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_CARON;
135 if(warn)fprintf( stderr, " COMPOSE: CARON+%04x not defined\n",(int)main);
141 case 'c': return LATIN_SMALL_LETTER_C_WITH_CEDILLA;
142 case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CEDILLA;
144 if(warn)fprintf( stderr, " COMPOSE: CEDILLA+%04x not defined\n",(int)main);
150 case 'a': return LATIN_SMALL_LETTER_A_WITH_TILDE;
151 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_TILDE;
152 case 'i': return LATIN_SMALL_LETTER_I_WITH_TILDE;
153 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_TILDE;
154 case 'n': return LATIN_SMALL_LETTER_N_WITH_TILDE;
155 case 'N': return LATIN_CAPITAL_LETTER_N_WITH_TILDE;
156 case 'o': return LATIN_SMALL_LETTER_O_WITH_TILDE;
157 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
158 case '0': return LATIN_CAPITAL_LETTER_O_WITH_TILDE;
159 case 'u': return LATIN_SMALL_LETTER_U_WITH_TILDE;
160 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_TILDE;
162 if(warn)fprintf( stderr, " COMPOSE: TILDE+%04x not defined\n",(int)main);
168 case 'a': return LATIN_SMALL_LETTER_A_WITH_GRAVE;
169 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_GRAVE;
170 case 'e': return LATIN_SMALL_LETTER_E_WITH_GRAVE;
171 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_GRAVE;
172 case 'i': return LATIN_SMALL_LETTER_I_WITH_GRAVE;
173 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_GRAVE;
174 case 'n': return LATIN_SMALL_LETTER_N_WITH_GRAVE;
175 case 'N': return LATIN_CAPITAL_LETTER_N_WITH_GRAVE;
176 case 'o': return LATIN_SMALL_LETTER_O_WITH_GRAVE;
177 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
178 case '0': return LATIN_CAPITAL_LETTER_O_WITH_GRAVE;
179 case 'u': return LATIN_SMALL_LETTER_U_WITH_GRAVE;
180 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_GRAVE;
182 if(warn)fprintf( stderr, " COMPOSE: GRAVE_ACCENT+%04x not defined\n",(int)main);
186 case QUOTATION_MARK: /* do NOT USE this. It's here for compatibility only.
187 Use DIAERESIS instead. */
188 fprintf( stderr, "COMPOSE: got APOSTROPHE instead of ACUTE_ACCENT");
192 case 'a': return LATIN_SMALL_LETTER_A_WITH_DIAERESIS;
193 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS;
194 case 'e': return LATIN_SMALL_LETTER_E_WITH_DIAERESIS;
195 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS;
196 case 'i': return LATIN_SMALL_LETTER_I_WITH_DIAERESIS;
197 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS;
198 case 'o': return LATIN_SMALL_LETTER_O_WITH_DIAERESIS;
199 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
200 case '0': return LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS;
201 case 'u': return LATIN_SMALL_LETTER_U_WITH_DIAERESIS;
202 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS;
203 case 'y': return LATIN_SMALL_LETTER_Y_WITH_DIAERESIS;
204 case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS;
206 if(warn)fprintf( stderr, " COMPOSE: DIAERESIS+%04x (%c) not defined\n",(int)main,(char)main);
210 case CIRCUMFLEX_ACCENT: /* ^ */
212 case 'a': return LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX;
213 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX;
214 case 'c': return LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX;
215 case 'C': return LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX;
216 case 'e': return LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX;
217 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX;
218 case 'g': return LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX;
219 case 'G': return LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX;
220 case 'h': return LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX;
221 case 'H': return LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX;
222 case 'i': return LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX;
223 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX;
224 case 'j': return LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX;
225 case 'J': return LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX;
226 case 'o': return LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX;
227 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
228 case '0': return LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX;
229 case 's': return LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX;
230 case 'S': return LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX;
231 case 'u': return LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX;
232 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX;
233 case 'w': return LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX;
234 case 'W': return LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX;
235 case 'y': return LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX;
236 case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX;
238 if(warn)fprintf( stderr, " COMPOSE: CIRCUMFLEX_ACCENT+%04x not defined\n",(int)main);
242 case MACRON: /* a minus sign above the char (latin2) */
244 case 'a': return LATIN_SMALL_LETTER_A_WITH_MACRON;
245 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_MACRON;
246 case 'e': return LATIN_SMALL_LETTER_E_WITH_MACRON;
247 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_MACRON;
248 case 'i': return LATIN_SMALL_LETTER_I_WITH_MACRON;
249 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_MACRON;
250 case 'o': return LATIN_SMALL_LETTER_O_WITH_MACRON;
251 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_MACRON;
252 case 'u': return LATIN_SMALL_LETTER_U_WITH_MACRON;
253 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_MACRON;
254 case 'y': return LATIN_SMALL_LETTER_Y_WITH_MACRON;
255 case 'Y': return LATIN_CAPITAL_LETTER_Y_WITH_MACRON;
256 case LATIN_SMALL_LETTER_AE: return LATIN_SMALL_LETTER_AE_WITH_MACRON;
257 case LATIN_CAPITAL_LETTER_AE: return LATIN_CAPITAL_LETTER_AE_WITH_MACRON;
258 case '=': return IDENTICAL_TO;
259 case '-': return '=';
260 case ' ': return MODIFIER_LETTER_MACRON;
262 if(warn)fprintf( stderr, " COMPOSE: MACRON+%04x not defined\n",(int)main);
266 case DOT_ABOVE: /* latin2 */
268 case 'a': return LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE;
269 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE;
270 case 'c': return LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE;
271 case 'C': return LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE;
272 case 'e': return LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE;
273 case 'E': return LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE;
274 case 'g': return LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE;
275 case 'G': return LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE;
276 case 'l': return 'i'; /* correct wrong recognition */
277 case 'i': return 'i';
278 case LATIN_SMALL_LETTER_DOTLESS_I: return 'i';
279 case 'I': return LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
280 case 'j': return 'j';
281 case 'o': return LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE;
282 case 'O': return LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE;
283 case 'z': return LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE;
284 case 'Z': return LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE;
285 case ',': return ';';
286 case '.': return ':';
288 if(warn)fprintf( stderr, " COMPOSE: DOT_ABOVE+%04x not defined\n",(int)main);
294 case 'a': return LATIN_SMALL_LETTER_A_WITH_RING_ABOVE;
295 case 'A': return LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE;
296 case 'u': return LATIN_SMALL_LETTER_U_WITH_RING_ABOVE;
297 case 'U': return LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE;
299 if(warn)fprintf( stderr, " COMPOSE: RING_ABOVE+%04x not defined\n",(int)main);
303 case 'e': /* e ligatures: ae, oe. */
306 case 'a': return LATIN_SMALL_LETTER_AE;
307 case 'A': return LATIN_CAPITAL_LETTER_AE;
308 case 'o': return LATIN_SMALL_LIGATURE_OE;
309 case 'O': return LATIN_CAPITAL_LIGATURE_OE;
310 case '0': return LATIN_CAPITAL_LIGATURE_OE;
312 if(warn)fprintf( stderr, " COMPOSE: %04x+e/E not defined\n",(int)main);
316 case 'g': /* greek */
318 /* missing 0x37A-0x390 */
319 /* weird cases: Q -> theta (it resembles a little, doesn't it?)
320 V -> psi (what can I do?) */
321 case 'A': return GREEK_CAPITAL_LETTER_ALPHA;
322 case 'B': return GREEK_CAPITAL_LETTER_BETA;
323 case 'G': return GREEK_CAPITAL_LETTER_GAMMA;
324 case 'D': return GREEK_CAPITAL_LETTER_DELTA;
325 case 'E': return GREEK_CAPITAL_LETTER_EPSILON;
326 case 'Z': return GREEK_CAPITAL_LETTER_ZETA;
327 case 'H': return GREEK_CAPITAL_LETTER_ETA;
328 case 'Q': return GREEK_CAPITAL_LETTER_THETA;
329 case 'I': return GREEK_CAPITAL_LETTER_IOTA;
330 case 'K': return GREEK_CAPITAL_LETTER_KAPPA;
331 case 'L': return GREEK_CAPITAL_LETTER_LAMDA;
332 case 'M': return GREEK_CAPITAL_LETTER_MU;
333 case 'N': return GREEK_CAPITAL_LETTER_NU;
334 case 'X': return GREEK_CAPITAL_LETTER_XI;
335 case 'O': return GREEK_CAPITAL_LETTER_OMICRON;
336 case 'P': return GREEK_CAPITAL_LETTER_PI;
337 case 'R': return GREEK_CAPITAL_LETTER_RHO;
338 case 'S': return GREEK_CAPITAL_LETTER_SIGMA;
339 case 'T': return GREEK_CAPITAL_LETTER_TAU;
340 case 'Y': return GREEK_CAPITAL_LETTER_UPSILON;
341 case 'F': return GREEK_CAPITAL_LETTER_PHI;
342 case 'C': return GREEK_CAPITAL_LETTER_CHI;
343 case 'V': return GREEK_CAPITAL_LETTER_PSI;
344 case 'W': return GREEK_CAPITAL_LETTER_OMEGA;
346 case '': return GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA;
347 case '': return GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA;
348 case '': return GREEK_SMALL_LETTER_ALPHA_WITH_TONOS;
349 case '': return GREEK_SMALL_LETTER_EPSILON_WITH_TONOS;
350 case '': return GREEK_SMALL_LETTER_ETA_WITH_TONOS;
351 case '': return GREEK_SMALL_LETTER_IOTA_WITH_TONOS;
352 case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS;
354 case 'a': return GREEK_SMALL_LETTER_ALPHA;
355 case 'b': return GREEK_SMALL_LETTER_BETA;
356 case 'g': return GREEK_SMALL_LETTER_GAMMA;
357 case 'd': return GREEK_SMALL_LETTER_DELTA;
358 case 'e': return GREEK_SMALL_LETTER_EPSILON;
359 case 'z': return GREEK_SMALL_LETTER_ZETA;
360 case 'h': return GREEK_SMALL_LETTER_ETA;
361 case 'q': return GREEK_SMALL_LETTER_THETA;
362 case 'i': return GREEK_SMALL_LETTER_IOTA;
363 case 'k': return GREEK_SMALL_LETTER_KAPPA;
364 case 'l': return GREEK_SMALL_LETTER_LAMDA;
365 case 'm': return GREEK_SMALL_LETTER_MU;
366 case 'n': return GREEK_SMALL_LETTER_NU;
367 case 'x': return GREEK_SMALL_LETTER_XI;
368 case 'o': return GREEK_SMALL_LETTER_OMICRON;
369 case 'p': return GREEK_SMALL_LETTER_PI;
370 case 'r': return GREEK_SMALL_LETTER_RHO;
371 case '&': return GREEK_SMALL_LETTER_FINAL_SIGMA;
372 case 's': return GREEK_SMALL_LETTER_SIGMA;
373 case 't': return GREEK_SMALL_LETTER_TAU;
374 case 'y': return GREEK_SMALL_LETTER_UPSILON;
375 case 'f': return GREEK_SMALL_LETTER_PHI;
376 case 'c': return GREEK_SMALL_LETTER_CHI;
377 case 'v': return GREEK_SMALL_LETTER_PSI;
378 case 'w': return GREEK_SMALL_LETTER_OMEGA;
380 case '': return GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA;
381 case '': return GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA;
382 case '': return GREEK_SMALL_LETTER_OMICRON_WITH_TONOS;
383 case '': return GREEK_SMALL_LETTER_UPSILON_WITH_TONOS;
384 case '': return GREEK_SMALL_LETTER_OMEGA_WITH_TONOS;
385 case '': return GREEK_BETA_SYMBOL;
386 case '': return GREEK_THETA_SYMBOL;
387 case '': return GREEK_UPSILON_WITH_HOOK_SYMBOL;
388 case '': return GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL;
389 case '': return GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL;
390 case '': return GREEK_PHI_SYMBOL;
391 case '': return GREEK_PI_SYMBOL;
394 if(warn)fprintf( stderr, " COMPOSE: GREEK %04x not defined\n",(int)main);
399 fprintf( stderr, " COMPOSE: modifier %04x not defined\n",(int)modifier);
401 return (wchar_t)main;
/* String returned by decode() for characters that have no representation in the requested output format. */
404 #define UNDEFINED "~"
406 /* Arguments: character in Unicode format, type of format to convert to.
407 Returns: a string containing the Unicode character converted to the chosen
408 format. This string is statically allocated and should not be freed.
409 ToDo: better using tables?
411 const char *decode(wchar_t c, FORMAT type) {
412 /* static char d; --- js: big bug (missing \0) if &d returned */
413 /*FIXME jb static*/ static char bbuf[8*32]; /* space for 8 buffers, rotating */
414 /*FIXME jb static*/ static char *buf=bbuf; /* used for UTF8 sequences and undefined codes */
415 buf+=32; if(buf>=bbuf+8*32) buf=bbuf;
416 buf[0]=buf[1]=buf[2]=0;
419 if ( c <= 0xFF ) { /* UNICODE == ISO8859-1 */
423 switch (c) { /* not found in list, but perhaps we can describe it */
424 /* todo: add greek. GREEK_SMALL_LETTER_ALPHA = alpha */
426 /* general punctuation */
428 return (const char *)"-";
431 return (const char *)"--";
433 return (const char *)"---";
434 case LEFT_SINGLE_QUOTATION_MARK:
435 return (const char *)"`";
436 case RIGHT_SINGLE_QUOTATION_MARK:
437 return (const char *)"'";
438 case SINGLE_LOW_9_QUOTATION_MARK:
439 return (const char *)",";
440 case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
441 return (const char *)UNDEFINED;
442 case LEFT_DOUBLE_QUOTATION_MARK:
443 return (const char *)"``";
444 case RIGHT_DOUBLE_QUOTATION_MARK:
445 return (const char *)"''";
446 case DOUBLE_LOW_9_QUOTATION_MARK:
447 return (const char *)",,";
448 case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
449 return (const char *)UNDEFINED;
451 return (const char *)"+";
453 return (const char *)"*";
455 return (const char *)"*";
456 case TRIANGULAR_BULLET:
457 return (const char *)"*";
458 case HYPHENATION_POINT:
459 return (const char *)"-";
460 case HORIZONTAL_ELLIPSIS:
461 return (const char *)"...";
463 return (const char *)"%%"; /* awk! */
464 case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
465 return (const char *)"<";
466 case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
467 return (const char *)">";
468 case EURO_CURRENCY_SIGN:
469 return (const char *)"EUR"; /* change it! */
472 case LATIN_SMALL_LIGATURE_FF:
473 return (const char *)"ff";
474 case LATIN_SMALL_LIGATURE_FI:
475 return (const char *)"fi";
476 case LATIN_SMALL_LIGATURE_FL:
477 return (const char *)"fl";
478 case LATIN_SMALL_LIGATURE_FFI:
479 return (const char *)"ffi";
480 case LATIN_SMALL_LIGATURE_FFL:
481 return (const char *)"ffl";
482 case LATIN_SMALL_LIGATURE_LONG_S_T:
483 case LATIN_SMALL_LIGATURE_ST:
484 return (const char *)"st";
488 return (const char *)"_";
490 return (const char *)"_"; /* Due to Mobile OCR */
493 /* snprintf seems to be no standard, so I use insecure sprintf */
494 sprintf(buf,"\\code(%04x)",(unsigned)c);
495 return buf; /* UNDEFINED; */
499 if ( c >= SPACE && c <= TILDE ) { /* ASCII */
502 return (const char *)"\\$";
504 return (const char *)"\\&";
506 return (const char *)"\\%";
508 return (const char *)"\\#";
510 return (const char *)"\\_";
512 return (const char *)"\\{";
514 return (const char *)"\\}";
516 return (const char *)"$\\backslash$";
518 return (const char *)"\\~{}";
520 return (const char *)"\\^{}";
523 return (const char *)buf;
529 return (const char *)"~";
530 case INVERTED_EXCLAMATION_MARK:
531 return (const char *)"!'";
533 return (const char *)"\\textcent"; /* \usepackage{textcomp} */
535 return (const char *)"\\pounds";
536 case EURO_CURRENCY_SIGN:
537 return (const char *)"\\euro"; /* \usepackage{eurosans} */
539 return (const char *)"\\textcurrency"; /* \usepackage{textcomp} */
541 return (const char *)"\\textyen"; /* \usepackage{textcomp} */
543 return (const char *)"\\textbrokenbar"; /* \usepackage{textcomp} */
545 return (const char *)"\\S";
547 return (const char *)"\"";
549 return (const char *)"\\copyright";
550 case FEMININE_ORDINAL_INDICATOR:
551 return (const char *)"$^{\\underbar{a}}$";
552 case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
553 return (const char *)"\\flqq{}";
555 return (const char *)"$\\lnot$";
557 return (const char *)"\\-";
558 case REGISTERED_SIGN:
559 return (const char *)"\\textregistered";/* \usepackage{textcomp} */
561 return (const char *)"\\textasciimacron";/* \usepackage{textcomp} */
563 return (const char *)"$^{o}$";
564 case PLUS_MINUS_SIGN:
565 return (const char *)"$\\pm$";
566 case SUPERSCRIPT_TWO:
567 return (const char *)"$^{2}$";
568 case SUPERSCRIPT_THREE:
569 return (const char *)"$^{3}$";
571 return (const char *)"\\( \\prime \\)";
573 return (const char *)"$\\mu$";
575 return (const char *)"\\P";
577 return (const char *)"$\\cdot$";
579 return (const char *)"\\,";
580 case SUPERSCRIPT_ONE:
581 return (const char *)"$^{1}$";
582 case MASCULINE_ORDINAL_INDICATOR:
583 return (const char *)"$^{\\underbar{o}}$";
584 case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
585 return (const char *)"\\frqq{}";
586 case VULGAR_FRACTION_ONE_QUARTER: /* these fractions are not good*/
587 return (const char *)"\\( 1\\over 4 \\)";
588 case VULGAR_FRACTION_ONE_HALF:
589 return (const char *)"\\( 1\\over 2 \\)";
590 case VULGAR_FRACTION_THREE_QUARTERS:
591 return (const char *)"\\( 3\\over 4 \\)";
592 case INVERTED_QUESTION_MARK:
593 return (const char *)"?'";
594 case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
595 return (const char *)"\\`A";
596 case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
597 return (const char *)"\\'A";
598 case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
599 return (const char *)"\\^A";
600 case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
601 return (const char *)"\\~A";
602 case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
603 return (const char *)"\\\"A";
604 case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
605 return (const char *)"\\AA";
606 case LATIN_CAPITAL_LETTER_AE:
607 return (const char *)"\\AE";
608 case LATIN_CAPITAL_LETTER_C_WITH_CARON:
609 return (const char *)"\\v{C}";
610 case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
611 return (const char *)"\\C";
612 case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
613 return (const char *)"\\`E";
614 case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
615 return (const char *)"\\'E";
616 case LATIN_CAPITAL_LETTER_E_WITH_CARON:
617 return (const char *)"\\v{E}";
618 case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
619 return (const char *)"\\^E";
620 case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
621 return (const char *)"\\\"E";
622 case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
623 return (const char *)"\\`I";
624 case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
625 return (const char *)"\\'I";
626 case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
627 return (const char *)"\\^I";
628 case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
629 return (const char *)"\\\"I";
630 case LATIN_CAPITAL_LETTER_ETH:
631 return (const char *)UNDEFINED;
632 case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
633 return (const char *)"\\~N";
634 case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
635 return (const char *)"\\`O";
636 case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
637 return (const char *)"\\'O";
638 case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
639 return (const char *)"\\^O";
640 case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
641 return (const char *)"\\~O";
642 case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
643 return (const char *)"\\\"O";
644 case MULTIPLICATION_SIGN:
645 return (const char *)"$\\times$";
646 case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
647 return (const char *)"\\O";
648 case LATIN_CAPITAL_LETTER_S_WITH_CARON:
649 return (const char *)"\\v{S}";
650 case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
651 return (const char *)"\\`U";
652 case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
653 return (const char *)"\\'U";
654 case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
655 return (const char *)"\\^U";
656 case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
657 return (const char *)"\\\"U";
658 case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
659 return (const char *)"\\'Y";
660 case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
661 return (const char *)"\\v{Z}";
662 case LATIN_CAPITAL_LETTER_THORN:
663 return (const char *)UNDEFINED;
664 case LATIN_SMALL_LETTER_SHARP_S:
665 return (const char *)"\\ss";
666 case LATIN_SMALL_LETTER_A_WITH_GRAVE:
667 return (const char *)"\\`a";
668 case LATIN_SMALL_LETTER_A_WITH_ACUTE:
669 return (const char *)"\\'a";
670 case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
671 return (const char *)"\\^a";
672 case LATIN_SMALL_LETTER_A_WITH_TILDE:
673 return (const char *)"\\~a";
674 case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
675 return (const char *)"\\\"a";
676 case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
677 return (const char *)"\\aa";
678 case LATIN_SMALL_LETTER_AE:
679 return (const char *)"\\ae";
680 case LATIN_SMALL_LETTER_C_WITH_CARON:
681 return (const char *)"\\v{c}";
682 case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
683 return (const char *)"\\c";
684 case LATIN_SMALL_LETTER_E_WITH_GRAVE:
685 return (const char *)"\\`e";
686 case LATIN_SMALL_LETTER_E_WITH_ACUTE:
687 return (const char *)"\\'e";
688 case LATIN_SMALL_LETTER_E_WITH_CARON:
689 return (const char *)"\\v{e}";
690 case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
691 return (const char *)"\\^e";
692 case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
693 return (const char *)"\\\"e";
694 case LATIN_SMALL_LETTER_I_WITH_GRAVE:
695 return (const char *)"\\`i";
696 case LATIN_SMALL_LETTER_I_WITH_ACUTE:
697 return (const char *)"\\'i";
698 case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
699 return (const char *)"\\^i";
700 case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
701 return (const char *)"\\\"i";
702 case LATIN_SMALL_LETTER_ETH:
703 return (const char *)UNDEFINED;
704 case LATIN_SMALL_LETTER_N_WITH_TILDE:
705 return (const char *)"\\~n";
706 case LATIN_SMALL_LETTER_O_WITH_GRAVE:
707 return (const char *)"\\`o";
708 case LATIN_SMALL_LETTER_O_WITH_ACUTE:
709 return (const char *)"\\'o";
710 case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
711 return (const char *)"\\^o";
712 case LATIN_SMALL_LETTER_O_WITH_TILDE:
713 return (const char *)"\\~o";
714 case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
715 return (const char *)"\\\"o";
717 return (const char *)"$\\div$";
718 case LATIN_SMALL_LETTER_O_WITH_STROKE:
719 return (const char *)"\\o";
720 case LATIN_SMALL_LETTER_S_WITH_CARON:
721 return (const char *)"\\v{s}";
722 case LATIN_SMALL_LETTER_U_WITH_GRAVE:
723 return (const char *)"\\`u";
724 case LATIN_SMALL_LETTER_U_WITH_ACUTE:
725 return (const char *)"\\'u";
726 case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
727 return (const char *)"\\^u";
728 case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
729 return (const char *)"\\\"u";
730 case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
731 return (const char *)"\\'y";
732 case LATIN_SMALL_LETTER_THORN:
733 return (const char *)UNDEFINED;
734 case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
735 return (const char *)"\\\"y";
736 case LATIN_SMALL_LETTER_Z_WITH_CARON:
737 return (const char *)"\\v{z}";
740 /* some (punctuation, accents, accented capital) greek letters missing*/
741 case GREEK_CAPITAL_LETTER_ALPHA:
742 return (const char *)"A";
743 case GREEK_CAPITAL_LETTER_BETA:
744 return (const char *)"B";
745 case GREEK_CAPITAL_LETTER_GAMMA:
746 return (const char *)"\\( \\Gamma \\)";
747 case GREEK_CAPITAL_LETTER_DELTA:
748 return (const char *)"\\( \\Delta \\)";
749 case GREEK_CAPITAL_LETTER_EPSILON:
750 return (const char *)"E";
751 case GREEK_CAPITAL_LETTER_ZETA:
752 return (const char *)"Z";
753 case GREEK_CAPITAL_LETTER_ETA:
754 return (const char *)"H";
755 case GREEK_CAPITAL_LETTER_THETA:
756 return (const char *)"\\( \\Theta \\)";
757 case GREEK_CAPITAL_LETTER_IOTA:
758 return (const char *)"I";
759 case GREEK_CAPITAL_LETTER_KAPPA:
760 return (const char *)"K";
761 case GREEK_CAPITAL_LETTER_LAMDA:
762 return (const char *)"\\( \\Lambda \\)";
763 case GREEK_CAPITAL_LETTER_MU:
764 return (const char *)"M";
765 case GREEK_CAPITAL_LETTER_NU:
766 return (const char *)"N";
767 case GREEK_CAPITAL_LETTER_XI:
768 return (const char *)"\\( \\Xi \\)";
769 case GREEK_CAPITAL_LETTER_OMICRON:
770 return (const char *)"O";
771 case GREEK_CAPITAL_LETTER_PI:
772 return (const char *)"\\( \\Pi \\)";
773 case GREEK_CAPITAL_LETTER_RHO:
774 return (const char *)"P";
775 case GREEK_CAPITAL_LETTER_SIGMA:
776 return (const char *)"\\( \\Sigma \\)";
777 case GREEK_CAPITAL_LETTER_TAU:
778 return (const char *)"T";
779 case GREEK_CAPITAL_LETTER_UPSILON:
780 return (const char *)"\\( \\Upsilon \\)";
781 case GREEK_CAPITAL_LETTER_PHI:
782 return (const char *)"\\( \\Phi \\)";
783 case GREEK_CAPITAL_LETTER_CHI:
784 return (const char *)"\\( \\Chi \\)";
785 case GREEK_CAPITAL_LETTER_PSI:
786 return (const char *)"\\( \\Psi \\)";
787 case GREEK_CAPITAL_LETTER_OMEGA:
788 return (const char *)"\\( \\Omega \\)";
789 case GREEK_CAPITAL_LETTER_IOTA_WITH_DIALYTIKA:
790 return (const char *)UNDEFINED;
791 case GREEK_CAPITAL_LETTER_UPSILON_WITH_DIALYTIKA:
792 return (const char *)UNDEFINED;
793 case GREEK_SMALL_LETTER_ALPHA_WITH_TONOS:
794 return (const char *)UNDEFINED;
795 case GREEK_SMALL_LETTER_EPSILON_WITH_TONOS:
796 return (const char *)UNDEFINED;
797 case GREEK_SMALL_LETTER_ETA_WITH_TONOS:
798 return (const char *)UNDEFINED;
799 case GREEK_SMALL_LETTER_IOTA_WITH_TONOS:
800 return (const char *)UNDEFINED;
801 case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS:
802 return (const char *)UNDEFINED;
803 case GREEK_SMALL_LETTER_ALPHA:
804 return (const char *)"\\( \\alpha \\)";
805 case GREEK_SMALL_LETTER_BETA:
806 return (const char *)"\\( \\beta \\)";
807 case GREEK_SMALL_LETTER_GAMMA:
808 return (const char *)"\\( \\gamma \\)";
809 case GREEK_SMALL_LETTER_DELTA:
810 return (const char *)"\\( \\delta \\)";
811 case GREEK_SMALL_LETTER_EPSILON:
812 return (const char *)"\\( \\epsilon \\)";
813 case GREEK_SMALL_LETTER_ZETA:
814 return (const char *)"\\( \\zeta \\)";
815 case GREEK_SMALL_LETTER_ETA:
816 return (const char *)"\\( \\eta \\)";
817 case GREEK_SMALL_LETTER_THETA:
818 return (const char *)"\\( \\theta \\)";
819 case GREEK_SMALL_LETTER_IOTA:
820 return (const char *)"\\( \\iota \\)";
821 case GREEK_SMALL_LETTER_KAPPA:
822 return (const char *)"\\( \\kappa \\)";
823 case GREEK_SMALL_LETTER_LAMDA:
824 return (const char *)"\\( \\lambda \\)";
825 case GREEK_SMALL_LETTER_MU:
826 return (const char *)"\\( \\mu \\)";
827 case GREEK_SMALL_LETTER_NU:
828 return (const char *)"\\( \\nu \\)";
829 case GREEK_SMALL_LETTER_XI:
830 return (const char *)"\\( \\xi \\)";
831 case GREEK_SMALL_LETTER_OMICRON:
832 return (const char *)"\\( \\omicron \\)";
833 case GREEK_SMALL_LETTER_PI:
834 return (const char *)"\\( \\pi \\)";
835 case GREEK_SMALL_LETTER_RHO:
836 return (const char *)"\\( \\rho \\)";
837 case GREEK_SMALL_LETTER_FINAL_SIGMA:
838 return (const char *)"\\( \\varsigma \\)";
839 case GREEK_SMALL_LETTER_SIGMA:
840 return (const char *)"\\( \\sigma \\)";
841 case GREEK_SMALL_LETTER_TAU:
842 return (const char *)"\\( \\tau \\)";
843 case GREEK_SMALL_LETTER_UPSILON:
844 return (const char *)"\\( \\upsilon \\)";
845 case GREEK_SMALL_LETTER_PHI:
846 return (const char *)"\\( \\varphi \\)";
847 case GREEK_SMALL_LETTER_CHI:
848 return (const char *)"\\( \\chi \\)";
849 case GREEK_SMALL_LETTER_PSI:
850 return (const char *)"\\( \\psi \\)";
851 case GREEK_SMALL_LETTER_OMEGA:
852 return (const char *)"\\( \\omega \\)";
853 case GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA:
854 return (const char *)UNDEFINED;
855 case GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA:
856 return (const char *)UNDEFINED;
857 case GREEK_SMALL_LETTER_OMICRON_WITH_TONOS:
858 return (const char *)UNDEFINED;
859 case GREEK_SMALL_LETTER_UPSILON_WITH_TONOS:
860 return (const char *)UNDEFINED;
861 case GREEK_SMALL_LETTER_OMEGA_WITH_TONOS:
862 return (const char *)UNDEFINED;
863 case GREEK_BETA_SYMBOL:
864 return (const char *)UNDEFINED;
865 case GREEK_THETA_SYMBOL:
866 return (const char *)"\\( \\vartheta \\)";
867 case GREEK_UPSILON_WITH_HOOK_SYMBOL:
868 return (const char *)UNDEFINED;
869 case GREEK_UPSILON_WITH_ACUTE_AND_HOOK_SYMBOL:
870 return (const char *)UNDEFINED;
871 case GREEK_UPSILON_WITH_DIAERESIS_AND_HOOK_SYMBOL:
872 return (const char *)UNDEFINED;
873 case GREEK_PHI_SYMBOL:
874 return (const char *)"\\( \\phi \\)";
875 case GREEK_PI_SYMBOL:
876 return (const char *)"\\( \\varpi \\)";
877 /* some Greek letters are still missing here */
879 /* punctuation (partial) */
881 return (const char *)"-";
882 case NON_BREAKING_HYPHEN:
883 return (const char *)UNDEFINED;
886 return (const char *)"--";
888 return (const char *)"---";
890 return (const char *)UNDEFINED;
891 case LEFT_SINGLE_QUOTATION_MARK:
892 return (const char *)"`";
893 case RIGHT_SINGLE_QUOTATION_MARK:
894 return (const char *)"'";
895 case SINGLE_LOW_9_QUOTATION_MARK:
896 return (const char *)"\\glq{}";
897 case SINGLE_HIGH_REVERSED_9_QUOTATION_MARK:
898 return (const char *)UNDEFINED;
899 case LEFT_DOUBLE_QUOTATION_MARK:
900 return (const char *)"``";
901 case RIGHT_DOUBLE_QUOTATION_MARK:
902 return (const char *)"''";
903 case DOUBLE_LOW_9_QUOTATION_MARK:
904 return (const char *)"\\glqq{}";
905 case DOUBLE_HIGH_REVERSED_9_QUOTATION_MARK:
906 return (const char *)UNDEFINED;
908 return (const char *)"\\dag";
910 return (const char *)"\\ddag";
912 return (const char *)"$\\bullet$";
913 case TRIANGULAR_BULLET:
914 return (const char *)"$\\blacktriangleright";
915 case HYPHENATION_POINT:
916 return (const char *)"\\-";
917 case HORIZONTAL_ELLIPSIS:
918 return (const char *)"\\ldots";
920 return (const char *)UNDEFINED;
921 case SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK:
922 return (const char *)"\\flq{}";
923 case SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK:
924 return (const char *)"\\frq{}";
926 case LATIN_SMALL_LIGATURE_FF:
927 return (const char *)"ff";
928 case LATIN_SMALL_LIGATURE_FI:
929 return (const char *)"fi";
930 case LATIN_SMALL_LIGATURE_FL:
931 return (const char *)"fl";
932 case LATIN_SMALL_LIGATURE_FFI:
933 return (const char *)"ffi";
934 case LATIN_SMALL_LIGATURE_FFL:
935 return (const char *)"ffl";
936 case LATIN_SMALL_LIGATURE_LONG_S_T:
937 case LATIN_SMALL_LIGATURE_ST:
938 return (const char *)"st";
941 return (const char *)"";
943 return (const char *)"\\_";
945 return (const char *)"(PICTURE)";
947 /* snprintf does not seem to be standard everywhere, so the insecure sprintf is used */
948 sprintf(buf,"\\symbol{%u}",(unsigned)c);
949 return buf; /* UNDEFINED; */
952 if ( c >= SPACE && c <= TILDE ) { /* ASCII */
955 return (const char *)"&";
956 /* semicolon must not be coded */
958 return (const char *)"'";
960 return (const char *)""";
962 return (const char *)"<";
964 return (const char *)">";
971 return (const char *)"<!--PICTURE-->";
973 return (const char *)"_"; /* better use colored symbol? */
975 return (const char *)"<br />"; /* \n handled somwhere else? */
977 case CARRIAGE_RETURN:
978 return (const char *)"<br />";
980 return (const char *)"<nobr />";
981 case INVERTED_EXCLAMATION_MARK:
982 return (const char *)"¡";
984 return (const char *)"¢";
986 return (const char *)"£";
988 return (const char *)"¤";
990 return (const char *)"¥";
992 return (const char *)"¦";
994 return (const char *)"§";
996 return (const char *)"¨";
998 return (const char *)"©";
999 case FEMININE_ORDINAL_INDICATOR:
1000 return (const char *)"ªem;";
1001 case LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1002 return (const char *)"«";
1004 return (const char *)"¬";
1006 return (const char *)"­";
1007 case REGISTERED_SIGN:
1008 return (const char *)"®";
1010 return (const char *)"¯";
1012 return (const char *)"°";
1013 case PLUS_MINUS_SIGN:
1014 return (const char *)"±";
1015 case SUPERSCRIPT_TWO:
1016 return (const char *)"²";
1017 case SUPERSCRIPT_THREE:
1018 return (const char *)"³";
1020 return (const char *)"´";
1022 return (const char *)"µ";
1024 return (const char *)"¶";
1026 return (const char *)"·";
1028 return (const char *)"¸";
1029 case SUPERSCRIPT_ONE:
1030 return (const char *)"¹";
1031 case MASCULINE_ORDINAL_INDICATOR:
1032 return (const char *)"º";
1033 case RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK:
1034 return (const char *)"»";
1035 case VULGAR_FRACTION_ONE_QUARTER:
1036 return (const char *)"¼";
1037 case VULGAR_FRACTION_ONE_HALF:
1038 return (const char *)"½";
1039 case VULGAR_FRACTION_THREE_QUARTERS:
1040 return (const char *)"¾";
1041 case INVERTED_QUESTION_MARK:
1042 return (const char *)"¿";
1043 case LATIN_CAPITAL_LETTER_A_WITH_GRAVE:
1044 return (const char *)"À";
1045 case LATIN_CAPITAL_LETTER_A_WITH_ACUTE:
1046 return (const char *)"Á";
1047 case LATIN_CAPITAL_LETTER_A_WITH_BREVE:
1048 return (const char *)"Ă";
1049 case LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX:
1050 return (const char *)"Â";
1051 case LATIN_CAPITAL_LETTER_A_WITH_TILDE:
1052 return (const char *)"Ã";
1053 case LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS:
1054 return (const char *)"Ä";
1055 case LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE:
1056 return (const char *)"Å";
1057 case LATIN_CAPITAL_LETTER_AE:
1058 return (const char *)"Æ";
1059 case LATIN_CAPITAL_LETTER_C_WITH_CARON:
1060 return (const char *)"Č";
1061 case LATIN_CAPITAL_LETTER_C_WITH_CEDILLA:
1062 return (const char *)"Ç";
1063 case LATIN_CAPITAL_LETTER_E_WITH_GRAVE:
1064 return (const char *)"È";
1065 case LATIN_CAPITAL_LETTER_E_WITH_ACUTE:
1066 return (const char *)"É";
1067 case LATIN_CAPITAL_LETTER_E_WITH_CARON:
1068 return (const char *)"Ě";
1069 case LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX:
1070 return (const char *)"Ê";
1071 case LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS:
1072 return (const char *)"Ë";
1073 case LATIN_CAPITAL_LETTER_I_WITH_GRAVE:
1074 return (const char *)"Ì";
1075 case LATIN_CAPITAL_LETTER_I_WITH_ACUTE:
1076 return (const char *)"Í";
1077 case LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX:
1078 return (const char *)"Î";
1079 case LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS:
1080 return (const char *)"Ï";
1081 case LATIN_CAPITAL_LETTER_ETH:
1082 return (const char *)"Ð";
1083 case LATIN_CAPITAL_LETTER_N_WITH_TILDE:
1084 return (const char *)"Ñ";
1085 case LATIN_CAPITAL_LETTER_O_WITH_GRAVE:
1086 return (const char *)"Ò";
1087 case LATIN_CAPITAL_LETTER_O_WITH_ACUTE:
1088 return (const char *)"Ó";
1089 case LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX:
1090 return (const char *)"Ô";
1091 case LATIN_CAPITAL_LETTER_O_WITH_TILDE:
1092 return (const char *)"Õ";
1093 case LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS:
1094 return (const char *)"Ö";
1095 case MULTIPLICATION_SIGN:
1096 return (const char *)"×";
1097 case LATIN_CAPITAL_LETTER_O_WITH_STROKE:
1098 return (const char *)"Ø";
1099 case LATIN_CAPITAL_LETTER_S_WITH_CARON:
1100 return (const char *)"Š";
1101 case LATIN_CAPITAL_LETTER_U_WITH_GRAVE:
1102 return (const char *)"Ù";
1103 case LATIN_CAPITAL_LETTER_U_WITH_ACUTE:
1104 return (const char *)"Ú";
1105 case LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX:
1106 return (const char *)"Û";
1107 case LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS:
1108 return (const char *)"Ü";
1109 case LATIN_CAPITAL_LETTER_Y_WITH_ACUTE:
1110 return (const char *)"Ý";
1111 case LATIN_CAPITAL_LETTER_Z_WITH_CARON:
1112 return (const char *)"Ž";
1113 case LATIN_CAPITAL_LETTER_THORN:
1114 return (const char *)"Þ";
1115 case LATIN_SMALL_LETTER_SHARP_S:
1116 return (const char *)"ß";
1117 case LATIN_SMALL_LETTER_A_WITH_GRAVE:
1118 return (const char *)"à";
1119 case LATIN_SMALL_LETTER_A_WITH_ACUTE:
1120 return (const char *)"á";
1121 case LATIN_SMALL_LETTER_A_WITH_BREVE:
1122 return (const char *)"ă";
1123 case LATIN_SMALL_LETTER_A_WITH_CARON:
1124 return (const char *)"&acaron;";
1125 case LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX:
1126 return (const char *)"â";
1127 case LATIN_SMALL_LETTER_A_WITH_TILDE:
1128 return (const char *)"ã";
1129 case LATIN_SMALL_LETTER_A_WITH_DIAERESIS:
1130 return (const char *)"ä";
1131 case LATIN_SMALL_LETTER_A_WITH_RING_ABOVE:
1132 return (const char *)"å";
1133 case LATIN_SMALL_LETTER_AE:
1134 return (const char *)"æ";
1135 case LATIN_SMALL_LETTER_C_WITH_CARON:
1136 return (const char *)"č";
1137 case LATIN_SMALL_LETTER_C_WITH_CEDILLA:
1138 return (const char *)"ç";
1139 case LATIN_SMALL_LETTER_E_WITH_GRAVE:
1140 return (const char *)"è";
1141 case LATIN_SMALL_LETTER_E_WITH_ACUTE:
1142 return (const char *)"é";
1143 case LATIN_SMALL_LETTER_E_WITH_CARON:
1144 return (const char *)"ě";
1145 case LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX:
1146 return (const char *)"ê";
1147 case LATIN_SMALL_LETTER_E_WITH_DIAERESIS:
1148 return (const char *)"ë";
1149 case LATIN_SMALL_LETTER_I_WITH_GRAVE:
1150 return (const char *)"ì";
1151 case LATIN_SMALL_LETTER_I_WITH_ACUTE:
1152 return (const char *)"í";
1153 case LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX:
1154 return (const char *)"î";
1155 case LATIN_SMALL_LETTER_I_WITH_DIAERESIS:
1156 return (const char *)"ï";
1157 case LATIN_SMALL_LETTER_ETH:
1158 return (const char *)"ð";
1159 case LATIN_SMALL_LETTER_N_WITH_TILDE:
1160 return (const char *)"ñ";
1161 case LATIN_SMALL_LETTER_O_WITH_GRAVE:
1162 return (const char *)"ò";
1163 case LATIN_SMALL_LETTER_O_WITH_ACUTE:
1164 return (const char *)"ó";
1165 case LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX:
1166 return (const char *)"ô";
1167 case LATIN_SMALL_LETTER_O_WITH_TILDE:
1168 return (const char *)"õ";
1169 case LATIN_SMALL_LETTER_O_WITH_DIAERESIS:
1170 return (const char *)"ö";
1172 return (const char *)"÷";
1173 case LATIN_SMALL_LETTER_O_WITH_STROKE:
1174 return (const char *)"ø";
1175 case LATIN_SMALL_LETTER_S_WITH_CARON:
1176 return (const char *)"š";
1177 case LATIN_SMALL_LETTER_U_WITH_GRAVE:
1178 return (const char *)"ù";
1179 case LATIN_SMALL_LETTER_U_WITH_ACUTE:
1180 return (const char *)"ú";
1181 case LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX:
1182 return (const char *)"û";
1183 case LATIN_SMALL_LETTER_U_WITH_DIAERESIS:
1184 return (const char *)"ü";
1185 case LATIN_SMALL_LETTER_Y_WITH_ACUTE:
1186 return (const char *)"ý";
1187 case LATIN_SMALL_LETTER_THORN:
1188 return (const char *)"þ";
1189 case LATIN_SMALL_LETTER_Y_WITH_DIAERESIS:
1190 return (const char *)"ÿ";
1191 case LATIN_SMALL_LETTER_Z_WITH_CARON:
1192 return (const char *)"ž";
1193 case EURO_CURRENCY_SIGN:
1194 return (const char *)"€";
1196 return (const char *)"";
1198 sprintf(buf,"&#%u;",(unsigned)c);
1199 return buf; /* undefined */
1201 /* break; unreachable code */
1202 case XML: /* only 5 &xxx;-ENTITIES ar defined by default */
1203 if ( c >= SPACE && c <= TILDE ) { /* ASCII */
1206 return (const char *)"&";
1208 return (const char *)"'";
1210 return (const char *)""";
1212 return (const char *)"<";
1214 return (const char *)">";
1219 switch (c) { /* subject of change! */
1221 return (const char *)"(PICTURE)";
1223 return (const char *)"_"; /* better use colored symbol? */
1224 case LINE_FEED: /* \n handled somwhere else? */
1226 case CARRIAGE_RETURN:
1227 return (const char *)"<br />";
1228 case NO_BREAK_SPACE:
1229 return (const char *)"<nobr />";
1231 return (const char *)"";
1233 sprintf(buf,"&#x%03x;",(unsigned)c);
1234 return buf; /* undefined */
1236 /* break; unreachable code */
1240 sprintf(buf,"&#%u;",(unsigned)c);
1241 return buf; /* UNDEFINED */
1243 /* break; unreachable code */
1244 case ASCII: /* mainly used for debugging */
1245 if ( c=='\n' || (c>= 0x20 && c <= 0x7F) ) {
1252 return (const char *)"(?)";
1254 return (const char *)"(?)";
1257 /* snprintf does not seem to be standard everywhere, so the insecure sprintf is used */
1258 if ((unsigned)c>255) sprintf(buf,"(0x%04x)",(unsigned)c);
1259 else sprintf(buf,"(0x%02x)",(unsigned)c);
1260 return buf; /* UNDEFINED; */
1262 /* break; unreachable code */
1263 default: /* use UTF8 as default, test with xterm -u8 */
1265 if ( c == UNKNOWN ) return (const char *)"_";
1266 if ( c == PICTURE ) return (const char *)"_"; /* Due to Mobile OCR */
1267 if ( c <= (wchar_t)0x0000007F ) { /* UTF8 == 7bit ASCII */
1271 if ( c <= (wchar_t)0x000007FF ) { /* UTF8 == 11bit */
1272 buf[0] = (char)(0xc0|((c>> 6) & 0x1f)); /* 110xxxxx */
1273 buf[1] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1274 buf[2] = (char)0; /* terminate string */
1277 /* wchar_t is 16bit for Borland-C !? Jan07 */
1278 if ( c <= (wchar_t)0x0000FFFF ) { /* UTF8 == 16bit */
1279 buf[0] = (char)(0xe0|((c>>12) & 0x0f)); /* 1110xxxx */
1280 buf[1] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1281 buf[2] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1282 buf[3] = (char)0; /* terminate string */
1285 if ( c <= (wchar_t)0x001FFFFF ) { /* UTF8 == 21bit */
1286 buf[0] = (char)(0xf0|((c>>18) & 0x07)); /* 11110xxx */
1287 buf[1] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1288 buf[2] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1289 buf[3] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1290 buf[4] = (char)0; /* terminate string */
1293 if ( c <= (wchar_t)0x03FFFFFF ) { /* UTF8 == 26bit */
1294 buf[0] = (char)(0xf8|((c>>24) & 0x03)); /* 111110xx */
1295 buf[1] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1296 buf[2] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1297 buf[3] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1298 buf[4] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1299 buf[5] = (char)0; /* terminate string */
1302 if ( c <= (wchar_t)0x7FFFFFFF ) { /* UTF8 == 31bit */
1303 buf[0] = (char)(0xfc|((c>>30) & 0x01)); /* 1111110x */
1304 buf[1] = (char)(0x80|((c>>24) & 0x3f)); /* 10xxxxxx */
1305 buf[2] = (char)(0x80|((c>>18) & 0x3f)); /* 10xxxxxx */
1306 buf[3] = (char)(0x80|((c>>12) & 0x3f)); /* 10xxxxxx */
1307 buf[4] = (char)(0x80|((c>> 6) & 0x3f)); /* 10xxxxxx */
1308 buf[5] = (char)(0x80|( c & 0x3f)); /* 10xxxxxx */
1309 buf[6] = (char)0; /* terminate string */
1312 return (const char *)UNDEFINED;