3 Routines for compiling Flash2 AVM2 ABC Actionscript
5 Extension module for the rfxswf library.
6 Part of the swftools package.
8 Copyright (c) 2008 Matthias Kramm <kramm@quiss.org>
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
31 #include "tokenizer.h"
/* Global verbosity setting for the tokenizer diagnostics; initialised to 1.
   NOTE(review): how the value is interpreted is decided on lines that are
   not visible in this excerpt. */
35 int as3_verbosity = 1;
/* Global counter, initialised to 0. Presumably the number of tokens scanned
   so far (guess from the name) -- the code updating it is not visible here;
   TODO confirm. */
36 unsigned int as3_tokencount = 0;
/* Report an error at the current scanner position.
 *
 * format, ...: printf-style format string and its arguments.
 *
 * The message is formatted into buf and written to stderr as
 *   <file>:<line>:<column>: error: <message>
 * using current_filename_short, current_line and current_column.
 *
 * NOTE(review): vsprintf() performs no bounds checking. The declaration of
 * buf is not visible in this excerpt; if it is a fixed-size array, a long
 * message overflows it -- consider vsnprintf(buf, sizeof(buf), ...).
 * What happens after the message is printed (e.g. termination) is also on
 * lines not shown here.
 */
38 void as3_error(const char*format, ...)
45 va_start(arglist, format);
46 vsprintf(buf, format, arglist);
48 fprintf(stderr, "%s:%d:%d: error: %s\n", current_filename_short, current_line, current_column, buf);
/* Report a warning at the current scanner position.
 *
 * format, ...: printf-style format string and its arguments.
 *
 * The message is formatted into buf and written to stderr as
 *   <file>:<line>:<column>: warning: <message>
 *
 * NOTE(review): vsprintf() is unbounded; the size of buf is declared on a
 * line not visible here -- consider vsnprintf(). Any verbosity check that
 * suppresses the output is likewise not visible in this excerpt.
 */
52 void as3_warning(const char*format, ...)
59 va_start(arglist, format);
60 vsprintf(buf, format, arglist);
62 fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
/* Report a "soft" warning at the current scanner position.
 *
 * format, ...: printf-style format string and its arguments.
 *
 * In the lines visible here the output is identical to as3_warning():
 *   <file>:<line>:<column>: warning: <message>
 * so whatever distinguishes a soft warning (presumably a different
 * verbosity threshold) must be on lines not shown -- TODO confirm.
 *
 * NOTE(review): vsprintf() is unbounded; consider vsnprintf().
 */
65 void as3_softwarning(const char*format, ...)
72 va_start(arglist, format);
73 vsprintf(buf, format, arglist);
75 fprintf(stderr, "%s:%d:%d: warning: %s\n", current_filename_short, current_line, current_column, buf);
/* File-local debug output helper.
 *
 * format, ...: printf-style format string and its arguments.
 *
 * The message is formatted into buf and printed to stdout with the prefix
 * "(tokenizer) ".
 *
 * NOTE(review): vsprintf() is unbounded; the size of buf is not visible in
 * this excerpt.
 */
78 static void dbg(const char*format, ...)
85 va_start(arglist, format);
86 vsprintf(buf, format, arglist);
/* Loop while the message still ends in '\n'; presumably the body (not
   visible here) strips those trailing newlines -- TODO confirm. */
89 while(l && buf[l-1]=='\n') {
93 printf("(tokenizer) ");
/* Fallback: when the generated scanner does not provide YY_CURRENT_BUFFER,
   map it onto the yy_current_buffer variable (presumably for older flex
   versions -- TODO confirm). The matching #endif is not visible here. */
100 #ifndef YY_CURRENT_BUFFER
101 #define YY_CURRENT_BUFFER yy_current_buffer
/* Handle an include directive: extract the file name from the matched text
 * and redirect the scanner input to that file.
 *
 * text:   the matched directive text; may be NUL-terminated in place
 * len:    length of text
 * quotes: presumably selects the quoted-name path (first block below)
 *         versus the bare-name path (second block); the branch itself is
 *         on a line not shown in this excerpt -- TODO confirm.
 *
 * The file name is duplicated with strdup(); where (or whether) it is
 * freed is not visible here.
 */
104 void handleInclude(char*text, int len, char quotes)
/* Quoted form: locate the first and the last double quote. */
108 char*p1 = strchr(text, '"');
109 char*p2 = strrchr(text, '"');
/* Fewer than two distinct quotes: there is no quoted file name. */
110 if(!p1 || !p2 || p1==p2) {
111 syntaxerror("Invalid include in line %d\n", current_line);
/* Copy starting just behind the opening quote. Cutting the name off at
   the closing quote is not visible in this excerpt. */
114 filename = strdup(p1+1);
/* Bare form: skip the leading word up to the first whitespace ... */
118 while(!strchr(" \n\r\t", text[i1])) i1++;
/* ... then the whitespace behind it, so i1 ends at the start of the name.
   NOTE(review): strchr() also matches the terminating NUL of the set
   string, so this loop and the next one do not stop on a '\0' byte. */
120 while(strchr(" \n\r\t", text[i1])) i1++;
/* Move i2 backwards over trailing whitespace. */
121 while(strchr(" \n\r\t", text[i2-1])) i2--;
/* Terminate the name in place unless nothing was trimmed. */
122 if(i2!=len) text[i2]=0;
123 filename = strdup(&text[i1]);
/* Register the file together with the current scanner buffer;
   enter_file() returns the path that is actually opened. */
126 char*fullfilename = enter_file(filename, YY_CURRENT_BUFFER);
127 yyin = fopen(fullfilename, "rb");
129 syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
/* Continue scanning from a fresh buffer on the newly opened file. */
132 yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
133 //BEGIN(INITIAL); keep context
/* Decode the backslash escapes of a string literal body.
 *
 * s:   first character to decode
 * end: one past the last character to decode
 * n:   output buffer, or 0 to only measure (string_unescape() calls this
 *      function once with 0 and once with a buffer). The visible lines
 *      write through a pointer named o, presumably an alias of n declared
 *      on a line not shown -- TODO confirm.
 *
 * Returns int; string_unescape() uses the result as the decoded length in
 * bytes (the return statement itself is not visible here).
 *
 * Every "if(o) o[len] = X;len++;" guards only the store: len is advanced
 * unconditionally, which is what makes the measuring pass work.
 *
 * Malformed escapes are reported through syntaxerror().
 */
136 static int do_unescape(const char*s, const char*end, char*n)
/* Ordinary character: copied through unchanged. */
142 if(o) o[len] = *s;len++;
/* A backslash must be followed by at least one more character. */
147 if(s==end) syntaxerror("invalid \\ at end of string");
149 /* handle the various line endings (mac, dos, unix) */
/* Single-character escapes: each produces exactly one output byte. */
162 case '\\': if(o) o[len] = '\\';s++;len++; break;
163 case '"': if(o) o[len] = '"';s++;len++; break;
164 case '\'': if(o) o[len] = '\'';s++;len++; break;
165 case 'b': if(o) o[len] = '\b';s++;len++; break;
166 case 'f': if(o) o[len] = '\f';s++;len++; break;
167 case 'n': if(o) o[len] = '\n';s++;len++; break;
168 case 'r': if(o) o[len] = '\r';s++;len++; break;
169 case 't': if(o) o[len] = '\t';s++;len++; break;
/* Octal escape: at most three octal digits, producing one byte. */
170 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
/* NOTE(review): *s is read before the s<end test, and strchr() also
   matches a '\0' byte (the set string's terminator). */
173 while(strchr("01234567", *s) && nr<3 && s<end) {
180 syntaxerror("octal number out of range (0-255): %d", num);
181 if(o) o[len] = num;len++;
/* Hex escapes \x.. and \u.., including the braced form \u{...}. */
184 case 'x': case 'u': {
193 if(s==end) syntaxerror("invalid \\u or \\x at end of string");
196 if(s==end) syntaxerror("invalid \\u{ at end of string");
/* Braced form: any number of hex digits; otherwise at most max digits.
   NOTE(review): same read-before-bounds-check and '\0' caveats as the
   octal loop above. */
201 while(strchr("0123456789abcdefABCDEF", *s) && (bracket || nr < max) && s<end) {
/* OR the digit value into num; the shift that makes room for it is
   presumably on a line not shown here. */
203 if(*s>='0' && *s<='9') num |= *s - '0';
204 if(*s>='a' && *s<='f') num |= *s - 'a' + 10;
205 if(*s>='A' && *s<='F') num |= *s - 'A' + 10;
/* NOTE(review): *s is dereferenced before s<end is checked. */
210 if(*s=='}' && s<end) {
213 syntaxerror("missing terminating '}'");
/* The code point is emitted as the byte sequence returned by getUTF8(),
   one byte at a time (the enclosing loop is not visible). */
217 char*utf8 = getUTF8(num);
219 if(o) o[len] = *utf8;utf8++;len++;
/* Presumably the \x path: the value has to fit into a single byte. */
223 syntaxerror("byte out of range (0-255): %d", num);
224 if(o) o[len] = num;len++;
/* Any other character after a backslash is rejected. */
229 syntaxerror("unknown escape sequence: \"\\%c\"", *s);
/* Decode the escapes in in[0..l) and return the result as a string_t.
 *
 * Works in two passes over the same input: do_unescape() is first called
 * with a null output pointer to obtain the decoded length, then again with
 * a freshly allocated buffer of len+1 bytes.
 *
 * NOTE(review): the malloc() result is not checked in the visible lines.
 * The start pointer s and the terminating NUL of n (if any) are set on
 * lines not shown here. Ownership of n passes to string_new()'s result
 * as far as this excerpt shows -- TODO confirm who frees it.
 */
236 static string_t string_unescape(const char*in, int l)
239 const char*end = &in[l];
/* Pass 1: measure only. */
241 int len = do_unescape(s, end, 0);
242 char*n = (char*)malloc(len+1);
/* Pass 2: decode into the buffer. */
243 do_unescape(s, end, n);
244 string_t out = string_new(n, len);
/* Store the payload of a CDATA section in a3_lval.str.
 *
 * s:   the matched text, starting with the 9-character opener
 * len: length of the matched text
 *
 * The result points into s itself (no copy is made in the visible lines):
 * the 9-character opener is skipped and the length excludes both the
 * opener and the 3-character terminator.
 */
248 static void handleCData(char*s, int len)
250 a3_lval.str.str = s+9; // <![CDATA[
251 a3_lval.str.len = len-9-3;// ]]>
/* Turn a matched string literal into the value stored in a3_lval.str.
 *
 * s:   the matched text including its delimiters
 * len: length of the matched text
 *
 * The literal has to start and end with the same quote character; the
 * unescaped contents are produced by string_unescape(). Stripping the
 * delimiters before that call is presumably done on lines not shown in
 * this excerpt -- TODO confirm.
 */
254 static void handleString(char*s, int len)
257 // don't bother decoding strings in pass 1
/* The condition selecting this shortcut is not visible here. */
258 memset(&a3_lval, 0, sizeof(a3_lval));
263 if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'");
266 else if(s[0]=='\'') {
/* NOTE(review): this branch checks for a closing single quote, but the
   message names a double quote -- looks like a copy/paste slip. */
267 if(s[len-1]!='\'') syntaxerror("String doesn't end with '\"'");
270 else syntaxerror("String incorrectly terminated");
273 a3_lval.str = string_unescape(s, len);
/* File-scope flag with external linkage. It is not referenced anywhere in
   the visible part of this file.
   NOTE(review): purpose cannot be determined from here -- TODO confirm
   against its users. */
277 char start_of_expression;
/* Build an identifier token from the current match.
 *
 * type: token type; presumably also the return value (the return statement
 *       is not visible in this excerpt).
 *
 * Copies the yyleng bytes of yytext into a new heap buffer that has room
 * for a terminating NUL. Writing the terminator and storing the pointer
 * are on lines not shown here.
 *
 * NOTE(review): the malloc() result is not checked in the visible lines.
 */
279 static inline int mkid(int type)
281 char*s = malloc(yyleng+1);
282 memcpy(s, yytext, yyleng);
/* Record a plain token: stores type in a3_lval.token.
 * Presumably returns type so that rules can write "return m(...)"; the
 * return statement is not visible in this excerpt. */
288 static inline int m(int type)
290 a3_lval.token = type;
/* Static scratch buffer holding a copy of the current numeric literal.
   Because it is static, its contents are only valid until the next
   literal is copied into it. */
295 static char numberbuf[64];
/* The header of the helper these lines belong to is not visible in this
   excerpt. The visible part rejects matches that would not fit into
   numberbuf together with a terminating NUL and then copies yytext. */
298 if(yyleng>sizeof(numberbuf)-1)
299 syntaxerror("decimal number overflow");
301 memcpy(s, yytext, yyleng);
/* Store v as the signed integer value of the current token
 * (a3_lval.number_int). The returned token type is determined on lines
 * not visible in this excerpt. */
306 static inline int setint(int v)
308 a3_lval.number_int = v;
/* Store v as the unsigned integer value of the current token
 * (a3_lval.number_uint). The returned token type is determined on lines
 * not visible in this excerpt. */
316 static inline int setuint(unsigned int v)
318 a3_lval.number_uint = v;
/* Store v as the floating point value of the current token
 * (a3_lval.number_float). The returned token type is determined on lines
 * not visible in this excerpt. */
326 static inline int setfloat(double v)
328 a3_lval.number_float = v;
/* Convert the current match to a double and store it in
 * a3_lval.number_float. s is obtained on a line not shown here
 * (presumably a NUL-terminated copy of yytext).
 *
 * NOTE(review): atof() cannot report conversion errors or range problems;
 * strtod() would allow checking both.
 */
332 static inline int handlefloat()
335 a3_lval.number_float = atof(s);
/* Handle a decimal integer literal, falling back to a floating point
 * token (with a warning) when the value is considered too large.
 *
 * The range check is done on the digit string itself by comparing it
 * against a decimal limit, so no integer overflow can occur while
 * checking.
 */
339 static inline int handleint()
/* l is 1 when the literal carries a leading '-', otherwise 0; it doubles
   as the offset of the first digit. */
342 char l = (yytext[0]=='-');
/* Limit as a decimal string.
   NOTE(review): the limit used for negative literals is 2^30, not 2^31 --
   confirm that this asymmetry is intended. */
344 char*max = l?"1073741824":"2147483647";
/* Overflow path; its condition is on a line not shown here (presumably
   "more digits than max has"). */
346 as3_warning("integer overflow: %s (converted to Number)", s);
347 return handlefloat();
/* Digit-by-digit comparison against max, most significant digit first.
   This is only meaningful when both strings have the same number of
   digits; presumably guaranteed by a condition not visible here. */
351 for(t=0;t<yyleng-l;t++) {
352 if(yytext[l+t]>max[t]) {
353 as3_warning("integer overflow: %s (converted to Number)", s);
354 return handlefloat();
/* A smaller digit decides the comparison: the literal is in range. */
356 else if(yytext[l+t]<max[t])
/* Second pass over the whole match; its body (presumably the actual
   conversion) is not visible in this excerpt. */
366 for(t=0;t<yyleng;t++) {
/* Handle a hexadecimal literal that contains a '.'.
 * The value is accumulated in d; how the result is returned is on lines
 * not visible in this excerpt. */
374 static inline int handlehexfloat()
/* Index of the first hex digit: skips the two-character "0x" prefix plus
   one more character when the literal starts with '-'. */
376 char l = (yytext[0]=='-')+2;
381 for(t=l;t<yyleng;t++) {
394 else if((c>='a' && c<='f') || (c>='A' && c<='F'))
/* For a-f and A-F the low nibble is 1..6, so (c&0x0f)+9 yields 10..15. */
395 d+=((c&0x0f)+9)*base;
/* Handle a hexadecimal integer literal.
 * Literals that exceed the limits below are reported with a warning and
 * (per the warning text) converted to Number; the remaining values are
 * returned through setint() and, presumably, setuint() on lines not shown.
 */
399 static inline int handlehex()
/* Index of the first hex digit: 2 for "0x...", 3 when a sign precedes it. */
401 char l = (yytext[0]=='-')+2;
/* Hard error; the triggering condition is not visible here (presumably
   too many digits for 32 bits). */
406 syntaxerror("integer overflow %s", s);
416 else if((c>='a' && c<='f') || (c>='A' && c<='F'))
/* NOTE(review): unless l is changed on a line not shown, it is always 2
   or 3 here, so "l &&" is always true and this 2^30 limit applies to
   every literal, signed or not. */
419 if(l && v>1073741824) {
421 as3_warning("signed integer overflow: %s (converted to Number)", s);
/* NOTE(review): for the same reason "!l" is never true, which would make
   this branch unreachable -- presumably l==3 / l==2 was intended for the
   two checks. TODO confirm. */
424 if(!l && v>2147483647) {
426 as3_warning("unsigned integer overflow: %s (converted to Number)", s);
/* Negated result; presumably taken for literals with a leading '-'
   (the condition is not visible). */
431 return setint(-(int)v);
/* Extract the label name from a "label: keyword" match.
 *
 * text: matched text
 * len:  number of characters of the match to consider (the label rules
 *       pass yyleng minus the length of the keyword)
 *
 * NOTE(review): the copy below reads from yytext, not from the text
 * parameter. The label rules in this file pass yytext as text, so both
 * are the same there.
 */
437 void handleLabel(char*text, int len)
/* Scan backwards from the end; the loop body (not visible) presumably
   stops at the end of the label name. */
440 for(t=len-1;t>=0;--t) {
/* Copy the first t characters into a buffer with room for a NUL. */
445 char*s = malloc(t+1);
446 memcpy(s, yytext, t);
/* Split a matched regular expression literal into pattern and options and
 * store both in a3_lval.regexp. The returned token type is on a line not
 * visible in this excerpt.
 */
451 static int handleregexp()
/* The copy skips the first character of the match (the opening '/'), so
   yyleng bytes are enough for the rest. len is computed on a line not
   shown here.
   NOTE(review): the malloc() result is not checked in the visible lines. */
453 char*s = malloc(yyleng);
455 memcpy(s, yytext+1, len);
/* Backwards scan; presumably looks for the closing '/' that separates the
   pattern from the option letters (loop body not visible). */
458 for(t=len;t>=0;--t) {
464 a3_lval.regexp.pattern = s;
/* Either no options ... */
466 a3_lval.regexp.options = 0;
/* ... or the text following position t inside the same buffer. */
468 a3_lval.regexp.options = s+t+1;
/* Forward declaration; the definition is at the end of this file. */
473 void initialize_scanner();
/* flex expands YY_USER_INIT once, before the first scan, so the scanner
   state is set up by initialize_scanner() at that point. */
474 #define YY_USER_INIT initialize_scanner();
476 /* count the number of lines+columns consumed by this token */
/* Used by rules whose match may contain newlines. Walks over all yyleng
   characters of yytext and treats '\n' specially; the updates of the line
   and column counters are on lines not visible in this excerpt. */
477 static inline void l() {
479 for(t=0;t<yyleng;t++) {
480 if(yytext[t]=='\n') {
488 /* count the number of columns consumed by this token */
/* Used by rules whose match cannot contain a newline: advances
   current_column by the length of the match and leaves the line counter
   untouched. */
489 static inline void c() {
490 current_column+=yyleng;
493 //Boolean {c();return m(KW_BOOLEAN);}
494 //int {c();return m(KW_INT);}
495 //uint {c();return m(KW_UINT);}
496 //Number {c();return m(KW_NUMBER);}
/* Named patterns used by the rules below. The definitions of S, _ and INT
   are on lines not visible in this excerpt. */
/* Identifier: letter or underscore first; later characters may also be
   digits or a backslash. */
504 NAME [a-zA-Z_][a-zA-Z0-9_\\]*
/* Hex literals. NOTE(review): the digit class is [a-zA-Z0-9], so letters
   beyond f/F are accepted by the pattern as well. */
507 HEXINT 0x[a-zA-Z0-9]+
508 HEXFLOAT 0x[a-zA-Z0-9]*\.[a-zA-Z0-9]*
/* Decimal float: digits with an optional fraction, or a fraction alone. */
510 FLOAT [0-9]+(\.[0-9]*)?|\.[0-9]+
/* Variants of the numeric patterns with an optional leading sign. */
512 HEXWITHSIGN [+-]?({HEXINT})
513 HEXFLOATWITHSIGN [+-]?({HEXFLOAT})
514 INTWITHSIGN [+-]?({INT})
515 FLOATWITHSIGN [+-]?({FLOAT})
/* CDATA section, from its opener up to the first terminator. */
517 CDATA <!\[CDATA\[([^]]|\][^]]|\]\][^>])*\]*\]\]\>
/* Double- or single-quoted string. A backslash may be followed by any
   byte (including a newline); an unescaped newline ends the match. */
518 STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
520 MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
/* NOTE(review): a trailing newline is required, so a // comment on the
   last line of a file without a final newline is not matched by this
   pattern. */
521 SINGLELINE_COMMENT \/\/[^\n]*\n
/* Regular expression literal on a single line, followed by option
   letters. */
522 REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]*
526 {SINGLELINE_COMMENT} {l(); /* single line comment: consumed, no token is returned */}
527 {MULTILINE_COMMENT} {l(); /* multi line comment: consumed, no token is returned */}
    /* A comment opener that was not matched by MULTILINE_COMMENT has no
       terminator.
       NOTE(review): yytext is passed, but the format string contains no
       conversion that would use it. */
528 [/][*] {syntaxerror("syntax error: unterminated comment", yytext);}
    /* include directives at the start of a line: first with a quoted file
       name (quotes=1), then with a bare one (quotes=0). The trailing
       context requires a newline to follow without consuming it. */
530 ^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);}
531 ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);}
    /* String and CDATA literals both produce T_STRING and switch back to
       the INITIAL start condition. l() is used because these matches may
       contain newlines. */
532 {STRING} {l(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
533 {CDATA} {l(); BEGIN(INITIAL);handleCData(yytext, yyleng);return T_STRING;}
535 <BEGINNING,REGEXPOK>{
    /* Rules that are active only in the BEGINNING and REGEXPOK start
       conditions: regular expression literals and numbers with a leading
       sign. Each of them switches back to INITIAL. The brace closing this
       scope is on a line not visible in this excerpt. */
536 {REGEXP} {c(); BEGIN(INITIAL);return handleregexp();}
537 {HEXWITHSIGN} {c(); BEGIN(INITIAL);return handlehex();}
538 {HEXFLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlehexfloat();}
539 {INTWITHSIGN} {c(); BEGIN(INITIAL);return handleint();}
540 {FLOATWITHSIGN} {c(); BEGIN(INITIAL);return handlefloat();}
543 \xef\xbb\xbf {/* utf 8 bom: skipped, no token is returned and the column is not advanced */}
    /* Numbers without a sign. These rules carry no start condition prefix
       in the visible lines; all of them switch back to INITIAL. */
546 {HEXINT} {c(); BEGIN(INITIAL);return handlehex();}
547 {HEXFLOAT} {c(); BEGIN(INITIAL);return handlehexfloat();}
548 {INT} {c(); BEGIN(INITIAL);return handleint();}
549 {FLOAT} {c(); BEGIN(INITIAL);return handlefloat();}
551 3rr0r {/* for debugging: generates a tokenizer-level error */
552 syntaxerror("3rr0r");}
554 {NAME}{S}*:{S}*for/{_} {l();handleLabel(yytext, yyleng-3);return T_FOR;}
    /* The rule above and the three below match a labelled statement
       ("name: keyword"). The trailing context is not part of yytext, so
       yyleng minus the keyword length is the part that holds the label;
       handleLabel() extracts it. l() is used since the whitespace may
       contain newlines (assuming S includes them -- its definition is not
       visible here). */
555 {NAME}{S}*:{S}*do/{_} {l();handleLabel(yytext, yyleng-2);return T_DO;}
556 {NAME}{S}*:{S}*while/{_} {l();handleLabel(yytext, yyleng-5);return T_WHILE;}
557 {NAME}{S}*:{S}*switch/{_} {l();handleLabel(yytext, yyleng-6);return T_SWITCH;}
    /* The same keywords without a label: the label is the empty string. */
558 for {c();a3_lval.id="";return T_FOR;}
559 do {c();a3_lval.id="";return T_DO;}
560 while {c();a3_lval.id="";return T_WHILE;}
561 switch {c();a3_lval.id="";return T_SWITCH;}
563 [&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
    /* Multi-character operators (the rule above and the rest of this
       group). flex always takes the longest match, so the order between a
       shorter operator and a longer one starting with it does not matter.
       The logical and equality operators switch to REGEXPOK, the start
       condition in which a following '/' may begin a regexp literal. */
564 [|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);}
565 [!][=] {c();BEGIN(REGEXPOK);return m(T_NE);}
566 [!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);}
567 [=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
568 [=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
    /* The comparison operators leave the start condition unchanged. */
569 [>][=] {c();return m(T_GE);}
570 [<][=] {c();return m(T_LE);}
    /* Increment and decrement switch back to INITIAL. */
571 [-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
572 [+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
    /* Compound assignments.
       NOTE(review): no rule for "&=" or "^=" is visible in this group;
       confirm whether that is intended. */
573 [+][=] {c();return m(T_PLUSBY);}
574 [-][=] {c();return m(T_MINUSBY);}
575 [/][=] {c();return m(T_DIVBY);}
576 [%][=] {c();return m(T_MODBY);}
577 [*][=] {c();return m(T_MULBY);}
578 [|][=] {c();return m(T_ORBY);}
579 [>][>][=] {c();return m(T_SHRBY);}
580 [<][<][=] {c();return m(T_SHLBY);}
581 [>][>][>][=] {c();return m(T_USHRBY);}
    /* Shifts. */
582 [<][<] {c();return m(T_SHL);}
583 [>][>][>] {c();return m(T_USHR);}
584 [>][>] {c();return m(T_SHR);}
    /* Dots and colons; the single-character forms are returned as their
       character code. */
585 \.\.\. {c();return m(T_DOTDOTDOT);}
586 \.\. {c();return m(T_DOTDOT);}
587 \. {c();return m('.');}
588 :: {c();return m(T_COLONCOLON);}
589 : {c();return m(':');}
590 instanceof {c();return m(KW_INSTANCEOF);}
    /* Keywords (the rule above and the rest of this group). They are
       listed before the NAME rule at the end: when a keyword and NAME
       match the same text, flex picks the rule listed first, so the
       keyword token is returned. Longer identifiers that merely start
       with a keyword still match NAME because the longest match wins.
       None of the keyword rules changes the start condition. */
591 implements {c();return m(KW_IMPLEMENTS);}
592 interface {c();return m(KW_INTERFACE);}
593 namespace {c();return m(KW_NAMESPACE);}
594 protected {c();return m(KW_PROTECTED);}
595 undefined {c();return m(KW_UNDEFINED);}
596 continue {c();return m(KW_CONTINUE);}
597 override {c();return m(KW_OVERRIDE);}
598 internal {c();return m(KW_INTERNAL);}
599 function {c();return m(KW_FUNCTION);}
600 finally {c();return m(KW_FINALLY);}
601 default {c();return m(KW_DEFAULT);}
602 package {c();return m(KW_PACKAGE);}
603 private {c();return m(KW_PRIVATE);}
604 dynamic {c();return m(KW_DYNAMIC);}
605 extends {c();return m(KW_EXTENDS);}
606 delete {c();return m(KW_DELETE);}
607 return {c();return m(KW_RETURN);}
608 public {c();return m(KW_PUBLIC);}
609 native {c();return m(KW_NATIVE);}
610 static {c();return m(KW_STATIC);}
611 import {c();return m(KW_IMPORT);}
612 typeof {c();return m(KW_TYPEOF);}
613 throw {c();return m(KW_THROW);}
614 class {c();return m(KW_CLASS);}
615 const {c();return m(KW_CONST);}
616 catch {c();return m(KW_CATCH);}
617 final {c();return m(KW_FINAL);}
618 false {c();return m(KW_FALSE);}
619 break {c();return m(KW_BREAK);}
620 super {c();return m(KW_SUPER);}
621 each {c();return m(KW_EACH);}
622 void {c();return m(KW_VOID);}
623 true {c();return m(KW_TRUE);}
624 null {c();return m(KW_NULL);}
625 else {c();return m(KW_ELSE);}
626 case {c();return m(KW_CASE);}
627 with {c();return m(KW_WITH);}
628 use {c();return m(KW_USE);}
629 new {c();return m(KW_NEW);}
630 get {c();return m(KW_GET);}
631 set {c();return m(KW_SET);}
632 var {c();return m(KW_VAR);}
633 try {c();return m(KW_TRY);}
634 is {c();return m(KW_IS) ;}
635 in {c();return m(KW_IN) ;}
636 if {c();return m(KW_IF) ;}
637 as {c();return m(KW_AS);}
    /* Any other name is an identifier; the text is copied by mkid() and
       the start condition goes back to INITIAL. */
638 {NAME} {c();BEGIN(INITIAL);return mkid(T_IDENTIFIER);}
640 [+-\/*^~@$!%&\(=\[\]\{\}|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
    /* Single-character tokens are returned as their own character code.
       The rule above covers operators and opening punctuation and switches
       to REGEXPOK; the rule below switches back to INITIAL.
       NOTE(review): inside the class above, "+-\/" is a range from '+' to
       '/', which also contains ',', '-' and '.'. The class also contains
       ']', and for equal-length matches flex prefers the earlier rule, so
       the ']' alternative of the rule below appears to be shadowed and
       only ')' reaches it -- TODO confirm which start condition ']' is
       meant to select. */
641 [\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);}
/* Interior of the action that reports unrecognised input; the rule pattern
   and the declarations of buf, t and c1 are on lines not visible in this
   excerpt. The rest of the current line is read with input() into buf so
   that it can be shown in the error message. */
/* NOTE(review): input() returns int. Storing the result in a char before
   comparing it with EOF either never matches (unsigned char) or also
   matches a 0xff byte (signed char). */
649 char c = buf[t]=input();
650 if(c=='\n' || c==EOF) {
655 if(c1>='0' && c1<='9')
/* NOTE(review): the format contains %s but no argument is supplied, which
   is undefined behaviour if syntaxerror() is printf-like; presumably buf
   was meant to be passed, as in the call further down. */
656 syntaxerror("syntax error: %s (identifiers must not start with a digit)");
658 syntaxerror("syntax error: %s", buf);
/* Interior of the end-of-input handling; the rule header and the branch
   structure are on lines not visible in this excerpt. leave_file() returns
   the buffer to continue with, if any. */
664 void*b = leave_file();
/* Two separate paths delete the buffer that has just been exhausted;
   presumably one ends scanning and the other resumes the including file
   -- TODO confirm. */
667 yy_delete_buffer(YY_CURRENT_BUFFER);
670 yy_delete_buffer(YY_CURRENT_BUFFER);
/* Resume scanning in the buffer returned by leave_file(). */
671 yy_switch_to_buffer(b);
/* Scratch buffer for token2string()'s fallback output. Being static, it is
   overwritten by each call that takes the fallback path. */
682 static char mbuf[256];
/* Return a human-readable name for a token type.
 *
 * nr: token type
 * v:  token value; not used in the lines visible here
 *
 * Known tokens map to string literals, which the caller must not modify.
 * Token types without an entry are formatted as a decimal number into
 * mbuf (the lines between the last entry and the sprintf() call are not
 * visible in this excerpt).
 *
 * NOTE(review): KW_FALSE and KW_TRUE are rendered as "False" and "True",
 * while the scanner rules match the lower-case spellings, and KW_NUMBER
 * is rendered as "number" although KW_BOOLEAN keeps its capital letter
 * -- confirm the intended spelling.
 */
683 char*token2string(enum yytokentype nr, YYSTYPE v)
685 if(nr==T_STRING) return "<string>";
686 else if(nr==T_INT) return "<int>";
687 else if(nr==T_UINT) return "<uint>";
688 else if(nr==T_BYTE) return "<byte>";
689 else if(nr==T_FLOAT) return "<float>";
690 else if(nr==T_REGEXP) return "REGEXP";
691 else if(nr==T_EOF) return "***END***";
692 else if(nr==T_GE) return ">=";
693 else if(nr==T_LE) return "<=";
694 else if(nr==T_MINUSMINUS) return "--";
695 else if(nr==T_PLUSPLUS) return "++";
696 else if(nr==KW_IMPLEMENTS) return "implements";
697 else if(nr==KW_INTERFACE) return "interface";
698 else if(nr==KW_NAMESPACE) return "namespace";
699 else if(nr==KW_PROTECTED) return "protected";
700 else if(nr==KW_OVERRIDE) return "override";
701 else if(nr==KW_INTERNAL) return "internal";
702 else if(nr==KW_FUNCTION) return "function";
703 else if(nr==KW_PACKAGE) return "package";
704 else if(nr==KW_PRIVATE) return "private";
705 else if(nr==KW_BOOLEAN) return "Boolean";
706 else if(nr==KW_DYNAMIC) return "dynamic";
707 else if(nr==KW_EXTENDS) return "extends";
708 else if(nr==KW_PUBLIC) return "public";
709 else if(nr==KW_NATIVE) return "native";
710 else if(nr==KW_STATIC) return "static";
711 else if(nr==KW_IMPORT) return "import";
712 else if(nr==KW_NUMBER) return "number";
713 else if(nr==KW_CLASS) return "class";
714 else if(nr==KW_CONST) return "const";
715 else if(nr==KW_FINAL) return "final";
716 else if(nr==KW_FALSE) return "False";
717 else if(nr==KW_TRUE) return "True";
718 else if(nr==KW_UINT) return "uint";
719 else if(nr==KW_NULL) return "null";
720 else if(nr==KW_ELSE) return "else";
721 else if(nr==KW_USE) return "use";
722 else if(nr==KW_INT) return "int";
723 else if(nr==KW_NEW) return "new";
724 else if(nr==KW_GET) return "get";
725 else if(nr==KW_SET) return "set";
726 else if(nr==KW_VAR) return "var";
727 else if(nr==KW_IS) return "is";
728 else if(nr==KW_AS) return "as";
729 else if(nr==T_IDENTIFIER) return "ID";
/* Fallback: the numeric token value, formatted into the static buffer. */
731 sprintf(mbuf, "%d", nr);
736 void initialize_scanner()