3 Routines for compiling Flash2 AVM2 ABC Actionscript
5 Extension module for the rfxswf library.
6 Part of the swftools package.
8 Copyright (c) 2008 Matthias Kramm <kramm@quiss.org>
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
32 #include "tokenizer.h"
35 unsigned int as3_tokencount = 0;
37 static void dbg(const char*format, ...)
44 va_start(arglist, format);
45 vsnprintf(buf, sizeof(buf), format, arglist); /* bounded write, always NUL-terminated; assumes buf is a local char array (declaration not visible here) - TODO confirm */
48 while(l && buf[l-1]=='\n') {
52 printf("(tokenizer) ");
57 #ifndef YY_CURRENT_BUFFER
58 #define YY_CURRENT_BUFFER yy_current_buffer
61 static void*as3_buffer=0;
62 static int as3_buffer_pos=0;
63 static int as3_buffer_len=0;
64 void as3_file_input(FILE*fi)
69 void as3_buffer_input(void*buffer, int len)
72 syntaxerror("trying to parse zero bytearray");
79 #define YY_INPUT(buf,result,max_size) { \
82 while((result = fread(buf, 1, max_size, as3_in))==0 && ferror(as3_in)) \
83 { if(errno != EINTR) {YY_FATAL_ERROR("input in flex scanner failed"); break;} \
84 errno=0; clearerr(as3_in); \
87 int to_read = max_size; \
88 if(to_read + as3_buffer_pos > as3_buffer_len) \
89 to_read = as3_buffer_len - as3_buffer_pos; \
90 memcpy(buf, (char*)as3_buffer+as3_buffer_pos, to_read); /* as3_buffer is void*: cast before doing byte arithmetic */ \
91 as3_buffer_pos += to_read; \
96 void handleInclude(char*text, int len, char quotes)
100 char*p1 = strchr(text, '"');
101 char*p2 = strrchr(text, '"');
102 if(!p1 || !p2 || p1==p2) {
103 syntaxerror("Invalid include in line %d\n", current_line);
106 filename = strdup(p1+1);
110 while(!strchr(" \n\r\t", text[i1])) i1++;
112 while(strchr(" \n\r\t", text[i1])) i1++;
113 while(strchr(" \n\r\t", text[i2-1])) i2--;
114 if(i2!=len) text[i2]=0;
115 filename = strdup(&text[i1]);
118 char*fullfilename = find_file(filename, 1);
119 enter_file2(filename, fullfilename, YY_CURRENT_BUFFER);
120 yyin = fopen(fullfilename, "rb");
122 syntaxerror("Couldn't open include file \"%s\"\n", fullfilename);
125 yy_switch_to_buffer(yy_create_buffer( yyin, YY_BUF_SIZE ) );
126 //BEGIN(INITIAL); keep context
129 static int do_unescape(const char*s, const char*end, char*n)
135 if(o) o[len] = *s;len++;
140 if(s==end) syntaxerror("invalid \\ at end of string");
142 /* handle the various line endings (mac, dos, unix) */
155 case '\\': if(o) o[len] = '\\';s++;len++; break;
156 case '"': if(o) o[len] = '"';s++;len++; break;
157 case '\'': if(o) o[len] = '\'';s++;len++; break;
158 case 'b': if(o) o[len] = '\b';s++;len++; break;
159 case 'f': if(o) o[len] = '\f';s++;len++; break;
160 case 'n': if(o) o[len] = '\n';s++;len++; break;
161 case 'r': if(o) o[len] = '\r';s++;len++; break;
162 case 't': if(o) o[len] = '\t';s++;len++; break;
163 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
166 while(s<end && nr<3 && strchr("01234567", *s)) { /* bounds check first: never read *s at end, and strchr() also matches '\0' */
173 syntaxerror("octal number out of range (0-255): %d", num);
174 if(o) o[len] = num;len++;
177 case 'x': case 'u': {
186 if(s==end) syntaxerror("invalid \\u or \\x at end of string");
189 if(s==end) syntaxerror("invalid \\u{ at end of string");
194 while(s<end && (bracket || nr < max) && strchr("0123456789abcdefABCDEF", *s)) { /* bounds check first: strchr() also matches '\0' */
196 if(*s>='0' && *s<='9') num |= *s - '0';
197 if(*s>='a' && *s<='f') num |= *s - 'a' + 10;
198 if(*s>='A' && *s<='F') num |= *s - 'A' + 10;
203 if(s<end && *s=='}') { /* only look at *s while still inside the string */
206 syntaxerror("missing terminating '}'");
210 char*utf8 = getUTF8(num);
212 if(o) o[len] = *utf8;utf8++;len++;
216 syntaxerror("byte out of range (0-255): %d", num);
217 if(o) o[len] = num;len++;
236 static string_t string_unescape(const char*in, int l)
239 const char*end = &in[l];
241 int len = do_unescape(s, end, 0);
242 char*n = (char*)malloc(len+1);
243 do_unescape(s, end, n);
244 string_t out = string_new(n, len);
248 static void handleCData(char*s, int len)
250 a3_lval.str.str = s+9; // <![CDATA[
251 a3_lval.str.len = len-9-3;// ]]>
252 a3_lval.str.str = strdup_n(a3_lval.str.str, a3_lval.str.len);
255 static void handleString(char*s, int len)
258 if(s[len-1]!='"') syntaxerror("String doesn't end with '\"'");
261 else if(s[0]=='\'') {
262 if(s[len-1]!='\'') syntaxerror("String doesn't end with \"'\""); /* report the quote this branch actually expects */
265 else syntaxerror("String incorrectly terminated");
267 a3_lval.str = string_unescape(s, len);
271 char start_of_expression;
273 static inline int m(int type)
275 a3_lval.token = type;
280 static char numberbuf[64];
283 if(yyleng>sizeof(numberbuf)-1)
284 syntaxerror("decimal number overflow");
286 memcpy(s, yytext, yyleng);
291 static inline int setint(int v)
293 a3_lval.number_int = v;
296 static inline int setfloat(double v)
298 a3_lval.number_float = v;
302 static inline int handlefloat()
305 a3_lval.number_float = atof(s);
309 static inline int handleint()
312 char l = (yytext[0]=='-');
314 //char*max = l?"1073741824":"2147483647";
315 char*max = l?"2147483648":"2147483647";
318 as3_softwarning("integer overflow: %s (converted to Number)", s);
319 return handlefloat();
323 for(t=0;t<yyleng-l;t++) {
324 if(yytext[l+t]>max[t]) {
325 as3_softwarning("integer overflow: %s (converted to Number)", s);
326 return handlefloat();
328 else if(yytext[l+t]<max[t])
338 for(t=0;t<yyleng;t++) {
346 static inline int handlehexfloat()
348 char l = (yytext[0]=='-')+2;
353 for(t=l;t<yyleng;t++) {
366 else if((c>='a' && c<='f') || (c>='A' && c<='F'))
367 d+=((c&0x0f)+9)*base;
371 static inline int handlehex()
373 char l = (yytext[0]=='-')+2;
378 syntaxerror("integer overflow %s", s);
388 else if((c>='a' && c<='f') || (c>='A' && c<='F'))
391 if(l && v>=0x80000000) {
393 as3_softwarning("integer overflow: %s (converted to Number)", s);
396 if(!l && v>0x7fffffff) {
398 as3_softwarning("integer overflow: %s (converted to Number)", s);
403 return setint(-(int)v);
409 void handleLabel(char*text, int len)
412 for(t=len-1;t>=0;--t) {
417 char*s = malloc(t+1);
418 memcpy(s, yytext, t);
423 static int handleregexp()
425 char*s = malloc(yyleng);
427 memcpy(s, yytext+1, len);
430 for(t=len;t>=0;--t) {
436 a3_lval.regexp.pattern = s;
438 a3_lval.regexp.options = 0;
440 a3_lval.regexp.options = s+t+1;
445 void initialize_scanner();
446 #define YY_USER_INIT initialize_scanner();
448 /* count the number of lines+columns consumed by this token */
449 static inline void l() {
451 for(t=0;t<yyleng;t++) {
452 if(yytext[t]=='\n') {
460 /* count the number of columns consumed by this token */
461 static inline void c() {
462 current_column+=yyleng;
465 trie_t*active_namespaces = 0;
466 /*void tokenizer_register_namespace(const char*id)
468 trie_put(namespaces, id, 0);
470 void tokenizer_unregister_namespace(const char*id)
472 trie_remove(namespaces, id);
474 static inline int tokenizer_is_namespace(const char*id) /* explicit return type: implicit int is not valid since C99 */
476 return trie_contains(active_namespaces, id);
479 static inline int handleIdentifier()
481 char*s = malloc(yyleng+1);
482 memcpy(s, yytext, yyleng);
485 if(tokenizer_is_namespace(s))
492 //Boolean {c();return m(KW_BOOLEAN);}
493 //int {c();return m(KW_INT);}
494 //uint {c();return m(KW_UINT);}
495 //Number {c();return m(KW_NUMBER);}
502 NAME [a-zA-Z_][a-zA-Z0-9_\\]*
505 HEXINT 0x[a-zA-Z0-9]+
506 HEXFLOAT 0x[a-zA-Z0-9]*\.[a-zA-Z0-9]*
508 FLOAT ([0-9]+(\.[0-9]*)?|\.[0-9]+)(e[0-9]+)?
510 HEXWITHSIGN [+-]?({HEXINT})
511 HEXFLOATWITHSIGN [+-]?({HEXFLOAT})
512 INTWITHSIGN [+-]?({INT})
513 FLOATWITHSIGN [+-]?({FLOAT})
515 CDATA <!\[CDATA\[([^]]|\][^]]|\]\][^>])*\]*\]\]\>
516 STRING ["](\\[\x00-\xff]|[^\\"\n])*["]|['](\\[\x00-\xff]|[^\\'\n])*[']
518 MULTILINE_COMMENT [/][*]+([*][^/]|[^/*]|[^*][/]|[\x00-\x1f])*[*]+[/]
519 SINGLELINE_COMMENT \/\/[^\n\r]*[\n\r]
520 REGEXP [/]([^/\n]|\\[/])*[/][a-zA-Z]*
524 {SINGLELINE_COMMENT} {l(); /* single line comment */}
525 {MULTILINE_COMMENT} {l(); /* multi line comment */}
526 [/][*] {syntaxerror("syntax error: unterminated comment", yytext);}
528 ^include{S}+{STRING}{S}*/\n {l();handleInclude(yytext, yyleng, 1);}
529 ^include{S}+[^" \t\r\n][\x20-\xff]*{S}*/\n {l();handleInclude(yytext, yyleng, 0);}
530 {STRING} {l(); BEGIN(INITIAL);handleString(yytext, yyleng);return T_STRING;}
531 {CDATA} {l(); BEGIN(INITIAL);handleCData(yytext, yyleng);return T_STRING;}
533 <BEGINNING,REGEXPOK>{
534 {REGEXP} {c(); BEGIN(INITIAL);return handleregexp();}
535 {HEXWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehex();}
536 {HEXFLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();}
537 {INTWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handleint();}
538 {FLOATWITHSIGN}/{_} {c(); BEGIN(INITIAL);return handlefloat();}
541 <REGEXPOK>[\{] {c(); BEGIN(REGEXPOK);return m(T_DICTSTART);}
542 [\{] {c(); BEGIN(INITIAL); return m('{');}
544 \xef\xbb\xbf {/* utf 8 bom */}
547 {HEXINT}/{_} {c(); BEGIN(INITIAL);return handlehex();}
548 {HEXFLOAT}/{_} {c(); BEGIN(INITIAL);return handlehexfloat();}
549 {INT}/{_} {c(); BEGIN(INITIAL);return handleint();}
550 {FLOAT}/{_} {c(); BEGIN(INITIAL);return handlefloat();}
551 NaN {c(); BEGIN(INITIAL);return m(KW_NAN);}
553 3rr0r {/* for debugging: generates a tokenizer-level error */
554 syntaxerror("3rr0r");}
556 {NAME}{S}*:{S}*for/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-3);return T_FOR;}
557 {NAME}{S}*:{S}*do/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-2);return T_DO;}
558 {NAME}{S}*:{S}*while/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-5);return T_WHILE;}
559 {NAME}{S}*:{S}*switch/{_} {l();BEGIN(INITIAL);handleLabel(yytext, yyleng-6);return T_SWITCH;}
560 for {c();BEGIN(INITIAL);a3_lval.id="";return T_FOR;}
561 do {c();BEGIN(INITIAL);a3_lval.id="";return T_DO;}
562 while {c();BEGIN(INITIAL);a3_lval.id="";return T_WHILE;}
563 switch {c();BEGIN(INITIAL);a3_lval.id="";return T_SWITCH;}
565 [&][&] {c();BEGIN(REGEXPOK);return m(T_ANDAND);}
566 [|][|] {c();BEGIN(REGEXPOK);return m(T_OROR);}
567 [!][=] {c();BEGIN(REGEXPOK);return m(T_NE);}
568 [!][=][=] {c();BEGIN(REGEXPOK);return m(T_NEE);}
569 [=][=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQEQ);}
570 [=][=] {c();BEGIN(REGEXPOK);return m(T_EQEQ);}
571 [>][=] {c();BEGIN(REGEXPOK);return m(T_GE);}
572 [<][=] {c();BEGIN(REGEXPOK);return m(T_LE);}
573 [-][-] {c();BEGIN(INITIAL);return m(T_MINUSMINUS);}
574 [+][+] {c();BEGIN(INITIAL);return m(T_PLUSPLUS);}
575 [+][=] {c();BEGIN(REGEXPOK);return m(T_PLUSBY);}
576 [\^][=] {c();BEGIN(REGEXPOK);return m(T_XORBY);}
577 [-][=] {c();BEGIN(REGEXPOK);return m(T_MINUSBY);}
578 [/][=] {c();BEGIN(REGEXPOK);return m(T_DIVBY);}
579 [%][=] {c();BEGIN(REGEXPOK);return m(T_MODBY);}
580 [*][=] {c();BEGIN(REGEXPOK);return m(T_MULBY);}
581 [|][=] {c();BEGIN(REGEXPOK);return m(T_ORBY);}
582 [>][>][=] {c();BEGIN(REGEXPOK);return m(T_SHRBY);}
583 [<][<][=] {c();BEGIN(REGEXPOK);return m(T_SHLBY);}
584 [>][>][>][=] {c();BEGIN(REGEXPOK);return m(T_USHRBY);}
585 [<][<] {c();BEGIN(REGEXPOK);return m(T_SHL);}
586 [>][>][>] {c();BEGIN(REGEXPOK);return m(T_USHR);}
587 [>][>] {c();BEGIN(REGEXPOK);return m(T_SHR);}
588 \.\.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOTDOT);}
589 \.\. {c();BEGIN(REGEXPOK);return m(T_DOTDOT);}
590 \. {c();BEGIN(REGEXPOK);return m('.');}
591 :: {c();BEGIN(REGEXPOK);return m(T_COLONCOLON);}
592 : {c();BEGIN(REGEXPOK);return m(':');}
593 instanceof {c();BEGIN(REGEXPOK);return m(KW_INSTANCEOF);}
594 implements {c();BEGIN(REGEXPOK);return m(KW_IMPLEMENTS);}
595 interface {c();BEGIN(INITIAL);return m(KW_INTERFACE);}
596 namespace {c();BEGIN(INITIAL);return m(KW_NAMESPACE);}
597 protected {c();BEGIN(INITIAL);return m(KW_PROTECTED);}
598 undefined {c();BEGIN(INITIAL);return m(KW_UNDEFINED);}
599 continue {c();BEGIN(INITIAL);return m(KW_CONTINUE);}
600 override {c();BEGIN(INITIAL);return m(KW_OVERRIDE);}
601 internal {c();BEGIN(INITIAL);return m(KW_INTERNAL);}
602 function {c();BEGIN(INITIAL);return m(KW_FUNCTION);}
603 finally {c();BEGIN(INITIAL);return m(KW_FINALLY);}
604 default {c();BEGIN(INITIAL);return m(KW_DEFAULT);}
605 package {c();BEGIN(INITIAL);return m(KW_PACKAGE);}
606 private {c();BEGIN(INITIAL);return m(KW_PRIVATE);}
607 dynamic {c();BEGIN(INITIAL);return m(KW_DYNAMIC);}
608 extends {c();BEGIN(INITIAL);return m(KW_EXTENDS);}
609 delete {c();BEGIN(REGEXPOK);return m(KW_DELETE);}
610 return {c();BEGIN(REGEXPOK);return m(KW_RETURN);}
611 public {c();BEGIN(INITIAL);return m(KW_PUBLIC);}
612 native {c();BEGIN(INITIAL);return m(KW_NATIVE);}
613 static {c();BEGIN(INITIAL);return m(KW_STATIC);}
614 import {c();BEGIN(REGEXPOK);return m(KW_IMPORT);}
615 typeof {c();BEGIN(REGEXPOK);return m(KW_TYPEOF);}
616 throw {c();BEGIN(REGEXPOK);return m(KW_THROW);}
617 class {c();BEGIN(INITIAL);return m(KW_CLASS);}
618 const {c();BEGIN(INITIAL);return m(KW_CONST);}
619 catch {c();BEGIN(INITIAL);return m(KW_CATCH);}
620 final {c();BEGIN(INITIAL);return m(KW_FINAL);}
621 false {c();BEGIN(INITIAL);return m(KW_FALSE);}
622 break {c();BEGIN(INITIAL);return m(KW_BREAK);}
623 super {c();BEGIN(INITIAL);return m(KW_SUPER);}
624 each {c();BEGIN(INITIAL);return m(KW_EACH);}
625 void {c();BEGIN(INITIAL);return m(KW_VOID);}
626 true {c();BEGIN(INITIAL);return m(KW_TRUE);}
627 null {c();BEGIN(INITIAL);return m(KW_NULL);}
628 else {c();BEGIN(INITIAL);return m(KW_ELSE);}
629 case {c();BEGIN(REGEXPOK);return m(KW_CASE);}
630 with {c();BEGIN(REGEXPOK);return m(KW_WITH);}
631 use {c();BEGIN(REGEXPOK);return m(KW_USE);}
632 new {c();BEGIN(REGEXPOK);return m(KW_NEW);}
633 get {c();BEGIN(INITIAL);return m(KW_GET);}
634 set {c();BEGIN(INITIAL);return m(KW_SET);}
635 var {c();BEGIN(INITIAL);return m(KW_VAR);}
636 try {c();BEGIN(INITIAL);return m(KW_TRY);}
637 is {c();BEGIN(REGEXPOK);return m(KW_IS) ;}
638 in {c();BEGIN(REGEXPOK);return m(KW_IN) ;}
639 if {c();BEGIN(INITIAL);return m(KW_IF) ;}
640 as {c();BEGIN(REGEXPOK);return m(KW_AS);}
641 $?{NAME} {c();BEGIN(INITIAL);return handleIdentifier();}
643 [\]\}*] {c();BEGIN(INITIAL);return m(yytext[0]);}
644 [+-\/^~@$!%&\(=\[|?:;,<>] {c();BEGIN(REGEXPOK);return m(yytext[0]);}
645 [\)\]] {c();BEGIN(INITIAL);return m(yytext[0]);}
653 char c = buf[t]=input();
654 if(c=='\n' || c==EOF) {
659 if(c1>='0' && c1<='9')
660 syntaxerror("syntax error: %s (identifiers must not start with a digit)");
662 syntaxerror("syntax error: %s", buf);
668 void*b = leave_file();
671 yy_delete_buffer(YY_CURRENT_BUFFER);
674 yy_delete_buffer(YY_CURRENT_BUFFER);
675 yy_switch_to_buffer(b);
686 static char mbuf[256];
687 char*token2string(enum yytokentype nr, YYSTYPE v)
690 char*s = malloc(v.str.len+32); /* "<string>" (8) + payload + " (N bytes)" suffix (up to 20) + NUL; the former +10 overflowed */
691 strcpy(s, "<string>");
692 memcpy(s+8, v.str.str, v.str.len);
693 sprintf(s+8+v.str.len, " (%d bytes)", v.str.len);
696 else if(nr==T_REGEXP) {
697 char*s = malloc(strlen(v.regexp.pattern)+10);
698 sprintf(s, "<regexp>%s", v.regexp.pattern);
701 else if(nr==T_IDENTIFIER) {
702 char*s = malloc(strlen(v.id)+10);
703 sprintf(s, "<ID>%s", v.id);
706 else if(nr==T_INT) return "<int>";
707 else if(nr==T_UINT) return "<uint>";
708 else if(nr==T_FLOAT) return "<float>";
709 else if(nr==T_EOF) return "***END***";
710 else if(nr==T_GE) return ">=";
711 else if(nr==T_LE) return "<=";
712 else if(nr==T_MINUSMINUS) return "--";
713 else if(nr==T_PLUSPLUS) return "++";
714 else if(nr==KW_IMPLEMENTS) return "implements";
715 else if(nr==KW_INTERFACE) return "interface";
716 else if(nr==KW_NAMESPACE) return "namespace";
717 else if(nr==KW_PROTECTED) return "protected";
718 else if(nr==KW_OVERRIDE) return "override";
719 else if(nr==KW_INTERNAL) return "internal";
720 else if(nr==KW_FUNCTION) return "function";
721 else if(nr==KW_PACKAGE) return "package";
722 else if(nr==KW_PRIVATE) return "private";
723 else if(nr==KW_BOOLEAN) return "Boolean";
724 else if(nr==KW_DYNAMIC) return "dynamic";
725 else if(nr==KW_EXTENDS) return "extends";
726 else if(nr==KW_PUBLIC) return "public";
727 else if(nr==KW_NATIVE) return "native";
728 else if(nr==KW_STATIC) return "static";
729 else if(nr==KW_IMPORT) return "import";
730 else if(nr==KW_NUMBER) return "number";
731 else if(nr==KW_CLASS) return "class";
732 else if(nr==KW_CONST) return "const";
733 else if(nr==KW_FINAL) return "final";
734 else if(nr==KW_FALSE) return "False";
735 else if(nr==KW_TRUE) return "True";
736 else if(nr==KW_UINT) return "uint";
737 else if(nr==KW_NULL) return "null";
738 else if(nr==KW_ELSE) return "else";
739 else if(nr==KW_USE) return "use";
740 else if(nr==KW_INT) return "int";
741 else if(nr==KW_NEW) return "new";
742 else if(nr==KW_GET) return "get";
743 else if(nr==KW_SET) return "set";
744 else if(nr==KW_VAR) return "var";
745 else if(nr==KW_IS) return "is";
746 else if(nr==KW_AS) return "as";
748 sprintf(mbuf, "%d", nr);
753 void initialize_scanner()