lib/h.263/swfvideo.c

   1 /* swfvideo.c
   2    Routines for handling h.263 video tags
   3
   4    Part of the swftools package.
   5
   6    Copyright (c) 2003 Matthias Kramm <kramm@quiss.org>
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 2 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
  21
  22 #include <stdlib.h>
  23 #include <stdio.h>
  24 #include <assert.h>
  25 #include <math.h>
  26 #include "../rfxswf.h"
  27 #include "h263tables.c"
  28
  29 /* TODO:
  30    - get rid of _vxy, _i endings
  31    - use prepare* / write* in encode_blockI
  32 */
  33
  34
  35 #ifdef MAIN
  36 U16 totalframes = 0;
  37 #endif
  38 void swf_SetVideoStreamDefine(TAG*tag, VIDEOSTREAM*stream, U16 frames, U16 width, U16 height)
  39 {
  40     swf_SetU16(tag, frames);
  41     swf_SetU16(tag, width);
  42     swf_SetU16(tag, height);
  43     //swf_SetU8(tag, 1); /* smoothing on */
  44     swf_SetU8(tag, 0); /* smoothing off */
  45     swf_SetU8(tag, 2); /* codec = h.263 sorenson spark */
  46
  47 #ifdef MAIN
  48     totalframes = frames;
  49 #endif
  50     memset(stream, 0, sizeof(VIDEOSTREAM));
  51     stream->olinex = width;
  52     stream->owidth = width;
  53     stream->oheight = height;
  54     width+=15;width&=~15;
  55     height+=15;height&=~15;
  56     stream->linex = width;
  57     stream->width = width;
  58     stream->height = height;
  59     stream->bbx = width/16;
  60     stream->bby = height/16;
  61     stream->current = (YUV*)malloc(width*height*sizeof(YUV));
  62     stream->oldpic = (YUV*)malloc(width*height*sizeof(YUV));
  63     stream->mvdx = (int*)malloc(stream->bbx*stream->bby*sizeof(int));
  64     stream->mvdy = (int*)malloc(stream->bbx*stream->bby*sizeof(int));
  65     stream->do_motion = 0;
  66
  67     memset(stream->oldpic, 0, width*height*sizeof(YUV));
  68     memset(stream->current, 0, width*height*sizeof(YUV));
  69 }
  70 void swf_VideoStreamClear(VIDEOSTREAM*stream)
  71 {
  72     free(stream->oldpic);stream->oldpic = 0;
  73     free(stream->current);stream->current = 0;
  74     free(stream->mvdx);stream->mvdx=0;
  75     free(stream->mvdy);stream->mvdy=0;
  76 }
  77
  78 typedef struct _block_t
  79 {
  80     int y1[64];
  81     int y2[64];
  82     int y3[64];
  83     int y4[64];
  84     int u[64];
  85     int v[64];
  86 } block_t;
  87
  88 static int zigzagtable[64] = {
  89     0, 1, 5, 6, 14, 15, 27, 28,
  90     2, 4, 7, 13, 16, 26, 29, 42,
  91     3, 8, 12, 17, 25, 30, 41, 43,
  92     9, 11, 18, 24, 31, 40, 44, 53,
  93     10, 19, 23, 32, 39, 45, 52, 54,
  94     20, 22, 33, 38, 46, 51, 55, 60,
  95     21, 34, 37, 47, 50, 56, 59, 61,
  96     35, 36, 48, 49, 57, 58, 62, 63};
  97
  98 static void zigzag(int*src)
  99 {
 100     int tmp[64];
 101     int t;
 102     for(t=0;t<64;t++) {
 103         tmp[zigzagtable[t]] = src[t];
 104     }
 105     memcpy(src, tmp, sizeof(int)*64);
 106 }
 107
 108 #define PI 3.14159265358979
 109 #define SQRT2 1.414214
 110 #define RSQRT2 (1.0/1.414214)
 111
 112 static double table[8][8] =
 113 {
 114 {0.707106781186548,0.707106781186548,0.707106781186548,0.707106781186548,0.707106781186548,0.707106781186548,0.707106781186548,0.707106781186548},
 115 {0.980785280403230,0.831469612302545,0.555570233019602,0.195090322016128,-0.195090322016128,-0.555570233019602,-0.831469612302545,-0.980785280403230},
 116 {0.923879532511287,0.382683432365090,-0.382683432365090,-0.923879532511287,-0.923879532511287,-0.382683432365090,0.382683432365090,0.923879532511287},
 117 {0.831469612302545,-0.195090322016128,-0.980785280403230,-0.555570233019602,0.555570233019602,0.980785280403230,0.195090322016129,-0.831469612302545},
 118 {0.707106781186548,-0.707106781186547,-0.707106781186548,0.707106781186547,0.707106781186548,-0.707106781186547,-0.707106781186547,0.707106781186547},
 119 {0.555570233019602,-0.980785280403230,0.195090322016128,0.831469612302545,-0.831469612302545,-0.195090322016128,0.980785280403231,-0.555570233019602},
 120 {0.382683432365090,-0.923879532511287,0.923879532511287,-0.382683432365090,-0.382683432365091,0.923879532511287,-0.923879532511286,0.382683432365090},
 121 {0.195090322016128,-0.555570233019602,0.831469612302545,-0.980785280403231,0.980785280403230,-0.831469612302545,0.555570233019602,-0.195090322016129}
 122 };
 123
 124 static void dct(int*src)
 125 {
 126     double tmp[64];
 127     int x,y,u,v,t;
 128
 129     for(v=0;v<8;v++)
 130     for(u=0;u<8;u++)
 131     {
 132         double c = 0;
 133         for(x=0;x<8;x++)
 134         {
 135             c+=table[u][x]*src[v*8+x];
 136         }
 137         tmp[v*8+u] = c;
 138     }
 139     for(u=0;u<8;u++)
 140     for(v=0;v<8;v++)
 141     {
 142         double c = 0;
 143         for(y=0;y<8;y++)
 144         {
 145             c+=table[v][y]*tmp[y*8+u];
 146         }
 147         src[v*8+u] = (int)(c*0.25+0.5);
 148     }
 149 }
 150
 151 static void idct(int*src)
 152 {
 153     double tmp[64];
 154     int x,y,u,v;
 155     for(y=0;y<8;y++)
 156     for(x=0;x<8;x++)
 157     {
 158         double c = 0;
 159         for(u=0;u<8;u++)
 160         {
 161             c+=table[u][x]*src[y*8+u];
 162         }
 163         tmp[y*8+x] = c;
 164     }
 165     for(y=0;y<8;y++)
 166     for(x=0;x<8;x++)
 167     {
 168         double c = 0;
 169         for(v=0;v<8;v++)
 170         {
 171             c+=table[v][y]*tmp[v*8+x];
 172         }
 173         src[y*8+x] = (int)(c*0.25+0.5);
 174     }
 175 }
 176
 177 static double c[8] = {1.0,
 178 0.980785280403230, // cos(Pi*1/16), sin(Pi*7/16)
 179 0.923879532511287, // cos(Pi*2/16), sin(Pi*6/16)
 180 0.831469612302545, // cos(Pi*3/16), sin(Pi*5/16)
 181 0.707106781186548, // cos(Pi*4/16), sin(Pi*4/16), 1/sqrt(2)
 182 0.555570233019602, // cos(Pi*5/16), sin(Pi*3/16)
 183 0.382683432365090, // cos(Pi*6/16), sin(Pi*2/16)
 184 0.195090322016128 // cos(Pi*7/16), sin(Pi*1/16)
 185 };
 186
 187 static double cc[8];
 188 static int ccquant = -1;
 189
 190 static void preparequant(int quant)
 191 {
 192     if(ccquant == quant)
 193         return;
 194     cc[0] = c[0]/(quant*2*4);
 195     cc[1] = c[1]/(quant*2*4);
 196     cc[2] = c[2]/(quant*2*4);
 197     cc[3] = c[3]/(quant*2*4);
 198     cc[4] = c[4]/(quant*2*4);
 199     cc[5] = c[5]/(quant*2*4);
 200     cc[6] = c[6]/(quant*2*4);
 201     cc[7] = c[7]/(quant*2*4);
 202     ccquant = quant;
 203 }
 204
 205 inline static void innerdct(double*a,double*b, double*c)
 206 {
 207     // c1*c7*2 = c6
 208     // c2*c6*2 = c4
 209     // c3*c5*2 = c2
 210     // c4*c4*2 = 1
 211
 212      //{  1,  3,  5,  7, -7, -5, -3, -1},
 213      //{  3, -7, -1, -5,  5,  1,  7, -3},
 214      //{  5, -1,  7,  3, -3, -7,  1, -5},
 215      //{  7, -5,  3, -1,  1, -3,  5, -7}
 216     double b0,b1,b2,b3,b4,b5;
 217     b2 = (a[0]+a[7]);
 218     b3 = (a[1]+a[6]);
 219     b4 = (a[2]+a[5]);
 220     b5 = (a[3]+a[4]);
 221
 222     b0 = (b2+b5)*c[4];
 223     b1 = (b3+b4)*c[4];
 224     b[0*8] = b0 + b1;
 225     b[4*8] = b0 - b1;
 226     b[2*8] = (b2-b5)*c[2] + (b3-b4)*c[6];
 227     b[6*8] = (b2-b5)*c[6] + (b4-b3)*c[2];
 228
 229     b0 = (a[0]-a[7]);
 230     b1 = (a[1]-a[6]);
 231     b2 = (a[2]-a[5]);
 232     b3 = (a[3]-a[4]);
 233
 234     b[1*8] = b0*c[1] + b1*c[3] + b2*c[5] + b3*c[7];
 235     b[3*8] = b0*c[3] - b1*c[7] - b2*c[1] - b3*c[5];
 236     b[5*8] = b0*c[5] - b1*c[1] + b2*c[7] + b3*c[3];
 237     b[7*8] = b0*c[7] - b1*c[5] + b2*c[3] - b3*c[1];
 238 }
 239
 240 static void dct2(int*src, int*dest)
 241 {
 242     double tmp[64], tmp2[64];
 243     double*p;
 244     int u,x,v,t;
 245
 246     for(t=0;t<64;t++)
 247         tmp2[t] = src[t];
 248
 249     for(v=0;v<8;v++)
 250     {
 251         double* a=&tmp2[v*8];
 252         double* b=&tmp[v];
 253         innerdct(a,b,c);
 254     }
 255     for(v=0;v<8;v++)
 256     {
 257         double* a=&tmp[v*8];
 258         double* b=&tmp2[v];
 259         innerdct(a,b,cc);
 260     }
 261     for(t=0;t<64;t++) {
 262         int v = (int)(tmp2[t]);
 263         if(v>127) v=127;
 264         if(v<-127) v=-127;
 265         dest[zigzagtable[t]] = v;
 266     }
 267 }
 268
 269
 270 static inline int truncate256(int a)
 271 {
 272     if(a>255) return 255;
 273     if(a<0) return 0;
 274     return a;
 275 }
 276
 277 static void getregion(block_t* bb, YUV*pic, int posx, int posy, int linex)
 278 {
 279     YUV*p1;
 280     YUV*p2;
 281     int y1=0, y2=0, y3=0, y4=0;
 282     int u=0,v=0;
 283     int x,y;
 284     posx*=16;
 285     posy*=16;
 286     p1 = &pic[posy*linex+posx];
 287     p2 = p1;
 288     for(y=0;y<8;y++) {
 289         for(x=0;x<8;x++) {
 290             bb->u[u++] = (p2[x*2].u + p2[x*2+1].u + p2[linex+x*2].u + p2[linex+x*2+1].u)/4;
 291             bb->v[v++] = (p2[x*2].v + p2[x*2+1].v + p2[linex+x*2].v + p2[linex+x*2+1].v)/4;
 292             bb->y1[y1++] = p1[x].y;
 293             bb->y2[y2++] = p1[x+8].y;
 294             bb->y3[y3++] = p1[linex*8+x].y;
 295             bb->y4[y4++] = p1[linex*8+x+8].y;
 296         }
 297         p1+=linex;
 298         p2+=linex*2;
 299     }
 300 }
 301
 302 static void getmvdregion(block_t* bb, YUV*pic, int posx, int posy, int mvdx, int mvdy, int linex)
 303 {
 304     YUV*p1;
 305     YUV*p2;
 306     int yy=0,uv=0;
 307     int x,y;
 308     int yhp = 0, uvhp=0;
 309     int uvposx, uvposy;
 310     posx = posx*16 + ((mvdx&~1)/2);
 311     posy = posy*16 + ((mvdy&~1)/2);
 312     p1 = &pic[posy*linex+posx];
 313     p2 = &pic[(posy&~1)*linex+(posx&~1)];
 314     uvhp = ((mvdx&1)|((mvdx>>1)&1))|((mvdy&2)|((mvdy&1)<<1));
 315     yhp = ((mvdy&1)<<1|(mvdx&1));
 316
 317     /* y */
 318     if(yhp==0 || yhp==2) {
 319         for(y=0;y<8;y++) {
 320             for(x=0;x<8;x++) {
 321                 bb->y1[yy] = p1[x].y;
 322                 bb->y2[yy] = p1[x+8].y;
 323                 bb->y3[yy] = p1[linex*8+x].y;
 324                 bb->y4[yy] = p1[linex*8+x+8].y;
 325                 yy++;
 326             }
 327             p1+=linex;
 328
 329             if(yhp==2) {
 330                 yy-=8;
 331                 for(x=0;x<8;x++) {
 332                     bb->y1[yy] += p1[x].y; bb->y1[yy] /= 2;
 333                     bb->y2[yy] += p1[x+8].y; bb->y2[yy] /= 2;
 334                     bb->y3[yy] += p1[linex*8+x].y; bb->y3[yy] /= 2;
 335                     bb->y4[yy] += p1[linex*8+x+8].y; bb->y4[yy] /= 2;
 336                     yy++;
 337                 }
 338             }
 339         }
 340     } else if(yhp==1 || yhp==3) {
 341         for(y=0;y<8;y++) {
 342             for(x=0;x<8;x++) {
 343                 bb->y1[yy] = (p1[x].y + p1[x+1].y);
 344                 bb->y2[yy] = (p1[x+8].y + p1[x+8+1].y);
 345                 bb->y3[yy] = (p1[linex*8+x].y + p1[linex*8+x+1].y);
 346                 bb->y4[yy] = (p1[linex*8+x+8].y + p1[linex*8+x+8+1].y);
 347                 yy++;
 348             }
 349             yy-=8;
 350             p1+=linex;
 351             if(yhp==3) {
 352                 for(x=0;x<8;x++) {
 353                     bb->y1[yy] += (p1[x].y + p1[x+1].y); bb->y1[yy]/=4;
 354                     bb->y2[yy] += (p1[x+8].y + p1[x+8+1].y); bb->y2[yy]/=4;
 355                     bb->y3[yy] += (p1[linex*8+x].y + p1[linex*8+x+1].y); bb->y3[yy]/=4;
 356                     bb->y4[yy] += (p1[linex*8+x+8].y + p1[linex*8+x+8+1].y); bb->y4[yy]/=4;
 357                     yy++;
 358                 }
 359             } else {
 360                 for(x=0;x<8;x++) {
 361                     bb->y1[yy]/=2; bb->y2[yy]/=2; bb->y3[yy]/=2; bb->y4[yy]/=2;
 362                     yy++;
 363                 }
 364             }
 365         }
 366     }
 367
 368     /* u,v */
 369     if(uvhp==0 || uvhp==2) {
 370         for(y=0;y<8;y++) {
 371             for(x=0;x<8;x++) {
 372                 bb->u[uv] = (p2[x*2].u + p2[x*2+1].u + p2[linex+x*2].u + p2[linex+x*2+1].u)/4;
 373                 bb->v[uv] = (p2[x*2].v + p2[x*2+1].v + p2[linex+x*2].v + p2[linex+x*2+1].v)/4;
 374                 uv++;
 375             }
 376             p2+=linex*2;
 377             if(uvhp==2) {
 378                 uv-=8;
 379                 for(x=0;x<8;x++) {
 380                     bb->u[uv] += (p2[x*2].u + p2[x*2+1].u + p2[linex+x*2].u + p2[linex+x*2+1].u)/4;
 381                     bb->v[uv] += (p2[x*2].v + p2[x*2+1].v + p2[linex+x*2].v + p2[linex+x*2+1].v)/4;
 382                     bb->u[uv] /= 2;
 383                     bb->v[uv] /= 2;
 384                     uv++;
 385                 }
 386             }
 387         }
 388     } else /* uvhp==1 || uvhp==3 */ {
 389         for(y=0;y<8;y++) {
 390             for(x=0;x<8;x++) {
 391                 bb->u[uv] = ((p2[x*2].u + p2[x*2+1].u + p2[linex+x*2].u + p2[linex+x*2+1].u)/4+
 392                              (p2[x*2+2].u + p2[x*2+1+2].u + p2[linex+x*2+2].u + p2[linex+x*2+1+2].u)/4);
 393                 bb->v[uv] = ((p2[x*2].v + p2[x*2+1].v + p2[linex+x*2].v + p2[linex+x*2+1].v)/4+
 394                              (p2[x*2+2].v + p2[x*2+1+2].v + p2[linex+x*2+2].v + p2[linex+x*2+1+2].v)/4);
 395                 uv++;
 396             }
 397             uv-=8;
 398             p2+=linex*2;
 399             if(uvhp==3) {
 400                 for(x=0;x<8;x++) {
 401                     bb->u[uv] += ((p2[x*2].u + p2[x*2+1].u + p2[linex+x*2].u + p2[linex+x*2+1].u)/4+
 402                                   (p2[x*2+2].u + p2[x*2+1+2].u + p2[linex+x*2+2].u + p2[linex+x*2+1+2].u)/4);
 403                     bb->v[uv] += ((p2[x*2].v + p2[x*2+1].v + p2[linex+x*2].v + p2[linex+x*2+1].v)/4+
 404                                   (p2[x*2+2].v + p2[x*2+1+2].v + p2[linex+x*2+2].v + p2[linex+x*2+1+2].v)/4);
 405                     bb->u[uv] /= 4;
 406                     bb->v[uv] /= 4;
 407                     uv++;
 408                 }
 409             } else {
 410                 for(x=0;x<8;x++) {
 411                     bb->u[uv] /= 2;
 412                     bb->v[uv] /= 2;
 413                     uv++;
 414                 }
 415             }
 416         }
 417     }
 418 }
 419
 420 static void rgb2yuv(YUV*dest, RGBA*src, int dlinex, int slinex, int width, int height)
 421 {
 422     int x,y;
 423     for(y=0;y<height;y++) {
 424         for(x=0;x<width;x++) {
 425             int r,g,b;
 426             r = src[y*slinex+x].r;
 427             g = src[y*slinex+x].g;
 428             b = src[y*slinex+x].b;
 429             /*dest[y*dlinex+x].y = (r*0.299 + g*0.587 + b*0.114);
 430             dest[y*dlinex+x].u = (r*-0.169 + g*-0.332 + b*0.500 + 128.0);
 431             dest[y*dlinex+x].v = (r*0.500 + g*-0.419 + b*-0.0813 + 128.0);*/
 432
 433             //dest[y*dlinex+x].y = 128;//(r*((int)( 0.299*256)) + g*((int)( 0.587*256)) + b*((int)( 0.114 *256)))>>8;
 434             dest[y*dlinex+x].y = (r*((int)( 0.299*256)) + g*((int)( 0.587*256)) + b*((int)( 0.114 *256)))>>8;
 435             dest[y*dlinex+x].u = (r*((int)(-0.169*256)) + g*((int)(-0.332*256)) + b*((int)( 0.500 *256))+ 128*256)>>8;
 436             dest[y*dlinex+x].v = (r*((int)( 0.500*256)) + g*((int)(-0.419*256)) + b*((int)(-0.0813*256))+ 128*256)>>8;
 437         }
 438     }
 439 }
 440
 441 static void copyregion(VIDEOSTREAM*s, YUV*dest, YUV*src, int bx, int by)
 442 {
 443     YUV*p1 = &src[by*s->linex*16+bx*16];
 444     YUV*p2 = &dest[by*s->linex*16+bx*16];
 445     int y;
 446     for(y=0;y<16;y++) {
 447         memcpy(p1, p2, 16*sizeof(YUV));
 448         p1+=s->linex;p2+=s->linex;
 449     }
 450 }
 451
 452 static void yuv2rgb(RGBA*dest, YUV*src, int linex, int width, int height)
 453 {
 454     int x,y;
 455     for(y=0;y<height;y++) {
 456         for(x=0;x<width;x++) {
 457             int u,v,yy;
 458             u = src[y*linex+x].u;
 459             v = src[y*linex+x].v;
 460             yy = src[y*linex+x].y;
 461             dest[y*linex+x].r = truncate256(yy + ((360*(v-128))>>8));
 462             dest[y*linex+x].g = truncate256(yy - ((88*(u-128)+183*(v-128))>>8));
 463             dest[y*linex+x].b = truncate256(yy + ((455 * (u-128))>>8));
 464         }
 465     }
 466 }
 467 static void copyblock(VIDEOSTREAM*s, YUV*dest, block_t*b, int bx, int by)
 468 {
 469     YUV*p1 = &dest[(by*16)*s->linex+bx*16];
 470     YUV*p2 = &dest[(by*16+8)*s->linex+bx*16];
 471     int x,y;
 472     for(y=0;y<8;y++) {
 473         for(x=0;x<8;x++) {
 474             int u,v,yy;
 475             p1[x+0].u = b->u[(y/2)*8+(x/2)];
 476             p1[x+0].v = b->v[(y/2)*8+(x/2)];
 477             p1[x+0].y = b->y1[y*8+x];
 478             p1[x+8].u = b->u[(y/2)*8+(x/2)+4];
 479             p1[x+8].v = b->v[(y/2)*8+(x/2)+4];
 480             p1[x+8].y = b->y2[y*8+x];
 481             p2[x+0].u = b->u[(y/2+4)*8+(x/2)];
 482             p2[x+0].v = b->v[(y/2+4)*8+(x/2)];
 483             p2[x+0].y = b->y3[y*8+x];
 484             p2[x+8].u = b->u[(y/2+4)*8+(x/2)+4];
 485             p2[x+8].v = b->v[(y/2+4)*8+(x/2)+4];
 486             p2[x+8].y = b->y4[y*8+x];
 487         }
 488         p1+=s->linex;
 489         p2+=s->linex;
 490     }
 491 }
 492
 493 static int compare_pic_oldpic(VIDEOSTREAM*s, int bx, int by)
 494 {
 495     int linex = s->width;
 496     YUV*p1 = &s->current[by*linex*16+bx*16];
 497     YUV*p2 = &s->oldpic[by*linex*16+bx*16];
 498     int diffy=0, diffuv = 0;
 499     int x,y;
 500     for(y=0;y<16;y++) {
 501         for(x=0;x<16;x++) {
 502             YUV*m = &p1[x];
 503             YUV*n = &p2[x];
 504             int y = m->y - n->y;
 505             int u = m->u - n->u;
 506             int v = m->v - n->v;
 507             diffy += abs(y);
 508             diffuv += abs(u)+abs(v);
 509         }
 510         p1+=linex;
 511         p2+=linex;
 512     }
 513     return diffy + diffuv/4;
 514 }
 515
 516 static int compare_pic_block(VIDEOSTREAM*s, block_t* b, int bx, int by)
 517 {
 518     int linex = s->width;
 519     YUV*y1 = &s->current[(by*2)*linex*8+bx*16];
 520     YUV*y2 = &s->current[(by*2)*linex*8+bx*16+8];
 521     YUV*y3 = &s->current[(by*2+1)*linex*8+bx*16];
 522     YUV*y4 = &s->current[(by*2+1)*linex*8+bx*16+8];
 523     YUV*uv = y1;
 524     int diffy=0, diffuv = 0;
 525     int x,y;
 526     for(y=0;y<8;y++) {
 527         for(x=0;x<8;x++) {
 528             int yy,u,v;
 529             int y8x = y*8+x;
 530             yy = y1[x].y - b->y1[y8x];
 531             diffy += abs(yy);
 532             yy = y2[x].y - b->y2[y8x];
 533             diffy += abs(yy);
 534             yy = y3[x].y - b->y3[y8x];
 535             diffy += abs(yy);
 536             yy = y4[x].y - b->y4[y8x];
 537             diffy += abs(yy);
 538             u = uv[x*2].u - b->u[y8x];
 539             v = uv[x*2].v - b->v[y8x];
 540             diffuv += (abs(u)+abs(v))*4;
 541         }
 542         y1+=linex;
 543         y2+=linex;
 544         y3+=linex;
 545         y4+=linex;
 546         uv+=linex*2;
 547     }
 548     return diffy + diffuv/4;
 549 }
 550
 551 static inline int valtodc(int val)
 552 {
 553     assert(val>=0);
 554
 555     /* table 12/h.263 */
 556
 557     //val+=4; //round
 558     val/=8;
 559     /* TODO: what to do for zero values? skip the block? */
 560     if(val==0)
 561         return 1;
 562     if(val==128)
 563         return 255;
 564     if(val>254)
 565         return 254;
 566     return val;
 567 }
 568 static int dctoval(int dc)
 569 {
 570     int val;
 571     assert(dc>0);
 572     assert(dc!=128);
 573     assert(dc<256);
 574     /* table 12/h.263 */
 575     val = dc*8;
 576     if(val == 255*8)
 577         val = 128*8;
 578     return val;
 579 }
 580
 581 static int codehuffman(TAG*tag, struct huffcode*table, int index)
 582 {
 583     /* TODO: !optimize! */
 584     int i=0;
 585     while(table[index].code[i]) {
 586         if(table[index].code[i]=='0')
 587             swf_SetBits(tag, 0, 1);
 588         else
 589             swf_SetBits(tag, 1, 1);
 590         i++;
 591     }
 592     return i;
 593 }
 594
 595 static void quantize8x8(int*src, int*dest, int has_dc, int quant)
 596 {
 597     int t,pos=0;
 598     double q = 1.0/(quant*2);
 599     if(has_dc) {
 600         dest[0] = valtodc((int)src[0]); /*DC*/
 601         pos++;
 602     }
 603     for(t=pos;t<64;t++)
 604     {
 605         //dest[t] = (int)src[t];
 606     /* exact: if(quant&1){dest[t] = (dest[t]/quant - 1)/2;}else{dest[t] = ((dest[t]+1)/quant - 1)/2;} */
 607         //if(quant&1){dest[t] = (dest[t]/quant - 1)/2;}else{dest[t] = ((dest[t]+1)/quant - 1)/2;}
 608         //dest[t] = dest[t]/(quant*2);
 609         dest[t] = (int)(src[t]*q);
 610         /* TODO: warn if this happens- the video will be buggy */
 611         if(dest[t]>127) dest[t]=127;
 612         if(dest[t]<-127) dest[t]=-127;
 613     }
 614 }
 615
 616 static void dequantize8x8(int*b, int has_dc, int quant)
 617 {
 618     int t,pos=0;
 619     if(has_dc) {
 620         b[0] = dctoval(b[0]); //DC
 621         pos++;
 622     }
 623     for(t=pos;t<64;t++) {
 624         if(b[t]) {
 625             int sign = 0;
 626             if(b[t]<0) {
 627                 b[t] = -b[t];
 628                 sign = 1;
 629             }
 630
 631             if(quant&1) {
 632                 b[t] = quant*(2*b[t]+1); //-7,8,24,40
 633             } else {
 634                 b[t] = quant*(2*b[t]+1)-1; //-8,7,23,39
 635             }
 636
 637             if(sign)
 638                 b[t] = -b[t];
 639         }
 640
 641         /* paragraph 6.2.2, "clipping of reconstruction levels": */
 642         if(b[t]>2047) b[t]=2047;
 643         if(b[t]<-2048) b[t]=-2048;
 644     }
 645 }
 646
 647 static int hascoef(int*b, int has_dc)
 648 {
 649     int t;
 650     int pos=0;
 651     if(has_dc)
 652         pos++;
 653     for(t=pos;t<64;t++) {
 654         if(b[t])
 655             return 1;
 656     }
 657     return 0;
 658 }
 659
 660 static int coefbits8x8(int*bb, int has_dc)
 661 {
 662     int t;
 663     int pos=0;
 664     int bits=0;
 665     int last;
 666
 667     if(has_dc) {
 668         bits+=8;
 669         pos++;
 670     }
 671     for(last=63;last>=pos;last--) {
 672         if(bb[last])
 673             break;
 674     }
 675     if(last < pos)
 676         return bits;
 677     while(1) {
 678         int run=0, level=0, islast=0,t;
 679         while(!bb[pos] && pos<last) {
 680             pos++;
 681             run++;
 682         }
 683         if(pos==last)
 684             islast=1;
 685         level=bb[pos];
 686         if(level<0) level=-level;
 687         assert(level);
 688         for(t=0;t<RLE_ESCAPE;t++) {
 689             if(rle_params[t].run == run &&
 690                rle_params[t].level == level &&
 691                rle_params[t].last == islast) {
 692                 bits += rle[t].len + 1;
 693                 break;
 694             }
 695         }
 696         if(t==RLE_ESCAPE) {
 697             bits += rle[RLE_ESCAPE].len + 1 + 6 + 8;
 698         }
 699         if(islast)
 700             break;
 701         pos++;
 702     }
 703     return bits;
 704 }
 705
 706 static int encode8x8(TAG*tag, int*bb, int has_dc, int has_tcoef)
 707 {
 708     int t;
 709     int pos=0;
 710     int bits=0;
 711
 712     if(has_dc) {
 713         swf_SetBits(tag, bb[0], 8);
 714         bits += 8;
 715         pos++;
 716     }
 717
 718     if(has_tcoef) {
 719         int last;
 720         /* determine last non-null coefficient */
 721         for(last=63;last>=pos;last--) {
 722             /* TODO: we could leave out small coefficients
 723                      after a certain point (32?) */
 724             if(bb[last])
 725                 break;
 726         }
 727         /* blocks without coefficients should not be included
 728            in the cbpy/cbpc patterns: */
 729         assert(bb[last]);
 730
 731         while(1) {
 732             int run=0;
 733             int level=0;
 734             int islast=0;
 735             int sign=0;
 736             int t;
 737             while(!bb[pos] && pos<last) {
 738                 pos++;
 739                 run++;
 740             }
 741             if(pos==last)
 742                 islast=1;
 743             level=bb[pos];
 744             assert(level);
 745             if(level<0) {
 746                 level = -level;
 747                 sign = 1;
 748             }
 749             for(t=0;t<RLE_ESCAPE;t++) {
 750                 /* TODO: lookup table */
 751                 if(rle_params[t].run == run &&
 752                    rle_params[t].level == level &&
 753                    rle_params[t].last == islast) {
 754                     bits += codehuffman(tag, rle, t);
 755                     swf_SetBits(tag, sign, 1);
 756                     bits += 1;
 757                     break;
 758                 }
 759             }
 760             if(t==RLE_ESCAPE) {
 761                 bits += codehuffman(tag, rle, RLE_ESCAPE);
 762                 level=bb[pos];
 763                 /* table 14/h.263 */
 764                 assert(level);
 765                 assert(level>=-127);
 766                 assert(level<=127);
 767
 768                 swf_SetBits(tag, islast, 1);
 769                 swf_SetBits(tag, run, 6);
 770                 swf_SetBits(tag, level, 8); //FIXME: fixme??
 771                 bits += 1 + 6 + 8;
 772             }
 773
 774             if(islast)
 775                 break;
 776             pos++;
 777         }
 778     }
 779     return bits;
 780 }
 781
 782 static void quantize(block_t*fb, block_t*b, int has_dc, int quant)
 783 {
 784     quantize8x8(fb->y1, b->y1, has_dc, quant);
 785     quantize8x8(fb->y2, b->y2, has_dc, quant);
 786     quantize8x8(fb->y3, b->y3, has_dc, quant);
 787     quantize8x8(fb->y4, b->y4, has_dc, quant);
 788     quantize8x8(fb->u, b->u, has_dc, quant);
 789     quantize8x8(fb->v, b->v, has_dc, quant);
 790 }
 791
 792 static void dodct(block_t*fb)
 793 {
 794     dct(fb->y1); dct(fb->y2); dct(fb->y3); dct(fb->y4);
 795     dct(fb->u);  dct(fb->v);
 796     zigzag(fb->y1);
 797     zigzag(fb->y2);
 798     zigzag(fb->y3);
 799     zigzag(fb->y4);
 800     zigzag(fb->u);
 801     zigzag(fb->v);
 802 }
 803 static void dodctandquant(block_t*fb, block_t*b, int has_dc, int quant)
 804 {
 805     int t;
 806     if(has_dc) {
 807         dodct(fb);
 808         quantize(fb,b,has_dc,quant);
 809         return;
 810     }
 811     preparequant(quant);
 812     dct2(fb->y1,b->y1); dct2(fb->y2,b->y2); dct2(fb->y3,b->y3); dct2(fb->y4,b->y4);
 813     dct2(fb->u,b->u);  dct2(fb->v,b->v);
 814 }
 815
 816 static void doidct(block_t*b)
 817 {
 818     block_t fb;
 819     int t;
 820     for(t=0;t<64;t++) {
 821         fb.y1[t] = b->y1[zigzagtable[t]];
 822         fb.y2[t] = b->y2[zigzagtable[t]];
 823         fb.y3[t] = b->y3[zigzagtable[t]];
 824         fb.y4[t] = b->y4[zigzagtable[t]];
 825         fb.u[t] = b->u[zigzagtable[t]];
 826         fb.v[t] = b->v[zigzagtable[t]];
 827     }
 828     idct(fb.y1); idct(fb.y2); idct(fb.y3); idct(fb.y4);
 829     idct(fb.u);  idct(fb.v);
 830     for(t=0;t<64;t++) {
 831         b->y1[t] = fb.y1[t];
 832         b->y2[t] = fb.y2[t];
 833         b->y3[t] = fb.y3[t];
 834         b->y4[t] = fb.y4[t];
 835         b->u[t] = fb.u[t];
 836         b->v[t] = fb.v[t];
 837     }
 838 }
 839 static void truncateblock(block_t*b)
 840 {
 841     int t;
 842     for(t=0;t<64;t++) {
 843         b->y1[t] = truncate256(b->y1[t]);
 844         b->y2[t] = truncate256(b->y2[t]);
 845         b->y3[t] = truncate256(b->y3[t]);
 846         b->y4[t] = truncate256(b->y4[t]);
 847         b->u[t] = truncate256(b->u[t]);
 848         b->v[t] = truncate256(b->v[t]);
 849     }
 850 }
 851
 852 static void dequantize(block_t*b, int has_dc, int quant)
 853 {
 854     dequantize8x8(b->y1, has_dc, quant);
 855     dequantize8x8(b->y2, has_dc, quant);
 856     dequantize8x8(b->y3, has_dc, quant);
 857     dequantize8x8(b->y4, has_dc, quant);
 858     dequantize8x8(b->u, has_dc, quant);
 859     dequantize8x8(b->v, has_dc, quant);
 860 }
 861
 862 static void getblockpatterns(block_t*b, int*cbpybits,int*cbpcbits, int has_dc)
 863 {
 864     *cbpybits = 0;
 865     *cbpcbits = 0;
 866
 867     *cbpybits|=hascoef(b->y1, has_dc)*8;
 868     *cbpybits|=hascoef(b->y2, has_dc)*4;
 869     *cbpybits|=hascoef(b->y3, has_dc)*2;
 870     *cbpybits|=hascoef(b->y4, has_dc)*1;
 871
 872     *cbpcbits|=hascoef(b->u, has_dc)*2;
 873     *cbpcbits|=hascoef(b->v, has_dc)*1;
 874 }
 875
 876 static void setQuant(TAG*tag, int dquant)
 877 {
 878     int code = 0;
 879     /* 00 01 10 11
 880        -1 -2 +1 +2
 881     */
 882     if(dquant == -1) {
 883         swf_SetBits(tag, 0x0, 2);
 884     } else if(dquant == -2) {
 885         swf_SetBits(tag, 0x1, 2);
 886     } else if(dquant == +1) {
 887         swf_SetBits(tag, 0x2, 2);
 888     } else if(dquant == +2) {
 889         swf_SetBits(tag, 0x3, 2);
 890     } else {
 891         assert(0*strlen("invalid dquant"));
 892     }
 893 }
 894
 895 static void change_quant(int quant, int*dquant)
 896 {
 897     /* TODO */
 898     *dquant = 0;
 899 }
 900
 901 static void yuvdiff(block_t*a, block_t*b)
 902 {
 903     int t;
 904     for(t=0;t<64;t++) {
 905         a->y1[t] = (a->y1[t] - b->y1[t]);
 906         a->y2[t] = (a->y2[t] - b->y2[t]);
 907         a->y3[t] = (a->y3[t] - b->y3[t]);
 908         a->y4[t] = (a->y4[t] - b->y4[t]);
 909         a->u[t]  = (a->u[t] - b->u[t]);
 910         a->v[t]  = (a->v[t] - b->v[t]);
 911     }
 912 }
 913
 914 static void predictmvd(VIDEOSTREAM*s, int bx, int by, int*px, int*py)
 915 {
 916     int i1,i2;
 917     int x1,y1,x2,y2,x3,y3;
 918     int x4,y4,p;
 919     if(bx) {x1=s->mvdx[by*s->bbx+bx-1];
 920             y1=s->mvdy[by*s->bbx+bx-1];
 921     } else {x1=y1=0;}
 922
 923     if(by) {x2=s->mvdx[(by-1)*s->bbx+bx];
 924             y2=s->mvdy[(by-1)*s->bbx+bx];
 925             if(bx<s->bbx-1) {
 926                 x3=s->mvdx[(by-1)*s->bbx+bx+1];
 927                 y3=s->mvdy[(by-1)*s->bbx+bx+1];
 928             } else {
 929                 x3=y3=0;
 930             }
 931            }
 932     else   {x2=x3=x1;y2=y3=y1;}
 933
 934            if((x1 <= x2 && x2 <= x3) ||
 935               (x3 <= x2 && x2 <= x1)) {
 936         x4=x2;
 937     } else if((x2 <= x1 && x1 <= x3) ||
 938               (x3 <= x1 && x1 <= x2)) {
 939         x4=x1;
 940     } else if((x1 <= x3 && x3 <= x2) ||
 941               (x2 <= x3 && x3 <= x1)) {
 942         x4=x3;
 943     } else {
 944         x4=0;
 945         assert(x4);
 946     }
 947
 948            if((y1 <= y2 && y2 <= y3) ||
 949               (y3 <= y2 && y2 <= y1)) {
 950         y4=y2;
 951     } else if((y2 <= y1 && y1 <= y3) ||
 952               (y3 <= y1 && y1 <= y2)) {
 953         y4=y1;
 954     } else if((y1 <= y3 && y3 <= y2) ||
 955               (y2 <= y3 && y3 <= y1)) {
 956         y4=y3;
 957     } else {
 958         y4=0;
 959         assert(y4);
 960     }
 961
 962     *px = x4;
 963     *py = y4;
 964     assert((x4>=-32 && x4<=31) && (y4>=-32 && y4<=31));
 965 }
 966
 967 static inline int mvd2index(int px, int py, int x, int y, int xy)
 968 {
 969     assert((x>=-32 && x<=31) && (y>=-32 && y<=31));
 970     //assert((x&1)==0 && (y&1)==0);//for now
 971     //assert((x&2)==0 && (y&2)==0);//for now(2)
 972
 973     x-=px;
 974     y-=py;
 975
 976     if(xy)
 977         x=y;
 978     x+=32;
 979
 980     /* (x&63) */
 981     if(x>63)
 982         x-=64;
 983     if(x<0)
 984         x+=64;
 985
 986     assert(x>=0 && x<64);
 987     return x;
 988 }
 989
 990 typedef struct _iblockdata_t
 991 {
 992     block_t b_i; //transformed quantized coefficients
 993     block_t reconstruction;
 994     int bits;
 995     int bx,by;
 996 } iblockdata_t;
 997
 998 typedef struct _mvdblockdata_t
 999 {
1000     block_t b_vxy;
1001     block_t fbold_vxy;
1002     block_t reconstruction;
1003     int predictmvdx;
1004     int predictmvdy;
1005     int x_vxy;
1006     int y_vxy;
1007     int bits;
1008     int bx,by;
1009 } mvdblockdata_t;
1010
1011 void prepareIBlock(VIDEOSTREAM*s, iblockdata_t*data, int bx, int by, block_t* fb, int*bits)
1012 {
1013     /* consider I-block */
1014     block_t fb_i;
1015     block_t b;
1016     int y,c;
1017     data->bx = bx;
1018     data->by = by;
1019
1020     memcpy(&fb_i, fb, sizeof(block_t));
1021     dodctandquant(&fb_i, &data->b_i, 1, s->quant);
1022     getblockpatterns(&data->b_i, &y, &c, 1);
1023     *bits = 1; //cod
1024     *bits += mcbpc_inter[3*4+c].len;
1025     *bits += cbpy[y].len;
1026     *bits += coefbits8x8(data->b_i.y1, 1);
1027     *bits += coefbits8x8(data->b_i.y2, 1);
1028     *bits += coefbits8x8(data->b_i.y3, 1);
1029     *bits += coefbits8x8(data->b_i.y4, 1);
1030     *bits += coefbits8x8(data->b_i.u, 1);
1031     *bits += coefbits8x8(data->b_i.v, 1);
1032     data->bits = *bits;
1033
1034     /* -- reconstruction -- */
1035     memcpy(&data->reconstruction,&data->b_i,sizeof(block_t));
1036     dequantize(&data->reconstruction, 1, s->quant);
1037     doidct(&data->reconstruction);
1038     truncateblock(&data->reconstruction);
1039 }
1040
1041 int writeIBlock(VIDEOSTREAM*s, TAG*tag, iblockdata_t*data)
1042 {
1043     int cbpcbits = 0, cbpybits=0;
1044     int mode = 3; /* i block (mode=3) */
1045     int has_dc=1;
1046     int bits = 0;
1047     block_t b;
1048
1049     getblockpatterns(&data->b_i, &cbpybits, &cbpcbits, has_dc);
1050     swf_SetBits(tag,0,1); bits += 1; // COD
1051     bits += codehuffman(tag, mcbpc_inter, mode*4+cbpcbits);
1052     bits += codehuffman(tag, cbpy, cbpybits);
1053
1054     /* luminance */
1055     bits += encode8x8(tag, data->b_i.y1, has_dc, cbpybits&8);
1056     bits += encode8x8(tag, data->b_i.y2, has_dc, cbpybits&4);
1057     bits += encode8x8(tag, data->b_i.y3, has_dc, cbpybits&2);
1058     bits += encode8x8(tag, data->b_i.y4, has_dc, cbpybits&1);
1059
1060     /* chrominance */
1061     bits += encode8x8(tag, data->b_i.u, has_dc, cbpcbits&2);
1062     bits += encode8x8(tag, data->b_i.v, has_dc, cbpcbits&1);
1063
1064     copyblock(s, s->current, &data->reconstruction, data->bx, data->by);
1065     assert(data->bits == bits);
1066     return bits;
1067 }
1068
1069 void prepareMVDBlock(VIDEOSTREAM*s, mvdblockdata_t*data, int bx, int by, block_t* fb, int*bits)
1070 { /* consider mvd(x,y)-block */
1071
1072     int t;
1073     int y,c;
1074     block_t fbdiff;
1075
1076     data->bx = bx;
1077     data->by = by;
1078     predictmvd(s,bx,by,&data->predictmvdx,&data->predictmvdy);
1079
1080     data->bits = 65535;
1081     data->x_vxy=0;
1082     data->y_vxy=0;
1083
1084
1085     if(s->do_motion) {
1086         int hx,hy;
1087         int bestx=0,besty=0,bestbits=65536;
1088         int startx=-32,endx=31;
1089         int starty=-32,endy=31;
1090
1091         if(!bx) startx=0;
1092         if(!by) starty=0;
1093         if(bx==s->bbx-1) endx=0;
1094         if(by==s->bby-1) endy=0;
1095
1096         for(hx=startx;hx<=endx;hx+=1)
1097         for(hy=starty;hy<=endy;hy+=1)
1098         {
1099             block_t b;
1100             block_t fbold;
1101             int bits = 0;
1102             memcpy(&fbdiff, fb, sizeof(block_t));
1103             getmvdregion(&fbold, s->oldpic, bx, by, hx, hy, s->linex);
1104             yuvdiff(&fbdiff, &fbold);
1105             dodctandquant(&fbdiff, &b, 0, s->quant);
1106             bits += coefbits8x8(b.y1, 0);
1107             bits += coefbits8x8(b.y2, 0);
1108             bits += coefbits8x8(b.y3, 0);
1109             bits += coefbits8x8(b.y4, 0);
1110             bits += coefbits8x8(b.u, 0);
1111             bits += coefbits8x8(b.v, 0);
1112             if(bits<bestbits) {
1113                 bestbits = bits;
1114                 bestx = hx;
1115                 besty = hy;
1116             }
1117         }
1118         data->x_vxy = bestx;
1119         data->y_vxy = besty;
1120     }
1121
1122     memcpy(&fbdiff, fb, sizeof(block_t));
1123     getmvdregion(&data->fbold_vxy, s->oldpic, bx, by, data->x_vxy, data->y_vxy, s->linex);
1124     yuvdiff(&fbdiff, &data->fbold_vxy);
1125     dodctandquant(&fbdiff, &data->b_vxy, 0, s->quant);
1126     getblockpatterns(&data->b_vxy, &y, &c, 0);
1127
1128     *bits = 1; //cod
1129     *bits += mcbpc_inter[0*4+c].len;
1130     *bits += cbpy[y^15].len;
1131     *bits += mvd[mvd2index(data->predictmvdx, data->predictmvdy, data->x_vxy, data->y_vxy, 0)].len; // (0,0)
1132     *bits += mvd[mvd2index(data->predictmvdx, data->predictmvdy, data->x_vxy, data->y_vxy, 1)].len;
1133     *bits += coefbits8x8(data->b_vxy.y1, 0);
1134     *bits += coefbits8x8(data->b_vxy.y2, 0);
1135     *bits += coefbits8x8(data->b_vxy.y3, 0);
1136     *bits += coefbits8x8(data->b_vxy.y4, 0);
1137     *bits += coefbits8x8(data->b_vxy.u, 0);
1138     *bits += coefbits8x8(data->b_vxy.v, 0);
1139     data->bits = *bits;
1140
1141     /* -- reconstruction -- */
1142     memcpy(&data->reconstruction, &data->b_vxy, sizeof(block_t));
1143     dequantize(&data->reconstruction, 0, s->quant);
1144     doidct(&data->reconstruction);
1145     for(t=0;t<64;t++) {
1146         data->reconstruction.y1[t] = truncate256(data->reconstruction.y1[t] + (int)data->fbold_vxy.y1[t]);
1147         data->reconstruction.y2[t] = truncate256(data->reconstruction.y2[t] + (int)data->fbold_vxy.y2[t]);
1148         data->reconstruction.y3[t] = truncate256(data->reconstruction.y3[t] + (int)data->fbold_vxy.y3[t]);
1149         data->reconstruction.y4[t] = truncate256(data->reconstruction.y4[t] + (int)data->fbold_vxy.y4[t]);
1150         data->reconstruction.u[t] = truncate256(data->reconstruction.u[t] + (int)data->fbold_vxy.u[t]);
1151         data->reconstruction.v[t] = truncate256(data->reconstruction.v[t] + (int)data->fbold_vxy.v[t]);
1152     }
1153 }
1154
1155 int writeMVDBlock(VIDEOSTREAM*s, TAG*tag, mvdblockdata_t*data)
1156 {
1157     int c = 0, y = 0;
1158     /* mvd (0,0) block (mode=0) */
1159     int t;
1160     int has_dc=0; // mvd w/o mvd24
1161     int mode = 0;
1162     int bx = data->bx;
1163     int by = data->by;
1164     int bits = 0;
1165
1166     getblockpatterns(&data->b_vxy, &y, &c, has_dc);
1167     swf_SetBits(tag,0,1); bits += 1; // COD
1168     bits += codehuffman(tag, mcbpc_inter, mode*4+c);
1169     bits += codehuffman(tag, cbpy, y^15);
1170
1171     /* vector */
1172     bits += codehuffman(tag, mvd, mvd2index(data->predictmvdx, data->predictmvdy, data->x_vxy, data->y_vxy, 0));
1173     bits += codehuffman(tag, mvd, mvd2index(data->predictmvdx, data->predictmvdy, data->x_vxy, data->y_vxy, 1));
1174     s->mvdx[by*s->bbx+bx] = data->x_vxy;
1175     s->mvdy[by*s->bbx+bx] = data->y_vxy;
1176
1177     /* luminance */
1178     bits += encode8x8(tag, data->b_vxy.y1, has_dc, y&8);
1179     bits += encode8x8(tag, data->b_vxy.y2, has_dc, y&4);
1180     bits += encode8x8(tag, data->b_vxy.y3, has_dc, y&2);
1181     bits += encode8x8(tag, data->b_vxy.y4, has_dc, y&1);
1182
1183     /* chrominance */
1184     bits += encode8x8(tag, data->b_vxy.u, has_dc, c&2);
1185     bits += encode8x8(tag, data->b_vxy.v, has_dc, c&1);
1186
1187     copyblock(s, s->current, &data->reconstruction, data->bx, data->by);
1188     assert(data->bits == bits);
1189     return bits;
1190 }
1191
1192
1193 /* should be called encode_PFrameBlock */
1194 static int encode_blockP(TAG*tag, VIDEOSTREAM*s, int bx, int by)
1195 {
1196     block_t fb;
1197     int diff1,diff2;
1198     int bits_i;
1199     int bits_vxy;
1200
1201     iblockdata_t iblock;
1202     mvdblockdata_t mvdblock;
1203
1204     getregion(&fb, s->current, bx, by, s->width);
1205     prepareIBlock(s, &iblock, bx, by, &fb, &bits_i);
1206
1207     /* encoded last frame <=> original current block: */
1208     diff1 = compare_pic_oldpic(s, bx, by);
1209     /* encoded current frame <=> original current block: */
1210     diff2 = compare_pic_block(s, &iblock.reconstruction, bx, by);
1211
1212     if(diff1 <= diff2) {
1213         swf_SetBits(tag, 1,1); /* cod=1, block skipped */
1214         /* copy the region from the last frame so that we have a complete reconstruction */
1215         copyregion(s, s->current, s->oldpic, bx, by);
1216         return 1;
1217     }
1218     prepareMVDBlock(s, &mvdblock, bx, by, &fb, &bits_vxy);
1219
1220     if(bits_i > bits_vxy) {
1221         return writeMVDBlock(s, tag, &mvdblock);
1222     } else {
1223         return writeIBlock(s, tag, &iblock);
1224     }
1225 }
1226
1227 /* should be called encode_IFrameBlock */
1228 static void encode_blockI(TAG*tag, VIDEOSTREAM*s, int bx, int by)
1229 {
1230     block_t fb;
1231     block_t b;
1232     int dquant=0;
1233     int c = 0, y=0;
1234
1235     getregion(&fb, s->current, bx, by, s->width);
1236
1237     change_quant(s->quant, &dquant);
1238     s->quant+=dquant;
1239
1240     dodctandquant(&fb, &b, 1, s->quant);
1241
1242     getblockpatterns(&b, &y, &c, 1);
1243
1244     if(dquant) {
1245         codehuffman(tag, mcbpc_intra, 4+c);
1246     } else {
1247         codehuffman(tag, mcbpc_intra, 0+c);
1248     }
1249
1250     codehuffman(tag, cbpy, y);
1251
1252     if(dquant) {
1253         setQuant(tag, dquant);
1254     }
1255
1256     /* luminance */
1257     encode8x8(tag, b.y1, 1, y&8);
1258     encode8x8(tag, b.y2, 1, y&4);
1259     encode8x8(tag, b.y3, 1, y&2);
1260     encode8x8(tag, b.y4, 1, y&1);
1261
1262     /* chrominance */
1263     encode8x8(tag, b.u, 1, c&2);
1264     encode8x8(tag, b.v, 1, c&1);
1265
1266     /* reconstruct */
1267     dequantize(&b, 1, s->quant);
1268     doidct(&b);
1269     truncateblock(&b);
1270     copyblock(s, s->current, &b, bx, by);
1271 }
1272
1273 /*static void encode_blockI(TAG*tag, VIDEOSTREAM*s, int bx, int by)
1274 {
1275     block_t fb;
1276     block_t b;
1277     iblockdata_t data;
1278     int bits, quality;
1279     int dquant = 0;
1280     int cbpcbits = 0, cbpybits = 0;
1281
1282     getregion(&fb, s->current, bx, by, s->width);
1283     prepareIBlock(s, &data, bx, by, &fb, &bits, &quality);
1284
1285     getblockpatterns(&data.b_i, &cbpybits, &cbpcbits, has_dc);
1286
1287     if(dquant) {
1288         codehuffman(tag, mcbpc_intra, 4+cbpcbits);
1289     } else {
1290         codehuffman(tag, mcbpc_intra, 0+cbpcbits);
1291     }
1292
1293     codehuffman(tag, cbpy, cbpybits);
1294
1295     if(dquant) {
1296         setQuant(tag, dquant);
1297     }
1298
1299     // luminance
1300     encode8x8(tag, b.y1, 1, cbpybits&8);
1301     encode8x8(tag, b.y2, 1, cbpybits&4);
1302     encode8x8(tag, b.y3, 1, cbpybits&2);
1303     encode8x8(tag, b.y4, 1, cbpybits&1);
1304
1305     // chrominance
1306     encode8x8(tag, b.u, 1, cbpcbits&2);
1307     encode8x8(tag, b.v, 1, cbpcbits&1);
1308
1309     copyblock(s, s->current, &data->reconstruction, data->bx, data->by);
1310 }*/
1311
1312 static int bmid = 0;
1313 void setdbgpic(TAG*tag, RGBA*pic, int width, int height)
1314 {
1315     MATRIX m;
1316     tag = tag->prev;
1317
1318     tag = swf_InsertTag(tag,ST_REMOVEOBJECT2);
1319     swf_SetU16(tag, 133);
1320
1321     tag = swf_InsertTag(tag, ST_DEFINEBITSLOSSLESS);
1322     swf_SetU16(tag, 1000+bmid);
1323     swf_SetLosslessBits(tag, width, height, (void*)pic, BMF_32BIT);
1324
1325     tag = swf_InsertTag(tag, ST_DEFINESHAPE);
1326     swf_SetU16(tag, 2000+bmid);
1327     swf_ShapeSetBitmapRect(tag, 1000+bmid, width, height);
1328
1329     tag = swf_InsertTag(tag,ST_PLACEOBJECT2);
1330     swf_GetMatrix(0,&m);
1331     m.tx = width*20;
1332     swf_ObjectPlace(tag, 2000+bmid, 133, &m, 0, 0);
1333
1334     bmid++;
1335 }
1336
1337 #define TYPE_IFRAME 0
1338 #define TYPE_PFRAME 1
1339
1340 static void writeHeader(TAG*tag, int width, int height, int frame, int quant, int type)
1341 {
1342     U32 i32;
1343     swf_SetU16(tag, frame);
1344     swf_SetBits(tag, 1, 17); /* picture start code*/
1345     swf_SetBits(tag, 0, 5); /* version=0, version 1 would optimize rle behaviour*/
1346     swf_SetBits(tag, frame, 8); /* time reference */
1347
1348     /* write dimensions, taking advantage of some predefined sizes
1349        if the opportunity presents itself */
1350     i32 = width<<16|height;
1351     switch(i32)
1352     {
1353         case 352<<16|288: swf_SetBits(tag, 2, 3);break;
1354         case 176<<16|144: swf_SetBits(tag, 3, 3);break;
1355         case 128<<16|96: swf_SetBits(tag, 4, 3);break;
1356         case 320<<16|240: swf_SetBits(tag, 5, 3);break;
1357         case 160<<16|120: swf_SetBits(tag, 6, 3);break;
1358         default:
1359             if(width>255 || height>255) {
1360                 swf_SetBits(tag, 1, 3);
1361                 swf_SetBits(tag, width, 16);
1362                 swf_SetBits(tag, height, 16);
1363             } else {
1364                 swf_SetBits(tag, 0, 3);
1365                 swf_SetBits(tag, width, 8);
1366                 swf_SetBits(tag, height, 8);
1367             }
1368     }
1369
1370     swf_SetBits(tag, type, 2); /* I-Frame or P-Frame */
1371     swf_SetBits(tag, 0, 1); /* No deblock filter */
1372     assert(quant>0);
1373     swf_SetBits(tag, quant, 5); /* quantizer (1-31), may be updated later on*/
1374     swf_SetBits(tag, 0, 1); /* No extra info */
1375 }
1376
1377 void swf_SetVideoStreamIFrame(TAG*tag, VIDEOSTREAM*s, RGBA*pic, int quant)
1378 {
1379     int bx, by;
1380
1381     if(quant<1) quant=1;
1382     if(quant>31) quant=31;
1383     s->quant = quant;
1384
1385     writeHeader(tag, s->width, s->height, s->frame, quant, TYPE_IFRAME);
1386
1387     memset(s->current, 0, s->linex*s->height*sizeof(YUV));
1388     rgb2yuv(s->current, pic, s->linex, s->olinex, s->owidth, s->oheight);
1389
1390     //dostat(s);
1391
1392     for(by=0;by<s->bby;by++)
1393     {
1394         for(bx=0;bx<s->bbx;bx++)
1395         {
1396             encode_blockI(tag, s, bx, by);
1397         }
1398     }
1399     s->frame++;
1400     memcpy(s->oldpic, s->current, s->width*s->height*sizeof(YUV));
1401 }
1402
1403 void swf_SetVideoStreamPFrame(TAG*tag, VIDEOSTREAM*s, RGBA*pic, int quant)
1404 {
1405     int bx, by;
1406
1407     if(quant<1) quant=1;
1408     if(quant>31) quant=31;
1409     s->quant = quant;
1410
1411     writeHeader(tag, s->width, s->height, s->frame, quant, TYPE_PFRAME);
1412
1413     memset(s->current, 0, s->linex*s->height*sizeof(YUV));
1414     rgb2yuv(s->current, pic, s->linex, s->olinex, s->owidth, s->oheight);
1415     memset(s->mvdx, 0, s->bbx*s->bby*sizeof(int));
1416     memset(s->mvdy, 0, s->bbx*s->bby*sizeof(int));
1417
1418     for(by=0;by<s->bby;by++)
1419     {
1420         for(bx=0;bx<s->bbx;bx++)
1421         {
1422             encode_blockP(tag, s, bx, by);
1423         }
1424     }
1425     s->frame++;
1426     memcpy(s->oldpic, s->current, s->width*s->height*sizeof(YUV));
1427
1428 //#define PNG
1429 #ifdef MAIN
1430 #ifdef PNG
1431     yuv2rgb(pic, s->current, s->linex, s->width, s->height);
1432     setdbgpic(tag, pic, s->width, s->height);
1433 #endif
1434     if(s->frame == (int)totalframes-1)
1435     {
1436         int t;
1437         FILE*fi = fopen("test.ppm", "wb");
1438         fprintf(fi, "P6\n%d %d\n255\n", s->width, s->height);
1439         for(t=0;t<s->width*s->height;t++)
1440         {
1441             fwrite(&pic[t].r, 1, 1, fi);
1442             fwrite(&pic[t].g, 1, 1, fi);
1443             fwrite(&pic[t].b, 1, 1, fi);
1444         }
1445         fclose(fi);
1446     }
1447 #endif
1448 }
1449
1450 int uline[64],vline[64],yline[64];
1451 void swf_SetVideoStreamMover(TAG*tag, VIDEOSTREAM*s, int quant)
1452 {
1453     int bx, by;
1454
1455     if(quant<1) quant=1;
1456     if(quant>31) quant=31;
1457
1458     writeHeader(tag, s->width, s->height, s->frame, quant, TYPE_PFRAME);
1459
1460     memset(s->mvdx, 0, s->bbx*s->bby*sizeof(int));
1461     memset(s->mvdy, 0, s->bbx*s->bby*sizeof(int));
1462
1463     for(by=0;by<s->bby;by++)
1464     {
1465         for(bx=0;bx<s->bbx;bx++)
1466         {
1467             //if((lrand48()&255) || !(bx>8 && bx<24 && by>8 && by<24)) {
1468             if(!(by==31)) {
1469                 /* mvd (0,0) block (mode=0) */
1470                 int t;
1471                 int mode = 0; // mvd w/o mvd24
1472                 int has_dc = 0;
1473                 int cbpybits=0,cbpcbits=0;
1474                 int predictmvdx, predictmvdy;
1475                 //int mvx=-1+(2*(s->frame&1));
1476                 //int mvy=-1+((s->frame&2));
1477                 int mvx=0;//(lrand48()%4)-2;
1478                 int mvy=3;
1479
1480                 swf_SetBits(tag,0,1); // COD
1481                 codehuffman(tag, mcbpc_inter, mode*4+cbpcbits);
1482                 codehuffman(tag, cbpy, cbpybits^15);
1483
1484                 /* vector */
1485                 predictmvd(s,bx,by,&predictmvdx,&predictmvdy);
1486                 codehuffman(tag, mvd, mvd2index(predictmvdx, predictmvdy, mvx, mvy, 0));
1487                 codehuffman(tag, mvd, mvd2index(predictmvdx, predictmvdy, mvx, mvy, 1));
1488                 s->mvdx[by*s->bbx+bx] = mvx;
1489                 s->mvdy[by*s->bbx+bx] = mvy;
1490             } else {
1491                 /* i block (mode=3) */
1492                 int mode = 3;
1493                 int has_dc = 1;
1494                 int cbpybits,cbpcbits;
1495                 int t;
1496                 block_t b;
1497                 memset(&b, 0, sizeof(block_t));
1498                 b.y1[0] = b.y2[0] = b.y3[0] = b.y4[0] = yline[bx];
1499                 b.u[0] = uline[bx];
1500                 b.v[0] = vline[bx];
1501
1502                 getblockpatterns(&b, &cbpybits, &cbpcbits, has_dc);
1503                 swf_SetBits(tag,0,1); // COD
1504                 codehuffman(tag, mcbpc_inter, mode*4+cbpcbits);
1505                 codehuffman(tag, cbpy, cbpybits);
1506
1507                 /* luminance */
1508                 encode8x8(tag, b.y1, has_dc, cbpybits&8);
1509                 encode8x8(tag, b.y2, has_dc, cbpybits&4);
1510                 encode8x8(tag, b.y3, has_dc, cbpybits&2);
1511                 encode8x8(tag, b.y4, has_dc, cbpybits&1);
1512
1513                 /* chrominance */
1514                 encode8x8(tag, b.u, has_dc, cbpcbits&2);
1515                 encode8x8(tag, b.v, has_dc, cbpcbits&1);
1516             }
1517         }
1518     }
1519 }
1520
1521 #ifdef MAIN
1522 #include "png.h"
1523 int main(int argn, char*argv[])
1524 {
1525     int fi;
1526     int t;
1527     SWF swf;
1528     TAG * tag;
1529     RGBA* pic, *pic2, rgb;
1530     SWFPLACEOBJECT obj;
1531     int width = 0;
1532     int height = 0;
1533     int frames = 20;
1534     int framerate = 29;
1535     unsigned char*data;
1536     char* fname = "/home/kramm/pics/peppers.png";
1537     VIDEOSTREAM stream;
1538     double d = 1.0;
1539
1540     memset(&stream, 0, sizeof(stream));
1541
1542     getPNG(fname, &width, &height, &data);
1543     pic = (RGBA*)malloc(width*height*sizeof(RGBA));
1544     pic2 = (RGBA*)malloc(width*height*sizeof(RGBA));
1545     memcpy(pic, data, width*height*sizeof(RGBA));
1546     free(data);
1547
1548     printf("Compressing %s, size %dx%d\n", fname, width, height);
1549
1550     memset(&swf,0,sizeof(SWF));
1551     memset(&obj,0,sizeof(obj));
1552
1553     swf.fileVersion    = 6;
1554     swf.frameRate      = framerate*256;
1555     swf.movieSize.xmax = 20*width*2;
1556     swf.movieSize.ymax = 20*height-20*64;
1557
1558     swf.firstTag = swf_InsertTag(NULL,ST_SETBACKGROUNDCOLOR);
1559     tag = swf.firstTag;
1560     rgb.r = 0x00;rgb.g = 0x00;rgb.b = 0x00;
1561     swf_SetRGB(tag,&rgb);
1562
1563     tag = swf_InsertTag(tag, ST_DEFINEVIDEOSTREAM);
1564     swf_SetU16(tag, 33);
1565     swf_SetVideoStreamDefine(tag, &stream, frames, width, height);
1566     stream.do_motion = 0;
1567
1568     for(t=0;t<frames;t++)
1569     {
1570         int x,y;
1571         double xx,yy;
1572         for(y=0,yy=0;y<height;y++,yy+=d)  {
1573             RGBA*line = &pic[((int)yy)*width];
1574             for(x=0,xx=0;x<width;x++,xx+=d) {
1575                 pic2[y*width+x] = line[((int)xx)];
1576             }
1577         }
1578         printf("frame:%d\n", t);fflush(stdout);
1579
1580         tag = swf_InsertTag(tag, ST_VIDEOFRAME);
1581         swf_SetU16(tag, 33);
1582         if(t==0)
1583             swf_SetVideoStreamIFrame(tag, &stream, pic2, 9);
1584         else {
1585             swf_SetVideoStreamPFrame(tag, &stream, pic2, 9);
1586         }
1587
1588         tag = swf_InsertTag(tag, ST_PLACEOBJECT2);
1589         swf_GetPlaceObject(0, &obj);
1590         if(t==0) {
1591             obj.depth = 1;
1592             obj.id = 33;
1593         } else {
1594             obj.move = 1;
1595             obj.depth = 1;
1596             obj.ratio = t;
1597         }
1598         swf_SetPlaceObject(tag,&obj);
1599
1600         tag = swf_InsertTag(tag, ST_SHOWFRAME);
1601         d-=0.015;
1602     }
1603     swf_VideoStreamClear(&stream);
1604
1605     tag = swf_InsertTag(tag, ST_END);
1606
1607     fi = open("video3.swf", O_WRONLY|O_CREAT|O_TRUNC, 0644);
1608     if(swf_WriteSWC(fi,&swf)<0) {
1609         fprintf(stderr,"WriteSWF() failed.\n");
1610     }
1611     close(fi);
1612     swf_FreeTags(&swf);
1613     return 0;
1614 }
1615 #undef MAIN
1616 #endif