r1019: Fix crash in Ogg file handling.
[cinelerra/simeon] / mpeg2enc / transfrm.c
1 /* transfrm.c,  forward / inverse transformation                            */
2
3 /* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
4
5 /*
6  * Disclaimer of Warranty
7  *
8  * These software programs are available to the user without any license fee or
9  * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
10  * any and all warranties, whether express, implied, or statuary, including any
11  * implied warranties or merchantability or of fitness for a particular
12  * purpose.  In no event shall the copyright-holder be liable for any
13  * incidental, punitive, or consequential damages of any kind whatsoever
14  * arising from the use of these programs.
15  *
16  * This disclaimer of warranty extends to the user of these programs and user's
17  * customers, employees, agents, transferees, successors, and assigns.
18  *
19  * The MPEG Software Simulation Group does not represent or warrant that the
20  * programs furnished hereunder are free of infringement of any third-party
21  * patents.
22  *
23  * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
24  * are subject to royalty fees to patent holders.  Many of these patents are
25  * general enough such that they are unavoidable regardless of implementation
26  * design.
27  *
28  */
29
30 #include "config.h"
31 #include "global.h"
32 #include <stdio.h>
33 #include <math.h>
34 #include "cpu_accel.h"
35
36 #ifdef X86_CPU
37 extern void fdct_mmx( int16_t * blk );
38 extern void idct_mmx( int16_t * blk, unsigned char *temp );
39
40 void add_pred_mmx (uint8_t *pred, uint8_t *cur,
41                                    int lx, int16_t *blk);
42 void sub_pred_mmx (uint8_t *pred, uint8_t *cur,
43                                    int lx, int16_t *blk);
44 #endif
45
46 extern void fdct( int16_t *blk );
47 extern void idct( int16_t *blk, unsigned char *temp );
48
49
50
51 /* private prototypes*/
52 static void add_pred (uint8_t *pred, uint8_t *cur,
53                                           int lx, int16_t *blk);
54 static void sub_pred (uint8_t *pred, uint8_t *cur,
55                                           int lx, int16_t *blk);
56
/*
 * Dispatch pointers for the transform and prediction routines.
 * init_transform_hv() binds each of them either to the portable C
 * implementation or, on X86_CPU builds, to the MMX implementation.
 */

/* forward DCT of one block */
static void (*pfdct)(int16_t *blk);
/* inverse DCT of one block; temp is scratch space handed to the routine */
static void (*pidct)(int16_t *blk, unsigned char *temp);
/* cur = prediction + prediction error */
static void (*padd_pred)(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
/* prediction error = cur - prediction */
static void (*psub_pred)(uint8_t *pred, uint8_t *cur, int lx, int16_t *blk);
68
69 /*
70   Initialise DCT transformation routines
71   Currently just activates MMX routines if available
72  */
73
74
75 void init_transform_hv()
76 {
77         int flags;
78         flags = cpu_accel();
79
80 #ifdef X86_CPU
81         if( (flags & ACCEL_X86_MMX) ) /* MMX CPU */
82         {
83                 if(verbose) fprintf( stderr, "SETTING MMX for TRANSFORM!\n");
84                 pfdct = fdct_mmx;
85                 pidct = idct_mmx;
86                 padd_pred = add_pred_mmx;
87                 psub_pred = sub_pred_mmx;
88         }
89         else
90 #endif
91         {
92                 pfdct = fdct;
93                 pidct = idct;
94                 padd_pred = add_pred;
95                 psub_pred = sub_pred;
96
97         }
98 }
99
100 /* add prediction and prediction error, saturate to 0...255 */
101 static void add_pred(unsigned char *pred,
102         unsigned char *cur,
103         int lx,
104         short *blk)
105 {
106         register int j;
107
108         for (j=0; j<8; j++)
109         {
110 /*
111  *      for (i=0; i<8; i++)
112  *        cur[i] = clp[blk[i] + pred[i]];
113  */
114         cur[0] = clp[blk[0] + pred[0]];
115         cur[1] = clp[blk[1] + pred[1]];
116         cur[2] = clp[blk[2] + pred[2]];
117         cur[3] = clp[blk[3] + pred[3]];
118         cur[4] = clp[blk[4] + pred[4]];
119         cur[5] = clp[blk[5] + pred[5]];
120         cur[6] = clp[blk[6] + pred[6]];
121         cur[7] = clp[blk[7] + pred[7]];
122  
123         blk += 8;
124         cur += lx;
125         pred += lx;
126         }
127 }
128
/*
 * Subtract the prediction from the block data for one 8x8 block.
 *
 *   pred  top-left sample of the prediction; rows are lx bytes apart
 *   cur   top-left sample of the source picture; rows are lx bytes apart
 *   lx    distance in bytes between consecutive rows of pred and cur
 *   blk   receives the 64 differences cur - pred, 8 per row, contiguously
 */
static void sub_pred(unsigned char *pred,
        unsigned char *cur,
        int lx,
        short *blk)
{
        int row, col;

        for(row = 0; row < 8; row++)
        {
                for(col = 0; col < 8; col++)
                        blk[col] = cur[col] - pred[col];

                blk += 8;
                cur += lx;
                pred += lx;
        }
}
157
158 void transform_engine_loop(transform_engine_t *engine)
159 {
160         while(!engine->done)
161         {
162                 pthread_mutex_lock(&(engine->input_lock));
163                 
164                 if(!engine->done)
165                 {
166                         pict_data_s *picture = engine->picture;
167                         uint8_t **pred = engine->pred;
168                         uint8_t **cur = engine->cur;
169                         mbinfo_s *mbi = picture->mbinfo;
170                         int16_t (*blocks)[64] = picture->blocks;
171                         int i, j, i1, j1, k, n, cc, offs, lx;
172
173                         k = (engine->start_row / 16) * (width / 16);
174
175                         for(j = engine->start_row; j < engine->end_row; j += 16)
176                         for(i = 0; i < width; i += 16)
177                         {
178                                         mbi[k].dctblocks = &blocks[k * block_count];
179
180                                 for(n = 0; n < block_count; n++)
181                                 {
182 /* color component index */
183                                         cc = (n < 4) ? 0 : (n & 1) + 1; 
184                                         if(cc == 0)
185                                         {
186 /* A.Stevens Jul 2000 Record dct blocks associated with macroblock */
187 /* We'll use this for quantisation calculations                    */
188 /* luminance */
189                                                         if ((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
190                                                         {
191 /* field DCT */
192                                                                 offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
193                                                                 lx = width << 1;
194                                                         }
195                                                         else
196                                                         {
197 /* frame DCT */
198                                                                 offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
199                                                                 lx = width2;
200                                                         }
201
202                                                         if (picture->pict_struct == BOTTOM_FIELD)
203                                                                 offs += width;
204                                         }
205                                         else
206                                         {
207 /* chrominance */
208 /* scale coordinates */
209                                                 i1 = (chroma_format == CHROMA444) ? i : i >> 1;
210                                                 j1 = (chroma_format != CHROMA420) ? j : j >> 1;
211
212                                                 if ((picture->pict_struct==FRAME_PICTURE) && mbi[k].dct_type
213                                                 && (chroma_format!=CHROMA420))
214                                                 {
215 /* field DCT */
216                                                 offs = i1 + (n&8) + chrom_width*(j1+((n&2)>>1));
217                                                 lx = chrom_width<<1;
218                                                 }
219                                                 else
220                                                 {
221 /* frame DCT */
222                                                 offs = i1 + (n&8) + chrom_width2*(j1+((n&2)<<2));
223                                                 lx = chrom_width2;
224                                                 }
225
226                                                 if(picture->pict_struct==BOTTOM_FIELD)
227                                                 offs += chrom_width;
228                                         }
229
230                                                 (*psub_pred)(pred[cc]+offs,cur[cc]+offs,lx,
231                                                                          blocks[k*block_count+n]);
232                                                 (*pfdct)(blocks[k*block_count+n]);
233                                 }
234
235                                 k++;
236                         }
237                 }
238                 pthread_mutex_unlock(&(engine->output_lock));
239         }
240 }
241
242 /* subtract prediction and transform prediction error */
243 void transform(pict_data_s *picture,
244         uint8_t *pred[], uint8_t *cur[])
245 {
246         int i;
247 /* Start loop */
248         for(i = 0; i < processors; i++)
249         {
250                 transform_engines[i].picture = picture;
251                 transform_engines[i].pred = pred;
252                 transform_engines[i].cur = cur;
253                 pthread_mutex_unlock(&(transform_engines[i].input_lock));
254         }
255
256 /* Wait for completion */
257         for(i = 0; i < processors; i++)
258         {
259                 pthread_mutex_lock(&(transform_engines[i].output_lock));
260         }
261 }
262
263
264
265 void start_transform_engines()
266 {
267         int i;
268         int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
269         int current_row = 0;
270         pthread_attr_t  attr;
271         pthread_mutexattr_t mutex_attr;
272
273         pthread_mutexattr_init(&mutex_attr);
274         pthread_attr_init(&attr);
275         transform_engines = calloc(1, sizeof(transform_engine_t) * processors);
276         for(i = 0; i < processors; i++)
277         {
278                 transform_engines[i].start_row = current_row * 16;
279                 current_row += rows_per_processor;
280                 if(current_row > height2 / 16) current_row = height2 / 16;
281                 transform_engines[i].end_row = current_row * 16;
282                 pthread_mutex_init(&(transform_engines[i].input_lock), &mutex_attr);
283                 pthread_mutex_lock(&(transform_engines[i].input_lock));
284                 pthread_mutex_init(&(transform_engines[i].output_lock), &mutex_attr);
285                 pthread_mutex_lock(&(transform_engines[i].output_lock));
286                 transform_engines[i].done = 0;
287                 pthread_create(&(transform_engines[i].tid), 
288                         &attr, 
289                         (void*)transform_engine_loop, 
290                         &transform_engines[i]);
291         }
292 }
293
294 void stop_transform_engines()
295 {
296         int i;
297         for(i = 0; i < processors; i++)
298         {
299                 transform_engines[i].done = 1;
300                 pthread_mutex_unlock(&(transform_engines[i].input_lock));
301                 pthread_join(transform_engines[i].tid, 0);
302                 pthread_mutex_destroy(&(transform_engines[i].input_lock));
303                 pthread_mutex_destroy(&(transform_engines[i].output_lock));
304         }
305         free(transform_engines);
306 }
307
308
309
310
311
312
313
314
315
316 /* inverse transform prediction error and add prediction */
317 void itransform_engine_loop(transform_engine_t *engine)
318 {
319         while(!engine->done)
320         {
321                 pthread_mutex_lock(&(engine->input_lock));
322
323                 if(!engine->done)
324                 {
325                         pict_data_s *picture = engine->picture;
326                         uint8_t **pred = engine->pred;
327                         uint8_t **cur = engine->cur;
328                         int i, j, i1, j1, k, n, cc, offs, lx;
329                 mbinfo_s *mbi = picture->mbinfo;
330 /* Its the quantised / inverse quantised blocks were interested in
331    for inverse transformation */
332                         int16_t (*blocks)[64] = picture->qblocks;
333
334                         k = (engine->start_row / 16) * (width / 16);
335
336                         for(j = engine->start_row; j < engine->end_row; j += 16)
337                                 for(i = 0; i < width; i += 16)
338                                 {
339                                         for(n = 0; n < block_count; n++)
340                                         {
341                                         cc = (n < 4) ? 0 : (n & 1) + 1; /* color component index */
342
343                                         if(cc == 0)
344                                         {
345 /* luminance */
346                                                 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type)
347                                                 {
348 /* field DCT */
349                                                         offs = i + ((n & 1) << 3) + width * (j + ((n & 2) >> 1));
350                                                         lx = width<<1;
351                                                 }
352                                                 else
353                                                 {
354 /* frame DCT */
355                                                         offs = i + ((n & 1) << 3) + width2 * (j + ((n & 2) << 2));
356                                                         lx = width2;
357                                                 }
358
359                                                 if(picture->pict_struct == BOTTOM_FIELD)
360                                                 offs += width;
361                                         }
362                                         else
363                                         {
364 /* chrominance */
365
366 /* scale coordinates */
367                                                 i1 = (chroma_format==CHROMA444) ? i : i>>1;
368                                                 j1 = (chroma_format!=CHROMA420) ? j : j>>1;
369
370                                                 if((picture->pict_struct == FRAME_PICTURE) && mbi[k].dct_type
371                                                         && (chroma_format != CHROMA420))
372                                                 {
373 /* field DCT */
374                                                         offs = i1 + (n & 8) + chrom_width * (j1 + ((n & 2) >> 1));
375                                                         lx = chrom_width << 1;
376                                                 }
377                                                 else
378                                                 {
379 /* frame DCT */
380                                                         offs = i1 + (n&8) + chrom_width2 * (j1 + ((n & 2) << 2));
381                                                         lx = chrom_width2;
382                                                 }
383
384                                                 if(picture->pict_struct == BOTTOM_FIELD)
385                                                         offs += chrom_width;
386                                     }
387
388 //pthread_mutex_lock(&test_lock);
389                                                 (*pidct)(blocks[k*block_count+n], engine->temp);
390                                                 (*padd_pred)(pred[cc]+offs,cur[cc]+offs,lx,blocks[k*block_count+n]);
391 //pthread_mutex_unlock(&test_lock);
392                                         }
393
394                                         k++;
395                                 }
396                 }
397                 pthread_mutex_unlock(&(engine->output_lock));
398         }
399 }
400
401 void itransform(pict_data_s *picture,
402         uint8_t *pred[], uint8_t *cur[])
403 {
404         int i;
405 /* Start loop */
406         for(i = 0; i < processors; i++)
407         {
408                 itransform_engines[i].picture = picture;
409                 itransform_engines[i].cur = cur;
410                 itransform_engines[i].pred = pred;
411                 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
412         }
413
414 /* Wait for completion */
415         for(i = 0; i < processors; i++)
416         {
417                 pthread_mutex_lock(&(itransform_engines[i].output_lock));
418         }
419 }
420
421 void start_itransform_engines()
422 {
423         int i;
424         int rows_per_processor = (int)((float)height2 / 16 / processors + 0.5);
425         int current_row = 0;
426         pthread_attr_t  attr;
427         pthread_mutexattr_t mutex_attr;
428
429         pthread_mutexattr_init(&mutex_attr);
430         pthread_attr_init(&attr);
431         itransform_engines = calloc(1, sizeof(transform_engine_t) * processors);
432         for(i = 0; i < processors; i++)
433         {
434                 itransform_engines[i].start_row = current_row * 16;
435                 current_row += rows_per_processor;
436                 if(current_row > height2 / 16) current_row = height2 / 16;
437                 itransform_engines[i].end_row = current_row * 16;
438                 pthread_mutex_init(&(itransform_engines[i].input_lock), &mutex_attr);
439                 pthread_mutex_lock(&(itransform_engines[i].input_lock));
440                 pthread_mutex_init(&(itransform_engines[i].output_lock), &mutex_attr);
441                 pthread_mutex_lock(&(itransform_engines[i].output_lock));
442                 itransform_engines[i].done = 0;
443                 pthread_create(&(itransform_engines[i].tid), 
444                         &attr, 
445                         (void*)itransform_engine_loop, 
446                         &itransform_engines[i]);
447         }
448 }
449
450 void stop_itransform_engines()
451 {
452         int i;
453         for(i = 0; i < processors; i++)
454         {
455                 itransform_engines[i].done = 1;
456                 pthread_mutex_unlock(&(itransform_engines[i].input_lock));
457                 pthread_join(itransform_engines[i].tid, 0);
458                 pthread_mutex_destroy(&(itransform_engines[i].input_lock));
459                 pthread_mutex_destroy(&(itransform_engines[i].output_lock));
460         }
461         free(itransform_engines);
462 }
463
464
465
466
467 /*
468  * select between frame and field DCT
469  *
470  * preliminary version: based on inter-field correlation
471  */
472
473 void dct_type_estimation(
474         pict_data_s *picture,
475         uint8_t *pred, uint8_t *cur
476         )
477 {
478
479         struct mbinfo *mbi = picture->mbinfo;
480
481         int16_t blk0[128], blk1[128];
482         int i, j, i0, j0, k, offs, s0, s1, sq0, sq1, s01;
483         double d, r;
484
485         k = 0;
486
487         for (j0=0; j0<height2; j0+=16)
488                 for (i0=0; i0<width; i0+=16)
489                 {
490                         if (picture->frame_pred_dct || picture->pict_struct!=FRAME_PICTURE)
491                                 mbi[k].dct_type = 0;
492                         else
493                         {
494                                 /* interlaced frame picture */
495                                 /*
496                                  * calculate prediction error (cur-pred) for top (blk0)
497                                  * and bottom field (blk1)
498                                  */
499                                 for (j=0; j<8; j++)
500                                 {
501                                         offs = width*((j<<1)+j0) + i0;
502                                         for (i=0; i<16; i++)
503                                         {
504                                                 blk0[16*j+i] = cur[offs] - pred[offs];
505                                                 blk1[16*j+i] = cur[offs+width] - pred[offs+width];
506                                                 offs++;
507                                         }
508                                 }
509                                 /* correlate fields */
510                                 s0=s1=sq0=sq1=s01=0;
511
512                                 for (i=0; i<128; i++)
513                                 {
514                                         s0+= blk0[i];
515                                         sq0+= blk0[i]*blk0[i];
516                                         s1+= blk1[i];
517                                         sq1+= blk1[i]*blk1[i];
518                                         s01+= blk0[i]*blk1[i];
519                                 }
520
521                                 d = (sq0-(s0*s0)/128.0)*(sq1-(s1*s1)/128.0);
522
523                                 if (d>0.0)
524                                 {
525                                         r = (s01-(s0*s1)/128.0)/sqrt(d);
526                                         if (r>0.5)
527                                                 mbi[k].dct_type = 0; /* frame DCT */
528                                         else
529                                                 mbi[k].dct_type = 1; /* field DCT */
530                                 }
531                                 else
532                                         mbi[k].dct_type = 1; /* field DCT */
533                         }
534                         k++;
535                 }
536 }