libavcodec/vp8.c
00001 
00025 #include "libavutil/imgutils.h"
00026 #include "avcodec.h"
00027 #include "internal.h"
00028 #include "vp8.h"
00029 #include "vp8data.h"
00030 #include "rectangle.h"
00031 #include "thread.h"
00032 
00033 #if ARCH_ARM
00034 #   include "arm/vp8.h"
00035 #endif
00036 
00037 static void free_buffers(VP8Context *s)
00038 {
00039     av_freep(&s->macroblocks_base);
00040     av_freep(&s->filter_strength);
00041     av_freep(&s->intra4x4_pred_mode_top);
00042     av_freep(&s->top_nnz);
00043     av_freep(&s->edge_emu_buffer);
00044     av_freep(&s->top_border);
00045 
00046     s->macroblocks = NULL;
00047 }
00048 
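/* Allocate a frame buffer and attach a segmentation map to it, reusing a
 * queued map from a previous frame when one is available instead of
 * allocating a new one. */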
00049 static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
00050 {
00051     int ret;
00052     if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
00053         return ret;
00054     if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
00055         f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
00056     } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
00057         ff_thread_release_buffer(s->avctx, f);
00058         return AVERROR(ENOMEM);
00059     }
00060     return 0;
00061 }
00062 
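/* Release a frame buffer and dispose of its segmentation map: either queue
 * the map for reuse/delayed freeing (other threads may still read it) or
 * free it immediately. */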
00063 static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
00064 {
00065     if (f->ref_index[0]) {
00066         if (prefer_delayed_free) {
00067             /* Upon a size change, we want to free the maps but other threads may still
00068              * be using them, so queue them. Upon a seek, all threads are inactive so
00069              * we want to cache one to prevent re-allocation in the next decoding
00070              * iteration, but the rest we can free directly. */
00071             int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
00072             if (s->num_maps_to_be_freed < max_queued_maps) {
00073                 s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
00074             } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
00075                 av_free(f->ref_index[0]);
00076             } /* else: MEMLEAK (should never happen, but better that than crash) */
00077             f->ref_index[0] = NULL;
00078         } else /* vp8_decode_free() */ {
00079             av_free(f->ref_index[0]);
00080         }
00081     }
00082     ff_thread_release_buffer(s->avctx, f);
00083 }
00084 
00085 static void vp8_decode_flush_impl(AVCodecContext *avctx,
00086                                   int prefer_delayed_free, int can_direct_free, int free_mem)
00087 {
00088     VP8Context *s = avctx->priv_data;
00089     int i;
00090 
00091     if (!avctx->internal->is_copy) {
00092         for (i = 0; i < 5; i++)
00093             if (s->frames[i].data[0])
00094                 vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
00095     }
00096     memset(s->framep, 0, sizeof(s->framep));
00097 
00098     if (free_mem) {
00099         free_buffers(s);
00100         s->maps_are_invalid = 1;
00101     }
00102 }
00103 
00104 static void vp8_decode_flush(AVCodecContext *avctx)
00105 {
00106     vp8_decode_flush_impl(avctx, 1, 1, 0);
00107 }
00108 
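/* Recompute the macroblock dimensions and reallocate the per-row scratch
 * buffers (macroblock array, filter strengths, intra prediction modes,
 * non-zero counts, top border) whenever the coded size changes. */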
00109 static int update_dimensions(VP8Context *s, int width, int height)
00110 {
00111     if (width  != s->avctx->width ||
00112         height != s->avctx->height) {
00113         if (av_image_check_size(width, height, 0, s->avctx))
00114             return AVERROR_INVALIDDATA;
00115 
00116         vp8_decode_flush_impl(s->avctx, 1, 0, 1);
00117 
00118         avcodec_set_dimensions(s->avctx, width, height);
00119     }
00120 
00121     s->mb_width  = (s->avctx->coded_width +15) / 16;
00122     s->mb_height = (s->avctx->coded_height+15) / 16;
00123 
00124     s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
00125     s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
00126     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
00127     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
00128     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
00129 
00130     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
00131         !s->top_nnz || !s->top_border)
00132         return AVERROR(ENOMEM);
00133 
00134     s->macroblocks        = s->macroblocks_base + 1;
00135 
00136     return 0;
00137 }
00138 
00139 static void parse_segment_info(VP8Context *s)
00140 {
00141     VP56RangeCoder *c = &s->c;
00142     int i;
00143 
00144     s->segmentation.update_map = vp8_rac_get(c);
00145 
00146     if (vp8_rac_get(c)) { // update segment feature data
00147         s->segmentation.absolute_vals = vp8_rac_get(c);
00148 
00149         for (i = 0; i < 4; i++)
00150             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
00151 
00152         for (i = 0; i < 4; i++)
00153             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
00154     }
00155     if (s->segmentation.update_map)
00156         for (i = 0; i < 3; i++)
00157             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
00158 }
00159 
00160 static void update_lf_deltas(VP8Context *s)
00161 {
00162     VP56RangeCoder *c = &s->c;
00163     int i;
00164 
00165     for (i = 0; i < 4; i++)
00166         s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);
00167 
00168     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
00169         s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
00170 }
00171 
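/* Read the number of DCT coefficient partitions (1, 2, 4 or 8) and their
 * sizes, and initialize one range decoder per partition; the last partition
 * runs to the end of the buffer. Returns -1 if the sizes exceed buf_size. */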
00172 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
00173 {
00174     const uint8_t *sizes = buf;
00175     int i;
00176 
00177     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
00178 
00179     buf      += 3*(s->num_coeff_partitions-1);
00180     buf_size -= 3*(s->num_coeff_partitions-1);
00181     if (buf_size < 0)
00182         return -1;
00183 
00184     for (i = 0; i < s->num_coeff_partitions-1; i++) {
00185         int size = AV_RL24(sizes + 3*i);
00186         if (buf_size - size < 0)
00187             return -1;
00188 
00189         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
00190         buf      += size;
00191         buf_size -= size;
00192     }
00193     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
00194 
00195     return 0;
00196 }
00197 
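/* Build the per-segment dequantization factors from the base luma AC index
 * plus the transmitted deltas, via the DC/AC lookup tables. Note the fixed
 * 155/100 scaling of the second-order luma AC factor and the clamps on the
 * second-order luma DC (minimum 8) and chroma DC (maximum 132) factors. */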
00198 static void get_quants(VP8Context *s)
00199 {
00200     VP56RangeCoder *c = &s->c;
00201     int i, base_qi;
00202 
00203     int yac_qi     = vp8_rac_get_uint(c, 7);
00204     int ydc_delta  = vp8_rac_get_sint(c, 4);
00205     int y2dc_delta = vp8_rac_get_sint(c, 4);
00206     int y2ac_delta = vp8_rac_get_sint(c, 4);
00207     int uvdc_delta = vp8_rac_get_sint(c, 4);
00208     int uvac_delta = vp8_rac_get_sint(c, 4);
00209 
00210     for (i = 0; i < 4; i++) {
00211         if (s->segmentation.enabled) {
00212             base_qi = s->segmentation.base_quant[i];
00213             if (!s->segmentation.absolute_vals)
00214                 base_qi += yac_qi;
00215         } else
00216             base_qi = yac_qi;
00217 
00218         s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
00219         s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
00220         s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
00221         s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] / 100;
00222         s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
00223         s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
00224 
00225         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
00226         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
00227     }
00228 }
00229 
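/* Determine which frame, if any, the golden or altref buffer is updated
 * from: the current frame when the explicit update flag is set, otherwise
 * a 2-bit copy flag selects the previous frame, the other reference, or no
 * update at all. */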
00243 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
00244 {
00245     VP56RangeCoder *c = &s->c;
00246 
00247     if (update)
00248         return VP56_FRAME_CURRENT;
00249 
00250     switch (vp8_rac_get_uint(c, 2)) {
00251     case 1:
00252         return VP56_FRAME_PREVIOUS;
00253     case 2:
00254         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
00255     }
00256     return VP56_FRAME_NONE;
00257 }
00258 
00259 static void update_refs(VP8Context *s)
00260 {
00261     VP56RangeCoder *c = &s->c;
00262 
00263     int update_golden = vp8_rac_get(c);
00264     int update_altref = vp8_rac_get(c);
00265 
00266     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
00267     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
00268 }
00269 
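/* Parse the uncompressed data chunk and the first (header) partition:
 * frame type, dimensions (keyframes only), segmentation, loop filter
 * parameters, coefficient partition layout, quantizers, reference buffer
 * updates and probability updates. */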
00270 static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
00271 {
00272     VP56RangeCoder *c = &s->c;
00273     int header_size, hscale, vscale, i, j, k, l, m, ret;
00274     int width  = s->avctx->width;
00275     int height = s->avctx->height;
00276 
00277     s->keyframe  = !(buf[0] & 1);
00278     s->profile   =  (buf[0]>>1) & 7;
00279     s->invisible = !(buf[0] & 0x10);
00280     header_size  = AV_RL24(buf) >> 5;
00281     buf      += 3;
00282     buf_size -= 3;
00283 
00284     if (s->profile > 3)
00285         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
00286 
00287     if (!s->profile)
00288         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
00289     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
00290         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
00291 
00292     if (header_size > buf_size - 7*s->keyframe) {
00293         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
00294         return AVERROR_INVALIDDATA;
00295     }
00296 
00297     if (s->keyframe) {
00298         if (AV_RL24(buf) != 0x2a019d) {
00299             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
00300             return AVERROR_INVALIDDATA;
00301         }
00302         width  = AV_RL16(buf+3) & 0x3fff;
00303         height = AV_RL16(buf+5) & 0x3fff;
00304         hscale = buf[4] >> 6;
00305         vscale = buf[6] >> 6;
00306         buf      += 7;
00307         buf_size -= 7;
00308 
00309         if (hscale || vscale)
00310             av_log_missing_feature(s->avctx, "Upscaling", 1);
00311 
00312         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
00313         for (i = 0; i < 4; i++)
00314             for (j = 0; j < 16; j++)
00315                 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
00316                        sizeof(s->prob->token[i][j]));
00317         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
00318         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
00319         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
00320         memset(&s->segmentation, 0, sizeof(s->segmentation));
00321     }
00322 
00323     if (!s->macroblocks_base || /* first frame */
00324         width != s->avctx->width || height != s->avctx->height) {
00325         if ((ret = update_dimensions(s, width, height)) < 0)
00326             return ret;
00327     }
00328 
00329     ff_vp56_init_range_decoder(c, buf, header_size);
00330     buf      += header_size;
00331     buf_size -= header_size;
00332 
00333     if (s->keyframe) {
00334         if (vp8_rac_get(c))
00335             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
00336         vp8_rac_get(c); // whether we can skip clamping in dsp functions
00337     }
00338 
00339     if ((s->segmentation.enabled = vp8_rac_get(c)))
00340         parse_segment_info(s);
00341     else
00342         s->segmentation.update_map = 0; // FIXME: move this to some init function?
00343 
00344     s->filter.simple    = vp8_rac_get(c);
00345     s->filter.level     = vp8_rac_get_uint(c, 6);
00346     s->filter.sharpness = vp8_rac_get_uint(c, 3);
00347 
00348     if ((s->lf_delta.enabled = vp8_rac_get(c)))
00349         if (vp8_rac_get(c))
00350             update_lf_deltas(s);
00351 
00352     if (setup_partitions(s, buf, buf_size)) {
00353         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
00354         return AVERROR_INVALIDDATA;
00355     }
00356 
00357     get_quants(s);
00358 
00359     if (!s->keyframe) {
00360         update_refs(s);
00361         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
00362         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
00363     }
00364 
00365     // if we aren't saving this frame's probabilities for future frames,
00366     // make a copy of the current probabilities
00367     if (!(s->update_probabilities = vp8_rac_get(c)))
00368         s->prob[1] = s->prob[0];
00369 
00370     s->update_last = s->keyframe || vp8_rac_get(c);
00371 
00372     for (i = 0; i < 4; i++)
00373         for (j = 0; j < 8; j++)
00374             for (k = 0; k < 3; k++)
00375                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
00376                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
00377                         int prob = vp8_rac_get_uint(c, 8);
00378                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
00379                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
00380                     }
00381 
00382     if ((s->mbskip_enabled = vp8_rac_get(c)))
00383         s->prob->mbskip = vp8_rac_get_uint(c, 8);
00384 
00385     if (!s->keyframe) {
00386         s->prob->intra  = vp8_rac_get_uint(c, 8);
00387         s->prob->last   = vp8_rac_get_uint(c, 8);
00388         s->prob->golden = vp8_rac_get_uint(c, 8);
00389 
00390         if (vp8_rac_get(c))
00391             for (i = 0; i < 4; i++)
00392                 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
00393         if (vp8_rac_get(c))
00394             for (i = 0; i < 3; i++)
00395                 s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
00396 
00397         // 17.2 MV probability update
00398         for (i = 0; i < 2; i++)
00399             for (j = 0; j < 19; j++)
00400                 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
00401                     s->prob->mvc[i][j] = vp8_rac_get_nn(c);
00402     }
00403 
00404     return 0;
00405 }
00406 
00407 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
00408 {
00409     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
00410     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
00411 }
00412 
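/* Read one motion vector component and its sign. Large values are built
 * from explicitly coded bits (three low bits, then bits 9..4, with bit 3
 * read last and forced to 1 when no higher bit is set); small values are
 * decoded through the small_mvtree. */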
00416 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
00417 {
00418     int bit, x = 0;
00419 
00420     if (vp56_rac_get_prob_branchy(c, p[0])) {
00421         int i;
00422 
00423         for (i = 0; i < 3; i++)
00424             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00425         for (i = 9; i > 3; i--)
00426             x += vp56_rac_get_prob(c, p[9 + i]) << i;
00427         if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
00428             x += 8;
00429     } else {
00430         // small_mvtree
00431         const uint8_t *ps = p+2;
00432         bit = vp56_rac_get_prob(c, *ps);
00433         ps += 1 + 3*bit;
00434         x  += 4*bit;
00435         bit = vp56_rac_get_prob(c, *ps);
00436         ps += 1 + bit;
00437         x  += 2*bit;
00438         x  += vp56_rac_get_prob(c, *ps);
00439     }
00440 
00441     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
00442 }
00443 
00444 static av_always_inline
00445 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
00446 {
00447     if (left == top)
00448         return vp8_submv_prob[4-!!left];
00449     if (!top)
00450         return vp8_submv_prob[2];
00451     return vp8_submv_prob[1-!!left];
00452 }
00453 
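/* Split MV decoding: read the macroblock partitioning (16x8, 8x16, 8x8 or
 * 4x4), then one sub-MV per partition, each predicted from its left and
 * above neighbours. Returns the number of motion vectors decoded. */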
00458 static av_always_inline
00459 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
00460 {
00461     int part_idx;
00462     int n, num;
00463     VP8Macroblock *top_mb  = &mb[2];
00464     VP8Macroblock *left_mb = &mb[-1];
00465     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
00466                   *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
00467                   *mbsplits_cur, *firstidx;
00468     VP56mv *top_mv  = top_mb->bmv;
00469     VP56mv *left_mv = left_mb->bmv;
00470     VP56mv *cur_mv  = mb->bmv;
00471 
00472     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
00473         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
00474             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
00475         } else {
00476             part_idx = VP8_SPLITMVMODE_8x8;
00477         }
00478     } else {
00479         part_idx = VP8_SPLITMVMODE_4x4;
00480     }
00481 
00482     num = vp8_mbsplit_count[part_idx];
00483     mbsplits_cur = vp8_mbsplits[part_idx];
00484     firstidx = vp8_mbfirstidx[part_idx];
00485     mb->partitioning = part_idx;
00486 
00487     for (n = 0; n < num; n++) {
00488         int k = firstidx[n];
00489         uint32_t left, above;
00490         const uint8_t *submv_prob;
00491 
00492         if (!(k & 3))
00493             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
00494         else
00495             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
00496         if (k <= 3)
00497             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
00498         else
00499             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
00500 
00501         submv_prob = get_submv_prob(left, above);
00502 
00503         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
00504             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
00505                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
00506                     mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
00507                     mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
00508                 } else {
00509                     AV_ZERO32(&mb->bmv[n]);
00510                 }
00511             } else {
00512                 AV_WN32A(&mb->bmv[n], above);
00513             }
00514         } else {
00515             AV_WN32A(&mb->bmv[n], left);
00516         }
00517     }
00518 
00519     return num;
00520 }
00521 
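/* Inter MB motion vector decoding: collect the zero, nearest and near MV
 * candidates from the top, left and top-left neighbours (correcting for
 * reference sign bias), then choose between the zero MV, one of the
 * candidates, a newly coded MV, or per-partition split MVs. */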
00522 static av_always_inline
00523 void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
00524 {
00525     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
00526                                   mb - 1 /* left */,
00527                                   mb + 1 /* top-left */ };
00528     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
00529     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
00530     int idx = CNT_ZERO;
00531     int cur_sign_bias = s->sign_bias[mb->ref_frame];
00532     int8_t *sign_bias = s->sign_bias;
00533     VP56mv near_mv[4];
00534     uint8_t cnt[4] = { 0 };
00535     VP56RangeCoder *c = &s->c;
00536 
00537     AV_ZERO32(&near_mv[0]);
00538     AV_ZERO32(&near_mv[1]);
00539     AV_ZERO32(&near_mv[2]);
00540 
00541     /* Process MB on top, left and top-left */
00542     #define MV_EDGE_CHECK(n)\
00543     {\
00544         VP8Macroblock *edge = mb_edge[n];\
00545         int edge_ref = edge->ref_frame;\
00546         if (edge_ref != VP56_FRAME_CURRENT) {\
00547             uint32_t mv = AV_RN32A(&edge->mv);\
00548             if (mv) {\
00549                 if (cur_sign_bias != sign_bias[edge_ref]) {\
00550                     /* SWAR negate of the values in mv. */\
00551                     mv = ~mv;\
00552                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
00553                 }\
00554                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
00555                     AV_WN32A(&near_mv[++idx], mv);\
00556                 cnt[idx]      += 1 + (n != 2);\
00557             } else\
00558                 cnt[CNT_ZERO] += 1 + (n != 2);\
00559         }\
00560     }
00561 
00562     MV_EDGE_CHECK(0)
00563     MV_EDGE_CHECK(1)
00564     MV_EDGE_CHECK(2)
00565 
00566     mb->partitioning = VP8_SPLITMVMODE_NONE;
00567     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
00568         mb->mode = VP8_MVMODE_MV;
00569 
00570         /* If we have three distinct MVs, merge first and last if they're the same */
00571         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
00572             cnt[CNT_NEAREST] += 1;
00573 
00574         /* Swap near and nearest if necessary */
00575         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
00576             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
00577             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
00578         }
00579 
00580         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
00581             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
00582 
00583                 /* Choose the best mv out of 0,0 and the nearest mv */
00584                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
00585                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
00586                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
00587                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
00588 
00589                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
00590                     mb->mode = VP8_MVMODE_SPLIT;
00591                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
00592                 } else {
00593                     mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
00594                     mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
00595                     mb->bmv[0] = mb->mv;
00596                 }
00597             } else {
00598                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
00599                 mb->bmv[0] = mb->mv;
00600             }
00601         } else {
00602             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
00603             mb->bmv[0] = mb->mv;
00604         }
00605     } else {
00606         mb->mode = VP8_MVMODE_ZERO;
00607         AV_ZERO32(&mb->mv);
00608         mb->bmv[0] = mb->mv;
00609     }
00610 }
00611 
00612 static av_always_inline
00613 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
00614                            int mb_x, int keyframe)
00615 {
00616     uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00617     if (keyframe) {
00618         int x, y;
00619         uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
00620         uint8_t* const left = s->intra4x4_pred_mode_left;
00621         for (y = 0; y < 4; y++) {
00622             for (x = 0; x < 4; x++) {
00623                 const uint8_t *ctx;
00624                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
00625                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
00626                 left[y] = top[x] = *intra4x4;
00627                 intra4x4++;
00628             }
00629         }
00630     } else {
00631         int i;
00632         for (i = 0; i < 16; i++)
00633             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
00634     }
00635 }
00636 
00637 static av_always_inline
00638 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
00639 {
00640     VP56RangeCoder *c = &s->c;
00641 
00642     if (s->segmentation.update_map) {
00643         int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
00644         *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
00645     } else
00646         *segment = ref ? *ref : *segment;
00647     s->segment = *segment;
00648 
00649     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
00650 
00651     if (s->keyframe) {
00652         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
00653 
00654         if (mb->mode == MODE_I4x4) {
00655             decode_intra4x4_modes(s, c, mb_x, 1);
00656         } else {
00657             const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
00658             AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
00659             AV_WN32A(s->intra4x4_pred_mode_left, modes);
00660         }
00661 
00662         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
00663         mb->ref_frame = VP56_FRAME_CURRENT;
00664     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
00665         // inter MB, 16.2
00666         if (vp56_rac_get_prob_branchy(c, s->prob->last))
00667             mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
00668                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
00669         else
00670             mb->ref_frame = VP56_FRAME_PREVIOUS;
00671         s->ref_count[mb->ref_frame-1]++;
00672 
00673         // motion vectors, 16.3
00674         decode_mvs(s, mb, mb_x, mb_y);
00675     } else {
00676         // intra MB, 16.1
00677         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
00678 
00679         if (mb->mode == MODE_I4x4)
00680             decode_intra4x4_modes(s, c, mb_x, 0);
00681 
00682         s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
00683         mb->ref_frame = VP56_FRAME_CURRENT;
00684         mb->partitioning = VP8_SPLITMVMODE_NONE;
00685         AV_ZERO32(&mb->bmv[0]);
00686     }
00687 }
00688 
00689 #ifndef decode_block_coeffs_internal
00690 
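/* Core DCT token decoding loop for coefficient positions i..15: handles
 * end-of-block, zero runs, small literal values and the DCT_CAT1..6
 * extra-bit categories, writing dequantized coefficients in zigzag order.
 * Returns the index of the last decoded coefficient plus one. */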
00699 static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
00700                                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00701                                         int i, uint8_t *token_prob, int16_t qmul[2])
00702 {
00703     goto skip_eob;
00704     do {
00705         int coeff;
00706         if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00707             return i;
00708 
00709 skip_eob:
00710         if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
00711             if (++i == 16)
00712                 return i; // invalid input; blocks should end with EOB
00713             token_prob = probs[i][0];
00714             goto skip_eob;
00715         }
00716 
00717         if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
00718             coeff = 1;
00719             token_prob = probs[i+1][1];
00720         } else {
00721             if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
00722                 coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
00723                 if (coeff)
00724                     coeff += vp56_rac_get_prob(c, token_prob[5]);
00725                 coeff += 2;
00726             } else {
00727                 // DCT_CAT*
00728                 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
00729                     if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
00730                         coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
00731                     } else {                                    // DCT_CAT2
00732                         coeff  = 7;
00733                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
00734                         coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
00735                     }
00736                 } else {    // DCT_CAT3 and up
00737                     int a = vp56_rac_get_prob(c, token_prob[8]);
00738                     int b = vp56_rac_get_prob(c, token_prob[9+a]);
00739                     int cat = (a<<1) + b;
00740                     coeff  = 3 + (8<<cat);
00741                     coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
00742                 }
00743             }
00744             token_prob = probs[i+1][2];
00745         }
00746         block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
00747     } while (++i < 16);
00748 
00749     return i;
00750 }
00751 #endif
00752 
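/* Thin wrapper around decode_block_coeffs_internal() that handles an empty
 * block (immediate end-of-block) cheaply. i is the first coefficient index
 * to decode (1 for luma when a separate DC transform is used), zero_nhood
 * the non-zero-neighbour context, and qmul the DC/AC dequant factors. */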
00764 static av_always_inline
00765 int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
00766                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
00767                         int i, int zero_nhood, int16_t qmul[2])
00768 {
00769     uint8_t *token_prob = probs[i][zero_nhood];
00770     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
00771         return 0;
00772     return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
00773 }
00774 
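/* Decode all residuals for one macroblock: the optional second-order luma
 * DC block (inverse WHT), the sixteen luma 4x4 blocks and the eight chroma
 * 4x4 blocks, updating the top/left non-zero contexts along the way. */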
00775 static av_always_inline
00776 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
00777                       uint8_t t_nnz[9], uint8_t l_nnz[9])
00778 {
00779     int i, x, y, luma_start = 0, luma_ctx = 3;
00780     int nnz_pred, nnz, nnz_total = 0;
00781     int segment = s->segment;
00782     int block_dc = 0;
00783 
00784     if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
00785         nnz_pred = t_nnz[8] + l_nnz[8];
00786 
00787         // decode DC values and do hadamard
00788         nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
00789                                   s->qmat[segment].luma_dc_qmul);
00790         l_nnz[8] = t_nnz[8] = !!nnz;
00791         if (nnz) {
00792             nnz_total += nnz;
00793             block_dc = 1;
00794             if (nnz == 1)
00795                 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
00796             else
00797                 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
00798         }
00799         luma_start = 1;
00800         luma_ctx = 0;
00801     }
00802 
00803     // luma blocks
00804     for (y = 0; y < 4; y++)
00805         for (x = 0; x < 4; x++) {
00806             nnz_pred = l_nnz[y] + t_nnz[x];
00807             nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
00808                                       nnz_pred, s->qmat[segment].luma_qmul);
00809             // nnz+block_dc may be one more than the actual last index, but we don't care
00810             s->non_zero_count_cache[y][x] = nnz + block_dc;
00811             t_nnz[x] = l_nnz[y] = !!nnz;
00812             nnz_total += nnz;
00813         }
00814 
00815     // chroma blocks
00816     // TODO: what to do about dimensions? 2nd dim for luma is x,
00817     // but for chroma it's (y<<1)|x
00818     for (i = 4; i < 6; i++)
00819         for (y = 0; y < 2; y++)
00820             for (x = 0; x < 2; x++) {
00821                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
00822                 nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
00823                                           nnz_pred, s->qmat[segment].chroma_qmul);
00824                 s->non_zero_count_cache[i][(y<<1)+x] = nnz;
00825                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
00826                 nnz_total += nnz;
00827             }
00828 
00829     // if there were no coded coeffs despite the macroblock not being marked skip,
00830     // we MUST not do the inner loop filter and should not do IDCT
00831     // Since skip isn't used for bitstream prediction, just manually set it.
00832     if (!nnz_total)
00833         mb->skip = 1;
00834 }
00835 
00836 static av_always_inline
00837 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00838                       int linesize, int uvlinesize, int simple)
00839 {
00840     AV_COPY128(top_border, src_y + 15*linesize);
00841     if (!simple) {
00842         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
00843         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
00844     }
00845 }
00846 
00847 static av_always_inline
00848 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
00849                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
00850                     int simple, int xchg)
00851 {
00852     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
00853     src_y  -=   linesize;
00854     src_cb -= uvlinesize;
00855     src_cr -= uvlinesize;
00856 
00857 #define XCHG(a,b,xchg) do {                     \
00858         if (xchg) AV_SWAP64(b,a);               \
00859         else      AV_COPY64(b,a);               \
00860     } while (0)
00861 
00862     XCHG(top_border_m1+8, src_y-8, xchg);
00863     XCHG(top_border,      src_y,   xchg);
00864     XCHG(top_border+8,    src_y+8, 1);
00865     if (mb_x < mb_width-1)
00866         XCHG(top_border+32, src_y+16, 1);
00867 
00868     // only copy chroma for normal loop filter
00869     // or to initialize the top row to 127
00870     if (!simple || !mb_y) {
00871         XCHG(top_border_m1+16, src_cb-8, xchg);
00872         XCHG(top_border_m1+24, src_cr-8, xchg);
00873         XCHG(top_border+16,    src_cb, 1);
00874         XCHG(top_border+24,    src_cr, 1);
00875     }
00876 }
00877 
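/* The following check_*_pred*_mode helpers remap intra prediction modes on
 * frame edges to variants that do not reference unavailable left or top
 * samples, so prediction never reads outside the picture. */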
00878 static av_always_inline
00879 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
00880 {
00881     if (!mb_x) {
00882         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
00883     } else {
00884         return mb_y ? mode : LEFT_DC_PRED8x8;
00885     }
00886 }
00887 
00888 static av_always_inline
00889 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
00890 {
00891     if (!mb_x) {
00892         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
00893     } else {
00894         return mb_y ? mode : HOR_PRED8x8;
00895     }
00896 }
00897 
00898 static av_always_inline
00899 int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
00900 {
00901     if (mode == DC_PRED8x8) {
00902         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00903     } else {
00904         return mode;
00905     }
00906 }
00907 
00908 static av_always_inline
00909 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
00910 {
00911     switch (mode) {
00912     case DC_PRED8x8:
00913         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
00914     case VERT_PRED8x8:
00915         return !mb_y ? DC_127_PRED8x8 : mode;
00916     case HOR_PRED8x8:
00917         return !mb_x ? DC_129_PRED8x8 : mode;
00918     case PLANE_PRED8x8 /*TM*/:
00919         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
00920     }
00921     return mode;
00922 }
00923 
00924 static av_always_inline
00925 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
00926 {
00927     if (!mb_x) {
00928         return mb_y ? VERT_VP8_PRED : DC_129_PRED;
00929     } else {
00930         return mb_y ? mode : HOR_VP8_PRED;
00931     }
00932 }
00933 
00934 static av_always_inline
00935 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
00936 {
00937     switch (mode) {
00938     case VERT_PRED:
00939         if (!mb_x && mb_y) {
00940             *copy_buf = 1;
00941             return mode;
00942         }
00943         /* fall-through */
00944     case DIAG_DOWN_LEFT_PRED:
00945     case VERT_LEFT_PRED:
00946         return !mb_y ? DC_127_PRED : mode;
00947     case HOR_PRED:
00948         if (!mb_y) {
00949             *copy_buf = 1;
00950             return mode;
00951         }
00952         /* fall-through */
00953     case HOR_UP_PRED:
00954         return !mb_x ? DC_129_PRED : mode;
00955     case TM_VP8_PRED:
00956         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
00957     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
00958     case DIAG_DOWN_RIGHT_PRED:
00959     case VERT_RIGHT_PRED:
00960     case HOR_DOWN_PRED:
00961         if (!mb_y || !mb_x)
00962             *copy_buf = 1;
00963         return mode;
00964     }
00965     return mode;
00966 }
00967 
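/* Intra prediction for one macroblock: 16x16 or per-block 4x4 luma
 * prediction plus 8x8 chroma prediction, temporarily swapping in the saved
 * top border so predictors can read the row above, and applying the IDCT
 * per 4x4 block in the I4x4 case. */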
00968 static av_always_inline
00969 void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
00970                    int mb_x, int mb_y)
00971 {
00972     AVCodecContext *avctx = s->avctx;
00973     int x, y, mode, nnz;
00974     uint32_t tr;
00975 
00976     // for the first row, we need to run xchg_mb_border to init the top edge to 127
00977     // otherwise, skip it if we aren't going to deblock
00978     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
00979         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
00980                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
00981                        s->filter.simple, 1);
00982 
00983     if (mb->mode < MODE_I4x4) {
00984         if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
00985             mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
00986         } else {
00987             mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
00988         }
00989         s->hpc.pred16x16[mode](dst[0], s->linesize);
00990     } else {
00991         uint8_t *ptr = dst[0];
00992         uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
00993         uint8_t tr_top[4] = { 127, 127, 127, 127 };
00994 
00995         // all blocks on the right edge of the macroblock use the bottom
00996         // edge of the top macroblock for their topright edge
00997         uint8_t *tr_right = ptr - s->linesize + 16;
00998 
00999         // if we're on the right edge of the frame, said edge is extended
01000         // from the top macroblock
01001         if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
01002             mb_x == s->mb_width-1) {
01003             tr = tr_right[-1]*0x01010101u;
01004             tr_right = (uint8_t *)&tr;
01005         }
01006 
01007         if (mb->skip)
01008             AV_ZERO128(s->non_zero_count_cache);
01009 
01010         for (y = 0; y < 4; y++) {
01011             uint8_t *topright = ptr + 4 - s->linesize;
01012             for (x = 0; x < 4; x++) {
01013                 int copy = 0, linesize = s->linesize;
01014                 uint8_t *dst = ptr+4*x;
01015                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
01016 
01017                 if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
01018                     topright = tr_top;
01019                 } else if (x == 3)
01020                     topright = tr_right;
01021 
01022                 if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
01023                     mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
01024                     if (copy) {
01025                         dst = copy_dst + 12;
01026                         linesize = 8;
01027                         if (!(mb_y + y)) {
01028                             copy_dst[3] = 127U;
01029                             AV_WN32A(copy_dst+4, 127U * 0x01010101U);
01030                         } else {
01031                             AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
01032                             if (!(mb_x + x)) {
01033                                 copy_dst[3] = 129U;
01034                             } else {
01035                                 copy_dst[3] = ptr[4*x-s->linesize-1];
01036                             }
01037                         }
01038                         if (!(mb_x + x)) {
01039                             copy_dst[11] =
01040                             copy_dst[19] =
01041                             copy_dst[27] =
01042                             copy_dst[35] = 129U;
01043                         } else {
01044                             copy_dst[11] = ptr[4*x              -1];
01045                             copy_dst[19] = ptr[4*x+s->linesize  -1];
01046                             copy_dst[27] = ptr[4*x+s->linesize*2-1];
01047                             copy_dst[35] = ptr[4*x+s->linesize*3-1];
01048                         }
01049                     }
01050                 } else {
01051                     mode = intra4x4[x];
01052                 }
01053                 s->hpc.pred4x4[mode](dst, topright, linesize);
01054                 if (copy) {
01055                     AV_COPY32(ptr+4*x              , copy_dst+12);
01056                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
01057                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
01058                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
01059                 }
01060 
01061                 nnz = s->non_zero_count_cache[y][x];
01062                 if (nnz) {
01063                     if (nnz == 1)
01064                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
01065                     else
01066                         s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
01067                 }
01068                 topright += 4;
01069             }
01070 
01071             ptr   += 4*s->linesize;
01072             intra4x4 += 4;
01073         }
01074     }
01075 
01076     if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
01077         mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
01078     } else {
01079         mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
01080     }
01081     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
01082     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
01083 
01084     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
01085         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
01086                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
01087                        s->filter.simple, 0);
01088 }
01089 
01090 static const uint8_t subpel_idx[3][8] = {
01091     { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
01092                                 // also function pointer index
01093     { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
01094     { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
01095 };
01096 
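/* Luma motion compensation for one block: for a non-zero MV, split it into
 * integer and sub-pel parts, wait for the referenced rows to be decoded,
 * fall back to emulated_edge_mc() when the interpolation filter would read
 * outside the frame, then call the MC function selected by the sub-pel
 * phases. */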
01113 static av_always_inline
01114 void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
01115                  int x_off, int y_off, int block_w, int block_h,
01116                  int width, int height, int linesize,
01117                  vp8_mc_func mc_func[3][3])
01118 {
01119     uint8_t *src = ref->data[0];
01120 
01121     if (AV_RN32A(mv)) {
01122 
01123         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
01124         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
01125 
01126         x_off += mv->x >> 2;
01127         y_off += mv->y >> 2;
01128 
01129         // edge emulation
01130         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
01131         src += y_off * linesize + x_off;
01132         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01133             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01134             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
01135                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01136                                     x_off - mx_idx, y_off - my_idx, width, height);
01137             src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01138         }
01139         mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
01140     } else {
01141         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
01142         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
01143     }
01144 }
01145 
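/* Chroma motion compensation: as vp8_mc_luma(), but operating on both
 * chroma planes with eighth-pel MV precision. */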
01163 static av_always_inline
01164 void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
01165                    const VP56mv *mv, int x_off, int y_off,
01166                    int block_w, int block_h, int width, int height, int linesize,
01167                    vp8_mc_func mc_func[3][3])
01168 {
01169     uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
01170 
01171     if (AV_RN32A(mv)) {
01172         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
01173         int my = mv->y&7, my_idx = subpel_idx[0][my];
01174 
01175         x_off += mv->x >> 3;
01176         y_off += mv->y >> 3;
01177 
01178         // edge emulation
01179         src1 += y_off * linesize + x_off;
01180         src2 += y_off * linesize + x_off;
01181         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
01182         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
01183             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
01184             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
01185                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01186                                     x_off - mx_idx, y_off - my_idx, width, height);
01187             src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01188             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01189 
01190             s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
01191                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
01192                                     x_off - mx_idx, y_off - my_idx, width, height);
01193             src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
01194             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01195         } else {
01196             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
01197             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
01198         }
01199     } else {
01200         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
01201         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01202         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
01203     }
01204 }
01205 
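/* Motion-compensate one rectangular partition: luma first, then both
 * chroma planes at half resolution, with the MV forced to full-pel chroma
 * positions for profile 3. */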
01206 static av_always_inline
01207 void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
01208                  AVFrame *ref_frame, int x_off, int y_off,
01209                  int bx_off, int by_off,
01210                  int block_w, int block_h,
01211                  int width, int height, VP56mv *mv)
01212 {
01213     VP56mv uvmv = *mv;
01214 
01215     /* Y */
01216     vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
01217                 ref_frame, mv, x_off + bx_off, y_off + by_off,
01218                 block_w, block_h, width, height, s->linesize,
01219                 s->put_pixels_tab[block_w == 8]);
01220 
01221     /* U/V */
01222     if (s->profile == 3) {
01223         uvmv.x &= ~7;
01224         uvmv.y &= ~7;
01225     }
01226     x_off   >>= 1; y_off   >>= 1;
01227     bx_off  >>= 1; by_off  >>= 1;
01228     width   >>= 1; height  >>= 1;
01229     block_w >>= 1; block_h >>= 1;
01230     vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
01231                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
01232                   &uvmv, x_off + bx_off, y_off + by_off,
01233                   block_w, block_h, width, height, s->uvlinesize,
01234                   s->put_pixels_tab[1 + (block_w == 4)]);
01235 }
01236 
01237 /* Fetch pixels for estimated mv 4 macroblocks ahead.
01238  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
01239 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
01240 {
01241     /* Don't prefetch refs that haven't been used very often this frame. */
01242     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
01243         int x_off = mb_x << 4, y_off = mb_y << 4;
01244         int mx = (mb->mv.x>>2) + x_off + 8;
01245         int my = (mb->mv.y>>2) + y_off;
01246         uint8_t **src= s->framep[ref]->data;
01247         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
01248         /* For threading, a ff_thread_await_progress here might be useful, but
01249          * it actually slows down the decoder. Since a bad prefetch doesn't
01250          * generate bad decoder output, we don't run it here. */
01251         s->dsp.prefetch(src[0]+off, s->linesize, 4);
01252         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
01253         s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
01254     }
01255 }
01256 
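/* Inter prediction for one macroblock, dispatching on the partitioning
 * mode. For 4x4 splits, each chroma MV is the rounded average of the four
 * corresponding luma sub-MVs. */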
01260 static av_always_inline
01261 void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
01262                    int mb_x, int mb_y)
01263 {
01264     int x_off = mb_x << 4, y_off = mb_y << 4;
01265     int width = 16*s->mb_width, height = 16*s->mb_height;
01266     AVFrame *ref = s->framep[mb->ref_frame];
01267     VP56mv *bmv = mb->bmv;
01268 
01269     switch (mb->partitioning) {
01270     case VP8_SPLITMVMODE_NONE:
01271         vp8_mc_part(s, dst, ref, x_off, y_off,
01272                     0, 0, 16, 16, width, height, &mb->mv);
01273         break;
01274     case VP8_SPLITMVMODE_4x4: {
01275         int x, y;
01276         VP56mv uvmv;
01277 
01278         /* Y */
01279         for (y = 0; y < 4; y++) {
01280             for (x = 0; x < 4; x++) {
01281                 vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
01282                             ref, &bmv[4*y + x],
01283                             4*x + x_off, 4*y + y_off, 4, 4,
01284                             width, height, s->linesize,
01285                             s->put_pixels_tab[2]);
01286             }
01287         }
01288 
01289         /* U/V */
01290         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
01291         for (y = 0; y < 2; y++) {
01292             for (x = 0; x < 2; x++) {
01293                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
01294                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
01295                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
01296                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
01297                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
01298                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
01299                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
01300                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
01301                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
01302                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
01303                 if (s->profile == 3) {
01304                     uvmv.x &= ~7;
01305                     uvmv.y &= ~7;
01306                 }
01307                 vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
01308                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
01309                               4*x + x_off, 4*y + y_off, 4, 4,
01310                               width, height, s->uvlinesize,
01311                               s->put_pixels_tab[2]);
01312             }
01313         }
01314         break;
01315     }
01316     case VP8_SPLITMVMODE_16x8:
01317         vp8_mc_part(s, dst, ref, x_off, y_off,
01318                     0, 0, 16, 8, width, height, &bmv[0]);
01319         vp8_mc_part(s, dst, ref, x_off, y_off,
01320                     0, 8, 16, 8, width, height, &bmv[1]);
01321         break;
01322     case VP8_SPLITMVMODE_8x16:
01323         vp8_mc_part(s, dst, ref, x_off, y_off,
01324                     0, 0, 8, 16, width, height, &bmv[0]);
01325         vp8_mc_part(s, dst, ref, x_off, y_off,
01326                     8, 0, 8, 16, width, height, &bmv[1]);
01327         break;
01328     case VP8_SPLITMVMODE_8x8:
01329         vp8_mc_part(s, dst, ref, x_off, y_off,
01330                     0, 0, 8, 8, width, height, &bmv[0]);
01331         vp8_mc_part(s, dst, ref, x_off, y_off,
01332                     8, 0, 8, 8, width, height, &bmv[1]);
01333         vp8_mc_part(s, dst, ref, x_off, y_off,
01334                     0, 8, 8, 8, width, height, &bmv[2]);
01335         vp8_mc_part(s, dst, ref, x_off, y_off,
01336                     8, 8, 8, 8, width, height, &bmv[3]);
01337         break;
01338     }
01339 }
01340 
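/* Add the inverse-transformed residuals to the predicted macroblock, using
 * the cached non-zero counts to skip all-zero 4x4 blocks and to choose
 * between DC-only and full IDCT per block. */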
01341 static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
01342 {
01343     int x, y, ch;
01344 
01345     if (mb->mode != MODE_I4x4) {
01346         uint8_t *y_dst = dst[0];
01347         for (y = 0; y < 4; y++) {
01348             uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
01349             if (nnz4) {
01350                 if (nnz4&~0x01010101) {
01351                     for (x = 0; x < 4; x++) {
01352                         if ((uint8_t)nnz4 == 1)
01353                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
01354                         else if((uint8_t)nnz4 > 1)
01355                             s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
01356                         nnz4 >>= 8;
01357                         if (!nnz4)
01358                             break;
01359                     }
01360                 } else {
01361                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
01362                 }
01363             }
01364             y_dst += 4*s->linesize;
01365         }
01366     }
01367 
01368     for (ch = 0; ch < 2; ch++) {
01369         uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
01370         if (nnz4) {
01371             uint8_t *ch_dst = dst[1+ch];
01372             if (nnz4&~0x01010101) {
01373                 for (y = 0; y < 2; y++) {
01374                     for (x = 0; x < 2; x++) {
01375                         if ((uint8_t)nnz4 == 1)
01376                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01377                         else if((uint8_t)nnz4 > 1)
01378                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
01379                         nnz4 >>= 8;
01380                         if (!nnz4)
01381                             goto chroma_idct_end;
01382                     }
01383                     ch_dst += 4*s->uvlinesize;
01384                 }
01385             } else {
01386                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
01387             }
01388         }
01389 chroma_idct_end: ;
01390     }
01391 }
01392 
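/* Compute the loop filter strength for one macroblock from the frame-level
 * filter level, the segment's level and the reference/mode deltas, and
 * derive the interior limit from the sharpness setting. */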
01393 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
01394 {
01395     int interior_limit, filter_level;
01396 
01397     if (s->segmentation.enabled) {
01398         filter_level = s->segmentation.filter_level[s->segment];
01399         if (!s->segmentation.absolute_vals)
01400             filter_level += s->filter.level;
01401     } else
01402         filter_level = s->filter.level;
01403 
01404     if (s->lf_delta.enabled) {
01405         filter_level += s->lf_delta.ref[mb->ref_frame];
01406         filter_level += s->lf_delta.mode[mb->mode];
01407     }
01408 
01409     filter_level = av_clip_uintp2(filter_level, 6);
01410 
01411     interior_limit = filter_level;
01412     if (s->filter.sharpness) {
01413         interior_limit >>= (s->filter.sharpness + 3) >> 2;
01414         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
01415     }
01416     interior_limit = FFMAX(interior_limit, 1);
01417 
01418     f->filter_level = filter_level;
01419     f->inner_limit = interior_limit;
01420     f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
01421 }
01422 
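/* Normal (full) loop filter for one macroblock: filter the left and top
 * macroblock edges with the stronger limits, then the inner block edges
 * when the macroblock has coded coefficients or uses split/I4x4 modes. */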
01423 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
01424 {
01425     int mbedge_lim, bedge_lim, hev_thresh;
01426     int filter_level = f->filter_level;
01427     int inner_limit = f->inner_limit;
01428     int inner_filter = f->inner_filter;
01429     int linesize = s->linesize;
01430     int uvlinesize = s->uvlinesize;
01431     static const uint8_t hev_thresh_lut[2][64] = {
01432         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01433           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01434           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
01435           3, 3, 3, 3 },
01436         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
01437           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
01438           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
01439           2, 2, 2, 2 }
01440     };
01441 
01442     if (!filter_level)
01443         return;
01444 
01445      bedge_lim = 2*filter_level + inner_limit;
01446     mbedge_lim = bedge_lim + 4;
01447 
01448     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
01449 
01450     if (mb_x) {
01451         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
01452                                        mbedge_lim, inner_limit, hev_thresh);
01453         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01454                                        mbedge_lim, inner_limit, hev_thresh);
01455     }
01456 
01457     if (inner_filter) {
01458         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
01459                                              inner_limit, hev_thresh);
01460         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
01461                                              inner_limit, hev_thresh);
01462         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
01463                                              inner_limit, hev_thresh);
01464         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
01465                                              uvlinesize,  bedge_lim,
01466                                              inner_limit, hev_thresh);
01467     }
01468 
01469     if (mb_y) {
01470         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
01471                                        mbedge_lim, inner_limit, hev_thresh);
01472         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
01473                                        mbedge_lim, inner_limit, hev_thresh);
01474     }
01475 
01476     if (inner_filter) {
01477         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
01478                                              linesize,    bedge_lim,
01479                                              inner_limit, hev_thresh);
01480         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
01481                                              linesize,    bedge_lim,
01482                                              inner_limit, hev_thresh);
01483         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
01484                                              linesize,    bedge_lim,
01485                                              inner_limit, hev_thresh);
01486         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
01487                                              dst[2] + 4 * uvlinesize,
01488                                              uvlinesize,  bedge_lim,
01489                                              inner_limit, hev_thresh);
01490     }
01491 }
01492 
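/* Simple-profile loop filter: luma only and no high-edge-variance check,
 * but the same edge layout as filter_mb() (macroblock edges plus interior
 * edges at offsets 4, 8 and 12). */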
01493 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
01494 {
01495     int mbedge_lim, bedge_lim;
01496     int filter_level = f->filter_level;
01497     int inner_limit = f->inner_limit;
01498     int inner_filter = f->inner_filter;
01499     int linesize = s->linesize;
01500 
01501     if (!filter_level)
01502         return;
01503 
01504      bedge_lim = 2*filter_level + inner_limit;
01505     mbedge_lim = bedge_lim + 4;
01506 
01507     if (mb_x)
01508         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
01509     if (inner_filter) {
01510         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
01511         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
01512         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
01513     }
01514 
01515     if (mb_y)
01516         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
01517     if (inner_filter) {
01518         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
01519         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
01520         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
01521     }
01522 }
01523 
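/* Loop-filter one finished macroblock row. Each macroblock's bottom edge is
 * saved to top_border before filtering, so the row below can still intra
 * predict from the unfiltered pixels. */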
01524 static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
01525 {
01526     VP8FilterStrength *f = s->filter_strength;
01527     uint8_t *dst[3] = {
01528         curframe->data[0] + 16*mb_y*s->linesize,
01529         curframe->data[1] +  8*mb_y*s->uvlinesize,
01530         curframe->data[2] +  8*mb_y*s->uvlinesize
01531     };
01532     int mb_x;
01533 
01534     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01535         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
01536         filter_mb(s, dst, f++, mb_x, mb_y);
01537         dst[0] += 16;
01538         dst[1] += 8;
01539         dst[2] += 8;
01540     }
01541 }
01542 
01543 static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
01544 {
01545     VP8FilterStrength *f = s->filter_strength;
01546     uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
01547     int mb_x;
01548 
01549     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
01550         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
01551         filter_mb_simple(s, dst, f++, mb_x, mb_y);
01552         dst += 16;
01553     }
01554 }
01555 
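/* Free the segmentation maps queued by vp8_release_frame(). On a normal
 * decode call one map may be left behind for reuse; on close, or after the
 * maps were invalidated by a size change, everything is freed. */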
01556 static void release_queued_segmaps(VP8Context *s, int is_close)
01557 {
01558     int leave_behind = is_close ? 0 : !s->maps_are_invalid;
01559     while (s->num_maps_to_be_freed > leave_behind)
01560         av_freep(&s->segmentation_maps[--s->num_maps_to_be_freed]);
01561     s->maps_are_invalid = 0;
01562 }
01563 
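/* Set things up for skipping the current frame: mark it invisible, clear the
 * current-frame pointer and, if this frame would have become the new last
 * frame, clear that reference as well. */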
01568 static void skipframe_clear(VP8Context *s)
01569 {
01570     s->invisible = 1;
01571     s->next_framep[VP56_FRAME_CURRENT] = NULL;
01572     if (s->update_last)
01573         s->next_framep[VP56_FRAME_PREVIOUS] = NULL;
01574 }
01575 
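/* Decode a single VP8 frame: parse the frame header, pick a free frame
 * buffer, decode every macroblock row (mode/motion parsing, intra or inter
 * prediction, IDCT), optionally loop-filter each finished row, and finally
 * rotate the reference frame pointers for the next call. */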
01576 static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
01577                             AVPacket *avpkt)
01578 {
01579     VP8Context *s = avctx->priv_data;
01580     int ret, mb_x, mb_y, i, y, referenced;
01581     enum AVDiscard skip_thresh;
01582     AVFrame *av_uninit(curframe), *prev_frame;
01583 
01584     release_queued_segmaps(s, 0);
01585 
01586     if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
01587         return ret;
01588 
01589     prev_frame = s->framep[VP56_FRAME_CURRENT];
01590 
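    // a frame is referenced if it updates the last frame or becomes the new
    // golden or altref frame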
01591     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
01592                                 || s->update_altref == VP56_FRAME_CURRENT;
01593 
01594     skip_thresh = !referenced ? AVDISCARD_NONREF :
01595                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
01596 
01597     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
01598 
01599     // release no longer referenced frames
01600     for (i = 0; i < 5; i++)
01601         if (s->frames[i].data[0] &&
01602             &s->frames[i] != prev_frame &&
01603             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01604             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01605             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
01606             vp8_release_frame(s, &s->frames[i], 1, 0);
01607 
01608     // find a free buffer
01609     for (i = 0; i < 5; i++)
01610         if (&s->frames[i] != prev_frame &&
01611             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
01612             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
01613             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
01614             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
01615             break;
01616         }
01617     if (i == 5) {
01618         av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
01619         abort();
01620     }
01621 
01622     // determine the new golden and altref frames (note that they may be swapped with each other)
01623     if (s->update_altref != VP56_FRAME_NONE) {
01624         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
01625     } else {
01626         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
01627     }
01628     if (s->update_golden != VP56_FRAME_NONE) {
01629         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
01630     } else {
01631         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
01632     }
01633     if (s->update_last) {
01634         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
01635     } else {
01636         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
01637     }
01638     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
01639 
01640     if (avctx->skip_frame >= skip_thresh) {
01641         skipframe_clear(s);
01642         ret = avpkt->size;
01643         goto skip_decode;
01644     }
01645 
01646     // Since the arithmetic coder's probabilities are updated every frame, the values we
01647     // have on a random interframe are quite likely complete junk if we didn't start
01648     // decoding on a keyframe. So just don't display anything rather than junk.
01649     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
01650                          !s->framep[VP56_FRAME_GOLDEN] ||
01651                          !s->framep[VP56_FRAME_GOLDEN2])) {
01652         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
01653         skipframe_clear(s);
01654         ret = AVERROR_INVALIDDATA;
01655         goto skip_decode;
01656     }
01657 
01658     if (curframe->data[0])
01659         vp8_release_frame(s, curframe, 1, 0);
01660 
01661     curframe->key_frame = s->keyframe;
01662     curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
01663     curframe->reference = referenced ? 3 : 0;
01664     if ((ret = vp8_alloc_frame(s, curframe))) {
01665         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
01666         skipframe_clear(s);
01667         goto skip_decode;
01668     }
01669 
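    /* Header parsing and frame setup are complete; with frame threading,
     * the next thread may start decoding the following frame from here on. */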
01670     ff_thread_finish_setup(avctx);
01671 
01672     s->linesize   = curframe->linesize[0];
01673     s->uvlinesize = curframe->linesize[1];
01674 
01675     if (!s->edge_emu_buffer)
01676         s->edge_emu_buffer = av_malloc(21*s->linesize);
01677 
01678     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
01679 
01680     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
01681     memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
01682 
01683     // top edge of 127 for intra prediction
01684     if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
01685         s->top_border[0][15] = s->top_border[0][23] = 127;
01686         memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
01687     }
01688     memset(s->ref_count, 0, sizeof(s->ref_count));
01689     if (s->keyframe)
01690         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
01691 
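    /* Motion vectors are clamped to the frame plus a 16-pixel margin;
     * mv_min/mv_max are stored in quarter-pel units, hence 16 << 2. */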
01692 #define MARGIN (16 << 2)
01693     s->mv_min.y = -MARGIN;
01694     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
01695 
01696     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
01697         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
01698         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
01699         int mb_xy = mb_y*s->mb_width;
01700         uint8_t *dst[3] = {
01701             curframe->data[0] + 16*mb_y*s->linesize,
01702             curframe->data[1] +  8*mb_y*s->uvlinesize,
01703             curframe->data[2] +  8*mb_y*s->uvlinesize
01704         };
01705 
01706         memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
01707         memset(s->left_nnz, 0, sizeof(s->left_nnz));
01708         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
01709 
01710         // left edge of 129 for intra prediction
01711         if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
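            /* the luma plane has 16 rows per macroblock, the chroma planes 8
             * (hence 16 >> !!i) */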
01712             for (i = 0; i < 3; i++)
01713                 for (y = 0; y < 16>>!!i; y++)
01714                     dst[i][y*curframe->linesize[i]-1] = 129;
01715             if (mb_y == 1) // top left edge is also 129
01716                 s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
01717         }
01718 
01719         s->mv_min.x = -MARGIN;
01720         s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
01721         if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
01722             ff_thread_await_progress(prev_frame, mb_y, 0);
01723 
01724         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
01725             /* Prefetch the current frame, 4 MBs ahead */
01726             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
01727             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
01728 
01729             decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
01730                            prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
01731 
01732             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
01733 
01734             if (!mb->skip)
01735                 decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
01736 
01737             if (mb->mode <= MODE_I4x4)
01738                 intra_predict(s, dst, mb, mb_x, mb_y);
01739             else
01740                 inter_predict(s, dst, mb, mb_x, mb_y);
01741 
01742             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
01743 
01744             if (!mb->skip) {
01745                 idct_mb(s, dst, mb);
01746             } else {
01747                 AV_ZERO64(s->left_nnz);
01748                 AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
01749 
01750                 // Reset the DC block predictors that would exist if the mb had coefficients
01751                 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
01752                     s->left_nnz[8]      = 0;
01753                     s->top_nnz[mb_x][8] = 0;
01754                 }
01755             }
01756 
01757             if (s->deblock_filter)
01758                 filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
01759 
01760             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
01761 
01762             dst[0] += 16;
01763             dst[1] += 8;
01764             dst[2] += 8;
01765             s->mv_min.x -= 64;
01766             s->mv_max.x -= 64;
01767         }
01768         if (s->deblock_filter) {
01769             if (s->filter.simple)
01770                 filter_mb_row_simple(s, curframe, mb_y);
01771             else
01772                 filter_mb_row(s, curframe, mb_y);
01773         }
01774         s->mv_min.y -= 64;
01775         s->mv_max.y -= 64;
01776 
01777         ff_thread_report_progress(curframe, mb_y, 0);
01778     }
01779 
01780     ff_thread_report_progress(curframe, INT_MAX, 0);
01781     ret = avpkt->size;
01782 skip_decode:
01783     // if future frames don't use the updated probabilities,
01784     // reset them to the values we saved
01785     if (!s->update_probabilities)
01786         s->prob[0] = s->prob[1];
01787 
01788     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
01789 
01790     if (!s->invisible) {
01791         *(AVFrame*)data = *curframe;
01792         *data_size = sizeof(AVFrame);
01793     }
01794 
01795     return ret;
01796 }
01797 
01798 static av_cold int vp8_decode_init(AVCodecContext *avctx)
01799 {
01800     VP8Context *s = avctx->priv_data;
01801 
01802     s->avctx = avctx;
01803     avctx->pix_fmt = PIX_FMT_YUV420P;
01804 
01805     dsputil_init(&s->dsp, avctx);
01806     ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1);
01807     ff_vp8dsp_init(&s->vp8dsp);
01808 
01809     return 0;
01810 }
01811 
01812 static av_cold int vp8_decode_free(AVCodecContext *avctx)
01813 {
01814     vp8_decode_flush_impl(avctx, 0, 1, 1);
01815     release_queued_segmaps(avctx->priv_data, 1);
01816     return 0;
01817 }
01818 
01819 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
01820 {
01821     VP8Context *s = avctx->priv_data;
01822 
01823     s->avctx = avctx;
01824 
01825     return 0;
01826 }
01827 
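/* Translate a frame pointer from the source thread's frames[] array to the
 * corresponding entry in this thread's frames[] array. */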
01828 #define REBASE(pic) \
01829     pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
01830 
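/* Carry the persistent inter-frame state (coefficient/mode probabilities,
 * segmentation, loop filter deltas, sign biases and the reference frame
 * pointers) over from the previous frame thread's context. */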
01831 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
01832 {
01833     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
01834 
01835     if (s->macroblocks_base &&
01836         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
01837         free_buffers(s);
01838         s->maps_are_invalid = 1;
01839     }
01840 
01841     s->prob[0] = s_src->prob[!s_src->update_probabilities];
01842     s->segmentation = s_src->segmentation;
01843     s->lf_delta = s_src->lf_delta;
01844     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
01845 
01846     memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
01847     s->framep[0] = REBASE(s_src->next_framep[0]);
01848     s->framep[1] = REBASE(s_src->next_framep[1]);
01849     s->framep[2] = REBASE(s_src->next_framep[2]);
01850     s->framep[3] = REBASE(s_src->next_framep[3]);
01851 
01852     return 0;
01853 }
01854 
01855 AVCodec ff_vp8_decoder = {
01856     .name           = "vp8",
01857     .type           = AVMEDIA_TYPE_VIDEO,
01858     .id             = CODEC_ID_VP8,
01859     .priv_data_size = sizeof(VP8Context),
01860     .init           = vp8_decode_init,
01861     .close          = vp8_decode_free,
01862     .decode         = vp8_decode_frame,
01863     .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
01864     .flush          = vp8_decode_flush,
01865     .long_name      = NULL_IF_CONFIG_SMALL("On2 VP8"),
01866     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
01867     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
01868 };