libavcodec/x86/dwt.c
Go to the documentation of this file.
00001 /*
00002  * MMX optimized discrete wavelet transform
00003  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
00004  * Copyright (c) 2010 David Conrad
00005  *
00006  * This file is part of FFmpeg.
00007  *
00008  * FFmpeg is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU Lesser General Public
00010  * License as published by the Free Software Foundation; either
00011  * version 2.1 of the License, or (at your option) any later version.
00012  *
00013  * FFmpeg is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016  * Lesser General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU Lesser General Public
00019  * License along with FFmpeg; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00021  */
00022 
00023 #include "libavutil/x86_cpu.h"
00024 #include "dsputil_mmx.h"
00025 #include "dwt.h"
00026 
/*
 * COMPOSE_VERTICAL(ext, align)
 *
 * For a given function-name suffix `ext`, declare the external
 * ff_*##ext composition routines (defined outside this file) and define
 * static wrappers of the same name without the ff_ prefix.
 *
 * `align` is the element granularity handled by the external routines.
 * It is expected to be a power of two: the wrappers round counts down
 * with a bit mask built from (align - 1).
 *
 * Vertical wrappers: the trailing (width % align) elements are computed
 * here with the scalar COMPOSE_* macros, then the external routine is
 * called with the rounded-down width.
 *
 * Horizontal Haar wrappers: the external routine is called with the full
 * width first, then the trailing ((w / 2) % align) output pairs are
 * produced here from tmp[] and the second half of b[].
 * NOTE(review): this relies on the external routine having filled tmp[]
 * for those trailing positions -- confirm against its implementation.
 */
#define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w); \
void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w); \
\
/* Updates b1[] in place from b0[], b1[], b2[]. */ \
static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
{ \
    /* largest multiple of align that is <= width */ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
\
    ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
} \
\
/* Updates b1[] in place from b0[], b1[], b2[]. */ \
static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
\
    ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
} \
\
/* Updates b2[] in place from the five rows b0[]..b4[]. */ \
static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                           IDWTELEM *b3, IDWTELEM *b4, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\
    ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
} \
\
/* Updates b2[] in place from the five rows b0[]..b4[]. */ \
static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
                                          IDWTELEM *b3, IDWTELEM *b4, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) \
        b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\
    ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
} \
\
/* Updates both b0[] and b1[] in place. */ \
static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
{ \
    int i, width_align = width & ~((align) - 1); \
\
    for (i = width_align; i < width; i++) { \
        /* order matters: the b1 update reads the freshly written b0 */ \
        b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
        b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
    } \
\
    ff_vertical_compose_haar##ext(b0, b1, width_align); \
} \
\
/* Haar horizontal step without the final rounding shift. */ \
static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w) \
{ \
    int w2 = w >> 1; \
    /* first half-width index not covered by a whole group of align */ \
    int x  = w2 - (w2 & ((align) - 1)); \
\
    ff_horizontal_compose_haar0i##ext(b, tmp, w); \
\
    for (; x < w2; x++) { \
        b[2*x  ] = tmp[x]; \
        b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]); \
    } \
} \
\
/* Haar horizontal step; each output is additionally rounded with (v + 1) >> 1. */ \
static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w) \
{ \
    int w2 = w >> 1; \
    int x  = w2 - (w2 & ((align) - 1)); \
\
    ff_horizontal_compose_haar1i##ext(b, tmp, w); \
\
    for (; x < w2; x++) { \
        b[2*x  ] = (tmp[x] + 1) >> 1; \
        b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1) >> 1; \
    } \
}
00111 
/*
 * Instantiate the wrapper sets. Nothing is generated unless HAVE_YASM is
 * set; the _mmx set (granularity 4) is additionally skipped on x86-64
 * builds, where only the _sse2 set (granularity 8) is generated.
 */
#if HAVE_YASM
COMPOSE_VERTICAL(_sse2, 8)
#if !ARCH_X86_64
COMPOSE_VERTICAL(_mmx, 4)
#endif /* !ARCH_X86_64 */
#endif /* HAVE_YASM */
00118 
00119 
/*
 * Guarded by HAVE_YASM: the wrapper below is only referenced from the
 * HAVE_YASM section of ff_spatial_idwt_init_mmx(). Without the guard a
 * build that has HAVE_YASM disabled gets an unused static function that
 * references an external symbol which is not provided in that
 * configuration.
 */
#if HAVE_YASM
/* External SSSE3 routine; declared here, defined outside this file. */
void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);

/**
 * Horizontal DD 9/7 composition of one line using the SSSE3 routine.
 *
 * The external routine is called on the full width first; afterwards the
 * last ((w / 2) % 8) output pairs are produced here in scalar code.
 *
 * @param b   line buffer, updated in place; the tail loop reads the
 *            second half (b[x + w/2]) and writes interleaved results to
 *            b[2*x] and b[2*x + 1]
 * @param tmp scratch buffer; the tail loop reads tmp[x-1] .. tmp[x+2]
 * @param w   line width in elements
 *
 * NOTE(review): the tail loop assumes the external routine has filled
 * tmp[] for the trailing positions, including the entries at index
 * w/2 and w/2 + 1 -- confirm against the routine's implementation.
 */
static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
{
    int w2 = w >> 1;
    /* first half-width index not covered by a whole group of 8 */
    int x  = w2 - (w2 & 7);

    ff_horizontal_compose_dd97i_ssse3(b, tmp, w);

    for (; x < w2; x++) {
        b[2*x  ] = (tmp[x] + 1) >> 1;
        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1) >> 1;
    }
}
#endif /* HAVE_YASM */
00133 
00134 void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
00135 {
00136 #if HAVE_YASM
00137   int mm_flags = av_get_cpu_flags();
00138 
00139 #if !ARCH_X86_64
00140     if (!(mm_flags & AV_CPU_FLAG_MMX))
00141         return;
00142 
00143     switch (type) {
00144     case DWT_DIRAC_DD9_7:
00145         d->vertical_compose_l0 = vertical_compose53iL0_mmx;
00146         d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
00147         break;
00148     case DWT_DIRAC_LEGALL5_3:
00149         d->vertical_compose_l0 = vertical_compose53iL0_mmx;
00150         d->vertical_compose_h0 = vertical_compose_dirac53iH0_mmx;
00151         break;
00152     case DWT_DIRAC_DD13_7:
00153         d->vertical_compose_l0 = vertical_compose_dd137iL0_mmx;
00154         d->vertical_compose_h0 = vertical_compose_dd97iH0_mmx;
00155         break;
00156     case DWT_DIRAC_HAAR0:
00157         d->vertical_compose   = vertical_compose_haar_mmx;
00158         d->horizontal_compose = horizontal_compose_haar0i_mmx;
00159         break;
00160     case DWT_DIRAC_HAAR1:
00161         d->vertical_compose   = vertical_compose_haar_mmx;
00162         d->horizontal_compose = horizontal_compose_haar1i_mmx;
00163         break;
00164     }
00165 #endif
00166 
00167     if (!(mm_flags & AV_CPU_FLAG_SSE2))
00168         return;
00169 
00170     switch (type) {
00171     case DWT_DIRAC_DD9_7:
00172         d->vertical_compose_l0 = vertical_compose53iL0_sse2;
00173         d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
00174         break;
00175     case DWT_DIRAC_LEGALL5_3:
00176         d->vertical_compose_l0 = vertical_compose53iL0_sse2;
00177         d->vertical_compose_h0 = vertical_compose_dirac53iH0_sse2;
00178         break;
00179     case DWT_DIRAC_DD13_7:
00180         d->vertical_compose_l0 = vertical_compose_dd137iL0_sse2;
00181         d->vertical_compose_h0 = vertical_compose_dd97iH0_sse2;
00182         break;
00183     case DWT_DIRAC_HAAR0:
00184         d->vertical_compose   = vertical_compose_haar_sse2;
00185         d->horizontal_compose = horizontal_compose_haar0i_sse2;
00186         break;
00187     case DWT_DIRAC_HAAR1:
00188         d->vertical_compose   = vertical_compose_haar_sse2;
00189         d->horizontal_compose = horizontal_compose_haar1i_sse2;
00190         break;
00191     }
00192 
00193     if (!(mm_flags & AV_CPU_FLAG_SSSE3))
00194         return;
00195 
00196     switch (type) {
00197     case DWT_DIRAC_DD9_7:
00198         d->horizontal_compose = horizontal_compose_dd97i_ssse3;
00199         break;
00200     }
00201 #endif // HAVE_YASM
00202 }