42 #include <private/qdrawhelper_x86_p.h> 46 #include <private/qdrawingprimitive_sse2_p.h> 52 if (src >= 0xff000000)
// NOTE(review): this excerpt carries the original file's line numbers inline and
// skips some of them, so parts of the macros and of the function below are not
// visible here. The comments describe only the visible code.
//
// BLENDING_LOOP(palignrOffset, length): four pixels per iteration while
// x - minusOffsetToAlignSrcOn16Bytes < length - 7. Each iteration does an aligned
// load of the next 16 source bytes and combines it with the previously loaded
// vector through _mm_alignr_epi8 at byte offset palignrOffset. If all four alpha
// bytes equal the alpha mask the source vector is stored directly; if all four
// are zero nothing is written; otherwise dst is multiplied by (one - alpha) with
// BYTE_MUL_SSE2 and added to the source vector.
//
// BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, ...): blends single pixels with
// blend_pixel() inside ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) (presumably until
// dst is 16-byte aligned -- confirm against that macro's definition), then derives
// minusOffsetToAlignSrcOn16Bytes from bits 2-3 of the address of src[x]. When that
// offset is zero, src is read with aligned loads, four pixels at a time while
// x < length - 3. Otherwise, if at least 8 pixels remain, the preceding aligned
// source vector is loaded and the switch on palignrOffset (offset << 2) selects
// BLENDING_LOOP with 4, 8 or 12. Any remaining pixels are handled one at a time
// with blend_pixel().
62 #define BLENDING_LOOP(palignrOffset, length)\ 63 for (; x-minusOffsetToAlignSrcOn16Bytes < length-7; x += 4) { \ 64 const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\ 65 const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \ 66 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ 67 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ 68 _mm_store_si128((__m128i *)&dst[x], srcVector); \ 69 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ 70 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ 71 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ 72 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ 73 __m128i destMultipliedByOneMinusAlpha; \ 74 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ 75 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ 76 _mm_store_si128((__m128i *)&dst[x], result); \ 78 srcVectorPrevLoaded = srcVectorLastLoaded;\ 93 #define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ 97 ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ 98 blend_pixel(dst[x], src[x]); \ 101 const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ 103 if (!minusOffsetToAlignSrcOn16Bytes) {\ 106 const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ 107 for (; x < length-3; x += 4) { \ 108 const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); \ 109 const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ 110 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ 111 _mm_store_si128((__m128i *)&dst[x], srcVector); \ 112 } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 
0xffff) { \ 113 __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ 114 alphaChannel = _mm_sub_epi16(one, alphaChannel); \ 115 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ 116 __m128i destMultipliedByOneMinusAlpha; \ 117 BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ 118 const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ 119 _mm_store_si128((__m128i *)&dst[x], result); \ 122 } else if ((length - x) >= 8) {\ 124 __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ 125 const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ 127 const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ 128 switch (palignrOffset) {\ 130 BLENDING_LOOP(4, length)\ 133 BLENDING_LOOP(8, length)\ 136 BLENDING_LOOP(12, length)\ 140 for (; x < length; ++x) \ 141 blend_pixel(dst[x], src[x]); \ 144 void qt_blend_argb32_on_argb32_ssse3(
uchar *destPixels,
int dbpl,
145 const uchar *srcPixels,
int sbpl,
// NOTE(review): the remaining parameters and the declarations of dst/src are not
// visible in this excerpt; w, h and const_alpha are used below -- confirm their
// declarations against the full file.
//
// const_alpha == 256: the constant alpha is not applied. The loop runs for
// y in [0, h) and invokes the SSSE3 source-over macro on dst/src with length w.
151 if (const_alpha == 256) {
152 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
153 const __m128i nullVector = _mm_setzero_si128();
154 const __m128i half = _mm_set1_epi16(0x80);
155 const __m128i one = _mm_set1_epi16(0xff);
156 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
158 for (
int y = 0; y < h; ++y) {
159 BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
163 }
// Any other non-zero const_alpha: it is rescaled with (const_alpha * 255) >> 8,
// broadcast into constAlphaVector, and passed to the SSE2 constant-alpha macro in
// the same y loop. In the visible code neither branch is taken when
// const_alpha == 0.
else if (const_alpha != 0) {
167 const_alpha = (const_alpha * 255) >> 8;
168 const __m128i nullVector = _mm_setzero_si128();
169 const __m128i half = _mm_set1_epi16(0x80);
170 const __m128i one = _mm_set1_epi16(0xff);
171 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
172 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
173 for (
int y = 0; y < h; ++y) {
174 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
183 #endif // QT_HAVE_SSSE3 #define QT_END_NAMESPACE
This macro expands to `}` if QT_NAMESPACE is defined and nothing otherwise.
int qAlpha(QRgb rgba)
Returns the alpha component of the ARGB quadruplet rgba.
Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a)
#define QT_BEGIN_NAMESPACE
This macro expands to `namespace QT_NAMESPACE {` if QT_NAMESPACE is defined and nothing otherwise.