42 #include <private/qdrawhelper_x86_p.h> 46 #include <private/qdrawingprimitive_sse2_p.h> 47 #include <private/qpaintengine_raster_p.h> 51 void qt_blend_argb32_on_argb32_sse2(
uchar *destPixels,
int dbpl,
52 const uchar *srcPixels,
int sbpl,
58 if (const_alpha == 256) {
59 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
60 const __m128i nullVector = _mm_set1_epi32(0);
61 const __m128i half = _mm_set1_epi16(0x80);
62 const __m128i one = _mm_set1_epi16(0xff);
63 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
64 for (
int y = 0; y < h; ++y) {
65 BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask);
69 }
else if (const_alpha != 0) {
73 const_alpha = (const_alpha * 255) >> 8;
74 const __m128i nullVector = _mm_set1_epi32(0);
75 const __m128i half = _mm_set1_epi16(0x80);
76 const __m128i one = _mm_set1_epi16(0xff);
77 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
78 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
79 for (
int y = 0; y < h; ++y) {
80 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
89 const uchar *srcPixels,
int sbpl,
93 void qt_blend_rgb32_on_rgb32_sse2(
uchar *destPixels,
int dbpl,
94 const uchar *srcPixels,
int sbpl,
100 if (const_alpha != 256) {
101 if (const_alpha != 0) {
102 const __m128i nullVector = _mm_set1_epi32(0);
103 const __m128i half = _mm_set1_epi16(0x80);
104 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
106 const_alpha = (const_alpha * 255) >> 8;
107 int one_minus_const_alpha = 255 - const_alpha;
108 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
109 const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
110 for (
int y = 0; y < h; ++y) {
118 for (; x < w-3; x += 4) {
119 __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
120 if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) {
121 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
123 INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half);
124 _mm_store_si128((__m128i *)&dst[x], result);
139 void QT_FASTCALL comp_func_SourceOver_sse2(
uint *destPixels,
const uint *srcPixels,
int length,
uint const_alpha)
146 const __m128i nullVector = _mm_set1_epi32(0);
147 const __m128i half = _mm_set1_epi16(0x80);
148 const __m128i one = _mm_set1_epi16(0xff);
149 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
150 if (const_alpha == 255) {
151 const __m128i alphaMask = _mm_set1_epi32(0xff000000);
152 BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask);
154 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
155 BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector);
163 if (const_alpha == 255) {
169 for (; x < length - 3; x += 4) {
170 const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
171 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
173 const __m128i result = _mm_adds_epu8(srcVector, dstVector);
174 _mm_store_si128((__m128i *)&dst[x], result);
178 for (; x < length; ++x)
181 const int one_minus_const_alpha = 255 - const_alpha;
182 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
183 const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
189 const __m128i half = _mm_set1_epi16(0x80);
190 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
192 for (; x < length - 3; x += 4) {
193 const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
194 const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
196 __m128i result = _mm_adds_epu8(srcVector, dstVector);
197 INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
198 _mm_store_si128((__m128i *)&dst[x], result);
202 for (; x < length; ++x)
209 if (const_alpha == 255) {
210 ::memcpy(dst, src, length *
sizeof(
uint));
212 const int ialpha = 255 - const_alpha;
221 const __m128i half = _mm_set1_epi16(0x80);
222 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
223 const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
224 const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha);
225 for (; x < length - 3; x += 4) {
226 const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
227 __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
228 INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
229 _mm_store_si128((__m128i *)&dst[x], dstVector);
233 for (; x < length; ++x)
242 case 6: *dest++ = value;
243 case 5: *dest++ = value;
244 case 4: *dest++ = value;
245 case 3: *dest++ = value;
246 case 2: *dest++ = value;
247 case 1: *dest = value;
252 const int align = (
quintptr)(dest) & 0xf;
254 case 4: *dest++ = value; --count;
255 case 8: *dest++ = value; --count;
256 case 12: *dest++ = value; --count;
259 int count128 = count / 4;
260 __m128i *dst128 =
reinterpret_cast<__m128i*
>(dest);
261 const __m128i value128 = _mm_set_epi32(value, value, value, value);
263 int n = (count128 + 3) / 4;
264 switch (count128 & 0x3) {
265 case 0:
do { _mm_stream_si128(dst128++, value128);
266 case 3: _mm_stream_si128(dst128++, value128);
267 case 2: _mm_stream_si128(dst128++, value128);
268 case 1: _mm_stream_si128(dst128++, value128);
272 const int rest = count & 0x3;
275 case 3: dest[count - 3] = value;
276 case 2: dest[count - 2] = value;
277 case 1: dest[count - 1] = value;
284 if ((const_alpha &
qAlpha(color)) == 255) {
285 qt_memfill32_sse2(destPixels, color, length);
287 if (const_alpha != 255)
288 color =
BYTE_MUL(color, const_alpha);
294 const __m128i colorVector = _mm_set1_epi32(color);
295 const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
296 const __m128i half = _mm_set1_epi16(0x80);
297 const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);
300 destPixels[x] = color +
BYTE_MUL(destPixels[x], minusAlphaOfColor);
302 for (; x < length-3; x += 4) {
303 __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
304 BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
305 dstVector = _mm_add_epi8(colorVector, dstVector);
306 _mm_store_si128((__m128i *)&dst[x], dstVector);
308 for (;x < length; ++x)
309 destPixels[x] = color +
BYTE_MUL(destPixels[x], minusAlphaOfColor);
314 comp_func_solid_SourceOver_sse2,
350 comp_func_SourceOver_sse2,
353 comp_func_Source_sse2,
389 case 2: *dest++ = value;
390 case 1: *dest = value;
395 const int align = (
quintptr)(dest) & 0x3;
397 case 2: *dest++ = value; --count;
400 const quint32 value32 = (value << 16) | value;
401 qt_memfill32_sse2(reinterpret_cast<quint32*>(dest), value32, count / 2);
404 dest[count - 1] = value;
407 void qt_bitmapblit32_sse2(
QRasterBuffer *rasterBuffer,
int x,
int y,
409 const uchar *src,
int width,
int height,
int stride)
414 const __m128i c128 = _mm_set1_epi32(color);
415 const __m128i maskmask1 = _mm_set_epi32(0x10101010, 0x20202020,
416 0x40404040, 0x80808080);
417 const __m128i maskadd1 = _mm_set_epi32(0x70707070, 0x60606060,
418 0x40404040, 0x00000000);
421 const __m128i maskmask2 = _mm_set_epi32(0x01010101, 0x02020202,
422 0x04040404, 0x08080808);
423 const __m128i maskadd2 = _mm_set_epi32(0x7f7f7f7f, 0x7e7e7e7e,
424 0x7c7c7c7c, 0x78787878);
426 for (
int x = 0; x < width; x += 8) {
427 const quint8 s = src[x >> 3];
430 __m128i mask1 = _mm_set1_epi8(s);
431 __m128i mask2 = mask1;
433 mask1 = _mm_and_si128(mask1, maskmask1);
434 mask1 = _mm_add_epi8(mask1, maskadd1);
435 _mm_maskmoveu_si128(c128, mask1, (
char*)(dest + x));
436 mask2 = _mm_and_si128(mask2, maskmask2);
437 mask2 = _mm_add_epi8(mask2, maskadd2);
438 _mm_maskmoveu_si128(c128, mask2, (
char*)(dest + x + 4));
447 __m128i mask1 = _mm_set1_epi8(s);
448 mask1 = _mm_and_si128(mask1, maskmask1);
449 mask1 = _mm_add_epi8(mask1, maskadd1);
450 _mm_maskmoveu_si128(c128, mask1, (
char*)(dest));
458 void qt_bitmapblit16_sse2(
QRasterBuffer *rasterBuffer,
int x,
int y,
460 const uchar *src,
int width,
int height,
int stride)
462 const quint16 c = qt_colorConvert<quint16, quint32>(color, 0);
466 const __m128i c128 = _mm_set1_epi16(c);
467 #if defined(Q_CC_MSVC) 468 # pragma warning(disable: 4309) // truncation of constant value 470 const __m128i maskmask = _mm_set_epi16(0x0101, 0x0202, 0x0404, 0x0808,
471 0x1010, 0x2020, 0x4040, 0x8080);
472 const __m128i maskadd = _mm_set_epi16(0x7f7f, 0x7e7e, 0x7c7c, 0x7878,
473 0x7070, 0x6060, 0x4040, 0x0000);
476 for (
int x = 0; x < width; x += 8) {
477 const quint8 s = src[x >> 3];
480 __m128i mask = _mm_set1_epi8(s);
481 mask = _mm_and_si128(mask, maskmask);
482 mask = _mm_add_epi8(mask, maskadd);
483 _mm_maskmoveu_si128(c128, mask, (
char*)(dest + x));
// 128-bit SSE2 register types used by the helpers below: one viewed as
// packed 32-bit integers, one as packed single-precision floats.
typedef __m128i Int32x4;
typedef __m128 Float32x4;

// Overlays an integer vector with a plain array of four ints so that
// individual lanes can be addressed by index.
union Vect_buffer_i {
    Int32x4 v;
    int i[4];
};

// Overlays a float vector with a plain array of four floats so that
// individual lanes can be addressed by index.
union Vect_buffer_f {
    Float32x4 v;
    float f[4];
};
499 static inline Float32x4 v_dup(
float x) {
return _mm_set1_ps(x); }
500 static inline Float32x4 v_dup(
double x) {
return _mm_set1_ps(x); }
501 static inline Int32x4 v_dup(
int x) {
return _mm_set1_epi32(x); }
502 static inline Int32x4 v_dup(
uint x) {
return _mm_set1_epi32(x); }
504 static inline Float32x4 v_add(Float32x4
a, Float32x4 b) {
return _mm_add_ps(a, b); }
505 static inline Int32x4 v_add(Int32x4
a, Int32x4 b) {
return _mm_add_epi32(a, b); }
507 static inline Float32x4 v_max(Float32x4
a, Float32x4 b) {
return _mm_max_ps(a, b); }
508 static inline Float32x4 v_min(Float32x4
a, Float32x4 b) {
return _mm_min_ps(a, b); }
509 static inline Int32x4 v_min_16(Int32x4
a, Int32x4 b) {
return _mm_min_epi16(a, b); }
511 static inline Int32x4 v_and(Int32x4
a, Int32x4 b) {
return _mm_and_si128(a, b); }
513 static inline Float32x4 v_sub(Float32x4
a, Float32x4 b) {
return _mm_sub_ps(a, b); }
514 static inline Int32x4 v_sub(Int32x4
a, Int32x4 b) {
return _mm_sub_epi32(a, b); }
516 static inline Float32x4 v_mul(Float32x4
a, Float32x4 b) {
return _mm_mul_ps(a, b); }
518 static inline Float32x4 v_sqrt(Float32x4 x) {
return _mm_sqrt_ps(x); }
520 static inline Int32x4 v_toInt(Float32x4 x) {
return _mm_cvttps_epi32(x); }
524 #if (defined(Q_CC_MSVC) && _MSC_VER < 1500) || (defined(Q_CC_GNU) && __GNUC__ < 4) 525 static inline Int32x4 v_greaterOrEqual(Float32x4
a, Float32x4 b)
527 union Convert { Int32x4 vi; Float32x4 vf; }
convert;
528 convert.vf = _mm_cmpgt_ps(a, b);
532 static inline Int32x4 v_greaterOrEqual(Float32x4
a, Float32x4 b) {
return _mm_castps_si128(_mm_cmpgt_ps(a, b)); }
537 int y,
int x,
int length)
539 return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op,
data, y, x, length);
545 #endif // QT_HAVE_SSE2 static const int numCompositionFunctions
void QT_FASTCALL rasterop_solid_SourceOrDestination(uint *dest, int length, uint color, uint const_alpha)
void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha)
QIntegerForSizeof< void * >::Unsigned quintptr
void QT_FASTCALL comp_func_solid_Plus(uint *dest, int length, uint color, uint const_alpha)
#define QT_END_NAMESPACE
This macro expands to.
void QT_FASTCALL comp_func_Difference(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_SourceIn(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_DestinationIn(uint *dest, const uint *src, int length, uint const_alpha)
int comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha)
void QT_FASTCALL comp_func_solid_DestinationAtop(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_Overlay(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_SourceIn(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_solid_SourceAndNotDestination(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_Screen(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_SourceAtop(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_SourceXorDestination(uint *dest, const uint *src, int length, uint const_alpha)
long ASN1_INTEGER_get ASN1_INTEGER * a
void QT_FASTCALL comp_func_solid_HardLight(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_SourceOut(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_Exclusion(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_Destination(uint *, const uint *, int, uint)
void QT_FASTCALL comp_func_solid_Clear(uint *dest, int length, uint, uint const_alpha)
void QT_FASTCALL rasterop_NotSourceOrNotDestination(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL rasterop_solid_SourceXorDestination(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_SourceOrDestination(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_SourceOut(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_HardLight(uint *dest, const uint *src, int length, uint const_alpha)
int qAlpha(QRgb rgba)
Returns the alpha component of the ARGB quadruplet rgba.
void QT_FASTCALL comp_func_DestinationOut(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_ColorBurn(uint *dest, int length, uint color, uint const_alpha)
Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b)
Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a)
#define QT_BEGIN_NAMESPACE
This macro expands to.
void QT_FASTCALL comp_func_Clear(uint *dest, const uint *, int length, uint const_alpha)
void QT_FASTCALL rasterop_NotSourceXorDestination(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL rasterop_NotSourceAndNotDestination(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_DestinationOut(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_Darken(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_Screen(uint *dest, int length, uint color, uint const_alpha)
static const char * data(const QByteArray &arr)
void QT_FASTCALL comp_func_Multiply(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_ColorDodge(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_Lighten(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_Difference(uint *dest, int length, uint color, uint const_alpha)
static bool convert(const QVariant::Private *d, QVariant::Type t, void *result, bool *ok)
void QT_FASTCALL comp_func_Overlay(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_ColorBurn(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL rasterop_solid_NotSourceOrNotDestination(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_Source(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_Exclusion(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_Darken(uint *dest, const uint *src, int length, uint const_alpha)
void(QT_FASTCALL * CompositionFunction)(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_SoftLight(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_SourceAtop(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL rasterop_solid_NotSourceAndDestination(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_solid_NotSourceXorDestination(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_NotSource(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_Multiply(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_DestinationIn(uint *dest, int length, uint color, uint const_alpha)
void(QT_FASTCALL * CompositionFunctionSolid)(uint *dest, int length, uint color, uint const_alpha)
int comp_func_Plus_one_pixel(uint d, const uint s)
void QT_FASTCALL comp_func_solid_Destination(uint *, int, uint, uint)
void QT_FASTCALL rasterop_SourceAndNotDestination(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_ColorDodge(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_solid_SourceAndDestination(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_solid_NotSourceAndNotDestination(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL rasterop_solid_NotSource(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_solid_DestinationOver(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_DestinationAtop(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL rasterop_NotSourceAndDestination(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_Lighten(uint *dest, const uint *src, int length, uint const_alpha)
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length)
void QT_FASTCALL rasterop_SourceAndDestination(uint *dest, const uint *src, int length, uint const_alpha)
void QT_FASTCALL comp_func_solid_XOR(uint *dest, int length, uint color, uint const_alpha)
void QT_FASTCALL comp_func_SoftLight(uint *dest, const uint *src, int length, uint const_alpha)