Qt 4.8
qdrawhelper_ssse3.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2014 Digia Plc and/or its subsidiary(-ies).
4 ** Contact: http://www.qt-project.org/legal
5 **
6 ** This file is part of the QtGui module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and Digia. For licensing terms and
14 ** conditions see http://qt.digia.com/licensing. For further information
15 ** use the contact form at http://qt.digia.com/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 2.1 requirements
23 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
24 **
25 ** In addition, as a special exception, Digia gives you certain additional
26 ** rights. These rights are described in the Digia Qt LGPL Exception
27 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
28 **
29 ** GNU General Public License Usage
30 ** Alternatively, this file may be used under the terms of the GNU
31 ** General Public License version 3.0 as published by the Free Software
32 ** Foundation and appearing in the file LICENSE.GPL included in the
33 ** packaging of this file. Please review the following information to
34 ** ensure the GNU General Public License version 3.0 requirements will be
35 ** met: http://www.gnu.org/copyleft/gpl.html.
36 **
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41 
42 #include <private/qdrawhelper_x86_p.h>
43 
44 #ifdef QT_HAVE_SSSE3
45 
46 #include <private/qdrawingprimitive_sse2_p.h>
47 
49 
50 inline static void blend_pixel(quint32 &dst, const quint32 src)
51 {
52  if (src >= 0xff000000)
53  dst = src;
54  else if (src != 0)
55  dst = src + BYTE_MUL(dst, qAlpha(~src));
56 }
57 
58 
/* The palignr instruction takes its shift as an immediate operand, so we have to generate the
   code for each of the different shifts (4, 8, 12). Checking the alignment inside the loop is
   unfortunately way too slow.

   BLENDING_LOOP is a helper for BLEND_SOURCE_OVER_ARGB32_SSSE3 and only makes sense inside it.
   Besides its two arguments it uses the following identifiers from the expansion site *by name*:
   x, src, dst, minusOffsetToAlignSrcOn16Bytes, srcVectorPrevLoaded, alphaShuffleMask, alphaMask,
   nullVector, one, colorMask and half.

   palignrOffset: byte shift handed to _mm_alignr_epi8; must be a compile-time constant.
   length:        number of elements in the row; the loop runs while the look-ahead load
                  (up to src[x - minusOffsetToAlignSrcOn16Bytes + 7]) stays below it.

   Each iteration does one aligned load of the next 16 source bytes, combines it with the
   previously loaded vector to obtain src[x..x+3], and blends that into dst[x..x+3] using
   aligned loads and stores on dst.
 */
#define BLENDING_LOOP(palignrOffset, length)\
    for (; x - minusOffsetToAlignSrcOn16Bytes < (length) - 7; x += 4) { \
        const __m128i srcVectorLastLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes + 4]);\
        const __m128i srcVector = _mm_alignr_epi8(srcVectorLastLoaded, srcVectorPrevLoaded, palignrOffset); \
        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
            /* all four source pixels are opaque: plain copy */ \
            _mm_store_si128((__m128i *)&dst[x], srcVector); \
        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
            /* at least one source pixel has a non-zero alpha: do the full blend */ \
            __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
            __m128i destMultipliedByOneMinusAlpha; \
            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
            _mm_store_si128((__m128i *)&dst[x], result); \
        } \
        /* the vector just loaded supplies the leading pixels of the next iteration */ \
        srcVectorPrevLoaded = srcVectorLastLoaded;\
    }
80 
81 
// Blend `length` elements of src over dst (source-over, no constant alpha).
// nullVector, half, one, colorMask and alphaMask are constant across the whole image/texture,
// and should be defined as:
//const __m128i nullVector = _mm_set1_epi32(0);
//const __m128i half = _mm_set1_epi16(0x80);
//const __m128i one = _mm_set1_epi16(0xff);
//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
//
// The computation being done is:
// result = s + d * (1-alpha)
// with shortcuts if fully opaque or fully transparent.
//
// Structure:
//  1. a scalar prologue (blend_pixel) until dst is 16-byte aligned;
//  2. a vector loop handling four elements per iteration, either with aligned loads from src
//     or, when src is not 16-byte aligned at that point, through BLENDING_LOOP;
//  3. a scalar epilogue (blend_pixel) for whatever is left.
//
// CAUTION: BLENDING_LOOP refers to src, dst, nullVector, half, one, colorMask and alphaMask by
// those literal names instead of going through this macro's parameters. Callers therefore have
// to pass variables that carry exactly these names.
#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
    int x = 0; \
\
    /* For each 32-bit lane, spread its top (alpha) byte into both 16-bit halves; a selector \
       with the high bit set (0xff) produces a zero byte. */ \
    const __m128i alphaShuffleMask = _mm_set_epi8(char(0xff),15,char(0xff),15,char(0xff),11,char(0xff),11,char(0xff),7,char(0xff),7,char(0xff),3,char(0xff),3);\
\
    /* First, get dst aligned. */ \
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, (length)) { \
        blend_pixel(dst[x], src[x]); \
    } \
\
    /* Number of 4-byte elements by which &src[x] lies past the previous 16-byte boundary (0..3). */ \
    const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\
\
    if (!minusOffsetToAlignSrcOn16Bytes) {\
        /* src is aligned, usual algorithm but with aligned operations.\
           See the SSE2 version for more documentation on the algorithm itself. */\
        for (; x < (length) - 3; x += 4) { \
            const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); \
            const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
            if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
                _mm_store_si128((__m128i *)&dst[x], srcVector); \
            } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
                __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \
                alphaChannel = _mm_sub_epi16(one, alphaChannel); \
                const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
                __m128i destMultipliedByOneMinusAlpha; \
                BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
                const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
                _mm_store_si128((__m128i *)&dst[x], result); \
            } \
        } /* end for() */\
    } else if (((length) - x) >= 8) {\
        /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\
        __m128i srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\
        const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\
\
        /* palignr needs a compile-time shift, hence one expansion of the loop per offset. */\
        switch (palignrOffset) {\
        case 4:\
            BLENDING_LOOP(4, (length))\
            break;\
        case 8:\
            BLENDING_LOOP(8, (length))\
            break;\
        case 12:\
            BLENDING_LOOP(12, (length))\
            break;\
        default:\
            /* not reachable: the offset is 1, 2 or 3 in this branch */\
            break;\
        }\
    }\
    /* Remaining elements, one at a time. */ \
    for (; x < (length); ++x) \
        blend_pixel(dst[x], src[x]); \
}
143 
144 void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
145  const uchar *srcPixels, int sbpl,
146  int w, int h,
147  int const_alpha)
148 {
149  const quint32 *src = (const quint32 *) srcPixels;
150  quint32 *dst = (quint32 *) destPixels;
151  if (const_alpha == 256) {
152  const __m128i alphaMask = _mm_set1_epi32(0xff000000);
153  const __m128i nullVector = _mm_setzero_si128();
154  const __m128i half = _mm_set1_epi16(0x80);
155  const __m128i one = _mm_set1_epi16(0xff);
156  const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
157 
158  for (int y = 0; y < h; ++y) {
159  BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask);
160  dst = (quint32 *)(((uchar *) dst) + dbpl);
161  src = (const quint32 *)(((const uchar *) src) + sbpl);
162  }
163  } else if (const_alpha != 0) {
164  // dest = (s + d * sia) * ca + d * cia
165  // = s * ca + d * (sia * ca + cia)
166  // = s * ca + d * (1 - sa*ca)
167  const_alpha = (const_alpha * 255) >> 8;
168  const __m128i nullVector = _mm_setzero_si128();
169  const __m128i half = _mm_set1_epi16(0x80);
170  const __m128i one = _mm_set1_epi16(0xff);
171  const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
172  const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
173  for (int y = 0; y < h; ++y) {
174  BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
175  dst = (quint32 *)(((uchar *) dst) + dbpl);
176  src = (const quint32 *)(((const uchar *) src) + sbpl);
177  }
178  }
179 }
180 
182 
183 #endif // QT_HAVE_SSSE3
#define QT_END_NAMESPACE
This macro expands to.
Definition: qglobal.h:90
int qAlpha(QRgb rgba)
Returns the alpha component of the ARGB quadruplet rgba.
Definition: qrgb.h:66
unsigned char uchar
Definition: qglobal.h:994
Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a)
#define QT_BEGIN_NAMESPACE
This macro expands to.
Definition: qglobal.h:89
unsigned int quint32
Definition: qglobal.h:938