#include <private/qdrawhelper_p.h>
#include <private/qblendfunctions_p.h>
#include <private/qmath_p.h>

#ifdef QT_HAVE_NEON

#include <private/qdrawhelper_neon_p.h>
#include <private/qpaintengine_raster_p.h>

QT_BEGIN_NAMESPACE

void qt_memfill32_neon(quint32 *dest, quint32 value, int count)
{
    const int epilogueSize = count % 16;
    if (count >= 16) {
        quint32 *const neonEnd = dest + count - epilogueSize;
        register uint32x4_t valueVector1 asm ("q0") = vdupq_n_u32(value);
        register uint32x4_t valueVector2 asm ("q1") = valueVector1;
        while (dest != neonEnd) {
            asm volatile (
                "vst2.32     { d0, d1, d2, d3 }, [%[DST]]!\n\t"
                "vst2.32     { d0, d1, d2, d3 }, [%[DST]]!\n\t"
                : [DST]"+r" (dest)
                : [VALUE1]"w"(valueVector1), [VALUE2]"w"(valueVector2)
                : "memory"
            );
        }
    }

    switch (epilogueSize)
    {
    case 15:     *dest++ = value;
    case 14:     *dest++ = value;
    case 13:     *dest++ = value;
    case 12:     *dest++ = value;
    case 11:     *dest++ = value;
    case 10:     *dest++ = value;
    case 9:      *dest++ = value;
    case 8:      *dest++ = value;
    case 7:      *dest++ = value;
    case 6:      *dest++ = value;
    case 5:      *dest++ = value;
    case 4:      *dest++ = value;
    case 3:      *dest++ = value;
    case 2:      *dest++ = value;
    case 1:      *dest++ = value;
    }
}
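// qt_memfill32_neon: each vst2.32 above stores the 32 bytes held in d0-d3
// (eight pixels), so one loop iteration fills 16 pixels; the trailing switch
// relies on case fall-through (no break statements) to write the remaining
// count % 16 pixels.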
static inline uint16x8_t qvdiv_255_u16(uint16x8_t x, uint16x8_t half)
{
    // result = (x + (x >> 8) + 0x80) >> 8

    const uint16x8_t temp = vshrq_n_u16(x, 8); // x >> 8
    const uint16x8_t sum_part = vaddq_u16(x, half); // x + 0x80
    const uint16x8_t sum = vaddq_u16(temp, sum_part);

    return vshrq_n_u16(sum, 8);
}
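// qvdiv_255_u16 is the usual byte-division trick: (x + (x >> 8) + 0x80) >> 8
// closely approximates x / 255 with rounding for the 16-bit products produced
// by the helpers below, so no real per-lane division is needed.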
static inline uint16x8_t qvbyte_mul_u16(uint16x8_t x, uint16x8_t alpha, uint16x8_t half)
{
    // result = (x * alpha) / 255, rounded

    const uint16x8_t t = vmulq_u16(x, alpha);
    return qvdiv_255_u16(t, half);
}
static inline uint16x8_t qvinterpolate_pixel_255(uint16x8_t x, uint16x8_t a, uint16x8_t y, uint16x8_t b, uint16x8_t half)
{
    // result = (x * a + y * b) / 255, rounded

    const uint16x8_t ta = vmulq_u16(x, a);
    const uint16x8_t tb = vmulq_u16(y, b);

    return qvdiv_255_u16(vaddq_u16(ta, tb), half);
}
static inline uint16x8_t qvsource_over_u16(uint16x8_t src16, uint16x8_t dst16, uint16x8_t half, uint16x8_t full)
{
    // broadcast the alpha channel (lane 3 of each pixel) across its pixel
    const uint16x4_t alpha16_high = vdup_lane_u16(vget_high_u16(src16), 3);
    const uint16x4_t alpha16_low = vdup_lane_u16(vget_low_u16(src16), 3);

    // 255 - alpha, per channel
    const uint16x8_t alpha16 = vsubq_u16(full, vcombine_u16(alpha16_low, alpha16_high));

    // src + dst * (255 - alpha) / 255
    return vaddq_u16(src16, qvbyte_mul_u16(dst16, alpha16, half));
}
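// src16/dst16 hold two premultiplied ARGB32 pixels widened to 16 bits per
// channel; lane 3 of each 4-lane half is that pixel's alpha.  An illustrative
// scalar equivalent (not part of the original file) of the same source-over
// step, written with the BYTE_MUL macro from qdrawhelper_p.h, would be:
//
//     static inline uint scalarSourceOver(uint src, uint dst)
//     { return src + BYTE_MUL(dst, 255 - qAlpha(src)); }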
// NEON assembler routines (adapted from pixman).
extern "C" void pixman_composite_over_8888_0565_asm_neon(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
extern "C" void pixman_composite_over_8888_8888_asm_neon(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
extern "C" void pixman_composite_src_0565_8888_asm_neon(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint16_t *src, int32_t src_stride);
extern "C" void pixman_composite_over_n_8_0565_asm_neon(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t src, int32_t unused, uint8_t *mask, int32_t mask_stride);
extern "C" void pixman_composite_scanline_over_asm_neon(int32_t w, uint32_t *dst, const uint32_t *src);
extern "C" void pixman_composite_src_0565_0565_asm_neon(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint16_t *src, int32_t src_stride);
// qblendfunctions.cpp
extern void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
                                                 const uchar *srcPixels, int sbpl,
                                                 int w, int h, int const_alpha);
void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h, int const_alpha)
{
    dbpl /= 4;
    sbpl /= 2;

    quint32 *dst = (quint32 *) destPixels;
    quint16 *src = (quint16 *) srcPixels;

    if (const_alpha != 256) {
        quint8 a = (255 * const_alpha) >> 8;
        quint8 ia = 255 - a;

        while (h--) {
            for (int x=0; x<w; ++x)
                dst[x] = INTERPOLATE_PIXEL_255(qt_colorConvert<quint32, quint16>(src[x], 0), a, dst[x], ia);
            dst += dbpl;
            src += sbpl;
        }
        return;
    }

    pixman_composite_src_0565_8888_asm_neon(w, h, dst, dbpl, src, sbpl);
}
// qblendfunctions.cpp
extern void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl,
                                    const uchar *src, int sbpl,
                                    int w, int h, int const_alpha);
template <int N> static inline void scanLineBlit16(quint16 *dst, quint16 *src, int dstride)
{
    if (N >= 2) {
        ((quint32 *)dst)[0] = ((quint32 *)src)[0];
        __builtin_prefetch(dst + dstride, 1, 0);
    }
    for (int i = 1; i < N/2; ++i)
        ((quint32 *)dst)[i] = ((quint32 *)src)[i];
    if (N & 1)
        dst[N-1] = src[N-1];
}
template <int Width> static inline void blockBlit16(quint16 *dst, quint16 *src, int dstride, int sstride, int h)
{
    union {
        quintptr address;
        quint16 *pointer;
    } u;
    u.pointer = dst;

    if (u.address & 2) {
        while (h--) {
            // dst is only 16-bit aligned: copy one pixel to reach 32-bit alignment
            dst[0] = src[0];
            scanLineBlit16<Width-1>(dst + 1, src + 1, dstride);

            dst += dstride;
            src += sstride;
        }
    } else {
        while (h--) {
            scanLineBlit16<Width>(dst, src, dstride);

            dst += dstride;
            src += sstride;
        }
    }
}
void qt_blend_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
                                  const uchar *srcPixels, int sbpl,
                                  int w, int h, int const_alpha)
{
    if (const_alpha != 256 || w >= 150) {
        qt_blend_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
        return;
    }

    int dstride = dbpl / 2;
    int sstride = sbpl / 2;

    quint16 *dst = (quint16 *) destPixels;
    quint16 *src = (quint16 *) srcPixels;

    switch (w) {
#define BLOCKBLIT(n) case n: blockBlit16<n>(dst, src, dstride, sstride, h); return;
        BLOCKBLIT(1); BLOCKBLIT(2); BLOCKBLIT(3); BLOCKBLIT(4); BLOCKBLIT(5);
        BLOCKBLIT(6); BLOCKBLIT(7); BLOCKBLIT(8); BLOCKBLIT(9); BLOCKBLIT(10);
        BLOCKBLIT(11); BLOCKBLIT(12); BLOCKBLIT(13); BLOCKBLIT(14); BLOCKBLIT(15);
#undef BLOCKBLIT
    }

    pixman_composite_src_0565_0565_asm_neon(w, h, dst, dstride, src, sstride);
}
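// Dispatch for opaque rgb16-on-rgb16 blits: widths 1-15 use the fully
// unrolled blockBlit16<n> specialisations, widths up to 149 go to the pixman
// NEON copy, and anything wider (or any const_alpha != 256) falls back to the
// generic qt_blend_rgb16_on_rgb16 at the top of the function.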
extern "C" void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha);
void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
                                   const uchar *srcPixels, int sbpl,
                                   int w, int h, int const_alpha)
{
    quint16 *dst = (quint16 *) destPixels;
    quint32 *src = (quint32 *) srcPixels;

    if (const_alpha != 256) {
        for (int y=0; y<h; ++y) {
            int i = 0;
            for (; i < w-7; i += 8)
                blend_8_pixels_argb32_on_rgb16_neon(&dst[i], &src[i], const_alpha);

            if (i < w) {
                int tail = w - i;

                quint16 dstBuffer[8];
                quint32 srcBuffer[8];

                for (int j = 0; j < tail; ++j) {
                    dstBuffer[j] = dst[i + j];
                    srcBuffer[j] = src[i + j];
                }

                blend_8_pixels_argb32_on_rgb16_neon(dstBuffer, srcBuffer, const_alpha);

                for (int j = 0; j < tail; ++j)
                    dst[i + j] = dstBuffer[j];
            }

            dst = (quint16 *)(((uchar *) dst) + dbpl);
            src = (quint32 *)(((uchar *) src) + sbpl);
        }
        return;
    }

    pixman_composite_over_8888_0565_asm_neon(w, h, dst, dbpl / 2, src, sbpl / 4);
}
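// blend_8_pixels_argb32_on_rgb16_neon always processes exactly eight pixels,
// so the ragged end of each scanline is staged through small stack buffers:
// copy the tail in, blend a full batch, and copy only the valid pixels back.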
void qt_blend_argb32_on_argb32_scanline_neon(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (const_alpha == 255) {
        pixman_composite_scanline_over_asm_neon(length, dest, src);
    } else {
        qt_blend_argb32_on_argb32_neon((uchar *)dest, 4 * length, (uchar *)src, 4 * length, length, 1, (const_alpha * 256) / 255);
    }
}
void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl,
                                    const uchar *srcPixels, int sbpl,
                                    int w, int h, int const_alpha)
{
    const uint *src = (const uint *) srcPixels;
    uint *dst = (uint *) destPixels;
    uint16x8_t half = vdupq_n_u16(0x80);
    uint16x8_t full = vdupq_n_u16(0xff);
    if (const_alpha == 256) {
        pixman_composite_over_8888_8888_asm_neon(w, h, (uint32_t *)destPixels, dbpl / 4, (uint32_t *)srcPixels, sbpl / 4);
    } else if (const_alpha != 0) {
        const_alpha = (const_alpha * 255) >> 8;
        uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha);
        for (int y = 0; y < h; ++y) {
            int x = 0;
            for (; x < w-3; x += 4) {
                if (src[x] | src[x+1] | src[x+2] | src[x+3]) {
                    uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]);
                    uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]);

                    const uint8x16_t src8 = vreinterpretq_u8_u32(src32);
                    const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32);

                    const uint8x8_t src8_low = vget_low_u8(src8);
                    const uint8x8_t dst8_low = vget_low_u8(dst8);

                    const uint8x8_t src8_high = vget_high_u8(src8);
                    const uint8x8_t dst8_high = vget_high_u8(dst8);

                    const uint16x8_t src16_low = vmovl_u8(src8_low);
                    const uint16x8_t dst16_low = vmovl_u8(dst8_low);

                    const uint16x8_t src16_high = vmovl_u8(src8_high);
                    const uint16x8_t dst16_high = vmovl_u8(dst8_high);

                    const uint16x8_t srcalpha16_low = qvbyte_mul_u16(src16_low, const_alpha16, half);
                    const uint16x8_t srcalpha16_high = qvbyte_mul_u16(src16_high, const_alpha16, half);

                    const uint16x8_t result16_low = qvsource_over_u16(srcalpha16_low, dst16_low, half, full);
                    const uint16x8_t result16_high = qvsource_over_u16(srcalpha16_high, dst16_high, half, full);

                    const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low));
                    const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high));

                    vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high));
                }
            }
            for (; x < w; ++x) {
                uint s = src[x];
                if (s != 0) {
                    s = BYTE_MUL(s, const_alpha);
                    dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
                }
            }
            dst = (uint *)(((uchar *) dst) + dbpl);
            src = (const uint *)(((const uchar *) src) + sbpl);
        }
    }
}
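// Per group of four pixels: widen to 16 bits per channel, scale the source by
// const_alpha, apply source-over against the destination and narrow back to
// bytes.  Fully transparent groups are skipped by the src[x] | src[x+1] | ...
// test, and the scalar loop finishes the last (w % 4) pixels of each scanline.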
// qblendfunctions.cpp
extern void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl,
                                    const uchar *srcPixels, int sbpl,
                                    int w, int h, int const_alpha);
void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl,
                                  const uchar *srcPixels, int sbpl,
                                  int w, int h, int const_alpha)
{
    if (const_alpha != 256) {
        if (const_alpha != 0) {
            const uint *src = (const uint *) srcPixels;
            uint *dst = (uint *) destPixels;
            uint16x8_t half = vdupq_n_u16(0x80);
            const_alpha = (const_alpha * 255) >> 8;
            int one_minus_const_alpha = 255 - const_alpha;
            uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha);
            uint16x8_t one_minus_const_alpha16 = vdupq_n_u16(255 - const_alpha);
            for (int y = 0; y < h; ++y) {
                int x = 0;
                for (; x < w-3; x += 4) {
                    uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]);
                    uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]);

                    const uint8x16_t src8 = vreinterpretq_u8_u32(src32);
                    const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32);

                    const uint8x8_t src8_low = vget_low_u8(src8);
                    const uint8x8_t dst8_low = vget_low_u8(dst8);

                    const uint8x8_t src8_high = vget_high_u8(src8);
                    const uint8x8_t dst8_high = vget_high_u8(dst8);

                    const uint16x8_t src16_low = vmovl_u8(src8_low);
                    const uint16x8_t dst16_low = vmovl_u8(dst8_low);

                    const uint16x8_t src16_high = vmovl_u8(src8_high);
                    const uint16x8_t dst16_high = vmovl_u8(dst8_high);

                    const uint16x8_t result16_low = qvinterpolate_pixel_255(src16_low, const_alpha16, dst16_low, one_minus_const_alpha16, half);
                    const uint16x8_t result16_high = qvinterpolate_pixel_255(src16_high, const_alpha16, dst16_high, one_minus_const_alpha16, half);

                    const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low));
                    const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high));

                    vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high));
                }
                for (; x < w; ++x)
                    dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);

                dst = (uint *)(((uchar *) dst) + dbpl);
                src = (const uint *)(((const uchar *) src) + sbpl);
            }
        }
    } else {
        qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
    }
}
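// Unlike the ARGB32 path, the RGB32 source carries no per-pixel alpha: each
// pixel is a plain blend dst = (src * a + dst * (255 - a)) / 255 with the
// constant alpha, which is exactly what qvinterpolate_pixel_255 computes.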
void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
                                  int x, int y, quint32 color,
                                  const uchar *bitmap,
                                  int mapWidth, int mapHeight, int mapStride,
                                  const QClipData *)
{
    quint16 *dest = reinterpret_cast<quint16 *>(rasterBuffer->scanLine(y)) + x;
    const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);

    uchar *mask = const_cast<uchar *>(bitmap);

    pixman_composite_over_n_8_0565_asm_neon(mapWidth, mapHeight, dest, destStride, color, 0, mask, mapStride);
}
extern "C" void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha);
template <typename SRC, typename BlendFunc>
struct Blend_on_RGB16_SourceAndConstAlpha_Neon {
    Blend_on_RGB16_SourceAndConstAlpha_Neon(BlendFunc blender, int const_alpha)
        : m_index(0)
        , m_blender(blender)
        , m_const_alpha(const_alpha)
    {
    }

    inline void write(quint16 *dst, SRC src)
    {
        srcBuffer[m_index++] = src;

        if (m_index == 8) {
            m_blender(dst - 7, srcBuffer, m_const_alpha);
            m_index = 0;
        }
    }

    inline void flush(quint16 *dst)
    {
        if (m_index > 0) {
            quint16 dstBuffer[8];
            for (int i = 0; i < m_index; ++i)
                dstBuffer[i] = dst[i - m_index];

            m_blender(dstBuffer, srcBuffer, m_const_alpha);

            for (int i = 0; i < m_index; ++i)
                dst[i - m_index] = dstBuffer[i];

            m_index = 0;
        }
    }

    SRC srcBuffer[8];

    int m_index;
    BlendFunc m_blender;
    int m_const_alpha;
};
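// The caller advances dst by one pixel per write(), so when the eighth source
// pixel arrives the batch to blend starts at dst - 7; flush() likewise works
// backwards from the current position, hence the dst[i - m_index] indexing.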
template <typename SRC, typename BlendFunc>
Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc>
Blend_on_RGB16_SourceAndConstAlpha_Neon_create(BlendFunc blender, int const_alpha)
{
    return Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc>(blender, const_alpha);
}
void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
                                         const uchar *srcPixels, int sbpl, int sh,
                                         const QRectF &targetRect,
                                         const QRectF &sourceRect,
                                         const QRect &clip,
                                         int const_alpha)
{
    if (const_alpha == 0)
        return;

    qt_scale_image_16bit<quint32>(destPixels, dbpl, srcPixels, sbpl, sh, targetRect, sourceRect, clip,
                                  Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha));
}
extern void qt_scale_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
                                          const uchar *srcPixels, int sbpl, int sh,
                                          const QRectF &targetRect, const QRectF &sourceRect, const QRect &clip, int const_alpha);
void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
                                        const uchar *srcPixels, int sbpl, int sh,
                                        const QRectF &targetRect,
                                        const QRectF &sourceRect,
                                        const QRect &clip,
                                        int const_alpha)
{
    if (const_alpha == 0)
        return;

    if (const_alpha == 256) {
        qt_scale_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, sh, targetRect, sourceRect, clip, const_alpha);
        return;
    }

    qt_scale_image_16bit<quint16>(destPixels, dbpl, srcPixels, sbpl, sh, targetRect, sourceRect, clip,
                                  Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha));
}
extern void qt_transform_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
                                              const uchar *srcPixels, int sbpl,
                                              const QRectF &targetRect, const QRectF &sourceRect, const QRect &clip,
                                              const QTransform &targetRectTransform, int const_alpha);
void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
                                            const uchar *srcPixels, int sbpl,
                                            const QRectF &targetRect,
                                            const QRectF &sourceRect,
                                            const QRect &clip,
                                            const QTransform &targetRectTransform,
                                            int const_alpha)
{
    if (const_alpha == 0)
        return;

    if (const_alpha == 256) {
        qt_transform_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, targetRectTransform, const_alpha);
        return;
    }

    qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl,
                       reinterpret_cast<const quint16 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform,
                       Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha));
}
void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
                                             const uchar *srcPixels, int sbpl,
                                             const QRectF &targetRect,
                                             const QRectF &sourceRect,
                                             const QRect &clip,
                                             const QTransform &targetRectTransform,
                                             int const_alpha)
{
    if (const_alpha == 0)
        return;

    qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl,
                       reinterpret_cast<const quint32 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform,
                       Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha));
}
static inline void convert_8_pixels_rgb16_to_argb32(quint32 *dst, const quint16 *src)
{
    asm volatile (
        "vld1.16     { d0, d1 }, [%[SRC]]\n\t"

        /* convert 8 r5g6b5 pixels to planar 8-bit: d4 = red, d3 = green, d2 = blue */
        "vshrn.u16   d4, q0, #8\n\t"
        "vshrn.u16   d3, q0, #3\n\t"
        "vsli.u16    q0, q0, #5\n\t"
        "vsri.u8     d4, d4, #5\n\t"
        "vsri.u8     d3, d3, #6\n\t"
        "vshrn.u16   d2, q0, #2\n\t"

        /* set alpha (d5) to 0xff */
        "mov         r2, #255\n\t"
        "vdup.8      d5, r2\n\t"

        "vst4.8      { d2, d3, d4, d5 }, [%[DST]]"
        : : [DST]"r" (dst), [SRC]"r" (src)
        : "memory", "r2", "d0", "d1", "d2", "d3", "d4", "d5"
    );
}

uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
{
    const ushort *data = (const ushort *)rasterBuffer->scanLine(y) + x;

    int i = 0;
    for (; i < length - 7; i += 8)
        convert_8_pixels_rgb16_to_argb32(&buffer[i], &data[i]);

    if (i < length) {
        quint16 srcBuffer[8];
        quint32 dstBuffer[8];

        int tail = length - i;
        for (int j = 0; j < tail; ++j)
            srcBuffer[j] = data[i + j];

        convert_8_pixels_rgb16_to_argb32(dstBuffer, srcBuffer);

        for (int j = 0; j < tail; ++j)
            buffer[i + j] = dstBuffer[j];
    }

    return buffer;
}
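// destFetch/destStore pair for RGB16 surfaces: composition happens in ARGB32,
// so scanlines are expanded eight pixels at a time (R5G6B5 split into planar
// 8-bit channels by the vshrn/vsli/vsri sequence, alpha forced to 0xff) and
// packed back to RGB16 by the converse routine below.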
static inline void convert_8_pixels_argb32_to_rgb16(quint16 *dst, const quint32 *src)
{
    asm volatile (
        "vld4.8      { d0, d1, d2, d3 }, [%[SRC]]\n\t"

        /* convert to r5g6b5 in q14 (d28, d29) */
        "vshll.u8    q14, d2, #8\n\t"
        "vshll.u8    q8, d1, #8\n\t"
        "vshll.u8    q9, d0, #8\n\t"
        "vsri.u16    q14, q8, #5\n\t"
        "vsri.u16    q14, q9, #11\n\t"

        "vst1.16     { d28, d29 }, [%[DST]]"
        : : [DST]"r" (dst), [SRC]"r" (src)
        : "memory", "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", "d28", "d29"
    );
}

void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
{
    quint16 *data = (quint16 *)rasterBuffer->scanLine(y) + x;

    int i = 0;
    for (; i < length - 7; i += 8)
        convert_8_pixels_argb32_to_rgb16(&data[i], &buffer[i]);

    if (i < length) {
        quint32 srcBuffer[8];
        quint16 dstBuffer[8];

        int tail = length - i;
        for (int j = 0; j < tail; ++j)
            srcBuffer[j] = buffer[i + j];

        convert_8_pixels_argb32_to_rgb16(dstBuffer, srcBuffer);

        for (int j = 0; j < tail; ++j)
            data[i + j] = dstBuffer[j];
    }
}
void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha)
{
    if ((const_alpha & qAlpha(color)) == 255) {
        QT_MEMFILL_UINT(destPixels, length, color);
    } else {
        if (const_alpha != 255)
            color = BYTE_MUL(color, const_alpha);

        const quint32 minusAlphaOfColor = qAlpha(~color);
        int x = 0;

        uint32_t *dst = (uint32_t *) destPixels;
        const uint32x4_t colorVector = vdupq_n_u32(color);
        uint16x8_t half = vdupq_n_u16(0x80);
        const uint16x8_t minusAlphaOfColorVector = vdupq_n_u16(minusAlphaOfColor);

        for (; x < length-3; x += 4) {
            uint32x4_t dstVector = vld1q_u32(&dst[x]);

            const uint8x16_t dst8 = vreinterpretq_u8_u32(dstVector);

            const uint8x8_t dst8_low = vget_low_u8(dst8);
            const uint8x8_t dst8_high = vget_high_u8(dst8);

            const uint16x8_t dst16_low = vmovl_u8(dst8_low);
            const uint16x8_t dst16_high = vmovl_u8(dst8_high);

            const uint16x8_t result16_low = qvbyte_mul_u16(dst16_low, minusAlphaOfColorVector, half);
            const uint16x8_t result16_high = qvbyte_mul_u16(dst16_high, minusAlphaOfColorVector, half);

            const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low));
            const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high));

            uint32x4_t blendedPixels = vcombine_u32(result32_low, result32_high);
            uint32x4_t colorPlusBlendedPixels = vaddq_u32(colorVector, blendedPixels);
            vst1q_u32(&dst[x], colorPlusBlendedPixels);
        }

        for (; x < length; ++x)
            destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
    }
}
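// Solid source-over of a premultiplied color: dst = color + dst * (255 -
// alpha(color)) / 255.  A fully opaque fill short-circuits to QT_MEMFILL_UINT;
// otherwise the color is scaled once by const_alpha and only the destination
// needs the per-pixel multiply.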
void QT_FASTCALL comp_func_Plus_neon(uint *dst, const uint *src, int length, uint const_alpha)
{
    if (const_alpha == 255) {
        uint *const end = dst + length;
        uint *const neonEnd = end - 3;

        while (dst < neonEnd) {
            asm volatile (
                "vld2.8     { d0, d1 }, [%[SRC]]!\n\t"
                "vld2.8     { d2, d3 }, [%[DST]]\n\t"
                "vqadd.u8   q0, q0, q1\n\t"
                "vst2.8     { d0, d1 }, [%[DST]]!\n\t"
                : [DST]"+r" (dst), [SRC]"+r" (src)
                :
                : "memory", "d0", "d1", "d2", "d3", "q0", "q1"
            );
        }

        while (dst != end) {
            *dst = comp_func_Plus_one_pixel(*dst, *src);
            ++dst;
            ++src;
        }
    } else {
        int x = 0;
        const int one_minus_const_alpha = 255 - const_alpha;
        const uint16x8_t constAlphaVector = vdupq_n_u16(const_alpha);
        const uint16x8_t oneMinusconstAlphaVector = vdupq_n_u16(one_minus_const_alpha);

        const uint16x8_t half = vdupq_n_u16(0x80);
        for (; x < length - 3; x += 4) {
            const uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]);
            const uint8x16_t src8 = vreinterpretq_u8_u32(src32);
            uint8x16_t dst8 = vld1q_u8((uint8_t *)&dst[x]);
            uint8x16_t result = vqaddq_u8(dst8, src8);

            uint16x8_t result_low = vmovl_u8(vget_low_u8(result));
            uint16x8_t result_high = vmovl_u8(vget_high_u8(result));

            uint16x8_t dst_low = vmovl_u8(vget_low_u8(dst8));
            uint16x8_t dst_high = vmovl_u8(vget_high_u8(dst8));

            result_low = qvinterpolate_pixel_255(result_low, constAlphaVector, dst_low, oneMinusconstAlphaVector, half);
            result_high = qvinterpolate_pixel_255(result_high, constAlphaVector, dst_high, oneMinusconstAlphaVector, half);

            const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result_low));
            const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result_high));
            vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high));
        }

        for (; x < length; ++x)
            dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
    }
}
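// The Plus composition mode saturates dst + src per channel (vqadd.u8).  With
// a non-opaque constant alpha the saturated sum is then interpolated back
// towards the original destination, matching the scalar
// comp_func_Plus_one_pixel_const_alpha helper used for the leftover pixels.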
extern "C" void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count);
void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sstride, uchar *destPixels, int dstride)
{
    const ushort *src = (const ushort *)srcPixels;
    ushort *dest = (ushort *)destPixels;

    sstride /= sizeof(ushort);
    dstride /= sizeof(ushort);

    const int pack = sizeof(quint32) / sizeof(ushort);
    // assumption: prologue length = rows copied singly until dest is 32-bit aligned
    const int unaligned =
        qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(ushort)), uint(h));
    const int restX = w % tileSize;
    const int restY = (h - unaligned) % tileSize;
    const int unoptimizedY = restY % pack;
    const int numTilesX = w / tileSize + (restX > 0);
    const int numTilesY = (h - unaligned) / tileSize + (restY >= pack);

    for (int tx = 0; tx < numTilesX; ++tx) {
        const int startx = w - tx * tileSize - 1;
        const int stopx = qMax(startx - tileSize, 0);

        if (unaligned) {
            for (int x = startx; x >= stopx; --x) {
                ushort *d = dest + (w - x - 1) * dstride;
                for (int y = 0; y < unaligned; ++y) {
                    *d++ = src[y * sstride + x];
                }
            }
        }

        for (int ty = 0; ty < numTilesY; ++ty) {
            const int starty = ty * tileSize + unaligned;
            const int stopy = qMin(starty + tileSize, h - unoptimizedY);

            int x = startx;
            // rotate eight columns at a time with the NEON assembler routine
            for (; x >= stopx + 7; x -= 8) {
                ushort *d = dest + (w - x - 1) * dstride + starty;
                const ushort *s = &src[starty * sstride + x - 7];
                qt_rotate90_16_neon(d, s, sstride * 2, dstride * 2, stopy - starty);
            }

            // remaining columns: pack two pixels per 32-bit store (little-endian packing)
            for (; x >= stopx; --x) {
                quint32 *d = reinterpret_cast<quint32 *>(dest + (w - x - 1) * dstride + starty);
                for (int y = starty; y < stopy; y += pack) {
                    quint32 c = src[y * sstride + x];
                    for (int i = 1; i < pack; ++i) {
                        const int shift = (sizeof(int) * 8 / pack * i);
                        const ushort color = src[(y + i) * sstride + x];
                        c |= color << shift;
                    }
                    *d++ = c;
                }
            }
        }

        if (unoptimizedY) {
            const int starty = h - unoptimizedY;
            for (int x = startx; x >= stopx; --x) {
                ushort *d = dest + (w - x - 1) * dstride + starty;
                for (int y = starty; y < h; ++y) {
                    *d++ = src[y * sstride + x];
                }
            }
        }
    }
}
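// The rotation helpers work tile by tile to stay cache friendly: a prologue
// copies single pixels until the destination is 32-bit aligned, strips of
// eight columns are handed to the qt_rotate90_16_neon assembler routine, and
// the leftover columns and rows are rotated with packed quint32 writes or
// plain per-pixel loops.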
extern "C" void qt_rotate270_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count);
void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sstride, uchar *destPixels, int dstride)
{
    const ushort *src = (const ushort *)srcPixels;
    ushort *dest = (ushort *)destPixels;

    sstride /= sizeof(ushort);
    dstride /= sizeof(ushort);

    const int pack = sizeof(quint32) / sizeof(ushort);
    // assumption: prologue length = rows copied singly until dest is 32-bit aligned
    const int unaligned =
        qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(ushort)), uint(h));
    const int restX = w % tileSize;
    const int restY = (h - unaligned) % tileSize;
    const int unoptimizedY = restY % pack;
    const int numTilesX = w / tileSize + (restX > 0);
    const int numTilesY = (h - unaligned) / tileSize + (restY >= pack);

    for (int tx = 0; tx < numTilesX; ++tx) {
        const int startx = tx * tileSize;
        const int stopx = qMin(startx + tileSize, w);

        if (unaligned) {
            for (int x = startx; x < stopx; ++x) {
                ushort *d = dest + x * dstride;
                for (int y = h - 1; y >= h - unaligned; --y) {
                    *d++ = src[y * sstride + x];
                }
            }
        }

        for (int ty = 0; ty < numTilesY; ++ty) {
            const int starty = h - 1 - unaligned - ty * tileSize;
            const int stopy = qMax(starty - tileSize, unoptimizedY);

            int x = startx;
            // rotate eight columns at a time with the NEON assembler routine
            for (; x < stopx - 7; x += 8) {
                ushort *d = dest + x * dstride + h - 1 - starty;
                const ushort *s = &src[starty * sstride + x];
                qt_rotate90_16_neon(d + 7 * dstride, s, -sstride * 2, -dstride * 2, starty - stopy);
            }

            // remaining columns: pack two pixels per 32-bit store (little-endian packing)
            for (; x < stopx; ++x) {
                quint32 *d = reinterpret_cast<quint32 *>(dest + x * dstride + h - 1 - starty);
                for (int y = starty; y > stopy; y -= pack) {
                    quint32 c = src[y * sstride + x];
                    for (int i = 1; i < pack; ++i) {
                        const int shift = (sizeof(int) * 8 / pack * i);
                        const ushort color = src[(y - i) * sstride + x];
                        c |= color << shift;
                    }
                    *d++ = c;
                }
            }
        }

        if (unoptimizedY) {
            const int starty = unoptimizedY - 1;
            for (int x = startx; x < stopx; ++x) {
                ushort *d = dest + x * dstride + h - 1 - starty;
                for (int y = starty; y >= 0; --y) {
                    *d++ = src[y * sstride + x];
                }
            }
        }
    }
}
class QSimdNeon
{
public:
    typedef int32x4_t Int32x4;
    typedef float32x4_t Float32x4;

    union Vect_buffer_i { Int32x4 v; int i[4]; };
    union Vect_buffer_f { Float32x4 v; float f[4]; };

    static inline Float32x4 v_dup(float x) { return vdupq_n_f32(x); }
    static inline Int32x4 v_dup(int x) { return vdupq_n_s32(x); }
    static inline Int32x4 v_dup(uint x) { return vdupq_n_s32(x); }

    static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return vaddq_f32(a, b); }
    static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return vaddq_s32(a, b); }

    static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return vmaxq_f32(a, b); }
    static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return vminq_f32(a, b); }
    static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return vminq_s32(a, b); }

    static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return vandq_s32(a, b); }

    static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return vsubq_f32(a, b); }
    static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return vsubq_s32(a, b); }

    static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return vmulq_f32(a, b); }

    static inline Float32x4 v_sqrt(Float32x4 x) { Float32x4 y = vrsqrteq_f32(x); y = vmulq_f32(y, vrsqrtsq_f32(x, vmulq_f32(y, y))); return vmulq_f32(x, y); }

    static inline Int32x4 v_toInt(Float32x4 x) { return vcvtq_s32_f32(x); }

    static inline Int32x4 v_greaterOrEqual(Float32x4 a, Float32x4 b) { return vreinterpretq_s32_u32(vcgeq_f32(a, b)); }
};
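// QSimdNeon exposes the small vector vocabulary (dup, add, sub, mul, min/max,
// and, sqrt, comparisons, conversions) that the generic QRadialFetchSimd
// template expects, so the radial gradient fetcher below is a plain
// instantiation.  v_sqrt computes x * (1 / sqrt(x)) via vrsqrteq_f32 plus one
// Newton-Raphson refinement step, since NEON has no vector square root.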
const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
                                                       int y, int x, int length)
{
    return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdNeon> >(buffer, op, data, y, x, length);
}
QT_END_NAMESPACE

#endif // QT_HAVE_NEON