20 #if defined(__SSE2__) || (_M_IX86_FP >= 2) || defined(_M_X64) || defined(_M_AMD64) 22 #include <xmmintrin.h> 23 #include <emmintrin.h> 25 static INLINE __m128i _encode_sRGB_sse2_mul255(__m128 val) {
36 val = _mm_max_ps(val, _mm_set1_ps(0.0f));
37 val = _mm_min_ps(val, _mm_set1_ps(1.0f));
40 __m128 xf = _mm_mul_ps(val, _mm_set1_ps(6.3307e18f));
43 xf = _mm_cvtepi32_ps(_mm_castps_si128(xf));
46 xf = _mm_mul_ps(xf, _mm_set1_ps(2.0f / 3.0f));
49 xf = _mm_castsi128_ps(_mm_cvtps_epi32(xf));
52 __m128 xover = _mm_mul_ps(val, xf);
53 __m128 xunder = _mm_mul_ps(_mm_mul_ps(val, val),
57 __m128 xavg = _mm_mul_ps(_mm_add_ps(xover, xunder),
58 _mm_set1_ps(0.5286098f));
62 xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
63 xavg = _mm_mul_ps(xavg, _mm_rsqrt_ps(xavg));
68 xavg = _mm_mul_ps(xavg, _mm_set1_ps(269.122f));
69 xavg = _mm_sub_ps(xavg, _mm_set1_ps(13.55f));
74 __m128 lval = _mm_mul_ps(val,
75 _mm_set_ps(255.0f, 3294.6f, 3294.6f, 3294.6f));
77 lval = _mm_add_ps(lval, _mm_set1_ps(0.5f));
81 __m128 mask = _mm_cmpge_ps(val,
82 _mm_set_ps(2.0f, 0.0031308f, 0.0031308f, 0.0031308f));
85 return _mm_cvttps_epi32(_mm_or_ps(
86 _mm_and_ps(mask, xavg),
87 _mm_andnot_ps(mask, lval)));
91 encode_sRGB_uchar_sse2(
float val) {
94 return (
unsigned char)
95 _mm_extract_epi16(_encode_sRGB_sse2_mul255(_mm_set1_ps(val)), 0);
99 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into) {
101 __m128 vec = _mm_load_ps(color.get_data());
103 __m128 vec = _mm_loadu_ps(color.get_data());
106 __m128i vals = _encode_sRGB_sse2_mul255(vec);
107 into.r = _mm_extract_epi16(vals, 0);
108 into.g = _mm_extract_epi16(vals, 2);
109 into.b = _mm_extract_epi16(vals, 4);
113 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into, xelval &into_alpha) {
115 __m128 vec = _mm_load_ps(color.get_data());
117 __m128 vec = _mm_loadu_ps(color.get_data());
120 __m128i vals = _encode_sRGB_sse2_mul255(vec);
121 into.r = _mm_extract_epi16(vals, 0);
122 into.g = _mm_extract_epi16(vals, 2);
123 into.b = _mm_extract_epi16(vals, 4);
124 into_alpha = _mm_extract_epi16(vals, 6);
// Fallback branch: taken when none of the SSE2 feature macros tested by the
// opening #if is defined but the target is 32-bit x86 (__i386__ or _M_IX86).
// It emits a build warning and defines the same three encode_sRGB_uchar_sse2
// overloads as the SSE2 branch.
// NOTE(review): the return types, function bodies, closing braces and the
// terminating #endif of this branch are not present in the reviewed text;
// restore them from the upstream file before building.
127 #elif defined(__i386__) || defined(_M_IX86) 132 #warning convert_srgb_sse2.cxx is being compiled without SSE2 support! 135 encode_sRGB_uchar_sse2(
float val) {
// Fallback overload for a color without alpha output.
// NOTE(review): body missing here as well.
140 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into) {
// Fallback overload for a color with a separate alpha output.
// NOTE(review): body missing here as well.
145 encode_sRGB_uchar_sse2(
const LColorf &color,
xel &into, xelval &into_alpha) {
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
PANDA 3D SOFTWARE Copyright (c) Carnegie Mellon University.
EXPCL_PANDA_PNMIMAGE unsigned char encode_sRGB_uchar(unsigned char val)
Encodes the linearized unsigned char value to an sRGB-encoded unsigned char value.