-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsrgb.h
65 lines (56 loc) · 2.48 KB
/
srgb.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#pragma once
#include <cmath>
#include <cstring>
//sRGB conversion routines.
//Designed to be easy to vectorize by only using instructions trivially available with SSE/AVX (avoids pow etc.).
//These are the C++ reference implementations.
//They are accurate enough to support 8-bit sRGB -> 32-bit float linear -> 8-bit sRGB and produce the original value.
//The SIMD variants can also use the 11-bit rcp instructions, as we really do not need full 32-bit float precision here.
//The SIMD code is not provided because it uses my own SSE/AVX library, which is too large and complex for someone just wanting some sRGB conversion.
//This code should be auto-vectorizable by any reasonably capable auto-vectorizer.
//Do not expect MSVC to auto-vectorize this: it fails to handle the conditional, even though it is a trivial VBLEND.
namespace srgb {

namespace detail {

// The root approximations below manipulate the raw bits of a float through an int.
static_assert(sizeof(int) == sizeof(float),
              "srgb: bit-level root approximations need int and float of equal size");

// Returns the object representation of `value` as an int.
// memcpy is the well-defined way to type-pun (a reinterpret_cast dereference
// violates strict aliasing); compilers lower it to a plain register move.
inline int float_to_bits(float value) {
    int bits;
    std::memcpy(&bits, &value, sizeof bits);
    return bits;
}

// Inverse of float_to_bits: reinterprets the int's bytes as a float.
inline float bits_to_float(int bits) {
    float value;
    std::memcpy(&value, &bits, sizeof value);
    return value;
}

} // namespace detail

// Approximate cube root.
// Builds an initial guess by dividing the float's bit pattern by ~3 and adding
// a bias constant, then refines it with a single Newton step.
// NOTE(review): presumably only meant for non-negative finite inputs - confirm with callers.
inline float acbrt2(float x0) {
    int ix = detail::float_to_bits(x0);
    ix = ix / 4 + ix / 16; // Approximate divide by 3.
    ix = ix + ix / 16;
    ix = ix + ix / 256;
    ix = 0x2a5137a0 + ix;  // Initial guess.
    float x = detail::bits_to_float(ix);
    x = 0.33333333f * (2.0f * x + x0 / (x * x)); // Newton step for x^3 = x0.
    return x;
}

// Approximates pow(x, 1/2.4) == pow(x, 5/12) as cbrt(x) * cbrt(x)^(1/4).
// http://stackoverflow.com/questions/6475373/optimizations-for-pow-with-const-non-integer-exponent
inline float pow512f_approx_cbrt(float x) {
    const float cbrtx = acbrt2(x);
    // std::sqrt keeps the whole computation in float (no silent promotion to double).
    return cbrtx * std::sqrt(std::sqrt(cbrtx));
}

// Approximate 5th root.
// Same scheme as acbrt2: the bit pattern is divided by ~5 with an alternating
// series of shifts, biased by a constant, and refined with one Newton step.
// Declared inline so this header can be included from several translation units.
// NOTE(review): presumably only meant for non-negative finite inputs - confirm with callers.
inline float Fifthroot(float x0) {
    int ix = detail::float_to_bits(x0);
    const int div5 = (ix >> 2) - (ix >> 4) + (ix >> 6) - (ix >> 8) + (ix >> 10);
    ix = 0x32c82fee + div5;
    // Alternative to the previous 2 lines that uses fewer instructions
    // (but much higher latency ones):
    //   float fx = float(ix)*0.2f + float(0x32c82fee);
    //   ix = int(fx);
    float x = detail::bits_to_float(ix);
    const float s = x * x;
    x -= (x - x0 / (s * s)) * 0.2f; // One Newton step for x^5 = x0 does the trick.
    return x;
}

// Converts a linear value to its sRGB-encoded value.
// Both the linear segment and the power segment are always evaluated and the
// result is picked with a single select, which keeps the function branch-free.
inline float linear_to_srgb(float linear) {
    const float a = linear * 12.92f;
    const float b = 1.055f * pow512f_approx_cbrt(linear) - 0.055f;
    return (linear <= 0.0031308f) ? a : b;
}

// Converts an integer sRGB code value (scaled by 1/255) to a linear float.
// The input is not range-checked; presumably callers pass 0..255.
// This can also be done with a LUT as there are only 256 possible inputs.
// Declared inline so this header can be included from several translation units.
inline float srgb_to_linear(int v) {
    const float srgb = float(v) / 255.f;
    const float c0(1.0f / 12.92f);
    const float c1(1.f / 1.055f);
    const float a = srgb * c0;
    const float p1 = (srgb * c1 + 0.055f / 1.055f);
    // p1^2 * (p1^2)^(1/5) == p1^2.4
    const float b = (p1 * p1) * Fifthroot(p1 * p1);
    return srgb < 0.04045f ? a : b;
}

} // namespace srgb