Skip to content

Commit c339526

Browse files
committed
Tile improvements, sharpyuv
1 parent 3cd83ec commit c339526

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+72889
-5062
lines changed

.gitignore

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,4 +44,12 @@ libyuv
4444
lide265
4545
libheif
4646
x265
47-
aom
47+
aom
48+
/aom/
49+
/dav1d/
50+
/libde265/
51+
/libheif/
52+
/libyuv/
53+
/x265/
54+
libwebp
55+
/libwebp
Binary file not shown.

avif-coder-coil/src/main/java/com/github/awxkee/avifcoil/HeifDecoder.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class HeifDecoder(
6464
}
6565

6666
private fun resizeAspectFill(sourceBitmap: Bitmap, dstSize: Size): Bitmap {
67-
val background = Bitmap.createBitmap(dstSize.width, dstSize.height, Bitmap.Config.ARGB_8888)
67+
val background = Bitmap.createBitmap(dstSize.width, dstSize.height, sourceBitmap.config)
6868
val originalWidth: Float = background.width.toFloat()
6969
val originalHeight: Float = background.height.toFloat()
7070
val canvas = Canvas(background)

avif-coder/build.gradle.kts

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,20 @@ android {
4343
externalNativeBuild {
4444
cmake {
4545
ndkVersion = "25.2.9519653"
46-
cppFlags(
47-
"-ffunction-sections",
48-
"-fdata-sections",
49-
"-fvisibility=hidden"
46+
cppFlags.addAll(
47+
listOf(
48+
"-ffunction-sections",
49+
"-fdata-sections",
50+
"-fvisibility=hidden",
51+
"-std=c++17"
52+
)
5053
)
5154
cFlags(
5255
"-ffunction-sections",
5356
"-fdata-sections",
5457
"-fvisibility=hidden",
5558
)
56-
abiFilters += setOf("armeabi-v7a", "arm64-v8a", "x86_64")
59+
abiFilters += setOf("armeabi-v7a", "arm64-v8a", "x86_64", "x86")
5760
}
5861
}
5962

avif-coder/src/main/cpp/CMakeLists.txt

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,35 +17,52 @@ add_library( # Sets the name of the library.
1717
coder.cpp jni_exception.cpp scaler.cpp icc/cmsalpha.c icc/cmscam02.c icc/cmscgats.c icc/cmscnvrt.c icc/cmserr.c icc/cmsgamma.c
1818
icc/cmsgmt.c icc/cmshalf.c icc/cmsintrp.c icc/cmsio0.c icc/cmsio1.c icc/cmslut.c icc/cmsmd5.c icc/cmsmtrx.c icc/cmsnamed.c
1919
icc/cmsopt.c icc/cmspack.c icc/cmspcs.c icc/cmsplugin.c icc/cmsps2.c icc/cmssamp.c icc/cmssm.c icc/cmstypes.c icc/cmsvirt.c
20-
icc/cmswtpnt.c icc/cmsxform.c rgba_to_bgra_neon.cpp rgba_to_bgra.cpp colorspace.cpp halfFloats.cpp
21-
rgba16bitCopy.cpp rgbaF16bitToNBitU16.cpp rgbaF16bitNBitU8.cpp rgb1010102.cpp)
20+
icc/cmswtpnt.c icc/cmsxform.c colorspace.cpp HalfFloats.cpp
21+
RgbaF16bitToNBitU16.cpp RgbaF16bitNBitU8.cpp Rgb1010102.cpp PerceptualQuantinizer.cpp ThreadPool.hpp CopyUnalignedRGBA.cpp)
2222

2323
add_library(libaom STATIC IMPORTED)
2424
add_library(libx265 STATIC IMPORTED)
2525
add_library(libheif STATIC IMPORTED)
2626
add_library(libyuv STATIC IMPORTED)
2727
add_library(libde265 STATIC IMPORTED)
2828
add_library(libdav1d STATIC IMPORTED)
29+
add_library(libhwy STATIC IMPORTED)
30+
add_library(libsharpyuv STATIC IMPORTED)
2931

3032
set_target_properties(libaom PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libaom.a)
3133
set_target_properties(libx265 PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libx265.a)
3234
set_target_properties(libheif PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libheif.a)
3335
set_target_properties(libyuv PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libyuv.a)
3436
set_target_properties(libde265 PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libde265.a)
3537
set_target_properties(libdav1d PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libdav1d.a)
38+
set_target_properties(libhwy PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libhwy.a)
39+
set_target_properties(libsharpyuv PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/${ANDROID_ABI}/libsharpyuv.a)
3640

3741
add_library(cpufeatures STATIC ${ANDROID_NDK}/sources/android/cpufeatures/cpu-features.c)
3842
target_include_directories(cpufeatures PUBLIC ${ANDROID_NDK}/sources/android/cpufeatures)
3943
target_link_libraries(cpufeatures dl)
4044
list(APPEND EXTRA_LIBS cpufeatures)
4145

4246
include(CheckCXXCompilerFlag)
43-
if(ANDROID_ABI STREQUAL arm64-v8a)
47+
if (ANDROID_ABI STREQUAL arm64-v8a)
4448
set_target_properties(coder PROPERTIES COMPILE_FLAGS -mfpu=neon)
4549
add_definitions("-DHAVE_NEON=1")
46-
endif()
50+
endif ()
4751

48-
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
52+
# Release-only optimisation flags, appended exactly once.
# -march=native is intentionally not used: this is an NDK cross-build (one pass
# per ANDROID_ABI), so "native" would describe the build host rather than the
# target device. -ffp-mode=fast is an Arm Compiler (armclang) spelling that
# upstream clang does not accept; -ffast-math expresses the same intent.
if (CMAKE_BUILD_TYPE STREQUAL "Release")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -ffast-math")
endif ()

# Debug builds compile Highway code for the static target only.
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
    add_definitions(-DHWY_COMPILE_ONLY_STATIC)
endif ()

# NOTE(review): CMAKE_CXX_STANDARD only initialises targets created after this
# point; confirm the `coder` target actually picks up C++17 from here.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Little CMS sources: drop the `register` keyword, which C++17 removed.
add_definitions(-DCMS_NO_REGISTER_KEYWORD)
4966

5067
target_include_directories(coder PRIVATE ${CMAKE_SOURCE_DIR}/libheif ${CMAKE_SOURCE_DIR} ${CMAKE_SOURCE_DIR}/libyuv)
5168
# Searches for a specified prebuilt library and stores the path as a
@@ -69,4 +86,4 @@ target_link_libraries( # Specifies the target library.
6986

7087
# Links the target library to the log library
7188
# included in the NDK.
72-
${log-lib} libaom libx265 libheif cpufeatures libyuv -ljnigraphics libde265 libdav1d)
89+
${log-lib} libaom libx265 libheif cpufeatures libyuv -ljnigraphics libde265 libdav1d libhwy libsharpyuv)
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
//
2+
// Created by Radzivon Bartoshyk on 12/09/2023.
3+
//
4+
5+
#include "CopyUnalignedRGBA.h"
6+
#include "ThreadPool.hpp"
7+
#include <cstdint>
8+
9+
#undef HWY_TARGET_INCLUDE
10+
#define HWY_TARGET_INCLUDE "CopyUnalignedRGBA.cpp"
11+
#include "hwy/foreach_target.h"
12+
#include "hwy/highway.h"
13+
14+
HWY_BEFORE_NAMESPACE();
15+
16+
namespace coder {
17+
namespace HWY_NAMESPACE {
18+
19+
using hwy::HWY_NAMESPACE::ScalableTag;
20+
using hwy::HWY_NAMESPACE::Store;
21+
using hwy::HWY_NAMESPACE::Load;
22+
using hwy::HWY_NAMESPACE::Vec;
23+
using hwy::HWY_NAMESPACE::TFromD;
24+
25+
template<class D, typename T = TFromD<D>>
26+
void
27+
CopyUnalignedRGBARow(const D d, const T *HWY_RESTRICT src, T *HWY_RESTRICT dst, int width) {
28+
int x = 0;
29+
using VU = Vec<decltype(d)>;
30+
int pixels = d.MaxLanes() / 4;
31+
for (x = 0; x + pixels < width; x += pixels) {
32+
VU pixel = Load(d, src);
33+
Store(pixel, d, dst);
34+
35+
src += pixels * 4;
36+
dst += pixels * 4;
37+
}
38+
39+
for (; x < width; ++x) {
40+
auto p1 = src[0];
41+
auto p2 = src[1];
42+
auto p3 = src[2];
43+
auto p4 = src[3];
44+
45+
dst[0] = p1;
46+
dst[1] = p2;
47+
dst[2] = p3;
48+
dst[3] = p4;
49+
50+
src += 4;
51+
dst += 4;
52+
}
53+
}
54+
55+
void
56+
CopyUnalignedRGBA(const uint8_t *HWY_RESTRICT src, int srcStride, uint8_t *HWY_RESTRICT dst,
57+
int dstStride, int width,
58+
int height,
59+
int pixelSize) {
60+
ThreadPool pool;
61+
std::vector<std::future<void>> results;
62+
63+
for (int y = 0; y < height; y++) {
64+
if (pixelSize == 1) {
65+
const ScalableTag<uint8_t> du8;
66+
auto fn = CopyUnalignedRGBARow<decltype(du8)>;
67+
auto r = pool.enqueue(fn,
68+
du8,
69+
reinterpret_cast<const uint8_t *>(src + (y * srcStride)),
70+
reinterpret_cast<uint8_t *>(dst + (y * dstStride)),
71+
width);
72+
results.push_back(std::move(r));
73+
} else if (pixelSize == 2) {
74+
const ScalableTag<uint16_t> du16;
75+
auto fn = CopyUnalignedRGBARow<decltype(du16)>;
76+
auto r = pool.enqueue(fn,
77+
du16,
78+
reinterpret_cast<const uint16_t *>(src + (y * srcStride)),
79+
reinterpret_cast<uint16_t *>(dst + (y * dstStride)),
80+
width);
81+
results.push_back(std::move(r));
82+
} else if (pixelSize == 4) {
83+
const ScalableTag<float> df32;
84+
auto fn = CopyUnalignedRGBARow<decltype(df32)>;
85+
auto r = pool.enqueue(fn,
86+
df32,
87+
reinterpret_cast<const float *>(src + (y * srcStride)),
88+
reinterpret_cast<float *>(dst + (y * dstStride)),
89+
width);
90+
results.push_back(std::move(r));
91+
}
92+
}
93+
94+
for (auto &result: results) {
95+
result.wait();
96+
}
97+
}
98+
99+
}
100+
}
101+
102+
HWY_AFTER_NAMESPACE();
103+
104+
#if HWY_ONCE
105+
namespace coder {
106+
// Highway macro: registers the per-target CopyUnalignedRGBA implementations
// defined above so they can be selected at runtime.
HWY_EXPORT(CopyUnalignedRGBA);
107+
108+
// Public entry point declared in CopyUnalignedRGBA.h. It forwards every
// argument unchanged to the implementation that Highway's dynamic dispatch
// picks for the current CPU.
HWY_DLLEXPORT void
109+
CopyUnalignedRGBA(const uint8_t *HWY_RESTRICT src, int srcStride, uint8_t *HWY_RESTRICT dst,
110+
int dstStride, int width,
111+
int height,
112+
int pixelSize) {
113+
HWY_DYNAMIC_DISPATCH(CopyUnalignedRGBA)(src, srcStride, dst, dstStride, width, height,
114+
pixelSize);
115+
}
116+
}
117+
#endif
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//
2+
// Created by Radzivon Bartoshyk on 12/09/2023.
3+
//
4+
5+
#ifndef AVIF_COPYUNALIGNEDRGBA_H
#define AVIF_COPYUNALIGNEDRGBA_H

// <cstdint> is required for uint8_t; do not rely on <vector> pulling it in.
#include <cstdint>
#include <vector>

namespace coder {
    /**
     * Copies an RGBA image from `src` to `dst` row by row.
     *
     * @param src       First byte of the source image.
     * @param srcStride Distance in bytes between consecutive source rows.
     * @param dst       First byte of the destination image (must not overlap `src`).
     * @param dstStride Distance in bytes between consecutive destination rows.
     * @param width     Row width in pixels.
     * @param height    Number of rows.
     * @param pixelSize Bytes per channel: 1, 2 or 4; other values copy nothing.
     */
    void
    CopyUnalignedRGBA(const uint8_t *__restrict__ src, int srcStride, uint8_t *__restrict__ dst,
                      int dstStride, int width,
                      int height,
                      int pixelSize);
}

#endif //AVIF_COPYUNALIGNEDRGBA_H
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//
2+
// Created by Radzivon Bartoshyk on 04/09/2023.
3+
//
4+
5+
#include "HalfFloats.h"
#include <cstdint>
#include <cstring>
#include <__threading_support>
8+
9+
// Returns the raw IEEE-754 bit pattern of `x`.
// memcpy is used instead of a pointer cast: reading a float through a uint
// pointer violates strict aliasing and may be miscompiled at -O3.
uint as_uint(const float x) {
    static_assert(sizeof(uint) == sizeof(float), "uint and float must have the same size");
    uint bits;
    std::memcpy(&bits, &x, sizeof(bits));
    return bits;
}
12+
13+
// Reinterprets the 32-bit pattern `x` as an IEEE-754 float.
// memcpy is used instead of a pointer cast: reading a uint through a float
// pointer violates strict aliasing and may be miscompiled at -O3.
float as_float(const uint x) {
    static_assert(sizeof(uint) == sizeof(float), "uint and float must have the same size");
    float value;
    std::memcpy(&value, &x, sizeof(value));
    return value;
}
16+
17+
uint16_t float_to_half(
18+
const float x) { // IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits
19+
const uint b =
20+
as_uint(x) + 0x00001000; // round-to-nearest-even: add last bit after truncated mantissa
21+
const uint e = (b & 0x7F800000) >> 23; // exponent
22+
const uint m = b &
23+
0x007FFFFF; // mantissa; in line below: 0x007FF000 = 0x00800000-0x00001000 = decimal indicator flag - initial rounding
24+
return (b & 0x80000000) >> 16 | (e > 112) * ((((e - 112) << 10) & 0x7C00) | m >> 13) |
25+
((e < 113) & (e > 101)) * ((((0x007FF000 + m) >> (125 - e)) + 1) >> 1) |
26+
(e > 143) * 0x7FFF; // sign : normalized : denormalized : saturate
27+
}
28+
29+
float half_to_float(
30+
const uint16_t x) { // IEEE-754 16-bit floating-point format (without infinity): 1-5-10, exp-15, +-131008.0, +-6.1035156E-5, +-5.9604645E-8, 3.311 digits
31+
const uint e = (x & 0x7C00) >> 10; // exponent
32+
const uint m = (x & 0x03FF) << 13; // mantissa
33+
const uint v = as_uint((float) m)
34+
>> 23; // evil log2 bit hack to count leading zeros in denormalized format
35+
return as_float((x & 0x8000) << 16 | (e != 0) * ((e + 112) << 23 | m) | ((e == 0) & (m != 0)) *
36+
((v - 37) << 23 |
37+
((m << (150 - v)) &
38+
0x007FE000))); // sign : normalized : denormalized
39+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
//
2+
// Created by Radzivon Bartoshyk on 04/09/2023.
3+
//
4+
5+
#ifndef JXLCODER_HALFFLOATS_H
6+
#define JXLCODER_HALFFLOATS_H
7+
8+
#include <cstdint>
9+
10+
// Converts the 16-bit half-float bit pattern `x` to a 32-bit float.
// The implementation treats the format as having no infinity encoding.
float half_to_float(const uint16_t x);
11+
// Converts the 32-bit float `x` to a 16-bit half-float bit pattern, rounding
// the mantissa and saturating values whose exponent is too large for the format.
uint16_t float_to_half(const float x);
12+
13+
#endif //JXLCODER_HALFFLOATS_H

0 commit comments

Comments
 (0)