Skip to content

Commit c6e8dd4

Browse files
committed
Added NEON to stride copy when necessary
1 parent 54676b5 commit c6e8dd4

File tree

11 files changed

+383
-101
lines changed

11 files changed

+383
-101
lines changed

app/build.gradle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@ android {
2222
minifyEnabled false
2323
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
2424
}
25+
debug {
26+
ndk {
27+
abiFilters 'arm64-v8a'
28+
}
29+
}
2530
}
2631
compileOptions {
2732
sourceCompatibility JavaVersion.VERSION_1_8

app/src/main/java/com/radzivon/bartoshyk/avif/MainActivity.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class MainActivity : AppCompatActivity() {
4646
// opts.inPreferredConfig = Bitmap.Config.RGBA_F16
4747
// }
4848
binding.imageView.setImageBitmap(bitmap)
49-
val encoded = HeifCoder().encodeHeic(bitmap)
49+
val encoded = HeifCoder().encodeAvif(bitmap)
5050
val decodedSample = HeifCoder().decode(encoded)
5151
binding.imageView.setImageBitmap(decodedSample)
5252

avif-coder/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ android {
5454
"-fvisibility=hidden",
5555
)
5656
abiFilters += setOf("armeabi-v7a", "arm64-v8a", "x86_64")
57+
// abiFilters += setOf("armeabi-v7a", "arm64-v8a", "x86_64")
5758
}
5859
}
5960

avif-coder/src/main/cpp/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ add_library( # Sets the name of the library.
1717
coder.cpp jni_exception.cpp scaler.cpp icc/cmsalpha.c icc/cmscam02.c icc/cmscgats.c icc/cmscnvrt.c icc/cmserr.c icc/cmsgamma.c
1818
icc/cmsgmt.c icc/cmshalf.c icc/cmsintrp.c icc/cmsio0.c icc/cmsio1.c icc/cmslut.c icc/cmsmd5.c icc/cmsmtrx.c icc/cmsnamed.c
1919
icc/cmsopt.c icc/cmspack.c icc/cmspcs.c icc/cmsplugin.c icc/cmsps2.c icc/cmssamp.c icc/cmssm.c icc/cmstypes.c icc/cmsvirt.c
20-
icc/cmswtpnt.c icc/cmsxform.c rgba_to_bgra_neon.cpp rgba_to_bgra.cpp colorspace.cpp halfFloats.cpp)
20+
icc/cmswtpnt.c icc/cmsxform.c rgba_to_bgra_neon.cpp rgba_to_bgra.cpp colorspace.cpp halfFloats.cpp
21+
rgba16bitCopy.cpp rgbaF16bitToNBitU16.cpp rgbaF16bitNBitU8.cpp)
2122

2223
add_library(libaom STATIC IMPORTED)
2324
add_library(libx265 STATIC IMPORTED)

avif-coder/src/main/cpp/coder.cpp

Lines changed: 22 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
#include <limits>
1818
#include "attenuate_alpha.h"
1919
#include "halfFloats.h"
20+
#include "rgba16bitCopy.h"
21+
#include "rgbaF16bitToNBitU16.h"
22+
#include "rgbaF16bitNBitU8.h"
2023

2124
struct AvifMemEncoder {
2225
std::vector<char> buffer;
@@ -26,30 +29,6 @@ int androidOSVersion() {
2629
return android_get_device_api_level();
2730
}
2831

29-
void
30-
copyRGBA16(std::shared_ptr<uint8_t> &source, int srcStride, uint8_t *destination, int dstStride,
31-
int width, int height) {
32-
auto src = reinterpret_cast<uint8_t *>(source.get());
33-
auto dst = reinterpret_cast<uint8_t *>(destination);
34-
35-
for (int y = 0; y < height; ++y) {
36-
37-
auto srcPtr = reinterpret_cast<uint16_t *>(src);
38-
auto dstPtr = reinterpret_cast<uint16_t *>(dst);
39-
40-
for (int x = 0; x < width; ++x) {
41-
auto srcPtr64 = reinterpret_cast<uint64_t *>(srcPtr);
42-
auto dstPtr64 = reinterpret_cast<uint64_t *>(dstPtr);
43-
dstPtr64[0] = srcPtr64[0];
44-
srcPtr += 4;
45-
dstPtr += 4;
46-
}
47-
48-
src += srcStride;
49-
dst += dstStride;
50-
}
51-
}
52-
5332
struct heif_error writeHeifData(struct heif_context *ctx,
5433
const void *data,
5534
size_t size,
@@ -189,90 +168,33 @@ jbyteArray encodeBitmap(JNIEnv *env, jobject thiz,
189168
std::shared_ptr<char> dstARGB(
190169
static_cast<char *>(malloc(info.width * info.height * 4 * sizeof(uint16_t))),
191170
[](char *f) { free(f); });
192-
auto srcData = reinterpret_cast<uint8_t *>(sourceData.data());
193-
uint16_t tmpR;
194-
uint16_t tmpG;
195-
uint16_t tmpB;
196-
uint16_t tmpA;
197-
auto data64Ptr = reinterpret_cast<uint8_t *>(dstARGB.get());
198-
const float scale = 1.0f / float((1 << bitDepth) - 1);
199171
int dstStride = (int) info.width * 4 * (int) sizeof(uint16_t);
200172

201-
for (int y = 0; y < info.height; ++y) {
202-
auto srcPtr = reinterpret_cast<uint16_t *>(srcData);
203-
auto dstPtr = reinterpret_cast<uint16_t *>(data64Ptr);
204-
for (int x = 0; x < info.width; ++x) {
205-
auto alpha = half_to_float(srcPtr[3]);
206-
tmpR = (uint16_t) fmin(fmax((half_to_float(srcPtr[0]) / scale), 0), 1023);
207-
tmpG = (uint16_t) fmin(fmax((half_to_float(srcPtr[1]) / scale), 0), 1023);
208-
tmpB = (uint16_t) fmin(fmax((half_to_float(srcPtr[2]) / scale), 0), 1023);
209-
tmpA = (uint16_t) fmin(fmax((alpha / scale), 0), 1023);
210-
211-
dstPtr[0] = tmpR;
212-
dstPtr[1] = tmpG;
213-
dstPtr[2] = tmpB;
214-
dstPtr[3] = tmpA;
215-
216-
srcPtr += 4;
217-
dstPtr += 4;
218-
}
173+
RGBAF16BitToNBitU16(reinterpret_cast<const uint16_t *>(sourceData.data()),
174+
(int) info.stride,
175+
reinterpret_cast<uint16_t *>(dstARGB.get()), dstStride,
176+
(int) info.width,
177+
(int) info.height, 10);
178+
copyRGBA16(reinterpret_cast<uint16_t *>(dstARGB.get()), dstStride,
179+
reinterpret_cast<uint16_t *>(imgData), stride, (int) info.width,
180+
(int) info.height);
219181

220-
srcData += info.stride;
221-
data64Ptr += dstStride;
222-
}
223-
auto dataPtr = reinterpret_cast<void *>(dstARGB.get());
224-
auto srcY = (char *) dataPtr;
225-
auto dstY = (char *) imgData;
226-
const auto sourceStride = info.width * 4 * sizeof(uint16_t);
227-
for (int y = 0; y < info.height; ++y) {
228-
memcpy(dstY, srcY, sourceStride);
229-
srcY += dstStride;
230-
dstY += stride;
231-
}
232182
dstARGB.reset();
233183
} else {
234184
std::shared_ptr<char> dstARGB(
235185
static_cast<char *>(malloc(info.width * info.height * 4 * sizeof(uint8_t))),
236186
[](char *f) { free(f); });
237-
auto srcData = reinterpret_cast<uint8_t *>(sourceData.data());
238-
char tmpR;
239-
char tmpG;
240-
char tmpB;
241-
char tmpA;
242-
const float scale = 1.0f / float((1 << bitDepth) - 1);
243187
int dstStride = (int) info.width * 4 * (int) sizeof(uint8_t);
244-
auto data64Ptr = reinterpret_cast<uint8_t *>(dstARGB.get());
245-
for (int y = 0; y < info.height; ++y) {
246-
auto srcPtr = reinterpret_cast<uint16_t *>(srcData);
247-
auto dstPtr = reinterpret_cast<uint8_t *>(data64Ptr);
248-
for (int x = 0; x < info.width; ++x) {
249-
auto alpha = half_to_float(srcPtr[3]);
250-
tmpR = (uint8_t) fmin(fmax((half_to_float(srcPtr[0]) / scale), 0), 255);
251-
tmpG = (uint8_t) fmin(fmax((half_to_float(srcPtr[1]) / scale), 0), 255);
252-
tmpB = (uint8_t) fmin(fmax((half_to_float(srcPtr[2]) / scale), 0), 255);
253-
tmpA = (uint8_t) fmin(fmax((alpha / scale), 0), 255);
254-
255-
dstPtr[0] = tmpR;
256-
dstPtr[1] = tmpG;
257-
dstPtr[2] = tmpB;
258-
dstPtr[3] = tmpA;
259-
260-
srcPtr += 4;
261-
dstPtr += 4;
262-
}
263188

264-
srcData += info.stride;
265-
data64Ptr += dstStride;
266-
}
267-
auto dataPtr = reinterpret_cast<void *>(dstARGB.get());
268-
auto srcY = (char *) dataPtr;
269-
auto dstY = (char *) imgData;
270-
const auto sourceStride = info.width * 4 * sizeof(uint8_t);
271-
for (int y = 0; y < info.height; ++y) {
272-
memcpy(dstY, srcY, sourceStride);
273-
srcY += sourceStride;
274-
dstY += stride;
275-
}
189+
RGBAF16BitToNBitU8(reinterpret_cast<const uint16_t *>(sourceData.data()),
190+
(int) info.stride,
191+
reinterpret_cast<uint8_t *>(dstARGB.get()), dstStride,
192+
(int) info.width,
193+
(int) info.height, 8);
194+
195+
libyuv::ARGBCopy(reinterpret_cast<uint8_t *>(dstARGB.get()), (int) dstStride,
196+
reinterpret_cast<uint8_t *>(imgData), stride, (int) info.width,
197+
(int) info.height);
276198
dstARGB.reset();
277199
}
278200
}
@@ -797,7 +719,8 @@ Java_com_radzivon_bartoshyk_avif_coder_HeifCoder_decodeImpl(JNIEnv *env, jobject
797719
}
798720

799721
if (useBitmapHalf16Floats) {
800-
copyRGBA16(dstARGB, stride, reinterpret_cast<uint8_t *>(addr), (int) info.stride,
722+
copyRGBA16(reinterpret_cast<uint16_t *>(dstARGB.get()), stride,
723+
reinterpret_cast<uint16_t *>(addr), (int) info.stride,
801724
(int) info.width,
802725
(int) info.height);
803726
} else {
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
//
2+
// Created by Radzivon Bartoshyk on 05/09/2023.
3+
//
4+
5+
#include "rgba16bitCopy.h"
6+
#include <vector>
7+
#include <cstdint>
8+
9+
#if HAVE_NEON
10+
11+
#include <arm_neon.h>
12+
13+
void copyRGBA16_NEON(uint16_t* source, int srcStride,
14+
uint16_t *destination, int dstStride,
15+
int width, int height) {
16+
auto src = reinterpret_cast<uint8_t *>(source);
17+
auto dst = reinterpret_cast<uint8_t *>(destination);
18+
19+
for (int y = 0; y < height; ++y) {
20+
21+
auto srcPtr = reinterpret_cast<uint16_t *>(src);
22+
auto dstPtr = reinterpret_cast<uint16_t *>(dst);
23+
int x;
24+
25+
for (x = 0; x < width; x += 2) {
26+
uint16x8_t neonSrc = vld1q_u16(srcPtr);
27+
vst1q_u16(dstPtr, neonSrc);
28+
srcPtr += 8;
29+
dstPtr += 8;
30+
}
31+
32+
for (x = 0; x < width; ++x) {
33+
auto srcPtr64 = reinterpret_cast<uint64_t *>(srcPtr);
34+
auto dstPtr64 = reinterpret_cast<uint64_t *>(dstPtr);
35+
dstPtr64[0] = srcPtr64[0];
36+
srcPtr += 4;
37+
dstPtr += 4;
38+
}
39+
40+
src += srcStride;
41+
dst += dstStride;
42+
}
43+
}
44+
45+
#endif
46+
47+
/**
 * Portable fallback that copies a width x height image of 4-channel,
 * 16-bit-per-channel pixels (8 bytes per pixel) row by row.
 *
 * @param source      pointer to the first pixel of the source image
 * @param srcStride   distance between consecutive source rows, in bytes
 * @param destination pointer to the first pixel of the destination image
 * @param dstStride   distance between consecutive destination rows, in bytes
 * @param width       number of pixels to copy per row
 * @param height      number of rows to copy
 *
 * Exactly width * 4 uint16_t samples are copied per row; row padding implied
 * by the strides is never touched. Non-positive width or height copies nothing.
 */
void copyRGBA16_C(uint16_t* source, int srcStride,
                  uint16_t *destination, int dstStride,
                  int width, int height) {
    if (width <= 0 || height <= 0) {
        return;
    }

    constexpr int kChannelsPerPixel = 4;
    const int samplesPerRow = width * kChannelsPerPixel;

    // Rows are addressed through byte pointers because strides are in bytes.
    auto srcRow = reinterpret_cast<const uint8_t *>(source);
    auto dstRow = reinterpret_cast<uint8_t *>(destination);

    for (int y = 0; y < height; ++y) {
        // Copy as uint16_t (the buffers' real element type) rather than through
        // a uint64_t* cast: that avoids the aliasing violation and the 8-byte
        // alignment requirement of the punned access.
        auto in = reinterpret_cast<const uint16_t *>(srcRow);
        auto out = reinterpret_cast<uint16_t *>(dstRow);
        for (int i = 0; i < samplesPerRow; ++i) {
            out[i] = in[i];
        }

        srcRow += srcStride;
        dstRow += dstStride;
    }
}
70+
71+
/**
 * Public entry point for the strided RGBA16 copy.
 *
 * Forwards all arguments unchanged to copyRGBA16_NEON when the build defines
 * HAVE_NEON to a non-zero value, and to copyRGBA16_C otherwise. The choice is
 * made at compile time.
 */
void copyRGBA16(uint16_t* source, int srcStride,
                uint16_t *destination, int dstStride,
                int width, int height) {
#if HAVE_NEON
    constexpr auto copyImpl = &copyRGBA16_NEON;
#else
    constexpr auto copyImpl = &copyRGBA16_C;
#endif
    copyImpl(source, srcStride, destination, dstStride, width, height);
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
//
2+
// Created by Radzivon Bartoshyk on 05/09/2023.
3+
//
4+
5+
#ifndef AVIF_RGBA16BITCOPY_H
6+
#define AVIF_RGBA16BITCOPY_H
7+
8+
#include <vector>
9+
10+
/**
 * Copies `height` rows of `width` pixels from `source` to `destination`,
 * where each pixel is four uint16_t samples (8 bytes). `srcStride` and
 * `dstStride` are the distances between consecutive rows, in bytes.
 * The definition selects a NEON or a plain C++ implementation at compile time
 * depending on HAVE_NEON.
 *
 * NOTE(review): this header uses uint16_t but only includes <vector>;
 * consider including <cstdint> explicitly — confirm against the toolchain.
 */
void copyRGBA16(uint16_t* source, int srcStride,
11+
uint16_t *destination, int dstStride,
12+
int width, int height);
13+
14+
#endif //AVIF_RGBA16BITCOPY_H

0 commit comments

Comments
 (0)