8000 Optimize size of ArrayDecimal128 by jedelbo · Pull Request #6111 · realm/realm-core · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Optimize size of ArrayDecimal128 #6111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

### Enhancements
* <New feature description> (PR [#????](https://github.com/realm/realm-core/pull/????))
* None.
* Storage of Decimal128 properties has been optimised so that the individual values will take up 0 bits (if all nulls), 32 bits, 64 bits or 128 bits depending on what is needed. (PR [#6111]https://github.com/realm/realm-core/pull/6111))

### Fixed
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
Expand Down
1 change: 0 additions & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,6 @@ let bidExcludes: [String] = [
"bid32_tan.c",
"bid32_tanh.c",
"bid32_tgamma.c",
"bid32_to_bid128.c",
"bid32_to_bid64.c",
"bid32_to_int16.c",
"bid32_to_int32.c",
Expand Down
1 change: 1 addition & 0 deletions src/external/IntelRDFPMathLib20U2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ LIBRARY/src/bid128_add.c
LIBRARY/src/bid128_fma.c
LIBRARY/src/bid128_string.c
LIBRARY/src/bid128_2_str_tables.c
LIBRARY/src/bid32_to_bid128.c
LIBRARY/src/bid64_to_bid128.c
LIBRARY/src/bid128_to_int64.c
LIBRARY/src/bid128_quantize.c
Expand Down
270 changes: 248 additions & 22 deletions src/realm/array_decimal128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,40 +21,135 @@

namespace realm {

namespace {

uint8_t min_width(const Decimal128& value, bool zero_width_is_zero)
{
Decimal128::Bid128 coefficient;
int exponent;
bool sign;

if (value.is_null()) {
return zero_width_is_zero ? 4 : 0;
}

value.unpack(coefficient, exponent, sign);
if (coefficient.w[1] == 0) {
if (coefficient.w[0] == 0) {
return zero_width_is_zero ? 0 : 4;
}
if (coefficient.w[0] < (1ull << 23) && exponent > -91 && exponent < 91) {
return 4;
}
if (coefficient.w[0] < (1ull << 53) && exponent > -370 && exponent < 370) {
return 8;
}
}
return 16;
}
} // namespace

void ArrayDecimal128::set(size_t ndx, Decimal128 value)
{
REALM_ASSERT(ndx < m_size);
copy_on_write();
auto values = reinterpret_cast<Decimal128*>(m_data);
values[ndx] = value;
switch (upgrade_leaf(min_width(value, Array::get_context_flag()))) {
case 0:
break;
case 4: {
auto values = reinterpret_cast<Decimal::Bid32*>(m_data);
auto val = value.to_bid32();
REALM_ASSERT(val);
values[ndx] = *val;
break;
}
case 8: {
auto values = reinterpret_cast<Decimal::Bid64*>(m_data);
auto val = value.to_bid64();
REALM_ASSERT(val);
values[ndx] = *val;
break;
}
case 16: {
auto values = reinterpret_cast<Decimal128*>(m_data);
values[ndx] = value;
break;
}
}
}

void ArrayDecimal128::insert(size_t ndx, Decimal128 value)
{
REALM_ASSERT(ndx <= m_size);
if (m_size == 0 && value == Decimal128()) {
// zero width should be interpreted as 0
Array::copy_on_write();
Array::set_context_flag(true);
}
Comment on lines +84 to +88
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existing code is fine, but if you wanted we could even adapt the meaning if changing back and forth (admittedly this would be a rare case and we could do it later if needed).

Suggested change
if (m_size == 0 && value == Decimal128()) {
// zero width should be interpreted as 0
Array::copy_on_write();
Array::set_context_flag(true);
}
if (m_size == 0) {
if (value == Decimal128()) {
// zero width should be interpreted as 0
Array::copy_on_write();
Array::set_context_flag(true);
} else if (value.is_null()) {
// zero width should be interpreted as null
Array::copy_on_write();
Array::set_context_flag(false);
}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, m_size would not be zero next time.

// Allocate room for the new value
alloc(m_size + 1, sizeof(Decimal128)); // Throws
switch (upgrade_leaf(min_width(value, Array::get_context_flag()))) {
case 0:
m_size += 1;
Array::copy_on_write();
set_header_size(m_size);
break;
case 4: {
alloc(m_size + 1, 4); // Throws

auto src = reinterpret_cast<Decimal128*>(m_data) + ndx;
auto dst = src + 1;
auto src = reinterpret_cast<Decimal::Bid32*>(m_data) + ndx;
auto dst = src + 1;

// Make gap for new value
memmove(dst, src, sizeof(Decimal128) * (m_size - 1 - ndx));
// Make gap for new value
memmove(dst, src, sizeof(Decimal::Bid32) * (m_size - 1 - ndx));

// Set new value
*src = value;
// Set new value
auto val = value.to_bid32();
REALM_ASSERT(val);
*src = *val;
break;
}
case 8: {
alloc(m_size + 1, 8); // Throws

auto src = reinterpret_cast<Decimal::Bid64*>(m_data) + ndx;
auto dst = src + 1;

// Make gap for new value
memmove(dst, src, sizeof(Decimal::Bid64) * (m_size - 1 - ndx));

// Set new value
auto val = value.to_bid64();
REALM_ASSERT(val);
*src = *val;
break;
}
case 16: {
alloc(m_size + 1, sizeof(Decimal128)); // Throws

auto src = reinterpret_cast<Decimal128*>(m_data) + ndx;
auto dst = src + 1;

// Make gap for new value
memmove(dst, src, sizeof(Decimal128) * (m_size - 1 - ndx));

// Set new value
*src = value;
break;
}
}
}

void ArrayDecimal128::erase(size_t ndx)
{

REALM_ASSERT(ndx < m_size);

copy_on_write();

Decimal128* dst = reinterpret_cast<Decimal128*>(m_data) + ndx;
Decimal128* src = dst + 1;

memmove(dst, src, sizeof(Decimal128) * (m_size - ndx));
if (m_width) {
auto dst = m_data + ndx * m_width;
memmove(dst, dst + m_width, m_width * (m_size - ndx));
}

// Update size (also in header)
m_size -= 1;
Expand All @@ -65,11 +160,19 @@ void ArrayDecimal128::move(ArrayDecimal128& dst_arr, size_t ndx)
{
size_t elements_to_move = m_size - ndx;
if (elements_to_move) {
const auto old_dst_size = dst_arr.m_size;
dst_arr.alloc(old_dst_size + elements_to_move, sizeof(Decimal128));
Decimal128* dst = reinterpret_cast<Decimal128*>(dst_arr.m_data) + old_dst_size;
Decimal128* src = reinterpret_cast<Decimal128*>(m_data) + ndx;
memmove(dst, src, elements_to_move * sizeof(Decimal128));
if (m_width >= dst_arr.m_width) {
dst_arr.upgrade_leaf(m_width);
const auto old_dst_size = dst_arr.m_size;
dst_arr.alloc(old_dst_size + elements_to_move, m_width);
auto dst = dst_arr.m_data + old_dst_size * m_width;
auto src = m_data + ndx * m_width;
memmove(dst, src, elements_to_move * m_width);
}
else {
for (size_t i = 0; i < elements_to_move; i++) {
dst_arr.add(get(ndx + i));
}
}
}
truncate(ndx);
}
Expand All @@ -81,11 +184,53 @@ size_t ArrayDecimal128::find_first(Decimal128 value, size_t start, size_t end) c
end = sz;
REALM_ASSERT(start <= sz && end <= sz && start <= end);

auto values = reinterpret_cast<Decimal128*>(this->m_data);
for (size_t i = start; i < end; i++) {
if (values[i] == value)
return i;
bool zero_width_is_zero = Array::get_context_flag();
auto width = min_width(value, zero_width_is_zero);
switch (m_width) {
case 0:
if (zero_width_is_zero) {
if (value == Decimal128()) {
return 0;
}
}
else {
if (value.is_null()) {
return 0;
}
}
break;
case 4:
if (width <= 4) {
// Worth optimizing here
auto optval32 = value.to_bid32();
REALM_ASSERT(optval32);
auto val32 = *optval32;
auto values = reinterpret_cast<Decimal128::Bid32*>(this->m_data);
for (size_t i = start; i < end; i++) {
if (values[i] == val32)
return i;
}
}
break;
case 8:
if (width <= 8) {
auto values = reinterpret_cast<Decimal128::Bid64*>(this->m_data);
for (size_t i = start; i < end; i++) {
if (Decimal128(values[i]) == value)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be done by comparing Bid64 values directly, instead of converting up to Decimal128?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That could be done, but then I would have to implement the comparison function like in the 32 bit case. But I don't think the gain would be that big as it is relatively quick to upgrade. Can be done, if you insist.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a blocker since it can be optimized later without changing the file format if it actually does show up in performance profiling.

return i;
}
}
break;
case 16: {
auto values = reinterpret_cast<Decimal128*>(this->m_data);
for (size_t i = start; i < end; i++) {
if (values[i] == value)
return i;
}
break;
}
}

return realm::npos;
}

Expand All @@ -95,4 +240,85 @@ Mixed ArrayDecimal128::get_any(size_t ndx) const
}


size_t ArrayDecimal128::upgrade_leaf(uint8_t width)
{
if (m_width == 16) {
return 16;
}
if (width <= m_width) {
return m_width;
}

if (m_size == 0) {
alloc(m_size, width);
return width;
}

if (m_width == 8) {
// Upgrade to 16 bytes
alloc(m_size, 16);
auto src = reinterpret_cast<Decimal::Bid64*>(m_data);
auto dst = reinterpret_cast<Decimal::Bid128*>(m_data);
for (size_t i = m_size; i > 0; --i) {
auto val = Decimal128(src[i - 1]);
dst[i - 1] = *val.raw();
}
return 16;
}

if (m_width == 4) {
alloc(m_size, width);
auto src = reinterpret_cast<Decimal::Bid32*>(m_data);
if (width == 8) {
// Upgrade to 8 bytes
auto dst = reinterpret_cast<Decimal::Bid64*>(m_data);
for (size_t i = m_size; i > 0; --i) {
auto val = Decimal128(src[i - 1]);
dst[i - 1] = *val.to_bid64();
}
}
else if (width == 16) {
// Upgrade to 16 bytes
auto dst = reinterpret_cast<Decimal::Bid128*>(m_data);
for (size_t i = m_size; i > 0; --i) {
auto val = Decimal128(src[i - 1]);
dst[i - 1] = *val.raw();
}
}
return width;
}

// Upgrade from zero width. Fill with either 0 or null.
Decimal128 fill_value = get_context_flag() ? Decimal128(0) : Decimal128(realm::null());

if (width == 4) {
// Upgrade to 4 bytes
alloc(m_size, 4);
auto values = reinterpret_cast<Decimal::Bid32*>(m_data);
auto fill = *fill_value.to_bid32();
for (size_t i = 0; i < m_size; i++) {
values[i] = fill;
}
return 4;
}
else if (width == 8) {
// Upgrade to 8 bytes
alloc(m_size, 8);
auto values = reinterpret_cast<Decimal::Bid64*>(m_data);
auto fill = *fill_value.to_bid64();
for (size_t i = 0; i < m_size; i++) {
values[i] = fill;
}
return 8;
}

alloc(m_size, 16);
auto values = reinterpret_cast<Decimal128*>(m_data);
for (size_t i = 0; i < m_size; i++) {
values[i] = fill_value;
}
return 16;
}


} // namespace realm
F5AA 30 changes: 27 additions & 3 deletions src/realm/array_decimal128.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,32 @@ class ArrayDecimal128 : public ArrayPayload, private Array {

bool is_null(size_t ndx) const
{
return this->get_width() == 0 || get(ndx).is_null();
if (m_width == 0) {
return !get_context_flag();
}
return get(ndx).is_null();
}

Decimal128 get(size_t ndx) const
{
REALM_ASSERT(ndx < m_size);
auto values = reinterpret_cast<Decimal128*>(this->m_data);
return values[ndx];
switch (m_width) {
case 0:
return get_context_flag() ? Decimal128() : Decimal128(realm::null());
case 4: {
auto values = reinterpret_cast<Decimal128::Bid32*>(this->m_data);
return Decimal128(values[ndx]);
}
case 8: {
auto values = reinterpret_cast<Decimal128::Bid64*>(this->m_data);
return Decimal128(values[ndx]);
}
case 16: {
auto values = reinterpret_cast<Decimal128*>(this->m_data);
return values[ndx];
}
}
return {};
}

Mixed get_any(size_t ndx) const override;
Expand All @@ -94,11 +112,17 @@ class ArrayDecimal128 : public ArrayPayload, private Array {

size_t find_first(Decimal128 value, size_t begin = 0, size_t end = npos) const noexcept;

uint8_t get_width() const noexcept
{
return m_width;
}

protected:
size_t calc_byte_len(size_t num_items, size_t) const override
{
return num_items * sizeof(Decimal128) + header_size;
}
size_t upgrade_leaf(uint8_t width);
};

} // namespace realm
Expand Down
Loading
0