Secondary index support string by wuxiaobai24 · Pull Request #1307 · infiniflow/infinity · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Secondary index support string #1307

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/parser/type/data_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class DataType {
case kTime:
case kDateTime: // need to be converted to int64 and keep order
case kTimestamp: // need to be converted to int64 and keep order
case kVarchar: // hashed to a u64 key; order is not kept, so only equality lookups are supported
{
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,16 @@ class FilterCommandBuilder {
result.SetIntervalRange<TimestampT>(value, compare_type);
break;
}
case LogicalType::kVarchar: {
if (compare_type == FilterCompareType::kEqual) {
result.SetIntervalRange<VarcharT>(value, compare_type);
} else {
String error_message = "SaveToResult(): VarcharT only support kEqual compare type.";
LOG_CRITICAL(error_message);
UnrecoverableError(error_message);
}
break;
}
default: {
String error_message = fmt::format("SaveToResult(): type error: {}.", value.type().ToString());
LOG_CRITICAL(error_message);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,18 @@ import logger;

namespace infinity {

// Extracts the raw ColumnValueType payload from a Value and converts it to the
// key representation used by the secondary index.
template <typename ColumnValueType>
ConvertToOrderedType<ColumnValueType> ConvertToOrderedKeyValueFromValue(const Value &val) {
    const ColumnValueType column_value = val.GetValue<ColumnValueType>();
    return ConvertToOrderedKeyValue(column_value);
}

// VarcharT specialization: the text is read through Value::GetVarchar() and
// converted as a String instead of going through GetValue<VarcharT>().
template <>
ConvertToOrderedType<VarcharT> ConvertToOrderedKeyValueFromValue<VarcharT>(const Value &val) {
    const String varchar_text = val.GetVarchar();
    return ConvertToOrderedKeyValue(varchar_text);
}

// The range will only monotonically shrink
// MergeAnd is meaningful: reduce the search range
// MergeOr is not needed if expression rewrite is done
Expand All @@ -42,8 +54,7 @@ public:
"FilterExecuteSingleRangeT: Now only support integral or floating point index key type.");

// Builds the range from a single comparison: the literal in `val` is converted
// to the key type T and applied through AddFilter with `compare_type`.
// NOTE(review): the two `T val_` declarations below look like the before/after
// lines of the diff hunk shown without +/- markers; only one of them can be
// present in the compiled source — confirm against the repository.
explicit FilterIntervalRangeT(const Value &val, FilterCompareType compare_type) {
ColumnValueType raw_val = val.GetValue<ColumnValueType>();
T val_ = ConvertToOrderedKeyValue(raw_val);
T val_ = ConvertToOrderedKeyValueFromValue<ColumnValueType>(val);
AddFilter(val_, compare_type);
}

Expand Down Expand Up @@ -116,7 +127,8 @@ export using FilterIntervalRange = std::variant<std::monostate,
FilterIntervalRangeT<DateT>,
FilterIntervalRangeT<TimeT>,
FilterIntervalRangeT<DateTimeT>,
FilterIntervalRangeT<TimestampT>>;
FilterIntervalRangeT<TimestampT>,
FilterIntervalRangeT<VarcharT>>;

// because some rows may be deleted, kAlwaysTrue is meaningless
// kInterval of the same column can be merged in "AND" condition
Expand Down
2 changes: 2 additions & 0 deletions src/storage/meta/iter/block_column_iter.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ public:
return std::make_pair(v_ptr, block_offset_ + cur_++);
}

// Returns the column vector held by this iterator. The handle is returned by
// value, so each call hands out a new SharedPtr copy of column_vector_.
SharedPtr<ColumnVector> column_vector() const {
    return column_vector_;
}

private:
SegmentOffset block_offset_;
SharedPtr<ColumnVector> column_vector_;
Expand Down
6 changes: 6 additions & 0 deletions src/storage/secondary_index/secondary_index_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,9 @@ SecondaryIndexData *GetSecondaryIndexData(const SharedPtr<DataType> &data_type,
case LogicalType::kTimestamp: {
return new SecondaryIndexDataT<TimestampT>(chunk_row_count, allocate);
}
case LogicalType::kVarchar: {
return new SecondaryIndexDataT<VarcharT>(chunk_row_count, allocate);
}
default: {
String error_message = fmt::format("Need to add secondary index support for data type: {}", data_type->ToString());
LOG_CRITICAL(error_message);
Expand Down Expand Up @@ -293,6 +296,9 @@ u32 GetSecondaryIndexDataPairSize(const SharedPtr<DataType> &data_type) {
case LogicalType::kTimestamp: {
return SecondaryIndexDataT<TimestampT>::PairSize;
}
case LogicalType::kVarchar: {
return SecondaryIndexDataT<VarcharT>::PairSize;
}
default: {
String error_message = fmt::format("Need to add secondary index support for data type: {}", data_type->ToString());
LOG_CRITICAL(error_message);
Expand Down
19 changes: 18 additions & 1 deletion src/storage/secondary_index/secondary_index_data.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ concept ConvertToOrderedI32 = IsAnyOf<T, DateT, TimeT>;
template <typename T>
concept ConvertToOrderedI64 = IsAnyOf<T, DateTimeT, TimestampT>;

// Satisfied by the string-like types (VarcharT and String). Judging by the
// name, these are the types keyed by a 64-bit hash rather than by an
// order-preserving conversion.
template <typename T>
concept ConvertToHashU64 = IsAnyOf<T, VarcharT, String>;

template <typename ValueT>
struct ConvertToOrdered {
static_assert(false, "type not supported");
Expand All @@ -63,8 +66,13 @@ struct ConvertToOrdered<T> {
using type = i64;
};

// Types matching ConvertToHashU64 use u64 as their index key type.
template <ConvertToHashU64 T>
struct ConvertToOrdered<T> {
using type = u64;
};

// Index key type for raw column type T, taken from ConvertToOrdered<T>::type.
// NOTE(review): the two `requires` lines below look like the before/after lines
// of the diff hunk shown without +/- markers; the second one additionally
// admits ConvertToHashU64 types — confirm which one the repository contains.
export template <typename T>
requires KeepOrderedSelf<T> or ConvertToOrderedI32<T> or ConvertToOrderedI64<T>
requires KeepOrderedSelf<T> or ConvertToOrderedI32<T> or ConvertToOrderedI64<T> or ConvertToHashU64<T>
using ConvertToOrderedType = ConvertToOrdered<T>::type;

export template <typename RawValueType>
Expand All @@ -89,6 +97,12 @@ ConvertToOrderedType<RawValueType> ConvertToOrderedKeyValue(RawValueType value)
return value.GetEpochTime();
}

// String specialization (the path used for VarcharT columns): the key is the
// std::hash of the string contents, so equal strings map to equal keys but key
// order says nothing about string order.
// NOTE(review): the C++ standard only requires std::hash to be consistent
// within a single program execution — confirm these keys are never compared
// against keys produced by another process or build.
export template <>
ConvertToOrderedType<String> ConvertToOrderedKeyValue(String value) {
    const std::hash<String> string_hasher{};
    return string_hasher(value);
}

template <typename T>
LogicalType GetLogicalType = LogicalType::kInvalid;

Expand All @@ -110,6 +124,9 @@ LogicalType GetLogicalType<IntegerT> = LogicalType::kInteger;
template <>
LogicalType GetLogicalType<BigIntT> = LogicalType::kBigInt;

// Maps VarcharT to LogicalType::kVarchar.
template <>
LogicalType GetLogicalType<VarcharT>{LogicalType::kVarchar};

export class SecondaryIndexData {
protected:
u32 chunk_row_count_ = 0;
Expand Down
23 changes: 21 additions & 2 deletions src/storage/secondary_index/secondary_index_in_mem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,24 @@ class SecondaryIndexInMemT final : public SecondaryIndexInMem {
break;
}
const auto &[v_ptr, offset] = opt.value();
const KeyType key = ConvertToOrderedKeyValue(*v_ptr);
in_mem_secondary_index_.emplace(key, offset);
// VARCHAR keys are computed from the string contents, so the text has to be
// materialized into a String first; every other type converts the raw value
// directly.
if constexpr (std::is_same_v<RawValueType, VarcharT>) {
    String str;
    if (v_ptr->IsInlined()) {
        // Inlined values are copied straight out of short_.data_.
        str.assign(v_ptr->short_.data_, v_ptr->length_);
    } else {
        // Non-inlined values are read back through the column vector's heap
        // manager. column_vector() returns a SharedPtr by value, so it is
        // called only on this branch: inlined rows no longer pay for a
        // reference-count copy they never use. The temporary stays alive until
        // the end of the full expression, i.e. for the whole ReadFromHeap call.
        str.resize(v_ptr->length_);
        iter.column_vector()->buffer_->fix_heap_mgr_->ReadFromHeap(str.data(),
                                                                   v_ptr->vector_.chunk_id_,
                                                                   v_ptr->vector_.chunk_offset_,
                                                                   v_ptr->length_);
    }
    const KeyType key = ConvertToOrderedKeyValue(str);
    in_mem_secondary_index_.emplace(key, offset);
} else {
    const KeyType key = ConvertToOrderedKeyValue(*v_ptr);
    in_mem_secondary_index_.emplace(key, offset);
}
}
}

Expand Down Expand Up @@ -144,6 +160,9 @@ SharedPtr<SecondaryIndexInMem> SecondaryIndexInMem::NewSecondaryIndexInMem(const
case LogicalType::kTimestamp: {
return MakeShared<SecondaryIndexInMemT<TimestampT>>(begin_row_id, max_size);
}
case LogicalType::kVarchar: {
return MakeShared<SecondaryIndexInMemT<VarcharT>>(begin_row_id, max_size);
}
default: {
return nullptr;
}
Expand Down
56 changes: 56 additions & 0 deletions test/sql/dql/index_scan/index_scan_str.slt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Equality filter on a VARCHAR column: the same SELECT is run before and after
# CREATE INDEX. The returned rows must be identical, while EXPLAIN is expected
# to switch from TABLE SCAN + FILTER to INDEX SCAN.
statement ok
DROP TABLE IF EXISTS str_index_scan_insert;

statement ok
CREATE TABLE str_index_scan_insert (i INTEGER, d1 DATE, d2 DATE, name VARCHAR);

statement ok
INSERT INTO str_index_scan_insert VALUES
(2222, DATE '2022-1-31', DATE '2023-1-31', 'hello infinity'),
(1, DATE '1970-1-1', DATE '2970-1-1', 'hello 2024'),
(11, DATE '1870-11-1', DATE '2570-1-1', 'hello 2570'),
(111, DATE '6570-11-1', DATE '5570-6-21', 'hello infinity');

query I
SELECT * FROM str_index_scan_insert WHERE name = 'hello infinity';
----
2222 2022-01-31 2023-01-31 hello infinity
111 6570-11-01 5570-06-21 hello infinity

query II
EXPLAIN SELECT * FROM str_index_scan_insert WHERE name = 'hello infinity';
----
PROJECT (4)
- table index: #4
- expressions: [i (#1), d1 (#2), d2 (#3), name (#0)]
-> FILTER (3)
- filter: name (#0) = hello infinity
- output columns: [name, __rowid]
-> TABLE SCAN (2)
- table name: str_index_scan_insert(default_db.str_index_scan_insert)
- table index: #1
- output_columns: [name, __rowid]

statement ok
CREATE INDEX str_index_scan_insert_name ON str_index_scan_insert(name);

query III
SELECT * FROM str_index_scan_insert WHERE name = 'hello infinity';
----
2222 2022-01-31 2023-01-31 hello infinity
111 6570-11-01 5570-06-21 hello infinity

query IV
EXPLAIN SELECT * FROM str_index_scan_insert WHERE name = 'hello infinity';
----
PROJECT (4)
- table index: #4
- expressions: [i (#0), d1 (#1), d2 (#2), name (#3)]
-> INDEX SCAN (6)
- table name: str_index_scan_insert(default_db.str_index_scan_insert)
- table index: #1
- filter: name (#1.3) = hello infinity
- output_columns: [__rowid]

statement ok
DROP TABLE str_index_scan_insert;
Loading
0