From 6067e965955040c22d1f989e17b45be74b469a8d Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Mon, 30 Jun 2025 13:41:11 +0800 Subject: [PATCH 1/9] Replace simdjson interface only by string_view. Signed-off-by: Long <2262328655@qq.com> --- src/common/third_party.cppm | 1 + src/parser/definition/column_def.cpp | 16 +++++------ src/parser/definition/column_def.h | 2 +- src/parser/expr/constant_expr.cpp | 37 ++++++++++++++------------ src/parser/expr/constant_expr.h | 2 +- src/parser/type/data_type.cpp | 15 ++++++----- src/parser/type/data_type.h | 2 +- src/parser/type/info/sparse_info.cpp | 13 +++++---- src/parser/type/info/sparse_info.h | 2 +- src/storage/definition/index_base.cpp | 17 ++++-------- src/storage/definition/index_base.cppm | 2 +- src/storage/definition/index_ivf.cpp | 7 +++-- src/storage/definition/index_ivf.cppm | 2 +- 13 files changed, 60 insertions(+), 58 deletions(-) diff --git a/src/common/third_party.cppm b/src/common/third_party.cppm index 8143b7823f..8f467d6fb9 100644 --- a/src/common/third_party.cppm +++ b/src/common/third_party.cppm @@ -178,6 +178,7 @@ export using simdjson::error_code; export using ondemand::parser; export using ondemand::document; export using ondemand::object; +export using ondemand::array; export using ondemand::value; } diff --git a/src/parser/definition/column_def.cpp b/src/parser/definition/column_def.cpp index a7458ccc1d..9034bcaf3d 100644 --- a/src/parser/definition/column_def.cpp +++ b/src/parser/definition/column_def.cpp @@ -265,19 +265,17 @@ std::shared_ptr ColumnDef::FromJson(const nlohmann::json &json) { return std::make_shared(column_id, column_type, column_name, constraints, column_comment, default_expr); } -std::shared_ptr ColumnDef::FromJson(const std::string &col_def_str) { +std::shared_ptr ColumnDef::FromJson(std::string_view col_def_str) { simdjson::ondemand::parser parser; simdjson::padded_string col_def_json(col_def_str); simdjson::ondemand::document doc = parser.iterate(col_def_json); - auto column_type_json = doc["column_type"]; - auto column_type = DataType::Deserialize(column_type_json); + auto column_type = DataType::Deserialize(doc["column_type"].raw_json()); int64_t column_id = doc["column_id"].get(); std::string column_name = doc["column_name"].get(); std::set constraints; - simdjson::ondemand::array constraints_json; - if (!doc["constraints"].get(constraints_json)) { + if (simdjson::ondemand::array constraints_json; doc["constraints"].get(constraints_json) == simdjson::SUCCESS) { for (auto item : constraints_json) { ConstraintType constraint = static_cast(static_cast(item.get())); constraints.insert(constraint); @@ -285,15 +283,13 @@ std::shared_ptr ColumnDef::FromJson(const std::string &col_def_str) { } std::string column_comment; - std::string column_comment_json; - if (!doc["column_comment"].get(column_comment_json)) { + if (std::string column_comment_json; doc["column_comment"].get(column_comment_json) == simdjson::SUCCESS) { column_comment = column_comment_json; } std::shared_ptr default_expr = nullptr; - auto default_expr_json = doc["default"]; - if (default_expr_json.error() == simdjson::SUCCESS) { - default_expr = ConstantExpr::Deserialize(default_expr_json); + if (auto default_expr_json = doc["default"]; default_expr_json.error() == simdjson::SUCCESS) { + default_expr = ConstantExpr::Deserialize(default_expr_json.raw_json()); } return std::make_shared(column_id, column_type, column_name, constraints, column_comment, default_expr); diff --git a/src/parser/definition/column_def.h b/src/parser/definition/column_def.h index e6d6bdb005..7c650f945f 100644 --- a/src/parser/definition/column_def.h +++ b/src/parser/definition/column_def.h @@ -129,7 +129,7 @@ class ColumnDef : public TableElement { nlohmann::json ToJson() const; static std::shared_ptr FromJson(const nlohmann::json &json); - static std::shared_ptr FromJson(const std::string &json); + static std::shared_ptr FromJson(std::string_view json); public: int64_t id_{-1}; diff --git a/src/parser/expr/constant_expr.cpp b/src/parser/expr/constant_expr.cpp index 19c238401e..61725a9310 100644 --- a/src/parser/expr/constant_expr.cpp +++ b/src/parser/expr/constant_expr.cpp @@ -567,24 +567,27 @@ std::shared_ptr ConstantExpr::Deserialize(const nlohmann::json &cons return std::shared_ptr(const_expr); } -std::shared_ptr ConstantExpr::Deserialize(simdjson::simdjson_result &constant_expr) { - LiteralType literal_type = (LiteralType)(int32_t)constant_expr["type"].get(); +std::shared_ptr ConstantExpr::Deserialize(std::string_view constant_expr_str) { + simdjson::ondemand::parser parser; + simdjson::padded_string constant_expr_json(constant_expr_str); + simdjson::ondemand::document doc = parser.iterate(constant_expr_json); + LiteralType literal_type = (LiteralType)(int32_t)doc["type"].get(); auto const_expr = new ConstantExpr(literal_type); switch (literal_type) { case LiteralType::kBoolean: { - const_expr->bool_value_ = constant_expr["value"].get(); + const_expr->bool_value_ = doc["value"].get(); break; } case LiteralType::kDouble: { - const_expr->double_value_ = constant_expr["value"].get(); + const_expr->double_value_ = doc["value"].get(); break; } case LiteralType::kString: { - const_expr->str_value_ = strdup(static_cast(constant_expr["value"].get()).c_str()); + const_expr->str_value_ = strdup(static_cast(doc["value"].get()).c_str()); break; } case LiteralType::kInteger: { - const_expr->integer_value_ = constant_expr["value"].get(); + const_expr->integer_value_ = doc["value"].get(); break; } case LiteralType::kEmptyArray: @@ -595,20 +598,20 @@ std::shared_ptr ConstantExpr::Deserialize(simdjson::simdjson_result< case LiteralType::kTime: case LiteralType::kDateTime: case LiteralType::kTimestamp: { - const_expr->date_value_ = strdup(static_cast(constant_expr["value"].get()).c_str()); + const_expr->date_value_ = strdup(static_cast(doc["value"].get()).c_str()); break; } case LiteralType::kIntegerArray: { - const_expr->long_array_ = constant_expr["value"].get>(); + const_expr->long_array_ = doc["value"].get>(); break; } case LiteralType::kDoubleArray: { - const_expr->double_array_ = constant_expr["value"].get>(); + const_expr->double_array_ = doc["value"].get>(); break; } case LiteralType::kSubArrayArray: { - for (simdjson::ondemand::array array = constant_expr["value"]; simdjson::simdjson_result val : array) { - auto sub_arr = std::static_pointer_cast(ConstantExpr::Deserialize(val)); + for (simdjson::ondemand::array array = doc["value"]; simdjson::simdjson_result val : array) { + auto sub_arr = std::static_pointer_cast(ConstantExpr::Deserialize(val.raw_json())); const_expr->sub_array_array_.push_back(std::move(sub_arr)); } break; @@ -617,18 +620,18 @@ std::shared_ptr ConstantExpr::Deserialize(simdjson::simdjson_result< ParserError("Interval type is not supported in JSON serialization"); } case LiteralType::kLongSparseArray: { - const_expr->long_sparse_array_.first = constant_expr["value"]["indices"].get>(); - const_expr->long_sparse_array_.second = constant_expr["value"]["data"].get>(); + const_expr->long_sparse_array_.first = doc["value"]["indices"].get>(); + const_expr->long_sparse_array_.second = doc["value"]["data"].get>(); break; } case LiteralType::kDoubleSparseArray: { - const_expr->double_sparse_array_.first = constant_expr["value"]["indices"].get>(); - const_expr->double_sparse_array_.second = constant_expr["value"]["data"].get>(); + const_expr->double_sparse_array_.first = doc["value"]["indices"].get>(); + const_expr->double_sparse_array_.second = doc["value"]["data"].get>(); break; } case LiteralType::kCurlyBracketsArray: { - for (simdjson::ondemand::array array = constant_expr["value"]; simdjson::simdjson_result val : array) { - auto sub_arr = std::static_pointer_cast(Deserialize(val)); + for (simdjson::ondemand::array array = doc["value"]; simdjson::simdjson_result val : array) { + auto sub_arr = std::static_pointer_cast(Deserialize(val.raw_json())); const_expr->curly_brackets_array_.push_back(std::move(sub_arr)); } break; diff --git a/src/parser/expr/constant_expr.h b/src/parser/expr/constant_expr.h index d9a996c231..6b0d9e280b 100644 --- a/src/parser/expr/constant_expr.h +++ b/src/parser/expr/constant_expr.h @@ -67,7 +67,7 @@ class ConstantExpr : public ParsedExpr { static std::shared_ptr Deserialize(const nlohmann::json &constant_expr); - static std::shared_ptr Deserialize(simdjson::simdjson_result &constant_expr); + static std::shared_ptr Deserialize(std::string_view constant_expr); void TrySortSparseVec(const ColumnDef *col_def); diff --git a/src/parser/type/data_type.cpp b/src/parser/type/data_type.cpp index 8cd3e8ae51..2945a0f465 100644 --- a/src/parser/type/data_type.cpp +++ b/src/parser/type/data_type.cpp @@ -471,15 +471,18 @@ std::shared_ptr DataType::Deserialize(const nlohmann::json &data_type_ return data_type; } -std::shared_ptr DataType::Deserialize(simdjson::simdjson_result &data_type_json) { - const LogicalType logical_type = (LogicalType)(int8_t)data_type_json["data_type"].get(); +std::shared_ptr DataType::Deserialize(std::string_view data_type_str) { + simdjson::ondemand::parser parser; + simdjson::padded_string data_type_json(data_type_str); + simdjson::ondemand::document doc = parser.iterate(data_type_json); + + const LogicalType logical_type = (LogicalType)(int8_t)doc["data_type"].get(); std::shared_ptr type_info{nullptr}; - auto type_info_json = data_type_json["type_info"]; - if (type_info_json.error() == simdjson::SUCCESS) { + if (auto type_info_json = doc["type_info"]; type_info_json.error() == simdjson::SUCCESS) { switch (logical_type) { case LogicalType::kArray: { - const auto element_type = DataType::Deserialize(type_info_json); + const auto element_type = DataType::Deserialize(type_info_json.raw_json()); type_info = ArrayInfo::Make(std::move(*element_type)); break; } @@ -500,7 +503,7 @@ std::shared_ptr DataType::Deserialize(simdjson::simdjson_result Deserialize(const nlohmann::json &data_type_json); - static std::shared_ptr Deserialize(simdjson::simdjson_result &data_type_json); + static std::shared_ptr Deserialize(std::string_view data_type_str); static std::shared_ptr StringDeserialize(const std::string &data_type_string); // Estimated serialized size in bytes, ensured be no less than Write requires, allowed be larger. diff --git a/src/parser/type/info/sparse_info.cpp b/src/parser/type/info/sparse_info.cpp index 97536f796f..a67ca3247d 100644 --- a/src/parser/type/info/sparse_info.cpp +++ b/src/parser/type/info/sparse_info.cpp @@ -106,11 +106,14 @@ std::unique_ptr SparseInfo::Deserialize(const nlohmann::json &json) return std::make_unique(json["data_type"], json["index_type"], json["dimension"], store_type); } -std::unique_ptr SparseInfo::Deserialize(simdjson::simdjson_result &json) { - return std::make_unique((EmbeddingDataType)(int8_t)json["data_type"].get(), - (EmbeddingDataType)(int8_t)json["index_type"].get(), - (size_t)json["dimension"].get(), - (SparseStoreType)(int8_t)json["sort"].get()); +std::unique_ptr SparseInfo::Deserialize(std::string_view json_str) { + simdjson::ondemand::parser parser; + simdjson::padded_string json(json_str); + simdjson::ondemand::document doc = parser.iterate(json); + return std::make_unique((EmbeddingDataType)(int8_t)doc["data_type"].get(), + (EmbeddingDataType)(int8_t)doc["index_type"].get(), + (size_t)doc["dimension"].get(), + (SparseStoreType)(int8_t)doc["sort"].get()); } } // namespace infinity \ No newline at end of file diff --git a/src/parser/type/info/sparse_info.h b/src/parser/type/info/sparse_info.h index 2554cfa249..6adbd49408 100644 --- a/src/parser/type/info/sparse_info.h +++ b/src/parser/type/info/sparse_info.h @@ -92,7 +92,7 @@ class SparseInfo : public TypeInfo { static std::unique_ptr Deserialize(const nlohmann::json &json); - static std::unique_ptr Deserialize(simdjson::simdjson_result &json); + static std::unique_ptr Deserialize(std::string_view json); inline EmbeddingDataType DataType() const noexcept { return data_type_; } diff --git a/src/storage/definition/index_base.cpp b/src/storage/definition/index_base.cpp index ba24edc16f..74435f73f3 100644 --- a/src/storage/definition/index_base.cpp +++ b/src/storage/definition/index_base.cpp @@ -329,7 +329,7 @@ SharedPtr IndexBase::Deserialize(const nlohmann::json &index_def_json return res; } -SharedPtr IndexBase::Deserialize(const String &index_def_str) { +SharedPtr IndexBase::Deserialize(std::string_view index_def_str) { simdjson::padded_string index_def_json(index_def_str); simdjson::parser parser; simdjson::document doc = parser.iterate(index_def_json); @@ -340,9 +340,7 @@ SharedPtr IndexBase::Deserialize(const String &index_def_str) { SharedPtr index_name = MakeShared(doc["index_name"].get()); SharedPtr index_comment; - String index_comment_json; - simdjson::error_code error = doc["index_comment"].get(index_comment_json); - if (error == simdjson::SUCCESS) { + if (String index_comment_json; doc["index_comment"].get(index_comment_json) == simdjson::SUCCESS) { index_comment = MakeShared(index_comment_json); } else { index_comment = MakeShared(); @@ -352,8 +350,7 @@ SharedPtr IndexBase::Deserialize(const String &index_def_str) { Vector column_names = doc["column_names"].get>(); switch (index_type) { case IndexType::kIVF: { - auto ivf_option_json = doc["ivf_option"]; - const auto ivf_option = IndexIVF::DeserializeIndexIVFOption(ivf_option_json); + const auto ivf_option = IndexIVF::DeserializeIndexIVFOption(doc["ivf_option"].raw_json()); res = MakeShared(index_name, index_comment, file_name, std::move(column_names), ivf_option); break; } @@ -364,15 +361,11 @@ SharedPtr IndexBase::Deserialize(const String &index_def_str) { MetricType metric_type = StringToMetricType(doc["metric_type"].get()); HnswEncodeType encode_type = StringToHnswEncodeType(doc["encode_type"].get()); HnswBuildType build_type = HnswBuildType::kPlain; - String build_type_json; - simdjson::error_code error = doc["build_type"].get(build_type_json); - if (error == simdjson::SUCCESS) { + if (String build_type_json; doc["build_type"].get(build_type_json) == simdjson::SUCCESS) { build_type = StringToHnswBuildType(build_type_json); } Optional lsg_config = None; - String lsg_config_json; - error = doc["lsg_config"].get(lsg_config_json); - if (error == simdjson::SUCCESS) { + if (String lsg_config_json; doc["lsg_config"].get(lsg_config_json) == simdjson::SUCCESS) { lsg_config = LSGConfig::FromString(lsg_config_json); } res = MakeShared(index_name, diff --git a/src/storage/definition/index_base.cppm b/src/storage/definition/index_base.cppm index 8fba1e6ea2..e1fd113651 100644 --- a/src/storage/definition/index_base.cppm +++ b/src/storage/definition/index_base.cppm @@ -90,7 +90,7 @@ public: static SharedPtr Deserialize(const nlohmann::json &index_def_json); - static SharedPtr Deserialize(const String &index_def_str); + static SharedPtr Deserialize(std::string_view index_def_str); inline String column_name() const { return column_names_[0]; } diff --git a/src/storage/definition/index_ivf.cpp b/src/storage/definition/index_ivf.cpp index 6b0335d205..04cfaf4653 100644 --- a/src/storage/definition/index_ivf.cpp +++ b/src/storage/definition/index_ivf.cpp @@ -382,8 +382,11 @@ auto tag_invoke(simdjson::deserialize_tag, simdjson_value &val, IndexIVFOption & return simdjson::SUCCESS; } -IndexIVFOption IndexIVF::DeserializeIndexIVFOption(simdjson::simdjson_result &ivf_option_json) { - return ivf_option_json.get(); +IndexIVFOption IndexIVF::DeserializeIndexIVFOption(std::string_view ivf_option_str) { + simdjson::padded_string ivf_option_json(ivf_option_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(ivf_option_json); + return doc.get(); } String BuildIndexIVFStorageOptionStr(); diff --git a/src/storage/definition/index_ivf.cppm b/src/storage/definition/index_ivf.cppm index 06fe6a1237..d42ccb897d 100644 --- a/src/storage/definition/index_ivf.cppm +++ b/src/storage/definition/index_ivf.cppm @@ -91,7 +91,7 @@ public: static IndexIVFOption DeserializeIndexIVFOption(const nlohmann::json &ivf_option_json); - static IndexIVFOption DeserializeIndexIVFOption(simdjson::simdjson_result &ivf_option_json); + static IndexIVFOption DeserializeIndexIVFOption(std::string_view ivf_option_json); void ValidateColumnDataType(const SharedPtr &base_table_ref, const String &column_name); From 0b1d23266ed9ad1a8cc492b8850fcebca9e910dc Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Mon, 30 Jun 2025 14:02:16 +0800 Subject: [PATCH 2/9] Clean nlohmann::json code. Signed-off-by: Long <2262328655@qq.com> --- src/parser/definition/column_def.cpp | 26 ------ src/parser/definition/column_def.h | 3 +- src/parser/expr/constant_expr.cpp | 70 ---------------- src/parser/expr/constant_expr.h | 4 +- src/parser/type/data_type.cpp | 40 --------- src/parser/type/data_type.h | 1 - src/parser/type/info/sparse_info.cpp | 5 -- src/parser/type/info/sparse_info.h | 4 +- src/storage/definition/index_base.cpp | 110 ------------------------- src/storage/definition/index_base.cppm | 2 - src/storage/definition/index_ivf.cpp | 2 - src/storage/definition/index_ivf.cppm | 4 +- 12 files changed, 4 insertions(+), 267 deletions(-) diff --git a/src/parser/definition/column_def.cpp b/src/parser/definition/column_def.cpp index 9034bcaf3d..a0de28d6ea 100644 --- a/src/parser/definition/column_def.cpp +++ b/src/parser/definition/column_def.cpp @@ -239,32 +239,6 @@ nlohmann::json ColumnDef::ToJson() const { return column_def_json; } -std::shared_ptr ColumnDef::FromJson(const nlohmann::json &json) { - auto column_type = DataType::Deserialize(json.at("column_type")); - auto column_id = json.at("column_id").get(); - auto column_name = json.at("column_name").get(); - - std::set constraints; - if (json.contains("constraints")) { - for (const auto &constraint : json.at("constraints")) { - constraints.insert(constraint.get()); - } - } - - std::string column_comment; - if (json.contains("column_comment")) { - column_comment = json.at("column_comment").get(); - } - - std::shared_ptr default_expr = nullptr; - if (json.contains("default")) { - auto default_expr_json = json.at("default"); - default_expr = ConstantExpr::Deserialize(default_expr_json); - } - - return std::make_shared(column_id, column_type, column_name, constraints, column_comment, default_expr); -} - std::shared_ptr ColumnDef::FromJson(std::string_view col_def_str) { simdjson::ondemand::parser parser; simdjson::padded_string col_def_json(col_def_str); diff --git a/src/parser/definition/column_def.h b/src/parser/definition/column_def.h index 7c650f945f..54526c279f 100644 --- a/src/parser/definition/column_def.h +++ b/src/parser/definition/column_def.h @@ -128,8 +128,7 @@ class ColumnDef : public TableElement { const std::shared_ptr default_value() const { return std::dynamic_pointer_cast(default_expr_); } nlohmann::json ToJson() const; - static std::shared_ptr FromJson(const nlohmann::json &json); - static std::shared_ptr FromJson(std::string_view json); + static std::shared_ptr FromJson(std::string_view col_def_str); public: int64_t id_{-1}; diff --git a/src/parser/expr/constant_expr.cpp b/src/parser/expr/constant_expr.cpp index 61725a9310..21b6c4fbc8 100644 --- a/src/parser/expr/constant_expr.cpp +++ b/src/parser/expr/constant_expr.cpp @@ -497,76 +497,6 @@ nlohmann::json ConstantExpr::Serialize() const { return j; } -std::shared_ptr ConstantExpr::Deserialize(const nlohmann::json &constant_expr) { - LiteralType literal_type = static_cast(constant_expr["type"].get()); - auto const_expr = new ConstantExpr(literal_type); - switch (literal_type) { - case LiteralType::kBoolean: { - const_expr->bool_value_ = constant_expr["value"].get(); - break; - } - case LiteralType::kDouble: { - const_expr->double_value_ = constant_expr["value"].get(); - break; - } - case LiteralType::kString: { - const_expr->str_value_ = strdup(constant_expr["value"].get().c_str()); - break; - } - case LiteralType::kInteger: { - const_expr->integer_value_ = constant_expr["value"].get(); - break; - } - case LiteralType::kEmptyArray: - case LiteralType::kNull: { - break; - } - case LiteralType::kDate: - case LiteralType::kTime: - case LiteralType::kDateTime: - case LiteralType::kTimestamp: { - const_expr->date_value_ = strdup(constant_expr["value"].get().c_str()); - break; - } - case LiteralType::kIntegerArray: { - const_expr->long_array_ = constant_expr["value"].get>(); - break; - } - case LiteralType::kDoubleArray: { - const_expr->double_array_ = constant_expr["value"].get>(); - break; - } - case LiteralType::kSubArrayArray: { - for (const nlohmann::json &array = constant_expr["value"]; const auto &val : array) { - auto sub_arr = std::static_pointer_cast(Deserialize(val)); - const_expr->sub_array_array_.push_back(std::move(sub_arr)); - } - break; - } - case LiteralType::kInterval: { - ParserError("Interval type is not supported in JSON serialization"); - } - case LiteralType::kLongSparseArray: { - const_expr->long_sparse_array_.first = constant_expr["value"]["indices"].get>(); - const_expr->long_sparse_array_.second = constant_expr["value"]["data"].get>(); - break; - } - case LiteralType::kDoubleSparseArray: { - const_expr->double_sparse_array_.first = constant_expr["value"]["indices"].get>(); - const_expr->double_sparse_array_.second = constant_expr["value"]["data"].get>(); - break; - } - case LiteralType::kCurlyBracketsArray: { - for (const nlohmann::json &array = constant_expr["value"]; const auto &val : array) { - auto sub_arr = std::static_pointer_cast(Deserialize(val)); - const_expr->curly_brackets_array_.push_back(std::move(sub_arr)); - } - break; - } - } - return std::shared_ptr(const_expr); -} - std::shared_ptr ConstantExpr::Deserialize(std::string_view constant_expr_str) { simdjson::ondemand::parser parser; simdjson::padded_string constant_expr_json(constant_expr_str); diff --git a/src/parser/expr/constant_expr.h b/src/parser/expr/constant_expr.h index 6b0d9e280b..c3559020ac 100644 --- a/src/parser/expr/constant_expr.h +++ b/src/parser/expr/constant_expr.h @@ -65,9 +65,7 @@ class ConstantExpr : public ParsedExpr { nlohmann::json Serialize() const; - static std::shared_ptr Deserialize(const nlohmann::json &constant_expr); - - static std::shared_ptr Deserialize(std::string_view constant_expr); + static std::shared_ptr Deserialize(std::string_view constant_expr_str); void TrySortSparseVec(const ColumnDef *col_def); diff --git a/src/parser/type/data_type.cpp b/src/parser/type/data_type.cpp index 2945a0f465..f326ea5253 100644 --- a/src/parser/type/data_type.cpp +++ b/src/parser/type/data_type.cpp @@ -431,46 +431,6 @@ nlohmann::json DataType::Serialize() const { return json_res; } -std::shared_ptr DataType::Deserialize(const nlohmann::json &data_type_json) { - const auto logical_type = data_type_json["data_type"].get(); - - std::shared_ptr type_info{nullptr}; - if (data_type_json.contains("type_info")) { - const nlohmann::json &type_info_json = data_type_json["type_info"]; - switch (logical_type) { - case LogicalType::kArray: { - const auto element_type = DataType::Deserialize(type_info_json); - type_info = ArrayInfo::Make(std::move(*element_type)); - break; - } - // case LogicalType::kBitmap: { - // type_info = BitmapInfo::Make(type_info_json["length_limit"]); - // break; - // } - case LogicalType::kDecimal: { - type_info = DecimalInfo::Make(type_info_json["precision"], type_info_json["scale"]); - break; - } - case LogicalType::kTensor: - case LogicalType::kTensorArray: - case LogicalType::kMultiVector: - case LogicalType::kEmbedding: { - type_info = EmbeddingInfo::Make(type_info_json["embedding_type"].get(), type_info_json["dimension"]); - break; - } - case LogicalType::kSparse: { - type_info = SparseInfo::Deserialize(type_info_json); - break; - } - default: - // There's no type_info for other types - break; - } - } - std::shared_ptr data_type = std::make_shared(logical_type, type_info); - return data_type; -} - std::shared_ptr DataType::Deserialize(std::string_view data_type_str) { simdjson::ondemand::parser parser; simdjson::padded_string data_type_json(data_type_str); diff --git a/src/parser/type/data_type.h b/src/parser/type/data_type.h index 6637261d2b..1f02539afe 100644 --- a/src/parser/type/data_type.h +++ b/src/parser/type/data_type.h @@ -175,7 +175,6 @@ class DataType { nlohmann::json Serialize() const; - static std::shared_ptr Deserialize(const nlohmann::json &data_type_json); static std::shared_ptr Deserialize(std::string_view data_type_str); static std::shared_ptr StringDeserialize(const std::string &data_type_string); diff --git a/src/parser/type/info/sparse_info.cpp b/src/parser/type/info/sparse_info.cpp index a67ca3247d..a6320f7719 100644 --- a/src/parser/type/info/sparse_info.cpp +++ b/src/parser/type/info/sparse_info.cpp @@ -101,11 +101,6 @@ nlohmann::json SparseInfo::Serialize() const { return res; } -std::unique_ptr SparseInfo::Deserialize(const nlohmann::json &json) { - auto store_type = static_cast(json["sort"].get()); - return std::make_unique(json["data_type"], json["index_type"], json["dimension"], store_type); -} - std::unique_ptr SparseInfo::Deserialize(std::string_view json_str) { simdjson::ondemand::parser parser; simdjson::padded_string json(json_str); diff --git a/src/parser/type/info/sparse_info.h b/src/parser/type/info/sparse_info.h index 6adbd49408..7c1ed35063 100644 --- a/src/parser/type/info/sparse_info.h +++ b/src/parser/type/info/sparse_info.h @@ -90,9 +90,7 @@ class SparseInfo : public TypeInfo { [[nodiscard]] nlohmann::json Serialize() const override; - static std::unique_ptr Deserialize(const nlohmann::json &json); - - static std::unique_ptr Deserialize(std::string_view json); + static std::unique_ptr Deserialize(std::string_view json_str); inline EmbeddingDataType DataType() const noexcept { return data_type_; } diff --git a/src/storage/definition/index_base.cpp b/src/storage/definition/index_base.cpp index 74435f73f3..af5fb8832f 100644 --- a/src/storage/definition/index_base.cpp +++ b/src/storage/definition/index_base.cpp @@ -219,116 +219,6 @@ nlohmann::json IndexBase::Serialize() const { return res; } -SharedPtr IndexBase::Deserialize(const nlohmann::json &index_def_json) { - SharedPtr res = nullptr; - String index_type_name = index_def_json["index_type"]; - IndexType index_type = IndexInfo::StringToIndexType(index_type_name); - SharedPtr index_name = MakeShared(index_def_json["index_name"]); - - SharedPtr index_comment; - if (index_def_json.contains("index_comment")) { - index_comment = MakeShared(index_def_json["index_comment"]); - } else { - index_comment = MakeShared(); - } - - String file_name = index_def_json["file_name"]; - Vector column_names = index_def_json["column_names"]; - switch (index_type) { - case IndexType::kIVF: { - const auto ivf_option = IndexIVF::DeserializeIndexIVFOption(index_def_json["ivf_option"]); - res = MakeShared(index_name, index_comment, file_name, std::move(column_names), ivf_option); - break; - } - case IndexType::kHnsw: { - SizeT M = index_def_json["M"]; - SizeT ef_construction = index_def_json["ef_construction"]; - SizeT block_size = index_def_json["block_size"]; - MetricType metric_type = StringToMetricType(index_def_json["metric_type"]); - HnswEncodeType encode_type = StringToHnswEncodeType(index_def_json["encode_type"]); - HnswBuildType build_type = HnswBuildType::kPlain; - if (index_def_json.contains("build_type")) { - build_type = StringToHnswBuildType(index_def_json["build_type"]); - } - Optional lsg_config = None; - if (index_def_json.contains("lsg_config")) { - lsg_config = LSGConfig::FromString(index_def_json["lsg_config"]); - } - res = MakeShared(index_name, - index_comment, - file_name, - std::move(column_names), - metric_type, - encode_type, - build_type, - M, - ef_construction, - block_size, - lsg_config); - break; - } - case IndexType::kDiskAnn: { - SizeT R = index_def_json["R"]; - SizeT L = index_def_json["L"]; - SizeT num_pq_chunks = index_def_json["num_pq_chunks"]; - SizeT num_parts = index_def_json["num_parts"]; - MetricType metric_type = StringToMetricType(index_def_json["metric_type"]); - DiskAnnEncodeType encode_type = StringToDiskAnnEncodeType(index_def_json["encode_type"]); - res = MakeShared(index_name, - index_comment, - file_name, - std::move(column_names), - metric_type, - encode_type, - R, - L, - num_pq_chunks, - num_parts); - break; - } - case IndexType::kFullText: { - String analyzer = index_def_json["analyzer"]; - auto ft_res = MakeShared(index_name, index_comment, file_name, std::move(column_names), analyzer); - if (index_def_json.contains("flag")) { - u8 flag = index_def_json["flag"]; - ft_res->flag_ = flag; - } - res = ft_res; - break; - } - case IndexType::kSecondary: { - res = MakeShared(index_name, index_comment, file_name, std::move(column_names)); - break; - } - case IndexType::kEMVB: { - u32 residual_pq_subspace_num = index_def_json["pq_subspace_num"]; - u32 residual_pq_subspace_bits = index_def_json["pq_subspace_bits"]; - res = MakeShared(index_name, - index_comment, - file_name, - std::move(column_names), - residual_pq_subspace_num, - residual_pq_subspace_bits); - break; - } - case IndexType::kBMP: { - SizeT block_size = index_def_json["block_size"]; - auto compress_type = static_cast(index_def_json["compress_type"]); - res = MakeShared(index_name, index_comment, file_name, std::move(column_names), block_size, compress_type); - break; - } - case IndexType::kInvalid: { - String error_message = "Error index method while deserializing"; - UnrecoverableError(error_message); - } - default: { - Status status = Status::NotSupport("Not implemented"); - RecoverableError(status); - } - } - return res; -} - SharedPtr IndexBase::Deserialize(std::string_view index_def_str) { simdjson::padded_string index_def_json(index_def_str); simdjson::parser parser; diff --git a/src/storage/definition/index_base.cppm b/src/storage/definition/index_base.cppm index e1fd113651..bcbb395f4c 100644 --- a/src/storage/definition/index_base.cppm +++ b/src/storage/definition/index_base.cppm @@ -88,8 +88,6 @@ public: virtual String BuildOtherParamsString() const { return ""; } virtual nlohmann::json Serialize() const; - static SharedPtr Deserialize(const nlohmann::json &index_def_json); - static SharedPtr Deserialize(std::string_view index_def_str); inline String column_name() const { return column_names_[0]; } diff --git a/src/storage/definition/index_ivf.cpp b/src/storage/definition/index_ivf.cpp index 04cfaf4653..54e663b225 100644 --- a/src/storage/definition/index_ivf.cpp +++ b/src/storage/definition/index_ivf.cpp @@ -353,8 +353,6 @@ nlohmann::json IndexIVF::Serialize() const { return res; } -IndexIVFOption IndexIVF::DeserializeIndexIVFOption(const nlohmann::json &ivf_option_json) { return ivf_option_json; } - template auto tag_invoke(simdjson::deserialize_tag, simdjson_value &val, IndexIVFCentroidOption &ivf_centroid_option) { simdjson::object obj = val.get_object(); diff --git a/src/storage/definition/index_ivf.cppm b/src/storage/definition/index_ivf.cppm index d42ccb897d..80c5f263a1 100644 --- a/src/storage/definition/index_ivf.cppm +++ b/src/storage/definition/index_ivf.cppm @@ -89,9 +89,7 @@ public: nlohmann::json Serialize() const override; - static IndexIVFOption DeserializeIndexIVFOption(const nlohmann::json &ivf_option_json); - - static IndexIVFOption DeserializeIndexIVFOption(std::string_view ivf_option_json); + static IndexIVFOption DeserializeIndexIVFOption(std::string_view ivf_option_str); void ValidateColumnDataType(const SharedPtr &base_table_ref, const String &column_name); From dd7d03a206cae8bf7bab6aea6b04bf8addb89352 Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Mon, 30 Jun 2025 14:24:04 +0800 Subject: [PATCH 3/9] Fix nlohmann::json code used. Signed-off-by: Long <2262328655@qq.com> --- src/storage/catalog/meta/table_meeta.cpp | 2 +- src/storage/catalog/new_catalog.cpp | 2 +- src/storage/common/snapshot_info.cpp | 145 +++++++++++++---------- src/storage/common/snapshot_info.cppm | 12 +- 4 files changed, 89 insertions(+), 72 deletions(-) diff --git a/src/storage/catalog/meta/table_meeta.cpp b/src/storage/catalog/meta/table_meeta.cpp index 42848900bd..3a31d19a59 100644 --- a/src/storage/catalog/meta/table_meeta.cpp +++ b/src/storage/catalog/meta/table_meeta.cpp @@ -650,7 +650,7 @@ Status TableMeeta::LoadColumnDefs() { kv_instance_->Get(KeyEncode::DropTableColumnKey(db_id_str_, table_id_str_, column_name, max_commit_ts), drop_column_ts); if (drop_column_ts.empty() || std::stoull(drop_column_ts) > begin_ts_) { - auto column_def = ColumnDef::FromJson(nlohmann::json::parse(column_value)); + auto column_def = ColumnDef::FromJson(column_value); column_defs.push_back(column_def); } } diff --git a/src/storage/catalog/new_catalog.cpp b/src/storage/catalog/new_catalog.cpp index 5af6e9a6f7..0afdca6413 100644 --- a/src/storage/catalog/new_catalog.cpp +++ b/src/storage/catalog/new_catalog.cpp @@ -305,7 +305,7 @@ Status NewCatalog::GetCleanedMeta(TxnTimeStamp ts, KVInstance *kv_instance, Vect std::move(meta_infos[1]), std::stoull(meta_infos[2]), std::stoull(meta_infos[3]), - ColumnDef::FromJson(nlohmann::json::parse(std::move(meta_infos[4]))))); + ColumnDef::FromJson(meta_infos[4]))); } else if (type_str == "idx") { UniquePtr table_index_meta_key = MakeUnique(std::move(meta_infos[0]), std::move(meta_infos[1]), std::move(meta_infos[4]), std::move(meta_infos[2])); diff --git a/src/storage/common/snapshot_info.cpp b/src/storage/common/snapshot_info.cpp index f25ed56d65..70623a92b5 100644 --- a/src/storage/common/snapshot_info.cpp +++ b/src/storage/common/snapshot_info.cpp @@ -50,14 +50,17 @@ nlohmann::json BlockColumnSnapshotInfo::Serialize() { return json_res; } -SharedPtr BlockColumnSnapshotInfo::Deserialize(const nlohmann::json &column_block_json) { +SharedPtr BlockColumnSnapshotInfo::Deserialize(std::string_view column_block_str) { + simdjson::padded_string column_block_json(column_block_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(column_block_json); auto column_block_snapshot = MakeShared(); - column_block_snapshot->column_id_ = column_block_json["column_id"]; - column_block_snapshot->filename_ = column_block_json["filename"]; - if (column_block_json.contains("outlines")) { - for (const auto &outline_snapshot : column_block_json["outlines"]) { + column_block_snapshot->column_id_ = doc["column_id"].get(); + column_block_snapshot->filename_ = doc["filename"].get(); + if (simdjson::array array; doc["outlines"].get(array) == simdjson::SUCCESS) { + for (auto outline_snapshot : array) { auto outline_snapshot_info = MakeShared(); - outline_snapshot_info->filename_ = outline_snapshot; + outline_snapshot_info->filename_ = outline_snapshot.get(); column_block_snapshot->outline_snapshots_.emplace_back(outline_snapshot_info); } } @@ -74,12 +77,15 @@ nlohmann::json BlockSnapshotInfo::Serialize() { return json_res; } -SharedPtr BlockSnapshotInfo::Deserialize(const nlohmann::json &block_json) { +SharedPtr BlockSnapshotInfo::Deserialize(std::string_view block_str) { + simdjson::padded_string block_json(block_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(block_json); auto block_snapshot = MakeShared(); - block_snapshot->block_id_ = block_json["block_id"]; - block_snapshot->block_dir_ = block_json["block_dir"]; - for (const auto &column_block_json : block_json["columns"]) { - auto column_block_snapshot = BlockColumnSnapshotInfo::Deserialize(column_block_json); + block_snapshot->block_id_ = doc["block_id"].get(); + block_snapshot->block_dir_ = doc["block_dir"].get(); + for (auto column_block_json : doc["columns"]) { + auto column_block_snapshot = BlockColumnSnapshotInfo::Deserialize(column_block_json.raw_json()); block_snapshot->column_block_snapshots_.emplace_back(column_block_snapshot); } return block_snapshot; @@ -102,19 +108,22 @@ nlohmann::json SegmentSnapshotInfo::Serialize() { return json_res; } -SharedPtr SegmentSnapshotInfo::Deserialize(const nlohmann::json &segment_json) { +SharedPtr SegmentSnapshotInfo::Deserialize(std::string_view segment_str) { + simdjson::padded_string segment_json(segment_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(segment_json); auto segment_snapshot = MakeShared(); - segment_snapshot->segment_id_ = segment_json["segment_id"]; - segment_snapshot->segment_dir_ = segment_json["segment_dir"]; + segment_snapshot->segment_id_ = doc["segment_id"].get(); + segment_snapshot->segment_dir_ = doc["segment_dir"].get(); - segment_snapshot->first_delete_ts_ = segment_json["first_delete_ts"]; - segment_snapshot->deprecate_ts_ = segment_json["deprecate_ts"]; - segment_snapshot->row_count_ = segment_json["row_count"]; - segment_snapshot->actual_row_count_ = segment_json["actual_row_count"]; - segment_snapshot->status_ = static_cast(segment_json["segment_status"]); + segment_snapshot->first_delete_ts_ = doc["first_delete_ts"].get(); + segment_snapshot->deprecate_ts_ = doc["deprecate_ts"].get(); + segment_snapshot->row_count_ = doc["row_count"].get(); + segment_snapshot->actual_row_count_ = doc["actual_row_count"].get(); + segment_snapshot->status_ = (SegmentStatus)(u8)doc["segment_status"].get(); - for (const auto &block_json : segment_json["blocks"]) { - auto block_snapshot = BlockSnapshotInfo::Deserialize(block_json); + for (auto block_json : doc["blocks"]) { + auto block_snapshot = BlockSnapshotInfo::Deserialize(block_json.raw_json()); segment_snapshot->block_snapshots_.emplace_back(block_snapshot); } return segment_snapshot; @@ -125,7 +134,7 @@ nlohmann::json ChunkIndexSnapshotInfo::Serialize() { return json_res; } -SharedPtr ChunkIndexSnapshotInfo::Deserialize(const nlohmann::json &chunk_index_json) { +SharedPtr ChunkIndexSnapshotInfo::Deserialize(std::string_view chunk_index_str) { auto chunk_index_snapshot = MakeShared(); return chunk_index_snapshot; } @@ -139,11 +148,14 @@ nlohmann::json SegmentIndexSnapshotInfo::Serialize() { return json_res; } -SharedPtr SegmentIndexSnapshotInfo::Deserialize(const nlohmann::json &segment_index_json) { +SharedPtr SegmentIndexSnapshotInfo::Deserialize(std::string_view segment_index_str) { + simdjson::padded_string segment_index_json(segment_index_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(segment_index_json); auto segment_index_snapshot = MakeShared(); - segment_index_snapshot->segment_id_ = segment_index_json["segment_id"]; - for (const auto &chunk_index_json : segment_index_json["chunk_indexes"]) { - auto chunk_index_snapshot = ChunkIndexSnapshotInfo::Deserialize(chunk_index_json); + segment_index_snapshot->segment_id_ = doc["segment_id"].get(); + for (auto chunk_index_json : doc["chunk_indexes"]) { + auto chunk_index_snapshot = ChunkIndexSnapshotInfo::Deserialize(chunk_index_json.raw_json()); segment_index_snapshot->chunk_index_snapshots_.emplace_back(chunk_index_snapshot); } return segment_index_snapshot; @@ -159,12 +171,15 @@ nlohmann::json TableIndexSnapshotInfo::Serialize() { return json_res; } -SharedPtr TableIndexSnapshotInfo::Deserialize(const nlohmann::json &table_index_json) { +SharedPtr TableIndexSnapshotInfo::Deserialize(std::string_view table_index_str) { + simdjson::padded_string table_index_json(table_index_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(table_index_json); auto table_index_snapshot = MakeShared(); - table_index_snapshot->index_dir_ = MakeShared(table_index_json["index_dir"]); - table_index_snapshot->index_base_ = IndexBase::Deserialize(table_index_json["index_base"]); - for (const auto &segment_index_json : table_index_json["segment_indexes"]) { - auto segment_index_snapshot = SegmentIndexSnapshotInfo::Deserialize(segment_index_json); + table_index_snapshot->index_dir_ = MakeShared(doc["index_dir"].get()); + table_index_snapshot->index_base_ = IndexBase::Deserialize(doc["index_base"].raw_json()); + for (auto segment_index_json : doc["segment_indexes"]) { + auto segment_index_snapshot = SegmentIndexSnapshotInfo::Deserialize(segment_index_json.raw_json()); table_index_snapshot->index_by_segment_.emplace(segment_index_snapshot->segment_id_, segment_index_snapshot); } return table_index_snapshot; @@ -387,68 +402,70 @@ Tuple, Status> TableSnapshotInfo::Deserialize(const RecoverableError(status); } - nlohmann::json snapshot_meta_json = nlohmann::json::parse(json_str); + simdjson::padded_string snapshot_meta_json(json_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(snapshot_meta_json); // LOG_INFO(snapshot_meta_json.dump()); SharedPtr table_snapshot = MakeShared(); - table_snapshot->snapshot_name_ = snapshot_meta_json["snapshot_name"]; - SnapshotScope scope = static_cast(snapshot_meta_json["snapshot_scope"]); + table_snapshot->snapshot_name_ = doc["snapshot_name"].get(); + SnapshotScope scope = (SnapshotScope)(uint8_t)doc["snapshot_scope"].get(); if (scope != SnapshotScope::kTable) { return {nullptr, Status::Unknown("Invalid snapshot scope")}; } table_snapshot->scope_ = SnapshotScope::kTable; - table_snapshot->version_ = snapshot_meta_json["version"]; - table_snapshot->db_name_ = snapshot_meta_json["database_name"]; - table_snapshot->table_name_ = snapshot_meta_json["table_name"]; - table_snapshot->table_comment_ = snapshot_meta_json["table_comment"]; - - table_snapshot->txn_id_ = snapshot_meta_json["txn_id"]; - table_snapshot->begin_ts_ = snapshot_meta_json["begin_ts"]; - table_snapshot->commit_ts_ = snapshot_meta_json["commit_ts"]; - table_snapshot->max_commit_ts_ = snapshot_meta_json["max_commit_ts"]; - table_snapshot->table_entry_dir_ = snapshot_meta_json["table_entry_dir"]; - table_snapshot->next_column_id_ = snapshot_meta_json["next_column_id"]; - table_snapshot->unsealed_id_ = snapshot_meta_json["unsealed_id"]; - table_snapshot->next_segment_id_ = snapshot_meta_json["next_segment_id"]; - table_snapshot->row_count_ = snapshot_meta_json["row_count"]; - - for (const auto &column_def_json : snapshot_meta_json["column_definition"]) { - SharedPtr data_type = DataType::Deserialize(column_def_json["column_type"]); - i64 column_id = column_def_json["column_id"]; - String column_name = column_def_json["column_name"]; + table_snapshot->version_ = doc["version"].get(); + table_snapshot->db_name_ = doc["database_name"].get(); + table_snapshot->table_name_ = doc["table_name"].get(); + table_snapshot->table_comment_ = doc["table_comment"].get(); + + table_snapshot->txn_id_ = doc["txn_id"].get(); + table_snapshot->begin_ts_ = doc["begin_ts"].get(); + table_snapshot->commit_ts_ = doc["commit_ts"].get(); + table_snapshot->max_commit_ts_ = doc["max_commit_ts"].get(); + table_snapshot->table_entry_dir_ = doc["table_entry_dir"].get(); + table_snapshot->next_column_id_ = doc["next_column_id"].get(); + table_snapshot->unsealed_id_ = doc["unsealed_id"].get(); + table_snapshot->next_segment_id_ = doc["next_segment_id"].get(); + table_snapshot->row_count_ = doc["row_count"].get(); + + for (simdjson::array array = doc["column_definition"]; simdjson::simdjson_result column_def_json : array) { + SharedPtr data_type = DataType::Deserialize(column_def_json.raw_json()); + i64 column_id = column_def_json["column_id"].get(); + String column_name = column_def_json["column_name"].get(); std::set constraints; - if (column_def_json.contains("constraints")) { - for (const auto &column_constraint : column_def_json["constraints"]) { - ConstraintType constraint = column_constraint; + if (simdjson::array constraints_json; doc["constraints"].get(constraints_json) == simdjson::SUCCESS) { + for (auto column_constraint : constraints_json) { + ConstraintType constraint = (ConstraintType)(char)column_constraint.get(); constraints.emplace(constraint); } } String comment; - if (column_def_json.contains("column_comment")) { - comment = column_def_json["column_comment"]; + if (String comment_json; doc["column_comment"].get(comment_json) == simdjson::SUCCESS) { + comment = comment_json; } SharedPtr default_expr = nullptr; - if (column_def_json.contains("default")) { - default_expr = ConstantExpr::Deserialize(column_def_json["default"]); + if (auto default_expr_json = doc["default"]; default_expr_json.error() == simdjson::SUCCESS) { + default_expr = ConstantExpr::Deserialize(default_expr_json); } SharedPtr column_def = MakeShared(column_id, data_type, column_name, constraints, comment, default_expr); table_snapshot->columns_.emplace_back(column_def); } - for (const auto &segment_meta_json : snapshot_meta_json["segments"]) { - SharedPtr segment_snapshot = SegmentSnapshotInfo::Deserialize(segment_meta_json); + for (simdjson::array array = doc["segments"]; auto segment_meta_json : array) { + SharedPtr segment_snapshot = SegmentSnapshotInfo::Deserialize(segment_meta_json.raw_json()); table_snapshot->segment_snapshots_.emplace(segment_snapshot->segment_id_, segment_snapshot); } - for (const auto &table_index_meta_json : snapshot_meta_json["table_indexes"]) { - SharedPtr table_index_snapshot = TableIndexSnapshotInfo::Deserialize(table_index_meta_json); + for (simdjson::array array = doc["table_indexes"]; auto table_index_meta_json : array) { + SharedPtr table_index_snapshot = TableIndexSnapshotInfo::Deserialize(table_index_meta_json.raw_json()); table_snapshot->table_index_snapshots_.emplace(*table_index_snapshot->index_base_->index_name_, table_index_snapshot); } diff --git a/src/storage/common/snapshot_info.cppm b/src/storage/common/snapshot_info.cppm index f55ab822b1..bf6085e17f 100644 --- a/src/storage/common/snapshot_info.cppm +++ b/src/storage/common/snapshot_info.cppm @@ -44,7 +44,7 @@ export struct BlockColumnSnapshotInfo { Vector> outline_snapshots_; nlohmann::json Serialize(); - static SharedPtr Deserialize(const nlohmann::json &column_block_json); + static SharedPtr Deserialize(std::string_view column_block_str); }; export struct BlockSnapshotInfo { @@ -56,7 +56,7 @@ export struct BlockSnapshotInfo { String fast_rough_filter_; nlohmann::json Serialize(); - static SharedPtr Deserialize(const nlohmann::json &block_json); + static SharedPtr Deserialize(std::string_view block_str); }; export struct SegmentSnapshotInfo { @@ -70,7 +70,7 @@ export struct SegmentSnapshotInfo { Vector> block_snapshots_; nlohmann::json Serialize(); - static SharedPtr Deserialize(const nlohmann::json &segment_json); + static SharedPtr Deserialize(std::string_view segment_str); }; export struct ChunkIndexSnapshotInfo { @@ -78,14 +78,14 @@ export struct ChunkIndexSnapshotInfo { String base_name_; Vector files_; nlohmann::json Serialize(); - static SharedPtr Deserialize(const nlohmann::json &chunk_index_json); + static SharedPtr Deserialize(std::string_view chunk_index_str); }; export struct SegmentIndexSnapshotInfo { SegmentID segment_id_; Vector> chunk_index_snapshots_{}; nlohmann::json Serialize(); - static SharedPtr Deserialize(const nlohmann::json &segment_index_json); + static SharedPtr Deserialize(std::string_view segment_index_str); }; export struct TableIndexSnapshotInfo { @@ -93,7 +93,7 @@ export struct TableIndexSnapshotInfo { SharedPtr index_dir_{}; Map> index_by_segment_{}; nlohmann::json Serialize(); - static SharedPtr Deserialize(const nlohmann::json &table_index_json); + static SharedPtr Deserialize(std::string_view table_index_str); }; export struct TableSnapshotInfo : public SnapshotInfo { From 422784371b0a74d10a5757b7f6fe91b644c23cf6 Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Mon, 30 Jun 2025 15:34:08 +0800 Subject: [PATCH 4/9] Replace Deserialize method by simdjson. Signed-off-by: Long <2262328655@qq.com> --- src/storage/catalog/meta/chunk_index_meta.cpp | 15 +++--- .../catalog/meta/chunk_index_meta.cppm | 3 +- src/storage/definition/index_full_text.cpp | 2 +- src/storage/definition/index_full_text.cppm | 2 +- src/storage/persistence/obj_stat_accessor.cpp | 46 +++++++++++-------- .../persistence/obj_stat_accessor.cppm | 6 +-- src/storage/persistence/obj_status.cpp | 42 ++++++++--------- src/storage/persistence/obj_status.cppm | 4 +- .../persistence/persistence_manager.cpp | 16 +++---- .../persistence/persistence_manager.cppm | 4 +- 10 files changed, 71 insertions(+), 69 deletions(-) diff --git a/src/storage/catalog/meta/chunk_index_meta.cpp b/src/storage/catalog/meta/chunk_index_meta.cpp index 3d6ef780d3..b13903f848 100644 --- a/src/storage/catalog/meta/chunk_index_meta.cpp +++ b/src/storage/catalog/meta/chunk_index_meta.cpp @@ -59,11 +59,14 @@ void ChunkIndexMetaInfo::ToJson(nlohmann::json &json) const { json["index_size"] = index_size_; } -void ChunkIndexMetaInfo::FromJson(const nlohmann::json &json) { - base_name_ = json["base_name"].get(); - base_row_id_ = RowID::FromUint64(json["base_row_id"].get()); - row_cnt_ = json["row_count"].get(); - index_size_ = json["index_size"].get(); +void ChunkIndexMetaInfo::FromJson(std::string_view json_str) { + simdjson::padded_string json(json_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json); + base_name_ = doc["base_name"].get(); + base_row_id_ = RowID::FromUint64(doc["base_row_id"].get()); + row_cnt_ = doc["row_count"].get(); + index_size_ = doc["index_size"].get(); } ChunkIndexMeta::ChunkIndexMeta(ChunkID chunk_id, SegmentIndexMeta &segment_index_meta) @@ -553,7 +556,7 @@ Status ChunkIndexMeta::LoadChunkInfo() { return s; } chunk_info_ = ChunkIndexMetaInfo(); - chunk_info_->FromJson(nlohmann::json::parse(chunk_info_str)); + chunk_info_->FromJson(chunk_info_str); return Status::OK(); } diff --git a/src/storage/catalog/meta/chunk_index_meta.cppm b/src/storage/catalog/meta/chunk_index_meta.cppm index 63a3627b2c..1627e8ed9b 100644 --- a/src/storage/catalog/meta/chunk_index_meta.cppm +++ b/src/storage/catalog/meta/chunk_index_meta.cppm @@ -39,7 +39,8 @@ export struct ChunkIndexMetaInfo { void ToJson(nlohmann::json &json) const; - void FromJson(const nlohmann::json &json); + void FromJson(std::string_view json_str); + static String IndexFileName(ChunkID chunk_id) { return fmt::format("chunk_{}.idx", chunk_id); } }; diff --git a/src/storage/definition/index_full_text.cpp b/src/storage/definition/index_full_text.cpp index 7eaf8a09ba..b1b81a4661 100644 --- a/src/storage/definition/index_full_text.cpp +++ b/src/storage/definition/index_full_text.cpp @@ -120,7 +120,7 @@ nlohmann::json IndexFullText::Serialize() const { return res; } -SharedPtr IndexFullText::Deserialize(const nlohmann::json &) { +SharedPtr IndexFullText::Deserialize(std::string_view index_def_str) { Status status = Status::NotSupport("Not implemented"); RecoverableError(status); return nullptr; diff --git a/src/storage/definition/index_full_text.cppm b/src/storage/definition/index_full_text.cppm index 4ec4de463d..f0cd15f486 100644 --- a/src/storage/definition/index_full_text.cppm +++ b/src/storage/definition/index_full_text.cppm @@ -63,7 +63,7 @@ public: virtual nlohmann::json Serialize() const override; - static SharedPtr Deserialize(const nlohmann::json &index_def_json); + static SharedPtr Deserialize(std::string_view index_def_str); bool IsRealtime() const { return FlagIsRealtime(flag_); } diff --git a/src/storage/persistence/obj_stat_accessor.cpp b/src/storage/persistence/obj_stat_accessor.cpp index aa742847fc..249d22e495 100644 --- a/src/storage/persistence/obj_stat_accessor.cpp +++ b/src/storage/persistence/obj_stat_accessor.cpp @@ -290,17 +290,19 @@ nlohmann::json ObjectStatAccessor_LocalStorage::Serialize() { return json_obj; } -void ObjectStatAccessor_LocalStorage::Deserialize(const nlohmann::json &obj) { +void ObjectStatAccessor_LocalStorage::Deserialize(std::string_view obj_str) { + simdjson::padded_string obj_json(obj_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(obj_json); std::unique_lock lock(mutex_); - SizeT len = 0; - if (obj.contains("obj_stat_size")) { - len = obj["obj_stat_size"]; - } - for (SizeT i = 0; i < len; ++i) { - auto &json_pair = obj["obj_stat_array"][i]; - String obj_key = json_pair["obj_key"]; + // SizeT len = 0; + // if (SizeT len_json; doc["obj_stat_size"].get(len_json) == simdjson::SUCCESS) { + // len = len_json; + // } + for (simdjson::array array = doc["obj_stat_array"]; auto item: array) { + String obj_key = item["obj_key"].get(); ObjStat obj_stat; - obj_stat.Deserialize(json_pair["obj_stat"]); + obj_stat.Deserialize(item["obj_stat"].raw_json()); obj_stat.cached_ = ObjCached::kCached; obj_map_.emplace(obj_key, std::move(obj_stat)); LOG_TRACE(fmt::format("Deserialize added object {}", obj_key)); @@ -316,8 +318,9 @@ void ObjectStatAccessor_LocalStorage::Deserialize(KVInstance *kv_instance) { std::unique_lock lock(mutex_); while (iter->Valid() && iter->Key().starts_with(obj_stat_prefix)) { String obj_key = iter->Key().ToString().substr(obj_stat_prefix_len); + String obj_value = iter->Value().ToString(); ObjStat obj_stat; - obj_stat.Deserialize(iter->Value().ToString()); + obj_stat.Deserialize(obj_value); obj_stat.cached_ = ObjCached::kCached; LOG_TRACE(fmt::format("Deserialize added object {}", obj_key)); obj_map_.emplace(std::move(obj_key), std::move(obj_stat)); @@ -417,17 +420,19 @@ nlohmann::json ObjectStatAccessor_ObjectStorage::Serialize() { return json_obj; } -void ObjectStatAccessor_ObjectStorage::Deserialize(const nlohmann::json &obj) { - SizeT len = 0; - if (obj.contains("obj_stat_size")) { - len = obj["obj_stat_size"]; - } +void ObjectStatAccessor_ObjectStorage::Deserialize(std::string_view obj_str) { + simdjson::padded_string obj_json(obj_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(obj_json); + // SizeT len = 0; + // if (auto item = doc["obj_stat_size"]; item.error() == simdjson::SUCCESS) { + // len = item.get(); + // } std::unique_lock lock(mutex_); - for (SizeT i = 0; i < len; ++i) { - auto &json_pair = obj["obj_stat_array"][i]; - String obj_key = json_pair["obj_key"]; + for (simdjson::array array = doc["obj_stat_array"]; auto item: array) { + String obj_key = item["obj_key"].get(); ObjStat obj_stat; - obj_stat.Deserialize(json_pair["obj_stat"]); + obj_stat.Deserialize(item["obj_stat"].raw_json()); obj_stat.cached_ = ObjCached::kNotCached; obj_map_.PutNew(obj_key, std::move(obj_stat)); LOG_TRACE(fmt::format("Deserialize added object {}", obj_key)); @@ -443,8 +448,9 @@ void ObjectStatAccessor_ObjectStorage::Deserialize(KVInstance *kv_instance) { std::unique_lock lock(mutex_); while (iter->Valid() && iter->Key().starts_with(obj_stat_prefix)) { String obj_key = iter->Key().ToString().substr(obj_stat_prefix_len); + String obj_value = iter->Value().ToString(); ObjStat obj_stat; - obj_stat.Deserialize(iter->Value().ToString()); + obj_stat.Deserialize(obj_value); obj_stat.cached_ = ObjCached::kNotCached; LOG_TRACE(fmt::format("Deserialize added object {}", obj_key)); obj_map_.PutNew(std::move(obj_key), std::move(obj_stat)); diff --git a/src/storage/persistence/obj_stat_accessor.cppm b/src/storage/persistence/obj_stat_accessor.cppm index 1c3f168936..705afe876e 100644 --- a/src/storage/persistence/obj_stat_accessor.cppm +++ b/src/storage/persistence/obj_stat_accessor.cppm @@ -97,7 +97,7 @@ public: virtual nlohmann::json Serialize() = 0; - virtual void Deserialize(const nlohmann::json &obj) = 0; + virtual void Deserialize(std::string_view obj_str) = 0; virtual void Deserialize(KVInstance *kv_instance) = 0; @@ -129,7 +129,7 @@ public: nlohmann::json Serialize() override; - void Deserialize(const nlohmann::json &obj) override; + void Deserialize(std::string_view obj_str) override; void Deserialize(KVInstance *kv_instance) override; @@ -163,7 +163,7 @@ public: nlohmann::json Serialize() override; - void Deserialize(const nlohmann::json &obj) override; + void Deserialize(std::string_view obj_str) override; void Deserialize(KVInstance *kv_instance) override; diff --git a/src/storage/persistence/obj_status.cpp b/src/storage/persistence/obj_status.cpp index 29bd66bd77..b5ce38b436 100644 --- a/src/storage/persistence/obj_status.cpp +++ b/src/storage/persistence/obj_status.cpp @@ -39,25 +39,6 @@ nlohmann::json ObjStat::Serialize() const { return obj; } -void ObjStat::Deserialize(const nlohmann::json &obj) { - ref_count_ = 0; - obj_size_ = obj["obj_size"]; - parts_ = obj["parts"]; - if (obj.contains("deleted_ranges")) { - SizeT start = 0; - SizeT end = 0; - for (auto &range_obj : obj["deleted_ranges"]) { - if (range_obj.contains("start")) { - start = range_obj["start"]; - } - if (range_obj.contains("end")) { - end = range_obj["end"]; - } - deleted_ranges_.emplace(Range{.start_ = start, .end_ = end}); - } - } -} - String ObjStat::ToString() const { nlohmann::json obj; obj["obj_size"] = obj_size_; @@ -72,9 +53,26 @@ String ObjStat::ToString() const { return obj.dump(); } -void ObjStat::Deserialize(const String &str) { - nlohmann::json obj = nlohmann::json::parse(str); - Deserialize(obj); +void ObjStat::Deserialize(std::string_view str) { + simdjson::padded_string obj_json(str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(obj_json); + ref_count_ = 0; + obj_size_ = doc["obj_size"]; + parts_ = doc["parts"]; + if (simdjson::array array; doc["deleted_ranges"].get(array) == simdjson::SUCCESS) { + SizeT start = 0; + SizeT end = 0; + for (auto range_obj : array) { + if (auto item = range_obj["start"]; item.error() == simdjson::SUCCESS) { + start = item.get(); + } + if (auto item = range_obj["end"]; item.error() == simdjson::SUCCESS) { + end = item.get(); + } + deleted_ranges_.emplace(Range{.start_ = start, .end_ = end}); + } + } } SizeT ObjStat::GetSizeInBytes() const { diff --git a/src/storage/persistence/obj_status.cppm b/src/storage/persistence/obj_status.cppm index 289256faf2..740ac7fa28 100644 --- a/src/storage/persistence/obj_status.cppm +++ b/src/storage/persistence/obj_status.cppm @@ -83,11 +83,9 @@ export struct ObjStat { nlohmann::json Serialize() const; - void Deserialize(const nlohmann::json &obj); - String ToString() const; - void Deserialize(const String &str); + void Deserialize(std::string_view str); SizeT GetSizeInBytes() const; diff --git a/src/storage/persistence/persistence_manager.cpp b/src/storage/persistence/persistence_manager.cpp index ef3aa3747e..d449e08800 100644 --- a/src/storage/persistence/persistence_manager.cpp +++ b/src/storage/persistence/persistence_manager.cpp @@ -43,15 +43,13 @@ nlohmann::json ObjAddr::Serialize() const { return obj; } -void ObjAddr::Deserialize(const nlohmann::json &obj) { - obj_key_ = obj["obj_key"]; - part_offset_ = obj["part_offset"]; - part_size_ = obj["part_size"]; -} - -void ObjAddr::Deserialize(const String &str) { - nlohmann::json obj = nlohmann::json::parse(str); - Deserialize(obj); +void ObjAddr::Deserialize(std::string_view obj_str) { + simdjson::padded_string obj_json(obj_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(obj_json); + obj_key_ = doc["obj_key"].get(); + part_offset_ = doc["part_offset"].get(); + part_size_ = doc["part_size"].get(); } SizeT ObjAddr::GetSizeInBytes() const { return sizeof(int32_t) + obj_key_.size() + sizeof(SizeT) + sizeof(SizeT); } diff --git a/src/storage/persistence/persistence_manager.cppm b/src/storage/persistence/persistence_manager.cppm index d4c423bef0..80960d48d9 100644 --- a/src/storage/persistence/persistence_manager.cppm +++ b/src/storage/persistence/persistence_manager.cppm @@ -38,9 +38,7 @@ export struct ObjAddr { nlohmann::json Serialize() const; - void Deserialize(const nlohmann::json &obj); - - void Deserialize(const String &str); + void Deserialize(std::string_view obj_str); SizeT GetSizeInBytes() const; From 69178dee4be06af448dda90833fe4e7f0f9abc8d Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Mon, 30 Jun 2025 19:17:16 +0800 Subject: [PATCH 5/9] Replace parse by simdjson. Signed-off-by: Long <2262328655@qq.com> --- src/storage/catalog/meta/meta_tree.cpp | 6 ++++-- src/storage/catalog/new_catalog.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/storage/catalog/meta/meta_tree.cpp b/src/storage/catalog/meta/meta_tree.cpp index 7334e93098..502ef3bc81 100644 --- a/src/storage/catalog/meta/meta_tree.cpp +++ b/src/storage/catalog/meta/meta_tree.cpp @@ -563,8 +563,10 @@ SharedPtr MetaTree::MakeMetaTree(const Vector> &met switch (meta_key->type_) { case MetaType::kPmObject: { auto pm_path_key = static_cast(meta_key.get()); - nlohmann::json pm_path_json = nlohmann::json::parse(pm_path_key->value_); - String object_key = pm_path_json["obj_key"]; + simdjson::padded_string json(pm_path_key->value_); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json); + String object_key = doc["obj_key"].get(); if (object_key == "KEY_EMPTY") { continue; } diff --git a/src/storage/catalog/new_catalog.cpp b/src/storage/catalog/new_catalog.cpp index 0afdca6413..d8c1967956 100644 --- a/src/storage/catalog/new_catalog.cpp +++ b/src/storage/catalog/new_catalog.cpp @@ -402,8 +402,10 @@ Vector> NewCatalog::MakeMetaKeys() const { auto new_end = std::remove_if(meta_keys.begin(), meta_keys.end(), [&](const auto &meta_key) { if (meta_key->type_ == MetaType::kPmObject) { auto pm_path_key = static_cast(meta_key.get()); - nlohmann::json pm_path_json = nlohmann::json::parse(pm_path_key->value_); - String object_key = pm_path_json["obj_key"]; + simdjson::padded_string json(pm_path_key->value_); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json); + String object_key = doc["obj_key"].get(); if (object_key == "KEY_EMPTY") { kv_instance_ptr->Delete(KeyEncode::PMObjectKey(pm_path_key->path_key_)); return true; From 57622f495476e1730e5e4c2180ae5a8018d80df0 Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Mon, 30 Jun 2025 20:08:03 +0800 Subject: [PATCH 6/9] Replace BuildConstantExprFromJson by simdjson. Signed-off-by: Long <2262328655@qq.com> --- src/parser/expr/fusion_expr.cpp | 146 ++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 54 deletions(-) diff --git a/src/parser/expr/fusion_expr.cpp b/src/parser/expr/fusion_expr.cpp index 9a44774c83..877bd360f3 100644 --- a/src/parser/expr/fusion_expr.cpp +++ b/src/parser/expr/fusion_expr.cpp @@ -41,79 +41,118 @@ void FusionExpr::JobAfterParser() { } } -std::shared_ptr BuildConstantExprFromJson(const nlohmann::json &json_object) { - switch (json_object.type()) { - case nlohmann::json::value_t::boolean: { +std::shared_ptr BuildConstantExprFromJson(std::string_view json_str) { + simdjson::padded_string json(json_str); + simdjson::ondemand::parser parser; + simdjson::ondemand::document doc = parser.iterate(json); + switch (doc.type()) { + case simdjson::ondemand::json_type::boolean: { auto res = std::make_shared(LiteralType::kBoolean); - res->bool_value_ = json_object.get(); + res->bool_value_ = doc.get(); return res; } - case nlohmann::json::value_t::number_unsigned: - case nlohmann::json::value_t::number_integer: { - auto res = std::make_shared(LiteralType::kInteger); - res->integer_value_ = json_object.get(); - return res; - } - case nlohmann::json::value_t::number_float: { - auto res = std::make_shared(LiteralType::kDouble); - res->double_value_ = json_object.get(); - return res; + case simdjson::ondemand::json_type::number: { + simdjson::ondemand::number num = doc.get_number(); + switch (num.get_number_type()) { + case simdjson::ondemand::number_type::signed_integer: + case simdjson::ondemand::number_type::unsigned_integer: { + auto res = std::make_shared(LiteralType::kInteger); + res->integer_value_ = (int64_t)num; + return res; + } + case simdjson::ondemand::number_type::floating_point_number: { + auto res = std::make_shared(LiteralType::kDouble); + res->double_value_ = (double)num; + return res; + } + default: { + const auto error_info = fmt::format("Unrecognized json object type in number"); + ParserError(error_info); + return nullptr; + } + } } - case nlohmann::json::value_t::string: { + case simdjson::ondemand::json_type::string: { auto res = std::make_shared(LiteralType::kString); - auto str = json_object.get(); - res->str_value_ = strdup(json_object.get().c_str()); + auto str = doc.get(); + res->str_value_ = strdup(((std::string)doc.get()).c_str()); return res; } - case nlohmann::json::value_t::array: { - const uint32_t array_size = json_object.size(); + case simdjson::ondemand::json_type::array: { + const uint32_t array_size = doc.count_elements(); if (array_size == 0) { const auto error_info = "Empty json array!"; ParserError(error_info); return nullptr; } - switch (json_object[0].type()) { - case nlohmann::json::value_t::boolean: - case nlohmann::json::value_t::number_unsigned: - case nlohmann::json::value_t::number_integer: { - auto res = std::make_shared(LiteralType::kIntegerArray); - res->long_array_.resize(array_size); - for (uint32_t i = 0; i < array_size; ++i) { - res->long_array_[i] = json_object[i].get(); + std::vector json_strs(array_size); + std::vector values(array_size); + for (size_t index = 0; auto field : doc.get_array()) { + values[index] = field.value(); + json_strs[index++] = values[index].raw_json(); + } + switch (values[0].type()) { + case simdjson::ondemand::json_type::boolean: + case simdjson::ondemand::json_type::number: { + std::vector nums(array_size); + for (size_t index = 0; auto item : values) { + nums[index++] = item.get_number(); } - return res; - } - case nlohmann::json::value_t::number_float: { - auto res = std::make_shared(LiteralType::kDoubleArray); - res->double_array_.resize(array_size); - for (uint32_t i = 0; i < array_size; ++i) { - res->double_array_[i] = json_object[i].get(); + switch (nums[0].get_number_type()) { + case simdjson::ondemand::number_type::signed_integer: + case simdjson::ondemand::number_type::unsigned_integer: { + auto res = std::make_shared(LiteralType::kIntegerArray); + res->long_array_.resize(array_size); + for (uint32_t i = 0; i < array_size; ++i) { + res->long_array_[i] = (int64_t)nums[i]; + } + return res; + } + case simdjson::ondemand::number_type::floating_point_number: { + auto res = std::make_shared(LiteralType::kDoubleArray); + res->double_array_.resize(array_size); + for (uint32_t i = 0; i < array_size; ++i) { + res->double_array_[i] = (double)nums[i]; + } + return res; + } + default: { + const auto error_info = fmt::format("Unrecognized json object type in array"); + ParserError(error_info); + return nullptr; + } } - return res; } - case nlohmann::json::value_t::array: { + case simdjson::ondemand::json_type::array: { auto res = std::make_shared(LiteralType::kSubArrayArray); res->sub_array_array_.resize(array_size); for (uint32_t i = 0; i < array_size; ++i) { - res->sub_array_array_[i] = BuildConstantExprFromJson(json_object[i]); + res->sub_array_array_[i] = BuildConstantExprFromJson(json_strs[i]); } return res; } default: { - const auto error_info = fmt::format("Unrecognized json object type in array: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type in array"); ParserError(error_info); return nullptr; } } } - case nlohmann::json::value_t::object: { + case simdjson::ondemand::json_type::object: { std::shared_ptr res = nullptr; - for (auto iter = json_object.begin(); iter != json_object.end(); ++iter) { - int64_t key = std::stoll(iter.key()); - const auto &value_obj = iter.value(); - switch (value_obj.type()) { - case nlohmann::json::value_t::number_unsigned: - case nlohmann::json::value_t::number_integer: { + for (auto field : doc.get_object()) { + int64_t field_key = std::stoll(std::string((std::string_view)field.unescaped_key())); + auto field_value = field.value(); + if (doc.type() != simdjson::ondemand::json_type::number) { + const auto error_info = fmt::format("Unrecognized json object type in array"); + ParserError(error_info); + return nullptr; + } + + simdjson::ondemand::number num = field_value.get_number(); + switch (num.get_number_type()) { + case simdjson::ondemand::number_type::signed_integer: + case simdjson::ondemand::number_type::unsigned_integer: { if (res.get() == nullptr) { res = std::make_shared(LiteralType::kLongSparseArray); } else if (res->literal_type_ != LiteralType::kLongSparseArray) { @@ -121,11 +160,11 @@ std::shared_ptr BuildConstantExprFromJson(const nlohmann::json &js ParserError(error_info); return nullptr; } - res->long_sparse_array_.first.push_back(key); - res->long_sparse_array_.second.push_back(value_obj.get()); + res->long_sparse_array_.first.push_back(field_key); + res->long_sparse_array_.second.push_back((int64_t)num); break; } - case nlohmann::json::value_t::number_float: { + case simdjson::ondemand::number_type::floating_point_number: { if (res.get() == nullptr) { res = std::make_shared(LiteralType::kDoubleSparseArray); } else if (res->literal_type_ != LiteralType::kDoubleSparseArray) { @@ -133,12 +172,12 @@ std::shared_ptr BuildConstantExprFromJson(const nlohmann::json &js ParserError(error_info); return nullptr; } - res->double_sparse_array_.first.push_back(key); - res->double_sparse_array_.second.push_back(value_obj.get()); + res->double_sparse_array_.first.push_back(field_key); + res->double_sparse_array_.second.push_back((double)num); break; } default: { - const auto error_info = fmt::format("Unrecognized json object type in array: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type in array"); ParserError(error_info); return nullptr; } @@ -147,7 +186,7 @@ std::shared_ptr BuildConstantExprFromJson(const nlohmann::json &js return res; } default: { - const auto error_info = fmt::format("Unrecognized json object type: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type"); ParserError(error_info); return nullptr; } @@ -181,8 +220,7 @@ std::unique_ptr GetFusionMatchTensorExpr(SearchOptions &search_ column_expr->names_.emplace_back(column_name); auto cast_column_expr = static_cast(column_expr.release()); match_tensor_expr->SetSearchColumn(cast_column_expr); - const auto json_obj = nlohmann::json::parse(search_tensor); - const auto tensor_expr = BuildConstantExprFromJson(json_obj); + const auto tensor_expr = BuildConstantExprFromJson(search_tensor); match_tensor_expr->SetQueryTensorStr(std::move(tensor_data_type), tensor_expr.get()); return match_tensor_expr; } From 3730bd5e120dc20aef8b4ccf8907527483095a75 Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Tue, 1 Jul 2025 16:33:06 +0800 Subject: [PATCH 7/9] Replace RankFeaturesAnalyzer::AnalyzeImpl by simdjson. Signed-off-by: Long <2262328655@qq.com> --- src/common/analyzer/rank_features_analyzer.cpp | 16 +++++++++------- src/common/third_party.cppm | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/common/analyzer/rank_features_analyzer.cpp b/src/common/analyzer/rank_features_analyzer.cpp index c99bdf0b9a..b6cf64ebac 100644 --- a/src/common/analyzer/rank_features_analyzer.cpp +++ b/src/common/analyzer/rank_features_analyzer.cpp @@ -25,19 +25,21 @@ import third_party; namespace infinity { int RankFeaturesAnalyzer::AnalyzeImpl(const Term &input, void *data, HookType func) { - nlohmann::json line_json = nlohmann::json::parse(input.text_); + simdjson::padded_string json(input.text_); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json); u32 offset = 0; - for (const auto &element : line_json) { - if (element.is_object()) { - for (auto it = element.begin(); it != element.end(); ++it) { - std::string key = it.key(); - float value = it.value(); + for (auto element : doc.get_array()) { + auto item = element.value(); + if (item.type() == simdjson::json_type::object) { + for (auto field : item.get_object()) { + std::string_view key = field.unescaped_key(); + float value = field.value().get(); u16 weight = SmallFloat::Float122ToUInt16(value); func(data, key.data(), key.size(), offset++, 0, false, weight); } } } - return 0; } diff --git a/src/common/third_party.cppm b/src/common/third_party.cppm index 8f467d6fb9..b63996b73a 100644 --- a/src/common/third_party.cppm +++ b/src/common/third_party.cppm @@ -180,6 +180,7 @@ export using ondemand::document; export using ondemand::object; export using ondemand::array; export using ondemand::value; +export using ondemand::json_type; } namespace magic_enum { From 6a7f6c263c1e7815f407adbf869471814511dbaa Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Tue, 1 Jul 2025 16:34:10 +0800 Subject: [PATCH 8/9] Replace fulltext_benchmark by simdjson. --- .../local_infinity/fulltext/fulltext_benchmark.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp b/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp index c6c2110033..a4cd44a2fe 100644 --- a/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp +++ b/benchmark/local_infinity/fulltext/fulltext_benchmark.cpp @@ -62,12 +62,14 @@ void ReadJsonl(std::ifstream &input_file, SizeT lines_to_read, Vector(val_str); + assert(error == simdjson::SUCCESS); char *val_buf = (char *)malloc(val_str.length() + 1); memcpy(val_buf, val_str.data(), val_str.length()); val_buf[val_str.length()] = '\0'; From fcd390fc001e1287cdf11fbcb4718260e20770ce Mon Sep 17 00:00:00 2001 From: Long <2262328655@qq.com> Date: Tue, 1 Jul 2025 19:01:32 +0800 Subject: [PATCH 9/9] Replace PhysicalImport by simdjson. Signed-off-by: Long <2262328655@qq.com> --- src/common/third_party.cppm | 2 + src/executor/operator/physical_import.cpp | 341 ++++++++++++--------- src/executor/operator/physical_import.cppm | 6 +- src/network/http/http_search.cpp | 2 +- src/network/http_server.cpp | 6 +- 5 files changed, 213 insertions(+), 144 deletions(-) diff --git a/src/common/third_party.cppm b/src/common/third_party.cppm index b63996b73a..554c274e48 100644 --- a/src/common/third_party.cppm +++ b/src/common/third_party.cppm @@ -180,7 +180,9 @@ export using ondemand::document; export using ondemand::object; export using ondemand::array; export using ondemand::value; +export using ondemand::number; export using ondemand::json_type; +export using ondemand::number_type; } namespace magic_enum { diff --git a/src/executor/operator/physical_import.cpp b/src/executor/operator/physical_import.cpp index d4a67ec386..8517273602 100644 --- a/src/executor/operator/physical_import.cpp +++ b/src/executor/operator/physical_import.cpp @@ -520,12 +520,11 @@ void PhysicalImport::NewImportJSONL(QueryContext *query_context, ImportOperatorS if (!stream_reader->ReadLine(json_str)) { break; } - nlohmann::json line_json = nlohmann::json::parse(json_str); if (!import_ctx->CheckInit()) { import_ctx->Init(); } - JSONLRowHandler(line_json, import_ctx->GetColumnVectors()); + JSONLRowHandler(json_str, import_ctx->GetColumnVectors()); import_ctx->AddRowCnt(); if (import_ctx->CheckFull()) { @@ -540,47 +539,44 @@ void PhysicalImport::NewImportJSONL(QueryContext *query_context, ImportOperatorS } void PhysicalImport::NewImportJSON(QueryContext *query_context, ImportOperatorState *import_op_state, Vector> &data_blocks) { - nlohmann::json json_arr; - { - auto [file_handle, status] = VirtualStore::Open(file_path_, FileAccessMode::kRead); - if (!status.ok()) { - UnrecoverableError(status.message()); - } - - i64 file_size = file_handle->FileSize(); - if (file_size == -1) { - UnrecoverableError("Can't get file size"); - } - String json_str(file_size, 0); - auto [read_n, status_read] = file_handle->Read(json_str.data(), file_size); - if (!status_read.ok()) { - UnrecoverableError(status_read.message()); - } - if ((i64)read_n != file_size) { - String error_message = fmt::format("Read file size {} doesn't match with file size {}.", read_n, file_size); - UnrecoverableError(error_message); - } - - if (read_n == 0) { - auto result_msg = MakeUnique(fmt::format("Empty JSON file, IMPORT 0 Rows")); - import_op_state->result_msg_ = std::move(result_msg); - return; - } - - json_arr = nlohmann::json::parse(json_str); + auto [file_handle, status] = VirtualStore::Open(file_path_, FileAccessMode::kRead); + if (!status.ok()) { + UnrecoverableError(status.message()); + } + i64 file_size = file_handle->FileSize(); + if (file_size == -1) { + UnrecoverableError("Can't get file size"); + } + String json_str(file_size, 0); + auto [read_n, status_read] = file_handle->Read(json_str.data(), file_size); + if (!status_read.ok()) { + UnrecoverableError(status_read.message()); } - if (!json_arr.is_array()) { + if ((i64)read_n != file_size) { + String error_message = fmt::format("Read file size {} doesn't match with file size {}.", read_n, file_size); + UnrecoverableError(error_message); + } + if (read_n == 0) { + auto result_msg = MakeUnique(fmt::format("Empty JSON file, IMPORT 0 Rows")); + import_op_state->result_msg_ = std::move(result_msg); + return; + } + + simdjson::padded_string json_arr(json_str); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json_arr); + if (doc.type() != simdjson::json_type::array) { auto result_msg = MakeUnique(fmt::format("Invalid json format, IMPORT 0 rows")); import_op_state->result_msg_ = std::move(result_msg); return; } auto import_ctx = MakeUnique(table_info_->column_defs_); - for (const auto &json_entry : json_arr) { + for (auto element : doc.get_array()) { if (!import_ctx->CheckInit()) { import_ctx->Init(); } - JSONLRowHandler(json_entry, import_ctx->GetColumnVectors()); + JSONLRowHandler(element.value().raw_json(), import_ctx->GetColumnVectors()); import_ctx->AddRowCnt(); if (import_ctx->CheckFull()) { @@ -671,82 +667,126 @@ void PhysicalImport::NewCSVRowHandler(void *context_raw_ptr) { } } -SharedPtr BuildConstantExprFromJson(const nlohmann::json &json_object) { - switch (json_object.type()) { - case nlohmann::json::value_t::boolean: { +SharedPtr BuildConstantExprFromJson(std::string_view object_sv) { + simdjson::padded_string json_str(object_sv); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json_str); + switch (doc.type()) { + case simdjson::json_type::boolean: { auto res = MakeShared(LiteralType::kBoolean); - res->bool_value_ = json_object.get(); - return res; - } - case nlohmann::json::value_t::number_unsigned: - case nlohmann::json::value_t::number_integer: { - auto res = MakeShared(LiteralType::kInteger); - res->integer_value_ = json_object.get(); + res->bool_value_ = doc.get(); return res; } - case nlohmann::json::value_t::number_float: { - auto res = MakeShared(LiteralType::kDouble); - res->double_value_ = json_object.get(); - return res; + case simdjson::json_type::number: { + simdjson::number num = doc.get_number(); + switch (num.get_number_type()) { + case simdjson::number_type::unsigned_integer: + case simdjson::number_type::signed_integer: { + auto res = MakeShared(LiteralType::kInteger); + res->integer_value_ = (i64)num; + return res; + } + case simdjson::number_type::floating_point_number: { + auto res = MakeShared(LiteralType::kDouble); + res->double_value_ = (double)num; + return res; + } + default: { + const auto error_info = fmt::format("Unrecognized object number type"); + RecoverableError(Status::ImportFileFormatError(error_info)); + return nullptr; + } + } } - case nlohmann::json::value_t::string: { + case simdjson::json_type::string: { auto res = MakeShared(LiteralType::kString); - const auto str = json_object.get(); + const String str = doc.get(); res->str_value_ = strdup(str.c_str()); return res; } - case nlohmann::json::value_t::array: { - const u32 array_size = json_object.size(); + case simdjson::json_type::array: { + const u32 array_size = doc.count_elements(); if (array_size == 0) { const auto error_info = "Empty json array!"; RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } - switch (json_object[0].type()) { - case nlohmann::json::value_t::boolean: - case nlohmann::json::value_t::number_unsigned: - case nlohmann::json::value_t::number_integer: { - auto res = MakeShared(LiteralType::kIntegerArray); - res->long_array_.resize(array_size); - for (u32 i = 0; i < array_size; ++i) { - res->long_array_[i] = json_object[i].get(); + std::vector json_strs(array_size); + std::vector values(array_size); + for (size_t index = 0; auto field : doc.get_array()) { + values[index] = field.value(); + json_strs[index++] = values[index].raw_json(); + } + switch (values[0].type()) { + case simdjson::json_type::boolean: + case simdjson::json_type::number: { + std::vector nums(array_size); + for (size_t index = 0; auto item : values) { + nums[index++] = item.get_number(); } - return res; - } - case nlohmann::json::value_t::number_float: { - auto res = MakeShared(LiteralType::kDoubleArray); - res->double_array_.resize(array_size); - for (u32 i = 0; i < array_size; ++i) { - res->double_array_[i] = json_object[i].get(); + switch (nums[0].get_number_type()) { + case simdjson::number_type::unsigned_integer: + case simdjson::number_type::signed_integer: { + auto res = MakeShared(LiteralType::kIntegerArray); + res->long_array_.resize(array_size); + for (u32 i = 0; i < array_size; ++i) { + res->long_array_[i] = (i64)nums[i]; + } + return res; + } + case simdjson::number_type::floating_point_number: { + auto res = MakeShared(LiteralType::kDoubleArray); + res->double_array_.resize(array_size); + for (u32 i = 0; i < array_size; ++i) { + res->double_array_[i] = (double)nums[i]; + } + return res; + } + default: { + const auto error_info = fmt::format("Unrecognized object number type"); + RecoverableError(Status::ImportFileFormatError(error_info)); + return nullptr; + } } - return res; } - case nlohmann::json::value_t::array: { + case simdjson::json_type::array: { auto res = MakeShared(LiteralType::kSubArrayArray); res->sub_array_array_.resize(array_size); for (u32 i = 0; i < array_size; ++i) { - res->sub_array_array_[i] = BuildConstantExprFromJson(json_object[i]); + res->sub_array_array_[i] = BuildConstantExprFromJson(json_strs[i]); } return res; } default: { - const auto error_info = fmt::format("Unrecognized json object type in array: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type in array"); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } } } - case nlohmann::json::value_t::object: { - if (json_object.size() == 1 && json_object.begin().key() == "array") { - const auto &array_obj = json_object.begin().value(); - if (array_obj.type() != nlohmann::json::value_t::array) { - const auto error_info = fmt::format("Unrecognized json object type in array: {}, expect array!", array_obj.type_name()); + case simdjson::json_type::object: { + const u32 array_size = doc.count_fields(); + if (array_size != 1) { + const auto error_info = fmt::format("Unrecognized json object size: Expacted 1, but got {}", array_size); + RecoverableError(Status::ImportFileFormatError(error_info)); + return nullptr; + } + for (auto obj : doc.get_object()) { + String key = String((std::string_view)obj.unescaped_key()); + if (key != "array") { + const auto error_info = fmt::format("Unrecognized json key name: Expacted array, but got {}", key); + RecoverableError(Status::ImportFileFormatError(error_info)); + return nullptr; + } + auto value = obj.value(); + if (value.type() != simdjson::json_type::array) { + const auto error_info = fmt::format("Unrecognized json object type in array"); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } auto res = MakeShared(LiteralType::kCurlyBracketsArray); - for (const auto &elem : array_obj) { - auto elem_expr = BuildConstantExprFromJson(elem); + for (auto elem : value) { + auto elem_expr = BuildConstantExprFromJson(elem.raw_json()); if (!elem_expr) { RecoverableError(Status::ImportFileFormatError("Failed to build expr for element of array!")); return nullptr; @@ -754,21 +794,17 @@ SharedPtr BuildConstantExprFromJson(const nlohmann::json &json_obj res->curly_brackets_array_.push_back(std::move(elem_expr)); } return res; - } else { - const auto error_info = fmt::format("Unrecognized json object type: {}", json_object.type_name()); - RecoverableError(Status::ImportFileFormatError(error_info)); - return nullptr; } } default: { - const auto error_info = fmt::format("Unrecognized json object type: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type"); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } } } -SharedPtr BuildConstantSparseExprFromJson(const nlohmann::json &json_object, const SparseInfo *sparse_info) { +SharedPtr BuildConstantSparseExprFromJson(std::string_view object_sv, const SparseInfo *sparse_info) { SharedPtr res = nullptr; switch (sparse_info->DataType()) { case EmbeddingDataType::kElemBit: { @@ -796,63 +832,91 @@ SharedPtr BuildConstantSparseExprFromJson(const nlohmann::json &js return nullptr; } } - if (json_object.size() == 0) { - return res; - } - switch (json_object.type()) { - case nlohmann::json::value_t::array: { - const u32 array_size = json_object.size(); - switch (json_object[0].type()) { - case nlohmann::json::value_t::number_unsigned: - case nlohmann::json::value_t::number_integer: { + simdjson::padded_string json(object_sv); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json); + switch (doc.type()) { + case simdjson::json_type::array: { + const u32 array_size = doc.count_elements(); + if (array_size == 0) { + return res; + } + std::vector json_strs(array_size); + std::vector values(array_size); + for (size_t index = 0; auto field : doc.get_array()) { + values[index] = field.value(); + json_strs[index++] = values[index].raw_json(); + } + if (values[0].type() != simdjson::json_type::number) { + const auto error_info = fmt::format("Unrecognized json object type"); + RecoverableError(Status::ImportFileFormatError(error_info)); + return nullptr; + } + std::vector nums(array_size); + for (size_t index = 0; auto item : values) { + nums[index++] = item.get_number(); + } + switch (nums[0].get_number_type()) { + case simdjson::number_type::unsigned_integer: + case simdjson::number_type::signed_integer: { res->long_array_.resize(array_size); for (u32 i = 0; i < array_size; ++i) { - res->long_array_[i] = json_object[i].get(); + res->long_array_[i] = (i64)nums[i]; } return res; } default: { - const auto error_info = fmt::format("Unrecognized json object type in array: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type in array"); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } } } - case nlohmann::json::value_t::object: { + case simdjson::json_type::object: { + const u32 object_size = doc.count_fields(); + if (object_size == 0) { + return res; + } HashSet key_set; - for (auto iter = json_object.begin(); iter != json_object.end(); ++iter) { - i64 key = std::stoll(iter.key()); - auto [_, insert_ok] = key_set.insert(key); + for (auto field : doc.get_object()) { + i64 field_key = std::stoll(String((std::string_view)field.unescaped_key())); + auto field_value = field.value(); + auto [_, insert_ok] = key_set.insert(field_key); if (!insert_ok) { - const auto error_info = fmt::format("Duplicate key {} in sparse array!", key); + const auto error_info = fmt::format("Duplicate key {} in sparse array!", field_key); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } + if (field_value.type() != simdjson::json_type::number) { + const auto error_info = fmt::format("Unrecognized json object type in array"); + RecoverableError(Status::ImportFileFormatError(error_info)); + return nullptr; + } + + simdjson::number num = field_value.get_number(); if (res->literal_type_ == LiteralType::kLongSparseArray) { - const auto &value_obj = iter.value(); - switch (value_obj.type()) { - case nlohmann::json::value_t::number_unsigned: - case nlohmann::json::value_t::number_integer: { - res->long_sparse_array_.first.push_back(key); - res->long_sparse_array_.second.push_back(value_obj.get()); + switch (num.get_number_type()) { + case simdjson::number_type::unsigned_integer: + case simdjson::number_type::signed_integer: { + res->long_sparse_array_.first.push_back(field_key); + res->long_sparse_array_.second.push_back((i64)num); break; } default: { - const auto error_info = fmt::format("Unrecognized json object type in array: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type in array"); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } } } else { - const auto &value_obj = iter.value(); - switch (value_obj.type()) { - case nlohmann::json::value_t::number_float: { - res->double_sparse_array_.first.push_back(key); - res->double_sparse_array_.second.push_back(value_obj.get()); + switch (num.get_number_type()) { + case simdjson::number_type::floating_point_number: { + res->double_sparse_array_.first.push_back(field_key); + res->double_sparse_array_.second.push_back((double)num); break; } default: { - const auto error_info = fmt::format("Unrecognized json object type in array: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type in array"); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } @@ -862,64 +926,67 @@ SharedPtr BuildConstantSparseExprFromJson(const nlohmann::json &js return res; } default: { - const auto error_info = fmt::format("Unrecognized json object type: {}", json_object.type_name()); + const auto error_info = fmt::format("Unrecognized json object type"); RecoverableError(Status::ImportFileFormatError(error_info)); return nullptr; } } } -void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vector> &column_vectors) { +void PhysicalImport::JSONLRowHandler(std::string_view line_sv, Vector> &column_vectors) { + simdjson::padded_string json_str(line_sv); + simdjson::parser parser; + simdjson::document doc = parser.iterate(json_str); for (SizeT i = 0; auto &column_vector_ptr : column_vectors) { ColumnVector &column_vector = *column_vector_ptr; const ColumnDef *column_def = table_info_->GetColumnDefByIdx(i++); - if (line_json.contains(column_def->name_)) { + if (simdjson::value val; doc[column_def->name_].get(val) == simdjson::SUCCESS) { switch (column_vector.data_type()->type()) { case LogicalType::kBoolean: { - bool v = line_json[column_def->name_]; + bool v = val.get(); column_vector.AppendByPtr(reinterpret_cast(&v)); break; } case LogicalType::kTinyInt: { - i8 v = line_json[column_def->name_]; + i8 v = val.get(); column_vector.AppendByPtr(reinterpret_cast(&v)); break; } case LogicalType::kSmallInt: { - i16 v = line_json[column_def->name_]; + i16 v = val.get(); column_vector.AppendByPtr(reinterpret_cast(&v)); break; } case LogicalType::kInteger: { - i32 v = line_json[column_def->name_]; + i32 v = val.get(); column_vector.AppendByPtr(reinterpret_cast(&v)); break; } case LogicalType::kBigInt: { - i64 v = line_json[column_def->name_]; + i64 v = val.get(); column_vector.AppendByPtr(reinterpret_cast(&v)); break; } case LogicalType::kFloat16: { - float v = line_json[column_def->name_]; + float v = val.get(); Float16T float16_v(v); column_vector.AppendByPtr(reinterpret_cast(&float16_v)); break; } case LogicalType::kBFloat16: { - float v = line_json[column_def->name_]; + float v = val.get(v); BFloat16T bfloat16_v(v); column_vector.AppendByPtr(reinterpret_cast(&bfloat16_v)); break; } case LogicalType::kFloat: { - float v = line_json[column_def->name_]; + float v = val.get(v); column_vector.AppendByPtr(reinterpret_cast(&v)); break; } case LogicalType::kDouble: { - double v = line_json[column_def->name_]; + double v = val.get(v); column_vector.AppendByPtr(reinterpret_cast(&v)); break; } @@ -928,7 +995,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get(); + std::string_view str_view = doc[column_def->name_]; column_vector.AppendByStringView(str_view); break; } @@ -937,7 +1004,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, VectorDimension(); switch (embedding_info->Type()) { case EmbeddingDataType::kElemBit: { - const auto i8_embedding = line_json[column_def->name_].get>(); + const Vector i8_embedding = doc[column_def->name_].get>(); const SizeT embedding_dim = i8_embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -958,7 +1025,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector embedding = doc[column_def->name_].get>(); SizeT embedding_dim = embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -969,7 +1036,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector embedding = doc[column_def->name_].get>(); SizeT embedding_dim = embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -980,7 +1047,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector embedding = doc[column_def->name_].get>(); SizeT embedding_dim = embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -991,7 +1058,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector embedding = doc[column_def->name_].get>(); SizeT embedding_dim = embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -1002,7 +1069,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector embedding = doc[column_def->name_].get>(); SizeT embedding_dim = embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -1013,7 +1080,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector f_embedding = doc[column_def->name_].get>(); SizeT embedding_dim = f_embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -1028,7 +1095,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector f_embedding = doc[column_def->name_].get>(); SizeT embedding_dim = f_embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -1043,7 +1110,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector embedding = doc[column_def->name_].get>(); SizeT embedding_dim = embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -1054,7 +1121,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vectorname_].get>(); + const Vector embedding = doc[column_def->name_].get>(); SizeT embedding_dim = embedding.size(); if (embedding_dim != dim) { Status status = Status::InvalidJsonFormat( @@ -1077,7 +1144,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vector const_expr = BuildConstantExprFromJson(line_json[column_def->name_]); + SharedPtr const_expr = BuildConstantExprFromJson(doc[column_def->name_].raw_json()); if (const_expr.get() == nullptr) { RecoverableError(Status::ImportFileFormatError("Invalid json object.")); } @@ -1086,7 +1153,7 @@ void PhysicalImport::JSONLRowHandler(const nlohmann::json &line_json, Vector(column_vector.data_type()->type_info().get()); - SharedPtr const_expr = BuildConstantSparseExprFromJson(line_json[column_def->name_], sparse_info); + SharedPtr const_expr = BuildConstantSparseExprFromJson(doc[column_def->name_].raw_json(), sparse_info); const_expr->TrySortSparseVec(column_def); if (const_expr.get() == nullptr) { RecoverableError(Status::ImportFileFormatError("Invalid json object.")); diff --git a/src/executor/operator/physical_import.cppm b/src/executor/operator/physical_import.cppm index 7fb1186398..1ad9cbdbd3 100644 --- a/src/executor/operator/physical_import.cppm +++ b/src/executor/operator/physical_import.cppm @@ -94,7 +94,7 @@ private: static void NewCSVRowHandler(void *); - void JSONLRowHandler(const nlohmann::json &line_json, Vector> &column_vectors); + void JSONLRowHandler(std::string_view line_json, Vector> &column_vectors); void ParquetValueHandler(const SharedPtr &array, ColumnVector &column_vector, u64 value_idx); @@ -109,7 +109,7 @@ private: char delimiter_{','}; }; -export SharedPtr BuildConstantExprFromJson(const nlohmann::json &json_object); -export SharedPtr BuildConstantSparseExprFromJson(const nlohmann::json &json_object, const SparseInfo *sparse_info); +export SharedPtr BuildConstantExprFromJson(std::string_view json_object); +export SharedPtr BuildConstantSparseExprFromJson(std::string_view json_object, const SparseInfo *sparse_info); } // namespace infinity \ No newline at end of file diff --git a/src/network/http/http_search.cpp b/src/network/http/http_search.cpp index a6991af28a..dce736e02f 100644 --- a/src/network/http/http_search.cpp +++ b/src/network/http/http_search.cpp @@ -1158,7 +1158,7 @@ UniquePtr HTTPSearch::ParseMatchTensor(const nlohmann::json &js return nullptr; } try { - tensor_expr = BuildConstantExprFromJson(field_json_obj.value()); + tensor_expr = BuildConstantExprFromJson(field_json_obj.value().dump()); } catch (std::exception &e) { response["error_code"] = ErrorCode::kInvalidExpression; response["error_message"] = fmt::format("Invalid query_tensor, error info: {}", e.what()); diff --git a/src/network/http_server.cpp b/src/network/http_server.cpp index 9cd3e955d2..76873f17d7 100644 --- a/src/network/http_server.cpp +++ b/src/network/http_server.cpp @@ -208,12 +208,12 @@ infinity::Status ParseColumnDefs(const nlohmann::json &fields, Vectortype()) { case LogicalType::kSparse: { - default_expr = BuildConstantSparseExprFromJson(field_element["default"], + default_expr = BuildConstantSparseExprFromJson(field_element["default"].dump(), dynamic_cast(column_type->type_info().get())); break; } default: { - default_expr = BuildConstantExprFromJson(field_element["default"]); + default_expr = BuildConstantExprFromJson(field_element["default"].dump()); break; } } @@ -1188,7 +1188,7 @@ class InsertHandler final : public HttpRequestHandler { if (value.size() == 1 && value.begin().key() == "array") { SharedPtr array_expr; try { - auto array_result = BuildConstantExprFromJson(value); + auto array_result = BuildConstantExprFromJson(value.dump()); if (!array_result) { throw std::runtime_error("Empty return value!"); }