Limit the embedding dimension to 65536 by JinHai-CN · Pull Request #1547 · infiniflow/infinity · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Limit the embedding dimension to 65536 #1547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions python/test/cases/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ def test_insert(self):
self.test_infinity_obj._test_insert_tensor_array()


@pytest.mark.parametrize("types", ["vector,65535,int", "vector,65535,float"])
@pytest.mark.parametrize("types_examples", [[{"c1": [1] * 65535}],
[{"c1": [4] * 65535}],
[{"c1": [-9999999] * 65535}],
[{"c1": [1.1] * 65535}],
[{"c1": [-9999999.988] * 65535}],
@pytest.mark.parametrize("types", ["vector,16384,int", "vector,16384,float"])
@pytest.mark.parametrize("types_examples", [[{"c1": [1] * 16384}],
[{"c1": [4] * 16384}],
[{"c1": [-9999999] * 16384}],
[{"c1": [1.1] * 16384}],
[{"c1": [-9999999.988] * 16384}],
])
def test_insert_big_embedding_various_type(self, types, types_examples):
self.test_infinity_obj._test_insert_big_embedding_various_type(types, types_examples)
Expand Down
2 changes: 1 addition & 1 deletion python/test/cases/test_knn.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def test_with_various_index_knn_distance_combination(self, check_data, index_col
def test_zero_dimension_vector(self):
self.test_infinity_obj._test_zero_dimension_vector()

@pytest.mark.parametrize("dim", [1000, 10000, 100000, 200000])
@pytest.mark.parametrize("dim", [1000, 16384])
def test_big_dimension_vector(self, dim):
self.test_infinity_obj._test_big_dimension_vector(dim)

Expand Down
20 changes: 10 additions & 10 deletions python/test/internal/test_insert.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,16 +359,16 @@ def _test_insert_big_embedding(self):
"""
db_obj = self.infinity_obj.get_database("default_db")
db_obj.drop_table("test_insert_big_embedding", ConflictType.Ignore)
table_obj = db_obj.create_table("test_insert_big_embedding", {"c1": {"type": "vector,65535,int"}},
table_obj = db_obj.create_table("test_insert_big_embedding", {"c1": {"type": "vector,16384,int"}},
ConflictType.Error)
assert table_obj
res = table_obj.insert([{"c1": [1] * 65535}])
res = table_obj.insert([{"c1": [1] * 16384}])
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": [4] * 65535}])
res = table_obj.insert([{"c1": [4] * 16384}])
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": [7] * 65535}])
res = table_obj.insert([{"c1": [7] * 16384}])
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": [-9999999] * 65535}])
res = table_obj.insert([{"c1": [-9999999] * 16384}])
assert res.error_code == ErrorCode.OK

res = db_obj.drop_table(
Expand All @@ -384,16 +384,16 @@ def _test_insert_big_embedding_float(self):
db_obj = self.infinity_obj.get_database("default_db")
db_obj.drop_table("test_insert_big_embedding_float",
ConflictType.Ignore)
table_obj = db_obj.create_table("test_insert_big_embedding_float", {"c1": {"type": "vector,65535,float"}},
table_obj = db_obj.create_table("test_insert_big_embedding_float", {"c1": {"type": "vector,16384,float"}},
ConflictType.Error)
assert table_obj
res = table_obj.insert([{"c1": [1] * 65535}])
res = table_obj.insert([{"c1": [1] * 16384}])
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": [-9999999] * 65535}])
res = table_obj.insert([{"c1": [-9999999] * 16384}])
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": [1.1] * 65535}])
res = table_obj.insert([{"c1": [1.1] * 16384}])
assert res.error_code == ErrorCode.OK
res = table_obj.insert([{"c1": [-9999999.988] * 65535}])
res = table_obj.insert([{"c1": [-9999999.988] * 16384}])
assert res.error_code == ErrorCode.OK

res = db_obj.drop_table(
Expand Down
2 changes: 2 additions & 0 deletions python/test/internal/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,8 @@ def _test_create_or_drop_same_table_in_different_thread(self):
# wait all threads finished
concurrent.futures.wait(futures)

db_obj.drop_table(table_name, ConflictType.Ignore)

# create empty column table
def _test_create_empty_column_table(self):
"""
2 changes: 1 addition & 1 deletion src/common/default_values.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ export {
constexpr i64 MAX_VARCHAR_SIZE = 65536;
constexpr i64 MAX_BLOB_SIZE = 65536L * 65536L;
constexpr i64 MAX_BITMAP_SIZE = 65536;
constexpr i64 EMBEDDING_LIMIT = 65536;
constexpr i64 EMBEDDING_LIMIT = 16384;
constexpr auto PG_MSG_BUFFER_SIZE = 4096u;

// column vector related constants
Expand Down
20 changes: 16 additions & 4 deletions src/planner/logical_planner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ import create_index_info;
import create_collection_info;
import create_view_info;
import drop_collection_info;
import embedding_info;
import type_info;
import drop_index_info;
import drop_schema_info;
import drop_table_info;
Expand Down Expand Up @@ -464,7 +466,8 @@ Status LogicalPlanner::BuildCreateTable(const CreateStatement *statement, Shared
}
}

switch (create_table_info->column_defs_[idx]->type()->type()) {
const DataType* data_type = create_table_info->column_defs_[idx]->type().get();
switch (data_type->type()) {
case LogicalType::kBoolean:
case LogicalType::kTinyInt:
case LogicalType::kSmallInt:
Expand All @@ -478,15 +481,24 @@ Status LogicalPlanner::BuildCreateTable(const CreateStatement *statement, Shared
case LogicalType::kDate:
case LogicalType::kTime:
case LogicalType::kTimestamp:
case LogicalType::kDateTime:
case LogicalType::kEmbedding:
case LogicalType::kDateTime: {
break;
}
case LogicalType::kEmbedding: {
TypeInfo* type_info_ptr = data_type->type_info().get();
EmbeddingInfo* embedding_info = static_cast<EmbeddingInfo*>(type_info_ptr);
if(embedding_info->Dimension() > EMBEDDING_LIMIT) {
return Status::NotSupport(fmt::format("Embedding data limit is {}, which larger than limit {}", embedding_info->Dimension(), EMBEDDING_LIMIT));
}
break;
}
case LogicalType::kTensor:
case LogicalType::kTensorArray:
case LogicalType::kSparse: {
break;
}
default: {
return Status::NotSupport(fmt::format("Not supported data type: {}", create_table_info->column_defs_[idx]->type()->ToString()));
return Status::NotSupport(fmt::format("Not supported data type: {}", data_type->ToString()));
}
}

Expand Down
Loading
0