Update python SDK by JinHai-CN · Pull Request #1592 · infiniflow/infinity · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Update python SDK #1592

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions example/fulltext_search_zh.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

import infinity

"""
Check out https://github.com/infiniflow/resource.git under /var/infinity (defined by 'resource_dir' in the config file). The jieba dict is then located at
/var/infinity/resource/jieba/dict/jieba.dict.utf8
"""

try:
# open a local directory to store the data
infinity_instance = infinity.connect("/var/infinity")
Expand Down
46 changes: 23 additions & 23 deletions python/infinity/local_infinity/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def get_database(self, db_name: str):

def create_table(self, db_name: str, table_name: str, column_defs: list[WrapColumnDef],
conflict_type: ConflictType = ConflictType.kError, properties: list = None):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
create_table_options = WrapCreateTableOptions()
create_table_options.conflict_type = conflict_type
Expand All @@ -93,56 +93,56 @@ def create_table(self, db_name: str, table_name: str, column_defs: list[WrapColu
create_table_options))

def drop_table(self, db_name: str, table_name: str, conflict_type: ConflictType = ConflictType.kError):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
drop_table_options = DropTableOptions()
drop_table_options.conflict_type = conflict_type
return self.convert_res(self.client.DropTable(db_name, table_name, drop_table_options))

def get_table(self, db_name: str, table_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.GetTable(db_name, table_name))

def list_tables(self, db_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ListTables(db_name), has_table_names=True)

def show_table(self, db_name: str, table_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowTable(db_name, table_name))

def create_index(self, db_name: str, table_name: str, index_name: str, index_info_list: list[WrapIndexInfo],
conflict_type: ConflictType = ConflictType.kError):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
create_index_options = CreateIndexOptions()
create_index_options.conflict_type = conflict_type
return self.convert_res(
self.client.CreateIndex(db_name, table_name, index_name, index_info_list, create_index_options))

def show_index(self, db_name: str, table_name: str, index_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowIndex(db_name, table_name, index_name))

def list_indexes(self, db_name: str, table_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ListTableIndexes(db_name, table_name))

def drop_index(self, db_name: str, table_name: str, index_name: str,
conflict_type: ConflictType = ConflictType.kError):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
drop_index_options = DropIndexOptions()
drop_index_options.conflict_type = conflict_type
return self.convert_res(self.client.DropIndex(db_name, table_name, index_name, drop_index_options))

def insert(self, db_name: str, table_name: str, column_names: list[str], fields):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
retry = 0
inner_ex = None
Expand All @@ -156,18 +156,18 @@ def insert(self, db_name: str, table_name: str, column_names: list[str], fields)
return PyErrorCode.TOO_MANY_CONNECTIONS, "insert failed with exception: " + str(inner_ex)

def import_data(self, db_name: str, table_name: str, file_name: str, import_options):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.Import(db_name, table_name, file_name, import_options))

def export_data(self, db_name: str, table_name: str, file_name: str, export_options, columns: list[str]):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.Export(db_name, table_name, columns, file_name, export_options))

def select(self, db_name: str, table_name: str, select_list: list[WrapParsedExpr], search_expr,
where_expr, limit_expr, offset_expr, group_by_list=None):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.Search(db_name, table_name, select_list,
wrap_search_expr=search_expr, where_expr=where_expr,
Expand All @@ -176,54 +176,54 @@ def select(self, db_name: str, table_name: str, select_list: list[WrapParsedExpr

def explain(self, db_name: str, table_name: str, explain_type, select_list, search_expr,
where_expr, group_by_list, limit_expr, offset_expr):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.Explain(db_name, table_name, explain_type, select_list,
search_expr, where_expr),
has_result_data=True)

def delete(self, db_name: str, table_name: str, where_expr):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.Delete(db_name, table_name, where_expr))

def update(self, db_name: str, table_name: str, where_expr, update_expr_array):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.Update(db_name, table_name, where_expr, update_expr_array))

def show_tables(self, db_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowTables(db_name), has_result_data=True)

def show_columns(self, db_name: str, table_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowColumns(db_name, table_name), has_result_data=True)

def show_segments(self, db_name: str, table_name: str):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowSegments(db_name, table_name))

def show_segment(self, db_name: str, table_name: str, segment_id: int):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowSegment(db_name, table_name, segment_id))

def show_blocks(self, db_name: str, table_name: str, segment_id: int):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowBlocks(db_name, table_name, segment_id))

def show_block(self, db_name: str, table_name: str, segment_id: int, block_id: int):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowBlock(db_name, table_name, segment_id, block_id))

def show_block_column(self, db_name: str, table_name: str, segment_id: int, block_id: int, column_id: int):
if self.client == None:
if self.client is None:
raise Exception("Local infinity is not connected")
return self.convert_res(self.client.ShowBlockColumn(db_name, table_name, segment_id, block_id, column_id))

Expand Down
78 changes: 45 additions & 33 deletions python/infinity/local_infinity/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,23 @@ def get_ordinary_info(column_info, column_defs, column_name, index):
proto_column_type.logical_type = LogicalType.kTinyInt
case "int16":
proto_column_type.logical_type = LogicalType.kSmallInt
case "int32" | "int" | "integer":
case "int32":
proto_column_type.logical_type = LogicalType.kInteger
case "int":
proto_column_type.logical_type = LogicalType.kInteger
case "integer":
proto_column_type.logical_type = LogicalType.kInteger
case "int64":
proto_column_type.logical_type = LogicalType.kBigInt
case "int128":
proto_column_type.logical_type = LogicalType.kHugeInt
case "float" | "float32":
case "float":
proto_column_type.logical_type = LogicalType.kFloat
case "float32":
proto_column_type.logical_type = LogicalType.kFloat
case "double" | "float64":
case "double":
proto_column_type.logical_type = LogicalType.kDouble
case "float64":
proto_column_type.logical_type = LogicalType.kDouble
case "float16":
proto_column_type.logical_type = LogicalType.kFloat16
Expand Down Expand Up @@ -124,38 +132,42 @@ def get_embedding_info(column_info, column_defs, column_name, index):
proto_column_def.column_name = column_name
column_type = WrapDataType()

if column_big_info[0] == "vector":
column_type.logical_type = LogicalType.kEmbedding
elif column_big_info[0] == "tensor":
column_type.logical_type = LogicalType.kTensor
elif column_big_info[0] == "tensorarray":
column_type.logical_type = LogicalType.kTensorArray
else:
raise InfinityException(ErrorCode.FTS_INDEX_NOT_EXIST, f"Unknown column type: {column_big_info[0]}")
match column_big_info[0]:
case "vector":
column_type.logical_type = LogicalType.kEmbedding
case "tensor":
column_type.logical_type = LogicalType.kTensor
case "tensorarray":
column_type.logical_type = LogicalType.kTensorArray
case _:
raise InfinityException(ErrorCode.INVALID_DATA_TYPE, f"Unknown data type: {column_big_info[0]}")

embedding_type = WrapEmbeddingType()
if element_type == "bit":
embedding_type.element_type = EmbeddingDataType.kElemBit
elif element_type == "float32" or element_type == "float":
embedding_type.element_type = EmbeddingDataType.kElemFloat
elif element_type == "float64" or element_type == "double":
embedding_type.element_type = EmbeddingDataType.kElemDouble
elif element_type == "float16":
embedding_type.element_type = EmbeddingDataType.kElemFloat16
elif element_type == "bfloat16":
embedding_type.element_type = EmbeddingDataType.kElemBFloat16
elif element_type == "uint8":
embedding_type.element_type = EmbeddingDataType.kElemUInt8
elif element_type == "int8":
embedding_type.element_type = EmbeddingDataType.kElemInt8
elif element_type == "int16":
embedding_type.element_type = EmbeddingDataType.kElemInt16
elif element_type == "int32" or element_type == "int":
embedding_type.element_type = EmbeddingDataType.kElemInt32
elif element_type == "int64":
embedding_type.element_type = EmbeddingDataType.kElemInt64
else:
raise InfinityException(ErrorCode.INVALID_EMBEDDING_DATA_TYPE, f"Unknown element type: {element_type}")

match element_type:
case "bit":
embedding_type.element_type = EmbeddingDataType.kElemBit
case "float32" | "float":
embedding_type.element_type = EmbeddingDataType.kElemFloat
case "float64" | "double":
embedding_type.element_type = EmbeddingDataType.kElemDouble
case "float16":
embedding_type.element_type = EmbeddingDataType.kElemFloat16
case "bfloat16":
embedding_type.element_type = EmbeddingDataType.kElemBFloat16
case "uint8":
embedding_type.element_type = EmbeddingDataType.kElemUInt8
case "int8":
embedding_type.element_type = EmbeddingDataType.kElemInt8
case "int16":
embedding_type.element_type = EmbeddingDataType.kElemInt16
case "int32" | "int":
embedding_type.element_type = EmbeddingDataType.kElemInt32
case "int64":
embedding_type.element_type = EmbeddingDataType.kElemInt64
case _:
raise InfinityException(ErrorCode.INVALID_EMBEDDING_DATA_TYPE, f"Unknown element type: {element_type}")

embedding_type.dimension = int(length)
assert isinstance(embedding_type, WrapEmbeddingType)
assert embedding_type.element_type is not None
Expand Down
2 changes: 1 addition & 1 deletion python/infinity/remote_thrift/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def get_ordinary_info(column_info, column_defs, column_name, index):
proto_column_type.logic_type = ttypes.LogicType.TinyInt
case "int16":
proto_column_type.logic_type = ttypes.LogicType.SmallInt
case "int32" | "int" | "integer":
case "integer" | "int32" | "int":
proto_column_type.logic_type = ttypes.LogicType.Integer
case "int64":
proto_column_type.logic_type = ttypes.LogicType.BigInt
Expand Down
6 changes: 6 additions & 0 deletions python/test_pysdk/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,18 @@
parent = os.path.join(os.getcwd(), os.pardir)
pparent = os.path.join(parent, os.pardir)
local_infinity_path = os.path.abspath(pparent)
current_python_path = os.path.abspath(pparent) + '/python'

print(current_path, local_infinity_path)

if local_infinity_path in sys.path:
sys.path.remove(local_infinity_path)

if current_python_path in sys.path:
sys.path.remove(current_python_path)

print(sys.path)

import infinity
import pytest
from infinity.errors import ErrorCode
Expand Down
12 changes: 9 additions & 3 deletions src/executor/operator/physical_export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,15 @@ SizeT PhysicalExport::ExportToFVECS(QueryContext *query_context, ExportOperatorS
}

EmbeddingInfo *embedding_type_info = static_cast<EmbeddingInfo *>(data_type->type_info().get());
if (embedding_type_info->Type() != EmbeddingDataType::kElemFloat) {
Status status = Status::NotSupport("Only float element type embedding is supported now.");
RecoverableError(status);
switch(embedding_type_info->Type()) {
case kElemFloat: {
// Supported
break;
}
default: {
Status status = Status::NotSupport(fmt::format("Type: {}, only float element type embedding is supported now", EmbeddingType::EmbeddingDataType2String(embedding_type_info->Type())));
RecoverableError(status);
}
}

i32 dimension = embedding_type_info->Dimension();
Expand Down
Loading
Loading
0