From 55410e3661bc3ffa966a4aa2ad75a2d3cdf7dbdc Mon Sep 17 00:00:00 2001 From: writinwaters Date: Tue, 23 Jul 2024 21:12:46 +0800 Subject: [PATCH 01/12] minor --- docs/references/pysdk_api_reference.md | 88 +++++++++++++++++--------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index 8142000b14..453e620f16 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -524,49 +524,77 @@ res.index_names #['my_index'] ## insert -**Table.insert(*data*)** +```python +Table.insert(data) +``` -Insert records into the current table. +Inserts records (rows) of data into the current table. ### Parameters -- **data** : list - a list of dict which contains information of a record, and must be in line with the table schama. - - dict - - key: **column name :str** - - value: ***str, int, float, list(vector)*** +**data** : `json`, *Required* +Data to insert. Infinity supports inserting multiple rows to a table at one time in the form of `json` (one record) or `json` list (multiple records), with each key-value pair corresponding to a column name and table cell value. + +- key: column name in `str` format. +- value: `str`, `int`, `varchar`, `float`, or `list(vector)` ### Returns - Success: `True` - Failure: `Exception` +:::note +When inserting incomplete records of data, ensure that all uninserted columns have default values. Otherwise, an error will occur. 
+::: + ### Examples + ```python -table_obj.insert({"profile": [1.1, 2.2, 3.3], "age": 30, "c3": "Michael"}) +# Insert one row to the table +table_obj.insert({"c1": [1.1, 2.2, 3.3], "c2": 30, "c3": "Michael"}) +``` +```python +# Insert three rows of column c1, or a column c1 to the table table_obj.insert([{"c1": [1.1, 2.2, 3.3]}, {"c1": [4.4, 5.5, 6.6]}, {"c1": [7.7, 8.8, 9.9]}]) ``` ## import_data -**Table.import_data(*filepath, import_options = None*)** +```python +Table.import_data(filepath, import_options = None) +``` -Imports data from a file into the table. +Imports data from a specified file into the current table. ### Parameters -- **file_path : str** -- **options : dict** a dict which could contain three fields, 'file_type', 'delimiter' and 'header'. If these are not specifyed in the passing parameters, default value is 'csv', ',' and False repectively. - - file_type: str - - `'csv'` (default) - - `'fvecs'` - - `'json'` - - `'jsonl'` - - delimiter : `str` - used to decode csv file (defalut: `','`) - - header : bool - specify whether the csv file has header(defalut: `False`) +#### file_path: `str`, *Required* + +Absolute path to the file for import. Supported file types include: +- `csv` +- `json` +- `jsonl` + +#### import_options: `json` + +Example: `{"header":True, "delimiter": "\t", file_type}` + +- **header**: `bool` + Whether to display table header or not. Works with **.csv** files only: + - `True`: Display table header. + - `False`: (Default) Do not display table header. + +- **delimiter**: `str`, *Optional*, Defaults to "," + Delimiter to separate columns. Works with **.csv** files only. + +- **file_type**: `str`, *Required* + The type of the imported file. Supported file types include: + - `csv` + - `json` + - `jsonl` + + ### Returns @@ -589,7 +617,7 @@ Exports the current table to a specified file. ### Parameters -#### file_path: `str` *Required* +#### file_path: `str`, *Required* Absolute path to the file for export. 
Supported file types include: @@ -598,29 +626,31 @@ Absolute path to the file for export. Supported file types include: #### export_options: `json` -- **header**: `bool` *Optional* +Example: `{"header": False, "delimiter": "\t", "file_type": "jsonl", "offset": 2, "limit": 5}` + +- **header**: `bool`, *Optional* Whether to display table header or not. Works with **.csv** files only: - `True`: Display table header. - `False`: (Default) Do not display table header. -- **delimiter**: `str` *Optional* Defaults to "," +- **delimiter**: `str`, *Optional*, Defaults to "," Delimiter to separate columns. Works with **.csv** files only. -- **file_type**: `str` *Required* +- **file_type**: `str`, *Required* The type of the exported file. Supported file types include: - `csv` - `jsonl` -- **offset**: `int` *Optional* +- **offset**: `int`, *Optional* Index specifying the starting row for export. Usually used in conjunction with `limit`. If not specified, the file export starts from the first row. -- **limit**: `int` *Optional* +- **limit**: `int`, *Optional* The maximum number of rows to export. Usually used in conjunction with `offset`. If the table's row count exceeds `offset` + `limit`, the excess rows are excluded from the export. -- **row_limit**: `int` *Optional* +- **row_limit**: `int`, *Optional* Used when you have a large table and need to break the output file into multiple parts. This argument sets the row limit for each part. If you specify **test_export_file.csv** as the file name, the exported files will be named **test_export_file.csv**, **test_export_file.csv.part1**, **test_export_file.csv.part2**, and so one. -#### columns: `[str]` *Optional* +#### columns: `[str]`, *Optional* Columns to export to the output file, for example, `["num", "name", "score"]`. If not specified, the entire table is exported. 
From d782e149b6d5cd9f4fd3767beaf81e4a28a8a949 Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 10:47:17 +0800 Subject: [PATCH 02/12] Updates per feedback --- docs/references/pysdk_api_reference.md | 33 +++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index 453e620f16..1f40c512fc 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -538,15 +538,36 @@ Data to insert. Infinity supports inserting multiple rows to a table at one time - key: column name in `str` format. - value: `str`, `int`, `varchar`, `float`, or `list(vector)` +:::tip NOTE +Bath row limit: 8,192. You are allowed to insert a maximum of 8,192 rows at once. +::: + +:::note +When inserting incomplete records of data, ensure that all uninserted columns have default values. Otherwise, an error will occur. + +```python +table_instance = db_instance.create_table( + "my_table", + { + "c1": {"type": "int"}, + "c2": { + "type": "int", + "default": 18, + }, + "c3": { + "type": "varchar", + "default": "A" + }, + }, + ) +``` +::: + ### Returns - Success: `True` - Failure: `Exception` -:::note -When inserting incomplete records of data, ensure that all uninserted columns have default values. Otherwise, an error will occur. -::: - ### Examples ```python @@ -562,7 +583,7 @@ table_obj.insert([{"c1": [1.1, 2.2, 3.3]}, {"c1": [4.4, 5.5, 6.6]}, {"c1": [7.7, ## import_data ```python -Table.import_data(filepath, import_options = None) +Table.import_data(filepath, import_options) ``` Imports data from a specified file into the current table. @@ -610,7 +631,7 @@ table_obj.import_data(test_csv_dir, None) ## export_data ```python -Table.export_data(filepath, export_options = None, columns = None) +Table.export_data(filepath, export_options, columns = None) ``` Exports the current table to a specified file. 
From 70df548d7ce89295b87df11596634178d60957b9 Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 10:50:16 +0800 Subject: [PATCH 03/12] minor format updates --- docs/references/pysdk_api_reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index 1f40c512fc..a456a0df0a 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -545,7 +545,7 @@ Bath row limit: 8,192. You are allowed to insert a maximum of 8,192 rows at once :::note When inserting incomplete records of data, ensure that all uninserted columns have default values. Otherwise, an error will occur. -```python +```python {7,11} table_instance = db_instance.create_table( "my_table", { From 4fa0c804afe3043c198b1834873618742317fa35 Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 14:34:18 +0800 Subject: [PATCH 04/12] Added examples. --- docs/references/pysdk_api_reference.md | 59 +++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index a456a0df0a..7a6baf4bb4 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -535,15 +535,20 @@ Inserts records (rows) of data into the current table. **data** : `json`, *Required* Data to insert. Infinity supports inserting multiple rows to a table at one time in the form of `json` (one record) or `json` list (multiple records), with each key-value pair corresponding to a column name and table cell value. -- key: column name in `str` format. -- value: `str`, `int`, `varchar`, `float`, or `list(vector)` +- key: Column name in `str` format. +- value: Table cell value. 
Supported data types include: + - Primitive: `Union(int8, int16, int32, int, int64, float, float32, double, float64, bool, varchar)` + - Vector in `Union(list[int], list[float], list[float32])` + - Sparse vector: `{"indices": list[int], "values": Union(list[int], list[float], list[float32])}` + - Tensor: `Union(list[int], list[float], list[float32, np.ndarray[int], np.ndarray[float], np.ndarray[float32]])` + - Tensor array: `Union(list[int], list[float], list[float32, np.ndarray[int], np.ndarray[float], np.ndarray[float32]])` :::tip NOTE Bath row limit: 8,192. You are allowed to insert a maximum of 8,192 rows at once. ::: :::note -When inserting incomplete records of data, ensure that all uninserted columns have default values. Otherwise, an error will occur. +When inserting incomplete records of data, ensure that all uninserted columns have default values when calling `create_table`. Otherwise, an error will occur. ```python {7,11} table_instance = db_instance.create_table( @@ -571,13 +576,53 @@ table_instance = db_instance.create_table( ### Examples ```python -# Insert one row to the table -table_obj.insert({"c1": [1.1, 2.2, 3.3], "c2": 30, "c3": "Michael"}) +# Insert one row +table_obj.insert({"c1": 1, "vec": [1.1, 2.2, 3.3]}) ``` ```python -# Insert three rows of column c1, or a column c1 to the table -table_obj.insert([{"c1": [1.1, 2.2, 3.3]}, {"c1": [4.4, 5.5, 6.6]}, {"c1": [7.7, 8.8, 9.9]}]) +# Insert three rows of column vec, or a column vec to the table +table_obj.insert([{"vec": [1.1, 2.2, 3.3]}, {"vec": [4.4, 5.5, 6.6]}, {"vec": [7.7, 8.8, 9.9]}]) +``` + +```python +# Insert three rows +table_instance.insert( + [ + { + "num": 1, + "vec": {"indices": [10, 20, 30], "values": [1.1, 2.2, 3.3]} + }, + { + "num": 2, + "vec": {"indices": [40, 50, 60], "values": [4.4, 5.5, 6.6]} + }, + { + "num": 3, + "vec": {"indices": [70, 80, 90], "values": [7.7, 8.8, 9.9]} + }, + ] +) +``` + +```python +# Insert three rows +table_instance.insert( + [ + { + "num": 1, + "vec": 
{"indices": [10, 20, 30], "values": [1.1, 2.2, 3.3]} + }, + { + "num": 2, + "vec": {"indices": [40, 50, 60], "values": [4.4, 5.5, 6.6]} + }, + { + "num": 3, + "vec": {"indices": [70, 80, 90], "values": [7.7, 8.8, 9.9]} + }, + ] +) ``` ## import_data From 246832488652b0c425517a188ad6af9bce616161 Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 14:49:18 +0800 Subject: [PATCH 05/12] Updated supported data types --- docs/references/pysdk_api_reference.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index 7a6baf4bb4..bc4e1ad719 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -538,10 +538,10 @@ Data to insert. Infinity supports inserting multiple rows to a table at one time - key: Column name in `str` format. - value: Table cell value. Supported data types include: - Primitive: `Union(int8, int16, int32, int, int64, float, float32, double, float64, bool, varchar)` - - Vector in `Union(list[int], list[float], list[float32])` - - Sparse vector: `{"indices": list[int], "values": Union(list[int], list[float], list[float32])}` - - Tensor: `Union(list[int], list[float], list[float32, np.ndarray[int], np.ndarray[float], np.ndarray[float32]])` - - Tensor array: `Union(list[int], list[float], list[float32, np.ndarray[int], np.ndarray[float], np.ndarray[float32]])` + - Vector in `Union(list[float], list[float32])` + - Sparse vector: `{"indices": Union(list[int], list[int8] list[uint8], "values": Union(list[float], list[float32])}` + - Tensor: `Union(list[float], list[float32], np.ndarray[float], np.ndarray[float32])` + - Tensor array: `Union(list[np.ndarray[float]], list[np.ndarray[float32]])` :::tip NOTE Bath row limit: 8,192. You are allowed to insert a maximum of 8,192 rows at once. 
From 49d20656214133689c226755525564ea0e02f6c7 Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 17:26:59 +0800 Subject: [PATCH 06/12] Added examples --- docs/references/pysdk_api_reference.md | 125 ++++++++++++++++--------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index bc4e1ad719..5853538395 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -535,37 +535,13 @@ Inserts records (rows) of data into the current table. **data** : `json`, *Required* Data to insert. Infinity supports inserting multiple rows to a table at one time in the form of `json` (one record) or `json` list (multiple records), with each key-value pair corresponding to a column name and table cell value. -- key: Column name in `str` format. -- value: Table cell value. Supported data types include: - - Primitive: `Union(int8, int16, int32, int, int64, float, float32, double, float64, bool, varchar)` - - Vector in `Union(list[float], list[float32])` - - Sparse vector: `{"indices": Union(list[int], list[int8] list[uint8], "values": Union(list[float], list[float32])}` - - Tensor: `Union(list[float], list[float32], np.ndarray[float], np.ndarray[float32])` - - Tensor array: `Union(list[np.ndarray[float]], list[np.ndarray[float32]])` - :::tip NOTE -Bath row limit: 8,192. You are allowed to insert a maximum of 8,192 rows at once. +Batch row limit: 8,192. You are allowed to insert a maximum of 8,192 rows at once. ::: :::note -When inserting incomplete records of data, ensure that all uninserted columns have default values when calling `create_table`. Otherwise, an error will occur. 
- -```python {7,11} -table_instance = db_instance.create_table( - "my_table", - { - "c1": {"type": "int"}, - "c2": { - "type": "int", - "default": 18, - }, - "c3": { - "type": "varchar", - "default": "A" - }, - }, - ) -``` +When inserting incomplete records of data, ensure that all uninserted columns have default values when calling `create_table`. Otherwise, an error will occur. +For information about setting default column values, see `create_table`. ::: ### Returns @@ -575,56 +551,113 @@ table_instance = db_instance.create_table( ### Examples -```python -# Insert one row -table_obj.insert({"c1": 1, "vec": [1.1, 2.2, 3.3]}) +#### Insert primitives + +```python {12,14} +# Create a table with four primitive columns: +table_instance = db_instance.create_table("primitive_table", { + "c1": {"type": "integer"}, + "c2": {"type": "varchar"}, + "c3": {"type": "float"}, + "c4": { + "type": "bool", + "default": False, + }, +}) +# Insert a complete record (row) into the table: +table_instance.insert("c1": 1, "c2": "Tom", "c3": 90.5, "c4": True) +# Insert an incomplete record (row), with the "c4" column defaulting to False: +table_instance.insert("c1": 2, "c2": "Jeffery", "C3": 88.0) ``` +#### Insert vectors + ```python -# Insert three rows of column vec, or a column vec to the table -table_obj.insert([{"vec": [1.1, 2.2, 3.3]}, {"vec": [4.4, 5.5, 6.6]}, {"vec": [7.7, 8.8, 9.9]}]) +# Create a table with a 3-d vector column "cvc": +table_instance = db_instance.create_table("vector_table", { + "c1": { + "type": "integer", + "default": 2024, + }, + "cvc": {"type": "vector,3,float"}, +}) +# Insert one complete record (row) into the table: +table_obj.insert({"c1": 2023, "cvc": [1.1, 2.2, 3.3]}) +# Insert three rows into the vector column "cvc", with the "c1" column defaulting to 2024: +table_obj.insert([{"cvc": [1.1, 2.2, 3.3]}, {"cvc": [4.4, 5.5, 6.6]}, {"cvc": [7.7, 8.8, 9.9]}]) ``` +#### Insert sparse vectors ```python -# Insert three rows +# Create a table with a 100-d sparse 
vector column "csp": +table_instance = db_instance.create_table("sparse_vector_table", { + "c1": { + "type": "integer", + "default": 2024, + }, + "csp": {"type": "sparse, 100,float,int"} +}) + +# Insert three rows into the table: +# `indices` specifies the correspoing indices to the values in `values`. +# Note that the third row sets "c1" as 2024 by default. table_instance.insert( [ { - "num": 1, - "vec": {"indices": [10, 20, 30], "values": [1.1, 2.2, 3.3]} + "c1": 2022, + "csp": {"indices": [10, 20, 30], "values": [1.1, 2.2, 3.3]} }, { - "num": 2, - "vec": {"indices": [40, 50, 60], "values": [4.4, 5.5, 6.6]} + "c1": 2023, + "csp": {"indices": [40, 50, 60], "values": [4.4, 5.5, 6.6]} }, { - "num": 3, - "vec": {"indices": [70, 80, 90], "values": [7.7, 8.8, 9.9]} + "csp": {"indices": [70, 80, 90], "values": [7.7, 8.8, 9.9]} }, ] ) ``` +#### Insert tensors + ```python -# Insert three rows +# Create a table with a tensor column "cts": +table_instance = db_instance.create_table("tensor_table", { + "c1": { + "type": "integer", + "default": 2024, + } + "cts": {"type": "tensor,4,float"} +}) +# Insert three rows into the tensor column "cts", with the "c1" column defaulting to 2024: table_instance.insert( [ { - "num": 1, - "vec": {"indices": [10, 20, 30], "values": [1.1, 2.2, 3.3]} + "cts": [[1.0, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]], }, { - "num": 2, - "vec": {"indices": [40, 50, 60], "values": [4.4, 5.5, 6.6]} + "cts": [[4.0, 0.0, 4.3, 4.5], [4.0, 4.2, 4.4, 5.0]], }, { - "num": 3, - "vec": {"indices": [70, 80, 90], "values": [7.7, 8.8, 9.9]} + "cts": [[0.9, 0.1, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]], }, ] ) ``` +#### Insert tensor arrays + +```python +# Creat a table with only one tensor array column "cta": +table_instance = db_instance.create_table("tensor_array_table", { + "cta": { + "type": "tensorarray,2,int" + } +}) + +table_instance.insert([{"cta": [[[1, 2], [3, 4]], [[5, 6]]]}]) +``` + ## import_data ```python From f37471f7975f1faa50fde63dd12be0ab3758887d Mon Sep 17 
00:00:00 2001 From: Jin Hai Date: Wed, 24 Jul 2024 18:05:17 +0800 Subject: [PATCH 07/12] Update INSERT API Signed-off-by: Jin Hai --- docs/references/pysdk_api_reference.md | 126 +++++++++---------------- 1 file changed, 44 insertions(+), 82 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index 5853538395..f0912b0138 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -528,19 +528,19 @@ res.index_names #['my_index'] Table.insert(data) ``` -Inserts records (rows) of data into the current table. +Inserts rows (rows) of data into the current table. ### Parameters **data** : `json`, *Required* -Data to insert. Infinity supports inserting multiple rows to a table at one time in the form of `json` (one record) or `json` list (multiple records), with each key-value pair corresponding to a column name and table cell value. +Data to insert. Infinity supports inserting multiple rows to a table at one time in the form of `json` (one record) or `json` list (multiple rows), with each key-value pair corresponding to a column name and table cell value. :::tip NOTE Batch row limit: 8,192. You are allowed to insert a maximum of 8,192 rows at once. ::: :::note -When inserting incomplete records of data, ensure that all uninserted columns have default values when calling `create_table`. Otherwise, an error will occur. +When inserting incomplete rows of data, ensure that all uninserted columns have default values when calling `create_table`. Otherwise, an error will occur. For information about setting default column values, see `create_table`. ::: @@ -556,106 +556,68 @@ For information about setting default column values, see `create_table`. 
```python {12,14} # Create a table with four primitive columns: table_instance = db_instance.create_table("primitive_table", { - "c1": {"type": "integer"}, - "c2": {"type": "varchar"}, - "c3": {"type": "float"}, - "c4": { - "type": "bool", - "default": False, - }, + "c1": {"type": "int8"}, + "c2": {"type": "int16"}, + "c3": {"type": "int"}, + "c4": {"type": "int32"}, # Same as int + "c5": {"type": "integer"}, # Same as int + "c6": {"type": "int64"}, + "c7": {"type": "varchar"}, + "c8": {"type": "float"}, + "c8": {"type": "float32"}, # Same as float + "c8": {"type": "double"}, + "c8": {"type": "float64"}, # Same as double + "c9": {"type": "bool", "default": False}, }) -# Insert a complete record (row) into the table: + +# Insert a complete row into the table: table_instance.insert("c1": 1, "c2": "Tom", "c3": 90.5, "c4": True) -# Insert an incomplete record (row), with the "c4" column defaulting to False: -table_instance.insert("c1": 2, "c2": "Jeffery", "C3": 88.0) + +# Insert an incomplete row, with the "c4" column defaulting to False: +table_instance.insert("c1": 2, "c2": "Jeffery", "c3": 88.0) ``` #### Insert vectors ```python -# Create a table with a 3-d vector column "cvc": -table_instance = db_instance.create_table("vector_table", { - "c1": { - "type": "integer", - "default": 2024, - }, - "cvc": {"type": "vector,3,float"}, -}) -# Insert one complete record (row) into the table: -table_obj.insert({"c1": 2023, "cvc": [1.1, 2.2, 3.3]}) -# Insert three rows into the vector column "cvc", with the "c1" column defaulting to 2024: -table_obj.insert([{"cvc": [1.1, 2.2, 3.3]}, {"cvc": [4.4, 5.5, 6.6]}, {"cvc": [7.7, 8.8, 9.9]}]) +# Create a table with a integer column and a 3-d vector column: +table_instance = db_instance.create_table("vector_table", {"c1": {"type": "integer", "default": 2024}, "vector_column": {"type": "vector,3,float"}}) + +# Insert one complete row into the table: +table_obj.insert({"c1": 2023, "vector_column": [1.1, 2.2, 3.3]}) + +# Insert three rows 
into the table: +table_obj.insert([{"vector_column": [1.1, 2.2, 3.3]}, {"vector_column": [4.4, 5.5, 6.6]}, {"vector_column": [7.7, 8.8, 9.9]}]) ``` #### Insert sparse vectors ```python -# Create a table with a 100-d sparse vector column "csp": -table_instance = db_instance.create_table("sparse_vector_table", { - "c1": { - "type": "integer", - "default": 2024, - }, - "csp": {"type": "sparse, 100,float,int"} -}) +# Create a table with an integer column and a 100-d sparse vector column: +table_instance = db_instance.create_table("sparse_vector_table", {"c1": {"type": "integer", "default": 2024}, "sparse_column": {"type": "sparse,100,float,int"}}) # Insert three rows into the table: # `indices` specifies the correspoing indices to the values in `values`. -# Note that the third row sets "c1" as 2024 by default. -table_instance.insert( - [ - { - "c1": 2022, - "csp": {"indices": [10, 20, 30], "values": [1.1, 2.2, 3.3]} - }, - { - "c1": 2023, - "csp": {"indices": [40, 50, 60], "values": [4.4, 5.5, 6.6]} - }, - { - "csp": {"indices": [70, 80, 90], "values": [7.7, 8.8, 9.9]} - }, - ] -) +# Note that the second row sets "c1" as 2024 by default. 
+table_instance.insert([{"c1": 2022, "sparse_column": {"indices": [10, 20, 30], "values": [1.1, 2.2, 3.3]}}, {"sparse_column": {"indices": [70, 80, 90], "values": [7.7, 8.8, 9.9]}}]) ``` #### Insert tensors ```python -# Create a table with a tensor column "cts": -table_instance = db_instance.create_table("tensor_table", { - "c1": { - "type": "integer", - "default": 2024, - } - "cts": {"type": "tensor,4,float"} -}) -# Insert three rows into the tensor column "cts", with the "c1" column defaulting to 2024: -table_instance.insert( - [ - { - "cts": [[1.0, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]], - }, - { - "cts": [[4.0, 0.0, 4.3, 4.5], [4.0, 4.2, 4.4, 5.0]], - }, - { - "cts": [[0.9, 0.1, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]], - }, - ] -) +# Create a table with a tensor column: +table_instance = db_instance.create_table("tensor_table", {"c1": {"type": "integer", "default": 2024}, "tensor_column": {"type": "tensor,4,float"}}) + +# Insert one row into the table, with the "c1" column defaulting to 2024: +table_instance.insert([{"tensor_column": [[1.0, 0.0, 0.0, 0.0], [1.1, 0.0, 0.0, 0.0]]}]) ``` #### Insert tensor arrays ```python -# Creat a table with only one tensor array column "cta": -table_instance = db_instance.create_table("tensor_array_table", { - "cta": { - "type": "tensorarray,2,int" - } -}) +# Create a table with only one tensor array column: +table_instance = db_instance.create_table("tensor_array_table", {"tensor_array_column": {"type": "tensorarray,2,float"}}) -table_instance.insert([{"cta": [[[1, 2], [3, 4]], [[5, 6]]]}]) +table_instance.insert([{"tensor_array_column": [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0]]]}]) ``` ## import_data @@ -866,7 +828,7 @@ table_obj.filter("(-7 < c1 or 9 >= c1) and (c2 = 3)") **Table.knn(*vector_column_name, embedding_data, embedding_data_type, distance_type, topn, knn_params = None*)** -Build a KNN search expression. Find the top n closet records to the given vector. +Build a KNN search expression. 
Find the top n closest rows to the given vector. ### Parameters @@ -931,7 +893,7 @@ Create a full-text search expression. The column where text is searched, and has create full-text index on it before. - **matching_text : str** - **options_text : str** - 'topn=2': Retrieve the two most relevant records. The `topn` is `10` by default. + 'topn=2': Retrieve the two most relevant rows. The `topn` is `10` by default. ### Returns @@ -957,7 +919,7 @@ for question in questions: **Table.match_tensor(*vector_column_name, tensor_data, tensor_data_type, method_type, topn, extra_option)** -Build a KNN tensor search expression. Find the top n closet records to the given tensor according to chosen method. +Build a KNN tensor search expression. Find the top n closest rows to the given tensor according to chosen method. For example, find k most match tensors generated by ColBERT. @@ -1004,7 +966,7 @@ Build a fusion expression. Common options: - - 'topn=10': Retrieve the 10 most relevant records. The defualt value is `100`. + - 'topn=10': Retrieve the 10 most relevant rows. The default value is `100`. 
Dedicated options of rrf: From 6ced6c08a9c56341a513a6dd767f07d3feae052f Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 19:15:29 +0800 Subject: [PATCH 08/12] Updated example --- docs/references/pysdk_api_reference.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index f0912b0138..c1eb8c79a0 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -664,8 +664,16 @@ Example: `{"header":True, "delimiter": "\t", file_type}` ### Examples +#### Import a csv file + +```python +table_instance.import_data(os.getcwd() + "/your_file.csv", {"header": False, "file_type": "csv", "delimiter": "\t"}) +``` + +#### Import a jsonl file + ```python -table_obj.import_data(test_csv_dir, None) +table_instance.import_data(os.getcwd() + "/your_file.jsonl", {"file_type": "jsonl"}) ``` ## export_data @@ -723,8 +731,8 @@ Columns to export to the output file, for example, `["num", "name", "score"]`. 
I ### Examples ```python - table_instance.export_data(os.getcwd() + "/export_data.jsonl", - {"header": False, "file_type": "jsonl", "delimiter": ",", "row_limit": 2}, ["num", "name", "score"]) +table_instance.export_data(os.getcwd() + "/export_data.jsonl", + {"header": False, "file_type": "jsonl", "delimiter": ",", "row_limit": 2}, ["num", "name", "score"]) ``` ## delete From 0f41bd74e5316948f1711a19f5c829193e79ac3d Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 19:35:12 +0800 Subject: [PATCH 09/12] minor editorial updates --- docs/references/pysdk_api_reference.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index c1eb8c79a0..e891342e42 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -528,7 +528,7 @@ res.index_names #['my_index'] Table.insert(data) ``` -Inserts rows (rows) of data into the current table. +Inserts rows of data into the current table. ### Parameters From 4847c3d26e864098e2ab8af2e6fe92b39f6dd445 Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 20:10:18 +0800 Subject: [PATCH 10/12] minor updates to python examples --- docs/references/pysdk_api_reference.md | 11 +++++++++-- example/export_data.py | 2 +- example/import_data.py | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index e891342e42..8a8a0af9a2 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -730,9 +730,16 @@ Columns to export to the output file, for example, `["num", "name", "score"]`. 
I ### Examples +#### Export your table to a csv file + +```python +table_instance.export_data(os.getcwd() + "/export_data.csv", {"header": True, "file_type": "csv", "delimiter": ",", "offset": 2, "limit": 7, "row_limit": 3}, ["num", "name", "score"]) +``` + +#### Export your table to a jsonl file + ```python -table_instance.export_data(os.getcwd() + "/export_data.jsonl", - {"header": False, "file_type": "jsonl", "delimiter": ",", "row_limit": 2}, ["num", "name", "score"]) +table_instance.export_data(os.getcwd() + "/export_data.jsonl", {"file_type": "jsonl", "offset": 1, "limit": 8, "row_limit": 2}, ["num", "name", "score"]) ``` ## delete diff --git a/example/export_data.py b/example/export_data.py index f06094796d..5bcc3704e7 100644 --- a/example/export_data.py +++ b/example/export_data.py @@ -111,7 +111,7 @@ # TODO also show how to export other type of file table_instance.export_data(os.getcwd() + "/export_data.jsonl", - {"header": False, "file_type": "jsonl", "delimiter": ",", "row_limit": 2}, ["num", "name", "score"]) + {"file_type": "jsonl", "offset": 2, "limit": 7, "row_limit": 2}, ["num", "name", "score"]) infinity_instance.disconnect() diff --git a/example/import_data.py b/example/import_data.py index 7dc87875a5..5d92ef00ca 100644 --- a/example/import_data.py +++ b/example/import_data.py @@ -43,7 +43,7 @@ # TODO also show how to import other type of file table_instance.import_data(project_directory + "/../test/data/csv/fulltext_delete.csv", - {"file_type": "csv", "delimiter": "\t"}) + {"header": True, "file_type": "csv", "delimiter": "\t"}) result = table_instance.output(["num", "doc"]).to_pl() print(result) From 3351ee6b71d0ff362da0a4b04ccf6d53e9a23525 Mon Sep 17 00:00:00 2001 From: writinwaters Date: Wed, 24 Jul 2024 20:47:59 +0800 Subject: [PATCH 11/12] minor --- docs/references/pysdk_api_reference.md | 32 +++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/references/pysdk_api_reference.md 
b/docs/references/pysdk_api_reference.md index 8a8a0af9a2..5789142e3c 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -553,28 +553,28 @@ For information about setting default column values, see `create_table`. #### Insert primitives -```python {12,14} +```python # Create a table with four primitive columns: table_instance = db_instance.create_table("primitive_table", { - "c1": {"type": "int8"}, - "c2": {"type": "int16"}, - "c3": {"type": "int"}, - "c4": {"type": "int32"}, # Same as int - "c5": {"type": "integer"}, # Same as int - "c6": {"type": "int64"}, + "c1": {"type": "int8", "default": 0}, + "c2": {"type": "int16", "default": 0}, + "c3": {"type": "int", "default": 0}, + "c4": {"type": "int32", "default": 0}, # Same as int + "c5": {"type": "integer", "default": 0}, # Same as int + "c6": {"type": "int64", "default": 0}, "c7": {"type": "varchar"}, - "c8": {"type": "float"}, - "c8": {"type": "float32"}, # Same as float - "c8": {"type": "double"}, - "c8": {"type": "float64"}, # Same as double - "c9": {"type": "bool", "default": False}, + "c8": {"type": "float", "default": 1.0}, + "c9": {"type": "float32", "default": 1.0}, # Same as float + "c10": {"type": "double", "default": 1.0}, + "c11": {"type": "float64", "default": 1.0}, # Same as double + "c12": {"type": "bool", "default": False}, }) -# Insert a complete row into the table: -table_instance.insert("c1": 1, "c2": "Tom", "c3": 90.5, "c4": True) +# Insert an incomplete row, with the rest columns defaulting to their defaults: +table_instance.insert({"c1": 1, "c7": "Tom", "c12": True}) -# Insert an incomplete row, with the "c4" column defaulting to False: -table_instance.insert("c1": 2, "c2": "Jeffery", "c3": 88.0) +# Insert an incomplete row, with the rest columns defaulting to their defaults: +table_instance.insert({"c1": 2, "c7": "Jeffery"}) ``` #### Insert vectors From 4eff6f7af731ef1c172ea7faa373c9c7dbaa64cc Mon Sep 17 00:00:00 2001 From: writinwaters Date: 
Wed, 24 Jul 2024 20:51:47 +0800 Subject: [PATCH 12/12] minor --- docs/references/pysdk_api_reference.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/references/pysdk_api_reference.md b/docs/references/pysdk_api_reference.md index 5789142e3c..7e213b2ba8 100644 --- a/docs/references/pysdk_api_reference.md +++ b/docs/references/pysdk_api_reference.md @@ -570,11 +570,8 @@ table_instance = db_instance.create_table("primitive_table", { "c12": {"type": "bool", "default": False}, }) -# Insert an incomplete row, with the rest columns defaulting to their defaults: +# Insert an incomplete row, with remaining cells defaulting to their column defaults: table_instance.insert({"c1": 1, "c7": "Tom", "c12": True}) - -# Insert an incomplete row, with the rest columns defaulting to their defaults: -table_instance.insert({"c1": 2, "c7": "Jeffery"}) ``` #### Insert vectors