diff --git a/extension/core_functions/function_list.cpp b/extension/core_functions/function_list.cpp index 4957fb935044..52db5e11baeb 100644 --- a/extension/core_functions/function_list.cpp +++ b/extension/core_functions/function_list.cpp @@ -385,8 +385,8 @@ static const StaticFunctionDefinition core_functions[] = { DUCKDB_SCALAR_FUNCTION(UrlDecodeFun), DUCKDB_SCALAR_FUNCTION(UrlEncodeFun), DUCKDB_SCALAR_FUNCTION(UUIDFun), - DUCKDB_SCALAR_FUNCTION_SET(ExtractUuidTimestampFun), - DUCKDB_SCALAR_FUNCTION_SET(ExtractUuidVerisonFun), + DUCKDB_SCALAR_FUNCTION(UUIDExtractTimestampFun), + DUCKDB_SCALAR_FUNCTION(UUIDExtractVersionFun), DUCKDB_SCALAR_FUNCTION(UUIDv4Fun), DUCKDB_SCALAR_FUNCTION(UUIDv7Fun), DUCKDB_AGGREGATE_FUNCTION(VarPopFun), diff --git a/extension/core_functions/include/core_functions/scalar/random_functions.hpp b/extension/core_functions/include/core_functions/scalar/random_functions.hpp index 5febc3ccfa63..34d849e6b45c 100644 --- a/extension/core_functions/include/core_functions/scalar/random_functions.hpp +++ b/extension/core_functions/include/core_functions/scalar/random_functions.hpp @@ -66,22 +66,22 @@ struct UUIDv7Fun { static ScalarFunction GetFunction(); }; -struct ExtractUuidVerisonFun { +struct UUIDExtractVersionFun { static constexpr const char *Name = "uuid_extract_version"; static constexpr const char *Parameters = "uuid"; static constexpr const char *Description = "Extract a version for the given UUID."; static constexpr const char *Example = "uuid_extract_version('019482e4-1441-7aad-8127-eec99573b0a0')"; - static ScalarFunctionSet GetFunctions(); + static ScalarFunction GetFunction(); }; -struct ExtractUuidTimestampFun { +struct UUIDExtractTimestampFun { static constexpr const char *Name = "uuid_extract_timestamp"; static constexpr const char *Parameters = "uuid"; static constexpr const char *Description = "Extract the timestamp for the given UUID v7."; static constexpr const char *Example = 
"uuid_extract_timestamp('019482e4-1441-7aad-8127-eec99573b0a0')"; - static ScalarFunctionSet GetFunctions(); + static ScalarFunction GetFunction(); }; } // namespace duckdb diff --git a/extension/core_functions/scalar/random/functions.json b/extension/core_functions/scalar/random/functions.json index 5ec24539d21b..777f64bf5656 100644 --- a/extension/core_functions/scalar/random/functions.json +++ b/extension/core_functions/scalar/random/functions.json @@ -38,15 +38,15 @@ "parameters": "uuid", "description": "Extract a version for the given UUID.", "example": "uuid_extract_version('019482e4-1441-7aad-8127-eec99573b0a0')", - "type": "scalar_function_set", - "struct": "ExtractUuidVerisonFun" + "type": "scalar_function", + "struct": "UUIDExtractVersionFun" }, { "name": "uuid_extract_timestamp", "parameters": "uuid", "description": "Extract the timestamp for the given UUID v7.", "example": "uuid_extract_timestamp('019482e4-1441-7aad-8127-eec99573b0a0')", - "type": "scalar_function_set", - "struct": "ExtractUuidTimestampFun" + "type": "scalar_function", + "struct": "UUIDExtractTimestampFun" } ] diff --git a/extension/core_functions/scalar/random/random.cpp b/extension/core_functions/scalar/random/random.cpp index abb7498829c0..2f0ea5bb6df3 100644 --- a/extension/core_functions/scalar/random/random.cpp +++ b/extension/core_functions/scalar/random/random.cpp @@ -9,18 +9,6 @@ namespace duckdb { -struct ExtractVersionStrOperator { - template - static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { - const idx_t len = input.GetSize(); - if (len != 36) { - throw InvalidInputException("Given string '%s' is invalid UUID.", input.GetString()); - } - // UUIDv4 and UUIDv7 stores version as the 15-th uint8_t. 
- return input.GetPointer()[14] - '0'; - } -}; - struct ExtractVersionUuidOperator { template static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { @@ -51,19 +39,6 @@ struct ExtractTimestampUuidOperator { } }; -struct ExtractTimestampStrOperator { - template - static RESULT_TYPE Operation(INPUT_TYPE input, Vector &result) { - // Validate whether the give input is a valid UUID. - hugeint_t uuid_hugeint; - if (!BaseUUID::FromCString(input.GetData(), input.GetSize(), uuid_hugeint)) { - throw InvalidInputException("Given string '%s' is invalid UUID.", input.GetString()); - } - - return ExtractTimestampUuidOperator::Operation(uuid_hugeint, result); - } -}; - template static void ExtractVersionFunction(DataChunk &args, ExpressionState &state, Vector &result) { D_ASSERT(args.ColumnCount() == 1); @@ -157,22 +132,14 @@ ScalarFunction UUIDv7Fun::GetFunction() { return uuid_v7_function; } -ScalarFunctionSet ExtractUuidVerisonFun::GetFunctions() { - ScalarFunctionSet version_extraction; - version_extraction.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::UINTEGER, - ExtractVersionFunction)); - version_extraction.AddFunction(ScalarFunction({LogicalType::UUID}, LogicalType::UINTEGER, - ExtractVersionFunction)); - return version_extraction; +ScalarFunction UUIDExtractVersionFun::GetFunction() { + return ScalarFunction({LogicalType::UUID}, LogicalType::UINTEGER, + ExtractVersionFunction); } -ScalarFunctionSet ExtractUuidTimestampFun::GetFunctions() { - ScalarFunctionSet timestamp_extraction; - timestamp_extraction.AddFunction(ScalarFunction({LogicalType::VARCHAR}, LogicalType::TIMESTAMP_TZ, - ExtractTimestampFunction)); - timestamp_extraction.AddFunction(ScalarFunction({LogicalType::UUID}, LogicalType::TIMESTAMP_TZ, - ExtractTimestampFunction)); - return timestamp_extraction; +ScalarFunction UUIDExtractTimestampFun::GetFunction() { + return ScalarFunction({LogicalType::UUID}, LogicalType::TIMESTAMP_TZ, + ExtractTimestampFunction); } } // namespace duckdb 
diff --git a/src/execution/operator/persistent/physical_batch_copy_to_file.cpp b/src/execution/operator/persistent/physical_batch_copy_to_file.cpp index aed33d38d67b..86560e2567af 100644 --- a/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +++ b/src/execution/operator/persistent/physical_batch_copy_to_file.cpp @@ -67,10 +67,9 @@ class FixedBatchCopyGlobalState : public GlobalSinkState { static constexpr const idx_t MINIMUM_MEMORY_PER_COLUMN_PER_THREAD = 4ULL * 1024ULL * 1024ULL; public: - explicit FixedBatchCopyGlobalState(ClientContext &context_p, unique_ptr global_state, - idx_t minimum_memory_per_thread) - : memory_manager(context_p, minimum_memory_per_thread), rows_copied(0), global_state(std::move(global_state)), - batch_size(0), scheduled_batch_index(0), flushed_batch_index(0), any_flushing(false), any_finished(false), + explicit FixedBatchCopyGlobalState(ClientContext &context_p, idx_t minimum_memory_per_thread) + : memory_manager(context_p, minimum_memory_per_thread), initialized(false), rows_copied(0), batch_size(0), + scheduled_batch_index(0), flushed_batch_index(0), any_flushing(false), any_finished(false), minimum_memory_per_thread(minimum_memory_per_thread) { } @@ -78,6 +77,8 @@ class FixedBatchCopyGlobalState : public GlobalSinkState { BatchTaskManager task_manager; mutex lock; mutex flush_lock; + //! Whether or not the copy has been initialized + atomic initialized; //! The total number of rows copied to the file atomic rows_copied; //! Global copy state @@ -101,6 +102,27 @@ class FixedBatchCopyGlobalState : public GlobalSinkState { //! 
Written file info (for RETURN_STATS) unique_ptr written_file_info; + //! Initialize the global copy state (and file info for RETURN_STATS) - no-op once initialization has completed + void Initialize(ClientContext &context, const PhysicalBatchCopyToFile &op) { + if (initialized) { + return; + } + lock_guard guard(lock); + if (initialized) { + return; + } + // initialize writing to the file + global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path); + if (op.return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS) { + written_file_info = make_uniq(op.file_path); + written_file_info->file_stats = make_uniq(); + op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state, + *written_file_info->file_stats); + } + // mark as initialized last so later calls return early instead of re-creating the global state + initialized = true; + } + void AddBatchData(idx_t batch_index, unique_ptr new_batch, idx_t memory_usage) { // move the batch data to the set of prepared batch data lock_guard l(lock); @@ -186,6 +208,10 @@ SinkResultType PhysicalBatchCopyToFile::Sink(ExecutionContext &context, DataChun return Sink(context, chunk, input); } } + if (!write_empty_file) { + // if we are not writing empty files - initialize after we have received rows + gstate.Initialize(context.client, *this); + } if (!state.collection) { state.InitializeCollection(context.client, *this); state.batch_index = batch_index; @@ -288,7 +314,7 @@ SinkFinalizeType PhysicalBatchCopyToFile::FinalFlush(ClientContext &context, Glo if (gstate.scheduled_batch_index != gstate.flushed_batch_index) { throw InternalException("Not all batches were flushed to disk - incomplete file?"); } - if (function.copy_to_finalize) { + if (function.copy_to_finalize && gstate.global_state) { function.copy_to_finalize(context, *bind_data, *gstate.global_state); if (use_tmp_file) { @@ -599,13 +625,10 @@ unique_ptr PhysicalBatchCopyToFile::GetGlobalSinkState(ClientCo // request memory based on the minimum amount of memory per column auto minimum_memory_per_thread = FixedBatchCopyGlobalState::MINIMUM_MEMORY_PER_COLUMN_PER_THREAD * children[0].get().GetTypes().size(); - auto result = make_uniq( - context, 
function.copy_to_initialize_global(context, *bind_data, file_path), minimum_memory_per_thread); - if (return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS) { - result->written_file_info = make_uniq(file_path); - result->written_file_info->file_stats = make_uniq(); - function.copy_to_get_written_statistics(context, *bind_data, *result->global_state, - *result->written_file_info->file_stats); + auto result = make_uniq(context, minimum_memory_per_thread); + if (write_empty_file) { + // if we are writing the file also if it is empty - initialize now + result->Initialize(context, *this); } result->batch_size = function.desired_batch_size ? function.desired_batch_size(context, *bind_data) : 0; return std::move(result); @@ -617,19 +640,28 @@ unique_ptr PhysicalBatchCopyToFile::GetGlobalSinkState(ClientCo SourceResultType PhysicalBatchCopyToFile::GetData(ExecutionContext &context, DataChunk &chunk, OperatorSourceInput &input) const { auto &g = sink_state->Cast(); - chunk.SetCardinality(1); auto fp = use_tmp_file ? 
PhysicalCopyToFile::GetNonTmpFile(context.client, file_path) : file_path; switch (return_type) { case CopyFunctionReturnType::CHANGED_ROWS: chunk.SetValue(0, 0, Value::BIGINT(NumericCast(g.rows_copied.load()))); + chunk.SetCardinality(1); break; case CopyFunctionReturnType::CHANGED_ROWS_AND_FILE_LIST: { + vector file_list; + if (g.global_state) { + file_list.emplace_back(std::move(fp)); + } chunk.SetValue(0, 0, Value::BIGINT(NumericCast(g.rows_copied.load()))); - chunk.SetValue(1, 0, Value::LIST(LogicalType::VARCHAR, {fp})); + chunk.SetValue(1, 0, Value::LIST(LogicalType::VARCHAR, std::move(file_list))); + chunk.SetCardinality(1); break; } case CopyFunctionReturnType::WRITTEN_FILE_STATISTICS: { - PhysicalCopyToFile::ReturnStatistics(chunk, 0, *g.written_file_info); + if (g.written_file_info) { + g.written_file_info->file_path = std::move(fp); + PhysicalCopyToFile::ReturnStatistics(chunk, 0, *g.written_file_info); + chunk.SetCardinality(1); + } break; } default: diff --git a/src/execution/operator/persistent/physical_copy_to_file.cpp b/src/execution/operator/persistent/physical_copy_to_file.cpp index a6e2a5b94c66..7a9ec55e2980 100644 --- a/src/execution/operator/persistent/physical_copy_to_file.cpp +++ b/src/execution/operator/persistent/physical_copy_to_file.cpp @@ -47,11 +47,12 @@ using vector_of_value_map_t = unordered_map, T, VectorOfValuesHash class CopyToFunctionGlobalState : public GlobalSinkState { public: - explicit CopyToFunctionGlobalState(ClientContext &context, unique_ptr global_state) - : rows_copied(0), last_file_offset(0), global_state(std::move(global_state)) { + explicit CopyToFunctionGlobalState(ClientContext &context) + : initialized(false), rows_copied(0), last_file_offset(0) { max_open_files = ClientConfig::GetConfig(context).partitioned_write_max_open_files; } StorageLock lock; + atomic initialized; atomic rows_copied; atomic last_file_offset; unique_ptr global_state; @@ -64,6 +65,24 @@ class CopyToFunctionGlobalState : public GlobalSinkState 
{ //! Max open files idx_t max_open_files; + void Initialize(ClientContext &context, const PhysicalCopyToFile &op) { + if (initialized) { + return; + } + auto write_lock = lock.GetExclusiveLock(); + if (initialized) { + return; + } + // initialize writing to the file + global_state = op.function.copy_to_initialize_global(context, *op.bind_data, op.file_path); + auto written_file_info = AddFile(*write_lock, op.file_path, op.return_type); + if (written_file_info) { + op.function.copy_to_get_written_statistics(context, *op.bind_data, *global_state, + *written_file_info->file_stats); + } + initialized = true; + } + void CreateDir(const string &dir_path, FileSystem &fs) { if (created_directories.find(dir_path) != created_directories.end()) { // already attempted to create this directory @@ -400,7 +419,7 @@ unique_ptr PhysicalCopyToFile::GetGlobalSinkState(ClientContext CheckDirectory(fs, file_path, overwrite_mode); } - auto state = make_uniq(context, nullptr); + auto state = make_uniq(context); if (!per_thread_output && rotate) { auto global_lock = state->lock.GetExclusiveLock(); state->global_state = CreateFileState(context, *state, *global_lock); @@ -413,13 +432,10 @@ unique_ptr PhysicalCopyToFile::GetGlobalSinkState(ClientContext return std::move(state); } - auto state = make_uniq( - context, function.copy_to_initialize_global(context, *bind_data, file_path)); - auto global_lock = state->lock.GetExclusiveLock(); - auto written_file_info = state->AddFile(*global_lock, file_path, return_type); - if (written_file_info) { - function.copy_to_get_written_statistics(context, *bind_data, *state->global_state, - *written_file_info->file_stats); + auto state = make_uniq(context); + if (write_empty_file) { + // if we are writing the file also if it is empty - initialize now + state->Initialize(context, *this); } return std::move(state); } @@ -460,6 +476,10 @@ SinkResultType PhysicalCopyToFile::Sink(ExecutionContext &context, DataChunk &ch auto &g = input.global_state.Cast(); 
auto &l = input.local_state.Cast(); + if (!write_empty_file) { + // if we are only writing the file when there are rows to write we need to initialize here + g.Initialize(context.client, *this); + } g.rows_copied += chunk.size(); if (partition_output) { @@ -523,7 +543,7 @@ SinkCombineResultType PhysicalCopyToFile::Combine(ExecutionContext &context, Ope // File in global state may change with FILE_SIZE_BYTES/rotate, need to grab lock auto lock = g.lock.GetSharedLock(); function.copy_to_combine(context, *bind_data, *g.global_state, *l.local_state); - } else { + } else if (g.global_state) { function.copy_to_combine(context, *bind_data, *g.global_state, *l.local_state); } } @@ -549,7 +569,7 @@ SinkFinalizeType PhysicalCopyToFile::Finalize(Pipeline &pipeline, Event &event, } return SinkFinalizeType::READY; } - if (function.copy_to_finalize) { + if (function.copy_to_finalize && gstate.global_state) { function.copy_to_finalize(context, *bind_data, *gstate.global_state); if (use_tmp_file) { @@ -633,6 +653,9 @@ SourceResultType PhysicalCopyToFile::GetData(ExecutionContext &context, DataChun idx_t count = next_end - source_state.offset; for (idx_t i = 0; i < count; i++) { auto &file_entry = *g.written_files[source_state.offset + i]; + if (use_tmp_file) { + file_entry.file_path = GetNonTmpFile(context.client, file_entry.file_path); + } ReturnStatistics(chunk, i, file_entry); } chunk.SetCardinality(count); @@ -650,7 +673,11 @@ SourceResultType PhysicalCopyToFile::GetData(ExecutionContext &context, DataChun chunk.SetValue(0, 0, Value::BIGINT(NumericCast(g.rows_copied.load()))); vector file_name_list; for (auto &file_info : g.written_files) { - file_name_list.emplace_back(file_info->file_path); + if (use_tmp_file) { + file_name_list.emplace_back(GetNonTmpFile(context.client, file_info->file_path)); + } else { + file_name_list.emplace_back(file_info->file_path); + } } chunk.SetValue(1, 0, Value::LIST(LogicalType::VARCHAR, std::move(file_name_list))); break; diff --git 
a/src/execution/physical_plan/plan_copy_to_file.cpp b/src/execution/physical_plan/plan_copy_to_file.cpp index 567fadf57960..bc81a3ddf7e4 100644 --- a/src/execution/physical_plan/plan_copy_to_file.cpp +++ b/src/execution/physical_plan/plan_copy_to_file.cpp @@ -41,6 +41,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile &op) { cast_copy.use_tmp_file = op.use_tmp_file; cast_copy.children.push_back(plan); cast_copy.return_type = op.return_type; + cast_copy.write_empty_file = op.write_empty_file; return copy; } @@ -67,6 +68,7 @@ PhysicalOperator &PhysicalPlanGenerator::CreatePlan(LogicalCopyToFile &op) { cast_copy.names = op.names; cast_copy.expected_types = op.expected_types; cast_copy.parallel = mode == CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; + cast_copy.write_empty_file = op.write_empty_file; cast_copy.children.push_back(plan); return copy; diff --git a/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp b/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp index 56f8a43a509d..adb2f4722b2e 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp @@ -30,6 +30,7 @@ class PhysicalBatchCopyToFile : public PhysicalOperator { string file_path; bool use_tmp_file; CopyFunctionReturnType return_type; + bool write_empty_file; public: // Source interface diff --git a/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp b/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp index eee5b94e5dd7..9fc2f0b27d0a 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_copy_to_file.hpp @@ -51,6 +51,7 @@ class PhysicalCopyToFile : public PhysicalOperator { bool partition_output; bool write_partition_columns; + bool write_empty_file; 
vector partition_columns; vector names; vector expected_types; diff --git a/src/include/duckdb/planner/operator/logical_copy_to_file.hpp b/src/include/duckdb/planner/operator/logical_copy_to_file.hpp index 5ec454b0e01c..4e5502747bdb 100644 --- a/src/include/duckdb/planner/operator/logical_copy_to_file.hpp +++ b/src/include/duckdb/planner/operator/logical_copy_to_file.hpp @@ -42,6 +42,7 @@ class LogicalCopyToFile : public LogicalOperator { bool partition_output; bool write_partition_columns; + bool write_empty_file = true; vector partition_columns; vector names; vector expected_types; diff --git a/src/planner/binder/statement/bind_copy.cpp b/src/planner/binder/statement/bind_copy.cpp index 8bddebe965d1..0278ab8b0376 100644 --- a/src/planner/binder/statement/bind_copy.cpp +++ b/src/planner/binder/statement/bind_copy.cpp @@ -56,6 +56,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt, CopyToType copy_to_type) bool seen_overwrite_mode = false; bool seen_filepattern = false; bool write_partition_columns = false; + bool write_empty_file = true; CopyFunctionReturnType return_type = CopyFunctionReturnType::CHANGED_ROWS; CopyFunctionBindInput bind_input(*stmt.info); @@ -127,6 +128,8 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt, CopyToType copy_to_type) } } else if (loption == "write_partition_columns") { write_partition_columns = GetBooleanArg(context, option.second); + } else if (loption == "write_empty_file") { + write_empty_file = GetBooleanArg(context, option.second); } else { stmt.info->options[option.first] = option.second; } @@ -226,6 +229,18 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt, CopyToType copy_to_type) "Can't combine file rotation (e.g., ROW_GROUPS_PER_FILE) and PARTITION_BY for COPY"); } } + if (!write_empty_file) { + if (rotate) { + throw NotImplementedException( + "Can't combine WRITE_EMPTY_FILE false with file rotation (e.g., ROW_GROUPS_PER_FILE)"); + } + if (per_thread_output) { + throw NotImplementedException("Can't 
combine WRITE_EMPTY_FILE false with PER_THREAD_OUTPUT"); + } + if (!partition_cols.empty()) { + throw NotImplementedException("Can't combine WRITE_EMPTY_FILE false with PARTITION_BY"); + } + } if (return_type == CopyFunctionReturnType::WRITTEN_FILE_STATISTICS && !copy_function.function.copy_to_get_written_statistics) { throw NotImplementedException("RETURN_STATS is not supported for the \"%s\" copy format", stmt.info->format); @@ -246,6 +261,7 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt, CopyToType copy_to_type) copy->partition_output = !partition_cols.empty(); copy->write_partition_columns = write_partition_columns; copy->partition_columns = std::move(partition_cols); + copy->write_empty_file = write_empty_file; copy->return_type = return_type; copy->names = unique_column_names; diff --git a/src/planner/operator/logical_copy_to_file.cpp b/src/planner/operator/logical_copy_to_file.cpp index f24abcd5dc4e..adfae8da1a23 100644 --- a/src/planner/operator/logical_copy_to_file.cpp +++ b/src/planner/operator/logical_copy_to_file.cpp @@ -66,6 +66,7 @@ void LogicalCopyToFile::Serialize(Serializer &serializer) const { serializer.WriteProperty(214, "rotate", rotate); serializer.WriteProperty(215, "return_type", return_type); serializer.WritePropertyWithDefault(216, "write_partition_columns", write_partition_columns, true); + serializer.WritePropertyWithDefault(217, "write_empty_file", write_empty_file, true); } unique_ptr LogicalCopyToFile::Deserialize(Deserializer &deserializer) { @@ -110,6 +111,7 @@ unique_ptr LogicalCopyToFile::Deserialize(Deserializer &deseria auto return_type = deserializer.ReadPropertyWithExplicitDefault(215, "return_type", CopyFunctionReturnType::CHANGED_ROWS); auto write_partition_columns = deserializer.ReadPropertyWithExplicitDefault(216, "write_partition_columns", true); + auto write_empty_file = deserializer.ReadPropertyWithExplicitDefault(217, "write_empty_file", true); if (!has_serialize) { // If not serialized, re-bind with the copy 
info @@ -137,6 +139,7 @@ unique_ptr LogicalCopyToFile::Deserialize(Deserializer &deseria result->rotate = rotate; result->return_type = return_type; result->write_partition_columns = write_partition_columns; + result->write_empty_file = write_empty_file; return std::move(result); } diff --git a/test/sql/copy/parquet/writer/skip_empty_write.test b/test/sql/copy/parquet/writer/skip_empty_write.test new file mode 100644 index 000000000000..04b0bd8c168d --- /dev/null +++ b/test/sql/copy/parquet/writer/skip_empty_write.test @@ -0,0 +1,66 @@ +# name: test/sql/copy/parquet/writer/skip_empty_write.test +# description: Parquet writer WRITE_EMPTY_FILE false option +# group: [writer] + +require parquet + +statement ok +PRAGMA enable_verification + +statement ok +CREATE TABLE empty_tbl(i INT, j VARCHAR); + +statement ok +CREATE TABLE tbl AS FROM range(10000) t(i) UNION ALL SELECT 100000 + +# basic usage +statement ok +copy (select 42 where 42=84) to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false) + +query I +SELECT COUNT(*) FROM glob('__TEST_DIR__/empty.parquet') +---- +0 + +foreach preserve_order true false + +statement ok +SET preserve_insertion_order=${preserve_order} + +# no file name returned +query IIIIII +copy (select 42 where 42=84) to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_STATS) +---- + +# now with a table +query IIIIII +copy empty_tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_STATS) +---- + +query II +copy empty_tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_FILES) +---- +0 [] + +query IIIIII +copy (from tbl where i = 20000) to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, RETURN_STATS) +---- + +endloop + + +# these combinations are not allowed +statement error +copy tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, ROW_GROUPS_PER_FILE 1) +---- +Can't combine + +statement error +copy empty_tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, PARTITION_BY (i)) +---- 
+Can't combine + +statement error +copy tbl to '__TEST_DIR__/empty.parquet' (WRITE_EMPTY_FILE false, PER_THREAD_OUTPUT) +---- +Can't combine diff --git a/test/sql/function/uuid/test_uuid_function.test b/test/sql/function/uuid/test_uuid_function.test index 65396cd205e2..ad6239675a1f 100644 --- a/test/sql/function/uuid/test_uuid_function.test +++ b/test/sql/function/uuid/test_uuid_function.test @@ -2,23 +2,11 @@ # description: Test uuid related functions # group: [uuid] -# Test invalid UUID in string format. -statement error -SELECT uuid_extract_version('1234'); ----- -Given string '1234' is invalid UUID. - -# Test UUIDv7 in string format. query I SELECT uuid_extract_version('ac227128-7d55-7ee0-a765-5025cc52e55a'); ---- 7 -query I -SELECT uuid_extract_version(uuidv7()::STRING); ----- -7 - query I SELECT uuid_extract_version(uuidv7()); ---- @@ -30,16 +18,6 @@ SELECT uuid_extract_version('ac227128-7d55-4ee0-a765-5025cc52e55a'); ---- 4 -query I -SELECT uuid_extract_version(uuidv4()::STRING); ----- -4 - -query I -SELECT uuid_extract_version(gen_random_uuid()::STRING); ----- -4 - query I SELECT uuid_extract_version(uuidv4()); ---- @@ -50,12 +28,6 @@ SELECT uuid_extract_version(gen_random_uuid()); ---- 4 -# Test timestamp extraction for invalid string. -statement error -SELECT uuid_extract_timestamp('1234'); ----- -Given string '1234' is invalid UUID. - # Test timestamp extraction with UUID v4. statement error SELECT uuid_extract_timestamp(uuidv4());