From 070083a8281295ebde593397965e4e6af899ab6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Mon, 20 Jan 2025 15:11:16 +0100 Subject: [PATCH 1/9] mfu: index hardlinks during tree walk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During tree walk with details, regular files with more than one nlink are temporarily placed in a hardlinks flist. This flist is then globally ordered by names and ranked to select one reference path per inode, and flag all other paths to this inode as hardlinks. The sorted hardlinks flist is finally merged in global flist with all other items. The path name ordering is performed to ensure reproducibility between two similar trees, thus minimizing the differences for dcmp and dsync eventually. This commit introduces a new structure inodes_hardlink_map_t used to temporarily associate paths to inodes in reference/hardlinks solving logic. The type elem_t receives 2 new members: nlink, the number of links on an inode, and ref, the reference path to this inode. The ref is NULL except on hardlinks. This commit also introduces a new filetype MFU_TYPE_HARDLINK, which is used to distinguish hardlinks to inodes from reference paths which have MFU_TYPE_FILE type. The packed flist element now contains the filetype, even when details are enabled, as there is no way to determine if an element is a regular file or a hardlink based on stat result. New functions mfu_[un]pack_sized_str() are introduced to manage packing and unpacking of optional strings with maximum length. 
Signed-off-by: Rémi Palancher --- src/common/mfu_flist.c | 119 +++++++---- src/common/mfu_flist.h | 14 +- src/common/mfu_flist_internal.h | 9 + src/common/mfu_flist_walk.c | 356 +++++++++++++++++++++++++++++++- src/common/mfu_util.c | 20 ++ src/common/mfu_util.h | 8 + 6 files changed, 481 insertions(+), 45 deletions(-) diff --git a/src/common/mfu_flist.c b/src/common/mfu_flist.c index 9b13a3a52..d14e017a5 100644 --- a/src/common/mfu_flist.c +++ b/src/common/mfu_flist.c @@ -125,7 +125,7 @@ static size_t list_elem_pack2_size(int detail, uint64_t chars, const elem_t* ele { size_t size; if (detail) { - size = 2 * 4 + chars + 0 * 4 + 10 * 8; + size = 2 * 4 + chars + 1 * 4 + 10 * 8 + 8 + chars; } else { size = 2 * 4 + chars + 1 * 4; @@ -154,11 +154,7 @@ static size_t list_elem_pack2(void* buf, int detail, uint64_t chars, const elem_ mfu_pack_uint32(&ptr, (uint32_t) chars); /* copy in file name */ - char* file = elem->file; - if (file != NULL) { - strcpy(ptr, file); - } - ptr += chars; + mfu_pack_sized_str(&ptr, elem->file, chars); #ifdef DAOS_SUPPORT /* copy in values for obj ids */ @@ -166,6 +162,7 @@ static size_t list_elem_pack2(void* buf, int detail, uint64_t chars, const elem_ mfu_pack_uint64(&ptr, elem->obj_id_hi); #endif + mfu_pack_uint32(&ptr, elem->type); if (detail) { /* copy in fields */ mfu_pack_uint64(&ptr, elem->mode); @@ -178,10 +175,8 @@ static size_t list_elem_pack2(void* buf, int detail, uint64_t chars, const elem_ mfu_pack_uint64(&ptr, elem->ctime); mfu_pack_uint64(&ptr, elem->ctime_nsec); mfu_pack_uint64(&ptr, elem->size); - } - else { - /* just have the file type */ - mfu_pack_uint32(&ptr, elem->type); + mfu_pack_uint64(&ptr, elem->nlink); + mfu_pack_sized_str(&ptr, elem->ref, chars); } size_t bytes = (size_t)(ptr - start); @@ -203,15 +198,11 @@ static size_t list_elem_unpack2(const void* buf, elem_t* elem) uint32_t chars; mfu_unpack_uint32(&ptr, &chars); - /* get name and advance pointer */ - const char* file = ptr; - ptr += chars; - - /* copy path 
*/ - elem->file = MFU_STRDUP(file); + /* get name */ + mfu_unpack_sized_str(&ptr, &elem->file, chars); /* set depth */ - elem->depth = mfu_flist_compute_depth(file); + elem->depth = mfu_flist_compute_depth(elem->file); elem->detail = (int) detail; @@ -221,6 +212,10 @@ static size_t list_elem_unpack2(const void* buf, elem_t* elem) mfu_unpack_uint64(&ptr, &elem->obj_id_hi); #endif + uint32_t type; + mfu_unpack_uint32(&ptr, &type); + elem->type = (mfu_filetype) type; + if (detail) { /* extract fields */ mfu_unpack_uint64(&ptr, &elem->mode); @@ -233,14 +228,8 @@ static size_t list_elem_unpack2(const void* buf, elem_t* elem) mfu_unpack_uint64(&ptr, &elem->ctime); mfu_unpack_uint64(&ptr, &elem->ctime_nsec); mfu_unpack_uint64(&ptr, &elem->size); - /* use mode to set file type */ - elem->type = mfu_flist_mode_to_filetype((mode_t)elem->mode); - } - else { - /* only have type */ - uint32_t type; - mfu_unpack_uint32(&ptr, &type); - elem->type = (mfu_filetype) type; + mfu_unpack_uint64(&ptr, &elem->nlink); + mfu_unpack_sized_str(&ptr, &elem->ref, chars); } size_t bytes = (size_t)(ptr - start); @@ -346,6 +335,8 @@ static void list_insert_copy(flist_t* flist, elem_t* src) elem->ctime = src->ctime; elem->ctime_nsec = src->ctime_nsec; elem->size = src->size; + elem->nlink = src->nlink; + elem->ref = MFU_STRDUP(src->ref); /* append element to tail of linked list */ mfu_flist_insert_elem(flist, elem); @@ -368,6 +359,9 @@ void mfu_flist_insert_stat(flist_t* flist, const char* fpath, mode_t mode, const /* set file type */ elem->type = mfu_flist_mode_to_filetype(mode); + /* hardlinks references are discovered afterwhile */ + elem->ref = NULL; + /* copy stat info */ if (sb != NULL) { elem->detail = 1; @@ -389,6 +383,7 @@ void mfu_flist_insert_stat(flist_t* flist, const char* fpath, mode_t mode, const elem->ctime_nsec = nsecs; elem->size = (uint64_t) sb->st_size; + elem->nlink = (uint64_t) sb->st_nlink; /* TODO: link to user and group names? 
*/ } @@ -409,6 +404,7 @@ static void list_delete(flist_t* flist) while (current != NULL) { elem_t* next = current->next; mfu_free(¤t->file); + mfu_free(¤t->ref); mfu_free(¤t); current = next; } @@ -987,6 +983,28 @@ uint64_t mfu_flist_file_get_size(mfu_flist bflist, uint64_t idx) return ret; } +uint64_t mfu_flist_file_get_nlink(mfu_flist bflist, uint64_t idx) +{ + uint64_t ret = (uint64_t) - 1; + flist_t* flist = (flist_t*) bflist; + elem_t* elem = list_get_elem(flist, idx); + if (elem != NULL && flist->detail) { + ret = elem->nlink; + } + return ret; +} + +const char* mfu_flist_file_get_ref(mfu_flist bflist, uint64_t idx) +{ + const char* ref = NULL; + flist_t* flist = (flist_t*) bflist; + elem_t* elem = list_get_elem(flist, idx); + if (elem != NULL) { + ref = elem->ref; + } + return ref; +} + const char* mfu_flist_file_get_username(mfu_flist bflist, uint64_t idx) { const char* ret = NULL; @@ -1171,6 +1189,19 @@ void mfu_flist_file_set_size(mfu_flist bflist, uint64_t idx, uint64_t size) return; } +void mfu_flist_file_set_ref(mfu_flist bflist, uint64_t idx, const char* ref) +{ + flist_t* flist = (flist_t*) bflist; + elem_t* elem = list_get_elem(flist, idx); + if (elem != NULL) { + /* free existing name if there is one */ + mfu_free(&elem->ref); + /* set new ref*/ + elem->ref = MFU_STRDUP(ref); + } + return; +} + mfu_flist mfu_flist_subset(mfu_flist src) { /* allocate a new file list */ @@ -1353,6 +1384,8 @@ uint64_t mfu_flist_file_create(mfu_flist bflist) elem->ctime = 0; elem->ctime_nsec = 0; elem->size = 0; + elem->nlink = 0; + elem->ref = NULL; /* for DAOS */ #ifdef DAOS_SUPPORT @@ -1817,11 +1850,12 @@ void mfu_flist_print_summary(mfu_flist flist) MPI_Comm_size(MPI_COMM_WORLD, &ranks); /* initlialize counters */ - uint64_t total_dirs = 0; - uint64_t total_files = 0; - uint64_t total_links = 0; - uint64_t total_unknown = 0; - uint64_t total_bytes = 0; + uint64_t total_dirs = 0; + uint64_t total_files = 0; + uint64_t total_links = 0; + uint64_t total_hardlinks = 0; 
+ uint64_t total_unknown = 0; + uint64_t total_bytes = 0; /* step through and print data */ uint64_t idx = 0; @@ -1839,8 +1873,12 @@ void mfu_flist_print_summary(mfu_flist flist) total_dirs++; } else if (S_ISREG(mode)) { - total_files++; - total_bytes += size; + if (mfu_flist_file_get_ref(flist, idx) != NULL) { + total_hardlinks++; + } else { + total_files++; + total_bytes += size; + } } else if (S_ISLNK(mode)) { total_links++; @@ -1863,6 +1901,9 @@ void mfu_flist_print_summary(mfu_flist flist) else if (type == MFU_TYPE_LINK) { total_links++; } + else if (type == MFU_TYPE_HARDLINK) { + total_hardlinks++; + } else { /* unknown file type */ total_unknown++; @@ -1874,13 +1915,14 @@ void mfu_flist_print_summary(mfu_flist flist) } /* get total directories, files, links, and bytes */ - uint64_t all_dirs, all_files, all_links, all_unknown, all_bytes; + uint64_t all_dirs, all_files, all_links, all_hardlinks, all_unknown, all_bytes; + MPI_Allreduce(&total_dirs, &all_dirs, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_files, &all_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_links, &all_links, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_hardlinks, &all_hardlinks, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_unknown, &all_unknown, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&total_bytes, &all_bytes, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); uint64_t all_count = mfu_flist_global_size(flist); - MPI_Allreduce(&total_dirs, &all_dirs, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_files, &all_files, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_links, &all_links, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_unknown, &all_unknown, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - MPI_Allreduce(&total_bytes, &all_bytes, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); /* convert total size to units */ if (rank == 0) { @@ -1888,6 
+1930,7 @@ void mfu_flist_print_summary(mfu_flist flist) MFU_LOG(MFU_LOG_INFO, " Directories: %llu", (unsigned long long) all_dirs); MFU_LOG(MFU_LOG_INFO, " Files: %llu", (unsigned long long) all_files); MFU_LOG(MFU_LOG_INFO, " Links: %llu", (unsigned long long) all_links); + MFU_LOG(MFU_LOG_INFO, " Hardlinks: %llu", (unsigned long long) all_hardlinks); /* MFU_LOG(MFU_LOG_INFO, " Unknown: %lu", (unsigned long long) all_unknown); */ if (mfu_flist_have_detail(flist)) { diff --git a/src/common/mfu_flist.h b/src/common/mfu_flist.h index 58dc0c433..9a8f2dfe1 100644 --- a/src/common/mfu_flist.h +++ b/src/common/mfu_flist.h @@ -94,11 +94,12 @@ extern "C" { * so changing their values can break backwards compatibility * in reading any such files */ typedef enum mfu_filetypes_e { - MFU_TYPE_NULL = 0, /* type not set */ - MFU_TYPE_UNKNOWN = 1, /* type not known */ - MFU_TYPE_FILE = 2, /* regular file */ - MFU_TYPE_DIR = 3, /* directory */ - MFU_TYPE_LINK = 4, /* symlink */ + MFU_TYPE_NULL = 0, /* type not set */ + MFU_TYPE_UNKNOWN = 1, /* type not known */ + MFU_TYPE_FILE = 2, /* regular file */ + MFU_TYPE_DIR = 3, /* directory */ + MFU_TYPE_LINK = 4, /* symlink */ + MFU_TYPE_HARDLINK = 5, /* hardlink */ } mfu_filetype; /* define handle type to a file list */ @@ -395,6 +396,8 @@ uint64_t mfu_flist_file_get_mtime_nsec(mfu_flist flist, uint64_t index); uint64_t mfu_flist_file_get_ctime(mfu_flist flist, uint64_t index); uint64_t mfu_flist_file_get_ctime_nsec(mfu_flist flist, uint64_t index); uint64_t mfu_flist_file_get_size(mfu_flist flist, uint64_t index); +uint64_t mfu_flist_file_get_nlink(mfu_flist flist, uint64_t idx); +const char* mfu_flist_file_get_ref(mfu_flist flist, uint64_t idx); uint64_t mfu_flist_file_get_perm(mfu_flist flist, uint64_t index); #if DCOPY_USE_XATTRS void *mfu_flist_file_get_acl(mfu_flist bflist, uint64_t idx, ssize_t *acl_size, char *type); @@ -420,6 +423,7 @@ void mfu_flist_file_set_mtime_nsec(mfu_flist flist, uint64_t index, uint64_t mti void 
mfu_flist_file_set_ctime(mfu_flist flist, uint64_t index, uint64_t ctime); void mfu_flist_file_set_ctime_nsec(mfu_flist flist, uint64_t index, uint64_t ctime_nsec); void mfu_flist_file_set_size(mfu_flist flist, uint64_t index, uint64_t size); +void mfu_flist_file_set_ref(mfu_flist flist, uint64_t index, const char* ref); #if DCOPY_USE_XATTRS //void *mfu_flist_file_set_acl(mfu_flist bflist, uint64_t idx, ssize_t *acl_size, char *type); #endif diff --git a/src/common/mfu_flist_internal.h b/src/common/mfu_flist_internal.h index 01606c0b7..67f744b3d 100644 --- a/src/common/mfu_flist_internal.h +++ b/src/common/mfu_flist_internal.h @@ -40,12 +40,21 @@ typedef struct list_elem { uint64_t ctime; /* create time */ uint64_t ctime_nsec; /* create time nanoseconds */ uint64_t size; /* file size in bytes */ + uint64_t nlink; /* number of links to inode */ + char* ref; /* reference path for hardlinks */ struct list_elem* next; /* pointer to next item */ /* vars for a non-posix DAOS copy */ uint64_t obj_id_lo; uint64_t obj_id_hi; } elem_t; +/* linked list inode/path pairs used during walk */ +typedef struct inodes_hardlinks_map { + uint64_t *inodes; /* array of inodes numbers for each item in hardlinks temporary list */ + uint64_t count; /* number of inodes */ + uint64_t cap; /* current capacity */ +} inodes_hardlink_map_t; + /* holds an array of objects: users, groups, or file data */ typedef struct { void* buf; /* pointer to memory buffer holding data */ diff --git a/src/common/mfu_flist_walk.c b/src/common/mfu_flist_walk.c index 7a5ce6ecc..e41790f71 100644 --- a/src/common/mfu_flist_walk.c +++ b/src/common/mfu_flist_walk.c @@ -48,6 +48,8 @@ static uint64_t CURRENT_NUM_DIRS; static const char** CURRENT_DIRS; static flist_t* CURRENT_LIST; +static flist_t* HARDLINKS_TMP_LIST; +static inodes_hardlink_map_t* HARDLINKS_INODES_MAP; static int SET_DIR_PERMS; static int REMOVE_FILES; static int DEREFERENCE; @@ -494,6 +496,67 @@ static void walk_stat_create(CIRCLE_handle* handle) } } 
+/* allocate and initialize a new inodes map */ +inodes_hardlink_map_t* inodes_map_new() +{ + /* allocate memory for map, cast it to handle, initialize and return */ + inodes_hardlink_map_t* map = (inodes_hardlink_map_t*) MFU_MALLOC(sizeof(inodes_hardlink_map_t)); + + map->inodes = NULL; + map->count = 0; + map->cap = 0; + + return map; +} + +/* free memory of inodes map */ +inodes_hardlink_map_t* inodes_map_free(inodes_hardlink_map_t** map) +{ + mfu_free(&(*map)->inodes); + mfu_free(map); +} + +/* add new element to running list index, allocates additional + * capactiy for index if needed */ +static void inodes_map_insert(inodes_hardlink_map_t* map, uint64_t inode) +{ + /* if we have no capacity for the index, + * initialize with a small array */ + uint64_t cap = map->cap; + if (cap == 0) { + /* have no index at all, initialize it */ + uint64_t new_capacity = 32; + size_t index_size = new_capacity * sizeof(uint64_t); + map->inodes = (uint64_t*) MFU_MALLOC(index_size); + map->cap = new_capacity; + } + + map->count++; + + /* check that our index has space before we add it */ + uint64_t count = map->count; + if (count == cap) { + /* we have exhausted the current capacity of the index array, + * allocate a new memory region that is double the size */ + uint64_t new_capacity = cap * 2; + size_t index_size = new_capacity * sizeof(uint64_t); + uint64_t* new_inodes = (uint64_t*) MFU_MALLOC(index_size); + + /* copy over existing list */ + memcpy(new_inodes, map->inodes, count * sizeof(uint64_t)); + + /* free the old index memory and assign the new one */ + mfu_free(&map->inodes); + map->inodes = new_inodes; + map->cap = new_capacity; + } + + /* append the item to the index */ + map->inodes[count - 1] = inode; + + return; +} + /** Callback given to process the dataset. 
*/ static void walk_stat_process(CIRCLE_handle* handle) { @@ -527,8 +590,13 @@ static void walk_stat_process(CIRCLE_handle* handle) if (REMOVE_FILES && !S_ISDIR(st.st_mode)) { mfu_file_unlink(path, mfu_file); } else { - /* record info for item in list */ - mfu_flist_insert_stat(CURRENT_LIST, path, st.st_mode, &st); + if (S_ISREG(st.st_mode) && st.st_nlink > 1) { + /* record info for item in temporary hardlinks list and inodes map */ + mfu_flist_insert_stat(HARDLINKS_TMP_LIST, path, st.st_mode, &st); + inodes_map_insert(HARDLINKS_INODES_MAP, (uint64_t)st.st_ino); + } else + /* record info for item in list */ + mfu_flist_insert_stat(CURRENT_LIST, path, st.st_mode, &st); } /* recurse into directory */ @@ -552,6 +620,281 @@ static void walk_stat_process(CIRCLE_handle* handle) return; } +/* sort elements in flist and inodes by name and place them in sorted_list and + * sorted_inodes respectively. */ +static void walk_hardlinks_sort_names(flist_t* flist, inodes_hardlink_map_t* inodes, flist_t** sorted_flist, inodes_hardlink_map_t** sorted_inodes) { + + uint64_t incount = mfu_flist_size(flist); + uint64_t chars = mfu_flist_file_max_name(flist); + + /* create datatype for packed file list element */ + MPI_Datatype dt_elem; + size_t bytes = mfu_flist_file_pack_size(flist); + MPI_Type_contiguous((int)bytes, MPI_BYTE, &dt_elem); + + MPI_Datatype dt_key; + DTCMP_Op op_str; + DTCMP_Str_create_ascend(chars, &dt_key, &op_str); + + /* build keysat type */ + MPI_Datatype dt_keysat, keysat_types[3] = { dt_key, MPI_UINT64_T, dt_elem }; + if (DTCMP_Type_create_series(3, keysat_types, &dt_keysat) != DTCMP_SUCCESS) { + MFU_ABORT(1, "Failed to create keysat type"); + } + + /* get extent of key type */ + MPI_Aint key_lb, key_extent; + MPI_Type_get_extent(dt_key, &key_lb, &key_extent); + + /* get extent of keysat type */ + MPI_Aint inode_lb, inode_extent; + MPI_Type_get_extent(MPI_UINT64_T, &inode_lb, &inode_extent); + + /* get extent of keysat type */ + MPI_Aint keysat_lb, keysat_extent; 
+ MPI_Type_get_extent(dt_keysat, &keysat_lb, &keysat_extent); + + /* compute size of sort element and allocate buffer */ + size_t sortbufsize = (size_t)keysat_extent * incount; + void* sortbuf = MFU_MALLOC(sortbufsize); + + /* copy data into sort elements */ + char* sortptr = (char*) sortbuf; + for (uint64_t idx=0; idxinodes[idx]; + + sortptr += inode_extent; + /* pack file element */ + sortptr += mfu_flist_file_pack(sortptr, flist, idx); + } + + /* sort data */ + void* outsortbuf; + int outsortcount; + DTCMP_Handle handle; + int sort_rc = DTCMP_Sortz( + sortbuf, (int)incount, &outsortbuf, &outsortcount, + dt_key, dt_keysat, op_str, DTCMP_FLAG_NONE, + MPI_COMM_WORLD, &handle + ); + if (sort_rc != DTCMP_SUCCESS) { + MFU_ABORT(1, "Failed to sort data"); + } + + /* free input buffer holding sort elements */ + mfu_free(&sortbuf); + + /* create a new list as subset of original list */ + *sorted_flist = mfu_flist_subset(flist); + *sorted_inodes = inodes_map_new(); + + /* step through sorted data filenames */ + sortptr = (char*) outsortbuf; + for (uint64_t idx=0; idx<(uint64_t)outsortcount; idx++) { + sortptr += key_extent; + inodes_map_insert(*sorted_inodes, *(uint64_t*)sortptr); + sortptr += inode_extent; + sortptr += mfu_flist_file_unpack(sortptr, *sorted_flist); + } + + /* compute summary of new list */ + mfu_flist_summarize(*sorted_flist); + + /* free memory */ + DTCMP_Free(&handle); + + DTCMP_Op_free(&op_str); + MPI_Type_free(&dt_keysat); + MPI_Type_free(&dt_key); + MPI_Type_free(&dt_elem); + +} + +/* rank elements in flist by inodes in order to determine reference and secondary + * links (aka. hardlinks). 
*/ +static void walk_hardlinks_rank(flist_t* flist, inodes_hardlink_map_t* inodes) { + + uint64_t incount = mfu_flist_size(flist); + uint64_t chars = mfu_flist_file_max_name(flist); + + uint64_t* rankbuf = NULL; + if(incount) + rankbuf = (uint64_t*) MFU_MALLOC(sizeof(uint64_t)*incount); + + for(int idx=0; idxinodes[idx]; + + uint64_t groups = 0; + uint64_t output_bytes = incount * sizeof(uint64_t); + uint64_t* group_id = (uint64_t*) MFU_MALLOC(output_bytes); + uint64_t* group_ranks = (uint64_t*) MFU_MALLOC(output_bytes); + uint64_t* group_rank = (uint64_t*) MFU_MALLOC(output_bytes); + int rank_rc = DTCMP_Rankv( + (int)incount, rankbuf, &groups, group_id, group_ranks, + group_rank, MPI_UINT64_T, MPI_UINT64_T, DTCMP_OP_UINT64T_ASCEND, DTCMP_FLAG_NONE, + MPI_COMM_WORLD); + + if (rank_rc != DTCMP_SUCCESS) { + MFU_ABORT(1, "Failed to rank hardlinks inodes"); + } + + /* The rank 0 is considered the reference link to the inode (ie. the regular + * file). Set file type MFU_TYPE_HARDLINK on all other elements. 
*/ + for(int idx=0; idxinodes[idx]; + sendptr += inode_extent; + strncpy(sendptr, name, chars); + sendptr += (struct_extent - inode_extent); + } + } + } + + MPI_Allgatherv(sendbuf, nb_local_refs, dt_struct, recvbuf, recvcounts, recvdispls, dt_struct, MPI_COMM_WORLD); + + /* set reference on all local hardlinks */ + char* recvptr = (char*) recvbuf; + for (int i = 0; i < (int) ranks; i++) { + for (int j = 0; j < recvcounts[i]; j++) { + uint64_t inode = *(uint64_t *)recvptr; + const char* ref = recvptr + inode_extent; + /* look for indexes with the name inode and set the refs accordingly */ + for (int idx = 0; idx < incount; idx++) { + mfu_filetype type = mfu_flist_file_get_type(flist, idx); + if(inodes->inodes[idx] == inode && type == MFU_TYPE_HARDLINK) { + mfu_flist_file_set_ref(flist, idx, ref); + } + } + recvptr += struct_extent; + } + } + + mfu_free(&recvcounts); + mfu_free(&recvdispls); + mfu_free(&recvbuf); + mfu_free(&sendbuf); + MPI_Type_free(&dt_struct); + +} + +/* extend flist with add all items from sorted_hardlinks_flist */ +static void walk_hardlinks_merge(flist_t* flist, flist_t* sorted_hardlinks_flist) { + + uint64_t incount = mfu_flist_size(sorted_hardlinks_flist); + for(uint64_t idx=0; idxdetail = 0; if (walk_opts->use_stat) { flist->detail = 1; + HARDLINKS_TMP_LIST->detail = 1; if (flist->have_users == 0) { mfu_flist_usrgrp_get_users(flist); } @@ -667,6 +1013,12 @@ int mfu_flist_walk_paths(uint64_t num_paths, const char** paths, CIRCLE_begin(); CIRCLE_finalize(); + /* compute hardlinks temporary list global summary */ + mfu_flist_summarize(HARDLINKS_TMP_LIST); + + /* resolve hardlinks and merge them in flist */ + walk_resolve_hardlinks(flist, HARDLINKS_TMP_LIST, HARDLINKS_INODES_MAP); + /* compute global summary */ mfu_flist_summarize(bflist); diff --git a/src/common/mfu_util.c b/src/common/mfu_util.c index 5b374cfc1..7d58a3583 100644 --- a/src/common/mfu_util.c +++ b/src/common/mfu_util.c @@ -577,6 +577,26 @@ void mfu_unpack_uint64(const char** 
pptr, uint64_t* value) *pptr += 8; } +void mfu_pack_sized_str(char** pptr, char* value, uint64_t chars) +{ + char* ptr = *pptr; + if (value == NULL) + *ptr = (char) 0; + else + strncpy(ptr, value, (size_t)chars); + *pptr += chars; +} + +void mfu_unpack_sized_str(const char** pptr, char** value, uint64_t chars) +{ + const char* ptr = *pptr; + if(*ptr == 0) + *value = NULL; + else + *value = MFU_STRDUP(ptr); + *pptr += chars; +} + /* Bob Jenkins one-at-a-time hash: http://en.wikipedia.org/wiki/Jenkins_hash_function */ uint32_t mfu_hash_jenkins(const char* key, size_t len) { diff --git a/src/common/mfu_util.h b/src/common/mfu_util.h index f8e257208..1b6a8da4f 100644 --- a/src/common/mfu_util.h +++ b/src/common/mfu_util.h @@ -236,6 +236,14 @@ void mfu_pack_uint64(char** pptr, uint64_t value); * host order and advance pointer */ void mfu_unpack_uint64(const char** pptr, uint64_t* value); +/* given address of pointer to buffer, pack value into buffer in + * network order and advance pointer */ + void mfu_pack_sized_str(char** pptr, char* value, uint64_t chars); + + /* given address of pointer to buffer, unpack value into buffer in + * host order and advance pointer */ + void mfu_unpack_sized_str(const char** pptr, char** value, uint64_t chars); + /* Bob Jenkins one-at-a-time hash: http://en.wikipedia.org/wiki/Jenkins_hash_function */ uint32_t mfu_hash_jenkins(const char* key, size_t len); From 130bd7cf84a8cab354dccce5633b5c9f9581328b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Mon, 20 Jan 2025 15:12:06 +0100 Subject: [PATCH 2/9] dcp: support hardlinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for hardlinks in dcp. This function renames existing functions mfu_create_hardlink[s]() to mfu_create_hardlink[s]_dest() to reflect their purpose related to --link-dest option. 
Two new functions mfu_create_hardlink[s]() are introduced to create all hardlinks in destination directory with the appropriate link destination. The summary at the end of copy is modified to mention hardlinks operations. Signed-off-by: Rémi Palancher --- src/common/mfu_flist_copy.c | 242 +++++++++++++++++++++++++++++++----- 1 file changed, 214 insertions(+), 28 deletions(-) diff --git a/src/common/mfu_flist_copy.c b/src/common/mfu_flist_copy.c index 1e724feac..8f7ffa543 100644 --- a/src/common/mfu_flist_copy.c +++ b/src/common/mfu_flist_copy.c @@ -80,6 +80,7 @@ typedef struct { int64_t total_dirs; /* sum of all directories */ int64_t total_files; /* sum of all files */ int64_t total_links; /* sum of all symlinks */ + int64_t total_hardlinks; /* sum of all hardlinks */ int64_t total_size; /* sum of all file sizes */ int64_t total_bytes_copied; /* total bytes written */ time_t time_started; /* time when dcp command started */ @@ -734,6 +735,13 @@ static int mfu_copy_set_metadata( for (idx = 0; idx < size; idx++) { /* TODO: skip file if it's not readable */ + mfu_filetype type = mfu_flist_file_get_type(list, idx); + /* skip hardlinks as metadata changes on reference paths also affect + * hardlinks */ + if (type == MFU_TYPE_HARDLINK) { + continue; + } + /* get source name of item */ const char* name = mfu_flist_file_get_name(list, idx); @@ -1363,7 +1371,7 @@ static int mfu_create_file( /* creates hardlink in destpath for specified file, identifies source path * returns 0 on success and -1 on error */ -static int mfu_create_hardlink( +static int mfu_create_hardlink_dest( mfu_flist list, uint64_t idx, const mfu_param_path* srcpath, @@ -1543,7 +1551,7 @@ static int mfu_create_files( /* creates hardlinks, * returns 0 on success and -1 on error */ -static int mfu_create_hardlinks( +static int mfu_create_hardlinks_dest( int levels, int minlevel, mfu_flist* lists, @@ -1612,9 +1620,9 @@ static int mfu_create_hardlinks( continue; } - int tmp_rc = mfu_create_hardlink(list, idx, 
srcpath, - destpath, copy_opts, - mfu_src_file, mfu_dst_file); + int tmp_rc = mfu_create_hardlink_dest(list, idx, srcpath, + destpath, copy_opts, + mfu_src_file, mfu_dst_file); if (tmp_rc != 0) { rc = -1; } @@ -1635,6 +1643,171 @@ static int mfu_create_hardlinks( return rc; } +/* tracks number of hardlinks created by this process */ +static uint64_t hardlinks_total_count; + +/* progress message to print while creating hardlinks */ +static void create_hardlinks_progress_fn(const uint64_t* vals, int count, int complete, int ranks, double secs) +{ + /* get number of items created so far */ + uint64_t items = vals[0]; + + /* compute item rate */ + double item_rate = 0.0; + if (secs > 0) { + item_rate = (double)items / secs; + } + + /* compute percentage of items created */ + double percent = 0.0; + if (hardlinks_total_count > 0) { + percent = (double)items * 100.0 / (double)hardlinks_total_count; + } + + /* estimate seconds remaining */ + double secs_remaining = -1.0; + if (item_rate > 0.0) { + secs_remaining = (double)(hardlinks_total_count - items) / item_rate; + } + + if (complete < ranks) { + MFU_LOG(MFU_LOG_INFO, "Created %llu items (%.0f%%) in %.3lf secs (%.3lf items/sec) %.0f secs left ...", + items, percent, secs, item_rate, secs_remaining); + } else { + MFU_LOG(MFU_LOG_INFO, "Created %llu items (%.0f%%) in %.3lf secs (%.3lf items/sec) done", + items, percent, secs, item_rate); + } +} + +/* creates hardlink in destpath for specified file, identifies source path + * returns 0 on success and -1 on error */ +static int mfu_create_hardlink( + mfu_flist list, + uint64_t idx, + int numpaths, + const mfu_param_path* paths, + const mfu_param_path* destpath, + mfu_copy_opts_t* copy_opts, + mfu_file_t* mfu_src_file, + mfu_file_t* mfu_dst_file) +{ + /* assume we'll succeed */ + int rc = 0; + + const char* name = mfu_flist_file_get_name(list, idx); + const char* ref = mfu_flist_file_get_ref(list, idx); + + /* get reference name */ + const char* src_path = 
mfu_param_path_copy_dest(ref, numpaths, + paths, destpath, copy_opts, mfu_src_file, mfu_dst_file); + + /* get destination name */ + const char* dest_path = mfu_param_path_copy_dest(name, numpaths, + paths, destpath, copy_opts, mfu_src_file, mfu_dst_file); + + /* No need to copy it */ + if (dest_path == NULL) { + return 0; + } + + rc = mfu_hardlink(src_path, dest_path); + if (rc != 0) { + MFU_LOG(MFU_LOG_ERR, "Failed to create hardlink %s --> %s", + dest_path, src_path); + mfu_free(&src_path); + mfu_free(&dest_path); + return rc; + } + + /* free source path */ + mfu_free(&src_path); + + /* free destination path */ + mfu_free(&dest_path); + + /* increment our hardlinks count by one */ + mfu_copy_stats.total_hardlinks++; + + return rc; +} + +/* creates hardlinks, + * returns 0 on success and -1 on error */ +static int mfu_create_hardlinks( + mfu_flist list, + int numpaths, + const mfu_param_path* paths, + const mfu_param_path* destpath, + mfu_copy_opts_t* copy_opts, + mfu_file_t* mfu_src_file, + mfu_file_t* mfu_dst_file) +{ + int rc = 0; + flist_t* flist = (flist_t*)list; + + /* get current rank */ + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + /* iterate over items and create hardlink for each */ + uint64_t idx; + uint64_t size = mfu_flist_size(list); + uint64_t hardlinks_local_count = 0; + + /* get type of item */ + for (idx = 0; idx < size; idx++) { + mfu_filetype type = mfu_flist_file_get_type(list, idx); + if (type == MFU_TYPE_HARDLINK) { + hardlinks_local_count++; + } + } + + /* get total for print percent progress while creating */ + hardlinks_total_count = 0; + MPI_Allreduce(&hardlinks_local_count, &hardlinks_total_count, 1, + MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + + /* bail early if there is no work to do */ + if (hardlinks_total_count == 0) { + return rc; + } + + /* indicate to user what phase we're in */ + if (rank == 0) { + MFU_LOG(MFU_LOG_INFO, "Linking %llu files.", hardlinks_total_count); + } + + /* start progress messages for creating files 
*/ + mfu_progress* create_prog = mfu_progress_start(mfu_progress_timeout, 1, + MPI_COMM_WORLD, create_hardlinks_progress_fn); + + uint64_t total_count = 0; + + for (idx = 0; idx < size; idx++) { + mfu_filetype type = mfu_flist_file_get_type(list, idx); + if (type == MFU_TYPE_HARDLINK) { + int tmp_rc = mfu_create_hardlink(list, idx, numpaths, paths, + destpath, copy_opts, + mfu_src_file, mfu_dst_file); + if (tmp_rc != 0) { + rc = -1; + } + + /* update number of files we have created for progress messages */ + total_count++; + mfu_progress_update(&total_count, create_prog); + } + } + + /* wait for all procs to finish */ + MPI_Barrier(MPI_COMM_WORLD); + + /* finalize progress messages */ + mfu_progress_complete(&total_count, &create_prog); + + return rc; +} + /* hold state for copy progress messages */ static mfu_progress* copy_prog; @@ -2472,6 +2645,7 @@ int mfu_flist_copy( mfu_copy_stats.total_dirs = 0; mfu_copy_stats.total_files = 0; mfu_copy_stats.total_links = 0; + mfu_copy_stats.total_hardlinks = 0; mfu_copy_stats.total_size = 0; mfu_copy_stats.total_bytes_copied = 0; @@ -2550,7 +2724,7 @@ int mfu_flist_copy( mfu_flist* lists2; mfu_flist_array_by_depth(spreadlist, &levels2, &minlevel2, &lists2); - /* create files and links */ + /* create files and symlinks */ tmp_rc = mfu_create_files(levels2, minlevel2, lists2, numpaths, paths, destpath, copy_opts, mfu_src_file, mfu_dst_file); if (tmp_rc < 0) { @@ -2593,23 +2767,25 @@ int mfu_flist_copy( double rel_time = mfu_copy_stats.wtime_ended - mfu_copy_stats.wtime_started; /* prep our values into buffer */ - int64_t values[5]; + int64_t values[6]; values[0] = mfu_copy_stats.total_dirs; values[1] = mfu_copy_stats.total_files; values[2] = mfu_copy_stats.total_links; - values[3] = mfu_copy_stats.total_size; - values[4] = mfu_copy_stats.total_bytes_copied; + values[3] = mfu_copy_stats.total_hardlinks; + values[4] = mfu_copy_stats.total_size; + values[5] = mfu_copy_stats.total_bytes_copied; /* sum values across processes */ - 
int64_t sums[5]; - MPI_Allreduce(values, sums, 5, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); + int64_t sums[6]; + MPI_Allreduce(values, sums, 6, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); /* extract results from allreduce */ - int64_t agg_dirs = sums[0]; - int64_t agg_files = sums[1]; - int64_t agg_links = sums[2]; - int64_t agg_size = sums[3]; - int64_t agg_copied = sums[4]; + int64_t agg_dirs = sums[0]; + int64_t agg_files = sums[1]; + int64_t agg_links = sums[2]; + int64_t agg_hardlinks = sums[3]; + int64_t agg_size = sums[4]; + int64_t agg_copied = sums[5]; /* compute rate of copy */ double agg_rate = (double)agg_copied / rel_time; @@ -2690,6 +2866,13 @@ int mfu_flist_copy( mfu_sync_all("Syncing directory updates to disk."); } + /* create hardlinks */ + tmp_rc = mfu_create_hardlinks(src_cp_list, numpaths, paths, destpath, + copy_opts, mfu_src_file, mfu_dst_file); + if (tmp_rc < 0) { + rc = -1; + } + /* free our lists of levels */ mfu_flist_array_free(levels, &lists); @@ -2706,23 +2889,25 @@ int mfu_flist_copy( mfu_copy_stats.wtime_started; /* prep our values into buffer */ - int64_t values[5]; + int64_t values[6]; values[0] = mfu_copy_stats.total_dirs; values[1] = mfu_copy_stats.total_files; values[2] = mfu_copy_stats.total_links; - values[3] = mfu_copy_stats.total_size; - values[4] = mfu_copy_stats.total_bytes_copied; + values[3] = mfu_copy_stats.total_hardlinks; + values[4] = mfu_copy_stats.total_size; + values[5] = mfu_copy_stats.total_bytes_copied; /* sum values across processes */ - int64_t sums[5]; - MPI_Allreduce(values, sums, 5, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); + int64_t sums[6]; + MPI_Allreduce(values, sums, 6, MPI_INT64_T, MPI_SUM, MPI_COMM_WORLD); /* extract results from allreduce */ - int64_t agg_dirs = sums[0]; - int64_t agg_files = sums[1]; - int64_t agg_links = sums[2]; - int64_t agg_size = sums[3]; - int64_t agg_copied = sums[4]; + int64_t agg_dirs = sums[0]; + int64_t agg_files = sums[1]; + int64_t agg_links = sums[2]; + int64_t agg_hardlinks = 
sums[3]; + int64_t agg_size = sums[4]; + int64_t agg_copied = sums[5]; /* compute rate of copy */ double agg_rate = (double)agg_copied / rel_time; @@ -2742,7 +2927,7 @@ int mfu_flist_copy( strftime(endtime_str, 256, "%b-%d-%Y,%H:%M:%S", localend); /* total number of items */ - int64_t agg_items = agg_dirs + agg_files + agg_links; + int64_t agg_items = agg_dirs + agg_files + agg_links + agg_hardlinks; /* convert size to units */ double agg_size_tmp; @@ -2761,6 +2946,7 @@ int mfu_flist_copy( MFU_LOG(MFU_LOG_INFO, " Directories: %" PRId64, agg_dirs); MFU_LOG(MFU_LOG_INFO, " Files: %" PRId64, agg_files); MFU_LOG(MFU_LOG_INFO, " Links: %" PRId64, agg_links); + MFU_LOG(MFU_LOG_INFO, " Hardlinks: %" PRId64, agg_hardlinks); MFU_LOG(MFU_LOG_INFO, "Data: %.3lf %s (%" PRId64 " bytes)", agg_size_tmp, agg_size_units, agg_size); @@ -3154,7 +3340,7 @@ int mfu_flist_hardlink( * under any directories that were created). We can imrove this if someone * has better idea for it. */ /* create hard links */ - tmp_rc = mfu_create_hardlinks(levels, minlevel, lists, + tmp_rc = mfu_create_hardlinks_dest(levels, minlevel, lists, srcpath, destpath, copy_opts, mfu_src_file, mfu_dst_file); if (tmp_rc < 0) { rc = -1; From 775f65a660b1a64567a441be49fb8236cc7561c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Fri, 24 Jan 2025 10:27:12 +0100 Subject: [PATCH 3/9] dcmp: support hardlinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support of hardlinks in dcmp. The reference paths of hardlinks in source and destination are compared. If not equal, strmap is updated to flag them as different. The branchs in items comparison logic is now based on filetype recorded in flist rather than the file mode as there is no way to distinguish reference paths and hardlinks with just the mode, both are regular files. 
Signed-off-by: Rémi Palancher --- src/dcmp/dcmp.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/dcmp/dcmp.c b/src/dcmp/dcmp.c index 082e849ca..3e397af0e 100644 --- a/src/dcmp/dcmp.c +++ b/src/dcmp/dcmp.c @@ -1040,11 +1040,9 @@ static int dcmp_strmap_compare( dcmp_strmap_item_update(src_map, key, DCMPF_EXIST, DCMPS_COMMON); dcmp_strmap_item_update(dst_map, key, DCMPF_EXIST, DCMPS_COMMON); - /* get modes of files */ - mode_t src_mode = (mode_t) mfu_flist_file_get_mode(src_list, - src_index); - mode_t dst_mode = (mode_t) mfu_flist_file_get_mode(dst_list, - dst_index); + /* get types of files */ + mfu_filetype src_type = mfu_flist_file_get_type(src_list, src_index); + mfu_filetype dst_type = mfu_flist_file_get_type(dst_list, dst_index); tmp_rc = dcmp_compare_metadata(src_list, src_map, src_index, dst_list, dst_map, dst_index, @@ -1061,7 +1059,7 @@ static int dcmp_strmap_compare( } /* check whether files are of the same type */ - if ((src_mode & S_IFMT) != (dst_mode & S_IFMT)) { + if (src_type != dst_type) { /* file type is different, no need to go any futher */ dcmp_strmap_item_update(src_map, key, DCMPF_TYPE, DCMPS_DIFFER); dcmp_strmap_item_update(dst_map, key, DCMPF_TYPE, DCMPS_DIFFER); @@ -1084,16 +1082,17 @@ static int dcmp_strmap_compare( continue; } - /* for now, we can only compare content of regular files and symlinks */ - if (! S_ISREG(dst_mode) && ! 
S_ISLNK(dst_mode)) { - /* not regular file or symlink, take them as common content */ + /* for now, we can only compare content of regular files, symlinks and + * hardlinks targets */ + if (dst_type != MFU_TYPE_FILE && dst_type != MFU_TYPE_LINK && dst_type != MFU_TYPE_HARDLINK) { + /* not regular file, take them as common content */ dcmp_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_COMMON); dcmp_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_COMMON); continue; } /* For symlinks, compare targets */ - if (S_ISLNK(dst_mode)) { + if (dst_type == MFU_TYPE_LINK) { const char* src_name = mfu_flist_file_get_name(src_list, src_index); const char* dst_name = mfu_flist_file_get_name(dst_list, dst_index); int compare_rc = mfu_compare_symlinks(src_name, dst_name, mfu_src_file, mfu_dst_file); @@ -1116,6 +1115,22 @@ static int dcmp_strmap_compare( continue; } + /* compare hardlink references */ + if (dst_type == MFU_TYPE_HARDLINK) { + const char* src_ref = mfu_flist_file_get_ref(src_list, src_index) + strlen_prefix; + const char* dst_ref = mfu_flist_file_get_ref(dst_list, dst_index) + strlen(dest_path->path); + if(!strcmp(src_ref, dst_ref)) { + /* update to say contents of the hardlinks were found to be the same */ + dcmp_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_COMMON); + dcmp_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_COMMON); + } else { + /* update to say contents of the hardlinks were found to be different */ + dcmp_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + dcmp_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + } + continue; + } + dcmp_state state; tmp_rc = dcmp_strmap_item_state(src_map, key, DCMPF_SIZE, &state); assert(tmp_rc == 0); From 84d37ef074c831e1c330474b64930a3c82535ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Wed, 29 Jan 2025 17:14:03 +0100 Subject: [PATCH 4/9] dsync: support hardlinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Add support of hardlinks in dsync. The reference paths of hardlinks in source and destination are compared. If not equal, strmap is updated to flag them as different. The branchs in items comparison logic is now based on filetype recorded in flist rather than the file mode as there is no way to distinguish reference paths and hardlinks with just the mode, both are regular files. Additional logic is added with dsync_remove_hardlinks_with_removed_ref() function to detect hardlinks whose references paths are marked for deletion in destination. In this case, all the hardlinks pointing to this reference are also marked for being replaced to avoid residual links pointing to wrong inodes. Signed-off-by: Rémi Palancher --- src/common/mfu_flist_copy.c | 2 +- src/dsync/dsync.c | 203 ++++++++++++++++++++++++++++++++++-- 2 files changed, 195 insertions(+), 10 deletions(-) diff --git a/src/common/mfu_flist_copy.c b/src/common/mfu_flist_copy.c index 8f7ffa543..1e060e895 100644 --- a/src/common/mfu_flist_copy.c +++ b/src/common/mfu_flist_copy.c @@ -1613,7 +1613,7 @@ static int mfu_create_hardlinks_dest( for (idx = 0; idx < size; idx++) { /* get type of item */ mfu_filetype type = mfu_flist_file_get_type(list, idx); - if (type != MFU_TYPE_FILE) { + if (type != MFU_TYPE_FILE && type != MFU_TYPE_HARDLINK) { MFU_LOG(MFU_LOG_ERR, "Can't create link for unregular files."); rc = -1; total_count++; diff --git a/src/dsync/dsync.c b/src/dsync/dsync.c index 2b4276f4a..9554772ba 100644 --- a/src/dsync/dsync.c +++ b/src/dsync/dsync.c @@ -1581,6 +1581,166 @@ static int dsync_strmap_compare_link_dest( return rc; } +/* For all local references files (ie. regular files with nlink > 1), flag all + * hardlinks using these files as references as having different content and + * place them in destination removal list and source copy list. Return -1 on + * error on any task. 
*/ +static int dsync_remove_hardlinks_with_removed_refs( + mfu_flist src_list, + mfu_flist src_cp_list, + strmap* src_map, + mfu_flist dst_list, + mfu_flist dst_remove_list, + strmap* dst_map, + mfu_file_t* mfu_src_file, + mfu_file_t* mfu_dst_file +) { + /* assume we'll succeed */ + int rc = 0; + int tmp_rc; + + uint64_t chars = mfu_flist_file_max_name(dst_remove_list); + + /* bail out if there is nothing removed in destination */ + if (!chars) { + return 0; + } + + int ranks; + MPI_Comm_size(MPI_COMM_WORLD, &ranks); + + /* Count all local references selected for removal. */ + int local_removed_refs = 0; + uint64_t remove_count = mfu_flist_size(dst_remove_list); + for(uint64_t idx=0; idx 1) { + local_removed_refs++; + } + } + + /* get number of references removed by all tasks */ + int* recvcounts = (int*) MFU_MALLOC(ranks * sizeof(int)); + MPI_Allgather(&local_removed_refs, 1, MPI_INT, + recvcounts, 1, MPI_INT, MPI_COMM_WORLD); + + MPI_Aint char_lb, char_extent; + MPI_Type_get_extent(MPI_CHAR, &char_lb, &char_extent); + + /* compute displacements and total number of bytes that we'll receive */ + size_t allbytes = 0; + int disp = 0; + int* recvdispls = (int*) MFU_MALLOC(ranks * sizeof(int)); + + for (int i = 0; i < (int) ranks; i++) { + /* adjust values in recvcounts for MPI_Allgatherv() */ + recvcounts[i] *= chars; + recvdispls[i] = disp; + disp += (int) recvcounts[i]; + allbytes += (size_t) recvcounts[i]; + } + + /* allocate memory for recv buffers */ + char* recvbuf = MFU_MALLOC(allbytes); + void* sendbuf = NULL; + + /* fill sendbuf with names of local references that will be removed */ + if (local_removed_refs) { + sendbuf = MFU_MALLOC((size_t)char_extent * chars * local_removed_refs); + char* sendptr = (char*) sendbuf; + for(int idx=0; idx 1) { + strncpy(sendptr, name, chars); + sendptr += char_extent * chars; + } + } + } + + MPI_Allgatherv(sendbuf, local_removed_refs * chars, MPI_CHAR, + recvbuf, recvcounts, recvdispls, MPI_CHAR, MPI_COMM_WORLD); + + /* 
iterate of all reference names received */ + uint64_t count = mfu_flist_size(dst_list); + char* recvptr = (char*) recvbuf; + for (int i = 0; i < (int) ranks; i++) { + for (int j = 0; jpath); + + if(strcmp(src_ref, dst_ref)) { + /* take them as differ content */ + dsync_strmap_item_update(src_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + dsync_strmap_item_update(dst_map, key, DCMPF_CONTENT, DCMPS_DIFFER); + + if (!options.dry_run) { + mfu_flist_file_copy(src_list, src_index, src_cp_list); + mfu_flist_file_copy(dst_list, dst_index, dst_remove_list); + } + } + continue; + } + /* first check whether file sizes match */ dsync_state state; tmp_rc = dsync_strmap_item_state(src_map, key, DCMPF_SIZE, &state); @@ -1874,6 +2049,16 @@ static int dsync_strmap_compare( } } + mfu_flist_summarize(dst_remove_list); + + /* For all references (ie. regular files with nlink > 1) in dst_remove_list, + * select all hardlinks pointing to this reference for removal as well. */ + tmp_rc = dsync_remove_hardlinks_with_removed_refs(src_list, src_cp_list, + src_map, dst_list, dst_remove_list, dst_map, mfu_src_file, mfu_dst_file); + if (tmp_rc < 0) { + rc = -1; + } + /* wait for all procs to finish before stopping timer */ MPI_Barrier(MPI_COMM_WORLD); From 433f1c849c5c21e1ae647b9cd2d6afecd457d676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Thu, 6 Mar 2025 11:37:06 +0100 Subject: [PATCH 5/9] dtar: support hardlinks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for hardlinks in dtar, in all supported create and extract algorithms. New structure entry_list_t is introduced, it is used in some extract algorithms to fill a temporary a list of hardlinks entries to create in a second pass, after all other files are created. 
Signed-off-by: Rémi Palancher --- src/common/mfu_flist_archive.c | 281 +++++++++++++++++++++++++++------ 1 file changed, 231 insertions(+), 50 deletions(-) diff --git a/src/common/mfu_flist_archive.c b/src/common/mfu_flist_archive.c index f5ba86143..dfbea893a 100644 --- a/src/common/mfu_flist_archive.c +++ b/src/common/mfu_flist_archive.c @@ -70,6 +70,12 @@ typedef struct { size_t io_bufsize; /* size of memory i/o buffer in bytes */ } DTAR_writer_t; +/* linked list of archive entries */ +typedef struct entry_list { + struct archive_entry* entry; + struct entry_list* next; +} entry_list_t; + DTAR_writer_t DTAR_writer; /* state of open archive file and I/O buffer */ mfu_flist DTAR_flist; /* source flist of set of items being copied into archive */ @@ -93,6 +99,22 @@ static void DTAR_exit(int code) exit(code); } +/* append a copy of entry in entries list and move current */ +static void entries_list_add(entry_list_t** entries, entry_list_t** current, struct archive_entry* entry) { + entry_list_t* new_entry = (entry_list_t*) malloc(sizeof(entry_list_t)); + new_entry->entry = archive_entry_clone(entry); + new_entry->next = NULL; + if(!*entries) { + *entries = new_entry; + } + if(!*current) { + *current = new_entry; + } else { + (*current)->next = new_entry; + *current = (*current)->next; + } +} + /**************************************** * Cache opened files to avoid repeated open/close of * the same file when using libcicle @@ -436,6 +458,15 @@ static int encode_header( rc = MFU_FAILURE; } + /* hardlink not managed by archive_read_disk_entry_from_file(), call + * archive_entry_set_hardlink() on the entry afterwhile. 
*/ + + if (type == MFU_TYPE_HARDLINK) { + const char* target = mfu_flist_file_get_ref(flist, idx); + const char* reltarget = mfu_param_path_relative(target, cwdpath); + archive_entry_set_hardlink(entry, reltarget); + } + /* we can free the archive now that we have the entry */ archive_read_free(source); @@ -487,6 +518,10 @@ static int encode_header( ); rc = MFU_FAILURE; } + } else if (type == MFU_TYPE_HARDLINK) { + const char* target = mfu_flist_file_get_ref(flist, idx); + const char* reltarget = mfu_param_path_relative(target, cwdpath); + archive_entry_copy_hardlink(entry, reltarget); } } @@ -2146,7 +2181,7 @@ static int compute_entry_sizes( /* identify item type to compute its size in the archive */ mfu_filetype type = mfu_flist_file_get_type(flist, idx); - if (type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) { + if (type == MFU_TYPE_DIR || type == MFU_TYPE_LINK || type == MFU_TYPE_HARDLINK) { /* directories and symlinks only need the header */ uint64_t header_size; encode_header(flist, idx, cwdpath, @@ -2651,9 +2686,11 @@ int mfu_flist_archive_create( /* write headers for our files */ for (idx = 0; idx < listsize; idx++) { - /* we currently only support regular files, directories, and symlinks */ + /* we currently only support regular files, directories, symlinks and + * hardlinks. 
*/ mfu_filetype type = mfu_flist_file_get_type(flist, idx); - if (type == MFU_TYPE_FILE || type == MFU_TYPE_DIR || type == MFU_TYPE_LINK) { + if (type == MFU_TYPE_FILE || type == MFU_TYPE_DIR || + type == MFU_TYPE_LINK || type == MFU_TYPE_HARDLINK) { /* write header for this item to the archive, * this sets DTAR_err on any error */ write_header(flist, idx, cwdpath, @@ -3837,9 +3874,20 @@ static void insert_entry_into_flist( mfu_flist_file_set_name(flist, idx, fullpath); mfu_free(&fullpath); - /* get mode of entry, and deduce mfu type */ + /* get mode of entry */ mode_t mode = archive_entry_mode(entry); - mfu_filetype type = mfu_flist_mode_to_filetype(mode); + mfu_filetype type = MFU_TYPE_UNKNOWN; + + /* If hardlink target is defined, set type accordingly and reference. Else + * deduce type from mode. */ + const char* target = archive_entry_hardlink(entry); + if (target != NULL) { + type = MFU_TYPE_HARDLINK; + mfu_flist_file_set_ref(flist, idx, target); + } else { + type = mfu_flist_mode_to_filetype(mode); + } + mfu_flist_file_set_type(flist, idx, type); mfu_flist_file_set_mode(flist, idx, mode); @@ -4213,6 +4261,57 @@ static void extract1_progress_fn(const uint64_t* vals, int count, int complete, } } +/* extract list of entries from archive, update progress and free the list */ +static int extract_archive_list_entries(struct archive* ext, entry_list_t** entries) { + + /* assume we'll succeed */ + int rc = MFU_SUCCESS; + entry_list_t *current_entry = NULL, *previous_entry = NULL; + + current_entry = *entries; + while(current_entry && rc == MFU_SUCCESS) { + /* create entry on disk */ + int r = archive_write_header(ext, current_entry->entry); + if (r != ARCHIVE_OK) { + MFU_LOG(MFU_LOG_ERR, "writing entry %s", + archive_error_string(ext) + ); + rc = MFU_FAILURE; + break; + } + + /* set any properties on the item that need to be set at end, + * e.g., turn off write bit on a file we just wrote or set timestamps */ + r = archive_write_finish_entry(ext); + if (r != 
ARCHIVE_OK) { + MFU_LOG(MFU_LOG_ERR, "finish writing entry %s", + archive_error_string(ext) + ); + rc = MFU_FAILURE; + break; + } + + /* increment our count of items extracted */ + reduce_buf[REDUCE_ITEMS]++; + + /* update number of items we have completed for progress messages */ + mfu_progress_update(reduce_buf, extract_prog); + + /* jump to next entry and free current entry */ + previous_entry = current_entry; + current_entry = current_entry->next; + archive_entry_free(previous_entry->entry); + mfu_free(&previous_entry); + previous_entry = NULL; + } + + /* Free entries list */ + *entries = NULL; + + return rc; +} + + /* compute total bytes in regular files in flist */ static uint64_t flist_sum_bytes(mfu_flist flist) { @@ -4302,6 +4401,7 @@ static int extract_files_offsets_libarchive( } /* iterate over and extract each item we're responsible for */ + entry_list_t* hardlink_entries = NULL, *current_hardlink_entry = NULL; uint64_t count = 0; while (count < entry_count && rc == MFU_SUCCESS) { /* seek to start of the entry in the archive file */ @@ -4362,6 +4462,15 @@ static int extract_files_offsets_libarchive( break; } + /* if hardlink entry, add a copy of this entry in hardlink_entries list + * for later processing */ + if (archive_entry_hardlink(entry) != NULL) { + entries_list_add(&hardlink_entries, ¤t_hardlink_entry, entry); + /* advance to our next entry */ + count++; + continue; + } + /* got an entry, create corresponding item on disk and * then copy data */ r = archive_write_header(ext, entry); @@ -4426,6 +4535,16 @@ static int extract_files_offsets_libarchive( count++; } + /* wait for all tasks to write regular files */ + MPI_Barrier(MPI_COMM_WORLD); + + /* extract pending hardlinks entries */ + r = extract_archive_list_entries(ext, &hardlink_entries); + if (r != MFU_SUCCESS) { + MFU_LOG(MFU_LOG_ERR, "Failed to extract hardlink entries for archive"); + rc = MFU_FAILURE; + } + /* close out our write archive, this may update timestamps and permissions on 
items */ r = archive_write_close(ext); if (r != ARCHIVE_OK) { @@ -4830,8 +4949,7 @@ static int extract_files( int ranks; MPI_Comm_size(MPI_COMM_WORLD, &ranks); - /* iterate over all entry from the start of the file, - * looking to find the range of items it is responsible for */ + entry_list_t* hardlink_entries = NULL, *current_hardlink_entry = NULL; uint64_t count = 0; while (rc == MFU_SUCCESS) { /* read the next entry from the archive */ @@ -4850,6 +4968,16 @@ static int extract_files( /* write item out to disk if this is one of our assigned items */ if (count % ranks == mfu_rank) { + + /* if hardlink entry, add a copy of this entry in hardlink_entries list + * for later processing */ + if (archive_entry_hardlink(entry) != NULL) { + entries_list_add(&hardlink_entries, ¤t_hardlink_entry, entry); + /* advance to next entry in the archive */ + count++; + continue; + } + /* create item on disk */ r = archive_write_header(ext, entry); if (r != ARCHIVE_OK) { @@ -4889,6 +5017,16 @@ static int extract_files( count++; } + /* wait for all tasks to write regular files */ + MPI_Barrier(MPI_COMM_WORLD); + + /* extract pending hardlinks entries */ + r = extract_archive_list_entries(ext, &hardlink_entries); + if (r != MFU_SUCCESS) { + MFU_LOG(MFU_LOG_ERR, "Failed to extract hardlink entries for archive"); + rc = MFU_FAILURE; + } + /* free off our write archive, this may update timestamps and permissions on items */ r = archive_write_close(ext); if (r != ARCHIVE_OK) { @@ -4947,8 +5085,8 @@ static int extract_files( } /* iterate through our portion of the given file list, - * identify symlinks and extract them from archive */ -static int extract_symlinks( + * identify symlinks and hardlinks and extract them from archive */ +static int extract_links( const char* filename, /* name of archive file */ mfu_flist flist, /* file list of items */ uint64_t* offsets, /* offset of each item in the archive */ @@ -4956,31 +5094,37 @@ static int extract_symlinks( { int rc = MFU_SUCCESS; - /* 
iterate over all items in our list and count symlinks */ - uint64_t count = 0; + /* iterate over all items in our list, count symlinks and hardlinks */ + uint64_t count_symlinks = 0, count_hardlinks = 0; uint64_t idx; uint64_t size = mfu_flist_size(flist); for (idx = 0; idx < size; idx++) { mfu_filetype type = mfu_flist_file_get_type(flist, idx); if (type == MFU_TYPE_LINK) { /* found a symlink */ - count++; + count_symlinks++; + } + if (type == MFU_TYPE_HARDLINK) { + /* found a hardlink */ + count_hardlinks++; } } /* count total number of links */ - uint64_t all_count; - MPI_Allreduce(&count, &all_count, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + uint64_t all_count_symlinks, all_count_hardlinks; + MPI_Allreduce(&count_symlinks, &all_count_symlinks, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(&count_hardlinks, &all_count_hardlinks, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); /* bail out early if there is nothing to do */ - if (all_count == 0) { + if (all_count_symlinks + all_count_hardlinks == 0) { return MFU_SUCCESS; } /* let user know what we're doing */ MPI_Barrier(MPI_COMM_WORLD); if (mfu_rank == 0) { - MFU_LOG(MFU_LOG_INFO, "Creating %llu symlinks", (unsigned long long)all_count); + MFU_LOG(MFU_LOG_INFO, "Creating %llu symlinks", (unsigned long long)all_count_symlinks); + MFU_LOG(MFU_LOG_INFO, "Creating %llu hardlinks", (unsigned long long)all_count_hardlinks); } /* open the archive file for reading */ @@ -5008,10 +5152,14 @@ static int extract_symlinks( for (idx = 0; idx < size; idx++) { /* skip entries that are not symlinks */ mfu_filetype type = mfu_flist_file_get_type(flist, idx); - if (type != MFU_TYPE_LINK) { - /* not a symlink, go to next item */ + char* type_s; + if (type == MFU_TYPE_LINK) { + type_s = "symlink"; + } else if (type == MFU_TYPE_HARDLINK) { + type_s = "hardlink"; + } else + /* not a symlink or hardlink, go to next item */ continue; - } /* got a symlink, get its path */ const char* name = mfu_flist_file_get_name(flist, idx); 
@@ -5040,8 +5188,8 @@ static int extract_symlinks( /* use a small read block size, since we just need the header */ int r = archive_read_open_fd(a, fd, 10240); if (r != ARCHIVE_OK) { - MFU_LOG(MFU_LOG_ERR, "opening archive to extract symlink `%s' at offset %llu %s", - name, offset, archive_error_string(a) + MFU_LOG(MFU_LOG_ERR, "opening archive to extract %s `%s' at offset %llu %s", + type_s, name, offset, archive_error_string(a) ); archive_read_free(a); rc = MFU_FAILURE; @@ -5052,8 +5200,8 @@ static int extract_symlinks( struct archive_entry* entry; r = archive_read_next_header(a, &entry); if (r == ARCHIVE_EOF) { - MFU_LOG(MFU_LOG_ERR, "Unexpected end of archive while extracting symlink `%s' at offset %llu", - name, offset + MFU_LOG(MFU_LOG_ERR, "Unexpected end of archive while extracting %s `%s' at offset %llu", + type_s, name, offset ); archive_read_close(a); archive_read_free(a); @@ -5061,8 +5209,8 @@ static int extract_symlinks( continue; } if (r != ARCHIVE_OK) { - MFU_LOG(MFU_LOG_ERR, "Extracting symlink '%s' at offset %llu %s", - name, offset, archive_error_string(a) + MFU_LOG(MFU_LOG_ERR, "Extracting %s '%s' at offset %llu %s", + type_s, name, offset, archive_error_string(a) ); archive_read_close(a); archive_read_free(a); @@ -5070,33 +5218,66 @@ static int extract_symlinks( continue; } - /* get target of the link */ - const char* target = archive_entry_symlink(entry); - if (target == NULL) { - MFU_LOG(MFU_LOG_ERR, "Item is not a symlink as expected `%s'", - name); - archive_read_close(a); - archive_read_free(a); - rc = MFU_FAILURE; - continue; - } + if (type == MFU_TYPE_LINK) { + /* get target of the symlink */ + const char* target = archive_entry_symlink(entry); + if (target == NULL) { + MFU_LOG(MFU_LOG_ERR, "Item is not a symlink as expected `%s'", + name); + archive_read_close(a); + archive_read_free(a); + rc = MFU_FAILURE; + continue; + } + /* create the symlink on the file system */ + int symlink_rc = mfu_symlink(target, name); + if (symlink_rc != 0) { 
+ /* TODO: check whether user wants overwrite */ + if (errno == EEXIST) { + /* failed because something exists, + * attempt to delete item and try again */ + mfu_unlink(name); + symlink_rc = mfu_symlink(target, name); + } - /* create the link on the file system */ - int symlink_rc = mfu_symlink(target, name); - if (symlink_rc != 0) { - /* TODO: check whether user wants overwrite */ - if (errno == EEXIST) { - /* failed because something exists, - * attempt to delete item and try again */ - mfu_unlink(name); - symlink_rc = mfu_symlink(target, name); + /* if we still failed, give up */ + if (symlink_rc != 0) { + MFU_LOG(MFU_LOG_ERR, "Failed to set symlink `%s' (errno=%d %s)", + name, errno, strerror(errno)); + rc = MFU_FAILURE; + } } + } - /* if we still failed, give up */ - if (symlink_rc != 0) { - MFU_LOG(MFU_LOG_ERR, "Failed to set symlink `%s' (errno=%d %s)", - name, errno, strerror(errno)); + if (type == MFU_TYPE_HARDLINK) { + /* get target of the hardlink */ + const char* target = archive_entry_hardlink(entry); + if (target == NULL) { + MFU_LOG(MFU_LOG_ERR, "Item is not a hardlink as expected `%s'", + name); + archive_read_close(a); + archive_read_free(a); rc = MFU_FAILURE; + continue; + } + + /* create the hardlink on the file system */ + int hardlink_rc = mfu_hardlink(target, name); + if (hardlink_rc != 0) { + /* TODO: check whether user wants overwrite */ + if (errno == EEXIST) { + /* failed because something exists, + * attempt to delete item and try again */ + mfu_unlink(name); + hardlink_rc = mfu_hardlink(target, name); + } + + /* if we still failed, give up */ + if (hardlink_rc != 0) { + MFU_LOG(MFU_LOG_ERR, "Failed to set hardlink `%s' (errno=%d %s)", + name, errno, strerror(errno)); + rc = MFU_FAILURE; + } } } @@ -5594,8 +5775,8 @@ int mfu_flist_archive_extract( * create the files in advance */ mfu_flist_mknod(flist, create_opts); - /* create symlinks */ - int tmp_rc = extract_symlinks(filename, flist, offsets, opts); + /* create symlinks and hardlinks 
*/ + int tmp_rc = extract_links(filename, flist, offsets, opts); if (tmp_rc != MFU_SUCCESS) { /* tried but failed to get some symlink, so mark as failure */ ret = tmp_rc; From 11070dbf203a29b8a2ab23ea10bbc8637292b563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Tue, 11 Mar 2025 16:05:17 +0100 Subject: [PATCH 6/9] mfu: introduce cache format v5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce cache format v5 which supports hardlinks encoding with nlink and reference paths. New read_cache_v5() is basically similar to read_cache_v4() except the calls to list_elem_pack_size[_le4]() and list_insert_ptr[_le4](). Signed-off-by: Rémi Palancher --- src/common/mfu_flist_io.c | 337 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 322 insertions(+), 15 deletions(-) diff --git a/src/common/mfu_flist_io.c b/src/common/mfu_flist_io.c index 0b0a2e59e..37039bff7 100644 --- a/src/common/mfu_flist_io.c +++ b/src/common/mfu_flist_io.c @@ -226,7 +226,7 @@ static void list_elem_decode(char* buf, elem_t* elem) /* create a datatype to hold file name and stat info */ /* return number of bytes needed to pack element */ -static size_t list_elem_pack_size(int detail, uint64_t chars, const elem_t* elem) +static size_t list_elem_pack_size_le4(int detail, uint64_t chars, const elem_t* elem) { size_t size; if (detail) { @@ -238,6 +238,18 @@ static size_t list_elem_pack_size(int detail, uint64_t chars, const elem_t* elem return size; } +static size_t list_elem_pack_size(int detail, uint64_t chars, const elem_t* elem) +{ + size_t size; + if (detail) { + size = chars + 1 * 4 + 11 * 8 + chars; + } + else { + size = chars + 1 * 4; + } + return size; +} + /* pack element into buffer and return number of bytes written */ static size_t list_elem_pack(void* buf, int detail, uint64_t chars, const elem_t* elem) { @@ -246,9 +258,9 @@ static size_t list_elem_pack(void* buf, int detail, uint64_t chars, const elem_t char* ptr = 
start; /* copy in file name */ - char* file = elem->file; - strncpy(ptr, file, chars); - ptr += chars; + mfu_pack_sized_str(&ptr, elem->file, chars); + + mfu_pack_io_uint32(&ptr, elem->type); if (detail) { mfu_pack_io_uint64(&ptr, elem->mode); @@ -261,18 +273,16 @@ static size_t list_elem_pack(void* buf, int detail, uint64_t chars, const elem_t mfu_pack_io_uint64(&ptr, elem->ctime); mfu_pack_io_uint64(&ptr, elem->ctime_nsec); mfu_pack_io_uint64(&ptr, elem->size); - } - else { - /* just have the file type */ - mfu_pack_io_uint32(&ptr, elem->type); + mfu_pack_io_uint64(&ptr, elem->nlink); + mfu_pack_sized_str(&ptr, elem->ref, chars); } size_t bytes = (size_t)(ptr - start); return bytes; } -/* unpack element from buffer and return number of bytes read */ -static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem_t* elem) +/* unpack element (encoded in format v4 or below) from buffer and return number of bytes read */ +static size_t list_elem_unpack_le4(const void* buf, int detail, uint64_t chars, elem_t* elem) { const char* start = (const char*) buf; const char* ptr = start; @@ -289,6 +299,9 @@ static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem elem->detail = detail; + /* ref is not not supported in format v4 or below */ + elem->ref = NULL; + if (detail) { /* extract fields */ mfu_unpack_io_uint64(&ptr, &elem->mode); @@ -313,6 +326,42 @@ static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem return bytes; } +/* unpack element from buffer and return number of bytes read */ +static size_t list_elem_unpack(const void* buf, int detail, uint64_t chars, elem_t* elem) +{ + const char* start = (const char*) buf; + const char* ptr = start; + + /* get name */ + mfu_unpack_sized_str(&ptr, &elem->file, chars); + + /* set depth */ + elem->depth = mfu_flist_compute_depth(elem->file); + + mfu_unpack_io_uint32(&ptr, &elem->type); + + elem->detail = detail; + + if (detail) { + /* extract fields */ + 
mfu_unpack_io_uint64(&ptr, &elem->mode); + mfu_unpack_io_uint64(&ptr, &elem->uid); + mfu_unpack_io_uint64(&ptr, &elem->gid); + mfu_unpack_io_uint64(&ptr, &elem->atime); + mfu_unpack_io_uint64(&ptr, &elem->atime_nsec); + mfu_unpack_io_uint64(&ptr, &elem->mtime); + mfu_unpack_io_uint64(&ptr, &elem->mtime_nsec); + mfu_unpack_io_uint64(&ptr, &elem->ctime); + mfu_unpack_io_uint64(&ptr, &elem->ctime_nsec); + mfu_unpack_io_uint64(&ptr, &elem->size); + mfu_unpack_io_uint64(&ptr, &elem->nlink); + mfu_unpack_sized_str(&ptr, &elem->ref, chars); + } + + size_t bytes = (size_t)(ptr - start); + return bytes; +} + /* insert a file given a pointer to packed data */ static void list_insert_decode(flist_t* flist, char* buf) { @@ -328,6 +377,21 @@ static void list_insert_decode(flist_t* flist, char* buf) return; } +/* insert a file given a pointer to packed data */ +static size_t list_insert_ptr_le4(flist_t* flist, char* ptr, int detail, uint64_t chars) +{ + /* create new element to record file path, file type, and stat info */ + elem_t* elem = (elem_t*) MFU_MALLOC(sizeof(elem_t)); + + /* get name and advance pointer */ + size_t bytes = list_elem_unpack_le4(ptr, detail, chars, elem); + + /* append element to tail of linked list */ + mfu_flist_insert_elem(flist, elem); + + return bytes; +} + /* insert a file given a pointer to packed data */ static size_t list_insert_ptr(flist_t* flist, char* ptr, int detail, uint64_t chars) { @@ -803,7 +867,7 @@ static void read_cache_v3( uint64_t packcount = 0; while (packcount < (uint64_t) read_count) { /* unpack item from buffer and advance pointer */ - list_insert_ptr(flist, ptr, 1, chars); + list_insert_ptr_le4(flist, ptr, 1, chars); ptr += extent_file; packcount++; } @@ -988,6 +1052,247 @@ static void read_cache_v4( disp += (MPI_Offset) group_buf_size; } + /* read files, if any */ + if (all_count > 0 && chars > 0) { + /* get size of file element */ + size_t elem_size = list_elem_pack_size_le4(flist->detail, (int)chars, NULL); + + /* in order to 
avoid blowing out memory, we'll pack into a smaller + * buffer and iteratively make many collective reads */ + + /* allocate a buffer, ensure it's large enough to hold at least one + * complete record */ + size_t bufsize = 1024 * 1024; + if (bufsize < elem_size) { + bufsize = elem_size; + } + void* buf = MFU_MALLOC(bufsize); + + /* compute number of items we can fit in each read iteration */ + uint64_t bufcount = (uint64_t)bufsize / (uint64_t)elem_size; + + /* determine number of iterations we need to read all items */ + uint64_t iters = count / bufcount; + if (iters * bufcount < count) { + iters++; + } + + /* compute max iterations across all procs */ + uint64_t all_iters; + MPI_Allreduce(&iters, &all_iters, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD); + + /* set file view to be sequence of datatypes past header */ + mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + /* compute byte offset to read our element */ + MPI_Offset read_offset = (MPI_Offset)offset * elem_size; + + /* iterate with multiple reads until all records are read */ + uint64_t totalcount = 0; + while (all_iters > 0) { + /* determine number to read */ + int read_count = (int) bufcount; + uint64_t remaining = count - totalcount; + if (remaining < bufcount) { + read_count = (int) remaining; + } + + /* TODO: read_at_all w/ external32 is broken in ROMIO as of MPICH-3.2rc1 */ + + /* compute number of bytes to read */ + int read_size = read_count * (int)elem_size; + + /* issue a collective read */ + //MPI_File_read_at_all(fh, read_offset, buf, read_size, MPI_BYTE, &status); + mpirc = MPI_File_read_at(fh, read_offset, buf, read_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, mpirc, 
mpierrstr); + } + + /* update our offset with the number of items we just read */ + read_offset += (MPI_Offset)read_size; + totalcount += (uint64_t) read_count; + + /* unpack data from buffer into list */ + char* ptr = (char*) buf; + uint64_t packcount = 0; + while (packcount < (uint64_t) read_count) { + /* unpack item from buffer and advance pointer */ + list_insert_ptr_le4(flist, ptr, 1, chars); + ptr += elem_size; + packcount++; + } + + /* one less iteration */ + all_iters--; + } + + /* free buffer */ + mfu_free(&buf); + } + + /* create maps of users and groups */ + mfu_flist_usrgrp_create_map(&flist->users, flist->user_id2name); + mfu_flist_usrgrp_create_map(&flist->groups, flist->group_id2name); + + *outdisp = disp; + return; +} + +/* file format: same as v4 except nlink and ref added in list elements to + * support hardlinks */ + static void read_cache_v5( + const char* name, + MPI_Offset* outdisp, + MPI_File fh, + const char* datarep, + flist_t* flist) +{ + MPI_Status status; + + MPI_Offset disp = *outdisp; + + /* indicate that we have stat data */ + flist->detail = 1; + + /* pointer to users, groups, and file buffer data structure */ + buf_t* users = &flist->users; + buf_t* groups = &flist->groups; + + /* get our rank */ + int rank, ranks; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &ranks); + + /* rank 0 reads and broadcasts header */ + uint64_t header[6]; + int header_size = 6 * 8; /* 6 consecutive uint64_t */ + int mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + if (rank == 0) { + uint64_t header_packed[6]; + mpirc = MPI_File_read_at(fh, 0, header_packed, header_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, 
mpirc, mpierrstr); + } + + const char* ptr = (const char*) header_packed; + mfu_unpack_io_uint64(&ptr, &header[0]); + mfu_unpack_io_uint64(&ptr, &header[1]); + mfu_unpack_io_uint64(&ptr, &header[2]); + mfu_unpack_io_uint64(&ptr, &header[3]); + mfu_unpack_io_uint64(&ptr, &header[4]); + mfu_unpack_io_uint64(&ptr, &header[5]); + } + MPI_Bcast(header, 6, MPI_UINT64_T, 0, MPI_COMM_WORLD); + disp += header_size; + + uint64_t all_count; + users->count = header[0]; + users->chars = header[1]; + groups->count = header[2]; + groups->chars = header[3]; + all_count = header[4]; + uint64_t chars = header[5]; + + /* compute count for each process */ + uint64_t count = all_count / (uint64_t)ranks; + uint64_t remainder = all_count - count * (uint64_t)ranks; + if ((uint64_t)rank < remainder) { + count++; + } + + /* get our offset */ + uint64_t offset; + MPI_Exscan(&count, &offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); + if (rank == 0) { + offset = 0; + } + + /* read users, if any */ + if (users->count > 0 && users->chars > 0) { + /* create type */ + mfu_flist_usrgrp_create_stridtype((int)users->chars, &(users->dt)); + + /* get extent */ + MPI_Aint lb_user, extent_user; + MPI_Type_get_extent(users->dt, &lb_user, &extent_user); + + /* allocate memory to hold data */ + size_t bufsize_user = users->count * (size_t)extent_user; + users->buf = (void*) MFU_MALLOC(bufsize_user); + users->bufsize = bufsize_user; + + /* set view to read data */ + mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + /* read data */ + int user_buf_size = (int) buft_pack_size(users); + if (rank == 0) { + char* user_buf = (char*) MFU_MALLOC(user_buf_size); + mpirc = MPI_File_read_at(fh, 0, user_buf, user_buf_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, 
&mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + buft_unpack(user_buf, users); + mfu_free(&user_buf); + } + MPI_Bcast(users->buf, (int)users->count, users->dt, 0, MPI_COMM_WORLD); + disp += (MPI_Offset) user_buf_size; + } + + /* read groups, if any */ + if (groups->count > 0 && groups->chars > 0) { + /* create type */ + mfu_flist_usrgrp_create_stridtype((int)groups->chars, &(groups->dt)); + + /* get extent */ + MPI_Aint lb_group, extent_group; + MPI_Type_get_extent(groups->dt, &lb_group, &extent_group); + + /* allocate memory to hold data */ + size_t bufsize_group = groups->count * (size_t)extent_group; + groups->buf = (void*) MFU_MALLOC(bufsize_group); + groups->bufsize = bufsize_group; + + /* set view to read data */ + mpirc = MPI_File_set_view(fh, disp, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + + /* read data */ + int group_buf_size = (int) buft_pack_size(groups); + if (rank == 0) { + char* group_buf = (char*) MFU_MALLOC(group_buf_size); + mpirc = MPI_File_read_at(fh, 0, group_buf, group_buf_size, MPI_BYTE, &status); + if (mpirc != MPI_SUCCESS) { + MPI_Error_string(mpirc, mpierrstr, &mpierrlen); + MFU_ABORT(1, "Failed to read file: `%s' rc=%d %s", name, mpirc, mpierrstr); + } + buft_unpack(group_buf, groups); + mfu_free(&group_buf); + } + MPI_Bcast(groups->buf, (int)groups->count, groups->dt, 0, MPI_COMM_WORLD); + disp += (MPI_Offset) group_buf_size; + } + /* read files, if any */ if (all_count > 0 && chars > 0) { /* get size of file element */ @@ -1139,7 +1444,9 @@ void mfu_flist_read_cache( disp += 1 * 8; /* 9 consecutive uint64_t types in external32 */ /* read data from file */ - if (version == 4) { + if (version == 5) { + read_cache_v5(name, &disp, fh, datarep, flist); + } else if (version == 4) { read_cache_v4(name, &disp, fh, datarep, 
flist); } else if (version == 3) { /* need a couple of dummy params to record walk start and end times */ @@ -1326,7 +1633,7 @@ static void write_cache_readdir_variable( return; } -static void write_cache_stat_v4( +static void write_cache_stat_v5( const char* name, flist_t* flist) { @@ -1389,7 +1696,7 @@ static void write_cache_stat_v4( int header_bytes = 7 * 8; uint64_t header[7]; char* ptr = (char*) header; - mfu_pack_io_uint64(&ptr, 4); /* file version */ + mfu_pack_io_uint64(&ptr, 5); /* file version */ mfu_pack_io_uint64(&ptr, users->count); /* number of user records */ mfu_pack_io_uint64(&ptr, users->chars); /* number of chars in user name */ mfu_pack_io_uint64(&ptr, groups->count); /* number of group records */ @@ -1563,7 +1870,7 @@ void mfu_flist_write_cache( if (all_count > 0) { if (flist->detail) { - write_cache_stat_v4(name, flist); + write_cache_stat_v5(name, flist); } else { write_cache_readdir_variable(name, flist); From b0d6176c6a73f45fa560350bc5066bdf3d94b06b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Tue, 11 Mar 2025 16:09:15 +0100 Subject: [PATCH 7/9] dcp: support hardlinks with --input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dcp reads the input list from cache, it places files with more than one link in a temporary list and resolves hardlinks, similarly to the logic implemented in walk with details. 
Signed-off-by: Rémi Palancher --- src/common/mfu_flist_walk.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/common/mfu_flist_walk.c b/src/common/mfu_flist_walk.c index e41790f71..a22097d66 100644 --- a/src/common/mfu_flist_walk.c +++ b/src/common/mfu_flist_walk.c @@ -1095,9 +1095,13 @@ void mfu_flist_stat( mfu_file_t* mfu_file) { flist_t* file_list = (flist_t*)flist; + /* lists to track and resolve hardlinks */ + flist_t* hardlinks_tmp_list = mfu_flist_new(); + inodes_hardlink_map_t* hardlinks_inodes_map = inodes_map_new(); /* we will stat all items in output list, so set detail to 1 */ file_list->detail = 1; + hardlinks_tmp_list->detail = 1; /* get user data if needed */ if (file_list->have_users == 0) { @@ -1161,10 +1165,21 @@ void mfu_flist_stat( } } - /* insert item into output list */ - mfu_flist_insert_stat(flist, name, st.st_mode, &st); + if (S_ISREG(st.st_mode) && st.st_nlink > 1) { + /* record info for item in temporary hardlinks list and inodes map */ + mfu_flist_insert_stat(hardlinks_tmp_list, name, st.st_mode, &st); + inodes_map_insert(hardlinks_inodes_map, (uint64_t)st.st_ino); + } else + /* record info for item in list */ + mfu_flist_insert_stat(flist, name, st.st_mode, &st); + } + /* compute hardlinks temporary list global summary */ + mfu_flist_summarize(hardlinks_tmp_list); + /* resolve hardlinks and merge them in flist */ + walk_resolve_hardlinks(flist, hardlinks_tmp_list, hardlinks_inodes_map); + /* compute global summary */ mfu_flist_summarize(flist); } From cab10d8ce5a1fdd772ea77dc8df36e6fb2b7226b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Fri, 7 Feb 2025 18:29:22 +0100 Subject: [PATCH 8/9] tests: automatic functional tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This command adds many functional tests of dcmp, dcp, dsync, dtar and dwalk, executed and automatically validated with Python standard unittest library. 
This is designed to be easy to execute and integrate in continuous integration systems. Set two environment variables to define respectively the path to mpifileutils binaries and arguments provided to mpirun, eg: $ export MFU_BIN=~/dev/bin $ export MFU_MPIRUN_ARGS="--bind-to none --oversubscribe -N 4" And run all the tests: $ python3 -m unittest discover -v test Or: $ pytest # require pytest The suite has utilities to check similarity between two trees, with the possibility to specific paths and attributes (eg. mtime). It is also possible to assert specific command outputs. Most tests are run against a specific testing file tree to cover many cases. Other tests are run with a file tree generated by dfilemaker. Signed-off-by: Rémi Palancher --- test/tests/__init__.py | 0 test/tests/lib.py | 418 +++++++++++++++++++++++++++++++ test/tests/test_dcmp.py | 180 +++++++++++++ test/tests/test_dcp.py | 208 +++++++++++++++ test/tests/test_dsync.py | 529 +++++++++++++++++++++++++++++++++++++++ test/tests/test_dtar.py | 320 +++++++++++++++++++++++ test/tests/test_dwalk.py | 176 +++++++++++++ 7 files changed, 1831 insertions(+) create mode 100644 test/tests/__init__.py create mode 100644 test/tests/lib.py create mode 100644 test/tests/test_dcmp.py create mode 100644 test/tests/test_dcp.py create mode 100644 test/tests/test_dsync.py create mode 100644 test/tests/test_dtar.py create mode 100644 test/tests/test_dwalk.py diff --git a/test/tests/__init__.py b/test/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/tests/lib.py b/test/tests/lib.py new file mode 100644 index 000000000..e23c6e6ae --- /dev/null +++ b/test/tests/lib.py @@ -0,0 +1,418 @@ +#!/usr/bin/python3 + +import unittest +import dataclasses +from pathlib import Path +import tempfile +import os +import stat +import subprocess +import typing as t +import shlex + +import yaml + + +def mpirun_cmd(): + mpirun = os.environ.get("MFU_MPIRUN_CMD", "mpirun") + args = 
os.environ.get("MFU_MPIRUN_ARGS") + if args is None: + args = [] + else: + args = args.split(" ") + return [mpirun] + args + + +def mfu_cmd(cmd): + return mpirun_cmd() + [os.path.join(os.environ["MFU_BIN"], cmd)] + + +def dfilemaker_cmd(): + return mfu_cmd("dfilemaker") + + +def dwalk_cmd(): + return mfu_cmd("dwalk") + + +def dfind_cmd(): + return mfu_cmd("dfind") + + +def dsync_cmd(): + return mfu_cmd("dsync") + + +def dcp_cmd(): + return mfu_cmd("dcp") + + +def dcmp_cmd(): + return mfu_cmd("dcmp") + + +def dtar_cmd(): + return mfu_cmd("dtar") + + +def create_file(path: Path): + fh = open(path, "w+") + fh.close() + + +# Global variables holding the real dataclasses instanciated in tests for +# comparisons. +RegularFile = None +Symlink = None +Directory = None + + +@dataclasses.dataclass +class FilesystemObject: + parent: Path = dataclasses.field(compare=False) + name: str + mode: int + uid: int + gid: int + mtime: int + + @property + def path(self) -> Path: + return self.parent / self.name + + +@dataclasses.dataclass +class _RegularFile(FilesystemObject): + inode: int = dataclasses.field(compare=False) + nlink: int + size: int + + def __str__(self): + return ( + f"File[{self.path} inode:{self.inode} nlink:{self.nlink}, " + f"size:{self.size}, mode: {stat.filemode(self.mode)}, " + f"uid:{self.uid}, gid:{self.gid}, mtime:{self.mtime}]" + ) + + +@dataclasses.dataclass +class _Symlink(FilesystemObject): + target: str + + def __str__(self): + return ( + f"Symlink[{self.path}→{self.target} " + f"mode: {stat.filemode(self.mode)}, " + f"uid:{self.uid}, gid:{self.gid}, mtime:{self.mtime}]" + ) + + +@dataclasses.dataclass +class _Directory(FilesystemObject): + content: dict = dataclasses.field(default_factory=dict) + + def get(self, path: str): + value = self.content + components = path.split(os.path.sep) + for component in components[:-1]: + value = value.get(component) + return value.get(components[-1]) + + def __str__(self): + return ( + f"Directory[{self.path} " + 
f"mode: {stat.filemode(self.mode)}, " + f"uid:{self.uid}, gid:{self.gid}, mtime:{self.mtime}]" + ) + + def dump(self, indent=0): + if not self.content: + print(f"{' '*indent}∅") + for name, item in self.content.items(): + print(f"{' '*indent}{name:20}: {item}") + if isinstance(item, Directory): + item.dump(indent + 2) + + @classmethod + def from_path(cls, path: Path): + fs_o_stat = path.stat() + dir_o = cls( + path.parent, + path.name, + fs_o_stat.st_mode, + fs_o_stat.st_uid, + fs_o_stat.st_gid, + fs_o_stat.st_mtime, + ) + for item in Path(path).iterdir(): + fs_o_stat = item.lstat() + if item.is_symlink(): + fs_o = Symlink( + dir_o.path, + item.name, + fs_o_stat.st_mode, + fs_o_stat.st_uid, + fs_o_stat.st_gid, + fs_o_stat.st_mtime, + str(item.readlink()), + ) + elif item.is_dir(): + fs_o = Directory.from_path(item) + else: + fs_o = RegularFile( + dir_o.path, + item.name, + fs_o_stat.st_mode, + fs_o_stat.st_uid, + fs_o_stat.st_gid, + fs_o_stat.st_mtime, + fs_o_stat.st_ino, + fs_o_stat.st_nlink, + fs_o_stat.st_size, + ) + dir_o.content[item.name] = fs_o + return dir_o + + +class FileTree(_Directory): + + def dump(self): + print(f"\nFile tree {self.path}:") + super().dump() + + +def _create_dir_content(path: Path, layout: dict): + for s_item, attrs in layout.items(): + fs_o_type = attrs.get("type", "file") + if fs_o_type == "hardlink": + continue + item = path / s_item + match fs_o_type: + case "file": + create_file(item) + case "symlink": + item.symlink_to(attrs["target"]) + case "dir": + item.mkdir() + _create_dir_content(item, attrs["layout"]) + + +def _create_hardlinks(path: Path, layout: dict): + for s_item, attrs in layout.items(): + fs_o_type = attrs.get("type", "file") + if fs_o_type not in ["hardlink", "dir"]: + continue + item = path / s_item + match fs_o_type: + case "hardlink": + target = path / attrs["target"] + item.hardlink_to(target) + case "dir": + _create_hardlinks(item, attrs["layout"]) + + +def create_filetree_from_yaml(path: str, layout: str): + 
layout = yaml.safe_load(layout) + # 1st pass without hardlinks + _create_dir_content(path, layout) + # create hardlinks recursively on 2nd pass + _create_hardlinks(path, layout) + + +BASIC_FILES_LAYOUT = """ +file1: {} +file2: {} +file3: {} +symlink2: + type: symlink + target: file2 +hardlink3: + type: hardlink + target: file3 +dir1: + type: dir + layout: + file4: {} + symlink1: + type: symlink + target: ../file1 +""" + + +def create_basic_layout(dst): + create_filetree_from_yaml( + path=dst, + layout=BASIC_FILES_LAYOUT, + ) + + +class TestFileTreeCmp(unittest.TestCase): + + def setUp(self): + self._tmp_src = tempfile.TemporaryDirectory() + self.src = Path(self._tmp_src.name) + self._tmp_dst = tempfile.TemporaryDirectory() + self.dst = Path(self._tmp_dst.name) + self.archive = self.dst / "archive.tar" # used for dtar tests + + def tearDown(self): + self._tmp_src.cleanup() + self._tmp_dst.cleanup() + + def assertSameFileTree( + self, + dir1: Directory, + dir2: Directory, + root_dir1: Path, + root_dir2: Path, + ignore_paths: None | list[str] = None, + ): + if ignore_paths is None: + ignore_paths = [] + try: + # discard ignored items in this directory + if ignore_paths: + dir1.content = { + key: value + for key, value in dir1.content.items() + if str(value.path.relative_to(root_dir1)) + not in ignore_paths + } + dir2.content = { + key: value + for key, value in dir2.content.items() + if str(value.path.relative_to(root_dir2)) + not in ignore_paths + } + self.assertCountEqual( + dir1.content.keys(), + dir2.content.keys(), + f"Directories {dir1} and {dir2} do not have the same content", + ) + for key in dir1.content.keys(): + fso_1 = dir1.content[key] + fso_2 = dir2.content[key] + self.assertEqual( + type(fso_1), + type(fso_2), + f"Paths {fso_1} and {fso_2} do not have the same type", + ) + if isinstance(fso_1, Directory): + self.assertSameFileTree( + fso_1, fso_2, root_dir1, root_dir2, ignore_paths + ) + else: + self.assertEqual( + fso_1, + fso_2, + f"Paths {fso_1} and 
{fso_2} are not equal", + ) + except AssertionError as err: + dir1.dump() + dir2.dump() + raise AssertionError(err) + + def assertSrcDstEqual( + self, + ignore_paths=None, + ignore_nlink=False, + ignore_mtime=False, + dest: t.Optional[Path] = None, + ): + if dest is None: + dest = self.dst + global RegularFile + global Symlink + global Directory + if ignore_nlink or ignore_mtime: + # Create new dataclasses in which nlink or mtime are ignored in + # __eq__ operator. + RegularFile = dataclasses.make_dataclass( + "RegularFile", + [ + ("nlink", int, dataclasses.field(compare=not ignore_nlink)), + ("mtime", int, dataclasses.field(compare=not ignore_mtime)), + ], + bases=(_RegularFile,), + ) + Symlink = dataclasses.make_dataclass( + "Symlink", + [ + ("mtime", int, dataclasses.field(compare=not ignore_mtime)), + ], + bases=(_Symlink,), + ) + Directory = dataclasses.make_dataclass( + "Directory", + [ + ("mtime", int, dataclasses.field(compare=not ignore_mtime)), + ], + bases=(_Directory,), + ) + + else: + RegularFile = _RegularFile + Symlink = _Symlink + Directory = _Directory + + ft_src = FileTree.from_path(self.src) + ft_dst = FileTree.from_path(dest) + self.assertSameFileTree(ft_src, ft_dst, self.src, dest, ignore_paths) + + def assertInProcStdout(self, proc, msg): + # Remove timestamp prefix from dsync output + def untimestamp_line(line): + if line.startswith("["): + return line.split(" ", maxsplit=1)[1] + return line + + unprefixed_output = "\n".join( + [ + untimestamp_line(line) + for line in proc.stdout.decode().strip().split("\n") + ] + ) + msg = msg.strip() + if msg not in unprefixed_output: + raise AssertionError( + "Unable to find message in output.\n" + f" - Message:\n{msg}\n" + f" - Output:\n{unprefixed_output}" + ) + + def run_cmd( + self, + cmd: t.List[str | Path | int], + cwd: t.Optional[Path] = None, + env: t.Optional[t.Dict[str, str]] = None, + ) -> subprocess.CompletedProcess: + _env = os.environ.copy() + if env: + _env.update(env) + + def 
cmd_result(proc) -> str: + return ( + f" - exit code: {proc.returncode}\n" + f" - stdout:\n{proc.stdout.decode()}\n" + f" - stderr:\n{proc.stderr.decode()}\n" + ) + + cmd_s = shlex.join([str(arg) for arg in cmd]) + print(f"\n→ Running command: {cmd_s}") + try: + proc = subprocess.run( + cmd, check=True, capture_output=True, cwd=cwd, env=_env + ) + print(cmd_result(proc)) + except subprocess.CalledProcessError as err: + raise AssertionError( + "Command error:\n" f" - command: {cmd_s}\n" + cmd_result(err) + ) from err + else: + return proc + + def run_dfilemaker(self, dest: t.Optional[Path] = None): + if not dest: + dest = self.src + cmd = dfilemaker_cmd() + [dest] + return self.run_cmd(cmd) diff --git a/test/tests/test_dcmp.py b/test/tests/test_dcmp.py new file mode 100644 index 000000000..a9839c69d --- /dev/null +++ b/test/tests/test_dcmp.py @@ -0,0 +1,180 @@ +#!/usr/bin/python3 + +import textwrap +from pathlib import Path +import typing as t + +from .lib import ( + TestFileTreeCmp, + dcmp_cmd, + dsync_cmd, + create_basic_layout, + create_file, +) + + +class TestDcp(TestFileTreeCmp): + + def run_dcmp( + self, + dest: t.Optional[Path] = None, + ): + if not dest: + dest = self.dst + cmd = dcmp_cmd() + [self.src, dest] + return self.run_cmd(cmd) + + def run_dsync(self): + cmd = dsync_cmd() + [self.src, self.dst] + return self.run_cmd(cmd) + + +class TestDcpBasic(TestDcp): + + def setUp(self): + super().setUp() + create_basic_layout(self.src) + + def test_dcmp(self): + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items : 9 + """ + ), + ) + + def test_dcmp_after_dsync(self): + self.run_dsync() + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 9 (Src: 9 Dest: 9) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 9 (Src: 9 Dest: 9) + Number of items that 
exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 9 (Src: 9 Dest: 9) + Number of items that exist in both directories and have different contents: 0 (Src: 0 Dest: 0) + """ + ), + ) + + def test_dcmp_additional_file(self): + self.run_dsync() + create_file(self.dst / "newfile") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 9 (Src: 9 Dest: 9) + Number of items that exist only in one directory: N/A (Src: 0 Dest: 1) + Number of items that exist in both directories and have the same type: 9 (Src: 9 Dest: 9) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 9 (Src: 9 Dest: 9) + Number of items that exist in both directories and have different contents: 0 (Src: 0 Dest: 0) + """ + ), + ) + + def test_dcmp_missing_file(self): + self.run_dsync() + (self.dst / "file1").unlink() + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 8 (Src: 8 Dest: 8) + Number of items that exist only in one directory: N/A (Src: 1 Dest: 0) + Number of items that exist in both directories and have the same type: 8 (Src: 8 Dest: 8) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 8 (Src: 8 Dest: 8) + Number of items that exist in both directories and have different contents: 0 (Src: 0 Dest: 0) + """ + ), + ) + + def test_dcmp_different_type(self): + self.run_dsync() + # change file1 in destination from regular file to directory + (self.dst / "file1").unlink() + (self.dst / "file1").mkdir() + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that 
exist in both directories: 9 (Src: 9 Dest: 9) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 8 (Src: 8 Dest: 8) + Number of items that exist in both directories and have different types: 1 (Src: 1 Dest: 1) + Number of items that exist in both directories and have the same content: 8 (Src: 8 Dest: 8) + Number of items that exist in both directories and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) + + def test_dcmp_different_symlink_target(self): + self.run_dsync() + # change target of symlink2 from file2 to file1 in destination + (self.dst / "symlink2").unlink() + (self.dst / "symlink2").symlink_to("file1") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 9 (Src: 9 Dest: 9) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 9 (Src: 9 Dest: 9) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 8 (Src: 8 Dest: 8) + Number of items that exist in both directories and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) + + def test_dcmp_different_hardlink_target(self): + self.run_dsync() + # change target of hardlink3 from file3 to file1 in destination + (self.dst / "hardlink3").unlink() + (self.dst / "hardlink3").hardlink_to(self.dst / "file1") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 9 (Src: 9 Dest: 9) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 9 (Src: 9 Dest: 9) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of 
items that exist in both directories and have the same content: 8 (Src: 8 Dest: 8) + Number of items that exist in both directories and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) + + def test_dcmp_different_content(self): + self.run_dsync() + # change content of file1 in destination + with open(self.dst / "file1", "w") as fh: + fh.write("whatever") + proc = self.run_dcmp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Number of items that exist in both directories: 9 (Src: 9 Dest: 9) + Number of items that exist only in one directory: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same type: 9 (Src: 9 Dest: 9) + Number of items that exist in both directories and have different types: 0 (Src: 0 Dest: 0) + Number of items that exist in both directories and have the same content: 8 (Src: 8 Dest: 8) + Number of items that exist in both directories and have different contents: 1 (Src: 1 Dest: 1) + """ + ), + ) diff --git a/test/tests/test_dcp.py b/test/tests/test_dcp.py new file mode 100644 index 000000000..1dc5c7ffb --- /dev/null +++ b/test/tests/test_dcp.py @@ -0,0 +1,208 @@ +#!/usr/bin/python3 + +import os +import tempfile +import typing as t +from pathlib import Path +import textwrap + +from .lib import ( + TestFileTreeCmp, + dcp_cmd, + dwalk_cmd, + dfind_cmd, + create_basic_layout, +) + + +class TestDcp(TestFileTreeCmp): + + def run_dcp( + self, + dereference: bool = False, + preserve: bool = False, + chunk: t.Optional[str] = None, + buffer: t.Optional[str] = None, + input: t.Optional[str] = None, + dest: t.Optional[Path] = None, + ): + if not dest: + dest = self.dst + # Remove destination directory as it is created by dcp. 
+ dest.rmdir() + cmd = dcp_cmd() + [self.src, dest] + if dereference: + cmd.insert(len(cmd) - 2, "--dereference") + if preserve: + cmd.insert(len(cmd) - 2, "--preserve") + if chunk: + cmd[-2:0] = ["--chunksize", chunk] + if buffer: + cmd[-2:0] = ["--bufsize", buffer] + if input: + cmd[-2:0] = ["--input", input] + return self.run_cmd(cmd) + + def run_dwalk( + self, + output: t.Optional[Path] = None, + lite: bool = False, + ): + cmd = dwalk_cmd() + [self.src] + if output: + cmd[-1:0] = ["--output", output] + if lite: + cmd.insert(len(cmd) - 1, "--lite") + return self.run_cmd(cmd) + + def run_dfind( + self, + output: t.Optional[Path] = None, + ): + cmd = dfind_cmd() + [self.src] + if output: + cmd[-1:0] = ["--output", output] + return self.run_cmd(cmd) + + +class TestDcpBasic(TestDcp): + + def setUp(self): + super().setUp() + create_basic_layout(self.src) + + def test_dcp(self): + proc = self.run_dcp() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + # With basic dcp, files should have the same type and metadata except + # mtime that is not copied. + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dereference(self): + # Add content of file2, the target of symlink2 + with open(self.src / "file2", "w") as fh: + fh.write("original") + self.run_dcp(dereference=True) + # Check source and destination have the same content, except for + # symlinks and mtime. 
+ self.assertSrcDstEqual( + ignore_paths=["symlink2", "dir1/symlink1"], ignore_mtime=True + ) + # Check symlink2 is not a symlink in destination + self.assertTrue((self.src / "symlink2").is_symlink()) + self.assertFalse((self.dst / "symlink2").is_symlink()) + # Check symlink2 has its own inode, distinct from file2 + self.assertNotEqual( + (self.dst / "symlink2").stat().st_ino, + (self.dst / "file2").stat().st_ino, + ) + # Check both files have the same original content + for filename in ["file2", "symlink2"]: + with open(self.dst / filename) as fh: + self.assertEqual(fh.read(), "original") + + def test_dcp_preserve(self): + self.run_dcp(preserve=True) + # With dcp --preserve, files must have the same metadata in source and + # destination. + self.assertSrcDstEqual() + + def test_dcp_preserve_chmod(self): + # Change some file modes in source + (self.src / "file1").chmod(0o400) + (self.src / "hardlink3").chmod(0o400) + (self.src / "dir1").chmod(0o700) + self.run_dcp(preserve=True) + # With dcp --preserve, files must have the same metadata in source and + # destination. 
+ self.assertSrcDstEqual() + + def test_dcp_chunksize(self): + # Add 16MB of data in a file to have multiple chunks + with open(self.src / "file1", "wb") as fh: + fh.write(os.urandom(16 * 10**6)) + self.run_dcp(chunk="1MB") + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_bufsize(self): + # Add 16MB of data in a file to fill multiple buffer + with open(self.src / "file1", "wb") as fh: + fh.write(os.urandom(16 * 10**6)) + self.run_dcp(buffer="1MB") + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dwalk_input(self): + with tempfile.NamedTemporaryFile() as fh: + self.run_dwalk(output=fh.name) + proc = self.run_dcp(input=fh.name) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dwalk_input_lite(self): + with tempfile.NamedTemporaryFile() as fh: + self.run_dwalk(output=fh.name, lite=True) + proc = self.run_dcp(input=fh.name) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual(ignore_mtime=True) + + def test_dcp_dfind_input(self): + with tempfile.NamedTemporaryFile() as fh: + self.run_dfind(output=fh.name) + proc = self.run_dcp(input=fh.name) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual(ignore_mtime=True) + + +class TestDcpDfilemaker(TestDcp): + + def setUp(self): + super().setUp() + self.run_dfilemaker() + + def test_dcp(self): + self.run_dcp() + self.assertSrcDstEqual(ignore_mtime=True) diff --git a/test/tests/test_dsync.py b/test/tests/test_dsync.py new file mode 100644 index 000000000..2a471d6ea --- /dev/null +++ b/test/tests/test_dsync.py @@ -0,0 +1,529 @@ +#!/usr/bin/python3 + +import tempfile +import textwrap +from pathlib import Path +import os +import typing as 
t + +from .lib import TestFileTreeCmp, dsync_cmd, create_basic_layout, create_file + + +class TestDsync(TestFileTreeCmp): + + def run_dsync( + self, + delete: bool = False, + contents: bool = False, + dereference: bool = False, + dry_run: bool = False, + link_dest: t.Optional[Path] = None, + chunk: t.Optional[str] = None, + buffer: t.Optional[str] = None, + batch: t.Optional[int] = None, + dest: t.Optional[Path] = None, + ): + if not dest: + dest = self.dst + cmd = dsync_cmd() + [self.src, dest] + if delete: + cmd.insert(len(cmd) - 2, "--delete") + if contents: + cmd.insert(len(cmd) - 2, "--contents") + if dereference: + cmd.insert(len(cmd) - 2, "--dereference") + if dry_run: + cmd.insert(len(cmd) - 2, "--dryrun") + if link_dest: + cmd[-2:0] = ["--link-dest", link_dest] + if chunk: + cmd[-2:0] = ["--chunksize", chunk] + if buffer: + cmd[-2:0] = ["--bufsize", buffer] + if batch: + cmd[-2:0] = ["--batch-files", str(batch)] + return self.run_cmd(cmd) + + +class TestDsyncBasic(TestDsync): + + def setUp(self): + super().setUp() + create_basic_layout(self.src) + + def test_dsync_empty_dest(self): + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 8 + Directories: 1 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_overwrite_dest(self): + # Modify a file dest with different content and check it is overwritten. 
+ with open(self.src / "file2", "w+") as fh: + fh.write("original") + self.run_dsync() + with open(self.dst / "file2", "w+") as fh: + fh.write("modified") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual() + with open(self.dst / "file2", "r") as fh: + self.assertEqual(fh.read(), "original") + + def test_dsync_remove_source(self): + # Synchronize, remove one file in source, re-synchronize and check file + # is still present in dest without dsync delete option. + self.run_dsync() + (self.src / "file2").unlink() + proc = self.run_dsync() + self.assertInProcStdout( + proc, + "Comparing file sizes and modification times of 3 items", + ) + self.assertSrcDstEqual(ignore_paths=["file2"]) + self.assertFalse((self.src / "file2").exists()) + self.assertTrue((self.dst / "file2").exists()) + + def test_dsync_remove_source_delete(self): + # Synchronize, remove one file in source, re-synchronize and check file + # is also removed in dest with dsync delete option. + self.run_dsync() + (self.src / "file1").unlink() + proc = self.run_dsync(delete=True) + self.assertInProcStdout(proc, "Removing 1 items") + self.assertSrcDstEqual() + + def test_dsync_file_in_dst(self): + # Create non-conflicting file in dst, synchronize and check the file + # still exists after sync. 
+ additional_file = self.dst / "other-file" + create_file(additional_file) + proc = self.run_dsync() + # Check dsync reported 2 items in destination + self.assertInProcStdout(proc, "Walked 2 items in ") + # Check dsync reported to copy everything from source + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 8 + Directories: 1 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual(ignore_paths=[additional_file.name]) + self.assertTrue(additional_file.exists()) + + def test_dsync_file_in_dst_delete(self): + # Create non-conflicting file in dst, synchronize with delete option and + # check the file is removed after sync. + additional_file = self.dst / "other-file" + create_file(additional_file) + proc = self.run_dsync(delete=True) + # Check dsync reported to remove an item + self.assertInProcStdout( + proc, + "Removed 1 items in ", + ) + self.assertSrcDstEqual() + self.assertFalse(additional_file.exists()) + + def test_dsync_symlink_dereference(self): + # Add content of file2, the target of symlink2 + with open(self.src / "file2", "w") as fh: + fh.write("original") + self.run_dsync(dereference=True) + # Check source and destination have the same content, except for + # symlinks. + self.assertSrcDstEqual(ignore_paths=["symlink2", "dir1/symlink1"]) + # Check symlink2 is not a symlink in destination + self.assertTrue((self.src / "symlink2").is_symlink()) + self.assertFalse((self.dst / "symlink2").is_symlink()) + # Check symlink2 has its own inode, distinct from file2 + self.assertNotEqual( + (self.dst / "symlink2").stat().st_ino, + (self.dst / "file2").stat().st_ino, + ) + # Check both files have the same original content + for filename in ["file2", "symlink2"]: + with open(self.dst / filename) as fh: + self.assertEqual(fh.read(), "original") + + def test_dsync_symlink_target_change(self): + # Synchronize, change symlink2 target in source, re-synchronize and + # check. 
+ self.run_dsync() + (self.src / "symlink2").unlink() + (self.src / "symlink2").symlink_to("file1") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 1 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_symlink_dereference_target_nlinks(self): + # change symlink2 target in source for file3 which has nlink > 1 and + # synchronize with dereference. + (self.src / "symlink2").unlink() + (self.src / "symlink2").symlink_to("file3") + proc = self.run_dsync(dereference=True) + + # FIXME: when symlinks target file with multiple links (nlink > 1), + # dsync creates in destination an additional link to this inode instead + # of a regular copy for this symlink. + + # Check source and destination have the same content, except for + # symlinks2, hardlink3 and file3 which have 3 nlinks in destination. + self.assertSrcDstEqual( + ignore_paths=["file3", "hardlink3", "symlink2", "dir1/symlink1"] + ) + self.assertEqual((self.dst / "file3").stat().st_nlink, 3) + self.assertEqual((self.dst / "hardlink3").stat().st_nlink, 3) + self.assertEqual((self.dst / "symlink2").stat().st_nlink, 3) + + # Check inode of symlink2 and file3 are the same. + self.assertEqual( + (self.dst / "symlink2").stat().st_ino, + (self.dst / "file3").stat().st_ino, + ) + + # Check dsync reported creation of 2 hardlinks and 0 symlink. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 8 + Directories: 1 + Files: 5 + Links: 0 + Hardlinks: 2 + """ + ), + ) + + def test_dsync_transform_hardlink(self): + # Synchronize, transform hardlink in standalone inode, re-synchronize + # and check. + self.run_dsync() + (self.src / "hardlink3").unlink() + create_file(self.src / "hardlink3") + proc = self.run_dsync() + # Check dsync reported one modified file. 
+ self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_sync_same_size_mtime(self): + # Fill source file2 with 64 random bytes and sync in empty dst. + random1 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random1) + previous_atime = (self.src / "file2").stat().st_atime_ns + previous_mtime = (self.src / "file2").stat().st_mtime_ns + self.run_dsync() + + # Update source file2 with other 64 random bytes, restore mtime and + # resync. + random2 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random2) + os.utime(self.src / "file2", ns=(previous_atime, previous_mtime)) + proc = self.run_dsync() + self.assertInProcStdout( + proc, "Comparing file sizes and modification times of 4 items" + ) + # Check src/dst metadata are equal but file2 still contains first 64 + # random bytes. + self.assertSrcDstEqual() + with open(self.dst / "file2", "rb") as fh: + self.assertEqual(fh.read(), random1) + + def test_dsync_sync_same_diff_mtime(self): + # Fill source file2 with 64 random bytes and sync in empty dst. + random1 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random1) + self.run_dsync() + + # Update source file2 with other 64 random bytes and resync. + random2 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random2) + proc = self.run_dsync() + # Check dsync reported one modified file. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 0 + """ + ), + ) + + # Check src/dst metadata are equal but file2 is updated with new content + # because of mtime difference. 
+ self.assertSrcDstEqual() + with open(self.dst / "file2", "rb") as fh: + self.assertEqual(fh.read(), random2) + + def test_dsync_sync_same_size_mtime_contents(self): + # Fill source file2 with 64 random bytes and sync in empty dst. + random1 = os.urandom(64) + random2 = os.urandom(64) + with open(self.src / "file2", "wb") as fh: + fh.write(random1) + previous_atime = (self.src / "file2").stat().st_atime_ns + previous_mtime = (self.src / "file2").stat().st_mtime_ns + self.run_dsync() + + # Update source file2 with other 64 random bytes, restore mtime and + # resync with --contents. + with open(self.src / "file2", "wb") as fh: + fh.write(random2) + os.utime(self.src / "file2", ns=(previous_atime, previous_mtime)) + proc = self.run_dsync(contents=True) + self.assertInProcStdout( + proc, + "Comparing file contents of 4 items", + ) + # FIXME: mtime on file2 do not match on src/dst even though it has been + # updated with second dsync. + self.assertSrcDstEqual(ignore_paths=["file2"]) + with open(self.dst / "file2", "rb") as fh: + self.assertEqual(fh.read(), random2) + + def test_dsync_hardlink_dest_ref_changed(self): + # Create a conflicting file in dest with different content and check it + # is overwritten. + with open(self.src / "file3", "w+") as fh: + fh.write("original") + self.run_dsync() + with open(self.dst / "file3", "w+") as fh: + fh.write("modified") + proc = self.run_dsync() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 2 + Directories: 0 + Files: 1 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + for filename in ["file3", "hardlink3"]: + with open(self.dst / filename, "r") as fh: + self.assertEqual(fh.read(), "original") + + def test_dsync_hardlink_outside_tree(self): + # Create temporary file outside src and dst tree, create hardlink in src + # to this temporary file, sync and check. The destination should contain + # a copy of the file (ie. with st_nlink 1). 
+ with tempfile.NamedTemporaryFile() as outside_file: + (self.src / "hardlink3").unlink() + (self.src / "hardlink3").hardlink_to(outside_file.name) + proc = self.run_dsync() + # Check dsync reported the hardlink1 as a regular file. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 8 + Directories: 1 + Files: 5 + Links: 2 + Hardlinks: 0 + """ + ), + ) + self.assertSrcDstEqual(ignore_paths=["hardlink3"]) + self.assertEqual((self.src / "hardlink3").stat().st_nlink, 2) + self.assertEqual((self.dst / "hardlink3").stat().st_nlink, 1) + # When the temporary file outside src and dst tree is removed, src and + # dst must be equal. + self.assertSrcDstEqual() + + def test_dsync_change_hardlink_dest(self): + # Synchronize, change hardlink destination, re-synchronize + # and check. + self.run_dsync() + (self.src / "hardlink3").unlink() + (self.src / "hardlink3").hardlink_to(self.src / "file2") + proc = self.run_dsync() + # Check dsync reported hardlink to be updated. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_add_hardlink_same_inode(self): + # Synchronize, add hardlink on inode which has already multiple links, + # re-synchronize and check. + self.run_dsync() + (self.src / "hardlink2").hardlink_to(self.src / "file3") + proc = self.run_dsync() + # Check dsync reported hardlink to be updated. + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_add_hardlink_another_inode(self): + # Synchronize, add hardlink on inode with one link, re-synchronize and + # check. + self.run_dsync() + (self.src / "hardlink2").hardlink_to(self.src / "file2") + proc = self.run_dsync() + # Check dsync reported hardlink to be updated. 
+ self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 1 + Directories: 0 + Files: 0 + Links: 0 + Hardlinks: 1 + """ + ), + ) + self.assertSrcDstEqual() + + def test_dsync_dry_run_empty(self): + # Check destination stays empty after dsync --dry-run. + self.run_dsync(dry_run=True) + self.assertCountEqual(list(self.dst.iterdir()), []) + + def test_dsync_dry_run_no_update(self): + # Check destination is not updated after second dsync with --dry-run + self.run_dsync() + (self.src / "file2").unlink() + create_file(self.src / "newfile") + self.run_dsync(dry_run=True) + self.assertTrue((self.dst / "file2").exists()) + self.assertFalse((self.dst / "newfile").exists()) + self.assertSrcDstEqual(ignore_paths=["file2", "newfile"]) + + def test_dsync_link_dest(self): + with tempfile.TemporaryDirectory() as _link_dest: + link_dest = Path(_link_dest) + # First sync in link destination + self.run_dsync(dest=link_dest) + # Add file in source + create_file(self.src / "newfile") + # Run dsync in destination with link destination + self.run_dsync(link_dest=link_dest) + # Check source and destination are the same, except for nlinks + self.assertSrcDstEqual(ignore_nlink=True) + # Check new file is present in destination but not in link + # destination. + self.assertTrue((self.dst / "newfile").exists()) + self.assertFalse((link_dest / "newfile").exists()) + # Check file which did not change between two synchronizations share + # the same inode in destination and link destination. 
+ self.assertEqual( + (self.dst / "file1").stat().st_ino, + (link_dest / "file1").stat().st_ino, + ) + + def test_dsync_chunksize(self): + # Add 16MB of data in a file to have multiple chunks + with open(self.src / "file1", "wb") as fh: + fh.write(os.urandom(16 * 10**6)) + self.run_dsync(chunk="1MB") + self.assertSrcDstEqual() + + def test_dsync_bufsize(self): + # Add 16MB of data in a file to fill multiple buffer + with open(self.src / "file1", "wb") as fh: + fh.write(os.urandom(16 * 10**6)) + self.run_dsync(buffer="1MB") + self.assertSrcDstEqual() + + def test_dsync_batch(self): + self.run_dsync(batch=2) + self.assertSrcDstEqual() + + +class TestDsyncDfilemaker(TestDsync): + + def setUp(self): + super().setUp() + self.run_dfilemaker() + + def test_dsync_twice(self): + self.run_dsync() + self.assertSrcDstEqual() + proc = self.run_dsync() + self.assertInProcStdout( + proc, "Comparing file sizes and modification times of 1000 items" + ) + self.assertSrcDstEqual() diff --git a/test/tests/test_dtar.py b/test/tests/test_dtar.py new file mode 100644 index 000000000..613bf767f --- /dev/null +++ b/test/tests/test_dtar.py @@ -0,0 +1,320 @@ +#!/usr/bin/python3 + +import tarfile +from pathlib import Path +import os +import textwrap +import typing as t + +import yaml +import xattr + +from .lib import ( + TestFileTreeCmp, + dtar_cmd, + create_basic_layout, + BASIC_FILES_LAYOUT, +) + + +class TestDtar(TestFileTreeCmp): + + def run_dtar( + self, + extract: bool = False, + preserve_xattrs: bool = False, + preserve_acls: bool = False, + preserve_flags: bool = False, + env: t.Optional[t.Dict[str, str]] = None, + ): + if extract: + cmd = dtar_cmd() + ["-xf", self.archive] + if preserve_xattrs: + cmd.insert(len(cmd) - 2, "--preserve-xattrs") + if preserve_acls: + cmd.insert(len(cmd) - 2, "--preserve-acls") + if preserve_flags: + cmd.insert(len(cmd) - 2, "--preserve-flags") + cwd = self.dst + else: + cmd = dtar_cmd() + [ + "-cf", + self.archive, + self.src.name, + ] + if 
preserve_xattrs: + cmd.insert(len(cmd) - 3, "--preserve-xattrs") + if preserve_acls: + cmd.insert(len(cmd) - 3, "--preserve-acls") + if preserve_flags: + cmd.insert(len(cmd) - 3, "--preserve-flags") + cwd = self.src.parent + + return self.run_cmd(cmd, cwd=cwd, env=env) + + def assertArchiveBasicTree(self): + """Compare tarfile with YAML layout""" + tar = tarfile.open(self.archive) + tree = yaml.safe_load(BASIC_FILES_LAYOUT) + self.assertSameDirArchive(tree, tar, Path(self.src.name)) + tar.close() + + def assertSameDirArchive(self, tree, tar, subdir: Path): + for key, value in tree.items(): + try: + member = tar.getmember(f"{subdir}/{key}") + except KeyError: + raise AssertionError( + f"Unable to find {subdir}/{key} in archive" + ) + else: + print(f"Comparing {subdir}/{key} in archive with source layout") + match value.get("type", "file"): + case "file": + self.assertTrue(member.isreg()) + case "symlink": + self.assertTrue(member.issym()) + case "hardlink": + self.assertTrue(member.islnk()) + case "dir": + self.assertTrue(member.isdir()) + self.assertSameDirArchive( + tree[key]["layout"], tar, subdir / key + ) + + +class TestDtarBasic(TestDtar): + + def setUp(self): + super().setUp() + create_basic_layout(self.src) + + def add_data_in_files(self): + size = 16 * 10**6 + with open(self.src / "file1", "wb") as fh: + fh.write(os.urandom(size)) + with open(self.src / "file2", "wb") as fh: + fh.write(os.urandom(size)) + with open(self.src / "file3", "wb") as fh: + fh.write(os.urandom(size)) + with open(self.src / "dir1" / "file4", "wb") as fh: + fh.write(os.urandom(size)) + + def test_dtar_create(self): + self.add_data_in_files() + proc = self.run_dtar() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertArchiveBasicTree() + + def test_dtar_create_algo_chunk(self): + self.add_data_in_files() + proc = self.run_dtar(env={"MFU_FLIST_ARCHIVE_CREATE": "CHUNK"}) + 
self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertArchiveBasicTree() + + def test_dtar_create_algo_libcircle(self): + self.add_data_in_files() + proc = self.run_dtar(env={"MFU_FLIST_ARCHIVE_CREATE": "LIBCIRCLE"}) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + self.assertArchiveBasicTree() + + def test_dtar_extract(self): + self.run_dtar() + proc = self.run_dtar(extract=True) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Creating 2 directories + Creating 4 files + Creating 2 symlinks + Creating 1 hardlinks + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_libarchive(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "LIBARCHIVE"} + ) + # With this algorithm, index file *.tar.dtaridx is counted. 
+ self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 10 + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_libarchive_idx(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "LIBARCHIVE_IDX"} + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_chunk(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "CHUNK"} + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + """ + ), + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Creating 2 directories + Creating 4 files + Creating 2 symlinks + Creating 1 hardlinks + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_extract_algo_libcircle(self): + self.run_dtar() + proc = self.run_dtar( + extract=True, env={"MFU_FLIST_ARCHIVE_EXTRACT": "LIBCIRCLE"} + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + """ + ), + ) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Creating 2 directories + Creating 4 files + Creating 2 symlinks + Creating 1 hardlinks + """ + ), + ) + self.archive.unlink() + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + + def test_dtar_preserve_xattrs(self): + self.add_data_in_files() + # add xattr + xattr.setxattr(self.src / "file1", "user.xdg.comment", "test".encode()) + original_xattrs = xattr.listxattr(self.src / "file1") + original_value = xattr.getxattr(self.src / "file1", "user.xdg.comment") + self.run_dtar(preserve_xattrs=True) + # check archive can be read by python standard library + self.assertArchiveBasicTree() + self.run_dtar(extract=True, 
preserve_xattrs=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + new_xattrs = xattr.listxattr(self.dst / self.src.name / "file1") + new_value = xattr.getxattr( + self.dst / self.src.name / "file1", "user.xdg.comment" + ) + self.assertCountEqual(original_xattrs, new_xattrs) + self.assertEqual(original_value, new_value) + + def test_dtar_preserve_acls(self): + self.add_data_in_files() + # add posix ACL + self.run_cmd(["setfacl", "-m", "user:root:r", self.src / "file1"]) + original_xattrs = xattr.listxattr(self.src / "file1") + original_acl = xattr.getxattr( + self.src / "file1", "system.posix_acl_access" + ) + self.run_dtar(preserve_acls=True) + # check archive can be read by python standard library + self.assertArchiveBasicTree() + self.run_dtar(extract=True, preserve_acls=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + new_xattrs = xattr.listxattr(self.dst / self.src.name / "file1") + new_acl = xattr.getxattr( + self.dst / self.src.name / "file1", "system.posix_acl_access" + ) + self.assertCountEqual(original_xattrs, new_xattrs) + self.assertEqual(original_acl, new_acl) + + def test_dtar_preserve_flags(self): + self.add_data_in_files() + # add noatime flag + self.run_cmd(["chattr", "+A", self.src / "file1"]) + self.run_dtar(preserve_flags=True) + # check archive can be read by python standard library + self.assertArchiveBasicTree() + self.run_dtar(extract=True, preserve_flags=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) + output = self.run_cmd(["lsattr", self.dst / self.src.name / "file1"]) + self.assertIn("A", output.stdout.decode().split(" ")[0]) + + +class TestDtarDfilemaker(TestDtar): + + def setUp(self): + super().setUp() + self.run_dfilemaker() + + def test_dtar(self): + # Create and extract an archive with tree generated by dfilemaker and + # compare. 
+ self.run_dtar() + self.run_dtar(extract=True) + self.assertSrcDstEqual(ignore_mtime=True, dest=self.dst / self.src.name) diff --git a/test/tests/test_dwalk.py b/test/tests/test_dwalk.py new file mode 100644 index 000000000..ac117e6dd --- /dev/null +++ b/test/tests/test_dwalk.py @@ -0,0 +1,176 @@ +#!/usr/bin/python3 + +import tempfile +import os +import typing as t +from pathlib import Path +import textwrap + +from .lib import ( + TestFileTreeCmp, + dwalk_cmd, + create_basic_layout, +) + + +class TestDwalk(TestFileTreeCmp): + + def run_dwalk( + self, + output: t.Optional[Path] = None, + input: t.Optional[Path] = None, + lite: bool = False, + text: bool = False, + ): + cmd = dwalk_cmd() + if output: + cmd += ["--output", output] + if input: + cmd += ["--input", input] + if lite: + cmd.append("--lite") + if text: + cmd.append("--text") + if not input: + cmd.append(self.src) + return self.run_cmd(cmd) + + +class TestDwalkBasic(TestDwalk): + + def setUp(self): + super().setUp() + oldmask = os.umask(0o022) + create_basic_layout(self.src) + os.umask(oldmask) + + def test_walk(self): + proc = self.run_dwalk() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + + def test_walk_output(self): + # Create and delete a temporary file but keep its name. + with tempfile.NamedTemporaryFile() as fh: + output = Path(fh.name) + proc = self.run_dwalk(output=output) + # Check dwalk has created file + self.assertTrue(output.exists()) + output.unlink() + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + + def test_walk_output_lite(self): + # Create and delete a temporary file but keep its name. + with tempfile.NamedTemporaryFile() as fh: + output = Path(fh.name) + proc = self.run_dwalk(output=output, lite=True) + # dwalk with --lite do not call stat(), it is unable to detect hardlinks. 
+ self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 0 + """ + ), + ) + # Check dwalk has created file + self.assertTrue(output.exists()) + with open(output) as fh: + content = fh.read() + output.unlink() + for entry in [ + f"{self.src}|D", + f"{self.src}/file3|F", + f"{self.src}/hardlink3|F", + f"{self.src}/file2|F", + f"{self.src}/file1|F", + f"{self.src}/symlink2|L", + f"{self.src}/dir1|D", + f"{self.src}/dir1/file4|F", + f"{self.src}/dir1/symlink1|L", + ]: + self.assertIn(entry, content) + + def test_walk_output_text(self): + with tempfile.NamedTemporaryFile(mode="w+") as fh: + output = Path(fh.name) + self.run_dwalk(output=output, text=True) + # Check dwalk has created file + self.assertTrue(output.exists()) + with open(output) as fh: + content = fh.read() + + for entry in [ + rf"drwx------ .* {self.src}", + rf"drwxr-xr-x .* {self.src}/dir1", + rf"lrwxrwxrwx .* {self.src}/dir1/symlink1", + rf"-rw-r--r-- .* {self.src}/dir1/file4", + rf"lrwxrwxrwx .* {self.src}/symlink2", + rf"-rw-r--r-- .* {self.src}/file1", + rf"-rw-r--r-- .* {self.src}/file2", + rf"-rw-r--r-- .* {self.src}/file3", + rf"-rw-r--r-- .* {self.src}/hardlink3", + ]: + self.assertRegex(content, entry) + + def test_walk_input(self): + with tempfile.NamedTemporaryFile(mode="w+") as fh: + cache = Path(fh.name) + self.run_dwalk(output=cache) + proc = self.run_dwalk(input=cache) + self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 4 + Links: 2 + Hardlinks: 1 + """ + ), + ) + + def test_walk_input_lite(self): + with tempfile.NamedTemporaryFile(mode="w+") as fh: + cache = Path(fh.name) + self.run_dwalk(output=cache, lite=True) + proc = self.run_dwalk(input=cache) + # lite cache do not contain hardlinks, then dwalk misses it. 
+ self.assertInProcStdout( + proc, + textwrap.dedent( + """ + Items: 9 + Directories: 2 + Files: 5 + Links: 2 + Hardlinks: 0 + """ + ), + ) From 26ac76e55a550ccd35de4a0e02de8e1ef41117dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Palancher?= Date: Fri, 21 Feb 2025 15:22:44 +0100 Subject: [PATCH 9/9] ci: workflow to run tests in github actions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add continuous integration workflow to build and install lwgrp, libcircle, dtcmp and mpifileutils and execute Python test suite in github actions for all pull requests and merges in main branch. Signed-off-by: Rémi Palancher --- .github/workflows/ci.yml | 117 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..775657717 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,117 @@ +--- +name: Build & tests +on: + push: + branches: + - main + pull_request: + branches: + - main + workflow_dispatch: {} + +env: + # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.) 
+ BUILD_TYPE: Release + MFU_BIN: ${{github.workspace}}/build/bin + MFU_MPIRUN_ARGS: --mca mpi_abort_print_stack 1 --bind-to none --oversubscribe -N 8 + +jobs: + build: + runs-on: ubuntu-latest + steps: + + - name: Install tests dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential libopenmpi-dev libattr1-dev libarchive-dev python3-pytest python3-xattr e2fsprogs + + # + # lwgrp + # + - name: lwgrp checkout + uses: actions/checkout@v4 + with: + repository: 'LLNL/lwgrp' + ref: 'v1.0.6' + path: 'lwgrp' + - name: lwgrp autogen + run: ./autogen.sh + working-directory: lwgrp + - name: lwgrp configure + run: ./configure --prefix=${{github.workspace}}/build --disable-static + working-directory: lwgrp + - name: lwgrp make + run: make + working-directory: lwgrp + - name: lwgrp make install + run: make install + working-directory: lwgrp + + + # + # libcircle + # + - name: libcircle checkout + uses: actions/checkout@v4 + with: + repository: 'hpc/libcircle' + ref: 'v0.3' + path: 'libcircle' + - name: libcircle autogen + run: ./autogen.sh + working-directory: libcircle + - name: libcircle configure + run: ./configure --prefix=${{github.workspace}}/build --disable-static + working-directory: libcircle + - name: libcircle make + run: make + working-directory: libcircle + - name: libcircle make install + run: make install + working-directory: libcircle + + # + # dtcmp + # + - name: dtcmp checkout + uses: actions/checkout@v4 + with: + repository: 'LLNL/dtcmp' + ref: 'v1.1.5' + path: 'dtcmp' + - name: dtcmp autogen + run: ./autogen.sh + working-directory: dtcmp + - name: dtcmp configure + run: ./configure --prefix=${{github.workspace}}/build --with-lwgrp=${{github.workspace}}/build --disable-static + working-directory: dtcmp + - name: dtcmp make + run: make + working-directory: dtcmp + - name: dtcmp make install + run: make install + working-directory: dtcmp + + # + # mpifileutils + # + - uses: actions/checkout@v4 + with: + path: 'mpifileutils' + - 
name: Configure CMake + run: > + cmake + -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build + -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} + -DDTCMP_INCLUDE_DIRS=${{github.workspace}}/build/include + -DDTCMP_LIBRARIES=${{github.workspace}}/build/lib/libdtcmp.so + -DLibCircle_INCLUDE_DIRS=${{github.workspace}}/build/include + -DLibCircle_LIBRARIES=${{github.workspace}}/build/lib/libcircle.so + working-directory: mpifileutils + - name: Build + run: cmake --build mpifileutils --config ${{env.BUILD_TYPE}} + - name: Install + run: cmake --install mpifileutils + - name: Run tests + run: pytest + working-directory: mpifileutils