8000 feat: backup support paused type by zhuyi1159 · Pull Request #9203 · apecloud/kubeblocks · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

feat: backup support paused type #9203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion apis/dataprotection/v1alpha1/backup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ const (

// BackupPhase describes the lifecycle phase of a Backup.
// +enum
// +kubebuilder:validation:Enum={New,InProgress,Running,Completed,Failed,Deleting}
// +kubebuilder:validation:Enum={New,InProgress,Running,Completed,Failed,Deleting,Paused}
type BackupPhase string

const (
Expand All @@ -264,6 +264,9 @@ const (

// BackupPhaseDeleting means the backup and all its associated data are being deleted.
BackupPhaseDeleting BackupPhase = "Deleting"

// BackupPhasePaused means the backup is paused.
BackupPhasePaused BackupPhase = "Paused"
)

type ActionStatus struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,7 @@ spec:
- Completed
- Failed
- Deleting
- Paused
type: string
startTimestamp:
description: |-
Expand Down
66 changes: 66 additions & 0 deletions controllers/dataprotection/backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ func (r *BackupReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr
return r.handleCompletedPhase(reqCtx, backup)
case dpv1alpha1.BackupPhaseDeleting:
return r.handleDeletingPhase(reqCtx, backup)
case dpv1alpha1.BackupPhasePaused:
return r.handlePausedPhase(reqCtx, backup)
case dpv1alpha1.BackupPhaseFailed:
if backup.Labels[dptypes.BackupTypeLabelKey] == string(dpv1alpha1.BackupTypeContinuous) {
if backup.Status.StartTimestamp.IsZero() {
Expand Down Expand Up @@ -799,6 +801,70 @@ func (r *BackupReconciler) deleteRelatedBackups(
return nil
}

// handlePausedPhase reconciles a Backup that Reconcile dispatched here because
// its status phase is Paused.
//
// It first suspends the Jobs associated with the backup via suspendBackupJobs.
// If any Job had to be patched in this pass, the request is requeued after five
// seconds. Once no Job needs patching, it makes sure the status phase is Paused,
// stamping the completion time and, when a start time is recorded, the elapsed
// duration, and then emits a "BackupPaused" event.
func (r *BackupReconciler) handlePausedPhase(
	reqCtx intctrlutil.RequestCtx,
	backup *dpv1alpha1.Backup) (ctrl.Result, error) {
	// Step 1: suspend every Job that belongs to this backup.
	requeue, err := r.suspendBackupJobs(reqCtx, backup)
	if err != nil {
		return intctrlutil.RequeueWithError(err, reqCtx.Log, "failed to suspend backup jobs")
	}
	if requeue {
		return intctrlutil.RequeueAfter(5*time.Second, reqCtx.Log, "waiting for job suspension")
	}

	// Step 2: the status already reports Paused, so there is nothing to persist.
	if backup.Status.Phase == dpv1alpha1.BackupPhasePaused {
		return intctrlutil.Reconciled()
	}

	// Snapshot the object before mutating it so that only the delta is patched.
	original := backup.DeepCopy()
	pausedAt := metav1.Time{Time: r.clock.Now().UTC()}
	backup.Status.Phase = dpv1alpha1.BackupPhasePaused
	backup.Status.CompletionTimestamp = &pausedAt
	// Record how long the backup ran before it was paused.
	if startedAt := backup.Status.StartTimestamp; startedAt != nil {
		elapsed := pausedAt.Sub(startedAt.Time).Round(time.Second)
		backup.Status.Duration = &metav1.Duration{Duration: elapsed}
	}
	err = r.Client.Status().Patch(reqCtx.Ctx, backup, client.MergeFrom(original))
	if err != nil {
		return intctrlutil.CheckedRequeueWithError(err, reqCtx.Log, "")
	}
	r.Recorder.Event(backup, corev1.EventTypeNormal, "BackupPaused", "Backup jobs have been suspended")

	return intctrlutil.Reconciled()
}

// suspendBackupJobs suspends, without deleting, every Job associated with the
// given backup.
//
// Jobs are looked up in the backup's namespace using the labels returned by
// dpbackup.BuildBackupWorkloadLabels. Jobs whose spec.suspend is already true
// are left untouched; every other Job is patched with spec.suspend=true.
//
// The returned bool is true when at least one Job was patched in this call, so
// the caller can requeue and wait for the suspension to be observed. A non-nil
// error is returned as soon as listing or patching fails; Jobs patched before
// the failure stay suspended.
func (r *BackupReconciler) suspendBackupJobs(
	reqCtx intctrlutil.RequestCtx,
	backup *dpv1alpha1.Backup) (bool, error) {
	// Fetch all Jobs associated with the backup.
	jobList := &batchv1.JobList{}
	labels := dpbackup.BuildBackupWorkloadLabels(backup)
	if err := r.Client.List(reqCtx.Ctx, jobList,
		client.InNamespace(backup.Namespace),
		client.MatchingLabels(labels)); err != nil {
		return false, fmt.Errorf("failed to list backup jobs: %w", err)
	}

	var needRequeue bool
	// Iterate by index: a Job is a large struct, and taking the address of the
	// slice element avoids both the per-iteration copy and handing the address
	// of the loop variable to Patch.
	for i := range jobList.Items {
		job := &jobList.Items[i]

		// Skip Jobs that are already suspended.
		if job.Spec.Suspend != nil && *job.Spec.Suspend {
			continue
		}

		// Mark the Job as suspended.
		patch := client.MergeFrom(job.DeepCopy())
		job.Spec.Suspend = pointer.Bool(true)
		if err := r.Client.Patch(reqCtx.Ctx, job, patch); err != nil {
			return false, fmt.Errorf("failed to suspend job %s: %w", job.Name, err)
		}
		reqCtx.Log.Info("suspended backup job", "job", job.Name)
		needRequeue = true // the caller should wait for the Job state to settle
	}

	return needRequeue, nil
}

// PatchBackupObjectMeta patches backup object metaObject include cluster snapshot.
func PatchBackupObjectMeta(
original *dpv1alpha1.Backup,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,7 @@ spec:
- Completed
- Failed
- Deleting
- Paused
type: string
startTimestamp:
description: |-
Expand Down
54 changes: 53 additions & 1 deletion pkg/operations/stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
"sigs.k8s.io/controller-runtime/pkg/client"

appsv1 "github.com/apecloud/kubeblocks/apis/apps/v1"
dpv1alpha1 "github.com/apecloud/kubeblocks/apis/dataprotection/v1alpha1"
opsv1alpha1 "github.com/apecloud/kubeblocks/apis/operations/v1alpha1"
"github.com/apecloud/kubeblocks/pkg/constant"
intctrlcomp "github.com/apecloud/kubeblocks/pkg/controller/component"
intctrlutil "github.com/apecloud/kubeblocks/pkg/controllerutil"
)
Expand Down Expand Up @@ -129,10 +131,60 @@
return expectProgressCount, completedCount, nil
}
compOpsHelper := newComponentOpsHelper(opsRes.OpsRequest.Spec.StopList)
return compOpsHelper.reconcileActionWithComponentOps(reqCtx, cli, opsRes, "stop", handleComponentProgress)

phase, duration, err := compOpsHelper.reconcileActionWithComponentOps(reqCtx, cli, opsRes, "stop", handleComponentProgress)

// 新增逻辑:当集群进入停止状态时暂停相关备份
if opsRes.Cluster.Status.Phase == appsv1.StoppingClusterPhase || opsRes.Cluster.Status.Phase == appsv1.StoppedClusterPhase {
if err := pauseRelatedBackups(reqCtx, cli, opsRes.Cluster); err != nil {
return opsv1alpha1.OpsFailedPhase, 0, err
}
}

return phase, duration, err
// return compOpsHelper.reconcileActionWithComponentOps(reqCtx, cli, opsRes, "stop", handleComponentProgress)

Check failure on line 145 in pkg/operations/stop.go

View workflow job for this annotation

GitHub Actions / push-pre-check (lint)

commentFormatting: put a space between `//` and comment text (gocritic)
}

// SaveLastConfiguration records last configuration to the OpsRequest.status.lastConfiguration
func (stop StopOpsHandler) SaveLastConfiguration(reqCtx intctrlutil.RequestCtx, cli client.Client, opsRes *OpsResource) error {
return nil
}

// pauseRelatedBackups marks every running Backup associated with the cluster
// as Paused.
//
// Backups are listed in the cluster's namespace by the app-instance label set
// to the cluster name. Each Backup whose status phase is Running gets its
// status patched: the phase becomes Paused, the completion timestamp is set to
// the current time and, when a start timestamp exists, the duration is set to
// the elapsed time rounded to the second. The first list or patch error is
// returned immediately; Backups patched before it stay paused.
func pauseRelatedBackups(reqCtx intctrlutil.RequestCtx, cli client.Client, cluster *appsv1.Cluster) error {
	// NOTE(review): this assumes Backups carry the app-instance label pointing
	// at their cluster - confirm against how Backups are labelled.
	selector := client.MatchingLabels{
		constant.AppInstanceLabelKey: cluster.Name,
	}
	backups := &dpv1alpha1.BackupList{}
	if err := cli.List(reqCtx.Ctx, backups, client.InNamespace(cluster.Namespace), selector); err != nil {
		return err
	}

	for i := range backups.Items {
		item := &backups.Items[i]
		// Only backups that are currently running need to be paused.
		if item.Status.Phase != dpv1alpha1.BackupPhaseRunning {
			continue
		}

		// Keep the pre-change copy so that only the status delta is sent.
		before := item.DeepCopy()
		pausedAt := metav1.Time{Time: time.Now()}
		item.Status.Phase = dpv1alpha1.BackupPhasePaused
		item.Status.CompletionTimestamp = &pausedAt
		if startedAt := item.Status.StartTimestamp; startedAt != nil {
			elapsed := pausedAt.Sub(startedAt.Time).Round(time.Second)
			item.Status.Duration = &metav1.Duration{Duration: elapsed}
		}
		if err := cli.Status().Patch(reqCtx.Ctx, item, client.MergeFrom(before)); err != nil {
			return err
		}
		reqCtx.Log.Info("paused backup due to cluster stopping",
			"backup", client.ObjectKeyFromObject(item),
			"cluster", cluster.Name)
	}
	return nil
}
Loading
0