8000 `e2e` provider is extended with `Disconnect`, `Reconnect` and `CheckUpgraded` by sergio-mena · Pull Request #852 · cometbft/cometbft · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

e2e provider is extended with Disconnect, Reconnect and CheckUpgraded #852

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 81 commits into from
May 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
85ff1f2
shim in start and stop methods
williambanfield Nov 29, 2022
5ef7449
rename to 'tendermint'
williambanfield Nov 29, 2022
14bd151
rename to 'kill tendermint'
williambanfield Nov 29, 2022
1a84bf9
add terminate vs kill
williambanfield Nov 29, 2022
dd572d4
add command package
williambanfield Nov 29, 2022
b0f8503
add param names
williambanfield Nov 29, 2022
b762c87
switch exec compose to docker pkg
williambanfield Nov 29, 2022
c72f7c0
all exec switched to docker package
williambanfield Nov 29, 2022
bd31f70
remove runner exec
williambanfield Nov 29, 2022
91fa4f3
functionality in place for start kill terminate
williambanfield Nov 29, 2022
62e4fb0
exec compose takes context
williambanfield Nov 29, 2022
b2740d3
compose verbose takes context
williambanfield Nov 29, 2022
f0f98c8
docker.exec takes context
williambanfield Nov 29, 2022
3a8ffa5
add create node to infra provider
williambanfield Nov 30, 2022
558268e
start and create calls replaced with infra provider
williambanfield Nov 30, 2022
af8d46e
commit exec pkg
williambanfield Nov 30, 2022
419d0d7
add ssh pkg
williambanfield Nov 30, 2022
31a37ee
add digital ocean package
williambanfield Nov 30, 2022
fcb9256
add client creation logic
williambanfield Nov 30, 2022
5e8028d
use ssh agent to auth to remote
williambanfield Nov 30, 2022
46ab7a3
plump infra data into tests
williambanfield Nov 30, 2022
0197fec
move agent socket dial to client connection creation
williambanfield Nov 30, 2022
b1c65de
error if auth sock not defined
williambanfield Nov 30, 2022
01c896d
allow unknown hosts to be accessed
williambanfield Nov 30, 2022
d2ef43b
first attempt at disconnect
williambanfield Dec 1, 2022
f9799ca
reject with reset
williambanfield Dec 1, 2022
e49f6ae
shim in start and stop methods
williambanfield Nov 29, 2022
1a831b2
rename to 'tendermint'
williambanfield Nov 29, 2022
64b0340
rename to 'kill tendermint'
williambanfield Nov 29, 2022
23cb6ad
add terminate vs kill
williambanfield Nov 29, 2022
6117ad3
add command package
williambanfield Nov 29, 2022
11ca11a
add param names
williambanfield Nov 29, 2022
241c96a
switch exec compose to docker pkg
williambanfield Nov 29, 2022
55c0e47
all exec switched to docker package
williambanfield Nov 29, 2022
6389d3d
remove runner exec
williambanfield Nov 29, 2022
6415af6
functionality in place for start kill terminate
williambanfield Nov 29, 2022
be8d980
exec compose takes context
williambanfield Nov 29, 2022
eb46fdc
compose verbose takes context
williambanfield Nov 29, 2022
9e4a246
docker.exec takes context
williambanfield Nov 29, 2022
eec848b
add create node to infra provider
williambanfield Nov 30, 2022
0cafe89
start and create calls replaced with infra provider
williambanfield Nov 30, 2022
db7b4a0
commit exec pkg
williambanfield Nov 30, 2022
e009a2c
add ssh pkg
williambanfield Nov 30, 2022
b0be713
add digital ocean package
williambanfield Nov 30, 2022
b7fccf0
add client creation logic
williambanfield Nov 30, 2022
de20e3d
use ssh agent to auth to remote
williambanfield Nov 30, 2022
1a2afa6
plump infra data into tests
williambanfield Nov 30, 2022
9c8c6c4
move agent socket dial to client connection creation
williambanfield Nov 30, 2022
3cfb90f
error if auth sock not defined
williambanfield Nov 30, 2022
5a6b3ab
allow unknown hosts to be accessed
williambanfield Nov 30, 2022
8ec6013
return session error
williambanfield Dec 1, 2022
59f93e9
close connection in ssh exec
williambanfield Dec 1, 2022
2bcc84e
gofmt
williambanfield Dec 1, 2022
ca78052
use IP.Equal instead of bytes.Equal
williambanfield Dec 1, 2022
644a93a
correct the nolint comment format
williambanfield Dec 1, 2022
d5119c2
remove backup for INFRASTRUCTURE_DATA missing
williambanfield Dec 5, 2022
143109e
Update test/e2e/pkg/infra/digitalocean/digitalocean.go
williambanfield Dec 5, 2022
4dd0ff5
remove hostkeyalgorithm setting
williambanfield Dec 5, 2022
ac539b1
rename IP node field
williambanfield Dec 5, 2022
8e3f608
add external address field
williambanfield Dec 5, 2022
e22b61a
use external IP in digital ocean
williambanfield Dec 5, 2022
ad9c314
Merge remote-tracking branch 'origin/main' into wb/issue-9790
williambanfield Dec 5, 2022
63b5b09
Merge remote-tracking branch 'origin/main' into wb/issue-9790
williambanfield Dec 8, 2022
64a65c7
use correct variable in e2e compose
williambanfield Dec 8, 2022
3d5b4f7
Merge branch 'main' into wb/issue-9790
williambanfield Dec 8, 2022
5353c6c
Merge branch 'main' into wb/e2e-do-disconnected
lasarojc Dec 19, 2022
f7c6821
Merge branch 'main' into sergio/58-perturb
sergio-mena May 5, 2023
afcf55d
Merge branch 'main' into sergio/58-perturb
sergio-mena May 17, 2023
384f3ae
fix node name for the case '*_u' (upgraded)
sergio-mena May 17, 2023
51db8ec
Adapt (dis)connection logic to ansible
sergio-mena May 17, 2023
e6348f0
Fixes
sergio-mena May 17, 2023
c418e52
Make check for upgrade provider-dependent
sergio-mena May 17, 2023
c14e083
fix `iptables` formatting
sergio-mena May 17, 2023
9f8c426
Fix iptables command
sergio-mena May 17, 2023
f139ba2
revert me
sergio-mena May 17, 2023
416dd20
Keep every playbook used in a different file for troubleshooting
sergio-mena May 17, 2023
b6d4751
Fixed bug that was swapping disconnect and reconnect
sergio-mena May 17, 2023
49012a1
Rename `Connect` to `Reconnect`
sergio-mena May 17, 2023
49e4390
Update test/e2e/pkg/infra/digitalocean/digitalocean.go
sergio-mena May 17, 2023
99df9ec
Merge branch 'main' into sergio/58-perturb
sergio-mena May 17, 2023
378abc9
Merge branch 'main' into sergio/58-perturb
sergio-mena May 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 81 additions & 17 deletions test/e2e/pkg/infra/digitalocean/digitalocean.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"os"
"path/filepath"
"strconv"
"strings"

e2e "github.com/cometbft/cometbft/test/e2e/pkg"
Expand All @@ -24,33 +25,63 @@ func (p *Provider) Setup() error {
return nil
}

const ymlSystemd = "systemd-action.yml"
var ymlPlaybookSeq int

func getNextPlaybookFilename() string {
const ymlPlaybookAction = "playbook-action"
ymlPlaybookSeq++
return ymlPlaybookAction + strconv.Itoa(ymlPlaybookSeq) + ".yml"
}
Comment on lines +30 to +34
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this so that playbooks don't clash with one another?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's for troubleshooting, a late addition to this PR. I wanted to see what actual playbook had been generated, but the runner was overwriting the file with new playbooks.
This way we keep the history, and can inspect.
All these files are part of the "testnet" dir, so they will be wiped out on the next run (or upon cleanup). But, if we see that the number of files is very large for a run (not the case now), we could extend this to only produce, e.g., N files and use them in round-robin.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I don't think we need to optimize this. I was just double-checking my understanding.


func (p Provider) StartNodes(ctx context.Context, nodes ...*e2e.Node) error {
nodeIPs := make([]string, len(nodes))
for i, n := range nodes {
nodeIPs[i] = n.ExternalIP.String()
}
if err := p.writePlaybook(ymlSystemd, true); err != nil {
playbook := ansibleSystemdBytes(true)
playbookFile := getNextPlaybookFilename()
if err := p.writePlaybook(playbookFile, playbook); err != nil {
return err
}

return execAnsible(ctx, p.Testnet.Dir, ymlSystemd, nodeIPs)
return execAnsible(ctx, p.Testnet.Dir, playbookFile, nodeIPs)
}
func (p Provider) StopTestnet(ctx context.Context) error {
nodeIPs := make([]string, len(p.Testnet.Nodes))
for i, n := range p.Testnet.Nodes {
nodeIPs[i] = n.ExternalIP.String()
}

if err := p.writePlaybook(ymlSystemd, false); err != nil {
playbook := ansibleSystemdBytes(false)
playbookFile := getNextPlaybookFilename()
if err := p.writePlaybook(playbookFile, playbook); err != nil {
return err
}
return execAnsible(ctx, p.Testnet.Dir, playbookFile, nodeIPs)
}
func (p Provider) Disconnect(ctx context.Context, _ string, ip string) error {
playbook := ansiblePerturbConnectionBytes(true)
playbookFile := getNextPlaybookFilename()
if err := p.writePlaybook(playbookFile, playbook); err != nil {
return err
}
return execAnsible(ctx, p.Testnet.Dir, ymlSystemd, nodeIPs)
return execAnsible(ctx, p.Testnet.Dir, playbookFile, []string{ip})
}
func (p Provider) Reconnect(ctx context.Context, _ string, ip string) error {
playbook := ansiblePerturbConnectionBytes(false)
playbookFile := getNextPlaybookFilename()
if err := p.writePlaybook(playbookFile, playbook); err != nil {
return err
}
return execAnsible(ctx, p.Testnet.Dir, playbookFile, []string{ip})
}

func (p Provider) CheckUpgraded(_ context.Context, node *e2e.Node) (string, bool, error) {
// Upgrade not supported yet by DO provider
return node.Name, false, nil
}

func (p Provider) writePlaybook(yaml string, starting bool) error {
playbook := ansibleSystemdBytes(starting)
func (p Provider) writePlaybook(yaml, playbook string) error {
//nolint: gosec
// G306: Expect WriteFile permissions to be 0600 or less
err := os.WriteFile(filepath.Join(p.Testnet.Dir, yaml), []byte(playbook), 0o644)
Expand All @@ -60,25 +91,58 @@ func (p Provider) writePlaybook(yaml string, starting bool) error {
return nil
}

// file as bytes to be written out to disk.
// ansibleStartBytes generates an Ansible playbook to start the network
func ansibleSystemdBytes(starting bool) string {
startStop := "stopped"
if starting {
startStop = "started"
}
playbook := fmt.Sprintf(`- name: start/stop testapp
const basePlaybook = `- name: e2e custom playbook
Copy link
Contributor
@thanethomson thanethomson May 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I'm pretty confident that this would work, it is rather difficult to know what the final playbook for each provider method would look like (making it more difficult to debug in case something goes wrong).

Would it not make it more readable to define a template per provider method (e.g. one for StartNodes, one for StopTestnet, one for Reconnect, etc.) and simply parameterize the parts that need to be variable?

6D40
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

making it more difficult to debug in case something goes wrong

Regarding this, check my comment above

Yes, I thought about using the template library, but in the end I decided to go simple. As we add more logic from qa-infra, we can evaluate whether to move over to templates.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interestingly I would consider using the template library to be the simpler option from a coding/debugging perspective. My concern's not so much with the generated code as it is with the Go code and the strings within the Go code.

For instance, some of the string concatenations are performed with strings with indentation where that indentation has meaning. It's usually far easier to reason about and extend when the whole template's visible, like with the Docker compose template:

tmpl, err := template.New("docker-compose").Parse(`version: '2.4'
networks:
{{ .Name }}:
labels:
e2e: true
driver: bridge
{{- if .IPv6 }}
enable_ipv6: true
{{- end }}
ipam:
driver: default
config:
- subnet: {{ .IP }}
services:
{{- range .Nodes }}
{{ .Name }}:
labels:
e2e: true
container_name: {{ .Name }}
image: {{ .Version }}
{{- if or (eq .ABCIProtocol "builtin") (eq .ABCIProtocol "builtin_unsync") }}
entrypoint: /usr/bin/entrypoint-builtin
{{- end }}
init: true
ports:
- 26656
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
{{- if .PrometheusProxyPort }}
- {{ .PrometheusProxyPort }}:26660
{{- end }}
- 6060 9E88
volumes:
- ./{{ .Name }}:/cometbft
- ./{{ .Name }}:/tendermint
networks:
{{ $.Name }}:
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .InternalIP }}
{{- if ne .Version $.UpgradeVersion}}
{{ .Name }}_u:
labels:
e2e: true
container_name: {{ .Name }}_u
image: {{ $.UpgradeVersion }}
{{- if or (eq .ABCIProtocol "builtin") (eq .ABCIProtocol "builtin_unsync") }}
entrypoint: /usr/bin/entrypoint-builtin
{{- end }}
init: true
ports:
- 26656
- {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657
{{- if .PrometheusProxyPort }}
- {{ .PrometheusProxyPort }}:26660
{{- end }}
- 6060
volumes:
- ./{{ .Name }}:/cometbft
- ./{{ .Name }}:/tendermint
networks:
{{ $.Name }}:
ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .InternalIP }}
{{- end }}
{{end}}`)

For the current code, we need to now know that some strings require indentation with 4 spaces, others with 2 spaces, etc., making it more difficult to debug/extend since you can't see those strings within their intended context.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You've got a point with the indentation. Current approach is not the best for that, I agree.
There's also another consideration that made me lean toward the current approach (for the time being). Playbooks have two parts:

  • the initial part which is kind of static, so it renders itself well for templating
  • the tasks, which are additive, so it's less obvious to me how a single template would help (unless you duplicate contents in many templates, as we have it in qa-infra)

Maybe, a solution that represents a best balance would be to have a template for the beginning, and then other templates, one for each of the different kind of tasks we support (shell, systemd, apt, etc).

However, I'd like to defer this to the next round of improvements.

hosts: all
gather_facts: yes
vars:
ansible_host_key_checking: false

tasks:
- name: operate on the systemd-unit
ansible.builtin.systemd:
`

func ansibleAddTask(playbook, name, contents string) string {
return playbook + " - name: " + name + "\n" + contents
}

func ansibleAddSystemdTask(playbook string, starting bool) string {
startStop := "stopped"
if starting {
startStop = "started"
}
contents := fmt.Sprintf(` ansible.builtin.systemd:
name: testappd
state: %s
enabled: yes`, startStop)

return ansibleAddTask(pl F438 aybook, "operate on the systemd-unit", contents)
}

func ansibleAddShellTasks(playbook, name string, shells ...string) string {
for _, shell := range shells {
contents := fmt.Sprintf(" shell: \"%s\"\n", shell)
playbook = ansibleAddTask(playbook, name, contents)
}
return playbook
}

// file as bytes to be written out to disk.
// ansibleStartBytes generates an Ansible playbook to start the network
func ansibleSystemdBytes(starting bool) string {
return ansibleAddSystemdTask(basePlaybook, starting)
}

func ansiblePerturbConnectionBytes(disconnect bool) string {
disconnecting := "reconnect"
op := "-D"
if disconnect {
disconnecting = "disconnect"
op = "-A"
}
playbook := basePlaybook
for _, dir := range []string{"INPUT", "OUTPUT"} {
playbook = ansibleAddShellTasks(playbook, disconnecting+" node",
fmt.Sprintf("iptables %s %s -p tcp --dport 26656 -j DROP", op, dir))
}
return playbook
}

Expand Down
21 changes: 21 additions & 0 deletions test/e2e/pkg/infra/docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,27 @@ func (p Provider) StartNodes(ctx context.Context, nodes ...*e2e.Node) error {
func (p Provider) StopTestnet(ctx context.Context) error {
return ExecCompose(ctx, p.Testnet.Dir, "down")
}
func (p Provider) Disconnect(ctx context.Context, name string, _ string) error {
return Exec(ctx, "network", "disconnect", p.Testnet.Name+"_"+p.Testnet.Name, name)
}
func (p Provider) Reconnect(ctx context.Context, name string, _ string) error {
return Exec(ctx, "network", "connect", p.Testnet.Name+"_"+p.Testnet.Name, name)
}

func (p Provider) CheckUpgraded(ctx context.Context, node *e2e.Node) (string, bool, error) {
testnet := node.Testnet
out, err := ExecComposeOutput(ctx, testnet.Dir, "ps", "-q", node.Name)
if err != nil {
return "", false, err
}
name := node.Name
upgraded := false
if len(out) == 0 {
name = name + "_u"
upgraded = true
}
return name, upgraded, nil
}

// dockerComposeBytes generates a Docker Compose config file for a testnet and returns the
// file as bytes to be written out to disk.
Expand Down
10 changes: 10 additions & 0 deletions test/e2e/pkg/infra/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,18 @@ type Provider interface {
// Stops the whole network
StopTestnet(context.Context) error

// Disconnects the node from the network
Disconnect(context.Context, string, string) error

// Reconnects the node to the network.
// This should only be called after Disconnect
Reconnect(context.Context, string, string) error

// Returns the the provider's infrastructure data
GetInfrastructureData() *e2e.InfrastructureData

// Checks whether the node has been upgraded in this run
CheckUpgraded(context.Context, *e2e.Node) (string, bool, error)
}

type ProviderData struct {
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/runner/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func NewCLI() *CLI {
}

if cli.testnet.HasPerturbations() {
if err := Perturb(cmd.Context(), cli.testnet); err != nil {
if err := Perturb(cmd.Context(), cli.testnet, cli.infp); err != nil {
return err
}
if err := Wait(cmd.Context(), cli.testnet, 5); err != nil { // allow some txs to go through
Expand Down Expand Up @@ -209,7 +209,7 @@ func NewCLI() *CLI {
Use: "perturb",
Short: "Perturbs the testnet, e.g. by restarting or disconnecting nodes",
RunE: func(cmd *cobra.Command, args []string) error {
return Perturb(cmd.Context(), cli.testnet)
return Perturb(cmd.Context(), cli.testnet, cli.infp)
},
})

Expand Down
20 changes: 9 additions & 11 deletions test/e2e/runner/perturb.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@ import (
"github.com/cometbft/cometbft/libs/log"
rpctypes "github.com/cometbft/cometbft/rpc/core/types"
e2e "github.com/cometbft/cometbft/test/e2e/pkg"
"github.com/cometbft/cometbft/test/e2e/pkg/infra"
"github.com/cometbft/cometbft/test/e2e/pkg/infra/docker"
)

// Perturbs a running testnet.
func Perturb(ctx context.Context, testnet *e2e.Testnet) error {
func Perturb(ctx context.Context, testnet *e2e.Testnet, ifp infra.Provider) error {
for _, node := range testnet.Nodes {
for _, perturbation := range node.Perturbations {
_, err := PerturbNode(ctx, node, perturbation)
_, err := PerturbNode(ctx, node, perturbation, ifp)
if err != nil {
return err
}
Expand All @@ -27,17 +28,14 @@ func Perturb(ctx context.Context, testnet *e2e.Testnet) error {

// PerturbNode perturbs a node with a given perturbation, returning its status
// after recovering.
func PerturbNode(ctx context.Context, node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
func PerturbNode(ctx context.Context, node *e2e.Node, perturbation e2e.Perturbation, ifp infra.Provider) (*rpctypes.ResultStatus, error) {
testnet := node.Testnet
out, err := docker.ExecComposeOutput(context.Background(), testnet.Dir, "ps", "-q", node.Name)

name, upgraded, err := ifp.CheckUpgraded(ctx, node)
if err != nil {
return nil, err
}
name := node.Name
upgraded := false
if len(out) == 0 {
name = name + "_u"
upgraded = true
if upgraded {
logger.Info("perturb node", "msg",
log.NewLazySprintf("Node %v already upgraded, operating on alternate container %v",
node.Name, name))
Expand All @@ -46,11 +44,11 @@ func PerturbNode(ctx context.Context, node *e2e.Node, perturbation e2e.Perturbat
switch perturbation {
case e2e.PerturbationDisconnect:
logger.Info("perturb node", "msg", log.NewLazySprintf("Disconnecting node %v...", node.Name))
if err := docker.Exec(context.Background(), "network", "disconnect", testnet.Name+"_"+testnet.Name, name); err != nil {
if err := ifp.Disconnect(context.Background(), name, node.ExternalIP.String()); err != nil {
return nil, err
}
time.Sleep(10 * time.Second)
if err := docker.Exec(context.Background(), "network", "connect", testnet.Name+"_"+testnet.Name, name); err != nil {
if err := ifp.Reconnect(context.Background(), name, node.ExternalIP.String()); err != nil {
return nil, err
}

Expand Down
0