diff --git a/systest/Makefile b/systest/Makefile index 79c8e808c8c..0465331d631 100644 --- a/systest/Makefile +++ b/systest/Makefile @@ -23,6 +23,7 @@ level ?= debug bootstrap ?= 5m storage ?= standard=1Gi node_selector ?= +node_split_size ?= namespace ?= count ?= 1 failfast ?= true @@ -73,6 +74,7 @@ template: @echo post-init-image=$(post_init_image) >> $(tmpfile) @echo keep=$(keep) >> $(tmpfile) @echo testid=$(test_id) >> $(tmpfile) + @echo node-split-size=$(node_split_size) >> $(tmpfile) .PHONY: config config: template diff --git a/systest/cluster/cluster.go b/systest/cluster/cluster.go index 3301f35dcea..436802cd901 100644 --- a/systest/cluster/cluster.go +++ b/systest/cluster/cluster.go @@ -38,6 +38,7 @@ const ( poetApp = "poet" bootnodeApp = "boot" smesherApp = "smesher" + activationApp = "activation" postServiceApp = "postservice" bootstrapperApp = "bootstrapper" bootstrapperPort = 80 @@ -161,13 +162,14 @@ func ReuseWait(cctx *testcontext.Context, opts ...Opt) (*Cluster, error) { func Default(cctx *testcontext.Context, opts ...Opt) (*Cluster, error) { cl := New(cctx, opts...) - smeshers := cctx.ClusterSize - cctx.BootnodeSize - cctx.OldSize - cctx.RemoteSize + smeshers := cctx.ClusterSize - cctx.BootnodeSize - cctx.OldSize - cctx.RemoteSize - cctx.NodeSplitSize cctx.Log.Desugar().Info("Using the following nodes", zap.Int("total", cctx.ClusterSize), zap.Int("bootnodes", cctx.BootnodeSize), zap.Int("smeshers", smeshers), zap.Int("old smeshers", cctx.OldSize), + zap.Int("node split setup", cctx.NodeSplitSize), zap.Int("remote", cctx.RemoteSize), ) @@ -196,20 +198,28 @@ func Default(cctx *testcontext.Context, opts ...Opt) (*Cluster, error) { return nil, err } - smesherKeys := keys[cctx.BootnodeSize : cctx.BootnodeSize+smeshers] + oldOffset := cctx.BootnodeSize + smeshers + smesherKeys := keys[cctx.BootnodeSize:oldOffset] if err := cl.AddSmeshers(cctx, smeshers, WithSmeshers(smesherKeys)); err != nil { return nil, err } - oldKeys := keys[cctx.BootnodeSize+smeshers : cctx.BootnodeSize+smeshers+cctx.OldSize] + remoteOffset := oldOffset + cctx.OldSize + oldKeys := keys[oldOffset:remoteOffset] if err := cl.AddSmeshers(cctx, cctx.OldSize, WithSmeshers(oldKeys), WithImage(cctx.OldImage)); err != nil { return nil, err } - remoteKeys := keys[cctx.BootnodeSize+smeshers+cctx.OldSize:] + splitNodeOffset := remoteOffset + cctx.NodeSplitSize + remoteKeys := keys[remoteOffset:splitNodeOffset] if err := cl.AddRemoteSmeshers(cctx, cctx.RemoteSize, WithSmeshers(remoteKeys)); err != nil { return nil, err } + + splitNodeKeys := keys[splitNodeOffset:] + if err := cl.AddSplitNodes(cctx, cctx.NodeSplitSize, WithSmeshers(splitNodeKeys)); err != nil { + return nil, err + } return cl, nil } @@ -315,6 +325,16 @@ func (c *Cluster) persistConfigs(ctx *testcontext.Context) error { if err != nil { return fmt.Errorf("apply cfgmap %v/%v: %w", ctx.Namespace, spacemeshConfigMapName, err) } + _, err = ctx.Client.CoreV1().ConfigMaps(ctx.Namespace).Apply( + ctx, + corev1.ConfigMap(activationConfigMapName, ctx.Namespace).WithData(map[string]string{ + attachedActivationConfig: activationConfig.Get(ctx.Parameters), + }), + apimetav1.ApplyOptions{FieldManager: "test"}, + ) + if err != nil { + return fmt.Errorf("apply cfgmap %v/%v: %w", ctx.Namespace, spacemeshConfigMapName, err) + } _, err = ctx.Client.CoreV1().ConfigMaps(ctx.Namespace).Apply( ctx, corev1.ConfigMap(certifierConfigMapName, ctx.Namespace).WithData(map[string]string{ @@ -417,6 +437,16 @@ func (c *Cluster) reuse(cctx *testcontext.Context) error { c.clients = append(c.clients, clients...) c.smeshers = len(clients) + clients, err = discoverNodes(cctx, activationApp) + if err != nil { + return err + } + for _, node := range clients { + cctx.Log.Debugw("discovered existing activation nodes", "name", node.Name) + } + c.clients = append(c.clients, clients...) + c.smeshers += len(clients) + c.poets, err = discoverNodes(cctx, poetApp) if err != nil { return err @@ -641,6 +671,67 @@ func (c *Cluster) AddRemoteSmeshers(tctx *testcontext.Context, n int, opts ...De return nil } +func (c *Cluster) AddSplitNodes(tctx *testcontext.Context, n int, opts ...DeploymentOpt) error { + if n == 0 { + return nil + } + if n == 1 { + return errors.New("split nodes size has to be at least 2 while provided size is 1") + } + if err := c.resourceControl(tctx, n); err != nil { + return err + } + if err := c.persist(tctx); err != nil { + return err + } + flags := maps.Values(c.smesherFlags) + endpoints, err := ExtractP2PEndpoints(tctx, c.clients[:c.bootnodes]) + if err != nil { + return fmt.Errorf("extracting p2p endpoints %w", err) + } + + cfg := SmesherDeploymentConfig{} + for _, opt := range opts { + opt(&cfg) + } + keys := cfg.keys + + // deploy a single node-service + dopts := []DeploymentOpt{ + WithFlags(flags...), + WithFlags(Bootnodes(endpoints...), StartSmeshing(false)), + WithSmeshers(keys[:1]), + } + clients, err := deployNodes(tctx, smesherApp, c.nextSmesher(), c.nextSmesher()+1, dopts...) + if err != nil { + return err + } + c.clients = append(c.clients, clients...) + c.smeshers += len(clients) + + nodeServer := clients[0] + c.Wait(tctx, len(c.clients)-1) + + if err := deployNodeSvc(tctx, nodeServer.Name); err != nil { + return err + } + + // deploy client services + dopts = []DeploymentOpt{ + WithFlags(flags...), + WithFlags(StartSmeshing(true)), + WithSmeshers(keys[1:]), + } + clients, err = deployActivationNodes( + tctx, nodeServer.Name, c.nextSmesher(), c.nextSmesher()+n-1, dopts...) + if err != nil { + return err + } + c.clients = append(c.clients, clients...) + c.smeshers += len(clients) + return nil +} + func (c *Cluster) AddBootstrapper(cctx *testcontext.Context, i int) error { if err := c.persist(cctx); err != nil { return err diff --git a/systest/cluster/nodes.go b/systest/cluster/nodes.go index 19848176063..46e2c70f311 100644 --- a/systest/cluster/nodes.go +++ b/systest/cluster/nodes.go @@ -52,6 +52,11 @@ var ( "configuration for smesher service", fastnet.SmesherConfig, ) + activationConfig = parameters.String( + "activation", + "configuration for activation service", + fastnet.ActivationConfig, + ) smesherResources = parameters.NewParameter( "smesher_resources", @@ -68,6 +73,21 @@ var ( }, toResources, ) + activationResources = parameters.NewParameter( + "smesher_resources", + "requests and limits for activation container", + &apiv1.ResourceRequirements{ + Requests: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("1.3"), + apiv1.ResourceMemory: resource.MustParse("800Mi"), + }, + Limits: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("1.3"), + apiv1.ResourceMemory: resource.MustParse("800Mi"), + }, + }, + toResources, + ) bootstrapperResources = parameters.NewParameter( "bootstrapper_resources", "requests and limits for bootstrapper container", @@ -111,13 +131,15 @@ func toResources(value string) (*apiv1.ResourceRequirements, error) { const ( configDir = "/etc/config/" - attachedCertifierConfig = "certifier.yaml" - attachedPoetConfig = "poet.conf" - attachedSmesherConfig = "smesher.json" + attachedCertifierConfig = "certifier.yaml" + attachedPoetConfig = "poet.conf" + attachedSmesherConfig = "smesher.json" + attachedActivationConfig = "activation.json" - certifierConfigMapName = "certifier" - poetConfigMapName = "poet" - spacemeshConfigMapName = "spacemesh" + certifierConfigMapName = "certifier" + poetConfigMapName = "poet" + spacemeshConfigMapName = "spacemesh" + activationConfigMapName = "activation" // smeshers are split in 10 approximately equal buckets // to enable running chaos mesh tasks on the different parts of the cluster. @@ -491,6 +513,7 @@ func deployNodeSvc(ctx *testcontext.Context, id string) error { corev1.ServicePort().WithName("grpc-pub").WithPort(9092).WithProtocol("TCP"), corev1.ServicePort().WithName("grpc-priv").WithPort(9093).WithProtocol("TCP"), corev1.ServicePort().WithName("grpc-post").WithPort(9094).WithProtocol("TCP"), + corev1.ServicePort().WithName("node-service-listener").WithPort(9099).WithProtocol("TCP"), ). WithClusterIP("None"), ) @@ -731,6 +754,89 @@ func deployNodes(ctx *testcontext.Context, kind string, from, to int, opts ...De return rst, nil } +func deployActivationNodes( + ctx *testcontext.Context, + node string, + from, to int, + opts ...DeploymentOpt, +) ([]*NodeClient, error) { + ctx.Log.Debugw("deploying activation nodes", "from", from, "to", to) + var ( + eg errgroup.Group + clients = make(chan *NodeClient, to-from) + cfg = SmesherDeploymentConfig{ + image: ctx.Image, + } + ) + for _, opt := range opts { + opt(&cfg) + } + if cfg.image == "" { + return nil, errors.New("go-spacemesh image must be set") + } + if delta := to - from; len(cfg.keys) > 0 && len(cfg.keys) != delta { + return nil, fmt.Errorf( + "keys must be overwritten for all or no members of the cluster: delta %d, keys %d %v", + delta, + len(cfg.keys), + cfg.keys, + ) + } + for i := from; i < to; i++ { + finalFlags := make([]DeploymentFlag, len(cfg.flags), len(cfg.flags)+ctx.PoetSize) + copy(finalFlags, cfg.flags) + if !cfg.noDefaultPoets { + var poetIds []int + for idx := 0; idx < ctx.PoetSize; idx++ { + poetIds = append(poetIds, idx) + } + finalFlags = append(finalFlags, PoetEndpoints(poetIds...)) + } + if ctx.BootstrapperSize > 1 { + finalFlags = append(finalFlags, BootstrapperUrl(BootstrapperEndpoint(i%ctx.BootstrapperSize))) + } else { + finalFlags = append(finalFlags, BootstrapperUrl(BootstrapperEndpoint(0))) + } + + var key ed25519.PrivateKey + if len(cfg.keys) > 0 { + key = cfg.keys[i-from] + } + eg.Go(func() error { + id := fmt.Sprintf("%s-%d", activationApp, i) + labels := nodeLabels(activationApp, id) + labels["bucket"] = strconv.Itoa(i % buckets) + if err := deployActivationNode( + ctx, id, node, key, cfg.image, "local.key", labels, finalFlags, + ); err != nil { + return err + } + clients <- &NodeClient{ + session: ctx, + Node: Node{ + Name: id, + P2P: 7513, + GRPC_PUB: 9092, + GRPC_PRIV: 9093, + }, + } + return nil + }) + } + if err := eg.Wait(); err != nil { + return nil, err + } + close(clients) + var rst []*NodeClient + for node := range clients { + rst = append(rst, node) + } + sort.Slice(rst, func(i, j int) bool { + return decodeOrdinal(rst[i].Name) < decodeOrdinal(rst[j].Name) + }) + return rst, nil +} + func deployRemoteNodes( ctx *testcontext.Context, from, to int, @@ -848,6 +954,119 @@ func deleteNode(ctx *testcontext.Context, id string) error { return nil } +func deployActivationNode( + ctx *testcontext.Context, + id string, + node string, + key ed25519.PrivateKey, + image string, + keyName string, + labels map[string]string, + flags []DeploymentFlag, +) error { + ctx.Log.Debugw("deploying node", "id", id) + cmd := []string{ + "/bin/go-spacemesh", + "-c=" + configDir + attachedActivationConfig, + "--pprof-server", + "--smeshing-opts-datadir=/data/post", + "-d=/data", + "--log-encoder=json", + "--metrics", + "--metrics-port=" + strconv.Itoa(prometheusScrapePort), + "--node-service-address", fmt.Sprintf("http://%s:9099", node), + "--proxy-api-v2-address", fmt.Sprintf("http://%s:9070", node), + "--grpc-json-listener", "0.0.0.0:9071", + "--proxy-listener", "0.0.0.0:9072", + } + for _, flag := range flags { + cmd = append(cmd, flag.Flag()) + } + + podSpec := corev1.PodSpec(). + WithNodeSelector(ctx.NodeSelector). + WithVolumes( + corev1.Volume().WithName("config"). + WithConfigMap(corev1.ConfigMapVolumeSource().WithName(activationConfigMapName)), + corev1.Volume().WithName("data"). + WithEmptyDir(corev1.EmptyDirVolumeSource(). + WithSizeLimit(resource.MustParse(ctx.Storage.Size))), + ). + WithDNSConfig(corev1.PodDNSConfig().WithOptions( + corev1.PodDNSConfigOption().WithName("timeout").WithValue("1"), + corev1.PodDNSConfigOption().WithName("attempts").WithValue("5"), + )). + WithContainers(corev1.Container(). + WithName("smesher"). + WithImage(image). + WithImagePullPolicy(apiv1.PullIfNotPresent). + WithPorts( + corev1.ContainerPort().WithContainerPort(7513).WithName("p2p"), + corev1.ContainerPort().WithContainerPort(9092).WithName("grpc-pub"), + corev1.ContainerPort().WithContainerPort(9093).WithName("grpc-priv"), + corev1.ContainerPort().WithContainerPort(9094).WithName("grpc-post"), + corev1.ContainerPort().WithContainerPort(prometheusScrapePort).WithName("prometheus"), + corev1.ContainerPort().WithContainerPort(phlareScrapePort).WithName("pprof"), + ). + WithVolumeMounts( + corev1.VolumeMount().WithName("data").WithMountPath("/data"), + corev1.VolumeMount().WithName("config").WithMountPath(configDir), + ). + WithResources(corev1.ResourceRequirements(). + WithRequests(activationResources.Get(ctx.Parameters).Requests). + WithLimits(activationResources.Get(ctx.Parameters).Limits), + ). + WithStartupProbe( + corev1.Probe().WithTCPSocket( + corev1.TCPSocketAction().WithPort(intstr.FromInt32(9092)), + ).WithInitialDelaySeconds(10).WithPeriodSeconds(10), + ). + WithEnv( + corev1.EnvVar().WithName("GOMAXPROCS").WithValue("4"), + ). + WithCommand(cmd...), + ) + + if key != nil { + podSpec = podSpec. + WithInitContainers( + corev1.Container(). + WithName("file-creator"). + WithImage("busybox"). + WithCommand("sh", "-c", + fmt.Sprintf("mkdir -p /data/identities && echo -n '%x' > /data/identities/%s", key, keyName), + ). + WithVolumeMounts( + corev1.VolumeMount().WithName("data").WithMountPath("/data"), + ), + ) + } + + deployment := appsv1.Deployment(id, ctx.Namespace). + WithLabels(labels). + WithSpec(appsv1.DeploymentSpec(). + WithSelector(metav1.LabelSelector().WithMatchLabels(labels)). + WithReplicas(1). + WithTemplate(corev1.PodTemplateSpec(). + WithLabels(labels). + WithAnnotations( + map[string]string{ + "prometheus.io/port": strconv.Itoa(prometheusScrapePort), + "prometheus.io/scrape": "true", + }, + ). + WithSpec(podSpec), + ), + ) + _, err := ctx.Client.AppsV1(). + Deployments(ctx.Namespace). + Apply(ctx, deployment, apimetav1.ApplyOptions{FieldManager: "test"}) + if err != nil { + return fmt.Errorf("apply pod %s: %w", id, err) + } + return nil +} + func deployNode( ctx *testcontext.Context, id string, diff --git a/systest/parameters/fastnet/activation.json b/systest/parameters/fastnet/activation.json new file mode 100644 index 00000000000..848e08847a7 --- /dev/null +++ b/systest/parameters/fastnet/activation.json @@ -0,0 +1,52 @@ +{ + "preset": "fastnet", + "main": { + "atx-versions": { + "2": 2 + } + }, + "poet": { + "phase-shift": "30s", + "cycle-gap": "30s", + "grace-period": "10s", + "positioning-atx-selection-timeout":"7s" + }, + "api": { + "grpc-public-listener": "0.0.0.0:9092", + "grpc-private-listener": "0.0.0.0:9093" + }, + "fetch": { + "servers-metrics": true, + "log-peer-stats-interval": "30s", + "servers": { + "ax/1": { + "interval": "1s", + "queue": 200, + "requests": 100 + } + } + }, + "hare3": { + "committeeupgrade": { + "layer": 16, + "size": 15 + } + }, + "smeshing": { + "smeshing-verifying-opts": { + "smeshing-opts-verifying-min-workers": 1000000 + } + }, + "certifier": { + "client": { + "max-retries": 10 + } + }, + "logging": { + "log-encoder": "json", + "txHandler": "debug", + "grpc": "debug", + "sync": "debug", + "fetcher": "debug" + } +} diff --git a/systest/parameters/fastnet/embed.go b/systest/parameters/fastnet/embed.go index 37cbc60ab86..8d2f96c8015 100644 --- a/systest/parameters/fastnet/embed.go +++ b/systest/parameters/fastnet/embed.go @@ -7,6 +7,9 @@ import ( //go:embed "smesher.json" var SmesherConfig string +//go:embed "activation.json" +var ActivationConfig string + //go:embed "poet.conf" var PoetConfig string diff --git a/systest/parameters/fastnet/smesher.json b/systest/parameters/fastnet/smesher.json index fca3cf6fc71..8cdadb854fa 100644 --- a/systest/parameters/fastnet/smesher.json +++ b/systest/parameters/fastnet/smesher.json @@ -12,6 +12,7 @@ "positioning-atx-selection-timeout":"7s" }, "api": { + "node-service-listener": "0.0.0.0:9099", "grpc-public-listener": "0.0.0.0:9092", "grpc-private-listener": "0.0.0.0:9093", "grpc-post-listener": "0.0.0.0:9094" diff --git a/systest/testcontext/context.go b/systest/testcontext/context.go index 8eb41fb0aff..c1d062ddbdc 100644 --- a/systest/testcontext/context.go +++ b/systest/testcontext/context.go @@ -120,6 +120,9 @@ var ( bsSize = parameters.Int( "bs-size", "size of bootstrappers", 1, ) + nodeSplitSize = parameters.Int( + "node-split-size", "size of node split setup", 0, + ) storage = parameters.String( "storage", "= for the storage", "standard=1Gi", ) @@ -172,6 +175,7 @@ type Context struct { RemoteSize int PoetSize int OldSize int + NodeSplitSize int BootstrapperSize int Generic client.Client TestID string @@ -363,6 +367,7 @@ func New(t *testing.T, opts ...Opt) *Context { RemoteSize: 0, PoetSize: poetSize.Get(p), OldSize: 0, + NodeSplitSize: nodeSplitSize.Get(p), BootstrapperSize: bsSize.Get(p), Image: imageFlag.Get(p), OldImage: oldImageFlag.Get(p), diff --git a/systest/tests/smeshing_test.go b/systest/tests/smeshing_test.go index 16267bbd554..7e99b4539d4 100644 --- a/systest/tests/smeshing_test.go +++ b/systest/tests/smeshing_test.go @@ -36,7 +36,9 @@ func TestSmeshing(t *testing.T) { tctx := testcontext.New(t) tctx.RemoteSize = tctx.ClusterSize / 4 // 25% of nodes are remote - tctx.OldSize = tctx.ClusterSize / 4 // 25% of nodes are old + + // commented out for node split testing + // tctx.OldSize = tctx.ClusterSize / 4 // 25% of nodes are old vests := vestingAccs{ prepareVesting(t, 3, 8, 20, 1e15, 10e15), prepareVesting(t, 5, 8, 20, 1e15, 10e15),