From 86ee4d6ad9b2235ee8710c1eed3a20d099af43ee Mon Sep 17 00:00:00 2001 From: Enrico Date: Fri, 30 Aug 2024 14:59:52 +0000 Subject: [PATCH] internal lb do not remove nodes with empty string in the zone field --- providers/gce/gce_loadbalancer_internal.go | 27 ++++++++-- .../gce/gce_loadbalancer_internal_test.go | 52 +++++++++++++++++++ 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/providers/gce/gce_loadbalancer_internal.go b/providers/gce/gce_loadbalancer_internal.go index 4c44e60c4..ebd7c40d4 100644 --- a/providers/gce/gce_loadbalancer_internal.go +++ b/providers/gce/gce_loadbalancer_internal.go @@ -603,8 +603,8 @@ func (g *Cloud) ensureInternalHealthCheck(name string, svcName types.NamespacedN return hc, nil } -func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node) (string, error) { - klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): checking group that it contains %v nodes [node names limited, total number of nodes: %d]", name, zone, loggableNodeNames(nodes), len(nodes)) +func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node, emptyZoneNodes []*v1.Node) (string, error) { + klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): checking group that it contains %v nodes [node names limited, total number of nodes: %d], the following nodes have empty string in the zone field and won't be deleted: %v", name, zone, loggableNodeNames(nodes), len(nodes), loggableNodeNames(emptyZoneNodes)) ig, err := g.GetInstanceGroup(name, zone) if err != nil && !isNotFound(err) { return "", err @@ -615,6 +615,11 @@ func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node) kubeNodes.Insert(n.Name) } + emptyZoneNodesNames := sets.NewString() + for _, n := range emptyZoneNodes { + emptyZoneNodesNames.Insert(n.Name) + } + // Individual InstanceGroup has a limit for 1000 instances in it. // As a result, it's not possible to add more to it. // Given that the long-term fix (AlphaFeatureILBSubsets) is already in-progress, @@ -650,7 +655,7 @@ func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node) } } - removeNodes := gceNodes.Difference(kubeNodes).List() + removeNodes := gceNodes.Difference(kubeNodes).Difference(emptyZoneNodesNames).List() addNodes := kubeNodes.Difference(gceNodes).List() if len(removeNodes) != 0 { @@ -689,8 +694,22 @@ func (g *Cloud) ensureInternalInstanceGroups(name string, nodes []*v1.Node) ([]s zonedNodes := splitNodesByZone(nodes) klog.V(2).Infof("ensureInternalInstanceGroups(%v): %d nodes over %d zones in region %v", name, len(nodes), len(zonedNodes), g.region) + + emptyZoneNodesNames := sets.NewString() + for _, n := range zonedNodes[""] { + emptyZoneNodesNames.Insert(n.Name) + } + + if len(emptyZoneNodesNames) > 0 { + klog.V(2).Infof("%d nodes have empty zone: %v in region %v", len(emptyZoneNodesNames), emptyZoneNodesNames, g.region) + } + var igLinks []string for zone, nodes := range zonedNodes { + if zone == "" { + continue // skip ensuring nodes with empty zone + } + if g.AlphaFeatureGate.Enabled(AlphaFeatureSkipIGsManagement) { igs, err := g.FilterInstanceGroupsByNamePrefix(name, zone) if err != nil { @@ -700,7 +719,7 @@ func (g *Cloud) ensureInternalInstanceGroups(name string, nodes []*v1.Node) ([]s igLinks = append(igLinks, ig.SelfLink) } } else { - igLink, err := g.ensureInternalInstanceGroup(name, zone, nodes) + igLink, err := g.ensureInternalInstanceGroup(name, zone, nodes, zonedNodes[""]) if err != nil { return nil, err } diff --git a/providers/gce/gce_loadbalancer_internal_test.go b/providers/gce/gce_loadbalancer_internal_test.go index f550e8f44..afa8941b3 100644 --- a/providers/gce/gce_loadbalancer_internal_test.go +++ b/providers/gce/gce_loadbalancer_internal_test.go @@ -705,6 +705,58 @@ func TestUpdateInternalLoadBalancerNodes(t *testing.T) { ) } +func TestUpdateInternalLoadBalancerNodesWithEmptyZone(t *testing.T) { + t.Parallel() + + vals := DefaultTestClusterValues() + gce, err := fakeGCECloud(vals) + require.NoError(t, err) + nodeName := "test-node-1" + node1Name := []string{nodeName} + + svc := fakeLoadbalancerService(string(LBTypeInternal)) + svc, err = gce.client.CoreV1().Services(svc.Namespace).Create(context.TODO(), svc, metav1.CreateOptions{}) + require.NoError(t, err) + nodes, err := createAndInsertNodes(gce, node1Name, vals.ZoneName) + require.NoError(t, err) + + _, err = gce.ensureInternalLoadBalancer(vals.ClusterName, vals.ClusterID, svc, nil, nodes) + assert.NoError(t, err) + + // Ensure Node has been added to instance group + igName := makeInstanceGroupName(vals.ClusterID) + instances, err := gce.ListInstancesInInstanceGroup(igName, vals.ZoneName, "ALL") + require.NoError(t, err) + assert.Equal(t, 1, len(instances)) + assert.Contains( + t, + instances[0].Instance, + fmt.Sprintf("%s/zones/%s/instances/%s", vals.ProjectID, vals.ZoneName, nodeName), + ) + + // Remove Zone from node + nodes[0].Labels[v1.LabelTopologyZone] = "" // empty zone + + lbName := gce.GetLoadBalancerName(context.TODO(), "", svc) + existingFwdRule := &compute.ForwardingRule{ + Name: lbName, + IPAddress: "", + Ports: []string{"123"}, + IPProtocol: "TCP", + LoadBalancingScheme: string(cloud.SchemeInternal), + Description: fmt.Sprintf(`{"kubernetes.io/service-name":"%s"}`, types.NamespacedName{Name: svc.Name, Namespace: svc.Namespace}.String()), + } + + _, err = gce.ensureInternalLoadBalancer(vals.ClusterName, vals.ClusterID, svc, existingFwdRule, nodes) + assert.NoError(t, err) + + // Expect load balancer to not have deleted node test-node-1 + node := &v1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}} + exist, err := gce.InstanceExists(context.TODO(), node) + require.NoError(t, err) + assert.Equal(t, exist, true) +} + func TestEnsureInternalLoadBalancerDeleted(t *testing.T) { t.Parallel()