Add glide.yaml and vendor deps

Dalton Hubble 2016-12-03 22:43:32 -08:00
parent db918f12ad
commit 5b3d5e81bd
18880 changed files with 5166045 additions and 1 deletion

@@ -0,0 +1,38 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"listers.go",
"scheduler_interface.go",
"types.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/extensions/v1beta1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/labels:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["scheduler_interface_test.go"],
library = "go_default_library",
tags = ["automanaged"],
deps = ["//pkg/api/v1:go_default_library"],
)

@@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package algorithm contains a generic Scheduler interface and several
// implementations.
package algorithm // import "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"

@@ -0,0 +1,186 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/labels"
)
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid
// performing expensive copies that are unneeded.
List() ([]*v1.Node, error)
}
// FakeNodeLister implements NodeLister on a []*v1.Node for test purposes.
type FakeNodeLister []*v1.Node
// List returns nodes as []*v1.Node.
func (f FakeNodeLister) List() ([]*v1.Node, error) {
return f, nil
}
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// We explicitly return []*v1.Pod, instead of v1.PodList, to avoid
// performing expensive copies that are unneeded.
List(labels.Selector) ([]*v1.Pod, error)
}
// FakePodLister implements PodLister on a []*v1.Pod for test purposes.
type FakePodLister []*v1.Pod
// List returns []*v1.Pod matching a query.
func (f FakePodLister) List(s labels.Selector) (selected []*v1.Pod, err error) {
for _, pod := range f {
if s.Matches(labels.Set(pod.Labels)) {
selected = append(selected, pod)
}
}
return selected, nil
}
// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
type ServiceLister interface {
// Lists all the services
List(labels.Selector) ([]*v1.Service, error)
// Gets the services for the given pod
GetPodServices(*v1.Pod) ([]*v1.Service, error)
}
// FakeServiceLister implements ServiceLister on []*v1.Service for test purposes.
type FakeServiceLister []*v1.Service
// List returns []*v1.Service, the list of all services.
func (f FakeServiceLister) List(labels.Selector) ([]*v1.Service, error) {
return f, nil
}
// GetPodServices gets the services whose selectors match the labels on the given pod.
func (f FakeServiceLister) GetPodServices(pod *v1.Pod) (services []*v1.Service, err error) {
var selector labels.Selector
for i := range f {
service := f[i]
// consider only services that are in the same namespace as the pod
if service.Namespace != pod.Namespace {
continue
}
selector = labels.Set(service.Spec.Selector).AsSelectorPreValidated()
if selector.Matches(labels.Set(pod.Labels)) {
services = append(services, service)
}
}
return
}
// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
type ControllerLister interface {
// Lists all the replication controllers
List(labels.Selector) ([]*v1.ReplicationController, error)
// Gets the replication controllers for the given pod
GetPodControllers(*v1.Pod) ([]*v1.ReplicationController, error)
}
// EmptyControllerLister implements ControllerLister returning empty data
type EmptyControllerLister struct{}
// List returns nil
func (f EmptyControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return nil, nil
}
// GetPodControllers returns nil
func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
return nil, nil
}
// FakeControllerLister implements ControllerLister on []*v1.ReplicationController for test purposes.
type FakeControllerLister []*v1.ReplicationController
// List returns []*v1.ReplicationController, the list of all ReplicationControllers.
func (f FakeControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return f, nil
}
// GetPodControllers gets the ReplicationControllers whose selectors match the labels on the given pod
func (f FakeControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
var selector labels.Selector
for i := range f {
controller := f[i]
if controller.Namespace != pod.Namespace {
continue
}
selector = labels.Set(controller.Spec.Selector).AsSelectorPreValidated()
if selector.Matches(labels.Set(pod.Labels)) {
controllers = append(controllers, controller)
}
}
if len(controllers) == 0 {
err = fmt.Errorf("Could not find Replication Controller for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
}
return
}
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*extensions.ReplicaSet, error)
}
// EmptyReplicaSetLister implements ReplicaSetLister returning empty data
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
return nil, nil
}
// FakeReplicaSetLister implements ReplicaSetLister on []*extensions.ReplicaSet for test purposes.
type FakeReplicaSetLister []*extensions.ReplicaSet
// GetPodReplicaSets gets the ReplicaSets whose selectors match the labels on the given pod
func (f FakeReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
var selector labels.Selector
for _, rs := range f {
if rs.Namespace != pod.Namespace {
continue
}
selector, err = metav1.LabelSelectorAsSelector(rs.Spec.Selector)
if err != nil {
return
}
if selector.Matches(labels.Set(pod.Labels)) {
rss = append(rss, rs)
}
}
if len(rss) == 0 {
err = fmt.Errorf("Could not find ReplicaSet for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
}
return
}
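For illustration only (not part of this commit), a minimal sketch of how the fake listers above can stand in for real cluster state; the pod names, labels, and the Example function itself are invented:

package algorithm

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/pkg/labels"
)

// ExampleFakePodLister is a hypothetical usage sketch of FakePodLister.
func ExampleFakePodLister() {
	// Two in-memory pods with different labels.
	lister := FakePodLister{
		&v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "web-1", Labels: map[string]string{"app": "web"}}},
		&v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "db-1", Labels: map[string]string{"app": "db"}}},
	}
	// List returns only the pods whose labels match the selector.
	pods, _ := lister.List(labels.SelectorFromSet(labels.Set{"app": "web"}))
	for _, pod := range pods {
		fmt.Println(pod.Name)
	}
	// Output:
	// web-1
}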

@@ -0,0 +1,59 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"error.go",
"metadata.go",
"predicates.go",
"utils.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/runtime:go_default_library",
"//pkg/util/workqueue:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:github.com/golang/glog",
],
)
go_test(
name = "go_default_test",
srcs = [
"predicates_test.go",
"utils_test.go",
],
library = "go_default_library",
tags = [
"automanaged",
"skip",
],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/codeinspector:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:k8s.io/gengo/parser",
"//vendor:k8s.io/gengo/types",
],
)

@@ -0,0 +1,99 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
)
var (
// Each predicate name is kept consistent with the predicate name used in DefaultAlgorithmProvider defined in
// defaults.go (which tends to be stable for backward compatibility)
ErrDiskConflict = newPredicateFailureError("NoDiskConflict")
ErrVolumeZoneConflict = newPredicateFailureError("NoVolumeZoneConflict")
ErrNodeSelectorNotMatch = newPredicateFailureError("MatchNodeSelector")
ErrPodAffinityNotMatch = newPredicateFailureError("MatchInterPodAffinity")
ErrTaintsTolerationsNotMatch = newPredicateFailureError("PodToleratesNodeTaints")
ErrPodNotMatchHostName = newPredicateFailureError("HostName")
ErrPodNotFitsHostPorts = newPredicateFailureError("PodFitsHostPorts")
ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence")
ErrServiceAffinityViolated = newPredicateFailureError("CheckServiceAffinity")
ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount")
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure")
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure")
// ErrFakePredicate is used for test only. Fake predicates that return false also return
// ErrFakePredicate as the error.
ErrFakePredicate = newPredicateFailureError("FakePredicateError")
)
// InsufficientResourceError is an error type that indicates which kind of resource limit was
// hit and caused the fit failure.
type InsufficientResourceError struct {
// ResourceName is the name of the resource that is insufficient
ResourceName v1.ResourceName
requested int64
used int64
capacity int64
}
func NewInsufficientResourceError(resourceName v1.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
}
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.requested, e.used, e.capacity)
}
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
type PredicateFailureError struct {
PredicateName string
}
func newPredicateFailureError(predicateName string) *PredicateFailureError {
return &PredicateFailureError{PredicateName: predicateName}
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
func (e *PredicateFailureError) GetReason() string {
return e.PredicateName
}
type FailureReason struct {
reason string
}
func NewFailureReason(msg string) *FailureReason {
return &FailureReason{reason: msg}
}
func (e *FailureReason) GetReason() string {
return e.reason
}
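As a hypothetical usage sketch (not part of this commit), constructing these errors and reading back their reasons; the numbers are invented:

package predicates

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
)

// exampleErrorReasons is a hypothetical sketch of how callers inspect these errors.
func exampleErrorReasons() {
	insufficient := NewInsufficientResourceError(v1.ResourceCPU, 2000, 3000, 4000)
	fmt.Println(insufficient.Error())        // Node didn't have enough resource: cpu, requested: 2000, used: 3000, capacity: 4000
	fmt.Println(insufficient.GetReason())    // Insufficient cpu
	fmt.Println(ErrDiskConflict.GetReason()) // NoDiskConflict
}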

@@ -0,0 +1,59 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type PredicateMetadataFactory struct {
podLister algorithm.PodLister
}
func NewPredicateMetadataFactory(podLister algorithm.PodLister) algorithm.MetadataProducer {
factory := &PredicateMetadataFactory{
podLister,
}
return factory.GetMetadata
}
// GetMetadata returns the predicateMetadata that will be used by various predicates.
func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInfoMap map[string]*schedulercache.NodeInfo) interface{} {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap)
if err != nil {
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: GetUsedPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
}
for predicateName, precomputeFunc := range predicatePrecomputations {
glog.V(4).Infof("Precompute: %v", predicateName)
precomputeFunc(predicateMetadata)
}
return predicateMetadata
}
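A hypothetical wiring sketch (not part of this commit): NewPredicateMetadataFactory returns a closure, and its result is handed to predicates as an opaque interface{}; pod, nodeNameToInfo, nodeInfo, and somePredicate are assumed to already exist:

// Hypothetical sketch; FakePodLister comes from the algorithm package above.
produce := NewPredicateMetadataFactory(algorithm.FakePodLister{})
meta := produce(pod, nodeNameToInfo) // nil when metadata cannot be computed (e.g. pod == nil)
// Predicates type-assert the opaque metadata internally:
// fits, err := somePredicate(pod, meta, nodeInfo)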

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,66 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
)
// FindLabelsInSet gets as many key/value pairs as possible out of a label set.
func FindLabelsInSet(labelsToKeep []string, selector labels.Set) map[string]string {
aL := make(map[string]string)
for _, l := range labelsToKeep {
if selector.Has(l) {
aL[l] = selector.Get(l)
}
}
return aL
}
// AddUnsetLabelsToMap backfills missing values with values we find in a map.
func AddUnsetLabelsToMap(aL map[string]string, labelsToAdd []string, labelSet labels.Set) {
for _, l := range labelsToAdd {
// if the label is already there, don't overwrite it.
if _, exists := aL[l]; exists {
continue
}
// otherwise, backfill this label.
if labelSet.Has(l) {
aL[l] = labelSet.Get(l)
}
}
}
// FilterPodsByNamespace filters pods outside a namespace from the given list.
func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {
filtered := []*v1.Pod{}
for _, nsPod := range pods {
if nsPod.Namespace == ns {
filtered = append(filtered, nsPod)
}
}
return filtered
}
// CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
if len(aL) == 0 {
return labels.Everything()
}
return labels.Set(aL).AsSelector()
}

@@ -0,0 +1,69 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
)
// ExampleFindLabelsInSet is a https://blog.golang.org/examples styled unit test.
func ExampleFindLabelsInSet() {
labelSubset := labels.Set{}
labelSubset["label1"] = "value1"
labelSubset["label2"] = "value2"
// Let's pretend that these pods are on the cluster.
// Utility functions will inspect their labels, filter them, and so on.
nsPods := []*v1.Pod{
{
ObjectMeta: v1.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
Labels: map[string]string{
"label1": "wontSeeThis",
"label2": "wontSeeThis",
"label3": "will_see_this",
},
},
}, // first pod which will be used via the utilities
{
ObjectMeta: v1.ObjectMeta{
Name: "pod2",
Namespace: "ns1",
},
},
{
ObjectMeta: v1.ObjectMeta{
Name: "pod3ThatWeWontSee",
},
},
}
fmt.Println(FindLabelsInSet([]string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)["label3"])
AddUnsetLabelsToMap(labelSubset, []string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)
fmt.Println(labelSubset)
for _, pod := range FilterPodsByNamespace(nsPods, "ns1") {
fmt.Print(pod.Name, ",")
}
// Output:
// will_see_this
// label1=value1,label2=value2,label3=will_see_this
// pod1,pod2,
}

@@ -0,0 +1,79 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"balanced_resource_allocation.go",
"image_locality.go",
"interpod_affinity.go",
"least_requested.go",
"metadata.go",
"most_requested.go",
"node_affinity.go",
"node_label.go",
"node_prefer_avoid_pods.go",
"selector_spreading.go",
"taint_toleration.go",
"test_util.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/workqueue:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:github.com/golang/glog",
],
)
go_test(
name = "go_default_test",
srcs = [
"balanced_resource_allocation_test.go",
"image_locality_test.go",
"interpod_affinity_test.go",
"least_requested_test.go",
"most_requested_test.go",
"node_affinity_test.go",
"node_label_test.go",
"node_prefer_avoid_pods_test.go",
"priorities_test.go",
"selector_spreading_test.go",
"taint_toleration_test.go",
],
library = "go_default_library",
tags = [
"automanaged",
"skip",
],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apis/extensions/v1beta1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/util/codeinspector:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:k8s.io/gengo/parser",
"//vendor:k8s.io/gengo/types",
],
)

@@ -0,0 +1,116 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"math"
"k8s.io/kubernetes/pkg/api/v1"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// This is a reasonable size range of all container images. The 90th percentile of images on Docker Hub falls within this range.
const (
mb int64 = 1024 * 1024
minImgSize int64 = 23 * mb
maxImgSize int64 = 1000 * mb
)
// Also used in most/least_requested and metadata.
// TODO: despaghettify it
func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
result := &schedulercache.Resource{}
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
result.MilliCPU += cpu
result.Memory += memory
}
return result
}
func calculateBalancedResourceAllocation(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
score := int(0)
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
score = 0
} else {
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multiplying the absolute value of the difference by 10 scales the value to
// 0-10, with 0 representing a well balanced allocation and 10 a poorly balanced one. Subtracting
// it from 10 yields the final score, which also scales from 0 to 10, with 10 representing well balanced.
diff := math.Abs(cpuFraction - memoryFraction)
score = int(10 - diff*10)
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
score,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}
func fractionOfCapacity(requested, capacity int64) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}
// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocationMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fall back to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
}
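To make the scoring concrete, a worked example with hypothetical numbers (it matches the "differently sized machines" case in the test file below):

// Hypothetical worked example of calculateBalancedResourceAllocation:
//   cpuFraction    = 3000 / 4000  = 0.75
//   memoryFraction = 5000 / 10000 = 0.50
//   diff  = |0.75 - 0.50| = 0.25
//   score = int(10 - 0.25*10) = 7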

@@ -0,0 +1,263 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestBalancedResourceAllocation(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0 %
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 3000 / 4000= 75%
Memory Fraction: 5000 / 10000 = 50%
Node1 Score: 10 - (0.75-0.5)*10 = 7
Node2 scores on 0-10 scale
CPU Fraction: 3000 / 6000= 50%
Memory Fraction: 5000/10000 = 50%
Node2 Score: 10 - (0.5-0.5)*10 = 10
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 10}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 0 / 20000 = 0%
Node1 Score: 10 - (0.6-0)*10 = 4
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node2 Score: 10 - (0.6-0.25)*10 = 6
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 20000 = 50%
Node2 Score: 10 - (0.6-0.5)*10 = 9
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 50000 = 20%
Node2 Score: 10 - (0.6-0.2)*10 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction: 0 / 10000 = 0
Node1 Score: 0
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction 5000 / 10000 = 50%
Node2 Score: 0
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

@@ -0,0 +1,79 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// ImageLocalityPriority is a priority function that favors nodes that already have requested pod container's images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var sumSize int64
for i := range pod.Spec.Containers {
sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: calculateScoreFromSize(sumSize),
}, nil
}
// calculateScoreFromSize calculates the priority of a node. sumSize is sum size of requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
var score int
switch {
case sumSize == 0 || sumSize < minImgSize:
// score == 0 means none of the images required by this pod are present on this
// node or the total size of the images present is too small to be taken into further consideration.
score = 0
// If existing images' total size is larger than max, just make it highest priority.
case sumSize >= maxImgSize:
score = 10
default:
score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
}
// Return which bucket the given size belongs to
return score
}
// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
func checkContainerImageOnNode(node *v1.Node, container *v1.Container) int64 {
for _, image := range node.Status.Images {
for _, name := range image.Names {
if container.Image == name {
// Should return immediately.
return image.SizeBytes
}
}
}
return 0
}
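A worked example of the bucketing with hypothetical sizes (it matches the second case in the test file below), for a node that already holds 40MB and 140MB of the pod's images:

// Hypothetical worked example of calculateScoreFromSize:
//   sumSize = (40 + 140) * mb = 180MB
//   score   = int(10*(180MB - 23MB)/(1000MB - 23MB)) + 1
//           = int(1570/977) + 1 = 1 + 1 = 2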

@@ -0,0 +1,182 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestImageLocalityPriority(t *testing.T) {
test_40_250 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/250",
},
},
}
test_40_140 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/140",
},
},
}
test_min_max := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/10",
},
{
Image: "gcr.io/2000",
},
},
}
node_40_140_2000 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/40",
"gcr.io/40:v1",
"gcr.io/40:v1",
},
SizeBytes: int64(40 * mb),
},
{
Names: []string{
"gcr.io/140",
"gcr.io/140:v1",
},
SizeBytes: int64(140 * mb),
},
{
Names: []string{
"gcr.io/2000",
},
SizeBytes: int64(2000 * mb),
},
},
}
node_250_10 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/250",
},
SizeBytes: int64(250 * mb),
},
{
Names: []string{
"gcr.io/10",
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
// Pod: gcr.io/40 gcr.io/250
// Node1
// Image: gcr.io/40 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Node2
// Image: gcr.io/250 250MB
// Score: (250M-23M)/97.7M + 1 = 3
pod: &v1.Pod{Spec: test_40_250},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
test: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Node1
// Image: gcr.io/40 40MB, gcr.io/140 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Node2
// Image: not present
// Score: 0
pod: &v1.Pod{Spec: test_40_140},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
test: "two images on one node, prefer this node",
},
{
// Pod: gcr.io/2000 gcr.io/10
// Node1
// Image: gcr.io/2000 2000MB
// Score: 2000MB > maxImgSize, so score = 10
// Node2
// Image: gcr.io/10 10MB
// Score: 10MB < minImgSize, so score = 0
pod: &v1.Pod{Spec: test_min_max},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}},
test: "if exceed limit, use limit",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func makeImageNode(node string, status v1.NodeStatus) *v1.Node {
return &v1.Node{
ObjectMeta: v1.ObjectMeta{Name: node},
Status: status,
}
}

@@ -0,0 +1,241 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"sync"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/util/workqueue"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type InterPodAffinity struct {
info predicates.NodeInfo
nodeLister algorithm.NodeLister
podLister algorithm.PodLister
hardPodAffinityWeight int
failureDomains priorityutil.Topologies
}
func NewInterPodAffinityPriority(
info predicates.NodeInfo,
nodeLister algorithm.NodeLister,
podLister algorithm.PodLister,
hardPodAffinityWeight int,
failureDomains []string) algorithm.PriorityFunction {
interPodAffinity := &InterPodAffinity{
info: info,
nodeLister: nodeLister,
podLister: podLister,
hardPodAffinityWeight: hardPodAffinityWeight,
failureDomains: priorityutil.Topologies{DefaultKeys: failureDomains},
}
return interPodAffinity.CalculateInterPodAffinityPriority
}
type podAffinityPriorityMap struct {
sync.Mutex
// nodes contain all nodes that should be considered
nodes []*v1.Node
// counts store the mapping from node name to so-far computed score of
// the node.
counts map[string]float64
// failureDomains contain default failure domains keys
failureDomains priorityutil.Topologies
// The first error that we faced.
firstError error
}
func newPodAffinityPriorityMap(nodes []*v1.Node, failureDomains priorityutil.Topologies) *podAffinityPriorityMap {
return &podAffinityPriorityMap{
nodes: nodes,
counts: make(map[string]float64, len(nodes)),
failureDomains: failureDomains,
}
}
func (p *podAffinityPriorityMap) setError(err error) {
p.Lock()
defer p.Unlock()
if p.firstError == nil {
p.firstError = err
}
}
func (p *podAffinityPriorityMap) processTerm(term *v1.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, weight float64) {
match, err := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, podDefiningAffinityTerm, term)
if err != nil {
p.setError(err)
return
}
if match {
func() {
p.Lock()
defer p.Unlock()
for _, node := range p.nodes {
if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
p.counts[node.Name] += weight
}
}
}()
}
}
func (p *podAffinityPriorityMap) processTerms(terms []v1.WeightedPodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, multiplier int) {
for i := range terms {
term := &terms[i]
p.processTerm(&term.PodAffinityTerm, podDefiningAffinityTerm, podToCheck, fixedNode, float64(term.Weight*int32(multiplier)))
}
}
// CalculateInterPodAffinityPriority computes a sum by iterating through the elements of weightedPodAffinityTerm and adding
// "weight" to the sum if the corresponding PodAffinityTerm is satisfied for
// that node; the node(s) with the highest sum are the most preferred.
// Symmetry needs to be considered for preferredDuringSchedulingIgnoredDuringExecution from podAffinity & podAntiAffinity,
// and for hard requirements from podAffinity.
func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
affinity, err := v1.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil {
return nil, err
}
hasAffinityConstraints := affinity != nil && affinity.PodAffinity != nil
hasAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil
allNodeNames := make([]string, 0, len(nodeNameToInfo))
for name := range nodeNameToInfo {
allNodeNames = append(allNodeNames, name)
}
// convert the topology key based weights to the node name based weights
var maxCount float64
var minCount float64
// priorityMap stores the mapping from node name to so-far computed score of
// the node.
pm := newPodAffinityPriorityMap(nodes, ipa.failureDomains)
processPod := func(existingPod *v1.Pod) error {
existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
return err
}
existingPodAffinity, err := v1.GetAffinityFromPodAnnotations(existingPod.Annotations)
if err != nil {
return err
}
existingHasAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAffinity != nil
existingHasAntiAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAntiAffinity != nil
if hasAffinityConstraints {
// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, 1)
}
if hasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, -1)
}
if existingHasAffinityConstraints {
// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the constant <ipa.hardPodAffinityWeight>
if ipa.hardPodAffinityWeight > 0 {
terms := existingPodAffinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
//if len(existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
for _, term := range terms {
pm.processTerm(&term, existingPod, pod, existingPodNode, float64(ipa.hardPodAffinityWeight))
}
}
// For every soft pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, 1)
}
if existingHasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, -1)
}
return nil
}
processNode := func(i int) {
nodeInfo := nodeNameToInfo[allNodeNames[i]]
if hasAffinityConstraints || hasAntiAffinityConstraints {
// We need to process all the nodes.
for _, existingPod := range nodeInfo.Pods() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
} else {
// The pod doesn't have any constraints - we need to check only existing
// ones that have some.
for _, existingPod := range nodeInfo.PodsWithAffinity() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
if pm.firstError != nil {
return nil, pm.firstError
}
for _, node := range nodes {
if pm.counts[node.Name] > maxCount {
maxCount = pm.counts[node.Name]
}
if pm.counts[node.Name] < minCount {
minCount = pm.counts[node.Name]
}
}
// calculate final priority score for each node
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for _, node := range nodes {
fScore := float64(0)
if (maxCount - minCount) > 0 {
fScore = 10 * ((pm.counts[node.Name] - minCount) / (maxCount - minCount))
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof("%v -> %v: InterPodAffinityPriority, Score: (%d)", pod.Name, node.Name, int(fScore))
}
}
return result, nil
}
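The final loop maps the accumulated counts onto a 0-10 scale; with hypothetical counts of min = -2 and max = 6, a node whose count is 2 would score:

// Hypothetical normalization example:
//   fScore = 10 * ((2 - (-2)) / (6 - (-2))) = 10 * 0.5 = 5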

File diff suppressed because it is too large

@@ -0,0 +1,91 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the average of the fraction of requested to capacity.
// Details: (cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity)) / 2
func LeastRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fallback to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}
// The unused capacity is calculated on a scale of 0-10,
// 0 being the lowest priority and 10 being the highest.
// The more unused resources, the higher the score.
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
requested, capacity, node)
return 0
}
return ((capacity - requested) * 10) / capacity
}
// calculateUnusedPriority calculates host priority based on the amount of unused resources.
// 'nodeInfo' has information about the node's resources and the pods currently scheduled on it.
func calculateUnusedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
cpuScore, memoryScore,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int((cpuScore + memoryScore) / 2),
}, nil
}
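A worked example with hypothetical numbers (it matches the "differently sized machines" case in the test file below); note that the integer division in calculateUnusedScore truncates 2.5 down to 2 before the two scores are averaged:

// Hypothetical worked example: pod requests 3000 millicores and 5000 memory bytes,
// node has 4000 millicores and 10000 memory bytes allocatable:
//   cpuScore    = ((4000 - 3000) * 10) / 4000   = 2  (2.5 truncated)
//   memoryScore = ((10000 - 5000) * 10) / 10000 = 5
//   final score = (2 + 5) / 2 = 3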

@@ -0,0 +1,263 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestLeastRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 3000) *10) / 4000 = 2.5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node1 Score: (2.5 + 5) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: ((6000 - 3000) *10) / 6000 = 5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 0) *10) / 20000 = 10
Node1 Score: (4 + 10) / 2 = 7
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node2 Score: (4 + 7.5) / 2 = 5
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 10000) *10) / 20000 = 5
Node2 Score: (4 + 5) / 2 = 4
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((50000 - 10000) *10) / 50000 = 8
Node2 Score: (4 + 8) / 2 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (0 + 10) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (0 + 5) / 2 = 2
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
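The expected scores in the table above follow from the unused-capacity formula spelled out in the comments, with plain integer truncation at each step. A minimal standalone sketch of that arithmetic (the helper name and wrapper are illustrative, not the vendored implementation):

package main

import "fmt"

// leastRequestedScore mirrors the formula in the comments above:
// ((capacity - requested) * 10) / capacity, truncated by integer division.
func leastRequestedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}

func main() {
	// "nothing scheduled, resources requested, differently sized machines", machine1:
	// 3000m CPU requested of 4000m, 5000 memory requested of 10000.
	cpu := leastRequestedScore(3000, 4000)  // 2 (2.5 truncated)
	mem := leastRequestedScore(5000, 10000) // 5
	fmt.Println((cpu + mem) / 2)            // 3, the expected machine1 score
}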

View file

@@ -0,0 +1,50 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// priorityMetadata is a type that is passed as metadata for priority functions
type priorityMetadata struct {
nonZeroRequest *schedulercache.Resource
podTolerations []v1.Toleration
affinity *v1.Affinity
}
// PriorityMetadata is a MetadataProducer. Node info can be nil.
func PriorityMetadata(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
tolerations, err := getTolerationListFromPod(pod)
if err != nil {
return nil
}
affinity, err := v1.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil {
return nil
}
return &priorityMetadata{
nonZeroRequest: getNonZeroRequests(pod),
podTolerations: tolerations,
affinity: affinity,
}
}

View file

@@ -0,0 +1,94 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// MostRequestedPriority is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the maximum of the average of the fraction of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
func MostRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fallback to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
}
// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculateUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUnusedScore.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
requested, capacity, node)
return 0
}
return (requested * 10) / capacity
}
// Calculate the resource used on a node. 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUsedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
cpuScore, memoryScore,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int((cpuScore + memoryScore) / 2),
}, nil
}
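The rounding remark in the comment on calculateUsedScore is easiest to see with concrete numbers: scoring used capacity directly can differ by one point from inverting the unused-capacity score. A small sketch with illustrative helpers (not the vendored functions):

package main

import "fmt"

// usedScore follows the most-requested formula: (requested * 10) / capacity.
func usedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return (requested * 10) / capacity
}

// unusedScore follows the least-requested formula: ((capacity - requested) * 10) / capacity.
func unusedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}

func main() {
	// 3000m requested on a 4000m node: the direct used score truncates 7.5 down to 7,
	// while 10 minus the unused score (10 - 2) would give 8.
	fmt.Println(usedScore(3000, 4000), 10-unusedScore(3000, 4000)) // 7 8
}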

View file

@@ -0,0 +1,182 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestMostRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (used resources) on 0-10 scale
CPU Score: (0 * 10) / 4000 = 0
Memory Score: (0 * 10) / 10000 = 0
Node1 Score: (0 + 0) / 2 = 0
Node2 scores (used resources) on 0-10 scale
CPU Score: (0 * 10) / 4000 = 0
Memory Score: (0 * 10) / 10000 = 0
Node2 Score: (0 + 0) / 2 = 0
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (3000 * 10) / 4000 = 7.5
Memory Score: (5000 * 10) / 10000 = 5
Node1 Score: (7.5 + 5) / 2 = 6
Node2 scores on 0-10 scale
CPU Score: (3000 * 10) / 6000 = 5
Memory Score: (5000 * 10) / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (0 * 10) / 20000 = 0
Node1 Score: (6 + 0) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node2 Score: (6 + 2.5) / 2 = 4
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node1 Score: (6 + 2.5) / 2 = 4
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (10000 * 10) / 20000 = 5
Node2 Score: (6 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@@ -0,0 +1,105 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// CalculateNodeAffinityPriorityMap prioritizes nodes according to the node affinity scheduling preferences
// indicated in PreferredDuringSchedulingIgnoredDuringExecution. Each time a node matches a preferredSchedulingTerm,
// it gets an addition of preferredSchedulingTerm.Weight. Thus, the more preferredSchedulingTerms
// a node satisfies, and the greater the weights of the terms it satisfies, the higher
// the score the node gets.
func CalculateNodeAffinityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var affinity *v1.Affinity
if priorityMeta, ok := meta.(*priorityMetadata); ok {
affinity = priorityMeta.affinity
} else {
// We couldn't parse metadata - fallback to computing it.
var err error
affinity, err = v1.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil {
return schedulerapi.HostPriority{}, err
}
}
var count int32
// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
// empty PreferredSchedulingTerm matches all objects.
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
if preferredSchedulingTerm.Weight == 0 {
continue
}
// TODO: Avoid computing it for all nodes if this becomes a performance problem.
nodeSelector, err := v1.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
if err != nil {
return schedulerapi.HostPriority{}, err
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
count += preferredSchedulingTerm.Weight
}
}
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(count),
}, nil
}
func CalculateNodeAffinityPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
var maxCount int
for i := range result {
if result[i].Score > maxCount {
maxCount = result[i].Score
}
}
maxCountFloat := float64(maxCount)
var fScore float64
for i := range result {
if maxCount > 0 {
fScore = 10 * (float64(result[i].Score) / maxCountFloat)
} else {
fScore = 0
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("%v -> %v: NodeAffinityPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
}
result[i].Score = int(fScore)
}
return nil
}

View file

@@ -0,0 +1,168 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNodeAffinityPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"key": "value"}
label3 := map[string]string{"az": "az1"}
label4 := map[string]string{"abc": "az11", "def": "az22"}
label5 := map[string]string{"foo": "bar", "key": "value", "az": "az1"}
affinity1 := map[string]string{
v1.AffinityAnnotationKey: `
{"nodeAffinity": {"preferredDuringSchedulingIgnoredDuringExecution": [
{
"weight": 2,
"preference": {
"matchExpressions": [
{
"key": "foo",
"operator": "In", "values": ["bar"]
}
]
}
}
]}}`,
}
affinity2 := map[string]string{
v1.AffinityAnnotationKey: `
{"nodeAffinity": {"preferredDuringSchedulingIgnoredDuringExecution": [
{
"weight": 2,
"preference": {"matchExpressions": [
{
"key": "foo",
"operator": "In", "values": ["bar"]
}
]}
},
{
"weight": 4,
"preference": {"matchExpressions": [
{
"key": "key",
"operator": "In", "values": ["value"]
}
]}
},
{
"weight": 5,
"preference": {"matchExpressions": [
{
"key": "foo",
"operator": "In", "values": ["bar"]
},
{
"key": "key",
"operator": "In", "values": ["value"]
},
{
"key": "az",
"operator": "In", "values": ["az1"]
}
]}
}
]}}`,
}
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: map[string]string{},
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as NodeAffinity is nil",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label4}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "no machine macthes preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "only machine1 matches the preferred scheduling requirements of pod",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: affinity2,
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine5", Labels: label5}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine5", Score: 10}, {Host: "machine2", Score: 3}},
test: "all machines matches the preferred scheduling requirements of pod but with different priorities ",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected %#v, \ngot %#v", test.test, test.expectedList, list)
}
}
}
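The expected scores in the last case above come from summing the weights of the matched terms and then rescaling them to 0-10 in the reduce step. A minimal sketch of that arithmetic (the helper is illustrative; the raw weight sums 2, 11 and 4 are taken from the affinity2 case):

package main

import "fmt"

// normalizeTo10 rescales raw weight sums so the best node gets 10,
// mirroring the normalization in CalculateNodeAffinityPriorityReduce.
func normalizeTo10(raw []int) []int {
	max := 0
	for _, r := range raw {
		if r > max {
			max = r
		}
	}
	out := make([]int, len(raw))
	for i, r := range raw {
		if max > 0 {
			out[i] = int(10 * float64(r) / float64(max))
		}
	}
	return out
}

func main() {
	// machine1 matches the weight-2 term, machine5 matches all three terms (2+4+5=11),
	// machine2 matches the weight-4 term.
	fmt.Println(normalizeTo10([]int{2, 11, 4})) // [1 10 3]
}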

View file

@@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type NodeLabelPrioritizer struct {
label string
presence bool
}
func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
labelPrioritizer := &NodeLabelPrioritizer{
label: label,
presence: presence,
}
return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}
// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
// If presence is false, prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
exists := labels.Set(node.Labels).Has(n.label)
score := 0
if (exists && n.presence) || (!exists && !n.presence) {
score = 10
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}

View file

@@ -0,0 +1,121 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNewNodeLabelPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"bar": "foo"}
label3 := map[string]string{"bar": "baz"}
tests := []struct {
nodes []*v1.Node
label string
presence bool
expectedList schedulerapi.HostPriorityList
test string
}{
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "baz",
presence: true,
test: "no match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
label: "baz",
presence: false,
test: "no match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "foo",
presence: true,
test: "one match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
label: "foo",
presence: false,
test: "one match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
label: "bar",
presence: true,
test: "two matches found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "bar",
presence: false,
test: "two matches found, presence false",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@@ -0,0 +1,60 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
controllerRef := priorityutil.GetControllerRef(pod)
if controllerRef != nil {
// Ignore pods that are owned by a controller other than a ReplicationController
// or a ReplicaSet.
if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
controllerRef = nil
}
}
if controllerRef == nil {
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
avoids, err := v1.GetAvoidPodsFromNodeAnnotations(node.Annotations)
if err != nil {
// If we cannot get the annotation, assume the pod is schedulable there.
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
for i := range avoids.PreferAvoidPods {
avoid := &avoids.PreferAvoidPods[i]
if controllerRef != nil {
if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
}
}
}
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}

View file

@@ -0,0 +1,155 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNodePreferAvoidPriority(t *testing.T) {
annotations1 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicationController",
"name": "foo",
"uid": "abcdef123456",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
annotations2 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicaSet",
"name": "foo",
"uid": "qwert12345",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
testNodes := []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{Name: "machine1", Annotations: annotations1},
},
{
ObjectMeta: v1.ObjectMeta{Name: "machine2", Annotations: annotations2},
},
{
ObjectMeta: v1.ObjectMeta{Name: "machine3"},
},
}
trueVar := true
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
test: "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
test: "ownership by random controller should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
test: "owner without Controller field set should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 10}},
test: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@@ -0,0 +1,117 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"os/exec"
"path/filepath"
"testing"
"k8s.io/gengo/parser"
"k8s.io/gengo/types"
"k8s.io/kubernetes/pkg/util/codeinspector"
)
func getPrioritySignatures() ([]*types.Signature, error) {
filePath := "./../types.go"
pkgName := filepath.Dir(filePath)
builder := parser.New()
if err := builder.AddDir(pkgName); err != nil {
return nil, err
}
universe, err := builder.FindTypes()
if err != nil {
return nil, err
}
signatures := []string{"PriorityFunction", "PriorityMapFunction", "PriorityReduceFunction"}
results := make([]*types.Signature, 0, len(signatures))
for _, signature := range signatures {
result, ok := universe[pkgName].Types[signature]
if !ok {
return nil, fmt.Errorf("%s type not defined", signature)
}
results = append(results, result.Signature)
}
return results, nil
}
func TestPrioritiesRegistered(t *testing.T) {
var functions []*types.Type
// Files and directories in which priorities may be referenced
targetFiles := []string{
"./../../algorithmprovider/defaults/defaults.go", // Default algorithm
"./../../factory/plugins.go", // Registered in init()
}
// List all golang source files under ./priorities/, excluding test files and sub-directories.
files, err := codeinspector.GetSourceCodeFiles(".")
if err != nil {
t.Errorf("unexpected error: %v when listing files in current directory", err)
}
// Get all public priorities in files.
for _, filePath := range files {
fileFunctions, err := codeinspector.GetPublicFunctions("k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities", filePath)
if err == nil {
functions = append(functions, fileFunctions...)
} else {
t.Errorf("unexpected error when parsing %s: %v", filePath, err)
}
}
prioritySignatures, err := getPrioritySignatures()
if err != nil {
t.Fatalf("Couldn't get priorities signatures")
}
// Check if all public priorities are referenced in target files.
for _, function := range functions {
// Ignore functions that don't match priorities signatures.
signature := function.Underlying.Signature
match := false
for _, prioritySignature := range prioritySignatures {
if len(prioritySignature.Parameters) != len(signature.Parameters) {
continue
}
if len(prioritySignature.Results) != len(signature.Results) {
continue
}
// TODO: Check exact types of parameters and results.
match = true
}
if !match {
continue
}
args := []string{"-rl", function.Name.Name}
args = append(args, targetFiles...)
err := exec.Command("grep", args...).Run()
if err != nil {
switch err.Error() {
case "exit status 2":
t.Errorf("unexpected error when checking %s", function.Name)
case "exit status 1":
t.Errorf("priority %s is implemented as public but seems not registered or used in any other place",
function.Name)
}
}
}
}

View file

@@ -0,0 +1,259 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"sync"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/labels"
utilnode "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/workqueue"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// The maximum priority value to give to a node
// Priority values range from 0-maxPriority
const maxPriority float32 = 10
// When zone information is present, give 2/3 of the weighting to zone spreading, 1/3 to node spreading
// TODO: Any way to justify this weighting?
const zoneWeighting = 2.0 / 3.0
type SelectorSpread struct {
serviceLister algorithm.ServiceLister
controllerLister algorithm.ControllerLister
replicaSetLister algorithm.ReplicaSetLister
}
func NewSelectorSpreadPriority(
serviceLister algorithm.ServiceLister,
controllerLister algorithm.ControllerLister,
replicaSetLister algorithm.ReplicaSetLister) algorithm.PriorityFunction {
selectorSpread := &SelectorSpread{
serviceLister: serviceLister,
controllerLister: controllerLister,
replicaSetLister: replicaSetLister,
}
return selectorSpread.CalculateSpreadPriority
}
// Returns selectors of services, RCs and RSs matching the given pod.
func getSelectors(pod *v1.Pod, sl algorithm.ServiceLister, cl algorithm.ControllerLister, rsl algorithm.ReplicaSetLister) []labels.Selector {
selectors := make([]labels.Selector, 0, 3)
if services, err := sl.GetPodServices(pod); err == nil {
for _, service := range services {
selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
}
}
if rcs, err := cl.GetPodControllers(pod); err == nil {
for _, rc := range rcs {
selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector))
}
}
if rss, err := rsl.GetPodReplicaSets(pod); err == nil {
for _, rs := range rss {
if selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
selectors = append(selectors, selector)
}
}
}
return selectors
}
func (s *SelectorSpread) getSelectors(pod *v1.Pod) []labels.Selector {
return getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister)
}
// CalculateSpreadPriority spreads pods across hosts and zones, considering pods belonging to the same service or replication controller.
// When a pod is scheduled, it looks for services, RCs or RSs that match the pod, then finds existing pods that match those selectors.
// It favors nodes that have fewer existing matching pods.
// i.e. it pushes the scheduler towards a node where there's the smallest number of
// pods which match the same service, RC or RS selectors as the pod being scheduled.
// Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
func (s *SelectorSpread) CalculateSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
selectors := s.getSelectors(pod)
// Count similar pods by node
countsByNodeName := make(map[string]float32, len(nodes))
countsByZone := make(map[string]float32, 10)
maxCountByNodeName := float32(0)
countsByNodeNameLock := sync.Mutex{}
if len(selectors) > 0 {
processNodeFunc := func(i int) {
nodeName := nodes[i].Name
count := float32(0)
for _, nodePod := range nodeNameToInfo[nodeName].Pods() {
if pod.Namespace != nodePod.Namespace {
continue
}
// When we are replacing a failed pod, we often see the previous
// deleted version while scheduling the replacement.
// Ignore the previous deleted version for spreading purposes
// (it can still be considered for resource restrictions etc.)
if nodePod.DeletionTimestamp != nil {
glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
continue
}
matches := false
for _, selector := range selectors {
if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) {
matches = true
break
}
}
if matches {
count++
}
}
zoneId := utilnode.GetZoneKey(nodes[i])
countsByNodeNameLock.Lock()
defer countsByNodeNameLock.Unlock()
countsByNodeName[nodeName] = count
if count > maxCountByNodeName {
maxCountByNodeName = count
}
if zoneId != "" {
countsByZone[zoneId] += count
}
}
workqueue.Parallelize(16, len(nodes), processNodeFunc)
}
// Aggregate by-zone information
// Compute the maximum number of pods hosted in any zone
haveZones := len(countsByZone) != 0
maxCountByZone := float32(0)
for _, count := range countsByZone {
if count > maxCountByZone {
maxCountByZone = count
}
}
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
//score int - scale of 0-maxPriority
// 0 being the lowest priority and maxPriority being the highest
for _, node := range nodes {
// initializing to the default/max node score of maxPriority
fScore := maxPriority
if maxCountByNodeName > 0 {
fScore = maxPriority * ((maxCountByNodeName - countsByNodeName[node.Name]) / maxCountByNodeName)
}
// If there is zone information present, incorporate it
if haveZones {
zoneId := utilnode.GetZoneKey(node)
if zoneId != "" {
zoneScore := maxPriority * ((maxCountByZone - countsByZone[zoneId]) / maxCountByZone)
fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
}
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore),
)
}
}
return result, nil
}
type ServiceAntiAffinity struct {
podLister algorithm.PodLister
serviceLister algorithm.ServiceLister
label string
}
func NewServiceAntiAffinityPriority(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, label string) algorithm.PriorityFunction {
antiAffinity := &ServiceAntiAffinity{
podLister: podLister,
serviceLister: serviceLister,
label: label,
}
return antiAffinity.CalculateAntiAffinityPriority
}
// CalculateAntiAffinityPriority spreads pods by minimizing the number of pods belonging to the same service
// on machines with the same value for a particular label.
// The label to be considered is provided to the struct (ServiceAntiAffinity).
func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
var nsServicePods []*v1.Pod
if services, err := s.serviceLister.GetPodServices(pod); err == nil && len(services) > 0 {
// just use the first service and get the other pods within the service
// TODO: a separate predicate can be created that tries to handle all services for the pod
selector := labels.SelectorFromSet(services[0].Spec.Selector)
pods, err := s.podLister.List(selector)
if err != nil {
return nil, err
}
// consider only the pods that belong to the same namespace
for _, nsPod := range pods {
if nsPod.Namespace == pod.Namespace {
nsServicePods = append(nsServicePods, nsPod)
}
}
}
// separate out the nodes that have the label from the ones that don't
otherNodes := []string{}
labeledNodes := map[string]string{}
for _, node := range nodes {
if labels.Set(node.Labels).Has(s.label) {
label := labels.Set(node.Labels).Get(s.label)
labeledNodes[node.Name] = label
} else {
otherNodes = append(otherNodes, node.Name)
}
}
podCounts := map[string]int{}
for _, pod := range nsServicePods {
label, exists := labeledNodes[pod.Spec.NodeName]
if !exists {
continue
}
podCounts[label]++
}
numServicePods := len(nsServicePods)
result := []schedulerapi.HostPriority{}
//score int - scale of 0-maxPriority
// 0 being the lowest priority and maxPriority being the highest
for node := range labeledNodes {
// initializing to the default/max node score of maxPriority
fScore := float32(maxPriority)
if numServicePods > 0 {
fScore = maxPriority * (float32(numServicePods-podCounts[labeledNodes[node]]) / float32(numServicePods))
}
result = append(result, schedulerapi.HostPriority{Host: node, Score: int(fScore)})
}
// add the open nodes with a score of 0
for _, node := range otherNodes {
result = append(result, schedulerapi.HostPriority{Host: node, Score: 0})
}
return result, nil
}
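Where zone labels are present, CalculateSpreadPriority earlier in this file blends node-level and zone-level spreading with the 1/3 and 2/3 weights defined at the top of the file. A short illustrative calculation (the pod counts are made up, not taken from a test):

package main

import "fmt"

func main() {
	const maxPriority float32 = 10
	const zoneWeighting float32 = 2.0 / 3.0

	// Hypothetical counts: the busiest node holds 4 matching pods and this node holds 1;
	// the busiest zone holds 6 matching pods and this node's zone holds 3.
	maxCountByNode, countOnNode := float32(4), float32(1)
	maxCountByZone, countInZone := float32(6), float32(3)

	nodeScore := maxPriority * ((maxCountByNode - countOnNode) / maxCountByNode) // 7.5
	zoneScore := maxPriority * ((maxCountByZone - countInZone) / maxCountByZone) // 5
	blended := nodeScore*(1.0-zoneWeighting) + zoneWeighting*zoneScore

	fmt.Println(int(blended)) // 5: one third of 7.5 plus two thirds of 5, truncated
}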

File diff suppressed because it is too large

View file

@@ -0,0 +1,116 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// countIntolerableTaintsPreferNoSchedule gives the count of taints with effect PreferNoSchedule that the pod cannot tolerate
func countIntolerableTaintsPreferNoSchedule(taints []v1.Taint, tolerations []v1.Toleration) (intolerableTaints int) {
for i := range taints {
taint := &taints[i]
// check only on taints that have effect PreferNoSchedule
if taint.Effect != v1.TaintEffectPreferNoSchedule {
continue
}
if !v1.TaintToleratedByTolerations(taint, tolerations) {
intolerableTaints++
}
}
return
}
// getAllTolerationPreferNoSchedule gets the list of all Tolerations with Effect PreferNoSchedule or with no Effect set
func getAllTolerationPreferNoSchedule(tolerations []v1.Toleration) (tolerationList []v1.Toleration) {
for i := range tolerations {
toleration := &tolerations[i]
if len(toleration.Effect) == 0 || toleration.Effect == v1.TaintEffectPreferNoSchedule {
tolerationList = append(tolerationList, *toleration)
}
}
return
}
func getTolerationListFromPod(pod *v1.Pod) ([]v1.Toleration, error) {
tolerations, err := v1.GetTolerationsFromPodAnnotations(pod.Annotations)
if err != nil {
return nil, err
}
return getAllTolerationPreferNoSchedule(tolerations), nil
}
// ComputeTaintTolerationPriorityMap prepares the priority list for all the nodes based on the number of intolerable taints on the node
func ComputeTaintTolerationPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var tolerationList []v1.Toleration
if priorityMeta, ok := meta.(*priorityMetadata); ok {
tolerationList = priorityMeta.podTolerations
} else {
var err error
tolerationList, err = getTolerationListFromPod(pod)
if err != nil {
return schedulerapi.HostPriority{}, err
}
}
taints, err := v1.GetTaintsFromNodeAnnotations(node.Annotations)
if err != nil {
return schedulerapi.HostPriority{}, err
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: countIntolerableTaintsPreferNoSchedule(taints, tolerationList),
}, nil
}
func ComputeTaintTolerationPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
var maxCount int
for i := range result {
if result[i].Score > maxCount {
maxCount = result[i].Score
}
}
maxCountFloat := float64(maxCount)
// The maximum priority value to give to a node
// Priority values range from 0 - maxPriority
const maxPriority = float64(10)
for i := range result {
fScore := maxPriority
if maxCountFloat > 0 {
fScore = (1.0 - float64(result[i].Score)/maxCountFloat) * 10
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.Infof("%v -> %v: Taint Toleration Priority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
}
result[i].Score = int(fScore)
}
return nil
}
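The reduce step above inverts the raw intolerable-taint counts so the node with the fewest intolerable PreferNoSchedule taints ends up with the highest score. With counts of 0, 1 and 2, as in one of the test cases in the next file, the arithmetic works out as in this illustrative sketch:

package main

import "fmt"

// invertAndScale mirrors the normalization in ComputeTaintTolerationPriorityReduce:
// score = (1 - count/maxCount) * 10, so fewer intolerable taints means a higher score.
func invertAndScale(counts []int) []int {
	maxCount := 0
	for _, c := range counts {
		if c > maxCount {
			maxCount = c
		}
	}
	out := make([]int, len(counts))
	for i, c := range counts {
		score := 10.0
		if maxCount > 0 {
			score = (1.0 - float64(c)/float64(maxCount)) * 10
		}
		out[i] = int(score)
	}
	return out
}

func main() {
	// Intolerable PreferNoSchedule taint counts for three nodes.
	fmt.Println(invertAndScale([]int{0, 1, 2})) // [10 5 0]
}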

View file

@@ -0,0 +1,225 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"encoding/json"
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func nodeWithTaints(nodeName string, taints []v1.Taint) *v1.Node {
taintsData, _ := json.Marshal(taints)
return &v1.Node{
ObjectMeta: v1.ObjectMeta{
Name: nodeName,
Annotations: map[string]string{
v1.TaintsAnnotationKey: string(taintsData),
},
},
}
}
func podWithTolerations(tolerations []v1.Toleration) *v1.Pod {
tolerationData, _ := json.Marshal(tolerations)
return &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: map[string]string{
v1.TolerationsAnnotationKey: string(tolerationData),
},
},
}
}
// TestTaintAndToleration creates sets of nodes with taints and pods with
// tolerations, and checks that the priority function scores them as expected.
func TestTaintAndToleration(t *testing.T) {
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
// basic test case
{
test: "node with taints tolerated by the pod, gets a higher score than those node with intolerable taints",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodeWithTaints("nodeB", []v1.Taint{{
Key: "foo",
Value: "blah",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 0},
},
},
// the count of taints that are tolerated by pod, does not matter.
{
test: "the nodes that all of their taints are tolerated by the pod, get the same score, no matter how many tolerable taints a node has",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 10},
{Host: "nodeC", Score: 10},
},
},
// the count of taints on a node that are not tolerated by pod, matters.
{
test: "the more intolerable taints a node has, the lower score it gets.",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 5},
{Host: "nodeC", Score: 0},
},
},
// the taints-tolerations priority only cares about taints and tolerations that have effect PreferNoSchedule
{
test: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 10},
{Host: "nodeC", Score: 0},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("%s, unexpected error: %v", test.test, err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s,\nexpected:\n\t%+v,\ngot:\n\t%+v", test.test, test.expectedList, list)
}
}
}

View file

@@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {
return &v1.Node{
ObjectMeta: v1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
"memory": *resource.NewQuantity(memory, resource.BinarySI),
},
Allocatable: v1.ResourceList{
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
"memory": *resource.NewQuantity(memory, resource.BinarySI),
},
},
}
}
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
return func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for i := range nodes {
hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
if err != nil {
return nil, err
}
result = append(result, hostResult)
}
if reduceFn != nil {
if err := reduceFn(pod, nil, nodeNameToInfo, result); err != nil {
return nil, err
}
}
return result, nil
}
}

View file

@@ -0,0 +1,27 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"non_zero.go",
"topologies.go",
"util.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/sets:go_default_library",
],
)

View file

@ -0,0 +1,50 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "k8s.io/kubernetes/pkg/api/v1"
// For each of these resources, a pod that doesn't request the resource explicitly
// will be treated as having requested the amount indicated below, for the purpose
// of computing priority only. This ensures that when scheduling zero-request pods, such
// pods will not all be scheduled to the machine with the smallest in-use request,
// and that when scheduling regular pods, such pods will not see zero-request pods as
// consuming no resources whatsoever. We chose these values to be similar to the
// resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
// As described in #11713, we use request instead of limit to deal with resource requirements.
const DefaultMilliCpuRequest int64 = 100 // 0.1 core
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MB
// GetNonzeroRequests returns the default CPU and memory requests when they are
// not specified, and the explicitly requested values otherwise.
// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity v1.ResourceList"
// as an additional argument here) rather than using constants
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
outMilliCPU = DefaultMilliCpuRequest
} else {
outMilliCPU = requests.Cpu().MilliValue()
}
// Override if un-set, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
outMemory = DefaultMemoryRequest
} else {
outMemory = requests.Memory().Value()
}
return outMilliCPU, outMemory
}
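// Illustrative sketch, not part of the vendored file: a pod that leaves its
// resource requests empty is treated as requesting the defaults above when
// priorities are computed.
func exampleNonzeroRequests() (int64, int64) {
	requests := v1.ResourceList{}
	// Neither CPU nor memory is set, so this returns
	// (DefaultMilliCpuRequest, DefaultMemoryRequest) = (100, 200*1024*1024).
	return GetNonzeroRequests(&requests)
}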

View file

@ -0,0 +1,78 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/kubernetes/pkg/api/v1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util/sets"
)
// getNamespacesFromPodAffinityTerm returns the set of namespace names selected by
// the Namespaces field of the given podAffinityTerm:
// 1. If Namespaces is nil, it falls back to the given pod's own namespace.
// 2. If Namespaces is an empty list, it selects all namespaces (the returned set is empty).
func getNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm v1.PodAffinityTerm) sets.String {
names := sets.String{}
if podAffinityTerm.Namespaces == nil {
names.Insert(pod.Namespace)
} else if len(podAffinityTerm.Namespaces) != 0 {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, affinityPod *v1.Pod, term *v1.PodAffinityTerm) (bool, error) {
namespaces := getNamespacesFromPodAffinityTerm(affinityPod, *term)
if len(namespaces) != 0 && !namespaces.Has(pod.Namespace) {
return false, nil
}
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil || !selector.Matches(labels.Set(pod.Labels)) {
return false, err
}
return true, nil
}
// nodesHaveSameTopologyKeyInternal checks if nodeA and nodeB have the same label value for the given topologyKey.
func nodesHaveSameTopologyKeyInternal(nodeA, nodeB *v1.Node, topologyKey string) bool {
return nodeA.Labels != nil && nodeB.Labels != nil && len(nodeA.Labels[topologyKey]) > 0 && nodeA.Labels[topologyKey] == nodeB.Labels[topologyKey]
}
type Topologies struct {
DefaultKeys []string
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have the same label value for the given topologyKey.
// If the topologyKey is empty, it checks whether the two nodes share any of the default topology keys with the same corresponding label value.
func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
if len(topologyKey) == 0 {
// assumes this is allowed only for PreferredDuringScheduling pod anti-affinity (ensured by api/validation)
for _, defaultKey := range tps.DefaultKeys {
if nodesHaveSameTopologyKeyInternal(nodeA, nodeB, defaultKey) {
return true
}
}
return false
} else {
return nodesHaveSameTopologyKeyInternal(nodeA, nodeB, topologyKey)
}
}
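// Illustrative sketch, not part of the vendored file: two nodes that carry the
// same value for a "zone" label match on that topology key. The label keys and
// values here are hypothetical.
func exampleTopologyMatch() bool {
	nodeA := &v1.Node{ObjectMeta: v1.ObjectMeta{Labels: map[string]string{"zone": "us-east-1a"}}}
	nodeB := &v1.Node{ObjectMeta: v1.ObjectMeta{Labels: map[string]string{"zone": "us-east-1a", "rack": "r7"}}}
	tps := &Topologies{DefaultKeys: []string{"zone"}}
	// With an explicit key both nodes share zone "us-east-1a", so this is true.
	// An empty key would fall back to DefaultKeys and give the same answer here.
	return tps.NodesHaveSameTopologyKey(nodeA, nodeB, "zone")
}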

View file

@ -0,0 +1,32 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "k8s.io/kubernetes/pkg/api/v1"
func GetControllerRef(pod *v1.Pod) *v1.OwnerReference {
if len(pod.OwnerReferences) == 0 {
return nil
}
for i := range pod.OwnerReferences {
ref := &pod.OwnerReferences[i]
if ref.Controller != nil && *ref.Controller {
return ref
}
}
return nil
}
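// Illustrative sketch, not part of the vendored file: only an owner reference
// whose Controller field is true is returned. The ReplicaSet name is made up.
func exampleControllerRef() *v1.OwnerReference {
	isController := true
	pod := &v1.Pod{
		ObjectMeta: v1.ObjectMeta{
			OwnerReferences: []v1.OwnerReference{
				{Kind: "ReplicaSet", Name: "web-1234", Controller: &isController},
			},
		},
	}
	// Returns the ReplicaSet reference above; a pod without a controlling owner yields nil.
	return GetControllerRef(pod)
}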

View file

@ -0,0 +1,43 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
)
// SchedulerExtender is an interface for external processes to influence scheduling
// decisions made by Kubernetes. This is typically needed for resources not directly
// managed by Kubernetes.
type SchedulerExtender interface {
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod, nodes []*v1.Node) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error)
}
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods
// onto machines.
type ScheduleAlgorithm interface {
Schedule(*v1.Pod, NodeLister) (selectedMachine string, err error)
}
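// Illustrative sketch, not part of the vendored file: a minimal ScheduleAlgorithm
// that places every pod on the first listed node, skipping predicates, priorities,
// and extenders entirely. The type name is hypothetical and the error construction
// assumes the "errors" package is imported in this file.
type firstNodeScheduler struct{}

func (firstNodeScheduler) Schedule(pod *v1.Pod, lister NodeLister) (string, error) {
	nodes, err := lister.List()
	if err != nil {
		return "", err
	}
	if len(nodes) == 0 {
		return "", errors.New("no nodes available to schedule pod " + pod.Name)
	}
	return nodes[0].Name, nil
}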

View file

@ -0,0 +1,60 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/kubernetes/pkg/api/v1"
)
// Some functions used by multiple scheduler tests.
type schedulerTester struct {
t *testing.T
scheduler ScheduleAlgorithm
nodeLister NodeLister
}
// Call this if you know exactly where the pod should be scheduled.
func (st *schedulerTester) expectSchedule(pod *v1.Pod, expected string) {
actual, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
if actual != expected {
st.t.Errorf("Unexpected scheduling value: %v, expected %v", actual, expected)
}
}
// Call this if you cannot predict where the pod will be scheduled.
func (st *schedulerTester) expectSuccess(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
}
// Call this if the pod should *not* be scheduled.
func (st *schedulerTester) expectFailure(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err == nil {
st.t.Error("Unexpected non-error")
}
}

View file

@ -0,0 +1,66 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// FitPredicate is a function that indicates whether a pod fits on a given node.
// Scheduling failures are reported via the returned PredicateFailureReasons;
// the error is reserved for unexpected problems.
// TODO: Change interface{} to a specific type.
type FitPredicate func(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
// PriorityMapFunction is a function that computes per-node results for a given node.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityMapFunction func(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)
// PriorityReduceFunction is a function that aggregates per-node results and computes
// final scores for all nodes.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityReduceFunction func(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error
// MetadataProducer is a function that computes metadata for a given pod.
type MetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{}
// PriorityFunction is DEPRECATED; use the Map-Reduce pattern
// (PriorityMapFunction and PriorityReduceFunction) instead.
type PriorityFunction func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error)
type PriorityConfig struct {
Map PriorityMapFunction
Reduce PriorityReduceFunction
// TODO: Remove it after migrating all functions to
// Map-Reduce pattern.
Function PriorityFunction
Weight int
}
// EmptyMetadataProducer is a no-op MetadataProducer; it returns nil metadata.
func EmptyMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
return nil
}
type PredicateFailureReason interface {
GetReason() string
}
type GetEquivalencePodFunc func(pod *v1.Pod) interface{}
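// Illustrative sketch, not part of the vendored file: a FitPredicate together with
// a matching PredicateFailureReason that rejects nodes missing a hypothetical
// "gpu" label. Both names are made up for illustration.
type exampleFailureReason string

func (r exampleFailureReason) GetReason() string { return string(r) }

func exampleGPUPredicate(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error) {
	node := nodeInfo.Node()
	if node == nil || node.Labels["gpu"] == "" {
		// Report a scheduling failure reason rather than an error: the node simply does not fit.
		return false, []PredicateFailureReason{exampleFailureReason("node has no gpu label")}, nil
	}
	return true, nil, nil
}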