forked from barak/tarpoon

Add glide.yaml and vendor deps

This commit is contained in:
Dalton Hubble 2016-12-03 22:43:32 -08:00
parent db918f12ad
commit 5b3d5e81bd
18880 changed files with 5166045 additions and 1 deletion

70 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/BUILD generated vendored Normal file

@@ -0,0 +1,70 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"equivalence_cache.go",
"extender.go",
"generic_scheduler.go",
"scheduler.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/client/record:go_default_library",
"//pkg/client/restclient:go_default_library",
"//pkg/util:go_default_library",
"//pkg/util/errors:go_default_library",
"//pkg/util/hash:go_default_library",
"//pkg/util/net:go_default_library",
"//pkg/util/wait:go_default_library",
"//pkg/util/workqueue:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/metrics:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:github.com/golang/glog",
"//vendor:github.com/golang/groupcache/lru",
],
)
go_test(
name = "go_default_test",
srcs = [
"extender_test.go",
"generic_scheduler_test.go",
"scheduler_test.go",
],
library = "go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/testapi:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apis/extensions/v1beta1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/record:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/diff:go_default_library",
"//pkg/util/sets:go_default_library",
"//pkg/util/wait:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//plugin/pkg/scheduler/testing:go_default_library",
],
)

2 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/OWNERS generated vendored Normal file

@@ -0,0 +1,2 @@
assignees:
- davidopp

38 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/BUILD generated vendored Normal file

@@ -0,0 +1,38 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"doc.go",
"listers.go",
"scheduler_interface.go",
"types.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/extensions/v1beta1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/labels:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["scheduler_interface_test.go"],
library = "go_default_library",
tags = ["automanaged"],
deps = ["//pkg/api/v1:go_default_library"],
)

19 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/doc.go generated vendored Normal file

@@ -0,0 +1,19 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package scheduler contains a generic Scheduler interface and several
// implementations.
package algorithm // import "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"

186 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/listers.go generated vendored Normal file

@@ -0,0 +1,186 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
extensions "k8s.io/kubernetes/pkg/apis/extensions/v1beta1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/labels"
)
// NodeLister interface represents anything that can list nodes for a scheduler.
type NodeLister interface {
// We explicitly return []*v1.Node, instead of v1.NodeList, to avoid
// performing expensive copies that are unneeded.
List() ([]*v1.Node, error)
}
// FakeNodeLister implements NodeLister on a []string for test purposes.
type FakeNodeLister []*v1.Node
// List returns nodes as a []*v1.Node.
func (f FakeNodeLister) List() ([]*v1.Node, error) {
return f, nil
}
// PodLister interface represents anything that can list pods for a scheduler.
type PodLister interface {
// We explicitly return []*v1.Pod, instead of v1.PodList, to avoid
// performing expensive copies that are unneeded.
List(labels.Selector) ([]*v1.Pod, error)
}
// FakePodLister implements PodLister on a []*v1.Pod for test purposes.
type FakePodLister []*v1.Pod
// List returns []*v1.Pod matching a query.
func (f FakePodLister) List(s labels.Selector) (selected []*v1.Pod, err error) {
for _, pod := range f {
if s.Matches(labels.Set(pod.Labels)) {
selected = append(selected, pod)
}
}
return selected, nil
}
// ServiceLister interface represents anything that can produce a list of services; the list is consumed by a scheduler.
type ServiceLister interface {
// Lists all the services
List(labels.Selector) ([]*v1.Service, error)
// Gets the services for the given pod
GetPodServices(*v1.Pod) ([]*v1.Service, error)
}
// FakeServiceLister implements ServiceLister on []v1.Service for test purposes.
type FakeServiceLister []*v1.Service
// List returns v1.ServiceList, the list of all services.
func (f FakeServiceLister) List(labels.Selector) ([]*v1.Service, error) {
return f, nil
}
// GetPodServices gets the services that have the selector that match the labels on the given pod.
func (f FakeServiceLister) GetPodServices(pod *v1.Pod) (services []*v1.Service, err error) {
var selector labels.Selector
for i := range f {
service := f[i]
// consider only services that are in the same namespace as the pod
if service.Namespace != pod.Namespace {
continue
}
selector = labels.Set(service.Spec.Selector).AsSelectorPreValidated()
if selector.Matches(labels.Set(pod.Labels)) {
services = append(services, service)
}
}
return
}
// ControllerLister interface represents anything that can produce a list of ReplicationController; the list is consumed by a scheduler.
type ControllerLister interface {
// Lists all the replication controllers
List(labels.Selector) ([]*v1.ReplicationController, error)
// Gets the services for the given pod
GetPodControllers(*v1.Pod) ([]*v1.ReplicationController, error)
}
// EmptyControllerLister implements ControllerLister on []v1.ReplicationController returning empty data
type EmptyControllerLister struct{}
// List returns nil
func (f EmptyControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return nil, nil
}
// GetPodControllers returns nil
func (f EmptyControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
return nil, nil
}
// FakeControllerLister implements ControllerLister on []v1.ReplicationController for test purposes.
type FakeControllerLister []*v1.ReplicationController
// List returns []v1.ReplicationController, the list of all ReplicationControllers.
func (f FakeControllerLister) List(labels.Selector) ([]*v1.ReplicationController, error) {
return f, nil
}
// GetPodControllers gets the ReplicationControllers that have the selector that match the labels on the given pod
func (f FakeControllerLister) GetPodControllers(pod *v1.Pod) (controllers []*v1.ReplicationController, err error) {
var selector labels.Selector
for i := range f {
controller := f[i]
if controller.Namespace != pod.Namespace {
continue
}
selector = labels.Set(controller.Spec.Selector).AsSelectorPreValidated()
if selector.Matches(labels.Set(pod.Labels)) {
controllers = append(controllers, controller)
}
}
if len(controllers) == 0 {
err = fmt.Errorf("Could not find Replication Controller for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
}
return
}
// ReplicaSetLister interface represents anything that can produce a list of ReplicaSet; the list is consumed by a scheduler.
type ReplicaSetLister interface {
// Gets the replicasets for the given pod
GetPodReplicaSets(*v1.Pod) ([]*extensions.ReplicaSet, error)
}
// EmptyReplicaSetLister implements ReplicaSetLister on []extensions.ReplicaSet returning empty data
type EmptyReplicaSetLister struct{}
// GetPodReplicaSets returns nil
func (f EmptyReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
return nil, nil
}
// FakeReplicaSetLister implements ControllerLister on []extensions.ReplicaSet for test purposes.
type FakeReplicaSetLister []*extensions.ReplicaSet
// GetPodReplicaSets gets the ReplicaSets that have the selector that match the labels on the given pod
func (f FakeReplicaSetLister) GetPodReplicaSets(pod *v1.Pod) (rss []*extensions.ReplicaSet, err error) {
var selector labels.Selector
for _, rs := range f {
if rs.Namespace != pod.Namespace {
continue
}
selector, err = metav1.LabelSelectorAsSelector(rs.Spec.Selector)
if err != nil {
return
}
if selector.Matches(labels.Set(pod.Labels)) {
rss = append(rss, rs)
}
}
if len(rss) == 0 {
err = fmt.Errorf("Could not find ReplicaSet for pod %s in namespace %s with labels: %v", pod.Name, pod.Namespace, pod.Labels)
}
return
}
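The fake listers above make these interfaces easy to exercise outside the scheduler itself. A minimal sketch, assuming this commit's vendored packages are importable; the pod names and labels are made up for illustration:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/pkg/labels"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
)

func main() {
	// Two hypothetical pods, only one carrying the label we select on.
	pods := []*v1.Pod{
		{ObjectMeta: v1.ObjectMeta{Name: "web-1", Labels: map[string]string{"app": "web"}}},
		{ObjectMeta: v1.ObjectMeta{Name: "db-1", Labels: map[string]string{"app": "db"}}},
	}
	var lister algorithm.PodLister = algorithm.FakePodLister(pods)

	// List filters by the label selector, just as the scheduler would.
	selected, err := lister.List(labels.Set{"app": "web"}.AsSelector())
	if err != nil {
		panic(err)
	}
	for _, p := range selected {
		fmt.Println(p.Name) // web-1
	}
}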

59 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/BUILD generated vendored Normal file

@@ -0,0 +1,59 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"error.go",
"metadata.go",
"predicates.go",
"utils.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/kubelet/qos:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/runtime:go_default_library",
"//pkg/util/workqueue:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:github.com/golang/glog",
],
)
go_test(
name = "go_default_test",
srcs = [
"predicates_test.go",
"utils_test.go",
],
library = "go_default_library",
tags = [
"automanaged",
"skip",
],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/codeinspector:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:k8s.io/gengo/parser",
"//vendor:k8s.io/gengo/types",
],
)

99 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/error.go generated vendored Normal file

@@ -0,0 +1,99 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
)
var (
// Each predicate name below tries to be consistent with the predicate name used in DefaultAlgorithmProvider
// defined in defaults.go (which tends to be stable for backward compatibility).
ErrDiskConflict = newPredicateFailureError("NoDiskConflict")
ErrVolumeZoneConflict = newPredicateFailureError("NoVolumeZoneConflict")
ErrNodeSelectorNotMatch = newPredicateFailureError("MatchNodeSelector")
ErrPodAffinityNotMatch = newPredicateFailureError("MatchInterPodAffinity")
ErrTaintsTolerationsNotMatch = newPredicateFailureError("PodToleratesNodeTaints")
ErrPodNotMatchHostName = newPredicateFailureError("HostName")
ErrPodNotFitsHostPorts = newPredicateFailureError("PodFitsHostPorts")
ErrNodeLabelPresenceViolated = newPredicateFailureError("CheckNodeLabelPresence")
ErrServiceAffinityViolated = newPredicateFailureError("CheckServiceAffinity")
ErrMaxVolumeCountExceeded = newPredicateFailureError("MaxVolumeCount")
ErrNodeUnderMemoryPressure = newPredicateFailureError("NodeUnderMemoryPressure")
ErrNodeUnderDiskPressure = newPredicateFailureError("NodeUnderDiskPressure")
// ErrFakePredicate is used for test only. The fake predicates returning false also returns error
// as ErrFakePredicate.
ErrFakePredicate = newPredicateFailureError("FakePredicateError")
)
// InsufficientResourceError is an error type that indicates which resource limit was
// hit and caused the pod to fail to fit on the node.
type InsufficientResourceError struct {
// resourceName is the name of the resource that is insufficient
ResourceName v1.ResourceName
requested int64
used int64
capacity int64
}
func NewInsufficientResourceError(resourceName v1.ResourceName, requested, used, capacity int64) *InsufficientResourceError {
return &InsufficientResourceError{
ResourceName: resourceName,
requested: requested,
used: used,
capacity: capacity,
}
}
func (e *InsufficientResourceError) Error() string {
return fmt.Sprintf("Node didn't have enough resource: %s, requested: %d, used: %d, capacity: %d",
e.ResourceName, e.requested, e.used, e.capacity)
}
func (e *InsufficientResourceError) GetReason() string {
return fmt.Sprintf("Insufficient %v", e.ResourceName)
}
type PredicateFailureError struct {
PredicateName string
}
func newPredicateFailureError(predicateName string) *PredicateFailureError {
return &PredicateFailureError{PredicateName: predicateName}
}
func (e *PredicateFailureError) Error() string {
return fmt.Sprintf("Predicate %s failed", e.PredicateName)
}
func (e *PredicateFailureError) GetReason() string {
return e.PredicateName
}
type FailureReason struct {
reason string
}
func NewFailureReason(msg string) *FailureReason {
return &FailureReason{reason: msg}
}
func (e *FailureReason) GetReason() string {
return e.reason
}
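Callers can treat both failure types uniformly through GetReason while InsufficientResourceError still carries the underlying numbers. A small sketch with hypothetical values, assuming the vendored packages above:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
)

func main() {
	// An insufficient-resource failure exposes requested/used/capacity...
	insufficient := predicates.NewInsufficientResourceError(v1.ResourceCPU, 2000, 3000, 4000)
	fmt.Println(insufficient.Error())     // includes requested, used, and capacity
	fmt.Println(insufficient.GetReason()) // "Insufficient cpu"

	// ...while a plain predicate failure only carries the predicate name.
	fmt.Println(predicates.ErrDiskConflict.GetReason()) // "NoDiskConflict"
}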

59 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/metadata.go generated vendored Normal file

@@ -0,0 +1,59 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type PredicateMetadataFactory struct {
podLister algorithm.PodLister
}
func NewPredicateMetadataFactory(podLister algorithm.PodLister) algorithm.MetadataProducer {
factory := &PredicateMetadataFactory{
podLister,
}
return factory.GetMetadata
}
// GetMetadata returns the predicateMetadata that will be used by various predicates.
func (pfactory *PredicateMetadataFactory) GetMetadata(pod *v1.Pod, nodeNameToInfoMap map[string]*schedulercache.NodeInfo) interface{} {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
matchingTerms, err := getMatchingAntiAffinityTerms(pod, nodeNameToInfoMap)
if err != nil {
return nil
}
predicateMetadata := &predicateMetadata{
pod: pod,
podBestEffort: isPodBestEffort(pod),
podRequest: GetResourceRequest(pod),
podPorts: GetUsedPorts(pod),
matchingAntiAffinityTerms: matchingTerms,
}
for predicateName, precomputeFunc := range predicatePrecomputations {
glog.V(4).Infof("Precompute: %v", predicateName)
precomputeFunc(predicateMetadata)
}
return predicateMetadata
}
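A sketch of wiring the factory, assuming the vendored packages; the bare pod and empty node map are placeholders for illustration:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func main() {
	pod := &v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "demo", Namespace: "default"}}

	// The factory is exposed as a plain function value (algorithm.MetadataProducer).
	producer := predicates.NewPredicateMetadataFactory(algorithm.FakePodLister{pod})

	// A nil result means metadata could not be computed; predicates then
	// fall back to computing everything themselves.
	meta := producer(pod, map[string]*schedulercache.NodeInfo{})
	fmt.Println(meta != nil)
}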

vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates.go: file diff suppressed because it is too large

vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go: file diff suppressed because it is too large

66 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/utils.go generated vendored Normal file

@@ -0,0 +1,66 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
)
// FindLabelsInSet gets as many key/value pairs as possible out of a label set.
func FindLabelsInSet(labelsToKeep []string, selector labels.Set) map[string]string {
aL := make(map[string]string)
for _, l := range labelsToKeep {
if selector.Has(l) {
aL[l] = selector.Get(l)
}
}
return aL
}
// AddUnsetLabelsToMap backfills missing values with values we find in a map.
func AddUnsetLabelsToMap(aL map[string]string, labelsToAdd []string, labelSet labels.Set) {
for _, l := range labelsToAdd {
// if the label is already there, don't overwrite it.
if _, exists := aL[l]; exists {
continue
}
// otherwise, backfill this label.
if labelSet.Has(l) {
aL[l] = labelSet.Get(l)
}
}
}
// FilterPodsByNamespace filters pods outside a namespace from the given list.
func FilterPodsByNamespace(pods []*v1.Pod, ns string) []*v1.Pod {
filtered := []*v1.Pod{}
for _, nsPod := range pods {
if nsPod.Namespace == ns {
filtered = append(filtered, nsPod)
}
}
return filtered
}
// CreateSelectorFromLabels is used to define a selector that corresponds to the keys in a map.
func CreateSelectorFromLabels(aL map[string]string) labels.Selector {
if aL == nil || len(aL) == 0 {
return labels.Everything()
}
return labels.Set(aL).AsSelector()
}

69 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/utils_test.go generated vendored Normal file

@@ -0,0 +1,69 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package predicates
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
)
// ExampleFindLabelsInSet is a https://blog.golang.org/examples styled unit test.
func ExampleFindLabelsInSet() {
labelSubset := labels.Set{}
labelSubset["label1"] = "value1"
labelSubset["label2"] = "value2"
// Lets make believe that these pods are on the cluster.
// Utility functions will inspect their labels, filter them, and so on.
nsPods := []*v1.Pod{
{
ObjectMeta: v1.ObjectMeta{
Name: "pod1",
Namespace: "ns1",
Labels: map[string]string{
"label1": "wontSeeThis",
"label2": "wontSeeThis",
"label3": "will_see_this",
},
},
}, // first pod which will be used via the utilities
{
ObjectMeta: v1.ObjectMeta{
Name: "pod2",
Namespace: "ns1",
},
},
{
ObjectMeta: v1.ObjectMeta{
Name: "pod3ThatWeWontSee",
},
},
}
fmt.Println(FindLabelsInSet([]string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)["label3"])
AddUnsetLabelsToMap(labelSubset, []string{"label1", "label2", "label3"}, nsPods[0].ObjectMeta.Labels)
fmt.Println(labelSubset)
for _, pod := range FilterPodsByNamespace(nsPods, "ns1") {
fmt.Print(pod.Name, ",")
}
// Output:
// will_see_this
// label1=value1,label2=value2,label3=will_see_this
// pod1,pod2,
}

79 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/BUILD generated vendored Normal file

@@ -0,0 +1,79 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"balanced_resource_allocation.go",
"image_locality.go",
"interpod_affinity.go",
"least_requested.go",
"metadata.go",
"most_requested.go",
"node_affinity.go",
"node_label.go",
"node_prefer_avoid_pods.go",
"selector_spreading.go",
"taint_toleration.go",
"test_util.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/node:go_default_library",
"//pkg/util/workqueue:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:github.com/golang/glog",
],
)
go_test(
name = "go_default_test",
srcs = [
"balanced_resource_allocation_test.go",
"image_locality_test.go",
"interpod_affinity_test.go",
"least_requested_test.go",
"most_requested_test.go",
"node_affinity_test.go",
"node_label_test.go",
"node_prefer_avoid_pods_test.go",
"priorities_test.go",
"selector_spreading_test.go",
"taint_toleration_test.go",
],
library = "go_default_library",
tags = [
"automanaged",
"skip",
],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apis/extensions/v1beta1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/util/codeinspector:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:k8s.io/gengo/parser",
"//vendor:k8s.io/gengo/types",
],
)

116 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation.go generated vendored Normal file

@@ -0,0 +1,116 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"math"
"k8s.io/kubernetes/pkg/api/v1"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// This is a reasonable size range for container images: the 90th percentile of images on Docker Hub falls into this range.
const (
mb int64 = 1024 * 1024
minImgSize int64 = 23 * mb
maxImgSize int64 = 1000 * mb
)
// Also used in most/least_requested and metadata.
// TODO: despaghettify it
func getNonZeroRequests(pod *v1.Pod) *schedulercache.Resource {
result := &schedulercache.Resource{}
for i := range pod.Spec.Containers {
container := &pod.Spec.Containers[i]
cpu, memory := priorityutil.GetNonzeroRequests(&container.Resources.Requests)
result.MilliCPU += cpu
result.Memory += memory
}
return result
}
func calculateBalancedResourceAllocation(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuFraction := fractionOfCapacity(totalResources.MilliCPU, allocatableResources.MilliCPU)
memoryFraction := fractionOfCapacity(totalResources.Memory, allocatableResources.Memory)
score := int(0)
if cpuFraction >= 1 || memoryFraction >= 1 {
// if requested >= capacity, the corresponding host should never be preferred.
score = 0
} else {
// Upper and lower boundary of difference between cpuFraction and memoryFraction are -1 and 1
// respectively. Multiplying the absolute value of the difference by 10 scales the value to
// 0-10, with 0 representing well balanced allocation and 10 poorly balanced. Subtracting it from
// 10 yields the score, which also scales from 0 to 10, with 10 representing well balanced.
diff := math.Abs(cpuFraction - memoryFraction)
score = int(10 - diff*10)
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Balanced Resource Allocation, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
score,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}
func fractionOfCapacity(requested, capacity int64) float64 {
if capacity == 0 {
return 1
}
return float64(requested) / float64(capacity)
}
// BalancedResourceAllocation favors nodes with balanced resource usage rate.
// BalancedResourceAllocation should **NOT** be used alone, and **MUST** be used together with LeastRequestedPriority.
// It calculates the difference between the cpu and memory fraction of capacity, and prioritizes the host based on how
// close the two metrics are to each other.
// Detail: score = 10 - abs(cpuFraction-memoryFraction)*10. The algorithm is partly inspired by:
// "Wei Huang et al. An Energy Efficient Virtual Machine Placement Algorithm with Balanced Resource Utilization"
func BalancedResourceAllocationMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fall back to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateBalancedResourceAllocation(pod, nonZeroRequest, nodeInfo)
}

263 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/balanced_resource_allocation_test.go generated vendored Normal file

@@ -0,0 +1,263 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestBalancedResourceAllocation(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Fraction: 0 / 4000 = 0 %
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 3000 / 4000= 75%
Memory Fraction: 5000 / 10000 = 50%
Node1 Score: 10 - (0.75-0.5)*10 = 7
Node2 scores on 0-10 scale
CPU Fraction: 3000 / 6000= 50%
Memory Fraction: 5000/10000 = 50%
Node2 Score: 10 - (0.5-0.5)*10 = 10
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 10}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node1 Score: 10 - (0-0)*10 = 10
Node2 scores on 0-10 scale
CPU Fraction: 0 / 4000= 0%
Memory Fraction: 0 / 10000 = 0%
Node2 Score: 10 - (0-0)*10 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 0 / 20000 = 0%
Node1 Score: 10 - (0.6-0)*10 = 4
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node2 Score: 10 - (0.6-0.25)*10 = 6
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 6}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 20000 = 50%
Node2 Score: 10 - (0.6-0.5)*10 = 9
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 9}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 5000 / 20000 = 25%
Node1 Score: 10 - (0.6-0.25)*10 = 6
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 10000 = 60%
Memory Fraction: 10000 / 50000 = 20%
Node2 Score: 10 - (0.6-0.2)*10 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction: 0 / 10000 = 0
Node1 Score: 0
Node2 scores on 0-10 scale
CPU Fraction: 6000 / 4000 > 100% ==> Score := 0
Memory Fraction 5000 / 10000 = 50%
Node2 Score: 0
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(BalancedResourceAllocationMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

79 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/image_locality.go generated vendored Normal file

@@ -0,0 +1,79 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// ImageLocalityPriority is a priority function that favors nodes that already have the requested pod's container images.
// It will detect whether the requested images are present on a node, and then calculate a score ranging from 0 to 10
// based on the total size of those images.
// - If none of the images are present, this node will be given the lowest priority.
// - If some of the images are present on a node, the larger their sizes' sum, the higher the node's priority.
func ImageLocalityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var sumSize int64
for i := range pod.Spec.Containers {
sumSize += checkContainerImageOnNode(node, &pod.Spec.Containers[i])
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: calculateScoreFromSize(sumSize),
}, nil
}
// calculateScoreFromSize calculates the priority of a node. sumSize is the total size of the requested images on this node.
// 1. Split image size range into 10 buckets.
// 2. Decide the priority of a given sumSize based on which bucket it belongs to.
func calculateScoreFromSize(sumSize int64) int {
var score int
switch {
case sumSize == 0 || sumSize < minImgSize:
// score == 0 means none of the images required by this pod are present on this
// node or the total size of the images present is too small to be taken into further consideration.
score = 0
// If existing images' total size is larger than max, just make it highest priority.
case sumSize >= maxImgSize:
score = 10
default:
score = int((10 * (sumSize - minImgSize) / (maxImgSize - minImgSize)) + 1)
}
// Return which bucket the given size belongs to
return score
}
// checkContainerImageOnNode checks if a container image is present on a node and returns its size.
func checkContainerImageOnNode(node *v1.Node, container *v1.Container) int64 {
for _, image := range node.Status.Images {
for _, name := range image.Names {
if container.Image == name {
// A matching image was found; return its size immediately.
return image.SizeBytes
}
}
}
return 0
}
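The bucketing arithmetic is easy to sanity-check in isolation. Below is a standalone re-implementation of calculateScoreFromSize, for illustration only (not part of this commit):

package main

import "fmt"

const (
	mb         int64 = 1024 * 1024
	minImgSize int64 = 23 * mb
	maxImgSize int64 = 1000 * mb
)

// score mirrors calculateScoreFromSize above: sizes below the minimum
// score 0, sizes at or above the maximum score 10, and everything else
// lands in one of ten linear buckets.
func score(sumSize int64) int {
	switch {
	case sumSize < minImgSize:
		return 0
	case sumSize >= maxImgSize:
		return 10
	default:
		return int(10*(sumSize-minImgSize)/(maxImgSize-minImgSize)) + 1
	}
}

func main() {
	fmt.Println(score(40*mb), score(180*mb), score(2000*mb)) // 1 2 10
}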

182 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/image_locality_test.go generated vendored Normal file

@@ -0,0 +1,182 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestImageLocalityPriority(t *testing.T) {
test_40_250 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/250",
},
},
}
test_40_140 := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/40",
},
{
Image: "gcr.io/140",
},
},
}
test_min_max := v1.PodSpec{
Containers: []v1.Container{
{
Image: "gcr.io/10",
},
{
Image: "gcr.io/2000",
},
},
}
node_40_140_2000 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/40",
"gcr.io/40:v1",
"gcr.io/40:v1",
},
SizeBytes: int64(40 * mb),
},
{
Names: []string{
"gcr.io/140",
"gcr.io/140:v1",
},
SizeBytes: int64(140 * mb),
},
{
Names: []string{
"gcr.io/2000",
},
SizeBytes: int64(2000 * mb),
},
},
}
node_250_10 := v1.NodeStatus{
Images: []v1.ContainerImage{
{
Names: []string{
"gcr.io/250",
},
SizeBytes: int64(250 * mb),
},
{
Names: []string{
"gcr.io/10",
"gcr.io/10:v1",
},
SizeBytes: int64(10 * mb),
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
// Pod: gcr.io/40 gcr.io/250
// Node1
// Image: gcr.io/40 40MB
// Score: (40M-23M)/97.7M + 1 = 1
// Node2
// Image: gcr.io/250 250MB
// Score: (250M-23M)/97.7M + 1 = 3
pod: &v1.Pod{Spec: test_40_250},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine2", Score: 3}},
test: "two images spread on two nodes, prefer the larger image one",
},
{
// Pod: gcr.io/40 gcr.io/140
// Node1
// Image: gcr.io/40 40MB, gcr.io/140 140MB
// Score: (40M+140M-23M)/97.7M + 1 = 2
// Node2
// Image: not present
// Score: 0
pod: &v1.Pod{Spec: test_40_140},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 2}, {Host: "machine2", Score: 0}},
test: "two images on one node, prefer this node",
},
{
// Pod: gcr.io/2000 gcr.io/10
// Node1
// Image: gcr.io/2000 2000MB
// Score: 2000 > max score = 10
// Node2
// Image: gcr.io/10 10MB
// Score: 10 < min score = 0
pod: &v1.Pod{Spec: test_min_max},
nodes: []*v1.Node{makeImageNode("machine1", node_40_140_2000), makeImageNode("machine2", node_250_10)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}},
test: "if exceed limit, use limit",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(ImageLocalityPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}
func makeImageNode(node string, status v1.NodeStatus) *v1.Node {
return &v1.Node{
ObjectMeta: v1.ObjectMeta{Name: node},
Status: status,
}
}

241 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity.go generated vendored Normal file

@@ -0,0 +1,241 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"sync"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/util/workqueue"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type InterPodAffinity struct {
info predicates.NodeInfo
nodeLister algorithm.NodeLister
podLister algorithm.PodLister
hardPodAffinityWeight int
failureDomains priorityutil.Topologies
}
func NewInterPodAffinityPriority(
info predicates.NodeInfo,
nodeLister algorithm.NodeLister,
podLister algorithm.PodLister,
hardPodAffinityWeight int,
failureDomains []string) algorithm.PriorityFunction {
interPodAffinity := &InterPodAffinity{
info: info,
nodeLister: nodeLister,
podLister: podLister,
hardPodAffinityWeight: hardPodAffinityWeight,
failureDomains: priorityutil.Topologies{DefaultKeys: failureDomains},
}
return interPodAffinity.CalculateInterPodAffinityPriority
}
type podAffinityPriorityMap struct {
sync.Mutex
// nodes contain all nodes that should be considered
nodes []*v1.Node
// counts store the mapping from node name to so-far computed score of
// the node.
counts map[string]float64
// failureDomains contain default failure domains keys
failureDomains priorityutil.Topologies
// The first error that we faced.
firstError error
}
func newPodAffinityPriorityMap(nodes []*v1.Node, failureDomains priorityutil.Topologies) *podAffinityPriorityMap {
return &podAffinityPriorityMap{
nodes: nodes,
counts: make(map[string]float64, len(nodes)),
failureDomains: failureDomains,
}
}
func (p *podAffinityPriorityMap) setError(err error) {
p.Lock()
defer p.Unlock()
if p.firstError == nil {
p.firstError = err
}
}
func (p *podAffinityPriorityMap) processTerm(term *v1.PodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, weight float64) {
match, err := priorityutil.PodMatchesTermsNamespaceAndSelector(podToCheck, podDefiningAffinityTerm, term)
if err != nil {
p.setError(err)
return
}
if match {
func() {
p.Lock()
defer p.Unlock()
for _, node := range p.nodes {
if p.failureDomains.NodesHaveSameTopologyKey(node, fixedNode, term.TopologyKey) {
p.counts[node.Name] += weight
}
}
}()
}
}
func (p *podAffinityPriorityMap) processTerms(terms []v1.WeightedPodAffinityTerm, podDefiningAffinityTerm, podToCheck *v1.Pod, fixedNode *v1.Node, multiplier int) {
for i := range terms {
term := &terms[i]
p.processTerm(&term.PodAffinityTerm, podDefiningAffinityTerm, podToCheck, fixedNode, float64(term.Weight*int32(multiplier)))
}
}
// CalculateInterPodAffinityPriority computes a sum by iterating through the elements of weightedPodAffinityTerm and adding
// "weight" to the sum if the corresponding PodAffinityTerm is satisfied for
// that node; the node(s) with the highest sum are the most preferred.
// Symmetry needs to be considered for preferredDuringSchedulingIgnoredDuringExecution from podAffinity & podAntiAffinity,
// and symmetry also needs to be considered for hard requirements from podAffinity.
func (ipa *InterPodAffinity) CalculateInterPodAffinityPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
affinity, err := v1.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil {
return nil, err
}
hasAffinityConstraints := affinity != nil && affinity.PodAffinity != nil
hasAntiAffinityConstraints := affinity != nil && affinity.PodAntiAffinity != nil
allNodeNames := make([]string, 0, len(nodeNameToInfo))
for name := range nodeNameToInfo {
allNodeNames = append(allNodeNames, name)
}
// convert the topology key based weights to the node name based weights
var maxCount float64
var minCount float64
// priorityMap stores the mapping from node name to so-far computed score of
// the node.
pm := newPodAffinityPriorityMap(nodes, ipa.failureDomains)
processPod := func(existingPod *v1.Pod) error {
existingPodNode, err := ipa.info.GetNodeInfo(existingPod.Spec.NodeName)
if err != nil {
return err
}
existingPodAffinity, err := v1.GetAffinityFromPodAnnotations(existingPod.Annotations)
if err != nil {
return err
}
existingHasAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAffinity != nil
existingHasAntiAffinityConstraints := existingPodAffinity != nil && existingPodAffinity.PodAntiAffinity != nil
if hasAffinityConstraints {
// For every soft pod affinity term of <pod>, if <existingPod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, 1)
}
if hasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <pod>, if <existingPod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>`s node by the term`s weight.
terms := affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, pod, existingPod, existingPodNode, -1)
}
if existingHasAffinityConstraints {
// For every hard pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the constant <ipa.hardPodAffinityWeight>
if ipa.hardPodAffinityWeight > 0 {
terms := existingPodAffinity.PodAffinity.RequiredDuringSchedulingIgnoredDuringExecution
// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
//if len(existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution) != 0 {
// terms = append(terms, existingPodAffinity.PodAffinity.RequiredDuringSchedulingRequiredDuringExecution...)
//}
for _, term := range terms {
pm.processTerm(&term, existingPod, pod, existingPodNode, float64(ipa.hardPodAffinityWeight))
}
}
// For every soft pod affinity term of <existingPod>, if <pod> matches the term,
// increment <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, 1)
}
if existingHasAntiAffinityConstraints {
// For every soft pod anti-affinity term of <existingPod>, if <pod> matches the term,
// decrement <pm.counts> for every node in the cluster with the same <term.TopologyKey>
// value as that of <existingPod>'s node by the term's weight.
terms := existingPodAffinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution
pm.processTerms(terms, existingPod, pod, existingPodNode, -1)
}
return nil
}
processNode := func(i int) {
nodeInfo := nodeNameToInfo[allNodeNames[i]]
if hasAffinityConstraints || hasAntiAffinityConstraints {
// We need to process all the nodes.
for _, existingPod := range nodeInfo.Pods() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
} else {
// The pod doesn't have any constraints - we need to check only existing
// ones that have some.
for _, existingPod := range nodeInfo.PodsWithAffinity() {
if err := processPod(existingPod); err != nil {
pm.setError(err)
}
}
}
}
workqueue.Parallelize(16, len(allNodeNames), processNode)
if pm.firstError != nil {
return nil, pm.firstError
}
for _, node := range nodes {
if pm.counts[node.Name] > maxCount {
maxCount = pm.counts[node.Name]
}
if pm.counts[node.Name] < minCount {
minCount = pm.counts[node.Name]
}
}
// calculate final priority score for each node
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for _, node := range nodes {
fScore := float64(0)
if (maxCount - minCount) > 0 {
fScore = 10 * ((pm.counts[node.Name] - minCount) / (maxCount - minCount))
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof("%v -> %v: InterPodAffinityPriority, Score: (%d)", pod.Name, node.Name, int(fScore))
}
}
return result, nil
}
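The final loop normalizes the raw weighted sums onto the 0-10 scale. A standalone re-implementation of just that step, for illustration only (not part of this commit):

package main

import "fmt"

// normalize mirrors the scoring loop above: counts are shifted and scaled
// so the highest sum maps to 10. min and max start at zero, exactly as
// minCount/maxCount do in CalculateInterPodAffinityPriority.
func normalize(counts map[string]float64) map[string]int {
	var min, max float64
	for _, c := range counts {
		if c > max {
			max = c
		}
		if c < min {
			min = c
		}
	}
	scores := make(map[string]int, len(counts))
	for name, c := range counts {
		fScore := float64(0)
		if max-min > 0 {
			fScore = 10 * ((c - min) / (max - min))
		}
		scores[name] = int(fScore)
	}
	return scores
}

func main() {
	fmt.Println(normalize(map[string]float64{"n1": 3, "n2": -1, "n3": 0}))
	// map[n1:10 n2:0 n3:2]
}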

vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/interpod_affinity_test.go: file diff suppressed because it is too large

91 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/least_requested.go generated vendored Normal file

@@ -0,0 +1,91 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// LeastRequestedPriority is a priority function that favors nodes with fewer requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and
// prioritizes based on the average of the fractions of requested to capacity.
// Details: (cpu((capacity - sum(requested)) * 10 / capacity) + memory((capacity - sum(requested)) * 10 / capacity)) / 2
func LeastRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fallback to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateUnusedPriority(pod, nonZeroRequest, nodeInfo)
}
// The unused capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more unused resources the higher the score is.
func calculateUnusedScore(requested int64, capacity int64, node string) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
requested, capacity, node)
return 0
}
return ((capacity - requested) * 10) / capacity
}
// Calculates host priority based on the amount of unused resources.
// 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUnusedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuScore := calculateUnusedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
memoryScore := calculateUnusedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Least Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
cpuScore, memoryScore,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int((cpuScore + memoryScore) / 2),
}, nil
}
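A standalone re-implementation of the unused-capacity arithmetic, for illustration only (not part of this commit). Note the integer division: it truncates a CPU score of 2.5 to 2 before averaging, which is why the test file below expects a node score of 3 in that case:

package main

import "fmt"

// unusedScore mirrors calculateUnusedScore above: zero when capacity is
// zero or overcommitted, otherwise the unused fraction scaled to 0-10
// with integer truncation.
func unusedScore(requested, capacity int64) int64 {
	if capacity == 0 || requested > capacity {
		return 0
	}
	return ((capacity - requested) * 10) / capacity
}

func main() {
	cpu := unusedScore(3000, 4000)     // 2, not 2.5: integer division truncates
	memory := unusedScore(5000, 10000) // 5
	fmt.Println((cpu + memory) / 2)    // 3
}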

263 vendor/k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/least_requested_test.go generated vendored Normal file

@@ -0,0 +1,263 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestLeastRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
machine1Spec := v1.PodSpec{
NodeName: "machine1",
}
machine2Spec := v1.PodSpec{
NodeName: "machine2",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores (remaining resources) on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 3000) *10) / 4000 = 2.5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node1 Score: (2.5 + 5) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: ((6000 - 3000) *10) / 6000 = 5
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (10 + 10) / 2 = 10
Node2 scores on 0-10 scale
CPU Score: ((4000 - 0) *10) / 4000 = 10
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node2 Score: (10 + 10) / 2 = 10
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}},
test: "no resources requested, pods scheduled",
pods: []*v1.Pod{
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: machine1Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: machine2Spec, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 0) *10) / 20000 = 10
Node1 Score: (4 + 10) / 2 = 7
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node2 Score: (4 + 7.5) / 2 = 5
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 7}, {Host: "machine2", Score: 5}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 10000) *10) / 20000 = 5
Node2 Score: (4 + 5) / 2 = 4
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 4}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((20000 - 5000) *10) / 20000 = 7.5
Node1 Score: (4 + 7.5) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((10000 - 6000) *10) / 10000 = 4
Memory Score: ((50000 - 10000) *10) / 50000 = 8
Node2 Score: (4 + 8) / 2 = 6
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 50000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 6}},
test: "resources requested, pods scheduled with resources, differently sized machines",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 0) *10) / 10000 = 10
Node1 Score: (0 + 10) / 2 = 5
Node2 scores on 0-10 scale
CPU Score: ((4000 - 6000) *10) / 4000 = 0
Memory Score: ((10000 - 5000) *10) / 10000 = 5
Node2 Score: (0 + 5) / 2 = 2
*/
pod: &v1.Pod{Spec: cpuOnly},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 5}, {Host: "machine2", Score: 2}},
test: "requested resources exceed node capacity",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
{
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 0, 0), makeNode("machine2", 0, 0)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "zero node resources, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(LeastRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,50 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// priorityMetadata is a type that is passed as metadata for priority functions
type priorityMetadata struct {
nonZeroRequest *schedulercache.Resource
podTolerations []v1.Toleration
affinity *v1.Affinity
}
// PriorityMetadata is a MetadataProducer. Node info can be nil.
func PriorityMetadata(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
// If we cannot compute metadata, just return nil
if pod == nil {
return nil
}
tolerations, err := getTolerationListFromPod(pod)
if err != nil {
return nil
}
affinity, err := v1.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil {
return nil
}
return &priorityMetadata{
nonZeroRequest: getNonZeroRequests(pod),
podTolerations: tolerations,
affinity: affinity,
}
}
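// Illustrative sketch: exampleUsePriorityMetadata is a hypothetical helper,
// not part of the vendored scheduler, showing how a priority map function
// consumes this producer's output: type-assert the opaque interface{} and
// fall back to recomputing per pod when the assertion fails.
func exampleUsePriorityMetadata(pod *v1.Pod, meta interface{}) *schedulercache.Resource {
	if priorityMeta, ok := meta.(*priorityMetadata); ok {
		return priorityMeta.nonZeroRequest // precomputed once per scheduling cycle
	}
	return getNonZeroRequests(pod) // fallback, mirrors MostRequestedPriorityMap
}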

View file

@ -0,0 +1,94 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// MostRequestedPriority is a priority function that favors nodes with most requested resources.
// It calculates the percentage of memory and CPU requested by pods scheduled on the node, and prioritizes
// based on the average of the fractions of requested to capacity.
// Details: (cpu(10 * sum(requested) / capacity) + memory(10 * sum(requested) / capacity)) / 2
func MostRequestedPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
var nonZeroRequest *schedulercache.Resource
if priorityMeta, ok := meta.(*priorityMetadata); ok {
nonZeroRequest = priorityMeta.nonZeroRequest
} else {
// We couldn't parse metadata - fall back to computing it.
nonZeroRequest = getNonZeroRequests(pod)
}
return calculateUsedPriority(pod, nonZeroRequest, nodeInfo)
}
// The used capacity is calculated on a scale of 0-10
// 0 being the lowest priority and 10 being the highest.
// The more resources are used, the higher the score is. This function
// is almost a reversed version of least_requested_priority.calculateUnusedScore
// (10 - calculateUnusedScore). The main difference is in rounding. It was added to
// keep the final formula clean and not to modify the widely used (by users
// in their default scheduling policies) calculateUnusedScore.
func calculateUsedScore(requested int64, capacity int64, node string) int64 {
if capacity == 0 {
return 0
}
if requested > capacity {
glog.V(4).Infof("Combined requested resources %d from existing pods exceeds capacity %d on node %s",
requested, capacity, node)
return 0
}
return (requested * 10) / capacity
}
// Calculate the resource used on a node. 'node' has information about the resources on the node.
// 'pods' is a list of pods currently scheduled on the node.
func calculateUsedPriority(pod *v1.Pod, podRequests *schedulercache.Resource, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
allocatableResources := nodeInfo.AllocatableResource()
totalResources := *podRequests
totalResources.MilliCPU += nodeInfo.NonZeroRequest().MilliCPU
totalResources.Memory += nodeInfo.NonZeroRequest().Memory
cpuScore := calculateUsedScore(totalResources.MilliCPU, allocatableResources.MilliCPU, node.Name)
memoryScore := calculateUsedScore(totalResources.Memory, allocatableResources.Memory, node.Name)
if glog.V(10) {
// We explicitly check the verbosity level first to avoid computing all the parameters when this is
// not logged. There is a visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: Most Requested Priority, capacity %d millicores %d memory bytes, total request %d millicores %d memory bytes, score %d CPU %d memory",
pod.Name, node.Name,
allocatableResources.MilliCPU, allocatableResources.Memory,
totalResources.MilliCPU, totalResources.Memory,
cpuScore, memoryScore,
)
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int((cpuScore + memoryScore) / 2),
}, nil
}
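// Illustrative sketch: exampleUsedVersusReversedUnused is a hypothetical
// helper, not part of the vendored scheduler, making the rounding note on
// calculateUsedScore concrete. With 1000 of 4000 millicores requested, the
// used score is 2, while 10 minus the unused score would give 3.
func exampleUsedVersusReversedUnused() (used, reversedUnused int64) {
	requested, capacity := int64(1000), int64(4000)
	used = (requested * 10) / capacity                       // (1000*10)/4000 = 2
	reversedUnused = 10 - ((capacity-requested)*10)/capacity // 10 - 7 = 3
	return used, reversedUnused
}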

View file

@ -0,0 +1,182 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestMostRequested(t *testing.T) {
labels1 := map[string]string{
"foo": "bar",
"baz": "blah",
}
labels2 := map[string]string{
"bar": "foo",
"baz": "blah",
}
noResources := v1.PodSpec{
Containers: []v1.Container{},
}
cpuOnly := v1.PodSpec{
NodeName: "machine1",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("0"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("0"),
},
},
},
},
}
cpuOnly2 := cpuOnly
cpuOnly2.NodeName = "machine2"
cpuAndMemory := v1.PodSpec{
NodeName: "machine2",
Containers: []v1.Container{
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("1000m"),
"memory": resource.MustParse("2000"),
},
},
},
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{
"cpu": resource.MustParse("2000m"),
"memory": resource.MustParse("3000"),
},
},
},
},
}
tests := []struct {
pod *v1.Pod
pods []*v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
/*
Node1 scores (used resources) on 0-10 scale
CPU Score: (0 * 10) / 4000 = 0
Memory Score: (0 * 10) / 10000 = 0
Node1 Score: (0 + 0) / 2 = 0
Node2 scores (used resources) on 0-10 scale
CPU Score: (0 * 10) / 4000 = 0
Memory Score: (0 * 10) / 10000 = 0
Node2 Score: (0 + 0) / 2 = 0
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 4000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}},
test: "nothing scheduled, nothing requested",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (3000 * 10) / 4000 = 7.5
Memory Score: (5000 * 10) / 10000 = 5
Node1 Score: (7.5 + 5) / 2 = 6
Node2 scores on 0-10 scale
CPU Score: (3000 * 10) / 6000 = 5
Memory Score: (5000 * 10) / 10000 = 5
Node2 Score: (5 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 4000, 10000), makeNode("machine2", 6000, 10000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 6}, {Host: "machine2", Score: 5}},
test: "nothing scheduled, resources requested, differently sized machines",
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (0 * 10) / 20000 = 0
Node1 Score: (6 + 0) / 2 = 3
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node2 Score: (6 + 2.5) / 2 = 4
*/
pod: &v1.Pod{Spec: noResources},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 3}, {Host: "machine2", Score: 4}},
test: "no resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels2}},
{Spec: cpuOnly, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuOnly2, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
{Spec: cpuAndMemory, ObjectMeta: v1.ObjectMeta{Labels: labels1}},
},
},
{
/*
Node1 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (5000 * 10) / 20000 = 2.5
Node1 Score: (6 + 2.5) / 2 = 4
Node2 scores on 0-10 scale
CPU Score: (6000 * 10) / 10000 = 6
Memory Score: (10000 * 10) / 20000 = 5
Node2 Score: (6 + 5) / 2 = 5
*/
pod: &v1.Pod{Spec: cpuAndMemory},
nodes: []*v1.Node{makeNode("machine1", 10000, 20000), makeNode("machine2", 10000, 20000)},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 4}, {Host: "machine2", Score: 5}},
test: "resources requested, pods scheduled with resources",
pods: []*v1.Pod{
{Spec: cpuOnly},
{Spec: cpuAndMemory},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(test.pods, test.nodes)
list, err := priorityFunction(MostRequestedPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,105 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// CalculateNodeAffinityPriority prioritizes nodes according to node affinity scheduling preferences
// indicated in PreferredDuringSchedulingIgnoredDuringExecution. Each time a node matches a preferredSchedulingTerm,
// it gets an addition of preferredSchedulingTerm.Weight. Thus, the more preferredSchedulingTerms
// a node satisfies, and the greater the weights of the satisfied terms, the higher the
// score the node gets.
func CalculateNodeAffinityPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var affinity *v1.Affinity
if priorityMeta, ok := meta.(*priorityMetadata); ok {
affinity = priorityMeta.affinity
} else {
// We couldn't parse metadata - fallback to computing it.
var err error
affinity, err = v1.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil {
return schedulerapi.HostPriority{}, err
}
}
var count int32
// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
// empty PreferredSchedulingTerm matches all objects.
if affinity != nil && affinity.NodeAffinity != nil && affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution != nil {
// Match PreferredDuringSchedulingIgnoredDuringExecution term by term.
for i := range affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution {
preferredSchedulingTerm := &affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution[i]
if preferredSchedulingTerm.Weight == 0 {
continue
}
// TODO: Avoid computing it for all nodes if this becomes a performance problem.
nodeSelector, err := v1.NodeSelectorRequirementsAsSelector(preferredSchedulingTerm.Preference.MatchExpressions)
if err != nil {
return schedulerapi.HostPriority{}, err
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
count += preferredSchedulingTerm.Weight
}
}
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: int(count),
}, nil
}
func CalculateNodeAffinityPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
var maxCount int
for i := range result {
if result[i].Score > maxCount {
maxCount = result[i].Score
}
}
maxCountFloat := float64(maxCount)
var fScore float64
for i := range result {
if maxCount > 0 {
fScore = 10 * (float64(result[i].Score) / maxCountFloat)
} else {
fScore = 0
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is a visible performance gain from it.
glog.Infof("%v -> %v: NodeAffinityPriority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
}
result[i].Score = int(fScore)
}
return nil
}
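// Illustrative sketch: exampleNormalizeAffinityScores is a hypothetical
// helper, not part of the vendored scheduler, tracing the reduce step above.
// With preferred terms weighted 2, 4, and 5 (as in the tests below), a node
// matching all three terms scores 11 raw and normalizes to 10, one matching
// only the weight-4 term gets int(10*4/11) = 3, and one matching only the
// weight-2 term gets int(10*2/11) = 1.
func exampleNormalizeAffinityScores(raw []int) []int {
	maxCount := 0
	for _, score := range raw {
		if score > maxCount {
			maxCount = score
		}
	}
	normalized := make([]int, len(raw))
	for i, score := range raw {
		if maxCount > 0 {
			normalized[i] = int(10 * (float64(score) / float64(maxCount)))
		}
	}
	return normalized
}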

View file

@ -0,0 +1,168 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNodeAffinityPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"key": "value"}
label3 := map[string]string{"az": "az1"}
label4 := map[string]string{"abc": "az11", "def": "az22"}
label5 := map[string]string{"foo": "bar", "key": "value", "az": "az1"}
affinity1 := map[string]string{
v1.AffinityAnnotationKey: `
{"nodeAffinity": {"preferredDuringSchedulingIgnoredDuringExecution": [
{
"weight": 2,
"preference": {
"matchExpressions": [
{
"key": "foo",
"operator": "In", "values": ["bar"]
}
]
}
}
]}}`,
}
affinity2 := map[string]string{
v1.AffinityAnnotationKey: `
{"nodeAffinity": {"preferredDuringSchedulingIgnoredDuringExecution": [
{
"weight": 2,
"preference": {"matchExpressions": [
{
"key": "foo",
"operator": "In", "values": ["bar"]
}
]}
},
{
"weight": 4,
"preference": {"matchExpressions": [
{
"key": "key",
"operator": "In", "values": ["value"]
}
]}
},
{
"weight": 5,
"preference": {"matchExpressions": [
{
"key": "foo",
"operator": "In", "values": ["bar"]
},
{
"key": "key",
"operator": "In", "values": ["value"]
},
{
"key": "az",
"operator": "In", "values": ["az1"]
}
]}
}
]}}`,
}
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: map[string]string{},
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "all machines are same priority as NodeAffinity is nil",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label4}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "no machine macthes preferred scheduling requirements in NodeAffinity of pod so all machines' priority is zero",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: affinity1,
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
test: "only machine1 matches the preferred scheduling requirements of pod",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: affinity2,
},
},
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine5", Labels: label5}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 1}, {Host: "machine5", Score: 10}, {Host: "machine2", Score: 3}},
test: "all machines matches the preferred scheduling requirements of pod but with different priorities ",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
nap := priorityFunction(CalculateNodeAffinityPriorityMap, CalculateNodeAffinityPriorityReduce)
list, err := nap(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: \nexpected %#v, \ngot %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type NodeLabelPrioritizer struct {
label string
presence bool
}
func NewNodeLabelPriority(label string, presence bool) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
labelPrioritizer := &NodeLabelPrioritizer{
label: label,
presence: presence,
}
return labelPrioritizer.CalculateNodeLabelPriorityMap, nil
}
// CalculateNodeLabelPriorityMap checks whether a particular label exists on a node or not, regardless of its value.
// If presence is true, prioritizes nodes that have the specified label, regardless of value.
// If presence is false, prioritizes nodes that do not have the specified label.
func (n *NodeLabelPrioritizer) CalculateNodeLabelPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
exists := labels.Set(node.Labels).Has(n.label)
score := 0
if (exists && n.presence) || (!exists && !n.presence) {
score = 10
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: score,
}, nil
}
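// Illustrative sketch: exampleNodeLabelPriority is a hypothetical wiring, not
// part of the vendored scheduler, favoring nodes that carry a made-up
// "disktype" label. The reduce function is nil because the map function above
// already emits final 0-or-10 scores that need no normalization pass.
func exampleNodeLabelPriority() (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
	return NewNodeLabelPriority("disktype", true)
}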

View file

@ -0,0 +1,121 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNewNodeLabelPriority(t *testing.T) {
label1 := map[string]string{"foo": "bar"}
label2 := map[string]string{"bar": "foo"}
label3 := map[string]string{"bar": "baz"}
tests := []struct {
nodes []*v1.Node
label string
presence bool
expectedList schedulerapi.HostPriorityList
test string
}{
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "baz",
presence: true,
test: "no match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
label: "baz",
presence: false,
test: "no match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "foo",
presence: true,
test: "one match found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
label: "foo",
presence: false,
test: "one match found, presence false",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
label: "bar",
presence: true,
test: "two matches found, presence true",
},
{
nodes: []*v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "machine1", Labels: label1}},
{ObjectMeta: v1.ObjectMeta{Name: "machine2", Labels: label2}},
{ObjectMeta: v1.ObjectMeta{Name: "machine3", Labels: label3}},
},
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 0}},
label: "bar",
presence: false,
test: "two matches found, presence false",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(NewNodeLabelPriority(test.label, test.presence))(nil, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,60 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"k8s.io/kubernetes/pkg/api/v1"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func CalculateNodePreferAvoidPodsPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
controllerRef := priorityutil.GetControllerRef(pod)
if controllerRef != nil {
// Ignore pods that are owned by a controller other than a ReplicationController
// or a ReplicaSet.
if controllerRef.Kind != "ReplicationController" && controllerRef.Kind != "ReplicaSet" {
controllerRef = nil
}
}
if controllerRef == nil {
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
avoids, err := v1.GetAvoidPodsFromNodeAnnotations(node.Annotations)
if err != nil {
// If we cannot get the annotation, assume the pod can be scheduled there.
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}
for i := range avoids.PreferAvoidPods {
avoid := &avoids.PreferAvoidPods[i]
if controllerRef != nil {
if avoid.PodSignature.PodController.Kind == controllerRef.Kind && avoid.PodSignature.PodController.UID == controllerRef.UID {
return schedulerapi.HostPriority{Host: node.Name, Score: 0}, nil
}
}
}
return schedulerapi.HostPriority{Host: node.Name, Score: 10}, nil
}

View file

@ -0,0 +1,155 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"reflect"
"sort"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestNodePreferAvoidPriority(t *testing.T) {
annotations1 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicationController",
"name": "foo",
"uid": "abcdef123456",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
annotations2 := map[string]string{
v1.PreferAvoidPodsAnnotationKey: `
{
"preferAvoidPods": [
{
"podSignature": {
"podController": {
"apiVersion": "v1",
"kind": "ReplicaSet",
"name": "foo",
"uid": "qwert12345",
"controller": true
}
},
"reason": "some reason",
"message": "some message"
}
]
}`,
}
testNodes := []*v1.Node{
{
ObjectMeta: v1.ObjectMeta{Name: "machine1", Annotations: annotations1},
},
{
ObjectMeta: v1.ObjectMeta{Name: "machine2", Annotations: annotations2},
},
{
ObjectMeta: v1.ObjectMeta{Name: "machine3"},
},
}
trueVar := true
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 0}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
test: "pod managed by ReplicationController should avoid a node, this node get lowest priority score",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "RandomController", Name: "foo", UID: "abcdef123456", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
test: "ownership by random controller should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "ReplicationController", Name: "foo", UID: "abcdef123456"},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 10}, {Host: "machine3", Score: 10}},
test: "owner without Controller field set should be ignored",
},
{
pod: &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Namespace: "default",
OwnerReferences: []v1.OwnerReference{
{Kind: "ReplicaSet", Name: "foo", UID: "qwert12345", Controller: &trueVar},
},
},
},
nodes: testNodes,
expectedList: []schedulerapi.HostPriority{{Host: "machine1", Score: 10}, {Host: "machine2", Score: 0}, {Host: "machine3", Score: 10}},
test: "pod managed by ReplicaSet should avoid a node, this node get lowest priority score",
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
list, err := priorityFunction(CalculateNodePreferAvoidPodsPriorityMap, nil)(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("unexpected error: %v", err)
}
// sort the two lists to avoid failures on account of different ordering
sort.Sort(test.expectedList)
sort.Sort(list)
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s: expected %#v, got %#v", test.test, test.expectedList, list)
}
}
}

View file

@ -0,0 +1,117 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"os/exec"
"path/filepath"
"testing"
"k8s.io/gengo/parser"
"k8s.io/gengo/types"
"k8s.io/kubernetes/pkg/util/codeinspector"
)
func getPrioritySignatures() ([]*types.Signature, error) {
filePath := "./../types.go"
pkgName := filepath.Dir(filePath)
builder := parser.New()
if err := builder.AddDir(pkgName); err != nil {
return nil, err
}
universe, err := builder.FindTypes()
if err != nil {
return nil, err
}
signatures := []string{"PriorityFunction", "PriorityMapFunction", "PriorityReduceFunction"}
results := make([]*types.Signature, 0, len(signatures))
for _, signature := range signatures {
result, ok := universe[pkgName].Types[signature]
if !ok {
return nil, fmt.Errorf("%s type not defined", signature)
}
results = append(results, result.Signature)
}
return results, nil
}
func TestPrioritiesRegistered(t *testing.T) {
var functions []*types.Type
// Files and directories in which priorities may be referenced
targetFiles := []string{
"./../../algorithmprovider/defaults/defaults.go", // Default algorithm
"./../../factory/plugins.go", // Registered in init()
}
// List all golang source files under ./priorities/, excluding test files and sub-directories.
files, err := codeinspector.GetSourceCodeFiles(".")
if err != nil {
t.Errorf("unexpected error: %v when listing files in current directory", err)
}
// Get all public priorities in files.
for _, filePath := range files {
fileFunctions, err := codeinspector.GetPublicFunctions("k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities", filePath)
if err == nil {
functions = append(functions, fileFunctions...)
} else {
t.Errorf("unexpected error when parsing %s: %v", filePath, err)
}
}
prioritySignatures, err := getPrioritySignatures()
if err != nil {
t.Fatalf("couldn't get priority signatures: %v", err)
}
// Check if all public priorities are referenced in target files.
for _, function := range functions {
// Ignore functions that don't match priorities signatures.
signature := function.Underlying.Signature
match := false
for _, prioritySignature := range prioritySignatures {
if len(prioritySignature.Parameters) != len(signature.Parameters) {
continue
}
if len(prioritySignature.Results) != len(signature.Results) {
continue
}
// TODO: Check exact types of parameters and results.
match = true
}
if !match {
continue
}
args := []string{"-rl", function.Name.Name}
args = append(args, targetFiles...)
err := exec.Command("grep", args...).Run()
if err != nil {
switch err.Error() {
case "exit status 2":
t.Errorf("unexpected error when checking %s", function.Name)
case "exit status 1":
t.Errorf("priority %s is implemented as public but seems not registered or used in any other place",
function.Name)
}
}
}
}

View file

@ -0,0 +1,259 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"sync"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/labels"
utilnode "k8s.io/kubernetes/pkg/util/node"
"k8s.io/kubernetes/pkg/util/workqueue"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// The maximum priority value to give to a node
// Priority values range from 0-maxPriority
const maxPriority float32 = 10
// When zone information is present, give 2/3 of the weighting to zone spreading, 1/3 to node spreading
// TODO: Any way to justify this weighting?
const zoneWeighting = 2.0 / 3.0
type SelectorSpread struct {
serviceLister algorithm.ServiceLister
controllerLister algorithm.ControllerLister
replicaSetLister algorithm.ReplicaSetLister
}
func NewSelectorSpreadPriority(
serviceLister algorithm.ServiceLister,
controllerLister algorithm.ControllerLister,
replicaSetLister algorithm.ReplicaSetLister) algorithm.PriorityFunction {
selectorSpread := &SelectorSpread{
serviceLister: serviceLister,
controllerLister: controllerLister,
replicaSetLister: replicaSetLister,
}
return selectorSpread.CalculateSpreadPriority
}
// Returns selectors of services, RCs and RSs matching the given pod.
func getSelectors(pod *v1.Pod, sl algorithm.ServiceLister, cl algorithm.ControllerLister, rsl algorithm.ReplicaSetLister) []labels.Selector {
selectors := make([]labels.Selector, 0, 3)
if services, err := sl.GetPodServices(pod); err == nil {
for _, service := range services {
selectors = append(selectors, labels.SelectorFromSet(service.Spec.Selector))
}
}
if rcs, err := cl.GetPodControllers(pod); err == nil {
for _, rc := range rcs {
selectors = append(selectors, labels.SelectorFromSet(rc.Spec.Selector))
}
}
if rss, err := rsl.GetPodReplicaSets(pod); err == nil {
for _, rs := range rss {
if selector, err := metav1.LabelSelectorAsSelector(rs.Spec.Selector); err == nil {
selectors = append(selectors, selector)
}
}
}
return selectors
}
func (s *SelectorSpread) getSelectors(pod *v1.Pod) []labels.Selector {
return getSelectors(pod, s.serviceLister, s.controllerLister, s.replicaSetLister)
}
// CalculateSpreadPriority spreads pods across hosts and zones, considering pods belonging to the same service or replication controller.
// When a pod is scheduled, it looks for services, RCs or RSs that match the pod, then finds existing pods that match those selectors.
// It favors nodes that have fewer existing matching pods.
// i.e. it pushes the scheduler towards a node where there's the smallest number of
// pods which match the same service, RC or RS selectors as the pod being scheduled.
// Where zone information is included on the nodes, it favors nodes in zones with fewer existing matching pods.
func (s *SelectorSpread) CalculateSpreadPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
selectors := s.getSelectors(pod)
// Count similar pods by node
countsByNodeName := make(map[string]float32, len(nodes))
countsByZone := make(map[string]float32, 10)
maxCountByNodeName := float32(0)
countsByNodeNameLock := sync.Mutex{}
if len(selectors) > 0 {
processNodeFunc := func(i int) {
nodeName := nodes[i].Name
count := float32(0)
for _, nodePod := range nodeNameToInfo[nodeName].Pods() {
if pod.Namespace != nodePod.Namespace {
continue
}
// When we are replacing a failed pod, we often see the previous
// deleted version while scheduling the replacement.
// Ignore the previous deleted version for spreading purposes
// (it can still be considered for resource restrictions etc.)
if nodePod.DeletionTimestamp != nil {
glog.V(4).Infof("skipping pending-deleted pod: %s/%s", nodePod.Namespace, nodePod.Name)
continue
}
matches := false
for _, selector := range selectors {
if selector.Matches(labels.Set(nodePod.ObjectMeta.Labels)) {
matches = true
break
}
}
if matches {
count++
}
}
zoneId := utilnode.GetZoneKey(nodes[i])
countsByNodeNameLock.Lock()
defer countsByNodeNameLock.Unlock()
countsByNodeName[nodeName] = count
if count > maxCountByNodeName {
maxCountByNodeName = count
}
if zoneId != "" {
countsByZone[zoneId] += count
}
}
workqueue.Parallelize(16, len(nodes), processNodeFunc)
}
// Aggregate by-zone information
// Compute the maximum number of pods hosted in any zone
haveZones := len(countsByZone) != 0
maxCountByZone := float32(0)
for _, count := range countsByZone {
if count > maxCountByZone {
maxCountByZone = count
}
}
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
// score int - scale of 0-maxPriority
// 0 being the lowest priority and maxPriority being the highest
for _, node := range nodes {
// initializing to the default/max node score of maxPriority
fScore := maxPriority
if maxCountByNodeName > 0 {
fScore = maxPriority * ((maxCountByNodeName - countsByNodeName[node.Name]) / maxCountByNodeName)
}
// If there is zone information present, incorporate it
if haveZones {
zoneId := utilnode.GetZoneKey(node)
if zoneId != "" {
zoneScore := maxPriority * ((maxCountByZone - countsByZone[zoneId]) / maxCountByZone)
fScore = (fScore * (1.0 - zoneWeighting)) + (zoneWeighting * zoneScore)
}
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: int(fScore)})
if glog.V(10) {
// We explicitly check the verbosity level first to avoid computing all the parameters when this is
// not logged. There is a visible performance gain from it.
glog.V(10).Infof(
"%v -> %v: SelectorSpreadPriority, Score: (%d)", pod.Name, node.Name, int(fScore),
)
}
}
return result, nil
}
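// Illustrative sketch: exampleZoneBlend is a hypothetical helper, not part of
// the vendored scheduler, isolating the zone blend above. With zoneWeighting
// = 2/3, a node scoring 9 on its own but sitting in a zone scoring 3 lands
// at 9*(1/3) + 3*(2/3) = 5, steering pods toward emptier zones even when an
// individual node looks attractive.
func exampleZoneBlend(nodeScore, zoneScore float32) float32 {
	return nodeScore*(1.0-zoneWeighting) + zoneWeighting*zoneScore
}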
type ServiceAntiAffinity struct {
podLister algorithm.PodLister
serviceLister algorithm.ServiceLister
label string
}
func NewServiceAntiAffinityPriority(podLister algorithm.PodLister, serviceLister algorithm.ServiceLister, label string) algorithm.PriorityFunction {
antiAffinity := &ServiceAntiAffinity{
podLister: podLister,
serviceLister: serviceLister,
label: label,
}
return antiAffinity.CalculateAntiAffinityPriority
}
// CalculateAntiAffinityPriority spreads pods by minimizing the number of pods belonging to the same service
// on machines with the same value for a particular label.
// The label to be considered is provided to the struct (ServiceAntiAffinity).
func (s *ServiceAntiAffinity) CalculateAntiAffinityPriority(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
var nsServicePods []*v1.Pod
if services, err := s.serviceLister.GetPodServices(pod); err == nil && len(services) > 0 {
// just use the first service and get the other pods within the service
// TODO: a separate predicate can be created that tries to handle all services for the pod
selector := labels.SelectorFromSet(services[0].Spec.Selector)
pods, err := s.podLister.List(selector)
if err != nil {
return nil, err
}
// consider only the pods that belong to the same namespace
for _, nsPod := range pods {
if nsPod.Namespace == pod.Namespace {
nsServicePods = append(nsServicePods, nsPod)
}
}
}
// separate out the nodes that have the label from the ones that don't
otherNodes := []string{}
labeledNodes := map[string]string{}
for _, node := range nodes {
if labels.Set(node.Labels).Has(s.label) {
label := labels.Set(node.Labels).Get(s.label)
labeledNodes[node.Name] = label
} else {
otherNodes = append(otherNodes, node.Name)
}
}
podCounts := map[string]int{}
for _, pod := range nsServicePods {
label, exists := labeledNodes[pod.Spec.NodeName]
if !exists {
continue
}
podCounts[label]++
}
numServicePods := len(nsServicePods)
result := []schedulerapi.HostPriority{}
// score int - scale of 0-maxPriority
// 0 being the lowest priority and maxPriority being the highest
for node := range labeledNodes {
// initializing to the default/max node score of maxPriority
fScore := float32(maxPriority)
if numServicePods > 0 {
fScore = maxPriority * (float32(numServicePods-podCounts[labeledNodes[node]]) / float32(numServicePods))
}
result = append(result, schedulerapi.HostPriority{Host: node, Score: int(fScore)})
}
// add the remaining (unlabeled) nodes with a score of 0
for _, node := range otherNodes {
result = append(result, schedulerapi.HostPriority{Host: node, Score: 0})
}
return result, nil
}
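// Illustrative sketch: exampleAntiAffinityScore is a hypothetical helper, not
// part of the vendored scheduler, restating the scoring loop above. With 10
// matching service pods in total, a label value already hosting 6 of them
// scores int(10*(10-6)/10) = 4, a label value hosting none scores the full
// 10, and nodes lacking the label are appended with score 0.
func exampleAntiAffinityScore(numServicePods, podsOnLabelValue int) int {
	fScore := maxPriority
	if numServicePods > 0 {
		fScore = maxPriority * (float32(numServicePods-podsOnLabelValue) / float32(numServicePods))
	}
	return int(fScore)
}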

File diff suppressed because it is too large

View file

@ -0,0 +1,116 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// countIntolerableTaintsPreferNoSchedule gives the count of taints with effect PreferNoSchedule that the pod does not tolerate
func countIntolerableTaintsPreferNoSchedule(taints []v1.Taint, tolerations []v1.Toleration) (intolerableTaints int) {
for i := range taints {
taint := &taints[i]
// check only on taints that have effect PreferNoSchedule
if taint.Effect != v1.TaintEffectPreferNoSchedule {
continue
}
if !v1.TaintToleratedByTolerations(taint, tolerations) {
intolerableTaints++
}
}
return
}
// getAllTolerationPreferNoSchedule gets the list of all Tolerations with Effect PreferNoSchedule or with no Effect set
func getAllTolerationPreferNoSchedule(tolerations []v1.Toleration) (tolerationList []v1.Toleration) {
for i := range tolerations {
toleration := &tolerations[i]
if len(toleration.Effect) == 0 || toleration.Effect == v1.TaintEffectPreferNoSchedule {
tolerationList = append(tolerationList, *toleration)
}
}
return
}
func getTolerationListFromPod(pod *v1.Pod) ([]v1.Toleration, error) {
tolerations, err := v1.GetTolerationsFromPodAnnotations(pod.Annotations)
if err != nil {
return nil, err
}
return getAllTolerationPreferNoSchedule(tolerations), nil
}
// ComputeTaintTolerationPriorityMap prepares the priority list for all the nodes based on the number of intolerable taints on the node
func ComputeTaintTolerationPriorityMap(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
var tolerationList []v1.Toleration
if priorityMeta, ok := meta.(*priorityMetadata); ok {
tolerationList = priorityMeta.podTolerations
} else {
var err error
tolerationList, err = getTolerationListFromPod(pod)
if err != nil {
return schedulerapi.HostPriority{}, err
}
}
taints, err := v1.GetTaintsFromNodeAnnotations(node.Annotations)
if err != nil {
return schedulerapi.HostPriority{}, err
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: countIntolerableTaintsPreferNoSchedule(taints, tolerationList),
}, nil
}
func ComputeTaintTolerationPriorityReduce(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error {
var maxCount int
for i := range result {
if result[i].Score > maxCount {
maxCount = result[i].Score
}
}
maxCountFloat := float64(maxCount)
// The maximum priority value to give to a node
// Priority values range from 0 - maxPriority
const maxPriority = float64(10)
for i := range result {
fScore := maxPriority
if maxCountFloat > 0 {
fScore = (1.0 - float64(result[i].Score)/maxCountFloat) * maxPriority
}
if glog.V(10) {
// We explicitly don't do glog.V(10).Infof() to avoid computing all the parameters if this is
// not logged. There is a visible performance gain from it.
glog.Infof("%v -> %v: Taint Toleration Priority, Score: (%d)", pod.Name, result[i].Host, int(fScore))
}
result[i].Score = int(fScore)
}
return nil
}
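// Illustrative sketch: exampleInvertTaintCounts is a hypothetical helper, not
// part of the vendored scheduler, tracing the reduce step above. The map
// phase emits raw counts of intolerable PreferNoSchedule taints; the reduce
// phase inverts them, so counts {0, 1, 2} (as in the tests below) become
// scores {10, 5, 0}.
func exampleInvertTaintCounts(counts []int) []int {
	maxCount := 0
	for _, c := range counts {
		if c > maxCount {
			maxCount = c
		}
	}
	scores := make([]int, len(counts))
	for i, c := range counts {
		scores[i] = 10 // default when no node has any intolerable taint
		if maxCount > 0 {
			scores[i] = int((1.0 - float64(c)/float64(maxCount)) * 10)
		}
	}
	return scores
}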

View file

@ -0,0 +1,225 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"encoding/json"
"reflect"
"testing"
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func nodeWithTaints(nodeName string, taints []v1.Taint) *v1.Node {
taintsData, _ := json.Marshal(taints)
return &v1.Node{
ObjectMeta: v1.ObjectMeta{
Name: nodeName,
Annotations: map[string]string{
v1.TaintsAnnotationKey: string(taintsData),
},
},
}
}
func podWithTolerations(tolerations []v1.Toleration) *v1.Pod {
tolerationData, _ := json.Marshal(tolerations)
return &v1.Pod{
ObjectMeta: v1.ObjectMeta{
Annotations: map[string]string{
v1.TolerationsAnnotationKey: string(tolerationData),
},
},
}
}
// TestTaintAndToleration creates a set of nodes with varying taints and pods
// with varying tolerations, and verifies the resulting priority scores.
func TestTaintAndToleration(t *testing.T) {
tests := []struct {
pod *v1.Pod
nodes []*v1.Node
expectedList schedulerapi.HostPriorityList
test string
}{
// basic test case
{
test: "node with taints tolerated by the pod, gets a higher score than those node with intolerable taints",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodeWithTaints("nodeB", []v1.Taint{{
Key: "foo",
Value: "blah",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 0},
},
},
// the count of taints that are tolerated by the pod does not matter.
{
test: "the nodes that all of their taints are tolerated by the pod, get the same score, no matter how many tolerable taints a node has",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 10},
{Host: "nodeC", Score: 10},
},
},
// the count of taints on a node that are not tolerated by the pod matters.
{
test: "the more intolerable taints a node has, the lower score it gets.",
pod: podWithTolerations([]v1.Toleration{{
Key: "foo",
Operator: v1.TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectPreferNoSchedule,
}}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 5},
{Host: "nodeC", Score: 0},
},
},
// the taints-tolerations priority only takes into account taints and tolerations that have effect PreferNoSchedule
{
test: "only taints and tolerations that have effect PreferNoSchedule are checked by taints-tolerations priority function",
pod: podWithTolerations([]v1.Toleration{
{
Key: "cpu-type",
Operator: v1.TolerationOpEqual,
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
}, {
Key: "disk-type",
Operator: v1.TolerationOpEqual,
Value: "ssd",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodes: []*v1.Node{
nodeWithTaints("nodeA", []v1.Taint{}),
nodeWithTaints("nodeB", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectNoSchedule,
},
}),
nodeWithTaints("nodeC", []v1.Taint{
{
Key: "cpu-type",
Value: "arm64",
Effect: v1.TaintEffectPreferNoSchedule,
}, {
Key: "disk-type",
Value: "ssd",
Effect: v1.TaintEffectPreferNoSchedule,
},
}),
},
expectedList: []schedulerapi.HostPriority{
{Host: "nodeA", Score: 10},
{Host: "nodeB", Score: 10},
{Host: "nodeC", Score: 0},
},
},
}
for _, test := range tests {
nodeNameToInfo := schedulercache.CreateNodeNameToInfoMap(nil, test.nodes)
ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce)
list, err := ttp(test.pod, nodeNameToInfo, test.nodes)
if err != nil {
t.Errorf("%s, unexpected error: %v", test.test, err)
}
if !reflect.DeepEqual(test.expectedList, list) {
t.Errorf("%s,\nexpected:\n\t%+v,\ngot:\n\t%+v", test.test, test.expectedList, list)
}
}
}
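// Illustrative sketch (not from the vendored sources): the expected scores in the
// cases above follow from normalizing each node's count of intolerable
// PreferNoSchedule taints against the worst node, roughly score = 10*(max-count)/max.
// A self-contained version of that normalization, assuming the per-node counts
// have already been computed:
func sketchTaintTolerationScores(intolerableCounts map[string]int) map[string]int {
max := 0
for _, c := range intolerableCounts {
if c > max {
max = c
}
}
scores := make(map[string]int, len(intolerableCounts))
for node, c := range intolerableCounts {
if max == 0 {
// no node has intolerable taints; every node gets the top score
scores[node] = 10
continue
}
scores[node] = 10 * (max - c) / max
}
return scores
}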

View file

@ -0,0 +1,60 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package priorities
import (
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func makeNode(node string, milliCPU, memory int64) *v1.Node {
return &v1.Node{
ObjectMeta: v1.ObjectMeta{Name: node},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
"memory": *resource.NewQuantity(memory, resource.BinarySI),
},
Allocatable: v1.ResourceList{
"cpu": *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
"memory": *resource.NewQuantity(memory, resource.BinarySI),
},
},
}
}
func priorityFunction(mapFn algorithm.PriorityMapFunction, reduceFn algorithm.PriorityReduceFunction) algorithm.PriorityFunction {
return func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for i := range nodes {
hostResult, err := mapFn(pod, nil, nodeNameToInfo[nodes[i].Name])
if err != nil {
return nil, err
}
result = append(result, hostResult)
}
if reduceFn != nil {
if err := reduceFn(pod, nil, nodeNameToInfo, result); err != nil {
return nil, err
}
}
return result, nil
}
}
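// Example usage (illustrative): the taint-toleration tests above adapt a
// map/reduce pair into the legacy PriorityFunction signature like so:
//
// ttp := priorityFunction(ComputeTaintTolerationPriorityMap, ComputeTaintTolerationPriorityReduce)
// list, err := ttp(pod, nodeNameToInfo, nodes)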

View file

@ -0,0 +1,27 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"non_zero.go",
"topologies.go",
"util.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/sets:go_default_library",
],
)

View file

@ -0,0 +1,50 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "k8s.io/kubernetes/pkg/api/v1"
// For each of these resources, a pod that doesn't request the resource explicitly
// will be treated as having requested the amount indicated below, for the purpose
// of computing priority only. This ensures that when scheduling zero-request pods, such
// pods will not all be scheduled to the machine with the smallest in-use request,
// and that when scheduling regular pods, such pods will not see zero-request pods as
// consuming no resources whatsoever. We chose these values to be similar to the
// resources that we give to cluster addon pods (#10653). But they are pretty arbitrary.
// As described in #11713, we use request instead of limit to deal with resource requirements.
const DefaultMilliCpuRequest int64 = 100 // 0.1 core
const DefaultMemoryRequest int64 = 200 * 1024 * 1024 // 200 MiB
// GetNonzeroRequests returns the pod's CPU and memory requests, substituting the
// defaults above for any resource that is not set at all.
// TODO: Consider setting default as a fixed fraction of machine capacity (take "capacity v1.ResourceList"
// as an additional argument here) rather than using constants
func GetNonzeroRequests(requests *v1.ResourceList) (int64, int64) {
var outMilliCPU, outMemory int64
// Override if unset, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceCPU]; !found {
outMilliCPU = DefaultMilliCpuRequest
} else {
outMilliCPU = requests.Cpu().MilliValue()
}
// Override if unset, but not if explicitly set to zero
if _, found := (*requests)[v1.ResourceMemory]; !found {
outMemory = DefaultMemoryRequest
} else {
outMemory = requests.Memory().Value()
}
return outMilliCPU, outMemory
}
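// Example (illustrative): a pod with no CPU request and an explicit zero memory
// request gets the CPU default but keeps the explicit zero:
//
// requests := v1.ResourceList{v1.ResourceMemory: *resource.NewQuantity(0, resource.BinarySI)}
// cpu, mem := GetNonzeroRequests(&requests) // cpu == 100 (DefaultMilliCpuRequest), mem == 0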

View file

@ -0,0 +1,78 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import (
"k8s.io/kubernetes/pkg/api/v1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util/sets"
)
// getNamespacesFromPodAffinityTerm returns a set of namespace names
// according to the namespaces field of podAffinityTerm.
// 1. If namespaces is nil, the given pod's own namespace is used.
// 2. If namespaces is an empty list, all namespaces are considered (an empty set is returned).
func getNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm v1.PodAffinityTerm) sets.String {
names := sets.String{}
if podAffinityTerm.Namespaces == nil {
names.Insert(pod.Namespace)
} else if len(podAffinityTerm.Namespaces) != 0 {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
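// Example (illustrative) of the cases above, for a pod in namespace "ns1":
//
// term.Namespaces == nil                -> {"ns1"} (the pod's own namespace)
// term.Namespaces == []string{}         -> {}      (empty set, meaning all namespaces)
// term.Namespaces == []string{"a", "b"} -> {"a", "b"}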
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>'s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, affinityPod *v1.Pod, term *v1.PodAffinityTerm) (bool, error) {
namespaces := getNamespacesFromPodAffinityTerm(affinityPod, *term)
if len(namespaces) != 0 && !namespaces.Has(pod.Namespace) {
return false, nil
}
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil || !selector.Matches(labels.Set(pod.Labels)) {
return false, err
}
return true, nil
}
// nodesHaveSameTopologyKeyInternal checks if nodeA and nodeB have the same label value for the given topologyKey label.
func nodesHaveSameTopologyKeyInternal(nodeA, nodeB *v1.Node, topologyKey string) bool {
return nodeA.Labels != nil && nodeB.Labels != nil && len(nodeA.Labels[topologyKey]) > 0 && nodeA.Labels[topologyKey] == nodeB.Labels[topologyKey]
}
type Topologies struct {
DefaultKeys []string
}
// NodesHaveSameTopologyKey checks if nodeA and nodeB have the same label value for the given topologyKey label.
// If topologyKey is empty, it checks whether the two nodes share any of the default topologyKeys with the same corresponding label value.
func (tps *Topologies) NodesHaveSameTopologyKey(nodeA, nodeB *v1.Node, topologyKey string) bool {
if len(topologyKey) == 0 {
// assumes this is allowed only for PreferredDuringScheduling pod anti-affinity (ensured by api/validation)
for _, defaultKey := range tps.DefaultKeys {
if nodesHaveSameTopologyKeyInternal(nodeA, nodeB, defaultKey) {
return true
}
}
return false
} else {
return nodesHaveSameTopologyKeyInternal(nodeA, nodeB, topologyKey)
}
}
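// Example (illustrative): two nodes labeled "zone"="us-east-1a" match for
// topologyKey "zone"; with an empty topologyKey the check falls back to
// tps.DefaultKeys and matches if any default key has equal, non-empty values
// on both nodes.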

View file

@ -0,0 +1,32 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package util
import "k8s.io/kubernetes/pkg/api/v1"
func GetControllerRef(pod *v1.Pod) *v1.OwnerReference {
if len(pod.OwnerReferences) == 0 {
return nil
}
for i := range pod.OwnerReferences {
ref := &pod.OwnerReferences[i]
if ref.Controller != nil && *ref.Controller {
return ref
}
}
return nil
}
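// Example usage (illustrative): distinguishing controller-managed pods from
// bare pods:
//
// if ref := GetControllerRef(pod); ref != nil {
// // the pod is managed; ref.Kind and ref.Name identify its controller
// }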

View file

@ -0,0 +1,43 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
)
// SchedulerExtender is an interface for external processes to influence scheduling
// decisions made by Kubernetes. This is typically needed for resources not directly
// managed by Kubernetes.
type SchedulerExtender interface {
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
Filter(pod *v1.Pod, nodes []*v1.Node) (filteredNodes []*v1.Node, failedNodesMap schedulerapi.FailedNodesMap, err error)
// Prioritize based on extender-implemented priority functions. The returned scores & weight
// are used to compute the weighted score for an extender. The weighted scores are added to
// the scores computed by Kubernetes scheduler. The total scores are used to do the host selection.
Prioritize(pod *v1.Pod, nodes []*v1.Node) (hostPriorities *schedulerapi.HostPriorityList, weight int, err error)
}
// ScheduleAlgorithm is an interface implemented by things that know how to schedule pods
// onto machines.
type ScheduleAlgorithm interface {
Schedule(*v1.Pod, NodeLister) (selectedMachine string, err error)
}
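// Illustrative sketch (not part of the vendored code): the smallest possible
// ScheduleAlgorithm, assuming NodeLister exposes List() ([]*v1.Node, error) as
// declared in listers.go, would simply pick the first listed node:
//
// type firstFit struct{}
//
// func (firstFit) Schedule(pod *v1.Pod, lister NodeLister) (string, error) {
// nodes, err := lister.List()
// if err != nil || len(nodes) == 0 {
// return "", fmt.Errorf("no schedulable nodes for pod %v (err: %v)", pod.Name, err)
// }
// return nodes[0].Name, nil
// }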

View file

@ -0,0 +1,60 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"testing"
"k8s.io/kubernetes/pkg/api/v1"
)
// Some functions used by multiple scheduler tests.
type schedulerTester struct {
t *testing.T
scheduler ScheduleAlgorithm
nodeLister NodeLister
}
// Call if you know exactly where the pod should get scheduled.
func (st *schedulerTester) expectSchedule(pod *v1.Pod, expected string) {
actual, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
if actual != expected {
st.t.Errorf("Unexpected scheduling value: %v, expected %v", actual, expected)
}
}
// Call if you can't predict where the pod will be scheduled.
func (st *schedulerTester) expectSuccess(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err != nil {
st.t.Errorf("Unexpected error %v\nTried to schedule: %#v", err, pod)
return
}
}
// Call if the pod should *not* be scheduled.
func (st *schedulerTester) expectFailure(pod *v1.Pod) {
_, err := st.scheduler.Schedule(pod, st.nodeLister)
if err == nil {
st.t.Error("Unexpected non-error")
}
}

View file

@ -0,0 +1,66 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithm
import (
"k8s.io/kubernetes/pkg/api/v1"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// FitPredicate is a function that indicates if a pod fits into an existing node.
// The failure information is given by the error.
// TODO: Change interface{} to a specific type.
type FitPredicate func(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []PredicateFailureReason, error)
// PriorityMapFunction is a function that computes per-node results for a given node.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityMapFunction func(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error)
// PriorityReduceFunction is a function that aggregates per-node results and computes
// final scores for all nodes.
// TODO: Figure out the exact API of this method.
// TODO: Change interface{} to a specific type.
type PriorityReduceFunction func(pod *v1.Pod, meta interface{}, nodeNameToInfo map[string]*schedulercache.NodeInfo, result schedulerapi.HostPriorityList) error
// MetadataProducer is a function that computes metadata for a given pod.
type MetadataProducer func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{}
// DEPRECATED
// Use the Map-Reduce pattern for priority functions instead.
type PriorityFunction func(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error)
type PriorityConfig struct {
Map PriorityMapFunction
Reduce PriorityReduceFunction
// TODO: Remove it after migrating all functions to
// Map-Reduce pattern.
Function PriorityFunction
Weight int
}
// EmptyMetadataProducer is a no-op MetadataProducer; it returns nil metadata for any pod.
func EmptyMetadataProducer(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo) interface{} {
return nil
}
type PredicateFailureReason interface {
GetReason() string
}
type GetEquivalencePodFunc func(pod *v1.Pod) interface{}

View file

@ -0,0 +1,29 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = ["plugins.go"],
tags = ["automanaged"],
deps = ["//plugin/pkg/scheduler/algorithmprovider/defaults:go_default_library"],
)
go_test(
name = "go_default_test",
srcs = ["plugins_test.go"],
library = "go_default_library",
tags = [
"automanaged",
"skip",
],
deps = ["//plugin/pkg/scheduler/factory:go_default_library"],
)

View file

@ -0,0 +1,47 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = ["defaults.go"],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/cloudprovider/providers/aws:go_default_library",
"//pkg/util/sets:go_default_library",
"//plugin/pkg/scheduler:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities:go_default_library",
"//plugin/pkg/scheduler/factory:go_default_library",
"//vendor:github.com/golang/glog",
],
)
go_test(
name = "go_default_test",
srcs = ["compatibility_test.go"],
library = "go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apimachinery/registered:go_default_library",
"//pkg/client/clientset_generated/release_1_5:go_default_library",
"//pkg/client/restclient:go_default_library",
"//pkg/runtime:go_default_library",
"//pkg/util/sets:go_default_library",
"//pkg/util/testing:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/api/latest:go_default_library",
"//plugin/pkg/scheduler/factory:go_default_library",
],
)

View file

@ -0,0 +1,355 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package defaults
import (
"fmt"
"reflect"
"testing"
"net/http/httptest"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/apimachinery/registered"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/release_1_5"
"k8s.io/kubernetes/pkg/client/restclient"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/util/sets"
utiltesting "k8s.io/kubernetes/pkg/util/testing"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
latestschedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api/latest"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
)
func TestCompatibility_v1_Scheduler(t *testing.T) {
// Add serialized versions of scheduler config that exercise available options to ensure compatibility between releases
schedulerFiles := map[string]struct {
JSON string
ExpectedPolicy schedulerapi.Policy
}{
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.0": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsPorts"},
{"name": "NoDiskConflict"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "LeastRequestedPriority", "weight": 1},
{"name": "ServiceSpreadingPriority", "weight": 2},
{"name": "TestServiceAntiAffinity", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "zone"}}},
{"name": "TestLabelPreference", "weight": 4, "argument": {"labelPreference": {"label": "bar", "presence":true}}}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsPorts"},
{Name: "NoDiskConflict"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "LeastRequestedPriority", Weight: 1},
{Name: "ServiceSpreadingPriority", Weight: 2},
{Name: "TestServiceAntiAffinity", Weight: 3, Argument: &schedulerapi.PriorityArgument{ServiceAntiAffinity: &schedulerapi.ServiceAntiAffinity{Label: "zone"}}},
{Name: "TestLabelPreference", Weight: 4, Argument: &schedulerapi.PriorityArgument{LabelPreference: &schedulerapi.LabelPreference{Label: "bar", Presence: true}}},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.1": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsHostPorts"},
{"name": "PodFitsResources"},
{"name": "NoDiskConflict"},
{"name": "HostName"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "TestServiceAntiAffinity", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "zone"}}},
{"name": "TestLabelPreference", "weight": 4, "argument": {"labelPreference": {"label": "bar", "presence":true}}}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsHostPorts"},
{Name: "PodFitsResources"},
{Name: "NoDiskConflict"},
{Name: "HostName"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "TestServiceAntiAffinity", Weight: 3, Argument: &schedulerapi.PriorityArgument{ServiceAntiAffinity: &schedulerapi.ServiceAntiAffinity{Label: "zone"}}},
{Name: "TestLabelPreference", Weight: 4, Argument: &schedulerapi.PriorityArgument{LabelPreference: &schedulerapi.LabelPreference{Label: "bar", Presence: true}}},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.2": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "TestServiceAntiAffinity", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "zone"}}},
{"name": "TestLabelPreference", "weight": 4, "argument": {"labelPreference": {"label": "bar", "presence":true}}}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "TestServiceAntiAffinity", Weight: 3, Argument: &schedulerapi.PriorityArgument{ServiceAntiAffinity: &schedulerapi.ServiceAntiAffinity{Label: "zone"}}},
{Name: "TestLabelPreference", Weight: 4, Argument: &schedulerapi.PriorityArgument{LabelPreference: &schedulerapi.LabelPreference{Label: "bar", Presence: true}}},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.3": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
},
},
},
// Do not change this JSON after the corresponding release has been tagged.
// A failure indicates backwards compatibility with the specified release was broken.
"1.4": {
JSON: `{
"kind": "Policy",
"apiVersion": "v1",
"predicates": [
{"name": "MatchNodeSelector"},
{"name": "PodFitsResources"},
{"name": "PodFitsHostPorts"},
{"name": "HostName"},
{"name": "NoDiskConflict"},
{"name": "NoVolumeZoneConflict"},
{"name": "PodToleratesNodeTaints"},
{"name": "CheckNodeMemoryPressure"},
{"name": "CheckNodeDiskPressure"},
{"name": "MaxEBSVolumeCount"},
{"name": "MaxGCEPDVolumeCount"},
{"name": "MatchInterPodAffinity"},
{"name": "GeneralPredicates"},
{"name": "TestServiceAffinity", "argument": {"serviceAffinity" : {"labels" : ["region"]}}},
{"name": "TestLabelsPresence", "argument": {"labelsPresence" : {"labels" : ["foo"], "presence":true}}}
],"priorities": [
{"name": "EqualPriority", "weight": 2},
{"name": "ImageLocalityPriority", "weight": 2},
{"name": "LeastRequestedPriority", "weight": 2},
{"name": "BalancedResourceAllocation", "weight": 2},
{"name": "SelectorSpreadPriority", "weight": 2},
{"name": "NodePreferAvoidPodsPriority", "weight": 2},
{"name": "NodeAffinityPriority", "weight": 2},
{"name": "TaintTolerationPriority", "weight": 2},
{"name": "InterPodAffinityPriority", "weight": 2},
{"name": "MostRequestedPriority", "weight": 2}
]
}`,
ExpectedPolicy: schedulerapi.Policy{
Predicates: []schedulerapi.PredicatePolicy{
{Name: "MatchNodeSelector"},
{Name: "PodFitsResources"},
{Name: "PodFitsHostPorts"},
{Name: "HostName"},
{Name: "NoDiskConflict"},
{Name: "NoVolumeZoneConflict"},
{Name: "PodToleratesNodeTaints"},
{Name: "CheckNodeMemoryPressure"},
{Name: "CheckNodeDiskPressure"},
{Name: "MaxEBSVolumeCount"},
{Name: "MaxGCEPDVolumeCount"},
{Name: "MatchInterPodAffinity"},
{Name: "GeneralPredicates"},
{Name: "TestServiceAffinity", Argument: &schedulerapi.PredicateArgument{ServiceAffinity: &schedulerapi.ServiceAffinity{Labels: []string{"region"}}}},
{Name: "TestLabelsPresence", Argument: &schedulerapi.PredicateArgument{LabelsPresence: &schedulerapi.LabelsPresence{Labels: []string{"foo"}, Presence: true}}},
},
Priorities: []schedulerapi.PriorityPolicy{
{Name: "EqualPriority", Weight: 2},
{Name: "ImageLocalityPriority", Weight: 2},
{Name: "LeastRequestedPriority", Weight: 2},
{Name: "BalancedResourceAllocation", Weight: 2},
{Name: "SelectorSpreadPriority", Weight: 2},
{Name: "NodePreferAvoidPodsPriority", Weight: 2},
{Name: "NodeAffinityPriority", Weight: 2},
{Name: "TaintTolerationPriority", Weight: 2},
{Name: "InterPodAffinityPriority", Weight: 2},
{Name: "MostRequestedPriority", Weight: 2},
},
},
},
}
registeredPredicates := sets.NewString(factory.ListRegisteredFitPredicates()...)
registeredPriorities := sets.NewString(factory.ListRegisteredPriorityFunctions()...)
seenPredicates := sets.NewString()
seenPriorities := sets.NewString()
for v, tc := range schedulerFiles {
fmt.Printf("%s: Testing scheduler config\n", v)
policy := schedulerapi.Policy{}
if err := runtime.DecodeInto(latestschedulerapi.Codec, []byte(tc.JSON), &policy); err != nil {
t.Errorf("%s: Error decoding: %v", v, err)
continue
}
for _, predicate := range policy.Predicates {
seenPredicates.Insert(predicate.Name)
}
for _, priority := range policy.Priorities {
seenPriorities.Insert(priority.Name)
}
if !reflect.DeepEqual(policy, tc.ExpectedPolicy) {
t.Errorf("%s: Expected:\n\t%#v\nGot:\n\t%#v", v, tc.ExpectedPolicy, policy)
}
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
if _, err := factory.NewConfigFactory(client, "some-scheduler-name", v1.DefaultHardPodAffinitySymmetricWeight, v1.DefaultFailureDomains).CreateFromConfig(policy); err != nil {
t.Errorf("%s: Error constructing: %v", v, err)
continue
}
}
if !seenPredicates.HasAll(registeredPredicates.List()...) {
t.Errorf("Registered predicates are missing from compatibility test (add to test stanza for version currently in development): %#v", registeredPredicates.Difference(seenPredicates).List())
}
if !seenPriorities.HasAll(registeredPriorities.List()...) {
t.Errorf("Registered priorities are missing from compatibility test (add to test stanza for version currently in development): %#v", registeredPriorities.Difference(seenPriorities).List())
}
}

View file

@ -0,0 +1,264 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This is the default algorithm provider for the scheduler.
package defaults
import (
"os"
"strconv"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/cloudprovider/providers/aws"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/plugin/pkg/scheduler"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
"github.com/golang/glog"
)
const (
// GCE instances can have up to 16 PD volumes attached.
DefaultMaxGCEPDVolumes = 16
ClusterAutoscalerProvider = "ClusterAutoscalerProvider"
StatefulSetKind = "StatefulSet"
)
func init() {
// Register functions that extract metadata used by predicates and priorities computations.
factory.RegisterPredicateMetadataProducerFactory(
func(args factory.PluginFactoryArgs) algorithm.MetadataProducer {
return predicates.NewPredicateMetadataFactory(args.PodLister)
})
factory.RegisterPriorityMetadataProducerFactory(
func(args factory.PluginFactoryArgs) algorithm.MetadataProducer {
return priorities.PriorityMetadata
})
// Registers algorithm providers. By default we use 'DefaultProvider', but the user can
// select another one via a flag.
factory.RegisterAlgorithmProvider(factory.DefaultProvider, defaultPredicates(), defaultPriorities())
// Cluster autoscaler friendly scheduling algorithm.
factory.RegisterAlgorithmProvider(ClusterAutoscalerProvider, defaultPredicates(),
copyAndReplace(defaultPriorities(), "LeastRequestedPriority", "MostRequestedPriority"))
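// Example (illustrative): an alternative provider such as ClusterAutoscalerProvider
// is typically selected by passing its name via the scheduler's algorithm provider flag.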
// Registers predicates and priorities that are not enabled by default, but which the user
// can pick when composing their own set of priorities/predicates.
// PodFitsPorts has been replaced by PodFitsHostPorts for better user understanding.
// For backwards compatibility with 1.0, PodFitsPorts is registered as well.
factory.RegisterFitPredicate("PodFitsPorts", predicates.PodFitsHostPorts)
// Fit is defined based on the absence of port conflicts.
// This predicate is actually a default predicate, because it is invoked from
// predicates.GeneralPredicates()
factory.RegisterFitPredicate("PodFitsHostPorts", predicates.PodFitsHostPorts)
// Fit is determined by resource availability.
// This predicate is actually a default predicate, because it is invoked from
// predicates.GeneralPredicates()
factory.RegisterFitPredicate("PodFitsResources", predicates.PodFitsResources)
// Fit is determined by the presence of the Host parameter and a string match
// This predicate is actually a default predicate, because it is invoked from
// predicates.GeneralPredicates()
factory.RegisterFitPredicate("HostName", predicates.PodFitsHost)
// Fit is determined by node selector query.
factory.RegisterFitPredicate("MatchNodeSelector", predicates.PodSelectorMatches)
// Use equivalence class to speed up predicates & priorities
factory.RegisterGetEquivalencePodFunction(GetEquivalencePod)
// ServiceSpreadingPriority is a priority config factory that spreads pods by minimizing
// the number of pods (belonging to the same service) on the same node.
// Register the factory so that it's available, but do not include it as part of the default priorities
// Largely replaced by "SelectorSpreadPriority", but registered for backward compatibility with 1.0
factory.RegisterPriorityConfigFactory(
"ServiceSpreadingPriority",
factory.PriorityConfigFactory{
Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction {
return priorities.NewSelectorSpreadPriority(args.ServiceLister, algorithm.EmptyControllerLister{}, algorithm.EmptyReplicaSetLister{})
},
Weight: 1,
},
)
// EqualPriority is a prioritizer function that gives an equal weight of one to all nodes
// Register the priority function so that it's available
// but do not include it as part of the default priorities
factory.RegisterPriorityFunction2("EqualPriority", scheduler.EqualPriorityMap, nil, 1)
// ImageLocalityPriority prioritizes nodes based on the locality of images requested by a pod. Nodes that already
// have a larger total size of the pod's required images installed are preferred over nodes that have few or
// none of those images installed.
factory.RegisterPriorityFunction2("ImageLocalityPriority", priorities.ImageLocalityPriorityMap, nil, 1)
// Optional, cluster-autoscaler friendly priority function - give used nodes higher priority.
factory.RegisterPriorityFunction2("MostRequestedPriority", priorities.MostRequestedPriorityMap, nil, 1)
}
func defaultPredicates() sets.String {
return sets.NewString(
// Fit is determined by volume zone requirements.
factory.RegisterFitPredicateFactory(
"NoVolumeZoneConflict",
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewVolumeZonePredicate(args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by whether or not there would be too many AWS EBS volumes attached to the node
factory.RegisterFitPredicateFactory(
"MaxEBSVolumeCount",
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
// TODO: allow for generically parameterized scheduler predicates, because this is a bit ugly
maxVols := getMaxVols(aws.DefaultMaxEBSVolumes)
return predicates.NewMaxPDVolumeCountPredicate(predicates.EBSVolumeFilter, maxVols, args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by whether or not there would be too many GCE PD volumes attached to the node
factory.RegisterFitPredicateFactory(
"MaxGCEPDVolumeCount",
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
// TODO: allow for generically parameterized scheduler predicates, because this is a bit ugly
maxVols := getMaxVols(DefaultMaxGCEPDVolumes)
return predicates.NewMaxPDVolumeCountPredicate(predicates.GCEPDVolumeFilter, maxVols, args.PVInfo, args.PVCInfo)
},
),
// Fit is determined by inter-pod affinity.
factory.RegisterFitPredicateFactory(
"MatchInterPodAffinity",
func(args factory.PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewPodAffinityPredicate(args.NodeInfo, args.PodLister, args.FailureDomains)
},
),
// Fit is determined by non-conflicting disk volumes.
factory.RegisterFitPredicate("NoDiskConflict", predicates.NoDiskConflict),
// GeneralPredicates are the predicates that are enforced by all Kubernetes components
// (e.g. kubelet and all schedulers)
factory.RegisterFitPredicate("GeneralPredicates", predicates.GeneralPredicates),
// Fit is determined based on whether a pod can tolerate all of the node's taints
factory.RegisterFitPredicate("PodToleratesNodeTaints", predicates.PodToleratesNodeTaints),
// Fit is determined by node memory pressure condition.
factory.RegisterFitPredicate("CheckNodeMemoryPressure", predicates.CheckNodeMemoryPressurePredicate),
// Fit is determined by node disk pressure condition.
factory.RegisterFitPredicate("CheckNodeDiskPressure", predicates.CheckNodeDiskPressurePredicate),
)
}
func defaultPriorities() sets.String {
return sets.NewString(
// spreads pods by minimizing the number of pods (belonging to the same service or replication controller) on the same node.
factory.RegisterPriorityConfigFactory(
"SelectorSpreadPriority",
factory.PriorityConfigFactory{
Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction {
return priorities.NewSelectorSpreadPriority(args.ServiceLister, args.ControllerLister, args.ReplicaSetLister)
},
Weight: 1,
},
),
// pods should be placed in the same topological domain (e.g. same node, same rack, same zone, same power domain, etc.)
// as some other pods, or, conversely, should not be placed in the same topological domain as some other pods.
factory.RegisterPriorityConfigFactory(
"InterPodAffinityPriority",
factory.PriorityConfigFactory{
Function: func(args factory.PluginFactoryArgs) algorithm.PriorityFunction {
return priorities.NewInterPodAffinityPriority(args.NodeInfo, args.NodeLister, args.PodLister, args.HardPodAffinitySymmetricWeight, args.FailureDomains)
},
Weight: 1,
},
),
// Prioritize nodes by least requested utilization.
factory.RegisterPriorityFunction2("LeastRequestedPriority", priorities.LeastRequestedPriorityMap, nil, 1),
// Prioritizes nodes to help achieve balanced resource usage
factory.RegisterPriorityFunction2("BalancedResourceAllocation", priorities.BalancedResourceAllocationMap, nil, 1),
// Set this weight large enough to override all other priority functions.
// TODO: Figure out a better way to do this, maybe at same time as fixing #24720.
factory.RegisterPriorityFunction2("NodePreferAvoidPodsPriority", priorities.CalculateNodePreferAvoidPodsPriorityMap, nil, 10000),
// Prioritizes nodes that have labels matching NodeAffinity
factory.RegisterPriorityFunction2("NodeAffinityPriority", priorities.CalculateNodeAffinityPriorityMap, priorities.CalculateNodeAffinityPriorityReduce, 1),
// Prioritizes nodes with fewer PreferNoSchedule taints that the pod does not tolerate.
factory.RegisterPriorityFunction2("TaintTolerationPriority", priorities.ComputeTaintTolerationPriorityMap, priorities.ComputeTaintTolerationPriorityReduce, 1),
)
}
// getMaxVols checks the KUBE_MAX_PD_VOLS environment variable, falling back to the given default value
func getMaxVols(defaultVal int) int {
if rawMaxVols := os.Getenv("KUBE_MAX_PD_VOLS"); rawMaxVols != "" {
if parsedMaxVols, err := strconv.Atoi(rawMaxVols); err != nil {
glog.Errorf("Unable to parse maxiumum PD volumes value, using default of %v: %v", defaultVal, err)
} else if parsedMaxVols <= 0 {
glog.Errorf("Maximum PD volumes must be a positive value, using default of %v", defaultVal)
} else {
return parsedMaxVols
}
}
return defaultVal
}
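// Example (illustrative): KUBE_MAX_PD_VOLS=39 yields 39; an unset, empty,
// unparsable, or non-positive value falls back to defaultVal.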
func copyAndReplace(set sets.String, replaceWhat, replaceWith string) sets.String {
result := sets.NewString(set.List()...)
if result.Has(replaceWhat) {
result.Delete(replaceWhat)
result.Insert(replaceWith)
}
return result
}
// GetEquivalencePod returns an EquivalencePod, which contains a group of pod attributes that can be reused.
func GetEquivalencePod(pod *v1.Pod) interface{} {
equivalencePod := EquivalencePod{}
// For now, a pod is only given a non-empty ControllerRef if:
// 1. it has an OwnerReference with Controller set to true, and
// 2. that OwnerReference's kind is a valid controller kind.
// Pods with the same such OwnerReference are considered equivalent.
if len(pod.OwnerReferences) != 0 {
for _, ref := range pod.OwnerReferences {
if ref.Controller != nil && *ref.Controller && isValidControllerKind(ref.Kind) {
equivalencePod.ControllerRef = ref
// a pod can only belong to one controller
break
}
}
}
return &equivalencePod
}
// isValidControllerKind checks if a given controller's kind can be used by the equivalence pod algorithm.
func isValidControllerKind(kind string) bool {
switch kind {
// list of kinds that we cannot handle
case StatefulSetKind:
return false
default:
return true
}
}
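// Example (illustrative): two pods whose OwnerReferences point at the same
// ReplicaSet with Controller=true produce EquivalencePods with identical
// ControllerRef values, so cached scheduling results for one can be reused for
// the other; pods owned by a StatefulSet are excluded by isValidControllerKind.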
// EquivalencePod is a group of pod attributes which can be reused as equivalence to schedule other pods.
type EquivalencePod struct {
ControllerRef v1.OwnerReference
}

View file

@ -0,0 +1,22 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This package is used to register algorithm provider plugins.
package algorithmprovider
import (
_ "k8s.io/kubernetes/plugin/pkg/scheduler/algorithmprovider/defaults"
)

View file

@ -0,0 +1,65 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package algorithmprovider
import (
"testing"
"k8s.io/kubernetes/plugin/pkg/scheduler/factory"
)
var (
algorithmProviderNames = []string{
factory.DefaultProvider,
}
)
func TestDefaultConfigExists(t *testing.T) {
p, err := factory.GetAlgorithmProvider(factory.DefaultProvider)
if err != nil {
t.Errorf("error retrieving default provider: %v", err)
}
if p == nil {
t.Error("algorithm provider config should not be nil")
}
if len(p.FitPredicateKeys) == 0 {
t.Error("default algorithm provider shouldn't have 0 fit predicates")
}
}
func TestAlgorithmProviders(t *testing.T) {
for _, pn := range algorithmProviderNames {
p, err := factory.GetAlgorithmProvider(pn)
if err != nil {
t.Errorf("error retrieving '%s' provider: %v", pn, err)
break
}
if len(p.PriorityFunctionKeys) == 0 {
t.Errorf("%s algorithm provider shouldn't have 0 priority functions", pn)
}
for _, pf := range p.PriorityFunctionKeys.List() {
if !factory.IsPriorityFunctionRegistered(pf) {
t.Errorf("priority function %s is not registered but is used in the %s algorithm provider", pf, pn)
}
}
for _, fp := range p.FitPredicateKeys.List() {
if !factory.IsFitPredicateRegistered(fp) {
t.Errorf("fit predicate %s is not registered but is used in the %s algorithm provider", fp, pn)
}
}
}
}

View file

@ -0,0 +1,27 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"register.go",
"types.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/client/restclient:go_default_library",
"//pkg/runtime:go_default_library",
"//pkg/runtime/schema:go_default_library",
],
)

View file

@ -0,0 +1,25 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = ["latest.go"],
tags = ["automanaged"],
deps = [
"//pkg/runtime:go_default_library",
"//pkg/runtime/schema:go_default_library",
"//pkg/runtime/serializer/json:go_default_library",
"//pkg/runtime/serializer/versioning:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/api/v1:go_default_library",
],
)

View file

@ -0,0 +1,53 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package latest
import (
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/runtime/schema"
"k8s.io/kubernetes/pkg/runtime/serializer/json"
"k8s.io/kubernetes/pkg/runtime/serializer/versioning"
"k8s.io/kubernetes/plugin/pkg/scheduler/api"
_ "k8s.io/kubernetes/plugin/pkg/scheduler/api/v1"
)
// Version is the string that represents the current external default version.
const Version = "v1"
// OldestVersion is the string that represents the oldest server version supported.
const OldestVersion = "v1"
// Versions is the list of versions that are recognized in code. The order provided
// may be assumed to be least feature rich to most feature rich, and clients may
// choose to prefer the latter items in the list over the former items when presented
// with a set of versions to choose.
var Versions = []string{"v1"}
// Codec is the default codec for serializing input that should use
// the latest supported version. It supports JSON by default.
var Codec runtime.Codec
func init() {
jsonSerializer := json.NewSerializer(json.DefaultMetaFactory, api.Scheme, api.Scheme, true)
Codec = versioning.NewDefaultingCodecForScheme(
api.Scheme,
jsonSerializer,
jsonSerializer,
schema.GroupVersion{Version: Version},
runtime.InternalGroupVersioner,
)
}
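// Example usage (illustrative; mirrors the compatibility tests): decoding a
// serialized policy with this codec:
//
// policy := schedulerapi.Policy{}
// err := runtime.DecodeInto(latest.Codec, []byte(policyJSON), &policy)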

View file

@ -0,0 +1,55 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/runtime/schema"
)
// Scheme is the default instance of runtime.Scheme to which types in the Kubernetes API are already registered.
// TODO: remove this, scheduler should not have its own scheme.
var Scheme = runtime.NewScheme()
// SchemeGroupVersion is group version used to register these objects
// TODO this should be in the "scheduler" group
var SchemeGroupVersion = schema.GroupVersion{Group: "", Version: runtime.APIVersionInternal}
var (
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
AddToScheme = SchemeBuilder.AddToScheme
)
func init() {
if err := addKnownTypes(Scheme); err != nil {
// Programmer error.
panic(err)
}
}
func addKnownTypes(scheme *runtime.Scheme) error {
if err := scheme.AddIgnoredConversionType(&metav1.TypeMeta{}, &metav1.TypeMeta{}); err != nil {
return err
}
scheme.AddKnownTypes(SchemeGroupVersion,
&Policy{},
)
return nil
}
func (obj *Policy) GetObjectKind() schema.ObjectKind { return &obj.TypeMeta }

View file

@ -0,0 +1,178 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package api
import (
"time"
"k8s.io/kubernetes/pkg/api/v1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/client/restclient"
)
type Policy struct {
metav1.TypeMeta `json:",inline"`
// Holds the information to configure the fit predicate functions
Predicates []PredicatePolicy `json:"predicates"`
// Holds the information to configure the priority functions
Priorities []PriorityPolicy `json:"priorities"`
// Holds the information to communicate with the extender(s)
ExtenderConfigs []ExtenderConfig `json:"extenders"`
}
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string `json:"name"`
// Holds the parameters to configure the given predicate
Argument *PredicateArgument `json:"argument"`
}
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string `json:"name"`
// The numeric multiplier for the node scores that the priority function generates
// The weight should be a positive integer
Weight int `json:"weight"`
// Holds the parameters to configure the given priority function
Argument *PriorityArgument `json:"argument"`
}
// Represents the arguments that the different types of predicates take
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity `json:"serviceAffinity"`
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence `json:"labelsPresence"`
}
// Represents the arguments that the different types of priorities take.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity `json:"serviceAntiAffinity"`
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference `json:"labelPreference"`
}
// Holds the parameters that are used to configure the corresponding predicate
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string `json:"labels"`
}
// Holds the parameters that are used to configure the corresponding predicate
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present (or absent) for the node to be considered a fit for hosting the pod
Labels []string `json:"labels"`
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool `json:"presence"`
}
// Holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string `json:"label"`
}
// Holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string `json:"label"`
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool `json:"presence"`
}
// Holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type ExtenderConfig struct {
// URLPrefix at which the extender is available
URLPrefix string `json:"urlPrefix"`
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string `json:"filterVerb,omitempty"`
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string `json:"prioritizeVerb,omitempty"`
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int `json:"weight,omitempty"`
// EnableHttps specifies whether https should be used to communicate with the extender
EnableHttps bool `json:"enableHttps,omitempty"`
// TLSConfig specifies the transport layer security config
TLSConfig *restclient.TLSClientConfig `json:"tlsConfig,omitempty"`
// HTTPTimeout specifies the timeout duration for a call to the extender. A filter timeout fails the scheduling of the
// pod; a prioritize timeout is ignored, and the k8s/other extenders' priorities are used to select the node.
HTTPTimeout time.Duration `json:"httpTimeout,omitempty"`
}
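// Example (illustrative) extender stanza inside a Policy, assuming a
// hypothetical extender listening at http://127.0.0.1:8888:
//
// {"urlPrefix": "http://127.0.0.1:8888", "filterVerb": "filter",
// "prioritizeVerb": "prioritize", "weight": 5, "enableHttps": false}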
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod v1.Pod `json:"pod"`
// List of candidate nodes where the pod can be scheduled
Nodes v1.NodeList `json:"nodes"`
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled
Nodes v1.NodeList `json:"nodes,omitempty"`
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap `json:"failedNodes,omitempty"`
// Error message indicating failure
Error string `json:"error,omitempty"`
}
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.
type HostPriority struct {
// Name of the host
Host string `json:"host"`
// Score associated with the host
Score int `json:"score"`
}
type HostPriorityList []HostPriority
func (h HostPriorityList) Len() int {
return len(h)
}
func (h HostPriorityList) Less(i, j int) bool {
if h[i].Score == h[j].Score {
return h[i].Host < h[j].Host
}
return h[i].Score < h[j].Score
}
func (h HostPriorityList) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}
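
The Len/Less/Swap trio above is what makes HostPriorityList usable with the standard sort package. A minimal sketch of how a caller might order hosts by score, highest first with ties broken by host name; the types are local copies so the snippet compiles on its own:

package main

import (
	"fmt"
	"sort"
)

// Local copies of the API types above, kept here so the sketch is self-contained.
type HostPriority struct {
	Host  string
	Score int
}

type HostPriorityList []HostPriority

func (h HostPriorityList) Len() int { return len(h) }
func (h HostPriorityList) Less(i, j int) bool {
	if h[i].Score == h[j].Score {
		return h[i].Host < h[j].Host
	}
	return h[i].Score < h[j].Score
}
func (h HostPriorityList) Swap(i, j int) { h[i], h[j] = h[j], h[i] }

func main() {
	hosts := HostPriorityList{
		{Host: "machine1", Score: 1},
		{Host: "machine3", Score: 10},
		{Host: "machine2", Score: 10},
	}
	// sort.Reverse flips Less, so higher scores sort first.
	sort.Sort(sort.Reverse(hosts))
	fmt.Println(hosts) // [{machine3 10} {machine2 10} {machine1 1}]
}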

View file

@ -0,0 +1,28 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"register.go",
"types.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/apis/meta/v1:go_default_library",
"//pkg/client/restclient:go_default_library",
"//pkg/runtime:go_default_library",
"//pkg/runtime/schema:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
],
)

View file

@ -0,0 +1,48 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/runtime/schema"
"k8s.io/kubernetes/plugin/pkg/scheduler/api"
)
// SchemeGroupVersion is group version used to register these objects
// TODO this should be in the "scheduler" group
var SchemeGroupVersion = schema.GroupVersion{Group: "", Version: "v1"}
func init() {
if err := addKnownTypes(api.Scheme); err != nil {
// Programmer error.
panic(err)
}
}
var (
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
AddToScheme = SchemeBuilder.AddToScheme
)
func addKnownTypes(scheme *runtime.Scheme) error {
scheme.AddKnownTypes(SchemeGroupVersion,
&Policy{},
)
return nil
}
func (obj *Policy) GetObjectKind() schema.ObjectKind { return &obj.TypeMeta }
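
Registering Policy into the scheme above is what lets a JSON policy document be decoded into the typed object. A minimal sketch, using the codec from plugin/pkg/scheduler/api/latest the same way the factory tests later in this commit do:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/runtime"
	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	latestschedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api/latest"
)

func main() {
	configData := []byte(`{
		"kind": "Policy",
		"apiVersion": "v1",
		"priorities": [
			{"name": "RackSpread", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "rack"}}}
		]
	}`)
	var policy schedulerapi.Policy
	// DecodeInto parses the versioned JSON and converts it into the Policy type.
	if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
		fmt.Println("invalid policy:", err)
		return
	}
	fmt.Printf("decoded %d priorities\n", len(policy.Priorities))
}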

View file

@ -0,0 +1,178 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
"time"
apiv1 "k8s.io/kubernetes/pkg/api/v1"
metav1 "k8s.io/kubernetes/pkg/apis/meta/v1"
"k8s.io/kubernetes/pkg/client/restclient"
)
type Policy struct {
metav1.TypeMeta `json:",inline"`
// Holds the information to configure the fit predicate functions
Predicates []PredicatePolicy `json:"predicates"`
// Holds the information to configure the priority functions
Priorities []PriorityPolicy `json:"priorities"`
// Holds the information to communicate with the extender(s)
ExtenderConfigs []ExtenderConfig `json:"extenders"`
}
type PredicatePolicy struct {
// Identifier of the predicate policy
// For a custom predicate, the name can be user-defined
// For the Kubernetes provided predicates, the name is the identifier of the pre-defined predicate
Name string `json:"name"`
// Holds the parameters to configure the given predicate
Argument *PredicateArgument `json:"argument"`
}
type PriorityPolicy struct {
// Identifier of the priority policy
// For a custom priority, the name can be user-defined
// For the Kubernetes provided priority functions, the name is the identifier of the pre-defined priority function
Name string `json:"name"`
// The numeric multiplier for the node scores that the priority function generates
// The weight should be non-zero and can be a positive or a negative integer
Weight int `json:"weight"`
// Holds the parameters to configure the given priority function
Argument *PriorityArgument `json:"argument"`
}
// Represents the arguments that the different types of predicates take
// Only one of its members may be specified
type PredicateArgument struct {
// The predicate that provides affinity for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAffinity *ServiceAffinity `json:"serviceAffinity"`
// The predicate that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelsPresence *LabelsPresence `json:"labelsPresence"`
}
// Represents the arguments that the different types of priorities take.
// Only one of its members may be specified
type PriorityArgument struct {
// The priority function that ensures a good spread (anti-affinity) for pods belonging to a service
// It uses a label to identify nodes that belong to the same "group"
ServiceAntiAffinity *ServiceAntiAffinity `json:"serviceAntiAffinity"`
// The priority function that checks whether a particular node has a certain label
// defined or not, regardless of value
LabelPreference *LabelPreference `json:"labelPreference"`
}
// Holds the parameters that are used to configure the corresponding predicate
type ServiceAffinity struct {
// The list of labels that identify node "groups"
// All of the labels should match for the node to be considered a fit for hosting the pod
Labels []string `json:"labels"`
}
// Holds the parameters that are used to configure the corresponding predicate
type LabelsPresence struct {
// The list of labels that identify node "groups"
// All of the labels should be either present or absent for the node to be considered a fit for hosting the pod
Labels []string `json:"labels"`
// The boolean flag that indicates whether the labels should be present or absent from the node
Presence bool `json:"presence"`
}
// Holds the parameters that are used to configure the corresponding priority function
type ServiceAntiAffinity struct {
// Used to identify node "groups"
Label string `json:"label"`
}
// Holds the parameters that are used to configure the corresponding priority function
type LabelPreference struct {
// Used to identify node "groups"
Label string `json:"label"`
// This is a boolean flag
// If true, higher priority is given to nodes that have the label
// If false, higher priority is given to nodes that do not have the label
Presence bool `json:"presence"`
}
// Holds the parameters used to communicate with the extender. If a verb is unspecified/empty,
// it is assumed that the extender chose not to provide that extension.
type ExtenderConfig struct {
// URLPrefix at which the extender is available
URLPrefix string `json:"urlPrefix"`
// Verb for the filter call, empty if not supported. This verb is appended to the URLPrefix when issuing the filter call to extender.
FilterVerb string `json:"filterVerb,omitempty"`
// Verb for the prioritize call, empty if not supported. This verb is appended to the URLPrefix when issuing the prioritize call to extender.
PrioritizeVerb string `json:"prioritizeVerb,omitempty"`
// The numeric multiplier for the node scores that the prioritize call generates.
// The weight should be a positive integer
Weight int `json:"weight,omitempty"`
// EnableHttps specifies whether https should be used to communicate with the extender
EnableHttps bool `json:"enableHttps,omitempty"`
// TLSConfig specifies the transport layer security config
TLSConfig *restclient.TLSClientConfig `json:"tlsConfig,omitempty"`
// HTTPTimeout specifies the timeout duration for a call to the extender. A filter timeout fails the scheduling of the pod. A prioritize
// timeout is ignored; k8s/other extenders' priorities are used to select the node.
HTTPTimeout time.Duration `json:"httpTimeout,omitempty"`
}
// ExtenderArgs represents the arguments needed by the extender to filter/prioritize
// nodes for a pod.
type ExtenderArgs struct {
// Pod being scheduled
Pod apiv1.Pod `json:"pod"`
// List of candidate nodes where the pod can be scheduled
Nodes apiv1.NodeList `json:"nodes"`
}
// FailedNodesMap represents the filtered out nodes, with node names and failure messages
type FailedNodesMap map[string]string
// ExtenderFilterResult represents the results of a filter call to an extender
type ExtenderFilterResult struct {
// Filtered set of nodes where the pod can be scheduled
Nodes apiv1.NodeList `json:"nodes,omitempty"`
// Filtered out nodes where the pod can't be scheduled and the failure messages
FailedNodes FailedNodesMap `json:"failedNodes,omitempty"`
// Error message indicating failure
Error string `json:"error,omitempty"`
}
// HostPriority represents the priority of scheduling to a particular host, higher priority is better.
type HostPriority struct {
// Name of the host
Host string `json:"host"`
// Score associated with the host
Score int `json:"score"`
}
type HostPriorityList []HostPriority
func (h HostPriorityList) Len() int {
return len(h)
}
func (h HostPriorityList) Less(i, j int) bool {
if h[i].Score == h[j].Score {
return h[i].Host < h[j].Host
}
return h[i].Score < h[j].Score
}
func (h HostPriorityList) Swap(i, j int) {
h[i], h[j] = h[j], h[i]
}
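
Taken together, the v1 types above describe a plain JSON policy document whose keys follow the struct tags. A hypothetical example (the predicate/priority names and the extender URL are made up) wiring one predicate, one priority, and one extender:

{
  "kind": "Policy",
  "apiVersion": "v1",
  "predicates": [
    {"name": "RequireZone", "argument": {"labelsPresence": {"labels": ["zone"], "presence": true}}}
  ],
  "priorities": [
    {"name": "RackSpread", "weight": 3, "argument": {"serviceAntiAffinity": {"label": "rack"}}}
  ],
  "extenders": [
    {
      "urlPrefix": "http://127.0.0.1:12346/scheduler",
      "filterVerb": "filter",
      "prioritizeVerb": "prioritize",
      "weight": 5,
      "enableHttps": false
    }
  ]
}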

View file

@ -0,0 +1,29 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = ["validation.go"],
tags = ["automanaged"],
deps = [
"//pkg/util/errors:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
],
)
go_test(
name = "go_default_test",
srcs = ["validation_test.go"],
library = "go_default_library",
tags = ["automanaged"],
deps = ["//plugin/pkg/scheduler/api:go_default_library"],
)

View file

@ -0,0 +1,43 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"fmt"
utilerrors "k8s.io/kubernetes/pkg/util/errors"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
)
// ValidatePolicy checks for errors in the Policy
// It does not return early so that it can find as many errors as possible
func ValidatePolicy(policy schedulerapi.Policy) error {
validationErrors := make([]error, 0)
for _, priority := range policy.Priorities {
if priority.Weight <= 0 {
validationErrors = append(validationErrors, fmt.Errorf("Priority %s should have a positive weight applied to it", priority.Name))
}
}
for _, extender := range policy.ExtenderConfigs {
if extender.Weight < 0 {
validationErrors = append(validationErrors, fmt.Errorf("Priority for extender %s should have a non-negative weight applied to it", extender.URLPrefix))
}
}
return utilerrors.NewAggregate(validationErrors)
}
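
A short sketch of how a caller sees the aggregated result; since ValidatePolicy does not return early, a policy with several bad weights reports them all at once:

package main

import (
	"fmt"

	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
	"k8s.io/kubernetes/plugin/pkg/scheduler/api/validation"
)

func main() {
	policy := schedulerapi.Policy{
		Priorities: []schedulerapi.PriorityPolicy{
			{Name: "NoWeightPriority"},             // zero weight: rejected
			{Name: "NegativePriority", Weight: -2}, // negative weight: rejected
		},
	}
	// NewAggregate folds every validation failure into a single error.
	if err := validation.ValidatePolicy(policy); err != nil {
		fmt.Println("policy rejected:", err)
	}
}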

View file

@ -0,0 +1,52 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"testing"
"k8s.io/kubernetes/plugin/pkg/scheduler/api"
)
func TestValidatePriorityWithNoWeight(t *testing.T) {
policy := api.Policy{Priorities: []api.PriorityPolicy{{Name: "NoWeightPriority"}}}
if ValidatePolicy(policy) == nil {
t.Errorf("Expected error about priority weight not being positive")
}
}
func TestValidatePriorityWithZeroWeight(t *testing.T) {
policy := api.Policy{Priorities: []api.PriorityPolicy{{Name: "NoWeightPriority", Weight: 0}}}
if ValidatePolicy(policy) == nil {
t.Errorf("Expected error about priority weight not being positive")
}
}
func TestValidatePriorityWithNonZeroWeight(t *testing.T) {
policy := api.Policy{Priorities: []api.PriorityPolicy{{Name: "WeightPriority", Weight: 2}}}
errs := ValidatePolicy(policy)
if errs != nil {
t.Errorf("Unexpected errors %v", errs)
}
}
func TestValidatePriorityWithNegativeWeight(t *testing.T) {
policy := api.Policy{Priorities: []api.PriorityPolicy{{Name: "WeightPriority", Weight: -2}}}
if ValidatePolicy(policy) == nil {
t.Errorf("Expected error about priority weight not being positive")
}
}

View file

@ -0,0 +1,134 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"hash/adler32"
"github.com/golang/groupcache/lru"
"sync"
"k8s.io/kubernetes/pkg/api/v1"
hashutil "k8s.io/kubernetes/pkg/util/hash"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
)
// TODO(harryz) figure out the right number for this, 4096 may be too big
const maxCacheEntries = 4096
type HostPredicate struct {
Fit bool
FailReasons []algorithm.PredicateFailureReason
}
type AlgorithmCache struct {
// Only consider predicates for now, priorities rely on: #31606
predicatesCache *lru.Cache
}
func newAlgorithmCache() AlgorithmCache {
return AlgorithmCache{
predicatesCache: lru.New(maxCacheEntries),
}
}
// EquivalenceCache stores a predicate cache per node, each bounded by maxCacheEntries
type EquivalenceCache struct {
sync.RWMutex
getEquivalencePod algorithm.GetEquivalencePodFunc
algorithmCache map[string]AlgorithmCache
}
func NewEquivalenceCache(getEquivalencePodFunc algorithm.GetEquivalencePodFunc) *EquivalenceCache {
return &EquivalenceCache{
getEquivalencePod: getEquivalencePodFunc,
algorithmCache: make(map[string]AlgorithmCache),
}
}
// addPodPredicate records the predicate result for an equivalence class on the given node
func (ec *EquivalenceCache) addPodPredicate(podKey uint64, nodeName string, fit bool, failReasons []algorithm.PredicateFailureReason) {
if _, exist := ec.algorithmCache[nodeName]; !exist {
ec.algorithmCache[nodeName] = newAlgorithmCache()
}
ec.algorithmCache[nodeName].predicatesCache.Add(podKey, HostPredicate{Fit: fit, FailReasons: failReasons})
}
// AddPodPredicatesCache caches pod predicate results for an equivalence class
func (ec *EquivalenceCache) AddPodPredicatesCache(pod *v1.Pod, fitNodeList []*v1.Node, failedPredicates *FailedPredicateMap) {
equivalenceHash := ec.hashEquivalencePod(pod)
for _, fitNode := range fitNodeList {
ec.addPodPredicate(equivalenceHash, fitNode.Name, true, nil)
}
for failNodeName, failReasons := range *failedPredicates {
ec.addPodPredicate(equivalenceHash, failNodeName, false, failReasons)
}
}
// GetCachedPredicates gets cached predicate results for an equivalence class, along with the nodes that have no cached entry
func (ec *EquivalenceCache) GetCachedPredicates(pod *v1.Pod, nodes []*v1.Node) ([]*v1.Node, FailedPredicateMap, []*v1.Node) {
fitNodeList := []*v1.Node{}
failedPredicates := FailedPredicateMap{}
noCacheNodeList := []*v1.Node{}
equivalenceHash := ec.hashEquivalencePod(pod)
for _, node := range nodes {
findCache := false
if algorithmCache, exist := ec.algorithmCache[node.Name]; exist {
if cachePredicate, exist := algorithmCache.predicatesCache.Get(equivalenceHash); exist {
hostPredicate := cachePredicate.(HostPredicate)
if hostPredicate.Fit {
fitNodeList = append(fitNodeList, node)
} else {
failedPredicates[node.Name] = hostPredicate.FailReasons
}
findCache = true
}
}
if !findCache {
noCacheNodeList = append(noCacheNodeList, node)
}
}
return fitNodeList, failedPredicates, noCacheNodeList
}
// SendInvalidAlgorithmCacheReq marks the AlgorithmCache item for a node as invalid
func (ec *EquivalenceCache) SendInvalidAlgorithmCacheReq(nodeName string) {
ec.Lock()
defer ec.Unlock()
// clear the cache of this node
delete(ec.algorithmCache, nodeName)
}
// SendClearAllCacheReq marks all cached items as invalid
func (ec *EquivalenceCache) SendClearAllCacheReq() {
ec.Lock()
defer ec.Unlock()
// clear cache of all nodes
for nodeName := range ec.algorithmCache {
delete(ec.algorithmCache, nodeName)
}
}
// hashEquivalencePod returns the hash of the pod's equivalence class.
func (ec *EquivalenceCache) hashEquivalencePod(pod *v1.Pod) uint64 {
equivalencePod := ec.getEquivalencePod(pod)
hash := adler32.New()
hashutil.DeepHashObject(hash, equivalencePod)
return uint64(hash.Sum32())
}
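
The adler32-over-DeepHashObject pattern above is what turns an equivalence descriptor into a stable cache key. A minimal sketch with a hypothetical descriptor type; in the real cache the descriptor comes from the registered GetEquivalencePodFunc:

package main

import (
	"fmt"
	"hash/adler32"

	hashutil "k8s.io/kubernetes/pkg/util/hash"
)

// equivalenceClass is a hypothetical stand-in for whatever a
// GetEquivalencePodFunc extracts from a pod.
type equivalenceClass struct {
	ControllerRef string
	CPURequest    string
}

func main() {
	hash := adler32.New()
	// DeepHashObject resets the hasher and writes a deep representation of the value.
	hashutil.DeepHashObject(hash, equivalenceClass{"rs/frontend", "100m"})
	key := uint64(hash.Sum32())
	fmt.Println(key) // two pods with the same descriptor yield the same key
}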

View file

@ -0,0 +1,187 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"time"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/restclient"
utilnet "k8s.io/kubernetes/pkg/util/net"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
)
const (
DefaultExtenderTimeout = 5 * time.Second
)
// HTTPExtender implements the algorithm.SchedulerExtender interface.
type HTTPExtender struct {
extenderURL string
filterVerb string
prioritizeVerb string
weight int
apiVersion string
client *http.Client
}
func makeTransport(config *schedulerapi.ExtenderConfig) (http.RoundTripper, error) {
var cfg restclient.Config
if config.TLSConfig != nil {
cfg.TLSClientConfig = *config.TLSConfig
}
if config.EnableHttps {
hasCA := len(cfg.CAFile) > 0 || len(cfg.CAData) > 0
if !hasCA {
cfg.Insecure = true
}
}
tlsConfig, err := restclient.TLSConfigFor(&cfg)
if err != nil {
return nil, err
}
if tlsConfig != nil {
return utilnet.SetTransportDefaults(&http.Transport{
TLSClientConfig: tlsConfig,
}), nil
}
return utilnet.SetTransportDefaults(&http.Transport{}), nil
}
func NewHTTPExtender(config *schedulerapi.ExtenderConfig, apiVersion string) (algorithm.SchedulerExtender, error) {
if config.HTTPTimeout.Nanoseconds() == 0 {
config.HTTPTimeout = time.Duration(DefaultExtenderTimeout)
}
transport, err := makeTransport(config)
if err != nil {
return nil, err
}
client := &http.Client{
Transport: transport,
Timeout: config.HTTPTimeout,
}
return &HTTPExtender{
extenderURL: config.URLPrefix,
apiVersion: apiVersion,
filterVerb: config.FilterVerb,
prioritizeVerb: config.PrioritizeVerb,
weight: config.Weight,
client: client,
}, nil
}
// Filter based on extender-implemented predicate functions. The filtered list is
// expected to be a subset of the supplied list. failedNodesMap optionally contains
// the list of failed nodes and failure reasons.
func (h *HTTPExtender) Filter(pod *v1.Pod, nodes []*v1.Node) ([]*v1.Node, schedulerapi.FailedNodesMap, error) {
var result schedulerapi.ExtenderFilterResult
if h.filterVerb == "" {
return nodes, schedulerapi.FailedNodesMap{}, nil
}
nodeItems := make([]v1.Node, 0, len(nodes))
for _, node := range nodes {
nodeItems = append(nodeItems, *node)
}
args := schedulerapi.ExtenderArgs{
Pod: *pod,
Nodes: v1.NodeList{Items: nodeItems},
}
if err := h.send(h.filterVerb, &args, &result); err != nil {
return nil, nil, err
}
if result.Error != "" {
return nil, nil, fmt.Errorf(result.Error)
}
nodeResult := make([]*v1.Node, 0, len(result.Nodes.Items))
for i := range result.Nodes.Items {
nodeResult = append(nodeResult, &result.Nodes.Items[i])
}
return nodeResult, result.FailedNodes, nil
}
// Prioritize based on extender-implemented priority functions. Weight*priority is added
// up for each such priority function. The returned score is added to the score computed
// by Kubernetes scheduler. The total score is used to do the host selection.
func (h *HTTPExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, int, error) {
var result schedulerapi.HostPriorityList
if h.prioritizeVerb == "" {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes {
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: 0})
}
return &result, 0, nil
}
nodeItems := make([]v1.Node, 0, len(nodes))
for _, node := range nodes {
nodeItems = append(nodeItems, *node)
}
args := schedulerapi.ExtenderArgs{
Pod: *pod,
Nodes: v1.NodeList{Items: nodeItems},
}
if err := h.send(h.prioritizeVerb, &args, &result); err != nil {
return nil, 0, err
}
return &result, h.weight, nil
}
// Helper function to send messages to the extender
func (h *HTTPExtender) send(action string, args interface{}, result interface{}) error {
out, err := json.Marshal(args)
if err != nil {
return err
}
url := h.extenderURL + "/" + h.apiVersion + "/" + action
req, err := http.NewRequest("POST", url, bytes.NewReader(out))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")
resp, err := h.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
if err := json.Unmarshal(body, result); err != nil {
return err
}
return nil
}
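
From send above, the wire protocol is a POST of JSON-encoded ExtenderArgs to <urlPrefix>/<apiVersion>/<verb>, answered with an ExtenderFilterResult (the prioritize verb answers with a HostPriorityList instead). A minimal sketch of the filter side of an extender server; the port and path are illustrative and would have to match the ExtenderConfig's urlPrefix plus the scheduler's apiVersion:

package main

import (
	"encoding/json"
	"log"
	"net/http"

	schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
)

func main() {
	// Pairs with an ExtenderConfig of {urlPrefix: "http://127.0.0.1:12346/scheduler",
	// filterVerb: "filter"} and apiVersion "v1".
	http.HandleFunc("/scheduler/v1/filter", func(w http.ResponseWriter, r *http.Request) {
		var args schedulerapi.ExtenderArgs
		if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		// Trivially accept every candidate node; a real extender would filter here.
		result := schedulerapi.ExtenderFilterResult{
			Nodes:       args.Nodes,
			FailedNodes: schedulerapi.FailedNodesMap{},
		}
		json.NewEncoder(w).Encode(&result)
	})
	log.Fatal(http.ListenAndServe(":12346", nil))
}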

View file

@ -0,0 +1,309 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"fmt"
"testing"
"time"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type fitPredicate func(pod *v1.Pod, node *v1.Node) (bool, error)
type priorityFunc func(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error)
type priorityConfig struct {
function priorityFunc
weight int
}
func errorPredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
return false, fmt.Errorf("Some error")
}
func falsePredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
return false, nil
}
func truePredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
return true, nil
}
func machine1PredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine1" {
return true, nil
}
return false, nil
}
func machine2PredicateExtender(pod *v1.Pod, node *v1.Node) (bool, error) {
if node.Name == "machine2" {
return true, nil
}
return false, nil
}
func errorPrioritizerExtender(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error) {
return &schedulerapi.HostPriorityList{}, fmt.Errorf("Some error")
}
func machine1PrioritizerExtender(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes {
score := 1
if node.Name == "machine1" {
score = 10
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: score})
}
return &result, nil
}
func machine2PrioritizerExtender(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, error) {
result := schedulerapi.HostPriorityList{}
for _, node := range nodes {
score := 1
if node.Name == "machine2" {
score = 10
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: score})
}
return &result, nil
}
func machine2Prioritizer(_ *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
result := []schedulerapi.HostPriority{}
for _, node := range nodes {
score := 1
if node.Name == "machine2" {
score = 10
}
result = append(result, schedulerapi.HostPriority{Host: node.Name, Score: score})
}
return result, nil
}
type FakeExtender struct {
predicates []fitPredicate
prioritizers []priorityConfig
weight int
}
func (f *FakeExtender) Filter(pod *v1.Pod, nodes []*v1.Node) ([]*v1.Node, schedulerapi.FailedNodesMap, error) {
filtered := []*v1.Node{}
failedNodesMap := schedulerapi.FailedNodesMap{}
for _, node := range nodes {
fits := true
for _, predicate := range f.predicates {
fit, err := predicate(pod, node)
if err != nil {
return []*v1.Node{}, schedulerapi.FailedNodesMap{}, err
}
if !fit {
fits = false
break
}
}
if fits {
filtered = append(filtered, node)
} else {
failedNodesMap[node.Name] = "FakeExtender failed"
}
}
return filtered, failedNodesMap, nil
}
func (f *FakeExtender) Prioritize(pod *v1.Pod, nodes []*v1.Node) (*schedulerapi.HostPriorityList, int, error) {
result := schedulerapi.HostPriorityList{}
combinedScores := map[string]int{}
for _, prioritizer := range f.prioritizers {
weight := prioritizer.weight
if weight == 0 {
continue
}
priorityFunc := prioritizer.function
prioritizedList, err := priorityFunc(pod, nodes)
if err != nil {
return &schedulerapi.HostPriorityList{}, 0, err
}
for _, hostEntry := range *prioritizedList {
combinedScores[hostEntry.Host] += hostEntry.Score * weight
}
}
for host, score := range combinedScores {
result = append(result, schedulerapi.HostPriority{Host: host, Score: score})
}
return &result, f.weight, nil
}
func TestGenericSchedulerWithExtenders(t *testing.T) {
tests := []struct {
name string
predicates map[string]algorithm.FitPredicate
prioritizers []algorithm.PriorityConfig
extenders []FakeExtender
extenderPredicates []fitPredicate
extenderPrioritizers []priorityConfig
nodes []string
pod *v1.Pod
pods []*v1.Pod
expectedHost string
expectsErr bool
}{
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
},
{
predicates: []fitPredicate{errorPredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
name: "test 1",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
},
{
predicates: []fitPredicate{falsePredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
name: "test 2",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
},
{
predicates: []fitPredicate{machine1PredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectedHost: "machine1",
name: "test 3",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{machine2PredicateExtender},
},
{
predicates: []fitPredicate{machine1PredicateExtender},
},
},
nodes: []string{"machine1", "machine2"},
expectsErr: true,
name: "test 4",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{errorPrioritizerExtender, 10}},
weight: 1,
},
},
nodes: []string{"machine1"},
expectedHost: "machine1",
name: "test 5",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Map: EqualPriorityMap, Weight: 1}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{machine1PrioritizerExtender, 10}},
weight: 1,
},
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{machine2PrioritizerExtender, 10}},
weight: 5,
},
},
nodes: []string{"machine1", "machine2"},
expectedHost: "machine2",
name: "test 6",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: machine2Prioritizer, Weight: 20}},
extenders: []FakeExtender{
{
predicates: []fitPredicate{truePredicateExtender},
prioritizers: []priorityConfig{{machine1PrioritizerExtender, 10}},
weight: 1,
},
},
nodes: []string{"machine1", "machine2"},
expectedHost: "machine2", // machine2 has higher score
name: "test 7",
},
}
for _, test := range tests {
extenders := []algorithm.SchedulerExtender{}
for ii := range test.extenders {
extenders = append(extenders, &test.extenders[ii])
}
cache := schedulercache.New(time.Duration(0), wait.NeverStop)
for _, pod := range test.pods {
cache.AddPod(pod)
}
for _, name := range test.nodes {
cache.AddNode(&v1.Node{ObjectMeta: v1.ObjectMeta{Name: name}})
}
scheduler := NewGenericScheduler(
cache, test.predicates, algorithm.EmptyMetadataProducer, test.prioritizers, algorithm.EmptyMetadataProducer, extenders)
machine, err := scheduler.Schedule(test.pod, algorithm.FakeNodeLister(makeNodeList(test.nodes)))
if test.expectsErr {
if err == nil {
t.Errorf("Unexpected non-error for %s, machine %s", test.name, machine)
}
} else {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if test.expectedHost != machine {
t.Errorf("Failed : %s, Expected: %s, Saw: %s", test.name, test.expectedHost, machine)
}
}
}
}

View file

@ -0,0 +1,68 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"factory.go",
"plugins.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api:go_default_library",
"//pkg/api/errors:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apis/extensions/v1beta1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/clientset_generated/release_1_5:go_default_library",
"//pkg/controller/informers:go_default_library",
"//pkg/fields:go_default_library",
"//pkg/types:go_default_library",
"//pkg/util/runtime:go_default_library",
"//pkg/util/sets:go_default_library",
"//pkg/util/validation:go_default_library",
"//plugin/pkg/scheduler:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/algorithm/predicates:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/api/validation:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
"//vendor:github.com/golang/glog",
],
)
go_test(
name = "go_default_test",
srcs = [
"factory_test.go",
"plugins_test.go",
],
library = "go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api/testapi:go_default_library",
"//pkg/api/testing:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/apimachinery/registered:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/client/clientset_generated/release_1_5:go_default_library",
"//pkg/client/restclient:go_default_library",
"//pkg/runtime:go_default_library",
"//pkg/types:go_default_library",
"//pkg/util/testing:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//plugin/pkg/scheduler/api:go_default_library",
"//plugin/pkg/scheduler/api/latest:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
],
)

File diff suppressed because it is too large

View file

@ -0,0 +1,476 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"net/http"
"net/http/httptest"
"reflect"
"testing"
"time"
"k8s.io/kubernetes/pkg/api/testapi"
apitesting "k8s.io/kubernetes/pkg/api/testing"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/apimachinery/registered"
"k8s.io/kubernetes/pkg/client/cache"
clientset "k8s.io/kubernetes/pkg/client/clientset_generated/release_1_5"
"k8s.io/kubernetes/pkg/client/restclient"
"k8s.io/kubernetes/pkg/runtime"
"k8s.io/kubernetes/pkg/types"
utiltesting "k8s.io/kubernetes/pkg/util/testing"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
latestschedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api/latest"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
func TestCreate(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
factory := NewConfigFactory(client, v1.DefaultSchedulerName, v1.DefaultHardPodAffinitySymmetricWeight, v1.DefaultFailureDomains)
factory.Create()
}
// TestCreateFromConfig configures a scheduler from a policy defined in a file
// It combines some configurable predicates/priorities with some pre-defined ones
func TestCreateFromConfig(t *testing.T) {
var configData []byte
var policy schedulerapi.Policy
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
factory := NewConfigFactory(client, v1.DefaultSchedulerName, v1.DefaultHardPodAffinitySymmetricWeight, v1.DefaultFailureDomains)
// Pre-register some predicate and priority functions
RegisterFitPredicate("PredicateOne", PredicateOne)
RegisterFitPredicate("PredicateTwo", PredicateTwo)
RegisterPriorityFunction("PriorityOne", PriorityOne, 1)
RegisterPriorityFunction("PriorityTwo", PriorityTwo, 1)
configData = []byte(`{
"kind" : "Policy",
"apiVersion" : "v1",
"predicates" : [
{"name" : "TestZoneAffinity", "argument" : {"serviceAffinity" : {"labels" : ["zone"]}}},
{"name" : "TestRequireZone", "argument" : {"labelsPresence" : {"labels" : ["zone"], "presence" : true}}},
{"name" : "PredicateOne"},
{"name" : "PredicateTwo"}
],
"priorities" : [
{"name" : "RackSpread", "weight" : 3, "argument" : {"serviceAntiAffinity" : {"label" : "rack"}}},
{"name" : "PriorityOne", "weight" : 2},
{"name" : "PriorityTwo", "weight" : 1} ]
}`)
if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
t.Errorf("Invalid configuration: %v", err)
}
factory.CreateFromConfig(policy)
}
func TestCreateFromEmptyConfig(t *testing.T) {
var configData []byte
var policy schedulerapi.Policy
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
factory := NewConfigFactory(client, v1.DefaultSchedulerName, v1.DefaultHardPodAffinitySymmetricWeight, v1.DefaultFailureDomains)
configData = []byte(`{}`)
if err := runtime.DecodeInto(latestschedulerapi.Codec, configData, &policy); err != nil {
t.Errorf("Invalid configuration: %v", err)
}
factory.CreateFromConfig(policy)
}
func PredicateOne(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
func PredicateTwo(pod *v1.Pod, meta interface{}, nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason, error) {
return true, nil, nil
}
func PriorityOne(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
func PriorityTwo(pod *v1.Pod, nodeNameToInfo map[string]*schedulercache.NodeInfo, nodes []*v1.Node) (schedulerapi.HostPriorityList, error) {
return []schedulerapi.HostPriority{}, nil
}
func TestDefaultErrorFunc(t *testing.T) {
testPod := &v1.Pod{
ObjectMeta: v1.ObjectMeta{Name: "foo", Namespace: "bar"},
Spec: apitesting.V1DeepEqualSafePodSpec(),
}
handler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: runtime.EncodeOrDie(testapi.Default.Codec(), testPod),
T: t,
}
mux := http.NewServeMux()
// FakeHandler mustn't be sent requests other than the one you want to test.
mux.Handle(testapi.Default.ResourcePath("pods", "bar", "foo"), &handler)
server := httptest.NewServer(mux)
defer server.Close()
factory := NewConfigFactory(clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}}), v1.DefaultSchedulerName, v1.DefaultHardPodAffinitySymmetricWeight, v1.DefaultFailureDomains)
queue := cache.NewFIFO(cache.MetaNamespaceKeyFunc)
podBackoff := podBackoff{
perPodBackoff: map[types.NamespacedName]*backoffEntry{},
clock: &fakeClock{},
defaultDuration: 1 * time.Millisecond,
maxDuration: 1 * time.Second,
}
errFunc := factory.makeDefaultErrorFunc(&podBackoff, queue)
errFunc(testPod, nil)
for {
// This is a terrible way to do this but I plan on replacing this
// whole error handling system in the future. The test will time
// out if something doesn't work.
time.Sleep(10 * time.Millisecond)
got, exists, _ := queue.Get(testPod)
if !exists {
continue
}
handler.ValidateRequest(t, testapi.Default.ResourcePath("pods", "bar", "foo"), "GET", nil)
if e, a := testPod, got; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %v, got %v", e, a)
}
break
}
}
func TestNodeEnumerator(t *testing.T) {
testList := &v1.NodeList{
Items: []v1.Node{
{ObjectMeta: v1.ObjectMeta{Name: "foo"}},
{ObjectMeta: v1.ObjectMeta{Name: "bar"}},
{ObjectMeta: v1.ObjectMeta{Name: "baz"}},
},
}
me := nodeEnumerator{testList}
if e, a := 3, me.Len(); e != a {
t.Fatalf("expected %v, got %v", e, a)
}
for i := range testList.Items {
gotObj := me.Get(i)
if e, a := testList.Items[i].Name, gotObj.(*v1.Node).Name; e != a {
t.Errorf("Expected %v, got %v", e, a)
}
if e, a := &testList.Items[i], gotObj; !reflect.DeepEqual(e, a) {
t.Errorf("Expected %#v, got %v#", e, a)
}
}
}
type fakeClock struct {
t time.Time
}
func (f *fakeClock) Now() time.Time {
return f.t
}
func TestBind(t *testing.T) {
table := []struct {
binding *v1.Binding
}{
{binding: &v1.Binding{
ObjectMeta: v1.ObjectMeta{
Namespace: v1.NamespaceDefault,
Name: "foo",
},
Target: v1.ObjectReference{
Name: "foohost.kubernetes.mydomain.com",
},
}},
}
for _, item := range table {
handler := utiltesting.FakeHandler{
StatusCode: 200,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
b := binder{client}
if err := b.Bind(item.binding); err != nil {
t.Errorf("Unexpected error: %v", err)
continue
}
expectedBody := runtime.EncodeOrDie(testapi.Default.Codec(), item.binding)
handler.ValidateRequest(t, testapi.Default.ResourcePath("bindings", v1.NamespaceDefault, ""), "POST", &expectedBody)
}
}
func TestBackoff(t *testing.T) {
clock := fakeClock{}
backoff := podBackoff{
perPodBackoff: map[types.NamespacedName]*backoffEntry{},
clock: &clock,
defaultDuration: 1 * time.Second,
maxDuration: 60 * time.Second,
}
tests := []struct {
podID types.NamespacedName
expectedDuration time.Duration
advanceClock time.Duration
}{
{
podID: types.NamespacedName{Namespace: "default", Name: "foo"},
expectedDuration: 1 * time.Second,
},
{
podID: types.NamespacedName{Namespace: "default", Name: "foo"},
expectedDuration: 2 * time.Second,
},
{
podID: types.NamespacedName{Namespace: "default", Name: "foo"},
expectedDuration: 4 * time.Second,
},
{
podID: types.NamespacedName{Namespace: "default", Name: "bar"},
expectedDuration: 1 * time.Second,
advanceClock: 120 * time.Second,
},
// 'foo' should have been gc'd here.
{
podID: types.NamespacedName{Namespace: "default", Name: "foo"},
expectedDuration: 1 * time.Second,
},
}
for _, test := range tests {
duration := backoff.getEntry(test.podID).getBackoff(backoff.maxDuration)
if duration != test.expectedDuration {
t.Errorf("expected: %s, got %s for %s", test.expectedDuration.String(), duration.String(), test.podID)
}
clock.t = clock.t.Add(test.advanceClock)
backoff.gc()
}
fooID := types.NamespacedName{Namespace: "default", Name: "foo"}
backoff.perPodBackoff[fooID].backoff = 60 * time.Second
duration := backoff.getEntry(fooID).getBackoff(backoff.maxDuration)
if duration != 60*time.Second {
t.Errorf("expected: 60, got %s", duration.String())
}
// Verify that we split on namespaces correctly, same name, different namespace
fooID.Namespace = "other"
duration = backoff.getEntry(fooID).getBackoff(backoff.maxDuration)
if duration != 1*time.Second {
t.Errorf("expected: 1, got %s", duration.String())
}
}
// TestResponsibleForPod tests that a pod with an annotation that should cause it to
// be picked up by the default scheduler is in fact picked up by the default scheduler.
// The test creates two schedulers: the default scheduler and another named "foo-scheduler".
// A pod must be picked up by at most one of the two schedulers.
func TestResponsibleForPod(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
// factory of "default-scheduler"
factoryDefaultScheduler := NewConfigFactory(client, v1.DefaultSchedulerName, v1.DefaultHardPodAffinitySymmetricWeight, v1.DefaultFailureDomains)
// factory of "foo-scheduler"
factoryFooScheduler := NewConfigFactory(client, "foo-scheduler", v1.DefaultHardPodAffinitySymmetricWeight, v1.DefaultFailureDomains)
// scheduler annotations to be tested
schedulerAnnotationFitsDefault := map[string]string{"scheduler.alpha.kubernetes.io/name": "default-scheduler"}
schedulerAnnotationFitsFoo := map[string]string{"scheduler.alpha.kubernetes.io/name": "foo-scheduler"}
schedulerAnnotationFitsNone := map[string]string{"scheduler.alpha.kubernetes.io/name": "bar-scheduler"}
tests := []struct {
pod *v1.Pod
pickedByDefault bool
pickedByFoo bool
}{
{
// pod with no annotation "scheduler.alpha.kubernetes.io/name=<scheduler-name>" should be
// picked by the default scheduler, NOT by the one of name "foo-scheduler"
pod: &v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "foo", Namespace: "bar"}},
pickedByDefault: true,
pickedByFoo: false,
},
{
// pod with annotation "scheduler.alpha.kubernetes.io/name=default-scheduler" should be picked
// by the scheduler of name "default-scheduler", NOT by the one of name "foo-scheduler"
pod: &v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "foo", Namespace: "bar", Annotations: schedulerAnnotationFitsDefault}},
pickedByDefault: true,
pickedByFoo: false,
},
{
// pod with annotation "scheduler.alpha.kubernetes.io/name=foo-scheduler" should NOT be
// picked by the scheduler of name "default-scheduler", but by the one of name "foo-scheduler"
pod: &v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "foo", Namespace: "bar", Annotations: schedulerAnnotationFitsFoo}},
pickedByDefault: false,
pickedByFoo: true,
},
{
// pod with annotation "scheduler.alpha.kubernetes.io/name=bar-scheduler" should be
// picked by neither the scheduler of name "default-scheduler" nor the one of name "foo-scheduler"
pod: &v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "foo", Namespace: "bar", Annotations: schedulerAnnotationFitsNone}},
pickedByDefault: false,
pickedByFoo: false,
},
}
for _, test := range tests {
podOfDefault := factoryDefaultScheduler.responsibleForPod(test.pod)
podOfFoo := factoryFooScheduler.responsibleForPod(test.pod)
results := []bool{podOfDefault, podOfFoo}
expected := []bool{test.pickedByDefault, test.pickedByFoo}
if !reflect.DeepEqual(results, expected) {
t.Errorf("expected: {%v, %v}, got {%v, %v}", test.pickedByDefault, test.pickedByFoo, podOfDefault, podOfFoo)
}
}
}
func TestInvalidHardPodAffinitySymmetricWeight(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
// TODO: Uncomment when fix #19254
// defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
// factory of "default-scheduler"
factory := NewConfigFactory(client, v1.DefaultSchedulerName, -1, v1.DefaultFailureDomains)
_, err := factory.Create()
if err == nil {
t.Errorf("expected err: invalid hardPodAffinitySymmetricWeight, got nothing")
}
}
func TestInvalidFactoryArgs(t *testing.T) {
handler := utiltesting.FakeHandler{
StatusCode: 500,
ResponseBody: "",
T: t,
}
server := httptest.NewServer(&handler)
defer server.Close()
client := clientset.NewForConfigOrDie(&restclient.Config{Host: server.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &registered.GroupOrDie(v1.GroupName).GroupVersion}})
testCases := []struct {
hardPodAffinitySymmetricWeight int
failureDomains string
expectErr string
}{
{
hardPodAffinitySymmetricWeight: -1,
failureDomains: v1.DefaultFailureDomains,
expectErr: "invalid hardPodAffinitySymmetricWeight: -1, must be in the range 0-100",
},
{
hardPodAffinitySymmetricWeight: 101,
failureDomains: v1.DefaultFailureDomains,
expectErr: "invalid hardPodAffinitySymmetricWeight: 101, must be in the range 0-100",
},
{
hardPodAffinitySymmetricWeight: 0,
failureDomains: "INVALID_FAILURE_DOMAINS",
expectErr: "invalid failure domain: INVALID_FAILURE_DOMAINS",
},
}
for _, test := range testCases {
factory := NewConfigFactory(client, v1.DefaultSchedulerName, test.hardPodAffinitySymmetricWeight, test.failureDomains)
_, err := factory.Create()
if err == nil {
t.Errorf("expected err: %s, got nothing", test.expectErr)
}
}
}
func TestNodeConditionPredicate(t *testing.T) {
nodeFunc := getNodeConditionPredicate()
nodeList := &v1.NodeList{
Items: []v1.Node{
// node1 considered
{ObjectMeta: v1.ObjectMeta{Name: "node1"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}}}},
// node2 ignored - node not Ready
{ObjectMeta: v1.ObjectMeta{Name: "node2"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}}}},
// node3 ignored - node out of disk
{ObjectMeta: v1.ObjectMeta{Name: "node3"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node4 considered
{ObjectMeta: v1.ObjectMeta{Name: "node4"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
// node5 ignored - node out of disk
{ObjectMeta: v1.ObjectMeta{Name: "node5"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node6 considered
{ObjectMeta: v1.ObjectMeta{Name: "node6"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
// node7 ignored - node out of disk, node not Ready
{ObjectMeta: v1.ObjectMeta{Name: "node7"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}}},
// node8 ignored - node not Ready
{ObjectMeta: v1.ObjectMeta{Name: "node8"}, Status: v1.NodeStatus{Conditions: []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse}, {Type: v1.NodeOutOfDisk, Status: v1.ConditionFalse}}}},
// node9 ignored - node unschedulable
{ObjectMeta: v1.ObjectMeta{Name: "node9"}, Spec: v1.NodeSpec{Unschedulable: true}},
// node10 considered
{ObjectMeta: v1.ObjectMeta{Name: "node10"}, Spec: v1.NodeSpec{Unschedulable: false}},
// node11 considered
{ObjectMeta: v1.ObjectMeta{Name: "node11"}},
},
}
nodeNames := []string{}
for _, node := range nodeList.Items {
if nodeFunc(&node) {
nodeNames = append(nodeNames, node.Name)
}
}
expectedNodes := []string{"node1", "node4", "node6", "node10", "node11"}
if !reflect.DeepEqual(expectedNodes, nodeNames) {
t.Errorf("expected: %v, got %v", expectedNodes, nodeNames)
}
}

View file

@ -0,0 +1,431 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import (
"fmt"
"regexp"
"sort"
"strings"
"sync"
"k8s.io/kubernetes/pkg/util/sets"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"github.com/golang/glog"
)
// PluginFactoryArgs are passed to all plugin factory functions.
type PluginFactoryArgs struct {
PodLister algorithm.PodLister
ServiceLister algorithm.ServiceLister
ControllerLister algorithm.ControllerLister
ReplicaSetLister algorithm.ReplicaSetLister
NodeLister algorithm.NodeLister
NodeInfo predicates.NodeInfo
PVInfo predicates.PersistentVolumeInfo
PVCInfo predicates.PersistentVolumeClaimInfo
HardPodAffinitySymmetricWeight int
FailureDomains []string
}
// MetadataProducerFactory produces MetadataProducer from the given args.
type MetadataProducerFactory func(PluginFactoryArgs) algorithm.MetadataProducer
// A FitPredicateFactory produces a FitPredicate from the given args.
type FitPredicateFactory func(PluginFactoryArgs) algorithm.FitPredicate
// DEPRECATED
// Use Map-Reduce pattern for priority functions.
// A PriorityFunctionFactory produces a PriorityConfig from the given args.
type PriorityFunctionFactory func(PluginFactoryArgs) algorithm.PriorityFunction
// A PriorityFunctionFactory produces map & reduce priority functions
// from a given args.
// FIXME: Rename to PriorityFunctionFactory.
type PriorityFunctionFactory2 func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction)
// A PriorityConfigFactory produces a PriorityConfig from the given function and weight
type PriorityConfigFactory struct {
Function PriorityFunctionFactory
MapReduceFunction PriorityFunctionFactory2
Weight int
}
var (
schedulerFactoryMutex sync.Mutex
// maps that hold registered algorithm types
fitPredicateMap = make(map[string]FitPredicateFactory)
priorityFunctionMap = make(map[string]PriorityConfigFactory)
algorithmProviderMap = make(map[string]AlgorithmProviderConfig)
// Registered metadata producers
priorityMetadataProducer MetadataProducerFactory
predicateMetadataProducer MetadataProducerFactory
// get equivalence pod function
getEquivalencePodFunc algorithm.GetEquivalencePodFunc = nil
)
const (
DefaultProvider = "DefaultProvider"
)
type AlgorithmProviderConfig struct {
FitPredicateKeys sets.String
PriorityFunctionKeys sets.String
}
// RegisterFitPredicate registers a fit predicate with the algorithm
// registry. Returns the name with which the predicate was registered.
func RegisterFitPredicate(name string, predicate algorithm.FitPredicate) string {
return RegisterFitPredicateFactory(name, func(PluginFactoryArgs) algorithm.FitPredicate { return predicate })
}
// RegisterFitPredicateFactory registers a fit predicate factory with the
// algorithm registry. Returns the name with which the predicate was registered.
func RegisterFitPredicateFactory(name string, predicateFactory FitPredicateFactory) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
fitPredicateMap[name] = predicateFactory
return name
}
// Registers a custom fit predicate with the algorithm registry.
// Returns the name with which the predicate was registered.
func RegisterCustomFitPredicate(policy schedulerapi.PredicatePolicy) string {
var predicateFactory FitPredicateFactory
var ok bool
validatePredicateOrDie(policy)
// generate the predicate function, if a custom type is requested
if policy.Argument != nil {
if policy.Argument.ServiceAffinity != nil {
predicateFactory = func(args PluginFactoryArgs) algorithm.FitPredicate {
predicate, precomputationFunction := predicates.NewServiceAffinityPredicate(
args.PodLister,
args.ServiceLister,
args.NodeInfo,
policy.Argument.ServiceAffinity.Labels,
)
// Once we generate the predicate we should also Register the Precomputation
predicates.RegisterPredicatePrecomputation(policy.Name, precomputationFunction)
return predicate
}
} else if policy.Argument.LabelsPresence != nil {
predicateFactory = func(args PluginFactoryArgs) algorithm.FitPredicate {
return predicates.NewNodeLabelPredicate(
policy.Argument.LabelsPresence.Labels,
policy.Argument.LabelsPresence.Presence,
)
}
}
} else if predicateFactory, ok = fitPredicateMap[policy.Name]; ok {
// checking to see if a pre-defined predicate is requested
glog.V(2).Infof("Predicate type %s already registered, reusing.", policy.Name)
return policy.Name
}
if predicateFactory == nil {
glog.Fatalf("Invalid configuration: Predicate type not found for %s", policy.Name)
}
return RegisterFitPredicateFactory(policy.Name, predicateFactory)
}
// This check is useful for testing providers.
func IsFitPredicateRegistered(name string) bool {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
_, ok := fitPredicateMap[name]
return ok
}
func RegisterPriorityMetadataProducerFactory(factory MetadataProducerFactory) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
priorityMetadataProducer = factory
}
func RegisterPredicateMetadataProducerFactory(factory MetadataProducerFactory) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
predicateMetadataProducer = factory
}
// DEPRECATED
// Use Map-Reduce pattern for priority functions.
// Registers a priority function with the algorithm registry. Returns the name
// with which the function was registered.
func RegisterPriorityFunction(name string, function algorithm.PriorityFunction, weight int) string {
return RegisterPriorityConfigFactory(name, PriorityConfigFactory{
Function: func(PluginFactoryArgs) algorithm.PriorityFunction {
return function
},
Weight: weight,
})
}
// Registers a priority function with the algorithm registry. Returns the name
// with which the function was registered.
// FIXME: Rename to PriorityFunctionFactory.
func RegisterPriorityFunction2(
name string,
mapFunction algorithm.PriorityMapFunction,
reduceFunction algorithm.PriorityReduceFunction,
weight int) string {
return RegisterPriorityConfigFactory(name, PriorityConfigFactory{
MapReduceFunction: func(PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
return mapFunction, reduceFunction
},
Weight: weight,
})
}
func RegisterPriorityConfigFactory(name string, pcf PriorityConfigFactory) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
priorityFunctionMap[name] = pcf
return name
}
// RegisterCustomPriorityFunction registers a custom priority function with the algorithm registry.
// It returns the name with which the priority function was registered.
func RegisterCustomPriorityFunction(policy schedulerapi.PriorityPolicy) string {
var pcf *PriorityConfigFactory
validatePriorityOrDie(policy)
// generate the priority function, if a custom priority is requested
if policy.Argument != nil {
if policy.Argument.ServiceAntiAffinity != nil {
pcf = &PriorityConfigFactory{
Function: func(args PluginFactoryArgs) algorithm.PriorityFunction {
return priorities.NewServiceAntiAffinityPriority(
args.PodLister,
args.ServiceLister,
policy.Argument.ServiceAntiAffinity.Label,
)
},
Weight: policy.Weight,
}
} else if policy.Argument.LabelPreference != nil {
pcf = &PriorityConfigFactory{
MapReduceFunction: func(args PluginFactoryArgs) (algorithm.PriorityMapFunction, algorithm.PriorityReduceFunction) {
return priorities.NewNodeLabelPriority(
policy.Argument.LabelPreference.Label,
policy.Argument.LabelPreference.Presence,
)
},
Weight: policy.Weight,
}
}
} else if existingPcf, ok := priorityFunctionMap[policy.Name]; ok {
glog.V(2).Infof("Priority type %s already registered, reusing.", policy.Name)
// set/update the weight based on the policy
pcf = &PriorityConfigFactory{
Function: existingPcf.Function,
MapReduceFunction: existingPcf.MapReduceFunction,
Weight: policy.Weight,
}
}
if pcf == nil {
glog.Fatalf("Invalid configuration: Priority type not found for %s", policy.Name)
}
return RegisterPriorityConfigFactory(policy.Name, *pcf)
}
func RegisterGetEquivalencePodFunction(equivalenceFunc algorithm.GetEquivalencePodFunc) {
getEquivalencePodFunc = equivalenceFunc
}
// This check is useful for testing providers.
func IsPriorityFunctionRegistered(name string) bool {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
_, ok := priorityFunctionMap[name]
return ok
}
// Registers a new algorithm provider with the algorithm registry. This should
// be called from the init function in a provider plugin.
func RegisterAlgorithmProvider(name string, predicateKeys, priorityKeys sets.String) string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
validateAlgorithmNameOrDie(name)
algorithmProviderMap[name] = AlgorithmProviderConfig{
FitPredicateKeys: predicateKeys,
PriorityFunctionKeys: priorityKeys,
}
return name
}
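// A sketch of how a provider plugin might register itself from init()
// (the provider and key names here are illustrative):
//
//	func init() {
//		RegisterAlgorithmProvider(
//			"ExampleProvider",
//			sets.NewString("PodFitsResources", "PodFitsHostPorts"),
//			sets.NewString("LeastRequestedPriority"),
//		)
//	}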
// This function should not be used to modify providers. It is publicly visible for testing.
func GetAlgorithmProvider(name string) (*AlgorithmProviderConfig, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
var provider AlgorithmProviderConfig
provider, ok := algorithmProviderMap[name]
if !ok {
return nil, fmt.Errorf("plugin %q has not been registered", name)
}
return &provider, nil
}
func getFitPredicateFunctions(names sets.String, args PluginFactoryArgs) (map[string]algorithm.FitPredicate, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
predicates := map[string]algorithm.FitPredicate{}
for _, name := range names.List() {
factory, ok := fitPredicateMap[name]
if !ok {
return nil, fmt.Errorf("Invalid predicate name %q specified - no corresponding function found", name)
}
predicates[name] = factory(args)
}
return predicates, nil
}
func getPriorityMetadataProducer(args PluginFactoryArgs) (algorithm.MetadataProducer, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
if priorityMetadataProducer == nil {
return algorithm.EmptyMetadataProducer, nil
}
return priorityMetadataProducer(args), nil
}
func getPredicateMetadataProducer(args PluginFactoryArgs) (algorithm.MetadataProducer, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
if predicateMetadataProducer == nil {
return algorithm.EmptyMetadataProducer, nil
}
return predicateMetadataProducer(args), nil
}
func getPriorityFunctionConfigs(names sets.String, args PluginFactoryArgs) ([]algorithm.PriorityConfig, error) {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
configs := []algorithm.PriorityConfig{}
for _, name := range names.List() {
factory, ok := priorityFunctionMap[name]
if !ok {
return nil, fmt.Errorf("Invalid priority name %s specified - no corresponding function found", name)
}
if factory.Function != nil {
configs = append(configs, algorithm.PriorityConfig{
Function: factory.Function(args),
Weight: factory.Weight,
})
} else {
mapFunction, reduceFunction := factory.MapReduceFunction(args)
configs = append(configs, algorithm.PriorityConfig{
Map: mapFunction,
Reduce: reduceFunction,
Weight: factory.Weight,
})
}
}
return configs, nil
}
var validName = regexp.MustCompile("^[a-zA-Z0-9]([-a-zA-Z0-9]*[a-zA-Z0-9])?$")
func validateAlgorithmNameOrDie(name string) {
if !validName.MatchString(name) {
glog.Fatalf("Algorithm name %v does not match the name validation regexp \"%v\".", name, validName)
}
}
func validatePredicateOrDie(predicate schedulerapi.PredicatePolicy) {
if predicate.Argument != nil {
numArgs := 0
if predicate.Argument.ServiceAffinity != nil {
numArgs++
}
if predicate.Argument.LabelsPresence != nil {
numArgs++
}
if numArgs != 1 {
glog.Fatalf("Exactly 1 predicate argument is required, numArgs: %v, Predicate: %s", numArgs, predicate.Name)
}
}
}
func validatePriorityOrDie(priority schedulerapi.PriorityPolicy) {
if priority.Argument != nil {
numArgs := 0
if priority.Argument.ServiceAntiAffinity != nil {
numArgs++
}
if priority.Argument.LabelPreference != nil {
numArgs++
}
if numArgs != 1 {
glog.Fatalf("Exactly 1 priority argument is required, numArgs: %v, Priority: %s", numArgs, priority.Name)
}
}
}
func ListRegisteredFitPredicates() []string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
names := []string{}
for name := range fitPredicateMap {
names = append(names, name)
}
return names
}
func ListRegisteredPriorityFunctions() []string {
schedulerFactoryMutex.Lock()
defer schedulerFactoryMutex.Unlock()
names := []string{}
for name := range priorityFunctionMap {
names = append(names, name)
}
return names
}
// ListAlgorithmProviders is called when listing all available algorithm providers in `kube-scheduler --help`
func ListAlgorithmProviders() string {
var availableAlgorithmProviders []string
for name := range algorithmProviderMap {
availableAlgorithmProviders = append(availableAlgorithmProviders, name)
}
sort.Strings(availableAlgorithmProviders)
return strings.Join(availableAlgorithmProviders, " | ")
}

View file

@ -0,0 +1,41 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package factory
import "testing"
func TestAlgorithmNameValidation(t *testing.T) {
algorithmNamesShouldValidate := []string{
"1SomeAlgo1rithm",
"someAlgor-ithm1",
}
algorithmNamesShouldNotValidate := []string{
"-SomeAlgorithm",
"SomeAlgorithm-",
"Some,Alg:orithm",
}
for _, name := range algorithmNamesShouldValidate {
if !validName.MatchString(name) {
t.Errorf("%v should be a valid algorithm name but is not valid.", name)
}
}
for _, name := range algorithmNamesShouldNotValidate {
if validName.MatchString(name) {
t.Errorf("%v should be an invalid algorithm name but is valid.", name)
}
}
}

View file

@ -0,0 +1,411 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"bytes"
"fmt"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/util"
"k8s.io/kubernetes/pkg/util/errors"
"k8s.io/kubernetes/pkg/util/workqueue"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
schedulerapi "k8s.io/kubernetes/plugin/pkg/scheduler/api"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
type FailedPredicateMap map[string][]algorithm.PredicateFailureReason
type FitError struct {
Pod *v1.Pod
FailedPredicates FailedPredicateMap
}
var ErrNoNodesAvailable = fmt.Errorf("no nodes available to schedule pods")
// Error returns detailed information about why the pod failed to fit on each node.
func (f *FitError) Error() string {
var buf bytes.Buffer
buf.WriteString(fmt.Sprintf("pod (%s) failed to fit in any node\n", f.Pod.Name))
reasons := make(map[string]int)
for _, predicates := range f.FailedPredicates {
for _, pred := range predicates {
reasons[pred.GetReason()] += 1
}
}
sortReasonsHistogram := func() []string {
reasonStrings := []string{}
for k, v := range reasons {
reasonStrings = append(reasonStrings, fmt.Sprintf("%v (%v)", k, v))
}
sort.Strings(reasonStrings)
return reasonStrings
}
reasonMsg := fmt.Sprintf("fit failure summary on nodes: %v", strings.Join(sortReasonsHistogram(), ", "))
buf.WriteString(reasonMsg)
return buf.String()
}
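// For a pod "foo" rejected by the same predicate on three nodes, Error()
// produces output along these lines (illustrative):
//
//	pod (foo) failed to fit in any node
//	fit failure summary on nodes: Insufficient cpu (3)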
type genericScheduler struct {
cache schedulercache.Cache
predicates map[string]algorithm.FitPredicate
priorityMetaProducer algorithm.MetadataProducer
predicateMetaProducer algorithm.MetadataProducer
prioritizers []algorithm.PriorityConfig
extenders []algorithm.SchedulerExtender
pods algorithm.PodLister
lastNodeIndexLock sync.Mutex
lastNodeIndex uint64
cachedNodeInfoMap map[string]*schedulercache.NodeInfo
equivalenceCache *EquivalenceCache
}
// Schedule tries to schedule the given pod to one of the nodes in the node list.
// If it succeeds, it will return the name of the node.
// If it fails, it will return a FitError with reasons.
func (g *genericScheduler) Schedule(pod *v1.Pod, nodeLister algorithm.NodeLister) (string, error) {
var trace *util.Trace
if pod != nil {
trace = util.NewTrace(fmt.Sprintf("Scheduling %s/%s", pod.Namespace, pod.Name))
} else {
trace = util.NewTrace("Scheduling <nil> pod")
}
defer trace.LogIfLong(100 * time.Millisecond)
nodes, err := nodeLister.List()
if err != nil {
return "", err
}
if len(nodes) == 0 {
return "", ErrNoNodesAvailable
}
// Used for all fit and priority funcs.
err = g.cache.UpdateNodeNameToInfoMap(g.cachedNodeInfoMap)
if err != nil {
return "", err
}
// TODO(harryz) Check if equivalenceCache is enabled and call scheduleWithEquivalenceClass here
trace.Step("Computing predicates")
filteredNodes, failedPredicateMap, err := findNodesThatFit(pod, g.cachedNodeInfoMap, nodes, g.predicates, g.extenders, g.predicateMetaProducer)
if err != nil {
return "", err
}
if len(filteredNodes) == 0 {
return "", &FitError{
Pod: pod,
FailedPredicates: failedPredicateMap,
}
}
trace.Step("Prioritizing")
metaPrioritiesInterface := g.priorityMetaProducer(pod, g.cachedNodeInfoMap)
priorityList, err := PrioritizeNodes(pod, g.cachedNodeInfoMap, metaPrioritiesInterface, g.prioritizers, filteredNodes, g.extenders)
if err != nil {
return "", err
}
trace.Step("Selecting host")
return g.selectHost(priorityList)
}
// selectHost takes a prioritized list of nodes and then picks one
// in a round-robin manner from the nodes that had the highest score.
func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList) (string, error) {
if len(priorityList) == 0 {
return "", fmt.Errorf("empty priorityList")
}
sort.Sort(sort.Reverse(priorityList))
maxScore := priorityList[0].Score
firstAfterMaxScore := sort.Search(len(priorityList), func(i int) bool { return priorityList[i].Score < maxScore })
g.lastNodeIndexLock.Lock()
ix := int(g.lastNodeIndex % uint64(firstAfterMaxScore))
g.lastNodeIndex++
g.lastNodeIndexLock.Unlock()
return priorityList[ix].Host, nil
}
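// A worked example: with sorted scores [10, 10, 7], firstAfterMaxScore is 2,
// so successive calls alternate between the two top-scoring hosts
// (lastNodeIndex % 2 yields index 0, then 1, then 0, and so on).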
// findNodesThatFit filters the nodes to find the ones that fit, based on the given predicate functions.
// Each node is passed through the predicate functions to determine if it is a fit.
func findNodesThatFit(
pod *v1.Pod,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
nodes []*v1.Node,
predicateFuncs map[string]algorithm.FitPredicate,
extenders []algorithm.SchedulerExtender,
metadataProducer algorithm.MetadataProducer,
) ([]*v1.Node, FailedPredicateMap, error) {
var filtered []*v1.Node
failedPredicateMap := FailedPredicateMap{}
if len(predicateFuncs) == 0 {
filtered = nodes
} else {
// Create filtered list with enough space to avoid growing it
// and allow assigning.
filtered = make([]*v1.Node, len(nodes))
errs := []error{}
var predicateResultLock sync.Mutex
var filteredLen int32
// We can use the same metadata producer for all nodes.
meta := metadataProducer(pod, nodeNameToInfo)
checkNode := func(i int) {
nodeName := nodes[i].Name
fits, failedPredicates, err := podFitsOnNode(pod, meta, nodeNameToInfo[nodeName], predicateFuncs)
if err != nil {
predicateResultLock.Lock()
errs = append(errs, err)
predicateResultLock.Unlock()
return
}
if fits {
filtered[atomic.AddInt32(&filteredLen, 1)-1] = nodes[i]
} else {
predicateResultLock.Lock()
failedPredicateMap[nodeName] = failedPredicates
predicateResultLock.Unlock()
}
}
workqueue.Parallelize(16, len(nodes), checkNode)
filtered = filtered[:filteredLen]
if len(errs) > 0 {
return []*v1.Node{}, FailedPredicateMap{}, errors.NewAggregate(errs)
}
}
if len(filtered) > 0 && len(extenders) != 0 {
for _, extender := range extenders {
filteredList, failedMap, err := extender.Filter(pod, filtered)
if err != nil {
return []*v1.Node{}, FailedPredicateMap{}, err
}
for failedNodeName, failedMsg := range failedMap {
if _, found := failedPredicateMap[failedNodeName]; !found {
failedPredicateMap[failedNodeName] = []algorithm.PredicateFailureReason{}
}
failedPredicateMap[failedNodeName] = append(failedPredicateMap[failedNodeName], predicates.NewFailureReason(failedMsg))
}
filtered = filteredList
if len(filtered) == 0 {
break
}
}
}
return filtered, failedPredicateMap, nil
}
// podFitsOnNode checks whether a node, given its NodeInfo, satisfies all predicateFuncs.
func podFitsOnNode(pod *v1.Pod, meta interface{}, info *schedulercache.NodeInfo, predicateFuncs map[string]algorithm.FitPredicate) (bool, []algorithm.PredicateFailureReason, error) {
var failedPredicates []algorithm.PredicateFailureReason
for _, predicate := range predicateFuncs {
fit, reasons, err := predicate(pod, meta, info)
if err != nil {
err := fmt.Errorf("SchedulerPredicates failed due to %v, which is unexpected.", err)
return false, []algorithm.PredicateFailureReason{}, err
}
if !fit {
failedPredicates = append(failedPredicates, reasons...)
}
}
return len(failedPredicates) == 0, failedPredicates, nil
}
// PrioritizeNodes prioritizes the nodes by running the individual priority functions in parallel.
// Each priority function is expected to set a score of 0-10, where
// 0 is the lowest priority score (least preferred node) and 10 is the highest.
// Each priority function can also have its own weight.
// The node scores returned by each priority function are multiplied by its weight to get weighted scores.
// All weighted scores are finally summed to get the total score of each node.
func PrioritizeNodes(
pod *v1.Pod,
nodeNameToInfo map[string]*schedulercache.NodeInfo,
meta interface{},
priorityConfigs []algorithm.PriorityConfig,
nodes []*v1.Node,
extenders []algorithm.SchedulerExtender,
) (schedulerapi.HostPriorityList, error) {
// If no priority configs are provided, then EqualPriorityMap is applied.
// This is required to generate the priority list in the required format.
if len(priorityConfigs) == 0 && len(extenders) == 0 {
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
for i := range nodes {
hostPriority, err := EqualPriorityMap(pod, meta, nodeNameToInfo[nodes[i].Name])
if err != nil {
return nil, err
}
result = append(result, hostPriority)
}
return result, nil
}
var (
mu = sync.Mutex{}
wg = sync.WaitGroup{}
errs []error
)
appendError := func(err error) {
mu.Lock()
defer mu.Unlock()
errs = append(errs, err)
}
results := make([]schedulerapi.HostPriorityList, 0, len(priorityConfigs))
for range priorityConfigs {
results = append(results, nil)
}
for i, priorityConfig := range priorityConfigs {
if priorityConfig.Function != nil {
// DEPRECATED
wg.Add(1)
go func(index int, config algorithm.PriorityConfig) {
defer wg.Done()
var err error
results[index], err = config.Function(pod, nodeNameToInfo, nodes)
if err != nil {
appendError(err)
}
}(i, priorityConfig)
} else {
results[i] = make(schedulerapi.HostPriorityList, len(nodes))
}
}
processNode := func(index int) {
nodeInfo := nodeNameToInfo[nodes[index].Name]
var err error
for i := range priorityConfigs {
if priorityConfigs[i].Function != nil {
continue
}
results[i][index], err = priorityConfigs[i].Map(pod, meta, nodeInfo)
if err != nil {
appendError(err)
return
}
}
}
workqueue.Parallelize(16, len(nodes), processNode)
for i, priorityConfig := range priorityConfigs {
if priorityConfig.Reduce == nil {
continue
}
wg.Add(1)
go func(index int, config algorithm.PriorityConfig) {
defer wg.Done()
if err := config.Reduce(pod, meta, nodeNameToInfo, results[index]); err != nil {
appendError(err)
}
}(i, priorityConfig)
}
// Wait for all computations to be finished.
wg.Wait()
if len(errs) != 0 {
return schedulerapi.HostPriorityList{}, errors.NewAggregate(errs)
}
// Summarize all scores.
result := make(schedulerapi.HostPriorityList, 0, len(nodes))
// TODO: Consider parallelizing it.
for i := range nodes {
result = append(result, schedulerapi.HostPriority{Host: nodes[i].Name, Score: 0})
for j := range priorityConfigs {
result[i].Score += results[j][i].Score * priorityConfigs[j].Weight
}
}
if len(extenders) != 0 && nodes != nil {
combinedScores := make(map[string]int, len(nodeNameToInfo))
for _, extender := range extenders {
wg.Add(1)
go func(ext algorithm.SchedulerExtender) {
defer wg.Done()
prioritizedList, weight, err := ext.Prioritize(pod, nodes)
if err != nil {
// Prioritization errors from an extender can be ignored; let k8s/other extenders determine the priorities.
return
}
mu.Lock()
for i := range *prioritizedList {
host, score := (*prioritizedList)[i].Host, (*prioritizedList)[i].Score
combinedScores[host] += score * weight
}
mu.Unlock()
}(extender)
}
// wait for all goroutines to finish
wg.Wait()
for i := range result {
result[i].Score += combinedScores[result[i].Host]
}
}
if glog.V(10) {
for i := range result {
glog.V(10).Infof("Host %s => Score %d", result[i].Host, result[i].Score)
}
}
return result, nil
}
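// A worked example of the scoring arithmetic: with two priority functions of
// weight 1 and 2 that score a node 8 and 5 respectively, the node's combined
// score is 8*1 + 5*2 = 18; any extender scores (score * extender weight) are
// then added on top of that sum.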
// EqualPriorityMap is a prioritizer function that gives an equal score of one to all nodes.
func EqualPriorityMap(_ *v1.Pod, _ interface{}, nodeInfo *schedulercache.NodeInfo) (schedulerapi.HostPriority, error) {
node := nodeInfo.Node()
if node == nil {
return schedulerapi.HostPriority{}, fmt.Errorf("node not found")
}
return schedulerapi.HostPriority{
Host: node.Name,
Score: 1,
}, nil
}
func NewGenericScheduler(
cache schedulercache.Cache,
predicates map[string]algorithm.FitPredicate,
predicateMetaProducer algorithm.MetadataProducer,
prioritizers []algorithm.PriorityConfig,
priorityMetaProducer algorithm.MetadataProducer,
extenders []algorithm.SchedulerExtender) algorithm.ScheduleAlgorithm {
return &genericScheduler{
cache: cache,
predicates: predicates,
predicateMetaProducer: predicateMetaProducer,
prioritizers: prioritizers,
priorityMetaProducer: priorityMetaProducer,
extenders: extenders,
cachedNodeInfoMap: make(map[string]*schedulercache.NodeInfo),
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,18 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = ["metrics.go"],
tags = ["automanaged"],
deps = ["//vendor:github.com/prometheus/client_golang/prometheus"],
)

View file

@ -0,0 +1,72 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const schedulerSubsystem = "scheduler"
var BindingSaturationReportInterval = 1 * time.Second
var (
E2eSchedulingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "e2e_scheduling_latency_microseconds",
Help: "E2e scheduling latency (scheduling algorithm + binding)",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
SchedulingAlgorithmLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "scheduling_algorithm_latency_microseconds",
Help: "Scheduling algorithm latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
BindingLatency = prometheus.NewHistogram(
prometheus.HistogramOpts{
Subsystem: schedulerSubsystem,
Name: "binding_latency_microseconds",
Help: "Binding latency",
Buckets: prometheus.ExponentialBuckets(1000, 2, 15),
},
)
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
prometheus.MustRegister(E2eSchedulingLatency)
prometheus.MustRegister(SchedulingAlgorithmLatency)
prometheus.MustRegister(BindingLatency)
})
}
// SinceInMicroseconds gets the time since the specified start, in microseconds.
func SinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds())
}
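// A typical timing sketch (this mirrors how the scheduler records latencies):
//
//	start := time.Now()
//	// ... perform the work being measured ...
//	BindingLatency.Observe(SinceInMicroseconds(start))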

View file

@ -0,0 +1,158 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"time"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/metrics"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
"github.com/golang/glog"
)
// Binder knows how to write a binding.
type Binder interface {
Bind(binding *v1.Binding) error
}
type PodConditionUpdater interface {
Update(pod *v1.Pod, podCondition *v1.PodCondition) error
}
// Scheduler watches for new unscheduled pods. It attempts to find
// nodes that they fit on and writes bindings back to the api server.
type Scheduler struct {
config *Config
}
type Config struct {
// It is expected that changes made via SchedulerCache will be observed
// by NodeLister and Algorithm.
SchedulerCache schedulercache.Cache
NodeLister algorithm.NodeLister
Algorithm algorithm.ScheduleAlgorithm
Binder Binder
// PodConditionUpdater is used only in case of scheduling errors. If we succeed
// with scheduling, the PodScheduled condition will be updated in the apiserver by the /bind
// handler so that binding and setting the PodCondition are atomic.
PodConditionUpdater PodConditionUpdater
// NextPod should be a function that blocks until the next pod
// is available. We don't use a channel for this, because scheduling
// a pod may take some amount of time and we don't want pods to get
// stale while they sit in a channel.
NextPod func() *v1.Pod
// Error is called if there is an error. It is passed the pod in
// question and the error.
Error func(*v1.Pod, error)
// Recorder is the EventRecorder to use
Recorder record.EventRecorder
// Close this to shut down the scheduler.
StopEverything chan struct{}
}
// New returns a new scheduler.
func New(c *Config) *Scheduler {
s := &Scheduler{
config: c,
}
metrics.Register()
return s
}
// Run begins watching and scheduling. It starts a goroutine and returns immediately.
func (s *Scheduler) Run() {
go wait.Until(s.scheduleOne, 0, s.config.StopEverything)
}
func (s *Scheduler) scheduleOne() {
pod := s.config.NextPod()
glog.V(3).Infof("Attempting to schedule pod: %v/%v", pod.Namespace, pod.Name)
start := time.Now()
dest, err := s.config.Algorithm.Schedule(pod, s.config.NodeLister)
if err != nil {
glog.V(1).Infof("Failed to schedule pod: %v/%v", pod.Namespace, pod.Name)
s.config.Error(pod, err)
s.config.Recorder.Eventf(pod, v1.EventTypeWarning, "FailedScheduling", "%v", err)
s.config.PodConditionUpdater.Update(pod, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: v1.PodReasonUnschedulable,
})
return
}
metrics.SchedulingAlgorithmLatency.Observe(metrics.SinceInMicroseconds(start))
// Optimistically assume that the binding will succeed and send it to apiserver
// in the background.
// If the binding fails, scheduler will release resources allocated to assumed pod
// immediately.
assumed := *pod
assumed.Spec.NodeName = dest
if err := s.config.SchedulerCache.AssumePod(&assumed); err != nil {
glog.Errorf("scheduler cache AssumePod failed: %v", err)
// TODO: This means that a given pod is already in cache (which means it
// is either assumed or already added). This is most probably the result of a
// bug in the retrying logic. As a temporary workaround (which doesn't fully
// fix the problem, but should reduce its impact), we simply return here,
// as binding doesn't make sense anyway.
// This should be fixed properly though.
return
}
go func() {
defer metrics.E2eSchedulingLatency.Observe(metrics.SinceInMicroseconds(start))
b := &v1.Binding{
ObjectMeta: v1.ObjectMeta{Namespace: pod.Namespace, Name: pod.Name},
Target: v1.ObjectReference{
Kind: "Node",
Name: dest,
},
}
bindingStart := time.Now()
// If binding succeeded then PodScheduled condition will be updated in apiserver so that
// it's atomic with setting host.
err := s.config.Binder.Bind(b)
if err != nil {
glog.V(1).Infof("Failed to bind pod: %v/%v", pod.Namespace, pod.Name)
if err := s.config.SchedulerCache.ForgetPod(&assumed); err != nil {
glog.Errorf("scheduler cache ForgetPod failed: %v", err)
}
s.config.Error(pod, err)
s.config.Recorder.Eventf(pod, v1.EventTypeNormal, "FailedScheduling", "Binding rejected: %v", err)
s.config.PodConditionUpdater.Update(pod, &v1.PodCondition{
Type: v1.PodScheduled,
Status: v1.ConditionFalse,
Reason: "BindingRejected",
})
return
}
metrics.BindingLatency.Observe(metrics.SinceInMicroseconds(bindingStart))
s.config.Recorder.Eventf(pod, v1.EventTypeNormal, "Scheduled", "Successfully assigned %v to %v", pod.Name, dest)
}()
}

View file

@ -0,0 +1,431 @@
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package scheduler
import (
"errors"
"fmt"
"reflect"
"testing"
"time"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/testapi"
"k8s.io/kubernetes/pkg/api/v1"
clientcache "k8s.io/kubernetes/pkg/client/cache"
"k8s.io/kubernetes/pkg/client/record"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util/diff"
"k8s.io/kubernetes/pkg/util/wait"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
schedulertesting "k8s.io/kubernetes/plugin/pkg/scheduler/testing"
)
type fakeBinder struct {
b func(binding *v1.Binding) error
}
func (fb fakeBinder) Bind(binding *v1.Binding) error { return fb.b(binding) }
type fakePodConditionUpdater struct{}
func (fc fakePodConditionUpdater) Update(pod *v1.Pod, podCondition *v1.PodCondition) error {
return nil
}
func podWithID(id, desiredHost string) *v1.Pod {
return &v1.Pod{
ObjectMeta: v1.ObjectMeta{Name: id, SelfLink: testapi.Default.SelfLink("pods", id)},
Spec: v1.PodSpec{
NodeName: desiredHost,
},
}
}
func podWithPort(id, desiredHost string, port int) *v1.Pod {
pod := podWithID(id, desiredHost)
pod.Spec.Containers = []v1.Container{
{Name: "ctr", Ports: []v1.ContainerPort{{HostPort: int32(port)}}},
}
return pod
}
func podWithResources(id, desiredHost string, limits v1.ResourceList, requests v1.ResourceList) *v1.Pod {
pod := podWithID(id, desiredHost)
pod.Spec.Containers = []v1.Container{
{Name: "ctr", Resources: v1.ResourceRequirements{Limits: limits, Requests: requests}},
}
return pod
}
type mockScheduler struct {
machine string
err error
}
func (es mockScheduler) Schedule(pod *v1.Pod, ml algorithm.NodeLister) (string, error) {
return es.machine, es.err
}
func TestScheduler(t *testing.T) {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(t.Logf).Stop()
errS := errors.New("scheduler")
errB := errors.New("binder")
testNode := v1.Node{ObjectMeta: v1.ObjectMeta{Name: "machine1"}}
table := []struct {
injectBindError error
sendPod *v1.Pod
algo algorithm.ScheduleAlgorithm
expectErrorPod *v1.Pod
expectAssumedPod *v1.Pod
expectError error
expectBind *v1.Binding
eventReason string
}{
{
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: v1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
eventReason: "Scheduled",
}, {
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, errS},
expectError: errS,
expectErrorPod: podWithID("foo", ""),
eventReason: "FailedScheduling",
}, {
sendPod: podWithID("foo", ""),
algo: mockScheduler{testNode.Name, nil},
expectBind: &v1.Binding{ObjectMeta: v1.ObjectMeta{Name: "foo"}, Target: v1.ObjectReference{Kind: "Node", Name: testNode.Name}},
expectAssumedPod: podWithID("foo", testNode.Name),
injectBindError: errB,
expectError: errB,
expectErrorPod: podWithID("foo", ""),
eventReason: "FailedScheduling",
},
}
for i, item := range table {
var gotError error
var gotPod *v1.Pod
var gotAssumedPod *v1.Pod
var gotBinding *v1.Binding
c := &Config{
SchedulerCache: &schedulertesting.FakeCache{
AssumeFunc: func(pod *v1.Pod) {
gotAssumedPod = pod
},
},
NodeLister: algorithm.FakeNodeLister(
[]*v1.Node{&testNode},
),
Algorithm: item.algo,
Binder: fakeBinder{func(b *v1.Binding) error {
gotBinding = b
return item.injectBindError
}},
PodConditionUpdater: fakePodConditionUpdater{},
Error: func(p *v1.Pod, err error) {
gotPod = p
gotError = err
},
NextPod: func() *v1.Pod {
return item.sendPod
},
Recorder: eventBroadcaster.NewRecorder(v1.EventSource{Component: "scheduler"}),
}
s := New(c)
called := make(chan struct{})
events := eventBroadcaster.StartEventWatcher(func(e *v1.Event) {
if e, a := item.eventReason, e.Reason; e != a {
t.Errorf("%v: expected %v, got %v", i, e, a)
}
close(called)
})
s.scheduleOne()
<-called
if e, a := item.expectAssumedPod, gotAssumedPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: assumed pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectErrorPod, gotPod; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error pod: wanted %v, got %v", i, e, a)
}
if e, a := item.expectError, gotError; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: wanted %v, got %v", i, e, a)
}
if e, a := item.expectBind, gotBinding; !reflect.DeepEqual(e, a) {
t.Errorf("%v: error: %s", i, diff.ObjectDiff(e, a))
}
events.Stop()
}
}
func TestSchedulerNoPhantomPodAfterExpire(t *testing.T) {
stop := make(chan struct{})
defer close(stop)
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(100*time.Millisecond, stop)
pod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: v1.ObjectMeta{Name: "machine1"}}
scache.AddNode(&node)
nodeLister := algorithm.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
scheduler, bindingChan, _ := setupTestSchedulerWithOnePodOnNode(t, queuedPodStore, scache, nodeLister, predicateMap, pod, &node)
waitPodExpireChan := make(chan struct{})
timeout := make(chan struct{})
go func() {
for {
select {
case <-timeout:
return
default:
}
pods, err := scache.List(labels.Everything())
if err != nil {
t.Fatalf("cache.List failed: %v", err)
}
if len(pods) == 0 {
close(waitPodExpireChan)
return
}
time.Sleep(100 * time.Millisecond)
}
}()
// waiting for the assumed pod to expire
select {
case <-waitPodExpireChan:
case <-time.After(wait.ForeverTestTimeout):
close(timeout)
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
// We use conflicted pod ports to incur fit predicate failure if first pod not removed.
secondPod := podWithPort("bar", "", 8080)
queuedPodStore.Add(secondPod)
scheduler.scheduleOne()
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: v1.ObjectMeta{Name: "bar"},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
t.Errorf("binding want=%v, get=%v", expectBinding, b)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
}
func TestSchedulerNoPhantomPodAfterDelete(t *testing.T) {
stop := make(chan struct{})
defer close(stop)
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(10*time.Minute, stop)
firstPod := podWithPort("pod.Name", "", 8080)
node := v1.Node{ObjectMeta: v1.ObjectMeta{Name: "machine1"}}
scache.AddNode(&node)
nodeLister := algorithm.FakeNodeLister([]*v1.Node{&node})
predicateMap := map[string]algorithm.FitPredicate{"PodFitsHostPorts": predicates.PodFitsHostPorts}
scheduler, bindingChan, errChan := setupTestSchedulerWithOnePodOnNode(t, queuedPodStore, scache, nodeLister, predicateMap, firstPod, &node)
// We use conflicted pod ports to incur fit predicate failure.
secondPod := podWithPort("bar", "", 8080)
queuedPodStore.Add(secondPod)
// queuedPodStore: [bar:8080]
// cache: [(assumed)foo:8080]
scheduler.scheduleOne()
select {
case err := <-errChan:
expectErr := &FitError{
Pod: secondPod,
FailedPredicates: FailedPredicateMap{node.Name: []algorithm.PredicateFailureReason{predicates.ErrPodNotFitsHostPorts}},
}
if !reflect.DeepEqual(expectErr, err) {
t.Errorf("err want=%v, get=%v", expectErr, err)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
// We mimic the workflow of cache behavior when a pod is removed by user.
// Note: if the schedulercache timeout were very short, the first pod would expire
// and be removed by itself (without any explicit action on schedulercache). Even in that case,
// explicitly calling AddPod will still correct the behavior.
firstPod.Spec.NodeName = node.Name
if err := scache.AddPod(firstPod); err != nil {
t.Fatalf("err: %v", err)
}
if err := scache.RemovePod(firstPod); err != nil {
t.Fatalf("err: %v", err)
}
queuedPodStore.Add(secondPod)
scheduler.scheduleOne()
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: v1.ObjectMeta{Name: "bar"},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
t.Errorf("binding want=%v, get=%v", expectBinding, b)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
}
// queuedPodStore: pods queued before processing.
// cache: scheduler cache that might contain assumed pods.
func setupTestSchedulerWithOnePodOnNode(t *testing.T, queuedPodStore *clientcache.FIFO, scache schedulercache.Cache,
nodeLister algorithm.FakeNodeLister, predicateMap map[string]algorithm.FitPredicate, pod *v1.Pod, node *v1.Node) (*Scheduler, chan *v1.Binding, chan error) {
scheduler, bindingChan, errChan := setupTestScheduler(queuedPodStore, scache, nodeLister, predicateMap)
queuedPodStore.Add(pod)
// queuedPodStore: [foo:8080]
// cache: []
scheduler.scheduleOne()
// queuedPodStore: []
// cache: [(assumed)foo:8080]
select {
case b := <-bindingChan:
expectBinding := &v1.Binding{
ObjectMeta: v1.ObjectMeta{Name: pod.Name},
Target: v1.ObjectReference{Kind: "Node", Name: node.Name},
}
if !reflect.DeepEqual(expectBinding, b) {
t.Errorf("binding want=%v, get=%v", expectBinding, b)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
return scheduler, bindingChan, errChan
}
func TestSchedulerFailedSchedulingReasons(t *testing.T) {
stop := make(chan struct{})
defer close(stop)
queuedPodStore := clientcache.NewFIFO(clientcache.MetaNamespaceKeyFunc)
scache := schedulercache.New(10*time.Minute, stop)
// Design the baseline for the pods; later we will make nodes that don't fit it.
var cpu = int64(4)
var mem = int64(500)
podWithTooBigResourceRequests := podWithResources("bar", "", v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem, resource.DecimalSI)),
}, v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem, resource.DecimalSI)),
})
// create several nodes which cannot schedule the above pod
nodes := []*v1.Node{}
for i := 0; i < 100; i++ {
node := v1.Node{
ObjectMeta: v1.ObjectMeta{Name: fmt.Sprintf("machine%v", i)},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu/2, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem/5, resource.DecimalSI)),
v1.ResourcePods: *(resource.NewQuantity(10, resource.DecimalSI)),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *(resource.NewQuantity(cpu/2, resource.DecimalSI)),
v1.ResourceMemory: *(resource.NewQuantity(mem/5, resource.DecimalSI)),
v1.ResourcePods: *(resource.NewQuantity(10, resource.DecimalSI)),
}},
}
scache.AddNode(&node)
nodes = append(nodes, &node)
}
nodeLister := algorithm.FakeNodeLister(nodes)
predicateMap := map[string]algorithm.FitPredicate{
"PodFitsResources": predicates.PodFitsResources,
}
// Create expected failure reasons for all the nodes. Hopefully they will get rolled up into a non-spammy summary.
failedPredicatesMap := FailedPredicateMap{}
for _, node := range nodes {
failedPredicatesMap[node.Name] = []algorithm.PredicateFailureReason{
predicates.NewInsufficientResourceError(v1.ResourceCPU, 4000, 0, 2000),
predicates.NewInsufficientResourceError(v1.ResourceMemory, 500, 0, 100),
}
}
scheduler, _, errChan := setupTestScheduler(queuedPodStore, scache, nodeLister, predicateMap)
queuedPodStore.Add(podWithTooBigResourceRequests)
scheduler.scheduleOne()
select {
case err := <-errChan:
expectErr := &FitError{
Pod: podWithTooBigResourceRequests,
FailedPredicates: failedPredicatesMap,
}
if len(fmt.Sprint(expectErr)) > 150 {
t.Errorf("message is too spammy ! %v ", len(fmt.Sprint(expectErr)))
}
if !reflect.DeepEqual(expectErr, err) {
t.Errorf("\n err \nWANT=%+v,\nGOT=%+v", expectErr, err)
}
case <-time.After(wait.ForeverTestTimeout):
t.Fatalf("timeout after %v", wait.ForeverTestTimeout)
}
}
// queuedPodStore: pods queued before processing.
// scache: scheduler cache that might contain assumed pods.
func setupTestScheduler(queuedPodStore *clientcache.FIFO, scache schedulercache.Cache, nodeLister algorithm.FakeNodeLister, predicateMap map[string]algorithm.FitPredicate) (*Scheduler, chan *v1.Binding, chan error) {
algo := NewGenericScheduler(
scache,
predicateMap,
algorithm.EmptyMetadataProducer,
[]algorithm.PriorityConfig{},
algorithm.EmptyMetadataProducer,
[]algorithm.SchedulerExtender{})
bindingChan := make(chan *v1.Binding, 1)
errChan := make(chan error, 1)
cfg := &Config{
SchedulerCache: scache,
NodeLister: nodeLister,
Algorithm: algo,
Binder: fakeBinder{func(b *v1.Binding) error {
bindingChan <- b
return nil
}},
NextPod: func() *v1.Pod {
return clientcache.Pop(queuedPodStore).(*v1.Pod)
},
Error: func(p *v1.Pod, err error) {
errChan <- err
},
Recorder: &record.FakeRecorder{},
PodConditionUpdater: fakePodConditionUpdater{},
}
return New(cfg), bindingChan, errChan
}

View file

@ -0,0 +1,44 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"cache.go",
"interface.go",
"node_info.go",
"util.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/client/cache:go_default_library",
"//pkg/labels:go_default_library",
"//pkg/util/wait:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
"//vendor:github.com/golang/glog",
],
)
go_test(
name = "go_default_test",
srcs = ["cache_test.go"],
library = "go_default_library",
tags = ["automanaged"],
deps = [
"//pkg/api/resource:go_default_library",
"//pkg/api/v1:go_default_library",
"//pkg/labels:go_default_library",
"//plugin/pkg/scheduler/algorithm/priorities/util:go_default_library",
],
)

View file

@ -0,0 +1,362 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"fmt"
"sync"
"time"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/pkg/util/wait"
)
var (
cleanAssumedPeriod = 1 * time.Second
)
// New returns a Cache implementation.
// It automatically starts a goroutine that manages expiration of assumed pods.
// "ttl" is how long an assumed pod may remain unconfirmed before it expires.
// "stop" is a channel that, when closed, shuts down the background goroutine.
func New(ttl time.Duration, stop <-chan struct{}) Cache {
cache := newSchedulerCache(ttl, cleanAssumedPeriod, stop)
cache.run()
return cache
}
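// A minimal usage sketch (the 30-second TTL is an arbitrary example value):
//
//	stop := make(chan struct{})
//	defer close(stop)
//	cache := New(30*time.Second, stop)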
type schedulerCache struct {
stop <-chan struct{}
ttl time.Duration
period time.Duration
// This mutex guards all fields within this cache struct.
mu sync.Mutex
// a set of assumed pod keys.
// The key could further be used to get an entry in podStates.
assumedPods map[string]bool
// a map from pod key to podState.
podStates map[string]*podState
nodes map[string]*NodeInfo
}
type podState struct {
pod *v1.Pod
// Used by assumedPod to determine expiration.
deadline *time.Time
}
func newSchedulerCache(ttl, period time.Duration, stop <-chan struct{}) *schedulerCache {
return &schedulerCache{
ttl: ttl,
period: period,
stop: stop,
nodes: make(map[string]*NodeInfo),
assumedPods: make(map[string]bool),
podStates: make(map[string]*podState),
}
}
func (cache *schedulerCache) UpdateNodeNameToInfoMap(nodeNameToInfo map[string]*NodeInfo) error {
cache.mu.Lock()
defer cache.mu.Unlock()
for name, info := range cache.nodes {
if current, ok := nodeNameToInfo[name]; !ok || current.generation != info.generation {
nodeNameToInfo[name] = info.Clone()
}
}
for name := range nodeNameToInfo {
if _, ok := cache.nodes[name]; !ok {
delete(nodeNameToInfo, name)
}
}
return nil
}
func (cache *schedulerCache) List(selector labels.Selector) ([]*v1.Pod, error) {
cache.mu.Lock()
defer cache.mu.Unlock()
var pods []*v1.Pod
for _, info := range cache.nodes {
for _, pod := range info.pods {
if selector.Matches(labels.Set(pod.Labels)) {
pods = append(pods, pod)
}
}
}
return pods, nil
}
func (cache *schedulerCache) AssumePod(pod *v1.Pod) error {
return cache.assumePod(pod, time.Now())
}
// assumePod exists to make tests deterministic by taking time as an input argument.
func (cache *schedulerCache) assumePod(pod *v1.Pod, now time.Time) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
if _, ok := cache.podStates[key]; ok {
return fmt.Errorf("pod %v state wasn't initial but get assumed", key)
}
cache.addPod(pod)
dl := now.Add(cache.ttl)
ps := &podState{
pod: pod,
deadline: &dl,
}
cache.podStates[key] = ps
cache.assumedPods[key] = true
return nil
}
func (cache *schedulerCache) ForgetPod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
// Guard against a nil dereference: only check the node name if the pod is known.
if ok && currState.pod.Spec.NodeName != pod.Spec.NodeName {
return fmt.Errorf("pod %v state was assumed on a different node", key)
}
switch {
// Only assumed pod can be forgotten.
case ok && cache.assumedPods[key]:
err := cache.removePod(pod)
if err != nil {
return err
}
delete(cache.assumedPods, key)
delete(cache.podStates, key)
default:
return fmt.Errorf("pod %v state wasn't assumed but get forgotten", key)
}
return nil
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) addPod(pod *v1.Pod) {
n, ok := cache.nodes[pod.Spec.NodeName]
if !ok {
n = NewNodeInfo()
cache.nodes[pod.Spec.NodeName] = n
}
n.addPod(pod)
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) updatePod(oldPod, newPod *v1.Pod) error {
if err := cache.removePod(oldPod); err != nil {
return err
}
cache.addPod(newPod)
return nil
}
// Assumes that lock is already acquired.
func (cache *schedulerCache) removePod(pod *v1.Pod) error {
n := cache.nodes[pod.Spec.NodeName]
if err := n.removePod(pod); err != nil {
return err
}
if len(n.pods) == 0 && n.node == nil {
delete(cache.nodes, pod.Spec.NodeName)
}
return nil
}
func (cache *schedulerCache) AddPod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
case ok && cache.assumedPods[key]:
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
// The pod was added to a different node than it was assumed to.
glog.Warningf("Pod %v assumed to a different node than added to.", key)
// Clean this up.
cache.removePod(currState.pod)
cache.addPod(pod)
}
delete(cache.assumedPods, key)
cache.podStates[key].deadline = nil
case !ok:
// Pod was expired. We should add it back.
cache.addPod(pod)
ps := &podState{
pod: pod,
}
cache.podStates[key] = ps
default:
return fmt.Errorf("pod was already in added state. Pod key: %v", key)
}
return nil
}
func (cache *schedulerCache) UpdatePod(oldPod, newPod *v1.Pod) error {
key, err := getPodKey(oldPod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
// An assumed pod won't have Update/Remove event. It needs to have Add event
// before Update event, in which case the state would change from Assumed to Added.
case ok && !cache.assumedPods[key]:
if currState.pod.Spec.NodeName != newPod.Spec.NodeName {
glog.Errorf("Pod %v updated on a different node than previously added to.", key)
glog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
}
if err := cache.updatePod(oldPod, newPod); err != nil {
return err
}
default:
return fmt.Errorf("pod %v state wasn't added but get updated", key)
}
return nil
}
func (cache *schedulerCache) RemovePod(pod *v1.Pod) error {
key, err := getPodKey(pod)
if err != nil {
return err
}
cache.mu.Lock()
defer cache.mu.Unlock()
currState, ok := cache.podStates[key]
switch {
// An assumed pod won't have Delete/Remove event. It needs to have Add event
// before Remove event, in which case the state would change from Assumed to Added.
case ok && !cache.assumedPods[key]:
if currState.pod.Spec.NodeName != pod.Spec.NodeName {
glog.Errorf("Pod %v removed from a different node than previously added to.", key)
glog.Fatalf("Schedulercache is corrupted and can badly affect scheduling decisions")
}
err := cache.removePod(currState.pod)
if err != nil {
return err
}
delete(cache.podStates, key)
default:
return fmt.Errorf("pod state wasn't added but get removed. Pod key: %v", key)
}
return nil
}
func (cache *schedulerCache) AddNode(node *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n, ok := cache.nodes[node.Name]
if !ok {
n = NewNodeInfo()
cache.nodes[node.Name] = n
}
return n.SetNode(node)
}
func (cache *schedulerCache) UpdateNode(oldNode, newNode *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n, ok := cache.nodes[newNode.Name]
if !ok {
n = NewNodeInfo()
cache.nodes[newNode.Name] = n
}
return n.SetNode(newNode)
}
func (cache *schedulerCache) RemoveNode(node *v1.Node) error {
cache.mu.Lock()
defer cache.mu.Unlock()
n := cache.nodes[node.Name]
if err := n.RemoveNode(node); err != nil {
return err
}
// We remove NodeInfo for this node only if there aren't any pods on this node.
// We can't do it unconditionally, because notifications about pods are delivered
// in a different watch, and thus can potentially be observed later, even though
// they happened before node removal.
if len(n.pods) == 0 && n.node == nil {
delete(cache.nodes, node.Name)
}
return nil
}
func (cache *schedulerCache) run() {
go wait.Until(cache.cleanupExpiredAssumedPods, cache.period, cache.stop)
}
func (cache *schedulerCache) cleanupExpiredAssumedPods() {
cache.cleanupAssumedPods(time.Now())
}
// cleanupAssumedPods exists to make tests deterministic by taking time as an input argument.
func (cache *schedulerCache) cleanupAssumedPods(now time.Time) {
cache.mu.Lock()
defer cache.mu.Unlock()
// The size of assumedPods should be small
for key := range cache.assumedPods {
ps, ok := cache.podStates[key]
if !ok {
panic("Key found in assumed set but not in podStates. Potentially a logical error.")
}
if now.After(*ps.deadline) {
glog.Warningf("Pod %s/%s expired", ps.pod.Namespace, ps.pod.Name)
if err := cache.expirePod(key, ps); err != nil {
glog.Errorf("ExpirePod failed for %s: %v", key, err)
}
}
}
}
func (cache *schedulerCache) expirePod(key string, ps *podState) error {
if err := cache.removePod(ps.pod); err != nil {
return err
}
delete(cache.assumedPods, key)
delete(cache.podStates, key)
return nil
}

File diff suppressed because it is too large

View file

@ -0,0 +1,93 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
)
// Cache collects pods' information and provides node-level aggregated information.
// It's intended for generic scheduler to do efficient lookup.
// Cache's operations are pod centric. It does incremental updates based on pod events.
// Pod events are sent via network. We don't have guaranteed delivery of all events:
// We use Reflector to list and watch from remote.
// Reflector might be slow and do a relist, which would lead to missing events.
//
// State Machine of a pod's events in scheduler's cache:
//
//
//   +-------------------------------------------+  +----+
//   |                            Add            |  |    |
//   |                                           |  |    | Update
//   +      Assume                Add            v  v    |
// Initial +--------> Assumed +------------+---> Added <--+
//   ^                +   +               |       +
//   |                |   |               |       |
//   |                |   |           Add |       | Remove
//   |                |   |               |       |
//   +----------------+   +-----------> Expired  +----> Deleted
//         Forget             Expire
//
//
// Note that an assumed pod can expire, because if we haven't received Add event notifying us
// for a while, there might be some problems and we shouldn't keep the pod in cache anymore.
//
// Note that "Initial", "Expired", and "Deleted" pods do not actually exist in cache.
// Based on existing use cases, we are making the following assumptions:
// - No pod would be assumed twice
// - A pod could be added without going through scheduler. In this case, we will see Add but not Assume event.
// - If a pod wasn't added, it wouldn't be removed or updated.
// - Both "Expired" and "Deleted" are valid end states. In case of some problems, e.g. network issue,
// a pod might have changed its state (e.g. added and deleted) without delivering notification to the cache.
type Cache interface {
// AssumePod assumes a pod is scheduled and aggregates the pod's information into its node.
// The implementation also decides the policy for expiring an assumed pod before it is confirmed (i.e. before the Add event is received).
// After expiration, the pod's information would be subtracted.
AssumePod(pod *v1.Pod) error
// ForgetPod removes an assumed pod from cache.
ForgetPod(pod *v1.Pod) error
// AddPod either confirms a pod if it's assumed, or adds it back if it's expired.
// If added back, the pod's information would be added again.
AddPod(pod *v1.Pod) error
// UpdatePod removes oldPod's information and adds newPod's information.
UpdatePod(oldPod, newPod *v1.Pod) error
// RemovePod removes a pod. The pod's information would be subtracted from assigned node.
RemovePod(pod *v1.Pod) error
// AddNode adds overall information about node.
AddNode(node *v1.Node) error
// UpdateNode updates overall information about node.
UpdateNode(oldNode, newNode *v1.Node) error
// RemoveNode removes overall information about node.
RemoveNode(node *v1.Node) error
// UpdateNodeNameToInfoMap updates the passed infoMap to the current contents of Cache.
// The node info contains aggregated information of pods scheduled (including assumed to be)
// on this node.
UpdateNodeNameToInfoMap(infoMap map[string]*NodeInfo) error
// List lists all cached pods (including assumed ones).
List(labels.Selector) ([]*v1.Pod, error)
}
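// A sketch of the call sequence for one pod's happy-path lifecycle, assuming
// informer event handlers drive the Add/Update/Remove calls:
//
//	cache.AssumePod(pod)         // scheduler decided; binding sent optimistically
//	cache.AddPod(pod)            // Add event confirms the assumed pod
//	cache.UpdatePod(pod, newPod) // Update event
//	cache.RemovePod(newPod)      // Delete event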

View file

@ -0,0 +1,329 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"fmt"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/api/resource"
"k8s.io/kubernetes/pkg/api/v1"
clientcache "k8s.io/kubernetes/pkg/client/cache"
priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
)
var emptyResource = Resource{}
// NodeInfo is node level aggregated information.
type NodeInfo struct {
// Overall node information.
node *v1.Node
pods []*v1.Pod
podsWithAffinity []*v1.Pod
// Total requested resources of all pods on this node.
// It includes assumed pods, for which the scheduler has sent a binding to the
// apiserver but has not yet observed them as scheduled.
requestedResource *Resource
nonzeroRequest *Resource
// We store allocatableResource (which is Node.Status.Allocatable.*) explicitly
// as int64, to avoid conversions and map accesses.
allocatableResource *Resource
// We store allowedPodNumber (which is Node.Status.Allocatable.Pods().Value())
// explicitly as int, to avoid conversions and improve performance.
allowedPodNumber int
// Whenever NodeInfo changes, generation is bumped.
// This is used to avoid cloning it if the object didn't change.
generation int64
}
// Resource is a collection of compute resources.
type Resource struct {
MilliCPU int64
Memory int64
NvidiaGPU int64
OpaqueIntResources map[v1.ResourceName]int64
}
func (r *Resource) ResourceList() v1.ResourceList {
result := v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(r.MilliCPU, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(r.Memory, resource.BinarySI),
v1.ResourceNvidiaGPU: *resource.NewQuantity(r.NvidiaGPU, resource.DecimalSI),
}
for rName, rQuant := range r.OpaqueIntResources {
result[rName] = *resource.NewQuantity(rQuant, resource.DecimalSI)
}
return result
}
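ResourceList re-materializes the int64 fields as API quantities. A short usage sketch (the values are arbitrary and the import paths assume the vendored tree):

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func main() {
	// 250 millicores, 512 MiB of memory, one GPU.
	r := schedulercache.Resource{MilliCPU: 250, Memory: 512 << 20, NvidiaGPU: 1}
	rl := r.ResourceList()
	cpu, mem := rl[v1.ResourceCPU], rl[v1.ResourceMemory]
	fmt.Println(cpu.String(), mem.String()) // prints: 250m 512Mi
}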
// NewNodeInfo returns a ready to use empty NodeInfo object.
// If any pods are given in arguments, their information will be aggregated in
// the returned object.
func NewNodeInfo(pods ...*v1.Pod) *NodeInfo {
ni := &NodeInfo{
requestedResource: &Resource{},
nonzeroRequest: &Resource{},
allocatableResource: &Resource{},
allowedPodNumber: 0,
generation: 0,
}
for _, pod := range pods {
ni.addPod(pod)
}
return ni
}
// Node returns overall information about this node.
func (n *NodeInfo) Node() *v1.Node {
if n == nil {
return nil
}
return n.node
}
// Pods returns all pods scheduled (including assumed to be) on this node.
func (n *NodeInfo) Pods() []*v1.Pod {
if n == nil {
return nil
}
return n.pods
}
// PodsWithAffinity returns all pods with (anti)affinity constraints on this node.
func (n *NodeInfo) PodsWithAffinity() []*v1.Pod {
if n == nil {
return nil
}
return n.podsWithAffinity
}
// AllowedPodNumber returns the number of pods allowed on this node.
func (n *NodeInfo) AllowedPodNumber() int {
if n == nil {
return 0
}
return n.allowedPodNumber
}
// RequestedResource returns aggregated resource request of pods on this node.
func (n *NodeInfo) RequestedResource() Resource {
if n == nil {
return emptyResource
}
return *n.requestedResource
}
// NonZeroRequest returns aggregated nonzero resource request of pods on this node.
func (n *NodeInfo) NonZeroRequest() Resource {
if n == nil {
return emptyResource
}
return *n.nonzeroRequest
}
// AllocatableResource returns allocatable resources on a given node.
func (n *NodeInfo) AllocatableResource() Resource {
if n == nil {
return emptyResource
}
return *n.allocatableResource
}
// Clone returns a copy of this NodeInfo. Note that &(*ptr) in Go yields the
// same pointer rather than a copy of the pointee, so the Resource values are
// copied explicitly here; the OpaqueIntResources maps, however, remain shared
// between the original and the clone.
func (n *NodeInfo) Clone() *NodeInfo {
requestedResource := *n.requestedResource
nonzeroRequest := *n.nonzeroRequest
allocatableResource := *n.allocatableResource
clone := &NodeInfo{
node: n.node,
requestedResource: &requestedResource,
nonzeroRequest: &nonzeroRequest,
allocatableResource: &allocatableResource,
allowedPodNumber: n.allowedPodNumber,
generation: n.generation,
}
if len(n.pods) > 0 {
clone.pods = append([]*v1.Pod(nil), n.pods...)
}
if len(n.podsWithAffinity) > 0 {
clone.podsWithAffinity = append([]*v1.Pod(nil), n.podsWithAffinity...)
}
return clone
}
// String returns a human-readable representation of this NodeInfo.
func (n *NodeInfo) String() string {
podKeys := make([]string, len(n.pods))
for i, pod := range n.pods {
podKeys[i] = pod.Name
}
return fmt.Sprintf("&NodeInfo{Pods:%v, RequestedResource:%#v, NonZeroRequest: %#v}", podKeys, n.requestedResource, n.nonzeroRequest)
}
func hasPodAffinityConstraints(pod *v1.Pod) bool {
affinity, err := v1.GetAffinityFromPodAnnotations(pod.Annotations)
if err != nil || affinity == nil {
return false
}
return affinity.PodAffinity != nil || affinity.PodAntiAffinity != nil
}
// addPod adds pod information to this NodeInfo.
func (n *NodeInfo) addPod(pod *v1.Pod) {
res, non0_cpu, non0_mem := calculateResource(pod)
n.requestedResource.MilliCPU += res.MilliCPU
n.requestedResource.Memory += res.Memory
n.requestedResource.NvidiaGPU += res.NvidiaGPU
if n.requestedResource.OpaqueIntResources == nil && len(res.OpaqueIntResources) > 0 {
n.requestedResource.OpaqueIntResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.OpaqueIntResources {
n.requestedResource.OpaqueIntResources[rName] += rQuant
}
n.nonzeroRequest.MilliCPU += non0_cpu
n.nonzeroRequest.Memory += non0_mem
n.pods = append(n.pods, pod)
if hasPodAffinityConstraints(pod) {
n.podsWithAffinity = append(n.podsWithAffinity, pod)
}
n.generation++
}
// removePod subtracts the pod's information from this NodeInfo.
func (n *NodeInfo) removePod(pod *v1.Pod) error {
k1, err := getPodKey(pod)
if err != nil {
return err
}
for i := range n.podsWithAffinity {
k2, err := getPodKey(n.podsWithAffinity[i])
if err != nil {
glog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.podsWithAffinity[i] = n.podsWithAffinity[len(n.podsWithAffinity)-1]
n.podsWithAffinity = n.podsWithAffinity[:len(n.podsWithAffinity)-1]
break
}
}
for i := range n.pods {
k2, err := getPodKey(n.pods[i])
if err != nil {
glog.Errorf("Cannot get pod key, err: %v", err)
continue
}
if k1 == k2 {
// delete the element
n.pods[i] = n.pods[len(n.pods)-1]
n.pods = n.pods[:len(n.pods)-1]
// reduce the resource data
res, non0_cpu, non0_mem := calculateResource(pod)
n.requestedResource.MilliCPU -= res.MilliCPU
n.requestedResource.Memory -= res.Memory
n.requestedResource.NvidiaGPU -= res.NvidiaGPU
if len(res.OpaqueIntResources) > 0 && n.requestedResource.OpaqueIntResources == nil {
n.requestedResource.OpaqueIntResources = map[v1.ResourceName]int64{}
}
for rName, rQuant := range res.OpaqueIntResources {
n.requestedResource.OpaqueIntResources[rName] -= rQuant
}
n.nonzeroRequest.MilliCPU -= non0_cpu
n.nonzeroRequest.Memory -= non0_mem
n.generation++
return nil
}
}
return fmt.Errorf("no corresponding pod %s in pods of node %s", pod.Name, n.node.Name)
}
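removePod deletes from the pod slices with the swap-with-last idiom: overwrite the match with the final element and truncate. That is O(1) per removal but does not preserve order, which is fine here since nothing depends on pod ordering. A self-contained illustration of the idiom:

package main

import "fmt"

// removeUnordered deletes s[i] in O(1) by swapping in the last element.
// The slice's order is not preserved.
func removeUnordered(s []string, i int) []string {
	s[i] = s[len(s)-1]
	return s[:len(s)-1]
}

func main() {
	pods := []string{"a", "b", "c", "d"}
	fmt.Println(removeUnordered(pods, 1)) // prints: [a d c]
}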
func calculateResource(pod *v1.Pod) (res Resource, non0_cpu int64, non0_mem int64) {
for _, c := range pod.Spec.Containers {
for rName, rQuant := range c.Resources.Requests {
switch rName {
case v1.ResourceCPU:
res.MilliCPU += rQuant.MilliValue()
case v1.ResourceMemory:
res.Memory += rQuant.Value()
case v1.ResourceNvidiaGPU:
res.NvidiaGPU += rQuant.Value()
default:
if v1.IsOpaqueIntResourceName(rName) {
// Lazily allocate opaque resource map.
if res.OpaqueIntResources == nil {
res.OpaqueIntResources = map[v1.ResourceName]int64{}
}
res.OpaqueIntResources[rName] += rQuant.Value()
}
}
}
non0_cpu_req, non0_mem_req := priorityutil.GetNonzeroRequests(&c.Resources.Requests)
non0_cpu += non0_cpu_req
non0_mem += non0_mem_req
// GPUs and opaque resources get no non-zero default requests; only CPU and memory do.
}
return
}
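GetNonzeroRequests exists so that pods declaring no requests still carry a nominal cost in the priority functions; otherwise a node could look free while packed with request-less pods. A sketch of the substitution, with the default values stated as assumptions (they mirror the upstream defaults of this era: 100 millicores and 200 MiB):

package main

import "fmt"

// Assumed defaults mirroring priorityutil.GetNonzeroRequests; the exact
// values are assumptions based on the upstream defaults of this era.
const (
	defaultMilliCPURequest int64 = 100               // 0.1 core
	defaultMemoryRequest   int64 = 200 * 1024 * 1024 // 200 MiB
)

// nonzeroRequests substitutes the defaults for any request left at zero.
func nonzeroRequests(milliCPU, memory int64) (int64, int64) {
	if milliCPU == 0 {
		milliCPU = defaultMilliCPURequest
	}
	if memory == 0 {
		memory = defaultMemoryRequest
	}
	return milliCPU, memory
}

func main() {
	cpu, mem := nonzeroRequests(0, 0)
	fmt.Println(cpu, mem) // prints: 100 209715200
}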
// SetNode sets the overall node information.
func (n *NodeInfo) SetNode(node *v1.Node) error {
n.node = node
for rName, rQuant := range node.Status.Allocatable {
switch rName {
case v1.ResourceCPU:
n.allocatableResource.MilliCPU = rQuant.MilliValue()
case v1.ResourceMemory:
n.allocatableResource.Memory = rQuant.Value()
case v1.ResourceNvidiaGPU:
n.allocatableResource.NvidiaGPU = rQuant.Value()
case v1.ResourcePods:
n.allowedPodNumber = int(rQuant.Value())
default:
if v1.IsOpaqueIntResourceName(rName) {
// Lazily allocate opaque resource map.
if n.allocatableResource.OpaqueIntResources == nil {
n.allocatableResource.OpaqueIntResources = map[v1.ResourceName]int64{}
}
n.allocatableResource.OpaqueIntResources[rName] = rQuant.Value()
}
}
}
n.generation++
return nil
}
// RemoveNode removes the overall information about the node.
func (n *NodeInfo) RemoveNode(node *v1.Node) error {
// We don't remove the NodeInfo itself because there can still be pods on
// this node: notifications about pods are delivered on a different watch,
// so they may be observed after the node's removal even though they
// happened before it. This case is handled correctly in the cache.go file.
n.node = nil
n.allocatableResource = &Resource{}
n.allowedPodNumber = 0
n.generation++
return nil
}
// getPodKey returns the string key of a pod.
func getPodKey(pod *v1.Pod) (string, error) {
return clientcache.MetaNamespaceKeyFunc(pod)
}
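MetaNamespaceKeyFunc produces keys of the form "<namespace>/<name>" (just "<name>" when the namespace is empty), which is what removePod compares above. A small sketch, assuming the vendored client cache package:

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	clientcache "k8s.io/kubernetes/pkg/client/cache"
)

func main() {
	pod := &v1.Pod{ObjectMeta: v1.ObjectMeta{Namespace: "kube-system", Name: "dns"}}
	key, err := clientcache.MetaNamespaceKeyFunc(pod)
	if err != nil {
		panic(err)
	}
	fmt.Println(key) // prints: kube-system/dns
}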

View file

@ -0,0 +1,39 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import "k8s.io/kubernetes/pkg/api/v1"
// CreateNodeNameToInfoMap takes a list of pods and pivots it into a map whose
// keys are node names and whose values are the aggregated information for that node.
func CreateNodeNameToInfoMap(pods []*v1.Pod, nodes []*v1.Node) map[string]*NodeInfo {
nodeNameToInfo := make(map[string]*NodeInfo)
for _, pod := range pods {
nodeName := pod.Spec.NodeName
if _, ok := nodeNameToInfo[nodeName]; !ok {
nodeNameToInfo[nodeName] = NewNodeInfo()
}
nodeNameToInfo[nodeName].addPod(pod)
}
for _, node := range nodes {
if _, ok := nodeNameToInfo[node.Name]; !ok {
nodeNameToInfo[node.Name] = NewNodeInfo()
}
nodeNameToInfo[node.Name].SetNode(node)
}
return nodeNameToInfo
}
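A usage sketch of CreateNodeNameToInfoMap: two pods pivot onto node n1, while n2 gets an empty entry from the nodes pass. Names are illustrative and the import paths assume the vendored tree.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)

func main() {
	pods := []*v1.Pod{
		{ObjectMeta: v1.ObjectMeta{Name: "p1"}, Spec: v1.PodSpec{NodeName: "n1"}},
		{ObjectMeta: v1.ObjectMeta{Name: "p2"}, Spec: v1.PodSpec{NodeName: "n1"}},
	}
	nodes := []*v1.Node{
		{ObjectMeta: v1.ObjectMeta{Name: "n1"}},
		{ObjectMeta: v1.ObjectMeta{Name: "n2"}},
	}
	m := schedulercache.CreateNodeNameToInfoMap(pods, nodes)
	fmt.Println(len(m["n1"].Pods()), len(m["n2"].Pods())) // prints: 2 0
}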

View file

@ -0,0 +1,25 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
"go_test",
"cgo_library",
)
go_library(
name = "go_default_library",
srcs = [
"fake_cache.go",
"pods_to_cache.go",
],
tags = ["automanaged"],
deps = [
"//pkg/api/v1:go_default_library",
"//pkg/labels:go_default_library",
"//plugin/pkg/scheduler/schedulercache:go_default_library",
],
)

View file

@ -0,0 +1,53 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// FakeCache is a no-op Cache implementation used for testing.
type FakeCache struct {
AssumeFunc func(*v1.Pod)
}
func (f *FakeCache) AssumePod(pod *v1.Pod) error {
f.AssumeFunc(pod)
return nil
}
func (f *FakeCache) ForgetPod(pod *v1.Pod) error { return nil }
func (f *FakeCache) AddPod(pod *v1.Pod) error { return nil }
func (f *FakeCache) UpdatePod(oldPod, newPod *v1.Pod) error { return nil }
func (f *FakeCache) RemovePod(pod *v1.Pod) error { return nil }
func (f *FakeCache) AddNode(node *v1.Node) error { return nil }
func (f *FakeCache) UpdateNode(oldNode, newNode *v1.Node) error { return nil }
func (f *FakeCache) RemoveNode(node *v1.Node) error { return nil }
func (f *FakeCache) UpdateNodeNameToInfoMap(infoMap map[string]*schedulercache.NodeInfo) error {
return nil
}
func (f *FakeCache) List(s labels.Selector) ([]*v1.Pod, error) { return nil, nil }
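A minimal sketch of how a test could use FakeCache to observe the scheduler's AssumePod call; the test name and assertions are hypothetical, but the package name matches the one declared above.

package schedulercache

import (
	"testing"

	"k8s.io/kubernetes/pkg/api/v1"
)

func TestAssumePodIsObserved(t *testing.T) {
	var assumed string
	cache := &FakeCache{AssumeFunc: func(pod *v1.Pod) { assumed = pod.Name }}
	if err := cache.AssumePod(&v1.Pod{ObjectMeta: v1.ObjectMeta{Name: "p1"}}); err != nil {
		t.Fatal(err)
	}
	if assumed != "p1" {
		t.Fatalf("want p1, got %q", assumed)
	}
}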

View file

@ -0,0 +1,55 @@
/*
Copyright 2015 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package schedulercache
import (
"k8s.io/kubernetes/pkg/api/v1"
"k8s.io/kubernetes/pkg/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
)
// PodsToCache is a fixed list of pods that implements the Cache interface for testing.
type PodsToCache []*v1.Pod
func (p PodsToCache) AssumePod(pod *v1.Pod) error { return nil }
func (p PodsToCache) ForgetPod(pod *v1.Pod) error { return nil }
func (p PodsToCache) AddPod(pod *v1.Pod) error { return nil }
func (p PodsToCache) UpdatePod(oldPod, newPod *v1.Pod) error { return nil }
func (p PodsToCache) RemovePod(pod *v1.Pod) error { return nil }
func (p PodsToCache) AddNode(node *v1.Node) error { return nil }
func (p PodsToCache) UpdateNode(oldNode, newNode *v1.Node) error { return nil }
func (p PodsToCache) RemoveNode(node *v1.Node) error { return nil }
func (p PodsToCache) UpdateNodeNameToInfoMap(infoMap map[string]*schedulercache.NodeInfo) error {
return nil
}
func (p PodsToCache) List(s labels.Selector) (selected []*v1.Pod, err error) {
for _, pod := range p {
if s.Matches(labels.Set(pod.Labels)) {
selected = append(selected, pod)
}
}
return selected, nil
}
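A short usage sketch of PodsToCache: seed it with labeled pods and call List with a selector. The import path is an assumption based on the testing package's BUILD file above.

package main

import (
	"fmt"

	"k8s.io/kubernetes/pkg/api/v1"
	"k8s.io/kubernetes/pkg/labels"
	schedulertesting "k8s.io/kubernetes/plugin/pkg/scheduler/testing"
)

func main() {
	cache := schedulertesting.PodsToCache{
		{ObjectMeta: v1.ObjectMeta{Name: "web", Labels: map[string]string{"app": "web"}}},
		{ObjectMeta: v1.ObjectMeta{Name: "db", Labels: map[string]string{"app": "db"}}},
	}
	sel := labels.SelectorFromSet(labels.Set{"app": "web"})
	pods, err := cache.List(sel)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(pods), pods[0].Name) // prints: 1 web
}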