Add some better balancing, reconnection
This commit is contained in:
parent
715f79c9c0
commit
324ba0d160
6 changed files with 77 additions and 29 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
from ray import ray
|
from ray import ray
|
||||||
|
import sys
|
||||||
|
|
||||||
ray.connect("localhost:50050")
|
ray.connect(sys.argv[1])
|
||||||
|
|
||||||
@ray.remote
|
@ray.remote
|
||||||
def plus2(x):
|
def plus2(x):
|
||||||
|
|
@ -14,4 +15,17 @@ def fact(x):
|
||||||
return 1
|
return 1
|
||||||
return x * ray.get(fact.remote(x - 1))
|
return x * ray.get(fact.remote(x - 1))
|
||||||
|
|
||||||
print(ray.get(fact.remote(20)))
|
#print(ray.get(fact.remote(20)))
|
||||||
|
|
||||||
|
@ray.remote
|
||||||
|
def sleeper(x):
|
||||||
|
import time
|
||||||
|
time.sleep(1)
|
||||||
|
return x * 2
|
||||||
|
|
||||||
|
holder = []
|
||||||
|
for i in range(20):
|
||||||
|
holder.append(sleeper.remote(i))
|
||||||
|
|
||||||
|
print([ray.get(x) for x in holder])
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ from ray import ray
|
||||||
from ray.common import ClientObjectRef
|
from ray.common import ClientObjectRef
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class Worker:
|
class Worker:
|
||||||
def __init__(self, conn_str):
|
def __init__(self, conn_str):
|
||||||
self.channel = grpc.insecure_channel(conn_str)
|
self.channel = grpc.insecure_channel(conn_str)
|
||||||
|
|
@ -37,9 +38,9 @@ class Worker:
|
||||||
continue
|
continue
|
||||||
args = self.decode_args(task)
|
args = self.decode_args(task)
|
||||||
func = self.get(task.payload_id)
|
func = self.get(task.payload_id)
|
||||||
#self.pool.submit(self.run_and_return, func, args, work.ticket)
|
self.pool.submit(self.run_and_return, func, args, work.ticket)
|
||||||
t = threading.Thread(target=self.run_and_return, args=(func, args, work.ticket))
|
#t = threading.Thread(target=self.run_and_return, args=(func, args, work.ticket))
|
||||||
t.start()
|
#t.start()
|
||||||
|
|
||||||
|
|
||||||
def run_and_return(self, func, args, ticket):
|
def run_and_return(self, func, args, ticket):
|
||||||
|
|
@ -50,6 +51,7 @@ class Worker:
|
||||||
complete_data = out_data,
|
complete_data = out_data,
|
||||||
finished_ticket = ticket,
|
finished_ticket = ticket,
|
||||||
))
|
))
|
||||||
|
print("Finished Work")
|
||||||
|
|
||||||
# def get(self, id_bytes):
|
# def get(self, id_bytes):
|
||||||
# data = self.server.GetObject(ray_client_pb2.GetRequest(
|
# data = self.server.GetObject(ray_client_pb2.GetRequest(
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,7 @@ func (r *Raylet) Workstream(conn WorkstreamConnection) error {
|
||||||
workChan: make(chan *ray_rpc.Work),
|
workChan: make(chan *ray_rpc.Work),
|
||||||
clientConn: conn,
|
clientConn: conn,
|
||||||
pool: r.Workers,
|
pool: r.Workers,
|
||||||
|
max: 3,
|
||||||
}
|
}
|
||||||
r.Workers.Register(worker)
|
r.Workers.Register(worker)
|
||||||
err := worker.Run()
|
err := worker.Run()
|
||||||
|
|
|
||||||
|
|
@ -17,25 +17,40 @@ languagePluginLoader.then(() => {
|
||||||
wsprotocol = "wss:"
|
wsprotocol = "wss:"
|
||||||
}
|
}
|
||||||
var wspath = wsprotocol + "//" + window.location.host + "/api/ws"
|
var wspath = wsprotocol + "//" + window.location.host + "/api/ws"
|
||||||
var c = new WebSocket(wspath)
|
function connect() {
|
||||||
c.onmessage = function(msg) {
|
var c = new WebSocket(wspath)
|
||||||
var workText = workTerms[Math.floor(Math.random() * workTerms.length)];
|
|
||||||
$("#output").append("<p>" + workText + "...</p>")
|
c.onopen = function() {
|
||||||
pyodide.globals.torun = msg.data
|
$("#status").text("Status: connected!")
|
||||||
pyodide.runPythonAsync("exec_work(torun)").then((res) => {
|
c.send(JSON.stringify({
|
||||||
$("#output").append("<p>Did work! 👏</p>")
|
status: 2,
|
||||||
c.send(res)
|
error_msg: "WebsocketWorker"
|
||||||
})
|
}))
|
||||||
}
|
}
|
||||||
c.onopen = function() {
|
|
||||||
$("#status").text("Status: connected!")
|
c.onmessage = function(msg) {
|
||||||
c.send(JSON.stringify({
|
var workText = workTerms[Math.floor(Math.random() * workTerms.length)];
|
||||||
status: 2,
|
$("#output").append("<p>" + workText + "...</p>")
|
||||||
error_msg: "WebsocketWorker"
|
pyodide.globals.torun = msg.data
|
||||||
}))
|
pyodide.runPythonAsync("exec_work(torun)").then((res) => {
|
||||||
}
|
$("#output").append("<p>Did work! 👏</p>")
|
||||||
c.onclose = function() {
|
c.send(res)
|
||||||
$("#status").text("Status: disconnected")
|
})
|
||||||
}
|
};
|
||||||
})
|
|
||||||
})
|
c.onclose = function(e) {
|
||||||
|
$("#status").text("Status: disconnected. reconnecting...")
|
||||||
|
console.log('Socket is closed. Reconnect will be attempted in 1 second.', e.reason);
|
||||||
|
setTimeout(function() {
|
||||||
|
connect();
|
||||||
|
}, 500);
|
||||||
|
};
|
||||||
|
|
||||||
|
c.onerror = function(err) {
|
||||||
|
console.error('Socket encountered error: ', err.message, 'Closing socket');
|
||||||
|
c.close();
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
connect();
|
||||||
|
}) })
|
||||||
|
|
|
||||||
|
|
@ -20,10 +20,12 @@ type SimpleWorker struct {
|
||||||
workChan chan *ray_rpc.Work
|
workChan chan *ray_rpc.Work
|
||||||
clientConn WorkstreamConnection
|
clientConn WorkstreamConnection
|
||||||
pool WorkerPool
|
pool WorkerPool
|
||||||
|
max int
|
||||||
|
curr int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SimpleWorker) Schedulable() bool {
|
func (s *SimpleWorker) Schedulable() bool {
|
||||||
return true
|
return s.curr < s.max
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SimpleWorker) AssignWork(work *ray_rpc.Work) error {
|
func (s *SimpleWorker) AssignWork(work *ray_rpc.Work) error {
|
||||||
|
|
@ -48,6 +50,7 @@ func (w *SimpleWorker) Run() error {
|
||||||
go func() {
|
go func() {
|
||||||
for work := range w.workChan {
|
for work := range w.workChan {
|
||||||
zap.S().Debug("Sending work")
|
zap.S().Debug("Sending work")
|
||||||
|
w.curr++
|
||||||
err = w.clientConn.Send(work)
|
err = w.clientConn.Send(work)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
zap.S().Error("Error sending:", err)
|
zap.S().Error("Error sending:", err)
|
||||||
|
|
@ -61,6 +64,7 @@ func (w *SimpleWorker) Run() error {
|
||||||
zap.S().Error("Error on channel:", err)
|
zap.S().Error("Error on channel:", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
w.curr--
|
||||||
err = w.pool.Finish(result)
|
err = w.pool.Finish(result)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
zap.S().Error("Error finishing:", err)
|
zap.S().Error("Error finishing:", err)
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ type SimpleRRWorkerPool struct {
|
||||||
workers []Worker
|
workers []Worker
|
||||||
store ObjectStore
|
store ObjectStore
|
||||||
offset int
|
offset int
|
||||||
|
pending []chan bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewRoundRobinWorkerPool(obj ObjectStore) *SimpleRRWorkerPool {
|
func NewRoundRobinWorkerPool(obj ObjectStore) *SimpleRRWorkerPool {
|
||||||
|
|
@ -57,16 +58,27 @@ func (wp *SimpleRRWorkerPool) Schedule(work *ray_rpc.Work) error {
|
||||||
wp.offset = 0
|
wp.offset = 0
|
||||||
}
|
}
|
||||||
if wp.offset == origOffset && !done {
|
if wp.offset == origOffset && !done {
|
||||||
return errors.New("No workers schedulable")
|
c := make(chan bool)
|
||||||
|
wp.pending = append(wp.pending, c)
|
||||||
|
wp.Unlock()
|
||||||
|
<-c
|
||||||
|
wp.Lock()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (wp *SimpleRRWorkerPool) Finish(status *ray_rpc.WorkStatus) error {
|
func (wp *SimpleRRWorkerPool) Finish(status *ray_rpc.WorkStatus) error {
|
||||||
|
wp.Lock()
|
||||||
|
defer wp.Unlock()
|
||||||
if status.Status != ray_rpc.COMPLETE {
|
if status.Status != ray_rpc.COMPLETE {
|
||||||
panic("todo: Only call Finish on successfully completed work")
|
panic("todo: Only call Finish on successfully completed work")
|
||||||
}
|
}
|
||||||
|
if len(wp.pending) != 0 {
|
||||||
|
c := wp.pending[0]
|
||||||
|
wp.pending = wp.pending[1:]
|
||||||
|
close(c)
|
||||||
|
}
|
||||||
id := deserializeObjectID(status.FinishedTicket.ReturnId)
|
id := deserializeObjectID(status.FinishedTicket.ReturnId)
|
||||||
return wp.store.PutObject(&Object{id, status.CompleteData})
|
return wp.store.PutObject(&Object{id, status.CompleteData})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue