diff --git a/.gitignore b/.gitignore index ff4e051..94a8ba4 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ go_raylet +.vim/ +__pycache__ diff --git a/go.mod b/go.mod index c6c3edd..cc38f4d 100644 --- a/go.mod +++ b/go.mod @@ -5,5 +5,7 @@ go 1.15 require ( github.com/gogo/protobuf v1.3.1 github.com/google/uuid v1.1.2 + go.uber.org/multierr v1.6.0 // indirect + go.uber.org/zap v1.16.0 google.golang.org/grpc v1.34.0 ) diff --git a/go.sum b/go.sum index 0dbbc7b..90ecf62 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,7 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po= @@ -27,24 +28,49 @@ github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= +go.uber.org/atomic v1.6.0 h1:Ezj3JGmsOnG1MoRWQkPBsKLe9DwWD9QeXzTRzzldNVk= +go.uber.org/atomic v1.6.0/go.mod h1:sABNBOSYdrvTF6hTgEIbc7YasKWGhgEQZyfxyTvoXHQ= +go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= +go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/multierr v1.5.0 h1:KCa4XfM8CWFCpxXRGok+Q0SS/0XBhMDbHHGABQLvD2A= +go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= +go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= +go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= +go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee/go.mod h1:vJERXedbb3MVM5f9Ejo0C68/HhF8uaILCdgjnY+goOA= +go.uber.org/zap v1.16.0 h1:uFRZXykJGK9lLY4HtgSw44DnIcAM+kRBP7x5m+NpAOM= +go.uber.org/zap v1.16.0/go.mod h1:MA8QOfq0BHJwdXa996Y4dYkAqRKB8/1K1QMMZVaNZjQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -52,6 +78,8 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a h1:1BGLXjeY4akVXGgbC9HugT3Jv3hCI0z56oJR5vAMgBU= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d h1:+R4KGOnez64A81RvjARKc4UT5/tI9ujCIVX+P5KiHuI= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -59,6 +87,10 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -83,6 +115,9 @@ google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpAD google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= diff --git a/main.go b/main.go index 86a1fcf..45bc7a2 100644 --- a/main.go +++ b/main.go @@ -8,13 +8,14 @@ import ( "os" "github.com/barakmich/go_raylet/ray_rpc" + "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" "google.golang.org/grpc/grpclog" ) var ( - gRPCPort = flag.Int("grpc-port", 50000, "The gRPC server port") + gRPCPort = flag.Int("grpc-port", 50050, "The gRPC server port") httpPort = flag.Int("http-port", 8080, "The HTTP server port") ) @@ -27,10 +28,12 @@ func init() { func main() { flag.Parse() + logger, _ := zap.NewDevelopment() + zap.ReplaceGlobals(logger) addr := fmt.Sprintf("0.0.0.0:%d", *gRPCPort) lis, err := net.Listen("tcp", addr) if err != nil { - log.Fatalln("Failed to listen:", err) + logger.Fatal("Failed to listen:", zap.Error(err)) } s := grpc.NewServer( grpc.Creds(insecure.NewCredentials()), @@ -40,8 +43,6 @@ func main() { ray_rpc.RegisterRayletWorkerConnectionServer(s, server) // Serve gRPC Server - log.Info("Serving gRPC on https://", addr) - go func() { - log.Fatal(s.Serve(lis)) - }() + fmt.Println("Serving gRPC on https://", addr) + s.Serve(lis) } diff --git a/object.go b/object.go index 7aeb45b..3caafaf 100644 --- a/object.go +++ b/object.go @@ -29,7 +29,6 @@ type Object struct { func GetObject(s ObjectStore, id ObjectID) ([]byte, error) { c := make(chan ObjectResult) - defer close(c) ids := []ObjectID{id} go func() { s.AwaitObjects(ids, c, nil) @@ -46,32 +45,92 @@ func GetObject(s ObjectStore, id ObjectID) ([]byte, error) { type MemObjectStore struct { sync.RWMutex - db map[ObjectID][]byte - prefix uint64 - printer uint64 + db map[ObjectID][]byte + prefix uint64 + printer uint64 + subscribers []chan *Object } func (mem *MemObjectStore) AwaitObjects(ids []ObjectID, c chan ObjectResult, timeout *time.Duration) { - if timeout != nil { - panic("timeout not yet implemented") - } - mem.RLock() - defer mem.RUnlock() + waitchan := make(chan *Object) + found := 0 + waitingFor := make(map[ObjectID]bool) + + // First, get everything we can + mem.Lock() for _, id := range ids { v, ok := mem.db[id] if !ok { - c <- ObjectResult{Error: ErrObjectNotFound} + waitingFor[id] = true } else { + found++ c <- ObjectResult{&Object{ID: id, Data: v}, nil} } } + + // We were lucky, and are done. + if found == len(ids) { + mem.Unlock() + close(c) + return + } + + // Wait for the rest + mem.subscribe(waitchan) + mem.Unlock() + + var timer <-chan time.Time + if timeout != nil { + timer = time.After(*timeout) + } else { + timer = make(<-chan time.Time) + } + for found != len(ids) { + giveUp := false + select { + case o := <-waitchan: + if waitingFor[o.ID] { + c <- ObjectResult{o, nil} + waitingFor[o.ID] = false + found += 1 + } + case <-timer: + giveUp = true + } + if giveUp { + break + } + } + mem.Unsubscribe(waitchan) close(c) } +func (mem *MemObjectStore) subscribe(c chan *Object) { + mem.subscribers = append(mem.subscribers, c) +} + +func (mem *MemObjectStore) Unsubscribe(c chan *Object) { + mem.Lock() + defer mem.Unlock() + mem.unsubscribe(c) +} + +func (mem *MemObjectStore) unsubscribe(c chan *Object) { + for i, s := range mem.subscribers { + if s == c { + mem.subscribers = append(mem.subscribers[:i], mem.subscribers[i+1:]...) + break + } + } +} + func (mem *MemObjectStore) PutObject(object *Object) error { mem.Lock() defer mem.Unlock() mem.db[object.ID] = object.Data + for _, s := range mem.subscribers { + s <- object + } return nil } diff --git a/python/rpc/ray_client_pb2_grpc.py b/python/rpc/ray_client_pb2_grpc.py index 6b3a4b4..4448836 100644 --- a/python/rpc/ray_client_pb2_grpc.py +++ b/python/rpc/ray_client_pb2_grpc.py @@ -2,7 +2,7 @@ """Client and server classes corresponding to protobuf-defined services.""" import grpc -import ray_client_pb2 as ray__client__pb2 +import rpc.ray_client_pb2 as ray__client__pb2 class RayletDriverStub(object): diff --git a/python/worker.py b/python/worker.py index 261a2cb..b466bb0 100644 --- a/python/worker.py +++ b/python/worker.py @@ -1,22 +1,37 @@ -from rpc import ray_client_pb2 -from rpc import ray_client_pb2_grpc +# from rpc import ray_client_pb2 +# from rpc import ray_client_pb2_grpc +import ray.core.generated.ray_client_pb2 as ray_client_pb2 +import ray.core.generated.ray_client_pb2_grpc as ray_client_pb2_grpc + import queue +import sys +import grpc +import cloudpickle +import threading +import concurrent.futures +from ray.experimental.client import ray +from ray.experimental.client.common import ClientObjectRef + class Worker: def __init__(self, conn_str): self.channel = grpc.insecure_channel(conn_str) self.worker_stub = ray_client_pb2_grpc.RayletWorkerConnectionStub(self.channel) + self.send_queue = None + self.pool = concurrent.futures.ThreadPoolExecutor(max_workers=3) self.server = ray_client_pb2_grpc.RayletDriverStub(self.channel) def begin(self): - send_queue = queue.SimpleQueue() - work_stream = self.worker_stub.Workstream(iter(send_queue.get, None)) + self.send_queue = queue.Queue() + work_stream = self.worker_stub.Workstream(iter(self.send_queue.get, None)) start = ray_client_pb2.WorkStatus( - status = ray_client_pb2.WorkStatus.StatusCode.READY + status = ray_client_pb2.WorkStatus.StatusCode.READY, + error_msg = "python_worker", ) - send_queue.put(start) + self.send_queue.put(start) for work in work_stream: + print("Got work") task = work.task if task.type != ray_client_pb2.ClientTask.RemoteExecType.FUNCTION: send_queue.put(ray_client_pb2.WorkStatus( @@ -25,25 +40,32 @@ class Worker: )) continue args = self.decode_args(task) - func_data = self.get(task.payload_id) - func = cloudpickle.loads(func_data) - res = func(*args) - out_data = cloudpickle.dumps(res) - send_queue.put(ray_client_pb2.WorkStatus( - status = ray_client_pb2.WorkStatus.StatusCode.COMPLETE, - complete_data = out_data, - finished_ticket = work.ticket, - )) + func = self.get(task.payload_id) + #self.pool.submit(self.run_and_return, func, args, work.ticket) + t = threading.Thread(target=self.run_and_return, args=(func, args, work.ticket)) + t.start() - def get(self, id_bytes): - data = self.server.GetObject(ray_client_pb2.GetRequest( - id = id_bytes, + + def run_and_return(self, func, args, ticket): + res = func(*args) + out_data = cloudpickle.dumps(res) + self.send_queue.put(ray_client_pb2.WorkStatus( + status = ray_client_pb2.WorkStatus.StatusCode.COMPLETE, + complete_data = out_data, + finished_ticket = ticket, )) - return data.data + + # def get(self, id_bytes): + # data = self.server.GetObject(ray_client_pb2.GetRequest( + # id = id_bytes, + # )) + # return cloudpickle.loads(data.data) + def get(self, id_bytes): + return ray.get(ClientObjectRef(id_bytes)) def decode_args(self, task): out = [] - for arg in arg_list: + for arg in task.args: t = self.convert_from_arg(arg) out.append(t) return out @@ -57,7 +79,8 @@ class Worker: def main(): - worker = Worker(os.args[1]) + worker = Worker(sys.argv[1]) + ray.connect(sys.argv[1], stub=worker.server) worker.begin() print("Shutting down...") worker.channel.close() diff --git a/raylet_grpc.go b/raylet_grpc.go index e6bd119..37c2d05 100644 --- a/raylet_grpc.go +++ b/raylet_grpc.go @@ -4,6 +4,7 @@ import ( "context" "github.com/barakmich/go_raylet/ray_rpc" + "go.uber.org/zap" ) type Raylet struct { @@ -12,6 +13,7 @@ type Raylet struct { } func (r *Raylet) GetObject(_ context.Context, req *ray_rpc.GetRequest) (*ray_rpc.GetResponse, error) { + zap.S().Debug("GetObject") data, err := GetObject(r.Objects, deserializeObjectID(req.Id)) if err != nil { return nil, err @@ -23,6 +25,7 @@ func (r *Raylet) GetObject(_ context.Context, req *ray_rpc.GetRequest) (*ray_rpc } func (r *Raylet) PutObject(_ context.Context, req *ray_rpc.PutRequest) (*ray_rpc.PutResponse, error) { + zap.S().Debug("PutObject") id := r.Objects.MakeID() err := r.Objects.PutObject(&Object{id, req.Data}) if err != nil { @@ -38,6 +41,7 @@ func (r *Raylet) WaitObject(_ context.Context, _ *ray_rpc.WaitRequest) (*ray_rpc } func (r *Raylet) Schedule(_ context.Context, task *ray_rpc.ClientTask) (*ray_rpc.ClientTaskTicket, error) { + zap.S().Debug("Schedule:", task.Type) id := r.Objects.MakeID() ticket := &ray_rpc.ClientTaskTicket{serializeObjectID(id)} work := &ray_rpc.Work{} diff --git a/worker_pool.go b/worker_pool.go index 653a4e6..4283705 100644 --- a/worker_pool.go +++ b/worker_pool.go @@ -6,6 +6,7 @@ import ( "sync" "github.com/barakmich/go_raylet/ray_rpc" + "go.uber.org/zap" ) type WorkstreamConnection = ray_rpc.RayletWorkerConnection_WorkstreamServer @@ -39,15 +40,16 @@ func NewRoundRobinWorkerPool(obj ObjectStore) *SimpleRRWorkerPool { func (wp *SimpleRRWorkerPool) Workstream(conn WorkstreamConnection) error { wp.Lock() - defer wp.Unlock() worker := &SimpleWorker{ - workChan: make(chan *ray_rpc.Work, 10), + workChan: make(chan *ray_rpc.Work), clientConn: conn, pool: wp, } - go worker.Main() wp.workers = append(wp.workers, worker) - return nil + wp.Unlock() + err := worker.Main() + wp.Deregister(worker) + return err } func (wp *SimpleRRWorkerPool) Schedule(work *ray_rpc.Work) error { @@ -56,6 +58,7 @@ func (wp *SimpleRRWorkerPool) Schedule(work *ray_rpc.Work) error { if len(wp.workers) == 0 { return errors.New("No workers available, try again later") } + zap.S().Info("Sending work to worker", wp.offset) wp.workers[wp.offset].workChan <- work wp.offset++ if wp.offset == len(wp.workers) { @@ -84,6 +87,7 @@ func (wp *SimpleRRWorkerPool) Close() error { func (wp *SimpleRRWorkerPool) Deregister(ptr interface{}) error { wp.Lock() defer wp.Unlock() + fmt.Println("Deregistering worker") worker := ptr.(*SimpleWorker) found := false for i, w := range wp.workers { @@ -92,6 +96,7 @@ func (wp *SimpleRRWorkerPool) Deregister(ptr interface{}) error { if wp.offset == len(wp.workers) { wp.offset = 0 } + close(worker.workChan) found = true } } @@ -101,34 +106,36 @@ func (wp *SimpleRRWorkerPool) Deregister(ptr interface{}) error { return nil } -func (w *SimpleWorker) Main() { +func (w *SimpleWorker) Main() error { sentinel, err := w.clientConn.Recv() if err != nil { - fmt.Println(err) - w.pool.Deregister(w) - return + return err } if sentinel.Status != ray_rpc.READY { - fmt.Println("Sent wrong sentinel? Closing...") - w.pool.Deregister(w) - return + return errors.New("Sent wrong sentinel? Closing...") } + fmt.Println("New worker:", sentinel.ErrorMsg) + go func() { + for work := range w.workChan { + fmt.Println("sending work") + err = w.clientConn.Send(work) + if err != nil { + fmt.Println("Error sending:", err) + return + } + } + }() for { - work, ok := <-w.workChan - if !ok { - break - } - err = w.clientConn.Send(work) - if err != nil { - fmt.Println("Error sending: %s", err) - break - } result, err := w.clientConn.Recv() + if err != nil { + fmt.Println("Error on channel:", err) + return err + } err = w.pool.Finish(result) if err != nil { - fmt.Println("Error finishing: %s", err) - break + fmt.Println("Error finishing:", err) + return err } } - w.pool.Deregister(w) + return nil }