grpclb_remote_balancer.go 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. /*
  2. *
  3. * Copyright 2017 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package grpclb
  19. import (
  20. "context"
  21. "fmt"
  22. "io"
  23. "net"
  24. "reflect"
  25. "time"
  26. timestamppb "github.com/golang/protobuf/ptypes/timestamp"
  27. "google.golang.org/grpc"
  28. "google.golang.org/grpc/balancer"
  29. lbpb "google.golang.org/grpc/balancer/grpclb/grpc_lb_v1"
  30. "google.golang.org/grpc/connectivity"
  31. "google.golang.org/grpc/grpclog"
  32. "google.golang.org/grpc/internal"
  33. "google.golang.org/grpc/internal/channelz"
  34. "google.golang.org/grpc/metadata"
  35. "google.golang.org/grpc/resolver"
  36. )
  37. // processServerList updates balaner's internal state, create/remove SubConns
  38. // and regenerates picker using the received serverList.
  39. func (lb *lbBalancer) processServerList(l *lbpb.ServerList) {
  40. if grpclog.V(2) {
  41. grpclog.Infof("lbBalancer: processing server list: %+v", l)
  42. }
  43. lb.mu.Lock()
  44. defer lb.mu.Unlock()
  45. // Set serverListReceived to true so fallback will not take effect if it has
  46. // not hit timeout.
  47. lb.serverListReceived = true
  48. // If the new server list == old server list, do nothing.
  49. if reflect.DeepEqual(lb.fullServerList, l.Servers) {
  50. if grpclog.V(2) {
  51. grpclog.Infof("lbBalancer: new serverlist same as the previous one, ignoring")
  52. }
  53. return
  54. }
  55. lb.fullServerList = l.Servers
  56. var backendAddrs []resolver.Address
  57. for i, s := range l.Servers {
  58. if s.Drop {
  59. continue
  60. }
  61. md := metadata.Pairs(lbTokeyKey, s.LoadBalanceToken)
  62. ip := net.IP(s.IpAddress)
  63. ipStr := ip.String()
  64. if ip.To4() == nil {
  65. // Add square brackets to ipv6 addresses, otherwise net.Dial() and
  66. // net.SplitHostPort() will return too many colons error.
  67. ipStr = fmt.Sprintf("[%s]", ipStr)
  68. }
  69. addr := resolver.Address{
  70. Addr: fmt.Sprintf("%s:%d", ipStr, s.Port),
  71. Metadata: &md,
  72. }
  73. if grpclog.V(2) {
  74. grpclog.Infof("lbBalancer: server list entry[%d]: ipStr:|%s|, port:|%d|, load balancer token:|%v|",
  75. i, ipStr, s.Port, s.LoadBalanceToken)
  76. }
  77. backendAddrs = append(backendAddrs, addr)
  78. }
  79. // Call refreshSubConns to create/remove SubConns. If we are in fallback,
  80. // this is also exiting fallback.
  81. lb.refreshSubConns(backendAddrs, true)
  82. }
  83. // refreshSubConns creates/removes SubConns with backendAddrs, and refreshes
  84. // balancer state and picker.
  85. //
  86. // Caller must hold lb.mu.
  87. func (lb *lbBalancer) refreshSubConns(backendAddrs []resolver.Address, fromGRPCLBServer bool) {
  88. defer func() {
  89. // Regenerate and update picker after refreshing subconns because with
  90. // cache, even if SubConn was newed/removed, there might be no state
  91. // changes (the subconn will be kept in cache, not actually
  92. // newed/removed).
  93. lb.updateStateAndPicker(true, true)
  94. }()
  95. lb.inFallback = !fromGRPCLBServer
  96. opts := balancer.NewSubConnOptions{}
  97. if fromGRPCLBServer {
  98. opts.CredsBundle = lb.grpclbBackendCreds
  99. }
  100. lb.backendAddrs = nil
  101. if lb.usePickFirst {
  102. var sc balancer.SubConn
  103. for _, sc = range lb.subConns {
  104. break
  105. }
  106. if sc != nil {
  107. sc.UpdateAddresses(backendAddrs)
  108. sc.Connect()
  109. return
  110. }
  111. // This bypasses the cc wrapper with SubConn cache.
  112. sc, err := lb.cc.cc.NewSubConn(backendAddrs, opts)
  113. if err != nil {
  114. grpclog.Warningf("grpclb: failed to create new SubConn: %v", err)
  115. return
  116. }
  117. sc.Connect()
  118. lb.subConns[backendAddrs[0]] = sc
  119. lb.scStates[sc] = connectivity.Idle
  120. return
  121. }
  122. // addrsSet is the set converted from backendAddrs, it's used to quick
  123. // lookup for an address.
  124. addrsSet := make(map[resolver.Address]struct{})
  125. // Create new SubConns.
  126. for _, addr := range backendAddrs {
  127. addrWithoutMD := addr
  128. addrWithoutMD.Metadata = nil
  129. addrsSet[addrWithoutMD] = struct{}{}
  130. lb.backendAddrs = append(lb.backendAddrs, addrWithoutMD)
  131. if _, ok := lb.subConns[addrWithoutMD]; !ok {
  132. // Use addrWithMD to create the SubConn.
  133. sc, err := lb.cc.NewSubConn([]resolver.Address{addr}, opts)
  134. if err != nil {
  135. grpclog.Warningf("grpclb: failed to create new SubConn: %v", err)
  136. continue
  137. }
  138. lb.subConns[addrWithoutMD] = sc // Use the addr without MD as key for the map.
  139. if _, ok := lb.scStates[sc]; !ok {
  140. // Only set state of new sc to IDLE. The state could already be
  141. // READY for cached SubConns.
  142. lb.scStates[sc] = connectivity.Idle
  143. }
  144. sc.Connect()
  145. }
  146. }
  147. for a, sc := range lb.subConns {
  148. // a was removed by resolver.
  149. if _, ok := addrsSet[a]; !ok {
  150. lb.cc.RemoveSubConn(sc)
  151. delete(lb.subConns, a)
  152. // Keep the state of this sc in b.scStates until sc's state becomes Shutdown.
  153. // The entry will be deleted in HandleSubConnStateChange.
  154. }
  155. }
  156. }
  157. func (lb *lbBalancer) readServerList(s *balanceLoadClientStream) error {
  158. for {
  159. reply, err := s.Recv()
  160. if err != nil {
  161. if err == io.EOF {
  162. return errServerTerminatedConnection
  163. }
  164. return fmt.Errorf("grpclb: failed to recv server list: %v", err)
  165. }
  166. if serverList := reply.GetServerList(); serverList != nil {
  167. lb.processServerList(serverList)
  168. }
  169. }
  170. }
  171. func (lb *lbBalancer) sendLoadReport(s *balanceLoadClientStream, interval time.Duration) {
  172. ticker := time.NewTicker(interval)
  173. defer ticker.Stop()
  174. for {
  175. select {
  176. case <-ticker.C:
  177. case <-s.Context().Done():
  178. return
  179. }
  180. stats := lb.clientStats.toClientStats()
  181. t := time.Now()
  182. stats.Timestamp = &timestamppb.Timestamp{
  183. Seconds: t.Unix(),
  184. Nanos: int32(t.Nanosecond()),
  185. }
  186. if err := s.Send(&lbpb.LoadBalanceRequest{
  187. LoadBalanceRequestType: &lbpb.LoadBalanceRequest_ClientStats{
  188. ClientStats: stats,
  189. },
  190. }); err != nil {
  191. return
  192. }
  193. }
  194. }
  195. func (lb *lbBalancer) callRemoteBalancer() (backoff bool, _ error) {
  196. lbClient := &loadBalancerClient{cc: lb.ccRemoteLB}
  197. ctx, cancel := context.WithCancel(context.Background())
  198. defer cancel()
  199. stream, err := lbClient.BalanceLoad(ctx, grpc.WaitForReady(true))
  200. if err != nil {
  201. return true, fmt.Errorf("grpclb: failed to perform RPC to the remote balancer %v", err)
  202. }
  203. lb.mu.Lock()
  204. lb.remoteBalancerConnected = true
  205. lb.mu.Unlock()
  206. // grpclb handshake on the stream.
  207. initReq := &lbpb.LoadBalanceRequest{
  208. LoadBalanceRequestType: &lbpb.LoadBalanceRequest_InitialRequest{
  209. InitialRequest: &lbpb.InitialLoadBalanceRequest{
  210. Name: lb.target,
  211. },
  212. },
  213. }
  214. if err := stream.Send(initReq); err != nil {
  215. return true, fmt.Errorf("grpclb: failed to send init request: %v", err)
  216. }
  217. reply, err := stream.Recv()
  218. if err != nil {
  219. return true, fmt.Errorf("grpclb: failed to recv init response: %v", err)
  220. }
  221. initResp := reply.GetInitialResponse()
  222. if initResp == nil {
  223. return true, fmt.Errorf("grpclb: reply from remote balancer did not include initial response")
  224. }
  225. if initResp.LoadBalancerDelegate != "" {
  226. return true, fmt.Errorf("grpclb: Delegation is not supported")
  227. }
  228. go func() {
  229. if d := convertDuration(initResp.ClientStatsReportInterval); d > 0 {
  230. lb.sendLoadReport(stream, d)
  231. }
  232. }()
  233. // No backoff if init req/resp handshake was successful.
  234. return false, lb.readServerList(stream)
  235. }
  236. func (lb *lbBalancer) watchRemoteBalancer() {
  237. var retryCount int
  238. for {
  239. doBackoff, err := lb.callRemoteBalancer()
  240. select {
  241. case <-lb.doneCh:
  242. return
  243. default:
  244. if err != nil {
  245. if err == errServerTerminatedConnection {
  246. grpclog.Info(err)
  247. } else {
  248. grpclog.Warning(err)
  249. }
  250. }
  251. }
  252. // Trigger a re-resolve when the stream errors.
  253. lb.cc.cc.ResolveNow(resolver.ResolveNowOption{})
  254. lb.mu.Lock()
  255. lb.remoteBalancerConnected = false
  256. lb.fullServerList = nil
  257. // Enter fallback when connection to remote balancer is lost, and the
  258. // aggregated state is not Ready.
  259. if !lb.inFallback && lb.state != connectivity.Ready {
  260. // Entering fallback.
  261. lb.refreshSubConns(lb.resolvedBackendAddrs, false)
  262. }
  263. lb.mu.Unlock()
  264. if !doBackoff {
  265. retryCount = 0
  266. continue
  267. }
  268. timer := time.NewTimer(lb.backoff.Backoff(retryCount))
  269. select {
  270. case <-timer.C:
  271. case <-lb.doneCh:
  272. timer.Stop()
  273. return
  274. }
  275. retryCount++
  276. }
  277. }
  278. func (lb *lbBalancer) dialRemoteLB(remoteLBName string) {
  279. var dopts []grpc.DialOption
  280. if creds := lb.opt.DialCreds; creds != nil {
  281. if err := creds.OverrideServerName(remoteLBName); err == nil {
  282. dopts = append(dopts, grpc.WithTransportCredentials(creds))
  283. } else {
  284. grpclog.Warningf("grpclb: failed to override the server name in the credentials: %v, using Insecure", err)
  285. dopts = append(dopts, grpc.WithInsecure())
  286. }
  287. } else if bundle := lb.grpclbClientConnCreds; bundle != nil {
  288. dopts = append(dopts, grpc.WithCredentialsBundle(bundle))
  289. } else {
  290. dopts = append(dopts, grpc.WithInsecure())
  291. }
  292. if lb.opt.Dialer != nil {
  293. dopts = append(dopts, grpc.WithContextDialer(lb.opt.Dialer))
  294. }
  295. // Explicitly set pickfirst as the balancer.
  296. dopts = append(dopts, grpc.WithBalancerName(grpc.PickFirstBalancerName))
  297. wrb := internal.WithResolverBuilder.(func(resolver.Builder) grpc.DialOption)
  298. dopts = append(dopts, wrb(lb.manualResolver))
  299. if channelz.IsOn() {
  300. dopts = append(dopts, grpc.WithChannelzParentID(lb.opt.ChannelzParentID))
  301. }
  302. // DialContext using manualResolver.Scheme, which is a random scheme
  303. // generated when init grpclb. The target scheme here is not important.
  304. //
  305. // The grpc dial target will be used by the creds (ALTS) as the authority,
  306. // so it has to be set to remoteLBName that comes from resolver.
  307. cc, err := grpc.DialContext(context.Background(), remoteLBName, dopts...)
  308. if err != nil {
  309. grpclog.Fatalf("failed to dial: %v", err)
  310. }
  311. lb.ccRemoteLB = cc
  312. go lb.watchRemoteBalancer()
  313. }