clientconn.go 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356
  1. /*
  2. *
  3. * Copyright 2014 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package grpc
  19. import (
  20. "context"
  21. "errors"
  22. "fmt"
  23. "math"
  24. "net"
  25. "reflect"
  26. "strings"
  27. "sync"
  28. "sync/atomic"
  29. "time"
  30. "google.golang.org/grpc/balancer"
  31. _ "google.golang.org/grpc/balancer/roundrobin" // To register roundrobin.
  32. "google.golang.org/grpc/codes"
  33. "google.golang.org/grpc/connectivity"
  34. "google.golang.org/grpc/credentials"
  35. "google.golang.org/grpc/grpclog"
  36. "google.golang.org/grpc/internal/backoff"
  37. "google.golang.org/grpc/internal/channelz"
  38. "google.golang.org/grpc/internal/envconfig"
  39. "google.golang.org/grpc/internal/grpcsync"
  40. "google.golang.org/grpc/internal/transport"
  41. "google.golang.org/grpc/keepalive"
  42. "google.golang.org/grpc/resolver"
  43. _ "google.golang.org/grpc/resolver/dns" // To register dns resolver.
  44. _ "google.golang.org/grpc/resolver/passthrough" // To register passthrough resolver.
  45. "google.golang.org/grpc/status"
  46. )
const (
	// minConnectTimeout is the minimum time to give a connection to complete.
	minConnectTimeout = 20 * time.Second
	// grpclbName is the balancer name selected when the resolver reports
	// grpclb addresses. It must match grpclbName in grpclb/grpclb.go.
	grpclbName = "grpclb"
)
var (
	// ErrClientConnClosing indicates that the operation is illegal because
	// the ClientConn is closing.
	//
	// Deprecated: this error should not be relied upon by users; use the status
	// code of Canceled instead.
	ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
	// errConnDrain indicates that the connection has started draining and
	// does not accept any new RPCs.
	errConnDrain = errors.New("grpc: the connection is drained")
	// errConnClosing indicates that the connection is closing.
	errConnClosing = errors.New("grpc: the connection is closing")
	// errBalancerClosed indicates that the balancer is closed.
	errBalancerClosed = errors.New("grpc: balancer is closed")
	// invalidDefaultServiceConfigErrPrefix prefixes the JSON parsing error
	// returned by DialContext when the default service config supplied via
	// dial options cannot be parsed.
	invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
)
// The following errors are returned from Dial and DialContext when the
// security-related dial options are missing or contradict each other.
var (
	// errNoTransportSecurity indicates that there is no transport security
	// being set for ClientConn. Users should either set one or explicitly
	// call WithInsecure DialOption to disable security.
	errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
	// errTransportCredsAndBundle indicates that a credentials bundle is used
	// together with other individual transport credentials.
	errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
	// errTransportCredentialsMissing indicates that users want to transmit
	// security information (e.g., OAuth2 token), which requires a secure
	// connection, over an insecure connection.
	errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
	// errCredentialsConflict indicates that grpc.WithTransportCredentials()
	// and grpc.WithInsecure() are both called for a connection.
	errCredentialsConflict = errors.New("grpc: transport credentials are set for an insecure connection (grpc.WithTransportCredentials() and grpc.WithInsecure() are both called)")
)
const (
	// defaultClientMaxReceiveMessageSize is 4 MiB (1024 * 1024 * 4 bytes).
	defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4
	// defaultClientMaxSendMessageSize is the largest int32 value.
	defaultClientMaxSendMessageSize = math.MaxInt32
	// defaultWriteBufSize and defaultReadBufSize are the default write and
	// read buffer sizes, 32 KiB each.
	// NOTE(review): the previous comment here documented a non-existent
	// "http2IOBufSize" as "the buffer size for sending frames"; confirm the
	// exact consumers of these two values.
	defaultWriteBufSize = 32 * 1024
	defaultReadBufSize  = 32 * 1024
)
  94. // Dial creates a client connection to the given target.
  95. func Dial(target string, opts ...DialOption) (*ClientConn, error) {
  96. return DialContext(context.Background(), target, opts...)
  97. }
// DialContext creates a client connection to the given target. By default, it's
// a non-blocking dial (the function won't wait for connections to be
// established, and connecting happens in the background). To make it a blocking
// dial, use WithBlock() dial option.
//
// In the non-blocking case, the ctx does not act against the connection. It
// only controls the setup steps.
//
// In the blocking case, ctx can be used to cancel or expire the pending
// connection. Once this function returns, the cancellation and expiration of
// ctx will be noop. Users should call ClientConn.Close to terminate all the
// pending operations after this function returns.
//
// The target name syntax is defined in
// https://github.com/grpc/grpc/blob/master/doc/naming.md.
// e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
//
// On any error the partially constructed ClientConn is closed and nil is
// returned in its place. If ctx is already done when the function returns,
// the result is replaced by (nil, ctx.Err()) even if setup had succeeded.
func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
	cc := &ClientConn{
		target:            target,
		csMgr:             &connectivityStateManager{},
		conns:             make(map[*addrConn]struct{}),
		dopts:             defaultDialOptions(),
		blockingpicker:    newPickerWrapper(),
		czData:            new(channelzData),
		firstResolveEvent: grpcsync.NewEvent(),
	}
	// Store a typed nil so later Stores/Loads of *retryThrottler are
	// consistent in type.
	cc.retryThrottler.Store((*retryThrottler)(nil))
	// The ClientConn's own context is independent of the caller's ctx; it is
	// cancelled through cc.cancel.
	cc.ctx, cc.cancel = context.WithCancel(context.Background())

	for _, opt := range opts {
		opt.apply(&cc.dopts)
	}

	// Registered first, so it runs last among the deferred functions: it sees
	// the final value of err (including one set by the ctx check below) and
	// tears down the ClientConn on failure.
	defer func() {
		if err != nil {
			cc.Close()
		}
	}()

	if channelz.IsOn() {
		if cc.dopts.channelzParentID != 0 {
			// Nested channel: register under the parent and record a trace
			// event on the parent as well.
			cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
			channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
				Desc:     "Channel Created",
				Severity: channelz.CtINFO,
				Parent: &channelz.TraceEventDesc{
					Desc:     fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID),
					Severity: channelz.CtINFO,
				},
			})
		} else {
			cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target)
			channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
				Desc:     "Channel Created",
				Severity: channelz.CtINFO,
			})
		}
		cc.csMgr.channelzID = cc.channelzID
	}

	// Validate the combination of security-related dial options.
	if !cc.dopts.insecure {
		if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
			return nil, errNoTransportSecurity
		}
		if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
			return nil, errTransportCredsAndBundle
		}
	} else {
		if cc.dopts.copts.TransportCredentials != nil || cc.dopts.copts.CredsBundle != nil {
			return nil, errCredentialsConflict
		}
		// Per-RPC credentials that demand transport security cannot be used
		// on an insecure connection.
		for _, cd := range cc.dopts.copts.PerRPCCredentials {
			if cd.RequireTransportSecurity() {
				return nil, errTransportCredentialsMissing
			}
		}
	}

	if cc.dopts.defaultServiceConfigRawJSON != nil {
		sc, err := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON)
		if err != nil {
			return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, err)
		}
		cc.dopts.defaultServiceConfig = sc
	}
	cc.mkp = cc.dopts.copts.KeepaliveParams

	// Install the default dialer when the user did not supply one.
	if cc.dopts.copts.Dialer == nil {
		cc.dopts.copts.Dialer = newProxyDialer(
			func(ctx context.Context, addr string) (net.Conn, error) {
				network, addr := parseDialTarget(addr)
				return (&net.Dialer{}).DialContext(ctx, network, addr)
			},
		)
	}

	// A user-supplied user agent is kept and the gRPC one is appended to it.
	if cc.dopts.copts.UserAgent != "" {
		cc.dopts.copts.UserAgent += " " + grpcUA
	} else {
		cc.dopts.copts.UserAgent = grpcUA
	}

	if cc.dopts.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
		defer cancel()
	}
	// Registered last, so it runs first on return: if ctx is done at that
	// point, the result is overridden with the context error.
	defer func() {
		select {
		case <-ctx.Done():
			conn, err = nil, ctx.Err()
		default:
		}
	}()

	scSet := false
	if cc.dopts.scChan != nil {
		// Try to get an initial service config.
		select {
		case sc, ok := <-cc.dopts.scChan:
			if ok {
				cc.sc = &sc
				scSet = true
			}
		default:
		}
	}
	if cc.dopts.bs == nil {
		cc.dopts.bs = backoff.Exponential{
			MaxDelay: DefaultBackoffConfig.MaxDelay,
		}
	}
	if cc.dopts.resolverBuilder == nil {
		// Only try to parse target when resolver builder is not already set.
		cc.parsedTarget = parseTarget(cc.target)
		grpclog.Infof("parsed scheme: %q", cc.parsedTarget.Scheme)
		cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
		if cc.dopts.resolverBuilder == nil {
			// If resolver builder is still nil, the parsed target's scheme is
			// not registered. Fallback to default resolver and set Endpoint to
			// the original target.
			grpclog.Infof("scheme %q not registered, fallback to default scheme", cc.parsedTarget.Scheme)
			cc.parsedTarget = resolver.Target{
				Scheme:   resolver.GetDefaultScheme(),
				Endpoint: target,
			}
			cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
		}
	} else {
		cc.parsedTarget = resolver.Target{Endpoint: target}
	}

	// Authority precedence: server name from the transport credentials, then
	// the authority dial option (honored only for insecure connections), then
	// the endpoint of the parsed target.
	creds := cc.dopts.copts.TransportCredentials
	if creds != nil && creds.Info().ServerName != "" {
		cc.authority = creds.Info().ServerName
	} else if cc.dopts.insecure && cc.dopts.authority != "" {
		cc.authority = cc.dopts.authority
	} else {
		// Use endpoint from "scheme://authority/endpoint" as the default
		// authority for ClientConn.
		cc.authority = cc.parsedTarget.Endpoint
	}

	if cc.dopts.scChan != nil && !scSet {
		// Blocking wait for the initial service config.
		select {
		case sc, ok := <-cc.dopts.scChan:
			if ok {
				cc.sc = &sc
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	if cc.dopts.scChan != nil {
		// Keep consuming service config updates until the channel is closed
		// or the ClientConn's context is done.
		go cc.scWatcher()
	}

	// The balancer gets its own clone of the transport credentials rather
	// than the instance held in the dial options.
	var credsClone credentials.TransportCredentials
	if creds := cc.dopts.copts.TransportCredentials; creds != nil {
		credsClone = creds.Clone()
	}
	cc.balancerBuildOpts = balancer.BuildOptions{
		DialCreds:        credsClone,
		CredsBundle:      cc.dopts.copts.CredsBundle,
		Dialer:           cc.dopts.copts.Dialer,
		ChannelzParentID: cc.channelzID,
	}

	// Build the resolver.
	rWrapper, err := newCCResolverWrapper(cc)
	if err != nil {
		return nil, fmt.Errorf("failed to build resolver: %v", err)
	}

	cc.mu.Lock()
	cc.resolverWrapper = rWrapper
	cc.mu.Unlock()
	// A blocking dial blocks until the clientConn is ready.
	if cc.dopts.block {
		for {
			s := cc.GetState()
			if s == connectivity.Ready {
				break
			} else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
				// Fail fast only when the connection error reports itself as
				// non-temporary; otherwise keep waiting for a state change.
				if err = cc.blockingpicker.connectionError(); err != nil {
					terr, ok := err.(interface {
						Temporary() bool
					})
					if ok && !terr.Temporary() {
						return nil, err
					}
				}
			}
			if !cc.WaitForStateChange(ctx, s) {
				// ctx got timeout or canceled.
				return nil, ctx.Err()
			}
		}
	}

	return cc, nil
}
// connectivityStateManager keeps the connectivity.State of ClientConn.
// This struct will eventually be exported so the balancers can access it.
type connectivityStateManager struct {
	// mu guards state and notifyChan in the methods of this type.
	mu sync.Mutex
	// state is the current connectivity state of the channel.
	state connectivity.State
	// notifyChan is created lazily by getNotifyChan, then closed and reset
	// to nil by updateState on the next state change.
	notifyChan chan struct{}
	// channelzID identifies the channel when recording channelz trace events.
	channelzID int64
}
  314. // updateState updates the connectivity.State of ClientConn.
  315. // If there's a change it notifies goroutines waiting on state change to
  316. // happen.
  317. func (csm *connectivityStateManager) updateState(state connectivity.State) {
  318. csm.mu.Lock()
  319. defer csm.mu.Unlock()
  320. if csm.state == connectivity.Shutdown {
  321. return
  322. }
  323. if csm.state == state {
  324. return
  325. }
  326. csm.state = state
  327. if channelz.IsOn() {
  328. channelz.AddTraceEvent(csm.channelzID, &channelz.TraceEventDesc{
  329. Desc: fmt.Sprintf("Channel Connectivity change to %v", state),
  330. Severity: channelz.CtINFO,
  331. })
  332. }
  333. if csm.notifyChan != nil {
  334. // There are other goroutines waiting on this channel.
  335. close(csm.notifyChan)
  336. csm.notifyChan = nil
  337. }
  338. }
  339. func (csm *connectivityStateManager) getState() connectivity.State {
  340. csm.mu.Lock()
  341. defer csm.mu.Unlock()
  342. return csm.state
  343. }
  344. func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} {
  345. csm.mu.Lock()
  346. defer csm.mu.Unlock()
  347. if csm.notifyChan == nil {
  348. csm.notifyChan = make(chan struct{})
  349. }
  350. return csm.notifyChan
  351. }
// ClientConn represents a client connection to an RPC server.
type ClientConn struct {
	// ctx and cancel form the ClientConn's own lifetime context, created in
	// DialContext independently of the caller's context.
	ctx    context.Context
	cancel context.CancelFunc

	target       string          // target string as passed to Dial/DialContext
	parsedTarget resolver.Target // target after scheme parsing, set in DialContext
	authority    string          // chosen in DialContext from creds, dial option, or endpoint
	dopts        dialOptions
	csMgr        *connectivityStateManager

	balancerBuildOpts balancer.BuildOptions
	blockingpicker    *pickerWrapper

	// mu is held by the visible methods when accessing the fields below.
	// NOTE(review): DialContext writes sc during setup without holding mu.
	mu              sync.RWMutex
	resolverWrapper *ccResolverWrapper
	sc              *ServiceConfig
	// conns is the set of live addrConns; a nil map is treated as "closed"
	// by the methods that check it.
	conns map[*addrConn]struct{}
	// Keepalive parameter can be updated if a GoAway is received.
	mkp             keepalive.ClientParameters
	curBalancerName string
	balancerWrapper *ccBalancerWrapper
	// retryThrottler always holds a *retryThrottler, possibly a typed nil.
	retryThrottler atomic.Value

	// firstResolveEvent fires after the first resolver state has been handed
	// to the balancer (see updateResolverState).
	firstResolveEvent *grpcsync.Event

	channelzID int64 // channelz unique identification number
	czData     *channelzData
}
  376. // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
  377. // ctx expires. A true value is returned in former case and false in latter.
  378. // This is an EXPERIMENTAL API.
  379. func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
  380. ch := cc.csMgr.getNotifyChan()
  381. if cc.csMgr.getState() != sourceState {
  382. return true
  383. }
  384. select {
  385. case <-ctx.Done():
  386. return false
  387. case <-ch:
  388. return true
  389. }
  390. }
  391. // GetState returns the connectivity.State of ClientConn.
  392. // This is an EXPERIMENTAL API.
  393. func (cc *ClientConn) GetState() connectivity.State {
  394. return cc.csMgr.getState()
  395. }
  396. func (cc *ClientConn) scWatcher() {
  397. for {
  398. select {
  399. case sc, ok := <-cc.dopts.scChan:
  400. if !ok {
  401. return
  402. }
  403. cc.mu.Lock()
  404. // TODO: load balance policy runtime change is ignored.
  405. // We may revisit this decision in the future.
  406. cc.sc = &sc
  407. cc.mu.Unlock()
  408. case <-cc.ctx.Done():
  409. return
  410. }
  411. }
  412. }
  413. // waitForResolvedAddrs blocks until the resolver has provided addresses or the
  414. // context expires. Returns nil unless the context expires first; otherwise
  415. // returns a status error based on the context.
  416. func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error {
  417. // This is on the RPC path, so we use a fast path to avoid the
  418. // more-expensive "select" below after the resolver has returned once.
  419. if cc.firstResolveEvent.HasFired() {
  420. return nil
  421. }
  422. select {
  423. case <-cc.firstResolveEvent.Done():
  424. return nil
  425. case <-ctx.Done():
  426. return status.FromContextError(ctx.Err()).Err()
  427. case <-cc.ctx.Done():
  428. return ErrClientConnClosing
  429. }
  430. }
  431. // gRPC should resort to default service config when:
  432. // * resolver service config is disabled
  433. // * or, resolver does not return a service config or returns an invalid one.
  434. func (cc *ClientConn) fallbackToDefaultServiceConfig(sc string) bool {
  435. if cc.dopts.disableServiceConfig {
  436. return true
  437. }
  438. // The logic below is temporary, will be removed once we change the resolver.State ServiceConfig field type.
  439. // Right now, we assume that empty service config string means resolver does not return a config.
  440. if sc == "" {
  441. return true
  442. }
  443. // TODO: the logic below is temporary. Once we finish the logic to validate service config
  444. // in resolver, we will replace the logic below.
  445. _, err := parseServiceConfig(sc)
  446. return err != nil
  447. }
  448. func (cc *ClientConn) updateResolverState(s resolver.State) error {
  449. cc.mu.Lock()
  450. defer cc.mu.Unlock()
  451. // Check if the ClientConn is already closed. Some fields (e.g.
  452. // balancerWrapper) are set to nil when closing the ClientConn, and could
  453. // cause nil pointer panic if we don't have this check.
  454. if cc.conns == nil {
  455. return nil
  456. }
  457. if cc.fallbackToDefaultServiceConfig(s.ServiceConfig) {
  458. if cc.dopts.defaultServiceConfig != nil && cc.sc == nil {
  459. cc.applyServiceConfig(cc.dopts.defaultServiceConfig)
  460. }
  461. } else {
  462. // TODO: the parsing logic below will be moved inside resolver.
  463. sc, err := parseServiceConfig(s.ServiceConfig)
  464. if err != nil {
  465. return err
  466. }
  467. if cc.sc == nil || cc.sc.rawJSONString != s.ServiceConfig {
  468. cc.applyServiceConfig(sc)
  469. }
  470. }
  471. // update the service config that will be sent to balancer.
  472. if cc.sc != nil {
  473. s.ServiceConfig = cc.sc.rawJSONString
  474. }
  475. if cc.dopts.balancerBuilder == nil {
  476. // Only look at balancer types and switch balancer if balancer dial
  477. // option is not set.
  478. var isGRPCLB bool
  479. for _, a := range s.Addresses {
  480. if a.Type == resolver.GRPCLB {
  481. isGRPCLB = true
  482. break
  483. }
  484. }
  485. var newBalancerName string
  486. // TODO: use new loadBalancerConfig field with appropriate priority.
  487. if isGRPCLB {
  488. newBalancerName = grpclbName
  489. } else if cc.sc != nil && cc.sc.LB != nil {
  490. newBalancerName = *cc.sc.LB
  491. } else {
  492. newBalancerName = PickFirstBalancerName
  493. }
  494. cc.switchBalancer(newBalancerName)
  495. } else if cc.balancerWrapper == nil {
  496. // Balancer dial option was set, and this is the first time handling
  497. // resolved addresses. Build a balancer with dopts.balancerBuilder.
  498. cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts)
  499. }
  500. cc.balancerWrapper.updateResolverState(s)
  501. cc.firstResolveEvent.Fire()
  502. return nil
  503. }
  504. // switchBalancer starts the switching from current balancer to the balancer
  505. // with the given name.
  506. //
  507. // It will NOT send the current address list to the new balancer. If needed,
  508. // caller of this function should send address list to the new balancer after
  509. // this function returns.
  510. //
  511. // Caller must hold cc.mu.
  512. func (cc *ClientConn) switchBalancer(name string) {
  513. if strings.ToLower(cc.curBalancerName) == strings.ToLower(name) {
  514. return
  515. }
  516. grpclog.Infof("ClientConn switching balancer to %q", name)
  517. if cc.dopts.balancerBuilder != nil {
  518. grpclog.Infoln("ignoring balancer switching: Balancer DialOption used instead")
  519. return
  520. }
  521. if cc.balancerWrapper != nil {
  522. cc.balancerWrapper.close()
  523. }
  524. builder := balancer.Get(name)
  525. if channelz.IsOn() {
  526. if builder == nil {
  527. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  528. Desc: fmt.Sprintf("Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName),
  529. Severity: channelz.CtWarning,
  530. })
  531. } else {
  532. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  533. Desc: fmt.Sprintf("Channel switches to new LB policy %q", name),
  534. Severity: channelz.CtINFO,
  535. })
  536. }
  537. }
  538. if builder == nil {
  539. grpclog.Infof("failed to get balancer builder for: %v, using pick_first instead", name)
  540. builder = newPickfirstBuilder()
  541. }
  542. cc.curBalancerName = builder.Name()
  543. cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts)
  544. }
  545. func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State) {
  546. cc.mu.Lock()
  547. if cc.conns == nil {
  548. cc.mu.Unlock()
  549. return
  550. }
  551. // TODO(bar switching) send updates to all balancer wrappers when balancer
  552. // gracefully switching is supported.
  553. cc.balancerWrapper.handleSubConnStateChange(sc, s)
  554. cc.mu.Unlock()
  555. }
  556. // newAddrConn creates an addrConn for addrs and adds it to cc.conns.
  557. //
  558. // Caller needs to make sure len(addrs) > 0.
  559. func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
  560. ac := &addrConn{
  561. cc: cc,
  562. addrs: addrs,
  563. scopts: opts,
  564. dopts: cc.dopts,
  565. czData: new(channelzData),
  566. resetBackoff: make(chan struct{}),
  567. }
  568. ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
  569. // Track ac in cc. This needs to be done before any getTransport(...) is called.
  570. cc.mu.Lock()
  571. if cc.conns == nil {
  572. cc.mu.Unlock()
  573. return nil, ErrClientConnClosing
  574. }
  575. if channelz.IsOn() {
  576. ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "")
  577. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  578. Desc: "Subchannel Created",
  579. Severity: channelz.CtINFO,
  580. Parent: &channelz.TraceEventDesc{
  581. Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID),
  582. Severity: channelz.CtINFO,
  583. },
  584. })
  585. }
  586. cc.conns[ac] = struct{}{}
  587. cc.mu.Unlock()
  588. return ac, nil
  589. }
  590. // removeAddrConn removes the addrConn in the subConn from clientConn.
  591. // It also tears down the ac with the given error.
  592. func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) {
  593. cc.mu.Lock()
  594. if cc.conns == nil {
  595. cc.mu.Unlock()
  596. return
  597. }
  598. delete(cc.conns, ac)
  599. cc.mu.Unlock()
  600. ac.tearDown(err)
  601. }
  602. func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric {
  603. return &channelz.ChannelInternalMetric{
  604. State: cc.GetState(),
  605. Target: cc.target,
  606. CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted),
  607. CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded),
  608. CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed),
  609. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)),
  610. }
  611. }
  612. // Target returns the target string of the ClientConn.
  613. // This is an EXPERIMENTAL API.
  614. func (cc *ClientConn) Target() string {
  615. return cc.target
  616. }
  617. func (cc *ClientConn) incrCallsStarted() {
  618. atomic.AddInt64(&cc.czData.callsStarted, 1)
  619. atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano())
  620. }
  621. func (cc *ClientConn) incrCallsSucceeded() {
  622. atomic.AddInt64(&cc.czData.callsSucceeded, 1)
  623. }
  624. func (cc *ClientConn) incrCallsFailed() {
  625. atomic.AddInt64(&cc.czData.callsFailed, 1)
  626. }
  627. // connect starts creating a transport.
  628. // It does nothing if the ac is not IDLE.
  629. // TODO(bar) Move this to the addrConn section.
  630. func (ac *addrConn) connect() error {
  631. ac.mu.Lock()
  632. if ac.state == connectivity.Shutdown {
  633. ac.mu.Unlock()
  634. return errConnClosing
  635. }
  636. if ac.state != connectivity.Idle {
  637. ac.mu.Unlock()
  638. return nil
  639. }
  640. ac.updateConnectivityState(connectivity.Connecting)
  641. ac.mu.Unlock()
  642. // Start a goroutine connecting to the server asynchronously.
  643. go ac.resetTransport()
  644. return nil
  645. }
  646. // tryUpdateAddrs tries to update ac.addrs with the new addresses list.
  647. //
  648. // It checks whether current connected address of ac is in the new addrs list.
  649. // - If true, it updates ac.addrs and returns true. The ac will keep using
  650. // the existing connection.
  651. // - If false, it does nothing and returns false.
  652. func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool {
  653. ac.mu.Lock()
  654. defer ac.mu.Unlock()
  655. grpclog.Infof("addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs)
  656. if ac.state == connectivity.Shutdown {
  657. ac.addrs = addrs
  658. return true
  659. }
  660. // Unless we're busy reconnecting already, let's reconnect from the top of
  661. // the list.
  662. if ac.state != connectivity.Ready {
  663. return false
  664. }
  665. var curAddrFound bool
  666. for _, a := range addrs {
  667. if reflect.DeepEqual(ac.curAddr, a) {
  668. curAddrFound = true
  669. break
  670. }
  671. }
  672. grpclog.Infof("addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound)
  673. if curAddrFound {
  674. ac.addrs = addrs
  675. }
  676. return curAddrFound
  677. }
  678. // GetMethodConfig gets the method config of the input method.
  679. // If there's an exact match for input method (i.e. /service/method), we return
  680. // the corresponding MethodConfig.
  681. // If there isn't an exact match for the input method, we look for the default config
  682. // under the service (i.e /service/). If there is a default MethodConfig for
  683. // the service, we return it.
  684. // Otherwise, we return an empty MethodConfig.
  685. func (cc *ClientConn) GetMethodConfig(method string) MethodConfig {
  686. // TODO: Avoid the locking here.
  687. cc.mu.RLock()
  688. defer cc.mu.RUnlock()
  689. if cc.sc == nil {
  690. return MethodConfig{}
  691. }
  692. m, ok := cc.sc.Methods[method]
  693. if !ok {
  694. i := strings.LastIndex(method, "/")
  695. m = cc.sc.Methods[method[:i+1]]
  696. }
  697. return m
  698. }
  699. func (cc *ClientConn) healthCheckConfig() *healthCheckConfig {
  700. cc.mu.RLock()
  701. defer cc.mu.RUnlock()
  702. if cc.sc == nil {
  703. return nil
  704. }
  705. return cc.sc.healthCheckConfig
  706. }
  707. func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) {
  708. t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickOptions{
  709. FullMethodName: method,
  710. })
  711. if err != nil {
  712. return nil, nil, toRPCErr(err)
  713. }
  714. return t, done, nil
  715. }
  716. func (cc *ClientConn) applyServiceConfig(sc *ServiceConfig) error {
  717. if sc == nil {
  718. // should never reach here.
  719. return fmt.Errorf("got nil pointer for service config")
  720. }
  721. cc.sc = sc
  722. if cc.sc.retryThrottling != nil {
  723. newThrottler := &retryThrottler{
  724. tokens: cc.sc.retryThrottling.MaxTokens,
  725. max: cc.sc.retryThrottling.MaxTokens,
  726. thresh: cc.sc.retryThrottling.MaxTokens / 2,
  727. ratio: cc.sc.retryThrottling.TokenRatio,
  728. }
  729. cc.retryThrottler.Store(newThrottler)
  730. } else {
  731. cc.retryThrottler.Store((*retryThrottler)(nil))
  732. }
  733. return nil
  734. }
  735. func (cc *ClientConn) resolveNow(o resolver.ResolveNowOption) {
  736. cc.mu.RLock()
  737. r := cc.resolverWrapper
  738. cc.mu.RUnlock()
  739. if r == nil {
  740. return
  741. }
  742. go r.resolveNow(o)
  743. }
  744. // ResetConnectBackoff wakes up all subchannels in transient failure and causes
  745. // them to attempt another connection immediately. It also resets the backoff
  746. // times used for subsequent attempts regardless of the current state.
  747. //
  748. // In general, this function should not be used. Typical service or network
  749. // outages result in a reasonable client reconnection strategy by default.
  750. // However, if a previously unavailable network becomes available, this may be
  751. // used to trigger an immediate reconnect.
  752. //
  753. // This API is EXPERIMENTAL.
  754. func (cc *ClientConn) ResetConnectBackoff() {
  755. cc.mu.Lock()
  756. defer cc.mu.Unlock()
  757. for ac := range cc.conns {
  758. ac.resetConnectBackoff()
  759. }
  760. }
  761. // Close tears down the ClientConn and all underlying connections.
  762. func (cc *ClientConn) Close() error {
  763. defer cc.cancel()
  764. cc.mu.Lock()
  765. if cc.conns == nil {
  766. cc.mu.Unlock()
  767. return ErrClientConnClosing
  768. }
  769. conns := cc.conns
  770. cc.conns = nil
  771. cc.csMgr.updateState(connectivity.Shutdown)
  772. rWrapper := cc.resolverWrapper
  773. cc.resolverWrapper = nil
  774. bWrapper := cc.balancerWrapper
  775. cc.balancerWrapper = nil
  776. cc.mu.Unlock()
  777. cc.blockingpicker.close()
  778. if rWrapper != nil {
  779. rWrapper.close()
  780. }
  781. if bWrapper != nil {
  782. bWrapper.close()
  783. }
  784. for ac := range conns {
  785. ac.tearDown(ErrClientConnClosing)
  786. }
  787. if channelz.IsOn() {
  788. ted := &channelz.TraceEventDesc{
  789. Desc: "Channel Deleted",
  790. Severity: channelz.CtINFO,
  791. }
  792. if cc.dopts.channelzParentID != 0 {
  793. ted.Parent = &channelz.TraceEventDesc{
  794. Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID),
  795. Severity: channelz.CtINFO,
  796. }
  797. }
  798. channelz.AddTraceEvent(cc.channelzID, ted)
  799. // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
  800. // the entity being deleted, and thus prevent it from being deleted right away.
  801. channelz.RemoveEntry(cc.channelzID)
  802. }
  803. return nil
  804. }
// addrConn is a network connection to a given address.
//
// It tracks one transport at a time, chosen from a list of candidate
// addresses, together with the connectivity state reported for it.
type addrConn struct {
	// ctx is the parent of the contexts used for connecting and for health
	// checking; cancel is invoked by tearDown.
	ctx    context.Context
	cancel context.CancelFunc

	// cc is the ClientConn this addrConn reports state changes to.
	cc *ClientConn
	// dopts holds the dial options used when creating transports; its
	// keepalive parameters are refreshed from cc before each attempt.
	dopts dialOptions
	// acbw is the balancer.SubConn passed along with state change reports.
	acbw balancer.SubConn
	// scopts supplies the optional credentials bundle and the health check
	// opt-in consulted when connecting.
	scopts balancer.NewSubConnOptions

	// transport is set when there's a viable transport (note: ac state may not be READY as LB channel
	// health checking may require server to report healthy to set ac to READY), and is reset
	// to nil when the current transport should no longer be used to create a stream (e.g. after GoAway
	// is received, transport is closed, ac has been torn down).
	transport transport.ClientTransport // The current transport.

	// NOTE(review): the methods in this file access transport, curAddr,
	// addrs, state, backoffIdx and resetBackoff with mu held; confirm that
	// this holds for every accessor.
	mu      sync.Mutex
	curAddr resolver.Address   // The current address.
	addrs   []resolver.Address // All addresses that the resolver resolved to.

	// Use updateConnectivityState for updating addrConn's connectivity state.
	state connectivity.State

	backoffIdx int // Needs to be stateful for resetConnectBackoff.
	// resetBackoff is closed and replaced by resetConnectBackoff to interrupt
	// a backoff wait that is in progress.
	resetBackoff chan struct{}

	channelzID int64 // channelz unique identification number.
	// czData holds the call counters that are updated atomically.
	czData *channelzData
}
  828. // Note: this requires a lock on ac.mu.
  829. func (ac *addrConn) updateConnectivityState(s connectivity.State) {
  830. if ac.state == s {
  831. return
  832. }
  833. updateMsg := fmt.Sprintf("Subchannel Connectivity change to %v", s)
  834. ac.state = s
  835. if channelz.IsOn() {
  836. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  837. Desc: updateMsg,
  838. Severity: channelz.CtINFO,
  839. })
  840. }
  841. ac.cc.handleSubConnStateChange(ac.acbw, s)
  842. }
  843. // adjustParams updates parameters used to create transports upon
  844. // receiving a GoAway.
  845. func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
  846. switch r {
  847. case transport.GoAwayTooManyPings:
  848. v := 2 * ac.dopts.copts.KeepaliveParams.Time
  849. ac.cc.mu.Lock()
  850. if v > ac.cc.mkp.Time {
  851. ac.cc.mkp.Time = v
  852. }
  853. ac.cc.mu.Unlock()
  854. }
  855. }
// resetTransport is the addrConn's connection loop. Each iteration tries the
// address list from the top, and then either
//   - backs off after a failure (moving to TRANSIENT_FAILURE), or
//   - installs the new transport, waits until it goes away, and moves to
//     TRANSIENT_FAILURE before the next iteration.
//
// The loop returns when the addrConn is observed in the Shutdown state or,
// while backing off, when ac.ctx is done. ac.mu is never held across the
// blocking steps (connecting, backing off, waiting for the transport to die).
func (ac *addrConn) resetTransport() {
	for i := 0; ; i++ {
		// Every iteration after the first follows a failure or a lost
		// transport, so ask the resolver for fresh addresses.
		if i > 0 {
			ac.cc.resolveNow(resolver.ResolveNowOption{})
		}
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}
		// Capture the address list and backoff for this attempt while locked.
		addrs := ac.addrs
		backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
		// This will be the duration that dial gets to finish.
		dialDuration := minConnectTimeout
		if ac.dopts.minConnectTimeout != nil {
			dialDuration = ac.dopts.minConnectTimeout()
		}
		if dialDuration < backoffFor {
			// Give dial more time as we keep failing to connect.
			dialDuration = backoffFor
		}
		// We can potentially spend all the time trying the first address, and
		// if the server accepts the connection and then hangs, the following
		// addresses will never be tried.
		//
		// The spec doesn't mention what should be done for multiple addresses.
		// https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
		connectDeadline := time.Now().Add(dialDuration)
		ac.mu.Unlock()
		newTr, addr, reconnect, err := ac.tryAllAddrs(addrs, connectDeadline)
		if err != nil {
			// After exhausting all addresses, the addrConn enters
			// TRANSIENT_FAILURE.
			ac.mu.Lock()
			if ac.state == connectivity.Shutdown {
				ac.mu.Unlock()
				return
			}
			ac.updateConnectivityState(connectivity.TransientFailure)
			// Backoff.
			// Capture the channel under the lock: resetConnectBackoff closes
			// this one and replaces it with a new channel.
			b := ac.resetBackoff
			ac.mu.Unlock()
			timer := time.NewTimer(backoffFor)
			select {
			case <-timer.C:
				// The full backoff elapsed; the next failure backs off longer.
				ac.mu.Lock()
				ac.backoffIdx++
				ac.mu.Unlock()
			case <-b:
				// Backoff was reset; retry right away.
				timer.Stop()
			case <-ac.ctx.Done():
				timer.Stop()
				return
			}
			continue
		}
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			// Torn down while connecting: the new transport is not needed.
			newTr.Close()
			ac.mu.Unlock()
			return
		}
		ac.curAddr = addr
		ac.transport = newTr
		ac.backoffIdx = 0
		// Note: healthCheckConfig takes cc.mu's read lock while ac.mu is held.
		healthCheckConfig := ac.cc.healthCheckConfig()
		// LB channel health checking is only enabled when all the four requirements below are met:
		// 1. it is not disabled by the user with the WithDisableHealthCheck DialOption,
		// 2. the internal.HealthCheckFunc is set by importing the grpc/healthcheck package,
		// 3. a service config with non-empty healthCheckConfig field is provided,
		// 4. the current load balancer allows it.
		hctx, hcancel := context.WithCancel(ac.ctx)
		healthcheckManagingState := false
		if !ac.cc.dopts.disableHealthCheck && healthCheckConfig != nil && ac.scopts.HealthCheckEnabled {
			if ac.cc.dopts.healthCheckFunc == nil {
				// TODO: add a link to the health check doc in the error message.
				grpclog.Error("the client side LB channel health check function has not been set.")
			} else {
				// TODO(deklerk) refactor to just return transport
				go ac.startHealthCheck(hctx, newTr, addr, healthCheckConfig.ServiceName)
				healthcheckManagingState = true
			}
		}
		// Without health checking the addrConn becomes READY right away;
		// otherwise the health check goroutine reports the state.
		if !healthcheckManagingState {
			ac.updateConnectivityState(connectivity.Ready)
		}
		ac.mu.Unlock()
		// Block until the created transport is down. And when this happens,
		// we restart from the top of the addr list.
		<-reconnect.Done()
		// Cancel the context that was handed to the health check, if any.
		hcancel()
		// Need to reconnect after a READY, the addrConn enters
		// TRANSIENT_FAILURE.
		//
		// This will set addrConn to TRANSIENT_FAILURE for a very short period
		// of time, and turns CONNECTING. It seems reasonable to skip this, but
		// READY-CONNECTING is not a valid transition.
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}
		ac.updateConnectivityState(connectivity.TransientFailure)
		ac.mu.Unlock()
	}
}
  962. // tryAllAddrs tries to creates a connection to the addresses, and stop when at the
  963. // first successful one. It returns the transport, the address and a Event in
  964. // the successful case. The Event fires when the returned transport disconnects.
  965. func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.Time) (transport.ClientTransport, resolver.Address, *grpcsync.Event, error) {
  966. for _, addr := range addrs {
  967. ac.mu.Lock()
  968. if ac.state == connectivity.Shutdown {
  969. ac.mu.Unlock()
  970. return nil, resolver.Address{}, nil, errConnClosing
  971. }
  972. ac.updateConnectivityState(connectivity.Connecting)
  973. ac.transport = nil
  974. ac.cc.mu.RLock()
  975. ac.dopts.copts.KeepaliveParams = ac.cc.mkp
  976. ac.cc.mu.RUnlock()
  977. copts := ac.dopts.copts
  978. if ac.scopts.CredsBundle != nil {
  979. copts.CredsBundle = ac.scopts.CredsBundle
  980. }
  981. ac.mu.Unlock()
  982. if channelz.IsOn() {
  983. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  984. Desc: fmt.Sprintf("Subchannel picks a new address %q to connect", addr.Addr),
  985. Severity: channelz.CtINFO,
  986. })
  987. }
  988. newTr, reconnect, err := ac.createTransport(addr, copts, connectDeadline)
  989. if err == nil {
  990. return newTr, addr, reconnect, nil
  991. }
  992. ac.cc.blockingpicker.updateConnectionError(err)
  993. }
  994. // Couldn't connect to any address.
  995. return nil, resolver.Address{}, nil, fmt.Errorf("couldn't connect to any address")
  996. }
  997. // createTransport creates a connection to addr. It returns the transport and a
  998. // Event in the successful case. The Event fires when the returned transport
  999. // disconnects.
  1000. func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) (transport.ClientTransport, *grpcsync.Event, error) {
  1001. prefaceReceived := make(chan struct{})
  1002. onCloseCalled := make(chan struct{})
  1003. reconnect := grpcsync.NewEvent()
  1004. target := transport.TargetInfo{
  1005. Addr: addr.Addr,
  1006. Metadata: addr.Metadata,
  1007. Authority: ac.cc.authority,
  1008. }
  1009. onGoAway := func(r transport.GoAwayReason) {
  1010. ac.mu.Lock()
  1011. ac.adjustParams(r)
  1012. ac.mu.Unlock()
  1013. reconnect.Fire()
  1014. }
  1015. onClose := func() {
  1016. close(onCloseCalled)
  1017. reconnect.Fire()
  1018. }
  1019. onPrefaceReceipt := func() {
  1020. close(prefaceReceived)
  1021. }
  1022. connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline)
  1023. defer cancel()
  1024. if channelz.IsOn() {
  1025. copts.ChannelzParentID = ac.channelzID
  1026. }
  1027. newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, target, copts, onPrefaceReceipt, onGoAway, onClose)
  1028. if err != nil {
  1029. // newTr is either nil, or closed.
  1030. grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v. Err :%v. Reconnecting...", addr, err)
  1031. return nil, nil, err
  1032. }
  1033. if ac.dopts.reqHandshake == envconfig.RequireHandshakeOn {
  1034. select {
  1035. case <-time.After(connectDeadline.Sub(time.Now())):
  1036. // We didn't get the preface in time.
  1037. newTr.Close()
  1038. grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v: didn't receive server preface in time. Reconnecting...", addr)
  1039. return nil, nil, errors.New("timed out waiting for server handshake")
  1040. case <-prefaceReceived:
  1041. // We got the preface - huzzah! things are good.
  1042. case <-onCloseCalled:
  1043. // The transport has already closed - noop.
  1044. return nil, nil, errors.New("connection closed")
  1045. // TODO(deklerk) this should bail on ac.ctx.Done(). Add a test and fix.
  1046. }
  1047. }
  1048. return newTr, reconnect, nil
  1049. }
  1050. func (ac *addrConn) startHealthCheck(ctx context.Context, newTr transport.ClientTransport, addr resolver.Address, serviceName string) {
  1051. // Set up the health check helper functions
  1052. newStream := func() (interface{}, error) {
  1053. return ac.newClientStream(ctx, &StreamDesc{ServerStreams: true}, "/grpc.health.v1.Health/Watch", newTr)
  1054. }
  1055. firstReady := true
  1056. reportHealth := func(ok bool) {
  1057. ac.mu.Lock()
  1058. defer ac.mu.Unlock()
  1059. if ac.transport != newTr {
  1060. return
  1061. }
  1062. if ok {
  1063. if firstReady {
  1064. firstReady = false
  1065. ac.curAddr = addr
  1066. }
  1067. ac.updateConnectivityState(connectivity.Ready)
  1068. } else {
  1069. ac.updateConnectivityState(connectivity.TransientFailure)
  1070. }
  1071. }
  1072. err := ac.cc.dopts.healthCheckFunc(ctx, newStream, reportHealth, serviceName)
  1073. if err != nil {
  1074. if status.Code(err) == codes.Unimplemented {
  1075. if channelz.IsOn() {
  1076. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  1077. Desc: "Subchannel health check is unimplemented at server side, thus health check is disabled",
  1078. Severity: channelz.CtError,
  1079. })
  1080. }
  1081. grpclog.Error("Subchannel health check is unimplemented at server side, thus health check is disabled")
  1082. } else {
  1083. grpclog.Errorf("HealthCheckFunc exits with unexpected error %v", err)
  1084. }
  1085. }
  1086. }
  1087. func (ac *addrConn) resetConnectBackoff() {
  1088. ac.mu.Lock()
  1089. close(ac.resetBackoff)
  1090. ac.backoffIdx = 0
  1091. ac.resetBackoff = make(chan struct{})
  1092. ac.mu.Unlock()
  1093. }
  1094. // getReadyTransport returns the transport if ac's state is READY.
  1095. // Otherwise it returns nil, false.
  1096. // If ac's state is IDLE, it will trigger ac to connect.
  1097. func (ac *addrConn) getReadyTransport() (transport.ClientTransport, bool) {
  1098. ac.mu.Lock()
  1099. if ac.state == connectivity.Ready && ac.transport != nil {
  1100. t := ac.transport
  1101. ac.mu.Unlock()
  1102. return t, true
  1103. }
  1104. var idle bool
  1105. if ac.state == connectivity.Idle {
  1106. idle = true
  1107. }
  1108. ac.mu.Unlock()
  1109. // Trigger idle ac to connect.
  1110. if idle {
  1111. ac.connect()
  1112. }
  1113. return nil, false
  1114. }
  1115. // tearDown starts to tear down the addrConn.
  1116. // TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
  1117. // some edge cases (e.g., the caller opens and closes many addrConn's in a
  1118. // tight loop.
  1119. // tearDown doesn't remove ac from ac.cc.conns.
  1120. func (ac *addrConn) tearDown(err error) {
  1121. ac.mu.Lock()
  1122. if ac.state == connectivity.Shutdown {
  1123. ac.mu.Unlock()
  1124. return
  1125. }
  1126. curTr := ac.transport
  1127. ac.transport = nil
  1128. // We have to set the state to Shutdown before anything else to prevent races
  1129. // between setting the state and logic that waits on context cancelation / etc.
  1130. ac.updateConnectivityState(connectivity.Shutdown)
  1131. ac.cancel()
  1132. ac.curAddr = resolver.Address{}
  1133. if err == errConnDrain && curTr != nil {
  1134. // GracefulClose(...) may be executed multiple times when
  1135. // i) receiving multiple GoAway frames from the server; or
  1136. // ii) there are concurrent name resolver/Balancer triggered
  1137. // address removal and GoAway.
  1138. // We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu.
  1139. ac.mu.Unlock()
  1140. curTr.GracefulClose()
  1141. ac.mu.Lock()
  1142. }
  1143. if channelz.IsOn() {
  1144. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  1145. Desc: "Subchannel Deleted",
  1146. Severity: channelz.CtINFO,
  1147. Parent: &channelz.TraceEventDesc{
  1148. Desc: fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID),
  1149. Severity: channelz.CtINFO,
  1150. },
  1151. })
  1152. // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
  1153. // the entity beng deleted, and thus prevent it from being deleted right away.
  1154. channelz.RemoveEntry(ac.channelzID)
  1155. }
  1156. ac.mu.Unlock()
  1157. }
  1158. func (ac *addrConn) getState() connectivity.State {
  1159. ac.mu.Lock()
  1160. defer ac.mu.Unlock()
  1161. return ac.state
  1162. }
  1163. func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric {
  1164. ac.mu.Lock()
  1165. addr := ac.curAddr.Addr
  1166. ac.mu.Unlock()
  1167. return &channelz.ChannelInternalMetric{
  1168. State: ac.getState(),
  1169. Target: addr,
  1170. CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted),
  1171. CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded),
  1172. CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed),
  1173. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)),
  1174. }
  1175. }
  1176. func (ac *addrConn) incrCallsStarted() {
  1177. atomic.AddInt64(&ac.czData.callsStarted, 1)
  1178. atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano())
  1179. }
  1180. func (ac *addrConn) incrCallsSucceeded() {
  1181. atomic.AddInt64(&ac.czData.callsSucceeded, 1)
  1182. }
  1183. func (ac *addrConn) incrCallsFailed() {
  1184. atomic.AddInt64(&ac.czData.callsFailed, 1)
  1185. }
  1186. type retryThrottler struct {
  1187. max float64
  1188. thresh float64
  1189. ratio float64
  1190. mu sync.Mutex
  1191. tokens float64 // TODO(dfawley): replace with atomic and remove lock.
  1192. }
  1193. // throttle subtracts a retry token from the pool and returns whether a retry
  1194. // should be throttled (disallowed) based upon the retry throttling policy in
  1195. // the service config.
  1196. func (rt *retryThrottler) throttle() bool {
  1197. if rt == nil {
  1198. return false
  1199. }
  1200. rt.mu.Lock()
  1201. defer rt.mu.Unlock()
  1202. rt.tokens--
  1203. if rt.tokens < 0 {
  1204. rt.tokens = 0
  1205. }
  1206. return rt.tokens <= rt.thresh
  1207. }
  1208. func (rt *retryThrottler) successfulRPC() {
  1209. if rt == nil {
  1210. return
  1211. }
  1212. rt.mu.Lock()
  1213. defer rt.mu.Unlock()
  1214. rt.tokens += rt.ratio
  1215. if rt.tokens > rt.max {
  1216. rt.tokens = rt.max
  1217. }
  1218. }
  1219. type channelzChannel struct {
  1220. cc *ClientConn
  1221. }
  1222. func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
  1223. return c.cc.channelzMetric()
  1224. }
// ErrClientConnTimeout indicates that the ClientConn cannot establish the
// underlying connections within the specified timeout.
//
// Deprecated: This error is never returned by grpc and should not be
// referenced by users.
var ErrClientConnTimeout = errors.New("grpc: timed out when dialing")