retry_test.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. /*
  2. *
  3. * Copyright 2018 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package test
  19. import (
  20. "context"
  21. "fmt"
  22. "io"
  23. "os"
  24. "strconv"
  25. "strings"
  26. "testing"
  27. "time"
  28. "github.com/golang/protobuf/proto"
  29. "google.golang.org/grpc"
  30. "google.golang.org/grpc/codes"
  31. "google.golang.org/grpc/internal/envconfig"
  32. "google.golang.org/grpc/metadata"
  33. "google.golang.org/grpc/status"
  34. testpb "google.golang.org/grpc/test/grpc_testing"
  35. )
  36. func enableRetry() func() {
  37. old := envconfig.Retry
  38. envconfig.Retry = true
  39. return func() { envconfig.Retry = old }
  40. }
  41. func (s) TestRetryUnary(t *testing.T) {
  42. defer enableRetry()()
  43. i := -1
  44. ss := &stubServer{
  45. emptyCall: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
  46. i++
  47. switch i {
  48. case 0, 2, 5:
  49. return &testpb.Empty{}, nil
  50. case 6, 8, 11:
  51. return nil, status.New(codes.Internal, "non-retryable error").Err()
  52. }
  53. return nil, status.New(codes.AlreadyExists, "retryable error").Err()
  54. },
  55. }
  56. if err := ss.Start([]grpc.ServerOption{}); err != nil {
  57. t.Fatalf("Error starting endpoint server: %v", err)
  58. }
  59. defer ss.Stop()
  60. ss.newServiceConfig(`{
  61. "methodConfig": [{
  62. "name": [{"service": "grpc.testing.TestService"}],
  63. "waitForReady": true,
  64. "retryPolicy": {
  65. "MaxAttempts": 4,
  66. "InitialBackoff": ".01s",
  67. "MaxBackoff": ".01s",
  68. "BackoffMultiplier": 1.0,
  69. "RetryableStatusCodes": [ "ALREADY_EXISTS" ]
  70. }
  71. }]}`)
  72. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  73. for {
  74. if ctx.Err() != nil {
  75. t.Fatalf("Timed out waiting for service config update")
  76. }
  77. if ss.cc.GetMethodConfig("/grpc.testing.TestService/EmptyCall").WaitForReady != nil {
  78. break
  79. }
  80. time.Sleep(time.Millisecond)
  81. }
  82. cancel()
  83. testCases := []struct {
  84. code codes.Code
  85. count int
  86. }{
  87. {codes.OK, 0},
  88. {codes.OK, 2},
  89. {codes.OK, 5},
  90. {codes.Internal, 6},
  91. {codes.Internal, 8},
  92. {codes.Internal, 11},
  93. {codes.AlreadyExists, 15},
  94. }
  95. for _, tc := range testCases {
  96. ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  97. _, err := ss.client.EmptyCall(ctx, &testpb.Empty{})
  98. cancel()
  99. if status.Code(err) != tc.code {
  100. t.Fatalf("EmptyCall(_, _) = _, %v; want _, <Code() = %v>", err, tc.code)
  101. }
  102. if i != tc.count {
  103. t.Fatalf("i = %v; want %v", i, tc.count)
  104. }
  105. }
  106. }
  107. func (s) TestRetryDisabledByDefault(t *testing.T) {
  108. if strings.EqualFold(os.Getenv("GRPC_GO_RETRY"), "on") {
  109. return
  110. }
  111. i := -1
  112. ss := &stubServer{
  113. emptyCall: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
  114. i++
  115. switch i {
  116. case 0:
  117. return nil, status.New(codes.AlreadyExists, "retryable error").Err()
  118. }
  119. return &testpb.Empty{}, nil
  120. },
  121. }
  122. if err := ss.Start([]grpc.ServerOption{}); err != nil {
  123. t.Fatalf("Error starting endpoint server: %v", err)
  124. }
  125. defer ss.Stop()
  126. ss.newServiceConfig(`{
  127. "methodConfig": [{
  128. "name": [{"service": "grpc.testing.TestService"}],
  129. "waitForReady": true,
  130. "retryPolicy": {
  131. "MaxAttempts": 4,
  132. "InitialBackoff": ".01s",
  133. "MaxBackoff": ".01s",
  134. "BackoffMultiplier": 1.0,
  135. "RetryableStatusCodes": [ "ALREADY_EXISTS" ]
  136. }
  137. }]}`)
  138. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  139. for {
  140. if ctx.Err() != nil {
  141. t.Fatalf("Timed out waiting for service config update")
  142. }
  143. if ss.cc.GetMethodConfig("/grpc.testing.TestService/EmptyCall").WaitForReady != nil {
  144. break
  145. }
  146. time.Sleep(time.Millisecond)
  147. }
  148. cancel()
  149. testCases := []struct {
  150. code codes.Code
  151. count int
  152. }{
  153. {codes.AlreadyExists, 0},
  154. }
  155. for _, tc := range testCases {
  156. ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  157. _, err := ss.client.EmptyCall(ctx, &testpb.Empty{})
  158. cancel()
  159. if status.Code(err) != tc.code {
  160. t.Fatalf("EmptyCall(_, _) = _, %v; want _, <Code() = %v>", err, tc.code)
  161. }
  162. if i != tc.count {
  163. t.Fatalf("i = %v; want %v", i, tc.count)
  164. }
  165. }
  166. }
  167. func (s) TestRetryThrottling(t *testing.T) {
  168. defer enableRetry()()
  169. i := -1
  170. ss := &stubServer{
  171. emptyCall: func(context.Context, *testpb.Empty) (*testpb.Empty, error) {
  172. i++
  173. switch i {
  174. case 0, 3, 6, 10, 11, 12, 13, 14, 16, 18:
  175. return &testpb.Empty{}, nil
  176. }
  177. return nil, status.New(codes.Unavailable, "retryable error").Err()
  178. },
  179. }
  180. if err := ss.Start([]grpc.ServerOption{}); err != nil {
  181. t.Fatalf("Error starting endpoint server: %v", err)
  182. }
  183. defer ss.Stop()
  184. ss.newServiceConfig(`{
  185. "methodConfig": [{
  186. "name": [{"service": "grpc.testing.TestService"}],
  187. "waitForReady": true,
  188. "retryPolicy": {
  189. "MaxAttempts": 4,
  190. "InitialBackoff": ".01s",
  191. "MaxBackoff": ".01s",
  192. "BackoffMultiplier": 1.0,
  193. "RetryableStatusCodes": [ "UNAVAILABLE" ]
  194. }
  195. }],
  196. "retryThrottling": {
  197. "maxTokens": 10,
  198. "tokenRatio": 0.5
  199. }
  200. }`)
  201. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  202. for {
  203. if ctx.Err() != nil {
  204. t.Fatalf("Timed out waiting for service config update")
  205. }
  206. if ss.cc.GetMethodConfig("/grpc.testing.TestService/EmptyCall").WaitForReady != nil {
  207. break
  208. }
  209. time.Sleep(time.Millisecond)
  210. }
  211. cancel()
  212. testCases := []struct {
  213. code codes.Code
  214. count int
  215. }{
  216. {codes.OK, 0}, // tokens = 10
  217. {codes.OK, 3}, // tokens = 8.5 (10 - 2 failures + 0.5 success)
  218. {codes.OK, 6}, // tokens = 6
  219. {codes.Unavailable, 8}, // tokens = 5 -- first attempt is retried; second aborted.
  220. {codes.Unavailable, 9}, // tokens = 4
  221. {codes.OK, 10}, // tokens = 4.5
  222. {codes.OK, 11}, // tokens = 5
  223. {codes.OK, 12}, // tokens = 5.5
  224. {codes.OK, 13}, // tokens = 6
  225. {codes.OK, 14}, // tokens = 6.5
  226. {codes.OK, 16}, // tokens = 5.5
  227. {codes.Unavailable, 17}, // tokens = 4.5
  228. }
  229. for _, tc := range testCases {
  230. ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  231. _, err := ss.client.EmptyCall(ctx, &testpb.Empty{})
  232. cancel()
  233. if status.Code(err) != tc.code {
  234. t.Errorf("EmptyCall(_, _) = _, %v; want _, <Code() = %v>", err, tc.code)
  235. }
  236. if i != tc.count {
  237. t.Errorf("i = %v; want %v", i, tc.count)
  238. }
  239. }
  240. }
  241. func (s) TestRetryStreaming(t *testing.T) {
  242. defer enableRetry()()
  243. req := func(b byte) *testpb.StreamingOutputCallRequest {
  244. return &testpb.StreamingOutputCallRequest{Payload: &testpb.Payload{Body: []byte{b}}}
  245. }
  246. res := func(b byte) *testpb.StreamingOutputCallResponse {
  247. return &testpb.StreamingOutputCallResponse{Payload: &testpb.Payload{Body: []byte{b}}}
  248. }
  249. largePayload, _ := newPayload(testpb.PayloadType_COMPRESSABLE, 500)
  250. type serverOp func(stream testpb.TestService_FullDuplexCallServer) error
  251. type clientOp func(stream testpb.TestService_FullDuplexCallClient) error
  252. // Server Operations
  253. sAttempts := func(n int) serverOp {
  254. return func(stream testpb.TestService_FullDuplexCallServer) error {
  255. const key = "grpc-previous-rpc-attempts"
  256. md, ok := metadata.FromIncomingContext(stream.Context())
  257. if !ok {
  258. return status.Errorf(codes.Internal, "server: no header metadata received")
  259. }
  260. if got := md[key]; len(got) != 1 || got[0] != strconv.Itoa(n) {
  261. return status.Errorf(codes.Internal, "server: metadata = %v; want <contains %q: %q>", md, key, n)
  262. }
  263. return nil
  264. }
  265. }
  266. sReq := func(b byte) serverOp {
  267. return func(stream testpb.TestService_FullDuplexCallServer) error {
  268. want := req(b)
  269. if got, err := stream.Recv(); err != nil || !proto.Equal(got, want) {
  270. return status.Errorf(codes.Internal, "server: Recv() = %v, %v; want %v, <nil>", got, err, want)
  271. }
  272. return nil
  273. }
  274. }
  275. sReqPayload := func(p *testpb.Payload) serverOp {
  276. return func(stream testpb.TestService_FullDuplexCallServer) error {
  277. want := &testpb.StreamingOutputCallRequest{Payload: p}
  278. if got, err := stream.Recv(); err != nil || !proto.Equal(got, want) {
  279. return status.Errorf(codes.Internal, "server: Recv() = %v, %v; want %v, <nil>", got, err, want)
  280. }
  281. return nil
  282. }
  283. }
  284. sRes := func(b byte) serverOp {
  285. return func(stream testpb.TestService_FullDuplexCallServer) error {
  286. msg := res(b)
  287. if err := stream.Send(msg); err != nil {
  288. return status.Errorf(codes.Internal, "server: Send(%v) = %v; want <nil>", msg, err)
  289. }
  290. return nil
  291. }
  292. }
  293. sErr := func(c codes.Code) serverOp {
  294. return func(stream testpb.TestService_FullDuplexCallServer) error {
  295. return status.New(c, "").Err()
  296. }
  297. }
  298. sCloseSend := func() serverOp {
  299. return func(stream testpb.TestService_FullDuplexCallServer) error {
  300. if msg, err := stream.Recv(); msg != nil || err != io.EOF {
  301. return status.Errorf(codes.Internal, "server: Recv() = %v, %v; want <nil>, io.EOF", msg, err)
  302. }
  303. return nil
  304. }
  305. }
  306. sPushback := func(s string) serverOp {
  307. return func(stream testpb.TestService_FullDuplexCallServer) error {
  308. stream.SetTrailer(metadata.MD{"grpc-retry-pushback-ms": []string{s}})
  309. return nil
  310. }
  311. }
  312. // Client Operations
  313. cReq := func(b byte) clientOp {
  314. return func(stream testpb.TestService_FullDuplexCallClient) error {
  315. msg := req(b)
  316. if err := stream.Send(msg); err != nil {
  317. return fmt.Errorf("client: Send(%v) = %v; want <nil>", msg, err)
  318. }
  319. return nil
  320. }
  321. }
  322. cReqPayload := func(p *testpb.Payload) clientOp {
  323. return func(stream testpb.TestService_FullDuplexCallClient) error {
  324. msg := &testpb.StreamingOutputCallRequest{Payload: p}
  325. if err := stream.Send(msg); err != nil {
  326. return fmt.Errorf("client: Send(%v) = %v; want <nil>", msg, err)
  327. }
  328. return nil
  329. }
  330. }
  331. cRes := func(b byte) clientOp {
  332. return func(stream testpb.TestService_FullDuplexCallClient) error {
  333. want := res(b)
  334. if got, err := stream.Recv(); err != nil || !proto.Equal(got, want) {
  335. return fmt.Errorf("client: Recv() = %v, %v; want %v, <nil>", got, err, want)
  336. }
  337. return nil
  338. }
  339. }
  340. cErr := func(c codes.Code) clientOp {
  341. return func(stream testpb.TestService_FullDuplexCallClient) error {
  342. want := status.New(c, "").Err()
  343. if c == codes.OK {
  344. want = io.EOF
  345. }
  346. res, err := stream.Recv()
  347. if res != nil ||
  348. ((err == nil) != (want == nil)) ||
  349. (want != nil && err.Error() != want.Error()) {
  350. return fmt.Errorf("client: Recv() = %v, %v; want <nil>, %v", res, err, want)
  351. }
  352. return nil
  353. }
  354. }
  355. cCloseSend := func() clientOp {
  356. return func(stream testpb.TestService_FullDuplexCallClient) error {
  357. if err := stream.CloseSend(); err != nil {
  358. return fmt.Errorf("client: CloseSend() = %v; want <nil>", err)
  359. }
  360. return nil
  361. }
  362. }
  363. var curTime time.Time
  364. cGetTime := func() clientOp {
  365. return func(_ testpb.TestService_FullDuplexCallClient) error {
  366. curTime = time.Now()
  367. return nil
  368. }
  369. }
  370. cCheckElapsed := func(d time.Duration) clientOp {
  371. return func(_ testpb.TestService_FullDuplexCallClient) error {
  372. if elapsed := time.Since(curTime); elapsed < d {
  373. return fmt.Errorf("elapsed time: %v; want >= %v", elapsed, d)
  374. }
  375. return nil
  376. }
  377. }
  378. cHdr := func() clientOp {
  379. return func(stream testpb.TestService_FullDuplexCallClient) error {
  380. _, err := stream.Header()
  381. return err
  382. }
  383. }
  384. cCtx := func() clientOp {
  385. return func(stream testpb.TestService_FullDuplexCallClient) error {
  386. stream.Context()
  387. return nil
  388. }
  389. }
  390. testCases := []struct {
  391. desc string
  392. serverOps []serverOp
  393. clientOps []clientOp
  394. }{{
  395. desc: "Non-retryable error code",
  396. serverOps: []serverOp{sReq(1), sErr(codes.Internal)},
  397. clientOps: []clientOp{cReq(1), cErr(codes.Internal)},
  398. }, {
  399. desc: "One retry necessary",
  400. serverOps: []serverOp{sReq(1), sErr(codes.Unavailable), sReq(1), sAttempts(1), sRes(1)},
  401. clientOps: []clientOp{cReq(1), cRes(1), cErr(codes.OK)},
  402. }, {
  403. desc: "Exceed max attempts (4); check attempts header on server",
  404. serverOps: []serverOp{
  405. sReq(1), sErr(codes.Unavailable),
  406. sReq(1), sAttempts(1), sErr(codes.Unavailable),
  407. sAttempts(2), sReq(1), sErr(codes.Unavailable),
  408. sAttempts(3), sReq(1), sErr(codes.Unavailable),
  409. },
  410. clientOps: []clientOp{cReq(1), cErr(codes.Unavailable)},
  411. }, {
  412. desc: "Multiple requests",
  413. serverOps: []serverOp{
  414. sReq(1), sReq(2), sErr(codes.Unavailable),
  415. sReq(1), sReq(2), sRes(5),
  416. },
  417. clientOps: []clientOp{cReq(1), cReq(2), cRes(5), cErr(codes.OK)},
  418. }, {
  419. desc: "Multiple successive requests",
  420. serverOps: []serverOp{
  421. sReq(1), sErr(codes.Unavailable),
  422. sReq(1), sReq(2), sErr(codes.Unavailable),
  423. sReq(1), sReq(2), sReq(3), sRes(5),
  424. },
  425. clientOps: []clientOp{cReq(1), cReq(2), cReq(3), cRes(5), cErr(codes.OK)},
  426. }, {
  427. desc: "No retry after receiving",
  428. serverOps: []serverOp{
  429. sReq(1), sErr(codes.Unavailable),
  430. sReq(1), sRes(3), sErr(codes.Unavailable),
  431. },
  432. clientOps: []clientOp{cReq(1), cRes(3), cErr(codes.Unavailable)},
  433. }, {
  434. desc: "No retry after header",
  435. serverOps: []serverOp{sReq(1), sErr(codes.Unavailable)},
  436. clientOps: []clientOp{cReq(1), cHdr(), cErr(codes.Unavailable)},
  437. }, {
  438. desc: "No retry after context",
  439. serverOps: []serverOp{sReq(1), sErr(codes.Unavailable)},
  440. clientOps: []clientOp{cReq(1), cCtx(), cErr(codes.Unavailable)},
  441. }, {
  442. desc: "Replaying close send",
  443. serverOps: []serverOp{
  444. sReq(1), sReq(2), sCloseSend(), sErr(codes.Unavailable),
  445. sReq(1), sReq(2), sCloseSend(), sRes(1), sRes(3), sRes(5),
  446. },
  447. clientOps: []clientOp{cReq(1), cReq(2), cCloseSend(), cRes(1), cRes(3), cRes(5), cErr(codes.OK)},
  448. }, {
  449. desc: "Negative server pushback - no retry",
  450. serverOps: []serverOp{sReq(1), sPushback("-1"), sErr(codes.Unavailable)},
  451. clientOps: []clientOp{cReq(1), cErr(codes.Unavailable)},
  452. }, {
  453. desc: "Non-numeric server pushback - no retry",
  454. serverOps: []serverOp{sReq(1), sPushback("xxx"), sErr(codes.Unavailable)},
  455. clientOps: []clientOp{cReq(1), cErr(codes.Unavailable)},
  456. }, {
  457. desc: "Multiple server pushback values - no retry",
  458. serverOps: []serverOp{sReq(1), sPushback("100"), sPushback("10"), sErr(codes.Unavailable)},
  459. clientOps: []clientOp{cReq(1), cErr(codes.Unavailable)},
  460. }, {
  461. desc: "1s server pushback - delayed retry",
  462. serverOps: []serverOp{sReq(1), sPushback("1000"), sErr(codes.Unavailable), sReq(1), sRes(2)},
  463. clientOps: []clientOp{cGetTime(), cReq(1), cRes(2), cCheckElapsed(time.Second), cErr(codes.OK)},
  464. }, {
  465. desc: "Overflowing buffer - no retry",
  466. serverOps: []serverOp{sReqPayload(largePayload), sErr(codes.Unavailable)},
  467. clientOps: []clientOp{cReqPayload(largePayload), cErr(codes.Unavailable)},
  468. }}
  469. var serverOpIter int
  470. var serverOps []serverOp
  471. ss := &stubServer{
  472. fullDuplexCall: func(stream testpb.TestService_FullDuplexCallServer) error {
  473. for serverOpIter < len(serverOps) {
  474. op := serverOps[serverOpIter]
  475. serverOpIter++
  476. if err := op(stream); err != nil {
  477. return err
  478. }
  479. }
  480. return nil
  481. },
  482. }
  483. if err := ss.Start([]grpc.ServerOption{}, grpc.WithDefaultCallOptions(grpc.MaxRetryRPCBufferSize(200))); err != nil {
  484. t.Fatalf("Error starting endpoint server: %v", err)
  485. }
  486. defer ss.Stop()
  487. ss.newServiceConfig(`{
  488. "methodConfig": [{
  489. "name": [{"service": "grpc.testing.TestService"}],
  490. "waitForReady": true,
  491. "retryPolicy": {
  492. "MaxAttempts": 4,
  493. "InitialBackoff": ".01s",
  494. "MaxBackoff": ".01s",
  495. "BackoffMultiplier": 1.0,
  496. "RetryableStatusCodes": [ "UNAVAILABLE" ]
  497. }
  498. }]}`)
  499. ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
  500. for {
  501. if ctx.Err() != nil {
  502. t.Fatalf("Timed out waiting for service config update")
  503. }
  504. if ss.cc.GetMethodConfig("/grpc.testing.TestService/FullDuplexCall").WaitForReady != nil {
  505. break
  506. }
  507. time.Sleep(time.Millisecond)
  508. }
  509. cancel()
  510. for _, tc := range testCases {
  511. func() {
  512. serverOpIter = 0
  513. serverOps = tc.serverOps
  514. ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
  515. defer cancel()
  516. stream, err := ss.client.FullDuplexCall(ctx)
  517. if err != nil {
  518. t.Fatalf("%v: Error while creating stream: %v", tc.desc, err)
  519. }
  520. for _, op := range tc.clientOps {
  521. if err := op(stream); err != nil {
  522. t.Errorf("%v: %v", tc.desc, err)
  523. break
  524. }
  525. }
  526. if serverOpIter != len(serverOps) {
  527. t.Errorf("%v: serverOpIter = %v; want %v", tc.desc, serverOpIter, len(serverOps))
  528. }
  529. }()
  530. }
  531. }