3 * Copyright 2019 gRPC authors.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
25 #include <grpc/grpc.h>
26 #include <grpc/support/alloc.h>
27 #include <grpc/support/atm.h>
28 #include <grpc/support/log.h>
29 #include <grpc/support/port_platform.h>
30 #include <grpc/support/string_util.h>
31 #include <grpc/support/time.h>
32 #include <grpcpp/channel.h>
33 #include <grpcpp/client_context.h>
34 #include <grpcpp/create_channel.h>
35 #include <grpcpp/health_check_service_interface.h>
36 #include <grpcpp/server.h>
37 #include <grpcpp/server_builder.h>
39 #include "src/core/lib/backoff/backoff.h"
40 #include "src/core/lib/gpr/env.h"
42 #include "src/proto/grpc/testing/echo.grpc.pb.h"
43 #include "test/core/util/port.h"
44 #include "test/core/util/test_config.h"
45 #include "test/cpp/end2end/test_service_impl.h"
47 #include <gtest/gtest.h>
50 using grpc::testing::EchoRequest;
51 using grpc::testing::EchoResponse;
57 class FlakyNetworkTest : public ::testing::Test {
60 : server_host_("grpctest"),
62 ipv4_address_("10.0.0.1"),
64 kRequestMessage_("🖖") {}
67 std::ostringstream cmd;
68 // create interface_ with address ipv4_address_
69 cmd << "ip addr add " << ipv4_address_ << netmask_ << " dev " << interface_;
70 std::system(cmd.str().c_str());
// Removes the test address from the interface: builds the shell command
// "ip addr del", followed by ipv4_address_ and netmask_, " dev " and
// interface_, and runs it with std::system. The command's exit status is
// not inspected.
73 void InterfaceDown() {
74 std::ostringstream cmd;
76 cmd << "ip addr del " << ipv4_address_ << netmask_ << " dev " << interface_;
77 std::system(cmd.str().c_str());
81 std::ostringstream cmd;
82 // Add DNS entry for server_host_ in /etc/hosts
83 cmd << "echo '" << ipv4_address_ << " " << server_host_
85 std::system(cmd.str().c_str());
89 std::ostringstream cmd;
90 // Remove DNS entry for server_host_ from /etc/hosts
91 // NOTE: we can't do this in one step with sed -i because when we are
92 // running under docker, the file is mounted by docker so we can't change
93 // its inode from within the container (sed -i creates a new file and
94 // replaces the old file, which changes the inode)
95 cmd << "sed '/" << server_host_ << "/d' /etc/hosts > /etc/hosts.orig";
96 std::system(cmd.str().c_str());
101 cmd << "cat /etc/hosts.orig > /etc/hosts";
102 std::system(cmd.str().c_str());
106 std::ostringstream cmd;
107 // drop packets with src IP = ipv4_address_
108 cmd << "iptables -A INPUT -s " << ipv4_address_ << " -j DROP";
110 std::system(cmd.str().c_str());
114 // drop packets with dst IP = ipv4_address_
115 cmd << "iptables -A INPUT -d " << ipv4_address_ << " -j DROP";
// Undoes the packet-drop setup by deleting (iptables -D) the INPUT DROP rules
// that match ipv4_address_ as source (-s) and as destination (-d).
// NOTE(review): the statements between the two commands and the execution of
// the second command are on lines not visible in this excerpt - confirm that
// cmd is reset before the second command is built.
118 void RestoreNetwork() {
119 std::ostringstream cmd;
120 // remove iptables rule to drop packets with src IP = ipv4_address_
121 cmd << "iptables -D INPUT -s " << ipv4_address_ << " -j DROP";
122 std::system(cmd.str().c_str());
125 // remove iptables rule to drop packets with dest IP = ipv4_address_
126 cmd << "iptables -D INPUT -d " << ipv4_address_ << " -j DROP";
// Installs a netem queueing discipline on interface_ (tc qdisc replace) so
// that traffic over it is delayed, dropped, duplicated and corrupted. The
// command is run with std::system and its exit status is not inspected.
129 void FlakeNetwork() {
130 std::ostringstream cmd;
131 // Emulate a flaky network connection over interface_. Add a delay of 100ms
132 // +/- 50ms, 3% packet loss, 1% duplicates and 0.1% corrupt packets.
133 cmd << "tc qdisc replace dev " << interface_
134 << " root netem delay 100ms 50ms distribution normal loss 3% duplicate "
136 std::system(cmd.str().c_str());
// Counterpart of FlakeNetwork(): deletes the root netem qdisc from interface_
// by running "tc qdisc del dev ... root netem" with std::system. The exit
// status is not inspected.
139 void UnflakeNetwork() {
140 // Remove simulated network flake on interface_
141 std::ostringstream cmd;
142 cmd << "tc qdisc del dev " << interface_ << " root netem";
143 std::system(cmd.str().c_str());
156 void SetUp() override {
162 void TearDown() override {
169 // TODO (pjaikumar): Ideally, we should allocate the port dynamically using
170 // grpc_pick_unused_port_or_die(). That doesn't work inside some docker
171 // containers because port_server listens on localhost which maps to
172 // ip6-loopback, but ipv6 support is not enabled by default in docker.
175 server_.reset(new ServerData(port_));
176 server_->Start(server_host_);
// Stops the test server by calling Shutdown() on the ServerData held in
// server_.
178 void StopServer() { server_->Shutdown(); }
// Returns a new EchoTestService stub bound to the given channel.
180 std::unique_ptr<grpc::testing::EchoTestService::Stub> BuildStub(
181 const std::shared_ptr<Channel>& channel) {
182 return grpc::testing::EchoTestService::NewStub(channel);
// Creates a channel to "server_host_:port_" using insecure credentials.
//
// lb_policy_name: if non-empty, set on args as the load-balancing policy
//   name; if empty, args is left as passed in.
// args: channel arguments, taken by value so the caller's object is not
//   modified.
185 std::shared_ptr<Channel> BuildChannel(
186 const grpc::string& lb_policy_name,
187 ChannelArguments args = ChannelArguments()) {
188 if (lb_policy_name.size() > 0) {
189 args.SetLoadBalancingPolicyName(lb_policy_name);
190 } // else, default to pick first
// Target address is the fixture's host name plus port.
191 std::ostringstream server_address;
192 server_address << server_host_ << ":" << port_;
193 return CreateCustomChannel(server_address.str(),
194 InsecureChannelCredentials(), args);
198 const std::unique_ptr<grpc::testing::EchoTestService::Stub>& stub,
199 int timeout_ms = 0, bool wait_for_ready = false) {
200 auto response = std::unique_ptr<EchoResponse>(new EchoResponse());
202 request.set_message(kRequestMessage_);
203 ClientContext context;
204 if (timeout_ms > 0) {
205 context.set_deadline(grpc_timeout_milliseconds_to_deadline(timeout_ms));
207 // See https://github.com/grpc/grpc/blob/master/doc/wait-for-ready.md for
208 // details of wait-for-ready semantics
209 if (wait_for_ready) {
210 context.set_wait_for_ready(true);
212 Status status = stub->Echo(&context, request, response.get());
213 auto ok = status.ok();
215 gpr_log(GPR_DEBUG, "RPC returned %s\n", response->message().c_str());
217 gpr_log(GPR_DEBUG, "RPC failed: %s", status.error_message().c_str());
224 std::unique_ptr<Server> server_;
225 TestServiceImpl service_;
226 std::unique_ptr<std::thread> thread_;
227 bool server_ready_ = false;
// Stores the port the server will listen on; nothing is started here.
229 explicit ServerData(int port) { port_ = port; }
// Launches Serve() on a new thread (stored in thread_) and blocks the caller
// on a condition variable until server_ready_ becomes true, then clears the
// flag again.
// NOTE(review): the mutex mu that is locked and passed to Serve() is declared
// on a line not visible in this excerpt.
231 void Start(const grpc::string& server_host) {
232 gpr_log(GPR_INFO, "starting server on port %d", port_);
234 std::unique_lock<std::mutex> lock(mu);
235 std::condition_variable cond;
// Serve() receives pointers to the local mutex and condition variable, so
// this function must not return before Serve() is done using them.
236 thread_.reset(new std::thread(
237 std::bind(&ServerData::Serve, this, server_host, &mu, &cond)));
// The predicate form of wait() re-checks server_ready_ after every wakeup.
238 cond.wait(lock, [this] { return server_ready_; });
239 server_ready_ = false;
240 gpr_log(GPR_INFO, "server startup complete");
// Thread body used by Start(): builds and starts a server listening on
// "server_host:port_" with insecure credentials and service_ registered,
// stores it in server_, then sets server_ready_ to true while holding *mu.
// NOTE(review): the notification of cond presumably follows on lines not
// visible in this excerpt - confirm.
243 void Serve(const grpc::string& server_host, std::mutex* mu,
244 std::condition_variable* cond) {
245 std::ostringstream server_address;
246 server_address << server_host << ":" << port_;
247 ServerBuilder builder;
248 builder.AddListeningPort(server_address.str(),
249 InsecureServerCredentials());
250 builder.RegisterService(&service_);
251 server_ = builder.BuildAndStart();
// Publish readiness under the same mutex that Start() waits on.
252 std::lock_guard<std::mutex> lock(*mu);
253 server_ready_ = true;
258 server_->Shutdown(grpc_timeout_milliseconds_to_deadline(0));
// Blocks for as long as channel reports GRPC_CHANNEL_READY. The state is read
// with GetState(false), i.e. without asking the channel to connect.
// Returns false when WaitForStateChange() returns false for the deadline
// derived from timeout_seconds (default 5).
// NOTE(review): the success return is on lines not visible in this excerpt;
// presumably true once the state is no longer READY - confirm.
263 bool WaitForChannelNotReady(Channel* channel, int timeout_seconds = 5) {
264 const gpr_timespec deadline =
265 grpc_timeout_seconds_to_deadline(timeout_seconds);
266 grpc_connectivity_state state;
267 while ((state = channel->GetState(false /* try_to_connect */)) ==
268 GRPC_CHANNEL_READY) {
269 if (!channel->WaitForStateChange(state, deadline)) return false;
// Blocks until channel reports GRPC_CHANNEL_READY. The state is read with
// GetState(true), which asks the channel to try to connect.
// Returns false when WaitForStateChange() returns false for the deadline
// derived from timeout_seconds (default 5).
// NOTE(review): the success return is on lines not visible in this excerpt;
// presumably true once the state is READY - confirm.
274 bool WaitForChannelReady(Channel* channel, int timeout_seconds = 5) {
275 const gpr_timespec deadline =
276 grpc_timeout_seconds_to_deadline(timeout_seconds);
277 grpc_connectivity_state state;
278 while ((state = channel->GetState(true /* try_to_connect */)) !=
279 GRPC_CHANNEL_READY) {
280 if (!channel->WaitForStateChange(state, deadline)) return false;
286 const grpc::string server_host_;
287 const grpc::string interface_;
288 const grpc::string ipv4_address_;
289 const grpc::string netmask_;
290 std::unique_ptr<grpc::testing::EchoTestService::Stub> stub_;
291 std::unique_ptr<ServerData> server_;
292 const int SERVER_PORT = 32750;
294 const grpc::string kRequestMessage_;
297 // Network interface connected to server flaps
298 TEST_F(FlakyNetworkTest, NetworkTransition) {
299 const int kKeepAliveTimeMs = 1000;
300 const int kKeepAliveTimeoutMs = 1000;
301 ChannelArguments args;
302 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
303 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
304 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
305 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
307 auto channel = BuildChannel("pick_first", args);
308 auto stub = BuildStub(channel);
309 // Channel should be in READY state after we send an RPC
310 EXPECT_TRUE(SendRpc(stub));
311 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
313 std::atomic_bool shutdown{false};
314 std::thread sender = std::thread([this, &stub, &shutdown]() {
316 if (shutdown.load()) {
320 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
324 // bring down network
326 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
327 // bring network interface back up
329 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
330 // Restore DNS entry for server
332 EXPECT_TRUE(WaitForChannelReady(channel.get()));
333 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
334 shutdown.store(true);
338 // Traffic to server is blackholed temporarily with keepalives enabled
339 TEST_F(FlakyNetworkTest, ServerUnreachableWithKeepalive) {
340 const int kKeepAliveTimeMs = 1000;
341 const int kKeepAliveTimeoutMs = 1000;
342 const int kReconnectBackoffMs = 1000;
343 ChannelArguments args;
344 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
345 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
346 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
347 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
348 // max time for a connection attempt
349 args.SetInt(GRPC_ARG_MIN_RECONNECT_BACKOFF_MS, kReconnectBackoffMs);
350 // max time between reconnect attempts
351 args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, kReconnectBackoffMs);
353 gpr_log(GPR_DEBUG, "FlakyNetworkTest.ServerUnreachableWithKeepalive start");
354 auto channel = BuildChannel("pick_first", args);
355 auto stub = BuildStub(channel);
356 // Channel should be in READY state after we send an RPC
357 EXPECT_TRUE(SendRpc(stub));
358 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
360 std::atomic_bool shutdown{false};
361 std::thread sender = std::thread([this, &stub, &shutdown]() {
363 if (shutdown.load()) {
367 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
371 // break network connectivity
372 gpr_log(GPR_DEBUG, "Adding iptables rule to drop packets");
374 std::this_thread::sleep_for(std::chrono::milliseconds(10000));
375 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
376 // restore network connectivity by removing the iptables DROP rules
378 gpr_log(GPR_DEBUG, "Removed iptables rule to drop packets");
379 EXPECT_TRUE(WaitForChannelReady(channel.get()));
380 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
381 shutdown.store(true);
383 gpr_log(GPR_DEBUG, "FlakyNetworkTest.ServerUnreachableWithKeepalive end");
387 // Traffic to server is blackholed temporarily with keepalives disabled
388 TEST_F(FlakyNetworkTest, ServerUnreachableNoKeepalive) {
389 auto channel = BuildChannel("pick_first", ChannelArguments());
390 auto stub = BuildStub(channel);
391 // Channel should be in READY state after we send an RPC
392 EXPECT_TRUE(SendRpc(stub));
393 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
395 // break network connectivity
398 std::thread sender = std::thread([this, &stub]() {
399 // RPC with deadline should timeout
400 EXPECT_FALSE(SendRpc(stub, /*timeout_ms=*/500, /*wait_for_ready=*/true));
401 // RPC without a deadline blocks until the call finishes
402 EXPECT_TRUE(SendRpc(stub, /*timeout_ms=*/0, /*wait_for_ready=*/true));
405 std::this_thread::sleep_for(std::chrono::milliseconds(2000));
406 // bring network interface back up
409 // wait for RPC to finish
413 // Send RPCs over a flaky network connection
414 TEST_F(FlakyNetworkTest, FlakyNetwork) {
415 const int kKeepAliveTimeMs = 1000;
416 const int kKeepAliveTimeoutMs = 1000;
417 const int kMessageCount = 100;
418 ChannelArguments args;
419 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
420 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
421 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
422 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
424 auto channel = BuildChannel("pick_first", args);
425 auto stub = BuildStub(channel);
426 // Channel should be in READY state after we send an RPC
427 EXPECT_TRUE(SendRpc(stub));
428 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
430 // simulate flaky network (packet loss, corruption and delays)
432 for (int i = 0; i < kMessageCount; ++i) {
435 // remove network flakiness
437 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
440 // Server is shut down gracefully and restarted. Client keepalives are enabled
441 TEST_F(FlakyNetworkTest, ServerRestartKeepaliveEnabled) {
442 const int kKeepAliveTimeMs = 1000;
443 const int kKeepAliveTimeoutMs = 1000;
444 ChannelArguments args;
445 args.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, kKeepAliveTimeMs);
446 args.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, kKeepAliveTimeoutMs);
447 args.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1);
448 args.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0);
450 auto channel = BuildChannel("pick_first", args);
451 auto stub = BuildStub(channel);
452 // Channel should be in READY state after we send an RPC
453 EXPECT_TRUE(SendRpc(stub));
454 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
456 // server goes down, client should detect server going down and calls should
459 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
460 EXPECT_FALSE(SendRpc(stub));
462 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
464 // server restarts, calls succeed
466 EXPECT_TRUE(WaitForChannelReady(channel.get()));
467 // EXPECT_TRUE(SendRpc(stub));
470 // Server is shut down gracefully and restarted. Client keepalives are disabled
471 TEST_F(FlakyNetworkTest, ServerRestartKeepaliveDisabled) {
472 auto channel = BuildChannel("pick_first", ChannelArguments());
473 auto stub = BuildStub(channel);
474 // Channel should be in READY state after we send an RPC
475 EXPECT_TRUE(SendRpc(stub));
476 EXPECT_EQ(channel->GetState(false), GRPC_CHANNEL_READY);
478 // server sends GOAWAY when it's shutdown, so client attempts to reconnect
480 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
482 EXPECT_TRUE(WaitForChannelNotReady(channel.get()));
484 std::this_thread::sleep_for(std::chrono::milliseconds(1000));
486 // server restarts, calls succeed
488 EXPECT_TRUE(WaitForChannelReady(channel.get()));
492 } // namespace testing
496 int main(int argc, char** argv) {
497 ::testing::InitGoogleTest(&argc, argv);
498 grpc_test_init(argc, argv);
499 auto result = RUN_ALL_TESTS();