Imported Upstream version 1.27.0
[platform/upstream/grpc.git] / test / cpp / end2end / xds_end2end_test.cc
1 /*
2  *
3  * Copyright 2017 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18
19 #include <memory>
20 #include <mutex>
21 #include <numeric>
22 #include <set>
23 #include <sstream>
24 #include <thread>
25
26 #include <grpc/grpc.h>
27 #include <grpc/support/alloc.h>
28 #include <grpc/support/log.h>
29 #include <grpc/support/string_util.h>
30 #include <grpc/support/time.h>
31 #include <grpcpp/channel.h>
32 #include <grpcpp/client_context.h>
33 #include <grpcpp/create_channel.h>
34 #include <grpcpp/server.h>
35 #include <grpcpp/server_builder.h>
36
37 #include "src/core/ext/filters/client_channel/backup_poller.h"
38 #include "src/core/ext/filters/client_channel/parse_address.h"
39 #include "src/core/ext/filters/client_channel/resolver/fake/fake_resolver.h"
40 #include "src/core/ext/filters/client_channel/server_address.h"
41 #include "src/core/lib/gpr/env.h"
42 #include "src/core/lib/gpr/tmpfile.h"
43 #include "src/core/lib/gprpp/map.h"
44 #include "src/core/lib/gprpp/ref_counted_ptr.h"
45 #include "src/core/lib/gprpp/sync.h"
46 #include "src/core/lib/iomgr/sockaddr.h"
47 #include "src/core/lib/security/credentials/fake/fake_credentials.h"
48 #include "src/cpp/client/secure_credentials.h"
49 #include "src/cpp/server/secure_server_credentials.h"
50
51 #include "test/core/util/port.h"
52 #include "test/core/util/test_config.h"
53 #include "test/cpp/end2end/test_service_impl.h"
54
55 #include "src/proto/grpc/testing/echo.grpc.pb.h"
56 #include "src/proto/grpc/testing/xds/ads_for_test.grpc.pb.h"
57 #include "src/proto/grpc/testing/xds/cds_for_test.grpc.pb.h"
58 #include "src/proto/grpc/testing/xds/eds_for_test.grpc.pb.h"
59 #include "src/proto/grpc/testing/xds/lrs_for_test.grpc.pb.h"
60
61 #include <gmock/gmock.h>
62 #include <gtest/gtest.h>
63
64 // TODO(dgq): Other scenarios in need of testing:
65 // - Send a serverlist with faulty ip:port addresses (port > 2^16, etc).
66 // - Test reception of invalid serverlist
67 // - Test against a non-LB server.
68 // - Random LB server closing the stream unexpectedly.
69 //
70 // Findings from end to end testing to be covered here:
71 // - Handling of LB servers restart, including reconnection after backing-off
72 //   retries.
73 // - Destruction of load balanced channel (and therefore of xds instance)
74 //   while:
75 //   1) the internal LB call is still active. This should work by virtue
76 //   of the weak reference the LB call holds. The call should be terminated as
77 //   part of the xds shutdown process.
78 //   2) the retry timer is active. Again, the weak reference it holds should
79 //   prevent a premature call to \a glb_destroy.
80
81 namespace grpc {
82 namespace testing {
83 namespace {
84
85 using std::chrono::system_clock;
86
87 using ::envoy::api::v2::Cluster;
88 using ::envoy::api::v2::ClusterLoadAssignment;
89 using ::envoy::api::v2::DiscoveryRequest;
90 using ::envoy::api::v2::DiscoveryResponse;
91 using ::envoy::api::v2::FractionalPercent;
92 using ::envoy::service::discovery::v2::AggregatedDiscoveryService;
93 using ::envoy::service::load_stats::v2::ClusterStats;
94 using ::envoy::service::load_stats::v2::LoadReportingService;
95 using ::envoy::service::load_stats::v2::LoadStatsRequest;
96 using ::envoy::service::load_stats::v2::LoadStatsResponse;
97 using ::envoy::service::load_stats::v2::UpstreamLocalityStats;
98
// Resource type URLs. The fake ADS server stamps them on the responses it
// sends and uses them to tell CDS requests from EDS requests.
constexpr char kCdsTypeUrl[] =
    "type.googleapis.com/"
    "envoy.api.v2.Cluster";
constexpr char kEdsTypeUrl[] =
    "type.googleapis.com/"
    "envoy.api.v2.ClusterLoadAssignment";

// Region and zone written into every locality of a generated EDS response.
constexpr char kDefaultLocalityRegion[] = "xds_default_locality_region";
constexpr char kDefaultLocalityZone[] = "xds_default_locality_zone";

// Names of the drop categories used by the tests.
constexpr char kLbDropType[] = "lb";
constexpr char kThrottleDropType[] = "throttle";

// Weight and priority a locality gets when the test does not specify them.
constexpr int kDefaultLocalityWeight = 3;
constexpr int kDefaultLocalityPriority = 0;
108
// Contents of the bootstrap file used by default: a single xDS server at
// "fake:///lb" with fake channel credentials, plus a fully populated node.
// Written as a raw string literal; the bytes between the delimiters are the
// file contents, including the final newline.
constexpr char kBootstrapFile[] = R"json({
  "xds_servers": [
    {
      "server_uri": "fake:///lb",
      "channel_creds": [
        {
          "type": "fake"
        }
      ]
    }
  ],
  "node": {
    "id": "xds_end2end_test",
    "cluster": "test",
    "metadata": {
      "foo": "bar"
    },
    "locality": {
      "region": "corp",
      "zone": "svl",
      "subzone": "mp3"
    }
  }
}
)json";
134
// Contents of the alternative bootstrap file: it points at the server URI
// "fake:///wrong_lb" and carries an empty node. Written as a raw string
// literal; the bytes between the delimiters are the file contents, including
// the final newline.
constexpr char kBootstrapFileBad[] = R"json({
  "xds_servers": [
    {
      "server_uri": "fake:///wrong_lb",
      "channel_creds": [
        {
          "type": "fake"
        }
      ]
    }
  ],
  "node": {
  }
}
)json";
150
151 char* g_bootstrap_file;
152 char* g_bootstrap_file_bad;
153
154 void WriteBootstrapFiles() {
155   char* bootstrap_file;
156   FILE* out = gpr_tmpfile("xds_bootstrap", &bootstrap_file);
157   fputs(kBootstrapFile, out);
158   fclose(out);
159   g_bootstrap_file = bootstrap_file;
160   out = gpr_tmpfile("xds_bootstrap_bad", &bootstrap_file);
161   fputs(kBootstrapFileBad, out);
162   fclose(out);
163   g_bootstrap_file_bad = bootstrap_file;
164 }
165
166 // Helper class to minimize the number of unique ports we use for this test.
167 class PortSaver {
168  public:
169   int GetPort() {
170     if (idx_ >= ports_.size()) {
171       ports_.push_back(grpc_pick_unused_port_or_die());
172     }
173     return ports_[idx_++];
174   }
175
176   void Reset() { idx_ = 0; }
177
178  private:
179   std::vector<int> ports_;
180   size_t idx_ = 0;
181 };
182
183 PortSaver* g_port_saver = nullptr;
184
185 template <typename ServiceType>
186 class CountedService : public ServiceType {
187  public:
188   size_t request_count() {
189     grpc_core::MutexLock lock(&mu_);
190     return request_count_;
191   }
192
193   size_t response_count() {
194     grpc_core::MutexLock lock(&mu_);
195     return response_count_;
196   }
197
198   void IncreaseResponseCount() {
199     grpc_core::MutexLock lock(&mu_);
200     ++response_count_;
201   }
202   void IncreaseRequestCount() {
203     grpc_core::MutexLock lock(&mu_);
204     ++request_count_;
205   }
206
207   void ResetCounters() {
208     grpc_core::MutexLock lock(&mu_);
209     request_count_ = 0;
210     response_count_ = 0;
211   }
212
213  protected:
214   grpc_core::Mutex mu_;
215
216  private:
217   size_t request_count_ = 0;
218   size_t response_count_ = 0;
219 };
220
221 using BackendService = CountedService<TestServiceImpl>;
222 using AdsService = CountedService<AggregatedDiscoveryService::Service>;
223 using LrsService = CountedService<LoadReportingService::Service>;
224
225 const char g_kCallCredsMdKey[] = "Balancer should not ...";
226 const char g_kCallCredsMdValue[] = "... receive me";
227
228 class BackendServiceImpl : public BackendService {
229  public:
230   BackendServiceImpl() {}
231
232   Status Echo(ServerContext* context, const EchoRequest* request,
233               EchoResponse* response) override {
234     // Backend should receive the call credentials metadata.
235     auto call_credentials_entry =
236         context->client_metadata().find(g_kCallCredsMdKey);
237     EXPECT_NE(call_credentials_entry, context->client_metadata().end());
238     if (call_credentials_entry != context->client_metadata().end()) {
239       EXPECT_EQ(call_credentials_entry->second, g_kCallCredsMdValue);
240     }
241     IncreaseRequestCount();
242     const auto status = TestServiceImpl::Echo(context, request, response);
243     IncreaseResponseCount();
244     AddClient(context->peer());
245     return status;
246   }
247
248   void Start() {}
249   void Shutdown() {}
250
251   std::set<grpc::string> clients() {
252     grpc_core::MutexLock lock(&clients_mu_);
253     return clients_;
254   }
255
256  private:
257   void AddClient(const grpc::string& client) {
258     grpc_core::MutexLock lock(&clients_mu_);
259     clients_.insert(client);
260   }
261
262   grpc_core::Mutex mu_;
263   grpc_core::Mutex clients_mu_;
264   std::set<grpc::string> clients_;
265 };
266
267 class ClientStats {
268  public:
269   struct LocalityStats {
270     // Converts from proto message class.
271     LocalityStats(const UpstreamLocalityStats& upstream_locality_stats)
272         : total_successful_requests(
273               upstream_locality_stats.total_successful_requests()),
274           total_requests_in_progress(
275               upstream_locality_stats.total_requests_in_progress()),
276           total_error_requests(upstream_locality_stats.total_error_requests()),
277           total_issued_requests(
278               upstream_locality_stats.total_issued_requests()) {}
279
280     uint64_t total_successful_requests;
281     uint64_t total_requests_in_progress;
282     uint64_t total_error_requests;
283     uint64_t total_issued_requests;
284   };
285
286   // Converts from proto message class.
287   ClientStats(const ClusterStats& cluster_stats)
288       : total_dropped_requests_(cluster_stats.total_dropped_requests()) {
289     for (const auto& input_locality_stats :
290          cluster_stats.upstream_locality_stats()) {
291       locality_stats_.emplace(input_locality_stats.locality().sub_zone(),
292                               LocalityStats(input_locality_stats));
293     }
294     for (const auto& input_dropped_requests :
295          cluster_stats.dropped_requests()) {
296       dropped_requests_.emplace(input_dropped_requests.category(),
297                                 input_dropped_requests.dropped_count());
298     }
299   }
300
301   uint64_t total_successful_requests() const {
302     uint64_t sum = 0;
303     for (auto& p : locality_stats_) {
304       sum += p.second.total_successful_requests;
305     }
306     return sum;
307   }
308   uint64_t total_requests_in_progress() const {
309     uint64_t sum = 0;
310     for (auto& p : locality_stats_) {
311       sum += p.second.total_requests_in_progress;
312     }
313     return sum;
314   }
315   uint64_t total_error_requests() const {
316     uint64_t sum = 0;
317     for (auto& p : locality_stats_) {
318       sum += p.second.total_error_requests;
319     }
320     return sum;
321   }
322   uint64_t total_issued_requests() const {
323     uint64_t sum = 0;
324     for (auto& p : locality_stats_) {
325       sum += p.second.total_issued_requests;
326     }
327     return sum;
328   }
329   uint64_t total_dropped_requests() const { return total_dropped_requests_; }
330   uint64_t dropped_requests(const grpc::string& category) const {
331     auto iter = dropped_requests_.find(category);
332     GPR_ASSERT(iter != dropped_requests_.end());
333     return iter->second;
334   }
335
336  private:
337   std::map<grpc::string, LocalityStats> locality_stats_;
338   uint64_t total_dropped_requests_;
339   std::map<grpc::string, uint64_t> dropped_requests_;
340 };
341
342 // TODO(roth): Change this service to a real fake.
343 class AdsServiceImpl : public AdsService {
344  public:
345   enum ResponseState {
346     NOT_SENT,
347     SENT,
348     ACKED,
349     NACKED,
350   };
351
352   struct ResponseArgs {
353     struct Locality {
354       Locality(const grpc::string& sub_zone, std::vector<int> ports,
355                int lb_weight = kDefaultLocalityWeight,
356                int priority = kDefaultLocalityPriority,
357                std::vector<envoy::api::v2::HealthStatus> health_statuses = {})
358           : sub_zone(std::move(sub_zone)),
359             ports(std::move(ports)),
360             lb_weight(lb_weight),
361             priority(priority),
362             health_statuses(std::move(health_statuses)) {}
363
364       const grpc::string sub_zone;
365       std::vector<int> ports;
366       int lb_weight;
367       int priority;
368       std::vector<envoy::api::v2::HealthStatus> health_statuses;
369     };
370
371     ResponseArgs() = default;
372     explicit ResponseArgs(std::vector<Locality> locality_list)
373         : locality_list(std::move(locality_list)) {}
374
375     std::vector<Locality> locality_list;
376     std::map<grpc::string, uint32_t> drop_categories;
377     FractionalPercent::DenominatorType drop_denominator =
378         FractionalPercent::MILLION;
379   };
380
381   using Stream = ServerReaderWriter<DiscoveryResponse, DiscoveryRequest>;
382   using ResponseDelayPair = std::pair<DiscoveryResponse, int>;
383
384   AdsServiceImpl(bool enable_load_reporting) {
385     default_cluster_.set_name("application_target_name");
386     default_cluster_.set_type(envoy::api::v2::Cluster::EDS);
387     default_cluster_.mutable_eds_cluster_config()
388         ->mutable_eds_config()
389         ->mutable_ads();
390     default_cluster_.set_lb_policy(envoy::api::v2::Cluster::ROUND_ROBIN);
391     if (enable_load_reporting) {
392       default_cluster_.mutable_lrs_server()->mutable_self();
393     }
394     cds_response_data_ = {
395         {"application_target_name", default_cluster_},
396     };
397   }
398
399   void HandleCdsRequest(DiscoveryRequest* request, Stream* stream) {
400     gpr_log(GPR_INFO, "ADS[%p]: received CDS request '%s'", this,
401             request->DebugString().c_str());
402     const std::string version_str = "version_1";
403     const std::string nonce_str = "nonce_1";
404     grpc_core::MutexLock lock(&ads_mu_);
405     if (cds_response_state_ == NOT_SENT) {
406       DiscoveryResponse response;
407       response.set_type_url(kCdsTypeUrl);
408       response.set_version_info(version_str);
409       response.set_nonce(nonce_str);
410       for (const auto& cluster_name : request->resource_names()) {
411         auto iter = cds_response_data_.find(cluster_name);
412         if (iter == cds_response_data_.end()) continue;
413         response.add_resources()->PackFrom(iter->second);
414       }
415       stream->Write(response);
416       cds_response_state_ = SENT;
417     } else if (cds_response_state_ == SENT) {
418       GPR_ASSERT(!request->response_nonce().empty());
419       cds_response_state_ =
420           request->version_info() == version_str ? ACKED : NACKED;
421     }
422   }
423
424   void HandleEdsRequest(DiscoveryRequest* request, Stream* stream) {
425     gpr_log(GPR_INFO, "ADS[%p]: received EDS request '%s'", this,
426             request->DebugString().c_str());
427     IncreaseRequestCount();
428     std::vector<ResponseDelayPair> responses_and_delays;
429     {
430       grpc_core::MutexLock lock(&ads_mu_);
431       responses_and_delays = eds_responses_and_delays_;
432     }
433     // Send response.
434     for (const auto& p : responses_and_delays) {
435       const DiscoveryResponse& response = p.first;
436       const int delay_ms = p.second;
437       gpr_log(GPR_INFO, "ADS[%p]: sleeping for %d ms...", this, delay_ms);
438       if (delay_ms > 0) {
439         gpr_sleep_until(grpc_timeout_milliseconds_to_deadline(delay_ms));
440       }
441       gpr_log(GPR_INFO, "ADS[%p]: Woke up! Sending response '%s'", this,
442               response.DebugString().c_str());
443       IncreaseResponseCount();
444       stream->Write(response);
445     }
446   }
447
448   Status StreamAggregatedResources(ServerContext* context,
449                                    Stream* stream) override {
450     gpr_log(GPR_INFO, "ADS[%p]: StreamAggregatedResources starts", this);
451     [&]() {
452       {
453         grpc_core::MutexLock lock(&ads_mu_);
454         if (ads_done_) return;
455       }
456       // Balancer shouldn't receive the call credentials metadata.
457       EXPECT_EQ(context->client_metadata().find(g_kCallCredsMdKey),
458                 context->client_metadata().end());
459       // Keep servicing requests until the EDS response has been sent back.
460       DiscoveryRequest request;
461       // TODO(roth): For each supported type, we currently only handle one
462       // request without replying to any new requests (for ACK/NACK or new
463       // resource names). It's not causing a big problem now but should be
464       // fixed.
465       bool eds_sent = false;
466       while (!eds_sent || cds_response_state_ == SENT) {
467         if (!stream->Read(&request)) return;
468         if (request.type_url() == kCdsTypeUrl) {
469           HandleCdsRequest(&request, stream);
470         } else if (request.type_url() == kEdsTypeUrl) {
471           HandleEdsRequest(&request, stream);
472           eds_sent = true;
473         }
474       }
475       // Wait until notified done.
476       grpc_core::MutexLock lock(&ads_mu_);
477       ads_cond_.WaitUntil(&ads_mu_, [this] { return ads_done_; });
478     }();
479     gpr_log(GPR_INFO, "ADS[%p]: StreamAggregatedResources done", this);
480     return Status::OK;
481   }
482
483   Cluster GetDefaultCluster() const { return default_cluster_; }
484
485   void SetCdsResponse(
486       std::map<std::string /*cluster_name*/, Cluster> cds_response_data) {
487     cds_response_data_ = std::move(cds_response_data);
488   }
489
490   ResponseState cds_response_state() {
491     grpc_core::MutexLock lock(&ads_mu_);
492     return cds_response_state_;
493   }
494
495   void AddEdsResponse(const DiscoveryResponse& response, int send_after_ms) {
496     grpc_core::MutexLock lock(&ads_mu_);
497     eds_responses_and_delays_.push_back(
498         std::make_pair(response, send_after_ms));
499   }
500
501   void Start() {
502     grpc_core::MutexLock lock(&ads_mu_);
503     ads_done_ = false;
504     eds_responses_and_delays_.clear();
505   }
506
507   void Shutdown() {
508     {
509       grpc_core::MutexLock lock(&ads_mu_);
510       NotifyDoneWithAdsCallLocked();
511       eds_responses_and_delays_.clear();
512     }
513     gpr_log(GPR_INFO, "ADS[%p]: shut down", this);
514   }
515
516   static DiscoveryResponse BuildResponse(const ResponseArgs& args) {
517     ClusterLoadAssignment assignment;
518     assignment.set_cluster_name("application_target_name");
519     for (const auto& locality : args.locality_list) {
520       auto* endpoints = assignment.add_endpoints();
521       endpoints->mutable_load_balancing_weight()->set_value(locality.lb_weight);
522       endpoints->set_priority(locality.priority);
523       endpoints->mutable_locality()->set_region(kDefaultLocalityRegion);
524       endpoints->mutable_locality()->set_zone(kDefaultLocalityZone);
525       endpoints->mutable_locality()->set_sub_zone(locality.sub_zone);
526       for (size_t i = 0; i < locality.ports.size(); ++i) {
527         const int& port = locality.ports[i];
528         auto* lb_endpoints = endpoints->add_lb_endpoints();
529         if (locality.health_statuses.size() > i &&
530             locality.health_statuses[i] !=
531                 envoy::api::v2::HealthStatus::UNKNOWN) {
532           lb_endpoints->set_health_status(locality.health_statuses[i]);
533         }
534         auto* endpoint = lb_endpoints->mutable_endpoint();
535         auto* address = endpoint->mutable_address();
536         auto* socket_address = address->mutable_socket_address();
537         socket_address->set_address("127.0.0.1");
538         socket_address->set_port_value(port);
539       }
540     }
541     if (!args.drop_categories.empty()) {
542       auto* policy = assignment.mutable_policy();
543       for (const auto& p : args.drop_categories) {
544         const grpc::string& name = p.first;
545         const uint32_t parts_per_million = p.second;
546         auto* drop_overload = policy->add_drop_overloads();
547         drop_overload->set_category(name);
548         auto* drop_percentage = drop_overload->mutable_drop_percentage();
549         drop_percentage->set_numerator(parts_per_million);
550         drop_percentage->set_denominator(args.drop_denominator);
551       }
552     }
553     DiscoveryResponse response;
554     response.set_type_url(kEdsTypeUrl);
555     response.add_resources()->PackFrom(assignment);
556     return response;
557   }
558
559   void NotifyDoneWithAdsCall() {
560     grpc_core::MutexLock lock(&ads_mu_);
561     NotifyDoneWithAdsCallLocked();
562   }
563
564   void NotifyDoneWithAdsCallLocked() {
565     if (!ads_done_) {
566       ads_done_ = true;
567       ads_cond_.Broadcast();
568     }
569   }
570
571  private:
572   grpc_core::CondVar ads_cond_;
573   // Protect the members below.
574   grpc_core::Mutex ads_mu_;
575   bool ads_done_ = false;
576   // CDS response data.
577   Cluster default_cluster_;
578   std::map<std::string /*cluster_name*/, Cluster> cds_response_data_;
579   ResponseState cds_response_state_ = NOT_SENT;
580   // EDS response data.
581   std::vector<ResponseDelayPair> eds_responses_and_delays_;
582 };
583
584 class LrsServiceImpl : public LrsService {
585  public:
586   using Stream = ServerReaderWriter<LoadStatsResponse, LoadStatsRequest>;
587
588   explicit LrsServiceImpl(int client_load_reporting_interval_seconds)
589       : client_load_reporting_interval_seconds_(
590             client_load_reporting_interval_seconds) {}
591
592   Status StreamLoadStats(ServerContext* /*context*/, Stream* stream) override {
593     gpr_log(GPR_INFO, "LRS[%p]: StreamLoadStats starts", this);
594     // Read request.
595     LoadStatsRequest request;
596     if (stream->Read(&request)) {
597       if (client_load_reporting_interval_seconds_ > 0) {
598         IncreaseRequestCount();
599         // Send response.
600         LoadStatsResponse response;
601         auto server_name = request.cluster_stats()[0].cluster_name();
602         GPR_ASSERT(server_name != "");
603         response.add_clusters(server_name);
604         response.mutable_load_reporting_interval()->set_seconds(
605             client_load_reporting_interval_seconds_);
606         stream->Write(response);
607         IncreaseResponseCount();
608         // Wait for report.
609         request.Clear();
610         if (stream->Read(&request)) {
611           gpr_log(GPR_INFO, "LRS[%p]: received client load report message '%s'",
612                   this, request.DebugString().c_str());
613           GPR_ASSERT(request.cluster_stats().size() == 1);
614           const ClusterStats& cluster_stats = request.cluster_stats()[0];
615           // We need to acquire the lock here in order to prevent the notify_one
616           // below from firing before its corresponding wait is executed.
617           grpc_core::MutexLock lock(&load_report_mu_);
618           GPR_ASSERT(client_stats_ == nullptr);
619           client_stats_.reset(new ClientStats(cluster_stats));
620           load_report_ready_ = true;
621           load_report_cond_.Signal();
622         }
623       }
624       // Wait until notified done.
625       grpc_core::MutexLock lock(&lrs_mu_);
626       lrs_cv_.WaitUntil(&lrs_mu_, [this] { return lrs_done; });
627     }
628     gpr_log(GPR_INFO, "LRS[%p]: StreamLoadStats done", this);
629     return Status::OK;
630   }
631
632   void Start() {
633     lrs_done = false;
634     load_report_ready_ = false;
635     client_stats_.reset();
636   }
637
638   void Shutdown() {
639     {
640       grpc_core::MutexLock lock(&lrs_mu_);
641       NotifyDoneWithLrsCallLocked();
642     }
643     gpr_log(GPR_INFO, "LRS[%p]: shut down", this);
644   }
645
646   ClientStats* WaitForLoadReport() {
647     grpc_core::MutexLock lock(&load_report_mu_);
648     load_report_cond_.WaitUntil(&load_report_mu_,
649                                 [this] { return load_report_ready_; });
650     load_report_ready_ = false;
651     return client_stats_.get();
652   }
653
654   void NotifyDoneWithLrsCall() {
655     grpc_core::MutexLock lock(&lrs_mu_);
656     NotifyDoneWithLrsCallLocked();
657   }
658
659   void NotifyDoneWithLrsCallLocked() {
660     if (!lrs_done) {
661       lrs_done = true;
662       lrs_cv_.Broadcast();
663     }
664   }
665
666  private:
667   const int client_load_reporting_interval_seconds_;
668
669   grpc_core::CondVar lrs_cv_;
670   // Protect lrs_done.
671   grpc_core::Mutex lrs_mu_;
672   bool lrs_done = false;
673
674   grpc_core::CondVar load_report_cond_;
675   // Protect the members below.
676   grpc_core::Mutex load_report_mu_;
677   std::unique_ptr<ClientStats> client_stats_;
678   bool load_report_ready_ = false;
679 };
680
681 class TestType {
682  public:
683   TestType(bool use_xds_resolver, bool enable_load_reporting)
684       : use_xds_resolver_(use_xds_resolver),
685         enable_load_reporting_(enable_load_reporting) {}
686
687   bool use_xds_resolver() const { return use_xds_resolver_; }
688   bool enable_load_reporting() const { return enable_load_reporting_; }
689
690   grpc::string AsString() const {
691     grpc::string retval = (use_xds_resolver_ ? "XdsResolver" : "FakeResolver");
692     if (enable_load_reporting_) retval += "WithLoadReporting";
693     return retval;
694   }
695
696  private:
697   const bool use_xds_resolver_;
698   const bool enable_load_reporting_;
699 };
700
701 class XdsEnd2endTest : public ::testing::TestWithParam<TestType> {
702  protected:
703   XdsEnd2endTest(size_t num_backends, size_t num_balancers,
704                  int client_load_reporting_interval_seconds = 100)
705       : server_host_("localhost"),
706         num_backends_(num_backends),
707         num_balancers_(num_balancers),
708         client_load_reporting_interval_seconds_(
709             client_load_reporting_interval_seconds) {}
710
711   static void SetUpTestCase() {
712     // Make the backup poller poll very frequently in order to pick up
713     // updates from all the subchannels's FDs.
714     GPR_GLOBAL_CONFIG_SET(grpc_client_channel_backup_poll_interval_ms, 1);
715 #if TARGET_OS_IPHONE
716     // Workaround Apple CFStream bug
717     gpr_setenv("grpc_cfstream", "0");
718 #endif
719     grpc_init();
720   }
721
722   static void TearDownTestCase() { grpc_shutdown(); }
723
724   void SetUp() override {
725     gpr_setenv("GRPC_XDS_BOOTSTRAP", g_bootstrap_file);
726     g_port_saver->Reset();
727     response_generator_ =
728         grpc_core::MakeRefCounted<grpc_core::FakeResolverResponseGenerator>();
729     lb_channel_response_generator_ =
730         grpc_core::MakeRefCounted<grpc_core::FakeResolverResponseGenerator>();
731     // Start the backends.
732     for (size_t i = 0; i < num_backends_; ++i) {
733       backends_.emplace_back(new BackendServerThread);
734       backends_.back()->Start(server_host_);
735     }
736     // Start the load balancers.
737     for (size_t i = 0; i < num_balancers_; ++i) {
738       balancers_.emplace_back(
739           new BalancerServerThread(GetParam().enable_load_reporting()
740                                        ? client_load_reporting_interval_seconds_
741                                        : 0));
742       balancers_.back()->Start(server_host_);
743     }
744     ResetStub();
745   }
746
747   void TearDown() override {
748     ShutdownAllBackends();
749     for (auto& balancer : balancers_) balancer->Shutdown();
750   }
751
752   void StartAllBackends() {
753     for (auto& backend : backends_) backend->Start(server_host_);
754   }
755
756   void StartBackend(size_t index) { backends_[index]->Start(server_host_); }
757
758   void ShutdownAllBackends() {
759     for (auto& backend : backends_) backend->Shutdown();
760   }
761
762   void ShutdownBackend(size_t index) { backends_[index]->Shutdown(); }
763
764   void ResetStub(int fallback_timeout = 0, int failover_timeout = 0,
765                  const grpc::string& expected_targets = "") {
766     ChannelArguments args;
767     // TODO(juanlishen): Add setter to ChannelArguments.
768     if (fallback_timeout > 0) {
769       args.SetInt(GRPC_ARG_XDS_FALLBACK_TIMEOUT_MS, fallback_timeout);
770     }
771     if (failover_timeout > 0) {
772       args.SetInt(GRPC_ARG_XDS_FAILOVER_TIMEOUT_MS, failover_timeout);
773     }
774     // If the parent channel is using the fake resolver, we inject the
775     // response generator for the parent here, and then SetNextResolution()
776     // will inject the xds channel's response generator via the parent's
777     // response generator.
778     //
779     // In contrast, if we are using the xds resolver, then the parent
780     // channel never uses a response generator, and we inject the xds
781     // channel's response generator here.
782     args.SetPointer(GRPC_ARG_FAKE_RESOLVER_RESPONSE_GENERATOR,
783                     GetParam().use_xds_resolver()
784                         ? lb_channel_response_generator_.get()
785                         : response_generator_.get());
786     if (!expected_targets.empty()) {
787       args.SetString(GRPC_ARG_FAKE_SECURITY_EXPECTED_TARGETS, expected_targets);
788     }
789     grpc::string scheme =
790         GetParam().use_xds_resolver() ? "xds-experimental" : "fake";
791     std::ostringstream uri;
792     uri << scheme << ":///" << kApplicationTargetName_;
793     // TODO(dgq): templatize tests to run everything using both secure and
794     // insecure channel credentials.
795     grpc_channel_credentials* channel_creds =
796         grpc_fake_transport_security_credentials_create();
797     grpc_call_credentials* call_creds = grpc_md_only_test_credentials_create(
798         g_kCallCredsMdKey, g_kCallCredsMdValue, false);
799     std::shared_ptr<ChannelCredentials> creds(
800         new SecureChannelCredentials(grpc_composite_channel_credentials_create(
801             channel_creds, call_creds, nullptr)));
802     call_creds->Unref();
803     channel_creds->Unref();
804     channel_ = ::grpc::CreateCustomChannel(uri.str(), creds, args);
805     stub_ = grpc::testing::EchoTestService::NewStub(channel_);
806   }
807
808   void ResetBackendCounters() {
809     for (auto& backend : backends_) backend->backend_service()->ResetCounters();
810   }
811
812   bool SeenAllBackends(size_t start_index = 0, size_t stop_index = 0) {
813     if (stop_index == 0) stop_index = backends_.size();
814     for (size_t i = start_index; i < stop_index; ++i) {
815       if (backends_[i]->backend_service()->request_count() == 0) return false;
816     }
817     return true;
818   }
819
820   void SendRpcAndCount(int* num_total, int* num_ok, int* num_failure,
821                        int* num_drops) {
822     const Status status = SendRpc();
823     if (status.ok()) {
824       ++*num_ok;
825     } else {
826       if (status.error_message() == "Call dropped by load balancing policy") {
827         ++*num_drops;
828       } else {
829         ++*num_failure;
830       }
831     }
832     ++*num_total;
833   }
834
835   std::tuple<int, int, int> WaitForAllBackends(size_t start_index = 0,
836                                                size_t stop_index = 0) {
837     int num_ok = 0;
838     int num_failure = 0;
839     int num_drops = 0;
840     int num_total = 0;
841     while (!SeenAllBackends(start_index, stop_index)) {
842       SendRpcAndCount(&num_total, &num_ok, &num_failure, &num_drops);
843     }
844     ResetBackendCounters();
845     gpr_log(GPR_INFO,
846             "Performed %d warm up requests against the backends. "
847             "%d succeeded, %d failed, %d dropped.",
848             num_total, num_ok, num_failure, num_drops);
849     return std::make_tuple(num_ok, num_failure, num_drops);
850   }
851
852   void WaitForBackend(size_t backend_idx, bool reset_counters = true) {
853     gpr_log(GPR_INFO, "========= WAITING FOR BACKEND %lu ==========",
854             static_cast<unsigned long>(backend_idx));
855     do {
856       (void)SendRpc();
857     } while (backends_[backend_idx]->backend_service()->request_count() == 0);
858     if (reset_counters) ResetBackendCounters();
859     gpr_log(GPR_INFO, "========= BACKEND %lu READY ==========",
860             static_cast<unsigned long>(backend_idx));
861   }
862
863   grpc_core::ServerAddressList CreateAddressListFromPortList(
864       const std::vector<int>& ports) {
865     grpc_core::ServerAddressList addresses;
866     for (int port : ports) {
867       char* lb_uri_str;
868       gpr_asprintf(&lb_uri_str, "ipv4:127.0.0.1:%d", port);
869       grpc_uri* lb_uri = grpc_uri_parse(lb_uri_str, true);
870       GPR_ASSERT(lb_uri != nullptr);
871       grpc_resolved_address address;
872       GPR_ASSERT(grpc_parse_uri(lb_uri, &address));
873       addresses.emplace_back(address.addr, address.len, nullptr);
874       grpc_uri_destroy(lb_uri);
875       gpr_free(lb_uri_str);
876     }
877     return addresses;
878   }
879
880   void SetNextResolution(const std::vector<int>& ports,
881                          grpc_core::FakeResolverResponseGenerator*
882                              lb_channel_response_generator = nullptr) {
883     if (GetParam().use_xds_resolver()) return;  // Not used with xds resolver.
884     grpc_core::ExecCtx exec_ctx;
885     grpc_core::Resolver::Result result;
886     result.addresses = CreateAddressListFromPortList(ports);
887     grpc_error* error = GRPC_ERROR_NONE;
888     const char* service_config_json =
889         GetParam().enable_load_reporting()
890             ? kDefaultServiceConfig_
891             : kDefaultServiceConfigWithoutLoadReporting_;
892     result.service_config =
893         grpc_core::ServiceConfig::Create(service_config_json, &error);
894     GRPC_ERROR_UNREF(error);
895     grpc_arg arg = grpc_core::FakeResolverResponseGenerator::MakeChannelArg(
896         lb_channel_response_generator == nullptr
897             ? lb_channel_response_generator_.get()
898             : lb_channel_response_generator);
899     result.args = grpc_channel_args_copy_and_add(nullptr, &arg, 1);
900     response_generator_->SetResponse(std::move(result));
901   }
902
903   void SetNextResolutionForLbChannelAllBalancers(
904       const char* service_config_json = nullptr,
905       grpc_core::FakeResolverResponseGenerator* lb_channel_response_generator =
906           nullptr) {
907     std::vector<int> ports;
908     for (size_t i = 0; i < balancers_.size(); ++i) {
909       ports.emplace_back(balancers_[i]->port());
910     }
911     SetNextResolutionForLbChannel(ports, service_config_json,
912                                   lb_channel_response_generator);
913   }
914
915   void SetNextResolutionForLbChannel(
916       const std::vector<int>& ports, const char* service_config_json = nullptr,
917       grpc_core::FakeResolverResponseGenerator* lb_channel_response_generator =
918           nullptr) {
919     grpc_core::ExecCtx exec_ctx;
920     grpc_core::Resolver::Result result;
921     result.addresses = CreateAddressListFromPortList(ports);
922     if (service_config_json != nullptr) {
923       grpc_error* error = GRPC_ERROR_NONE;
924       result.service_config =
925           grpc_core::ServiceConfig::Create(service_config_json, &error);
926       GRPC_ERROR_UNREF(error);
927     }
928     if (lb_channel_response_generator == nullptr) {
929       lb_channel_response_generator = lb_channel_response_generator_.get();
930     }
931     lb_channel_response_generator->SetResponse(std::move(result));
932   }
933
934   void SetNextReresolutionResponse(const std::vector<int>& ports) {
935     grpc_core::ExecCtx exec_ctx;
936     grpc_core::Resolver::Result result;
937     result.addresses = CreateAddressListFromPortList(ports);
938     response_generator_->SetReresolutionResponse(std::move(result));
939   }
940
941   const std::vector<int> GetBackendPorts(size_t start_index = 0,
942                                          size_t stop_index = 0) const {
943     if (stop_index == 0) stop_index = backends_.size();
944     std::vector<int> backend_ports;
945     for (size_t i = start_index; i < stop_index; ++i) {
946       backend_ports.push_back(backends_[i]->port());
947     }
948     return backend_ports;
949   }
950
951   void ScheduleResponseForBalancer(size_t i, const DiscoveryResponse& response,
952                                    int delay_ms) {
953     balancers_[i]->ads_service()->AddEdsResponse(response, delay_ms);
954   }
955
956   Status SendRpc(EchoResponse* response = nullptr, int timeout_ms = 1000,
957                  bool wait_for_ready = false) {
958     const bool local_response = (response == nullptr);
959     if (local_response) response = new EchoResponse;
960     EchoRequest request;
961     request.set_message(kRequestMessage_);
962     ClientContext context;
963     context.set_deadline(grpc_timeout_milliseconds_to_deadline(timeout_ms));
964     if (wait_for_ready) context.set_wait_for_ready(true);
965     Status status = stub_->Echo(&context, request, response);
966     if (local_response) delete response;
967     return status;
968   }
969
970   void CheckRpcSendOk(const size_t times = 1, const int timeout_ms = 1000,
971                       bool wait_for_ready = false) {
972     for (size_t i = 0; i < times; ++i) {
973       EchoResponse response;
974       const Status status = SendRpc(&response, timeout_ms, wait_for_ready);
975       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
976                                << " message=" << status.error_message();
977       EXPECT_EQ(response.message(), kRequestMessage_);
978     }
979   }
980
981   void CheckRpcSendFailure() {
982     const Status status = SendRpc();
983     EXPECT_FALSE(status.ok());
984   }
985
986   class ServerThread {
987    public:
988     ServerThread() : port_(g_port_saver->GetPort()) {}
989     virtual ~ServerThread(){};
990
991     void Start(const grpc::string& server_host) {
992       gpr_log(GPR_INFO, "starting %s server on port %d", Type(), port_);
993       GPR_ASSERT(!running_);
994       running_ = true;
995       StartAllServices();
996       grpc_core::Mutex mu;
997       // We need to acquire the lock here in order to prevent the notify_one
998       // by ServerThread::Serve from firing before the wait below is hit.
999       grpc_core::MutexLock lock(&mu);
1000       grpc_core::CondVar cond;
1001       thread_.reset(new std::thread(
1002           std::bind(&ServerThread::Serve, this, server_host, &mu, &cond)));
1003       cond.Wait(&mu);
1004       gpr_log(GPR_INFO, "%s server startup complete", Type());
1005     }
1006
1007     void Serve(const grpc::string& server_host, grpc_core::Mutex* mu,
1008                grpc_core::CondVar* cond) {
1009       // We need to acquire the lock here in order to prevent the notify_one
1010       // below from firing before its corresponding wait is executed.
1011       grpc_core::MutexLock lock(mu);
1012       std::ostringstream server_address;
1013       server_address << server_host << ":" << port_;
1014       ServerBuilder builder;
1015       std::shared_ptr<ServerCredentials> creds(new SecureServerCredentials(
1016           grpc_fake_transport_security_server_credentials_create()));
1017       builder.AddListeningPort(server_address.str(), creds);
1018       RegisterAllServices(&builder);
1019       server_ = builder.BuildAndStart();
1020       cond->Signal();
1021     }
1022
1023     void Shutdown() {
1024       if (!running_) return;
1025       gpr_log(GPR_INFO, "%s about to shutdown", Type());
1026       ShutdownAllServices();
1027       server_->Shutdown(grpc_timeout_milliseconds_to_deadline(0));
1028       thread_->join();
1029       gpr_log(GPR_INFO, "%s shutdown completed", Type());
1030       running_ = false;
1031     }
1032
1033     int port() const { return port_; }
1034
1035    private:
1036     virtual void RegisterAllServices(ServerBuilder* builder) = 0;
1037     virtual void StartAllServices() = 0;
1038     virtual void ShutdownAllServices() = 0;
1039
1040     virtual const char* Type() = 0;
1041
1042     const int port_;
1043     std::unique_ptr<Server> server_;
1044     std::unique_ptr<std::thread> thread_;
1045     bool running_ = false;
1046   };
1047
1048   class BackendServerThread : public ServerThread {
1049    public:
1050     BackendServiceImpl* backend_service() { return &backend_service_; }
1051
1052    private:
1053     void RegisterAllServices(ServerBuilder* builder) override {
1054       builder->RegisterService(&backend_service_);
1055     }
1056
1057     void StartAllServices() override { backend_service_.Start(); }
1058
1059     void ShutdownAllServices() override { backend_service_.Shutdown(); }
1060
1061     const char* Type() override { return "Backend"; }
1062
1063     BackendServiceImpl backend_service_;
1064   };
1065
1066   class BalancerServerThread : public ServerThread {
1067    public:
1068     explicit BalancerServerThread(int client_load_reporting_interval = 0)
1069         : ads_service_(client_load_reporting_interval > 0),
1070           lrs_service_(client_load_reporting_interval) {}
1071
1072     AdsServiceImpl* ads_service() { return &ads_service_; }
1073     LrsServiceImpl* lrs_service() { return &lrs_service_; }
1074
1075    private:
1076     void RegisterAllServices(ServerBuilder* builder) override {
1077       builder->RegisterService(&ads_service_);
1078       builder->RegisterService(&lrs_service_);
1079     }
1080
1081     void StartAllServices() override {
1082       ads_service_.Start();
1083       lrs_service_.Start();
1084     }
1085
1086     void ShutdownAllServices() override {
1087       ads_service_.Shutdown();
1088       lrs_service_.Shutdown();
1089     }
1090
1091     const char* Type() override { return "Balancer"; }
1092
1093     AdsServiceImpl ads_service_;
1094     LrsServiceImpl lrs_service_;
1095   };
1096
  // Fixture configuration. NOTE(review): these four are presumably set by the
  // fixture constructor, which is outside this part of the file -- confirm.
  const grpc::string server_host_;
  const size_t num_backends_;
  const size_t num_balancers_;
  const int client_load_reporting_interval_seconds_;
  // Client channel and the Echo stub that SendRpc() issues calls on.
  std::shared_ptr<Channel> channel_;
  std::unique_ptr<grpc::testing::EchoTestService::Stub> stub_;
  // Test servers, indexed by backend / balancer number.
  std::vector<std::unique_ptr<BackendServerThread>> backends_;
  std::vector<std::unique_ptr<BalancerServerThread>> balancers_;
  // Receives the results pushed by SetNextResolution() and
  // SetNextReresolutionResponse().
  grpc_core::RefCountedPtr<grpc_core::FakeResolverResponseGenerator>
      response_generator_;
  // Default target of SetNextResolutionForLbChannel(); also wrapped into the
  // channel arg that SetNextResolution() attaches to its result.
  grpc_core::RefCountedPtr<grpc_core::FakeResolverResponseGenerator>
      lb_channel_response_generator_;
  // Payload of every Echo request; CheckRpcSendOk() expects it echoed back.
  const grpc::string kRequestMessage_ = "Live long and prosper.";
  // Target name; the secure naming tests pass it, suffixed with ";lb", to
  // ResetStub().
  const grpc::string kApplicationTargetName_ = "application_target_name";
  // Service config used by SetNextResolution() when the test parameter
  // enables load reporting.
  const char* kDefaultServiceConfig_ =
      "{\n"
      "  \"loadBalancingConfig\":[\n"
      "    { \"does_not_exist\":{} },\n"
      "    { \"xds_experimental\":{\n"
      "      \"lrsLoadReportingServerName\": \"\"\n"
      "    } }\n"
      "  ]\n"
      "}";
  // Same as above, but without the lrsLoadReportingServerName field; used
  // when load reporting is disabled.
  const char* kDefaultServiceConfigWithoutLoadReporting_ =
      "{\n"
      "  \"loadBalancingConfig\":[\n"
      "    { \"does_not_exist\":{} },\n"
      "    { \"xds_experimental\":{\n"
      "    } }\n"
      "  ]\n"
      "}";
1128 };
1129
1130 class BasicTest : public XdsEnd2endTest {
1131  public:
1132   BasicTest() : XdsEnd2endTest(4, 1) {}
1133 };
1134
1135 // Tests that the balancer sends the correct response to the client, and the
1136 // client sends RPCs to the backends using the default child policy.
1137 TEST_P(BasicTest, Vanilla) {
1138   SetNextResolution({});
1139   SetNextResolutionForLbChannelAllBalancers();
1140   const size_t kNumRpcsPerAddress = 100;
1141   AdsServiceImpl::ResponseArgs args({
1142       {"locality0", GetBackendPorts()},
1143   });
1144   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1145   // Make sure that trying to connect works without a call.
1146   channel_->GetState(true /* try_to_connect */);
1147   // We need to wait for all backends to come online.
1148   WaitForAllBackends();
1149   // Send kNumRpcsPerAddress RPCs per server.
1150   CheckRpcSendOk(kNumRpcsPerAddress * num_backends_);
1151   // Each backend should have gotten 100 requests.
1152   for (size_t i = 0; i < backends_.size(); ++i) {
1153     EXPECT_EQ(kNumRpcsPerAddress,
1154               backends_[i]->backend_service()->request_count());
1155   }
1156   // The ADS service got a single request, and sent a single response.
1157   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1158   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1159   // Check LB policy name for the channel.
1160   EXPECT_EQ(
1161       (GetParam().use_xds_resolver() ? "cds_experimental" : "xds_experimental"),
1162       channel_->GetLoadBalancingPolicyName());
1163 }
1164
1165 TEST_P(BasicTest, IgnoresUnhealthyEndpoints) {
1166   SetNextResolution({});
1167   SetNextResolutionForLbChannelAllBalancers();
1168   const size_t kNumRpcsPerAddress = 100;
1169   AdsServiceImpl::ResponseArgs args({
1170       {"locality0",
1171        GetBackendPorts(),
1172        kDefaultLocalityWeight,
1173        kDefaultLocalityPriority,
1174        {envoy::api::v2::HealthStatus::DRAINING}},
1175   });
1176   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1177   // Make sure that trying to connect works without a call.
1178   channel_->GetState(true /* try_to_connect */);
1179   // We need to wait for all backends to come online.
1180   WaitForAllBackends(/*start_index=*/1);
1181   // Send kNumRpcsPerAddress RPCs per server.
1182   CheckRpcSendOk(kNumRpcsPerAddress * (num_backends_ - 1));
1183   // Each backend should have gotten 100 requests.
1184   for (size_t i = 1; i < backends_.size(); ++i) {
1185     EXPECT_EQ(kNumRpcsPerAddress,
1186               backends_[i]->backend_service()->request_count());
1187   }
1188   // The ADS service got a single request, and sent a single response.
1189   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1190   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1191 }
1192
1193 // Tests that subchannel sharing works when the same backend is listed multiple
1194 // times.
1195 TEST_P(BasicTest, SameBackendListedMultipleTimes) {
1196   SetNextResolution({});
1197   SetNextResolutionForLbChannelAllBalancers();
1198   // Same backend listed twice.
1199   std::vector<int> ports(2, backends_[0]->port());
1200   AdsServiceImpl::ResponseArgs args({
1201       {"locality0", ports},
1202   });
1203   const size_t kNumRpcsPerAddress = 10;
1204   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1205   // We need to wait for the backend to come online.
1206   WaitForBackend(0);
1207   // Send kNumRpcsPerAddress RPCs per server.
1208   CheckRpcSendOk(kNumRpcsPerAddress * ports.size());
1209   // Backend should have gotten 20 requests.
1210   EXPECT_EQ(kNumRpcsPerAddress * ports.size(),
1211             backends_[0]->backend_service()->request_count());
1212   // And they should have come from a single client port, because of
1213   // subchannel sharing.
1214   EXPECT_EQ(1UL, backends_[0]->backend_service()->clients().size());
1215 }
1216
1217 // Tests that RPCs will be blocked until a non-empty serverlist is received.
1218 TEST_P(BasicTest, InitiallyEmptyServerlist) {
1219   SetNextResolution({});
1220   SetNextResolutionForLbChannelAllBalancers();
1221   const int kServerlistDelayMs = 500 * grpc_test_slowdown_factor();
1222   const int kCallDeadlineMs = kServerlistDelayMs * 2;
1223   // First response is an empty serverlist, sent right away.
1224   AdsServiceImpl::ResponseArgs::Locality empty_locality("locality0", {});
1225   AdsServiceImpl::ResponseArgs args({
1226       empty_locality,
1227   });
1228   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1229   // Send non-empty serverlist only after kServerlistDelayMs.
1230   args = AdsServiceImpl::ResponseArgs({
1231       {"locality0", GetBackendPorts()},
1232   });
1233   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args),
1234                               kServerlistDelayMs);
1235   const auto t0 = system_clock::now();
1236   // Client will block: LB will initially send empty serverlist.
1237   CheckRpcSendOk(1, kCallDeadlineMs, true /* wait_for_ready */);
1238   const auto ellapsed_ms =
1239       std::chrono::duration_cast<std::chrono::milliseconds>(
1240           system_clock::now() - t0);
1241   // but eventually, the LB sends a serverlist update that allows the call to
1242   // proceed. The call delay must be larger than the delay in sending the
1243   // populated serverlist but under the call's deadline (which is enforced by
1244   // the call's deadline).
1245   EXPECT_GT(ellapsed_ms.count(), kServerlistDelayMs);
1246   // The ADS service got a single request.
1247   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1248   // and sent two responses.
1249   EXPECT_EQ(2U, balancers_[0]->ads_service()->response_count());
1250 }
1251
1252 // Tests that RPCs will fail with UNAVAILABLE instead of DEADLINE_EXCEEDED if
1253 // all the servers are unreachable.
1254 TEST_P(BasicTest, AllServersUnreachableFailFast) {
1255   SetNextResolution({});
1256   SetNextResolutionForLbChannelAllBalancers();
1257   const size_t kNumUnreachableServers = 5;
1258   std::vector<int> ports;
1259   for (size_t i = 0; i < kNumUnreachableServers; ++i) {
1260     ports.push_back(g_port_saver->GetPort());
1261   }
1262   AdsServiceImpl::ResponseArgs args({
1263       {"locality0", ports},
1264   });
1265   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1266   const Status status = SendRpc();
1267   // The error shouldn't be DEADLINE_EXCEEDED.
1268   EXPECT_EQ(StatusCode::UNAVAILABLE, status.error_code());
1269   // The ADS service got a single request, and sent a single response.
1270   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1271   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1272 }
1273
1274 // Tests that RPCs fail when the backends are down, and will succeed again after
1275 // the backends are restarted.
1276 TEST_P(BasicTest, BackendsRestart) {
1277   SetNextResolution({});
1278   SetNextResolutionForLbChannelAllBalancers();
1279   AdsServiceImpl::ResponseArgs args({
1280       {"locality0", GetBackendPorts()},
1281   });
1282   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1283   WaitForAllBackends();
1284   // Stop backends.  RPCs should fail.
1285   ShutdownAllBackends();
1286   CheckRpcSendFailure();
1287   // Restart all backends.  RPCs should start succeeding again.
1288   StartAllBackends();
1289   CheckRpcSendOk(1 /* times */, 2000 /* timeout_ms */,
1290                  true /* wait_for_ready */);
1291 }
1292
1293 using SecureNamingTest = BasicTest;
1294
1295 // Tests that secure naming check passes if target name is expected.
1296 TEST_P(SecureNamingTest, TargetNameIsExpected) {
1297   // TODO(juanlishen): Use separate fake creds for the balancer channel.
1298   ResetStub(0, 0, kApplicationTargetName_ + ";lb");
1299   SetNextResolution({});
1300   SetNextResolutionForLbChannel({balancers_[0]->port()});
1301   const size_t kNumRpcsPerAddress = 100;
1302   AdsServiceImpl::ResponseArgs args({
1303       {"locality0", GetBackendPorts()},
1304   });
1305   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1306   // Make sure that trying to connect works without a call.
1307   channel_->GetState(true /* try_to_connect */);
1308   // We need to wait for all backends to come online.
1309   WaitForAllBackends();
1310   // Send kNumRpcsPerAddress RPCs per server.
1311   CheckRpcSendOk(kNumRpcsPerAddress * num_backends_);
1312   // Each backend should have gotten 100 requests.
1313   for (size_t i = 0; i < backends_.size(); ++i) {
1314     EXPECT_EQ(kNumRpcsPerAddress,
1315               backends_[i]->backend_service()->request_count());
1316   }
1317   // The ADS service got a single request, and sent a single response.
1318   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1319   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1320 }
1321
1322 // Tests that secure naming check fails if target name is unexpected.
1323 TEST_P(SecureNamingTest, TargetNameIsUnexpected) {
1324   gpr_setenv("GRPC_XDS_BOOTSTRAP", g_bootstrap_file_bad);
1325   ::testing::FLAGS_gtest_death_test_style = "threadsafe";
1326   // Make sure that we blow up (via abort() from the security connector) when
1327   // the name from the balancer doesn't match expectations.
1328   ASSERT_DEATH_IF_SUPPORTED(
1329       {
1330         ResetStub(0, 0, kApplicationTargetName_ + ";lb");
1331         SetNextResolution({});
1332         SetNextResolutionForLbChannel({balancers_[0]->port()});
1333         channel_->WaitForConnected(grpc_timeout_seconds_to_deadline(1));
1334       },
1335       "");
1336 }
1337
1338 using CdsTest = BasicTest;
1339
1340 // Tests that CDS client should send an ACK upon correct CDS response.
1341 TEST_P(CdsTest, Vanilla) {
1342   SetNextResolution({});
1343   SetNextResolutionForLbChannelAllBalancers();
1344   SendRpc();
1345   EXPECT_EQ(balancers_[0]->ads_service()->cds_response_state(),
1346             AdsServiceImpl::ACKED);
1347 }
1348
1349 // Tests that CDS client should send a NACK if the cluster type in CDS response
1350 // is other than EDS.
1351 TEST_P(CdsTest, WrongClusterType) {
1352   auto cluster = balancers_[0]->ads_service()->GetDefaultCluster();
1353   cluster.set_type(envoy::api::v2::Cluster::STATIC);
1354   balancers_[0]->ads_service()->SetCdsResponse(
1355       {{"application_target_name", std::move(cluster)}});
1356   SetNextResolution({});
1357   SetNextResolutionForLbChannelAllBalancers();
1358   SendRpc();
1359   EXPECT_EQ(balancers_[0]->ads_service()->cds_response_state(),
1360             AdsServiceImpl::NACKED);
1361 }
1362
1363 // Tests that CDS client should send a NACK if the eds_config in CDS response is
1364 // other than ADS.
1365 TEST_P(CdsTest, WrongEdsConfig) {
1366   auto cluster = balancers_[0]->ads_service()->GetDefaultCluster();
1367   cluster.mutable_eds_cluster_config()->mutable_eds_config()->mutable_self();
1368   balancers_[0]->ads_service()->SetCdsResponse(
1369       {{"application_target_name", std::move(cluster)}});
1370   SetNextResolution({});
1371   SetNextResolutionForLbChannelAllBalancers();
1372   SendRpc();
1373   EXPECT_EQ(balancers_[0]->ads_service()->cds_response_state(),
1374             AdsServiceImpl::NACKED);
1375 }
1376
1377 // Tests that CDS client should send a NACK if the lb_policy in CDS response is
1378 // other than ROUND_ROBIN.
1379 TEST_P(CdsTest, WrongLbPolicy) {
1380   auto cluster = balancers_[0]->ads_service()->GetDefaultCluster();
1381   cluster.set_lb_policy(envoy::api::v2::Cluster::LEAST_REQUEST);
1382   balancers_[0]->ads_service()->SetCdsResponse(
1383       {{"application_target_name", std::move(cluster)}});
1384   SetNextResolution({});
1385   SetNextResolutionForLbChannelAllBalancers();
1386   SendRpc();
1387   EXPECT_EQ(balancers_[0]->ads_service()->cds_response_state(),
1388             AdsServiceImpl::NACKED);
1389 }
1390
1391 // Tests that CDS client should send a NACK if the lrs_server in CDS response is
1392 // other than SELF.
1393 TEST_P(CdsTest, WrongLrsServer) {
1394   auto cluster = balancers_[0]->ads_service()->GetDefaultCluster();
1395   cluster.mutable_lrs_server()->mutable_ads();
1396   balancers_[0]->ads_service()->SetCdsResponse(
1397       {{"application_target_name", std::move(cluster)}});
1398   SetNextResolution({});
1399   SetNextResolutionForLbChannelAllBalancers();
1400   SendRpc();
1401   EXPECT_EQ(balancers_[0]->ads_service()->cds_response_state(),
1402             AdsServiceImpl::NACKED);
1403 }
1404
1405 using LocalityMapTest = BasicTest;
1406
1407 // Tests that the localities in a locality map are picked according to their
1408 // weights.
1409 TEST_P(LocalityMapTest, WeightedRoundRobin) {
1410   SetNextResolution({});
1411   SetNextResolutionForLbChannelAllBalancers();
1412   const size_t kNumRpcs = 5000;
1413   const int kLocalityWeight0 = 2;
1414   const int kLocalityWeight1 = 8;
1415   const int kTotalLocalityWeight = kLocalityWeight0 + kLocalityWeight1;
1416   const double kLocalityWeightRate0 =
1417       static_cast<double>(kLocalityWeight0) / kTotalLocalityWeight;
1418   const double kLocalityWeightRate1 =
1419       static_cast<double>(kLocalityWeight1) / kTotalLocalityWeight;
1420   // ADS response contains 2 localities, each of which contains 1 backend.
1421   AdsServiceImpl::ResponseArgs args({
1422       {"locality0", GetBackendPorts(0, 1), kLocalityWeight0},
1423       {"locality1", GetBackendPorts(1, 2), kLocalityWeight1},
1424   });
1425   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1426   // Wait for both backends to be ready.
1427   WaitForAllBackends(0, 2);
1428   // Send kNumRpcs RPCs.
1429   CheckRpcSendOk(kNumRpcs);
1430   // The locality picking rates should be roughly equal to the expectation.
1431   const double locality_picked_rate_0 =
1432       static_cast<double>(backends_[0]->backend_service()->request_count()) /
1433       kNumRpcs;
1434   const double locality_picked_rate_1 =
1435       static_cast<double>(backends_[1]->backend_service()->request_count()) /
1436       kNumRpcs;
1437   const double kErrorTolerance = 0.2;
1438   EXPECT_THAT(locality_picked_rate_0,
1439               ::testing::AllOf(
1440                   ::testing::Ge(kLocalityWeightRate0 * (1 - kErrorTolerance)),
1441                   ::testing::Le(kLocalityWeightRate0 * (1 + kErrorTolerance))));
1442   EXPECT_THAT(locality_picked_rate_1,
1443               ::testing::AllOf(
1444                   ::testing::Ge(kLocalityWeightRate1 * (1 - kErrorTolerance)),
1445                   ::testing::Le(kLocalityWeightRate1 * (1 + kErrorTolerance))));
1446   // The ADS service got a single request, and sent a single response.
1447   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1448   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1449 }
1450
1451 // Tests that the locality map can work properly even when it contains a large
1452 // number of localities.
1453 TEST_P(LocalityMapTest, StressTest) {
1454   SetNextResolution({});
1455   SetNextResolutionForLbChannelAllBalancers();
1456   const size_t kNumLocalities = 100;
1457   // The first ADS response contains kNumLocalities localities, each of which
1458   // contains backend 0.
1459   AdsServiceImpl::ResponseArgs args;
1460   for (size_t i = 0; i < kNumLocalities; ++i) {
1461     grpc::string name = "locality" + std::to_string(i);
1462     AdsServiceImpl::ResponseArgs::Locality locality(name,
1463                                                     {backends_[0]->port()});
1464     args.locality_list.emplace_back(std::move(locality));
1465   }
1466   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1467   // The second ADS response contains 1 locality, which contains backend 1.
1468   args = AdsServiceImpl::ResponseArgs({
1469       {"locality0", GetBackendPorts(1, 2)},
1470   });
1471   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args),
1472                               60 * 1000);
1473   // Wait until backend 0 is ready, before which kNumLocalities localities are
1474   // received and handled by the xds policy.
1475   WaitForBackend(0, /*reset_counters=*/false);
1476   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
1477   // Wait until backend 1 is ready, before which kNumLocalities localities are
1478   // removed by the xds policy.
1479   WaitForBackend(1);
1480   // The ADS service got a single request.
1481   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1482   // and sent two responses.
1483   EXPECT_EQ(2U, balancers_[0]->ads_service()->response_count());
1484 }
1485
1486 // Tests that the localities in a locality map are picked correctly after update
1487 // (addition, modification, deletion).
1488 TEST_P(LocalityMapTest, UpdateMap) {
1489   SetNextResolution({});
1490   SetNextResolutionForLbChannelAllBalancers();
1491   const size_t kNumRpcs = 1000;
1492   // The locality weight for the first 3 localities.
1493   const std::vector<int> kLocalityWeights0 = {2, 3, 4};
1494   const double kTotalLocalityWeight0 =
1495       std::accumulate(kLocalityWeights0.begin(), kLocalityWeights0.end(), 0);
1496   std::vector<double> locality_weight_rate_0;
1497   for (int weight : kLocalityWeights0) {
1498     locality_weight_rate_0.push_back(weight / kTotalLocalityWeight0);
1499   }
1500   // Delete the first locality, keep the second locality, change the third
1501   // locality's weight from 4 to 2, and add a new locality with weight 6.
1502   const std::vector<int> kLocalityWeights1 = {3, 2, 6};
1503   const double kTotalLocalityWeight1 =
1504       std::accumulate(kLocalityWeights1.begin(), kLocalityWeights1.end(), 0);
1505   std::vector<double> locality_weight_rate_1 = {
1506       0 /* placeholder for locality 0 */};
1507   for (int weight : kLocalityWeights1) {
1508     locality_weight_rate_1.push_back(weight / kTotalLocalityWeight1);
1509   }
1510   AdsServiceImpl::ResponseArgs args({
1511       {"locality0", GetBackendPorts(0, 1), 2},
1512       {"locality1", GetBackendPorts(1, 2), 3},
1513       {"locality2", GetBackendPorts(2, 3), 4},
1514   });
1515   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1516   args = AdsServiceImpl::ResponseArgs({
1517       {"locality1", GetBackendPorts(1, 2), 3},
1518       {"locality2", GetBackendPorts(2, 3), 2},
1519       {"locality3", GetBackendPorts(3, 4), 6},
1520   });
1521   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 5000);
1522   // Wait for the first 3 backends to be ready.
1523   WaitForAllBackends(0, 3);
1524   gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
1525   // Send kNumRpcs RPCs.
1526   CheckRpcSendOk(kNumRpcs);
1527   gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
1528   // The picking rates of the first 3 backends should be roughly equal to the
1529   // expectation.
1530   std::vector<double> locality_picked_rates;
1531   for (size_t i = 0; i < 3; ++i) {
1532     locality_picked_rates.push_back(
1533         static_cast<double>(backends_[i]->backend_service()->request_count()) /
1534         kNumRpcs);
1535   }
1536   const double kErrorTolerance = 0.2;
1537   for (size_t i = 0; i < 3; ++i) {
1538     EXPECT_THAT(
1539         locality_picked_rates[i],
1540         ::testing::AllOf(
1541             ::testing::Ge(locality_weight_rate_0[i] * (1 - kErrorTolerance)),
1542             ::testing::Le(locality_weight_rate_0[i] * (1 + kErrorTolerance))));
1543   }
1544   // Backend 3 hasn't received any request.
1545   EXPECT_EQ(0U, backends_[3]->backend_service()->request_count());
1546   // The ADS service got a single request, and sent a single response.
1547   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1548   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1549   // Wait until the locality update has been processed, as signaled by backend 3
1550   // receiving a request.
1551   WaitForBackend(3);
1552   gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH ==========");
1553   // Send kNumRpcs RPCs.
1554   CheckRpcSendOk(kNumRpcs);
1555   gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH ==========");
1556   // Backend 0 no longer receives any request.
1557   EXPECT_EQ(0U, backends_[0]->backend_service()->request_count());
1558   // The picking rates of the last 3 backends should be roughly equal to the
1559   // expectation.
1560   locality_picked_rates = {0 /* placeholder for backend 0 */};
1561   for (size_t i = 1; i < 4; ++i) {
1562     locality_picked_rates.push_back(
1563         static_cast<double>(backends_[i]->backend_service()->request_count()) /
1564         kNumRpcs);
1565   }
1566   for (size_t i = 1; i < 4; ++i) {
1567     EXPECT_THAT(
1568         locality_picked_rates[i],
1569         ::testing::AllOf(
1570             ::testing::Ge(locality_weight_rate_1[i] * (1 - kErrorTolerance)),
1571             ::testing::Le(locality_weight_rate_1[i] * (1 + kErrorTolerance))));
1572   }
1573   // The ADS service got a single request.
1574   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1575   // and sent two responses.
1576   EXPECT_EQ(2U, balancers_[0]->ads_service()->response_count());
1577 }
1578
1579 class FailoverTest : public BasicTest {
1580  public:
1581   FailoverTest() { ResetStub(0, 100, ""); }
1582 };
1583
// Localities with the highest priority are used when multiple priorities
// exist.
1585 TEST_P(FailoverTest, ChooseHighestPriority) {
1586   SetNextResolution({});
1587   SetNextResolutionForLbChannelAllBalancers();
1588   AdsServiceImpl::ResponseArgs args({
1589       {"locality0", GetBackendPorts(0, 1), kDefaultLocalityWeight, 1},
1590       {"locality1", GetBackendPorts(1, 2), kDefaultLocalityWeight, 2},
1591       {"locality2", GetBackendPorts(2, 3), kDefaultLocalityWeight, 3},
1592       {"locality3", GetBackendPorts(3, 4), kDefaultLocalityWeight, 0},
1593   });
1594   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1595   WaitForBackend(3, false);
1596   for (size_t i = 0; i < 3; ++i) {
1597     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
1598   }
1599   // The ADS service got a single request, and sent a single response.
1600   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1601   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1602 }
1603
1604 // If the higher priority localities are not reachable, failover to the highest
1605 // priority among the rest.
1606 TEST_P(FailoverTest, Failover) {
1607   SetNextResolution({});
1608   SetNextResolutionForLbChannelAllBalancers();
1609   AdsServiceImpl::ResponseArgs args({
1610       {"locality0", GetBackendPorts(0, 1), kDefaultLocalityWeight, 1},
1611       {"locality1", GetBackendPorts(1, 2), kDefaultLocalityWeight, 2},
1612       {"locality2", GetBackendPorts(2, 3), kDefaultLocalityWeight, 3},
1613       {"locality3", GetBackendPorts(3, 4), kDefaultLocalityWeight, 0},
1614   });
1615   ShutdownBackend(3);
1616   ShutdownBackend(0);
1617   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1618   WaitForBackend(1, false);
1619   for (size_t i = 0; i < 4; ++i) {
1620     if (i == 1) continue;
1621     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
1622   }
1623   // The ADS service got a single request, and sent a single response.
1624   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1625   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1626 }
1627
1628 // If a locality with higher priority than the current one becomes ready,
1629 // switch to it.
1630 TEST_P(FailoverTest, SwitchBackToHigherPriority) {
1631   SetNextResolution({});
1632   SetNextResolutionForLbChannelAllBalancers();
1633   const size_t kNumRpcs = 100;
1634   AdsServiceImpl::ResponseArgs args({
1635       {"locality0", GetBackendPorts(0, 1), kDefaultLocalityWeight, 1},
1636       {"locality1", GetBackendPorts(1, 2), kDefaultLocalityWeight, 2},
1637       {"locality2", GetBackendPorts(2, 3), kDefaultLocalityWeight, 3},
1638       {"locality3", GetBackendPorts(3, 4), kDefaultLocalityWeight, 0},
1639   });
1640   ShutdownBackend(3);
1641   ShutdownBackend(0);
1642   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1643   WaitForBackend(1, false);
1644   for (size_t i = 0; i < 4; ++i) {
1645     if (i == 1) continue;
1646     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
1647   }
1648   StartBackend(0);
1649   WaitForBackend(0);
1650   CheckRpcSendOk(kNumRpcs);
1651   EXPECT_EQ(kNumRpcs, backends_[0]->backend_service()->request_count());
1652   // The ADS service got a single request, and sent a single response.
1653   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1654   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1655 }
1656
1657 // The first update only contains unavailable priorities. The second update
1658 // contains available priorities.
1659 TEST_P(FailoverTest, UpdateInitialUnavailable) {
1660   SetNextResolution({});
1661   SetNextResolutionForLbChannelAllBalancers();
1662   AdsServiceImpl::ResponseArgs args({
1663       {"locality0", GetBackendPorts(0, 1), kDefaultLocalityWeight, 0},
1664       {"locality1", GetBackendPorts(1, 2), kDefaultLocalityWeight, 1},
1665   });
1666   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1667   args = AdsServiceImpl::ResponseArgs({
1668       {"locality0", GetBackendPorts(0, 1), kDefaultLocalityWeight, 0},
1669       {"locality1", GetBackendPorts(1, 2), kDefaultLocalityWeight, 1},
1670       {"locality2", GetBackendPorts(2, 3), kDefaultLocalityWeight, 2},
1671       {"locality3", GetBackendPorts(3, 4), kDefaultLocalityWeight, 3},
1672   });
1673   ShutdownBackend(0);
1674   ShutdownBackend(1);
1675   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 1000);
1676   gpr_timespec deadline = gpr_time_add(gpr_now(GPR_CLOCK_REALTIME),
1677                                        gpr_time_from_millis(500, GPR_TIMESPAN));
1678   // Send 0.5 second worth of RPCs.
1679   do {
1680     CheckRpcSendFailure();
1681   } while (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), deadline) < 0);
1682   WaitForBackend(2, false);
1683   for (size_t i = 0; i < 4; ++i) {
1684     if (i == 2) continue;
1685     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
1686   }
1687   // The ADS service got a single request, and sent a single response.
1688   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1689   EXPECT_EQ(2U, balancers_[0]->ads_service()->response_count());
1690 }
1691
1692 // Tests that after the localities' priorities are updated, we still choose the
1693 // highest READY priority with the updated localities.
1694 TEST_P(FailoverTest, UpdatePriority) {
1695   SetNextResolution({});
1696   SetNextResolutionForLbChannelAllBalancers();
1697   const size_t kNumRpcs = 100;
1698   AdsServiceImpl::ResponseArgs args({
1699       {"locality0", GetBackendPorts(0, 1), kDefaultLocalityWeight, 1},
1700       {"locality1", GetBackendPorts(1, 2), kDefaultLocalityWeight, 2},
1701       {"locality2", GetBackendPorts(2, 3), kDefaultLocalityWeight, 3},
1702       {"locality3", GetBackendPorts(3, 4), kDefaultLocalityWeight, 0},
1703   });
1704   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1705   args = AdsServiceImpl::ResponseArgs({
1706       {"locality0", GetBackendPorts(0, 1), kDefaultLocalityWeight, 2},
1707       {"locality1", GetBackendPorts(1, 2), kDefaultLocalityWeight, 0},
1708       {"locality2", GetBackendPorts(2, 3), kDefaultLocalityWeight, 1},
1709       {"locality3", GetBackendPorts(3, 4), kDefaultLocalityWeight, 3},
1710   });
1711   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 1000);
1712   WaitForBackend(3, false);
1713   for (size_t i = 0; i < 3; ++i) {
1714     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
1715   }
1716   WaitForBackend(1);
1717   CheckRpcSendOk(kNumRpcs);
1718   EXPECT_EQ(kNumRpcs, backends_[1]->backend_service()->request_count());
1719   // The ADS service got a single request, and sent a single response.
1720   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1721   EXPECT_EQ(2U, balancers_[0]->ads_service()->response_count());
1722 }
1723
// Fixture alias for the drop-configuration tests; they need nothing beyond
// what BasicTest already provides.
using DropTest = BasicTest;
1725
1726 // Tests that RPCs are dropped according to the drop config.
1727 TEST_P(DropTest, Vanilla) {
1728   SetNextResolution({});
1729   SetNextResolutionForLbChannelAllBalancers();
1730   const size_t kNumRpcs = 5000;
1731   const uint32_t kDropPerMillionForLb = 100000;
1732   const uint32_t kDropPerMillionForThrottle = 200000;
1733   const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
1734   const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
1735   const double KDropRateForLbAndThrottle =
1736       kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
1737   // The ADS response contains two drop categories.
1738   AdsServiceImpl::ResponseArgs args({
1739       {"locality0", GetBackendPorts()},
1740   });
1741   args.drop_categories = {{kLbDropType, kDropPerMillionForLb},
1742                           {kThrottleDropType, kDropPerMillionForThrottle}};
1743   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1744   WaitForAllBackends();
1745   // Send kNumRpcs RPCs and count the drops.
1746   size_t num_drops = 0;
1747   for (size_t i = 0; i < kNumRpcs; ++i) {
1748     EchoResponse response;
1749     const Status status = SendRpc(&response);
1750     if (!status.ok() &&
1751         status.error_message() == "Call dropped by load balancing policy") {
1752       ++num_drops;
1753     } else {
1754       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
1755                                << " message=" << status.error_message();
1756       EXPECT_EQ(response.message(), kRequestMessage_);
1757     }
1758   }
1759   // The drop rate should be roughly equal to the expectation.
1760   const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
1761   const double kErrorTolerance = 0.2;
1762   EXPECT_THAT(
1763       seen_drop_rate,
1764       ::testing::AllOf(
1765           ::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
1766           ::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
1767   // The ADS service got a single request, and sent a single response.
1768   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1769   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1770 }
1771
1772 // Tests that drop config is converted correctly from per hundred.
1773 TEST_P(DropTest, DropPerHundred) {
1774   SetNextResolution({});
1775   SetNextResolutionForLbChannelAllBalancers();
1776   const size_t kNumRpcs = 5000;
1777   const uint32_t kDropPerHundredForLb = 10;
1778   const double kDropRateForLb = kDropPerHundredForLb / 100.0;
1779   // The ADS response contains one drop category.
1780   AdsServiceImpl::ResponseArgs args({
1781       {"locality0", GetBackendPorts()},
1782   });
1783   args.drop_categories = {{kLbDropType, kDropPerHundredForLb}};
1784   args.drop_denominator = FractionalPercent::HUNDRED;
1785   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1786   WaitForAllBackends();
1787   // Send kNumRpcs RPCs and count the drops.
1788   size_t num_drops = 0;
1789   for (size_t i = 0; i < kNumRpcs; ++i) {
1790     EchoResponse response;
1791     const Status status = SendRpc(&response);
1792     if (!status.ok() &&
1793         status.error_message() == "Call dropped by load balancing policy") {
1794       ++num_drops;
1795     } else {
1796       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
1797                                << " message=" << status.error_message();
1798       EXPECT_EQ(response.message(), kRequestMessage_);
1799     }
1800   }
1801   // The drop rate should be roughly equal to the expectation.
1802   const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
1803   const double kErrorTolerance = 0.2;
1804   EXPECT_THAT(
1805       seen_drop_rate,
1806       ::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
1807                        ::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
1808   // The ADS service got a single request, and sent a single response.
1809   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1810   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1811 }
1812
1813 // Tests that drop config is converted correctly from per ten thousand.
1814 TEST_P(DropTest, DropPerTenThousand) {
1815   SetNextResolution({});
1816   SetNextResolutionForLbChannelAllBalancers();
1817   const size_t kNumRpcs = 5000;
1818   const uint32_t kDropPerTenThousandForLb = 1000;
1819   const double kDropRateForLb = kDropPerTenThousandForLb / 10000.0;
1820   // The ADS response contains one drop category.
1821   AdsServiceImpl::ResponseArgs args({
1822       {"locality0", GetBackendPorts()},
1823   });
1824   args.drop_categories = {{kLbDropType, kDropPerTenThousandForLb}};
1825   args.drop_denominator = FractionalPercent::TEN_THOUSAND;
1826   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1827   WaitForAllBackends();
1828   // Send kNumRpcs RPCs and count the drops.
1829   size_t num_drops = 0;
1830   for (size_t i = 0; i < kNumRpcs; ++i) {
1831     EchoResponse response;
1832     const Status status = SendRpc(&response);
1833     if (!status.ok() &&
1834         status.error_message() == "Call dropped by load balancing policy") {
1835       ++num_drops;
1836     } else {
1837       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
1838                                << " message=" << status.error_message();
1839       EXPECT_EQ(response.message(), kRequestMessage_);
1840     }
1841   }
1842   // The drop rate should be roughly equal to the expectation.
1843   const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
1844   const double kErrorTolerance = 0.2;
1845   EXPECT_THAT(
1846       seen_drop_rate,
1847       ::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
1848                        ::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
1849   // The ADS service got a single request, and sent a single response.
1850   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1851   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1852 }
1853
1854 // Tests that drop is working correctly after update.
1855 TEST_P(DropTest, Update) {
1856   SetNextResolution({});
1857   SetNextResolutionForLbChannelAllBalancers();
1858   const size_t kNumRpcs = 1000;
1859   const uint32_t kDropPerMillionForLb = 100000;
1860   const uint32_t kDropPerMillionForThrottle = 200000;
1861   const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
1862   const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
1863   const double KDropRateForLbAndThrottle =
1864       kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
1865   // The first ADS response contains one drop category.
1866   AdsServiceImpl::ResponseArgs args({
1867       {"locality0", GetBackendPorts()},
1868   });
1869   args.drop_categories = {{kLbDropType, kDropPerMillionForLb}};
1870   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1871   // The second ADS response contains two drop categories.
1872   // TODO(juanlishen): Change the ADS response sending to deterministic style
1873   // (e.g., by using condition variable) so that we can shorten the test
1874   // duration.
1875   args.drop_categories = {{kLbDropType, kDropPerMillionForLb},
1876                           {kThrottleDropType, kDropPerMillionForThrottle}};
1877   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 10000);
1878   WaitForAllBackends();
1879   // Send kNumRpcs RPCs and count the drops.
1880   size_t num_drops = 0;
1881   gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
1882   for (size_t i = 0; i < kNumRpcs; ++i) {
1883     EchoResponse response;
1884     const Status status = SendRpc(&response);
1885     if (!status.ok() &&
1886         status.error_message() == "Call dropped by load balancing policy") {
1887       ++num_drops;
1888     } else {
1889       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
1890                                << " message=" << status.error_message();
1891       EXPECT_EQ(response.message(), kRequestMessage_);
1892     }
1893   }
1894   gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
1895   // The drop rate should be roughly equal to the expectation.
1896   double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
1897   const double kErrorTolerance = 0.3;
1898   EXPECT_THAT(
1899       seen_drop_rate,
1900       ::testing::AllOf(::testing::Ge(kDropRateForLb * (1 - kErrorTolerance)),
1901                        ::testing::Le(kDropRateForLb * (1 + kErrorTolerance))));
1902   // Wait until the drop rate increases to the middle of the two configs, which
1903   // implies that the update has been in effect.
1904   const double kDropRateThreshold =
1905       (kDropRateForLb + KDropRateForLbAndThrottle) / 2;
1906   size_t num_rpcs = kNumRpcs;
1907   while (seen_drop_rate < kDropRateThreshold) {
1908     EchoResponse response;
1909     const Status status = SendRpc(&response);
1910     ++num_rpcs;
1911     if (!status.ok() &&
1912         status.error_message() == "Call dropped by load balancing policy") {
1913       ++num_drops;
1914     } else {
1915       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
1916                                << " message=" << status.error_message();
1917       EXPECT_EQ(response.message(), kRequestMessage_);
1918     }
1919     seen_drop_rate = static_cast<double>(num_drops) / num_rpcs;
1920   }
1921   // Send kNumRpcs RPCs and count the drops.
1922   num_drops = 0;
1923   gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH ==========");
1924   for (size_t i = 0; i < kNumRpcs; ++i) {
1925     EchoResponse response;
1926     const Status status = SendRpc(&response);
1927     if (!status.ok() &&
1928         status.error_message() == "Call dropped by load balancing policy") {
1929       ++num_drops;
1930     } else {
1931       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
1932                                << " message=" << status.error_message();
1933       EXPECT_EQ(response.message(), kRequestMessage_);
1934     }
1935   }
1936   gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH ==========");
1937   // The new drop rate should be roughly equal to the expectation.
1938   seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
1939   EXPECT_THAT(
1940       seen_drop_rate,
1941       ::testing::AllOf(
1942           ::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
1943           ::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
1944   // The ADS service got a single request,
1945   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1946   // and sent two responses
1947   EXPECT_EQ(2U, balancers_[0]->ads_service()->response_count());
1948 }
1949
1950 // Tests that all the RPCs are dropped if any drop category drops 100%.
1951 TEST_P(DropTest, DropAll) {
1952   SetNextResolution({});
1953   SetNextResolutionForLbChannelAllBalancers();
1954   const size_t kNumRpcs = 1000;
1955   const uint32_t kDropPerMillionForLb = 100000;
1956   const uint32_t kDropPerMillionForThrottle = 1000000;
1957   // The ADS response contains two drop categories.
1958   AdsServiceImpl::ResponseArgs args({
1959       {"locality0", GetBackendPorts()},
1960   });
1961   args.drop_categories = {{kLbDropType, kDropPerMillionForLb},
1962                           {kThrottleDropType, kDropPerMillionForThrottle}};
1963   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
1964   // Send kNumRpcs RPCs and all of them are dropped.
1965   for (size_t i = 0; i < kNumRpcs; ++i) {
1966     EchoResponse response;
1967     const Status status = SendRpc(&response);
1968     EXPECT_TRUE(!status.ok() && status.error_message() ==
1969                                     "Call dropped by load balancing policy");
1970   }
1971   // The ADS service got a single request, and sent a single response.
1972   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
1973   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
1974 }
1975
// Fixture alias for the fallback tests; they need nothing beyond what
// BasicTest already provides.
using FallbackTest = BasicTest;
1977
1978 // Tests that RPCs are handled by the fallback backends before the serverlist is
1979 // received, but will be handled by the serverlist after it's received.
1980 TEST_P(FallbackTest, Vanilla) {
1981   const int kFallbackTimeoutMs = 200 * grpc_test_slowdown_factor();
1982   const int kServerlistDelayMs = 500 * grpc_test_slowdown_factor();
1983   const size_t kNumBackendsInResolution = backends_.size() / 2;
1984   ResetStub(kFallbackTimeoutMs);
1985   SetNextResolution(GetBackendPorts(0, kNumBackendsInResolution));
1986   SetNextResolutionForLbChannelAllBalancers();
1987   // Send non-empty serverlist only after kServerlistDelayMs.
1988   AdsServiceImpl::ResponseArgs args({
1989       {"locality0", GetBackendPorts(kNumBackendsInResolution)},
1990   });
1991   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args),
1992                               kServerlistDelayMs);
1993   // Wait until all the fallback backends are reachable.
1994   WaitForAllBackends(0 /* start_index */,
1995                      kNumBackendsInResolution /* stop_index */);
1996   gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
1997   CheckRpcSendOk(kNumBackendsInResolution);
1998   gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
1999   // Fallback is used: each backend returned by the resolver should have
2000   // gotten one request.
2001   for (size_t i = 0; i < kNumBackendsInResolution; ++i) {
2002     EXPECT_EQ(1U, backends_[i]->backend_service()->request_count());
2003   }
2004   for (size_t i = kNumBackendsInResolution; i < backends_.size(); ++i) {
2005     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
2006   }
2007   // Wait until the serverlist reception has been processed and all backends
2008   // in the serverlist are reachable.
2009   WaitForAllBackends(kNumBackendsInResolution /* start_index */);
2010   gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH ==========");
2011   CheckRpcSendOk(backends_.size() - kNumBackendsInResolution);
2012   gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH ==========");
2013   // Serverlist is used: each backend returned by the balancer should
2014   // have gotten one request.
2015   for (size_t i = 0; i < kNumBackendsInResolution; ++i) {
2016     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
2017   }
2018   for (size_t i = kNumBackendsInResolution; i < backends_.size(); ++i) {
2019     EXPECT_EQ(1U, backends_[i]->backend_service()->request_count());
2020   }
2021   // The ADS service got a single request, and sent a single response.
2022   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2023   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2024 }
2025
// Tests that RPCs are handled by the updated fallback backends before the
// serverlist is received, and by the serverlist backends once it has been
// received.
2028 TEST_P(FallbackTest, Update) {
2029   const int kFallbackTimeoutMs = 200 * grpc_test_slowdown_factor();
2030   const int kServerlistDelayMs = 500 * grpc_test_slowdown_factor();
2031   const size_t kNumBackendsInResolution = backends_.size() / 3;
2032   const size_t kNumBackendsInResolutionUpdate = backends_.size() / 3;
2033   ResetStub(kFallbackTimeoutMs);
2034   SetNextResolution(GetBackendPorts(0, kNumBackendsInResolution));
2035   SetNextResolutionForLbChannelAllBalancers();
2036   // Send non-empty serverlist only after kServerlistDelayMs.
2037   AdsServiceImpl::ResponseArgs args({
2038       {"locality0", GetBackendPorts(kNumBackendsInResolution +
2039                                     kNumBackendsInResolutionUpdate)},
2040   });
2041   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args),
2042                               kServerlistDelayMs);
2043   // Wait until all the fallback backends are reachable.
2044   WaitForAllBackends(0 /* start_index */,
2045                      kNumBackendsInResolution /* stop_index */);
2046   gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
2047   CheckRpcSendOk(kNumBackendsInResolution);
2048   gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
2049   // Fallback is used: each backend returned by the resolver should have
2050   // gotten one request.
2051   for (size_t i = 0; i < kNumBackendsInResolution; ++i) {
2052     EXPECT_EQ(1U, backends_[i]->backend_service()->request_count());
2053   }
2054   for (size_t i = kNumBackendsInResolution; i < backends_.size(); ++i) {
2055     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
2056   }
2057   SetNextResolution(GetBackendPorts(
2058       kNumBackendsInResolution,
2059       kNumBackendsInResolution + kNumBackendsInResolutionUpdate));
2060   // Wait until the resolution update has been processed and all the new
2061   // fallback backends are reachable.
2062   WaitForAllBackends(kNumBackendsInResolution /* start_index */,
2063                      kNumBackendsInResolution +
2064                          kNumBackendsInResolutionUpdate /* stop_index */);
2065   gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH ==========");
2066   CheckRpcSendOk(kNumBackendsInResolutionUpdate);
2067   gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH ==========");
2068   // The resolution update is used: each backend in the resolution update should
2069   // have gotten one request.
2070   for (size_t i = 0; i < kNumBackendsInResolution; ++i) {
2071     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
2072   }
2073   for (size_t i = kNumBackendsInResolution;
2074        i < kNumBackendsInResolution + kNumBackendsInResolutionUpdate; ++i) {
2075     EXPECT_EQ(1U, backends_[i]->backend_service()->request_count());
2076   }
2077   for (size_t i = kNumBackendsInResolution + kNumBackendsInResolutionUpdate;
2078        i < backends_.size(); ++i) {
2079     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
2080   }
2081   // Wait until the serverlist reception has been processed and all backends
2082   // in the serverlist are reachable.
2083   WaitForAllBackends(kNumBackendsInResolution +
2084                      kNumBackendsInResolutionUpdate /* start_index */);
2085   gpr_log(GPR_INFO, "========= BEFORE THIRD BATCH ==========");
2086   CheckRpcSendOk(backends_.size() - kNumBackendsInResolution -
2087                  kNumBackendsInResolutionUpdate);
2088   gpr_log(GPR_INFO, "========= DONE WITH THIRD BATCH ==========");
2089   // Serverlist is used: each backend returned by the balancer should
2090   // have gotten one request.
2091   for (size_t i = 0;
2092        i < kNumBackendsInResolution + kNumBackendsInResolutionUpdate; ++i) {
2093     EXPECT_EQ(0U, backends_[i]->backend_service()->request_count());
2094   }
2095   for (size_t i = kNumBackendsInResolution + kNumBackendsInResolutionUpdate;
2096        i < backends_.size(); ++i) {
2097     EXPECT_EQ(1U, backends_[i]->backend_service()->request_count());
2098   }
2099   // The ADS service got a single request, and sent a single response.
2100   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2101   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2102 }
2103
2104 // Tests that fallback will kick in immediately if the balancer channel fails.
2105 TEST_P(FallbackTest, FallbackEarlyWhenBalancerChannelFails) {
2106   const int kFallbackTimeoutMs = 10000 * grpc_test_slowdown_factor();
2107   ResetStub(kFallbackTimeoutMs);
2108   // Return an unreachable balancer and one fallback backend.
2109   SetNextResolution({backends_[0]->port()});
2110   SetNextResolutionForLbChannel({g_port_saver->GetPort()});
2111   // Send RPC with deadline less than the fallback timeout and make sure it
2112   // succeeds.
2113   CheckRpcSendOk(/* times */ 1, /* timeout_ms */ 1000,
2114                  /* wait_for_ready */ false);
2115 }
2116
2117 // Tests that fallback will kick in immediately if the balancer call fails.
2118 TEST_P(FallbackTest, FallbackEarlyWhenBalancerCallFails) {
2119   const int kFallbackTimeoutMs = 10000 * grpc_test_slowdown_factor();
2120   ResetStub(kFallbackTimeoutMs);
2121   // Return one balancer and one fallback backend.
2122   SetNextResolution({backends_[0]->port()});
2123   SetNextResolutionForLbChannelAllBalancers();
2124   // Balancer drops call without sending a serverlist.
2125   balancers_[0]->ads_service()->NotifyDoneWithAdsCall();
2126   // Send RPC with deadline less than the fallback timeout and make sure it
2127   // succeeds.
2128   CheckRpcSendOk(/* times */ 1, /* timeout_ms */ 1000,
2129                  /* wait_for_ready */ false);
2130 }
2131
2132 // Tests that fallback mode is entered if balancer response is received but the
2133 // backends can't be reached.
2134 TEST_P(FallbackTest, FallbackIfResponseReceivedButChildNotReady) {
2135   const int kFallbackTimeoutMs = 500 * grpc_test_slowdown_factor();
2136   ResetStub(kFallbackTimeoutMs);
2137   SetNextResolution({backends_[0]->port()});
2138   SetNextResolutionForLbChannelAllBalancers();
2139   // Send a serverlist that only contains an unreachable backend before fallback
2140   // timeout.
2141   AdsServiceImpl::ResponseArgs args({
2142       {"locality0", {g_port_saver->GetPort()}},
2143   });
2144   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2145   // Because no child policy is ready before fallback timeout, we enter fallback
2146   // mode.
2147   WaitForBackend(0);
2148 }
2149
2150 // Tests that fallback mode is exited if the balancer tells the client to drop
2151 // all the calls.
2152 TEST_P(FallbackTest, FallbackModeIsExitedWhenBalancerSaysToDropAllCalls) {
2153   // Return an unreachable balancer and one fallback backend.
2154   SetNextResolution({backends_[0]->port()});
2155   SetNextResolutionForLbChannel({g_port_saver->GetPort()});
2156   // Enter fallback mode because the LB channel fails to connect.
2157   WaitForBackend(0);
2158   // Return a new balancer that sends a response to drop all calls.
2159   AdsServiceImpl::ResponseArgs args({
2160       {"locality0", GetBackendPorts()},
2161   });
2162   args.drop_categories = {{kLbDropType, 1000000}};
2163   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2164   SetNextResolutionForLbChannelAllBalancers();
2165   // Send RPCs until failure.
2166   gpr_timespec deadline = gpr_time_add(
2167       gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_millis(5000, GPR_TIMESPAN));
2168   do {
2169     auto status = SendRpc();
2170     if (!status.ok()) break;
2171   } while (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), deadline) < 0);
2172   CheckRpcSendFailure();
2173 }
2174
2175 // Tests that fallback mode is exited if the child policy becomes ready.
2176 TEST_P(FallbackTest, FallbackModeIsExitedAfterChildRready) {
2177   // Return an unreachable balancer and one fallback backend.
2178   SetNextResolution({backends_[0]->port()});
2179   SetNextResolutionForLbChannel({g_port_saver->GetPort()});
2180   // Enter fallback mode because the LB channel fails to connect.
2181   WaitForBackend(0);
2182   // Return a new balancer that sends a dead backend.
2183   ShutdownBackend(1);
2184   AdsServiceImpl::ResponseArgs args({
2185       {"locality0", {backends_[1]->port()}},
2186   });
2187   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2188   SetNextResolutionForLbChannelAllBalancers();
2189   // The state (TRANSIENT_FAILURE) update from the child policy will be ignored
2190   // because we are still in fallback mode.
2191   gpr_timespec deadline = gpr_time_add(gpr_now(GPR_CLOCK_REALTIME),
2192                                        gpr_time_from_millis(500, GPR_TIMESPAN));
2193   // Send 0.5 second worth of RPCs.
2194   do {
2195     CheckRpcSendOk();
2196   } while (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), deadline) < 0);
2197   // After the backend is restarted, the child policy will eventually be READY,
2198   // and we will exit fallback mode.
2199   StartBackend(1);
2200   WaitForBackend(1);
2201   // We have exited fallback mode, so calls will go to the child policy
2202   // exclusively.
2203   CheckRpcSendOk(100);
2204   EXPECT_EQ(0U, backends_[0]->backend_service()->request_count());
2205   EXPECT_EQ(100U, backends_[1]->backend_service()->request_count());
2206 }
2207
2208 class BalancerUpdateTest : public XdsEnd2endTest {
2209  public:
2210   BalancerUpdateTest() : XdsEnd2endTest(4, 3) {}
2211 };
2212
2213 // Tests that the old LB call is still used after the balancer address update as
2214 // long as that call is still alive.
2215 TEST_P(BalancerUpdateTest, UpdateBalancersButKeepUsingOriginalBalancer) {
2216   SetNextResolution({});
2217   SetNextResolutionForLbChannelAllBalancers();
2218   AdsServiceImpl::ResponseArgs args({
2219       {"locality0", {backends_[0]->port()}},
2220   });
2221   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2222   args = AdsServiceImpl::ResponseArgs({
2223       {"locality0", {backends_[1]->port()}},
2224   });
2225   ScheduleResponseForBalancer(1, AdsServiceImpl::BuildResponse(args), 0);
2226   // Wait until the first backend is ready.
2227   WaitForBackend(0);
2228   // Send 10 requests.
2229   gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
2230   CheckRpcSendOk(10);
2231   gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
2232   // All 10 requests should have gone to the first backend.
2233   EXPECT_EQ(10U, backends_[0]->backend_service()->request_count());
2234   // The ADS service of balancer 0 got a single request, and sent a single
2235   // response.
2236   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2237   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2238   EXPECT_EQ(0U, balancers_[1]->ads_service()->request_count());
2239   EXPECT_EQ(0U, balancers_[1]->ads_service()->response_count());
2240   EXPECT_EQ(0U, balancers_[2]->ads_service()->request_count());
2241   EXPECT_EQ(0U, balancers_[2]->ads_service()->response_count());
2242   gpr_log(GPR_INFO, "========= ABOUT TO UPDATE 1 ==========");
2243   SetNextResolutionForLbChannel({balancers_[1]->port()});
2244   gpr_log(GPR_INFO, "========= UPDATE 1 DONE ==========");
2245   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2246   gpr_timespec deadline = gpr_time_add(
2247       gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_millis(10000, GPR_TIMESPAN));
2248   // Send 10 seconds worth of RPCs
2249   do {
2250     CheckRpcSendOk();
2251   } while (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), deadline) < 0);
2252   // The current LB call is still working, so xds continued using it to the
2253   // first balancer, which doesn't assign the second backend.
2254   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2255   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2256   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2257   EXPECT_EQ(0U, balancers_[1]->ads_service()->request_count());
2258   EXPECT_EQ(0U, balancers_[1]->ads_service()->response_count());
2259   EXPECT_EQ(0U, balancers_[2]->ads_service()->request_count());
2260   EXPECT_EQ(0U, balancers_[2]->ads_service()->response_count());
2261 }
2262
2263 // Tests that the old LB call is still used after multiple balancer address
2264 // updates as long as that call is still alive. Send an update with the same set
2265 // of LBs as the one in SetUp() in order to verify that the LB channel inside
2266 // xds keeps the initial connection (which by definition is also present in the
2267 // update).
2268 TEST_P(BalancerUpdateTest, Repeated) {
2269   SetNextResolution({});
2270   SetNextResolutionForLbChannelAllBalancers();
2271   AdsServiceImpl::ResponseArgs args({
2272       {"locality0", {backends_[0]->port()}},
2273   });
2274   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2275   args = AdsServiceImpl::ResponseArgs({
2276       {"locality0", {backends_[1]->port()}},
2277   });
2278   ScheduleResponseForBalancer(1, AdsServiceImpl::BuildResponse(args), 0);
2279   // Wait until the first backend is ready.
2280   WaitForBackend(0);
2281   // Send 10 requests.
2282   gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
2283   CheckRpcSendOk(10);
2284   gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
2285   // All 10 requests should have gone to the first backend.
2286   EXPECT_EQ(10U, backends_[0]->backend_service()->request_count());
2287   // The ADS service of balancer 0 got a single request, and sent a single
2288   // response.
2289   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2290   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2291   EXPECT_EQ(0U, balancers_[1]->ads_service()->request_count());
2292   EXPECT_EQ(0U, balancers_[1]->ads_service()->response_count());
2293   EXPECT_EQ(0U, balancers_[2]->ads_service()->request_count());
2294   EXPECT_EQ(0U, balancers_[2]->ads_service()->response_count());
2295   std::vector<int> ports;
2296   ports.emplace_back(balancers_[0]->port());
2297   ports.emplace_back(balancers_[1]->port());
2298   ports.emplace_back(balancers_[2]->port());
2299   gpr_log(GPR_INFO, "========= ABOUT TO UPDATE 1 ==========");
2300   SetNextResolutionForLbChannel(ports);
2301   gpr_log(GPR_INFO, "========= UPDATE 1 DONE ==========");
2302   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2303   gpr_timespec deadline = gpr_time_add(
2304       gpr_now(GPR_CLOCK_REALTIME), gpr_time_from_millis(10000, GPR_TIMESPAN));
2305   // Send 10 seconds worth of RPCs
2306   do {
2307     CheckRpcSendOk();
2308   } while (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), deadline) < 0);
2309   // xds continued using the original LB call to the first balancer, which
2310   // doesn't assign the second backend.
2311   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2312   ports.clear();
2313   ports.emplace_back(balancers_[0]->port());
2314   ports.emplace_back(balancers_[1]->port());
2315   gpr_log(GPR_INFO, "========= ABOUT TO UPDATE 2 ==========");
2316   SetNextResolutionForLbChannel(ports);
2317   gpr_log(GPR_INFO, "========= UPDATE 2 DONE ==========");
2318   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2319   deadline = gpr_time_add(gpr_now(GPR_CLOCK_REALTIME),
2320                           gpr_time_from_millis(10000, GPR_TIMESPAN));
2321   // Send 10 seconds worth of RPCs
2322   do {
2323     CheckRpcSendOk();
2324   } while (gpr_time_cmp(gpr_now(GPR_CLOCK_REALTIME), deadline) < 0);
2325   // xds continued using the original LB call to the first balancer, which
2326   // doesn't assign the second backend.
2327   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2328 }
2329
2330 // Tests that if the balancer is down, the RPCs will still be sent to the
2331 // backends according to the last balancer response, until a new balancer is
2332 // reachable.
2333 TEST_P(BalancerUpdateTest, DeadUpdate) {
2334   SetNextResolution({});
2335   SetNextResolutionForLbChannel({balancers_[0]->port()});
2336   AdsServiceImpl::ResponseArgs args({
2337       {"locality0", {backends_[0]->port()}},
2338   });
2339   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2340   args = AdsServiceImpl::ResponseArgs({
2341       {"locality0", {backends_[1]->port()}},
2342   });
2343   ScheduleResponseForBalancer(1, AdsServiceImpl::BuildResponse(args), 0);
2344   // Start servers and send 10 RPCs per server.
2345   gpr_log(GPR_INFO, "========= BEFORE FIRST BATCH ==========");
2346   CheckRpcSendOk(10);
2347   gpr_log(GPR_INFO, "========= DONE WITH FIRST BATCH ==========");
2348   // All 10 requests should have gone to the first backend.
2349   EXPECT_EQ(10U, backends_[0]->backend_service()->request_count());
2350   // Kill balancer 0
2351   gpr_log(GPR_INFO, "********** ABOUT TO KILL BALANCER 0 *************");
2352   balancers_[0]->Shutdown();
2353   gpr_log(GPR_INFO, "********** KILLED BALANCER 0 *************");
2354   // This is serviced by the existing child policy.
2355   gpr_log(GPR_INFO, "========= BEFORE SECOND BATCH ==========");
2356   CheckRpcSendOk(10);
2357   gpr_log(GPR_INFO, "========= DONE WITH SECOND BATCH ==========");
2358   // All 10 requests should again have gone to the first backend.
2359   EXPECT_EQ(20U, backends_[0]->backend_service()->request_count());
2360   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2361   // The ADS service of balancer 0 got a single request, and sent a single
2362   // response.
2363   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2364   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2365   EXPECT_EQ(0U, balancers_[1]->ads_service()->request_count());
2366   EXPECT_EQ(0U, balancers_[1]->ads_service()->response_count());
2367   EXPECT_EQ(0U, balancers_[2]->ads_service()->request_count());
2368   EXPECT_EQ(0U, balancers_[2]->ads_service()->response_count());
2369   gpr_log(GPR_INFO, "========= ABOUT TO UPDATE 1 ==========");
2370   SetNextResolutionForLbChannel({balancers_[1]->port()});
2371   gpr_log(GPR_INFO, "========= UPDATE 1 DONE ==========");
2372   // Wait until update has been processed, as signaled by the second backend
2373   // receiving a request. In the meantime, the client continues to be serviced
2374   // (by the first backend) without interruption.
2375   EXPECT_EQ(0U, backends_[1]->backend_service()->request_count());
2376   WaitForBackend(1);
2377   // This is serviced by the updated RR policy
2378   backends_[1]->backend_service()->ResetCounters();
2379   gpr_log(GPR_INFO, "========= BEFORE THIRD BATCH ==========");
2380   CheckRpcSendOk(10);
2381   gpr_log(GPR_INFO, "========= DONE WITH THIRD BATCH ==========");
2382   // All 10 requests should have gone to the second backend.
2383   EXPECT_EQ(10U, backends_[1]->backend_service()->request_count());
2384   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2385   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2386   // The second balancer, published as part of the first update, may end up
2387   // getting two requests (that is, 1 <= #req <= 2) if the LB call retry timer
2388   // firing races with the arrival of the update containing the second
2389   // balancer.
2390   EXPECT_GE(balancers_[1]->ads_service()->request_count(), 1U);
2391   EXPECT_GE(balancers_[1]->ads_service()->response_count(), 1U);
2392   EXPECT_LE(balancers_[1]->ads_service()->request_count(), 2U);
2393   EXPECT_LE(balancers_[1]->ads_service()->response_count(), 2U);
2394   EXPECT_EQ(0U, balancers_[2]->ads_service()->request_count());
2395   EXPECT_EQ(0U, balancers_[2]->ads_service()->response_count());
2396 }
2397
// The re-resolution tests are deferred because they rely on fallback mode,
// which is not supported yet.
2400
2401 // TODO(juanlishen): Add TEST_P(BalancerUpdateTest, ReresolveDeadBackend).
2402
2403 // TODO(juanlishen): Add TEST_P(UpdatesWithClientLoadReportingTest,
2404 // ReresolveDeadBalancer)
2405
2406 class ClientLoadReportingTest : public XdsEnd2endTest {
2407  public:
2408   ClientLoadReportingTest() : XdsEnd2endTest(4, 1, 3) {}
2409 };
2410
2411 // Tests that the load report received at the balancer is correct.
2412 TEST_P(ClientLoadReportingTest, Vanilla) {
2413   SetNextResolution({});
2414   SetNextResolutionForLbChannel({balancers_[0]->port()});
2415   const size_t kNumRpcsPerAddress = 100;
2416   // TODO(juanlishen): Partition the backends after multiple localities is
2417   // tested.
2418   AdsServiceImpl::ResponseArgs args({
2419       {"locality0", GetBackendPorts()},
2420   });
2421   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2422   // Wait until all backends are ready.
2423   int num_ok = 0;
2424   int num_failure = 0;
2425   int num_drops = 0;
2426   std::tie(num_ok, num_failure, num_drops) = WaitForAllBackends();
2427   // Send kNumRpcsPerAddress RPCs per server.
2428   CheckRpcSendOk(kNumRpcsPerAddress * num_backends_);
2429   // Each backend should have gotten 100 requests.
2430   for (size_t i = 0; i < backends_.size(); ++i) {
2431     EXPECT_EQ(kNumRpcsPerAddress,
2432               backends_[i]->backend_service()->request_count());
2433   }
2434   // The ADS service got a single request, and sent a single response.
2435   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2436   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2437   // The LRS service got a single request, and sent a single response.
2438   EXPECT_EQ(1U, balancers_[0]->lrs_service()->request_count());
2439   EXPECT_EQ(1U, balancers_[0]->lrs_service()->response_count());
2440   // The load report received at the balancer should be correct.
2441   ClientStats* client_stats = balancers_[0]->lrs_service()->WaitForLoadReport();
2442   EXPECT_EQ(kNumRpcsPerAddress * num_backends_ + num_ok,
2443             client_stats->total_successful_requests());
2444   EXPECT_EQ(0U, client_stats->total_requests_in_progress());
2445   EXPECT_EQ(kNumRpcsPerAddress * num_backends_ + num_ok,
2446             client_stats->total_issued_requests());
2447   EXPECT_EQ(0U, client_stats->total_error_requests());
2448   EXPECT_EQ(0U, client_stats->total_dropped_requests());
2449 }
2450
2451 // Tests that if the balancer restarts, the client load report contains the
2452 // stats before and after the restart correctly.
2453 TEST_P(ClientLoadReportingTest, BalancerRestart) {
2454   SetNextResolution({});
2455   SetNextResolutionForLbChannel({balancers_[0]->port()});
2456   const size_t kNumBackendsFirstPass = backends_.size() / 2;
2457   const size_t kNumBackendsSecondPass =
2458       backends_.size() - kNumBackendsFirstPass;
2459   AdsServiceImpl::ResponseArgs args({
2460       {"locality0", GetBackendPorts(0, kNumBackendsFirstPass)},
2461   });
2462   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2463   // Wait until all backends returned by the balancer are ready.
2464   int num_ok = 0;
2465   int num_failure = 0;
2466   int num_drops = 0;
2467   std::tie(num_ok, num_failure, num_drops) =
2468       WaitForAllBackends(/* start_index */ 0,
2469                          /* stop_index */ kNumBackendsFirstPass);
2470   ClientStats* client_stats = balancers_[0]->lrs_service()->WaitForLoadReport();
2471   EXPECT_EQ(static_cast<size_t>(num_ok),
2472             client_stats->total_successful_requests());
2473   EXPECT_EQ(0U, client_stats->total_requests_in_progress());
2474   EXPECT_EQ(0U, client_stats->total_error_requests());
2475   EXPECT_EQ(0U, client_stats->total_dropped_requests());
2476   // Shut down the balancer.
2477   balancers_[0]->Shutdown();
2478   // We should continue using the last EDS response we received from the
2479   // balancer before it was shut down.
2480   // Note: We need to use WaitForAllBackends() here instead of just
2481   // CheckRpcSendOk(kNumBackendsFirstPass), because when the balancer
2482   // shuts down, the XdsClient will generate an error to the
2483   // ServiceConfigWatcher, which will cause the xds resolver to send a
2484   // no-op update to the LB policy.  When this update gets down to the
2485   // round_robin child policy for the locality, it will generate a new
2486   // subchannel list, which resets the start index randomly.  So we need
2487   // to be a little more permissive here to avoid spurious failures.
2488   ResetBackendCounters();
2489   int num_started = std::get<0>(WaitForAllBackends(
2490       /* start_index */ 0, /* stop_index */ kNumBackendsFirstPass));
2491   // Now restart the balancer, this time pointing to the new backends.
2492   balancers_[0]->Start(server_host_);
2493   args = AdsServiceImpl::ResponseArgs({
2494       {"locality0", GetBackendPorts(kNumBackendsFirstPass)},
2495   });
2496   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2497   // Wait for queries to start going to one of the new backends.
2498   // This tells us that we're now using the new serverlist.
2499   std::tie(num_ok, num_failure, num_drops) =
2500       WaitForAllBackends(/* start_index */ kNumBackendsFirstPass);
2501   num_started += num_ok + num_failure + num_drops;
2502   // Send one RPC per backend.
2503   CheckRpcSendOk(kNumBackendsSecondPass);
2504   num_started += kNumBackendsSecondPass;
2505   // Check client stats.
2506   client_stats = balancers_[0]->lrs_service()->WaitForLoadReport();
2507   EXPECT_EQ(num_started, client_stats->total_successful_requests());
2508   EXPECT_EQ(0U, client_stats->total_requests_in_progress());
2509   EXPECT_EQ(0U, client_stats->total_error_requests());
2510   EXPECT_EQ(0U, client_stats->total_dropped_requests());
2511 }
2512
2513 class ClientLoadReportingWithDropTest : public XdsEnd2endTest {
2514  public:
2515   ClientLoadReportingWithDropTest() : XdsEnd2endTest(4, 1, 20) {}
2516 };
2517
2518 // Tests that the drop stats are correctly reported by client load reporting.
2519 TEST_P(ClientLoadReportingWithDropTest, Vanilla) {
2520   SetNextResolution({});
2521   SetNextResolutionForLbChannelAllBalancers();
2522   const size_t kNumRpcs = 3000;
2523   const uint32_t kDropPerMillionForLb = 100000;
2524   const uint32_t kDropPerMillionForThrottle = 200000;
2525   const double kDropRateForLb = kDropPerMillionForLb / 1000000.0;
2526   const double kDropRateForThrottle = kDropPerMillionForThrottle / 1000000.0;
2527   const double KDropRateForLbAndThrottle =
2528       kDropRateForLb + (1 - kDropRateForLb) * kDropRateForThrottle;
2529   // The ADS response contains two drop categories.
2530   AdsServiceImpl::ResponseArgs args({
2531       {"locality0", GetBackendPorts()},
2532   });
2533   args.drop_categories = {{kLbDropType, kDropPerMillionForLb},
2534                           {kThrottleDropType, kDropPerMillionForThrottle}};
2535   ScheduleResponseForBalancer(0, AdsServiceImpl::BuildResponse(args), 0);
2536   int num_ok = 0;
2537   int num_failure = 0;
2538   int num_drops = 0;
2539   std::tie(num_ok, num_failure, num_drops) = WaitForAllBackends();
2540   const size_t num_warmup = num_ok + num_failure + num_drops;
2541   // Send kNumRpcs RPCs and count the drops.
2542   for (size_t i = 0; i < kNumRpcs; ++i) {
2543     EchoResponse response;
2544     const Status status = SendRpc(&response);
2545     if (!status.ok() &&
2546         status.error_message() == "Call dropped by load balancing policy") {
2547       ++num_drops;
2548     } else {
2549       EXPECT_TRUE(status.ok()) << "code=" << status.error_code()
2550                                << " message=" << status.error_message();
2551       EXPECT_EQ(response.message(), kRequestMessage_);
2552     }
2553   }
2554   // The drop rate should be roughly equal to the expectation.
2555   const double seen_drop_rate = static_cast<double>(num_drops) / kNumRpcs;
2556   const double kErrorTolerance = 0.2;
2557   EXPECT_THAT(
2558       seen_drop_rate,
2559       ::testing::AllOf(
2560           ::testing::Ge(KDropRateForLbAndThrottle * (1 - kErrorTolerance)),
2561           ::testing::Le(KDropRateForLbAndThrottle * (1 + kErrorTolerance))));
2562   // Check client stats.
2563   ClientStats* client_stats = balancers_[0]->lrs_service()->WaitForLoadReport();
2564   EXPECT_EQ(num_drops, client_stats->total_dropped_requests());
2565   const size_t total_rpc = num_warmup + kNumRpcs;
2566   EXPECT_THAT(
2567       client_stats->dropped_requests(kLbDropType),
2568       ::testing::AllOf(
2569           ::testing::Ge(total_rpc * kDropRateForLb * (1 - kErrorTolerance)),
2570           ::testing::Le(total_rpc * kDropRateForLb * (1 + kErrorTolerance))));
2571   EXPECT_THAT(client_stats->dropped_requests(kThrottleDropType),
2572               ::testing::AllOf(
2573                   ::testing::Ge(total_rpc * (1 - kDropRateForLb) *
2574                                 kDropRateForThrottle * (1 - kErrorTolerance)),
2575                   ::testing::Le(total_rpc * (1 - kDropRateForLb) *
2576                                 kDropRateForThrottle * (1 + kErrorTolerance))));
2577   // The ADS service got a single request, and sent a single response.
2578   EXPECT_EQ(1U, balancers_[0]->ads_service()->request_count());
2579   EXPECT_EQ(1U, balancers_[0]->ads_service()->response_count());
2580 }
2581
2582 grpc::string TestTypeName(const ::testing::TestParamInfo<TestType>& info) {
2583   return info.param.AsString();
2584 }
2585
2586 INSTANTIATE_TEST_SUITE_P(XdsTest, BasicTest,
2587                          ::testing::Values(TestType(false, true),
2588                                            TestType(false, false),
2589                                            TestType(true, false),
2590                                            TestType(true, true)),
2591                          &TestTypeName);
2592
2593 INSTANTIATE_TEST_SUITE_P(XdsTest, SecureNamingTest,
2594                          ::testing::Values(TestType(false, true),
2595                                            TestType(false, false),
2596                                            TestType(true, false),
2597                                            TestType(true, true)),
2598                          &TestTypeName);
2599
2600 // CDS depends on XdsResolver.
2601 INSTANTIATE_TEST_SUITE_P(XdsTest, CdsTest,
2602                          ::testing::Values(TestType(true, false),
2603                                            TestType(true, true)),
2604                          &TestTypeName);
2605
2606 INSTANTIATE_TEST_SUITE_P(XdsTest, LocalityMapTest,
2607                          ::testing::Values(TestType(false, true),
2608                                            TestType(false, false),
2609                                            TestType(true, false),
2610                                            TestType(true, true)),
2611                          &TestTypeName);
2612
2613 INSTANTIATE_TEST_SUITE_P(XdsTest, FailoverTest,
2614                          ::testing::Values(TestType(false, true),
2615                                            TestType(false, false),
2616                                            TestType(true, false),
2617                                            TestType(true, true)),
2618                          &TestTypeName);
2619
2620 INSTANTIATE_TEST_SUITE_P(XdsTest, DropTest,
2621                          ::testing::Values(TestType(false, true),
2622                                            TestType(false, false),
2623                                            TestType(true, false),
2624                                            TestType(true, true)),
2625                          &TestTypeName);
2626
2627 // Fallback does not work with xds resolver.
2628 INSTANTIATE_TEST_SUITE_P(XdsTest, FallbackTest,
2629                          ::testing::Values(TestType(false, true),
2630                                            TestType(false, false)),
2631                          &TestTypeName);
2632
2633 INSTANTIATE_TEST_SUITE_P(XdsTest, BalancerUpdateTest,
2634                          ::testing::Values(TestType(false, true),
2635                                            TestType(false, false),
2636                                            TestType(true, true)),
2637                          &TestTypeName);
2638
2639 // Load reporting tests are not run with load reporting disabled.
2640 INSTANTIATE_TEST_SUITE_P(XdsTest, ClientLoadReportingTest,
2641                          ::testing::Values(TestType(false, true),
2642                                            TestType(true, true)),
2643                          &TestTypeName);
2644
2645 // Load reporting tests are not run with load reporting disabled.
2646 INSTANTIATE_TEST_SUITE_P(XdsTest, ClientLoadReportingWithDropTest,
2647                          ::testing::Values(TestType(false, true),
2648                                            TestType(true, true)),
2649                          &TestTypeName);
2650
2651 }  // namespace
2652 }  // namespace testing
2653 }  // namespace grpc
2654
2655 int main(int argc, char** argv) {
2656   grpc::testing::TestEnvironment env(argc, argv);
2657   ::testing::InitGoogleTest(&argc, argv);
2658   grpc::testing::WriteBootstrapFiles();
2659   grpc::testing::g_port_saver = new grpc::testing::PortSaver();
2660   const auto result = RUN_ALL_TESTS();
2661   return result;
2662 }