Imported Upstream version 1.26.0
[platform/upstream/grpc.git] / src / core / ext / filters / client_channel / resolving_lb_policy.cc
1 /*
2  *
3  * Copyright 2015 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18
19 #include <grpc/support/port_platform.h>
20
21 #include "src/core/ext/filters/client_channel/resolving_lb_policy.h"
22
23 #include <inttypes.h>
24 #include <limits.h>
25 #include <stdbool.h>
26 #include <stdio.h>
27 #include <string.h>
28
29 #include <grpc/support/alloc.h>
30 #include <grpc/support/log.h>
31 #include <grpc/support/string_util.h>
32 #include <grpc/support/sync.h>
33
34 #include "src/core/ext/filters/client_channel/backup_poller.h"
35 #include "src/core/ext/filters/client_channel/http_connect_handshaker.h"
36 #include "src/core/ext/filters/client_channel/lb_policy_registry.h"
37 #include "src/core/ext/filters/client_channel/proxy_mapper_registry.h"
38 #include "src/core/ext/filters/client_channel/resolver_registry.h"
39 #include "src/core/ext/filters/client_channel/retry_throttle.h"
40 #include "src/core/ext/filters/client_channel/server_address.h"
41 #include "src/core/ext/filters/client_channel/service_config.h"
42 #include "src/core/ext/filters/client_channel/subchannel.h"
43 #include "src/core/ext/filters/deadline/deadline_filter.h"
44 #include "src/core/lib/backoff/backoff.h"
45 #include "src/core/lib/channel/channel_args.h"
46 #include "src/core/lib/channel/connected_channel.h"
47 #include "src/core/lib/channel/status_util.h"
48 #include "src/core/lib/gpr/string.h"
49 #include "src/core/lib/gprpp/inlined_vector.h"
50 #include "src/core/lib/gprpp/manual_constructor.h"
51 #include "src/core/lib/gprpp/sync.h"
52 #include "src/core/lib/iomgr/combiner.h"
53 #include "src/core/lib/iomgr/iomgr.h"
54 #include "src/core/lib/iomgr/polling_entity.h"
55 #include "src/core/lib/profiling/timers.h"
56 #include "src/core/lib/slice/slice_internal.h"
57 #include "src/core/lib/slice/slice_string_helpers.h"
58 #include "src/core/lib/surface/channel.h"
59 #include "src/core/lib/transport/connectivity_state.h"
60 #include "src/core/lib/transport/error_utils.h"
61 #include "src/core/lib/transport/metadata.h"
62 #include "src/core/lib/transport/metadata_batch.h"
63 #include "src/core/lib/transport/static_metadata.h"
64 #include "src/core/lib/transport/status_metadata.h"
65
66 namespace grpc_core {
67
68 //
69 // ResolvingLoadBalancingPolicy::ResolverResultHandler
70 //
71
72 class ResolvingLoadBalancingPolicy::ResolverResultHandler
73     : public Resolver::ResultHandler {
74  public:
75   explicit ResolverResultHandler(
76       RefCountedPtr<ResolvingLoadBalancingPolicy> parent)
77       : parent_(std::move(parent)) {}
78
79   ~ResolverResultHandler() {
80     if (GRPC_TRACE_FLAG_ENABLED(*(parent_->tracer_))) {
81       gpr_log(GPR_INFO, "resolving_lb=%p: resolver shutdown complete",
82               parent_.get());
83     }
84   }
85
86   void ReturnResult(Resolver::Result result) override {
87     parent_->OnResolverResultChangedLocked(std::move(result));
88   }
89
90   void ReturnError(grpc_error* error) override {
91     parent_->OnResolverError(error);
92   }
93
94  private:
95   RefCountedPtr<ResolvingLoadBalancingPolicy> parent_;
96 };
97
98 //
99 // ResolvingLoadBalancingPolicy::ResolvingControlHelper
100 //
101
102 class ResolvingLoadBalancingPolicy::ResolvingControlHelper
103     : public LoadBalancingPolicy::ChannelControlHelper {
104  public:
105   explicit ResolvingControlHelper(
106       RefCountedPtr<ResolvingLoadBalancingPolicy> parent)
107       : parent_(std::move(parent)) {}
108
109   RefCountedPtr<SubchannelInterface> CreateSubchannel(
110       const grpc_channel_args& args) override {
111     if (parent_->resolver_ == nullptr) return nullptr;  // Shutting down.
112     if (!CalledByCurrentChild() && !CalledByPendingChild()) return nullptr;
113     return parent_->channel_control_helper()->CreateSubchannel(args);
114   }
115
116   void UpdateState(grpc_connectivity_state state,
117                    std::unique_ptr<SubchannelPicker> picker) override {
118     if (parent_->resolver_ == nullptr) return;  // Shutting down.
119     // If this request is from the pending child policy, ignore it until
120     // it reports READY, at which point we swap it into place.
121     if (CalledByPendingChild()) {
122       if (GRPC_TRACE_FLAG_ENABLED(*(parent_->tracer_))) {
123         gpr_log(GPR_INFO,
124                 "resolving_lb=%p helper=%p: pending child policy %p reports "
125                 "state=%s",
126                 parent_.get(), this, child_, ConnectivityStateName(state));
127       }
128       if (state != GRPC_CHANNEL_READY) return;
129       grpc_pollset_set_del_pollset_set(
130           parent_->lb_policy_->interested_parties(),
131           parent_->interested_parties());
132       parent_->lb_policy_ = std::move(parent_->pending_lb_policy_);
133     } else if (!CalledByCurrentChild()) {
134       // This request is from an outdated child, so ignore it.
135       return;
136     }
137     parent_->channel_control_helper()->UpdateState(state, std::move(picker));
138   }
139
140   void RequestReresolution() override {
141     // If there is a pending child policy, ignore re-resolution requests
142     // from the current child policy (or any outdated child).
143     if (parent_->pending_lb_policy_ != nullptr && !CalledByPendingChild()) {
144       return;
145     }
146     if (GRPC_TRACE_FLAG_ENABLED(*(parent_->tracer_))) {
147       gpr_log(GPR_INFO, "resolving_lb=%p: started name re-resolving",
148               parent_.get());
149     }
150     if (parent_->resolver_ != nullptr) {
151       parent_->resolver_->RequestReresolutionLocked();
152     }
153   }
154
155   void AddTraceEvent(TraceSeverity /*severity*/,
156                      StringView /*message*/) override {}
157
158   void set_child(LoadBalancingPolicy* child) { child_ = child; }
159
160  private:
161   bool CalledByPendingChild() const {
162     GPR_ASSERT(child_ != nullptr);
163     return child_ == parent_->pending_lb_policy_.get();
164   }
165
166   bool CalledByCurrentChild() const {
167     GPR_ASSERT(child_ != nullptr);
168     return child_ == parent_->lb_policy_.get();
169   };
170
171   RefCountedPtr<ResolvingLoadBalancingPolicy> parent_;
172   LoadBalancingPolicy* child_ = nullptr;
173 };
174
175 //
176 // ResolvingLoadBalancingPolicy
177 //
178
179 ResolvingLoadBalancingPolicy::ResolvingLoadBalancingPolicy(
180     Args args, TraceFlag* tracer, grpc_core::UniquePtr<char> target_uri,
181     ProcessResolverResultCallback process_resolver_result,
182     void* process_resolver_result_user_data)
183     : LoadBalancingPolicy(std::move(args)),
184       tracer_(tracer),
185       target_uri_(std::move(target_uri)),
186       process_resolver_result_(process_resolver_result),
187       process_resolver_result_user_data_(process_resolver_result_user_data) {
188   GPR_ASSERT(process_resolver_result != nullptr);
189   resolver_ = ResolverRegistry::CreateResolver(
190       target_uri_.get(), args.args, interested_parties(), combiner(),
191       MakeUnique<ResolverResultHandler>(Ref()));
192   // Since the validity of args has been checked when create the channel,
193   // CreateResolver() must return a non-null result.
194   GPR_ASSERT(resolver_ != nullptr);
195   if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
196     gpr_log(GPR_INFO, "resolving_lb=%p: starting name resolution", this);
197   }
198   channel_control_helper()->UpdateState(GRPC_CHANNEL_CONNECTING,
199                                         MakeUnique<QueuePicker>(Ref()));
200   resolver_->StartLocked();
201 }
202
203 ResolvingLoadBalancingPolicy::~ResolvingLoadBalancingPolicy() {
204   GPR_ASSERT(resolver_ == nullptr);
205   GPR_ASSERT(lb_policy_ == nullptr);
206 }
207
208 void ResolvingLoadBalancingPolicy::ShutdownLocked() {
209   if (resolver_ != nullptr) {
210     resolver_.reset();
211     if (lb_policy_ != nullptr) {
212       if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
213         gpr_log(GPR_INFO, "resolving_lb=%p: shutting down lb_policy=%p", this,
214                 lb_policy_.get());
215       }
216       grpc_pollset_set_del_pollset_set(lb_policy_->interested_parties(),
217                                        interested_parties());
218       lb_policy_.reset();
219     }
220     if (pending_lb_policy_ != nullptr) {
221       if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
222         gpr_log(GPR_INFO, "resolving_lb=%p: shutting down pending lb_policy=%p",
223                 this, pending_lb_policy_.get());
224       }
225       grpc_pollset_set_del_pollset_set(pending_lb_policy_->interested_parties(),
226                                        interested_parties());
227       pending_lb_policy_.reset();
228     }
229   }
230 }
231
232 void ResolvingLoadBalancingPolicy::ExitIdleLocked() {
233   if (lb_policy_ != nullptr) {
234     lb_policy_->ExitIdleLocked();
235     if (pending_lb_policy_ != nullptr) pending_lb_policy_->ExitIdleLocked();
236   }
237 }
238
239 void ResolvingLoadBalancingPolicy::ResetBackoffLocked() {
240   if (resolver_ != nullptr) {
241     resolver_->ResetBackoffLocked();
242     resolver_->RequestReresolutionLocked();
243   }
244   if (lb_policy_ != nullptr) lb_policy_->ResetBackoffLocked();
245   if (pending_lb_policy_ != nullptr) pending_lb_policy_->ResetBackoffLocked();
246 }
247
248 void ResolvingLoadBalancingPolicy::OnResolverError(grpc_error* error) {
249   if (resolver_ == nullptr) {
250     GRPC_ERROR_UNREF(error);
251     return;
252   }
253   if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
254     gpr_log(GPR_INFO, "resolving_lb=%p: resolver transient failure: %s", this,
255             grpc_error_string(error));
256   }
257   // If we already have an LB policy from a previous resolution
258   // result, then we continue to let it set the connectivity state.
259   // Otherwise, we go into TRANSIENT_FAILURE.
260   if (lb_policy_ == nullptr) {
261     grpc_error* state_error = GRPC_ERROR_CREATE_REFERENCING_FROM_STATIC_STRING(
262         "Resolver transient failure", &error, 1);
263     channel_control_helper()->UpdateState(
264         GRPC_CHANNEL_TRANSIENT_FAILURE,
265         MakeUnique<TransientFailurePicker>(state_error));
266   }
267   GRPC_ERROR_UNREF(error);
268 }
269
270 void ResolvingLoadBalancingPolicy::CreateOrUpdateLbPolicyLocked(
271     const char* lb_policy_name,
272     RefCountedPtr<LoadBalancingPolicy::Config> lb_policy_config,
273     Resolver::Result result, TraceStringVector* trace_strings) {
274   // If the child policy name changes, we need to create a new child
275   // policy.  When this happens, we leave child_policy_ as-is and store
276   // the new child policy in pending_child_policy_.  Once the new child
277   // policy transitions into state READY, we swap it into child_policy_,
278   // replacing the original child policy.  So pending_child_policy_ is
279   // non-null only between when we apply an update that changes the child
280   // policy name and when the new child reports state READY.
281   //
282   // Updates can arrive at any point during this transition.  We always
283   // apply updates relative to the most recently created child policy,
284   // even if the most recent one is still in pending_child_policy_.  This
285   // is true both when applying the updates to an existing child policy
286   // and when determining whether we need to create a new policy.
287   //
288   // As a result of this, there are several cases to consider here:
289   //
290   // 1. We have no existing child policy (i.e., we have started up but
291   //    have not yet received a serverlist from the balancer or gone
292   //    into fallback mode; in this case, both child_policy_ and
293   //    pending_child_policy_ are null).  In this case, we create a
294   //    new child policy and store it in child_policy_.
295   //
296   // 2. We have an existing child policy and have no pending child policy
297   //    from a previous update (i.e., either there has not been a
298   //    previous update that changed the policy name, or we have already
299   //    finished swapping in the new policy; in this case, child_policy_
300   //    is non-null but pending_child_policy_ is null).  In this case:
301   //    a. If child_policy_->name() equals child_policy_name, then we
302   //       update the existing child policy.
303   //    b. If child_policy_->name() does not equal child_policy_name,
304   //       we create a new policy.  The policy will be stored in
305   //       pending_child_policy_ and will later be swapped into
306   //       child_policy_ by the helper when the new child transitions
307   //       into state READY.
308   //
309   // 3. We have an existing child policy and have a pending child policy
310   //    from a previous update (i.e., a previous update set
311   //    pending_child_policy_ as per case 2b above and that policy has
312   //    not yet transitioned into state READY and been swapped into
313   //    child_policy_; in this case, both child_policy_ and
314   //    pending_child_policy_ are non-null).  In this case:
315   //    a. If pending_child_policy_->name() equals child_policy_name,
316   //       then we update the existing pending child policy.
317   //    b. If pending_child_policy->name() does not equal
318   //       child_policy_name, then we create a new policy.  The new
319   //       policy is stored in pending_child_policy_ (replacing the one
320   //       that was there before, which will be immediately shut down)
321   //       and will later be swapped into child_policy_ by the helper
322   //       when the new child transitions into state READY.
323   const bool create_policy =
324       // case 1
325       lb_policy_ == nullptr ||
326       // case 2b
327       (pending_lb_policy_ == nullptr &&
328        strcmp(lb_policy_->name(), lb_policy_name) != 0) ||
329       // case 3b
330       (pending_lb_policy_ != nullptr &&
331        strcmp(pending_lb_policy_->name(), lb_policy_name) != 0);
332   LoadBalancingPolicy* policy_to_update = nullptr;
333   if (create_policy) {
334     // Cases 1, 2b, and 3b: create a new child policy.
335     // If lb_policy_ is null, we set it (case 1), else we set
336     // pending_lb_policy_ (cases 2b and 3b).
337     if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
338       gpr_log(GPR_INFO, "resolving_lb=%p: Creating new %schild policy %s", this,
339               lb_policy_ == nullptr ? "" : "pending ", lb_policy_name);
340     }
341     auto& lb_policy = lb_policy_ == nullptr ? lb_policy_ : pending_lb_policy_;
342     lb_policy =
343         CreateLbPolicyLocked(lb_policy_name, *result.args, trace_strings);
344     policy_to_update = lb_policy.get();
345   } else {
346     // Cases 2a and 3a: update an existing policy.
347     // If we have a pending child policy, send the update to the pending
348     // policy (case 3a), else send it to the current policy (case 2a).
349     policy_to_update = pending_lb_policy_ != nullptr ? pending_lb_policy_.get()
350                                                      : lb_policy_.get();
351   }
352   GPR_ASSERT(policy_to_update != nullptr);
353   // Update the policy.
354   if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
355     gpr_log(GPR_INFO, "resolving_lb=%p: Updating %schild policy %p", this,
356             policy_to_update == pending_lb_policy_.get() ? "pending " : "",
357             policy_to_update);
358   }
359   UpdateArgs update_args;
360   update_args.addresses = std::move(result.addresses);
361   update_args.config = std::move(lb_policy_config);
362   // TODO(roth): Once channel args is converted to C++, use std::move() here.
363   update_args.args = result.args;
364   result.args = nullptr;
365   policy_to_update->UpdateLocked(std::move(update_args));
366 }
367
368 // Creates a new LB policy.
369 // Updates trace_strings to indicate what was done.
370 OrphanablePtr<LoadBalancingPolicy>
371 ResolvingLoadBalancingPolicy::CreateLbPolicyLocked(
372     const char* lb_policy_name, const grpc_channel_args& args,
373     TraceStringVector* trace_strings) {
374   ResolvingControlHelper* helper = new ResolvingControlHelper(Ref());
375   LoadBalancingPolicy::Args lb_policy_args;
376   lb_policy_args.combiner = combiner();
377   lb_policy_args.channel_control_helper =
378       std::unique_ptr<ChannelControlHelper>(helper);
379   lb_policy_args.args = &args;
380   OrphanablePtr<LoadBalancingPolicy> lb_policy =
381       LoadBalancingPolicyRegistry::CreateLoadBalancingPolicy(
382           lb_policy_name, std::move(lb_policy_args));
383   if (GPR_UNLIKELY(lb_policy == nullptr)) {
384     gpr_log(GPR_ERROR, "could not create LB policy \"%s\"", lb_policy_name);
385     char* str;
386     gpr_asprintf(&str, "Could not create LB policy \"%s\"", lb_policy_name);
387     trace_strings->push_back(str);
388     return nullptr;
389   }
390   helper->set_child(lb_policy.get());
391   if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
392     gpr_log(GPR_INFO, "resolving_lb=%p: created new LB policy \"%s\" (%p)",
393             this, lb_policy_name, lb_policy.get());
394   }
395   char* str;
396   gpr_asprintf(&str, "Created new LB policy \"%s\"", lb_policy_name);
397   trace_strings->push_back(str);
398   grpc_pollset_set_add_pollset_set(lb_policy->interested_parties(),
399                                    interested_parties());
400   return lb_policy;
401 }
402
403 void ResolvingLoadBalancingPolicy::MaybeAddTraceMessagesForAddressChangesLocked(
404     bool resolution_contains_addresses, TraceStringVector* trace_strings) {
405   if (!resolution_contains_addresses &&
406       previous_resolution_contained_addresses_) {
407     trace_strings->push_back(gpr_strdup("Address list became empty"));
408   } else if (resolution_contains_addresses &&
409              !previous_resolution_contained_addresses_) {
410     trace_strings->push_back(gpr_strdup("Address list became non-empty"));
411   }
412   previous_resolution_contained_addresses_ = resolution_contains_addresses;
413 }
414
415 void ResolvingLoadBalancingPolicy::ConcatenateAndAddChannelTraceLocked(
416     TraceStringVector* trace_strings) const {
417   if (!trace_strings->empty()) {
418     gpr_strvec v;
419     gpr_strvec_init(&v);
420     gpr_strvec_add(&v, gpr_strdup("Resolution event: "));
421     bool is_first = 1;
422     for (size_t i = 0; i < trace_strings->size(); ++i) {
423       if (!is_first) gpr_strvec_add(&v, gpr_strdup(", "));
424       is_first = false;
425       gpr_strvec_add(&v, (*trace_strings)[i]);
426     }
427     size_t len = 0;
428     grpc_core::UniquePtr<char> message(gpr_strvec_flatten(&v, &len));
429     channel_control_helper()->AddTraceEvent(ChannelControlHelper::TRACE_INFO,
430                                             StringView(message.get()));
431     gpr_strvec_destroy(&v);
432   }
433 }
434
435 void ResolvingLoadBalancingPolicy::OnResolverResultChangedLocked(
436     Resolver::Result result) {
437   // Handle race conditions.
438   if (resolver_ == nullptr) return;
439   if (GRPC_TRACE_FLAG_ENABLED(*tracer_)) {
440     gpr_log(GPR_INFO, "resolving_lb=%p: got resolver result", this);
441   }
442   // We only want to trace the address resolution in the follow cases:
443   // (a) Address resolution resulted in service config change.
444   // (b) Address resolution that causes number of backends to go from
445   //     zero to non-zero.
446   // (c) Address resolution that causes number of backends to go from
447   //     non-zero to zero.
448   // (d) Address resolution that causes a new LB policy to be created.
449   //
450   // We track a list of strings to eventually be concatenated and traced.
451   TraceStringVector trace_strings;
452   const bool resolution_contains_addresses = result.addresses.size() > 0;
453   // Process the resolver result.
454   const char* lb_policy_name = nullptr;
455   RefCountedPtr<LoadBalancingPolicy::Config> lb_policy_config;
456   bool service_config_changed = false;
457   char* service_config_error_string = nullptr;
458   if (process_resolver_result_ != nullptr) {
459     grpc_error* service_config_error = GRPC_ERROR_NONE;
460     service_config_changed = process_resolver_result_(
461         process_resolver_result_user_data_, result, &lb_policy_name,
462         &lb_policy_config, &service_config_error);
463     if (service_config_error != GRPC_ERROR_NONE) {
464       service_config_error_string =
465           gpr_strdup(grpc_error_string(service_config_error));
466       if (lb_policy_name == nullptr) {
467         // Use an empty lb_policy_name as an indicator that we received an
468         // invalid service config and we don't have a fallback service config.
469         OnResolverError(service_config_error);
470       } else {
471         GRPC_ERROR_UNREF(service_config_error);
472       }
473     }
474   } else {
475     lb_policy_name = child_policy_name_.get();
476     lb_policy_config = child_lb_config_;
477   }
478   if (lb_policy_name != nullptr) {
479     // Create or update LB policy, as needed.
480     CreateOrUpdateLbPolicyLocked(lb_policy_name, lb_policy_config,
481                                  std::move(result), &trace_strings);
482   }
483   // Add channel trace event.
484   if (service_config_changed) {
485     // TODO(ncteisen): might be worth somehow including a snippet of the
486     // config in the trace, at the risk of bloating the trace logs.
487     trace_strings.push_back(gpr_strdup("Service config changed"));
488   }
489   if (service_config_error_string != nullptr) {
490     trace_strings.push_back(service_config_error_string);
491     service_config_error_string = nullptr;
492   }
493   MaybeAddTraceMessagesForAddressChangesLocked(resolution_contains_addresses,
494                                                &trace_strings);
495   ConcatenateAndAddChannelTraceLocked(&trace_strings);
496 }
497
498 }  // namespace grpc_core