859ad9946c82a06a223932d35ff90caac30ed64b
[platform/upstream/grpc.git] / src / cpp / server / load_reporter / load_reporter_async_service_impl.cc
1 /*
2  *
3  * Copyright 2018 gRPC authors.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  */
18
19 #include <grpc/support/port_platform.h>
20
21 #include "src/cpp/server/load_reporter/load_reporter_async_service_impl.h"
22
23 namespace grpc {
24 namespace load_reporter {
25
26 void LoadReporterAsyncServiceImpl::CallableTag::Run(bool ok) {
27   GPR_ASSERT(handler_function_ != nullptr);
28   GPR_ASSERT(handler_ != nullptr);
29   handler_function_(std::move(handler_), ok);
30 }
31
32 LoadReporterAsyncServiceImpl::LoadReporterAsyncServiceImpl(
33     std::unique_ptr<ServerCompletionQueue> cq)
34     : cq_(std::move(cq)) {
35   thread_ = std::unique_ptr<::grpc_core::Thread>(
36       new ::grpc_core::Thread("server_load_reporting", Work, this));
37   std::unique_ptr<CpuStatsProvider> cpu_stats_provider = nullptr;
38 #if defined(GPR_LINUX) || defined(GPR_WINDOWS) || defined(GPR_APPLE)
39   cpu_stats_provider.reset(new CpuStatsProviderDefaultImpl());
40 #endif
41   load_reporter_ = std::unique_ptr<LoadReporter>(new LoadReporter(
42       kFeedbackSampleWindowSeconds,
43       std::unique_ptr<CensusViewProvider>(new CensusViewProviderDefaultImpl()),
44       std::move(cpu_stats_provider)));
45 }
46
47 LoadReporterAsyncServiceImpl::~LoadReporterAsyncServiceImpl() {
48   // We will reach here after the server starts shutting down.
49   shutdown_ = true;
50   {
51     std::unique_lock<std::mutex> lock(cq_shutdown_mu_);
52     cq_->Shutdown();
53   }
54   if (next_fetch_and_sample_alarm_ != nullptr)
55     next_fetch_and_sample_alarm_->Cancel();
56   thread_->Join();
57 }
58
59 void LoadReporterAsyncServiceImpl::ScheduleNextFetchAndSample() {
60   auto next_fetch_and_sample_time =
61       gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
62                    gpr_time_from_millis(kFetchAndSampleIntervalSeconds * 1000,
63                                         GPR_TIMESPAN));
64   {
65     std::unique_lock<std::mutex> lock(cq_shutdown_mu_);
66     if (shutdown_) return;
67     // TODO(juanlishen): Improve the Alarm implementation to reuse a single
68     // instance for multiple events.
69     next_fetch_and_sample_alarm_.reset(new Alarm);
70     next_fetch_and_sample_alarm_->Set(cq_.get(), next_fetch_and_sample_time,
71                                       this);
72   }
73   gpr_log(GPR_DEBUG, "[LRS %p] Next fetch-and-sample scheduled.", this);
74 }
75
76 void LoadReporterAsyncServiceImpl::FetchAndSample(bool ok) {
77   if (!ok) {
78     gpr_log(GPR_INFO, "[LRS %p] Fetch-and-sample is stopped.", this);
79     return;
80   }
81   gpr_log(GPR_DEBUG, "[LRS %p] Starting a fetch-and-sample...", this);
82   load_reporter_->FetchAndSample();
83   ScheduleNextFetchAndSample();
84 }
85
86 void LoadReporterAsyncServiceImpl::Work(void* arg) {
87   LoadReporterAsyncServiceImpl* service =
88       reinterpret_cast<LoadReporterAsyncServiceImpl*>(arg);
89   service->FetchAndSample(true /* ok */);
90   // TODO(juanlishen): This is a workaround to wait for the cq to be ready. Need
91   // to figure out why cq is not ready after service starts.
92   gpr_sleep_until(gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
93                                gpr_time_from_seconds(1, GPR_TIMESPAN)));
94   ReportLoadHandler::CreateAndStart(service->cq_.get(), service,
95                                     service->load_reporter_.get());
96   void* tag;
97   bool ok;
98   while (true) {
99     if (!service->cq_->Next(&tag, &ok)) {
100       // The completion queue is shutting down.
101       GPR_ASSERT(service->shutdown_);
102       break;
103     }
104     if (tag == service) {
105       service->FetchAndSample(ok);
106     } else {
107       auto* next_step = static_cast<CallableTag*>(tag);
108       next_step->Run(ok);
109     }
110   }
111 }
112
113 void LoadReporterAsyncServiceImpl::StartThread() { thread_->Start(); }
114
// Creates a new handler for one ReportLoad call and registers it with the
// service. The handler's lifetime is managed entirely by the shared_ptr
// copies captured inside the CallableTags below: when no pending tag holds
// a copy any longer, the handler destroys itself.
void LoadReporterAsyncServiceImpl::ReportLoadHandler::CreateAndStart(
    ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service,
    LoadReporter* load_reporter) {
  std::shared_ptr<ReportLoadHandler> handler =
      std::make_shared<ReportLoadHandler>(cq, service, load_reporter);
  ReportLoadHandler* p = handler.get();
  {
    std::unique_lock<std::mutex> lock(service->cq_shutdown_mu_);
    // Already shutting down: the local shared_ptr drops the handler on
    // return, and nothing is registered with the cq.
    if (service->shutdown_) return;
    // Tag that fires when the call finishes or is cancelled. Keeps a copy of
    // the shared_ptr so the handler outlives the notification.
    p->on_done_notified_ =
        CallableTag(std::bind(&ReportLoadHandler::OnDoneNotified, p,
                              std::placeholders::_1, std::placeholders::_2),
                    handler);
    // Tag that fires when a new incoming call is delivered; takes the
    // remaining ownership of `handler`.
    p->next_inbound_ =
        CallableTag(std::bind(&ReportLoadHandler::OnRequestDelivered, p,
                              std::placeholders::_1, std::placeholders::_2),
                    std::move(handler));
    // AsyncNotifyWhenDone() must be registered before the call starts
    // (see the note in OnRequestDelivered about grpc issue #10136).
    p->ctx_.AsyncNotifyWhenDone(&p->on_done_notified_);
    service->RequestReportLoad(&p->ctx_, &p->stream_, cq, cq,
                               &p->next_inbound_);
  }
}
137
// Binds the handler to its cq, owning service and load reporter, attaches
// the async reader/writer stream to this call's context, and starts the
// call-status state machine in its initial WAITING_FOR_DELIVERY state.
// All three pointers are non-owning borrows from the service.
LoadReporterAsyncServiceImpl::ReportLoadHandler::ReportLoadHandler(
    ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service,
    LoadReporter* load_reporter)
    : cq_(cq),
      service_(service),
      load_reporter_(load_reporter),
      stream_(&ctx_),
      call_status_(WAITING_FOR_DELIVERY) {}
146
// Called when the pending RequestReportLoad() completes, i.e. a new client
// call has arrived (ok) or the server is shutting down (!ok). On success,
// spawns the next handler, posts the first read, and generates this
// stream's LB ID.
void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnRequestDelivered(
    std::shared_ptr<ReportLoadHandler> self, bool ok) {
  if (ok) {
    call_status_ = DELIVERED;
  } else {
    // AsyncNotifyWhenDone() needs to be called before the call starts, but the
    // tag will not pop out if the call never starts (
    // https://github.com/grpc/grpc/issues/10136). So we need to manually
    // release the ownership of the handler in this case.
    GPR_ASSERT(on_done_notified_.ReleaseHandler() != nullptr);
  }
  if (!ok || shutdown_) {
    // The value of ok being false means that the server is shutting down.
    Shutdown(std::move(self), "OnRequestDelivered");
    return;
  }
  // Spawn a new handler instance to serve the next new client. Every handler
  // instance will deallocate itself when it's done.
  CreateAndStart(cq_, service_, load_reporter_);
  {
    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
    if (service_->shutdown_) {
      // Detach and unlock the mutex before Shutdown(), which takes the same
      // mutex internally.
      lock.release()->unlock();
      Shutdown(std::move(self), "OnRequestDelivered");
      return;
    }
    // Ownership of the handler moves into next_inbound_, which keeps `this`
    // alive until OnReadDone() runs.
    next_inbound_ =
        CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this,
                              std::placeholders::_1, std::placeholders::_2),
                    std::move(self));
    stream_.Read(&request_, &next_inbound_);
  }
  // LB ID is unique for each load reporting stream.
  // NOTE(review): lb_id_ is assigned after the read is posted; this is safe
  // as long as the cq is polled only by the single Work() thread (so
  // OnReadDone cannot run concurrently) — confirm if more pollers are added.
  lb_id_ = load_reporter_->GenerateLbId();
  gpr_log(GPR_INFO,
          "[LRS %p] Call request delivered (lb_id_: %s, handler: %p). "
          "Start reading the initial request...",
          service_, lb_id_.c_str(), this);
}
186
// Called when a read from the stream completes. The first (and only
// expected) message is the initial request, which configures the reporting
// interval and kicks off the report/schedule loop; any further message
// violates the protocol and shuts the call down.
void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnReadDone(
    std::shared_ptr<ReportLoadHandler> self, bool ok) {
  if (!ok || shutdown_) {
    if (!ok && call_status_ < INITIAL_REQUEST_RECEIVED) {
      // The client may have half-closed the stream or the stream is broken.
      gpr_log(GPR_INFO,
              "[LRS %p] Failed reading the initial request from the stream "
              "(lb_id_: %s, handler: %p, done_notified: %d, is_cancelled: %d).",
              service_, lb_id_.c_str(), this, static_cast<int>(done_notified_),
              static_cast<int>(is_cancelled_));
    }
    Shutdown(std::move(self), "OnReadDone");
    return;
  }
  // We only receive one request, which is the initial request.
  if (call_status_ < INITIAL_REQUEST_RECEIVED) {
    if (!request_.has_initial_request()) {
      Shutdown(std::move(self), "OnReadDone+initial_request_not_found");
    } else {
      call_status_ = INITIAL_REQUEST_RECEIVED;
      const auto& initial_request = request_.initial_request();
      load_balanced_hostname_ = initial_request.load_balanced_hostname();
      load_key_ = initial_request.load_key();
      load_reporter_->ReportStreamCreated(load_balanced_hostname_, lb_id_,
                                          load_key_);
      // Convert the requested interval (seconds + nanos) to milliseconds.
      const auto& load_report_interval = initial_request.load_report_interval();
      load_report_interval_ms_ =
          static_cast<unsigned long>(load_report_interval.seconds() * 1000 +
                                     load_report_interval.nanos() / 1000);
      gpr_log(
          GPR_INFO,
          "[LRS %p] Initial request received. Start load reporting (load "
          "balanced host: %s, interval: %lu ms, lb_id_: %s, handler: %p)...",
          service_, load_balanced_hostname_.c_str(), load_report_interval_ms_,
          lb_id_.c_str(), this);
      // Pass `self` by copy: after this call, ownership is shared between the
      // pending write tag (set inside SendReport) and the read tag below.
      SendReport(self, true /* ok */);
      // Expect this read to fail.
      {
        std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
        if (service_->shutdown_) {
          // Detach and unlock before Shutdown(), which takes the same mutex.
          lock.release()->unlock();
          Shutdown(std::move(self), "OnReadDone");
          return;
        }
        next_inbound_ =
            CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this,
                                  std::placeholders::_1, std::placeholders::_2),
                        std::move(self));
        stream_.Read(&request_, &next_inbound_);
      }
    }
  } else {
    // Another request received! This violates the spec.
    gpr_log(GPR_ERROR,
            "[LRS %p] Another request received (lb_id_: %s, handler: %p).",
            service_, lb_id_.c_str(), this);
    Shutdown(std::move(self), "OnReadDone+second_request");
  }
}
246
// Called after a report write completes: arms an alarm that fires one
// load-report interval from now and, via next_outbound_, triggers the next
// SendReport(). Ownership of the handler moves into next_outbound_.
void LoadReporterAsyncServiceImpl::ReportLoadHandler::ScheduleNextReport(
    std::shared_ptr<ReportLoadHandler> self, bool ok) {
  if (!ok || shutdown_) {
    Shutdown(std::move(self), "ScheduleNextReport");
    return;
  }
  auto next_report_time = gpr_time_add(
      gpr_now(GPR_CLOCK_MONOTONIC),
      gpr_time_from_millis(load_report_interval_ms_, GPR_TIMESPAN));
  {
    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
    if (service_->shutdown_) {
      // Detach and unlock before Shutdown(), which takes the same mutex.
      lock.release()->unlock();
      Shutdown(std::move(self), "ScheduleNextReport");
      return;
    }
    next_outbound_ =
        CallableTag(std::bind(&ReportLoadHandler::SendReport, this,
                              std::placeholders::_1, std::placeholders::_2),
                    std::move(self));
    // TODO(juanlishen): Improve the Alarm implementation to reuse a single
    // instance for multiple events.
    next_report_alarm_.reset(new Alarm);
    next_report_alarm_->Set(cq_, next_report_time, &next_outbound_);
  }
  gpr_log(GPR_DEBUG,
          "[LRS %p] Next load report scheduled (lb_id_: %s, handler: %p).",
          service_, lb_id_.c_str(), this);
}
276
// Assembles one LoadReportResponse (loads + feedback, plus the one-time
// initial response fields) and writes it on the stream. The write's
// completion tag chains back into ScheduleNextReport(), forming the
// periodic reporting loop.
void LoadReporterAsyncServiceImpl::ReportLoadHandler::SendReport(
    std::shared_ptr<ReportLoadHandler> self, bool ok) {
  if (!ok || shutdown_) {
    Shutdown(std::move(self), "SendReport");
    return;
  }
  ::grpc::lb::v1::LoadReportResponse response;
  // Swap the generated protos into the response to avoid copying them.
  auto loads = load_reporter_->GenerateLoads(load_balanced_hostname_, lb_id_);
  response.mutable_load()->Swap(&loads);
  auto feedback = load_reporter_->GenerateLoadBalancingFeedback();
  response.mutable_load_balancing_feedback()->Swap(&feedback);
  // The very first response additionally carries the stream's identity.
  if (call_status_ < INITIAL_RESPONSE_SENT) {
    auto initial_response = response.mutable_initial_response();
    initial_response->set_load_balancer_id(lb_id_);
    initial_response->set_implementation_id(
        ::grpc::lb::v1::InitialLoadReportResponse::CPP);
    initial_response->set_server_version(kVersion);
    call_status_ = INITIAL_RESPONSE_SENT;
  }
  {
    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
    if (service_->shutdown_) {
      // Detach and unlock before Shutdown(), which takes the same mutex.
      lock.release()->unlock();
      Shutdown(std::move(self), "SendReport");
      return;
    }
    // Ownership moves into next_outbound_; it resurfaces when the write
    // completes and ScheduleNextReport() runs.
    next_outbound_ =
        CallableTag(std::bind(&ReportLoadHandler::ScheduleNextReport, this,
                              std::placeholders::_1, std::placeholders::_2),
                    std::move(self));
    stream_.Write(response, &next_outbound_);
    gpr_log(GPR_INFO,
            "[LRS %p] Sending load report (lb_id_: %s, handler: %p, loads "
            "count: %d)...",
            service_, lb_id_.c_str(), this, response.load().size());
  }
}
314
315 void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnDoneNotified(
316     std::shared_ptr<ReportLoadHandler> self, bool ok) {
317   GPR_ASSERT(ok);
318   done_notified_ = true;
319   if (ctx_.IsCancelled()) {
320     is_cancelled_ = true;
321   }
322   gpr_log(GPR_INFO,
323           "[LRS %p] Load reporting call is notified done (handler: %p, "
324           "is_cancelled: %d).",
325           service_, this, static_cast<int>(is_cancelled_));
326   Shutdown(std::move(self), "OnDoneNotified");
327 }
328
// Tears down the handler: reports stream closure (once), cancels the
// report alarm, and calls Finish() on the stream if the call was delivered
// but not yet finished. Safe to call from multiple completion paths; the
// first-time work is guarded by shutdown_ and the Finish() by call_status_.
void LoadReporterAsyncServiceImpl::ReportLoadHandler::Shutdown(
    std::shared_ptr<ReportLoadHandler> self, const char* reason) {
  if (!shutdown_) {
    gpr_log(GPR_INFO,
            "[LRS %p] Shutting down the handler (lb_id_: %s, handler: %p, "
            "reason: %s).",
            service_, lb_id_.c_str(), this, reason);
    shutdown_ = true;
    if (call_status_ >= INITIAL_REQUEST_RECEIVED) {
      load_reporter_->ReportStreamClosed(load_balanced_hostname_, lb_id_);
      // NOTE(review): assumes next_report_alarm_ is non-null whenever
      // call_status_ >= INITIAL_REQUEST_RECEIVED; there is a window before
      // the first ScheduleNextReport() sets it — confirm this cannot be hit.
      next_report_alarm_->Cancel();
    }
  }
  // OnRequestDelivered() may be called after OnDoneNotified(), so we need to
  // try to Finish() every time we are in Shutdown().
  if (call_status_ >= DELIVERED && call_status_ < FINISH_CALLED) {
    std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
    if (!service_->shutdown_) {
      // Ownership moves into on_finish_done_; the handler is finally
      // released when OnFinishDone() runs (or never Finish()es if the
      // service itself is shutting down, in which case `self` dies here).
      on_finish_done_ =
          CallableTag(std::bind(&ReportLoadHandler::OnFinishDone, this,
                                std::placeholders::_1, std::placeholders::_2),
                      std::move(self));
      // TODO(juanlishen): Maybe add a message proto for the client to
      // explicitly cancel the stream so that we can return OK status in such
      // cases.
      stream_.Finish(Status::CANCELLED, &on_finish_done_);
      call_status_ = FINISH_CALLED;
    }
  }
}
359
360 void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnFinishDone(
361     std::shared_ptr<ReportLoadHandler> self, bool ok) {
362   if (ok) {
363     gpr_log(GPR_INFO,
364             "[LRS %p] Load reporting finished (lb_id_: %s, handler: %p).",
365             service_, lb_id_.c_str(), this);
366   }
367 }
368
369 }  // namespace load_reporter
370 }  // namespace grpc