/*
 * Copyright 2018 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include <grpc/support/port_platform.h>
21 #include "src/cpp/server/load_reporter/load_reporter_async_service_impl.h"
24 namespace load_reporter {
// Invokes the stored handler function, transferring ownership of the handler
// shared_ptr to the callee together with the completion-queue `ok` result.
// Both members must have been set before Run() is invoked (enforced below).
// NOTE(review): the closing brace of this function (source line 30) is
// missing from this dump — restore it from upstream before building.
26 void LoadReporterAsyncServiceImpl::CallableTag::Run(bool ok) {
27 GPR_ASSERT(handler_function_ != nullptr);
28 GPR_ASSERT(handler_ != nullptr);
29 handler_function_(std::move(handler_), ok);
// Takes ownership of the server completion queue, creates (but does not yet
// start — see StartThread()) the worker thread, and constructs the
// LoadReporter with a census view provider and, on supported platforms, a
// CPU stats provider.
// NOTE(review): the matching "#endif" and the constructor's closing brace
// (source lines 40 and 45) are missing from this dump — restore them before
// building.
32 LoadReporterAsyncServiceImpl::LoadReporterAsyncServiceImpl(
33 std::unique_ptr<ServerCompletionQueue> cq)
34 : cq_(std::move(cq)) {
35 thread_ = std::unique_ptr<::grpc_core::Thread>(
36 new ::grpc_core::Thread("server_load_reporting", Work, this));
// CPU utilization sampling is only implemented on Linux, Windows and Apple;
// on other platforms the LoadReporter receives a null provider.
37 std::unique_ptr<CpuStatsProvider> cpu_stats_provider = nullptr;
38 #if defined(GPR_LINUX) || defined(GPR_WINDOWS) || defined(GPR_APPLE)
39 cpu_stats_provider.reset(new CpuStatsProviderDefaultImpl());
41 load_reporter_ = std::unique_ptr<LoadReporter>(new LoadReporter(
42 kFeedbackSampleWindowSeconds,
43 std::unique_ptr<CensusViewProvider>(new CensusViewProviderDefaultImpl()),
44 std::move(cpu_stats_provider)));
// Destructor: cancels the pending fetch-and-sample alarm (if any) while
// holding the completion-queue shutdown mutex.
// NOTE(review): several source lines (49-50, 52-53, 56-58) are missing from
// this dump — presumably they set the shutdown flag, shut down the cq, and
// join the worker thread; verify against the upstream file.
47 LoadReporterAsyncServiceImpl::~LoadReporterAsyncServiceImpl() {
48 // We will reach here after the server starts shutting down.
51 std::unique_lock<std::mutex> lock(cq_shutdown_mu_);
54 if (next_fetch_and_sample_alarm_ != nullptr)
55 next_fetch_and_sample_alarm_->Cancel();
// Arms an Alarm on the completion queue to fire one fetch-and-sample interval
// from now. Skipped entirely if the service is already shutting down; the
// shutdown check and the Alarm reset/Set are done under cq_shutdown_mu_ so
// the alarm is never set on a cq that is being shut down.
// NOTE(review): source lines 63-64 and 71-72 (the GPR_TIMESPAN argument and
// the alarm's tag argument — presumably `this`, matching the `tag == service`
// check in Work()) are missing from this dump; confirm against upstream.
59 void LoadReporterAsyncServiceImpl::ScheduleNextFetchAndSample() {
60 auto next_fetch_and_sample_time =
61 gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
62 gpr_time_from_millis(kFetchAndSampleIntervalSeconds * 1000,
65 std::unique_lock<std::mutex> lock(cq_shutdown_mu_);
66 if (shutdown_) return;
67 // TODO(juanlishen): Improve the Alarm implementation to reuse a single
68 // instance for multiple events.
69 next_fetch_and_sample_alarm_.reset(new Alarm);
70 next_fetch_and_sample_alarm_->Set(cq_.get(), next_fetch_and_sample_time,
73 gpr_log(GPR_DEBUG, "[LRS %p] Next fetch-and-sample scheduled.", this);
// Performs one fetch-and-sample pass on the LoadReporter and reschedules the
// next pass. `ok` is the completion-queue result of the alarm event; a false
// value stops the fetch-and-sample cycle.
// NOTE(review): the `if (!ok) { ... return; }` scaffolding around the "is
// stopped" log (source lines 77, 79-80) is missing from this dump — restore
// from upstream before building.
76 void LoadReporterAsyncServiceImpl::FetchAndSample(bool ok) {
78 gpr_log(GPR_INFO, "[LRS %p] Fetch-and-sample is stopped.", this);
81 gpr_log(GPR_DEBUG, "[LRS %p] Starting a fetch-and-sample...", this);
82 load_reporter_->FetchAndSample();
83 ScheduleNextFetchAndSample();
// Worker-thread entry point (passed to grpc_core::Thread in the ctor).
// Kicks off the periodic fetch-and-sample cycle, spawns the first
// ReportLoadHandler, then drains the completion queue: a tag equal to the
// service pointer is the fetch-and-sample alarm event; any other tag is a
// per-call CallableTag whose Run() dispatches to the right handler method.
// NOTE(review): the event-loop scaffolding (source lines 96-98: `while`
// header and the `tag`/`ok` declarations; 102-103, 106, 108-111: braces,
// the `return`, and `next_step->Run(ok)`) is missing from this dump —
// restore from upstream before building.
86 void LoadReporterAsyncServiceImpl::Work(void* arg) {
87 LoadReporterAsyncServiceImpl* service =
88 reinterpret_cast<LoadReporterAsyncServiceImpl*>(arg);
89 service->FetchAndSample(true /* ok */);
90 // TODO(juanlishen): This is a workaround to wait for the cq to be ready. Need
91 // to figure out why cq is not ready after service starts.
92 gpr_sleep_until(gpr_time_add(gpr_now(GPR_CLOCK_MONOTONIC),
93 gpr_time_from_seconds(1, GPR_TIMESPAN)));
94 ReportLoadHandler::CreateAndStart(service->cq_.get(), service,
95 service->load_reporter_.get());
99 if (!service->cq_->Next(&tag, &ok)) {
100 // The completion queue is shutting down.
101 GPR_ASSERT(service->shutdown_);
104 if (tag == service) {
105 service->FetchAndSample(ok);
107 auto* next_step = static_cast<CallableTag*>(tag);
113 void LoadReporterAsyncServiceImpl::StartThread() { thread_->Start(); }
// Creates a new self-owning handler for one load-reporting call and requests
// the next incoming call. Ownership of the handler is transferred into the
// two CallableTags (done-notification and request-delivery); the tags hand it
// back to the handler methods when their cq events complete. All tag setup
// and the RequestReportLoad call happen under cq_shutdown_mu_ and are skipped
// if the service is already shutting down.
// NOTE(review): source lines 121, 127-128, 131, 134-136 (tag shared_ptr
// arguments, `p->next_inbound_ =` head, the tag passed to RequestReportLoad,
// and the closing braces) are missing from this dump — restore from upstream.
115 void LoadReporterAsyncServiceImpl::ReportLoadHandler::CreateAndStart(
116 ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service,
117 LoadReporter* load_reporter) {
118 std::shared_ptr<ReportLoadHandler> handler =
119 std::make_shared<ReportLoadHandler>(cq, service, load_reporter);
120 ReportLoadHandler* p = handler.get();
122 std::unique_lock<std::mutex> lock(service->cq_shutdown_mu_);
123 if (service->shutdown_) return;
124 p->on_done_notified_ =
125 CallableTag(std::bind(&ReportLoadHandler::OnDoneNotified, p,
126 std::placeholders::_1, std::placeholders::_2),
129 CallableTag(std::bind(&ReportLoadHandler::OnRequestDelivered, p,
130 std::placeholders::_1, std::placeholders::_2),
// AsyncNotifyWhenDone must be registered before the call starts; see the
// comment in OnRequestDelivered about the tag never popping out if the call
// never starts.
132 p->ctx_.AsyncNotifyWhenDone(&p->on_done_notified_);
133 service->RequestReportLoad(&p->ctx_, &p->stream_, cq, cq,
// Handler constructor: stores the collaborators and starts the call state
// machine in WAITING_FOR_DELIVERY.
// NOTE(review): initializer-list entries on source lines 141-142 and 144
// (presumably cq_, service_, stream_/ctx_ wiring) are missing from this
// dump — restore from upstream before building.
138 LoadReporterAsyncServiceImpl::ReportLoadHandler::ReportLoadHandler(
139 ServerCompletionQueue* cq, LoadReporterAsyncServiceImpl* service,
140 LoadReporter* load_reporter)
143 load_reporter_(load_reporter),
145 call_status_(WAITING_FOR_DELIVERY) {}
// Completion of RequestReportLoad: a new call has been delivered to this
// handler. Marks the call DELIVERED, spawns the next handler instance for
// future clients, generates the per-stream LB ID, and issues the first
// stream read (expecting the initial request). `self` keeps the handler
// alive across cq events; it is moved into Shutdown() on failure paths.
// NOTE(review): several source lines (149, 151, 157, 161-162, 166, 171-173,
// 176, 178, 181, 185) are missing from this dump — among them the early-ok
// guard, closing braces, the `next_inbound_ =` head, and the gpr_log call
// head; restore from upstream before building.
147 void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnRequestDelivered(
148 std::shared_ptr<ReportLoadHandler> self, bool ok) {
150 call_status_ = DELIVERED;
152 // AsyncNotifyWhenDone() needs to be called before the call starts, but the
153 // tag will not pop out if the call never starts (
154 // https://github.com/grpc/grpc/issues/10136). So we need to manually
155 // release the ownership of the handler in this case.
156 GPR_ASSERT(on_done_notified_.ReleaseHandler() != nullptr);
158 if (!ok || shutdown_) {
159 // The value of ok being false means that the server is shutting down.
160 Shutdown(std::move(self), "OnRequestDelivered");
163 // Spawn a new handler instance to serve the next new client. Every handler
164 // instance will deallocate itself when it's done.
165 CreateAndStart(cq_, service_, load_reporter_);
167 std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
168 if (service_->shutdown_) {
// Manually drop the lock before Shutdown(), which acquires the same mutex.
169 lock.release()->unlock();
170 Shutdown(std::move(self), "OnRequestDelivered");
174 CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this,
175 std::placeholders::_1, std::placeholders::_2),
177 stream_.Read(&request_, &next_inbound_);
179 // LB ID is unique for each load reporting stream.
180 lb_id_ = load_reporter_->GenerateLbId();
182 "[LRS %p] Call request delivered (lb_id_: %s, handler: %p). "
183 "Start reading the initial request...",
184 service_, lb_id_.c_str(), this);
// Completion of a stream read. The first successful read must carry the
// initial request: its hostname/load-key are recorded, the stream is
// registered with the LoadReporter, the report interval is computed, the
// first report is sent, and another read is issued (which is only expected
// to complete when the client half-closes or the stream breaks). Any read
// after the initial request violates the protocol and shuts the call down.
// NOTE(review): multiple source lines (192, 197, 199-200, 205, 211, 216-217,
// 224, 229-231, 234, 236-238, 240, 244) are missing from this dump —
// closing braces, `return`s, gpr_log heads, and the `next_inbound_ =` head;
// restore from upstream before building.
187 void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnReadDone(
188 std::shared_ptr<ReportLoadHandler> self, bool ok) {
189 if (!ok || shutdown_) {
190 if (!ok && call_status_ < INITIAL_REQUEST_RECEIVED) {
191 // The client may have half-closed the stream or the stream is broken.
193 "[LRS %p] Failed reading the initial request from the stream "
194 "(lb_id_: %s, handler: %p, done_notified: %d, is_cancelled: %d).",
195 service_, lb_id_.c_str(), this, static_cast<int>(done_notified_),
196 static_cast<int>(is_cancelled_));
198 Shutdown(std::move(self), "OnReadDone");
201 // We only receive one request, which is the initial request.
202 if (call_status_ < INITIAL_REQUEST_RECEIVED) {
203 if (!request_.has_initial_request()) {
204 Shutdown(std::move(self), "OnReadDone+initial_request_not_found");
206 call_status_ = INITIAL_REQUEST_RECEIVED;
207 const auto& initial_request = request_.initial_request();
208 load_balanced_hostname_ = initial_request.load_balanced_hostname();
209 load_key_ = initial_request.load_key();
210 load_reporter_->ReportStreamCreated(load_balanced_hostname_, lb_id_,
// Report interval is converted from the proto Duration to milliseconds.
212 const auto& load_report_interval = initial_request.load_report_interval();
213 load_report_interval_ms_ =
214 static_cast<unsigned long>(load_report_interval.seconds() * 1000 +
215 load_report_interval.nanos() / 1000);
218 "[LRS %p] Initial request received. Start load reporting (load "
219 "balanced host: %s, interval: %lu ms, lb_id_: %s, handler: %p)...",
220 service_, load_balanced_hostname_.c_str(), load_report_interval_ms_,
221 lb_id_.c_str(), this);
222 SendReport(self, true /* ok */);
223 // Expect this read to fail.
225 std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
226 if (service_->shutdown_) {
// Manually drop the lock before Shutdown(), which acquires the same mutex.
227 lock.release()->unlock();
228 Shutdown(std::move(self), "OnReadDone");
232 CallableTag(std::bind(&ReportLoadHandler::OnReadDone, this,
233 std::placeholders::_1, std::placeholders::_2),
235 stream_.Read(&request_, &next_inbound_);
239 // Another request received! This violates the spec.
241 "[LRS %p] Another request received (lb_id_: %s, handler: %p).",
242 service_, lb_id_.c_str(), this);
243 Shutdown(std::move(self), "OnReadDone+second_request");
// Arms an Alarm so that SendReport() runs one report interval from now.
// Bails out into Shutdown() if the event failed or the handler/service is
// shutting down; the alarm is set under cq_shutdown_mu_ so it is never armed
// on a cq being shut down.
// NOTE(review): source lines (251-252, 256, 261-263, 266, 271-272, 275) are
// missing from this dump — `return`s, closing braces, the `next_outbound_ =`
// head, and the gpr_log head; restore from upstream before building.
247 void LoadReporterAsyncServiceImpl::ReportLoadHandler::ScheduleNextReport(
248 std::shared_ptr<ReportLoadHandler> self, bool ok) {
249 if (!ok || shutdown_) {
250 Shutdown(std::move(self), "ScheduleNextReport");
253 auto next_report_time = gpr_time_add(
254 gpr_now(GPR_CLOCK_MONOTONIC),
255 gpr_time_from_millis(load_report_interval_ms_, GPR_TIMESPAN));
257 std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
258 if (service_->shutdown_) {
// Manually drop the lock before Shutdown(), which acquires the same mutex.
259 lock.release()->unlock();
260 Shutdown(std::move(self), "ScheduleNextReport");
264 CallableTag(std::bind(&ReportLoadHandler::SendReport, this,
265 std::placeholders::_1, std::placeholders::_2),
267 // TODO(juanlishen): Improve the Alarm implementation to reuse a single
268 // instance for multiple events.
269 next_report_alarm_.reset(new Alarm);
270 next_report_alarm_->Set(cq_, next_report_time, &next_outbound_);
273 "[LRS %p] Next load report scheduled (lb_id_: %s, handler: %p).",
274 service_, lb_id_.c_str(), this);
// Builds one LoadReportResponse (current loads plus load-balancing feedback,
// and — exactly once per stream — the initial response carrying the LB ID,
// implementation ID, and server version) and writes it to the stream. The
// write's completion tag chains back to ScheduleNextReport().
// NOTE(review): several source lines (281-282, 295-296, 301-303, 306,
// 308, 310, 312-313) are missing from this dump — `return`s, closing braces,
// the `next_outbound_ =` head, and the gpr_log head; restore from upstream.
277 void LoadReporterAsyncServiceImpl::ReportLoadHandler::SendReport(
278 std::shared_ptr<ReportLoadHandler> self, bool ok) {
279 if (!ok || shutdown_) {
280 Shutdown(std::move(self), "SendReport");
283 ::grpc::lb::v1::LoadReportResponse response;
// Swap the generated protos into the response to avoid copying them.
284 auto loads = load_reporter_->GenerateLoads(load_balanced_hostname_, lb_id_);
285 response.mutable_load()->Swap(&loads);
286 auto feedback = load_reporter_->GenerateLoadBalancingFeedback();
287 response.mutable_load_balancing_feedback()->Swap(&feedback);
288 if (call_status_ < INITIAL_RESPONSE_SENT) {
289 auto initial_response = response.mutable_initial_response();
290 initial_response->set_load_balancer_id(lb_id_);
291 initial_response->set_implementation_id(
292 ::grpc::lb::v1::InitialLoadReportResponse::CPP);
293 initial_response->set_server_version(kVersion);
294 call_status_ = INITIAL_RESPONSE_SENT;
297 std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
298 if (service_->shutdown_) {
// Manually drop the lock before Shutdown(), which acquires the same mutex.
299 lock.release()->unlock();
300 Shutdown(std::move(self), "SendReport");
304 CallableTag(std::bind(&ReportLoadHandler::ScheduleNextReport, this,
305 std::placeholders::_1, std::placeholders::_2),
307 stream_.Write(response, &next_outbound_);
309 "[LRS %p] Sending load report (lb_id_: %s, handler: %p, loads "
311 service_, lb_id_.c_str(), this, response.load().size());
// Completion of the AsyncNotifyWhenDone tag registered in CreateAndStart():
// the call is done (finished or cancelled). Records the done/cancelled state
// and funnels into Shutdown().
// NOTE(review): source lines 317, 321-322 (presumably a GPR_ASSERT(ok), the
// closing brace of the IsCancelled branch, and the gpr_log head) are missing
// from this dump — restore from upstream before building.
315 void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnDoneNotified(
316 std::shared_ptr<ReportLoadHandler> self, bool ok) {
318 done_notified_ = true;
319 if (ctx_.IsCancelled()) {
320 is_cancelled_ = true;
323 "[LRS %p] Load reporting call is notified done (handler: %p, "
324 "is_cancelled: %d).",
325 service_, this, static_cast<int>(is_cancelled_));
326 Shutdown(std::move(self), "OnDoneNotified");
// Central teardown path, reachable from every handler callback. If the
// stream got past the initial request, tells the LoadReporter the stream is
// closed and cancels the pending report alarm. Finish() is called at most
// once (guarded by call_status_), and may be triggered from multiple
// callbacks because OnRequestDelivered() can run after OnDoneNotified().
// NOTE(review): source lines 331-332, 334, 336, 340-341, 347, 350, 353, and
// 356-358 are missing from this dump — among them the gpr_log head, a
// `shutdown_ = true;`-style flag set, tag setup for on_finish_done_, and the
// closing braces; restore from upstream before building.
329 void LoadReporterAsyncServiceImpl::ReportLoadHandler::Shutdown(
330 std::shared_ptr<ReportLoadHandler> self, const char* reason) {
333 "[LRS %p] Shutting down the handler (lb_id_: %s, handler: %p, "
335 service_, lb_id_.c_str(), this, reason);
337 if (call_status_ >= INITIAL_REQUEST_RECEIVED) {
338 load_reporter_->ReportStreamClosed(load_balanced_hostname_, lb_id_);
339 next_report_alarm_->Cancel();
342 // OnRequestDelivered() may be called after OnDoneNotified(), so we need to
343 // try to Finish() every time we are in Shutdown().
344 if (call_status_ >= DELIVERED && call_status_ < FINISH_CALLED) {
345 std::unique_lock<std::mutex> lock(service_->cq_shutdown_mu_);
346 if (!service_->shutdown_) {
348 CallableTag(std::bind(&ReportLoadHandler::OnFinishDone, this,
349 std::placeholders::_1, std::placeholders::_2),
351 // TODO(juanlishen): Maybe add a message proto for the client to
352 // explicitly cancel the stream so that we can return OK status in such
354 stream_.Finish(Status::CANCELLED, &on_finish_done_);
355 call_status_ = FINISH_CALLED;
// Completion of stream_.Finish(): logs that the load-reporting call is over.
// The handler's shared_ptr `self` goes out of scope here, releasing the last
// CallableTag-held reference to this self-owning handler.
// NOTE(review): source lines 362-363 and 366-367 (presumably an `if (ok)`
// guard around the log, the gpr_log head, and closing braces) are missing
// from this dump — restore from upstream before building.
360 void LoadReporterAsyncServiceImpl::ReportLoadHandler::OnFinishDone(
361 std::shared_ptr<ReportLoadHandler> self, bool ok) {
364 "[LRS %p] Load reporting finished (lb_id_: %s, handler: %p).",
365 service_, lb_id_.c_str(), this);
369 } // namespace load_reporter