Upstream version 7.36.149.0
[platform/framework/web/crosswalk.git] / src / net / socket / tcp_socket_libevent.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/socket/tcp_socket.h"
6
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <netdb.h>
10 #include <netinet/in.h>
11 #include <netinet/tcp.h>
12 #include <sys/socket.h>
13
14 #include "base/callback_helpers.h"
15 #include "base/logging.h"
16 #include "base/metrics/histogram.h"
17 #include "base/metrics/stats_counters.h"
18 #include "base/posix/eintr_wrapper.h"
19 #include "build/build_config.h"
20 #include "net/base/address_list.h"
21 #include "net/base/connection_type_histograms.h"
22 #include "net/base/io_buffer.h"
23 #include "net/base/ip_endpoint.h"
24 #include "net/base/net_errors.h"
25 #include "net/base/net_util.h"
26 #include "net/base/network_change_notifier.h"
27 #include "net/socket/socket_net_log_params.h"
28
29 // If we don't have a definition for TCPI_OPT_SYN_DATA, create one.
30 #ifndef TCPI_OPT_SYN_DATA
31 #define TCPI_OPT_SYN_DATA 32
32 #endif
33
34 namespace net {
35
36 namespace {
37
// Toggles TCP_NODELAY on |fd| (see `man 7 tcp`). By default the kernel may
// hold back a small write for up to 200ms hoping to fill a packet; passing
// |no_delay| == true disables that wait. Returns true iff setsockopt()
// succeeded.
bool SetTCPNoDelay(int fd, bool no_delay) {
  const int flag = no_delay ? 1 : 0;
  return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(flag)) == 0;
}
47
48 // SetTCPKeepAlive sets SO_KEEPALIVE.
49 bool SetTCPKeepAlive(int fd, bool enable, int delay) {
50   int on = enable ? 1 : 0;
51   if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on))) {
52     PLOG(ERROR) << "Failed to set SO_KEEPALIVE on fd: " << fd;
53     return false;
54   }
55
56   // If we disabled TCP keep alive, our work is done here.
57   if (!enable)
58     return true;
59
60 #if defined(OS_LINUX) || defined(OS_ANDROID)
61   // Set seconds until first TCP keep alive.
62   if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &delay, sizeof(delay))) {
63     PLOG(ERROR) << "Failed to set TCP_KEEPIDLE on fd: " << fd;
64     return false;
65   }
66   // Set seconds between TCP keep alives.
67   if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &delay, sizeof(delay))) {
68     PLOG(ERROR) << "Failed to set TCP_KEEPINTVL on fd: " << fd;
69     return false;
70   }
71 #endif
72   return true;
73 }
74
75 int MapAcceptError(int os_error) {
76   switch (os_error) {
77     // If the client aborts the connection before the server calls accept,
78     // POSIX specifies accept should fail with ECONNABORTED. The server can
79     // ignore the error and just call accept again, so we map the error to
80     // ERR_IO_PENDING. See UNIX Network Programming, Vol. 1, 3rd Ed., Sec.
81     // 5.11, "Connection Abort before accept Returns".
82     case ECONNABORTED:
83       return ERR_IO_PENDING;
84     default:
85       return MapSystemError(os_error);
86   }
87 }
88
89 int MapConnectError(int os_error) {
90   switch (os_error) {
91     case EACCES:
92       return ERR_NETWORK_ACCESS_DENIED;
93     case ETIMEDOUT:
94       return ERR_CONNECTION_TIMED_OUT;
95     default: {
96       int net_error = MapSystemError(os_error);
97       if (net_error == ERR_FAILED)
98         return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
99
100       // Give a more specific error when the user is offline.
101       if (net_error == ERR_ADDRESS_UNREACHABLE &&
102           NetworkChangeNotifier::IsOffline()) {
103         return ERR_INTERNET_DISCONNECTED;
104       }
105       return net_error;
106     }
107   }
108 }
109
110 }  // namespace
111
112 //-----------------------------------------------------------------------------
113
114 TCPSocketLibevent::Watcher::Watcher(
115     const base::Closure& read_ready_callback,
116     const base::Closure& write_ready_callback)
117     : read_ready_callback_(read_ready_callback),
118       write_ready_callback_(write_ready_callback) {
119 }
120
121 TCPSocketLibevent::Watcher::~Watcher() {
122 }
123
124 void TCPSocketLibevent::Watcher::OnFileCanReadWithoutBlocking(int /* fd */) {
125   if (!read_ready_callback_.is_null())
126     read_ready_callback_.Run();
127   else
128     NOTREACHED();
129 }
130
131 void TCPSocketLibevent::Watcher::OnFileCanWriteWithoutBlocking(int /* fd */) {
132   if (!write_ready_callback_.is_null())
133     write_ready_callback_.Run();
134   else
135     NOTREACHED();
136 }
137
138 TCPSocketLibevent::TCPSocketLibevent(NetLog* net_log,
139                                      const NetLog::Source& source)
140     : socket_(kInvalidSocket),
141       accept_watcher_(base::Bind(&TCPSocketLibevent::DidCompleteAccept,
142                                  base::Unretained(this)),
143                       base::Closure()),
144       accept_socket_(NULL),
145       accept_address_(NULL),
146       read_watcher_(base::Bind(&TCPSocketLibevent::DidCompleteRead,
147                                base::Unretained(this)),
148                     base::Closure()),
149       write_watcher_(base::Closure(),
150                      base::Bind(&TCPSocketLibevent::DidCompleteConnectOrWrite,
151                                 base::Unretained(this))),
152       read_buf_len_(0),
153       write_buf_len_(0),
154       use_tcp_fastopen_(IsTCPFastOpenEnabled()),
155       tcp_fastopen_connected_(false),
156       fast_open_status_(FAST_OPEN_STATUS_UNKNOWN),
157       waiting_connect_(false),
158       connect_os_error_(0),
159       logging_multiple_connect_attempts_(false),
160       net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
161   net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
162                       source.ToEventParametersCallback());
163 }
164
165 TCPSocketLibevent::~TCPSocketLibevent() {
166   net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
167   if (tcp_fastopen_connected_) {
168     UMA_HISTOGRAM_ENUMERATION("Net.TcpFastOpenSocketConnection",
169                               fast_open_status_, FAST_OPEN_MAX_VALUE);
170   }
171   Close();
172 }
173
174 int TCPSocketLibevent::Open(AddressFamily family) {
175   DCHECK(CalledOnValidThread());
176   DCHECK_EQ(socket_, kInvalidSocket);
177
178   socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
179                                  IPPROTO_TCP);
180   if (socket_ < 0) {
181     PLOG(ERROR) << "CreatePlatformSocket() returned an error";
182     return MapSystemError(errno);
183   }
184
185   if (SetNonBlocking(socket_)) {
186     int result = MapSystemError(errno);
187     Close();
188     return result;
189   }
190
191   return OK;
192 }
193
194 int TCPSocketLibevent::AdoptConnectedSocket(int socket,
195                                             const IPEndPoint& peer_address) {
196   DCHECK(CalledOnValidThread());
197   DCHECK_EQ(socket_, kInvalidSocket);
198
199   socket_ = socket;
200
201   if (SetNonBlocking(socket_)) {
202     int result = MapSystemError(errno);
203     Close();
204     return result;
205   }
206
207   peer_address_.reset(new IPEndPoint(peer_address));
208
209   return OK;
210 }
211
212 int TCPSocketLibevent::Bind(const IPEndPoint& address) {
213   DCHECK(CalledOnValidThread());
214   DCHECK_NE(socket_, kInvalidSocket);
215
216   SockaddrStorage storage;
217   if (!address.ToSockAddr(storage.addr, &storage.addr_len))
218     return ERR_ADDRESS_INVALID;
219
220   int result = bind(socket_, storage.addr, storage.addr_len);
221   if (result < 0) {
222     PLOG(ERROR) << "bind() returned an error";
223     return MapSystemError(errno);
224   }
225
226   return OK;
227 }
228
229 int TCPSocketLibevent::Listen(int backlog) {
230   DCHECK(CalledOnValidThread());
231   DCHECK_GT(backlog, 0);
232   DCHECK_NE(socket_, kInvalidSocket);
233
234   int result = listen(socket_, backlog);
235   if (result < 0) {
236     PLOG(ERROR) << "listen() returned an error";
237     return MapSystemError(errno);
238   }
239
240   return OK;
241 }
242
243 int TCPSocketLibevent::Accept(scoped_ptr<TCPSocketLibevent>* socket,
244                               IPEndPoint* address,
245                               const CompletionCallback& callback) {
246   DCHECK(CalledOnValidThread());
247   DCHECK(socket);
248   DCHECK(address);
249   DCHECK(!callback.is_null());
250   DCHECK(accept_callback_.is_null());
251
252   net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
253
254   int result = AcceptInternal(socket, address);
255
256   if (result == ERR_IO_PENDING) {
257     if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
258             socket_, true, base::MessageLoopForIO::WATCH_READ,
259             &accept_socket_watcher_, &accept_watcher_)) {
260       PLOG(ERROR) << "WatchFileDescriptor failed on read";
261       return MapSystemError(errno);
262     }
263
264     accept_socket_ = socket;
265     accept_address_ = address;
266     accept_callback_ = callback;
267   }
268
269   return result;
270 }
271
272 int TCPSocketLibevent::Connect(const IPEndPoint& address,
273                                const CompletionCallback& callback) {
274   DCHECK(CalledOnValidThread());
275   DCHECK_NE(socket_, kInvalidSocket);
276   DCHECK(!waiting_connect_);
277
278   // |peer_address_| will be non-NULL if Connect() has been called. Unless
279   // Close() is called to reset the internal state, a second call to Connect()
280   // is not allowed.
281   // Please note that we don't allow a second Connect() even if the previous
282   // Connect() has failed. Connecting the same |socket_| again after a
283   // connection attempt failed results in unspecified behavior according to
284   // POSIX.
285   DCHECK(!peer_address_);
286
287   if (!logging_multiple_connect_attempts_)
288     LogConnectBegin(AddressList(address));
289
290   peer_address_.reset(new IPEndPoint(address));
291
292   int rv = DoConnect();
293   if (rv == ERR_IO_PENDING) {
294     // Synchronous operation not supported.
295     DCHECK(!callback.is_null());
296     write_callback_ = callback;
297     waiting_connect_ = true;
298   } else {
299     DoConnectComplete(rv);
300   }
301
302   return rv;
303 }
304
305 bool TCPSocketLibevent::IsConnected() const {
306   DCHECK(CalledOnValidThread());
307
308   if (socket_ == kInvalidSocket || waiting_connect_)
309     return false;
310
311   if (use_tcp_fastopen_ && !tcp_fastopen_connected_ && peer_address_) {
312     // With TCP FastOpen, we pretend that the socket is connected.
313     // This allows GetPeerAddress() to return peer_address_.
314     return true;
315   }
316
317   // Check if connection is alive.
318   char c;
319   int rv = HANDLE_EINTR(recv(socket_, &c, 1, MSG_PEEK));
320   if (rv == 0)
321     return false;
322   if (rv == -1 && errno != EAGAIN && errno != EWOULDBLOCK)
323     return false;
324
325   return true;
326 }
327
328 bool TCPSocketLibevent::IsConnectedAndIdle() const {
329   DCHECK(CalledOnValidThread());
330
331   if (socket_ == kInvalidSocket || waiting_connect_)
332     return false;
333
334   // TODO(wtc): should we also handle the TCP FastOpen case here,
335   // as we do in IsConnected()?
336
337   // Check if connection is alive and we haven't received any data
338   // unexpectedly.
339   char c;
340   int rv = HANDLE_EINTR(recv(socket_, &c, 1, MSG_PEEK));
341   if (rv >= 0)
342     return false;
343   if (errno != EAGAIN && errno != EWOULDBLOCK)
344     return false;
345
346   return true;
347 }
348
349 int TCPSocketLibevent::Read(IOBuffer* buf,
350                             int buf_len,
351                             const CompletionCallback& callback) {
352   DCHECK(CalledOnValidThread());
353   DCHECK_NE(kInvalidSocket, socket_);
354   DCHECK(!waiting_connect_);
355   DCHECK(read_callback_.is_null());
356   // Synchronous operation not supported
357   DCHECK(!callback.is_null());
358   DCHECK_GT(buf_len, 0);
359
360   int nread = HANDLE_EINTR(read(socket_, buf->data(), buf_len));
361   if (nread >= 0) {
362     base::StatsCounter read_bytes("tcp.read_bytes");
363     read_bytes.Add(nread);
364     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, nread,
365                                   buf->data());
366     RecordFastOpenStatus();
367     return nread;
368   }
369   if (errno != EAGAIN && errno != EWOULDBLOCK) {
370     int net_error = MapSystemError(errno);
371     net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
372                       CreateNetLogSocketErrorCallback(net_error, errno));
373     return net_error;
374   }
375
376   if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
377           socket_, true, base::MessageLoopForIO::WATCH_READ,
378           &read_socket_watcher_, &read_watcher_)) {
379     DVLOG(1) << "WatchFileDescriptor failed on read, errno " << errno;
380     return MapSystemError(errno);
381   }
382
383   read_buf_ = buf;
384   read_buf_len_ = buf_len;
385   read_callback_ = callback;
386   return ERR_IO_PENDING;
387 }
388
389 int TCPSocketLibevent::Write(IOBuffer* buf,
390                              int buf_len,
391                              const CompletionCallback& callback) {
392   DCHECK(CalledOnValidThread());
393   DCHECK_NE(kInvalidSocket, socket_);
394   DCHECK(!waiting_connect_);
395   DCHECK(write_callback_.is_null());
396   // Synchronous operation not supported
397   DCHECK(!callback.is_null());
398   DCHECK_GT(buf_len, 0);
399
400   int nwrite = InternalWrite(buf, buf_len);
401   if (nwrite >= 0) {
402     base::StatsCounter write_bytes("tcp.write_bytes");
403     write_bytes.Add(nwrite);
404     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, nwrite,
405                                   buf->data());
406     return nwrite;
407   }
408   if (errno != EAGAIN && errno != EWOULDBLOCK) {
409     int net_error = MapSystemError(errno);
410     net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
411                       CreateNetLogSocketErrorCallback(net_error, errno));
412     return net_error;
413   }
414
415   if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
416           socket_, true, base::MessageLoopForIO::WATCH_WRITE,
417           &write_socket_watcher_, &write_watcher_)) {
418     DVLOG(1) << "WatchFileDescriptor failed on write, errno " << errno;
419     return MapSystemError(errno);
420   }
421
422   write_buf_ = buf;
423   write_buf_len_ = buf_len;
424   write_callback_ = callback;
425   return ERR_IO_PENDING;
426 }
427
428 int TCPSocketLibevent::GetLocalAddress(IPEndPoint* address) const {
429   DCHECK(CalledOnValidThread());
430   DCHECK(address);
431
432   SockaddrStorage storage;
433   if (getsockname(socket_, storage.addr, &storage.addr_len) < 0)
434     return MapSystemError(errno);
435   if (!address->FromSockAddr(storage.addr, storage.addr_len))
436     return ERR_ADDRESS_INVALID;
437
438   return OK;
439 }
440
441 int TCPSocketLibevent::GetPeerAddress(IPEndPoint* address) const {
442   DCHECK(CalledOnValidThread());
443   DCHECK(address);
444   if (!IsConnected())
445     return ERR_SOCKET_NOT_CONNECTED;
446   *address = *peer_address_;
447   return OK;
448 }
449
450 int TCPSocketLibevent::SetDefaultOptionsForServer() {
451   DCHECK(CalledOnValidThread());
452   return SetAddressReuse(true);
453 }
454
455 void TCPSocketLibevent::SetDefaultOptionsForClient() {
456   DCHECK(CalledOnValidThread());
457
458   // This mirrors the behaviour on Windows. See the comment in
459   // tcp_socket_win.cc after searching for "NODELAY".
460   SetTCPNoDelay(socket_, true);  // If SetTCPNoDelay fails, we don't care.
461
462   // TCP keep alive wakes up the radio, which is expensive on mobile. Do not
463   // enable it there. It's useful to prevent TCP middleboxes from timing out
464   // connection mappings. Packets for timed out connection mappings at
465   // middleboxes will either lead to:
466   // a) Middleboxes sending TCP RSTs. It's up to higher layers to check for this
467   // and retry. The HTTP network transaction code does this.
468   // b) Middleboxes just drop the unrecognized TCP packet. This leads to the TCP
469   // stack retransmitting packets per TCP stack retransmission timeouts, which
470   // are very high (on the order of seconds). Given the number of
471   // retransmissions required before killing the connection, this can lead to
472   // tens of seconds or even minutes of delay, depending on OS.
473 #if !defined(OS_ANDROID) && !defined(OS_IOS)
474   const int kTCPKeepAliveSeconds = 45;
475
476   SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
477 #endif
478 }
479
480 int TCPSocketLibevent::SetAddressReuse(bool allow) {
481   DCHECK(CalledOnValidThread());
482
483   // SO_REUSEADDR is useful for server sockets to bind to a recently unbound
484   // port. When a socket is closed, the end point changes its state to TIME_WAIT
485   // and wait for 2 MSL (maximum segment lifetime) to ensure the remote peer
486   // acknowledges its closure. For server sockets, it is usually safe to
487   // bind to a TIME_WAIT end point immediately, which is a widely adopted
488   // behavior.
489   //
490   // Note that on *nix, SO_REUSEADDR does not enable the TCP socket to bind to
491   // an end point that is already bound by another socket. To do that one must
492   // set SO_REUSEPORT instead. This option is not provided on Linux prior
493   // to 3.9.
494   //
495   // SO_REUSEPORT is provided in MacOS X and iOS.
496   int boolean_value = allow ? 1 : 0;
497   int rv = setsockopt(socket_, SOL_SOCKET, SO_REUSEADDR, &boolean_value,
498                       sizeof(boolean_value));
499   if (rv < 0)
500     return MapSystemError(errno);
501   return OK;
502 }
503
504 int TCPSocketLibevent::SetReceiveBufferSize(int32 size) {
505   DCHECK(CalledOnValidThread());
506   int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF,
507                       reinterpret_cast<const char*>(&size), sizeof(size));
508   return (rv == 0) ? OK : MapSystemError(errno);
509 }
510
511 int TCPSocketLibevent::SetSendBufferSize(int32 size) {
512   DCHECK(CalledOnValidThread());
513   int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF,
514                       reinterpret_cast<const char*>(&size), sizeof(size));
515   return (rv == 0) ? OK : MapSystemError(errno);
516 }
517
518 bool TCPSocketLibevent::SetKeepAlive(bool enable, int delay) {
519   DCHECK(CalledOnValidThread());
520   return SetTCPKeepAlive(socket_, enable, delay);
521 }
522
523 bool TCPSocketLibevent::SetNoDelay(bool no_delay) {
524   DCHECK(CalledOnValidThread());
525   return SetTCPNoDelay(socket_, no_delay);
526 }
527
528 void TCPSocketLibevent::Close() {
529   DCHECK(CalledOnValidThread());
530
531   bool ok = accept_socket_watcher_.StopWatchingFileDescriptor();
532   DCHECK(ok);
533   ok = read_socket_watcher_.StopWatchingFileDescriptor();
534   DCHECK(ok);
535   ok = write_socket_watcher_.StopWatchingFileDescriptor();
536   DCHECK(ok);
537
538   if (socket_ != kInvalidSocket) {
539     if (IGNORE_EINTR(close(socket_)) < 0)
540       PLOG(ERROR) << "close";
541     socket_ = kInvalidSocket;
542   }
543
544   if (!accept_callback_.is_null()) {
545     accept_socket_ = NULL;
546     accept_address_ = NULL;
547     accept_callback_.Reset();
548   }
549
550   if (!read_callback_.is_null()) {
551     read_buf_ = NULL;
552     read_buf_len_ = 0;
553     read_callback_.Reset();
554   }
555
556   if (!write_callback_.is_null()) {
557     write_buf_ = NULL;
558     write_buf_len_ = 0;
559     write_callback_.Reset();
560   }
561
562   tcp_fastopen_connected_ = false;
563   fast_open_status_ = FAST_OPEN_STATUS_UNKNOWN;
564   waiting_connect_ = false;
565   peer_address_.reset();
566   connect_os_error_ = 0;
567 }
568
569 bool TCPSocketLibevent::UsingTCPFastOpen() const {
570   return use_tcp_fastopen_;
571 }
572
573 void TCPSocketLibevent::StartLoggingMultipleConnectAttempts(
574     const AddressList& addresses) {
575   if (!logging_multiple_connect_attempts_) {
576     logging_multiple_connect_attempts_ = true;
577     LogConnectBegin(addresses);
578   } else {
579     NOTREACHED();
580   }
581 }
582
583 void TCPSocketLibevent::EndLoggingMultipleConnectAttempts(int net_error) {
584   if (logging_multiple_connect_attempts_) {
585     LogConnectEnd(net_error);
586     logging_multiple_connect_attempts_ = false;
587   } else {
588     NOTREACHED();
589   }
590 }
591
592 int TCPSocketLibevent::AcceptInternal(scoped_ptr<TCPSocketLibevent>* socket,
593                                       IPEndPoint* address) {
594   SockaddrStorage storage;
595   int new_socket = HANDLE_EINTR(accept(socket_,
596                                        storage.addr,
597                                        &storage.addr_len));
598   if (new_socket < 0) {
599     int net_error = MapAcceptError(errno);
600     if (net_error != ERR_IO_PENDING)
601       net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
602     return net_error;
603   }
604
605   IPEndPoint ip_end_point;
606   if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
607     NOTREACHED();
608     if (IGNORE_EINTR(close(new_socket)) < 0)
609       PLOG(ERROR) << "close";
610     int net_error = ERR_ADDRESS_INVALID;
611     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
612     return net_error;
613   }
614   scoped_ptr<TCPSocketLibevent> tcp_socket(new TCPSocketLibevent(
615       net_log_.net_log(), net_log_.source()));
616   int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
617   if (adopt_result != OK) {
618     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
619     return adopt_result;
620   }
621   *socket = tcp_socket.Pass();
622   *address = ip_end_point;
623   net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
624                     CreateNetLogIPEndPointCallback(&ip_end_point));
625   return OK;
626 }
627
628 int TCPSocketLibevent::DoConnect() {
629   DCHECK_EQ(0, connect_os_error_);
630
631   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
632                       CreateNetLogIPEndPointCallback(peer_address_.get()));
633
634   // Connect the socket.
635   if (!use_tcp_fastopen_) {
636     SockaddrStorage storage;
637     if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
638       return ERR_ADDRESS_INVALID;
639
640     if (!HANDLE_EINTR(connect(socket_, storage.addr, storage.addr_len))) {
641       // Connected without waiting!
642       return OK;
643     }
644   } else {
645     // With TCP FastOpen, we pretend that the socket is connected.
646     DCHECK(!tcp_fastopen_connected_);
647     return OK;
648   }
649
650   // Check if the connect() failed synchronously.
651   connect_os_error_ = errno;
652   if (connect_os_error_ != EINPROGRESS)
653     return MapConnectError(connect_os_error_);
654
655   // Otherwise the connect() is going to complete asynchronously, so watch
656   // for its completion.
657   if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
658           socket_, true, base::MessageLoopForIO::WATCH_WRITE,
659           &write_socket_watcher_, &write_watcher_)) {
660     connect_os_error_ = errno;
661     DVLOG(1) << "WatchFileDescriptor failed: " << connect_os_error_;
662     return MapSystemError(connect_os_error_);
663   }
664
665   return ERR_IO_PENDING;
666 }
667
668 void TCPSocketLibevent::DoConnectComplete(int result) {
669   // Log the end of this attempt (and any OS error it threw).
670   int os_error = connect_os_error_;
671   connect_os_error_ = 0;
672   if (result != OK) {
673     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
674                       NetLog::IntegerCallback("os_error", os_error));
675   } else {
676     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
677   }
678
679   if (!logging_multiple_connect_attempts_)
680     LogConnectEnd(result);
681 }
682
683 void TCPSocketLibevent::LogConnectBegin(const AddressList& addresses) {
684   base::StatsCounter connects("tcp.connect");
685   connects.Increment();
686
687   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
688                       addresses.CreateNetLogCallback());
689 }
690
691 void TCPSocketLibevent::LogConnectEnd(int net_error) {
692   if (net_error == OK)
693     UpdateConnectionTypeHistograms(CONNECTION_ANY);
694
695   if (net_error != OK) {
696     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
697     return;
698   }
699
700   SockaddrStorage storage;
701   int rv = getsockname(socket_, storage.addr, &storage.addr_len);
702   if (rv != 0) {
703     PLOG(ERROR) << "getsockname() [rv: " << rv << "] error: ";
704     NOTREACHED();
705     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
706     return;
707   }
708
709   net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT,
710                     CreateNetLogSourceAddressCallback(storage.addr,
711                                                       storage.addr_len));
712 }
713
714 void TCPSocketLibevent::DidCompleteRead() {
715   RecordFastOpenStatus();
716   if (read_callback_.is_null())
717     return;
718
719   int bytes_transferred;
720   bytes_transferred = HANDLE_EINTR(read(socket_, read_buf_->data(),
721                                         read_buf_len_));
722
723   int result;
724   if (bytes_transferred >= 0) {
725     result = bytes_transferred;
726     base::StatsCounter read_bytes("tcp.read_bytes");
727     read_bytes.Add(bytes_transferred);
728     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, result,
729                                   read_buf_->data());
730   } else {
731     result = MapSystemError(errno);
732     if (result != ERR_IO_PENDING) {
733       net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
734                         CreateNetLogSocketErrorCallback(result, errno));
735     }
736   }
737
738   if (result != ERR_IO_PENDING) {
739     read_buf_ = NULL;
740     read_buf_len_ = 0;
741     bool ok = read_socket_watcher_.StopWatchingFileDescriptor();
742     DCHECK(ok);
743     base::ResetAndReturn(&read_callback_).Run(result);
744   }
745 }
746
747 void TCPSocketLibevent::DidCompleteWrite() {
748   if (write_callback_.is_null())
749     return;
750
751   int bytes_transferred;
752   bytes_transferred = HANDLE_EINTR(write(socket_, write_buf_->data(),
753                                          write_buf_len_));
754
755   int result;
756   if (bytes_transferred >= 0) {
757     result = bytes_transferred;
758     base::StatsCounter write_bytes("tcp.write_bytes");
759     write_bytes.Add(bytes_transferred);
760     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, result,
761                                   write_buf_->data());
762   } else {
763     result = MapSystemError(errno);
764     if (result != ERR_IO_PENDING) {
765       net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
766                         CreateNetLogSocketErrorCallback(result, errno));
767     }
768   }
769
770   if (result != ERR_IO_PENDING) {
771     write_buf_ = NULL;
772     write_buf_len_ = 0;
773     write_socket_watcher_.StopWatchingFileDescriptor();
774     base::ResetAndReturn(&write_callback_).Run(result);
775   }
776 }
777
778 void TCPSocketLibevent::DidCompleteConnect() {
779   DCHECK(waiting_connect_);
780
781   // Get the error that connect() completed with.
782   int os_error = 0;
783   socklen_t len = sizeof(os_error);
784   if (getsockopt(socket_, SOL_SOCKET, SO_ERROR, &os_error, &len) < 0)
785     os_error = errno;
786
787   int result = MapConnectError(os_error);
788   connect_os_error_ = os_error;
789   if (result != ERR_IO_PENDING) {
790     DoConnectComplete(result);
791     waiting_connect_ = false;
792     write_socket_watcher_.StopWatchingFileDescriptor();
793     base::ResetAndReturn(&write_callback_).Run(result);
794   }
795 }
796
797 void TCPSocketLibevent::DidCompleteConnectOrWrite() {
798   if (waiting_connect_)
799     DidCompleteConnect();
800   else
801     DidCompleteWrite();
802 }
803
804 void TCPSocketLibevent::DidCompleteAccept() {
805   DCHECK(CalledOnValidThread());
806
807   int result = AcceptInternal(accept_socket_, accept_address_);
808   if (result != ERR_IO_PENDING) {
809     accept_socket_ = NULL;
810     accept_address_ = NULL;
811     bool ok = accept_socket_watcher_.StopWatchingFileDescriptor();
812     DCHECK(ok);
813     CompletionCallback callback = accept_callback_;
814     accept_callback_.Reset();
815     callback.Run(result);
816   }
817 }
818
819 int TCPSocketLibevent::InternalWrite(IOBuffer* buf, int buf_len) {
820   int nwrite;
821   if (use_tcp_fastopen_ && !tcp_fastopen_connected_) {
822     SockaddrStorage storage;
823     if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len)) {
824       // Set errno to EADDRNOTAVAIL so that MapSystemError will map it to
825       // ERR_ADDRESS_INVALID later.
826       errno = EADDRNOTAVAIL;
827       return -1;
828     }
829
830     int flags = 0x20000000; // Magic flag to enable TCP_FASTOPEN.
831 #if defined(OS_LINUX)
832     // sendto() will fail with EPIPE when the system doesn't support TCP Fast
833     // Open. Theoretically that shouldn't happen since the caller should check
834     // for system support on startup, but users may dynamically disable TCP Fast
835     // Open via sysctl.
836     flags |= MSG_NOSIGNAL;
837 #endif // defined(OS_LINUX)
838     nwrite = HANDLE_EINTR(sendto(socket_,
839                                  buf->data(),
840                                  buf_len,
841                                  flags,
842                                  storage.addr,
843                                  storage.addr_len));
844     tcp_fastopen_connected_ = true;
845
846     if (nwrite < 0) {
847       DCHECK_NE(EPIPE, errno);
848
849       // If errno == EINPROGRESS, that means the kernel didn't have a cookie
850       // and would block. The kernel is internally doing a connect() though.
851       // Remap EINPROGRESS to EAGAIN so we treat this the same as our other
852       // asynchronous cases. Note that the user buffer has not been copied to
853       // kernel space.
854       if (errno == EINPROGRESS) {
855         errno = EAGAIN;
856         fast_open_status_ = FAST_OPEN_SLOW_CONNECT_RETURN;
857       } else {
858         fast_open_status_ = FAST_OPEN_ERROR;
859       }
860     } else {
861       fast_open_status_ = FAST_OPEN_FAST_CONNECT_RETURN;
862     }
863   } else {
864     nwrite = HANDLE_EINTR(write(socket_, buf->data(), buf_len));
865   }
866   return nwrite;
867 }
868
869 void TCPSocketLibevent::RecordFastOpenStatus() {
870   if (use_tcp_fastopen_ &&
871       (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN ||
872        fast_open_status_ == FAST_OPEN_SLOW_CONNECT_RETURN)) {
873     DCHECK_NE(FAST_OPEN_STATUS_UNKNOWN, fast_open_status_);
874     bool getsockopt_success(false);
875     bool server_acked_data(false);
876 #if defined(TCP_INFO)
877     // Probe to see the if the socket used TCP Fast Open.
878     tcp_info info;
879     socklen_t info_len = sizeof(tcp_info);
880     getsockopt_success =
881         getsockopt(socket_, IPPROTO_TCP, TCP_INFO, &info, &info_len) == 0 &&
882         info_len == sizeof(tcp_info);
883     server_acked_data = getsockopt_success &&
884         (info.tcpi_options & TCPI_OPT_SYN_DATA);
885 #endif
886     if (getsockopt_success) {
887       if (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN) {
888         fast_open_status_ = (server_acked_data ? FAST_OPEN_SYN_DATA_ACK :
889                              FAST_OPEN_SYN_DATA_NACK);
890       } else {
891         fast_open_status_ = (server_acked_data ? FAST_OPEN_NO_SYN_DATA_ACK :
892                              FAST_OPEN_NO_SYN_DATA_NACK);
893       }
894     } else {
895       fast_open_status_ = (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN ?
896                            FAST_OPEN_SYN_DATA_FAILED :
897                            FAST_OPEN_NO_SYN_DATA_FAILED);
898     }
899   }
900 }
901
902 }  // namespace net