If set, disable the director function while the server is
in backup mode to avoid packet loops for DR/TUN methods.
+conn_reuse_mode - INTEGER
+ 1 - default
+
+ Controls how ipvs will deal with connections that are detected
+ port reuse. It is a bitmap, with the values being:
+
+ 0: disable any special handling on port reuse. The new
+ connection will be delivered to the same real server that was
+ servicing the previous connection. This will effectively
+ disable expire_nodest_conn.
+
+ bit 1: enable rescheduling of new connections when it is safe.
+ That is, whenever expire_nodest_conn and for TCP sockets, when
+ the connection is in TIME_WAIT state (which is only possible if
+ you use NAT mode).
+
+ bit 2: it is bit 1 plus, for TCP connections, when connections
+ are in FIN_WAIT state, as this is the last state seen by load
+ balancer in Direct Routing mode. This bit helps on adding new
+ real servers to a very busy cluster.
+
conntrack - BOOLEAN
0 - disabled (default)
not 0 - enabled
int sysctl_nat_icmp_send;
int sysctl_pmtu_disc;
int sysctl_backup_only;
+ int sysctl_conn_reuse_mode;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
ipvs->sysctl_backup_only;
}
+static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_conn_reuse_mode;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
return 0;
}
+static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
+{
+ return 1;
+}
+
#endif
/* IPVS core functions
}
}
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+ int conn_reuse_mode)
+{
+ /* Controlled (FTP DATA or persistence)? */
+ if (cp->control)
+ return false;
+
+ switch (cp->protocol) {
+ case IPPROTO_TCP:
+ return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+ ((conn_reuse_mode & 2) &&
+ (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+ (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+ case IPPROTO_SCTP:
+ return cp->state == IP_VS_SCTP_S_CLOSED;
+ default:
+ return false;
+ }
+}
+
/* Handle response packets: rewrite addresses and send away...
*/
static unsigned int
struct ip_vs_conn *cp;
int ret, pkts;
struct netns_ipvs *ipvs;
+ int conn_reuse_mode;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
*/
cp = pp->conn_in_get(af, skb, &iph, 0);
- if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
- unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
- is_new_conn(skb, &iph)) {
- ip_vs_conn_expire_now(cp);
+ conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+ if (conn_reuse_mode && !iph.fragoffs &&
+ is_new_conn(skb, &iph) && cp &&
+ ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+ unlikely(!atomic_read(&cp->dest->weight))) ||
+ unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
+ if (!atomic_read(&cp->n_control))
+ ip_vs_conn_expire_now(cp);
__ip_vs_conn_put(cp);
cp = NULL;
}
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "conn_reuse_mode",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
ipvs->sysctl_pmtu_disc = 1;
tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
tbl[idx++].data = &ipvs->sysctl_backup_only;
+ ipvs->sysctl_conn_reuse_mode = 1;
+ tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
- else
+ if (cp && ((cp->dport != dport) ||
+ !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+ if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+ ip_vs_conn_expire_now(cp);
+ __ip_vs_conn_put(cp);
+ cp = NULL;
+ } else {
+ /* This is the expiration message for the
+ * connection that was already replaced, so we
+ * just ignore it.
+ */
+ __ip_vs_conn_put(cp);
+ kfree(param->pe_data);
+ return;
+ }
+ }
+ } else {
cp = ip_vs_ct_in_get(param);
+ }
if (cp) {
/* Free pe_data */