pxtcp.c revision f9b7c399f212857656a229cce0d8335ab16798ec
/* -*- indent-tabs-mode: nil; -*- */
#define LOG_GROUP LOG_GROUP_NAT_SERVICE
#include "winutils.h"
#include "pxtcp.h"
#include "proxy.h"
#include "proxy_pollmgr.h"
#include "pxremap.h"
#include "portfwd.h" /* fwspec */
#ifndef RT_OS_WINDOWS
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#ifdef RT_OS_SOLARIS
#include <sys/filio.h> /* FIONREAD is BSD'ism */
#endif
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <poll.h>
#include <err.h> /* BSD'ism */
#else
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <iprt/stdint.h>
#include "winpoll.h"
#endif
#include "lwip/opt.h"
#include "lwip/sys.h"
#include "lwip/tcpip.h"
#include "lwip/netif.h"
#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
#include "lwip/icmp.h"
#include "lwip/icmp6.h"
/* NetBSD doesn't report POLLHUP for TCP sockets */
#ifdef __NetBSD__
# define HAVE_TCP_POLLHUP 0
#else
# define HAVE_TCP_POLLHUP 1
#endif
/**
 * Ring buffer for inbound data.  Filled with data from the host
 * socket on the poll manager thread.  The data is consumed by
 * scheduling tcp_write() to the pcb on the lwip thread.
 *
 * NB: There is actually a third party present, the lwip stack itself.
 * Thus the buffer doesn't have the usual dual free vs. data split, but
 * rather a three-way split: free space / sent but unACKed data /
 * unsent data (see the layout sketch after the struct declaration).
 */
struct ringbuf {
char *buf;
size_t bufsize;
/*
* Start of free space, producer writes here (up till "unacked").
*/
volatile size_t vacant;
/*
* Start of sent but unacknowledged data. The data are "owned" by
* the stack as it may need to retransmit. This is the free space
* limit for producer.
*/
volatile size_t unacked;
/*
* Start of unsent data, consumer reads/sends from here (up till
* "vacant"). Not declared volatile since it's only accessed from
* the consumer thread.
*/
size_t unsent;
};
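/*
 * A rough sketch of the index layout (one possible snapshot; all
 * indices wrap modulo bufsize):
 *
 *            unacked           unsent           vacant
 *               |                 |                |
 *               v                 v                v
 *   ...free...  [ sent, unACKed   |  not yet sent  ]  ...free...
 *
 * The producer (poll manager thread) writes at "vacant" and may
 * advance it up to, but not into, "unacked".  The consumer (lwip
 * thread) passes [unsent, vacant) to tcp_write() and advances
 * "unsent".  ACKs from the guest advance "unacked" in
 * pxtcp_pcb_sent(), returning space to the producer.
 */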
/**
 * Proxied TCP connection: glue between an lwip pcb (guest side) and a
 * host socket (external side).
 */
struct pxtcp {
/**
* Our poll manager handler. Must be first, strong/weak
* references depend on this "inheritance".
*/
struct pollmgr_handler pmhdl;
/**
* lwIP (internal/guest) side of the proxied connection.
*/
struct tcp_pcb *pcb;
/**
* Host (external) side of the proxied connection.
*/
SOCKET sock;
/**
* Socket events we are currently polling for.
*/
int events;
/**
* Socket error. Currently used to save connect(2) errors so that
* we can decide if we need to send ICMP error.
*/
int sockerr;
/**
* Interface that we have got the SYN from. Needed to send ICMP
* with correct source address.
*/
struct netif *netif;
/**
* For tentatively accepted connections for which we are in
* process of connecting to the real destination this is the
* initial pbuf that we might need to build ICMP error.
*
* When connection is established this is used to hold outbound
* pbuf chain received by pxtcp_pcb_recv() but not yet completely
* forwarded over the socket. We cannot "return" it to lwIP since
* the head of the chain is already sent and freed.
*/
struct pbuf *unsent;
/**
* Guest has closed its side. Reported to pxtcp_pcb_recv() only
* once and we might not be able to forward it immediately if we
* have unsent pbuf.
*/
int outbound_close;
/**
* Outbound half-close has been done on the socket.
*/
int outbound_close_done;
/**
* External has closed its side. We might not be able to forward
* it immediately if we have unforwarded data.
*/
int inbound_close;
/**
* Inbound half-close has been done on the pcb.
*/
int inbound_close_done;
/**
* On systems that report POLLHUP as soon as the final FIN is
* received on a socket we cannot continue polling for the rest of
* input, so we have to read (pull) last data from the socket on
* the lwIP thread instead of polling/pushing it from the poll
* manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
*/
int inbound_pull;
/**
* When poll manager schedules delete we may not be able to delete
* a pxtcp immediately if not all inbound data has been acked by
* the guest: lwIP may need to resend and the data are in pxtcp's
     * inbuf::buf.  We defer the delete until all data are acked;
     * pxtcp_pcb_sent() performs it then.
*
* It's also implied by inbound_pull. It probably means that
* "deferred" is not a very fortunate name.
*/
int deferred_delete;
/**
* Ring-buffer for inbound data.
*/
struct ringbuf inbuf;
/**
* lwIP thread's strong reference to us.
*/
struct pollmgr_refptr *rp;
/*
     * We use static messages to call functions on the lwIP thread to
     * avoid malloc/free overhead.
*/
struct tcpip_msg msg_delete; /* delete pxtcp */
struct tcpip_msg msg_reset; /* reset connection and delete pxtcp */
struct tcpip_msg msg_accept; /* confirm accept of proxied connection */
struct tcpip_msg msg_outbound; /* trigger send of outbound data */
struct tcpip_msg msg_inbound; /* trigger send of inbound data */
struct tcpip_msg msg_inpull; /* trigger pull of last inbound data */
};
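/*
 * A rough sketch of the half-close flags above (one common ordering;
 * either side may close first):
 *
 *   guest sends FIN -> outbound_close       (pxtcp_pcb_recv() gets NULL)
 *                   -> outbound_close_done  (shutdown(sock, SHUT_WR) issued)
 *   host sends EOF  -> inbound_close        (socket read returns 0)
 *                   -> inbound_close_done   (tcp_shutdown() on the pcb done)
 *
 * Only when both "done" flags are set and all inbound data has been
 * acked by the guest can the pxtcp finally be deleted (possibly via
 * deferred_delete).
 */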
static struct pxtcp *pxtcp_allocate(void);
static void pxtcp_free(struct pxtcp *);
static void pxtcp_pcb_associate(struct pxtcp *, struct tcp_pcb *);
static void pxtcp_pcb_dissociate(struct pxtcp *);
/* poll manager callbacks for pxtcp related channels */
static int pxtcp_pmgr_chan_add(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollout(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_chan_pollin(struct pollmgr_handler *, SOCKET, int);
#if !HAVE_TCP_POLLHUP
static int pxtcp_pmgr_chan_del(struct pollmgr_handler *, SOCKET, int);
#endif
static int pxtcp_pmgr_chan_reset(struct pollmgr_handler *, SOCKET, int);
/* helper functions for sending/receiving pxtcp over poll manager channels */
static ssize_t pxtcp_chan_send(enum pollmgr_slot_t, struct pxtcp *);
static ssize_t pxtcp_chan_send_weak(enum pollmgr_slot_t, struct pxtcp *);
static struct pxtcp *pxtcp_chan_recv(struct pollmgr_handler *, SOCKET, int);
static struct pxtcp *pxtcp_chan_recv_strong(struct pollmgr_handler *, SOCKET, int);
/* poll manager callbacks for individual sockets */
static int pxtcp_pmgr_connect(struct pollmgr_handler *, SOCKET, int);
static int pxtcp_pmgr_pump(struct pollmgr_handler *, SOCKET, int);
/* get incoming traffic into ring buffer */
static ssize_t pxtcp_sock_read(struct pxtcp *, int *);
static ssize_t pxtcp_sock_recv(struct pxtcp *, IOVEC *, size_t); /* default */
/* convenience functions for poll manager callbacks */
static int pxtcp_schedule_delete(struct pxtcp *);
static int pxtcp_schedule_reset(struct pxtcp *);
static int pxtcp_schedule_reject(struct pxtcp *);
/* lwip thread callbacks called via proxy_lwip_post() */
static void pxtcp_pcb_delete_pxtcp(void *);
static void pxtcp_pcb_reset_pxtcp(void *);
static void pxtcp_pcb_accept_refuse(void *);
static void pxtcp_pcb_accept_confirm(void *);
static void pxtcp_pcb_write_outbound(void *);
static void pxtcp_pcb_write_inbound(void *);
static void pxtcp_pcb_pull_inbound(void *);
/* tcp pcb callbacks */
static err_t pxtcp_pcb_heard(void *, struct tcp_pcb *, err_t); /* global */
static err_t pxtcp_pcb_accept(void *, struct tcp_pcb *, err_t);
static err_t pxtcp_pcb_connected(void *, struct tcp_pcb *, err_t);
static err_t pxtcp_pcb_recv(void *, struct tcp_pcb *, struct pbuf *, err_t);
static err_t pxtcp_pcb_sent(void *, struct tcp_pcb *, u16_t);
static err_t pxtcp_pcb_poll(void *, struct tcp_pcb *);
static void pxtcp_pcb_err(void *, err_t);
static err_t pxtcp_pcb_forward_outbound(struct pxtcp *, struct pbuf *);
static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
static ssize_t pxtcp_sock_send(struct pxtcp *, IOVEC *, size_t);
static void pxtcp_pcb_forward_inbound(struct pxtcp *);
static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
DECLINLINE(int) pxtcp_pcb_forward_inbound_done(const struct pxtcp *);
static void pxtcp_pcb_schedule_poll(struct pxtcp *);
static void pxtcp_pcb_cancel_poll(struct pxtcp *);
static void pxtcp_pcb_reject(struct netif *, struct tcp_pcb *, struct pbuf *, int);
DECLINLINE(void) pxtcp_pcb_maybe_deferred_delete(struct pxtcp *);
/* poll manager handlers for pxtcp channels */
static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
#if !HAVE_TCP_POLLHUP
static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
#endif
static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
/**
 * Init PXTCP - must be run before either the lwIP tcpip thread or the
 * poll manager threads have been created.
*/
void
pxtcp_init(void)
{
/*
* Create channels.
*/
#define CHANNEL(SLOT, NAME) do { \
NAME##_hdl.callback = NAME; \
NAME##_hdl.data = NULL; \
NAME##_hdl.slot = -1; \
pollmgr_add_chan(SLOT, &NAME##_hdl); \
} while (0)
CHANNEL(POLLMGR_CHAN_PXTCP_ADD, pxtcp_pmgr_chan_add);
CHANNEL(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp_pmgr_chan_pollin);
CHANNEL(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp_pmgr_chan_pollout);
#if !HAVE_TCP_POLLHUP
CHANNEL(POLLMGR_CHAN_PXTCP_DEL, pxtcp_pmgr_chan_del);
#endif
CHANNEL(POLLMGR_CHAN_PXTCP_RESET, pxtcp_pmgr_chan_reset);
#undef CHANNEL
/*
* Listen to outgoing connection from guest(s).
*/
tcp_proxy_accept(pxtcp_pcb_heard);
}
/**
* Syntactic sugar for sending pxtcp pointer over poll manager
* channel. Used by lwip thread functions.
*/
static ssize_t
pxtcp_chan_send(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
{
return pollmgr_chan_send(slot, &pxtcp, sizeof(pxtcp));
}
/**
* Syntactic sugar for sending weak reference to pxtcp over poll
* manager channel. Used by lwip thread functions.
*/
static ssize_t
pxtcp_chan_send_weak(enum pollmgr_slot_t slot, struct pxtcp *pxtcp)
{
pollmgr_refptr_weak_ref(pxtcp->rp);
return pollmgr_chan_send(slot, &pxtcp->rp, sizeof(pxtcp->rp));
}
/**
* Counterpart of pxtcp_chan_send().
*/
static struct pxtcp *
pxtcp_chan_recv(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
pxtcp = (struct pxtcp *)pollmgr_chan_recv_ptr(handler, fd, revents);
return pxtcp;
}
/**
* Counterpart of pxtcp_chan_send_weak().
*/
static struct pxtcp *
pxtcp_chan_recv_strong(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pollmgr_refptr *rp;
struct pollmgr_handler *base;
struct pxtcp *pxtcp;
rp = (struct pollmgr_refptr *)pollmgr_chan_recv_ptr(handler, fd, revents);
base = (struct pollmgr_handler *)pollmgr_refptr_get(rp);
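    /*
     * The reference is to the embedded pmhdl; since pmhdl is the first
     * member of struct pxtcp (see the struct declaration) the handler
     * pointer doubles as the pxtcp pointer.  NULL means the pxtcp has
     * already been deleted.
     */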
pxtcp = (struct pxtcp *)base;
return pxtcp;
}
/**
* Register pxtcp with poll manager.
*
* Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
* error handling is different in these two cases, we leave it up to
* the caller.
*/
int
pxtcp_pmgr_add(struct pxtcp *pxtcp)
{
int status;
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->sock >= 0);
LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
status = pollmgr_add(&pxtcp->pmhdl, pxtcp->sock, pxtcp->events);
return status;
}
/**
* Unregister pxtcp with poll manager.
*
* Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
* leg).
*/
void
pxtcp_pmgr_del(struct pxtcp *pxtcp)
{
LWIP_ASSERT1(pxtcp != NULL);
pollmgr_del_slot(pxtcp->pmhdl.slot);
}
/**
* POLLMGR_CHAN_PXTCP_ADD handler.
*
* Get new pxtcp from lwip thread and start polling its socket.
*/
static int
pxtcp_pmgr_chan_add(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
int status;
pxtcp = pxtcp_chan_recv(handler, fd, revents);
DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
(void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
status = pxtcp_pmgr_add(pxtcp);
if (status < 0) {
(void) pxtcp_schedule_reset(pxtcp);
}
return POLLIN;
}
/**
* POLLMGR_CHAN_PXTCP_POLLOUT handler.
*
* pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
* and failed, it now requests us to poll the socket for POLLOUT and
* schedule pxtcp_pcb_forward_outbound() when sock is writable again.
*/
static int
pxtcp_pmgr_chan_pollout(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
DPRINTF0(("pxtcp_pollout: pxtcp %p\n", (void *)pxtcp));
if (pxtcp == NULL) {
return POLLIN;
}
LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
pxtcp->events |= POLLOUT;
pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
return POLLIN;
}
/**
* POLLMGR_CHAN_PXTCP_POLLIN handler.
*/
static int
pxtcp_pmgr_chan_pollin(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
DPRINTF2(("pxtcp_pollin: pxtcp %p\n", (void *)pxtcp));
if (pxtcp == NULL) {
return POLLIN;
}
LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
LWIP_ASSERT1(pxtcp->pmhdl.slot > 0);
if (pxtcp->inbound_close) {
return POLLIN;
}
pxtcp->events |= POLLIN;
pollmgr_update_events(pxtcp->pmhdl.slot, pxtcp->events);
return POLLIN;
}
#if !HAVE_TCP_POLLHUP
/**
* POLLMGR_CHAN_PXTCP_DEL handler.
*
* Schedule pxtcp deletion. We only need this if host system doesn't
* report POLLHUP for fully closed tcp sockets.
*/
static int
pxtcp_pmgr_chan_del(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
if (pxtcp == NULL) {
return POLLIN;
}
DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
(void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
LWIP_ASSERT1(pxtcp->inbound_close); /* EOF read */
LWIP_ASSERT1(pxtcp->outbound_close_done); /* EOF sent */
pxtcp_pmgr_del(pxtcp);
(void) pxtcp_schedule_delete(pxtcp);
return POLLIN;
}
#endif /* !HAVE_TCP_POLLHUP */
/**
* POLLMGR_CHAN_PXTCP_RESET handler.
*
* Close the socket with RST and delete pxtcp.
*/
static int
pxtcp_pmgr_chan_reset(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
pxtcp = pxtcp_chan_recv_strong(handler, fd, revents);
if (pxtcp == NULL) {
return POLLIN;
}
DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
(void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
LWIP_ASSERT1(pxtcp->pmhdl.callback != NULL);
LWIP_ASSERT1(pxtcp->pmhdl.data == (void *)pxtcp);
pxtcp_pmgr_del(pxtcp);
proxy_reset_socket(pxtcp->sock);
pxtcp->sock = INVALID_SOCKET;
(void) pxtcp_schedule_reset(pxtcp);
return POLLIN;
}
static struct pxtcp *
pxtcp_allocate(void)
{
struct pxtcp *pxtcp;
pxtcp = (struct pxtcp *)malloc(sizeof(*pxtcp));
if (pxtcp == NULL) {
return NULL;
}
pxtcp->pmhdl.callback = NULL;
pxtcp->pmhdl.data = (void *)pxtcp;
pxtcp->pmhdl.slot = -1;
pxtcp->pcb = NULL;
pxtcp->sock = INVALID_SOCKET;
pxtcp->events = 0;
pxtcp->sockerr = 0;
pxtcp->netif = NULL;
pxtcp->unsent = NULL;
pxtcp->outbound_close = 0;
pxtcp->outbound_close_done = 0;
pxtcp->inbound_close = 0;
pxtcp->inbound_close_done = 0;
pxtcp->inbound_pull = 0;
pxtcp->deferred_delete = 0;
pxtcp->inbuf.bufsize = 64 * 1024;
pxtcp->inbuf.buf = (char *)malloc(pxtcp->inbuf.bufsize);
if (pxtcp->inbuf.buf == NULL) {
free(pxtcp);
return NULL;
}
pxtcp->inbuf.vacant = 0;
pxtcp->inbuf.unacked = 0;
pxtcp->inbuf.unsent = 0;
pxtcp->rp = pollmgr_refptr_create(&pxtcp->pmhdl);
if (pxtcp->rp == NULL) {
free(pxtcp->inbuf.buf);
free(pxtcp);
return NULL;
}
#define CALLBACK_MSG(MSG, FUNC) \
do { \
pxtcp->MSG.type = TCPIP_MSG_CALLBACK_STATIC; \
pxtcp->MSG.sem = NULL; \
pxtcp->MSG.msg.cb.function = FUNC; \
pxtcp->MSG.msg.cb.ctx = (void *)pxtcp; \
} while (0)
CALLBACK_MSG(msg_delete, pxtcp_pcb_delete_pxtcp);
CALLBACK_MSG(msg_reset, pxtcp_pcb_reset_pxtcp);
CALLBACK_MSG(msg_accept, pxtcp_pcb_accept_confirm);
CALLBACK_MSG(msg_outbound, pxtcp_pcb_write_outbound);
CALLBACK_MSG(msg_inbound, pxtcp_pcb_write_inbound);
CALLBACK_MSG(msg_inpull, pxtcp_pcb_pull_inbound);
#undef CALLBACK_MSG
return pxtcp;
}
/**
* Exported to fwtcp to create pxtcp for incoming port-forwarded
* connections. Completed with pcb in pxtcp_pcb_connect().
*/
struct pxtcp *
pxtcp_create_forwarded(SOCKET sock)
{
struct pxtcp *pxtcp;
pxtcp = pxtcp_allocate();
if (pxtcp == NULL) {
return NULL;
}
pxtcp->sock = sock;
pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
pxtcp->events = 0;
return pxtcp;
}
static void
pxtcp_pcb_associate(struct pxtcp *pxtcp, struct tcp_pcb *pcb)
{
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pcb != NULL);
pxtcp->pcb = pcb;
tcp_arg(pcb, pxtcp);
tcp_recv(pcb, pxtcp_pcb_recv);
tcp_sent(pcb, pxtcp_pcb_sent);
tcp_poll(pcb, NULL, 255);
tcp_err(pcb, pxtcp_pcb_err);
}
static void
pxtcp_free(struct pxtcp *pxtcp)
{
if (pxtcp->unsent != NULL) {
pbuf_free(pxtcp->unsent);
}
if (pxtcp->inbuf.buf != NULL) {
free(pxtcp->inbuf.buf);
}
free(pxtcp);
}
/**
 * Counterpart to pxtcp_create_forwarded(): destroys a pxtcp that fwtcp
 * failed to register with the poll manager and hence never posted to
 * the lwip thread to do the connect.
*/
void
pxtcp_cancel_forwarded(struct pxtcp *pxtcp)
{
LWIP_ASSERT1(pxtcp->pcb == NULL);
pxtcp_pcb_reset_pxtcp(pxtcp);
}
static void
pxtcp_pcb_dissociate(struct pxtcp *pxtcp)
{
if (pxtcp == NULL || pxtcp->pcb == NULL) {
return;
}
DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb));
/*
* We must have dissociated from a fully closed pcb immediately
     * since lwip recycles them and we don't want to mess with what
* would be someone else's pcb that we happen to have a stale
* pointer to.
*/
LWIP_ASSERT1(pxtcp->pcb->callback_arg == pxtcp);
tcp_recv(pxtcp->pcb, NULL);
tcp_sent(pxtcp->pcb, NULL);
tcp_poll(pxtcp->pcb, NULL, 255);
tcp_err(pxtcp->pcb, NULL);
tcp_arg(pxtcp->pcb, NULL);
pxtcp->pcb = NULL;
}
/**
* Lwip thread callback invoked via pxtcp::msg_delete
*
* Since we use static messages to communicate to the lwip thread, we
* cannot delete pxtcp without making sure there are no unprocessed
* messages in the lwip thread mailbox.
*
* The easiest way to ensure that is to send this "delete" message as
* the last one and when it's processed we know there are no more and
* it's safe to delete pxtcp.
*
* Poll manager handlers should use pxtcp_schedule_delete()
* convenience function.
*/
static void
pxtcp_pcb_delete_pxtcp(void *ctx)
{
struct pxtcp *pxtcp = (struct pxtcp *)ctx;
DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock,
(pxtcp->deferred_delete && !pxtcp->inbound_pull
? " (was deferred)" : "")));
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->pmhdl.slot < 0);
LWIP_ASSERT1(pxtcp->outbound_close_done);
LWIP_ASSERT1(pxtcp->inbound_close); /* not necessarily done */
/*
* pxtcp is no longer registered with poll manager, so it's safe
* to close the socket.
*/
if (pxtcp->sock != INVALID_SOCKET) {
closesocket(pxtcp->sock);
pxtcp->sock = INVALID_SOCKET;
}
/*
* We might have already dissociated from a fully closed pcb, or
* guest might have sent us a reset while msg_delete was in
* transit. If there's no pcb, we are done.
*/
if (pxtcp->pcb == NULL) {
pollmgr_refptr_unref(pxtcp->rp);
pxtcp_free(pxtcp);
return;
}
/*
* Have we completely forwarded all inbound traffic to the guest?
*
* We may still be waiting for ACKs. We may have failed to send
* some of the data (tcp_write() failed with ERR_MEM). We may
* have failed to send the FIN (tcp_shutdown() failed with
* ERR_MEM).
*/
if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
pxtcp_pcb_dissociate(pxtcp);
pollmgr_refptr_unref(pxtcp->rp);
pxtcp_free(pxtcp);
}
else {
DPRINTF2(("delete: pxtcp %p; pcb %p:"
" unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
(void *)pxtcp, (void *)pxtcp->pcb,
(int)pxtcp->inbuf.unacked,
(int)pxtcp->inbuf.unsent,
(int)pxtcp->inbuf.vacant,
pxtcp->inbound_close_done ? "FIN sent" : "FIN is NOT sent"));
LWIP_ASSERT1(!pxtcp->deferred_delete);
pxtcp->deferred_delete = 1;
}
}
/**
* If we couldn't delete pxtcp right away in the msg_delete callback
* from the poll manager thread, we repeat the check at the end of
* relevant pcb callbacks.
*/
DECLINLINE(void)
pxtcp_pcb_maybe_deferred_delete(struct pxtcp *pxtcp)
{
if (pxtcp->deferred_delete && pxtcp_pcb_forward_inbound_done(pxtcp)) {
pxtcp_pcb_delete_pxtcp(pxtcp);
}
}
/**
* Poll manager callbacks should use this convenience wrapper to
* schedule pxtcp deletion on the lwip thread and to deregister from
* the poll manager.
*/
static int
pxtcp_schedule_delete(struct pxtcp *pxtcp)
{
/*
* If pollmgr_refptr_get() is called by any channel before
* scheduled deletion happens, let them know we are gone.
*/
pxtcp->pmhdl.slot = -1;
/*
* Schedule deletion. Since poll manager thread may be pre-empted
* right after we send the message, the deletion may actually
* happen on the lwip thread before we return from this function,
* so it's not safe to refer to pxtcp after this call.
*/
proxy_lwip_post(&pxtcp->msg_delete);
/* tell poll manager to deregister us */
return -1;
}
/**
* Lwip thread callback invoked via pxtcp::msg_reset
*
 * Like pxtcp_pcb_delete_pxtcp(), but sends RST to the guest before
* deleting this pxtcp.
*/
static void
pxtcp_pcb_reset_pxtcp(void *ctx)
{
struct pxtcp *pxtcp = (struct pxtcp *)ctx;
LWIP_ASSERT1(pxtcp != NULL);
DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
if (pxtcp->sock != INVALID_SOCKET) {
proxy_reset_socket(pxtcp->sock);
pxtcp->sock = INVALID_SOCKET;
}
if (pxtcp->pcb != NULL) {
struct tcp_pcb *pcb = pxtcp->pcb;
pxtcp_pcb_dissociate(pxtcp);
tcp_abort(pcb);
}
pollmgr_refptr_unref(pxtcp->rp);
pxtcp_free(pxtcp);
}
/**
* Poll manager callbacks should use this convenience wrapper to
* schedule pxtcp reset and deletion on the lwip thread and to
* deregister from the poll manager.
*
* See pxtcp_schedule_delete() for additional comments.
*/
static int
pxtcp_schedule_reset(struct pxtcp *pxtcp)
{
pxtcp->pmhdl.slot = -1;
proxy_lwip_post(&pxtcp->msg_reset);
return -1;
}
/**
* Reject proxy connection attempt. Depending on the cause (sockerr)
* we may just drop the pcb silently, generate an ICMP datagram or
* send TCP reset.
*/
static void
pxtcp_pcb_reject(struct netif *netif, struct tcp_pcb *pcb,
struct pbuf *p, int sockerr)
{
struct netif *oif;
int reset = 0;
oif = ip_current_netif();
ip_current_netif() = netif;
if (sockerr == ECONNREFUSED) {
reset = 1;
}
else if (PCB_ISIPV6(pcb)) {
if (sockerr == EHOSTDOWN) {
icmp6_dest_unreach(p, ICMP6_DUR_ADDRESS); /* XXX: ??? */
}
else if (sockerr == EHOSTUNREACH
|| sockerr == ENETDOWN
|| sockerr == ENETUNREACH)
{
icmp6_dest_unreach(p, ICMP6_DUR_NO_ROUTE);
}
}
else {
if (sockerr == EHOSTDOWN
|| sockerr == EHOSTUNREACH
|| sockerr == ENETDOWN
|| sockerr == ENETUNREACH)
{
icmp_dest_unreach(p, ICMP_DUR_HOST);
}
}
ip_current_netif() = oif;
tcp_abandon(pcb, reset);
}
/**
* Called from poll manager thread via pxtcp::msg_accept when proxy
* failed to connect to the destination. Also called when we failed
* to register pxtcp with poll manager.
*
* This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
* how this unestablished connection is terminated.
*/
static void
pxtcp_pcb_accept_refuse(void *ctx)
{
struct pxtcp *pxtcp = (struct pxtcp *)ctx;
DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb,
pxtcp->sock, pxtcp->sockerr));
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->sock == INVALID_SOCKET);
if (pxtcp->pcb != NULL) {
struct tcp_pcb *pcb = pxtcp->pcb;
pxtcp_pcb_dissociate(pxtcp);
pxtcp_pcb_reject(pxtcp->netif, pcb, pxtcp->unsent, pxtcp->sockerr);
}
pollmgr_refptr_unref(pxtcp->rp);
pxtcp_free(pxtcp);
}
/**
* Convenience wrapper for poll manager connect callback to reject
* connection attempt.
*
* Like pxtcp_schedule_reset(), but the callback is more discriminate
* in how this unestablished connection is terminated.
*/
static int
pxtcp_schedule_reject(struct pxtcp *pxtcp)
{
pxtcp->msg_accept.msg.cb.function = pxtcp_pcb_accept_refuse;
pxtcp->pmhdl.slot = -1;
proxy_lwip_post(&pxtcp->msg_accept);
return -1;
}
/**
* Global tcp_proxy_accept() callback for proxied outgoing TCP
* connections from guest(s).
*/
static err_t
pxtcp_pcb_heard(void *arg, struct tcp_pcb *newpcb, err_t error)
{
struct pbuf *p = (struct pbuf *)arg;
struct pxtcp *pxtcp;
ipX_addr_t dst_addr;
int sdom;
SOCKET sock;
ssize_t nsent;
int sockerr = 0;
LWIP_UNUSED_ARG(error); /* always ERR_OK */
/*
* TCP first calls accept callback when it receives the first SYN
* and "tentatively accepts" new proxied connection attempt. When
* proxy "confirms" the SYN and sends SYN|ACK and the guest
* replies with ACK the accept callback is called again, this time
* with the established connection.
*/
LWIP_ASSERT1(newpcb->state == SYN_RCVD_0);
tcp_accept(newpcb, pxtcp_pcb_accept);
tcp_arg(newpcb, NULL);
tcp_setprio(newpcb, TCP_PRIO_MAX);
pxremap_outbound_ipX(PCB_ISIPV6(newpcb), &dst_addr, &newpcb->local_ip);
sdom = PCB_ISIPV6(newpcb) ? PF_INET6 : PF_INET;
sock = proxy_connected_socket(sdom, SOCK_STREAM,
&dst_addr, newpcb->local_port);
if (sock == INVALID_SOCKET) {
sockerr = SOCKERRNO();
goto abort;
}
pxtcp = pxtcp_allocate();
if (pxtcp == NULL) {
proxy_reset_socket(sock);
goto abort;
}
/* save initial datagram in case we need to reply with ICMP */
pbuf_ref(p);
pxtcp->unsent = p;
pxtcp->netif = ip_current_netif();
pxtcp_pcb_associate(pxtcp, newpcb);
pxtcp->sock = sock;
pxtcp->pmhdl.callback = pxtcp_pmgr_connect;
pxtcp->events = POLLOUT;
nsent = pxtcp_chan_send(POLLMGR_CHAN_PXTCP_ADD, pxtcp);
if (nsent < 0) {
pxtcp->sock = INVALID_SOCKET;
proxy_reset_socket(sock);
pxtcp_pcb_accept_refuse(pxtcp);
return ERR_ABRT;
}
return ERR_OK;
abort:
DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n",
__func__, (void *)newpcb, sock, sockerr));
pxtcp_pcb_reject(ip_current_netif(), newpcb, p, sockerr);
return ERR_ABRT;
}
/**
* tcp_proxy_accept() callback for accepted proxied outgoing TCP
* connections from guest(s). This is "real" accept with three-way
* handshake completed.
*/
static err_t
pxtcp_pcb_accept(void *arg, struct tcp_pcb *pcb, err_t error)
{
struct pxtcp *pxtcp = (struct pxtcp *)arg;
LWIP_UNUSED_ARG(pcb); /* used only in asserts */
LWIP_UNUSED_ARG(error); /* always ERR_OK */
LWIP_ASSERT1(pxtcp != NULL);
    LWIP_ASSERT1(pxtcp->pcb == pcb);
LWIP_ASSERT1(pcb->callback_arg == pxtcp);
/* send any inbound data that are already queued */
pxtcp_pcb_forward_inbound(pxtcp);
return ERR_OK;
}
/**
* Initial poll manager callback for proxied outgoing TCP connections.
 * pxtcp_pcb_heard() sets pxtcp::pmhdl::callback to this.
*
* Waits for connect(2) to the destination to complete. On success
* replaces itself with pxtcp_pmgr_pump() callback common to all
* established TCP connections.
*/
static int
pxtcp_pmgr_connect(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
pxtcp = (struct pxtcp *)handler->data;
LWIP_ASSERT1(handler == &pxtcp->pmhdl);
LWIP_ASSERT1(fd == pxtcp->sock);
if (revents & (POLLNVAL | POLLHUP | POLLERR)) {
if (revents & POLLNVAL) {
pxtcp->sock = INVALID_SOCKET;
pxtcp->sockerr = ETIMEDOUT;
}
else {
socklen_t optlen = (socklen_t)sizeof(pxtcp->sockerr);
int status;
SOCKET s;
status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
(char *)&pxtcp->sockerr, &optlen);
if (status < 0) { /* should not happen */
DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
__func__, fd, SOCKERRNO()));
}
else {
DPRINTF(("%s: sock %d: connect: %R[sockerr]\n",
__func__, fd, pxtcp->sockerr));
}
s = pxtcp->sock;
pxtcp->sock = INVALID_SOCKET;
closesocket(s);
}
return pxtcp_schedule_reject(pxtcp);
}
if (revents & POLLOUT) { /* connect is successful */
/* confirm accept to the guest */
proxy_lwip_post(&pxtcp->msg_accept);
/*
* Switch to common callback used for all established proxied
* connections.
*/
pxtcp->pmhdl.callback = pxtcp_pmgr_pump;
/*
* Initially we poll for incoming traffic only. Outgoing
* traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
* it will ask us to poll for POLLOUT too.
*/
pxtcp->events = POLLIN;
return pxtcp->events;
}
/* should never get here */
DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
__func__, (void *)pxtcp, fd, revents));
return pxtcp_schedule_reset(pxtcp);
}
/**
* Called from poll manager thread via pxtcp::msg_accept when proxy
* connected to the destination. Finalize accept by sending SYN|ACK
* to the guest.
*/
static void
pxtcp_pcb_accept_confirm(void *ctx)
{
struct pxtcp *pxtcp = (struct pxtcp *)ctx;
err_t error;
LWIP_ASSERT1(pxtcp != NULL);
if (pxtcp->pcb == NULL) {
return;
}
/* we are not going to reply with ICMP, so we can drop initial pbuf */
LWIP_ASSERT1(pxtcp->unsent != NULL);
pbuf_free(pxtcp->unsent);
pxtcp->unsent = NULL;
error = tcp_proxy_accept_confirm(pxtcp->pcb);
/*
* If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
* abandons the pcb. Retrying that is not very easy, since it
* would require keeping "fractional state". From guest's point
* of view there is no reply to its SYN so it will either resend
     * the SYN (effectively triggering full connection retry for us),
* or it will eventually time out.
*/
if (error == ERR_ABRT) {
pxtcp->pcb = NULL; /* pcb is gone */
pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
}
/*
* else if (error != ERR_OK): even if tcp_output() failed with
* ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
* retransmitted eventually.
*/
}
/**
* Entry point for port-forwarding.
*
* fwtcp accepts new incoming connection, creates pxtcp for the socket
* (with no pcb yet) and adds it to the poll manager (polling for
* errors only). Then it calls this function to construct the pcb and
* perform connection to the guest.
*/
void
pxtcp_pcb_connect(struct pxtcp *pxtcp, const struct fwspec *fwspec)
{
struct sockaddr_storage ss;
socklen_t sslen;
struct tcp_pcb *pcb;
ipX_addr_t src_addr, dst_addr;
u16_t src_port, dst_port;
int status;
err_t error;
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->pcb == NULL);
LWIP_ASSERT1(fwspec->stype == SOCK_STREAM);
pcb = tcp_new();
if (pcb == NULL) {
goto reset;
}
tcp_setprio(pcb, TCP_PRIO_MAX);
pxtcp_pcb_associate(pxtcp, pcb);
sslen = sizeof(ss);
status = getpeername(pxtcp->sock, (struct sockaddr *)&ss, &sslen);
if (status == SOCKET_ERROR) {
goto reset;
}
    /* nit: compares PF and AF, but they are the same everywhere */
LWIP_ASSERT1(ss.ss_family == fwspec->sdom);
status = fwany_ipX_addr_set_src(&src_addr, (const struct sockaddr *)&ss);
if (status == PXREMAP_FAILED) {
goto reset;
}
if (ss.ss_family == PF_INET) {
const struct sockaddr_in *peer4 = (const struct sockaddr_in *)&ss;
src_port = peer4->sin_port;
memcpy(&dst_addr.ip4, &fwspec->dst.sin.sin_addr, sizeof(ip_addr_t));
dst_port = fwspec->dst.sin.sin_port;
}
else { /* PF_INET6 */
const struct sockaddr_in6 *peer6 = (const struct sockaddr_in6 *)&ss;
ip_set_v6(pcb, 1);
src_port = peer6->sin6_port;
memcpy(&dst_addr.ip6, &fwspec->dst.sin6.sin6_addr, sizeof(ip6_addr_t));
dst_port = fwspec->dst.sin6.sin6_port;
}
/* lwip port arguments are in host order */
src_port = ntohs(src_port);
dst_port = ntohs(dst_port);
error = tcp_proxy_bind(pcb, ipX_2_ip(&src_addr), src_port);
if (error != ERR_OK) {
goto reset;
}
error = tcp_connect(pcb, ipX_2_ip(&dst_addr), dst_port,
/* callback: */ pxtcp_pcb_connected);
if (error != ERR_OK) {
goto reset;
}
return;
reset:
pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
}
/**
* Port-forwarded connection to guest is successful, pump data.
*/
static err_t
pxtcp_pcb_connected(void *arg, struct tcp_pcb *pcb, err_t error)
{
struct pxtcp *pxtcp = (struct pxtcp *)arg;
LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
LWIP_UNUSED_ARG(error);
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->pcb == pcb);
LWIP_ASSERT1(pcb->callback_arg == pxtcp);
LWIP_UNUSED_ARG(pcb);
DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb, pxtcp->sock));
/* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
return ERR_OK;
}
/**
* tcp_recv() callback.
*/
static err_t
pxtcp_pcb_recv(void *arg, struct tcp_pcb *pcb, struct pbuf *p, err_t error)
{
struct pxtcp *pxtcp = (struct pxtcp *)arg;
LWIP_ASSERT1(error == ERR_OK); /* always called with ERR_OK */
LWIP_UNUSED_ARG(error);
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->pcb == pcb);
LWIP_ASSERT1(pcb->callback_arg == pxtcp);
LWIP_UNUSED_ARG(pcb);
/*
* Have we done sending previous batch?
*/
if (pxtcp->unsent != NULL) {
if (p != NULL) {
/*
* Return an error to tell TCP to hold onto that pbuf.
* It will be presented to us later from tcp_fasttmr().
*/
return ERR_WOULDBLOCK;
}
else {
/*
* Unlike data, p == NULL indicating orderly shutdown is
* NOT presented to us again
*/
pxtcp->outbound_close = 1;
return ERR_OK;
}
}
/*
* Guest closed?
*/
if (p == NULL) {
pxtcp->outbound_close = 1;
pxtcp_pcb_forward_outbound_close(pxtcp);
return ERR_OK;
}
/*
* Got data, send what we can without blocking.
*/
return pxtcp_pcb_forward_outbound(pxtcp, p);
}
/**
* Guest half-closed its TX side of the connection.
*
* Called either immediately from pxtcp_pcb_recv() when it gets NULL,
* or from pxtcp_pcb_forward_outbound() when it finishes forwarding
* previously unsent data and sees pxtcp::outbound_close flag saved by
* pxtcp_pcb_recv().
*/
static void
pxtcp_pcb_forward_outbound_close(struct pxtcp *pxtcp)
{
struct tcp_pcb *pcb;
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->outbound_close);
LWIP_ASSERT1(!pxtcp->outbound_close_done);
pcb = pxtcp->pcb;
LWIP_ASSERT1(pcb != NULL);
DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
(void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
/*
* NB: set the flag first, since shutdown() will trigger POLLHUP
* if inbound is already closed, and poll manager asserts
     * outbound_close_done (maybe it should not?).
*/
pxtcp->outbound_close_done = 1;
shutdown(pxtcp->sock, SHUT_WR); /* half-close the socket */
#if !HAVE_TCP_POLLHUP
/*
* On NetBSD POLLHUP is not reported for TCP sockets, so we need
* to nudge poll manager manually.
*/
if (pxtcp->inbound_close) {
pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_DEL, pxtcp);
}
#endif
/* no more outbound data coming to us */
tcp_recv(pcb, NULL);
/*
* If we have already done inbound close previously (active close
* on the pcb), then we must not hold onto a pcb in TIME_WAIT
* state since those will be recycled by lwip when it runs out of
* free pcbs in the pool.
*
* The test is true also for a pcb in CLOSING state that waits
* just for the ACK of its FIN (to transition to TIME_WAIT).
*/
if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
pxtcp_pcb_dissociate(pxtcp);
}
}
/**
* Forward outbound data from pcb to socket.
*
* Called by pxtcp_pcb_recv() to forward new data and by callout
* triggered by POLLOUT on the socket to send previously unsent data.
*
 * (Re)schedules a one-time callout if not all data are sent.
*/
static err_t
pxtcp_pcb_forward_outbound(struct pxtcp *pxtcp, struct pbuf *p)
{
struct pbuf *qs, *q;
size_t qoff;
size_t forwarded;
int sockerr;
LWIP_ASSERT1(pxtcp->unsent == NULL || pxtcp->unsent == p);
forwarded = 0;
sockerr = 0;
q = NULL;
qoff = 0;
qs = p;
while (qs != NULL) {
IOVEC iov[8];
const size_t iovsize = sizeof(iov)/sizeof(iov[0]);
size_t fwd1;
ssize_t nsent;
size_t i;
fwd1 = 0;
for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
LWIP_ASSERT1(q->len > 0);
IOVEC_SET_BASE(iov[i], q->payload);
IOVEC_SET_LEN(iov[i], q->len);
fwd1 += q->len;
}
/*
* TODO: This is where application-level proxy can hook into
* to process outbound traffic.
*/
nsent = pxtcp_sock_send(pxtcp, iov, i);
if (nsent == (ssize_t)fwd1) {
/* successfully sent this chain fragment completely */
forwarded += nsent;
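            /* continue from q, the first pbuf not packed into iov
               (NULL if the whole remaining chain was packed) */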
qs = q;
}
else if (nsent >= 0) {
/* successfully sent only some data */
forwarded += nsent;
/* find the first pbuf that was not completely forwarded */
qoff = nsent;
for (i = 0, q = qs; i < iovsize && q != NULL; ++i, q = q->next) {
if (qoff < q->len) {
break;
}
qoff -= q->len;
}
LWIP_ASSERT1(q != NULL);
LWIP_ASSERT1(qoff < q->len);
break;
}
else {
sockerr = SOCKERRNO();
/*
* Some errors are really not errors - if we get them,
* it's not different from getting nsent == 0, so filter
* them out here.
*/
if (proxy_error_is_transient(sockerr)) {
sockerr = 0;
}
q = qs;
qoff = 0;
break;
}
}
if (forwarded > 0) {
tcp_recved(pxtcp->pcb, (u16_t)forwarded);
}
if (q == NULL) { /* everything is forwarded? */
LWIP_ASSERT1(sockerr == 0);
LWIP_ASSERT1(forwarded == p->tot_len);
pxtcp->unsent = NULL;
pbuf_free(p);
if (pxtcp->outbound_close) {
pxtcp_pcb_forward_outbound_close(pxtcp);
}
}
else {
if (q != p) {
/* free forwarded pbufs at the beginning of the chain */
pbuf_ref(q);
pbuf_free(p);
}
if (qoff > 0) {
/* advance payload pointer past the forwarded part */
pbuf_header(q, -(s16_t)qoff);
}
pxtcp->unsent = q;
/*
* Have sendmsg() failed?
*
* Connection reset will be detected by poll and
* pxtcp_schedule_reset() will be called.
*
* Otherwise something *really* unexpected must have happened,
* so we'd better abort.
*/
if (sockerr != 0 && sockerr != ECONNRESET) {
struct tcp_pcb *pcb = pxtcp->pcb;
pxtcp_pcb_dissociate(pxtcp);
tcp_abort(pcb);
/* call error callback manually since we've already dissociated */
pxtcp_pcb_err((void *)pxtcp, ERR_ABRT);
return ERR_ABRT;
}
/* schedule one-shot POLLOUT on the socket */
pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLOUT, pxtcp);
}
return ERR_OK;
}
#if !defined(RT_OS_WINDOWS)
static ssize_t
pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
{
struct msghdr mh;
ssize_t nsent;
#ifdef MSG_NOSIGNAL
const int send_flags = MSG_NOSIGNAL;
#else
const int send_flags = 0;
#endif
memset(&mh, 0, sizeof(mh));
mh.msg_iov = iov;
mh.msg_iovlen = iovlen;
nsent = sendmsg(pxtcp->sock, &mh, send_flags);
return nsent;
}
#else /* RT_OS_WINDOWS */
static ssize_t
pxtcp_sock_send(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
{
DWORD nsent;
int status;
status = WSASend(pxtcp->sock, iov, (DWORD)iovlen, &nsent,
0, NULL, NULL);
    if (status == SOCKET_ERROR) {
        return -1;              /* nsent is a DWORD and can't hold -1 */
    }
    return nsent;
}
#endif /* RT_OS_WINDOWS */
/**
* Callback from poll manager (on POLLOUT) to send data from
* pxtcp::unsent pbuf to socket.
*/
static void
pxtcp_pcb_write_outbound(void *ctx)
{
struct pxtcp *pxtcp = (struct pxtcp *)ctx;
LWIP_ASSERT1(pxtcp != NULL);
if (pxtcp->pcb == NULL) {
return;
}
pxtcp_pcb_forward_outbound(pxtcp, pxtcp->unsent);
}
/**
* Common poll manager callback used by both outgoing and incoming
 * (port-forwarded) connections that have a connected socket.
*/
static int
pxtcp_pmgr_pump(struct pollmgr_handler *handler, SOCKET fd, int revents)
{
struct pxtcp *pxtcp;
int status;
int sockerr;
pxtcp = (struct pxtcp *)handler->data;
LWIP_ASSERT1(handler == &pxtcp->pmhdl);
LWIP_ASSERT1(fd == pxtcp->sock);
if (revents & POLLNVAL) {
pxtcp->sock = INVALID_SOCKET;
return pxtcp_schedule_reset(pxtcp);
}
if (revents & POLLERR) {
socklen_t optlen = (socklen_t)sizeof(sockerr);
status = getsockopt(pxtcp->sock, SOL_SOCKET, SO_ERROR,
(char *)&sockerr, &optlen);
if (status < 0) { /* should not happen */
DPRINTF(("sock %d: SO_ERROR failed: %R[sockerr]\n",
fd, SOCKERRNO()));
}
else {
DPRINTF0(("sock %d: %R[sockerr]\n", fd, sockerr));
}
return pxtcp_schedule_reset(pxtcp);
}
if (revents & POLLOUT) {
pxtcp->events &= ~POLLOUT;
proxy_lwip_post(&pxtcp->msg_outbound);
}
if (revents & POLLIN) {
ssize_t nread;
int stop_pollin;
nread = pxtcp_sock_read(pxtcp, &stop_pollin);
if (nread < 0) {
sockerr = -(int)nread;
DPRINTF0(("sock %d: %R[sockerr]\n", fd, sockerr));
return pxtcp_schedule_reset(pxtcp);
}
if (stop_pollin) {
pxtcp->events &= ~POLLIN;
}
if (nread > 0) {
proxy_lwip_post(&pxtcp->msg_inbound);
#if !HAVE_TCP_POLLHUP
/*
* If host does not report POLLHUP for closed sockets
* (e.g. NetBSD) we should check for full close manually.
*/
if (pxtcp->inbound_close && pxtcp->outbound_close_done) {
LWIP_ASSERT1((revents & POLLHUP) == 0);
return pxtcp_schedule_delete(pxtcp);
}
#endif
}
}
#if !HAVE_TCP_POLLHUP
LWIP_ASSERT1((revents & POLLHUP) == 0);
#else
if (revents & POLLHUP) {
/*
         * Linux and Darwin seem to report POLLHUP when both
* directions are shut down. And they do report POLLHUP even
         * when there's unread data (which they also report as POLLIN
* along with that POLLHUP).
*
* FreeBSD (from source inspection) seems to follow Linux,
* reporting POLLHUP when both directions are shut down, but
* POLLHUP is always accompanied with POLLIN.
*
* NetBSD never reports POLLHUP for sockets.
*
* ---
*
* If external half-closes first, we don't get POLLHUP, we
* recv 0 bytes from the socket as EOF indicator, stop polling
* for POLLIN and poll with events == 0 (with occasional
* one-shot POLLOUT). When guest eventually closes, we get
* POLLHUP.
*
* If guest half-closes first things are more tricky. As soon
* as host sees the FIN from external it will spam POLLHUP,
* even when there's unread data. The problem is that we
* might have stopped polling for POLLIN because the ring
* buffer is full or we were polling POLLIN but can't read all
         * of the data because the buffer doesn't have enough space.
* Either way, there's unread data but we can't keep polling
* the socket.
*/
DPRINTF(("sock %d: HUP\n", fd));
LWIP_ASSERT1(pxtcp->outbound_close_done);
if (pxtcp->inbound_close) {
/* there's no unread data, we are done */
return pxtcp_schedule_delete(pxtcp);
}
else {
/* DPRINTF */ {
#ifndef RT_OS_WINDOWS
int unread;
#else
u_long unread;
#endif
status = ioctlsocket(fd, FIONREAD, &unread);
if (status == SOCKET_ERROR) {
DPRINTF2(("sock %d: FIONREAD: %R[sockerr]\n",
fd, SOCKERRNO()));
}
else {
DPRINTF2(("sock %d: %d UNREAD bytes\n", fd, unread));
}
}
/*
* We cannot just set a flag here and let pxtcp_pcb_sent()
* notice and start pulling, because if we are preempted
* before setting the flag and all data in inbuf is ACKed
* there will be no more calls to pxtcp_pcb_sent() to
* notice the flag.
*
* We cannot set a flag and then send a message to make
* sure it noticed, because if it has and it has read all
* data while the message is in transit it will delete
* pxtcp.
*
* In a sense this message is like msg_delete (except we
* ask to pull some data first).
*/
proxy_lwip_post(&pxtcp->msg_inpull);
pxtcp->pmhdl.slot = -1;
return -1;
}
/* NOTREACHED */
} /* POLLHUP */
#endif /* HAVE_TCP_POLLHUP */
return pxtcp->events;
}
/**
* Read data from socket to ringbuf. This may be used both on lwip
* and poll manager threads.
*
* Flag pointed to by pstop is set when further reading is impossible,
* either temporary when buffer is full, or permanently when EOF is
* received.
*
* Returns number of bytes read. NB: EOF is reported as 1!
*
* Returns zero if nothing was read, either because buffer is full, or
* if no data is available (EWOULDBLOCK, EINTR &c).
*
* Returns -errno on real socket errors.
*/
static ssize_t
pxtcp_sock_read(struct pxtcp *pxtcp, int *pstop)
{
IOVEC iov[2];
size_t iovlen;
ssize_t nread;
const size_t sz = pxtcp->inbuf.bufsize;
size_t beg, lim, wrnew;
*pstop = 0;
beg = pxtcp->inbuf.vacant;
IOVEC_SET_BASE(iov[0], &pxtcp->inbuf.buf[beg]);
/* lim is the index we can NOT write to */
lim = pxtcp->inbuf.unacked;
if (lim == 0) {
lim = sz - 1; /* empty slot at the end */
}
else if (lim == 1) {
lim = sz; /* empty slot at the beginning */
}
else {
--lim;
}
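    /*
     * In other words, one slot is always kept unused so that
     * "vacant == unacked" unambiguously means an empty buffer.  E.g.
     * with bufsize 8: unacked == 3 gives lim == 2; unacked == 0 gives
     * lim == 7 (the unused slot is the last byte); unacked == 1 gives
     * lim == 8, i.e. the whole tail is writable and the unused slot
     * wraps around to index 0.
     */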
if (beg == lim) {
/*
* Buffer is full, stop polling for POLLIN.
*
* pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
* data, freeing space in the ring buffer.
*/
*pstop = 1;
return 0;
}
if (beg < lim) {
/* free space in one chunk */
iovlen = 1;
IOVEC_SET_LEN(iov[0], lim - beg);
}
else {
/* free space in two chunks */
iovlen = 2;
IOVEC_SET_LEN(iov[0], sz - beg);
IOVEC_SET_BASE(iov[1], &pxtcp->inbuf.buf[0]);
IOVEC_SET_LEN(iov[1], lim);
}
/*
* TODO: This is where application-level proxy can hook into to
* process inbound traffic.
*/
nread = pxtcp_sock_recv(pxtcp, iov, iovlen);
if (nread > 0) {
wrnew = beg + nread;
if (wrnew >= sz) {
wrnew -= sz;
}
pxtcp->inbuf.vacant = wrnew;
DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
(void *)pxtcp, pxtcp->sock, (int)nread));
return nread;
}
else if (nread == 0) {
*pstop = 1;
pxtcp->inbound_close = 1;
DPRINTF2(("pxtcp %p: sock %d read EOF\n",
(void *)pxtcp, pxtcp->sock));
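        /*
         * NB: report EOF as 1 so that the caller still treats it as
         * "data arrived" and posts msg_inbound, letting the lwip
         * thread forward the close (see the function comment above).
         */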
return 1;
}
else {
int sockerr = SOCKERRNO();
if (proxy_error_is_transient(sockerr)) {
/* haven't read anything, just return */
DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
(void *)pxtcp, pxtcp->sock));
return 0;
}
else {
/* socket error! */
DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n",
(void *)pxtcp, pxtcp->sock, sockerr));
return -sockerr;
}
}
}
#if !defined(RT_OS_WINDOWS)
static ssize_t
pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
{
struct msghdr mh;
ssize_t nread;
memset(&mh, 0, sizeof(mh));
mh.msg_iov = iov;
mh.msg_iovlen = iovlen;
nread = recvmsg(pxtcp->sock, &mh, 0);
return nread;
}
#else /* RT_OS_WINDOWS */
static ssize_t
pxtcp_sock_recv(struct pxtcp *pxtcp, IOVEC *iov, size_t iovlen)
{
DWORD flags;
DWORD nread;
int status;
flags = 0;
status = WSARecv(pxtcp->sock, iov, (DWORD)iovlen, &nread,
&flags, NULL, NULL);
    if (status == SOCKET_ERROR) {
        return -1;              /* nread is a DWORD and can't hold -1 */
    }
    return (ssize_t)nread;
}
#endif /* RT_OS_WINDOWS */
/**
* Callback from poll manager (pxtcp::msg_inbound) to trigger output
* from ringbuf to guest.
*/
static void
pxtcp_pcb_write_inbound(void *ctx)
{
struct pxtcp *pxtcp = (struct pxtcp *)ctx;
LWIP_ASSERT1(pxtcp != NULL);
if (pxtcp->pcb == NULL) {
return;
}
pxtcp_pcb_forward_inbound(pxtcp);
}
/**
* tcp_poll() callback
*
 * We switch it on when tcp_write() or tcp_shutdown() fail with
* ERR_MEM to prevent connection from stalling. If there are ACKs or
* more inbound data then pxtcp_pcb_forward_inbound() will be
* triggered again, but if neither happens, tcp_poll() comes to the
* rescue.
*/
static err_t
pxtcp_pcb_poll(void *arg, struct tcp_pcb *pcb)
{
struct pxtcp *pxtcp = (struct pxtcp *)arg;
LWIP_UNUSED_ARG(pcb);
DPRINTF2(("%s: pxtcp %p; pcb %p\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb));
pxtcp_pcb_forward_inbound(pxtcp);
/*
* If the last thing holding up deletion of the pxtcp was failed
* tcp_shutdown() and it succeeded, we may be the last callback.
*/
pxtcp_pcb_maybe_deferred_delete(pxtcp);
return ERR_OK;
}
static void
pxtcp_pcb_schedule_poll(struct pxtcp *pxtcp)
{
tcp_poll(pxtcp->pcb, pxtcp_pcb_poll, 0);
}
static void
pxtcp_pcb_cancel_poll(struct pxtcp *pxtcp)
{
tcp_poll(pxtcp->pcb, NULL, 255);
}
/**
* Forward inbound data from ring buffer to the guest.
*
* Scheduled by poll manager thread after it receives more data into
* the ring buffer (we have more data to send).
* Also called from tcp_sent() callback when guest ACKs some data,
* increasing pcb->snd_buf (we are permitted to send more data).
*
* Also called from tcp_poll() callback if previous attempt to forward
* inbound data failed with ERR_MEM (we need to try again).
*/
static void
pxtcp_pcb_forward_inbound(struct pxtcp *pxtcp)
{
struct tcp_pcb *pcb;
size_t sndbuf;
size_t beg, lim, sndlim;
size_t toeob, tolim;
size_t nsent;
err_t error;
LWIP_ASSERT1(pxtcp != NULL);
pcb = pxtcp->pcb;
if (pcb == NULL) {
return;
}
if (/* __predict_false */ pcb->state < ESTABLISHED) {
/*
* If we have just confirmed accept of this connection, the
* pcb is in SYN_RCVD state and we still haven't received the
* ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
* transition that lwip decrements pcb->acked so that that ACK
* is not reported to pxtcp_pcb_sent(). If we send something
* now and immediately close (think "daytime", e.g.) while
* still in SYN_RCVD state, we will move directly to
* FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
* report it to pxtcp_pcb_sent().
*/
DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
(void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
return;
}
beg = pxtcp->inbuf.unsent; /* private to lwip thread */
lim = pxtcp->inbuf.vacant;
if (beg == lim) {
if (pxtcp->inbound_close && !pxtcp->inbound_close_done) {
pxtcp_pcb_forward_inbound_close(pxtcp);
tcp_output(pcb);
return;
}
/*
* Else, there's no data to send.
*
* If there is free space in the buffer, producer will
* reschedule us as it receives more data and vacant (lim)
* advances.
*
* If buffer is full when all data have been passed to
* tcp_write() but not yet acknowledged, we will advance
* unacked on ACK, freeing some space for producer to write to
* (then see above).
*/
return;
}
sndbuf = tcp_sndbuf(pcb);
if (sndbuf == 0) {
/*
* Can't send anything now. As guest ACKs some data, TCP will
* call pxtcp_pcb_sent() callback and we will come here again.
*/
return;
}
nsent = 0;
/*
* We have three limits to consider:
* - how much data we have in the ringbuf
* - how much data we are allowed to send
* - ringbuf size
*/
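    /*
     * E.g. with bufsize 64K, unsent == 60K and vacant == 8K (the data
     * wrapped around the end): with sndbuf == 2K we are limited by
     * sndbuf, beg will not wrap, and the simple non-wrapping path
     * below applies.  With sndbuf == 16K we are limited by the end of
     * the buffer: the first tcp_write() sends the 4K tail [60K, 64K)
     * with TCP_WRITE_FLAG_MORE and the second one continues from the
     * beginning of the buffer up to "vacant".
     */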
toeob = pxtcp->inbuf.bufsize - beg;
if (lim < beg) { /* lim wrapped */
if (sndbuf < toeob) { /* but we are limited by sndbuf */
/* so beg is not going to wrap, treat sndbuf as lim */
lim = beg + sndbuf; /* ... and proceed to the simple case */
}
else { /* we are limited by the end of the buffer, beg will wrap */
u8_t maybemore;
if (toeob == sndbuf || lim == 0) {
maybemore = 0;
}
else {
maybemore = TCP_WRITE_FLAG_MORE;
}
error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], toeob, maybemore);
if (error != ERR_OK) {
goto writeerr;
}
nsent += toeob;
pxtcp->inbuf.unsent = 0; /* wrap */
if (maybemore) {
beg = 0;
sndbuf -= toeob;
}
else {
/* we are done sending, but ... */
goto check_inbound_close;
}
}
}
LWIP_ASSERT1(beg < lim);
sndlim = beg + sndbuf;
if (lim > sndlim) {
lim = sndlim;
}
tolim = lim - beg;
if (tolim > 0) {
error = tcp_write(pcb, &pxtcp->inbuf.buf[beg], (u16_t)tolim, 0);
if (error != ERR_OK) {
goto writeerr;
}
nsent += tolim;
pxtcp->inbuf.unsent = lim;
}
check_inbound_close:
if (pxtcp->inbound_close && pxtcp->inbuf.unsent == pxtcp->inbuf.vacant) {
pxtcp_pcb_forward_inbound_close(pxtcp);
}
DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
(void *)pxtcp, (void *)pcb, (int)nsent));
tcp_output(pcb);
pxtcp_pcb_cancel_poll(pxtcp);
return;
writeerr:
if (error == ERR_MEM) {
if (nsent > 0) { /* first write succeeded, second failed */
DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
(void *)pxtcp, (void *)pcb, (int)nsent));
tcp_output(pcb);
}
DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
(void *)pxtcp, (void *)pcb));
pxtcp_pcb_schedule_poll(pxtcp);
}
else {
DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
(void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
/* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
LWIP_ASSERT1(error == ERR_MEM);
}
}
static void
pxtcp_pcb_forward_inbound_close(struct pxtcp *pxtcp)
{
struct tcp_pcb *pcb;
err_t error;
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->inbound_close);
LWIP_ASSERT1(!pxtcp->inbound_close_done);
LWIP_ASSERT1(pxtcp->inbuf.unsent == pxtcp->inbuf.vacant);
pcb = pxtcp->pcb;
LWIP_ASSERT1(pcb != NULL);
DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
(void *)pxtcp, (void *)pcb, tcp_debug_state_str(pcb->state)));
error = tcp_shutdown(pcb, /*RX*/ 0, /*TX*/ 1);
if (error != ERR_OK) {
DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
" tcp_shutdown: error=%s\n",
(void *)pxtcp, (void *)pcb, proxy_lwip_strerr(error)));
pxtcp_pcb_schedule_poll(pxtcp);
return;
}
pxtcp_pcb_cancel_poll(pxtcp);
pxtcp->inbound_close_done = 1;
/*
* If we have already done outbound close previously (passive
* close on the pcb), then we must not hold onto a pcb in LAST_ACK
* state since those will be deleted by lwip when that last ack
* comes from the guest.
*
* NB: We do NOT check for deferred delete here, even though we
* have just set one of its conditions, inbound_close_done. We
* let pcb callbacks that called us do that. It's simpler and
* cleaner that way.
*/
if (pxtcp->outbound_close_done && pxtcp_pcb_forward_inbound_done(pxtcp)) {
pxtcp_pcb_dissociate(pxtcp);
}
}
/**
* Check that all forwarded inbound data is sent and acked, and that
* inbound close is scheduled (we aren't called back when it's acked).
*/
DECLINLINE(int)
pxtcp_pcb_forward_inbound_done(const struct pxtcp *pxtcp)
{
return (pxtcp->inbound_close_done /* also implies that all data forwarded */
&& pxtcp->inbuf.unacked == pxtcp->inbuf.unsent);
}
/**
* tcp_sent() callback - guest acknowledged len bytes.
*
* We can advance inbuf::unacked index, making more free space in the
* ringbuf and wake up producer on poll manager thread.
*
* We can also try to send more data if we have any since pcb->snd_buf
* was increased and we are now permitted to send more.
*/
static err_t
pxtcp_pcb_sent(void *arg, struct tcp_pcb *pcb, u16_t len)
{
struct pxtcp *pxtcp = (struct pxtcp *)arg;
size_t unacked;
LWIP_ASSERT1(pxtcp != NULL);
LWIP_ASSERT1(pxtcp->pcb == pcb);
LWIP_ASSERT1(pcb->callback_arg == pxtcp);
LWIP_UNUSED_ARG(pcb); /* only in assert */
DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
" unacked %d, unsent %d, vacant %d\n",
__func__, (void *)pxtcp, (void *)pcb, (int)len,
(int)pxtcp->inbuf.unacked,
(int)pxtcp->inbuf.unsent,
(int)pxtcp->inbuf.vacant));
if (/* __predict_false */ len == 0) {
/* we are notified to start pulling */
LWIP_ASSERT1(pxtcp->outbound_close_done);
LWIP_ASSERT1(!pxtcp->inbound_close);
LWIP_ASSERT1(pxtcp->inbound_pull);
unacked = pxtcp->inbuf.unacked;
}
else {
/*
* Advance unacked index. Guest acknowledged the data, so it
* won't be needed again for potential retransmits.
*/
unacked = pxtcp->inbuf.unacked + len;
if (unacked > pxtcp->inbuf.bufsize) {
unacked -= pxtcp->inbuf.bufsize;
}
pxtcp->inbuf.unacked = unacked;
}
/* arrange for more inbound data */
if (!pxtcp->inbound_close) {
if (!pxtcp->inbound_pull) {
/* wake up producer, in case it has stopped polling for POLLIN */
pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_POLLIN, pxtcp);
#ifdef RT_OS_WINDOWS
            /*
             * We haven't got enough room in the ring buffer to read at
             * the moment, but we don't want to lose the notification
             * from WSAW4ME when space becomes available, so we reset
             * the event with an empty recv.
             */
recv(pxtcp->sock, NULL, 0, 0);
#endif
}
else {
ssize_t nread;
int stop_pollin; /* ignored */
nread = pxtcp_sock_read(pxtcp, &stop_pollin);
if (nread < 0) {
int sockerr = -(int)nread;
LWIP_UNUSED_ARG(sockerr);
DPRINTF0(("%s: sock %d: %R[sockerr]\n",
__func__, pxtcp->sock, sockerr));
/*
* Since we are pulling, pxtcp is no longer registered
* with poll manager so we can kill it directly.
*/
pxtcp_pcb_reset_pxtcp(pxtcp);
return ERR_ABRT;
}
}
}
/* forward more data if we can */
if (!pxtcp->inbound_close_done) {
pxtcp_pcb_forward_inbound(pxtcp);
/*
* NB: we might have dissociated from a pcb that transitioned
* to LAST_ACK state, so don't refer to pcb below.
*/
}
/* have we got all the acks? */
if (pxtcp->inbound_close /* no more new data */
&& pxtcp->inbuf.unsent == pxtcp->inbuf.vacant /* all data is sent */
&& unacked == pxtcp->inbuf.unsent) /* ... and is acked */
{
char *buf;
DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb));
/* no more retransmits, so buf is not needed */
buf = pxtcp->inbuf.buf;
pxtcp->inbuf.buf = NULL;
free(buf);
/* no more acks, so no more callbacks */
if (pxtcp->pcb != NULL) {
tcp_sent(pxtcp->pcb, NULL);
}
/*
* We may be the last callback for this pcb if we have also
* successfully forwarded inbound_close.
*/
pxtcp_pcb_maybe_deferred_delete(pxtcp);
}
return ERR_OK;
}
/**
* Callback from poll manager (pxtcp::msg_inpull) to switch
* pxtcp_pcb_sent() to actively pull the last bits of input. See
* POLLHUP comment in pxtcp_pmgr_pump().
*
* pxtcp::sock is deregistered from poll manager after this callback
* is scheduled.
*/
static void
pxtcp_pcb_pull_inbound(void *ctx)
{
struct pxtcp *pxtcp = (struct pxtcp *)ctx;
LWIP_ASSERT1(pxtcp != NULL);
if (pxtcp->pcb == NULL) {
DPRINTF(("%s: pxtcp %p: PCB IS GONE\n", __func__, (void *)pxtcp));
pxtcp_pcb_reset_pxtcp(pxtcp);
return;
}
DPRINTF(("%s: pxtcp %p: pcb %p\n",
__func__, (void *)pxtcp, (void *)pxtcp->pcb));
pxtcp->inbound_pull = 1;
pxtcp->deferred_delete = 1;
pxtcp_pcb_sent(pxtcp, pxtcp->pcb, 0);
}
/**
* tcp_err() callback.
*
* pcb is not passed to this callback since it may be already
* deallocated by the stack, but we can't do anything useful with it
* anyway since connection is gone.
*/
static void
pxtcp_pcb_err(void *arg, err_t error)
{
struct pxtcp *pxtcp = (struct pxtcp *)arg;
LWIP_ASSERT1(pxtcp != NULL);
/*
* ERR_CLSD is special - it is reported here when:
*
* . guest has already half-closed
* . we send FIN to guest when external half-closes
* . guest acks that FIN
*
     * Since the connection is now fully closed, but its receive side
     * had already been closed earlier, lwip cannot report this via the
     * recv callback again and can only report it via tcp_err.  At this
     * point the pcb is still alive, so we can peek at it if need be.
*
     * The interesting twist is when the ACK from the guest that acks our
* FIN also acks some data. In this scenario lwip will NOT call
* tcp_sent() callback with the ACK for that last bit of data but
* instead will call tcp_err with ERR_CLSD right away. Since that
* ACK also acknowledges all the data, we should run some of
* pxtcp_pcb_sent() logic here.
*/
if (error == ERR_CLSD) {
struct tcp_pcb *pcb = pxtcp->pcb; /* still alive */
DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
" pcb->acked %d;"
" unacked %d, unsent %d, vacant %d\n",
(void *)pxtcp, (void *)pcb,
pcb->acked,
(int)pxtcp->inbuf.unacked,
(int)pxtcp->inbuf.unsent,
(int)pxtcp->inbuf.vacant));
LWIP_ASSERT1(pxtcp->pcb == pcb);
LWIP_ASSERT1(pcb->callback_arg == pxtcp);
if (pcb->acked > 0) {
pxtcp_pcb_sent(pxtcp, pcb, pcb->acked);
}
return;
}
DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
(void *)pxtcp, proxy_lwip_strerr(error)));
pxtcp->pcb = NULL; /* pcb is gone */
if (pxtcp->deferred_delete) {
pxtcp_pcb_reset_pxtcp(pxtcp);
}
else {
pxtcp_chan_send_weak(POLLMGR_CHAN_PXTCP_RESET, pxtcp);
}
}