pxtcp.c revision 336abf709b664cf025b12d41a0970c332e21b0c2
/* -*- indent-tabs-mode: nil; -*- */
#define LOG_GROUP LOG_GROUP_NAT_SERVICE
#include "winutils.h"
#include "pxtcp.h"
#include "proxy.h"
#include "proxy_pollmgr.h"
#include "pxremap.h"
#include "portfwd.h" /* fwspec */
#ifndef RT_OS_WINDOWS
#ifdef RT_OS_SOLARIS
#endif
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <poll.h>
#include <err.h> /* BSD'ism */
#else
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "winpoll.h"
#endif
#include "lwip/tcp_impl.h" /* XXX: to access tcp_abandon() */
/*
* Different OSes have different quirks in reporting POLLHUP for TCP
* sockets.
*
* Using shutdown(2) "how" values here would be more readable, but
* since SHUT_RD is 0, we can't use 0 for "none", unfortunately.
*/
#if defined(RT_OS_NETBSD) || defined(RT_OS_SOLARIS)
# define HAVE_TCP_POLLHUP 0 /* not reported */
#elif defined(RT_OS_DARWIN)
#else
#endif
/**
* Ring buffer for inbound data. Filled with data from the host
* socket on poll manager thread. Data consumed by scheduling
* tcp_write() to the pcb on the lwip thread.
*
* NB: There is actually a third party present, the lwip stack itself.
* Thus the buffer doesn't have a dual free vs. data split, but rather
* a three-way free / sent and unACKed data / unsent data split.
*/
struct ringbuf {
/*
* Storage for the buffered inbound data. lwIP may need to resend
* from it until the guest has ACKed everything.
*/
char *buf;
/*
* Start of free space, producer writes here (up till "unacked").
*/
/*
* Start of sent but unacknowledged data. The data are "owned" by
* the stack as it may need to retransmit. This is the free space
* limit for producer.
*/
/*
* NOTE(review): the beginning of this comment is missing. Given the
* three-way split described above it presumably documents the start
* of unsent data (bounded by "vacant") -- confirm. Not declared
* volatile since it's only accessed from the consumer thread.
*/
};
/**
* Per-connection state of a proxied TCP connection. It is used both
* by the poll manager thread (host socket side) and by the lwIP
* thread (guest side).
*/
struct pxtcp {
/**
* Poll manager handler for this connection.
*
* NOTE(review): the beginning of the original comment is missing;
* what remains says that references depend on this "inheritance".
* Presumably this has to stay the first member -- confirm.
*/
struct pollmgr_handler pmhdl;
/**
*/
/**
* Host (external) side of the proxied connection.
*/
/**
* Socket events we are currently polling for.
*/
int events;
/**
* Socket error. Currently used to save connect(2) errors so that
* we can decide if we need to send ICMP error.
*/
int sockerr;
/**
* Interface that we have got the SYN from. Needed to send ICMP
* with correct source address.
*/
/**
* For tentatively accepted connections for which we are in
* process of connecting to the real destination this is the
* initial pbuf that we might need to build ICMP error.
*
* When connection is established this is used to hold outbound
* pbuf chain received by pxtcp_pcb_recv() but not yet completely
* forwarded over the socket. We cannot "return" it to lwIP since
* the head of the chain is already sent and freed.
*/
/**
* Guest has closed its side. Reported to pxtcp_pcb_recv() only
* once and we might not be able to forward it immediately if we
* have unsent pbuf.
*/
int outbound_close;
/**
* Outbound half-close has been done on the socket.
*/
int outbound_close_done;
/**
* External has closed its side. We might not be able to forward
* it immediately if we have unforwarded data.
*/
int inbound_close;
/**
* Inbound half-close has been done on the pcb.
*/
int inbound_close_done;
/**
* On systems that report POLLHUP as soon as the final FIN is
* received on a socket we cannot continue polling for the rest of
* input, so we have to read (pull) last data from the socket on
* manager thread. See comment in pxtcp_pmgr_pump() POLLHUP case.
*/
int inbound_pull;
/**
* When poll manager schedules delete we may not be able to delete
* a pxtcp immediately if not all inbound data has been acked by
* the guest: lwIP may need to resend and the data are in pxtcp's
* inbuf::buf. We defer delete until all data are acked to
* pxtcp_pcb_sent().
*/
int deferred_delete;
/**
* Ring-buffer for inbound data.
*/
/**
* lwIP thread's strong reference to us.
*/
struct pollmgr_refptr *rp;
/*
* We use static messages to call functions on the lwIP thread.
*
* NOTE(review): the original sentence is truncated at this point;
* the pxtcp::msg_* members that other comments in this file refer
* to are presumably declared after it -- confirm.
*/
};
/* pxtcp lifetime helpers */
static struct pxtcp *pxtcp_allocate(void);
static void pxtcp_free(struct pxtcp *);
static void pxtcp_pcb_dissociate(struct pxtcp *);
/* poll manager callbacks for pxtcp related channels */
#if !(HAVE_TCP_POLLHUP & POLLOUT)
#endif
/* poll manager callbacks for individual sockets */
/* get incoming traffic into ring buffer */
/* convenience functions for poll manager callbacks */
static int pxtcp_schedule_delete(struct pxtcp *);
static int pxtcp_schedule_reset(struct pxtcp *);
static int pxtcp_schedule_reject(struct pxtcp *);
/* lwip thread callbacks called via proxy_lwip_post() */
static void pxtcp_pcb_delete_pxtcp(void *);
static void pxtcp_pcb_reset_pxtcp(void *);
static void pxtcp_pcb_accept_refuse(void *);
static void pxtcp_pcb_accept_confirm(void *);
static void pxtcp_pcb_write_outbound(void *);
static void pxtcp_pcb_write_inbound(void *);
static void pxtcp_pcb_pull_inbound(void *);
/* tcp pcb callbacks */
static void pxtcp_pcb_err(void *, err_t);
/* helpers for forwarding data/close and for (de)scheduling polls */
static void pxtcp_pcb_forward_outbound_close(struct pxtcp *);
static void pxtcp_pcb_forward_inbound(struct pxtcp *);
static void pxtcp_pcb_forward_inbound_close(struct pxtcp *);
static void pxtcp_pcb_schedule_poll(struct pxtcp *);
static void pxtcp_pcb_cancel_poll(struct pxtcp *);
/* poll manager handlers for pxtcp channels */
static struct pollmgr_handler pxtcp_pmgr_chan_add_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollout_hdl;
static struct pollmgr_handler pxtcp_pmgr_chan_pollin_hdl;
/*
* The DEL channel is only needed if the host system doesn't report
* POLLHUP for fully closed tcp sockets.
*/
#if !(HAVE_TCP_POLLHUP & POLLOUT)
static struct pollmgr_handler pxtcp_pmgr_chan_del_hdl;
#endif
static struct pollmgr_handler pxtcp_pmgr_chan_reset_hdl;
/**
* Init PXTCP - must be run before either the lwIP tcpip thread or the
* poll manager threads have been created.
*/
void
pxtcp_init(void)
{
/*
* Create channels.
*/
} while (0)
/*
* Same condition as the one guarding pxtcp_pmgr_chan_del_hdl, so
* this presumably sets up the PXTCP_DEL channel -- TODO confirm.
*/
#if !(HAVE_TCP_POLLHUP & POLLOUT)
#endif
/*
* Listen to outgoing connections from guest(s).
*/
}
/**
* Syntactic sugar for sending pxtcp pointer over poll manager
* channel. Used by lwip thread functions.
*/
static ssize_t
{
}
/**
* Syntactic sugar for sending weak reference to pxtcp over poll
* manager channel. Used by lwip thread functions.
*/
static ssize_t
{
}
/**
* Counterpart of pxtcp_chan_send().
*/
static struct pxtcp *
{
return pxtcp;
}
/**
* Counterpart of pxtcp_chan_send_weak().
*/
static struct pxtcp *
{
struct pollmgr_refptr *rp;
struct pollmgr_handler *base;
return pxtcp;
}
/**
* Register pxtcp with poll manager.
*
* Used for POLLMGR_CHAN_PXTCP_ADD and by port-forwarding. Since
* error handling is different in these two cases, we leave it up to
* the caller.
*/
int
{
int status;
return status;
}
/**
* Unregister pxtcp with poll manager.
*
* Used for POLLMGR_CHAN_PXTCP_RESET and by port-forwarding (on error
* leg).
*/
void
{
}
/**
* POLLMGR_CHAN_PXTCP_ADD handler.
*
* Get new pxtcp from lwip thread and start polling its socket.
*/
static int
{
int status;
DPRINTF0(("pxtcp_add: new pxtcp %p; pcb %p; sock %d\n",
if (status < 0) {
(void) pxtcp_schedule_reset(pxtcp);
}
return POLLIN;
}
/**
* POLLMGR_CHAN_PXTCP_POLLOUT handler.
*
* pxtcp_pcb_forward_outbound() on the lwIP thread tried to send data
* and failed, it now requests us to poll the socket for POLLOUT and
* schedule pxtcp_pcb_forward_outbound() when sock is writable again.
*/
static int
{
return POLLIN;
}
return POLLIN;
}
/**
* POLLMGR_CHAN_PXTCP_POLLIN handler.
*/
static int
{
return POLLIN;
}
if (pxtcp->inbound_close) {
return POLLIN;
}
return POLLIN;
}
#if !(HAVE_TCP_POLLHUP & POLLOUT)
/**
* POLLMGR_CHAN_PXTCP_DEL handler.
*
* Schedule pxtcp deletion. We only need this if host system doesn't
* report POLLHUP for fully closed tcp sockets.
*/
static int
{
return POLLIN;
}
DPRINTF(("PXTCP_DEL: pxtcp %p; pcb %p; sock %d\n",
(void) pxtcp_schedule_delete(pxtcp);
return POLLIN;
}
#endif /* !(HAVE_TCP_POLLHUP & POLLOUT) */
/**
* POLLMGR_CHAN_PXTCP_RESET handler.
*
* Close the socket with RST and delete pxtcp.
*/
static int
{
return POLLIN;
}
DPRINTF0(("PXTCP_RESET: pxtcp %p; pcb %p; sock %d\n",
(void) pxtcp_schedule_reset(pxtcp);
return POLLIN;
}
/**
* Create a new pxtcp with all close/pull/delete flags cleared.
*
* Returns the new pxtcp, or NULL on failure.
*/
static struct pxtcp *
pxtcp_allocate(void)
{
return NULL;
}
/* neither direction is closed yet; no pull or delete is pending */
pxtcp->outbound_close = 0;
pxtcp->outbound_close_done = 0;
pxtcp->inbound_close = 0;
pxtcp->inbound_close_done = 0;
pxtcp->inbound_pull = 0;
pxtcp->deferred_delete = 0;
return NULL;
}
return NULL;
}
do { \
} while (0)
return pxtcp;
}
/**
* Exported to fwtcp to create pxtcp for incoming port-forwarded
* connections. Completed with pcb in pxtcp_pcb_connect().
*/
struct pxtcp *
{
pxtcp = pxtcp_allocate();
return NULL;
}
return pxtcp;
}
static void
{
}
static void
{
}
}
}
/**
* Counterpart to pxtcp_create_forwarded() to destruct pxtcp that
* fwtcp failed to register with poll manager to post to lwip thread
* for doing connect.
*/
void
{
}
static void
{
return;
}
DPRINTF(("%s: pxtcp %p <-> pcb %p\n",
/*
* We must have dissociated from a fully closed pcb immediately
* since lwip recycles them and we don't want to mess with what
* would be someone else's pcb that we happen to have a stale
* pointer to.
*/
}
/**
* Lwip thread callback invoked via pxtcp::msg_delete
*
* Since we use static messages to communicate to the lwip thread, we
* cannot delete pxtcp without making sure there are no unprocessed
* messages in the lwip thread mailbox.
*
* The easiest way to ensure that is to send this "delete" message as
* the last one and when it's processed we know there are no more and
* it's safe to delete pxtcp.
*
* Poll manager handlers should use pxtcp_schedule_delete()
* convenience function.
*
* ctx is the opaque callback argument; presumably the pxtcp to be
* deleted -- TODO confirm.
*/
static void
pxtcp_pcb_delete_pxtcp(void *ctx)
{
DPRINTF(("%s: pxtcp %p, pcb %p, sock %d%s\n",
? " (was deferred)" : "")));
/*
* pxtcp is no longer registered with poll manager, so it's safe
* to close the socket.
*/
}
/*
* We might have already dissociated from a fully closed pcb, or
* guest might have sent us a reset while msg_delete was in
* transit. If there's no pcb, we are done.
*/
return;
}
/*
* Have we completely forwarded all inbound traffic to the guest?
*
* We may still be waiting for ACKs. We may have failed to send
* some of the data (tcp_write() failed with ERR_MEM). We may
* have failed to send the FIN (tcp_shutdown() failed with
* ERR_MEM).
*/
if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
}
else {
/* something is still pending: log the ring buffer state (DEFER) */
DPRINTF2(("delete: pxtcp %p; pcb %p:"
" unacked %d, unsent %d, vacant %d, %s - DEFER!\n",
}
}
/**
* If we couldn't delete pxtcp right away in the msg_delete callback
* from the poll manager thread, we repeat the check at the end of
* relevant pcb callbacks.
*/
DECLINLINE(void)
{
}
}
/**
* Poll manager callbacks should use this convenience wrapper to
* schedule pxtcp deletion on the lwip thread and to deregister from
* the poll manager.
*/
static int
{
/*
* If pollmgr_refptr_get() is called by any channel before
* scheduled deletion happens, let them know we are gone.
*/
/*
* Schedule deletion. Since poll manager thread may be pre-empted
* right after we send the message, the deletion may actually
* happen on the lwip thread before we return from this function,
* so it's not safe to refer to pxtcp after this call.
*/
/* tell poll manager to deregister us */
return -1;
}
/**
* Lwip thread callback invoked via pxtcp::msg_reset
*
* Like pxtcp_pcb_delete_pxtcp(), but sends RST to the guest before
* deleting this pxtcp.
*
* ctx is the opaque callback argument; presumably the pxtcp to be
* reset -- TODO confirm.
*/
static void
pxtcp_pcb_reset_pxtcp(void *ctx)
{
DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d\n",
}
}
}
/**
* Poll manager callbacks should use this convenience wrapper to
* schedule pxtcp reset and deletion on the lwip thread and to
* deregister from the poll manager.
*
* See pxtcp_schedule_delete() for additional comments.
*/
static int
{
return -1;
}
/**
* Reject proxy connection attempt. Depending on the cause (sockerr)
* we may just drop the pcb silently, generate an ICMP datagram or
* send TCP reset.
*/
static void
{
int reset = 0;
oif = ip_current_netif();
ip_current_netif() = netif;
if (sockerr == ECONNREFUSED) {
reset = 1;
}
else if (PCB_ISIPV6(pcb)) {
}
else if (sockerr == EHOSTUNREACH
|| sockerr == ENETUNREACH)
{
}
}
else {
|| sockerr == EHOSTUNREACH
|| sockerr == ENETUNREACH)
{
}
}
ip_current_netif() = oif;
}
/**
* Scheduled from the poll manager thread via pxtcp::msg_accept when
* the proxy failed to connect to the destination. Also used when we
* failed to register pxtcp with poll manager. Like the other
* proxy_lwip_post() callbacks it is meant to run on the lwIP thread.
*
* This is like pxtcp_pcb_reset_pxtcp() but is more discriminate in
* how this unestablished connection is terminated.
*
* ctx is the opaque callback argument; presumably the pxtcp whose
* connection attempt is refused -- TODO confirm.
*/
static void
pxtcp_pcb_accept_refuse(void *ctx)
{
DPRINTF0(("%s: pxtcp %p, pcb %p, sock %d: %R[sockerr]\n",
}
}
/**
* Convenience wrapper for poll manager connect callback to reject
* connection attempt.
*
* Like pxtcp_schedule_reset(), but the callback is more discriminate
* in how this unestablished connection is terminated.
*/
static int
{
return -1;
}
/**
* Global tcp_proxy_accept() callback for proxied outgoing TCP
* connections from guest(s).
*/
static err_t
{
int sdom;
int sockerr = 0;
/*
* TCP first calls accept callback when it receives the first SYN
* and "tentatively accepts" new proxied connection attempt. When
* proxy "confirms" the SYN and sends SYN|ACK and the guest
* replies with ACK the accept callback is called again, this time
* with the established connection.
*/
if (sock == INVALID_SOCKET) {
goto abort;
}
pxtcp = pxtcp_allocate();
goto abort;
}
/* save initial datagram in case we need to reply with ICMP */
pbuf_ref(p);
if (nsent < 0) {
return ERR_ABRT;
}
return ERR_OK;
DPRINTF0(("%s: pcb %p, sock %d: %R[sockerr]\n",
return ERR_ABRT;
}
/**
* tcp_proxy_accept() callback for accepted proxied outgoing TCP
* connections from guest(s). This is "real" accept with three-way
* handshake completed.
*/
static err_t
{
/* send any inbound data that are already queued */
return ERR_OK;
}
/**
* Initial poll manager callback for proxied outgoing TCP connections.
* pxtcp_pcb_accept() sets pxtcp::pmhdl::callback to this.
*
* Waits for connect(2) to the destination to complete. On success
* replaces itself with pxtcp_pmgr_pump() callback common to all
* established TCP connections.
*/
static int
{
}
else {
int status;
SOCKET s;
if (status < 0) { /* should not happen */
DPRINTF(("%s: sock %d: SO_ERROR failed: %R[sockerr]\n",
}
else {
DPRINTF(("%s: sock %d: connect: %R[sockerr]\n",
}
closesocket(s);
}
return pxtcp_schedule_reject(pxtcp);
}
/* confirm accept to the guest */
/*
* Switch to common callback used for all established proxied
* connections.
*/
/*
* Initially we poll for incoming traffic only. Outgoing
* traffic is fast-forwarded by pxtcp_pcb_recv(); if it fails
* it will ask us to poll for POLLOUT too.
*/
}
/* should never get here */
DPRINTF0(("%s: pxtcp %p, sock %d: unexpected revents 0x%x\n",
return pxtcp_schedule_reset(pxtcp);
}
/**
* Scheduled from the poll manager thread via pxtcp::msg_accept when
* the proxy has connected to the destination. Finalize accept by
* sending SYN|ACK to the guest. Like the other proxy_lwip_post()
* callbacks it is meant to run on the lwIP thread.
*
* ctx is the opaque callback argument; presumably the pxtcp whose
* connection is confirmed -- TODO confirm.
*/
static void
pxtcp_pcb_accept_confirm(void *ctx)
{
return;
}
/* we are not going to reply with ICMP, so we can drop initial pbuf */
/*
* If lwIP failed to enqueue SYN|ACK because it's out of pbufs it
* abandons the pcb. Retrying that is not very easy, since it
* would require keeping "fractional state". From guest's point
* of view there is no reply to its SYN so it will either resend
* the SYN (effectively triggering full connection retry for us),
* or it will eventually time out.
*/
}
/*
* else if (error != ERR_OK): even if tcp_output() failed with
* ERR_MEM - don't give up, that SYN|ACK is enqueued and will be
* retransmitted eventually.
*/
}
/**
* Entry point for port-forwarding.
*
* fwtcp accepts new incoming connection, creates pxtcp for the socket
* (with no pcb yet) and adds it to the poll manager (polling for
* errors only). Then it calls this function to construct the pcb and
* perform connection to the guest.
*/
void
{
struct sockaddr_storage ss;
int status;
goto reset;
}
if (status == SOCKET_ERROR) {
goto reset;
}
/* nit: compares PF and AF, but they are the same everywhere */
if (status == PXREMAP_FAILED) {
goto reset;
}
}
else { /* PF_INET6 */
}
/* lwip port arguments are in host order */
goto reset;
}
/* callback: */ pxtcp_pcb_connected);
goto reset;
}
return;
}
/**
* Port-forwarded connection to guest is successful, pump data.
*/
static err_t
{
DPRINTF0(("%s: new pxtcp %p; pcb %p; sock %d\n",
/* ACK on connection is like ACK on data in pxtcp_pcb_sent() */
return ERR_OK;
}
/**
* tcp_recv() callback.
*/
static err_t
{
/*
* Have we done sending previous batch?
*/
if (p != NULL) {
/*
* Return an error to tell TCP to hold onto that pbuf.
* It will be presented to us later from tcp_fasttmr().
*/
return ERR_WOULDBLOCK;
}
else {
/*
* Unlike data, p == NULL indicating orderly shutdown is
* NOT presented to us again
*/
return ERR_OK;
}
}
/*
* Guest closed?
*/
if (p == NULL) {
return ERR_OK;
}
/*
* Got data, send what we can without blocking.
*/
return pxtcp_pcb_forward_outbound(pxtcp, p);
}
/**
* Guest half-closed its TX side of the connection.
*
* Called either immediately from pxtcp_pcb_recv() when it gets NULL,
* or from pxtcp_pcb_forward_outbound() when it finishes forwarding
* previously unsent data and sees pxtcp::outbound_close flag saved by
* pxtcp_pcb_recv().
*/
static void
{
DPRINTF(("outbound_close: pxtcp %p; pcb %p %s\n",
/* set the flag first, since shutdown() may trigger POLLHUP */
#if !(HAVE_TCP_POLLHUP & POLLOUT)
/*
* We need to nudge poll manager manually, since OS will not
* report POLLHUP.
*/
if (pxtcp->inbound_close) {
}
#endif
/* no more outbound data coming to us */
/*
* If we have already done inbound close previously (active close
* on the pcb), then we must not hold onto a pcb in TIME_WAIT
* state since those will be recycled by lwip when it runs out of
* free pcbs in the pool.
*
* The test is true also for a pcb in CLOSING state that waits
* just for the ACK of its FIN (to transition to TIME_WAIT).
*/
if (pxtcp_pcb_forward_inbound_done(pxtcp)) {
}
}
/**
* Forward outbound data from pcb to socket.
*
* Called by pxtcp_pcb_recv() to forward new data and by callout
* triggered by POLLOUT on the socket to send previously unsent data.
*
* (Re)schedules one-time callout if not all data are sent.
*/
static err_t
{
int sockerr;
forwarded = 0;
sockerr = 0;
q = NULL;
qoff = 0;
qs = p;
size_t i;
fwd1 = 0;
LWIP_ASSERT1(q->len > 0);
}
/*
* TODO: This is where application-level proxy can hook into
* to process outbound traffic.
*/
/* successfully sent this chain fragment completely */
qs = q;
}
else if (nsent >= 0) {
/* successfully sent only some data */
/* find the first pbuf that was not completely forwarded */
break;
}
}
LWIP_ASSERT1(q != NULL);
break;
}
else {
/*
* Some errors are really not errors - if we get them,
* it's not different from getting nsent == 0, so filter
* them out here.
*/
if (proxy_error_is_transient(sockerr)) {
sockerr = 0;
}
q = qs;
qoff = 0;
break;
}
}
if (forwarded > 0) {
}
if (q == NULL) { /* everything is forwarded? */
LWIP_ASSERT1(sockerr == 0);
pbuf_free(p);
if (pxtcp->outbound_close) {
}
}
else {
if (q != p) {
/* free forwarded pbufs at the beginning of the chain */
pbuf_ref(q);
pbuf_free(p);
}
if (qoff > 0) {
/* advance payload pointer past the forwarded part */
}
/*
* Has sendmsg() failed?
*
* Connection reset will be detected by poll and
* pxtcp_schedule_reset() will be called.
*
* Otherwise something *really* unexpected must have happened,
* so we'd better abort.
*/
/* call error callback manually since we've already dissociated */
return ERR_ABRT;
}
/* schedule one-shot POLLOUT on the socket */
}
return ERR_OK;
}
#if !defined(RT_OS_WINDOWS)
static ssize_t
{
#ifdef MSG_NOSIGNAL
const int send_flags = MSG_NOSIGNAL;
#else
const int send_flags = 0;
#endif
if (nsent < 0) {
}
return nsent;
}
#else /* RT_OS_WINDOWS */
static ssize_t
{
int status;
if (status == SOCKET_ERROR) {
return -SOCKERRNO();
}
return nsent;
}
#endif /* RT_OS_WINDOWS */
/**
* Callback scheduled by the poll manager (on POLLOUT) to send data
* from pxtcp::unsent pbuf to socket. Like the other
* proxy_lwip_post() callbacks it is meant to run on the lwIP thread.
*
* ctx is the opaque callback argument; presumably the pxtcp whose
* socket became writable -- TODO confirm.
*/
static void
pxtcp_pcb_write_outbound(void *ctx)
{
return;
}
}
/**
* Common poll manager callback used by both outgoing and incoming
* (port-forwarded) connections that have a connected socket.
*/
static int
{
int status;
int sockerr;
return pxtcp_schedule_reset(pxtcp);
}
if (status < 0) { /* should not happen */
DPRINTF(("sock %d: SO_ERROR failed: %R[sockerr]\n",
}
else {
}
return pxtcp_schedule_reset(pxtcp);
}
}
int stop_pollin;
if (nread < 0) {
return pxtcp_schedule_reset(pxtcp);
}
if (stop_pollin) {
}
if (nread > 0) {
#if !HAVE_TCP_POLLHUP
/*
* If host does not report POLLHUP for closed sockets
* (e.g. NetBSD) we should check for full close manually.
*/
return pxtcp_schedule_delete(pxtcp);
}
#endif
}
}
#if !HAVE_TCP_POLLHUP
#else
#if HAVE_TCP_POLLHUP == POLLIN
/*
* Remote closed inbound.
*/
if (!pxtcp->outbound_close_done) {
/*
* We might still need to poll for POLLOUT, but we can not
* poll for POLLIN anymore (even if not all data are read)
* because we will be spammed by POLLHUP.
*/
if (!pxtcp->inbound_close) {
/* the rest of the input has to be pulled */
}
}
else
#endif
/*
* Both directions are closed.
*/
{
if (pxtcp->inbound_close) {
/* there's no unread data, we are done */
return pxtcp_schedule_delete(pxtcp);
}
else {
/* pull the rest of the input first (deferred_delete) */
return -1;
}
/* NOTREACHED */
}
}
#endif /* HAVE_TCP_POLLHUP */
}
/**
* Read data from socket to ringbuf. This may be used both on lwip
* and poll manager threads.
*
* Flag pointed to by pstop is set when further reading is impossible,
* either temporary when buffer is full, or permanently when EOF is
* received.
*
* Returns number of bytes read. NB: EOF is reported as 1!
*
* Returns zero if nothing was read, either because buffer is full, or
* if no data is available (EWOULDBLOCK, EINTR &c).
*
* Returns -errno on real socket errors.
*/
static ssize_t
{
*pstop = 0;
/* lim is the index we can NOT write to */
if (lim == 0) {
}
else if (lim == 1) {
}
else {
--lim;
}
/*
* Buffer is full, stop polling for POLLIN.
*
* pxtcp_pcb_sent() will re-enable POLLIN when guest ACKs
* data, freeing space in the ring buffer.
*/
*pstop = 1;
return 0;
}
/* free space in one chunk */
iovlen = 1;
}
else {
/* free space in two chunks */
iovlen = 2;
}
/*
* TODO: This is where application-level proxy can hook into to
* process inbound traffic.
*/
if (nread > 0) {
}
DPRINTF2(("pxtcp %p: sock %d read %d bytes\n",
return nread;
}
else if (nread == 0) {
*pstop = 1;
DPRINTF2(("pxtcp %p: sock %d read EOF\n",
return 1;
}
else {
if (proxy_error_is_transient(sockerr)) {
/* haven't read anything, just return */
DPRINTF2(("pxtcp %p: sock %d read cancelled\n",
return 0;
}
else {
/* socket error! */
DPRINTF0(("pxtcp %p: sock %d read: %R[sockerr]\n",
return -sockerr;
}
}
}
#if !defined(RT_OS_WINDOWS)
static ssize_t
{
if (nread < 0) {
}
return nread;
}
#else /* RT_OS_WINDOWS */
static ssize_t
{
int status;
flags = 0;
if (status == SOCKET_ERROR) {
return -SOCKERRNO();
}
}
#endif /* RT_OS_WINDOWS */
/**
* Callback scheduled by the poll manager (pxtcp::msg_inbound) to
* trigger output from ringbuf to guest. Like the other
* proxy_lwip_post() callbacks it is meant to run on the lwIP thread.
*
* ctx is the opaque callback argument; presumably the pxtcp that has
* new inbound data -- TODO confirm.
*/
static void
pxtcp_pcb_write_inbound(void *ctx)
{
return;
}
}
/**
* tcp_poll() callback
*
* We switch it on when tcp_write() or tcp_shutdown() fail with
* ERR_MEM to prevent connection from stalling. If there are ACKs or
* more inbound data then pxtcp_pcb_forward_inbound() will be
* triggered again, but if neither happens, tcp_poll() comes to the
* rescue.
*/
static err_t
{
DPRINTF2(("%s: pxtcp %p; pcb %p\n",
/*
* If the last thing holding up deletion of the pxtcp was failed
* tcp_shutdown() and it succeeded, we may be the last callback.
*/
return ERR_OK;
}
static void
{
}
static void
{
}
/**
* Forward inbound data from ring buffer to the guest.
*
* Scheduled by poll manager thread after it receives more data into
* the ring buffer (we have more data to send).
* Also called from tcp_sent() callback when guest ACKs some data,
* increasing pcb->snd_buf (we are permitted to send more data).
*
* Also called from tcp_poll() callback if previous attempt to forward
* inbound data failed with ERR_MEM (we need to try again).
*/
static void
{
return;
}
/*
* If we have just confirmed accept of this connection, the
* pcb is in SYN_RCVD state and we still haven't received the
* ACK of our SYN. It's only in SYN_RCVD -> ESTABLISHED
* transition that lwip decrements pcb->acked so that that ACK
* is not reported to pxtcp_pcb_sent(). If we send something
* now and immediately close (think "daytime", e.g.) while
* still in SYN_RCVD state, we will move directly to
* FIN_WAIT_1 and when our confirming SYN is ACK'ed lwip will
* report it to pxtcp_pcb_sent().
*/
DPRINTF2(("forward_inbound: pxtcp %p; pcb %p %s - later...\n",
return;
}
return;
}
/*
* Else, there's no data to send.
*
* If there is free space in the buffer, producer will
* reschedule us as it receives more data and vacant (lim)
* advances.
*
* If buffer is full when all data have been passed to
* tcp_write() but not yet acknowledged, we will advance
* unacked on ACK, freeing some space for producer to write to
* (then see above).
*/
return;
}
if (sndbuf == 0) {
/*
* Can't send anything now. As guest ACKs some data, TCP will
* call pxtcp_pcb_sent() callback and we will come here again.
*/
return;
}
nsent = 0;
/*
* We have three limits to consider:
* - how much data we have in the ringbuf
* - how much data we are allowed to send
* - ringbuf size
*/
/* so beg is not going to wrap, treat sndbuf as lim */
}
else { /* we are limited by the end of the buffer, beg will wrap */
maybemore = 0;
}
else {
}
goto writeerr;
}
if (maybemore) {
beg = 0;
}
else {
/* we are done sending, but ... */
goto check_inbound_close;
}
}
}
}
if (tolim > 0) {
goto writeerr;
}
}
}
DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes\n",
return;
if (nsent > 0) { /* first write succeeded, second failed */
DPRINTF2(("forward_inbound: pxtcp %p, pcb %p: sent %d bytes only\n",
}
DPRINTF(("forward_inbound: pxtcp %p, pcb %p: ERR_MEM\n",
}
else {
DPRINTF(("forward_inbound: pxtcp %p, pcb %p: %s\n",
/* XXX: We shouldn't get ERR_ARG. Check ERR_CONN conditions early? */
}
}
static void
{
DPRINTF(("inbound_close: pxtcp %p; pcb %p: %s\n",
DPRINTF(("inbound_close: pxtcp %p; pcb %p:"
" tcp_shutdown: error=%s\n",
return;
}
/*
* If we have already done outbound close previously (passive
* close on the pcb), then we must not hold onto a pcb in LAST_ACK
* state since those will be deleted by lwip when that last ack
* comes from the guest.
*
* NB: We do NOT check for deferred delete here, even though we
* have just set one of its conditions, inbound_close_done. We
* let pcb callbacks that called us do that. It's simpler and
* cleaner that way.
*/
}
}
/**
* Check that all forwarded inbound data is sent and acked, and that
* inbound close is scheduled (we aren't called back when it's acked).
*/
DECLINLINE(int)
{
}
/**
* tcp_sent() callback - guest acknowledged len bytes.
*
* We can advance inbuf::unacked index, making more free space in the
* ringbuf and wake up producer on poll manager thread.
*
* We can also try to send more data if we have any since pcb->snd_buf
* was increased and we are now permitted to send more.
*/
static err_t
{
DPRINTF2(("%s: pxtcp %p; pcb %p: +%d ACKed:"
" unacked %d, unsent %d, vacant %d\n",
if (/* __predict_false */ len == 0) {
/* we are notified to start pulling */
}
else {
/*
* Advance unacked index. Guest acknowledged the data, so it
* won't be needed again for potential retransmits.
*/
}
}
/* arrange for more inbound data */
if (!pxtcp->inbound_close) {
if (!pxtcp->inbound_pull) {
/* wake up producer, in case it has stopped polling for POLLIN */
#ifdef RT_OS_WINDOWS
/**
* We haven't got enough room in the ring buffer to read at the moment,
* but we don't want to lose the notification from WSAW4ME when
* space becomes available, so we reset the event with an empty recv
*/
#endif
}
else {
int stop_pollin; /* ignored */
if (nread < 0) {
DPRINTF0(("%s: sock %d: %R[sockerr]\n",
/*
* Since we are pulling, pxtcp is no longer registered
* with poll manager so we can kill it directly.
*/
return ERR_ABRT;
}
}
}
/* forward more data if we can */
if (!pxtcp->inbound_close_done) {
/*
* NB: we might have dissociated from a pcb that transitioned
* to LAST_ACK state, so don't refer to pcb below.
*/
}
/* have we got all the acks? */
{
char *buf;
DPRINTF(("%s: pxtcp %p; pcb %p; all data ACKed\n",
/* no more retransmits, so buf is not needed */
/* no more acks, so no more callbacks */
}
/*
* We may be the last callback for this pcb if we have also
* successfully forwarded inbound_close.
*/
}
return ERR_OK;
}
/**
* Callback scheduled by the poll manager (pxtcp::msg_inpull) to
* switch pxtcp_pcb_sent() to actively pull the last bits of input.
* See POLLHUP comment in pxtcp_pmgr_pump(). Like the other
* proxy_lwip_post() callbacks it is meant to run on the lwIP thread.
*
* pxtcp::sock is deregistered from poll manager after this callback
* is scheduled.
*
* ctx is the opaque callback argument; presumably the pxtcp to
* switch to pulling -- TODO confirm.
*/
static void
pxtcp_pcb_pull_inbound(void *ctx)
{
return;
}
/*
* If the outbound half-close is already done, both directions are
* closed and this pull is logged as a deferred delete.
*/
if (pxtcp->outbound_close_done) {
DPRINTF(("%s: pxtcp %p: pcb %p (deferred delete)\n",
}
else {
DPRINTF(("%s: pxtcp %p: pcb %p\n",
}
}
/**
* tcp_err() callback.
*
* pcb is not passed to this callback since it may be already
* deallocated by the stack, but we can't do anything useful with it
* anyway since connection is gone.
*/
static void
{
/*
* ERR_CLSD is special - it is reported here when:
*
* . guest has already half-closed
* . we send FIN to guest when external half-closes
* . guest acks that FIN
*
* Since connection is closed but receive has been already closed
* lwip can only report this via tcp_err. At this point the pcb
* is still alive, so we can peek at it if need be.
*
* The interesting twist is when the ACK from guest that acks our
* FIN also acks some data. In this scenario lwip will NOT call
* tcp_sent() callback with the ACK for that last bit of data but
* instead will call tcp_err with ERR_CLSD right away. Since that
* ACK also acknowledges all the data, we should run some of
* pxtcp_pcb_sent() logic here.
*/
DPRINTF2(("ERR_CLSD: pxtcp %p; pcb %p:"
" pcb->acked %d;"
" unacked %d, unsent %d, vacant %d\n",
}
return;
}
DPRINTF0(("tcp_err: pxtcp=%p, error=%s\n",
if (pxtcp->deferred_delete) {
}
else {
}
}