rpcmod.c revision 2695d4f4d1e2a6022c8a279d40c3cb750964974d
#endif	/* _SYSCALL32_IMPL */

/* Could not install module, cleanup previous */

/*
 * Load up the RDMA plugins and initialize the stats. Even if the
 * plugin loadup fails, as long as rpcmod was successfully installed,
 * the counters still get initialized.
 */

/*
 * Get our identification into ldi. This is used for loading
 * other modules, e.g. rpcib.
 */

/*
 * The unload entry point fails, because we advertise entry points into
 * rpcmod from the rest of kRPC: rpcmod_release().
 */

/*
 * To save instructions, since STREAMS ignores the return value
 * from these functions, they are defined as void here. Kind of icky, but...
 *
 * The write put procedure is simply putnext to conserve stack space.
 * The write service procedure is not used to queue data, but instead to
 * synchronize with flow control.
 * Read side has no service procedure.
 */

/*
 * Per rpcmod "slot" data structure. q->q_ptr points to one of these.
 */
	int	rm_type;	/* Client or server side stream */

#define	RM_CLOSING	0x1	/* somebody is trying to close slot */

	/* must be first in the structure. */
	int	mir_type;		/* Client or server side stream */
	/*
	 * mir_head_mp points to the first mblk being collected in
	 * the current RPC message. Record headers are removed
	 * before data is linked into mir_head_mp.
	 */
	/*
	 * mir_tail_mp points to the last mblk in the message
	 * chain starting at mir_head_mp. It is only valid
	 * if mir_head_mp is non-NULL and is used to add new
	 * data blocks to the end of the chain quickly.
	 */
	/*
	 * mir_frag_len starts at -4 for the beginning of each fragment.
	 * When this length is negative, it indicates the number of
	 * bytes that rpcmod needs to complete the record marker
	 * header. When it is positive or zero, it holds the number
	 * of bytes that have arrived for the current fragment and
	 * are held in mir_header_mp.
	 */
	/*
	 * Fragment header as collected for the current fragment.
	 * It holds the last-fragment indicator and the number
	 * of bytes in the fragment.
	 */
	/* Hold inbound messages on the server */
	/* side until outbound flow control */
	/* is relieved. */
	/*
	 * On client streams, mir_clntreq is 0 or 1; it is set
	 * to 1 whenever a new request is sent out (mir_wput)
	 * and cleared when the timer fires (mir_timer). If
	 * the timer fires with this value equal to 0, then the
	 * stream is considered idle and kRPC is notified.
	 */
	/* On server streams, stop accepting messages. */
	/*
	 * This value is copied from clnt_idle_timeout or
	 * svc_idle_timeout during the appropriate ioctl.
	 */
	/*
	 * This value is set to lbolt every time a client stream
	 * sends or receives data. Even if the timer message arrives,
	 * we don't shut the stream down unless:
	 *	lbolt >= MSEC_TO_TICK(mir_idle_timeout) + mir_use_timestamp.
	 * This value is kept in HZ.
	 */
	/*
	 * This pointer is set to &clnt_max_msg_size or
	 * &svc_max_msg_size during the appropriate ioctl.
	 */

	/* Server-side fields. */

	int	mir_ref_cnt;	/* Reference count: server side only */
				/* counts the number of references */
				/* that a kernel RPC server thread */
				/* (see svc_run()) has on this rpcmod */
				/* slot. Effectively, it is the */
				/* number of unprocessed messages */
				/* that have been passed up to the */
				/* kRPC layer. */

	/*
	 * These fields are for both client and server, but for debugging,
	 * it is easier to have them last in the structure.
	 */

/*
 * Not an info-ack, so free it. This is ok because we should
 * not be receiving data until the open finishes: rpcmod
 * is pushed well before the end-point is bound to an address.
 */
	    "rpcmodopen_end:(%s)", "q->qptr");
/* Allocate the required messages upfront. */

/*
 * rpcmodopen - open routine gets called when the module gets pushed
 * onto the stream.
 */

/*
 * Initialize entry points to release a rpcmod slot (and an input
 * message if supplied) and to send an output message to the module
 * below rpcmod.
 */

/*
 * Only sufficiently privileged users can use this module, and it
 * is assumed that they will use this module properly, and NOT send
 * bulk data from downstream.
 */

/* Allocate slot data structure. */

/* slot type will be set by kRPC client and server ioctl's */

/* rpcmodclose - This routine gets called when the module gets popped. */

/* Mark our state as closing. */

/*
 * Check and see if there are any messages on the queue. If so, send
 * the messages, regardless whether the downstream module is ready to
 * accept data.
 */

/* call into SVC to clean the queue */

/*
 * Block while there are kRPC threads with a reference
 * to this stream.
 */

/*
 * It is now safe to remove this queue from the stream. No kRPC
 * threads have a reference to the stream, and none ever will,
 * because RM_CLOSING is set.
 */

/* Notify kRPC that this stream is going away. */

/*
 * rpcmodrput - Module read put procedure. This is called from
 * the module, driver, or stream head downstream.
 */

/* Forward this message to kRPC if it is data. */

/* Check if the module is being popped. */

/* Make sure the header is sane. */

/*
 * Call clnt_clts_dispatch_notify, so that it
 * can pass the message to the proper caller.
 * Don't discard the header just yet since the
 * client may need the sender's address.
 */

/*
 * rm_krpc_cell is exclusively used by the kRPC
 * CLTS server. Try to submit the message to
 * kRPC. Since this is an unreliable channel, we
 * can just free the message in case kRPC
 * does not accept new messages.
 */

/*
 * Raise the reference count on this
 * module to prevent it from being
 * popped before kRPC generates the
 * reply.
 */
	} /* end switch (rmp->rm_type) */

/* Make sure the header is sane. */

/*
 * In the case where a unit data error has been
 * received, all we need to do is clear the message from
 * the queue.
 */
		RPCLOG(32, "rpcmodrput: unitdata error received at "
	} /* end else if (pptr->type == T_UDERROR_IND) */
	} /* end switch (mp->b_datap->db_type) */
/* Return codes are not looked at by the STREAMS framework. */

/* Check to see if we can send the message downstream. */

/*
 * The first canputnext failed. Try again except this time with the
 * lock held, so that we can check the state of the stream to see if
 * it is closing. If either condition holds, the message can be
 * disposed of immediately (sent on, or freed if closing); otherwise
 * it must be queued.
 */

/*
 * canputnext failed again and the stream is not closing.
 * Place the message on the queue and let the service
 * procedure handle the message.
 */

/*
 * pass the ioctl downstream and hope someone
 * down there knows how to handle it.
 */

/*
 * This is something we definitely do not know how to handle, just
 * pass the message downstream.
 */

/*
 * Module write service procedure. This is called by downstream modules
 * for back enabling during flow control.
 */

/* Get messages that may be queued and send them downstream. */

/*
 * Optimize the service procedure for the server-side, by
 * avoiding a call to canputnext().
 */

/* For now, just free the message. */

/*
 * This part of rpcmod is pushed on a connection-oriented transport for use
 * by RPC. It serves to bypass the Stream head, implements
 * the record marking protocol, and dispatches incoming RPC messages.
 */

/* Default idle timer values */

/*
 * Don't block the service procedure (and mir_close) if
 * we are in the process of closing.
 */

/*
 * Timeout for subsequent notifications of idle connection. This is
 * typically used to clean up after a wedged orderly release.
 */

/*
 * Since the mir_mutex lock needs to be released to call
 * untimeout(), we need to make sure that no other thread
 * can start/stop the timer (changing mir_timer_id) during
 * that time. The mir_timer_call bit and the mir_timer_cv
 * condition variable are used to synchronize this. Setting
 * mir_timer_call also tells mir_timer() (refer to the comments
 * in mir_timer()) that it does not need to do anything.
 */

/* Only start the timer when it is not closing. */

/*
 * This loop is a bit tacky -- it walks the STREAMS list of
 * flow-controlled messages.
 */
	RPCLOG(32, "rpcmod: mir_close of q 0x%p\n", (void *)q);
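/*
 * For reference, the record marking protocol implemented here is the
 * standard RPC-over-TCP framing (RFC 5531, section 11): each fragment
 * is preceded by a 4-byte big-endian word whose high bit is the
 * last-fragment indicator and whose low 31 bits are the fragment
 * length.  A minimal sketch of encode/decode helpers (MIR_LASTFRAG
 * matches the bit layout described above; the helper names are ours):
 */
#include <stdint.h>
#include <stdbool.h>

#define	MIR_LASTFRAG	0x80000000u	/* last-fragment indicator */

/* Compose a record mark from a fragment length and last-frag flag. */
static uint32_t
rm_encode(uint32_t frag_len, bool last)
{
	return ((frag_len & ~MIR_LASTFRAG) | (last ? MIR_LASTFRAG : 0));
}

/* Split a received (host-order) record mark back into its parts. */
static void
rm_decode(uint32_t header, uint32_t *frag_lenp, bool *lastp)
{
	*frag_lenp = header & ~MIR_LASTFRAG;
	*lastp = (header & MIR_LASTFRAG) != 0;
}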
/*
 * Set mir_closing so we get notified when MIR_SVC_QUIESCED()
 * is TRUE. And mir_timer_start() won't start the timer again.
 * This will prevent more requests from arriving and
 * will force rpcmod to ignore flow control.
 */

/* call into SVC to clean the queue */

/*
 * Bugid 1253810 - Force the write service
 * procedure to send its messages, regardless
 * whether the downstream module is ready to
 * accept data.
 */

/* Notify kRPC that this stream is going away. */

/*
 * This is server side only (RPC_SERVER).
 */
	RPCLOG(16, "rpcmod: mir_svc_idle_stop of q 0x%p\n", (void *)q);
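/*
 * The mir_timer_call/mir_timer_cv protocol described above -- drop the
 * lock, cancel the timer, retake the lock, with a flag that serializes
 * concurrent start/stop and neuters a firing timer -- can be modeled
 * in user space with pthreads.  This is a sketch of the pattern, not
 * the kernel code; cancel_timer() is a stub standing in for untimeout().
 */
#include <pthread.h>
#include <stdbool.h>

/* Stand-in for the kernel's untimeout(); a stub for the sketch. */
static void
cancel_timer(int timer_id)
{
	(void)timer_id;
}

struct timer_sync {
	pthread_mutex_t lock;		/* models mir_mutex */
	pthread_cond_t	timer_cv;	/* models mir_timer_cv */
	bool		timer_call;	/* models mir_timer_call */
	int		timer_id;	/* models mir_timer_id */
};

/*
 * Stop the timer.  cancel_timer() must not be called with the lock
 * held, so timer_call marks the window: while it is set, no other
 * thread may start or stop the timer (i.e. change timer_id), and the
 * timer handler itself treats a set timer_call as "do nothing".
 */
static void
timer_stop(struct timer_sync *ts)
{
	pthread_mutex_lock(&ts->lock);
	while (ts->timer_call)		/* wait out a concurrent start/stop */
		pthread_cond_wait(&ts->timer_cv, &ts->lock);
	ts->timer_call = true;

	pthread_mutex_unlock(&ts->lock);	/* lock-free window */
	if (ts->timer_id != 0) {
		cancel_timer(ts->timer_id);
		ts->timer_id = 0;
	}
	pthread_mutex_lock(&ts->lock);

	ts->timer_call = false;
	pthread_cond_broadcast(&ts->timer_cv);
	pthread_mutex_unlock(&ts->lock);
}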
/*
 * This is server side only (RPC_SERVER).
 */

/*
 * Start idle processing, which will include setting the idle timer if the
 * stream is not being closed.
 */
	RPCLOG(16, "rpcmod: mir_svc_idle_start q 0x%p\n", (void *)q);

/* Don't re-start idle timer if we are closing queues. */
	RPCLOG(16, "mir_svc_idle_start - closing: 0x%p\n",

/*
 * We will call mir_svc_idle_start() whenever MIR_SVC_QUIESCED()
 * is TRUE. When it is true, and we are in the process of
 * closing the stream, signal any thread waiting in mir_close().
 */
	RPCLOG(16, "mir_svc_idle_start - reset %s timer\n",

/*
 * Normal condition, start the idle timer. If an orderly
 * release has been sent, set the timeout to wait for the
 * client to close its side of the connection. Otherwise,
 * use the normal idle timeout.
 */
	RPCLOG(32, "rpcmod: mir_open of q 0x%p\n", (void *)q);
/* Set variables used directly by kRPC. */

/* Allocate a zero'ed out mir structure for this stream. */

/*
 * We set hold inbound here so that incoming messages will
 * be held on the read-side queue until the stream is completely
 * initialized with a RPC_CLIENT or RPC_SERVER ioctl. During
 * the ioctl processing, the flag is cleared and any messages that
 * arrived between the open and the ioctl are delivered to kRPC.
 *
 * Early data should never arrive on a client stream, since
 * servers only respond to our requests and we do not send any
 * until after the stream is initialized. Early data is
 * very common on a server stream where the client will start
 * sending data as soon as the connection is made (and this
 * is especially true with TCP where the protocol accepts the
 * connection before nfsd or kRPC is notified about it).
 */

/*
 * Start the record marker looking for a 4-byte header. When
 * this length is negative, it indicates that rpcmod is looking
 * for bytes to consume for the record marker header. When it
 * is positive, it holds the number of bytes that have arrived
 * for the current fragment and are being held in mir_header_mp.
 */

/*
 * We noenable the read-side queue because we don't want it
 * automatically enabled by putq. We enable it explicitly
 * in mir_wsrv when appropriate. (See additional comments on
 * flow control at the beginning of mir_rsrv.)
 */

/*
 * Read-side put routine for both the client and server side. Does the
 * record marking for incoming RPC messages, and when complete, dispatches
 * the message to either the client or server.
 */

/*
 * If the stream has not been set up as a RPC_CLIENT or RPC_SERVER
 * with the corresponding ioctl, then don't accept
 * any inbound data. This should never happen for streams
 * created by nfsd or client-side kRPC because they are careful
 * to set the mode of the stream before doing anything else.
 */
	RPCLOG(1, "mir_rput: runt TPI message (%d bytes)\n",
/* Throw away the T_DATA_IND block and continue with the data. */

/*
 * If a module on the stream is trying to set the Stream head's
 * high water mark, then set our hiwater to the requested
 * value. We are the "stream head" for all inbound
 * data messages since messages are passed directly to kRPC.
 */
	RPCLOG(32, "on q 0x%p\n", (void *)q);
/* If this connection is closing, don't accept any new messages. */

/* Get local copies for quicker access. */

/* Loop, processing each message block in the mp chain separately. */

/*
 * Drop zero-length mblks to prevent unbounded kernel memory
 * consumption.
 */

/*
 * If frag_len is negative, we're still in the process of
 * building frag_header -- try to complete it with this mblk.
 */

/*
 * We consumed this mblk while trying to complete the
 * fragment header. Free it and move on.
 */

/*
 * Now frag_header has the number of bytes in this fragment
 * and we're just waiting to collect them all. Chain our
 * latest mblk onto the list and see if we now have enough
 * bytes to complete the fragment.
 */

/*
 * We still haven't received enough data to complete
 * the fragment, so continue on to the next mblk.
 */

/*
 * We've got a complete fragment. If there are excess bytes,
 * then they're part of the next fragment's header (of either
 * this RPC message or the next RPC message). Split that part
 * into its own mblk so that we can safely freeb() it when
 * building frag_header above.
 */

/*
 * Relink the message chain so that the next mblk is
 * the next fragment header, followed by the rest of
 * the message chain.
 */

/*
 * Data in the new mblk begins at the next fragment,
 * and data in the old mblk ends at the next fragment.
 */

/* Reset frag_len and frag_header for the next fragment. */

/*
 * The current fragment is complete, but more
 * fragments need to be processed before we can
 * pass along the RPC message headed at head_mp.
 */

/*
 * We've got a complete RPC message; pass it to the
 * appropriate consumer.
 */

/*
 * Mark this stream as active. This marker
 * is used in mir_timer().
 */

/* Check for flow control before passing the message up. */

/*
 * If the reference count is 0
 * (not counting this request), then the stream is
 * transitioning from idle to
 * non-idle. In this case, we cancel the idle timer.
 */

/*
 * Count # of times this happens. Should
 * be never, but experience shows otherwise.
 */

/*
 * If the outbound side of the stream is
 * flow controlled, then hold this message
 * until the client catches up. mir_hold_inbound
 * is set in mir_wput and cleared in mir_wsrv.
 */
	RPCLOG(1, "mir_rput: unknown mir_type %d\n",
/* Reset the chain since we're starting on a new RPC message. */

/*
 * Sanity check the message length; if it's too large mir_check_len()
 * will shutdown the connection, drop mir_mutex, and return non-zero.
 */

/* Save our local copies back in the mir structure. */

/*
 * The timer is stopped after the whole message chain is processed.
 * The reason is that stopping the timer releases the mir_mutex
 * lock temporarily. This means that the request can be serviced
 * while we are still processing the message chain. This is not
 * good. So we stop the timer here instead.
 *
 * Note that if the timer fires before we stop it, it will not
 * do any harm as MIR_SVC_QUIESCED() is false and mir_timer()
 * will simply return.
 */
	RPCLOG(16, "mir_rput: stopping idle timer on 0x%p because "
	    "ref cnt going to non zero\n", (void *)WR(q));
/*
 * We are disconnecting, but not necessarily
 * closing. By not closing, we will fail to
 * pick up a possibly changed global timeout value,
 * unless we store it now.
 */

/*
 * Even though we are unconnected, we still
 * leave the idle timer going on the client. The
 * reason for this is that if we've disconnected due
 * to a server-side disconnect, reset, or connection
 * timeout, there is a possibility the client may
 * retry the RPC request. This retry needs to be done on
 * the same bound address for the server to interpret
 * it as such. However, we don't want
 * to wait forever for that possibility. If the
 * end-point stays unconnected for mir_idle_timeout
 * units of time, then that is a signal to the
 * connection manager to give up waiting for the
 * application (eg. NFS) to send a retry.
 */
	RPCLOG(1, "mir_rput_proto T_ERROR_ACK for queue 0x%p",
	RPCLOG(1, "mir_rput: unexpected message %d "

/*
 * If this is a listening stream, then shut
 * off the idle timer.
 */

/* mark this as a listen endpoint */
	RPCLOG(16, "mir_rput_proto: got %s indication\n",

/* For listen endpoint, just pass it upstream. */

/* If client wants to break off connection, record that. */

/*
 * If we are idle, then send the orderly release
 * or disconnect indication to nfsd.
 */
	RPCLOG(16, "mir_rput_proto: not idle, so "
	    "upstream on 0x%p\n", (void *)q);

/*
 * Hold the indication until we get idle.
 * If there already is an indication stored,
 * replace it if the new one is a disconnect. The
 * reasoning is that disconnection takes less time
 * to process, and once a client decides to
 * disconnect, we should do that.
 */
	RPCLOG(16, "mir_rput_proto: replacing"
	    " indication with disconnect on"
	RPCLOG(16, "mir_rput_proto: already "
	    "indication. freeing ord rel "
	    "ind on 0x%p\n", (void *)q);
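/*
 * The replacement rule above is a simple priority: a held orderly
 * release may be displaced by a disconnect, never the reverse.  A tiny
 * model of that decision (the enum is a stand-in, not the TPI message
 * types):
 */
#include <stdbool.h>

enum held_ind { IND_NONE, IND_ORDREL, IND_DISCON };

/*
 * Decide whether a newly arrived indication should replace the one
 * held for an idle moment.  Disconnects win because they take less
 * time to process, and once a client decides to disconnect, we should
 * do that.
 */
static bool
should_replace(enum held_ind held, enum held_ind incoming)
{
	if (held == IND_NONE)
		return (true);
	return (held == IND_ORDREL && incoming == IND_DISCON);
}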
/* nfsd handles server-side non-data messages. */

/*
 * The server-side read queues are used to hold inbound messages while
 * outbound flow control is exerted. When outbound flow control is
 * relieved, mir_wsrv qenables the read-side queue. Read-side queues
 * are not enabled by STREAMS and are explicitly noenable'ed in mir_open.
 */

/*
 * If we were idle, turn off the idle timer
 * since we aren't idle any more.
 */

/*
 * Count # of times this happens. Should be
 * never, but experience shows otherwise.
 */

/*
 * The timer is stopped after all the messages are processed.
 * The reason is that stopping the timer releases the mir_mutex
 * lock temporarily. This means that the request can be serviced
 * while we are still processing the message queue. This is not
 * good. So we stop the timer here instead.
 */
	RPCLOG(16, "mir_rsrv stopping idle timer on 0x%p because ref "
	    "cnt going to non zero\n", (void *)WR(q));
	RPCLOG(16, "mir_rsrv: line %d: sending a held "

/*
 * Called to send an event code to nfsd/lockd so that it initiates
 * a connection close.
 */

/*
 * Create an M_DATA message with the event code and pass it to the
 * Stream head (nfsd or whoever created the stream will consume it).
 */
	RPCLOG(16, "mir_svc_policy_notify: could not allocate event "

/*
 * Server side: start the close phase. We want to get this rpcmod slot in an
 * idle state before mir_close() is called.
 */

/* Do not accept any more messages. */

/*
 * Next two statements will make the read service procedure
 * free everything stuck in the streams read queue.
 * It's not necessary because enabling the write queue will
 * have the same effect, but why not speed the process along?
 */

/*
 * Meanwhile force the write service procedure to send the
 * responses downstream, regardless of flow control.
 */

/*
 * This routine is called directly by kRPC after a request is completed,
 * whether a reply was sent or the request was dropped.
 */

/* Start idle processing if this is the last reference. */
	RPCLOG(16, "mir_svc_release: sending a held "

/* Start idle processing if this is the last reference. */
	RPCLOG(16, "mir_svc_release starting idle timer on 0x%p "
	    "because ref cnt is zero\n", (void *)wq);
/* Wake up the thread waiting to close. */

/*
 * This routine is called by server-side kRPC when it is ready to
 * handle inbound messages on the stream. We no longer need to take
 * the mir_mutex here, because the mir_setup_complete field has been
 * moved out of the binary field protected by the mir_mutex.
 */

/* client side wrapper for stopping timer with normal idle timeout. */

/* client side wrapper for starting timer with normal idle timeout. */

/*
 * client side only. Forces rpcmod to stop sending T_ORDREL_REQs on
 * end-points that aren't connected.
 */
	RPCLOG(1, "mir_clnt_idle_do_stop: wq 0x%p\n", (void *)wq);
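/*
 * mir_svc_release() and mir_close() pair up as a classic quiesce
 * pattern: the closing thread waits for the reference count to drain,
 * and the thread dropping the last reference wakes it.  A pthread
 * model, with MIR_SVC_QUIESCED() reduced to ref_cnt == 0 for the
 * sketch:
 */
#include <pthread.h>
#include <stdbool.h>

struct svc_slot {
	pthread_mutex_t lock;		/* models mir_mutex */
	pthread_cond_t	cv;		/* models the close wait/wakeup */
	int		ref_cnt;	/* models mir_ref_cnt */
	bool		closing;	/* models mir_closing / RM_CLOSING */
};

/* kRPC thread finishes a request: drop the reference, wake a closer. */
static void
slot_release(struct svc_slot *s)
{
	pthread_mutex_lock(&s->lock);
	if (--s->ref_cnt == 0) {
		/* Last reference: start idle processing, or wake the closer. */
		if (s->closing)
			pthread_cond_signal(&s->cv);
	}
	pthread_mutex_unlock(&s->lock);
}

/* Closer: block while kRPC threads still hold references. */
static void
slot_close(struct svc_slot *s)
{
	pthread_mutex_lock(&s->lock);
	s->closing = true;	/* no new references once this is set */
	while (s->ref_cnt > 0)
		pthread_cond_wait(&s->cv, &s->lock);
	pthread_mutex_unlock(&s->lock);
}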
/*
 * Timer handler. It handles the idle timeout and the memory
 * shortage problem.
 */

/*
 * mir_timer_call is set only when either mir_timer_[start|stop]
 * is progressing. And mir_timer() can only be run while they
 * are progressing if the timer is being stopped. So just return.
 */

/*
 * For clients, the timer fires at clnt_idle_timeout
 * intervals. If the activity marker (mir_clntreq) is
 * zero, then the stream has been idle since the last
 * timer event and we notify kRPC. If mir_clntreq is
 * non-zero, then the stream is active and we just
 * restart the timer for another interval. mir_clntreq
 * is set to 1 in mir_wput for every request passed
 * downstream.
 *
 * If this was a memory shortage timer, reset the idle
 * timeout regardless; the mir_clntreq will not be a
 * valid indicator.
 *
 * The timer is initially started in mir_wput during
 * RPC_CLIENT ioctl processing.
 *
 * The timer interval can be changed for individual
 * streams with the ND variable "mir_idle_timeout".
 */
	printf("mir_timer[%d < %d + %d]: reset client timer "
	printf("mir_timer[%d]: doing client timeout\n", now / hz);
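/*
 * The client-side idle test quoted in the structure comments -- shut
 * down only when lbolt >= MSEC_TO_TICK(mir_idle_timeout) +
 * mir_use_timestamp -- translates directly into C.  In this sketch the
 * tick rate and the helper macro are assumptions; only the comparison
 * itself comes from the source.
 */
#include <stdbool.h>

#define	HZ	100			/* assumed tick rate for the model */
#define	MSEC_TO_TICK(ms)	((ms) * HZ / 1000)

/*
 * Client-side idle check, as run from the timer handler.  use_timestamp
 * is the tick count recorded at the last send/receive; clntreq is the
 * activity marker set by each outbound request.
 */
static bool
client_is_idle(long now_ticks, long use_timestamp, long idle_timeout_ms,
    int clntreq)
{
	if (clntreq != 0)
		return (false);	/* active since the last timer event */
	return (now_ticks >= MSEC_TO_TICK(idle_timeout_ms) + use_timestamp);
}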
/*
 * We are disconnecting, but not necessarily
 * closing. By not closing, we will fail to
 * pick up a possibly changed global timeout value,
 * unless we store it now.
 */

/*
 * We pass T_ORDREL_REQ as an integer value
 * to kRPC as the indication that the stream
 * is idle. This is not a T_ORDREL_REQ message,
 * it is just a convenient value since we call
 * the same kRPC routine for T_ORDREL_INDs and
 * T_DISCON_INDs.
 */

/*
 * For servers, the timer is only running when the stream
 * is really idle or memory is short. The timer is started
 * by mir_wput when mir_type is set to RPC_SERVER and
 * by mir_svc_idle_start whenever the stream goes idle
 * (mir_ref_cnt == 0). The timer is cancelled in
 * mir_rput whenever a new inbound request is passed to kRPC
 * and the stream was previously idle.
 *
 * The timer interval can be changed for individual
 * streams with the ND variable "mir_idle_timeout".
 */

/* If the stream is not idle, do nothing. */

/*
 * If there is no packet queued up in the read queue, the stream
 * is really idle so notify nfsd to close it.
 */
	RPCLOG(16, "mir_timer: telling stream head listener "
	    "to close stream (0x%p)\n", (void *)RD(wq));
	RPCLOG(1, "mir_timer: unexpected mir_type %d\n",
/*
 * Called by the RPC package to send either a call or a return, or a
 * transport connection request. Adds the record marking header.
 */
	RPCLOG(16, "mir_wput wq 0x%p: got data after T_ORDREL_REQ\n",

/* Stick in the 4 byte record marking header. */

/*
 * Since we know that M_DATA messages are created exclusively
 * by kRPC, we expect that kRPC will leave room for our header
 * and 4 byte align which is normal for XDR.
 * If kRPC (or someone else) does not cooperate, then we
 * just throw away the message.
 */
	RPCLOG(1, "mir_wput: kRPC did not leave space for record "
	    "fragment header (%d bytes left)\n",
/*
 * For the client, set mir_clntreq to indicate that the
 * stream is active.
 */

/*
 * If we haven't already queued some data and the downstream module
 * can accept more data, send it on, otherwise we queue the message
 * and take other actions depending on mir_type.
 */

/* Now we pass the RPC message downstream. */

/*
 * Check for a previous duplicate request on the
 * queue. If there is one, then we throw away
 * the current message and let the previous one
 * go through. If we can't find a duplicate, then
 * send this one. This tap dance is an effort
 * to reduce traffic and processing requirements.
 */

/*
 * Set mir_hold_inbound so that new inbound RPC
 * messages will be held until the client catches
 * up on the earlier replies. This flag is cleared
 * in mir_wsrv after flow control is relieved;
 * the read-side queue is also enabled at that time.
 */

/*
 * Clear mir_hold_inbound which was set to 1 by
 * mir_open. This flag is not used on client
 * streams.
 */

/*
 * Start the idle timer. See mir_timer() for more
 * information on how client timers work.
 */

/*
 * We don't clear mir_hold_inbound here because
 * mir_hold_inbound is used in the flow control
 * model. If we cleared it here, then we'd commit
 * a small violation to the model where the transport
 * might immediately block downstream flow.
 */

/*
 * Start the idle timer. See mir_timer() for more
 * information on how server timers work.
 *
 * Note that it is important to start the idle timer
 * here so that connections time out even if we
 * never receive any data on them.
 */
	RPCLOG(16, "mir_wput_other starting idle timer on 0x%p "
	    "because we got RPC_SERVER ioctl\n", (void *)q);
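/*
 * The duplicate-request "tap dance" above needs a key to compare
 * requests by.  The comments don't name one, so this sketch assumes
 * the 4-byte RPC XID at the front of the call header -- an assumption,
 * not something the source states:
 */
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* One queued outbound request; the kernel version is an mblk on q. */
struct pending_req {
	struct pending_req *next;
	const uint8_t	   *data;
	size_t		    len;
};

/*
 * Scan the queue for a request carrying the same XID as msg.  When a
 * match is found the new message is dropped and the earlier one is
 * allowed to go through.
 */
static bool
is_dup_request(const struct pending_req *queue, const uint8_t *msg,
    size_t len)
{
	const struct pending_req *p;

	if (len < 4)
		return (false);
	for (p = queue; p != NULL; p = p->next) {
		if (p->len >= 4 && memcmp(p->data, msg, 4) == 0)
			return (true);	/* drop the new one, keep the old */
	}
	return (false);
}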
/*
 * We are likely being called from the context of a
 * service procedure. So we need to enqueue. However,
 * enqueuing may put our message behind data messages.
 * So flush the data first.
 */

/* Don't pass T_DATA_REQ messages downstream. */
	RPCLOG(8, "mir_wput_other wq 0x%p: got T_ORDREL_REQ\n",

/*
 * We are likely being called from
 * clnt_dispatch_notifyall(). Sending
 * a T_ORDREL_REQ will result in
 * some kind of _IND message being sent back up,
 * which will be another call to
 * clnt_dispatch_notifyall(). To keep the stack
 * lean, queue this message.
 */

/*
 * Mark the structure such that we don't accept any
 * more requests from the client. We could defer this
 * until we actually send the orderly release
 * request downstream, but all that does is delay
 * the closing of this stream.
 */
	RPCLOG(16, "mir_wput_other wq 0x%p: got T_ORDREL_REQ "
	    " so calling mir_svc_start_close\n", (void *)q);

/*
 * If we have sent down a T_ORDREL_REQ, don't send
 * any more data.
 */

/*
 * If the stream is not idle, then we hold the
 * orderly release until it becomes idle. This
 * ensures that kRPC will be able to reply to
 * all requests that we have passed to it.
 *
 * We also queue the request if there is data already
 * queued, because we cannot allow the T_ORDREL_REQ
 * to go before data. When we had a separate reply
 * count, this was not a problem, because the
 * reply count was reconciled when mir_wsrv() ran.
 */
	RPCLOG(16, "mir_wput_other: queuing "
	    "T_ORDREL_REQ on 0x%p\n", (void *)q);

/*
 * Mark the structure so that we know we sent
 * an orderly release request, and reset the idle timer.
 */
	RPCLOG(16, "mir_wput_other: calling mir_svc_idle_start"
	    " on 0x%p because we got T_ORDREL_REQ\n",

/* When we break, we will putnext the T_ORDREL_REQ. */

/*
 * Restart the timer in case mir_clnt_idle_do_stop() was
 * called.
 */

/*
 * T_DISCON_REQ is one of the interesting default
 * cases here. Ideally, an M_FLUSH is done before
 * T_DISCON_REQ is done. However, that is somewhat
 * cumbersome, so we queue the
 * T_DISCON_REQ, and let the service procedure
 * flush the data first.
 */
	RPCLOG(32, "mir_wput_other: flushing "
	RPCLOG(32, "mir_wput_other: ignoring "
	    "M_FLUSH on wq 0x%p\n", (void *)q);

/*
 * Do not send any more data if we have sent
 * a T_ORDREL_REQ downstream.
 */

/* Make sure that the stream can really handle more data. */

/* Now we pass the RPC message downstream. */

/*
 * This is not an RPC message, pass it downstream
 * (ignoring flow control) if the server side is not sending a
 * T_ORDREL_REQ downstream.
 */

/* Don't send two T_ORDRELs. */

/*
 * Mark the structure so that we know we sent an orderly
 * release request. We will check to see if the slot is idle at the
 * end of this routine, and if so, reset the idle timer to
 * handle orderly release timeouts.
 */
	RPCLOG(16, "mir_wsrv: sending ordrel req on q 0x%p\n",
/*
 * Send the orderly release downstream. If there are other
 * pending replies we won't be able to send them. However,
 * the only reason we should send the orderly release is if
 * we were idle, or if an unusual event occurred.
 */

/*
 * If we call mir_svc_idle_start() below, then
 * clearing mir_inwservice here will also result in
 * any thread waiting in mir_close() being signaled.
 */

/*
 * If idle, we call mir_svc_idle_start to start the timer (or wake up
 * a close). Also make sure not to start the idle timer on the
 * listener stream. This can cause nfsd to send an orderly release
 * command on the listener stream.
 */
	RPCLOG(16, "mir_wsrv: calling mir_svc_idle_start on 0x%p "
	    "because mir slot is idle\n", (void *)q);

/*
 * If outbound flow control has been relieved, then allow new
 * inbound requests to be processed.
 */

/*
 * We are disconnecting, but not necessarily
 * closing. By not closing, we will fail to
 * pick up a possibly changed global timeout value,
 * unless we store it now.
 */

/*
 * T_DISCON_REQ is passed to kRPC as an integer value
 * (this is not a TPI message). It is used as a
 * convenient value to indicate a sanity check
 * failure -- the same kRPC routine is also called
 * for T_DISCON_INDs and T_ORDREL_INDs.
 */
	RPCLOG(16, "mir_disconnect: telling "
	    "stream head listener to disconnect stream "

/*
 * Sanity check the message length, and if it's too large, shutdown the
 * connection. Returns 1 if the connection is shutdown; 0 otherwise.
 */
	    "kRPC: record fragment from %s of size(%d) exceeds "
	    "maximum (%u). Disconnecting",