smb_oplock.c revision c8ec8eea9849cac239663c46be8a7f5d2ba7ca00
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "@(#)smb_oplock.c 1.5 08/08/07 SMI"
/*
* SMB Locking library functions.
*/
#include <smbsrv/smb_incl.h>
#include <smbsrv/smb_fsops.h>
/*
* Oplock functionality enable/disable
*/
/*
* Magic 0xFF 'S' 'M' 'B'
* smb_com a byte, the "first" command
* Error a 4-byte union, ignored in a request
* smb_flg a one byte set of eight flags
* smb_flg2 a two byte set of 16 flags
* . twelve reserved bytes, have a role
* in connectionless transports (IPX, UDP?)
* smb_tid a 16-bit tree ID, a mount point sorta,
* 0xFFFF is this command does not have
* or require a tree context
* smb_pid a 16-bit process ID
* smb_uid a 16-bit user ID, specific to this "session"
* and mapped to a system (bona-fide) UID
* smb_mid a 16-bit multiplex ID, used to differentiate
* multiple simultaneous requests from the same
* process (pid) (ref RPC "xid")
*
* SMB_COM_LOCKING_ANDX allows both locking and/or unlocking of file range(s).
*
* Client Request Description
* ================================== =================================
*
* UCHAR WordCount; Count of parameter words = 8
* UCHAR AndXCommand; Secondary (X) command; 0xFF = none
* UCHAR AndXReserved; Reserved (must be 0)
* USHORT AndXOffset; Offset to next command WordCount
* USHORT Fid; File handle
* UCHAR LockType; See LockType table below
* UCHAR OplockLevel; The new oplock level
* ULONG Timeout; Milliseconds to wait for unlock
* USHORT NumberOfUnlocks; Num. unlock range structs following
* USHORT NumberOfLocks; Num. lock range structs following
* USHORT ByteCount; Count of data bytes
* LOCKING_ANDX_RANGE Unlocks[]; Unlock ranges
* LOCKING_ANDX_RANGE Locks[]; Lock ranges
*
* LockType Flag Name Value Description
* ============================ ===== ================================
*
* LOCKING_ANDX_SHARED_LOCK 0x01 Read-only lock
* LOCKING_ANDX_OPLOCK_RELEASE 0x02 Oplock break notification
* LOCKING_ANDX_CHANGE_LOCKTYPE 0x04 Change lock type
* LOCKING_ANDX_CANCEL_LOCK 0x08 Cancel outstanding request
* LOCKING_ANDX_LARGE_FILES 0x10 Large file locking format
*
* LOCKING_ANDX_RANGE Format
* =====================================================================
*
* USHORT Pid; PID of process "owning" lock
* ULONG Offset; Offset to bytes to [un]lock
* ULONG Length; Number of bytes to [un]lock
*
* Large File LOCKING_ANDX_RANGE Format
* =====================================================================
*
* USHORT Pid; PID of process "owning" lock
* USHORT Pad; Pad to DWORD align (mbz)
* ULONG OffsetHigh; Offset to bytes to [un]lock
* (high)
* ULONG OffsetLow; Offset to bytes to [un]lock (low)
* ULONG LengthHigh; Number of bytes to [un]lock
* (high)
* ULONG LengthLow; Number of bytes to [un]lock (low)
*
* Server Response Description
* ================================== =================================
*
* UCHAR WordCount; Count of parameter words = 2
* UCHAR AndXCommand; Secondary (X) command; 0xFF =
* none
* UCHAR AndXReserved; Reserved (must be 0)
* USHORT AndXOffset; Offset to next command WordCount
* USHORT ByteCount; Count of data bytes = 0
*
*/
/*
* smb_oplock_acquire
*
* Attempt to acquire an oplock. Note that the oplock granted may be
* none, i.e. the oplock was not granted.
*
* Grant an oplock to the requestor if this session is the only one
* that has the file open, regardless of the number of instances of
* the file opened by this session.
*
* However, if there is no oplock on this file and there is already
* at least one open, we will not grant an oplock, even if the only
* existing opens are from the same client. This is "server discretion."
*
* An oplock may need to be broken in order for one to be granted, and
* depending on what action is taken by the other client (unlock or close),
* an oplock may or may not be granted. (The breaking of an oplock is
* done earlier in the calling path.)
*
* XXX: Node synchronization is not yet implemented. However, racing
* opens are handled thus:
*
* A racing oplock acquire can happen in the open path between
* smb_oplock_break() and smb_fsop_open(), but no later. (Once
* the file is open via smb_fsop_open()/VOP_OPEN,
* smb_fsop_oplock_install() will not be able to install an oplock,
* which requires an open count of 1.)
*
* Hence, we can safely break any oplock that came in after the
* smb_oplock_break() done previously in the open path, knowing that
* no other racing oplock acquisitions should be able to succeed
* because we already have the file open (see above).
*
* The type of oplock being requested is passed in op->my_flags. The result
* is also returned in op->my_flags.
*
* (Note that exclusive and batch oplocks are treated interchangeably.)
*
* The Returns NT status codes:
* NT_STATUS_SUCCESS
* NT_STATUS_CONNECTION_DISCONNECTED
*/
DWORD
smb_oplock_acquire(
smb_request_t *sr,
smb_ofile_t *of,
struct open_param *op)
{
smb_node_t *node;
unsigned int level;
ASSERT(sr);
ASSERT(of);
ASSERT(op);
ASSERT(op->fqi.last_attr.sa_vattr.va_type == VREG);
level = op->my_flags & MYF_OPLOCK_MASK;
op->my_flags &= ~MYF_OPLOCK_MASK;
if ((sr->sr_cfg->skc_oplock_enable == 0) ||
smb_tree_has_feature(of->f_tree, SMB_TREE_NO_OPLOCKS))
return (NT_STATUS_SUCCESS);
if (!((MYF_IS_EXCLUSIVE_OPLOCK(level)) ||
(MYF_IS_BATCH_OPLOCK(level))))
return (NT_STATUS_SUCCESS);
node = of->f_node;
smb_rwx_rwenter(&node->n_lock, RW_WRITER);
if (EXCLUSIVE_OPLOCK_IN_FORCE(node) ||
BATCH_OPLOCK_IN_FORCE(node)) {
smb_rwx_rwexit(&node->n_lock);
if (SMB_SAME_SESSION(sr->session,
node->n_oplock.op_ofile->f_session)) {
op->my_flags |= level;
return (NT_STATUS_SUCCESS);
} else if (SMB_ATTR_ONLY_OPEN(op)) {
ASSERT(!(op->my_flags & MYF_OPLOCK_MASK));
return (NT_STATUS_SUCCESS);
}
smb_oplock_break(node);
smb_rwx_rwenter(&node->n_lock, RW_WRITER);
}
if (smb_fsop_oplock_install(node, of->f_mode) != 0) {
smb_rwx_rwexit(&node->n_lock);
return (NT_STATUS_SUCCESS);
}
node->n_oplock.op_ofile = of;
node->n_oplock.op_ipaddr = sr->session->ipaddr;
node->n_oplock.op_kid = sr->session->s_kid;
node->flags &= ~NODE_OPLOCKS_IN_FORCE;
if (MYF_IS_EXCLUSIVE_OPLOCK(level))
node->flags |= NODE_EXCLUSIVE_OPLOCK;
if (MYF_IS_BATCH_OPLOCK(level))
node->flags |= NODE_BATCH_OPLOCK;
op->my_flags |= level;
smb_rwx_rwexit(&node->n_lock);
return (NT_STATUS_SUCCESS);
}
/*
* smb_oplock_break
*
* The oplock break may succeed for multiple reasons: file close, oplock
* release, holder connection dropped, requesting client disconnect etc.
* Whatever the reason, the oplock should be broken when this function
* returns. The exceptions are when the client making this request gets
* disconnected or when another client is handling the break and it gets
* disconnected.
*
* Returns NT status codes:
* NT_STATUS_SUCCESS No oplock in force, i.e. the
* oplock has been broken.
* NT_STATUS_CONNECTION_DISCONNECTED Requesting client disconnected.
* NT_STATUS_INTERNAL_ERROR
*/
void
smb_oplock_break(smb_node_t *node)
{
smb_session_t *oplock_session;
smb_ofile_t *oplock_ofile;
struct mbuf_chain mbc;
int retries = 0;
clock_t elapsed_time;
clock_t max_time;
boolean_t flag;
smb_rwx_rwenter(&node->n_lock, RW_WRITER);
if (!OPLOCKS_IN_FORCE(node)) {
smb_rwx_rwexit(&node->n_lock);
return;
}
if (node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) {
elapsed_time = 0;
max_time = MSEC_TO_TICK(smb_oplock_timeout * OPLOCK_RETRIES);
/*
* Another client is already attempting to break the oplock.
* We wait for it to finish. If the caller was trying to
* acquire an oplock, he should retry in case the client's
* connection was dropped while trying to break the oplock.
*
* If the holder's connection has been dropped, we yield to
* allow the thread handling the break to detect it and set
* the flags.
*/
while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) &&
(elapsed_time < max_time)) {
clock_t timeleft;
timeleft = smb_rwx_rwwait(&node->n_lock, max_time);
if (timeleft == -1) {
elapsed_time = max_time;
} else {
elapsed_time += max_time - timeleft;
}
}
/*
* If there are no oplocks in force we're done.
*/
if (!OPLOCKS_IN_FORCE(node)) {
smb_rwx_rwexit(&node->n_lock);
return;
} else {
/*
* This is an anomalous condition.
* Cancel/release the oplock.
*/
smb_oplock_release(node, B_TRUE);
smb_rwx_rwexit(&node->n_lock);
return;
}
}
oplock_ofile = node->n_oplock.op_ofile;
ASSERT(oplock_ofile);
oplock_session = oplock_ofile->f_session;
ASSERT(oplock_session);
/*
* Start oplock break.
*/
node->n_oplock.op_flags |= OPLOCK_FLAG_BREAKING;
smb_rwx_rwexit(&node->n_lock);
max_time = MSEC_TO_TICK(smb_oplock_timeout);
do {
MBC_INIT(&mbc, MLEN);
(void) smb_mbc_encodef(&mbc, "Mb19.wwwwbb3.ww10.",
SMB_COM_LOCKING_ANDX, oplock_ofile->f_tree->t_tid,
0xffff, 0, 0xffff, 8, 0xff, oplock_ofile->f_fid,
LOCKING_ANDX_OPLOCK_RELEASE);
flag = B_TRUE;
smb_rwx_rwenter(&oplock_session->s_lock, RW_WRITER);
while (flag) {
switch (oplock_session->s_state) {
case SMB_SESSION_STATE_DISCONNECTED:
case SMB_SESSION_STATE_TERMINATED:
smb_rwx_rwexit(&oplock_session->s_lock);
smb_rwx_rwenter(&node->n_lock, RW_WRITER);
node->flags &= ~NODE_OPLOCKS_IN_FORCE;
node->n_oplock.op_flags &=
~OPLOCK_FLAG_BREAKING;
node->n_oplock.op_ofile = NULL;
node->n_oplock.op_ipaddr = 0;
node->n_oplock.op_kid = 0;
smb_rwx_rwexit(&node->n_lock);
return;
case SMB_SESSION_STATE_OPLOCK_BREAKING:
flag = B_FALSE;
break;
case SMB_SESSION_STATE_NEGOTIATED:
oplock_session->s_state =
SMB_SESSION_STATE_OPLOCK_BREAKING;
flag = B_FALSE;
break;
default:
(void) smb_rwx_rwwait(&oplock_session->s_lock,
-1);
break;
}
}
smb_rwx_rwexit(&oplock_session->s_lock);
(void) smb_session_send(oplock_session, 0, &mbc);
elapsed_time = 0;
smb_rwx_rwenter(&node->n_lock, RW_WRITER);
while ((node->n_oplock.op_flags & OPLOCK_FLAG_BREAKING) &&
(elapsed_time < max_time)) {
clock_t timeleft;
timeleft = smb_rwx_rwwait(&node->n_lock, max_time);
if (timeleft == -1) {
elapsed_time = max_time;
} else {
elapsed_time += max_time - timeleft;
}
}
if (!OPLOCKS_IN_FORCE(node)) {
/*
* smb_oplock_release() was called
*/
smb_rwx_rwexit(&node->n_lock);
return;
}
} while (++retries < OPLOCK_RETRIES);
/*
* Retries exhausted and timed out.
* Cancel the oplock and continue.
*/
smb_oplock_release(node, B_TRUE);
smb_rwx_rwexit(&node->n_lock);
}
/*
* smb_oplock_release
*
* This function uninstalls the FEM oplock monitors and
* clears all flags in relation to an oplock on the
* given node.
*
* The function can be called with the node->n_lock held
* or not held.
*/
void /*ARGSUSED*/
smb_oplock_release(smb_node_t *node, boolean_t have_rwx)
{
if (!have_rwx)
smb_rwx_rwenter(&node->n_lock, RW_WRITER);
if (!OPLOCKS_IN_FORCE(node)) {
if (!have_rwx)
smb_rwx_rwexit(&node->n_lock);
return;
}
smb_fsop_oplock_uninstall(node);
node->flags &= ~NODE_OPLOCKS_IN_FORCE;
node->n_oplock.op_flags &= ~OPLOCK_FLAG_BREAKING;
node->n_oplock.op_ofile = NULL;
node->n_oplock.op_ipaddr = 0;
node->n_oplock.op_kid = 0;
if (!have_rwx)
smb_rwx_rwexit(&node->n_lock);
}
/*
* smb_oplock_conflict
*
* The two checks on "session" and "op" are primarily for the open path.
* Other CIFS functions may call smb_oplock_conflict() with a session
* pointer so as to do the session check.
*/
boolean_t
smb_oplock_conflict(smb_node_t *node, smb_session_t *session,
struct open_param *op)
{
smb_session_t *oplock_session;
smb_ofile_t *oplock_ofile;
smb_rwx_rwenter(&node->n_lock, RW_READER);
if (!OPLOCKS_IN_FORCE(node)) {
smb_rwx_rwexit(&node->n_lock);
return (B_FALSE);
}
oplock_ofile = node->n_oplock.op_ofile;
ASSERT(oplock_ofile);
oplock_session = oplock_ofile->f_session;
ASSERT(oplock_session);
if (SMB_SAME_SESSION(session, oplock_session)) {
smb_rwx_rwexit(&node->n_lock);
return (B_FALSE);
}
if (SMB_ATTR_ONLY_OPEN(op)) {
smb_rwx_rwexit(&node->n_lock);
return (B_FALSE);
}
smb_rwx_rwexit(&node->n_lock);
return (B_TRUE);
}