lx/sys/lx_autofs.h

	lx_autofs.h revision 9acbbeaf2a1ffe5c14b244867d427714fab43c5c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_LX_AUTOFS_H
#define	_LX_AUTOFS_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * The lx_autofs filesystem exists to emulate the Linux autofs filesystem
 * and provide support for the Linux "automount" automounter.
 *
 *
 *
 * +++ Linux automounter background.
 *
 * Linux has two automounters: "amd" and "automount"
 *
 * 1) "amd" is a userland NFS server.  It basically mounts an NFS filesystem
 * at an automount point, and it acts as the NFS server for the mount.  When
 * an access is done to that NFS filesystem, the access is redirected by the
 * kernel to the "amd" process via rpc.  "amd" then looks up any information
 * required to resolve the requests, mounts real NFS filesystems if
 * necessary, and returns.  "amd" has its own strange configuration
 * mechanism that doesn't seem to be very compatible with Solaris's network
 * based automounter map support.
 *
 * 2) "automount" is the other Linux automounter.  It utilizes a kernel
 * filesystem (autofs) to provide its functionality.  Basically, it mounts
 * the autofs filesystem at any automounter controlled mount point.  This
 * filesystem then intercepts and redirects lookup operations (and only
 * lookup ops) to the userland automounter process via a pipe.  (The
 * pipe to the automounter is established via mount options when the autofs
 * filesystem is mounted.)  When the automounter receives a request via this
 * pipe, it does lookups to whatever backing store it's configured to use,
 * does mkdir operations on the autofs filesystem, mounts remote NFS
 * filesystems on any leaf directories it just created, and signals the
 * autofs filesystem via an ioctl to let it know that the lookup can
 * continue.
 *
 *
 *
 * +++ Linux autofs (and automount daemon) notes
 *
 * Since we're mimicking the behavior of the Linux autofs filesystem it's
 * important to document some of its observed behavior here since there's
 * no doubt that in the future this behavior will change.  These comments
 * apply to the behavior of the automounter as observed on a system
 * running Linux v2.4.21 (autofs is bundled with the Linux kernel).
 *
 * A) Autofs allows root owned, non-automounter processes to create
 * directories in the autofs filesystem.  The autofs filesystem treats the
 * automounter's process group as special, but it doesn't prevent root
 * processes outside of the automounter's process group from creating new
 * directories in the autofs filesystem.
 *
 * B) Autofs doesn't allow creation of any non-directory entries in the
 * autofs filesystem.  No entity can create files (e.g. /bin/touch or
 * VOP_CREATE/VOP_SYMLINK/etc.)  The only entries that can exist within
 * the autofs filesystem are directories.
 *
 * C) Autofs only intercepts vop lookup operations.  Notably, it does _not_
 * intercept and re-direct vop readdir operations.  This means that the
 * observed behavior of the Linux automounter can be considerably different
 * from that of the Solaris automounter.  Specifically, on Solaris if an
 * autofs mount point is mounted _without_ the -nobrowse option then if a user
 * does an ls operation (which translates into a vop readdir operation) then
 * the automounter will intercept that operation and list all the possible
 * directories and mount points without actually mounting any filesystems.
 * Essentially, all automounter managed mount points on Linux will behave
 * like "-nobrowse" mount points on Solaris.  Here's an example to
 * illustrate this.  If /ws was mounted on Solaris without the -nobrowse
 * option and an auto_ws yp map was setup as the backing store for this
 * mount point, then an "ls /ws" would list all the keys in the map as
 * valid directories, but an "ls /ws" on Linux would list an empty
 * directory.
 *
 * D) NFS mounts are performed by the automount process.  When the automount
 * process gets a redirected lookup request, it determines _all_ the
 * possible remote mount points for that request, creates directory paths
 * via mkdir, and mounts the remote filesystems on the newly created paths.
 * So for example, if a machine called mcescher exported /var/crash and
 * /var/core, an "ls /net/mcescher" would result in the following actions
 * being done by the automounter:
 * 	mkdir /net/mcescher
 * 	mkdir /net/mcescher/var
 * 	mkdir /net/mcescher/var/crash
 * 	mkdir /net/mcescher/var/core
 * 	mount mcescher:/var/crash /net/mcescher/var/crash
 * 	mount mcescher:/var/core /net/mcescher/var/core
 * once the automounter completed the work above it would signal the autofs
 * filesystem (via an ioctl) that the lookup could continue.
 *
 * E.1) Autofs only redirects vop lookup operations for path entries that
 * don't already exist in the autofs filesystem.  So for the example above,
 * an initial (after the start of the automounter) "ls /net/mcescher" would
 * result in a request to the automounter.  A subsequent "ls /net/mcescher"
 * would not result in a request to the automounter.  Even if
 * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted
 * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher"
 * would not result in a new request to the automounter.
 *
 * E.2) Autofs lookup requests that are sent to the automounter only include
 * the root directory path component.  So for example, after starting up
 * the automounter if a user were to do a "ls /net/mcescher/var/crash", the
 * lookup request actually sent to the automounter would just be for
 * "mcescher".  (The same request as if the user had done "ls /net/mcescher".)
 *
 * E.3) The two statements above aren't entirely true.  The Linux
 * autofs filesystem will also redirect lookup operations for leaf
 * directories that don't have a filesystem mounted on them.  Using the
 * example above, if a user did a "ls /net/mcescher", then manually
 * unmounted /net/mcescher/var/crash, and then did an "ls
 * /net/mcescher/var/crash", this would result in a request for
 * "mcescher/var/crash" being sent to the automounter.  The strange thing
 * (a Linux bug perhaps) is that the automounter won't do anything with this
 * request and the lookup will fail.
 *
 * F) The autofs filesystem communication protocol (what ioctls it supports
 * and what data it passes to the automount process) are versioned.  The
 * source for the userland automount daemon (I looked at version v3.1.7)
 * seemed to support two versions of the Linux kernel autofs implementation.
 * Both versions supported communication with a pipe and the format of the
 * structure passed via this pipe was the same.  The difference between the
 * two versions was in the functionality supported.  (The v3 version has
 * additional ioctls to support automount timeouts.)
 *
 *
 *
 * +++ lx_autofs notes
 *
 * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the
 * Linux autofs filesystem with the following exceptions:
 *
 * 	1.1) We don't bother to implement the E.3 functionality listed above
 * 	since it doesn't appear to be of any use.
 *
 * 	1.2) We only implement v2 of the automounter protocol since
 * 	implementing v3 would take a _lot_ more work.  If this proves to be a
 * 	problem we can re-visit this decision later.  (More details about v3
 * 	support are included in comments below.)
 *
 * 2) In general, the approach taken for lx_autofs is to keep it as simple
 * as possible and to minimize its memory usage.  To do this all information
 * about the contents of the lx_autofs filesystem are mirrored in the
 * underlying filesystem that lx_autofs is mounted on and most vop operations
 * are simply passed onto this underlying filesystem.  This means we don't
 * have to implement most of the complex operations that a full filesystem
 * normally has to implement.  It also means that most of our filesystem state
 * (wrt the contents of the filesystem) doesn't actually have to be stored
 * in memory, we can simply go to the underlying filesystem to get it when
 * it's requested.  For the purposes of discussion, we'll call the underlying
 * filesystem the "backing store."
 *
 * The backing store is actually a directory called ".lx_afs" which is created
 * in the directory where the lx_autofs filesystem is mounted.  When the
 * lx_autofs filesystem is unmounted this backing store directory is deleted.
 * If this directory exists at mount time (perhaps the system crashed while a
 * previous lx_autofs instance was mounted at the same location) it will be
 * deleted.  There are a few implications of using a backing store worth
 * mentioning.
 *
 * 	2.1) lx_autofs can't be mounted on a read only filesystem.  If this
 * 	proves to be a problem we can probably move the location of the
 * 	backing store.
 *
 * 	2.2) If the backing store filesystem runs out of space then the
 * 	automounter process won't be able to create more directories and mount
 * 	new filesystems.  Of course, strange failures usually happen when
 * 	filesystems run out of space.
 *
 * 3) Why aren't we using gfs?  gfs has two different usage models.
 *
 * 	3.1) I'm my own filesystem but i'm using gfs to help with managing
 * 	readdir operations.
 *
 * 	3.2) I'm a gfs filesystem and gfs is managing all my vnodes
 *
 * We're not using the 3.1 interfaces because we don't implement readdir
 * ourselves.  We pass all readdir operations onto the backing store
 * filesystem and utilize its readdir implementation.
 *
 * We're not using the 3.2 interfaces because they are really designed for
 * in memory filesystems where all of the filesystem state is stored in
 * memory.  They don't lend themselves to filesystems where part of the
 * state is in memory and part of the state is on disk.
 *
 * For more information on gfs take a look at the block comments in the
 * top of gfs.c
 */

#ifdef	__cplusplus
extern "C" {
#endif

/*
 * Note that the name of the actual Solaris filesystem is lx_afs and not
 * lx_autofs.  This is because filesystem names are stupidly limited to 8
 * characters.
 */
#define	LX_AUTOFS_NAME			"lx_afs"

/*
 * Mount options supported.
 *
 * These are the option name strings looked for in the mount options.
 * NOTE(review): presumably "fd" names the descriptor of the pipe to the
 * automounter and "pgrp" the automounter's process group (both concepts are
 * described in the block comment at the top of this file), while
 * "minproto"/"maxproto" bound the protocol versions the automounter can
 * speak -- confirm against the mount option parsing code.
 */
#define	LX_MNTOPT_FD			"fd"
#define	LX_MNTOPT_PGRP			"pgrp"
#define	LX_MNTOPT_MINPROTO		"minproto"
#define	LX_MNTOPT_MAXPROTO		"maxproto"

/*
 * Version of the Linux kernel automount protocol we support.  Only v2 is
 * implemented; see item 1.2 of the "lx_autofs notes" in the block comment
 * at the top of this file for the reasoning.
 */
#define	LX_AUTOFS_PROTO_VERSION		2

/*
 * Request packet that lx_autofs writes down a pipe to the automount
 * process.  The pipe itself is set up at mount time, and the packet layout
 * is shared by v2 and v3 of the automount protocol.
 */
struct lx_autofs_pkt {
	int	lap_protover;		/* protocol version number */
	int	lap_constant;		/* always set to 0 */
	int	lap_id;			/* unique per pkt; never reused */
	int	lap_name_len;		/* excludes any newline or NULL */
	char	lap_name[256];		/* path component to lookup */
};
typedef struct lx_autofs_pkt lx_autofs_pkt_t;

/*
 * Ioctls supported (v2 protocol).
 *
 * The trailing "arg:" note on each definition gives the type of the ioctl
 * argument.  NOTE(review): the int passed to READY/FAIL is presumably the
 * lap_id of the lx_autofs_pkt_t request being answered -- confirm against
 * the ioctl handler.
 */
#define	LX_AUTOFS_IOC_READY		0x00009360 /* arg: int */
#define	LX_AUTOFS_IOC_FAIL		0x00009361 /* arg: int */
#define	LX_AUTOFS_IOC_CATATONIC		0x00009362 /* arg: <none> */

/*
 * Ioctls not supported (v3 protocol).
 *
 * Initially we're only going to support v2 of the Linux kernel automount
 * protocol.  This means that we don't support the following ioctls.
 *
 * 1) The protocol version ioctl (by not supporting it the automounter
 * will assume version 2).
 *
 * 2) Automounter timeout ioctls.  For v3 and later the automounter can
 * be started with a timeout option.  It will notify the filesystem of
 * this timeout and, if any automounter filesystem root directory entry
 * is not in use, it will notify the automounter via the LX_AUTOFS_IOC_EXPIRE
 * ioctl.  For example, if the timeout is 60 seconds, the Linux
 * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for
 * timeouts more often than that.  (v3.1.7 of the automount daemon would
 * perform this ioctl every <timeout>/4 seconds.)  Then the autofs
 * filesystem will
 * report top level directories that aren't in use to the automounter
 * via this ioctl.  If /net was managed by the automounter and
 * there were the following mount points:
 *	/net/jurassic/var/crash
 *	/net/mcescher/var/crash
 * and no one was looking at any crash dumps on mcescher but someone
 * was analyzing a crash dump on jurassic, then after <timeout> seconds
 * had passed the autofs filesystem would let the automounter know that
 * "mcescher" could be unmounted.  (Note the granularity of notification
 * is directories in the root of the autofs filesystem.)  Here's two
 * ideas for how this functionality could be implemented on Solaris:
 *
 * 2.1) The easy incomplete way.  Don't do any in-use detection.  Simply
 * tell the automounter it can try to unmount the filesystem every time
 * the specified timeout passes.  If the filesystem is in use then the
 * unmount will fail.  This would break down for remote hosts with multiple
 * mounts.  For example, if the automounter had mounted the following
 * filesystems:
 *	/net/jurassic/var/crash
 *	/net/jurassic/var/core
 * and the user was looking at a core file, and the timeout expired, the
 * automounter would receive notification to unmount "jurassic".  Then
 * it would unmount crash (which would succeed) and then try to unmount
 * core (which would fail).  After that (since the automounter only
 * performs mounts for failed lookups in the root autofs directory)
 * future access to /net/jurassic/var/crash would result in access
 * to an empty autofs directory.  We might be able to work around
 * this by caching which root autofs directories we've timed out,
 * then any access to paths that contain those directories could be
 * stalled and we could resend another request to the automounter.
 * This could work if the automounter ignores mount failures.
 *
 * 2.2) The hard correct way.  The real difficulty here is detecting
 * files in use on other filesystems (say NFS) that have been mounted
 * on top of autofs.  (Detecting in use autofs vnodes should be easy.)
 * To do this we would probably have to create a new brand op to intercept
 * mount/umount filesystem operations.  Then using this entry point we
 * could detect mounts of other filesystems on top of lx_autofs.  When
 * a successful mount finishes we would use the FEM (file event
 * monitoring) framework to push a module onto that filesystem and
 * intercept VOP operations that allocate/free vnodes in that filesystem.
 * (We would also then have to track mount operations on top of that
 * filesystem, etc.)  This would allow us to properly detect any
 * usage of subdirectories of an autofs directory.
 */
/*
 * Command numbers for the v3-only ioctls discussed in the comment above.
 * Per that comment they are not supported by lx_autofs.
 *
 * NOTE(review): assuming these follow the Linux _IOC encoding (argument
 * size in bits 16-29), LX_AUTOFS_IOC_EXPIRE encodes a 0x110 (272) byte
 * argument, whereas lx_autofs_expire_t is 268 bytes with a 4-byte int --
 * confirm the value against the Linux autofs headers before relying on it.
 */
#define	LX_AUTOFS_IOC_PROTOVER		0x80049363 /* arg: int */
#define	LX_AUTOFS_IOC_EXPIRE		0x81109365 /* arg: lx_autofs_expire * */
#define	LX_AUTOFS_IOC_SETTIMEOUT	0xc0049364 /* arg: ulong_t */

/*
 * Argument structure for the LX_AUTOFS_IOC_EXPIRE ioctl, which belongs to
 * the v3 protocol.
 */
struct lx_autofs_expire {
	int	lap_protover;		/* protocol version number */
	int	lap_constant;		/* always set to 1 */
	int	lap_name_len;		/* excludes any newline or NULL */
	char	lap_name[256];		/* path component that has timed out */
};
typedef struct lx_autofs_expire lx_autofs_expire_t;

#ifdef	__cplusplus
}
#endif

#endif	/* _LX_AUTOFS_H */