clone.c revision 9acbbeaf2a1ffe5c14b244867d427714fab43c5c
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#pragma ident "%Z%%M% %I% %E% SMI"
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <ucontext.h>
#include <thread.h>
#include <strings.h>
#include <libintl.h>
#include <sys/inttypes.h>
#include <sys/segments.h>
#include <signal.h>
#include <sys/lx_types.h>
#include <sys/lx_signal.h>
#include <sys/lx_syscall.h>
#include <sys/lx_brand.h>
#include <sys/lx_debug.h>
#include <sys/lx_thread.h>
/*
* Flag bits accepted in the flags word of the emulated clone(2) call.
* LX_CSIGNAL is a mask over the low byte of that word rather than a single
* flag (presumably it selects the exit-signal number -- confirm).
*/
#define LX_CSIGNAL 0x000000ff
#define LX_CLONE_VM 0x00000100
#define LX_CLONE_FS 0x00000200
#define LX_CLONE_FILES 0x00000400
#define LX_CLONE_SIGHAND 0x00000800
#define LX_CLONE_PID 0x00001000
#define LX_CLONE_PTRACE 0x00002000
#define LX_CLONE_VFORK 0x00004000
#define LX_CLONE_PARENT 0x00008000
#define LX_CLONE_THREAD 0x00010000
#define LX_CLONE_SYSVSEM 0x00040000
#define LX_CLONE_SETTLS 0x00080000
#define LX_CLONE_PARENT_SETTID 0x00100000
#define LX_CLONE_CHILD_CLEARTID 0x00200000
#define LX_CLONE_DETACH 0x00400000
#define LX_CLONE_CHILD_SETTID 0x01000000
/*
* The sharing flags that must be supplied together. The replacement list
* of this macro was missing, which left a dangling line continuation that
* swallowed the LX_EXIT definition below (so LX_EXIT was never defined).
* The value is reconstructed from the "all or none of CLONE_VM CLONE_FS,
* CLONE_FILES, and CLONE_SIGHAND" diagnostic emitted by this file.
* NOTE(review): confirm against the upstream definition.
*/
#define SHARED_AS \
	(LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND)
/*
* Exit kinds. NOTE(review): presumably these distinguish the Linux exit()
* and exit_group() system calls -- confirm against the code that uses them.
*/
#define LX_EXIT 1
#define LX_EXIT_GROUP 2
/*
* This is dicey. This seems to be an internal glibc structure, and not
* part of any external interface. Thus, it is subject to change without
* notice. FWIW, clone(2) itself seems to be an internal (or at least
* unstable) interface, since strace(1) shows it differently than the man
* page.
*/
struct lx_desc
{
/*
* NOTE(review): no members are visible in this view of the file, so the
* layout of this structure cannot be documented from here. An empty
* struct is not valid ISO C; restore the members from the complete
* source before building.
*/
};
/*
* Bookkeeping for one emulated clone(2) call. According to the comments in
* the clone path of this file, the structure is carved out of the new
* thread's stack (to avoid malloc()) and holds the information the new
* thread needs for its initialization.
*/
struct clone_state {
void *c_retaddr; /* instr after clone()'s int80 */
int c_flags; /* flags to clone(2) */
int c_sig; /* signal to send on thread exit */
void *c_stk; /* %esp of new thread */
/* presumably the parent-tid pointer passed to clone(2) -- TODO confirm */
void *c_ptidp;
/* presumably the child-tid pointer passed to clone(2) -- TODO confirm */
void *c_ctidp;
/*
* The cloner waits for the pointed-to value to become non-zero; that
* value is then used as the return value from clone().
*/
volatile int *c_clone_res; /* pid/error returned to cloner */
};
/*
* Declared here; the definition is not visible in this file. Per the
* comments in clone_start(), it performs the final hand-off of a new
* thread to the clone(2) return path and should never return.
* NOTE(review): the meaning of the three arguments is not shown here.
*/
extern void lx_setup_clone(uintptr_t, void *, void *);
/*
* Counter incremented when we vfork(2) ourselves, and decremented when the
* vfork(2)ed child exit(2)s or exec(2)s.
*
* The clone(2) emulation in this file also decrements it again right after
* the increment when its result value is non-zero.
*/
static int is_vforked = 0;
/*
* NOTE(review): the name and parameter list of this function, and most of
* its statements, are missing from this view of the file, so it does not
* parse as shown.
*
* From the comments that remain, this looks like an exit path for a thread
* that has been running Linux code: it adjusts the vfork counter, reads
* thread-specific data, blocks signals in the exit context and restores a
* previously saved context via setcontext(2). Confirm against the complete
* source before relying on this description.
*
* The only return visible here is the unreachable "return (0)" at the end.
*/
int
{
/*
* If we are a vfork(2)ed child, we need to exit as quickly and
* cleanly as possible to avoid corrupting our parent.
*/
if (is_vforked != 0) {
is_vforked--;
}
/* Tail of an error report; the enclosing call is not visible here. */
"%s: unable to read thread-specific data: %s"),
/*
* Block all signals in the exit context to avoid taking any signals
* (to the degree possible) while exiting.
*/
/*
* This thread is exiting. Restore the state of the thread to
* what it was before we started running linux code.
*/
/*
* If we returned from the setcontext(2), something is very wrong.
*/
/*NOTREACHED*/
return (0);
}
/*
* NOTE(review): the name and parameter list of this function, and most of
* its statements, are missing from this view of the file, so it does not
* parse as shown. The visible text is identical to the preceding unnamed
* function; whatever distinguishes the two is not shown here.
*
* From the comments that remain, this looks like an exit path for a thread
* that has been running Linux code: it adjusts the vfork counter, reads
* thread-specific data, blocks signals in the exit context and restores a
* previously saved context via setcontext(2). Confirm against the complete
* source before relying on this description.
*
* The only return visible here is the unreachable "return (0)" at the end.
*/
int
{
/*
* If we are a vfork(2)ed child, we need to exit as quickly and
* cleanly as possible to avoid corrupting our parent.
*/
if (is_vforked != 0) {
is_vforked--;
}
/* Tail of an error report; the enclosing call is not visible here. */
"%s: unable to read thread-specific data: %s"),
/*
* Block all signals in the exit context to avoid taking any signals
* (to the degree possible) while exiting.
*/
/*
* This thread is exiting. Restore the state of the thread to
* what it was before we started running linux code.
*/
/*
* If we returned from the setcontext(2), something is very wrong.
*/
/*NOTREACHED*/
return (0);
}
/*
* Start routine for a thread created (via thr_create(3C), per the comment
* below) on behalf of an emulated clone(2) call.
*
* arg: opaque pointer supplied at thread creation. NOTE(review): presumably
* the struct clone_state prepared by the cloner, since the comments below
* refer to cs->c_clone_res -- the assignment is not visible in this view.
*
* As described by the comments that remain, the routine re-vectors to the
* lx kernel module to finish the clone, initializes thread-specific data,
* saves a context to be restored at thread exit, and finally hands off to
* lx_setup_clone(), which should not return. NULL is returned on the
* "rval < 0" path.
*
* NOTE(review): many statements are missing from this view of the file, so
* the braces and the trailing "else" do not pair up as shown.
*/
static void *
clone_start(void *arg)
{
int rval;
/*
* Let the kernel finish setting up all the needed state for this
* new thread.
*
* We already created the thread using the thr_create(3C) library
* call, so most of the work required to emulate lx_clone(2) has
* been done by the time we get to this point. Instead of creating
* a new brandsys(2) subcommand to perform the last few bits of
* bookkeeping, we just use the lx_clone() slot in the syscall
* table.
*/
lx_debug("\tre-vectoring to lx kernel module to complete lx_clone()");
lx_debug("\tLX_SYS_clone(0x%x, 0x%p, 0x%p, 0x%p, 0x%p)",
NULL);
/*
* At this point the parent is waiting for cs->c_clone_res to go
* non-zero to indicate the thread has been cloned. The value set
* in cs->c_clone_res will be used for the return value from
* clone().
*/
/* A negative rval ends this thread's start routine early. */
if (rval < 0) {
return (NULL);
}
"Unable to set affinity mask in child thread: %s"),
}
/* Initialize the thread specific data for this thread. */
/*
* Use the address of the stack-allocated lx_tsd as the
* per-thread storage area to cache various values for later
* use.
*
* This address is only used by this thread, so there is no
* danger of other threads using this storage area, nor of it
* being accessed once this stack frame has been freed.
*/
gettext("Unable to set thread-specific ptr for clone: %s"),
}
/*
* Save the current context of this thread.
*
* We'll restore this context when this thread attempts to exit.
*/
"Unable to initialize thread-specific exit context: %s"),
}
/*
* Do the final stack twiddling, reset %gs, and return to the
* clone(2) path.
*/
/*
* lxtsd_exit == 0 is the first pass through here; the "else" further
* down is described as the path taken after an exit()/exit_group().
*/
if (lx_tsd.lxtsd_exit == 0) {
"Unable to release held signals for child "
}
/*
* Let the parent know that the clone has (effectively) been
* completed.
*/
/* lx_setup_clone() should never return. */
assert(0);
}
/*
* We are here because the Linux application called the exit() or
* exit_group() system call. In turn the brand library did a
* setcontext() to jump to the thread context state saved in
* getcontext(), above.
*/
else
assert(0);
/*NOTREACHED*/
}
/*
* Emulation of the Linux clone(2) system call (the debug strings below name
* it lx_clone()).
*
* NOTE(review): the function name, its parameter list and many statements
* are missing from this view of the file, so it does not parse as shown
* and several "if" statements appear to guard the wrong statement. The
* identifiers flags, cldstk, rp and sigmask are used below but their
* declarations are not visible here.
*
* Visible return values:
*   -EINVAL   for LX_CLONE_PID, and for an inconsistent combination of
*             CLONE_THREAD / CLONE_SIGHAND / CLONE_DETACHED;
*   -ENOTSUP  for the unsupported cases reported by the messages below;
*   -errno    on the failure path after signals are blocked;
*   0         on one path of the fork-style branch (per the adjacent comment,
*             where the child's stack is set up);
*   rval      otherwise, after waiting for the new thread to publish its
*             result.
*/
int
{
struct clone_state *cs;
/* Written by the new thread; polled below until it becomes non-zero. */
volatile int clone_res;
int sig;
int rval;
if (flags & LX_CLONE_SETTLS) {
lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
} else {
lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)",
}
/*
* Only supported for pid 0 on Linux
*/
if (flags & LX_CLONE_PID)
return (-EINVAL);
/*
* CLONE_THREAD requires CLONE_SIGHAND. CLONE_THREAD and
* CLONE_DETACHED must both be either set or cleared.
*/
return (-EINVAL);
rp = lx_syscall_regs();
/* See if this is a fork() operation or a thr_create(). */
if (flags & LX_CLONE_PARENT) {
"clone(2) only supports CLONE_PARENT "
"for threads.\n"));
return (-ENOTSUP);
}
if (flags & LX_CLONE_PTRACE)
if (flags & LX_CLONE_VFORK) {
/*
* Count the vfork before it happens; undo the count when rval is
* non-zero afterwards.
*/
is_vforked++;
if (rval != 0)
is_vforked--;
} else {
}
/* Parent just returns */
if (rval != 0)
/*
* If provided, the child needs its new stack set up.
*/
if (cldstk)
return (0);
}
/*
* We have very restricted support.... only exactly these flags are
* supported
*/
"clone(2) requires that all or none of CLONE_VM "
"CLONE_FS, CLONE_FILES, and CLONE_SIGHAND be set.\n"));
return (-ENOTSUP);
}
"clone(2) requires the caller to allocate the "
"child's stack.\n"));
return (-ENOTSUP);
}
/*
* If we want a signal-on-exit, ensure that the signal is valid.
*/
"clone(2) passed unsupported signal: %d"), sig);
return (-ENOTSUP);
}
/*
* To avoid malloc() here, we steal a part of the new thread's
* stack to store all the info that thread might need for
* initialization. We also make it 64-bit aligned for good
* measure.
*/
cs = (struct clone_state *)
"Unable to get affinity mask for parent thread: %s"),
/*
* We want the new thread to return directly to the return site for
* the system call.
*/
/* Zero means "not yet cloned"; see the wait loop at the end. */
clone_res = 0;
(void) sigfillset(&sigmask);
/*
* Block all signals because the thread we create won't be able to
* properly handle them until it's fully set up.
*/
return (-errno);
}
/*
* Release any pending signals
*/
/*
* Wait for the child to be created and have its tid assigned.
*/
/*
* Busy-wait: clone_res is volatile so each iteration re-reads it. The
* loop has no timeout and only runs when rval is 0.
*/
if (rval == 0) {
while (clone_res == 0)
;
}
return (rval);
}