fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * CDDL HEADER START
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * The contents of this file are subject to the terms of the
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Common Development and Distribution License (the "License").
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * You may not use this file except in compliance with the License.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * See the License for the specific language governing permissions
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * and limitations under the License.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * When distributing Covered Code, include this CDDL HEADER in each
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * If applicable, add the following below this CDDL HEADER, with the
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * fields enclosed by brackets "[]" replaced with your own identifying
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * information: Portions Copyright [yyyy] [name of copyright owner]
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * CDDL HEADER END
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Use is subject to license terms.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forteint _sd_node_recovery; /* node recovery in progress */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_async_recovery:
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * 0 = flush and wait
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * 1 = clone and async-write
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * 2 = quicksort, clone, and async-write
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * quicksort allows contiguous blocks to be joined,
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * which may greatly improve recovery time for raid devices.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * if kmem_alloc fails, acts as _sd_async_recovery == 1
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic int xmem_inval_hit, xmem_inval_miss, xmem_inval_inuse;
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * flag to inhibit reset of remote SCSI buses and sending of
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * nodedown callback if mirror was deconfigured properly.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * - prevents trashing any I/O that may be happening on the mirror
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * node during a normal shutdown and prevents undesired simckd failover.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Forward declare all statics that are used before defined to enforce
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * parameter checking
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Some (if not all) of these could be removed if the code were reordered
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic void _sd_health_thread(void);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic void _sd_cache_recover(void);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic void _sd_remote_enable(void);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic int _sd_failover_file_open(void);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic void _sd_failover_done(void);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic void _sd_wait_for_dirty(void);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Fortestatic void _sdbc_clear_warm_start(void);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forteextern void sdbc_requeue_head_dm_try(_sd_cctl_t *cc_ent);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sdbc_ft_unload - cache is being unloaded (or failed to load).
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Deallocate any global lock/sv that we created.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sdbc_ft_load - cache is being loaded. Allocate all global lock/sv
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * that we need. Return 0 if we succeed. If we fail return -1 (don't
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * need to do the unload step as we expect our caller to do that).
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* _sd_ft_data is sure to be zeroes, don't need to bzero it */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte mutex_init(&_sd_ft_data.fi_lock, NULL, MUTEX_DRIVER, NULL);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte cv_init(&_sd_ft_data.fi_rem_sv, NULL, CV_DRIVER, NULL);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte mutex_init(&_sd_ft_data.fi_sleep, NULL, MUTEX_DRIVER, NULL);
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana (void (*)(void *))_sd_health_thread, 0, TRUE));
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_health_thread -- daemon thread on each node watches if mirror
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * node has crashed, and it needs to flush the mirror's cache entries.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Note we do *not* detect that the node has come up again, but wait
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * for the node to inform us that it is up via _sd_cache_reenable().
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* clear _sd_ft_data in case this is a cache re-enable w/o unload */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_NOTE, "!sdbc(_sd_health_thread) safestore "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "is %s. Fast writes %s",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* CONSTCOND */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* NB evaluation order is important here for nvmem systems */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Hash invalidate here. We do not want data from
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * previous failover incarnation to be cache hits, if
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * two failovers happen within a short time
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * don't change mirror state when warm starting
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * nvmem systems. _sd_mirror_down() is called in
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_remote_enable() on nvmem systems if the
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * media is down.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* Assume other side is still up */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(_sd_health_thread)"
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "Safestore is down. Fast writes %s",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* Wait for cache to drain and panic */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(_sd_health_thread)"
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte " dirty blocks flushed");
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(_sd_health_thread)"
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte " Cache on node %d is down. "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "Fast writes %s",
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(_sd_health_thread)"
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte " Cache recovery in progress");
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(_sd_health_thread) %s Cache recovery done",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* restore previous state */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(_sd_health_thread) Fast writes %s",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sdbc_recovery_io_wait - wait for i/o being done directly
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * out of safe storage to complete. If the i/o does not make any
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * progress within about 25 seconds we return EIO otherwise return 0.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Wait for numio to reach 0.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * If numio has not changed for 85+ seconds,
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * break & pin blocks
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(_sdbc_recovery_io_wait) %d "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "recovery i/o's not done", _sd_ft_data.fi_numio);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_recovery_wait()
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * while _sd_node_recovery is set, accesses to mirrored devices will block
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * (_sd_node_recovery-1) is count of blocked threads.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte blk = _sd_node_recovery ? _sd_node_recovery++ : 0;
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte cv_wait(&_sd_ft_data.fi_rem_sv, &_sd_ft_data.fi_lock);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_recovery_wblk_wait - wait for recovery i/o to a device
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * to cease. If the file is closed or the cache is disabled
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * first, return an error; otherwise return 0.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * A device is being recovered from our point of view either
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * during failover or by putting a disk back online after
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * a disk failure.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * This code is used to delay access to a device while recovery
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * writes are in progress from either a failover or while flushing
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * i/o after a failed disk has been repaired.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* spawn writer if none */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Recover from a crash of another node:
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * 1) Open all remote files
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * 2) Allocate other node's buffers and new buffer headers
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * 3) Flush all dirty buffers to disk
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * 4) Deallocate resources
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_ENTER|SDF_RECOVER, SDT_INV_CD, 0, SDT_INV_BL, 0, 0);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* allow cache config to proceed */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* wait for sequential recovery to complete */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc %ssynchronous recovery complete "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "%d cache blocks processed",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_EXIT|SDF_RECOVER, SDT_INV_CD, 0, SDT_INV_BL, 0, 0);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_ft_clone -- clone cache block from ft area, retry write or pin.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte_sd_ft_clone(ss_centry_info_t *ft_cent, int async)
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_ENTER|SDF_FT_CLONE, cd, BLK_FBAS, cblk, dirty, _SD_NO_NET);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if ((cdi->cd_info->sh_failed != 2) && !FILE_OPENED(cd)) {
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(_sd_ft_clone) recovery "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "write failed: cd %x; cblk %" NSC_SZFMT "; dirty %x",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte return (-1);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * allocate new cache entry and read data
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (SSOP_READ_CBLOCK(sdbc_safestore, res, (void *)ent->cc_data,
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(_sd_ft_clone) read of "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "pinned data block failed. cannot recover "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "0x%p size 0x%x", (void *)res, CACHE_BLOCK_SIZE);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* _sd_process_failure ?? */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte return (-1);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte ent->cc_flag |= (ft_cent->sc_flag & CC_PINNABLE);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_process_failure() adds to failed list & does pinned callback
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * otherwise async flush
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (cdi->cd_info->sh_failed) { /* raw device open/reserve failed */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * this is sync write with asynchronous callback
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * (queue to disk and return).
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_EXIT|SDF_FT_CLONE, cd, BLK_FBAS, cblk, dirty, _SD_NO_NET);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_repin_cd - scan for dirty blocks held by mirror node.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * sdbc on this node is being attached to cd. If sdbc on other
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * node had failed writes (pinnable or not) we need to take
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * responsibility for them now here.
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(_sd_cache_mirror_enable) "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "Configured mirror %x. Got message from %x",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (_sd_node_recovery) (void) _sd_recovery_wait();
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (_sd_cache_initialized && _sd_is_mirror_down()) {
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* make sure any pinned data we have is now refreshed */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte for (i = 0; i < sdbc_max_devs; i++)
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(_sd_cache_mirror_enable) Cache on "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "mirror node %d is up. Fast writes enabled",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * two stage mirror disable:
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * stage 0: set FORCED_WRTHRU hint (cache shutdown started)
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * stage 1: mirror shutdown completed
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte _sd_ft_data.fi_host_state = _SD_HOST_DECONFIGURED;
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * set the fault tolerant data to indicate the state
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * of the safestore host. set mode to writethru if appropriate
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * if single node local safestore or ram safestore
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * then mark host state as crashed/_SD_HOST_NONE and set writethru
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (ncall_alloc(_SD_MIRROR_HOST, 0, _SD_NO_NET, &ncall)) {
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte r = ncall_send(ncall, 0, SD_ENABLE, _SD_SELF_HOST);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte _sd_mirror_cache_down(); /* mirror up, but no cache */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (ncall_alloc(_SD_MIRROR_HOST, 0, 0, &ncall) == 0)
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte (void) ncall_send(ncall, NCALL_ASYNC, SD_DISABLE,
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte ncall_reply(ncall, _sd_cache_mirror_enable(*ap));
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte#else /* (_SD_FAULT_RES) */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte#endif /* (_SD_FAULT_RES) */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * invalidate cache hash table entries for given device
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * or (-1) all devices belonging to mirrored node
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Skip if device doesn't match or pinned.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * (-1) skip attached cd's
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana if ((CD != -1 && (cd != CD || CENTRY_PINNED(cc_ent))) ||
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* cc_inuse is set, delete on block match */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* attempt to que head */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_cd_online(cd,discard)
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * clear local error state.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * if (discard && _attached != _SD_SELF_HOST) then release buffers.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * if (!discard && _attached != _SD_MIRROR_HOST) then re-issue I/Os
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * (add to dirty pending queue).
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * EINVAL invalid device or not failed
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * EBUSY attached by this node, or by active mirror
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte _sd_cctl_t *cc_ent, *cc_next, *cc_last, *cc_first, *cc_next_chain;
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * in the case where a failed device has been closed and
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * then re-opened, sh_failed will be zero because it is
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * cleared in _sd_open_cd(). hence the test for
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _pinned != _SD_SELF_HOST which allows the restore to
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * proceed in this scenario.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* can't discard while attached on multinode systems */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (discard && (cdi->cd_global->sv_attached == _SD_SELF_HOST))
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte (cdi->cd_global->sv_attached == _SD_MIRROR_HOST) &&
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* prevent any new i/o from arriving for this cd */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* Clear PEND_DIRTY, iocount & iostatus */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* was FAST */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(_sd_cd_online) count %d vs numfail %d",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte _sd_enqueue_dirty_chain(cd, cc_first, cc_last, num);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* make sure data gets flushed in case there is no new I/O */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * This node has disk attached, discard pins held by mirror
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte (void) ncall_send(ncall, NCALL_ASYNC, SD_CD_DISCARD, cd);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_ENTER|SDF_ONLINE, cd, 1, SDT_INV_BL, 1, 0);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_EXIT|SDF_ONLINE, cd, 1, SDT_INV_BL, 1, r);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_failover_file_open -
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * on failover, open devices which are not attached by this node.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * If the cd is open and reserved we certainly don't
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * need to do it again. However the recovery code
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * must be racing some other cache usage which could
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * be bad. We really need to be able to lock out
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * all cache activity for this cd that is not tied
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * to the recovery process. This doesn't seem to be
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * feasible in sdbc since a competing thread could
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * already be finished doing an alloc_buf. If this
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * hole is to be closed sd-ctl must be more in
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * control of the failover process.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * this construct says that, on non-nvmem systems,
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * if we are attempting to open a "local" device and
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * nothing is pinned, then continue. i.e. open only
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * remote devices or devices that have pinned data.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * for recovery on nvmem systems we open all devices.
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana ((cd_gl->sv_attached != _SD_MIRROR_HOST) &&
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana if (!cd_gl->sv_volname || !cd_gl->sv_volname[0])
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (_sd_open_cd(cd_gl->sv_volname, cd, flag) < 0) {
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(_sd_failover_file_open) "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "Unable to open disk partition %s",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if (rc == 0) {
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte cblocks_processed += sdbc_recover_vol(cd_gl->sv_vol, cd);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte ss_centry_info_t *sdbc_get_cinfo_byres(ss_resource_t *);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* setup the key to get a volume directory stream of centrys */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(sdbc_recover_vol): "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "cannot recover volume %s",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* cycle through the cdir getting resource tokens and reading centrys */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /*CONSTANTCONDITION*/
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if ((err = SSOP_GETCDIRENT(sdbc_safestore, &cdir, &centry))
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(sdbc_recover_vol): "
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "cache entry read failure %s %p",
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte break; /* done */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * this gets into double caching consistency
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * need to resolve this jgk
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte if ((cinfo = sdbc_get_cinfo_byres(centry.sc_res)) == NULL) {
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte /* should not happen */
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(sdbc_recover_vol): "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "invalid ss resource %p", (void *)centry.sc_res);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte bcopy(&centry, cinfo, sizeof (ss_centry_info_t));
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * ss should return a stream of dirty blocks ordered
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * by block number. if it turns out that ss will not support
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * this then sorting for async recovery will have to be
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * done here jgk
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * clone mirror cache entry and do
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * async I/O or sync I/O or pin if sh_failed
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "!sdbc(sdbc_recover_vol) %d cache blocks processed for "
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana "volume %s", cblocks_processed, cd_gl->sv_volname);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_failover_done -
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * mark failover open'd devices as requiring nsc_release()
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * when all queued I/O's have drained.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte#endif /* (_SD_FAULT_RES) */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_uncommit - discard local buffer modifications
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * clear the valid bits.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte_sd_uncommit(_sd_buf_handle_t *handle, nsc_off_t fba_pos, nsc_size_t fba_len,
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte sdbc_cblk_fba_t st_cblk_len; /* FBA len of starting cache block */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte sdbc_cblk_fba_t end_cblk_len; /* FBA len of ending cache block */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte sdbc_cblk_fba_t st_cblk_off; /* FBA offset into starting cblock */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_ENTER|SDF_UNCOMMIT, cd, fba_len, fba_pos, flag, 0);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte while (CENTRY_BLK(cc_ent) != FBA_TO_BLK_NUM(fba_pos))
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Check if remote write-cache spool is dirty,
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * if not, we can just discard local valid bits.
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!fba_len %" NSC_SZFMT " end_cblk_len %d in "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte SDTRACE(ST_EXIT|SDF_UNCOMMIT, cd, fba_len, fba_pos, flag, 0);
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * _sd_wait_for_flush - wait for all i/o for this cd to cease.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * This function assumes that no further i/o are being issued
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * against this device. This assumption is enforced by sd-ctl
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * when called from _sd_flush_cd. Recovery also uses this
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * wait and it enforces this assumption (somewhat imperfectly)
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * by using cd_recovering.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * We must see progress in getting i/o complete within 25 seconds
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * or we will return an error. If we complete normally (all i/o done)
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * we return 0.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte int tries = 0, used, last_used = 0, inprogress = 0;
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * Wait for WBLK_USED to reach 0.
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte * If unchanged for 32+ seconds returns EAGAIN
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte (void) cd_writer(cd); /* spawn writer if not already running */
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte while (((used = _SD_CD_WBLK_USED(cd)) != 0) || cdi->cd_writer) {
3270659f55e0928d6edec3d26217cc29398a8149Srikanth, Ramana cmn_err(CE_WARN, "!sdbc(_sd_wait_for_flush) "
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte "%s still has %d blocks pending %d"
fcf3ce441efd61da9bb2884968af01cb7c1452ccJohn Forte " in progress (@ %lx)",