199767f8919635c4928607450d9e0abb932109ceToomas Soome * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Copyright 2015 Toomas Soome <tsoome@me.com>
199767f8919635c4928607450d9e0abb932109ceToomas Soome * All rights reserved.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Redistribution and use in source and binary forms, with or without
199767f8919635c4928607450d9e0abb932109ceToomas Soome * modification, are permitted provided that the following conditions
199767f8919635c4928607450d9e0abb932109ceToomas Soome * 1. Redistributions of source code must retain the above copyright
199767f8919635c4928607450d9e0abb932109ceToomas Soome * notice, this list of conditions and the following disclaimer.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * 2. Redistributions in binary form must reproduce the above copyright
199767f8919635c4928607450d9e0abb932109ceToomas Soome * notice, this list of conditions and the following disclaimer in the
199767f8919635c4928607450d9e0abb932109ceToomas Soome * documentation and/or other materials provided with the distribution.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
199767f8919635c4928607450d9e0abb932109ceToomas Soome * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
199767f8919635c4928607450d9e0abb932109ceToomas Soome * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
199767f8919635c4928607450d9e0abb932109ceToomas Soome * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
199767f8919635c4928607450d9e0abb932109ceToomas Soome * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
199767f8919635c4928607450d9e0abb932109ceToomas Soome * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
199767f8919635c4928607450d9e0abb932109ceToomas Soome * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
199767f8919635c4928607450d9e0abb932109ceToomas Soome * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
199767f8919635c4928607450d9e0abb932109ceToomas Soome * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
199767f8919635c4928607450d9e0abb932109ceToomas Soome * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
199767f8919635c4928607450d9e0abb932109ceToomas Soome * SUCH DAMAGE.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Simple hashed block cache
199767f8919635c4928607450d9e0abb932109ceToomas Soome/* #define BCACHE_DEBUG */
199767f8919635c4928607450d9e0abb932109ceToomas Soome# define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args)
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Per-device bcache node. The cache is allocated on the first open of a
199767f8919635c4928607450d9e0abb932109ceToomas Soome * device and freed on the last close, to save memory. The concern is the
199767f8919635c4928607450d9e0abb932109ceToomas Soome * total size: biosdisk supports up to 31 (0x1f) devices. The classic setup
199767f8919635c4928607450d9e0abb932109ceToomas Soome * would boot from a single disk, but this has changed with zfs.
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic u_int bcache_total_nblks; /* set by bcache_init */
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic u_int bcache_blksize; /* set by bcache_init */
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic u_int bcache_numdev; /* set by bcache_add_dev */
199767f8919635c4928607450d9e0abb932109ceToomas Soome/* statistics */
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic u_int bcache_units; /* number of devices with cache */
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic u_int bcache_unit_nblks; /* nblocks per unit */
199767f8919635c4928607450d9e0abb932109ceToomas Soome#define BHASH(bc, blkno) ((blkno) & ((bc)->bcache_nblks - 1))
199767f8919635c4928607450d9e0abb932109ceToomas Soome ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic void bcache_invalidate(struct bcache *bc, daddr_t blkno);
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic void bcache_insert(struct bcache *bc, daddr_t blkno);
199767f8919635c4928607450d9e0abb932109ceToomas Soomestatic void bcache_free_instance(struct bcache *bc);
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Initialise the cache for (nblks) of (bsize).
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* set up control data */
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Add a number of devices to bcache. The cache space has to be divided
199767f8919635c4928607450d9e0abb932109ceToomas Soome * between the devices, so bcache_add_dev() can be used to set up the
199767f8919635c4928607450d9e0abb932109ceToomas Soome * number. The catch is that we need this number before any actual allocation.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * bcache_add_dev() is supposed to be called from the device init() call, so
199767f8919635c4928607450d9e0abb932109ceToomas Soome * the assumption is that devsw dv_init is called for plain devices first and
199767f8919635c4928607450d9e0abb932109ceToomas Soome * for zfs last.
199767f8919635c4928607450d9e0abb932109ceToomas Soome struct bcache *bc = malloc(sizeof (struct bcache));
199767f8919635c4928607450d9e0abb932109ceToomas Soome * the bcache block count must be power of 2 for hash function
199767f8919635c4928607450d9e0abb932109ceToomas Soome bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* don't error out yet; fall back to 32 blocks and try again */
199767f8919635c4928607450d9e0abb932109ceToomas Soome bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
199767f8919635c4928607450d9e0abb932109ceToomas Soome bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
199767f8919635c4928607450d9e0abb932109ceToomas Soome if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* Flush the cache */
199767f8919635c4928607450d9e0abb932109ceToomas Soome bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Handle a write request; write directly to the disk, and populate the
199767f8919635c4928607450d9e0abb932109ceToomas Soome * cache with the new values.
199767f8919635c4928607450d9e0abb932109ceToomas Soomewrite_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
199767f8919635c4928607450d9e0abb932109ceToomas Soome struct bcache_devdata *dd = (struct bcache_devdata *)devdata;
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* Invalidate the blocks being written */
199767f8919635c4928607450d9e0abb932109ceToomas Soome for (i = 0; i < nblk; i++) {
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* Write the blocks */
199767f8919635c4928607450d9e0abb932109ceToomas Soome return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize));
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Handle a read request; fill in parts of the request that can
199767f8919635c4928607450d9e0abb932109ceToomas Soome * be satisfied by the cache, use the supplied strategy routine to do
199767f8919635c4928607450d9e0abb932109ceToomas Soome * device I/O and then use the I/O results to populate the cache.
199767f8919635c4928607450d9e0abb932109ceToomas Soomeread_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
199767f8919635c4928607450d9e0abb932109ceToomas Soome struct bcache_devdata *dd = (struct bcache_devdata *)devdata;
199767f8919635c4928607450d9e0abb932109ceToomas Soome return (-1);
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* Satisfy any cache hits up front, break on first miss */
199767f8919635c4928607450d9e0abb932109ceToomas Soome for (i = 0; i < nblk; i++) {
199767f8919635c4928607450d9e0abb932109ceToomas Soome if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD)
199767f8919635c4928607450d9e0abb932109ceToomas Soome if (complete) { /* whole set was in cache, return it */
199767f8919635c4928607450d9e0abb932109ceToomas Soome bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset,
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Fill in any misses. After the hit check, i points to the first missing
199767f8919635c4928607450d9e0abb932109ceToomas Soome * block; read in all remaining blocks plus read-ahead.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * We have space at least for nblk - i before bcache wraps.
199767f8919635c4928607450d9e0abb932109ceToomas Soome p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
199767f8919635c4928607450d9e0abb932109ceToomas Soome r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
199767f8919635c4928607450d9e0abb932109ceToomas Soome p_size = MIN(r_size, nblk - i); /* read at least those blocks */
199767f8919635c4928607450d9e0abb932109ceToomas Soome ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
199767f8919635c4928607450d9e0abb932109ceToomas Soome if (ra != bc->bcache_nblks) { /* do we have RA space? */
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* invalidate bcache */
199767f8919635c4928607450d9e0abb932109ceToomas Soome for (i = 0; i < p_size; i++) {
199767f8919635c4928607450d9e0abb932109ceToomas Soome * With read-ahead, we may end up attempting to read past the end of
199767f8919635c4928607450d9e0abb932109ceToomas Soome * the disk, as bcache has no information about the disk size.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * In that case we should get a partial read if some blocks can be
199767f8919635c4928607450d9e0abb932109ceToomas Soome * read, or an error if no blocks can be read.
199767f8919635c4928607450d9e0abb932109ceToomas Soome * In either case we should return the data in bcache and only
199767f8919635c4928607450d9e0abb932109ceToomas Soome * return an error if there is no data.
199767f8919635c4928607450d9e0abb932109ceToomas Soome result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0,
199767f8919635c4928607450d9e0abb932109ceToomas Soome for (i = 0; i < r_size; i++)
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* update ra statistics */
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* check how much data can we copy */
199767f8919635c4928607450d9e0abb932109ceToomas Soome for (i = 0; i < nblk; i++) {
199767f8919635c4928607450d9e0abb932109ceToomas Soome bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset,
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Requests larger than 1/2 cache size will be bypassed and go
199767f8919635c4928607450d9e0abb932109ceToomas Soome * directly to the disk. XXX tune this.
199767f8919635c4928607450d9e0abb932109ceToomas Soomebcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
199767f8919635c4928607450d9e0abb932109ceToomas Soome struct bcache_devdata *dd = (struct bcache_devdata *)devdata;
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* bypass large requests, or when the cache is inactive */
199767f8919635c4928607450d9e0abb932109ceToomas Soome (offset == 0 && ((size * 2 / bcache_blksize) > bcache_nblks))) {
199767f8919635c4928607450d9e0abb932109ceToomas Soome DEBUG("bypass %d from %d", size / bcache_blksize, blk);
199767f8919635c4928607450d9e0abb932109ceToomas Soome return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf,
199767f8919635c4928607450d9e0abb932109ceToomas Soome /* normalize offset */
199767f8919635c4928607450d9e0abb932109ceToomas Soome cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */
199767f8919635c4928607450d9e0abb932109ceToomas Soome * We may get an error from read-ahead; if we have read some data,
199767f8919635c4928607450d9e0abb932109ceToomas Soome * return a partial read.
199767f8919635c4928607450d9e0abb932109ceToomas Soome return write_strategy(devdata, rw, blk, offset, size, buf, rsize);
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Free allocated bcache instance
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Insert a block into the cache.
199767f8919635c4928607450d9e0abb932109ceToomas Soome DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount);
199767f8919635c4928607450d9e0abb932109ceToomas Soome bc->bcache_ctl[cand].bc_count = bcache_bcount++;
199767f8919635c4928607450d9e0abb932109ceToomas Soome * Invalidate a block from the cache.
199767f8919635c4928607450d9e0abb932109ceToomas Soomebcache_invalidate(struct bcache *bc, daddr_t blkno)
199767f8919635c4928607450d9e0abb932109ceToomas SoomeCOMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache);
199767f8919635c4928607450d9e0abb932109ceToomas Soomecommand_bcache(int argc, char *argv[] __attribute((unused)))
199767f8919635c4928607450d9e0abb932109ceToomas Soome printf("\ncache blocks: %d\n", bcache_total_nblks);
199767f8919635c4928607450d9e0abb932109ceToomas Soome printf("cache readahead: %d\n", bcache_rablks);
199767f8919635c4928607450d9e0abb932109ceToomas Soome printf("unit cache blocks: %d\n", bcache_unit_nblks);
199767f8919635c4928607450d9e0abb932109ceToomas Soome printf("%d ops %d bypasses %d hits %d misses\n", bcache_ops,