c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347agiri * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347agiri
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347agiri * This file contains code imported from the OFED rds source file cong.c
16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347agiri * Oracle elects to have and use the contents of cong.c under and governed
16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347agiri * by the OpenIB.org BSD license (see below for full license text). However,
16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347agiri * the following notice accompanied the original version of this file:
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
16e76cdd6e3cfaac7d91c3b0644ee1bc6cf52347agiri
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * Copyright (c) 2007 Oracle. All rights reserved.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * This software is available to you under a choice of one of two
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * licenses. You may choose to be licensed under the terms of the GNU
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * General Public License (GPL) Version 2, available from the file
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * COPYING in the main directory of this source tree, or the
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * OpenIB.org BSD license below:
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * Redistribution and use in source and binary forms, with or
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * without modification, are permitted provided that the following
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * conditions are met:
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * - Redistributions of source code must retain the above
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * copyright notice, this list of conditions and the following
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * disclaimer.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * - Redistributions in binary form must reproduce the above
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * copyright notice, this list of conditions and the following
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * disclaimer in the documentation and/or other materials
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * provided with the distribution.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * SOFTWARE.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#include <sys/rds.h>
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#include <sys/ib/clients/rdsv3/rdsv3.h>
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#include <sys/ib/clients/rdsv3/rdsv3_impl.h>
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#include <sys/ib/clients/rdsv3/rdsv3_debug.h>
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * This file implements the receive side of the unconventional congestion
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * management in RDS.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * Messages waiting in the receive queue on the receiving socket are accounted
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * against the sockets SO_RCVBUF option value. Only the payload bytes in the
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * message are accounted for. If the number of bytes queued equals or exceeds
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * rcvbuf then the socket is congested. All sends attempted to this socket's
 * address should block or return -EWOULDBLOCK.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * Applications are expected to be reasonably tuned such that this situation
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * very rarely occurs. An application encountering this "back-pressure" is
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * considered a bug.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * This is implemented by having each node maintain bitmaps which indicate
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * which ports on bound addresses are congested. As the bitmap changes it is
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * sent through all the connections which terminate in the local address of the
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * bitmap which changed.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * The bitmaps are allocated as connections are brought up. This avoids
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * allocation in the interrupt handling path which queues messages on sockets.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * The dense bitmaps let transports send the entire bitmap on any bitmap change
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * reasonably efficiently. This is much easier to implement than some
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * finer-grained communication of per-port congestion. The sender does a very
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * inexpensive bit test to test if the port it's about to send to is congested
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * or not.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
/*
 * Interaction with poll is a tad tricky. We want all processes stuck in
 * poll to wake up and check whether a congested destination became uncongested.
 * The really sad thing is we have no idea which destinations the application
 * wants to send to - we don't even know which rdsv3_connections are involved.
 * So until we implement a more flexible rds poll interface, we have to make
 * do with this:
 * We maintain a global counter that is incremented each time a congestion map
 * update is received (rdsv3_cong_map_updated()). Each rds socket tracks this
 * value; rdsv3_cong_updated_since() reports a change whenever the saved
 * generation number differs from the global generation number, and refreshes
 * the saved copy. The poll path is expected to wake the process in that case.
 */
static atomic_t rdsv3_cong_generation = ATOMIC_INIT(0);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
/*
 * Congestion monitoring
 *
 * rdsv3_cong_monitor is the list of sockets (linked through rs_cong_list)
 * registered via rdsv3_cong_add_socket(). rdsv3_cong_map_updated() walks it
 * with rdsv3_cong_monitor_lock held as reader; rdsv3_cong_add_socket() and
 * rdsv3_cong_remove_socket() modify it with the lock held as writer.
 */
static struct list rdsv3_cong_monitor;
static krwlock_t rdsv3_cong_monitor_lock;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
/*
 * Yes, a global lock. It's used so infrequently that it's worth keeping it
 * global to simplify the locking. It's only used in the following
 * circumstances:
 *
 * - on connection buildup to associate a conn with its maps
 * - on map changes to inform conns of a new map to send
 *
 * In this file it is held around lookups and inserts in rdsv3_cong_tree and
 * around every change to, or walk of, a map's m_conn_list.
 *
 * It's sadly ordered under the socket callback lock and the connection lock.
 * Receive paths can mark ports congested from interrupt context so the
 * lock masks interrupts.
 * NOTE(review): the lock is declared as a kmutex_t and its initialisation is
 * not visible here - confirm that the interrupt-masking statement above
 * still holds for this implementation.
 */
static kmutex_t rdsv3_cong_lock;
static struct avl_tree rdsv3_cong_tree;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otastatic struct rdsv3_cong_map *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_tree_walk(uint32_be_t addr, struct rdsv3_cong_map *insert)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_cong_map *map;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota avl_index_t where;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (insert) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map = avl_find(&rdsv3_cong_tree, insert, &where);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (map == NULL) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota avl_insert(&rdsv3_cong_tree, insert, where);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (NULL);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota } else {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_cong_map map1;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map1.m_addr = addr;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map = avl_find(&rdsv3_cong_tree, &map1, &where);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * There is only ever one bitmap for any address. Connections try and allocate
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * these bitmaps in the process getting pointers to them. The bitmaps are only
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * ever freed as the module is removed after all connections have been freed.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otastatic struct rdsv3_cong_map *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_from_addr(uint32_be_t addr)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_cong_map *map;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_cong_map *ret = NULL;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long zp;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long i;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_from_addr", "Enter(addr: %x)", ntohl(addr));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map = kmem_zalloc(sizeof (struct rdsv3_cong_map), KM_NOSLEEP);
5d5562f583b2b6affe19bdce0b3c8b1840d667a4Eiji Ota if (!map)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (NULL);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map->m_addr = addr;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_init_waitqueue(&map->m_waitq);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota list_create(&map->m_conn_list, sizeof (struct rdsv3_connection),
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota offsetof(struct rdsv3_connection, c_map_item));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota for (i = 0; i < RDSV3_CONG_MAP_PAGES; i++) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota zp = (unsigned long)kmem_zalloc(PAGE_SIZE, KM_NOSLEEP);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (zp == 0)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota goto out;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map->m_page_addrs[i] = zp;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota ret = rdsv3_cong_tree_walk(addr, map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
5d5562f583b2b6affe19bdce0b3c8b1840d667a4Eiji Ota if (!ret) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota ret = map;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map = NULL;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otaout:
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (map) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota for (i = 0; i < RDSV3_CONG_MAP_PAGES && map->m_page_addrs[i];
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota i++)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota kmem_free((void *)map->m_page_addrs[i], PAGE_SIZE);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota kmem_free(map, sizeof (*map));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF5("rdsv3_cong_from_addr", "map %p for addr %x",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota ret, ntohl(addr));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (ret);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * Put the conn on its local map's list. This is called when the conn is
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * really added to the hash. It's nested under the rdsv3_conn_lock, sadly.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_add_conn(struct rdsv3_connection *conn)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_add_conn", "Enter(conn: %p)", conn);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF5("rdsv3_cong_add_conn", "conn %p now on map %p",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota conn, conn->c_lcong);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota list_insert_tail(&conn->c_lcong->m_conn_list, conn);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_add_conn", "Return(conn: %p)", conn);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_remove_conn(struct rdsv3_connection *conn)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_remove_conn", "Enter(conn: %p)", conn);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF5("rdsv3_cong_remove_conn", "removing conn %p from map %p",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota conn, conn->c_lcong);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota list_remove_node(&conn->c_map_item);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_remove_conn", "Return(conn: %p)", conn);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otaint
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_get_maps(struct rdsv3_connection *conn)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota conn->c_lcong = rdsv3_cong_from_addr(conn->c_laddr);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota conn->c_fcong = rdsv3_cong_from_addr(conn->c_faddr);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
5d5562f583b2b6affe19bdce0b3c8b1840d667a4Eiji Ota if (!(conn->c_lcong && conn->c_fcong))
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (-ENOMEM);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (0);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_queue_updates(struct rdsv3_cong_map *map)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_connection *conn;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_queue_updates", "Enter(map: %p)", map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_FOR_EACH_LIST_NODE(conn, &map->m_conn_list, c_map_item) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (!test_and_set_bit(0, &conn->c_map_queued)) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_stats_inc(s_cong_update_queued);
5d5562f583b2b6affe19bdce0b3c8b1840d667a4Eiji Ota (void) rdsv3_send_xmit(conn);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_queue_updates", "Return(map: %p)", map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_map_updated(struct rdsv3_cong_map *map, uint64_t portmask)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_map_updated",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota "waking map %p for %u.%u.%u.%u",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map, NIPQUAD(map->m_addr));
cadbfdc3bdb156e92d7a88978bc98ea87f6e037fEiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_stats_inc(s_cong_update_received);
1a5e258f5471356ca102c7176637cdce45bac147Josef 'Jeff' Sipek atomic_inc_32(&rdsv3_cong_generation);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#if 0
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji OtaXXX
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (waitqueue_active(&map->m_waitq))
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#endif
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_wake_up(&map->m_waitq);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (portmask && !list_is_empty(&rdsv3_cong_monitor)) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_sock *rs;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rw_enter(&rdsv3_cong_monitor_lock, RW_READER);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_FOR_EACH_LIST_NODE(rs, &rdsv3_cong_monitor,
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rs_cong_list) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&rs->rs_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rs->rs_cong_mask &= ~portmask;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&rs->rs_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (rs->rs_cong_notify)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_wake_sk_sleep(rs);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rw_exit(&rdsv3_cong_monitor_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_map_updated", "Return(map: %p)", map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otaint
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_updated_since(unsigned long *recent)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long gen = atomic_get(&rdsv3_cong_generation);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (*recent == gen)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (0);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota *recent = gen;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (1);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * We're called under the locking that protects the sockets receive buffer
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * consumption. This makes it a lot easier for the caller to only call us
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * when it knows that an existing set bit needs to be cleared, and vice versa.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * We can't block and we need to deal with concurrent sockets working against
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * the same per-address map.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_set_bit(struct rdsv3_cong_map *map, uint16_be_t port)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long i;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long off;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_set_bit",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota "setting congestion for %u.%u.%u.%u:%u in map %p",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota NIPQUAD(map->m_addr), ntohs(port), map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota i = ntohs(port) / RDSV3_CONG_MAP_PAGE_BITS;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota off = ntohs(port) % RDSV3_CONG_MAP_PAGE_BITS;
cadbfdc3bdb156e92d7a88978bc98ea87f6e037fEiji Ota set_le_bit(off, (void *)map->m_page_addrs[i]);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_clear_bit(struct rdsv3_cong_map *map, uint16_be_t port)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long i;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long off;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_clear_bit",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota "clearing congestion for %u.%u.%u.%u:%u in map %p\n",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota NIPQUAD(map->m_addr), ntohs(port), map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota i = ntohs(port) / RDSV3_CONG_MAP_PAGE_BITS;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota off = ntohs(port) % RDSV3_CONG_MAP_PAGE_BITS;
cadbfdc3bdb156e92d7a88978bc98ea87f6e037fEiji Ota clear_le_bit(off, (void *)map->m_page_addrs[i]);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otastatic int
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_test_bit(struct rdsv3_cong_map *map, uint16_be_t port)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long i;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long off;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota i = ntohs(port) / RDSV3_CONG_MAP_PAGE_BITS;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota off = ntohs(port) % RDSV3_CONG_MAP_PAGE_BITS;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF5("rdsv3_cong_test_bit", "port: 0x%x i = %lx off = %lx",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota ntohs(port), i, off);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
cadbfdc3bdb156e92d7a88978bc98ea87f6e037fEiji Ota return (test_le_bit(off, (void *)map->m_page_addrs[i]));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_add_socket(struct rdsv3_sock *rs)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_add_socket", "Enter(rs: %p)", rs);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rw_enter(&rdsv3_cong_monitor_lock, RW_WRITER);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (!list_link_active(&rs->rs_cong_list))
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota list_insert_head(&rdsv3_cong_monitor, rs);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rw_exit(&rdsv3_cong_monitor_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_remove_socket(struct rdsv3_sock *rs)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_cong_map *map;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_remove_socket", "Enter(rs: %p)", rs);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rw_enter(&rdsv3_cong_monitor_lock, RW_WRITER);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota list_remove_node(&rs->rs_cong_list);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rw_exit(&rdsv3_cong_monitor_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota /* update congestion map for now-closed port */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map = rdsv3_cong_tree_walk(rs->rs_bound_addr, NULL);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&rdsv3_cong_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (map && rdsv3_cong_test_bit(map, rs->rs_bound_port)) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_cong_clear_bit(map, rs->rs_bound_port);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_cong_queue_updates(map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otaint
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_wait(struct rdsv3_cong_map *map, uint16_be_t port, int nonblock,
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_sock *rs)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
6e18d381c642549b8bb1774a803d3510aec6baafagiri int ret = 0;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_wait", "Enter(rs: %p, mode: %d)",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rs, nonblock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (!rdsv3_cong_test_bit(map, port))
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (0);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (nonblock) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (rs && rs->rs_cong_monitor) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota /*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * It would have been nice to have an atomic set_bit on
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * a uint64_t.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&rs->rs_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rs->rs_cong_mask |=
fe817b6022080da0a98b5d2d8cd179f594d6ca5eEiji Ota RDS_CONG_MONITOR_MASK(ntohs(port));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&rs->rs_lock);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota /*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * Test again - a congestion update may have arrived in
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * the meantime.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (!rdsv3_cong_test_bit(map, port))
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (0);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_stats_inc(s_cong_send_error);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (-ENOBUFS);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rdsv3_stats_inc(s_cong_send_blocked);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF3("rdsv3_cong_wait", "waiting on map %p for port %u",
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota map, ntohs(port));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
6e18d381c642549b8bb1774a803d3510aec6baafagiri#if 0
6e18d381c642549b8bb1774a803d3510aec6baafagiri ret = rdsv3_wait_sig(&map->m_waitq, !rdsv3_cong_test_bit(map, port));
6e18d381c642549b8bb1774a803d3510aec6baafagiri if (ret == 0)
6e18d381c642549b8bb1774a803d3510aec6baafagiri return (-ERESTART);
6e18d381c642549b8bb1774a803d3510aec6baafagiri return (0);
6e18d381c642549b8bb1774a803d3510aec6baafagiri#else
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_enter(&map->m_waitq.waitq_mutex);
6e18d381c642549b8bb1774a803d3510aec6baafagiri map->m_waitq.waitq_waiters++;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota while (rdsv3_cong_test_bit(map, port)) {
6e18d381c642549b8bb1774a803d3510aec6baafagiri ret = cv_wait_sig(&map->m_waitq.waitq_cv,
6e18d381c642549b8bb1774a803d3510aec6baafagiri &map->m_waitq.waitq_mutex);
6e18d381c642549b8bb1774a803d3510aec6baafagiri if (ret == 0) {
5e12ddada2833f3aa285210603ce9aaeb8be35ccEiji Ota ret = -EINTR;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota break;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
6e18d381c642549b8bb1774a803d3510aec6baafagiri map->m_waitq.waitq_waiters--;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_exit(&map->m_waitq.waitq_mutex);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (ret);
6e18d381c642549b8bb1774a803d3510aec6baafagiri#endif
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_exit(void)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_cong_map *map;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota unsigned long i;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_exit", "Enter");
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota while ((map = avl_first(&rdsv3_cong_tree))) {
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF5("rdsv3_cong_exit", "freeing map %p\n", map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota avl_remove(&rdsv3_cong_tree, map);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota for (i = 0; i < RDSV3_CONG_MAP_PAGES && map->m_page_addrs[i];
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota i++)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota kmem_free((void *)map->m_page_addrs[i], PAGE_SIZE);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota kmem_free(map, sizeof (*map));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota }
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota RDSV3_DPRINTF4("rdsv3_cong_exit", "Return");
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota/*
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota * Allocate a RDS message containing a congestion update.
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota */
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otastruct rdsv3_message *
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_update_alloc(struct rdsv3_connection *conn)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_cong_map *map = conn->c_lcong;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota struct rdsv3_message *rm;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rm = rdsv3_message_map_pages(map->m_page_addrs, RDSV3_CONG_MAP_BYTES);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (!IS_ERR(rm))
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rm->m_inc.i_hdr.h_flags = RDSV3_FLAG_CONG_BITMAP;
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (rm);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otastatic int
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_compare(const void *map1, const void *map2)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#define addr1 ((struct rdsv3_cong_map *)map1)->m_addr
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota#define addr2 ((struct rdsv3_cong_map *)map2)->m_addr
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (addr1 < addr2)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (-1);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota if (addr1 > addr2)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (1);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota return (0);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otavoid
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Otardsv3_cong_init(void)
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota{
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota list_create(&rdsv3_cong_monitor, sizeof (struct rdsv3_sock),
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota offsetof(struct rdsv3_sock, rs_cong_list));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota rw_init(&rdsv3_cong_monitor_lock, NULL, RW_DRIVER, NULL);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota mutex_init(&rdsv3_cong_lock, NULL, MUTEX_DRIVER, NULL);
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota avl_create(&rdsv3_cong_tree, rdsv3_cong_compare,
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota sizeof (struct rdsv3_cong_map), offsetof(struct rdsv3_cong_map,
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota m_rb_node));
c0dd49bdd68c0d758a67d56f07826f3b45cfc664Eiji Ota}