You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
880 lines
22 KiB
880 lines
22 KiB
/*
 * drivers/block/vs_block_client.c
 *
 * Copyright (c) 2012-2018 General Dynamics
 * Copyright (c) 2014 Open Kernel Labs, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * block vservice client driver
 *
 * Function vs_block_client_vs_alloc() is partially derived from
 * drivers/block/brd.c (brd_alloc())
 *
 */
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/init.h>
|
|
#include <linux/blkdev.h>
|
|
#include <linux/hdreg.h>
|
|
#include <linux/genhd.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/bio.h>
|
|
#include <linux/kref.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/list.h>
|
|
#include <linux/version.h>
|
|
#include <linux/idr.h>
|
|
#include <linux/module.h>
|
|
#include <linux/moduleparam.h>
|
|
|
|
#include <vservices/buffer.h>
|
|
#include <vservices/protocol/block/types.h>
|
|
#include <vservices/protocol/block/common.h>
|
|
#include <vservices/protocol/block/client.h>
|
|
#include <vservices/service.h>
|
|
#include <vservices/session.h>
|
|
#include <vservices/wait.h>
|
|
|
|
/*
 * Accessors for the struct bio fields used by this driver. The argument and
 * the whole expansion are parenthesised so that any pointer expression may be
 * passed and the result may be used inside a larger expression.
 */
#define bio_sector(bio) ((bio)->bi_iter.bi_sector)
#define bio_size(bio) ((bio)->bi_iter.bi_size)

#if !defined(bio_flags)
#define bio_flags(bio) ((bio)->bi_opf)
#endif

/* Name passed to register_blkdev() and used as the disk name prefix */
#define CLIENT_BLKDEV_NAME "vblock"

/* Number of minors passed to alloc_disk() for each virtual block device */
#define PERDEV_MINORS 256
|
|
|
|
struct block_client;
|
|
|
|
struct vs_block_device {
|
|
/*
|
|
* The client that created this block device. A reference is held
|
|
* to the client until the block device is released, so this pointer
|
|
* should always be valid. However, the client may since have reset;
|
|
* so it should only be used if, after locking it, its blkdev pointer
|
|
* points back to this block device.
|
|
*/
|
|
struct block_client *client;
|
|
|
|
int id;
|
|
struct gendisk *disk;
|
|
struct request_queue *queue;
|
|
|
|
struct kref kref;
|
|
};
|
|
|
|
struct block_client {
|
|
struct vs_client_block_state client;
|
|
struct vs_service_device *service;
|
|
|
|
/* Tasklet & queue for bouncing buffers out of read acks */
|
|
struct tasklet_struct rx_tasklet;
|
|
struct list_head rx_queue;
|
|
struct spinlock rx_queue_lock;
|
|
|
|
/*
|
|
* The current virtual block device. This gets replaced when we do
|
|
* a reset since other parts of the kernel (e.g. vfs) may still
|
|
* be accessing the disk.
|
|
*/
|
|
struct vs_block_device *blkdev;
|
|
|
|
/* Shared work item for disk creation */
|
|
struct work_struct disk_creation_work;
|
|
|
|
struct kref kref;
|
|
};
|
|
|
|
/* Map an embedded protocol state pointer back to its enclosing block_client */
#define state_to_block_client(state) \
	container_of(state, struct block_client, client)
|
|
|
|
/* Block major number obtained from register_blkdev() at module init */
static int block_client_major;

/* Allocator for unique virtual block device identifiers, and its lock */
static DEFINE_IDA(vs_block_ida);
static DEFINE_MUTEX(vs_block_ida_lock);
|
|
|
|
static int
|
|
block_client_vs_to_linux_error(vservice_block_block_io_error_t vs_err)
|
|
{
|
|
switch (vs_err) {
|
|
case VSERVICE_BLOCK_INVALID_INDEX:
|
|
return -EILSEQ;
|
|
case VSERVICE_BLOCK_MEDIA_FAILURE:
|
|
return -EIO;
|
|
case VSERVICE_BLOCK_MEDIA_TIMEOUT:
|
|
return -ETIMEDOUT;
|
|
case VSERVICE_BLOCK_UNSUPPORTED_COMMAND:
|
|
return -ENOTSUPP;
|
|
case VSERVICE_BLOCK_SERVICE_RESET:
|
|
return -ENXIO;
|
|
default:
|
|
WARN_ON(vs_err);
|
|
return 0;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void vs_block_client_kfree(struct kref *kref)
|
|
{
|
|
struct block_client *client =
|
|
container_of(kref, struct block_client, kref);
|
|
|
|
vs_put_service(client->service);
|
|
kfree(client);
|
|
}
|
|
|
|
static void vs_block_client_put(struct block_client *client)
|
|
{
|
|
kref_put(&client->kref, vs_block_client_kfree);
|
|
}
|
|
|
|
static void vs_block_device_kfree(struct kref *kref)
|
|
{
|
|
struct vs_block_device *blkdev =
|
|
container_of(kref, struct vs_block_device, kref);
|
|
|
|
/* Delete the disk and clean up its queue */
|
|
del_gendisk(blkdev->disk);
|
|
blk_cleanup_queue(blkdev->queue);
|
|
put_disk(blkdev->disk);
|
|
|
|
mutex_lock(&vs_block_ida_lock);
|
|
ida_remove(&vs_block_ida, blkdev->id);
|
|
mutex_unlock(&vs_block_ida_lock);
|
|
|
|
if (blkdev->client)
|
|
vs_block_client_put(blkdev->client);
|
|
|
|
kfree(blkdev);
|
|
}
|
|
|
|
static void vs_block_device_put(struct vs_block_device *blkdev)
|
|
{
|
|
kref_put(&blkdev->kref, vs_block_device_kfree);
|
|
}
|
|
|
|
static void
|
|
vs_block_client_blkdev_release(struct gendisk *disk, fmode_t mode)
|
|
{
|
|
struct vs_block_device *blkdev = disk->private_data;
|
|
|
|
if (WARN_ON(!blkdev))
|
|
return;
|
|
|
|
vs_block_device_put(blkdev);
|
|
}
|
|
|
|
/*
 * Block device open hook.
 *
 * Takes a reference on the vs_block_device, which is kept on success and
 * dropped again by vs_block_client_blkdev_release(). The open is refused
 * with:
 *   -ENXIO   if the device is gone, has no client, or the client has since
 *            been reset and now uses a different device;
 *   -ENODEV  if the client state could not be locked;
 *   -EROFS   if a writable open is requested on a read-only device.
 */
static int vs_block_client_blkdev_open(struct block_device *bdev, fmode_t mode)
{
	struct vs_block_device *blkdev = bdev->bd_disk->private_data;
	struct block_client *client;
	int err;

	if (!blkdev || !kref_get_unless_zero(&blkdev->kref))
		return -ENXIO;

	client = blkdev->client;
	if (WARN_ON(!client)) {
		err = -ENXIO;
		goto out_put_blkdev;
	}

	if (!vs_state_lock_safe(&client->client)) {
		err = -ENODEV;
		goto out_put_blkdev;
	}

	if (client->blkdev != blkdev) {
		/* The client has reset, this blkdev is no longer usable */
		err = -ENXIO;
	} else if ((mode & FMODE_WRITE) && client->client.readonly) {
		dev_dbg(&client->service->dev,
				"opening a readonly disk as writable\n");
		err = -EROFS;
	} else {
		err = 0;
	}

	vs_state_unlock(&client->client);

	/* On success the device reference is kept until release */
	if (!err)
		return 0;

out_put_blkdev:
	vs_block_device_put(blkdev);
	return err;
}
|
|
|
|
static int vs_block_client_blkdev_getgeo(struct block_device *bdev,
|
|
struct hd_geometry *geo)
|
|
{
|
|
/* These numbers are some default sane values for disk geometry. */
|
|
geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
|
|
geo->heads = 4;
|
|
geo->sectors = 16;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Indirectly determine linux block layer sector size and ensure that our
|
|
* sector size matches.
|
|
*/
|
|
static int vs_block_client_check_sector_size(struct block_client *client,
|
|
struct bio *bio)
|
|
{
|
|
if (unlikely(!bio_sectors(bio))) {
|
|
dev_err(&client->service->dev, "zero-length bio");
|
|
return -EIO;
|
|
}
|
|
|
|
if (unlikely(bio_size(bio) % client->client.sector_size)) {
|
|
dev_err(&client->service->dev,
|
|
"bio has %zd bytes, which is unexpected "
|
|
"for sector_size of %zd bytes",
|
|
(size_t)bio_size(bio),
|
|
(size_t)client->client.sector_size);
|
|
return -EIO;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct block_device_operations block_client_ops = {
|
|
.getgeo = vs_block_client_blkdev_getgeo,
|
|
.open = vs_block_client_blkdev_open,
|
|
.release = vs_block_client_blkdev_release,
|
|
.owner = THIS_MODULE,
|
|
};
|
|
|
|
static int block_client_send_write_req(struct block_client *client,
|
|
struct bio *bio)
|
|
{
|
|
struct vs_client_block_state *state = &client->client;
|
|
struct vs_mbuf *mbuf;
|
|
struct vs_pbuf pbuf;
|
|
struct bio_vec *bvec;
|
|
int err;
|
|
bool flush, nodelay, commit;
|
|
struct bvec_iter iter;
|
|
struct bio_vec bvec_local;
|
|
|
|
err = vs_block_client_check_sector_size(client, bio);
|
|
if (err < 0)
|
|
goto fail;
|
|
|
|
do {
|
|
/* Wait until it's possible to send a write request */
|
|
err = vs_wait_state_nointr(state,
|
|
vs_client_block_io_req_write_can_send(state));
|
|
if (err == -ECANCELED)
|
|
err = -ENXIO;
|
|
if (err < 0)
|
|
goto fail;
|
|
|
|
/* Wait for quota, while sending a write remains possible */
|
|
mbuf = vs_wait_alloc_nointr(state,
|
|
vs_client_block_io_req_write_can_send(state),
|
|
vs_client_block_io_alloc_req_write(
|
|
state, &pbuf, GFP_KERNEL));
|
|
err = IS_ERR(mbuf) ? PTR_ERR(mbuf) : 0;
|
|
|
|
/* Retry if sending is no longer possible */
|
|
} while (err == -ECANCELED);
|
|
|
|
if (err < 0)
|
|
goto fail;
|
|
|
|
vs_pbuf_resize(&pbuf, 0);
|
|
|
|
bvec = &bvec_local;
|
|
bio_for_each_segment(bvec_local, bio, iter)
|
|
{
|
|
unsigned long flags;
|
|
void *buf = bvec_kmap_irq(bvec, &flags);
|
|
flush_kernel_dcache_page(bvec->bv_page);
|
|
err = vs_pbuf_append(&pbuf, buf, bvec->bv_len);
|
|
bvec_kunmap_irq(buf, &flags);
|
|
if (err < 0) {
|
|
dev_err(&client->service->dev,
|
|
"pbuf copy failed with err %d\n", err);
|
|
err = -EIO;
|
|
goto fail_free_write;
|
|
}
|
|
}
|
|
|
|
if (unlikely(vs_pbuf_size(&pbuf) != bio_size(bio))) {
|
|
dev_err(&client->service->dev,
|
|
"pbuf size is wrong: %zd, should be %zd\n",
|
|
vs_pbuf_size(&pbuf), (size_t)bio_size(bio));
|
|
err = -EIO;
|
|
goto fail_free_write;
|
|
}
|
|
|
|
flush = (bio_flags(bio) & REQ_PREFLUSH);
|
|
commit = (bio_flags(bio) & REQ_FUA);
|
|
nodelay = (bio_flags(bio) & REQ_SYNC);
|
|
err = vs_client_block_io_req_write(state, bio, bio_sector(bio),
|
|
bio_sectors(bio), nodelay, flush, commit, pbuf, mbuf);
|
|
|
|
if (err) {
|
|
dev_err(&client->service->dev,
|
|
"write req failed with err %d\n", err);
|
|
goto fail_free_write;
|
|
}
|
|
|
|
return 0;
|
|
|
|
fail_free_write:
|
|
vs_client_block_io_free_req_write(state, &pbuf, mbuf);
|
|
fail:
|
|
return err;
|
|
}
|
|
|
|
static int block_client_send_read_req(struct block_client *client,
|
|
struct bio *bio)
|
|
{
|
|
struct vs_client_block_state *state = &client->client;
|
|
int err;
|
|
bool flush, nodelay;
|
|
|
|
err = vs_block_client_check_sector_size(client, bio);
|
|
if (err < 0)
|
|
return err;
|
|
flush = (bio_flags(bio) & REQ_PREFLUSH);
|
|
nodelay = (bio_flags(bio) & REQ_SYNC);
|
|
do {
|
|
/* Wait until it's possible to send a read request */
|
|
err = vs_wait_state_nointr(state,
|
|
vs_client_block_io_req_read_can_send(state));
|
|
if (err == -ECANCELED)
|
|
err = -ENXIO;
|
|
if (err < 0)
|
|
break;
|
|
|
|
/* Wait for quota, while sending a read remains possible */
|
|
err = vs_wait_send_nointr(state,
|
|
vs_client_block_io_req_read_can_send(state),
|
|
vs_client_block_io_req_read(state, bio,
|
|
bio_sector(bio), bio_sectors(bio),
|
|
nodelay, flush, GFP_KERNEL));
|
|
} while (err == -ECANCELED);
|
|
|
|
return err;
|
|
}
|
|
|
|
static blk_qc_t
|
|
vs_block_client_make_request(struct request_queue *q, struct bio *bio)
|
|
{
|
|
struct vs_block_device *blkdev = bio->bi_disk->private_data;
|
|
struct block_client *client;
|
|
int err = 0;
|
|
|
|
client = blkdev->client;
|
|
if (!client || !kref_get_unless_zero(&client->kref)) {
|
|
err = -ENODEV;
|
|
goto fail_get_client;
|
|
}
|
|
|
|
blk_queue_split(q, &bio);
|
|
|
|
if (!vs_state_lock_safe(&client->client)) {
|
|
err = -ENODEV;
|
|
goto fail_lock_client;
|
|
}
|
|
|
|
if (client->blkdev != blkdev) {
|
|
/* Client has reset, this block device is no longer usable */
|
|
err = -EIO;
|
|
goto fail_check_client;
|
|
}
|
|
|
|
if (bio_data_dir(bio) == WRITE)
|
|
err = block_client_send_write_req(client, bio);
|
|
else
|
|
err = block_client_send_read_req(client, bio);
|
|
|
|
fail_check_client:
|
|
if (err == -ENOLINK)
|
|
err = -EIO;
|
|
else
|
|
vs_state_unlock(&client->client);
|
|
fail_lock_client:
|
|
vs_block_client_put(client);
|
|
fail_get_client:
|
|
|
|
if (err < 0) {
|
|
bio->bi_status = err;
|
|
bio_endio(bio);
|
|
}
|
|
|
|
return BLK_QC_T_NONE;
|
|
}
|
|
|
|
static int vs_block_client_get_blkdev_id(struct block_client *client)
|
|
{
|
|
int id;
|
|
int ret;
|
|
|
|
retry:
|
|
ret = ida_pre_get(&vs_block_ida, GFP_KERNEL);
|
|
if (ret == 0)
|
|
return -ENOMEM;
|
|
|
|
mutex_lock(&vs_block_ida_lock);
|
|
ret = ida_get_new(&vs_block_ida, &id);
|
|
mutex_unlock(&vs_block_ida_lock);
|
|
|
|
if (ret == -EAGAIN)
|
|
goto retry;
|
|
|
|
return id;
|
|
}
|
|
|
|
static int vs_block_client_disk_add(struct block_client *client)
|
|
{
|
|
struct vs_block_device *blkdev;
|
|
unsigned int max_hw_sectors;
|
|
int err;
|
|
|
|
dev_dbg(&client->service->dev, "device add\n");
|
|
|
|
blkdev = kzalloc(sizeof(*blkdev), GFP_KERNEL);
|
|
if (!blkdev) {
|
|
err = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
|
|
kref_init(&blkdev->kref);
|
|
blkdev->id = vs_block_client_get_blkdev_id(client);
|
|
if (blkdev->id < 0) {
|
|
err = blkdev->id;
|
|
goto fail_free_blkdev;
|
|
}
|
|
|
|
if ((blkdev->id * PERDEV_MINORS) >> MINORBITS) {
|
|
err = -ENODEV;
|
|
goto fail_remove_ida;
|
|
}
|
|
|
|
blkdev->queue = blk_alloc_queue(GFP_KERNEL);
|
|
if (!blkdev->queue) {
|
|
dev_err(&client->service->dev,
|
|
"Error initializing blk queue\n");
|
|
err = -ENOMEM;
|
|
goto fail_remove_ida;
|
|
}
|
|
|
|
blk_queue_make_request(blkdev->queue, vs_block_client_make_request);
|
|
blk_queue_bounce_limit(blkdev->queue, BLK_BOUNCE_ANY);
|
|
blk_queue_dma_alignment(blkdev->queue, 0);
|
|
|
|
/*
|
|
* Mark this as a paravirtualised device. This is just an alias
|
|
* of QUEUE_FLAG_NONROT, which prevents the I/O schedulers trying
|
|
* to wait for the disk to spin.
|
|
*/
|
|
queue_flag_set_unlocked(QUEUE_FLAG_VIRT, blkdev->queue);
|
|
|
|
blkdev->queue->queuedata = blkdev;
|
|
|
|
blkdev->client = client;
|
|
kref_get(&client->kref);
|
|
|
|
max_hw_sectors = min_t(sector_t, BLK_DEF_MAX_SECTORS,
|
|
client->client.segment_size /
|
|
client->client.sector_size);
|
|
blk_queue_max_hw_sectors(blkdev->queue, max_hw_sectors);
|
|
blk_queue_logical_block_size(blkdev->queue,
|
|
client->client.sector_size);
|
|
blk_queue_physical_block_size(blkdev->queue,
|
|
client->client.sector_size);
|
|
|
|
blkdev->disk = alloc_disk(PERDEV_MINORS);
|
|
if (!blkdev->disk) {
|
|
dev_err(&client->service->dev, "Error allocating disk\n");
|
|
err = -ENOMEM;
|
|
goto fail_free_blk_queue;
|
|
}
|
|
|
|
if (client->client.readonly) {
|
|
dev_dbg(&client->service->dev, "set device as readonly\n");
|
|
set_disk_ro(blkdev->disk, true);
|
|
}
|
|
|
|
blkdev->disk->major = block_client_major;
|
|
blkdev->disk->first_minor = blkdev->id * PERDEV_MINORS;
|
|
blkdev->disk->fops = &block_client_ops;
|
|
blkdev->disk->private_data = blkdev;
|
|
blkdev->disk->queue = blkdev->queue;
|
|
blkdev->disk->flags |= GENHD_FL_EXT_DEVT;
|
|
|
|
/*
|
|
* The block device name is vblock<x>, where x is a unique
|
|
* identifier. Userspace should rename or symlink the device
|
|
* appropriately, typically by processing the add uevent.
|
|
*
|
|
* If a virtual block device is reset then it may re-open with a
|
|
* different identifier if something still holds a reference to
|
|
* the old device (such as a userspace application having an open
|
|
* file handle).
|
|
*/
|
|
snprintf(blkdev->disk->disk_name, sizeof(blkdev->disk->disk_name),
|
|
"%s%d", CLIENT_BLKDEV_NAME, blkdev->id);
|
|
set_capacity(blkdev->disk, client->client.device_sectors *
|
|
(client->client.sector_size >> 9));
|
|
|
|
/*
|
|
* We need to hold a reference on blkdev across add_disk(), to make
|
|
* sure a concurrent reset does not immediately release the blkdev
|
|
* and call del_gendisk().
|
|
*/
|
|
kref_get(&blkdev->kref);
|
|
|
|
vs_service_state_lock(client->service);
|
|
if (!VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base)) {
|
|
vs_service_state_unlock(client->service);
|
|
err = -ENXIO;
|
|
goto fail_free_blk_queue;
|
|
}
|
|
client->blkdev = blkdev;
|
|
vs_service_state_unlock(client->service);
|
|
|
|
device_add_disk(&client->service->dev, blkdev->disk);
|
|
dev_dbg(&client->service->dev, "added block disk '%s'\n",
|
|
blkdev->disk->disk_name);
|
|
|
|
/* Release the reference taken above. */
|
|
vs_block_device_put(blkdev);
|
|
|
|
return 0;
|
|
|
|
fail_free_blk_queue:
|
|
blk_cleanup_queue(blkdev->queue);
|
|
fail_remove_ida:
|
|
mutex_lock(&vs_block_ida_lock);
|
|
ida_remove(&vs_block_ida, blkdev->id);
|
|
mutex_unlock(&vs_block_ida_lock);
|
|
fail_free_blkdev:
|
|
kfree(blkdev);
|
|
fail:
|
|
return err;
|
|
}
|
|
|
|
static void vs_block_client_disk_creation_work(struct work_struct *work)
|
|
{
|
|
struct block_client *client = container_of(work,
|
|
struct block_client, disk_creation_work);
|
|
struct vs_block_device *blkdev;
|
|
bool running;
|
|
|
|
vs_service_state_lock(client->service);
|
|
blkdev = client->blkdev;
|
|
running = VSERVICE_BASE_STATE_IS_RUNNING(client->client.state.base);
|
|
|
|
dev_dbg(&client->service->dev,
|
|
"disk changed: blkdev = %pK, running = %d\n",
|
|
client->blkdev, running);
|
|
if (!blkdev && running) {
|
|
dev_dbg(&client->service->dev, "adding block disk\n");
|
|
vs_service_state_unlock(client->service);
|
|
vs_block_client_disk_add(client);
|
|
} else {
|
|
vs_service_state_unlock(client->service);
|
|
}
|
|
}
|
|
|
|
static void vs_block_client_rx_tasklet(unsigned long data);
|
|
|
|
static struct vs_client_block_state *
|
|
vs_block_client_alloc(struct vs_service_device *service)
|
|
{
|
|
struct block_client *client;
|
|
|
|
client = kzalloc(sizeof(*client), GFP_KERNEL);
|
|
if (!client) {
|
|
dev_err(&service->dev, "Error allocating client struct\n");
|
|
return NULL;
|
|
}
|
|
|
|
vs_get_service(service);
|
|
client->service = service;
|
|
|
|
INIT_LIST_HEAD(&client->rx_queue);
|
|
spin_lock_init(&client->rx_queue_lock);
|
|
tasklet_init(&client->rx_tasklet, vs_block_client_rx_tasklet,
|
|
(unsigned long)client);
|
|
tasklet_disable(&client->rx_tasklet);
|
|
|
|
INIT_WORK(&client->disk_creation_work,
|
|
vs_block_client_disk_creation_work);
|
|
kref_init(&client->kref);
|
|
|
|
dev_dbg(&service->dev, "New block client %pK\n", client);
|
|
|
|
return &client->client;
|
|
}
|
|
|
|
static void vs_block_client_release(struct vs_client_block_state *state)
|
|
{
|
|
struct block_client *client = state_to_block_client(state);
|
|
|
|
flush_work(&client->disk_creation_work);
|
|
|
|
vs_block_client_put(client);
|
|
}
|
|
|
|
/* FIXME: Jira ticket SDK-2459 - anjaniv */
|
|
static void vs_block_client_closed(struct vs_client_block_state *state)
|
|
{
|
|
struct block_client *client = state_to_block_client(state);
|
|
|
|
/*
|
|
* Stop the RX bounce tasklet and clean up its queue. We can wait for
|
|
* it to stop safely because it doesn't need to acquire the state
|
|
* lock, only the RX lock which we acquire after it is disabled.
|
|
*/
|
|
tasklet_disable(&client->rx_tasklet);
|
|
spin_lock(&client->rx_queue_lock);
|
|
while (!list_empty(&client->rx_queue)) {
|
|
struct vs_mbuf *mbuf = list_first_entry(&client->rx_queue,
|
|
struct vs_mbuf, queue);
|
|
struct vs_pbuf pbuf;
|
|
list_del(&mbuf->queue);
|
|
vs_client_block_io_getbufs_ack_read(state, &pbuf, mbuf);
|
|
vs_client_block_io_free_ack_read(state, &pbuf, mbuf);
|
|
}
|
|
spin_unlock(&client->rx_queue_lock);
|
|
|
|
if (client->blkdev) {
|
|
struct vs_block_device *blkdev = client->blkdev;
|
|
char service_remove[] = "REMOVING_SERVICE=1";
|
|
/* + 9 because "DEVNAME=" is 8 chars plus 1 for '\0' */
|
|
char devname[sizeof(blkdev->disk->disk_name) + 9];
|
|
char *envp[] = { service_remove, devname, NULL };
|
|
|
|
dev_dbg(&client->service->dev, "removing block disk\n");
|
|
|
|
/*
|
|
* Send a change event with DEVNAME to allow the block helper
|
|
* script to remove any server sessions which use either
|
|
* v${SERVICE_NAME} or ${DEVNAME}. The remove event generated
|
|
* by the session driver doesn't include DEVNAME so the only
|
|
* way for userspace to map SERVICE_NAME to DEVNAME is by the
|
|
* symlink added when the client service was created. If that
|
|
* symlink has been deleted, there's no other way to connect
|
|
* the two names.
|
|
*/
|
|
snprintf(devname, sizeof(devname), "DEVNAME=%s",
|
|
blkdev->disk->disk_name);
|
|
kobject_uevent_env(&client->service->dev.kobj, KOBJ_CHANGE,
|
|
envp);
|
|
|
|
/*
|
|
* We are done with the device now. The block device will only
|
|
* get removed once there are no more users (e.g. userspace
|
|
* applications).
|
|
*/
|
|
client->blkdev = NULL;
|
|
vs_block_device_put(blkdev);
|
|
}
|
|
}
|
|
|
|
static void vs_block_client_opened(struct vs_client_block_state *state)
|
|
{
|
|
struct block_client *client = state_to_block_client(state);
|
|
|
|
#if !defined(CONFIG_LBDAF) && !defined(CONFIG_64BIT)
|
|
if ((state->device_sectors * (state->sector_size >> 9))
|
|
>> (sizeof(sector_t) * 8)) {
|
|
dev_err(&client->service->dev,
|
|
"Client doesn't support full capacity large block devices\n");
|
|
vs_client_block_close(state);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
/* Unblock the RX bounce tasklet. */
|
|
tasklet_enable(&client->rx_tasklet);
|
|
|
|
/*
|
|
* The block device allocation needs to sleep, so we defer it to a
|
|
* work queue.
|
|
*/
|
|
queue_work(client->service->work_queue, &client->disk_creation_work);
|
|
}
|
|
|
|
static int vs_block_client_ack_read(struct vs_client_block_state *state,
|
|
void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf)
|
|
{
|
|
struct block_client *client = state_to_block_client(state);
|
|
struct bio *bio = tag;
|
|
struct bio_vec *bvec;
|
|
int err = 0;
|
|
size_t bytes_read = 0;
|
|
struct bio_vec bvec_local;
|
|
struct bvec_iter iter;
|
|
|
|
bvec = &bvec_local;
|
|
bio_for_each_segment(bvec_local, bio, iter)
|
|
{
|
|
unsigned long flags;
|
|
void *buf;
|
|
if (vs_pbuf_size(&pbuf) < bytes_read + bvec->bv_len) {
|
|
dev_err(&client->service->dev,
|
|
"bio read overrun: %zu into %zu byte response, but need %zd bytes\n",
|
|
bytes_read, vs_pbuf_size(&pbuf),
|
|
(size_t)bvec->bv_len);
|
|
err = -EIO;
|
|
break;
|
|
}
|
|
buf = bvec_kmap_irq(bvec, &flags);
|
|
memcpy(buf, vs_pbuf_data(&pbuf) + bytes_read, bvec->bv_len);
|
|
flush_kernel_dcache_page(bvec->bv_page);
|
|
bvec_kunmap_irq(buf, &flags);
|
|
bytes_read += bvec->bv_len;
|
|
}
|
|
|
|
vs_client_block_io_free_ack_read(state, &pbuf, mbuf);
|
|
|
|
bio->bi_status = err;
|
|
bio_endio(bio);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void vs_block_client_rx_tasklet(unsigned long data)
|
|
{
|
|
struct block_client *client = (struct block_client *)data;
|
|
struct vs_mbuf *mbuf;
|
|
struct vs_pbuf pbuf;
|
|
|
|
spin_lock(&client->rx_queue_lock);
|
|
|
|
/* The list shouldn't be empty. */
|
|
if (WARN_ON(list_empty(&client->rx_queue))) {
|
|
spin_unlock(&client->rx_queue_lock);
|
|
return;
|
|
}
|
|
|
|
/* Get the next mbuf, and reschedule ourselves if there are more. */
|
|
mbuf = list_first_entry(&client->rx_queue, struct vs_mbuf, queue);
|
|
list_del(&mbuf->queue);
|
|
if (!list_empty(&client->rx_queue))
|
|
tasklet_schedule(&client->rx_tasklet);
|
|
|
|
spin_unlock(&client->rx_queue_lock);
|
|
|
|
/* Process the ack. */
|
|
vs_client_block_io_getbufs_ack_read(&client->client, &pbuf, mbuf);
|
|
vs_block_client_ack_read(&client->client, mbuf->priv, pbuf, mbuf);
|
|
}
|
|
|
|
static int vs_block_client_queue_ack_read(struct vs_client_block_state *state,
|
|
void *tag, struct vs_pbuf pbuf, struct vs_mbuf *mbuf)
|
|
{
|
|
struct block_client *client = state_to_block_client(state);
|
|
|
|
spin_lock(&client->rx_queue_lock);
|
|
list_add_tail(&mbuf->queue, &client->rx_queue);
|
|
mbuf->priv = tag;
|
|
spin_unlock(&client->rx_queue_lock);
|
|
|
|
tasklet_schedule(&client->rx_tasklet);
|
|
|
|
wake_up(&state->service->quota_wq);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int vs_block_client_ack_write(struct vs_client_block_state *state,
|
|
void *tag)
|
|
{
|
|
struct bio *bio = tag;
|
|
|
|
if (WARN_ON(!bio))
|
|
return -EPROTO;
|
|
|
|
bio->bi_status = 0;
|
|
bio_endio(bio);
|
|
|
|
wake_up(&state->service->quota_wq);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Read/write nack handler: completes the bio carried in the tag with the
 * block status corresponding to the protocol error, and wakes waiters on the
 * service's quota wait queue.
 *
 * Returns 0, or -EPROTO (with a warning) if the nack carries no bio tag.
 */
static int vs_block_client_nack_io(struct vs_client_block_state *state,
		void *tag, vservice_block_block_io_error_t err)
{
	struct bio *bio = tag;
	int linux_err;

	if (WARN_ON(!bio))
		return -EPROTO;

	/*
	 * A nack always means the request failed. The translation yields 0
	 * for codes it does not recognise, so fall back to a generic I/O
	 * error rather than completing the bio as a success.
	 */
	linux_err = block_client_vs_to_linux_error(err);
	if (!linux_err)
		linux_err = -EIO;

	/* bi_status is a blk_status_t, not an errno: convert it */
	bio->bi_status = errno_to_blk_status(linux_err);
	bio_endio(bio);

	wake_up(&state->service->quota_wq);

	return 0;
}
|
|
|
|
static struct vs_client_block block_client_driver = {
|
|
.rx_atomic = true,
|
|
.alloc = vs_block_client_alloc,
|
|
.release = vs_block_client_release,
|
|
.opened = vs_block_client_opened,
|
|
.closed = vs_block_client_closed,
|
|
.io = {
|
|
.ack_read = vs_block_client_queue_ack_read,
|
|
.nack_read = vs_block_client_nack_io,
|
|
.ack_write = vs_block_client_ack_write,
|
|
.nack_write = vs_block_client_nack_io,
|
|
}
|
|
};
|
|
|
|
/*
 * Module init: obtains a block major number and registers the block client
 * protocol driver.
 *
 * Returns 0 on success, or the negative errno reported by whichever
 * registration failed. The block major is unregistered again if the driver
 * registration fails.
 */
static int __init vs_block_client_init(void)
{
	int err;

	/* Major 0 asks for a number to be assigned; negative means failure */
	err = register_blkdev(0, CLIENT_BLKDEV_NAME);
	if (err < 0) {
		pr_err("Err registering blkdev\n");
		return err;
	}
	block_client_major = err;

	err = vservice_block_client_register(&block_client_driver,
			"block_client_driver");
	if (err)
		goto fail_unregister_blkdev;

	return 0;

fail_unregister_blkdev:
	unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME);
	return err;
}
|
|
|
|
/*
 * Module exit: undoes vs_block_client_init() in reverse order, unregistering
 * the protocol driver first and the block major second.
 */
static void __exit vs_block_client_exit(void)
{
	vservice_block_client_unregister(&block_client_driver);

	unregister_blkdev(block_client_major, CLIENT_BLKDEV_NAME);
}
|
|
|
|
module_init(vs_block_client_init);
|
|
module_exit(vs_block_client_exit);
|
|
|
|
MODULE_DESCRIPTION("OKL4 Virtual Services Block Client Driver");
|
|
MODULE_AUTHOR("Open Kernel Labs, Inc");
|
|
|