From: Joel Becker Date: Thu, 28 Jan 2010 23:00:49 +0000 (-0800) Subject: ocfs2_dlmfs: Move to its own directory X-Git-Tag: v2.6.34-rc1~214^2~15 X-Git-Url: https://openfabrics.org/gitweb/?a=commitdiff_plain;h=34a9dd7e29e9129fec40c645a03f1bbbe810e771;p=~emulex%2Finfiniband.git ocfs2_dlmfs: Move to its own directory We're going to remove the tie between ocfs2_dlmfs and o2dlm. ocfs2_dlmfs doesn't belong in the fs/ocfs2/dlm directory anymore. Here we move it to fs/ocfs2/dlmfs. Signed-off-by: Joel Becker --- diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 600d2d2ade1..791c0886c06 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile @@ -46,6 +46,7 @@ ocfs2_stackglue-objs := stackglue.o ocfs2_stack_o2cb-objs := stack_o2cb.o ocfs2_stack_user-objs := stack_user.o +obj-$(CONFIG_OCFS2_FS) += dlmfs/ # cluster/ is always needed when OCFS2_FS for masklog support obj-$(CONFIG_OCFS2_FS) += cluster/ obj-$(CONFIG_OCFS2_FS_O2CB) += dlm/ diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile index 19036137570..dcebf0d920f 100644 --- a/fs/ocfs2/dlm/Makefile +++ b/fs/ocfs2/dlm/Makefile @@ -1,8 +1,7 @@ EXTRA_CFLAGS += -Ifs/ocfs2 -obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlmfs.o +obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \ dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o -ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c deleted file mode 100644 index ddf55dafba2..00000000000 --- a/fs/ocfs2/dlm/dlmfs.c +++ /dev/null @@ -1,710 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmfs.c - * - * Code which implements the kernel side of a minimal userspace - * interface to our DLM. This file handles the virtual file system - * used for communication with userspace. Credit should go to ramfs, - * which was a template for the fs side of this module. - * - * Copyright (C) 2003, 2004 Oracle. 
All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -/* Simple VFS hooks based on: */ -/* - * Resizable simple ram filesystem for Linux. - * - * Copyright (C) 2000 Linus Torvalds. - * 2000 Transmeta Corp. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - - -#include "cluster/nodemanager.h" -#include "cluster/heartbeat.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" - -#include "userdlm.h" - -#include "dlmfsver.h" - -#define MLOG_MASK_PREFIX ML_DLMFS -#include "cluster/masklog.h" - -#include "ocfs2_lockingver.h" - -static const struct super_operations dlmfs_ops; -static const struct file_operations dlmfs_file_operations; -static const struct inode_operations dlmfs_dir_inode_operations; -static const struct inode_operations dlmfs_root_inode_operations; -static const struct inode_operations dlmfs_file_inode_operations; -static struct kmem_cache *dlmfs_inode_cache; - -struct workqueue_struct *user_dlm_worker; - -/* - * This is the userdlmfs locking protocol version. - * - * See fs/ocfs2/dlmglue.c for more details on locking versions. 
- */ -static const struct dlm_protocol_version user_locking_protocol = { - .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, - .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, -}; - - -/* - * These are the ABI capabilities of dlmfs. - * - * Over time, dlmfs has added some features that were not part of the - * initial ABI. Unfortunately, some of these features are not detectable - * via standard usage. For example, Linux's default poll always returns - * POLLIN, so there is no way for a caller of poll(2) to know when dlmfs - * added poll support. Instead, we provide this list of new capabilities. - * - * Capabilities is a read-only attribute. We do it as a module parameter - * so we can discover it whether dlmfs is built in, loaded, or even not - * loaded. - * - * The ABI features are local to this machine's dlmfs mount. This is - * distinct from the locking protocol, which is concerned with inter-node - * interaction. - * - * Capabilities: - * - bast : POLLIN against the file descriptor of a held lock - * signifies a bast fired on the lock. - */ -#define DLMFS_CAPABILITIES "bast" -extern int param_set_dlmfs_capabilities(const char *val, - struct kernel_param *kp) -{ - printk(KERN_ERR "%s: readonly parameter\n", kp->name); - return -EINVAL; -} -static int param_get_dlmfs_capabilities(char *buffer, - struct kernel_param *kp) -{ - return strlcpy(buffer, DLMFS_CAPABILITIES, - strlen(DLMFS_CAPABILITIES) + 1); -} -module_param_call(capabilities, param_set_dlmfs_capabilities, - param_get_dlmfs_capabilities, NULL, 0444); -MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); - - -/* - * decodes a set of open flags into a valid lock level and a set of flags. 
- * returns < 0 if we have invalid flags - * flags which mean something to us: - * O_RDONLY -> PRMODE level - * O_WRONLY -> EXMODE level - * - * O_NONBLOCK -> LKM_NOQUEUE - */ -static int dlmfs_decode_open_flags(int open_flags, - int *level, - int *flags) -{ - if (open_flags & (O_WRONLY|O_RDWR)) - *level = LKM_EXMODE; - else - *level = LKM_PRMODE; - - *flags = 0; - if (open_flags & O_NONBLOCK) - *flags |= LKM_NOQUEUE; - - return 0; -} - -static int dlmfs_file_open(struct inode *inode, - struct file *file) -{ - int status, level, flags; - struct dlmfs_filp_private *fp = NULL; - struct dlmfs_inode_private *ip; - - if (S_ISDIR(inode->i_mode)) - BUG(); - - mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino, - file->f_flags); - - status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); - if (status < 0) - goto bail; - - /* We don't want to honor O_APPEND at read/write time as it - * doesn't make sense for LVB writes. */ - file->f_flags &= ~O_APPEND; - - fp = kmalloc(sizeof(*fp), GFP_NOFS); - if (!fp) { - status = -ENOMEM; - goto bail; - } - fp->fp_lock_level = level; - - ip = DLMFS_I(inode); - - status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); - if (status < 0) { - /* this is a strange error to return here but I want - * to be able userspace to be able to distinguish a - * valid lock request from one that simply couldn't be - * granted. 
*/ - if (flags & LKM_NOQUEUE && status == -EAGAIN) - status = -ETXTBSY; - kfree(fp); - goto bail; - } - - file->private_data = fp; -bail: - return status; -} - -static int dlmfs_file_release(struct inode *inode, - struct file *file) -{ - int level, status; - struct dlmfs_inode_private *ip = DLMFS_I(inode); - struct dlmfs_filp_private *fp = - (struct dlmfs_filp_private *) file->private_data; - - if (S_ISDIR(inode->i_mode)) - BUG(); - - mlog(0, "close called on inode %lu\n", inode->i_ino); - - status = 0; - if (fp) { - level = fp->fp_lock_level; - if (level != LKM_IVMODE) - user_dlm_cluster_unlock(&ip->ip_lockres, level); - - kfree(fp); - file->private_data = NULL; - } - - return 0; -} - -static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) -{ - int event = 0; - struct inode *inode = file->f_path.dentry->d_inode; - struct dlmfs_inode_private *ip = DLMFS_I(inode); - - poll_wait(file, &ip->ip_lockres.l_event, wait); - - spin_lock(&ip->ip_lockres.l_lock); - if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED) - event = POLLIN | POLLRDNORM; - spin_unlock(&ip->ip_lockres.l_lock); - - return event; -} - -static ssize_t dlmfs_file_read(struct file *filp, - char __user *buf, - size_t count, - loff_t *ppos) -{ - int bytes_left; - ssize_t readlen; - char *lvb_buf; - struct inode *inode = filp->f_path.dentry->d_inode; - - mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", - inode->i_ino, count, *ppos); - - if (*ppos >= i_size_read(inode)) - return 0; - - if (!count) - return 0; - - if (!access_ok(VERIFY_WRITE, buf, count)) - return -EFAULT; - - /* don't read past the lvb */ - if ((count + *ppos) > i_size_read(inode)) - readlen = i_size_read(inode) - *ppos; - else - readlen = count - *ppos; - - lvb_buf = kmalloc(readlen, GFP_NOFS); - if (!lvb_buf) - return -ENOMEM; - - user_dlm_read_lvb(inode, lvb_buf, readlen); - bytes_left = __copy_to_user(buf, lvb_buf, readlen); - readlen -= bytes_left; - - kfree(lvb_buf); - - *ppos = *ppos + readlen; - - mlog(0, "read %zd 
bytes\n", readlen); - return readlen; -} - -static ssize_t dlmfs_file_write(struct file *filp, - const char __user *buf, - size_t count, - loff_t *ppos) -{ - int bytes_left; - ssize_t writelen; - char *lvb_buf; - struct inode *inode = filp->f_path.dentry->d_inode; - - mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", - inode->i_ino, count, *ppos); - - if (*ppos >= i_size_read(inode)) - return -ENOSPC; - - if (!count) - return 0; - - if (!access_ok(VERIFY_READ, buf, count)) - return -EFAULT; - - /* don't write past the lvb */ - if ((count + *ppos) > i_size_read(inode)) - writelen = i_size_read(inode) - *ppos; - else - writelen = count - *ppos; - - lvb_buf = kmalloc(writelen, GFP_NOFS); - if (!lvb_buf) - return -ENOMEM; - - bytes_left = copy_from_user(lvb_buf, buf, writelen); - writelen -= bytes_left; - if (writelen) - user_dlm_write_lvb(inode, lvb_buf, writelen); - - kfree(lvb_buf); - - *ppos = *ppos + writelen; - mlog(0, "wrote %zd bytes\n", writelen); - return writelen; -} - -static void dlmfs_init_once(void *foo) -{ - struct dlmfs_inode_private *ip = - (struct dlmfs_inode_private *) foo; - - ip->ip_dlm = NULL; - ip->ip_parent = NULL; - - inode_init_once(&ip->ip_vfs_inode); -} - -static struct inode *dlmfs_alloc_inode(struct super_block *sb) -{ - struct dlmfs_inode_private *ip; - - ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); - if (!ip) - return NULL; - - return &ip->ip_vfs_inode; -} - -static void dlmfs_destroy_inode(struct inode *inode) -{ - kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); -} - -static void dlmfs_clear_inode(struct inode *inode) -{ - int status; - struct dlmfs_inode_private *ip; - - if (!inode) - return; - - mlog(0, "inode %lu\n", inode->i_ino); - - ip = DLMFS_I(inode); - - if (S_ISREG(inode->i_mode)) { - status = user_dlm_destroy_lock(&ip->ip_lockres); - if (status < 0) - mlog_errno(status); - iput(ip->ip_parent); - goto clear_fields; - } - - mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); - /* we must be a directory. 
If required, lets unregister the - * dlm context now. */ - if (ip->ip_dlm) - user_dlm_unregister_context(ip->ip_dlm); -clear_fields: - ip->ip_parent = NULL; - ip->ip_dlm = NULL; -} - -static struct backing_dev_info dlmfs_backing_dev_info = { - .name = "ocfs2-dlmfs", - .ra_pages = 0, /* No readahead */ - .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, -}; - -static struct inode *dlmfs_get_root_inode(struct super_block *sb) -{ - struct inode *inode = new_inode(sb); - int mode = S_IFDIR | 0755; - struct dlmfs_inode_private *ip; - - if (inode) { - ip = DLMFS_I(inode); - - inode->i_mode = mode; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - inc_nlink(inode); - - inode->i_fop = &simple_dir_operations; - inode->i_op = &dlmfs_root_inode_operations; - } - - return inode; -} - -static struct inode *dlmfs_get_inode(struct inode *parent, - struct dentry *dentry, - int mode) -{ - struct super_block *sb = parent->i_sb; - struct inode * inode = new_inode(sb); - struct dlmfs_inode_private *ip; - - if (!inode) - return NULL; - - inode->i_mode = mode; - inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); - inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - - ip = DLMFS_I(inode); - ip->ip_dlm = DLMFS_I(parent)->ip_dlm; - - switch (mode & S_IFMT) { - default: - /* for now we don't support anything other than - * directories and regular files. */ - BUG(); - break; - case S_IFREG: - inode->i_op = &dlmfs_file_inode_operations; - inode->i_fop = &dlmfs_file_operations; - - i_size_write(inode, DLM_LVB_LEN); - - user_dlm_lock_res_init(&ip->ip_lockres, dentry); - - /* released at clear_inode time, this insures that we - * get to drop the dlm reference on each lock *before* - * we call the unregister code for releasing parent - * directories. 
*/ - ip->ip_parent = igrab(parent); - BUG_ON(!ip->ip_parent); - break; - case S_IFDIR: - inode->i_op = &dlmfs_dir_inode_operations; - inode->i_fop = &simple_dir_operations; - - /* directory inodes start off with i_nlink == - * 2 (for "." entry) */ - inc_nlink(inode); - break; - } - - if (parent->i_mode & S_ISGID) { - inode->i_gid = parent->i_gid; - if (S_ISDIR(mode)) - inode->i_mode |= S_ISGID; - } - - return inode; -} - -/* - * File creation. Allocate an inode, and we're done.. - */ -/* SMP-safe */ -static int dlmfs_mkdir(struct inode * dir, - struct dentry * dentry, - int mode) -{ - int status; - struct inode *inode = NULL; - struct qstr *domain = &dentry->d_name; - struct dlmfs_inode_private *ip; - struct dlm_ctxt *dlm; - struct dlm_protocol_version proto = user_locking_protocol; - - mlog(0, "mkdir %.*s\n", domain->len, domain->name); - - /* verify that we have a proper domain */ - if (domain->len >= O2NM_MAX_NAME_LEN) { - status = -EINVAL; - mlog(ML_ERROR, "invalid domain name for directory.\n"); - goto bail; - } - - inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); - if (!inode) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - ip = DLMFS_I(inode); - - dlm = user_dlm_register_context(domain, &proto); - if (IS_ERR(dlm)) { - status = PTR_ERR(dlm); - mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", - status, domain->len, domain->name); - goto bail; - } - ip->ip_dlm = dlm; - - inc_nlink(dir); - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ - - status = 0; -bail: - if (status < 0) - iput(inode); - return status; -} - -static int dlmfs_create(struct inode *dir, - struct dentry *dentry, - int mode, - struct nameidata *nd) -{ - int status = 0; - struct inode *inode; - struct qstr *name = &dentry->d_name; - - mlog(0, "create %.*s\n", name->len, name->name); - - /* verify name is valid and doesn't contain any dlm reserved - * characters */ - if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || - 
name->name[0] == '$') { - status = -EINVAL; - mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, - name->name); - goto bail; - } - - inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); - if (!inode) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ -bail: - return status; -} - -static int dlmfs_unlink(struct inode *dir, - struct dentry *dentry) -{ - int status; - struct inode *inode = dentry->d_inode; - - mlog(0, "unlink inode %lu\n", inode->i_ino); - - /* if there are no current holders, or none that are waiting - * to acquire a lock, this basically destroys our lockres. */ - status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); - if (status < 0) { - mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", - dentry->d_name.len, dentry->d_name.name, status); - goto bail; - } - status = simple_unlink(dir, dentry); -bail: - return status; -} - -static int dlmfs_fill_super(struct super_block * sb, - void * data, - int silent) -{ - struct inode * inode; - struct dentry * root; - - sb->s_maxbytes = MAX_LFS_FILESIZE; - sb->s_blocksize = PAGE_CACHE_SIZE; - sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = DLMFS_MAGIC; - sb->s_op = &dlmfs_ops; - inode = dlmfs_get_root_inode(sb); - if (!inode) - return -ENOMEM; - - root = d_alloc_root(inode); - if (!root) { - iput(inode); - return -ENOMEM; - } - sb->s_root = root; - return 0; -} - -static const struct file_operations dlmfs_file_operations = { - .open = dlmfs_file_open, - .release = dlmfs_file_release, - .poll = dlmfs_file_poll, - .read = dlmfs_file_read, - .write = dlmfs_file_write, -}; - -static const struct inode_operations dlmfs_dir_inode_operations = { - .create = dlmfs_create, - .lookup = simple_lookup, - .unlink = dlmfs_unlink, -}; - -/* this way we can restrict mkdir to only the toplevel of the fs. 
*/ -static const struct inode_operations dlmfs_root_inode_operations = { - .lookup = simple_lookup, - .mkdir = dlmfs_mkdir, - .rmdir = simple_rmdir, -}; - -static const struct super_operations dlmfs_ops = { - .statfs = simple_statfs, - .alloc_inode = dlmfs_alloc_inode, - .destroy_inode = dlmfs_destroy_inode, - .clear_inode = dlmfs_clear_inode, - .drop_inode = generic_delete_inode, -}; - -static const struct inode_operations dlmfs_file_inode_operations = { - .getattr = simple_getattr, -}; - -static int dlmfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) -{ - return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); -} - -static struct file_system_type dlmfs_fs_type = { - .owner = THIS_MODULE, - .name = "ocfs2_dlmfs", - .get_sb = dlmfs_get_sb, - .kill_sb = kill_litter_super, -}; - -static int __init init_dlmfs_fs(void) -{ - int status; - int cleanup_inode = 0, cleanup_worker = 0; - - dlmfs_print_version(); - - status = bdi_init(&dlmfs_backing_dev_info); - if (status) - return status; - - dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", - sizeof(struct dlmfs_inode_private), - 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - dlmfs_init_once); - if (!dlmfs_inode_cache) { - status = -ENOMEM; - goto bail; - } - cleanup_inode = 1; - - user_dlm_worker = create_singlethread_workqueue("user_dlm"); - if (!user_dlm_worker) { - status = -ENOMEM; - goto bail; - } - cleanup_worker = 1; - - status = register_filesystem(&dlmfs_fs_type); -bail: - if (status) { - if (cleanup_inode) - kmem_cache_destroy(dlmfs_inode_cache); - if (cleanup_worker) - destroy_workqueue(user_dlm_worker); - bdi_destroy(&dlmfs_backing_dev_info); - } else - printk("OCFS2 User DLM kernel interface loaded\n"); - return status; -} - -static void __exit exit_dlmfs_fs(void) -{ - unregister_filesystem(&dlmfs_fs_type); - - flush_workqueue(user_dlm_worker); - destroy_workqueue(user_dlm_worker); - - 
kmem_cache_destroy(dlmfs_inode_cache); - - bdi_destroy(&dlmfs_backing_dev_info); -} - -MODULE_AUTHOR("Oracle"); -MODULE_LICENSE("GPL"); - -module_init(init_dlmfs_fs) -module_exit(exit_dlmfs_fs) diff --git a/fs/ocfs2/dlm/dlmfsver.c b/fs/ocfs2/dlm/dlmfsver.c deleted file mode 100644 index a733b3321f8..00000000000 --- a/fs/ocfs2/dlm/dlmfsver.c +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmfsver.c - * - * version string - * - * Copyright (C) 2002, 2005 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include -#include - -#include "dlmfsver.h" - -#define DLM_BUILD_VERSION "1.5.0" - -#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION - -void dlmfs_print_version(void) -{ - printk(KERN_INFO "%s\n", VERSION_STR); -} - -MODULE_DESCRIPTION(VERSION_STR); - -MODULE_VERSION(DLM_BUILD_VERSION); diff --git a/fs/ocfs2/dlm/dlmfsver.h b/fs/ocfs2/dlm/dlmfsver.h deleted file mode 100644 index f35eadbed25..00000000000 --- a/fs/ocfs2/dlm/dlmfsver.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * dlmver.h - * - * Function prototypes - * - * Copyright (C) 2005 Oracle. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef DLMFS_VER_H -#define DLMFS_VER_H - -void dlmfs_print_version(void); - -#endif /* DLMFS_VER_H */ diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c deleted file mode 100644 index 4cb1d3dae25..00000000000 --- a/fs/ocfs2/dlm/userdlm.c +++ /dev/null @@ -1,676 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * userdlm.c - * - * Code which implements the kernel side of a minimal userspace - * interface to our DLM. - * - * Many of the functions here are pared down versions of dlmglue.c - * functions. - * - * Copyright (C) 2003, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#include - -#include -#include -#include -#include - - -#include "cluster/nodemanager.h" -#include "cluster/heartbeat.h" -#include "cluster/tcp.h" - -#include "dlmapi.h" - -#include "userdlm.h" - -#define MLOG_MASK_PREFIX ML_DLMFS -#include "cluster/masklog.h" - -static inline int user_check_wait_flag(struct user_lock_res *lockres, - int flag) -{ - int ret; - - spin_lock(&lockres->l_lock); - ret = lockres->l_flags & flag; - spin_unlock(&lockres->l_lock); - - return ret; -} - -static inline void user_wait_on_busy_lock(struct user_lock_res *lockres) - -{ - wait_event(lockres->l_event, - !user_check_wait_flag(lockres, USER_LOCK_BUSY)); -} - -static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres) - -{ - wait_event(lockres->l_event, - !user_check_wait_flag(lockres, USER_LOCK_BLOCKED)); -} - -/* I heart container_of... 
*/ -static inline struct dlm_ctxt * -dlm_ctxt_from_user_lockres(struct user_lock_res *lockres) -{ - struct dlmfs_inode_private *ip; - - ip = container_of(lockres, - struct dlmfs_inode_private, - ip_lockres); - return ip->ip_dlm; -} - -static struct inode * -user_dlm_inode_from_user_lockres(struct user_lock_res *lockres) -{ - struct dlmfs_inode_private *ip; - - ip = container_of(lockres, - struct dlmfs_inode_private, - ip_lockres); - return &ip->ip_vfs_inode; -} - -static inline void user_recover_from_dlm_error(struct user_lock_res *lockres) -{ - spin_lock(&lockres->l_lock); - lockres->l_flags &= ~USER_LOCK_BUSY; - spin_unlock(&lockres->l_lock); -} - -#define user_log_dlm_error(_func, _stat, _lockres) do { \ - mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \ - "resource %.*s: %s\n", dlm_errname(_stat), _func, \ - _lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \ -} while (0) - -/* WARNING: This function lives in a world where the only three lock - * levels are EX, PR, and NL. It *will* have to be adjusted when more - * lock types are added. */ -static inline int user_highest_compat_lock_level(int level) -{ - int new_level = LKM_EXMODE; - - if (level == LKM_EXMODE) - new_level = LKM_NLMODE; - else if (level == LKM_PRMODE) - new_level = LKM_PRMODE; - return new_level; -} - -static void user_ast(void *opaque) -{ - struct user_lock_res *lockres = opaque; - struct dlm_lockstatus *lksb; - - mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen, - lockres->l_name); - - spin_lock(&lockres->l_lock); - - lksb = &(lockres->l_lksb); - if (lksb->status != DLM_NORMAL) { - mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n", - lksb->status, lockres->l_namelen, lockres->l_name); - spin_unlock(&lockres->l_lock); - return; - } - - mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE, - "Lockres %.*s, requested ivmode. flags 0x%x\n", - lockres->l_namelen, lockres->l_name, lockres->l_flags); - - /* we're downconverting. 
*/ - if (lockres->l_requested < lockres->l_level) { - if (lockres->l_requested <= - user_highest_compat_lock_level(lockres->l_blocking)) { - lockres->l_blocking = LKM_NLMODE; - lockres->l_flags &= ~USER_LOCK_BLOCKED; - } - } - - lockres->l_level = lockres->l_requested; - lockres->l_requested = LKM_IVMODE; - lockres->l_flags |= USER_LOCK_ATTACHED; - lockres->l_flags &= ~USER_LOCK_BUSY; - - spin_unlock(&lockres->l_lock); - - wake_up(&lockres->l_event); -} - -static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres) -{ - struct inode *inode; - inode = user_dlm_inode_from_user_lockres(lockres); - if (!igrab(inode)) - BUG(); -} - -static void user_dlm_unblock_lock(struct work_struct *work); - -static void __user_dlm_queue_lockres(struct user_lock_res *lockres) -{ - if (!(lockres->l_flags & USER_LOCK_QUEUED)) { - user_dlm_grab_inode_ref(lockres); - - INIT_WORK(&lockres->l_work, user_dlm_unblock_lock); - - queue_work(user_dlm_worker, &lockres->l_work); - lockres->l_flags |= USER_LOCK_QUEUED; - } -} - -static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres) -{ - int queue = 0; - - if (!(lockres->l_flags & USER_LOCK_BLOCKED)) - return; - - switch (lockres->l_blocking) { - case LKM_EXMODE: - if (!lockres->l_ex_holders && !lockres->l_ro_holders) - queue = 1; - break; - case LKM_PRMODE: - if (!lockres->l_ex_holders) - queue = 1; - break; - default: - BUG(); - } - - if (queue) - __user_dlm_queue_lockres(lockres); -} - -static void user_bast(void *opaque, int level) -{ - struct user_lock_res *lockres = opaque; - - mlog(0, "Blocking AST fired for lockres %.*s. 
Blocking level %d\n", - lockres->l_namelen, lockres->l_name, level); - - spin_lock(&lockres->l_lock); - lockres->l_flags |= USER_LOCK_BLOCKED; - if (level > lockres->l_blocking) - lockres->l_blocking = level; - - __user_dlm_queue_lockres(lockres); - spin_unlock(&lockres->l_lock); - - wake_up(&lockres->l_event); -} - -static void user_unlock_ast(void *opaque, enum dlm_status status) -{ - struct user_lock_res *lockres = opaque; - - mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen, - lockres->l_name); - - if (status != DLM_NORMAL && status != DLM_CANCELGRANT) - mlog(ML_ERROR, "Dlm returns status %d\n", status); - - spin_lock(&lockres->l_lock); - /* The teardown flag gets set early during the unlock process, - * so test the cancel flag to make sure that this ast isn't - * for a concurrent cancel. */ - if (lockres->l_flags & USER_LOCK_IN_TEARDOWN - && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) { - lockres->l_level = LKM_IVMODE; - } else if (status == DLM_CANCELGRANT) { - /* We tried to cancel a convert request, but it was - * already granted. Don't clear the busy flag - the - * ast should've done this already. */ - BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); - lockres->l_flags &= ~USER_LOCK_IN_CANCEL; - goto out_noclear; - } else { - BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL)); - /* Cancel succeeded, we want to re-queue */ - lockres->l_requested = LKM_IVMODE; /* cancel an - * upconvert - * request. */ - lockres->l_flags &= ~USER_LOCK_IN_CANCEL; - /* we want the unblock thread to look at it again - * now. 
*/ - if (lockres->l_flags & USER_LOCK_BLOCKED) - __user_dlm_queue_lockres(lockres); - } - - lockres->l_flags &= ~USER_LOCK_BUSY; -out_noclear: - spin_unlock(&lockres->l_lock); - - wake_up(&lockres->l_event); -} - -static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres) -{ - struct inode *inode; - inode = user_dlm_inode_from_user_lockres(lockres); - iput(inode); -} - -static void user_dlm_unblock_lock(struct work_struct *work) -{ - int new_level, status; - struct user_lock_res *lockres = - container_of(work, struct user_lock_res, l_work); - struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); - - mlog(0, "processing lockres %.*s\n", lockres->l_namelen, - lockres->l_name); - - spin_lock(&lockres->l_lock); - - mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED), - "Lockres %.*s, flags 0x%x\n", - lockres->l_namelen, lockres->l_name, lockres->l_flags); - - /* notice that we don't clear USER_LOCK_BLOCKED here. If it's - * set, we want user_ast clear it. */ - lockres->l_flags &= ~USER_LOCK_QUEUED; - - /* It's valid to get here and no longer be blocked - if we get - * several basts in a row, we might be queued by the first - * one, the unblock thread might run and clear the queued - * flag, and finally we might get another bast which re-queues - * us before our ast for the downconvert is called. 
*/ - if (!(lockres->l_flags & USER_LOCK_BLOCKED)) { - spin_unlock(&lockres->l_lock); - goto drop_ref; - } - - if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { - spin_unlock(&lockres->l_lock); - goto drop_ref; - } - - if (lockres->l_flags & USER_LOCK_BUSY) { - if (lockres->l_flags & USER_LOCK_IN_CANCEL) { - spin_unlock(&lockres->l_lock); - goto drop_ref; - } - - lockres->l_flags |= USER_LOCK_IN_CANCEL; - spin_unlock(&lockres->l_lock); - - status = dlmunlock(dlm, - &lockres->l_lksb, - LKM_CANCEL, - user_unlock_ast, - lockres); - if (status != DLM_NORMAL) - user_log_dlm_error("dlmunlock", status, lockres); - goto drop_ref; - } - - /* If there are still incompat holders, we can exit safely - * without worrying about re-queueing this lock as that will - * happen on the last call to user_cluster_unlock. */ - if ((lockres->l_blocking == LKM_EXMODE) - && (lockres->l_ex_holders || lockres->l_ro_holders)) { - spin_unlock(&lockres->l_lock); - mlog(0, "can't downconvert for ex: ro = %u, ex = %u\n", - lockres->l_ro_holders, lockres->l_ex_holders); - goto drop_ref; - } - - if ((lockres->l_blocking == LKM_PRMODE) - && lockres->l_ex_holders) { - spin_unlock(&lockres->l_lock); - mlog(0, "can't downconvert for pr: ex = %u\n", - lockres->l_ex_holders); - goto drop_ref; - } - - /* yay, we can downconvert now. */ - new_level = user_highest_compat_lock_level(lockres->l_blocking); - lockres->l_requested = new_level; - lockres->l_flags |= USER_LOCK_BUSY; - mlog(0, "Downconvert lock from %d to %d\n", - lockres->l_level, new_level); - spin_unlock(&lockres->l_lock); - - /* need lock downconvert request now... 
*/ - status = dlmlock(dlm, - new_level, - &lockres->l_lksb, - LKM_CONVERT|LKM_VALBLK, - lockres->l_name, - lockres->l_namelen, - user_ast, - lockres, - user_bast); - if (status != DLM_NORMAL) { - user_log_dlm_error("dlmlock", status, lockres); - user_recover_from_dlm_error(lockres); - } - -drop_ref: - user_dlm_drop_inode_ref(lockres); -} - -static inline void user_dlm_inc_holders(struct user_lock_res *lockres, - int level) -{ - switch(level) { - case LKM_EXMODE: - lockres->l_ex_holders++; - break; - case LKM_PRMODE: - lockres->l_ro_holders++; - break; - default: - BUG(); - } -} - -/* predict what lock level we'll be dropping down to on behalf - * of another node, and return true if the currently wanted - * level will be compatible with it. */ -static inline int -user_may_continue_on_blocked_lock(struct user_lock_res *lockres, - int wanted) -{ - BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); - - return wanted <= user_highest_compat_lock_level(lockres->l_blocking); -} - -int user_dlm_cluster_lock(struct user_lock_res *lockres, - int level, - int lkm_flags) -{ - int status, local_flags; - struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); - - if (level != LKM_EXMODE && - level != LKM_PRMODE) { - mlog(ML_ERROR, "lockres %.*s: invalid request!\n", - lockres->l_namelen, lockres->l_name); - status = -EINVAL; - goto bail; - } - - mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n", - lockres->l_namelen, lockres->l_name, - (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE", - lkm_flags); - -again: - if (signal_pending(current)) { - status = -ERESTARTSYS; - goto bail; - } - - spin_lock(&lockres->l_lock); - - /* We only compare against the currently granted level - * here. If the lock is blocked waiting on a downconvert, - * we'll get caught below. */ - if ((lockres->l_flags & USER_LOCK_BUSY) && - (level > lockres->l_level)) { - /* is someone sitting in dlm_lock? If so, wait on - * them. 
*/ - spin_unlock(&lockres->l_lock); - - user_wait_on_busy_lock(lockres); - goto again; - } - - if ((lockres->l_flags & USER_LOCK_BLOCKED) && - (!user_may_continue_on_blocked_lock(lockres, level))) { - /* is the lock is currently blocked on behalf of - * another node */ - spin_unlock(&lockres->l_lock); - - user_wait_on_blocked_lock(lockres); - goto again; - } - - if (level > lockres->l_level) { - local_flags = lkm_flags | LKM_VALBLK; - if (lockres->l_level != LKM_IVMODE) - local_flags |= LKM_CONVERT; - - lockres->l_requested = level; - lockres->l_flags |= USER_LOCK_BUSY; - spin_unlock(&lockres->l_lock); - - BUG_ON(level == LKM_IVMODE); - BUG_ON(level == LKM_NLMODE); - - /* call dlm_lock to upgrade lock now */ - status = dlmlock(dlm, - level, - &lockres->l_lksb, - local_flags, - lockres->l_name, - lockres->l_namelen, - user_ast, - lockres, - user_bast); - if (status != DLM_NORMAL) { - if ((lkm_flags & LKM_NOQUEUE) && - (status == DLM_NOTQUEUED)) - status = -EAGAIN; - else { - user_log_dlm_error("dlmlock", status, lockres); - status = -EINVAL; - } - user_recover_from_dlm_error(lockres); - goto bail; - } - - user_wait_on_busy_lock(lockres); - goto again; - } - - user_dlm_inc_holders(lockres, level); - spin_unlock(&lockres->l_lock); - - status = 0; -bail: - return status; -} - -static inline void user_dlm_dec_holders(struct user_lock_res *lockres, - int level) -{ - switch(level) { - case LKM_EXMODE: - BUG_ON(!lockres->l_ex_holders); - lockres->l_ex_holders--; - break; - case LKM_PRMODE: - BUG_ON(!lockres->l_ro_holders); - lockres->l_ro_holders--; - break; - default: - BUG(); - } -} - -void user_dlm_cluster_unlock(struct user_lock_res *lockres, - int level) -{ - if (level != LKM_EXMODE && - level != LKM_PRMODE) { - mlog(ML_ERROR, "lockres %.*s: invalid request!\n", - lockres->l_namelen, lockres->l_name); - return; - } - - spin_lock(&lockres->l_lock); - user_dlm_dec_holders(lockres, level); - __user_dlm_cond_queue_lockres(lockres); - spin_unlock(&lockres->l_lock); -} - 
-void user_dlm_write_lvb(struct inode *inode, - const char *val, - unsigned int len) -{ - struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; - char *lvb = lockres->l_lksb.lvb; - - BUG_ON(len > DLM_LVB_LEN); - - spin_lock(&lockres->l_lock); - - BUG_ON(lockres->l_level < LKM_EXMODE); - memcpy(lvb, val, len); - - spin_unlock(&lockres->l_lock); -} - -void user_dlm_read_lvb(struct inode *inode, - char *val, - unsigned int len) -{ - struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres; - char *lvb = lockres->l_lksb.lvb; - - BUG_ON(len > DLM_LVB_LEN); - - spin_lock(&lockres->l_lock); - - BUG_ON(lockres->l_level < LKM_PRMODE); - memcpy(val, lvb, len); - - spin_unlock(&lockres->l_lock); -} - -void user_dlm_lock_res_init(struct user_lock_res *lockres, - struct dentry *dentry) -{ - memset(lockres, 0, sizeof(*lockres)); - - spin_lock_init(&lockres->l_lock); - init_waitqueue_head(&lockres->l_event); - lockres->l_level = LKM_IVMODE; - lockres->l_requested = LKM_IVMODE; - lockres->l_blocking = LKM_IVMODE; - - /* should have been checked before getting here. 
*/ - BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN); - - memcpy(lockres->l_name, - dentry->d_name.name, - dentry->d_name.len); - lockres->l_namelen = dentry->d_name.len; -} - -int user_dlm_destroy_lock(struct user_lock_res *lockres) -{ - int status = -EBUSY; - struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres); - - mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name); - - spin_lock(&lockres->l_lock); - if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { - spin_unlock(&lockres->l_lock); - return 0; - } - - lockres->l_flags |= USER_LOCK_IN_TEARDOWN; - - while (lockres->l_flags & USER_LOCK_BUSY) { - spin_unlock(&lockres->l_lock); - - user_wait_on_busy_lock(lockres); - - spin_lock(&lockres->l_lock); - } - - if (lockres->l_ro_holders || lockres->l_ex_holders) { - spin_unlock(&lockres->l_lock); - goto bail; - } - - status = 0; - if (!(lockres->l_flags & USER_LOCK_ATTACHED)) { - spin_unlock(&lockres->l_lock); - goto bail; - } - - lockres->l_flags &= ~USER_LOCK_ATTACHED; - lockres->l_flags |= USER_LOCK_BUSY; - spin_unlock(&lockres->l_lock); - - status = dlmunlock(dlm, - &lockres->l_lksb, - LKM_VALBLK, - user_unlock_ast, - lockres); - if (status != DLM_NORMAL) { - user_log_dlm_error("dlmunlock", status, lockres); - status = -EINVAL; - goto bail; - } - - user_wait_on_busy_lock(lockres); - - status = 0; -bail: - return status; -} - -struct dlm_ctxt *user_dlm_register_context(struct qstr *name, - struct dlm_protocol_version *proto) -{ - struct dlm_ctxt *dlm; - u32 dlm_key; - char *domain; - - domain = kmalloc(name->len + 1, GFP_NOFS); - if (!domain) { - mlog_errno(-ENOMEM); - return ERR_PTR(-ENOMEM); - } - - dlm_key = crc32_le(0, name->name, name->len); - - snprintf(domain, name->len + 1, "%.*s", name->len, name->name); - - dlm = dlm_register_domain(domain, dlm_key, proto); - if (IS_ERR(dlm)) - mlog_errno(PTR_ERR(dlm)); - - kfree(domain); - return dlm; -} - -void user_dlm_unregister_context(struct dlm_ctxt *dlm) -{ - dlm_unregister_domain(dlm); 
-} diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h deleted file mode 100644 index 0c3cc03c61f..00000000000 --- a/fs/ocfs2/dlm/userdlm.h +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; -*- - * vim: noexpandtab sw=8 ts=8 sts=0: - * - * userdlm.h - * - * Userspace dlm defines - * - * Copyright (C) 2002, 2004 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - - -#ifndef USERDLM_H -#define USERDLM_H - -#include -#include -#include -#include - -/* user_lock_res->l_flags flags. */ -#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized - * the lvb */ -#define USER_LOCK_BUSY (0x00000002) /* we are currently in - * dlm_lock */ -#define USER_LOCK_BLOCKED (0x00000004) /* blocked waiting to - * downconvert*/ -#define USER_LOCK_IN_TEARDOWN (0x00000008) /* we're currently - * destroying this - * lock. 
*/ -#define USER_LOCK_QUEUED (0x00000010) /* lock is on the - * workqueue */ -#define USER_LOCK_IN_CANCEL (0x00000020) - -struct user_lock_res { - spinlock_t l_lock; - - int l_flags; - -#define USER_DLM_LOCK_ID_MAX_LEN 32 - char l_name[USER_DLM_LOCK_ID_MAX_LEN]; - int l_namelen; - int l_level; - unsigned int l_ro_holders; - unsigned int l_ex_holders; - struct dlm_lockstatus l_lksb; - - int l_requested; - int l_blocking; - - wait_queue_head_t l_event; - - struct work_struct l_work; -}; - -extern struct workqueue_struct *user_dlm_worker; - -void user_dlm_lock_res_init(struct user_lock_res *lockres, - struct dentry *dentry); -int user_dlm_destroy_lock(struct user_lock_res *lockres); -int user_dlm_cluster_lock(struct user_lock_res *lockres, - int level, - int lkm_flags); -void user_dlm_cluster_unlock(struct user_lock_res *lockres, - int level); -void user_dlm_write_lvb(struct inode *inode, - const char *val, - unsigned int len); -void user_dlm_read_lvb(struct inode *inode, - char *val, - unsigned int len); -struct dlm_ctxt *user_dlm_register_context(struct qstr *name, - struct dlm_protocol_version *proto); -void user_dlm_unregister_context(struct dlm_ctxt *dlm); - -struct dlmfs_inode_private { - struct dlm_ctxt *ip_dlm; - - struct user_lock_res ip_lockres; /* unused for directories. 
*/ - struct inode *ip_parent; - - struct inode ip_vfs_inode; -}; - -static inline struct dlmfs_inode_private * -DLMFS_I(struct inode *inode) -{ - return container_of(inode, - struct dlmfs_inode_private, - ip_vfs_inode); -} - -struct dlmfs_filp_private { - int fp_lock_level; -}; - -#define DLMFS_MAGIC 0x76a9f425 - -#endif /* USERDLM_H */ diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile new file mode 100644 index 00000000000..df69b4856d0 --- /dev/null +++ b/fs/ocfs2/dlmfs/Makefile @@ -0,0 +1,5 @@ +EXTRA_CFLAGS += -Ifs/ocfs2 + +obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o + +ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c new file mode 100644 index 00000000000..e21ce0e5fc4 --- /dev/null +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -0,0 +1,710 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * dlmfs.c + * + * Code which implements the kernel side of a minimal userspace + * interface to our DLM. This file handles the virtual file system + * used for communication with userspace. Credit should go to ramfs, + * which was a template for the fs side of this module. + * + * Copyright (C) 2003, 2004 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. 
+ */ + +/* Simple VFS hooks based on: */ +/* + * Resizable simple ram filesystem for Linux. + * + * Copyright (C) 2000 Linus Torvalds. + * 2000 Transmeta Corp. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +#include "cluster/nodemanager.h" +#include "cluster/heartbeat.h" +#include "cluster/tcp.h" + +#include "dlm/dlmapi.h" + +#include "userdlm.h" + +#include "dlmfsver.h" + +#define MLOG_MASK_PREFIX ML_DLMFS +#include "cluster/masklog.h" + +#include "ocfs2_lockingver.h" + +static const struct super_operations dlmfs_ops; +static const struct file_operations dlmfs_file_operations; +static const struct inode_operations dlmfs_dir_inode_operations; +static const struct inode_operations dlmfs_root_inode_operations; +static const struct inode_operations dlmfs_file_inode_operations; +static struct kmem_cache *dlmfs_inode_cache; + +struct workqueue_struct *user_dlm_worker; + +/* + * This is the userdlmfs locking protocol version. + * + * See fs/ocfs2/dlmglue.c for more details on locking versions. + */ +static const struct dlm_protocol_version user_locking_protocol = { + .pv_major = OCFS2_LOCKING_PROTOCOL_MAJOR, + .pv_minor = OCFS2_LOCKING_PROTOCOL_MINOR, +}; + + +/* + * These are the ABI capabilities of dlmfs. + * + * Over time, dlmfs has added some features that were not part of the + * initial ABI. Unfortunately, some of these features are not detectable + * via standard usage. For example, Linux's default poll always returns + * POLLIN, so there is no way for a caller of poll(2) to know when dlmfs + * added poll support. Instead, we provide this list of new capabilities. + * + * Capabilities is a read-only attribute. We do it as a module parameter + * so we can discover it whether dlmfs is built in, loaded, or even not + * loaded. + * + * The ABI features are local to this machine's dlmfs mount. This is + * distinct from the locking protocol, which is concerned with inter-node + * interaction. 
+ * + * Capabilities: + * - bast : POLLIN against the file descriptor of a held lock + * signifies a bast fired on the lock. + */ +#define DLMFS_CAPABILITIES "bast" +extern int param_set_dlmfs_capabilities(const char *val, + struct kernel_param *kp) +{ + printk(KERN_ERR "%s: readonly parameter\n", kp->name); + return -EINVAL; +} +static int param_get_dlmfs_capabilities(char *buffer, + struct kernel_param *kp) +{ + return strlcpy(buffer, DLMFS_CAPABILITIES, + strlen(DLMFS_CAPABILITIES) + 1); +} +module_param_call(capabilities, param_set_dlmfs_capabilities, + param_get_dlmfs_capabilities, NULL, 0444); +MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES); + + +/* + * decodes a set of open flags into a valid lock level and a set of flags. + * returns < 0 if we have invalid flags + * flags which mean something to us: + * O_RDONLY -> PRMODE level + * O_WRONLY -> EXMODE level + * + * O_NONBLOCK -> LKM_NOQUEUE + */ +static int dlmfs_decode_open_flags(int open_flags, + int *level, + int *flags) +{ + if (open_flags & (O_WRONLY|O_RDWR)) + *level = LKM_EXMODE; + else + *level = LKM_PRMODE; + + *flags = 0; + if (open_flags & O_NONBLOCK) + *flags |= LKM_NOQUEUE; + + return 0; +} + +static int dlmfs_file_open(struct inode *inode, + struct file *file) +{ + int status, level, flags; + struct dlmfs_filp_private *fp = NULL; + struct dlmfs_inode_private *ip; + + if (S_ISDIR(inode->i_mode)) + BUG(); + + mlog(0, "open called on inode %lu, flags 0x%x\n", inode->i_ino, + file->f_flags); + + status = dlmfs_decode_open_flags(file->f_flags, &level, &flags); + if (status < 0) + goto bail; + + /* We don't want to honor O_APPEND at read/write time as it + * doesn't make sense for LVB writes. 
*/ + file->f_flags &= ~O_APPEND; + + fp = kmalloc(sizeof(*fp), GFP_NOFS); + if (!fp) { + status = -ENOMEM; + goto bail; + } + fp->fp_lock_level = level; + + ip = DLMFS_I(inode); + + status = user_dlm_cluster_lock(&ip->ip_lockres, level, flags); + if (status < 0) { + /* this is a strange error to return here but I want + * to be able userspace to be able to distinguish a + * valid lock request from one that simply couldn't be + * granted. */ + if (flags & LKM_NOQUEUE && status == -EAGAIN) + status = -ETXTBSY; + kfree(fp); + goto bail; + } + + file->private_data = fp; +bail: + return status; +} + +static int dlmfs_file_release(struct inode *inode, + struct file *file) +{ + int level, status; + struct dlmfs_inode_private *ip = DLMFS_I(inode); + struct dlmfs_filp_private *fp = + (struct dlmfs_filp_private *) file->private_data; + + if (S_ISDIR(inode->i_mode)) + BUG(); + + mlog(0, "close called on inode %lu\n", inode->i_ino); + + status = 0; + if (fp) { + level = fp->fp_lock_level; + if (level != LKM_IVMODE) + user_dlm_cluster_unlock(&ip->ip_lockres, level); + + kfree(fp); + file->private_data = NULL; + } + + return 0; +} + +static unsigned int dlmfs_file_poll(struct file *file, poll_table *wait) +{ + int event = 0; + struct inode *inode = file->f_path.dentry->d_inode; + struct dlmfs_inode_private *ip = DLMFS_I(inode); + + poll_wait(file, &ip->ip_lockres.l_event, wait); + + spin_lock(&ip->ip_lockres.l_lock); + if (ip->ip_lockres.l_flags & USER_LOCK_BLOCKED) + event = POLLIN | POLLRDNORM; + spin_unlock(&ip->ip_lockres.l_lock); + + return event; +} + +static ssize_t dlmfs_file_read(struct file *filp, + char __user *buf, + size_t count, + loff_t *ppos) +{ + int bytes_left; + ssize_t readlen; + char *lvb_buf; + struct inode *inode = filp->f_path.dentry->d_inode; + + mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", + inode->i_ino, count, *ppos); + + if (*ppos >= i_size_read(inode)) + return 0; + + if (!count) + return 0; + + if (!access_ok(VERIFY_WRITE, buf, count)) + 
return -EFAULT; + + /* don't read past the lvb */ + if ((count + *ppos) > i_size_read(inode)) + readlen = i_size_read(inode) - *ppos; + else + readlen = count - *ppos; + + lvb_buf = kmalloc(readlen, GFP_NOFS); + if (!lvb_buf) + return -ENOMEM; + + user_dlm_read_lvb(inode, lvb_buf, readlen); + bytes_left = __copy_to_user(buf, lvb_buf, readlen); + readlen -= bytes_left; + + kfree(lvb_buf); + + *ppos = *ppos + readlen; + + mlog(0, "read %zd bytes\n", readlen); + return readlen; +} + +static ssize_t dlmfs_file_write(struct file *filp, + const char __user *buf, + size_t count, + loff_t *ppos) +{ + int bytes_left; + ssize_t writelen; + char *lvb_buf; + struct inode *inode = filp->f_path.dentry->d_inode; + + mlog(0, "inode %lu, count = %zu, *ppos = %llu\n", + inode->i_ino, count, *ppos); + + if (*ppos >= i_size_read(inode)) + return -ENOSPC; + + if (!count) + return 0; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + /* don't write past the lvb */ + if ((count + *ppos) > i_size_read(inode)) + writelen = i_size_read(inode) - *ppos; + else + writelen = count - *ppos; + + lvb_buf = kmalloc(writelen, GFP_NOFS); + if (!lvb_buf) + return -ENOMEM; + + bytes_left = copy_from_user(lvb_buf, buf, writelen); + writelen -= bytes_left; + if (writelen) + user_dlm_write_lvb(inode, lvb_buf, writelen); + + kfree(lvb_buf); + + *ppos = *ppos + writelen; + mlog(0, "wrote %zd bytes\n", writelen); + return writelen; +} + +static void dlmfs_init_once(void *foo) +{ + struct dlmfs_inode_private *ip = + (struct dlmfs_inode_private *) foo; + + ip->ip_dlm = NULL; + ip->ip_parent = NULL; + + inode_init_once(&ip->ip_vfs_inode); +} + +static struct inode *dlmfs_alloc_inode(struct super_block *sb) +{ + struct dlmfs_inode_private *ip; + + ip = kmem_cache_alloc(dlmfs_inode_cache, GFP_NOFS); + if (!ip) + return NULL; + + return &ip->ip_vfs_inode; +} + +static void dlmfs_destroy_inode(struct inode *inode) +{ + kmem_cache_free(dlmfs_inode_cache, DLMFS_I(inode)); +} + +static void 
dlmfs_clear_inode(struct inode *inode) +{ + int status; + struct dlmfs_inode_private *ip; + + if (!inode) + return; + + mlog(0, "inode %lu\n", inode->i_ino); + + ip = DLMFS_I(inode); + + if (S_ISREG(inode->i_mode)) { + status = user_dlm_destroy_lock(&ip->ip_lockres); + if (status < 0) + mlog_errno(status); + iput(ip->ip_parent); + goto clear_fields; + } + + mlog(0, "we're a directory, ip->ip_dlm = 0x%p\n", ip->ip_dlm); + /* we must be a directory. If required, lets unregister the + * dlm context now. */ + if (ip->ip_dlm) + user_dlm_unregister_context(ip->ip_dlm); +clear_fields: + ip->ip_parent = NULL; + ip->ip_dlm = NULL; +} + +static struct backing_dev_info dlmfs_backing_dev_info = { + .name = "ocfs2-dlmfs", + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, +}; + +static struct inode *dlmfs_get_root_inode(struct super_block *sb) +{ + struct inode *inode = new_inode(sb); + int mode = S_IFDIR | 0755; + struct dlmfs_inode_private *ip; + + if (inode) { + ip = DLMFS_I(inode); + + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inc_nlink(inode); + + inode->i_fop = &simple_dir_operations; + inode->i_op = &dlmfs_root_inode_operations; + } + + return inode; +} + +static struct inode *dlmfs_get_inode(struct inode *parent, + struct dentry *dentry, + int mode) +{ + struct super_block *sb = parent->i_sb; + struct inode * inode = new_inode(sb); + struct dlmfs_inode_private *ip; + + if (!inode) + return NULL; + + inode->i_mode = mode; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); + inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + + ip = DLMFS_I(inode); + ip->ip_dlm = DLMFS_I(parent)->ip_dlm; + + switch (mode & S_IFMT) { + default: + /* for now we don't support anything 
other than + * directories and regular files. */ + BUG(); + break; + case S_IFREG: + inode->i_op = &dlmfs_file_inode_operations; + inode->i_fop = &dlmfs_file_operations; + + i_size_write(inode, DLM_LVB_LEN); + + user_dlm_lock_res_init(&ip->ip_lockres, dentry); + + /* released at clear_inode time, this insures that we + * get to drop the dlm reference on each lock *before* + * we call the unregister code for releasing parent + * directories. */ + ip->ip_parent = igrab(parent); + BUG_ON(!ip->ip_parent); + break; + case S_IFDIR: + inode->i_op = &dlmfs_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == + * 2 (for "." entry) */ + inc_nlink(inode); + break; + } + + if (parent->i_mode & S_ISGID) { + inode->i_gid = parent->i_gid; + if (S_ISDIR(mode)) + inode->i_mode |= S_ISGID; + } + + return inode; +} + +/* + * File creation. Allocate an inode, and we're done.. + */ +/* SMP-safe */ +static int dlmfs_mkdir(struct inode * dir, + struct dentry * dentry, + int mode) +{ + int status; + struct inode *inode = NULL; + struct qstr *domain = &dentry->d_name; + struct dlmfs_inode_private *ip; + struct dlm_ctxt *dlm; + struct dlm_protocol_version proto = user_locking_protocol; + + mlog(0, "mkdir %.*s\n", domain->len, domain->name); + + /* verify that we have a proper domain */ + if (domain->len >= O2NM_MAX_NAME_LEN) { + status = -EINVAL; + mlog(ML_ERROR, "invalid domain name for directory.\n"); + goto bail; + } + + inode = dlmfs_get_inode(dir, dentry, mode | S_IFDIR); + if (!inode) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + ip = DLMFS_I(inode); + + dlm = user_dlm_register_context(domain, &proto); + if (IS_ERR(dlm)) { + status = PTR_ERR(dlm); + mlog(ML_ERROR, "Error %d could not register domain \"%.*s\"\n", + status, domain->len, domain->name); + goto bail; + } + ip->ip_dlm = dlm; + + inc_nlink(dir); + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ + + status = 
0; +bail: + if (status < 0) + iput(inode); + return status; +} + +static int dlmfs_create(struct inode *dir, + struct dentry *dentry, + int mode, + struct nameidata *nd) +{ + int status = 0; + struct inode *inode; + struct qstr *name = &dentry->d_name; + + mlog(0, "create %.*s\n", name->len, name->name); + + /* verify name is valid and doesn't contain any dlm reserved + * characters */ + if (name->len >= USER_DLM_LOCK_ID_MAX_LEN || + name->name[0] == '$') { + status = -EINVAL; + mlog(ML_ERROR, "invalid lock name, %.*s\n", name->len, + name->name); + goto bail; + } + + inode = dlmfs_get_inode(dir, dentry, mode | S_IFREG); + if (!inode) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + + d_instantiate(dentry, inode); + dget(dentry); /* Extra count - pin the dentry in core */ +bail: + return status; +} + +static int dlmfs_unlink(struct inode *dir, + struct dentry *dentry) +{ + int status; + struct inode *inode = dentry->d_inode; + + mlog(0, "unlink inode %lu\n", inode->i_ino); + + /* if there are no current holders, or none that are waiting + * to acquire a lock, this basically destroys our lockres. 
*/ + status = user_dlm_destroy_lock(&DLMFS_I(inode)->ip_lockres); + if (status < 0) { + mlog(ML_ERROR, "unlink %.*s, error %d from destroy\n", + dentry->d_name.len, dentry->d_name.name, status); + goto bail; + } + status = simple_unlink(dir, dentry); +bail: + return status; +} + +static int dlmfs_fill_super(struct super_block * sb, + void * data, + int silent) +{ + struct inode * inode; + struct dentry * root; + + sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = DLMFS_MAGIC; + sb->s_op = &dlmfs_ops; + inode = dlmfs_get_root_inode(sb); + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + return 0; +} + +static const struct file_operations dlmfs_file_operations = { + .open = dlmfs_file_open, + .release = dlmfs_file_release, + .poll = dlmfs_file_poll, + .read = dlmfs_file_read, + .write = dlmfs_file_write, +}; + +static const struct inode_operations dlmfs_dir_inode_operations = { + .create = dlmfs_create, + .lookup = simple_lookup, + .unlink = dlmfs_unlink, +}; + +/* this way we can restrict mkdir to only the toplevel of the fs. 
*/ +static const struct inode_operations dlmfs_root_inode_operations = { + .lookup = simple_lookup, + .mkdir = dlmfs_mkdir, + .rmdir = simple_rmdir, +}; + +static const struct super_operations dlmfs_ops = { + .statfs = simple_statfs, + .alloc_inode = dlmfs_alloc_inode, + .destroy_inode = dlmfs_destroy_inode, + .clear_inode = dlmfs_clear_inode, + .drop_inode = generic_delete_inode, +}; + +static const struct inode_operations dlmfs_file_inode_operations = { + .getattr = simple_getattr, +}; + +static int dlmfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); +} + +static struct file_system_type dlmfs_fs_type = { + .owner = THIS_MODULE, + .name = "ocfs2_dlmfs", + .get_sb = dlmfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init init_dlmfs_fs(void) +{ + int status; + int cleanup_inode = 0, cleanup_worker = 0; + + dlmfs_print_version(); + + status = bdi_init(&dlmfs_backing_dev_info); + if (status) + return status; + + dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", + sizeof(struct dlmfs_inode_private), + 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + dlmfs_init_once); + if (!dlmfs_inode_cache) { + status = -ENOMEM; + goto bail; + } + cleanup_inode = 1; + + user_dlm_worker = create_singlethread_workqueue("user_dlm"); + if (!user_dlm_worker) { + status = -ENOMEM; + goto bail; + } + cleanup_worker = 1; + + status = register_filesystem(&dlmfs_fs_type); +bail: + if (status) { + if (cleanup_inode) + kmem_cache_destroy(dlmfs_inode_cache); + if (cleanup_worker) + destroy_workqueue(user_dlm_worker); + bdi_destroy(&dlmfs_backing_dev_info); + } else + printk("OCFS2 User DLM kernel interface loaded\n"); + return status; +} + +static void __exit exit_dlmfs_fs(void) +{ + unregister_filesystem(&dlmfs_fs_type); + + flush_workqueue(user_dlm_worker); + destroy_workqueue(user_dlm_worker); + + 
kmem_cache_destroy(dlmfs_inode_cache); + + bdi_destroy(&dlmfs_backing_dev_info); +} + +MODULE_AUTHOR("Oracle"); +MODULE_LICENSE("GPL"); + +module_init(init_dlmfs_fs) +module_exit(exit_dlmfs_fs) diff --git a/fs/ocfs2/dlmfs/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c new file mode 100644 index 00000000000..a733b3321f8 --- /dev/null +++ b/fs/ocfs2/dlmfs/dlmfsver.c @@ -0,0 +1,42 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * dlmfsver.c + * + * version string + * + * Copyright (C) 2002, 2005 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include + +#include "dlmfsver.h" + +#define DLM_BUILD_VERSION "1.5.0" + +#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION + +void dlmfs_print_version(void) +{ + printk(KERN_INFO "%s\n", VERSION_STR); +} + +MODULE_DESCRIPTION(VERSION_STR); + +MODULE_VERSION(DLM_BUILD_VERSION); diff --git a/fs/ocfs2/dlmfs/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h new file mode 100644 index 00000000000..f35eadbed25 --- /dev/null +++ b/fs/ocfs2/dlmfs/dlmfsver.h @@ -0,0 +1,31 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * dlmver.h + * + * Function prototypes + * + * Copyright (C) 2005 Oracle. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef DLMFS_VER_H +#define DLMFS_VER_H + +void dlmfs_print_version(void); + +#endif /* DLMFS_VER_H */ diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c new file mode 100644 index 00000000000..6adae70cee8 --- /dev/null +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -0,0 +1,676 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * userdlm.c + * + * Code which implements the kernel side of a minimal userspace + * interface to our DLM. + * + * Many of the functions here are pared down versions of dlmglue.c + * functions. + * + * Copyright (C) 2003, 2004 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include
+
+#include
+#include
+#include
+#include
+
+
+#include "cluster/nodemanager.h"
+#include "cluster/heartbeat.h"
+#include "cluster/tcp.h"
+
+#include "dlm/dlmapi.h"
+
+#include "userdlm.h"
+
+#define MLOG_MASK_PREFIX ML_DLMFS
+#include "cluster/masklog.h"
+/*
+ * Return the bits of @flag that are set in lockres->l_flags (non-zero
+ * when at least one of them is set).  l_flags is read with l_lock held.
+ */
+static inline int user_check_wait_flag(struct user_lock_res *lockres,
+				       int flag)
+{
+	int ret;
+
+	spin_lock(&lockres->l_lock);
+	ret = lockres->l_flags & flag;
+	spin_unlock(&lockres->l_lock);
+
+	return ret;
+}
+/*
+ * Sleep in wait_event() on l_event until USER_LOCK_BUSY is clear.
+ */
+static inline void user_wait_on_busy_lock(struct user_lock_res *lockres)
+
+{
+	wait_event(lockres->l_event,
+		   !user_check_wait_flag(lockres, USER_LOCK_BUSY));
+}
+/*
+ * Sleep in wait_event() on l_event until USER_LOCK_BLOCKED is clear.
+ */
+static inline void user_wait_on_blocked_lock(struct user_lock_res *lockres)
+
+{
+	wait_event(lockres->l_event,
+		   !user_check_wait_flag(lockres, USER_LOCK_BLOCKED));
+}
+
+/* I heart container_of...
+ *
+ * Return the dlm_ctxt pointer (ip_dlm) stored in the
+ * dlmfs_inode_private that embeds @lockres as its ip_lockres member. */
+static inline struct dlm_ctxt *
+dlm_ctxt_from_user_lockres(struct user_lock_res *lockres)
+{
+	struct dlmfs_inode_private *ip;
+
+	ip = container_of(lockres,
+			  struct dlmfs_inode_private,
+			  ip_lockres);
+	return ip->ip_dlm;
+}
+/*
+ * Return the VFS inode (ip_vfs_inode) embedded in the
+ * dlmfs_inode_private that contains @lockres.
+ */
+static struct inode *
+user_dlm_inode_from_user_lockres(struct user_lock_res *lockres)
+{
+	struct dlmfs_inode_private *ip;
+
+	ip = container_of(lockres,
+			  struct dlmfs_inode_private,
+			  ip_lockres);
+	return &ip->ip_vfs_inode;
+}
+/*
+ * Clear USER_LOCK_BUSY after a dlmlock() call failed, so the lockres
+ * does not stay marked busy.  Note that no wake_up() is issued here.
+ */
+static inline void user_recover_from_dlm_error(struct user_lock_res *lockres)
+{
+	spin_lock(&lockres->l_lock);
+	lockres->l_flags &= ~USER_LOCK_BUSY;
+	spin_unlock(&lockres->l_lock);
+}
+/*
+ * Log a DLM failure at ML_ERROR: the error name and message for @_stat,
+ * the name of the call that failed (@_func, a string) and the resource
+ * name of @_lockres.
+ */
+#define user_log_dlm_error(_func, _stat, _lockres) do {			\
+	mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on "		\
+		"resource %.*s: %s\n", dlm_errname(_stat), _func,	\
+		_lockres->l_namelen, _lockres->l_name, dlm_errmsg(_stat)); \
+} while (0)
+
+/* WARNING: This function lives in a world where the only three lock
+ * levels are EX, PR, and NL. It *will* have to be adjusted when more
+ * lock types are added.
+ *
+ * Maps a blocking level to the level we may keep: LKM_EXMODE gives
+ * LKM_NLMODE, LKM_PRMODE gives LKM_PRMODE, and any other value gives
+ * LKM_EXMODE. */
+static inline int user_highest_compat_lock_level(int level)
+{
+	int new_level = LKM_EXMODE;
+
+	if (level == LKM_EXMODE)
+		new_level = LKM_NLMODE;
+	else if (level == LKM_PRMODE)
+		new_level = LKM_PRMODE;
+	return new_level;
+}
+/*
+ * AST callback handed to dlmlock(); @opaque is the user_lock_res.
+ *
+ * With an lksb status of DLM_NORMAL the requested level becomes the
+ * granted level, USER_LOCK_ATTACHED is set, USER_LOCK_BUSY is cleared
+ * and l_event waiters are woken.  With any other status the error is
+ * logged and the function returns without changing l_flags or waking
+ * anyone.
+ */
+static void user_ast(void *opaque)
+{
+	struct user_lock_res *lockres = opaque;
+	struct dlm_lockstatus *lksb;
+
+	mlog(0, "AST fired for lockres %.*s\n", lockres->l_namelen,
+	     lockres->l_name);
+
+	spin_lock(&lockres->l_lock);
+
+	lksb = &(lockres->l_lksb);
+	if (lksb->status != DLM_NORMAL) {
+		mlog(ML_ERROR, "lksb status value of %u on lockres %.*s\n",
+		     lksb->status, lockres->l_namelen, lockres->l_name);
+		spin_unlock(&lockres->l_lock);
+		return;
+	}
+
+	mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
+			"Lockres %.*s, requested ivmode. flags 0x%x\n",
+			lockres->l_namelen, lockres->l_name, lockres->l_flags);
+
+	/* we're downconverting.  Only stop being "blocked" if the new
+	 * level is low enough for whoever we were blocking. */
+	if (lockres->l_requested < lockres->l_level) {
+		if (lockres->l_requested <=
+		    user_highest_compat_lock_level(lockres->l_blocking)) {
+			lockres->l_blocking = LKM_NLMODE;
+			lockres->l_flags &= ~USER_LOCK_BLOCKED;
+		}
+	}
+
+	lockres->l_level = lockres->l_requested;
+	lockres->l_requested = LKM_IVMODE;	/* no request outstanding */
+	lockres->l_flags |= USER_LOCK_ATTACHED;
+	lockres->l_flags &= ~USER_LOCK_BUSY;
+
+	spin_unlock(&lockres->l_lock);
+
+	wake_up(&lockres->l_event);
+}
+/*
+ * Take a reference on the inode that embeds @lockres.  BUG()s if
+ * igrab() fails.
+ */
+static inline void user_dlm_grab_inode_ref(struct user_lock_res *lockres)
+{
+	struct inode *inode;
+	inode = user_dlm_inode_from_user_lockres(lockres);
+	if (!igrab(inode))
+		BUG();
+}
+
+static void user_dlm_unblock_lock(struct work_struct *work);
+/*
+ * Queue user_dlm_unblock_lock() for @lockres on user_dlm_worker unless
+ * USER_LOCK_QUEUED is already set.  An inode reference is taken first;
+ * user_dlm_unblock_lock() drops it.  The callers in this file invoke
+ * this with l_lock held.
+ */
+static void __user_dlm_queue_lockres(struct user_lock_res *lockres)
+{
+	if (!(lockres->l_flags & USER_LOCK_QUEUED)) {
+		user_dlm_grab_inode_ref(lockres);
+
+		INIT_WORK(&lockres->l_work, user_dlm_unblock_lock);
+
+		queue_work(user_dlm_worker, &lockres->l_work);
+		lockres->l_flags |= USER_LOCK_QUEUED;
+	}
+}
+/*
+ * If @lockres is blocked and no local holders incompatible with
+ * l_blocking remain (none at all for LKM_EXMODE, no EX holders for
+ * LKM_PRMODE), queue the unblock work.  BUG()s when blocked with any
+ * other l_blocking value.
+ */
+static void __user_dlm_cond_queue_lockres(struct user_lock_res *lockres)
+{
+	int queue = 0;
+
+	if (!(lockres->l_flags & USER_LOCK_BLOCKED))
+		return;
+
+	switch (lockres->l_blocking) {
+	case LKM_EXMODE:
+		if (!lockres->l_ex_holders && !lockres->l_ro_holders)
+			queue = 1;
+		break;
+	case LKM_PRMODE:
+		if (!lockres->l_ex_holders)
+			queue = 1;
+		break;
+	default:
+		BUG();
+	}
+
+	if (queue)
+		__user_dlm_queue_lockres(lockres);
+}
+/*
+ * Blocking AST (bast) callback handed to dlmlock(); @opaque is the
+ * user_lock_res and @level the blocking level reported.  Marks the
+ * lockres USER_LOCK_BLOCKED, raises l_blocking to @level if that is
+ * higher, queues the unblock work and wakes l_event waiters.
+ */
+static void user_bast(void *opaque, int level)
+{
+	struct user_lock_res *lockres = opaque;
+
+	mlog(0, "Blocking AST fired for lockres %.*s. Blocking level %d\n",
+	     lockres->l_namelen, lockres->l_name, level);
+
+	spin_lock(&lockres->l_lock);
+	lockres->l_flags |= USER_LOCK_BLOCKED;
+	if (level > lockres->l_blocking)
+		lockres->l_blocking = level;
+
+	__user_dlm_queue_lockres(lockres);
+	spin_unlock(&lockres->l_lock);
+
+	wake_up(&lockres->l_event);
+}
+/*
+ * Completion callback handed to dlmunlock(), both for the teardown
+ * unlock issued by user_dlm_destroy_lock() and for the LKM_CANCEL
+ * issued by user_dlm_unblock_lock().  @opaque is the user_lock_res.
+ * Wakes l_event waiters on every path.
+ */
+static void user_unlock_ast(void *opaque, enum dlm_status status)
+{
+	struct user_lock_res *lockres = opaque;
+
+	mlog(0, "UNLOCK AST called on lock %.*s\n", lockres->l_namelen,
+	     lockres->l_name);
+
+	if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
+		mlog(ML_ERROR, "Dlm returns status %d\n", status);
+
+	spin_lock(&lockres->l_lock);
+	/* The teardown flag gets set early during the unlock process,
+	 * so test the cancel flag to make sure that this ast isn't
+	 * for a concurrent cancel. */
+	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
+	    && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
+		lockres->l_level = LKM_IVMODE;
+	} else if (status == DLM_CANCELGRANT) {
+		/* We tried to cancel a convert request, but it was
+		 * already granted. Don't clear the busy flag - the
+		 * ast should've done this already. */
+		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
+		goto out_noclear;
+	} else {
+		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+		/* Cancel succeeded, we want to re-queue */
+		lockres->l_requested = LKM_IVMODE; /* cancel an
+						    * upconvert
+						    * request. */
+		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
+		/* we want the unblock thread to look at it again
+		 * now. */
+		if (lockres->l_flags & USER_LOCK_BLOCKED)
+			__user_dlm_queue_lockres(lockres);
+	}
+
+	lockres->l_flags &= ~USER_LOCK_BUSY;
+out_noclear:
+	spin_unlock(&lockres->l_lock);
+
+	wake_up(&lockres->l_event);
+}
+/*
+ * Drop a reference on the inode that embeds @lockres (iput()).
+ */
+static inline void user_dlm_drop_inode_ref(struct user_lock_res *lockres)
+{
+	struct inode *inode;
+	inode = user_dlm_inode_from_user_lockres(lockres);
+	iput(inode);
+}
+/*
+ * Work function queued by __user_dlm_queue_lockres().  Tries to get out
+ * of the way of a blocking request: if one of our own requests is in
+ * flight (USER_LOCK_BUSY) it asks the DLM to cancel it, otherwise it
+ * downconverts once no incompatible local holders remain.  Every path
+ * ends by dropping the inode reference taken when the work was queued.
+ */
+static void user_dlm_unblock_lock(struct work_struct *work)
+{
+	int new_level, status;
+	struct user_lock_res *lockres =
+		container_of(work, struct user_lock_res, l_work);
+	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
+
+	mlog(0, "processing lockres %.*s\n", lockres->l_namelen,
+	     lockres->l_name);
+
+	spin_lock(&lockres->l_lock);
+
+	mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
+			"Lockres %.*s, flags 0x%x\n",
+			lockres->l_namelen, lockres->l_name, lockres->l_flags);
+
+	/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
+	 * set, we want user_ast to clear it. */
+	lockres->l_flags &= ~USER_LOCK_QUEUED;
+
+	/* It's valid to get here and no longer be blocked - if we get
+	 * several basts in a row, we might be queued by the first
+	 * one, the unblock thread might run and clear the queued
+	 * flag, and finally we might get another bast which re-queues
+	 * us before our ast for the downconvert is called. */
+	if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
+		spin_unlock(&lockres->l_lock);
+		goto drop_ref;
+	}
+
+	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+		spin_unlock(&lockres->l_lock);
+		goto drop_ref;
+	}
+
+	if (lockres->l_flags & USER_LOCK_BUSY) {
+		if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
+			spin_unlock(&lockres->l_lock);
+			goto drop_ref;
+		}
+
+		lockres->l_flags |= USER_LOCK_IN_CANCEL;
+		spin_unlock(&lockres->l_lock);
+
+		status = dlmunlock(dlm,
+				   &lockres->l_lksb,
+				   LKM_CANCEL,
+				   user_unlock_ast,
+				   lockres);
+		if (status != DLM_NORMAL)
+			user_log_dlm_error("dlmunlock", status, lockres);
+		goto drop_ref;
+	}
+
+	/* If there are still incompat holders, we can exit safely
+	 * without worrying about re-queueing this lock as that will
+	 * happen on the last call to user_dlm_cluster_unlock. */
+	if ((lockres->l_blocking == LKM_EXMODE)
+	    && (lockres->l_ex_holders || lockres->l_ro_holders)) {
+		spin_unlock(&lockres->l_lock);
+		mlog(0, "can't downconvert for ex: ro = %u, ex = %u\n",
+		     lockres->l_ro_holders, lockres->l_ex_holders);
+		goto drop_ref;
+	}
+
+	if ((lockres->l_blocking == LKM_PRMODE)
+	    && lockres->l_ex_holders) {
+		spin_unlock(&lockres->l_lock);
+		mlog(0, "can't downconvert for pr: ex = %u\n",
+		     lockres->l_ex_holders);
+		goto drop_ref;
+	}
+
+	/* yay, we can downconvert now. */
+	new_level = user_highest_compat_lock_level(lockres->l_blocking);
+	lockres->l_requested = new_level;
+	lockres->l_flags |= USER_LOCK_BUSY;
+	mlog(0, "Downconvert lock from %d to %d\n",
+	     lockres->l_level, new_level);
+	spin_unlock(&lockres->l_lock);
+
+	/* need lock downconvert request now... */
+	status = dlmlock(dlm,
+			 new_level,
+			 &lockres->l_lksb,
+			 LKM_CONVERT|LKM_VALBLK,
+			 lockres->l_name,
+			 lockres->l_namelen,
+			 user_ast,
+			 lockres,
+			 user_bast);
+	if (status != DLM_NORMAL) {
+		user_log_dlm_error("dlmlock", status, lockres);
+		user_recover_from_dlm_error(lockres);
+	}
+
+drop_ref:
+	user_dlm_drop_inode_ref(lockres);
+}
+/*
+ * Count one more local holder at @level (LKM_EXMODE or LKM_PRMODE).
+ * BUG()s on any other level.
+ */
+static inline void user_dlm_inc_holders(struct user_lock_res *lockres,
+					int level)
+{
+	switch(level) {
+	case LKM_EXMODE:
+		lockres->l_ex_holders++;
+		break;
+	case LKM_PRMODE:
+		lockres->l_ro_holders++;
+		break;
+	default:
+		BUG();
+	}
+}
+
+/* predict what lock level we'll be dropping down to on behalf
+ * of another node, and return true if the currently wanted
+ * level will be compatible with it.  BUG()s if the lockres is
+ * not marked USER_LOCK_BLOCKED. */
+static inline int
+user_may_continue_on_blocked_lock(struct user_lock_res *lockres,
+				  int wanted)
+{
+	BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
+
+	return wanted <= user_highest_compat_lock_level(lockres->l_blocking);
+}
+/*
+ * Take a local hold on @lockres at @level (LKM_EXMODE or LKM_PRMODE),
+ * first asking the DLM for a new lock or an upconvert when the level
+ * currently granted is lower than @level.  @lkm_flags is OR-ed into the
+ * flags passed to dlmlock().
+ *
+ * Returns 0 with the matching holder count incremented, -EINVAL for any
+ * other @level or for a dlmlock() failure, -EAGAIN when LKM_NOQUEUE was
+ * passed and dlmlock() returned DLM_NOTQUEUED, or -ERESTARTSYS when a
+ * signal is pending at the start of a pass through the retry loop.
+ */
+int user_dlm_cluster_lock(struct user_lock_res *lockres,
+			  int level,
+			  int lkm_flags)
+{
+	int status, local_flags;
+	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
+
+	if (level != LKM_EXMODE &&
+	    level != LKM_PRMODE) {
+		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
+		     lockres->l_namelen, lockres->l_name);
+		status = -EINVAL;
+		goto bail;
+	}
+
+	mlog(0, "lockres %.*s: asking for %s lock, passed flags = 0x%x\n",
+	     lockres->l_namelen, lockres->l_name,
+	     (level == LKM_EXMODE) ? "LKM_EXMODE" : "LKM_PRMODE",
+	     lkm_flags);
+
+again:
+	if (signal_pending(current)) {
+		status = -ERESTARTSYS;
+		goto bail;
+	}
+
+	spin_lock(&lockres->l_lock);
+
+	/* We only compare against the currently granted level
+	 * here. If the lock is blocked waiting on a downconvert,
+	 * we'll get caught below. */
+	if ((lockres->l_flags & USER_LOCK_BUSY) &&
+	    (level > lockres->l_level)) {
+		/* is someone sitting in dlmlock? If so, wait on
+		 * them. */
+		spin_unlock(&lockres->l_lock);
+
+		user_wait_on_busy_lock(lockres);
+		goto again;
+	}
+
+	if ((lockres->l_flags & USER_LOCK_BLOCKED) &&
+	    (!user_may_continue_on_blocked_lock(lockres, level))) {
+		/* the lock is currently blocked on behalf of
+		 * another node; wait until it no longer is. */
+		spin_unlock(&lockres->l_lock);
+
+		user_wait_on_blocked_lock(lockres);
+		goto again;
+	}
+
+	if (level > lockres->l_level) {
+		local_flags = lkm_flags | LKM_VALBLK;
+		if (lockres->l_level != LKM_IVMODE)
+			local_flags |= LKM_CONVERT;
+
+		lockres->l_requested = level;
+		lockres->l_flags |= USER_LOCK_BUSY;
+		spin_unlock(&lockres->l_lock);
+
+		BUG_ON(level == LKM_IVMODE);
+		BUG_ON(level == LKM_NLMODE);
+
+		/* call dlmlock to upgrade lock now */
+		status = dlmlock(dlm,
+				 level,
+				 &lockres->l_lksb,
+				 local_flags,
+				 lockres->l_name,
+				 lockres->l_namelen,
+				 user_ast,
+				 lockres,
+				 user_bast);
+		if (status != DLM_NORMAL) {
+			if ((lkm_flags & LKM_NOQUEUE) &&
+			    (status == DLM_NOTQUEUED))
+				status = -EAGAIN;
+			else {
+				user_log_dlm_error("dlmlock", status, lockres);
+				status = -EINVAL;
+			}
+			user_recover_from_dlm_error(lockres);
+			goto bail;
+		}
+
+		user_wait_on_busy_lock(lockres);
+		goto again;	/* re-evaluate everything under l_lock */
+	}
+
+	user_dlm_inc_holders(lockres, level);
+	spin_unlock(&lockres->l_lock);
+
+	status = 0;
+bail:
+	return status;
+}
+/*
+ * Count one fewer local holder at @level (LKM_EXMODE or LKM_PRMODE).
+ * BUG()s if that count is already zero or on any other level.
+ */
+static inline void user_dlm_dec_holders(struct user_lock_res *lockres,
+					int level)
+{
+	switch(level) {
+	case LKM_EXMODE:
+		BUG_ON(!lockres->l_ex_holders);
+		lockres->l_ex_holders--;
+		break;
+	case LKM_PRMODE:
+		BUG_ON(!lockres->l_ro_holders);
+		lockres->l_ro_holders--;
+		break;
+	default:
+		BUG();
+	}
+}
+/*
+ * Drop one local hold at @level (LKM_EXMODE or LKM_PRMODE) and, if the
+ * lock is blocked and no incompatible holders remain, queue the unblock
+ * work.  Any other @level is logged and ignored.
+ */
+void user_dlm_cluster_unlock(struct user_lock_res *lockres,
+			     int level)
+{
+	if (level != LKM_EXMODE &&
+	    level != LKM_PRMODE) {
+		mlog(ML_ERROR, "lockres %.*s: invalid request!\n",
+		     lockres->l_namelen, lockres->l_name);
+		return;
+	}
+
+	spin_lock(&lockres->l_lock);
+	user_dlm_dec_holders(lockres, level);
+	__user_dlm_cond_queue_lockres(lockres);
+	spin_unlock(&lockres->l_lock);
+}
+
+/*
+ * Copy @len bytes from @val into the lock value block (l_lksb.lvb) of
+ * the lockres belonging to @inode, with l_lock held.
+ *
+ * BUG()s if @len exceeds DLM_LVB_LEN or if the granted level is below
+ * LKM_EXMODE.
+ */
+void user_dlm_write_lvb(struct inode *inode,
+			const char *val,
+			unsigned int len)
+{
+	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
+	char *lvb = lockres->l_lksb.lvb;
+
+	BUG_ON(len > DLM_LVB_LEN);
+
+	spin_lock(&lockres->l_lock);
+
+	BUG_ON(lockres->l_level < LKM_EXMODE);
+	memcpy(lvb, val, len);
+
+	spin_unlock(&lockres->l_lock);
+}
+
+/*
+ * Copy @len bytes of the lock value block (l_lksb.lvb) of the lockres
+ * belonging to @inode into @val, with l_lock held.
+ *
+ * BUG()s if @len exceeds DLM_LVB_LEN or if the granted level is below
+ * LKM_PRMODE.
+ */
+void user_dlm_read_lvb(struct inode *inode,
+		       char *val,
+		       unsigned int len)
+{
+	struct user_lock_res *lockres = &DLMFS_I(inode)->ip_lockres;
+	char *lvb = lockres->l_lksb.lvb;
+
+	BUG_ON(len > DLM_LVB_LEN);
+
+	spin_lock(&lockres->l_lock);
+
+	BUG_ON(lockres->l_level < LKM_PRMODE);
+	memcpy(val, lvb, len);
+
+	spin_unlock(&lockres->l_lock);
+}
+
+/*
+ * Put @lockres into its initial state: everything zeroed, all three
+ * level fields set to LKM_IVMODE, and the resource name copied from the
+ * name of @dentry.
+ *
+ * BUG()s if the dentry name is USER_DLM_LOCK_ID_MAX_LEN bytes or longer.
+ */
+void user_dlm_lock_res_init(struct user_lock_res *lockres,
+			    struct dentry *dentry)
+{
+	memset(lockres, 0, sizeof(*lockres));
+
+	spin_lock_init(&lockres->l_lock);
+	init_waitqueue_head(&lockres->l_event);
+	lockres->l_level = LKM_IVMODE;
+	lockres->l_requested = LKM_IVMODE;
+	lockres->l_blocking = LKM_IVMODE;
+
+	/* should have been checked before getting here. */
+	BUG_ON(dentry->d_name.len >= USER_DLM_LOCK_ID_MAX_LEN);
+
+	/* l_name was zeroed above and the name is shorter than the
+	 * buffer, so it stays NUL-terminated. */
+	memcpy(lockres->l_name,
+	       dentry->d_name.name,
+	       dentry->d_name.len);
+	lockres->l_namelen = dentry->d_name.len;
+}
+
+/*
+ * Release the DLM lock behind @lockres so the owning inode can go away.
+ *
+ * Returns 0 when the lock was released, was never attached, or a
+ * teardown was already started by an earlier call; -EBUSY when local
+ * holders remain; -EINVAL when dlmunlock() fails.
+ *
+ * On both failure returns the flags changed here are put back the way
+ * they were, so that a later call really retries the teardown instead
+ * of seeing USER_LOCK_IN_TEARDOWN and reporting success, and so that
+ * nobody is left sleeping on a USER_LOCK_BUSY that will never clear.
+ */
+int user_dlm_destroy_lock(struct user_lock_res *lockres)
+{
+	int status = -EBUSY;
+	struct dlm_ctxt *dlm = dlm_ctxt_from_user_lockres(lockres);
+
+	mlog(0, "asked to destroy %.*s\n", lockres->l_namelen, lockres->l_name);
+
+	spin_lock(&lockres->l_lock);
+	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+		spin_unlock(&lockres->l_lock);
+		return 0;
+	}
+
+	lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
+
+	/* let any in-flight dlmlock/dlmunlock request finish first */
+	while (lockres->l_flags & USER_LOCK_BUSY) {
+		spin_unlock(&lockres->l_lock);
+
+		user_wait_on_busy_lock(lockres);
+
+		spin_lock(&lockres->l_lock);
+	}
+
+	if (lockres->l_ro_holders || lockres->l_ex_holders) {
+		/* Teardown is abandoned, so take the flag back.  Leaving
+		 * it set would make the next call return 0 at the top
+		 * while the DLM lock is still attached. */
+		lockres->l_flags &= ~USER_LOCK_IN_TEARDOWN;
+		spin_unlock(&lockres->l_lock);
+		goto bail;
+	}
+
+	status = 0;
+	if (!(lockres->l_flags & USER_LOCK_ATTACHED)) {
+		/* nothing was ever granted, so there is nothing to
+		 * unlock; the teardown flag stays set. */
+		spin_unlock(&lockres->l_lock);
+		goto bail;
+	}
+
+	lockres->l_flags &= ~USER_LOCK_ATTACHED;
+	lockres->l_flags |= USER_LOCK_BUSY;
+	spin_unlock(&lockres->l_lock);
+
+	status = dlmunlock(dlm,
+			   &lockres->l_lksb,
+			   LKM_VALBLK,
+			   user_unlock_ast,
+			   lockres);
+	if (status != DLM_NORMAL) {
+		user_log_dlm_error("dlmunlock", status, lockres);
+
+		/* The unlock was not accepted, so user_unlock_ast is not
+		 * expected to run and clear USER_LOCK_BUSY for us
+		 * (NOTE(review): confirm against dlmunlock()).  Undo
+		 * our flag changes and wake anyone waiting on busy. */
+		spin_lock(&lockres->l_lock);
+		lockres->l_flags |= USER_LOCK_ATTACHED;
+		lockres->l_flags &= ~(USER_LOCK_BUSY | USER_LOCK_IN_TEARDOWN);
+		spin_unlock(&lockres->l_lock);
+
+		wake_up(&lockres->l_event);
+
+		status = -EINVAL;
+		goto bail;
+	}
+
+	/* user_unlock_ast clears USER_LOCK_BUSY when the unlock is done */
+	user_wait_on_busy_lock(lockres);
+
+	status = 0;
+bail:
+	return status;
+}
+
+/*
+ * Register a DLM domain named after @name, keyed by the crc32 of that
+ * name, with protocol version @proto.
+ *
+ * Returns what dlm_register_domain() returned (logged when it is an
+ * error pointer), or ERR_PTR(-ENOMEM) if the temporary NUL-terminated
+ * copy of the name cannot be allocated.
+ */
+struct dlm_ctxt *user_dlm_register_context(struct qstr *name,
+					   struct dlm_protocol_version *proto)
+{
+	struct dlm_ctxt *dlm;
+	u32 dlm_key;
+	char *domain;
+
+	domain = kmalloc(name->len + 1, GFP_NOFS);
+	if (!domain) {
+		mlog_errno(-ENOMEM);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dlm_key = crc32_le(0, name->name, name->len);
+
+	/* build a NUL-terminated copy; only name->len bytes are read */
+	snprintf(domain, name->len + 1, "%.*s", name->len, name->name);
+
+	dlm = dlm_register_domain(domain, dlm_key, proto);
+	if (IS_ERR(dlm))
+		mlog_errno(PTR_ERR(dlm));
+
+	kfree(domain);
+	return dlm;
+}
+
+/*
+ * Counterpart of user_dlm_register_context(): unregister @dlm.
+ */
+void user_dlm_unregister_context(struct dlm_ctxt *dlm)
+{
+	dlm_unregister_domain(dlm);
+}
diff --git a/fs/ocfs2/dlmfs/userdlm.h b/fs/ocfs2/dlmfs/userdlm.h
new file mode 100644
index 00000000000..0c3cc03c61f
--- /dev/null
+++ b/fs/ocfs2/dlmfs/userdlm.h
@@ -0,0 +1,113 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * userdlm.h
+ *
+ * Userspace dlm defines
+ *
+ * Copyright (C) 2002, 2004 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+
+#ifndef USERDLM_H
+#define USERDLM_H
+
+#include
+#include
+#include
+#include
+
+/* user_lock_res->l_flags flags. */
+#define USER_LOCK_ATTACHED      (0x00000001) /* a DLM lock has been
+					      * granted (see user_ast) */
+#define USER_LOCK_BUSY          (0x00000002) /* a dlmlock/dlmunlock
+					      * request is in flight */
+#define USER_LOCK_BLOCKED       (0x00000004) /* blocked waiting to
+					      * downconvert */
+#define USER_LOCK_IN_TEARDOWN   (0x00000008) /* we're currently
+					      * destroying this
+					      * lock. */
+#define USER_LOCK_QUEUED        (0x00000010) /* lock is on the
+					      * workqueue */
+#define USER_LOCK_IN_CANCEL     (0x00000020) /* cancelling a convert */
+
+struct user_lock_res {
+	spinlock_t               l_lock;	/* taken around l_flags, level and holder updates */
+
+	int                      l_flags;	/* USER_LOCK_* bits */
+
+#define USER_DLM_LOCK_ID_MAX_LEN  32
+	char                     l_name[USER_DLM_LOCK_ID_MAX_LEN];
+	int                      l_namelen;
+	int                      l_level;	/* level currently granted */
+	unsigned int             l_ro_holders;	/* local LKM_PRMODE holders */
+	unsigned int             l_ex_holders;	/* local LKM_EXMODE holders */
+	struct dlm_lockstatus    l_lksb;
+
+	int                      l_requested;	/* level asked of the DLM, LKM_IVMODE if none */
+	int                      l_blocking;	/* highest level reported by user_bast */
+
+	wait_queue_head_t        l_event;	/* woken when l_flags change */
+
+	struct work_struct       l_work;	/* runs user_dlm_unblock_lock */
+};
+
+extern struct workqueue_struct *user_dlm_worker;
+
+void user_dlm_lock_res_init(struct user_lock_res *lockres,
+			    struct dentry *dentry);
+int user_dlm_destroy_lock(struct user_lock_res *lockres);
+int user_dlm_cluster_lock(struct user_lock_res *lockres,
+			  int level,
+			  int lkm_flags);
+void user_dlm_cluster_unlock(struct user_lock_res *lockres,
+			     int level);
+void user_dlm_write_lvb(struct inode *inode,
+			const char *val,
+			unsigned int len);
+void user_dlm_read_lvb(struct inode *inode,
+		       char *val,
+		       unsigned int len);
+struct dlm_ctxt *user_dlm_register_context(struct qstr *name,
+					   struct dlm_protocol_version *proto);
+void user_dlm_unregister_context(struct dlm_ctxt *dlm);
+
+struct dlmfs_inode_private {
+	struct dlm_ctxt             *ip_dlm;
+
+	struct user_lock_res ip_lockres; /* unused for directories. */
+	struct inode *ip_parent;
+
+	struct inode ip_vfs_inode;
+};
+
+static inline struct dlmfs_inode_private *
+DLMFS_I(struct inode *inode)
+{
+        return container_of(inode,
+			    struct dlmfs_inode_private,
+			    ip_vfs_inode);
+}
+
+struct dlmfs_filp_private {
+	int			fp_lock_level;
+};
+
+#define DLMFS_MAGIC	0x76a9f425
+
+#endif /* USERDLM_H */