diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/nfsd | |
download | kernel_samsung_smdk4412-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz kernel_samsung_smdk4412-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.bz2 kernel_samsung_smdk4412-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'fs/nfsd')
-rw-r--r-- | fs/nfsd/Makefile | 12 | ||||
-rw-r--r-- | fs/nfsd/auth.c | 63 | ||||
-rw-r--r-- | fs/nfsd/export.c | 1200 | ||||
-rw-r--r-- | fs/nfsd/lockd.c | 79 | ||||
-rw-r--r-- | fs/nfsd/nfs3proc.c | 702 | ||||
-rw-r--r-- | fs/nfsd/nfs3xdr.c | 1092 | ||||
-rw-r--r-- | fs/nfsd/nfs4acl.c | 954 | ||||
-rw-r--r-- | fs/nfsd/nfs4callback.c | 547 | ||||
-rw-r--r-- | fs/nfsd/nfs4idmap.c | 588 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 984 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 3320 | ||||
-rw-r--r-- | fs/nfsd/nfs4xdr.c | 2536 | ||||
-rw-r--r-- | fs/nfsd/nfscache.c | 328 | ||||
-rw-r--r-- | fs/nfsd/nfsctl.c | 438 | ||||
-rw-r--r-- | fs/nfsd/nfsfh.c | 532 | ||||
-rw-r--r-- | fs/nfsd/nfsproc.c | 605 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 385 | ||||
-rw-r--r-- | fs/nfsd/nfsxdr.c | 511 | ||||
-rw-r--r-- | fs/nfsd/stats.c | 101 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 1859 |
20 files changed, 16836 insertions, 0 deletions
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile new file mode 100644 index 00000000000..b8680a247f8 --- /dev/null +++ b/fs/nfsd/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the Linux nfs server +# + +obj-$(CONFIG_NFSD) += nfsd.o + +nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o +nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o +nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ + nfs4acl.o nfs4callback.o +nfsd-objs := $(nfsd-y) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c new file mode 100644 index 00000000000..cfe9ce88161 --- /dev/null +++ b/fs/nfsd/auth.c @@ -0,0 +1,63 @@ +/* + * linux/fs/nfsd/auth.c + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svcauth.h> +#include <linux/nfsd/nfsd.h> + +#define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE)) + +int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) +{ + struct svc_cred *cred = &rqstp->rq_cred; + int i; + int ret; + + if (exp->ex_flags & NFSEXP_ALLSQUASH) { + cred->cr_uid = exp->ex_anon_uid; + cred->cr_gid = exp->ex_anon_gid; + put_group_info(cred->cr_group_info); + cred->cr_group_info = groups_alloc(0); + } else if (exp->ex_flags & NFSEXP_ROOTSQUASH) { + struct group_info *gi; + if (!cred->cr_uid) + cred->cr_uid = exp->ex_anon_uid; + if (!cred->cr_gid) + cred->cr_gid = exp->ex_anon_gid; + gi = groups_alloc(cred->cr_group_info->ngroups); + if (gi) + for (i = 0; i < cred->cr_group_info->ngroups; i++) { + if (!GROUP_AT(cred->cr_group_info, i)) + GROUP_AT(gi, i) = exp->ex_anon_gid; + else + GROUP_AT(gi, i) = GROUP_AT(cred->cr_group_info, i); + } + put_group_info(cred->cr_group_info); + cred->cr_group_info = gi; + } + + if (cred->cr_uid != (uid_t) -1) + current->fsuid = cred->cr_uid; + else + current->fsuid = exp->ex_anon_uid; + if (cred->cr_gid != (gid_t) -1) + current->fsgid = cred->cr_gid; + else + current->fsgid = exp->ex_anon_gid; + + if (!cred->cr_group_info) + return -ENOMEM; + ret = set_current_groups(cred->cr_group_info); + if ((cred->cr_uid)) { + cap_t(current->cap_effective) &= ~CAP_NFSD_MASK; + } else { + cap_t(current->cap_effective) |= (CAP_NFSD_MASK & + current->cap_permitted); + } + return ret; +} diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c new file mode 100644 index 00000000000..9a11aa39e2e --- /dev/null +++ b/fs/nfsd/export.c @@ -0,0 +1,1200 @@ +#define MSNFS /* HACK HACK */ +/* + * linux/fs/nfsd/export.c + * + * NFS exporting and validation. + * + * We maintain a list of clients, each of which has a list of + * exports. To export an fs to a given client, you first have + * to create the client entry with NFSCTL_ADDCLIENT, which + * creates a client control block and adds it to the hash + * table. Then, you call NFSCTL_EXPORT for each fs. + * + * + * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de> + */ + +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/stat.h> +#include <linux/in.h> +#include <linux/seq_file.h> +#include <linux/syscalls.h> +#include <linux/rwsem.h> +#include <linux/dcache.h> +#include <linux/namei.h> +#include <linux/mount.h> +#include <linux/hash.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/nfsfh.h> +#include <linux/nfsd/syscall.h> +#include <linux/lockd/bind.h> + +#define NFSDDBG_FACILITY NFSDDBG_EXPORT +#define NFSD_PARANOIA 1 + +typedef struct auth_domain svc_client; +typedef struct svc_export svc_export; + +static void exp_do_unexport(svc_export *unexp); +static int exp_verify_string(char *cp, int max); + +/* + * We have two caches. + * One maps client+vfsmnt+dentry to export options - the export map + * The other maps client+filehandle-fragment to export options. - the expkey map + * + * The export options are actually stored in the first map, and the + * second map contains a reference to the entry in the first map. + */ + +#define EXPKEY_HASHBITS 8 +#define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS) +#define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1) +static struct cache_head *expkey_table[EXPKEY_HASHMAX]; + +static inline int svc_expkey_hash(struct svc_expkey *item) +{ + int hash = item->ek_fsidtype; + char * cp = (char*)item->ek_fsid; + int len = key_len(item->ek_fsidtype); + + hash ^= hash_mem(cp, len, EXPKEY_HASHBITS); + hash ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS); + return hash & EXPKEY_HASHMASK; +} + +void expkey_put(struct cache_head *item, struct cache_detail *cd) +{ + if (cache_put(item, cd)) { + struct svc_expkey *key = container_of(item, struct svc_expkey, h); + if (test_bit(CACHE_VALID, &item->flags) && + !test_bit(CACHE_NEGATIVE, &item->flags)) + exp_put(key->ek_export); + auth_domain_put(key->ek_client); + kfree(key); + } +} + +static void expkey_request(struct cache_detail *cd, + struct cache_head *h, + char **bpp, int *blen) +{ + /* client fsidtype \xfsid */ + struct svc_expkey *ek = container_of(h, struct svc_expkey, h); + char type[5]; + + qword_add(bpp, blen, ek->ek_client->name); + snprintf(type, 5, "%d", ek->ek_fsidtype); + qword_add(bpp, blen, type); + qword_addhex(bpp, blen, (char*)ek->ek_fsid, key_len(ek->ek_fsidtype)); + (*bpp)[-1] = '\n'; +} + +static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *, int); +static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) +{ + /* client fsidtype fsid [path] */ + char *buf; + int len; + struct auth_domain *dom = NULL; + int err; + int fsidtype; + char *ep; + struct svc_expkey key; + + if (mesg[mlen-1] != '\n') + return -EINVAL; + mesg[mlen-1] = 0; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; + if (!buf) goto out; + + err = -EINVAL; + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out; + + err = -ENOENT; + dom = auth_domain_find(buf); + if (!dom) + goto out; + dprintk("found domain %s\n", buf); + + err = -EINVAL; + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out; + fsidtype = simple_strtoul(buf, &ep, 10); + if (*ep) + goto out; + dprintk("found fsidtype %d\n", fsidtype); + if (fsidtype > 2) + goto out; + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out; + dprintk("found fsid length %d\n", len); + if (len != key_len(fsidtype)) + goto out; + + /* OK, we seem to have a valid key */ + key.h.flags = 0; + key.h.expiry_time = get_expiry(&mesg); + if (key.h.expiry_time == 0) + goto out; + + key.ek_client = dom; + key.ek_fsidtype = fsidtype; + memcpy(key.ek_fsid, buf, len); + + /* now we want a pathname, or empty meaning NEGATIVE */ + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) + goto out; + dprintk("Path seems to be <%s>\n", buf); + err = 0; + if (len == 0) { + struct svc_expkey *ek; + set_bit(CACHE_NEGATIVE, &key.h.flags); + ek = svc_expkey_lookup(&key, 1); + if (ek) + expkey_put(&ek->h, &svc_expkey_cache); + } else { + struct nameidata nd; + struct svc_expkey *ek; + struct svc_export *exp; + err = path_lookup(buf, 0, &nd); + if (err) + goto out; + + dprintk("Found the path %s\n", buf); + exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); + + err = -ENOENT; + if (!exp) + goto out_nd; + key.ek_export = exp; + dprintk("And found export\n"); + + ek = svc_expkey_lookup(&key, 1); + if (ek) + expkey_put(&ek->h, &svc_expkey_cache); + exp_put(exp); + err = 0; + out_nd: + path_release(&nd); + } + cache_flush(); + out: + if (dom) + auth_domain_put(dom); + if (buf) + kfree(buf); + return err; +} + +static int expkey_show(struct seq_file *m, + struct cache_detail *cd, + struct cache_head *h) +{ + struct svc_expkey *ek ; + + if (h ==NULL) { + seq_puts(m, "#domain fsidtype fsid [path]\n"); + return 0; + } + ek = container_of(h, struct svc_expkey, h); + seq_printf(m, "%s %d 0x%08x", ek->ek_client->name, + ek->ek_fsidtype, ek->ek_fsid[0]); + if (ek->ek_fsidtype != 1) + seq_printf(m, "%08x", ek->ek_fsid[1]); + if (ek->ek_fsidtype == 2) + seq_printf(m, "%08x", ek->ek_fsid[2]); + if (test_bit(CACHE_VALID, &h->flags) && + !test_bit(CACHE_NEGATIVE, &h->flags)) { + seq_printf(m, " "); + seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n"); + } + seq_printf(m, "\n"); + return 0; +} + +struct cache_detail svc_expkey_cache = { + .hash_size = EXPKEY_HASHMAX, + .hash_table = expkey_table, + .name = "nfsd.fh", + .cache_put = expkey_put, + .cache_request = expkey_request, + .cache_parse = expkey_parse, + .cache_show = expkey_show, +}; + +static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b) +{ + if (a->ek_fsidtype != b->ek_fsidtype || + a->ek_client != b->ek_client || + memcmp(a->ek_fsid, b->ek_fsid, key_len(a->ek_fsidtype)) != 0) + return 0; + return 1; +} + +static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item) +{ + cache_get(&item->ek_client->h); + new->ek_client = item->ek_client; + new->ek_fsidtype = item->ek_fsidtype; + new->ek_fsid[0] = item->ek_fsid[0]; + new->ek_fsid[1] = item->ek_fsid[1]; + new->ek_fsid[2] = item->ek_fsid[2]; +} + +static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item) +{ + cache_get(&item->ek_export->h); + new->ek_export = item->ek_export; +} + +static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */ + +#define EXPORT_HASHBITS 8 +#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) +#define EXPORT_HASHMASK (EXPORT_HASHMAX -1) + +static struct cache_head *export_table[EXPORT_HASHMAX]; + +static inline int svc_export_hash(struct svc_export *item) +{ + int rv; + + rv = hash_ptr(item->ex_client, EXPORT_HASHBITS); + rv ^= hash_ptr(item->ex_dentry, EXPORT_HASHBITS); + rv ^= hash_ptr(item->ex_mnt, EXPORT_HASHBITS); + return rv; +} + +void svc_export_put(struct cache_head *item, struct cache_detail *cd) +{ + if (cache_put(item, cd)) { + struct svc_export *exp = container_of(item, struct svc_export, h); + dput(exp->ex_dentry); + mntput(exp->ex_mnt); + auth_domain_put(exp->ex_client); + kfree(exp); + } +} + +static void svc_export_request(struct cache_detail *cd, + struct cache_head *h, + char **bpp, int *blen) +{ + /* client path */ + struct svc_export *exp = container_of(h, struct svc_export, h); + char *pth; + + qword_add(bpp, blen, exp->ex_client->name); + pth = d_path(exp->ex_dentry, exp->ex_mnt, *bpp, *blen); + if (IS_ERR(pth)) { + /* is this correct? */ + (*bpp)[0] = '\n'; + return; + } + qword_add(bpp, blen, pth); + (*bpp)[-1] = '\n'; +} + +static struct svc_export *svc_export_lookup(struct svc_export *, int); + +static int check_export(struct inode *inode, int flags) +{ + + /* We currently export only dirs and regular files. + * This is what umountd does. + */ + if (!S_ISDIR(inode->i_mode) && + !S_ISREG(inode->i_mode)) + return -ENOTDIR; + + /* There are two requirements on a filesystem to be exportable. + * 1: We must be able to identify the filesystem from a number. + * either a device number (so FS_REQUIRES_DEV needed) + * or an FSID number (so NFSEXP_FSID needed). + * 2: We must be able to find an inode from a filehandle. + * This means that s_export_op must be set. + */ + if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) && + !(flags & NFSEXP_FSID)) { + dprintk("exp_export: export of non-dev fs without fsid"); + return -EINVAL; + } + if (!inode->i_sb->s_export_op) { + dprintk("exp_export: export of invalid fs type.\n"); + return -EINVAL; + } + + /* Ok, we can export it */; + if (!inode->i_sb->s_export_op->find_exported_dentry) + inode->i_sb->s_export_op->find_exported_dentry = + find_exported_dentry; + return 0; + +} + +static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) +{ + /* client path expiry [flags anonuid anongid fsid] */ + char *buf; + int len; + int err; + struct auth_domain *dom = NULL; + struct nameidata nd; + struct svc_export exp, *expp; + int an_int; + + nd.dentry = NULL; + + if (mesg[mlen-1] != '\n') + return -EINVAL; + mesg[mlen-1] = 0; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + err = -ENOMEM; + if (!buf) goto out; + + /* client */ + len = qword_get(&mesg, buf, PAGE_SIZE); + err = -EINVAL; + if (len <= 0) goto out; + + err = -ENOENT; + dom = auth_domain_find(buf); + if (!dom) + goto out; + + /* path */ + err = -EINVAL; + if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out; + err = path_lookup(buf, 0, &nd); + if (err) goto out; + + exp.h.flags = 0; + exp.ex_client = dom; + exp.ex_mnt = nd.mnt; + exp.ex_dentry = nd.dentry; + + /* expiry */ + err = -EINVAL; + exp.h.expiry_time = get_expiry(&mesg); + if (exp.h.expiry_time == 0) + goto out; + + /* flags */ + err = get_int(&mesg, &an_int); + if (err == -ENOENT) + set_bit(CACHE_NEGATIVE, &exp.h.flags); + else { + if (err || an_int < 0) goto out; + exp.ex_flags= an_int; + + /* anon uid */ + err = get_int(&mesg, &an_int); + if (err) goto out; + exp.ex_anon_uid= an_int; + + /* anon gid */ + err = get_int(&mesg, &an_int); + if (err) goto out; + exp.ex_anon_gid= an_int; + + /* fsid */ + err = get_int(&mesg, &an_int); + if (err) goto out; + exp.ex_fsid = an_int; + + err = check_export(nd.dentry->d_inode, exp.ex_flags); + if (err) goto out; + } + + expp = svc_export_lookup(&exp, 1); + if (expp) + exp_put(expp); + err = 0; + cache_flush(); + out: + if (nd.dentry) + path_release(&nd); + if (dom) + auth_domain_put(dom); + if (buf) + kfree(buf); + return err; +} + +static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong); + +static int svc_export_show(struct seq_file *m, + struct cache_detail *cd, + struct cache_head *h) +{ + struct svc_export *exp ; + + if (h ==NULL) { + seq_puts(m, "#path domain(flags)\n"); + return 0; + } + exp = container_of(h, struct svc_export, h); + seq_path(m, exp->ex_mnt, exp->ex_dentry, " \t\n\\"); + seq_putc(m, '\t'); + seq_escape(m, exp->ex_client->name, " \t\n\\"); + seq_putc(m, '('); + if (test_bit(CACHE_VALID, &h->flags) && + !test_bit(CACHE_NEGATIVE, &h->flags)) + exp_flags(m, exp->ex_flags, exp->ex_fsid, + exp->ex_anon_uid, exp->ex_anon_gid); + seq_puts(m, ")\n"); + return 0; +} +struct cache_detail svc_export_cache = { + .hash_size = EXPORT_HASHMAX, + .hash_table = export_table, + .name = "nfsd.export", + .cache_put = svc_export_put, + .cache_request = svc_export_request, + .cache_parse = svc_export_parse, + .cache_show = svc_export_show, +}; + +static inline int svc_export_match(struct svc_export *a, struct svc_export *b) +{ + return a->ex_client == b->ex_client && + a->ex_dentry == b->ex_dentry && + a->ex_mnt == b->ex_mnt; +} +static inline void svc_export_init(struct svc_export *new, struct svc_export *item) +{ + cache_get(&item->ex_client->h); + new->ex_client = item->ex_client; + new->ex_dentry = dget(item->ex_dentry); + new->ex_mnt = mntget(item->ex_mnt); +} + +static inline void svc_export_update(struct svc_export *new, struct svc_export *item) +{ + new->ex_flags = item->ex_flags; + new->ex_anon_uid = item->ex_anon_uid; + new->ex_anon_gid = item->ex_anon_gid; + new->ex_fsid = item->ex_fsid; +} + +static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */ + + +struct svc_expkey * +exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) +{ + struct svc_expkey key, *ek; + int err; + + if (!clp) + return NULL; + + key.ek_client = clp; + key.ek_fsidtype = fsid_type; + memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); + + ek = svc_expkey_lookup(&key, 0); + if (ek != NULL) + if ((err = cache_check(&svc_expkey_cache, &ek->h, reqp))) + ek = ERR_PTR(err); + return ek; +} + +static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, + struct svc_export *exp) +{ + struct svc_expkey key, *ek; + + key.ek_client = clp; + key.ek_fsidtype = fsid_type; + memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); + key.ek_export = exp; + key.h.expiry_time = NEVER; + key.h.flags = 0; + + ek = svc_expkey_lookup(&key, 1); + if (ek) { + expkey_put(&ek->h, &svc_expkey_cache); + return 0; + } + return -ENOMEM; +} + +/* + * Find the client's export entry matching xdev/xino. + */ +static inline struct svc_expkey * +exp_get_key(svc_client *clp, dev_t dev, ino_t ino) +{ + u32 fsidv[3]; + + if (old_valid_dev(dev)) { + mk_fsid_v0(fsidv, dev, ino); + return exp_find_key(clp, 0, fsidv, NULL); + } + mk_fsid_v3(fsidv, dev, ino); + return exp_find_key(clp, 3, fsidv, NULL); +} + +/* + * Find the client's export entry matching fsid + */ +static inline struct svc_expkey * +exp_get_fsid_key(svc_client *clp, int fsid) +{ + u32 fsidv[2]; + + mk_fsid_v1(fsidv, fsid); + + return exp_find_key(clp, 1, fsidv, NULL); +} + +svc_export * +exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, + struct cache_req *reqp) +{ + struct svc_export *exp, key; + + if (!clp) + return NULL; + + key.ex_client = clp; + key.ex_mnt = mnt; + key.ex_dentry = dentry; + + exp = svc_export_lookup(&key, 0); + if (exp != NULL) + switch (cache_check(&svc_export_cache, &exp->h, reqp)) { + case 0: break; + case -EAGAIN: + exp = ERR_PTR(-EAGAIN); + break; + default: + exp = NULL; + } + + return exp; +} + +/* + * Find the export entry for a given dentry. + */ +struct svc_export * +exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry, + struct cache_req *reqp) +{ + svc_export *exp; + + dget(dentry); + exp = exp_get_by_name(clp, mnt, dentry, reqp); + + while (exp == NULL && !IS_ROOT(dentry)) { + struct dentry *parent; + + parent = dget_parent(dentry); + dput(dentry); + dentry = parent; + exp = exp_get_by_name(clp, mnt, dentry, reqp); + } + dput(dentry); + return exp; +} + +/* + * Hashtable locking. Write locks are placed only by user processes + * wanting to modify export information. + * Write locking only done in this file. Read locking + * needed externally. + */ + +static DECLARE_RWSEM(hash_sem); + +void +exp_readlock(void) +{ + down_read(&hash_sem); +} + +static inline void +exp_writelock(void) +{ + down_write(&hash_sem); +} + +void +exp_readunlock(void) +{ + up_read(&hash_sem); +} + +static inline void +exp_writeunlock(void) +{ + up_write(&hash_sem); +} + +static void exp_fsid_unhash(struct svc_export *exp) +{ + struct svc_expkey *ek; + + if ((exp->ex_flags & NFSEXP_FSID) == 0) + return; + + ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); + if (ek && !IS_ERR(ek)) { + ek->h.expiry_time = get_seconds()-1; + expkey_put(&ek->h, &svc_expkey_cache); + } + svc_expkey_cache.nextcheck = get_seconds(); +} + +static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) +{ + u32 fsid[2]; + + if ((exp->ex_flags & NFSEXP_FSID) == 0) + return 0; + + mk_fsid_v1(fsid, exp->ex_fsid); + return exp_set_key(clp, 1, fsid, exp); +} + +static int exp_hash(struct auth_domain *clp, struct svc_export *exp) +{ + u32 fsid[2]; + struct inode *inode = exp->ex_dentry->d_inode; + dev_t dev = inode->i_sb->s_dev; + + if (old_valid_dev(dev)) { + mk_fsid_v0(fsid, dev, inode->i_ino); + return exp_set_key(clp, 0, fsid, exp); + } + mk_fsid_v3(fsid, dev, inode->i_ino); + return exp_set_key(clp, 3, fsid, exp); +} + +static void exp_unhash(struct svc_export *exp) +{ + struct svc_expkey *ek; + struct inode *inode = exp->ex_dentry->d_inode; + + ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); + if (ek && !IS_ERR(ek)) { + ek->h.expiry_time = get_seconds()-1; + expkey_put(&ek->h, &svc_expkey_cache); + } + svc_expkey_cache.nextcheck = get_seconds(); +} + +/* + * Export a file system. + */ +int +exp_export(struct nfsctl_export *nxp) +{ + svc_client *clp; + struct svc_export *exp = NULL; + struct svc_export new; + struct svc_expkey *fsid_key = NULL; + struct nameidata nd; + int err; + + /* Consistency check */ + err = -EINVAL; + if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || + !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) + goto out; + + dprintk("exp_export called for %s:%s (%x/%ld fl %x).\n", + nxp->ex_client, nxp->ex_path, + (unsigned)nxp->ex_dev, (long)nxp->ex_ino, + nxp->ex_flags); + + /* Try to lock the export table for update */ + exp_writelock(); + + /* Look up client info */ + if (!(clp = auth_domain_find(nxp->ex_client))) + goto out_unlock; + + + /* Look up the dentry */ + err = path_lookup(nxp->ex_path, 0, &nd); + if (err) + goto out_unlock; + err = -EINVAL; + + exp = exp_get_by_name(clp, nd.mnt, nd.dentry, NULL); + + /* must make sure there won't be an ex_fsid clash */ + if ((nxp->ex_flags & NFSEXP_FSID) && + (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) && + !IS_ERR(fsid_key) && + fsid_key->ek_export && + fsid_key->ek_export != exp) + goto finish; + + if (exp) { + /* just a flags/id/fsid update */ + + exp_fsid_unhash(exp); + exp->ex_flags = nxp->ex_flags; + exp->ex_anon_uid = nxp->ex_anon_uid; + exp->ex_anon_gid = nxp->ex_anon_gid; + exp->ex_fsid = nxp->ex_dev; + + err = exp_fsid_hash(clp, exp); + goto finish; + } + + err = check_export(nd.dentry->d_inode, nxp->ex_flags); + if (err) goto finish; + + err = -ENOMEM; + + dprintk("nfsd: creating export entry %p for client %p\n", exp, clp); + + new.h.expiry_time = NEVER; + new.h.flags = 0; + new.ex_client = clp; + new.ex_mnt = nd.mnt; + new.ex_dentry = nd.dentry; + new.ex_flags = nxp->ex_flags; + new.ex_anon_uid = nxp->ex_anon_uid; + new.ex_anon_gid = nxp->ex_anon_gid; + new.ex_fsid = nxp->ex_dev; + + exp = svc_export_lookup(&new, 1); + + if (exp == NULL) + goto finish; + + err = 0; + + if (exp_hash(clp, exp) || + exp_fsid_hash(clp, exp)) { + /* failed to create at least one index */ + exp_do_unexport(exp); + cache_flush(); + err = -ENOMEM; + } + +finish: + if (exp) + exp_put(exp); + if (fsid_key && !IS_ERR(fsid_key)) + expkey_put(&fsid_key->h, &svc_expkey_cache); + if (clp) + auth_domain_put(clp); + path_release(&nd); +out_unlock: + exp_writeunlock(); +out: + return err; +} + +/* + * Unexport a file system. The export entry has already + * been removed from the client's list of exported fs's. + */ +static void +exp_do_unexport(svc_export *unexp) +{ + unexp->h.expiry_time = get_seconds()-1; + svc_export_cache.nextcheck = get_seconds(); + exp_unhash(unexp); + exp_fsid_unhash(unexp); +} + + +/* + * unexport syscall. + */ +int +exp_unexport(struct nfsctl_export *nxp) +{ + struct auth_domain *dom; + svc_export *exp; + struct nameidata nd; + int err; + + /* Consistency check */ + if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || + !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) + return -EINVAL; + + exp_writelock(); + + err = -EINVAL; + dom = auth_domain_find(nxp->ex_client); + if (!dom) { + dprintk("nfsd: unexport couldn't find %s\n", nxp->ex_client); + goto out_unlock; + } + + err = path_lookup(nxp->ex_path, 0, &nd); + if (err) + goto out_domain; + + err = -EINVAL; + exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL); + path_release(&nd); + if (!exp) + goto out_domain; + + exp_do_unexport(exp); + exp_put(exp); + err = 0; + +out_domain: + auth_domain_put(dom); + cache_flush(); +out_unlock: + exp_writeunlock(); + return err; +} + +/* + * Obtain the root fh on behalf of a client. + * This could be done in user space, but I feel that it adds some safety + * since its harder to fool a kernel module than a user space program. + */ +int +exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) +{ + struct svc_export *exp; + struct nameidata nd; + struct inode *inode; + struct svc_fh fh; + int err; + + err = -EPERM; + /* NB: we probably ought to check that it's NUL-terminated */ + if (path_lookup(path, 0, &nd)) { + printk("nfsd: exp_rootfh path not found %s", path); + return err; + } + inode = nd.dentry->d_inode; + + dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", + path, nd.dentry, clp->name, + inode->i_sb->s_id, inode->i_ino); + exp = exp_parent(clp, nd.mnt, nd.dentry, NULL); + if (!exp) { + dprintk("nfsd: exp_rootfh export not found.\n"); + goto out; + } + + /* + * fh must be initialized before calling fh_compose + */ + fh_init(&fh, maxsize); + if (fh_compose(&fh, exp, nd.dentry, NULL)) + err = -EINVAL; + else + err = 0; + memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh)); + fh_put(&fh); + exp_put(exp); +out: + path_release(&nd); + return err; +} + +/* + * Called when we need the filehandle for the root of the pseudofs, + * for a given NFSv4 client. The root is defined to be the + * export point with fsid==0 + */ +int +exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, + struct cache_req *creq) +{ + struct svc_expkey *fsid_key; + int rv; + u32 fsidv[2]; + + mk_fsid_v1(fsidv, 0); + + fsid_key = exp_find_key(clp, 1, fsidv, creq); + if (IS_ERR(fsid_key) && PTR_ERR(fsid_key) == -EAGAIN) + return nfserr_dropit; + if (!fsid_key || IS_ERR(fsid_key)) + return nfserr_perm; + + rv = fh_compose(fhp, fsid_key->ek_export, + fsid_key->ek_export->ex_dentry, NULL); + expkey_put(&fsid_key->h, &svc_expkey_cache); + return rv; +} + +/* Iterator */ + +static void *e_start(struct seq_file *m, loff_t *pos) +{ + loff_t n = *pos; + unsigned hash, export; + struct cache_head *ch; + + exp_readlock(); + read_lock(&svc_export_cache.hash_lock); + if (!n--) + return (void *)1; + hash = n >> 32; + export = n & ((1LL<<32) - 1); + + + for (ch=export_table[hash]; ch; ch=ch->next) + if (!export--) + return ch; + n &= ~((1LL<<32) - 1); + do { + hash++; + n += 1LL<<32; + } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL); + if (hash >= EXPORT_HASHMAX) + return NULL; + *pos = n+1; + return export_table[hash]; +} + +static void *e_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct cache_head *ch = p; + int hash = (*pos >> 32); + + if (p == (void *)1) + hash = 0; + else if (ch->next == NULL) { + hash++; + *pos += 1LL<<32; + } else { + ++*pos; + return ch->next; + } + *pos &= ~((1LL<<32) - 1); + while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) { + hash++; + *pos += 1LL<<32; + } + if (hash >= EXPORT_HASHMAX) + return NULL; + ++*pos; + return export_table[hash]; +} + +static void e_stop(struct seq_file *m, void *p) +{ + read_unlock(&svc_export_cache.hash_lock); + exp_readunlock(); +} + +static struct flags { + int flag; + char *name[2]; +} expflags[] = { + { NFSEXP_READONLY, {"ro", "rw"}}, + { NFSEXP_INSECURE_PORT, {"insecure", ""}}, + { NFSEXP_ROOTSQUASH, {"root_squash", "no_root_squash"}}, + { NFSEXP_ALLSQUASH, {"all_squash", ""}}, + { NFSEXP_ASYNC, {"async", "sync"}}, + { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, + { NFSEXP_NOHIDE, {"nohide", ""}}, + { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, + { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, + { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, +#ifdef MSNFS + { NFSEXP_MSNFS, {"msnfs", ""}}, +#endif + { 0, {"", ""}} +}; + +static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong) +{ + int first = 0; + struct flags *flg; + + for (flg = expflags; flg->flag; flg++) { + int state = (flg->flag & flag)?0:1; + if (*flg->name[state]) + seq_printf(m, "%s%s", first++?",":"", flg->name[state]); + } + if (flag & NFSEXP_FSID) + seq_printf(m, "%sfsid=%d", first++?",":"", fsid); + if (anonu != (uid_t)-2 && anonu != (0x10000-2)) + seq_printf(m, "%sanonuid=%d", first++?",":"", anonu); + if (anong != (gid_t)-2 && anong != (0x10000-2)) + seq_printf(m, "%sanongid=%d", first++?",":"", anong); +} + +static int e_show(struct seq_file *m, void *p) +{ + struct cache_head *cp = p; + struct svc_export *exp = container_of(cp, struct svc_export, h); + svc_client *clp; + + if (p == (void *)1) { + seq_puts(m, "# Version 1.1\n"); + seq_puts(m, "# Path Client(Flags) # IPs\n"); + return 0; + } + + clp = exp->ex_client; + cache_get(&exp->h); + if (cache_check(&svc_export_cache, &exp->h, NULL)) + return 0; + if (cache_put(&exp->h, &svc_export_cache)) BUG(); + return svc_export_show(m, &svc_export_cache, cp); +} + +struct seq_operations nfs_exports_op = { + .start = e_start, + .next = e_next, + .stop = e_stop, + .show = e_show, +}; + +/* + * Add or modify a client. + * Change requests may involve the list of host addresses. The list of + * exports and possibly existing uid maps are left untouched. + */ +int +exp_addclient(struct nfsctl_client *ncp) +{ + struct auth_domain *dom; + int i, err; + + /* First, consistency check. */ + err = -EINVAL; + if (! exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) + goto out; + if (ncp->cl_naddr > NFSCLNT_ADDRMAX) + goto out; + + /* Lock the hashtable */ + exp_writelock(); + + dom = unix_domain_find(ncp->cl_ident); + + err = -ENOMEM; + if (!dom) + goto out_unlock; + + /* Insert client into hashtable. */ + for (i = 0; i < ncp->cl_naddr; i++) + auth_unix_add_addr(ncp->cl_addrlist[i], dom); + + auth_unix_forget_old(dom); + auth_domain_put(dom); + + err = 0; + +out_unlock: + exp_writeunlock(); +out: + return err; +} + +/* + * Delete a client given an identifier. + */ +int +exp_delclient(struct nfsctl_client *ncp) +{ + int err; + struct auth_domain *dom; + + err = -EINVAL; + if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) + goto out; + + /* Lock the hashtable */ + exp_writelock(); + + dom = auth_domain_find(ncp->cl_ident); + /* just make sure that no addresses work + * and that it will expire soon + */ + if (dom) { + err = auth_unix_forget_old(dom); + dom->h.expiry_time = get_seconds(); + auth_domain_put(dom); + } + + exp_writeunlock(); +out: + return err; +} + +/* + * Verify that string is non-empty and does not exceed max length. + */ +static int +exp_verify_string(char *cp, int max) +{ + int i; + + for (i = 0; i < max; i++) + if (!cp[i]) + return i; + cp[i] = 0; + printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); + return 0; +} + +/* + * Initialize the exports module. + */ +void +nfsd_export_init(void) +{ + dprintk("nfsd: initializing export module.\n"); + + cache_register(&svc_export_cache); + cache_register(&svc_expkey_cache); + +} + +/* + * Flush exports table - called when last nfsd thread is killed + */ +void +nfsd_export_flush(void) +{ + exp_writelock(); + cache_purge(&svc_expkey_cache); + cache_purge(&svc_export_cache); + exp_writeunlock(); +} + +/* + * Shutdown the exports module. + */ +void +nfsd_export_shutdown(void) +{ + + dprintk("nfsd: shutting down export module.\n"); + + exp_writelock(); + + if (cache_unregister(&svc_expkey_cache)) + printk(KERN_ERR "nfsd: failed to unregister expkey cache\n"); + if (cache_unregister(&svc_export_cache)) + printk(KERN_ERR "nfsd: failed to unregister export cache\n"); + svcauth_unix_purge(); + + exp_writeunlock(); + dprintk("nfsd: export shutdown complete.\n"); +} diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c new file mode 100644 index 00000000000..7b889ff15ae --- /dev/null +++ b/fs/nfsd/lockd.c @@ -0,0 +1,79 @@ +/* + * linux/fs/nfsd/lockd.c + * + * This file contains all the stubs needed when communicating with lockd. + * This level of indirection is necessary so we can run nfsd+lockd without + * requiring the nfs client to be compiled in/loaded, and vice versa. + * + * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/lockd/bind.h> + +#define NFSDDBG_FACILITY NFSDDBG_LOCKD + +/* + * Note: we hold the dentry use count while the file is open. + */ +static u32 +nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) +{ + u32 nfserr; + struct svc_fh fh; + + /* must initialize before using! but maxsize doesn't matter */ + fh_init(&fh,0); + fh.fh_handle.fh_size = f->size; + memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); + fh.fh_export = NULL; + + exp_readlock(); + nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp); + fh_put(&fh); + rqstp->rq_client = NULL; + exp_readunlock(); + /* nlm and nfsd don't share error codes. + * we invent: 0 = no error + * 1 = stale file handle + * 2 = other error + */ + switch (nfserr) { + case nfs_ok: + return 0; + case nfserr_stale: + return 1; + default: + return 2; + } +} + +static void +nlm_fclose(struct file *filp) +{ + fput(filp); +} + +static struct nlmsvc_binding nfsd_nlm_ops = { + .fopen = nlm_fopen, /* open file for locking */ + .fclose = nlm_fclose, /* close file */ +}; + +void +nfsd_lockd_init(void) +{ + dprintk("nfsd: initializing lockd\n"); + nlmsvc_ops = &nfsd_nlm_ops; +} + +void +nfsd_lockd_shutdown(void) +{ + nlmsvc_ops = NULL; +} diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c new file mode 100644 index 00000000000..041380fe667 --- /dev/null +++ b/fs/nfsd/nfs3proc.c @@ -0,0 +1,702 @@ +/* + * linux/fs/nfsd/nfs3proc.c + * + * Process version 3 NFS requests. + * + * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/linkage.h> +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/ext2_fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/major.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr3.h> +#include <linux/nfs3.h> + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +#define RETURN_STATUS(st) { resp->status = (st); return (st); } + +static int nfs3_ftypes[] = { + 0, /* NF3NON */ + S_IFREG, /* NF3REG */ + S_IFDIR, /* NF3DIR */ + S_IFBLK, /* NF3BLK */ + S_IFCHR, /* NF3CHR */ + S_IFLNK, /* NF3LNK */ + S_IFSOCK, /* NF3SOCK */ + S_IFIFO, /* NF3FIFO */ +}; + +/* + * NULL call. + */ +static int +nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get a file's attributes + */ +static int +nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd3_attrstat *resp) +{ + int nfserr; + + dprintk("nfsd: GETATTR(3) %s\n", + SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); + RETURN_STATUS(nfserr); +} + +/* + * Set a file's attributes + */ +static int +nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp, + struct nfsd3_attrstat *resp) +{ + int nfserr; + + dprintk("nfsd: SETATTR(3) %s\n", + SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, + argp->check_guard, argp->guardtime); + RETURN_STATUS(nfserr); +} + +/* + * Look up a path name component + */ +static int +nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_diropres *resp) +{ + int nfserr; + + dprintk("nfsd: LOOKUP(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + fh_copy(&resp->dirfh, &argp->fh); + fh_init(&resp->fh, NFS3_FHSIZE); + + nfserr = nfsd_lookup(rqstp, &resp->dirfh, + argp->name, + argp->len, + &resp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Check file access + */ +static int +nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, + struct nfsd3_accessres *resp) +{ + int nfserr; + + dprintk("nfsd: ACCESS(3) %s 0x%x\n", + SVCFH_fmt(&argp->fh), + argp->access); + + fh_copy(&resp->fh, &argp->fh); + resp->access = argp->access; + nfserr = nfsd_access(rqstp, &resp->fh, &resp->access, NULL); + RETURN_STATUS(nfserr); +} + +/* + * Read a symlink. + */ +static int +nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp, + struct nfsd3_readlinkres *resp) +{ + int nfserr; + + dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); + + /* Read the symlink. */ + fh_copy(&resp->fh, &argp->fh); + resp->len = NFS3_MAXPATHLEN; + nfserr = nfsd_readlink(rqstp, &resp->fh, argp->buffer, &resp->len); + RETURN_STATUS(nfserr); +} + +/* + * Read a portion of a file. + */ +static int +nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, + struct nfsd3_readres *resp) +{ + int nfserr; + + dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", + SVCFH_fmt(&argp->fh), + (unsigned long) argp->count, + (unsigned long) argp->offset); + + /* Obtain buffer pointer for payload. + * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) + * + 1 (xdr opaque byte count) = 26 + */ + + resp->count = argp->count; + if (NFSSVC_MAXBLKSIZE < resp->count) + resp->count = NFSSVC_MAXBLKSIZE; + + svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_read(rqstp, &resp->fh, NULL, + argp->offset, + argp->vec, argp->vlen, + &resp->count); + if (nfserr == 0) { + struct inode *inode = resp->fh.fh_dentry->d_inode; + + resp->eof = (argp->offset + resp->count) >= inode->i_size; + } + + RETURN_STATUS(nfserr); +} + +/* + * Write data to a file + */ +static int +nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, + struct nfsd3_writeres *resp) +{ + int nfserr; + + dprintk("nfsd: WRITE(3) %s %d bytes at %ld%s\n", + SVCFH_fmt(&argp->fh), + argp->len, + (unsigned long) argp->offset, + argp->stable? " stable" : ""); + + fh_copy(&resp->fh, &argp->fh); + resp->committed = argp->stable; + nfserr = nfsd_write(rqstp, &resp->fh, NULL, + argp->offset, + argp->vec, argp->vlen, + argp->len, + &resp->committed); + resp->count = argp->count; + RETURN_STATUS(nfserr); +} + +/* + * With NFSv3, CREATE processing is a lot easier than with NFSv2. + * At least in theory; we'll see how it fares in practice when the + * first reports about SunOS compatibility problems start to pour in... + */ +static int +nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, + struct nfsd3_diropres *resp) +{ + svc_fh *dirfhp, *newfhp = NULL; + struct iattr *attr; + u32 nfserr; + + dprintk("nfsd: CREATE(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + dirfhp = fh_copy(&resp->dirfh, &argp->fh); + newfhp = fh_init(&resp->fh, NFS3_FHSIZE); + attr = &argp->attrs; + + /* Get the directory inode */ + nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE); + if (nfserr) + RETURN_STATUS(nfserr); + + /* Unfudge the mode bits */ + attr->ia_mode &= ~S_IFMT; + if (!(attr->ia_valid & ATTR_MODE)) { + attr->ia_valid |= ATTR_MODE; + attr->ia_mode = S_IFREG; + } else { + attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG; + } + + /* Now create the file and set attributes */ + nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, + attr, newfhp, + argp->createmode, argp->verf, NULL); + + RETURN_STATUS(nfserr); +} + +/* + * Make directory. This operation is not idempotent. + */ +static int +nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, + struct nfsd3_diropres *resp) +{ + int nfserr; + + dprintk("nfsd: MKDIR(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + argp->attrs.ia_valid &= ~ATTR_SIZE; + fh_copy(&resp->dirfh, &argp->fh); + fh_init(&resp->fh, NFS3_FHSIZE); + nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, + &argp->attrs, S_IFDIR, 0, &resp->fh); + + RETURN_STATUS(nfserr); +} + +static int +nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, + struct nfsd3_diropres *resp) +{ + int nfserr; + + dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", + SVCFH_fmt(&argp->ffh), + argp->flen, argp->fname, + argp->tlen, argp->tname); + + fh_copy(&resp->dirfh, &argp->ffh); + fh_init(&resp->fh, NFS3_FHSIZE); + nfserr = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, argp->flen, + argp->tname, argp->tlen, + &resp->fh, &argp->attrs); + RETURN_STATUS(nfserr); +} + +/* + * Make socket/fifo/device. + */ +static int +nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp, + struct nfsd3_diropres *resp) +{ + int nfserr, type; + dev_t rdev = 0; + + dprintk("nfsd: MKNOD(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + fh_copy(&resp->dirfh, &argp->fh); + fh_init(&resp->fh, NFS3_FHSIZE); + + if (argp->ftype == 0 || argp->ftype >= NF3BAD) + RETURN_STATUS(nfserr_inval); + if (argp->ftype == NF3CHR || argp->ftype == NF3BLK) { + rdev = MKDEV(argp->major, argp->minor); + if (MAJOR(rdev) != argp->major || + MINOR(rdev) != argp->minor) + RETURN_STATUS(nfserr_inval); + } else + if (argp->ftype != NF3SOCK && argp->ftype != NF3FIFO) + RETURN_STATUS(nfserr_inval); + + type = nfs3_ftypes[argp->ftype]; + nfserr = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len, + &argp->attrs, type, rdev, &resp->fh); + + RETURN_STATUS(nfserr); +} + +/* + * Remove file/fifo/socket etc. + */ +static int +nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_attrstat *resp) +{ + int nfserr; + + dprintk("nfsd: REMOVE(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + /* Unlink. -S_IFDIR means file must not be a directory */ + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, argp->name, argp->len); + RETURN_STATUS(nfserr); +} + +/* + * Remove a directory + */ +static int +nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, + struct nfsd3_attrstat *resp) +{ + int nfserr; + + dprintk("nfsd: RMDIR(3) %s %.*s\n", + SVCFH_fmt(&argp->fh), + argp->len, + argp->name); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); + RETURN_STATUS(nfserr); +} + +static int +nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp, + struct nfsd3_renameres *resp) +{ + int nfserr; + + dprintk("nfsd: RENAME(3) %s %.*s ->\n", + SVCFH_fmt(&argp->ffh), + argp->flen, + argp->fname); + dprintk("nfsd: -> %s %.*s\n", + SVCFH_fmt(&argp->tfh), + argp->tlen, + argp->tname); + + fh_copy(&resp->ffh, &argp->ffh); + fh_copy(&resp->tfh, &argp->tfh); + nfserr = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, + &resp->tfh, argp->tname, argp->tlen); + RETURN_STATUS(nfserr); +} + +static int +nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp, + struct nfsd3_linkres *resp) +{ + int nfserr; + + dprintk("nfsd: LINK(3) %s ->\n", + SVCFH_fmt(&argp->ffh)); + dprintk("nfsd: -> %s %.*s\n", + SVCFH_fmt(&argp->tfh), + argp->tlen, + argp->tname); + + fh_copy(&resp->fh, &argp->ffh); + fh_copy(&resp->tfh, &argp->tfh); + nfserr = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, + &resp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Read a portion of a directory. + */ +static int +nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, + struct nfsd3_readdirres *resp) +{ + int nfserr, count; + + dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->count, (u32) argp->cookie); + + /* Make sure we've room for the NULL ptr & eof flag, and shrink to + * client read size */ + count = (argp->count >> 2) - 2; + + /* Read directory and encode entries on the fly */ + fh_copy(&resp->fh, &argp->fh); + + resp->buflen = count; + resp->common.err = nfs_ok; + resp->buffer = argp->buffer; + resp->rqstp = rqstp; + nfserr = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie, + &resp->common, nfs3svc_encode_entry); + memcpy(resp->verf, argp->verf, 8); + resp->count = resp->buffer - argp->buffer; + if (resp->offset) + xdr_encode_hyper(resp->offset, argp->cookie); + + RETURN_STATUS(nfserr); +} + +/* + * Read a portion of a directory, including file handles and attrs. + * For now, we choose to ignore the dircount parameter. + */ +static int +nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, + struct nfsd3_readdirres *resp) +{ + int nfserr, count = 0; + loff_t offset; + int i; + caddr_t page_addr = NULL; + + dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->count, (u32) argp->cookie); + + /* Convert byte count to number of words (i.e. >> 2), + * and reserve room for the NULL ptr & eof flag (-2 words) */ + resp->count = (argp->count >> 2) - 2; + + /* Read directory and encode entries on the fly */ + fh_copy(&resp->fh, &argp->fh); + + resp->common.err = nfs_ok; + resp->buffer = argp->buffer; + resp->buflen = resp->count; + resp->rqstp = rqstp; + offset = argp->cookie; + nfserr = nfsd_readdir(rqstp, &resp->fh, + &offset, + &resp->common, + nfs3svc_encode_entry_plus); + memcpy(resp->verf, argp->verf, 8); + for (i=1; i<rqstp->rq_resused ; i++) { + page_addr = page_address(rqstp->rq_respages[i]); + + if (((caddr_t)resp->buffer >= page_addr) && + ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) { + count += (caddr_t)resp->buffer - page_addr; + break; + } + count += PAGE_SIZE; + } + resp->count = count >> 2; + if (resp->offset) { + if (unlikely(resp->offset1)) { + /* we ended up with offset on a page boundary */ + *resp->offset = htonl(offset >> 32); + *resp->offset1 = htonl(offset & 0xffffffff); + resp->offset1 = NULL; + } else { + xdr_encode_hyper(resp->offset, offset); + } + } + + RETURN_STATUS(nfserr); +} + +/* + * Get file system stats + */ +static int +nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd3_fsstatres *resp) +{ + int nfserr; + + dprintk("nfsd: FSSTAT(3) %s\n", + SVCFH_fmt(&argp->fh)); + + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + fh_put(&argp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Get file system info + */ +static int +nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd3_fsinfores *resp) +{ + int nfserr; + + dprintk("nfsd: FSINFO(3) %s\n", + SVCFH_fmt(&argp->fh)); + + resp->f_rtmax = NFSSVC_MAXBLKSIZE; + resp->f_rtpref = NFSSVC_MAXBLKSIZE; + resp->f_rtmult = PAGE_SIZE; + resp->f_wtmax = NFSSVC_MAXBLKSIZE; + resp->f_wtpref = NFSSVC_MAXBLKSIZE; + resp->f_wtmult = PAGE_SIZE; + resp->f_dtpref = PAGE_SIZE; + resp->f_maxfilesize = ~(u32) 0; + resp->f_properties = NFS3_FSF_DEFAULT; + + nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); + + /* Check special features of the file system. May request + * different read/write sizes for file systems known to have + * problems with large blocks */ + if (nfserr == 0) { + struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; + + /* Note that we don't care for remote fs's here */ + if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */) { + resp->f_properties = NFS3_FSF_BILLYBOY; + } + resp->f_maxfilesize = sb->s_maxbytes; + } + + fh_put(&argp->fh); + RETURN_STATUS(nfserr); +} + +/* + * Get pathconf info for the specified file + */ +static int +nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd3_pathconfres *resp) +{ + int nfserr; + + dprintk("nfsd: PATHCONF(3) %s\n", + SVCFH_fmt(&argp->fh)); + + /* Set default pathconf */ + resp->p_link_max = 255; /* at least */ + resp->p_name_max = 255; /* at least */ + resp->p_no_trunc = 0; + resp->p_chown_restricted = 1; + resp->p_case_insensitive = 0; + resp->p_case_preserving = 1; + + nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); + + if (nfserr == 0) { + struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; + + /* Note that we don't care for remote fs's here */ + switch (sb->s_magic) { + case EXT2_SUPER_MAGIC: + resp->p_link_max = EXT2_LINK_MAX; + resp->p_name_max = EXT2_NAME_LEN; + break; + case 0x4d44: /* MSDOS_SUPER_MAGIC */ + resp->p_case_insensitive = 1; + resp->p_case_preserving = 0; + break; + } + } + + fh_put(&argp->fh); + RETURN_STATUS(nfserr); +} + + +/* + * Commit a file (range) to stable storage. + */ +static int +nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp, + struct nfsd3_commitres *resp) +{ + int nfserr; + + dprintk("nfsd: COMMIT(3) %s %u@%Lu\n", + SVCFH_fmt(&argp->fh), + argp->count, + (unsigned long long) argp->offset); + + if (argp->offset > NFS_OFFSET_MAX) + RETURN_STATUS(nfserr_inval); + + fh_copy(&resp->fh, &argp->fh); + nfserr = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count); + + RETURN_STATUS(nfserr); +} + + +/* + * NFSv3 Server procedures. + * Only the results of non-idempotent operations are cached. + */ +#define nfs3svc_decode_voidargs NULL +#define nfs3svc_release_void NULL +#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle +#define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat +#define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat +#define nfsd3_mkdirargs nfsd3_createargs +#define nfsd3_readdirplusargs nfsd3_readdirargs +#define nfsd3_fhandleargs nfsd_fhandle +#define nfsd3_fhandleres nfsd3_attrstat +#define nfsd3_attrstatres nfsd3_attrstat +#define nfsd3_wccstatres nfsd3_attrstat +#define nfsd3_createres nfsd3_diropres +#define nfsd3_voidres nfsd3_voidargs +struct nfsd3_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd3_proc_##name, \ + (kxdrproc_t) nfs3svc_decode_##argt##args, \ + (kxdrproc_t) nfs3svc_encode_##rest##res, \ + (kxdrproc_t) nfs3svc_release_##relt, \ + sizeof(struct nfsd3_##argt##args), \ + sizeof(struct nfsd3_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status*/ +#define FH 17 /* filehandle with length */ +#define AT 21 /* attributes */ +#define pAT (1+AT) /* post attributes - conditional */ +#define WC (7+pAT) /* WCC attributes */ + +static struct svc_procedure nfsd_procedures3[22] = { + PROC(null, void, void, void, RC_NOCACHE, ST), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(setattr, sattr, wccstat, fhandle, RC_REPLBUFF, ST+WC), + PROC(lookup, dirop, dirop, fhandle2, RC_NOCACHE, ST+FH+pAT+pAT), + PROC(access, access, access, fhandle, RC_NOCACHE, ST+pAT+1), + PROC(readlink, readlink, readlink, fhandle, RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4), + PROC(read, read, read, fhandle, RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE), + PROC(write, write, write, fhandle, RC_REPLBUFF, ST+WC+4), + PROC(create, create, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), + PROC(mkdir, mkdir, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), + PROC(symlink, symlink, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), + PROC(mknod, mknod, create, fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC), + PROC(remove, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), + PROC(rmdir, dirop, wccstat, fhandle, RC_REPLBUFF, ST+WC), + PROC(rename, rename, rename, fhandle2, RC_REPLBUFF, ST+WC+WC), + PROC(link, link, link, fhandle2, RC_REPLBUFF, ST+pAT+WC), + PROC(readdir, readdir, readdir, fhandle, RC_NOCACHE, 0), + PROC(readdirplus,readdirplus, readdir, fhandle, RC_NOCACHE, 0), + PROC(fsstat, fhandle, fsstat, void, RC_NOCACHE, ST+pAT+2*6+1), + PROC(fsinfo, fhandle, fsinfo, void, RC_NOCACHE, ST+pAT+12), + PROC(pathconf, fhandle, pathconf, void, RC_NOCACHE, ST+pAT+6), + PROC(commit, commit, commit, fhandle, RC_NOCACHE, ST+WC+2), +}; + +struct svc_version nfsd_version3 = { + .vs_vers = 3, + .vs_nproc = 22, + .vs_proc = nfsd_procedures3, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS3_SVC_XDRSIZE, +}; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c new file mode 100644 index 00000000000..11f806835c5 --- /dev/null +++ b/fs/nfsd/nfs3xdr.c @@ -0,0 +1,1092 @@ +/* + * linux/fs/nfsd/nfs3xdr.c + * + * XDR support for nfsd/protocol version 3. + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + * + * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()! + */ + +#include <linux/types.h> +#include <linux/time.h> +#include <linux/nfs3.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/dcache.h> +#include <linux/namei.h> +#include <linux/mm.h> +#include <linux/vfs.h> +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/xdr3.h> + +#define NFSDDBG_FACILITY NFSDDBG_XDR + +#ifdef NFSD_OPTIMIZE_SPACE +# define inline +#endif + + +/* + * Mapping of S_IF* types to NFS file types + */ +static u32 nfs3_ftypes[] = { + NF3NON, NF3FIFO, NF3CHR, NF3BAD, + NF3DIR, NF3BAD, NF3BLK, NF3BAD, + NF3REG, NF3BAD, NF3LNK, NF3BAD, + NF3SOCK, NF3BAD, NF3LNK, NF3BAD, +}; + +/* + * XDR functions for basic NFS types + */ +static inline u32 * +encode_time3(u32 *p, struct timespec *time) +{ + *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); + return p; +} + +static inline u32 * +decode_time3(u32 *p, struct timespec *time) +{ + time->tv_sec = ntohl(*p++); + time->tv_nsec = ntohl(*p++); + return p; +} + +static inline u32 * +decode_fh(u32 *p, struct svc_fh *fhp) +{ + unsigned int size; + fh_init(fhp, NFS3_FHSIZE); + size = ntohl(*p++); + if (size > NFS3_FHSIZE) + return NULL; + + memcpy(&fhp->fh_handle.fh_base, p, size); + fhp->fh_handle.fh_size = size; + return p + XDR_QUADLEN(size); +} + +static inline u32 * +encode_fh(u32 *p, struct svc_fh *fhp) +{ + unsigned int size = fhp->fh_handle.fh_size; + *p++ = htonl(size); + if (size) p[XDR_QUADLEN(size)-1]=0; + memcpy(p, &fhp->fh_handle.fh_base, size); + return p + XDR_QUADLEN(size); +} + +/* + * Decode a file name and make sure that the path contains + * no slashes or null bytes. + */ +static inline u32 * +decode_filename(u32 *p, char **namp, int *lenp) +{ + char *name; + int i; + + if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0' || *name == '/') + return NULL; + } + } + + return p; +} + +static inline u32 * +decode_sattr3(u32 *p, struct iattr *iap) +{ + u32 tmp; + + iap->ia_valid = 0; + + if (*p++) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_UID; + iap->ia_uid = ntohl(*p++); + } + if (*p++) { + iap->ia_valid |= ATTR_GID; + iap->ia_gid = ntohl(*p++); + } + if (*p++) { + u64 newsize; + + iap->ia_valid |= ATTR_SIZE; + p = xdr_decode_hyper(p, &newsize); + if (newsize <= NFS_OFFSET_MAX) + iap->ia_size = newsize; + else + iap->ia_size = NFS_OFFSET_MAX; + } + if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ + iap->ia_valid |= ATTR_ATIME; + } else if (tmp == 2) { /* set to client time */ + iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + iap->ia_atime.tv_sec = ntohl(*p++); + iap->ia_atime.tv_nsec = ntohl(*p++); + } + if ((tmp = ntohl(*p++)) == 1) { /* set to server time */ + iap->ia_valid |= ATTR_MTIME; + } else if (tmp == 2) { /* set to client time */ + iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; + iap->ia_mtime.tv_sec = ntohl(*p++); + iap->ia_mtime.tv_nsec = ntohl(*p++); + } + return p; +} + +static inline u32 * +encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + struct vfsmount *mnt = fhp->fh_export->ex_mnt; + struct dentry *dentry = fhp->fh_dentry; + struct kstat stat; + struct timespec time; + + vfs_getattr(mnt, dentry, &stat); + + *p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]); + *p++ = htonl((u32) stat.mode); + *p++ = htonl((u32) stat.nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) { + p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); + } else { + p = xdr_encode_hyper(p, (u64) stat.size); + } + p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9); + *p++ = htonl((u32) MAJOR(stat.rdev)); + *p++ = htonl((u32) MINOR(stat.rdev)); + if (is_fsid(fhp, rqstp->rq_reffh)) + p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); + else + p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev)); + p = xdr_encode_hyper(p, (u64) stat.ino); + p = encode_time3(p, &stat.atime); + lease_get_mtime(dentry->d_inode, &time); + p = encode_time3(p, &time); + p = encode_time3(p, &stat.ctime); + + return p; +} + +static inline u32 * +encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + + /* Attributes to follow */ + *p++ = xdr_one; + + *p++ = htonl(nfs3_ftypes[(fhp->fh_post_mode & S_IFMT) >> 12]); + *p++ = htonl((u32) fhp->fh_post_mode); + *p++ = htonl((u32) fhp->fh_post_nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, fhp->fh_post_uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, fhp->fh_post_gid)); + if (S_ISLNK(fhp->fh_post_mode) && fhp->fh_post_size > NFS3_MAXPATHLEN) { + p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN); + } else { + p = xdr_encode_hyper(p, (u64) fhp->fh_post_size); + } + p = xdr_encode_hyper(p, ((u64)fhp->fh_post_blocks) << 9); + *p++ = fhp->fh_post_rdev[0]; + *p++ = fhp->fh_post_rdev[1]; + if (is_fsid(fhp, rqstp->rq_reffh)) + p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); + else + p = xdr_encode_hyper(p, (u64)huge_encode_dev(inode->i_sb->s_dev)); + p = xdr_encode_hyper(p, (u64) inode->i_ino); + p = encode_time3(p, &fhp->fh_post_atime); + p = encode_time3(p, &fhp->fh_post_mtime); + p = encode_time3(p, &fhp->fh_post_ctime); + + return p; +} + +/* + * Encode post-operation attributes. + * The inode may be NULL if the call failed because of a stale file + * handle. In this case, no attributes are returned. + */ +static u32 * +encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + struct dentry *dentry = fhp->fh_dentry; + if (dentry && dentry->d_inode != NULL) { + *p++ = xdr_one; /* attributes follow */ + return encode_fattr3(rqstp, p, fhp); + } + *p++ = xdr_zero; + return p; +} + +/* + * Enocde weak cache consistency data + */ +static u32 * +encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + struct dentry *dentry = fhp->fh_dentry; + + if (dentry && dentry->d_inode && fhp->fh_post_saved) { + if (fhp->fh_pre_saved) { + *p++ = xdr_one; + p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size); + p = encode_time3(p, &fhp->fh_pre_mtime); + p = encode_time3(p, &fhp->fh_pre_ctime); + } else { + *p++ = xdr_zero; + } + return encode_saved_post_attr(rqstp, p, fhp); + } + /* no pre- or post-attrs */ + *p++ = xdr_zero; + return encode_post_op_attr(rqstp, p, fhp); +} + + +/* + * XDR decode functions + */ +int +nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_sattrargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_sattr3(p, &args->attrs))) + return 0; + + if ((args->check_guard = ntohl(*p++)) != 0) { + struct timespec time; + p = decode_time3(p, &time); + args->guardtime = time.tv_sec; + } + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_diropargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + args->access = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readargs *args) +{ + unsigned int len; + int v,pn; + + if (!(p = decode_fh(p, &args->fh)) + || !(p = xdr_decode_hyper(p, &args->offset))) + return 0; + + len = args->count = ntohl(*p++); + + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + + /* set up the kvec */ + v=0; + while (len > 0) { + pn = rqstp->rq_resused; + svc_take_page(rqstp); + args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE; + len -= args->vec[v].iov_len; + v++; + } + args->vlen = v; + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_writeargs *args) +{ + unsigned int len, v, hdr; + + if (!(p = decode_fh(p, &args->fh)) + || !(p = xdr_decode_hyper(p, &args->offset))) + return 0; + + args->count = ntohl(*p++); + args->stable = ntohl(*p++); + len = args->len = ntohl(*p++); + + hdr = (void*)p - rqstp->rq_arg.head[0].iov_base; + if (rqstp->rq_arg.len < len + hdr) + return 0; + + args->vec[0].iov_base = (void*)p; + args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr; + + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + v= 0; + while (len > args->vec[v].iov_len) { + len -= args->vec[v].iov_len; + v++; + args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); + args->vec[v].iov_len = PAGE_SIZE; + } + args->vec[v].iov_len = len; + args->vlen = v+1; + + return args->count == args->len && args->vec[0].iov_len > 0; +} + +int +nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + switch (args->createmode = ntohl(*p++)) { + case NFS3_CREATE_UNCHECKED: + case NFS3_CREATE_GUARDED: + if (!(p = decode_sattr3(p, &args->attrs))) + return 0; + break; + case NFS3_CREATE_EXCLUSIVE: + args->verf = p; + p += 2; + break; + default: + return 0; + } + + return xdr_argsize_check(rqstp, p); +} +int +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len)) + || !(p = decode_sattr3(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_symlinkargs *args) +{ + unsigned int len; + int avail; + char *old, *new; + struct kvec *vec; + + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_sattr3(p, &args->attrs)) + ) + return 0; + /* now decode the pathname, which might be larger than the first page. + * As we have to check for nul's anyway, we copy it into a new page + * This page appears in the rq_res.pages list, but as pages_len is always + * 0, it won't get in the way + */ + svc_take_page(rqstp); + len = ntohl(*p++); + if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE) + return 0; + args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + args->tlen = len; + /* first copy and check from the first page */ + old = (char*)p; + vec = &rqstp->rq_arg.head[0]; + avail = vec->iov_len - (old - (char*)vec->iov_base); + while (len && avail && *old) { + *new++ = *old++; + len--; + avail--; + } + /* now copy next page if there is one */ + if (len && !avail && rqstp->rq_arg.page_len) { + avail = rqstp->rq_arg.page_len; + if (avail > PAGE_SIZE) avail = PAGE_SIZE; + old = page_address(rqstp->rq_arg.pages[0]); + } + while (len && avail && *old) { + *new++ = *old++; + len--; + avail--; + } + *new = '\0'; + if (len) + return 0; + + return 1; +} + +int +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_mknodargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + args->ftype = ntohl(*p++); + + if (args->ftype == NF3BLK || args->ftype == NF3CHR + || args->ftype == NF3SOCK || args->ftype == NF3FIFO) { + if (!(p = decode_sattr3(p, &args->attrs))) + return 0; + } + + if (args->ftype == NF3BLK || args->ftype == NF3CHR) { + args->major = ntohl(*p++); + args->minor = ntohl(*p++); + } + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_renameargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readlinkargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + svc_take_page(rqstp); + args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_linkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readdirargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->cookie); + args->verf = p; p += 2; + args->dircount = ~0; + args->count = ntohl(*p++); + + if (args->count > PAGE_SIZE) + args->count = PAGE_SIZE; + + svc_take_page(rqstp); + args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readdirargs *args) +{ + int len, pn; + + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->cookie); + args->verf = p; p += 2; + args->dircount = ntohl(*p++); + args->count = ntohl(*p++); + + len = (args->count > NFSSVC_MAXBLKSIZE) ? NFSSVC_MAXBLKSIZE : + args->count; + args->count = len; + + while (len > 0) { + pn = rqstp->rq_resused; + svc_take_page(rqstp); + if (!args->buffer) + args->buffer = page_address(rqstp->rq_respages[pn]); + len -= PAGE_SIZE; + } + + return xdr_argsize_check(rqstp, p); +} + +int +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_commitargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + p = xdr_decode_hyper(p, &args->offset); + args->count = ntohl(*p++); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ +/* + * There must be an encoding function for void results so svc_process + * will work properly. + */ +int +nfs3svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +/* GETATTR */ +int +nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + if (resp->status == 0) + p = encode_fattr3(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* SETATTR, REMOVE, RMDIR */ +int +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +/* LOOKUP */ +int +nfs3svc_encode_diropres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_diropres *resp) +{ + if (resp->status == 0) { + p = encode_fh(p, &resp->fh); + p = encode_post_op_attr(rqstp, p, &resp->fh); + } + p = encode_post_op_attr(rqstp, p, &resp->dirfh); + return xdr_ressize_check(rqstp, p); +} + +/* ACCESS */ +int +nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_accessres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) + *p++ = htonl(resp->access); + return xdr_ressize_check(rqstp, p); +} + +/* READLINK */ +int +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readlinkres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->len); + xdr_ressize_check(rqstp, p); + rqstp->rq_res.page_len = resp->len; + if (resp->len & 3) { + /* need to pad the tail */ + rqstp->rq_restailpage = 0; + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); + } + return 1; + } else + return xdr_ressize_check(rqstp, p); +} + +/* READ */ +int +nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->eof); + *p++ = htonl(resp->count); /* xdr opaque count */ + xdr_ressize_check(rqstp, p); + /* now update rqstp->rq_res to reflect data aswell */ + rqstp->rq_res.page_len = resp->count; + if (resp->count & 3) { + /* need to pad the tail */ + rqstp->rq_restailpage = 0; + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3); + } + return 1; + } else + return xdr_ressize_check(rqstp, p); +} + +/* WRITE */ +int +nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_writeres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + if (resp->status == 0) { + *p++ = htonl(resp->count); + *p++ = htonl(resp->committed); + *p++ = htonl(nfssvc_boot.tv_sec); + *p++ = htonl(nfssvc_boot.tv_usec); + } + return xdr_ressize_check(rqstp, p); +} + +/* CREATE, MKDIR, SYMLINK, MKNOD */ +int +nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_diropres *resp) +{ + if (resp->status == 0) { + *p++ = xdr_one; + p = encode_fh(p, &resp->fh); + p = encode_post_op_attr(rqstp, p, &resp->fh); + } + p = encode_wcc_data(rqstp, p, &resp->dirfh); + return xdr_ressize_check(rqstp, p); +} + +/* RENAME */ +int +nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_renameres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->ffh); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return xdr_ressize_check(rqstp, p); +} + +/* LINK */ +int +nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_linkres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + p = encode_wcc_data(rqstp, p, &resp->tfh); + return xdr_ressize_check(rqstp, p); +} + +/* READDIR */ +int +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_readdirres *resp) +{ + p = encode_post_op_attr(rqstp, p, &resp->fh); + + if (resp->status == 0) { + /* stupid readdir cookie */ + memcpy(p, resp->verf, 8); p += 2; + xdr_ressize_check(rqstp, p); + if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE) + return 1; /*No room for trailer */ + rqstp->rq_res.page_len = (resp->count) << 2; + + /* add the 'tail' to the end of the 'head' page - page 0. */ + rqstp->rq_restailpage = 0; + rqstp->rq_res.tail[0].iov_base = p; + *p++ = 0; /* no more entries */ + *p++ = htonl(resp->common.err == nfserr_eof); + rqstp->rq_res.tail[0].iov_len = 2<<2; + return 1; + } else + return xdr_ressize_check(rqstp, p); +} + +static inline u32 * +encode_entry_baggage(struct nfsd3_readdirres *cd, u32 *p, const char *name, + int namlen, ino_t ino) +{ + *p++ = xdr_one; /* mark entry present */ + p = xdr_encode_hyper(p, ino); /* file id */ + p = xdr_encode_array(p, name, namlen);/* name length & name */ + + cd->offset = p; /* remember pointer */ + p = xdr_encode_hyper(p, NFS_OFFSET_MAX);/* offset of next entry */ + + return p; +} + +static inline u32 * +encode_entryplus_baggage(struct nfsd3_readdirres *cd, u32 *p, + struct svc_fh *fhp) +{ + p = encode_post_op_attr(cd->rqstp, p, fhp); + *p++ = xdr_one; /* yes, a file handle follows */ + p = encode_fh(p, fhp); + fh_put(fhp); + return p; +} + +static int +compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, + const char *name, int namlen) +{ + struct svc_export *exp; + struct dentry *dparent, *dchild; + int rv = 0; + + dparent = cd->fh.fh_dentry; + exp = cd->fh.fh_export; + + fh_init(fhp, NFS3_FHSIZE); + if (isdotent(name, namlen)) { + if (namlen == 2) { + dchild = dget_parent(dparent); + if (dchild == dparent) { + /* filesystem root - cannot return filehandle for ".." */ + dput(dchild); + return 1; + } + } else + dchild = dget(dparent); + } else + dchild = lookup_one_len(name, dparent, namlen); + if (IS_ERR(dchild)) + return 1; + if (d_mountpoint(dchild) || + fh_compose(fhp, exp, dchild, &cd->fh) != 0 || + !dchild->d_inode) + rv = 1; + dput(dchild); + return rv; +} + +/* + * Encode a directory entry. This one works for both normal readdir + * and readdirplus. + * The normal readdir reply requires 2 (fileid) + 1 (stringlen) + * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen. + * + * The readdirplus baggage is 1+21 words for post_op_attr, plus the + * file handle. + */ + +#define NFS3_ENTRY_BAGGAGE (2 + 1 + 2 + 1) +#define NFS3_ENTRYPLUS_BAGGAGE (1 + 21 + 1 + (NFS3_FHSIZE >> 2)) +static int +encode_entry(struct readdir_cd *ccd, const char *name, + int namlen, off_t offset, ino_t ino, unsigned int d_type, int plus) +{ + struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, + common); + u32 *p = cd->buffer; + caddr_t curr_page_addr = NULL; + int pn; /* current page number */ + int slen; /* string (name) length */ + int elen; /* estimated entry length in words */ + int num_entry_words = 0; /* actual number of words */ + + if (cd->offset) { + u64 offset64 = offset; + + if (unlikely(cd->offset1)) { + /* we ended up with offset on a page boundary */ + *cd->offset = htonl(offset64 >> 32); + *cd->offset1 = htonl(offset64 & 0xffffffff); + cd->offset1 = NULL; + } else { + xdr_encode_hyper(cd->offset, (u64) offset); + } + } + + /* + dprintk("encode_entry(%.*s @%ld%s)\n", + namlen, name, (long) offset, plus? " plus" : ""); + */ + + /* truncate filename if too long */ + if (namlen > NFS3_MAXNAMLEN) + namlen = NFS3_MAXNAMLEN; + + slen = XDR_QUADLEN(namlen); + elen = slen + NFS3_ENTRY_BAGGAGE + + (plus? NFS3_ENTRYPLUS_BAGGAGE : 0); + + if (cd->buflen < elen) { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + + /* determine which page in rq_respages[] we are currently filling */ + for (pn=1; pn < cd->rqstp->rq_resused; pn++) { + curr_page_addr = page_address(cd->rqstp->rq_respages[pn]); + + if (((caddr_t)cd->buffer >= curr_page_addr) && + ((caddr_t)cd->buffer < curr_page_addr + PAGE_SIZE)) + break; + } + + if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) { + /* encode entry in current page */ + + p = encode_entry_baggage(cd, p, name, namlen, ino); + + /* throw in readdirplus baggage */ + if (plus) { + struct svc_fh fh; + + if (compose_entry_fh(cd, &fh, name, namlen) > 0) { + *p++ = 0; + *p++ = 0; + } else + p = encode_entryplus_baggage(cd, p, &fh); + } + num_entry_words = p - cd->buffer; + } else if (cd->rqstp->rq_respages[pn+1] != NULL) { + /* temporarily encode entry into next page, then move back to + * current and next page in rq_respages[] */ + u32 *p1, *tmp; + int len1, len2; + + /* grab next page for temporary storage of entry */ + p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]); + + p1 = encode_entry_baggage(cd, p1, name, namlen, ino); + + /* throw in readdirplus baggage */ + if (plus) { + struct svc_fh fh; + + if (compose_entry_fh(cd, &fh, name, namlen) > 0) { + /* zero out the filehandle */ + *p1++ = 0; + *p1++ = 0; + } else + p1 = encode_entryplus_baggage(cd, p1, &fh); + } + + /* determine entry word length and lengths to go in pages */ + num_entry_words = p1 - tmp; + len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer; + if ((num_entry_words << 2) < len1) { + /* the actual number of words in the entry is less + * than elen and can still fit in the current page + */ + memmove(p, tmp, num_entry_words << 2); + p += num_entry_words; + + /* update offset */ + cd->offset = cd->buffer + (cd->offset - tmp); + } else { + unsigned int offset_r = (cd->offset - tmp) << 2; + + /* update pointer to offset location. + * This is a 64bit quantity, so we need to + * deal with 3 cases: + * - entirely in first page + * - entirely in second page + * - 4 bytes in each page + */ + if (offset_r + 8 <= len1) { + cd->offset = p + (cd->offset - tmp); + } else if (offset_r >= len1) { + cd->offset -= len1 >> 2; + } else { + /* sitting on the fence */ + BUG_ON(offset_r != len1 - 4); + cd->offset = p + (cd->offset - tmp); + cd->offset1 = tmp; + } + + len2 = (num_entry_words << 2) - len1; + + /* move from temp page to current and next pages */ + memmove(p, tmp, len1); + memmove(tmp, (caddr_t)tmp+len1, len2); + + p = tmp + (len2 >> 2); + } + } + else { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + + cd->buflen -= num_entry_words; + cd->buffer = p; + cd->common.err = nfs_ok; + return 0; + +} + +int +nfs3svc_encode_entry(struct readdir_cd *cd, const char *name, + int namlen, loff_t offset, ino_t ino, unsigned int d_type) +{ + return encode_entry(cd, name, namlen, offset, ino, d_type, 0); +} + +int +nfs3svc_encode_entry_plus(struct readdir_cd *cd, const char *name, + int namlen, loff_t offset, ino_t ino, unsigned int d_type) +{ + return encode_entry(cd, name, namlen, offset, ino, d_type, 1); +} + +/* FSSTAT */ +int +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_fsstatres *resp) +{ + struct kstatfs *s = &resp->stats; + u64 bs = s->f_bsize; + + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + p = xdr_encode_hyper(p, bs * s->f_blocks); /* total bytes */ + p = xdr_encode_hyper(p, bs * s->f_bfree); /* free bytes */ + p = xdr_encode_hyper(p, bs * s->f_bavail); /* user available bytes */ + p = xdr_encode_hyper(p, s->f_files); /* total inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* free inodes */ + p = xdr_encode_hyper(p, s->f_ffree); /* user available inodes */ + *p++ = htonl(resp->invarsec); /* mean unchanged time */ + } + return xdr_ressize_check(rqstp, p); +} + +/* FSINFO */ +int +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_fsinfores *resp) +{ + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->f_rtmax); + *p++ = htonl(resp->f_rtpref); + *p++ = htonl(resp->f_rtmult); + *p++ = htonl(resp->f_wtmax); + *p++ = htonl(resp->f_wtpref); + *p++ = htonl(resp->f_wtmult); + *p++ = htonl(resp->f_dtpref); + p = xdr_encode_hyper(p, resp->f_maxfilesize); + *p++ = xdr_one; + *p++ = xdr_zero; + *p++ = htonl(resp->f_properties); + } + + return xdr_ressize_check(rqstp, p); +} + +/* PATHCONF */ +int +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_pathconfres *resp) +{ + *p++ = xdr_zero; /* no post_op_attr */ + + if (resp->status == 0) { + *p++ = htonl(resp->p_link_max); + *p++ = htonl(resp->p_name_max); + *p++ = htonl(resp->p_no_trunc); + *p++ = htonl(resp->p_chown_restricted); + *p++ = htonl(resp->p_case_insensitive); + *p++ = htonl(resp->p_case_preserving); + } + + return xdr_ressize_check(rqstp, p); +} + +/* COMMIT */ +int +nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_commitres *resp) +{ + p = encode_wcc_data(rqstp, p, &resp->fh); + /* Write verifier */ + if (resp->status == 0) { + *p++ = htonl(nfssvc_boot.tv_sec); + *p++ = htonl(nfssvc_boot.tv_usec); + } + return xdr_ressize_check(rqstp, p); +} + +/* + * XDR release functions + */ +int +nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_attrstat *resp) +{ + fh_put(&resp->fh); + return 1; +} + +int +nfs3svc_release_fhandle2(struct svc_rqst *rqstp, u32 *p, + struct nfsd3_fhandle_pair *resp) +{ + fh_put(&resp->fh1); + fh_put(&resp->fh2); + return 1; +} diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c new file mode 100644 index 00000000000..11ebf6c4aa5 --- /dev/null +++ b/fs/nfsd/nfs4acl.c @@ -0,0 +1,954 @@ +/* + * fs/nfs4acl/acl.c + * + * Common NFSv4 ACL handling code. + * + * Copyright (c) 2002, 2003 The Regents of the University of Michigan. + * All rights reserved. + * + * Marius Aamodt Eriksen <marius@umich.edu> + * Jeff Sedlak <jsedlak@umich.edu> + * J. Bruce Fields <bfields@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/nfs_fs.h> +#include <linux/posix_acl.h> +#include <linux/nfs4.h> +#include <linux/nfs4_acl.h> + + +/* mode bit translations: */ +#define NFS4_READ_MODE (NFS4_ACE_READ_DATA) +#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_APPEND_DATA) +#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE +#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE) +#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL) + +/* We don't support these bits; insist they be neither allowed nor denied */ +#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \ + | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS) + +/* flags used to simulate posix default ACLs */ +#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \ + | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE) + +#define MASK_EQUAL(mask1, mask2) \ + ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) + +static u32 +mask_from_posix(unsigned short perm, unsigned int flags) +{ + int mask = NFS4_ANYONE_MODE; + + if (flags & NFS4_ACL_OWNER) + mask |= NFS4_OWNER_MODE; + if (perm & ACL_READ) + mask |= NFS4_READ_MODE; + if (perm & ACL_WRITE) + mask |= NFS4_WRITE_MODE; + if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR)) + mask |= NFS4_ACE_DELETE_CHILD; + if (perm & ACL_EXECUTE) + mask |= NFS4_EXECUTE_MODE; + return mask; +} + +static u32 +deny_mask(u32 allow_mask, unsigned int flags) +{ + u32 ret = ~allow_mask & ~NFS4_MASK_UNSUPP; + if (!(flags & NFS4_ACL_DIR)) + ret &= ~NFS4_ACE_DELETE_CHILD; + return ret; +} + +/* XXX: modify functions to return NFS errors; they're only ever + * used by nfs code, after all.... */ + +static int +mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags) +{ + u32 ignore = 0; + + if (!(flags & NFS4_ACL_DIR)) + ignore |= NFS4_ACE_DELETE_CHILD; /* ignore it */ + perm |= ignore; + *mode = 0; + if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE) + *mode |= ACL_READ; + if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) + *mode |= ACL_WRITE; + if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) + *mode |= ACL_EXECUTE; + if (!MASK_EQUAL(perm, ignore|mask_from_posix(*mode, flags))) + return -EINVAL; + return 0; +} + +struct ace_container { + struct nfs4_ace *ace; + struct list_head ace_l; +}; + +static short ace2type(struct nfs4_ace *); +static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); +static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int); +int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); +int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); + +struct nfs4_acl * +nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, + unsigned int flags) +{ + struct nfs4_acl *acl; + int error = -EINVAL; + + if ((pacl != NULL && + (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) || + (dpacl != NULL && + (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0))) + goto out_err; + + acl = nfs4_acl_new(); + if (acl == NULL) { + error = -ENOMEM; + goto out_err; + } + + if (pacl != NULL) { + error = _posix_to_nfsv4_one(pacl, acl, + flags & ~NFS4_ACL_TYPE_DEFAULT); + if (error < 0) + goto out_acl; + } + + if (dpacl != NULL) { + error = _posix_to_nfsv4_one(dpacl, acl, + flags | NFS4_ACL_TYPE_DEFAULT); + if (error < 0) + goto out_acl; + } + + return acl; + +out_acl: + nfs4_acl_free(acl); +out_err: + acl = ERR_PTR(error); + + return acl; +} + +static int +nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, int whotype, + uid_t owner, unsigned int flags) +{ + int error; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + eflag, mask, whotype, owner); + if (error < 0) + return error; + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + eflag, deny_mask(mask, flags), whotype, owner); + return error; +} + +/* We assume the acl has been verified with posix_acl_valid. */ +static int +_posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl, + unsigned int flags) +{ + struct posix_acl_entry *pa, *pe, *group_owner_entry; + int error = -EINVAL; + u32 mask, mask_mask; + int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ? + NFS4_INHERITANCE_FLAGS : 0); + + BUG_ON(pacl->a_count < 3); + pe = pacl->a_entries + pacl->a_count; + pa = pe - 2; /* if mask entry exists, it's second from the last. */ + if (pa->e_tag == ACL_MASK) + mask_mask = deny_mask(mask_from_posix(pa->e_perm, flags), flags); + else + mask_mask = 0; + + pa = pacl->a_entries; + BUG_ON(pa->e_tag != ACL_USER_OBJ); + mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER); + error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_OWNER, 0, flags); + if (error < 0) + goto out; + pa++; + + while (pa->e_tag == ACL_USER) { + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + eflag, mask_mask, NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + + + error = nfs4_acl_add_pair(acl, eflag, mask, + NFS4_ACL_WHO_NAMED, pa->e_id, flags); + if (error < 0) + goto out; + pa++; + } + + /* In the case of groups, we apply allow ACEs first, then deny ACEs, + * since a user can be in more than one group. */ + + /* allow ACEs */ + + if (pacl->a_count > 3) { + BUG_ON(pa->e_tag != ACL_GROUP_OBJ); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, + NFS4_ACL_WHO_GROUP, 0); + if (error < 0) + goto out; + } + group_owner_entry = pa; + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, + NFS4_ACL_WHO_GROUP, 0); + if (error < 0) + goto out; + pa++; + + while (pa->e_tag == ACL_GROUP) { + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, + NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, + NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + pa++; + } + + /* deny ACEs */ + + pa = group_owner_entry; + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, + deny_mask(mask, flags), NFS4_ACL_WHO_GROUP, 0); + if (error < 0) + goto out; + pa++; + while (pa->e_tag == ACL_GROUP) { + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, + NFS4_ACE_IDENTIFIER_GROUP | eflag, + deny_mask(mask, flags), NFS4_ACL_WHO_NAMED, pa->e_id); + if (error < 0) + goto out; + pa++; + } + + if (pa->e_tag == ACL_MASK) + pa++; + BUG_ON(pa->e_tag != ACL_OTHER); + mask = mask_from_posix(pa->e_perm, flags); + error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_EVERYONE, 0, flags); + +out: + return error; +} + +static void +sort_pacl_range(struct posix_acl *pacl, int start, int end) { + int sorted = 0, i; + struct posix_acl_entry tmp; + + /* We just do a bubble sort; easy to do in place, and we're not + * expecting acl's to be long enough to justify anything more. */ + while (!sorted) { + sorted = 1; + for (i = start; i < end; i++) { + if (pacl->a_entries[i].e_id + > pacl->a_entries[i+1].e_id) { + sorted = 0; + tmp = pacl->a_entries[i]; + pacl->a_entries[i] = pacl->a_entries[i+1]; + pacl->a_entries[i+1] = tmp; + } + } + } +} + +static void +sort_pacl(struct posix_acl *pacl) +{ + /* posix_acl_valid requires that users and groups be in order + * by uid/gid. */ + int i, j; + + if (pacl->a_count <= 4) + return; /* no users or groups */ + i = 1; + while (pacl->a_entries[i].e_tag == ACL_USER) + i++; + sort_pacl_range(pacl, 1, i-1); + + BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); + j = i++; + while (pacl->a_entries[j].e_tag == ACL_GROUP) + j++; + sort_pacl_range(pacl, i, j-1); + return; +} + +static int +write_pace(struct nfs4_ace *ace, struct posix_acl *pacl, + struct posix_acl_entry **pace, short tag, unsigned int flags) +{ + struct posix_acl_entry *this = *pace; + + if (*pace == pacl->a_entries + pacl->a_count) + return -EINVAL; /* fell off the end */ + (*pace)++; + this->e_tag = tag; + if (tag == ACL_USER_OBJ) + flags |= NFS4_ACL_OWNER; + if (mode_from_nfs4(ace->access_mask, &this->e_perm, flags)) + return -EINVAL; + this->e_id = (tag == ACL_USER || tag == ACL_GROUP ? + ace->who : ACL_UNDEFINED_ID); + return 0; +} + +static struct nfs4_ace * +get_next_v4_ace(struct list_head **p, struct list_head *head) +{ + struct nfs4_ace *ace; + + *p = (*p)->next; + if (*p == head) + return NULL; + ace = list_entry(*p, struct nfs4_ace, l_ace); + + return ace; +} + +int +nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl, + struct posix_acl **dpacl, unsigned int flags) +{ + struct nfs4_acl *dacl; + int error = -ENOMEM; + + *pacl = NULL; + *dpacl = NULL; + + dacl = nfs4_acl_new(); + if (dacl == NULL) + goto out; + + error = nfs4_acl_split(acl, dacl); + if (error < 0) + goto out_acl; + + if (pacl != NULL) { + if (acl->naces == 0) { + error = -ENODATA; + goto try_dpacl; + } + + *pacl = _nfsv4_to_posix_one(acl, flags); + if (IS_ERR(*pacl)) { + error = PTR_ERR(*pacl); + *pacl = NULL; + goto out_acl; + } + } + +try_dpacl: + if (dpacl != NULL) { + if (dacl->naces == 0) { + if (pacl == NULL || *pacl == NULL) + error = -ENODATA; + goto out_acl; + } + + error = 0; + *dpacl = _nfsv4_to_posix_one(dacl, flags); + if (IS_ERR(*dpacl)) { + error = PTR_ERR(*dpacl); + *dpacl = NULL; + goto out_acl; + } + } + +out_acl: + if (error && pacl) { + posix_acl_release(*pacl); + *pacl = NULL; + } + nfs4_acl_free(dacl); +out: + return error; +} + +static int +same_who(struct nfs4_ace *a, struct nfs4_ace *b) +{ + return a->whotype == b->whotype && + (a->whotype != NFS4_ACL_WHO_NAMED || a->who == b->who); +} + +static int +complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny, + unsigned int flags) +{ + int ignore = 0; + if (!(flags & NFS4_ACL_DIR)) + ignore |= NFS4_ACE_DELETE_CHILD; + return MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags), + ignore|deny->access_mask) && + allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && + deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE && + allow->flag == deny->flag && + same_who(allow, deny); +} + +static inline int +user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_USER_OBJ) + goto out; + error = write_pace(ace, pacl, pace, ACL_USER_OBJ, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2, flags)) + goto out; + error = 0; +out: + return error; +} + +static inline int +users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + while (ace2type(ace) == ACL_USER) { + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + goto out; + if (*mask_ace && + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + error = write_pace(ace, pacl, pace, ACL_USER, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2, flags)) + goto out; + if ((*mask_ace)->flag != ace2->flag || + !same_who(*mask_ace, ace2)) + goto out; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + } + error = 0; +out: + return error; +} + +static inline int +group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + struct ace_container *ac; + struct list_head group_l; + + INIT_LIST_HEAD(&group_l); + ace = list_entry(*p, struct nfs4_ace, l_ace); + + /* group owner (mask and allow aces) */ + + if (pacl->a_count != 3) { + /* then the group owner should be preceded by mask */ + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) + goto out; + if (*mask_ace && + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + + if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace)) + goto out; + } + + if (ace2type(ace) != ACL_GROUP_OBJ) + goto out; + + ac = kmalloc(sizeof(*ac), GFP_KERNEL); + error = -ENOMEM; + if (ac == NULL) + goto out; + ac->ace = ace; + list_add_tail(&ac->ace_l, &group_l); + + error = -EINVAL; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + + error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags); + if (error < 0) + goto out; + + error = -EINVAL; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + + /* groups (mask and allow aces) */ + + while (ace2type(ace) == ACL_GROUP) { + if (*mask_ace == NULL) + goto out; + + if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE || + !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) + goto out; + *mask_ace = ace; + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + ac = kmalloc(sizeof(*ac), GFP_KERNEL); + error = -ENOMEM; + if (ac == NULL) + goto out; + error = -EINVAL; + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE || + !same_who(ace, *mask_ace)) + goto out; + + ac->ace = ace; + list_add_tail(&ac->ace_l, &group_l); + + error = write_pace(ace, pacl, pace, ACL_GROUP, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + } + + /* group owner (deny ace) */ + + if (ace2type(ace) != ACL_GROUP_OBJ) + goto out; + ac = list_entry(group_l.next, struct ace_container, ace_l); + ace2 = ac->ace; + if (!complementary_ace_pair(ace2, ace, flags)) + goto out; + list_del(group_l.next); + kfree(ac); + + /* groups (deny aces) */ + + while (!list_empty(&group_l)) { + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_GROUP) + goto out; + ac = list_entry(group_l.next, struct ace_container, ace_l); + ace2 = ac->ace; + if (!complementary_ace_pair(ace2, ace, flags)) + goto out; + list_del(group_l.next); + kfree(ac); + } + + ace = get_next_v4_ace(p, &n4acl->ace_head); + if (ace == NULL) + goto out; + if (ace2type(ace) != ACL_OTHER) + goto out; + error = 0; +out: + while (!list_empty(&group_l)) { + ac = list_entry(group_l.next, struct ace_container, ace_l); + list_del(group_l.next); + kfree(ac); + } + return error; +} + +static inline int +mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct nfs4_ace **mask_ace, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace; + + ace = list_entry(*p, struct nfs4_ace, l_ace); + if (pacl->a_count != 3) { + if (*mask_ace == NULL) + goto out; + (*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags); + write_pace(*mask_ace, pacl, pace, ACL_MASK, flags); + } + error = 0; +out: + return error; +} + +static inline int +other_from_v4(struct nfs4_acl *n4acl, struct list_head **p, + struct posix_acl *pacl, struct posix_acl_entry **pace, + unsigned int flags) +{ + int error = -EINVAL; + struct nfs4_ace *ace, *ace2; + + ace = list_entry(*p, struct nfs4_ace, l_ace); + if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) + goto out; + error = write_pace(ace, pacl, pace, ACL_OTHER, flags); + if (error < 0) + goto out; + error = -EINVAL; + ace2 = get_next_v4_ace(p, &n4acl->ace_head); + if (ace2 == NULL) + goto out; + if (!complementary_ace_pair(ace, ace2, flags)) + goto out; + error = 0; +out: + return error; +} + +static int +calculate_posix_ace_count(struct nfs4_acl *n4acl) +{ + if (n4acl->naces == 6) /* owner, owner group, and other only */ + return 3; + else { /* Otherwise there must be a mask entry. */ + /* Also, the remaining entries are for named users and + * groups, and come in threes (mask, allow, deny): */ + if (n4acl->naces < 7) + return -1; + if ((n4acl->naces - 7) % 3) + return -1; + return 4 + (n4acl->naces - 7)/3; + } +} + + +static struct posix_acl * +_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags) +{ + struct posix_acl *pacl; + int error = -EINVAL, nace = 0; + struct list_head *p; + struct nfs4_ace *mask_ace = NULL; + struct posix_acl_entry *pace; + + nace = calculate_posix_ace_count(n4acl); + if (nace < 0) + goto out_err; + + pacl = posix_acl_alloc(nace, GFP_KERNEL); + error = -ENOMEM; + if (pacl == NULL) + goto out_err; + + pace = &pacl->a_entries[0]; + p = &n4acl->ace_head; + + error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags); + if (error) + goto out_acl; + + error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); + if (error) + goto out_acl; + + error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, + flags); + if (error) + goto out_acl; + + error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags); + if (error) + goto out_acl; + error = other_from_v4(n4acl, &p, pacl, &pace, flags); + if (error) + goto out_acl; + + error = -EINVAL; + if (p->next != &n4acl->ace_head) + goto out_acl; + if (pace != pacl->a_entries + pacl->a_count) + goto out_acl; + + sort_pacl(pacl); + + return pacl; +out_acl: + posix_acl_release(pacl); +out_err: + pacl = ERR_PTR(error); + return pacl; +} + +int +nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) +{ + struct list_head *h, *n; + struct nfs4_ace *ace; + int error = 0; + + list_for_each_safe(h, n, &acl->ace_head) { + ace = list_entry(h, struct nfs4_ace, l_ace); + + if ((ace->flag & NFS4_INHERITANCE_FLAGS) + != NFS4_INHERITANCE_FLAGS) + continue; + + error = nfs4_acl_add_ace(dacl, ace->type, ace->flag, + ace->access_mask, ace->whotype, ace->who) == -1; + if (error < 0) + goto out; + + list_del(h); + kfree(ace); + acl->naces--; + } + +out: + return error; +} + +static short +ace2type(struct nfs4_ace *ace) +{ + switch (ace->whotype) { + case NFS4_ACL_WHO_NAMED: + return (ace->flag & NFS4_ACE_IDENTIFIER_GROUP ? + ACL_GROUP : ACL_USER); + case NFS4_ACL_WHO_OWNER: + return ACL_USER_OBJ; + case NFS4_ACL_WHO_GROUP: + return ACL_GROUP_OBJ; + case NFS4_ACL_WHO_EVERYONE: + return ACL_OTHER; + } + BUG(); + return -1; +} + +EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4); +EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix); + +struct nfs4_acl * +nfs4_acl_new(void) +{ + struct nfs4_acl *acl; + + if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL) + return NULL; + + acl->naces = 0; + INIT_LIST_HEAD(&acl->ace_head); + + return acl; +} + +void +nfs4_acl_free(struct nfs4_acl *acl) +{ + struct list_head *h; + struct nfs4_ace *ace; + + if (!acl) + return; + + while (!list_empty(&acl->ace_head)) { + h = acl->ace_head.next; + list_del(h); + ace = list_entry(h, struct nfs4_ace, l_ace); + kfree(ace); + } + + kfree(acl); + + return; +} + +int +nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, + int whotype, uid_t who) +{ + struct nfs4_ace *ace; + + if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) + return -1; + + ace->type = type; + ace->flag = flag; + ace->access_mask = access_mask; + ace->whotype = whotype; + ace->who = who; + + list_add_tail(&ace->l_ace, &acl->ace_head); + acl->naces++; + + return 0; +} + +static struct { + char *string; + int stringlen; + int type; +} s2t_map[] = { + { + .string = "OWNER@", + .stringlen = sizeof("OWNER@") - 1, + .type = NFS4_ACL_WHO_OWNER, + }, + { + .string = "GROUP@", + .stringlen = sizeof("GROUP@") - 1, + .type = NFS4_ACL_WHO_GROUP, + }, + { + .string = "EVERYONE@", + .stringlen = sizeof("EVERYONE@") - 1, + .type = NFS4_ACL_WHO_EVERYONE, + }, +}; + +int +nfs4_acl_get_whotype(char *p, u32 len) +{ + int i; + + for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) { + if (s2t_map[i].stringlen == len && + 0 == memcmp(s2t_map[i].string, p, len)) + return s2t_map[i].type; + } + return NFS4_ACL_WHO_NAMED; +} + +int +nfs4_acl_write_who(int who, char *p) +{ + int i; + + for (i=0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) { + if (s2t_map[i].type == who) { + memcpy(p, s2t_map[i].string, s2t_map[i].stringlen); + return s2t_map[i].stringlen; + } + } + BUG(); + return -1; +} + +static inline int +match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who) +{ + switch (ace->whotype) { + case NFS4_ACL_WHO_NAMED: + return who == ace->who; + case NFS4_ACL_WHO_OWNER: + return who == owner; + case NFS4_ACL_WHO_GROUP: + return who == group; + case NFS4_ACL_WHO_EVERYONE: + return 1; + default: + return 0; + } +} + +EXPORT_SYMBOL(nfs4_acl_new); +EXPORT_SYMBOL(nfs4_acl_free); +EXPORT_SYMBOL(nfs4_acl_add_ace); +EXPORT_SYMBOL(nfs4_acl_get_whotype); +EXPORT_SYMBOL(nfs4_acl_write_who); diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c new file mode 100644 index 00000000000..c70de9c2af7 --- /dev/null +++ b/fs/nfsd/nfs4callback.c @@ -0,0 +1,547 @@ +/* + * linux/fs/nfsd/nfs4callback.c + * + * Copyright (c) 2001 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/inet.h> +#include <linux/errno.h> +#include <linux/delay.h> +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/state.h> +#include <linux/sunrpc/sched.h> +#include <linux/nfs4.h> + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +#define NFSPROC4_CB_NULL 0 +#define NFSPROC4_CB_COMPOUND 1 + +/* declarations */ +static void nfs4_cb_null(struct rpc_task *task); +extern spinlock_t recall_lock; + +/* Index of predefined Linux callback client operations */ + +enum { + NFSPROC4_CLNT_CB_NULL = 0, + NFSPROC4_CLNT_CB_RECALL, +}; + +enum nfs_cb_opnum4 { + OP_CB_RECALL = 4, +}; + +#define NFS4_MAXTAGLEN 20 + +#define NFS4_enc_cb_null_sz 0 +#define NFS4_dec_cb_null_sz 0 +#define cb_compound_enc_hdr_sz 4 +#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2)) +#define op_enc_sz 1 +#define op_dec_sz 2 +#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2)) +#define enc_stateid_sz 16 +#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \ + 1 + enc_stateid_sz + \ + enc_nfs4_fh_sz) + +#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ + op_dec_sz) + +/* +* Generic encode routines from fs/nfs/nfs4xdr.c +*/ +static inline u32 * +xdr_writemem(u32 *p, const void *ptr, int nbytes) +{ + int tmp = XDR_QUADLEN(nbytes); + if (!tmp) + return p; + p[tmp-1] = 0; + memcpy(p, ptr, nbytes); + return p + tmp; +} + +#define WRITE32(n) *p++ = htonl(n) +#define WRITEMEM(ptr,nbytes) do { \ + p = xdr_writemem(p, ptr, nbytes); \ +} while (0) +#define RESERVE_SPACE(nbytes) do { \ + p = xdr_reserve_space(xdr, nbytes); \ + if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ + BUG_ON(!p); \ +} while (0) + +/* + * Generic decode routines from fs/nfs/nfs4xdr.c + */ +#define DECODE_TAIL \ + status = 0; \ +out: \ + return status; \ +xdr_error: \ + dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \ + status = -EIO; \ + goto out + +#define READ32(x) (x) = ntohl(*p++) +#define READ64(x) do { \ + (x) = (u64)ntohl(*p++) << 32; \ + (x) |= ntohl(*p++); \ +} while (0) +#define READTIME(x) do { \ + p++; \ + (x.tv_sec) = ntohl(*p++); \ + (x.tv_nsec) = ntohl(*p++); \ +} while (0) +#define READ_BUF(nbytes) do { \ + p = xdr_inline_decode(xdr, nbytes); \ + if (!p) { \ + dprintk("NFSD: %s: reply buffer overflowed in line %d.", \ + __FUNCTION__, __LINE__); \ + return -EIO; \ + } \ +} while (0) + +struct nfs4_cb_compound_hdr { + int status; + u32 ident; + u32 nops; + u32 taglen; + char * tag; +}; + +static struct { +int stat; +int errno; +} nfs_cb_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, EPERM }, + { NFS4ERR_NOENT, ENOENT }, + { NFS4ERR_IO, EIO }, + { NFS4ERR_NXIO, ENXIO }, + { NFS4ERR_ACCESS, EACCES }, + { NFS4ERR_EXIST, EEXIST }, + { NFS4ERR_XDEV, EXDEV }, + { NFS4ERR_NOTDIR, ENOTDIR }, + { NFS4ERR_ISDIR, EISDIR }, + { NFS4ERR_INVAL, EINVAL }, + { NFS4ERR_FBIG, EFBIG }, + { NFS4ERR_NOSPC, ENOSPC }, + { NFS4ERR_ROFS, EROFS }, + { NFS4ERR_MLINK, EMLINK }, + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, ENOTEMPTY }, + { NFS4ERR_DQUOT, EDQUOT }, + { NFS4ERR_STALE, ESTALE }, + { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, + { NFS4ERR_NOTSUPP, ENOTSUPP }, + { NFS4ERR_TOOSMALL, ETOOSMALL }, + { NFS4ERR_SERVERFAULT, ESERVERFAULT }, + { NFS4ERR_BADTYPE, EBADTYPE }, + { NFS4ERR_LOCKED, EAGAIN }, + { NFS4ERR_RESOURCE, EREMOTEIO }, + { NFS4ERR_SYMLINK, ELOOP }, + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, EDEADLK }, + { -1, EIO } +}; + +static int +nfs_cb_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_cb_errtbl[i].stat != -1; i++) { + if (nfs_cb_errtbl[i].stat == stat) + return nfs_cb_errtbl[i].errno; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return stat; +} + +/* + * XDR encode + */ + +static int +encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr) +{ + u32 * p; + + RESERVE_SPACE(16); + WRITE32(0); /* tag length is always 0 */ + WRITE32(NFS4_MINOR_VERSION); + WRITE32(hdr->ident); + WRITE32(hdr->nops); + return 0; +} + +static int +encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) +{ + u32 *p; + int len = cb_rec->cbr_fhlen; + + RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); + WRITE32(OP_CB_RECALL); + WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); + WRITE32(cb_rec->cbr_trunc); + WRITE32(len); + WRITEMEM(cb_rec->cbr_fhval, len); + return 0; +} + +static int +nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p) +{ + struct xdr_stream xdrs, *xdr = &xdrs; + + xdr_init_encode(&xdrs, &req->rq_snd_buf, p); + RESERVE_SPACE(0); + return 0; +} + +static int +nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr = { + .ident = args->cbr_ident, + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_cb_compound_hdr(&xdr, &hdr); + return (encode_cb_recall(&xdr, args)); +} + + +static int +decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr){ + u32 *p; + + READ_BUF(8); + READ32(hdr->status); + READ32(hdr->taglen); + READ_BUF(hdr->taglen + 4); + hdr->tag = (char *)p; + p += XDR_QUADLEN(hdr->taglen); + READ32(hdr->nops); + return 0; +} + +static int +decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + u32 *p; + u32 op; + int32_t nfserr; + + READ_BUF(8); + READ32(op); + if (op != expected) { + dprintk("NFSD: decode_cb_op_hdr: Callback server returned " + " operation %d but we issued a request for %d\n", + op, expected); + return -EIO; + } + READ32(nfserr); + if (nfserr != NFS_OK) + return -nfs_cb_stat_to_errno(nfserr); + return 0; +} + +static int +nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p) +{ + return 0; +} + +static int +nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p) +{ + struct xdr_stream xdr; + struct nfs4_cb_compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_cb_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_cb_op_hdr(&xdr, OP_CB_RECALL); +out: + return status; +} + +/* + * RPC procedure tables + */ +#ifndef MAX +# define MAX(a, b) (((a) > (b))? (a) : (b)) +#endif + +#define PROC(proc, call, argtype, restype) \ +[NFSPROC4_CLNT_##proc] = { \ + .p_proc = NFSPROC4_CB_##call, \ + .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \ + .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \ + .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ +} + +struct rpc_procinfo nfs4_cb_procedures[] = { + PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), + PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), +}; + +struct rpc_version nfs_cb_version4 = { + .number = 1, + .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]), + .procs = nfs4_cb_procedures +}; + +static struct rpc_version * nfs_cb_version[] = { + NULL, + &nfs_cb_version4, +}; + +/* + * Use the SETCLIENTID credential + */ +struct rpc_cred * +nfsd4_lookupcred(struct nfs4_client *clp, int taskflags) +{ + struct auth_cred acred; + struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct rpc_cred *ret; + + get_group_info(clp->cl_cred.cr_group_info); + acred.uid = clp->cl_cred.cr_uid; + acred.gid = clp->cl_cred.cr_gid; + acred.group_info = clp->cl_cred.cr_group_info; + + dprintk("NFSD: looking up %s cred\n", + clnt->cl_auth->au_ops->au_name); + ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags); + put_group_info(clp->cl_cred.cr_group_info); + return ret; +} + +/* + * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... + */ +void +nfsd4_probe_callback(struct nfs4_client *clp) +{ + struct sockaddr_in addr; + struct nfs4_callback *cb = &clp->cl_callback; + struct rpc_timeout timeparms; + struct rpc_xprt * xprt; + struct rpc_program * program = &cb->cb_program; + struct rpc_stat * stat = &cb->cb_stat; + struct rpc_clnt * clnt; + struct rpc_message msg = { + .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], + .rpc_argp = clp, + }; + char hostname[32]; + int status; + + dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n", + cb->cb_parsed, atomic_read(&cb->cb_set)); + if (!cb->cb_parsed || atomic_read(&cb->cb_set)) + return; + + /* Initialize address */ + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(cb->cb_port); + addr.sin_addr.s_addr = htonl(cb->cb_addr); + + /* Initialize timeout */ + timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ; + timeparms.to_retries = 0; + timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ; + timeparms.to_exponential = 1; + + /* Create RPC transport */ + if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) { + dprintk("NFSD: couldn't create callback transport!\n"); + goto out_err; + } + + /* Initialize rpc_program */ + program->name = "nfs4_cb"; + program->number = cb->cb_prog; + program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]); + program->version = nfs_cb_version; + program->stats = stat; + + /* Initialize rpc_stat */ + memset(stat, 0, sizeof(struct rpc_stat)); + stat->program = program; + + /* Create RPC client + * + * XXX AUTH_UNIX only - need AUTH_GSS.... + */ + sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr)); + if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) { + dprintk("NFSD: couldn't create callback client\n"); + goto out_xprt; + } + clnt->cl_intr = 0; + clnt->cl_softrtry = 1; + clnt->cl_chatty = 1; + + /* Kick rpciod, put the call on the wire. */ + + if (rpciod_up() != 0) { + dprintk("nfsd: couldn't start rpciod for callbacks!\n"); + goto out_clnt; + } + + /* the task holds a reference to the nfs4_client struct */ + cb->cb_client = clnt; + atomic_inc(&clp->cl_count); + + msg.rpc_cred = nfsd4_lookupcred(clp,0); + if (IS_ERR(msg.rpc_cred)) + goto out_rpciod; + status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL); + put_rpccred(msg.rpc_cred); + + if (status != 0) { + dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); + goto out_rpciod; + } + return; + +out_rpciod: + atomic_dec(&clp->cl_count); + rpciod_down(); +out_clnt: + rpc_shutdown_client(clnt); + goto out_err; +out_xprt: + xprt_destroy(xprt); +out_err: + dprintk("NFSD: warning: no callback path to client %.*s\n", + (int)clp->cl_name.len, clp->cl_name.data); + cb->cb_client = NULL; +} + +static void +nfs4_cb_null(struct rpc_task *task) +{ + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + struct nfs4_callback *cb = &clp->cl_callback; + u32 addr = htonl(cb->cb_addr); + + dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status); + + if (task->tk_status < 0) { + dprintk("NFSD: callback establishment to client %.*s failed\n", + (int)clp->cl_name.len, clp->cl_name.data); + goto out; + } + atomic_set(&cb->cb_set, 1); + dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr)); +out: + put_nfs4_client(clp); +} + +/* + * called with dp->dl_count inc'ed. + * nfs4_lock_state() may or may not have been called. + */ +void +nfsd4_cb_recall(struct nfs4_delegation *dp) +{ + struct nfs4_client *clp = dp->dl_client; + struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct nfs4_cb_recall *cbr = &dp->dl_recall; + struct rpc_message msg = { + .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], + .rpc_argp = cbr, + }; + int retries = 1; + int status = 0; + + if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt) + return; + + msg.rpc_cred = nfsd4_lookupcred(clp, 0); + if (IS_ERR(msg.rpc_cred)) + goto out; + + cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ + cbr->cbr_dp = dp; + + status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); + while (retries--) { + switch (status) { + case -EIO: + /* Network partition? */ + case -EBADHANDLE: + case -NFS4ERR_BAD_STATEID: + /* Race: client probably got cb_recall + * before open reply granting delegation */ + break; + default: + goto out_put_cred; + } + ssleep(2); + status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT); + } +out_put_cred: + put_rpccred(msg.rpc_cred); +out: + if (status == -EIO) + atomic_set(&clp->cl_callback.cb_set, 0); + /* Success or failure, now we're either waiting for lease expiration + * or deleg_return. */ + dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count)); + nfs4_put_delegation(dp); + return; +} diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c new file mode 100644 index 00000000000..4ba540841cf --- /dev/null +++ b/fs/nfsd/nfs4idmap.c @@ -0,0 +1,588 @@ +/* + * fs/nfsd/nfs4idmap.c + * + * Mapping of UID/GIDs to name and vice versa. + * + * Copyright (c) 2002, 2003 The Regents of the University of + * Michigan. All rights reserved. + * + * Marius Aamodt Eriksen <marius@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> + +#include <linux/mm.h> +#include <linux/utsname.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfs.h> +#include <linux/nfs4.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <linux/smp_lock.h> +#include <linux/sunrpc/cache.h> +#include <linux/nfsd_idmap.h> +#include <linux/list.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <linux/seq_file.h> +#include <linux/sunrpc/svcauth.h> + +/* + * Cache entry + */ + +/* + * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on + * that. + */ + +#define IDMAP_TYPE_USER 0 +#define IDMAP_TYPE_GROUP 1 + +struct ent { + struct cache_head h; + int type; /* User / Group */ + uid_t id; + char name[IDMAP_NAMESZ]; + char authname[IDMAP_NAMESZ]; +}; + +#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ + DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ + (struct STRUCT *item, int set), /*no setup */, \ + & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ + STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) + +/* Common entry handling */ + +#define ENT_HASHBITS 8 +#define ENT_HASHMAX (1 << ENT_HASHBITS) +#define ENT_HASHMASK (ENT_HASHMAX - 1) + +static inline void +ent_init(struct ent *new, struct ent *itm) +{ + new->id = itm->id; + new->type = itm->type; + + strlcpy(new->name, itm->name, sizeof(new->name)); + strlcpy(new->authname, itm->authname, sizeof(new->name)); +} + +static inline void +ent_update(struct ent *new, struct ent *itm) +{ + ent_init(new, itm); +} + +void +ent_put(struct cache_head *ch, struct cache_detail *cd) +{ + if (cache_put(ch, cd)) { + struct ent *map = container_of(ch, struct ent, h); + kfree(map); + } +} + +/* + * ID -> Name cache + */ + +static struct cache_head *idtoname_table[ENT_HASHMAX]; + +static uint32_t +idtoname_hash(struct ent *ent) +{ + uint32_t hash; + + hash = hash_str(ent->authname, ENT_HASHBITS); + hash = hash_long(hash ^ ent->id, ENT_HASHBITS); + + /* Flip LSB for user/group */ + if (ent->type == IDMAP_TYPE_GROUP) + hash ^= 1; + + return hash; +} + +static void +idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, + int *blen) +{ + struct ent *ent = container_of(ch, struct ent, h); + char idstr[11]; + + qword_add(bpp, blen, ent->authname); + snprintf(idstr, sizeof(idstr), "%d", ent->id); + qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user"); + qword_add(bpp, blen, idstr); + + (*bpp)[-1] = '\n'; +} + +static inline int +idtoname_match(struct ent *a, struct ent *b) +{ + return (a->id == b->id && a->type == b->type && + strcmp(a->authname, b->authname) == 0); +} + +static int +idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) +{ + struct ent *ent; + + if (h == NULL) { + seq_puts(m, "#domain type id [name]\n"); + return 0; + } + ent = container_of(h, struct ent, h); + seq_printf(m, "%s %s %d", ent->authname, + ent->type == IDMAP_TYPE_GROUP ? "group" : "user", + ent->id); + if (test_bit(CACHE_VALID, &h->flags)) + seq_printf(m, " %s", ent->name); + seq_printf(m, "\n"); + return 0; +} + +static void +warn_no_idmapd(struct cache_detail *detail) +{ + printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", + detail->last_close? "died" : "not been started"); +} + + +static int idtoname_parse(struct cache_detail *, char *, int); +static struct ent *idtoname_lookup(struct ent *, int); + +struct cache_detail idtoname_cache = { + .hash_size = ENT_HASHMAX, + .hash_table = idtoname_table, + .name = "nfs4.idtoname", + .cache_put = ent_put, + .cache_request = idtoname_request, + .cache_parse = idtoname_parse, + .cache_show = idtoname_show, + .warn_no_listener = warn_no_idmapd, +}; + +int +idtoname_parse(struct cache_detail *cd, char *buf, int buflen) +{ + struct ent ent, *res; + char *buf1, *bp; + int error = -EINVAL; + + if (buf[buflen - 1] != '\n') + return (-EINVAL); + buf[buflen - 1]= '\0'; + + buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf1 == NULL) + return (-ENOMEM); + + memset(&ent, 0, sizeof(ent)); + + /* Authentication name */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + memcpy(ent.authname, buf1, sizeof(ent.authname)); + + /* Type */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.type = strcmp(buf1, "user") == 0 ? + IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; + + /* ID */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.id = simple_strtoul(buf1, &bp, 10); + if (bp == buf1) + goto out; + + /* expiry */ + ent.h.expiry_time = get_expiry(&buf); + if (ent.h.expiry_time == 0) + goto out; + + /* Name */ + error = qword_get(&buf, buf1, PAGE_SIZE); + if (error == -EINVAL) + goto out; + if (error == -ENOENT) + set_bit(CACHE_NEGATIVE, &ent.h.flags); + else { + if (error >= IDMAP_NAMESZ) { + error = -EINVAL; + goto out; + } + memcpy(ent.name, buf1, sizeof(ent.name)); + } + error = -ENOMEM; + if ((res = idtoname_lookup(&ent, 1)) == NULL) + goto out; + + ent_put(&res->h, &idtoname_cache); + + error = 0; +out: + kfree(buf1); + + return error; +} + +static DefineSimpleCacheLookupMap(ent, idtoname); + +/* + * Name -> ID cache + */ + +static struct cache_head *nametoid_table[ENT_HASHMAX]; + +static inline int +nametoid_hash(struct ent *ent) +{ + return hash_str(ent->name, ENT_HASHBITS); +} + +void +nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, + int *blen) +{ + struct ent *ent = container_of(ch, struct ent, h); + + qword_add(bpp, blen, ent->authname); + qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user"); + qword_add(bpp, blen, ent->name); + + (*bpp)[-1] = '\n'; +} + +static inline int +nametoid_match(struct ent *a, struct ent *b) +{ + return (a->type == b->type && strcmp(a->name, b->name) == 0 && + strcmp(a->authname, b->authname) == 0); +} + +static int +nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) +{ + struct ent *ent; + + if (h == NULL) { + seq_puts(m, "#domain type name [id]\n"); + return 0; + } + ent = container_of(h, struct ent, h); + seq_printf(m, "%s %s %s", ent->authname, + ent->type == IDMAP_TYPE_GROUP ? "group" : "user", + ent->name); + if (test_bit(CACHE_VALID, &h->flags)) + seq_printf(m, " %d", ent->id); + seq_printf(m, "\n"); + return 0; +} + +static struct ent *nametoid_lookup(struct ent *, int); +int nametoid_parse(struct cache_detail *, char *, int); + +struct cache_detail nametoid_cache = { + .hash_size = ENT_HASHMAX, + .hash_table = nametoid_table, + .name = "nfs4.nametoid", + .cache_put = ent_put, + .cache_request = nametoid_request, + .cache_parse = nametoid_parse, + .cache_show = nametoid_show, + .warn_no_listener = warn_no_idmapd, +}; + +int +nametoid_parse(struct cache_detail *cd, char *buf, int buflen) +{ + struct ent ent, *res; + char *buf1; + int error = -EINVAL; + + if (buf[buflen - 1] != '\n') + return (-EINVAL); + buf[buflen - 1]= '\0'; + + buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (buf1 == NULL) + return (-ENOMEM); + + memset(&ent, 0, sizeof(ent)); + + /* Authentication name */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + memcpy(ent.authname, buf1, sizeof(ent.authname)); + + /* Type */ + if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) + goto out; + ent.type = strcmp(buf1, "user") == 0 ? + IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; + + /* Name */ + error = qword_get(&buf, buf1, PAGE_SIZE); + if (error <= 0 || error >= IDMAP_NAMESZ) + goto out; + memcpy(ent.name, buf1, sizeof(ent.name)); + + /* expiry */ + ent.h.expiry_time = get_expiry(&buf); + if (ent.h.expiry_time == 0) + goto out; + + /* ID */ + error = get_int(&buf, &ent.id); + if (error == -EINVAL) + goto out; + if (error == -ENOENT) + set_bit(CACHE_NEGATIVE, &ent.h.flags); + + error = -ENOMEM; + if ((res = nametoid_lookup(&ent, 1)) == NULL) + goto out; + + ent_put(&res->h, &nametoid_cache); + error = 0; +out: + kfree(buf1); + + return (error); +} + +static DefineSimpleCacheLookupMap(ent, nametoid); + +/* + * Exported API + */ + +void +nfsd_idmap_init(void) +{ + cache_register(&idtoname_cache); + cache_register(&nametoid_cache); +} + +void +nfsd_idmap_shutdown(void) +{ + cache_unregister(&idtoname_cache); + cache_unregister(&nametoid_cache); +} + +/* + * Deferred request handling + */ + +struct idmap_defer_req { + struct cache_req req; + struct cache_deferred_req deferred_req; + wait_queue_head_t waitq; + atomic_t count; +}; + +static inline void +put_mdr(struct idmap_defer_req *mdr) +{ + if (atomic_dec_and_test(&mdr->count)) + kfree(mdr); +} + +static inline void +get_mdr(struct idmap_defer_req *mdr) +{ + atomic_inc(&mdr->count); +} + +static void +idmap_revisit(struct cache_deferred_req *dreq, int toomany) +{ + struct idmap_defer_req *mdr = + container_of(dreq, struct idmap_defer_req, deferred_req); + + wake_up(&mdr->waitq); + put_mdr(mdr); +} + +static struct cache_deferred_req * +idmap_defer(struct cache_req *req) +{ + struct idmap_defer_req *mdr = + container_of(req, struct idmap_defer_req, req); + + mdr->deferred_req.revisit = idmap_revisit; + get_mdr(mdr); + return (&mdr->deferred_req); +} + +static inline int +do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, + struct cache_detail *detail, struct ent **item, + struct idmap_defer_req *mdr) +{ + *item = lookup_fn(key, 0); + if (!*item) + return -ENOMEM; + return cache_check(detail, &(*item)->h, &mdr->req); +} + +static inline int +do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *, int), + struct ent *key, struct cache_detail *detail, + struct ent **item) +{ + int ret = -ENOMEM; + + *item = lookup_fn(key, 0); + if (!*item) + goto out_err; + ret = -ETIMEDOUT; + if (!test_bit(CACHE_VALID, &(*item)->h.flags) + || (*item)->h.expiry_time < get_seconds() + || detail->flush_time > (*item)->h.last_refresh) + goto out_put; + ret = -ENOENT; + if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags)) + goto out_put; + return 0; +out_put: + ent_put(&(*item)->h, detail); +out_err: + *item = NULL; + return ret; +} + +static int +idmap_lookup(struct svc_rqst *rqstp, + struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, + struct cache_detail *detail, struct ent **item) +{ + struct idmap_defer_req *mdr; + int ret; + + mdr = kmalloc(sizeof(*mdr), GFP_KERNEL); + if (!mdr) + return -ENOMEM; + memset(mdr, 0, sizeof(*mdr)); + atomic_set(&mdr->count, 1); + init_waitqueue_head(&mdr->waitq); + mdr->req.defer = idmap_defer; + ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); + if (ret == -EAGAIN) { + wait_event_interruptible_timeout(mdr->waitq, + test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); + ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); + } + put_mdr(mdr); + return ret; +} + +static int +idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, + uid_t *id) +{ + struct ent *item, key = { + .type = type, + }; + int ret; + + if (namelen + 1 > sizeof(key.name)) + return -EINVAL; + memcpy(key.name, name, namelen); + key.name[namelen] = '\0'; + strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); + ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); + if (ret == -ENOENT) + ret = -ESRCH; /* nfserr_badname */ + if (ret) + return ret; + *id = item->id; + ent_put(&item->h, &nametoid_cache); + return 0; +} + +static int +idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) +{ + struct ent *item, key = { + .id = id, + .type = type, + }; + int ret; + + strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); + ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); + if (ret == -ENOENT) + return sprintf(name, "%u", id); + if (ret) + return ret; + ret = strlen(item->name); + BUG_ON(ret > IDMAP_NAMESZ); + memcpy(name, item->name, ret); + ent_put(&item->h, &idtoname_cache); + return ret; +} + +int +nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, + __u32 *id) +{ + return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); +} + +int +nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, + __u32 *id) +{ + return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); +} + +int +nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +{ + return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); +} + +int +nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) +{ + return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name); +} diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c new file mode 100644 index 00000000000..e8158741e8b --- /dev/null +++ b/fs/nfsd/nfs4proc.c @@ -0,0 +1,984 @@ +/* + * fs/nfsd/nfs4proc.c + * + * Server-side procedures for NFSv4. + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Note: some routines in this file are just trivial wrappers + * (e.g. nfsd4_lookup()) defined solely for the sake of consistent + * naming. Since all such routines have been declared "inline", + * there shouldn't be any associated overhead. At some point in + * the future, I might inline these "by hand" to clean up a + * little. + */ + +#include <linux/param.h> +#include <linux/major.h> +#include <linux/slab.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfs4.h> +#include <linux/nfsd/state.h> +#include <linux/nfsd/xdr4.h> +#include <linux/nfs4_acl.h> + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +static inline void +fh_dup2(struct svc_fh *dst, struct svc_fh *src) +{ + fh_put(dst); + dget(src->fh_dentry); + if (src->fh_export) + cache_get(&src->fh_export->h); + *dst = *src; +} + +static int +do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + int accmode, status; + + if (open->op_truncate && + !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) + return nfserr_inval; + + accmode = MAY_NOP; + if (open->op_share_access & NFS4_SHARE_ACCESS_READ) + accmode = MAY_READ; + if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE) + accmode |= (MAY_WRITE | MAY_TRUNC); + accmode |= MAY_OWNER_OVERRIDE; + + status = fh_verify(rqstp, current_fh, S_IFREG, accmode); + + return status; +} + +static int +do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + struct svc_fh resfh; + int status; + + fh_init(&resfh, NFS4_FHSIZE); + open->op_truncate = 0; + + if (open->op_create) { + /* + * Note: create modes (UNCHECKED,GUARDED...) are the same + * in NFSv4 as in v3. + */ + status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, + open->op_fname.len, &open->op_iattr, + &resfh, open->op_createmode, + (u32 *)open->op_verf.data, &open->op_truncate); + } + else { + status = nfsd_lookup(rqstp, current_fh, + open->op_fname.data, open->op_fname.len, &resfh); + fh_unlock(current_fh); + } + + if (!status) { + set_change_info(&open->op_cinfo, current_fh); + + /* set reply cache */ + fh_dup2(current_fh, &resfh); + open->op_stateowner->so_replay.rp_openfh_len = + resfh.fh_handle.fh_size; + memcpy(open->op_stateowner->so_replay.rp_openfh, + &resfh.fh_handle.fh_base, + resfh.fh_handle.fh_size); + + status = do_open_permission(rqstp, current_fh, open); + } + + fh_put(&resfh); + return status; +} + +static int +do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + int status; + + /* Only reclaims from previously confirmed clients are valid */ + if ((status = nfs4_check_open_reclaim(&open->op_clientid))) + return status; + + /* We don't know the target directory, and therefore can not + * set the change info + */ + + memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info)); + + /* set replay cache */ + open->op_stateowner->so_replay.rp_openfh_len = current_fh->fh_handle.fh_size; + memcpy(open->op_stateowner->so_replay.rp_openfh, + ¤t_fh->fh_handle.fh_base, + current_fh->fh_handle.fh_size); + + open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && + (open->op_iattr.ia_size == 0); + + status = do_open_permission(rqstp, current_fh, open); + + return status; +} + + +static inline int +nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + int status; + dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", + (int)open->op_fname.len, open->op_fname.data, + open->op_stateowner); + + if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_grace; + + if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_no_grace; + + /* This check required by spec. */ + if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) + return nfserr_inval; + + nfs4_lock_state(); + + /* check seqid for replay. set nfs4_owner */ + status = nfsd4_process_open1(open); + if (status == NFSERR_REPLAY_ME) { + struct nfs4_replay *rp = &open->op_stateowner->so_replay; + fh_put(current_fh); + current_fh->fh_handle.fh_size = rp->rp_openfh_len; + memcpy(¤t_fh->fh_handle.fh_base, rp->rp_openfh, + rp->rp_openfh_len); + status = fh_verify(rqstp, current_fh, 0, MAY_NOP); + if (status) + dprintk("nfsd4_open: replay failed" + " restoring previous filehandle\n"); + else + status = NFSERR_REPLAY_ME; + } + if (status) + goto out; + switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_NULL: + /* + * (1) set CURRENT_FH to the file being opened, + * creating it if necessary, (2) set open->op_cinfo, + * (3) set open->op_truncate if the file is to be + * truncated after opening, (4) do permission checking. + */ + status = do_open_lookup(rqstp, current_fh, open); + if (status) + goto out; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + /* + * The CURRENT_FH is already set to the file being + * opened. (1) set open->op_cinfo, (2) set + * open->op_truncate if the file is to be truncated + * after opening, (3) do permission checking. + */ + status = do_open_fhandle(rqstp, current_fh, open); + if (status) + goto out; + break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + printk("NFSD: unsupported OPEN claim type %d\n", + open->op_claim_type); + status = nfserr_notsupp; + goto out; + default: + printk("NFSD: Invalid OPEN claim type %d\n", + open->op_claim_type); + status = nfserr_inval; + goto out; + } + /* + * nfsd4_process_open2() does the actual opening of the file. If + * successful, it (1) truncates the file if open->op_truncate was + * set, (2) sets open->op_stateid, (3) sets open->op_delegation. + */ + status = nfsd4_process_open2(rqstp, current_fh, open); +out: + if (open->op_stateowner) + nfs4_get_stateowner(open->op_stateowner); + nfs4_unlock_state(); + return status; +} + +/* + * filehandle-manipulating ops. + */ +static inline int +nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh) +{ + if (!current_fh->fh_dentry) + return nfserr_nofilehandle; + + *getfh = current_fh; + return nfs_ok; +} + +static inline int +nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putfh *putfh) +{ + fh_put(current_fh); + current_fh->fh_handle.fh_size = putfh->pf_fhlen; + memcpy(¤t_fh->fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); + return fh_verify(rqstp, current_fh, 0, MAY_NOP); +} + +static inline int +nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh) +{ + int status; + + fh_put(current_fh); + status = exp_pseudoroot(rqstp->rq_client, current_fh, + &rqstp->rq_chandle); + if (!status) + status = nfserrno(nfsd_setuser(rqstp, current_fh->fh_export)); + return status; +} + +static inline int +nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh) +{ + if (!save_fh->fh_dentry) + return nfserr_restorefh; + + fh_dup2(current_fh, save_fh); + return nfs_ok; +} + +static inline int +nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh) +{ + if (!current_fh->fh_dentry) + return nfserr_nofilehandle; + + fh_dup2(save_fh, current_fh); + return nfs_ok; +} + +/* + * misc nfsv4 ops + */ +static inline int +nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_access *access) +{ + if (access->ac_req_access & ~NFS3_ACCESS_FULL) + return nfserr_inval; + + access->ac_resp_access = access->ac_req_access; + return nfsd_access(rqstp, current_fh, &access->ac_resp_access, &access->ac_supported); +} + +static inline int +nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit) +{ + int status; + + u32 *p = (u32 *)commit->co_verf.data; + *p++ = nfssvc_boot.tv_sec; + *p++ = nfssvc_boot.tv_usec; + + status = nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count); + if (status == nfserr_symlink) + status = nfserr_inval; + return status; +} + +static int +nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_create *create) +{ + struct svc_fh resfh; + int status; + dev_t rdev; + + fh_init(&resfh, NFS4_FHSIZE); + + status = fh_verify(rqstp, current_fh, S_IFDIR, MAY_CREATE); + if (status == nfserr_symlink) + status = nfserr_notdir; + if (status) + return status; + + switch (create->cr_type) { + case NF4LNK: + /* ugh! we have to null-terminate the linktext, or + * vfs_symlink() will choke. it is always safe to + * null-terminate by brute force, since at worst we + * will overwrite the first byte of the create namelen + * in the XDR buffer, which has already been extracted + * during XDR decode. + */ + create->cr_linkname[create->cr_linklen] = 0; + + status = nfsd_symlink(rqstp, current_fh, create->cr_name, + create->cr_namelen, create->cr_linkname, + create->cr_linklen, &resfh, &create->cr_iattr); + break; + + case NF4BLK: + rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); + if (MAJOR(rdev) != create->cr_specdata1 || + MINOR(rdev) != create->cr_specdata2) + return nfserr_inval; + status = nfsd_create(rqstp, current_fh, create->cr_name, + create->cr_namelen, &create->cr_iattr, + S_IFBLK, rdev, &resfh); + break; + + case NF4CHR: + rdev = MKDEV(create->cr_specdata1, create->cr_specdata2); + if (MAJOR(rdev) != create->cr_specdata1 || + MINOR(rdev) != create->cr_specdata2) + return nfserr_inval; + status = nfsd_create(rqstp, current_fh, create->cr_name, + create->cr_namelen, &create->cr_iattr, + S_IFCHR, rdev, &resfh); + break; + + case NF4SOCK: + status = nfsd_create(rqstp, current_fh, create->cr_name, + create->cr_namelen, &create->cr_iattr, + S_IFSOCK, 0, &resfh); + break; + + case NF4FIFO: + status = nfsd_create(rqstp, current_fh, create->cr_name, + create->cr_namelen, &create->cr_iattr, + S_IFIFO, 0, &resfh); + break; + + case NF4DIR: + create->cr_iattr.ia_valid &= ~ATTR_SIZE; + status = nfsd_create(rqstp, current_fh, create->cr_name, + create->cr_namelen, &create->cr_iattr, + S_IFDIR, 0, &resfh); + break; + + default: + status = nfserr_badtype; + } + + if (!status) { + fh_unlock(current_fh); + set_change_info(&create->cr_cinfo, current_fh); + fh_dup2(current_fh, &resfh); + } + + fh_put(&resfh); + return status; +} + +static inline int +nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_getattr *getattr) +{ + int status; + + status = fh_verify(rqstp, current_fh, 0, MAY_NOP); + if (status) + return status; + + if (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) + return nfserr_inval; + + getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; + getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; + + getattr->ga_fhp = current_fh; + return nfs_ok; +} + +static inline int +nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh, + struct svc_fh *save_fh, struct nfsd4_link *link) +{ + int status = nfserr_nofilehandle; + + if (!save_fh->fh_dentry) + return status; + status = nfsd_link(rqstp, current_fh, link->li_name, link->li_namelen, save_fh); + if (!status) + set_change_info(&link->li_cinfo, current_fh); + return status; +} + +static int +nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh) +{ + struct svc_fh tmp_fh; + int ret; + + fh_init(&tmp_fh, NFS4_FHSIZE); + if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh, + &rqstp->rq_chandle)) != 0) + return ret; + if (tmp_fh.fh_dentry == current_fh->fh_dentry) { + fh_put(&tmp_fh); + return nfserr_noent; + } + fh_put(&tmp_fh); + return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh); +} + +static inline int +nfsd4_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lookup *lookup) +{ + return nfsd_lookup(rqstp, current_fh, lookup->lo_name, lookup->lo_len, current_fh); +} + +static inline int +nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) +{ + int status; + struct file *filp = NULL; + + /* no need to check permission - this will be done in nfsd_read() */ + + if (read->rd_offset >= OFFSET_MAX) + return nfserr_inval; + + nfs4_lock_state(); + /* check stateid */ + if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, + CHECK_FH | RD_STATE, &filp))) { + dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); + goto out; + } + status = nfs_ok; +out: + nfs4_unlock_state(); + read->rd_rqstp = rqstp; + read->rd_fhp = current_fh; + read->rd_filp = filp; + return status; +} + +static inline int +nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir) +{ + u64 cookie = readdir->rd_cookie; + static const nfs4_verifier zeroverf; + + /* no need to check permission - this will be done in nfsd_readdir() */ + + if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) + return nfserr_inval; + + readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; + readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; + + if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || + (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) + return nfserr_bad_cookie; + + readdir->rd_rqstp = rqstp; + readdir->rd_fhp = current_fh; + return nfs_ok; +} + +static inline int +nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readlink *readlink) +{ + readlink->rl_rqstp = rqstp; + readlink->rl_fhp = current_fh; + return nfs_ok; +} + +static inline int +nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_remove *remove) +{ + int status; + + status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen); + if (status == nfserr_symlink) + return nfserr_notdir; + if (!status) { + fh_unlock(current_fh); + set_change_info(&remove->rm_cinfo, current_fh); + } + return status; +} + +static inline int +nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, + struct svc_fh *save_fh, struct nfsd4_rename *rename) +{ + int status = nfserr_nofilehandle; + + if (!save_fh->fh_dentry) + return status; + status = nfsd_rename(rqstp, save_fh, rename->rn_sname, + rename->rn_snamelen, current_fh, + rename->rn_tname, rename->rn_tnamelen); + + /* the underlying filesystem returns different error's than required + * by NFSv4. both save_fh and current_fh have been verified.. */ + if (status == nfserr_isdir) + status = nfserr_exist; + else if ((status == nfserr_notdir) && + (S_ISDIR(save_fh->fh_dentry->d_inode->i_mode) && + S_ISDIR(current_fh->fh_dentry->d_inode->i_mode))) + status = nfserr_exist; + else if (status == nfserr_symlink) + status = nfserr_notdir; + + if (!status) { + set_change_info(&rename->rn_sinfo, current_fh); + set_change_info(&rename->rn_tinfo, save_fh); + } + return status; +} + +static inline int +nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr) +{ + int status = nfs_ok; + + if (!current_fh->fh_dentry) + return nfserr_nofilehandle; + + status = nfs_ok; + if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { + nfs4_lock_state(); + if ((status = nfs4_preprocess_stateid_op(current_fh, + &setattr->sa_stateid, + CHECK_FH | WR_STATE, NULL))) { + dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); + goto out_unlock; + } + nfs4_unlock_state(); + } + status = nfs_ok; + if (setattr->sa_acl != NULL) + status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl); + if (status) + goto out; + status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, + 0, (time_t)0); +out: + return status; +out_unlock: + nfs4_unlock_state(); + return status; +} + +static inline int +nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write) +{ + stateid_t *stateid = &write->wr_stateid; + struct file *filp = NULL; + u32 *p; + int status = nfs_ok; + + /* no need to check permission - this will be done in nfsd_write() */ + + if (write->wr_offset >= OFFSET_MAX) + return nfserr_inval; + + nfs4_lock_state(); + if ((status = nfs4_preprocess_stateid_op(current_fh, stateid, + CHECK_FH | WR_STATE, &filp))) { + dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); + goto out; + } + nfs4_unlock_state(); + + write->wr_bytes_written = write->wr_buflen; + write->wr_how_written = write->wr_stable_how; + p = (u32 *)write->wr_verifier.data; + *p++ = nfssvc_boot.tv_sec; + *p++ = nfssvc_boot.tv_usec; + + status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, + write->wr_vec, write->wr_vlen, write->wr_buflen, + &write->wr_how_written); + + if (status == nfserr_symlink) + status = nfserr_inval; + return status; +out: + nfs4_unlock_state(); + return status; +} + +/* This routine never returns NFS_OK! If there are no other errors, it + * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the + * attributes matched. VERIFY is implemented by mapping NFSERR_SAME + * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK. + */ +static int +nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify) +{ + u32 *buf, *p; + int count; + int status; + + status = fh_verify(rqstp, current_fh, 0, MAY_NOP); + if (status) + return status; + + if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) + || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) + return nfserr_attrnotsupp; + if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) + || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) + return nfserr_inval; + if (verify->ve_attrlen & 3) + return nfserr_inval; + + /* count in words: + * bitmap_len(1) + bitmap(2) + attr_len(1) = 4 + */ + count = 4 + (verify->ve_attrlen >> 2); + buf = kmalloc(count << 2, GFP_KERNEL); + if (!buf) + return nfserr_resource; + + status = nfsd4_encode_fattr(current_fh, current_fh->fh_export, + current_fh->fh_dentry, buf, + &count, verify->ve_bmval, + rqstp); + + /* this means that nfsd4_encode_fattr() ran out of space */ + if (status == nfserr_resource && count == 0) + status = nfserr_not_same; + if (status) + goto out_kfree; + + p = buf + 3; + status = nfserr_not_same; + if (ntohl(*p++) != verify->ve_attrlen) + goto out_kfree; + if (!memcmp(p, verify->ve_attrval, verify->ve_attrlen)) + status = nfserr_same; + +out_kfree: + kfree(buf); + return status; +} + +/* + * NULL call. + */ +static int +nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + + +/* + * COMPOUND call. + */ +static int +nfsd4_proc_compound(struct svc_rqst *rqstp, + struct nfsd4_compoundargs *args, + struct nfsd4_compoundres *resp) +{ + struct nfsd4_op *op; + struct svc_fh *current_fh = NULL; + struct svc_fh *save_fh = NULL; + struct nfs4_stateowner *replay_owner = NULL; + int slack_space; /* in words, not bytes! */ + int status; + + status = nfserr_resource; + current_fh = kmalloc(sizeof(*current_fh), GFP_KERNEL); + if (current_fh == NULL) + goto out; + fh_init(current_fh, NFS4_FHSIZE); + save_fh = kmalloc(sizeof(*save_fh), GFP_KERNEL); + if (save_fh == NULL) + goto out; + fh_init(save_fh, NFS4_FHSIZE); + + resp->xbuf = &rqstp->rq_res; + resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; + resp->tagp = resp->p; + /* reserve space for: taglen, tag, and opcnt */ + resp->p += 2 + XDR_QUADLEN(args->taglen); + resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE; + resp->taglen = args->taglen; + resp->tag = args->tag; + resp->opcnt = 0; + resp->rqstp = rqstp; + + /* + * According to RFC3010, this takes precedence over all other errors. + */ + status = nfserr_minor_vers_mismatch; + if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) + goto out; + + status = nfs_ok; + while (!status && resp->opcnt < args->opcnt) { + op = &args->ops[resp->opcnt++]; + + /* + * The XDR decode routines may have pre-set op->status; + * for example, if there is a miscellaneous XDR error + * it will be set to nfserr_bad_xdr. + */ + if (op->status) + goto encode_op; + + /* We must be able to encode a successful response to + * this operation, with enough room left over to encode a + * failed response to the next operation. If we don't + * have enough room, fail with ERR_RESOURCE. + */ +/* FIXME - is slack_space *really* words, or bytes??? - neilb */ + slack_space = (char *)resp->end - (char *)resp->p; + if (slack_space < COMPOUND_SLACK_SPACE + COMPOUND_ERR_SLACK_SPACE) { + BUG_ON(slack_space < COMPOUND_ERR_SLACK_SPACE); + op->status = nfserr_resource; + goto encode_op; + } + + /* All operations except RENEW, SETCLIENTID, RESTOREFH + * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH + * require a valid current filehandle + * + * SETATTR NOFILEHANDLE error handled in nfsd4_setattr + * due to required returned bitmap argument + */ + if ((!current_fh->fh_dentry) && + !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || + (op->opnum == OP_SETCLIENTID) || + (op->opnum == OP_SETCLIENTID_CONFIRM) || + (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || + (op->opnum == OP_RELEASE_LOCKOWNER) || + (op->opnum == OP_SETATTR))) { + op->status = nfserr_nofilehandle; + goto encode_op; + } + switch (op->opnum) { + case OP_ACCESS: + op->status = nfsd4_access(rqstp, current_fh, &op->u.access); + break; + case OP_CLOSE: + op->status = nfsd4_close(rqstp, current_fh, &op->u.close); + replay_owner = op->u.close.cl_stateowner; + break; + case OP_COMMIT: + op->status = nfsd4_commit(rqstp, current_fh, &op->u.commit); + break; + case OP_CREATE: + op->status = nfsd4_create(rqstp, current_fh, &op->u.create); + break; + case OP_DELEGRETURN: + op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn); + break; + case OP_GETATTR: + op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr); + break; + case OP_GETFH: + op->status = nfsd4_getfh(current_fh, &op->u.getfh); + break; + case OP_LINK: + op->status = nfsd4_link(rqstp, current_fh, save_fh, &op->u.link); + break; + case OP_LOCK: + op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock); + replay_owner = op->u.lock.lk_stateowner; + break; + case OP_LOCKT: + op->status = nfsd4_lockt(rqstp, current_fh, &op->u.lockt); + break; + case OP_LOCKU: + op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku); + replay_owner = op->u.locku.lu_stateowner; + break; + case OP_LOOKUP: + op->status = nfsd4_lookup(rqstp, current_fh, &op->u.lookup); + break; + case OP_LOOKUPP: + op->status = nfsd4_lookupp(rqstp, current_fh); + break; + case OP_NVERIFY: + op->status = nfsd4_verify(rqstp, current_fh, &op->u.nverify); + if (op->status == nfserr_not_same) + op->status = nfs_ok; + break; + case OP_OPEN: + op->status = nfsd4_open(rqstp, current_fh, &op->u.open); + replay_owner = op->u.open.op_stateowner; + break; + case OP_OPEN_CONFIRM: + op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm); + replay_owner = op->u.open_confirm.oc_stateowner; + break; + case OP_OPEN_DOWNGRADE: + op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade); + replay_owner = op->u.open_downgrade.od_stateowner; + break; + case OP_PUTFH: + op->status = nfsd4_putfh(rqstp, current_fh, &op->u.putfh); + break; + case OP_PUTROOTFH: + op->status = nfsd4_putrootfh(rqstp, current_fh); + break; + case OP_READ: + op->status = nfsd4_read(rqstp, current_fh, &op->u.read); + break; + case OP_READDIR: + op->status = nfsd4_readdir(rqstp, current_fh, &op->u.readdir); + break; + case OP_READLINK: + op->status = nfsd4_readlink(rqstp, current_fh, &op->u.readlink); + break; + case OP_REMOVE: + op->status = nfsd4_remove(rqstp, current_fh, &op->u.remove); + break; + case OP_RENAME: + op->status = nfsd4_rename(rqstp, current_fh, save_fh, &op->u.rename); + break; + case OP_RENEW: + op->status = nfsd4_renew(&op->u.renew); + break; + case OP_RESTOREFH: + op->status = nfsd4_restorefh(current_fh, save_fh); + break; + case OP_SAVEFH: + op->status = nfsd4_savefh(current_fh, save_fh); + break; + case OP_SETATTR: + op->status = nfsd4_setattr(rqstp, current_fh, &op->u.setattr); + break; + case OP_SETCLIENTID: + op->status = nfsd4_setclientid(rqstp, &op->u.setclientid); + break; + case OP_SETCLIENTID_CONFIRM: + op->status = nfsd4_setclientid_confirm(rqstp, &op->u.setclientid_confirm); + break; + case OP_VERIFY: + op->status = nfsd4_verify(rqstp, current_fh, &op->u.verify); + if (op->status == nfserr_same) + op->status = nfs_ok; + break; + case OP_WRITE: + op->status = nfsd4_write(rqstp, current_fh, &op->u.write); + break; + case OP_RELEASE_LOCKOWNER: + op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner); + break; + default: + BUG_ON(op->status == nfs_ok); + break; + } + +encode_op: + if (op->status == NFSERR_REPLAY_ME) { + op->replay = &replay_owner->so_replay; + nfsd4_encode_replay(resp, op); + status = op->status = op->replay->rp_status; + } else { + nfsd4_encode_operation(resp, op); + status = op->status; + } + if (replay_owner && (replay_owner != (void *)(-1))) { + nfs4_put_stateowner(replay_owner); + replay_owner = NULL; + } + } + +out: + nfsd4_release_compoundargs(args); + if (current_fh) + fh_put(current_fh); + kfree(current_fh); + if (save_fh) + fh_put(save_fh); + kfree(save_fh); + return status; +} + +#define nfs4svc_decode_voidargs NULL +#define nfs4svc_release_void NULL +#define nfsd4_voidres nfsd4_voidargs +#define nfs4svc_release_compound NULL +struct nfsd4_voidargs { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd4_proc_##name, \ + (kxdrproc_t) nfs4svc_decode_##argt##args, \ + (kxdrproc_t) nfs4svc_encode_##rest##res, \ + (kxdrproc_t) nfs4svc_release_##relt, \ + sizeof(struct nfsd4_##argt##args), \ + sizeof(struct nfsd4_##rest##res), \ + 0, \ + cache, \ + respsize, \ + } + +/* + * TODO: At the present time, the NFSv4 server does not do XID caching + * of requests. Implementing XID caching would not be a serious problem, + * although it would require a mild change in interfaces since one + * doesn't know whether an NFSv4 request is idempotent until after the + * XDR decode. However, XID caching totally confuses pynfs (Peter + * Astrand's regression testsuite for NFSv4 servers), which reuses + * XID's liberally, so I've left it unimplemented until pynfs generates + * better XID's. + */ +static struct svc_procedure nfsd_procedures4[2] = { + PROC(null, void, void, void, RC_NOCACHE, 1), + PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE) +}; + +struct svc_version nfsd_version4 = { + .vs_vers = 4, + .vs_nproc = 2, + .vs_proc = nfsd_procedures4, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS4_SVC_XDRSIZE, +}; + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c new file mode 100644 index 00000000000..579f7fea796 --- /dev/null +++ b/fs/nfsd/nfs4state.c @@ -0,0 +1,3320 @@ +/* +* linux/fs/nfsd/nfs4state.c +* +* Copyright (c) 2001 The Regents of the University of Michigan. +* All rights reserved. +* +* Kendrick Smith <kmsmith@umich.edu> +* Andy Adamson <kandros@umich.edu> +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* 3. Neither the name of the University nor the names of its +* contributors may be used to endorse or promote products derived +* from this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED +* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +* +*/ + +#include <linux/param.h> +#include <linux/major.h> +#include <linux/slab.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/mount.h> +#include <linux/workqueue.h> +#include <linux/smp_lock.h> +#include <linux/kthread.h> +#include <linux/nfs4.h> +#include <linux/nfsd/state.h> +#include <linux/nfsd/xdr4.h> + +#define NFSDDBG_FACILITY NFSDDBG_PROC + +/* Globals */ +static time_t lease_time = 90; /* default lease time */ +static time_t old_lease_time = 90; /* past incarnation lease time */ +static u32 nfs4_reclaim_init = 0; +time_t boot_time; +static time_t grace_end = 0; +static u32 current_clientid = 1; +static u32 current_ownerid = 1; +static u32 current_fileid = 1; +static u32 current_delegid = 1; +static u32 nfs4_init; +stateid_t zerostateid; /* bits all 0 */ +stateid_t onestateid; /* bits all 1 */ + +/* debug counters */ +u32 list_add_perfile = 0; +u32 list_del_perfile = 0; +u32 add_perclient = 0; +u32 del_perclient = 0; +u32 alloc_file = 0; +u32 free_file = 0; +u32 vfsopen = 0; +u32 vfsclose = 0; +u32 alloc_delegation= 0; +u32 free_delegation= 0; + +/* forward declarations */ +struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); +static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); +static void release_stateid_lockowners(struct nfs4_stateid *open_stp); + +/* Locking: + * + * client_sema: + * protects clientid_hashtbl[], clientstr_hashtbl[], + * unconfstr_hashtbl[], uncofid_hashtbl[]. + */ +static DECLARE_MUTEX(client_sema); + +void +nfs4_lock_state(void) +{ + down(&client_sema); +} + +void +nfs4_unlock_state(void) +{ + up(&client_sema); +} + +static inline u32 +opaque_hashval(const void *ptr, int nbytes) +{ + unsigned char *cptr = (unsigned char *) ptr; + + u32 x = 0; + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x; +} + +/* forward declarations */ +static void release_stateowner(struct nfs4_stateowner *sop); +static void release_stateid(struct nfs4_stateid *stp, int flags); +static void release_file(struct nfs4_file *fp); + +/* + * Delegation state + */ + +/* recall_lock protects the del_recall_lru */ +spinlock_t recall_lock; +static struct list_head del_recall_lru; + +static struct nfs4_delegation * +alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) +{ + struct nfs4_delegation *dp; + struct nfs4_file *fp = stp->st_file; + struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; + + dprintk("NFSD alloc_init_deleg\n"); + if ((dp = kmalloc(sizeof(struct nfs4_delegation), + GFP_KERNEL)) == NULL) + return dp; + INIT_LIST_HEAD(&dp->dl_del_perfile); + INIT_LIST_HEAD(&dp->dl_del_perclnt); + INIT_LIST_HEAD(&dp->dl_recall_lru); + dp->dl_client = clp; + dp->dl_file = fp; + dp->dl_flock = NULL; + get_file(stp->st_vfs_file); + dp->dl_vfs_file = stp->st_vfs_file; + dp->dl_type = type; + dp->dl_recall.cbr_dp = NULL; + dp->dl_recall.cbr_ident = cb->cb_ident; + dp->dl_recall.cbr_trunc = 0; + dp->dl_stateid.si_boot = boot_time; + dp->dl_stateid.si_stateownerid = current_delegid++; + dp->dl_stateid.si_fileid = 0; + dp->dl_stateid.si_generation = 0; + dp->dl_fhlen = current_fh->fh_handle.fh_size; + memcpy(dp->dl_fhval, ¤t_fh->fh_handle.fh_base, + current_fh->fh_handle.fh_size); + dp->dl_time = 0; + atomic_set(&dp->dl_count, 1); + list_add(&dp->dl_del_perfile, &fp->fi_del_perfile); + list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt); + alloc_delegation++; + return dp; +} + +void +nfs4_put_delegation(struct nfs4_delegation *dp) +{ + if (atomic_dec_and_test(&dp->dl_count)) { + dprintk("NFSD: freeing dp %p\n",dp); + kfree(dp); + free_delegation++; + } +} + +/* Remove the associated file_lock first, then remove the delegation. + * lease_modify() is called to remove the FS_LEASE file_lock from + * the i_flock list, eventually calling nfsd's lock_manager + * fl_release_callback. + */ +static void +nfs4_close_delegation(struct nfs4_delegation *dp) +{ + struct file *filp = dp->dl_vfs_file; + + dprintk("NFSD: close_delegation dp %p\n",dp); + dp->dl_vfs_file = NULL; + /* The following nfsd_close may not actually close the file, + * but we want to remove the lease in any case. */ + setlease(filp, F_UNLCK, &dp->dl_flock); + nfsd_close(filp); + vfsclose++; +} + +/* Called under the state lock. */ +static void +unhash_delegation(struct nfs4_delegation *dp) +{ + list_del_init(&dp->dl_del_perfile); + list_del_init(&dp->dl_del_perclnt); + spin_lock(&recall_lock); + list_del_init(&dp->dl_recall_lru); + spin_unlock(&recall_lock); + nfs4_close_delegation(dp); + nfs4_put_delegation(dp); +} + +/* + * SETCLIENTID state + */ + +/* Hash tables for nfs4_clientid state */ +#define CLIENT_HASH_BITS 4 +#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS) +#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1) + +#define clientid_hashval(id) \ + ((id) & CLIENT_HASH_MASK) +#define clientstr_hashval(name, namelen) \ + (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK) +/* + * reclaim_str_hashtbl[] holds known client info from previous reset/reboot + * used in reboot/reset lease grace period processing + * + * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed + * setclientid_confirmed info. + * + * unconf_str_hastbl[] and unconf_id_hashtbl[] hold unconfirmed + * setclientid info. + * + * client_lru holds client queue ordered by nfs4_client.cl_time + * for lease renewal. + * + * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time + * for last close replay. + */ +static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE]; +static int reclaim_str_hashtbl_size = 0; +static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; +static struct list_head client_lru; +static struct list_head close_lru; + +static inline void +renew_client(struct nfs4_client *clp) +{ + /* + * Move client to the end to the LRU list. + */ + dprintk("renewing client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, + clp->cl_clientid.cl_id); + list_move_tail(&clp->cl_lru, &client_lru); + clp->cl_time = get_seconds(); +} + +/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ +static int +STALE_CLIENTID(clientid_t *clid) +{ + if (clid->cl_boot == boot_time) + return 0; + dprintk("NFSD stale clientid (%08x/%08x)\n", + clid->cl_boot, clid->cl_id); + return 1; +} + +/* + * XXX Should we use a slab cache ? + * This type of memory management is somewhat inefficient, but we use it + * anyway since SETCLIENTID is not a common operation. + */ +static inline struct nfs4_client * +alloc_client(struct xdr_netobj name) +{ + struct nfs4_client *clp; + + if ((clp = kmalloc(sizeof(struct nfs4_client), GFP_KERNEL))!= NULL) { + memset(clp, 0, sizeof(*clp)); + if ((clp->cl_name.data = kmalloc(name.len, GFP_KERNEL)) != NULL) { + memcpy(clp->cl_name.data, name.data, name.len); + clp->cl_name.len = name.len; + } + else { + kfree(clp); + clp = NULL; + } + } + return clp; +} + +static inline void +free_client(struct nfs4_client *clp) +{ + if (clp->cl_cred.cr_group_info) + put_group_info(clp->cl_cred.cr_group_info); + kfree(clp->cl_name.data); + kfree(clp); +} + +void +put_nfs4_client(struct nfs4_client *clp) +{ + if (atomic_dec_and_test(&clp->cl_count)) + free_client(clp); +} + +static void +expire_client(struct nfs4_client *clp) +{ + struct nfs4_stateowner *sop; + struct nfs4_delegation *dp; + struct nfs4_callback *cb = &clp->cl_callback; + struct rpc_clnt *clnt = clp->cl_callback.cb_client; + struct list_head reaplist; + + dprintk("NFSD: expire_client cl_count %d\n", + atomic_read(&clp->cl_count)); + + /* shutdown rpc client, ending any outstanding recall rpcs */ + if (atomic_read(&cb->cb_set) == 1 && clnt) { + rpc_shutdown_client(clnt); + clnt = clp->cl_callback.cb_client = NULL; + } + + INIT_LIST_HEAD(&reaplist); + spin_lock(&recall_lock); + while (!list_empty(&clp->cl_del_perclnt)) { + dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt); + dprintk("NFSD: expire client. dp %p, fp %p\n", dp, + dp->dl_flock); + list_del_init(&dp->dl_del_perclnt); + list_move(&dp->dl_recall_lru, &reaplist); + } + spin_unlock(&recall_lock); + while (!list_empty(&reaplist)) { + dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); + list_del_init(&dp->dl_recall_lru); + unhash_delegation(dp); + } + list_del(&clp->cl_idhash); + list_del(&clp->cl_strhash); + list_del(&clp->cl_lru); + while (!list_empty(&clp->cl_perclient)) { + sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient); + release_stateowner(sop); + } + put_nfs4_client(clp); +} + +static struct nfs4_client * +create_client(struct xdr_netobj name) { + struct nfs4_client *clp; + + if (!(clp = alloc_client(name))) + goto out; + atomic_set(&clp->cl_count, 1); + atomic_set(&clp->cl_callback.cb_set, 0); + clp->cl_callback.cb_parsed = 0; + INIT_LIST_HEAD(&clp->cl_idhash); + INIT_LIST_HEAD(&clp->cl_strhash); + INIT_LIST_HEAD(&clp->cl_perclient); + INIT_LIST_HEAD(&clp->cl_del_perclnt); + INIT_LIST_HEAD(&clp->cl_lru); +out: + return clp; +} + +static void +copy_verf(struct nfs4_client *target, nfs4_verifier *source) { + memcpy(target->cl_verifier.data, source->data, sizeof(target->cl_verifier.data)); +} + +static void +copy_clid(struct nfs4_client *target, struct nfs4_client *source) { + target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; + target->cl_clientid.cl_id = source->cl_clientid.cl_id; +} + +static void +copy_cred(struct svc_cred *target, struct svc_cred *source) { + + target->cr_uid = source->cr_uid; + target->cr_gid = source->cr_gid; + target->cr_group_info = source->cr_group_info; + get_group_info(target->cr_group_info); +} + +static int +cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) { + if (!n1 || !n2) + return 0; + return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len)); +} + +static int +cmp_verf(nfs4_verifier *v1, nfs4_verifier *v2) { + return(!memcmp(v1->data,v2->data,sizeof(v1->data))); +} + +static int +cmp_clid(clientid_t * cl1, clientid_t * cl2) { + return((cl1->cl_boot == cl2->cl_boot) && + (cl1->cl_id == cl2->cl_id)); +} + +/* XXX what about NGROUP */ +static int +cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){ + return(cr1->cr_uid == cr2->cr_uid); + +} + +static void +gen_clid(struct nfs4_client *clp) { + clp->cl_clientid.cl_boot = boot_time; + clp->cl_clientid.cl_id = current_clientid++; +} + +static void +gen_confirm(struct nfs4_client *clp) { + struct timespec tv; + u32 * p; + + tv = CURRENT_TIME; + p = (u32 *)clp->cl_confirm.data; + *p++ = tv.tv_sec; + *p++ = tv.tv_nsec; +} + +static int +check_name(struct xdr_netobj name) { + + if (name.len == 0) + return 0; + if (name.len > NFS4_OPAQUE_LIMIT) { + printk("NFSD: check_name: name too long(%d)!\n", name.len); + return 0; + } + return 1; +} + +void +add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) +{ + unsigned int idhashval; + + list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]); + idhashval = clientid_hashval(clp->cl_clientid.cl_id); + list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]); + list_add_tail(&clp->cl_lru, &client_lru); + clp->cl_time = get_seconds(); +} + +void +move_to_confirmed(struct nfs4_client *clp) +{ + unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); + unsigned int strhashval; + + dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp); + list_del_init(&clp->cl_strhash); + list_del_init(&clp->cl_idhash); + list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); + strhashval = clientstr_hashval(clp->cl_name.data, + clp->cl_name.len); + list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); + renew_client(clp); +} + +static struct nfs4_client * +find_confirmed_client(clientid_t *clid) +{ + struct nfs4_client *clp; + unsigned int idhashval = clientid_hashval(clid->cl_id); + + list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) { + if (cmp_clid(&clp->cl_clientid, clid)) + return clp; + } + return NULL; +} + +static struct nfs4_client * +find_unconfirmed_client(clientid_t *clid) +{ + struct nfs4_client *clp; + unsigned int idhashval = clientid_hashval(clid->cl_id); + + list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) { + if (cmp_clid(&clp->cl_clientid, clid)) + return clp; + } + return NULL; +} + +/* a helper function for parse_callback */ +static int +parse_octet(unsigned int *lenp, char **addrp) +{ + unsigned int len = *lenp; + char *p = *addrp; + int n = -1; + char c; + + for (;;) { + if (!len) + break; + len--; + c = *p++; + if (c == '.') + break; + if ((c < '0') || (c > '9')) { + n = -1; + break; + } + if (n < 0) + n = 0; + n = (n * 10) + (c - '0'); + if (n > 255) { + n = -1; + break; + } + } + *lenp = len; + *addrp = p; + return n; +} + +/* parse and set the setclientid ipv4 callback address */ +int +parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) +{ + int temp = 0; + u32 cbaddr = 0; + u16 cbport = 0; + u32 addrlen = addr_len; + char *addr = addr_val; + int i, shift; + + /* ipaddress */ + shift = 24; + for(i = 4; i > 0 ; i--) { + if ((temp = parse_octet(&addrlen, &addr)) < 0) { + return 0; + } + cbaddr |= (temp << shift); + if (shift > 0) + shift -= 8; + } + *cbaddrp = cbaddr; + + /* port */ + shift = 8; + for(i = 2; i > 0 ; i--) { + if ((temp = parse_octet(&addrlen, &addr)) < 0) { + return 0; + } + cbport |= (temp << shift); + if (shift > 0) + shift -= 8; + } + *cbportp = cbport; + return 1; +} + +void +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +{ + struct nfs4_callback *cb = &clp->cl_callback; + + /* Currently, we only support tcp for the callback channel */ + if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) + goto out_err; + + if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, + &cb->cb_addr, &cb->cb_port))) + goto out_err; + cb->cb_prog = se->se_callback_prog; + cb->cb_ident = se->se_callback_ident; + cb->cb_parsed = 1; + return; +out_err: + printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " + "will not receive delegations\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + + cb->cb_parsed = 0; + return; +} + +/* + * RFC 3010 has a complex implmentation description of processing a + * SETCLIENTID request consisting of 5 bullets, labeled as + * CASE0 - CASE4 below. + * + * NOTES: + * callback information will be processed in a future patch + * + * an unconfirmed record is added when: + * NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record. + * CASE 1: confirmed record found with matching name, principal, + * verifier, and clientid. + * CASE 2: confirmed record found with matching name, principal, + * and there is no unconfirmed record with matching + * name and principal + * + * an unconfirmed record is replaced when: + * CASE 3: confirmed record found with matching name, principal, + * and an unconfirmed record is found with matching + * name, principal, and with clientid and + * confirm that does not match the confirmed record. + * CASE 4: there is no confirmed record with matching name and + * principal. there is an unconfirmed record with + * matching name, principal. + * + * an unconfirmed record is deleted when: + * CASE 1: an unconfirmed record that matches input name, verifier, + * and confirmed clientid. + * CASE 4: any unconfirmed records with matching name and principal + * that exist after an unconfirmed record has been replaced + * as described above. + * + */ +int +nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) +{ + u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; + struct xdr_netobj clname = { + .len = setclid->se_namelen, + .data = setclid->se_name, + }; + nfs4_verifier clverifier = setclid->se_verf; + unsigned int strhashval; + struct nfs4_client * conf, * unconf, * new, * clp; + int status; + + status = nfserr_inval; + if (!check_name(clname)) + goto out; + + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. + */ + + strhashval = clientstr_hashval(clname.data, clname.len); + + conf = NULL; + nfs4_lock_state(); + list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) { + if (!cmp_name(&clp->cl_name, &clname)) + continue; + /* + * CASE 0: + * clname match, confirmed, different principal + * or different ip_address + */ + status = nfserr_clid_inuse; + if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + if (clp->cl_addr != ip_addr) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + + /* + * cl_name match from a previous SETCLIENTID operation + * XXX check for additional matches? + */ + conf = clp; + break; + } + unconf = NULL; + list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) { + if (!cmp_name(&clp->cl_name, &clname)) + continue; + /* cl_name match from a previous SETCLIENTID operation */ + unconf = clp; + break; + } + status = nfserr_resource; + if (!conf) { + /* + * CASE 4: + * placed first, because it is the normal case. + */ + if (unconf) + expire_client(unconf); + if (!(new = create_client(clname))) + goto out; + copy_verf(new, &clverifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + gen_clid(new); + gen_confirm(new); + gen_callback(new, setclid); + add_to_unconfirmed(new, strhashval); + } else if (cmp_verf(&conf->cl_verifier, &clverifier)) { + /* + * CASE 1: + * cl_name match, confirmed, principal match + * verifier match: probable callback update + * + * remove any unconfirmed nfs4_client with + * matching cl_name, cl_verifier, and cl_clientid + * + * create and insert an unconfirmed nfs4_client with same + * cl_name, cl_verifier, and cl_clientid as existing + * nfs4_client, but with the new callback info and a + * new cl_confirm + */ + if ((unconf) && + cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) && + cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) { + expire_client(unconf); + } + if (!(new = create_client(clname))) + goto out; + copy_verf(new,&conf->cl_verifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + copy_clid(new, conf); + gen_confirm(new); + gen_callback(new, setclid); + add_to_unconfirmed(new,strhashval); + } else if (!unconf) { + /* + * CASE 2: + * clname match, confirmed, principal match + * verfier does not match + * no unconfirmed. create a new unconfirmed nfs4_client + * using input clverifier, clname, and callback info + * and generate a new cl_clientid and cl_confirm. + */ + if (!(new = create_client(clname))) + goto out; + copy_verf(new,&clverifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + gen_clid(new); + gen_confirm(new); + gen_callback(new, setclid); + add_to_unconfirmed(new, strhashval); + } else if (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) { + /* + * CASE3: + * confirmed found (name, principal match) + * confirmed verifier does not match input clverifier + * + * unconfirmed found (name match) + * confirmed->cl_confirm != unconfirmed->cl_confirm + * + * remove unconfirmed. + * + * create an unconfirmed nfs4_client + * with same cl_name as existing confirmed nfs4_client, + * but with new callback info, new cl_clientid, + * new cl_verifier and a new cl_confirm + */ + expire_client(unconf); + if (!(new = create_client(clname))) + goto out; + copy_verf(new,&clverifier); + new->cl_addr = ip_addr; + copy_cred(&new->cl_cred,&rqstp->rq_cred); + gen_clid(new); + gen_confirm(new); + gen_callback(new, setclid); + add_to_unconfirmed(new, strhashval); + } else { + /* No cases hit !!! */ + status = nfserr_inval; + goto out; + + } + setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; + setclid->se_clientid.cl_id = new->cl_clientid.cl_id; + memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data)); + status = nfs_ok; +out: + nfs4_unlock_state(); + return status; +} + + +/* + * RFC 3010 has a complex implmentation description of processing a + * SETCLIENTID_CONFIRM request consisting of 4 bullets describing + * processing on a DRC miss, labeled as CASE1 - CASE4 below. + * + * NOTE: callback information will be processed here in a future patch + */ +int +nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) +{ + u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; + struct nfs4_client *clp, *conf = NULL, *unconf = NULL; + nfs4_verifier confirm = setclientid_confirm->sc_confirm; + clientid_t * clid = &setclientid_confirm->sc_clientid; + int status; + + if (STALE_CLIENTID(clid)) + return nfserr_stale_clientid; + /* + * XXX The Duplicate Request Cache (DRC) has been checked (??) + * We get here on a DRC miss. + */ + + nfs4_lock_state(); + clp = find_confirmed_client(clid); + if (clp) { + status = nfserr_inval; + /* + * Found a record for this clientid. If the IP addresses + * don't match, return ERR_INVAL just as if the record had + * not been found. + */ + if (clp->cl_addr != ip_addr) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + conf = clp; + } + clp = find_unconfirmed_client(clid); + if (clp) { + status = nfserr_inval; + if (clp->cl_addr != ip_addr) { + printk("NFSD: setclientid: string in use by client" + "(clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + goto out; + } + unconf = clp; + } + /* CASE 1: + * unconf record that matches input clientid and input confirm. + * conf record that matches input clientid. + * conf and unconf records match names, verifiers + */ + if ((conf && unconf) && + (cmp_verf(&unconf->cl_confirm, &confirm)) && + (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) && + (cmp_name(&conf->cl_name,&unconf->cl_name)) && + (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) { + if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) + status = nfserr_clid_inuse; + else { + expire_client(conf); + clp = unconf; + move_to_confirmed(unconf); + status = nfs_ok; + } + goto out; + } + /* CASE 2: + * conf record that matches input clientid. + * if unconf record that matches input clientid, then unconf->cl_name + * or unconf->cl_verifier don't match the conf record. + */ + if ((conf && !unconf) || + ((conf && unconf) && + (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) || + !cmp_name(&conf->cl_name, &unconf->cl_name)))) { + if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) { + status = nfserr_clid_inuse; + } else { + clp = conf; + status = nfs_ok; + } + goto out; + } + /* CASE 3: + * conf record not found. + * unconf record found. + * unconf->cl_confirm matches input confirm + */ + if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) { + if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) { + status = nfserr_clid_inuse; + } else { + status = nfs_ok; + clp = unconf; + move_to_confirmed(unconf); + } + goto out; + } + /* CASE 4: + * conf record not found, or if conf, then conf->cl_confirm does not + * match input confirm. + * unconf record not found, or if unconf, then unconf->cl_confirm + * does not match input confirm. + */ + if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) && + (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) { + status = nfserr_stale_clientid; + goto out; + } + /* check that we have hit one of the cases...*/ + status = nfserr_inval; + goto out; +out: + if (!status) + nfsd4_probe_callback(clp); + nfs4_unlock_state(); + return status; +} + +/* + * Open owner state (share locks) + */ + +/* hash tables for nfs4_stateowner */ +#define OWNER_HASH_BITS 8 +#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS) +#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1) + +#define ownerid_hashval(id) \ + ((id) & OWNER_HASH_MASK) +#define ownerstr_hashval(clientid, ownername) \ + (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK) + +static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE]; +static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE]; + +/* hash table for nfs4_file */ +#define FILE_HASH_BITS 8 +#define FILE_HASH_SIZE (1 << FILE_HASH_BITS) +#define FILE_HASH_MASK (FILE_HASH_SIZE - 1) +/* hash table for (open)nfs4_stateid */ +#define STATEID_HASH_BITS 10 +#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS) +#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1) + +#define file_hashval(x) \ + hash_ptr(x, FILE_HASH_BITS) +#define stateid_hashval(owner_id, file_id) \ + (((owner_id) + (file_id)) & STATEID_HASH_MASK) + +static struct list_head file_hashtbl[FILE_HASH_SIZE]; +static struct list_head stateid_hashtbl[STATEID_HASH_SIZE]; + +/* OPEN Share state helper functions */ +static inline struct nfs4_file * +alloc_init_file(struct inode *ino) +{ + struct nfs4_file *fp; + unsigned int hashval = file_hashval(ino); + + if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) { + INIT_LIST_HEAD(&fp->fi_hash); + INIT_LIST_HEAD(&fp->fi_perfile); + INIT_LIST_HEAD(&fp->fi_del_perfile); + list_add(&fp->fi_hash, &file_hashtbl[hashval]); + fp->fi_inode = igrab(ino); + fp->fi_id = current_fileid++; + alloc_file++; + return fp; + } + return NULL; +} + +static void +release_all_files(void) +{ + int i; + struct nfs4_file *fp; + + for (i=0;i<FILE_HASH_SIZE;i++) { + while (!list_empty(&file_hashtbl[i])) { + fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash); + /* this should never be more than once... */ + if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) { + printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp); + } + release_file(fp); + } + } +} + +kmem_cache_t *stateowner_slab = NULL; + +static int +nfsd4_init_slabs(void) +{ + stateowner_slab = kmem_cache_create("nfsd4_stateowners", + sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL); + if (stateowner_slab == NULL) { + dprintk("nfsd4: out of memory while initializing nfsv4\n"); + return -ENOMEM; + } + return 0; +} + +static void +nfsd4_free_slabs(void) +{ + int status = 0; + + if (stateowner_slab) + status = kmem_cache_destroy(stateowner_slab); + stateowner_slab = NULL; + BUG_ON(status); +} + +void +nfs4_free_stateowner(struct kref *kref) +{ + struct nfs4_stateowner *sop = + container_of(kref, struct nfs4_stateowner, so_ref); + kfree(sop->so_owner.data); + kmem_cache_free(stateowner_slab, sop); +} + +static inline struct nfs4_stateowner * +alloc_stateowner(struct xdr_netobj *owner) +{ + struct nfs4_stateowner *sop; + + if ((sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL))) { + if ((sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL))) { + memcpy(sop->so_owner.data, owner->data, owner->len); + sop->so_owner.len = owner->len; + kref_init(&sop->so_ref); + return sop; + } + kmem_cache_free(stateowner_slab, sop); + } + return NULL; +} + +static struct nfs4_stateowner * +alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open) { + struct nfs4_stateowner *sop; + struct nfs4_replay *rp; + unsigned int idhashval; + + if (!(sop = alloc_stateowner(&open->op_owner))) + return NULL; + idhashval = ownerid_hashval(current_ownerid); + INIT_LIST_HEAD(&sop->so_idhash); + INIT_LIST_HEAD(&sop->so_strhash); + INIT_LIST_HEAD(&sop->so_perclient); + INIT_LIST_HEAD(&sop->so_perfilestate); + INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */ + INIT_LIST_HEAD(&sop->so_close_lru); + sop->so_time = 0; + list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); + list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&sop->so_perclient, &clp->cl_perclient); + add_perclient++; + sop->so_is_open_owner = 1; + sop->so_id = current_ownerid++; + sop->so_client = clp; + sop->so_seqid = open->op_seqid; + sop->so_confirmed = 0; + rp = &sop->so_replay; + rp->rp_status = NFSERR_SERVERFAULT; + rp->rp_buflen = 0; + rp->rp_buf = rp->rp_ibuf; + return sop; +} + +static void +release_stateid_lockowners(struct nfs4_stateid *open_stp) +{ + struct nfs4_stateowner *lock_sop; + + while (!list_empty(&open_stp->st_perlockowner)) { + lock_sop = list_entry(open_stp->st_perlockowner.next, + struct nfs4_stateowner, so_perlockowner); + /* list_del(&open_stp->st_perlockowner); */ + BUG_ON(lock_sop->so_is_open_owner); + release_stateowner(lock_sop); + } +} + +static void +unhash_stateowner(struct nfs4_stateowner *sop) +{ + struct nfs4_stateid *stp; + + list_del(&sop->so_idhash); + list_del(&sop->so_strhash); + if (sop->so_is_open_owner) { + list_del(&sop->so_perclient); + del_perclient++; + } + list_del(&sop->so_perlockowner); + while (!list_empty(&sop->so_perfilestate)) { + stp = list_entry(sop->so_perfilestate.next, + struct nfs4_stateid, st_perfilestate); + if (sop->so_is_open_owner) + release_stateid(stp, OPEN_STATE); + else + release_stateid(stp, LOCK_STATE); + } +} + +static void +release_stateowner(struct nfs4_stateowner *sop) +{ + unhash_stateowner(sop); + list_del(&sop->so_close_lru); + nfs4_put_stateowner(sop); +} + +static inline void +init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { + struct nfs4_stateowner *sop = open->op_stateowner; + unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); + + INIT_LIST_HEAD(&stp->st_hash); + INIT_LIST_HEAD(&stp->st_perfilestate); + INIT_LIST_HEAD(&stp->st_perlockowner); + INIT_LIST_HEAD(&stp->st_perfile); + list_add(&stp->st_hash, &stateid_hashtbl[hashval]); + list_add(&stp->st_perfilestate, &sop->so_perfilestate); + list_add_perfile++; + list_add(&stp->st_perfile, &fp->fi_perfile); + stp->st_stateowner = sop; + stp->st_file = fp; + stp->st_stateid.si_boot = boot_time; + stp->st_stateid.si_stateownerid = sop->so_id; + stp->st_stateid.si_fileid = fp->fi_id; + stp->st_stateid.si_generation = 0; + stp->st_access_bmap = 0; + stp->st_deny_bmap = 0; + __set_bit(open->op_share_access, &stp->st_access_bmap); + __set_bit(open->op_share_deny, &stp->st_deny_bmap); +} + +static void +release_stateid(struct nfs4_stateid *stp, int flags) +{ + struct file *filp = stp->st_vfs_file; + + list_del(&stp->st_hash); + list_del_perfile++; + list_del(&stp->st_perfile); + list_del(&stp->st_perfilestate); + if (flags & OPEN_STATE) { + release_stateid_lockowners(stp); + stp->st_vfs_file = NULL; + nfsd_close(filp); + vfsclose++; + } else if (flags & LOCK_STATE) + locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); + kfree(stp); + stp = NULL; +} + +static void +release_file(struct nfs4_file *fp) +{ + free_file++; + list_del(&fp->fi_hash); + iput(fp->fi_inode); + kfree(fp); +} + +void +move_to_close_lru(struct nfs4_stateowner *sop) +{ + dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); + + unhash_stateowner(sop); + list_add_tail(&sop->so_close_lru, &close_lru); + sop->so_time = get_seconds(); +} + +void +release_state_owner(struct nfs4_stateid *stp, int flag) +{ + struct nfs4_stateowner *sop = stp->st_stateowner; + struct nfs4_file *fp = stp->st_file; + + dprintk("NFSD: release_state_owner\n"); + release_stateid(stp, flag); + + /* place unused nfs4_stateowners on so_close_lru list to be + * released by the laundromat service after the lease period + * to enable us to handle CLOSE replay + */ + if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) + move_to_close_lru(sop); + /* unused nfs4_file's are releseed. XXX slab cache? */ + if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) { + release_file(fp); + } +} + +static int +cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) { + return ((sop->so_owner.len == owner->len) && + !memcmp(sop->so_owner.data, owner->data, owner->len) && + (sop->so_client->cl_clientid.cl_id == clid->cl_id)); +} + +static struct nfs4_stateowner * +find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open) +{ + struct nfs4_stateowner *so = NULL; + + list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) { + if (cmp_owner_str(so, &open->op_owner, &open->op_clientid)) + return so; + } + return NULL; +} + +/* search file_hashtbl[] for file */ +static struct nfs4_file * +find_file(struct inode *ino) +{ + unsigned int hashval = file_hashval(ino); + struct nfs4_file *fp; + + list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { + if (fp->fi_inode == ino) + return fp; + } + return NULL; +} + +#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0) +#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0) + +void +set_access(unsigned int *access, unsigned long bmap) { + int i; + + *access = 0; + for (i = 1; i < 4; i++) { + if (test_bit(i, &bmap)) + *access |= i; + } +} + +void +set_deny(unsigned int *deny, unsigned long bmap) { + int i; + + *deny = 0; + for (i = 0; i < 4; i++) { + if (test_bit(i, &bmap)) + *deny |= i ; + } +} + +static int +test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { + unsigned int access, deny; + + set_access(&access, stp->st_access_bmap); + set_deny(&deny, stp->st_deny_bmap); + if ((access & open->op_share_deny) || (deny & open->op_share_access)) + return 0; + return 1; +} + +/* + * Called to check deny when READ with all zero stateid or + * WRITE with all zero or all one stateid + */ +int +nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) +{ + struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfs4_file *fp; + struct nfs4_stateid *stp; + + dprintk("NFSD: nfs4_share_conflict\n"); + + fp = find_file(ino); + if (fp) { + /* Search for conflicting share reservations */ + list_for_each_entry(stp, &fp->fi_perfile, st_perfile) { + if (test_bit(deny_type, &stp->st_deny_bmap) || + test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) + return nfserr_share_denied; + } + } + return nfs_ok; +} + +static inline void +nfs4_file_downgrade(struct file *filp, unsigned int share_access) +{ + if (share_access & NFS4_SHARE_ACCESS_WRITE) { + put_write_access(filp->f_dentry->d_inode); + filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE; + } +} + +/* + * Recall a delegation + */ +static int +do_recall(void *__dp) +{ + struct nfs4_delegation *dp = __dp; + + daemonize("nfsv4-recall"); + + nfsd4_cb_recall(dp); + return 0; +} + +/* + * Spawn a thread to perform a recall on the delegation represented + * by the lease (file_lock) + * + * Called from break_lease() with lock_kernel() held. + * Note: we assume break_lease will only call this *once* for any given + * lease. + */ +static +void nfsd_break_deleg_cb(struct file_lock *fl) +{ + struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner; + struct task_struct *t; + + dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl); + if (!dp) + return; + + /* We're assuming the state code never drops its reference + * without first removing the lease. Since we're in this lease + * callback (and since the lease code is serialized by the kernel + * lock) we know the server hasn't removed the lease yet, we know + * it's safe to take a reference: */ + atomic_inc(&dp->dl_count); + + spin_lock(&recall_lock); + list_add_tail(&dp->dl_recall_lru, &del_recall_lru); + spin_unlock(&recall_lock); + + /* only place dl_time is set. protected by lock_kernel*/ + dp->dl_time = get_seconds(); + + /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */ + fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ; + + t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall"); + if (IS_ERR(t)) { + struct nfs4_client *clp = dp->dl_client; + + printk(KERN_INFO "NFSD: Callback thread failed for " + "for client (clientid %08x/%08x)\n", + clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); + nfs4_put_delegation(dp); + } +} + +/* + * The file_lock is being reapd. + * + * Called by locks_free_lock() with lock_kernel() held. + */ +static +void nfsd_release_deleg_cb(struct file_lock *fl) +{ + struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner; + + dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count)); + + if (!(fl->fl_flags & FL_LEASE) || !dp) + return; + dp->dl_flock = NULL; +} + +/* + * Set the delegation file_lock back pointer. + * + * Called from __setlease() with lock_kernel() held. + */ +static +void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl) +{ + struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner; + + dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp); + if (!dp) + return; + dp->dl_flock = new; +} + +/* + * Called from __setlease() with lock_kernel() held + */ +static +int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try) +{ + struct nfs4_delegation *onlistd = + (struct nfs4_delegation *)onlist->fl_owner; + struct nfs4_delegation *tryd = + (struct nfs4_delegation *)try->fl_owner; + + if (onlist->fl_lmops != try->fl_lmops) + return 0; + + return onlistd->dl_client == tryd->dl_client; +} + + +static +int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) +{ + if (arg & F_UNLCK) + return lease_modify(onlist, arg); + else + return -EAGAIN; +} + +struct lock_manager_operations nfsd_lease_mng_ops = { + .fl_break = nfsd_break_deleg_cb, + .fl_release_private = nfsd_release_deleg_cb, + .fl_copy_lock = nfsd_copy_lock_deleg_cb, + .fl_mylease = nfsd_same_client_deleg_cb, + .fl_change = nfsd_change_deleg_cb, +}; + + +/* + * nfsd4_process_open1() + * lookup stateowner. + * found: + * check confirmed + * confirmed: + * check seqid + * not confirmed: + * delete owner + * create new owner + * notfound: + * verify clientid + * create new owner + * + * called with nfs4_lock_state() held. + */ +int +nfsd4_process_open1(struct nfsd4_open *open) +{ + int status; + clientid_t *clientid = &open->op_clientid; + struct nfs4_client *clp = NULL; + unsigned int strhashval; + struct nfs4_stateowner *sop = NULL; + + status = nfserr_inval; + if (!check_name(open->op_owner)) + goto out; + + if (STALE_CLIENTID(&open->op_clientid)) + return nfserr_stale_clientid; + + strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); + sop = find_openstateowner_str(strhashval, open); + if (sop) { + open->op_stateowner = sop; + /* check for replay */ + if (open->op_seqid == sop->so_seqid){ + if (sop->so_replay.rp_buflen) + return NFSERR_REPLAY_ME; + else { + /* The original OPEN failed so spectacularly + * that we don't even have replay data saved! + * Therefore, we have no choice but to continue + * processing this OPEN; presumably, we'll + * fail again for the same reason. + */ + dprintk("nfsd4_process_open1:" + " replay with no replay cache\n"); + goto renew; + } + } else if (sop->so_confirmed) { + if (open->op_seqid == sop->so_seqid + 1) + goto renew; + status = nfserr_bad_seqid; + goto out; + } else { + /* If we get here, we received an OPEN for an + * unconfirmed nfs4_stateowner. Since the seqid's are + * different, purge the existing nfs4_stateowner, and + * instantiate a new one. + */ + clp = sop->so_client; + release_stateowner(sop); + } + } else { + /* nfs4_stateowner not found. + * Verify clientid and instantiate new nfs4_stateowner. + * If verify fails this is presumably the result of the + * client's lease expiring. + */ + status = nfserr_expired; + clp = find_confirmed_client(clientid); + if (clp == NULL) + goto out; + } + status = nfserr_resource; + sop = alloc_init_open_stateowner(strhashval, clp, open); + if (sop == NULL) + goto out; + open->op_stateowner = sop; +renew: + status = nfs_ok; + renew_client(sop->so_client); +out: + if (status && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + status = nfserr_reclaim_bad; + return status; +} + +static int +nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) +{ + struct nfs4_stateid *local; + int status = nfserr_share_denied; + struct nfs4_stateowner *sop = open->op_stateowner; + + list_for_each_entry(local, &fp->fi_perfile, st_perfile) { + /* ignore lock owners */ + if (local->st_stateowner->so_is_open_owner == 0) + continue; + /* remember if we have seen this open owner */ + if (local->st_stateowner == sop) + *stpp = local; + /* check for conflicting share reservations */ + if (!test_share(local, open)) + goto out; + } + status = 0; +out: + return status; +} + +static int +nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, + struct svc_fh *cur_fh, int flags) +{ + struct nfs4_stateid *stp; + int status; + + stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL); + if (stp == NULL) + return nfserr_resource; + + status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file); + if (status) { + if (status == nfserr_dropit) + status = nfserr_jukebox; + kfree(stp); + return status; + } + vfsopen++; + *stpp = stp; + return 0; +} + +static inline int +nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, + struct nfsd4_open *open) +{ + struct iattr iattr = { + .ia_valid = ATTR_SIZE, + .ia_size = 0, + }; + if (!open->op_truncate) + return 0; + if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) + return -EINVAL; + return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); +} + +static int +nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open) +{ + struct file *filp = stp->st_vfs_file; + struct inode *inode = filp->f_dentry->d_inode; + unsigned int share_access; + int status; + + set_access(&share_access, stp->st_access_bmap); + share_access = ~share_access; + share_access &= open->op_share_access; + + if (!(share_access & NFS4_SHARE_ACCESS_WRITE)) + return nfsd4_truncate(rqstp, cur_fh, open); + + status = get_write_access(inode); + if (status) + return nfserrno(status); + status = nfsd4_truncate(rqstp, cur_fh, open); + if (status) { + put_write_access(inode); + return status; + } + /* remember the open */ + filp->f_mode = (filp->f_mode | FMODE_WRITE) & ~FMODE_READ; + set_bit(open->op_share_access, &stp->st_access_bmap); + set_bit(open->op_share_deny, &stp->st_deny_bmap); + + return nfs_ok; +} + + +/* decrement seqid on successful reclaim, it will be bumped in encode_open */ +static void +nfs4_set_claim_prev(struct nfsd4_open *open, int *status) +{ + if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) { + if (*status) + *status = nfserr_reclaim_bad; + else { + open->op_stateowner->so_confirmed = 1; + open->op_stateowner->so_seqid--; + } + } +} + +/* + * Attempt to hand out a delegation. + */ +static void +nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp) +{ + struct nfs4_delegation *dp; + struct nfs4_stateowner *sop = stp->st_stateowner; + struct nfs4_callback *cb = &sop->so_client->cl_callback; + struct file_lock fl, *flp = &fl; + int status, flag = 0; + + flag = NFS4_OPEN_DELEGATE_NONE; + if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL + || !atomic_read(&cb->cb_set) || !sop->so_confirmed) + goto out; + + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + flag = NFS4_OPEN_DELEGATE_WRITE; + else + flag = NFS4_OPEN_DELEGATE_READ; + + dp = alloc_init_deleg(sop->so_client, stp, fh, flag); + if (dp == NULL) { + flag = NFS4_OPEN_DELEGATE_NONE; + goto out; + } + locks_init_lock(&fl); + fl.fl_lmops = &nfsd_lease_mng_ops; + fl.fl_flags = FL_LEASE; + fl.fl_end = OFFSET_MAX; + fl.fl_owner = (fl_owner_t)dp; + fl.fl_file = stp->st_vfs_file; + fl.fl_pid = current->tgid; + + /* setlease checks to see if delegation should be handed out. + * the lock_manager callbacks fl_mylease and fl_change are used + */ + if ((status = setlease(stp->st_vfs_file, + flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) { + dprintk("NFSD: setlease failed [%d], no delegation\n", status); + list_del(&dp->dl_del_perfile); + list_del(&dp->dl_del_perclnt); + nfs4_put_delegation(dp); + free_delegation++; + flag = NFS4_OPEN_DELEGATE_NONE; + goto out; + } + + memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid)); + + dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n", + dp->dl_stateid.si_boot, + dp->dl_stateid.si_stateownerid, + dp->dl_stateid.si_fileid, + dp->dl_stateid.si_generation); +out: + open->op_delegate_type = flag; +} + +/* + * called with nfs4_lock_state() held. + */ +int +nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) +{ + struct nfs4_file *fp = NULL; + struct inode *ino = current_fh->fh_dentry->d_inode; + struct nfs4_stateid *stp = NULL; + int status; + + status = nfserr_inval; + if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) + goto out; + /* + * Lookup file; if found, lookup stateid and check open request, + * and check for delegations in the process of being recalled. + * If not found, create the nfs4_file struct + */ + fp = find_file(ino); + if (fp) { + if ((status = nfs4_check_open(fp, open, &stp))) + goto out; + } else { + status = nfserr_resource; + fp = alloc_init_file(ino); + if (fp == NULL) + goto out; + } + + /* + * OPEN the file, or upgrade an existing OPEN. + * If truncate fails, the OPEN fails. + */ + if (stp) { + /* Stateid was found, this is an OPEN upgrade */ + status = nfs4_upgrade_open(rqstp, current_fh, stp, open); + if (status) + goto out; + } else { + /* Stateid was not found, this is a new OPEN */ + int flags = 0; + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + flags = MAY_WRITE; + else + flags = MAY_READ; + if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags))) + goto out; + init_stateid(stp, fp, open); + status = nfsd4_truncate(rqstp, current_fh, open); + if (status) { + release_stateid(stp, OPEN_STATE); + goto out; + } + } + memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t)); + + /* + * Attempt to hand out a delegation. No error return, because the + * OPEN succeeds even if we fail. + */ + nfs4_open_delegation(current_fh, open, stp); + + status = nfs_ok; + + dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n", + stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, + stp->st_stateid.si_fileid, stp->st_stateid.si_generation); +out: + /* take the opportunity to clean up unused state */ + if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) + release_file(fp); + + /* CLAIM_PREVIOUS has different error returns */ + nfs4_set_claim_prev(open, &status); + /* + * To finish the open response, we just need to set the rflags. + */ + open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX; + if (!open->op_stateowner->so_confirmed) + open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; + + return status; +} + +static struct work_struct laundromat_work; +static void laundromat_main(void *); +static DECLARE_WORK(laundromat_work, laundromat_main, NULL); + +int +nfsd4_renew(clientid_t *clid) +{ + struct nfs4_client *clp; + int status; + + nfs4_lock_state(); + dprintk("process_renew(%08x/%08x): starting\n", + clid->cl_boot, clid->cl_id); + status = nfserr_stale_clientid; + if (STALE_CLIENTID(clid)) + goto out; + clp = find_confirmed_client(clid); + status = nfserr_expired; + if (clp == NULL) { + /* We assume the client took too long to RENEW. */ + dprintk("nfsd4_renew: clientid not found!\n"); + goto out; + } + renew_client(clp); + status = nfserr_cb_path_down; + if (!list_empty(&clp->cl_del_perclnt) + && !atomic_read(&clp->cl_callback.cb_set)) + goto out; + status = nfs_ok; +out: + nfs4_unlock_state(); + return status; +} + +time_t +nfs4_laundromat(void) +{ + struct nfs4_client *clp; + struct nfs4_stateowner *sop; + struct nfs4_delegation *dp; + struct list_head *pos, *next, reaplist; + time_t cutoff = get_seconds() - NFSD_LEASE_TIME; + time_t t, clientid_val = NFSD_LEASE_TIME; + time_t u, test_val = NFSD_LEASE_TIME; + + nfs4_lock_state(); + + dprintk("NFSD: laundromat service - starting\n"); + list_for_each_safe(pos, next, &client_lru) { + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { + t = clp->cl_time - cutoff; + if (clientid_val > t) + clientid_val = t; + break; + } + dprintk("NFSD: purging unused client (clientid %08x)\n", + clp->cl_clientid.cl_id); + expire_client(clp); + } + INIT_LIST_HEAD(&reaplist); + spin_lock(&recall_lock); + list_for_each_safe(pos, next, &del_recall_lru) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) { + u = dp->dl_time - cutoff; + if (test_val > u) + test_val = u; + break; + } + dprintk("NFSD: purging unused delegation dp %p, fp %p\n", + dp, dp->dl_flock); + list_move(&dp->dl_recall_lru, &reaplist); + } + spin_unlock(&recall_lock); + list_for_each_safe(pos, next, &reaplist) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + list_del_init(&dp->dl_recall_lru); + unhash_delegation(dp); + } + test_val = NFSD_LEASE_TIME; + list_for_each_safe(pos, next, &close_lru) { + sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); + if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { + u = sop->so_time - cutoff; + if (test_val > u) + test_val = u; + break; + } + dprintk("NFSD: purging unused open stateowner (so_id %d)\n", + sop->so_id); + list_del(&sop->so_close_lru); + nfs4_put_stateowner(sop); + } + if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) + clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + nfs4_unlock_state(); + return clientid_val; +} + +void +laundromat_main(void *not_used) +{ + time_t t; + + t = nfs4_laundromat(); + dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); + schedule_delayed_work(&laundromat_work, t*HZ); +} + +/* search ownerid_hashtbl[] and close_lru for stateid owner + * (stateid->si_stateownerid) + */ +struct nfs4_stateowner * +find_openstateowner_id(u32 st_id, int flags) { + struct nfs4_stateowner *local = NULL; + + dprintk("NFSD: find_openstateowner_id %d\n", st_id); + if (flags & CLOSE_STATE) { + list_for_each_entry(local, &close_lru, so_close_lru) { + if (local->so_id == st_id) + return local; + } + } + return NULL; +} + +static inline int +nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) +{ + return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_dentry->d_inode; +} + +static int +STALE_STATEID(stateid_t *stateid) +{ + if (stateid->si_boot == boot_time) + return 0; + printk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n", + stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, + stateid->si_generation); + return 1; +} + +static inline int +access_permit_read(unsigned long access_bmap) +{ + return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap); +} + +static inline int +access_permit_write(unsigned long access_bmap) +{ + return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || + test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); +} + +static +int nfs4_check_openmode(struct nfs4_stateid *stp, int flags) +{ + int status = nfserr_openmode; + + if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) + goto out; + if ((flags & RD_STATE) && (!access_permit_read(stp->st_access_bmap))) + goto out; + status = nfs_ok; +out: + return status; +} + +static inline int +nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) +{ + if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) + return nfserr_openmode; + else + return nfs_ok; +} + +static inline int +check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) +{ + /* Trying to call delegreturn with a special stateid? Yuch: */ + if (!(flags & (RD_STATE | WR_STATE))) + return nfserr_bad_stateid; + else if (ONE_STATEID(stateid) && (flags & RD_STATE)) + return nfs_ok; + else if (nfs4_in_grace()) { + /* Answer in remaining cases depends on existance of + * conflicting state; so we must wait out the grace period. */ + return nfserr_grace; + } else if (flags & WR_STATE) + return nfs4_share_conflict(current_fh, + NFS4_SHARE_DENY_WRITE); + else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */ + return nfs4_share_conflict(current_fh, + NFS4_SHARE_DENY_READ); +} + +/* + * Allow READ/WRITE during grace period on recovered state only for files + * that are not able to provide mandatory locking. + */ +static inline int +io_during_grace_disallowed(struct inode *inode, int flags) +{ + return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) + && MANDATORY_LOCK(inode); +} + +/* +* Checks for stateid operations +*/ +int +nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) +{ + struct nfs4_stateid *stp = NULL; + struct nfs4_delegation *dp = NULL; + stateid_t *stidp; + struct inode *ino = current_fh->fh_dentry->d_inode; + int status; + + dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n", + stateid->si_boot, stateid->si_stateownerid, + stateid->si_fileid, stateid->si_generation); + if (filpp) + *filpp = NULL; + + if (io_during_grace_disallowed(ino, flags)) + return nfserr_grace; + + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) + return check_special_stateids(current_fh, stateid, flags); + + /* STALE STATEID */ + status = nfserr_stale_stateid; + if (STALE_STATEID(stateid)) + goto out; + + /* BAD STATEID */ + status = nfserr_bad_stateid; + if (!stateid->si_fileid) { /* delegation stateid */ + if(!(dp = find_delegation_stateid(ino, stateid))) { + dprintk("NFSD: delegation stateid not found\n"); + if (nfs4_in_grace()) + status = nfserr_grace; + goto out; + } + stidp = &dp->dl_stateid; + } else { /* open or lock stateid */ + if (!(stp = find_stateid(stateid, flags))) { + dprintk("NFSD: open or lock stateid not found\n"); + if (nfs4_in_grace()) + status = nfserr_grace; + goto out; + } + if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) + goto out; + if (!stp->st_stateowner->so_confirmed) + goto out; + stidp = &stp->st_stateid; + } + if (stateid->si_generation > stidp->si_generation) + goto out; + + /* OLD STATEID */ + status = nfserr_old_stateid; + if (stateid->si_generation < stidp->si_generation) + goto out; + if (stp) { + if ((status = nfs4_check_openmode(stp,flags))) + goto out; + renew_client(stp->st_stateowner->so_client); + if (filpp) + *filpp = stp->st_vfs_file; + } else if (dp) { + if ((status = nfs4_check_delegmode(dp, flags))) + goto out; + renew_client(dp->dl_client); + if (flags & DELEG_RET) + unhash_delegation(dp); + if (filpp) + *filpp = dp->dl_vfs_file; + } + status = nfs_ok; +out: + return status; +} + + +/* + * Checks for sequence id mutating operations. + */ +int +nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) +{ + int status; + struct nfs4_stateid *stp; + struct nfs4_stateowner *sop; + + dprintk("NFSD: preprocess_seqid_op: seqid=%d " + "stateid = (%08x/%08x/%08x/%08x)\n", seqid, + stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid, + stateid->si_generation); + + *stpp = NULL; + *sopp = NULL; + + status = nfserr_bad_stateid; + if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) { + printk("NFSD: preprocess_seqid_op: magic stateid!\n"); + goto out; + } + + status = nfserr_stale_stateid; + if (STALE_STATEID(stateid)) + goto out; + /* + * We return BAD_STATEID if filehandle doesn't match stateid, + * the confirmed flag is incorrecly set, or the generation + * number is incorrect. + * If there is no entry in the openfile table for this id, + * we can't always return BAD_STATEID; + * this might be a retransmitted CLOSE which has arrived after + * the openfile has been released. + */ + if (!(stp = find_stateid(stateid, flags))) + goto no_nfs4_stateid; + + status = nfserr_bad_stateid; + + /* for new lock stateowners: + * check that the lock->v.new.open_stateid + * refers to an open stateowner + * + * check that the lockclid (nfs4_lock->v.new.clientid) is the same + * as the open_stateid->st_stateowner->so_client->clientid + */ + if (lockclid) { + struct nfs4_stateowner *sop = stp->st_stateowner; + struct nfs4_client *clp = sop->so_client; + + if (!sop->so_is_open_owner) + goto out; + if (!cmp_clid(&clp->cl_clientid, lockclid)) + goto out; + } + + if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { + printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); + goto out; + } + + *stpp = stp; + *sopp = sop = stp->st_stateowner; + + /* + * We now validate the seqid and stateid generation numbers. + * For the moment, we ignore the possibility of + * generation number wraparound. + */ + if (seqid != sop->so_seqid + 1) + goto check_replay; + + if (sop->so_confirmed) { + if (flags & CONFIRM) { + printk("NFSD: preprocess_seqid_op: expected unconfirmed stateowner!\n"); + goto out; + } + } + else { + if (!(flags & CONFIRM)) { + printk("NFSD: preprocess_seqid_op: stateowner not confirmed yet!\n"); + goto out; + } + } + if (stateid->si_generation > stp->st_stateid.si_generation) { + printk("NFSD: preprocess_seqid_op: future stateid?!\n"); + goto out; + } + + status = nfserr_old_stateid; + if (stateid->si_generation < stp->st_stateid.si_generation) { + printk("NFSD: preprocess_seqid_op: old stateid!\n"); + goto out; + } + /* XXX renew the client lease here */ + status = nfs_ok; + +out: + return status; + +no_nfs4_stateid: + + /* + * We determine whether this is a bad stateid or a replay, + * starting by trying to look up the stateowner. + * If stateowner is not found - stateid is bad. + */ + if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) { + printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n"); + status = nfserr_bad_stateid; + goto out; + } + *sopp = sop; + +check_replay: + if (seqid == sop->so_seqid) { + printk("NFSD: preprocess_seqid_op: retransmission?\n"); + /* indicate replay to calling function */ + status = NFSERR_REPLAY_ME; + } else { + printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid); + + *sopp = NULL; + status = nfserr_bad_seqid; + } + goto out; +} + +int +nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc) +{ + int status; + struct nfs4_stateowner *sop; + struct nfs4_stateid *stp; + + dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", + (int)current_fh->fh_dentry->d_name.len, + current_fh->fh_dentry->d_name.name); + + if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) + goto out; + + nfs4_lock_state(); + + if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid, + &oc->oc_req_stateid, + CHECK_FH | CONFIRM | OPEN_STATE, + &oc->oc_stateowner, &stp, NULL))) + goto out; + + sop = oc->oc_stateowner; + sop->so_confirmed = 1; + update_stateid(&stp->st_stateid); + memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t)); + dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d " + "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid, + stp->st_stateid.si_boot, + stp->st_stateid.si_stateownerid, + stp->st_stateid.si_fileid, + stp->st_stateid.si_generation); +out: + if (oc->oc_stateowner) + nfs4_get_stateowner(oc->oc_stateowner); + nfs4_unlock_state(); + return status; +} + + +/* + * unset all bits in union bitmap (bmap) that + * do not exist in share (from successful OPEN_DOWNGRADE) + */ +static void +reset_union_bmap_access(unsigned long access, unsigned long *bmap) +{ + int i; + for (i = 1; i < 4; i++) { + if ((i & access) != i) + __clear_bit(i, bmap); + } +} + +static void +reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) +{ + int i; + for (i = 0; i < 4; i++) { + if ((i & deny) != i) + __clear_bit(i, bmap); + } +} + +int +nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od) +{ + int status; + struct nfs4_stateid *stp; + unsigned int share_access; + + dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", + (int)current_fh->fh_dentry->d_name.len, + current_fh->fh_dentry->d_name.name); + + if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny)) + return nfserr_inval; + + nfs4_lock_state(); + if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid, + &od->od_stateid, + CHECK_FH | OPEN_STATE, + &od->od_stateowner, &stp, NULL))) + goto out; + + status = nfserr_inval; + if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { + dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", + stp->st_access_bmap, od->od_share_access); + goto out; + } + if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { + dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", + stp->st_deny_bmap, od->od_share_deny); + goto out; + } + set_access(&share_access, stp->st_access_bmap); + nfs4_file_downgrade(stp->st_vfs_file, + share_access & ~od->od_share_access); + + reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); + reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); + + update_stateid(&stp->st_stateid); + memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); + status = nfs_ok; +out: + if (od->od_stateowner) + nfs4_get_stateowner(od->od_stateowner); + nfs4_unlock_state(); + return status; +} + +/* + * nfs4_unlock_state() called after encode + */ +int +nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close) +{ + int status; + struct nfs4_stateid *stp; + + dprintk("NFSD: nfsd4_close on file %.*s\n", + (int)current_fh->fh_dentry->d_name.len, + current_fh->fh_dentry->d_name.name); + + nfs4_lock_state(); + /* check close_lru for replay */ + if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, + &close->cl_stateid, + CHECK_FH | OPEN_STATE | CLOSE_STATE, + &close->cl_stateowner, &stp, NULL))) + goto out; + /* + * Return success, but first update the stateid. + */ + status = nfs_ok; + update_stateid(&stp->st_stateid); + memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t)); + + /* release_state_owner() calls nfsd_close() if needed */ + release_state_owner(stp, OPEN_STATE); +out: + if (close->cl_stateowner) + nfs4_get_stateowner(close->cl_stateowner); + nfs4_unlock_state(); + return status; +} + +int +nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr) +{ + int status; + + if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) + goto out; + + nfs4_lock_state(); + status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL); + nfs4_unlock_state(); +out: + return status; +} + + +/* + * Lock owner state (byte-range locks) + */ +#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start)) +#define LOCK_HASH_BITS 8 +#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS) +#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1) + +#define lockownerid_hashval(id) \ + ((id) & LOCK_HASH_MASK) + +static inline unsigned int +lock_ownerstr_hashval(struct inode *inode, u32 cl_id, + struct xdr_netobj *ownername) +{ + return (file_hashval(inode) + cl_id + + opaque_hashval(ownername->data, ownername->len)) + & LOCK_HASH_MASK; +} + +static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE]; +static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; +static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; + +struct nfs4_stateid * +find_stateid(stateid_t *stid, int flags) +{ + struct nfs4_stateid *local = NULL; + u32 st_id = stid->si_stateownerid; + u32 f_id = stid->si_fileid; + unsigned int hashval; + + dprintk("NFSD: find_stateid flags 0x%x\n",flags); + if ((flags & LOCK_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) { + hashval = stateid_hashval(st_id, f_id); + list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { + if ((local->st_stateid.si_stateownerid == st_id) && + (local->st_stateid.si_fileid == f_id)) + return local; + } + } + if ((flags & OPEN_STATE) || (flags & RD_STATE) || (flags & WR_STATE)) { + hashval = stateid_hashval(st_id, f_id); + list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { + if ((local->st_stateid.si_stateownerid == st_id) && + (local->st_stateid.si_fileid == f_id)) + return local; + } + } else + printk("NFSD: find_stateid: ERROR: no state flag\n"); + return NULL; +} + +static struct nfs4_delegation * +find_delegation_stateid(struct inode *ino, stateid_t *stid) +{ + struct nfs4_delegation *dp = NULL; + struct nfs4_file *fp = NULL; + u32 st_id; + + dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", + stid->si_boot, stid->si_stateownerid, + stid->si_fileid, stid->si_generation); + + st_id = stid->si_stateownerid; + fp = find_file(ino); + if (fp) { + list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { + if(dp->dl_stateid.si_stateownerid == st_id) { + dprintk("NFSD: find_delegation dp %p\n",dp); + return dp; + } + } + } + return NULL; +} + +/* + * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that + * we can't properly handle lock requests that go beyond the (2^63 - 1)-th + * byte, because of sign extension problems. Since NFSv4 calls for 64-bit + * locking, this prevents us from being completely protocol-compliant. The + * real solution to this problem is to start using unsigned file offsets in + * the VFS, but this is a very deep change! + */ +static inline void +nfs4_transform_lock_offset(struct file_lock *lock) +{ + if (lock->fl_start < 0) + lock->fl_start = OFFSET_MAX; + if (lock->fl_end < 0) + lock->fl_end = OFFSET_MAX; +} + +int +nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) +{ + struct nfs4_stateowner *local = NULL; + int status = 0; + + if (hashval >= LOCK_HASH_SIZE) + goto out; + list_for_each_entry(local, &lock_ownerid_hashtbl[hashval], so_idhash) { + if (local == sop) { + status = 1; + goto out; + } + } +out: + return status; +} + + +static inline void +nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny) +{ + struct nfs4_stateowner *sop = (struct nfs4_stateowner *) fl->fl_owner; + unsigned int hval = lockownerid_hashval(sop->so_id); + + deny->ld_sop = NULL; + if (nfs4_verify_lock_stateowner(sop, hval)) { + kref_get(&sop->so_ref); + deny->ld_sop = sop; + deny->ld_clientid = sop->so_client->cl_clientid; + } + deny->ld_start = fl->fl_start; + deny->ld_length = ~(u64)0; + if (fl->fl_end != ~(u64)0) + deny->ld_length = fl->fl_end - fl->fl_start + 1; + deny->ld_type = NFS4_READ_LT; + if (fl->fl_type != F_RDLCK) + deny->ld_type = NFS4_WRITE_LT; +} + +static struct nfs4_stateowner * +find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid) +{ + struct nfs4_stateowner *local = NULL; + int i; + + for (i = 0; i < LOCK_HASH_SIZE; i++) { + list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) { + if (!cmp_owner_str(local, owner, clid)) + continue; + return local; + } + } + return NULL; +} + +static struct nfs4_stateowner * +find_lockstateowner_str(struct inode *inode, clientid_t *clid, + struct xdr_netobj *owner) +{ + unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner); + struct nfs4_stateowner *op; + + list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) { + if (cmp_owner_str(op, owner, clid)) + return op; + } + return NULL; +} + +/* + * Alloc a lock owner structure. + * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has + * occured. + * + * strhashval = lock_ownerstr_hashval + * so_seqid = lock->lk_new_lock_seqid - 1: it gets bumped in encode + */ + +static struct nfs4_stateowner * +alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) { + struct nfs4_stateowner *sop; + struct nfs4_replay *rp; + unsigned int idhashval; + + if (!(sop = alloc_stateowner(&lock->lk_new_owner))) + return NULL; + idhashval = lockownerid_hashval(current_ownerid); + INIT_LIST_HEAD(&sop->so_idhash); + INIT_LIST_HEAD(&sop->so_strhash); + INIT_LIST_HEAD(&sop->so_perclient); + INIT_LIST_HEAD(&sop->so_perfilestate); + INIT_LIST_HEAD(&sop->so_perlockowner); + INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ + sop->so_time = 0; + list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); + list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); + list_add(&sop->so_perlockowner, &open_stp->st_perlockowner); + sop->so_is_open_owner = 0; + sop->so_id = current_ownerid++; + sop->so_client = clp; + sop->so_seqid = lock->lk_new_lock_seqid - 1; + sop->so_confirmed = 1; + rp = &sop->so_replay; + rp->rp_status = NFSERR_SERVERFAULT; + rp->rp_buflen = 0; + rp->rp_buf = rp->rp_ibuf; + return sop; +} + +struct nfs4_stateid * +alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) +{ + struct nfs4_stateid *stp; + unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); + + if ((stp = kmalloc(sizeof(struct nfs4_stateid), + GFP_KERNEL)) == NULL) + goto out; + INIT_LIST_HEAD(&stp->st_hash); + INIT_LIST_HEAD(&stp->st_perfile); + INIT_LIST_HEAD(&stp->st_perfilestate); + INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */ + list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); + list_add(&stp->st_perfile, &fp->fi_perfile); + list_add_perfile++; + list_add(&stp->st_perfilestate, &sop->so_perfilestate); + stp->st_stateowner = sop; + stp->st_file = fp; + stp->st_stateid.si_boot = boot_time; + stp->st_stateid.si_stateownerid = sop->so_id; + stp->st_stateid.si_fileid = fp->fi_id; + stp->st_stateid.si_generation = 0; + stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */ + stp->st_access_bmap = open_stp->st_access_bmap; + stp->st_deny_bmap = open_stp->st_deny_bmap; + +out: + return stp; +} + +int +check_lock_length(u64 offset, u64 length) +{ + return ((length == 0) || ((length != ~(u64)0) && + LOFF_OVERFLOW(offset, length))); +} + +/* + * LOCK operation + */ +int +nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock) +{ + struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL; + struct nfs4_stateid *lock_stp; + struct file *filp; + struct file_lock file_lock; + struct file_lock *conflock; + int status = 0; + unsigned int strhashval; + + dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", + (long long) lock->lk_offset, + (long long) lock->lk_length); + + if (nfs4_in_grace() && !lock->lk_reclaim) + return nfserr_grace; + if (!nfs4_in_grace() && lock->lk_reclaim) + return nfserr_no_grace; + + if (check_lock_length(lock->lk_offset, lock->lk_length)) + return nfserr_inval; + + nfs4_lock_state(); + + if (lock->lk_is_new) { + /* + * Client indicates that this is a new lockowner. + * Use open owner and open stateid to create lock owner and lock + * stateid. + */ + struct nfs4_stateid *open_stp = NULL; + struct nfs4_file *fp; + + status = nfserr_stale_clientid; + if (STALE_CLIENTID(&lock->lk_new_clientid)) { + printk("NFSD: nfsd4_lock: clientid is stale!\n"); + goto out; + } + + /* is the new lock seqid presented by the client zero? */ + status = nfserr_bad_seqid; + if (lock->v.new.lock_seqid != 0) + goto out; + + /* validate and update open stateid and open seqid */ + status = nfs4_preprocess_seqid_op(current_fh, + lock->lk_new_open_seqid, + &lock->lk_new_open_stateid, + CHECK_FH | OPEN_STATE, + &open_sop, &open_stp, + &lock->v.new.clientid); + if (status) { + if (lock->lk_reclaim) + status = nfserr_reclaim_bad; + goto out; + } + /* create lockowner and lock stateid */ + fp = open_stp->st_file; + strhashval = lock_ownerstr_hashval(fp->fi_inode, + open_sop->so_client->cl_clientid.cl_id, + &lock->v.new.owner); + /* + * If we already have this lock owner, the client is in + * error (or our bookeeping is wrong!) + * for asking for a 'new lock'. + */ + status = nfserr_bad_stateid; + lock_sop = find_lockstateowner(&lock->v.new.owner, + &lock->v.new.clientid); + if (lock_sop) + goto out; + status = nfserr_resource; + if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock))) + goto out; + if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner, + fp, open_stp)) == NULL) { + release_stateowner(lock->lk_stateowner); + lock->lk_stateowner = NULL; + goto out; + } + /* bump the open seqid used to create the lock */ + open_sop->so_seqid++; + } else { + /* lock (lock owner + lock stateid) already exists */ + status = nfs4_preprocess_seqid_op(current_fh, + lock->lk_old_lock_seqid, + &lock->lk_old_lock_stateid, + CHECK_FH | LOCK_STATE, + &lock->lk_stateowner, &lock_stp, NULL); + if (status) + goto out; + } + /* lock->lk_stateowner and lock_stp have been created or found */ + filp = lock_stp->st_vfs_file; + + if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) { + printk("NFSD: nfsd4_lock: permission denied!\n"); + goto out; + } + + locks_init_lock(&file_lock); + switch (lock->lk_type) { + case NFS4_READ_LT: + case NFS4_READW_LT: + file_lock.fl_type = F_RDLCK; + break; + case NFS4_WRITE_LT: + case NFS4_WRITEW_LT: + file_lock.fl_type = F_WRLCK; + break; + default: + status = nfserr_inval; + goto out; + } + file_lock.fl_owner = (fl_owner_t) lock->lk_stateowner; + file_lock.fl_pid = current->tgid; + file_lock.fl_file = filp; + file_lock.fl_flags = FL_POSIX; + + file_lock.fl_start = lock->lk_offset; + if ((lock->lk_length == ~(u64)0) || + LOFF_OVERFLOW(lock->lk_offset, lock->lk_length)) + file_lock.fl_end = ~(u64)0; + else + file_lock.fl_end = lock->lk_offset + lock->lk_length - 1; + nfs4_transform_lock_offset(&file_lock); + + /* + * Try to lock the file in the VFS. + * Note: locks.c uses the BKL to protect the inode's lock list. + */ + + status = posix_lock_file(filp, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); + dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status); + switch (-status) { + case 0: /* success! */ + update_stateid(&lock_stp->st_stateid); + memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid, + sizeof(stateid_t)); + goto out; + case (EAGAIN): + goto conflicting_lock; + case (EDEADLK): + status = nfserr_deadlock; + default: + dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status); + goto out_destroy_new_stateid; + } + +conflicting_lock: + dprintk("NFSD: nfsd4_lock: conflicting lock found!\n"); + status = nfserr_denied; + /* XXX There is a race here. Future patch needed to provide + * an atomic posix_lock_and_test_file + */ + if (!(conflock = posix_test_lock(filp, &file_lock))) { + status = nfserr_serverfault; + goto out; + } + nfs4_set_lock_denied(conflock, &lock->lk_denied); + +out_destroy_new_stateid: + if (lock->lk_is_new) { + dprintk("NFSD: nfsd4_lock: destroy new stateid!\n"); + /* + * An error encountered after instantiation of the new + * stateid has forced us to destroy it. + */ + if (!seqid_mutating_err(status)) + open_sop->so_seqid--; + + release_state_owner(lock_stp, LOCK_STATE); + } +out: + if (lock->lk_stateowner) + nfs4_get_stateowner(lock->lk_stateowner); + nfs4_unlock_state(); + return status; +} + +/* + * LOCKT operation + */ +int +nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt) +{ + struct inode *inode; + struct file file; + struct file_lock file_lock; + struct file_lock *conflicting_lock; + int status; + + if (nfs4_in_grace()) + return nfserr_grace; + + if (check_lock_length(lockt->lt_offset, lockt->lt_length)) + return nfserr_inval; + + lockt->lt_stateowner = NULL; + nfs4_lock_state(); + + status = nfserr_stale_clientid; + if (STALE_CLIENTID(&lockt->lt_clientid)) { + printk("NFSD: nfsd4_lockt: clientid is stale!\n"); + goto out; + } + + if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) { + printk("NFSD: nfsd4_lockt: fh_verify() failed!\n"); + if (status == nfserr_symlink) + status = nfserr_inval; + goto out; + } + + inode = current_fh->fh_dentry->d_inode; + locks_init_lock(&file_lock); + switch (lockt->lt_type) { + case NFS4_READ_LT: + case NFS4_READW_LT: + file_lock.fl_type = F_RDLCK; + break; + case NFS4_WRITE_LT: + case NFS4_WRITEW_LT: + file_lock.fl_type = F_WRLCK; + break; + default: + printk("NFSD: nfs4_lockt: bad lock type!\n"); + status = nfserr_inval; + goto out; + } + + lockt->lt_stateowner = find_lockstateowner_str(inode, + &lockt->lt_clientid, &lockt->lt_owner); + if (lockt->lt_stateowner) + file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner; + file_lock.fl_pid = current->tgid; + file_lock.fl_flags = FL_POSIX; + + file_lock.fl_start = lockt->lt_offset; + if ((lockt->lt_length == ~(u64)0) || LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length)) + file_lock.fl_end = ~(u64)0; + else + file_lock.fl_end = lockt->lt_offset + lockt->lt_length - 1; + + nfs4_transform_lock_offset(&file_lock); + + /* posix_test_lock uses the struct file _only_ to resolve the inode. + * since LOCKT doesn't require an OPEN, and therefore a struct + * file may not exist, pass posix_test_lock a struct file with + * only the dentry:inode set. + */ + memset(&file, 0, sizeof (struct file)); + file.f_dentry = current_fh->fh_dentry; + + status = nfs_ok; + conflicting_lock = posix_test_lock(&file, &file_lock); + if (conflicting_lock) { + status = nfserr_denied; + nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied); + } +out: + nfs4_unlock_state(); + return status; +} + +int +nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku) +{ + struct nfs4_stateid *stp; + struct file *filp = NULL; + struct file_lock file_lock; + int status; + + dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", + (long long) locku->lu_offset, + (long long) locku->lu_length); + + if (check_lock_length(locku->lu_offset, locku->lu_length)) + return nfserr_inval; + + nfs4_lock_state(); + + if ((status = nfs4_preprocess_seqid_op(current_fh, + locku->lu_seqid, + &locku->lu_stateid, + CHECK_FH | LOCK_STATE, + &locku->lu_stateowner, &stp, NULL))) + goto out; + + filp = stp->st_vfs_file; + BUG_ON(!filp); + locks_init_lock(&file_lock); + file_lock.fl_type = F_UNLCK; + file_lock.fl_owner = (fl_owner_t) locku->lu_stateowner; + file_lock.fl_pid = current->tgid; + file_lock.fl_file = filp; + file_lock.fl_flags = FL_POSIX; + file_lock.fl_start = locku->lu_offset; + + if ((locku->lu_length == ~(u64)0) || LOFF_OVERFLOW(locku->lu_offset, locku->lu_length)) + file_lock.fl_end = ~(u64)0; + else + file_lock.fl_end = locku->lu_offset + locku->lu_length - 1; + nfs4_transform_lock_offset(&file_lock); + + /* + * Try to unlock the file in the VFS. + */ + status = posix_lock_file(filp, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); + if (status) { + printk("NFSD: nfs4_locku: posix_lock_file failed!\n"); + goto out_nfserr; + } + /* + * OK, unlock succeeded; the only thing left to do is update the stateid. + */ + update_stateid(&stp->st_stateid); + memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); + +out: + if (locku->lu_stateowner) + nfs4_get_stateowner(locku->lu_stateowner); + nfs4_unlock_state(); + return status; + +out_nfserr: + status = nfserrno(status); + goto out; +} + +/* + * returns + * 1: locks held by lockowner + * 0: no locks held by lockowner + */ +static int +check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) +{ + struct file_lock **flpp; + struct inode *inode = filp->f_dentry->d_inode; + int status = 0; + + lock_kernel(); + for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { + if ((*flpp)->fl_owner == (fl_owner_t)lowner) + status = 1; + goto out; + } +out: + unlock_kernel(); + return status; +} + +int +nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) +{ + clientid_t *clid = &rlockowner->rl_clientid; + struct nfs4_stateowner *local = NULL; + struct xdr_netobj *owner = &rlockowner->rl_owner; + int status; + + dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", + clid->cl_boot, clid->cl_id); + + /* XXX check for lease expiration */ + + status = nfserr_stale_clientid; + if (STALE_CLIENTID(clid)) { + printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n"); + return status; + } + + nfs4_lock_state(); + + status = nfs_ok; + local = find_lockstateowner(owner, clid); + if (local) { + struct nfs4_stateid *stp; + + /* check for any locks held by any stateid + * associated with the (lock) stateowner */ + status = nfserr_locks_held; + list_for_each_entry(stp, &local->so_perfilestate, + st_perfilestate) { + if (check_for_locks(stp->st_vfs_file, local)) + goto out; + } + /* no locks held by (lock) stateowner */ + status = nfs_ok; + release_stateowner(local); + } +out: + nfs4_unlock_state(); + return status; +} + +static inline struct nfs4_client_reclaim * +alloc_reclaim(int namelen) +{ + struct nfs4_client_reclaim *crp = NULL; + + crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); + if (!crp) + return NULL; + crp->cr_name.data = kmalloc(namelen, GFP_KERNEL); + if (!crp->cr_name.data) { + kfree(crp); + return NULL; + } + return crp; +} + +/* + * failure => all reset bets are off, nfserr_no_grace... + */ +static int +nfs4_client_to_reclaim(char *name, int namlen) +{ + unsigned int strhashval; + struct nfs4_client_reclaim *crp = NULL; + + dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name); + crp = alloc_reclaim(namlen); + if (!crp) + return 0; + strhashval = clientstr_hashval(name, namlen); + INIT_LIST_HEAD(&crp->cr_strhash); + list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); + memcpy(crp->cr_name.data, name, namlen); + crp->cr_name.len = namlen; + reclaim_str_hashtbl_size++; + return 1; +} + +static void +nfs4_release_reclaim(void) +{ + struct nfs4_client_reclaim *crp = NULL; + int i; + + BUG_ON(!nfs4_reclaim_init); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&reclaim_str_hashtbl[i])) { + crp = list_entry(reclaim_str_hashtbl[i].next, + struct nfs4_client_reclaim, cr_strhash); + list_del(&crp->cr_strhash); + kfree(crp->cr_name.data); + kfree(crp); + reclaim_str_hashtbl_size--; + } + } + BUG_ON(reclaim_str_hashtbl_size); +} + +/* + * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ +struct nfs4_client_reclaim * +nfs4_find_reclaim_client(clientid_t *clid) +{ + unsigned int strhashval; + struct nfs4_client *clp; + struct nfs4_client_reclaim *crp = NULL; + + + /* find clientid in conf_id_hashtbl */ + clp = find_confirmed_client(clid); + if (clp == NULL) + return NULL; + + dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n", + clp->cl_name.len, clp->cl_name.data); + + /* find clp->cl_name in reclaim_str_hashtbl */ + strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len); + list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { + if (cmp_name(&crp->cr_name, &clp->cl_name)) { + return crp; + } + } + return NULL; +} + +/* +* Called from OPEN. Look for clientid in reclaim list. +*/ +int +nfs4_check_open_reclaim(clientid_t *clid) +{ + struct nfs4_client_reclaim *crp; + + if ((crp = nfs4_find_reclaim_client(clid)) == NULL) + return nfserr_reclaim_bad; + return nfs_ok; +} + + +/* + * Start and stop routines + */ + +static void +__nfs4_state_init(void) +{ + int i; + time_t grace_time; + + if (!nfs4_reclaim_init) { + for (i = 0; i < CLIENT_HASH_SIZE; i++) + INIT_LIST_HEAD(&reclaim_str_hashtbl[i]); + reclaim_str_hashtbl_size = 0; + nfs4_reclaim_init = 1; + } + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&conf_id_hashtbl[i]); + INIT_LIST_HEAD(&conf_str_hashtbl[i]); + INIT_LIST_HEAD(&unconf_str_hashtbl[i]); + INIT_LIST_HEAD(&unconf_id_hashtbl[i]); + } + for (i = 0; i < FILE_HASH_SIZE; i++) { + INIT_LIST_HEAD(&file_hashtbl[i]); + } + for (i = 0; i < OWNER_HASH_SIZE; i++) { + INIT_LIST_HEAD(&ownerstr_hashtbl[i]); + INIT_LIST_HEAD(&ownerid_hashtbl[i]); + } + for (i = 0; i < STATEID_HASH_SIZE; i++) { + INIT_LIST_HEAD(&stateid_hashtbl[i]); + INIT_LIST_HEAD(&lockstateid_hashtbl[i]); + } + for (i = 0; i < LOCK_HASH_SIZE; i++) { + INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]); + INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); + } + memset(&zerostateid, 0, sizeof(stateid_t)); + memset(&onestateid, ~0, sizeof(stateid_t)); + + INIT_LIST_HEAD(&close_lru); + INIT_LIST_HEAD(&client_lru); + INIT_LIST_HEAD(&del_recall_lru); + spin_lock_init(&recall_lock); + boot_time = get_seconds(); + grace_time = max(old_lease_time, lease_time); + if (reclaim_str_hashtbl_size == 0) + grace_time = 0; + if (grace_time) + printk("NFSD: starting %ld-second grace period\n", grace_time); + grace_end = boot_time + grace_time; + INIT_WORK(&laundromat_work,laundromat_main, NULL); + schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ); +} + +int +nfs4_state_init(void) +{ + int status; + + if (nfs4_init) + return 0; + status = nfsd4_init_slabs(); + if (status) + return status; + __nfs4_state_init(); + nfs4_init = 1; + return 0; +} + +int +nfs4_in_grace(void) +{ + return get_seconds() < grace_end; +} + +void +set_no_grace(void) +{ + printk("NFSD: ERROR in reboot recovery. State reclaims will fail.\n"); + grace_end = get_seconds(); +} + +time_t +nfs4_lease_time(void) +{ + return lease_time; +} + +static void +__nfs4_state_shutdown(void) +{ + int i; + struct nfs4_client *clp = NULL; + struct nfs4_delegation *dp = NULL; + struct nfs4_stateowner *sop = NULL; + struct list_head *pos, *next, reaplist; + + list_for_each_safe(pos, next, &close_lru) { + sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); + list_del(&sop->so_close_lru); + nfs4_put_stateowner(sop); + } + + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + while (!list_empty(&conf_id_hashtbl[i])) { + clp = list_entry(conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); + expire_client(clp); + } + while (!list_empty(&unconf_str_hashtbl[i])) { + clp = list_entry(unconf_str_hashtbl[i].next, struct nfs4_client, cl_strhash); + expire_client(clp); + } + } + INIT_LIST_HEAD(&reaplist); + spin_lock(&recall_lock); + list_for_each_safe(pos, next, &del_recall_lru) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + list_move(&dp->dl_recall_lru, &reaplist); + } + spin_unlock(&recall_lock); + list_for_each_safe(pos, next, &reaplist) { + dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); + list_del_init(&dp->dl_recall_lru); + unhash_delegation(dp); + } + + release_all_files(); + cancel_delayed_work(&laundromat_work); + flush_scheduled_work(); + nfs4_init = 0; + dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n", + list_add_perfile, list_del_perfile); + dprintk("NFSD: add_perclient %d del_perclient %d\n", + add_perclient, del_perclient); + dprintk("NFSD: alloc_file %d free_file %d\n", + alloc_file, free_file); + dprintk("NFSD: vfsopen %d vfsclose %d\n", + vfsopen, vfsclose); + dprintk("NFSD: alloc_delegation %d free_delegation %d\n", + alloc_delegation, free_delegation); + +} + +void +nfs4_state_shutdown(void) +{ + nfs4_lock_state(); + nfs4_release_reclaim(); + __nfs4_state_shutdown(); + nfsd4_free_slabs(); + nfs4_unlock_state(); +} + +/* + * Called when leasetime is changed. + * + * if nfsd is not started, simply set the global lease. + * + * if nfsd(s) are running, lease change requires nfsv4 state to be reset. + * e.g: boot_time is reset, existing nfs4_client structs are + * used to fill reclaim_str_hashtbl, then all state (except for the + * reclaim_str_hashtbl) is re-initialized. + * + * if the old lease time is greater than the new lease time, the grace + * period needs to be set to the old lease time to allow clients to reclaim + * their state. XXX - we may want to set the grace period == lease time + * after an initial grace period == old lease time + * + * if an error occurs in this process, the new lease is set, but the server + * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace + * which means OPEN/LOCK/READ/WRITE will fail during grace period. + * + * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and + * OPEN and LOCK reclaims. + */ +void +nfs4_reset_lease(time_t leasetime) +{ + struct nfs4_client *clp; + int i; + + printk("NFSD: New leasetime %ld\n",leasetime); + if (!nfs4_init) + return; + nfs4_lock_state(); + old_lease_time = lease_time; + lease_time = leasetime; + + nfs4_release_reclaim(); + + /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */ + for (i = 0; i < CLIENT_HASH_SIZE; i++) { + list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) { + if (!nfs4_client_to_reclaim(clp->cl_name.data, + clp->cl_name.len)) { + nfs4_release_reclaim(); + goto init_state; + } + } + } +init_state: + __nfs4_state_shutdown(); + __nfs4_state_init(); + nfs4_unlock_state(); +} + diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c new file mode 100644 index 00000000000..36a058a112d --- /dev/null +++ b/fs/nfsd/nfs4xdr.c @@ -0,0 +1,2536 @@ +/* + * fs/nfs/nfs4xdr.c + * + * Server-side XDR for NFSv4 + * + * Copyright (c) 2002 The Regents of the University of Michigan. + * All rights reserved. + * + * Kendrick Smith <kmsmith@umich.edu> + * Andy Adamson <andros@umich.edu> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * TODO: Neil Brown made the following observation: We currently + * initially reserve NFSD_BUFSIZE space on the transmit queue and + * never release any of that until the request is complete. + * It would be good to calculate a new maximum response size while + * decoding the COMPOUND, and call svc_reserve with this number + * at the end of nfs4svc_decode_compoundargs. + */ + +#include <linux/param.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/vfs.h> +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/clnt.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/state.h> +#include <linux/nfsd/xdr4.h> +#include <linux/nfsd_idmap.h> +#include <linux/nfs4.h> +#include <linux/nfs4_acl.h> + +#define NFSDDBG_FACILITY NFSDDBG_XDR + +static int +check_filename(char *str, int len, int err) +{ + int i; + + if (len == 0) + return nfserr_inval; + if (isdotent(str, len)) + return err; + for (i = 0; i < len; i++) + if (str[i] == '/') + return err; + return 0; +} + +/* + * START OF "GENERIC" DECODE ROUTINES. + * These may look a little ugly since they are imported from a "generic" + * set of XDR encode/decode routines which are intended to be shared by + * all of our NFSv4 implementations (OpenBSD, MacOS X...). + * + * If the pain of reading these is too great, it should be a straightforward + * task to translate them into Linux-specific versions which are more + * consistent with the style used in NFSv2/v3... + */ +#define DECODE_HEAD \ + u32 *p; \ + int status +#define DECODE_TAIL \ + status = 0; \ +out: \ + return status; \ +xdr_error: \ + printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ + status = nfserr_bad_xdr; \ + goto out + +#define READ32(x) (x) = ntohl(*p++) +#define READ64(x) do { \ + (x) = (u64)ntohl(*p++) << 32; \ + (x) |= ntohl(*p++); \ +} while (0) +#define READTIME(x) do { \ + p++; \ + (x) = ntohl(*p++); \ + p++; \ +} while (0) +#define READMEM(x,nbytes) do { \ + x = (char *)p; \ + p += XDR_QUADLEN(nbytes); \ +} while (0) +#define SAVEMEM(x,nbytes) do { \ + if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ + savemem(argp, p, nbytes) : \ + (char *)p)) { \ + printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ + goto xdr_error; \ + } \ + p += XDR_QUADLEN(nbytes); \ +} while (0) +#define COPYMEM(x,nbytes) do { \ + memcpy((x), p, nbytes); \ + p += XDR_QUADLEN(nbytes); \ +} while (0) + +/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */ +#define READ_BUF(nbytes) do { \ + if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \ + p = argp->p; \ + argp->p += XDR_QUADLEN(nbytes); \ + } else if (!(p = read_buf(argp, nbytes))) { \ + printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ + goto xdr_error; \ + } \ +} while (0) + +u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) +{ + /* We want more bytes than seem to be available. + * Maybe we need a new page, maybe we have just run out + */ + int avail = (char*)argp->end - (char*)argp->p; + u32 *p; + if (avail + argp->pagelen < nbytes) + return NULL; + if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */ + return NULL; + /* ok, we can do it with the current plus the next page */ + if (nbytes <= sizeof(argp->tmp)) + p = argp->tmp; + else { + if (argp->tmpp) + kfree(argp->tmpp); + p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL); + if (!p) + return NULL; + + } + memcpy(p, argp->p, avail); + /* step to next page */ + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } + memcpy(((char*)p)+avail, argp->p, (nbytes - avail)); + argp->p += XDR_QUADLEN(nbytes - avail); + return p; +} + +static int +defer_free(struct nfsd4_compoundargs *argp, + void (*release)(const void *), void *p) +{ + struct tmpbuf *tb; + + tb = kmalloc(sizeof(*tb), GFP_KERNEL); + if (!tb) + return -ENOMEM; + tb->buf = p; + tb->release = release; + tb->next = argp->to_free; + argp->to_free = tb; + return 0; +} + +char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) +{ + void *new = NULL; + if (p == argp->tmp) { + new = kmalloc(nbytes, GFP_KERNEL); + if (!new) return NULL; + p = new; + memcpy(p, argp->tmp, nbytes); + } else { + if (p != argp->tmpp) + BUG(); + argp->tmpp = NULL; + } + if (defer_free(argp, kfree, p)) { + kfree(new); + return NULL; + } else + return (char *)p; +} + + +static int +nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) +{ + u32 bmlen; + DECODE_HEAD; + + bmval[0] = 0; + bmval[1] = 0; + + READ_BUF(4); + READ32(bmlen); + if (bmlen > 1000) + goto xdr_error; + + READ_BUF(bmlen << 2); + if (bmlen > 0) + READ32(bmval[0]); + if (bmlen > 1) + READ32(bmval[1]); + + DECODE_TAIL; +} + +static int +nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, + struct nfs4_acl **acl) +{ + int expected_len, len = 0; + u32 dummy32; + char *buf; + + DECODE_HEAD; + iattr->ia_valid = 0; + if ((status = nfsd4_decode_bitmap(argp, bmval))) + return status; + + /* + * According to spec, unsupported attributes return ERR_NOTSUPP; + * read-only attributes return ERR_INVAL. + */ + if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) + return nfserr_attrnotsupp; + if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) + return nfserr_inval; + + READ_BUF(4); + READ32(expected_len); + + if (bmval[0] & FATTR4_WORD0_SIZE) { + READ_BUF(8); + len += 8; + READ64(iattr->ia_size); + iattr->ia_valid |= ATTR_SIZE; + } + if (bmval[0] & FATTR4_WORD0_ACL) { + int nace, i; + struct nfs4_ace ace; + + READ_BUF(4); len += 4; + READ32(nace); + + *acl = nfs4_acl_new(); + if (*acl == NULL) { + status = -ENOMEM; + goto out_nfserr; + } + defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl); + + for (i = 0; i < nace; i++) { + READ_BUF(16); len += 16; + READ32(ace.type); + READ32(ace.flag); + READ32(ace.access_mask); + READ32(dummy32); + READ_BUF(dummy32); + len += XDR_QUADLEN(dummy32) << 2; + READMEM(buf, dummy32); + ace.whotype = nfs4_acl_get_whotype(buf, dummy32); + status = 0; + if (ace.whotype != NFS4_ACL_WHO_NAMED) + ace.who = 0; + else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP) + status = nfsd_map_name_to_gid(argp->rqstp, + buf, dummy32, &ace.who); + else + status = nfsd_map_name_to_uid(argp->rqstp, + buf, dummy32, &ace.who); + if (status) + goto out_nfserr; + if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, + ace.access_mask, ace.whotype, ace.who) != 0) { + status = -ENOMEM; + goto out_nfserr; + } + } + } else + *acl = NULL; + if (bmval[1] & FATTR4_WORD1_MODE) { + READ_BUF(4); + len += 4; + READ32(iattr->ia_mode); + iattr->ia_mode &= (S_IFMT | S_IALLUGO); + iattr->ia_valid |= ATTR_MODE; + } + if (bmval[1] & FATTR4_WORD1_OWNER) { + READ_BUF(4); + len += 4; + READ32(dummy32); + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) + goto out_nfserr; + iattr->ia_valid |= ATTR_UID; + } + if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) { + READ_BUF(4); + len += 4; + READ32(dummy32); + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); + READMEM(buf, dummy32); + if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) + goto out_nfserr; + iattr->ia_valid |= ATTR_GID; + } + if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) { + READ_BUF(4); + len += 4; + READ32(dummy32); + switch (dummy32) { + case NFS4_SET_TO_CLIENT_TIME: + /* We require the high 32 bits of 'seconds' to be 0, and we ignore + all 32 bits of 'nseconds'. */ + READ_BUF(12); + len += 12; + READ32(dummy32); + if (dummy32) + return nfserr_inval; + READ32(iattr->ia_atime.tv_sec); + READ32(iattr->ia_atime.tv_nsec); + if (iattr->ia_atime.tv_nsec >= (u32)1000000000) + return nfserr_inval; + iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET); + break; + case NFS4_SET_TO_SERVER_TIME: + iattr->ia_valid |= ATTR_ATIME; + break; + default: + goto xdr_error; + } + } + if (bmval[1] & FATTR4_WORD1_TIME_METADATA) { + /* We require the high 32 bits of 'seconds' to be 0, and we ignore + all 32 bits of 'nseconds'. */ + READ_BUF(12); + len += 12; + READ32(dummy32); + if (dummy32) + return nfserr_inval; + READ32(iattr->ia_ctime.tv_sec); + READ32(iattr->ia_ctime.tv_nsec); + if (iattr->ia_ctime.tv_nsec >= (u32)1000000000) + return nfserr_inval; + iattr->ia_valid |= ATTR_CTIME; + } + if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { + READ_BUF(4); + len += 4; + READ32(dummy32); + switch (dummy32) { + case NFS4_SET_TO_CLIENT_TIME: + /* We require the high 32 bits of 'seconds' to be 0, and we ignore + all 32 bits of 'nseconds'. */ + READ_BUF(12); + len += 12; + READ32(dummy32); + if (dummy32) + return nfserr_inval; + READ32(iattr->ia_mtime.tv_sec); + READ32(iattr->ia_mtime.tv_nsec); + if (iattr->ia_mtime.tv_nsec >= (u32)1000000000) + return nfserr_inval; + iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET); + break; + case NFS4_SET_TO_SERVER_TIME: + iattr->ia_valid |= ATTR_MTIME; + break; + default: + goto xdr_error; + } + } + if (len != expected_len) + goto xdr_error; + + DECODE_TAIL; + +out_nfserr: + status = nfserrno(status); + goto out; +} + +static int +nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(access->ac_req_access); + + DECODE_TAIL; +} + +static int +nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) +{ + DECODE_HEAD; + + close->cl_stateowner = NULL; + READ_BUF(4 + sizeof(stateid_t)); + READ32(close->cl_seqid); + READ32(close->cl_stateid.si_generation); + COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + + +static int +nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) +{ + DECODE_HEAD; + + READ_BUF(12); + READ64(commit->co_offset); + READ32(commit->co_count); + + DECODE_TAIL; +} + +static int +nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(create->cr_type); + switch (create->cr_type) { + case NF4LNK: + READ_BUF(4); + READ32(create->cr_linklen); + READ_BUF(create->cr_linklen); + SAVEMEM(create->cr_linkname, create->cr_linklen); + break; + case NF4BLK: + case NF4CHR: + READ_BUF(8); + READ32(create->cr_specdata1); + READ32(create->cr_specdata2); + break; + case NF4SOCK: + case NF4FIFO: + case NF4DIR: + default: + break; + } + + READ_BUF(4); + READ32(create->cr_namelen); + READ_BUF(create->cr_namelen); + SAVEMEM(create->cr_name, create->cr_namelen); + if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) + return status; + + if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) + goto out; + + DECODE_TAIL; +} + +static inline int +nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(dr->dr_stateid.si_generation); + COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static inline int +nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) +{ + return nfsd4_decode_bitmap(argp, getattr->ga_bmval); +} + +static int +nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(link->li_namelen); + READ_BUF(link->li_namelen); + SAVEMEM(link->li_name, link->li_namelen); + if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval))) + return status; + + DECODE_TAIL; +} + +static int +nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) +{ + DECODE_HEAD; + + lock->lk_stateowner = NULL; + /* + * type, reclaim(boolean), offset, length, new_lock_owner(boolean) + */ + READ_BUF(28); + READ32(lock->lk_type); + if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT)) + goto xdr_error; + READ32(lock->lk_reclaim); + READ64(lock->lk_offset); + READ64(lock->lk_length); + READ32(lock->lk_is_new); + + if (lock->lk_is_new) { + READ_BUF(36); + READ32(lock->lk_new_open_seqid); + READ32(lock->lk_new_open_stateid.si_generation); + + COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ32(lock->lk_new_lock_seqid); + COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); + READ32(lock->lk_new_owner.len); + READ_BUF(lock->lk_new_owner.len); + READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); + } else { + READ_BUF(20); + READ32(lock->lk_old_lock_stateid.si_generation); + COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ32(lock->lk_old_lock_seqid); + } + + DECODE_TAIL; +} + +static int +nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) +{ + DECODE_HEAD; + + READ_BUF(32); + READ32(lockt->lt_type); + if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT)) + goto xdr_error; + READ64(lockt->lt_offset); + READ64(lockt->lt_length); + COPYMEM(&lockt->lt_clientid, 8); + READ32(lockt->lt_owner.len); + READ_BUF(lockt->lt_owner.len); + READMEM(lockt->lt_owner.data, lockt->lt_owner.len); + + DECODE_TAIL; +} + +static int +nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) +{ + DECODE_HEAD; + + locku->lu_stateowner = NULL; + READ_BUF(24 + sizeof(stateid_t)); + READ32(locku->lu_type); + if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) + goto xdr_error; + READ32(locku->lu_seqid); + READ32(locku->lu_stateid.si_generation); + COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ64(locku->lu_offset); + READ64(locku->lu_length); + + DECODE_TAIL; +} + +static int +nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(lookup->lo_len); + READ_BUF(lookup->lo_len); + SAVEMEM(lookup->lo_name, lookup->lo_len); + if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent))) + return status; + + DECODE_TAIL; +} + +static int +nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) +{ + DECODE_HEAD; + + memset(open->op_bmval, 0, sizeof(open->op_bmval)); + open->op_iattr.ia_valid = 0; + open->op_stateowner = NULL; + + /* seqid, share_access, share_deny, clientid, ownerlen */ + READ_BUF(16 + sizeof(clientid_t)); + READ32(open->op_seqid); + READ32(open->op_share_access); + READ32(open->op_share_deny); + COPYMEM(&open->op_clientid, sizeof(clientid_t)); + READ32(open->op_owner.len); + + /* owner, open_flag */ + READ_BUF(open->op_owner.len + 4); + SAVEMEM(open->op_owner.data, open->op_owner.len); + READ32(open->op_create); + switch (open->op_create) { + case NFS4_OPEN_NOCREATE: + break; + case NFS4_OPEN_CREATE: + READ_BUF(4); + READ32(open->op_createmode); + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + case NFS4_CREATE_GUARDED: + if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) + goto out; + break; + case NFS4_CREATE_EXCLUSIVE: + READ_BUF(8); + COPYMEM(open->op_verf.data, 8); + break; + default: + goto xdr_error; + } + break; + default: + goto xdr_error; + } + + /* open_claim */ + READ_BUF(4); + READ32(open->op_claim_type); + switch (open->op_claim_type) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_DELEGATE_PREV: + READ_BUF(4); + READ32(open->op_fname.len); + READ_BUF(open->op_fname.len); + SAVEMEM(open->op_fname.data, open->op_fname.len); + if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + return status; + break; + case NFS4_OPEN_CLAIM_PREVIOUS: + READ_BUF(4); + READ32(open->op_delegate_type); + break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + READ_BUF(sizeof(stateid_t) + 4); + COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); + READ32(open->op_fname.len); + READ_BUF(open->op_fname.len); + SAVEMEM(open->op_fname.data, open->op_fname.len); + if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval))) + return status; + break; + default: + goto xdr_error; + } + + DECODE_TAIL; +} + +static int +nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) +{ + DECODE_HEAD; + + open_conf->oc_stateowner = NULL; + READ_BUF(4 + sizeof(stateid_t)); + READ32(open_conf->oc_req_stateid.si_generation); + COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ32(open_conf->oc_seqid); + + DECODE_TAIL; +} + +static int +nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) +{ + DECODE_HEAD; + + open_down->od_stateowner = NULL; + READ_BUF(12 + sizeof(stateid_t)); + READ32(open_down->od_stateid.si_generation); + COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ32(open_down->od_seqid); + READ32(open_down->od_share_access); + READ32(open_down->od_share_deny); + + DECODE_TAIL; +} + +static int +nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(putfh->pf_fhlen); + if (putfh->pf_fhlen > NFS4_FHSIZE) + goto xdr_error; + READ_BUF(putfh->pf_fhlen); + SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen); + + DECODE_TAIL; +} + +static int +nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t) + 12); + READ32(read->rd_stateid.si_generation); + COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ64(read->rd_offset); + READ32(read->rd_length); + + DECODE_TAIL; +} + +static int +nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) +{ + DECODE_HEAD; + + READ_BUF(24); + READ64(readdir->rd_cookie); + COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data)); + READ32(readdir->rd_dircount); /* just in case you needed a useless field... */ + READ32(readdir->rd_maxcount); + if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval))) + goto out; + + DECODE_TAIL; +} + +static int +nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(remove->rm_namelen); + READ_BUF(remove->rm_namelen); + SAVEMEM(remove->rm_name, remove->rm_namelen); + if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent))) + return status; + + DECODE_TAIL; +} + +static int +nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) +{ + DECODE_HEAD; + + READ_BUF(4); + READ32(rename->rn_snamelen); + READ_BUF(rename->rn_snamelen + 4); + SAVEMEM(rename->rn_sname, rename->rn_snamelen); + READ32(rename->rn_tnamelen); + READ_BUF(rename->rn_tnamelen); + SAVEMEM(rename->rn_tname, rename->rn_tnamelen); + if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent))) + return status; + if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval))) + return status; + + DECODE_TAIL; +} + +static int +nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(clientid_t)); + COPYMEM(clientid, sizeof(clientid_t)); + + DECODE_TAIL; +} + +static int +nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(setattr->sa_stateid.si_generation); + COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); + if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) + goto out; + + DECODE_TAIL; +} + +static int +nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) +{ + DECODE_HEAD; + + READ_BUF(12); + COPYMEM(setclientid->se_verf.data, 8); + READ32(setclientid->se_namelen); + + READ_BUF(setclientid->se_namelen + 8); + SAVEMEM(setclientid->se_name, setclientid->se_namelen); + READ32(setclientid->se_callback_prog); + READ32(setclientid->se_callback_netid_len); + + READ_BUF(setclientid->se_callback_netid_len + 4); + SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len); + READ32(setclientid->se_callback_addr_len); + + READ_BUF(setclientid->se_callback_addr_len + 4); + SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len); + READ32(setclientid->se_callback_ident); + + DECODE_TAIL; +} + +static int +nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) +{ + DECODE_HEAD; + + READ_BUF(8 + sizeof(nfs4_verifier)); + COPYMEM(&scd_c->sc_clientid, 8); + COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier)); + + DECODE_TAIL; +} + +/* Also used for NVERIFY */ +static int +nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) +{ +#if 0 + struct nfsd4_compoundargs save = { + .p = argp->p, + .end = argp->end, + .rqstp = argp->rqstp, + }; + u32 ve_bmval[2]; + struct iattr ve_iattr; /* request */ + struct nfs4_acl *ve_acl; /* request */ +#endif + DECODE_HEAD; + + if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval))) + goto out; + + /* For convenience's sake, we compare raw xdr'd attributes in + * nfsd4_proc_verify; however we still decode here just to return + * correct error in case of bad xdr. */ +#if 0 + status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl); + if (status == nfserr_inval) { + status = nfserrno(status); + goto out; + } +#endif + READ_BUF(4); + READ32(verify->ve_attrlen); + READ_BUF(verify->ve_attrlen); + SAVEMEM(verify->ve_attrval, verify->ve_attrlen); + + DECODE_TAIL; +} + +static int +nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) +{ + int avail; + int v; + int len; + DECODE_HEAD; + + READ_BUF(sizeof(stateid_opaque_t) + 20); + READ32(write->wr_stateid.si_generation); + COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ64(write->wr_offset); + READ32(write->wr_stable_how); + if (write->wr_stable_how > 2) + goto xdr_error; + READ32(write->wr_buflen); + + /* Sorry .. no magic macros for this.. * + * READ_BUF(write->wr_buflen); + * SAVEMEM(write->wr_buf, write->wr_buflen); + */ + avail = (char*)argp->end - (char*)argp->p; + if (avail + argp->pagelen < write->wr_buflen) { + printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); + goto xdr_error; + } + write->wr_vec[0].iov_base = p; + write->wr_vec[0].iov_len = avail; + v = 0; + len = write->wr_buflen; + while (len > write->wr_vec[v].iov_len) { + len -= write->wr_vec[v].iov_len; + v++; + write->wr_vec[v].iov_base = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen >= PAGE_SIZE) { + write->wr_vec[v].iov_len = PAGE_SIZE; + argp->pagelen -= PAGE_SIZE; + } else { + write->wr_vec[v].iov_len = argp->pagelen; + argp->pagelen -= len; + } + } + argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len); + argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); + write->wr_vec[v].iov_len = len; + write->wr_vlen = v+1; + + DECODE_TAIL; +} + +static int +nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) +{ + DECODE_HEAD; + + READ_BUF(12); + COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); + READ32(rlockowner->rl_owner.len); + READ_BUF(rlockowner->rl_owner.len); + READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); + + DECODE_TAIL; +} + +static int +nfsd4_decode_compound(struct nfsd4_compoundargs *argp) +{ + DECODE_HEAD; + struct nfsd4_op *op; + int i; + + /* + * XXX: According to spec, we should check the tag + * for UTF-8 compliance. I'm postponing this for + * now because it seems that some clients do use + * binary tags. + */ + READ_BUF(4); + READ32(argp->taglen); + READ_BUF(argp->taglen + 8); + SAVEMEM(argp->tag, argp->taglen); + READ32(argp->minorversion); + READ32(argp->opcnt); + + if (argp->taglen > NFSD4_MAX_TAGLEN) + goto xdr_error; + if (argp->opcnt > 100) + goto xdr_error; + + if (argp->opcnt > sizeof(argp->iops)/sizeof(argp->iops[0])) { + argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); + if (!argp->ops) { + argp->ops = argp->iops; + printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n"); + goto xdr_error; + } + } + + for (i = 0; i < argp->opcnt; i++) { + op = &argp->ops[i]; + op->replay = NULL; + + /* + * We can't use READ_BUF() here because we need to handle + * a missing opcode as an OP_WRITE + 1. So we need to check + * to see if we're truly at the end of our buffer or if there + * is another page we need to flip to. + */ + + if (argp->p == argp->end) { + if (argp->pagelen < 4) { + /* There isn't an opcode still on the wire */ + op->opnum = OP_WRITE + 1; + op->status = nfserr_bad_xdr; + argp->opcnt = i+1; + break; + } + + /* + * False alarm. We just hit a page boundary, but there + * is still data available. Move pointer across page + * boundary. *snip from READ_BUF* + */ + argp->p = page_address(argp->pagelist[0]); + argp->pagelist++; + if (argp->pagelen < PAGE_SIZE) { + argp->end = p + (argp->pagelen>>2); + argp->pagelen = 0; + } else { + argp->end = p + (PAGE_SIZE>>2); + argp->pagelen -= PAGE_SIZE; + } + } + op->opnum = ntohl(*argp->p++); + + switch (op->opnum) { + case 2: /* Reserved operation */ + op->opnum = OP_ILLEGAL; + if (argp->minorversion == 0) + op->status = nfserr_op_illegal; + else + op->status = nfserr_minor_vers_mismatch; + break; + case OP_ACCESS: + op->status = nfsd4_decode_access(argp, &op->u.access); + break; + case OP_CLOSE: + op->status = nfsd4_decode_close(argp, &op->u.close); + break; + case OP_COMMIT: + op->status = nfsd4_decode_commit(argp, &op->u.commit); + break; + case OP_CREATE: + op->status = nfsd4_decode_create(argp, &op->u.create); + break; + case OP_DELEGRETURN: + op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn); + break; + case OP_GETATTR: + op->status = nfsd4_decode_getattr(argp, &op->u.getattr); + break; + case OP_GETFH: + op->status = nfs_ok; + break; + case OP_LINK: + op->status = nfsd4_decode_link(argp, &op->u.link); + break; + case OP_LOCK: + op->status = nfsd4_decode_lock(argp, &op->u.lock); + break; + case OP_LOCKT: + op->status = nfsd4_decode_lockt(argp, &op->u.lockt); + break; + case OP_LOCKU: + op->status = nfsd4_decode_locku(argp, &op->u.locku); + break; + case OP_LOOKUP: + op->status = nfsd4_decode_lookup(argp, &op->u.lookup); + break; + case OP_LOOKUPP: + op->status = nfs_ok; + break; + case OP_NVERIFY: + op->status = nfsd4_decode_verify(argp, &op->u.nverify); + break; + case OP_OPEN: + op->status = nfsd4_decode_open(argp, &op->u.open); + break; + case OP_OPEN_CONFIRM: + op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm); + break; + case OP_OPEN_DOWNGRADE: + op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade); + break; + case OP_PUTFH: + op->status = nfsd4_decode_putfh(argp, &op->u.putfh); + break; + case OP_PUTROOTFH: + op->status = nfs_ok; + break; + case OP_READ: + op->status = nfsd4_decode_read(argp, &op->u.read); + break; + case OP_READDIR: + op->status = nfsd4_decode_readdir(argp, &op->u.readdir); + break; + case OP_READLINK: + op->status = nfs_ok; + break; + case OP_REMOVE: + op->status = nfsd4_decode_remove(argp, &op->u.remove); + break; + case OP_RENAME: + op->status = nfsd4_decode_rename(argp, &op->u.rename); + break; + case OP_RESTOREFH: + op->status = nfs_ok; + break; + case OP_RENEW: + op->status = nfsd4_decode_renew(argp, &op->u.renew); + break; + case OP_SAVEFH: + op->status = nfs_ok; + break; + case OP_SETATTR: + op->status = nfsd4_decode_setattr(argp, &op->u.setattr); + break; + case OP_SETCLIENTID: + op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid); + break; + case OP_SETCLIENTID_CONFIRM: + op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm); + break; + case OP_VERIFY: + op->status = nfsd4_decode_verify(argp, &op->u.verify); + break; + case OP_WRITE: + op->status = nfsd4_decode_write(argp, &op->u.write); + break; + case OP_RELEASE_LOCKOWNER: + op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner); + break; + default: + op->opnum = OP_ILLEGAL; + op->status = nfserr_op_illegal; + break; + } + + if (op->status) { + argp->opcnt = i+1; + break; + } + } + + DECODE_TAIL; +} +/* + * END OF "GENERIC" DECODE ROUTINES. + */ + +/* + * START OF "GENERIC" ENCODE ROUTINES. + * These may look a little ugly since they are imported from a "generic" + * set of XDR encode/decode routines which are intended to be shared by + * all of our NFSv4 implementations (OpenBSD, MacOS X...). + * + * If the pain of reading these is too great, it should be a straightforward + * task to translate them into Linux-specific versions which are more + * consistent with the style used in NFSv2/v3... + */ +#define ENCODE_HEAD u32 *p + +#define WRITE32(n) *p++ = htonl(n) +#define WRITE64(n) do { \ + *p++ = htonl((u32)((n) >> 32)); \ + *p++ = htonl((u32)(n)); \ +} while (0) +#define WRITEMEM(ptr,nbytes) do { \ + *(p + XDR_QUADLEN(nbytes) -1) = 0; \ + memcpy(p, ptr, nbytes); \ + p += XDR_QUADLEN(nbytes); \ +} while (0) +#define WRITECINFO(c) do { \ + *p++ = htonl(c.atomic); \ + *p++ = htonl(c.before_ctime_sec); \ + *p++ = htonl(c.before_ctime_nsec); \ + *p++ = htonl(c.after_ctime_sec); \ + *p++ = htonl(c.after_ctime_nsec); \ +} while (0) + +#define RESERVE_SPACE(nbytes) do { \ + p = resp->p; \ + BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \ +} while (0) +#define ADJUST_ARGS() resp->p = p + +/* + * Header routine to setup seqid operation replay cache + */ +#define ENCODE_SEQID_OP_HEAD \ + u32 *p; \ + u32 *save; \ + \ + save = resp->p; + +/* + * Routine for encoding the result of a + * "seqid-mutating" NFSv4 operation. This is + * where seqids are incremented, and the + * replay cache is filled. + */ + +#define ENCODE_SEQID_OP_TAIL(stateowner) do { \ + if (seqid_mutating_err(nfserr) && stateowner) { \ + if (stateowner->so_confirmed) \ + stateowner->so_seqid++; \ + stateowner->so_replay.rp_status = nfserr; \ + stateowner->so_replay.rp_buflen = \ + (((char *)(resp)->p - (char *)save)); \ + memcpy(stateowner->so_replay.rp_buf, save, \ + stateowner->so_replay.rp_buflen); \ + } } while (0); + + +static u32 nfs4_ftypes[16] = { + NF4BAD, NF4FIFO, NF4CHR, NF4BAD, + NF4DIR, NF4BAD, NF4BLK, NF4BAD, + NF4REG, NF4BAD, NF4LNK, NF4BAD, + NF4SOCK, NF4BAD, NF4LNK, NF4BAD, +}; + +static int +nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, + u32 **p, int *buflen) +{ + int status; + + if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4) + return nfserr_resource; + if (whotype != NFS4_ACL_WHO_NAMED) + status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1)); + else if (group) + status = nfsd_map_gid_to_name(rqstp, id, (u8 *)(*p + 1)); + else + status = nfsd_map_uid_to_name(rqstp, id, (u8 *)(*p + 1)); + if (status < 0) + return nfserrno(status); + *p = xdr_encode_opaque(*p, NULL, status); + *buflen -= (XDR_QUADLEN(status) << 2) + 4; + BUG_ON(*buflen < 0); + return 0; +} + +static inline int +nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, u32 **p, int *buflen) +{ + return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); +} + +static inline int +nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, u32 **p, int *buflen) +{ + return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); +} + +static inline int +nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, + u32 **p, int *buflen) +{ + return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); +} + + +/* + * Note: @fhp can be NULL; in this case, we might have to compose the filehandle + * ourselves. + * + * @countp is the buffer size in _words_; upon successful return this becomes + * replaced with the number of words written. + */ +int +nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval, + struct svc_rqst *rqstp) +{ + u32 bmval0 = bmval[0]; + u32 bmval1 = bmval[1]; + struct kstat stat; + struct svc_fh tempfh; + struct kstatfs statfs; + int buflen = *countp << 2; + u32 *attrlenp; + u32 dummy; + u64 dummy64; + u32 *p = buffer; + int status; + int aclsupport = 0; + struct nfs4_acl *acl = NULL; + + BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); + BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); + BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1); + + status = vfs_getattr(exp->ex_mnt, dentry, &stat); + if (status) + goto out_nfserr; + if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL)) || + (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | + FATTR4_WORD1_SPACE_TOTAL))) { + status = vfs_statfs(dentry->d_inode->i_sb, &statfs); + if (status) + goto out_nfserr; + } + if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) { + fh_init(&tempfh, NFS4_FHSIZE); + status = fh_compose(&tempfh, exp, dentry, NULL); + if (status) + goto out; + fhp = &tempfh; + } + if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT + | FATTR4_WORD0_SUPPORTED_ATTRS)) { + status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); + aclsupport = (status == 0); + if (bmval0 & FATTR4_WORD0_ACL) { + if (status == -EOPNOTSUPP) + bmval0 &= ~FATTR4_WORD0_ACL; + else if (status == -EINVAL) { + status = nfserr_attrnotsupp; + goto out; + } else if (status != 0) + goto out_nfserr; + } + } + if ((buflen -= 16) < 0) + goto out_resource; + + WRITE32(2); + WRITE32(bmval0); + WRITE32(bmval1); + attrlenp = p++; /* to be backfilled later */ + + if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(2); + WRITE32(aclsupport ? + NFSD_SUPPORTED_ATTRS_WORD0 : + NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL); + WRITE32(NFSD_SUPPORTED_ATTRS_WORD1); + } + if (bmval0 & FATTR4_WORD0_TYPE) { + if ((buflen -= 4) < 0) + goto out_resource; + dummy = nfs4_ftypes[(stat.mode & S_IFMT) >> 12]; + if (dummy == NF4BAD) + goto out_serverfault; + WRITE32(dummy); + } + if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME ); + } + if (bmval0 & FATTR4_WORD0_CHANGE) { + /* + * Note: This _must_ be consistent with the scheme for writing + * change_info, so any changes made here must be reflected there + * as well. (See xdr4.h:set_change_info() and the WRITECINFO() + * macro above.) + */ + if ((buflen -= 8) < 0) + goto out_resource; + WRITE32(stat.ctime.tv_sec); + WRITE32(stat.ctime.tv_nsec); + } + if (bmval0 & FATTR4_WORD0_SIZE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64(stat.size); + } + if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_NAMED_ATTR) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(0); + } + if (bmval0 & FATTR4_WORD0_FSID) { + if ((buflen -= 16) < 0) + goto out_resource; + if (is_fsid(fhp, rqstp->rq_reffh)) { + WRITE64((u64)exp->ex_fsid); + WRITE64((u64)0); + } else { + WRITE32(0); + WRITE32(MAJOR(stat.dev)); + WRITE32(0); + WRITE32(MINOR(stat.dev)); + } + } + if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(0); + } + if (bmval0 & FATTR4_WORD0_LEASE_TIME) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(NFSD_LEASE_TIME); + } + if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(0); + } + if (bmval0 & FATTR4_WORD0_ACL) { + struct nfs4_ace *ace; + struct list_head *h; + + if (acl == NULL) { + if ((buflen -= 4) < 0) + goto out_resource; + + WRITE32(0); + goto out_acl; + } + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(acl->naces); + + list_for_each(h, &acl->ace_head) { + ace = list_entry(h, struct nfs4_ace, l_ace); + + if ((buflen -= 4*3) < 0) + goto out_resource; + WRITE32(ace->type); + WRITE32(ace->flag); + WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); + status = nfsd4_encode_aclname(rqstp, ace->whotype, + ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP, + &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + } +out_acl: + if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(aclsupport ? + ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0); + } + if (bmval0 & FATTR4_WORD0_CANSETTIME) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_FILEHANDLE) { + buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4; + if (buflen < 0) + goto out_resource; + WRITE32(fhp->fh_handle.fh_size); + WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size); + } + if (bmval0 & FATTR4_WORD0_FILEID) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) stat.ino); + } + if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) statfs.f_ffree); + } + if (bmval0 & FATTR4_WORD0_FILES_FREE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) statfs.f_ffree); + } + if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) statfs.f_files); + } + if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64(~(u64)0); + } + if (bmval0 & FATTR4_WORD0_MAXLINK) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(255); + } + if (bmval0 & FATTR4_WORD0_MAXNAME) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(~(u32) 0); + } + if (bmval0 & FATTR4_WORD0_MAXREAD) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) NFSSVC_MAXBLKSIZE); + } + if (bmval0 & FATTR4_WORD0_MAXWRITE) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE64((u64) NFSSVC_MAXBLKSIZE); + } + if (bmval1 & FATTR4_WORD1_MODE) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(stat.mode & S_IALLUGO); + } + if (bmval1 & FATTR4_WORD1_NO_TRUNC) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(1); + } + if (bmval1 & FATTR4_WORD1_NUMLINKS) { + if ((buflen -= 4) < 0) + goto out_resource; + WRITE32(stat.nlink); + } + if (bmval1 & FATTR4_WORD1_OWNER) { + status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { + status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen); + if (status == nfserr_resource) + goto out_resource; + if (status) + goto out; + } + if (bmval1 & FATTR4_WORD1_RAWDEV) { + if ((buflen -= 8) < 0) + goto out_resource; + WRITE32((u32) MAJOR(stat.rdev)); + WRITE32((u32) MINOR(stat.rdev)); + } + if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_SPACE_FREE) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_SPACE_USED) { + if ((buflen -= 8) < 0) + goto out_resource; + dummy64 = (u64)stat.blocks << 9; + WRITE64(dummy64); + } + if (bmval1 & FATTR4_WORD1_TIME_ACCESS) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(stat.atime.tv_sec); + WRITE32(stat.atime.tv_nsec); + } + if (bmval1 & FATTR4_WORD1_TIME_DELTA) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(1); + WRITE32(0); + } + if (bmval1 & FATTR4_WORD1_TIME_METADATA) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(stat.ctime.tv_sec); + WRITE32(stat.ctime.tv_nsec); + } + if (bmval1 & FATTR4_WORD1_TIME_MODIFY) { + if ((buflen -= 12) < 0) + goto out_resource; + WRITE32(0); + WRITE32(stat.mtime.tv_sec); + WRITE32(stat.mtime.tv_nsec); + } + if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { + struct dentry *mnt_pnt, *mnt_root; + + if ((buflen -= 8) < 0) + goto out_resource; + mnt_root = exp->ex_mnt->mnt_root; + if (mnt_root->d_inode == dentry->d_inode) { + mnt_pnt = exp->ex_mnt->mnt_mountpoint; + WRITE64((u64) mnt_pnt->d_inode->i_ino); + } else + WRITE64((u64) stat.ino); + } + *attrlenp = htonl((char *)p - (char *)attrlenp - 4); + *countp = p - buffer; + status = nfs_ok; + +out: + nfs4_acl_free(acl); + if (fhp == &tempfh) + fh_put(&tempfh); + return status; +out_nfserr: + status = nfserrno(status); + goto out; +out_resource: + *countp = 0; + status = nfserr_resource; + goto out; +out_serverfault: + status = nfserr_serverfault; + goto out; +} + +static int +nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, + const char *name, int namlen, u32 *p, int *buflen) +{ + struct svc_export *exp = cd->rd_fhp->fh_export; + struct dentry *dentry; + int nfserr; + + dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); + if (IS_ERR(dentry)) + return nfserrno(PTR_ERR(dentry)); + + exp_get(exp); + if (d_mountpoint(dentry)) { + if (nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp)) { + /* + * -EAGAIN is the only error returned from + * nfsd_cross_mnt() and it indicates that an + * up-call has been initiated to fill in the export + * options on exp. When the answer comes back, + * this call will be retried. + */ + nfserr = nfserr_dropit; + goto out_put; + } + + } + nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval, + cd->rd_rqstp); +out_put: + dput(dentry); + exp_put(exp); + return nfserr; +} + +static u32 * +nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr) +{ + u32 *attrlenp; + + if (buflen < 6) + return NULL; + *p++ = htonl(2); + *p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */ + *p++ = htonl(0); /* bmval1 */ + + attrlenp = p++; + *p++ = nfserr; /* no htonl */ + *attrlenp = htonl((char *)p - (char *)attrlenp - 4); + return p; +} + +static int +nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen, + loff_t offset, ino_t ino, unsigned int d_type) +{ + struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); + int buflen; + u32 *p = cd->buffer; + int nfserr = nfserr_toosmall; + + /* In nfsv4, "." and ".." never make it onto the wire.. */ + if (name && isdotent(name, namlen)) { + cd->common.err = nfs_ok; + return 0; + } + + if (cd->offset) + xdr_encode_hyper(cd->offset, (u64) offset); + + buflen = cd->buflen - 4 - XDR_QUADLEN(namlen); + if (buflen < 0) + goto fail; + + *p++ = xdr_one; /* mark entry present */ + cd->offset = p; /* remember pointer */ + p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ + p = xdr_encode_array(p, name, namlen); /* name length & name */ + + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + switch (nfserr) { + case nfs_ok: + p += buflen; + break; + case nfserr_resource: + nfserr = nfserr_toosmall; + goto fail; + case nfserr_dropit: + goto fail; + default: + /* + * If the client requested the RDATTR_ERROR attribute, + * we stuff the error code into this attribute + * and continue. If this attribute was not requested, + * then in accordance with the spec, we fail the + * entire READDIR operation(!) + */ + if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)) + goto fail; + nfserr = nfserr_toosmall; + p = nfsd4_encode_rdattr_error(p, buflen, nfserr); + if (p == NULL) + goto fail; + } + cd->buflen -= (p - cd->buffer); + cd->buffer = p; + cd->common.err = nfs_ok; + return 0; +fail: + cd->common.err = nfserr; + return -EINVAL; +} + +static void +nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_access *access) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(8); + WRITE32(access->ac_supported); + WRITE32(access->ac_resp_access); + ADJUST_ARGS(); + } +} + +static void +nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_close *close) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) { + RESERVE_SPACE(sizeof(stateid_t)); + WRITE32(close->cl_stateid.si_generation); + WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); + ADJUST_ARGS(); + } + ENCODE_SEQID_OP_TAIL(close->cl_stateowner); +} + + +static void +nfsd4_encode_commit(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_commit *commit) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(8); + WRITEMEM(commit->co_verf.data, 8); + ADJUST_ARGS(); + } +} + +static void +nfsd4_encode_create(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create *create) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(32); + WRITECINFO(create->cr_cinfo); + WRITE32(2); + WRITE32(create->cr_bmval[0]); + WRITE32(create->cr_bmval[1]); + ADJUST_ARGS(); + } +} + +static int +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_getattr *getattr) +{ + struct svc_fh *fhp = getattr->ga_fhp; + int buflen; + + if (nfserr) + return nfserr; + + buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); + nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, + resp->p, &buflen, getattr->ga_bmval, + resp->rqstp); + + if (!nfserr) + resp->p += buflen; + return nfserr; +} + +static void +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, int nfserr, struct svc_fh *fhp) +{ + unsigned int len; + ENCODE_HEAD; + + if (!nfserr) { + len = fhp->fh_handle.fh_size; + RESERVE_SPACE(len + 4); + WRITE32(len); + WRITEMEM(&fhp->fh_handle.fh_base, len); + ADJUST_ARGS(); + } +} + +/* +* Including all fields other than the name, a LOCK4denied structure requires +* 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes. +*/ +static void +nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld) +{ + ENCODE_HEAD; + + RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0)); + WRITE64(ld->ld_start); + WRITE64(ld->ld_length); + WRITE32(ld->ld_type); + if (ld->ld_sop) { + WRITEMEM(&ld->ld_clientid, 8); + WRITE32(ld->ld_sop->so_owner.len); + WRITEMEM(ld->ld_sop->so_owner.data, ld->ld_sop->so_owner.len); + kref_put(&ld->ld_sop->so_ref, nfs4_free_stateowner); + } else { /* non - nfsv4 lock in conflict, no clientid nor owner */ + WRITE64((u64)0); /* clientid */ + WRITE32(0); /* length of owner name */ + } + ADJUST_ARGS(); +} + +static void +nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock) +{ + + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) { + RESERVE_SPACE(4 + sizeof(stateid_t)); + WRITE32(lock->lk_resp_stateid.si_generation); + WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); + ADJUST_ARGS(); + } else if (nfserr == nfserr_denied) + nfsd4_encode_lock_denied(resp, &lock->lk_denied); + + ENCODE_SEQID_OP_TAIL(lock->lk_stateowner); +} + +static void +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lockt *lockt) +{ + if (nfserr == nfserr_denied) + nfsd4_encode_lock_denied(resp, &lockt->lt_denied); +} + +static void +nfsd4_encode_locku(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_locku *locku) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) { + RESERVE_SPACE(sizeof(stateid_t)); + WRITE32(locku->lu_stateid.si_generation); + WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); + ADJUST_ARGS(); + } + + ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); +} + + +static void +nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link *link) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(20); + WRITECINFO(link->li_cinfo); + ADJUST_ARGS(); + } +} + + +static void +nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open *open) +{ + ENCODE_SEQID_OP_HEAD; + + if (nfserr) + goto out; + + RESERVE_SPACE(36 + sizeof(stateid_t)); + WRITE32(open->op_stateid.si_generation); + WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t)); + WRITECINFO(open->op_cinfo); + WRITE32(open->op_rflags); + WRITE32(2); + WRITE32(open->op_bmval[0]); + WRITE32(open->op_bmval[1]); + WRITE32(open->op_delegate_type); + ADJUST_ARGS(); + + switch (open->op_delegate_type) { + case NFS4_OPEN_DELEGATE_NONE: + break; + case NFS4_OPEN_DELEGATE_READ: + RESERVE_SPACE(20 + sizeof(stateid_t)); + WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); + WRITE32(0); + + /* + * TODO: ACE's in delegations + */ + WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + WRITE32(0); + WRITE32(0); + WRITE32(0); /* XXX: is NULL principal ok? */ + ADJUST_ARGS(); + break; + case NFS4_OPEN_DELEGATE_WRITE: + RESERVE_SPACE(32 + sizeof(stateid_t)); + WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); + WRITE32(0); + + /* + * TODO: space_limit's in delegations + */ + WRITE32(NFS4_LIMIT_SIZE); + WRITE32(~(u32)0); + WRITE32(~(u32)0); + + /* + * TODO: ACE's in delegations + */ + WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE); + WRITE32(0); + WRITE32(0); + WRITE32(0); /* XXX: is NULL principal ok? */ + ADJUST_ARGS(); + break; + default: + BUG(); + } + /* XXX save filehandle here */ +out: + ENCODE_SEQID_OP_TAIL(open->op_stateowner); +} + +static void +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_confirm *oc) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) { + RESERVE_SPACE(sizeof(stateid_t)); + WRITE32(oc->oc_resp_stateid.si_generation); + WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); + ADJUST_ARGS(); + } + + ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); +} + +static void +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_downgrade *od) +{ + ENCODE_SEQID_OP_HEAD; + + if (!nfserr) { + RESERVE_SPACE(sizeof(stateid_t)); + WRITE32(od->od_stateid.si_generation); + WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t)); + ADJUST_ARGS(); + } + + ENCODE_SEQID_OP_TAIL(od->od_stateowner); +} + +static int +nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read) +{ + u32 eof; + int v, pn; + unsigned long maxcount; + long len; + ENCODE_HEAD; + + if (nfserr) + return nfserr; + if (resp->xbuf->page_len) + return nfserr_resource; + + RESERVE_SPACE(8); /* eof flag and byte count */ + + maxcount = NFSSVC_MAXBLKSIZE; + if (maxcount > read->rd_length) + maxcount = read->rd_length; + + len = maxcount; + v = 0; + while (len > 0) { + pn = resp->rqstp->rq_resused; + svc_take_page(resp->rqstp); + read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]); + read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE; + v++; + len -= PAGE_SIZE; + } + read->rd_vlen = v; + + nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp, + read->rd_offset, read->rd_iov, read->rd_vlen, + &maxcount); + + if (nfserr == nfserr_symlink) + nfserr = nfserr_inval; + if (nfserr) + return nfserr; + eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); + + WRITE32(eof); + WRITE32(maxcount); + ADJUST_ARGS(); + resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; + + resp->xbuf->page_len = maxcount; + + /* read zero bytes -> don't set up tail */ + if(!maxcount) + return 0; + + /* set up page for remaining responses */ + svc_take_page(resp->rqstp); + resp->xbuf->tail[0].iov_base = + page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1; + resp->xbuf->tail[0].iov_len = 0; + resp->p = resp->xbuf->tail[0].iov_base; + resp->end = resp->p + PAGE_SIZE/4; + + if (maxcount&3) { + *(resp->p)++ = 0; + resp->xbuf->tail[0].iov_base += maxcount&3; + resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + } + return 0; +} + +static int +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readlink *readlink) +{ + int maxcount; + char *page; + ENCODE_HEAD; + + if (nfserr) + return nfserr; + if (resp->xbuf->page_len) + return nfserr_resource; + + svc_take_page(resp->rqstp); + page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + + maxcount = PAGE_SIZE; + RESERVE_SPACE(4); + + /* + * XXX: By default, the ->readlink() VFS op will truncate symlinks + * if they would overflow the buffer. Is this kosher in NFSv4? If + * not, one easy fix is: if ->readlink() precisely fills the buffer, + * assume that truncation occurred, and return NFS4ERR_RESOURCE. + */ + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); + if (nfserr == nfserr_isdir) + return nfserr_inval; + if (nfserr) + return nfserr; + + WRITE32(maxcount); + ADJUST_ARGS(); + resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; + + svc_take_page(resp->rqstp); + resp->xbuf->tail[0].iov_base = + page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1; + resp->xbuf->tail[0].iov_len = 0; + resp->p = resp->xbuf->tail[0].iov_base; + resp->end = resp->p + PAGE_SIZE/4; + + resp->xbuf->page_len = maxcount; + if (maxcount&3) { + *(resp->p)++ = 0; + resp->xbuf->tail[0].iov_base += maxcount&3; + resp->xbuf->tail[0].iov_len = 4 - (maxcount&3); + } + return 0; +} + +static int +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readdir *readdir) +{ + int maxcount; + loff_t offset; + u32 *page, *savep; + ENCODE_HEAD; + + if (nfserr) + return nfserr; + if (resp->xbuf->page_len) + return nfserr_resource; + + RESERVE_SPACE(8); /* verifier */ + savep = p; + + /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ + WRITE32(0); + WRITE32(0); + ADJUST_ARGS(); + resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base; + + maxcount = PAGE_SIZE; + if (maxcount > readdir->rd_maxcount) + maxcount = readdir->rd_maxcount; + + /* + * Convert from bytes to words, account for the two words already + * written, make sure to leave two words at the end for the next + * pointer and eof field. + */ + maxcount = (maxcount >> 2) - 4; + if (maxcount < 0) { + nfserr = nfserr_toosmall; + goto err_no_verf; + } + + svc_take_page(resp->rqstp); + page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + readdir->common.err = 0; + readdir->buflen = maxcount; + readdir->buffer = page; + readdir->offset = NULL; + + offset = readdir->rd_cookie; + nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, + &offset, + &readdir->common, nfsd4_encode_dirent); + if (nfserr == nfs_ok && + readdir->common.err == nfserr_toosmall && + readdir->buffer == page) + nfserr = nfserr_toosmall; + if (nfserr == nfserr_symlink) + nfserr = nfserr_notdir; + if (nfserr) + goto err_no_verf; + + if (readdir->offset) + xdr_encode_hyper(readdir->offset, offset); + + p = readdir->buffer; + *p++ = 0; /* no more entries */ + *p++ = htonl(readdir->common.err == nfserr_eof); + resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + + /* allocate a page for the tail */ + svc_take_page(resp->rqstp); + resp->xbuf->tail[0].iov_base = + page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); + resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1; + resp->xbuf->tail[0].iov_len = 0; + resp->p = resp->xbuf->tail[0].iov_base; + resp->end = resp->p + PAGE_SIZE/4; + + return 0; +err_no_verf: + p = savep; + ADJUST_ARGS(); + return nfserr; +} + +static void +nfsd4_encode_remove(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_remove *remove) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(20); + WRITECINFO(remove->rm_cinfo); + ADJUST_ARGS(); + } +} + +static void +nfsd4_encode_rename(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_rename *rename) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(40); + WRITECINFO(rename->rn_sinfo); + WRITECINFO(rename->rn_tinfo); + ADJUST_ARGS(); + } +} + +/* + * The SETATTR encode routine is special -- it always encodes a bitmap, + * regardless of the error status. + */ +static void +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setattr *setattr) +{ + ENCODE_HEAD; + + RESERVE_SPACE(12); + if (nfserr) { + WRITE32(2); + WRITE32(0); + WRITE32(0); + } + else { + WRITE32(2); + WRITE32(setattr->sa_bmval[0]); + WRITE32(setattr->sa_bmval[1]); + } + ADJUST_ARGS(); +} + +static void +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setclientid *scd) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(8 + sizeof(nfs4_verifier)); + WRITEMEM(&scd->se_clientid, 8); + WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier)); + ADJUST_ARGS(); + } + else if (nfserr == nfserr_clid_inuse) { + RESERVE_SPACE(8); + WRITE32(0); + WRITE32(0); + ADJUST_ARGS(); + } +} + +static void +nfsd4_encode_write(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_write *write) +{ + ENCODE_HEAD; + + if (!nfserr) { + RESERVE_SPACE(16); + WRITE32(write->wr_bytes_written); + WRITE32(write->wr_how_written); + WRITEMEM(write->wr_verifier.data, 8); + ADJUST_ARGS(); + } +} + +void +nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +{ + u32 *statp; + ENCODE_HEAD; + + RESERVE_SPACE(8); + WRITE32(op->opnum); + statp = p++; /* to be backfilled at the end */ + ADJUST_ARGS(); + + switch (op->opnum) { + case OP_ACCESS: + nfsd4_encode_access(resp, op->status, &op->u.access); + break; + case OP_CLOSE: + nfsd4_encode_close(resp, op->status, &op->u.close); + break; + case OP_COMMIT: + nfsd4_encode_commit(resp, op->status, &op->u.commit); + break; + case OP_CREATE: + nfsd4_encode_create(resp, op->status, &op->u.create); + break; + case OP_DELEGRETURN: + break; + case OP_GETATTR: + op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr); + break; + case OP_GETFH: + nfsd4_encode_getfh(resp, op->status, op->u.getfh); + break; + case OP_LINK: + nfsd4_encode_link(resp, op->status, &op->u.link); + break; + case OP_LOCK: + nfsd4_encode_lock(resp, op->status, &op->u.lock); + break; + case OP_LOCKT: + nfsd4_encode_lockt(resp, op->status, &op->u.lockt); + break; + case OP_LOCKU: + nfsd4_encode_locku(resp, op->status, &op->u.locku); + break; + case OP_LOOKUP: + break; + case OP_LOOKUPP: + break; + case OP_NVERIFY: + break; + case OP_OPEN: + nfsd4_encode_open(resp, op->status, &op->u.open); + break; + case OP_OPEN_CONFIRM: + nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm); + break; + case OP_OPEN_DOWNGRADE: + nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade); + break; + case OP_PUTFH: + break; + case OP_PUTROOTFH: + break; + case OP_READ: + op->status = nfsd4_encode_read(resp, op->status, &op->u.read); + break; + case OP_READDIR: + op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir); + break; + case OP_READLINK: + op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink); + break; + case OP_REMOVE: + nfsd4_encode_remove(resp, op->status, &op->u.remove); + break; + case OP_RENAME: + nfsd4_encode_rename(resp, op->status, &op->u.rename); + break; + case OP_RENEW: + break; + case OP_RESTOREFH: + break; + case OP_SAVEFH: + break; + case OP_SETATTR: + nfsd4_encode_setattr(resp, op->status, &op->u.setattr); + break; + case OP_SETCLIENTID: + nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid); + break; + case OP_SETCLIENTID_CONFIRM: + break; + case OP_VERIFY: + break; + case OP_WRITE: + nfsd4_encode_write(resp, op->status, &op->u.write); + break; + case OP_RELEASE_LOCKOWNER: + break; + default: + break; + } + + /* + * Note: We write the status directly, instead of using WRITE32(), + * since it is already in network byte order. + */ + *statp = op->status; +} + +/* + * Encode the reply stored in the stateowner reply cache + * + * XDR note: do not encode rp->rp_buflen: the buffer contains the + * previously sent already encoded operation. + * + * called with nfs4_lock_state() held + */ +void +nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +{ + ENCODE_HEAD; + struct nfs4_replay *rp = op->replay; + + BUG_ON(!rp); + + RESERVE_SPACE(8); + WRITE32(op->opnum); + *p++ = rp->rp_status; /* already xdr'ed */ + ADJUST_ARGS(); + + RESERVE_SPACE(rp->rp_buflen); + WRITEMEM(rp->rp_buf, rp->rp_buflen); + ADJUST_ARGS(); +} + +/* + * END OF "GENERIC" ENCODE ROUTINES. + */ + +int +nfs4svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) +{ + if (args->ops != args->iops) { + kfree(args->ops); + args->ops = args->iops; + } + if (args->tmpp) { + kfree(args->tmpp); + args->tmpp = NULL; + } + while (args->to_free) { + struct tmpbuf *tb = args->to_free; + args->to_free = tb->next; + tb->release(tb->buf); + kfree(tb); + } +} + +int +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args) +{ + int status; + + args->p = p; + args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; + args->pagelist = rqstp->rq_arg.pages; + args->pagelen = rqstp->rq_arg.page_len; + args->tmpp = NULL; + args->to_free = NULL; + args->ops = args->iops; + args->rqstp = rqstp; + + status = nfsd4_decode_compound(args); + if (status) { + nfsd4_release_compoundargs(args); + } + return !status; +} + +int +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundres *resp) +{ + /* + * All that remains is to write the tag and operation count... + */ + struct kvec *iov; + p = resp->tagp; + *p++ = htonl(resp->taglen); + memcpy(p, resp->tag, resp->taglen); + p += XDR_QUADLEN(resp->taglen); + *p++ = htonl(resp->opcnt); + + if (rqstp->rq_res.page_len) + iov = &rqstp->rq_res.tail[0]; + else + iov = &rqstp->rq_res.head[0]; + iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base; + BUG_ON(iov->iov_len > PAGE_SIZE); + return 1; +} + +/* + * Local variables: + * c-basic-offset: 8 + * End: + */ diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c new file mode 100644 index 00000000000..119e4d4495b --- /dev/null +++ b/fs/nfsd/nfscache.c @@ -0,0 +1,328 @@ +/* + * linux/fs/nfsd/nfscache.c + * + * Request reply cache. This is currently a global cache, but this may + * change in the future and be a per-client cache. + * + * This code is heavily inspired by the 44BSD implementation, although + * it does things a bit differently. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/list.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> + +/* Size of reply cache. Common values are: + * 4.3BSD: 128 + * 4.4BSD: 256 + * Solaris2: 1024 + * DEC Unix: 512-4096 + */ +#define CACHESIZE 1024 +#define HASHSIZE 64 +#define REQHASH(xid) ((((xid) >> 24) ^ (xid)) & (HASHSIZE-1)) + +static struct hlist_head * hash_list; +static struct list_head lru_head; +static int cache_disabled = 1; + +static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); + +/* + * locking for the reply cache: + * A cache entry is "single use" if c_state == RC_INPROG + * Otherwise, it when accessing _prev or _next, the lock must be held. + */ +static DEFINE_SPINLOCK(cache_lock); + +void +nfsd_cache_init(void) +{ + struct svc_cacherep *rp; + int i; + + INIT_LIST_HEAD(&lru_head); + i = CACHESIZE; + while(i) { + rp = kmalloc(sizeof(*rp), GFP_KERNEL); + if (!rp) break; + list_add(&rp->c_lru, &lru_head); + rp->c_state = RC_UNUSED; + rp->c_type = RC_NOCACHE; + INIT_HLIST_NODE(&rp->c_hash); + i--; + } + + if (i) + printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n", + CACHESIZE, CACHESIZE-i); + + hash_list = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL); + if (!hash_list) { + nfsd_cache_shutdown(); + printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n", + HASHSIZE * sizeof(struct hlist_head)); + return; + } + memset(hash_list, 0, HASHSIZE * sizeof(struct hlist_head)); + + cache_disabled = 0; +} + +void +nfsd_cache_shutdown(void) +{ + struct svc_cacherep *rp; + + while (!list_empty(&lru_head)) { + rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); + if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) + kfree(rp->c_replvec.iov_base); + list_del(&rp->c_lru); + kfree(rp); + } + + cache_disabled = 1; + + if (hash_list) + kfree (hash_list); + hash_list = NULL; +} + +/* + * Move cache entry to end of LRU list + */ +static void +lru_put_end(struct svc_cacherep *rp) +{ + list_del(&rp->c_lru); + list_add_tail(&rp->c_lru, &lru_head); +} + +/* + * Move a cache entry from one hash list to another + */ +static void +hash_refile(struct svc_cacherep *rp) +{ + hlist_del_init(&rp->c_hash); + hlist_add_head(&rp->c_hash, hash_list + REQHASH(rp->c_xid)); +} + +/* + * Try to find an entry matching the current call in the cache. When none + * is found, we grab the oldest unlocked entry off the LRU list. + * Note that no operation within the loop may sleep. + */ +int +nfsd_cache_lookup(struct svc_rqst *rqstp, int type) +{ + struct hlist_node *hn; + struct hlist_head *rh; + struct svc_cacherep *rp; + u32 xid = rqstp->rq_xid, + proto = rqstp->rq_prot, + vers = rqstp->rq_vers, + proc = rqstp->rq_proc; + unsigned long age; + int rtn; + + rqstp->rq_cacherep = NULL; + if (cache_disabled || type == RC_NOCACHE) { + nfsdstats.rcnocache++; + return RC_DOIT; + } + + spin_lock(&cache_lock); + rtn = RC_DOIT; + + rh = &hash_list[REQHASH(xid)]; + hlist_for_each_entry(rp, hn, rh, c_hash) { + if (rp->c_state != RC_UNUSED && + xid == rp->c_xid && proc == rp->c_proc && + proto == rp->c_prot && vers == rp->c_vers && + time_before(jiffies, rp->c_timestamp + 120*HZ) && + memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { + nfsdstats.rchits++; + goto found_entry; + } + } + nfsdstats.rcmisses++; + + /* This loop shouldn't take more than a few iterations normally */ + { + int safe = 0; + list_for_each_entry(rp, &lru_head, c_lru) { + if (rp->c_state != RC_INPROG) + break; + if (safe++ > CACHESIZE) { + printk("nfsd: loop in repcache LRU list\n"); + cache_disabled = 1; + goto out; + } + } + } + + /* This should not happen */ + if (rp == NULL) { + static int complaints; + + printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); + if (++complaints > 5) { + printk(KERN_WARNING "nfsd: disabling repcache.\n"); + cache_disabled = 1; + } + goto out; + } + + rqstp->rq_cacherep = rp; + rp->c_state = RC_INPROG; + rp->c_xid = xid; + rp->c_proc = proc; + rp->c_addr = rqstp->rq_addr; + rp->c_prot = proto; + rp->c_vers = vers; + rp->c_timestamp = jiffies; + + hash_refile(rp); + + /* release any buffer */ + if (rp->c_type == RC_REPLBUFF) { + kfree(rp->c_replvec.iov_base); + rp->c_replvec.iov_base = NULL; + } + rp->c_type = RC_NOCACHE; + out: + spin_unlock(&cache_lock); + return rtn; + +found_entry: + /* We found a matching entry which is either in progress or done. */ + age = jiffies - rp->c_timestamp; + rp->c_timestamp = jiffies; + lru_put_end(rp); + + rtn = RC_DROPIT; + /* Request being processed or excessive rexmits */ + if (rp->c_state == RC_INPROG || age < RC_DELAY) + goto out; + + /* From the hall of fame of impractical attacks: + * Is this a user who tries to snoop on the cache? */ + rtn = RC_DOIT; + if (!rqstp->rq_secure && rp->c_secure) + goto out; + + /* Compose RPC reply header */ + switch (rp->c_type) { + case RC_NOCACHE: + break; + case RC_REPLSTAT: + svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat); + rtn = RC_REPLY; + break; + case RC_REPLBUFF: + if (!nfsd_cache_append(rqstp, &rp->c_replvec)) + goto out; /* should not happen */ + rtn = RC_REPLY; + break; + default: + printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); + rp->c_state = RC_UNUSED; + } + + goto out; +} + +/* + * Update a cache entry. This is called from nfsd_dispatch when + * the procedure has been executed and the complete reply is in + * rqstp->rq_res. + * + * We're copying around data here rather than swapping buffers because + * the toplevel loop requires max-sized buffers, which would be a waste + * of memory for a cache with a max reply size of 100 bytes (diropokres). + * + * If we should start to use different types of cache entries tailored + * specifically for attrstat and fh's, we may save even more space. + * + * Also note that a cachetype of RC_NOCACHE can legally be passed when + * nfsd failed to encode a reply that otherwise would have been cached. + * In this case, nfsd_cache_update is called with statp == NULL. + */ +void +nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) +{ + struct svc_cacherep *rp; + struct kvec *resv = &rqstp->rq_res.head[0], *cachv; + int len; + + if (!(rp = rqstp->rq_cacherep) || cache_disabled) + return; + + len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); + len >>= 2; + + /* Don't cache excessive amounts of data and XDR failures */ + if (!statp || len > (256 >> 2)) { + rp->c_state = RC_UNUSED; + return; + } + + switch (cachetype) { + case RC_REPLSTAT: + if (len != 1) + printk("nfsd: RC_REPLSTAT/reply len %d!\n",len); + rp->c_replstat = *statp; + break; + case RC_REPLBUFF: + cachv = &rp->c_replvec; + cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); + if (!cachv->iov_base) { + spin_lock(&cache_lock); + rp->c_state = RC_UNUSED; + spin_unlock(&cache_lock); + return; + } + cachv->iov_len = len << 2; + memcpy(cachv->iov_base, statp, len << 2); + break; + } + spin_lock(&cache_lock); + lru_put_end(rp); + rp->c_secure = rqstp->rq_secure; + rp->c_type = cachetype; + rp->c_state = RC_DONE; + rp->c_timestamp = jiffies; + spin_unlock(&cache_lock); + return; +} + +/* + * Copy cached reply to current reply buffer. Should always fit. + * FIXME as reply is in a page, we should just attach the page, and + * keep a refcount.... + */ +static int +nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) +{ + struct kvec *vec = &rqstp->rq_res.head[0]; + + if (vec->iov_len + data->iov_len > PAGE_SIZE) { + printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n", + data->iov_len); + return 0; + } + memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len); + vec->iov_len += data->iov_len; + return 1; +} diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c new file mode 100644 index 00000000000..161afdcb8f7 --- /dev/null +++ b/fs/nfsd/nfsctl.c @@ -0,0 +1,438 @@ +/* + * linux/fs/nfsd/nfsctl.c + * + * Syscall interface to knfsd. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/linkage.h> +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/syscalls.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/pagemap.h> +#include <linux/init.h> + +#include <linux/nfs.h> +#include <linux/nfsd_idmap.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> +#include <linux/nfsd/syscall.h> +#include <linux/nfsd/interface.h> + +#include <asm/uaccess.h> + +/* + * We have a single directory with 9 nodes in it. + */ +enum { + NFSD_Root = 1, + NFSD_Svc, + NFSD_Add, + NFSD_Del, + NFSD_Export, + NFSD_Unexport, + NFSD_Getfd, + NFSD_Getfs, + NFSD_List, + NFSD_Fh, + NFSD_Threads, + NFSD_Leasetime, +}; + +/* + * write() for these nodes. + */ +static ssize_t write_svc(struct file *file, char *buf, size_t size); +static ssize_t write_add(struct file *file, char *buf, size_t size); +static ssize_t write_del(struct file *file, char *buf, size_t size); +static ssize_t write_export(struct file *file, char *buf, size_t size); +static ssize_t write_unexport(struct file *file, char *buf, size_t size); +static ssize_t write_getfd(struct file *file, char *buf, size_t size); +static ssize_t write_getfs(struct file *file, char *buf, size_t size); +static ssize_t write_filehandle(struct file *file, char *buf, size_t size); +static ssize_t write_threads(struct file *file, char *buf, size_t size); +static ssize_t write_leasetime(struct file *file, char *buf, size_t size); + +static ssize_t (*write_op[])(struct file *, char *, size_t) = { + [NFSD_Svc] = write_svc, + [NFSD_Add] = write_add, + [NFSD_Del] = write_del, + [NFSD_Export] = write_export, + [NFSD_Unexport] = write_unexport, + [NFSD_Getfd] = write_getfd, + [NFSD_Getfs] = write_getfs, + [NFSD_Fh] = write_filehandle, + [NFSD_Threads] = write_threads, + [NFSD_Leasetime] = write_leasetime, +}; + +static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) +{ + ino_t ino = file->f_dentry->d_inode->i_ino; + char *data; + ssize_t rv; + + if (ino >= sizeof(write_op)/sizeof(write_op[0]) || !write_op[ino]) + return -EINVAL; + + data = simple_transaction_get(file, buf, size); + if (IS_ERR(data)) + return PTR_ERR(data); + + rv = write_op[ino](file, data, size); + if (rv>0) { + simple_transaction_set(file, rv); + rv = size; + } + return rv; +} + +static struct file_operations transaction_ops = { + .write = nfsctl_transaction_write, + .read = simple_transaction_read, + .release = simple_transaction_release, +}; + +extern struct seq_operations nfs_exports_op; +static int exports_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nfs_exports_op); +} + +static struct file_operations exports_operations = { + .open = exports_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/*----------------------------------------------------------------------------*/ +/* + * payload - write methods + * If the method has a response, the response should be put in buf, + * and the length returned. Otherwise return 0 or and -error. + */ + +static ssize_t write_svc(struct file *file, char *buf, size_t size) +{ + struct nfsctl_svc *data; + if (size < sizeof(*data)) + return -EINVAL; + data = (struct nfsctl_svc*) buf; + return nfsd_svc(data->svc_port, data->svc_nthreads); +} + +static ssize_t write_add(struct file *file, char *buf, size_t size) +{ + struct nfsctl_client *data; + if (size < sizeof(*data)) + return -EINVAL; + data = (struct nfsctl_client *)buf; + return exp_addclient(data); +} + +static ssize_t write_del(struct file *file, char *buf, size_t size) +{ + struct nfsctl_client *data; + if (size < sizeof(*data)) + return -EINVAL; + data = (struct nfsctl_client *)buf; + return exp_delclient(data); +} + +static ssize_t write_export(struct file *file, char *buf, size_t size) +{ + struct nfsctl_export *data; + if (size < sizeof(*data)) + return -EINVAL; + data = (struct nfsctl_export*)buf; + return exp_export(data); +} + +static ssize_t write_unexport(struct file *file, char *buf, size_t size) +{ + struct nfsctl_export *data; + + if (size < sizeof(*data)) + return -EINVAL; + data = (struct nfsctl_export*)buf; + return exp_unexport(data); +} + +static ssize_t write_getfs(struct file *file, char *buf, size_t size) +{ + struct nfsctl_fsparm *data; + struct sockaddr_in *sin; + struct auth_domain *clp; + int err = 0; + struct knfsd_fh *res; + + if (size < sizeof(*data)) + return -EINVAL; + data = (struct nfsctl_fsparm*)buf; + err = -EPROTONOSUPPORT; + if (data->gd_addr.sa_family != AF_INET) + goto out; + sin = (struct sockaddr_in *)&data->gd_addr; + if (data->gd_maxlen > NFS3_FHSIZE) + data->gd_maxlen = NFS3_FHSIZE; + + res = (struct knfsd_fh*)buf; + + exp_readlock(); + if (!(clp = auth_unix_lookup(sin->sin_addr))) + err = -EPERM; + else { + err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen); + auth_domain_put(clp); + } + exp_readunlock(); + if (err == 0) + err = res->fh_size + (int)&((struct knfsd_fh*)0)->fh_base; + out: + return err; +} + +static ssize_t write_getfd(struct file *file, char *buf, size_t size) +{ + struct nfsctl_fdparm *data; + struct sockaddr_in *sin; + struct auth_domain *clp; + int err = 0; + struct knfsd_fh fh; + char *res; + + if (size < sizeof(*data)) + return -EINVAL; + data = (struct nfsctl_fdparm*)buf; + err = -EPROTONOSUPPORT; + if (data->gd_addr.sa_family != AF_INET) + goto out; + err = -EINVAL; + if (data->gd_version < 2 || data->gd_version > NFSSVC_MAXVERS) + goto out; + + res = buf; + sin = (struct sockaddr_in *)&data->gd_addr; + exp_readlock(); + if (!(clp = auth_unix_lookup(sin->sin_addr))) + err = -EPERM; + else { + err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE); + auth_domain_put(clp); + } + exp_readunlock(); + + if (err == 0) { + memset(res,0, NFS_FHSIZE); + memcpy(res, &fh.fh_base, fh.fh_size); + err = NFS_FHSIZE; + } + out: + return err; +} + +static ssize_t write_filehandle(struct file *file, char *buf, size_t size) +{ + /* request is: + * domain path maxsize + * response is + * filehandle + * + * qword quoting is used, so filehandle will be \x.... + */ + char *dname, *path; + int maxsize; + char *mesg = buf; + int len; + struct auth_domain *dom; + struct knfsd_fh fh; + + if (buf[size-1] != '\n') + return -EINVAL; + buf[size-1] = 0; + + dname = mesg; + len = qword_get(&mesg, dname, size); + if (len <= 0) return -EINVAL; + + path = dname+len+1; + len = qword_get(&mesg, path, size); + if (len <= 0) return -EINVAL; + + len = get_int(&mesg, &maxsize); + if (len) + return len; + + if (maxsize < NFS_FHSIZE) + return -EINVAL; + if (maxsize > NFS3_FHSIZE) + maxsize = NFS3_FHSIZE; + + if (qword_get(&mesg, mesg, size)>0) + return -EINVAL; + + /* we have all the words, they are in buf.. */ + dom = unix_domain_find(dname); + if (!dom) + return -ENOMEM; + + len = exp_rootfh(dom, path, &fh, maxsize); + auth_domain_put(dom); + if (len) + return len; + + mesg = buf; len = SIMPLE_TRANSACTION_LIMIT; + qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size); + mesg[-1] = '\n'; + return mesg - buf; +} + +extern int nfsd_nrthreads(void); + +static ssize_t write_threads(struct file *file, char *buf, size_t size) +{ + /* if size > 0, look for a number of threads and call nfsd_svc + * then write out number of threads as reply + */ + char *mesg = buf; + int rv; + if (size > 0) { + int newthreads; + rv = get_int(&mesg, &newthreads); + if (rv) + return rv; + if (newthreads <0) + return -EINVAL; + rv = nfsd_svc(2049, newthreads); + if (rv) + return rv; + } + sprintf(buf, "%d\n", nfsd_nrthreads()); + return strlen(buf); +} + +extern time_t nfs4_leasetime(void); + +static ssize_t write_leasetime(struct file *file, char *buf, size_t size) +{ + /* if size > 10 seconds, call + * nfs4_reset_lease() then write out the new lease (seconds) as reply + */ + char *mesg = buf; + int rv; + + if (size > 0) { + int lease; + rv = get_int(&mesg, &lease); + if (rv) + return rv; + if (lease < 10 || lease > 3600) + return -EINVAL; + nfs4_reset_lease(lease); + } + sprintf(buf, "%ld\n", nfs4_lease_time()); + return strlen(buf); +} + +/*----------------------------------------------------------------------------*/ +/* + * populating the filesystem. + */ + +static int nfsd_fill_super(struct super_block * sb, void * data, int silent) +{ + static struct tree_descr nfsd_files[] = { + [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, + [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, + [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, + [NFSD_Export] = {".export", &transaction_ops, S_IWUSR}, + [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, + [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, +#ifdef CONFIG_NFSD_V4 + [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, +#endif + /* last one */ {""} + }; + return simple_fill_super(sb, 0x6e667364, nfsd_files); +} + +static struct super_block *nfsd_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_single(fs_type, flags, data, nfsd_fill_super); +} + +static struct file_system_type nfsd_fs_type = { + .owner = THIS_MODULE, + .name = "nfsd", + .get_sb = nfsd_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init init_nfsd(void) +{ + int retval; + printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); + + nfsd_stat_init(); /* Statistics */ + nfsd_cache_init(); /* RPC reply cache */ + nfsd_export_init(); /* Exports table */ + nfsd_lockd_init(); /* lockd->nfsd callbacks */ +#ifdef CONFIG_NFSD_V4 + nfsd_idmap_init(); /* Name to ID mapping */ +#endif /* CONFIG_NFSD_V4 */ + if (proc_mkdir("fs/nfs", NULL)) { + struct proc_dir_entry *entry; + entry = create_proc_entry("fs/nfs/exports", 0, NULL); + if (entry) + entry->proc_fops = &exports_operations; + } + retval = register_filesystem(&nfsd_fs_type); + if (retval) { + nfsd_export_shutdown(); + nfsd_cache_shutdown(); + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); + nfsd_stat_shutdown(); + nfsd_lockd_shutdown(); + } + return retval; +} + +static void __exit exit_nfsd(void) +{ + nfsd_export_shutdown(); + nfsd_cache_shutdown(); + remove_proc_entry("fs/nfs/exports", NULL); + remove_proc_entry("fs/nfs", NULL); + nfsd_stat_shutdown(); + nfsd_lockd_shutdown(); +#ifdef CONFIG_NFSD_V4 + nfsd_idmap_shutdown(); +#endif /* CONFIG_NFSD_V4 */ + unregister_filesystem(&nfsd_fs_type); +} + +MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>"); +MODULE_LICENSE("GPL"); +module_init(init_nfsd) +module_exit(exit_nfsd) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c new file mode 100644 index 00000000000..7a3e397b4ed --- /dev/null +++ b/fs/nfsd/nfsfh.c @@ -0,0 +1,532 @@ +/* + * linux/fs/nfsd/nfsfh.c + * + * NFS server file handle treatment. + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + * Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org> + * Extensive rewrite by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999 + * ... and again Southern-Winter 2001 to support export_operations + */ + +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/smp_lock.h> +#include <linux/fs.h> +#include <linux/unistd.h> +#include <linux/string.h> +#include <linux/stat.h> +#include <linux/dcache.h> +#include <linux/mount.h> +#include <asm/pgtable.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> + +#define NFSDDBG_FACILITY NFSDDBG_FH +#define NFSD_PARANOIA 1 +/* #define NFSD_DEBUG_VERBOSE 1 */ + + +static int nfsd_nr_verified; +static int nfsd_nr_put; + +extern struct export_operations export_op_default; + +#define CALL(ops,fun) ((ops->fun)?(ops->fun):export_op_default.fun) + +/* + * our acceptability function. + * if NOSUBTREECHECK, accept anything + * if not, require that we can walk up to exp->ex_dentry + * doing some checks on the 'x' bits + */ +static int nfsd_acceptable(void *expv, struct dentry *dentry) +{ + struct svc_export *exp = expv; + int rv; + struct dentry *tdentry; + struct dentry *parent; + + if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) + return 1; + + tdentry = dget(dentry); + while (tdentry != exp->ex_dentry && ! IS_ROOT(tdentry)) { + /* make sure parents give x permission to user */ + int err; + parent = dget_parent(tdentry); + err = permission(parent->d_inode, MAY_EXEC, NULL); + if (err < 0) { + dput(parent); + break; + } + dput(tdentry); + tdentry = parent; + } + if (tdentry != exp->ex_dentry) + dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name); + rv = (tdentry == exp->ex_dentry); + dput(tdentry); + return rv; +} + +/* Type check. The correct error return for type mismatches does not seem to be + * generally agreed upon. SunOS seems to use EISDIR if file isn't S_IFREG; a + * comment in the NFSv3 spec says this is incorrect (implementation notes for + * the write call). + */ +static inline int +nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) +{ + /* Type can be negative when creating hardlinks - not to a dir */ + if (type > 0 && (mode & S_IFMT) != type) { + if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) + return nfserr_symlink; + else if (type == S_IFDIR) + return nfserr_notdir; + else if ((mode & S_IFMT) == S_IFDIR) + return nfserr_isdir; + else + return nfserr_inval; + } + if (type < 0 && (mode & S_IFMT) == -type) { + if (rqstp->rq_vers == 4 && (mode & S_IFMT) == S_IFLNK) + return nfserr_symlink; + else if (type == -S_IFDIR) + return nfserr_isdir; + else + return nfserr_notdir; + } + return 0; +} + +/* + * Perform sanity checks on the dentry in a client's file handle. + * + * Note that the file handle dentry may need to be freed even after + * an error return. + * + * This is only called at the start of an nfsproc call, so fhp points to + * a svc_fh which is all 0 except for the over-the-wire file handle. + */ +u32 +fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) +{ + struct knfsd_fh *fh = &fhp->fh_handle; + struct svc_export *exp = NULL; + struct dentry *dentry; + u32 error = 0; + + dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); + + /* keep this filehandle for possible reference when encoding attributes */ + rqstp->rq_reffh = fh; + + if (!fhp->fh_dentry) { + __u32 *datap=NULL; + __u32 tfh[3]; /* filehandle fragment for oldstyle filehandles */ + int fileid_type; + int data_left = fh->fh_size/4; + + error = nfserr_stale; + if (rqstp->rq_client == NULL) + goto out; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + if (rqstp->rq_vers == 4 && fh->fh_size == 0) + return nfserr_nofilehandle; + + if (fh->fh_version == 1) { + int len; + datap = fh->fh_auth; + if (--data_left<0) goto out; + switch (fh->fh_auth_type) { + case 0: break; + default: goto out; + } + len = key_len(fh->fh_fsid_type) / 4; + if (len == 0) goto out; + if (fh->fh_fsid_type == 2) { + /* deprecated, convert to type 3 */ + len = 3; + fh->fh_fsid_type = 3; + fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1]))); + fh->fh_fsid[1] = fh->fh_fsid[2]; + } + if ((data_left -= len)<0) goto out; + exp = exp_find(rqstp->rq_client, fh->fh_fsid_type, datap, &rqstp->rq_chandle); + datap += len; + } else { + dev_t xdev; + ino_t xino; + if (fh->fh_size != NFS_FHSIZE) + goto out; + /* assume old filehandle format */ + xdev = old_decode_dev(fh->ofh_xdev); + xino = u32_to_ino_t(fh->ofh_xino); + mk_fsid_v0(tfh, xdev, xino); + exp = exp_find(rqstp->rq_client, 0, tfh, &rqstp->rq_chandle); + } + + error = nfserr_dropit; + if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN) + goto out; + + error = nfserr_stale; + if (!exp || IS_ERR(exp)) + goto out; + + /* Check if the request originated from a secure port. */ + error = nfserr_perm; + if (!rqstp->rq_secure && EX_SECURE(exp)) { + printk(KERN_WARNING + "nfsd: request from insecure port (%u.%u.%u.%u:%d)!\n", + NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port)); + goto out; + } + + /* Set user creds for this exportpoint */ + error = nfsd_setuser(rqstp, exp); + if (error) { + error = nfserrno(error); + goto out; + } + + /* + * Look up the dentry using the NFS file handle. + */ + error = nfserr_stale; + if (rqstp->rq_vers > 2) + error = nfserr_badhandle; + + if (fh->fh_version != 1) { + tfh[0] = fh->ofh_ino; + tfh[1] = fh->ofh_generation; + tfh[2] = fh->ofh_dirino; + datap = tfh; + data_left = 3; + if (fh->ofh_dirino == 0) + fileid_type = 1; + else + fileid_type = 2; + } else + fileid_type = fh->fh_fileid_type; + + if (fileid_type == 0) + dentry = dget(exp->ex_dentry); + else { + struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op; + dentry = CALL(nop,decode_fh)(exp->ex_mnt->mnt_sb, + datap, data_left, + fileid_type, + nfsd_acceptable, exp); + } + if (dentry == NULL) + goto out; + if (IS_ERR(dentry)) { + if (PTR_ERR(dentry) != -EINVAL) + error = nfserrno(PTR_ERR(dentry)); + goto out; + } +#ifdef NFSD_PARANOIA + if (S_ISDIR(dentry->d_inode->i_mode) && + (dentry->d_flags & DCACHE_DISCONNECTED)) { + printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + } +#endif + + fhp->fh_dentry = dentry; + fhp->fh_export = exp; + nfsd_nr_verified++; + } else { + /* just rechecking permissions + * (e.g. nfsproc_create calls fh_verify, then nfsd_create does as well) + */ + dprintk("nfsd: fh_verify - just checking\n"); + dentry = fhp->fh_dentry; + exp = fhp->fh_export; + } + cache_get(&exp->h); + + error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type); + if (error) + goto out; + + /* Finally, check access permissions. */ + error = nfsd_permission(exp, dentry, access); + +#ifdef NFSD_PARANOIA_EXTREME + if (error) { + printk("fh_verify: %s/%s permission failure, acc=%x, error=%d\n", + dentry->d_parent->d_name.name, dentry->d_name.name, access, (error >> 24)); + } +#endif +out: + if (exp && !IS_ERR(exp)) + exp_put(exp); + if (error == nfserr_stale) + nfsdstats.fh_stale++; + return error; +} + + +/* + * Compose a file handle for an NFS reply. + * + * Note that when first composed, the dentry may not yet have + * an inode. In this case a call to fh_update should be made + * before the fh goes out on the wire ... + */ +static inline int _fh_update(struct dentry *dentry, struct svc_export *exp, + __u32 *datap, int *maxsize) +{ + struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op; + + if (dentry == exp->ex_dentry) { + *maxsize = 0; + return 0; + } + + return CALL(nop,encode_fh)(dentry, datap, maxsize, + !(exp->ex_flags&NFSEXP_NOSUBTREECHECK)); +} + +/* + * for composing old style file handles + */ +static inline void _fh_update_old(struct dentry *dentry, + struct svc_export *exp, + struct knfsd_fh *fh) +{ + fh->ofh_ino = ino_t_to_u32(dentry->d_inode->i_ino); + fh->ofh_generation = dentry->d_inode->i_generation; + if (S_ISDIR(dentry->d_inode->i_mode) || + (exp->ex_flags & NFSEXP_NOSUBTREECHECK)) + fh->ofh_dirino = 0; +} + +int +fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh) +{ + /* ref_fh is a reference file handle. + * if it is non-null, then we should compose a filehandle which is + * of the same version, where possible. + * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca + * Then create a 32byte filehandle using nfs_fhbase_old + * + */ + + u8 ref_fh_version = 0; + u8 ref_fh_fsid_type = 0; + struct inode * inode = dentry->d_inode; + struct dentry *parent = dentry->d_parent; + __u32 *datap; + dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev; + + dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n", + MAJOR(ex_dev), MINOR(ex_dev), + (long) exp->ex_dentry->d_inode->i_ino, + parent->d_name.name, dentry->d_name.name, + (inode ? inode->i_ino : 0)); + + if (ref_fh) { + ref_fh_version = ref_fh->fh_handle.fh_version; + if (ref_fh_version == 0xca) + ref_fh_fsid_type = 0; + else + ref_fh_fsid_type = ref_fh->fh_handle.fh_fsid_type; + if (ref_fh_fsid_type > 3) + ref_fh_fsid_type = 0; + + /* make sure ref_fh type works for given export */ + if (ref_fh_fsid_type == 1 && + !(exp->ex_flags & NFSEXP_FSID)) { + /* if we don't have an fsid, we cannot provide one... */ + ref_fh_fsid_type = 0; + } + } else if (exp->ex_flags & NFSEXP_FSID) + ref_fh_fsid_type = 1; + + if (!old_valid_dev(ex_dev) && ref_fh_fsid_type == 0) { + /* for newer device numbers, we must use a newer fsid format */ + ref_fh_version = 1; + ref_fh_fsid_type = 3; + } + if (old_valid_dev(ex_dev) && + (ref_fh_fsid_type == 2 || ref_fh_fsid_type == 3)) + /* must use type1 for smaller device numbers */ + ref_fh_fsid_type = 0; + + if (ref_fh == fhp) + fh_put(ref_fh); + + if (fhp->fh_locked || fhp->fh_dentry) { + printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n", + parent->d_name.name, dentry->d_name.name); + } + if (fhp->fh_maxsize < NFS_FHSIZE) + printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n", + fhp->fh_maxsize, parent->d_name.name, dentry->d_name.name); + + fhp->fh_dentry = dget(dentry); /* our internal copy */ + fhp->fh_export = exp; + cache_get(&exp->h); + + if (ref_fh_version == 0xca) { + /* old style filehandle please */ + memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE); + fhp->fh_handle.fh_size = NFS_FHSIZE; + fhp->fh_handle.ofh_dcookie = 0xfeebbaca; + fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev); + fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev; + fhp->fh_handle.ofh_xino = ino_t_to_u32(exp->ex_dentry->d_inode->i_ino); + fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry)); + if (inode) + _fh_update_old(dentry, exp, &fhp->fh_handle); + } else { + int len; + fhp->fh_handle.fh_version = 1; + fhp->fh_handle.fh_auth_type = 0; + datap = fhp->fh_handle.fh_auth+0; + fhp->fh_handle.fh_fsid_type = ref_fh_fsid_type; + switch (ref_fh_fsid_type) { + case 0: + /* + * fsid_type 0: + * 2byte major, 2byte minor, 4byte inode + */ + mk_fsid_v0(datap, ex_dev, + exp->ex_dentry->d_inode->i_ino); + break; + case 1: + /* fsid_type 1 == 4 bytes filesystem id */ + mk_fsid_v1(datap, exp->ex_fsid); + break; + case 2: + /* + * fsid_type 2: + * 4byte major, 4byte minor, 4byte inode + */ + mk_fsid_v2(datap, ex_dev, + exp->ex_dentry->d_inode->i_ino); + break; + case 3: + /* + * fsid_type 3: + * 4byte devicenumber, 4byte inode + */ + mk_fsid_v3(datap, ex_dev, + exp->ex_dentry->d_inode->i_ino); + break; + } + len = key_len(ref_fh_fsid_type); + datap += len/4; + fhp->fh_handle.fh_size = 4 + len; + + if (inode) { + int size = (fhp->fh_maxsize-len-4)/4; + fhp->fh_handle.fh_fileid_type = + _fh_update(dentry, exp, datap, &size); + fhp->fh_handle.fh_size += size*4; + } + if (fhp->fh_handle.fh_fileid_type == 255) + return nfserr_opnotsupp; + } + + nfsd_nr_verified++; + return 0; +} + +/* + * Update file handle information after changing a dentry. + * This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create + */ +int +fh_update(struct svc_fh *fhp) +{ + struct dentry *dentry; + __u32 *datap; + + if (!fhp->fh_dentry) + goto out_bad; + + dentry = fhp->fh_dentry; + if (!dentry->d_inode) + goto out_negative; + if (fhp->fh_handle.fh_version != 1) { + _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle); + } else { + int size; + if (fhp->fh_handle.fh_fileid_type != 0) + goto out_uptodate; + datap = fhp->fh_handle.fh_auth+ + fhp->fh_handle.fh_size/4 -1; + size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4; + fhp->fh_handle.fh_fileid_type = + _fh_update(dentry, fhp->fh_export, datap, &size); + fhp->fh_handle.fh_size += size*4; + if (fhp->fh_handle.fh_fileid_type == 255) + return nfserr_opnotsupp; + } +out: + return 0; + +out_bad: + printk(KERN_ERR "fh_update: fh not verified!\n"); + goto out; +out_negative: + printk(KERN_ERR "fh_update: %s/%s still negative!\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + goto out; +out_uptodate: + printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n", + dentry->d_parent->d_name.name, dentry->d_name.name); + goto out; +} + +/* + * Release a file handle. + */ +void +fh_put(struct svc_fh *fhp) +{ + struct dentry * dentry = fhp->fh_dentry; + struct svc_export * exp = fhp->fh_export; + if (dentry) { + fh_unlock(fhp); + fhp->fh_dentry = NULL; + dput(dentry); +#ifdef CONFIG_NFSD_V3 + fhp->fh_pre_saved = 0; + fhp->fh_post_saved = 0; +#endif + nfsd_nr_put++; + } + if (exp) { + svc_export_put(&exp->h, &svc_export_cache); + fhp->fh_export = NULL; + } + return; +} + +/* + * Shorthand for dprintk()'s + */ +char * SVCFH_fmt(struct svc_fh *fhp) +{ + struct knfsd_fh *fh = &fhp->fh_handle; + + static char buf[80]; + sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x", + fh->fh_size, + fh->fh_base.fh_pad[0], + fh->fh_base.fh_pad[1], + fh->fh_base.fh_pad[2], + fh->fh_base.fh_pad[3], + fh->fh_base.fh_pad[4], + fh->fh_base.fh_pad[5]); + return buf; +} diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c new file mode 100644 index 00000000000..757f9d20803 --- /dev/null +++ b/fs/nfsd/nfsproc.c @@ -0,0 +1,605 @@ +/* + * nfsproc2.c Process version 2 NFS requests. + * linux/fs/nfsd/nfs2proc.c + * + * Process version 2 NFS requests. + * + * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/linkage.h> +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/in.h> +#include <linux/namei.h> +#include <linux/unistd.h> +#include <linux/slab.h> + +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/cache.h> +#include <linux/nfsd/xdr.h> + +typedef struct svc_rqst svc_rqst; +typedef struct svc_buf svc_buf; + +#define NFSDDBG_FACILITY NFSDDBG_PROC + + +static int +nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return nfs_ok; +} + +/* + * Get a file's attributes + * N.B. After this call resp->fh needs an fh_put + */ +static int +nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, + struct nfsd_attrstat *resp) +{ + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); + + fh_copy(&resp->fh, &argp->fh); + return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); +} + +/* + * Set a file's attributes + * N.B. After this call resp->fh needs an fh_put + */ +static int +nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp, + struct nfsd_attrstat *resp) +{ + dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n", + SVCFH_fmt(&argp->fh), + argp->attrs.ia_valid, (long) argp->attrs.ia_size); + + fh_copy(&resp->fh, &argp->fh); + return nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0); +} + +/* + * Look up a path name component + * Note: the dentry in the resp->fh may be negative if the file + * doesn't exist yet. + * N.B. After this call resp->fh needs an fh_put + */ +static int +nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, + struct nfsd_diropres *resp) +{ + int nfserr; + + dprintk("nfsd: LOOKUP %s %.*s\n", + SVCFH_fmt(&argp->fh), argp->len, argp->name); + + fh_init(&resp->fh, NFS_FHSIZE); + nfserr = nfsd_lookup(rqstp, &argp->fh, argp->name, argp->len, + &resp->fh); + + fh_put(&argp->fh); + return nfserr; +} + +/* + * Read a symlink. + */ +static int +nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp, + struct nfsd_readlinkres *resp) +{ + int nfserr; + + dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); + + /* Read the symlink. */ + resp->len = NFS_MAXPATHLEN; + nfserr = nfsd_readlink(rqstp, &argp->fh, argp->buffer, &resp->len); + + fh_put(&argp->fh); + return nfserr; +} + +/* + * Read a portion of a file. + * N.B. After this call resp->fh needs an fh_put + */ +static int +nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, + struct nfsd_readres *resp) +{ + int nfserr; + + dprintk("nfsd: READ %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->count, argp->offset); + + /* Obtain buffer pointer for payload. 19 is 1 word for + * status, 17 words for fattr, and 1 word for the byte count. + */ + + if (NFSSVC_MAXBLKSIZE < argp->count) { + printk(KERN_NOTICE + "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n", + NIPQUAD(rqstp->rq_addr.sin_addr.s_addr), + ntohs(rqstp->rq_addr.sin_port), + argp->count); + argp->count = NFSSVC_MAXBLKSIZE; + } + svc_reserve(rqstp, (19<<2) + argp->count + 4); + + resp->count = argp->count; + nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, + argp->offset, + argp->vec, argp->vlen, + &resp->count); + + return nfserr; +} + +/* + * Write data to a file + * N.B. After this call resp->fh needs an fh_put + */ +static int +nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, + struct nfsd_attrstat *resp) +{ + int nfserr; + int stable = 1; + + dprintk("nfsd: WRITE %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->len, argp->offset); + + nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL, + argp->offset, + argp->vec, argp->vlen, + argp->len, + &stable); + return nfserr; +} + +/* + * CREATE processing is complicated. The keyword here is `overloaded.' + * The parent directory is kept locked between the check for existence + * and the actual create() call in compliance with VFS protocols. + * N.B. After this call _both_ argp->fh and resp->fh need an fh_put + */ +static int +nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, + struct nfsd_diropres *resp) +{ + svc_fh *dirfhp = &argp->fh; + svc_fh *newfhp = &resp->fh; + struct iattr *attr = &argp->attrs; + struct inode *inode; + struct dentry *dchild; + int nfserr, type, mode; + dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); + + dprintk("nfsd: CREATE %s %.*s\n", + SVCFH_fmt(dirfhp), argp->len, argp->name); + + /* First verify the parent file handle */ + nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC); + if (nfserr) + goto done; /* must fh_put dirfhp even on error */ + + /* Check for MAY_WRITE in nfsd_create if necessary */ + + nfserr = nfserr_acces; + if (!argp->len) + goto done; + nfserr = nfserr_exist; + if (isdotent(argp->name, argp->len)) + goto done; + fh_lock(dirfhp); + dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len); + if (IS_ERR(dchild)) { + nfserr = nfserrno(PTR_ERR(dchild)); + goto out_unlock; + } + fh_init(newfhp, NFS_FHSIZE); + nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp); + if (!nfserr && !dchild->d_inode) + nfserr = nfserr_noent; + dput(dchild); + if (nfserr) { + if (nfserr != nfserr_noent) + goto out_unlock; + /* + * If the new file handle wasn't verified, we can't tell + * whether the file exists or not. Time to bail ... + */ + nfserr = nfserr_acces; + if (!newfhp->fh_dentry) { + printk(KERN_WARNING + "nfsd_proc_create: file handle not verified\n"); + goto out_unlock; + } + } + + inode = newfhp->fh_dentry->d_inode; + + /* Unfudge the mode bits */ + if (attr->ia_valid & ATTR_MODE) { + type = attr->ia_mode & S_IFMT; + mode = attr->ia_mode & ~S_IFMT; + if (!type) { + /* no type, so if target exists, assume same as that, + * else assume a file */ + if (inode) { + type = inode->i_mode & S_IFMT; + switch(type) { + case S_IFCHR: + case S_IFBLK: + /* reserve rdev for later checking */ + rdev = inode->i_rdev; + attr->ia_valid |= ATTR_SIZE; + + /* FALLTHROUGH */ + case S_IFIFO: + /* this is probably a permission check.. + * at least IRIX implements perm checking on + * echo thing > device-special-file-or-pipe + * by doing a CREATE with type==0 + */ + nfserr = nfsd_permission(newfhp->fh_export, + newfhp->fh_dentry, + MAY_WRITE|MAY_LOCAL_ACCESS); + if (nfserr && nfserr != nfserr_rofs) + goto out_unlock; + } + } else + type = S_IFREG; + } + } else if (inode) { + type = inode->i_mode & S_IFMT; + mode = inode->i_mode & ~S_IFMT; + } else { + type = S_IFREG; + mode = 0; /* ??? */ + } + + attr->ia_valid |= ATTR_MODE; + attr->ia_mode = mode; + + /* Special treatment for non-regular files according to the + * gospel of sun micro + */ + if (type != S_IFREG) { + int is_borc = 0; + if (type != S_IFBLK && type != S_IFCHR) { + rdev = 0; + } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) { + /* If you think you've seen the worst, grok this. */ + type = S_IFIFO; + } else { + /* Okay, char or block special */ + is_borc = 1; + if (!rdev) + rdev = wanted; + } + + /* we've used the SIZE information, so discard it */ + attr->ia_valid &= ~ATTR_SIZE; + + /* Make sure the type and device matches */ + nfserr = nfserr_exist; + if (inode && type != (inode->i_mode & S_IFMT)) + goto out_unlock; + } + + nfserr = 0; + if (!inode) { + /* File doesn't exist. Create it and set attrs */ + nfserr = nfsd_create(rqstp, dirfhp, argp->name, argp->len, + attr, type, rdev, newfhp); + } else if (type == S_IFREG) { + dprintk("nfsd: existing %s, valid=%x, size=%ld\n", + argp->name, attr->ia_valid, (long) attr->ia_size); + /* File already exists. We ignore all attributes except + * size, so that creat() behaves exactly like + * open(..., O_CREAT|O_TRUNC|O_WRONLY). + */ + attr->ia_valid &= ATTR_SIZE; + if (attr->ia_valid) + nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0); + } + +out_unlock: + /* We don't really need to unlock, as fh_put does it. */ + fh_unlock(dirfhp); + +done: + fh_put(dirfhp); + return nfserr; +} + +static int +nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, + void *resp) +{ + int nfserr; + + dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh), + argp->len, argp->name); + + /* Unlink. -SIFDIR means file must not be a directory */ + nfserr = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len); + fh_put(&argp->fh); + return nfserr; +} + +static int +nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp, + void *resp) +{ + int nfserr; + + dprintk("nfsd: RENAME %s %.*s -> \n", + SVCFH_fmt(&argp->ffh), argp->flen, argp->fname); + dprintk("nfsd: -> %s %.*s\n", + SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname); + + nfserr = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen, + &argp->tfh, argp->tname, argp->tlen); + fh_put(&argp->ffh); + fh_put(&argp->tfh); + return nfserr; +} + +static int +nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp, + void *resp) +{ + int nfserr; + + dprintk("nfsd: LINK %s ->\n", + SVCFH_fmt(&argp->ffh)); + dprintk("nfsd: %s %.*s\n", + SVCFH_fmt(&argp->tfh), + argp->tlen, + argp->tname); + + nfserr = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen, + &argp->ffh); + fh_put(&argp->ffh); + fh_put(&argp->tfh); + return nfserr; +} + +static int +nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, + void *resp) +{ + struct svc_fh newfh; + int nfserr; + + dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", + SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, + argp->tlen, argp->tname); + + fh_init(&newfh, NFS_FHSIZE); + /* + * Create the link, look up new file and set attrs. + */ + nfserr = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, + argp->tname, argp->tlen, + &newfh, &argp->attrs); + + + fh_put(&argp->ffh); + fh_put(&newfh); + return nfserr; +} + +/* + * Make directory. This operation is not idempotent. + * N.B. After this call resp->fh needs an fh_put + */ +static int +nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp, + struct nfsd_diropres *resp) +{ + int nfserr; + + dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); + + if (resp->fh.fh_dentry) { + printk(KERN_WARNING + "nfsd_proc_mkdir: response already verified??\n"); + } + + argp->attrs.ia_valid &= ~ATTR_SIZE; + fh_init(&resp->fh, NFS_FHSIZE); + nfserr = nfsd_create(rqstp, &argp->fh, argp->name, argp->len, + &argp->attrs, S_IFDIR, 0, &resp->fh); + fh_put(&argp->fh); + return nfserr; +} + +/* + * Remove a directory + */ +static int +nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, + void *resp) +{ + int nfserr; + + dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); + + nfserr = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len); + fh_put(&argp->fh); + return nfserr; +} + +/* + * Read a portion of a directory. + */ +static int +nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp, + struct nfsd_readdirres *resp) +{ + int nfserr, count; + loff_t offset; + + dprintk("nfsd: READDIR %s %d bytes at %d\n", + SVCFH_fmt(&argp->fh), + argp->count, argp->cookie); + + /* Shrink to the client read size */ + count = (argp->count >> 2) - 2; + + /* Make sure we've room for the NULL ptr & eof flag */ + count -= 2; + if (count < 0) + count = 0; + + resp->buffer = argp->buffer; + resp->offset = NULL; + resp->buflen = count; + resp->common.err = nfs_ok; + /* Read directory and encode entries on the fly */ + offset = argp->cookie; + nfserr = nfsd_readdir(rqstp, &argp->fh, &offset, + &resp->common, nfssvc_encode_entry); + + resp->count = resp->buffer - argp->buffer; + if (resp->offset) + *resp->offset = htonl(offset); + + fh_put(&argp->fh); + return nfserr; +} + +/* + * Get file system info + */ +static int +nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, + struct nfsd_statfsres *resp) +{ + int nfserr; + + dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); + + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + fh_put(&argp->fh); + return nfserr; +} + +/* + * NFSv2 Server procedures. + * Only the results of non-idempotent operations are cached. + */ +#define nfsd_proc_none NULL +#define nfssvc_release_none NULL +struct nfsd_void { int dummy; }; + +#define PROC(name, argt, rest, relt, cache, respsize) \ + { (svc_procfunc) nfsd_proc_##name, \ + (kxdrproc_t) nfssvc_decode_##argt, \ + (kxdrproc_t) nfssvc_encode_##rest, \ + (kxdrproc_t) nfssvc_release_##relt, \ + sizeof(struct nfsd_##argt), \ + sizeof(struct nfsd_##rest), \ + 0, \ + cache, \ + respsize, \ + } + +#define ST 1 /* status */ +#define FH 8 /* filehandle */ +#define AT 18 /* attributes */ + +static struct svc_procedure nfsd_procedures2[18] = { + PROC(null, void, void, none, RC_NOCACHE, ST), + PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT), + PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), + PROC(none, void, void, none, RC_NOCACHE, ST), + PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT), + PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4), + PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE), + PROC(none, void, void, none, RC_NOCACHE, ST), + PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT), + PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), + PROC(remove, diropargs, void, none, RC_REPLSTAT, ST), + PROC(rename, renameargs, void, none, RC_REPLSTAT, ST), + PROC(link, linkargs, void, none, RC_REPLSTAT, ST), + PROC(symlink, symlinkargs, void, none, RC_REPLSTAT, ST), + PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT), + PROC(rmdir, diropargs, void, none, RC_REPLSTAT, ST), + PROC(readdir, readdirargs, readdirres, none, RC_NOCACHE, 0), + PROC(statfs, fhandle, statfsres, none, RC_NOCACHE, ST+5), +}; + + +struct svc_version nfsd_version2 = { + .vs_vers = 2, + .vs_nproc = 18, + .vs_proc = nfsd_procedures2, + .vs_dispatch = nfsd_dispatch, + .vs_xdrsize = NFS2_SVC_XDRSIZE, +}; + +/* + * Map errnos to NFS errnos. + */ +int +nfserrno (int errno) +{ + static struct { + int nfserr; + int syserr; + } nfs_errtbl[] = { + { nfs_ok, 0 }, + { nfserr_perm, -EPERM }, + { nfserr_noent, -ENOENT }, + { nfserr_io, -EIO }, + { nfserr_nxio, -ENXIO }, + { nfserr_acces, -EACCES }, + { nfserr_exist, -EEXIST }, + { nfserr_xdev, -EXDEV }, + { nfserr_mlink, -EMLINK }, + { nfserr_nodev, -ENODEV }, + { nfserr_notdir, -ENOTDIR }, + { nfserr_isdir, -EISDIR }, + { nfserr_inval, -EINVAL }, + { nfserr_fbig, -EFBIG }, + { nfserr_nospc, -ENOSPC }, + { nfserr_rofs, -EROFS }, + { nfserr_mlink, -EMLINK }, + { nfserr_nametoolong, -ENAMETOOLONG }, + { nfserr_notempty, -ENOTEMPTY }, +#ifdef EDQUOT + { nfserr_dquot, -EDQUOT }, +#endif + { nfserr_stale, -ESTALE }, + { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_dropit, -EAGAIN }, + { nfserr_dropit, -ENOMEM }, + { nfserr_badname, -ESRCH }, + { nfserr_io, -ETXTBSY }, + { -1, -EIO } + }; + int i; + + for (i = 0; nfs_errtbl[i].nfserr != -1; i++) { + if (nfs_errtbl[i].syserr == errno) + return nfs_errtbl[i].nfserr; + } + printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno); + return nfserr_io; +} + diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c new file mode 100644 index 00000000000..39551657e65 --- /dev/null +++ b/fs/nfsd/nfssvc.c @@ -0,0 +1,385 @@ +/* + * linux/fs/nfsd/nfssvc.c + * + * Central processing for nfsd. + * + * Authors: Olaf Kirch (okir@monad.swb.de) + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/config.h> +#include <linux/module.h> + +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/nfs.h> +#include <linux/in.h> +#include <linux/uio.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/fs_struct.h> + +#include <linux/sunrpc/types.h> +#include <linux/sunrpc/stats.h> +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/svcsock.h> +#include <linux/sunrpc/cache.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/stats.h> +#include <linux/nfsd/cache.h> +#include <linux/lockd/bind.h> + +#define NFSDDBG_FACILITY NFSDDBG_SVC + +/* these signals will be delivered to an nfsd thread + * when handling a request + */ +#define ALLOWED_SIGS (sigmask(SIGKILL)) +/* these signals will be delivered to an nfsd thread + * when not handling a request. i.e. when waiting + */ +#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT)) +/* if the last thread dies with SIGHUP, then the exports table is + * left unchanged ( like 2.4-{0-9} ). Any other signal will clear + * the exports table (like 2.2). + */ +#define SIG_NOCLEAN SIGHUP + +extern struct svc_program nfsd_program; +static void nfsd(struct svc_rqst *rqstp); +struct timeval nfssvc_boot; +static struct svc_serv *nfsd_serv; +static atomic_t nfsd_busy; +static unsigned long nfsd_last_call; +static DEFINE_SPINLOCK(nfsd_call_lock); + +struct nfsd_list { + struct list_head list; + struct task_struct *task; +}; +static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list); + +/* + * Maximum number of nfsd processes + */ +#define NFSD_MAXSERVS 8192 + +int nfsd_nrthreads(void) +{ + if (nfsd_serv == NULL) + return 0; + else + return nfsd_serv->sv_nrthreads; +} + +int +nfsd_svc(unsigned short port, int nrservs) +{ + int error; + int none_left; + struct list_head *victim; + + lock_kernel(); + dprintk("nfsd: creating service\n"); + error = -EINVAL; + if (nrservs <= 0) + nrservs = 0; + if (nrservs > NFSD_MAXSERVS) + nrservs = NFSD_MAXSERVS; + + /* Readahead param cache - will no-op if it already exists */ + error = nfsd_racache_init(2*nrservs); + if (error<0) + goto out; + error = nfs4_state_init(); + if (error<0) + goto out; + if (!nfsd_serv) { + atomic_set(&nfsd_busy, 0); + error = -ENOMEM; + nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE); + if (nfsd_serv == NULL) + goto out; + error = svc_makesock(nfsd_serv, IPPROTO_UDP, port); + if (error < 0) + goto failure; + +#ifdef CONFIG_NFSD_TCP + error = svc_makesock(nfsd_serv, IPPROTO_TCP, port); + if (error < 0) + goto failure; +#endif + do_gettimeofday(&nfssvc_boot); /* record boot time */ + } else + nfsd_serv->sv_nrthreads++; + nrservs -= (nfsd_serv->sv_nrthreads-1); + while (nrservs > 0) { + nrservs--; + __module_get(THIS_MODULE); + error = svc_create_thread(nfsd, nfsd_serv); + if (error < 0) { + module_put(THIS_MODULE); + break; + } + } + victim = nfsd_list.next; + while (nrservs < 0 && victim != &nfsd_list) { + struct nfsd_list *nl = + list_entry(victim,struct nfsd_list, list); + victim = victim->next; + send_sig(SIG_NOCLEAN, nl->task, 1); + nrservs++; + } + failure: + none_left = (nfsd_serv->sv_nrthreads == 1); + svc_destroy(nfsd_serv); /* Release server */ + if (none_left) { + nfsd_serv = NULL; + nfsd_racache_shutdown(); + nfs4_state_shutdown(); + } + out: + unlock_kernel(); + return error; +} + +static inline void +update_thread_usage(int busy_threads) +{ + unsigned long prev_call; + unsigned long diff; + int decile; + + spin_lock(&nfsd_call_lock); + prev_call = nfsd_last_call; + nfsd_last_call = jiffies; + decile = busy_threads*10/nfsdstats.th_cnt; + if (decile>0 && decile <= 10) { + diff = nfsd_last_call - prev_call; + if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) + nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP; + if (decile == 10) + nfsdstats.th_fullcnt++; + } + spin_unlock(&nfsd_call_lock); +} + +/* + * This is the NFS server kernel thread + */ +static void +nfsd(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + struct fs_struct *fsp; + int err; + struct nfsd_list me; + sigset_t shutdown_mask, allowed_mask; + + /* Lock module and set up kernel thread */ + lock_kernel(); + daemonize("nfsd"); + + /* After daemonize() this kernel thread shares current->fs + * with the init process. We need to create files with a + * umask of 0 instead of init's umask. */ + fsp = copy_fs_struct(current->fs); + if (!fsp) { + printk("Unable to start nfsd thread: out of memory\n"); + goto out; + } + exit_fs(current); + current->fs = fsp; + current->fs->umask = 0; + + siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS); + siginitsetinv(&allowed_mask, ALLOWED_SIGS); + + nfsdstats.th_cnt++; + + lockd_up(); /* start lockd */ + + me.task = current; + list_add(&me.list, &nfsd_list); + + unlock_kernel(); + + /* + * We want less throttling in balance_dirty_pages() so that nfs to + * localhost doesn't cause nfsd to lock up due to all the client's + * dirty pages. + */ + current->flags |= PF_LESS_THROTTLE; + + /* + * The main request loop + */ + for (;;) { + /* Block all but the shutdown signals */ + sigprocmask(SIG_SETMASK, &shutdown_mask, NULL); + + /* + * Find a socket with data available and call its + * recvfrom routine. + */ + while ((err = svc_recv(serv, rqstp, + 60*60*HZ)) == -EAGAIN) + ; + if (err < 0) + break; + update_thread_usage(atomic_read(&nfsd_busy)); + atomic_inc(&nfsd_busy); + + /* Lock the export hash tables for reading. */ + exp_readlock(); + + /* Process request with signals blocked. */ + sigprocmask(SIG_SETMASK, &allowed_mask, NULL); + + svc_process(serv, rqstp); + + /* Unlock export hash tables */ + exp_readunlock(); + update_thread_usage(atomic_read(&nfsd_busy)); + atomic_dec(&nfsd_busy); + } + + if (err != -EINTR) { + printk(KERN_WARNING "nfsd: terminating on error %d\n", -err); + } else { + unsigned int signo; + + for (signo = 1; signo <= _NSIG; signo++) + if (sigismember(¤t->pending.signal, signo) && + !sigismember(¤t->blocked, signo)) + break; + err = signo; + } + + lock_kernel(); + + /* Release lockd */ + lockd_down(); + + /* Check if this is last thread */ + if (serv->sv_nrthreads==1) { + + printk(KERN_WARNING "nfsd: last server has exited\n"); + if (err != SIG_NOCLEAN) { + printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); + nfsd_export_flush(); + } + nfsd_serv = NULL; + nfsd_racache_shutdown(); /* release read-ahead cache */ + nfs4_state_shutdown(); + } + list_del(&me.list); + nfsdstats.th_cnt --; + +out: + /* Release the thread */ + svc_exit_thread(rqstp); + + /* Release module */ + module_put_and_exit(0); +} + +int +nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) +{ + struct svc_procedure *proc; + kxdrproc_t xdr; + u32 nfserr; + u32 *nfserrp; + + dprintk("nfsd_dispatch: vers %d proc %d\n", + rqstp->rq_vers, rqstp->rq_proc); + proc = rqstp->rq_procinfo; + + /* Check whether we have this call in the cache. */ + switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) { + case RC_INTR: + case RC_DROPIT: + return 0; + case RC_REPLY: + return 1; + case RC_DOIT:; + /* do it */ + } + + /* Decode arguments */ + xdr = proc->pc_decode; + if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base, + rqstp->rq_argp)) { + dprintk("nfsd: failed to decode arguments!\n"); + nfsd_cache_update(rqstp, RC_NOCACHE, NULL); + *statp = rpc_garbage_args; + return 1; + } + + /* need to grab the location to store the status, as + * nfsv4 does some encoding while processing + */ + nfserrp = rqstp->rq_res.head[0].iov_base + + rqstp->rq_res.head[0].iov_len; + rqstp->rq_res.head[0].iov_len += sizeof(u32); + + /* Now call the procedure handler, and encode NFS status. */ + nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); + if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2) + nfserr = nfserr_dropit; + if (nfserr == nfserr_dropit) { + dprintk("nfsd: Dropping request due to malloc failure!\n"); + nfsd_cache_update(rqstp, RC_NOCACHE, NULL); + return 0; + } + + if (rqstp->rq_proc != 0) + *nfserrp++ = nfserr; + + /* Encode result. + * For NFSv2, additional info is never returned in case of an error. + */ + if (!(nfserr && rqstp->rq_vers == 2)) { + xdr = proc->pc_encode; + if (xdr && !xdr(rqstp, nfserrp, + rqstp->rq_resp)) { + /* Failed to encode result. Release cache entry */ + dprintk("nfsd: failed to encode result!\n"); + nfsd_cache_update(rqstp, RC_NOCACHE, NULL); + *statp = rpc_system_err; + return 1; + } + } + + /* Store reply in cache. */ + nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); + return 1; +} + +extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; + +static struct svc_version * nfsd_version[] = { + [2] = &nfsd_version2, +#if defined(CONFIG_NFSD_V3) + [3] = &nfsd_version3, +#endif +#if defined(CONFIG_NFSD_V4) + [4] = &nfsd_version4, +#endif +}; + +#define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0])) +struct svc_program nfsd_program = { + .pg_prog = NFS_PROGRAM, /* program number */ + .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */ + .pg_vers = nfsd_version, /* version table */ + .pg_name = "nfsd", /* program name */ + .pg_class = "nfsd", /* authentication class */ + .pg_stats = &nfsd_svcstats, /* version table */ + .pg_authenticate = &svc_set_client, /* export authentication */ + +}; diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c new file mode 100644 index 00000000000..948b08287c9 --- /dev/null +++ b/fs/nfsd/nfsxdr.c @@ -0,0 +1,511 @@ +/* + * linux/fs/nfsd/xdr.c + * + * XDR support for nfsd + * + * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/types.h> +#include <linux/time.h> +#include <linux/nfs.h> +#include <linux/vfs.h> +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/xdr.h> +#include <linux/mm.h> + +#define NFSDDBG_FACILITY NFSDDBG_XDR + + +#ifdef NFSD_OPTIMIZE_SPACE +# define inline +#endif + +/* + * Mapping of S_IF* types to NFS file types + */ +static u32 nfs_ftypes[] = { + NFNON, NFCHR, NFCHR, NFBAD, + NFDIR, NFBAD, NFBLK, NFBAD, + NFREG, NFBAD, NFLNK, NFBAD, + NFSOCK, NFBAD, NFLNK, NFBAD, +}; + + +/* + * XDR functions for basic NFS types + */ +static inline u32 * +decode_fh(u32 *p, struct svc_fh *fhp) +{ + fh_init(fhp, NFS_FHSIZE); + memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); + fhp->fh_handle.fh_size = NFS_FHSIZE; + + /* FIXME: Look up export pointer here and verify + * Sun Secure RPC if requested */ + return p + (NFS_FHSIZE >> 2); +} + +static inline u32 * +encode_fh(u32 *p, struct svc_fh *fhp) +{ + memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); + return p + (NFS_FHSIZE>> 2); +} + +/* + * Decode a file name and make sure that the path contains + * no slashes or null bytes. + */ +static inline u32 * +decode_filename(u32 *p, char **namp, int *lenp) +{ + char *name; + int i; + + if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0' || *name == '/') + return NULL; + } + } + + return p; +} + +static inline u32 * +decode_pathname(u32 *p, char **namp, int *lenp) +{ + char *name; + int i; + + if ((p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN)) != NULL) { + for (i = 0, name = *namp; i < *lenp; i++, name++) { + if (*name == '\0') + return NULL; + } + } + + return p; +} + +static inline u32 * +decode_sattr(u32 *p, struct iattr *iap) +{ + u32 tmp, tmp1; + + iap->ia_valid = 0; + + /* Sun client bug compatibility check: some sun clients seem to + * put 0xffff in the mode field when they mean 0xffffffff. + * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah. + */ + if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_UID; + iap->ia_uid = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_GID; + iap->ia_gid = tmp; + } + if ((tmp = ntohl(*p++)) != (u32)-1) { + iap->ia_valid |= ATTR_SIZE; + iap->ia_size = tmp; + } + tmp = ntohl(*p++); tmp1 = ntohl(*p++); + if (tmp != (u32)-1 && tmp1 != (u32)-1) { + iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET; + iap->ia_atime.tv_sec = tmp; + iap->ia_atime.tv_nsec = tmp1 * 1000; + } + tmp = ntohl(*p++); tmp1 = ntohl(*p++); + if (tmp != (u32)-1 && tmp1 != (u32)-1) { + iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET; + iap->ia_mtime.tv_sec = tmp; + iap->ia_mtime.tv_nsec = tmp1 * 1000; + /* + * Passing the invalid value useconds=1000000 for mtime + * is a Sun convention for "set both mtime and atime to + * current server time". It's needed to make permissions + * checks for the "touch" program across v2 mounts to + * Solaris and Irix boxes work correctly. See description of + * sattr in section 6.1 of "NFS Illustrated" by + * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 + */ + if (tmp1 == 1000000) + iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET); + } + return p; +} + +static inline u32 * +encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +{ + struct vfsmount *mnt = fhp->fh_export->ex_mnt; + struct dentry *dentry = fhp->fh_dentry; + struct kstat stat; + int type; + struct timespec time; + + vfs_getattr(mnt, dentry, &stat); + type = (stat.mode & S_IFMT); + + *p++ = htonl(nfs_ftypes[type >> 12]); + *p++ = htonl((u32) stat.mode); + *p++ = htonl((u32) stat.nlink); + *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid)); + *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid)); + + if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) { + *p++ = htonl(NFS_MAXPATHLEN); + } else { + *p++ = htonl((u32) stat.size); + } + *p++ = htonl((u32) stat.blksize); + if (S_ISCHR(type) || S_ISBLK(type)) + *p++ = htonl(new_encode_dev(stat.rdev)); + else + *p++ = htonl(0xffffffff); + *p++ = htonl((u32) stat.blocks); + if (is_fsid(fhp, rqstp->rq_reffh)) + *p++ = htonl((u32) fhp->fh_export->ex_fsid); + else + *p++ = htonl(new_encode_dev(stat.dev)); + *p++ = htonl((u32) stat.ino); + *p++ = htonl((u32) stat.atime.tv_sec); + *p++ = htonl(stat.atime.tv_nsec ? stat.atime.tv_nsec / 1000 : 0); + lease_get_mtime(dentry->d_inode, &time); + *p++ = htonl((u32) time.tv_sec); + *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); + *p++ = htonl((u32) stat.ctime.tv_sec); + *p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0); + + return p; +} + + +/* + * XDR decode functions + */ +int +nfssvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_sattrargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_sattr(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_diropargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readargs *args) +{ + unsigned int len; + int v,pn; + if (!(p = decode_fh(p, &args->fh))) + return 0; + + args->offset = ntohl(*p++); + len = args->count = ntohl(*p++); + p++; /* totalcount - unused */ + + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + + /* set up somewhere to store response. + * We take pages, put them on reslist and include in iovec + */ + v=0; + while (len > 0) { + pn=rqstp->rq_resused; + svc_take_page(rqstp); + args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]); + args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE; + len -= args->vec[v].iov_len; + v++; + } + args->vlen = v; + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_writeargs *args) +{ + unsigned int len; + int v; + if (!(p = decode_fh(p, &args->fh))) + return 0; + + p++; /* beginoffset */ + args->offset = ntohl(*p++); /* offset */ + p++; /* totalcount */ + len = args->len = ntohl(*p++); + args->vec[0].iov_base = (void*)p; + args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - + (((void*)p) - rqstp->rq_arg.head[0].iov_base); + if (len > NFSSVC_MAXBLKSIZE) + len = NFSSVC_MAXBLKSIZE; + v = 0; + while (len > args->vec[v].iov_len) { + len -= args->vec[v].iov_len; + v++; + args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]); + args->vec[v].iov_len = PAGE_SIZE; + } + args->vec[v].iov_len = len; + args->vlen = v+1; + return args->vec[0].iov_len > 0; +} + +int +nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_createargs *args) +{ + if (!(p = decode_fh(p, &args->fh)) + || !(p = decode_filename(p, &args->name, &args->len)) + || !(p = decode_sattr(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_renameargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinkargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + svc_take_page(rqstp); + args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_linkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_fh(p, &args->tfh)) + || !(p = decode_filename(p, &args->tname, &args->tlen))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_symlinkargs *args) +{ + if (!(p = decode_fh(p, &args->ffh)) + || !(p = decode_filename(p, &args->fname, &args->flen)) + || !(p = decode_pathname(p, &args->tname, &args->tlen)) + || !(p = decode_sattr(p, &args->attrs))) + return 0; + + return xdr_argsize_check(rqstp, p); +} + +int +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readdirargs *args) +{ + if (!(p = decode_fh(p, &args->fh))) + return 0; + args->cookie = ntohl(*p++); + args->count = ntohl(*p++); + if (args->count > PAGE_SIZE) + args->count = PAGE_SIZE; + + svc_take_page(rqstp); + args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]); + + return xdr_argsize_check(rqstp, p); +} + +/* + * XDR encode functions + */ +int +nfssvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, + struct nfsd_attrstat *resp) +{ + p = encode_fattr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_diropres *resp) +{ + p = encode_fh(p, &resp->fh); + p = encode_fattr(rqstp, p, &resp->fh); + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readlinkres *resp) +{ + *p++ = htonl(resp->len); + xdr_ressize_check(rqstp, p); + rqstp->rq_res.page_len = resp->len; + if (resp->len & 3) { + /* need to pad the tail */ + rqstp->rq_restailpage = 0; + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3); + } + return 1; +} + +int +nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readres *resp) +{ + p = encode_fattr(rqstp, p, &resp->fh); + *p++ = htonl(resp->count); + xdr_ressize_check(rqstp, p); + + /* now update rqstp->rq_res to reflect data aswell */ + rqstp->rq_res.page_len = resp->count; + if (resp->count & 3) { + /* need to pad the tail */ + rqstp->rq_restailpage = 0; + rqstp->rq_res.tail[0].iov_base = p; + *p = 0; + rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3); + } + return 1; +} + +int +nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_readdirres *resp) +{ + xdr_ressize_check(rqstp, p); + p = resp->buffer; + *p++ = 0; /* no more entries */ + *p++ = htonl((resp->common.err == nfserr_eof)); + rqstp->rq_res.page_len = (((unsigned long)p-1) & ~PAGE_MASK)+1; + + return 1; +} + +int +nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p, + struct nfsd_statfsres *resp) +{ + struct kstatfs *stat = &resp->stats; + + *p++ = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */ + *p++ = htonl(stat->f_bsize); + *p++ = htonl(stat->f_blocks); + *p++ = htonl(stat->f_bfree); + *p++ = htonl(stat->f_bavail); + return xdr_ressize_check(rqstp, p); +} + +int +nfssvc_encode_entry(struct readdir_cd *ccd, const char *name, + int namlen, loff_t offset, ino_t ino, unsigned int d_type) +{ + struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common); + u32 *p = cd->buffer; + int buflen, slen; + + /* + dprintk("nfsd: entry(%.*s off %ld ino %ld)\n", + namlen, name, offset, ino); + */ + + if (offset > ~((u32) 0)) { + cd->common.err = nfserr_fbig; + return -EINVAL; + } + if (cd->offset) + *cd->offset = htonl(offset); + if (namlen > NFS2_MAXNAMLEN) + namlen = NFS2_MAXNAMLEN;/* truncate filename */ + + slen = XDR_QUADLEN(namlen); + if ((buflen = cd->buflen - slen - 4) < 0) { + cd->common.err = nfserr_toosmall; + return -EINVAL; + } + *p++ = xdr_one; /* mark entry present */ + *p++ = htonl((u32) ino); /* file id */ + p = xdr_encode_array(p, name, namlen);/* name length & name */ + cd->offset = p; /* remember pointer */ + *p++ = ~(u32) 0; /* offset of next entry */ + + cd->buflen = buflen; + cd->buffer = p; + cd->common.err = nfs_ok; + return 0; +} + +/* + * XDR release functions + */ +int +nfssvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, + struct nfsd_fhandle *resp) +{ + fh_put(&resp->fh); + return 1; +} diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c new file mode 100644 index 00000000000..1cf955bcc52 --- /dev/null +++ b/fs/nfsd/stats.c @@ -0,0 +1,101 @@ +/* + * linux/fs/nfsd/stats.c + * + * procfs-based user access to knfsd statistics + * + * /proc/net/rpc/nfsd + * + * Format: + * rc <hits> <misses> <nocache> + * Statistsics for the reply cache + * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache> + * statistics for filehandle lookup + * io <bytes-read> <bytes-writtten> + * statistics for IO throughput + * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%> + * time (seconds) when nfsd thread usage above thresholds + * and number of times that all threads were in use + * ra cache-size <10% <20% <30% ... <100% not-found + * number of times that read-ahead entry was found that deep in + * the cache. + * plus generic RPC stats (see net/sunrpc/stats.c) + * + * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> + */ + +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/stat.h> +#include <linux/module.h> + +#include <linux/sunrpc/svc.h> +#include <linux/sunrpc/stats.h> +#include <linux/nfsd/nfsd.h> +#include <linux/nfsd/stats.h> + +struct nfsd_stats nfsdstats; +struct svc_stat nfsd_svcstats = { + .program = &nfsd_program, +}; + +static int nfsd_proc_show(struct seq_file *seq, void *v) +{ + int i; + + seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", + nfsdstats.rchits, + nfsdstats.rcmisses, + nfsdstats.rcnocache, + nfsdstats.fh_stale, + nfsdstats.fh_lookup, + nfsdstats.fh_anon, + nfsdstats.fh_nocache_dir, + nfsdstats.fh_nocache_nondir, + nfsdstats.io_read, + nfsdstats.io_write); + /* thread usage: */ + seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); + for (i=0; i<10; i++) { + unsigned int jifs = nfsdstats.th_usage[i]; + unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ; + seq_printf(seq, " %u.%03u", sec, msec); + } + + /* newline and ra-cache */ + seq_printf(seq, "\nra %u", nfsdstats.ra_size); + for (i=0; i<11; i++) + seq_printf(seq, " %u", nfsdstats.ra_depth[i]); + seq_putc(seq, '\n'); + + /* show my rpc info */ + svc_seq_show(seq, &nfsd_svcstats); + + return 0; +} + +static int nfsd_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, nfsd_proc_show, NULL); +} + +static struct file_operations nfsd_proc_fops = { + .owner = THIS_MODULE, + .open = nfsd_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void +nfsd_stat_init(void) +{ + svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); +} + +void +nfsd_stat_shutdown(void) +{ + svc_proc_unregister("nfsd"); +} diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c new file mode 100644 index 00000000000..e3e9d217236 --- /dev/null +++ b/fs/nfsd/vfs.c @@ -0,0 +1,1859 @@ +#define MSNFS /* HACK HACK */ +/* + * linux/fs/nfsd/vfs.c + * + * File operations used by nfsd. Some of these have been ripped from + * other parts of the kernel because they weren't exported, others + * are partial duplicates with added or changed functionality. + * + * Note that several functions dget() the dentry upon which they want + * to act, most notably those that create directory entries. Response + * dentry's are dput()'d if necessary in the release callback. + * So if you notice code paths that apparently fail to dput() the + * dentry, don't worry--they have been taken care of. + * + * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de> + * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp> + */ + +#include <linux/config.h> +#include <linux/string.h> +#include <linux/time.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/mount.h> +#include <linux/major.h> +#include <linux/ext2_fs.h> +#include <linux/proc_fs.h> +#include <linux/stat.h> +#include <linux/fcntl.h> +#include <linux/net.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/in.h> +#include <linux/module.h> +#include <linux/namei.h> +#include <linux/vfs.h> +#include <linux/delay.h> +#include <linux/sunrpc/svc.h> +#include <linux/nfsd/nfsd.h> +#ifdef CONFIG_NFSD_V3 +#include <linux/nfs3.h> +#include <linux/nfsd/xdr3.h> +#endif /* CONFIG_NFSD_V3 */ +#include <linux/nfsd/nfsfh.h> +#include <linux/quotaops.h> +#include <linux/dnotify.h> +#ifdef CONFIG_NFSD_V4 +#include <linux/posix_acl.h> +#include <linux/posix_acl_xattr.h> +#include <linux/xattr_acl.h> +#include <linux/xattr.h> +#include <linux/nfs4.h> +#include <linux/nfs4_acl.h> +#include <linux/nfsd_idmap.h> +#include <linux/security.h> +#endif /* CONFIG_NFSD_V4 */ + +#include <asm/uaccess.h> + +#define NFSDDBG_FACILITY NFSDDBG_FILEOP +#define NFSD_PARANOIA + + +/* We must ignore files (but only files) which might have mandatory + * locks on them because there is no way to know if the accesser has + * the lock. + */ +#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i)) + +/* + * This is a cache of readahead params that help us choose the proper + * readahead strategy. Initially, we set all readahead parameters to 0 + * and let the VFS handle things. + * If you increase the number of cached files very much, you'll need to + * add a hash table here. + */ +struct raparms { + struct raparms *p_next; + unsigned int p_count; + ino_t p_ino; + dev_t p_dev; + int p_set; + struct file_ra_state p_ra; +}; + +static struct raparms * raparml; +static struct raparms * raparm_cache; + +/* + * Called from nfsd_lookup and encode_dirent. Check if we have crossed + * a mount point. + * Returns -EAGAIN leaving *dpp and *expp unchanged, + * or nfs_ok having possibly changed *dpp and *expp + */ +int +nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, + struct svc_export **expp) +{ + struct svc_export *exp = *expp, *exp2 = NULL; + struct dentry *dentry = *dpp; + struct vfsmount *mnt = mntget(exp->ex_mnt); + struct dentry *mounts = dget(dentry); + int err = nfs_ok; + + while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); + + exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle); + if (IS_ERR(exp2)) { + err = PTR_ERR(exp2); + dput(mounts); + mntput(mnt); + goto out; + } + if (exp2 && ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2))) { + /* successfully crossed mount point */ + exp_put(exp); + *expp = exp2; + dput(dentry); + *dpp = mounts; + } else { + if (exp2) exp_put(exp2); + dput(mounts); + } + mntput(mnt); +out: + return err; +} + +/* + * Look up one component of a pathname. + * N.B. After this call _both_ fhp and resfh need an fh_put + * + * If the lookup would cross a mountpoint, and the mounted filesystem + * is exported to the client with NFSEXP_NOHIDE, then the lookup is + * accepted as it stands and the mounted directory is + * returned. Otherwise the covered directory is returned. + * NOTE: this mountpoint crossing is not supported properly by all + * clients and is explicitly disallowed for NFSv3 + * NeilBrown <neilb@cse.unsw.edu.au> + */ +int +nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, + int len, struct svc_fh *resfh) +{ + struct svc_export *exp; + struct dentry *dparent; + struct dentry *dentry; + int err; + + dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); + + /* Obtain dentry and export. */ + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC); + if (err) + return err; + + dparent = fhp->fh_dentry; + exp = fhp->fh_export; + exp_get(exp); + + err = nfserr_acces; + + /* Lookup the name, but don't follow links */ + if (isdotent(name, len)) { + if (len==1) + dentry = dget(dparent); + else if (dparent != exp->ex_dentry) { + dentry = dget_parent(dparent); + } else if (!EX_NOHIDE(exp)) + dentry = dget(dparent); /* .. == . just like at / */ + else { + /* checking mountpoint crossing is very different when stepping up */ + struct svc_export *exp2 = NULL; + struct dentry *dp; + struct vfsmount *mnt = mntget(exp->ex_mnt); + dentry = dget(dparent); + while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry)) + ; + dp = dget_parent(dentry); + dput(dentry); + dentry = dp; + + exp2 = exp_parent(exp->ex_client, mnt, dentry, + &rqstp->rq_chandle); + if (IS_ERR(exp2)) { + err = PTR_ERR(exp2); + dput(dentry); + mntput(mnt); + goto out_nfserr; + } + if (!exp2) { + dput(dentry); + dentry = dget(dparent); + } else { + exp_put(exp); + exp = exp2; + } + mntput(mnt); + } + } else { + fh_lock(fhp); + dentry = lookup_one_len(name, dparent, len); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_nfserr; + /* + * check if we have crossed a mount point ... + */ + if (d_mountpoint(dentry)) { + if ((err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { + dput(dentry); + goto out_nfserr; + } + } + } + /* + * Note: we compose the file handle now, but as the + * dentry may be negative, it may need to be updated. + */ + err = fh_compose(resfh, exp, dentry, fhp); + if (!err && !dentry->d_inode) + err = nfserr_noent; + dput(dentry); +out: + exp_put(exp); + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Set various file attributes. + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) +{ + struct dentry *dentry; + struct inode *inode; + int accmode = MAY_SATTR; + int ftype = 0; + int imode; + int err; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err || !iap->ia_valid) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* NFSv2 does not differentiate between "set-[ac]time-to-now" + * which only requires access, and "set-[ac]time-to-X" which + * requires ownership. + * So if it looks like it might be "set both to the same time which + * is close to now", and if inode_change_ok fails, then we + * convert to "set to now" instead of "set to explicit time" + * + * We only call inode_change_ok as the last test as technically + * it is not an interface that we should be using. It is only + * valid if the filesystem does not define it's own i_op->setattr. + */ +#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) +#define MAX_TOUCH_TIME_ERROR (30*60) + if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET + && iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec + ) { + /* Looks probable. Now just make sure time is in the right ballpark. + * Solaris, at least, doesn't seem to care what the time request is. + * We require it be within 30 minutes of now. + */ + time_t delta = iap->ia_atime.tv_sec - get_seconds(); + if (delta<0) delta = -delta; + if (delta < MAX_TOUCH_TIME_ERROR && + inode_change_ok(inode, iap) != 0) { + /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME + * this will cause notify_change to set these times to "now" + */ + iap->ia_valid &= ~BOTH_TIME_SET; + } + } + + /* The size case is special. It changes the file as well as the attributes. */ + if (iap->ia_valid & ATTR_SIZE) { + if (iap->ia_size < inode->i_size) { + err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); + if (err) + goto out; + } + + /* + * If we are changing the size of the file, then + * we need to break all leases. + */ + err = break_lease(inode, FMODE_WRITE | O_NONBLOCK); + if (err == -EWOULDBLOCK) + err = -ETIMEDOUT; + if (err) /* ENOMEM or EWOULDBLOCK */ + goto out_nfserr; + + err = get_write_access(inode); + if (err) + goto out_nfserr; + + size_change = 1; + err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (err) { + put_write_access(inode); + goto out_nfserr; + } + DQUOT_INIT(inode); + } + + imode = inode->i_mode; + if (iap->ia_valid & ATTR_MODE) { + iap->ia_mode &= S_IALLUGO; + imode = iap->ia_mode |= (imode & ~S_IALLUGO); + } + + /* Revoke setuid/setgid bit on chown/chgrp */ + if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid) + iap->ia_valid |= ATTR_KILL_SUID; + if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid) + iap->ia_valid |= ATTR_KILL_SGID; + + /* Change the attributes. */ + + iap->ia_valid |= ATTR_CTIME; + + err = nfserr_notsync; + if (!check_guard || guardtime == inode->i_ctime.tv_sec) { + fh_lock(fhp); + err = notify_change(dentry, iap); + err = nfserrno(err); + fh_unlock(fhp); + } + if (size_change) + put_write_access(inode); + if (!err) + if (EX_ISSYNC(fhp->fh_export)) + write_inode_now(inode, 1); +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +#if defined(CONFIG_NFSD_V4) + +static int +set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) +{ + int len; + size_t buflen; + char *buf = NULL; + int error = 0; + struct inode *inode = dentry->d_inode; + + buflen = posix_acl_xattr_size(pacl->a_count); + buf = kmalloc(buflen, GFP_KERNEL); + error = -ENOMEM; + if (buf == NULL) + goto out; + + len = posix_acl_to_xattr(pacl, buf, buflen); + if (len < 0) { + error = len; + goto out; + } + + error = -EOPNOTSUPP; + if (inode->i_op && inode->i_op->setxattr) { + down(&inode->i_sem); + security_inode_setxattr(dentry, key, buf, len, 0); + error = inode->i_op->setxattr(dentry, key, buf, len, 0); + if (!error) + security_inode_post_setxattr(dentry, key, buf, len, 0); + up(&inode->i_sem); + } +out: + kfree(buf); + return error; +} + +int +nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfs4_acl *acl) +{ + int error; + struct dentry *dentry; + struct inode *inode; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + + /* Get inode */ + error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); + if (error) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + if (S_ISDIR(inode->i_mode)) + flags = NFS4_ACL_DIR; + + error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + if (error == -EINVAL) { + error = nfserr_attrnotsupp; + goto out; + } else if (error < 0) + goto out_nfserr; + + if (pacl) { + error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); + if (error < 0) + goto out_nfserr; + } + + if (dpacl) { + error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); + if (error < 0) + goto out_nfserr; + } + + error = nfs_ok; + +out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return (error); +out_nfserr: + error = nfserrno(error); + goto out; +} + +static struct posix_acl * +_get_posix_acl(struct dentry *dentry, char *key) +{ + struct inode *inode = dentry->d_inode; + char *buf = NULL; + int buflen, error = 0; + struct posix_acl *pacl = NULL; + + error = -EOPNOTSUPP; + if (inode->i_op == NULL) + goto out_err; + if (inode->i_op->getxattr == NULL) + goto out_err; + + error = security_inode_getxattr(dentry, key); + if (error) + goto out_err; + + buflen = inode->i_op->getxattr(dentry, key, NULL, 0); + if (buflen <= 0) { + error = buflen < 0 ? buflen : -ENODATA; + goto out_err; + } + + buf = kmalloc(buflen, GFP_KERNEL); + if (buf == NULL) { + error = -ENOMEM; + goto out_err; + } + + error = inode->i_op->getxattr(dentry, key, buf, buflen); + if (error < 0) + goto out_err; + + pacl = posix_acl_from_xattr(buf, buflen); + out: + kfree(buf); + return pacl; + out_err: + pacl = ERR_PTR(error); + goto out; +} + +int +nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) +{ + struct inode *inode = dentry->d_inode; + int error = 0; + struct posix_acl *pacl = NULL, *dpacl = NULL; + unsigned int flags = 0; + + pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); + if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) + pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(pacl)) { + error = PTR_ERR(pacl); + pacl = NULL; + goto out; + } + + if (S_ISDIR(inode->i_mode)) { + dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); + if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) + dpacl = NULL; + else if (IS_ERR(dpacl)) { + error = PTR_ERR(dpacl); + dpacl = NULL; + goto out; + } + flags = NFS4_ACL_DIR; + } + + *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags); + if (IS_ERR(*acl)) { + error = PTR_ERR(*acl); + *acl = NULL; + } + out: + posix_acl_release(pacl); + posix_acl_release(dpacl); + return error; +} + +#endif /* defined(CONFIG_NFS_V4) */ + +#ifdef CONFIG_NFSD_V3 +/* + * Check server access rights to a file system object + */ +struct accessmap { + u32 access; + int how; +}; +static struct accessmap nfs3_regaccess[] = { + { NFS3_ACCESS_READ, MAY_READ }, + { NFS3_ACCESS_EXECUTE, MAY_EXEC }, + { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC }, + { NFS3_ACCESS_EXTEND, MAY_WRITE }, + + { 0, 0 } +}; + +static struct accessmap nfs3_diraccess[] = { + { NFS3_ACCESS_READ, MAY_READ }, + { NFS3_ACCESS_LOOKUP, MAY_EXEC }, + { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC }, + { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE }, + { NFS3_ACCESS_DELETE, MAY_REMOVE }, + + { 0, 0 } +}; + +static struct accessmap nfs3_anyaccess[] = { + /* Some clients - Solaris 2.6 at least, make an access call + * to the server to check for access for things like /dev/null + * (which really, the server doesn't care about). So + * We provide simple access checking for them, looking + * mainly at mode bits, and we make sure to ignore read-only + * filesystem checks + */ + { NFS3_ACCESS_READ, MAY_READ }, + { NFS3_ACCESS_EXECUTE, MAY_EXEC }, + { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS }, + { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS }, + + { 0, 0 } +}; + +int +nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported) +{ + struct accessmap *map; + struct svc_export *export; + struct dentry *dentry; + u32 query, result = 0, sresult = 0; + unsigned int error; + + error = fh_verify(rqstp, fhp, 0, MAY_NOP); + if (error) + goto out; + + export = fhp->fh_export; + dentry = fhp->fh_dentry; + + if (S_ISREG(dentry->d_inode->i_mode)) + map = nfs3_regaccess; + else if (S_ISDIR(dentry->d_inode->i_mode)) + map = nfs3_diraccess; + else + map = nfs3_anyaccess; + + + query = *access; + for (; map->access; map++) { + if (map->access & query) { + unsigned int err2; + + sresult |= map->access; + + err2 = nfsd_permission(export, dentry, map->how); + switch (err2) { + case nfs_ok: + result |= map->access; + break; + + /* the following error codes just mean the access was not allowed, + * rather than an error occurred */ + case nfserr_rofs: + case nfserr_acces: + case nfserr_perm: + /* simply don't "or" in the access bit. */ + break; + default: + error = err2; + goto out; + } + } + } + *access = result; + if (supported) + *supported = sresult; + + out: + return error; +} +#endif /* CONFIG_NFSD_V3 */ + + + +/* + * Open an existing file or directory. + * The access argument indicates the type of open (read/write/lock) + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, + int access, struct file **filp) +{ + struct dentry *dentry; + struct inode *inode; + int flags = O_RDONLY|O_LARGEFILE, err; + + /* + * If we get here, then the client has already done an "open", + * and (hopefully) checked permission - so allow OWNER_OVERRIDE + * in case a chmod has now revoked permission. + */ + err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Disallow write access to files with the append-only bit set + * or any access when mandatory locking enabled + */ + err = nfserr_perm; + if (IS_APPEND(inode) && (access & MAY_WRITE)) + goto out; + if (IS_ISMNDLK(inode)) + goto out; + + if (!inode->i_fop) + goto out; + + /* + * Check to see if there are any leases on this file. + * This may block while leases are broken. + */ + err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); + if (err == -EWOULDBLOCK) + err = -ETIMEDOUT; + if (err) /* NOMEM or WOULDBLOCK */ + goto out_nfserr; + + if (access & MAY_WRITE) { + flags = O_WRONLY|O_LARGEFILE; + + DQUOT_INIT(inode); + } + *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags); + if (IS_ERR(*filp)) + err = PTR_ERR(*filp); +out_nfserr: + if (err) + err = nfserrno(err); +out: + return err; +} + +/* + * Close a file. + */ +void +nfsd_close(struct file *filp) +{ + fput(filp); +} + +/* + * Sync a file + * As this calls fsync (not fdatasync) there is no need for a write_inode + * after it. + */ +static inline void nfsd_dosync(struct file *filp, struct dentry *dp, + struct file_operations *fop) +{ + struct inode *inode = dp->d_inode; + int (*fsync) (struct file *, struct dentry *, int); + + filemap_fdatawrite(inode->i_mapping); + if (fop && (fsync = fop->fsync)) + fsync(filp, dp, 0); + filemap_fdatawait(inode->i_mapping); +} + + +static void +nfsd_sync(struct file *filp) +{ + struct inode *inode = filp->f_dentry->d_inode; + dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); + down(&inode->i_sem); + nfsd_dosync(filp, filp->f_dentry, filp->f_op); + up(&inode->i_sem); +} + +static void +nfsd_sync_dir(struct dentry *dp) +{ + nfsd_dosync(NULL, dp, dp->d_inode->i_fop); +} + +/* + * Obtain the readahead parameters for the file + * specified by (dev, ino). + */ +static DEFINE_SPINLOCK(ra_lock); + +static inline struct raparms * +nfsd_get_raparms(dev_t dev, ino_t ino) +{ + struct raparms *ra, **rap, **frap = NULL; + int depth = 0; + + spin_lock(&ra_lock); + for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { + if (ra->p_ino == ino && ra->p_dev == dev) + goto found; + depth++; + if (ra->p_count == 0) + frap = rap; + } + depth = nfsdstats.ra_size*11/10; + if (!frap) { + spin_unlock(&ra_lock); + return NULL; + } + rap = frap; + ra = *frap; + ra->p_dev = dev; + ra->p_ino = ino; + ra->p_set = 0; +found: + if (rap != &raparm_cache) { + *rap = ra->p_next; + ra->p_next = raparm_cache; + raparm_cache = ra; + } + ra->p_count++; + nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; + spin_unlock(&ra_lock); + return ra; +} + +/* + * Grab and keep cached pages assosiated with a file in the svc_rqst + * so that they can be passed to the netowork sendmsg/sendpage routines + * directrly. They will be released after the sending has completed. + */ +static int +nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size) +{ + unsigned long count = desc->count; + struct svc_rqst *rqstp = desc->arg.data; + + if (size > count) + size = count; + + if (rqstp->rq_res.page_len == 0) { + get_page(page); + rqstp->rq_respages[rqstp->rq_resused++] = page; + rqstp->rq_res.page_base = offset; + rqstp->rq_res.page_len = size; + } else if (page != rqstp->rq_respages[rqstp->rq_resused-1]) { + get_page(page); + rqstp->rq_respages[rqstp->rq_resused++] = page; + rqstp->rq_res.page_len += size; + } else { + rqstp->rq_res.page_len += size; + } + + desc->count = count - size; + desc->written += size; + return size; +} + +static inline int +nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long *count) +{ + struct inode *inode; + struct raparms *ra; + mm_segment_t oldfs; + int err; + + err = nfserr_perm; + inode = file->f_dentry->d_inode; +#ifdef MSNFS + if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && + (!lock_may_read(inode, offset, *count))) + goto out; +#endif + + /* Get readahead parameters */ + ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino); + + if (ra && ra->p_set) + file->f_ra = ra->p_ra; + + if (file->f_op->sendfile) { + svc_pushback_unused_pages(rqstp); + err = file->f_op->sendfile(file, &offset, *count, + nfsd_read_actor, rqstp); + } else { + oldfs = get_fs(); + set_fs(KERNEL_DS); + err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + set_fs(oldfs); + } + + /* Write back readahead params */ + if (ra) { + spin_lock(&ra_lock); + ra->p_ra = file->f_ra; + ra->p_set = 1; + ra->p_count--; + spin_unlock(&ra_lock); + } + + if (err >= 0) { + nfsdstats.io_read += err; + *count = err; + err = 0; + dnotify_parent(file->f_dentry, DN_ACCESS); + } else + err = nfserrno(err); +out: + return err; +} + +static inline int +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, + unsigned long cnt, int *stablep) +{ + struct svc_export *exp; + struct dentry *dentry; + struct inode *inode; + mm_segment_t oldfs; + int err = 0; + int stable = *stablep; + + err = nfserr_perm; + +#ifdef MSNFS + if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && + (!lock_may_write(file->f_dentry->d_inode, offset, cnt))) + goto out; +#endif + + dentry = file->f_dentry; + inode = dentry->d_inode; + exp = fhp->fh_export; + + /* + * Request sync writes if + * - the sync export option has been set, or + * - the client requested O_SYNC behavior (NFSv3 feature). + * - The file system doesn't support fsync(). + * When gathered writes have been configured for this volume, + * flushing the data to disk is handled separately below. + */ + + if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */ + stable = 2; + *stablep = 2; /* FILE_SYNC */ + } + + if (!EX_ISSYNC(exp)) + stable = 0; + if (stable && !EX_WGATHER(exp)) + file->f_flags |= O_SYNC; + + /* Write the data. */ + oldfs = get_fs(); set_fs(KERNEL_DS); + err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); + set_fs(oldfs); + if (err >= 0) { + nfsdstats.io_write += cnt; + dnotify_parent(file->f_dentry, DN_MODIFY); + } + + /* clear setuid/setgid flag after write */ + if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) { + struct iattr ia; + ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID; + + down(&inode->i_sem); + notify_change(dentry, &ia); + up(&inode->i_sem); + } + + if (err >= 0 && stable) { + static ino_t last_ino; + static dev_t last_dev; + + /* + * Gathered writes: If another process is currently + * writing to the file, there's a high chance + * this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the + * file with each write request, we sleep for 10 msec. + * + * I don't know if this roughly approximates + * C. Juszak's idea of gathered writes, but it's a + * nice and simple solution (IMHO), and it seems to + * work:-) + */ + if (EX_WGATHER(exp)) { + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { + dprintk("nfsd: write defer %d\n", current->pid); + msleep(10); + dprintk("nfsd: write resume %d\n", current->pid); + } + + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", current->pid); + nfsd_sync(file); + } +#if 0 + wake_up(&inode->i_wait); +#endif + } + last_ino = inode->i_ino; + last_dev = inode->i_sb->s_dev; + } + + dprintk("nfsd: write complete err=%d\n", err); + if (err >= 0) + err = 0; + else + err = nfserrno(err); +out: + return err; +} + +/* + * Read data from a file. count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, + unsigned long *count) +{ + int err; + + if (file) { + err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, + MAY_READ|MAY_OWNER_OVERRIDE); + if (err) + goto out; + err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + } else { + err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); + if (err) + goto out; + err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); + nfsd_close(file); + } +out: + return err; +} + +/* + * Write data to a file. + * The stable flag requests synchronous writes. + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, + loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, + int *stablep) +{ + int err = 0; + + if (file) { + err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, + MAY_WRITE|MAY_OWNER_OVERRIDE); + if (err) + goto out; + err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, + stablep); + } else { + err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); + if (err) + goto out; + + if (cnt) + err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, + cnt, stablep); + nfsd_close(file); + } +out: + return err; +} + +#ifdef CONFIG_NFSD_V3 +/* + * Commit all pending writes to stable storage. + * Strictly speaking, we could sync just the indicated file region here, + * but there's currently no way we can ask the VFS to do so. + * + * Unfortunately we cannot lock the file to make sure we return full WCC + * data to the client, as locking happens lower down in the filesystem. + */ +int +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, unsigned long count) +{ + struct file *file; + int err; + + if ((u64)count > ~(u64)offset) + return nfserr_inval; + + if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0) + return err; + if (EX_ISSYNC(fhp->fh_export)) { + if (file->f_op && file->f_op->fsync) { + nfsd_sync(file); + } else { + err = nfserr_notsupp; + } + } + + nfsd_close(file); + return err; +} +#endif /* CONFIG_NFSD_V3 */ + +/* + * Create a file (regular, directory, device, fifo); UNIX sockets + * not yet implemented. + * If the response fh has been verified, the parent directory should + * already be locked. Note that the parent directory is left locked. + * + * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp + */ +int +nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, struct iattr *iap, + int type, dev_t rdev, struct svc_fh *resfhp) +{ + struct dentry *dentry, *dchild = NULL; + struct inode *dirp; + int err; + + err = nfserr_perm; + if (!flen) + goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; + + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; + + err = nfserr_notdir; + if(!dirp->i_op || !dirp->i_op->lookup) + goto out; + /* + * Check whether the response file handle has been verified yet. + * If it has, the parent directory should already be locked. + */ + if (!resfhp->fh_dentry) { + /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ + fh_lock(fhp); + dchild = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); + if (err) + goto out; + } else { + /* called from nfsd_proc_create */ + dchild = dget(resfhp->fh_dentry); + if (!fhp->fh_locked) { + /* not actually possible */ + printk(KERN_ERR + "nfsd_create: parent %s/%s not locked!\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + err = -EIO; + goto out; + } + } + /* + * Make sure the child dentry is still negative ... + */ + err = nfserr_exist; + if (dchild->d_inode) { + dprintk("nfsd_create: dentry %s/%s not negative!\n", + dentry->d_name.name, dchild->d_name.name); + goto out; + } + + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; + + /* + * Get the dir op function pointer. + */ + err = nfserr_perm; + switch (type) { + case S_IFREG: + err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + break; + case S_IFDIR: + err = vfs_mkdir(dirp, dchild, iap->ia_mode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + break; + default: + printk("nfsd: bad file type %o in nfsd_create\n", type); + err = -EINVAL; + } + if (err < 0) + goto out_nfserr; + + if (EX_ISSYNC(fhp->fh_export)) { + nfsd_sync_dir(dentry); + write_inode_now(dchild->d_inode, 1); + } + + + /* Set file attributes. Mode has already been set and + * setting uid/gid works only for root. Irix appears to + * send along the gid when it tries to implement setgid + * directories via NFS. + */ + err = 0; + if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) + err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + /* + * Update the file handle to get the new inode info. + */ + if (!err) + err = fh_update(resfhp); +out: + if (dchild && !IS_ERR(dchild)) + dput(dchild); + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +#ifdef CONFIG_NFSD_V3 +/* + * NFSv3 version of nfsd_create + */ +int +nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, struct iattr *iap, + struct svc_fh *resfhp, int createmode, u32 *verifier, + int *truncp) +{ + struct dentry *dentry, *dchild = NULL; + struct inode *dirp; + int err; + __u32 v_mtime=0, v_atime=0; + int v_mode=0; + + err = nfserr_perm; + if (!flen) + goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; + + /* Get all the sanity checks out of the way before + * we lock the parent. */ + err = nfserr_notdir; + if(!dirp->i_op || !dirp->i_op->lookup) + goto out; + fh_lock(fhp); + + /* + * Compose the response file handle. + */ + dchild = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; + + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); + if (err) + goto out; + + if (createmode == NFS3_CREATE_EXCLUSIVE) { + /* while the verifier would fit in mtime+atime, + * solaris7 gets confused (bugid 4218508) if these have + * the high bit set, so we use the mode as well + */ + v_mtime = verifier[0]&0x7fffffff; + v_atime = verifier[1]&0x7fffffff; + v_mode = S_IFREG + | ((verifier[0]&0x80000000) >> (32-7)) /* u+x */ + | ((verifier[1]&0x80000000) >> (32-9)) /* u+r */ + ; + } + + if (dchild->d_inode) { + err = 0; + + switch (createmode) { + case NFS3_CREATE_UNCHECKED: + if (! S_ISREG(dchild->d_inode->i_mode)) + err = nfserr_exist; + else if (truncp) { + /* in nfsv4, we need to treat this case a little + * differently. we don't want to truncate the + * file now; this would be wrong if the OPEN + * fails for some other reason. furthermore, + * if the size is nonzero, we should ignore it + * according to spec! + */ + *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size; + } + else { + iap->ia_valid &= ATTR_SIZE; + goto set_attr; + } + break; + case NFS3_CREATE_EXCLUSIVE: + if ( dchild->d_inode->i_mtime.tv_sec == v_mtime + && dchild->d_inode->i_atime.tv_sec == v_atime + && dchild->d_inode->i_mode == v_mode + && dchild->d_inode->i_size == 0 ) + break; + /* fallthru */ + case NFS3_CREATE_GUARDED: + err = nfserr_exist; + } + goto out; + } + + err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + if (err < 0) + goto out_nfserr; + + if (EX_ISSYNC(fhp->fh_export)) { + nfsd_sync_dir(dentry); + /* setattr will sync the child (or not) */ + } + + /* + * Update the filehandle to get the new inode info. + */ + err = fh_update(resfhp); + if (err) + goto out; + + if (createmode == NFS3_CREATE_EXCLUSIVE) { + /* Cram the verifier into atime/mtime/mode */ + iap->ia_valid = ATTR_MTIME|ATTR_ATIME + | ATTR_MTIME_SET|ATTR_ATIME_SET + | ATTR_MODE; + /* XXX someone who knows this better please fix it for nsec */ + iap->ia_mtime.tv_sec = v_mtime; + iap->ia_atime.tv_sec = v_atime; + iap->ia_mtime.tv_nsec = 0; + iap->ia_atime.tv_nsec = 0; + iap->ia_mode = v_mode; + } + + /* Set file attributes. + * Mode has already been set but we might need to reset it + * for CREATE_EXCLUSIVE + * Irix appears to send along the gid when it tries to + * implement setgid directories via NFS. Clear out all that cruft. + */ + set_attr: + if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) + err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + + out: + fh_unlock(fhp); + if (dchild && !IS_ERR(dchild)) + dput(dchild); + return err; + + out_nfserr: + err = nfserrno(err); + goto out; +} +#endif /* CONFIG_NFSD_V3 */ + +/* + * Read a symlink. On entry, *lenp must contain the maximum path length that + * fits into the buffer. On return, it contains the true length. + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) +{ + struct dentry *dentry; + struct inode *inode; + mm_segment_t oldfs; + int err; + + err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + err = nfserr_inval; + if (!inode->i_op || !inode->i_op->readlink) + goto out; + + touch_atime(fhp->fh_export->ex_mnt, dentry); + /* N.B. Why does this call need a get_fs()?? + * Remove the set_fs and watch the fireworks:-) --okir + */ + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = inode->i_op->readlink(dentry, buf, *lenp); + set_fs(oldfs); + + if (err < 0) + goto out_nfserr; + *lenp = err; + err = 0; +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Create a symlink and look up its inode + * N.B. After this call _both_ fhp and resfhp need an fh_put + */ +int +nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, + char *path, int plen, + struct svc_fh *resfhp, + struct iattr *iap) +{ + struct dentry *dentry, *dnew; + int err, cerr; + umode_t mode; + + err = nfserr_noent; + if (!flen || !plen) + goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; + + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + fh_lock(fhp); + dentry = fhp->fh_dentry; + dnew = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dnew); + if (IS_ERR(dnew)) + goto out_nfserr; + + mode = S_IALLUGO; + /* Only the MODE ATTRibute is even vaguely meaningful */ + if (iap && (iap->ia_valid & ATTR_MODE)) + mode = iap->ia_mode & S_IALLUGO; + + if (unlikely(path[plen] != 0)) { + char *path_alloced = kmalloc(plen+1, GFP_KERNEL); + if (path_alloced == NULL) + err = -ENOMEM; + else { + strncpy(path_alloced, path, plen); + path_alloced[plen] = 0; + err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + kfree(path_alloced); + } + } else + err = vfs_symlink(dentry->d_inode, dnew, path, mode); + + if (!err) { + if (EX_ISSYNC(fhp->fh_export)) + nfsd_sync_dir(dentry); + } else + err = nfserrno(err); + fh_unlock(fhp); + + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); + dput(dnew); + if (err==0) err = cerr; +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Create a hardlink + * N.B. After this call _both_ ffhp and tfhp need an fh_put + */ +int +nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, + char *name, int len, struct svc_fh *tfhp) +{ + struct dentry *ddir, *dnew, *dold; + struct inode *dirp, *dest; + int err; + + err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP); + if (err) + goto out; + + err = nfserr_perm; + if (!len) + goto out; + err = nfserr_exist; + if (isdotent(name, len)) + goto out; + + fh_lock(ffhp); + ddir = ffhp->fh_dentry; + dirp = ddir->d_inode; + + dnew = lookup_one_len(name, ddir, len); + err = PTR_ERR(dnew); + if (IS_ERR(dnew)) + goto out_nfserr; + + dold = tfhp->fh_dentry; + dest = dold->d_inode; + + err = vfs_link(dold, dirp, dnew); + if (!err) { + if (EX_ISSYNC(ffhp->fh_export)) { + nfsd_sync_dir(ddir); + write_inode_now(dest, 1); + } + } else { + if (err == -EXDEV && rqstp->rq_vers == 2) + err = nfserr_acces; + else + err = nfserrno(err); + } + + fh_unlock(ffhp); + dput(dnew); +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Rename a file + * N.B. After this call _both_ ffhp and tfhp need an fh_put + */ +int +nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, + struct svc_fh *tfhp, char *tname, int tlen) +{ + struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; + struct inode *fdir, *tdir; + int err; + + err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); + if (err) + goto out; + err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + + fdentry = ffhp->fh_dentry; + fdir = fdentry->d_inode; + + tdentry = tfhp->fh_dentry; + tdir = tdentry->d_inode; + + err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev; + if (fdir->i_sb != tdir->i_sb) + goto out; + + err = nfserr_perm; + if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) + goto out; + + /* cannot use fh_lock as we need deadlock protective ordering + * so do it by hand */ + trap = lock_rename(tdentry, fdentry); + ffhp->fh_locked = tfhp->fh_locked = 1; + fill_pre_wcc(ffhp); + fill_pre_wcc(tfhp); + + odentry = lookup_one_len(fname, fdentry, flen); + err = PTR_ERR(odentry); + if (IS_ERR(odentry)) + goto out_nfserr; + + err = -ENOENT; + if (!odentry->d_inode) + goto out_dput_old; + err = -EINVAL; + if (odentry == trap) + goto out_dput_old; + + ndentry = lookup_one_len(tname, tdentry, tlen); + err = PTR_ERR(ndentry); + if (IS_ERR(ndentry)) + goto out_dput_old; + err = -ENOTEMPTY; + if (ndentry == trap) + goto out_dput_new; + +#ifdef MSNFS + if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && + ((atomic_read(&odentry->d_count) > 1) + || (atomic_read(&ndentry->d_count) > 1))) { + err = nfserr_perm; + } else +#endif + err = vfs_rename(fdir, odentry, tdir, ndentry); + if (!err && EX_ISSYNC(tfhp->fh_export)) { + nfsd_sync_dir(tdentry); + nfsd_sync_dir(fdentry); + } + + out_dput_new: + dput(ndentry); + out_dput_old: + dput(odentry); + out_nfserr: + if (err) + err = nfserrno(err); + + /* we cannot reply on fh_unlock on the two filehandles, + * as that would do the wrong thing if the two directories + * were the same, so again we do it by hand + */ + fill_post_wcc(ffhp); + fill_post_wcc(tfhp); + unlock_rename(tdentry, fdentry); + ffhp->fh_locked = tfhp->fh_locked = 0; + +out: + return err; +} + +/* + * Unlink a file or directory + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, + char *fname, int flen) +{ + struct dentry *dentry, *rdentry; + struct inode *dirp; + int err; + + err = nfserr_acces; + if (!flen || isdotent(fname, flen)) + goto out; + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE); + if (err) + goto out; + + fh_lock(fhp); + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; + + rdentry = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(rdentry); + if (IS_ERR(rdentry)) + goto out_nfserr; + + if (!rdentry->d_inode) { + dput(rdentry); + err = nfserr_noent; + goto out; + } + + if (!type) + type = rdentry->d_inode->i_mode & S_IFMT; + + if (type != S_IFDIR) { /* It's UNLINK */ +#ifdef MSNFS + if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && + (atomic_read(&rdentry->d_count) > 1)) { + err = nfserr_perm; + } else +#endif + err = vfs_unlink(dirp, rdentry); + } else { /* It's RMDIR */ + err = vfs_rmdir(dirp, rdentry); + } + + dput(rdentry); + + if (err) + goto out_nfserr; + if (EX_ISSYNC(fhp->fh_export)) + nfsd_sync_dir(dentry); + +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Read entries from a directory. + * The NFSv3/4 verifier we ignore for now. + */ +int +nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, + struct readdir_cd *cdp, encode_dent_fn func) +{ + int err; + struct file *file; + loff_t offset = *offsetp; + + err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); + if (err) + goto out; + + offset = vfs_llseek(file, offset, 0); + if (offset < 0) { + err = nfserrno((int)offset); + goto out_close; + } + + /* + * Read the directory entries. This silly loop is necessary because + * readdir() is not guaranteed to fill up the entire buffer, but + * may choose to do less. + */ + + do { + cdp->err = nfserr_eof; /* will be cleared on successful read */ + err = vfs_readdir(file, (filldir_t) func, cdp); + } while (err >=0 && cdp->err == nfs_ok); + if (err) + err = nfserrno(err); + else + err = cdp->err; + *offsetp = vfs_llseek(file, 0, 1); + + if (err == nfserr_eof || err == nfserr_toosmall) + err = nfs_ok; /* can still be found in ->err */ +out_close: + nfsd_close(file); +out: + return err; +} + +/* + * Get file system stats + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) +{ + int err = fh_verify(rqstp, fhp, 0, MAY_NOP); + if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat)) + err = nfserr_io; + return err; +} + +/* + * Check for a user's access permissions to this inode. + */ +int +nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) +{ + struct inode *inode = dentry->d_inode; + int err; + + if (acc == MAY_NOP) + return 0; +#if 0 + dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", + acc, + (acc & MAY_READ)? " read" : "", + (acc & MAY_WRITE)? " write" : "", + (acc & MAY_EXEC)? " exec" : "", + (acc & MAY_SATTR)? " sattr" : "", + (acc & MAY_TRUNC)? " trunc" : "", + (acc & MAY_LOCK)? " lock" : "", + (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "", + inode->i_mode, + IS_IMMUTABLE(inode)? " immut" : "", + IS_APPEND(inode)? " append" : "", + IS_RDONLY(inode)? " ro" : ""); + dprintk(" owner %d/%d user %d/%d\n", + inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); +#endif + + /* Normally we reject any write/sattr etc access on a read-only file + * system. But if it is IRIX doing check on write-access for a + * device special file, we ignore rofs. + */ + if (!(acc & MAY_LOCAL_ACCESS)) + if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { + if (EX_RDONLY(exp) || IS_RDONLY(inode)) + return nfserr_rofs; + if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) + return nfserr_perm; + } + if ((acc & MAY_TRUNC) && IS_APPEND(inode)) + return nfserr_perm; + + if (acc & MAY_LOCK) { + /* If we cannot rely on authentication in NLM requests, + * just allow locks, otherwise require read permission, or + * ownership + */ + if (exp->ex_flags & NFSEXP_NOAUTHNLM) + return 0; + else + acc = MAY_READ | MAY_OWNER_OVERRIDE; + } + /* + * The file owner always gets access permission for accesses that + * would normally be checked at open time. This is to make + * file access work even when the client has done a fchmod(fd, 0). + * + * However, `cp foo bar' should fail nevertheless when bar is + * readonly. A sensible way to do this might be to reject all + * attempts to truncate a read-only file, because a creat() call + * always implies file truncation. + * ... but this isn't really fair. A process may reasonably call + * ftruncate on an open file descriptor on a file with perm 000. + * We must trust the client to do permission checking - using "ACCESS" + * with NFSv3. + */ + if ((acc & MAY_OWNER_OVERRIDE) && + inode->i_uid == current->fsuid) + return 0; + + err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); + + /* Allow read access to binaries even when mode 111 */ + if (err == -EACCES && S_ISREG(inode->i_mode) && + acc == (MAY_READ | MAY_OWNER_OVERRIDE)) + err = permission(inode, MAY_EXEC, NULL); + + return err? nfserrno(err) : 0; +} + +void +nfsd_racache_shutdown(void) +{ + if (!raparm_cache) + return; + dprintk("nfsd: freeing readahead buffers.\n"); + kfree(raparml); + raparm_cache = raparml = NULL; +} +/* + * Initialize readahead param cache + */ +int +nfsd_racache_init(int cache_size) +{ + int i; + + if (raparm_cache) + return 0; + raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); + + if (raparml != NULL) { + dprintk("nfsd: allocating %d readahead buffers.\n", + cache_size); + memset(raparml, 0, sizeof(struct raparms) * cache_size); + for (i = 0; i < cache_size - 1; i++) { + raparml[i].p_next = raparml + i + 1; + } + raparm_cache = raparml; + } else { + printk(KERN_WARNING + "nfsd: Could not allocate memory read-ahead cache.\n"); + return -ENOMEM; + } + nfsdstats.ra_size = cache_size; + return 0; +} |