From 09f5bf4e6d0607399c16ec7a2d8d166f31086686 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 22 May 2011 19:50:20 +0300 Subject: pnfs-obj: decode layout, alloc/free lseg objlayout_alloc_lseg prepares an xdr_stream and calls the raid engins objio_alloc_lseg() to allocate a private pnfs_layout_segment. objio_osd.c::objio_alloc_lseg() uses passed xdr_stream to decode and store the layout_segment information in an objio_segment struct, using the pnfs_osd_xdr.h API for the actual parsing the layout xdr. objlayout_free_lseg calls objio_free_lseg() to free the allocated space. Signed-off-by: Boaz Harrosh [gfp_flags] [removed "extern" from function definitions] Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objlayout.h | 67 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 fs/nfs/objlayout/objlayout.h (limited to 'fs/nfs/objlayout/objlayout.h') diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h new file mode 100644 index 00000000000..066280a07fa --- /dev/null +++ b/fs/nfs/objlayout/objlayout.h @@ -0,0 +1,67 @@ +/* + * Data types and function declerations for interfacing with the + * pNFS standard object layout driver. + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy + * Boaz Harrosh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _OBJLAYOUT_H +#define _OBJLAYOUT_H + +#include +#include +#include "../pnfs.h" + +/* + * Raid engine I/O API + */ +extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, + struct pnfs_layout_hdr *pnfslay, + struct pnfs_layout_range *range, + struct xdr_stream *xdr, + gfp_t gfp_flags); +extern void objio_free_lseg(struct pnfs_layout_segment *lseg); + +/* + * exported generic objects function vectors + */ +extern struct pnfs_layout_segment *objlayout_alloc_lseg( + struct pnfs_layout_hdr *, + struct nfs4_layoutget_res *, + gfp_t gfp_flags); +extern void objlayout_free_lseg(struct pnfs_layout_segment *); + +#endif /* _OBJLAYOUT_H */ -- cgit v1.2.3 From b6c05f1693115164c7b797152ac7ea3ef8e5d296 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 26 May 2011 21:45:34 +0300 Subject: pnfs-obj: objio_osd device information retrieval and caching When a new layout is received in objio_alloc_lseg all device_ids referenced are retrieved. The device information is queried for from MDS and then the osd_device is looked-up from the osd-initiator library. The devices are cached in a per-mount-point list, for later use. At unmount all devices are "put" back to the library. objlayout_get_deviceinfo(), objlayout_put_deviceinfo() middleware API for retrieving device information given a device_id. TODO: The device cache can get big. Cap its size. Keep an LRU and start to return devices which were not used, when list gets to big, or when new entries allocation fail. [pnfs-obj: Bugs in new global-device-cache code] Signed-off-by: Boaz Harrosh [gfp_flags] [use global device cache] [use layout driver in global device cache] Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objlayout.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs/nfs/objlayout/objlayout.h') diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 066280a07fa..0814271bb9b 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -55,6 +55,14 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, gfp_t gfp_flags); extern void objio_free_lseg(struct pnfs_layout_segment *lseg); +/* + * callback API + */ +extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, + struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, + gfp_t gfp_flags); +extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); + /* * exported generic objects function vectors */ -- cgit v1.2.3 From e51b841dd0be9ff53f740c44c32c32679edcb7c8 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 22 May 2011 19:51:48 +0300 Subject: pnfs-obj: define per-inode private structure allocate and deallocate per-inode private pnfs_layout_hdr in preparation for I/O implementation. Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objlayout.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/nfs/objlayout/objlayout.h') diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 0814271bb9b..fa0262149f5 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -45,6 +45,19 @@ #include #include "../pnfs.h" +/* + * per-inode layout + */ +struct objlayout { + struct pnfs_layout_hdr pnfs_layout; +}; + +static inline struct objlayout * +OBJLAYOUT(struct pnfs_layout_hdr *lo) +{ + return container_of(lo, struct objlayout, pnfs_layout); +} + /* * Raid engine I/O API */ @@ -66,6 +79,10 @@ extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); /* * exported generic objects function vectors */ + +extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags); +extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *); + extern struct pnfs_layout_segment *objlayout_alloc_lseg( struct pnfs_layout_hdr *, struct nfs4_layoutget_res *, -- cgit v1.2.3 From 04f83450388e87d86b387cf4a27b81eb7e69de7d Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 22 May 2011 19:52:19 +0300 Subject: pnfs-obj: osd raid engine read/write implementation With the use of the in-kernel osd library. Implement read/write of data from/to osd-objects according to information specified in the objects-layout. Support for stripping over mirrors with a received stripe_unit. There are however a few constrains which are not supported: 1. Stripe Unit must be a multiple of PAGE_SIZE 2. stripe length (stripe_unit * number_of_stripes) can not be bigger then 32bit. Also support raid-groups and partial-layout. Partial-layout is when not all the groups are received on the line, addressing only a partial range of the file. TODO: Only raid0! raid 4/5/6 support will come at later stage A none supported layout will send IO through the MDS [Important fallout from the last rebase] Signed-off-by: Boaz Harrosh [gfp_flags] Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objlayout.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'fs/nfs/objlayout/objlayout.h') diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index fa0262149f5..9a405e8069f 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -58,6 +58,26 @@ OBJLAYOUT(struct pnfs_layout_hdr *lo) return container_of(lo, struct objlayout, pnfs_layout); } +/* + * per-I/O operation state + * embedded in objects provider io_state data structure + */ +struct objlayout_io_state { + struct pnfs_layout_segment *lseg; + + struct page **pages; + unsigned pgbase; + unsigned nr_pages; + unsigned long count; + loff_t offset; + bool sync; + + void *rpcdata; + int status; /* res */ + int eof; /* res */ + int committed; /* res */ +}; + /* * Raid engine I/O API */ @@ -68,9 +88,24 @@ extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, gfp_t gfp_flags); extern void objio_free_lseg(struct pnfs_layout_segment *lseg); +extern int objio_alloc_io_state( + struct pnfs_layout_segment *lseg, + struct objlayout_io_state **outp, + gfp_t gfp_flags); +extern void objio_free_io_state(struct objlayout_io_state *state); + +extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); +extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, + bool stable); + /* * callback API */ +extern void objlayout_read_done(struct objlayout_io_state *state, + ssize_t status, bool sync); +extern void objlayout_write_done(struct objlayout_io_state *state, + ssize_t status, bool sync); + extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, gfp_t gfp_flags); @@ -89,4 +124,11 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg( gfp_t gfp_flags); extern void objlayout_free_lseg(struct pnfs_layout_segment *); +extern enum pnfs_try_status objlayout_read_pagelist( + struct nfs_read_data *); + +extern enum pnfs_try_status objlayout_write_pagelist( + struct nfs_write_data *, + int how); + #endif /* _OBJLAYOUT_H */ -- cgit v1.2.3 From adb58535e604a564495a7d50dfb0afa0ddc21bcb Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 26 May 2011 21:49:46 +0300 Subject: pnfs-obj: report errors and .encode_layoutreturn Implementation. An io_state pre-allocates an error information structure for each possible osd-device that might error during IO. When IO is done if all was well the io_state is freed. (as today). If the I/O has ended with an error, the io_state is queued on a per-layout err_list. When eventually encode_layoutreturn() is called, each error is properly encoded on the XDR buffer and only then the io_state is removed from err_list and de-allocated. It is up to the io_engine to fill in the segment that fault and the type of osd_error that occurred. By calling objlayout_io_set_result() for each failing device. In objio_osd: * Allocate io-error descriptors space as part of io_state * Use generic objlayout error reporting at end of io. Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objlayout.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'fs/nfs/objlayout/objlayout.h') diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 9a405e8069f..b0bb975058e 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -50,6 +50,10 @@ */ struct objlayout { struct pnfs_layout_hdr pnfs_layout; + + /* for layout_return */ + spinlock_t lock; + struct list_head err_list; }; static inline struct objlayout * @@ -76,6 +80,16 @@ struct objlayout_io_state { int status; /* res */ int eof; /* res */ int committed; /* res */ + + /* Error reporting (layout_return) */ + struct list_head err_list; + unsigned num_comps; + /* Pointer to array of error descriptors of size num_comps. + * It should contain as many entries as devices in the osd_layout + * that participate in the I/O. It is up to the io_engine to allocate + * needed space and set num_comps. + */ + struct pnfs_osd_ioerr *ioerrs; }; /* @@ -101,6 +115,10 @@ extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, /* * callback API */ +extern void objlayout_io_set_result(struct objlayout_io_state *state, + unsigned index, struct pnfs_osd_objid *pooid, + int osd_error, u64 offset, u64 length, bool is_write); + extern void objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync); extern void objlayout_write_done(struct objlayout_io_state *state, @@ -131,4 +149,9 @@ extern enum pnfs_try_status objlayout_write_pagelist( struct nfs_write_data *, int how); +extern void objlayout_encode_layoutreturn( + struct pnfs_layout_hdr *, + struct xdr_stream *, + const struct nfs4_layoutreturn_args *); + #endif /* _OBJLAYOUT_H */ -- cgit v1.2.3 From a0fe8bf427f4987d7b82678292ca03cfd7331467 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Sun, 22 May 2011 19:54:13 +0300 Subject: pnfs-obj: objlayout_encode_layoutcommit implementation * Define API for io-engines to report delta_space_used in IOs * Encode the osd-layout specific information of the layoutcommit XDR buffer. Signed-off-by: Boaz Harrosh Signed-off-by: Benny Halevy --- fs/nfs/objlayout/objlayout.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'fs/nfs/objlayout/objlayout.h') diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index b0bb975058e..a8244c8e042 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h @@ -51,6 +51,14 @@ struct objlayout { struct pnfs_layout_hdr pnfs_layout; + /* for layout_commit */ + enum osd_delta_space_valid_enum { + OBJ_DSU_INIT = 0, + OBJ_DSU_VALID, + OBJ_DSU_INVALID, + } delta_space_valid; + s64 delta_space_used; /* consumed by write ops */ + /* for layout_return */ spinlock_t lock; struct list_head err_list; @@ -119,6 +127,23 @@ extern void objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, struct pnfs_osd_objid *pooid, int osd_error, u64 offset, u64 length, bool is_write); +static inline void +objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) +{ + struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); + + /* If one of the I/Os errored out and the delta_space_used was + * invalid we render the complete report as invalid. Protocol mandate + * the DSU be accurate or not reported. + */ + spin_lock(&objlay->lock); + if (objlay->delta_space_valid != OBJ_DSU_INVALID) { + objlay->delta_space_valid = OBJ_DSU_VALID; + objlay->delta_space_used += space_used; + } + spin_unlock(&objlay->lock); +} + extern void objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync); extern void objlayout_write_done(struct objlayout_io_state *state, @@ -149,6 +174,11 @@ extern enum pnfs_try_status objlayout_write_pagelist( struct nfs_write_data *, int how); +extern void objlayout_encode_layoutcommit( + struct pnfs_layout_hdr *, + struct xdr_stream *, + const struct nfs4_layoutcommit_args *); + extern void objlayout_encode_layoutreturn( struct pnfs_layout_hdr *, struct xdr_stream *, -- cgit v1.2.3