Logo Search packages:      
Sourcecode: ufsutils version File versions  Download package

ffs_rawread.c

/*-
 * Copyright (c) 2000-2003 Tor Egge
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/sys/ufs/ffs/ffs_rawread.c,v 1.29 2007/02/04 23:42:02 tegge Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/proc.h>
#include <sys/limits.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/conf.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static int ffs_rawread_readahead(struct vnode *vp,
                         caddr_t udata,
                         off_t offset,
                         size_t len,
                         struct thread *td,
                         struct buf *bp,
                         caddr_t sa);
static int ffs_rawread_main(struct vnode *vp,
                      struct uio *uio);

static int ffs_rawread_sync(struct vnode *vp, struct thread *td);

int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);

void ffs_rawread_setup(void);

SYSCTL_DECL(_vfs_ffs);

static int ffsrawbufcnt = 4;
SYSCTL_INT(_vfs_ffs, OID_AUTO, ffsrawbufcnt, CTLFLAG_RD, &ffsrawbufcnt, 0,
         "Buffers available for raw reads");

static int allowrawread = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, allowrawread, CTLFLAG_RW, &allowrawread, 0,
         "Flag to enable raw reads");

static int rawreadahead = 1;
SYSCTL_INT(_vfs_ffs, OID_AUTO, rawreadahead, CTLFLAG_RW, &rawreadahead, 0,
         "Flag to enable readahead for long raw reads");


void
ffs_rawread_setup(void)
{
      ffsrawbufcnt = (nswbuf > 100 ) ? (nswbuf - (nswbuf >> 4)) : nswbuf - 8;
}


static int
ffs_rawread_sync(struct vnode *vp, struct thread *td)
{
      int spl;
      int error;
      int upgraded;
      struct bufobj *bo;
      struct mount *mp;

      /* Check for dirty mmap, pending writes and dirty buffers */
      spl = splbio();
      VI_LOCK(vp);
      bo = &vp->v_bufobj;
      if (bo->bo_numoutput > 0 ||
          bo->bo_dirty.bv_cnt > 0 ||
          (vp->v_iflag & VI_OBJDIRTY) != 0) {
            splx(spl);
            VI_UNLOCK(vp);
            
            if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
                  if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE)
                        upgraded = 1;
                  else
                        upgraded = 0;
                  VOP_UNLOCK(vp, 0, td);
                  (void) vn_start_write(vp, &mp, V_WAIT);
                  VOP_LOCK(vp, LK_EXCLUSIVE, td);
            } else if (VOP_ISLOCKED(vp, td) != LK_EXCLUSIVE) {
                  upgraded = 1;
                  /* Upgrade to exclusive lock, this might block */
                  VOP_LOCK(vp, LK_UPGRADE, td);
            } else
                  upgraded = 0;
                  
            
            VI_LOCK(vp);
            /* Check if vnode was reclaimed while unlocked. */
            if ((vp->v_iflag & VI_DOOMED) != 0) {
                  VI_UNLOCK(vp);
                  if (upgraded != 0)
                        VOP_LOCK(vp, LK_DOWNGRADE, td);
                  vn_finished_write(mp);
                  return (EIO);
            }
            /* Attempt to msync mmap() regions to clean dirty mmap */ 
            if ((vp->v_iflag & VI_OBJDIRTY) != 0) {
                  VI_UNLOCK(vp);
                  if (vp->v_object != NULL) {
                        VM_OBJECT_LOCK(vp->v_object);
                        vm_object_page_clean(vp->v_object, 0, 0, OBJPC_SYNC);
                        VM_OBJECT_UNLOCK(vp->v_object);
                  }
                  VI_LOCK(vp);
            }

            /* Wait for pending writes to complete */
            spl = splbio();
            error = bufobj_wwait(&vp->v_bufobj, 0, 0);
            if (error != 0) {
                  /* XXX: can't happen with a zero timeout ??? */
                  splx(spl);
                  VI_UNLOCK(vp);
                  if (upgraded != 0)
                        VOP_LOCK(vp, LK_DOWNGRADE, td);
                  vn_finished_write(mp);
                  return (error);
            }
            /* Flush dirty buffers */
            if (bo->bo_dirty.bv_cnt > 0) {
                  splx(spl);
                  VI_UNLOCK(vp);
                  if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) {
                        if (upgraded != 0)
                              VOP_LOCK(vp, LK_DOWNGRADE, td);
                        vn_finished_write(mp);
                        return (error);
                  }
                  VI_LOCK(vp);
                  spl = splbio();
                  if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0)
                        panic("ffs_rawread_sync: dirty bufs");
            }
            splx(spl);
            VI_UNLOCK(vp);
            if (upgraded != 0)
                  VOP_LOCK(vp, LK_DOWNGRADE, td);
            vn_finished_write(mp);
      } else {
            splx(spl);
            VI_UNLOCK(vp);
      }
      return 0;
}


static int
ffs_rawread_readahead(struct vnode *vp,
                  caddr_t udata,
                  off_t offset,
                  size_t len,
                  struct thread *td,
                  struct buf *bp,
                  caddr_t sa)
{
      int error;
      u_int iolen;
      off_t blockno;
      int blockoff;
      int bsize;
      struct vnode *dp;
      int bforwards;
      struct inode *ip;
      ufs2_daddr_t blkno;
      
      bsize = vp->v_mount->mnt_stat.f_iosize;
      
      ip = VTOI(vp);
      dp = ip->i_devvp;

      iolen = ((vm_offset_t) udata) & PAGE_MASK;
      bp->b_bcount = len;
      if (bp->b_bcount + iolen > bp->b_kvasize) {
            bp->b_bcount = bp->b_kvasize;
            if (iolen != 0)
                  bp->b_bcount -= PAGE_SIZE;
      }
      bp->b_flags = 0;  /* XXX necessary ? */
      bp->b_iocmd = BIO_READ;
      bp->b_iodone = bdone;
      bp->b_data = udata;
      bp->b_saveaddr = sa;
      blockno = offset / bsize;
      blockoff = (offset % bsize) / DEV_BSIZE;
      if ((daddr_t) blockno != blockno) {
            return EINVAL; /* blockno overflow */
      }
      
      bp->b_lblkno = bp->b_blkno = blockno;
      
      error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, NULL, &bforwards, NULL);
      if (error != 0)
            return error;
      if (blkno == -1) {

            /* Fill holes with NULs to preserve semantics */
            
            if (bp->b_bcount + blockoff * DEV_BSIZE > bsize)
                  bp->b_bcount = bsize - blockoff * DEV_BSIZE;
            bp->b_bufsize = bp->b_bcount;
            
            if (vmapbuf(bp) < 0)
                  return EFAULT;
            
            if (ticks - PCPU_GET(switchticks) >= hogticks)
                  uio_yield();
            bzero(bp->b_data, bp->b_bufsize);

            /* Mark operation completed (similar to bufdone()) */

            bp->b_resid = 0;
            bp->b_flags |= B_DONE;
            return 0;
      }
      bp->b_blkno = blkno + blockoff;
      bp->b_offset = bp->b_iooffset = (blkno + blockoff) * DEV_BSIZE;
      
      if (bp->b_bcount + blockoff * DEV_BSIZE > bsize * (1 + bforwards))
            bp->b_bcount = bsize * (1 + bforwards) - blockoff * DEV_BSIZE;
      bp->b_bufsize = bp->b_bcount;
      
      if (vmapbuf(bp) < 0)
            return EFAULT;
      
      BO_STRATEGY(&dp->v_bufobj, bp);
      return 0;
}


static int
ffs_rawread_main(struct vnode *vp,
             struct uio *uio)
{
      int error, nerror;
      struct buf *bp, *nbp, *tbp;
      caddr_t sa, nsa, tsa;
      u_int iolen;
      int spl;
      caddr_t udata;
      long resid;
      off_t offset;
      struct thread *td;
      
      td = uio->uio_td ? uio->uio_td : curthread;
      udata = uio->uio_iov->iov_base;
      resid = uio->uio_resid;
      offset = uio->uio_offset;

      /*
       * keep the process from being swapped
       */
      PHOLD(td->td_proc);
      
      error = 0;
      nerror = 0;
      
      bp = NULL;
      nbp = NULL;
      sa = NULL;
      nsa = NULL;
      
      while (resid > 0) {
            
            if (bp == NULL) { /* Setup first read */
                  /* XXX: Leave some bufs for swap */
                  bp = getpbuf(&ffsrawbufcnt);
                  sa = bp->b_data;
                  pbgetvp(vp, bp);
                  error = ffs_rawread_readahead(vp, udata, offset,
                                         resid, td, bp, sa);
                  if (error != 0)
                        break;
                  
                  if (resid > bp->b_bufsize) { /* Setup fist readahead */
                        /* XXX: Leave bufs for swap */
                        if (rawreadahead != 0) 
                              nbp = trypbuf(&ffsrawbufcnt);
                        else
                              nbp = NULL;
                        if (nbp != NULL) {
                              nsa = nbp->b_data;
                              pbgetvp(vp, nbp);
                              
                              nerror = ffs_rawread_readahead(vp, 
                                                       udata +
                                                       bp->b_bufsize,
                                                       offset +
                                                       bp->b_bufsize,
                                                       resid -
                                                       bp->b_bufsize,
                                                       td,
                                                       nbp,
                                                       nsa);
                              if (nerror) {
                                    pbrelvp(nbp);
                                    relpbuf(nbp, &ffsrawbufcnt);
                                    nbp = NULL;
                              }
                        }
                  }
            }
            
            spl = splbio();
            bwait(bp, PRIBIO, "rawrd");
            splx(spl);
            
            vunmapbuf(bp);
            
            iolen = bp->b_bcount - bp->b_resid;
            if (iolen == 0 && (bp->b_ioflags & BIO_ERROR) == 0) {
                  nerror = 0; /* Ignore possible beyond EOF error */
                  break; /* EOF */
            }
            
            if ((bp->b_ioflags & BIO_ERROR) != 0) {
                  error = bp->b_error;
                  break;
            }
            resid -= iolen;
            udata += iolen;
            offset += iolen;
            if (iolen < bp->b_bufsize) {
                  /* Incomplete read.  Try to read remaining part */
                  error = ffs_rawread_readahead(vp,
                                          udata,
                                          offset,
                                          bp->b_bufsize - iolen,
                                          td,
                                          bp,
                                          sa);
                  if (error != 0)
                        break;
            } else if (nbp != NULL) { /* Complete read with readahead */
                  
                  tbp = bp;
                  bp = nbp;
                  nbp = tbp;
                  
                  tsa = sa;
                  sa = nsa;
                  nsa = tsa;
                  
                  if (resid <= bp->b_bufsize) { /* No more readaheads */
                        pbrelvp(nbp);
                        relpbuf(nbp, &ffsrawbufcnt);
                        nbp = NULL;
                  } else { /* Setup next readahead */
                        nerror = ffs_rawread_readahead(vp,
                                                 udata +
                                                 bp->b_bufsize,
                                                 offset +
                                                 bp->b_bufsize,
                                                 resid -
                                                 bp->b_bufsize,
                                                 td,
                                                 nbp,
                                                 nsa);
                        if (nerror != 0) {
                              pbrelvp(nbp);
                              relpbuf(nbp, &ffsrawbufcnt);
                              nbp = NULL;
                        }
                  }
            } else if (nerror != 0) {/* Deferred Readahead error */
                  break;            
            }  else if (resid > 0) { /* More to read, no readahead */
                  error = ffs_rawread_readahead(vp, udata, offset,
                                          resid, td, bp, sa);
                  if (error != 0)
                        break;
            }
      }
      
      if (bp != NULL) {
            pbrelvp(bp);
            relpbuf(bp, &ffsrawbufcnt);
      }
      if (nbp != NULL) {                  /* Run down readahead buffer */
            spl = splbio();
            bwait(nbp, PRIBIO, "rawrd");
            splx(spl);
            vunmapbuf(nbp);
            pbrelvp(nbp);
            relpbuf(nbp, &ffsrawbufcnt);
      }
      
      if (error == 0)
            error = nerror;
      PRELE(td->td_proc);
      uio->uio_iov->iov_base = udata;
      uio->uio_resid = resid;
      uio->uio_offset = offset;
      return error;
}


int
ffs_rawread(struct vnode *vp,
          struct uio *uio,
          int *workdone)
{
      if (allowrawread != 0 &&
          uio->uio_iovcnt == 1 && 
          uio->uio_segflg == UIO_USERSPACE &&
          uio->uio_resid == uio->uio_iov->iov_len &&
          (((uio->uio_td != NULL) ? uio->uio_td : curthread)->td_pflags &
           TDP_DEADLKTREAT) == 0) {
            int secsize;            /* Media sector size */
            off_t filebytes;  /* Bytes left of file */
            int blockbytes;         /* Bytes left of file in full blocks */
            int partialbytes; /* Bytes in last partial block */
            int skipbytes;          /* Bytes not to read in ffs_rawread */
            struct inode *ip;
            int error;
            

            /* Only handle sector aligned reads */
            ip = VTOI(vp);
            secsize = ip->i_devvp->v_bufobj.bo_bsize;
            if ((uio->uio_offset & (secsize - 1)) == 0 &&
                (uio->uio_resid & (secsize - 1)) == 0) {
                  
                  /* Sync dirty pages and buffers if needed */
                  error = ffs_rawread_sync(vp,
                                     (uio->uio_td != NULL) ?
                                     uio->uio_td : curthread);
                  if (error != 0)
                        return error;
                  
                  /* Check for end of file */
                  if (ip->i_size > uio->uio_offset) {
                        filebytes = ip->i_size - uio->uio_offset;

                        /* No special eof handling needed ? */
                        if (uio->uio_resid <= filebytes) {
                              *workdone = 1;
                              return ffs_rawread_main(vp, uio);
                        }
                        
                        partialbytes = ((unsigned int) ip->i_size) %
                              ip->i_fs->fs_bsize;
                        blockbytes = (int) filebytes - partialbytes;
                        if (blockbytes > 0) {
                              skipbytes = uio->uio_resid -
                                    blockbytes;
                              uio->uio_resid = blockbytes;
                              error = ffs_rawread_main(vp, uio);
                              uio->uio_resid += skipbytes;
                              if (error != 0)
                                    return error;
                              /* Read remaining part using buffer */
                        }
                  }
            }
      }
      *workdone = 0;
      return 0;
}

Generated by  Doxygen 1.6.0   Back to index