home *** CD-ROM | disk | FTP | other *** search
- Xref: sparky comp.unix.bsd:4485 comp.bugs.4bsd:205
- Newsgroups: comp.unix.bsd,comp.bugs.4bsd
- Path: sparky!uunet!iWarp.intel.com|ichips!intelhf!agora!davidg
- From: davidg@agora.rain.com (David Greenman)
- Subject: 386BSD kernel bugs w/fixes
- Message-ID: <1992Aug20.105712.26856@agora.uucp>
- Sender: davidg@agora.uucp (David Greenman)
- Organization: Open Communications Forum
- Date: Thu, 20 Aug 1992 10:57:12 GMT
- Lines: 176
-
-
- This patch-kit contains a number of bugfixes for 386BSD.
-
- 1) The filesystem buffer cache has a major problem with the proper
- queueing of buffers to the correct queue. Specifically, in the
- files /sys/ufs/ufs_vnops.c and /sys/kern/spec_vnops.c the flag
- B_AGE is incorrectly getting set in bp->b_flags when a read
- finishes. This causes almost all buffers to be released to the
- AGE queue where they are then immediately reused for the next
- read - resulting in worst case cache performance. My under-
- standing of the original intent in the setting of B_AGE (in the
- case of a buffered read - a different intent in a write) was to
- cause the quick re-use of buffers that are fragments of a complete
- filesystem block with the assumption that the same request for
- a fragment wasn't likely. The assumption and the algorithm that
- determines a 'fragment' are bogus. The fix is to simply not set
- the flag in the case of a read. This substantially improves file-
- system performance in 386BSD.
- 2) The FS buffer cache has a bug in the hash index calculation. In the
- file /sys/sys/buf.h the index for bufhash[] that is used in the
- #define BUFHASH almost always yields a zero index. This completely
- defeats the purpose/advantages of the buffer hash table. The
- problem developed during the conversion to a vnode based (rather
- than device based) buffer cache. The fix is to change the equa-
- tion to one that provides a fairly even distribution in the
- hash table. My fix for this substantially improves the perform-
- ance of the incore() routine which finds in-cache buffers.
- 3) The FS buffer cache has another bug that causes memory for buffers to
- be allocated twice. The bug is in the file /sys/kern/vfs__bio.c,
- in the function getnewbuf(). The problem is that bp->bufsize is
- not initialized when a new buffer is used for the first time.
- Fortunately, allocbuf() frees the previously malloc'd space so no
- memory is lost. However, the variable freebufspace is left incor-
- rectly reduced. The fix for this is to fill in bp->bufsize just
- after mallocing the memory in getnewbuf().
- 4) The virtual memory system has a bug that will prevent a process from
- growing in its virtual size greater than 6MB data/stack. This is
- caused by the incorrect process limit being checked in /sys/vm/
- vm_unix.c. As they are defined in /sys/i386/include/vmparam.h,
- the constants DFLDSIZ and DFLSSIZ specify the initial limit (the
- limit when the process is created in execve()) for a process. The
- constants MAXTSIZ and MAXSSIZ specify the maximum that a process
- can grow. These constants are copied into process zero's proc->
- p_limit[] and are used as a prototype for all processes. rlim_cur
- contains DFLDSIZ/DFLSSIZ, and rlim_max contains MAXDSIZ/MAXSSIZ.
- rlim_max should therefore be used to limit the process's virtual
- size growth. However, the check is against rlim_cur. The fix is
- to change the check to rlim_max. One might also note that 6MB for
- DFLDSIZ/DFLSSIZ in vmparam.h is very low and should be increased
- to something more reasonable. Most vendors specify an initial
- limit of 32MB.
- 5) The virtual memory system in 386BSD 0.1 has not yet been completed.
- The system has a stub routine for swapout() which does nothing
- but unset SLOAD and remove the process from the run queue. It
- does not pageout or swapout a single page. The calling of it
- with this behavior causes unnecessary scheduling overhead, and
- also causes the 'ps' command to omit some process information. The
- temporary fix is to comment out the call to swap_threads() in
- /sys/vm/vm_pageout.c. Of course this fix should be removed when
- the VM code has been completed. (Actually, with a good paging
- algorithm, swapping shouldn't be necessary...but then there's
- tradition... I'm working on writing some better paging code for
- 386BSD.)
-
-
- ---
- David Greenman
- davidg%implode@percy.rain.com
-
- Here are the patches:
-
-
- *** /sys/ufs/ufs_vnops.c.01orig Thu Aug 13 23:11:01 1992
- --- /sys/ufs/ufs_vnops.c Sun Aug 16 02:49:02 1992
- ***************
- *** 497,504 ****
- --- 497,506 ----
- return (error);
- }
- error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
- + #if 0
- if (n + on == fs->fs_bsize || uio->uio_offset == ip->i_size)
- bp->b_flags |= B_AGE;
- + #endif
- brelse(bp);
- } while (error == 0 && uio->uio_resid > 0 && n != 0);
- return (error);
- *** /sys/kern/spec_vnops.c.01orig Sun Jun 7 16:42:13 1992
- --- /sys/kern/spec_vnops.c Sun Aug 16 02:54:32 1992
- ***************
- *** 218,225 ****
- --- 218,227 ----
- return (error);
- }
- error = uiomove(bp->b_un.b_addr + on, n, uio);
- + #if 0
- if (n + on == bsize)
- bp->b_flags |= B_AGE;
- + #endif
- brelse(bp);
- } while (error == 0 && uio->uio_resid > 0 && n != 0);
- return (error);
- *** /sys/sys/buf.h.01orig Sat Jun 20 20:02:53 1992
- --- /sys/sys/buf.h Sun Aug 16 03:34:06 1992
- ***************
- *** 112,121 ****
- #define RND (MAXBSIZE/DEV_BSIZE)
- #if ((BUFHSZ&(BUFHSZ-1)) == 0)
- #define BUFHASH(dvp, dblkno) \
- ! ((struct buf *)&bufhash[((int)(dvp)+(((int)(dblkno))/RND))&(BUFHSZ-1)])
- #else
- #define BUFHASH(dvp, dblkno) \
- ! ((struct buf *)&bufhash[((int)(dvp)+(((int)(dblkno))/RND)) % BUFHSZ])
- #endif
-
- struct buf *buf; /* the buffer pool itself */
- --- 112,121 ----
- #define RND (MAXBSIZE/DEV_BSIZE)
- #if ((BUFHSZ&(BUFHSZ-1)) == 0)
- #define BUFHASH(dvp, dblkno) \
- ! ((struct buf *)&bufhash[((int)(dvp)/sizeof(struct vnode)+(int)(dblkno))&(BUFHSZ-1)])
- #else
- #define BUFHASH(dvp, dblkno) \
- ! ((struct buf *)&bufhash[((int)(dvp)/sizeof(struct vnode)+(int)(dblkno)) % BUFHSZ])
- #endif
-
- struct buf *buf; /* the buffer pool itself */
- *** /sys/kern/vfs__bio.c.01orig Sun Jun 28 20:49:18 1992
- --- /sys/kern/vfs__bio.c Mon Aug 17 02:40:09 1992
- ***************
- *** 344,349 ****
- --- 344,350 ----
- bp->b_flags = B_BUSY | B_INVAL;
- bremfree(bp);
- bp->b_un.b_addr = addr;
- + bp->b_bufsize = sz;
- goto fillin;
- }
-
- *** /sys/vm/vm_unix.c.01orig Sun Jul 5 11:53:46 1992
- --- /sys/vm/vm_unix.c Mon Aug 17 04:53:40 1992
- ***************
- *** 65,71 ****
-
- old = (vm_offset_t)vm->vm_daddr;
- new = round_page(uap->nsiz);
- ! if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur)
- return(ENOMEM);
- old = round_page(old + ctob(vm->vm_dsize));
- diff = new - old;
- --- 65,71 ----
-
- old = (vm_offset_t)vm->vm_daddr;
- new = round_page(uap->nsiz);
- ! if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_max)
- return(ENOMEM);
- old = round_page(old + ctob(vm->vm_dsize));
- diff = new - old;
- ***************
- *** 113,119 ****
- * Really need to check vs limit and increment stack size if ok.
- */
- si = clrnd(btoc(vm->vm_maxsaddr + MAXSSIZ - sp) - vm->vm_ssize);
- ! if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
- return (0);
- vm->vm_ssize += si;
- return (1);
- --- 113,119 ----
- * Really need to check vs limit and increment stack size if ok.
- */
- si = clrnd(btoc(vm->vm_maxsaddr + MAXSSIZ - sp) - vm->vm_ssize);
- ! if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_max))
- return (0);
- vm->vm_ssize += si;
- return (1);
-
-