home *** CD-ROM | disk | FTP | other *** search
- /*
- * bufmgr.c -- buffer manager interface routines
- *
- * Identification:
- * $Header: /private/postgres/src/storage/buffer/RCS/bufmgr.c,v 1.74 1992/07/13 17:37:28 hong Exp $
- *
- * BufferAlloc() -- lookup a buffer in the buffer table. If
- * it isn't there add it, but do not read it into memory.
- * This is used when we are about to reinitialize the
- * buffer so don't care what the current disk contents are.
- * BufferAlloc() pins the new buffer in memory.
- *
- * ReadBuffer() -- same as BufferAlloc() but reads the data
- * on a buffer cache miss.
- *
- * ReleaseBuffer() -- unpin the buffer
- *
- * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
- * but don't unpin. The disk IO is delayed until buffer
- * replacement if LateWrite flag is set.
- *
- * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
- *
- * DirtyBufferCopy() -- For a given dbid/relid/blockno, if the buffer is
- * in the cache and is dirty, mark it clean and copy
- * it to the requested location. This is a logical
- * write, and has been installed to support the cache
- * management code for write-once storage managers.
- *
- * FlushBuffer() -- as above but never delayed write.
- *
- * BufferSync() -- flush all dirty buffers in the buffer pool.
- *
- * InitBufferPool() -- Init the buffer module.
- *
- * See other files:
- * freelist.c -- chooses victim for buffer replacement
- * buf_table.c -- manages the buffer lookup table
- */
- #include <sys/file.h>
- #include <stdio.h>
- #include <math.h>
- #include <signal.h>
-
- #include "storage/buf_internals.h"
- #include "storage/bufmgr.h"
-
- #include "storage/fd.h"
- #include "storage/ipc.h"
- #include "storage/ipci.h"
- #include "storage/shmem.h"
- #include "storage/spin.h"
- #include "storage/smgr.h"
- #include "storage/lmgr.h"
- #include "tmp/miscadmin.h"
- #include "utils/hsearch.h"
- #include "utils/log.h"
-
- /*
- * if BMTRACE is defined, we trace the last 200 buffer allocations and
- * deallocations in a circular buffer in shared memory.
- */
- #ifdef BMTRACE
- bmtrace *TraceBuf;
- int *CurTraceBuf;
- #define BMT_LIMIT 200
- #endif /* BMTRACE */
-
- int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */
- int Data_Descriptors;
- int Free_List_Descriptor;
- int Lookup_List_Descriptor;
- int Num_Descriptors;
-
- BufferDesc *BufferDescriptors;
- BufferBlock BufferBlocks;
- #ifndef HAS_TEST_AND_SET
- static int *NWaitIOBackendP;
- #endif
-
- Buffer BufferDescriptorGetBuffer();
-
- int *PrivateRefCount;
- int *LastRefCount; /* refcounts of last ExecMain level */
-
- /*
- * Data Structures:
- * buffers live in a freelist and a lookup data structure.
- *
- *
- * Buffer Lookup:
- * Two important notes. First, the buffer has to be
- * available for lookup BEFORE an IO begins. Otherwise
- * a second process trying to read the buffer will
- * allocate its own copy and the buffeer pool will
- * become inconsistent.
- *
- * Buffer Replacement:
- * see freelist.c. A buffer cannot be replaced while in
- * use either by data manager or during IO.
- *
- * WriteBufferBack:
- * currently, a buffer is only written back at the time
- * it is selected for replacement. It should
- * be done sooner if possible to reduce latency of
- * BufferAlloc(). Maybe there should be a daemon process.
- *
- * Synchronization/Locking:
- *
- * BufMgrLock lock -- must be acquired before manipulating the
- * buffer queues (lookup/freelist). Must be released
- * before exit and before doing any IO.
- *
- * IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
- * It must be set when an IO is initiated and cleared at
- * the end of the IO. It is there to make sure that one
- * process doesn't start to use a buffer while another is
- * faulting it in. see IOWait/IOSignal.
- *
- * refcount -- A buffer is pinned during IO and immediately
- * after a BufferAlloc(). A buffer is always either pinned
- * or on the freelist but never both. The buffer must be
- * released, written, or flushed before the end of
- * transaction.
- *
- * PrivateRefCount -- Each buffer also has a private refcount the keeps
- * track of the number of times the buffer is pinned in the current
- * processes. This is used for two purposes, first, if we pin a
- * a buffer more than once, we only need to change the shared refcount
- * once, thus only lock the buffer pool once, second, when a transaction
- * aborts, it should only unpin the buffers exactly the number of times it
- * has pinned them, so that it will not blow away buffers of another
- * backend.
- *
- */
-
- SPINLOCK BufMgrLock;
-
- /* delayed write: TRUE on, FALSE off */
- int LateWrite = TRUE;
-
- int ReadBufferCount;
- int BufferHitCount;
- int BufferFlushCount;
-
- /* ---------------------------------------------------
- * RelationGetBufferWithBuffer
- * see if the given buffer is what we want
- * if yes, we don't need to bother the buffer manager
- * ---------------------------------------------------
- */
- Buffer
- RelationGetBufferWithBuffer(relation, blockNumber, buffer)
- Relation relation;
- BlockNumber blockNumber;
- Buffer buffer;
- {
- BufferDesc *bufHdr;
- LRelId lrelId;
-
- if (BufferIsValid(buffer)) {
- bufHdr = BufferGetBufferDescriptor(buffer);
- lrelId = RelationGetLRelId(relation);
- if (bufHdr->tag.blockNum == blockNumber &&
- bufHdr->tag.relId.relId == lrelId.relId &&
- bufHdr->tag.relId.dbId == lrelId.dbId)
- return buffer;
- }
- return(ReadBuffer(relation,blockNumber));
- }
-
- /*
- * ReadBuffer -- returns a buffer containing the requested
- * block of the requested relation. If the blknum
- * requested is NEW_BLOCK, extend the relation file and
- * allocate a new block.
- *
- * Returns: the buffer number for the buffer containing
- * the block read or NULL on an error.
- *
- * Assume when this function is called, that reln has been
- * opened already.
- */
-
- extern int ShowPinTrace;
- #undef ReadBuffer
-
- Buffer
- ReadBuffer(reln, blockNum)
- Relation reln;
- BlockNumber blockNum;
- {
- return ReadBufferWithBufferLock(reln, blockNum, false);
- }
-
- bool
- is_userbuffer(buffer)
- Buffer buffer;
- {
- BufferDesc *buf;
- buf = BufferGetBufferDescriptor(buffer);
- if (strncmp(&buf->sb_relname, "pg_", 3) == 0)
- return false;
- else
- return true;
- }
-
- Buffer
- ReadBuffer_Debug(file, line, reln, blockNum)
- String file;
- int line;
- Relation reln;
- BlockNumber blockNum;
- {
- Buffer buffer;
-
- buffer = ReadBufferWithBufferLock(reln, blockNum, false);
- if (ShowPinTrace && is_userbuffer(buffer)) {
- BufferDesc *buf;
- buf = BufferGetBufferDescriptor(buffer);
- fprintf(stderr, "PIN(RD) %d relname = %s, blockNum = %d, refcount = %d, file: %s, line: %d\n", buffer, &(buf->sb_relname), buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line);
- }
- return buffer;
- }
-
- /*
- * ReadBufferWithBufferLock -- does the work of
- * ReadBuffer() but with the possibility that
- * the buffer lock has already been held. this
- * is yet another effort to reduce the number of
- * semops in the system.
- *
- * This routine locks the buffer pool before calling BufferAlloc to
- * avoid two semops.
- */
-
- Buffer
- ReadBufferWithBufferLock(reln,blockNum, bufferLockHeld)
- Relation reln;
- BlockNumber blockNum;
- bool bufferLockHeld;
- {
- BufferDesc * bufHdr;
- int extend; /* extending the file by one block */
- int status;
- Boolean found;
-
- ReadBufferCount++;
- extend = (blockNum == NEW_BLOCK);
- /* lookup the buffer. IO_IN_PROGRESS is set if the requested
- * block is not currently in memory.
- */
- bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld);
- if (! bufHdr) {
- return(InvalidBuffer);
- }
-
- /* if its already in the buffer pool, we're done */
- if (found) {
- /*
- * This happens when a bogus buffer was returned previously and is
- * floating around in the buffer pool. A routine calling this would
- * want this extended.
- */
- if (extend) {
- (void) smgrextend(bufHdr->bufsmgr, reln,
- (char *) MAKE_PTR(bufHdr->data));
- }
- BufferHitCount++;
- return(BufferDescriptorGetBuffer(bufHdr));
- }
-
- /*
- * if we have gotten to this point, the reln pointer must be ok
- * and the relation file must be open.
- */
-
- if (extend) {
- status = smgrextend(bufHdr->bufsmgr, reln,
- (char *) MAKE_PTR(bufHdr->data));
- } else {
- status = smgrread(bufHdr->bufsmgr, reln, blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
-
- /* lock buffer manager again to update IO IN PROGRESS */
- SpinAcquire(BufMgrLock);
-
- if (status == SM_FAIL) {
- /* IO Failed. cleanup the data structures and go home */
-
- if (! BufTableDelete(bufHdr)) {
- SpinRelease(BufMgrLock);
- elog(FATAL,"BufRead: buffer table broken after IO error\n");
- }
- /* remember that BufferAlloc() pinned the buffer */
- UnpinBuffer(bufHdr);
-
- /*
- * Have to reset the flag so that anyone waiting for
- * the buffer can tell that the contents are invalid.
- */
- bufHdr->flags |= BM_IO_ERROR;
-
- } else {
- /* IO Succeeded. clear the flags, finish buffer update */
-
- bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
- }
-
- /* If anyone was waiting for IO to complete, wake them up now */
- #ifdef HAS_TEST_AND_SET
- S_UNLOCK(&(bufHdr->io_in_progress_lock));
- #else
- if (bufHdr->refcount > 1)
- SignalIO(bufHdr);
- #endif
-
- SpinRelease(BufMgrLock);
-
- return(BufferDescriptorGetBuffer(bufHdr));
- }
-
- /*
- * BufferAlloc -- Get a buffer from the buffer pool but dont
- * read it.
- *
- * Returns: descriptor for buffer
- */
- BufferDesc *
- BufferAlloc(reln, blockNum, foundPtr, bufferLockHeld)
- Relation reln;
- BlockNumber blockNum;
- Boolean *foundPtr;
- bool bufferLockHeld;
- {
- BufferDesc *buf;
- BufferTag newTag; /* identity of requested block */
- Boolean inProgress; /* buffer undergoing IO */
- int status;
- Boolean newblock = FALSE;
- BufferDesc oldbufdesc;
-
-
- /* create a new tag so we can lookup the buffer */
- /* assume that the relation is already open */
- if (blockNum == NEW_BLOCK) {
- newblock = TRUE;
- blockNum = smgrnblocks(reln->rd_rel->relsmgr, reln);
- }
-
- INIT_BUFFERTAG(&newTag,reln,blockNum);
-
- if (!bufferLockHeld)
- SpinAcquire(BufMgrLock);
-
- /* see if the block is in the buffer pool already */
- buf = BufTableLookup(&newTag);
- if (buf != NULL) {
- /* Found it. Now, (a) pin the buffer so no
- * one steals it from the buffer pool,
- * (b) check IO_IN_PROGRESS, someone may be
- * faulting the buffer into the buffer pool.
- */
-
- PinBuffer(buf);
- inProgress = (buf->flags & BM_IO_IN_PROGRESS);
-
- *foundPtr = TRUE;
- if (inProgress) {
- WaitIO(buf, BufMgrLock);
- if (buf->flags & BM_IO_ERROR) {
- /* wierd race condition:
- *
- * We were waiting for someone else to read the buffer.
- * While we were waiting, the reader boof'd in some
- * way, so the contents of the buffer are still
- * invalid. By saying that we didn't find it, we can
- * make the caller reinitialize the buffer. If two
- * processes are waiting for this block, both will
- * read the block. The second one to finish may overwrite
- * any updates made by the first. (Assume higher level
- * synchronization prevents this from happening).
- *
- * This is never going to happen, don't worry about it.
- */
- *foundPtr = FALSE;
- }
- }
- #ifdef BMTRACE
- _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND);
- #endif BMTRACE
-
- SpinRelease(BufMgrLock);
-
- return(buf);
- }
-
- *foundPtr = FALSE;
-
- /* Didn't find it in the buffer pool. We'll have
- * to initialize a new buffer. First, grab one from
- * the free list. If it's dirty, flush it to disk.
- * Remember to unlock BufMgr spinloc while doing the IOs.
- */
- buf = GetFreeBuffer();
- if (! buf) {
- /* out of free buffers. In trouble now. */
- SpinRelease(BufMgrLock);
- return(NULL);
- }
-
- /* There should be exactly one pin on the buffer
- * after it is allocated. It isnt in the buffer
- * table yet so no one but us should have a pin.
- */
-
- Assert(buf->refcount == 0);
- buf->refcount = 1;
- PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
-
- /*
- * Change the name of the buffer in the lookup table:
- *
- * Need to update the lookup table before the read starts.
- * If someone comes along looking for the buffer while
- * we are reading it in, we don't want them to allocate
- * a new buffer. For the same reason, we didn't want
- * to erase the buf table entry for the buffer we were
- * writing back until now, either.
- */
-
- if (! BufTableDelete(buf)) {
- SpinRelease(BufMgrLock);
- elog(FATAL,"buffer wasn't in the buffer table\n");
- }
-
- /* save the old buffer descriptor */
- oldbufdesc = *buf;
- if (buf->flags & BM_DIRTY) {
- /* must clear flag first because of wierd race
- * condition described below.
- */
- buf->flags &= ~BM_DIRTY;
- }
-
- /* record the database name and relation name for this buffer */
- strncpy((char *)&(buf->sb_relname),
- (char *)&(reln->rd_rel->relname),
- sizeof (NameData));
- strncpy((char *)&(buf->sb_dbname), MyDatabaseName, sizeof (NameData));
-
- /* remember which storage manager is responsible for it */
- buf->bufsmgr = reln->rd_rel->relsmgr;
-
- INIT_BUFFERTAG(&(buf->tag),reln,blockNum);
- if (! BufTableInsert(buf)) {
- SpinRelease(BufMgrLock);
- elog(FATAL,"Buffer in lookup table twice \n");
- }
-
- /* Buffer contents are currently invalid. Have
- * to mark IO IN PROGRESS so no one fiddles with
- * them until the read completes. If this routine
- * has been called simply to allocate a buffer, no
- * io will be attempted, so the flag isnt set.
- */
- buf->flags |= BM_IO_IN_PROGRESS;
- #ifdef HAS_TEST_AND_SET
- /* lock the io_in_progress_lock before the read so that
- * other process will wait on it
- */
- Assert(!buf->io_in_progress_lock);
- S_LOCK(&(buf->io_in_progress_lock));
- #endif
-
- #ifdef BMTRACE
- _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), reln->rd_id, blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND);
- #endif BMTRACE
-
- SpinRelease(BufMgrLock);
-
- /* XXX mao what is this? XXX */
- if (oldbufdesc.flags & BM_DIRTY) {
- (void) BufferReplace(&oldbufdesc);
- BufferFlushCount++;
- }
- return (buf);
- }
-
- /*
- * WriteBuffer--
- *
- * Pushes buffer contents to disk if LateWrite is
- * not set. Otherwise, marks contents as dirty.
- *
- * Assume that buffer is pinned. Assume that reln is
- * valid.
- *
- * Side Effects:
- * Pin count is decremented.
- */
-
- #undef WriteBuffer
-
- WriteBuffer(buffer)
- Buffer buffer;
- {
- BufferDesc *bufHdr;
-
- if (! LateWrite) {
- return(FlushBuffer(buffer));
- } else {
-
- if (BAD_BUFFER_ID(buffer)) {
- return(FALSE);
- }
- bufHdr = BufferGetBufferDescriptor(buffer);
-
- Assert(bufHdr->refcount > 0);
- SpinAcquire(BufMgrLock);
- bufHdr->flags |= BM_DIRTY;
- UnpinBuffer(bufHdr);
- SpinRelease(BufMgrLock);
- }
- return(TRUE);
- }
-
- WriteBuffer_Debug(file, line, buffer)
- String file;
- int line;
- Buffer buffer;
- {
- WriteBuffer(buffer);
- if (ShowPinTrace && is_userbuffer(buffer)) {
- BufferDesc *buf;
- buf = BufferGetBufferDescriptor(buffer);
- fprintf(stderr, "UNPIN(WR) %d relname = %s, blockNum = %d, refcount = %d, file: %s, line: %d\n", buffer, &(buf->sb_relname), buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line);
- }
- }
-
- /*
- * DirtyBufferCopy() -- Copy a given dirty buffer to the requested
- * destination.
- *
- * We treat this as a write. If the requested buffer is in the pool
- * and is dirty, we copy it to the location requested and mark it
- * clean. This routine supports the Sony jukebox storage manager,
- * which agrees to take responsibility for the data once we mark
- * it clean.
- */
-
- DirtyBufferCopy(dbid, relid, blkno, dest)
- ObjectId dbid;
- ObjectId relid;
- BlockNumber blkno;
- char *dest;
- {
- BufferDesc *buf;
- BufferTag btag;
-
- btag.relId.relId = relid;
- btag.relId.dbId = dbid;
- btag.blockNum = blkno;
-
- SpinAcquire(BufMgrLock);
- buf = BufTableLookup(&btag);
-
- if (buf == (BufferDesc *) NULL
- || !(buf->flags & BM_DIRTY)
- || !(buf->flags & BM_VALID)) {
- SpinRelease(BufMgrLock);
- return;
- }
-
- /* hate to do this holding the lock, but release and reacquire is slower */
- (void) bcopy((char *) MAKE_PTR(buf->data), dest, BLCKSZ);
-
- buf->flags &= ~BM_DIRTY;
-
- SpinRelease(BufMgrLock);
- }
-
- /*
- * BufferRewrite -- special version of WriteBuffer for
- * BufCopyCommit(). We want to write without
- * looking up the relation if possible.
- */
- Boolean
- BufferRewrite(buffer)
- Buffer buffer;
- {
- BufferDesc *bufHdr;
-
- if (BAD_BUFFER_ID(buffer)) {
- return(STATUS_ERROR);
- }
- bufHdr = BufferGetBufferDescriptor(buffer);
- Assert(bufHdr->refcount > 0);
-
- if (LateWrite) {
- SpinAcquire(BufMgrLock);
- bufHdr->flags |= BM_DIRTY;
- UnpinBuffer(bufHdr);
- SpinRelease(BufMgrLock);
- } else {
- BufferReplace(bufHdr);
- }
-
-
- return(STATUS_OK);
- }
-
-
- /*
- * FlushBuffer -- like WriteBuffer, but force the page to disk.
- */
- FlushBuffer(buffer)
- Buffer buffer;
- {
- BufferDesc *bufHdr;
- OID bufdb;
- OID bufrel;
- Relation reln;
- int status;
-
-
- if (BAD_BUFFER_ID(buffer)) {
- return(STATUS_ERROR);
- }
-
- bufHdr = BufferGetBufferDescriptor(buffer);
-
- /*
- * If the relation is not in our private cache, we don't bother trying
- * to instantiate it. Instead, we call the storage manager routine that
- * does a blind write. If we can get the reldesc, then we use the standard
- * write routine interface.
- */
-
- bufdb = bufHdr->tag.relId.dbId;
- bufrel = bufHdr->tag.relId.relId;
-
- if (bufdb == MyDatabaseId || bufdb == (OID) NULL)
- reln = RelationIdCacheGetRelation(bufrel);
- else
- reln = (Relation) NULL;
-
- if (reln != (Relation) NULL) {
- status = smgrflush(bufHdr->bufsmgr, reln, bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- } else {
-
- /* blind write always flushes */
- status = smgrblindwrt(bufHdr->bufsmgr, &bufHdr->sb_dbname,
- &bufHdr->sb_relname, bufdb, bufrel,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
-
- if (status == SM_FAIL) {
- elog(WARN, "FlushBuffer: cannot flush %d for %16s", bufHdr->tag.blockNum,
- reln->rd_rel->relname);
- /* NOTREACHED */
- return (STATUS_ERROR);
- }
-
- SpinAcquire(BufMgrLock);
- bufHdr->flags &= ~BM_DIRTY;
- UnpinBuffer(bufHdr);
- SpinRelease(BufMgrLock);
-
- return(STATUS_OK);
- }
-
- /*
- * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer
- * when the operation is complete.
- *
- * We know that the buffer is for a relation in our private cache,
- * because this routine is called only to write out buffers that
- * were changed by the executing backend.
- */
-
- WriteNoReleaseBuffer(buffer)
- Buffer buffer;
- {
- BufferDesc *bufHdr;
- Relation reln;
-
- if (! LateWrite) {
- return(FlushBuffer(buffer));
- } else {
-
- if (BAD_BUFFER_ID(buffer)){
- return(STATUS_ERROR);
- }
- bufHdr = BufferGetBufferDescriptor(buffer);
-
- SpinAcquire(BufMgrLock);
- bufHdr->flags |= BM_DIRTY;
- SpinRelease(BufMgrLock);
- }
- return(STATUS_OK);
- }
-
-
- #undef ReleaseAndReadBuffer
- /*
- * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
- * so that only one semop needs to be called.
- *
- */
- Buffer
- ReleaseAndReadBuffer(buffer, relation, blockNum)
- Buffer buffer;
- Relation relation;
- BlockNumber blockNum;
- {
- BufferDesc *bufHdr;
- Buffer retbuf;
- if (BufferIsValid(buffer)) {
- bufHdr = BufferGetBufferDescriptor(buffer);
- PrivateRefCount[buffer - 1]--;
- if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) {
- /* only release buffer if it is not pinned in previous ExecMain level */
- SpinAcquire(BufMgrLock);
- bufHdr->refcount--;
- if (bufHdr->refcount == 0) {
- AddBufferToFreelist(bufHdr);
- bufHdr->flags |= BM_FREE;
- }
- retbuf = ReadBufferWithBufferLock(relation, blockNum, true);
- return retbuf;
- }
- }
- return(ReadBuffer(relation, blockNum));
- }
-
- /*
- * AcquireBuffer -- Pin a buffer that we know is valid.
- *
- * ---There is a race condition. This routine doesnt make
- * any sense. We never really know the buffer is valid.
- */
- BufferAcquire(bufHdr)
- BufferDesc *bufHdr;
- {
-
- SpinAcquire(BufMgrLock);
- PinBuffer(bufHdr);
- SpinRelease(BufMgrLock);
- return (TRUE);
- }
-
- /*
- * BufferRepin -- get a second pin on an already pinned buffer
- */
- BufferRepin(buffer)
- Buffer buffer;
- {
- BufferDesc *bufHdr;
-
- if (BAD_BUFFER_ID(buffer)) {
- return(FALSE);
- }
- bufHdr = BufferGetBufferDescriptor(buffer);
-
- /* like we said -- already pinned */
- Assert(bufHdr->refcount);
-
- SpinAcquire(BufMgrLock);
- PinBuffer(bufHdr);
- SpinRelease(BufMgrLock);
- return (TRUE);
- }
-
- /*
- * BufferSync -- Flush all dirty buffers in the pool.
- *
- * This is called at transaction commit time. It does the wrong thing,
- * right now. We should flush only our own changes to stable storage,
- * and we should obey the lock protocol on the buffer manager metadata
- * as we do it. Also, we need to be sure that no other transaction is
- * modifying the page as we flush it. This is only a problem for objects
- * that use a non-two-phase locking protocol, like btree indices. For
- * those objects, we would like to set a write lock for the duration of
- * our IO. Another possibility is to code updates to btree pages
- * carefully, so that writing them out out of order cannot cause
- * any unrecoverable errors.
- *
- * I don't want to think hard about this right now, so I will try
- * to come back to it later.
- */
- void
- BufferSync()
- {
- int i;
- OID bufdb;
- OID bufrel;
- Relation reln;
- BufferDesc *bufHdr;
- int status;
-
- for (i=0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) {
- if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) {
- bufdb = bufHdr->tag.relId.dbId;
- bufrel = bufHdr->tag.relId.relId;
- if (bufdb == MyDatabaseId || bufdb == (OID) 0) {
- reln = RelationIdCacheGetRelation(bufrel);
-
- /*
- * If we didn't have the reldesc in our local cache, flush this
- * page out using the 'blind write' storage manager routine. If
- * we did find it, use the standard interface.
- */
-
- if (reln == (Relation) NULL) {
- status = smgrblindwrt(bufHdr->bufsmgr, &bufHdr->sb_dbname,
- &bufHdr->sb_relname, bufdb, bufrel,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- } else {
- status = smgrwrite(bufHdr->bufsmgr, reln,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
-
- if (status == SM_FAIL) {
- elog(WARN, "cannot write %d for %16s",
- bufHdr->tag.blockNum, bufHdr->sb_relname);
- }
-
- bufHdr->flags &= ~BM_DIRTY;
- if (reln != (Relation)NULL)
- RelationDecrementReferenceCount(reln);
- }
- }
- }
- }
-
-
- /*
- * WaitIO -- Block until the IO_IN_PROGRESS flag on 'buf'
- * is cleared. Because IO_IN_PROGRESS conflicts are
- * expected to be rare, there is only one BufferIO
- * lock in the entire system. All processes block
- * on this semaphore when they try to use a buffer
- * that someone else is faulting in. Whenever a
- * process finishes an IO and someone is waiting for
- * the buffer, BufferIO is signaled (SignalIO). All
- * waiting processes then wake up and check to see
- * if their buffer is now ready. This implementation
- * is simple, but efficient enough if WaitIO is
- * rarely called by multiple processes simultaneously.
- *
- * ProcSleep atomically releases the spinlock and goes to
- * sleep.
- *
- * Note: there is an easy fix if the queue becomes long.
- * save the id of the buffer we are waiting for in
- * the queue structure. That way signal can figure
- * out which proc to wake up.
- */
- #ifdef HAS_TEST_AND_SET
- WaitIO(buf, spinlock)
- BufferDesc *buf;
- SPINLOCK spinlock;
- {
- SpinRelease(spinlock);
- S_LOCK(&(buf->io_in_progress_lock));
- S_UNLOCK(&(buf->io_in_progress_lock));
- SpinAcquire(spinlock);
- }
-
- #else /* HAS_TEST_AND_SET */
- static IpcSemaphoreId WaitIOSemId;
-
- WaitIO(buf,spinlock)
- BufferDesc *buf;
- SPINLOCK spinlock;
- {
- Boolean inProgress;
-
- for (;;) {
-
- /* wait until someone releases IO lock */
- (*NWaitIOBackendP)++;
- SpinRelease(spinlock);
- IpcSemaphoreLock(WaitIOSemId, 0, 1);
- SpinAcquire(spinlock);
- inProgress = (buf->flags & BM_IO_IN_PROGRESS);
- if (!inProgress) break;
- }
- }
-
- /*
- * SignalIO --
- */
- SignalIO(buf)
- BufferDesc *buf;
- {
- /* somebody better be waiting. */
- Assert( buf->refcount > 1);
- IpcSemaphoreUnlock(WaitIOSemId, 0, *NWaitIOBackendP);
- *NWaitIOBackendP = 0;
- }
- #endif /* HAS_TEST_AND_SET */
-
- /*
- * Initialize module:
- *
- * should calculate size of pool dynamically based on the
- * amount of available memory.
- */
- InitBufferPool(key)
- IPCKey key;
- {
- Boolean foundBufs,foundDescs,foundNWaitIO;
- int i;
- int status;
-
-
- Data_Descriptors = NBuffers;
- Free_List_Descriptor = Data_Descriptors;
- Lookup_List_Descriptor = Data_Descriptors + 1;
- Num_Descriptors = Data_Descriptors + 1;
-
- SpinAcquire(BufMgrLock);
-
- #ifdef BMTRACE
- CurTraceBuf = (int *) ShmemInitStruct("Buffer trace",
- (BMT_LIMIT * sizeof(bmtrace)) + sizeof(int),
- &foundDescs);
- if (!foundDescs)
- bzero(CurTraceBuf, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(int));
-
- TraceBuf = (bmtrace *) &(CurTraceBuf[1]);
- #endif
-
- BufferDescriptors = (BufferDesc *)
- ShmemInitStruct("Buffer Descriptors",
- Num_Descriptors*sizeof(BufferDesc),&foundDescs);
-
- BufferBlocks = (BufferBlock)
- ShmemInitStruct("Buffer Blocks",
- NBuffers*BLOCK_SIZE,&foundBufs);
-
- #ifndef HAS_TEST_AND_SET
- NWaitIOBackendP = (int*)ShmemInitStruct("#Backends Waiting IO",
- sizeof(int),
- &foundNWaitIO);
- if (!foundNWaitIO)
- *NWaitIOBackendP = 0;
- #endif
-
- if (foundDescs || foundBufs) {
-
- /* both should be present or neither */
- Assert(foundDescs && foundBufs);
-
- } else {
- BufferDesc *buf;
- unsigned int block;
-
- buf = BufferDescriptors;
- block = (unsigned int) BufferBlocks;
-
- /*
- * link the buffers into a circular, doubly-linked list to
- * initialize free list. Still don't know anything about
- * replacement strategy in this file.
- */
- for (i = 0; i < Data_Descriptors; block+=BLOCK_SIZE,buf++,i++) {
- Assert(ShmemIsValid((unsigned int)block));
-
- buf->freeNext = i+1;
- buf->freePrev = i-1;
-
- CLEAR_BUFFERTAG(&(buf->tag));
- buf->data = MAKE_OFFSET(block);
- buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
- buf->refcount = 0;
- buf->id = i;
- #ifdef HAS_TEST_AND_SET
- S_INIT_LOCK(&(buf->io_in_progress_lock));
- #endif
- }
-
- /* close the circular queue */
- BufferDescriptors[0].freePrev = Data_Descriptors-1;
- BufferDescriptors[Data_Descriptors-1].freeNext = 0;
- }
-
- /* Init the rest of the module */
- InitBufTable();
- InitFreeList(!foundDescs);
-
- SpinRelease(BufMgrLock);
-
- #ifndef HAS_TEST_AND_SET
- WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
- 1, IPCProtection, 0, &status);
- #endif
- PrivateRefCount = (int*)malloc(NBuffers * sizeof(int));
- LastRefCount = (int*)malloc(NBuffers * sizeof(int));
- for (i = 0; i < NBuffers; i++) {
- PrivateRefCount[i] = 0;
- LastRefCount[i] = 0;
- }
- }
-
- int NDirectFileRead; /* some I/O's are direct file access. bypass bufmgr */
- int NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */
-
- void
- PrintBufferUsage(statfp)
- FILE *statfp;
- {
- float hitrate;
-
- if (ReadBufferCount==0)
- hitrate = 0.0;
- else
- hitrate = (float)BufferHitCount * 100.0/ReadBufferCount;
-
- fprintf(statfp, "!\t%d blocks read, %d blocks written, buffer hit rate = %.2f%%\n",
- ReadBufferCount - BufferHitCount + NDirectFileRead,
- BufferFlushCount + NDirectFileWrite,
- hitrate);
- }
-
- void
- ResetBufferUsage()
- {
- BufferHitCount = 0;
- ReadBufferCount = 0;
- BufferFlushCount = 0;
- NDirectFileRead = 0;
- NDirectFileWrite = 0;
- }
-
- /* ----------------------------------------------
- * ResetBufferPool
- *
- * this routine is supposed to be called when a transaction aborts.
- * it will release all the buffer pins held by the transaciton.
- *
- * ----------------------------------------------
- */
- void
- ResetBufferPool()
- {
- register int i;
- for (i=1; i<=NBuffers; i++) {
- if (BufferIsValid(i)) {
- while(PrivateRefCount[i - 1] > 0) {
- ReleaseBuffer(i);
- }
- }
- LastRefCount[i - 1] = 0;
- }
- }
-
- /* -----------------------------------------------
- * BufferPoolCheckLeak
- *
- * check if there is buffer leak
- *
- * -----------------------------------------------
- */
- int
- BufferPoolCheckLeak()
- {
- register int i;
- for (i=1; i<=NBuffers; i++) {
- if (BufferIsValid(i)) {
- elog(DEBUG, "BUFFER LEAK!!! send mail to wei.");
- return(1);
- }
- }
- return(0);
- }
-
- /* ------------------------------------------------
- * FlushBufferPool
- *
- * flush all dirty blocks in buffer pool to disk
- *
- * ------------------------------------------------
- */
- void
- FlushBufferPool(StableMainMemoryFlag)
- int StableMainMemoryFlag;
- {
- if (!StableMainMemoryFlag) {
- BufferSync();
- smgrcommit();
- }
- }
-
- /**************************************************
- BufferDescriptorIsValid
-
- **************************************************/
-
- bool
- BufferDescriptorIsValid(bufdesc)
- BufferDesc *bufdesc;
- {
- int temp;
-
- Assert(PointerIsValid(bufdesc));
-
- temp = (bufdesc-BufferDescriptors)/sizeof(BufferDesc);
- if (temp >= 0 && temp<NBuffers)
- return(true);
- else
- return(false);
-
- } /*BufferDescriptorIsValid*/
-
- /**************************************************
- BufferIsValid
- returns true iff the refcnt of the local
- buffer is > 0
- **************************************************/
- bool
- BufferIsValid(bufnum)
- Buffer bufnum;
- {
- if (BAD_BUFFER_ID(bufnum)) {
- return(false);
- }
- return((bool)(PrivateRefCount[bufnum - 1] > 0));
- } /* BufferIsValid */
-
- BlockSize
- BufferGetBlockSize(buffer)
- Buffer buffer;
- {
- Assert(BufferIsValid(buffer));
- /* Apparently, POSTGRES was supposed to have variable
- * sized buffer blocks. Current buffer manager will need
- * extensive redesign if that is ever to come to pass, so
- * for now hardwire it to BLCKSZ
- */
- return (BLCKSZ);
- }
-
- BlockNumber
- BufferGetBlockNumber(buffer)
- Buffer buffer;
- {
- Assert(BufferIsValid(buffer));
- return (BufferGetBufferDescriptor(buffer)->tag.blockNum);
- }
-
- Relation
- BufferGetRelation(buffer)
- Buffer buffer;
- {
- Relation relation;
-
- Assert(BufferIsValid(buffer));
-
- relation = RelationIdGetRelation(LRelIdGetRelationId
- (BufferGetBufferDescriptor(buffer)->tag.relId));
-
- RelationDecrementReferenceCount(relation);
-
- if (RelationHasReferenceCountZero(relation)) {
- /*
- elog(NOTICE, "BufferGetRelation: 0->1");
- */
-
- RelationIncrementReferenceCount(relation);
- }
-
- return (relation);
- }
-
- /**************************************************
- BufferDescriptorGetBuffer
-
- **************************************************/
-
-
- Buffer
- BufferDescriptorGetBuffer(descriptor)
- BufferDesc *descriptor;
- {
- Assert(BufferDescriptorIsValid(descriptor));
-
- return(1+descriptor - BufferDescriptors);
- }
-
- BufferReplace(bufHdr)
- BufferDesc *bufHdr;
- {
- int blockSize;
- int blockNum;
- LRelId *relIdPtr;
- Relation reln;
- ObjectId bufdb, bufrel;
- int status;
-
- blockSize = BLOCKSZ(bufHdr);
- blockNum = bufHdr->tag.blockNum;
-
- /*
- * first try to find the reldesc in the cache, if no luck,
- * don't bother to build the reldesc from scratch, just do
- * a blind write.
- */
-
- bufdb = bufHdr->tag.relId.dbId;
- bufrel = bufHdr->tag.relId.relId;
-
- if (bufdb == MyDatabaseId || bufdb == (OID) NULL)
- reln = RelationIdCacheGetRelation(bufrel);
- else
- reln = (Relation) NULL;
-
- if (reln != (Relation) NULL) {
- status = smgrflush(bufHdr->bufsmgr, reln, bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- } else {
-
- /* blind write always flushes */
- status = smgrblindwrt(bufHdr->bufsmgr, &bufHdr->sb_dbname,
- &bufHdr->sb_relname, bufdb, bufrel,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
- }
-
- if (status == SM_FAIL)
- return (FALSE);
-
- return (TRUE);
- }
-
- /**************************************************
- BufferIsDirty
-
- **************************************************/
-
- bool
- BufferIsDirty(buffer)
- Buffer buffer;
- {
- return (bool)
- (BufferGetBufferDescriptor(buffer)->flags & BM_DIRTY);
- }
-
-
- /**************************************************
- BufferIsInvalid
-
- **************************************************/
- bool
- BufferIsInvalid(buffer)
- Buffer buffer;
- {
- return (bool)
- (buffer == InvalidBuffer);
- }
-
-
- /**************************************************
- BufferIsUnknown
-
- **************************************************/
- bool
- BufferIsUnknown(buffer)
- Buffer buffer;
- {
- return (bool)
- (buffer == UnknownBuffer);
- }
-
- /***************************************************
- * RelationGetNumberOfPages --
- * Returns number of pages in an open relation.
- *
- * Note:
- * XXX may fail for huge relations.
- * XXX should be elsewhere.
- * XXX maybe should be hidden
- ***************************************************
- */
-
- BlockNumber
- RelationGetNumberOfBlocks(relation)
- Relation relation;
- {
- return (smgrnblocks(relation->rd_rel->relsmgr, relation));
- }
-
- /**************************************************
- BufferGetBlock
-
- **************************************************/
-
- Block
- BufferGetBlock(buffer)
- Buffer buffer;
- {
- Assert(BufferIsValid(buffer));
-
- return((Block)MAKE_PTR(BufferDescriptors[buffer-1].data));
- }
-
- /* ---------------------------------------------------------------------
- * ReleaseTmpRelBuffers
- *
- * this function unmarks all the dirty pages of a temporary
- * relation in the buffer pool so that at the end of transaction
- * these pages will not be flushed.
- * XXX currently it sequentially searches the buffer pool, should be
- * changed to more clever ways of searching.
- * --------------------------------------------------------------------
- */
- void
- ReleaseTmpRelBuffers(tempreldesc)
- Relation tempreldesc;
- {
- register int i;
- BufferDesc *buf;
-
- for (i=1; i<=NBuffers; i++) {
- buf = BufferGetBufferDescriptor(i);
- if (BufferIsDirty(i) &&
- (buf->tag.relId.dbId == MyDatabaseId) &&
- (buf->tag.relId.relId == tempreldesc->rd_id)) {
- buf->flags &= ~BM_DIRTY;
- if (!(buf->flags & BM_FREE))
- ReleaseBuffer(i);
- }
- }
- }
-
- /* ---------------------------------------------------------------------
- * DropBuffers
- *
- * This function marks all the buffers in the buffer cache for a
- * particular database as clean. This is used when we destroy a
- * database, to avoid trying to flush data to disk when the directory
- * tree no longer exists.
- *
- * This is an exceedingly non-public interface.
- * --------------------------------------------------------------------
- */
-
- void
- DropBuffers(dbid)
- ObjectId dbid;
- {
- register int i;
- BufferDesc *buf;
-
- for (i=1; i<=NBuffers; i++) {
- buf = BufferGetBufferDescriptor(i);
- if ((buf->tag.relId.dbId == dbid) && (buf->flags & BM_DIRTY)) {
- buf->flags &= ~BM_DIRTY;
- }
- }
- }
-
- /* -----------------------------------------------------------------
- * PrintBufferDescs
- *
- * this function prints all the buffer descriptors, for debugging
- * use only.
- * -----------------------------------------------------------------
- */
-
- void
- PrintBufferDescs()
- {
- register int i;
- BufferDesc *buf;
-
- for (i=0; i<NBuffers; i++) {
- buf = &(BufferDescriptors[i]);
- printf("(freeNext=%d, freePrev=%d, relname=%s, blockNum=%d, flags=0x%x, refcount=%d %d)\n", buf->freeNext, buf->freePrev, &(buf->sb_relname), buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]);
- }
- }
-
- void
- PrintPinnedBufs()
- {
- register int i;
- BufferDesc *buf;
-
- for (i=0; i<NBuffers; i++) {
- buf = &(BufferDescriptors[i]);
- if (PrivateRefCount[i] > 0)
- printf("(freeNext=%d, freePrev=%d, relname=%s, blockNum=%d, flags=0x%x, refcount=%d %d)\n", buf->freeNext, buf->freePrev, &(buf->sb_relname), buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]);
- }
- }
-
- /* -----------------------------------------------------
- * BufferShmemSize
- *
- * compute the size of shared memory for the buffer pool including
- * data pages, buffer descriptors, hash tables, etc.
- * ----------------------------------------------------
- */
-
- int
- BufferShmemSize()
- {
- int size;
- int nbuckets;
- int nsegs;
- int tmp;
-
- nbuckets = 1 << (int)my_log2((NBuffers - 1) / DEF_FFACTOR + 1);
- nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
- size = /* size of shmem binding table */
- my_log2(BTABLE_SIZE) + sizeof(HHDR);
- size += DEF_SEGSIZE * sizeof(SEGMENT) + BUCKET_ALLOC_INCR *
- (sizeof(BUCKET_INDEX) + BTABLE_KEYSIZE + BTABLE_DATASIZE);
- /* size of buffer descriptors */
- size += (NBuffers + 1) * sizeof(BufferDesc);
- /* size of data pages */
- size += NBuffers * BLOCK_SIZE;
- /* size of buffer hash table */
- size += my_log2(NBuffers) + sizeof(HHDR);
- size += nsegs * DEF_SEGSIZE * sizeof(SEGMENT);
- tmp = (int)ceil((double)NBuffers/BUCKET_ALLOC_INCR);
- size += tmp * BUCKET_ALLOC_INCR *
- (sizeof(BUCKET_INDEX) + sizeof(BufferTag) + sizeof(Buffer));
- /* extra space, just to make sure there is enough */
- size += NBuffers * 4 + 4096;
-
- #ifdef BMTRACE
- size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(int);
- #endif
- return size;
- }
-
- /*
- * BufferPoolBlowaway
- *
- * this routine is solely for the purpose of experiments -- sometimes
- * you may want to blowaway whatever is left from the past in buffer
- * pool and start measuring some performance with a clean empty buffer
- * pool.
- */
- void
- BufferPoolBlowaway()
- {
- register int i;
-
- BufferSync();
- for (i=1; i<=NBuffers; i++) {
- if (BufferIsValid(i)) {
- while(BufferIsValid(i))
- ReleaseBuffer(i);
- }
- BufTableDelete(BufferGetBufferDescriptor(i));
- }
- }
-
- #undef IncrBufferRefCount
- #undef ReleaseBuffer
-
- IncrBufferRefCount(buffer)
- Buffer buffer;
- {
- PrivateRefCount[buffer - 1]++;
- }
-
- /*
- * ReleaseBuffer -- remove the pin on a buffer without
- * marking it dirty.
- *
- */
-
- ReleaseBuffer(buffer)
- Buffer buffer;
- {
- BufferDesc *bufHdr;
-
- if (BAD_BUFFER_ID(buffer)) {
- return(STATUS_ERROR);
- }
- bufHdr = BufferGetBufferDescriptor(buffer);
-
- Assert(PrivateRefCount[buffer - 1] > 0);
- PrivateRefCount[buffer - 1]--;
- if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) {
- /* only release buffer if it is not pinned in previous ExecMain levels */
- SpinAcquire(BufMgrLock);
- bufHdr->refcount--;
- if (bufHdr->refcount == 0) {
- AddBufferToFreelist(bufHdr);
- bufHdr->flags |= BM_FREE;
- }
- SpinRelease(BufMgrLock);
- }
-
- return(STATUS_OK);
- }
-
- int ShowPinTrace = 0;
-
- IncrBufferRefCount_Debug(file, line, buffer)
- String file;
- int line;
- Buffer buffer;
- {
- IncrBufferRefCount(buffer);
- if (ShowPinTrace && is_userbuffer(buffer)) {
- BufferDesc *buf;
- buf = BufferGetBufferDescriptor(buffer);
- fprintf(stderr, "PIN(Incr) %d relname = %s, blockNum = %d, refcount = %d, file: %s, line: %d\n", buffer, &(buf->sb_relname), buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line);
- }
- }
-
- ReleaseBuffer_Debug(file, line, buffer)
- String file;
- int line;
- Buffer buffer;
- {
- ReleaseBuffer(buffer);
- if (ShowPinTrace && is_userbuffer(buffer)) {
- BufferDesc *buf;
- buf = BufferGetBufferDescriptor(buffer);
- fprintf(stderr, "UNPIN(Rel) %d relname = %s, blockNum = %d, refcount = %d, file: %s, line: %d\n", buffer, &(buf->sb_relname), buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line);
- }
- }
-
- ReleaseAndReadBuffer_Debug(file, line, buffer, relation, blockNum)
- String file;
- int line;
- Buffer buffer;
- Relation relation;
- BlockNumber blockNum;
- {
- bool bufferValid;
- Buffer b;
-
- bufferValid = BufferIsValid(buffer);
- b = ReleaseAndReadBuffer(buffer, relation, blockNum);
- if (ShowPinTrace && bufferValid && is_userbuffer(buffer)) {
- BufferDesc *buf;
- buf = BufferGetBufferDescriptor(buffer);
- fprintf(stderr, "UNPIN(Rel&Rd) %d relname = %s, blockNum = %d, refcount = %d, file: %s, line: %d\n", buffer, &(buf->sb_relname), buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line);
- }
- if (ShowPinTrace && is_userbuffer(buffer)) {
- BufferDesc *buf;
- buf = BufferGetBufferDescriptor(b);
- fprintf(stderr, "PIN(Rel&Rd) %d relname = %s, blockNum = %d, refcount = %d, file: %s, line: %d\n", b, &(buf->sb_relname), buf->tag.blockNum, PrivateRefCount[b - 1], file, line);
- }
- return b;
- }
-
- #ifdef BMTRACE
-
- /*
- * trace allocations and deallocations in a circular buffer in
- * shared memory. check the buffer before doing the allocation,
- * and die if there's anything fishy.
- */
-
- _bm_trace(dbId, relId, blkNo, bufNo, allocType)
- long dbId;
- long relId;
- int blkNo;
- int bufNo;
- int allocType;
- {
- static int mypid = 0;
- int start, cur;
- bmtrace *tb;
-
- if (mypid == 0)
- mypid = getpid();
-
- start = *CurTraceBuf;
-
- if (start > 0)
- cur = start - 1;
- else
- cur = BMT_LIMIT - 1;
-
- for (;;) {
- tb = &TraceBuf[cur];
- if (tb->bmt_op != BMT_NOTUSED) {
- if (tb->bmt_buf == bufNo) {
- if ((tb->bmt_op == BMT_DEALLOC)
- || (tb->bmt_dbid == dbId && tb->bmt_relid == relId
- && tb->bmt_blkno == blkNo))
- goto okay;
-
- /* die holding the buffer lock */
- _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur);
- }
- }
-
- if (cur == start)
- goto okay;
-
- if (cur == 0)
- cur = BMT_LIMIT - 1;
- else
- cur--;
- }
-
- okay:
- tb = &TraceBuf[start];
- tb->bmt_pid = mypid;
- tb->bmt_buf = bufNo;
- tb->bmt_dbid = dbId;
- tb->bmt_relid = relId;
- tb->bmt_blkno = blkNo;
- tb->bmt_op = allocType;
-
- *CurTraceBuf = (start + 1) % BMT_LIMIT;
- }
-
- _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur)
- long dbId;
- long relId;
- int blkNo;
- int bufNo;
- int allocType;
- int start;
- int cur;
- {
- FILE *fp;
- bmtrace *tb;
- int i;
-
- tb = &TraceBuf[cur];
-
- if ((fp = fopen("/tmp/death_notice", "w")) == (FILE *) NULL)
- elog(FATAL, "buffer alloc trace error and can't open log file");
-
- fprintf(fp, "buffer alloc trace detected the following error:\n\n");
- fprintf(fp, " buffer %d being %s inconsistently with a previous %s\n\n",
- bufNo, (allocType == BMT_DEALLOC ? "deallocated" : "allocated"),
- (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation"));
-
- fprintf(fp, "the trace buffer contains:\n");
-
- i = start;
- for (;;) {
- tb = &TraceBuf[i];
- if (tb->bmt_op != BMT_NOTUSED) {
- fprintf(fp, " [%3d]%spid %d buf %2d for <%d,%d,%d> ",
- i, (i == cur ? " ---> " : "\t"),
- tb->bmt_pid, tb->bmt_buf,
- tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno);
-
- switch (tb->bmt_op) {
- case BMT_ALLOCFND:
- fprintf(fp, "allocate (found)\n");
- break;
-
- case BMT_ALLOCNOTFND:
- fprintf(fp, "allocate (not found)\n");
- break;
-
- case BMT_DEALLOC:
- fprintf(fp, "deallocate\n");
- break;
-
- default:
- fprintf(fp, "unknown op type %d\n", tb->bmt_op);
- break;
- }
- }
-
- i = (i + 1) % BMT_LIMIT;
- if (i == start)
- break;
- }
-
- fprintf(fp, "\noperation causing error:\n");
- fprintf(fp, "\tpid %d buf %d for <%d,%d,%d> ",
- getpid(), bufNo, dbId, relId, blkNo);
-
- switch (allocType) {
- case BMT_ALLOCFND:
- fprintf(fp, "allocate (found)\n");
- break;
-
- case BMT_ALLOCNOTFND:
- fprintf(fp, "allocate (not found)\n");
- break;
-
- case BMT_DEALLOC:
- fprintf(fp, "deallocate\n");
- break;
-
- default:
- fprintf(fp, "unknown op type %d\n", allocType);
- break;
- }
-
- (void) fclose(fp);
-
- kill(getpid(), SIGILL);
- }
-
- #endif /* BMTRACE */
-
- void
- BufferRefCountReset(refcountsave)
- int *refcountsave;
- {
- int i;
- for (i=0; i<NBuffers; i++) {
- refcountsave[i] = PrivateRefCount[i];
- LastRefCount[i] += PrivateRefCount[i];
- PrivateRefCount[i] = 0;
- }
- }
-
- void
- BufferRefCountRestore(refcountsave)
- int *refcountsave;
- {
- int i;
- for (i=0; i<NBuffers; i++) {
- PrivateRefCount[i] = refcountsave[i];
- LastRefCount[i] -= PrivateRefCount[i];
- refcountsave[i] = 0;
- }
- }
-