home *** CD-ROM | disk | FTP | other *** search
- /*
- * hc_slock_asm.h
- *
- * Queue-based spinlock specialized for situations where there is high
- * contention for the lock.
- *
- * In a regular spinlock, all threads requesting the lock spin, performing
- * test-and-set operations on a single shared memory location. The
- * resulting cache invalidation traffic quickly saturates the bus with
- * only a small number of spinning processors, impacting the performance
- * of the thread holding the lock (by interfering with its ability to
- * access memory) and causing a rapid falloff in performance.
- *
- * The queue-based spinlock is designed to eliminate this problem. Threads
- * spin on a private location instead of a shared location, eliminating the
- * cache invalidation traffic. To release a lock, the thread holding the
- * lock simply sets the private location that the next waiter is spinning on
- * (in this case no atomic instruction is needed).
- *
- * Since C++ doesn't know about asm functions yet, this file gets pulled into
- * C++'s straight C output via the +hasmdefs.h C++ flag.
- *
- * Depends on:
- *
- * declaration of hc_slock_t in parallel.h
- * declaration of HC_Spinlock class in spinlock.h
- *
- * Modification History:
- *
- * 28-Dec-1989 JEF
- * Integrated HC_Spinlock asm functions (after original version by raj).
- *
- */
-
- /*
- * XXX Warning:
- * this will have to be changed if the structure of "thisproc"
- * changes. for now, we "know" that the "p_pid" field is at
- * offset 24.
- */
- #ifndef THISPROC_ID_OFFSET
- #define THISPROC_ID_OFFSET 24
- #endif THISPROC_ID_OFFSET
-
- /*
- * HC_S_LOCK - acquire high contention spinlock.
- */
- asm void HC_S_LOCK(laddr)
- {
- %reg laddr; lab spin, done;
- /PEEPOFF
- /*
- * Get process id for processor on which this thread is
- * running. Convert from 0 to NUMPROCS-1 form to 1 to NUMPROCS,
- * since the lock data structure uses row 0 for something special.
- */
- movl _thisproc, %edx
- movl THISPROC_ID_OFFSET(%edx), %edx
- incl %edx
-
- /*
- * We use the calulated process id (in range 1 to NUMPROCS) to index
- * into the lock data structure to find our private data location (the
- * location on which a thread that requests a lock after us will
- * spin).
- *
- * The complex addressing mode of the 'movb' below only accepts
- * a byte offset of 8 (max) -- since our private location is at
- * an offset of 16, double our process id.
- */
- addl %edx, %edx
- movl %edx, %eax
-
- /*
- * Load contents of this processor's private data location into
- * high bits of %eax.
- *
- * %ah = 16 (laddr + (2*p_pid)*8)
- * = 16 (laddr + 16*p_pid)
- * = laddr [p_pid] [16]
- */
- movb 16(laddr,%edx,8),%ah
-
- /*
- * Atomically exchange entire contents of %eax with contents of
- * first longword in row 0 of laddr. Row 0 always contains info
- * identifying last process to request lock.
- *
- * Before:
- * %ah = laddr [p_pid] [16] (orig contents of my private data area)
- * %al = p_pid*2 (next lock requestor uses this to
- * find CURRENT contents of my data area,
- * spins until they are different)
- * After:
- * %ah = laddr [last] [16] (orig contents of private data area of
- * last process to request lock)
- * %al = last_p_pid * 2 (used to find CURRENT contents of last
- * lock requestor's private data)
- */
- xchgl %eax,0(laddr)
-
- /*
- * Move last lock requestor's process_id*2 into low bits of %edx.
- * The 'cmpb' below will use this value in a complex addressing mode
- * as was done above.
- */
- movb %al,%dl
-
- /*
- * Now spin until last lock requestor is done with the lock and
- * updates his private data location. I.e., spin until original
- * contents of his location (%ah) and current contents
- * (laddr [last_pid] [16]) become different. Since we are the only
- * one spinning on this location, the location is in it's 'own'
- * cache line, and there will be exactly one write to the location,
- * cache coherency traffic is minimized.
- */
- spin: cmpb %ah,16(laddr,%edx,8)
- je spin /* values still equal - spin */
-
- /*
- * Values are no longer equal - so previous lock requestor must be
- * done with the lock.
- *
- * We now own it - proceed!
- */
- done:
- /PEEPON
- %mem laddr; lab spin, done;
- /PEEPOFF
- /* SEE COMMENTS ABOVE */
- movl laddr,%ecx
- movl _thisproc, %edx
- movl THISPROC_ID_OFFSET(%edx), %edx
- incl %edx
- addl %edx,%edx
- movl %edx,%eax
- movb 16(%ecx,%edx,8),%ah
- xchgl %eax,0(%ecx)
- movb %al,%dl
- spin: cmpb %ah,16(%ecx,%edx,8)
- je spin
- done:
- /PEEPON
- }
-
- /*
- * HC_S_UNLOCK - release high contention spinlock.
- */
- asm void HC_S_UNLOCK(laddr)
- {
- %reg laddr;
- /*
- * Get processor id and convert into 1 to NUMPROCS format
- * (from 0 to NUMPROCS-1).
- */
- movl _thisproc, %edx
- movl THISPROC_ID_OFFSET(%edx), %edx
- incl %edx
-
- /*
- * Find this processor's row in the lock data array (%edx = 16*p_pid).
- */
- shll $4,%edx
-
- /*
- * Increment this processor's private data location. This allows
- * anyone spinning on that location to proceed. If no one was
- * spinning (i.e., this processor was the last processor to request
- * the lock), then the private data location will contain a different
- * value then the original value stored in laddr [0][0], so later
- * when someone does request the lock, they'll get it immediately.
- */
- incb 16(laddr,%edx)
-
- %mem laddr;
- /* SEE COMMENTS ABOVE */
- movl laddr, %ecx
- movl _thisproc, %edx
- movl THISPROC_ID_OFFSET(%edx), %edx
- incl %edx
- shll $4,%edx
- incb 16(%ecx,%edx)
- }
-
- asm HC_S_INIT_LOCK(laddr)
- {
- %reg laddr;
- /*
- * The first row of the lock data array is used to store the id
- * of the last processor to request the lock, as well as the contents
- * of the requesting processor's private data area.
- *
- * Start out with 'dummy' processor 0 and a dummy data value of 0.
- */
- movl $0,(laddr)
-
- /*
- * Now set laddr [0][16] to 1. This is 'dummy' processor 0's
- * private data location. Since this value is different than the
- * original value of 0 loaded above, the first processor to request
- * the lock in HC_S_LOCK will get it immediately, without spinning.
- */
- movb $1,16(laddr)
- %mem laddr;
- /* SEE COMMENTS ABOVE */
- movl laddr, %ecx
- movl $0,(%ecx)
- movb $1,16(%ecx)
- }
-
- asm int HC_S_IS_LOCKED(laddr)
- {
- %reg laddr;
- /PEEPOFF
- /*
- * Get proc_id*2 of last thread to request the lock (%al), and
- * value of that processor's private data location when lock
- * was requested (%ah).
- */
- movl 0(laddr),%eax
-
- /*
- * Load last_pid*2 into low bits of %edx.
- */
- subl %edx,%edx /* %edx = 0 */
- movb %al,%dl
-
- /*
- * Use complex addressing mode (described in HC_S_LOCK) to see if
- * last lock requestor's private data has changed since he requested
- * the lock. If not, then either he still has the lock, or he is
- * still in the queue waiting to get the lock, and the lock is HELD.
- * Otherwise, the two values are different, and the lock must be FREE.
- *
- * %ah - original value of last lock requestor's private data
- * 16(laddr,%edx,8) - as described in HC_S_LOCK, this is
- * laddr [last_pid] [16], which stores the
- * CURRENT value of last lock requestor's
- * private data area.
- */
- cmpb %ah,16(laddr,%edx,8)
-
- /*
- * Return indication of whether lock is held or free.
- * If equal, set %al and mask the rest of %eax to 0.
- */
- sete %al
- andl $0xff,%eax
- /PEEPON
- %mem laddr;
- /PEEPOFF
- /* SEE COMMENTS ABOVE */
- movl laddr,%ecx
- movl 0(%ecx),%eax
- subl %edx,%edx
- movb %al,%dl
- cmpb %ah,16(%ecx,%edx,8)
- sete %al
- andl $0xff,%eax
- /PEEPON
- }
-
-
- /* XXX UNIMPLEMENTED -- should never be called */
- /* always return FALSE */
- asm int HC_S_CLOCK(laddr)
- {
- %reg laddr;
- movl $L_FAILED, %eax
- %mem laddr; lab spin, failed, done;
- movl $L_FAILED, %eax
- done:
- /PEEPON
- }
-