==== //depot/projects/kmacy_sun4v/src/sys/dev/md/md.c#3 - /shared/p4/sun4v/work_ifc/src/sys/dev/md/md.c ==== @@ -647,9 +647,10 @@ int error; sc = arg; - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); + for (;;) { mtx_lock(&sc->queue_mtx); ==== //depot/projects/kmacy_sun4v/src/sys/fs/procfs/procfs_ctl.c#2 - /shared/p4/sun4v/work_ifc/src/sys/fs/procfs/procfs_ctl.c ==== @@ -286,9 +286,9 @@ panic("procfs_control"); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); /* If it can run, let it do so. */ - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (0); } @@ -344,9 +344,10 @@ #endif /* XXXKSE: */ p->p_flag &= ~P_STOPPED_SIG; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); + } else psignal(p, nm->nm_val); PROC_UNLOCK(p); ==== //depot/projects/kmacy_sun4v/src/sys/fs/procfs/procfs_status.c#2 - /shared/p4/sun4v/work_ifc/src/sys/fs/procfs/procfs_status.c ==== @@ -112,19 +112,17 @@ sbuf_printf(sb, "noflags"); } - mtx_lock_spin(&sched_lock); - if (p->p_flag & P_SA) - wmesg = "-kse- "; - else { + PROC_SLOCK(p); tdfirst = FIRST_THREAD_IN_PROC(p); + TD_SLOCK(tdfirst); if (tdfirst->td_wchan != NULL) { KASSERT(tdfirst->td_wmesg != NULL, ("wchan %p has no wmesg", tdfirst->td_wchan)); wmesg = tdfirst->td_wmesg; } else wmesg = "nochan"; - } - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(tdfirst); + PROC_SUNLOCK(p); if (p->p_sflag & PS_INMEM) { struct timeval start, ut, st; ==== //depot/projects/kmacy_sun4v/src/sys/geom/eli/g_eli.c#4 - /shared/p4/sun4v/work_ifc/src/sys/geom/eli/g_eli.c ==== @@ -399,11 +399,12 @@ wr = arg; sc = wr->w_softc; - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); sched_prio(curthread, PRIBIO); + TD_SUNLOCK(curthread); if (sc->sc_crypto == G_ELI_CRYPTO_SW && g_eli_threads == 0) sched_bind(curthread, wr->w_number); - mtx_unlock_spin(&sched_lock); + G_ELI_DEBUG(1, "Thread %s 
started.", curthread->td_proc->p_comm); ==== //depot/projects/kmacy_sun4v/src/sys/geom/geom_kern.c#2 - /shared/p4/sun4v/work_ifc/src/sys/geom/geom_kern.c ==== @@ -88,9 +88,9 @@ struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + TD_SLOCK(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(tp); for(;;) { g_io_schedule_up(tp); } @@ -111,9 +111,9 @@ struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + TD_SLOCK(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(tp); for(;;) { g_io_schedule_down(tp); } @@ -134,9 +134,9 @@ struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + TD_SLOCK(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(tp); for(;;) { g_run_events(); tsleep(&g_wait_event, PRIBIO, "-", hz/10); ==== //depot/projects/kmacy_sun4v/src/sys/geom/mirror/g_mirror.c#4 - /shared/p4/sun4v/work_ifc/src/sys/geom/mirror/g_mirror.c ==== @@ -1719,9 +1719,9 @@ int timeout; sc = arg; - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); sx_xlock(&sc->sc_lock); for (;;) { ==== //depot/projects/kmacy_sun4v/src/sys/geom/raid3/g_raid3.c#4 - /shared/p4/sun4v/work_ifc/src/sys/geom/raid3/g_raid3.c ==== @@ -1936,9 +1936,10 @@ int timeout; sc = arg; - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); + sx_xlock(&sc->sc_lock); for (;;) { ==== //depot/projects/kmacy_sun4v/src/sys/kern/init_main.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/init_main.c ==== @@ -708,6 +708,7 @@ if (error != ENOENT) printf("exec %.*s: error %d\n", (int)(next - path), path, error); + printf("init started\n"); } printf("init: not found in path %s\n", init_path); panic("no init"); @@ -760,9 +761,9 @@ 
struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); TD_SET_CAN_RUN(td); setrunqueue(td, SRQ_BORING); /* XXXKSE */ - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL) ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_acct.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_acct.c ==== @@ -506,9 +506,9 @@ /* This is a low-priority kernel thread. */ pri = PRI_MAX_KERN; - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); sched_prio(curthread, pri); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); /* If another accounting kthread is already running, just die. */ sx_xlock(&acct_sx); ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_clock.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_clock.c ==== @@ -156,7 +156,7 @@ int profprocs; int ticks; int psratio; - +struct mtx prof_lock; /* * Initialize clock frequencies and start both clocks running. */ @@ -173,6 +173,8 @@ */ cpu_initclocks(); + mtx_init(&prof_lock, "profiling lock", NULL, MTX_DEF); + /* * Compute profhz/stathz, and fix profhz if needed. */ @@ -201,7 +203,7 @@ /* * Run current process's virtual and profile time, as needed. 
*/ - mtx_lock_spin_flags(&timer_lock, MTX_QUIET); + PROC_SLOCK(p); pstats = p->p_stats; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && @@ -210,7 +212,7 @@ if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) sflag = PS_PROFPEND; - mtx_unlock_spin_flags(&timer_lock, MTX_QUIET); + PROC_SUNLOCK(p); #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); @@ -351,11 +353,11 @@ if (p->p_flag & P_STOPPROF) return; if ((p->p_flag & P_PROFIL) == 0) { - mtx_lock_spin(&sched_lock); p->p_flag |= P_PROFIL; + mtx_lock_spin(&prof_lock); if (++profprocs == 1) cpu_startprofclock(); - mtx_unlock_spin(&sched_lock); + mtx_unlock_spin(&prof_lock); } } @@ -363,8 +365,7 @@ * Stop profiling on a process. */ void -stopprofclock(p) - register struct proc *p; +stopprofclock(struct proc *p) { PROC_LOCK_ASSERT(p, MA_OWNED); @@ -378,11 +379,11 @@ } if ((p->p_flag & P_PROFIL) == 0) return; - mtx_lock_spin(&sched_lock); p->p_flag &= ~P_PROFIL; + mtx_lock_spin(&prof_lock); if (--profprocs == 0) cpu_stopprofclock(); - mtx_unlock_spin(&sched_lock); + mtx_unlock_spin(&prof_lock); } } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_cpu.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_cpu.c ==== @@ -223,11 +223,13 @@ struct pcpu *pc; int cpu_id, error, i; static int once; + struct proc *p; sc = device_get_softc(dev); error = 0; set = NULL; saved_freq = NULL; + p = curthread->td_proc; /* * Check that the TSC isn't being used as a timecounter. 
@@ -300,17 +302,13 @@ cpu_id = PCPU_GET(cpuid); pc = cpu_get_pcpu(set->dev); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); sched_bind(curthread, pc->pc_cpuid); - mtx_unlock_spin(&sched_lock); } CF_DEBUG("setting abs freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); } if (error) { goto out; @@ -329,17 +327,13 @@ cpu_id = PCPU_GET(cpuid); pc = cpu_get_pcpu(set->dev); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); sched_bind(curthread, pc->pc_cpuid); - mtx_unlock_spin(&sched_lock); } CF_DEBUG("setting rel freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); } if (error) { /* XXX Back out any successful setting? */ ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_exit.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_exit.c ==== @@ -522,8 +522,9 @@ * proc lock. */ wakeup(p->p_pptr); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; + PROC_SUNLOCK(p); PROC_UNLOCK(p->p_pptr); sched_exit(p->p_pptr, td); @@ -533,12 +534,12 @@ * late in the game. */ knlist_destroy(&p->p_klist); - /* * Make sure the scheduler takes this thread out of its tables etc. * This will also release this thread's reference to the ucred. * Other thread parts to release include pcb bits and such. */ + TD_SLOCK(curthread); thread_exit(); } @@ -728,8 +729,8 @@ * sched_lock once we will wait long enough for the * thread to exit in that case. 
*/ - mtx_lock_spin(&sched_lock); - mtx_unlock_spin(&sched_lock); + PROC_SLOCK(p); + PROC_SUNLOCK(p); td->td_retval[0] = p->p_pid; if (status) @@ -826,12 +827,12 @@ sx_xunlock(&allproc_lock); return (0); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if ((p->p_flag & P_STOPPED_SIG) && (p->p_suspcount == p->p_numthreads) && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || options & WUNTRACED)) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; @@ -845,7 +846,7 @@ return (0); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_fork.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_fork.c ==== @@ -499,15 +499,15 @@ p2->p_flag = 0; if (p1->p_flag & P_PROFIL) startprofclock(p2); - mtx_lock_spin(&sched_lock); + PROC_SLOCK_ORDERED(p1, p2); p2->p_sflag = PS_INMEM; /* * Allow the scheduler to adjust the priority of the child and * parent while we hold the sched_lock. */ sched_fork(td, td2); + PROC_SUNLOCK_ORDERED(p1, p2); - mtx_unlock_spin(&sched_lock); p2->p_ucred = crhold(td->td_ucred); td2->td_ucred = crhold(p2->p_ucred); /* XXXKSE */ #ifdef AUDIT @@ -692,7 +692,7 @@ * Set the child start time and mark the process as being complete. */ microuptime(&p2->p_stats->p_start); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p2); p2->p_state = PRS_NORMAL; /* @@ -701,9 +701,11 @@ */ if ((flags & RFSTOPPED) == 0) { TD_SET_CAN_RUN(td2); + TD_SLOCK(td2); setrunqueue(td2, SRQ_BORING); + TD_SUNLOCK(td2); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p2); /* * Now can be swapped. @@ -764,6 +766,7 @@ return (error); } +#include /* * Handle the return of a child process from fork1(). This function * is called from the MD fork_trampoline() entry point. 
@@ -786,8 +789,6 @@ td->td_oncpu = PCPU_GET(cpuid); KASSERT(p->p_state == PRS_NORMAL, ("executing process is still new")); - sched_lock.mtx_lock = (uintptr_t)td; - mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); CTR4(KTR_PROC, "fork_exit: new thread %p (kse %p, pid %d, %s)", td, td->td_sched, p->p_pid, p->p_comm); @@ -801,9 +802,45 @@ PCPU_SET(deadthread, NULL); thread_stash(td); } + td = curthread; - mtx_unlock_spin(&sched_lock); + + PCPU_GET(preempted_thread)->td_running = 0; + td->td_running = 1; + + td->td_spin_mtx.mtx_lock = (uintptr_t)td; + TD_SLOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED); + TD_SUNLOCK(td); + + + if (PCPU_GET(preempted_thread) != curthread) { + + if (curthread->td_critnest != 1) { + DELAY(1000000*curcpu); + printf("prev thread=%d curthread=%d\n", PCPU_GET(preempted_thread)->td_tid, + curthread->td_tid); + printf("prev thread pc=0x%lx\n", PCPU_GET(preempted_thread)->td_pcb->pcb_pc); + panic("fork_exit: critnest should be 1 is %d", curthread->td_critnest); + } + + PCPU_GET(preempted_thread)->td_spin_mtx.mtx_lock = (uintptr_t)td; + TD_SLOCK_ASSERT(PCPU_GET(preempted_thread), MA_OWNED | MA_NOTRECURSED); + TD_SUNLOCK(PCPU_GET(preempted_thread)); + } + if (curthread->td_critnest != 0) { + DELAY(1000000*curcpu); + printf("prev thread=%d curthread=%d\n", PCPU_GET(preempted_thread)->td_tid, + curthread->td_tid); + printf("prev thread pc=0x%lx\n", PCPU_GET(preempted_thread)->td_pcb->pcb_pc); + panic("fork_exit: critnest should be 0 is %d", curthread->td_critnest); + } + if (rdpr(pil) != 0) + panic("pil in fork_exit not 0 - %ld", rdpr(pil)); + if (curthread->td_md.md_spinlock_count != 0) + panic("fork_exit: spinlock_count not 0"); + if (curthread->td_critnest != 0) + panic("fork_exit: critnest should be 0 is %d", curthread->td_critnest); /* * cpu_set_fork_handler intercepts this function call to * have this call a non-return function to stay in kernel mode. 
==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_idle.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_idle.c ==== @@ -75,13 +75,13 @@ PROC_LOCK(p); p->p_flag |= P_NOLOAD; - mtx_lock_spin(&sched_lock); td = FIRST_THREAD_IN_PROC(p); + TD_SLOCK(td); TD_SET_CAN_RUN(td); atomic_set_int(&td->td_flags, TDF_IDLETD); sched_class(td, PRI_IDLE); sched_prio(td, PRI_MAX_IDLE); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); PROC_UNLOCK(p); #ifdef SMP } @@ -111,19 +111,12 @@ while (sched_runnable() == 0) cpu_idle(); - - #ifdef SMP atomic_clear_int(&idle_cpus_mask, mycpu); #endif - spinlock_enter(); /* avoid preemption after choosethread */ - if ((td = choosethread()) != curthread) { - mtx_lock_spin(&sched_lock); - spinlock_exit(); - sched_switch(curthread, td, SW_VOL); - mtx_unlock_spin(&sched_lock); - } else - spinlock_exit(); + TD_SLOCK(td); + mi_switch(SW_VOL, NULL); + TD_SUNLOCK(td); #ifdef SMP atomic_set_int(&idle_cpus_mask, mycpu); #endif ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_intr.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_intr.c ==== @@ -149,9 +149,11 @@ struct intr_event *ie; struct thread *td; u_char pri; + struct proc *p; ie = ithd->it_event; td = ithd->it_thread; + p = td->td_proc; /* Determine the overall priority of this event. */ if (TAILQ_EMPTY(&ie->ie_handlers)) @@ -162,9 +164,9 @@ /* Update name and priority. 
*/ strlcpy(td->td_proc->p_comm, ie->ie_fullname, sizeof(td->td_proc->p_comm)); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); sched_prio(td, pri); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } /* @@ -295,12 +297,12 @@ if (error) panic("kthread_create() failed with %d", error); td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); td->td_pri_class = PRI_ITHD; TD_SET_IWAIT(td); - mtx_unlock_spin(&sched_lock); td->td_pflags |= TDP_ITHREAD; ithd->it_thread = td; + TD_SUNLOCK(td); CTR2(KTR_INTR, "%s: created %s", __func__, name); return (ithd); } @@ -309,16 +311,18 @@ ithread_destroy(struct intr_thread *ithread) { struct thread *td; + struct proc *p; CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name); td = ithread->it_thread; - mtx_lock_spin(&sched_lock); + p = td->td_proc; + TD_SLOCK(td); ithread->it_flags |= IT_DEAD; if (TD_AWAITING_INTR(td)) { TD_CLR_IWAIT(td); setrunqueue(td, SRQ_INTR); } - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } int @@ -445,7 +449,8 @@ * so we have to remove the handler here rather than letting the * thread do it. */ - mtx_lock_spin(&sched_lock); + + TD_SLOCK(ie->ie_thread->it_thread); if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) { handler->ih_flags |= IH_DEAD; @@ -457,7 +462,7 @@ ie->ie_thread->it_need = 1; } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(ie->ie_thread->it_thread); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); intr_event_update(ie); @@ -522,11 +527,11 @@ /* * Set it_need to tell the thread to keep running if it is already - * running. Then, grab sched_lock and see if we actually need to + * running. Then, grab process spin lock and see if we actually need to * put this thread on the runqueue. 
*/ it->it_need = 1; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, p->p_comm); @@ -536,7 +541,7 @@ CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, p->p_comm, it->it_need, td->td_state); } - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); return (0); } @@ -707,7 +712,7 @@ struct proc *p; td = curthread; - p = td->td_proc; + p = curproc; ithd = (struct intr_thread *)arg; KASSERT(ithd->it_thread == td, ("%s: ithread and proc linkage out of sync", __func__)); @@ -752,13 +757,13 @@ * lock. This may take a while and it_need may get * set again, so we have to check it again. */ - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL, NULL); } - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_kthread.c#2 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_kthread.c ==== @@ -107,15 +107,14 @@ /* call the processes' main()... */ td = FIRST_THREAD_IN_PROC(p2); + TD_SLOCK(td); cpu_set_fork_handler(td, func, arg); TD_SET_CAN_RUN(td); - /* Delay putting it on the run queue until now. 
*/ if (!(flags & RFSTOPPED)) { - mtx_lock_spin(&sched_lock); setrunqueue(td, SRQ_BORING); - mtx_unlock_spin(&sched_lock); } + TD_SUNLOCK(td); return 0; } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_lockf.c#2 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_lockf.c ==== @@ -274,7 +274,7 @@ /* The block is waiting on something */ /* XXXKSE this is not complete under threads */ wproc = (struct proc *)block->lf_id; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(wproc); FOREACH_THREAD_IN_PROC(wproc, td) { while (td->td_wchan && (td->td_wmesg == lockstr) && @@ -286,13 +286,13 @@ break; wproc = (struct proc *)waitblock->lf_id; if (wproc == (struct proc *)lock->lf_id) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(wproc); free(lock, M_LOCKF); return (EDEADLK); } } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(wproc); } /* * For flock type locks, we must first remove ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_mutex.c#9 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_mutex.c ==== @@ -113,9 +113,7 @@ /* * System-wide mutexes */ -struct mtx sched_lock; struct mtx Giant; -struct mtx timer_lock; #ifdef SPIN_PROFILING SYSCTL_NODE(_debug, OID_AUTO, spinlock, CTLFLAG_RD, NULL, "spinlock debugging"); @@ -654,6 +652,29 @@ } #ifdef SMP +int +_mtx_trylock_spin(struct mtx *m, uintptr_t tid, const char *file, int line) +{ + uintptr_t _tid = (uintptr_t)(tid); + int v; + KASSERT(LOCK_CLASS(&m->mtx_object) == &lock_class_mtx_spin, + ("mtx_lock_spin() of sleep mutex %s @ %s:%d", + m->mtx_object.lo_name, file, line)); + spinlock_enter(); + v = _obtain_lock((m), _tid); + if (!v) + spinlock_exit(); + else { + WITNESS_LOCK(&m->mtx_object, LOP_EXCLUSIVE, file, line); + } + + return v; +} + + + + + /* * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock. 
* @@ -691,8 +712,14 @@ if (i < 60000000) DELAY(1); else if (!kdb_active && !panicstr) { + struct thread *locktd; printf("spin lock %s held by %p for > 5 seconds\n", m->mtx_object.lo_name, (void *)m->mtx_lock); + locktd = (struct thread *)m->mtx_lock; + if (locktd->td_proc) + printf("proc %s @ %s:%d", locktd->td_proc->p_comm, + m->mtx_filename, m->mtx_lineno); + #ifdef WITNESS witness_display_spinlock(&m->mtx_object, mtx_owner(m)); @@ -794,7 +821,7 @@ td = curthread; if (td->td_critnest > 0 || td1->td_priority >= td->td_priority) return; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(td1->td_proc); if (!TD_IS_RUNNING(td1)) { #ifdef notyet if (td->td_ithd != NULL) { @@ -813,13 +840,13 @@ CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p switching out lock=%p", m, (void *)m->mtx_lock); - + PROC_SUNLOCK(td1->td_proc); mi_switch(SW_INVOL, NULL); if (LOCK_LOG_TEST(&m->mtx_object, opts)) CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p", m, (void *)m->mtx_lock); - } - mtx_unlock_spin(&sched_lock); + } else + PROC_SUNLOCK(td1->td_proc); #endif return; @@ -1030,13 +1057,15 @@ } #endif + bzero(&thread0.td_spin_mtx, sizeof(struct mtx)); + bzero(&proc0.p_spin_mtx, sizeof(struct mtx)); /* * Initialize mutexes. 
*/ mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE); - mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE); - mtx_init(&timer_lock, "timer lock", NULL, MTX_SPIN); mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); + mtx_init(&proc0.p_spin_mtx, "process spin lock", NULL, MTX_SPIN | MTX_DUPOK); + mtx_init(&thread0.td_spin_mtx, "thread spin lock", NULL, MTX_SPIN | MTX_DUPOK); mtx_init(&devmtx, "cdev", NULL, MTX_DEF); mtx_lock(&Giant); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_poll.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_poll.c ==== @@ -577,20 +577,20 @@ { struct thread *td = curthread; struct rtprio rtp; + struct proc *p = curproc; + rtp.prio = RTP_PRIO_MAX; /* lowest priority */ rtp.type = RTP_PRIO_IDLE; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); for (;;) { if (poll_in_idle_loop && poll_handlers > 0) { idlepoll_sleeping = 0; ether_poll(poll_each_burst); - mtx_lock_spin(&sched_lock); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); } else { idlepoll_sleeping = 1; tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3); ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_proc.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_proc.c ==== @@ -177,7 +177,9 @@ p->p_sched = (struct p_sched *)&p[1]; td = thread_alloc(); bzero(&p->p_mtx, sizeof(struct mtx)); + bzero(&p->p_spin_mtx, sizeof(struct mtx)); mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); + mtx_init(&p->p_spin_mtx, "process spin lock", NULL, MTX_SPIN | MTX_DUPOK); p->p_stats = pstats_alloc(); proc_linkup(p, td); return (0); @@ -660,7 +662,7 @@ kp->ki_sigcatch = ps->ps_sigcatch; mtx_unlock(&ps->ps_mtx); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (p->p_state != PRS_NEW && p->p_state != PRS_ZOMBIE && p->p_vmspace != NULL) { @@ -669,6 +671,7 @@ kp->ki_size = vm->vm_map.size; kp->ki_rssize = vmspace_resident_count(vm); /*XXX*/ FOREACH_THREAD_IN_PROC(p, td0) { + /* 
unlocked read - statistics only */ if (!TD_IS_SWAPPED(td0)) kp->ki_rssize += td0->td_kstack_pages; if (td0->td_altkstack_obj != NULL) @@ -685,7 +688,7 @@ kp->ki_pid = p->p_pid; kp->ki_nice = p->p_nice; kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) { kp->ki_start = p->p_stats->p_start; timevaladd(&kp->ki_start, &boottime); @@ -811,8 +814,8 @@ SIGSETOR(kp->ki_siglist, td->td_siglist); kp->ki_sigmask = td->td_sigmask; + } - /* * Fill in a kinfo_proc structure for the specified process. * Must be called with the target process locked. @@ -822,10 +825,13 @@ { fill_kinfo_proc_only(p, kp); - mtx_lock_spin(&sched_lock); - if (FIRST_THREAD_IN_PROC(p) != NULL) + PROC_SLOCK(p); + if (FIRST_THREAD_IN_PROC(p) != NULL) { + TD_SLOCK(FIRST_THREAD_IN_PROC(p)); fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(FIRST_THREAD_IN_PROC(p)); + } + PROC_SUNLOCK(p); } struct pstats * @@ -892,26 +898,31 @@ fill_kinfo_proc_only(p, &kinfo_proc); if (flags & KERN_PROC_NOTHREADS) { - mtx_lock_spin(&sched_lock); - if (FIRST_THREAD_IN_PROC(p) != NULL) + PROC_SLOCK(p); + if (FIRST_THREAD_IN_PROC(p) != NULL) { + TD_SLOCK(FIRST_THREAD_IN_PROC(p)); fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), &kinfo_proc); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(FIRST_THREAD_IN_PROC(p)); + } + PROC_SUNLOCK(p); error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); } else { - mtx_lock_spin(&sched_lock); - if (FIRST_THREAD_IN_PROC(p) != NULL) + PROC_SLOCK(p); + if (FIRST_THREAD_IN_PROC(p) != NULL) { FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); fill_kinfo_thread(td, &kinfo_proc); + TD_SUNLOCK(td); error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); if (error) break; } - else + } else error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } PROC_UNLOCK(p); if (error) @@ -1001,13 
+1012,12 @@ /* * Skip embryonic processes. */ - mtx_lock_spin(&sched_lock); + PROC_LOCK(p); if (p->p_state == PRS_NEW) { - mtx_unlock_spin(&sched_lock); + PROC_UNLOCK(p); continue; } - mtx_unlock_spin(&sched_lock); - PROC_LOCK(p); + KASSERT(p->p_ucred != NULL, ("process credential is NULL for non-NEW proc")); /* ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_resource.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_resource.c ==== @@ -266,9 +266,9 @@ n = PRIO_MIN; if (n < p->p_nice && suser(td) != 0) return (EACCES); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); sched_nice(p, n); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (0); } @@ -316,7 +316,7 @@ case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * Return OUR priority if no pid specified, * or if one is, report the highest priority @@ -328,13 +328,16 @@ * as leaving it zero. */ if (uap->pid == 0) { + TD_SLOCK(td); pri_to_rtp(td, &rtp); + TD_SUNLOCK(td); } else { struct rtprio rtp2; rtp.type = RTP_PRIO_IDLE; rtp.prio = RTP_PRIO_MAX; FOREACH_THREAD_IN_PROC(p, tdp) { + TD_SLOCK(tdp); pri_to_rtp(tdp, &rtp2); if (rtp2.type < rtp.type || (rtp2.type == rtp.type && @@ -342,9 +345,10 @@ rtp.type = rtp2.type; rtp.prio = rtp2.prio; } + TD_SUNLOCK(tdp); } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: @@ -382,16 +386,21 @@ * do all the threads on that process. If we * specify our own pid we do the latter. 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (uap->pid == 0) { + TD_SLOCK(td); error = rtp_to_pri(&rtp, td); + TD_SUNLOCK(td); } else { FOREACH_THREAD_IN_PROC(p, td) { - if ((error = rtp_to_pri(&rtp, td)) != 0) + TD_SLOCK(td); + error = rtp_to_pri(&rtp, td); + TD_SUNLOCK(td); + if (error != 0) break; } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; default: error = EINVAL; @@ -405,7 +414,7 @@ rtp_to_pri(struct rtprio *rtp, struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); switch (RTP_PRIO_BASE(rtp->type)) { @@ -431,7 +440,7 @@ pri_to_rtp(struct thread *td, struct rtprio *rtp) { - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_user_pri - PRI_MIN_REALTIME; @@ -584,9 +593,9 @@ switch (which) { case RLIMIT_CPU: - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); p->p_cpulimit = limp->rlim_cur; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) @@ -720,8 +729,8 @@ uint64_t u; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + PROC_SLOCK_ASSERT(p, MA_NOTOWNED); + PROC_SLOCK(p); /* * If we are getting stats for the current process, then add in the @@ -729,7 +738,7 @@ * We reset the thread and CPU state as if we had performed a context * switch right here. 
*/ - if (curthread->td_proc == p) { + if (curproc == p) { td = curthread; u = cpu_ticks(); p->p_rux.rux_runtime += u - PCPU_GET(switchtime); @@ -741,9 +750,9 @@ p->p_rux.rux_sticks += td->td_sticks; td->td_sticks = 0; } - /* Work on a copy of p_rux so we can let go of sched_lock */ + /* Work on a copy of p_rux so we can let go of p_spin_mtx */ rux = p->p_rux; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); calcru1(p, &rux, up, sp); /* Update the result from the p_rux copy */ p->p_rux.rux_uu = rux.rux_uu; ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_shutdown.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_shutdown.c ==== @@ -267,9 +267,7 @@ * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ - mtx_lock_spin(&sched_lock); sched_bind(curthread, 0); - mtx_unlock_spin(&sched_lock); KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); #endif /* We're in the process of rebooting. */ @@ -340,9 +338,9 @@ */ DROP_GIANT(); for (subiter = 0; subiter < 50 * iter; subiter++) { - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); DELAY(1000); } PICKUP_GIANT(); ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_sig.c#7 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_sig.c ==== @@ -509,14 +509,18 @@ struct thread *td0; PROC_LOCK_ASSERT(p, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_NOTOWNED); sigqueue_init(&worklist, NULL); sigqueue_move_set(&p->p_sigqueue, &worklist, set); - mtx_lock_spin(&sched_lock); - FOREACH_THREAD_IN_PROC(p, td0) + PROC_SLOCK(p); + FOREACH_THREAD_IN_PROC(p, td0) { + TD_SLOCK(td0); sigqueue_move_set(&td0->td_sigqueue, &worklist, set); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td0); + } + PROC_SUNLOCK(p); sigqueue_flush(&worklist); } @@ -555,8 +559,9 @@ cursig(struct thread *td) { PROC_LOCK_ASSERT(td->td_proc, MA_OWNED); + PROC_SLOCK_ASSERT(td->td_proc, MA_NOTOWNED); + TD_SLOCK_ASSERT(td, MA_NOTOWNED); 
mtx_assert(&td->td_proc->p_sigacts->ps_mtx, MA_OWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); return (SIGPENDING(td) ? issignal(td) : 0); } @@ -1937,7 +1942,7 @@ if (curproc == p && !SIGISMEMBER(curthread->td_sigmask, sig)) return (curthread); signal_td = NULL; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { if (!SIGISMEMBER(td->td_sigmask, sig)) { signal_td = td; @@ -1946,7 +1951,7 @@ } if (signal_td == NULL) signal_td = FIRST_THREAD_IN_PROC(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (signal_td); } @@ -2181,9 +2186,9 @@ /* * The signal is not ignored or caught. */ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); goto out; } @@ -2206,10 +2211,12 @@ * the PROCESS runnable, leave it stopped. * It may run a bit until it hits a thread_suspend_check(). */ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(td->td_proc); + TD_SLOCK(td); if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR)) sleepq_abort(td, intrval); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); + PROC_SUNLOCK(td->td_proc); goto out; /* * Mutexes are short lived. Threads waiting on them will @@ -2217,9 +2224,11 @@ */ } else if (p->p_state == PRS_NORMAL) { if (p->p_flag & P_TRACED || action == SIG_CATCH) { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); + TD_SLOCK(td); tdsigwakeup(td, sig, action, intrval); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); + PROC_SUNLOCK(p); goto out; } @@ -2230,7 +2239,7 @@ goto out; p->p_flag |= P_STOPPED_SIG; p->p_xstat = sig; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); sig_suspend_threads(td, p, 1); if (p->p_numthreads == p->p_suspcount) { /* @@ -2241,10 +2250,10 @@ * should never be equal to p_suspcount. 
*/ thread_stopped(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); sigqueue_delete_proc(p, p->p_xstat); } else - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); goto out; } else @@ -2262,13 +2271,15 @@ */ runfast: - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); + TD_SLOCK(td); tdsigwakeup(td, sig, action, intrval); + TD_SUNLOCK(td); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); out: - /* If we jump here, sched_lock should not be owned. */ - mtx_assert(&sched_lock, MA_NOTOWNED); + /* If we jump here, process' spin lock should not be owned. */ + PROC_SLOCK_ASSERT(p, MA_NOTOWNED); return (ret); } @@ -2284,7 +2295,8 @@ register int prop; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); prop = sigprop(sig); /* @@ -2313,14 +2325,14 @@ * be awakened. */ if ((prop & SA_CONT) && action == SIG_DFL) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); sigqueue_delete(&p->p_sigqueue, sig); /* * It may be on either list in this state. * Remove from both for now. 
*/ sigqueue_delete(&td->td_sigqueue, sig); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); return; } @@ -2329,7 +2341,6 @@ */ if (td->td_priority > PUSER) sched_prio(td, PUSER); - sleepq_abort(td, intrval); } else { /* @@ -2350,9 +2361,10 @@ struct thread *td2; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); FOREACH_THREAD_IN_PROC(p, td2) { + TD_SLOCK(td2); if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) && (td2->td_flags & TDF_SINTR) && !TD_IS_SUSPENDED(td2)) { @@ -2365,6 +2377,7 @@ forward_signal(td2); #endif } + TD_SUNLOCK(td2); } } @@ -2391,15 +2404,20 @@ p->p_xstat = sig; p->p_xthread = td; p->p_flag |= (P_STOPPED_SIG|P_STOPPED_TRACE); - mtx_lock_spin(&sched_lock); + + PROC_SLOCK(p); sig_suspend_threads(td, p, 0); stopme: thread_stopped(p); + PROC_SUNLOCK(p); + + TD_SLOCK(td); thread_suspend_one(td); + TD_SUNLOCK(td); + PROC_UNLOCK(p); DROP_GIANT(); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); PROC_LOCK(p); if (!(p->p_flag & P_TRACED)) @@ -2407,7 +2425,7 @@ if (td->td_flags & TDF_DBSUSPEND) { if (p->p_flag & P_SINGLE_EXIT) break; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); goto stopme; } } @@ -2551,14 +2569,14 @@ &p->p_mtx.mtx_object, "Catching SIGSTOP"); p->p_flag |= P_STOPPED_SIG; p->p_xstat = sig; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); sig_suspend_threads(td, p, 0); thread_stopped(p); thread_suspend_one(td); + PROC_SUNLOCK(p); PROC_UNLOCK(p); DROP_GIANT(); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); PICKUP_GIANT(); PROC_LOCK(p); mtx_lock(&ps->ps_mtx); @@ -2605,18 +2623,18 @@ int n; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); n = p->p_suspcount; if (p == curproc) n++; if ((p->p_flag & P_STOPPED_SIG) && (n == p->p_numthreads)) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); p->p_flag &= ~P_WAITED; PROC_LOCK(p->p_pptr); childproc_stopped(p, (p->p_flag & P_TRACED) ? 
CLD_TRAPPED : CLD_STOPPED); PROC_UNLOCK(p->p_pptr); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); } } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_subr.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_subr.c ==== @@ -428,11 +428,11 @@ struct thread *td; td = curthread; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); DROP_GIANT(); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); PICKUP_GIANT(); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_switch.c#6 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_switch.c ==== @@ -86,6 +86,9 @@ /************************************************************************ * Functions that manipulate runnability from a thread perspective. * ************************************************************************/ +TAILQ_HEAD(deferred_td_q, thread); + + /* * Select the thread that will be run next. */ @@ -93,6 +96,10 @@ choosethread(void) { struct thread *td; +#ifdef notyet + struct *ttd; + struct deferred_td_q td_q; +#endif #if defined(SMP) && (defined(__i386__) || defined(__amd64__)) if (smp_active == 0 && PCPU_GET(cpuid) != 0) { @@ -107,12 +114,64 @@ retry: td = sched_choose(); + if (td) { CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", td, td->td_priority); + if (!mtx_trylock_spin(&td->td_spin_mtx)) { + if (td->td_tid < curthread->td_tid) + TD_SLOCK(td); + else if (curthread == &thread0) + td = curthread; + else { + + sched_add_internal(td, SRQ_BORING); + td = PCPU_GET(idlethread); + if (curthread != td) + TD_SLOCK(td); + +#ifdef notyet + TAILQ_INIT(&td_q); + TAILQ_INSERT_HEAD(&td_q, td, td_lockq); + while ((td = sched_choose()) != NULL) { + if (!mtx_trylock_spin(&td->td_spin_mtx)) { + if (td->td_tid < curthread->td_tid) { + TD_SLOCK(td); + break; + } else { + TAILQ_INSERT_HEAD(&td_q, td, td_lockq); + } } else { + break; + } + } + while (!TAILQ_EMPTY(&td_q)) { + ttd = TAILQ_FIRST(&td_q); + TAILQ_REMOVE(&td_q, ttd, td_lockq); + /* probably want to have a more 
appropriate flag */ + td->td_kse->ke_state = KES_ONRUNQ; + + } + if (td == NULL) { + td = PCPU_GET(idlethread); + if (curthread != td) + TD_SLOCK(td); + } + +#endif + } + + } + + } else { /* Simulate runq_choose() having returned the idle thread */ td = PCPU_GET(idlethread); + if (curthread != td && !mtx_trylock_spin(&td->td_spin_mtx)) { + if (curthread != &thread0) + TD_SLOCK(td); + else + td = curthread; + } CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); } atomic_set_int(&td->td_kse->ke_flags, KEF_DIDRUN); @@ -128,6 +187,7 @@ goto retry; } + TD_SLOCK_ASSERT(td, MA_OWNED); TD_SET_RUNNING(td); return (td); } @@ -140,15 +200,16 @@ { struct kse *ke; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue")); ke = td->td_kse; CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td); /* We only care about the kse in the run queue. */ td->td_priority = newpri; - if (ke->ke_rqindex != (newpri / RQ_PPQ)) { - sched_rem(td); + if ((ke->ke_rqindex != (newpri / RQ_PPQ)) && (td->td_kse->ke_state == KES_ONRUNQ) && + sched_rem(td)) + { sched_add(td, SRQ_BORING); } } @@ -162,7 +223,7 @@ CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); KASSERT((td->td_inhibitors == 0), ("setrunqueue: trying to run inhibitted thread")); KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), @@ -197,13 +258,13 @@ #ifdef PREEMPTION if (td->td_critnest == 1) { td->td_critnest = 0; - mtx_assert(&sched_lock, MA_NOTOWNED); + TD_SLOCK_ASSERT(td, MA_NOTOWNED); if (td->td_owepreempt) { td->td_critnest = 1; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); td->td_critnest--; mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } } else #endif @@ -228,7 +289,7 @@ int cpri, pri; #endif - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); #ifdef PREEMPTION 
/* * The new thread should not preempt the current thread if any of the @@ -524,19 +585,21 @@ * corresponding status bit if the queue becomes empty. * Caller must set ke->ke_state afterwards. */ -static __inline void +static __inline int _runq_remove(struct runq *rq, struct kse *ke) { struct rqhead *rqh; - int pri; + int pri, removed; KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM, ("runq_remove: process swapped out")); + removed = 0; pri = ke->ke_rqindex; rqh = &rq->rq_queues[pri]; CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p", ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh); KASSERT(ke != NULL, ("runq_remove: no proc on busy queue")); + if (ke->ke_state == KES_ONRUNQ) { TAILQ_REMOVE(rqh, ke, ke_procq); #ifndef SMP if (TAILQ_EMPTY(rqh)) { @@ -544,14 +607,20 @@ runq_clrbit(rq, pri); } #endif + removed = 1; + } + return (removed); } -void +int runq_remove(struct runq *rq, struct kse *ke) { + int removed; + runq_lock(rq, ke); - _runq_remove(rq, ke); + removed = _runq_remove(rq, ke); runq_unlock(rq, ke); + return removed; } void ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_synch.c#6 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_synch.c ==== @@ -128,7 +128,7 @@ WITNESS_SAVE_DECL(mtx); td = curthread; - p = td->td_proc; + p = curproc; #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 0); @@ -196,9 +196,9 @@ * Adjust this thread's priority. */ if ((priority & PRIMASK) != 0) { - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); sched_prio(td, priority & PRIMASK); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } if (timo && catch) @@ -341,7 +341,6 @@ struct thread *td; struct proc *p; - mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); td = curthread; /* XXX */ p = td->td_proc; /* XXX */ KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code")); @@ -361,7 +360,6 @@ * Don't perform context switches from the debugger. 
*/ if (kdb_active) { - mtx_unlock_spin(&sched_lock); kdb_backtrace(); kdb_reenter(); panic("%s: did not reenter debugger", __func__); @@ -421,6 +419,7 @@ td->td_inhibitors, td->td_wmesg, td->td_lockname); #endif sched_switch(td, newtd, flags); + CTR3(KTR_SCHED, "mi_switch: running %p(%s) prio %d", td, td->td_proc->p_comm, td->td_priority); @@ -447,7 +446,7 @@ struct proc *p; p = td->td_proc; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); switch (p->p_state) { case PRS_ZOMBIE: panic("setrunnable(1)"); @@ -542,10 +541,10 @@ { mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); sched_prio(td, PRI_MAX_TIMESHARE); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); td->td_retval[0] = 0; + TD_SUNLOCK(td); return (0); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thr.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_thr.c ==== @@ -184,7 +184,7 @@ td->td_proc->p_flag |= P_HADTHREADS; newtd->td_sigmask = td->td_sigmask; - mtx_lock_spin(&sched_lock); + TD_SLOCK(newtd); thread_link(newtd, p); PROC_UNLOCK(p); @@ -193,7 +193,7 @@ TD_SET_CAN_RUN(newtd); /* if ((flags & THR_SUSPENDED) == 0) */ setrunqueue(newtd, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(newtd); return (error); } @@ -228,7 +228,7 @@ PROC_LOCK(p); sigqueue_flush(&td->td_sigqueue); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); /* * Shutting down last thread in the proc. This will actually @@ -236,10 +236,11 @@ */ if (p->p_numthreads != 1) { thread_stopped(p); + /* thread_exit drops the thread's spin lock */ thread_exit(); /* NOTREACHED */ } - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); PROC_UNLOCK(p); return (0); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_thread.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_thread.c ==== @@ -98,15 +98,16 @@ * Note that td_critnest begins life as 1 because the thread is not * running and is thereby implicitly waiting to be on the receiving * end of a context switch. 
A context switch must occur inside a - * critical section, and in fact, includes hand-off of the sched_lock. - * After a context switch to a newly created thread, it will release - * sched_lock for the first time, and its td_critnest will hit 0 for - * the first time. This happens on the far end of a context switch, - * and when it context switches away from itself, it will in fact go - * back into a critical section, and hand off the sched lock to the - * next thread. + * critical section, and in fact, includes hand-off of this thread's + * spin-lock. After a context switch to a newly created thread, the + * new thread will release its spin lock and the previous thread's + * spin lock the first time it runs in fork exit, and its td_critnest + * will hit 0 for the first time. This happens on the far end of a + * context switch, and when it context switches away from itself, it + * will in fact go back into a critical section, and hand off its spin + * lock and the next thread's spin lock to the next thread. */ - td->td_critnest = 1; + td->td_critnest = 2; #ifdef AUDIT audit_thread_alloc(td); @@ -167,6 +168,8 @@ td->td_turnstile = turnstile_alloc(); td->td_umtxq = umtxq_alloc(); td->td_sched = (struct td_sched *)&td[1]; + bzero(&td->td_spin_mtx, sizeof(struct mtx)); + mtx_init(&td->td_spin_mtx, "thread spin lock", NULL, MTX_SPIN | MTX_DUPOK); sched_newthread(td); return (0); } @@ -316,6 +319,8 @@ * thr_exit() * thread_suspend_check() */ + +extern void sched_throw(struct thread *td); void thread_exit(void) { @@ -326,7 +331,7 @@ td = curthread; p = td->td_proc; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); mtx_assert(&Giant, MA_NOTOWNED); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(p != NULL, ("thread exiting without a process")); @@ -346,6 +351,7 @@ cpu_thread_exit(td); /* XXXSMP */ /* Do the same timestamp bookkeeping that mi_switch() would do.
*/ + /* stats keeping is unlocked */ new_switchtime = cpu_ticks(); p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime)); p->p_rux.rux_uticks += td->td_uticks; @@ -369,7 +375,9 @@ */ if (p->p_flag & P_HADTHREADS) { if (p->p_numthreads > 1) { + PROC_SLOCK(p); thread_unlink(td); + PROC_SUNLOCK(p); sched_exit(p, td); /* @@ -405,6 +413,7 @@ } td->td_state = TDS_INACTIVE; CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td); + PCPU_SET(preempted_thread, curthread); cpu_throw(td, choosethread()); panic("I'm a teapot!"); /* NOTREACHED */ @@ -446,7 +455,6 @@ td->td_state = TDS_INACTIVE; td->td_proc = p; td->td_flags = 0; - LIST_INIT(&td->td_contested); sigqueue_init(&td->td_sigqueue, p); callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); @@ -477,8 +485,7 @@ thread_unlink(struct thread *td) { struct proc *p = td->td_proc; - - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); TAILQ_REMOVE(&p->p_threads, td, td_plist); p->p_numthreads--; /* could clear a few other things here */ @@ -530,7 +537,7 @@ p->p_flag &= ~P_SINGLE_BOUNDARY; } p->p_flag |= P_STOPPED_SINGLE; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); p->p_singlethread = td; if (mode == SINGLE_EXIT) remaining = p->p_numthreads; @@ -542,8 +549,11 @@ if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE) goto stopme; FOREACH_THREAD_IN_PROC(p, td2) { - if (td2 == td) + TD_SLOCK(td2); + if (td2 == td) { + TD_SUNLOCK(td2); continue; + } atomic_set_int(&td2->td_flags, TDF_ASTPENDING); if (TD_IS_INHIBITED(td2)) { switch (mode) { @@ -582,6 +592,7 @@ forward_signal(td2); } #endif + TD_SUNLOCK(td2); } if (mode == SINGLE_EXIT) remaining = p->p_numthreads; @@ -604,10 +615,12 @@ thread_stopped(p); thread_suspend_one(td); PROC_UNLOCK(p); + TD_SLOCK(td); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); + PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); if (mode == SINGLE_EXIT) remaining = p->p_numthreads; else if (mode == SINGLE_BOUNDARY) @@ -626,7 +639,7 @@ p->p_flag &= 
~(P_STOPPED_SINGLE | P_SINGLE_EXIT); thread_unthread(td); } - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); return (0); } @@ -699,7 +712,7 @@ if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td)) sigqueue_flush(&td->td_sigqueue); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_stopped(p); /* * If the process is waiting for us to exit, @@ -729,7 +742,7 @@ p->p_boundary_count--; atomic_clear_int(&td->td_flags, TDF_BOUNDARY); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_LOCK(p); } return (0); @@ -740,7 +753,7 @@ { struct proc *p = td->td_proc; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); PROC_LOCK_ASSERT(p, MA_OWNED); KASSERT(!TD_IS_SUSPENDED(td), ("already suspended")); p->p_suspcount++; @@ -753,8 +766,8 @@ { struct proc *p = td->td_proc; - mtx_assert(&sched_lock, MA_OWNED); - PROC_LOCK_ASSERT(p, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); TAILQ_REMOVE(&p->p_suspended, td, td_runq); TD_CLR_SUSPENDED(td); p->p_suspcount--; @@ -768,12 +781,13 @@ thread_unsuspend(struct proc *p) { struct thread *td; - - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); PROC_LOCK_ASSERT(p, MA_OWNED); if (!P_SHOULDSTOP(p)) { while ((td = TAILQ_FIRST(&p->p_suspended))) { + TD_SLOCK(td); thread_unsuspend_one(td); + TD_SUNLOCK(td); } } else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) && (p->p_numthreads == p->p_suspcount)) { @@ -799,7 +813,6 @@ p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY); - mtx_lock_spin(&sched_lock); p->p_singlethread = NULL; /* * If there are other threads they mey now run, @@ -807,12 +820,15 @@ * on the process. The single threader must be allowed * to continue however as this is a bad place to stop. 
*/ + PROC_SLOCK(p); if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) { while ((td = TAILQ_FIRST(&p->p_suspended))) { + TD_SLOCK(td); thread_unsuspend_one(td); + TD_SUNLOCK(td); } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } struct thread * @@ -821,11 +837,12 @@ struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { + /* unlocked read - safe */ if (td->td_tid == tid) break; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (td); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/kern_time.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/kern_time.c ==== @@ -588,9 +588,9 @@ timevalsub(&aitv->it_value, &ctv); } } else { - mtx_lock_spin(&timer_lock); + PROC_SLOCK(p); *aitv = p->p_stats->p_timer[which]; - mtx_unlock_spin(&timer_lock); + PROC_SUNLOCK(p); } return (0); } @@ -663,10 +663,10 @@ timevalsub(&oitv->it_value, &ctv); } } else { - mtx_lock_spin(&timer_lock); + PROC_SLOCK(p); *oitv = p->p_stats->p_timer[which]; p->p_stats->p_timer[which] = *aitv; - mtx_unlock_spin(&timer_lock); + PROC_SUNLOCK(p); } return (0); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/sched_4bsd.c#6 - /shared/p4/sun4v/work_ifc/src/sys/kern/sched_4bsd.c ==== @@ -54,10 +54,8 @@ #include #include #include +#include -#ifdef HWPMC_HOOKS -#include -#endif /* * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in @@ -150,6 +148,8 @@ */ static struct runq runq; +static void sched_add_internal(struct thread *td, int flags); + #ifdef SMP /* * Per-CPU run queues @@ -257,7 +257,7 @@ maybe_resched(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); if (td->td_priority < curthread->td_priority) atomic_set_int(&curthread->td_flags, TDF_NEEDRESCHED); } @@ -275,11 +275,8 @@ { #ifdef SMP - mtx_lock_spin(&sched_lock); forward_roundrobin(); - mtx_unlock_spin(&sched_lock); #endif - callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL); } @@ -389,13 
+386,14 @@ /* * Prevent state changes and protect run queue. */ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * Increment time in/out of memory. We ignore overflow; with * 16-bit int's (remember them?) overflow takes 45 days. */ p->p_swtime++; FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); awake = 0; ke = td->td_kse; /* @@ -431,7 +429,7 @@ * it wakes up. */ if (ke->ke_cpticks == 0) - continue; + goto skip; #if (FSHIFT >= CCPU_SHIFT) ke->ke_pctcpu += (realstathz == 100) ? ((fixpt_t) ke->ke_cpticks) << @@ -466,12 +464,14 @@ } else td->td_slptime++; if (td->td_slptime > 1) - continue; + goto skip; td->td_estcpu = decay_cpu(loadfac, td->td_estcpu); resetpriority(td); resetpriority_thread(td); + skip: + TD_SUNLOCK(td); } /* end of thread loop */ - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } /* end of process loop */ sx_sunlock(&allproc_lock); } @@ -629,13 +629,13 @@ ke = td->td_kse; atomic_add_int(&ke->ke_cpticks, 1); + TD_SLOCK(td); td->td_estcpu = ESTCPULIM(td->td_estcpu + 1); if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { - mtx_lock_spin(&sched_lock); resetpriority(td); resetpriority_thread(td); - mtx_unlock_spin(&sched_lock); } + TD_SUNLOCK(td); } /* @@ -667,18 +667,20 @@ struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); p->p_nice = nice; FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); resetpriority(td); resetpriority_thread(td); + TD_SUNLOCK(td); } } void sched_class(struct thread *td, int class) { - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); td->td_pri_class = class; } @@ -692,7 +694,7 @@ td, td->td_proc->p_comm, td->td_priority, prio, curthread, curthread->td_proc->p_comm); - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); if (td->td_priority == prio) return; if (TD_ON_RUNQ(td)) { @@ -770,7 +772,7 @@ sched_sleep(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); td->td_slptime = 0; } @@ 
-779,24 +781,40 @@ { struct kse *ke; struct proc *p; - struct thread *choosetd; + int tdset; ke = td->td_kse; p = td->td_proc; - choosetd = NULL; + + if (newtd == NULL) { + tdset = 0; + newtd = choosethread(); + } else { + tdset = 1; + atomic_set_int(&newtd->td_flags, (td->td_flags & TDF_NEEDRESCHED)); + } - if (newtd == NULL) - choosetd = choosethread(); + TD_SLOCK_ASSERT(td, MA_OWNED); + TD_SLOCK_ASSERT(newtd, MA_OWNED); + if (td != newtd && td->td_critnest != 2) + panic("sched_switch expected nesting 2 is %d", td->td_critnest); + else if (td == newtd && td->td_critnest != 1) + panic("sched_switch expected nesting 1 is %d", td->td_critnest); + else if (!mtx_owned(&newtd->td_spin_mtx)) + panic("newtd not held"); + else if (!mtx_initialized(&newtd->td_spin_mtx)) + panic("newtd mtx not inited"); if ((p->p_flag & P_NOLOAD) == 0) sched_load_rem(); - if (newtd) - atomic_set_int(&newtd->td_flags, (td->td_flags & TDF_NEEDRESCHED)); td->td_lastcpu = td->td_oncpu; atomic_clear_int(&td->td_flags, TDF_NEEDRESCHED); td->td_owepreempt = 0; td->td_oncpu = NOCPU; + PCPU_SET(preempted_thread, td); + PCPU_SET(next_thread, newtd); + /* * At the last moment, if this thread is still marked RUNNING, * then put it back on the run queue as it has not been suspended @@ -813,7 +831,7 @@ SRQ_OURSELF|SRQ_YIELDING); } } - if (newtd) { + if (tdset) { /* * The thread we are about to run needs to be counted * as if it had been added to the run queue and selected. 
@@ -827,31 +845,30 @@ TD_SET_RUNNING(newtd); if ((newtd->td_proc->p_flag & P_NOLOAD) == 0) sched_load_add(); - } else { - newtd = choosetd; } - if (td != newtd) { -#ifdef HWPMC_HOOKS - if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); -#endif + + if (!mtx_owned(&newtd->td_spin_mtx)) + panic("newtd not held"); + cpu_switch(td, newtd); -#ifdef HWPMC_HOOKS - if (PMC_PROC_IS_USING_PMCS(td->td_proc)) + PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); -#endif + + td->td_spin_mtx.mtx_lock = (uintptr_t)td; + PCPU_GET(preempted_thread)->td_spin_mtx.mtx_lock = (uintptr_t)td; + TD_SUNLOCK(PCPU_GET(preempted_thread)); } - - sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); + TD_SLOCK_ASSERT(td, MA_OWNED); } void sched_wakeup(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); if (td->td_slptime > 1) { updatepri(td); resetpriority(td); @@ -870,8 +887,6 @@ struct pcpu *pc; cpumask_t id, map3; - mtx_assert(&sched_lock, MA_OWNED); - CTR0(KTR_RUNQ, "forward_wakeup()"); if ((!forward_wakeup_enabled) || @@ -981,8 +996,10 @@ } #endif /* SMP */ -void -sched_add(struct thread *td, int flags) + + +static inline void +_sched_add(struct thread *td, int flags) #ifdef SMP { struct kse *ke; @@ -991,7 +1008,6 @@ int single_cpu = 0; ke = td->td_kse; - mtx_assert(&sched_lock, MA_OWNED); KASSERT(ke->ke_state != KES_ONRUNQ, ("sched_add: kse %p (%s) already in run queue", ke, td->td_proc->p_comm)); @@ -1049,7 +1065,7 @@ { struct kse *ke; ke = td->td_kse; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); KASSERT(ke->ke_state != KES_ONRUNQ, ("sched_add: kse %p (%s) already in run queue", ke, td->td_proc->p_comm)); @@ -1085,6 +1101,20 @@ #endif /* SMP */ void +sched_add(struct thread *td, int flags) +{ + TD_SLOCK_ASSERT(td, MA_OWNED); + _sched_add(td, flags); +} + +static void +sched_add_internal(struct thread *td, int flags) +{ + _sched_add(td, flags); +} + + +int sched_rem(struct thread *td) { struct 
kse *ke; @@ -1092,17 +1122,18 @@ ke = td->td_kse; KASSERT(td->td_proc->p_sflag & PS_INMEM, ("sched_rem: process swapped out")); - KASSERT((ke->ke_state == KES_ONRUNQ), - ("sched_rem: KSE not on run queue")); - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, curthread, curthread->td_proc->p_comm); if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_rem(); - runq_remove(ke->ke_runq, ke); + if (runq_remove(ke->ke_runq, ke)) { ke->ke_state = KES_THREAD; + return (1); + } + return (0); } /* @@ -1164,7 +1195,7 @@ { struct kse *ke; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("sched_bind: cannot bind non-running thread")); ke = td->td_kse; ==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_prof.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/subr_prof.c ==== @@ -426,12 +426,12 @@ } PROC_LOCK(p); upp = &td->td_proc->p_stats->p_prof; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); upp->pr_off = uap->offset; upp->pr_scale = uap->scale; upp->pr_base = uap->samples; upp->pr_size = uap->size; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); startprofclock(p); PROC_UNLOCK(p); @@ -467,19 +467,22 @@ caddr_t addr; u_int i; int v; + struct proc *p; if (ticks == 0) return; + + p = td->td_proc; prof = &td->td_proc->p_stats->p_prof; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return; /* out of range; ignore */ } addr = prof->pr_base + i; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) { td->td_profil_addr = pc; td->td_profil_ticks = ticks; @@ -528,7 +531,6 @@ } stop = 1; PROC_LOCK(p); - out: if (--p->p_profthreads == 0) { if (p->p_flag & P_STOPPROF) { ==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_sleepqueue.c#5 - 
/shared/p4/sun4v/work_ifc/src/sys/kern/subr_sleepqueue.c ==== @@ -389,16 +389,16 @@ if (ret == 0) { mtx_lock_spin(&sc->sc_lock); /* - * Lock sched_lock before unlocking proc lock, + * Lock thread's spin lock before unlocking proc lock, * without this, we could lose a race. */ - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); PROC_UNLOCK(p); if (!(td->td_flags & TDF_INTERRUPT)) return (0); /* KSE threads tried unblocking us. */ ret = td->td_intrval; - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); MPASS(ret == EINTR || ret == ERESTART); } else { PROC_UNLOCK(p); @@ -409,7 +409,7 @@ * on the sleep queue, remove it from the sleep queue. */ sq = sleepq_lookup(wchan); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); if (TD_ON_SLEEPQ(td)) sleepq_resume_thread(sq, td, -1); return (ret); @@ -417,7 +417,7 @@ /* * Switches to another thread if we are still asleep on a sleep queue and - * drop the lock on the sleep queue chain. Returns with sched_lock held. + * drop the lock on the sleep queue chain. Returns with the thread's spin lock held. */ static void sleepq_switch(void *wchan) @@ -428,7 +428,7 @@ td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); /* * If we have a sleep queue, then we've already been woken up, so @@ -460,8 +460,8 @@ { struct thread *td; - mtx_assert(&sched_lock, MA_OWNED); td = curthread; + TD_SLOCK_ASSERT(td, MA_OWNED); /* * If TDF_TIMEOUT is set, we timed out. @@ -485,7 +485,6 @@ */ else if (callout_stop(&td->td_slpcallout) == 0) { atomic_set_int(&td->td_flags, TDF_TIMEOUT); - mtx_lock_spin(&sched_lock); TD_SET_SLEEPING(td); mi_switch(SW_INVOL, NULL); } @@ -501,6 +500,7 @@ struct thread *td; td = curthread; + TD_SLOCK_ASSERT(td, MA_OWNED); /* We are no longer in an interruptible sleep. 
*/ if (td->td_flags & TDF_SINTR) @@ -525,9 +525,9 @@ { MPASS(!(curthread->td_flags & TDF_SINTR)); - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); sleepq_switch(wchan); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); } /* @@ -546,7 +546,7 @@ else sleepq_release(wchan); rval = sleepq_check_signals(); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); if (rcatch) return (rcatch); return (rval); @@ -562,10 +562,10 @@ int rval; MPASS(!(curthread->td_flags & TDF_SINTR)); - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); sleepq_switch(wchan); rval = sleepq_check_timeout(); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); return (rval); } @@ -585,7 +585,7 @@ sleepq_release(wchan); rvalt = sleepq_check_timeout(); rvals = sleepq_check_signals(); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); if (rcatch) return (rcatch); if (rvals) @@ -607,7 +607,7 @@ MPASS(td->td_wchan == sq->sq_wchan); sc = SC_LOOKUP(sq->sq_wchan); mtx_assert(&sc->sc_lock, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); /* Remove the thread from the queue. */ TAILQ_REMOVE(&sq->sq_blocked, td, td_slpq); @@ -682,9 +682,9 @@ besttd = td; } MPASS(besttd != NULL); - mtx_lock_spin(&sched_lock); + TD_SLOCK(besttd); sleepq_resume_thread(sq, besttd, pri); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(besttd); sleepq_release(wchan); } @@ -695,6 +695,7 @@ sleepq_broadcast(void *wchan, int flags, int pri) { struct sleepqueue *sq; + struct thread *td; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); @@ -707,10 +708,12 @@ ("%s: mismatch between sleep/wakeup and cv_*", __func__)); /* Resume all blocked threads on the sleep queue. 
*/ - mtx_lock_spin(&sched_lock); - while (!TAILQ_EMPTY(&sq->sq_blocked)) - sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked), pri); - mtx_unlock_spin(&sched_lock); + while (!TAILQ_EMPTY(&sq->sq_blocked)) { + td = TAILQ_FIRST(&sq->sq_blocked); + TD_SLOCK(td); + sleepq_resume_thread(sq, td, pri); + TD_SUNLOCK(td); + } sleepq_release(wchan); } @@ -733,13 +736,13 @@ * First, see if the thread is asleep and get the wait channel if * it is. */ - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); if (TD_ON_SLEEPQ(td)) { wchan = td->td_wchan; - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); sleepq_lock(wchan); sq = sleepq_lookup(wchan); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); } else { wchan = NULL; sq = NULL; @@ -748,11 +751,11 @@ /* * At this point, if the thread is still on the sleep queue, * we have that sleep queue locked as it cannot migrate sleep - * queues while we dropped sched_lock. If it had resumed and + * queues while we dropped the thread's spin lock. If it had * was on another CPU while the lock was dropped, it would have - * seen that TDF_TIMEOUT and TDF_TIMOFAIL are clear and the - * call to callout_stop() to stop this routine would have failed - * meaning that it would have already set TDF_TIMEOUT to + * resumed and seen that TDF_TIMEOUT and TDF_TIMOFAIL are clear + * and the call to callout_stop() to stop this routine would have + * failed, meaning that it would have already set TDF_TIMEOUT to * synchronize with this function. 
*/ if (TD_ON_SLEEPQ(td)) { @@ -760,7 +763,7 @@ MPASS(sq != NULL); atomic_set_int(&td->td_flags, TDF_TIMEOUT); sleepq_resume_thread(sq, td, -1); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); sleepq_release(wchan); return; } else if (wchan != NULL) @@ -782,7 +785,7 @@ setrunnable(td); } else atomic_set_int(&td->td_flags, TDF_TIMOFAIL); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } /* @@ -802,9 +805,9 @@ MPASS(wchan != NULL); sleepq_lock(wchan); sq = sleepq_lookup(wchan); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) { - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); sleepq_release(wchan); return; } @@ -813,7 +816,7 @@ /* Thread is asleep on sleep queue sq, so wake it up. */ sleepq_resume_thread(sq, td, -1); sleepq_release(wchan); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } /* @@ -828,7 +831,8 @@ { void *wchan; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); + PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED); MPASS(TD_ON_SLEEPQ(td)); MPASS(td->td_flags & TDF_SINTR); MPASS(intrval == EINTR || intrval == ERESTART); @@ -847,9 +851,11 @@ td->td_intrval = intrval; atomic_set_int(&td->td_flags, TDF_SLEEPABORT); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(td->td_proc); + TD_SUNLOCK(td); sleepq_remove(td, wchan); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(td->td_proc); + TD_SLOCK(td); } #ifdef DDB ==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_smp.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/subr_smp.c ==== @@ -159,7 +159,7 @@ * this thread, so all we need to do is poke it if it is currently * executing so that it executes ast(). 
*/ - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("forward_signal: thread is not TDS_RUNNING")); @@ -187,8 +187,6 @@ struct thread *td; cpumask_t id, map, me; - mtx_assert(&sched_lock, MA_OWNED); - CTR0(KTR_SMP, "forward_roundrobin()"); if (!smp_started || cold || panicstr) ==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_taskqueue.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/subr_taskqueue.c ==== @@ -349,15 +349,16 @@ } else tq->tq_pcount++; } - mtx_lock_spin(&sched_lock); + for (i = 0; i < count; i++) { if (tq->tq_pproc[i] == NULL) continue; td = FIRST_THREAD_IN_PROC(tq->tq_pproc[i]); + TD_SLOCK(td); sched_prio(td, pri); setrunqueue(td, SRQ_BORING); + TD_SUNLOCK(td); } - mtx_unlock_spin(&sched_lock); return (0); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_trap.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/subr_trap.c ==== @@ -83,11 +83,11 @@ #ifdef DIAGNOSTIC /* Check that we called signotify() enough. */ PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || (td->td_flags & TDF_ASTPENDING) == 0)) printf("failed to set signal flags properly for ast()\n"); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); PROC_UNLOCK(p); #endif @@ -156,7 +156,7 @@ KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode"); mtx_assert(&Giant, MA_NOTOWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); + TD_SLOCK_ASSERT(td, MA_NOTOWNED); td->td_frame = framep; td->td_pticks = 0; @@ -187,6 +187,9 @@ * This should probably change, by which I mean that * possibly the location of both might change. 
*/ + /* + * unlocked read of p_ucred + */ if (td->td_ucred != p->p_ucred) cred_update_thread(td); if (td->td_pflags & TDP_OWEUPC && p->p_flag & P_PROFIL) { @@ -220,14 +223,14 @@ if (sflag & PS_XCPU) { PROC_LOCK(p); lim_rlimit(p, RLIMIT_CPU, &rlim); - mtx_lock_spin(&sched_lock); + /* unlocked read of process statistics */ if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) { - mtx_unlock_spin(&sched_lock); killproc(p, "exceeded maximum CPU limit"); } else { + PROC_SLOCK(p); if (p->p_cpulimit < rlim.rlim_max) p->p_cpulimit += 5; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); psignal(p, SIGXCPU); } PROC_UNLOCK(p); @@ -241,10 +244,10 @@ if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 1); #endif - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 1); ==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_turnstile.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/subr_turnstile.c ==== @@ -169,7 +169,7 @@ struct turnstile *ts; int pri; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); pri = td->td_priority; ts = td->td_blocked; for (;;) { @@ -268,7 +268,7 @@ struct thread *td1, *td2; int queue; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); MPASS(TD_ON_LOCK(td)); /* @@ -391,7 +391,7 @@ struct turnstile_chain *tc; struct turnstile *ts; - mtx_assert(&sched_lock, MA_OWNED); + TD_SLOCK_ASSERT(td, MA_OWNED); MPASS(TD_ON_LOCK(td)); /* @@ -561,10 +561,10 @@ /* * Update the priority of the new owner if needed. */ - mtx_lock_spin(&sched_lock); + TD_SLOCK_ORDERED(td, owner); if (td->td_priority < owner->td_priority) sched_lend_prio(owner, td->td_priority); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK_ORDERED(td, owner); } /* @@ -666,7 +666,7 @@ } } #endif - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); /* Save who we are blocked on and switch. 
*/ td->td_tsqueue = queue; td->td_blocked = ts; @@ -683,8 +683,7 @@ if (LOCK_LOG_TEST(lock, 0)) CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s", __func__, td->td_tid, lock, lock->lo_name); - - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } /* @@ -838,7 +837,7 @@ */ td = curthread; pri = PRI_MAX; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); mtx_lock_spin(&td_contested_lock); LIST_FOREACH(ts, &td->td_contested, ts_link) { cp = turnstile_first_waiter(ts)->td_priority; @@ -874,7 +873,7 @@ } } critical_exit(); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } /* @@ -915,7 +914,7 @@ */ td = curthread; pri = PRI_MAX; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); mtx_lock_spin(&td_contested_lock); LIST_FOREACH(ts, &td->td_contested, ts_link) { cp = turnstile_first_waiter(ts)->td_priority; @@ -924,7 +923,7 @@ } mtx_unlock_spin(&td_contested_lock); sched_unlend_prio(td, pri); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } /* ==== //depot/projects/kmacy_sun4v/src/sys/kern/subr_witness.c#5 - /shared/p4/sun4v/work_ifc/src/sys/kern/subr_witness.c ==== @@ -389,6 +389,8 @@ { "intr table", &lock_class_mtx_spin }, { "sleepq chain", &lock_class_mtx_spin }, { "sched lock", &lock_class_mtx_spin }, + { "process spin lock", &lock_class_mtx_spin }, + { "thread spin lock", &lock_class_mtx_spin }, { "turnstile chain", &lock_class_mtx_spin }, { "td_contested", &lock_class_mtx_spin }, { "callout", &lock_class_mtx_spin }, @@ -399,6 +401,7 @@ { "allpmaps", &lock_class_mtx_spin }, { "vm page queue free mutex", &lock_class_mtx_spin }, { "icu", &lock_class_mtx_spin }, + { "ctx lock", &lock_class_mtx_spin }, #ifdef SMP { "runq lock", &lock_class_mtx_spin }, { "smp rendezvous", &lock_class_mtx_spin }, @@ -1083,7 +1086,6 @@ #ifdef KDB debugger: - if (witness_trace) kdb_backtrace(); if (witness_kdb) kdb_enter(__func__); @@ -1358,10 +1360,10 @@ if (flags & WARN_PANIC && n) panic("witness_warn"); #ifdef KDB - else if (witness_kdb && n) + if (witness_trace && n) + kdb_backtrace(); + if 
(witness_kdb && n) kdb_enter(__func__); - else if (witness_trace && n) - kdb_backtrace(); #endif return (n); } ==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_generic.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/sys_generic.c ==== @@ -778,12 +778,9 @@ * collisions and rescan the file descriptors if * necessary. */ - mtx_lock_spin(&sched_lock); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); goto retry; } - mtx_unlock_spin(&sched_lock); if (timo > 0) error = cv_timedwait_sig(&selwait, &sellock, timo); ==== //depot/projects/kmacy_sun4v/src/sys/kern/sys_process.c#4 - /shared/p4/sun4v/work_ifc/src/sys/kern/sys_process.c ==== @@ -530,12 +530,13 @@ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { + /* unlocked read is safe */ if (td2->td_tid == pid) break; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (td2 != NULL) break; /* proc lock held */ PROC_UNLOCK(p); @@ -788,7 +789,7 @@ p->p_xstat = data; p->p_xthread = NULL; if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (req == PT_DETACH) { struct thread *td3; FOREACH_THREAD_IN_PROC(p, td3) @@ -800,11 +801,9 @@ * you should use PT_SUSPEND to suspend it before * continuing process. 
*/ - mtx_unlock_spin(&sched_lock); p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED); - mtx_lock_spin(&sched_lock); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } if (data) @@ -957,13 +956,14 @@ buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; + /* unlocked read - safe */ buf[tmp++] = td2->td_tid; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); error = copyout(buf, addr, tmp * sizeof(lwpid_t)); free(buf, M_TEMP); ==== //depot/projects/kmacy_sun4v/src/sys/kern/tty.c#3 - /shared/p4/sun4v/work_ifc/src/sys/kern/tty.c ==== @@ -2564,18 +2564,22 @@ * copy of the state, but may increase interrupt latency * too much. */ - pick = NULL; - mtx_lock_spin(&sched_lock); - LIST_FOREACH(p, &tp->t_pgrp->pg_members, p_pglist) + pick = LIST_FIRST(&tp->t_pgrp->pg_members); + LIST_FOREACH(p, &tp->t_pgrp->pg_members, p_pglist) { + if (pick != p) { + PROC_SLOCK_ORDERED(pick, p); if (proc_compare(pick, p)) pick = p; - + PROC_SUNLOCK_ORDERED(pick, p); + } + } + PROC_SLOCK(pick); td = FIRST_THREAD_IN_PROC(pick); /* XXXKSE */ #if 0 KASSERT(td != NULL, ("ttyinfo: no thread")); #else if (td == NULL) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(pick); PGRP_UNLOCK(tp->t_pgrp); ttyprintf(tp, "foreground process without thread\n"); tp->t_rocount = 0; @@ -2607,7 +2611,7 @@ rss = 0; else rss = pgtok(vmspace_resident_count(pick->p_vmspace)); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(pick); PROC_LOCK(pick); PGRP_UNLOCK(tp->t_pgrp); calcru(pick, &utime, &stime); @@ -2662,7 +2666,8 @@ int esta, estb; struct thread *td; - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p1, MA_OWNED); + PROC_SLOCK_ASSERT(p2, MA_OWNED); if (p1 == NULL) return (1); ==== //depot/projects/kmacy_sun4v/src/sys/posix4/ksched.c#6 - /shared/p4/sun4v/work_ifc/src/sys/posix4/ksched.c ==== @@ -105,9 +105,9 @@ struct rtprio rtp; int e = 
0; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); pri_to_rtp(td, &rtp); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); switch (rtp.type) { case RTP_PRIO_FIFO: @@ -152,9 +152,9 @@ { struct rtprio rtp; - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); pri_to_rtp(td, &rtp); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); if (RTP_PRIO_IS_REALTIME(rtp.type)) param->sched_priority = rtpprio_to_p4prio(rtp.prio); @@ -186,10 +186,9 @@ rtp.prio = p4prio_to_rtpprio(param->sched_priority); rtp.type = (policy == SCHED_FIFO) ? RTP_PRIO_FIFO : RTP_PRIO_REALTIME; - - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } else e = EPERM; @@ -201,9 +200,9 @@ { rtp.type = RTP_PRIO_NORMAL; rtp.prio = p4prio_to_rtpprio(param->sched_priority); - mtx_lock_spin(&sched_lock); + TD_SLOCK(td); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); } break; ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/conf/GENERIC#15 - /shared/p4/sun4v/work_ifc/src/sys/sun4v/conf/GENERIC ==== @@ -66,14 +66,15 @@ options KDB_TRACE options DDB # Support DDB. #options GDB # Support remote GDB. 
-#options INVARIANTS # Enable calls of extra sanity checking -#options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS -#options WITNESS # Enable checks to detect deadlocks and cycles +#options DIAGNOSTIC +options INVARIANTS # Enable calls of extra sanity checking +options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS +options WITNESS # Enable checks to detect deadlocks and cycles #options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed #options DEBUG_LOCKS #options DEBUG_VFS_LOCKS -#options MUTEX_PROFILING +options MUTEX_PROFILING # To make an SMP kernel, the next line is needed options SMP # Symmetric MultiProcessor Kernel ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/pcpu.h#14 - /shared/p4/sun4v/work_ifc/src/sys/sun4v/include/pcpu.h ==== @@ -79,7 +79,7 @@ struct rwindow pc_tsbwbuf[2]; \ u_int pc_node; \ uint16_t pc_cpulist[MAXCPU]; \ - uint64_t pad[6]; + uint64_t pad[4]; /* XXX SUN4V_FIXME - as we access the *_ra and *_size fields in quick * succession we _really_ want them to be L1 cache line size aligned ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/exception.S#62 - /shared/p4/sun4v/work_ifc/src/sys/sun4v/sun4v/exception.S ==== @@ -1821,6 +1821,59 @@ PUTCHAR(0x5b) PUTCHAR(0x5b) PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + + PUTCHAR(0x5b) + PUTCHAR(0x5b) + PUTCHAR(0x5b) + ! 
MAGIC_TRAP_ON;MAGIC_TRAP_ON;MAGIC_EXIT END(unsupported_fault_trap) ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/mp_machdep.c#8 - /shared/p4/sun4v/work_ifc/src/sys/sun4v/sun4v/mp_machdep.c ==== @@ -321,6 +321,7 @@ cpu_mp_bootstrap(struct pcpu *pc) { volatile struct cpu_start_args *csa; + struct thread *newtd; csa = &cpu_start_args; cpu_setregs(pc); @@ -344,13 +345,22 @@ while (csa->csa_count != 0) ; - /* ok, now grab sched_lock and enter the scheduler */ - mtx_lock_spin(&sched_lock); + if (curthread->td_critnest != 2) + panic("critnest == %d", curthread->td_critnest); + + /* ok, lock process and enter the scheduler */ + TD_SLOCK(curthread); + newtd = choosethread(); + TD_SLOCK_ASSERT(newtd, MA_OWNED); + PCPU_SET(next_thread, newtd); + + spinlock_exit(); spinlock_exit(); + PCPU_SET(preempted_thread, curthread); PCPU_SET(switchtime, cpu_ticks()); PCPU_SET(switchticks, ticks); - cpu_throw(NULL, choosethread()); /* doesn't return */ + cpu_throw(NULL, newtd); /* doesn't return */ } void ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/swtch.S#22 - /shared/p4/sun4v/work_ifc/src/sys/sun4v/sun4v/swtch.S ==== @@ -214,9 +214,8 @@ cmp %g0, %o0 be %xcc, 4f nop - MAGIC_TRAP_ON - MAGIC_EXIT - + call pmap_set_ctx_panic + mov %i2, %o2 4: /* * install the new secondary context number in the cpu. 
*/ ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/trap.c#14 - /shared/p4/sun4v/work_ifc/src/sys/sun4v/sun4v/trap.c ==== @@ -250,14 +250,15 @@ init_mondo_queue(); OF_set_mmfsa_traptable(&tl0_base, mmfsa); - for (i = 0; i < 128; i++) + for (i = 0; i < 64; i++) trap_conversion[i] = i; - for (i = 128; i < 256; i++) + for (i = 64; i < 256; i++) trap_conversion[i] = 0; trap_conversion[0x31] = 35; trap_conversion[0x34] = 15; trap_conversion[0x9] = 34; trap_conversion[0x6c] = 14; + trap_conversion[0x68] = 0; } @@ -394,9 +395,11 @@ break; } - if (error != 0) + if (error != 0) { + kdb_backtrace(); panic("trap: %ld=%s: 0x%lx at 0x%lx:0x%lx error=%d", trapno, trap_msg[trap_conversion[trapno]], data, tf->tf_tpc, tf->tf_tnpc, error); } + } CTR1(KTR_TRAP, "trap: td=%p return", td); } @@ -426,9 +429,14 @@ type = type & ~T_KERNEL; va = TLB_TAR_VA(data); -#if 0 +#if 1 + if (ctx == 0 && (tf->tf_tpc < (uint64_t)copy_nofault_begin || tf->tf_tpc > (uint64_t)fs_nofault_end)) { + spinlock_enter(); printf("trap_pfault(type=%ld, data=0x%lx, tpc=0x%lx, ctx=0x%lx)\n", type, data, tf->tf_tpc, ctx); + DELAY(1); + spinlock_exit(); + } #endif CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx", @@ -687,6 +695,7 @@ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); - mtx_assert(&sched_lock, MA_NOTOWNED); + PROC_SLOCK_ASSERT(curproc, MA_NOTOWNED); + TD_SLOCK_ASSERT(curthread, MA_NOTOWNED); mtx_assert(&Giant, MA_NOTOWNED); } ==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/vm_machdep.c#10 - /shared/p4/sun4v/work_ifc/src/sys/sun4v/sun4v/vm_machdep.c ==== @@ -156,8 +156,8 @@ pcb->pcb_pc = (u_long)fork_trampoline - 8; pcb->pcb_sp = (u_long)fr - SPOFF; - /* Setup to release sched_lock in fork_exit(). 
*/ - td->td_md.md_spinlock_count = 1; + /* Setup to release curthread spin lock and preempted thread spin lock in fork_exit */ + td->td_md.md_spinlock_count = 2; td->td_md.md_saved_pil = 0; } @@ -289,8 +289,8 @@ pcb2->pcb_pc = (u_long)fork_trampoline - 8; pcb2->pcb_kstack = (uint64_t)(((char *)pcb2orig) - (CCFSZ + SPOFF)); - /* Setup to release sched_lock in fork_exit(). */ - td2->td_md.md_spinlock_count = 1; + /* Setup to release thread spin lock in fork_exit */ + td2->td_md.md_spinlock_count = 2; td2->td_md.md_saved_pil = 0; /* ==== //depot/projects/kmacy_sun4v/src/sys/sys/mutex.h#6 - /shared/p4/sun4v/work_ifc/src/sys/sys/mutex.h ==== @@ -108,6 +108,7 @@ #ifdef SMP void _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file, int line); +int _mtx_trylock_spin(struct mtx *m, uintptr_t tid, const char *file, int line); #endif void _mtx_unlock_spin(struct mtx *m, int opts, const char *file, int line); int _mtx_trylock(struct mtx *m, int opts, const char *file, int line); @@ -166,16 +167,7 @@ */ #ifndef _get_spin_lock #ifdef SMP -#define _mtx_spin_trylock(mp, tid, file, line) ({ \ - uintptr_t _tid = (uintptr_t)(tid); \ - int v; \ - \ - spinlock_enter(); \ - v = _obtain_lock((mp), _tid); \ - if (!v) \ - spinlock_exit(); \ - v; \ -}) + #ifdef SPIN_PROFILING @@ -340,7 +332,7 @@ #define mtx_unlock_spin_flags(m, opts) \ _mtx_unlock_spin_flags((m), (opts), LOCK_FILE, LOCK_LINE) #define mtx_trylock_spin(m) \ - _mtx_spin_trylock((m), curthread, LOCK_FILE, LOCK_LINE) + _mtx_trylock_spin((m), (uintptr_t)curthread, LOCK_FILE, LOCK_LINE) #else /* LOCK_DEBUG == 0 && !MUTEX_NOINLINE */ #define mtx_lock_flags(m, opts) \ _get_sleep_lock((m), curthread, (opts), LOCK_FILE, LOCK_LINE) @@ -351,7 +343,7 @@ #define mtx_unlock_spin_flags(m, opts) \ _rel_spin_lock((m)) #define mtx_trylock_spin(m) \ - _mtx_spin_trylock((m), curthread, LOCK_FILE, LOCK_LINE) + _mtx_trylock_spin((m), (uintptr_t)curthread, LOCK_FILE, LOCK_LINE) #endif /* LOCK_DEBUG > 0 || MUTEX_NOINLINE */ 
#define mtx_trylock_flags(m, opts) \ ==== //depot/projects/kmacy_sun4v/src/sys/sys/pcpu.h#2 - /shared/p4/sun4v/work_ifc/src/sys/sys/pcpu.h ==== @@ -71,6 +71,8 @@ int pc_ktr_idx; /* Index into trace table */ char *pc_ktr_buf; #endif + struct thread *pc_preempted_thread; + struct thread *pc_next_thread; PCPU_MD_FIELDS; struct vmmeter pc_cnt; /* VM stats counters */ struct device *pc_device; ==== //depot/projects/kmacy_sun4v/src/sys/sys/pmckern.h#3 - /shared/p4/sun4v/work_ifc/src/sys/sys/pmckern.h ==== @@ -97,6 +97,21 @@ sx_xunlock(&pmc_sx); \ } while (0) + +#define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0) + +/* Check if a CPU has recorded samples. */ +#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C)))) + +/* helper functions */ +int pmc_cpu_is_disabled(int _cpu); +int pmc_cpu_is_logical(int _cpu); + +/* Check if a process is using HWPMCs.*/ +#define PMC_PROC_IS_USING_PMCS(p) \ + (__predict_false(atomic_load_acq_int(&(p)->p_flag) & \ + P_HWPMC)) + /* * Some hook invocations (e.g., from context switch and clock handling * code) need to be lock-free. @@ -107,20 +122,17 @@ (pmc_hook)((t), (cmd), (arg)); \ } while (0) -#define PMC_SWITCH_CONTEXT(t,cmd) PMC_CALL_HOOK_UNLOCKED(t,cmd,NULL) +#ifdef HWPMC_HOOKS -/* Check if a process is using HWPMCs.*/ -#define PMC_PROC_IS_USING_PMCS(p) \ - (__predict_false(atomic_load_acq_int(&(p)->p_flag) & \ - P_HWPMC)) +#define PMC_SWITCH_CONTEXT(t,cmd) \ +do { \ + if (PMC_PROC_IS_USING_PMCS(t->td_proc)) \ + PMC_CALL_HOOK_UNLOCKED(t,cmd,NULL); \ +} while (0) -#define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0) +#else -/* Check if a CPU has recorded samples. 
*/ -#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C)))) +#define PMC_SWITCH_CONTEXT(t,cmd) -/* helper functions */ -int pmc_cpu_is_disabled(int _cpu); -int pmc_cpu_is_logical(int _cpu); - +#endif /* !HWPMC_HOOKS */ #endif /* _SYS_PMCKERN_H_ */ ==== //depot/projects/kmacy_sun4v/src/sys/sys/proc.h#5 - /shared/p4/sun4v/work_ifc/src/sys/sys/proc.h ==== @@ -133,7 +133,7 @@ * g - process group mtx * h - callout_lock mtx * i - by curproc or the master session mtx - * j - locked by sched_lock mtx + * j - locked by the process/thread spin lock mtx * k - only accessed by curthread * k*- only accessed by curthread and from an interrupt * l - the attaching proc or attaching proc parent @@ -143,6 +143,7 @@ * p - select lock (sellock) * q - td_contested lock * r - p_peers lock + * s - atomic update * x - created at fork, only changes during single threading in exec * z - zombie threads lock * @@ -180,12 +181,14 @@ struct turnstile *td_turnstile; /* (k) Associated turnstile. */ struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */ lwpid_t td_tid; /* (b) Thread ID. */ + struct mtx td_spin_mtx; /* (n) Spin lock for this struct. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals /* Cleared during fork1() or thread_schedule_upcall(). */ #define td_startzero td_flags - int td_flags; /* (j) TDF_* flags. */ + int td_flags; /* (s) TDF_* flags. */ + int td_running; int td_inhibitors; /* (j) Why can not run. */ int td_pflags; /* (k) Private thread (TDP_*) flags. */ int td_dupfd; /* (k) Ret value from fdopen. XXX */ @@ -403,7 +406,7 @@ * See the td_ or ke_ versions of the same flags. */ int p_flag; /* (c) P_* flags. */ - int p_sflag; /* (j) PS_* flags. */ + int p_sflag; /* (s) PS_* flags. */ enum { PRS_NEW = 0, /* In creation */ PRS_NORMAL, /* threads can be run. */ @@ -417,6 +420,7 @@ LIST_ENTRY(proc) p_sibling; /* (e) List of sibling processes. 
*/ LIST_HEAD(, proc) p_children; /* (e) Pointer to list of children. */ struct mtx p_mtx; /* (n) Lock for this struct. */ + struct mtx p_spin_mtx; /* (n) Spin lock for this struct. */ struct ksiginfo *p_ksi; /* Locked by parent proc lock */ sigqueue_t p_sigqueue; /* (c) Sigs not delivered to a td. */ #define p_siglist p_sigqueue.sq_signals @@ -618,6 +622,54 @@ #define PROC_LOCKED(p) mtx_owned(&(p)->p_mtx) #define PROC_LOCK_ASSERT(p, type) mtx_assert(&(p)->p_mtx, (type)) +/* Lock and unlock a process scheduling info. */ +#define PROC_SLOCK(p) mtx_lock_spin(&(p)->p_spin_mtx) +#define PROC_SUNLOCK(p) mtx_unlock_spin(&(p)->p_spin_mtx) +#define PROC_SLOCKED(p) mtx_owned(&(p)->p_spin_mtx) +#define PROC_SLOCK_ASSERT(p, type) mtx_assert(&(p)->p_spin_mtx, (type)) +#define PROC_SLOCK_ORDERED(p1, p2) \ +do { \ + if ((uintptr_t)p2 > (uintptr_t)p1) { \ + PROC_SLOCK(p1); \ + PROC_SLOCK(p2); \ + } else { \ + PROC_SLOCK(p2); \ + PROC_SLOCK(p1); \ + } \ +} while (0) +#define PROC_SUNLOCK_ORDERED(p1, p2) PROC_SUNLOCK(p1); PROC_SUNLOCK(p2); + +/* Lock and unlock a thread's scheduling info. */ +#define TD_SLOCK(td) mtx_lock_spin(&(td)->td_spin_mtx) +#define TD_SUNLOCK(td) mtx_unlock_spin(&(td)->td_spin_mtx) +#define TD_SLOCKED(td) mtx_owned(&(td)->td_spin_mtx) +#define TD_SLOCK_ASSERT(td, type) mtx_assert(&(td)->td_spin_mtx, (type)) + +/* + * In most cases it only matters that we lock in a consistent order. + * However, in the case of a thread switch, the idle thread will be the last thing + * that choosethread picks. With the exception of thread0, the idlethreads are the + * earliest created threads and hence have the lowest thread ids. By locking in + * descending order we guarantee that (with a special case for thread0), we will + * **eventually** be able to do a blocking acquisition of a thread's spin lock in + * choosethread. This constraint is required to avoid livelock and still satisfy the + * constraint that choosethread return a locked thread. 
+ * + */ +#define TD_SLOCK_ORDERED(td1, td2) \ +do { \ + if (td1->td_tid > td2->td_tid) { \ + TD_SLOCK(td1); \ + TD_SLOCK(td2); \ + } else { \ + TD_SLOCK(td2); \ + TD_SLOCK(td1); \ + } \ +} while (0) +#define TD_SUNLOCK_ORDERED(td1, td2) TD_SUNLOCK(td1); TD_SUNLOCK(td2); + + + /* Lock and unlock a process group. */ #define PGRP_LOCK(pg) mtx_lock(&(pg)->pg_mtx) #define PGRP_UNLOCK(pg) mtx_unlock(&(pg)->pg_mtx) ==== //depot/projects/kmacy_sun4v/src/sys/sys/runq.h#3 - /shared/p4/sun4v/work_ifc/src/sys/sys/runq.h ==== @@ -60,14 +60,16 @@ struct runq { struct rqbits rq_status; rqb_word_t rq_lockbits[RQB_LEN]; + struct mtx rq_lock; struct rqhead rq_queues[RQ_NQS]; }; void runq_add(struct runq *, struct kse *, int flags); +void runq_add_unlocked(struct runq *, struct kse *, int flags); int runq_check(struct runq *); struct kse *runq_choose(struct runq *); void runq_init(struct runq *); -void runq_remove(struct runq *, struct kse *); +int runq_remove(struct runq *, struct kse *); void runq_remove_unlocked(struct runq *, struct kse *); void runq_lock(struct runq *, struct kse *); void runq_unlock(struct runq *, struct kse *); ==== //depot/projects/kmacy_sun4v/src/sys/sys/sched.h#3 - /shared/p4/sun4v/work_ifc/src/sys/sys/sched.h ==== @@ -73,7 +73,7 @@ */ void sched_add(struct thread *td, int flags); void sched_clock(struct thread *td); -void sched_rem(struct thread *td); +int sched_rem(struct thread *td); /* * Binding makes cpu affinity permanent while pinning is used to temporarily ==== //depot/projects/kmacy_sun4v/src/sys/sys/sleepqueue.h#2 - /shared/p4/sun4v/work_ifc/src/sys/sys/sleepqueue.h ==== ==== //depot/projects/kmacy_sun4v/src/sys/ufs/ffs/ffs_snapshot.c#5 - /shared/p4/sun4v/work_ifc/src/sys/ufs/ffs/ffs_snapshot.c ==== @@ -397,10 +397,10 @@ */ if (td->td_proc->p_nice > 0) { PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(td->td_proc); saved_nice = td->td_proc->p_nice; sched_nice(td->td_proc, 0); - mtx_unlock_spin(&sched_lock); + 
PROC_SUNLOCK(td->td_proc); PROC_UNLOCK(td->td_proc); } /* @@ -817,9 +817,9 @@ out: if (saved_nice > 0) { PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(td->td_proc); sched_nice(td->td_proc, saved_nice); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(td->td_proc); PROC_UNLOCK(td->td_proc); } UFS_LOCK(ump); ==== //depot/projects/kmacy_sun4v/src/sys/vm/vm_glue.c#5 - /shared/p4/sun4v/work_ifc/src/sys/vm/vm_glue.c ==== @@ -628,13 +628,15 @@ atomic_clear_int(&p->p_sflag, PS_SWAPPINGIN); atomic_set_int(&p->p_sflag, PS_INMEM); PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); TD_CLR_SWAPPED(td); if (TD_CAN_RUN(td)) setrunnable(td); + TD_SUNLOCK(td); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); wakeup(&p->p_sflag); /* Allow other threads to swap p out now. */ @@ -679,13 +681,14 @@ FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) continue; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { /* * An otherwise runnable thread of a process * swapped out has only the TDI_SWAPPED bit set. * */ + TD_SLOCK(td); if (td->td_inhibitors == TDI_SWAPPED) { pri = p->p_swtime + td->td_slptime; if ((p->p_sflag & PS_SWAPINREQ) == 0) { @@ -703,8 +706,10 @@ ppri = pri; } } + TD_SUNLOCK(td); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); + } sx_sunlock(&allproc_lock); @@ -712,13 +717,13 @@ * Nothing to do, back to sleep. 
*/ if ((p = pp) == NULL) { - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); if (!proc0_rescan) { - TD_SET_IWAIT(&thread0); + TD_SET_IWAIT(curthread); mi_switch(SW_VOL, NULL); } proc0_rescan = 0; - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); goto loop; } PROC_LOCK(p); @@ -730,7 +735,9 @@ */ if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) { PROC_UNLOCK(p); + TD_SLOCK(&thread0); proc0_rescan = 0; + TD_SUNLOCK(&thread0); goto loop; } atomic_clear_int(&p->p_sflag, PS_SWAPINREQ); @@ -741,10 +748,12 @@ */ faultin(p); PROC_UNLOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); + TD_SLOCK(&thread0); p->p_swtime = 0; proc0_rescan = 0; - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(&thread0); + PROC_SUNLOCK(p); goto loop; } @@ -752,7 +761,6 @@ { struct thread *td = &thread0; - if (TD_AWAITING_INTR(td)) { CTR2(KTR_INTR, "%s: setrunqueue %d", __func__, 0); TD_CLR_IWAIT(td); @@ -805,17 +813,17 @@ struct vmspace *vm; int minslptime = 100000; + /* XXX unlocked reads of p_state and p_flag - not clear if this is safe + * or not + */ /* * Watch out for a process in * creation. It may have no * address space or lock yet. */ - mtx_lock_spin(&sched_lock); if (p->p_state == PRS_NEW) { - mtx_unlock_spin(&sched_lock); continue; } - mtx_unlock_spin(&sched_lock); /* * An aio daemon switches its @@ -825,7 +833,6 @@ */ if ((p->p_flag & P_SYSTEM) != 0) continue; - /* * Do not swapout a process that * is waiting for VM data @@ -865,21 +872,22 @@ break; case PRS_NORMAL: - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * do not swapout a realtime process * Check all the thread groups.. */ FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); if (PRI_IS_REALTIME(td->td_pri_class)) - goto nextproc; + goto skip; /* * Guarantee swap_idle_threshold1 * time in memory. */ if (td->td_slptime < swap_idle_threshold1) - goto nextproc; + goto skip; /* * Do not swapout a process if it is @@ -891,7 +899,7 @@ * swapping out a thread. 
*/ if ((td->td_priority) < PSOCK || !thread_safetoswapout(td)) - goto nextproc; + goto skip; /* * If the system is under memory stress, * or if we are swapping @@ -901,10 +909,17 @@ if (((action & VM_SWAP_NORMAL) == 0) && (((action & VM_SWAP_IDLE) == 0) || (td->td_slptime < swap_idle_threshold2))) - goto nextproc; + goto skip; if (minslptime > td->td_slptime) minslptime = td->td_slptime; + + continue; + + /* skip remaining threads in process */ + skip: + TD_SUNLOCK(td); + goto nextproc; } /* @@ -917,7 +932,7 @@ (minslptime > swap_idle_threshold2))) { swapout(p); didswap++; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); vm_map_unlock(&vm->vm_map); vmspace_free(vm); @@ -925,7 +940,7 @@ goto retry; } nextproc: - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } nextproc2: PROC_UNLOCK(p); @@ -950,7 +965,7 @@ struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); + PROC_SLOCK_ASSERT(p, MA_OWNED | MA_NOTRECURSED); #if defined(SWAP_DEBUG) printf("swapping out %d\n", p->p_pid); #endif @@ -970,8 +985,10 @@ * Alternatively, we could swap out only safe threads. 
*/ FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); KASSERT(thread_safetoswapout(td), ("swapout: there is a thread not safe for swapout")); + TD_SUNLOCK(td); } #endif /* INVARIANTS */ @@ -984,17 +1001,22 @@ atomic_clear_int(&p->p_sflag, PS_INMEM); atomic_set_int(&p->p_sflag, PS_SWAPPINGOUT); PROC_UNLOCK(p); - FOREACH_THREAD_IN_PROC(p, td) + FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); TD_SET_SWAPPED(td); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); + } + PROC_SUNLOCK(p); - FOREACH_THREAD_IN_PROC(p, td) + FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); vm_thread_swapout(td); - + TD_SUNLOCK(td); + } atomic_clear_int(&p->p_sflag, PS_SWAPPINGOUT); PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); p->p_swtime = 0; } #endif /* !NO_SWAPPING */ ==== //depot/projects/kmacy_sun4v/src/sys/vm/vm_meter.c#3 - /shared/p4/sun4v/work_ifc/src/sys/vm/vm_meter.c ==== @@ -144,15 +144,16 @@ FOREACH_PROC_IN_SYSTEM(p) { if (p->p_flag & P_SYSTEM) continue; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); switch (p->p_state) { case PRS_NEW: - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); continue; break; default: FOREACH_THREAD_IN_PROC(p, td) { /* Need new statistics XXX */ + TD_SLOCK(td); switch (td->td_state) { case TDS_INHIBITED: if (TD_ON_LOCK(td) || @@ -175,13 +176,16 @@ case TDS_RUNQ: case TDS_RUNNING: totalp->t_rq++; - continue; + goto skip; default: break; } + skip: + TD_SUNLOCK(td); } + } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); /* * Note active objects. */ ==== //depot/projects/kmacy_sun4v/src/sys/vm/vm_pageout.c#2 - /shared/p4/sun4v/work_ifc/src/sys/vm/vm_pageout.c ==== @@ -1240,22 +1240,24 @@ * If the process is in a non-running type state, * don't touch it. Check all the threads individually. 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td)) { breakout = 1; + TD_SUNLOCK(td); break; } + TD_SUNLOCK(td); } + PROC_SUNLOCK(p); if (breakout) { - mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); continue; } - mtx_unlock_spin(&sched_lock); /* * get the process size */ @@ -1281,9 +1283,9 @@ sx_sunlock(&allproc_lock); if (bigproc != NULL) { killproc(bigproc, "out of swap space"); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(bigproc); sched_nice(bigproc, PRIO_MIN); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(bigproc); PROC_UNLOCK(bigproc); wakeup(&cnt.v_free_count); } @@ -1584,17 +1586,20 @@ * if the process is in a non-running type state, * don't touch it. */ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); breakout = 0; FOREACH_THREAD_IN_PROC(p, td) { + TD_SLOCK(td); if (!TD_ON_RUNQ(td) && !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td)) { breakout = 1; + TD_SUNLOCK(td); break; } + TD_SUNLOCK(td); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (breakout) { PROC_UNLOCK(p); continue; ==== //depot/projects/kmacy_sun4v/src/sys/vm/vm_zeroidle.c#4 - /shared/p4/sun4v/work_ifc/src/sys/vm/vm_zeroidle.c ==== @@ -142,15 +142,14 @@ { idlezero_enable = idlezero_enable_default; - for (;;) { if (vm_page_zero_check()) { vm_page_zero_idle(); #ifndef PREEMPTION if (sched_runnable()) { - mtx_lock_spin(&sched_lock); + TD_SLOCK(curthread); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(curthread); } #endif } else { @@ -180,11 +179,13 @@ PROC_LOCK(pagezero_proc); pagezero_proc->p_flag |= P_NOLOAD; PROC_UNLOCK(pagezero_proc); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(pagezero_proc); td = FIRST_THREAD_IN_PROC(pagezero_proc); + TD_SLOCK(td); sched_class(td, PRI_IDLE); sched_prio(td, PRI_MAX_IDLE); setrunqueue(td, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + TD_SUNLOCK(td); + PROC_SUNLOCK(pagezero_proc); } SYSINIT(pagezero, 
SI_SUB_KTHREAD_VM, SI_ORDER_ANY, pagezero_start, NULL)