]> git.baikalelectronics.ru Git - kernel.git/commitdiff
Merge branch 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Jan 2009 19:44:09 +0000 (11:44 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Jan 2009 19:44:09 +0000 (11:44 -0800)
* 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (66 commits)
  x86: export vector_used_by_percpu_irq
  x86: use logical apicid in x2apic_cluster's x2apic_cpu_mask_to_apicid_and()
  sched: nominate preferred wakeup cpu, fix
  x86: fix lguest used_vectors breakage, -v2
  x86: fix warning in arch/x86/kernel/io_apic.c
  sched: fix warning in kernel/sched.c
  sched: move test_sd_parent() to an SMP section of sched.h
  sched: add SD_BALANCE_NEWIDLE at MC and CPU level for sched_mc>0
  sched: activate active load balancing in new idle cpus
  sched: bias task wakeups to preferred semi-idle packages
  sched: nominate preferred wakeup cpu
  sched: favour lower logical cpu number for sched_mc balance
  sched: framework for sched_mc/smt_power_savings=N
  sched: convert BALANCE_FOR_xx_POWER to inline functions
  x86: use possible_cpus=NUM to extend the possible cpus allowed
  x86: fix cpu_mask_to_apicid_and to include cpu_online_mask
  x86: update io_apic.c to the new cpumask code
  x86: Introduce topology_core_cpumask()/topology_thread_cpumask()
  x86: xen: use smp_call_function_many()
  x86: use work_on_cpu in x86/kernel/cpu/mcheck/mce_amd_64.c
  ...

Fixed up trivial conflict in kernel/time/tick-sched.c manually

55 files changed:
1  2 
arch/arm/kernel/smp.c
arch/arm/mach-at91/at91rm9200_time.c
arch/arm/mach-pxa/time.c
arch/arm/mach-realview/core.c
arch/arm/mach-realview/localtimer.c
arch/arm/mach-sa1100/time.c
arch/arm/mach-versatile/core.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/time.c
arch/powerpc/platforms/pseries/xics.c
arch/powerpc/sysdev/mpic.c
arch/s390/Kconfig
arch/s390/kernel/smp.c
arch/s390/kernel/time.c
arch/sparc/kernel/irq_64.c
arch/sparc/kernel/of_device_64.c
arch/sparc/kernel/pci_msi.c
arch/sparc/kernel/smp_32.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/sparc_ksyms_32.c
arch/sparc/kernel/time_64.c
arch/x86/Kconfig
arch/x86/include/asm/irq.h
arch/x86/kernel/apic.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/mcheck/mce_amd_64.c
arch/x86/kernel/genx2apic_uv_x.c
arch/x86/kernel/hpet.c
arch/x86/kernel/io_apic.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/irqinit_32.c
arch/x86/kernel/irqinit_64.c
arch/x86/kernel/reboot.c
arch/x86/kernel/smp.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/tlb_32.c
arch/x86/kernel/tlb_64.c
arch/x86/kernel/traps.c
arch/x86/lguest/boot.c
arch/x86/xen/mmu.c
drivers/xen/events.c
include/linux/interrupt.h
include/linux/irq.h
include/linux/sched.h
init/Kconfig
kernel/irq/chip.c
kernel/irq/manage.c
kernel/sched.c
kernel/sched_fair.c
kernel/sched_rt.c
kernel/sched_stats.h
kernel/time/tick-sched.c
kernel/trace/trace.c
lib/Kconfig
mm/slub.c

Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index 8ac3f721d2359e20b09476fbe85aafc7c161f97d,d1165566f06487a6860e8d14e3586f7230d6b66d..65484b2200b36add20b48dc38caf3f80971c4d29
  #define DBG(fmt...)
  #endif
  
 -int smp_hw_index[NR_CPUS];
  struct thread_info *secondary_ti;
  
- cpumask_t cpu_possible_map = CPU_MASK_NONE;
- cpumask_t cpu_online_map = CPU_MASK_NONE;
  DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
  DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE;
  
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
index a3ea2bcb95de6a39ebb7bf297ee2355f507f178c,0000000000000000000000000000000000000000..cab8e02868716d691a38b9bad239ec754dd39134
mode 100644,000000..100644
--- /dev/null
@@@ -1,1101 -1,0 +1,1104 @@@
- static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
 +/* irq.c: UltraSparc IRQ handling/init/registry.
 + *
 + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
 + * Copyright (C) 1998  Eddie C. Dost    (ecd@skynet.be)
 + * Copyright (C) 1998  Jakub Jelinek    (jj@ultra.linux.cz)
 + */
 +
 +#include <linux/module.h>
 +#include <linux/sched.h>
 +#include <linux/linkage.h>
 +#include <linux/ptrace.h>
 +#include <linux/errno.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/signal.h>
 +#include <linux/mm.h>
 +#include <linux/interrupt.h>
 +#include <linux/slab.h>
 +#include <linux/random.h>
 +#include <linux/init.h>
 +#include <linux/delay.h>
 +#include <linux/proc_fs.h>
 +#include <linux/seq_file.h>
 +#include <linux/bootmem.h>
 +#include <linux/irq.h>
 +
 +#include <asm/ptrace.h>
 +#include <asm/processor.h>
 +#include <asm/atomic.h>
 +#include <asm/system.h>
 +#include <asm/irq.h>
 +#include <asm/io.h>
 +#include <asm/iommu.h>
 +#include <asm/upa.h>
 +#include <asm/oplib.h>
 +#include <asm/prom.h>
 +#include <asm/timer.h>
 +#include <asm/smp.h>
 +#include <asm/starfire.h>
 +#include <asm/uaccess.h>
 +#include <asm/cache.h>
 +#include <asm/cpudata.h>
 +#include <asm/auxio.h>
 +#include <asm/head.h>
 +#include <asm/hypervisor.h>
 +#include <asm/cacheflush.h>
 +
 +#include "entry.h"
 +
 +#define NUM_IVECS     (IMAP_INR + 1)  /* presumably the number of ivector_table entries -- TODO confirm */
 +
 +struct ino_bucket *ivector_table;     /* indexed by ino/sysino in build_irq() and sun4v_build_common() */
 +unsigned long ivector_table_pa;       /* presumably the physical address of ivector_table -- TODO confirm */
 +
 +/* On several sun4u processors, it is illegal to mix bypass and
 + * non-bypass accesses.  Therefore we access all INO buckets
 + * using bypass accesses only.
 + */
 +static unsigned long bucket_get_chain_pa(unsigned long bucket_pa)
 +{
 +      unsigned long ret;
 +
 +      __asm__ __volatile__("ldxa      [%1] %2, %0"
 +                           : "=&r" (ret)
 +                           : "r" (bucket_pa +
 +                                  offsetof(struct ino_bucket,
 +                                           __irq_chain_pa)),
 +                             "i" (ASI_PHYS_USE_EC));
 +
 +      return ret;
 +}
 +
 +static void bucket_clear_chain_pa(unsigned long bucket_pa)
 +{
 +      __asm__ __volatile__("stxa      %%g0, [%0] %1"
 +                           : /* no outputs */
 +                           : "r" (bucket_pa +
 +                                  offsetof(struct ino_bucket,
 +                                           __irq_chain_pa)),
 +                             "i" (ASI_PHYS_USE_EC));
 +}
 +
 +static unsigned int bucket_get_virt_irq(unsigned long bucket_pa)
 +{
 +      unsigned int ret;
 +
 +      __asm__ __volatile__("lduwa     [%1] %2, %0"
 +                           : "=&r" (ret)
 +                           : "r" (bucket_pa +
 +                                  offsetof(struct ino_bucket,
 +                                           __virt_irq)),
 +                             "i" (ASI_PHYS_USE_EC));
 +
 +      return ret;
 +}
 +
 +static void bucket_set_virt_irq(unsigned long bucket_pa,
 +                              unsigned int virt_irq)
 +{
 +      __asm__ __volatile__("stwa      %0, [%1] %2"
 +                           : /* no outputs */
 +                           : "r" (virt_irq),
 +                             "r" (bucket_pa +
 +                                  offsetof(struct ino_bucket,
 +                                           __virt_irq)),
 +                             "i" (ASI_PHYS_USE_EC));
 +}
 +
 +#define irq_work_pa(__cpu)    &(trap_block[(__cpu)].irq_worklist_pa)
 +
 +static struct {
 +      unsigned int dev_handle;
 +      unsigned int dev_ino;
 +      unsigned int in_use;
 +} virt_irq_table[NR_IRQS];
 +static DEFINE_SPINLOCK(virt_irq_alloc_lock);
 +
 +unsigned char virt_irq_alloc(unsigned int dev_handle,
 +                           unsigned int dev_ino)
 +{
 +      unsigned long flags;
 +      unsigned char ent;
 +
 +      BUILD_BUG_ON(NR_IRQS >= 256);
 +
 +      spin_lock_irqsave(&virt_irq_alloc_lock, flags);
 +
 +      for (ent = 1; ent < NR_IRQS; ent++) {
 +              if (!virt_irq_table[ent].in_use)
 +                      break;
 +      }
 +      if (ent >= NR_IRQS) {
 +              printk(KERN_ERR "IRQ: Out of virtual IRQs.\n");
 +              ent = 0;
 +      } else {
 +              virt_irq_table[ent].dev_handle = dev_handle;
 +              virt_irq_table[ent].dev_ino = dev_ino;
 +              virt_irq_table[ent].in_use = 1;
 +      }
 +
 +      spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 +
 +      return ent;
 +}
 +
 +#ifdef CONFIG_PCI_MSI
 +void virt_irq_free(unsigned int virt_irq)
 +{
 +      unsigned long flags;
 +
 +      if (virt_irq >= NR_IRQS)
 +              return;
 +
 +      spin_lock_irqsave(&virt_irq_alloc_lock, flags);
 +
 +      virt_irq_table[virt_irq].in_use = 0;
 +
 +      spin_unlock_irqrestore(&virt_irq_alloc_lock, flags);
 +}
 +#endif
 +
 +/*
 + * /proc/interrupts printing:
 + */
 +
 +int show_interrupts(struct seq_file *p, void *v)
 +{
 +      int i = *(loff_t *) v, j;
 +      struct irqaction * action;
 +      unsigned long flags;
 +
 +      if (i == 0) {
 +              seq_printf(p, "           ");
 +              for_each_online_cpu(j)
 +                      seq_printf(p, "CPU%d       ",j);
 +              seq_putc(p, '\n');
 +      }
 +
 +      if (i < NR_IRQS) {
 +              spin_lock_irqsave(&irq_desc[i].lock, flags);
 +              action = irq_desc[i].action;
 +              if (!action)
 +                      goto skip;
 +              seq_printf(p, "%3d: ",i);
 +#ifndef CONFIG_SMP
 +              seq_printf(p, "%10u ", kstat_irqs(i));
 +#else
 +              for_each_online_cpu(j)
 +                      seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 +#endif
 +              seq_printf(p, " %9s", irq_desc[i].chip->typename);
 +              seq_printf(p, "  %s", action->name);
 +
 +              for (action=action->next; action; action = action->next)
 +                      seq_printf(p, ", %s", action->name);
 +
 +              seq_putc(p, '\n');
 +skip:
 +              spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 +      }
 +      return 0;
 +}
 +
 +static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid)
 +{
 +      unsigned int tid;
 +
 +      if (this_is_starfire) {
 +              tid = starfire_translate(imap, cpuid);
 +              tid <<= IMAP_TID_SHIFT;
 +              tid &= IMAP_TID_UPA;
 +      } else {
 +              if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 +                      unsigned long ver;
 +
 +                      __asm__ ("rdpr %%ver, %0" : "=r" (ver));
 +                      if ((ver >> 32UL) == __JALAPENO_ID ||
 +                          (ver >> 32UL) == __SERRANO_ID) {
 +                              tid = cpuid << IMAP_TID_SHIFT;
 +                              tid &= IMAP_TID_JBUS;
 +                      } else {
 +                              unsigned int a = cpuid & 0x1f;
 +                              unsigned int n = (cpuid >> 5) & 0x1f;
 +
 +                              tid = ((a << IMAP_AID_SHIFT) |
 +                                     (n << IMAP_NID_SHIFT));
 +                              tid &= (IMAP_AID_SAFARI |
 +                                      IMAP_NID_SAFARI);;
 +                      }
 +              } else {
 +                      tid = cpuid << IMAP_TID_SHIFT;
 +                      tid &= IMAP_TID_UPA;
 +              }
 +      }
 +
 +      return tid;
 +}
 +
 +struct irq_handler_data {     /* stored as the irq chip data of a virtual IRQ */
 +      unsigned long   iclr;   /* address written with ICLR_IDLE via upa_writeq(); ~0UL on sun4v */
 +      unsigned long   imap;   /* address accessed via upa_readq()/upa_writeq(); ~0UL on sun4v */
 +
 +      void            (*pre_handler)(unsigned int, void *, void *);   /* called by pre_flow_handler() as (ino, arg1, arg2) */
 +      void            *arg1;  /* opaque argument handed to pre_handler */
 +      void            *arg2;  /* opaque argument handed to pre_handler */
 +};
 +
 +#ifdef CONFIG_SMP
 +static int irq_choose_cpu(unsigned int virt_irq)
 +{
 +      cpumask_t mask = irq_desc[virt_irq].affinity;
 +      int cpuid;
 +
 +      if (cpus_equal(mask, CPU_MASK_ALL)) {
 +              static int irq_rover;
 +              static DEFINE_SPINLOCK(irq_rover_lock);
 +              unsigned long flags;
 +
 +              /* Round-robin distribution... */
 +      do_round_robin:
 +              spin_lock_irqsave(&irq_rover_lock, flags);
 +
 +              while (!cpu_online(irq_rover)) {
 +                      if (++irq_rover >= NR_CPUS)
 +                              irq_rover = 0;
 +              }
 +              cpuid = irq_rover;
 +              do {
 +                      if (++irq_rover >= NR_CPUS)
 +                              irq_rover = 0;
 +              } while (!cpu_online(irq_rover));
 +
 +              spin_unlock_irqrestore(&irq_rover_lock, flags);
 +      } else {
 +              cpumask_t tmp;
 +
 +              cpus_and(tmp, cpu_online_map, mask);
 +
 +              if (cpus_empty(tmp))
 +                      goto do_round_robin;
 +
 +              cpuid = first_cpu(tmp);
 +      }
 +
 +      return cpuid;
 +}
 +#else
 +static int irq_choose_cpu(unsigned int virt_irq)
 +{
 +      return real_hard_smp_processor_id();
 +}
 +#endif
 +
 +static void sun4u_irq_enable(unsigned int virt_irq)
 +{
 +      struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +
 +      if (likely(data)) {
 +              unsigned long cpuid, imap, val;
 +              unsigned int tid;
 +
 +              cpuid = irq_choose_cpu(virt_irq);
 +              imap = data->imap;
 +
 +              tid = sun4u_compute_tid(imap, cpuid);
 +
 +              val = upa_readq(imap);
 +              val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS |
 +                       IMAP_AID_SAFARI | IMAP_NID_SAFARI);
 +              val |= tid | IMAP_VALID;
 +              upa_writeq(val, imap);
 +              upa_writeq(ICLR_IDLE, data->iclr);
 +      }
 +}
 +
- static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
++static void sun4u_set_affinity(unsigned int virt_irq,
++                             const struct cpumask *mask)
 +{     /* @mask is not read here: sun4u_irq_enable() re-selects the target
 +       * cpu through irq_choose_cpu(), which consults
 +       * irq_desc[virt_irq].affinity, and rewrites the IMAP register.
 +       */
 +      sun4u_irq_enable(virt_irq);
 +}
 +
 +static void sun4u_irq_disable(unsigned int virt_irq)
 +{
 +      struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +
 +      if (likely(data)) {
 +              unsigned long imap = data->imap;
 +              unsigned long tmp = upa_readq(imap);
 +
 +              tmp &= ~IMAP_VALID;
 +              upa_writeq(tmp, imap);
 +      }
 +}
 +
 +static void sun4u_irq_eoi(unsigned int virt_irq)
 +{
 +      struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +      struct irq_desc *desc = irq_desc + virt_irq;
 +
 +      if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
 +              return;
 +
 +      if (likely(data))
 +              upa_writeq(ICLR_IDLE, data->iclr);
 +}
 +
 +static void sun4v_irq_enable(unsigned int virt_irq)
 +{
 +      unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +      unsigned long cpuid = irq_choose_cpu(virt_irq);
 +      int err;
 +
 +      err = sun4v_intr_settarget(ino, cpuid);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 +                     "err(%d)\n", ino, cpuid, err);
 +      err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_intr_setstate(%x): "
 +                     "err(%d)\n", ino, err);
 +      err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n",
 +                     ino, err);
 +}
 +
- static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
++static void sun4v_set_affinity(unsigned int virt_irq,
++                             const struct cpumask *mask)
 +{
 +      /* @mask itself is not read; irq_choose_cpu() picks the target
 +       * from irq_desc[virt_irq].affinity.  A hypervisor failure is
 +       * only logged.
 +       */
 +      unsigned int sysino = virt_irq_table[virt_irq].dev_ino;
 +      unsigned long target = irq_choose_cpu(virt_irq);
 +      int hv_err = sun4v_intr_settarget(sysino, target);
 +
 +      if (hv_err != HV_EOK)
 +              printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
 +                     "err(%d)\n", sysino, target, hv_err);
 +}
 +
 +static void sun4v_irq_disable(unsigned int virt_irq)
 +{
 +      unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +      int err;
 +
 +      err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_intr_setenabled(%x): "
 +                     "err(%d)\n", ino, err);
 +}
 +
 +static void sun4v_irq_eoi(unsigned int virt_irq)
 +{
 +      unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +      struct irq_desc *desc = irq_desc + virt_irq;
 +      int err;
 +
 +      if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
 +              return;
 +
 +      err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_intr_setstate(%x): "
 +                     "err(%d)\n", ino, err);
 +}
 +
 +static void sun4v_virq_enable(unsigned int virt_irq)
 +{
 +      unsigned long cpuid, dev_handle, dev_ino;
 +      int err;
 +
 +      cpuid = irq_choose_cpu(virt_irq);
 +
 +      dev_handle = virt_irq_table[virt_irq].dev_handle;
 +      dev_ino = virt_irq_table[virt_irq].dev_ino;
 +
 +      err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 +                     "err(%d)\n",
 +                     dev_handle, dev_ino, cpuid, err);
 +      err = sun4v_vintr_set_state(dev_handle, dev_ino,
 +                                  HV_INTR_STATE_IDLE);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
 +                     "HV_INTR_STATE_IDLE): err(%d)\n",
 +                     dev_handle, dev_ino, err);
 +      err = sun4v_vintr_set_valid(dev_handle, dev_ino,
 +                                  HV_INTR_ENABLED);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
 +                     "HV_INTR_ENABLED): err(%d)\n",
 +                     dev_handle, dev_ino, err);
 +}
 +
-                                       irq_desc[irq].affinity);
++static void sun4v_virt_set_affinity(unsigned int virt_irq,
++                                  const struct cpumask *mask)
 +{
 +      /* @mask itself is not read; irq_choose_cpu() picks the target
 +       * from irq_desc[virt_irq].affinity.  A hypervisor failure is
 +       * only logged.
 +       */
 +      unsigned long target = irq_choose_cpu(virt_irq);
 +      unsigned long handle = virt_irq_table[virt_irq].dev_handle;
 +      unsigned long ino = virt_irq_table[virt_irq].dev_ino;
 +      int hv_err = sun4v_vintr_set_target(handle, ino, target);
 +
 +      if (hv_err != HV_EOK)
 +              printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
 +                     "err(%d)\n",
 +                     handle, ino, target, hv_err);
 +}
 +
 +static void sun4v_virq_disable(unsigned int virt_irq)
 +{
 +      unsigned long dev_handle, dev_ino;
 +      int err;
 +
 +      dev_handle = virt_irq_table[virt_irq].dev_handle;
 +      dev_ino = virt_irq_table[virt_irq].dev_ino;
 +
 +      err = sun4v_vintr_set_valid(dev_handle, dev_ino,
 +                                  HV_INTR_DISABLED);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
 +                     "HV_INTR_DISABLED): err(%d)\n",
 +                     dev_handle, dev_ino, err);
 +}
 +
 +static void sun4v_virq_eoi(unsigned int virt_irq)
 +{
 +      struct irq_desc *desc = irq_desc + virt_irq;
 +      unsigned long dev_handle, dev_ino;
 +      int err;
 +
 +      if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
 +              return;
 +
 +      dev_handle = virt_irq_table[virt_irq].dev_handle;
 +      dev_ino = virt_irq_table[virt_irq].dev_ino;
 +
 +      err = sun4v_vintr_set_state(dev_handle, dev_ino,
 +                                  HV_INTR_STATE_IDLE);
 +      if (err != HV_EOK)
 +              printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx,"
 +                     "HV_INTR_STATE_IDLE): err(%d)\n",
 +                     dev_handle, dev_ino, err);
 +}
 +
 +static struct irq_chip sun4u_irq = {  /* chip installed by build_irq() */
 +      .typename       = "sun4u",
 +      .enable         = sun4u_irq_enable,
 +      .disable        = sun4u_irq_disable,
 +      .eoi            = sun4u_irq_eoi,
 +      .set_affinity   = sun4u_set_affinity,
 +};
 +
 +static struct irq_chip sun4v_irq = {  /* chip passed by sun4v_build_irq() to sun4v_build_common() */
 +      .typename       = "sun4v",
 +      .enable         = sun4v_irq_enable,
 +      .disable        = sun4v_irq_disable,
 +      .eoi            = sun4v_irq_eoi,
 +      .set_affinity   = sun4v_set_affinity,
 +};
 +
 +static struct irq_chip sun4v_virq = { /* chip installed by sun4v_build_virq() */
 +      .typename       = "vsun4v",
 +      .enable         = sun4v_virq_enable,
 +      .disable        = sun4v_virq_disable,
 +      .eoi            = sun4v_virq_eoi,
 +      .set_affinity   = sun4v_virt_set_affinity,
 +};
 +
 +static void pre_flow_handler(unsigned int virt_irq,
 +                                    struct irq_desc *desc)
 +{
 +      struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +      unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +
 +      data->pre_handler(ino, data->arg1, data->arg2);
 +
 +      handle_fasteoi_irq(virt_irq, desc);
 +}
 +
 +void irq_install_pre_handler(int virt_irq,
 +                           void (*func)(unsigned int, void *, void *),
 +                           void *arg1, void *arg2)
 +{
 +      struct irq_handler_data *data = get_irq_chip_data(virt_irq);
 +      struct irq_desc *desc = irq_desc + virt_irq;
 +
 +      data->pre_handler = func;
 +      data->arg1 = arg1;
 +      data->arg2 = arg2;
 +
 +      desc->handle_irq = pre_flow_handler;
 +}
 +
 +unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap)
 +{
 +      struct ino_bucket *bucket;
 +      struct irq_handler_data *data;
 +      unsigned int virt_irq;
 +      int ino;
 +
 +      BUG_ON(tlb_type == hypervisor);
 +
 +      ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup;
 +      bucket = &ivector_table[ino];
 +      virt_irq = bucket_get_virt_irq(__pa(bucket));
 +      if (!virt_irq) {
 +              virt_irq = virt_irq_alloc(0, ino);
 +              bucket_set_virt_irq(__pa(bucket), virt_irq);
 +              set_irq_chip_and_handler_name(virt_irq,
 +                                            &sun4u_irq,
 +                                            handle_fasteoi_irq,
 +                                            "IVEC");
 +      }
 +
 +      data = get_irq_chip_data(virt_irq);
 +      if (unlikely(data))
 +              goto out;
 +
 +      data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
 +      if (unlikely(!data)) {
 +              prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
 +              prom_halt();
 +      }
 +      set_irq_chip_data(virt_irq, data);
 +
 +      data->imap  = imap;
 +      data->iclr  = iclr;
 +
 +out:
 +      return virt_irq;
 +}
 +
 +static unsigned int sun4v_build_common(unsigned long sysino,
 +                                     struct irq_chip *chip)
 +{
 +      struct ino_bucket *bucket;
 +      struct irq_handler_data *data;
 +      unsigned int virt_irq;
 +
 +      BUG_ON(tlb_type != hypervisor);
 +
 +      bucket = &ivector_table[sysino];
 +      virt_irq = bucket_get_virt_irq(__pa(bucket));
 +      if (!virt_irq) {
 +              virt_irq = virt_irq_alloc(0, sysino);
 +              bucket_set_virt_irq(__pa(bucket), virt_irq);
 +              set_irq_chip_and_handler_name(virt_irq, chip,
 +                                            handle_fasteoi_irq,
 +                                            "IVEC");
 +      }
 +
 +      data = get_irq_chip_data(virt_irq);
 +      if (unlikely(data))
 +              goto out;
 +
 +      data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
 +      if (unlikely(!data)) {
 +              prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n");
 +              prom_halt();
 +      }
 +      set_irq_chip_data(virt_irq, data);
 +
 +      /* Catch accidental accesses to these things.  IMAP/ICLR handling
 +       * is done by hypervisor calls on sun4v platforms, not by direct
 +       * register accesses.
 +       */
 +      data->imap = ~0UL;
 +      data->iclr = ~0UL;
 +
 +out:
 +      return virt_irq;
 +}
 +
 +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino)
 +{
 +      unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino);
 +
 +      return sun4v_build_common(sysino, &sun4v_irq);
 +}
 +
 +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino)
 +{
 +      struct irq_handler_data *data;
 +      unsigned long hv_err, cookie;
 +      struct ino_bucket *bucket;
 +      struct irq_desc *desc;
 +      unsigned int virt_irq;
 +
 +      bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC);
 +      if (unlikely(!bucket))
 +              return 0;
 +      __flush_dcache_range((unsigned long) bucket,
 +                           ((unsigned long) bucket +
 +                            sizeof(struct ino_bucket)));
 +
 +      virt_irq = virt_irq_alloc(devhandle, devino);
 +      bucket_set_virt_irq(__pa(bucket), virt_irq);
 +
 +      set_irq_chip_and_handler_name(virt_irq, &sun4v_virq,
 +                                    handle_fasteoi_irq,
 +                                    "IVEC");
 +
 +      data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC);
 +      if (unlikely(!data))
 +              return 0;
 +
 +      /* In order to make the LDC channel startup sequence easier,
 +       * especially wrt. locking, we do not let request_irq() enable
 +       * the interrupt.
 +       */
 +      desc = irq_desc + virt_irq;
 +      desc->status |= IRQ_NOAUTOEN;
 +
 +      set_irq_chip_data(virt_irq, data);
 +
 +      /* Catch accidental accesses to these things.  IMAP/ICLR handling
 +       * is done by hypervisor calls on sun4v platforms, not by direct
 +       * register accesses.
 +       */
 +      data->imap = ~0UL;
 +      data->iclr = ~0UL;
 +
 +      cookie = ~__pa(bucket);
 +      hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie);
 +      if (hv_err) {
 +              prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] "
 +                          "err=%lu\n", devhandle, devino, hv_err);
 +              prom_halt();
 +      }
 +
 +      return virt_irq;
 +}
 +
 +void ack_bad_irq(unsigned int virt_irq)
 +{
 +      unsigned int ino = virt_irq_table[virt_irq].dev_ino;
 +
 +      if (!ino)
 +              ino = 0xdeadbeef;
 +
 +      printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n",
 +             ino, virt_irq);
 +}
 +
 +void *hardirq_stack[NR_CPUS]; /* per-cpu stack base used by set_hardirq_stack() */
 +void *softirq_stack[NR_CPUS]; /* per-cpu stack base used by do_softirq() */
 +/* Switch %sp to this cpu's hardirq stack unless it already points
 + * into it.
 + *
 + * The current %sp is read; if it lies outside [sp, sp + THREAD_SIZE]
 + * of hardirq_stack[smp_processor_id()], %sp is moved to
 + * sp + THREAD_SIZE - 192 - STACK_BIAS (the 192 bytes are presumably
 + * headroom for an initial stack frame -- TODO confirm).
 + *
 + * Returns the %sp value seen on entry.
 + */
 +static __attribute__((always_inline)) void *set_hardirq_stack(void)
 +{
 +      void *orig_sp, *sp = hardirq_stack[smp_processor_id()];
 +
 +      __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp));
 +      if (orig_sp < sp ||
 +          orig_sp > (sp + THREAD_SIZE)) {
 +              sp += THREAD_SIZE - 192 - STACK_BIAS;
 +              __asm__ __volatile__("mov %0, %%sp" : : "r" (sp));
 +      }
 +
 +      return orig_sp;
 +}
 +static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp)
 +{     /* Load @orig_sp back into %sp; handler_irq() passes the value
 +       * that set_hardirq_stack() returned.
 +       */
 +      __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp));
 +}
 +/* Interrupt entry point: dispatch every interrupt vector queued on
 + * this cpu's work list.
 + *
 + * The softint bit for @irq is cleared, @regs become the current irq
 + * regs, and the head of this cpu's irq_worklist_pa chain is fetched
 + * and zeroed in one asm sequence.  The chain of buckets is then walked
 + * on the hardirq stack: for each bucket the next link and the virtual
 + * IRQ are read, the link is cleared, and the IRQ's flow handler is
 + * invoked.  The original stack and irq regs are restored on exit.
 + */
 +void handler_irq(int irq, struct pt_regs *regs)
 +{
 +      unsigned long pstate, bucket_pa;
 +      struct pt_regs *old_regs;
 +      void *orig_sp;
 +
 +      clear_softint(1 << irq);
 +
 +      old_regs = set_irq_regs(regs);
 +      irq_enter();
 +
 +      /* Grab an atomic snapshot of the pending IVECs.
 +       *
 +       * %pstate is saved, rewritten with the PSTATE_IE operand applied
 +       * (presumably masking interrupts -- TODO confirm wrpr semantics),
 +       * the list head is loaded and zeroed, and the saved %pstate is
 +       * written back.
 +       */
 +      __asm__ __volatile__("rdpr      %%pstate, %0\n\t"
 +                           "wrpr      %0, %3, %%pstate\n\t"
 +                           "ldx       [%2], %1\n\t"
 +                           "stx       %%g0, [%2]\n\t"
 +                           "wrpr      %0, 0x0, %%pstate\n\t"
 +                           : "=&r" (pstate), "=&r" (bucket_pa)
 +                           : "r" (irq_work_pa(smp_processor_id())),
 +                             "i" (PSTATE_IE)
 +                           : "memory");
 +
 +      orig_sp = set_hardirq_stack();
 +
 +      while (bucket_pa) {
 +              struct irq_desc *desc;
 +              unsigned long next_pa;
 +              unsigned int virt_irq;
 +
 +              next_pa = bucket_get_chain_pa(bucket_pa);
 +              virt_irq = bucket_get_virt_irq(bucket_pa);
 +              bucket_clear_chain_pa(bucket_pa);
 +
 +              desc = irq_desc + virt_irq;
 +
 +              desc->handle_irq(virt_irq, desc);
 +
 +              bucket_pa = next_pa;
 +      }
 +
 +      restore_hardirq_stack(orig_sp);
 +
 +      irq_exit();
 +      set_irq_regs(old_regs);
 +}
 +/* Run pending softirqs on this cpu's dedicated softirq stack.
 + *
 + * Returns immediately when called from interrupt context.  Otherwise,
 + * with local interrupts disabled, and only if softirqs are pending,
 + * %sp is switched to softirq_stack[cpu] + THREAD_SIZE - 192 -
 + * STACK_BIAS (the 192 bytes are presumably headroom for an initial
 + * stack frame -- TODO confirm), __do_softirq() is run, and the
 + * original %sp is restored.
 + */
 +void do_softirq(void)
 +{
 +      unsigned long flags;
 +
 +      if (in_interrupt())
 +              return;
 +
 +      local_irq_save(flags);
 +
 +      if (local_softirq_pending()) {
 +              void *orig_sp, *sp = softirq_stack[smp_processor_id()];
 +
 +              sp += THREAD_SIZE - 192 - STACK_BIAS;
 +
 +              __asm__ __volatile__("mov %%sp, %0\n\t"
 +                                   "mov %1, %%sp"
 +                                   : "=&r" (orig_sp)
 +                                   : "r" (sp));
 +              __do_softirq();
 +              __asm__ __volatile__("mov %0, %%sp"
 +                                   : : "r" (orig_sp));
 +      }
 +
 +      local_irq_restore(flags);
 +}
 +
 +static void unhandled_perf_irq(struct pt_regs *regs)
 +{
 +      unsigned long pcr, pic;
 +
 +      read_pcr(pcr);
 +      read_pic(pic);
 +
 +      write_pcr(0);
 +
 +      printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n",
 +             smp_processor_id());
 +      printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n",
 +             smp_processor_id(), pcr, pic);
 +}
 +
 +/* Almost a direct copy of the powerpc PMC code.
 + *
 + * perf_irq is the hook run by perfctr_irq(); it points at
 + * unhandled_perf_irq until register_perfctr_intr() installs a handler.
 + * perf_irq_lock is held by register_perfctr_intr() and
 + * release_perfctr_intr() while they update the hook and
 + * perf_irq_owner_caller.
 + */
 +static DEFINE_SPINLOCK(perf_irq_lock);
 +static void *perf_irq_owner_caller; /* mostly for debugging */
 +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq;
 +
 +/* Invoked from level 15 PIL handler in trap table.
 + *
 + * Clears the softint bit for @irq, then runs the currently installed
 + * perf_irq hook with @regs.
 + */
 +void perfctr_irq(int irq, struct pt_regs *regs)
 +{
 +      clear_softint(1 << irq);
 +      perf_irq(regs);
 +}
 +
 +int register_perfctr_intr(void (*handler)(struct pt_regs *))
 +{
 +      int ret;
 +
 +      if (!handler)
 +              return -EINVAL;
 +
 +      spin_lock(&perf_irq_lock);
 +      if (perf_irq != unhandled_perf_irq) {
 +              printk(KERN_WARNING "register_perfctr_intr: "
 +                     "perf IRQ busy (reserved by caller %p)\n",
 +                     perf_irq_owner_caller);
 +              ret = -EBUSY;
 +              goto out;
 +      }
 +
 +      perf_irq_owner_caller = __builtin_return_address(0);
 +      perf_irq = handler;
 +
 +      ret = 0;
 +out:
 +      spin_unlock(&perf_irq_lock);
 +
 +      return ret;
 +}
 +EXPORT_SYMBOL_GPL(register_perfctr_intr);
 +
 +void release_perfctr_intr(void (*handler)(struct pt_regs *))
 +{     /* Reset the perf counter hook to unhandled_perf_irq and forget
 +       * the recorded owner.  @handler is not examined: the hook is
 +       * reset unconditionally, whoever installed it.
 +       */
 +      spin_lock(&perf_irq_lock);
 +      perf_irq_owner_caller = NULL;
 +      perf_irq = unhandled_perf_irq;
 +      spin_unlock(&perf_irq_lock);
 +}
 +EXPORT_SYMBOL_GPL(release_perfctr_intr);
 +
 +#ifdef CONFIG_HOTPLUG_CPU
 +void fixup_irqs(void)
 +{     /* For every IRQ that has an action and is not per-cpu, re-invoke
 +       * the chip's set_affinity() hook (when present) with the IRQ's
 +       * current affinity mask, under the descriptor lock.  Afterwards
 +       * the tick interrupt is disabled through tick_ops.  Presumably
 +       * called on the cpu being taken offline -- TODO confirm caller.
 +       */
 +      unsigned int irq;
 +
 +      for (irq = 0; irq < NR_IRQS; irq++) {
 +              unsigned long flags;
 +
 +              spin_lock_irqsave(&irq_desc[irq].lock, flags);
 +              if (irq_desc[irq].action &&
 +                  !(irq_desc[irq].status & IRQ_PER_CPU)) {
 +                      if (irq_desc[irq].chip->set_affinity)
 +                              irq_desc[irq].chip->set_affinity(irq,
++                                      &irq_desc[irq].affinity);
 +              }
 +              spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
 +      }
 +
 +      tick_ops->disable_irq();
 +}
 +#endif
 +
 +struct sun5_timer {   /* layout of the PROM "counter-timer" registers as accessed through prom_timers */
 +      u64     count0; /* not accessed in the visible code */
 +      u64     limit0; /* saved to prom_limit0, then zeroed, by kill_prom_timer() */
 +      u64     count1; /* not accessed in the visible code */
 +      u64     limit1; /* saved to prom_limit1, then zeroed, by kill_prom_timer() */
 +};
 +
 +static struct sun5_timer *prom_timers;        /* set by map_prom_timers(); NULL when no timer was found */
 +static u64 prom_limit0, prom_limit1;  /* limit values saved by kill_prom_timer() */
 +
 +static void map_prom_timers(void)
 +{
 +      struct device_node *dp;
 +      const unsigned int *addr;
 +
 +      /* PROM timer node hangs out in the top level of device siblings... */
 +      dp = of_find_node_by_path("/");
 +      dp = dp->child;
 +      while (dp) {
 +              if (!strcmp(dp->name, "counter-timer"))
 +                      break;
 +              dp = dp->sibling;
 +      }
 +
 +      /* Assume if node is not present, PROM uses different tick mechanism
 +       * which we should not care about.
 +       */
 +      if (!dp) {
 +              prom_timers = (struct sun5_timer *) 0;
 +              return;
 +      }
 +
 +      /* If PROM is really using this, it must be mapped by him. */
 +      addr = of_get_property(dp, "address", NULL);
 +      if (!addr) {
 +              prom_printf("PROM does not have timer mapped, trying to continue.\n");
 +              prom_timers = (struct sun5_timer *) 0;
 +              return;
 +      }
 +      prom_timers = (struct sun5_timer *) ((unsigned long)addr[0]);
 +}
 +
 +/* Silence the PROM counter-timer that prom_timers points at.  Does
 + * nothing when prom_timers is NULL.  The limit values in effect are
 + * stashed in prom_limit0/prom_limit1 before both are zeroed.
 + */
 +static void kill_prom_timer(void)
 +{
 +      if (!prom_timers)
 +              return;
 +
 +      /* Save them away for later. */
 +      prom_limit0 = prom_timers->limit0;
 +      prom_limit1 = prom_timers->limit1;
 +
 +      /* Just as in sun4c/sun4m PROM uses timer which ticks at IRQ 14.
 +       * We turn both off here just to be paranoid.
 +       */
 +      prom_timers->limit0 = 0;
 +      prom_timers->limit1 = 0;
 +
 +      /* Wheee, eat the interrupt packet too...
 +       *
 +       * Loads from ASI_INTR_RECEIVE (offset 0) and ASI_INTR_R
 +       * (offset 0x40) into the scratch register %g1, then stores
 +       * zero back to ASI_INTR_RECEIVE.
 +       */
 +      __asm__ __volatile__(
 +"     mov     0x40, %%g2\n"
 +"     ldxa    [%%g0] %0, %%g1\n"
 +"     ldxa    [%%g2] %1, %%g1\n"
 +"     stxa    %%g0, [%%g0] %0\n"
 +"     membar  #Sync\n"
 +      : /* no outputs */
 +      : "i" (ASI_INTR_RECEIVE), "i" (ASI_INTR_R)
 +      : "g1", "g2");
 +}
 +
 +/* Reset the IRQ worklist physical address in the trap_block entry of
 + * the cpu this runs on (as reported by hard_smp_processor_id()).
 + */
 +void notrace init_irqwork_curcpu(void)
 +{
 +      int cpu = hard_smp_processor_id();
 +
 +      trap_block[cpu].irq_worklist_pa = 0UL;
 +}
 +
 +/* Please be very careful with register_one_mondo() and
 + * sun4v_register_mondo_queues().
 + *
 + * On SMP this gets invoked from the CPU trampoline before
 + * the cpu has fully taken over the trap table from OBP,
 + * and its kernel stack + %g6 thread register state is
 + * not fully cooked yet.
 + *
 + * Therefore you cannot make any OBP calls, not even prom_printf,
 + * from these two routines.
 + */
 +static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask)
 +{
 +      /* qmask + 1 is the queue size in bytes; the entry count handed
 +       * to sun4v_cpu_qconf() is that size divided by 64.
 +       */
 +      unsigned long num_entries = (qmask + 1) / 64;
 +      unsigned long status;
 +
 +      status = sun4v_cpu_qconf(type, paddr, num_entries);
 +      if (status != HV_EOK) {
 +              /* NOTE(review): this fatal path uses prom_printf() and
 +               * prom_halt() although the comment preceding this
 +               * function says no OBP calls may be made from here --
 +               * confirm this is acceptable because we halt anyway.
 +               */
 +              prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, "
 +                          "err %lu\n", type, paddr, num_entries, status);
 +              prom_halt();
 +      }
 +}
 +
 +/* Register the four queues recorded in trap_block[this_cpu] -- cpu
 + * mondo, device mondo, resumable error and non-resumable error --
 + * each through register_one_mondo() with its own physical address
 + * and queue mask.
 + */
 +void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu)
 +{
 +      struct trap_per_cpu *tb = &trap_block[this_cpu];
 +
 +      register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO,
 +                         tb->cpu_mondo_qmask);
 +      register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO,
 +                         tb->dev_mondo_qmask);
 +      register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR,
 +                         tb->resum_qmask);
 +      register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR,
 +                         tb->nonresum_qmask);
 +}
 +
 +static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
 +{
 +      unsigned long size = PAGE_ALIGN(qmask + 1);
 +      void *p = __alloc_bootmem(size, size, 0);
 +      if (!p) {
 +              prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
 +              prom_halt();
 +      }
 +
 +      *pa_ptr = __pa(p);
 +}
 +
 +static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
 +{
 +      unsigned long size = PAGE_ALIGN(qmask + 1);
 +      void *p = __alloc_bootmem(size, size, 0);
 +
 +      if (!p) {
 +              prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
 +              prom_halt();
 +      }
 +
 +      *pa_ptr = __pa(p);
 +}
 +
 +static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
 +{
 +#ifdef CONFIG_SMP
 +      void *page;
 +
 +      BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
 +
 +      page = alloc_bootmem_pages(PAGE_SIZE);
 +      if (!page) {
 +              prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
 +              prom_halt();
 +      }
 +
 +      tb->cpu_mondo_block_pa = __pa(page);
 +      tb->cpu_list_pa = __pa(page + 64);
 +#endif
 +}
 +
 +/* Allocate mondo and error queues for all possible cpus.  */
 +static void __init sun4v_init_mondo_queues(void)
 +{
 +      int cpu;
 +
 +      for_each_possible_cpu(cpu) {
 +              struct trap_per_cpu *tb = &trap_block[cpu];
 +
 +              /* Queue sizes come from the qmask fields already stored
 +               * in the trap_block entry.  Each of the two error queues
 +               * additionally gets a kernel buffer sized by the same
 +               * mask.
 +               */
 +              alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
 +              alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
 +              alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
 +              alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
 +              alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
 +              alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
 +                             tb->nonresum_qmask);
 +      }
 +}
 +
 +static void __init init_send_mondo_info(void)
 +{
 +      int cpu;
 +
 +      for_each_possible_cpu(cpu) {
 +              struct trap_per_cpu *tb = &trap_block[cpu];
 +
 +              init_cpu_send_mondo_info(tb);
 +      }
 +}
 +
 +static struct irqaction timer_irq_action = {
 +      .name = "timer",
 +};
 +
 +/* Only invoked on boot processor. */
 +void __init init_IRQ(void)
 +{
 +      unsigned long size;
 +
 +      /* Locate the PROM timer, then turn it off. */
 +      map_prom_timers();
 +      kill_prom_timer();
 +
 +      /* Allocate the interrupt vector table (NUM_IVECS buckets),
 +       * flush it from the D-cache and remember its physical address.
 +       */
 +      size = sizeof(struct ino_bucket) * NUM_IVECS;
 +      ivector_table = alloc_bootmem(size);
 +      if (!ivector_table) {
 +              prom_printf("Fatal error, cannot allocate ivector_table\n");
 +              prom_halt();
 +      }
 +      __flush_dcache_range((unsigned long) ivector_table,
 +                           ((unsigned long) ivector_table) + size);
 +
 +      ivector_table_pa = __pa(ivector_table);
 +
 +      /* Mondo/error queues are only allocated on hypervisor systems. */
 +      if (tlb_type == hypervisor)
 +              sun4v_init_mondo_queues();
 +
 +      init_send_mondo_info();
 +
 +      if (tlb_type == hypervisor) {
 +              /* Load up the boot cpu's entries.  */
 +              sun4v_register_mondo_queues(hard_smp_processor_id());
 +      }
 +
 +      /* We need to clear any IRQ's pending in the soft interrupt
 +       * registers, a spurious one could be left around from the
 +       * PROM timer which we just disabled.
 +       */
 +      clear_softint(get_softint());
 +
 +      /* Now that ivector table is initialized, it is safe
 +       * to receive IRQ vector traps.  We will normally take
 +       * one or two right now, in case some device PROM used
 +       * to boot us wants to speak to us.  We just ignore them.
 +       *
 +       * The asm below ORs PSTATE_IE into %pstate.
 +       */
 +      __asm__ __volatile__("rdpr      %%pstate, %%g1\n\t"
 +                           "or        %%g1, %0, %%g1\n\t"
 +                           "wrpr      %%g1, 0x0, %%pstate"
 +                           : /* No outputs */
 +                           : "i" (PSTATE_IE)
 +                           : "g1");
 +
 +      /* IRQ 0 gets timer_irq_action, which carries only a name. */
 +      irq_desc[0].action = &timer_irq_action;
 +}
index 46e231f7c5ce2c37cf51b6111a0175f9dcc4280e,0000000000000000000000000000000000000000..322046cdf85f2298da755944d381537c585188be
mode 100644,000000..100644
--- /dev/null
@@@ -1,898 -1,0 +1,898 @@@
-               irq_set_affinity(irq, numa_mask);
 +#include <linux/string.h>
 +#include <linux/kernel.h>
 +#include <linux/of.h>
 +#include <linux/init.h>
 +#include <linux/module.h>
 +#include <linux/mod_devicetable.h>
 +#include <linux/slab.h>
 +#include <linux/errno.h>
 +#include <linux/irq.h>
 +#include <linux/of_device.h>
 +#include <linux/of_platform.h>
 +
 +void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name)
 +{
 +      unsigned long ret = res->start + offset;
 +      struct resource *r;
 +
 +      if (res->flags & IORESOURCE_MEM)
 +              r = request_mem_region(ret, size, name);
 +      else
 +              r = request_region(ret, size, name);
 +      if (!r)
 +              ret = 0;
 +
 +      return (void __iomem *) ret;
 +}
 +EXPORT_SYMBOL(of_ioremap);
 +
 +void of_iounmap(struct resource *res, void __iomem *base, unsigned long size)
 +{
 +      if (res->flags & IORESOURCE_MEM)
 +              release_mem_region((unsigned long) base, size);
 +      else
 +              release_region((unsigned long) base, size);
 +}
 +EXPORT_SYMBOL(of_iounmap);
 +
 +static int node_match(struct device *dev, void *data)
 +{
 +      struct of_device *op = to_of_device(dev);
 +      struct device_node *dp = data;
 +
 +      return (op->node == dp);
 +}
 +
 +struct of_device *of_find_device_by_node(struct device_node *dp)
 +{
 +      struct device *dev = bus_find_device(&of_platform_bus_type, NULL,
 +                                           dp, node_match);
 +
 +      if (dev)
 +              return to_of_device(dev);
 +
 +      return NULL;
 +}
 +EXPORT_SYMBOL(of_find_device_by_node);
 +
 +unsigned int irq_of_parse_and_map(struct device_node *node, int index)
 +{
 +      struct of_device *op = of_find_device_by_node(node);
 +
 +      if (!op || index >= op->num_irqs)
 +              return 0;
 +
 +      return op->irqs[index];
 +}
 +EXPORT_SYMBOL(irq_of_parse_and_map);
 +
 +/* Take the archdata values for IOMMU, STC, and HOSTDATA found in
 + * BUS and propagate to all child of_device objects.
 + */
 +void of_propagate_archdata(struct of_device *bus)
 +{
 +      struct dev_archdata *bus_sd = &bus->dev.archdata;
 +      struct device_node *bus_dp = bus->node;
 +      struct device_node *dp;
 +
 +      for (dp = bus_dp->child; dp; dp = dp->sibling) {
 +              struct of_device *op = of_find_device_by_node(dp);
 +
 +              op->dev.archdata.iommu = bus_sd->iommu;
 +              op->dev.archdata.stc = bus_sd->stc;
 +              op->dev.archdata.host_controller = bus_sd->host_controller;
 +              op->dev.archdata.numa_node = bus_sd->numa_node;
 +
 +              if (dp->child)
 +                      of_propagate_archdata(op);
 +      }
 +}
 +
 +struct bus_type of_platform_bus_type;
 +EXPORT_SYMBOL(of_platform_bus_type);
 +
 +static inline u64 of_read_addr(const u32 *cell, int size)
 +{
 +      u64 r = 0;
 +      while (size--)
 +              r = (r << 32) | *(cell++);
 +      return r;
 +}
 +
 +static void __init get_cells(struct device_node *dp,
 +                           int *addrc, int *sizec)
 +{
 +      if (addrc)
 +              *addrc = of_n_addr_cells(dp);
 +      if (sizec)
 +              *sizec = of_n_size_cells(dp);
 +}
 +
 +/* Max address size we deal with */
 +#define OF_MAX_ADDR_CELLS     4
 +
 +/* One bus-specific address translator.
 + *
 + * name:           label for the bus type
 + * addr_prop_name: child property that holds the addresses to
 + *                 translate
 + * match:          non-zero when this translator handles the given
 + *                 node; a NULL hook matches every node
 + * count_cells:    report address/size cell counts for a child
 + * map:            translate addr in place through one "ranges"
 + *                 entry; 0 means the entry matched
 + * get_flags:      derive IORESOURCE_* flags from an address and the
 + *                 flags accumulated so far
 + */
 +struct of_bus {
 +      const char      *name;
 +      const char      *addr_prop_name;
 +      int             (*match)(struct device_node *parent);
 +      void            (*count_cells)(struct device_node *child,
 +                                     int *addrc, int *sizec);
 +      int             (*map)(u32 *addr, const u32 *range,
 +                             int na, int ns, int pna);
 +      unsigned long   (*get_flags)(const u32 *addr, unsigned long);
 +};
 +
 +/*
 + * Default translator (generic bus)
 + */
 +
 +/* count_cells hook of the default bus: simply forwards to
 + * get_cells() for DEV.
 + */
 +static void of_bus_default_count_cells(struct device_node *dev,
 +                                     int *addrc, int *sizec)
 +{
 +      get_cells(dev, addrc, sizec);
 +}
 +
 +/* Make sure the least significant 64-bits are in-range.  Even
 + * for 3 or 4 cell values it is a good enough approximation.
 + */
 +static int of_out_of_range(const u32 *addr, const u32 *base,
 +                         const u32 *size, int na, int ns)
 +{
 +      u64 a = of_read_addr(addr, na);
 +      u64 b = of_read_addr(base, na);
 +
 +      if (a < b)
 +              return 1;
 +
 +      b += of_read_addr(size, ns);
 +      if (a >= b)
 +              return 1;
 +
 +      return 0;
 +}
 +
 +static int of_bus_default_map(u32 *addr, const u32 *range,
 +                            int na, int ns, int pna)
 +{
 +      u32 result[OF_MAX_ADDR_CELLS];
 +      int i;
 +
 +      if (ns > 2) {
 +              printk("of_device: Cannot handle size cells (%d) > 2.", ns);
 +              return -EINVAL;
 +      }
 +
 +      if (of_out_of_range(addr, range, range + na + pna, na, ns))
 +              return -EINVAL;
 +
 +      /* Start with the parent range base.  */
 +      memcpy(result, range + na, pna * 4);
 +
 +      /* Add in the child address offset.  */
 +      for (i = 0; i < na; i++)
 +              result[pna - 1 - i] +=
 +                      (addr[na - 1 - i] -
 +                       range[na - 1 - i]);
 +
 +      memcpy(addr, result, pna * 4);
 +
 +      return 0;
 +}
 +
 +static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags)
 +{
 +      if (flags)
 +              return flags;
 +      return IORESOURCE_MEM;
 +}
 +
 +/*
 + * PCI bus specific translator
 + */
 +
 +static int of_bus_pci_match(struct device_node *np)
 +{
 +      if (!strcmp(np->name, "pci")) {
 +              const char *model = of_get_property(np, "model", NULL);
 +
 +              if (model && !strcmp(model, "SUNW,simba"))
 +                      return 0;
 +
 +              /* Do not do PCI specific frobbing if the
 +               * PCI bridge lacks a ranges property.  We
 +               * want to pass it through up to the next
 +               * parent as-is, not with the PCI translate
 +               * method which chops off the top address cell.
 +               */
 +              if (!of_find_property(np, "ranges", NULL))
 +                      return 0;
 +
 +              return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +static int of_bus_simba_match(struct device_node *np)
 +{
 +      const char *model = of_get_property(np, "model", NULL);
 +
 +      if (model && !strcmp(model, "SUNW,simba"))
 +              return 1;
 +
 +      /* Treat PCI busses lacking ranges property just like
 +       * simba.
 +       */
 +      if (!strcmp(np->name, "pci")) {
 +              if (!of_find_property(np, "ranges", NULL))
 +                      return 1;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Map hook for the simba translator: always reports a match (0)
 + * and leaves ADDR untouched, i.e. the address passes through as-is.
 + */
 +static int of_bus_simba_map(u32 *addr, const u32 *range,
 +                          int na, int ns, int pna)
 +{
 +      return 0;
 +}
 +
 +static void of_bus_pci_count_cells(struct device_node *np,
 +                                 int *addrc, int *sizec)
 +{
 +      if (addrc)
 +              *addrc = 3;
 +      if (sizec)
 +              *sizec = 2;
 +}
 +
 +/* PCI flavour of the "ranges" translator.
 + *
 + * The first cell of a PCI address carries type bits rather than
 + * address bits, so it is compared separately (mask 0x03000000) and
 + * skipped for both the range test and the offset arithmetic.
 + * Returns 0 with ADDR rewritten as the PNA-cell parent address on a
 + * match, -EINVAL otherwise.
 + */
 +static int of_bus_pci_map(u32 *addr, const u32 *range,
 +                        int na, int ns, int pna)
 +{
 +      u32 result[OF_MAX_ADDR_CELLS];
 +      int i;
 +
 +      /* Check address type match */
 +      if ((addr[0] ^ range[0]) & 0x03000000)
 +              return -EINVAL;
 +
 +      if (of_out_of_range(addr + 1, range + 1, range + na + pna,
 +                          na - 1, ns))
 +              return -EINVAL;
 +
 +      /* Start with the parent range base.  */
 +      memcpy(result, range + na, pna * 4);
 +
 +      /* Add in the child address offset, skipping high cell.  */
 +      for (i = 0; i < na - 1; i++)
 +              result[pna - 1 - i] +=
 +                      (addr[na - 1 - i] -
 +                       range[na - 1 - i]);
 +
 +      memcpy(addr, result, pna * 4);
 +
 +      return 0;
 +}
 +
 +static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags)
 +{
 +      u32 w = addr[0];
 +
 +      /* For PCI, we override whatever child busses may have used.  */
 +      flags = 0;
 +      switch((w >> 24) & 0x03) {
 +      case 0x01:
 +              flags |= IORESOURCE_IO;
 +              break;
 +
 +      case 0x02: /* 32 bits */
 +      case 0x03: /* 64 bits */
 +              flags |= IORESOURCE_MEM;
 +              break;
 +      }
 +      if (w & 0x40000000)
 +              flags |= IORESOURCE_PREFETCH;
 +      return flags;
 +}
 +
 +/*
 + * SBUS bus specific translator
 + */
 +
 +static int of_bus_sbus_match(struct device_node *np)
 +{
 +      return !strcmp(np->name, "sbus") ||
 +              !strcmp(np->name, "sbi");
 +}
 +
 +static void of_bus_sbus_count_cells(struct device_node *child,
 +                                 int *addrc, int *sizec)
 +{
 +      if (addrc)
 +              *addrc = 2;
 +      if (sizec)
 +              *sizec = 1;
 +}
 +
 +/*
 + * FHC/Central bus specific translator.
 + *
 + * This is just needed to hard-code the address and size cell
 + * counts.  'fhc' and 'central' nodes lack the #address-cells and
 + * #size-cells properties, and if you walk to the root on such
 + * Enterprise boxes all you'll get is a #size-cells of 2 which is
 + * not what we want to use.
 + */
 +static int of_bus_fhc_match(struct device_node *np)
 +{
 +      return !strcmp(np->name, "fhc") ||
 +              !strcmp(np->name, "central");
 +}
 +
 +#define of_bus_fhc_count_cells of_bus_sbus_count_cells
 +
 +/*
 + * Array of bus specific translators
 + */
 +
 +/* Entry order is significant: of_match_bus() returns the first entry
 + * whose match hook accepts the node.  The last ("default") entry has
 + * a NULL match hook, so it accepts anything the others rejected.
 + */
 +static struct of_bus of_busses[] = {
 +      /* PCI */
 +      {
 +              .name = "pci",
 +              .addr_prop_name = "assigned-addresses",
 +              .match = of_bus_pci_match,
 +              .count_cells = of_bus_pci_count_cells,
 +              .map = of_bus_pci_map,
 +              .get_flags = of_bus_pci_get_flags,
 +      },
 +      /* SIMBA */
 +      {
 +              .name = "simba",
 +              .addr_prop_name = "assigned-addresses",
 +              .match = of_bus_simba_match,
 +              .count_cells = of_bus_pci_count_cells,
 +              .map = of_bus_simba_map,
 +              .get_flags = of_bus_pci_get_flags,
 +      },
 +      /* SBUS */
 +      {
 +              .name = "sbus",
 +              .addr_prop_name = "reg",
 +              .match = of_bus_sbus_match,
 +              .count_cells = of_bus_sbus_count_cells,
 +              .map = of_bus_default_map,
 +              .get_flags = of_bus_default_get_flags,
 +      },
 +      /* FHC */
 +      {
 +              .name = "fhc",
 +              .addr_prop_name = "reg",
 +              .match = of_bus_fhc_match,
 +              .count_cells = of_bus_fhc_count_cells,
 +              .map = of_bus_default_map,
 +              .get_flags = of_bus_default_get_flags,
 +      },
 +      /* Default */
 +      {
 +              .name = "default",
 +              .addr_prop_name = "reg",
 +              .match = NULL,
 +              .count_cells = of_bus_default_count_cells,
 +              .map = of_bus_default_map,
 +              .get_flags = of_bus_default_get_flags,
 +      },
 +};
 +
 +static struct of_bus *of_match_bus(struct device_node *np)
 +{
 +      int i;
 +
 +      for (i = 0; i < ARRAY_SIZE(of_busses); i ++)
 +              if (!of_busses[i].match || of_busses[i].match(np))
 +                      return &of_busses[i];
 +      BUG();
 +      return NULL;
 +}
 +
 +/* Translate ADDR one level up the tree through PARENT's "ranges".
 + *
 + * ADDR holds NA cells on entry and PNA cells after a successful
 + * translation.  BUS supplies the map hook used on each ranges entry
 + * (NS size cells per entry).  PBUS is accepted but not used here.
 + *
 + * Returns 0 on success and 1 when no ranges entry matched.
 + */
 +static int __init build_one_resource(struct device_node *parent,
 +                                   struct of_bus *bus,
 +                                   struct of_bus *pbus,
 +                                   u32 *addr,
 +                                   int na, int ns, int pna)
 +{
 +      const u32 *ranges;
 +      int rone, rlen;
 +
 +      ranges = of_get_property(parent, "ranges", &rlen);
 +      if (ranges == NULL || rlen == 0) {
 +              u32 result[OF_MAX_ADDR_CELLS];
 +              int i;
 +
 +              /* Missing or empty "ranges": identity translation.
 +               * The address is re-packed right-aligned into PNA
 +               * cells with the upper cells zeroed.
 +               */
 +              memset(result, 0, pna * 4);
 +              for (i = 0; i < na; i++)
 +                      result[pna - 1 - i] =
 +                              addr[na - 1 - i];
 +
 +              memcpy(addr, result, pna * 4);
 +              return 0;
 +      }
 +
 +      /* Now walk through the ranges */
 +      rlen /= 4;
 +      /* One entry is child address + parent address + size cells. */
 +      rone = na + pna + ns;
 +      for (; rlen >= rone; rlen -= rone, ranges += rone) {
 +              if (!bus->map(addr, ranges, na, ns, pna))
 +                      return 0;
 +      }
 +
 +      /* When we miss an I/O space match on PCI, just pass it up
 +       * to the next PCI bridge and/or controller.
 +       */
 +      if (!strcmp(bus->name, "pci") &&
 +          (addr[0] & 0x03000000) == 0x01000000)
 +              return 0;
 +
 +      return 1;
 +}
 +
 +static int __init use_1to1_mapping(struct device_node *pp)
 +{
 +      /* If we have a ranges property in the parent, use it.  */
 +      if (of_find_property(pp, "ranges", NULL) != NULL)
 +              return 0;
 +
 +      /* If the parent is the dma node of an ISA bus, pass
 +       * the translation up to the root.
 +       *
 +       * Some SBUS devices use intermediate nodes to express
 +       * hierarchy within the device itself.  These aren't
 +       * real bus nodes, and don't have a 'ranges' property.
 +       * But, we should still pass the translation work up
 +       * to the SBUS itself.
 +       */
 +      if (!strcmp(pp->name, "dma") ||
 +          !strcmp(pp->name, "espdma") ||
 +          !strcmp(pp->name, "ledma") ||
 +          !strcmp(pp->name, "lebuffer"))
 +              return 0;
 +
 +      /* Similarly for all PCI bridges, if we get this far
 +       * it lacks a ranges property, and this will include
 +       * cases like Simba.
 +       */
 +      if (!strcmp(pp->name, "pci"))
 +              return 0;
 +
 +      return 1;
 +}
 +
 +static int of_resource_verbose;
 +
 +/* Fill in op->resource[] from the device's address property.
 + *
 + * The property name and cell counts come from the translator that
 + * matches PARENT's node.  Each entry is translated upward through
 + * successive parents until the root is reached or a level fails to
 + * translate; an entry that fails is left zeroed apart from its
 + * name.  Does nothing when PARENT is NULL or the property is absent
 + * or empty.
 + */
 +static void __init build_device_resources(struct of_device *op,
 +                                        struct device *parent)
 +{
 +      struct of_device *p_op;
 +      struct of_bus *bus;
 +      int na, ns;
 +      int index, num_reg;
 +      const void *preg;
 +
 +      if (!parent)
 +              return;
 +
 +      p_op = to_of_device(parent);
 +      bus = of_match_bus(p_op->node);
 +      bus->count_cells(op->node, &na, &ns);
 +
 +      preg = of_get_property(op->node, bus->addr_prop_name, &num_reg);
 +      if (!preg || num_reg == 0)
 +              return;
 +
 +      /* Convert to num-cells.  */
 +      num_reg /= 4;
 +
 +      /* Convert to num-entries.  */
 +      num_reg /= na + ns;
 +
 +      /* Prevent overrunning the op->resource[] array.  */
 +      if (num_reg > PROMREG_MAX) {
 +              printk(KERN_WARNING "%s: Too many regs (%d), "
 +                     "limiting to %d.\n",
 +                     op->node->full_name, num_reg, PROMREG_MAX);
 +              num_reg = PROMREG_MAX;
 +      }
 +
 +      for (index = 0; index < num_reg; index++) {
 +              struct resource *r = &op->resource[index];
 +              u32 addr[OF_MAX_ADDR_CELLS];
 +              const u32 *reg = (preg + (index * ((na + ns) * 4)));
 +              struct device_node *dp = op->node;
 +              struct device_node *pp = p_op->node;
 +              struct of_bus *pbus, *dbus;
 +              u64 size, result = OF_BAD_ADDR;
 +              unsigned long flags;
 +              int dna, dns;
 +              int pna, pns;
 +
 +              /* Entry layout: NA address cells, then NS size cells. */
 +              size = of_read_addr(reg + na, ns);
 +              memcpy(addr, reg, na * 4);
 +
 +              flags = bus->get_flags(addr, 0);
 +
 +              if (use_1to1_mapping(pp)) {
 +                      result = of_read_addr(addr, na);
 +                      goto build_res;
 +              }
 +
 +              dna = na;
 +              dns = ns;
 +              dbus = bus;
 +
 +              /* Climb one level per iteration.  Reaching the root
 +               * yields the final address; a failed translation
 +               * leaves result at OF_BAD_ADDR.
 +               */
 +              while (1) {
 +                      dp = pp;
 +                      pp = dp->parent;
 +                      if (!pp) {
 +                              result = of_read_addr(addr, dna);
 +                              break;
 +                      }
 +
 +                      pbus = of_match_bus(pp);
 +                      pbus->count_cells(dp, &pna, &pns);
 +
 +                      if (build_one_resource(dp, dbus, pbus, addr,
 +                                             dna, dns, pna))
 +                              break;
 +
 +                      flags = pbus->get_flags(addr, flags);
 +
 +                      dna = pna;
 +                      dns = pns;
 +                      dbus = pbus;
 +              }
 +
 +      build_res:
 +              memset(r, 0, sizeof(*r));
 +
 +              if (of_resource_verbose)
 +                      printk("%s reg[%d] -> %lx\n",
 +                             op->node->full_name, index,
 +                             result);
 +
 +              if (result != OF_BAD_ADDR) {
 +                      /* On hypervisor systems the top four address
 +                       * bits are masked off.
 +                       */
 +                      if (tlb_type == hypervisor)
 +                              result &= 0x0fffffffffffffffUL;
 +
 +                      r->start = result;
 +                      r->end = result + size - 1;
 +                      r->flags = flags;
 +              }
 +              r->name = op->node->name;
 +      }
 +}
 +
 +/* Look up *IRQ_P for device DP in parent PP's interrupt-map.
 + *
 + * Each map entry is NA address cells followed by three cells: the
 + * child interrupt, the phandle of the interrupt parent, and the
 + * interrupt value to use at that parent.  The device's first "reg"
 + * entry, masked by IMASK, selects the entry.
 + *
 + * On a hit *IRQ_P is updated and the node named by the phandle is
 + * returned.  On a miss PP itself is returned if it has an IRQ
 + * translator, otherwise NULL.  NULL is also returned when DP has no
 + * usable "reg" property.
 + */
 +static struct device_node * __init
 +apply_interrupt_map(struct device_node *dp, struct device_node *pp,
 +                  const u32 *imap, int imlen, const u32 *imask,
 +                  unsigned int *irq_p)
 +{
 +      struct device_node *cp;
 +      unsigned int irq = *irq_p;
 +      struct of_bus *bus;
 +      phandle handle;
 +      const u32 *reg;
 +      int na, num_reg, i;
 +
 +      bus = of_match_bus(pp);
 +      bus->count_cells(dp, &na, NULL);
 +
 +      reg = of_get_property(dp, "reg", &num_reg);
 +      if (!reg || !num_reg)
 +              return NULL;
 +
 +      /* Convert the property length in bytes to a count of entries. */
 +      imlen /= ((na + 3) * 4);
 +      handle = 0;
 +      for (i = 0; i < imlen; i++) {
 +              int j;
 +
 +              for (j = 0; j < na; j++) {
 +                      if ((reg[j] & imask[j]) != imap[j])
 +                              goto next;
 +              }
 +              if (imap[na] == irq) {
 +                      handle = imap[na + 1];
 +                      irq = imap[na + 2];
 +                      break;
 +              }
 +
 +      next:
 +              imap += (na + 3);
 +      }
 +      if (i == imlen) {
 +              /* Psycho and Sabre PCI controllers can have 'interrupt-map'
 +               * properties that do not include the on-board device
 +               * interrupts.  Instead, the device's 'interrupts' property
 +               * is already a fully specified INO value.
 +               *
 +               * Handle this by deciding that, if we didn't get a
 +               * match in the parent's 'interrupt-map', and the
 +               * parent is an IRQ translater, then use the parent as
 +               * our IRQ controller.
 +               */
 +              if (pp->irq_trans)
 +                      return pp;
 +
 +              return NULL;
 +      }
 +
 +      *irq_p = irq;
 +      cp = of_find_node_by_phandle(handle);
 +
 +      return cp;
 +}
 +
 +/* Translate a PCI interrupt pin number (1..4) for device DP across
 + * parent PP when PP has no interrupt-map.  Values outside 1..4, and
 + * devices without a "reg" property, are returned unchanged.
 + */
 +static unsigned int __init pci_irq_swizzle(struct device_node *dp,
 +                                         struct device_node *pp,
 +                                         unsigned int irq)
 +{
 +      const struct linux_prom_pci_registers *regs;
 +      unsigned int bus, devfn, slot, ret;
 +
 +      if (irq < 1 || irq > 4)
 +              return irq;
 +
 +      regs = of_get_property(dp, "reg", NULL);
 +      if (!regs)
 +              return irq;
 +
 +      /* phys_hi carries the bus number in bits 23:16 and devfn in
 +       * bits 15:8; the slot is the upper five bits of devfn.
 +       */
 +      bus = (regs->phys_hi >> 16) & 0xff;
 +      devfn = (regs->phys_hi >> 8) & 0xff;
 +      slot = (devfn >> 3) & 0x1f;
 +
 +      if (pp->irq_trans) {
 +              /* Derived from Table 8-3, U2P User's Manual.  This branch
 +               * is handling a PCI controller that lacks a proper set of
 +               * interrupt-map and interrupt-map-mask properties.  The
 +               * Ultra-E450 is one example.
 +               *
 +               * The bit layout is BSSLL, where:
 +               * B: 0 on bus A, 1 on bus B
 +               * S: 2-bit slot number, derived from PCI device number as
 +               *    (dev - 1) for bus A, or (dev - 2) for bus B
 +               * L: 2-bit line number
 +               */
 +              if (bus & 0x80) {
 +                      /* PBM-A */
 +                      bus  = 0x00;
 +                      slot = (slot - 1) << 2;
 +              } else {
 +                      /* PBM-B */
 +                      bus  = 0x10;
 +                      slot = (slot - 2) << 2;
 +              }
 +              irq -= 1;
 +
 +              ret = (bus | slot | irq);
 +      } else {
 +              /* Going through a PCI-PCI bridge that lacks a set of
 +               * interrupt-map and interrupt-map-mask properties.
 +               */
 +              ret = ((irq - 1 + (slot & 3)) & 3) + 1;
 +      }
 +
 +      return ret;
 +}
 +
 +static int of_irq_verbose;
 +
 +/* Turn one raw "interrupts" value of OP into the IRQ number the
 + * kernel will use.
 + *
 + * 0xffffffff is passed through untouched.  If the device node has
 + * its own IRQ translator that is used directly; otherwise the tree
 + * is walked upward, applying interrupt-map lookups or PCI swizzling
 + * until a node with a translator is found.  If none is found the
 + * original value is returned.  After a successful translation the
 + * IRQ affinity is set from the NUMA node, when one is known.
 + */
 +static unsigned int __init build_one_device_irq(struct of_device *op,
 +                                              struct device *parent,
 +                                              unsigned int irq)
 +{
 +      struct device_node *dp = op->node;
 +      struct device_node *pp, *ip;
 +      unsigned int orig_irq = irq;
 +      int nid;
 +
 +      if (irq == 0xffffffff)
 +              return irq;
 +
 +      if (dp->irq_trans) {
 +              irq = dp->irq_trans->irq_build(dp, irq,
 +                                             dp->irq_trans->data);
 +
 +              if (of_irq_verbose)
 +                      printk("%s: direct translate %x --> %x\n",
 +                             dp->full_name, orig_irq, irq);
 +
 +              goto out;
 +      }
 +
 +      /* Something more complicated.  Walk up to the root, applying
 +       * interrupt-map or bus specific translations, until we hit
 +       * an IRQ translator.
 +       *
 +       * If we hit a bus type or situation we cannot handle, we
 +       * stop and assume that the original IRQ number was in a
 +       * format which has special meaning to its immediate parent.
 +       */
 +      pp = dp->parent;
 +      ip = NULL;
 +      while (pp) {
 +              const void *imap, *imsk;
 +              int imlen;
 +
 +              imap = of_get_property(pp, "interrupt-map", &imlen);
 +              imsk = of_get_property(pp, "interrupt-map-mask", NULL);
 +              if (imap && imsk) {
 +                      struct device_node *iret;
 +                      int this_orig_irq = irq;
 +
 +                      iret = apply_interrupt_map(dp, pp,
 +                                                 imap, imlen, imsk,
 +                                                 &irq);
 +
 +                      if (of_irq_verbose)
 +                              printk("%s: Apply [%s:%x] imap --> [%s:%x]\n",
 +                                     op->node->full_name,
 +                                     pp->full_name, this_orig_irq,
 +                                     (iret ? iret->full_name : "NULL"), irq);
 +
 +                      if (!iret)
 +                              break;
 +
 +                      if (iret->irq_trans) {
 +                              ip = iret;
 +                              break;
 +                      }
 +              } else {
 +                      if (!strcmp(pp->name, "pci")) {
 +                              unsigned int this_orig_irq = irq;
 +
 +                              irq = pci_irq_swizzle(dp, pp, irq);
 +                              if (of_irq_verbose)
 +                                      printk("%s: PCI swizzle [%s] "
 +                                             "%x --> %x\n",
 +                                             op->node->full_name,
 +                                             pp->full_name, this_orig_irq,
 +                                             irq);
 +
 +                      }
 +
 +                      if (pp->irq_trans) {
 +                              ip = pp;
 +                              break;
 +                      }
 +              }
 +              dp = pp;
 +              pp = pp->parent;
 +      }
 +      if (!ip)
 +              return orig_irq;
 +
 +      irq = ip->irq_trans->irq_build(op->node, irq,
 +                                     ip->irq_trans->data);
 +      if (of_irq_verbose)
 +              printk("%s: Apply IRQ trans [%s] %x --> %x\n",
 +                     op->node->full_name, ip->full_name, orig_irq, irq);
 +
 +out:
 +      /* NOTE(review): on the tree-walk path dp may have been advanced
 +       * to an ancestor of op->node, so the NUMA node looked up here
 +       * is that ancestor's -- confirm this is intended.
 +       */
 +      nid = of_node_to_nid(dp);
 +      if (nid != -1) {
 +              cpumask_t numa_mask = node_to_cpumask(nid);
 +
++              irq_set_affinity(irq, &numa_mask);
 +      }
 +
 +      return irq;
 +}
 +
 +/* Create and register the of_device for node DP under PARENT.
 + *
 + * Fills in clock frequency, port id, IRQs and resources from the
 + * node's properties, names the device ("root" when there is no
 + * parent, otherwise the node id in hex) and registers it.  Returns
 + * the new device, or NULL when allocation or registration fails.
 + */
 +static struct of_device * __init scan_one_device(struct device_node *dp,
 +                                               struct device *parent)
 +{
 +      struct of_device *op = kzalloc(sizeof(*op), GFP_KERNEL);
 +      const unsigned int *irq;
 +      struct dev_archdata *sd;
 +      int len, i;
 +
 +      if (!op)
 +              return NULL;
 +
 +      sd = &op->dev.archdata;
 +      sd->prom_node = dp;
 +      sd->op = op;
 +
 +      op->node = dp;
 +
 +      /* Defaults to 25000000 when the property is absent. */
 +      op->clock_freq = of_getintprop_default(dp, "clock-frequency",
 +                                             (25*1000*1000));
 +      /* Prefer "upa-portid", fall back to "portid", else -1. */
 +      op->portid = of_getintprop_default(dp, "upa-portid", -1);
 +      if (op->portid == -1)
 +              op->portid = of_getintprop_default(dp, "portid", -1);
 +
 +      irq = of_get_property(dp, "interrupts", &len);
 +      if (irq) {
 +              op->num_irqs = len / 4;
 +
 +              /* Prevent overrunning the op->irqs[] array.  */
 +              if (op->num_irqs > PROMINTR_MAX) {
 +                      printk(KERN_WARNING "%s: Too many irqs (%d), "
 +                             "limiting to %d.\n",
 +                             dp->full_name, op->num_irqs, PROMINTR_MAX);
 +                      op->num_irqs = PROMINTR_MAX;
 +              }
 +              memcpy(op->irqs, irq, op->num_irqs * 4);
 +      } else {
 +              op->num_irqs = 0;
 +      }
 +
 +      build_device_resources(op, parent);
 +      /* Replace each raw property value with its translated IRQ. */
 +      for (i = 0; i < op->num_irqs; i++)
 +              op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]);
 +
 +      op->dev.parent = parent;
 +      op->dev.bus = &of_platform_bus_type;
 +      if (!parent)
 +              dev_set_name(&op->dev, "root");
 +      else
 +              dev_set_name(&op->dev, "%08x", dp->node);
 +
 +      if (of_device_register(op)) {
 +              printk("%s: Could not register of device.\n",
 +                     dp->full_name);
 +              /* NOTE(review): the driver core normally wants
 +               * put_device() rather than a direct free after a failed
 +               * registration -- confirm against of_device_register().
 +               */
 +              kfree(op);
 +              op = NULL;
 +      }
 +
 +      return op;
 +}
 +
 +static void __init scan_tree(struct device_node *dp, struct device *parent)
 +{
 +      while (dp) {
 +              struct of_device *op = scan_one_device(dp, parent);
 +
 +              if (op)
 +                      scan_tree(dp->child, &op->dev);
 +
 +              dp = dp->sibling;
 +      }
 +}
 +
 +static void __init scan_of_devices(void)
 +{
 +      struct device_node *root = of_find_node_by_path("/");
 +      struct of_device *parent;
 +
 +      parent = scan_one_device(root, NULL);
 +      if (!parent)
 +              return;
 +
 +      scan_tree(root->child, &parent->dev);
 +}
 +
 +static int __init of_bus_driver_init(void)
 +{
 +      int err;
 +
 +      err = of_bus_type_init(&of_platform_bus_type, "of");
 +      if (!err)
 +              scan_of_devices();
 +
 +      return err;
 +}
 +
 +postcore_initcall(of_bus_driver_init);
 +
 +static int __init of_debug(char *str)
 +{
 +      int val = 0;
 +
 +      get_option(&str, &val);
 +      if (val & 1)
 +              of_resource_verbose = 1;
 +      if (val & 2)
 +              of_irq_verbose = 1;
 +      return 1;
 +}
 +
 +__setup("of_debug=", of_debug);
index 2e680f34f727fa61f5defef92e63b8144e365d70,0000000000000000000000000000000000000000..0d0cd815e83e505b24e4b12a14f276fa37be892a
mode 100644,000000..100644
--- /dev/null
@@@ -1,447 -1,0 +1,447 @@@
-               irq_set_affinity(irq, numa_mask);
 +/* pci_msi.c: Sparc64 MSI support common layer.
 + *
 + * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
 + */
 +#include <linux/kernel.h>
 +#include <linux/interrupt.h>
 +#include <linux/irq.h>
 +
 +#include "pci_impl.h"
 +
 +/* Interrupt handler for one MSI event queue.
 + *
 + * @irq:    interrupt number (not used in the body).
 + * @cookie: struct sparc64_msiq_cookie naming the PBM and the queue id.
 + *
 + * Reads the queue head with ops->get_head(), then calls
 + * ops->dequeue_msi() until it returns 0, invoking desc->handle_irq()
 + * for every MSI it yields.  If the head moved, the new head is written
 + * back with ops->set_head().
 + *
 + * Returns IRQ_HANDLED on success, or IRQ_NONE after logging at
 + * KERN_EMERG when one of the queue operations returns a negative value.
 + */
 +static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie)
 +{
 +      struct sparc64_msiq_cookie *msiq_cookie = cookie;
 +      struct pci_pbm_info *pbm = msiq_cookie->pbm;
 +      unsigned long msiqid = msiq_cookie->msiqid;
 +      const struct sparc64_msiq_ops *ops;
 +      unsigned long orig_head, head;
 +      int err;
 +
 +      ops = pbm->msi_ops;
 +
 +      err = ops->get_head(pbm, msiqid, &head);
 +      if (unlikely(err < 0))
 +              goto err_get_head;
 +
 +      /* Remember where we started so set_head() is skipped when
 +       * nothing was consumed.
 +       */
 +      orig_head = head;
 +      for (;;) {
 +              unsigned long msi;
 +
 +              err = ops->dequeue_msi(pbm, msiqid, &head, &msi);
 +              if (likely(err > 0)) {
 +                      struct irq_desc *desc;
 +                      unsigned int virt_irq;
 +
 +                      /* msi_irq_table is indexed relative to msi_first. */
 +                      virt_irq = pbm->msi_irq_table[msi - pbm->msi_first];
 +                      desc = irq_desc + virt_irq;
 +
 +                      desc->handle_irq(virt_irq, desc);
 +              }
 +
 +              if (unlikely(err < 0))
 +                      goto err_dequeue;
 +
 +              /* 0 ends the loop; presumably "queue empty" - confirm
 +               * against the ops implementations.
 +               */
 +              if (err == 0)
 +                      break;
 +      }
 +      if (likely(head != orig_head)) {
 +              err = ops->set_head(pbm, msiqid, head);
 +              if (unlikely(err < 0))
 +                      goto err_set_head;
 +      }
 +      return IRQ_HANDLED;
 +
 +err_get_head:
 +      printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n",
 +             msiqid, err);
 +      goto err_out;
 +
 +err_dequeue:
 +      printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] "
 +             "gives error %d\n",
 +             head, msiqid, err);
 +      goto err_out;
 +
 +err_set_head:
 +      printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] "
 +             "gives error %d\n",
 +             head, msiqid, err);
 +      goto err_out;
 +
 +err_out:
 +      return IRQ_NONE;
 +}
 +
 +static u32 pick_msiq(struct pci_pbm_info *pbm)
 +{
 +      static DEFINE_SPINLOCK(rotor_lock);
 +      unsigned long flags;
 +      u32 ret, rotor;
 +
 +      spin_lock_irqsave(&rotor_lock, flags);
 +
 +      rotor = pbm->msiq_rotor;
 +      ret = pbm->msiq_first + rotor;
 +
 +      if (++rotor >= pbm->msiq_num)
 +              rotor = 0;
 +      pbm->msiq_rotor = rotor;
 +
 +      spin_unlock_irqrestore(&rotor_lock, flags);
 +
 +      return ret;
 +}
 +
 +
 +static int alloc_msi(struct pci_pbm_info *pbm)
 +{
 +      int i;
 +
 +      for (i = 0; i < pbm->msi_num; i++) {
 +              if (!test_and_set_bit(i, pbm->msi_bitmap))
 +                      return i + pbm->msi_first;
 +      }
 +
 +      return -ENOENT;
 +}
 +
 +static void free_msi(struct pci_pbm_info *pbm, int msi_num)
 +{
 +      msi_num -= pbm->msi_first;
 +      clear_bit(msi_num, pbm->msi_bitmap);
 +}
 +
 +/* irq_chip installed on MSI virtual IRQs by sparc64_setup_msi_irq().
 + * enable/unmask both use unmask_msi_irq, disable/mask both use
 + * mask_msi_irq.
 + */
 +static struct irq_chip msi_irq = {
 +      .typename       = "PCI-MSI",
 +      .mask           = mask_msi_irq,
 +      .unmask         = unmask_msi_irq,
 +      .enable         = unmask_msi_irq,
 +      .disable        = mask_msi_irq,
 +      /* XXX affinity XXX */
 +};
 +
 +/* Allocate and program one MSI for @pdev.
 + *
 + * @virt_irq_p: receives the allocated virtual IRQ; left at 0 when the
 + *              function fails.
 + * @pdev:       device whose host controller (PBM) supplies the MSI.
 + * @entry:      MSI descriptor; msi_attrib.is_64 selects the 64-bit
 + *              address range.
 + *
 + * Returns 0 on success, -ENOMEM if no virtual IRQ could be allocated,
 + * or the error from alloc_msi() / ops->msi_setup().
 + */
 +static int sparc64_setup_msi_irq(unsigned int *virt_irq_p,
 +                               struct pci_dev *pdev,
 +                               struct msi_desc *entry)
 +{
 +      struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
 +      const struct sparc64_msiq_ops *ops = pbm->msi_ops;
 +      struct msi_msg msg;
 +      int msi, err;
 +      u32 msiqid;
 +
 +      *virt_irq_p = virt_irq_alloc(0, 0);
 +      err = -ENOMEM;
 +      if (!*virt_irq_p)
 +              goto out_err;
 +
 +      set_irq_chip_and_handler_name(*virt_irq_p, &msi_irq,
 +                                    handle_simple_irq, "MSI");
 +
 +      /* alloc_msi() returns the MSI number or a negative errno. */
 +      err = alloc_msi(pbm);
 +      if (unlikely(err < 0))
 +              goto out_virt_irq_free;
 +
 +      msi = err;
 +
 +      msiqid = pick_msiq(pbm);
 +
 +      err = ops->msi_setup(pbm, msiqid, msi,
 +                           (entry->msi_attrib.is_64 ? 1 : 0));
 +      if (err)
 +              goto out_msi_free;
 +
 +      /* Lets the queue interrupt handler map this MSI to its IRQ. */
 +      pbm->msi_irq_table[msi - pbm->msi_first] = *virt_irq_p;
 +
 +      if (entry->msi_attrib.is_64) {
 +              msg.address_hi = pbm->msi64_start >> 32;
 +              msg.address_lo = pbm->msi64_start & 0xffffffff;
 +      } else {
 +              msg.address_hi = 0;
 +              msg.address_lo = pbm->msi32_start;
 +      }
 +      msg.data = msi;
 +
 +      set_irq_msi(*virt_irq_p, entry);
 +      write_msi_msg(*virt_irq_p, &msg);
 +
 +      return 0;
 +
 +      /* Error unwinding, in reverse order of acquisition. */
 +out_msi_free:
 +      free_msi(pbm, msi);
 +
 +out_virt_irq_free:
 +      set_irq_chip(*virt_irq_p, NULL);
 +      virt_irq_free(*virt_irq_p);
 +      *virt_irq_p = 0;
 +
 +out_err:
 +      return err;
 +}
 +
 +static void sparc64_teardown_msi_irq(unsigned int virt_irq,
 +                                   struct pci_dev *pdev)
 +{
 +      struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller;
 +      const struct sparc64_msiq_ops *ops = pbm->msi_ops;
 +      unsigned int msi_num;
 +      int i, err;
 +
 +      for (i = 0; i < pbm->msi_num; i++) {
 +              if (pbm->msi_irq_table[i] == virt_irq)
 +                      break;
 +      }
 +      if (i >= pbm->msi_num) {
 +              printk(KERN_ERR "%s: teardown: No MSI for irq %u\n",
 +                     pbm->name, virt_irq);
 +              return;
 +      }
 +
 +      msi_num = pbm->msi_first + i;
 +      pbm->msi_irq_table[i] = ~0U;
 +
 +      err = ops->msi_teardown(pbm, msi_num);
 +      if (err) {
 +              printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, "
 +                     "irq %u, gives error %d\n",
 +                     pbm->name, msi_num, virt_irq, err);
 +              return;
 +      }
 +
 +      free_msi(pbm, msi_num);
 +
 +      set_irq_chip(virt_irq, NULL);
 +      virt_irq_free(virt_irq);
 +}
 +
 +static int msi_bitmap_alloc(struct pci_pbm_info *pbm)
 +{
 +      unsigned long size, bits_per_ulong;
 +
 +      bits_per_ulong = sizeof(unsigned long) * 8;
 +      size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1);
 +      size /= 8;
 +      BUG_ON(size % sizeof(unsigned long));
 +
 +      pbm->msi_bitmap = kzalloc(size, GFP_KERNEL);
 +      if (!pbm->msi_bitmap)
 +              return -ENOMEM;
 +
 +      return 0;
 +}
 +
 +static void msi_bitmap_free(struct pci_pbm_info *pbm)
 +{
 +      kfree(pbm->msi_bitmap);
 +      pbm->msi_bitmap = NULL;
 +}
 +
 +static int msi_table_alloc(struct pci_pbm_info *pbm)
 +{
 +      int size, i;
 +
 +      size = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie);
 +      pbm->msiq_irq_cookies = kzalloc(size, GFP_KERNEL);
 +      if (!pbm->msiq_irq_cookies)
 +              return -ENOMEM;
 +
 +      for (i = 0; i < pbm->msiq_num; i++) {
 +              struct sparc64_msiq_cookie *p;
 +
 +              p = &pbm->msiq_irq_cookies[i];
 +              p->pbm = pbm;
 +              p->msiqid = pbm->msiq_first + i;
 +      }
 +
 +      size = pbm->msi_num * sizeof(unsigned int);
 +      pbm->msi_irq_table = kzalloc(size, GFP_KERNEL);
 +      if (!pbm->msi_irq_table) {
 +              kfree(pbm->msiq_irq_cookies);
 +              pbm->msiq_irq_cookies = NULL;
 +              return -ENOMEM;
 +      }
 +
 +      return 0;
 +}
 +
 +static void msi_table_free(struct pci_pbm_info *pbm)
 +{
 +      kfree(pbm->msiq_irq_cookies);
 +      pbm->msiq_irq_cookies = NULL;
 +
 +      kfree(pbm->msi_irq_table);
 +      pbm->msi_irq_table = NULL;
 +}
 +
 +/* Build and request the interrupt for a single MSI queue.
 + *
 + * @pbm:    PBM that owns the queue.
 + * @ops:    controller-specific operations; msiq_build_irq() supplies
 + *          the IRQ number.
 + * @msiqid: queue id, also used (relative to msiq_first) to pick the
 + *          cookie handed to the handler.
 + * @devino: device interrupt number passed through to msiq_build_irq().
 + *
 + * Returns 0 on success, the negative value from msiq_build_irq(), or
 + * the error from request_irq().
 + */
 +static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 +                               const struct sparc64_msiq_ops *ops,
 +                               unsigned long msiqid,
 +                               unsigned long devino)
 +{
 +      int irq = ops->msiq_build_irq(pbm, msiqid, devino);
 +      int err, nid;
 +
 +      if (irq < 0)
 +              return irq;
 +
 +      /* If the PBM has a NUMA node (numa_node != -1), set the IRQ's
 +       * affinity to that node's cpus before requesting it.
 +       */
 +      nid = pbm->numa_node;
 +      if (nid != -1) {
 +              cpumask_t numa_mask = node_to_cpumask(nid);
 +
++              irq_set_affinity(irq, &numa_mask);
 +      }
 +      err = request_irq(irq, sparc64_msiq_interrupt, 0,
 +                        "MSIQ",
 +                        &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]);
 +      if (err)
 +              return err;
 +
 +      return 0;
 +}
 +
 +static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm,
 +                                    const struct sparc64_msiq_ops *ops)
 +{
 +      int i;
 +
 +      for (i = 0; i < pbm->msiq_num; i++) {
 +              unsigned long msiqid = i + pbm->msiq_first;
 +              unsigned long devino = i + pbm->msiq_first_devino;
 +              int err;
 +
 +              err = bringup_one_msi_queue(pbm, ops, msiqid, devino);
 +              if (err)
 +                      return err;
 +      }
 +
 +      return 0;
 +}
 +
 +/* Probe the MSI properties of @pbm's device node and bring MSI up.
 + *
 + * @pbm: PCI bus module being initialised.
 + * @ops: controller-specific MSI queue operations.
 + *
 + * Reads "#msi-eqs"; when it is non-zero, reads the remaining MSI
 + * properties, allocates the MSI bitmap and tables, has @ops allocate
 + * the queues and requests one interrupt per queue.  On success the
 + * setup/teardown hooks are installed on @pbm.  If a property is
 + * missing or has an unexpected length, or an allocation step fails,
 + * msiq_num is reset to 0 and "No MSI support." is logged.
 + */
 +void sparc64_pbm_msi_init(struct pci_pbm_info *pbm,
 +                        const struct sparc64_msiq_ops *ops)
 +{
 +      const u32 *val;
 +      int len;
 +
 +      val = of_get_property(pbm->op->node, "#msi-eqs", &len);
 +      if (!val || len != 4)
 +              goto no_msi;
 +      pbm->msiq_num = *val;
 +      if (pbm->msiq_num) {
 +              /* Layouts of the multi-cell properties read below; each
 +               * property's length must match its struct exactly.
 +               */
 +              const struct msiq_prop {
 +                      u32 first_msiq;
 +                      u32 num_msiq;
 +                      u32 first_devino;
 +              } *mqp;
 +              const struct msi_range_prop {
 +                      u32 first_msi;
 +                      u32 num_msi;
 +              } *mrng;
 +              const struct addr_range_prop {
 +                      u32 msi32_high;
 +                      u32 msi32_low;
 +                      u32 msi32_len;
 +                      u32 msi64_high;
 +                      u32 msi64_low;
 +                      u32 msi64_len;
 +              } *arng;
 +
 +              val = of_get_property(pbm->op->node, "msi-eq-size", &len);
 +              if (!val || len != 4)
 +                      goto no_msi;
 +
 +              pbm->msiq_ent_count = *val;
 +
 +              /* Two property names are accepted for the same data. */
 +              mqp = of_get_property(pbm->op->node,
 +                                    "msi-eq-to-devino", &len);
 +              if (!mqp)
 +                      mqp = of_get_property(pbm->op->node,
 +                                            "msi-eq-devino", &len);
 +              if (!mqp || len != sizeof(struct msiq_prop))
 +                      goto no_msi;
 +
 +              pbm->msiq_first = mqp->first_msiq;
 +              pbm->msiq_first_devino = mqp->first_devino;
 +
 +              val = of_get_property(pbm->op->node, "#msi", &len);
 +              if (!val || len != 4)
 +                      goto no_msi;
 +              pbm->msi_num = *val;
 +
 +              mrng = of_get_property(pbm->op->node, "msi-ranges", &len);
 +              if (!mrng || len != sizeof(struct msi_range_prop))
 +                      goto no_msi;
 +              pbm->msi_first = mrng->first_msi;
 +
 +              val = of_get_property(pbm->op->node, "msi-data-mask", &len);
 +              if (!val || len != 4)
 +                      goto no_msi;
 +              pbm->msi_data_mask = *val;
 +
 +              val = of_get_property(pbm->op->node, "msix-data-width", &len);
 +              if (!val || len != 4)
 +                      goto no_msi;
 +              pbm->msix_data_width = *val;
 +
 +              arng = of_get_property(pbm->op->node, "msi-address-ranges",
 +                                     &len);
 +              if (!arng || len != sizeof(struct addr_range_prop))
 +                      goto no_msi;
 +              /* Combine the high/low 32-bit cells into 64-bit bases. */
 +              pbm->msi32_start = ((u64)arng->msi32_high << 32) |
 +                      (u64) arng->msi32_low;
 +              pbm->msi64_start = ((u64)arng->msi64_high << 32) |
 +                      (u64) arng->msi64_low;
 +              pbm->msi32_len = arng->msi32_len;
 +              pbm->msi64_len = arng->msi64_len;
 +
 +              /* From here on each failure undoes the earlier steps
 +               * before bailing out.
 +               */
 +              if (msi_bitmap_alloc(pbm))
 +                      goto no_msi;
 +
 +              if (msi_table_alloc(pbm)) {
 +                      msi_bitmap_free(pbm);
 +                      goto no_msi;
 +              }
 +
 +              if (ops->msiq_alloc(pbm)) {
 +                      msi_table_free(pbm);
 +                      msi_bitmap_free(pbm);
 +                      goto no_msi;
 +              }
 +
 +              if (sparc64_bringup_msi_queues(pbm, ops)) {
 +                      ops->msiq_free(pbm);
 +                      msi_table_free(pbm);
 +                      msi_bitmap_free(pbm);
 +                      goto no_msi;
 +              }
 +
 +              printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] "
 +                     "devino[0x%x]\n",
 +                     pbm->name,
 +                     pbm->msiq_first, pbm->msiq_num,
 +                     pbm->msiq_ent_count,
 +                     pbm->msiq_first_devino);
 +              printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] "
 +                     "width[%u]\n",
 +                     pbm->name,
 +                     pbm->msi_first, pbm->msi_num, pbm->msi_data_mask,
 +                     pbm->msix_data_width);
 +              printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] "
 +                     "addr64[0x%lx:0x%x]\n",
 +                     pbm->name,
 +                     pbm->msi32_start, pbm->msi32_len,
 +                     pbm->msi64_start, pbm->msi64_len);
 +              printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n",
 +                     pbm->name,
 +                     __pa(pbm->msi_queues));
 +
 +              /* Publish the ops and the per-device MSI hooks. */
 +              pbm->msi_ops = ops;
 +              pbm->setup_msi_irq = sparc64_setup_msi_irq;
 +              pbm->teardown_msi_irq = sparc64_teardown_msi_irq;
 +      }
 +      return;
 +
 +no_msi:
 +      pbm->msiq_num = 0;
 +      printk(KERN_INFO "%s: No MSI support.\n", pbm->name);
 +}
index e396c1f17a922deaef7b03a0751d06f1345770fd,0000000000000000000000000000000000000000..1e5ac4e282e1285030aaa43380b91c01b7bcff48
mode 100644,000000..100644
--- /dev/null
@@@ -1,423 -1,0 +1,421 @@@
- cpumask_t cpu_online_map = CPU_MASK_NONE;
- cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
 +/* smp.c: Sparc SMP support.
 + *
 + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 + * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 + * Copyright (C) 2004 Keith M Wesolowski (wesolows@foobazco.org)
 + */
 +
 +#include <asm/head.h>
 +
 +#include <linux/kernel.h>
 +#include <linux/sched.h>
 +#include <linux/threads.h>
 +#include <linux/smp.h>
 +#include <linux/interrupt.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/init.h>
 +#include <linux/spinlock.h>
 +#include <linux/mm.h>
 +#include <linux/fs.h>
 +#include <linux/seq_file.h>
 +#include <linux/cache.h>
 +#include <linux/delay.h>
 +
 +#include <asm/ptrace.h>
 +#include <asm/atomic.h>
 +
 +#include <asm/irq.h>
 +#include <asm/page.h>
 +#include <asm/pgalloc.h>
 +#include <asm/pgtable.h>
 +#include <asm/oplib.h>
 +#include <asm/cacheflush.h>
 +#include <asm/tlbflush.h>
 +#include <asm/cpudata.h>
 +
 +#include "irq.h"
 +
 +volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,};
 +unsigned char boot_cpu_id = 0;
 +unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */
 +
-                       cpu_set(mid, phys_cpu_present_map);
 +cpumask_t smp_commenced_mask = CPU_MASK_NONE;
 +
 +/* The only guaranteed locking primitive available on all Sparc
 + * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically
 + * places the current byte at the effective address into dest_reg and
 + * places 0xff there afterwards.  Pretty lame locking primitive
 + * compared to the Alpha and the Intel no?  Most Sparcs have 'swap'
 + * instruction which is much better...
 + */
 +
 +void __cpuinit smp_store_cpu_info(int id)
 +{
 +      int cpu_node;
 +
 +      cpu_data(id).udelay_val = loops_per_jiffy;
 +
 +      cpu_find_by_mid(id, &cpu_node);
 +      cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
 +                                                   "clock-frequency", 0);
 +      cpu_data(id).prom_node = cpu_node;
 +      cpu_data(id).mid = cpu_get_hwmid(cpu_node);
 +
 +      if (cpu_data(id).mid < 0)
 +              panic("No MID found for CPU%d at node 0x%08d", id, cpu_node);
 +}
 +
 +void __init smp_cpus_done(unsigned int max_cpus)
 +{
 +      extern void smp4m_smp_done(void);
 +      extern void smp4d_smp_done(void);
 +      unsigned long bogosum = 0;
 +      int cpu, num;
 +
 +      for (cpu = 0, num = 0; cpu < NR_CPUS; cpu++)
 +              if (cpu_online(cpu)) {
 +                      num++;
 +                      bogosum += cpu_data(cpu).udelay_val;
 +              }
 +
 +      printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 +              num, bogosum/(500000/HZ),
 +              (bogosum/(5000/HZ))%100);
 +
 +      switch(sparc_cpu_model) {
 +      case sun4:
 +              printk("SUN4\n");
 +              BUG();
 +              break;
 +      case sun4c:
 +              printk("SUN4C\n");
 +              BUG();
 +              break;
 +      case sun4m:
 +              smp4m_smp_done();
 +              break;
 +      case sun4d:
 +              smp4d_smp_done();
 +              break;
 +      case sun4e:
 +              printk("SUN4E\n");
 +              BUG();
 +              break;
 +      case sun4u:
 +              printk("SUN4U\n");
 +              BUG();
 +              break;
 +      default:
 +              printk("UNKNOWN!\n");
 +              BUG();
 +              break;
 +      };
 +}
 +
 +void cpu_panic(void)
 +{
 +      printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
 +      panic("SMP bolixed\n");
 +}
 +
 +struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
 +
 +void smp_send_reschedule(int cpu)
 +{
 +      /* See sparc64 */
 +}
 +
 +void smp_send_stop(void)
 +{
 +}
 +
 +void smp_flush_cache_all(void)
 +{
 +      xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all));
 +      local_flush_cache_all();
 +}
 +
 +void smp_flush_tlb_all(void)
 +{
 +      xc0((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_all));
 +      local_flush_tlb_all();
 +}
 +
 +void smp_flush_cache_mm(struct mm_struct *mm)
 +{
 +      if(mm->context != NO_CONTEXT) {
 +              cpumask_t cpu_mask = mm->cpu_vm_mask;
 +              cpu_clear(smp_processor_id(), cpu_mask);
 +              if (!cpus_empty(cpu_mask))
 +                      xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm);
 +              local_flush_cache_mm(mm);
 +      }
 +}
 +
 +void smp_flush_tlb_mm(struct mm_struct *mm)
 +{
 +      if(mm->context != NO_CONTEXT) {
 +              cpumask_t cpu_mask = mm->cpu_vm_mask;
 +              cpu_clear(smp_processor_id(), cpu_mask);
 +              if (!cpus_empty(cpu_mask)) {
 +                      xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm);
 +                      if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
 +                              mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id());
 +              }
 +              local_flush_tlb_mm(mm);
 +      }
 +}
 +
 +void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 +                         unsigned long end)
 +{
 +      struct mm_struct *mm = vma->vm_mm;
 +
 +      if (mm->context != NO_CONTEXT) {
 +              cpumask_t cpu_mask = mm->cpu_vm_mask;
 +              cpu_clear(smp_processor_id(), cpu_mask);
 +              if (!cpus_empty(cpu_mask))
 +                      xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end);
 +              local_flush_cache_range(vma, start, end);
 +      }
 +}
 +
 +void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 +                       unsigned long end)
 +{
 +      struct mm_struct *mm = vma->vm_mm;
 +
 +      if (mm->context != NO_CONTEXT) {
 +              cpumask_t cpu_mask = mm->cpu_vm_mask;
 +              cpu_clear(smp_processor_id(), cpu_mask);
 +              if (!cpus_empty(cpu_mask))
 +                      xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end);
 +              local_flush_tlb_range(vma, start, end);
 +      }
 +}
 +
 +void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
 +{
 +      struct mm_struct *mm = vma->vm_mm;
 +
 +      if(mm->context != NO_CONTEXT) {
 +              cpumask_t cpu_mask = mm->cpu_vm_mask;
 +              cpu_clear(smp_processor_id(), cpu_mask);
 +              if (!cpus_empty(cpu_mask))
 +                      xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page);
 +              local_flush_cache_page(vma, page);
 +      }
 +}
 +
 +void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
 +{
 +      struct mm_struct *mm = vma->vm_mm;
 +
 +      if(mm->context != NO_CONTEXT) {
 +              cpumask_t cpu_mask = mm->cpu_vm_mask;
 +              cpu_clear(smp_processor_id(), cpu_mask);
 +              if (!cpus_empty(cpu_mask))
 +                      xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page);
 +              local_flush_tlb_page(vma, page);
 +      }
 +}
 +
 +void smp_reschedule_irq(void)
 +{
 +      set_need_resched();
 +}
 +
 +void smp_flush_page_to_ram(unsigned long page)
 +{
 +      /* Current theory is that those who call this are the one's
 +       * who have just dirtied their cache with the pages contents
 +       * in kernel space, therefore we only run this on local cpu.
 +       *
 +       * XXX This experiment failed, research further... -DaveM
 +       */
 +#if 1
 +      xc1((smpfunc_t) BTFIXUP_CALL(local_flush_page_to_ram), page);
 +#endif
 +      local_flush_page_to_ram(page);
 +}
 +
 +void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
 +{
 +      cpumask_t cpu_mask = mm->cpu_vm_mask;
 +      cpu_clear(smp_processor_id(), cpu_mask);
 +      if (!cpus_empty(cpu_mask))
 +              xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr);
 +      local_flush_sig_insns(mm, insn_addr);
 +}
 +
 +extern unsigned int lvl14_resolution;
 +
 +/* /proc/profile writes can call this, don't __init it please. */
 +static DEFINE_SPINLOCK(prof_setup_lock);
 +
 +int setup_profiling_timer(unsigned int multiplier)
 +{
 +      int i;
 +      unsigned long flags;
 +
 +      /* Prevent level14 ticker IRQ flooding. */
 +      if((!multiplier) || (lvl14_resolution / multiplier) < 500)
 +              return -EINVAL;
 +
 +      spin_lock_irqsave(&prof_setup_lock, flags);
 +      for_each_possible_cpu(i) {
 +              load_profile_irq(i, lvl14_resolution / multiplier);
 +              prof_multiplier(i) = multiplier;
 +      }
 +      spin_unlock_irqrestore(&prof_setup_lock, flags);
 +
 +      return 0;
 +}
 +
 +void __init smp_prepare_cpus(unsigned int max_cpus)
 +{
 +      extern void __init smp4m_boot_cpus(void);
 +      extern void __init smp4d_boot_cpus(void);
 +      int i, cpuid, extra;
 +
 +      printk("Entering SMP Mode...\n");
 +
 +      extra = 0;
 +      for (i = 0; !cpu_find_by_instance(i, NULL, &cpuid); i++) {
 +              if (cpuid >= NR_CPUS)
 +                      extra++;
 +      }
 +      /* i = number of cpus */
 +      if (extra && max_cpus > i - extra)
 +              printk("Warning: NR_CPUS is too low to start all cpus\n");
 +
 +      smp_store_cpu_info(boot_cpu_id);
 +
 +      switch(sparc_cpu_model) {
 +      case sun4:
 +              printk("SUN4\n");
 +              BUG();
 +              break;
 +      case sun4c:
 +              printk("SUN4C\n");
 +              BUG();
 +              break;
 +      case sun4m:
 +              smp4m_boot_cpus();
 +              break;
 +      case sun4d:
 +              smp4d_boot_cpus();
 +              break;
 +      case sun4e:
 +              printk("SUN4E\n");
 +              BUG();
 +              break;
 +      case sun4u:
 +              printk("SUN4U\n");
 +              BUG();
 +              break;
 +      default:
 +              printk("UNKNOWN!\n");
 +              BUG();
 +              break;
 +      };
 +}
 +
 +/* Set this up early so that things like the scheduler can init
 + * properly.  We use the same cpu mask for both the present and
 + * possible cpu map.
 + *
 + * Walks the cpu instances with cpu_find_by_instance() until it
 + * returns non-zero; cpus whose MID is >= NR_CPUS are left out of
 + * both maps.
 + */
 +void __init smp_setup_cpu_possible_map(void)
 +{
 +      int instance, mid;
 +
 +      instance = 0;
 +      while (!cpu_find_by_instance(instance, NULL, &mid)) {
 +              if (mid < NR_CPUS) {
-       cpu_set(cpuid, phys_cpu_present_map);
++                      cpu_set(mid, cpu_possible_map);
 +                      cpu_set(mid, cpu_present_map);
 +              }
 +              instance++;
 +      }
 +}
 +
 +/* Register the boot cpu.
 + *
 + * Halts through the PROM if the hardware cpu id does not fit in
 + * NR_CPUS and warns when it is not 0.  Otherwise the id is stored in
 + * the current thread_info and the cpu is marked online and possible.
 + */
 +void __init smp_prepare_boot_cpu(void)
 +{
 +      int cpuid = hard_smp_processor_id();
 +
 +      if (cpuid >= NR_CPUS) {
 +              prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
 +              prom_halt();
 +      }
 +      if (cpuid != 0)
 +              printk("boot cpu id != 0, this could work but is untested\n");
 +
 +      current_thread_info()->cpu = cpuid;
 +      cpu_set(cpuid, cpu_online_map);
++      cpu_set(cpuid, cpu_possible_map);
 +}
 +
 +int __cpuinit __cpu_up(unsigned int cpu)
 +{
 +      extern int __cpuinit smp4m_boot_one_cpu(int);
 +      extern int __cpuinit smp4d_boot_one_cpu(int);
 +      int ret=0;
 +
 +      switch(sparc_cpu_model) {
 +      case sun4:
 +              printk("SUN4\n");
 +              BUG();
 +              break;
 +      case sun4c:
 +              printk("SUN4C\n");
 +              BUG();
 +              break;
 +      case sun4m:
 +              ret = smp4m_boot_one_cpu(cpu);
 +              break;
 +      case sun4d:
 +              ret = smp4d_boot_one_cpu(cpu);
 +              break;
 +      case sun4e:
 +              printk("SUN4E\n");
 +              BUG();
 +              break;
 +      case sun4u:
 +              printk("SUN4U\n");
 +              BUG();
 +              break;
 +      default:
 +              printk("UNKNOWN!\n");
 +              BUG();
 +              break;
 +      };
 +
 +      if (!ret) {
 +              cpu_set(cpu, smp_commenced_mask);
 +              while (!cpu_online(cpu))
 +                      mb();
 +      }
 +      return ret;
 +}
 +
 +void smp_bogo(struct seq_file *m)
 +{
 +      int i;
 +      
 +      for_each_online_cpu(i) {
 +              seq_printf(m,
 +                         "Cpu%dBogo\t: %lu.%02lu\n",
 +                         i,
 +                         cpu_data(i).udelay_val/(500000/HZ),
 +                         (cpu_data(i).udelay_val/(5000/HZ))%100);
 +      }
 +}
 +
 +void smp_info(struct seq_file *m)
 +{
 +      int i;
 +
 +      seq_printf(m, "State:\n");
 +      for_each_online_cpu(i)
 +              seq_printf(m, "CPU%d\t\t: online\n", i);
 +}
index bfe99d82d458702d32bf52a863cf00345e04a8e2,0000000000000000000000000000000000000000..46329799f3462bb4002024558be74f204e679f07
mode 100644,000000..100644
--- /dev/null
@@@ -1,1412 -1,0 +1,1408 @@@
- cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
- cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
 +/* smp.c: Sparc64 SMP support.
 + *
 + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
 + */
 +
 +#include <linux/module.h>
 +#include <linux/kernel.h>
 +#include <linux/sched.h>
 +#include <linux/mm.h>
 +#include <linux/pagemap.h>
 +#include <linux/threads.h>
 +#include <linux/smp.h>
 +#include <linux/interrupt.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/delay.h>
 +#include <linux/init.h>
 +#include <linux/spinlock.h>
 +#include <linux/fs.h>
 +#include <linux/seq_file.h>
 +#include <linux/cache.h>
 +#include <linux/jiffies.h>
 +#include <linux/profile.h>
 +#include <linux/lmb.h>
 +#include <linux/cpu.h>
 +
 +#include <asm/head.h>
 +#include <asm/ptrace.h>
 +#include <asm/atomic.h>
 +#include <asm/tlbflush.h>
 +#include <asm/mmu_context.h>
 +#include <asm/cpudata.h>
 +#include <asm/hvtramp.h>
 +#include <asm/io.h>
 +#include <asm/timer.h>
 +
 +#include <asm/irq.h>
 +#include <asm/irq_regs.h>
 +#include <asm/page.h>
 +#include <asm/pgtable.h>
 +#include <asm/oplib.h>
 +#include <asm/uaccess.h>
 +#include <asm/starfire.h>
 +#include <asm/tlb.h>
 +#include <asm/sections.h>
 +#include <asm/prom.h>
 +#include <asm/mdesc.h>
 +#include <asm/ldc.h>
 +#include <asm/hypervisor.h>
 +
 +int sparc64_multi_core __read_mostly;
 +
- EXPORT_SYMBOL(cpu_possible_map);
- EXPORT_SYMBOL(cpu_online_map);
 +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
 +cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
 +      { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 +
 +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 +EXPORT_SYMBOL(cpu_core_map);
 +
 +static cpumask_t smp_commenced_mask;
 +
 +void smp_info(struct seq_file *m)
 +{
 +      int i;
 +      
 +      seq_printf(m, "State:\n");
 +      for_each_online_cpu(i)
 +              seq_printf(m, "CPU%d:\t\tonline\n", i);
 +}
 +
 +void smp_bogo(struct seq_file *m)
 +{
 +      int i;
 +      
 +      for_each_online_cpu(i)
 +              seq_printf(m,
 +                         "Cpu%dClkTck\t: %016lx\n",
 +                         i, cpu_data(i).clock_tick);
 +}
 +
 +extern void setup_sparc64_timer(void);
 +
 +static volatile unsigned long callin_flag = 0;
 +
 +/* Call-in path of a cpu that is being brought up.
 + *
 + * Sets up per-cpu state for the calling cpu, raises callin_flag (which
 + * smp_boot_one_cpu() polls), then waits until its bit appears in
 + * smp_commenced_mask before marking itself online.  Returns with
 + * preemption disabled.
 + */
 +void __cpuinit smp_callin(void)
 +{
 +      int cpuid = hard_smp_processor_id();
 +
 +      __local_per_cpu_offset = __per_cpu_offset(cpuid);
 +
 +      if (tlb_type == hypervisor)
 +              sun4v_ktsb_register();
 +
 +      __flush_tlb_all();
 +
 +      setup_sparc64_timer();
 +
 +      if (cheetah_pcache_forced_on)
 +              cheetah_enable_pcache();
 +
 +      local_irq_enable();
 +
 +      /* Signal the booting cpu that we made it this far. */
 +      callin_flag = 1;
 +      __asm__ __volatile__("membar #Sync\n\t"
 +                           "flush  %%g6" : : : "memory");
 +
 +      /* Clear this or we will die instantly when we
 +       * schedule back to this idler...
 +       */
 +      current_thread_info()->new_child = 0;
 +
 +      /* Attach to the address space of init_task. */
 +      atomic_inc(&init_mm.mm_count);
 +      current->active_mm = &init_mm;
 +
 +      /* inform the notifiers about the new cpu */
 +      notify_cpu_starting(cpuid);
 +
 +      /* Spin until this cpu's bit is set in smp_commenced_mask. */
 +      while (!cpu_isset(cpuid, smp_commenced_mask))
 +              rmb();
 +
 +      /* The online bit is set under the ipi call lock. */
 +      ipi_call_lock();
 +      cpu_set(cpuid, cpu_online_map);
 +      ipi_call_unlock();
 +
 +      /* idle thread is expected to have preempt disabled */
 +      preempt_disable();
 +}
 +
 +/* Log which cpu got here and panic.  The message indicates this is
 + * reached when cpu_idle() returns; the caller is not visible here.
 + */
 +void cpu_panic(void)
 +{
 +      printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
 +      panic("SMP bolixed\n");
 +}
 +
 +/* This tick register synchronization scheme is taken entirely from
 + * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
 + *
 + * The only change I've made is to rework it so that the master
 + * initiates the synchronization instead of the slave. -DaveM
 + */
 +
 +#define MASTER        0
 +#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))
 +
 +#define NUM_ROUNDS    64      /* magic value */
 +#define NUM_ITERS     5       /* likewise */
 +
 +static DEFINE_SPINLOCK(itc_sync_lock);
 +static unsigned long go[SLAVE + 1];
 +
 +#define DEBUG_TICK_SYNC       0
 +
 +/* Measure the offset between the local tick counter and the master's.
 + *
 + * Performs NUM_ITERS request/response exchanges through go[MASTER] and
 + * go[SLAVE], keeping the exchange with the shortest local round trip.
 + *
 + * @rt:     receives the round trip time of the best exchange
 + * @master: receives the master's timestamp relative to the local
 + *          start time of the best exchange
 + *
 + * Returns the midpoint of the best local interval minus the master's
 + * timestamp for that exchange.
 + */
 +static inline long get_delta (long *rt, long *master)
 +{
 +      unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
 +      unsigned long tcenter, t0, t1, tm;
 +      unsigned long i;
 +
 +      for (i = 0; i < NUM_ITERS; i++) {
 +              t0 = tick_ops->get_tick();
 +              /* Request a timestamp and wait for a non-zero reply. */
 +              go[MASTER] = 1;
 +              membar_safe("#StoreLoad");
 +              while (!(tm = go[SLAVE]))
 +                      rmb();
 +              go[SLAVE] = 0;
 +              wmb();
 +              t1 = tick_ops->get_tick();
 +
 +              /* Keep the sample with the shortest round trip. */
 +              if (t1 - t0 < best_t1 - best_t0)
 +                      best_t0 = t0, best_t1 = t1, best_tm = tm;
 +      }
 +
 +      *rt = best_t1 - best_t0;
 +      *master = best_tm - best_t0;
 +
 +      /* average best_t0 and best_t1 without overflow: */
 +      tcenter = (best_t0/2 + best_t1/2);
 +      if (best_t0 % 2 + best_t1 % 2 == 2)
 +              tcenter++;
 +      return tcenter - best_tm;
 +}
 +
 +/* Client side of the tick synchronization.
 + *
 + * Announces readiness by setting go[MASTER], waits for it to be cleared
 + * again, then runs NUM_ROUNDS measurements with local interrupts off.
 + * Until a measurement yields a delta of exactly zero, each round adjusts
 + * the local tick via tick_ops->add_tick().  The last delta and round
 + * trip time are logged at the end.
 + *
 + * NOTE(review): rt and master_time_stamp are unsigned long while
 + * get_delta() takes long pointers, and "bound" is assigned but never
 + * read in this function -- confirm both are intentional.
 + */
 +void smp_synchronize_tick_client(void)
 +{
 +      long i, delta, adj, adjust_latency = 0, done = 0;
 +      unsigned long flags, rt, master_time_stamp, bound;
 +#if DEBUG_TICK_SYNC
 +      struct {
 +              long rt;        /* roundtrip time */
 +              long master;    /* master's timestamp */
 +              long diff;      /* difference between midpoint and master's timestamp */
 +              long lat;       /* estimate of itc adjustment latency */
 +      } t[NUM_ROUNDS];
 +#endif
 +
 +      go[MASTER] = 1;
 +
 +      while (go[MASTER])
 +              rmb();
 +
 +      local_irq_save(flags);
 +      {
 +              for (i = 0; i < NUM_ROUNDS; i++) {
 +                      delta = get_delta(&rt, &master_time_stamp);
 +                      if (delta == 0) {
 +                              done = 1;       /* let's lock on to this... */
 +                              bound = rt;
 +                      }
 +
 +                      if (!done) {
 +                              /* After the first round a quarter of the
 +                               * accumulated corrections is added on top.
 +                               */
 +                              if (i > 0) {
 +                                      adjust_latency += -delta;
 +                                      adj = -delta + adjust_latency/4;
 +                              } else
 +                                      adj = -delta;
 +
 +                              tick_ops->add_tick(adj);
 +                      }
 +#if DEBUG_TICK_SYNC
 +                      t[i].rt = rt;
 +                      t[i].master = master_time_stamp;
 +                      t[i].diff = delta;
 +                      t[i].lat = adjust_latency/4;
 +#endif
 +              }
 +      }
 +      local_irq_restore(flags);
 +
 +#if DEBUG_TICK_SYNC
 +      for (i = 0; i < NUM_ROUNDS; i++)
 +              printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
 +                     t[i].rt, t[i].master, t[i].diff, t[i].lat);
 +#endif
 +
 +      printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
 +             "(last diff %ld cycles, maxerr %lu cycles)\n",
 +             smp_processor_id(), delta, rt);
 +}
 +
 +static void smp_start_sync_tick_client(int cpu);
 +
 +/* Master side of the tick synchronization with @cpu.
 + *
 + * Kicks the client via smp_start_sync_tick_client(), waits for it to
 + * raise go[MASTER], and then answers NUM_ROUNDS*NUM_ITERS timestamp
 + * requests by writing the local tick value into go[SLAVE].  The answer
 + * loop runs under itc_sync_lock with interrupts disabled.
 + */
 +static void smp_synchronize_one_tick(int cpu)
 +{
 +      unsigned long flags, i;
 +
 +      go[MASTER] = 0;
 +
 +      smp_start_sync_tick_client(cpu);
 +
 +      /* wait for client to be ready */
 +      while (!go[MASTER])
 +              rmb();
 +
 +      /* now let the client proceed into his loop */
 +      go[MASTER] = 0;
 +      membar_safe("#StoreLoad");
 +
 +      spin_lock_irqsave(&itc_sync_lock, flags);
 +      {
 +              for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
 +                      /* Wait for a request, acknowledge it, then
 +                       * publish our current tick as the reply.
 +                       */
 +                      while (!go[MASTER])
 +                              rmb();
 +                      go[MASTER] = 0;
 +                      wmb();
 +                      go[SLAVE] = tick_ops->get_tick();
 +                      membar_safe("#StoreLoad");
 +              }
 +      }
 +      spin_unlock_irqrestore(&itc_sync_lock, flags);
 +}
 +
 +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
 +/* XXX Put this in some common place. XXX */
 +static unsigned long kimage_addr_to_ra(void *p)
 +{
 +      unsigned long val = (unsigned long) p;
 +
 +      return kern_base + (val - KERNBASE);
 +}
 +
 +static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
 +{
 +      extern unsigned long sparc64_ttable_tl0;
 +      extern unsigned long kern_locked_tte_data;
 +      struct hvtramp_descr *hdesc;
 +      unsigned long trampoline_ra;
 +      struct trap_per_cpu *tb;
 +      u64 tte_vaddr, tte_data;
 +      unsigned long hv_err;
 +      int i;
 +
 +      hdesc = kzalloc(sizeof(*hdesc) +
 +                      (sizeof(struct hvtramp_mapping) *
 +                       num_kernel_image_mappings - 1),
 +                      GFP_KERNEL);
 +      if (!hdesc) {
 +              printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
 +                     "hvtramp_descr.\n");
 +              return;
 +      }
 +
 +      hdesc->cpu = cpu;
 +      hdesc->num_mappings = num_kernel_image_mappings;
 +
 +      tb = &trap_block[cpu];
 +      tb->hdesc = hdesc;
 +
 +      hdesc->fault_info_va = (unsigned long) &tb->fault_info;
 +      hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
 +
 +      hdesc->thread_reg = thread_reg;
 +
 +      tte_vaddr = (unsigned long) KERNBASE;
 +      tte_data = kern_locked_tte_data;
 +
 +      for (i = 0; i < hdesc->num_mappings; i++) {
 +              hdesc->maps[i].vaddr = tte_vaddr;
 +              hdesc->maps[i].tte   = tte_data;
 +              tte_vaddr += 0x400000;
 +              tte_data  += 0x400000;
 +      }
 +
 +      trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
 +
 +      hv_err = sun4v_cpu_start(cpu, trampoline_ra,
 +                               kimage_addr_to_ra(&sparc64_ttable_tl0),
 +                               __pa(hdesc));
 +      if (hv_err)
 +              printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
 +                     "gives error %lu\n", hv_err);
 +}
 +#endif
 +
 +extern unsigned long sparc64_cpu_startup;
 +
 +/* The OBP cpu startup callback truncates the 3rd arg cookie to
 + * 32-bits (I think) so to be safe we have it read the pointer
 + * contained here so we work on >4GB machines. -DaveM
 + */
 +static struct thread_info *cpu_new_thread = NULL;
 +
 +static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
 +{
 +      struct trap_per_cpu *tb = &trap_block[cpu];
 +      unsigned long entry =
 +              (unsigned long)(&sparc64_cpu_startup);
 +      unsigned long cookie =
 +              (unsigned long)(&cpu_new_thread);
 +      struct task_struct *p;
 +      int timeout, ret;
 +
 +      p = fork_idle(cpu);
 +      if (IS_ERR(p))
 +              return PTR_ERR(p);
 +      callin_flag = 0;
 +      cpu_new_thread = task_thread_info(p);
 +
 +      if (tlb_type == hypervisor) {
 +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
 +              if (ldom_domaining_enabled)
 +                      ldom_startcpu_cpuid(cpu,
 +                                          (unsigned long) cpu_new_thread);
 +              else
 +#endif
 +                      prom_startcpu_cpuid(cpu, entry, cookie);
 +      } else {
 +              struct device_node *dp = of_find_node_by_cpuid(cpu);
 +
 +              prom_startcpu(dp->node, entry, cookie);
 +      }
 +
 +      for (timeout = 0; timeout < 50000; timeout++) {
 +              if (callin_flag)
 +                      break;
 +              udelay(100);
 +      }
 +
 +      if (callin_flag) {
 +              ret = 0;
 +      } else {
 +              printk("Processor %d is stuck.\n", cpu);
 +              ret = -ENODEV;
 +      }
 +      cpu_new_thread = NULL;
 +
 +      if (tb->hdesc) {
 +              kfree(tb->hdesc);
 +              tb->hdesc = NULL;
 +      }
 +
 +      return ret;
 +}
 +
 +/* Dispatch a three word mondo (@data0..@data2) to a single @cpu.
 + *
 + * @pstate is the caller's %pstate value; it is written back before
 + * returning.  After the dispatch the interrupt dispatch status is
 + * polled: a zero status means success, a status whose bit 0 stays set
 + * for 100000 polls is reported as stuck, and any other non-zero status
 + * causes a retry after a 2us delay (presumably a NACK -- confirm
 + * against the CPU manual).
 + */
 +static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
 +{
 +      u64 result, target;
 +      int stuck, tmp;
 +
 +      if (this_is_starfire) {
 +              /* map to real upaid */
 +              cpu = (((cpu & 0x3c) << 1) |
 +                      ((cpu & 0x40) >> 4) |
 +                      (cpu & 0x3));
 +      }
 +
 +      target = (cpu << 14) | 0x70;
 +again:
 +      /* Ok, this is the real Spitfire Errata #54.
 +       * One must read back from a UDB internal register
 +       * after writes to the UDB interrupt dispatch, but
 +       * before the membar Sync for that write.
 +       * So we use the high UDB control register (ASI 0x7f,
 +       * ADDR 0x20) for the dummy read. -DaveM
 +       */
 +      tmp = 0x40;
 +      __asm__ __volatile__(
 +      "wrpr   %1, %2, %%pstate\n\t"
 +      "stxa   %4, [%0] %3\n\t"
 +      "stxa   %5, [%0+%8] %3\n\t"
 +      "add    %0, %8, %0\n\t"
 +      "stxa   %6, [%0+%8] %3\n\t"
 +      "membar #Sync\n\t"
 +      "stxa   %%g0, [%7] %3\n\t"
 +      "membar #Sync\n\t"
 +      "mov    0x20, %%g1\n\t"
 +      "ldxa   [%%g1] 0x7f, %%g0\n\t"
 +      "membar #Sync"
 +      : "=r" (tmp)
 +      : "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
 +        "r" (data0), "r" (data1), "r" (data2), "r" (target),
 +        "r" (0x10), "0" (tmp)
 +        : "g1");
 +
 +      /* NOTE: PSTATE_IE is still clear. */
 +      stuck = 100000;
 +      do {
 +              __asm__ __volatile__("ldxa [%%g0] %1, %0"
 +                      : "=r" (result)
 +                      : "i" (ASI_INTR_DISPATCH_STAT));
 +              if (result == 0) {
 +                      __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +                                           : : "r" (pstate));
 +                      return;
 +              }
 +              stuck -= 1;
 +              if (stuck == 0)
 +                      break;
 +      } while (result & 0x1);
 +      __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +                           : : "r" (pstate));
 +      if (stuck == 0) {
 +              printk("CPU[%d]: mondo stuckage result[%016lx]\n",
 +                     smp_processor_id(), result);
 +      } else {
 +              /* Back off briefly, then resend the whole mondo. */
 +              udelay(2);
 +              goto again;
 +      }
 +}
 +
 +static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 +{
 +      u64 *mondo, data0, data1, data2;
 +      u16 *cpu_list;
 +      u64 pstate;
 +      int i;
 +
 +      __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 +      cpu_list = __va(tb->cpu_list_pa);
 +      mondo = __va(tb->cpu_mondo_block_pa);
 +      data0 = mondo[0];
 +      data1 = mondo[1];
 +      data2 = mondo[2];
 +      for (i = 0; i < cnt; i++)
 +              spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
 +}
 +
 +/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
 + * packet, but we have no use for that.  However we do take advantage of
 + * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 + *
 + * Delivers the mondo in @tb's mondo block to the first @cnt entries of
 + * @tb's cpu list.  At most 32 targets are dispatched per pass; entries
 + * that have been handled are overwritten with 0xffff in the cpu list
 + * and skipped on later passes.  If busy bits fail to clear within the
 + * poll budget the condition is logged and the function returns.
 + */
 +static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 +{
 +      int nack_busy_id, is_jbus, need_more;
 +      u64 *mondo, pstate, ver, busy_mask;
 +      u16 *cpu_list;
 +
 +      cpu_list = __va(tb->cpu_list_pa);
 +      mondo = __va(tb->cpu_mondo_block_pa);
 +
 +      /* Unfortunately, someone at Sun had the brilliant idea to make the
 +       * busy/nack fields hard-coded by ITID number for this Ultra-III
 +       * derivative processor.
 +       */
 +      __asm__ ("rdpr %%ver, %0" : "=r" (ver));
 +      is_jbus = ((ver >> 32) == __JALAPENO_ID ||
 +                 (ver >> 32) == __SERRANO_ID);
 +
 +      __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 +
 +retry:
 +      need_more = 0;
 +      __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
 +                           : : "r" (pstate), "i" (PSTATE_IE));
 +
 +      /* Setup the dispatch data registers. */
 +      __asm__ __volatile__("stxa      %0, [%3] %6\n\t"
 +                           "stxa      %1, [%4] %6\n\t"
 +                           "stxa      %2, [%5] %6\n\t"
 +                           "membar    #Sync\n\t"
 +                           : /* no outputs */
 +                           : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
 +                             "r" (0x40), "r" (0x50), "r" (0x60),
 +                             "i" (ASI_INTR_W));
 +
 +      nack_busy_id = 0;
 +      busy_mask = 0;
 +      {
 +              int i;
 +
 +              /* Dispatch to every pending target, recording the busy
 +               * bit to watch for each one.
 +               */
 +              for (i = 0; i < cnt; i++) {
 +                      u64 target, nr;
 +
 +                      nr = cpu_list[i];
 +                      if (nr == 0xffff)
 +                              continue;
 +
 +                      target = (nr << 14) | 0x70;
 +                      if (is_jbus) {
 +                              busy_mask |= (0x1UL << (nr * 2));
 +                      } else {
 +                              target |= (nack_busy_id << 24);
 +                              busy_mask |= (0x1UL <<
 +                                            (nack_busy_id * 2));
 +                      }
 +                      __asm__ __volatile__(
 +                              "stxa   %%g0, [%0] %1\n\t"
 +                              "membar #Sync\n\t"
 +                              : /* no outputs */
 +                              : "r" (target), "i" (ASI_INTR_W));
 +                      nack_busy_id++;
 +                      if (nack_busy_id == 32) {
 +                              need_more = 1;
 +                              break;
 +                      }
 +              }
 +      }
 +
 +      /* Now, poll for completion. */
 +      {
 +              u64 dispatch_stat, nack_mask;
 +              long stuck;
 +
 +              stuck = 100000 * nack_busy_id;
 +              nack_mask = busy_mask << 1;
 +              do {
 +                      __asm__ __volatile__("ldxa      [%%g0] %1, %0"
 +                                           : "=r" (dispatch_stat)
 +                                           : "i" (ASI_INTR_DISPATCH_STAT));
 +                      if (!(dispatch_stat & (busy_mask | nack_mask))) {
 +                              __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +                                                   : : "r" (pstate));
 +                              if (unlikely(need_more)) {
 +                                      /* Retire the batch of up to 32
 +                                       * targets just delivered, then go
 +                                       * around again for the rest.
 +                                       */
 +                                      int i, this_cnt = 0;
 +                                      for (i = 0; i < cnt; i++) {
 +                                              if (cpu_list[i] == 0xffff)
 +                                                      continue;
 +                                              cpu_list[i] = 0xffff;
 +                                              this_cnt++;
 +                                              if (this_cnt == 32)
 +                                                      break;
 +                                      }
 +                                      goto retry;
 +                              }
 +                              return;
 +                      }
 +                      if (!--stuck)
 +                              break;
 +              } while (dispatch_stat & busy_mask);
 +
 +              __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
 +                                   : : "r" (pstate));
 +
 +              if (dispatch_stat & busy_mask) {
 +                      /* Busy bits will not clear, continue instead
 +                       * of freezing up on this cpu.
 +                       */
 +                      printk("CPU[%d]: mondo stuckage result[%016lx]\n",
 +                             smp_processor_id(), dispatch_stat);
 +              } else {
 +                      int i, this_busy_nack = 0;
 +
 +                      /* Delay some random time with interrupts enabled
 +                       * to prevent deadlock.
 +                       */
 +                      udelay(2 * nack_busy_id);
 +
 +                      /* Clear out the mask bits for cpus which did not
 +                       * NACK us.
 +                       */
 +                      for (i = 0; i < cnt; i++) {
 +                              u64 check_mask, nr;
 +
 +                              nr = cpu_list[i];
 +                              if (nr == 0xffff)
 +                                      continue;
 +
 +                              if (is_jbus)
 +                                      check_mask = (0x2UL << (2*nr));
 +                              else
 +                                      check_mask = (0x2UL <<
 +                                                    this_busy_nack);
 +                              if ((dispatch_stat & check_mask) == 0)
 +                                      cpu_list[i] = 0xffff;
 +                              this_busy_nack += 2;
 +                              if (this_busy_nack == 64)
 +                                      break;
 +                      }
 +
 +                      goto retry;
 +              }
 +      }
 +}
 +
 +/* Multi-cpu list version.
 + *
 + * Hands @tb's cpu list (first @cnt entries) and mondo block to
 + * sun4v_cpu_mondo_send() and repeats the call until it reports HV_EOK.
 + * The retry counter only advances on passes where no additional list
 + * entries were marked 0xffff; after more than 10000 such passes the
 + * send is abandoned.  Cpus found in error state are dropped from the
 + * list and reported once the loop ends.  All failures are logged; no
 + * status is returned to the caller.
 + */
 +static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
 +{
 +      int retries, this_cpu, prev_sent, i, saw_cpu_error;
 +      unsigned long status;
 +      u16 *cpu_list;
 +
 +      this_cpu = smp_processor_id();
 +
 +      cpu_list = __va(tb->cpu_list_pa);
 +
 +      saw_cpu_error = 0;
 +      retries = 0;
 +      prev_sent = 0;
 +      do {
 +              int forward_progress, n_sent;
 +
 +              status = sun4v_cpu_mondo_send(cnt,
 +                                            tb->cpu_list_pa,
 +                                            tb->cpu_mondo_block_pa);
 +
 +              /* HV_EOK means all cpus received the xcall, we're done.  */
 +              if (likely(status == HV_EOK))
 +                      break;
 +
 +              /* First, see if we made any forward progress.
 +               *
 +               * The hypervisor indicates successful sends by setting
 +               * cpu list entries to the value 0xffff.
 +               */
 +              n_sent = 0;
 +              for (i = 0; i < cnt; i++) {
 +                      if (likely(cpu_list[i] == 0xffff))
 +                              n_sent++;
 +              }
 +
 +              forward_progress = 0;
 +              if (n_sent > prev_sent)
 +                      forward_progress = 1;
 +
 +              prev_sent = n_sent;
 +
 +              /* If we get a HV_ECPUERROR, then one or more of the cpus
 +               * in the list are in error state.  Use the cpu_state()
 +               * hypervisor call to find out which cpus are in error state.
 +               */
 +              if (unlikely(status == HV_ECPUERROR)) {
 +                      for (i = 0; i < cnt; i++) {
 +                              long err;
 +                              u16 cpu;
 +
 +                              cpu = cpu_list[i];
 +                              if (cpu == 0xffff)
 +                                      continue;
 +
 +                              err = sun4v_cpu_state(cpu);
 +                              if (err == HV_CPU_STATE_ERROR) {
 +                                      /* Stored as cpu + 1 so that zero
 +                                       * keeps meaning "no error seen".
 +                                       */
 +                                      saw_cpu_error = (cpu + 1);
 +                                      cpu_list[i] = 0xffff;
 +                              }
 +                      }
 +              } else if (unlikely(status != HV_EWOULDBLOCK))
 +                      goto fatal_mondo_error;
 +
 +              /* Don't bother rewriting the CPU list, just leave the
 +               * 0xffff and non-0xffff entries in there and the
 +               * hypervisor will do the right thing.
 +               *
 +               * Only advance timeout state if we didn't make any
 +               * forward progress.
 +               */
 +              if (unlikely(!forward_progress)) {
 +                      if (unlikely(++retries > 10000))
 +                              goto fatal_mondo_timeout;
 +
 +                      /* Delay a little bit to let other cpus catch up
 +                       * on their cpu mondo queue work.
 +                       */
 +                      udelay(2 * cnt);
 +              }
 +      } while (1);
 +
 +      if (unlikely(saw_cpu_error))
 +              goto fatal_mondo_cpu_error;
 +
 +      return;
 +
 +fatal_mondo_cpu_error:
 +      printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
 +             "(including %d) were in error state\n",
 +             this_cpu, saw_cpu_error - 1);
 +      return;
 +
 +fatal_mondo_timeout:
 +      printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
 +             " progress after %d retries.\n",
 +             this_cpu, retries);
 +      goto dump_cpu_list_and_out;
 +
 +fatal_mondo_error:
 +      printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
 +             this_cpu, status);
 +      printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
 +             "mondo_block_pa(%lx)\n",
 +             this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
 +
 +dump_cpu_list_and_out:
 +      printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
 +      for (i = 0; i < cnt; i++)
 +              printk("%u ", cpu_list[i]);
 +      printk("]\n");
 +}
 +
 +static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
 +
 +/* Send a cross call carrying @data0..@data2 to the cpus in @mask.
 + *
 + * The three data words are stored in the calling cpu's mondo block and
 + * the target list is built in its cpu list, skipping the calling cpu
 + * and any cpu that is not online.  If at least one target remains, the
 + * list is handed to xcall_deliver_impl.  Runs with local interrupts
 + * disabled from start to finish.
 + */
 +static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
 +{
 +      struct trap_per_cpu *tb;
 +      int this_cpu, i, cnt;
 +      unsigned long flags;
 +      u16 *cpu_list;
 +      u64 *mondo;
 +
 +      /* We have to do this whole thing with interrupts fully disabled.
 +       * Otherwise if we send an xcall from interrupt context it will
 +       * corrupt both our mondo block and cpu list state.
 +       *
 +       * One consequence of this is that we cannot use timeout mechanisms
 +       * that depend upon interrupts being delivered locally.  So, for
 +       * example, we cannot sample jiffies and expect it to advance.
 +       *
 +       * Fortunately, udelay() uses %stick/%tick so we can use that.
 +       */
 +      local_irq_save(flags);
 +
 +      this_cpu = smp_processor_id();
 +      tb = &trap_block[this_cpu];
 +
 +      mondo = __va(tb->cpu_mondo_block_pa);
 +      mondo[0] = data0;
 +      mondo[1] = data1;
 +      mondo[2] = data2;
 +      wmb();
 +
 +      cpu_list = __va(tb->cpu_list_pa);
 +
 +      /* Setup the initial cpu list.  */
 +      cnt = 0;
 +      for_each_cpu(i, mask) {
 +              if (i == this_cpu || !cpu_online(i))
 +                      continue;
 +              cpu_list[cnt++] = i;
 +      }
 +
 +      /* Nothing to send when the list came out empty. */
 +      if (cnt)
 +              xcall_deliver_impl(tb, cnt);
 +
 +      local_irq_restore(flags);
 +}
 +
 +/* Send cross call to all processors mentioned in MASK_P
 + * except self.  Really, there are only two cases currently,
 + * "&cpu_online_map" and "&mm->cpu_vm_mask".
 + */
 +static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
 +{
 +      u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
 +
 +      xcall_deliver(data0, data1, data2, mask);
 +}
 +
 +/* Send cross call to all processors except self.
 + *
 + * Thin wrapper around smp_cross_call_masked() that always targets
 + * cpu_online_map.
 + */
 +static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
 +{
 +      smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
 +}
 +
 +extern unsigned long xcall_sync_tick;
 +
 +/* Send the xcall_sync_tick cross call to @cpu only; the address of
 + * xcall_sync_tick is passed as the first data word, the others are 0.
 + */
 +static void smp_start_sync_tick_client(int cpu)
 +{
 +      xcall_deliver((u64) &xcall_sync_tick, 0, 0,
 +                    &cpumask_of_cpu(cpu));
 +}
 +
 +extern unsigned long xcall_call_function;
 +
 +/* Send the xcall_call_function cross call to the cpus in @mask.  The
 + * mask is taken by value; xcall_deliver() itself skips the calling cpu
 + * and offline cpus.
 + */
 +void arch_send_call_function_ipi(cpumask_t mask)
 +{
 +      xcall_deliver((u64) &xcall_call_function, 0, 0, &mask);
 +}
 +
 +extern unsigned long xcall_call_function_single;
 +
 +/* Send the xcall_call_function_single cross call to @cpu only. */
 +void arch_send_call_function_single_ipi(int cpu)
 +{
 +      xcall_deliver((u64) &xcall_call_function_single, 0, 0,
 +                    &cpumask_of_cpu(cpu));
 +}
 +
 +/* Clear the soft interrupt bit for @irq, then run the generic
 + * call-function interrupt handler.  @regs is unused here.
 + */
 +void smp_call_function_client(int irq, struct pt_regs *regs)
 +{
 +      clear_softint(1 << irq);
 +      generic_smp_call_function_interrupt();
 +}
 +
 +/* Clear the soft interrupt bit for @irq, then run the generic
 + * single-target call-function interrupt handler.  @regs is unused here.
 + */
 +void smp_call_function_single_client(int irq, struct pt_regs *regs)
 +{
 +      clear_softint(1 << irq);
 +      generic_smp_call_function_single_interrupt();
 +}
 +
 +static void tsb_sync(void *info)
 +{
 +      struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
 +      struct mm_struct *mm = info;
 +
 +      /* It is not valid to test "currrent->active_mm == mm" here.
 +       *
 +       * The value of "current" is not changed atomically with
 +       * switch_mm().  But that's OK, we just need to check the
 +       * current cpu's trap block PGD physical address.
 +       */
 +      if (tp->pgd_paddr == __pa(mm->pgd))
 +              tsb_context_switch(mm);
 +}
 +
 +/* Run tsb_sync() for @mm on the cpus in mm->cpu_vm_mask via
 + * smp_call_function_mask(), passing 1 as its final (wait) argument.
 + */
 +void smp_tsb_sync(struct mm_struct *mm)
 +{
 +      smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1);
 +}
 +
 +extern unsigned long xcall_flush_tlb_mm;
 +extern unsigned long xcall_flush_tlb_pending;
 +extern unsigned long xcall_flush_tlb_kernel_range;
 +extern unsigned long xcall_fetch_glob_regs;
 +extern unsigned long xcall_receive_signal;
 +extern unsigned long xcall_new_mmu_context_version;
 +#ifdef CONFIG_KGDB
 +extern unsigned long xcall_kgdb_capture;
 +#endif
 +
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +extern unsigned long xcall_flush_dcache_page_cheetah;
 +#endif
 +extern unsigned long xcall_flush_dcache_page_spitfire;
 +
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +extern atomic_t dcpage_flushes;
 +extern atomic_t dcpage_flushes_xcall;
 +#endif
 +
 +/* Flush @page from the caches of the calling cpu.
 + *
 + * With DCACHE_ALIASING_POSSIBLE the D-cache is always flushed, and the
 + * second argument to __flush_dcache_page() is true only on spitfire
 + * for pages that have a mapping.  Without it, only the I-cache is
 + * flushed, and only on spitfire for pages that have a mapping.
 + */
 +static inline void __local_flush_dcache_page(struct page *page)
 +{
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +      __flush_dcache_page(page_address(page),
 +                          ((tlb_type == spitfire) &&
 +                           page_mapping(page) != NULL));
 +#else
 +      if (page_mapping(page) != NULL &&
 +          tlb_type == spitfire)
 +              __flush_icache_page(__pa(page_address(page)));
 +#endif
 +}
 +
 +/* Flush @page from the D-cache of @cpu.
 + *
 + * Does nothing on hypervisor (sun4v) systems.  If @cpu is the calling
 + * cpu the flush is done locally; otherwise, if @cpu is online, a cross
 + * call is sent to it.  On cheetah/cheetah_plus without
 + * DCACHE_ALIASING_POSSIBLE no cross call is sent (data0 stays 0).
 + */
 +void smp_flush_dcache_page_impl(struct page *page, int cpu)
 +{
 +      int this_cpu;
 +
 +      if (tlb_type == hypervisor)
 +              return;
 +
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +      atomic_inc(&dcpage_flushes);
 +#endif
 +
 +      this_cpu = get_cpu();
 +
 +      if (cpu == this_cpu) {
 +              __local_flush_dcache_page(page);
 +      } else if (cpu_online(cpu)) {
 +              void *pg_addr = page_address(page);
 +              u64 data0 = 0;
 +
 +              if (tlb_type == spitfire) {
 +                      data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 +                      /* Bit 32 tells the handler the page has a mapping. */
 +                      if (page_mapping(page) != NULL)
 +                              data0 |= ((u64)1 << 32);
 +              } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +                      data0 = ((u64)&xcall_flush_dcache_page_cheetah);
 +#endif
 +              }
 +              if (data0) {
 +                      xcall_deliver(data0, __pa(pg_addr),
 +                                    (u64) pg_addr, &cpumask_of_cpu(cpu));
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +                      atomic_inc(&dcpage_flushes_xcall);
 +#endif
 +              }
 +      }
 +
 +      put_cpu();
 +}
 +
 +/* Flush @page from the D-cache of every online cpu.
 + *
 + * Does nothing on hypervisor (sun4v) systems.  A cross call is sent to
 + * cpu_online_map (xcall_deliver() skips the calling cpu) and the local
 + * flush is then done directly.  @mm is not used in this function.
 + */
 +void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
 +{
 +      void *pg_addr;
 +      int this_cpu;
 +      u64 data0;
 +
 +      if (tlb_type == hypervisor)
 +              return;
 +
 +      this_cpu = get_cpu();
 +
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +      atomic_inc(&dcpage_flushes);
 +#endif
 +      data0 = 0;
 +      pg_addr = page_address(page);
 +      if (tlb_type == spitfire) {
 +              data0 = ((u64)&xcall_flush_dcache_page_spitfire);
 +              /* Bit 32 tells the handler the page has a mapping. */
 +              if (page_mapping(page) != NULL)
 +                      data0 |= ((u64)1 << 32);
 +      } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
 +#ifdef DCACHE_ALIASING_POSSIBLE
 +              data0 = ((u64)&xcall_flush_dcache_page_cheetah);
 +#endif
 +      }
 +      if (data0) {
 +              xcall_deliver(data0, __pa(pg_addr),
 +                            (u64) pg_addr, &cpu_online_map);
 +#ifdef CONFIG_DEBUG_DCFLUSH
 +              atomic_inc(&dcpage_flushes_xcall);
 +#endif
 +      }
 +      __local_flush_dcache_page(page);
 +
 +      put_cpu();
 +}
 +/*
 + * Softint handler: acknowledges softint bit @irq, revalidates the MMU
 + * context of current->active_mm if needed, then reloads the secondary
 + * context and flushes the TLB entries of that context.  @regs is unused.
 + * NOTE(review): presumably raised by xcall_new_mmu_context_version;
 + * the wiring is not visible in this file section.
 + */
 +void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
 +{
 +      struct mm_struct *mm;
 +      unsigned long flags;
 +
 +      clear_softint(1 << irq);
 +
 +      /* See if we need to allocate a new TLB context because
 +       * the version of the one we are using is now out of date.
 +       * Nothing to do when there is no mm or it is init_mm.
 +       */
 +      mm = current->active_mm;
 +      if (unlikely(!mm || (mm == &init_mm)))
 +              return;
 +
 +      spin_lock_irqsave(&mm->context.lock, flags);
 +
 +      if (unlikely(!CTX_VALID(mm->context)))
 +              get_new_mmu_context(mm);
 +
 +      spin_unlock_irqrestore(&mm->context.lock, flags);
 +
 +      load_secondary_context(mm);
 +      __flush_tlb_mm(CTX_HWBITS(mm->context),
 +                     SECONDARY_CONTEXT);
 +}
 +/*
 + * Issue the xcall_new_mmu_context_version cross call through
 + * smp_cross_call() with all three data arguments zero.
 + */
 +void smp_new_mmu_context_version(void)
 +{
 +      smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
 +}
 +/* Issue the xcall_kgdb_capture cross call; @flags is not used. */
 +#ifdef CONFIG_KGDB
 +void kgdb_roundup_cpus(unsigned long flags)
 +{
 +      smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
 +}
 +#endif /* CONFIG_KGDB */
 +/* Issue the xcall_fetch_glob_regs cross call with zeroed data arguments. */
 +void smp_fetch_global_regs(void)
 +{
 +      smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
 +}
 +
 +/* We know that the window frames of the user have been flushed
 + * to the stack before we get here because all callers of us
 + * are flush_tlb_*() routines, and these run after flush_cache_*()
 + * which performs the flushw.
 + *
 + * The SMP TLB coherency scheme we use works as follows:
 + *
 + * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
 + *    space has (potentially) executed on, this is the heuristic
 + *    we use to avoid doing cross calls.
 + *
 + *    Also, for flushing from kswapd and also for clones, we
 + *    use cpu_vm_mask as the list of cpus that must run the TLB flush.
 + *
 + * 2) TLB context numbers are shared globally across all processors
 + *    in the system, this allows us to play several games to avoid
 + *    cross calls.
 + *
 + *    One invariant is that when a cpu switches to a process, and
 + *    that process's tsk->active_mm->cpu_vm_mask does not have the
 + *    current cpu's bit set, that tlb context is flushed locally.
 + *
 + *    If the address space is non-shared (ie. mm->count == 1) we avoid
 + *    cross calls when we want to flush the currently running process's
 + *    tlb state.  This is done by clearing all cpu bits except the current
 + *    processor's in current->active_mm->cpu_vm_mask and performing the
 + *    flush locally only.  This will force any subsequent cpus which run
 + *    this task to flush the context from the local tlb if the process
 + *    migrates to another cpu (again).
 + *
 + * 3) For shared address spaces (threads) and swapping we bite the
 + *    bullet for most cases and perform the cross call (but only to
 + *    the cpus listed in cpu_vm_mask).
 + *
 + *    The performance gain from "optimizing" away the cross call for threads is
 + *    questionable (in theory the big win for threads is the massive sharing of
 + *    address space state across processors).
 + */
 +
 +/* This currently is only used by the hugetlb arch pre-fault
 + * hook on UltraSPARC-III+ and later when changing the pagesize
 + * bits of the context register for an address space.
 + *
 + * Flushes the TLB entries of @mm's context.  When @mm has a single
 + * user, cpu_vm_mask is narrowed to the current cpu and only the
 + * local TLB is flushed; otherwise the cpus in cpu_vm_mask are cross
 + * called before the local flush.
 + */
 +void smp_flush_tlb_mm(struct mm_struct *mm)
 +{
 +      u32 ctx = CTX_HWBITS(mm->context);
 +      int cpu = get_cpu();
 +
 +      if (atomic_read(&mm->mm_users) == 1) {
 +              mm->cpu_vm_mask = cpumask_of_cpu(cpu);
 +              goto local_flush_and_out;
 +      }
 +
 +      smp_cross_call_masked(&xcall_flush_tlb_mm,
 +                            ctx, 0, 0,
 +                            &mm->cpu_vm_mask);
 +
 +local_flush_and_out:
 +      __flush_tlb_mm(ctx, SECONDARY_CONTEXT);
 +
 +      put_cpu();
 +}
 +/*
 + * Flush the @nr virtual addresses in @vaddrs for @mm's context.
 + * If @mm is the current active_mm and has a single user, cpu_vm_mask
 + * is narrowed to the current cpu and no cross call is sent; otherwise
 + * the cpus in cpu_vm_mask are cross called.  The local flush is done
 + * in both cases.
 + */
 +void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
 +{
 +      u32 ctx = CTX_HWBITS(mm->context);
 +      int cpu = get_cpu();
 +
 +      if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1)
 +              mm->cpu_vm_mask = cpumask_of_cpu(cpu);
 +      else
 +              smp_cross_call_masked(&xcall_flush_tlb_pending,
 +                                    ctx, nr, (unsigned long) vaddrs,
 +                                    &mm->cpu_vm_mask);
 +
 +      __flush_tlb_pending(ctx, nr, vaddrs);
 +
 +      put_cpu();
 +}
 +
 +void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
 +{
 +      start &= PAGE_MASK;
 +      end    = PAGE_ALIGN(end);
 +      if (start != end) {
 +              smp_cross_call(&xcall_flush_tlb_kernel_range,
 +                             0, start, end);
 +
 +              __flush_tlb_kernel_range(start, end);
 +      }
 +}
 +
 +/* CPU capture: state shared by smp_capture(), smp_release() and
 + * smp_penguin_jailcell().
 + */
 +/* #define CAPTURE_DEBUG */     /* define to printk capture/release progress */
 +extern unsigned long xcall_capture;
 +
 +static atomic_t smp_capture_depth = ATOMIC_INIT(0);     /* nesting count of smp_capture() calls */
 +static atomic_t smp_capture_registry = ATOMIC_INIT(0);  /* cpus checked in: the capturer plus jailed cpus */
 +static unsigned long penguins_are_doing_time;   /* non-zero while jailed cpus must keep spinning */
 +/*
 + * Capture the other cpus.  Calls nest: only the call that raises
 + * smp_capture_depth from 0 to 1 sends the xcall_capture cross call,
 + * then spins until smp_capture_registry equals the number of online
 + * cpus (the caller counts itself in before sending).
 + */
 +void smp_capture(void)
 +{
 +      int result = atomic_add_ret(1, &smp_capture_depth);
 +
 +      if (result == 1) {
 +              int ncpus = num_online_cpus();
 +
 +#ifdef CAPTURE_DEBUG
 +              printk("CPU[%d]: Sending penguins to jail...",
 +                     smp_processor_id());
 +#endif
 +              penguins_are_doing_time = 1;    /* set before the cross call goes out */
 +              atomic_inc(&smp_capture_registry);
 +              smp_cross_call(&xcall_capture, 0, 0, 0);
 +              while (atomic_read(&smp_capture_registry) != ncpus)
 +                      rmb();
 +#ifdef CAPTURE_DEBUG
 +              printk("done\n");
 +#endif
 +      }
 +}
 +/*
 + * Undo one smp_capture().  When the nesting depth drops to zero the
 + * jail flag is cleared, which ends the spin loop in
 + * smp_penguin_jailcell(), and the caller's own registry count is
 + * dropped.
 + */
 +void smp_release(void)
 +{
 +      if (atomic_dec_and_test(&smp_capture_depth)) {
 +#ifdef CAPTURE_DEBUG
 +              printk("CPU[%d]: Giving pardon to "
 +                     "imprisoned penguins\n",
 +                     smp_processor_id());
 +#endif
 +              penguins_are_doing_time = 0;
 +              membar_safe("#StoreLoad");
 +              atomic_dec(&smp_capture_registry);
 +      }
 +}
 +
 +/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
 + * set, so they can service tlb flush xcalls...
 + *
 + * Softint handler for a captured cpu: acknowledges softint bit @irq,
 + * checks in through smp_capture_registry, spins while
 + * penguins_are_doing_time is set, then checks out again.  The whole
 + * stay is bracketed by prom_world(1) / prom_world(0).  @regs is unused.
 + */
 +extern void prom_world(int);
 +
 +void smp_penguin_jailcell(int irq, struct pt_regs *regs)
 +{
 +      clear_softint(1 << irq);
 +
 +      preempt_disable();
 +
 +      __asm__ __volatile__("flushw");
 +      prom_world(1);
 +      atomic_inc(&smp_capture_registry);      /* check in; smp_capture() waits on this count */
 +      membar_safe("#StoreLoad");
 +      while (penguins_are_doing_time)         /* cleared by smp_release() */
 +              rmb();
 +      atomic_dec(&smp_capture_registry);
 +      prom_world(0);
 +
 +      preempt_enable();
 +}
 +
 +/* /proc/profile writes can call this, don't __init it please.
 + * Changing the profiling multiplier is not supported here: the
 + * function ignores @multiplier and always returns -EINVAL.
 + */
 +int setup_profiling_timer(unsigned int multiplier)
 +{
 +      return -EINVAL;
 +}
 +/* Intentionally empty; @max_cpus is ignored. */
 +void __init smp_prepare_cpus(unsigned int max_cpus)
 +{
 +}
 +/* Intentionally empty. */
 +void __devinit smp_prepare_boot_cpu(void)
 +{
 +}
 +
 +void __init smp_setup_processor_id(void)
 +{
 +      if (tlb_type == spitfire)
 +              xcall_deliver_impl = spitfire_xcall_deliver;
 +      else if (tlb_type == cheetah || tlb_type == cheetah_plus)
 +              xcall_deliver_impl = cheetah_xcall_deliver;
 +      else
 +              xcall_deliver_impl = hypervisor_xcall_deliver;
 +}
 +/*
 + * Rebuild cpu_core_map[] and the per-cpu cpu_sibling_map for every
 + * present cpu.  Cpus sharing a core_id land in each other's core map
 + * and cpus sharing a proc_id in each other's sibling map.  A cpu whose
 + * core_id is 0 (respectively whose proc_id is -1) gets a map that
 + * holds only itself.
 + */
 +void __devinit smp_fill_in_sib_core_maps(void)
 +{
 +      unsigned int i;
 +
 +      for_each_present_cpu(i) {
 +              unsigned int j;
 +
 +              cpus_clear(cpu_core_map[i]);
 +              if (cpu_data(i).core_id == 0) {         /* no core id: map is just the cpu itself */
 +                      cpu_set(i, cpu_core_map[i]);
 +                      continue;
 +              }
 +
 +              for_each_present_cpu(j) {
 +                      if (cpu_data(i).core_id ==
 +                          cpu_data(j).core_id)
 +                              cpu_set(j, cpu_core_map[i]);
 +              }
 +      }
 +
 +      for_each_present_cpu(i) {
 +              unsigned int j;
 +
 +              cpus_clear(per_cpu(cpu_sibling_map, i));
 +              if (cpu_data(i).proc_id == -1) {        /* no proc id: map is just the cpu itself */
 +                      cpu_set(i, per_cpu(cpu_sibling_map, i));
 +                      continue;
 +              }
 +
 +              for_each_present_cpu(j) {
 +                      if (cpu_data(i).proc_id ==
 +                          cpu_data(j).proc_id)
 +                              cpu_set(j, per_cpu(cpu_sibling_map, i));
 +              }
 +      }
 +}
 +/*
 + * Boot @cpu and wait for it to come online.
 + *
 + * Returns the smp_boot_one_cpu() error if booting fails.  On success
 + * the cpu is added to smp_commenced_mask and this function spins,
 + * with no timeout, until the cpu shows up in cpu_online_map.
 + *
 + * NOTE(review): the -ENODEV re-check directly after the spin loop can
 + * only trigger if the cpu leaves cpu_online_map again in between;
 + * confirm whether that branch is reachable at all.
 + */
 +int __cpuinit __cpu_up(unsigned int cpu)
 +{
 +      int ret = smp_boot_one_cpu(cpu);
 +
 +      if (!ret) {
 +              cpu_set(cpu, smp_commenced_mask);
 +              while (!cpu_isset(cpu, cpu_online_map))
 +                      mb();
 +              if (!cpu_isset(cpu, cpu_online_map)) {
 +                      ret = -ENODEV;
 +              } else {
 +                      /* On SUN4V, writes to %tick and %stick are
 +                       * not allowed.
 +                       */
 +                      if (tlb_type != hypervisor)
 +                              smp_synchronize_one_tick(cpu);
 +              }
 +      }
 +      return ret;
 +}
 +/*
 + * cpu_play_dead() is the final code run by a cpu that is being
 + * unplugged.  It leaves the idle task, on hypervisor systems
 + * reconfigures its four mondo/error queues with a zero last argument,
 + * removes itself from smp_commenced_mask (which __cpu_die() polls)
 + * and then spins forever.
 + */
 +#ifdef CONFIG_HOTPLUG_CPU
 +void cpu_play_dead(void)
 +{
 +      int cpu = smp_processor_id();
 +      unsigned long pstate;
 +
 +      idle_task_exit();
 +
 +      if (tlb_type == hypervisor) {
 +              struct trap_per_cpu *tb = &trap_block[cpu];
 +
 +              /* NOTE(review): presumably a 0 entry count unconfigures the queue; confirm */
 +              sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
 +                              tb->cpu_mondo_pa, 0);
 +              sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
 +                              tb->dev_mondo_pa, 0);
 +              sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
 +                              tb->resum_mondo_pa, 0);
 +              sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
 +                              tb->nonresum_mondo_pa, 0);
 +      }
 +
 +      cpu_clear(cpu, smp_commenced_mask);     /* signals __cpu_die() that we got here */
 +      membar_safe("#Sync");
 +
 +      local_irq_disable();
 +
 +      /* NOTE(review): wrpr combines its two source operands with XOR, so
 +       * this toggles PSTATE_IE in %pstate - presumably clearing it; confirm.
 +       */
 +      __asm__ __volatile__(
 +              "rdpr   %%pstate, %0\n\t"
 +              "wrpr   %0, %1, %%pstate"
 +              : "=r" (pstate)
 +              : "i" (PSTATE_IE));
 +
 +      while (1)
 +              barrier();
 +}
 +/*
 + * Take the calling cpu out of service.  It is removed from the core
 + * and sibling maps of its peers, its own maps are emptied, its
 + * core_id/proc_id are reset to 0/-1, interrupts are steered away and
 + * finally its bit in cpu_online_map is cleared.  Always returns 0.
 + */
 +int __cpu_disable(void)
 +{
 +      int cpu = smp_processor_id();
 +      cpuinfo_sparc *c;
 +      int i;
 +
 +      for_each_cpu_mask(i, cpu_core_map[cpu])
 +              cpu_clear(cpu, cpu_core_map[i]);
 +      cpus_clear(cpu_core_map[cpu]);
 +
 +      for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
 +              cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
 +      cpus_clear(per_cpu(cpu_sibling_map, cpu));
 +
 +      c = &cpu_data(cpu);
 +
 +      c->core_id = 0;         /* same "no id" values smp_fill_in_sib_core_maps() tests for */
 +      c->proc_id = -1;
 +
 +      smp_wmb();
 +
 +      /* Make sure no interrupts point to this cpu.  */
 +      fixup_irqs();
 +
 +      /* NOTE(review): presumably lets already pending interrupts drain; confirm */
 +      local_irq_enable();
 +      mdelay(1);
 +      local_irq_disable();
 +
 +      ipi_call_lock();
 +      cpu_clear(cpu, cpu_online_map);
 +      ipi_call_unlock();
 +
 +      return 0;
 +}
 +/*
 + * Wait for @cpu to finish dying.  Polls up to 100 times, sleeping
 + * 100 ms between polls, for the cpu to clear its bit in
 + * smp_commenced_mask (done in cpu_play_dead()).  On timeout an error
 + * is logged.  With CONFIG_SUN_LDOMS the cpu is then stopped through
 + * sun4v_cpu_stop(), retried up to 100 times, and dropped from
 + * cpu_present_map on success.
 + */
 +void __cpu_die(unsigned int cpu)
 +{
 +      int i;
 +
 +      for (i = 0; i < 100; i++) {
 +              smp_rmb();
 +              if (!cpu_isset(cpu, smp_commenced_mask))
 +                      break;
 +              msleep(100);
 +      }
 +      if (cpu_isset(cpu, smp_commenced_mask)) {
 +              printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 +      } else {
 +#if defined(CONFIG_SUN_LDOMS)
 +              unsigned long hv_err;
 +              int limit = 100;
 +
 +              do {
 +                      hv_err = sun4v_cpu_stop(cpu);
 +                      if (hv_err == HV_EOK) {
 +                              cpu_clear(cpu, cpu_present_map);
 +                              break;
 +                      }
 +              } while (--limit > 0);
 +              if (limit <= 0) {       /* every attempt failed; hv_err holds the last error */
 +                      printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
 +                             hv_err);
 +              }
 +#endif /* CONFIG_SUN_LDOMS */
 +      }
 +}
 +#endif /* CONFIG_HOTPLUG_CPU */
 +/* Intentionally empty; @max_cpus is ignored. */
 +void __init smp_cpus_done(unsigned int max_cpus)
 +{
 +}
 +/*
 + * Deliver the xcall_receive_signal cross call, with both data words
 + * zero, to @cpu only.
 + */
 +void smp_send_reschedule(int cpu)
 +{
 +      xcall_deliver((u64) &xcall_receive_signal, 0, 0,
 +                    &cpumask_of_cpu(cpu));
 +}
 +/* Softint handler that only acknowledges softint bit @irq; @regs is unused. */
 +void smp_receive_signal_client(int irq, struct pt_regs *regs)
 +{
 +      clear_softint(1 << irq);
 +}
 +
 +/* This is a nop because we capture all other cpus
 + * anyways when making the PROM active.  The body is
 + * therefore intentionally empty.
 + */
 +void smp_send_stop(void)
 +{
 +}
 +/*
 + * Both values are computed in real_setup_per_cpu_areas():
 + * __per_cpu_base is the address of the first per-cpu copy minus
 + * __per_cpu_start, and __per_cpu_shift starts at PAGE_SHIFT and is
 + * incremented once for every doubling of the per-cpu area size.
 + */
 +unsigned long __per_cpu_base __read_mostly;
 +unsigned long __per_cpu_shift __read_mostly;
 +
 +EXPORT_SYMBOL(__per_cpu_base);
 +EXPORT_SYMBOL(__per_cpu_shift);
 +/*
 + * Allocate and populate the per-cpu data areas at boot.
 + *
 + * The size of one area is PAGE_SIZE doubled until it reaches
 + * PERCPU_ENOUGH_ROOM.  One such area per possible cpu slot (NR_CPUS)
 + * is allocated in a single lmb_alloc() call and each is initialized
 + * with a copy of the __per_cpu_start..__per_cpu_end template.  The
 + * machine is halted through the PROM if the allocation fails.
 + */
 +void __init real_setup_per_cpu_areas(void)
 +{
 +      unsigned long paddr, goal, size, i;
 +      char *ptr;
 +
 +      /* Copy section for each CPU (we discard the original) */
 +      goal = PERCPU_ENOUGH_ROOM;
 +
 +      __per_cpu_shift = PAGE_SHIFT;
 +      for (size = PAGE_SIZE; size < goal; size <<= 1UL)
 +              __per_cpu_shift++;      /* one more bit per doubling of size */
 +
 +      paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
 +      if (!paddr) {
 +              prom_printf("Cannot allocate per-cpu memory.\n");
 +              prom_halt();
 +      }
 +
 +      ptr = __va(paddr);
 +      __per_cpu_base = ptr - __per_cpu_start;
 +
 +      for (i = 0; i < NR_CPUS; i++, ptr += size)
 +              memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
 +
 +      /* Setup %g5 for the boot cpu.  */
 +      __local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
 +}
index a4d45fc29b21e4ac9c9c069bdab7b8c277c2f6f5,0000000000000000000000000000000000000000..e1e97639231b208e8b01ee99373771020b49ec2f
mode 100644,000000..100644
--- /dev/null
@@@ -1,257 -1,0 +1,253 @@@
- /* CPU online map and active count. */
- EXPORT_SYMBOL(cpu_online_map);
- EXPORT_SYMBOL(phys_cpu_present_map);
 +/*
 + * arch/sparc/kernel/ksyms.c: Sparc specific ksyms support.
 + *
 + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
 + * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
 + */
 +
 +/* Tell string.h we don't want memcpy etc. as cpp defines */
 +#define EXPORT_SYMTAB_STROPS
 +#define PROMLIB_INTERNAL
 +
 +#include <linux/module.h>
 +#include <linux/init.h>
 +#include <linux/smp.h>
 +#include <linux/types.h>
 +#include <linux/string.h>
 +#include <linux/sched.h>
 +#include <linux/interrupt.h>
 +#include <linux/in6.h>
 +#include <linux/spinlock.h>
 +#include <linux/mm.h>
 +#include <linux/syscalls.h>
 +#ifdef CONFIG_PCI
 +#include <linux/pci.h>
 +#endif
 +#include <linux/pm.h>
 +#ifdef CONFIG_HIGHMEM
 +#include <linux/highmem.h>
 +#endif
 +
 +#include <asm/oplib.h>
 +#include <asm/delay.h>
 +#include <asm/system.h>
 +#include <asm/auxio.h>
 +#include <asm/pgtable.h>
 +#include <asm/io.h>
 +#include <asm/irq.h>
 +#include <asm/idprom.h>
 +#include <asm/head.h>
 +#include <asm/smp.h>
 +#include <asm/ptrace.h>
 +#include <asm/uaccess.h>
 +#include <asm/checksum.h>
 +#ifdef CONFIG_SBUS
 +#include <asm/dma.h>
 +#endif
 +#include <asm/io-unit.h>
 +#include <asm/bug.h>
 +/* rtc_lock is defined elsewhere; it is exported further down in this file. */
 +extern spinlock_t rtc_lock;
 +
 +struct poll {   /* NOTE(review): not referenced in the visible part of this file; confirm it is still needed */
 +      int fd;
 +      short events;
 +      short revents;
 +};
 +/*
 + * Declarations of symbols defined elsewhere.  They exist so that the
 + * names can be passed to EXPORT_SYMBOL() further down in this file.
 + */
 +extern void (*__copy_1page)(void *, const void *);
 +extern void __memmove(void *, const void *, __kernel_size_t);
 +extern void (*bzero_1page)(void *);
 +extern void *__bzero(void *, size_t);
 +extern void *__memscan_zero(void *, size_t);
 +extern void *__memscan_generic(void *, int, size_t);
 +extern int __strncmp(const char *, const char *, __kernel_size_t);
 +
 +/* NOTE(review): these (int, int) prototypes look like placeholders for
 + * the export only, not the real signatures; confirm before calling.
 + */
 +extern int __ashrdi3(int, int);
 +extern int __ashldi3(int, int);
 +extern int __lshrdi3(int, int);
 +extern int __muldi3(int, int);
 +extern int __divdi3(int, int);
 +
 +/* Private functions with odd calling conventions; declared as
 + * void(void) here and not meant to be called from C through these
 + * prototypes.
 + */
 +extern void ___atomic24_add(void);
 +extern void ___atomic24_sub(void);
 +extern void ___rw_read_enter(void);
 +extern void ___rw_read_try(void);
 +extern void ___rw_read_exit(void);
 +extern void ___rw_write_enter(void);
 +
 +/* Alias functions whose names begin with "." and export the aliases.
 + * The module references will be fixed up by module_frob_arch_sections.
 + */
 +extern int _Div(int, int);
 +extern int _Mul(int, int);
 +extern int _Rem(int, int);
 +extern unsigned _Udiv(unsigned, unsigned);
 +extern unsigned _Umul(unsigned, unsigned);
 +extern unsigned _Urem(unsigned, unsigned);
 +
 +/* Symbols used by various drivers. */
 +EXPORT_SYMBOL(sparc_cpu_model);
 +EXPORT_SYMBOL(kernel_thread);
 +#ifdef CONFIG_SMP
 +// XXX find what uses (or used) these.   AV: see asm/spinlock.h
 +EXPORT_SYMBOL(___rw_read_enter);
 +EXPORT_SYMBOL(___rw_read_try);
 +EXPORT_SYMBOL(___rw_read_exit);
 +EXPORT_SYMBOL(___rw_write_enter);
 +#endif /* CONFIG_SMP */
 +
 +EXPORT_SYMBOL(sparc_valid_addr_bitmap);
 +EXPORT_SYMBOL(phys_base);
 +EXPORT_SYMBOL(pfn_base);
 +
 +/* Atomic operations. */
 +EXPORT_SYMBOL(___atomic24_add);
 +EXPORT_SYMBOL(___atomic24_sub);
 +
 +/* Per-CPU information table */
 +EXPORT_PER_CPU_SYMBOL(__cpu_data);
 +
 +#ifdef CONFIG_SMP
 +/* IRQ implementation. */
 +EXPORT_SYMBOL(synchronize_irq);
 +#endif /* CONFIG_SMP */
 +
 +EXPORT_SYMBOL(__udelay);
 +EXPORT_SYMBOL(__ndelay);
 +EXPORT_SYMBOL(rtc_lock);
 +EXPORT_SYMBOL(set_auxio);
 +EXPORT_SYMBOL(get_auxio);
 +EXPORT_SYMBOL(io_remap_pfn_range);
 +
 +#ifndef CONFIG_SMP      /* uniprocessor builds export ___xchg32 ... */
 +EXPORT_SYMBOL(BTFIXUP_CALL(___xchg32));
 +#else                   /* ... SMP builds export __hard_smp_processor_id instead */
 +EXPORT_SYMBOL(BTFIXUP_CALL(__hard_smp_processor_id));
 +#endif /* !CONFIG_SMP */
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_unlockarea));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_lockarea));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_get_scsi_sgl));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_get_scsi_one));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_sgl));
 +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_one));
 +
 +EXPORT_SYMBOL(BTFIXUP_CALL(pgprot_noncached));
 +
 +#ifdef CONFIG_SBUS
 +EXPORT_SYMBOL(sbus_set_sbus64);
 +#endif /* CONFIG_SBUS */
 +#ifdef CONFIG_PCI       /* string port I/O and PCI DMA helpers */
 +EXPORT_SYMBOL(insb);
 +EXPORT_SYMBOL(outsb);
 +EXPORT_SYMBOL(insw);
 +EXPORT_SYMBOL(outsw);
 +EXPORT_SYMBOL(insl);
 +EXPORT_SYMBOL(outsl);
 +EXPORT_SYMBOL(pci_alloc_consistent);
 +EXPORT_SYMBOL(pci_free_consistent);
 +EXPORT_SYMBOL(pci_map_single);
 +EXPORT_SYMBOL(pci_unmap_single);
 +EXPORT_SYMBOL(pci_dma_sync_single_for_cpu);
 +EXPORT_SYMBOL(pci_dma_sync_single_for_device);
 +EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu);
 +EXPORT_SYMBOL(pci_dma_sync_sg_for_device);
 +EXPORT_SYMBOL(pci_map_sg);
 +EXPORT_SYMBOL(pci_unmap_sg);
 +EXPORT_SYMBOL(pci_map_page);
 +EXPORT_SYMBOL(pci_unmap_page);
 +/* Actually, ioremap/iounmap are not PCI specific. But it is ok for drivers.
 + * As written they are only exported when CONFIG_PCI is set.
 + */
 +EXPORT_SYMBOL(ioremap);
 +EXPORT_SYMBOL(iounmap);
 +#endif /* CONFIG_PCI */
 +
 +/* in arch/sparc/mm/highmem.c */
 +#ifdef CONFIG_HIGHMEM
 +EXPORT_SYMBOL(kmap_atomic);
 +EXPORT_SYMBOL(kunmap_atomic);
 +#endif /* CONFIG_HIGHMEM */
 +
 +/* PROM symbols. */
 +EXPORT_SYMBOL(idprom);
 +EXPORT_SYMBOL(prom_root_node);
 +EXPORT_SYMBOL(prom_getchild);
 +EXPORT_SYMBOL(prom_getsibling);
 +EXPORT_SYMBOL(prom_searchsiblings);
 +EXPORT_SYMBOL(prom_firstprop);
 +EXPORT_SYMBOL(prom_nextprop);
 +EXPORT_SYMBOL(prom_getproplen);
 +EXPORT_SYMBOL(prom_getproperty);
 +EXPORT_SYMBOL(prom_node_has_property);
 +EXPORT_SYMBOL(prom_setprop);
 +EXPORT_SYMBOL(saved_command_line);
 +EXPORT_SYMBOL(prom_apply_obio_ranges);
 +EXPORT_SYMBOL(prom_feval);
 +EXPORT_SYMBOL(prom_getbool);
 +EXPORT_SYMBOL(prom_getstring);
 +EXPORT_SYMBOL(prom_getint);
 +EXPORT_SYMBOL(prom_getintdefault);
 +EXPORT_SYMBOL(prom_finddevice);
 +EXPORT_SYMBOL(romvec);
 +EXPORT_SYMBOL(__prom_getchild);
 +EXPORT_SYMBOL(__prom_getsibling);
 +
 +/* Sparc library symbols. */
 +EXPORT_SYMBOL(memscan);
 +EXPORT_SYMBOL(strlen);
 +EXPORT_SYMBOL(strncmp);
 +EXPORT_SYMBOL(page_kernel);
 +
 +/* Special internal versions of library functions. */
 +EXPORT_SYMBOL(__copy_1page);
 +EXPORT_SYMBOL(__memcpy);
 +EXPORT_SYMBOL(__memset);
 +EXPORT_SYMBOL(bzero_1page);
 +EXPORT_SYMBOL(__bzero);
 +EXPORT_SYMBOL(__memscan_zero);
 +EXPORT_SYMBOL(__memscan_generic);
 +EXPORT_SYMBOL(__strncmp);
 +EXPORT_SYMBOL(__memmove);
 +
 +/* Moving data to/from userspace. */
 +EXPORT_SYMBOL(__copy_user);
 +EXPORT_SYMBOL(__strncpy_from_user);
 +EXPORT_SYMBOL(__strnlen_user);
 +
 +/* Networking helper routines. */
 +EXPORT_SYMBOL(__csum_partial_copy_sparc_generic);
 +EXPORT_SYMBOL(csum_partial);
 +
 +/* Cache flushing.  */
 +EXPORT_SYMBOL(sparc_flush_page_to_ram);
 +
 +/* For when serial stuff is built as modules. */
 +EXPORT_SYMBOL(sun_do_break);
 +
 +EXPORT_SYMBOL(__ret_efault);
 +
 +EXPORT_SYMBOL(memcmp);
 +EXPORT_SYMBOL(memcpy);
 +EXPORT_SYMBOL(memset);
 +EXPORT_SYMBOL(memmove);
 +EXPORT_SYMBOL(__ashrdi3);
 +EXPORT_SYMBOL(__ashldi3);
 +EXPORT_SYMBOL(__lshrdi3);
 +EXPORT_SYMBOL(__muldi3);
 +EXPORT_SYMBOL(__divdi3);
 +
 +/* The "."-named arithmetic helpers, exported through their aliases. */
 +EXPORT_SYMBOL(_Rem);
 +EXPORT_SYMBOL(_Urem);
 +EXPORT_SYMBOL(_Mul);
 +EXPORT_SYMBOL(_Umul);
 +EXPORT_SYMBOL(_Div);
 +EXPORT_SYMBOL(_Udiv);
 +
 +#ifdef CONFIG_DEBUG_BUGVERBOSE
 +EXPORT_SYMBOL(do_BUG);
 +#endif /* CONFIG_DEBUG_BUGVERBOSE */
 +
 +/* Sun Power Management Idle Handler */
 +EXPORT_SYMBOL(pm_idle);
 +
 +EXPORT_SYMBOL(empty_zero_page);
index 141da375909129dea0ab0fffc9d5359d85e9a039,0000000000000000000000000000000000000000..9df8f095a8b11a59e448bec001ac6aec852c480c
mode 100644,000000..100644
--- /dev/null
@@@ -1,862 -1,0 +1,862 @@@
-       sevt->cpumask = cpumask_of_cpu(smp_processor_id());
 +/* time.c: UltraSparc timer and TOD clock support.
 + *
 + * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net)
 + * Copyright (C) 1998 Eddie C. Dost   (ecd@skynet.be)
 + *
 + * Based largely on code which is:
 + *
 + * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
 + */
 +
 +#include <linux/errno.h>
 +#include <linux/module.h>
 +#include <linux/sched.h>
 +#include <linux/smp_lock.h>
 +#include <linux/kernel.h>
 +#include <linux/param.h>
 +#include <linux/string.h>
 +#include <linux/mm.h>
 +#include <linux/interrupt.h>
 +#include <linux/time.h>
 +#include <linux/timex.h>
 +#include <linux/init.h>
 +#include <linux/ioport.h>
 +#include <linux/mc146818rtc.h>
 +#include <linux/delay.h>
 +#include <linux/profile.h>
 +#include <linux/bcd.h>
 +#include <linux/jiffies.h>
 +#include <linux/cpufreq.h>
 +#include <linux/percpu.h>
 +#include <linux/miscdevice.h>
 +#include <linux/rtc.h>
 +#include <linux/rtc/m48t59.h>
 +#include <linux/kernel_stat.h>
 +#include <linux/clockchips.h>
 +#include <linux/clocksource.h>
 +#include <linux/of_device.h>
 +#include <linux/platform_device.h>
 +
 +#include <asm/oplib.h>
 +#include <asm/timer.h>
 +#include <asm/irq.h>
 +#include <asm/io.h>
 +#include <asm/prom.h>
 +#include <asm/starfire.h>
 +#include <asm/smp.h>
 +#include <asm/sections.h>
 +#include <asm/cpudata.h>
 +#include <asm/uaccess.h>
 +#include <asm/irq_regs.h>
 +
 +#include "entry.h"
 +
 +DEFINE_SPINLOCK(rtc_lock);
 +/*
 + * Bit 63 masks.  TICK_PRIV_BIT is cleared in %tick and %asr24 so that
 + * user code may read them, and is masked out of every get_tick()
 + * result.  TICKCMP_IRQ_BIT is written to the compare registers by the
 + * *_disable_irq() routines and masked out of tick values before a new
 + * compare value is computed.
 + */
 +#define TICK_PRIV_BIT (1UL << 63)
 +#define TICKCMP_IRQ_BIT       (1UL << 63)
 +
 +#ifdef CONFIG_SMP
 +unsigned long profile_pc(struct pt_regs *regs)
 +{
 +      unsigned long pc = instruction_pointer(regs);
 +
 +      if (in_lock_functions(pc))
 +              return regs->u_regs[UREG_RETPC];
 +      return pc;
 +}
 +EXPORT_SYMBOL(profile_pc);
 +#endif
 +/*
 + * Clear TICK_PRIV_BIT in %tick.  The asm reads %tick, adds 6, clears
 + * the bit, writes the value back and performs a dummy read.  The code
 + * branches over a ".align 64" so that the sequence starts on a
 + * 64-byte boundary.  %g2 is used as scratch.
 + * NOTE(review): the "+ 6" presumably compensates for the cycles the
 + * sequence itself takes; confirm.
 + */
 +static void tick_disable_protection(void)
 +{
 +      /* Set things up so user can access tick register for profiling
 +       * purposes.  Also workaround BB_ERRATA_1 by doing a dummy
 +       * read back of %tick after writing it.
 +       */
 +      __asm__ __volatile__(
 +      "       ba,pt   %%xcc, 1f\n"
 +      "        nop\n"
 +      "       .align  64\n"
 +      "1:     rd      %%tick, %%g2\n"
 +      "       add     %%g2, 6, %%g2\n"
 +      "       andn    %%g2, %0, %%g2\n"
 +      "       wrpr    %%g2, 0, %%tick\n"
 +      "       rdpr    %%tick, %%g0"
 +      : /* no outputs */
 +      : "r" (TICK_PRIV_BIT)
 +      : "g2");
 +}
 +/*
 + * Write TICKCMP_IRQ_BIT to %tick_cmpr and read the register back.
 + * The write is placed on a 64-byte boundary and followed by a dummy
 + * read, the same %tick_cmpr write workaround that is described in
 + * tick_add_compare().
 + */
 +static void tick_disable_irq(void)
 +{
 +      __asm__ __volatile__(
 +      "       ba,pt   %%xcc, 1f\n"
 +      "        nop\n"
 +      "       .align  64\n"
 +      "1:     wr      %0, 0x0, %%tick_cmpr\n"
 +      "       rd      %%tick_cmpr, %%g0"
 +      : /* no outputs */
 +      : "r" (TICKCMP_IRQ_BIT));
 +}
 +/* init_tick hook of tick_operations: open %tick to user reads, then disable the compare interrupt. */
 +static void tick_init_tick(void)
 +{
 +      tick_disable_protection();
 +      tick_disable_irq();
 +}
 +/*
 + * Return the current %tick value with TICK_PRIV_BIT masked out.
 + * NOTE(review): the purpose of the "mov %0, %0" after the read is not
 + * evident from this file; presumably an erratum workaround - confirm.
 + */
 +static unsigned long tick_get_tick(void)
 +{
 +      unsigned long ret;
 +
 +      __asm__ __volatile__("rd        %%tick, %0\n\t"
 +                           "mov       %0, %0"
 +                           : "=r" (ret));
 +
 +      return ret & ~TICK_PRIV_BIT;
 +}
 +/*
 + * Program %tick_cmpr to the current %tick value plus @adj.
 + * Returns non-zero when %tick, re-read after the write, has already
 + * moved past the programmed value, and zero otherwise.
 + */
 +static int tick_add_compare(unsigned long adj)
 +{
 +      unsigned long orig_tick, new_tick, new_compare;
 +
 +      __asm__ __volatile__("rd        %%tick, %0"
 +                           : "=r" (orig_tick));
 +
 +      orig_tick &= ~TICKCMP_IRQ_BIT;
 +
 +      /* Workaround for Spitfire Errata (#54 I think??), I discovered
 +       * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch
 +       * number 103640.
 +       *
 +       * On Blackbird writes to %tick_cmpr can fail, the
 +       * workaround seems to be to execute the wr instruction
 +       * at the start of an I-cache line, and perform a dummy
 +       * read back from %tick_cmpr right after writing to it. -DaveM
 +       *
 +       * The add in the branch delay slot computes orig_tick + adj.
 +       */
 +      __asm__ __volatile__("ba,pt     %%xcc, 1f\n\t"
 +                           " add      %1, %2, %0\n\t"
 +                           ".align    64\n"
 +                           "1:\n\t"
 +                           "wr        %0, 0, %%tick_cmpr\n\t"
 +                           "rd        %%tick_cmpr, %%g0\n\t"
 +                           : "=r" (new_compare)
 +                           : "r" (orig_tick), "r" (adj));
 +
 +      __asm__ __volatile__("rd        %%tick, %0"
 +                           : "=r" (new_tick));
 +      new_tick &= ~TICKCMP_IRQ_BIT;
 +
 +      return ((long)(new_tick - (orig_tick+adj))) > 0L;       /* signed difference tolerates wrap */
 +}
 +/*
 + * Advance %tick by @adj and return the value that was written.
 + * Unlike the %tick_cmpr writers, this write is neither aligned nor
 + * followed by a dummy read; the comment in the body marks that as
 + * still to be done.
 + */
 +static unsigned long tick_add_tick(unsigned long adj)
 +{
 +      unsigned long new_tick;
 +
 +      /* Also need to handle Blackbird bug here too. */
 +      __asm__ __volatile__("rd        %%tick, %0\n\t"
 +                           "add       %0, %1, %0\n\t"
 +                           "wrpr      %0, 0, %%tick\n\t"
 +                           : "=&r" (new_tick)
 +                           : "r" (adj));
 +
 +      return new_tick;
 +}
 +/* Operations table for the %tick based timer; tick_ops points at it initially. */
 +static struct sparc64_tick_ops tick_operations __read_mostly = {
 +      .name           =       "tick",
 +      .init_tick      =       tick_init_tick,
 +      .disable_irq    =       tick_disable_irq,
 +      .get_tick       =       tick_get_tick,
 +      .add_tick       =       tick_add_tick,
 +      .add_compare    =       tick_add_compare,
 +      .softint_mask   =       1UL << 0,
 +};
 +
 +struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations;
 +/* Write TICKCMP_IRQ_BIT to %asr25, which this file uses as the STICK compare register. */
 +static void stick_disable_irq(void)
 +{
 +      __asm__ __volatile__(
 +      "wr     %0, 0x0, %%asr25"
 +      : /* no outputs */
 +      : "r" (TICKCMP_IRQ_BIT));
 +}
 +/*
 + * init_tick hook of stick_operations.  On non-hypervisor systems it
 + * opens %tick and STICK (%asr24) to user reads and disables the %tick
 + * compare interrupt; on all systems it disables the STICK compare
 + * interrupt.
 + */
 +static void stick_init_tick(void)
 +{
 +      /* Writes to the %tick and %stick register are not
 +       * allowed on sun4v.  The Hypervisor controls that
 +       * bit, per-strand.
 +       */
 +      if (tlb_type != hypervisor) {
 +              tick_disable_protection();
 +              tick_disable_irq();
 +
 +              /* Let the user get at STICK too.
 +               * NOTE(review): %g1 is listed as clobbered but the
 +               * asm only touches %g2.
 +               */
 +              __asm__ __volatile__(
 +              "       rd      %%asr24, %%g2\n"
 +              "       andn    %%g2, %0, %%g2\n"
 +              "       wr      %%g2, 0, %%asr24"
 +              : /* no outputs */
 +              : "r" (TICK_PRIV_BIT)
 +              : "g1", "g2");
 +      }
 +
 +      stick_disable_irq();
 +}
 +/* Return the current %asr24 (STICK) value with TICK_PRIV_BIT masked out. */
 +static unsigned long stick_get_tick(void)
 +{
 +      unsigned long ret;
 +
 +      __asm__ __volatile__("rd        %%asr24, %0"
 +                           : "=r" (ret));
 +
 +      return ret & ~TICK_PRIV_BIT;
 +}
 +/* Advance %asr24 (STICK) by @adj and return the value that was written. */
 +static unsigned long stick_add_tick(unsigned long adj)
 +{
 +      unsigned long new_tick;
 +
 +      __asm__ __volatile__("rd        %%asr24, %0\n\t"
 +                           "add       %0, %1, %0\n\t"
 +                           "wr        %0, 0, %%asr24\n\t"
 +                           : "=&r" (new_tick)
 +                           : "r" (adj));
 +
 +      return new_tick;
 +}
 +/*
 + * Program %asr25 to the current %asr24 value plus @adj.  Returns
 + * non-zero when %asr24, re-read after the write, has already moved
 + * past the programmed value, and zero otherwise.
 + */
 +static int stick_add_compare(unsigned long adj)
 +{
 +      unsigned long orig_tick, new_tick;
 +
 +      __asm__ __volatile__("rd        %%asr24, %0"
 +                           : "=r" (orig_tick));
 +      orig_tick &= ~TICKCMP_IRQ_BIT;
 +
 +      __asm__ __volatile__("wr        %0, 0, %%asr25"
 +                           : /* no outputs */
 +                           : "r" (orig_tick + adj));
 +
 +      __asm__ __volatile__("rd        %%asr24, %0"
 +                           : "=r" (new_tick));
 +      new_tick &= ~TICKCMP_IRQ_BIT;
 +
 +      return ((long)(new_tick - (orig_tick+adj))) > 0L;       /* signed difference tolerates wrap */
 +}
 +/* Operations table for the STICK (%asr24 / %asr25) based timer. */
 +static struct sparc64_tick_ops stick_operations __read_mostly = {
 +      .name           =       "stick",
 +      .init_tick      =       stick_init_tick,
 +      .disable_irq    =       stick_disable_irq,
 +      .get_tick       =       stick_get_tick,
 +      .add_tick       =       stick_add_tick,
 +      .add_compare    =       stick_add_compare,
 +      .softint_mask   =       1UL << 16,
 +};
 +
 +/* On Hummingbird the STICK/STICK_CMPR register is implemented
 + * in I/O space.  There are two 64-bit registers each, the
 + * first holds the low 32-bits of the value and the second holds
 + * the high 32-bits.  The accessors below address the low word
 + * at the base address and the high word at base + 8.
 + *
 + * Since STICK is constantly updating, we have to access it carefully.
 + *
 + * The sequence we use to read is:
 + * 1) read high
 + * 2) read low
 + * 3) read high again, if it rolled re-read both low and high again.
 + *
 + * Writing STICK safely is also tricky:
 + * 1) write low to zero
 + * 2) write high
 + * 3) write low
 + */
 +#define HBIRD_STICKCMP_ADDR   0x1fe0000f060UL
 +#define HBIRD_STICK_ADDR      0x1fe0000f070UL
 +/*
 + * Read the 64-bit Hummingbird STICK value from I/O space.
 + * The high word is read first, then low and high again in a loop; if
 + * the two high reads differ the loop repeats with the newer high
 + * value as reference.  The result is (high << 32) | low.  All
 + * accesses use ASI_PHYS_BYPASS_EC_E.
 + */
 +static unsigned long __hbird_read_stick(void)
 +{
 +      unsigned long ret, tmp1, tmp2, tmp3;
 +      unsigned long addr = HBIRD_STICK_ADDR+8;        /* start at the high word */
 +
 +      __asm__ __volatile__("ldxa      [%1] %5, %2\n"
 +                           "1:\n\t"
 +                           "sub       %1, 0x8, %1\n\t"
 +                           "ldxa      [%1] %5, %3\n\t"
 +                           "add       %1, 0x8, %1\n\t"
 +                           "ldxa      [%1] %5, %4\n\t"
 +                           "cmp       %4, %2\n\t"
 +                           "bne,a,pn  %%xcc, 1b\n\t"
 +                           " mov      %4, %2\n\t"
 +                           "sllx      %4, 32, %4\n\t"
 +                           "or        %3, %4, %0\n\t"
 +                           : "=&r" (ret), "=&r" (addr),
 +                             "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3)
 +                           : "i" (ASI_PHYS_BYPASS_EC_E), "1" (addr));
 +
 +      return ret;
 +}
 +/*
 + * Write @val to the Hummingbird STICK register: zero is stored to the
 + * low word first, then the high 32 bits to the high word, then the
 + * low 32 bits to the low word.
 + */
 +static void __hbird_write_stick(unsigned long val)
 +{
 +      unsigned long low = (val & 0xffffffffUL);
 +      unsigned long high = (val >> 32UL);
 +      unsigned long addr = HBIRD_STICK_ADDR;
 +
 +      __asm__ __volatile__("stxa      %%g0, [%0] %4\n\t"
 +                           "add       %0, 0x8, %0\n\t"
 +                           "stxa      %3, [%0] %4\n\t"
 +                           "sub       %0, 0x8, %0\n\t"
 +                           "stxa      %2, [%0] %4"
 +                           : "=&r" (addr)
 +                           : "0" (addr), "r" (low), "r" (high),
 +                             "i" (ASI_PHYS_BYPASS_EC_E));
 +}
 +/*
 + * Write @val to the Hummingbird STICK compare register: the high
 + * 32 bits go to the high word first, then the low 32 bits to the low
 + * word.
 + */
 +static void __hbird_write_compare(unsigned long val)
 +{
 +      unsigned long low = (val & 0xffffffffUL);
 +      unsigned long high = (val >> 32UL);
 +      unsigned long addr = HBIRD_STICKCMP_ADDR + 0x8UL;       /* start at the high word */
 +
 +      __asm__ __volatile__("stxa      %3, [%0] %4\n\t"
 +                           "sub       %0, 0x8, %0\n\t"
 +                           "stxa      %2, [%0] %4"
 +                           : "=&r" (addr)
 +                           : "0" (addr), "r" (low), "r" (high),
 +                             "i" (ASI_PHYS_BYPASS_EC_E));
 +}
 +
 +static void hbtick_disable_irq(void)
 +{
 +      __hbird_write_compare(TICKCMP_IRQ_BIT);
 +}
 +
 +static void hbtick_init_tick(void)
 +{
 +      tick_disable_protection();
 +
 +      /* XXX This seems to be necessary to 'jumpstart' Hummingbird
 +       * XXX into actually sending STICK interrupts.  I think because
 +       * XXX of how we store %tick_cmpr in head.S this somehow resets the
 +       * XXX {TICK + STICK} interrupt mux.  -DaveM
 +       */
 +      __hbird_write_stick(__hbird_read_stick());
 +
 +      hbtick_disable_irq();
 +}
 +
 +static unsigned long hbtick_get_tick(void)
 +{
 +      return __hbird_read_stick() & ~TICK_PRIV_BIT;
 +}
 +
 +static unsigned long hbtick_add_tick(unsigned long adj)
 +{
 +      unsigned long val;
 +
 +      val = __hbird_read_stick() + adj;
 +      __hbird_write_stick(val);
 +
 +      return val;
 +}
 +
 +static int hbtick_add_compare(unsigned long adj)
 +{
 +      unsigned long val = __hbird_read_stick();
 +      unsigned long val2;
 +
 +      val &= ~TICKCMP_IRQ_BIT;
 +      val += adj;
 +      __hbird_write_compare(val);
 +
 +      val2 = __hbird_read_stick() & ~TICKCMP_IRQ_BIT;
 +
 +      return ((long)(val2 - val)) > 0L;
 +}
 +
 +static struct sparc64_tick_ops hbtick_operations __read_mostly = {
 +      .name           =       "hbtick",
 +      .init_tick      =       hbtick_init_tick,
 +      .disable_irq    =       hbtick_disable_irq,
 +      .get_tick       =       hbtick_get_tick,
 +      .add_tick       =       hbtick_add_tick,
 +      .add_compare    =       hbtick_add_compare,
 +      .softint_mask   =       1UL << 0,
 +};
 +
 +static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
 +
 +int update_persistent_clock(struct timespec now)
 +{
 +      struct rtc_device *rtc = rtc_class_open("rtc0");
 +      int err = -1;
 +
 +      if (rtc) {
 +              err = rtc_set_mmss(rtc, now.tv_sec);
 +              rtc_class_close(rtc);
 +      }
 +
 +      return err;
 +}
 +
 +unsigned long cmos_regs;
 +EXPORT_SYMBOL(cmos_regs);
 +
 +static struct resource rtc_cmos_resource;
 +
 +static struct platform_device rtc_cmos_device = {
 +      .name           = "rtc_cmos",
 +      .id             = -1,
 +      .resource       = &rtc_cmos_resource,
 +      .num_resources  = 1,
 +};
 +
 +static int __devinit rtc_probe(struct of_device *op, const struct of_device_id *match)
 +{
 +      struct resource *r;
 +
 +      printk(KERN_INFO "%s: RTC regs at 0x%lx\n",
 +             op->node->full_name, op->resource[0].start);
 +
 +      /* The CMOS RTC driver only accepts IORESOURCE_IO, so cons
 +       * up a fake resource so that the probe works for all cases.
 +       * When the RTC is behind an ISA bus it will have IORESOURCE_IO
 +       * already, whereas when it's behind EBUS is will be IORESOURCE_MEM.
 +       */
 +
 +      r = &rtc_cmos_resource;
 +      r->flags = IORESOURCE_IO;
 +      r->name = op->resource[0].name;
 +      r->start = op->resource[0].start;
 +      r->end = op->resource[0].end;
 +
 +      cmos_regs = op->resource[0].start;
 +      return platform_device_register(&rtc_cmos_device);
 +}
 +
 +static struct of_device_id __initdata rtc_match[] = {
 +      {
 +              .name = "rtc",
 +              .compatible = "m5819",
 +      },
 +      {
 +              .name = "rtc",
 +              .compatible = "isa-m5819p",
 +      },
 +      {
 +              .name = "rtc",
 +              .compatible = "isa-m5823p",
 +      },
 +      {
 +              .name = "rtc",
 +              .compatible = "ds1287",
 +      },
 +      {},
 +};
 +
 +static struct of_platform_driver rtc_driver = {
 +      .match_table    = rtc_match,
 +      .probe          = rtc_probe,
 +      .driver         = {
 +              .name   = "rtc",
 +      },
 +};
 +
 +static struct platform_device rtc_bq4802_device = {
 +      .name           = "rtc-bq4802",
 +      .id             = -1,
 +      .num_resources  = 1,
 +};
 +
 +static int __devinit bq4802_probe(struct of_device *op, const struct of_device_id *match)
 +{
 +
 +      printk(KERN_INFO "%s: BQ4802 regs at 0x%lx\n",
 +             op->node->full_name, op->resource[0].start);
 +
 +      rtc_bq4802_device.resource = &op->resource[0];
 +      return platform_device_register(&rtc_bq4802_device);
 +}
 +
 +static struct of_device_id __initdata bq4802_match[] = {
 +      {
 +              .name = "rtc",
 +              .compatible = "bq4802",
 +      },
 +      {},
 +};
 +
 +static struct of_platform_driver bq4802_driver = {
 +      .match_table    = bq4802_match,
 +      .probe          = bq4802_probe,
 +      .driver         = {
 +              .name   = "bq4802",
 +      },
 +};
 +
 +static unsigned char mostek_read_byte(struct device *dev, u32 ofs)
 +{
 +      struct platform_device *pdev = to_platform_device(dev);
 +      void __iomem *regs = (void __iomem *) pdev->resource[0].start;
 +
 +      return readb(regs + ofs);
 +}
 +
 +static void mostek_write_byte(struct device *dev, u32 ofs, u8 val)
 +{
 +      struct platform_device *pdev = to_platform_device(dev);
 +      void __iomem *regs = (void __iomem *) pdev->resource[0].start;
 +
 +      writeb(val, regs + ofs);
 +}
 +
 +static struct m48t59_plat_data m48t59_data = {
 +      .read_byte      = mostek_read_byte,
 +      .write_byte     = mostek_write_byte,
 +};
 +
 +static struct platform_device m48t59_rtc = {
 +      .name           = "rtc-m48t59",
 +      .id             = 0,
 +      .num_resources  = 1,
 +      .dev    = {
 +              .platform_data = &m48t59_data,
 +      },
 +};
 +
 +static int __devinit mostek_probe(struct of_device *op, const struct of_device_id *match)
 +{
 +      struct device_node *dp = op->node;
 +
 +      /* On an Enterprise system there can be multiple mostek clocks.
 +       * We should only match the one that is on the central FHC bus.
 +       */
 +      if (!strcmp(dp->parent->name, "fhc") &&
 +          strcmp(dp->parent->parent->name, "central") != 0)
 +              return -ENODEV;
 +
 +      printk(KERN_INFO "%s: Mostek regs at 0x%lx\n",
 +             dp->full_name, op->resource[0].start);
 +
 +      m48t59_rtc.resource = &op->resource[0];
 +      return platform_device_register(&m48t59_rtc);
 +}
 +
 +static struct of_device_id __initdata mostek_match[] = {
 +      {
 +              .name = "eeprom",
 +      },
 +      {},
 +};
 +
 +static struct of_platform_driver mostek_driver = {
 +      .match_table    = mostek_match,
 +      .probe          = mostek_probe,
 +      .driver         = {
 +              .name   = "mostek",
 +      },
 +};
 +
 +static struct platform_device rtc_sun4v_device = {
 +      .name           = "rtc-sun4v",
 +      .id             = -1,
 +};
 +
 +static struct platform_device rtc_starfire_device = {
 +      .name           = "rtc-starfire",
 +      .id             = -1,
 +};
 +
 +static int __init clock_init(void)
 +{
 +      if (this_is_starfire)
 +              return platform_device_register(&rtc_starfire_device);
 +
 +      if (tlb_type == hypervisor)
 +              return platform_device_register(&rtc_sun4v_device);
 +
 +      (void) of_register_driver(&rtc_driver, &of_platform_bus_type);
 +      (void) of_register_driver(&mostek_driver, &of_platform_bus_type);
 +      (void) of_register_driver(&bq4802_driver, &of_platform_bus_type);
 +
 +      return 0;
 +}
 +
 +/* Must be after subsys_initcall() so that busses are probed.  Must
 + * be before device_initcall() because things like the RTC driver
 + * need to see the clock registers.
 + */
 +fs_initcall(clock_init);
 +
 +/* This is gets the master TICK_INT timer going. */
 +static unsigned long sparc64_init_timers(void)
 +{
 +      struct device_node *dp;
 +      unsigned long freq;
 +
 +      dp = of_find_node_by_path("/");
 +      if (tlb_type == spitfire) {
 +              unsigned long ver, manuf, impl;
 +
 +              __asm__ __volatile__ ("rdpr %%ver, %0"
 +                                    : "=&r" (ver));
 +              manuf = ((ver >> 48) & 0xffff);
 +              impl = ((ver >> 32) & 0xffff);
 +              if (manuf == 0x17 && impl == 0x13) {
 +                      /* Hummingbird, aka Ultra-IIe */
 +                      tick_ops = &hbtick_operations;
 +                      freq = of_getintprop_default(dp, "stick-frequency", 0);
 +              } else {
 +                      tick_ops = &tick_operations;
 +                      freq = local_cpu_data().clock_tick;
 +              }
 +      } else {
 +              tick_ops = &stick_operations;
 +              freq = of_getintprop_default(dp, "stick-frequency", 0);
 +      }
 +
 +      return freq;
 +}
 +
 +struct freq_table {
 +      unsigned long clock_tick_ref;
 +      unsigned int ref_freq;
 +};
 +static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 };
 +
 +unsigned long sparc64_get_clock_tick(unsigned int cpu)
 +{
 +      struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
 +
 +      if (ft->clock_tick_ref)
 +              return ft->clock_tick_ref;
 +      return cpu_data(cpu).clock_tick;
 +}
 +
 +#ifdef CONFIG_CPU_FREQ
 +
 +static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 +                                  void *data)
 +{
 +      struct cpufreq_freqs *freq = data;
 +      unsigned int cpu = freq->cpu;
 +      struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
 +
 +      if (!ft->ref_freq) {
 +              ft->ref_freq = freq->old;
 +              ft->clock_tick_ref = cpu_data(cpu).clock_tick;
 +      }
 +      if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
 +          (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
 +          (val == CPUFREQ_RESUMECHANGE)) {
 +              cpu_data(cpu).clock_tick =
 +                      cpufreq_scale(ft->clock_tick_ref,
 +                                    ft->ref_freq,
 +                                    freq->new);
 +      }
 +
 +      return 0;
 +}
 +
 +static struct notifier_block sparc64_cpufreq_notifier_block = {
 +      .notifier_call  = sparc64_cpufreq_notifier
 +};
 +
 +static int __init register_sparc64_cpufreq_notifier(void)
 +{
 +
 +      cpufreq_register_notifier(&sparc64_cpufreq_notifier_block,
 +                                CPUFREQ_TRANSITION_NOTIFIER);
 +      return 0;
 +}
 +
 +core_initcall(register_sparc64_cpufreq_notifier);
 +
 +#endif /* CONFIG_CPU_FREQ */
 +
 +static int sparc64_next_event(unsigned long delta,
 +                            struct clock_event_device *evt)
 +{
 +      return tick_ops->add_compare(delta) ? -ETIME : 0;
 +}
 +
 +static void sparc64_timer_setup(enum clock_event_mode mode,
 +                              struct clock_event_device *evt)
 +{
 +      switch (mode) {
 +      case CLOCK_EVT_MODE_ONESHOT:
 +      case CLOCK_EVT_MODE_RESUME:
 +              break;
 +
 +      case CLOCK_EVT_MODE_SHUTDOWN:
 +              tick_ops->disable_irq();
 +              break;
 +
 +      case CLOCK_EVT_MODE_PERIODIC:
 +      case CLOCK_EVT_MODE_UNUSED:
 +              WARN_ON(1);
 +              break;
 +      };
 +}
 +
 +static struct clock_event_device sparc64_clockevent = {
 +      .features       = CLOCK_EVT_FEAT_ONESHOT,
 +      .set_mode       = sparc64_timer_setup,
 +      .set_next_event = sparc64_next_event,
 +      .rating         = 100,
 +      .shift          = 30,
 +      .irq            = -1,
 +};
 +static DEFINE_PER_CPU(struct clock_event_device, sparc64_events);
 +
 +void timer_interrupt(int irq, struct pt_regs *regs)
 +{
 +      struct pt_regs *old_regs = set_irq_regs(regs);
 +      unsigned long tick_mask = tick_ops->softint_mask;
 +      int cpu = smp_processor_id();
 +      struct clock_event_device *evt = &per_cpu(sparc64_events, cpu);
 +
 +      clear_softint(tick_mask);
 +
 +      irq_enter();
 +
 +      kstat_this_cpu.irqs[0]++;
 +
 +      if (unlikely(!evt->event_handler)) {
 +              printk(KERN_WARNING
 +                     "Spurious SPARC64 timer interrupt on cpu %d\n", cpu);
 +      } else
 +              evt->event_handler(evt);
 +
 +      irq_exit();
 +
 +      set_irq_regs(old_regs);
 +}
 +
 +void __devinit setup_sparc64_timer(void)
 +{
 +      struct clock_event_device *sevt;
 +      unsigned long pstate;
 +
 +      /* Guarantee that the following sequences execute
 +       * uninterrupted.
 +       */
 +      __asm__ __volatile__("rdpr      %%pstate, %0\n\t"
 +                           "wrpr      %0, %1, %%pstate"
 +                           : "=r" (pstate)
 +                           : "i" (PSTATE_IE));
 +
 +      tick_ops->init_tick();
 +
 +      /* Restore PSTATE_IE. */
 +      __asm__ __volatile__("wrpr      %0, 0x0, %%pstate"
 +                           : /* no outputs */
 +                           : "r" (pstate));
 +
 +      sevt = &__get_cpu_var(sparc64_events);
 +
 +      memcpy(sevt, &sparc64_clockevent, sizeof(*sevt));
++      sevt->cpumask = cpumask_of(smp_processor_id());
 +
 +      clockevents_register_device(sevt);
 +}
 +
 +#define SPARC64_NSEC_PER_CYC_SHIFT    10UL
 +
 +static struct clocksource clocksource_tick = {
 +      .rating         = 100,
 +      .mask           = CLOCKSOURCE_MASK(64),
 +      .shift          = 16,
 +      .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 +};
 +
 +static void __init setup_clockevent_multiplier(unsigned long hz)
 +{
 +      unsigned long mult, shift = 32;
 +
 +      while (1) {
 +              mult = div_sc(hz, NSEC_PER_SEC, shift);
 +              if (mult && (mult >> 32UL) == 0UL)
 +                      break;
 +
 +              shift--;
 +      }
 +
 +      sparc64_clockevent.shift = shift;
 +      sparc64_clockevent.mult = mult;
 +}
 +
 +static unsigned long tb_ticks_per_usec __read_mostly;
 +
 +void __delay(unsigned long loops)
 +{
 +      unsigned long bclock, now;
 +
 +      bclock = tick_ops->get_tick();
 +      do {
 +              now = tick_ops->get_tick();
 +      } while ((now-bclock) < loops);
 +}
 +EXPORT_SYMBOL(__delay);
 +
 +void udelay(unsigned long usecs)
 +{
 +      __delay(tb_ticks_per_usec * usecs);
 +}
 +EXPORT_SYMBOL(udelay);
 +
 +void __init time_init(void)
 +{
 +      unsigned long freq = sparc64_init_timers();
 +
 +      tb_ticks_per_usec = freq / USEC_PER_SEC;
 +
 +      timer_ticks_per_nsec_quotient =
 +              clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT);
 +
 +      clocksource_tick.name = tick_ops->name;
 +      clocksource_tick.mult =
 +              clocksource_hz2mult(freq,
 +                                  clocksource_tick.shift);
 +      clocksource_tick.read = tick_ops->get_tick;
 +
 +      printk("clocksource: mult[%x] shift[%d]\n",
 +             clocksource_tick.mult, clocksource_tick.shift);
 +
 +      clocksource_register(&clocksource_tick);
 +
 +      sparc64_clockevent.name = tick_ops->name;
 +
 +      setup_clockevent_multiplier(freq);
 +
 +      sparc64_clockevent.max_delta_ns =
 +              clockevent_delta2ns(0x7fffffffffffffffUL, &sparc64_clockevent);
 +      sparc64_clockevent.min_delta_ns =
 +              clockevent_delta2ns(0xF, &sparc64_clockevent);
 +
 +      printk("clockevent: mult[%lx] shift[%d]\n",
 +             sparc64_clockevent.mult, sparc64_clockevent.shift);
 +
 +      setup_sparc64_timer();
 +}
 +
 +unsigned long long sched_clock(void)
 +{
 +      unsigned long ticks = tick_ops->get_tick();
 +
 +      return (ticks * timer_ticks_per_nsec_quotient)
 +              >> SPARC64_NSEC_PER_CYC_SHIFT;
 +}
 +
 +int __devinit read_current_timer(unsigned long *timer_val)
 +{
 +      *timer_val = tick_ops->get_tick();
 +      return 0;
 +}
Simple merge
index 28e409fc73f3df33e4c6b2cbde5e99e0433e0fa1,4bb732e45a85f5f1296452f31c34b043783b8ecd..592688ed04d33462d172936d56dff55fc6e52eac
@@@ -31,9 -31,13 +31,9 @@@ static inline int irq_canonicalize(int 
  # endif
  #endif
  
 -#ifdef CONFIG_IRQBALANCE
 -extern int irqbalance_disable(char *str);
 -#endif
 -
  #ifdef CONFIG_HOTPLUG_CPU
  #include <linux/cpumask.h>
- extern void fixup_irqs(cpumask_t map);
+ extern void fixup_irqs(void);
  #endif
  
  extern unsigned int do_IRQ(struct pt_regs *regs);
Simple merge
Simple merge
Simple merge
Simple merge
index 74917658b004aea2eb08dca5c5c9ba8a775eb81e,1cbf7c8d46e0d681752f4e6ac9fe83d0464254f0..62ecfc991e1e61b747567cb2ada8f8d68d0774b2
@@@ -152,25 -152,25 +152,25 @@@ static struct irq_cfg irq_cfgx[] = 
  #else
  static struct irq_cfg irq_cfgx[NR_IRQS] = {
  #endif
-       [0]  = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR,  },
-       [1]  = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR,  },
-       [2]  = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR,  },
-       [3]  = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR,  },
-       [4]  = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR,  },
-       [5]  = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR,  },
-       [6]  = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR,  },
-       [7]  = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR,  },
-       [8]  = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR,  },
-       [9]  = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR,  },
-       [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, },
-       [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, },
-       [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, },
-       [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, },
-       [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, },
-       [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, },
+       [0]  = { .vector = IRQ0_VECTOR,  },
+       [1]  = { .vector = IRQ1_VECTOR,  },
+       [2]  = { .vector = IRQ2_VECTOR,  },
+       [3]  = { .vector = IRQ3_VECTOR,  },
+       [4]  = { .vector = IRQ4_VECTOR,  },
+       [5]  = { .vector = IRQ5_VECTOR,  },
+       [6]  = { .vector = IRQ6_VECTOR,  },
+       [7]  = { .vector = IRQ7_VECTOR,  },
+       [8]  = { .vector = IRQ8_VECTOR,  },
+       [9]  = { .vector = IRQ9_VECTOR,  },
+       [10] = { .vector = IRQ10_VECTOR, },
+       [11] = { .vector = IRQ11_VECTOR, },
+       [12] = { .vector = IRQ12_VECTOR, },
+       [13] = { .vector = IRQ13_VECTOR, },
+       [14] = { .vector = IRQ14_VECTOR, },
+       [15] = { .vector = IRQ15_VECTOR, },
  };
  
 -void __init arch_early_irq_init(void)
 +int __init arch_early_irq_init(void)
  {
        struct irq_cfg *cfg;
        struct irq_desc *desc;
        for (i = 0; i < count; i++) {
                desc = irq_to_desc(i);
                desc->chip_data = &cfg[i];
+               alloc_bootmem_cpumask_var(&cfg[i].domain);
+               alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+               if (i < NR_IRQS_LEGACY)
+                       cpumask_setall(cfg[i].domain);
        }
 +
 +      return 0;
  }
  
  #ifdef CONFIG_SPARSE_IRQ
@@@ -1349,8 -1400,10 +1404,8 @@@ void __setup_vector_irq(int cpu
  
        /* Mark the inuse vectors */
        for_each_irq_desc(irq, desc) {
 -              if (!desc)
 -                      continue;
                cfg = desc->chip_data;
-               if (!cpu_isset(cpu, cfg->domain))
+               if (!cpumask_test_cpu(cpu, cfg->domain))
                        continue;
                vector = cfg->vector;
                per_cpu(vector_irq, cpu)[vector] = irq;
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
diff --cc init/Kconfig
Simple merge
Simple merge
Simple merge
diff --cc kernel/sched.c
index fff1c4a20b6538966a0cf2b97a012c045d52b84d,756d981d91a40e0e80239ebca62355c62db06d78..27ba1d642f0f0c4c370e81a61b067874310532d0
@@@ -5426,8 -5477,17 +5495,16 @@@ long sched_setaffinity(pid_t pid, cons
        get_task_struct(p);
        read_unlock(&tasklist_lock);
  
+       if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+               retval = -ENOMEM;
+               goto out_put_task;
+       }
+       if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
+               retval = -ENOMEM;
+               goto out_free_cpus_allowed;
+       }
        retval = -EPERM;
 -      if ((current->euid != p->euid) && (current->euid != p->uid) &&
 -                      !capable(CAP_SYS_NICE))
 +      if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
                goto out_unlock;
  
        retval = security_task_setscheduler(p, 0, NULL);
Simple merge
Simple merge
Simple merge
index 8f3fc2582d38b7073927e9dc004ef38e16909ab0,70f872c71f4e47dbaef945cc99990fc9ce01d1db..76a574bbef97270672dee229b81b77872ba319e6
@@@ -282,31 -282,8 +282,31 @@@ void tick_nohz_stop_sched_tick(int inid
        /* Schedule the tick, if we are at least one jiffie off */
        if ((long)delta_jiffies >= 1) {
  
 +              /*
 +              * calculate the expiry time for the next timer wheel
 +              * timer
 +              */
 +              expires = ktime_add_ns(last_update, tick_period.tv64 *
 +                                 delta_jiffies);
 +
 +              /*
 +               * If this cpu is the one which updates jiffies, then
 +               * give up the assignment and let it be taken by the
 +               * cpu which runs the tick timer next, which might be
 +               * this cpu as well. If we don't drop this here the
 +               * jiffies might be stale and do_timer() never
 +               * invoked.
 +               */
 +              if (cpu == tick_do_timer_cpu)
 +                      tick_do_timer_cpu = TICK_DO_TIMER_NONE;
 +
                if (delta_jiffies > 1)
-                       cpu_set(cpu, nohz_cpu_mask);
+                       cpumask_set_cpu(cpu, nohz_cpu_mask);
 +
 +              /* Skip reprogram of event if its not changed */
 +              if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
 +                      goto out;
 +
                /*
                 * nohz_stop_sched_tick can be called several times before
                 * the nohz_restart_sched_tick is called. This happens when
Simple merge
diff --cc lib/Kconfig
Simple merge
diff --cc mm/slub.c
Simple merge