summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-04-20 18:08:07 +0200
committerIngo Molnar <mingo@elte.hu>2009-04-20 18:08:12 +0200
commit62d170290979e0bb805d969cca4ea852bdd45260 (patch)
tree837372297501a2d144358b44e7db3f88c5612aa2 /arch/x86
parent8b5b94e4e9813cdd77103827f48d58c806ab45c6 (diff)
parentd91dfbb41bb2e9bdbfbd2cc7078ed7436eab027a (diff)
Merge branch 'linus' into x86/urgent
Merge reason: We need the x86/uv updates from upstream, to queue up dependent fix. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/io.h6
-rw-r--r--arch/x86/include/asm/lguest_hcall.h2
-rw-r--r--arch/x86/include/asm/paravirt.h4
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/required-features.h2
-rw-r--r--arch/x86/include/asm/sigcontext.h6
-rw-r--r--arch/x86/include/asm/uv/uv_mmrs.h5
-rw-r--r--arch/x86/include/asm/xen/page.h3
-rw-r--r--arch/x86/include/asm/xsave.h3
-rw-r--r--arch/x86/kernel/apic/apic.c6
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c16
-rw-r--r--arch/x86/kernel/bios_uv.c3
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c1
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c81
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c1
-rw-r--r--arch/x86/kernel/ftrace.c2
-rw-r--r--arch/x86/kernel/microcode_core.c33
-rw-r--r--arch/x86/kernel/mpparse.c7
-rw-r--r--arch/x86/kernel/ptrace.c3
-rw-r--r--arch/x86/kernel/tlb_uv.c189
-rw-r--r--arch/x86/kernel/uv_sysfs.c4
-rw-r--r--arch/x86/kernel/xsave.c2
-rw-r--r--arch/x86/lguest/boot.c16
-rw-r--r--arch/x86/mm/ioremap.c23
-rw-r--r--arch/x86/mm/pat.c2
-rw-r--r--arch/x86/mm/pgtable.c3
-rw-r--r--arch/x86/xen/enlighten.c89
-rw-r--r--arch/x86/xen/mmu.c118
-rw-r--r--arch/x86/xen/mmu.h3
-rw-r--r--arch/x86/xen/smp.c4
-rw-r--r--arch/x86/xen/xen-ops.h2
34 files changed, 433 insertions, 218 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bc25b9f5e4cd..c9086e6307a5 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -353,6 +353,7 @@ config X86_UV
bool "SGI Ultraviolet"
depends on X86_64
depends on X86_EXTENDED_PLATFORM
+ depends on NUMA
select X86_X2APIC
---help---
This option is needed in order to support SGI Ultraviolet systems.
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0beba0d1468d..bb83b1c397aa 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -154,6 +154,7 @@
* CPUID levels like 0x6, 0xA etc
*/
#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (7*32+ 1) /* Always Running APIC Timer */
/* Virtualization flags: Linux defined */
#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 81937a5dc77c..2d81af3974a0 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -151,11 +151,11 @@ extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
void native_set_fixmap(enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags);
+ phys_addr_t phys, pgprot_t flags);
#ifndef CONFIG_PARAVIRT
static inline void __set_fixmap(enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags)
+ phys_addr_t phys, pgprot_t flags)
{
native_set_fixmap(idx, phys, flags);
}
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index e5383e3d2f8c..73739322b6d0 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -193,8 +193,10 @@ extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
*/
extern void early_ioremap_init(void);
extern void early_ioremap_reset(void);
-extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
-extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
+extern void __iomem *early_ioremap(resource_size_t phys_addr,
+ unsigned long size);
+extern void __iomem *early_memremap(resource_size_t phys_addr,
+ unsigned long size);
extern void early_iounmap(void __iomem *addr, unsigned long size);
#define IO_SPACE_LIMIT 0xffff
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index 0f4ee7148afe..faae1996487b 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -5,7 +5,6 @@
#define LHCALL_FLUSH_ASYNC 0
#define LHCALL_LGUEST_INIT 1
#define LHCALL_SHUTDOWN 2
-#define LHCALL_LOAD_GDT 3
#define LHCALL_NEW_PGTABLE 4
#define LHCALL_FLUSH_TLB 5
#define LHCALL_LOAD_IDT_ENTRY 6
@@ -17,6 +16,7 @@
#define LHCALL_SET_PMD 15
#define LHCALL_LOAD_TLS 16
#define LHCALL_NOTIFY 17
+#define LHCALL_LOAD_GDT_ENTRY 18
#define LGUEST_TRAP_ENTRY 0x1F
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 7727aa8b7dda..378e3691c08c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -347,7 +347,7 @@ struct pv_mmu_ops {
/* Sometimes the physical address is a pfn, and sometimes its
an mfn. We can tell which is which from the index. */
void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx,
- unsigned long phys, pgprot_t flags);
+ phys_addr_t phys, pgprot_t flags);
};
struct raw_spinlock;
@@ -1432,7 +1432,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
void arch_flush_lazy_mmu_mode(void);
static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
- unsigned long phys, pgprot_t flags)
+ phys_addr_t phys, pgprot_t flags)
{
pv_mmu_ops.set_fixmap(idx, phys, flags);
}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 34c52370f2fe..fcf4d92e7e04 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -352,6 +352,11 @@ struct i387_soft_struct {
u32 entry_eip;
};
+struct ymmh_struct {
+ /* 16 * 16 bytes for each YMMH-reg = 256 bytes */
+ u32 ymmh_space[64];
+};
+
struct xsave_hdr_struct {
u64 xstate_bv;
u64 reserved1[2];
@@ -361,6 +366,7 @@ struct xsave_hdr_struct {
struct xsave_struct {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
+ struct ymmh_struct ymmh;
/* new processor state extensions will go here */
} __attribute__ ((packed, aligned (64)));
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index d5cd6c586881..a4737dddfd58 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -50,7 +50,7 @@
#ifdef CONFIG_X86_64
#define NEED_PSE 0
#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
-#define NEED_PGE (1<<(X86_FEATURE_PGE & 31))
+#define NEED_PGE 0
#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index ec666491aaa4..72e5a4491661 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -269,6 +269,11 @@ struct _xsave_hdr {
__u64 reserved2[5];
};
+struct _ymmh_state {
+ /* 16 * 16 bytes for each YMMH-reg */
+ __u32 ymmh_space[64];
+};
+
/*
* Extended state pointed by the fpstate pointer in the sigcontext.
* In addition to the fpstate, information encoded in the xstate_hdr
@@ -278,6 +283,7 @@ struct _xsave_hdr {
struct _xstate {
struct _fpstate fpstate;
struct _xsave_hdr xstate_hdr;
+ struct _ymmh_state ymmh;
/* new processor state extensions go here */
};
diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h
index db68ac8a5ac2..2cae46c7c8a2 100644
--- a/arch/x86/include/asm/uv/uv_mmrs.h
+++ b/arch/x86/include/asm/uv/uv_mmrs.h
@@ -17,6 +17,11 @@
/* ========================================================================= */
/* UVH_BAU_DATA_CONFIG */
/* ========================================================================= */
+#define UVH_LB_BAU_MISC_CONTROL 0x320170UL
+#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15
+#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16
+#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x000000000bUL
+/* 1011 timebase 7 (168millisec) * 3 ticks -> 500ms */
#define UVH_BAU_DATA_CONFIG 0x61680UL
#define UVH_BAU_DATA_CONFIG_32 0x0438
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a918dde46b5..018a0a400799 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn)
/* VIRT <-> MACHINE conversion */
#define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v))))
+#define virt_to_pfn(v) (PFN_DOWN(__pa(v)))
+#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v)))
#define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT))
static inline unsigned long pte_mfn(pte_t pte)
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 08e9a1ac07a9..727acc152344 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -7,6 +7,7 @@
#define XSTATE_FP 0x1
#define XSTATE_SSE 0x2
+#define XSTATE_YMM 0x4
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
@@ -15,7 +16,7 @@
/*
* These are the features that the OS can handle currently.
*/
-#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE)
+#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 098ec84b8c00..f2870920f246 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -431,6 +431,12 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
+ if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
+ /* Make LAPIC timer preferrable over percpu HPET */
+ lapic_clockevent.rating = 150;
+ }
+
memcpy(levt, &lapic_clockevent, sizeof(*levt));
levt->cpumask = cpumask_of(smp_processor_id());
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 49d2f5c739b7..d6712334ce4b 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -563,7 +563,8 @@ void __init uv_system_init(void)
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
int max_pnode = 0;
- unsigned long mmr_base, present;
+ unsigned long mmr_base, present, paddr;
+ unsigned short pnode_mask;
map_low_mmrs();
@@ -606,6 +607,7 @@ void __init uv_system_init(void)
}
}
+ pnode_mask = (1 << n_val) - 1;
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
gnode_upper = (((unsigned long)node_id.s.node_id) &
~((1 << n_val) - 1)) << m_val;
@@ -629,7 +631,7 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1;
+ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
@@ -645,6 +647,16 @@ void __init uv_system_init(void)
lcpu, blade);
}
+ /* Add blade/pnode info for nodes without cpus */
+ for_each_online_node(nid) {
+ if (uv_node_to_blade[nid] >= 0)
+ continue;
+ paddr = node_start_pfn(nid) << PAGE_SHIFT;
+ pnode = (paddr >> m_val) & pnode_mask;
+ blade = boot_pnode_to_blade(pnode);
+ uv_node_to_blade[nid] = blade;
+ }
+
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
map_config_high(max_pnode);
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index f63882728d91..63a88e1f987d 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -182,7 +182,8 @@ void uv_bios_init(void)
memcpy(&uv_systab, tab, sizeof(struct uv_systab));
iounmap(tab);
- printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision);
+ printk(KERN_INFO "EFI UV System Table Revision %d\n",
+ uv_systab.revision);
}
#else /* !CONFIG_EFI */
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 8220ae69849d..c965e5212714 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
+ { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
{ 0, 0, 0, 0 }
};
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 19f6b9d27e83..ecdb682ab516 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -68,6 +68,7 @@ struct acpi_cpufreq_data {
unsigned int max_freq;
unsigned int resume;
unsigned int cpu_feature;
+ u64 saved_aperf, saved_mperf;
};
static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
@@ -152,7 +153,8 @@ struct drv_cmd {
u32 val;
};
-static long do_drv_read(void *_cmd)
+/* Called via smp_call_function_single(), on the target CPU */
+static void do_drv_read(void *_cmd)
{
struct drv_cmd *cmd = _cmd;
u32 h;
@@ -169,10 +171,10 @@ static long do_drv_read(void *_cmd)
default:
break;
}
- return 0;
}
-static long do_drv_write(void *_cmd)
+/* Called via smp_call_function_many(), on the target CPUs */
+static void do_drv_write(void *_cmd)
{
struct drv_cmd *cmd = _cmd;
u32 lo, hi;
@@ -191,23 +193,24 @@ static long do_drv_write(void *_cmd)
default:
break;
}
- return 0;
}
static void drv_read(struct drv_cmd *cmd)
{
cmd->val = 0;
- work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
+ smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1);
}
static void drv_write(struct drv_cmd *cmd)
{
- unsigned int i;
+ int this_cpu;
- for_each_cpu(i, cmd->mask) {
- work_on_cpu(i, do_drv_write, cmd);
- }
+ this_cpu = get_cpu();
+ if (cpumask_test_cpu(this_cpu, cmd->mask))
+ do_drv_write(cmd);
+ smp_call_function_many(cmd->mask, do_drv_write, cmd, 1);
+ put_cpu();
}
static u32 get_cur_val(const struct cpumask *mask)
@@ -241,28 +244,23 @@ static u32 get_cur_val(const struct cpumask *mask)
return cmd.val;
}
-struct perf_cur {
+struct perf_pair {
union {
struct {
u32 lo;
u32 hi;
} split;
u64 whole;
- } aperf_cur, mperf_cur;
+ } aperf, mperf;
};
-
-static long read_measured_perf_ctrs(void *_cur)
+/* Called via smp_call_function_single(), on the target CPU */
+static void read_measured_perf_ctrs(void *_cur)
{
- struct perf_cur *cur = _cur;
+ struct perf_pair *cur = _cur;
- rdmsr(MSR_IA32_APERF, cur->aperf_cur.split.lo, cur->aperf_cur.split.hi);
- rdmsr(MSR_IA32_MPERF, cur->mperf_cur.split.lo, cur->mperf_cur.split.hi);
-
- wrmsr(MSR_IA32_APERF, 0, 0);
- wrmsr(MSR_IA32_MPERF, 0, 0);
-
- return 0;
+ rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
+ rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
}
/*
@@ -281,52 +279,57 @@ static long read_measured_perf_ctrs(void *_cur)
static unsigned int get_measured_perf(struct cpufreq_policy *policy,
unsigned int cpu)
{
- struct perf_cur cur;
+ struct perf_pair readin, cur;
unsigned int perf_percent;
unsigned int retval;
- if (!work_on_cpu(cpu, read_measured_perf_ctrs, &cur))
+ if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
return 0;
+ cur.aperf.whole = readin.aperf.whole -
+ per_cpu(drv_data, cpu)->saved_aperf;
+ cur.mperf.whole = readin.mperf.whole -
+ per_cpu(drv_data, cpu)->saved_mperf;
+ per_cpu(drv_data, cpu)->saved_aperf = readin.aperf.whole;
+ per_cpu(drv_data, cpu)->saved_mperf = readin.mperf.whole;
+
#ifdef __i386__
/*
* We dont want to do 64 bit divide with 32 bit kernel
* Get an approximate value. Return failure in case we cannot get
* an approximate value.
*/
- if (unlikely(cur.aperf_cur.split.hi || cur.mperf_cur.split.hi)) {
+ if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
int shift_count;
u32 h;
- h = max_t(u32, cur.aperf_cur.split.hi, cur.mperf_cur.split.hi);
+ h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
shift_count = fls(h);
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (((unsigned long)(-1) / 100) < cur.aperf_cur.split.lo) {
+ if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
int shift_count = 7;
- cur.aperf_cur.split.lo >>= shift_count;
- cur.mperf_cur.split.lo >>= shift_count;
+ cur.aperf.split.lo >>= shift_count;
+ cur.mperf.split.lo >>= shift_count;
}
- if (cur.aperf_cur.split.lo && cur.mperf_cur.split.lo)
- perf_percent = (cur.aperf_cur.split.lo * 100) /
- cur.mperf_cur.split.lo;
+ if (cur.aperf.split.lo && cur.mperf.split.lo)
+ perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
else
perf_percent = 0;
#else
- if (unlikely(((unsigned long)(-1) / 100) < cur.aperf_cur.whole)) {
+ if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
int shift_count = 7;
- cur.aperf_cur.whole >>= shift_count;
- cur.mperf_cur.whole >>= shift_count;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
}
- if (cur.aperf_cur.whole && cur.mperf_cur.whole)
- perf_percent = (cur.aperf_cur.whole * 100) /
- cur.mperf_cur.whole;
+ if (cur.aperf.whole && cur.mperf.whole)
+ perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
else
perf_percent = 0;
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index 0bd48e65a0ca..ce2ed3e4aad9 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -33,7 +33,6 @@
#include <linux/timex.h>
#include <linux/io.h>
#include <linux/acpi.h>
-#include <linux/kernel.h>
#include <asm/msr.h>
#include <acpi/processor.h>
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 70a10ca100f6..18dfa30795c9 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -18,6 +18,8 @@
#include <linux/init.h>
#include <linux/list.h>
+#include <trace/syscall.h>
+
#include <asm/cacheflush.h>
#include <asm/ftrace.h>
#include <asm/nops.h>
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 4d420de9ac61..98c470c069d1 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -108,40 +108,29 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
EXPORT_SYMBOL_GPL(ucode_cpu_info);
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
-struct update_for_cpu {
- const void __user *buf;
- size_t size;
-};
-
-static long update_for_cpu(void *_ufc)
-{
- struct update_for_cpu *ufc = _ufc;
- int error;
-
- error = microcode_ops->request_microcode_user(smp_processor_id(),
- ufc->buf, ufc->size);
- if (error < 0)
- return error;
- if (!error)
- microcode_ops->apply_microcode(smp_processor_id());
- return error;
-}
-
static int do_microcode_update(const void __user *buf, size_t size)
{
+ cpumask_t old;
int error = 0;
int cpu;
- struct update_for_cpu ufc = { .buf = buf, .size = size };
+
+ old = current->cpus_allowed;
for_each_online_cpu(cpu) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!uci->valid)
continue;
- error = work_on_cpu(cpu, update_for_cpu, &ufc);
+
+ set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
+ error = microcode_ops->request_microcode_user(cpu, buf, size);
if (error < 0)
- break;
+ goto out;
+ if (!error)
+ microcode_ops->apply_microcode(cpu);
}
+out:
+ set_cpus_allowed_ptr(current, &old);
return error;
}
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index dce99dca6cf8..70fd7e414c15 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -679,7 +679,7 @@ void __init get_smp_config(void)
__get_smp_config(0);
}
-static void smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
#ifdef CONFIG_X86_32
@@ -838,7 +838,7 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
-static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
+static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
{
int i;
@@ -866,7 +866,8 @@ static void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
}
}
#else /* CONFIG_X86_IO_APIC */
-static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
+static
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
#endif /* CONFIG_X86_IO_APIC */
static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fe9345c967de..23b7c8f017e2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,7 +21,6 @@
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <linux/signal.h>
-#include <linux/ftrace.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -35,6 +34,8 @@
#include <asm/proto.h>
#include <asm/ds.h>
+#include <trace/syscall.h>
+
#include "tls.h"
enum x86_regset {
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index deb5ebb32c3b..ed0c33761e6d 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -25,6 +25,8 @@ static int uv_bau_retry_limit __read_mostly;
/* position of pnode (which is nasid>>1): */
static int uv_nshift __read_mostly;
+/* base pnode in this partition */
+static int uv_partition_base_pnode __read_mostly;
static unsigned long uv_mmask __read_mostly;
@@ -32,6 +34,34 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);
/*
+ * Determine the first node on a blade.
+ */
+static int __init blade_to_first_node(int blade)
+{
+ int node, b;
+
+ for_each_online_node(node) {
+ b = uv_node_to_blade_id(node);
+ if (blade == b)
+ return node;
+ }
+ return -1; /* shouldn't happen */
+}
+
+/*
+ * Determine the apicid of the first cpu on a blade.
+ */
+static int __init blade_to_first_apicid(int blade)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu)
+ if (blade == uv_cpu_to_blade_id(cpu))
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ return -1;
+}
+
+/*
* Free a software acknowledge hardware resource by clearing its Pending
* bit. This will return a reply to the sender.
* If the message has timed out, a reply has already been sent by the
@@ -67,7 +97,7 @@ static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
cpu = uv_blade_processor_id();
msg->number_of_cpus =
- uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+ uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
this_cpu_mask = 1UL << cpu;
if (msp->seen_by.bits & this_cpu_mask)
return;
@@ -215,14 +245,14 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
* Returns @flush_mask if some remote flushing remains to be done. The
* mask will have some bits still set.
*/
-const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_pnode,
struct bau_desc *bau_desc,
struct cpumask *flush_mask)
{
int completion_status = 0;
int right_shift;
int tries = 0;
- int blade;
+ int pnode;
int bit;
unsigned long mmr_offset;
unsigned long index;
@@ -265,8 +295,8 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
* use the IPI method of shootdown on them.
*/
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- if (blade == this_blade)
+ pnode = uv_cpu_to_pnode(bit);
+ if (pnode == this_pnode)
continue;
cpumask_clear_cpu(bit, flush_mask);
}
@@ -309,16 +339,16 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask);
int i;
int bit;
- int blade;
+ int pnode;
int uv_cpu;
- int this_blade;
+ int this_pnode;
int locals = 0;
struct bau_desc *bau_desc;
cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
uv_cpu = uv_blade_processor_id();
- this_blade = uv_numa_blade_id();
+ this_pnode = uv_hub_info->pnode;
bau_desc = __get_cpu_var(bau_control).descriptor_base;
bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
@@ -326,13 +356,14 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
i = 0;
for_each_cpu(bit, flush_mask) {
- blade = uv_cpu_to_blade_id(bit);
- BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
- if (blade == this_blade) {
+ pnode = uv_cpu_to_pnode(bit);
+ BUG_ON(pnode > (UV_DISTRIBUTION_SIZE - 1));
+ if (pnode == this_pnode) {
locals++;
continue;
}
- bau_node_set(blade, &bau_desc->distribution);
+ bau_node_set(pnode - uv_partition_base_pnode,
+ &bau_desc->distribution);
i++;
}
if (i == 0) {
@@ -350,7 +381,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
bau_desc->payload.address = va;
bau_desc->payload.sending_cpu = cpu;
- return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
+ return uv_flush_send_and_wait(uv_cpu, this_pnode, bau_desc, flush_mask);
}
/*
@@ -418,24 +449,58 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
set_irq_regs(old_regs);
}
+/*
+ * uv_enable_timeouts
+ *
+ * Each target blade (i.e. blades that have cpu's) needs to have
+ * shootdown message timeouts enabled. The timeout does not cause
+ * an interrupt, but causes an error message to be returned to
+ * the sender.
+ */
static void uv_enable_timeouts(void)
{
- int i;
int blade;
- int last_blade;
+ int nblades;
int pnode;
- int cur_cpu = 0;
- unsigned long apicid;
+ unsigned long mmr_image;
- last_blade = -1;
- for_each_online_node(i) {
- blade = uv_node_to_blade_id(i);
- if (blade == last_blade)
+ nblades = uv_num_possible_blades();
+
+ for (blade = 0; blade < nblades; blade++) {
+ if (!uv_blade_nr_possible_cpus(blade))
continue;
- last_blade = blade;
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+
pnode = uv_blade_to_pnode(blade);
- cur_cpu += uv_blade_nr_possible_cpus(i);
+ mmr_image =
+ uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL);
+ /*
+ * Set the timeout period and then lock it in, in three
+ * steps; captures and locks in the period.
+ *
+ * To program the period, the SOFT_ACK_MODE must be off.
+ */
+ mmr_image &= ~((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Set the 4-bit period.
+ */
+ mmr_image &= ~((unsigned long)0xf <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD <<
+ UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
+ /*
+ * Subsequent reversals of the timebase bit (3) cause an
+ * immediate timeout of one or all INTD resources as
+ * indicated in bits 2:0 (7 causes all of them to timeout).
+ */
+ mmr_image |= ((unsigned long)1 <<
+ UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT);
+ uv_write_global_mmr64
+ (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image);
}
}
@@ -482,8 +547,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
stat->requestee, stat->onetlb, stat->alltlb,
stat->s_retry, stat->d_retry, stat->ptc_i);
seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
- uv_read_global_mmr64(uv_blade_to_pnode
- (uv_cpu_to_blade_id(cpu)),
+ uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
stat->sflush, stat->dflush,
stat->retriesok, stat->nomsg,
@@ -617,16 +681,18 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
* finish the initialization of the per-blade control structures
*/
static void __init
-uv_table_bases_finish(int blade, int node, int cur_cpu,
+uv_table_bases_finish(int blade,
struct bau_control *bau_tablesp,
struct bau_desc *adp)
{
struct bau_control *bcp;
- int i;
+ int cpu;
- for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
- bcp = (struct bau_control *)&per_cpu(bau_control, i);
+ for_each_present_cpu(cpu) {
+ if (blade != uv_cpu_to_blade_id(cpu))
+ continue;
+ bcp = (struct bau_control *)&per_cpu(bau_control, cpu);
bcp->bau_msg_head = bau_tablesp->va_queue_first;
bcp->va_queue_first = bau_tablesp->va_queue_first;
bcp->va_queue_last = bau_tablesp->va_queue_last;
@@ -649,11 +715,10 @@ uv_activation_descriptor_init(int node, int pnode)
struct bau_desc *adp;
struct bau_desc *ad2;
- adp = (struct bau_desc *)
- kmalloc_node(16384, GFP_KERNEL, node);
+ adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
BUG_ON(!adp);
- pa = __pa((unsigned long)adp);
+ pa = uv_gpa(adp); /* need the real nasid*/
n = pa >> uv_nshift;
m = pa & uv_mmask;
@@ -667,8 +732,12 @@ uv_activation_descriptor_init(int node, int pnode)
for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
memset(ad2, 0, sizeof(struct bau_desc));
ad2->header.sw_ack_flag = 1;
- ad2->header.base_dest_nodeid =
- uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+ /*
+ * base_dest_nodeid is the first node in the partition, so
+ * the bit map will indicate partition-relative node numbers.
+ * note that base_dest_nodeid is actually a nasid.
+ */
+ ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1;
ad2->header.command = UV_NET_ENDPOINT_INTD;
ad2->header.int_both = 1;
/*
@@ -686,6 +755,8 @@ static struct bau_payload_queue_entry * __init
uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
{
struct bau_payload_queue_entry *pqp;
+ unsigned long pa;
+ int pn;
char *cp;
pqp = (struct bau_payload_queue_entry *) kmalloc_node(
@@ -696,10 +767,14 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
cp = (char *)pqp + 31;
pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
bau_tablesp->va_queue_first = pqp;
+ /*
+ * need the pnode of where the memory was really allocated
+ */
+ pa = uv_gpa(pqp);
+ pn = pa >> uv_nshift;
uv_write_global_mmr64(pnode,
UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
- ((unsigned long)pnode <<
- UV_PAYLOADQ_PNODE_SHIFT) |
+ ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
uv_physnodeaddr(pqp));
uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
uv_physnodeaddr(pqp));
@@ -715,8 +790,9 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
/*
* Initialization of each UV blade's structures
*/
-static int __init uv_init_blade(int blade, int node, int cur_cpu)
+static int __init uv_init_blade(int blade)
{
+ int node;
int pnode;
unsigned long pa;
unsigned long apicid;
@@ -724,16 +800,17 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
struct bau_payload_queue_entry *pqp;
struct bau_control *bau_tablesp;
+ node = blade_to_first_node(blade);
bau_tablesp = uv_table_bases_init(blade, node);
pnode = uv_blade_to_pnode(blade);
adp = uv_activation_descriptor_init(node, pnode);
pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
- uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
+ uv_table_bases_finish(blade, bau_tablesp, adp);
/*
* the below initialization can't be in firmware because the
* messaging IRQ will be determined by the OS
*/
- apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+ apicid = blade_to_first_apicid(blade);
pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
if ((pa & 0xff) != UV_BAU_MESSAGE) {
uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
@@ -748,9 +825,7 @@ static int __init uv_init_blade(int blade, int node, int cur_cpu)
static int __init uv_bau_init(void)
{
int blade;
- int node;
int nblades;
- int last_blade;
int cur_cpu;
if (!is_uv_system())
@@ -763,29 +838,21 @@ static int __init uv_bau_init(void)
uv_bau_retry_limit = 1;
uv_nshift = uv_hub_info->n_val;
uv_mmask = (1UL << uv_hub_info->n_val) - 1;
- nblades = 0;
- last_blade = -1;
- cur_cpu = 0;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- nblades++;
- }
+ nblades = uv_num_possible_blades();
+
uv_bau_table_bases = (struct bau_control **)
kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
BUG_ON(!uv_bau_table_bases);
- last_blade = -1;
- for_each_online_node(node) {
- blade = uv_node_to_blade_id(node);
- if (blade == last_blade)
- continue;
- last_blade = blade;
- uv_init_blade(blade, node, cur_cpu);
- cur_cpu += uv_blade_nr_possible_cpus(blade);
- }
+ uv_partition_base_pnode = 0x7fffffff;
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade) &&
+ (uv_blade_to_pnode(blade) < uv_partition_base_pnode))
+ uv_partition_base_pnode = uv_blade_to_pnode(blade);
+ for (blade = 0; blade < nblades; blade++)
+ if (uv_blade_nr_possible_cpus(blade))
+ uv_init_blade(blade);
+
alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
uv_enable_timeouts();
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c
index 67f9b9dbf800..36afb98675a4 100644
--- a/arch/x86/kernel/uv_sysfs.c
+++ b/arch/x86/kernel/uv_sysfs.c
@@ -21,6 +21,7 @@
#include <linux/sysdev.h>
#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
struct kobject *sgi_uv_kobj;
@@ -47,6 +48,9 @@ static int __init sgi_uv_sysfs_init(void)
{
unsigned long ret;
+ if (!is_uv_system())
+ return -ENODEV;
+
if (!sgi_uv_kobj)
sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj);
if (!sgi_uv_kobj) {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 2b54fe002e94..0a5b04aa98f1 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -324,7 +324,7 @@ void __ref xsave_cntxt_init(void)
}
/*
- * for now OS knows only about FP/SSE
+ * Support only the state known to OS.
*/
pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
xsave_init();
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e94a11e42f98..a2085368a3dc 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -273,15 +273,15 @@ static void lguest_load_idt(const struct desc_ptr *desc)
* controls the entire thing and the Guest asks it to make changes using the
* LOAD_GDT hypercall.
*
- * This is the opposite of the IDT code where we have a LOAD_IDT_ENTRY
- * hypercall and use that repeatedly to load a new IDT. I don't think it
- * really matters, but wouldn't it be nice if they were the same? Wouldn't
- * it be even better if you were the one to send the patch to fix it?
+ * This is the exactly like the IDT code.
*/
static void lguest_load_gdt(const struct desc_ptr *desc)
{
- BUG_ON((desc->size + 1) / 8 != GDT_ENTRIES);
- kvm_hypercall2(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES);
+ unsigned int i;
+ struct desc_struct *gdt = (void *)desc->address;
+
+ for (i = 0; i < (desc->size+1)/8; i++)
+ kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b);
}
/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
@@ -291,7 +291,9 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
const void *desc, int type)
{
native_write_gdt_entry(dt, entrynum, desc, type);
- kvm_hypercall2(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES);
+ /* Tell Host about this new entry. */
+ kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum,
+ dt[entrynum].a, dt[entrynum].b);
}
/* OK, I lied. There are three "thread local storage" GDT entries which change
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index d4c4b2c4dbbe..8a450930834f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -549,7 +549,7 @@ void __init early_ioremap_reset(void)
}
static void __init __early_set_fixmap(enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags)
+ phys_addr_t phys, pgprot_t flags)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *pte;
@@ -568,7 +568,7 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
}
static inline void __init early_set_fixmap(enum fixed_addresses idx,
- unsigned long phys, pgprot_t prot)
+ phys_addr_t phys, pgprot_t prot)
{
if (after_paging_init)
__set_fixmap(idx, phys, prot);
@@ -609,9 +609,10 @@ static int __init check_early_ioremap_leak(void)
late_initcall(check_early_ioremap_leak);
static void __init __iomem *
-__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
+__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
{
- unsigned long offset, last_addr;
+ unsigned long offset;
+ resource_size_t last_addr;
unsigned int nrpages;
enum fixed_addresses idx0, idx;
int i, slot;
@@ -627,15 +628,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
}
if (slot < 0) {
- printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n",
- phys_addr, size);
+ printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n",
+ (u64)phys_addr, size);
WARN_ON(1);
return NULL;
}
if (early_ioremap_debug) {
- printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
- phys_addr, size, slot);
+ printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ",
+ (u64)phys_addr, size, slot);
dump_stack();
}
@@ -682,13 +683,15 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
}
/* Remap an IO device */
-void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *
+early_ioremap(resource_size_t phys_addr, unsigned long size)
{
return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
}
/* Remap memory */
-void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
+void __init __iomem *
+early_memremap(resource_size_t phys_addr, unsigned long size)
{
return __early_ioremap(phys_addr, size, PAGE_KERNEL);
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 41c805718158..e6718bb28065 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -31,7 +31,7 @@
#ifdef CONFIG_X86_PAT
int __read_mostly pat_enabled = 1;
-void __cpuinit pat_disable(const char *reason)
+static inline void pat_disable(const char *reason)
{
pat_enabled = 0;
printk(KERN_INFO "%s\n", reason);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5b7c7c8464fe..7aa03a5389f5 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -345,7 +345,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
fixmaps_set++;
}
-void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
+void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
+ pgprot_t flags)
{
__native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags));
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 82cd39a6cbd3..f09e8c36ee80 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
#include <asm/xen/hypervisor.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
+#include <asm/proto.h>
#include <asm/msr-index.h>
#include <asm/setup.h>
#include <asm/desc.h>
@@ -168,21 +169,23 @@ static void __init xen_banner(void)
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}
+static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
+static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
+
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
+ unsigned maskecx = ~0;
unsigned maskedx = ~0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
- if (*ax == 1)
- maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
- (1 << X86_FEATURE_ACPI) | /* disable ACPI */
- (1 << X86_FEATURE_MCE) | /* disable MCE */
- (1 << X86_FEATURE_MCA) | /* disable MCA */
- (1 << X86_FEATURE_ACC)); /* thermal monitoring */
+ if (*ax == 1) {
+ maskecx = cpuid_leaf1_ecx_mask;
+ maskedx = cpuid_leaf1_edx_mask;
+ }
asm(XEN_EMULATE_PREFIX "cpuid"
: "=a" (*ax),
@@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
"=c" (*cx),
"=d" (*dx)
: "0" (*ax), "2" (*cx));
+
+ *cx &= maskecx;
*dx &= maskedx;
}
+static __init void xen_init_cpuid_mask(void)
+{
+ unsigned int ax, bx, cx, dx;
+
+ cpuid_leaf1_edx_mask =
+ ~((1 << X86_FEATURE_MCE) | /* disable MCE */
+ (1 << X86_FEATURE_MCA) | /* disable MCA */
+ (1 << X86_FEATURE_ACC)); /* thermal monitoring */
+
+ if (!xen_initial_domain())
+ cpuid_leaf1_edx_mask &=
+ ~((1 << X86_FEATURE_APIC) | /* disable local APIC */
+ (1 << X86_FEATURE_ACPI)); /* disable ACPI */
+
+ ax = 1;
+ xen_cpuid(&ax, &bx, &cx, &dx);
+
+ /* cpuid claims we support xsave; try enabling it to see what happens */
+ if (cx & (1 << (X86_FEATURE_XSAVE % 32))) {
+ unsigned long cr4;
+
+ set_in_cr4(X86_CR4_OSXSAVE);
+
+ cr4 = read_cr4();
+
+ if ((cr4 & X86_CR4_OSXSAVE) == 0)
+ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32));
+
+ clear_in_cr4(X86_CR4_OSXSAVE);
+ }
+}
+
static void xen_set_debugreg(int reg, unsigned long val)
{
HYPERVISOR_set_debugreg(reg, val);
@@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries)
static void xen_load_gdt(const struct desc_ptr *dtr)
{
- unsigned long *frames;
unsigned long va = dtr->address;
unsigned int size = dtr->size + 1;
unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ unsigned long frames[pages];
int f;
- struct multicall_space mcs;
/* A GDT can be up to 64k in size, which corresponds to 8192
8-byte entries, or 16 4k pages.. */
@@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr)
BUG_ON(size > 65536);
BUG_ON(va & ~PAGE_MASK);
- mcs = xen_mc_entry(sizeof(*frames) * pages);
- frames = mcs.args;
-
for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
- frames[f] = arbitrary_virt_to_mfn((void *)va);
+ int level;
+ pte_t *ptep = lookup_address(va, &level);
+ unsigned long pfn, mfn;
+ void *virt;
+
+ BUG_ON(ptep == NULL);
+
+ pfn = pte_pfn(*ptep);
+ mfn = pfn_to_mfn(pfn);
+ virt = __va(PFN_PHYS(pfn));
+
+ frames[f] = mfn;
make_lowmem_page_readonly((void *)va);
- make_lowmem_page_readonly(mfn_to_virt(frames[f]));
+ make_lowmem_page_readonly(virt);
}
- MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct));
-
- xen_mc_issue(PARAVIRT_LAZY_CPU);
+ if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
+ BUG();
}
static void load_TLS_descriptor(struct thread_struct *t,
@@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
{
- if (val->type != 0xf && val->type != 0xe)
+ if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
return 0;
info->vector = vector;
@@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
info->cs = gate_segment(*val);
info->flags = val->dpl;
/* interrupt gates clear IF */
- if (val->type == 0xe)
- info->flags |= 4;
+ if (val->type == GATE_INTERRUPT)
+ info->flags |= 1 << 2;
return 1;
}
@@ -872,7 +915,6 @@ static const struct machine_ops __initdata xen_machine_ops = {
.emergency_restart = xen_emergency_restart,
};
-
/* First C function to be called on Xen boot */
asmlinkage void __init xen_start_kernel(void)
{
@@ -897,6 +939,8 @@ asmlinkage void __init xen_start_kernel(void)
xen_init_irq_ops();
+ xen_init_cpuid_mask();
+
#ifdef CONFIG_X86_LOCAL_APIC
/*
* set up the basic apic ops.
@@ -938,6 +982,11 @@ asmlinkage void __init xen_start_kernel(void)
if (!xen_initial_domain())
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+#ifdef CONFIG_X86_64
+ /* Work out if we support NX */
+ check_efer();
+#endif
+
/* Don't do the full vcpu_info placement stuff until we have a
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index db3802fb7b84..9842b1212407 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn)
}
/* Build the parallel p2m_top_mfn structures */
-void xen_setup_mfn_list_list(void)
+static void __init xen_build_mfn_list_list(void)
{
unsigned pfn, idx;
@@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void)
unsigned topidx = idx * P2M_ENTRIES_PER_PAGE;
p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]);
}
+}
+void xen_setup_mfn_list_list(void)
+{
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
@@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void)
p2m_top[topidx] = &mfn_list[pfn];
}
+
+ xen_build_mfn_list_list();
}
unsigned long get_phys_to_machine(unsigned long pfn)
@@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn)
}
EXPORT_SYMBOL_GPL(get_phys_to_machine);
-static void alloc_p2m(unsigned long **pp, unsigned long *mfnp)
+/* install a new p2m_top page */
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p)
{
- unsigned long *p;
+ unsigned topidx = p2m_top_index(pfn);
+ unsigned long **pfnp, *mfnp;
unsigned i;
- p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
- BUG_ON(p == NULL);
+ pfnp = &p2m_top[topidx];
+ mfnp = &p2m_top_mfn[topidx];
for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++)
p[i] = INVALID_P2M_ENTRY;
- if (cmpxchg(pp, p2m_missing, p) != p2m_missing)
- free_page((unsigned long)p);
- else
+ if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) {
*mfnp = virt_to_mfn(p);
+ return true;
+ }
+
+ return false;
}
-void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+static void alloc_p2m(unsigned long pfn)
{
- unsigned topidx, idx;
+ unsigned long *p;
- if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
- BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
- return;
- }
+ p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL);
+ BUG_ON(p == NULL);
+
+ if (!install_p2mtop_page(pfn, p))
+ free_page((unsigned long)p);
+}
+
+/* Try to install p2m mapping; fail if intermediate bits missing */
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+ unsigned topidx, idx;
if (unlikely(pfn >= MAX_DOMAIN_PAGES)) {
BUG_ON(mfn != INVALID_P2M_ENTRY);
- return;
+ return true;
}
topidx = p2m_top_index(pfn);
if (p2m_top[topidx] == p2m_missing) {
- /* no need to allocate a page to store an invalid entry */
if (mfn == INVALID_P2M_ENTRY)
- return;
- alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]);
+ return true;
+ return false;
}
idx = p2m_index(pfn);
p2m_top[topidx][idx] = mfn;
+
+ return true;
+}
+
+void set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+ if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) {
+ BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY);
+ return;
+ }
+
+ if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
+ alloc_p2m(pfn);
+
+ if (!__set_phys_to_machine(pfn, mfn))
+ BUG();
+ }
}
unsigned long arbitrary_virt_to_mfn(void *vaddr)
@@ -987,7 +1019,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
return 0;
}
-void __init xen_mark_init_mm_pinned(void)
+static void __init xen_mark_init_mm_pinned(void)
{
xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
@@ -1270,8 +1302,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
} *args;
struct multicall_space mcs;
- BUG_ON(cpumask_empty(cpus));
- BUG_ON(!mm);
+ if (cpumask_empty(cpus))
+ return; /* nothing to do */
mcs = xen_mc_entry(sizeof(*args));
args = mcs.args;
@@ -1438,6 +1470,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
}
#endif
+static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+{
+ struct mmuext_op op;
+ op.cmd = cmd;
+ op.arg1.mfn = pfn_to_mfn(pfn);
+ if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
+ BUG();
+}
+
/* Early in boot, while setting up the initial pagetable, assume
everything is pinned. */
static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
@@ -1446,22 +1487,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
BUG_ON(mem_map); /* should only be used early */
#endif
make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
+ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
+}
+
+/* Used for pmd and pud */
+static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
+{
+#ifdef CONFIG_FLATMEM
+ BUG_ON(mem_map); /* should only be used early */
+#endif
+ make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
}
/* Early release_pte assumes that all pts are pinned, since there's
only init_mm and anything attached to that is pinned. */
-static void xen_release_pte_init(unsigned long pfn)
+static __init void xen_release_pte_init(unsigned long pfn)
{
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
+static __init void xen_release_pmd_init(unsigned long pfn)
{
- struct mmuext_op op;
- op.cmd = cmd;
- op.arg1.mfn = pfn_to_mfn(pfn);
- if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
- BUG();
+ make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
/* This needs to make sure the new pte page is pinned iff its being
@@ -1750,7 +1798,7 @@ __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd,
}
#endif /* CONFIG_X86_64 */
-static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
+static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
{
pte_t pte;
@@ -1773,6 +1821,9 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
#ifdef CONFIG_X86_LOCAL_APIC
case FIX_APIC_BASE: /* maps dummy local APIC */
#endif
+ case FIX_TEXT_POKE0:
+ case FIX_TEXT_POKE1:
+ /* All local page mappings */
pte = pfn_pte(phys, prot);
break;
@@ -1819,7 +1870,6 @@ __init void xen_post_allocator_init(void)
xen_mark_init_mm_pinned();
}
-
const struct pv_mmu_ops xen_mmu_ops __initdata = {
.pagetable_setup_start = xen_pagetable_setup_start,
.pagetable_setup_done = xen_pagetable_setup_done,
@@ -1843,9 +1893,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
.alloc_pte = xen_alloc_pte_init,
.release_pte = xen_release_pte_init,
- .alloc_pmd = xen_alloc_pte_init,
+ .alloc_pmd = xen_alloc_pmd_init,
.alloc_pmd_clone = paravirt_nop,
- .release_pmd = xen_release_pte_init,
+ .release_pmd = xen_release_pmd_init,
#ifdef CONFIG_HIGHPTE
.kmap_atomic_pte = xen_kmap_atomic_pte,
@@ -1883,8 +1933,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
.make_pud = PV_CALLEE_SAVE(xen_make_pud),
.set_pgd = xen_set_pgd_hyper,
- .alloc_pud = xen_alloc_pte_init,
- .release_pud = xen_release_pte_init,
+ .alloc_pud = xen_alloc_pmd_init,
+ .release_pud = xen_release_pmd_init,
#endif /* PAGETABLE_LEVELS == 4 */
.activate_mm = xen_activate_mm,
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index 24d1b44a337d..da7302624897 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -11,6 +11,9 @@ enum pt_level {
};
+bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
+bool install_p2mtop_page(unsigned long pfn, unsigned long *p);
+
void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 585a6e330837..429834ec1687 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
BUG_ON(rc);
while(per_cpu(cpu_state, cpu) != CPU_ONLINE) {
- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
barrier();
}
@@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask)
/* Make sure other vcpus get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
if (xen_vcpu_stolen(cpu)) {
- HYPERVISOR_sched_op(SCHEDOP_yield, 0);
+ HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
break;
}
}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2f5ef2632ea2..20139464943c 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -57,8 +57,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
bool xen_vcpu_stolen(int vcpu);
-void xen_mark_init_mm_pinned(void);
-
void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP