diff options
Diffstat (limited to 'arch/powerpc/kernel')
24 files changed, 968 insertions, 354 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b23664a0b86c..c002b0410219 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,10 +42,11 @@ obj-$(CONFIG_ALTIVEC) += vecemu.o obj-$(CONFIG_PPC_970_NAP) += idle_power4.o obj-$(CONFIG_PPC_OF) += of_device.o of_platform.o prom_parse.o obj-$(CONFIG_PPC_CLOCK) += clock.o -procfs-$(CONFIG_PPC64) := proc_ppc64.o +procfs-y := proc_powerpc.o obj-$(CONFIG_PROC_FS) += $(procfs-y) rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y) +obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_LPARCFG) += lparcfg.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 0812b0f414bb..a6c2b63227b3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -190,6 +190,11 @@ int main(void) DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_DATA_OFFSET, offsetof(struct paca_struct, data_offset)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest)); + DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb)); + DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max)); +#endif #endif /* CONFIG_PPC64 */ /* RTAS */ @@ -398,14 +403,24 @@ int main(void) DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); + + /* book3s_64 */ +#ifdef CONFIG_PPC64 + DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr)); + DEFINE(VCPU_HOST_RETIP, offsetof(struct kvm_vcpu, arch.host_retip)); + DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2)); + DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr)); + DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr)); + DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem)); + DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter)); + DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler)); + DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags)); +#endif #endif #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_FSL_BOOKE - DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); -#endif #ifdef CONFIG_KVM_EXIT_TIMING DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 0a8439aafdd1..6f4613dd05ef 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -373,7 +373,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) hard_irq_disable(); for_each_irq(i) { - struct irq_desc *desc = irq_desc + i; + struct irq_desc *desc = irq_to_desc(i); if (desc->status & IRQ_INPROGRESS) desc->chip->eoi(i); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index e96cbbd9b449..59c928564a03 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -21,7 +21,6 @@ #include <asm/dma.h> #include <asm/abs_addr.h> -int swiotlb __read_mostly; unsigned int ppc_swiotlb_enable; /* diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index c7eb4e0eb86c..e3be98ffe2a7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -41,6 +41,7 @@ __start_interrupts: . = 0x200 _machine_check_pSeries: HMT_MEDIUM + DO_KVM 0x200 mtspr SPRN_SPRG_SCRATCH0,r13 /* save r13 */ EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) @@ -48,6 +49,7 @@ _machine_check_pSeries: .globl data_access_pSeries data_access_pSeries: HMT_MEDIUM + DO_KVM 0x300 mtspr SPRN_SPRG_SCRATCH0,r13 BEGIN_FTR_SECTION mfspr r13,SPRN_SPRG_PACA @@ -77,6 +79,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_SLB) .globl data_access_slb_pSeries data_access_slb_pSeries: HMT_MEDIUM + DO_KVM 0x380 mtspr SPRN_SPRG_SCRATCH0,r13 mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) @@ -115,6 +118,7 @@ data_access_slb_pSeries: .globl instruction_access_slb_pSeries instruction_access_slb_pSeries: HMT_MEDIUM + DO_KVM 0x480 mtspr SPRN_SPRG_SCRATCH0,r13 mfspr r13,SPRN_SPRG_PACA /* get paca address into r13 */ std r3,PACA_EXSLB+EX_R3(r13) @@ -154,6 +158,7 @@ instruction_access_slb_pSeries: .globl system_call_pSeries system_call_pSeries: HMT_MEDIUM + DO_KVM 0xc00 BEGIN_FTR_SECTION cmpdi r0,0x1ebe beq- 1f @@ -187,14 +192,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) */ performance_monitor_pSeries_1: . = 0xf00 + DO_KVM 0xf00 b performance_monitor_pSeries altivec_unavailable_pSeries_1: . = 0xf20 + DO_KVM 0xf20 b altivec_unavailable_pSeries vsx_unavailable_pSeries_1: . = 0xf40 + DO_KVM 0xf40 b vsx_unavailable_pSeries #ifdef CONFIG_CBE_RAS diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index c38afdb45d7b..925807488022 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -37,6 +37,7 @@ #include <asm/firmware.h> #include <asm/page_64.h> #include <asm/irqflags.h> +#include <asm/kvm_book3s_64_asm.h> /* The physical memory is layed out such that the secondary processor * spin code sits at 0x0000...0x00ff. On server, the vectors follow @@ -165,6 +166,12 @@ exception_marker: #include "exceptions-64s.S" #endif +/* KVM trampoline code needs to be close to the interrupt handlers */ + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include "../kvm/book3s_64_rmhandlers.S" +#endif + _GLOBAL(generic_secondary_thread_init) mr r24,r3 diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 6ded19d01891..678f98cd5e64 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -206,6 +206,8 @@ MachineCheck: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) + li r5,0x00f0 + mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD @@ -222,6 +224,8 @@ DataAccess: stw r10,_DSISR(r11) mr r5,r10 mfspr r4,SPRN_DAR + li r10,0x00f0 + mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */ EXC_XFER_EE_LITE(0x300, handle_page_fault) /* Instruction access exception. @@ -244,6 +248,8 @@ Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) + li r5,0x00f0 + mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) addi r3,r1,STACK_FRAME_OVERHEAD @@ -333,26 +339,20 @@ InstructionTLBMiss: mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ lwz r10, 0(r11) /* Get the pte */ -#ifdef CONFIG_SWAP - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED - mfspr r11, SPRN_MD_TWC /* get the pte address again */ - stw r10, 0(r11) -4: -#else - ori r10, r10, _PAGE_ACCESSED - stw r10, 0(r11) -#endif + andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT + cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT + bne- cr0, 2f + + /* Clear PP lsb, 0x400 */ + rlwinm r10, r10, 0, 22, 20 /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 22 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, 0x00f0 + li r11, 0x00f0 rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ @@ -365,6 +365,22 @@ InstructionTLBMiss: lwz r3, 8(r0) #endif rfi +2: + mfspr r11, SPRN_SRR1 + /* clear all error bits as TLB Miss + * sets a few unconditionally + */ + rlwinm r11, r11, 0, 0xffff + mtspr SPRN_SRR1, r11 + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + b InstructionAccess . = 0x1200 DataStoreTLBMiss: @@ -406,29 +422,45 @@ DataStoreTLBMiss: * above. */ rlwimi r11, r10, 0, 27, 27 + /* Insert the WriteThru flag into the TWC from the Linux PTE. + * It is bit 25 in the Linux PTE and bit 30 in the TWC + */ + rlwimi r11, r10, 32-5, 30, 30 DO_8xx_CPU6(0x3b80, r3) mtspr SPRN_MD_TWC, r11 -#ifdef CONFIG_SWAP - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED -4: - /* and update pte in table */ -#else - ori r10, r10, _PAGE_ACCESSED -#endif - mfspr r11, SPRN_MD_TWC /* get the pte address again */ - stw r10, 0(r11) + /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. + * We also need to know if the insn is a load/store, so: + * Clear _PAGE_PRESENT and load that which will + * trap into DTLB Error with store bit set accordinly. + */ + /* PRESENT=0x1, ACCESSED=0x20 + * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); + * r10 = (r10 & ~PRESENT) | r11; + */ + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT + + /* Honour kernel RO, User NA */ + /* 0x200 == Extended encoding, bit 22 */ + /* r11 = (r10 & _PAGE_USER) >> 2 */ + rlwinm r11, r10, 32-2, 0x200 + or r10, r11, r10 + /* r11 = (r10 & _PAGE_RW) >> 1 */ + rlwinm r11, r10, 32-1, 0x200 + or r10, r11, r10 + /* invert RW and 0x200 bits */ + xori r10, r10, _PAGE_RW | 0x200 /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 22 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior * of the MMU. */ 2: li r11, 0x00f0 + mtspr SPRN_DAR,r11 /* Tag DAR */ rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ @@ -469,97 +501,10 @@ DataTLBError: stw r10, 0(r0) stw r11, 4(r0) - /* First, make sure this was a store operation. - */ - mfspr r10, SPRN_DSISR - andis. r11, r10, 0x0200 /* If set, indicates store op */ - beq 2f - - /* The EA of a data TLB miss is automatically stored in the MD_EPN - * register. The EA of a data TLB error is automatically stored in - * the DAR, but not the MD_EPN register. We must copy the 20 most - * significant bits of the EA from the DAR to MD_EPN before we - * start walking the page tables. We also need to copy the CASID - * value from the M_CASID register. - * Addendum: The EA of a data TLB error is _supposed_ to be stored - * in DAR, but it seems that this doesn't happen in some cases, such - * as when the error is due to a dcbi instruction to a page with a - * TLB that doesn't have the changed bit set. In such cases, there - * does not appear to be any way to recover the EA of the error - * since it is neither in DAR nor MD_EPN. As a workaround, the - * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs - * are initialized in mapin_ram(). This will avoid the problem, - * assuming we only use the dcbi instruction on kernel addresses. - */ mfspr r10, SPRN_DAR - rlwinm r11, r10, 0, 0, 19 - ori r11, r11, MD_EVALID - mfspr r10, SPRN_M_CASID - rlwimi r11, r10, 0, 28, 31 - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r11 - - mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - andi. r11, r10, 0x0800 - beq 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - rlwimi r10, r11, 0, 2, 19 -3: - lwz r11, 0(r10) /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, bail */ - - /* We have a pte table, so fetch the pte from the table. - */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ - mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ - lwz r10, 0(r11) /* Get the pte */ - - andi. r11, r10, _PAGE_RW /* Is it writeable? */ - beq 2f /* Bail out if not */ - - /* Update 'changed', among others. - */ -#ifdef CONFIG_SWAP - ori r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE - /* do not set the _PAGE_ACCESSED bit of a non-present page */ - andi. r11, r10, _PAGE_PRESENT - beq 4f - ori r10, r10, _PAGE_ACCESSED -4: -#else - ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE -#endif - mfspr r11, SPRN_MD_TWC /* Get pte address again */ - stw r10, 0(r11) /* and update pte in table */ - - /* The Linux PTE won't go exactly into the MMU TLB. - * Software indicator bits 21, 22 and 28 must be clear. - * Software indicator bits 24, 25, 26, and 27 must be - * set. All other Linux PTE bits control the behavior - * of the MMU. - */ - li r11, 0x00f0 - rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ - DO_8xx_CPU6(0x3d80, r3) - mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ - - mfspr r10, SPRN_M_TW /* Restore registers */ - lwz r11, 0(r0) - mtcr r11 - lwz r11, 4(r0) -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) -#endif - rfi -2: + cmpwi cr0, r10, 0x00f0 + beq- FixupDAR /* must be a buggy dcbX, icbi insn. */ +DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ mfspr r10, SPRN_M_TW /* Restore registers */ lwz r11, 0(r0) mtcr r11 @@ -588,6 +533,140 @@ DataTLBError: . = 0x2000 +/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions + * by decoding the registers used by the dcbx instruction and adding them. + * DAR is set to the calculated address and r10 also holds the EA on exit. + */ + /* define if you don't want to use self modifying code */ +#define NO_SELF_MODIFYING_CODE +FixupDAR:/* Entry point for dcbx workaround. */ + /* fetch instruction from memory. */ + mfspr r10, SPRN_SRR0 + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r10 + mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */ + cmplwi cr0, r11, 0x0800 + blt- 3f /* Branch if user space */ + lis r11, (swapper_pg_dir-PAGE_OFFSET)@h + ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */ +3: lwz r11, 0(r11) /* Get the level 1 entry */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r11, 0(r11) /* Get the pte */ + /* concat physical page address(r11) and page offset(r10) */ + rlwimi r11, r10, 0, 20, 31 + lwz r11,0(r11) +/* Check if it really is a dcbx instruction. */ +/* dcbt and dcbtst does not generate DTLB Misses/Errors, + * no need to include them here */ + srwi r10, r11, 26 /* check if major OP code is 31 */ + cmpwi cr0, r10, 31 + bne- 141f + rlwinm r10, r11, 0, 21, 30 + cmpwi cr0, r10, 2028 /* Is dcbz? */ + beq+ 142f + cmpwi cr0, r10, 940 /* Is dcbi? */ + beq+ 142f + cmpwi cr0, r10, 108 /* Is dcbst? */ + beq+ 144f /* Fix up store bit! */ + cmpwi cr0, r10, 172 /* Is dcbf? */ + beq+ 142f + cmpwi cr0, r10, 1964 /* Is icbi? */ + beq+ 142f +141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */ + b DARFixed /* Nope, go back to normal TLB processing */ + +144: mfspr r10, SPRN_DSISR + rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ + mtspr SPRN_DSISR, r10 +142: /* continue, it was a dcbx, dcbi instruction. */ +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif +#ifndef NO_SELF_MODIFYING_CODE + andis. r10,r11,0x1f /* test if reg RA is r0 */ + li r10,modified_instr@l + dcbtst r0,r10 /* touch for store */ + rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */ + oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */ + ori r11,r11,532 + stw r11,0(r10) /* store add/and instruction */ + dcbf 0,r10 /* flush new instr. to memory. */ + icbi 0,r10 /* invalidate instr. cache line */ + lwz r11, 4(r0) /* restore r11 from memory */ + mfspr r10, SPRN_M_TW /* restore r10 from M_TW */ + isync /* Wait until new instr is loaded from memory */ +modified_instr: + .space 4 /* this is where the add instr. is stored */ + bne+ 143f + subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ +143: mtdar r10 /* store faulting EA in DAR */ + b DARFixed /* Go back to normal TLB handling */ +#else + mfctr r10 + mtdar r10 /* save ctr reg in DAR */ + rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */ + addi r10, r10, 150f@l /* add start of table */ + mtctr r10 /* load ctr with jump address */ + xor r10, r10, r10 /* sum starts at zero */ + bctr /* jump into table */ +150: + add r10, r10, r0 ;b 151f + add r10, r10, r1 ;b 151f + add r10, r10, r2 ;b 151f + add r10, r10, r3 ;b 151f + add r10, r10, r4 ;b 151f + add r10, r10, r5 ;b 151f + add r10, r10, r6 ;b 151f + add r10, r10, r7 ;b 151f + add r10, r10, r8 ;b 151f + add r10, r10, r9 ;b 151f + mtctr r11 ;b 154f /* r10 needs special handling */ + mtctr r11 ;b 153f /* r11 needs special handling */ + add r10, r10, r12 ;b 151f + add r10, r10, r13 ;b 151f + add r10, r10, r14 ;b 151f + add r10, r10, r15 ;b 151f + add r10, r10, r16 ;b 151f + add r10, r10, r17 ;b 151f + add r10, r10, r18 ;b 151f + add r10, r10, r19 ;b 151f + add r10, r10, r20 ;b 151f + add r10, r10, r21 ;b 151f + add r10, r10, r22 ;b 151f + add r10, r10, r23 ;b 151f + add r10, r10, r24 ;b 151f + add r10, r10, r25 ;b 151f + add r10, r10, r26 ;b 151f + add r10, r10, r27 ;b 151f + add r10, r10, r28 ;b 151f + add r10, r10, r29 ;b 151f + add r10, r10, r30 ;b 151f + add r10, r10, r31 +151: + rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */ + beq 152f /* if reg RA is zero, don't add it */ + addi r11, r11, 150b@l /* add start of table */ + mtctr r11 /* load ctr with jump address */ + rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */ + bctr /* jump into table */ +152: + mfdar r11 + mtctr r11 /* restore ctr reg from DAR */ + mtdar r10 /* save fault EA to DAR */ + b DARFixed /* Go back to normal TLB handling */ + + /* special handling for r10,r11 since these are modified already */ +153: lwz r11, 4(r0) /* load r11 from memory */ + b 155f +154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */ +155: add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b +#endif + .globl giveup_fpu giveup_fpu: blr diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 975788ca05d2..7f4bd7f3b6af 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -944,28 +944,6 @@ _GLOBAL(__setup_e500mc_ivors) blr /* - * extern void loadcam_entry(unsigned int index) - * - * Load TLBCAM[index] entry in to the L2 CAM MMU - */ -_GLOBAL(loadcam_entry) - lis r4,TLBCAM@ha - addi r4,r4,TLBCAM@l - mulli r5,r3,TLBCAM_SIZE - add r3,r5,r4 - lwz r4,0(r3) - mtspr SPRN_MAS0,r4 - lwz r4,4(r3) - mtspr SPRN_MAS1,r4 - lwz r4,8(r3) - mtspr SPRN_MAS2,r4 - lwz r4,12(r3) - mtspr SPRN_MAS3,r4 - tlbwe - isync - blr - -/* * extern void giveup_altivec(struct task_struct *prev) * * The e500 core does not have an AltiVec unit. diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 1882bf419fa6..8dc7547c2377 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -161,7 +161,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, dest++; n--; } - while(n > 4) { + while(n >= 4) { *((u32 *)dest) = *((volatile u32 *)vsrc); eieio(); vsrc += 4; @@ -190,7 +190,7 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) vdest++; n--; } - while(n > 4) { + while(n >= 4) { *((volatile u32 *)vdest) = *((volatile u32 *)src); src += 4; vdest += 4; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 02a334662cc0..f6dca4f4b295 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -87,7 +87,10 @@ extern int tau_interrupts(int); #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 + +#ifndef CONFIG_SPARSE_IRQ EXPORT_SYMBOL(irq_desc); +#endif int distribute_irqs = 1; @@ -189,33 +192,7 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "CPU%d ", j); seq_putc(p, '\n'); - } - - if (i < NR_IRQS) { - desc = get_irq_desc(i); - spin_lock_irqsave(&desc->lock, flags); - action = desc->action; - if (!action || !action->handler) - goto skip; - seq_printf(p, "%3d: ", i); -#ifdef CONFIG_SMP - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#else - seq_printf(p, "%10u ", kstat_irqs(i)); -#endif /* CONFIG_SMP */ - if (desc->chip) - seq_printf(p, " %s ", desc->chip->typename); - else - seq_puts(p, " None "); - seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge "); - seq_printf(p, " %s", action->name); - for (action = action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == NR_IRQS) { + } else if (i == nr_irqs) { #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized){ seq_puts(p, "TAU: "); @@ -225,30 +202,68 @@ skip: } #endif /* CONFIG_PPC32 && CONFIG_TAU_INT*/ seq_printf(p, "BAD: %10u\n", ppc_spurious_interrupts); + + return 0; } + + desc = irq_to_desc(i); + if (!desc) + return 0; + + spin_lock_irqsave(&desc->lock, flags); + + action = desc->action; + if (!action || !action->handler) + goto skip; + + seq_printf(p, "%3d: ", i); +#ifdef CONFIG_SMP + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); +#else + seq_printf(p, "%10u ", kstat_irqs(i)); +#endif /* CONFIG_SMP */ + + if (desc->chip) + seq_printf(p, " %s ", desc->chip->name); + else + seq_puts(p, " None "); + + seq_printf(p, "%s", (desc->status & IRQ_LEVEL) ? "Level " : "Edge "); + seq_printf(p, " %s", action->name); + + for (action = action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + seq_putc(p, '\n'); + +skip: + spin_unlock_irqrestore(&desc->lock, flags); + return 0; } #ifdef CONFIG_HOTPLUG_CPU void fixup_irqs(cpumask_t map) { + struct irq_desc *desc; unsigned int irq; static int warned; for_each_irq(irq) { cpumask_t mask; - if (irq_desc[irq].status & IRQ_PER_CPU) + desc = irq_to_desc(irq); + if (desc && desc->status & IRQ_PER_CPU) continue; - cpumask_and(&mask, irq_desc[irq].affinity, &map); + cpumask_and(&mask, desc->affinity, &map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; } - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, &mask); - else if (irq_desc[irq].action && !(warned++)) + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, &mask); + else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } @@ -275,7 +290,7 @@ static inline void handle_one_irq(unsigned int irq) return; } - desc = irq_desc + irq; + desc = irq_to_desc(irq); saved_sp_limit = current->thread.ksp_limit; irqtp->task = curtp->task; @@ -541,7 +556,7 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, smp_wmb(); /* Clear norequest flags */ - get_irq_desc(i)->status &= ~IRQ_NOREQUEST; + irq_to_desc(i)->status &= ~IRQ_NOREQUEST; /* Legacy flags are left to default at this point, * one can then use irq_create_mapping() to @@ -607,8 +622,16 @@ void irq_set_virq_count(unsigned int count) static int irq_setup_virq(struct irq_host *host, unsigned int virq, irq_hw_number_t hwirq) { + struct irq_desc *desc; + + desc = irq_to_desc_alloc_node(virq, 0); + if (!desc) { + pr_debug("irq: -> allocating desc failed\n"); + goto error; + } + /* Clear IRQ_NOREQUEST flag */ - get_irq_desc(virq)->status &= ~IRQ_NOREQUEST; + desc->status &= ~IRQ_NOREQUEST; /* map it */ smp_wmb(); @@ -617,11 +640,14 @@ static int irq_setup_virq(struct irq_host *host, unsigned int virq, if (host->ops->map(host, virq, hwirq)) { pr_debug("irq: -> mapping failed, freeing\n"); - irq_free_virt(virq, 1); - return -1; + goto error; } return 0; + +error: + irq_free_virt(virq, 1); + return -1; } unsigned int irq_create_direct_mapping(struct irq_host *host) @@ -705,7 +731,7 @@ unsigned int irq_create_mapping(struct irq_host *host, EXPORT_SYMBOL_GPL(irq_create_mapping); unsigned int irq_create_of_mapping(struct device_node *controller, - u32 *intspec, unsigned int intsize) + const u32 *intspec, unsigned int intsize) { struct irq_host *host; irq_hw_number_t hwirq; @@ -738,7 +764,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller, /* Set type if specified and different than the current one */ if (type != IRQ_TYPE_NONE && - type != (get_irq_desc(virq)->status & IRQF_TRIGGER_MASK)) + type != (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK)) set_irq_type(virq, type); return virq; } @@ -810,7 +836,7 @@ void irq_dispose_mapping(unsigned int virq) irq_map[virq].hwirq = host->inval_irq; /* Set some flags */ - get_irq_desc(virq)->status |= IRQ_NOREQUEST; + irq_to_desc(virq)->status |= IRQ_NOREQUEST; /* Free it */ irq_free_virt(virq, 1); @@ -1002,12 +1028,24 @@ void irq_free_virt(unsigned int virq, unsigned int count) spin_unlock_irqrestore(&irq_big_lock, flags); } -void irq_early_init(void) +int arch_early_irq_init(void) { - unsigned int i; + struct irq_desc *desc; + int i; + + for (i = 0; i < NR_IRQS; i++) { + desc = irq_to_desc(i); + if (desc) + desc->status |= IRQ_NOREQUEST; + } - for (i = 0; i < NR_IRQS; i++) - get_irq_desc(i)->status |= IRQ_NOREQUEST; + return 0; +} + +int arch_init_chip_data(struct irq_desc *desc, int node) +{ + desc->status |= IRQ_NOREQUEST; + return 0; } /* We need to create the radix trees late */ @@ -1069,16 +1107,19 @@ static int virq_debug_show(struct seq_file *m, void *private) seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", "chip name", "host name"); - for (i = 1; i < NR_IRQS; i++) { - desc = get_irq_desc(i); + for (i = 1; i < nr_irqs; i++) { + desc = irq_to_desc(i); + if (!desc) + continue; + spin_lock_irqsave(&desc->lock, flags); if (desc->action && desc->action->handler) { seq_printf(m, "%5d ", i); seq_printf(m, "0x%05lx ", virq_to_hw(i)); - if (desc->chip && desc->chip->typename) - p = desc->chip->typename; + if (desc->chip && desc->chip->name) + p = desc->chip->name; else p = none; seq_printf(m, "%-15s ", p); diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index ed0ac4e4b8d8..79a00bb9c64c 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -781,9 +781,9 @@ static int __init lparcfg_init(void) !firmware_has_feature(FW_FEATURE_ISERIES)) mode |= S_IWUSR; - ent = proc_create("ppc64/lparcfg", mode, NULL, &lparcfg_fops); + ent = proc_create("powerpc/lparcfg", mode, NULL, &lparcfg_fops); if (!ent) { - printk(KERN_ERR "Failed to create ppc64/lparcfg\n"); + printk(KERN_ERR "Failed to create powerpc/lparcfg\n"); return -EIO; } diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index da9c0c4c10f3..8649f536f8df 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -502,15 +502,7 @@ _GLOBAL(clear_pages) li r0,PAGE_SIZE/L1_CACHE_BYTES slw r0,r0,r4 mtctr r0 -#ifdef CONFIG_8xx - li r4, 0 -1: stw r4, 0(r3) - stw r4, 4(r3) - stw r4, 8(r3) - stw r4, 12(r3) -#else 1: dcbz 0,r3 -#endif addi r3,r3,L1_CACHE_BYTES bdnz 1b blr @@ -535,15 +527,6 @@ _GLOBAL(copy_page) addi r3,r3,-4 addi r4,r4,-4 -#ifdef CONFIG_8xx - /* don't use prefetch on 8xx */ - li r0,4096/L1_CACHE_BYTES - mtctr r0 -1: COPY_16_BYTES - bdnz 1b - blr - -#else /* not 8xx, we can prefetch */ li r5,4 #if MAX_COPY_PREFETCH > 1 @@ -584,7 +567,6 @@ _GLOBAL(copy_page) li r0,MAX_COPY_PREFETCH li r11,4 b 2b -#endif /* CONFIG_8xx */ /* * void atomic_clear_mask(atomic_t mask, atomic_t *addr) diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index 0ed31f220482..ad461e735aec 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -139,8 +139,8 @@ out: } -static int dev_nvram_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static long dev_nvram_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { switch(cmd) { #ifdef CONFIG_PPC_PMAC @@ -169,11 +169,11 @@ static int dev_nvram_ioctl(struct inode *inode, struct file *file, } const struct file_operations nvram_fops = { - .owner = THIS_MODULE, - .llseek = dev_nvram_llseek, - .read = dev_nvram_read, - .write = dev_nvram_write, - .ioctl = dev_nvram_ioctl, + .owner = THIS_MODULE, + .llseek = dev_nvram_llseek, + .read = dev_nvram_read, + .write = dev_nvram_write, + .unlocked_ioctl = dev_nvram_ioctl, }; static struct miscdevice nvram_dev = { @@ -184,7 +184,7 @@ static struct miscdevice nvram_dev = { #ifdef DEBUG_NVRAM -static void nvram_print_partitions(char * label) +static void __init nvram_print_partitions(char * label) { struct list_head * p; struct nvram_partition * tmp_part; @@ -202,7 +202,7 @@ static void nvram_print_partitions(char * label) #endif -static int nvram_write_header(struct nvram_partition * part) +static int __init nvram_write_header(struct nvram_partition * part) { loff_t tmp_index; int rc; @@ -214,7 +214,7 @@ static int nvram_write_header(struct nvram_partition * part) } -static unsigned char nvram_checksum(struct nvram_header *p) +static unsigned char __init nvram_checksum(struct nvram_header *p) { unsigned int c_sum, c_sum2; unsigned short *sp = (unsigned short *)p->name; /* assume 6 shorts */ @@ -228,32 +228,7 @@ static unsigned char nvram_checksum(struct nvram_header *p) return c_sum; } - -/* - * Find an nvram partition, sig can be 0 for any - * partition or name can be NULL for any name, else - * tries to match both - */ -struct nvram_partition *nvram_find_partition(int sig, const char *name) -{ - struct nvram_partition * part; - struct list_head * p; - - list_for_each(p, &nvram_part->partition) { - part = list_entry(p, struct nvram_partition, partition); - - if (sig && part->header.signature != sig) - continue; - if (name && 0 != strncmp(name, part->header.name, 12)) - continue; - return part; - } - return NULL; -} -EXPORT_SYMBOL(nvram_find_partition); - - -static int nvram_remove_os_partition(void) +static int __init nvram_remove_os_partition(void) { struct list_head *i; struct list_head *j; @@ -319,7 +294,7 @@ static int nvram_remove_os_partition(void) * Will create a partition starting at the first free * space found if space has enough room. */ -static int nvram_create_os_partition(void) +static int __init nvram_create_os_partition(void) { struct nvram_partition *part; struct nvram_partition *new_part; @@ -422,7 +397,7 @@ static int nvram_create_os_partition(void) * 5.) If the max chunk cannot be allocated then try finding a chunk * that will satisfy the minum needed (NVRAM_MIN_REQ). */ -static int nvram_setup_partition(void) +static int __init nvram_setup_partition(void) { struct list_head * p; struct nvram_partition * part; @@ -480,7 +455,7 @@ static int nvram_setup_partition(void) } -static int nvram_scan_partitions(void) +static int __init nvram_scan_partitions(void) { loff_t cur_index = 0; struct nvram_header phead; @@ -706,6 +681,9 @@ int nvram_clear_error_log(void) int clear_word = ERR_FLAG_ALREADY_LOGGED; int rc; + if (nvram_error_log_index == -1) + return -1; + tmp_index = nvram_error_log_index; rc = ppc_md.nvram_write((char *)&clear_word, sizeof(int), &tmp_index); diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 0a03cf70d247..936f04dbfc6f 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -119,13 +119,6 @@ static void perf_callchain_kernel(struct pt_regs *regs, } #ifdef CONFIG_PPC64 - -#ifdef CONFIG_HUGETLB_PAGE -#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize]) -#else -#define is_huge_psize(pagesize) 0 -#endif - /* * On 64-bit we don't want to invoke hash_page on user addresses from * interrupt context, so if the access faults, we read the page tables @@ -135,7 +128,7 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb) { pgd_t *pgdir; pte_t *ptep, pte; - int pagesize; + unsigned shift; unsigned long addr = (unsigned long) ptr; unsigned long offset; unsigned long pfn; @@ -145,17 +138,14 @@ static int read_user_stack_slow(void __user *ptr, void *ret, int nb) if (!pgdir) return -EFAULT; - pagesize = get_slice_psize(current->mm, addr); + ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift); + if (!shift) + shift = PAGE_SHIFT; /* align address to page boundary */ - offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); + offset = addr & ((1UL << shift) - 1); addr -= offset; - if (is_huge_psize(pagesize)) - ptep = huge_pte_offset(current->mm, addr); - else - ptep = find_linux_pte(pgdir, addr); - if (ptep == NULL) return -EFAULT; pte = *ptep; diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index c8b27bb4dbde..425451453e96 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -96,8 +96,6 @@ EXPORT_SYMBOL(copy_4K_page); EXPORT_SYMBOL(isa_io_base); EXPORT_SYMBOL(isa_mem_base); EXPORT_SYMBOL(pci_dram_offset); -EXPORT_SYMBOL(pci_alloc_consistent); -EXPORT_SYMBOL(pci_free_consistent); #endif /* CONFIG_PCI */ EXPORT_SYMBOL(start_thread); @@ -162,7 +160,6 @@ EXPORT_SYMBOL(screen_info); #ifdef CONFIG_PPC32 EXPORT_SYMBOL(timer_interrupt); -EXPORT_SYMBOL(irq_desc); EXPORT_SYMBOL(tb_ticks_per_jiffy); EXPORT_SYMBOL(cacheable_memcpy); EXPORT_SYMBOL(cacheable_memzero); diff --git a/arch/powerpc/kernel/proc_ppc64.c b/arch/powerpc/kernel/proc_powerpc.c index c647ddef40dc..1ed3b8d7981e 100644 --- a/arch/powerpc/kernel/proc_ppc64.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -28,55 +28,7 @@ #include <asm/uaccess.h> #include <asm/prom.h> -static loff_t page_map_seek( struct file *file, loff_t off, int whence); -static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes, - loff_t *ppos); -static int page_map_mmap( struct file *file, struct vm_area_struct *vma ); - -static const struct file_operations page_map_fops = { - .llseek = page_map_seek, - .read = page_map_read, - .mmap = page_map_mmap -}; - -/* - * Create the ppc64 and ppc64/rtas directories early. This allows us to - * assume that they have been previously created in drivers. - */ -static int __init proc_ppc64_create(void) -{ - struct proc_dir_entry *root; - - root = proc_mkdir("ppc64", NULL); - if (!root) - return 1; - - if (!of_find_node_by_path("/rtas")) - return 0; - - if (!proc_mkdir("rtas", root)) - return 1; - - if (!proc_symlink("rtas", NULL, "ppc64/rtas")) - return 1; - - return 0; -} -core_initcall(proc_ppc64_create); - -static int __init proc_ppc64_init(void) -{ - struct proc_dir_entry *pde; - - pde = proc_create_data("ppc64/systemcfg", S_IFREG|S_IRUGO, NULL, - &page_map_fops, vdso_data); - if (!pde) - return 1; - pde->size = PAGE_SIZE; - - return 0; -} -__initcall(proc_ppc64_init); +#ifdef CONFIG_PPC64 static loff_t page_map_seek( struct file *file, loff_t off, int whence) { @@ -120,3 +72,55 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) return 0; } +static const struct file_operations page_map_fops = { + .llseek = page_map_seek, + .read = page_map_read, + .mmap = page_map_mmap +}; + + +static int __init proc_ppc64_init(void) +{ + struct proc_dir_entry *pde; + + pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL, + &page_map_fops, vdso_data); + if (!pde) + return 1; + pde->size = PAGE_SIZE; + + return 0; +} +__initcall(proc_ppc64_init); + +#endif /* CONFIG_PPC64 */ + +/* + * Create the ppc64 and ppc64/rtas directories early. This allows us to + * assume that they have been previously created in drivers. + */ +static int __init proc_ppc64_create(void) +{ + struct proc_dir_entry *root; + + root = proc_mkdir("powerpc", NULL); + if (!root) + return 1; + +#ifdef CONFIG_PPC64 + if (!proc_symlink("ppc64", NULL, "powerpc")) + pr_err("Failed to create link /proc/ppc64 -> /proc/powerpc\n"); +#endif + + if (!of_find_node_by_path("/rtas")) + return 0; + + if (!proc_mkdir("rtas", root)) + return 1; + + if (!proc_symlink("rtas", NULL, "powerpc/rtas")) + return 1; + + return 0; +} +core_initcall(proc_ppc64_create); diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 13011a96a977..a85117d5c9a4 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -6,7 +6,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * /proc/ppc64/rtas/firmware_flash interface + * /proc/powerpc/rtas/firmware_flash interface * * This file implements a firmware_flash interface to pump a firmware * image into the kernel. At reboot time rtas_restart() will see the @@ -740,7 +740,7 @@ static int __init rtas_flash_init(void) return 1; } - firmware_flash_pde = create_flash_pde("ppc64/rtas/" + firmware_flash_pde = create_flash_pde("powerpc/rtas/" FIRMWARE_FLASH_NAME, &rtas_flash_operations); if (firmware_flash_pde == NULL) { @@ -754,7 +754,7 @@ static int __init rtas_flash_init(void) if (rc != 0) goto cleanup; - firmware_update_pde = create_flash_pde("ppc64/rtas/" + firmware_update_pde = create_flash_pde("powerpc/rtas/" FIRMWARE_UPDATE_NAME, &rtas_flash_operations); if (firmware_update_pde == NULL) { @@ -768,7 +768,7 @@ static int __init rtas_flash_init(void) if (rc != 0) goto cleanup; - validate_pde = create_flash_pde("ppc64/rtas/" VALIDATE_FLASH_NAME, + validate_pde = create_flash_pde("powerpc/rtas/" VALIDATE_FLASH_NAME, &validate_flash_operations); if (validate_pde == NULL) { rc = -ENOMEM; @@ -781,7 +781,7 @@ static int __init rtas_flash_init(void) if (rc != 0) goto cleanup; - manage_pde = create_flash_pde("ppc64/rtas/" MANAGE_FLASH_NAME, + manage_pde = create_flash_pde("powerpc/rtas/" MANAGE_FLASH_NAME, &manage_flash_operations); if (manage_pde == NULL) { rc = -ENOMEM; diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c new file mode 100644 index 000000000000..2e4832ab2108 --- /dev/null +++ b/arch/powerpc/kernel/rtasd.c @@ -0,0 +1,539 @@ +/* + * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Communication to userspace based on kernel/printk.c + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/poll.h> +#include <linux/proc_fs.h> +#include <linux/init.h> +#include <linux/vmalloc.h> +#include <linux/spinlock.h> +#include <linux/cpu.h> +#include <linux/workqueue.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/rtas.h> +#include <asm/prom.h> +#include <asm/nvram.h> +#include <asm/atomic.h> +#include <asm/machdep.h> + + +static DEFINE_SPINLOCK(rtasd_log_lock); + +static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait); + +static char *rtas_log_buf; +static unsigned long rtas_log_start; +static unsigned long rtas_log_size; + +static int surveillance_timeout = -1; + +static unsigned int rtas_error_log_max; +static unsigned int rtas_error_log_buffer_max; + +/* RTAS service tokens */ +static unsigned int event_scan; +static unsigned int rtas_event_scan_rate; + +static int full_rtas_msgs = 0; + +/* Stop logging to nvram after first fatal error */ +static int logging_enabled; /* Until we initialize everything, + * make sure we don't try logging + * anything */ +static int error_log_cnt; + +/* + * Since we use 32 bit RTAS, the physical address of this must be below + * 4G or else bad things happen. Allocate this in the kernel data and + * make it big enough. + */ +static unsigned char logdata[RTAS_ERROR_LOG_MAX]; + +static char *rtas_type[] = { + "Unknown", "Retry", "TCE Error", "Internal Device Failure", + "Timeout", "Data Parity", "Address Parity", "Cache Parity", + "Address Invalid", "ECC Uncorrected", "ECC Corrupted", +}; + +static char *rtas_event_type(int type) +{ + if ((type > 0) && (type < 11)) + return rtas_type[type]; + + switch (type) { + case RTAS_TYPE_EPOW: + return "EPOW"; + case RTAS_TYPE_PLATFORM: + return "Platform Error"; + case RTAS_TYPE_IO: + return "I/O Event"; + case RTAS_TYPE_INFO: + return "Platform Information Event"; + case RTAS_TYPE_DEALLOC: + return "Resource Deallocation Event"; + case RTAS_TYPE_DUMP: + return "Dump Notification Event"; + } + + return rtas_type[0]; +} + +/* To see this info, grep RTAS /var/log/messages and each entry + * will be collected together with obvious begin/end. + * There will be a unique identifier on the begin and end lines. + * This will persist across reboots. + * + * format of error logs returned from RTAS: + * bytes (size) : contents + * -------------------------------------------------------- + * 0-7 (8) : rtas_error_log + * 8-47 (40) : extended info + * 48-51 (4) : vendor id + * 52-1023 (vendor specific) : location code and debug data + */ +static void printk_log_rtas(char *buf, int len) +{ + + int i,j,n = 0; + int perline = 16; + char buffer[64]; + char * str = "RTAS event"; + + if (full_rtas_msgs) { + printk(RTAS_DEBUG "%d -------- %s begin --------\n", + error_log_cnt, str); + + /* + * Print perline bytes on each line, each line will start + * with RTAS and a changing number, so syslogd will + * print lines that are otherwise the same. Separate every + * 4 bytes with a space. + */ + for (i = 0; i < len; i++) { + j = i % perline; + if (j == 0) { + memset(buffer, 0, sizeof(buffer)); + n = sprintf(buffer, "RTAS %d:", i/perline); + } + + if ((i % 4) == 0) + n += sprintf(buffer+n, " "); + + n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]); + + if (j == (perline-1)) + printk(KERN_DEBUG "%s\n", buffer); + } + if ((i % perline) != 0) + printk(KERN_DEBUG "%s\n", buffer); + + printk(RTAS_DEBUG "%d -------- %s end ----------\n", + error_log_cnt, str); + } else { + struct rtas_error_log *errlog = (struct rtas_error_log *)buf; + + printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", + error_log_cnt, rtas_event_type(errlog->type), + errlog->severity); + } +} + +static int log_rtas_len(char * buf) +{ + int len; + struct rtas_error_log *err; + + /* rtas fixed header */ + len = 8; + err = (struct rtas_error_log *)buf; + if (err->extended_log_length) { + + /* extended header */ + len += err->extended_log_length; + } + + if (rtas_error_log_max == 0) + rtas_error_log_max = rtas_get_error_log_max(); + + if (len > rtas_error_log_max) + len = rtas_error_log_max; + + return len; +} + +/* + * First write to nvram, if fatal error, that is the only + * place we log the info. The error will be picked up + * on the next reboot by rtasd. If not fatal, run the + * method for the type of error. Currently, only RTAS + * errors have methods implemented, but in the future + * there might be a need to store data in nvram before a + * call to panic(). + * + * XXX We write to nvram periodically, to indicate error has + * been written and sync'd, but there is a possibility + * that if we don't shutdown correctly, a duplicate error + * record will be created on next reboot. + */ +void pSeries_log_error(char *buf, unsigned int err_type, int fatal) +{ + unsigned long offset; + unsigned long s; + int len = 0; + + pr_debug("rtasd: logging event\n"); + if (buf == NULL) + return; + + spin_lock_irqsave(&rtasd_log_lock, s); + + /* get length and increase count */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + len = log_rtas_len(buf); + if (!(err_type & ERR_FLAG_BOOT)) + error_log_cnt++; + break; + case ERR_TYPE_KERNEL_PANIC: + default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + +#ifdef CONFIG_PPC64 + /* Write error to NVRAM */ + if (logging_enabled && !(err_type & ERR_FLAG_BOOT)) + nvram_write_error_log(buf, len, err_type, error_log_cnt); +#endif /* CONFIG_PPC64 */ + + /* + * rtas errors can occur during boot, and we do want to capture + * those somewhere, even if nvram isn't ready (why not?), and even + * if rtasd isn't ready. Put them into the boot log, at least. + */ + if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG) + printk_log_rtas(buf, len); + + /* Check to see if we need to or have stopped logging */ + if (fatal || !logging_enabled) { + logging_enabled = 0; + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + + /* call type specific method for error */ + switch (err_type & ERR_TYPE_MASK) { + case ERR_TYPE_RTAS_LOG: + offset = rtas_error_log_buffer_max * + ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK); + + /* First copy over sequence number */ + memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int)); + + /* Second copy over error log data */ + offset += sizeof(int); + memcpy(&rtas_log_buf[offset], buf, len); + + if (rtas_log_size < LOG_NUMBER) + rtas_log_size += 1; + else + rtas_log_start += 1; + + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + wake_up_interruptible(&rtas_log_wait); + break; + case ERR_TYPE_KERNEL_PANIC: + default: + WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ + spin_unlock_irqrestore(&rtasd_log_lock, s); + return; + } + +} + +static int rtas_log_open(struct inode * inode, struct file * file) +{ + return 0; +} + +static int rtas_log_release(struct inode * inode, struct file * file) +{ + return 0; +} + +/* This will check if all events are logged, if they are then, we + * know that we can safely clear the events in NVRAM. + * Next we'll sit and wait for something else to log. + */ +static ssize_t rtas_log_read(struct file * file, char __user * buf, + size_t count, loff_t *ppos) +{ + int error; + char *tmp; + unsigned long s; + unsigned long offset; + + if (!buf || count < rtas_error_log_buffer_max) + return -EINVAL; + + count = rtas_error_log_buffer_max; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + tmp = kmalloc(count, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + spin_lock_irqsave(&rtasd_log_lock, s); + + /* if it's 0, then we know we got the last one (the one in NVRAM) */ + while (rtas_log_size == 0) { + if (file->f_flags & O_NONBLOCK) { + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = -EAGAIN; + goto out; + } + + if (!logging_enabled) { + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = -ENODATA; + goto out; + } +#ifdef CONFIG_PPC64 + nvram_clear_error_log(); +#endif /* CONFIG_PPC64 */ + + spin_unlock_irqrestore(&rtasd_log_lock, s); + error = wait_event_interruptible(rtas_log_wait, rtas_log_size); + if (error) + goto out; + spin_lock_irqsave(&rtasd_log_lock, s); + } + + offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK); + memcpy(tmp, &rtas_log_buf[offset], count); + + rtas_log_start += 1; + rtas_log_size -= 1; + spin_unlock_irqrestore(&rtasd_log_lock, s); + + error = copy_to_user(buf, tmp, count) ? -EFAULT : count; +out: + kfree(tmp); + return error; +} + +static unsigned int rtas_log_poll(struct file *file, poll_table * wait) +{ + poll_wait(file, &rtas_log_wait, wait); + if (rtas_log_size) + return POLLIN | POLLRDNORM; + return 0; +} + +static const struct file_operations proc_rtas_log_operations = { + .read = rtas_log_read, + .poll = rtas_log_poll, + .open = rtas_log_open, + .release = rtas_log_release, +}; + +static int enable_surveillance(int timeout) +{ + int error; + + error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout); + + if (error == 0) + return 0; + + if (error == -EINVAL) { + printk(KERN_DEBUG "rtasd: surveillance not supported\n"); + return 0; + } + + printk(KERN_ERR "rtasd: could not update surveillance\n"); + return -1; +} + +static void do_event_scan(void) +{ + int error; + do { + memset(logdata, 0, rtas_error_log_max); + error = rtas_call(event_scan, 4, 1, NULL, + RTAS_EVENT_SCAN_ALL_EVENTS, 0, + __pa(logdata), rtas_error_log_max); + if (error == -1) { + printk(KERN_ERR "event-scan failed\n"); + break; + } + + if (error == 0) + pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); + + } while(error == 0); +} + +static void rtas_event_scan(struct work_struct *w); +DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan); + +/* + * Delay should be at least one second since some machines have problems if + * we call event-scan too quickly. + */ +static unsigned long event_scan_delay = 1*HZ; +static int first_pass = 1; + +static void rtas_event_scan(struct work_struct *w) +{ + unsigned int cpu; + + do_event_scan(); + + get_online_cpus(); + + cpu = next_cpu(smp_processor_id(), cpu_online_map); + if (cpu == NR_CPUS) { + cpu = first_cpu(cpu_online_map); + + if (first_pass) { + first_pass = 0; + event_scan_delay = 30*HZ/rtas_event_scan_rate; + + if (surveillance_timeout != -1) { + pr_debug("rtasd: enabling surveillance\n"); + enable_surveillance(surveillance_timeout); + pr_debug("rtasd: surveillance enabled\n"); + } + } + } + + schedule_delayed_work_on(cpu, &event_scan_work, + __round_jiffies_relative(event_scan_delay, cpu)); + + put_online_cpus(); +} + +#ifdef CONFIG_PPC64 +static void retreive_nvram_error_log(void) +{ + unsigned int err_type ; + int rc ; + + /* See if we have any error stored in NVRAM */ + memset(logdata, 0, rtas_error_log_max); + rc = nvram_read_error_log(logdata, rtas_error_log_max, + &err_type, &error_log_cnt); + /* We can use rtas_log_buf now */ + logging_enabled = 1; + if (!rc) { + if (err_type != ERR_FLAG_ALREADY_LOGGED) { + pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0); + } + } +} +#else /* CONFIG_PPC64 */ +static void retreive_nvram_error_log(void) +{ +} +#endif /* CONFIG_PPC64 */ + +static void start_event_scan(void) +{ + printk(KERN_DEBUG "RTAS daemon started\n"); + pr_debug("rtasd: will sleep for %d milliseconds\n", + (30000 / rtas_event_scan_rate)); + + /* Retreive errors from nvram if any */ + retreive_nvram_error_log(); + + schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work, + event_scan_delay); +} + +static int __init rtas_init(void) +{ + struct proc_dir_entry *entry; + + if (!machine_is(pseries) && !machine_is(chrp)) + return 0; + + /* No RTAS */ + event_scan = rtas_token("event-scan"); + if (event_scan == RTAS_UNKNOWN_SERVICE) { + printk(KERN_INFO "rtasd: No event-scan on system\n"); + return -ENODEV; + } + + rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); + if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); + return -ENODEV; + } + + /* Make room for the sequence number */ + rtas_error_log_max = rtas_get_error_log_max(); + rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); + + rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER); + if (!rtas_log_buf) { + printk(KERN_ERR "rtasd: no memory\n"); + return -ENOMEM; + } + + entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL, + &proc_rtas_log_operations); + if (!entry) + printk(KERN_ERR "Failed to create error_log proc entry\n"); + + start_event_scan(); + + return 0; +} +__initcall(rtas_init); + +static int __init surveillance_setup(char *str) +{ + int i; + + /* We only do surveillance on pseries */ + if (!machine_is(pseries)) + return 0; + + if (get_option(&str,&i)) { + if (i >= 0 && i <= 255) + surveillance_timeout = i; + } + + return 1; +} +__setup("surveillance=", surveillance_setup); + +static int __init rtasmsgs_setup(char *str) +{ + if (strcmp(str, "on") == 0) + full_rtas_msgs = 1; + else if (strcmp(str, "off") == 0) + full_rtas_msgs = 0; + + return 1; +} +__setup("rtasmsgs=", rtasmsgs_setup); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index df2c9e932b37..6568406b2a30 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -356,11 +356,6 @@ void __init setup_system(void) */ initialize_cache_info(); - /* - * Initialize irq remapping subsystem - */ - irq_early_init(); - #ifdef CONFIG_PPC_RTAS /* * Initialize RTAS if available diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9b86a74d2815..97196eefef3e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -218,6 +218,9 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) static void stop_this_cpu(void *dummy) { + /* Remove this CPU */ + set_cpu_online(smp_processor_id(), false); + local_irq_disable(); while (1) ; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 956ab33fd73f..e235e52dc4fe 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -461,6 +461,25 @@ static void unregister_cpu_online(unsigned int cpu) cacheinfo_cpu_offline(cpu); } + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE +ssize_t arch_cpu_probe(const char *buf, size_t count) +{ + if (ppc_md.cpu_probe) + return ppc_md.cpu_probe(buf, count); + + return -EINVAL; +} + +ssize_t arch_cpu_release(const char *buf, size_t count) +{ + if (ppc_md.cpu_release) + return ppc_md.cpu_release(buf, count); + + return -EINVAL; +} +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ + #endif /* CONFIG_HOTPLUG_CPU */ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 674800b242d6..9ba2cc88591d 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -269,6 +269,7 @@ void account_system_vtime(struct task_struct *tsk) per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled; local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(account_system_vtime); /* * Transfer the user and system times accumulated in the paca diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 9d1f9354d6ca..804f0f30f227 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -198,28 +198,6 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) info.si_code = code; info.si_addr = (void __user *) addr; force_sig_info(signr, &info, current); - - /* - * Init gets no signals that it doesn't have a handler for. - * That's all very well, but if it has caused a synchronous - * exception and we ignore the resulting signal, it will just - * generate the same exception over and over again and we get - * nowhere. Better to kill it and let the kernel panic. - */ - if (is_global_init(current)) { - __sighandler_t handler; - - spin_lock_irq(¤t->sighand->siglock); - handler = current->sighand->action[signr-1].sa.sa_handler; - spin_unlock_irq(¤t->sighand->siglock); - if (handler == SIG_DFL) { - /* init has generated a synchronous exception - and it doesn't have a handler for the signal */ - printk(KERN_CRIT "init has generated signal %d " - "but has no handler for it\n", signr); - do_exit(signr); - } - } } #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 67b6916f0e94..fe460482fa68 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -58,7 +58,7 @@ _GLOBAL(load_up_altivec) * all 1's */ mfspr r4,SPRN_VRSAVE - cmpdi 0,r4,0 + cmpwi 0,r4,0 bne+ 1f li r4,-1 mtspr SPRN_VRSAVE,r4 |